]> git.proxmox.com Git - ceph.git/blob - ceph/src/os/bluestore/bluestore_types.cc
update sources to 12.2.2
[ceph.git] / ceph / src / os / bluestore / bluestore_types.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2014 Red Hat
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15 #include "bluestore_types.h"
16 #include "common/Formatter.h"
17 #include "common/Checksummer.h"
18 #include "include/stringify.h"
19
20 void ExtentList::add_extents(int64_t start, int64_t count) {
21 AllocExtent *last_extent = NULL;
22 bool can_merge = false;
23
24 if (!m_extents->empty()) {
25 last_extent = &(m_extents->back());
26 uint64_t last_offset = last_extent->end() / m_block_size;
27 uint32_t last_length = last_extent->length / m_block_size;
28 if ((last_offset == (uint64_t) start) &&
29 (!m_max_blocks || (last_length + count) <= m_max_blocks)) {
30 can_merge = true;
31 }
32 }
33
34 if (can_merge) {
35 last_extent->length += (count * m_block_size);
36 } else {
37 m_extents->emplace_back(AllocExtent(start * m_block_size,
38 count * m_block_size));
39 }
40 }
41
42 // bluestore_bdev_label_t
43
// Encode the device label: a fixed-width human-readable prefix
// ("bluestore block device\n" + stringified uuid + "\n", 60 bytes total)
// followed by the versioned binary payload.  decode() skips the prefix
// with advance(60).
void bluestore_bdev_label_t::encode(bufferlist& bl) const
{
  // be slightly friendly to someone who looks at the device
  bl.append("bluestore block device\n");
  bl.append(stringify(osd_uuid));
  bl.append("\n");
  ENCODE_START(2, 1, bl);
  ::encode(osd_uuid, bl);
  ::encode(size, bl);
  ::encode(btime, bl);
  ::encode(description, bl);
  ::encode(meta, bl);   // added in struct version 2
  ENCODE_FINISH(bl);
}
58
// Decode a label written by encode() above.
void bluestore_bdev_label_t::decode(bufferlist::iterator& p)
{
  // skip the 60-byte human-readable prefix written by encode()
  p.advance(60); // see above
  DECODE_START(2, p);
  ::decode(osd_uuid, p);
  ::decode(size, p);
  ::decode(btime, p);
  ::decode(description, p);
  if (struct_v >= 2) {
    // 'meta' only exists in labels written at struct version >= 2
    ::decode(meta, p);
  }
  DECODE_FINISH(p);
}
72
// Dump the label fields; meta key/value pairs are emitted as flat
// string fields keyed by the meta key.
void bluestore_bdev_label_t::dump(Formatter *f) const
{
  f->dump_stream("osd_uuid") << osd_uuid;
  f->dump_unsigned("size", size);
  f->dump_stream("btime") << btime;
  f->dump_string("description", description);
  for (auto& i : meta) {
    f->dump_string(i.first.c_str(), i.second);
  }
}
83
84 void bluestore_bdev_label_t::generate_test_instances(
85 list<bluestore_bdev_label_t*>& o)
86 {
87 o.push_back(new bluestore_bdev_label_t);
88 o.push_back(new bluestore_bdev_label_t);
89 o.back()->size = 123;
90 o.back()->btime = utime_t(4, 5);
91 o.back()->description = "fakey";
92 o.back()->meta["foo"] = "bar";
93 }
94
95 ostream& operator<<(ostream& out, const bluestore_bdev_label_t& l)
96 {
97 return out << "bdev(osd_uuid " << l.osd_uuid
98 << ", size 0x" << std::hex << l.size << std::dec
99 << ", btime " << l.btime
100 << ", desc " << l.description
101 << ", " << l.meta.size() << " meta"
102 << ")";
103 }
104
105 // cnode_t
106
// Dump the collection node (just its hash bit count).
void bluestore_cnode_t::dump(Formatter *f) const
{
  f->dump_unsigned("bits", bits);
}
111
112 void bluestore_cnode_t::generate_test_instances(list<bluestore_cnode_t*>& o)
113 {
114 o.push_back(new bluestore_cnode_t());
115 o.push_back(new bluestore_cnode_t(0));
116 o.push_back(new bluestore_cnode_t(123));
117 }
118
119 // bluestore_extent_ref_map_t
120
121 void bluestore_extent_ref_map_t::_check() const
122 {
123 uint64_t pos = 0;
124 unsigned refs = 0;
125 for (const auto &p : ref_map) {
126 if (p.first < pos)
127 assert(0 == "overlap");
128 if (p.first == pos && p.second.refs == refs)
129 assert(0 == "unmerged");
130 pos = p.first + p.second.length;
131 refs = p.second.refs;
132 }
133 }
134
// If the record immediately to the left of p is contiguous with p and
// has the same refcount, merge p into it.  On merge, p is erased and
// repointed at the combined (left) record.
void bluestore_extent_ref_map_t::_maybe_merge_left(
  map<uint64_t,record_t>::iterator& p)
{
  if (p == ref_map.begin())
    return;
  auto q = p;
  --q;
  if (q->second.refs == p->second.refs &&
      q->first + q->second.length == p->first) {
    q->second.length += p->second.length;
    ref_map.erase(p);
    p = q;  // caller's iterator now names the merged record
  }
}
149
// Take a reference on [offset, offset+length).  Ranges absent from the
// map are inserted with refcount 1; records that partially overlap are
// split so refcounts remain accurate per sub-range, and adjacent records
// with equal refcounts are re-merged afterwards.
void bluestore_extent_ref_map_t::get(uint64_t offset, uint32_t length)
{
  auto p = ref_map.lower_bound(offset);
  if (p != ref_map.begin()) {
    --p;
    if (p->first + p->second.length <= offset) {
      ++p;  // previous record ends before offset; use lower_bound result
    }
  }
  while (length > 0) {
    if (p == ref_map.end()) {
      // nothing after offset; add the whole thing.
      p = ref_map.insert(
	map<uint64_t,record_t>::value_type(offset, record_t(length, 1))).first;
      break;
    }
    if (p->first > offset) {
      // gap
      uint64_t newlen = MIN(p->first - offset, length);
      p = ref_map.insert(
	map<uint64_t,record_t>::value_type(offset,
					   record_t(newlen, 1))).first;
      offset += newlen;
      length -= newlen;
      _maybe_merge_left(p);
      ++p;
      continue;
    }
    if (p->first < offset) {
      // split off the portion before offset
      assert(p->first + p->second.length > offset);
      uint64_t left = p->first + p->second.length - offset;
      p->second.length = offset - p->first;
      p = ref_map.insert(map<uint64_t,record_t>::value_type(
			   offset, record_t(left, p->second.refs))).first;
      // continue below
    }
    assert(p->first == offset);
    if (length < p->second.length) {
      // only the head of this record is covered; split off the tail
      // (same refcount), then bump the head
      ref_map.insert(make_pair(offset + length,
			       record_t(p->second.length - length,
					p->second.refs)));
      p->second.length = length;
      ++p->second.refs;
      break;
    }
    // entire record covered: bump and advance
    ++p->second.refs;
    offset += p->second.length;
    length -= p->second.length;
    _maybe_merge_left(p);
    ++p;
  }
  if (p != ref_map.end())
    _maybe_merge_left(p);
  //_check();
}
206
// Drop a reference on [offset, offset+length), which must be entirely
// present in the map (asserts otherwise).  Sub-ranges whose refcount
// drops to zero are erased and, if 'release' is non-null, appended to
// *release for deallocation.  If 'maybe_unshared' is non-null, it is set
// to true when no remaining record has refcount > 1 after the put.
void bluestore_extent_ref_map_t::put(
  uint64_t offset, uint32_t length,
  PExtentVector *release,
  bool *maybe_unshared)
{
  //NB: existing entries in 'release' container must be preserved!
  bool unshared = true;
  auto p = ref_map.lower_bound(offset);
  if (p == ref_map.end() || p->first > offset) {
    if (p == ref_map.begin()) {
      assert(0 == "put on missing extent (nothing before)");
    }
    --p;
    if (p->first + p->second.length <= offset) {
      assert(0 == "put on missing extent (gap)");
    }
  }
  if (p->first < offset) {
    // split so our range starts exactly on a record boundary
    uint64_t left = p->first + p->second.length - offset;
    p->second.length = offset - p->first;
    if (p->second.refs != 1) {
      unshared = false;
    }
    p = ref_map.insert(map<uint64_t,record_t>::value_type(
			 offset, record_t(left, p->second.refs))).first;
  }
  while (length > 0) {
    assert(p->first == offset);
    if (length < p->second.length) {
      // final, partially-covered record: split off the tail, then
      // dereference (or erase/release) the head
      if (p->second.refs != 1) {
	unshared = false;
      }
      ref_map.insert(make_pair(offset + length,
			       record_t(p->second.length - length,
					p->second.refs)));
      if (p->second.refs > 1) {
	p->second.length = length;
	--p->second.refs;
	if (p->second.refs != 1) {
	  unshared = false;
	}
	_maybe_merge_left(p);
      } else {
	if (release)
	  release->push_back(bluestore_pextent_t(p->first, length));
	ref_map.erase(p);
      }
      goto out;
    }
    // record fully covered by the remaining range
    offset += p->second.length;
    length -= p->second.length;
    if (p->second.refs > 1) {
      --p->second.refs;
      if (p->second.refs != 1) {
	unshared = false;
      }
      _maybe_merge_left(p);
      ++p;
    } else {
      if (release)
	release->push_back(bluestore_pextent_t(p->first, p->second.length));
      ref_map.erase(p++);
    }
  }
  if (p != ref_map.end())
    _maybe_merge_left(p);
  //_check();
out:
  if (maybe_unshared) {
    if (unshared) {
      // we haven't seen a ref != 1 yet; check the whole map.
      for (auto& p : ref_map) {
	if (p.second.refs != 1) {
	  unshared = false;
	  break;
	}
      }
    }
    *maybe_unshared = unshared;
  }
}
288
// Return true iff every byte of [offset, offset+length) is referenced,
// allowing the range to span several contiguous records.
bool bluestore_extent_ref_map_t::contains(uint64_t offset, uint32_t length) const
{
  auto p = ref_map.lower_bound(offset);
  if (p == ref_map.end() || p->first > offset) {
    if (p == ref_map.begin()) {
      return false; // nothing before
    }
    --p;
    if (p->first + p->second.length <= offset) {
      return false; // gap
    }
  }
  while (length > 0) {
    if (p == ref_map.end())
      return false;
    if (p->first > offset)
      return false;  // hole between records
    if (p->first + p->second.length >= offset + length)
      return true;   // this record covers the remainder of the range
    // consume the overlapping part and continue with the next record
    uint64_t overlap = p->first + p->second.length - offset;
    offset += overlap;
    length -= overlap;
    ++p;
  }
  return true;
}
315
316 bool bluestore_extent_ref_map_t::intersects(
317 uint64_t offset,
318 uint32_t length) const
319 {
320 auto p = ref_map.lower_bound(offset);
321 if (p != ref_map.begin()) {
322 --p;
323 if (p->first + p->second.length <= offset) {
324 ++p;
325 }
326 }
327 if (p == ref_map.end())
328 return false;
329 if (p->first >= offset + length)
330 return false;
331 return true; // intersects p!
332 }
333
// Dump the map as an array of {offset, length, refs} objects.
void bluestore_extent_ref_map_t::dump(Formatter *f) const
{
  f->open_array_section("ref_map");
  for (auto& p : ref_map) {
    f->open_object_section("ref");
    f->dump_unsigned("offset", p.first);
    f->dump_unsigned("length", p.second.length);
    f->dump_unsigned("refs", p.second.refs);
    f->close_section();
  }
  f->close_section();
}
346
347 void bluestore_extent_ref_map_t::generate_test_instances(
348 list<bluestore_extent_ref_map_t*>& o)
349 {
350 o.push_back(new bluestore_extent_ref_map_t);
351 o.push_back(new bluestore_extent_ref_map_t);
352 o.back()->get(10, 10);
353 o.back()->get(18, 22);
354 o.back()->get(20, 20);
355 o.back()->get(10, 25);
356 o.back()->get(15, 20);
357 }
358
359 ostream& operator<<(ostream& out, const bluestore_extent_ref_map_t& m)
360 {
361 out << "ref_map(";
362 for (auto p = m.ref_map.begin(); p != m.ref_map.end(); ++p) {
363 if (p != m.ref_map.begin())
364 out << ",";
365 out << std::hex << "0x" << p->first << "~" << p->second.length << std::dec
366 << "=" << p->second.refs;
367 }
368 out << ")";
369 return out;
370 }
371
372 // bluestore_blob_use_tracker_t
373
374 void bluestore_blob_use_tracker_t::allocate()
375 {
376 assert(num_au != 0);
377 bytes_per_au = new uint32_t[num_au];
378 for (uint32_t i = 0; i < num_au; ++i) {
379 bytes_per_au[i] = 0;
380 }
381 }
382
383 void bluestore_blob_use_tracker_t::init(
384 uint32_t full_length, uint32_t _au_size) {
385 assert(!au_size || is_empty());
386 assert(_au_size > 0);
387 assert(full_length > 0);
388 clear();
389 uint32_t _num_au = ROUND_UP_TO(full_length, _au_size) / _au_size;
390 au_size = _au_size;
391 if( _num_au > 1 ) {
392 num_au = _num_au;
393 allocate();
394 }
395 }
396
// Account a use of [offset, offset+length).  Single-AU trackers
// (num_au == 0) just bump total_bytes; otherwise the length is
// apportioned to each allocation unit it touches.
void bluestore_blob_use_tracker_t::get(
  uint32_t offset, uint32_t length)
{
  assert(au_size);
  if (!num_au) {
    total_bytes += length;
  }else {
    auto end = offset + length;

    while (offset < end) {
      auto phase = offset % au_size;  // position within the current AU
      bytes_per_au[offset / au_size] +=
	MIN(au_size - phase, end - offset);
      // advance to the start of the next AU
      offset += (phase ? au_size - phase : au_size);
    }
  }
}
414
415 bool bluestore_blob_use_tracker_t::put(
416 uint32_t offset, uint32_t length,
417 PExtentVector *release_units)
418 {
419 assert(au_size);
420 if (release_units) {
421 release_units->clear();
422 }
423 bool maybe_empty = true;
424 if (!num_au) {
425 assert(total_bytes >= length);
426 total_bytes -= length;
427 } else {
428 auto end = offset + length;
429 uint64_t next_offs = 0;
430 while (offset < end) {
431 auto phase = offset % au_size;
432 size_t pos = offset / au_size;
433 auto diff = MIN(au_size - phase, end - offset);
434 assert(diff <= bytes_per_au[pos]);
435 bytes_per_au[pos] -= diff;
436 offset += (phase ? au_size - phase : au_size);
437 if (bytes_per_au[pos] == 0) {
438 if (release_units) {
439 if (release_units->empty() || next_offs != pos * au_size) {
440 release_units->emplace_back(pos * au_size, au_size);
441 } else {
442 release_units->back().length += au_size;
443 }
444 next_offs += au_size;
445 }
446 } else {
447 maybe_empty = false; // micro optimization detecting we aren't empty
448 // even in the affected extent
449 }
450 }
451 }
452 bool empty = maybe_empty ? !is_not_empty() : false;
453 if (empty && release_units) {
454 release_units->clear();
455 }
456 return empty;
457 }
458
// Splitting is only supported for multi-AU trackers.
bool bluestore_blob_use_tracker_t::can_split() const
{
  return num_au > 0;
}
463
// A split point must be AU-aligned and strictly inside the tracked range.
bool bluestore_blob_use_tracker_t::can_split_at(uint32_t blob_offset) const
{
  assert(au_size);
  return (blob_offset % au_size) == 0 &&
    blob_offset < num_au * au_size;
}
470
// Move accounting for [blob_offset, end) into the fresh tracker *r,
// keeping [0, blob_offset) here.  blob_offset must satisfy can_split_at().
void bluestore_blob_use_tracker_t::split(
  uint32_t blob_offset,
  bluestore_blob_use_tracker_t* r)
{
  assert(au_size);
  assert(can_split());
  assert(can_split_at(blob_offset));
  assert(r->is_empty());

  uint32_t new_num_au = blob_offset / au_size;
  r->init( (num_au - new_num_au) * au_size, au_size);

  // transfer per-AU byte counts for the right-hand part into r
  for (auto i = new_num_au; i < num_au; i++) {
    r->get((i - new_num_au) * au_size, bytes_per_au[i]);
    bytes_per_au[i] = 0;
  }
  if (new_num_au == 0) {
    clear();
  } else if (new_num_au == 1) {
    // collapse to the compact single-AU representation (num_au == 0);
    // capture the remaining count before clear() releases the array
    uint32_t tmp = bytes_per_au[0];
    uint32_t _au_size = au_size;
    clear();
    au_size = _au_size;
    total_bytes = tmp;
  } else {
    num_au = new_num_au;
  }
}
499
// Compare two trackers for equivalent usage accounting.  When both use
// the same representation the fields are compared directly; in the mixed
// case (one compact single-AU form, one per-AU array) they are equal iff
// the aggregate referenced byte counts match.
bool bluestore_blob_use_tracker_t::equal(
  const bluestore_blob_use_tracker_t& other) const
{
  if (!num_au && !other.num_au) {
    return total_bytes == other.total_bytes && au_size == other.au_size;
  } else if (num_au && other.num_au) {
    if (num_au != other.num_au || au_size != other.au_size) {
      return false;
    }
    for (size_t i = 0; i < num_au; i++) {
      if (bytes_per_au[i] != other.bytes_per_au[i]) {
	return false;
      }
    }
    return true;
  }

  // mixed representations: sum the array side and compare against the
  // other side's referenced total
  uint32_t n = num_au ? num_au : other.num_au;
  uint32_t referenced =
    num_au ? other.get_referenced_bytes() : get_referenced_bytes();
  auto bytes_per_au_tmp = num_au ? bytes_per_au : other.bytes_per_au;
  uint32_t my_referenced = 0;
  for (size_t i = 0; i < n; i++) {
    my_referenced += bytes_per_au_tmp[i];
    if (my_referenced > referenced) {
      return false;  // early out once the running sum exceeds the target
    }
  }
  return my_referenced == referenced;
}
530
// Dump the tracker: compact single-AU form emits total_bytes, otherwise
// the per-AU byte counters are emitted as an array.
void bluestore_blob_use_tracker_t::dump(Formatter *f) const
{
  f->dump_unsigned("num_au", num_au);
  f->dump_unsigned("au_size", au_size);
  if (!num_au) {
    f->dump_unsigned("total_bytes", total_bytes);
  } else {
    f->open_array_section("bytes_per_au");
    for (size_t i = 0; i < num_au; ++i) {
      f->dump_unsigned("", bytes_per_au[i]);
    }
    f->close_section();
  }
}
545
546 void bluestore_blob_use_tracker_t::generate_test_instances(
547 list<bluestore_blob_use_tracker_t*>& o)
548 {
549 o.push_back(new bluestore_blob_use_tracker_t());
550 o.back()->init(16, 16);
551 o.back()->get(10, 10);
552 o.back()->get(10, 5);
553 o.push_back(new bluestore_blob_use_tracker_t());
554 o.back()->init(60, 16);
555 o.back()->get(18, 22);
556 o.back()->get(20, 20);
557 o.back()->get(15, 20);
558 }
559
560 ostream& operator<<(ostream& out, const bluestore_blob_use_tracker_t& m)
561 {
562 out << "use_tracker(" << std::hex;
563 if (!m.num_au) {
564 out << "0x" << m.au_size
565 << " "
566 << "0x" << m.total_bytes;
567 } else {
568 out << "0x" << m.num_au
569 << "*0x" << m.au_size
570 << " 0x[";
571 for (size_t i = 0; i < m.num_au; ++i) {
572 if (i != 0)
573 out << ",";
574 out << m.bytes_per_au[i];
575 }
576 out << "]";
577 }
578 out << std::dec << ")";
579 return out;
580 }
581
582 // bluestore_pextent_t
583
// Dump the physical extent's offset and length.
void bluestore_pextent_t::dump(Formatter *f) const
{
  f->dump_unsigned("offset", offset);
  f->dump_unsigned("length", length);
}
589
// Print as "0xoffset~length" (hex); invalid (hole) extents print as "!~length".
ostream& operator<<(ostream& out, const bluestore_pextent_t& o) {
  if (o.is_valid())
    return out << "0x" << std::hex << o.offset << "~" << o.length << std::dec;
  else
    return out << "!~" << std::hex << o.length << std::dec;
}
596
// Test instances: a default extent and a minimal valid one.
void bluestore_pextent_t::generate_test_instances(list<bluestore_pextent_t*>& ls)
{
  ls.push_back(new bluestore_pextent_t);
  ls.push_back(new bluestore_pextent_t(1, 2));
}
602
603 // bluestore_blob_t
604
605 string bluestore_blob_t::get_flags_string(unsigned flags)
606 {
607 string s;
608 if (flags & FLAG_COMPRESSED) {
609 if (s.length())
610 s += '+';
611 s += "compressed";
612 }
613 if (flags & FLAG_CSUM) {
614 if (s.length())
615 s += '+';
616 s += "csum";
617 }
618 if (flags & FLAG_HAS_UNUSED) {
619 if (s.length())
620 s += '+';
621 s += "has_unused";
622 }
623 if (flags & FLAG_SHARED) {
624 if (s.length())
625 s += '+';
626 s += "shared";
627 }
628
629 return s;
630 }
631
// Size in bytes of one checksum value for this blob's csum type.
size_t bluestore_blob_t::get_csum_value_size() const
{
  return Checksummer::get_csum_value_size(csum_type);
}
636
// Dump the blob: pextents, lengths, flags, checksum configuration and
// the individual per-chunk checksum values.
void bluestore_blob_t::dump(Formatter *f) const
{
  f->open_array_section("extents");
  for (auto& p : extents) {
    f->dump_object("extent", p);
  }
  f->close_section();
  f->dump_unsigned("logical_length", logical_length);
  f->dump_unsigned("compressed_length", compressed_length);
  f->dump_unsigned("flags", flags);
  f->dump_unsigned("csum_type", csum_type);
  f->dump_unsigned("csum_chunk_order", csum_chunk_order);
  f->open_array_section("csum_data");
  size_t n = get_csum_count();
  for (unsigned i = 0; i < n; ++i)
    f->dump_unsigned("csum", get_csum_item(i));
  f->close_section();
  f->dump_unsigned("unused", unused);
}
656
// Test instances: defaults plus a blob with extents, checksums, unused
// ranges and an invalid (hole) pextent in the middle.
void bluestore_blob_t::generate_test_instances(list<bluestore_blob_t*>& ls)
{
  ls.push_back(new bluestore_blob_t);
  ls.push_back(new bluestore_blob_t(0));
  ls.push_back(new bluestore_blob_t);
  ls.back()->allocated_test(bluestore_pextent_t(111, 222));
  ls.push_back(new bluestore_blob_t);
  ls.back()->init_csum(Checksummer::CSUM_XXHASH32, 16, 65536);
  // claim_malloc takes ownership of the strdup'ed buffer; only the
  // first 4 bytes are used as checksum data
  ls.back()->csum_data = buffer::claim_malloc(4, strdup("abcd"));
  ls.back()->add_unused(0, 3);
  ls.back()->add_unused(8, 8);
  ls.back()->allocated_test(bluestore_pextent_t(0x40100000, 0x10000));
  ls.back()->allocated_test(
    bluestore_pextent_t(bluestore_pextent_t::INVALID_OFFSET, 0x1000));
  ls.back()->allocated_test(bluestore_pextent_t(0x40120000, 0x10000));
}
673
// Human-readable blob summary: extents, optional compression ratio,
// flags, checksum type/chunk size and the unused bitmap (all hex).
ostream& operator<<(ostream& out, const bluestore_blob_t& o)
{
  out << "blob(" << o.get_extents();
  if (o.is_compressed()) {
    out << " clen 0x" << std::hex
	<< o.get_logical_length()
	<< " -> 0x"
	<< o.get_compressed_payload_length()
	<< std::dec;
  }
  if (o.flags) {
    out << " " << o.get_flags_string();
  }
  if (o.has_csum()) {
    // chunk size is 2^csum_chunk_order
    out << " " << Checksummer::get_csum_type_string(o.csum_type)
	<< "/0x" << std::hex << (1ull << o.csum_chunk_order) << std::dec;
  }
  if (o.has_unused())
    out << " unused=0x" << std::hex << o.unused << std::dec;
  out << ")";
  return out;
}
696
697 void bluestore_blob_t::calc_csum(uint64_t b_off, const bufferlist& bl)
698 {
699 switch (csum_type) {
700 case Checksummer::CSUM_XXHASH32:
701 Checksummer::calculate<Checksummer::xxhash32>(
702 get_csum_chunk_size(), b_off, bl.length(), bl, &csum_data);
703 break;
704 case Checksummer::CSUM_XXHASH64:
705 Checksummer::calculate<Checksummer::xxhash64>(
706 get_csum_chunk_size(), b_off, bl.length(), bl, &csum_data);
707 break;;
708 case Checksummer::CSUM_CRC32C:
709 Checksummer::calculate<Checksummer::crc32c>(
710 get_csum_chunk_size(), b_off, bl.length(), bl, &csum_data);
711 break;
712 case Checksummer::CSUM_CRC32C_16:
713 Checksummer::calculate<Checksummer::crc32c_16>(
714 get_csum_chunk_size(), b_off, bl.length(), bl, &csum_data);
715 break;
716 case Checksummer::CSUM_CRC32C_8:
717 Checksummer::calculate<Checksummer::crc32c_8>(
718 get_csum_chunk_size(), b_off, bl.length(), bl, &csum_data);
719 break;
720 }
721 }
722
// Verify the data in 'bl' (positioned at blob offset b_off) against the
// stored checksums.  Returns 0 on success; -1 on a mismatch, with
// *b_bad_off set to the offset of the bad chunk and *bad_csum to the
// computed value; -EOPNOTSUPP for an unknown csum type.
int bluestore_blob_t::verify_csum(uint64_t b_off, const bufferlist& bl,
  int* b_bad_off, uint64_t *bad_csum) const
{
  int r = 0;

  *b_bad_off = -1;  // Checksummer::verify returns -1 when all chunks match
  switch (csum_type) {
  case Checksummer::CSUM_NONE:
    break;
  case Checksummer::CSUM_XXHASH32:
    *b_bad_off = Checksummer::verify<Checksummer::xxhash32>(
      get_csum_chunk_size(), b_off, bl.length(), bl, csum_data, bad_csum);
    break;
  case Checksummer::CSUM_XXHASH64:
    *b_bad_off = Checksummer::verify<Checksummer::xxhash64>(
      get_csum_chunk_size(), b_off, bl.length(), bl, csum_data, bad_csum);
    break;
  case Checksummer::CSUM_CRC32C:
    *b_bad_off = Checksummer::verify<Checksummer::crc32c>(
      get_csum_chunk_size(), b_off, bl.length(), bl, csum_data, bad_csum);
    break;
  case Checksummer::CSUM_CRC32C_16:
    *b_bad_off = Checksummer::verify<Checksummer::crc32c_16>(
      get_csum_chunk_size(), b_off, bl.length(), bl, csum_data, bad_csum);
    break;
  case Checksummer::CSUM_CRC32C_8:
    *b_bad_off = Checksummer::verify<Checksummer::crc32c_8>(
      get_csum_chunk_size(), b_off, bl.length(), bl, csum_data, bad_csum);
    break;
  default:
    r = -EOPNOTSUPP;
    break;
  }

  if (r < 0)
    return r;
  else if (*b_bad_off >= 0)
    return -1; // bad checksum
  else
    return 0;
}
764
// Record that [b_off, b_off+length) of the blob is now backed by the
// physical extents in 'allocs'.  Two cases: an empty blob gets its
// initial extent list (with an invalid head covering [0, b_off) if
// needed); otherwise the target range must currently be covered by
// invalid (hole) pextents, which are replaced in place.
void bluestore_blob_t::allocated(uint32_t b_off, uint32_t length, const AllocExtentVector& allocs)
{
  if (extents.size() == 0) {
    // if blob is compressed then logical length to be already configured
    // otherwise - to be unset.
    assert((is_compressed() && logical_length != 0) ||
      (!is_compressed() && logical_length == 0));

    extents.reserve(allocs.size() + (b_off ? 1 : 0));
    if (b_off) {
      // leading hole, represented as an invalid pextent
      extents.emplace_back(
        bluestore_pextent_t(bluestore_pextent_t::INVALID_OFFSET, b_off));
    }
    uint32_t new_len = b_off;
    for (auto& a : allocs) {
      extents.emplace_back(a.offset, a.length);
      new_len += a.length;
    }
    if (!is_compressed()) {
      logical_length = new_len;
    }
  } else {
    assert(!is_compressed()); // partial allocations are forbidden when
			      // compressed
    assert(b_off < logical_length);
    uint32_t cur_offs = 0;
    auto start_it = extents.begin();
    size_t pos = 0;
    // locate the pextent containing b_off
    while(true) {
      assert(start_it != extents.end());
      if (cur_offs + start_it->length > b_off) {
	break;
      }
      cur_offs += start_it->length;
      ++start_it;
      ++pos;
    }
    uint32_t head = b_off - cur_offs;       // unallocated bytes kept before b_off
    uint32_t end_off = b_off + length;
    auto end_it = start_it;

    // locate the (invalid) pextent containing the end of the range
    while (true) {
      assert(end_it != extents.end());
      assert(!end_it->is_valid());
      if (cur_offs + end_it->length >= end_off) {
	break;
      }
      cur_offs += end_it->length;
      ++end_it;
    }
    assert(cur_offs + end_it->length >= end_off);
    uint32_t tail = cur_offs + end_it->length - end_off;  // hole bytes kept after

    // replace [start_it, end_it] with optional head hole + allocs +
    // optional tail hole; placeholders are inserted first, then filled
    start_it = extents.erase(start_it, end_it + 1);
    size_t count = allocs.size();
    count += head ? 1 : 0;
    count += tail ? 1 : 0;
    extents.insert(start_it,
		   count,
		   bluestore_pextent_t(
		     bluestore_pextent_t::INVALID_OFFSET, 0));

    // Workaround to resolve lack of proper iterator return in vector::insert
    // Looks like some gcc/stl implementations still lack it despite c++11
    // support claim
    start_it = extents.begin() + pos;

    if (head) {
      start_it->length = head;
      ++start_it;
    }
    for(auto& e : allocs) {
      *start_it = e;
      ++start_it;
    }
    if (tail) {
      start_it->length = tail;
    }
  }
}
845
// cut it out of extents
// Helper for rebuilding a PExtentVector: accumulates runs of invalid
// (hole) space and flushes them as a single invalid pextent before the
// next valid extent is appended.
struct vecbuilder {
  PExtentVector v;       // result vector being built
  uint64_t invalid = 0;  // length of the pending, not-yet-flushed hole

  // extend the pending hole
  void add_invalid(uint64_t length) {
    invalid += length;
  }
  // emit the pending hole (if any) as one invalid pextent
  void flush() {
    if (invalid) {
      v.emplace_back(bluestore_pextent_t(bluestore_pextent_t::INVALID_OFFSET,
	invalid));
      invalid = 0;
    }
  }
  // append an extent; invalid offsets just grow the pending hole
  void add(uint64_t offset, uint64_t length) {
    if (offset == bluestore_pextent_t::INVALID_OFFSET) {
      add_invalid(length);
    }
    else {
      flush();
      v.emplace_back(bluestore_pextent_t(offset, length));
    }
  }
};
871
// Test-only helper: append a pextent and, for uncompressed blobs, grow
// the logical length to match.
void bluestore_blob_t::allocated_test(const bluestore_pextent_t& alloc)
{
  extents.emplace_back(alloc);
  if (!is_compressed()) {
    logical_length += alloc.length;
  }
}
879
// Release physical extents backing the given logical ranges: matching
// pextents (or parts of them) are appended to *r and replaced by invalid
// (hole) pextents in this blob.  When 'all' is set the whole blob is
// released at once and the function returns true; otherwise false.
bool bluestore_blob_t::release_extents(bool all,
				       const PExtentVector& logical,
				       PExtentVector* r)
{
  // common case: all of it?
  if (all) {
    uint64_t pos = 0;
    for (auto& e : extents) {
      if (e.is_valid()) {
	r->push_back(e);
      }
      pos += e.length;
    }
    assert(is_compressed() || get_logical_length() == pos);
    // collapse to a single hole spanning the old length
    extents.resize(1);
    extents[0].offset = bluestore_pextent_t::INVALID_OFFSET;
    extents[0].length = pos;
    return true;
  }
  // remove from pextents according to logical release list
  vecbuilder vb;
  auto loffs_it = logical.begin();
  auto lend = logical.end();
  uint32_t pext_loffs_start = 0; //starting loffset of the current pextent
  uint32_t pext_loffs = 0; //current loffset
  auto pext_it = extents.begin();
  auto pext_end = extents.end();
  while (pext_it != pext_end) {
    if (loffs_it == lend ||
	pext_loffs_start + pext_it->length <= loffs_it->offset) {
      // current pextent has no released range: keep its remainder
      int delta0 = pext_loffs - pext_loffs_start;
      assert(delta0 >= 0);
      if ((uint32_t)delta0 < pext_it->length) {
	vb.add(pext_it->offset + delta0, pext_it->length - delta0);
      }
      pext_loffs_start += pext_it->length;
      pext_loffs = pext_loffs_start;
      ++pext_it;
    }
    else {
      //assert(pext_loffs == pext_loffs_start);
      int delta0 = pext_loffs - pext_loffs_start;
      assert(delta0 >= 0);

      // keep the part of the pextent preceding the released range
      int delta = loffs_it->offset - pext_loffs;
      assert(delta >= 0);
      if (delta > 0) {
	vb.add(pext_it->offset + delta0, delta);
	pext_loffs += delta;
      }

      // remember the last entry already in *r so contiguous releases
      // can be merged into it
      PExtentVector::iterator last_r = r->end();
      if (r->begin() != last_r) {
	--last_r;
      }
      uint32_t to_release = loffs_it->length;
      do {
	uint32_t to_release_part =
	  MIN(pext_it->length - delta0 - delta, to_release);
	auto o = pext_it->offset + delta0 + delta;
	if (last_r != r->end() && last_r->offset + last_r->length == o) {
	  last_r->length += to_release_part;  // physically contiguous: merge
	}
	else {
	  last_r = r->emplace(r->end(), o, to_release_part);
	}
	to_release -= to_release_part;
	pext_loffs += to_release_part;
	if (pext_loffs == pext_loffs_start + pext_it->length) {
	  // exhausted this pextent; move to the next one
	  pext_loffs_start += pext_it->length;
	  pext_loffs = pext_loffs_start;
	  pext_it++;
	  delta0 = delta = 0;
	}
      } while (to_release > 0 && pext_it != pext_end);
      // the released logical span becomes a hole in the rebuilt blob
      vb.add_invalid(loffs_it->length - to_release);
      ++loffs_it;
    }
  }
  vb.flush();
  extents.swap(vb.v);
  return false;
}
963
// Split this blob at blob_offset: extents and checksum data for
// [blob_offset, end) move into rb; this blob keeps [0, blob_offset).
// A pextent straddling the split point is divided in two.
void bluestore_blob_t::split(uint32_t blob_offset, bluestore_blob_t& rb)
{
  size_t left = blob_offset;
  uint32_t llen_lb = 0;   // logical length kept on the left (this)
  uint32_t llen_rb = 0;   // logical length moved to rb
  unsigned i = 0;
  for (auto p = extents.begin(); p != extents.end(); ++p, ++i) {
    if (p->length <= left) {
      // entirely left of the split point
      left -= p->length;
      llen_lb += p->length;
      continue;
    }
    if (left) {
      // this pextent straddles the split point: cut it in two
      if (p->is_valid()) {
	rb.extents.emplace_back(bluestore_pextent_t(p->offset + left,
						    p->length - left));
      }
      else {
	rb.extents.emplace_back(bluestore_pextent_t(
				  bluestore_pextent_t::INVALID_OFFSET,
				  p->length - left));
      }
      llen_rb += p->length - left;
      llen_lb += left;
      p->length = left;
      ++i;
      ++p;
    }
    // everything from here on belongs to rb
    while (p != extents.end()) {
      llen_rb += p->length;
      rb.extents.push_back(*p++);
    }
    extents.resize(i);
    logical_length = llen_lb;
    rb.logical_length = llen_rb;
    break;
  }
  rb.flags = flags;

  if (has_csum()) {
    rb.csum_type = csum_type;
    rb.csum_chunk_order = csum_chunk_order;
    size_t csum_order = get_csum_chunk_size();
    assert(blob_offset % csum_order == 0);
    size_t pos = (blob_offset / csum_order) * get_csum_value_size();
    // deep copy csum data
    bufferptr old;
    old.swap(csum_data);
    rb.csum_data = bufferptr(old.c_str() + pos, old.length() - pos);
    csum_data = bufferptr(old.c_str(), pos);
  }
}
1016
1017 // bluestore_shared_blob_t
1018
// Dump the shared blob id and its extent reference map.
void bluestore_shared_blob_t::dump(Formatter *f) const
{
  f->dump_int("sbid", sbid);
  f->dump_object("ref_map", ref_map);
}
1024
// Test instances: a single shared blob with sbid 1.
void bluestore_shared_blob_t::generate_test_instances(
  list<bluestore_shared_blob_t*>& ls)
{
  ls.push_back(new bluestore_shared_blob_t(1));
}
1030
1031 ostream& operator<<(ostream& out, const bluestore_shared_blob_t& sb)
1032 {
1033 out << "(sbid 0x" << std::hex << sb.sbid << std::dec;
1034 out << " " << sb.ref_map << ")";
1035 return out;
1036 }
1037
1038 // bluestore_onode_t
1039
// Dump an extent-map shard descriptor (logical offset and encoded size).
void bluestore_onode_t::shard_info::dump(Formatter *f) const
{
  f->dump_unsigned("offset", offset);
  f->dump_unsigned("bytes", bytes);
}
1045
// Print a shard as "0x<offset>(0x<bytes> bytes)" in hex.
ostream& operator<<(ostream& out, const bluestore_onode_t::shard_info& si)
{
  return out << std::hex << "0x" << si.offset << "(0x" << si.bytes << " bytes"
	     << std::dec << ")";
}
1051
1052 void bluestore_onode_t::dump(Formatter *f) const
1053 {
1054 f->dump_unsigned("nid", nid);
1055 f->dump_unsigned("size", size);
1056 f->open_object_section("attrs");
1057 for (auto p = attrs.begin(); p != attrs.end(); ++p) {
1058 f->open_object_section("attr");
1059 f->dump_string("name", p->first.c_str()); // it's not quite std::string
1060 f->dump_unsigned("len", p->second.length());
1061 f->close_section();
1062 }
1063 f->close_section();
1064 f->dump_string("flags", get_flags_string());
1065 f->open_array_section("extent_map_shards");
1066 for (auto si : extent_map_shards) {
1067 f->dump_object("shard", si);
1068 }
1069 f->close_section();
1070 f->dump_unsigned("expected_object_size", expected_object_size);
1071 f->dump_unsigned("expected_write_size", expected_write_size);
1072 f->dump_unsigned("alloc_hint_flags", alloc_hint_flags);
1073 }
1074
// Test instances: only a default onode for now.
void bluestore_onode_t::generate_test_instances(list<bluestore_onode_t*>& o)
{
  o.push_back(new bluestore_onode_t());
  // FIXME
}
1080
1081 // bluestore_deferred_op_t
1082
// Dump the deferred op: opcode, payload length and target extents.
void bluestore_deferred_op_t::dump(Formatter *f) const
{
  f->dump_unsigned("op", (int)op);
  f->dump_unsigned("data_len", data.length());
  f->open_array_section("extents");
  for (auto& e : extents) {
    f->dump_object("extent", e);
  }
  f->close_section();
}
1093
1094 void bluestore_deferred_op_t::generate_test_instances(list<bluestore_deferred_op_t*>& o)
1095 {
1096 o.push_back(new bluestore_deferred_op_t);
1097 o.push_back(new bluestore_deferred_op_t);
1098 o.back()->op = OP_WRITE;
1099 o.back()->extents.push_back(bluestore_pextent_t(1, 2));
1100 o.back()->extents.push_back(bluestore_pextent_t(100, 5));
1101 o.back()->data.append("my data");
1102 }
1103
1104 void bluestore_deferred_transaction_t::dump(Formatter *f) const
1105 {
1106 f->dump_unsigned("seq", seq);
1107 f->open_array_section("ops");
1108 for (list<bluestore_deferred_op_t>::const_iterator p = ops.begin(); p != ops.end(); ++p) {
1109 f->dump_object("op", *p);
1110 }
1111 f->close_section();
1112
1113 f->open_array_section("released extents");
1114 for (interval_set<uint64_t>::const_iterator p = released.begin(); p != released.end(); ++p) {
1115 f->open_object_section("extent");
1116 f->dump_unsigned("offset", p.get_start());
1117 f->dump_unsigned("length", p.get_len());
1118 f->close_section();
1119 }
1120 f->close_section();
1121 }
1122
1123 void bluestore_deferred_transaction_t::generate_test_instances(list<bluestore_deferred_transaction_t*>& o)
1124 {
1125 o.push_back(new bluestore_deferred_transaction_t());
1126 o.push_back(new bluestore_deferred_transaction_t());
1127 o.back()->seq = 123;
1128 o.back()->ops.push_back(bluestore_deferred_op_t());
1129 o.back()->ops.push_back(bluestore_deferred_op_t());
1130 o.back()->ops.back().op = bluestore_deferred_op_t::OP_WRITE;
1131 o.back()->ops.back().extents.push_back(bluestore_pextent_t(1,7));
1132 o.back()->ops.back().data.append("foodata");
1133 }
1134
// Dump the compression header (algorithm type and compressed length).
void bluestore_compression_header_t::dump(Formatter *f) const
{
  f->dump_unsigned("type", type);
  f->dump_unsigned("length", length);
}
1140
1141 void bluestore_compression_header_t::generate_test_instances(
1142 list<bluestore_compression_header_t*>& o)
1143 {
1144 o.push_back(new bluestore_compression_header_t);
1145 o.push_back(new bluestore_compression_header_t(1));
1146 o.back()->length = 1234;
1147 }