// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
 * Ceph - scalable distributed file system
 *
 * Copyright (C) 2014 Red Hat
 *
 * This is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License version 2.1, as published by the Free Software
 * Foundation.  See file COPYING.
 */
15 #include "bluestore_types.h"
16 #include "common/Formatter.h"
17 #include "common/Checksummer.h"
18 #include "include/stringify.h"
20 // bluestore_bdev_label_t
22 void bluestore_bdev_label_t::encode(bufferlist
& bl
) const
24 // be slightly friendly to someone who looks at the device
25 bl
.append("bluestore block device\n");
26 bl
.append(stringify(osd_uuid
));
28 ENCODE_START(2, 1, bl
);
32 encode(description
, bl
);
37 void bluestore_bdev_label_t::decode(bufferlist::const_iterator
& p
)
39 p
+= 60u; // see above
44 decode(description
, p
);
51 void bluestore_bdev_label_t::dump(Formatter
*f
) const
53 f
->dump_stream("osd_uuid") << osd_uuid
;
54 f
->dump_unsigned("size", size
);
55 f
->dump_stream("btime") << btime
;
56 f
->dump_string("description", description
);
57 for (auto& i
: meta
) {
58 f
->dump_string(i
.first
.c_str(), i
.second
);
62 void bluestore_bdev_label_t::generate_test_instances(
63 list
<bluestore_bdev_label_t
*>& o
)
65 o
.push_back(new bluestore_bdev_label_t
);
66 o
.push_back(new bluestore_bdev_label_t
);
68 o
.back()->btime
= utime_t(4, 5);
69 o
.back()->description
= "fakey";
70 o
.back()->meta
["foo"] = "bar";
73 ostream
& operator<<(ostream
& out
, const bluestore_bdev_label_t
& l
)
75 return out
<< "bdev(osd_uuid " << l
.osd_uuid
76 << ", size 0x" << std::hex
<< l
.size
<< std::dec
77 << ", btime " << l
.btime
78 << ", desc " << l
.description
79 << ", " << l
.meta
.size() << " meta"
85 void bluestore_cnode_t::dump(Formatter
*f
) const
87 f
->dump_unsigned("bits", bits
);
90 void bluestore_cnode_t::generate_test_instances(list
<bluestore_cnode_t
*>& o
)
92 o
.push_back(new bluestore_cnode_t());
93 o
.push_back(new bluestore_cnode_t(0));
94 o
.push_back(new bluestore_cnode_t(123));
97 ostream
& operator<<(ostream
& out
, const bluestore_cnode_t
& l
)
99 return out
<< "cnode(bits " << l
.bits
<< ")";
102 // bluestore_extent_ref_map_t
104 void bluestore_extent_ref_map_t::_check() const
108 for (const auto &p
: ref_map
) {
110 ceph_abort_msg("overlap");
111 if (p
.first
== pos
&& p
.second
.refs
== refs
)
112 ceph_abort_msg("unmerged");
113 pos
= p
.first
+ p
.second
.length
;
114 refs
= p
.second
.refs
;
118 void bluestore_extent_ref_map_t::_maybe_merge_left(
119 map
<uint64_t,record_t
>::iterator
& p
)
121 if (p
== ref_map
.begin())
125 if (q
->second
.refs
== p
->second
.refs
&&
126 q
->first
+ q
->second
.length
== p
->first
) {
127 q
->second
.length
+= p
->second
.length
;
133 void bluestore_extent_ref_map_t::get(uint64_t offset
, uint32_t length
)
135 auto p
= ref_map
.lower_bound(offset
);
136 if (p
!= ref_map
.begin()) {
138 if (p
->first
+ p
->second
.length
<= offset
) {
143 if (p
== ref_map
.end()) {
144 // nothing after offset; add the whole thing.
146 map
<uint64_t,record_t
>::value_type(offset
, record_t(length
, 1))).first
;
149 if (p
->first
> offset
) {
151 uint64_t newlen
= std::min
<uint64_t>(p
->first
- offset
, length
);
153 map
<uint64_t,record_t
>::value_type(offset
,
154 record_t(newlen
, 1))).first
;
157 _maybe_merge_left(p
);
161 if (p
->first
< offset
) {
162 // split off the portion before offset
163 ceph_assert(p
->first
+ p
->second
.length
> offset
);
164 uint64_t left
= p
->first
+ p
->second
.length
- offset
;
165 p
->second
.length
= offset
- p
->first
;
166 p
= ref_map
.insert(map
<uint64_t,record_t
>::value_type(
167 offset
, record_t(left
, p
->second
.refs
))).first
;
170 ceph_assert(p
->first
== offset
);
171 if (length
< p
->second
.length
) {
172 ref_map
.insert(make_pair(offset
+ length
,
173 record_t(p
->second
.length
- length
,
175 p
->second
.length
= length
;
180 offset
+= p
->second
.length
;
181 length
-= p
->second
.length
;
182 _maybe_merge_left(p
);
185 if (p
!= ref_map
.end())
186 _maybe_merge_left(p
);
190 void bluestore_extent_ref_map_t::put(
191 uint64_t offset
, uint32_t length
,
192 PExtentVector
*release
,
193 bool *maybe_unshared
)
195 //NB: existing entries in 'release' container must be preserved!
196 bool unshared
= true;
197 auto p
= ref_map
.lower_bound(offset
);
198 if (p
== ref_map
.end() || p
->first
> offset
) {
199 if (p
== ref_map
.begin()) {
200 ceph_abort_msg("put on missing extent (nothing before)");
203 if (p
->first
+ p
->second
.length
<= offset
) {
204 ceph_abort_msg("put on missing extent (gap)");
207 if (p
->first
< offset
) {
208 uint64_t left
= p
->first
+ p
->second
.length
- offset
;
209 p
->second
.length
= offset
- p
->first
;
210 if (p
->second
.refs
!= 1) {
213 p
= ref_map
.insert(map
<uint64_t,record_t
>::value_type(
214 offset
, record_t(left
, p
->second
.refs
))).first
;
217 ceph_assert(p
->first
== offset
);
218 if (length
< p
->second
.length
) {
219 if (p
->second
.refs
!= 1) {
222 ref_map
.insert(make_pair(offset
+ length
,
223 record_t(p
->second
.length
- length
,
225 if (p
->second
.refs
> 1) {
226 p
->second
.length
= length
;
228 if (p
->second
.refs
!= 1) {
231 _maybe_merge_left(p
);
234 release
->push_back(bluestore_pextent_t(p
->first
, length
));
239 offset
+= p
->second
.length
;
240 length
-= p
->second
.length
;
241 if (p
->second
.refs
> 1) {
243 if (p
->second
.refs
!= 1) {
246 _maybe_merge_left(p
);
250 release
->push_back(bluestore_pextent_t(p
->first
, p
->second
.length
));
254 if (p
!= ref_map
.end())
255 _maybe_merge_left(p
);
258 if (maybe_unshared
) {
260 // we haven't seen a ref != 1 yet; check the whole map.
261 for (auto& p
: ref_map
) {
262 if (p
.second
.refs
!= 1) {
268 *maybe_unshared
= unshared
;
272 bool bluestore_extent_ref_map_t::contains(uint64_t offset
, uint32_t length
) const
274 auto p
= ref_map
.lower_bound(offset
);
275 if (p
== ref_map
.end() || p
->first
> offset
) {
276 if (p
== ref_map
.begin()) {
277 return false; // nothing before
280 if (p
->first
+ p
->second
.length
<= offset
) {
285 if (p
== ref_map
.end())
287 if (p
->first
> offset
)
289 if (p
->first
+ p
->second
.length
>= offset
+ length
)
291 uint64_t overlap
= p
->first
+ p
->second
.length
- offset
;
299 bool bluestore_extent_ref_map_t::intersects(
301 uint32_t length
) const
303 auto p
= ref_map
.lower_bound(offset
);
304 if (p
!= ref_map
.begin()) {
306 if (p
->first
+ p
->second
.length
<= offset
) {
310 if (p
== ref_map
.end())
312 if (p
->first
>= offset
+ length
)
314 return true; // intersects p!
317 void bluestore_extent_ref_map_t::dump(Formatter
*f
) const
319 f
->open_array_section("ref_map");
320 for (auto& p
: ref_map
) {
321 f
->open_object_section("ref");
322 f
->dump_unsigned("offset", p
.first
);
323 f
->dump_unsigned("length", p
.second
.length
);
324 f
->dump_unsigned("refs", p
.second
.refs
);
330 void bluestore_extent_ref_map_t::generate_test_instances(
331 list
<bluestore_extent_ref_map_t
*>& o
)
333 o
.push_back(new bluestore_extent_ref_map_t
);
334 o
.push_back(new bluestore_extent_ref_map_t
);
335 o
.back()->get(10, 10);
336 o
.back()->get(18, 22);
337 o
.back()->get(20, 20);
338 o
.back()->get(10, 25);
339 o
.back()->get(15, 20);
342 ostream
& operator<<(ostream
& out
, const bluestore_extent_ref_map_t
& m
)
345 for (auto p
= m
.ref_map
.begin(); p
!= m
.ref_map
.end(); ++p
) {
346 if (p
!= m
.ref_map
.begin())
348 out
<< std::hex
<< "0x" << p
->first
<< "~" << p
->second
.length
<< std::dec
349 << "=" << p
->second
.refs
;
355 // bluestore_blob_use_tracker_t
356 bluestore_blob_use_tracker_t::bluestore_blob_use_tracker_t(
357 const bluestore_blob_use_tracker_t
& tracker
)
358 : au_size
{tracker
.au_size
},
359 num_au
{tracker
.num_au
},
360 bytes_per_au
{nullptr}
364 std::copy(tracker
.bytes_per_au
, tracker
.bytes_per_au
+ num_au
, bytes_per_au
);
366 total_bytes
= tracker
.total_bytes
;
370 bluestore_blob_use_tracker_t
&
371 bluestore_blob_use_tracker_t::operator=(const bluestore_blob_use_tracker_t
& rhs
)
377 au_size
= rhs
.au_size
;
379 if (rhs
.num_au
> 0) {
381 std::copy(rhs
.bytes_per_au
, rhs
.bytes_per_au
+ num_au
, bytes_per_au
);
383 total_bytes
= rhs
.total_bytes
;
388 void bluestore_blob_use_tracker_t::allocate()
390 ceph_assert(num_au
!= 0);
391 bytes_per_au
= new uint32_t[num_au
];
393 mempool::pool_index_t(mempool::mempool_bluestore_cache_other
)).
394 adjust_count(1, sizeof(uint32_t) * num_au
);
396 for (uint32_t i
= 0; i
< num_au
; ++i
) {
401 void bluestore_blob_use_tracker_t::init(
402 uint32_t full_length
, uint32_t _au_size
) {
403 ceph_assert(!au_size
|| is_empty());
404 ceph_assert(_au_size
> 0);
405 ceph_assert(full_length
> 0);
407 uint32_t _num_au
= round_up_to(full_length
, _au_size
) / _au_size
;
415 void bluestore_blob_use_tracker_t::get(
416 uint32_t offset
, uint32_t length
)
418 ceph_assert(au_size
);
420 total_bytes
+= length
;
422 auto end
= offset
+ length
;
424 while (offset
< end
) {
425 auto phase
= offset
% au_size
;
426 bytes_per_au
[offset
/ au_size
] +=
427 std::min(au_size
- phase
, end
- offset
);
428 offset
+= (phase
? au_size
- phase
: au_size
);
433 bool bluestore_blob_use_tracker_t::put(
434 uint32_t offset
, uint32_t length
,
435 PExtentVector
*release_units
)
437 ceph_assert(au_size
);
439 release_units
->clear();
441 bool maybe_empty
= true;
443 ceph_assert(total_bytes
>= length
);
444 total_bytes
-= length
;
446 auto end
= offset
+ length
;
447 uint64_t next_offs
= 0;
448 while (offset
< end
) {
449 auto phase
= offset
% au_size
;
450 size_t pos
= offset
/ au_size
;
451 auto diff
= std::min(au_size
- phase
, end
- offset
);
452 ceph_assert(diff
<= bytes_per_au
[pos
]);
453 bytes_per_au
[pos
] -= diff
;
454 offset
+= (phase
? au_size
- phase
: au_size
);
455 if (bytes_per_au
[pos
] == 0) {
457 if (release_units
->empty() || next_offs
!= pos
* au_size
) {
458 release_units
->emplace_back(pos
* au_size
, au_size
);
459 next_offs
= pos
* au_size
;
461 release_units
->back().length
+= au_size
;
463 next_offs
+= au_size
;
466 maybe_empty
= false; // micro optimization detecting we aren't empty
467 // even in the affected extent
471 bool empty
= maybe_empty
? !is_not_empty() : false;
472 if (empty
&& release_units
) {
473 release_units
->clear();
478 bool bluestore_blob_use_tracker_t::can_split() const
483 bool bluestore_blob_use_tracker_t::can_split_at(uint32_t blob_offset
) const
485 ceph_assert(au_size
);
486 return (blob_offset
% au_size
) == 0 &&
487 blob_offset
< num_au
* au_size
;
490 void bluestore_blob_use_tracker_t::split(
491 uint32_t blob_offset
,
492 bluestore_blob_use_tracker_t
* r
)
494 ceph_assert(au_size
);
495 ceph_assert(can_split());
496 ceph_assert(can_split_at(blob_offset
));
497 ceph_assert(r
->is_empty());
499 uint32_t new_num_au
= blob_offset
/ au_size
;
500 r
->init( (num_au
- new_num_au
) * au_size
, au_size
);
502 for (auto i
= new_num_au
; i
< num_au
; i
++) {
503 r
->get((i
- new_num_au
) * au_size
, bytes_per_au
[i
]);
506 if (new_num_au
== 0) {
508 } else if (new_num_au
== 1) {
509 uint32_t tmp
= bytes_per_au
[0];
510 uint32_t _au_size
= au_size
;
519 bool bluestore_blob_use_tracker_t::equal(
520 const bluestore_blob_use_tracker_t
& other
) const
522 if (!num_au
&& !other
.num_au
) {
523 return total_bytes
== other
.total_bytes
&& au_size
== other
.au_size
;
524 } else if (num_au
&& other
.num_au
) {
525 if (num_au
!= other
.num_au
|| au_size
!= other
.au_size
) {
528 for (size_t i
= 0; i
< num_au
; i
++) {
529 if (bytes_per_au
[i
] != other
.bytes_per_au
[i
]) {
536 uint32_t n
= num_au
? num_au
: other
.num_au
;
537 uint32_t referenced
=
538 num_au
? other
.get_referenced_bytes() : get_referenced_bytes();
539 auto bytes_per_au_tmp
= num_au
? bytes_per_au
: other
.bytes_per_au
;
540 uint32_t my_referenced
= 0;
541 for (size_t i
= 0; i
< n
; i
++) {
542 my_referenced
+= bytes_per_au_tmp
[i
];
543 if (my_referenced
> referenced
) {
547 return my_referenced
== referenced
;
550 void bluestore_blob_use_tracker_t::dump(Formatter
*f
) const
552 f
->dump_unsigned("num_au", num_au
);
553 f
->dump_unsigned("au_size", au_size
);
555 f
->dump_unsigned("total_bytes", total_bytes
);
557 f
->open_array_section("bytes_per_au");
558 for (size_t i
= 0; i
< num_au
; ++i
) {
559 f
->dump_unsigned("", bytes_per_au
[i
]);
565 void bluestore_blob_use_tracker_t::generate_test_instances(
566 list
<bluestore_blob_use_tracker_t
*>& o
)
568 o
.push_back(new bluestore_blob_use_tracker_t());
569 o
.back()->init(16, 16);
570 o
.back()->get(10, 10);
571 o
.back()->get(10, 5);
572 o
.push_back(new bluestore_blob_use_tracker_t());
573 o
.back()->init(60, 16);
574 o
.back()->get(18, 22);
575 o
.back()->get(20, 20);
576 o
.back()->get(15, 20);
579 ostream
& operator<<(ostream
& out
, const bluestore_blob_use_tracker_t
& m
)
581 out
<< "use_tracker(" << std::hex
;
583 out
<< "0x" << m
.au_size
585 << "0x" << m
.total_bytes
;
587 out
<< "0x" << m
.num_au
588 << "*0x" << m
.au_size
590 for (size_t i
= 0; i
< m
.num_au
; ++i
) {
593 out
<< m
.bytes_per_au
[i
];
597 out
<< std::dec
<< ")";
601 // bluestore_pextent_t
603 void bluestore_pextent_t::dump(Formatter
*f
) const
605 f
->dump_unsigned("offset", offset
);
606 f
->dump_unsigned("length", length
);
609 ostream
& operator<<(ostream
& out
, const bluestore_pextent_t
& o
) {
611 return out
<< "0x" << std::hex
<< o
.offset
<< "~" << o
.length
<< std::dec
;
613 return out
<< "!~" << std::hex
<< o
.length
<< std::dec
;
616 void bluestore_pextent_t::generate_test_instances(list
<bluestore_pextent_t
*>& ls
)
618 ls
.push_back(new bluestore_pextent_t
);
619 ls
.push_back(new bluestore_pextent_t(1, 2));
624 string
bluestore_blob_t::get_flags_string(unsigned flags
)
627 if (flags
& FLAG_COMPRESSED
) {
632 if (flags
& FLAG_CSUM
) {
637 if (flags
& FLAG_HAS_UNUSED
) {
642 if (flags
& FLAG_SHARED
) {
651 size_t bluestore_blob_t::get_csum_value_size() const
653 return Checksummer::get_csum_value_size(csum_type
);
656 void bluestore_blob_t::dump(Formatter
*f
) const
658 f
->open_array_section("extents");
659 for (auto& p
: extents
) {
660 f
->dump_object("extent", p
);
663 f
->dump_unsigned("logical_length", logical_length
);
664 f
->dump_unsigned("compressed_length", compressed_length
);
665 f
->dump_unsigned("flags", flags
);
666 f
->dump_unsigned("csum_type", csum_type
);
667 f
->dump_unsigned("csum_chunk_order", csum_chunk_order
);
668 f
->open_array_section("csum_data");
669 size_t n
= get_csum_count();
670 for (unsigned i
= 0; i
< n
; ++i
)
671 f
->dump_unsigned("csum", get_csum_item(i
));
673 f
->dump_unsigned("unused", unused
);
676 void bluestore_blob_t::generate_test_instances(list
<bluestore_blob_t
*>& ls
)
678 ls
.push_back(new bluestore_blob_t
);
679 ls
.push_back(new bluestore_blob_t(0));
680 ls
.push_back(new bluestore_blob_t
);
681 ls
.back()->allocated_test(bluestore_pextent_t(111, 222));
682 ls
.push_back(new bluestore_blob_t
);
683 ls
.back()->init_csum(Checksummer::CSUM_XXHASH32
, 16, 65536);
684 ls
.back()->csum_data
= buffer::claim_malloc(4, strdup("abcd"));
685 ls
.back()->add_unused(0, 3);
686 ls
.back()->add_unused(8, 8);
687 ls
.back()->allocated_test(bluestore_pextent_t(0x40100000, 0x10000));
688 ls
.back()->allocated_test(
689 bluestore_pextent_t(bluestore_pextent_t::INVALID_OFFSET
, 0x1000));
690 ls
.back()->allocated_test(bluestore_pextent_t(0x40120000, 0x10000));
693 ostream
& operator<<(ostream
& out
, const bluestore_blob_t
& o
)
695 out
<< "blob(" << o
.get_extents();
696 if (o
.is_compressed()) {
697 out
<< " clen 0x" << std::hex
698 << o
.get_logical_length()
700 << o
.get_compressed_payload_length()
704 out
<< " " << o
.get_flags_string();
707 out
<< " " << Checksummer::get_csum_type_string(o
.csum_type
)
708 << "/0x" << std::hex
<< (1ull << o
.csum_chunk_order
) << std::dec
;
711 out
<< " unused=0x" << std::hex
<< o
.unused
<< std::dec
;
716 void bluestore_blob_t::calc_csum(uint64_t b_off
, const bufferlist
& bl
)
719 case Checksummer::CSUM_XXHASH32
:
720 Checksummer::calculate
<Checksummer::xxhash32
>(
721 get_csum_chunk_size(), b_off
, bl
.length(), bl
, &csum_data
);
723 case Checksummer::CSUM_XXHASH64
:
724 Checksummer::calculate
<Checksummer::xxhash64
>(
725 get_csum_chunk_size(), b_off
, bl
.length(), bl
, &csum_data
);
727 case Checksummer::CSUM_CRC32C
:
728 Checksummer::calculate
<Checksummer::crc32c
>(
729 get_csum_chunk_size(), b_off
, bl
.length(), bl
, &csum_data
);
731 case Checksummer::CSUM_CRC32C_16
:
732 Checksummer::calculate
<Checksummer::crc32c_16
>(
733 get_csum_chunk_size(), b_off
, bl
.length(), bl
, &csum_data
);
735 case Checksummer::CSUM_CRC32C_8
:
736 Checksummer::calculate
<Checksummer::crc32c_8
>(
737 get_csum_chunk_size(), b_off
, bl
.length(), bl
, &csum_data
);
742 int bluestore_blob_t::verify_csum(uint64_t b_off
, const bufferlist
& bl
,
743 int* b_bad_off
, uint64_t *bad_csum
) const
749 case Checksummer::CSUM_NONE
:
751 case Checksummer::CSUM_XXHASH32
:
752 *b_bad_off
= Checksummer::verify
<Checksummer::xxhash32
>(
753 get_csum_chunk_size(), b_off
, bl
.length(), bl
, csum_data
, bad_csum
);
755 case Checksummer::CSUM_XXHASH64
:
756 *b_bad_off
= Checksummer::verify
<Checksummer::xxhash64
>(
757 get_csum_chunk_size(), b_off
, bl
.length(), bl
, csum_data
, bad_csum
);
759 case Checksummer::CSUM_CRC32C
:
760 *b_bad_off
= Checksummer::verify
<Checksummer::crc32c
>(
761 get_csum_chunk_size(), b_off
, bl
.length(), bl
, csum_data
, bad_csum
);
763 case Checksummer::CSUM_CRC32C_16
:
764 *b_bad_off
= Checksummer::verify
<Checksummer::crc32c_16
>(
765 get_csum_chunk_size(), b_off
, bl
.length(), bl
, csum_data
, bad_csum
);
767 case Checksummer::CSUM_CRC32C_8
:
768 *b_bad_off
= Checksummer::verify
<Checksummer::crc32c_8
>(
769 get_csum_chunk_size(), b_off
, bl
.length(), bl
, csum_data
, bad_csum
);
778 else if (*b_bad_off
>= 0)
779 return -1; // bad checksum
784 void bluestore_blob_t::allocated(uint32_t b_off
, uint32_t length
, const PExtentVector
& allocs
)
786 if (extents
.size() == 0) {
787 // if blob is compressed then logical length to be already configured
788 // otherwise - to be unset.
789 ceph_assert((is_compressed() && logical_length
!= 0) ||
790 (!is_compressed() && logical_length
== 0));
792 extents
.reserve(allocs
.size() + (b_off
? 1 : 0));
794 extents
.emplace_back(
795 bluestore_pextent_t(bluestore_pextent_t::INVALID_OFFSET
, b_off
));
798 uint32_t new_len
= b_off
;
799 for (auto& a
: allocs
) {
800 extents
.emplace_back(a
.offset
, a
.length
);
803 if (!is_compressed()) {
804 logical_length
= new_len
;
807 ceph_assert(!is_compressed()); // partial allocations are forbidden when
809 ceph_assert(b_off
< logical_length
);
810 uint32_t cur_offs
= 0;
811 auto start_it
= extents
.begin();
814 ceph_assert(start_it
!= extents
.end());
815 if (cur_offs
+ start_it
->length
> b_off
) {
818 cur_offs
+= start_it
->length
;
822 uint32_t head
= b_off
- cur_offs
;
823 uint32_t end_off
= b_off
+ length
;
824 auto end_it
= start_it
;
827 ceph_assert(end_it
!= extents
.end());
828 ceph_assert(!end_it
->is_valid());
829 if (cur_offs
+ end_it
->length
>= end_off
) {
832 cur_offs
+= end_it
->length
;
835 ceph_assert(cur_offs
+ end_it
->length
>= end_off
);
836 uint32_t tail
= cur_offs
+ end_it
->length
- end_off
;
838 start_it
= extents
.erase(start_it
, end_it
+ 1);
839 size_t count
= allocs
.size();
840 count
+= head
? 1 : 0;
841 count
+= tail
? 1 : 0;
842 extents
.insert(start_it
,
845 bluestore_pextent_t::INVALID_OFFSET
, 0));
847 // Workaround to resolve lack of proper iterator return in vector::insert
848 // Looks like some gcc/stl implementations still lack it despite c++11
850 start_it
= extents
.begin() + pos
;
853 start_it
->length
= head
;
856 for(auto& e
: allocs
) {
861 start_it
->length
= tail
;
866 // cut it out of extents
869 uint64_t invalid
= 0;
871 void add_invalid(uint64_t length
) {
876 v
.emplace_back(bluestore_pextent_t(bluestore_pextent_t::INVALID_OFFSET
,
882 void add(uint64_t offset
, uint64_t length
) {
883 if (offset
== bluestore_pextent_t::INVALID_OFFSET
) {
888 v
.emplace_back(offset
, length
);
893 void bluestore_blob_t::allocated_test(const bluestore_pextent_t
& alloc
)
895 extents
.emplace_back(alloc
);
896 if (!is_compressed()) {
897 logical_length
+= alloc
.length
;
901 bool bluestore_blob_t::release_extents(bool all
,
902 const PExtentVector
& logical
,
905 // common case: all of it?
908 for (auto& e
: extents
) {
914 ceph_assert(is_compressed() || get_logical_length() == pos
);
916 extents
[0].offset
= bluestore_pextent_t::INVALID_OFFSET
;
917 extents
[0].length
= pos
;
920 // remove from pextents according to logical release list
922 auto loffs_it
= logical
.begin();
923 auto lend
= logical
.end();
924 uint32_t pext_loffs_start
= 0; //starting loffset of the current pextent
925 uint32_t pext_loffs
= 0; //current loffset
926 auto pext_it
= extents
.begin();
927 auto pext_end
= extents
.end();
928 while (pext_it
!= pext_end
) {
929 if (loffs_it
== lend
||
930 pext_loffs_start
+ pext_it
->length
<= loffs_it
->offset
) {
931 int delta0
= pext_loffs
- pext_loffs_start
;
932 ceph_assert(delta0
>= 0);
933 if ((uint32_t)delta0
< pext_it
->length
) {
934 vb
.add(pext_it
->offset
+ delta0
, pext_it
->length
- delta0
);
936 pext_loffs_start
+= pext_it
->length
;
937 pext_loffs
= pext_loffs_start
;
941 //assert(pext_loffs == pext_loffs_start);
942 int delta0
= pext_loffs
- pext_loffs_start
;
943 ceph_assert(delta0
>= 0);
945 int delta
= loffs_it
->offset
- pext_loffs
;
946 ceph_assert(delta
>= 0);
948 vb
.add(pext_it
->offset
+ delta0
, delta
);
952 PExtentVector::iterator last_r
= r
->end();
953 if (r
->begin() != last_r
) {
956 uint32_t to_release
= loffs_it
->length
;
958 uint32_t to_release_part
=
959 std::min(pext_it
->length
- delta0
- delta
, to_release
);
960 auto o
= pext_it
->offset
+ delta0
+ delta
;
961 if (last_r
!= r
->end() && last_r
->offset
+ last_r
->length
== o
) {
962 last_r
->length
+= to_release_part
;
965 last_r
= r
->emplace(r
->end(), o
, to_release_part
);
967 to_release
-= to_release_part
;
968 pext_loffs
+= to_release_part
;
969 if (pext_loffs
== pext_loffs_start
+ pext_it
->length
) {
970 pext_loffs_start
+= pext_it
->length
;
971 pext_loffs
= pext_loffs_start
;
975 } while (to_release
> 0 && pext_it
!= pext_end
);
976 vb
.add_invalid(loffs_it
->length
- to_release
);
985 void bluestore_blob_t::split(uint32_t blob_offset
, bluestore_blob_t
& rb
)
987 size_t left
= blob_offset
;
988 uint32_t llen_lb
= 0;
989 uint32_t llen_rb
= 0;
991 for (auto p
= extents
.begin(); p
!= extents
.end(); ++p
, ++i
) {
992 if (p
->length
<= left
) {
994 llen_lb
+= p
->length
;
999 rb
.extents
.emplace_back(bluestore_pextent_t(p
->offset
+ left
,
1003 rb
.extents
.emplace_back(bluestore_pextent_t(
1004 bluestore_pextent_t::INVALID_OFFSET
,
1007 llen_rb
+= p
->length
- left
;
1013 while (p
!= extents
.end()) {
1014 llen_rb
+= p
->length
;
1015 rb
.extents
.push_back(*p
++);
1018 logical_length
= llen_lb
;
1019 rb
.logical_length
= llen_rb
;
1025 rb
.csum_type
= csum_type
;
1026 rb
.csum_chunk_order
= csum_chunk_order
;
1027 size_t csum_order
= get_csum_chunk_size();
1028 ceph_assert(blob_offset
% csum_order
== 0);
1029 size_t pos
= (blob_offset
/ csum_order
) * get_csum_value_size();
1030 // deep copy csum data
1032 old
.swap(csum_data
);
1033 rb
.csum_data
= bufferptr(old
.c_str() + pos
, old
.length() - pos
);
1034 csum_data
= bufferptr(old
.c_str(), pos
);
1038 // bluestore_shared_blob_t
1039 MEMPOOL_DEFINE_OBJECT_FACTORY(bluestore_shared_blob_t
, bluestore_shared_blob_t
,
1040 bluestore_cache_other
);
1042 void bluestore_shared_blob_t::dump(Formatter
*f
) const
1044 f
->dump_int("sbid", sbid
);
1045 f
->dump_object("ref_map", ref_map
);
1048 void bluestore_shared_blob_t::generate_test_instances(
1049 list
<bluestore_shared_blob_t
*>& ls
)
1051 ls
.push_back(new bluestore_shared_blob_t(1));
1054 ostream
& operator<<(ostream
& out
, const bluestore_shared_blob_t
& sb
)
1056 out
<< "(sbid 0x" << std::hex
<< sb
.sbid
<< std::dec
;
1057 out
<< " " << sb
.ref_map
<< ")";
1061 // bluestore_onode_t
1063 void bluestore_onode_t::shard_info::dump(Formatter
*f
) const
1065 f
->dump_unsigned("offset", offset
);
1066 f
->dump_unsigned("bytes", bytes
);
1069 ostream
& operator<<(ostream
& out
, const bluestore_onode_t::shard_info
& si
)
1071 return out
<< std::hex
<< "0x" << si
.offset
<< "(0x" << si
.bytes
<< " bytes"
1075 void bluestore_onode_t::dump(Formatter
*f
) const
1077 f
->dump_unsigned("nid", nid
);
1078 f
->dump_unsigned("size", size
);
1079 f
->open_object_section("attrs");
1080 for (auto p
= attrs
.begin(); p
!= attrs
.end(); ++p
) {
1081 f
->open_object_section("attr");
1082 f
->dump_string("name", p
->first
.c_str()); // it's not quite std::string
1083 f
->dump_unsigned("len", p
->second
.length());
1087 f
->dump_string("flags", get_flags_string());
1088 f
->open_array_section("extent_map_shards");
1089 for (auto si
: extent_map_shards
) {
1090 f
->dump_object("shard", si
);
1093 f
->dump_unsigned("expected_object_size", expected_object_size
);
1094 f
->dump_unsigned("expected_write_size", expected_write_size
);
1095 f
->dump_unsigned("alloc_hint_flags", alloc_hint_flags
);
1098 void bluestore_onode_t::generate_test_instances(list
<bluestore_onode_t
*>& o
)
1100 o
.push_back(new bluestore_onode_t());
1104 // bluestore_deferred_op_t
1106 void bluestore_deferred_op_t::dump(Formatter
*f
) const
1108 f
->dump_unsigned("op", (int)op
);
1109 f
->dump_unsigned("data_len", data
.length());
1110 f
->open_array_section("extents");
1111 for (auto& e
: extents
) {
1112 f
->dump_object("extent", e
);
1117 void bluestore_deferred_op_t::generate_test_instances(list
<bluestore_deferred_op_t
*>& o
)
1119 o
.push_back(new bluestore_deferred_op_t
);
1120 o
.push_back(new bluestore_deferred_op_t
);
1121 o
.back()->op
= OP_WRITE
;
1122 o
.back()->extents
.push_back(bluestore_pextent_t(1, 2));
1123 o
.back()->extents
.push_back(bluestore_pextent_t(100, 5));
1124 o
.back()->data
.append("my data");
1127 void bluestore_deferred_transaction_t::dump(Formatter
*f
) const
1129 f
->dump_unsigned("seq", seq
);
1130 f
->open_array_section("ops");
1131 for (list
<bluestore_deferred_op_t
>::const_iterator p
= ops
.begin(); p
!= ops
.end(); ++p
) {
1132 f
->dump_object("op", *p
);
1136 f
->open_array_section("released extents");
1137 for (interval_set
<uint64_t>::const_iterator p
= released
.begin(); p
!= released
.end(); ++p
) {
1138 f
->open_object_section("extent");
1139 f
->dump_unsigned("offset", p
.get_start());
1140 f
->dump_unsigned("length", p
.get_len());
1146 void bluestore_deferred_transaction_t::generate_test_instances(list
<bluestore_deferred_transaction_t
*>& o
)
1148 o
.push_back(new bluestore_deferred_transaction_t());
1149 o
.push_back(new bluestore_deferred_transaction_t());
1150 o
.back()->seq
= 123;
1151 o
.back()->ops
.push_back(bluestore_deferred_op_t());
1152 o
.back()->ops
.push_back(bluestore_deferred_op_t());
1153 o
.back()->ops
.back().op
= bluestore_deferred_op_t::OP_WRITE
;
1154 o
.back()->ops
.back().extents
.push_back(bluestore_pextent_t(1,7));
1155 o
.back()->ops
.back().data
.append("foodata");
1158 void bluestore_compression_header_t::dump(Formatter
*f
) const
1160 f
->dump_unsigned("type", type
);
1161 f
->dump_unsigned("length", length
);
1164 void bluestore_compression_header_t::generate_test_instances(
1165 list
<bluestore_compression_header_t
*>& o
)
1167 o
.push_back(new bluestore_compression_header_t
);
1168 o
.push_back(new bluestore_compression_header_t(1));
1169 o
.back()->length
= 1234;
1172 // adds more salt to build a hash func input
1173 shared_blob_2hash_tracker_t::hash_input_t
1174 shared_blob_2hash_tracker_t::build_hash_input(
1176 uint64_t offset
) const
1178 hash_input_t res
= {
1180 offset
>> au_void_bits
,
1181 ((sbid
& 0xffffffff) << 32) + ~(uint32_t((offset
>> au_void_bits
) & 0xffffffff))
1186 void shared_blob_2hash_tracker_t::inc(
1191 auto hash_input
= build_hash_input(sbid
, offset
);
1192 ref_counter_2hash_tracker_t::inc(
1193 (char*)hash_input
.data(),
1194 get_hash_input_size(),
1198 void shared_blob_2hash_tracker_t::inc_range(
1204 uint32_t alloc_unit
= 1 << au_void_bits
;
1207 // don't care about ofset alignment as inc() trims it anyway
1208 inc(sbid
, offset
, n
);
1209 offset
+= alloc_unit
;
1214 bool shared_blob_2hash_tracker_t::test_hash_conflict(
1218 uint64_t offset2
) const
1220 auto hash_input1
= build_hash_input(sbid1
, offset1
);
1221 auto hash_input2
= build_hash_input(sbid2
, offset2
);
1222 return ref_counter_2hash_tracker_t::test_hash_conflict(
1223 (char*)hash_input1
.data(),
1224 (char*)hash_input2
.data(),
1225 get_hash_input_size());
1228 bool shared_blob_2hash_tracker_t::test_all_zero(
1230 uint64_t offset
) const
1232 auto hash_input
= build_hash_input(sbid
, offset
);
1234 ref_counter_2hash_tracker_t::test_all_zero(
1235 (char*)hash_input
.data(),
1236 get_hash_input_size());
1239 bool shared_blob_2hash_tracker_t::test_all_zero_range(
1244 uint32_t alloc_unit
= 1 << au_void_bits
;
1247 // don't care about ofset alignment as inc() trims it anyway
1248 if (!test_all_zero(sbid
, offset
)) {
1251 offset
+= alloc_unit
;