]> git.proxmox.com Git - ceph.git/blob - ceph/src/os/bluestore/bluestore_types.cc
update sources to 12.2.7
[ceph.git] / ceph / src / os / bluestore / bluestore_types.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2014 Red Hat
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15 #include "bluestore_types.h"
16 #include "common/Formatter.h"
17 #include "common/Checksummer.h"
18 #include "include/stringify.h"
19
20 void ExtentList::add_extents(int64_t start, int64_t count) {
21 AllocExtent *last_extent = NULL;
22 bool can_merge = false;
23
24 if (!m_extents->empty()) {
25 last_extent = &(m_extents->back());
26 uint64_t last_offset = last_extent->end() / m_block_size;
27 uint32_t last_length = last_extent->length / m_block_size;
28 if ((last_offset == (uint64_t) start) &&
29 (!m_max_blocks || (last_length + count) <= m_max_blocks)) {
30 can_merge = true;
31 }
32 }
33
34 if (can_merge) {
35 last_extent->length += (count * m_block_size);
36 } else {
37 m_extents->emplace_back(AllocExtent(start * m_block_size,
38 count * m_block_size));
39 }
40 }
41
42 // bluestore_bdev_label_t
43
// Serialize the label.  A human-readable preamble precedes the binary
// payload: a 23-byte banner, the 36-char uuid string, and a newline —
// 60 bytes total, which decode() skips with p.advance(60).
// (assumes stringify(osd_uuid) yields the canonical 36-char form —
// TODO confirm)
void bluestore_bdev_label_t::encode(bufferlist& bl) const
{
  // be slightly friendly to someone who looks at the device
  bl.append("bluestore block device\n");
  bl.append(stringify(osd_uuid));
  bl.append("\n");
  ENCODE_START(2, 1, bl);
  ::encode(osd_uuid, bl);
  ::encode(size, bl);
  ::encode(btime, bl);
  ::encode(description, bl);
  ::encode(meta, bl);  // added in struct_v 2
  ENCODE_FINISH(bl);
}
58
// Deserialize the label.  The 60-byte advance skips the human-readable
// preamble written by encode() (23-byte banner + 36-char uuid + '\n').
void bluestore_bdev_label_t::decode(bufferlist::iterator& p)
{
  p.advance(60); // see above
  DECODE_START(2, p);
  ::decode(osd_uuid, p);
  ::decode(size, p);
  ::decode(btime, p);
  ::decode(description, p);
  if (struct_v >= 2) {
    // meta map was introduced in v2; older labels simply lack it
    ::decode(meta, p);
  }
  DECODE_FINISH(p);
}
72
73 void bluestore_bdev_label_t::dump(Formatter *f) const
74 {
75 f->dump_stream("osd_uuid") << osd_uuid;
76 f->dump_unsigned("size", size);
77 f->dump_stream("btime") << btime;
78 f->dump_string("description", description);
79 for (auto& i : meta) {
80 f->dump_string(i.first.c_str(), i.second);
81 }
82 }
83
84 void bluestore_bdev_label_t::generate_test_instances(
85 list<bluestore_bdev_label_t*>& o)
86 {
87 o.push_back(new bluestore_bdev_label_t);
88 o.push_back(new bluestore_bdev_label_t);
89 o.back()->size = 123;
90 o.back()->btime = utime_t(4, 5);
91 o.back()->description = "fakey";
92 o.back()->meta["foo"] = "bar";
93 }
94
95 ostream& operator<<(ostream& out, const bluestore_bdev_label_t& l)
96 {
97 return out << "bdev(osd_uuid " << l.osd_uuid
98 << ", size 0x" << std::hex << l.size << std::dec
99 << ", btime " << l.btime
100 << ", desc " << l.description
101 << ", " << l.meta.size() << " meta"
102 << ")";
103 }
104
105 // cnode_t
106
107 void bluestore_cnode_t::dump(Formatter *f) const
108 {
109 f->dump_unsigned("bits", bits);
110 }
111
112 void bluestore_cnode_t::generate_test_instances(list<bluestore_cnode_t*>& o)
113 {
114 o.push_back(new bluestore_cnode_t());
115 o.push_back(new bluestore_cnode_t(0));
116 o.push_back(new bluestore_cnode_t(123));
117 }
118
119 ostream& operator<<(ostream& out, const bluestore_cnode_t& l)
120 {
121 return out << "cnode(bits " << l.bits << ")";
122 }
123
124 // bluestore_extent_ref_map_t
125
126 void bluestore_extent_ref_map_t::_check() const
127 {
128 uint64_t pos = 0;
129 unsigned refs = 0;
130 for (const auto &p : ref_map) {
131 if (p.first < pos)
132 assert(0 == "overlap");
133 if (p.first == pos && p.second.refs == refs)
134 assert(0 == "unmerged");
135 pos = p.first + p.second.length;
136 refs = p.second.refs;
137 }
138 }
139
// If the record at p is contiguous with its predecessor and has the same
// ref count, fold it into the predecessor.  On merge, the entry at p is
// erased and p is repointed at the (grown) predecessor; otherwise p is
// left untouched.
void bluestore_extent_ref_map_t::_maybe_merge_left(
  map<uint64_t,record_t>::iterator& p)
{
  if (p == ref_map.begin())
    return;
  auto q = p;
  --q;
  if (q->second.refs == p->second.refs &&
      q->first + q->second.length == p->first) {
    q->second.length += p->second.length;
    ref_map.erase(p);
    p = q;  // keep the caller's iterator valid
  }
}
154
// Increment the reference count over [offset, offset+length).
// Uncovered sub-ranges are inserted with refs=1; existing records are
// split as needed so counts stay exact per byte range, and adjacent
// records with equal refs are re-merged along the way.
void bluestore_extent_ref_map_t::get(uint64_t offset, uint32_t length)
{
  auto p = ref_map.lower_bound(offset);
  if (p != ref_map.begin()) {
    --p;
    if (p->first + p->second.length <= offset) {
      // predecessor ends at or before offset; start from the next record
      ++p;
    }
  }
  while (length > 0) {
    if (p == ref_map.end()) {
      // nothing after offset; add the whole thing.
      p = ref_map.insert(
        map<uint64_t,record_t>::value_type(offset, record_t(length, 1))).first;
      break;
    }
    if (p->first > offset) {
      // gap
      uint64_t newlen = MIN(p->first - offset, length);
      p = ref_map.insert(
        map<uint64_t,record_t>::value_type(offset,
                                           record_t(newlen, 1))).first;
      offset += newlen;
      length -= newlen;
      _maybe_merge_left(p);
      ++p;
      continue;
    }
    if (p->first < offset) {
      // split off the portion before offset
      assert(p->first + p->second.length > offset);
      uint64_t left = p->first + p->second.length - offset;
      p->second.length = offset - p->first;
      p = ref_map.insert(map<uint64_t,record_t>::value_type(
                           offset, record_t(left, p->second.refs))).first;
      // continue below
    }
    assert(p->first == offset);
    if (length < p->second.length) {
      // only part of this record is covered: split off the tail and bump
      // refs on the head only
      ref_map.insert(make_pair(offset + length,
                               record_t(p->second.length - length,
                                        p->second.refs)));
      p->second.length = length;
      ++p->second.refs;
      break;
    }
    // this whole record (and possibly more) is covered
    ++p->second.refs;
    offset += p->second.length;
    length -= p->second.length;
    _maybe_merge_left(p);
    ++p;
  }
  if (p != ref_map.end())
    _maybe_merge_left(p);
  //_check();
}
211
// Decrement the reference count over [offset, offset+length), which must
// be fully covered by the map (asserts otherwise).  Sub-ranges whose
// count drops to zero are erased and appended to *release.  If
// maybe_unshared is non-null, *maybe_unshared is set to true only when
// every remaining record has refs == 1.
void bluestore_extent_ref_map_t::put(
  uint64_t offset, uint32_t length,
  PExtentVector *release,
  bool *maybe_unshared)
{
  //NB: existing entries in 'release' container must be preserved!
  bool unshared = true;
  auto p = ref_map.lower_bound(offset);
  if (p == ref_map.end() || p->first > offset) {
    if (p == ref_map.begin()) {
      assert(0 == "put on missing extent (nothing before)");
    }
    --p;
    if (p->first + p->second.length <= offset) {
      assert(0 == "put on missing extent (gap)");
    }
  }
  if (p->first < offset) {
    // split off the head so the record starts exactly at offset
    uint64_t left = p->first + p->second.length - offset;
    p->second.length = offset - p->first;
    if (p->second.refs != 1) {
      unshared = false;
    }
    p = ref_map.insert(map<uint64_t,record_t>::value_type(
                         offset, record_t(left, p->second.refs))).first;
  }
  while (length > 0) {
    assert(p->first == offset);
    if (length < p->second.length) {
      // only part of this record is covered: split off the tail, then
      // either decrement or release the head
      if (p->second.refs != 1) {
        unshared = false;
      }
      ref_map.insert(make_pair(offset + length,
                               record_t(p->second.length - length,
                                        p->second.refs)));
      if (p->second.refs > 1) {
        p->second.length = length;
        --p->second.refs;
        if (p->second.refs != 1) {
          unshared = false;
        }
        _maybe_merge_left(p);
      } else {
        if (release)
          release->push_back(bluestore_pextent_t(p->first, length));
        ref_map.erase(p);
      }
      goto out;
    }
    // the whole record is covered
    offset += p->second.length;
    length -= p->second.length;
    if (p->second.refs > 1) {
      --p->second.refs;
      if (p->second.refs != 1) {
        unshared = false;
      }
      _maybe_merge_left(p);
      ++p;
    } else {
      // last reference: the extent becomes free space
      if (release)
        release->push_back(bluestore_pextent_t(p->first, p->second.length));
      ref_map.erase(p++);
    }
  }
  if (p != ref_map.end())
    _maybe_merge_left(p);
  //_check();
  out:
  if (maybe_unshared) {
    if (unshared) {
      // we haven't seen a ref != 1 yet; check the whole map.
      for (auto& p : ref_map) {
        if (p.second.refs != 1) {
          unshared = false;
          break;
        }
      }
    }
    *maybe_unshared = unshared;
  }
}
293
// Return true iff every byte of [offset, offset+length) is covered by
// some record (coverage only; ref counts are not inspected).
bool bluestore_extent_ref_map_t::contains(uint64_t offset, uint32_t length) const
{
  auto p = ref_map.lower_bound(offset);
  if (p == ref_map.end() || p->first > offset) {
    if (p == ref_map.begin()) {
      return false; // nothing before
    }
    --p;
    if (p->first + p->second.length <= offset) {
      return false; // gap
    }
  }
  while (length > 0) {
    if (p == ref_map.end())
      return false;
    if (p->first > offset)
      return false;  // hole between records
    if (p->first + p->second.length >= offset + length)
      return true;
    // this record covers only a prefix of the query; advance past it
    uint64_t overlap = p->first + p->second.length - offset;
    offset += overlap;
    length -= overlap;
    ++p;
  }
  return true;
}
320
321 bool bluestore_extent_ref_map_t::intersects(
322 uint64_t offset,
323 uint32_t length) const
324 {
325 auto p = ref_map.lower_bound(offset);
326 if (p != ref_map.begin()) {
327 --p;
328 if (p->first + p->second.length <= offset) {
329 ++p;
330 }
331 }
332 if (p == ref_map.end())
333 return false;
334 if (p->first >= offset + length)
335 return false;
336 return true; // intersects p!
337 }
338
339 void bluestore_extent_ref_map_t::dump(Formatter *f) const
340 {
341 f->open_array_section("ref_map");
342 for (auto& p : ref_map) {
343 f->open_object_section("ref");
344 f->dump_unsigned("offset", p.first);
345 f->dump_unsigned("length", p.second.length);
346 f->dump_unsigned("refs", p.second.refs);
347 f->close_section();
348 }
349 f->close_section();
350 }
351
352 void bluestore_extent_ref_map_t::generate_test_instances(
353 list<bluestore_extent_ref_map_t*>& o)
354 {
355 o.push_back(new bluestore_extent_ref_map_t);
356 o.push_back(new bluestore_extent_ref_map_t);
357 o.back()->get(10, 10);
358 o.back()->get(18, 22);
359 o.back()->get(20, 20);
360 o.back()->get(10, 25);
361 o.back()->get(15, 20);
362 }
363
364 ostream& operator<<(ostream& out, const bluestore_extent_ref_map_t& m)
365 {
366 out << "ref_map(";
367 for (auto p = m.ref_map.begin(); p != m.ref_map.end(); ++p) {
368 if (p != m.ref_map.begin())
369 out << ",";
370 out << std::hex << "0x" << p->first << "~" << p->second.length << std::dec
371 << "=" << p->second.refs;
372 }
373 out << ")";
374 return out;
375 }
376
377 // bluestore_blob_use_tracker_t
378
379 void bluestore_blob_use_tracker_t::allocate()
380 {
381 assert(num_au != 0);
382 bytes_per_au = new uint32_t[num_au];
383 for (uint32_t i = 0; i < num_au; ++i) {
384 bytes_per_au[i] = 0;
385 }
386 }
387
// (Re)initialize the tracker for a blob of full_length bytes with
// allocation units of _au_size bytes.  Blobs that fit in a single AU
// stay in the compact representation: num_au remains 0 and usage is
// tracked via total_bytes alone.
void bluestore_blob_use_tracker_t::init(
  uint32_t full_length, uint32_t _au_size) {
  assert(!au_size || is_empty());
  assert(_au_size > 0);
  assert(full_length > 0);
  clear();
  // number of AUs needed to cover full_length, rounding up
  uint32_t _num_au = ROUND_UP_TO(full_length, _au_size) / _au_size;
  au_size = _au_size;
  if( _num_au > 1 ) {
    num_au = _num_au;
    allocate();
  }
}
401
// Account `length` newly-referenced bytes starting at blob offset
// `offset`.  The compact (single-AU) form just grows total_bytes;
// otherwise the bytes are apportioned to each allocation unit the range
// touches.
void bluestore_blob_use_tracker_t::get(
  uint32_t offset, uint32_t length)
{
  assert(au_size);
  if (!num_au) {
    total_bytes += length;
  }else {
    auto end = offset + length;

    while (offset < end) {
      auto phase = offset % au_size;  // position within the current AU
      bytes_per_au[offset / au_size] +=
        MIN(au_size - phase, end - offset);
      // jump to the next AU boundary
      offset += (phase ? au_size - phase : au_size);
    }
  }
}
419
420 bool bluestore_blob_use_tracker_t::put(
421 uint32_t offset, uint32_t length,
422 PExtentVector *release_units)
423 {
424 assert(au_size);
425 if (release_units) {
426 release_units->clear();
427 }
428 bool maybe_empty = true;
429 if (!num_au) {
430 assert(total_bytes >= length);
431 total_bytes -= length;
432 } else {
433 auto end = offset + length;
434 uint64_t next_offs = 0;
435 while (offset < end) {
436 auto phase = offset % au_size;
437 size_t pos = offset / au_size;
438 auto diff = MIN(au_size - phase, end - offset);
439 assert(diff <= bytes_per_au[pos]);
440 bytes_per_au[pos] -= diff;
441 offset += (phase ? au_size - phase : au_size);
442 if (bytes_per_au[pos] == 0) {
443 if (release_units) {
444 if (release_units->empty() || next_offs != pos * au_size) {
445 release_units->emplace_back(pos * au_size, au_size);
446 } else {
447 release_units->back().length += au_size;
448 }
449 next_offs += au_size;
450 }
451 } else {
452 maybe_empty = false; // micro optimization detecting we aren't empty
453 // even in the affected extent
454 }
455 }
456 }
457 bool empty = maybe_empty ? !is_not_empty() : false;
458 if (empty && release_units) {
459 release_units->clear();
460 }
461 return empty;
462 }
463
464 bool bluestore_blob_use_tracker_t::can_split() const
465 {
466 return num_au > 0;
467 }
468
469 bool bluestore_blob_use_tracker_t::can_split_at(uint32_t blob_offset) const
470 {
471 assert(au_size);
472 return (blob_offset % au_size) == 0 &&
473 blob_offset < num_au * au_size;
474 }
475
// Move the tail of the tracker starting at blob_offset (AU-aligned) into
// *r, which must be empty.  This tracker keeps the leading AUs; a head
// that shrinks to exactly one AU collapses back to the compact
// representation.
void bluestore_blob_use_tracker_t::split(
  uint32_t blob_offset,
  bluestore_blob_use_tracker_t* r)
{
  assert(au_size);
  assert(can_split());
  assert(can_split_at(blob_offset));
  assert(r->is_empty());

  uint32_t new_num_au = blob_offset / au_size;
  r->init( (num_au - new_num_au) * au_size, au_size);

  // transfer the per-AU counters of the tail into r
  for (auto i = new_num_au; i < num_au; i++) {
    r->get((i - new_num_au) * au_size, bytes_per_au[i]);
    bytes_per_au[i] = 0;
  }
  if (new_num_au == 0) {
    clear();
  } else if (new_num_au == 1) {
    // collapse to the compact (single-AU) form
    uint32_t tmp = bytes_per_au[0];
    uint32_t _au_size = au_size;
    clear();
    au_size = _au_size;
    total_bytes = tmp;
  } else {
    num_au = new_num_au;
  }
}
504
// Deep equality of tracked usage.  Handles mixed representations: a
// compact tracker equals a per-AU one iff the per-AU counters sum to
// exactly the compact side's referenced byte count.
// NOTE(review): `referenced` is held in a uint32_t — assumes
// get_referenced_bytes() fits in 32 bits here; confirm against header.
bool bluestore_blob_use_tracker_t::equal(
  const bluestore_blob_use_tracker_t& other) const
{
  if (!num_au && !other.num_au) {
    return total_bytes == other.total_bytes && au_size == other.au_size;
  } else if (num_au && other.num_au) {
    if (num_au != other.num_au || au_size != other.au_size) {
      return false;
    }
    for (size_t i = 0; i < num_au; i++) {
      if (bytes_per_au[i] != other.bytes_per_au[i]) {
        return false;
      }
    }
    return true;
  }

  // mixed representations: compare AU counters against the compact total
  uint32_t n = num_au ? num_au : other.num_au;
  uint32_t referenced =
    num_au ? other.get_referenced_bytes() : get_referenced_bytes();
  auto bytes_per_au_tmp = num_au ? bytes_per_au : other.bytes_per_au;
  uint32_t my_referenced = 0;
  for (size_t i = 0; i < n; i++) {
    my_referenced += bytes_per_au_tmp[i];
    if (my_referenced > referenced) {
      return false;
    }
  }
  return my_referenced == referenced;
}
535
536 void bluestore_blob_use_tracker_t::dump(Formatter *f) const
537 {
538 f->dump_unsigned("num_au", num_au);
539 f->dump_unsigned("au_size", au_size);
540 if (!num_au) {
541 f->dump_unsigned("total_bytes", total_bytes);
542 } else {
543 f->open_array_section("bytes_per_au");
544 for (size_t i = 0; i < num_au; ++i) {
545 f->dump_unsigned("", bytes_per_au[i]);
546 }
547 f->close_section();
548 }
549 }
550
551 void bluestore_blob_use_tracker_t::generate_test_instances(
552 list<bluestore_blob_use_tracker_t*>& o)
553 {
554 o.push_back(new bluestore_blob_use_tracker_t());
555 o.back()->init(16, 16);
556 o.back()->get(10, 10);
557 o.back()->get(10, 5);
558 o.push_back(new bluestore_blob_use_tracker_t());
559 o.back()->init(60, 16);
560 o.back()->get(18, 22);
561 o.back()->get(20, 20);
562 o.back()->get(15, 20);
563 }
564
565 ostream& operator<<(ostream& out, const bluestore_blob_use_tracker_t& m)
566 {
567 out << "use_tracker(" << std::hex;
568 if (!m.num_au) {
569 out << "0x" << m.au_size
570 << " "
571 << "0x" << m.total_bytes;
572 } else {
573 out << "0x" << m.num_au
574 << "*0x" << m.au_size
575 << " 0x[";
576 for (size_t i = 0; i < m.num_au; ++i) {
577 if (i != 0)
578 out << ",";
579 out << m.bytes_per_au[i];
580 }
581 out << "]";
582 }
583 out << std::dec << ")";
584 return out;
585 }
586
587 // bluestore_pextent_t
588
// Emit the physical offset/length pair.
void bluestore_pextent_t::dump(Formatter *f) const
{
  f->dump_unsigned("offset", offset);
  f->dump_unsigned("length", length);
}
594
595 ostream& operator<<(ostream& out, const bluestore_pextent_t& o) {
596 if (o.is_valid())
597 return out << "0x" << std::hex << o.offset << "~" << o.length << std::dec;
598 else
599 return out << "!~" << std::hex << o.length << std::dec;
600 }
601
// A default (invalid) extent plus a small valid one.
void bluestore_pextent_t::generate_test_instances(list<bluestore_pextent_t*>& ls)
{
  ls.push_back(new bluestore_pextent_t);
  ls.push_back(new bluestore_pextent_t(1, 2));
}
607
608 // bluestore_blob_t
609
610 string bluestore_blob_t::get_flags_string(unsigned flags)
611 {
612 string s;
613 if (flags & FLAG_COMPRESSED) {
614 if (s.length())
615 s += '+';
616 s += "compressed";
617 }
618 if (flags & FLAG_CSUM) {
619 if (s.length())
620 s += '+';
621 s += "csum";
622 }
623 if (flags & FLAG_HAS_UNUSED) {
624 if (s.length())
625 s += '+';
626 s += "has_unused";
627 }
628 if (flags & FLAG_SHARED) {
629 if (s.length())
630 s += '+';
631 s += "shared";
632 }
633
634 return s;
635 }
636
// Size in bytes of one checksum value for this blob's csum_type.
size_t bluestore_blob_t::get_csum_value_size() const
{
  return Checksummer::get_csum_value_size(csum_type);
}
641
642 void bluestore_blob_t::dump(Formatter *f) const
643 {
644 f->open_array_section("extents");
645 for (auto& p : extents) {
646 f->dump_object("extent", p);
647 }
648 f->close_section();
649 f->dump_unsigned("logical_length", logical_length);
650 f->dump_unsigned("compressed_length", compressed_length);
651 f->dump_unsigned("flags", flags);
652 f->dump_unsigned("csum_type", csum_type);
653 f->dump_unsigned("csum_chunk_order", csum_chunk_order);
654 f->open_array_section("csum_data");
655 size_t n = get_csum_count();
656 for (unsigned i = 0; i < n; ++i)
657 f->dump_unsigned("csum", get_csum_item(i));
658 f->close_section();
659 f->dump_unsigned("unused", unused);
660 }
661
// Default/empty blobs plus one fully-populated instance exercising
// csum data, the unused bitmap, and a mix of valid/invalid pextents.
void bluestore_blob_t::generate_test_instances(list<bluestore_blob_t*>& ls)
{
  ls.push_back(new bluestore_blob_t);
  ls.push_back(new bluestore_blob_t(0));
  ls.push_back(new bluestore_blob_t);
  ls.back()->allocated_test(bluestore_pextent_t(111, 222));
  ls.push_back(new bluestore_blob_t);
  ls.back()->init_csum(Checksummer::CSUM_XXHASH32, 16, 65536);
  // claim_malloc takes ownership of the strdup'd buffer (4 data bytes)
  ls.back()->csum_data = buffer::claim_malloc(4, strdup("abcd"));
  ls.back()->add_unused(0, 3);
  ls.back()->add_unused(8, 8);
  ls.back()->allocated_test(bluestore_pextent_t(0x40100000, 0x10000));
  ls.back()->allocated_test(
    bluestore_pextent_t(bluestore_pextent_t::INVALID_OFFSET, 0x1000));
  ls.back()->allocated_test(bluestore_pextent_t(0x40120000, 0x10000));
}
678
679 ostream& operator<<(ostream& out, const bluestore_blob_t& o)
680 {
681 out << "blob(" << o.get_extents();
682 if (o.is_compressed()) {
683 out << " clen 0x" << std::hex
684 << o.get_logical_length()
685 << " -> 0x"
686 << o.get_compressed_payload_length()
687 << std::dec;
688 }
689 if (o.flags) {
690 out << " " << o.get_flags_string();
691 }
692 if (o.has_csum()) {
693 out << " " << Checksummer::get_csum_type_string(o.csum_type)
694 << "/0x" << std::hex << (1ull << o.csum_chunk_order) << std::dec;
695 }
696 if (o.has_unused())
697 out << " unused=0x" << std::hex << o.unused << std::dec;
698 out << ")";
699 return out;
700 }
701
702 void bluestore_blob_t::calc_csum(uint64_t b_off, const bufferlist& bl)
703 {
704 switch (csum_type) {
705 case Checksummer::CSUM_XXHASH32:
706 Checksummer::calculate<Checksummer::xxhash32>(
707 get_csum_chunk_size(), b_off, bl.length(), bl, &csum_data);
708 break;
709 case Checksummer::CSUM_XXHASH64:
710 Checksummer::calculate<Checksummer::xxhash64>(
711 get_csum_chunk_size(), b_off, bl.length(), bl, &csum_data);
712 break;;
713 case Checksummer::CSUM_CRC32C:
714 Checksummer::calculate<Checksummer::crc32c>(
715 get_csum_chunk_size(), b_off, bl.length(), bl, &csum_data);
716 break;
717 case Checksummer::CSUM_CRC32C_16:
718 Checksummer::calculate<Checksummer::crc32c_16>(
719 get_csum_chunk_size(), b_off, bl.length(), bl, &csum_data);
720 break;
721 case Checksummer::CSUM_CRC32C_8:
722 Checksummer::calculate<Checksummer::crc32c_8>(
723 get_csum_chunk_size(), b_off, bl.length(), bl, &csum_data);
724 break;
725 }
726 }
727
// Verify bl against the stored checksums, starting at blob offset b_off.
// Returns 0 on success, -1 on a checksum mismatch (with *b_bad_off set
// to the offending offset and *bad_csum to the computed value), or
// -EOPNOTSUPP for an unrecognized csum_type.  *b_bad_off is -1 when no
// mismatch was found.
int bluestore_blob_t::verify_csum(uint64_t b_off, const bufferlist& bl,
                                  int* b_bad_off, uint64_t *bad_csum) const
{
  int r = 0;

  *b_bad_off = -1;
  switch (csum_type) {
  case Checksummer::CSUM_NONE:
    // nothing to verify
    break;
  case Checksummer::CSUM_XXHASH32:
    *b_bad_off = Checksummer::verify<Checksummer::xxhash32>(
      get_csum_chunk_size(), b_off, bl.length(), bl, csum_data, bad_csum);
    break;
  case Checksummer::CSUM_XXHASH64:
    *b_bad_off = Checksummer::verify<Checksummer::xxhash64>(
      get_csum_chunk_size(), b_off, bl.length(), bl, csum_data, bad_csum);
    break;
  case Checksummer::CSUM_CRC32C:
    *b_bad_off = Checksummer::verify<Checksummer::crc32c>(
      get_csum_chunk_size(), b_off, bl.length(), bl, csum_data, bad_csum);
    break;
  case Checksummer::CSUM_CRC32C_16:
    *b_bad_off = Checksummer::verify<Checksummer::crc32c_16>(
      get_csum_chunk_size(), b_off, bl.length(), bl, csum_data, bad_csum);
    break;
  case Checksummer::CSUM_CRC32C_8:
    *b_bad_off = Checksummer::verify<Checksummer::crc32c_8>(
      get_csum_chunk_size(), b_off, bl.length(), bl, csum_data, bad_csum);
    break;
  default:
    r = -EOPNOTSUPP;
    break;
  }

  if (r < 0)
    return r;
  else if (*b_bad_off >= 0)
    return -1; // bad checksum
  else
    return 0;
}
769
// Record newly-allocated physical extents backing blob range
// [b_off, b_off+length).  For an empty blob this simply builds the
// extent list (with a leading invalid pextent for any unallocated
// head).  Otherwise the covered range — which must currently consist of
// invalid pextents — is carved out and replaced in place by the new
// allocations, preserving any invalid head/tail remainders.
void bluestore_blob_t::allocated(uint32_t b_off, uint32_t length, const AllocExtentVector& allocs)
{
  if (extents.size() == 0) {
    // if blob is compressed then logical length to be already configured
    // otherwise - to be unset.
    assert((is_compressed() && logical_length != 0) ||
      (!is_compressed() && logical_length == 0));

    extents.reserve(allocs.size() + (b_off ? 1 : 0));
    if (b_off) {
      extents.emplace_back(
        bluestore_pextent_t(bluestore_pextent_t::INVALID_OFFSET, b_off));
    }
    uint32_t new_len = b_off;
    for (auto& a : allocs) {
      extents.emplace_back(a.offset, a.length);
      new_len += a.length;
    }
    if (!is_compressed()) {
      logical_length = new_len;
    }
  } else {
    assert(!is_compressed()); // partial allocations are forbidden when
                              // compressed
    assert(b_off < logical_length);
    // find the pextent containing b_off
    uint32_t cur_offs = 0;
    auto start_it = extents.begin();
    size_t pos = 0;
    while(true) {
      assert(start_it != extents.end());
      if (cur_offs + start_it->length > b_off) {
        break;
      }
      cur_offs += start_it->length;
      ++start_it;
      ++pos;
    }
    uint32_t head = b_off - cur_offs;   // invalid bytes kept before b_off
    uint32_t end_off = b_off + length;
    auto end_it = start_it;

    // find the (invalid) pextent containing the end of the range
    while (true) {
      assert(end_it != extents.end());
      assert(!end_it->is_valid());
      if (cur_offs + end_it->length >= end_off) {
        break;
      }
      cur_offs += end_it->length;
      ++end_it;
    }
    assert(cur_offs + end_it->length >= end_off);
    uint32_t tail = cur_offs + end_it->length - end_off; // invalid bytes kept after

    // replace [start_it, end_it] with placeholders, then fill them in
    start_it = extents.erase(start_it, end_it + 1);
    size_t count = allocs.size();
    count += head ? 1 : 0;
    count += tail ? 1 : 0;
    extents.insert(start_it,
                   count,
                   bluestore_pextent_t(
                     bluestore_pextent_t::INVALID_OFFSET, 0));

    // Workaround to resolve lack of proper iterator return in vector::insert
    // Looks like some gcc/stl implementations still lack it despite c++11
    // support claim
    start_it = extents.begin() + pos;

    if (head) {
      start_it->length = head;
      ++start_it;
    }
    for(auto& e : allocs) {
      *start_it = e;
      ++start_it;
    }
    if (tail) {
      start_it->length = tail;
    }
  }
}
850
851 // cut it out of extents
852 struct vecbuilder {
853 PExtentVector v;
854 uint64_t invalid = 0;
855
856 void add_invalid(uint64_t length) {
857 invalid += length;
858 }
859 void flush() {
860 if (invalid) {
861 v.emplace_back(bluestore_pextent_t(bluestore_pextent_t::INVALID_OFFSET,
862 invalid));
863 invalid = 0;
864 }
865 }
866 void add(uint64_t offset, uint64_t length) {
867 if (offset == bluestore_pextent_t::INVALID_OFFSET) {
868 add_invalid(length);
869 }
870 else {
871 flush();
872 v.emplace_back(bluestore_pextent_t(offset, length));
873 }
874 }
875 };
876
// Test-only helper: append a pextent and grow logical_length to match
// (except for compressed blobs, whose logical length is fixed up front).
void bluestore_blob_t::allocated_test(const bluestore_pextent_t& alloc)
{
  extents.emplace_back(alloc);
  if (!is_compressed()) {
    logical_length += alloc.length;
  }
}
884
// Release physical space backing the logical blob ranges in 'logical',
// appending the freed pextents to *r (adjacent pieces coalesced).
// With all=true the entire blob is released and collapses to a single
// invalid pextent; returns true in that case.  Otherwise the extent
// list is rebuilt with released pieces replaced by (merged) invalid
// space, and false is returned.
bool bluestore_blob_t::release_extents(bool all,
                                       const PExtentVector& logical,
                                       PExtentVector* r)
{
  // common case: all of it?
  if (all) {
    uint64_t pos = 0;
    for (auto& e : extents) {
      if (e.is_valid()) {
        r->push_back(e);
      }
      pos += e.length;
    }
    assert(is_compressed() || get_logical_length() == pos);
    extents.resize(1);
    extents[0].offset = bluestore_pextent_t::INVALID_OFFSET;
    extents[0].length = pos;
    return true;
  }
  // remove from pextents according to logical release list
  vecbuilder vb;
  auto loffs_it = logical.begin();
  auto lend = logical.end();
  uint32_t pext_loffs_start = 0; //starting loffset of the current pextent
  uint32_t pext_loffs = 0; //current loffset
  auto pext_it = extents.begin();
  auto pext_end = extents.end();
  while (pext_it != pext_end) {
    if (loffs_it == lend ||
        pext_loffs_start + pext_it->length <= loffs_it->offset) {
      // no (further) release touches this pextent: keep its remainder
      int delta0 = pext_loffs - pext_loffs_start;
      assert(delta0 >= 0);
      if ((uint32_t)delta0 < pext_it->length) {
        vb.add(pext_it->offset + delta0, pext_it->length - delta0);
      }
      pext_loffs_start += pext_it->length;
      pext_loffs = pext_loffs_start;
      ++pext_it;
    }
    else {
      //assert(pext_loffs == pext_loffs_start);
      int delta0 = pext_loffs - pext_loffs_start;
      assert(delta0 >= 0);

      // keep the part of the pextent before the released range
      int delta = loffs_it->offset - pext_loffs;
      assert(delta >= 0);
      if (delta > 0) {
        vb.add(pext_it->offset + delta0, delta);
        pext_loffs += delta;
      }

      // remember the last entry of *r so adjacent releases can be merged
      PExtentVector::iterator last_r = r->end();
      if (r->begin() != last_r) {
        --last_r;
      }
      uint32_t to_release = loffs_it->length;
      do {
        uint32_t to_release_part =
          MIN(pext_it->length - delta0 - delta, to_release);
        auto o = pext_it->offset + delta0 + delta;
        if (last_r != r->end() && last_r->offset + last_r->length == o) {
          // physically adjacent to the previous released piece: extend it
          last_r->length += to_release_part;
        }
        else {
          last_r = r->emplace(r->end(), o, to_release_part);
        }
        to_release -= to_release_part;
        pext_loffs += to_release_part;
        if (pext_loffs == pext_loffs_start + pext_it->length) {
          // consumed the whole pextent; move on to the next one
          pext_loffs_start += pext_it->length;
          pext_loffs = pext_loffs_start;
          pext_it++;
          delta0 = delta = 0;
        }
      } while (to_release > 0 && pext_it != pext_end);
      // released logical space becomes invalid (unallocated) in the blob
      vb.add_invalid(loffs_it->length - to_release);
      ++loffs_it;
    }
  }
  vb.flush();
  extents.swap(vb.v);
  return false;
}
968
// Split this blob at blob_offset: extents (and csum data, if any) past
// the split point move to rb; this blob keeps the head.  blob_offset
// may land inside a pextent, which is cut in two.  rb inherits flags
// and csum parameters.
void bluestore_blob_t::split(uint32_t blob_offset, bluestore_blob_t& rb)
{
  size_t left = blob_offset;
  uint32_t llen_lb = 0;   // logical length kept by this (left) blob
  uint32_t llen_rb = 0;   // logical length moved to rb (right blob)
  unsigned i = 0;
  for (auto p = extents.begin(); p != extents.end(); ++p, ++i) {
    if (p->length <= left) {
      left -= p->length;
      llen_lb += p->length;
      continue;
    }
    if (left) {
      // split point falls inside *p: truncate it and hand the remainder
      // (valid or invalid alike) to rb
      if (p->is_valid()) {
        rb.extents.emplace_back(bluestore_pextent_t(p->offset + left,
                                                    p->length - left));
      }
      else {
        rb.extents.emplace_back(bluestore_pextent_t(
                                  bluestore_pextent_t::INVALID_OFFSET,
                                  p->length - left));
      }
      llen_rb += p->length - left;
      llen_lb += left;
      p->length = left;
      ++i;
      ++p;
    }
    // move every remaining extent to rb
    while (p != extents.end()) {
      llen_rb += p->length;
      rb.extents.push_back(*p++);
    }
    extents.resize(i);
    logical_length = llen_lb;
    rb.logical_length = llen_rb;
    break;  // all work done once the split point was reached
  }
  rb.flags = flags;

  if (has_csum()) {
    rb.csum_type = csum_type;
    rb.csum_chunk_order = csum_chunk_order;
    size_t csum_order = get_csum_chunk_size();  // bytes per csum chunk
    assert(blob_offset % csum_order == 0);
    size_t pos = (blob_offset / csum_order) * get_csum_value_size();
    // deep copy csum data
    bufferptr old;
    old.swap(csum_data);
    rb.csum_data = bufferptr(old.c_str() + pos, old.length() - pos);
    csum_data = bufferptr(old.c_str(), pos);
  }
}
1021
1022 // bluestore_shared_blob_t
1023
// Emit the shared-blob id and its reference map.
void bluestore_shared_blob_t::dump(Formatter *f) const
{
  f->dump_int("sbid", sbid);
  f->dump_object("ref_map", ref_map);
}
1029
// A single instance with a non-zero sbid.
void bluestore_shared_blob_t::generate_test_instances(
  list<bluestore_shared_blob_t*>& ls)
{
  ls.push_back(new bluestore_shared_blob_t(1));
}
1035
1036 ostream& operator<<(ostream& out, const bluestore_shared_blob_t& sb)
1037 {
1038 out << "(sbid 0x" << std::hex << sb.sbid << std::dec;
1039 out << " " << sb.ref_map << ")";
1040 return out;
1041 }
1042
1043 // bluestore_onode_t
1044
// Emit the shard's offset and encoded size.
void bluestore_onode_t::shard_info::dump(Formatter *f) const
{
  f->dump_unsigned("offset", offset);
  f->dump_unsigned("bytes", bytes);
}
1050
1051 ostream& operator<<(ostream& out, const bluestore_onode_t::shard_info& si)
1052 {
1053 return out << std::hex << "0x" << si.offset << "(0x" << si.bytes << " bytes"
1054 << std::dec << ")";
1055 }
1056
1057 void bluestore_onode_t::dump(Formatter *f) const
1058 {
1059 f->dump_unsigned("nid", nid);
1060 f->dump_unsigned("size", size);
1061 f->open_object_section("attrs");
1062 for (auto p = attrs.begin(); p != attrs.end(); ++p) {
1063 f->open_object_section("attr");
1064 f->dump_string("name", p->first.c_str()); // it's not quite std::string
1065 f->dump_unsigned("len", p->second.length());
1066 f->close_section();
1067 }
1068 f->close_section();
1069 f->dump_string("flags", get_flags_string());
1070 f->open_array_section("extent_map_shards");
1071 for (auto si : extent_map_shards) {
1072 f->dump_object("shard", si);
1073 }
1074 f->close_section();
1075 f->dump_unsigned("expected_object_size", expected_object_size);
1076 f->dump_unsigned("expected_write_size", expected_write_size);
1077 f->dump_unsigned("alloc_hint_flags", alloc_hint_flags);
1078 }
1079
// Only a default instance so far.
void bluestore_onode_t::generate_test_instances(list<bluestore_onode_t*>& o)
{
  o.push_back(new bluestore_onode_t());
  // FIXME
}
1085
1086 // bluestore_deferred_op_t
1087
1088 void bluestore_deferred_op_t::dump(Formatter *f) const
1089 {
1090 f->dump_unsigned("op", (int)op);
1091 f->dump_unsigned("data_len", data.length());
1092 f->open_array_section("extents");
1093 for (auto& e : extents) {
1094 f->dump_object("extent", e);
1095 }
1096 f->close_section();
1097 }
1098
1099 void bluestore_deferred_op_t::generate_test_instances(list<bluestore_deferred_op_t*>& o)
1100 {
1101 o.push_back(new bluestore_deferred_op_t);
1102 o.push_back(new bluestore_deferred_op_t);
1103 o.back()->op = OP_WRITE;
1104 o.back()->extents.push_back(bluestore_pextent_t(1, 2));
1105 o.back()->extents.push_back(bluestore_pextent_t(100, 5));
1106 o.back()->data.append("my data");
1107 }
1108
1109 void bluestore_deferred_transaction_t::dump(Formatter *f) const
1110 {
1111 f->dump_unsigned("seq", seq);
1112 f->open_array_section("ops");
1113 for (list<bluestore_deferred_op_t>::const_iterator p = ops.begin(); p != ops.end(); ++p) {
1114 f->dump_object("op", *p);
1115 }
1116 f->close_section();
1117
1118 f->open_array_section("released extents");
1119 for (interval_set<uint64_t>::const_iterator p = released.begin(); p != released.end(); ++p) {
1120 f->open_object_section("extent");
1121 f->dump_unsigned("offset", p.get_start());
1122 f->dump_unsigned("length", p.get_len());
1123 f->close_section();
1124 }
1125 f->close_section();
1126 }
1127
1128 void bluestore_deferred_transaction_t::generate_test_instances(list<bluestore_deferred_transaction_t*>& o)
1129 {
1130 o.push_back(new bluestore_deferred_transaction_t());
1131 o.push_back(new bluestore_deferred_transaction_t());
1132 o.back()->seq = 123;
1133 o.back()->ops.push_back(bluestore_deferred_op_t());
1134 o.back()->ops.push_back(bluestore_deferred_op_t());
1135 o.back()->ops.back().op = bluestore_deferred_op_t::OP_WRITE;
1136 o.back()->ops.back().extents.push_back(bluestore_pextent_t(1,7));
1137 o.back()->ops.back().data.append("foodata");
1138 }
1139
// Emit the compressor type and uncompressed payload length.
void bluestore_compression_header_t::dump(Formatter *f) const
{
  f->dump_unsigned("type", type);
  f->dump_unsigned("length", length);
}
1145
1146 void bluestore_compression_header_t::generate_test_instances(
1147 list<bluestore_compression_header_t*>& o)
1148 {
1149 o.push_back(new bluestore_compression_header_t);
1150 o.push_back(new bluestore_compression_header_t(1));
1151 o.back()->length = 1234;
1152 }