]> git.proxmox.com Git - ceph.git/blob - ceph/src/os/bluestore/bluestore_types.cc
import ceph 15.2.16
[ceph.git] / ceph / src / os / bluestore / bluestore_types.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2014 Red Hat
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15 #include "bluestore_types.h"
16 #include "common/Formatter.h"
17 #include "common/Checksummer.h"
18 #include "include/stringify.h"
19
20 // bluestore_bdev_label_t
21
// Serialize the device label.  A human-readable preamble (banner line plus
// stringified uuid) precedes the binary payload so that anyone inspecting
// the raw device can identify it; decode() skips this preamble.
void bluestore_bdev_label_t::encode(bufferlist& bl) const
{
  // be slightly friendly to someone who looks at the device
  bl.append("bluestore block device\n");  // 23 bytes
  bl.append(stringify(osd_uuid));         // 36 bytes
  bl.append("\n");                        // 1 byte; 60 bytes total preamble
  ENCODE_START(2, 1, bl);
  encode(osd_uuid, bl);
  encode(size, bl);
  encode(btime, bl);
  encode(description, bl);
  encode(meta, bl);  // added in struct_v 2
  ENCODE_FINISH(bl);
}
36
// Deserialize a device label previously written by encode().
void bluestore_bdev_label_t::decode(bufferlist::const_iterator& p)
{
  // Skip the 60-byte human-readable preamble written by encode():
  // banner (23) + stringified uuid (36) + newline (1).
  p += 60u; // see above
  DECODE_START(2, p);
  decode(osd_uuid, p);
  decode(size, p);
  decode(btime, p);
  decode(description, p);
  if (struct_v >= 2) {
    // meta map only present in v2+ labels
    decode(meta, p);
  }
  DECODE_FINISH(p);
}
50
51 void bluestore_bdev_label_t::dump(Formatter *f) const
52 {
53 f->dump_stream("osd_uuid") << osd_uuid;
54 f->dump_unsigned("size", size);
55 f->dump_stream("btime") << btime;
56 f->dump_string("description", description);
57 for (auto& i : meta) {
58 f->dump_string(i.first.c_str(), i.second);
59 }
60 }
61
62 void bluestore_bdev_label_t::generate_test_instances(
63 list<bluestore_bdev_label_t*>& o)
64 {
65 o.push_back(new bluestore_bdev_label_t);
66 o.push_back(new bluestore_bdev_label_t);
67 o.back()->size = 123;
68 o.back()->btime = utime_t(4, 5);
69 o.back()->description = "fakey";
70 o.back()->meta["foo"] = "bar";
71 }
72
73 ostream& operator<<(ostream& out, const bluestore_bdev_label_t& l)
74 {
75 return out << "bdev(osd_uuid " << l.osd_uuid
76 << ", size 0x" << std::hex << l.size << std::dec
77 << ", btime " << l.btime
78 << ", desc " << l.description
79 << ", " << l.meta.size() << " meta"
80 << ")";
81 }
82
83 // cnode_t
84
// Dump the collection node's only field: its hash bit count
// (presumably the PG split bits — confirm in bluestore_types.h).
void bluestore_cnode_t::dump(Formatter *f) const
{
  f->dump_unsigned("bits", bits);
}
89
// Test fixtures: default, explicit zero, and a non-trivial bit count.
void bluestore_cnode_t::generate_test_instances(list<bluestore_cnode_t*>& o)
{
  o.push_back(new bluestore_cnode_t());
  o.push_back(new bluestore_cnode_t(0));
  o.push_back(new bluestore_cnode_t(123));
}
96
// Log-friendly rendering of a collection node.
ostream& operator<<(ostream& out, const bluestore_cnode_t& l)
{
  return out << "cnode(bits " << l.bits << ")";
}
101
102 // bluestore_extent_ref_map_t
103
// Debug invariant checker (call sites in get()/put() are commented out).
// Verifies that extents never overlap and that adjacent extents with equal
// refcounts have been merged; aborts the process on violation.
void bluestore_extent_ref_map_t::_check() const
{
  uint64_t pos = 0;    // end offset of the previous extent
  unsigned refs = 0;   // refcount of the previous extent
  for (const auto &p : ref_map) {
    if (p.first < pos)
      ceph_abort_msg("overlap");
    // two abutting extents with the same refcount should have been merged
    if (p.first == pos && p.second.refs == refs)
      ceph_abort_msg("unmerged");
    pos = p.first + p.second.length;
    refs = p.second.refs;
  }
}
117
// If the extent before *p abuts it and carries the same refcount, coalesce
// the two.  On merge, p is updated to point at the surviving (left) entry,
// since the original iterator is erased and would dangle.
void bluestore_extent_ref_map_t::_maybe_merge_left(
  map<uint64_t,record_t>::iterator& p)
{
  if (p == ref_map.begin())
    return;  // nothing to the left
  auto q = p;
  --q;
  if (q->second.refs == p->second.refs &&
      q->first + q->second.length == p->first) {
    q->second.length += p->second.length;
    ref_map.erase(p);
    p = q;  // keep caller's iterator valid
  }
}
132
// Add one reference to the byte range [offset, offset+length).  The range
// may cover existing extents, gaps between them, or both; extents are
// split and inserted as needed so every covered byte gains exactly one
// reference, and equal-ref neighbors are re-merged afterward.
void bluestore_extent_ref_map_t::get(uint64_t offset, uint32_t length)
{
  // position p at the first extent that could overlap offset
  auto p = ref_map.lower_bound(offset);
  if (p != ref_map.begin()) {
    --p;
    if (p->first + p->second.length <= offset) {
      ++p;  // the preceding extent ends before offset; not relevant
    }
  }
  while (length > 0) {
    if (p == ref_map.end()) {
      // nothing after offset; add the whole thing.
      p = ref_map.insert(
        map<uint64_t,record_t>::value_type(offset, record_t(length, 1))).first;
      break;
    }
    if (p->first > offset) {
      // gap: create a fresh single-ref extent up to the next one
      uint64_t newlen = std::min<uint64_t>(p->first - offset, length);
      p = ref_map.insert(
        map<uint64_t,record_t>::value_type(offset,
                                           record_t(newlen, 1))).first;
      offset += newlen;
      length -= newlen;
      _maybe_merge_left(p);
      ++p;
      continue;
    }
    if (p->first < offset) {
      // split off the portion before offset
      ceph_assert(p->first + p->second.length > offset);
      uint64_t left = p->first + p->second.length - offset;
      p->second.length = offset - p->first;
      p = ref_map.insert(map<uint64_t,record_t>::value_type(
                           offset, record_t(left, p->second.refs))).first;
      // continue below
    }
    ceph_assert(p->first == offset);
    if (length < p->second.length) {
      // range ends inside this extent: split the unchanged tail off first
      ref_map.insert(make_pair(offset + length,
                               record_t(p->second.length - length,
                                        p->second.refs)));
      p->second.length = length;
      ++p->second.refs;
      break;
    }
    // the whole extent is covered: bump it and continue with the remainder
    ++p->second.refs;
    offset += p->second.length;
    length -= p->second.length;
    _maybe_merge_left(p);
    ++p;
  }
  if (p != ref_map.end())
    _maybe_merge_left(p);
  //_check();
}
189
// Drop one reference from [offset, offset+length).  Extents whose refcount
// reaches zero are appended to *release (if non-null) as physical extents
// to free.  If maybe_unshared is non-null, *maybe_unshared is set to true
// when no remaining extent has refs > 1.  Aborts if any part of the range
// is not currently referenced.
void bluestore_extent_ref_map_t::put(
  uint64_t offset, uint32_t length,
  PExtentVector *release,
  bool *maybe_unshared)
{
  //NB: existing entries in 'release' container must be preserved!
  bool unshared = true;  // optimistic; cleared whenever we see refs != 1
  auto p = ref_map.lower_bound(offset);
  if (p == ref_map.end() || p->first > offset) {
    if (p == ref_map.begin()) {
      ceph_abort_msg("put on missing extent (nothing before)");
    }
    --p;
    if (p->first + p->second.length <= offset) {
      ceph_abort_msg("put on missing extent (gap)");
    }
  }
  if (p->first < offset) {
    // split off the untouched head so p starts exactly at offset
    uint64_t left = p->first + p->second.length - offset;
    p->second.length = offset - p->first;
    if (p->second.refs != 1) {
      unshared = false;
    }
    p = ref_map.insert(map<uint64_t,record_t>::value_type(
                         offset, record_t(left, p->second.refs))).first;
  }
  while (length > 0) {
    ceph_assert(p->first == offset);
    if (length < p->second.length) {
      // range ends inside this extent: split off the unchanged tail,
      // then either decrement or (refs==1) release the head
      if (p->second.refs != 1) {
        unshared = false;
      }
      ref_map.insert(make_pair(offset + length,
                               record_t(p->second.length - length,
                                        p->second.refs)));
      if (p->second.refs > 1) {
        p->second.length = length;
        --p->second.refs;
        if (p->second.refs != 1) {
          unshared = false;
        }
        _maybe_merge_left(p);
      } else {
        if (release)
          release->push_back(bluestore_pextent_t(p->first, length));
        ref_map.erase(p);
      }
      goto out;
    }
    // whole extent consumed by the range
    offset += p->second.length;
    length -= p->second.length;
    if (p->second.refs > 1) {
      --p->second.refs;
      if (p->second.refs != 1) {
        unshared = false;
      }
      _maybe_merge_left(p);
      ++p;
    } else {
      // last reference gone: hand the extent back and drop the entry
      if (release)
        release->push_back(bluestore_pextent_t(p->first, p->second.length));
      ref_map.erase(p++);
    }
  }
  if (p != ref_map.end())
    _maybe_merge_left(p);
  //_check();
 out:
  if (maybe_unshared) {
    if (unshared) {
      // we haven't seen a ref != 1 yet; check the whole map.
      for (auto& p : ref_map) {
        if (p.second.refs != 1) {
          unshared = false;
          break;
        }
      }
    }
    *maybe_unshared = unshared;
  }
}
271
// Return true iff every byte of [offset, offset+length) is covered by
// referenced extents (gaps anywhere in the range yield false).
bool bluestore_extent_ref_map_t::contains(uint64_t offset, uint32_t length) const
{
  auto p = ref_map.lower_bound(offset);
  if (p == ref_map.end() || p->first > offset) {
    if (p == ref_map.begin()) {
      return false; // nothing before
    }
    --p;
    if (p->first + p->second.length <= offset) {
      return false; // gap
    }
  }
  // walk forward, consuming the range extent by extent
  while (length > 0) {
    if (p == ref_map.end())
      return false;           // ran out of extents
    if (p->first > offset)
      return false;           // hole between extents
    if (p->first + p->second.length >= offset + length)
      return true;            // this extent covers the rest
    uint64_t overlap = p->first + p->second.length - offset;
    offset += overlap;
    length -= overlap;
    ++p;
  }
  return true;
}
298
// Return true iff any byte of [offset, offset+length) overlaps a
// referenced extent (unlike contains(), partial overlap suffices).
bool bluestore_extent_ref_map_t::intersects(
  uint64_t offset,
  uint32_t length) const
{
  auto p = ref_map.lower_bound(offset);
  if (p != ref_map.begin()) {
    --p;
    if (p->first + p->second.length <= offset) {
      ++p;  // previous extent ends before offset; ignore it
    }
  }
  if (p == ref_map.end())
    return false;
  if (p->first >= offset + length)
    return false;  // first candidate starts past the range
  return true; // intersects p!
}
316
317 void bluestore_extent_ref_map_t::dump(Formatter *f) const
318 {
319 f->open_array_section("ref_map");
320 for (auto& p : ref_map) {
321 f->open_object_section("ref");
322 f->dump_unsigned("offset", p.first);
323 f->dump_unsigned("length", p.second.length);
324 f->dump_unsigned("refs", p.second.refs);
325 f->close_section();
326 }
327 f->close_section();
328 }
329
330 void bluestore_extent_ref_map_t::generate_test_instances(
331 list<bluestore_extent_ref_map_t*>& o)
332 {
333 o.push_back(new bluestore_extent_ref_map_t);
334 o.push_back(new bluestore_extent_ref_map_t);
335 o.back()->get(10, 10);
336 o.back()->get(18, 22);
337 o.back()->get(20, 20);
338 o.back()->get(10, 25);
339 o.back()->get(15, 20);
340 }
341
342 ostream& operator<<(ostream& out, const bluestore_extent_ref_map_t& m)
343 {
344 out << "ref_map(";
345 for (auto p = m.ref_map.begin(); p != m.ref_map.end(); ++p) {
346 if (p != m.ref_map.begin())
347 out << ",";
348 out << std::hex << "0x" << p->first << "~" << p->second.length << std::dec
349 << "=" << p->second.refs;
350 }
351 out << ")";
352 return out;
353 }
354
355 // bluestore_blob_use_tracker_t
// Copy constructor.  The tracker has two representations: per-AU counters
// (num_au > 0, heap array bytes_per_au) or a single aggregate total_bytes
// (num_au == 0) — presumably overlaid storage; confirm in the header.
// Deep-copies the counter array in the former case.
bluestore_blob_use_tracker_t::bluestore_blob_use_tracker_t(
  const bluestore_blob_use_tracker_t& tracker)
  : au_size{tracker.au_size},
    num_au{tracker.num_au},
    bytes_per_au{nullptr}
{
  if (num_au > 0) {
    allocate();  // sets up a zeroed array and mempool accounting
    std::copy(tracker.bytes_per_au, tracker.bytes_per_au + num_au, bytes_per_au);
  } else {
    total_bytes = tracker.total_bytes;
  }
}
369
// Copy assignment: release current state, then deep-copy rhs using the
// same two-representation scheme as the copy constructor.
bluestore_blob_use_tracker_t&
bluestore_blob_use_tracker_t::operator=(const bluestore_blob_use_tracker_t& rhs)
{
  if (this == &rhs) {
    return *this;  // self-assignment guard; clear() would destroy rhs too
  }
  clear();
  au_size = rhs.au_size;
  num_au = rhs.num_au;
  if (rhs.num_au > 0) {
    allocate();
    std::copy(rhs.bytes_per_au, rhs.bytes_per_au + num_au, bytes_per_au);
  } else {
    total_bytes = rhs.total_bytes;
  }
  return *this;
}
387
388 void bluestore_blob_use_tracker_t::allocate()
389 {
390 ceph_assert(num_au != 0);
391 bytes_per_au = new uint32_t[num_au];
392 mempool::get_pool(
393 mempool::pool_index_t(mempool::mempool_bluestore_cache_other)).
394 adjust_count(1, sizeof(uint32_t) * num_au);
395
396 for (uint32_t i = 0; i < num_au; ++i) {
397 bytes_per_au[i] = 0;
398 }
399 }
400
// (Re)initialize the tracker for a blob of full_length bytes with
// allocation-unit size _au_size.  If the blob fits in a single AU the
// tracker stays in aggregate mode (num_au == 0, total_bytes); otherwise a
// per-AU counter array is allocated.
void bluestore_blob_use_tracker_t::init(
  uint32_t full_length, uint32_t _au_size) {
  ceph_assert(!au_size || is_empty());  // re-init only allowed when empty
  ceph_assert(_au_size > 0);
  ceph_assert(full_length > 0);
  clear();
  uint32_t _num_au = round_up_to(full_length, _au_size) / _au_size;
  au_size = _au_size;
  if ( _num_au > 1 ) {
    // multi-AU blob: track usage per allocation unit
    num_au = _num_au;
    allocate();
  }
}
414
// Record that [offset, offset+length) within the blob is now referenced.
// In aggregate mode we just grow total_bytes; in per-AU mode each touched
// allocation unit's counter grows by the number of bytes falling in it.
void bluestore_blob_use_tracker_t::get(
  uint32_t offset, uint32_t length)
{
  ceph_assert(au_size);
  if (!num_au) {
    total_bytes += length;
  } else {
    auto end = offset + length;

    while (offset < end) {
      auto phase = offset % au_size;  // offset within the current AU
      bytes_per_au[offset / au_size] +=
        std::min(au_size - phase, end - offset);
      // advance to the start of the next AU
      offset += (phase ? au_size - phase : au_size);
    }
  }
}
432
// Drop [offset, offset+length) from the tracked usage.  In per-AU mode,
// any AU whose counter drops to zero is appended to *release_units
// (contiguous AUs are coalesced into one pextent).  Returns true iff the
// whole tracker became empty; in that case release_units is cleared since
// the caller is expected to release the entire blob instead.
bool bluestore_blob_use_tracker_t::put(
  uint32_t offset, uint32_t length,
  PExtentVector *release_units)
{
  ceph_assert(au_size);
  if (release_units) {
    release_units->clear();
  }
  bool maybe_empty = true;
  if (!num_au) {
    // aggregate mode: single counter
    ceph_assert(total_bytes >= length);
    total_bytes -= length;
  } else {
    auto end = offset + length;
    uint64_t next_offs = 0;  // expected start of the next released AU
    while (offset < end) {
      auto phase = offset % au_size;   // offset within the current AU
      size_t pos = offset / au_size;   // AU index
      auto diff = std::min(au_size - phase, end - offset);
      ceph_assert(diff <= bytes_per_au[pos]);
      bytes_per_au[pos] -= diff;
      offset += (phase ? au_size - phase : au_size);
      if (bytes_per_au[pos] == 0) {
        if (release_units) {
          // merge with the previous released AU when contiguous
          if (release_units->empty() || next_offs != pos * au_size) {
            release_units->emplace_back(pos * au_size, au_size);
            next_offs = pos * au_size;
          } else {
            release_units->back().length += au_size;
          }
          next_offs += au_size;
        }
      } else {
        maybe_empty = false; // micro optimization detecting we aren't empty
                             // even in the affected extent
      }
    }
  }
  // only do the full emptiness scan if the touched range looked empty
  bool empty = maybe_empty ? !is_not_empty() : false;
  if (empty && release_units) {
    release_units->clear();
  }
  return empty;
}
477
478 bool bluestore_blob_use_tracker_t::can_split() const
479 {
480 return num_au > 0;
481 }
482
483 bool bluestore_blob_use_tracker_t::can_split_at(uint32_t blob_offset) const
484 {
485 ceph_assert(au_size);
486 return (blob_offset % au_size) == 0 &&
487 blob_offset < num_au * au_size;
488 }
489
// Split this tracker at blob_offset (an AU boundary): AUs at or beyond the
// boundary move into *r (which must be empty), and this tracker keeps the
// prefix.  If the remaining prefix is a single AU we fall back to
// aggregate mode (num_au == 0, total_bytes), matching init()'s convention.
void bluestore_blob_use_tracker_t::split(
  uint32_t blob_offset,
  bluestore_blob_use_tracker_t* r)
{
  ceph_assert(au_size);
  ceph_assert(can_split());
  ceph_assert(can_split_at(blob_offset));
  ceph_assert(r->is_empty());

  uint32_t new_num_au = blob_offset / au_size;
  r->init( (num_au - new_num_au) * au_size, au_size);

  // transfer per-AU counts for the tail into r
  for (auto i = new_num_au; i < num_au; i++) {
    r->get((i - new_num_au) * au_size, bytes_per_au[i]);
    bytes_per_au[i] = 0;
  }
  if (new_num_au == 0) {
    // nothing left on this side
    clear();
  } else if (new_num_au == 1) {
    // single remaining AU: collapse to aggregate mode
    uint32_t tmp = bytes_per_au[0];
    uint32_t _au_size = au_size;
    clear();
    au_size = _au_size;
    total_bytes = tmp;
  } else {
    // keep array mode; trailing entries are already zeroed above
    num_au = new_num_au;
  }
}
518
// Compare two trackers for equivalent usage.  Same-mode trackers are
// compared field by field; when one side is aggregate and the other is
// per-AU, they are considered equal iff the per-AU counters sum to the
// aggregate's referenced byte count (with an early out on overshoot).
bool bluestore_blob_use_tracker_t::equal(
  const bluestore_blob_use_tracker_t& other) const
{
  if (!num_au && !other.num_au) {
    // both aggregate mode
    return total_bytes == other.total_bytes && au_size == other.au_size;
  } else if (num_au && other.num_au) {
    // both per-AU mode: arrays must match exactly
    if (num_au != other.num_au || au_size != other.au_size) {
      return false;
    }
    for (size_t i = 0; i < num_au; i++) {
      if (bytes_per_au[i] != other.bytes_per_au[i]) {
        return false;
      }
    }
    return true;
  }

  // mixed modes: compare summed per-AU counters with the aggregate total
  uint32_t n = num_au ? num_au : other.num_au;
  uint32_t referenced =
    num_au ? other.get_referenced_bytes() : get_referenced_bytes();
  auto bytes_per_au_tmp = num_au ? bytes_per_au : other.bytes_per_au;
  uint32_t my_referenced = 0;
  for (size_t i = 0; i < n; i++) {
    my_referenced += bytes_per_au_tmp[i];
    if (my_referenced > referenced) {
      return false;  // already exceeds the aggregate; cannot be equal
    }
  }
  return my_referenced == referenced;
}
549
550 void bluestore_blob_use_tracker_t::dump(Formatter *f) const
551 {
552 f->dump_unsigned("num_au", num_au);
553 f->dump_unsigned("au_size", au_size);
554 if (!num_au) {
555 f->dump_unsigned("total_bytes", total_bytes);
556 } else {
557 f->open_array_section("bytes_per_au");
558 for (size_t i = 0; i < num_au; ++i) {
559 f->dump_unsigned("", bytes_per_au[i]);
560 }
561 f->close_section();
562 }
563 }
564
565 void bluestore_blob_use_tracker_t::generate_test_instances(
566 list<bluestore_blob_use_tracker_t*>& o)
567 {
568 o.push_back(new bluestore_blob_use_tracker_t());
569 o.back()->init(16, 16);
570 o.back()->get(10, 10);
571 o.back()->get(10, 5);
572 o.push_back(new bluestore_blob_use_tracker_t());
573 o.back()->init(60, 16);
574 o.back()->get(18, 22);
575 o.back()->get(20, 20);
576 o.back()->get(15, 20);
577 }
578
579 ostream& operator<<(ostream& out, const bluestore_blob_use_tracker_t& m)
580 {
581 out << "use_tracker(" << std::hex;
582 if (!m.num_au) {
583 out << "0x" << m.au_size
584 << " "
585 << "0x" << m.total_bytes;
586 } else {
587 out << "0x" << m.num_au
588 << "*0x" << m.au_size
589 << " 0x[";
590 for (size_t i = 0; i < m.num_au; ++i) {
591 if (i != 0)
592 out << ",";
593 out << m.bytes_per_au[i];
594 }
595 out << "]";
596 }
597 out << std::dec << ")";
598 return out;
599 }
600
601 // bluestore_pextent_t
602
// Dump a physical extent's offset and length.
void bluestore_pextent_t::dump(Formatter *f) const
{
  f->dump_unsigned("offset", offset);
  f->dump_unsigned("length", length);
}
608
609 ostream& operator<<(ostream& out, const bluestore_pextent_t& o) {
610 if (o.is_valid())
611 return out << "0x" << std::hex << o.offset << "~" << o.length << std::dec;
612 else
613 return out << "!~" << std::hex << o.length << std::dec;
614 }
615
// Fixtures: default extent and a small concrete one.
void bluestore_pextent_t::generate_test_instances(list<bluestore_pextent_t*>& ls)
{
  ls.push_back(new bluestore_pextent_t);
  ls.push_back(new bluestore_pextent_t(1, 2));
}
621
622 // bluestore_blob_t
623
624 string bluestore_blob_t::get_flags_string(unsigned flags)
625 {
626 string s;
627 if (flags & FLAG_COMPRESSED) {
628 if (s.length())
629 s += '+';
630 s += "compressed";
631 }
632 if (flags & FLAG_CSUM) {
633 if (s.length())
634 s += '+';
635 s += "csum";
636 }
637 if (flags & FLAG_HAS_UNUSED) {
638 if (s.length())
639 s += '+';
640 s += "has_unused";
641 }
642 if (flags & FLAG_SHARED) {
643 if (s.length())
644 s += '+';
645 s += "shared";
646 }
647
648 return s;
649 }
650
// Size in bytes of one checksum value for this blob's configured csum_type;
// delegates to Checksummer.
size_t bluestore_blob_t::get_csum_value_size() const
{
  return Checksummer::get_csum_value_size(csum_type);
}
655
656 void bluestore_blob_t::dump(Formatter *f) const
657 {
658 f->open_array_section("extents");
659 for (auto& p : extents) {
660 f->dump_object("extent", p);
661 }
662 f->close_section();
663 f->dump_unsigned("logical_length", logical_length);
664 f->dump_unsigned("compressed_length", compressed_length);
665 f->dump_unsigned("flags", flags);
666 f->dump_unsigned("csum_type", csum_type);
667 f->dump_unsigned("csum_chunk_order", csum_chunk_order);
668 f->open_array_section("csum_data");
669 size_t n = get_csum_count();
670 for (unsigned i = 0; i < n; ++i)
671 f->dump_unsigned("csum", get_csum_item(i));
672 f->close_section();
673 f->dump_unsigned("unused", unused);
674 }
675
// Fixtures: default/zero-flag blobs, a simple allocated blob, and a blob
// exercising checksums, unused ranges, and a mix of valid/invalid extents.
void bluestore_blob_t::generate_test_instances(list<bluestore_blob_t*>& ls)
{
  ls.push_back(new bluestore_blob_t);
  ls.push_back(new bluestore_blob_t(0));
  ls.push_back(new bluestore_blob_t);
  ls.back()->allocated_test(bluestore_pextent_t(111, 222));
  ls.push_back(new bluestore_blob_t);
  ls.back()->init_csum(Checksummer::CSUM_XXHASH32, 16, 65536);
  // claim_malloc takes ownership of the strdup'd buffer (first 4 bytes)
  ls.back()->csum_data = buffer::claim_malloc(4, strdup("abcd"));
  ls.back()->add_unused(0, 3);
  ls.back()->add_unused(8, 8);
  ls.back()->allocated_test(bluestore_pextent_t(0x40100000, 0x10000));
  ls.back()->allocated_test(
    bluestore_pextent_t(bluestore_pextent_t::INVALID_OFFSET, 0x1000));
  ls.back()->allocated_test(bluestore_pextent_t(0x40120000, 0x10000));
}
692
// Log-friendly blob summary: extents, compression ratio, flag names,
// checksum type/chunk size, and the unused bitmap when present.
ostream& operator<<(ostream& out, const bluestore_blob_t& o)
{
  out << "blob(" << o.get_extents();
  if (o.is_compressed()) {
    // logical -> compressed payload length
    out << " clen 0x" << std::hex
        << o.get_logical_length()
        << " -> 0x"
        << o.get_compressed_payload_length()
        << std::dec;
  }
  if (o.flags) {
    out << " " << o.get_flags_string();
  }
  if (o.has_csum()) {
    // chunk size is 2^csum_chunk_order
    out << " " << Checksummer::get_csum_type_string(o.csum_type)
        << "/0x" << std::hex << (1ull << o.csum_chunk_order) << std::dec;
  }
  if (o.has_unused())
    out << " unused=0x" << std::hex << o.unused << std::dec;
  out << ")";
  return out;
}
715
716 void bluestore_blob_t::calc_csum(uint64_t b_off, const bufferlist& bl)
717 {
718 switch (csum_type) {
719 case Checksummer::CSUM_XXHASH32:
720 Checksummer::calculate<Checksummer::xxhash32>(
721 get_csum_chunk_size(), b_off, bl.length(), bl, &csum_data);
722 break;
723 case Checksummer::CSUM_XXHASH64:
724 Checksummer::calculate<Checksummer::xxhash64>(
725 get_csum_chunk_size(), b_off, bl.length(), bl, &csum_data);
726 break;;
727 case Checksummer::CSUM_CRC32C:
728 Checksummer::calculate<Checksummer::crc32c>(
729 get_csum_chunk_size(), b_off, bl.length(), bl, &csum_data);
730 break;
731 case Checksummer::CSUM_CRC32C_16:
732 Checksummer::calculate<Checksummer::crc32c_16>(
733 get_csum_chunk_size(), b_off, bl.length(), bl, &csum_data);
734 break;
735 case Checksummer::CSUM_CRC32C_8:
736 Checksummer::calculate<Checksummer::crc32c_8>(
737 get_csum_chunk_size(), b_off, bl.length(), bl, &csum_data);
738 break;
739 }
740 }
741
// Verify the checksums of bl (at blob offset b_off) against csum_data.
// Returns 0 on success, -1 on a checksum mismatch (with *b_bad_off set to
// the offset of the first bad chunk and *bad_csum to the computed value),
// or -EOPNOTSUPP for an unknown csum_type.  CSUM_NONE always succeeds.
int bluestore_blob_t::verify_csum(uint64_t b_off, const bufferlist& bl,
  int* b_bad_off, uint64_t *bad_csum) const
{
  int r = 0;

  *b_bad_off = -1;  // -1 means "no bad chunk found"
  switch (csum_type) {
  case Checksummer::CSUM_NONE:
    break;
  case Checksummer::CSUM_XXHASH32:
    *b_bad_off = Checksummer::verify<Checksummer::xxhash32>(
      get_csum_chunk_size(), b_off, bl.length(), bl, csum_data, bad_csum);
    break;
  case Checksummer::CSUM_XXHASH64:
    *b_bad_off = Checksummer::verify<Checksummer::xxhash64>(
      get_csum_chunk_size(), b_off, bl.length(), bl, csum_data, bad_csum);
    break;
  case Checksummer::CSUM_CRC32C:
    *b_bad_off = Checksummer::verify<Checksummer::crc32c>(
      get_csum_chunk_size(), b_off, bl.length(), bl, csum_data, bad_csum);
    break;
  case Checksummer::CSUM_CRC32C_16:
    *b_bad_off = Checksummer::verify<Checksummer::crc32c_16>(
      get_csum_chunk_size(), b_off, bl.length(), bl, csum_data, bad_csum);
    break;
  case Checksummer::CSUM_CRC32C_8:
    *b_bad_off = Checksummer::verify<Checksummer::crc32c_8>(
      get_csum_chunk_size(), b_off, bl.length(), bl, csum_data, bad_csum);
    break;
  default:
    r = -EOPNOTSUPP;
    break;
  }

  if (r < 0)
    return r;
  else if (*b_bad_off >= 0)
    return -1; // bad checksum
  else
    return 0;
}
783
// Install newly allocated physical extents covering blob range
// [b_off, b_off+length).  For a fresh blob the extent list is built from
// scratch (with an invalid placeholder before b_off if needed); otherwise
// the allocs replace an existing run of invalid (unallocated) extents,
// preserving any invalid head/tail around the replaced range.
void bluestore_blob_t::allocated(uint32_t b_off, uint32_t length, const PExtentVector& allocs)
{
  if (extents.size() == 0) {
    // if blob is compressed then logical length to be already configured
    // otherwise - to be unset.
    ceph_assert((is_compressed() && logical_length != 0) ||
      (!is_compressed() && logical_length == 0));

    extents.reserve(allocs.size() + (b_off ? 1 : 0));
    if (b_off) {
      // placeholder for the unallocated prefix
      extents.emplace_back(
        bluestore_pextent_t(bluestore_pextent_t::INVALID_OFFSET, b_off));

    }
    uint32_t new_len = b_off;
    for (auto& a : allocs) {
      extents.emplace_back(a.offset, a.length);
      new_len += a.length;
    }
    if (!is_compressed()) {
      logical_length = new_len;
    }
  } else {
    ceph_assert(!is_compressed()); // partial allocations are forbidden when
                                   // compressed
    ceph_assert(b_off < logical_length);
    // find the extent containing b_off (start_it / pos), tracking the
    // logical offset at which it begins (cur_offs)
    uint32_t cur_offs = 0;
    auto start_it = extents.begin();
    size_t pos = 0;
    while (true) {
      ceph_assert(start_it != extents.end());
      if (cur_offs + start_it->length > b_off) {
        break;
      }
      cur_offs += start_it->length;
      ++start_it;
      ++pos;
    }
    uint32_t head = b_off - cur_offs;      // invalid bytes kept before b_off
    uint32_t end_off = b_off + length;
    auto end_it = start_it;

    // find the extent containing the end of the range; everything being
    // replaced must currently be invalid (unallocated)
    while (true) {
      ceph_assert(end_it != extents.end());
      ceph_assert(!end_it->is_valid());
      if (cur_offs + end_it->length >= end_off) {
        break;
      }
      cur_offs += end_it->length;
      ++end_it;
    }
    ceph_assert(cur_offs + end_it->length >= end_off);
    uint32_t tail = cur_offs + end_it->length - end_off;  // invalid bytes kept after

    // replace [start_it, end_it] with head? + allocs + tail? placeholders
    start_it = extents.erase(start_it, end_it + 1);
    size_t count = allocs.size();
    count += head ? 1 : 0;
    count += tail ? 1 : 0;
    extents.insert(start_it,
                   count,
                   bluestore_pextent_t(
                     bluestore_pextent_t::INVALID_OFFSET, 0));

    // Workaround to resolve lack of proper iterator return in vector::insert
    // Looks like some gcc/stl implementations still lack it despite c++11
    // support claim
    start_it = extents.begin() + pos;

    // fill in the placeholders
    if (head) {
      start_it->length = head;
      ++start_it;
    }
    for(auto& e : allocs) {
      *start_it = e;
      ++start_it;
    }
    if (tail) {
      start_it->length = tail;
    }
  }
}
865
866 // cut it out of extents
867 struct vecbuilder {
868 PExtentVector v;
869 uint64_t invalid = 0;
870
871 void add_invalid(uint64_t length) {
872 invalid += length;
873 }
874 void flush() {
875 if (invalid) {
876 v.emplace_back(bluestore_pextent_t(bluestore_pextent_t::INVALID_OFFSET,
877 invalid));
878
879 invalid = 0;
880 }
881 }
882 void add(uint64_t offset, uint64_t length) {
883 if (offset == bluestore_pextent_t::INVALID_OFFSET) {
884 add_invalid(length);
885 }
886 else {
887 flush();
888 v.emplace_back(offset, length);
889 }
890 }
891 };
892
893 void bluestore_blob_t::allocated_test(const bluestore_pextent_t& alloc)
894 {
895 extents.emplace_back(alloc);
896 if (!is_compressed()) {
897 logical_length += alloc.length;
898 }
899 }
900
// Release physical extents from the blob.  With all==true every valid
// extent is appended to *r and the blob collapses to a single invalid
// extent of the full length (returns true).  Otherwise 'logical' lists
// blob-relative ranges to release: the matching physical pieces are
// appended to *r (merging adjacent pieces), the blob's extent list is
// rebuilt with those ranges marked invalid, and false is returned.
bool bluestore_blob_t::release_extents(bool all,
  const PExtentVector& logical,
  PExtentVector* r)
{
  // common case: all of it?
  if (all) {
    uint64_t pos = 0;
    for (auto& e : extents) {
      if (e.is_valid()) {
        r->push_back(e);
      }
      pos += e.length;
    }
    ceph_assert(is_compressed() || get_logical_length() == pos);
    // collapse to one invalid extent spanning the whole blob
    extents.resize(1);
    extents[0].offset = bluestore_pextent_t::INVALID_OFFSET;
    extents[0].length = pos;
    return true;
  }
  // remove from pextents according to logical release list
  vecbuilder vb;
  auto loffs_it = logical.begin();
  auto lend = logical.end();
  uint32_t pext_loffs_start = 0; //starting loffset of the current pextent
  uint32_t pext_loffs = 0; //current loffset
  auto pext_it = extents.begin();
  auto pext_end = extents.end();
  while (pext_it != pext_end) {
    if (loffs_it == lend ||
        pext_loffs_start + pext_it->length <= loffs_it->offset) {
      // no (more) releases touch this pextent: keep its unconsumed part
      int delta0 = pext_loffs - pext_loffs_start;
      ceph_assert(delta0 >= 0);
      if ((uint32_t)delta0 < pext_it->length) {
        vb.add(pext_it->offset + delta0, pext_it->length - delta0);
      }
      pext_loffs_start += pext_it->length;
      pext_loffs = pext_loffs_start;
      ++pext_it;
    }
    else {
      //assert(pext_loffs == pext_loffs_start);
      int delta0 = pext_loffs - pext_loffs_start;  // already-consumed prefix
      ceph_assert(delta0 >= 0);

      int delta = loffs_it->offset - pext_loffs;   // kept bytes before release
      ceph_assert(delta >= 0);
      if (delta > 0) {
        vb.add(pext_it->offset + delta0, delta);
        pext_loffs += delta;
      }

      // remember the last entry in r so adjacent releases can be merged
      PExtentVector::iterator last_r = r->end();
      if (r->begin() != last_r) {
        --last_r;
      }
      uint32_t to_release = loffs_it->length;
      do {
        // release as much of this logical range as the current pextent holds
        uint32_t to_release_part =
          std::min(pext_it->length - delta0 - delta, to_release);
        auto o = pext_it->offset + delta0 + delta;
        if (last_r != r->end() && last_r->offset + last_r->length == o) {
          last_r->length += to_release_part;  // extend previous entry
        }
        else {
          last_r = r->emplace(r->end(), o, to_release_part);
        }
        to_release -= to_release_part;
        pext_loffs += to_release_part;
        if (pext_loffs == pext_loffs_start + pext_it->length) {
          // exhausted this pextent; move to the next
          pext_loffs_start += pext_it->length;
          pext_loffs = pext_loffs_start;
          pext_it++;
          delta0 = delta = 0;
        }
      } while (to_release > 0 && pext_it != pext_end);
      // mark the released logical span as unallocated in the rebuilt list
      vb.add_invalid(loffs_it->length - to_release);
      ++loffs_it;
    }
  }
  vb.flush();
  extents.swap(vb.v);
  return false;
}
984
// Split this blob at blob_offset: extents (splitting the one straddling
// the boundary, if any), logical length, flags, and checksum data beyond
// the boundary move into rb; this blob keeps the prefix.  The outer for
// loop runs its body-with-break only once, at the first extent that
// crosses or follows the split point.
void bluestore_blob_t::split(uint32_t blob_offset, bluestore_blob_t& rb)
{
  size_t left = blob_offset;   // bytes still to skip before the split point
  uint32_t llen_lb = 0;        // logical length accumulated for this (left) blob
  uint32_t llen_rb = 0;        // logical length accumulated for rb
  unsigned i = 0;
  for (auto p = extents.begin(); p != extents.end(); ++p, ++i) {
    if (p->length <= left) {
      // extent fully before the split point: stays here
      left -= p->length;
      llen_lb += p->length;
      continue;
    }
    if (left) {
      // split point falls inside this extent: tail goes to rb
      if (p->is_valid()) {
        rb.extents.emplace_back(bluestore_pextent_t(p->offset + left,
                                                    p->length - left));
      }
      else {
        rb.extents.emplace_back(bluestore_pextent_t(
                                  bluestore_pextent_t::INVALID_OFFSET,
                                  p->length - left));
      }
      llen_rb += p->length - left;
      llen_lb += left;
      p->length = left;
      ++i;
      ++p;
    }
    // all remaining extents belong to rb
    while (p != extents.end()) {
      llen_rb += p->length;
      rb.extents.push_back(*p++);
    }
    extents.resize(i);
    logical_length = llen_lb;
    rb.logical_length = llen_rb;
    break;  // split handled; loop body never repeats
  }
  rb.flags = flags;

  if (has_csum()) {
    rb.csum_type = csum_type;
    rb.csum_chunk_order = csum_chunk_order;
    size_t csum_order = get_csum_chunk_size();
    ceph_assert(blob_offset % csum_order == 0);
    size_t pos = (blob_offset / csum_order) * get_csum_value_size();
    // deep copy csum data
    bufferptr old;
    old.swap(csum_data);
    rb.csum_data = bufferptr(old.c_str() + pos, old.length() - pos);
    csum_data = bufferptr(old.c_str(), pos);
  }
}
1037
1038 // bluestore_shared_blob_t
// Account bluestore_shared_blob_t allocations in the bluestore_cache_other
// mempool.
MEMPOOL_DEFINE_OBJECT_FACTORY(bluestore_shared_blob_t, bluestore_shared_blob_t,
                              bluestore_cache_other);
1041
// Dump the shared-blob id and its extent reference map.
void bluestore_shared_blob_t::dump(Formatter *f) const
{
  f->dump_int("sbid", sbid);
  f->dump_object("ref_map", ref_map);
}
1047
// Single fixture with a non-zero shared-blob id.
void bluestore_shared_blob_t::generate_test_instances(
  list<bluestore_shared_blob_t*>& ls)
{
  ls.push_back(new bluestore_shared_blob_t(1));
}
1053
1054 ostream& operator<<(ostream& out, const bluestore_shared_blob_t& sb)
1055 {
1056 out << "(sbid 0x" << std::hex << sb.sbid << std::dec;
1057 out << " " << sb.ref_map << ")";
1058 return out;
1059 }
1060
1061 // bluestore_onode_t
1062
// Dump one extent-map shard's offset and encoded size.
void bluestore_onode_t::shard_info::dump(Formatter *f) const
{
  f->dump_unsigned("offset", offset);
  f->dump_unsigned("bytes", bytes);
}
1068
// Render a shard as 0x<offset>(0x<bytes> bytes).
ostream& operator<<(ostream& out, const bluestore_onode_t::shard_info& si)
{
  return out << std::hex << "0x" << si.offset << "(0x" << si.bytes << " bytes"
             << std::dec << ")";
}
1074
1075 void bluestore_onode_t::dump(Formatter *f) const
1076 {
1077 f->dump_unsigned("nid", nid);
1078 f->dump_unsigned("size", size);
1079 f->open_object_section("attrs");
1080 for (auto p = attrs.begin(); p != attrs.end(); ++p) {
1081 f->open_object_section("attr");
1082 f->dump_string("name", p->first.c_str()); // it's not quite std::string
1083 f->dump_unsigned("len", p->second.length());
1084 f->close_section();
1085 }
1086 f->close_section();
1087 f->dump_string("flags", get_flags_string());
1088 f->open_array_section("extent_map_shards");
1089 for (auto si : extent_map_shards) {
1090 f->dump_object("shard", si);
1091 }
1092 f->close_section();
1093 f->dump_unsigned("expected_object_size", expected_object_size);
1094 f->dump_unsigned("expected_write_size", expected_write_size);
1095 f->dump_unsigned("alloc_hint_flags", alloc_hint_flags);
1096 }
1097
// Only a default instance so far; richer fixtures still to be added.
void bluestore_onode_t::generate_test_instances(list<bluestore_onode_t*>& o)
{
  o.push_back(new bluestore_onode_t());
  // FIXME
}
1103
1104 // bluestore_deferred_op_t
1105
1106 void bluestore_deferred_op_t::dump(Formatter *f) const
1107 {
1108 f->dump_unsigned("op", (int)op);
1109 f->dump_unsigned("data_len", data.length());
1110 f->open_array_section("extents");
1111 for (auto& e : extents) {
1112 f->dump_object("extent", e);
1113 }
1114 f->close_section();
1115 }
1116
1117 void bluestore_deferred_op_t::generate_test_instances(list<bluestore_deferred_op_t*>& o)
1118 {
1119 o.push_back(new bluestore_deferred_op_t);
1120 o.push_back(new bluestore_deferred_op_t);
1121 o.back()->op = OP_WRITE;
1122 o.back()->extents.push_back(bluestore_pextent_t(1, 2));
1123 o.back()->extents.push_back(bluestore_pextent_t(100, 5));
1124 o.back()->data.append("my data");
1125 }
1126
1127 void bluestore_deferred_transaction_t::dump(Formatter *f) const
1128 {
1129 f->dump_unsigned("seq", seq);
1130 f->open_array_section("ops");
1131 for (list<bluestore_deferred_op_t>::const_iterator p = ops.begin(); p != ops.end(); ++p) {
1132 f->dump_object("op", *p);
1133 }
1134 f->close_section();
1135
1136 f->open_array_section("released extents");
1137 for (interval_set<uint64_t>::const_iterator p = released.begin(); p != released.end(); ++p) {
1138 f->open_object_section("extent");
1139 f->dump_unsigned("offset", p.get_start());
1140 f->dump_unsigned("length", p.get_len());
1141 f->close_section();
1142 }
1143 f->close_section();
1144 }
1145
1146 void bluestore_deferred_transaction_t::generate_test_instances(list<bluestore_deferred_transaction_t*>& o)
1147 {
1148 o.push_back(new bluestore_deferred_transaction_t());
1149 o.push_back(new bluestore_deferred_transaction_t());
1150 o.back()->seq = 123;
1151 o.back()->ops.push_back(bluestore_deferred_op_t());
1152 o.back()->ops.push_back(bluestore_deferred_op_t());
1153 o.back()->ops.back().op = bluestore_deferred_op_t::OP_WRITE;
1154 o.back()->ops.back().extents.push_back(bluestore_pextent_t(1,7));
1155 o.back()->ops.back().data.append("foodata");
1156 }
1157
// Dump the compression algorithm id and uncompressed data length.
void bluestore_compression_header_t::dump(Formatter *f) const
{
  f->dump_unsigned("type", type);
  f->dump_unsigned("length", length);
}
1163
// Fixtures: default header and one with type 1 / non-zero length.
void bluestore_compression_header_t::generate_test_instances(
  list<bluestore_compression_header_t*>& o)
{
  o.push_back(new bluestore_compression_header_t);
  o.push_back(new bluestore_compression_header_t(1));
  o.back()->length = 1234;
}
1171
1172 // adds more salt to build a hash func input
// Build the hash-function input for (sbid, offset).  The offset is
// truncated to allocation-unit granularity (>> au_void_bits); a third
// component mixes the low 32 bits of sbid with the bitwise complement of
// the truncated offset's low 32 bits as extra salt.
// adds more salt to build a hash func input
shared_blob_2hash_tracker_t::hash_input_t
shared_blob_2hash_tracker_t::build_hash_input(
  uint64_t sbid,
  uint64_t offset) const
{
  hash_input_t res = {
    sbid,
    offset >> au_void_bits,
    ((sbid & 0xffffffff) << 32) + ~(uint32_t((offset >> au_void_bits) & 0xffffffff))
  };
  return res;
}
1185
1186 void shared_blob_2hash_tracker_t::inc(
1187 uint64_t sbid,
1188 uint64_t offset,
1189 int n)
1190 {
1191 auto hash_input = build_hash_input(sbid, offset);
1192 ref_counter_2hash_tracker_t::inc(
1193 (char*)hash_input.data(),
1194 get_hash_input_size(),
1195 n);
1196 }
1197
// Adjust refcounts by n for every allocation unit touched by
// [offset, offset+len).
void shared_blob_2hash_tracker_t::inc_range(
  uint64_t sbid,
  uint64_t offset,
  uint32_t len,
  int n)
{
  uint32_t alloc_unit = 1 << au_void_bits;
  // signed so the loop terminates even when len isn't AU-aligned
  int64_t l = len;
  while (l > 0) {
    // don't care about offset alignment as inc() trims it anyway
    inc(sbid, offset, n);
    offset += alloc_unit;
    l -= alloc_unit;
  }
}
1213
1214 bool shared_blob_2hash_tracker_t::test_hash_conflict(
1215 uint64_t sbid1,
1216 uint64_t offset1,
1217 uint64_t sbid2,
1218 uint64_t offset2) const
1219 {
1220 auto hash_input1 = build_hash_input(sbid1, offset1);
1221 auto hash_input2 = build_hash_input(sbid2, offset2);
1222 return ref_counter_2hash_tracker_t::test_hash_conflict(
1223 (char*)hash_input1.data(),
1224 (char*)hash_input2.data(),
1225 get_hash_input_size());
1226 }
1227
1228 bool shared_blob_2hash_tracker_t::test_all_zero(
1229 uint64_t sbid,
1230 uint64_t offset) const
1231 {
1232 auto hash_input = build_hash_input(sbid, offset);
1233 return
1234 ref_counter_2hash_tracker_t::test_all_zero(
1235 (char*)hash_input.data(),
1236 get_hash_input_size());
1237 }
1238
// True iff every allocation unit touched by [offset, offset+len) has
// all-zero counters; stops early at the first non-zero unit.
bool shared_blob_2hash_tracker_t::test_all_zero_range(
  uint64_t sbid,
  uint64_t offset,
  uint32_t len) const
{
  uint32_t alloc_unit = 1 << au_void_bits;
  // signed so the loop terminates even when len isn't AU-aligned
  int64_t l = len;
  while (l > 0) {
    // don't care about offset alignment as test_all_zero() trims it anyway
    if (!test_all_zero(sbid, offset)) {
      return false;
    }
    offset += alloc_unit;
    l -= alloc_unit;
  }
  return true;
}