]> git.proxmox.com Git - ceph.git/blame - ceph/src/os/bluestore/bluestore_types.cc
import 15.2.0 Octopus source
[ceph.git] / ceph / src / os / bluestore / bluestore_types.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2014 Red Hat
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15#include "bluestore_types.h"
16#include "common/Formatter.h"
17#include "common/Checksummer.h"
18#include "include/stringify.h"
19
7c673cae
FG
20// bluestore_bdev_label_t
21
22void bluestore_bdev_label_t::encode(bufferlist& bl) const
23{
24 // be slightly friendly to someone who looks at the device
25 bl.append("bluestore block device\n");
26 bl.append(stringify(osd_uuid));
27 bl.append("\n");
3efd9988 28 ENCODE_START(2, 1, bl);
11fdf7f2
TL
29 encode(osd_uuid, bl);
30 encode(size, bl);
31 encode(btime, bl);
32 encode(description, bl);
33 encode(meta, bl);
7c673cae
FG
34 ENCODE_FINISH(bl);
35}
36
11fdf7f2 37void bluestore_bdev_label_t::decode(bufferlist::const_iterator& p)
7c673cae 38{
9f95a23c 39 p += 60u; // see above
3efd9988 40 DECODE_START(2, p);
11fdf7f2
TL
41 decode(osd_uuid, p);
42 decode(size, p);
43 decode(btime, p);
44 decode(description, p);
3efd9988 45 if (struct_v >= 2) {
11fdf7f2 46 decode(meta, p);
3efd9988 47 }
7c673cae
FG
48 DECODE_FINISH(p);
49}
50
51void bluestore_bdev_label_t::dump(Formatter *f) const
52{
53 f->dump_stream("osd_uuid") << osd_uuid;
54 f->dump_unsigned("size", size);
55 f->dump_stream("btime") << btime;
56 f->dump_string("description", description);
3efd9988
FG
57 for (auto& i : meta) {
58 f->dump_string(i.first.c_str(), i.second);
59 }
7c673cae
FG
60}
61
62void bluestore_bdev_label_t::generate_test_instances(
63 list<bluestore_bdev_label_t*>& o)
64{
65 o.push_back(new bluestore_bdev_label_t);
66 o.push_back(new bluestore_bdev_label_t);
67 o.back()->size = 123;
68 o.back()->btime = utime_t(4, 5);
69 o.back()->description = "fakey";
3efd9988 70 o.back()->meta["foo"] = "bar";
7c673cae
FG
71}
72
73ostream& operator<<(ostream& out, const bluestore_bdev_label_t& l)
74{
75 return out << "bdev(osd_uuid " << l.osd_uuid
3efd9988
FG
76 << ", size 0x" << std::hex << l.size << std::dec
77 << ", btime " << l.btime
78 << ", desc " << l.description
79 << ", " << l.meta.size() << " meta"
80 << ")";
7c673cae
FG
81}
82
83// cnode_t
84
85void bluestore_cnode_t::dump(Formatter *f) const
86{
87 f->dump_unsigned("bits", bits);
88}
89
90void bluestore_cnode_t::generate_test_instances(list<bluestore_cnode_t*>& o)
91{
92 o.push_back(new bluestore_cnode_t());
93 o.push_back(new bluestore_cnode_t(0));
94 o.push_back(new bluestore_cnode_t(123));
95}
96
28e407b8
AA
97ostream& operator<<(ostream& out, const bluestore_cnode_t& l)
98{
99 return out << "cnode(bits " << l.bits << ")";
100}
101
7c673cae
FG
102// bluestore_extent_ref_map_t
103
104void bluestore_extent_ref_map_t::_check() const
105{
106 uint64_t pos = 0;
107 unsigned refs = 0;
108 for (const auto &p : ref_map) {
109 if (p.first < pos)
11fdf7f2 110 ceph_abort_msg("overlap");
7c673cae 111 if (p.first == pos && p.second.refs == refs)
11fdf7f2 112 ceph_abort_msg("unmerged");
7c673cae
FG
113 pos = p.first + p.second.length;
114 refs = p.second.refs;
115 }
116}
117
118void bluestore_extent_ref_map_t::_maybe_merge_left(
119 map<uint64_t,record_t>::iterator& p)
120{
121 if (p == ref_map.begin())
122 return;
123 auto q = p;
124 --q;
125 if (q->second.refs == p->second.refs &&
126 q->first + q->second.length == p->first) {
127 q->second.length += p->second.length;
128 ref_map.erase(p);
129 p = q;
130 }
131}
132
// Take one reference on the byte range [offset, offset + length).
//
// Parts of the range not yet present in ref_map are inserted with a
// reference count of 1; parts already present have their count
// incremented.  Existing records that straddle either end of the range are
// split so that only the covered portion is affected, and records that end
// up adjacent with equal counts are merged via _maybe_merge_left().
133void bluestore_extent_ref_map_t::get(uint64_t offset, uint32_t length)
134{
// Position p on the record that covers offset, or else on the first record
// that starts at or after it.
135 auto p = ref_map.lower_bound(offset);
136 if (p != ref_map.begin()) {
137 --p;
138 if (p->first + p->second.length <= offset) {
139 ++p;
140 }
141 }
// Each iteration consumes a prefix of the remaining range.
142 while (length > 0) {
143 if (p == ref_map.end()) {
144 // nothing after offset; add the whole thing.
145 p = ref_map.insert(
146 map<uint64_t,record_t>::value_type(offset, record_t(length, 1))).first;
147 break;
148 }
149 if (p->first > offset) {
150 // gap
// Fill the hole up to the next record (or up to the end of the range,
// whichever comes first) with a fresh single-reference record.
11fdf7f2 151 uint64_t newlen = std::min<uint64_t>(p->first - offset, length);
7c673cae
FG
152 p = ref_map.insert(
153 map<uint64_t,record_t>::value_type(offset,
154 record_t(newlen, 1))).first;
155 offset += newlen;
156 length -= newlen;
157 _maybe_merge_left(p);
158 ++p;
159 continue;
160 }
161 if (p->first < offset) {
162 // split off the portion before offset
11fdf7f2 163 ceph_assert(p->first + p->second.length > offset);
7c673cae
FG
164 uint64_t left = p->first + p->second.length - offset;
165 p->second.length = offset - p->first;
166 p = ref_map.insert(map<uint64_t,record_t>::value_type(
167 offset, record_t(left, p->second.refs))).first;
168 // continue below
169 }
11fdf7f2 170 ceph_assert(p->first == offset);
7c673cae
FG
// The range ends inside this record: split off the untouched tail (keeping
// the old count), bump the count on the covered head, and stop.
171 if (length < p->second.length) {
172 ref_map.insert(make_pair(offset + length,
173 record_t(p->second.length - length,
174 p->second.refs)));
175 p->second.length = length;
176 ++p->second.refs;
177 break;
178 }
// The whole record is covered: bump its count and move on to the next one.
179 ++p->second.refs;
180 offset += p->second.length;
181 length -= p->second.length;
182 _maybe_merge_left(p);
183 ++p;
184 }
// Final merge attempt for the record the loop stopped on.
185 if (p != ref_map.end())
186 _maybe_merge_left(p);
187 //_check();
188}
189
// Drop one reference on the byte range [offset, offset + length).
//
// Records whose count is 1 are removed from ref_map; when `release` is
// non-null the removed range is appended to it as a bluestore_pextent_t.
// Records with a higher count simply have it decremented.  Records that
// straddle either end of the range are split first.
//
// Aborts if the start of the range is not covered by any record.
//
// When `maybe_unshared` is non-null it is set to true only if, after the
// operation, no record left in ref_map has a reference count other than 1.
190void bluestore_extent_ref_map_t::put(
191 uint64_t offset, uint32_t length,
31f18b77
FG
192 PExtentVector *release,
193 bool *maybe_unshared)
7c673cae
FG
194{
195 //NB: existing entries in 'release' container must be preserved!
// Stays true until a record with refs != 1 is observed.
31f18b77 196 bool unshared = true;
7c673cae
FG
// Locate the record that contains offset.
197 auto p = ref_map.lower_bound(offset);
198 if (p == ref_map.end() || p->first > offset) {
199 if (p == ref_map.begin()) {
11fdf7f2 200 ceph_abort_msg("put on missing extent (nothing before)");
7c673cae
FG
201 }
202 --p;
203 if (p->first + p->second.length <= offset) {
11fdf7f2 204 ceph_abort_msg("put on missing extent (gap)");
7c673cae
FG
205 }
206 }
// The record starts before offset: keep the leading part untouched and
// continue with a new record that begins exactly at offset.
207 if (p->first < offset) {
208 uint64_t left = p->first + p->second.length - offset;
209 p->second.length = offset - p->first;
31f18b77
FG
210 if (p->second.refs != 1) {
211 unshared = false;
212 }
7c673cae
FG
213 p = ref_map.insert(map<uint64_t,record_t>::value_type(
214 offset, record_t(left, p->second.refs))).first;
215 }
216 while (length > 0) {
11fdf7f2 217 ceph_assert(p->first == offset);
// The range ends inside this record: split off the tail (which keeps its
// count), handle the covered head, and jump straight to the epilogue.
7c673cae 218 if (length < p->second.length) {
31f18b77
FG
219 if (p->second.refs != 1) {
220 unshared = false;
221 }
7c673cae
FG
222 ref_map.insert(make_pair(offset + length,
223 record_t(p->second.length - length,
224 p->second.refs)));
225 if (p->second.refs > 1) {
226 p->second.length = length;
227 --p->second.refs;
31f18b77
FG
228 if (p->second.refs != 1) {
229 unshared = false;
230 }
7c673cae
FG
231 _maybe_merge_left(p);
232 } else {
233 if (release)
234 release->push_back(bluestore_pextent_t(p->first, length));
235 ref_map.erase(p);
236 }
31f18b77 237 goto out;
7c673cae
FG
238 }
// The whole record is covered: decrement it, or erase and release it when
// this was the last reference.
239 offset += p->second.length;
240 length -= p->second.length;
241 if (p->second.refs > 1) {
242 --p->second.refs;
31f18b77
FG
243 if (p->second.refs != 1) {
244 unshared = false;
245 }
7c673cae
FG
246 _maybe_merge_left(p);
247 ++p;
248 } else {
249 if (release)
250 release->push_back(bluestore_pextent_t(p->first, p->second.length));
251 ref_map.erase(p++);
252 }
253 }
// Final merge attempt for the record following the processed range.
254 if (p != ref_map.end())
255 _maybe_merge_left(p);
256 //_check();
31f18b77
FG
257out:
258 if (maybe_unshared) {
259 if (unshared) {
260 // we haven't seen a ref != 1 yet; check the whole map.
261 for (auto& p : ref_map) {
262 if (p.second.refs != 1) {
263 unshared = false;
264 break;
265 }
266 }
267 }
268 *maybe_unshared = unshared;
269 }
7c673cae
FG
270}
271
272bool bluestore_extent_ref_map_t::contains(uint64_t offset, uint32_t length) const
273{
274 auto p = ref_map.lower_bound(offset);
275 if (p == ref_map.end() || p->first > offset) {
276 if (p == ref_map.begin()) {
277 return false; // nothing before
278 }
279 --p;
280 if (p->first + p->second.length <= offset) {
281 return false; // gap
282 }
283 }
284 while (length > 0) {
285 if (p == ref_map.end())
286 return false;
287 if (p->first > offset)
288 return false;
289 if (p->first + p->second.length >= offset + length)
290 return true;
291 uint64_t overlap = p->first + p->second.length - offset;
292 offset += overlap;
293 length -= overlap;
294 ++p;
295 }
296 return true;
297}
298
299bool bluestore_extent_ref_map_t::intersects(
300 uint64_t offset,
301 uint32_t length) const
302{
303 auto p = ref_map.lower_bound(offset);
304 if (p != ref_map.begin()) {
305 --p;
306 if (p->first + p->second.length <= offset) {
307 ++p;
308 }
309 }
310 if (p == ref_map.end())
311 return false;
312 if (p->first >= offset + length)
313 return false;
314 return true; // intersects p!
315}
316
317void bluestore_extent_ref_map_t::dump(Formatter *f) const
318{
319 f->open_array_section("ref_map");
320 for (auto& p : ref_map) {
321 f->open_object_section("ref");
322 f->dump_unsigned("offset", p.first);
323 f->dump_unsigned("length", p.second.length);
324 f->dump_unsigned("refs", p.second.refs);
325 f->close_section();
326 }
327 f->close_section();
328}
329
330void bluestore_extent_ref_map_t::generate_test_instances(
331 list<bluestore_extent_ref_map_t*>& o)
332{
333 o.push_back(new bluestore_extent_ref_map_t);
334 o.push_back(new bluestore_extent_ref_map_t);
335 o.back()->get(10, 10);
336 o.back()->get(18, 22);
337 o.back()->get(20, 20);
338 o.back()->get(10, 25);
339 o.back()->get(15, 20);
340}
341
342ostream& operator<<(ostream& out, const bluestore_extent_ref_map_t& m)
343{
344 out << "ref_map(";
345 for (auto p = m.ref_map.begin(); p != m.ref_map.end(); ++p) {
346 if (p != m.ref_map.begin())
347 out << ",";
348 out << std::hex << "0x" << p->first << "~" << p->second.length << std::dec
349 << "=" << p->second.refs;
350 }
351 out << ")";
352 return out;
353}
354
355// bluestore_blob_use_tracker_t
9f95a23c
TL
356bluestore_blob_use_tracker_t::bluestore_blob_use_tracker_t(
357 const bluestore_blob_use_tracker_t& tracker)
358 : au_size{tracker.au_size},
359 num_au{tracker.num_au},
360 bytes_per_au{nullptr}
361{
362 if (num_au > 0) {
363 allocate();
364 std::copy(tracker.bytes_per_au, tracker.bytes_per_au + num_au, bytes_per_au);
365 } else {
366 total_bytes = tracker.total_bytes;
367 }
368}
369
370bluestore_blob_use_tracker_t&
371bluestore_blob_use_tracker_t::operator=(const bluestore_blob_use_tracker_t& rhs)
372{
373 if (this == &rhs) {
374 return *this;
375 }
376 clear();
377 au_size = rhs.au_size;
378 num_au = rhs.num_au;
379 if (rhs.num_au > 0) {
380 allocate();
381 std::copy(rhs.bytes_per_au, rhs.bytes_per_au + num_au, bytes_per_au);
382 } else {
383 total_bytes = rhs.total_bytes;
384 }
385 return *this;
386}
7c673cae
FG
387
388void bluestore_blob_use_tracker_t::allocate()
389{
11fdf7f2 390 ceph_assert(num_au != 0);
7c673cae
FG
391 bytes_per_au = new uint32_t[num_au];
392 for (uint32_t i = 0; i < num_au; ++i) {
393 bytes_per_au[i] = 0;
394 }
395}
396
397void bluestore_blob_use_tracker_t::init(
398 uint32_t full_length, uint32_t _au_size) {
11fdf7f2
TL
399 ceph_assert(!au_size || is_empty());
400 ceph_assert(_au_size > 0);
401 ceph_assert(full_length > 0);
7c673cae 402 clear();
11fdf7f2 403 uint32_t _num_au = round_up_to(full_length, _au_size) / _au_size;
7c673cae 404 au_size = _au_size;
11fdf7f2 405 if ( _num_au > 1 ) {
7c673cae
FG
406 num_au = _num_au;
407 allocate();
408 }
409}
410
411void bluestore_blob_use_tracker_t::get(
412 uint32_t offset, uint32_t length)
413{
11fdf7f2 414 ceph_assert(au_size);
7c673cae
FG
415 if (!num_au) {
416 total_bytes += length;
11fdf7f2 417 } else {
7c673cae
FG
418 auto end = offset + length;
419
420 while (offset < end) {
421 auto phase = offset % au_size;
422 bytes_per_au[offset / au_size] +=
11fdf7f2 423 std::min(au_size - phase, end - offset);
7c673cae
FG
424 offset += (phase ? au_size - phase : au_size);
425 }
426 }
427}
428
429bool bluestore_blob_use_tracker_t::put(
430 uint32_t offset, uint32_t length,
431 PExtentVector *release_units)
432{
11fdf7f2 433 ceph_assert(au_size);
7c673cae
FG
434 if (release_units) {
435 release_units->clear();
436 }
437 bool maybe_empty = true;
438 if (!num_au) {
11fdf7f2 439 ceph_assert(total_bytes >= length);
7c673cae
FG
440 total_bytes -= length;
441 } else {
442 auto end = offset + length;
443 uint64_t next_offs = 0;
444 while (offset < end) {
445 auto phase = offset % au_size;
446 size_t pos = offset / au_size;
11fdf7f2
TL
447 auto diff = std::min(au_size - phase, end - offset);
448 ceph_assert(diff <= bytes_per_au[pos]);
7c673cae
FG
449 bytes_per_au[pos] -= diff;
450 offset += (phase ? au_size - phase : au_size);
451 if (bytes_per_au[pos] == 0) {
452 if (release_units) {
453 if (release_units->empty() || next_offs != pos * au_size) {
454 release_units->emplace_back(pos * au_size, au_size);
9f95a23c 455 next_offs = pos * au_size;
7c673cae
FG
456 } else {
457 release_units->back().length += au_size;
458 }
459 next_offs += au_size;
460 }
461 } else {
462 maybe_empty = false; // micro optimization detecting we aren't empty
463 // even in the affected extent
464 }
465 }
466 }
467 bool empty = maybe_empty ? !is_not_empty() : false;
468 if (empty && release_units) {
469 release_units->clear();
470 }
471 return empty;
472}
473
474bool bluestore_blob_use_tracker_t::can_split() const
475{
476 return num_au > 0;
477}
478
479bool bluestore_blob_use_tracker_t::can_split_at(uint32_t blob_offset) const
480{
11fdf7f2 481 ceph_assert(au_size);
7c673cae
FG
482 return (blob_offset % au_size) == 0 &&
483 blob_offset < num_au * au_size;
484}
485
486void bluestore_blob_use_tracker_t::split(
487 uint32_t blob_offset,
488 bluestore_blob_use_tracker_t* r)
489{
11fdf7f2
TL
490 ceph_assert(au_size);
491 ceph_assert(can_split());
492 ceph_assert(can_split_at(blob_offset));
493 ceph_assert(r->is_empty());
7c673cae
FG
494
495 uint32_t new_num_au = blob_offset / au_size;
496 r->init( (num_au - new_num_au) * au_size, au_size);
497
498 for (auto i = new_num_au; i < num_au; i++) {
499 r->get((i - new_num_au) * au_size, bytes_per_au[i]);
500 bytes_per_au[i] = 0;
501 }
502 if (new_num_au == 0) {
503 clear();
504 } else if (new_num_au == 1) {
505 uint32_t tmp = bytes_per_au[0];
506 uint32_t _au_size = au_size;
507 clear();
508 au_size = _au_size;
509 total_bytes = tmp;
510 } else {
511 num_au = new_num_au;
512 }
513}
514
515bool bluestore_blob_use_tracker_t::equal(
516 const bluestore_blob_use_tracker_t& other) const
517{
518 if (!num_au && !other.num_au) {
519 return total_bytes == other.total_bytes && au_size == other.au_size;
520 } else if (num_au && other.num_au) {
521 if (num_au != other.num_au || au_size != other.au_size) {
522 return false;
523 }
524 for (size_t i = 0; i < num_au; i++) {
525 if (bytes_per_au[i] != other.bytes_per_au[i]) {
526 return false;
527 }
528 }
529 return true;
530 }
531
532 uint32_t n = num_au ? num_au : other.num_au;
533 uint32_t referenced =
534 num_au ? other.get_referenced_bytes() : get_referenced_bytes();
535 auto bytes_per_au_tmp = num_au ? bytes_per_au : other.bytes_per_au;
536 uint32_t my_referenced = 0;
537 for (size_t i = 0; i < n; i++) {
538 my_referenced += bytes_per_au_tmp[i];
539 if (my_referenced > referenced) {
540 return false;
541 }
542 }
543 return my_referenced == referenced;
544}
545
546void bluestore_blob_use_tracker_t::dump(Formatter *f) const
547{
548 f->dump_unsigned("num_au", num_au);
549 f->dump_unsigned("au_size", au_size);
550 if (!num_au) {
551 f->dump_unsigned("total_bytes", total_bytes);
552 } else {
553 f->open_array_section("bytes_per_au");
554 for (size_t i = 0; i < num_au; ++i) {
555 f->dump_unsigned("", bytes_per_au[i]);
556 }
557 f->close_section();
558 }
559}
560
561void bluestore_blob_use_tracker_t::generate_test_instances(
562 list<bluestore_blob_use_tracker_t*>& o)
563{
564 o.push_back(new bluestore_blob_use_tracker_t());
565 o.back()->init(16, 16);
566 o.back()->get(10, 10);
567 o.back()->get(10, 5);
568 o.push_back(new bluestore_blob_use_tracker_t());
569 o.back()->init(60, 16);
570 o.back()->get(18, 22);
571 o.back()->get(20, 20);
572 o.back()->get(15, 20);
573}
574
575ostream& operator<<(ostream& out, const bluestore_blob_use_tracker_t& m)
576{
577 out << "use_tracker(" << std::hex;
578 if (!m.num_au) {
579 out << "0x" << m.au_size
580 << " "
581 << "0x" << m.total_bytes;
582 } else {
583 out << "0x" << m.num_au
584 << "*0x" << m.au_size
585 << " 0x[";
586 for (size_t i = 0; i < m.num_au; ++i) {
587 if (i != 0)
588 out << ",";
589 out << m.bytes_per_au[i];
590 }
591 out << "]";
592 }
593 out << std::dec << ")";
594 return out;
595}
596
597// bluestore_pextent_t
598
599void bluestore_pextent_t::dump(Formatter *f) const
600{
601 f->dump_unsigned("offset", offset);
602 f->dump_unsigned("length", length);
603}
604
605ostream& operator<<(ostream& out, const bluestore_pextent_t& o) {
606 if (o.is_valid())
607 return out << "0x" << std::hex << o.offset << "~" << o.length << std::dec;
608 else
609 return out << "!~" << std::hex << o.length << std::dec;
610}
611
612void bluestore_pextent_t::generate_test_instances(list<bluestore_pextent_t*>& ls)
613{
614 ls.push_back(new bluestore_pextent_t);
615 ls.push_back(new bluestore_pextent_t(1, 2));
616}
617
618// bluestore_blob_t
619
620string bluestore_blob_t::get_flags_string(unsigned flags)
621{
622 string s;
7c673cae
FG
623 if (flags & FLAG_COMPRESSED) {
624 if (s.length())
625 s += '+';
626 s += "compressed";
627 }
628 if (flags & FLAG_CSUM) {
629 if (s.length())
630 s += '+';
631 s += "csum";
632 }
633 if (flags & FLAG_HAS_UNUSED) {
634 if (s.length())
635 s += '+';
636 s += "has_unused";
637 }
638 if (flags & FLAG_SHARED) {
639 if (s.length())
640 s += '+';
641 s += "shared";
642 }
643
644 return s;
645}
646
647size_t bluestore_blob_t::get_csum_value_size() const
648{
649 return Checksummer::get_csum_value_size(csum_type);
650}
651
652void bluestore_blob_t::dump(Formatter *f) const
653{
654 f->open_array_section("extents");
655 for (auto& p : extents) {
656 f->dump_object("extent", p);
657 }
658 f->close_section();
659 f->dump_unsigned("logical_length", logical_length);
660 f->dump_unsigned("compressed_length", compressed_length);
661 f->dump_unsigned("flags", flags);
662 f->dump_unsigned("csum_type", csum_type);
663 f->dump_unsigned("csum_chunk_order", csum_chunk_order);
664 f->open_array_section("csum_data");
665 size_t n = get_csum_count();
666 for (unsigned i = 0; i < n; ++i)
667 f->dump_unsigned("csum", get_csum_item(i));
668 f->close_section();
669 f->dump_unsigned("unused", unused);
670}
671
672void bluestore_blob_t::generate_test_instances(list<bluestore_blob_t*>& ls)
673{
674 ls.push_back(new bluestore_blob_t);
675 ls.push_back(new bluestore_blob_t(0));
676 ls.push_back(new bluestore_blob_t);
677 ls.back()->allocated_test(bluestore_pextent_t(111, 222));
678 ls.push_back(new bluestore_blob_t);
679 ls.back()->init_csum(Checksummer::CSUM_XXHASH32, 16, 65536);
680 ls.back()->csum_data = buffer::claim_malloc(4, strdup("abcd"));
681 ls.back()->add_unused(0, 3);
682 ls.back()->add_unused(8, 8);
683 ls.back()->allocated_test(bluestore_pextent_t(0x40100000, 0x10000));
684 ls.back()->allocated_test(
685 bluestore_pextent_t(bluestore_pextent_t::INVALID_OFFSET, 0x1000));
686 ls.back()->allocated_test(bluestore_pextent_t(0x40120000, 0x10000));
687}
688
689ostream& operator<<(ostream& out, const bluestore_blob_t& o)
690{
691 out << "blob(" << o.get_extents();
692 if (o.is_compressed()) {
693 out << " clen 0x" << std::hex
694 << o.get_logical_length()
695 << " -> 0x"
696 << o.get_compressed_payload_length()
697 << std::dec;
698 }
699 if (o.flags) {
700 out << " " << o.get_flags_string();
701 }
181888fb 702 if (o.has_csum()) {
7c673cae
FG
703 out << " " << Checksummer::get_csum_type_string(o.csum_type)
704 << "/0x" << std::hex << (1ull << o.csum_chunk_order) << std::dec;
705 }
706 if (o.has_unused())
707 out << " unused=0x" << std::hex << o.unused << std::dec;
708 out << ")";
709 return out;
710}
711
712void bluestore_blob_t::calc_csum(uint64_t b_off, const bufferlist& bl)
713{
714 switch (csum_type) {
715 case Checksummer::CSUM_XXHASH32:
716 Checksummer::calculate<Checksummer::xxhash32>(
717 get_csum_chunk_size(), b_off, bl.length(), bl, &csum_data);
718 break;
719 case Checksummer::CSUM_XXHASH64:
720 Checksummer::calculate<Checksummer::xxhash64>(
721 get_csum_chunk_size(), b_off, bl.length(), bl, &csum_data);
722 break;;
723 case Checksummer::CSUM_CRC32C:
724 Checksummer::calculate<Checksummer::crc32c>(
725 get_csum_chunk_size(), b_off, bl.length(), bl, &csum_data);
726 break;
727 case Checksummer::CSUM_CRC32C_16:
728 Checksummer::calculate<Checksummer::crc32c_16>(
729 get_csum_chunk_size(), b_off, bl.length(), bl, &csum_data);
730 break;
731 case Checksummer::CSUM_CRC32C_8:
732 Checksummer::calculate<Checksummer::crc32c_8>(
733 get_csum_chunk_size(), b_off, bl.length(), bl, &csum_data);
734 break;
735 }
736}
737
738int bluestore_blob_t::verify_csum(uint64_t b_off, const bufferlist& bl,
739 int* b_bad_off, uint64_t *bad_csum) const
740{
741 int r = 0;
742
743 *b_bad_off = -1;
744 switch (csum_type) {
745 case Checksummer::CSUM_NONE:
746 break;
747 case Checksummer::CSUM_XXHASH32:
748 *b_bad_off = Checksummer::verify<Checksummer::xxhash32>(
749 get_csum_chunk_size(), b_off, bl.length(), bl, csum_data, bad_csum);
750 break;
751 case Checksummer::CSUM_XXHASH64:
752 *b_bad_off = Checksummer::verify<Checksummer::xxhash64>(
753 get_csum_chunk_size(), b_off, bl.length(), bl, csum_data, bad_csum);
754 break;
755 case Checksummer::CSUM_CRC32C:
756 *b_bad_off = Checksummer::verify<Checksummer::crc32c>(
757 get_csum_chunk_size(), b_off, bl.length(), bl, csum_data, bad_csum);
758 break;
759 case Checksummer::CSUM_CRC32C_16:
760 *b_bad_off = Checksummer::verify<Checksummer::crc32c_16>(
761 get_csum_chunk_size(), b_off, bl.length(), bl, csum_data, bad_csum);
762 break;
763 case Checksummer::CSUM_CRC32C_8:
764 *b_bad_off = Checksummer::verify<Checksummer::crc32c_8>(
765 get_csum_chunk_size(), b_off, bl.length(), bl, csum_data, bad_csum);
766 break;
767 default:
768 r = -EOPNOTSUPP;
769 break;
770 }
771
772 if (r < 0)
773 return r;
774 else if (*b_bad_off >= 0)
775 return -1; // bad checksum
776 else
777 return 0;
778}
779
// Record that the extents in `allocs` now back the blob range starting at
// blob offset b_off.
//
// First allocation (extents is empty): an invalid placeholder extent of
// b_off bytes is added when b_off is non-zero, followed by all of `allocs`.
// For a non-compressed blob logical_length becomes b_off plus the summed
// length of `allocs`.
//
// Later allocation (non-compressed blobs only): the run of invalid extents
// covering [b_off, b_off + length) is replaced by an optional invalid
// "head", the entries of `allocs`, and an optional invalid "tail".  Every
// extent touched by the range is asserted to be invalid (unallocated).
// NOTE(review): nothing here checks that the lengths in `allocs` add up to
// `length` -- presumably guaranteed by the caller; confirm.
a8e16298 780void bluestore_blob_t::allocated(uint32_t b_off, uint32_t length, const PExtentVector& allocs)
7c673cae
FG
781{
782 if (extents.size() == 0) {
783 // if blob is compressed then logical length to be already configured
784 // otherwise - to be unset.
11fdf7f2 785 ceph_assert((is_compressed() && logical_length != 0) ||
7c673cae
FG
786 (!is_compressed() && logical_length == 0));
787
788 extents.reserve(allocs.size() + (b_off ? 1 : 0));
789 if (b_off) {
790 extents.emplace_back(
791 bluestore_pextent_t(bluestore_pextent_t::INVALID_OFFSET, b_off));
a8e16298 792
7c673cae
FG
793 }
794 uint32_t new_len = b_off;
795 for (auto& a : allocs) {
796 extents.emplace_back(a.offset, a.length);
797 new_len += a.length;
798 }
799 if (!is_compressed()) {
800 logical_length = new_len;
801 }
802 } else {
11fdf7f2 803 ceph_assert(!is_compressed()); // partial allocations are forbidden when
7c673cae 804 // compressed
11fdf7f2 805 ceph_assert(b_off < logical_length);
7c673cae
FG
// Find the extent containing b_off; cur_offs is the blob offset at which
// that extent starts and pos is its index in `extents`.
806 uint32_t cur_offs = 0;
807 auto start_it = extents.begin();
808 size_t pos = 0;
11fdf7f2
TL
809 while (true) {
810 ceph_assert(start_it != extents.end());
7c673cae
FG
811 if (cur_offs + start_it->length > b_off) {
812 break;
813 }
814 cur_offs += start_it->length;
815 ++start_it;
816 ++pos;
817 }
// head: unallocated bytes of the first extent that precede b_off.
818 uint32_t head = b_off - cur_offs;
819 uint32_t end_off = b_off + length;
820 auto end_it = start_it;
821
// Find the extent containing the end of the range; everything on the way
// must still be unallocated.
822 while (true) {
11fdf7f2
TL
823 ceph_assert(end_it != extents.end());
824 ceph_assert(!end_it->is_valid());
7c673cae
FG
825 if (cur_offs + end_it->length >= end_off) {
826 break;
827 }
828 cur_offs += end_it->length;
829 ++end_it;
830 }
11fdf7f2 831 ceph_assert(cur_offs + end_it->length >= end_off);
7c673cae
FG
// tail: unallocated bytes of the last extent that follow the range.
832 uint32_t tail = cur_offs + end_it->length - end_off;
833
// Replace [start_it, end_it] by `count` placeholder extents which are then
// overwritten with head / allocs / tail below.
834 start_it = extents.erase(start_it, end_it + 1);
835 size_t count = allocs.size();
836 count += head ? 1 : 0;
837 count += tail ? 1 : 0;
838 extents.insert(start_it,
839 count,
840 bluestore_pextent_t(
841 bluestore_pextent_t::INVALID_OFFSET, 0));
842
843 // Workaround to resolve lack of proper iterator return in vector::insert
844 // Looks like some gcc/stl implementations still lack it despite c++11
845 // support claim
846 start_it = extents.begin() + pos;
847
848 if (head) {
849 start_it->length = head;
850 ++start_it;
851 }
852 for(auto& e : allocs) {
853 *start_it = e;
854 ++start_it;
855 }
856 if (tail) {
857 start_it->length = tail;
858 }
859 }
860}
861
862// cut it out of extents
863struct vecbuilder {
864 PExtentVector v;
865 uint64_t invalid = 0;
866
867 void add_invalid(uint64_t length) {
868 invalid += length;
869 }
870 void flush() {
871 if (invalid) {
872 v.emplace_back(bluestore_pextent_t(bluestore_pextent_t::INVALID_OFFSET,
a8e16298
TL
873 invalid));
874
7c673cae
FG
875 invalid = 0;
876 }
877 }
878 void add(uint64_t offset, uint64_t length) {
879 if (offset == bluestore_pextent_t::INVALID_OFFSET) {
880 add_invalid(length);
881 }
882 else {
883 flush();
a8e16298 884 v.emplace_back(offset, length);
7c673cae
FG
885 }
886 }
887};
888
889void bluestore_blob_t::allocated_test(const bluestore_pextent_t& alloc)
890{
891 extents.emplace_back(alloc);
892 if (!is_compressed()) {
893 logical_length += alloc.length;
894 }
895}
896
// Release physical space backing this blob.
//
// all == true: every valid extent is appended to *r, the extent list is
// collapsed into a single invalid extent spanning the summed length, and
// true is returned.
//
// all == false: `logical` lists blob-relative ranges to release.  The
// extent list and `logical` are walked in parallel; the physical pieces
// backing each listed range are appended to *r (extending the last entry of
// *r when physically contiguous with it) and are replaced by invalid space
// in the rebuilt extent list.  Returns false.
// NOTE(review): the walk appears to assume `logical` is sorted by offset
// and non-overlapping -- confirm against callers.
897bool bluestore_blob_t::release_extents(bool all,
898 const PExtentVector& logical,
899 PExtentVector* r)
900{
901 // common case: all of it?
902 if (all) {
903 uint64_t pos = 0;
904 for (auto& e : extents) {
905 if (e.is_valid()) {
906 r->push_back(e);
907 }
908 pos += e.length;
909 }
11fdf7f2 910 ceph_assert(is_compressed() || get_logical_length() == pos);
7c673cae
FG
911 extents.resize(1);
912 extents[0].offset = bluestore_pextent_t::INVALID_OFFSET;
913 extents[0].length = pos;
914 return true;
915 }
916 // remove from pextents according to logical release list
917 vecbuilder vb;
918 auto loffs_it = logical.begin();
919 auto lend = logical.end();
920 uint32_t pext_loffs_start = 0; //starting loffset of the current pextent
921 uint32_t pext_loffs = 0; //current loffset
922 auto pext_it = extents.begin();
923 auto pext_end = extents.end();
924 while (pext_it != pext_end) {
// No (further) release range touches the current extent: keep whatever part
// of it has not been consumed yet and advance to the next extent.
925 if (loffs_it == lend ||
926 pext_loffs_start + pext_it->length <= loffs_it->offset) {
927 int delta0 = pext_loffs - pext_loffs_start;
11fdf7f2 928 ceph_assert(delta0 >= 0);
7c673cae
FG
929 if ((uint32_t)delta0 < pext_it->length) {
930 vb.add(pext_it->offset + delta0, pext_it->length - delta0);
931 }
932 pext_loffs_start += pext_it->length;
933 pext_loffs = pext_loffs_start;
934 ++pext_it;
935 }
936 else {
937 //assert(pext_loffs == pext_loffs_start);
// delta0: bytes of the current extent already consumed;
// delta: bytes to keep before the release range begins.
938 int delta0 = pext_loffs - pext_loffs_start;
11fdf7f2 939 ceph_assert(delta0 >= 0);
7c673cae
FG
940
941 int delta = loffs_it->offset - pext_loffs;
11fdf7f2 942 ceph_assert(delta >= 0);
7c673cae
FG
943 if (delta > 0) {
944 vb.add(pext_it->offset + delta0, delta);
945 pext_loffs += delta;
946 }
947
// last_r: last entry currently in *r (or end() when *r is empty), used to
// coalesce physically contiguous releases.
948 PExtentVector::iterator last_r = r->end();
949 if (r->begin() != last_r) {
950 --last_r;
951 }
// Release the range piecewise; it may span several physical extents.
952 uint32_t to_release = loffs_it->length;
953 do {
954 uint32_t to_release_part =
11fdf7f2 955 std::min(pext_it->length - delta0 - delta, to_release);
7c673cae
FG
956 auto o = pext_it->offset + delta0 + delta;
957 if (last_r != r->end() && last_r->offset + last_r->length == o) {
958 last_r->length += to_release_part;
959 }
960 else {
961 last_r = r->emplace(r->end(), o, to_release_part);
962 }
963 to_release -= to_release_part;
964 pext_loffs += to_release_part;
965 if (pext_loffs == pext_loffs_start + pext_it->length) {
966 pext_loffs_start += pext_it->length;
967 pext_loffs = pext_loffs_start;
968 pext_it++;
969 delta0 = delta = 0;
970 }
971 } while (to_release > 0 && pext_it != pext_end);
// The released bytes become invalid space in the rebuilt extent list.
972 vb.add_invalid(loffs_it->length - to_release);
973 ++loffs_it;
974 }
975 }
976 vb.flush();
977 extents.swap(vb.v);
978 return false;
979}
980
981void bluestore_blob_t::split(uint32_t blob_offset, bluestore_blob_t& rb)
982{
983 size_t left = blob_offset;
984 uint32_t llen_lb = 0;
985 uint32_t llen_rb = 0;
986 unsigned i = 0;
987 for (auto p = extents.begin(); p != extents.end(); ++p, ++i) {
988 if (p->length <= left) {
989 left -= p->length;
990 llen_lb += p->length;
991 continue;
992 }
993 if (left) {
994 if (p->is_valid()) {
995 rb.extents.emplace_back(bluestore_pextent_t(p->offset + left,
996 p->length - left));
997 }
998 else {
999 rb.extents.emplace_back(bluestore_pextent_t(
1000 bluestore_pextent_t::INVALID_OFFSET,
1001 p->length - left));
1002 }
1003 llen_rb += p->length - left;
1004 llen_lb += left;
1005 p->length = left;
1006 ++i;
1007 ++p;
1008 }
1009 while (p != extents.end()) {
1010 llen_rb += p->length;
1011 rb.extents.push_back(*p++);
1012 }
1013 extents.resize(i);
1014 logical_length = llen_lb;
1015 rb.logical_length = llen_rb;
1016 break;
1017 }
1018 rb.flags = flags;
1019
1020 if (has_csum()) {
1021 rb.csum_type = csum_type;
1022 rb.csum_chunk_order = csum_chunk_order;
1023 size_t csum_order = get_csum_chunk_size();
11fdf7f2 1024 ceph_assert(blob_offset % csum_order == 0);
7c673cae
FG
1025 size_t pos = (blob_offset / csum_order) * get_csum_value_size();
1026 // deep copy csum data
1027 bufferptr old;
1028 old.swap(csum_data);
1029 rb.csum_data = bufferptr(old.c_str() + pos, old.length() - pos);
1030 csum_data = bufferptr(old.c_str(), pos);
1031 }
1032}
1033
1034// bluestore_shared_blob_t
1035
1036void bluestore_shared_blob_t::dump(Formatter *f) const
1037{
1038 f->dump_int("sbid", sbid);
1039 f->dump_object("ref_map", ref_map);
1040}
1041
1042void bluestore_shared_blob_t::generate_test_instances(
1043 list<bluestore_shared_blob_t*>& ls)
1044{
1045 ls.push_back(new bluestore_shared_blob_t(1));
1046}
1047
1048ostream& operator<<(ostream& out, const bluestore_shared_blob_t& sb)
1049{
31f18b77
FG
1050 out << "(sbid 0x" << std::hex << sb.sbid << std::dec;
1051 out << " " << sb.ref_map << ")";
7c673cae
FG
1052 return out;
1053}
1054
1055// bluestore_onode_t
1056
1057void bluestore_onode_t::shard_info::dump(Formatter *f) const
1058{
1059 f->dump_unsigned("offset", offset);
1060 f->dump_unsigned("bytes", bytes);
1061}
1062
1063ostream& operator<<(ostream& out, const bluestore_onode_t::shard_info& si)
1064{
1065 return out << std::hex << "0x" << si.offset << "(0x" << si.bytes << " bytes"
1066 << std::dec << ")";
1067}
1068
1069void bluestore_onode_t::dump(Formatter *f) const
1070{
1071 f->dump_unsigned("nid", nid);
1072 f->dump_unsigned("size", size);
1073 f->open_object_section("attrs");
1074 for (auto p = attrs.begin(); p != attrs.end(); ++p) {
1075 f->open_object_section("attr");
1076 f->dump_string("name", p->first.c_str()); // it's not quite std::string
1077 f->dump_unsigned("len", p->second.length());
1078 f->close_section();
1079 }
1080 f->close_section();
1081 f->dump_string("flags", get_flags_string());
1082 f->open_array_section("extent_map_shards");
1083 for (auto si : extent_map_shards) {
1084 f->dump_object("shard", si);
1085 }
1086 f->close_section();
1087 f->dump_unsigned("expected_object_size", expected_object_size);
1088 f->dump_unsigned("expected_write_size", expected_write_size);
1089 f->dump_unsigned("alloc_hint_flags", alloc_hint_flags);
1090}
1091
1092void bluestore_onode_t::generate_test_instances(list<bluestore_onode_t*>& o)
1093{
1094 o.push_back(new bluestore_onode_t());
1095 // FIXME
1096}
1097
1098// bluestore_deferred_op_t
1099
1100void bluestore_deferred_op_t::dump(Formatter *f) const
1101{
1102 f->dump_unsigned("op", (int)op);
1103 f->dump_unsigned("data_len", data.length());
1104 f->open_array_section("extents");
1105 for (auto& e : extents) {
1106 f->dump_object("extent", e);
1107 }
1108 f->close_section();
1109}
1110
1111void bluestore_deferred_op_t::generate_test_instances(list<bluestore_deferred_op_t*>& o)
1112{
1113 o.push_back(new bluestore_deferred_op_t);
1114 o.push_back(new bluestore_deferred_op_t);
1115 o.back()->op = OP_WRITE;
1116 o.back()->extents.push_back(bluestore_pextent_t(1, 2));
1117 o.back()->extents.push_back(bluestore_pextent_t(100, 5));
1118 o.back()->data.append("my data");
1119}
1120
1121void bluestore_deferred_transaction_t::dump(Formatter *f) const
1122{
1123 f->dump_unsigned("seq", seq);
1124 f->open_array_section("ops");
1125 for (list<bluestore_deferred_op_t>::const_iterator p = ops.begin(); p != ops.end(); ++p) {
1126 f->dump_object("op", *p);
1127 }
1128 f->close_section();
1129
1130 f->open_array_section("released extents");
1131 for (interval_set<uint64_t>::const_iterator p = released.begin(); p != released.end(); ++p) {
1132 f->open_object_section("extent");
1133 f->dump_unsigned("offset", p.get_start());
1134 f->dump_unsigned("length", p.get_len());
1135 f->close_section();
1136 }
1137 f->close_section();
1138}
1139
1140void bluestore_deferred_transaction_t::generate_test_instances(list<bluestore_deferred_transaction_t*>& o)
1141{
1142 o.push_back(new bluestore_deferred_transaction_t());
1143 o.push_back(new bluestore_deferred_transaction_t());
1144 o.back()->seq = 123;
1145 o.back()->ops.push_back(bluestore_deferred_op_t());
1146 o.back()->ops.push_back(bluestore_deferred_op_t());
1147 o.back()->ops.back().op = bluestore_deferred_op_t::OP_WRITE;
1148 o.back()->ops.back().extents.push_back(bluestore_pextent_t(1,7));
1149 o.back()->ops.back().data.append("foodata");
1150}
1151
1152void bluestore_compression_header_t::dump(Formatter *f) const
1153{
1154 f->dump_unsigned("type", type);
1155 f->dump_unsigned("length", length);
1156}
1157
1158void bluestore_compression_header_t::generate_test_instances(
1159 list<bluestore_compression_header_t*>& o)
1160{
1161 o.push_back(new bluestore_compression_header_t);
1162 o.push_back(new bluestore_compression_header_t(1));
1163 o.back()->length = 1234;
1164}