]> git.proxmox.com Git - ceph.git/blame - ceph/src/os/bluestore/bluestore_types.cc
import ceph quincy 17.2.6
[ceph.git] / ceph / src / os / bluestore / bluestore_types.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2014 Red Hat
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15#include "bluestore_types.h"
16#include "common/Formatter.h"
17#include "common/Checksummer.h"
18#include "include/stringify.h"
19
f67539c2
TL
20using std::list;
21using std::map;
22using std::make_pair;
23using std::ostream;
24using std::string;
25
26using ceph::bufferlist;
27using ceph::bufferptr;
28using ceph::Formatter;
29
7c673cae
FG
30// bluestore_bdev_label_t
31
32void bluestore_bdev_label_t::encode(bufferlist& bl) const
33{
34 // be slightly friendly to someone who looks at the device
35 bl.append("bluestore block device\n");
36 bl.append(stringify(osd_uuid));
37 bl.append("\n");
3efd9988 38 ENCODE_START(2, 1, bl);
11fdf7f2
TL
39 encode(osd_uuid, bl);
40 encode(size, bl);
41 encode(btime, bl);
42 encode(description, bl);
43 encode(meta, bl);
7c673cae
FG
44 ENCODE_FINISH(bl);
45}
46
11fdf7f2 47void bluestore_bdev_label_t::decode(bufferlist::const_iterator& p)
7c673cae 48{
9f95a23c 49 p += 60u; // see above
3efd9988 50 DECODE_START(2, p);
11fdf7f2
TL
51 decode(osd_uuid, p);
52 decode(size, p);
53 decode(btime, p);
54 decode(description, p);
3efd9988 55 if (struct_v >= 2) {
11fdf7f2 56 decode(meta, p);
3efd9988 57 }
7c673cae
FG
58 DECODE_FINISH(p);
59}
60
61void bluestore_bdev_label_t::dump(Formatter *f) const
62{
63 f->dump_stream("osd_uuid") << osd_uuid;
64 f->dump_unsigned("size", size);
65 f->dump_stream("btime") << btime;
66 f->dump_string("description", description);
3efd9988
FG
67 for (auto& i : meta) {
68 f->dump_string(i.first.c_str(), i.second);
69 }
7c673cae
FG
70}
71
72void bluestore_bdev_label_t::generate_test_instances(
73 list<bluestore_bdev_label_t*>& o)
74{
75 o.push_back(new bluestore_bdev_label_t);
76 o.push_back(new bluestore_bdev_label_t);
77 o.back()->size = 123;
78 o.back()->btime = utime_t(4, 5);
79 o.back()->description = "fakey";
3efd9988 80 o.back()->meta["foo"] = "bar";
7c673cae
FG
81}
82
83ostream& operator<<(ostream& out, const bluestore_bdev_label_t& l)
84{
85 return out << "bdev(osd_uuid " << l.osd_uuid
3efd9988
FG
86 << ", size 0x" << std::hex << l.size << std::dec
87 << ", btime " << l.btime
88 << ", desc " << l.description
89 << ", " << l.meta.size() << " meta"
90 << ")";
7c673cae
FG
91}
92
93// cnode_t
94
95void bluestore_cnode_t::dump(Formatter *f) const
96{
97 f->dump_unsigned("bits", bits);
98}
99
100void bluestore_cnode_t::generate_test_instances(list<bluestore_cnode_t*>& o)
101{
102 o.push_back(new bluestore_cnode_t());
103 o.push_back(new bluestore_cnode_t(0));
104 o.push_back(new bluestore_cnode_t(123));
105}
106
28e407b8
AA
107ostream& operator<<(ostream& out, const bluestore_cnode_t& l)
108{
109 return out << "cnode(bits " << l.bits << ")";
110}
111
7c673cae
FG
112// bluestore_extent_ref_map_t
113
114void bluestore_extent_ref_map_t::_check() const
115{
116 uint64_t pos = 0;
117 unsigned refs = 0;
118 for (const auto &p : ref_map) {
119 if (p.first < pos)
11fdf7f2 120 ceph_abort_msg("overlap");
7c673cae 121 if (p.first == pos && p.second.refs == refs)
11fdf7f2 122 ceph_abort_msg("unmerged");
7c673cae
FG
123 pos = p.first + p.second.length;
124 refs = p.second.refs;
125 }
126}
127
128void bluestore_extent_ref_map_t::_maybe_merge_left(
129 map<uint64_t,record_t>::iterator& p)
130{
131 if (p == ref_map.begin())
132 return;
133 auto q = p;
134 --q;
135 if (q->second.refs == p->second.refs &&
136 q->first + q->second.length == p->first) {
137 q->second.length += p->second.length;
138 ref_map.erase(p);
139 p = q;
140 }
141}
142
// Take one reference on every byte of [offset, offset + length).
//
// Parts of the range that are not yet present in ref_map are inserted as
// new records with refs == 1; parts that are already present get their
// ref count incremented, splitting existing records at the range
// boundaries where necessary.  After each step the touched record is
// offered to _maybe_merge_left() so equal-ref neighbours are coalesced.
143void bluestore_extent_ref_map_t::get(uint64_t offset, uint32_t length)
144{
// Position p on the first record that ends after 'offset' (or end()).
145 auto p = ref_map.lower_bound(offset);
146 if (p != ref_map.begin()) {
147 --p;
148 if (p->first + p->second.length <= offset) {
149 ++p;
150 }
151 }
152 while (length > 0) {
153 if (p == ref_map.end()) {
154 // nothing after offset; add the whole thing.
155 p = ref_map.insert(
156 map<uint64_t,record_t>::value_type(offset, record_t(length, 1))).first;
157 break;
158 }
159 if (p->first > offset) {
160 // gap
// fill the hole in front of p (or the whole remainder, if shorter)
11fdf7f2 161 uint64_t newlen = std::min<uint64_t>(p->first - offset, length);
7c673cae
FG
162 p = ref_map.insert(
163 map<uint64_t,record_t>::value_type(offset,
164 record_t(newlen, 1))).first;
165 offset += newlen;
166 length -= newlen;
167 _maybe_merge_left(p);
168 ++p;
169 continue;
170 }
171 if (p->first < offset) {
172 // split off the portion before offset
11fdf7f2 173 ceph_assert(p->first + p->second.length > offset);
7c673cae
FG
174 uint64_t left = p->first + p->second.length - offset;
175 p->second.length = offset - p->first;
176 p = ref_map.insert(map<uint64_t,record_t>::value_type(
177 offset, record_t(left, p->second.refs))).first;
178 // continue below
179 }
11fdf7f2 180 ceph_assert(p->first == offset);
7c673cae
FG
// The request ends inside this record: split off the untouched tail
// (keeping its old ref count), bump the head and stop.
181 if (length < p->second.length) {
182 ref_map.insert(make_pair(offset + length,
183 record_t(p->second.length - length,
184 p->second.refs)));
185 p->second.length = length;
186 ++p->second.refs;
187 break;
188 }
// The request covers this whole record: bump it and move on.
189 ++p->second.refs;
190 offset += p->second.length;
191 length -= p->second.length;
192 _maybe_merge_left(p);
193 ++p;
194 }
// Final merge attempt for whichever record p ended up on.
195 if (p != ref_map.end())
196 _maybe_merge_left(p);
197 //_check();
198}
199
// Drop one reference on every byte of [offset, offset + length).
//
// The start of the range must already be covered by ref_map; otherwise the
// function aborts.  Records whose ref count is 1 are erased and, when
// 'release' is non-null, appended to it as bluestore_pextent_t entries
// (existing entries in 'release' are kept).  Records with a higher count
// are decremented, splitting at the range boundaries where necessary.
//
// When 'maybe_unshared' is non-null it is set to true only if no record
// with refs != 1 was observed while processing the range and every record
// left in ref_map has refs == 1; otherwise it is set to false.
200void bluestore_extent_ref_map_t::put(
201 uint64_t offset, uint32_t length,
31f18b77
FG
202 PExtentVector *release,
203 bool *maybe_unshared)
7c673cae
FG
204{
205 //NB: existing entries in 'release' container must be preserved!
31f18b77 206 bool unshared = true;
7c673cae
FG
// Locate the record containing 'offset'; abort if there is none.
207 auto p = ref_map.lower_bound(offset);
208 if (p == ref_map.end() || p->first > offset) {
209 if (p == ref_map.begin()) {
11fdf7f2 210 ceph_abort_msg("put on missing extent (nothing before)");
7c673cae
FG
211 }
212 --p;
213 if (p->first + p->second.length <= offset) {
11fdf7f2 214 ceph_abort_msg("put on missing extent (gap)");
7c673cae
FG
215 }
216 }
// 'offset' falls inside the record: split it so that a record starts
// exactly at 'offset'.  The untouched front part keeps its ref count.
217 if (p->first < offset) {
218 uint64_t left = p->first + p->second.length - offset;
219 p->second.length = offset - p->first;
31f18b77
FG
220 if (p->second.refs != 1) {
221 unshared = false;
222 }
7c673cae
FG
223 p = ref_map.insert(map<uint64_t,record_t>::value_type(
224 offset, record_t(left, p->second.refs))).first;
225 }
226 while (length > 0) {
11fdf7f2 227 ceph_assert(p->first == offset);
// The range ends inside this record: split off the tail (which keeps its
// ref count), then decrement or release the head and finish.
7c673cae 228 if (length < p->second.length) {
31f18b77
FG
229 if (p->second.refs != 1) {
230 unshared = false;
231 }
7c673cae
FG
232 ref_map.insert(make_pair(offset + length,
233 record_t(p->second.length - length,
234 p->second.refs)));
235 if (p->second.refs > 1) {
236 p->second.length = length;
237 --p->second.refs;
31f18b77
FG
238 if (p->second.refs != 1) {
239 unshared = false;
240 }
7c673cae
FG
241 _maybe_merge_left(p);
242 } else {
243 if (release)
244 release->push_back(bluestore_pextent_t(p->first, length));
245 ref_map.erase(p);
246 }
31f18b77 247 goto out;
7c673cae
FG
248 }
// The range covers this whole record: decrement it, or erase (and
// release) it when this was the last reference.
249 offset += p->second.length;
250 length -= p->second.length;
251 if (p->second.refs > 1) {
252 --p->second.refs;
31f18b77
FG
253 if (p->second.refs != 1) {
254 unshared = false;
255 }
7c673cae
FG
256 _maybe_merge_left(p);
257 ++p;
258 } else {
259 if (release)
260 release->push_back(bluestore_pextent_t(p->first, p->second.length));
261 ref_map.erase(p++);
262 }
263 }
264 if (p != ref_map.end())
265 _maybe_merge_left(p);
266 //_check();
31f18b77
FG
267out:
268 if (maybe_unshared) {
269 if (unshared) {
270 // we haven't seen a ref != 1 yet; check the whole map.
271 for (auto& p : ref_map) {
272 if (p.second.refs != 1) {
273 unshared = false;
274 break;
275 }
276 }
277 }
278 *maybe_unshared = unshared;
279 }
7c673cae
FG
280}
281
282bool bluestore_extent_ref_map_t::contains(uint64_t offset, uint32_t length) const
283{
284 auto p = ref_map.lower_bound(offset);
285 if (p == ref_map.end() || p->first > offset) {
286 if (p == ref_map.begin()) {
287 return false; // nothing before
288 }
289 --p;
290 if (p->first + p->second.length <= offset) {
291 return false; // gap
292 }
293 }
294 while (length > 0) {
295 if (p == ref_map.end())
296 return false;
297 if (p->first > offset)
298 return false;
299 if (p->first + p->second.length >= offset + length)
300 return true;
301 uint64_t overlap = p->first + p->second.length - offset;
302 offset += overlap;
303 length -= overlap;
304 ++p;
305 }
306 return true;
307}
308
309bool bluestore_extent_ref_map_t::intersects(
310 uint64_t offset,
311 uint32_t length) const
312{
313 auto p = ref_map.lower_bound(offset);
314 if (p != ref_map.begin()) {
315 --p;
316 if (p->first + p->second.length <= offset) {
317 ++p;
318 }
319 }
320 if (p == ref_map.end())
321 return false;
322 if (p->first >= offset + length)
323 return false;
324 return true; // intersects p!
325}
326
327void bluestore_extent_ref_map_t::dump(Formatter *f) const
328{
329 f->open_array_section("ref_map");
330 for (auto& p : ref_map) {
331 f->open_object_section("ref");
332 f->dump_unsigned("offset", p.first);
333 f->dump_unsigned("length", p.second.length);
334 f->dump_unsigned("refs", p.second.refs);
335 f->close_section();
336 }
337 f->close_section();
338}
339
340void bluestore_extent_ref_map_t::generate_test_instances(
341 list<bluestore_extent_ref_map_t*>& o)
342{
343 o.push_back(new bluestore_extent_ref_map_t);
344 o.push_back(new bluestore_extent_ref_map_t);
345 o.back()->get(10, 10);
346 o.back()->get(18, 22);
347 o.back()->get(20, 20);
348 o.back()->get(10, 25);
349 o.back()->get(15, 20);
350}
351
352ostream& operator<<(ostream& out, const bluestore_extent_ref_map_t& m)
353{
354 out << "ref_map(";
355 for (auto p = m.ref_map.begin(); p != m.ref_map.end(); ++p) {
356 if (p != m.ref_map.begin())
357 out << ",";
358 out << std::hex << "0x" << p->first << "~" << p->second.length << std::dec
359 << "=" << p->second.refs;
360 }
361 out << ")";
362 return out;
363}
364
365// bluestore_blob_use_tracker_t
9f95a23c
TL
366bluestore_blob_use_tracker_t::bluestore_blob_use_tracker_t(
367 const bluestore_blob_use_tracker_t& tracker)
368 : au_size{tracker.au_size},
2a845540
TL
369 num_au(0),
370 alloc_au(0),
9f95a23c
TL
371 bytes_per_au{nullptr}
372{
2a845540
TL
373 if (tracker.num_au > 0) {
374 allocate(tracker.num_au);
9f95a23c
TL
375 std::copy(tracker.bytes_per_au, tracker.bytes_per_au + num_au, bytes_per_au);
376 } else {
377 total_bytes = tracker.total_bytes;
378 }
379}
380
381bluestore_blob_use_tracker_t&
382bluestore_blob_use_tracker_t::operator=(const bluestore_blob_use_tracker_t& rhs)
383{
384 if (this == &rhs) {
385 return *this;
386 }
387 clear();
388 au_size = rhs.au_size;
9f95a23c 389 if (rhs.num_au > 0) {
2a845540 390 allocate( rhs.num_au);
9f95a23c
TL
391 std::copy(rhs.bytes_per_au, rhs.bytes_per_au + num_au, bytes_per_au);
392 } else {
393 total_bytes = rhs.total_bytes;
394 }
395 return *this;
396}
7c673cae 397
2a845540 398void bluestore_blob_use_tracker_t::allocate(uint32_t au_count)
7c673cae 399{
2a845540
TL
400 ceph_assert(au_count != 0);
401 ceph_assert(num_au == 0);
402 ceph_assert(alloc_au == 0);
403 num_au = alloc_au = au_count;
404 bytes_per_au = new uint32_t[alloc_au];
f91f0fd5
TL
405 mempool::get_pool(
406 mempool::pool_index_t(mempool::mempool_bluestore_cache_other)).
2a845540 407 adjust_count(alloc_au, sizeof(uint32_t) * alloc_au);
f91f0fd5 408
7c673cae
FG
409 for (uint32_t i = 0; i < num_au; ++i) {
410 bytes_per_au[i] = 0;
411 }
412}
413
2a845540
TL
414void bluestore_blob_use_tracker_t::release(uint32_t au_count, uint32_t* ptr) {
415 if (au_count) {
416 delete[] ptr;
417 mempool::get_pool(
418 mempool::pool_index_t(mempool::mempool_bluestore_cache_other)).
419 adjust_count(-(int32_t)au_count, -(int32_t)(sizeof(uint32_t) * au_count));
420 }
421}
422
7c673cae
FG
423void bluestore_blob_use_tracker_t::init(
424 uint32_t full_length, uint32_t _au_size) {
11fdf7f2
TL
425 ceph_assert(!au_size || is_empty());
426 ceph_assert(_au_size > 0);
427 ceph_assert(full_length > 0);
7c673cae 428 clear();
11fdf7f2 429 uint32_t _num_au = round_up_to(full_length, _au_size) / _au_size;
7c673cae 430 au_size = _au_size;
11fdf7f2 431 if ( _num_au > 1 ) {
2a845540 432 allocate(_num_au);
7c673cae
FG
433 }
434}
435
436void bluestore_blob_use_tracker_t::get(
437 uint32_t offset, uint32_t length)
438{
11fdf7f2 439 ceph_assert(au_size);
7c673cae
FG
440 if (!num_au) {
441 total_bytes += length;
11fdf7f2 442 } else {
7c673cae
FG
443 auto end = offset + length;
444
445 while (offset < end) {
446 auto phase = offset % au_size;
447 bytes_per_au[offset / au_size] +=
11fdf7f2 448 std::min(au_size - phase, end - offset);
7c673cae
FG
449 offset += (phase ? au_size - phase : au_size);
450 }
451 }
452}
453
// Remove 'length' referenced bytes starting at blob offset 'offset'.
//
// Without per-AU counters only total_bytes is reduced.  Otherwise each
// allocation unit touched by the range is reduced, and units whose counter
// drops to zero are reported through 'release_units' (if non-null) as
// (blob offset, length) entries, with consecutive units coalesced into a
// single entry.  'release_units' is cleared on entry, and cleared again
// when the function returns true.
//
// Returns true when !is_not_empty() holds after the update, false
// otherwise.
454bool bluestore_blob_use_tracker_t::put(
455 uint32_t offset, uint32_t length,
456 PExtentVector *release_units)
457{
11fdf7f2 458 ceph_assert(au_size);
7c673cae
FG
459 if (release_units) {
460 release_units->clear();
461 }
462 bool maybe_empty = true;
463 if (!num_au) {
11fdf7f2 464 ceph_assert(total_bytes >= length);
7c673cae
FG
465 total_bytes -= length;
466 } else {
467 auto end = offset + length;
// next_offs: blob offset right after the last unit added to release_units;
// used to decide whether the next freed unit extends the previous entry.
468 uint64_t next_offs = 0;
469 while (offset < end) {
470 auto phase = offset % au_size;
471 size_t pos = offset / au_size;
11fdf7f2
TL
472 auto diff = std::min(au_size - phase, end - offset);
473 ceph_assert(diff <= bytes_per_au[pos]);
7c673cae
FG
474 bytes_per_au[pos] -= diff;
475 offset += (phase ? au_size - phase : au_size);
476 if (bytes_per_au[pos] == 0) {
477 if (release_units) {
478 if (release_units->empty() || next_offs != pos * au_size) {
479 release_units->emplace_back(pos * au_size, au_size);
9f95a23c 480 next_offs = pos * au_size;
7c673cae
FG
481 } else {
482 release_units->back().length += au_size;
483 }
484 next_offs += au_size;
485 }
486 } else {
487 maybe_empty = false; // micro optimization detecting we aren't empty
488 // even in the affected extent
489 }
490 }
491 }
492 bool empty = maybe_empty ? !is_not_empty() : false;
493 if (empty && release_units) {
494 release_units->clear();
495 }
496 return empty;
497}
498
499bool bluestore_blob_use_tracker_t::can_split() const
500{
501 return num_au > 0;
502}
503
504bool bluestore_blob_use_tracker_t::can_split_at(uint32_t blob_offset) const
505{
11fdf7f2 506 ceph_assert(au_size);
7c673cae
FG
507 return (blob_offset % au_size) == 0 &&
508 blob_offset < num_au * au_size;
509}
510
511void bluestore_blob_use_tracker_t::split(
512 uint32_t blob_offset,
513 bluestore_blob_use_tracker_t* r)
514{
11fdf7f2
TL
515 ceph_assert(au_size);
516 ceph_assert(can_split());
517 ceph_assert(can_split_at(blob_offset));
518 ceph_assert(r->is_empty());
7c673cae
FG
519
520 uint32_t new_num_au = blob_offset / au_size;
521 r->init( (num_au - new_num_au) * au_size, au_size);
522
523 for (auto i = new_num_au; i < num_au; i++) {
524 r->get((i - new_num_au) * au_size, bytes_per_au[i]);
525 bytes_per_au[i] = 0;
526 }
527 if (new_num_au == 0) {
528 clear();
529 } else if (new_num_au == 1) {
530 uint32_t tmp = bytes_per_au[0];
531 uint32_t _au_size = au_size;
532 clear();
533 au_size = _au_size;
534 total_bytes = tmp;
535 } else {
536 num_au = new_num_au;
537 }
538}
539
540bool bluestore_blob_use_tracker_t::equal(
541 const bluestore_blob_use_tracker_t& other) const
542{
543 if (!num_au && !other.num_au) {
544 return total_bytes == other.total_bytes && au_size == other.au_size;
545 } else if (num_au && other.num_au) {
546 if (num_au != other.num_au || au_size != other.au_size) {
547 return false;
548 }
549 for (size_t i = 0; i < num_au; i++) {
550 if (bytes_per_au[i] != other.bytes_per_au[i]) {
551 return false;
552 }
553 }
554 return true;
555 }
556
557 uint32_t n = num_au ? num_au : other.num_au;
558 uint32_t referenced =
559 num_au ? other.get_referenced_bytes() : get_referenced_bytes();
560 auto bytes_per_au_tmp = num_au ? bytes_per_au : other.bytes_per_au;
561 uint32_t my_referenced = 0;
562 for (size_t i = 0; i < n; i++) {
563 my_referenced += bytes_per_au_tmp[i];
564 if (my_referenced > referenced) {
565 return false;
566 }
567 }
568 return my_referenced == referenced;
569}
570
571void bluestore_blob_use_tracker_t::dump(Formatter *f) const
572{
573 f->dump_unsigned("num_au", num_au);
574 f->dump_unsigned("au_size", au_size);
575 if (!num_au) {
576 f->dump_unsigned("total_bytes", total_bytes);
577 } else {
578 f->open_array_section("bytes_per_au");
579 for (size_t i = 0; i < num_au; ++i) {
580 f->dump_unsigned("", bytes_per_au[i]);
581 }
582 f->close_section();
583 }
584}
585
586void bluestore_blob_use_tracker_t::generate_test_instances(
587 list<bluestore_blob_use_tracker_t*>& o)
588{
589 o.push_back(new bluestore_blob_use_tracker_t());
590 o.back()->init(16, 16);
591 o.back()->get(10, 10);
592 o.back()->get(10, 5);
593 o.push_back(new bluestore_blob_use_tracker_t());
594 o.back()->init(60, 16);
595 o.back()->get(18, 22);
596 o.back()->get(20, 20);
597 o.back()->get(15, 20);
598}
599
600ostream& operator<<(ostream& out, const bluestore_blob_use_tracker_t& m)
601{
602 out << "use_tracker(" << std::hex;
603 if (!m.num_au) {
604 out << "0x" << m.au_size
605 << " "
606 << "0x" << m.total_bytes;
607 } else {
608 out << "0x" << m.num_au
609 << "*0x" << m.au_size
610 << " 0x[";
611 for (size_t i = 0; i < m.num_au; ++i) {
612 if (i != 0)
613 out << ",";
614 out << m.bytes_per_au[i];
615 }
616 out << "]";
617 }
618 out << std::dec << ")";
619 return out;
620}
621
622// bluestore_pextent_t
623
624void bluestore_pextent_t::dump(Formatter *f) const
625{
626 f->dump_unsigned("offset", offset);
627 f->dump_unsigned("length", length);
628}
629
630ostream& operator<<(ostream& out, const bluestore_pextent_t& o) {
631 if (o.is_valid())
632 return out << "0x" << std::hex << o.offset << "~" << o.length << std::dec;
633 else
634 return out << "!~" << std::hex << o.length << std::dec;
635}
636
637void bluestore_pextent_t::generate_test_instances(list<bluestore_pextent_t*>& ls)
638{
639 ls.push_back(new bluestore_pextent_t);
640 ls.push_back(new bluestore_pextent_t(1, 2));
641}
642
643// bluestore_blob_t
644
645string bluestore_blob_t::get_flags_string(unsigned flags)
646{
647 string s;
7c673cae
FG
648 if (flags & FLAG_COMPRESSED) {
649 if (s.length())
650 s += '+';
651 s += "compressed";
652 }
653 if (flags & FLAG_CSUM) {
654 if (s.length())
655 s += '+';
656 s += "csum";
657 }
658 if (flags & FLAG_HAS_UNUSED) {
659 if (s.length())
660 s += '+';
661 s += "has_unused";
662 }
663 if (flags & FLAG_SHARED) {
664 if (s.length())
665 s += '+';
666 s += "shared";
667 }
668
669 return s;
670}
671
672size_t bluestore_blob_t::get_csum_value_size() const
673{
674 return Checksummer::get_csum_value_size(csum_type);
675}
676
677void bluestore_blob_t::dump(Formatter *f) const
678{
679 f->open_array_section("extents");
680 for (auto& p : extents) {
681 f->dump_object("extent", p);
682 }
683 f->close_section();
684 f->dump_unsigned("logical_length", logical_length);
685 f->dump_unsigned("compressed_length", compressed_length);
686 f->dump_unsigned("flags", flags);
687 f->dump_unsigned("csum_type", csum_type);
688 f->dump_unsigned("csum_chunk_order", csum_chunk_order);
689 f->open_array_section("csum_data");
690 size_t n = get_csum_count();
691 for (unsigned i = 0; i < n; ++i)
692 f->dump_unsigned("csum", get_csum_item(i));
693 f->close_section();
694 f->dump_unsigned("unused", unused);
695}
696
697void bluestore_blob_t::generate_test_instances(list<bluestore_blob_t*>& ls)
698{
699 ls.push_back(new bluestore_blob_t);
700 ls.push_back(new bluestore_blob_t(0));
701 ls.push_back(new bluestore_blob_t);
702 ls.back()->allocated_test(bluestore_pextent_t(111, 222));
703 ls.push_back(new bluestore_blob_t);
704 ls.back()->init_csum(Checksummer::CSUM_XXHASH32, 16, 65536);
f67539c2 705 ls.back()->csum_data = ceph::buffer::claim_malloc(4, strdup("abcd"));
7c673cae
FG
706 ls.back()->add_unused(0, 3);
707 ls.back()->add_unused(8, 8);
708 ls.back()->allocated_test(bluestore_pextent_t(0x40100000, 0x10000));
709 ls.back()->allocated_test(
710 bluestore_pextent_t(bluestore_pextent_t::INVALID_OFFSET, 0x1000));
711 ls.back()->allocated_test(bluestore_pextent_t(0x40120000, 0x10000));
712}
713
714ostream& operator<<(ostream& out, const bluestore_blob_t& o)
715{
716 out << "blob(" << o.get_extents();
717 if (o.is_compressed()) {
718 out << " clen 0x" << std::hex
719 << o.get_logical_length()
720 << " -> 0x"
721 << o.get_compressed_payload_length()
722 << std::dec;
723 }
724 if (o.flags) {
725 out << " " << o.get_flags_string();
726 }
181888fb 727 if (o.has_csum()) {
7c673cae
FG
728 out << " " << Checksummer::get_csum_type_string(o.csum_type)
729 << "/0x" << std::hex << (1ull << o.csum_chunk_order) << std::dec;
730 }
731 if (o.has_unused())
732 out << " unused=0x" << std::hex << o.unused << std::dec;
733 out << ")";
734 return out;
735}
736
737void bluestore_blob_t::calc_csum(uint64_t b_off, const bufferlist& bl)
738{
739 switch (csum_type) {
740 case Checksummer::CSUM_XXHASH32:
741 Checksummer::calculate<Checksummer::xxhash32>(
742 get_csum_chunk_size(), b_off, bl.length(), bl, &csum_data);
743 break;
744 case Checksummer::CSUM_XXHASH64:
745 Checksummer::calculate<Checksummer::xxhash64>(
746 get_csum_chunk_size(), b_off, bl.length(), bl, &csum_data);
747 break;;
748 case Checksummer::CSUM_CRC32C:
749 Checksummer::calculate<Checksummer::crc32c>(
750 get_csum_chunk_size(), b_off, bl.length(), bl, &csum_data);
751 break;
752 case Checksummer::CSUM_CRC32C_16:
753 Checksummer::calculate<Checksummer::crc32c_16>(
754 get_csum_chunk_size(), b_off, bl.length(), bl, &csum_data);
755 break;
756 case Checksummer::CSUM_CRC32C_8:
757 Checksummer::calculate<Checksummer::crc32c_8>(
758 get_csum_chunk_size(), b_off, bl.length(), bl, &csum_data);
759 break;
760 }
761}
762
763int bluestore_blob_t::verify_csum(uint64_t b_off, const bufferlist& bl,
764 int* b_bad_off, uint64_t *bad_csum) const
765{
766 int r = 0;
767
768 *b_bad_off = -1;
769 switch (csum_type) {
770 case Checksummer::CSUM_NONE:
771 break;
772 case Checksummer::CSUM_XXHASH32:
773 *b_bad_off = Checksummer::verify<Checksummer::xxhash32>(
774 get_csum_chunk_size(), b_off, bl.length(), bl, csum_data, bad_csum);
775 break;
776 case Checksummer::CSUM_XXHASH64:
777 *b_bad_off = Checksummer::verify<Checksummer::xxhash64>(
778 get_csum_chunk_size(), b_off, bl.length(), bl, csum_data, bad_csum);
779 break;
780 case Checksummer::CSUM_CRC32C:
781 *b_bad_off = Checksummer::verify<Checksummer::crc32c>(
782 get_csum_chunk_size(), b_off, bl.length(), bl, csum_data, bad_csum);
783 break;
784 case Checksummer::CSUM_CRC32C_16:
785 *b_bad_off = Checksummer::verify<Checksummer::crc32c_16>(
786 get_csum_chunk_size(), b_off, bl.length(), bl, csum_data, bad_csum);
787 break;
788 case Checksummer::CSUM_CRC32C_8:
789 *b_bad_off = Checksummer::verify<Checksummer::crc32c_8>(
790 get_csum_chunk_size(), b_off, bl.length(), bl, csum_data, bad_csum);
791 break;
792 default:
793 r = -EOPNOTSUPP;
794 break;
795 }
796
797 if (r < 0)
798 return r;
799 else if (*b_bad_off >= 0)
800 return -1; // bad checksum
801 else
802 return 0;
803}
804
// Record the physical extents 'allocs' as backing the blob-relative range
// [b_off, b_off + length).
//
// If the blob has no extents yet, the extent vector is built from scratch:
// an invalid extent of b_off bytes (when b_off != 0) followed by 'allocs';
// for uncompressed blobs logical_length is set to the resulting total.
//
// Otherwise (uncompressed blobs only) the existing extents covering the
// range, which must all be invalid, are replaced by: an optional invalid
// head, the entries of 'allocs', and an optional invalid tail, so that the
// space in front of and behind the range stays unallocated.
a8e16298 805void bluestore_blob_t::allocated(uint32_t b_off, uint32_t length, const PExtentVector& allocs)
7c673cae
FG
806{
807 if (extents.size() == 0) {
808 // if blob is compressed then logical length to be already configured
809 // otherwise - to be unset.
11fdf7f2 810 ceph_assert((is_compressed() && logical_length != 0) ||
7c673cae
FG
811 (!is_compressed() && logical_length == 0));
812
813 extents.reserve(allocs.size() + (b_off ? 1 : 0));
814 if (b_off) {
815 extents.emplace_back(
816 bluestore_pextent_t(bluestore_pextent_t::INVALID_OFFSET, b_off));
a8e16298 817
7c673cae
FG
818 }
819 uint32_t new_len = b_off;
820 for (auto& a : allocs) {
821 extents.emplace_back(a.offset, a.length);
822 new_len += a.length;
823 }
824 if (!is_compressed()) {
825 logical_length = new_len;
826 }
827 } else {
11fdf7f2 828 ceph_assert(!is_compressed()); // partial allocations are forbidden when
7c673cae 829 // compressed
11fdf7f2 830 ceph_assert(b_off < logical_length);
7c673cae
FG
831 uint32_t cur_offs = 0;
832 auto start_it = extents.begin();
833 size_t pos = 0;
// Find the extent containing b_off; 'pos' is its index and 'cur_offs' its
// starting blob offset.
11fdf7f2
TL
834 while (true) {
835 ceph_assert(start_it != extents.end());
7c673cae
FG
836 if (cur_offs + start_it->length > b_off) {
837 break;
838 }
839 cur_offs += start_it->length;
840 ++start_it;
841 ++pos;
842 }
// head: invalid bytes that remain in front of the allocated range
843 uint32_t head = b_off - cur_offs;
844 uint32_t end_off = b_off + length;
845 auto end_it = start_it;
846
// Find the extent containing the end of the range; every extent walked
// over must be invalid.
847 while (true) {
11fdf7f2
TL
848 ceph_assert(end_it != extents.end());
849 ceph_assert(!end_it->is_valid());
7c673cae
FG
850 if (cur_offs + end_it->length >= end_off) {
851 break;
852 }
853 cur_offs += end_it->length;
854 ++end_it;
855 }
11fdf7f2 856 ceph_assert(cur_offs + end_it->length >= end_off);
7c673cae
FG
// tail: invalid bytes that remain behind the allocated range
857 uint32_t tail = cur_offs + end_it->length - end_off;
858
// Replace [start_it, end_it] with placeholder entries that are filled in
// below.
859 start_it = extents.erase(start_it, end_it + 1);
860 size_t count = allocs.size();
861 count += head ? 1 : 0;
862 count += tail ? 1 : 0;
863 extents.insert(start_it,
864 count,
865 bluestore_pextent_t(
866 bluestore_pextent_t::INVALID_OFFSET, 0));
867
868 // Workaround to resolve lack of proper iterator return in vector::insert
869 // Looks like some gcc/stl implementations still lack it despite c++11
870 // support claim
871 start_it = extents.begin() + pos;
872
873 if (head) {
874 start_it->length = head;
875 ++start_it;
876 }
877 for(auto& e : allocs) {
878 *start_it = e;
879 ++start_it;
880 }
881 if (tail) {
882 start_it->length = tail;
883 }
884 }
885}
886
887// cut it out of extents
888struct vecbuilder {
889 PExtentVector v;
890 uint64_t invalid = 0;
891
892 void add_invalid(uint64_t length) {
893 invalid += length;
894 }
895 void flush() {
896 if (invalid) {
897 v.emplace_back(bluestore_pextent_t(bluestore_pextent_t::INVALID_OFFSET,
a8e16298
TL
898 invalid));
899
7c673cae
FG
900 invalid = 0;
901 }
902 }
903 void add(uint64_t offset, uint64_t length) {
904 if (offset == bluestore_pextent_t::INVALID_OFFSET) {
905 add_invalid(length);
906 }
907 else {
908 flush();
a8e16298 909 v.emplace_back(offset, length);
7c673cae
FG
910 }
911 }
912};
913
914void bluestore_blob_t::allocated_test(const bluestore_pextent_t& alloc)
915{
916 extents.emplace_back(alloc);
917 if (!is_compressed()) {
918 logical_length += alloc.length;
919 }
920}
921
// Release physical space backing this blob.
//
// all == true: every valid extent is appended to 'r', the extent list is
// replaced by a single invalid extent spanning the summed extent lengths,
// and true is returned.
//
// all == false: 'logical' lists blob-relative ranges to release
// (NOTE(review): the walk below appears to assume they are sorted by
// offset and non-overlapping - confirm against callers).  The physical
// pieces backing those ranges are appended to 'r', extending the most
// recent entry of 'r' when physically contiguous, and are replaced by
// invalid space in the rebuilt extent list.  Returns false.
922bool bluestore_blob_t::release_extents(bool all,
923 const PExtentVector& logical,
924 PExtentVector* r)
925{
926 // common case: all of it?
927 if (all) {
928 uint64_t pos = 0;
929 for (auto& e : extents) {
930 if (e.is_valid()) {
931 r->push_back(e);
932 }
933 pos += e.length;
934 }
11fdf7f2 935 ceph_assert(is_compressed() || get_logical_length() == pos);
7c673cae
FG
936 extents.resize(1);
937 extents[0].offset = bluestore_pextent_t::INVALID_OFFSET;
938 extents[0].length = pos;
939 return true;
940 }
941 // remove from pextents according to logical release list
942 vecbuilder vb;
943 auto loffs_it = logical.begin();
944 auto lend = logical.end();
945 uint32_t pext_loffs_start = 0; //starting loffset of the current pextent
946 uint32_t pext_loffs = 0; //current loffset
947 auto pext_it = extents.begin();
948 auto pext_end = extents.end();
949 while (pext_it != pext_end) {
// No (further) release range touches the current extent: keep whatever
// part of it has not been consumed yet and advance to the next extent.
950 if (loffs_it == lend ||
951 pext_loffs_start + pext_it->length <= loffs_it->offset) {
952 int delta0 = pext_loffs - pext_loffs_start;
11fdf7f2 953 ceph_assert(delta0 >= 0);
7c673cae
FG
954 if ((uint32_t)delta0 < pext_it->length) {
955 vb.add(pext_it->offset + delta0, pext_it->length - delta0);
956 }
957 pext_loffs_start += pext_it->length;
958 pext_loffs = pext_loffs_start;
959 ++pext_it;
960 }
961 else {
962 //assert(pext_loffs == pext_loffs_start);
// delta0: bytes of the current extent already consumed;
// delta: bytes to keep between the current position and the release range.
963 int delta0 = pext_loffs - pext_loffs_start;
11fdf7f2 964 ceph_assert(delta0 >= 0);
7c673cae
FG
965
966 int delta = loffs_it->offset - pext_loffs;
11fdf7f2 967 ceph_assert(delta >= 0);
7c673cae
FG
968 if (delta > 0) {
969 vb.add(pext_it->offset + delta0, delta);
970 pext_loffs += delta;
971 }
972
// last_r: the most recent entry of 'r' (or r->end() if 'r' is empty),
// used to coalesce physically contiguous releases.
973 PExtentVector::iterator last_r = r->end();
974 if (r->begin() != last_r) {
975 --last_r;
976 }
977 uint32_t to_release = loffs_it->length;
// Release the range piece by piece; it may span several physical extents.
978 do {
979 uint32_t to_release_part =
11fdf7f2 980 std::min(pext_it->length - delta0 - delta, to_release);
7c673cae
FG
981 auto o = pext_it->offset + delta0 + delta;
982 if (last_r != r->end() && last_r->offset + last_r->length == o) {
983 last_r->length += to_release_part;
984 }
985 else {
986 last_r = r->emplace(r->end(), o, to_release_part);
987 }
988 to_release -= to_release_part;
989 pext_loffs += to_release_part;
990 if (pext_loffs == pext_loffs_start + pext_it->length) {
991 pext_loffs_start += pext_it->length;
992 pext_loffs = pext_loffs_start;
993 pext_it++;
994 delta0 = delta = 0;
995 }
996 } while (to_release > 0 && pext_it != pext_end);
// The released bytes become invalid space in the rebuilt extent list.
997 vb.add_invalid(loffs_it->length - to_release);
998 ++loffs_it;
999 }
1000 }
1001 vb.flush();
1002 extents.swap(vb.v);
1003 return false;
1004}
1005
// Split this blob at 'blob_offset', moving everything at and beyond that
// offset into 'rb'.
//
// Extents entirely in front of the split point stay here; an extent that
// straddles it is cut in two (keeping its valid/invalid nature); extents
// behind it are appended to rb.extents.  logical_length is updated on both
// sides when a split point is found inside the extent list.  Flags are
// copied to 'rb', and when checksums are enabled the checksum parameters
// are copied and csum_data is divided at the matching position
// (blob_offset must be a multiple of the checksum chunk size).
1006void bluestore_blob_t::split(uint32_t blob_offset, bluestore_blob_t& rb)
1007{
1008 size_t left = blob_offset;
1009 uint32_t llen_lb = 0;
1010 uint32_t llen_rb = 0;
1011 unsigned i = 0;
1012 for (auto p = extents.begin(); p != extents.end(); ++p, ++i) {
// extent lies entirely in front of the split point: keep it here
1013 if (p->length <= left) {
1014 left -= p->length;
1015 llen_lb += p->length;
1016 continue;
1017 }
// split point falls inside this extent: cut it, the back part goes to rb
1018 if (left) {
1019 if (p->is_valid()) {
1020 rb.extents.emplace_back(bluestore_pextent_t(p->offset + left,
1021 p->length - left));
1022 }
1023 else {
1024 rb.extents.emplace_back(bluestore_pextent_t(
1025 bluestore_pextent_t::INVALID_OFFSET,
1026 p->length - left));
1027 }
1028 llen_rb += p->length - left;
1029 llen_lb += left;
1030 p->length = left;
1031 ++i;
1032 ++p;
1033 }
// all remaining extents move to rb; 'i' is the number kept on this side
1034 while (p != extents.end()) {
1035 llen_rb += p->length;
1036 rb.extents.push_back(*p++);
1037 }
1038 extents.resize(i);
1039 logical_length = llen_lb;
1040 rb.logical_length = llen_rb;
1041 break;
1042 }
1043 rb.flags = flags;
1044
1045 if (has_csum()) {
1046 rb.csum_type = csum_type;
1047 rb.csum_chunk_order = csum_chunk_order;
1048 size_t csum_order = get_csum_chunk_size();
11fdf7f2 1049 ceph_assert(blob_offset % csum_order == 0);
7c673cae
FG
// pos: byte offset within csum_data of the first checksum that belongs to rb
1050 size_t pos = (blob_offset / csum_order) * get_csum_value_size();
1051 // deep copy csum data
1052 bufferptr old;
1053 old.swap(csum_data);
1054 rb.csum_data = bufferptr(old.c_str() + pos, old.length() - pos);
1055 csum_data = bufferptr(old.c_str(), pos);
1056 }
1057}
1058
1059// bluestore_shared_blob_t
f91f0fd5
TL
// NOTE(review): presumably defines the mempool-backed object factory for
// bluestore_shared_blob_t in the bluestore_cache_other pool - confirm
// against the macro's definition in the mempool header.
1060MEMPOOL_DEFINE_OBJECT_FACTORY(bluestore_shared_blob_t, bluestore_shared_blob_t,
1061 bluestore_cache_other);
7c673cae
FG
1062
1063void bluestore_shared_blob_t::dump(Formatter *f) const
1064{
1065 f->dump_int("sbid", sbid);
1066 f->dump_object("ref_map", ref_map);
1067}
1068
1069void bluestore_shared_blob_t::generate_test_instances(
1070 list<bluestore_shared_blob_t*>& ls)
1071{
1072 ls.push_back(new bluestore_shared_blob_t(1));
1073}
1074
1075ostream& operator<<(ostream& out, const bluestore_shared_blob_t& sb)
1076{
31f18b77
FG
1077 out << "(sbid 0x" << std::hex << sb.sbid << std::dec;
1078 out << " " << sb.ref_map << ")";
7c673cae
FG
1079 return out;
1080}
1081
1082// bluestore_onode_t
1083
1084void bluestore_onode_t::shard_info::dump(Formatter *f) const
1085{
1086 f->dump_unsigned("offset", offset);
1087 f->dump_unsigned("bytes", bytes);
1088}
1089
1090ostream& operator<<(ostream& out, const bluestore_onode_t::shard_info& si)
1091{
1092 return out << std::hex << "0x" << si.offset << "(0x" << si.bytes << " bytes"
1093 << std::dec << ")";
1094}
1095
1096void bluestore_onode_t::dump(Formatter *f) const
1097{
1098 f->dump_unsigned("nid", nid);
1099 f->dump_unsigned("size", size);
1100 f->open_object_section("attrs");
1101 for (auto p = attrs.begin(); p != attrs.end(); ++p) {
1102 f->open_object_section("attr");
1103 f->dump_string("name", p->first.c_str()); // it's not quite std::string
1104 f->dump_unsigned("len", p->second.length());
1105 f->close_section();
1106 }
1107 f->close_section();
1108 f->dump_string("flags", get_flags_string());
1109 f->open_array_section("extent_map_shards");
1110 for (auto si : extent_map_shards) {
1111 f->dump_object("shard", si);
1112 }
1113 f->close_section();
1114 f->dump_unsigned("expected_object_size", expected_object_size);
1115 f->dump_unsigned("expected_write_size", expected_write_size);
1116 f->dump_unsigned("alloc_hint_flags", alloc_hint_flags);
1117}
1118
1119void bluestore_onode_t::generate_test_instances(list<bluestore_onode_t*>& o)
1120{
1121 o.push_back(new bluestore_onode_t());
1122 // FIXME
1123}
1124
1125// bluestore_deferred_op_t
1126
1127void bluestore_deferred_op_t::dump(Formatter *f) const
1128{
1129 f->dump_unsigned("op", (int)op);
1130 f->dump_unsigned("data_len", data.length());
1131 f->open_array_section("extents");
1132 for (auto& e : extents) {
1133 f->dump_object("extent", e);
1134 }
1135 f->close_section();
1136}
1137
1138void bluestore_deferred_op_t::generate_test_instances(list<bluestore_deferred_op_t*>& o)
1139{
1140 o.push_back(new bluestore_deferred_op_t);
1141 o.push_back(new bluestore_deferred_op_t);
1142 o.back()->op = OP_WRITE;
1143 o.back()->extents.push_back(bluestore_pextent_t(1, 2));
1144 o.back()->extents.push_back(bluestore_pextent_t(100, 5));
1145 o.back()->data.append("my data");
1146}
1147
1148void bluestore_deferred_transaction_t::dump(Formatter *f) const
1149{
1150 f->dump_unsigned("seq", seq);
1151 f->open_array_section("ops");
1152 for (list<bluestore_deferred_op_t>::const_iterator p = ops.begin(); p != ops.end(); ++p) {
1153 f->dump_object("op", *p);
1154 }
1155 f->close_section();
1156
1157 f->open_array_section("released extents");
1158 for (interval_set<uint64_t>::const_iterator p = released.begin(); p != released.end(); ++p) {
1159 f->open_object_section("extent");
1160 f->dump_unsigned("offset", p.get_start());
1161 f->dump_unsigned("length", p.get_len());
1162 f->close_section();
1163 }
1164 f->close_section();
1165}
1166
1167void bluestore_deferred_transaction_t::generate_test_instances(list<bluestore_deferred_transaction_t*>& o)
1168{
1169 o.push_back(new bluestore_deferred_transaction_t());
1170 o.push_back(new bluestore_deferred_transaction_t());
1171 o.back()->seq = 123;
1172 o.back()->ops.push_back(bluestore_deferred_op_t());
1173 o.back()->ops.push_back(bluestore_deferred_op_t());
1174 o.back()->ops.back().op = bluestore_deferred_op_t::OP_WRITE;
1175 o.back()->ops.back().extents.push_back(bluestore_pextent_t(1,7));
1176 o.back()->ops.back().data.append("foodata");
1177}
1178
1179void bluestore_compression_header_t::dump(Formatter *f) const
1180{
1181 f->dump_unsigned("type", type);
1182 f->dump_unsigned("length", length);
f67539c2
TL
1183 if (compressor_message) {
1184 f->dump_int("compressor_message", *compressor_message);
1185 }
7c673cae
FG
1186}
1187
1188void bluestore_compression_header_t::generate_test_instances(
1189 list<bluestore_compression_header_t*>& o)
1190{
1191 o.push_back(new bluestore_compression_header_t);
1192 o.push_back(new bluestore_compression_header_t(1));
1193 o.back()->length = 1234;
1194}
20effc67
TL
1195
1196// adds more salt to build a hash func input
1197shared_blob_2hash_tracker_t::hash_input_t
1198 shared_blob_2hash_tracker_t::build_hash_input(
1199 uint64_t sbid,
1200 uint64_t offset) const
1201{
1202 hash_input_t res = {
1203 sbid,
1204 offset >> au_void_bits,
1205 ((sbid & 0xffffffff) << 32) + ~(uint32_t((offset >> au_void_bits) & 0xffffffff))
1206 };
1207 return res;
1208}
1209
1210void shared_blob_2hash_tracker_t::inc(
1211 uint64_t sbid,
1212 uint64_t offset,
1213 int n)
1214{
1215 auto hash_input = build_hash_input(sbid, offset);
1216 ref_counter_2hash_tracker_t::inc(
1217 (char*)hash_input.data(),
1218 get_hash_input_size(),
1219 n);
1220}
1221
1222void shared_blob_2hash_tracker_t::inc_range(
1223 uint64_t sbid,
1224 uint64_t offset,
1225 uint32_t len,
1226 int n)
1227{
1228 uint32_t alloc_unit = 1 << au_void_bits;
1229 int64_t l = len;
1230 while (l > 0) {
1231 // don't care about ofset alignment as inc() trims it anyway
1232 inc(sbid, offset, n);
1233 offset += alloc_unit;
1234 l -= alloc_unit;
1235 }
1236}
1237
1238bool shared_blob_2hash_tracker_t::test_hash_conflict(
1239 uint64_t sbid1,
1240 uint64_t offset1,
1241 uint64_t sbid2,
1242 uint64_t offset2) const
1243{
1244 auto hash_input1 = build_hash_input(sbid1, offset1);
1245 auto hash_input2 = build_hash_input(sbid2, offset2);
1246 return ref_counter_2hash_tracker_t::test_hash_conflict(
1247 (char*)hash_input1.data(),
1248 (char*)hash_input2.data(),
1249 get_hash_input_size());
1250}
1251
1252bool shared_blob_2hash_tracker_t::test_all_zero(
1253 uint64_t sbid,
1254 uint64_t offset) const
1255{
1256 auto hash_input = build_hash_input(sbid, offset);
1257 return
1258 ref_counter_2hash_tracker_t::test_all_zero(
1259 (char*)hash_input.data(),
1260 get_hash_input_size());
1261}
1262
1263bool shared_blob_2hash_tracker_t::test_all_zero_range(
1264 uint64_t sbid,
1265 uint64_t offset,
1266 uint32_t len) const
1267{
1268 uint32_t alloc_unit = 1 << au_void_bits;
1269 int64_t l = len;
1270 while (l > 0) {
1271 // don't care about ofset alignment as inc() trims it anyway
1272 if (!test_all_zero(sbid, offset)) {
1273 return false;
1274 }
1275 offset += alloc_unit;
1276 l -= alloc_unit;
1277 }
1278 return true;
1279}