]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2014 Red Hat | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | #include "bluestore_types.h" | |
16 | #include "common/Formatter.h" | |
17 | #include "common/Checksummer.h" | |
18 | #include "include/stringify.h" | |
19 | ||
20 | void ExtentList::add_extents(int64_t start, int64_t count) { | |
21 | AllocExtent *last_extent = NULL; | |
22 | bool can_merge = false; | |
23 | ||
24 | if (!m_extents->empty()) { | |
25 | last_extent = &(m_extents->back()); | |
26 | uint64_t last_offset = last_extent->end() / m_block_size; | |
27 | uint32_t last_length = last_extent->length / m_block_size; | |
28 | if ((last_offset == (uint64_t) start) && | |
29 | (!m_max_blocks || (last_length + count) <= m_max_blocks)) { | |
30 | can_merge = true; | |
31 | } | |
32 | } | |
33 | ||
34 | if (can_merge) { | |
35 | last_extent->length += (count * m_block_size); | |
36 | } else { | |
37 | m_extents->emplace_back(AllocExtent(start * m_block_size, | |
38 | count * m_block_size)); | |
39 | } | |
40 | } | |
41 | ||
42 | // bluestore_bdev_label_t | |
43 | ||
44 | void bluestore_bdev_label_t::encode(bufferlist& bl) const | |
45 | { | |
46 | // be slightly friendly to someone who looks at the device | |
47 | bl.append("bluestore block device\n"); | |
48 | bl.append(stringify(osd_uuid)); | |
49 | bl.append("\n"); | |
50 | ENCODE_START(1, 1, bl); | |
51 | ::encode(osd_uuid, bl); | |
52 | ::encode(size, bl); | |
53 | ::encode(btime, bl); | |
54 | ::encode(description, bl); | |
55 | ENCODE_FINISH(bl); | |
56 | } | |
57 | ||
58 | void bluestore_bdev_label_t::decode(bufferlist::iterator& p) | |
59 | { | |
60 | p.advance(60); // see above | |
61 | DECODE_START(1, p); | |
62 | ::decode(osd_uuid, p); | |
63 | ::decode(size, p); | |
64 | ::decode(btime, p); | |
65 | ::decode(description, p); | |
66 | DECODE_FINISH(p); | |
67 | } | |
68 | ||
69 | void bluestore_bdev_label_t::dump(Formatter *f) const | |
70 | { | |
71 | f->dump_stream("osd_uuid") << osd_uuid; | |
72 | f->dump_unsigned("size", size); | |
73 | f->dump_stream("btime") << btime; | |
74 | f->dump_string("description", description); | |
75 | } | |
76 | ||
77 | void bluestore_bdev_label_t::generate_test_instances( | |
78 | list<bluestore_bdev_label_t*>& o) | |
79 | { | |
80 | o.push_back(new bluestore_bdev_label_t); | |
81 | o.push_back(new bluestore_bdev_label_t); | |
82 | o.back()->size = 123; | |
83 | o.back()->btime = utime_t(4, 5); | |
84 | o.back()->description = "fakey"; | |
85 | } | |
86 | ||
87 | ostream& operator<<(ostream& out, const bluestore_bdev_label_t& l) | |
88 | { | |
89 | return out << "bdev(osd_uuid " << l.osd_uuid | |
90 | << " size 0x" << std::hex << l.size << std::dec | |
91 | << " btime " << l.btime | |
92 | << " desc " << l.description << ")"; | |
93 | } | |
94 | ||
95 | // cnode_t | |
96 | ||
97 | void bluestore_cnode_t::dump(Formatter *f) const | |
98 | { | |
99 | f->dump_unsigned("bits", bits); | |
100 | } | |
101 | ||
102 | void bluestore_cnode_t::generate_test_instances(list<bluestore_cnode_t*>& o) | |
103 | { | |
104 | o.push_back(new bluestore_cnode_t()); | |
105 | o.push_back(new bluestore_cnode_t(0)); | |
106 | o.push_back(new bluestore_cnode_t(123)); | |
107 | } | |
108 | ||
109 | // bluestore_extent_ref_map_t | |
110 | ||
111 | void bluestore_extent_ref_map_t::_check() const | |
112 | { | |
113 | uint64_t pos = 0; | |
114 | unsigned refs = 0; | |
115 | for (const auto &p : ref_map) { | |
116 | if (p.first < pos) | |
117 | assert(0 == "overlap"); | |
118 | if (p.first == pos && p.second.refs == refs) | |
119 | assert(0 == "unmerged"); | |
120 | pos = p.first + p.second.length; | |
121 | refs = p.second.refs; | |
122 | } | |
123 | } | |
124 | ||
125 | void bluestore_extent_ref_map_t::_maybe_merge_left( | |
126 | map<uint64_t,record_t>::iterator& p) | |
127 | { | |
128 | if (p == ref_map.begin()) | |
129 | return; | |
130 | auto q = p; | |
131 | --q; | |
132 | if (q->second.refs == p->second.refs && | |
133 | q->first + q->second.length == p->first) { | |
134 | q->second.length += p->second.length; | |
135 | ref_map.erase(p); | |
136 | p = q; | |
137 | } | |
138 | } | |
139 | ||
140 | void bluestore_extent_ref_map_t::get(uint64_t offset, uint32_t length) | |
141 | { | |
142 | auto p = ref_map.lower_bound(offset); | |
143 | if (p != ref_map.begin()) { | |
144 | --p; | |
145 | if (p->first + p->second.length <= offset) { | |
146 | ++p; | |
147 | } | |
148 | } | |
149 | while (length > 0) { | |
150 | if (p == ref_map.end()) { | |
151 | // nothing after offset; add the whole thing. | |
152 | p = ref_map.insert( | |
153 | map<uint64_t,record_t>::value_type(offset, record_t(length, 1))).first; | |
154 | break; | |
155 | } | |
156 | if (p->first > offset) { | |
157 | // gap | |
158 | uint64_t newlen = MIN(p->first - offset, length); | |
159 | p = ref_map.insert( | |
160 | map<uint64_t,record_t>::value_type(offset, | |
161 | record_t(newlen, 1))).first; | |
162 | offset += newlen; | |
163 | length -= newlen; | |
164 | _maybe_merge_left(p); | |
165 | ++p; | |
166 | continue; | |
167 | } | |
168 | if (p->first < offset) { | |
169 | // split off the portion before offset | |
170 | assert(p->first + p->second.length > offset); | |
171 | uint64_t left = p->first + p->second.length - offset; | |
172 | p->second.length = offset - p->first; | |
173 | p = ref_map.insert(map<uint64_t,record_t>::value_type( | |
174 | offset, record_t(left, p->second.refs))).first; | |
175 | // continue below | |
176 | } | |
177 | assert(p->first == offset); | |
178 | if (length < p->second.length) { | |
179 | ref_map.insert(make_pair(offset + length, | |
180 | record_t(p->second.length - length, | |
181 | p->second.refs))); | |
182 | p->second.length = length; | |
183 | ++p->second.refs; | |
184 | break; | |
185 | } | |
186 | ++p->second.refs; | |
187 | offset += p->second.length; | |
188 | length -= p->second.length; | |
189 | _maybe_merge_left(p); | |
190 | ++p; | |
191 | } | |
192 | if (p != ref_map.end()) | |
193 | _maybe_merge_left(p); | |
194 | //_check(); | |
195 | } | |
196 | ||
197 | void bluestore_extent_ref_map_t::put( | |
198 | uint64_t offset, uint32_t length, | |
199 | PExtentVector *release) | |
200 | { | |
201 | //NB: existing entries in 'release' container must be preserved! | |
202 | ||
203 | auto p = ref_map.lower_bound(offset); | |
204 | if (p == ref_map.end() || p->first > offset) { | |
205 | if (p == ref_map.begin()) { | |
206 | assert(0 == "put on missing extent (nothing before)"); | |
207 | } | |
208 | --p; | |
209 | if (p->first + p->second.length <= offset) { | |
210 | assert(0 == "put on missing extent (gap)"); | |
211 | } | |
212 | } | |
213 | if (p->first < offset) { | |
214 | uint64_t left = p->first + p->second.length - offset; | |
215 | p->second.length = offset - p->first; | |
216 | p = ref_map.insert(map<uint64_t,record_t>::value_type( | |
217 | offset, record_t(left, p->second.refs))).first; | |
218 | } | |
219 | while (length > 0) { | |
220 | assert(p->first == offset); | |
221 | if (length < p->second.length) { | |
222 | ref_map.insert(make_pair(offset + length, | |
223 | record_t(p->second.length - length, | |
224 | p->second.refs))); | |
225 | if (p->second.refs > 1) { | |
226 | p->second.length = length; | |
227 | --p->second.refs; | |
228 | _maybe_merge_left(p); | |
229 | } else { | |
230 | if (release) | |
231 | release->push_back(bluestore_pextent_t(p->first, length)); | |
232 | ref_map.erase(p); | |
233 | } | |
234 | return; | |
235 | } | |
236 | offset += p->second.length; | |
237 | length -= p->second.length; | |
238 | if (p->second.refs > 1) { | |
239 | --p->second.refs; | |
240 | _maybe_merge_left(p); | |
241 | ++p; | |
242 | } else { | |
243 | if (release) | |
244 | release->push_back(bluestore_pextent_t(p->first, p->second.length)); | |
245 | ref_map.erase(p++); | |
246 | } | |
247 | } | |
248 | if (p != ref_map.end()) | |
249 | _maybe_merge_left(p); | |
250 | //_check(); | |
251 | } | |
252 | ||
253 | bool bluestore_extent_ref_map_t::contains(uint64_t offset, uint32_t length) const | |
254 | { | |
255 | auto p = ref_map.lower_bound(offset); | |
256 | if (p == ref_map.end() || p->first > offset) { | |
257 | if (p == ref_map.begin()) { | |
258 | return false; // nothing before | |
259 | } | |
260 | --p; | |
261 | if (p->first + p->second.length <= offset) { | |
262 | return false; // gap | |
263 | } | |
264 | } | |
265 | while (length > 0) { | |
266 | if (p == ref_map.end()) | |
267 | return false; | |
268 | if (p->first > offset) | |
269 | return false; | |
270 | if (p->first + p->second.length >= offset + length) | |
271 | return true; | |
272 | uint64_t overlap = p->first + p->second.length - offset; | |
273 | offset += overlap; | |
274 | length -= overlap; | |
275 | ++p; | |
276 | } | |
277 | return true; | |
278 | } | |
279 | ||
280 | bool bluestore_extent_ref_map_t::intersects( | |
281 | uint64_t offset, | |
282 | uint32_t length) const | |
283 | { | |
284 | auto p = ref_map.lower_bound(offset); | |
285 | if (p != ref_map.begin()) { | |
286 | --p; | |
287 | if (p->first + p->second.length <= offset) { | |
288 | ++p; | |
289 | } | |
290 | } | |
291 | if (p == ref_map.end()) | |
292 | return false; | |
293 | if (p->first >= offset + length) | |
294 | return false; | |
295 | return true; // intersects p! | |
296 | } | |
297 | ||
298 | void bluestore_extent_ref_map_t::dump(Formatter *f) const | |
299 | { | |
300 | f->open_array_section("ref_map"); | |
301 | for (auto& p : ref_map) { | |
302 | f->open_object_section("ref"); | |
303 | f->dump_unsigned("offset", p.first); | |
304 | f->dump_unsigned("length", p.second.length); | |
305 | f->dump_unsigned("refs", p.second.refs); | |
306 | f->close_section(); | |
307 | } | |
308 | f->close_section(); | |
309 | } | |
310 | ||
311 | void bluestore_extent_ref_map_t::generate_test_instances( | |
312 | list<bluestore_extent_ref_map_t*>& o) | |
313 | { | |
314 | o.push_back(new bluestore_extent_ref_map_t); | |
315 | o.push_back(new bluestore_extent_ref_map_t); | |
316 | o.back()->get(10, 10); | |
317 | o.back()->get(18, 22); | |
318 | o.back()->get(20, 20); | |
319 | o.back()->get(10, 25); | |
320 | o.back()->get(15, 20); | |
321 | } | |
322 | ||
323 | ostream& operator<<(ostream& out, const bluestore_extent_ref_map_t& m) | |
324 | { | |
325 | out << "ref_map("; | |
326 | for (auto p = m.ref_map.begin(); p != m.ref_map.end(); ++p) { | |
327 | if (p != m.ref_map.begin()) | |
328 | out << ","; | |
329 | out << std::hex << "0x" << p->first << "~" << p->second.length << std::dec | |
330 | << "=" << p->second.refs; | |
331 | } | |
332 | out << ")"; | |
333 | return out; | |
334 | } | |
335 | ||
336 | // bluestore_blob_use_tracker_t | |
337 | ||
338 | void bluestore_blob_use_tracker_t::allocate() | |
339 | { | |
340 | assert(num_au != 0); | |
341 | bytes_per_au = new uint32_t[num_au]; | |
342 | for (uint32_t i = 0; i < num_au; ++i) { | |
343 | bytes_per_au[i] = 0; | |
344 | } | |
345 | } | |
346 | ||
347 | void bluestore_blob_use_tracker_t::init( | |
348 | uint32_t full_length, uint32_t _au_size) { | |
349 | assert(!au_size || is_empty()); | |
350 | assert(_au_size > 0); | |
351 | assert(full_length > 0); | |
352 | clear(); | |
353 | uint32_t _num_au = ROUND_UP_TO(full_length, _au_size) / _au_size; | |
354 | au_size = _au_size; | |
355 | if( _num_au > 1 ) { | |
356 | num_au = _num_au; | |
357 | allocate(); | |
358 | } | |
359 | } | |
360 | ||
361 | void bluestore_blob_use_tracker_t::get( | |
362 | uint32_t offset, uint32_t length) | |
363 | { | |
364 | assert(au_size); | |
365 | if (!num_au) { | |
366 | total_bytes += length; | |
367 | }else { | |
368 | auto end = offset + length; | |
369 | ||
370 | while (offset < end) { | |
371 | auto phase = offset % au_size; | |
372 | bytes_per_au[offset / au_size] += | |
373 | MIN(au_size - phase, end - offset); | |
374 | offset += (phase ? au_size - phase : au_size); | |
375 | } | |
376 | } | |
377 | } | |
378 | ||
379 | bool bluestore_blob_use_tracker_t::put( | |
380 | uint32_t offset, uint32_t length, | |
381 | PExtentVector *release_units) | |
382 | { | |
383 | assert(au_size); | |
384 | if (release_units) { | |
385 | release_units->clear(); | |
386 | } | |
387 | bool maybe_empty = true; | |
388 | if (!num_au) { | |
389 | assert(total_bytes >= length); | |
390 | total_bytes -= length; | |
391 | } else { | |
392 | auto end = offset + length; | |
393 | uint64_t next_offs = 0; | |
394 | while (offset < end) { | |
395 | auto phase = offset % au_size; | |
396 | size_t pos = offset / au_size; | |
397 | auto diff = MIN(au_size - phase, end - offset); | |
398 | assert(diff <= bytes_per_au[pos]); | |
399 | bytes_per_au[pos] -= diff; | |
400 | offset += (phase ? au_size - phase : au_size); | |
401 | if (bytes_per_au[pos] == 0) { | |
402 | if (release_units) { | |
403 | if (release_units->empty() || next_offs != pos * au_size) { | |
404 | release_units->emplace_back(pos * au_size, au_size); | |
405 | } else { | |
406 | release_units->back().length += au_size; | |
407 | } | |
408 | next_offs += au_size; | |
409 | } | |
410 | } else { | |
411 | maybe_empty = false; // micro optimization detecting we aren't empty | |
412 | // even in the affected extent | |
413 | } | |
414 | } | |
415 | } | |
416 | bool empty = maybe_empty ? !is_not_empty() : false; | |
417 | if (empty && release_units) { | |
418 | release_units->clear(); | |
419 | } | |
420 | return empty; | |
421 | } | |
422 | ||
423 | bool bluestore_blob_use_tracker_t::can_split() const | |
424 | { | |
425 | return num_au > 0; | |
426 | } | |
427 | ||
428 | bool bluestore_blob_use_tracker_t::can_split_at(uint32_t blob_offset) const | |
429 | { | |
430 | assert(au_size); | |
431 | return (blob_offset % au_size) == 0 && | |
432 | blob_offset < num_au * au_size; | |
433 | } | |
434 | ||
435 | void bluestore_blob_use_tracker_t::split( | |
436 | uint32_t blob_offset, | |
437 | bluestore_blob_use_tracker_t* r) | |
438 | { | |
439 | assert(au_size); | |
440 | assert(can_split()); | |
441 | assert(can_split_at(blob_offset)); | |
442 | assert(r->is_empty()); | |
443 | ||
444 | uint32_t new_num_au = blob_offset / au_size; | |
445 | r->init( (num_au - new_num_au) * au_size, au_size); | |
446 | ||
447 | for (auto i = new_num_au; i < num_au; i++) { | |
448 | r->get((i - new_num_au) * au_size, bytes_per_au[i]); | |
449 | bytes_per_au[i] = 0; | |
450 | } | |
451 | if (new_num_au == 0) { | |
452 | clear(); | |
453 | } else if (new_num_au == 1) { | |
454 | uint32_t tmp = bytes_per_au[0]; | |
455 | uint32_t _au_size = au_size; | |
456 | clear(); | |
457 | au_size = _au_size; | |
458 | total_bytes = tmp; | |
459 | } else { | |
460 | num_au = new_num_au; | |
461 | } | |
462 | } | |
463 | ||
464 | bool bluestore_blob_use_tracker_t::equal( | |
465 | const bluestore_blob_use_tracker_t& other) const | |
466 | { | |
467 | if (!num_au && !other.num_au) { | |
468 | return total_bytes == other.total_bytes && au_size == other.au_size; | |
469 | } else if (num_au && other.num_au) { | |
470 | if (num_au != other.num_au || au_size != other.au_size) { | |
471 | return false; | |
472 | } | |
473 | for (size_t i = 0; i < num_au; i++) { | |
474 | if (bytes_per_au[i] != other.bytes_per_au[i]) { | |
475 | return false; | |
476 | } | |
477 | } | |
478 | return true; | |
479 | } | |
480 | ||
481 | uint32_t n = num_au ? num_au : other.num_au; | |
482 | uint32_t referenced = | |
483 | num_au ? other.get_referenced_bytes() : get_referenced_bytes(); | |
484 | auto bytes_per_au_tmp = num_au ? bytes_per_au : other.bytes_per_au; | |
485 | uint32_t my_referenced = 0; | |
486 | for (size_t i = 0; i < n; i++) { | |
487 | my_referenced += bytes_per_au_tmp[i]; | |
488 | if (my_referenced > referenced) { | |
489 | return false; | |
490 | } | |
491 | } | |
492 | return my_referenced == referenced; | |
493 | } | |
494 | ||
495 | void bluestore_blob_use_tracker_t::dump(Formatter *f) const | |
496 | { | |
497 | f->dump_unsigned("num_au", num_au); | |
498 | f->dump_unsigned("au_size", au_size); | |
499 | if (!num_au) { | |
500 | f->dump_unsigned("total_bytes", total_bytes); | |
501 | } else { | |
502 | f->open_array_section("bytes_per_au"); | |
503 | for (size_t i = 0; i < num_au; ++i) { | |
504 | f->dump_unsigned("", bytes_per_au[i]); | |
505 | } | |
506 | f->close_section(); | |
507 | } | |
508 | } | |
509 | ||
510 | void bluestore_blob_use_tracker_t::generate_test_instances( | |
511 | list<bluestore_blob_use_tracker_t*>& o) | |
512 | { | |
513 | o.push_back(new bluestore_blob_use_tracker_t()); | |
514 | o.back()->init(16, 16); | |
515 | o.back()->get(10, 10); | |
516 | o.back()->get(10, 5); | |
517 | o.push_back(new bluestore_blob_use_tracker_t()); | |
518 | o.back()->init(60, 16); | |
519 | o.back()->get(18, 22); | |
520 | o.back()->get(20, 20); | |
521 | o.back()->get(15, 20); | |
522 | } | |
523 | ||
524 | ostream& operator<<(ostream& out, const bluestore_blob_use_tracker_t& m) | |
525 | { | |
526 | out << "use_tracker(" << std::hex; | |
527 | if (!m.num_au) { | |
528 | out << "0x" << m.au_size | |
529 | << " " | |
530 | << "0x" << m.total_bytes; | |
531 | } else { | |
532 | out << "0x" << m.num_au | |
533 | << "*0x" << m.au_size | |
534 | << " 0x["; | |
535 | for (size_t i = 0; i < m.num_au; ++i) { | |
536 | if (i != 0) | |
537 | out << ","; | |
538 | out << m.bytes_per_au[i]; | |
539 | } | |
540 | out << "]"; | |
541 | } | |
542 | out << std::dec << ")"; | |
543 | return out; | |
544 | } | |
545 | ||
546 | // bluestore_pextent_t | |
547 | ||
548 | void bluestore_pextent_t::dump(Formatter *f) const | |
549 | { | |
550 | f->dump_unsigned("offset", offset); | |
551 | f->dump_unsigned("length", length); | |
552 | } | |
553 | ||
554 | ostream& operator<<(ostream& out, const bluestore_pextent_t& o) { | |
555 | if (o.is_valid()) | |
556 | return out << "0x" << std::hex << o.offset << "~" << o.length << std::dec; | |
557 | else | |
558 | return out << "!~" << std::hex << o.length << std::dec; | |
559 | } | |
560 | ||
561 | void bluestore_pextent_t::generate_test_instances(list<bluestore_pextent_t*>& ls) | |
562 | { | |
563 | ls.push_back(new bluestore_pextent_t); | |
564 | ls.push_back(new bluestore_pextent_t(1, 2)); | |
565 | } | |
566 | ||
567 | // bluestore_blob_t | |
568 | ||
569 | string bluestore_blob_t::get_flags_string(unsigned flags) | |
570 | { | |
571 | string s; | |
572 | if (flags & FLAG_MUTABLE) { | |
573 | s = "mutable"; | |
574 | } | |
575 | if (flags & FLAG_COMPRESSED) { | |
576 | if (s.length()) | |
577 | s += '+'; | |
578 | s += "compressed"; | |
579 | } | |
580 | if (flags & FLAG_CSUM) { | |
581 | if (s.length()) | |
582 | s += '+'; | |
583 | s += "csum"; | |
584 | } | |
585 | if (flags & FLAG_HAS_UNUSED) { | |
586 | if (s.length()) | |
587 | s += '+'; | |
588 | s += "has_unused"; | |
589 | } | |
590 | if (flags & FLAG_SHARED) { | |
591 | if (s.length()) | |
592 | s += '+'; | |
593 | s += "shared"; | |
594 | } | |
595 | ||
596 | return s; | |
597 | } | |
598 | ||
599 | size_t bluestore_blob_t::get_csum_value_size() const | |
600 | { | |
601 | return Checksummer::get_csum_value_size(csum_type); | |
602 | } | |
603 | ||
604 | void bluestore_blob_t::dump(Formatter *f) const | |
605 | { | |
606 | f->open_array_section("extents"); | |
607 | for (auto& p : extents) { | |
608 | f->dump_object("extent", p); | |
609 | } | |
610 | f->close_section(); | |
611 | f->dump_unsigned("logical_length", logical_length); | |
612 | f->dump_unsigned("compressed_length", compressed_length); | |
613 | f->dump_unsigned("flags", flags); | |
614 | f->dump_unsigned("csum_type", csum_type); | |
615 | f->dump_unsigned("csum_chunk_order", csum_chunk_order); | |
616 | f->open_array_section("csum_data"); | |
617 | size_t n = get_csum_count(); | |
618 | for (unsigned i = 0; i < n; ++i) | |
619 | f->dump_unsigned("csum", get_csum_item(i)); | |
620 | f->close_section(); | |
621 | f->dump_unsigned("unused", unused); | |
622 | } | |
623 | ||
624 | void bluestore_blob_t::generate_test_instances(list<bluestore_blob_t*>& ls) | |
625 | { | |
626 | ls.push_back(new bluestore_blob_t); | |
627 | ls.push_back(new bluestore_blob_t(0)); | |
628 | ls.push_back(new bluestore_blob_t); | |
629 | ls.back()->allocated_test(bluestore_pextent_t(111, 222)); | |
630 | ls.push_back(new bluestore_blob_t); | |
631 | ls.back()->init_csum(Checksummer::CSUM_XXHASH32, 16, 65536); | |
632 | ls.back()->csum_data = buffer::claim_malloc(4, strdup("abcd")); | |
633 | ls.back()->add_unused(0, 3); | |
634 | ls.back()->add_unused(8, 8); | |
635 | ls.back()->allocated_test(bluestore_pextent_t(0x40100000, 0x10000)); | |
636 | ls.back()->allocated_test( | |
637 | bluestore_pextent_t(bluestore_pextent_t::INVALID_OFFSET, 0x1000)); | |
638 | ls.back()->allocated_test(bluestore_pextent_t(0x40120000, 0x10000)); | |
639 | } | |
640 | ||
641 | ostream& operator<<(ostream& out, const bluestore_blob_t& o) | |
642 | { | |
643 | out << "blob(" << o.get_extents(); | |
644 | if (o.is_compressed()) { | |
645 | out << " clen 0x" << std::hex | |
646 | << o.get_logical_length() | |
647 | << " -> 0x" | |
648 | << o.get_compressed_payload_length() | |
649 | << std::dec; | |
650 | } | |
651 | if (o.flags) { | |
652 | out << " " << o.get_flags_string(); | |
653 | } | |
654 | if (o.csum_type) { | |
655 | out << " " << Checksummer::get_csum_type_string(o.csum_type) | |
656 | << "/0x" << std::hex << (1ull << o.csum_chunk_order) << std::dec; | |
657 | } | |
658 | if (o.has_unused()) | |
659 | out << " unused=0x" << std::hex << o.unused << std::dec; | |
660 | out << ")"; | |
661 | return out; | |
662 | } | |
663 | ||
664 | void bluestore_blob_t::calc_csum(uint64_t b_off, const bufferlist& bl) | |
665 | { | |
666 | switch (csum_type) { | |
667 | case Checksummer::CSUM_XXHASH32: | |
668 | Checksummer::calculate<Checksummer::xxhash32>( | |
669 | get_csum_chunk_size(), b_off, bl.length(), bl, &csum_data); | |
670 | break; | |
671 | case Checksummer::CSUM_XXHASH64: | |
672 | Checksummer::calculate<Checksummer::xxhash64>( | |
673 | get_csum_chunk_size(), b_off, bl.length(), bl, &csum_data); | |
674 | break;; | |
675 | case Checksummer::CSUM_CRC32C: | |
676 | Checksummer::calculate<Checksummer::crc32c>( | |
677 | get_csum_chunk_size(), b_off, bl.length(), bl, &csum_data); | |
678 | break; | |
679 | case Checksummer::CSUM_CRC32C_16: | |
680 | Checksummer::calculate<Checksummer::crc32c_16>( | |
681 | get_csum_chunk_size(), b_off, bl.length(), bl, &csum_data); | |
682 | break; | |
683 | case Checksummer::CSUM_CRC32C_8: | |
684 | Checksummer::calculate<Checksummer::crc32c_8>( | |
685 | get_csum_chunk_size(), b_off, bl.length(), bl, &csum_data); | |
686 | break; | |
687 | } | |
688 | } | |
689 | ||
690 | int bluestore_blob_t::verify_csum(uint64_t b_off, const bufferlist& bl, | |
691 | int* b_bad_off, uint64_t *bad_csum) const | |
692 | { | |
693 | int r = 0; | |
694 | ||
695 | *b_bad_off = -1; | |
696 | switch (csum_type) { | |
697 | case Checksummer::CSUM_NONE: | |
698 | break; | |
699 | case Checksummer::CSUM_XXHASH32: | |
700 | *b_bad_off = Checksummer::verify<Checksummer::xxhash32>( | |
701 | get_csum_chunk_size(), b_off, bl.length(), bl, csum_data, bad_csum); | |
702 | break; | |
703 | case Checksummer::CSUM_XXHASH64: | |
704 | *b_bad_off = Checksummer::verify<Checksummer::xxhash64>( | |
705 | get_csum_chunk_size(), b_off, bl.length(), bl, csum_data, bad_csum); | |
706 | break; | |
707 | case Checksummer::CSUM_CRC32C: | |
708 | *b_bad_off = Checksummer::verify<Checksummer::crc32c>( | |
709 | get_csum_chunk_size(), b_off, bl.length(), bl, csum_data, bad_csum); | |
710 | break; | |
711 | case Checksummer::CSUM_CRC32C_16: | |
712 | *b_bad_off = Checksummer::verify<Checksummer::crc32c_16>( | |
713 | get_csum_chunk_size(), b_off, bl.length(), bl, csum_data, bad_csum); | |
714 | break; | |
715 | case Checksummer::CSUM_CRC32C_8: | |
716 | *b_bad_off = Checksummer::verify<Checksummer::crc32c_8>( | |
717 | get_csum_chunk_size(), b_off, bl.length(), bl, csum_data, bad_csum); | |
718 | break; | |
719 | default: | |
720 | r = -EOPNOTSUPP; | |
721 | break; | |
722 | } | |
723 | ||
724 | if (r < 0) | |
725 | return r; | |
726 | else if (*b_bad_off >= 0) | |
727 | return -1; // bad checksum | |
728 | else | |
729 | return 0; | |
730 | } | |
731 | ||
// Record the physical allocations `allocs` as backing the blob range
// [b_off, b_off + length).
//
// Two cases are handled:
//  - the blob has no extents yet: the extent list is built from scratch as
//    an optional leading invalid extent of b_off bytes followed by one extent
//    per allocation (`length` is not consulted on this path);
//  - the blob already has extents: the extents spanning the target range,
//    each asserted to be invalid (unallocated), are replaced by an optional
//    invalid head, the new allocations, and an optional invalid tail.
732 | void bluestore_blob_t::allocated(uint32_t b_off, uint32_t length, const AllocExtentVector& allocs) | |
733 | { | |
734 | if (extents.size() == 0) { | |
735 | // if blob is compressed then logical length to be already configured | |
736 | // otherwise - to be unset. | |
737 | assert((is_compressed() && logical_length != 0) || | |
738 | (!is_compressed() && logical_length == 0)); | |
739 | ||
740 | extents.reserve(allocs.size() + (b_off ? 1 : 0)); | |
// a non-zero b_off leaves an unallocated hole at the front of the blob
741 | if (b_off) { | |
742 | extents.emplace_back( | |
743 | bluestore_pextent_t(bluestore_pextent_t::INVALID_OFFSET, b_off)); | |
744 | } | |
745 | uint32_t new_len = b_off; | |
746 | for (auto& a : allocs) { | |
747 | extents.emplace_back(a.offset, a.length); | |
748 | new_len += a.length; | |
749 | } | |
// for uncompressed blobs the logical length becomes hole + allocated bytes
750 | if (!is_compressed()) { | |
751 | logical_length = new_len; | |
752 | } | |
753 | } else { | |
754 | assert(!is_compressed()); // partial allocations are forbidden when | |
755 | // compressed | |
756 | assert(b_off < logical_length); | |
757 | uint32_t cur_offs = 0; | |
758 | auto start_it = extents.begin(); | |
759 | size_t pos = 0; | |
// advance start_it (and its index pos) to the extent containing b_off;
// cur_offs is the blob offset at which that extent starts
760 | while(true) { | |
761 | if (cur_offs + start_it->length > b_off) { | |
762 | break; | |
763 | } | |
764 | cur_offs += start_it->length; | |
765 | ++start_it; | |
766 | ++pos; | |
767 | } | |
// head: bytes of the first affected extent that precede b_off
768 | uint32_t head = b_off - cur_offs; | |
769 | uint32_t end_off = b_off + length; | |
770 | auto end_it = start_it; | |
771 | ||
// advance end_it to the extent containing end_off; every extent walked over
// must be invalid
772 | while (true) { | |
773 | assert(!end_it->is_valid()); | |
774 | if (cur_offs + end_it->length >= end_off) { | |
775 | break; | |
776 | } | |
777 | cur_offs += end_it->length; | |
778 | ++end_it; | |
779 | } | |
780 | assert(cur_offs + end_it->length >= end_off); | |
// tail: bytes of the last affected extent that follow end_off
781 | uint32_t tail = cur_offs + end_it->length - end_off; | |
782 | ||
// drop the affected extents and make room for head + allocs + tail, filled
// with zero-length invalid placeholders for now
783 | start_it = extents.erase(start_it, end_it + 1); | |
784 | size_t count = allocs.size(); | |
785 | count += head ? 1 : 0; | |
786 | count += tail ? 1 : 0; | |
787 | extents.insert(start_it, | |
788 | count, | |
789 | bluestore_pextent_t( | |
790 | bluestore_pextent_t::INVALID_OFFSET, 0)); | |
791 | ||
792 | // Workaround to resolve lack of proper iterator return in vector::insert | |
793 | // Looks like some gcc/stl implementations still lack it despite c++11 | |
794 | // support claim | |
795 | start_it = extents.begin() + pos; | |
796 | ||
// fill in the placeholders: head (stays invalid), the allocations, tail
// (stays invalid)
797 | if (head) { | |
798 | start_it->length = head; | |
799 | ++start_it; | |
800 | } | |
801 | for(auto& e : allocs) { | |
802 | *start_it = e; | |
803 | ++start_it; | |
804 | } | |
805 | if (tail) { | |
806 | start_it->length = tail; | |
807 | } | |
808 | } | |
809 | } | |
810 | ||
811 | // cut it out of extents | |
812 | struct vecbuilder { | |
813 | PExtentVector v; | |
814 | uint64_t invalid = 0; | |
815 | ||
816 | void add_invalid(uint64_t length) { | |
817 | invalid += length; | |
818 | } | |
819 | void flush() { | |
820 | if (invalid) { | |
821 | v.emplace_back(bluestore_pextent_t(bluestore_pextent_t::INVALID_OFFSET, | |
822 | invalid)); | |
823 | invalid = 0; | |
824 | } | |
825 | } | |
826 | void add(uint64_t offset, uint64_t length) { | |
827 | if (offset == bluestore_pextent_t::INVALID_OFFSET) { | |
828 | add_invalid(length); | |
829 | } | |
830 | else { | |
831 | flush(); | |
832 | v.emplace_back(bluestore_pextent_t(offset, length)); | |
833 | } | |
834 | } | |
835 | }; | |
836 | ||
837 | void bluestore_blob_t::allocated_test(const bluestore_pextent_t& alloc) | |
838 | { | |
839 | extents.emplace_back(alloc); | |
840 | if (!is_compressed()) { | |
841 | logical_length += alloc.length; | |
842 | } | |
843 | } | |
844 | ||
// Release physical space backing this blob.
//
// all     - when true, every valid extent is released and `logical` is not
//           consulted
// logical - list of blob-relative (logical) ranges to release when `all` is
//           false
// r       - out: physical extents that were released are appended here
//
// Returns true when everything was released (`all` path), false otherwise.
845 | bool bluestore_blob_t::release_extents(bool all, | |
846 | const PExtentVector& logical, | |
847 | PExtentVector* r) | |
848 | { | |
849 | // common case: all of it? | |
850 | if (all) { | |
851 | uint64_t pos = 0; | |
852 | for (auto& e : extents) { | |
853 | if (e.is_valid()) { | |
854 | r->push_back(e); | |
855 | } | |
856 | pos += e.length; | |
857 | } | |
858 | assert(is_compressed() || get_logical_length() == pos); | |
// collapse the extent list into a single invalid extent of the same total
// length
859 | extents.resize(1); | |
860 | extents[0].offset = bluestore_pextent_t::INVALID_OFFSET; | |
861 | extents[0].length = pos; | |
862 | return true; | |
863 | } | |
864 | // remove from pextents according to logical release list | |
// vb accumulates the replacement extent list; kept pieces are added as-is and
// released pieces are added as invalid space
865 | vecbuilder vb; | |
866 | auto loffs_it = logical.begin(); | |
867 | auto lend = logical.end(); | |
868 | uint32_t pext_loffs_start = 0; //starting loffset of the current pextent | |
869 | uint32_t pext_loffs = 0; //current loffset | |
870 | auto pext_it = extents.begin(); | |
871 | auto pext_end = extents.end(); | |
872 | while (pext_it != pext_end) { | |
// case 1: no (more) release range touches the current pextent -- keep
// whatever part of it has not been consumed yet and move to the next pextent
873 | if (loffs_it == lend || | |
874 | pext_loffs_start + pext_it->length <= loffs_it->offset) { | |
875 | int delta0 = pext_loffs - pext_loffs_start; | |
876 | assert(delta0 >= 0); | |
877 | if ((uint32_t)delta0 < pext_it->length) { | |
878 | vb.add(pext_it->offset + delta0, pext_it->length - delta0); | |
879 | } | |
880 | pext_loffs_start += pext_it->length; | |
881 | pext_loffs = pext_loffs_start; | |
882 | ++pext_it; | |
883 | } | |
// case 2: the current release range starts inside the current pextent
884 | else { | |
885 | //assert(pext_loffs == pext_loffs_start); | |
// delta0: how far into the current pextent we already are
886 | int delta0 = pext_loffs - pext_loffs_start; | |
887 | assert(delta0 >= 0); | |
888 | ||
// delta: bytes to keep between the current position and the start of the
// release range
889 | int delta = loffs_it->offset - pext_loffs; | |
890 | assert(delta >= 0); | |
891 | if (delta > 0) { | |
892 | vb.add(pext_it->offset + delta0, delta); | |
893 | pext_loffs += delta; | |
894 | } | |
895 | ||
// last_r points at the last entry already in r (or r->end() when r is empty)
// so a physically contiguous release can be merged into it
896 | PExtentVector::iterator last_r = r->end(); | |
897 | if (r->begin() != last_r) { | |
898 | --last_r; | |
899 | } | |
900 | uint32_t to_release = loffs_it->length; | |
// release the range piece by piece; it may span several pextents
901 | do { | |
902 | uint32_t to_release_part = | |
903 | MIN(pext_it->length - delta0 - delta, to_release); | |
904 | auto o = pext_it->offset + delta0 + delta; | |
905 | if (last_r != r->end() && last_r->offset + last_r->length == o) { | |
906 | last_r->length += to_release_part; | |
907 | } | |
908 | else { | |
909 | last_r = r->emplace(r->end(), o, to_release_part); | |
910 | } | |
911 | to_release -= to_release_part; | |
912 | pext_loffs += to_release_part; | |
// current pextent fully consumed: step to the next one and restart at its
// beginning
913 | if (pext_loffs == pext_loffs_start + pext_it->length) { | |
914 | pext_loffs_start += pext_it->length; | |
915 | pext_loffs = pext_loffs_start; | |
916 | pext_it++; | |
917 | delta0 = delta = 0; | |
918 | } | |
919 | } while (to_release > 0 && pext_it != pext_end); | |
// the released bytes become invalid space in the rebuilt extent list
920 | vb.add_invalid(loffs_it->length - to_release); | |
921 | ++loffs_it; | |
922 | } | |
923 | } | |
924 | vb.flush(); | |
925 | extents.swap(vb.v); | |
926 | return false; | |
927 | } | |
928 | ||
// Split this blob at blob_offset: extents (and parts of extents) at or after
// blob_offset are moved into rb, this blob keeps the part before it.
// Flags are copied to rb, and when checksums are present the checksum
// type/order are copied and csum_data is divided between the two blobs at
// the checksum chunk corresponding to blob_offset (which must be
// chunk-aligned).
929 | void bluestore_blob_t::split(uint32_t blob_offset, bluestore_blob_t& rb) | |
930 | { | |
// left: bytes still to skip before reaching the split point
// llen_lb / llen_rb: logical lengths accumulated for the left / right blob
// i: index of the extent p points at, used to truncate `extents` afterwards
931 | size_t left = blob_offset; | |
932 | uint32_t llen_lb = 0; | |
933 | uint32_t llen_rb = 0; | |
934 | unsigned i = 0; | |
935 | for (auto p = extents.begin(); p != extents.end(); ++p, ++i) { | |
// extent lies entirely before the split point: it stays on the left
936 | if (p->length <= left) { | |
937 | left -= p->length; | |
938 | llen_lb += p->length; | |
939 | continue; | |
940 | } | |
// the split point falls inside this extent: the part past `left` goes to rb
// (keeping validity), and this extent is shortened to `left`
941 | if (left) { | |
942 | if (p->is_valid()) { | |
943 | rb.extents.emplace_back(bluestore_pextent_t(p->offset + left, | |
944 | p->length - left)); | |
945 | } | |
946 | else { | |
947 | rb.extents.emplace_back(bluestore_pextent_t( | |
948 | bluestore_pextent_t::INVALID_OFFSET, | |
949 | p->length - left)); | |
950 | } | |
951 | llen_rb += p->length - left; | |
952 | llen_lb += left; | |
953 | p->length = left; | |
954 | ++i; | |
955 | ++p; | |
956 | } | |
// every remaining extent moves to rb unchanged
957 | while (p != extents.end()) { | |
958 | llen_rb += p->length; | |
959 | rb.extents.push_back(*p++); | |
960 | } | |
// keep only the first i extents on the left side
961 | extents.resize(i); | |
962 | logical_length = llen_lb; | |
963 | rb.logical_length = llen_rb; | |
964 | break; | |
965 | } | |
966 | rb.flags = flags; | |
967 | ||
968 | if (has_csum()) { | |
969 | rb.csum_type = csum_type; | |
970 | rb.csum_chunk_order = csum_chunk_order; | |
971 | size_t csum_order = get_csum_chunk_size(); | |
972 | assert(blob_offset % csum_order == 0); | |
// pos: byte offset within csum_data of the first checksum belonging to rb
973 | size_t pos = (blob_offset / csum_order) * get_csum_value_size(); | |
974 | // deep copy csum data | |
975 | bufferptr old; | |
976 | old.swap(csum_data); | |
977 | rb.csum_data = bufferptr(old.c_str() + pos, old.length() - pos); | |
978 | csum_data = bufferptr(old.c_str(), pos); | |
979 | } | |
980 | } | |
981 | ||
982 | // bluestore_shared_blob_t | |
983 | ||
984 | void bluestore_shared_blob_t::dump(Formatter *f) const | |
985 | { | |
986 | f->dump_int("sbid", sbid); | |
987 | f->dump_object("ref_map", ref_map); | |
988 | } | |
989 | ||
990 | void bluestore_shared_blob_t::generate_test_instances( | |
991 | list<bluestore_shared_blob_t*>& ls) | |
992 | { | |
993 | ls.push_back(new bluestore_shared_blob_t(1)); | |
994 | } | |
995 | ||
996 | ostream& operator<<(ostream& out, const bluestore_shared_blob_t& sb) | |
997 | { | |
998 | out << " sbid 0x" << std::hex << sb.sbid << std::dec; | |
999 | out << " ref_map(" << sb.ref_map << ")"; | |
1000 | return out; | |
1001 | } | |
1002 | ||
1003 | // bluestore_onode_t | |
1004 | ||
1005 | void bluestore_onode_t::shard_info::dump(Formatter *f) const | |
1006 | { | |
1007 | f->dump_unsigned("offset", offset); | |
1008 | f->dump_unsigned("bytes", bytes); | |
1009 | } | |
1010 | ||
1011 | ostream& operator<<(ostream& out, const bluestore_onode_t::shard_info& si) | |
1012 | { | |
1013 | return out << std::hex << "0x" << si.offset << "(0x" << si.bytes << " bytes" | |
1014 | << std::dec << ")"; | |
1015 | } | |
1016 | ||
1017 | void bluestore_onode_t::dump(Formatter *f) const | |
1018 | { | |
1019 | f->dump_unsigned("nid", nid); | |
1020 | f->dump_unsigned("size", size); | |
1021 | f->open_object_section("attrs"); | |
1022 | for (auto p = attrs.begin(); p != attrs.end(); ++p) { | |
1023 | f->open_object_section("attr"); | |
1024 | f->dump_string("name", p->first.c_str()); // it's not quite std::string | |
1025 | f->dump_unsigned("len", p->second.length()); | |
1026 | f->close_section(); | |
1027 | } | |
1028 | f->close_section(); | |
1029 | f->dump_string("flags", get_flags_string()); | |
1030 | f->open_array_section("extent_map_shards"); | |
1031 | for (auto si : extent_map_shards) { | |
1032 | f->dump_object("shard", si); | |
1033 | } | |
1034 | f->close_section(); | |
1035 | f->dump_unsigned("expected_object_size", expected_object_size); | |
1036 | f->dump_unsigned("expected_write_size", expected_write_size); | |
1037 | f->dump_unsigned("alloc_hint_flags", alloc_hint_flags); | |
1038 | } | |
1039 | ||
1040 | void bluestore_onode_t::generate_test_instances(list<bluestore_onode_t*>& o) | |
1041 | { | |
1042 | o.push_back(new bluestore_onode_t()); | |
1043 | // FIXME | |
1044 | } | |
1045 | ||
1046 | // bluestore_deferred_op_t | |
1047 | ||
1048 | void bluestore_deferred_op_t::dump(Formatter *f) const | |
1049 | { | |
1050 | f->dump_unsigned("op", (int)op); | |
1051 | f->dump_unsigned("data_len", data.length()); | |
1052 | f->open_array_section("extents"); | |
1053 | for (auto& e : extents) { | |
1054 | f->dump_object("extent", e); | |
1055 | } | |
1056 | f->close_section(); | |
1057 | } | |
1058 | ||
1059 | void bluestore_deferred_op_t::generate_test_instances(list<bluestore_deferred_op_t*>& o) | |
1060 | { | |
1061 | o.push_back(new bluestore_deferred_op_t); | |
1062 | o.push_back(new bluestore_deferred_op_t); | |
1063 | o.back()->op = OP_WRITE; | |
1064 | o.back()->extents.push_back(bluestore_pextent_t(1, 2)); | |
1065 | o.back()->extents.push_back(bluestore_pextent_t(100, 5)); | |
1066 | o.back()->data.append("my data"); | |
1067 | } | |
1068 | ||
1069 | void bluestore_deferred_transaction_t::dump(Formatter *f) const | |
1070 | { | |
1071 | f->dump_unsigned("seq", seq); | |
1072 | f->open_array_section("ops"); | |
1073 | for (list<bluestore_deferred_op_t>::const_iterator p = ops.begin(); p != ops.end(); ++p) { | |
1074 | f->dump_object("op", *p); | |
1075 | } | |
1076 | f->close_section(); | |
1077 | ||
1078 | f->open_array_section("released extents"); | |
1079 | for (interval_set<uint64_t>::const_iterator p = released.begin(); p != released.end(); ++p) { | |
1080 | f->open_object_section("extent"); | |
1081 | f->dump_unsigned("offset", p.get_start()); | |
1082 | f->dump_unsigned("length", p.get_len()); | |
1083 | f->close_section(); | |
1084 | } | |
1085 | f->close_section(); | |
1086 | } | |
1087 | ||
1088 | void bluestore_deferred_transaction_t::generate_test_instances(list<bluestore_deferred_transaction_t*>& o) | |
1089 | { | |
1090 | o.push_back(new bluestore_deferred_transaction_t()); | |
1091 | o.push_back(new bluestore_deferred_transaction_t()); | |
1092 | o.back()->seq = 123; | |
1093 | o.back()->ops.push_back(bluestore_deferred_op_t()); | |
1094 | o.back()->ops.push_back(bluestore_deferred_op_t()); | |
1095 | o.back()->ops.back().op = bluestore_deferred_op_t::OP_WRITE; | |
1096 | o.back()->ops.back().extents.push_back(bluestore_pextent_t(1,7)); | |
1097 | o.back()->ops.back().data.append("foodata"); | |
1098 | } | |
1099 | ||
1100 | void bluestore_compression_header_t::dump(Formatter *f) const | |
1101 | { | |
1102 | f->dump_unsigned("type", type); | |
1103 | f->dump_unsigned("length", length); | |
1104 | } | |
1105 | ||
1106 | void bluestore_compression_header_t::generate_test_instances( | |
1107 | list<bluestore_compression_header_t*>& o) | |
1108 | { | |
1109 | o.push_back(new bluestore_compression_header_t); | |
1110 | o.push_back(new bluestore_compression_header_t(1)); | |
1111 | o.back()->length = 1234; | |
1112 | } |