]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2014 Red Hat | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | #include "bluestore_types.h" | |
16 | #include "common/Formatter.h" | |
17 | #include "common/Checksummer.h" | |
18 | #include "include/stringify.h" | |
19 | ||
20 | void ExtentList::add_extents(int64_t start, int64_t count) { | |
21 | AllocExtent *last_extent = NULL; | |
22 | bool can_merge = false; | |
23 | ||
24 | if (!m_extents->empty()) { | |
25 | last_extent = &(m_extents->back()); | |
26 | uint64_t last_offset = last_extent->end() / m_block_size; | |
27 | uint32_t last_length = last_extent->length / m_block_size; | |
28 | if ((last_offset == (uint64_t) start) && | |
29 | (!m_max_blocks || (last_length + count) <= m_max_blocks)) { | |
30 | can_merge = true; | |
31 | } | |
32 | } | |
33 | ||
34 | if (can_merge) { | |
35 | last_extent->length += (count * m_block_size); | |
36 | } else { | |
37 | m_extents->emplace_back(AllocExtent(start * m_block_size, | |
38 | count * m_block_size)); | |
39 | } | |
40 | } | |
41 | ||
42 | // bluestore_bdev_label_t | |
43 | ||
44 | void bluestore_bdev_label_t::encode(bufferlist& bl) const | |
45 | { | |
46 | // be slightly friendly to someone who looks at the device | |
47 | bl.append("bluestore block device\n"); | |
48 | bl.append(stringify(osd_uuid)); | |
49 | bl.append("\n"); | |
50 | ENCODE_START(1, 1, bl); | |
51 | ::encode(osd_uuid, bl); | |
52 | ::encode(size, bl); | |
53 | ::encode(btime, bl); | |
54 | ::encode(description, bl); | |
55 | ENCODE_FINISH(bl); | |
56 | } | |
57 | ||
58 | void bluestore_bdev_label_t::decode(bufferlist::iterator& p) | |
59 | { | |
60 | p.advance(60); // see above | |
61 | DECODE_START(1, p); | |
62 | ::decode(osd_uuid, p); | |
63 | ::decode(size, p); | |
64 | ::decode(btime, p); | |
65 | ::decode(description, p); | |
66 | DECODE_FINISH(p); | |
67 | } | |
68 | ||
69 | void bluestore_bdev_label_t::dump(Formatter *f) const | |
70 | { | |
71 | f->dump_stream("osd_uuid") << osd_uuid; | |
72 | f->dump_unsigned("size", size); | |
73 | f->dump_stream("btime") << btime; | |
74 | f->dump_string("description", description); | |
75 | } | |
76 | ||
77 | void bluestore_bdev_label_t::generate_test_instances( | |
78 | list<bluestore_bdev_label_t*>& o) | |
79 | { | |
80 | o.push_back(new bluestore_bdev_label_t); | |
81 | o.push_back(new bluestore_bdev_label_t); | |
82 | o.back()->size = 123; | |
83 | o.back()->btime = utime_t(4, 5); | |
84 | o.back()->description = "fakey"; | |
85 | } | |
86 | ||
87 | ostream& operator<<(ostream& out, const bluestore_bdev_label_t& l) | |
88 | { | |
89 | return out << "bdev(osd_uuid " << l.osd_uuid | |
90 | << " size 0x" << std::hex << l.size << std::dec | |
91 | << " btime " << l.btime | |
92 | << " desc " << l.description << ")"; | |
93 | } | |
94 | ||
95 | // cnode_t | |
96 | ||
97 | void bluestore_cnode_t::dump(Formatter *f) const | |
98 | { | |
99 | f->dump_unsigned("bits", bits); | |
100 | } | |
101 | ||
102 | void bluestore_cnode_t::generate_test_instances(list<bluestore_cnode_t*>& o) | |
103 | { | |
104 | o.push_back(new bluestore_cnode_t()); | |
105 | o.push_back(new bluestore_cnode_t(0)); | |
106 | o.push_back(new bluestore_cnode_t(123)); | |
107 | } | |
108 | ||
109 | // bluestore_extent_ref_map_t | |
110 | ||
111 | void bluestore_extent_ref_map_t::_check() const | |
112 | { | |
113 | uint64_t pos = 0; | |
114 | unsigned refs = 0; | |
115 | for (const auto &p : ref_map) { | |
116 | if (p.first < pos) | |
117 | assert(0 == "overlap"); | |
118 | if (p.first == pos && p.second.refs == refs) | |
119 | assert(0 == "unmerged"); | |
120 | pos = p.first + p.second.length; | |
121 | refs = p.second.refs; | |
122 | } | |
123 | } | |
124 | ||
125 | void bluestore_extent_ref_map_t::_maybe_merge_left( | |
126 | map<uint64_t,record_t>::iterator& p) | |
127 | { | |
128 | if (p == ref_map.begin()) | |
129 | return; | |
130 | auto q = p; | |
131 | --q; | |
132 | if (q->second.refs == p->second.refs && | |
133 | q->first + q->second.length == p->first) { | |
134 | q->second.length += p->second.length; | |
135 | ref_map.erase(p); | |
136 | p = q; | |
137 | } | |
138 | } | |
139 | ||
140 | void bluestore_extent_ref_map_t::get(uint64_t offset, uint32_t length) | |
141 | { | |
142 | auto p = ref_map.lower_bound(offset); | |
143 | if (p != ref_map.begin()) { | |
144 | --p; | |
145 | if (p->first + p->second.length <= offset) { | |
146 | ++p; | |
147 | } | |
148 | } | |
149 | while (length > 0) { | |
150 | if (p == ref_map.end()) { | |
151 | // nothing after offset; add the whole thing. | |
152 | p = ref_map.insert( | |
153 | map<uint64_t,record_t>::value_type(offset, record_t(length, 1))).first; | |
154 | break; | |
155 | } | |
156 | if (p->first > offset) { | |
157 | // gap | |
158 | uint64_t newlen = MIN(p->first - offset, length); | |
159 | p = ref_map.insert( | |
160 | map<uint64_t,record_t>::value_type(offset, | |
161 | record_t(newlen, 1))).first; | |
162 | offset += newlen; | |
163 | length -= newlen; | |
164 | _maybe_merge_left(p); | |
165 | ++p; | |
166 | continue; | |
167 | } | |
168 | if (p->first < offset) { | |
169 | // split off the portion before offset | |
170 | assert(p->first + p->second.length > offset); | |
171 | uint64_t left = p->first + p->second.length - offset; | |
172 | p->second.length = offset - p->first; | |
173 | p = ref_map.insert(map<uint64_t,record_t>::value_type( | |
174 | offset, record_t(left, p->second.refs))).first; | |
175 | // continue below | |
176 | } | |
177 | assert(p->first == offset); | |
178 | if (length < p->second.length) { | |
179 | ref_map.insert(make_pair(offset + length, | |
180 | record_t(p->second.length - length, | |
181 | p->second.refs))); | |
182 | p->second.length = length; | |
183 | ++p->second.refs; | |
184 | break; | |
185 | } | |
186 | ++p->second.refs; | |
187 | offset += p->second.length; | |
188 | length -= p->second.length; | |
189 | _maybe_merge_left(p); | |
190 | ++p; | |
191 | } | |
192 | if (p != ref_map.end()) | |
193 | _maybe_merge_left(p); | |
194 | //_check(); | |
195 | } | |
196 | ||
197 | void bluestore_extent_ref_map_t::put( | |
198 | uint64_t offset, uint32_t length, | |
31f18b77 FG |
199 | PExtentVector *release, |
200 | bool *maybe_unshared) | |
7c673cae FG |
201 | { |
202 | //NB: existing entries in 'release' container must be preserved! | |
31f18b77 | 203 | bool unshared = true; |
7c673cae FG |
204 | auto p = ref_map.lower_bound(offset); |
205 | if (p == ref_map.end() || p->first > offset) { | |
206 | if (p == ref_map.begin()) { | |
207 | assert(0 == "put on missing extent (nothing before)"); | |
208 | } | |
209 | --p; | |
210 | if (p->first + p->second.length <= offset) { | |
211 | assert(0 == "put on missing extent (gap)"); | |
212 | } | |
213 | } | |
214 | if (p->first < offset) { | |
215 | uint64_t left = p->first + p->second.length - offset; | |
216 | p->second.length = offset - p->first; | |
31f18b77 FG |
217 | if (p->second.refs != 1) { |
218 | unshared = false; | |
219 | } | |
7c673cae FG |
220 | p = ref_map.insert(map<uint64_t,record_t>::value_type( |
221 | offset, record_t(left, p->second.refs))).first; | |
222 | } | |
223 | while (length > 0) { | |
224 | assert(p->first == offset); | |
225 | if (length < p->second.length) { | |
31f18b77 FG |
226 | if (p->second.refs != 1) { |
227 | unshared = false; | |
228 | } | |
7c673cae FG |
229 | ref_map.insert(make_pair(offset + length, |
230 | record_t(p->second.length - length, | |
231 | p->second.refs))); | |
232 | if (p->second.refs > 1) { | |
233 | p->second.length = length; | |
234 | --p->second.refs; | |
31f18b77 FG |
235 | if (p->second.refs != 1) { |
236 | unshared = false; | |
237 | } | |
7c673cae FG |
238 | _maybe_merge_left(p); |
239 | } else { | |
240 | if (release) | |
241 | release->push_back(bluestore_pextent_t(p->first, length)); | |
242 | ref_map.erase(p); | |
243 | } | |
31f18b77 | 244 | goto out; |
7c673cae FG |
245 | } |
246 | offset += p->second.length; | |
247 | length -= p->second.length; | |
248 | if (p->second.refs > 1) { | |
249 | --p->second.refs; | |
31f18b77 FG |
250 | if (p->second.refs != 1) { |
251 | unshared = false; | |
252 | } | |
7c673cae FG |
253 | _maybe_merge_left(p); |
254 | ++p; | |
255 | } else { | |
256 | if (release) | |
257 | release->push_back(bluestore_pextent_t(p->first, p->second.length)); | |
258 | ref_map.erase(p++); | |
259 | } | |
260 | } | |
261 | if (p != ref_map.end()) | |
262 | _maybe_merge_left(p); | |
263 | //_check(); | |
31f18b77 FG |
264 | out: |
265 | if (maybe_unshared) { | |
266 | if (unshared) { | |
267 | // we haven't seen a ref != 1 yet; check the whole map. | |
268 | for (auto& p : ref_map) { | |
269 | if (p.second.refs != 1) { | |
270 | unshared = false; | |
271 | break; | |
272 | } | |
273 | } | |
274 | } | |
275 | *maybe_unshared = unshared; | |
276 | } | |
7c673cae FG |
277 | } |
278 | ||
279 | bool bluestore_extent_ref_map_t::contains(uint64_t offset, uint32_t length) const | |
280 | { | |
281 | auto p = ref_map.lower_bound(offset); | |
282 | if (p == ref_map.end() || p->first > offset) { | |
283 | if (p == ref_map.begin()) { | |
284 | return false; // nothing before | |
285 | } | |
286 | --p; | |
287 | if (p->first + p->second.length <= offset) { | |
288 | return false; // gap | |
289 | } | |
290 | } | |
291 | while (length > 0) { | |
292 | if (p == ref_map.end()) | |
293 | return false; | |
294 | if (p->first > offset) | |
295 | return false; | |
296 | if (p->first + p->second.length >= offset + length) | |
297 | return true; | |
298 | uint64_t overlap = p->first + p->second.length - offset; | |
299 | offset += overlap; | |
300 | length -= overlap; | |
301 | ++p; | |
302 | } | |
303 | return true; | |
304 | } | |
305 | ||
306 | bool bluestore_extent_ref_map_t::intersects( | |
307 | uint64_t offset, | |
308 | uint32_t length) const | |
309 | { | |
310 | auto p = ref_map.lower_bound(offset); | |
311 | if (p != ref_map.begin()) { | |
312 | --p; | |
313 | if (p->first + p->second.length <= offset) { | |
314 | ++p; | |
315 | } | |
316 | } | |
317 | if (p == ref_map.end()) | |
318 | return false; | |
319 | if (p->first >= offset + length) | |
320 | return false; | |
321 | return true; // intersects p! | |
322 | } | |
323 | ||
324 | void bluestore_extent_ref_map_t::dump(Formatter *f) const | |
325 | { | |
326 | f->open_array_section("ref_map"); | |
327 | for (auto& p : ref_map) { | |
328 | f->open_object_section("ref"); | |
329 | f->dump_unsigned("offset", p.first); | |
330 | f->dump_unsigned("length", p.second.length); | |
331 | f->dump_unsigned("refs", p.second.refs); | |
332 | f->close_section(); | |
333 | } | |
334 | f->close_section(); | |
335 | } | |
336 | ||
337 | void bluestore_extent_ref_map_t::generate_test_instances( | |
338 | list<bluestore_extent_ref_map_t*>& o) | |
339 | { | |
340 | o.push_back(new bluestore_extent_ref_map_t); | |
341 | o.push_back(new bluestore_extent_ref_map_t); | |
342 | o.back()->get(10, 10); | |
343 | o.back()->get(18, 22); | |
344 | o.back()->get(20, 20); | |
345 | o.back()->get(10, 25); | |
346 | o.back()->get(15, 20); | |
347 | } | |
348 | ||
349 | ostream& operator<<(ostream& out, const bluestore_extent_ref_map_t& m) | |
350 | { | |
351 | out << "ref_map("; | |
352 | for (auto p = m.ref_map.begin(); p != m.ref_map.end(); ++p) { | |
353 | if (p != m.ref_map.begin()) | |
354 | out << ","; | |
355 | out << std::hex << "0x" << p->first << "~" << p->second.length << std::dec | |
356 | << "=" << p->second.refs; | |
357 | } | |
358 | out << ")"; | |
359 | return out; | |
360 | } | |
361 | ||
362 | // bluestore_blob_use_tracker_t | |
363 | ||
364 | void bluestore_blob_use_tracker_t::allocate() | |
365 | { | |
366 | assert(num_au != 0); | |
367 | bytes_per_au = new uint32_t[num_au]; | |
368 | for (uint32_t i = 0; i < num_au; ++i) { | |
369 | bytes_per_au[i] = 0; | |
370 | } | |
371 | } | |
372 | ||
373 | void bluestore_blob_use_tracker_t::init( | |
374 | uint32_t full_length, uint32_t _au_size) { | |
375 | assert(!au_size || is_empty()); | |
376 | assert(_au_size > 0); | |
377 | assert(full_length > 0); | |
378 | clear(); | |
379 | uint32_t _num_au = ROUND_UP_TO(full_length, _au_size) / _au_size; | |
380 | au_size = _au_size; | |
381 | if( _num_au > 1 ) { | |
382 | num_au = _num_au; | |
383 | allocate(); | |
384 | } | |
385 | } | |
386 | ||
387 | void bluestore_blob_use_tracker_t::get( | |
388 | uint32_t offset, uint32_t length) | |
389 | { | |
390 | assert(au_size); | |
391 | if (!num_au) { | |
392 | total_bytes += length; | |
393 | }else { | |
394 | auto end = offset + length; | |
395 | ||
396 | while (offset < end) { | |
397 | auto phase = offset % au_size; | |
398 | bytes_per_au[offset / au_size] += | |
399 | MIN(au_size - phase, end - offset); | |
400 | offset += (phase ? au_size - phase : au_size); | |
401 | } | |
402 | } | |
403 | } | |
404 | ||
405 | bool bluestore_blob_use_tracker_t::put( | |
406 | uint32_t offset, uint32_t length, | |
407 | PExtentVector *release_units) | |
408 | { | |
409 | assert(au_size); | |
410 | if (release_units) { | |
411 | release_units->clear(); | |
412 | } | |
413 | bool maybe_empty = true; | |
414 | if (!num_au) { | |
415 | assert(total_bytes >= length); | |
416 | total_bytes -= length; | |
417 | } else { | |
418 | auto end = offset + length; | |
419 | uint64_t next_offs = 0; | |
420 | while (offset < end) { | |
421 | auto phase = offset % au_size; | |
422 | size_t pos = offset / au_size; | |
423 | auto diff = MIN(au_size - phase, end - offset); | |
424 | assert(diff <= bytes_per_au[pos]); | |
425 | bytes_per_au[pos] -= diff; | |
426 | offset += (phase ? au_size - phase : au_size); | |
427 | if (bytes_per_au[pos] == 0) { | |
428 | if (release_units) { | |
429 | if (release_units->empty() || next_offs != pos * au_size) { | |
430 | release_units->emplace_back(pos * au_size, au_size); | |
431 | } else { | |
432 | release_units->back().length += au_size; | |
433 | } | |
434 | next_offs += au_size; | |
435 | } | |
436 | } else { | |
437 | maybe_empty = false; // micro optimization detecting we aren't empty | |
438 | // even in the affected extent | |
439 | } | |
440 | } | |
441 | } | |
442 | bool empty = maybe_empty ? !is_not_empty() : false; | |
443 | if (empty && release_units) { | |
444 | release_units->clear(); | |
445 | } | |
446 | return empty; | |
447 | } | |
448 | ||
449 | bool bluestore_blob_use_tracker_t::can_split() const | |
450 | { | |
451 | return num_au > 0; | |
452 | } | |
453 | ||
454 | bool bluestore_blob_use_tracker_t::can_split_at(uint32_t blob_offset) const | |
455 | { | |
456 | assert(au_size); | |
457 | return (blob_offset % au_size) == 0 && | |
458 | blob_offset < num_au * au_size; | |
459 | } | |
460 | ||
461 | void bluestore_blob_use_tracker_t::split( | |
462 | uint32_t blob_offset, | |
463 | bluestore_blob_use_tracker_t* r) | |
464 | { | |
465 | assert(au_size); | |
466 | assert(can_split()); | |
467 | assert(can_split_at(blob_offset)); | |
468 | assert(r->is_empty()); | |
469 | ||
470 | uint32_t new_num_au = blob_offset / au_size; | |
471 | r->init( (num_au - new_num_au) * au_size, au_size); | |
472 | ||
473 | for (auto i = new_num_au; i < num_au; i++) { | |
474 | r->get((i - new_num_au) * au_size, bytes_per_au[i]); | |
475 | bytes_per_au[i] = 0; | |
476 | } | |
477 | if (new_num_au == 0) { | |
478 | clear(); | |
479 | } else if (new_num_au == 1) { | |
480 | uint32_t tmp = bytes_per_au[0]; | |
481 | uint32_t _au_size = au_size; | |
482 | clear(); | |
483 | au_size = _au_size; | |
484 | total_bytes = tmp; | |
485 | } else { | |
486 | num_au = new_num_au; | |
487 | } | |
488 | } | |
489 | ||
490 | bool bluestore_blob_use_tracker_t::equal( | |
491 | const bluestore_blob_use_tracker_t& other) const | |
492 | { | |
493 | if (!num_au && !other.num_au) { | |
494 | return total_bytes == other.total_bytes && au_size == other.au_size; | |
495 | } else if (num_au && other.num_au) { | |
496 | if (num_au != other.num_au || au_size != other.au_size) { | |
497 | return false; | |
498 | } | |
499 | for (size_t i = 0; i < num_au; i++) { | |
500 | if (bytes_per_au[i] != other.bytes_per_au[i]) { | |
501 | return false; | |
502 | } | |
503 | } | |
504 | return true; | |
505 | } | |
506 | ||
507 | uint32_t n = num_au ? num_au : other.num_au; | |
508 | uint32_t referenced = | |
509 | num_au ? other.get_referenced_bytes() : get_referenced_bytes(); | |
510 | auto bytes_per_au_tmp = num_au ? bytes_per_au : other.bytes_per_au; | |
511 | uint32_t my_referenced = 0; | |
512 | for (size_t i = 0; i < n; i++) { | |
513 | my_referenced += bytes_per_au_tmp[i]; | |
514 | if (my_referenced > referenced) { | |
515 | return false; | |
516 | } | |
517 | } | |
518 | return my_referenced == referenced; | |
519 | } | |
520 | ||
521 | void bluestore_blob_use_tracker_t::dump(Formatter *f) const | |
522 | { | |
523 | f->dump_unsigned("num_au", num_au); | |
524 | f->dump_unsigned("au_size", au_size); | |
525 | if (!num_au) { | |
526 | f->dump_unsigned("total_bytes", total_bytes); | |
527 | } else { | |
528 | f->open_array_section("bytes_per_au"); | |
529 | for (size_t i = 0; i < num_au; ++i) { | |
530 | f->dump_unsigned("", bytes_per_au[i]); | |
531 | } | |
532 | f->close_section(); | |
533 | } | |
534 | } | |
535 | ||
536 | void bluestore_blob_use_tracker_t::generate_test_instances( | |
537 | list<bluestore_blob_use_tracker_t*>& o) | |
538 | { | |
539 | o.push_back(new bluestore_blob_use_tracker_t()); | |
540 | o.back()->init(16, 16); | |
541 | o.back()->get(10, 10); | |
542 | o.back()->get(10, 5); | |
543 | o.push_back(new bluestore_blob_use_tracker_t()); | |
544 | o.back()->init(60, 16); | |
545 | o.back()->get(18, 22); | |
546 | o.back()->get(20, 20); | |
547 | o.back()->get(15, 20); | |
548 | } | |
549 | ||
550 | ostream& operator<<(ostream& out, const bluestore_blob_use_tracker_t& m) | |
551 | { | |
552 | out << "use_tracker(" << std::hex; | |
553 | if (!m.num_au) { | |
554 | out << "0x" << m.au_size | |
555 | << " " | |
556 | << "0x" << m.total_bytes; | |
557 | } else { | |
558 | out << "0x" << m.num_au | |
559 | << "*0x" << m.au_size | |
560 | << " 0x["; | |
561 | for (size_t i = 0; i < m.num_au; ++i) { | |
562 | if (i != 0) | |
563 | out << ","; | |
564 | out << m.bytes_per_au[i]; | |
565 | } | |
566 | out << "]"; | |
567 | } | |
568 | out << std::dec << ")"; | |
569 | return out; | |
570 | } | |
571 | ||
572 | // bluestore_pextent_t | |
573 | ||
574 | void bluestore_pextent_t::dump(Formatter *f) const | |
575 | { | |
576 | f->dump_unsigned("offset", offset); | |
577 | f->dump_unsigned("length", length); | |
578 | } | |
579 | ||
580 | ostream& operator<<(ostream& out, const bluestore_pextent_t& o) { | |
581 | if (o.is_valid()) | |
582 | return out << "0x" << std::hex << o.offset << "~" << o.length << std::dec; | |
583 | else | |
584 | return out << "!~" << std::hex << o.length << std::dec; | |
585 | } | |
586 | ||
587 | void bluestore_pextent_t::generate_test_instances(list<bluestore_pextent_t*>& ls) | |
588 | { | |
589 | ls.push_back(new bluestore_pextent_t); | |
590 | ls.push_back(new bluestore_pextent_t(1, 2)); | |
591 | } | |
592 | ||
593 | // bluestore_blob_t | |
594 | ||
595 | string bluestore_blob_t::get_flags_string(unsigned flags) | |
596 | { | |
597 | string s; | |
7c673cae FG |
598 | if (flags & FLAG_COMPRESSED) { |
599 | if (s.length()) | |
600 | s += '+'; | |
601 | s += "compressed"; | |
602 | } | |
603 | if (flags & FLAG_CSUM) { | |
604 | if (s.length()) | |
605 | s += '+'; | |
606 | s += "csum"; | |
607 | } | |
608 | if (flags & FLAG_HAS_UNUSED) { | |
609 | if (s.length()) | |
610 | s += '+'; | |
611 | s += "has_unused"; | |
612 | } | |
613 | if (flags & FLAG_SHARED) { | |
614 | if (s.length()) | |
615 | s += '+'; | |
616 | s += "shared"; | |
617 | } | |
618 | ||
619 | return s; | |
620 | } | |
621 | ||
622 | size_t bluestore_blob_t::get_csum_value_size() const | |
623 | { | |
624 | return Checksummer::get_csum_value_size(csum_type); | |
625 | } | |
626 | ||
627 | void bluestore_blob_t::dump(Formatter *f) const | |
628 | { | |
629 | f->open_array_section("extents"); | |
630 | for (auto& p : extents) { | |
631 | f->dump_object("extent", p); | |
632 | } | |
633 | f->close_section(); | |
634 | f->dump_unsigned("logical_length", logical_length); | |
635 | f->dump_unsigned("compressed_length", compressed_length); | |
636 | f->dump_unsigned("flags", flags); | |
637 | f->dump_unsigned("csum_type", csum_type); | |
638 | f->dump_unsigned("csum_chunk_order", csum_chunk_order); | |
639 | f->open_array_section("csum_data"); | |
640 | size_t n = get_csum_count(); | |
641 | for (unsigned i = 0; i < n; ++i) | |
642 | f->dump_unsigned("csum", get_csum_item(i)); | |
643 | f->close_section(); | |
644 | f->dump_unsigned("unused", unused); | |
645 | } | |
646 | ||
647 | void bluestore_blob_t::generate_test_instances(list<bluestore_blob_t*>& ls) | |
648 | { | |
649 | ls.push_back(new bluestore_blob_t); | |
650 | ls.push_back(new bluestore_blob_t(0)); | |
651 | ls.push_back(new bluestore_blob_t); | |
652 | ls.back()->allocated_test(bluestore_pextent_t(111, 222)); | |
653 | ls.push_back(new bluestore_blob_t); | |
654 | ls.back()->init_csum(Checksummer::CSUM_XXHASH32, 16, 65536); | |
655 | ls.back()->csum_data = buffer::claim_malloc(4, strdup("abcd")); | |
656 | ls.back()->add_unused(0, 3); | |
657 | ls.back()->add_unused(8, 8); | |
658 | ls.back()->allocated_test(bluestore_pextent_t(0x40100000, 0x10000)); | |
659 | ls.back()->allocated_test( | |
660 | bluestore_pextent_t(bluestore_pextent_t::INVALID_OFFSET, 0x1000)); | |
661 | ls.back()->allocated_test(bluestore_pextent_t(0x40120000, 0x10000)); | |
662 | } | |
663 | ||
664 | ostream& operator<<(ostream& out, const bluestore_blob_t& o) | |
665 | { | |
666 | out << "blob(" << o.get_extents(); | |
667 | if (o.is_compressed()) { | |
668 | out << " clen 0x" << std::hex | |
669 | << o.get_logical_length() | |
670 | << " -> 0x" | |
671 | << o.get_compressed_payload_length() | |
672 | << std::dec; | |
673 | } | |
674 | if (o.flags) { | |
675 | out << " " << o.get_flags_string(); | |
676 | } | |
677 | if (o.csum_type) { | |
678 | out << " " << Checksummer::get_csum_type_string(o.csum_type) | |
679 | << "/0x" << std::hex << (1ull << o.csum_chunk_order) << std::dec; | |
680 | } | |
681 | if (o.has_unused()) | |
682 | out << " unused=0x" << std::hex << o.unused << std::dec; | |
683 | out << ")"; | |
684 | return out; | |
685 | } | |
686 | ||
687 | void bluestore_blob_t::calc_csum(uint64_t b_off, const bufferlist& bl) | |
688 | { | |
689 | switch (csum_type) { | |
690 | case Checksummer::CSUM_XXHASH32: | |
691 | Checksummer::calculate<Checksummer::xxhash32>( | |
692 | get_csum_chunk_size(), b_off, bl.length(), bl, &csum_data); | |
693 | break; | |
694 | case Checksummer::CSUM_XXHASH64: | |
695 | Checksummer::calculate<Checksummer::xxhash64>( | |
696 | get_csum_chunk_size(), b_off, bl.length(), bl, &csum_data); | |
697 | break;; | |
698 | case Checksummer::CSUM_CRC32C: | |
699 | Checksummer::calculate<Checksummer::crc32c>( | |
700 | get_csum_chunk_size(), b_off, bl.length(), bl, &csum_data); | |
701 | break; | |
702 | case Checksummer::CSUM_CRC32C_16: | |
703 | Checksummer::calculate<Checksummer::crc32c_16>( | |
704 | get_csum_chunk_size(), b_off, bl.length(), bl, &csum_data); | |
705 | break; | |
706 | case Checksummer::CSUM_CRC32C_8: | |
707 | Checksummer::calculate<Checksummer::crc32c_8>( | |
708 | get_csum_chunk_size(), b_off, bl.length(), bl, &csum_data); | |
709 | break; | |
710 | } | |
711 | } | |
712 | ||
713 | int bluestore_blob_t::verify_csum(uint64_t b_off, const bufferlist& bl, | |
714 | int* b_bad_off, uint64_t *bad_csum) const | |
715 | { | |
716 | int r = 0; | |
717 | ||
718 | *b_bad_off = -1; | |
719 | switch (csum_type) { | |
720 | case Checksummer::CSUM_NONE: | |
721 | break; | |
722 | case Checksummer::CSUM_XXHASH32: | |
723 | *b_bad_off = Checksummer::verify<Checksummer::xxhash32>( | |
724 | get_csum_chunk_size(), b_off, bl.length(), bl, csum_data, bad_csum); | |
725 | break; | |
726 | case Checksummer::CSUM_XXHASH64: | |
727 | *b_bad_off = Checksummer::verify<Checksummer::xxhash64>( | |
728 | get_csum_chunk_size(), b_off, bl.length(), bl, csum_data, bad_csum); | |
729 | break; | |
730 | case Checksummer::CSUM_CRC32C: | |
731 | *b_bad_off = Checksummer::verify<Checksummer::crc32c>( | |
732 | get_csum_chunk_size(), b_off, bl.length(), bl, csum_data, bad_csum); | |
733 | break; | |
734 | case Checksummer::CSUM_CRC32C_16: | |
735 | *b_bad_off = Checksummer::verify<Checksummer::crc32c_16>( | |
736 | get_csum_chunk_size(), b_off, bl.length(), bl, csum_data, bad_csum); | |
737 | break; | |
738 | case Checksummer::CSUM_CRC32C_8: | |
739 | *b_bad_off = Checksummer::verify<Checksummer::crc32c_8>( | |
740 | get_csum_chunk_size(), b_off, bl.length(), bl, csum_data, bad_csum); | |
741 | break; | |
742 | default: | |
743 | r = -EOPNOTSUPP; | |
744 | break; | |
745 | } | |
746 | ||
747 | if (r < 0) | |
748 | return r; | |
749 | else if (*b_bad_off >= 0) | |
750 | return -1; // bad checksum | |
751 | else | |
752 | return 0; | |
753 | } | |
754 | ||
755 | void bluestore_blob_t::allocated(uint32_t b_off, uint32_t length, const AllocExtentVector& allocs) | |
756 | { | |
757 | if (extents.size() == 0) { | |
758 | // if blob is compressed then logical length to be already configured | |
759 | // otherwise - to be unset. | |
760 | assert((is_compressed() && logical_length != 0) || | |
761 | (!is_compressed() && logical_length == 0)); | |
762 | ||
763 | extents.reserve(allocs.size() + (b_off ? 1 : 0)); | |
764 | if (b_off) { | |
765 | extents.emplace_back( | |
766 | bluestore_pextent_t(bluestore_pextent_t::INVALID_OFFSET, b_off)); | |
767 | } | |
768 | uint32_t new_len = b_off; | |
769 | for (auto& a : allocs) { | |
770 | extents.emplace_back(a.offset, a.length); | |
771 | new_len += a.length; | |
772 | } | |
773 | if (!is_compressed()) { | |
774 | logical_length = new_len; | |
775 | } | |
776 | } else { | |
777 | assert(!is_compressed()); // partial allocations are forbidden when | |
778 | // compressed | |
779 | assert(b_off < logical_length); | |
780 | uint32_t cur_offs = 0; | |
781 | auto start_it = extents.begin(); | |
782 | size_t pos = 0; | |
783 | while(true) { | |
31f18b77 | 784 | assert(start_it != extents.end()); |
7c673cae FG |
785 | if (cur_offs + start_it->length > b_off) { |
786 | break; | |
787 | } | |
788 | cur_offs += start_it->length; | |
789 | ++start_it; | |
790 | ++pos; | |
791 | } | |
792 | uint32_t head = b_off - cur_offs; | |
793 | uint32_t end_off = b_off + length; | |
794 | auto end_it = start_it; | |
795 | ||
796 | while (true) { | |
31f18b77 | 797 | assert(end_it != extents.end()); |
7c673cae FG |
798 | assert(!end_it->is_valid()); |
799 | if (cur_offs + end_it->length >= end_off) { | |
800 | break; | |
801 | } | |
802 | cur_offs += end_it->length; | |
803 | ++end_it; | |
804 | } | |
805 | assert(cur_offs + end_it->length >= end_off); | |
806 | uint32_t tail = cur_offs + end_it->length - end_off; | |
807 | ||
808 | start_it = extents.erase(start_it, end_it + 1); | |
809 | size_t count = allocs.size(); | |
810 | count += head ? 1 : 0; | |
811 | count += tail ? 1 : 0; | |
812 | extents.insert(start_it, | |
813 | count, | |
814 | bluestore_pextent_t( | |
815 | bluestore_pextent_t::INVALID_OFFSET, 0)); | |
816 | ||
817 | // Workaround to resolve lack of proper iterator return in vector::insert | |
818 | // Looks like some gcc/stl implementations still lack it despite c++11 | |
819 | // support claim | |
820 | start_it = extents.begin() + pos; | |
821 | ||
822 | if (head) { | |
823 | start_it->length = head; | |
824 | ++start_it; | |
825 | } | |
826 | for(auto& e : allocs) { | |
827 | *start_it = e; | |
828 | ++start_it; | |
829 | } | |
830 | if (tail) { | |
831 | start_it->length = tail; | |
832 | } | |
833 | } | |
834 | } | |
835 | ||
836 | // cut it out of extents | |
837 | struct vecbuilder { | |
838 | PExtentVector v; | |
839 | uint64_t invalid = 0; | |
840 | ||
841 | void add_invalid(uint64_t length) { | |
842 | invalid += length; | |
843 | } | |
844 | void flush() { | |
845 | if (invalid) { | |
846 | v.emplace_back(bluestore_pextent_t(bluestore_pextent_t::INVALID_OFFSET, | |
847 | invalid)); | |
848 | invalid = 0; | |
849 | } | |
850 | } | |
851 | void add(uint64_t offset, uint64_t length) { | |
852 | if (offset == bluestore_pextent_t::INVALID_OFFSET) { | |
853 | add_invalid(length); | |
854 | } | |
855 | else { | |
856 | flush(); | |
857 | v.emplace_back(bluestore_pextent_t(offset, length)); | |
858 | } | |
859 | } | |
860 | }; | |
861 | ||
862 | void bluestore_blob_t::allocated_test(const bluestore_pextent_t& alloc) | |
863 | { | |
864 | extents.emplace_back(alloc); | |
865 | if (!is_compressed()) { | |
866 | logical_length += alloc.length; | |
867 | } | |
868 | } | |
869 | ||
870 | bool bluestore_blob_t::release_extents(bool all, | |
871 | const PExtentVector& logical, | |
872 | PExtentVector* r) | |
873 | { | |
874 | // common case: all of it? | |
875 | if (all) { | |
876 | uint64_t pos = 0; | |
877 | for (auto& e : extents) { | |
878 | if (e.is_valid()) { | |
879 | r->push_back(e); | |
880 | } | |
881 | pos += e.length; | |
882 | } | |
883 | assert(is_compressed() || get_logical_length() == pos); | |
884 | extents.resize(1); | |
885 | extents[0].offset = bluestore_pextent_t::INVALID_OFFSET; | |
886 | extents[0].length = pos; | |
887 | return true; | |
888 | } | |
889 | // remove from pextents according to logical release list | |
890 | vecbuilder vb; | |
891 | auto loffs_it = logical.begin(); | |
892 | auto lend = logical.end(); | |
893 | uint32_t pext_loffs_start = 0; //starting loffset of the current pextent | |
894 | uint32_t pext_loffs = 0; //current loffset | |
895 | auto pext_it = extents.begin(); | |
896 | auto pext_end = extents.end(); | |
897 | while (pext_it != pext_end) { | |
898 | if (loffs_it == lend || | |
899 | pext_loffs_start + pext_it->length <= loffs_it->offset) { | |
900 | int delta0 = pext_loffs - pext_loffs_start; | |
901 | assert(delta0 >= 0); | |
902 | if ((uint32_t)delta0 < pext_it->length) { | |
903 | vb.add(pext_it->offset + delta0, pext_it->length - delta0); | |
904 | } | |
905 | pext_loffs_start += pext_it->length; | |
906 | pext_loffs = pext_loffs_start; | |
907 | ++pext_it; | |
908 | } | |
909 | else { | |
910 | //assert(pext_loffs == pext_loffs_start); | |
911 | int delta0 = pext_loffs - pext_loffs_start; | |
912 | assert(delta0 >= 0); | |
913 | ||
914 | int delta = loffs_it->offset - pext_loffs; | |
915 | assert(delta >= 0); | |
916 | if (delta > 0) { | |
917 | vb.add(pext_it->offset + delta0, delta); | |
918 | pext_loffs += delta; | |
919 | } | |
920 | ||
921 | PExtentVector::iterator last_r = r->end(); | |
922 | if (r->begin() != last_r) { | |
923 | --last_r; | |
924 | } | |
925 | uint32_t to_release = loffs_it->length; | |
926 | do { | |
927 | uint32_t to_release_part = | |
928 | MIN(pext_it->length - delta0 - delta, to_release); | |
929 | auto o = pext_it->offset + delta0 + delta; | |
930 | if (last_r != r->end() && last_r->offset + last_r->length == o) { | |
931 | last_r->length += to_release_part; | |
932 | } | |
933 | else { | |
934 | last_r = r->emplace(r->end(), o, to_release_part); | |
935 | } | |
936 | to_release -= to_release_part; | |
937 | pext_loffs += to_release_part; | |
938 | if (pext_loffs == pext_loffs_start + pext_it->length) { | |
939 | pext_loffs_start += pext_it->length; | |
940 | pext_loffs = pext_loffs_start; | |
941 | pext_it++; | |
942 | delta0 = delta = 0; | |
943 | } | |
944 | } while (to_release > 0 && pext_it != pext_end); | |
945 | vb.add_invalid(loffs_it->length - to_release); | |
946 | ++loffs_it; | |
947 | } | |
948 | } | |
949 | vb.flush(); | |
950 | extents.swap(vb.v); | |
951 | return false; | |
952 | } | |
953 | ||
954 | void bluestore_blob_t::split(uint32_t blob_offset, bluestore_blob_t& rb) | |
955 | { | |
956 | size_t left = blob_offset; | |
957 | uint32_t llen_lb = 0; | |
958 | uint32_t llen_rb = 0; | |
959 | unsigned i = 0; | |
960 | for (auto p = extents.begin(); p != extents.end(); ++p, ++i) { | |
961 | if (p->length <= left) { | |
962 | left -= p->length; | |
963 | llen_lb += p->length; | |
964 | continue; | |
965 | } | |
966 | if (left) { | |
967 | if (p->is_valid()) { | |
968 | rb.extents.emplace_back(bluestore_pextent_t(p->offset + left, | |
969 | p->length - left)); | |
970 | } | |
971 | else { | |
972 | rb.extents.emplace_back(bluestore_pextent_t( | |
973 | bluestore_pextent_t::INVALID_OFFSET, | |
974 | p->length - left)); | |
975 | } | |
976 | llen_rb += p->length - left; | |
977 | llen_lb += left; | |
978 | p->length = left; | |
979 | ++i; | |
980 | ++p; | |
981 | } | |
982 | while (p != extents.end()) { | |
983 | llen_rb += p->length; | |
984 | rb.extents.push_back(*p++); | |
985 | } | |
986 | extents.resize(i); | |
987 | logical_length = llen_lb; | |
988 | rb.logical_length = llen_rb; | |
989 | break; | |
990 | } | |
991 | rb.flags = flags; | |
992 | ||
993 | if (has_csum()) { | |
994 | rb.csum_type = csum_type; | |
995 | rb.csum_chunk_order = csum_chunk_order; | |
996 | size_t csum_order = get_csum_chunk_size(); | |
997 | assert(blob_offset % csum_order == 0); | |
998 | size_t pos = (blob_offset / csum_order) * get_csum_value_size(); | |
999 | // deep copy csum data | |
1000 | bufferptr old; | |
1001 | old.swap(csum_data); | |
1002 | rb.csum_data = bufferptr(old.c_str() + pos, old.length() - pos); | |
1003 | csum_data = bufferptr(old.c_str(), pos); | |
1004 | } | |
1005 | } | |
1006 | ||
1007 | // bluestore_shared_blob_t | |
1008 | ||
1009 | void bluestore_shared_blob_t::dump(Formatter *f) const | |
1010 | { | |
1011 | f->dump_int("sbid", sbid); | |
1012 | f->dump_object("ref_map", ref_map); | |
1013 | } | |
1014 | ||
1015 | void bluestore_shared_blob_t::generate_test_instances( | |
1016 | list<bluestore_shared_blob_t*>& ls) | |
1017 | { | |
1018 | ls.push_back(new bluestore_shared_blob_t(1)); | |
1019 | } | |
1020 | ||
1021 | ostream& operator<<(ostream& out, const bluestore_shared_blob_t& sb) | |
1022 | { | |
31f18b77 FG |
1023 | out << "(sbid 0x" << std::hex << sb.sbid << std::dec; |
1024 | out << " " << sb.ref_map << ")"; | |
7c673cae FG |
1025 | return out; |
1026 | } | |
1027 | ||
1028 | // bluestore_onode_t | |
1029 | ||
1030 | void bluestore_onode_t::shard_info::dump(Formatter *f) const | |
1031 | { | |
1032 | f->dump_unsigned("offset", offset); | |
1033 | f->dump_unsigned("bytes", bytes); | |
1034 | } | |
1035 | ||
1036 | ostream& operator<<(ostream& out, const bluestore_onode_t::shard_info& si) | |
1037 | { | |
1038 | return out << std::hex << "0x" << si.offset << "(0x" << si.bytes << " bytes" | |
1039 | << std::dec << ")"; | |
1040 | } | |
1041 | ||
1042 | void bluestore_onode_t::dump(Formatter *f) const | |
1043 | { | |
1044 | f->dump_unsigned("nid", nid); | |
1045 | f->dump_unsigned("size", size); | |
1046 | f->open_object_section("attrs"); | |
1047 | for (auto p = attrs.begin(); p != attrs.end(); ++p) { | |
1048 | f->open_object_section("attr"); | |
1049 | f->dump_string("name", p->first.c_str()); // it's not quite std::string | |
1050 | f->dump_unsigned("len", p->second.length()); | |
1051 | f->close_section(); | |
1052 | } | |
1053 | f->close_section(); | |
1054 | f->dump_string("flags", get_flags_string()); | |
1055 | f->open_array_section("extent_map_shards"); | |
1056 | for (auto si : extent_map_shards) { | |
1057 | f->dump_object("shard", si); | |
1058 | } | |
1059 | f->close_section(); | |
1060 | f->dump_unsigned("expected_object_size", expected_object_size); | |
1061 | f->dump_unsigned("expected_write_size", expected_write_size); | |
1062 | f->dump_unsigned("alloc_hint_flags", alloc_hint_flags); | |
1063 | } | |
1064 | ||
1065 | void bluestore_onode_t::generate_test_instances(list<bluestore_onode_t*>& o) | |
1066 | { | |
1067 | o.push_back(new bluestore_onode_t()); | |
1068 | // FIXME | |
1069 | } | |
1070 | ||
1071 | // bluestore_deferred_op_t | |
1072 | ||
1073 | void bluestore_deferred_op_t::dump(Formatter *f) const | |
1074 | { | |
1075 | f->dump_unsigned("op", (int)op); | |
1076 | f->dump_unsigned("data_len", data.length()); | |
1077 | f->open_array_section("extents"); | |
1078 | for (auto& e : extents) { | |
1079 | f->dump_object("extent", e); | |
1080 | } | |
1081 | f->close_section(); | |
1082 | } | |
1083 | ||
1084 | void bluestore_deferred_op_t::generate_test_instances(list<bluestore_deferred_op_t*>& o) | |
1085 | { | |
1086 | o.push_back(new bluestore_deferred_op_t); | |
1087 | o.push_back(new bluestore_deferred_op_t); | |
1088 | o.back()->op = OP_WRITE; | |
1089 | o.back()->extents.push_back(bluestore_pextent_t(1, 2)); | |
1090 | o.back()->extents.push_back(bluestore_pextent_t(100, 5)); | |
1091 | o.back()->data.append("my data"); | |
1092 | } | |
1093 | ||
1094 | void bluestore_deferred_transaction_t::dump(Formatter *f) const | |
1095 | { | |
1096 | f->dump_unsigned("seq", seq); | |
1097 | f->open_array_section("ops"); | |
1098 | for (list<bluestore_deferred_op_t>::const_iterator p = ops.begin(); p != ops.end(); ++p) { | |
1099 | f->dump_object("op", *p); | |
1100 | } | |
1101 | f->close_section(); | |
1102 | ||
1103 | f->open_array_section("released extents"); | |
1104 | for (interval_set<uint64_t>::const_iterator p = released.begin(); p != released.end(); ++p) { | |
1105 | f->open_object_section("extent"); | |
1106 | f->dump_unsigned("offset", p.get_start()); | |
1107 | f->dump_unsigned("length", p.get_len()); | |
1108 | f->close_section(); | |
1109 | } | |
1110 | f->close_section(); | |
1111 | } | |
1112 | ||
1113 | void bluestore_deferred_transaction_t::generate_test_instances(list<bluestore_deferred_transaction_t*>& o) | |
1114 | { | |
1115 | o.push_back(new bluestore_deferred_transaction_t()); | |
1116 | o.push_back(new bluestore_deferred_transaction_t()); | |
1117 | o.back()->seq = 123; | |
1118 | o.back()->ops.push_back(bluestore_deferred_op_t()); | |
1119 | o.back()->ops.push_back(bluestore_deferred_op_t()); | |
1120 | o.back()->ops.back().op = bluestore_deferred_op_t::OP_WRITE; | |
1121 | o.back()->ops.back().extents.push_back(bluestore_pextent_t(1,7)); | |
1122 | o.back()->ops.back().data.append("foodata"); | |
1123 | } | |
1124 | ||
1125 | void bluestore_compression_header_t::dump(Formatter *f) const | |
1126 | { | |
1127 | f->dump_unsigned("type", type); | |
1128 | f->dump_unsigned("length", length); | |
1129 | } | |
1130 | ||
1131 | void bluestore_compression_header_t::generate_test_instances( | |
1132 | list<bluestore_compression_header_t*>& o) | |
1133 | { | |
1134 | o.push_back(new bluestore_compression_header_t); | |
1135 | o.push_back(new bluestore_compression_header_t(1)); | |
1136 | o.back()->length = 1234; | |
1137 | } |