]>
Commit | Line | Data |
---|---|---|
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- | |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2014 Red Hat | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | #include "bluestore_types.h" | |
16 | #include "common/Formatter.h" | |
17 | #include "common/Checksummer.h" | |
18 | #include "include/stringify.h" | |
19 | ||
20 | void ExtentList::add_extents(int64_t start, int64_t count) { | |
21 | AllocExtent *last_extent = NULL; | |
22 | bool can_merge = false; | |
23 | ||
24 | if (!m_extents->empty()) { | |
25 | last_extent = &(m_extents->back()); | |
26 | uint64_t last_offset = last_extent->end() / m_block_size; | |
27 | uint32_t last_length = last_extent->length / m_block_size; | |
28 | if ((last_offset == (uint64_t) start) && | |
29 | (!m_max_blocks || (last_length + count) <= m_max_blocks)) { | |
30 | can_merge = true; | |
31 | } | |
32 | } | |
33 | ||
34 | if (can_merge) { | |
35 | last_extent->length += (count * m_block_size); | |
36 | } else { | |
37 | m_extents->emplace_back(AllocExtent(start * m_block_size, | |
38 | count * m_block_size)); | |
39 | } | |
40 | } | |
41 | ||
42 | // bluestore_bdev_label_t | |
43 | ||
// Serialize the label into bl.
//
// A plain-text preamble (a fixed banner line, then the stringified osd_uuid
// and a newline) is appended first, followed by the versioned binary
// encoding of the fields.  decode() skips the preamble by a fixed byte
// count, so the preamble's length must not change.
void bluestore_bdev_label_t::encode(bufferlist& bl) const
{
  // be slightly friendly to someone who looks at the device
  bl.append("bluestore block device\n");
  bl.append(stringify(osd_uuid));
  bl.append("\n");
  // NOTE(review): presumably (struct version 2, compat version 1) -- confirm
  // against the ENCODE_START macro definition.
  ENCODE_START(2, 1, bl);
  ::encode(osd_uuid, bl);
  ::encode(size, bl);
  ::encode(btime, bl);
  ::encode(description, bl);
  ::encode(meta, bl);
  ENCODE_FINISH(bl);
}
58 | ||
// Decode a label from p, the counterpart of encode().
void bluestore_bdev_label_t::decode(bufferlist::iterator& p)
{
  // Skip the plain-text preamble that encode() writes ahead of the binary
  // payload.  NOTE(review): 60 presumably equals the 23-byte banner line plus
  // a 36-character uuid string plus the trailing newline -- confirm that
  // stringify(osd_uuid) always yields 36 characters.
  p.advance(60); // see above
  DECODE_START(2, p);
  ::decode(osd_uuid, p);
  ::decode(size, p);
  ::decode(btime, p);
  ::decode(description, p);
  // meta is only read when the encoded struct version is at least 2
  if (struct_v >= 2) {
    ::decode(meta, p);
  }
  DECODE_FINISH(p);
}
72 | ||
73 | void bluestore_bdev_label_t::dump(Formatter *f) const | |
74 | { | |
75 | f->dump_stream("osd_uuid") << osd_uuid; | |
76 | f->dump_unsigned("size", size); | |
77 | f->dump_stream("btime") << btime; | |
78 | f->dump_string("description", description); | |
79 | for (auto& i : meta) { | |
80 | f->dump_string(i.first.c_str(), i.second); | |
81 | } | |
82 | } | |
83 | ||
84 | void bluestore_bdev_label_t::generate_test_instances( | |
85 | list<bluestore_bdev_label_t*>& o) | |
86 | { | |
87 | o.push_back(new bluestore_bdev_label_t); | |
88 | o.push_back(new bluestore_bdev_label_t); | |
89 | o.back()->size = 123; | |
90 | o.back()->btime = utime_t(4, 5); | |
91 | o.back()->description = "fakey"; | |
92 | o.back()->meta["foo"] = "bar"; | |
93 | } | |
94 | ||
95 | ostream& operator<<(ostream& out, const bluestore_bdev_label_t& l) | |
96 | { | |
97 | return out << "bdev(osd_uuid " << l.osd_uuid | |
98 | << ", size 0x" << std::hex << l.size << std::dec | |
99 | << ", btime " << l.btime | |
100 | << ", desc " << l.description | |
101 | << ", " << l.meta.size() << " meta" | |
102 | << ")"; | |
103 | } | |
104 | ||
105 | // cnode_t | |
106 | ||
107 | void bluestore_cnode_t::dump(Formatter *f) const | |
108 | { | |
109 | f->dump_unsigned("bits", bits); | |
110 | } | |
111 | ||
112 | void bluestore_cnode_t::generate_test_instances(list<bluestore_cnode_t*>& o) | |
113 | { | |
114 | o.push_back(new bluestore_cnode_t()); | |
115 | o.push_back(new bluestore_cnode_t(0)); | |
116 | o.push_back(new bluestore_cnode_t(123)); | |
117 | } | |
118 | ||
119 | // bluestore_extent_ref_map_t | |
120 | ||
121 | void bluestore_extent_ref_map_t::_check() const | |
122 | { | |
123 | uint64_t pos = 0; | |
124 | unsigned refs = 0; | |
125 | for (const auto &p : ref_map) { | |
126 | if (p.first < pos) | |
127 | assert(0 == "overlap"); | |
128 | if (p.first == pos && p.second.refs == refs) | |
129 | assert(0 == "unmerged"); | |
130 | pos = p.first + p.second.length; | |
131 | refs = p.second.refs; | |
132 | } | |
133 | } | |
134 | ||
135 | void bluestore_extent_ref_map_t::_maybe_merge_left( | |
136 | map<uint64_t,record_t>::iterator& p) | |
137 | { | |
138 | if (p == ref_map.begin()) | |
139 | return; | |
140 | auto q = p; | |
141 | --q; | |
142 | if (q->second.refs == p->second.refs && | |
143 | q->first + q->second.length == p->first) { | |
144 | q->second.length += p->second.length; | |
145 | ref_map.erase(p); | |
146 | p = q; | |
147 | } | |
148 | } | |
149 | ||
// Take one reference on every byte of [offset, offset + length).
//
// ref_map is keyed by record start offset; each record carries a length and
// a reference count.  Parts of the range not covered by any record are
// inserted with refs == 1; covered parts get their count incremented, with
// records split at the range boundaries where necessary.  Records that end
// up adjacent with equal counts are coalesced via _maybe_merge_left().
void bluestore_extent_ref_map_t::get(uint64_t offset, uint32_t length)
{
  // Position p at the first record that can overlap the range: the record
  // at/after offset, or its predecessor if that one extends past offset.
  auto p = ref_map.lower_bound(offset);
  if (p != ref_map.begin()) {
    --p;
    if (p->first + p->second.length <= offset) {
      ++p;
    }
  }
  while (length > 0) {
    if (p == ref_map.end()) {
      // nothing after offset; add the whole thing.
      p = ref_map.insert(
        map<uint64_t,record_t>::value_type(offset, record_t(length, 1))).first;
      break;
    }
    if (p->first > offset) {
      // gap before the next record: fill it (up to the next record or the
      // end of the range) with a fresh refs == 1 record
      uint64_t newlen = MIN(p->first - offset, length);
      p = ref_map.insert(
        map<uint64_t,record_t>::value_type(offset,
                                           record_t(newlen, 1))).first;
      offset += newlen;
      length -= newlen;
      _maybe_merge_left(p);
      ++p;
      continue;
    }
    if (p->first < offset) {
      // split off the portion before offset; it keeps the old ref count
      assert(p->first + p->second.length > offset);
      uint64_t left = p->first + p->second.length - offset;
      p->second.length = offset - p->first;
      p = ref_map.insert(map<uint64_t,record_t>::value_type(
                           offset, record_t(left, p->second.refs))).first;
      // continue below
    }
    assert(p->first == offset);
    if (length < p->second.length) {
      // range ends inside this record: split off the untouched remainder
      // (old ref count), then bump the count on the leading part
      ref_map.insert(make_pair(offset + length,
                               record_t(p->second.length - length,
                                        p->second.refs)));
      p->second.length = length;
      ++p->second.refs;
      break;
    }
    // the whole record lies inside the range
    ++p->second.refs;
    offset += p->second.length;
    length -= p->second.length;
    _maybe_merge_left(p);
    ++p;
  }
  // final merge attempt for the record p ended on
  if (p != ref_map.end())
    _maybe_merge_left(p);
  //_check();
}
206 | ||
// Drop one reference from every byte of [offset, offset + length).
//
// The start of the range must be covered by an existing record (asserted).
// Records are split at the range boundaries as needed.  A covered piece
// whose count is above 1 is decremented; a piece whose count is 1 is erased
// from the map and, when release is non-null, appended to *release.
//
// release        - optional output; released pieces are appended, existing
//                  entries are left in place.
// maybe_unshared - optional output; set to false if a record with
//                  refs != 1 was seen while processing or remains anywhere
//                  in the map, true otherwise.
void bluestore_extent_ref_map_t::put(
  uint64_t offset, uint32_t length,
  PExtentVector *release,
  bool *maybe_unshared)
{
  //NB: existing entries in 'release' container must be preserved!
  bool unshared = true;
  // locate the record containing offset
  auto p = ref_map.lower_bound(offset);
  if (p == ref_map.end() || p->first > offset) {
    if (p == ref_map.begin()) {
      assert(0 == "put on missing extent (nothing before)");
    }
    --p;
    if (p->first + p->second.length <= offset) {
      assert(0 == "put on missing extent (gap)");
    }
  }
  if (p->first < offset) {
    // split: the part before offset keeps its count, p moves to the part
    // starting at offset
    uint64_t left = p->first + p->second.length - offset;
    p->second.length = offset - p->first;
    if (p->second.refs != 1) {
      unshared = false;
    }
    p = ref_map.insert(map<uint64_t,record_t>::value_type(
                         offset, record_t(left, p->second.refs))).first;
  }
  while (length > 0) {
    assert(p->first == offset);
    if (length < p->second.length) {
      // range ends inside this record: keep the remainder as a separate
      // record with the old count, then drop a ref on the leading part
      if (p->second.refs != 1) {
        unshared = false;
      }
      ref_map.insert(make_pair(offset + length,
                               record_t(p->second.length - length,
                                        p->second.refs)));
      if (p->second.refs > 1) {
        p->second.length = length;
        --p->second.refs;
        if (p->second.refs != 1) {
          unshared = false;
        }
        _maybe_merge_left(p);
      } else {
        // last reference: release the leading part
        if (release)
          release->push_back(bluestore_pextent_t(p->first, length));
        ref_map.erase(p);
      }
      // skips the trailing merge after the loop
      goto out;
    }
    // the whole record lies inside the range
    offset += p->second.length;
    length -= p->second.length;
    if (p->second.refs > 1) {
      --p->second.refs;
      if (p->second.refs != 1) {
        unshared = false;
      }
      _maybe_merge_left(p);
      ++p;
    } else {
      // last reference: release the whole record
      if (release)
        release->push_back(bluestore_pextent_t(p->first, p->second.length));
      // post-increment so p stays valid after the erase
      ref_map.erase(p++);
    }
  }
  if (p != ref_map.end())
    _maybe_merge_left(p);
  //_check();
out:
  if (maybe_unshared) {
    if (unshared) {
      // we haven't seen a ref != 1 yet; check the whole map.
      for (auto& p : ref_map) {
        if (p.second.refs != 1) {
          unshared = false;
          break;
        }
      }
    }
    *maybe_unshared = unshared;
  }
}
288 | ||
289 | bool bluestore_extent_ref_map_t::contains(uint64_t offset, uint32_t length) const | |
290 | { | |
291 | auto p = ref_map.lower_bound(offset); | |
292 | if (p == ref_map.end() || p->first > offset) { | |
293 | if (p == ref_map.begin()) { | |
294 | return false; // nothing before | |
295 | } | |
296 | --p; | |
297 | if (p->first + p->second.length <= offset) { | |
298 | return false; // gap | |
299 | } | |
300 | } | |
301 | while (length > 0) { | |
302 | if (p == ref_map.end()) | |
303 | return false; | |
304 | if (p->first > offset) | |
305 | return false; | |
306 | if (p->first + p->second.length >= offset + length) | |
307 | return true; | |
308 | uint64_t overlap = p->first + p->second.length - offset; | |
309 | offset += overlap; | |
310 | length -= overlap; | |
311 | ++p; | |
312 | } | |
313 | return true; | |
314 | } | |
315 | ||
316 | bool bluestore_extent_ref_map_t::intersects( | |
317 | uint64_t offset, | |
318 | uint32_t length) const | |
319 | { | |
320 | auto p = ref_map.lower_bound(offset); | |
321 | if (p != ref_map.begin()) { | |
322 | --p; | |
323 | if (p->first + p->second.length <= offset) { | |
324 | ++p; | |
325 | } | |
326 | } | |
327 | if (p == ref_map.end()) | |
328 | return false; | |
329 | if (p->first >= offset + length) | |
330 | return false; | |
331 | return true; // intersects p! | |
332 | } | |
333 | ||
334 | void bluestore_extent_ref_map_t::dump(Formatter *f) const | |
335 | { | |
336 | f->open_array_section("ref_map"); | |
337 | for (auto& p : ref_map) { | |
338 | f->open_object_section("ref"); | |
339 | f->dump_unsigned("offset", p.first); | |
340 | f->dump_unsigned("length", p.second.length); | |
341 | f->dump_unsigned("refs", p.second.refs); | |
342 | f->close_section(); | |
343 | } | |
344 | f->close_section(); | |
345 | } | |
346 | ||
347 | void bluestore_extent_ref_map_t::generate_test_instances( | |
348 | list<bluestore_extent_ref_map_t*>& o) | |
349 | { | |
350 | o.push_back(new bluestore_extent_ref_map_t); | |
351 | o.push_back(new bluestore_extent_ref_map_t); | |
352 | o.back()->get(10, 10); | |
353 | o.back()->get(18, 22); | |
354 | o.back()->get(20, 20); | |
355 | o.back()->get(10, 25); | |
356 | o.back()->get(15, 20); | |
357 | } | |
358 | ||
359 | ostream& operator<<(ostream& out, const bluestore_extent_ref_map_t& m) | |
360 | { | |
361 | out << "ref_map("; | |
362 | for (auto p = m.ref_map.begin(); p != m.ref_map.end(); ++p) { | |
363 | if (p != m.ref_map.begin()) | |
364 | out << ","; | |
365 | out << std::hex << "0x" << p->first << "~" << p->second.length << std::dec | |
366 | << "=" << p->second.refs; | |
367 | } | |
368 | out << ")"; | |
369 | return out; | |
370 | } | |
371 | ||
372 | // bluestore_blob_use_tracker_t | |
373 | ||
374 | void bluestore_blob_use_tracker_t::allocate() | |
375 | { | |
376 | assert(num_au != 0); | |
377 | bytes_per_au = new uint32_t[num_au]; | |
378 | for (uint32_t i = 0; i < num_au; ++i) { | |
379 | bytes_per_au[i] = 0; | |
380 | } | |
381 | } | |
382 | ||
383 | void bluestore_blob_use_tracker_t::init( | |
384 | uint32_t full_length, uint32_t _au_size) { | |
385 | assert(!au_size || is_empty()); | |
386 | assert(_au_size > 0); | |
387 | assert(full_length > 0); | |
388 | clear(); | |
389 | uint32_t _num_au = ROUND_UP_TO(full_length, _au_size) / _au_size; | |
390 | au_size = _au_size; | |
391 | if( _num_au > 1 ) { | |
392 | num_au = _num_au; | |
393 | allocate(); | |
394 | } | |
395 | } | |
396 | ||
397 | void bluestore_blob_use_tracker_t::get( | |
398 | uint32_t offset, uint32_t length) | |
399 | { | |
400 | assert(au_size); | |
401 | if (!num_au) { | |
402 | total_bytes += length; | |
403 | }else { | |
404 | auto end = offset + length; | |
405 | ||
406 | while (offset < end) { | |
407 | auto phase = offset % au_size; | |
408 | bytes_per_au[offset / au_size] += | |
409 | MIN(au_size - phase, end - offset); | |
410 | offset += (phase ? au_size - phase : au_size); | |
411 | } | |
412 | } | |
413 | } | |
414 | ||
415 | bool bluestore_blob_use_tracker_t::put( | |
416 | uint32_t offset, uint32_t length, | |
417 | PExtentVector *release_units) | |
418 | { | |
419 | assert(au_size); | |
420 | if (release_units) { | |
421 | release_units->clear(); | |
422 | } | |
423 | bool maybe_empty = true; | |
424 | if (!num_au) { | |
425 | assert(total_bytes >= length); | |
426 | total_bytes -= length; | |
427 | } else { | |
428 | auto end = offset + length; | |
429 | uint64_t next_offs = 0; | |
430 | while (offset < end) { | |
431 | auto phase = offset % au_size; | |
432 | size_t pos = offset / au_size; | |
433 | auto diff = MIN(au_size - phase, end - offset); | |
434 | assert(diff <= bytes_per_au[pos]); | |
435 | bytes_per_au[pos] -= diff; | |
436 | offset += (phase ? au_size - phase : au_size); | |
437 | if (bytes_per_au[pos] == 0) { | |
438 | if (release_units) { | |
439 | if (release_units->empty() || next_offs != pos * au_size) { | |
440 | release_units->emplace_back(pos * au_size, au_size); | |
441 | } else { | |
442 | release_units->back().length += au_size; | |
443 | } | |
444 | next_offs += au_size; | |
445 | } | |
446 | } else { | |
447 | maybe_empty = false; // micro optimization detecting we aren't empty | |
448 | // even in the affected extent | |
449 | } | |
450 | } | |
451 | } | |
452 | bool empty = maybe_empty ? !is_not_empty() : false; | |
453 | if (empty && release_units) { | |
454 | release_units->clear(); | |
455 | } | |
456 | return empty; | |
457 | } | |
458 | ||
459 | bool bluestore_blob_use_tracker_t::can_split() const | |
460 | { | |
461 | return num_au > 0; | |
462 | } | |
463 | ||
464 | bool bluestore_blob_use_tracker_t::can_split_at(uint32_t blob_offset) const | |
465 | { | |
466 | assert(au_size); | |
467 | return (blob_offset % au_size) == 0 && | |
468 | blob_offset < num_au * au_size; | |
469 | } | |
470 | ||
471 | void bluestore_blob_use_tracker_t::split( | |
472 | uint32_t blob_offset, | |
473 | bluestore_blob_use_tracker_t* r) | |
474 | { | |
475 | assert(au_size); | |
476 | assert(can_split()); | |
477 | assert(can_split_at(blob_offset)); | |
478 | assert(r->is_empty()); | |
479 | ||
480 | uint32_t new_num_au = blob_offset / au_size; | |
481 | r->init( (num_au - new_num_au) * au_size, au_size); | |
482 | ||
483 | for (auto i = new_num_au; i < num_au; i++) { | |
484 | r->get((i - new_num_au) * au_size, bytes_per_au[i]); | |
485 | bytes_per_au[i] = 0; | |
486 | } | |
487 | if (new_num_au == 0) { | |
488 | clear(); | |
489 | } else if (new_num_au == 1) { | |
490 | uint32_t tmp = bytes_per_au[0]; | |
491 | uint32_t _au_size = au_size; | |
492 | clear(); | |
493 | au_size = _au_size; | |
494 | total_bytes = tmp; | |
495 | } else { | |
496 | num_au = new_num_au; | |
497 | } | |
498 | } | |
499 | ||
500 | bool bluestore_blob_use_tracker_t::equal( | |
501 | const bluestore_blob_use_tracker_t& other) const | |
502 | { | |
503 | if (!num_au && !other.num_au) { | |
504 | return total_bytes == other.total_bytes && au_size == other.au_size; | |
505 | } else if (num_au && other.num_au) { | |
506 | if (num_au != other.num_au || au_size != other.au_size) { | |
507 | return false; | |
508 | } | |
509 | for (size_t i = 0; i < num_au; i++) { | |
510 | if (bytes_per_au[i] != other.bytes_per_au[i]) { | |
511 | return false; | |
512 | } | |
513 | } | |
514 | return true; | |
515 | } | |
516 | ||
517 | uint32_t n = num_au ? num_au : other.num_au; | |
518 | uint32_t referenced = | |
519 | num_au ? other.get_referenced_bytes() : get_referenced_bytes(); | |
520 | auto bytes_per_au_tmp = num_au ? bytes_per_au : other.bytes_per_au; | |
521 | uint32_t my_referenced = 0; | |
522 | for (size_t i = 0; i < n; i++) { | |
523 | my_referenced += bytes_per_au_tmp[i]; | |
524 | if (my_referenced > referenced) { | |
525 | return false; | |
526 | } | |
527 | } | |
528 | return my_referenced == referenced; | |
529 | } | |
530 | ||
531 | void bluestore_blob_use_tracker_t::dump(Formatter *f) const | |
532 | { | |
533 | f->dump_unsigned("num_au", num_au); | |
534 | f->dump_unsigned("au_size", au_size); | |
535 | if (!num_au) { | |
536 | f->dump_unsigned("total_bytes", total_bytes); | |
537 | } else { | |
538 | f->open_array_section("bytes_per_au"); | |
539 | for (size_t i = 0; i < num_au; ++i) { | |
540 | f->dump_unsigned("", bytes_per_au[i]); | |
541 | } | |
542 | f->close_section(); | |
543 | } | |
544 | } | |
545 | ||
546 | void bluestore_blob_use_tracker_t::generate_test_instances( | |
547 | list<bluestore_blob_use_tracker_t*>& o) | |
548 | { | |
549 | o.push_back(new bluestore_blob_use_tracker_t()); | |
550 | o.back()->init(16, 16); | |
551 | o.back()->get(10, 10); | |
552 | o.back()->get(10, 5); | |
553 | o.push_back(new bluestore_blob_use_tracker_t()); | |
554 | o.back()->init(60, 16); | |
555 | o.back()->get(18, 22); | |
556 | o.back()->get(20, 20); | |
557 | o.back()->get(15, 20); | |
558 | } | |
559 | ||
560 | ostream& operator<<(ostream& out, const bluestore_blob_use_tracker_t& m) | |
561 | { | |
562 | out << "use_tracker(" << std::hex; | |
563 | if (!m.num_au) { | |
564 | out << "0x" << m.au_size | |
565 | << " " | |
566 | << "0x" << m.total_bytes; | |
567 | } else { | |
568 | out << "0x" << m.num_au | |
569 | << "*0x" << m.au_size | |
570 | << " 0x["; | |
571 | for (size_t i = 0; i < m.num_au; ++i) { | |
572 | if (i != 0) | |
573 | out << ","; | |
574 | out << m.bytes_per_au[i]; | |
575 | } | |
576 | out << "]"; | |
577 | } | |
578 | out << std::dec << ")"; | |
579 | return out; | |
580 | } | |
581 | ||
582 | // bluestore_pextent_t | |
583 | ||
584 | void bluestore_pextent_t::dump(Formatter *f) const | |
585 | { | |
586 | f->dump_unsigned("offset", offset); | |
587 | f->dump_unsigned("length", length); | |
588 | } | |
589 | ||
590 | ostream& operator<<(ostream& out, const bluestore_pextent_t& o) { | |
591 | if (o.is_valid()) | |
592 | return out << "0x" << std::hex << o.offset << "~" << o.length << std::dec; | |
593 | else | |
594 | return out << "!~" << std::hex << o.length << std::dec; | |
595 | } | |
596 | ||
597 | void bluestore_pextent_t::generate_test_instances(list<bluestore_pextent_t*>& ls) | |
598 | { | |
599 | ls.push_back(new bluestore_pextent_t); | |
600 | ls.push_back(new bluestore_pextent_t(1, 2)); | |
601 | } | |
602 | ||
603 | // bluestore_blob_t | |
604 | ||
605 | string bluestore_blob_t::get_flags_string(unsigned flags) | |
606 | { | |
607 | string s; | |
608 | if (flags & FLAG_COMPRESSED) { | |
609 | if (s.length()) | |
610 | s += '+'; | |
611 | s += "compressed"; | |
612 | } | |
613 | if (flags & FLAG_CSUM) { | |
614 | if (s.length()) | |
615 | s += '+'; | |
616 | s += "csum"; | |
617 | } | |
618 | if (flags & FLAG_HAS_UNUSED) { | |
619 | if (s.length()) | |
620 | s += '+'; | |
621 | s += "has_unused"; | |
622 | } | |
623 | if (flags & FLAG_SHARED) { | |
624 | if (s.length()) | |
625 | s += '+'; | |
626 | s += "shared"; | |
627 | } | |
628 | ||
629 | return s; | |
630 | } | |
631 | ||
632 | size_t bluestore_blob_t::get_csum_value_size() const | |
633 | { | |
634 | return Checksummer::get_csum_value_size(csum_type); | |
635 | } | |
636 | ||
637 | void bluestore_blob_t::dump(Formatter *f) const | |
638 | { | |
639 | f->open_array_section("extents"); | |
640 | for (auto& p : extents) { | |
641 | f->dump_object("extent", p); | |
642 | } | |
643 | f->close_section(); | |
644 | f->dump_unsigned("logical_length", logical_length); | |
645 | f->dump_unsigned("compressed_length", compressed_length); | |
646 | f->dump_unsigned("flags", flags); | |
647 | f->dump_unsigned("csum_type", csum_type); | |
648 | f->dump_unsigned("csum_chunk_order", csum_chunk_order); | |
649 | f->open_array_section("csum_data"); | |
650 | size_t n = get_csum_count(); | |
651 | for (unsigned i = 0; i < n; ++i) | |
652 | f->dump_unsigned("csum", get_csum_item(i)); | |
653 | f->close_section(); | |
654 | f->dump_unsigned("unused", unused); | |
655 | } | |
656 | ||
657 | void bluestore_blob_t::generate_test_instances(list<bluestore_blob_t*>& ls) | |
658 | { | |
659 | ls.push_back(new bluestore_blob_t); | |
660 | ls.push_back(new bluestore_blob_t(0)); | |
661 | ls.push_back(new bluestore_blob_t); | |
662 | ls.back()->allocated_test(bluestore_pextent_t(111, 222)); | |
663 | ls.push_back(new bluestore_blob_t); | |
664 | ls.back()->init_csum(Checksummer::CSUM_XXHASH32, 16, 65536); | |
665 | ls.back()->csum_data = buffer::claim_malloc(4, strdup("abcd")); | |
666 | ls.back()->add_unused(0, 3); | |
667 | ls.back()->add_unused(8, 8); | |
668 | ls.back()->allocated_test(bluestore_pextent_t(0x40100000, 0x10000)); | |
669 | ls.back()->allocated_test( | |
670 | bluestore_pextent_t(bluestore_pextent_t::INVALID_OFFSET, 0x1000)); | |
671 | ls.back()->allocated_test(bluestore_pextent_t(0x40120000, 0x10000)); | |
672 | } | |
673 | ||
674 | ostream& operator<<(ostream& out, const bluestore_blob_t& o) | |
675 | { | |
676 | out << "blob(" << o.get_extents(); | |
677 | if (o.is_compressed()) { | |
678 | out << " clen 0x" << std::hex | |
679 | << o.get_logical_length() | |
680 | << " -> 0x" | |
681 | << o.get_compressed_payload_length() | |
682 | << std::dec; | |
683 | } | |
684 | if (o.flags) { | |
685 | out << " " << o.get_flags_string(); | |
686 | } | |
687 | if (o.has_csum()) { | |
688 | out << " " << Checksummer::get_csum_type_string(o.csum_type) | |
689 | << "/0x" << std::hex << (1ull << o.csum_chunk_order) << std::dec; | |
690 | } | |
691 | if (o.has_unused()) | |
692 | out << " unused=0x" << std::hex << o.unused << std::dec; | |
693 | out << ")"; | |
694 | return out; | |
695 | } | |
696 | ||
697 | void bluestore_blob_t::calc_csum(uint64_t b_off, const bufferlist& bl) | |
698 | { | |
699 | switch (csum_type) { | |
700 | case Checksummer::CSUM_XXHASH32: | |
701 | Checksummer::calculate<Checksummer::xxhash32>( | |
702 | get_csum_chunk_size(), b_off, bl.length(), bl, &csum_data); | |
703 | break; | |
704 | case Checksummer::CSUM_XXHASH64: | |
705 | Checksummer::calculate<Checksummer::xxhash64>( | |
706 | get_csum_chunk_size(), b_off, bl.length(), bl, &csum_data); | |
707 | break;; | |
708 | case Checksummer::CSUM_CRC32C: | |
709 | Checksummer::calculate<Checksummer::crc32c>( | |
710 | get_csum_chunk_size(), b_off, bl.length(), bl, &csum_data); | |
711 | break; | |
712 | case Checksummer::CSUM_CRC32C_16: | |
713 | Checksummer::calculate<Checksummer::crc32c_16>( | |
714 | get_csum_chunk_size(), b_off, bl.length(), bl, &csum_data); | |
715 | break; | |
716 | case Checksummer::CSUM_CRC32C_8: | |
717 | Checksummer::calculate<Checksummer::crc32c_8>( | |
718 | get_csum_chunk_size(), b_off, bl.length(), bl, &csum_data); | |
719 | break; | |
720 | } | |
721 | } | |
722 | ||
723 | int bluestore_blob_t::verify_csum(uint64_t b_off, const bufferlist& bl, | |
724 | int* b_bad_off, uint64_t *bad_csum) const | |
725 | { | |
726 | int r = 0; | |
727 | ||
728 | *b_bad_off = -1; | |
729 | switch (csum_type) { | |
730 | case Checksummer::CSUM_NONE: | |
731 | break; | |
732 | case Checksummer::CSUM_XXHASH32: | |
733 | *b_bad_off = Checksummer::verify<Checksummer::xxhash32>( | |
734 | get_csum_chunk_size(), b_off, bl.length(), bl, csum_data, bad_csum); | |
735 | break; | |
736 | case Checksummer::CSUM_XXHASH64: | |
737 | *b_bad_off = Checksummer::verify<Checksummer::xxhash64>( | |
738 | get_csum_chunk_size(), b_off, bl.length(), bl, csum_data, bad_csum); | |
739 | break; | |
740 | case Checksummer::CSUM_CRC32C: | |
741 | *b_bad_off = Checksummer::verify<Checksummer::crc32c>( | |
742 | get_csum_chunk_size(), b_off, bl.length(), bl, csum_data, bad_csum); | |
743 | break; | |
744 | case Checksummer::CSUM_CRC32C_16: | |
745 | *b_bad_off = Checksummer::verify<Checksummer::crc32c_16>( | |
746 | get_csum_chunk_size(), b_off, bl.length(), bl, csum_data, bad_csum); | |
747 | break; | |
748 | case Checksummer::CSUM_CRC32C_8: | |
749 | *b_bad_off = Checksummer::verify<Checksummer::crc32c_8>( | |
750 | get_csum_chunk_size(), b_off, bl.length(), bl, csum_data, bad_csum); | |
751 | break; | |
752 | default: | |
753 | r = -EOPNOTSUPP; | |
754 | break; | |
755 | } | |
756 | ||
757 | if (r < 0) | |
758 | return r; | |
759 | else if (*b_bad_off >= 0) | |
760 | return -1; // bad checksum | |
761 | else | |
762 | return 0; | |
763 | } | |
764 | ||
// Record that the logical blob range [b_off, b_off + length) is now backed
// by the physical extents in allocs.
//
// Blob without extents yet: an invalid (unallocated) extent of b_off bytes
// is added first when b_off is non-zero, then one extent per entry of
// allocs; for uncompressed blobs logical_length becomes b_off plus the sum
// of the allocated lengths.
//
// Blob with extents: the range must fall entirely on invalid extents
// (asserted).  Those extents are replaced by an optional invalid head
// remainder, the entries of allocs, and an optional invalid tail remainder.
// This path is not allowed for compressed blobs (asserted).
void bluestore_blob_t::allocated(uint32_t b_off, uint32_t length, const AllocExtentVector& allocs)
{
  if (extents.size() == 0) {
    // a compressed blob must already have its logical length configured;
    // an uncompressed one must not have it set yet.
    assert((is_compressed() && logical_length != 0) ||
      (!is_compressed() && logical_length == 0));

    extents.reserve(allocs.size() + (b_off ? 1 : 0));
    if (b_off) {
      // leading part of the blob stays unallocated
      extents.emplace_back(
        bluestore_pextent_t(bluestore_pextent_t::INVALID_OFFSET, b_off));
    }
    uint32_t new_len = b_off;
    for (auto& a : allocs) {
      extents.emplace_back(a.offset, a.length);
      new_len += a.length;
    }
    if (!is_compressed()) {
      logical_length = new_len;
    }
  } else {
    assert(!is_compressed()); // partial allocations are forbidden when
                              // compressed
    assert(b_off < logical_length);
    // find the extent containing b_off; cur_offs is that extent's logical
    // start and pos its index
    uint32_t cur_offs = 0;
    auto start_it = extents.begin();
    size_t pos = 0;
    while(true) {
      assert(start_it != extents.end());
      if (cur_offs + start_it->length > b_off) {
        break;
      }
      cur_offs += start_it->length;
      ++start_it;
      ++pos;
    }
    // bytes of the first touched extent that precede b_off
    uint32_t head = b_off - cur_offs;
    uint32_t end_off = b_off + length;
    auto end_it = start_it;

    // find the extent containing the end of the range; every extent touched
    // on the way must be invalid (not yet allocated)
    while (true) {
      assert(end_it != extents.end());
      assert(!end_it->is_valid());
      if (cur_offs + end_it->length >= end_off) {
        break;
      }
      cur_offs += end_it->length;
      ++end_it;
    }
    assert(cur_offs + end_it->length >= end_off);
    // bytes of the last touched extent that follow end_off
    uint32_t tail = cur_offs + end_it->length - end_off;

    // replace the touched extents with placeholders for head + allocs + tail
    start_it = extents.erase(start_it, end_it + 1);
    size_t count = allocs.size();
    count += head ? 1 : 0;
    count += tail ? 1 : 0;
    extents.insert(start_it,
                   count,
                   bluestore_pextent_t(
                     bluestore_pextent_t::INVALID_OFFSET, 0));

    // Workaround to resolve lack of proper iterator return in vector::insert
    // Looks like some gcc/stl implementations still lack it despite c++11
    // support claim
    start_it = extents.begin() + pos;

    // fill in the placeholders
    if (head) {
      start_it->length = head;
      ++start_it;
    }
    for(auto& e : allocs) {
      *start_it = e;
      ++start_it;
    }
    if (tail) {
      start_it->length = tail;
    }
  }
}
845 | ||
846 | // cut it out of extents | |
847 | struct vecbuilder { | |
848 | PExtentVector v; | |
849 | uint64_t invalid = 0; | |
850 | ||
851 | void add_invalid(uint64_t length) { | |
852 | invalid += length; | |
853 | } | |
854 | void flush() { | |
855 | if (invalid) { | |
856 | v.emplace_back(bluestore_pextent_t(bluestore_pextent_t::INVALID_OFFSET, | |
857 | invalid)); | |
858 | invalid = 0; | |
859 | } | |
860 | } | |
861 | void add(uint64_t offset, uint64_t length) { | |
862 | if (offset == bluestore_pextent_t::INVALID_OFFSET) { | |
863 | add_invalid(length); | |
864 | } | |
865 | else { | |
866 | flush(); | |
867 | v.emplace_back(bluestore_pextent_t(offset, length)); | |
868 | } | |
869 | } | |
870 | }; | |
871 | ||
872 | void bluestore_blob_t::allocated_test(const bluestore_pextent_t& alloc) | |
873 | { | |
874 | extents.emplace_back(alloc); | |
875 | if (!is_compressed()) { | |
876 | logical_length += alloc.length; | |
877 | } | |
878 | } | |
879 | ||
// Release physical space backing the blob.
//
// all     - when true, every valid extent is appended to *r and the extent
//           list collapses to a single invalid extent spanning the summed
//           length; `logical` is ignored and true is returned.
// logical - ranges to release, as (offset, length) in blob-logical offsets.
//           NOTE(review): the walk presumably requires them sorted ascending
//           and non-overlapping -- confirm with callers.
// r       - output; released physical ranges are appended, and a piece that
//           directly continues the last entry of *r is merged into it.
//
// In the partial case the extent list is rebuilt with the released parts
// marked invalid (consecutive invalid pieces merged) and false is returned.
bool bluestore_blob_t::release_extents(bool all,
                                       const PExtentVector& logical,
                                       PExtentVector* r)
{
  // common case: all of it?
  if (all) {
    uint64_t pos = 0;
    for (auto& e : extents) {
      if (e.is_valid()) {
        r->push_back(e);
      }
      pos += e.length;
    }
    assert(is_compressed() || get_logical_length() == pos);
    extents.resize(1);
    extents[0].offset = bluestore_pextent_t::INVALID_OFFSET;
    extents[0].length = pos;
    return true;
  }
  // remove from pextents according to logical release list
  vecbuilder vb;
  auto loffs_it = logical.begin();
  auto lend = logical.end();
  uint32_t pext_loffs_start = 0; //starting loffset of the current pextent
  uint32_t pext_loffs = 0; //current loffset
  auto pext_it = extents.begin();
  auto pext_end = extents.end();
  while (pext_it != pext_end) {
    if (loffs_it == lend ||
        pext_loffs_start + pext_it->length <= loffs_it->offset) {
      // no (further) release range touches this extent: keep whatever part
      // of it has not been consumed yet
      int delta0 = pext_loffs - pext_loffs_start;
      assert(delta0 >= 0);
      if ((uint32_t)delta0 < pext_it->length) {
        vb.add(pext_it->offset + delta0, pext_it->length - delta0);
      }
      pext_loffs_start += pext_it->length;
      pext_loffs = pext_loffs_start;
      ++pext_it;
    }
    else {
      //assert(pext_loffs == pext_loffs_start);
      // delta0: part of the current extent already consumed
      int delta0 = pext_loffs - pext_loffs_start;
      assert(delta0 >= 0);

      // delta: part to keep between the current position and the start of
      // the release range
      int delta = loffs_it->offset - pext_loffs;
      assert(delta >= 0);
      if (delta > 0) {
        vb.add(pext_it->offset + delta0, delta);
        pext_loffs += delta;
      }

      // last_r: last entry of *r, or r->end() when *r is empty
      PExtentVector::iterator last_r = r->end();
      if (r->begin() != last_r) {
        --last_r;
      }
      uint32_t to_release = loffs_it->length;
      // release piece by piece; a release range may span several extents
      do {
        uint32_t to_release_part =
          MIN(pext_it->length - delta0 - delta, to_release);
        auto o = pext_it->offset + delta0 + delta;
        if (last_r != r->end() && last_r->offset + last_r->length == o) {
          last_r->length += to_release_part;
        }
        else {
          last_r = r->emplace(r->end(), o, to_release_part);
        }
        to_release -= to_release_part;
        pext_loffs += to_release_part;
        if (pext_loffs == pext_loffs_start + pext_it->length) {
          // current extent fully consumed: advance to the next one
          pext_loffs_start += pext_it->length;
          pext_loffs = pext_loffs_start;
          pext_it++;
          delta0 = delta = 0;
        }
      } while (to_release > 0 && pext_it != pext_end);
      // what was actually released becomes invalid space in the new list
      vb.add_invalid(loffs_it->length - to_release);
      ++loffs_it;
    }
  }
  vb.flush();
  extents.swap(vb.v);
  return false;
}
963 | ||
964 | void bluestore_blob_t::split(uint32_t blob_offset, bluestore_blob_t& rb) | |
965 | { | |
966 | size_t left = blob_offset; | |
967 | uint32_t llen_lb = 0; | |
968 | uint32_t llen_rb = 0; | |
969 | unsigned i = 0; | |
970 | for (auto p = extents.begin(); p != extents.end(); ++p, ++i) { | |
971 | if (p->length <= left) { | |
972 | left -= p->length; | |
973 | llen_lb += p->length; | |
974 | continue; | |
975 | } | |
976 | if (left) { | |
977 | if (p->is_valid()) { | |
978 | rb.extents.emplace_back(bluestore_pextent_t(p->offset + left, | |
979 | p->length - left)); | |
980 | } | |
981 | else { | |
982 | rb.extents.emplace_back(bluestore_pextent_t( | |
983 | bluestore_pextent_t::INVALID_OFFSET, | |
984 | p->length - left)); | |
985 | } | |
986 | llen_rb += p->length - left; | |
987 | llen_lb += left; | |
988 | p->length = left; | |
989 | ++i; | |
990 | ++p; | |
991 | } | |
992 | while (p != extents.end()) { | |
993 | llen_rb += p->length; | |
994 | rb.extents.push_back(*p++); | |
995 | } | |
996 | extents.resize(i); | |
997 | logical_length = llen_lb; | |
998 | rb.logical_length = llen_rb; | |
999 | break; | |
1000 | } | |
1001 | rb.flags = flags; | |
1002 | ||
1003 | if (has_csum()) { | |
1004 | rb.csum_type = csum_type; | |
1005 | rb.csum_chunk_order = csum_chunk_order; | |
1006 | size_t csum_order = get_csum_chunk_size(); | |
1007 | assert(blob_offset % csum_order == 0); | |
1008 | size_t pos = (blob_offset / csum_order) * get_csum_value_size(); | |
1009 | // deep copy csum data | |
1010 | bufferptr old; | |
1011 | old.swap(csum_data); | |
1012 | rb.csum_data = bufferptr(old.c_str() + pos, old.length() - pos); | |
1013 | csum_data = bufferptr(old.c_str(), pos); | |
1014 | } | |
1015 | } | |
1016 | ||
1017 | // bluestore_shared_blob_t | |
1018 | ||
1019 | void bluestore_shared_blob_t::dump(Formatter *f) const | |
1020 | { | |
1021 | f->dump_int("sbid", sbid); | |
1022 | f->dump_object("ref_map", ref_map); | |
1023 | } | |
1024 | ||
1025 | void bluestore_shared_blob_t::generate_test_instances( | |
1026 | list<bluestore_shared_blob_t*>& ls) | |
1027 | { | |
1028 | ls.push_back(new bluestore_shared_blob_t(1)); | |
1029 | } | |
1030 | ||
1031 | ostream& operator<<(ostream& out, const bluestore_shared_blob_t& sb) | |
1032 | { | |
1033 | out << "(sbid 0x" << std::hex << sb.sbid << std::dec; | |
1034 | out << " " << sb.ref_map << ")"; | |
1035 | return out; | |
1036 | } | |
1037 | ||
1038 | // bluestore_onode_t | |
1039 | ||
1040 | void bluestore_onode_t::shard_info::dump(Formatter *f) const | |
1041 | { | |
1042 | f->dump_unsigned("offset", offset); | |
1043 | f->dump_unsigned("bytes", bytes); | |
1044 | } | |
1045 | ||
1046 | ostream& operator<<(ostream& out, const bluestore_onode_t::shard_info& si) | |
1047 | { | |
1048 | return out << std::hex << "0x" << si.offset << "(0x" << si.bytes << " bytes" | |
1049 | << std::dec << ")"; | |
1050 | } | |
1051 | ||
1052 | void bluestore_onode_t::dump(Formatter *f) const | |
1053 | { | |
1054 | f->dump_unsigned("nid", nid); | |
1055 | f->dump_unsigned("size", size); | |
1056 | f->open_object_section("attrs"); | |
1057 | for (auto p = attrs.begin(); p != attrs.end(); ++p) { | |
1058 | f->open_object_section("attr"); | |
1059 | f->dump_string("name", p->first.c_str()); // it's not quite std::string | |
1060 | f->dump_unsigned("len", p->second.length()); | |
1061 | f->close_section(); | |
1062 | } | |
1063 | f->close_section(); | |
1064 | f->dump_string("flags", get_flags_string()); | |
1065 | f->open_array_section("extent_map_shards"); | |
1066 | for (auto si : extent_map_shards) { | |
1067 | f->dump_object("shard", si); | |
1068 | } | |
1069 | f->close_section(); | |
1070 | f->dump_unsigned("expected_object_size", expected_object_size); | |
1071 | f->dump_unsigned("expected_write_size", expected_write_size); | |
1072 | f->dump_unsigned("alloc_hint_flags", alloc_hint_flags); | |
1073 | } | |
1074 | ||
1075 | void bluestore_onode_t::generate_test_instances(list<bluestore_onode_t*>& o) | |
1076 | { | |
1077 | o.push_back(new bluestore_onode_t()); | |
1078 | // FIXME | |
1079 | } | |
1080 | ||
1081 | // bluestore_deferred_op_t | |
1082 | ||
1083 | void bluestore_deferred_op_t::dump(Formatter *f) const | |
1084 | { | |
1085 | f->dump_unsigned("op", (int)op); | |
1086 | f->dump_unsigned("data_len", data.length()); | |
1087 | f->open_array_section("extents"); | |
1088 | for (auto& e : extents) { | |
1089 | f->dump_object("extent", e); | |
1090 | } | |
1091 | f->close_section(); | |
1092 | } | |
1093 | ||
1094 | void bluestore_deferred_op_t::generate_test_instances(list<bluestore_deferred_op_t*>& o) | |
1095 | { | |
1096 | o.push_back(new bluestore_deferred_op_t); | |
1097 | o.push_back(new bluestore_deferred_op_t); | |
1098 | o.back()->op = OP_WRITE; | |
1099 | o.back()->extents.push_back(bluestore_pextent_t(1, 2)); | |
1100 | o.back()->extents.push_back(bluestore_pextent_t(100, 5)); | |
1101 | o.back()->data.append("my data"); | |
1102 | } | |
1103 | ||
1104 | void bluestore_deferred_transaction_t::dump(Formatter *f) const | |
1105 | { | |
1106 | f->dump_unsigned("seq", seq); | |
1107 | f->open_array_section("ops"); | |
1108 | for (list<bluestore_deferred_op_t>::const_iterator p = ops.begin(); p != ops.end(); ++p) { | |
1109 | f->dump_object("op", *p); | |
1110 | } | |
1111 | f->close_section(); | |
1112 | ||
1113 | f->open_array_section("released extents"); | |
1114 | for (interval_set<uint64_t>::const_iterator p = released.begin(); p != released.end(); ++p) { | |
1115 | f->open_object_section("extent"); | |
1116 | f->dump_unsigned("offset", p.get_start()); | |
1117 | f->dump_unsigned("length", p.get_len()); | |
1118 | f->close_section(); | |
1119 | } | |
1120 | f->close_section(); | |
1121 | } | |
1122 | ||
1123 | void bluestore_deferred_transaction_t::generate_test_instances(list<bluestore_deferred_transaction_t*>& o) | |
1124 | { | |
1125 | o.push_back(new bluestore_deferred_transaction_t()); | |
1126 | o.push_back(new bluestore_deferred_transaction_t()); | |
1127 | o.back()->seq = 123; | |
1128 | o.back()->ops.push_back(bluestore_deferred_op_t()); | |
1129 | o.back()->ops.push_back(bluestore_deferred_op_t()); | |
1130 | o.back()->ops.back().op = bluestore_deferred_op_t::OP_WRITE; | |
1131 | o.back()->ops.back().extents.push_back(bluestore_pextent_t(1,7)); | |
1132 | o.back()->ops.back().data.append("foodata"); | |
1133 | } | |
1134 | ||
1135 | void bluestore_compression_header_t::dump(Formatter *f) const | |
1136 | { | |
1137 | f->dump_unsigned("type", type); | |
1138 | f->dump_unsigned("length", length); | |
1139 | } | |
1140 | ||
1141 | void bluestore_compression_header_t::generate_test_instances( | |
1142 | list<bluestore_compression_header_t*>& o) | |
1143 | { | |
1144 | o.push_back(new bluestore_compression_header_t); | |
1145 | o.push_back(new bluestore_compression_header_t(1)); | |
1146 | o.back()->length = 1234; | |
1147 | } |