]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2014 Red Hat | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | #include "bluestore_types.h" | |
16 | #include "common/Formatter.h" | |
17 | #include "common/Checksummer.h" | |
18 | #include "include/stringify.h" | |
19 | ||
20 | void ExtentList::add_extents(int64_t start, int64_t count) { | |
21 | AllocExtent *last_extent = NULL; | |
22 | bool can_merge = false; | |
23 | ||
24 | if (!m_extents->empty()) { | |
25 | last_extent = &(m_extents->back()); | |
26 | uint64_t last_offset = last_extent->end() / m_block_size; | |
27 | uint32_t last_length = last_extent->length / m_block_size; | |
28 | if ((last_offset == (uint64_t) start) && | |
29 | (!m_max_blocks || (last_length + count) <= m_max_blocks)) { | |
30 | can_merge = true; | |
31 | } | |
32 | } | |
33 | ||
34 | if (can_merge) { | |
35 | last_extent->length += (count * m_block_size); | |
36 | } else { | |
37 | m_extents->emplace_back(AllocExtent(start * m_block_size, | |
38 | count * m_block_size)); | |
39 | } | |
40 | } | |
41 | ||
42 | // bluestore_bdev_label_t | |
43 | ||
// Serialize this label into bl.
// The output starts with a plain-text prefix ("bluestore block device\n",
// the stringified osd_uuid, then "\n") and is followed by a versioned
// section (ENCODE_START(2, 1, ...)) carrying osd_uuid, size, btime,
// description and meta, in that order.
44 | void bluestore_bdev_label_t::encode(bufferlist& bl) const | |
45 | { | |
46 | // be slightly friendly to someone who looks at the device | |
47 | bl.append("bluestore block device\n"); | |
48 | bl.append(stringify(osd_uuid)); | |
49 | bl.append("\n"); | |
// versioned payload begins here; decode() skips the text prefix above
3efd9988 | 50 | ENCODE_START(2, 1, bl); |
7c673cae FG |
51 | ::encode(osd_uuid, bl); |
52 | ::encode(size, bl); | |
53 | ::encode(btime, bl); | |
54 | ::encode(description, bl); | |
// meta is the last field encoded; decode() reads it only for struct_v >= 2
3efd9988 | 55 | ::encode(meta, bl); |
7c673cae FG |
56 | ENCODE_FINISH(bl); |
57 | } | |
58 | ||
// Deserialize a label from iterator p.
// Skips a fixed 60-byte prefix, then reads the versioned section: osd_uuid,
// size, btime, description, and (only when struct_v >= 2) meta.
59 | void bluestore_bdev_label_t::decode(bufferlist::iterator& p) | |
60 | { | |
// NOTE(review): 60 presumably = 23-byte "bluestore block device\n" banner
// + 36-character stringified uuid + 1 newline, as written by encode();
// confirm the uuid string length is always 36.
61 | p.advance(60); // see above | |
3efd9988 | 62 | DECODE_START(2, p); |
7c673cae FG |
63 | ::decode(osd_uuid, p); |
64 | ::decode(size, p); | |
65 | ::decode(btime, p); | |
66 | ::decode(description, p); | |
// meta is absent from version-1 encodings
3efd9988 FG |
67 | if (struct_v >= 2) { |
68 | ::decode(meta, p); | |
69 | } | |
7c673cae FG |
70 | DECODE_FINISH(p); |
71 | } | |
72 | ||
73 | void bluestore_bdev_label_t::dump(Formatter *f) const | |
74 | { | |
75 | f->dump_stream("osd_uuid") << osd_uuid; | |
76 | f->dump_unsigned("size", size); | |
77 | f->dump_stream("btime") << btime; | |
78 | f->dump_string("description", description); | |
3efd9988 FG |
79 | for (auto& i : meta) { |
80 | f->dump_string(i.first.c_str(), i.second); | |
81 | } | |
7c673cae FG |
82 | } |
83 | ||
84 | void bluestore_bdev_label_t::generate_test_instances( | |
85 | list<bluestore_bdev_label_t*>& o) | |
86 | { | |
87 | o.push_back(new bluestore_bdev_label_t); | |
88 | o.push_back(new bluestore_bdev_label_t); | |
89 | o.back()->size = 123; | |
90 | o.back()->btime = utime_t(4, 5); | |
91 | o.back()->description = "fakey"; | |
3efd9988 | 92 | o.back()->meta["foo"] = "bar"; |
7c673cae FG |
93 | } |
94 | ||
95 | ostream& operator<<(ostream& out, const bluestore_bdev_label_t& l) | |
96 | { | |
97 | return out << "bdev(osd_uuid " << l.osd_uuid | |
3efd9988 FG |
98 | << ", size 0x" << std::hex << l.size << std::dec |
99 | << ", btime " << l.btime | |
100 | << ", desc " << l.description | |
101 | << ", " << l.meta.size() << " meta" | |
102 | << ")"; | |
7c673cae FG |
103 | } |
104 | ||
105 | // cnode_t | |
106 | ||
107 | void bluestore_cnode_t::dump(Formatter *f) const | |
108 | { | |
109 | f->dump_unsigned("bits", bits); | |
110 | } | |
111 | ||
112 | void bluestore_cnode_t::generate_test_instances(list<bluestore_cnode_t*>& o) | |
113 | { | |
114 | o.push_back(new bluestore_cnode_t()); | |
115 | o.push_back(new bluestore_cnode_t(0)); | |
116 | o.push_back(new bluestore_cnode_t(123)); | |
117 | } | |
118 | ||
28e407b8 AA |
119 | ostream& operator<<(ostream& out, const bluestore_cnode_t& l) |
120 | { | |
121 | return out << "cnode(bits " << l.bits << ")"; | |
122 | } | |
123 | ||
7c673cae FG |
124 | // bluestore_extent_ref_map_t |
125 | ||
126 | void bluestore_extent_ref_map_t::_check() const | |
127 | { | |
128 | uint64_t pos = 0; | |
129 | unsigned refs = 0; | |
130 | for (const auto &p : ref_map) { | |
131 | if (p.first < pos) | |
132 | assert(0 == "overlap"); | |
133 | if (p.first == pos && p.second.refs == refs) | |
134 | assert(0 == "unmerged"); | |
135 | pos = p.first + p.second.length; | |
136 | refs = p.second.refs; | |
137 | } | |
138 | } | |
139 | ||
140 | void bluestore_extent_ref_map_t::_maybe_merge_left( | |
141 | map<uint64_t,record_t>::iterator& p) | |
142 | { | |
143 | if (p == ref_map.begin()) | |
144 | return; | |
145 | auto q = p; | |
146 | --q; | |
147 | if (q->second.refs == p->second.refs && | |
148 | q->first + q->second.length == p->first) { | |
149 | q->second.length += p->second.length; | |
150 | ref_map.erase(p); | |
151 | p = q; | |
152 | } | |
153 | } | |
154 | ||
// Take one reference on the byte range [offset, offset + length).
// Parts of the range not yet present in ref_map are inserted with refs = 1;
// parts already present get their ref count incremented, with existing
// records split at the range boundaries as needed. After each step the
// touched record is merged with its left neighbour when both have the same
// ref count and are contiguous (see _maybe_merge_left()).
155 | void bluestore_extent_ref_map_t::get(uint64_t offset, uint32_t length) | |
156 | { | |
// Position p at the record containing offset if there is one, otherwise at
// the first record starting at or after offset (possibly end()).
157 | auto p = ref_map.lower_bound(offset); | |
158 | if (p != ref_map.begin()) { | |
159 | --p; | |
160 | if (p->first + p->second.length <= offset) { | |
161 | ++p; | |
162 | } | |
163 | } | |
// Each iteration consumes the front of the remaining range.
164 | while (length > 0) { | |
165 | if (p == ref_map.end()) { | |
166 | // nothing after offset; add the whole thing. | |
167 | p = ref_map.insert( | |
168 | map<uint64_t,record_t>::value_type(offset, record_t(length, 1))).first; | |
169 | break; | |
170 | } | |
171 | if (p->first > offset) { | |
172 | // gap | |
// fill the hole up to the next record (or the end of the range) with refs=1
173 | uint64_t newlen = MIN(p->first - offset, length); | |
174 | p = ref_map.insert( | |
175 | map<uint64_t,record_t>::value_type(offset, | |
176 | record_t(newlen, 1))).first; | |
177 | offset += newlen; | |
178 | length -= newlen; | |
179 | _maybe_merge_left(p); | |
180 | ++p; | |
181 | continue; | |
182 | } | |
183 | if (p->first < offset) { | |
184 | // split off the portion before offset | |
185 | assert(p->first + p->second.length > offset); | |
186 | uint64_t left = p->first + p->second.length - offset; | |
187 | p->second.length = offset - p->first; | |
188 | p = ref_map.insert(map<uint64_t,record_t>::value_type( | |
189 | offset, record_t(left, p->second.refs))).first; | |
190 | // continue below | |
191 | } | |
192 | assert(p->first == offset); | |
// The range ends inside this record: split off the untouched tail (keeping
// the old ref count) and bump only the leading part.
193 | if (length < p->second.length) { | |
194 | ref_map.insert(make_pair(offset + length, | |
195 | record_t(p->second.length - length, | |
196 | p->second.refs))); | |
197 | p->second.length = length; | |
198 | ++p->second.refs; | |
199 | break; | |
200 | } | |
// The whole record lies inside the range: bump it and move on.
201 | ++p->second.refs; | |
202 | offset += p->second.length; | |
203 | length -= p->second.length; | |
204 | _maybe_merge_left(p); | |
205 | ++p; | |
206 | } | |
// Final merge attempt for the record p ended on.
207 | if (p != ref_map.end()) | |
208 | _maybe_merge_left(p); | |
209 | //_check(); | |
210 | } | |
211 | ||
// Drop one reference on the byte range [offset, offset + length).
// Records whose ref count is 1 are erased and, when release is non-null,
// appended to *release as bluestore_pextent_t(start, length); records with
// a higher count are decremented. Records are split at the range boundaries
// as needed. Asserts if the range does not start inside an existing record.
// When maybe_unshared is non-null, *maybe_unshared is set to false if any
// record with refs != 1 was seen while processing the range or remains in
// ref_map afterwards, and to true otherwise.
212 | void bluestore_extent_ref_map_t::put( | |
213 | uint64_t offset, uint32_t length, | |
31f18b77 FG |
214 | PExtentVector *release, |
215 | bool *maybe_unshared) | |
7c673cae FG |
216 | { |
217 | //NB: existing entries in 'release' container must be preserved! | |
31f18b77 | 218 | bool unshared = true; |
7c673cae FG |
// Locate the record containing offset; it must exist.
219 | auto p = ref_map.lower_bound(offset); |
220 | if (p == ref_map.end() || p->first > offset) { | |
221 | if (p == ref_map.begin()) { | |
222 | assert(0 == "put on missing extent (nothing before)"); | |
223 | } | |
224 | --p; | |
225 | if (p->first + p->second.length <= offset) { | |
226 | assert(0 == "put on missing extent (gap)"); | |
227 | } | |
228 | } | |
// The record starts before offset: keep the leading part as-is and continue
// with a new record that starts exactly at offset.
229 | if (p->first < offset) { | |
230 | uint64_t left = p->first + p->second.length - offset; | |
231 | p->second.length = offset - p->first; | |
31f18b77 FG |
232 | if (p->second.refs != 1) { |
233 | unshared = false; | |
234 | } | |
7c673cae FG |
235 | p = ref_map.insert(map<uint64_t,record_t>::value_type( |
236 | offset, record_t(left, p->second.refs))).first; | |
237 | } | |
238 | while (length > 0) { | |
239 | assert(p->first == offset); | |
// The range ends inside this record: split off the untouched tail, then
// either decrement or release the leading part, and finish.
240 | if (length < p->second.length) { | |
31f18b77 FG |
241 | if (p->second.refs != 1) { |
242 | unshared = false; | |
243 | } | |
7c673cae FG |
244 | ref_map.insert(make_pair(offset + length, |
245 | record_t(p->second.length - length, | |
246 | p->second.refs))); | |
247 | if (p->second.refs > 1) { | |
248 | p->second.length = length; | |
249 | --p->second.refs; | |
31f18b77 FG |
250 | if (p->second.refs != 1) { |
251 | unshared = false; | |
252 | } | |
7c673cae FG |
253 | _maybe_merge_left(p); |
254 | } else { | |
255 | if (release) | |
256 | release->push_back(bluestore_pextent_t(p->first, length)); | |
257 | ref_map.erase(p); | |
258 | } | |
// skips the trailing merge below and goes straight to the unshared check
31f18b77 | 259 | goto out; |
7c673cae FG |
260 | } |
// The whole record lies inside the range.
261 | offset += p->second.length; |
262 | length -= p->second.length; | |
263 | if (p->second.refs > 1) { | |
264 | --p->second.refs; | |
31f18b77 FG |
265 | if (p->second.refs != 1) { |
266 | unshared = false; | |
267 | } | |
7c673cae FG |
268 | _maybe_merge_left(p); |
269 | ++p; | |
270 | } else { | |
271 | if (release) | |
272 | release->push_back(bluestore_pextent_t(p->first, p->second.length)); | |
273 | ref_map.erase(p++); | |
274 | } | |
275 | } | |
276 | if (p != ref_map.end()) | |
277 | _maybe_merge_left(p); | |
278 | //_check(); | |
31f18b77 FG |
279 | out: |
280 | if (maybe_unshared) { | |
281 | if (unshared) { | |
282 | // we haven't seen a ref != 1 yet; check the whole map. | |
283 | for (auto& p : ref_map) { | |
284 | if (p.second.refs != 1) { | |
285 | unshared = false; | |
286 | break; | |
287 | } | |
288 | } | |
289 | } | |
290 | *maybe_unshared = unshared; | |
291 | } | |
7c673cae FG |
292 | } |
293 | ||
294 | bool bluestore_extent_ref_map_t::contains(uint64_t offset, uint32_t length) const | |
295 | { | |
296 | auto p = ref_map.lower_bound(offset); | |
297 | if (p == ref_map.end() || p->first > offset) { | |
298 | if (p == ref_map.begin()) { | |
299 | return false; // nothing before | |
300 | } | |
301 | --p; | |
302 | if (p->first + p->second.length <= offset) { | |
303 | return false; // gap | |
304 | } | |
305 | } | |
306 | while (length > 0) { | |
307 | if (p == ref_map.end()) | |
308 | return false; | |
309 | if (p->first > offset) | |
310 | return false; | |
311 | if (p->first + p->second.length >= offset + length) | |
312 | return true; | |
313 | uint64_t overlap = p->first + p->second.length - offset; | |
314 | offset += overlap; | |
315 | length -= overlap; | |
316 | ++p; | |
317 | } | |
318 | return true; | |
319 | } | |
320 | ||
321 | bool bluestore_extent_ref_map_t::intersects( | |
322 | uint64_t offset, | |
323 | uint32_t length) const | |
324 | { | |
325 | auto p = ref_map.lower_bound(offset); | |
326 | if (p != ref_map.begin()) { | |
327 | --p; | |
328 | if (p->first + p->second.length <= offset) { | |
329 | ++p; | |
330 | } | |
331 | } | |
332 | if (p == ref_map.end()) | |
333 | return false; | |
334 | if (p->first >= offset + length) | |
335 | return false; | |
336 | return true; // intersects p! | |
337 | } | |
338 | ||
339 | void bluestore_extent_ref_map_t::dump(Formatter *f) const | |
340 | { | |
341 | f->open_array_section("ref_map"); | |
342 | for (auto& p : ref_map) { | |
343 | f->open_object_section("ref"); | |
344 | f->dump_unsigned("offset", p.first); | |
345 | f->dump_unsigned("length", p.second.length); | |
346 | f->dump_unsigned("refs", p.second.refs); | |
347 | f->close_section(); | |
348 | } | |
349 | f->close_section(); | |
350 | } | |
351 | ||
352 | void bluestore_extent_ref_map_t::generate_test_instances( | |
353 | list<bluestore_extent_ref_map_t*>& o) | |
354 | { | |
355 | o.push_back(new bluestore_extent_ref_map_t); | |
356 | o.push_back(new bluestore_extent_ref_map_t); | |
357 | o.back()->get(10, 10); | |
358 | o.back()->get(18, 22); | |
359 | o.back()->get(20, 20); | |
360 | o.back()->get(10, 25); | |
361 | o.back()->get(15, 20); | |
362 | } | |
363 | ||
364 | ostream& operator<<(ostream& out, const bluestore_extent_ref_map_t& m) | |
365 | { | |
366 | out << "ref_map("; | |
367 | for (auto p = m.ref_map.begin(); p != m.ref_map.end(); ++p) { | |
368 | if (p != m.ref_map.begin()) | |
369 | out << ","; | |
370 | out << std::hex << "0x" << p->first << "~" << p->second.length << std::dec | |
371 | << "=" << p->second.refs; | |
372 | } | |
373 | out << ")"; | |
374 | return out; | |
375 | } | |
376 | ||
377 | // bluestore_blob_use_tracker_t | |
378 | ||
379 | void bluestore_blob_use_tracker_t::allocate() | |
380 | { | |
381 | assert(num_au != 0); | |
382 | bytes_per_au = new uint32_t[num_au]; | |
383 | for (uint32_t i = 0; i < num_au; ++i) { | |
384 | bytes_per_au[i] = 0; | |
385 | } | |
386 | } | |
387 | ||
388 | void bluestore_blob_use_tracker_t::init( | |
389 | uint32_t full_length, uint32_t _au_size) { | |
390 | assert(!au_size || is_empty()); | |
391 | assert(_au_size > 0); | |
392 | assert(full_length > 0); | |
393 | clear(); | |
394 | uint32_t _num_au = ROUND_UP_TO(full_length, _au_size) / _au_size; | |
395 | au_size = _au_size; | |
396 | if( _num_au > 1 ) { | |
397 | num_au = _num_au; | |
398 | allocate(); | |
399 | } | |
400 | } | |
401 | ||
402 | void bluestore_blob_use_tracker_t::get( | |
403 | uint32_t offset, uint32_t length) | |
404 | { | |
405 | assert(au_size); | |
406 | if (!num_au) { | |
407 | total_bytes += length; | |
408 | }else { | |
409 | auto end = offset + length; | |
410 | ||
411 | while (offset < end) { | |
412 | auto phase = offset % au_size; | |
413 | bytes_per_au[offset / au_size] += | |
414 | MIN(au_size - phase, end - offset); | |
415 | offset += (phase ? au_size - phase : au_size); | |
416 | } | |
417 | } | |
418 | } | |
419 | ||
420 | bool bluestore_blob_use_tracker_t::put( | |
421 | uint32_t offset, uint32_t length, | |
422 | PExtentVector *release_units) | |
423 | { | |
424 | assert(au_size); | |
425 | if (release_units) { | |
426 | release_units->clear(); | |
427 | } | |
428 | bool maybe_empty = true; | |
429 | if (!num_au) { | |
430 | assert(total_bytes >= length); | |
431 | total_bytes -= length; | |
432 | } else { | |
433 | auto end = offset + length; | |
434 | uint64_t next_offs = 0; | |
435 | while (offset < end) { | |
436 | auto phase = offset % au_size; | |
437 | size_t pos = offset / au_size; | |
438 | auto diff = MIN(au_size - phase, end - offset); | |
439 | assert(diff <= bytes_per_au[pos]); | |
440 | bytes_per_au[pos] -= diff; | |
441 | offset += (phase ? au_size - phase : au_size); | |
442 | if (bytes_per_au[pos] == 0) { | |
443 | if (release_units) { | |
444 | if (release_units->empty() || next_offs != pos * au_size) { | |
445 | release_units->emplace_back(pos * au_size, au_size); | |
446 | } else { | |
447 | release_units->back().length += au_size; | |
448 | } | |
449 | next_offs += au_size; | |
450 | } | |
451 | } else { | |
452 | maybe_empty = false; // micro optimization detecting we aren't empty | |
453 | // even in the affected extent | |
454 | } | |
455 | } | |
456 | } | |
457 | bool empty = maybe_empty ? !is_not_empty() : false; | |
458 | if (empty && release_units) { | |
459 | release_units->clear(); | |
460 | } | |
461 | return empty; | |
462 | } | |
463 | ||
464 | bool bluestore_blob_use_tracker_t::can_split() const | |
465 | { | |
466 | return num_au > 0; | |
467 | } | |
468 | ||
469 | bool bluestore_blob_use_tracker_t::can_split_at(uint32_t blob_offset) const | |
470 | { | |
471 | assert(au_size); | |
472 | return (blob_offset % au_size) == 0 && | |
473 | blob_offset < num_au * au_size; | |
474 | } | |
475 | ||
476 | void bluestore_blob_use_tracker_t::split( | |
477 | uint32_t blob_offset, | |
478 | bluestore_blob_use_tracker_t* r) | |
479 | { | |
480 | assert(au_size); | |
481 | assert(can_split()); | |
482 | assert(can_split_at(blob_offset)); | |
483 | assert(r->is_empty()); | |
484 | ||
485 | uint32_t new_num_au = blob_offset / au_size; | |
486 | r->init( (num_au - new_num_au) * au_size, au_size); | |
487 | ||
488 | for (auto i = new_num_au; i < num_au; i++) { | |
489 | r->get((i - new_num_au) * au_size, bytes_per_au[i]); | |
490 | bytes_per_au[i] = 0; | |
491 | } | |
492 | if (new_num_au == 0) { | |
493 | clear(); | |
494 | } else if (new_num_au == 1) { | |
495 | uint32_t tmp = bytes_per_au[0]; | |
496 | uint32_t _au_size = au_size; | |
497 | clear(); | |
498 | au_size = _au_size; | |
499 | total_bytes = tmp; | |
500 | } else { | |
501 | num_au = new_num_au; | |
502 | } | |
503 | } | |
504 | ||
505 | bool bluestore_blob_use_tracker_t::equal( | |
506 | const bluestore_blob_use_tracker_t& other) const | |
507 | { | |
508 | if (!num_au && !other.num_au) { | |
509 | return total_bytes == other.total_bytes && au_size == other.au_size; | |
510 | } else if (num_au && other.num_au) { | |
511 | if (num_au != other.num_au || au_size != other.au_size) { | |
512 | return false; | |
513 | } | |
514 | for (size_t i = 0; i < num_au; i++) { | |
515 | if (bytes_per_au[i] != other.bytes_per_au[i]) { | |
516 | return false; | |
517 | } | |
518 | } | |
519 | return true; | |
520 | } | |
521 | ||
522 | uint32_t n = num_au ? num_au : other.num_au; | |
523 | uint32_t referenced = | |
524 | num_au ? other.get_referenced_bytes() : get_referenced_bytes(); | |
525 | auto bytes_per_au_tmp = num_au ? bytes_per_au : other.bytes_per_au; | |
526 | uint32_t my_referenced = 0; | |
527 | for (size_t i = 0; i < n; i++) { | |
528 | my_referenced += bytes_per_au_tmp[i]; | |
529 | if (my_referenced > referenced) { | |
530 | return false; | |
531 | } | |
532 | } | |
533 | return my_referenced == referenced; | |
534 | } | |
535 | ||
536 | void bluestore_blob_use_tracker_t::dump(Formatter *f) const | |
537 | { | |
538 | f->dump_unsigned("num_au", num_au); | |
539 | f->dump_unsigned("au_size", au_size); | |
540 | if (!num_au) { | |
541 | f->dump_unsigned("total_bytes", total_bytes); | |
542 | } else { | |
543 | f->open_array_section("bytes_per_au"); | |
544 | for (size_t i = 0; i < num_au; ++i) { | |
545 | f->dump_unsigned("", bytes_per_au[i]); | |
546 | } | |
547 | f->close_section(); | |
548 | } | |
549 | } | |
550 | ||
551 | void bluestore_blob_use_tracker_t::generate_test_instances( | |
552 | list<bluestore_blob_use_tracker_t*>& o) | |
553 | { | |
554 | o.push_back(new bluestore_blob_use_tracker_t()); | |
555 | o.back()->init(16, 16); | |
556 | o.back()->get(10, 10); | |
557 | o.back()->get(10, 5); | |
558 | o.push_back(new bluestore_blob_use_tracker_t()); | |
559 | o.back()->init(60, 16); | |
560 | o.back()->get(18, 22); | |
561 | o.back()->get(20, 20); | |
562 | o.back()->get(15, 20); | |
563 | } | |
564 | ||
565 | ostream& operator<<(ostream& out, const bluestore_blob_use_tracker_t& m) | |
566 | { | |
567 | out << "use_tracker(" << std::hex; | |
568 | if (!m.num_au) { | |
569 | out << "0x" << m.au_size | |
570 | << " " | |
571 | << "0x" << m.total_bytes; | |
572 | } else { | |
573 | out << "0x" << m.num_au | |
574 | << "*0x" << m.au_size | |
575 | << " 0x["; | |
576 | for (size_t i = 0; i < m.num_au; ++i) { | |
577 | if (i != 0) | |
578 | out << ","; | |
579 | out << m.bytes_per_au[i]; | |
580 | } | |
581 | out << "]"; | |
582 | } | |
583 | out << std::dec << ")"; | |
584 | return out; | |
585 | } | |
586 | ||
587 | // bluestore_pextent_t | |
588 | ||
589 | void bluestore_pextent_t::dump(Formatter *f) const | |
590 | { | |
591 | f->dump_unsigned("offset", offset); | |
592 | f->dump_unsigned("length", length); | |
593 | } | |
594 | ||
595 | ostream& operator<<(ostream& out, const bluestore_pextent_t& o) { | |
596 | if (o.is_valid()) | |
597 | return out << "0x" << std::hex << o.offset << "~" << o.length << std::dec; | |
598 | else | |
599 | return out << "!~" << std::hex << o.length << std::dec; | |
600 | } | |
601 | ||
602 | void bluestore_pextent_t::generate_test_instances(list<bluestore_pextent_t*>& ls) | |
603 | { | |
604 | ls.push_back(new bluestore_pextent_t); | |
605 | ls.push_back(new bluestore_pextent_t(1, 2)); | |
606 | } | |
607 | ||
608 | // bluestore_blob_t | |
609 | ||
610 | string bluestore_blob_t::get_flags_string(unsigned flags) | |
611 | { | |
612 | string s; | |
7c673cae FG |
613 | if (flags & FLAG_COMPRESSED) { |
614 | if (s.length()) | |
615 | s += '+'; | |
616 | s += "compressed"; | |
617 | } | |
618 | if (flags & FLAG_CSUM) { | |
619 | if (s.length()) | |
620 | s += '+'; | |
621 | s += "csum"; | |
622 | } | |
623 | if (flags & FLAG_HAS_UNUSED) { | |
624 | if (s.length()) | |
625 | s += '+'; | |
626 | s += "has_unused"; | |
627 | } | |
628 | if (flags & FLAG_SHARED) { | |
629 | if (s.length()) | |
630 | s += '+'; | |
631 | s += "shared"; | |
632 | } | |
633 | ||
634 | return s; | |
635 | } | |
636 | ||
637 | size_t bluestore_blob_t::get_csum_value_size() const | |
638 | { | |
639 | return Checksummer::get_csum_value_size(csum_type); | |
640 | } | |
641 | ||
642 | void bluestore_blob_t::dump(Formatter *f) const | |
643 | { | |
644 | f->open_array_section("extents"); | |
645 | for (auto& p : extents) { | |
646 | f->dump_object("extent", p); | |
647 | } | |
648 | f->close_section(); | |
649 | f->dump_unsigned("logical_length", logical_length); | |
650 | f->dump_unsigned("compressed_length", compressed_length); | |
651 | f->dump_unsigned("flags", flags); | |
652 | f->dump_unsigned("csum_type", csum_type); | |
653 | f->dump_unsigned("csum_chunk_order", csum_chunk_order); | |
654 | f->open_array_section("csum_data"); | |
655 | size_t n = get_csum_count(); | |
656 | for (unsigned i = 0; i < n; ++i) | |
657 | f->dump_unsigned("csum", get_csum_item(i)); | |
658 | f->close_section(); | |
659 | f->dump_unsigned("unused", unused); | |
660 | } | |
661 | ||
662 | void bluestore_blob_t::generate_test_instances(list<bluestore_blob_t*>& ls) | |
663 | { | |
664 | ls.push_back(new bluestore_blob_t); | |
665 | ls.push_back(new bluestore_blob_t(0)); | |
666 | ls.push_back(new bluestore_blob_t); | |
667 | ls.back()->allocated_test(bluestore_pextent_t(111, 222)); | |
668 | ls.push_back(new bluestore_blob_t); | |
669 | ls.back()->init_csum(Checksummer::CSUM_XXHASH32, 16, 65536); | |
670 | ls.back()->csum_data = buffer::claim_malloc(4, strdup("abcd")); | |
671 | ls.back()->add_unused(0, 3); | |
672 | ls.back()->add_unused(8, 8); | |
673 | ls.back()->allocated_test(bluestore_pextent_t(0x40100000, 0x10000)); | |
674 | ls.back()->allocated_test( | |
675 | bluestore_pextent_t(bluestore_pextent_t::INVALID_OFFSET, 0x1000)); | |
676 | ls.back()->allocated_test(bluestore_pextent_t(0x40120000, 0x10000)); | |
677 | } | |
678 | ||
679 | ostream& operator<<(ostream& out, const bluestore_blob_t& o) | |
680 | { | |
681 | out << "blob(" << o.get_extents(); | |
682 | if (o.is_compressed()) { | |
683 | out << " clen 0x" << std::hex | |
684 | << o.get_logical_length() | |
685 | << " -> 0x" | |
686 | << o.get_compressed_payload_length() | |
687 | << std::dec; | |
688 | } | |
689 | if (o.flags) { | |
690 | out << " " << o.get_flags_string(); | |
691 | } | |
181888fb | 692 | if (o.has_csum()) { |
7c673cae FG |
693 | out << " " << Checksummer::get_csum_type_string(o.csum_type) |
694 | << "/0x" << std::hex << (1ull << o.csum_chunk_order) << std::dec; | |
695 | } | |
696 | if (o.has_unused()) | |
697 | out << " unused=0x" << std::hex << o.unused << std::dec; | |
698 | out << ")"; | |
699 | return out; | |
700 | } | |
701 | ||
702 | void bluestore_blob_t::calc_csum(uint64_t b_off, const bufferlist& bl) | |
703 | { | |
704 | switch (csum_type) { | |
705 | case Checksummer::CSUM_XXHASH32: | |
706 | Checksummer::calculate<Checksummer::xxhash32>( | |
707 | get_csum_chunk_size(), b_off, bl.length(), bl, &csum_data); | |
708 | break; | |
709 | case Checksummer::CSUM_XXHASH64: | |
710 | Checksummer::calculate<Checksummer::xxhash64>( | |
711 | get_csum_chunk_size(), b_off, bl.length(), bl, &csum_data); | |
712 | break;; | |
713 | case Checksummer::CSUM_CRC32C: | |
714 | Checksummer::calculate<Checksummer::crc32c>( | |
715 | get_csum_chunk_size(), b_off, bl.length(), bl, &csum_data); | |
716 | break; | |
717 | case Checksummer::CSUM_CRC32C_16: | |
718 | Checksummer::calculate<Checksummer::crc32c_16>( | |
719 | get_csum_chunk_size(), b_off, bl.length(), bl, &csum_data); | |
720 | break; | |
721 | case Checksummer::CSUM_CRC32C_8: | |
722 | Checksummer::calculate<Checksummer::crc32c_8>( | |
723 | get_csum_chunk_size(), b_off, bl.length(), bl, &csum_data); | |
724 | break; | |
725 | } | |
726 | } | |
727 | ||
728 | int bluestore_blob_t::verify_csum(uint64_t b_off, const bufferlist& bl, | |
729 | int* b_bad_off, uint64_t *bad_csum) const | |
730 | { | |
731 | int r = 0; | |
732 | ||
733 | *b_bad_off = -1; | |
734 | switch (csum_type) { | |
735 | case Checksummer::CSUM_NONE: | |
736 | break; | |
737 | case Checksummer::CSUM_XXHASH32: | |
738 | *b_bad_off = Checksummer::verify<Checksummer::xxhash32>( | |
739 | get_csum_chunk_size(), b_off, bl.length(), bl, csum_data, bad_csum); | |
740 | break; | |
741 | case Checksummer::CSUM_XXHASH64: | |
742 | *b_bad_off = Checksummer::verify<Checksummer::xxhash64>( | |
743 | get_csum_chunk_size(), b_off, bl.length(), bl, csum_data, bad_csum); | |
744 | break; | |
745 | case Checksummer::CSUM_CRC32C: | |
746 | *b_bad_off = Checksummer::verify<Checksummer::crc32c>( | |
747 | get_csum_chunk_size(), b_off, bl.length(), bl, csum_data, bad_csum); | |
748 | break; | |
749 | case Checksummer::CSUM_CRC32C_16: | |
750 | *b_bad_off = Checksummer::verify<Checksummer::crc32c_16>( | |
751 | get_csum_chunk_size(), b_off, bl.length(), bl, csum_data, bad_csum); | |
752 | break; | |
753 | case Checksummer::CSUM_CRC32C_8: | |
754 | *b_bad_off = Checksummer::verify<Checksummer::crc32c_8>( | |
755 | get_csum_chunk_size(), b_off, bl.length(), bl, csum_data, bad_csum); | |
756 | break; | |
757 | default: | |
758 | r = -EOPNOTSUPP; | |
759 | break; | |
760 | } | |
761 | ||
762 | if (r < 0) | |
763 | return r; | |
764 | else if (*b_bad_off >= 0) | |
765 | return -1; // bad checksum | |
766 | else | |
767 | return 0; | |
768 | } | |
769 | ||
// Record that blob range [b_off, b_off + length) is now backed by the
// physical extents in allocs.
//
// First allocation (extents is empty): an invalid-offset placeholder of
// b_off bytes is added when b_off != 0, followed by every entry of allocs;
// unless the blob is compressed, logical_length becomes b_off plus the sum
// of the allocated lengths.
//
// Subsequent allocation: the blob must not be compressed and every extent
// overlapping the target range must be an invalid (unallocated) placeholder.
// Those placeholders are replaced by an optional head placeholder, the
// entries of allocs, and an optional tail placeholder.
770 | void bluestore_blob_t::allocated(uint32_t b_off, uint32_t length, const AllocExtentVector& allocs) | |
771 | { | |
772 | if (extents.size() == 0) { | |
773 | // if blob is compressed then logical length to be already configured | |
774 | // otherwise - to be unset. | |
775 | assert((is_compressed() && logical_length != 0) || | |
776 | (!is_compressed() && logical_length == 0)); | |
777 | ||
778 | extents.reserve(allocs.size() + (b_off ? 1 : 0)); | |
779 | if (b_off) { | |
780 | extents.emplace_back( | |
781 | bluestore_pextent_t(bluestore_pextent_t::INVALID_OFFSET, b_off)); | |
782 | } | |
783 | uint32_t new_len = b_off; | |
784 | for (auto& a : allocs) { | |
785 | extents.emplace_back(a.offset, a.length); | |
786 | new_len += a.length; | |
787 | } | |
788 | if (!is_compressed()) { | |
789 | logical_length = new_len; | |
790 | } | |
791 | } else { | |
792 | assert(!is_compressed()); // partial allocations are forbidden when | |
793 | // compressed | |
794 | assert(b_off < logical_length); | |
795 | uint32_t cur_offs = 0; | |
796 | auto start_it = extents.begin(); | |
797 | size_t pos = 0; | |
// Find the extent containing b_off; pos is its index, cur_offs its starting
// blob offset.
798 | while(true) { | |
31f18b77 | 799 | assert(start_it != extents.end()); |
7c673cae FG |
800 | if (cur_offs + start_it->length > b_off) { |
801 | break; | |
802 | } | |
803 | cur_offs += start_it->length; | |
804 | ++start_it; | |
805 | ++pos; | |
806 | } | |
// head: bytes of the first placeholder that stay unallocated before b_off
807 | uint32_t head = b_off - cur_offs; | |
808 | uint32_t end_off = b_off + length; | |
809 | auto end_it = start_it; | |
810 | ||
// Find the extent containing the end of the range; everything from start_it
// through end_it must be an invalid placeholder.
811 | while (true) { | |
31f18b77 | 812 | assert(end_it != extents.end()); |
7c673cae FG |
813 | assert(!end_it->is_valid()); |
814 | if (cur_offs + end_it->length >= end_off) { | |
815 | break; | |
816 | } | |
817 | cur_offs += end_it->length; | |
818 | ++end_it; | |
819 | } | |
820 | assert(cur_offs + end_it->length >= end_off); | |
// tail: bytes of the last placeholder that stay unallocated after end_off
821 | uint32_t tail = cur_offs + end_it->length - end_off; | |
822 | ||
// Drop the covered placeholders and insert `count` blank slots in their
// place; the slots are filled in below.
823 | start_it = extents.erase(start_it, end_it + 1); | |
824 | size_t count = allocs.size(); | |
825 | count += head ? 1 : 0; | |
826 | count += tail ? 1 : 0; | |
827 | extents.insert(start_it, | |
828 | count, | |
829 | bluestore_pextent_t( | |
830 | bluestore_pextent_t::INVALID_OFFSET, 0)); | |
831 | ||
832 | // Workaround to resolve lack of proper iterator return in vector::insert | |
833 | // Looks like some gcc/stl implementations still lack it despite c++11 | |
834 | // support claim | |
835 | start_it = extents.begin() + pos; | |
836 | ||
837 | if (head) { | |
838 | start_it->length = head; | |
839 | ++start_it; | |
840 | } | |
841 | for(auto& e : allocs) { | |
842 | *start_it = e; | |
843 | ++start_it; | |
844 | } | |
845 | if (tail) { | |
846 | start_it->length = tail; | |
847 | } | |
848 | } | |
849 | } | |
850 | ||
851 | // cut it out of extents | |
852 | struct vecbuilder { | |
853 | PExtentVector v; | |
854 | uint64_t invalid = 0; | |
855 | ||
856 | void add_invalid(uint64_t length) { | |
857 | invalid += length; | |
858 | } | |
859 | void flush() { | |
860 | if (invalid) { | |
861 | v.emplace_back(bluestore_pextent_t(bluestore_pextent_t::INVALID_OFFSET, | |
862 | invalid)); | |
863 | invalid = 0; | |
864 | } | |
865 | } | |
866 | void add(uint64_t offset, uint64_t length) { | |
867 | if (offset == bluestore_pextent_t::INVALID_OFFSET) { | |
868 | add_invalid(length); | |
869 | } | |
870 | else { | |
871 | flush(); | |
872 | v.emplace_back(bluestore_pextent_t(offset, length)); | |
873 | } | |
874 | } | |
875 | }; | |
876 | ||
877 | void bluestore_blob_t::allocated_test(const bluestore_pextent_t& alloc) | |
878 | { | |
879 | extents.emplace_back(alloc); | |
880 | if (!is_compressed()) { | |
881 | logical_length += alloc.length; | |
882 | } | |
883 | } | |
884 | ||
// Release physical space backing this blob.
//
// all == true: every valid extent is appended to *r and extents collapses to
// a single invalid placeholder covering the summed length; returns true.
//
// all == false: `logical` lists the ranges to release, with offsets compared
// against the running sum of extent lengths (i.e. blob-logical offsets).
// The physical pieces backing those ranges are appended to *r (extending the
// last entry of *r when physically contiguous), and extents is rebuilt with
// the released pieces replaced by invalid placeholders; returns false.
// NOTE(review): `logical` is presumably sorted by offset and non-overlapping;
// the delta >= 0 asserts below depend on it - confirm with callers.
885 | bool bluestore_blob_t::release_extents(bool all, | |
886 | const PExtentVector& logical, | |
887 | PExtentVector* r) | |
888 | { | |
889 | // common case: all of it? | |
890 | if (all) { | |
891 | uint64_t pos = 0; | |
892 | for (auto& e : extents) { | |
893 | if (e.is_valid()) { | |
894 | r->push_back(e); | |
895 | } | |
896 | pos += e.length; | |
897 | } | |
898 | assert(is_compressed() || get_logical_length() == pos); | |
899 | extents.resize(1); | |
900 | extents[0].offset = bluestore_pextent_t::INVALID_OFFSET; | |
901 | extents[0].length = pos; | |
902 | return true; | |
903 | } | |
904 | // remove from pextents according to logical release list | |
905 | vecbuilder vb; | |
906 | auto loffs_it = logical.begin(); | |
907 | auto lend = logical.end(); | |
908 | uint32_t pext_loffs_start = 0; //starting loffset of the current pextent | |
909 | uint32_t pext_loffs = 0; //current loffset | |
910 | auto pext_it = extents.begin(); | |
911 | auto pext_end = extents.end(); | |
912 | while (pext_it != pext_end) { | |
// No (further) release range touches this extent: keep whatever part of it
// has not been consumed yet and advance to the next extent.
913 | if (loffs_it == lend || | |
914 | pext_loffs_start + pext_it->length <= loffs_it->offset) { | |
915 | int delta0 = pext_loffs - pext_loffs_start; | |
916 | assert(delta0 >= 0); | |
917 | if ((uint32_t)delta0 < pext_it->length) { | |
918 | vb.add(pext_it->offset + delta0, pext_it->length - delta0); | |
919 | } | |
920 | pext_loffs_start += pext_it->length; | |
921 | pext_loffs = pext_loffs_start; | |
922 | ++pext_it; | |
923 | } | |
924 | else { | |
925 | //assert(pext_loffs == pext_loffs_start); | |
// delta0: bytes of the current extent already consumed;
// delta: bytes to keep between the current position and the release range.
926 | int delta0 = pext_loffs - pext_loffs_start; | |
927 | assert(delta0 >= 0); | |
928 | ||
929 | int delta = loffs_it->offset - pext_loffs; | |
930 | assert(delta >= 0); | |
931 | if (delta > 0) { | |
932 | vb.add(pext_it->offset + delta0, delta); | |
933 | pext_loffs += delta; | |
934 | } | |
935 | ||
// last_r: last entry of *r (candidate for contiguous extension), or r->end()
// when *r is empty.
936 | PExtentVector::iterator last_r = r->end(); | |
937 | if (r->begin() != last_r) { | |
938 | --last_r; | |
939 | } | |
940 | uint32_t to_release = loffs_it->length; | |
// Release piece by piece; one release range may span several extents.
941 | do { | |
942 | uint32_t to_release_part = | |
943 | MIN(pext_it->length - delta0 - delta, to_release); | |
944 | auto o = pext_it->offset + delta0 + delta; | |
945 | if (last_r != r->end() && last_r->offset + last_r->length == o) { | |
946 | last_r->length += to_release_part; | |
947 | } | |
948 | else { | |
949 | last_r = r->emplace(r->end(), o, to_release_part); | |
950 | } | |
951 | to_release -= to_release_part; | |
952 | pext_loffs += to_release_part; | |
953 | if (pext_loffs == pext_loffs_start + pext_it->length) { | |
954 | pext_loffs_start += pext_it->length; | |
955 | pext_loffs = pext_loffs_start; | |
956 | pext_it++; | |
957 | delta0 = delta = 0; | |
958 | } | |
959 | } while (to_release > 0 && pext_it != pext_end); | |
// the bytes actually released become an invalid placeholder in the new list
960 | vb.add_invalid(loffs_it->length - to_release); | |
961 | ++loffs_it; | |
962 | } | |
963 | } | |
964 | vb.flush(); | |
965 | extents.swap(vb.v); | |
966 | return false; | |
967 | } | |
968 | ||
969 | void bluestore_blob_t::split(uint32_t blob_offset, bluestore_blob_t& rb) | |
970 | { | |
971 | size_t left = blob_offset; | |
972 | uint32_t llen_lb = 0; | |
973 | uint32_t llen_rb = 0; | |
974 | unsigned i = 0; | |
975 | for (auto p = extents.begin(); p != extents.end(); ++p, ++i) { | |
976 | if (p->length <= left) { | |
977 | left -= p->length; | |
978 | llen_lb += p->length; | |
979 | continue; | |
980 | } | |
981 | if (left) { | |
982 | if (p->is_valid()) { | |
983 | rb.extents.emplace_back(bluestore_pextent_t(p->offset + left, | |
984 | p->length - left)); | |
985 | } | |
986 | else { | |
987 | rb.extents.emplace_back(bluestore_pextent_t( | |
988 | bluestore_pextent_t::INVALID_OFFSET, | |
989 | p->length - left)); | |
990 | } | |
991 | llen_rb += p->length - left; | |
992 | llen_lb += left; | |
993 | p->length = left; | |
994 | ++i; | |
995 | ++p; | |
996 | } | |
997 | while (p != extents.end()) { | |
998 | llen_rb += p->length; | |
999 | rb.extents.push_back(*p++); | |
1000 | } | |
1001 | extents.resize(i); | |
1002 | logical_length = llen_lb; | |
1003 | rb.logical_length = llen_rb; | |
1004 | break; | |
1005 | } | |
1006 | rb.flags = flags; | |
1007 | ||
1008 | if (has_csum()) { | |
1009 | rb.csum_type = csum_type; | |
1010 | rb.csum_chunk_order = csum_chunk_order; | |
1011 | size_t csum_order = get_csum_chunk_size(); | |
1012 | assert(blob_offset % csum_order == 0); | |
1013 | size_t pos = (blob_offset / csum_order) * get_csum_value_size(); | |
1014 | // deep copy csum data | |
1015 | bufferptr old; | |
1016 | old.swap(csum_data); | |
1017 | rb.csum_data = bufferptr(old.c_str() + pos, old.length() - pos); | |
1018 | csum_data = bufferptr(old.c_str(), pos); | |
1019 | } | |
1020 | } | |
1021 | ||
1022 | // bluestore_shared_blob_t | |
1023 | ||
1024 | void bluestore_shared_blob_t::dump(Formatter *f) const | |
1025 | { | |
1026 | f->dump_int("sbid", sbid); | |
1027 | f->dump_object("ref_map", ref_map); | |
1028 | } | |
1029 | ||
1030 | void bluestore_shared_blob_t::generate_test_instances( | |
1031 | list<bluestore_shared_blob_t*>& ls) | |
1032 | { | |
1033 | ls.push_back(new bluestore_shared_blob_t(1)); | |
1034 | } | |
1035 | ||
1036 | ostream& operator<<(ostream& out, const bluestore_shared_blob_t& sb) | |
1037 | { | |
31f18b77 FG |
1038 | out << "(sbid 0x" << std::hex << sb.sbid << std::dec; |
1039 | out << " " << sb.ref_map << ")"; | |
7c673cae FG |
1040 | return out; |
1041 | } | |
1042 | ||
1043 | // bluestore_onode_t | |
1044 | ||
1045 | void bluestore_onode_t::shard_info::dump(Formatter *f) const | |
1046 | { | |
1047 | f->dump_unsigned("offset", offset); | |
1048 | f->dump_unsigned("bytes", bytes); | |
1049 | } | |
1050 | ||
1051 | ostream& operator<<(ostream& out, const bluestore_onode_t::shard_info& si) | |
1052 | { | |
1053 | return out << std::hex << "0x" << si.offset << "(0x" << si.bytes << " bytes" | |
1054 | << std::dec << ")"; | |
1055 | } | |
1056 | ||
1057 | void bluestore_onode_t::dump(Formatter *f) const | |
1058 | { | |
1059 | f->dump_unsigned("nid", nid); | |
1060 | f->dump_unsigned("size", size); | |
1061 | f->open_object_section("attrs"); | |
1062 | for (auto p = attrs.begin(); p != attrs.end(); ++p) { | |
1063 | f->open_object_section("attr"); | |
1064 | f->dump_string("name", p->first.c_str()); // it's not quite std::string | |
1065 | f->dump_unsigned("len", p->second.length()); | |
1066 | f->close_section(); | |
1067 | } | |
1068 | f->close_section(); | |
1069 | f->dump_string("flags", get_flags_string()); | |
1070 | f->open_array_section("extent_map_shards"); | |
1071 | for (auto si : extent_map_shards) { | |
1072 | f->dump_object("shard", si); | |
1073 | } | |
1074 | f->close_section(); | |
1075 | f->dump_unsigned("expected_object_size", expected_object_size); | |
1076 | f->dump_unsigned("expected_write_size", expected_write_size); | |
1077 | f->dump_unsigned("alloc_hint_flags", alloc_hint_flags); | |
1078 | } | |
1079 | ||
1080 | void bluestore_onode_t::generate_test_instances(list<bluestore_onode_t*>& o) | |
1081 | { | |
1082 | o.push_back(new bluestore_onode_t()); | |
1083 | // FIXME | |
1084 | } | |
1085 | ||
1086 | // bluestore_deferred_op_t | |
1087 | ||
1088 | void bluestore_deferred_op_t::dump(Formatter *f) const | |
1089 | { | |
1090 | f->dump_unsigned("op", (int)op); | |
1091 | f->dump_unsigned("data_len", data.length()); | |
1092 | f->open_array_section("extents"); | |
1093 | for (auto& e : extents) { | |
1094 | f->dump_object("extent", e); | |
1095 | } | |
1096 | f->close_section(); | |
1097 | } | |
1098 | ||
1099 | void bluestore_deferred_op_t::generate_test_instances(list<bluestore_deferred_op_t*>& o) | |
1100 | { | |
1101 | o.push_back(new bluestore_deferred_op_t); | |
1102 | o.push_back(new bluestore_deferred_op_t); | |
1103 | o.back()->op = OP_WRITE; | |
1104 | o.back()->extents.push_back(bluestore_pextent_t(1, 2)); | |
1105 | o.back()->extents.push_back(bluestore_pextent_t(100, 5)); | |
1106 | o.back()->data.append("my data"); | |
1107 | } | |
1108 | ||
1109 | void bluestore_deferred_transaction_t::dump(Formatter *f) const | |
1110 | { | |
1111 | f->dump_unsigned("seq", seq); | |
1112 | f->open_array_section("ops"); | |
1113 | for (list<bluestore_deferred_op_t>::const_iterator p = ops.begin(); p != ops.end(); ++p) { | |
1114 | f->dump_object("op", *p); | |
1115 | } | |
1116 | f->close_section(); | |
1117 | ||
1118 | f->open_array_section("released extents"); | |
1119 | for (interval_set<uint64_t>::const_iterator p = released.begin(); p != released.end(); ++p) { | |
1120 | f->open_object_section("extent"); | |
1121 | f->dump_unsigned("offset", p.get_start()); | |
1122 | f->dump_unsigned("length", p.get_len()); | |
1123 | f->close_section(); | |
1124 | } | |
1125 | f->close_section(); | |
1126 | } | |
1127 | ||
1128 | void bluestore_deferred_transaction_t::generate_test_instances(list<bluestore_deferred_transaction_t*>& o) | |
1129 | { | |
1130 | o.push_back(new bluestore_deferred_transaction_t()); | |
1131 | o.push_back(new bluestore_deferred_transaction_t()); | |
1132 | o.back()->seq = 123; | |
1133 | o.back()->ops.push_back(bluestore_deferred_op_t()); | |
1134 | o.back()->ops.push_back(bluestore_deferred_op_t()); | |
1135 | o.back()->ops.back().op = bluestore_deferred_op_t::OP_WRITE; | |
1136 | o.back()->ops.back().extents.push_back(bluestore_pextent_t(1,7)); | |
1137 | o.back()->ops.back().data.append("foodata"); | |
1138 | } | |
1139 | ||
1140 | void bluestore_compression_header_t::dump(Formatter *f) const | |
1141 | { | |
1142 | f->dump_unsigned("type", type); | |
1143 | f->dump_unsigned("length", length); | |
1144 | } | |
1145 | ||
1146 | void bluestore_compression_header_t::generate_test_instances( | |
1147 | list<bluestore_compression_header_t*>& o) | |
1148 | { | |
1149 | o.push_back(new bluestore_compression_header_t); | |
1150 | o.push_back(new bluestore_compression_header_t(1)); | |
1151 | o.back()->length = 1234; | |
1152 | } |