ceph/src/os/bluestore/bluestore_types.h
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
 * Ceph - scalable distributed file system
 *
 * Copyright (C) 2014 Red Hat
 *
 * This is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License version 2.1, as published by the Free Software
 * Foundation. See file COPYING.
 *
 */

#ifndef CEPH_OSD_BLUESTORE_BLUESTORE_TYPES_H
#define CEPH_OSD_BLUESTORE_BLUESTORE_TYPES_H

#include <ostream>
#include <bitset>
#include <type_traits>
#include "include/types.h"
#include "include/interval_set.h"
#include "include/utime.h"
#include "common/hobject.h"
#include "compressor/Compressor.h"
#include "common/Checksummer.h"
#include "include/mempool.h"

namespace ceph {
  class Formatter;
}

/// label for block device
struct bluestore_bdev_label_t {
  uuid_d osd_uuid;     ///< osd uuid
  uint64_t size;       ///< device size
  utime_t btime;       ///< birth time
  string description;  ///< device description

  map<string,string> meta; ///< {read,write}_meta() content from ObjectStore

  void encode(bufferlist& bl) const;
  void decode(bufferlist::iterator& p);
  void dump(Formatter *f) const;
  static void generate_test_instances(list<bluestore_bdev_label_t*>& o);
};
WRITE_CLASS_ENCODER(bluestore_bdev_label_t)

ostream& operator<<(ostream& out, const bluestore_bdev_label_t& l);

/// collection metadata
struct bluestore_cnode_t {
  uint32_t bits;   ///< how many bits of coll pgid are significant

  explicit bluestore_cnode_t(int b=0) : bits(b) {}

  DENC(bluestore_cnode_t, v, p) {
    DENC_START(1, 1, p);
    denc(v.bits, p);
    DENC_FINISH(p);
  }
  void dump(Formatter *f) const;
  static void generate_test_instances(list<bluestore_cnode_t*>& o);
};
WRITE_CLASS_DENC(bluestore_cnode_t)

ostream& operator<<(ostream& out, const bluestore_cnode_t& l);

class AllocExtent;
typedef mempool::bluestore_alloc::vector<AllocExtent> AllocExtentVector;
class AllocExtent {
public:
  uint64_t offset;
  uint32_t length;

  AllocExtent() {
    offset = 0;
    length = 0;
  }

  AllocExtent(int64_t off, int32_t len) : offset(off), length(len) { }
  uint64_t end() const {
    return offset + length;
  }
  bool operator==(const AllocExtent& other) const {
    return offset == other.offset && length == other.length;
  }
};

inline static ostream& operator<<(ostream& out, const AllocExtent& e) {
  return out << "0x" << std::hex << e.offset << "~" << e.length << std::dec;
}

class ExtentList {
  AllocExtentVector *m_extents;
  int64_t m_block_size;
  int64_t m_max_blocks;

public:
  void init(AllocExtentVector *extents, int64_t block_size,
            uint64_t max_alloc_size) {
    m_extents = extents;
    m_block_size = block_size;
    m_max_blocks = max_alloc_size / block_size;
    assert(m_extents->empty());
  }

  ExtentList(AllocExtentVector *extents, int64_t block_size) {
    init(extents, block_size, 0);
  }

  ExtentList(AllocExtentVector *extents, int64_t block_size,
             uint64_t max_alloc_size) {
    init(extents, block_size, max_alloc_size);
  }

  void reset() {
    m_extents->clear();
  }

  void add_extents(int64_t start, int64_t count);

  AllocExtentVector *get_extents() {
    return m_extents;
  }

  std::pair<int64_t, int64_t> get_nth_extent(int index) {
    return std::make_pair
      ((*m_extents)[index].offset / m_block_size,
       (*m_extents)[index].length / m_block_size);
  }

  int64_t get_extent_count() {
    return m_extents->size();
  }
};
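// Usage sketch (illustrative only; add_extents() is implemented alongside the
// allocators, and the 4 KiB block size below is just an assumption for the
// example).  The list records allocator output in block units and converts it
// back to byte extents in the backing AllocExtentVector:
//
//   AllocExtentVector extents;
//   ExtentList el(&extents, 0x1000);          // track extents in 4 KiB blocks
//   el.add_extents(16, 4);                    // blocks [16, 20) == 0x10000~0x4000
//   auto blk = el.get_nth_extent(0);          // {offset, length} in block units
//   uint64_t byte_off = blk.first * 0x1000;   // back to a byte offset: 0x10000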


/// pextent: physical extent
struct bluestore_pextent_t : public AllocExtent {
  const static uint64_t INVALID_OFFSET = ~0ull;

  bluestore_pextent_t() : AllocExtent() {}
  bluestore_pextent_t(uint64_t o, uint64_t l) : AllocExtent(o, l) {}
  bluestore_pextent_t(const AllocExtent &ext) :
    AllocExtent(ext.offset, ext.length) { }

  bluestore_pextent_t& operator=(const AllocExtent &ext) {
    offset = ext.offset;
    length = ext.length;
    return *this;
  }
  bool is_valid() const {
    return offset != INVALID_OFFSET;
  }

  DENC(bluestore_pextent_t, v, p) {
    denc_lba(v.offset, p);
    denc_varint_lowz(v.length, p);
  }

  void dump(Formatter *f) const;
  static void generate_test_instances(list<bluestore_pextent_t*>& ls);
};
WRITE_CLASS_DENC(bluestore_pextent_t)

ostream& operator<<(ostream& out, const bluestore_pextent_t& o);

typedef mempool::bluestore_cache_other::vector<bluestore_pextent_t> PExtentVector;

template<>
struct denc_traits<PExtentVector> {
  static constexpr bool supported = true;
  static constexpr bool bounded = false;
  static constexpr bool featured = false;
  static constexpr bool need_contiguous = true;
  static void bound_encode(const PExtentVector& v, size_t& p) {
    p += sizeof(uint32_t);
    const auto size = v.size();
    if (size) {
      size_t per = 0;
      denc(v.front(), per);
      p += per * size;
    }
  }
  static void encode(const PExtentVector& v,
                     bufferlist::contiguous_appender& p) {
    denc_varint(v.size(), p);
    for (auto& i : v) {
      denc(i, p);
    }
  }
  static void decode(PExtentVector& v, bufferptr::iterator& p) {
    unsigned num;
    denc_varint(num, p);
    v.clear();
    v.resize(num);
    for (unsigned i=0; i<num; ++i) {
      denc(v[i], p);
    }
  }
};


/// extent_map: a map of reference counted extents
struct bluestore_extent_ref_map_t {
  struct record_t {
    uint32_t length;
    uint32_t refs;
    record_t(uint32_t l=0, uint32_t r=0) : length(l), refs(r) {}
    DENC(bluestore_extent_ref_map_t::record_t, v, p) {
      denc_varint_lowz(v.length, p);
      denc_varint(v.refs, p);
    }
  };

  typedef mempool::bluestore_cache_other::map<uint64_t,record_t> map_t;
  map_t ref_map;

  void _check() const;
  void _maybe_merge_left(map_t::iterator& p);

  void clear() {
    ref_map.clear();
  }
  bool empty() const {
    return ref_map.empty();
  }

  void get(uint64_t offset, uint32_t len);
  void put(uint64_t offset, uint32_t len, PExtentVector *release,
           bool *maybe_unshared);

  bool contains(uint64_t offset, uint32_t len) const;
  bool intersects(uint64_t offset, uint32_t len) const;

  void bound_encode(size_t& p) const {
    denc_varint((uint32_t)0, p);
    if (!ref_map.empty()) {
      size_t elem_size = 0;
      denc_varint_lowz((uint64_t)0, elem_size);
      ref_map.begin()->second.bound_encode(elem_size);
      p += elem_size * ref_map.size();
    }
  }
  void encode(bufferlist::contiguous_appender& p) const {
    uint32_t n = ref_map.size();
    denc_varint(n, p);
    if (n) {
      auto i = ref_map.begin();
      denc_varint_lowz(i->first, p);
      i->second.encode(p);
      int64_t pos = i->first;
      while (--n) {
        ++i;
        denc_varint_lowz((int64_t)i->first - pos, p);
        i->second.encode(p);
        pos = i->first;
      }
    }
  }
  void decode(bufferptr::iterator& p) {
    uint32_t n;
    denc_varint(n, p);
    if (n) {
      int64_t pos;
      denc_varint_lowz(pos, p);
      ref_map[pos].decode(p);
      while (--n) {
        int64_t delta;
        denc_varint_lowz(delta, p);
        pos += delta;
        ref_map[pos].decode(p);
      }
    }
  }
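  // Worked example of the encoding above (illustrative): a ref_map of
  //   {0x1000: {length=0x1000, refs=2}, 0x8000: {length=0x2000, refs=1}}
  // is written as the count (2), then 0x1000 and its record, then the delta
  // 0x7000 (0x8000 - 0x1000) and its record; decode() accumulates the deltas
  // back into absolute offsets.  Assuming get() adds one reference over the
  // given range (it is implemented in bluestore_types.cc), such a map could
  // be built with:
  //
  //   bluestore_extent_ref_map_t m;
  //   m.get(0x1000, 0x1000);   // refs == 1 on [0x1000, 0x2000)
  //   m.get(0x1000, 0x1000);   // refs == 2
  //   m.get(0x8000, 0x2000);   // separate run, refs == 1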

  void dump(Formatter *f) const;
  static void generate_test_instances(list<bluestore_extent_ref_map_t*>& o);
};
WRITE_CLASS_DENC(bluestore_extent_ref_map_t)


ostream& operator<<(ostream& out, const bluestore_extent_ref_map_t& rm);
static inline bool operator==(const bluestore_extent_ref_map_t::record_t& l,
                              const bluestore_extent_ref_map_t::record_t& r) {
  return l.length == r.length && l.refs == r.refs;
}
static inline bool operator==(const bluestore_extent_ref_map_t& l,
                              const bluestore_extent_ref_map_t& r) {
  return l.ref_map == r.ref_map;
}
static inline bool operator!=(const bluestore_extent_ref_map_t& l,
                              const bluestore_extent_ref_map_t& r) {
  return !(l == r);
}

/// blob_use_tracker: a set of per-alloc unit ref counters to track blob usage
struct bluestore_blob_use_tracker_t {
  // N.B.: there is no need to minimize the size of au_size/num_au
  // (e.g. by using a single byte for au_size) since:
  // 1) the struct isn't packed, so it would be padded anyway; and even if
  //    it were packed, see 2);
  // 2) the memory allocator has its own granularity, most probably >= 8 bytes.
  //
  uint32_t au_size;  // allocation (= tracking) unit size,
                     // == 0 if uninitialized
  uint32_t num_au;   // number of allocation units tracked,
                     // == 0 if a single unit or the whole blob is tracked

  union {
    uint32_t* bytes_per_au;
    uint32_t total_bytes;
  };

  bluestore_blob_use_tracker_t()
    : au_size(0), num_au(0), bytes_per_au(nullptr) {
  }
  ~bluestore_blob_use_tracker_t() {
    clear();
  }

  void clear() {
    if (num_au != 0) {
      delete[] bytes_per_au;
    }
    bytes_per_au = 0;
    au_size = 0;
    num_au = 0;
  }

  uint32_t get_referenced_bytes() const {
    uint32_t total = 0;
    if (!num_au) {
      total = total_bytes;
    } else {
      for (size_t i = 0; i < num_au; ++i) {
        total += bytes_per_au[i];
      }
    }
    return total;
  }
  bool is_not_empty() const {
    if (!num_au) {
      return total_bytes != 0;
    } else {
      for (size_t i = 0; i < num_au; ++i) {
        if (bytes_per_au[i]) {
          return true;
        }
      }
    }
    return false;
  }
  bool is_empty() const {
    return !is_not_empty();
  }
  void prune_tail(uint32_t new_len) {
    if (num_au) {
      new_len = ROUND_UP_TO(new_len, au_size);
      uint32_t _num_au = new_len / au_size;
      assert(_num_au <= num_au);
      if (_num_au) {
        num_au = _num_au; // bytes_per_au array is left unmodified

      } else {
        clear();
      }
    }
  }
  void add_tail(uint32_t new_len, uint32_t _au_size) {
    auto full_size = au_size * (num_au ? num_au : 1);
    assert(new_len >= full_size);
    if (new_len == full_size) {
      return;
    }
    if (!num_au) {
      uint32_t old_total = total_bytes;
      total_bytes = 0;
      init(new_len, _au_size);
      assert(num_au);
      bytes_per_au[0] = old_total;
    } else {
      assert(_au_size == au_size);
      new_len = ROUND_UP_TO(new_len, au_size);
      uint32_t _num_au = new_len / au_size;
      assert(_num_au >= num_au);
      if (_num_au > num_au) {
        auto old_bytes = bytes_per_au;
        auto old_num_au = num_au;
        num_au = _num_au;
        allocate();
        for (size_t i = 0; i < old_num_au; i++) {
          bytes_per_au[i] = old_bytes[i];
        }
        for (size_t i = old_num_au; i < num_au; i++) {
          bytes_per_au[i] = 0;
        }
        delete[] old_bytes;
      }
    }
  }

  void init(
    uint32_t full_length,
    uint32_t _au_size);

  void get(
    uint32_t offset,
    uint32_t len);

  /// put: returns true if the blob has no references left after the call;
  /// in that case *release is not filled in, for the sake of performance.
  /// Returns false if some references to the blob remain; in that case
  /// *release contains the pextents (identified by their offsets relative
  /// to the blob start) that are no longer used and can be safely
  /// deallocated.
  bool put(
    uint32_t offset,
    uint32_t len,
    PExtentVector *release);

  bool can_split() const;
  bool can_split_at(uint32_t blob_offset) const;
  void split(
    uint32_t blob_offset,
    bluestore_blob_use_tracker_t* r);

  bool equal(
    const bluestore_blob_use_tracker_t& other) const;

  void bound_encode(size_t& p) const {
    denc_varint(au_size, p);
    if (au_size) {
      denc_varint(num_au, p);
      if (!num_au) {
        denc_varint(total_bytes, p);
      } else {
        size_t elem_size = 0;
        denc_varint((uint32_t)0, elem_size);
        p += elem_size * num_au;
      }
    }
  }
  void encode(bufferlist::contiguous_appender& p) const {
    denc_varint(au_size, p);
    if (au_size) {
      denc_varint(num_au, p);
      if (!num_au) {
        denc_varint(total_bytes, p);
      } else {
        size_t elem_size = 0;
        denc_varint((uint32_t)0, elem_size);
        for (size_t i = 0; i < num_au; ++i) {
          denc_varint(bytes_per_au[i], p);
        }
      }
    }
  }
  void decode(bufferptr::iterator& p) {
    clear();
    denc_varint(au_size, p);
    if (au_size) {
      denc_varint(num_au, p);
      if (!num_au) {
        denc_varint(total_bytes, p);
      } else {
        allocate();
        for (size_t i = 0; i < num_au; ++i) {
          denc_varint(bytes_per_au[i], p);
        }
      }
    }
  }

  void dump(Formatter *f) const;
  static void generate_test_instances(list<bluestore_blob_use_tracker_t*>& o);
private:
  void allocate();
};
WRITE_CLASS_DENC(bluestore_blob_use_tracker_t)
ostream& operator<<(ostream& out, const bluestore_blob_use_tracker_t& rm);
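// Usage sketch for the use tracker (illustrative only; init()/get()/put() are
// implemented in bluestore_types.cc and the sizes below are arbitrary):
//
//   bluestore_blob_use_tracker_t t;
//   t.init(0x20000, 0x10000);           // 128 KiB blob tracked in two 64 KiB AUs
//   t.get(0x0, 0x18000);                // reference the first 96 KiB
//   PExtentVector release;
//   bool no_refs = t.put(0x0, 0x18000, &release);
//   // no_refs == true: nothing is referenced any more; per the comment on
//   // put(), release is not filled in this case.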

/// blob: a piece of data on disk
struct bluestore_blob_t {
private:
  PExtentVector extents;           ///< raw data position on device
  uint32_t logical_length = 0;     ///< original length of data stored in the blob
  uint32_t compressed_length = 0;  ///< compressed length if any

public:
  enum {
    LEGACY_FLAG_MUTABLE = 1,  ///< [legacy] blob can be overwritten or split
    FLAG_COMPRESSED = 2,      ///< blob is compressed
    FLAG_CSUM = 4,            ///< blob has checksums
    FLAG_HAS_UNUSED = 8,      ///< blob has unused map
    FLAG_SHARED = 16,         ///< blob is shared; see external SharedBlob
  };
  static string get_flags_string(unsigned flags);

  uint32_t flags = 0;              ///< FLAG_*

  typedef uint16_t unused_t;
  unused_t unused = 0;             ///< portion that has never been written to (bitmap)

  uint8_t csum_type = Checksummer::CSUM_NONE;      ///< CSUM_*
  uint8_t csum_chunk_order = 0;                    ///< csum block size is 1<<csum_chunk_order bytes

  bufferptr csum_data;             ///< opaque vector of csum data

  bluestore_blob_t(uint32_t f = 0) : flags(f) {}

  const PExtentVector& get_extents() const {
    return extents;
  }

  DENC_HELPERS;
  void bound_encode(size_t& p, uint64_t struct_v) const {
    assert(struct_v == 1 || struct_v == 2);
    denc(extents, p);
    denc_varint(flags, p);
    denc_varint_lowz(logical_length, p);
    denc_varint_lowz(compressed_length, p);
    denc(csum_type, p);
    denc(csum_chunk_order, p);
    denc_varint(csum_data.length(), p);
    p += csum_data.length();
    p += sizeof(unused_t);
  }

  void encode(bufferlist::contiguous_appender& p, uint64_t struct_v) const {
    assert(struct_v == 1 || struct_v == 2);
    denc(extents, p);
    denc_varint(flags, p);
    if (is_compressed()) {
      denc_varint_lowz(logical_length, p);
      denc_varint_lowz(compressed_length, p);
    }
    if (has_csum()) {
      denc(csum_type, p);
      denc(csum_chunk_order, p);
      denc_varint(csum_data.length(), p);
      memcpy(p.get_pos_add(csum_data.length()), csum_data.c_str(),
             csum_data.length());
    }
    if (has_unused()) {
      denc(unused, p);
    }
  }

  void decode(bufferptr::iterator& p, uint64_t struct_v) {
    assert(struct_v == 1 || struct_v == 2);
    denc(extents, p);
    denc_varint(flags, p);
    if (is_compressed()) {
      denc_varint_lowz(logical_length, p);
      denc_varint_lowz(compressed_length, p);
    } else {
      logical_length = get_ondisk_length();
    }
    if (has_csum()) {
      denc(csum_type, p);
      denc(csum_chunk_order, p);
      int len;
      denc_varint(len, p);
      csum_data = p.get_ptr(len);
      csum_data.reassign_to_mempool(mempool::mempool_bluestore_cache_other);
    }
    if (has_unused()) {
      denc(unused, p);
    }
  }

  bool can_split() const {
    return
      !has_flag(FLAG_SHARED) &&
      !has_flag(FLAG_COMPRESSED) &&
      !has_flag(FLAG_HAS_UNUSED); // splitting unused set is complex
  }
  bool can_split_at(uint32_t blob_offset) const {
    return !has_csum() || blob_offset % get_csum_chunk_size() == 0;
  }

  void dump(Formatter *f) const;
  static void generate_test_instances(list<bluestore_blob_t*>& ls);

  bool has_flag(unsigned f) const {
    return flags & f;
  }
  void set_flag(unsigned f) {
    flags |= f;
  }
  void clear_flag(unsigned f) {
    flags &= ~f;
  }
  string get_flags_string() const {
    return get_flags_string(flags);
  }

  void set_compressed(uint64_t clen_orig, uint64_t clen) {
    set_flag(FLAG_COMPRESSED);
    logical_length = clen_orig;
    compressed_length = clen;
  }
  bool is_mutable() const {
    return !is_compressed() && !is_shared();
  }
  bool is_compressed() const {
    return has_flag(FLAG_COMPRESSED);
  }
  bool has_csum() const {
    return has_flag(FLAG_CSUM);
  }
  bool has_unused() const {
    return has_flag(FLAG_HAS_UNUSED);
  }
  bool is_shared() const {
    return has_flag(FLAG_SHARED);
  }

  /// return chunk (i.e. min readable block) size for the blob
  uint64_t get_chunk_size(uint64_t dev_block_size) const {
    return has_csum() ?
      MAX(dev_block_size, get_csum_chunk_size()) : dev_block_size;
  }
  uint32_t get_csum_chunk_size() const {
    return 1 << csum_chunk_order;
  }
  uint32_t get_compressed_payload_length() const {
    return is_compressed() ? compressed_length : 0;
  }
  uint64_t calc_offset(uint64_t x_off, uint64_t *plen) const {
    auto p = extents.begin();
    assert(p != extents.end());
    while (x_off >= p->length) {
      x_off -= p->length;
      ++p;
      assert(p != extents.end());
    }
    if (plen)
      *plen = p->length - x_off;
    return p->offset + x_off;
  }
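  // Worked example (illustrative): with extents = [{0x40000, 0x8000}, {0xa0000, 0x8000}],
  // calc_offset(0x9000, &plen) skips the first 0x8000-byte extent and returns
  // 0xa0000 + 0x1000 == 0xa1000, with plen == 0x7000 (bytes left in that extent).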

  // validate whether or not the status of pextents within the given range
  // meets the requirement (allocated or unallocated).
  bool _validate_range(uint64_t b_off, uint64_t b_len,
                       bool require_allocated) const {
    auto p = extents.begin();
    assert(p != extents.end());
    while (b_off >= p->length) {
      b_off -= p->length;
      ++p;
      assert(p != extents.end());
    }
    b_len += b_off;
    while (b_len) {
      assert(p != extents.end());
      if (require_allocated != p->is_valid()) {
        return false;
      }

      if (p->length >= b_len) {
        return true;
      }
      b_len -= p->length;
      ++p;
    }
    assert(0 == "we should not get here");
  }

  /// return true if the entire range is allocated
  /// (mapped to extents on disk)
  bool is_allocated(uint64_t b_off, uint64_t b_len) const {
    return _validate_range(b_off, b_len, true);
  }

  /// return true if the entire range is unallocated
  /// (not mapped to extents on disk)
  bool is_unallocated(uint64_t b_off, uint64_t b_len) const {
    return _validate_range(b_off, b_len, false);
  }

  /// return true if the logical range has never been used
  bool is_unused(uint64_t offset, uint64_t length) const {
    if (!has_unused()) {
      return false;
    }
    uint64_t blob_len = get_logical_length();
    assert((blob_len % (sizeof(unused)*8)) == 0);
    assert(offset + length <= blob_len);
    uint64_t chunk_size = blob_len / (sizeof(unused)*8);
    uint64_t start = offset / chunk_size;
    uint64_t end = ROUND_UP_TO(offset + length, chunk_size) / chunk_size;
    auto i = start;
    while (i < end && (unused & (1u << i))) {
      i++;
    }
    return i >= end;
  }

  /// mark a range that has never been used
  void add_unused(uint64_t offset, uint64_t length) {
    uint64_t blob_len = get_logical_length();
    assert((blob_len % (sizeof(unused)*8)) == 0);
    assert(offset + length <= blob_len);
    uint64_t chunk_size = blob_len / (sizeof(unused)*8);
    uint64_t start = ROUND_UP_TO(offset, chunk_size) / chunk_size;
    uint64_t end = (offset + length) / chunk_size;
    for (auto i = start; i < end; ++i) {
      unused |= (1u << i);
    }
    if (start != end) {
      set_flag(FLAG_HAS_UNUSED);
    }
  }

  /// indicate that a range has (now) been used.
  void mark_used(uint64_t offset, uint64_t length) {
    if (has_unused()) {
      uint64_t blob_len = get_logical_length();
      assert((blob_len % (sizeof(unused)*8)) == 0);
      assert(offset + length <= blob_len);
      uint64_t chunk_size = blob_len / (sizeof(unused)*8);
      uint64_t start = offset / chunk_size;
      uint64_t end = ROUND_UP_TO(offset + length, chunk_size) / chunk_size;
      for (auto i = start; i < end; ++i) {
        unused &= ~(1u << i);
      }
      if (unused == 0) {
        clear_flag(FLAG_HAS_UNUSED);
      }
    }
  }
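  // Worked example for the unused bitmap (illustrative): unused_t has 16 bits,
  // so a blob with get_logical_length() == 0x10000 is tracked in 0x1000-byte
  // chunks.  Assuming a blob 'b' that already spans 64 KiB of data:
  //
  //   b.add_unused(0x4000, 0x8000);            // sets bits 4..11, sets FLAG_HAS_UNUSED
  //   bool u1 = b.is_unused(0x5000, 0x1000);   // true: bit 5 is set
  //   b.mark_used(0x4000, 0x2000);             // clears bits 4..5
  //   bool u2 = b.is_unused(0x4000, 0x1000);   // false now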

  template<class F>
  int map(uint64_t x_off, uint64_t x_len, F&& f) const {
    auto p = extents.begin();
    assert(p != extents.end());
    while (x_off >= p->length) {
      x_off -= p->length;
      ++p;
      assert(p != extents.end());
    }
    while (x_len > 0) {
      assert(p != extents.end());
      uint64_t l = MIN(p->length - x_off, x_len);
      int r = f(p->offset + x_off, l);
      if (r < 0)
        return r;
      x_off = 0;
      x_len -= l;
      ++p;
    }
    return 0;
  }
  template<class F>
  void map_bl(uint64_t x_off,
              bufferlist& bl,
              F&& f) const {
    auto p = extents.begin();
    assert(p != extents.end());
    while (x_off >= p->length) {
      x_off -= p->length;
      ++p;
      assert(p != extents.end());
    }
    bufferlist::iterator it = bl.begin();
    uint64_t x_len = bl.length();
    while (x_len > 0) {
      assert(p != extents.end());
      uint64_t l = MIN(p->length - x_off, x_len);
      bufferlist t;
      it.copy(l, t);
      f(p->offset + x_off, t);
      x_off = 0;
      x_len -= l;
      ++p;
    }
  }
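  // Usage sketch (illustrative): map() walks the physical extents covering a
  // logical range of the blob and calls the functor once per physical piece;
  // 'b' below stands for an already-populated bluestore_blob_t.
  //
  //   int r = b.map(0x2000, 0x6000,
  //     [&](uint64_t dev_offset, uint64_t dev_length) {
  //       // issue one read/write of dev_length bytes at dev_offset ...
  //       return 0;                  // returning < 0 aborts the walk
  //     });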

  uint32_t get_ondisk_length() const {
    uint32_t len = 0;
    for (auto &p : extents) {
      len += p.length;
    }
    return len;
  }

  uint32_t get_logical_length() const {
    return logical_length;
  }
  size_t get_csum_value_size() const;

  size_t get_csum_count() const {
    size_t vs = get_csum_value_size();
    if (!vs)
      return 0;
    return csum_data.length() / vs;
  }
  uint64_t get_csum_item(unsigned i) const {
    size_t cs = get_csum_value_size();
    const char *p = csum_data.c_str();
    switch (cs) {
    case 0:
      assert(0 == "no csum data, bad index");
    case 1:
      return reinterpret_cast<const uint8_t*>(p)[i];
    case 2:
      return reinterpret_cast<const __le16*>(p)[i];
    case 4:
      return reinterpret_cast<const __le32*>(p)[i];
    case 8:
      return reinterpret_cast<const __le64*>(p)[i];
    default:
      assert(0 == "unrecognized csum word size");
    }
  }
  const char *get_csum_item_ptr(unsigned i) const {
    size_t cs = get_csum_value_size();
    return csum_data.c_str() + (cs * i);
  }
  char *get_csum_item_ptr(unsigned i) {
    size_t cs = get_csum_value_size();
    return csum_data.c_str() + (cs * i);
  }

  void init_csum(unsigned type, unsigned order, unsigned len) {
    flags |= FLAG_CSUM;
    csum_type = type;
    csum_chunk_order = order;
    csum_data = buffer::create(get_csum_value_size() * len / get_csum_chunk_size());
    csum_data.zero();
    csum_data.reassign_to_mempool(mempool::mempool_bluestore_cache_other);
  }
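  // Worked example (illustrative): init_csum(Checksummer::CSUM_CRC32C, 12, 0x10000)
  // selects 4-byte crc32c values over 4 KiB (1 << 12) chunks, so csum_data holds
  // 0x10000 / 0x1000 * 4 == 64 bytes and get_csum_count() == 16.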

  /// calculate csum for the buffer at the given b_off
  void calc_csum(uint64_t b_off, const bufferlist& bl);

  /// verify csum: return -EOPNOTSUPP for an unsupported checksum type;
  /// return -1 and a valid (non-negative) b_bad_off on checksum error;
  /// return 0 if all is well.
  int verify_csum(uint64_t b_off, const bufferlist& bl, int* b_bad_off,
                  uint64_t *bad_csum) const;

  bool can_prune_tail() const {
    return
      extents.size() > 1 &&  // if it's all invalid it's not pruning.
      !extents.back().is_valid() &&
      !has_unused();
  }
  void prune_tail() {
    const auto &p = extents.back();
    logical_length -= p.length;
    extents.pop_back();
    if (has_csum()) {
      bufferptr t;
      t.swap(csum_data);
      csum_data = bufferptr(t.c_str(),
                            get_logical_length() / get_csum_chunk_size() *
                            get_csum_value_size());
    }
  }
  void add_tail(uint32_t new_len) {
    assert(is_mutable());
    assert(!has_unused());
    assert(new_len > logical_length);
    extents.emplace_back(
      bluestore_pextent_t(
        bluestore_pextent_t::INVALID_OFFSET,
        new_len - logical_length));
    logical_length = new_len;
    if (has_csum()) {
      bufferptr t;
      t.swap(csum_data);
      csum_data = buffer::create(
        get_csum_value_size() * logical_length / get_csum_chunk_size());
      csum_data.copy_in(0, t.length(), t.c_str());
      csum_data.zero(t.length(), csum_data.length() - t.length());
    }
  }
  uint32_t get_release_size(uint32_t min_alloc_size) const {
    if (is_compressed()) {
      return get_logical_length();
    }
    uint32_t res = get_csum_chunk_size();
    if (!has_csum() || res < min_alloc_size) {
      res = min_alloc_size;
    }
    return res;
  }
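  // Worked example (illustrative): for an uncompressed blob with 8 KiB csum
  // chunks and min_alloc_size == 4 KiB, get_release_size() returns 8 KiB;
  // without csums, or when the csum chunk is smaller than min_alloc_size,
  // it returns min_alloc_size.  A compressed blob is released as a whole
  // (its full logical length).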

  void split(uint32_t blob_offset, bluestore_blob_t& rb);
  void allocated(uint32_t b_off, uint32_t length, const AllocExtentVector& allocs);
  void allocated_test(const bluestore_pextent_t& alloc); // intended for UT only

  /// Updates the blob's pextent container and returns the unused pextents
  /// eligible for release.
  /// all     - indicates that the whole blob is to be released.
  /// logical - specifies the set of logical extents within the blob
  ///           that are to be released.
  /// Returns true if the blob has no more valid pextents.
  bool release_extents(
    bool all,
    const PExtentVector& logical,
    PExtentVector* r);
};
WRITE_CLASS_DENC_FEATURED(bluestore_blob_t)

ostream& operator<<(ostream& out, const bluestore_blob_t& o);


/// shared blob state
struct bluestore_shared_blob_t {
  uint64_t sbid;                        ///< shared blob id
  bluestore_extent_ref_map_t ref_map;   ///< shared blob extents

  bluestore_shared_blob_t(uint64_t _sbid) : sbid(_sbid) {}

  DENC(bluestore_shared_blob_t, v, p) {
    DENC_START(1, 1, p);
    denc(v.ref_map, p);
    DENC_FINISH(p);
  }


  void dump(Formatter *f) const;
  static void generate_test_instances(list<bluestore_shared_blob_t*>& ls);

  bool empty() const {
    return ref_map.empty();
  }
};
WRITE_CLASS_DENC(bluestore_shared_blob_t)

ostream& operator<<(ostream& out, const bluestore_shared_blob_t& o);

/// onode: per-object metadata
struct bluestore_onode_t {
  uint64_t nid = 0;                    ///< numeric id (locally unique)
  uint64_t size = 0;                   ///< object size
  map<mempool::bluestore_cache_other::string, bufferptr> attrs;  ///< attrs

  struct shard_info {
    uint32_t offset = 0;  ///< logical offset for start of shard
    uint32_t bytes = 0;   ///< encoded bytes
    DENC(shard_info, v, p) {
      denc_varint(v.offset, p);
      denc_varint(v.bytes, p);
    }
    void dump(Formatter *f) const;
  };
  vector<shard_info> extent_map_shards; ///< extent map shards (if any)

  uint32_t expected_object_size = 0;
  uint32_t expected_write_size = 0;
  uint32_t alloc_hint_flags = 0;

  uint8_t flags = 0;

  enum {
    FLAG_OMAP = 1,
  };

  string get_flags_string() const {
    string s;
    if (flags & FLAG_OMAP) {
      s = "omap";
    }
    return s;
  }

  bool has_flag(unsigned f) const {
    return flags & f;
  }

  void set_flag(unsigned f) {
    flags |= f;
  }

  void clear_flag(unsigned f) {
    flags &= ~f;
  }

  bool has_omap() const {
    return has_flag(FLAG_OMAP);
  }

  void set_omap_flag() {
    set_flag(FLAG_OMAP);
  }

  void clear_omap_flag() {
    clear_flag(FLAG_OMAP);
  }

  DENC(bluestore_onode_t, v, p) {
    DENC_START(1, 1, p);
    denc_varint(v.nid, p);
    denc_varint(v.size, p);
    denc(v.attrs, p);
    denc(v.flags, p);
    denc(v.extent_map_shards, p);
    denc_varint(v.expected_object_size, p);
    denc_varint(v.expected_write_size, p);
    denc_varint(v.alloc_hint_flags, p);
    DENC_FINISH(p);
  }
  void dump(Formatter *f) const;
  static void generate_test_instances(list<bluestore_onode_t*>& o);
};
WRITE_CLASS_DENC(bluestore_onode_t::shard_info)
WRITE_CLASS_DENC(bluestore_onode_t)

ostream& operator<<(ostream& out, const bluestore_onode_t::shard_info& si);

/// write-ahead logged op
struct bluestore_deferred_op_t {
  typedef enum {
    OP_WRITE = 1,
  } type_t;
  __u8 op = 0;

  PExtentVector extents;
  bufferlist data;

  DENC(bluestore_deferred_op_t, v, p) {
    DENC_START(1, 1, p);
    denc(v.op, p);
    denc(v.extents, p);
    denc(v.data, p);
    DENC_FINISH(p);
  }
  void dump(Formatter *f) const;
  static void generate_test_instances(list<bluestore_deferred_op_t*>& o);
};
WRITE_CLASS_DENC(bluestore_deferred_op_t)


/// write-ahead logged transaction
struct bluestore_deferred_transaction_t {
  uint64_t seq = 0;
  list<bluestore_deferred_op_t> ops;
  interval_set<uint64_t> released;  ///< allocations to release after tx

  bluestore_deferred_transaction_t() : seq(0) {}

  DENC(bluestore_deferred_transaction_t, v, p) {
    DENC_START(1, 1, p);
    denc(v.seq, p);
    denc(v.ops, p);
    denc(v.released, p);
    DENC_FINISH(p);
  }
  void dump(Formatter *f) const;
  static void generate_test_instances(list<bluestore_deferred_transaction_t*>& o);
};
WRITE_CLASS_DENC(bluestore_deferred_transaction_t)

struct bluestore_compression_header_t {
  uint8_t type = Compressor::COMP_ALG_NONE;
  uint32_t length = 0;

  bluestore_compression_header_t() {}
  bluestore_compression_header_t(uint8_t _type)
    : type(_type) {}

  DENC(bluestore_compression_header_t, v, p) {
    DENC_START(1, 1, p);
    denc(v.type, p);
    denc(v.length, p);
    DENC_FINISH(p);
  }
  void dump(Formatter *f) const;
  static void generate_test_instances(list<bluestore_compression_header_t*>& o);
};
WRITE_CLASS_DENC(bluestore_compression_header_t)


#endif