// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
 * Ceph - scalable distributed file system
 *
 * Copyright (C) 2014 Red Hat
 *
 * This is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License version 2.1, as published by the Free Software
 * Foundation. See file COPYING.
 *
 */

#ifndef CEPH_OSD_BLUESTORE_BLUESTORE_TYPES_H
#define CEPH_OSD_BLUESTORE_BLUESTORE_TYPES_H

#include <ostream>
#include <bitset>
#include "include/types.h"
#include "include/interval_set.h"
#include "include/utime.h"
#include "common/hobject.h"
#include "compressor/Compressor.h"
#include "common/Checksummer.h"
#include "include/mempool.h"

namespace ceph {
  class Formatter;
}

/// label for block device
struct bluestore_bdev_label_t {
  uuid_d osd_uuid;     ///< osd uuid
  uint64_t size;       ///< device size
  utime_t btime;       ///< birth time
  string description;  ///< device description

  void encode(bufferlist& bl) const;
  void decode(bufferlist::iterator& p);
  void dump(Formatter *f) const;
  static void generate_test_instances(list<bluestore_bdev_label_t*>& o);
};
WRITE_CLASS_ENCODER(bluestore_bdev_label_t)

ostream& operator<<(ostream& out, const bluestore_bdev_label_t& l);
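
// Illustrative encode/decode round trip (a sketch, not part of this header;
// assumes ceph_clock_now() and the bufferlist encode/decode helpers
// generated by WRITE_CLASS_ENCODER above):
//
//   bluestore_bdev_label_t label;
//   label.osd_uuid.generate_random();
//   label.size = 1ull << 40;           // 1 TiB device
//   label.btime = ceph_clock_now();
//   label.description = "main";
//
//   bufferlist bl;
//   ::encode(label, bl);
//
//   bluestore_bdev_label_t out;
//   bufferlist::iterator it = bl.begin();
//   ::decode(out, it);                 // out now mirrors label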

/// collection metadata
struct bluestore_cnode_t {
  uint32_t bits;  ///< how many bits of coll pgid are significant

  explicit bluestore_cnode_t(int b=0) : bits(b) {}

  DENC(bluestore_cnode_t, v, p) {
    DENC_START(1, 1, p);
    denc(v.bits, p);
    DENC_FINISH(p);
  }
  void dump(Formatter *f) const;
  static void generate_test_instances(list<bluestore_cnode_t*>& o);
};
WRITE_CLASS_DENC(bluestore_cnode_t)
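
// A sketch of what `bits` means (illustrative): with bits == 4, only the low
// 4 bits of an object's hash are matched against the collection's pgid, so
// the collection covers 1/16 of the hash space until it is split further.
//
//   bluestore_cnode_t cnode(4);  // 4 significant pgid bits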

class AllocExtent;
typedef mempool::bluestore_alloc::vector<AllocExtent> AllocExtentVector;
class AllocExtent {
public:
  uint64_t offset;
  uint32_t length;

  AllocExtent() {
    offset = 0;
    length = 0;
  }

  AllocExtent(int64_t off, int32_t len) : offset(off), length(len) { }
  uint64_t end() const {
    return offset + length;
  }
  bool operator==(const AllocExtent& other) const {
    return offset == other.offset && length == other.length;
  }
};

inline static ostream& operator<<(ostream& out, const AllocExtent& e) {
  return out << "0x" << std::hex << e.offset << "~" << e.length << std::dec;
}

class ExtentList {
  AllocExtentVector *m_extents;
  int64_t m_block_size;
  int64_t m_max_blocks;

public:
  void init(AllocExtentVector *extents, int64_t block_size,
            uint64_t max_alloc_size) {
    m_extents = extents;
    m_block_size = block_size;
    m_max_blocks = max_alloc_size / block_size;
    assert(m_extents->empty());
  }

  ExtentList(AllocExtentVector *extents, int64_t block_size) {
    init(extents, block_size, 0);
  }

  ExtentList(AllocExtentVector *extents, int64_t block_size,
             uint64_t max_alloc_size) {
    init(extents, block_size, max_alloc_size);
  }

  void reset() {
    m_extents->clear();
  }

  void add_extents(int64_t start, int64_t count);

  AllocExtentVector *get_extents() {
    return m_extents;
  }

  std::pair<int64_t, int64_t> get_nth_extent(int index) {
    return std::make_pair
      ((*m_extents)[index].offset / m_block_size,
       (*m_extents)[index].length / m_block_size);
  }

  int64_t get_extent_count() {
    return m_extents->size();
  }
};
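
// Usage sketch (illustrative; add_extents() is defined in the .cc and is
// expected to merge block runs that touch):
//
//   AllocExtentVector extents;
//   ExtentList el(&extents, 0x1000);  // 4 KiB blocks
//   el.add_extents(16, 4);            // blocks 16..19
//   el.add_extents(20, 2);            // contiguous: extends the same extent
//   el.add_extents(64, 1);            // gap: starts a new extent
//   // el.get_extent_count() == 2
//   // el.get_nth_extent(0) == {16, 6}, el.get_nth_extent(1) == {64, 1}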

/// pextent: physical extent
struct bluestore_pextent_t : public AllocExtent {
  const static uint64_t INVALID_OFFSET = ~0ull;

  bluestore_pextent_t() : AllocExtent() {}
  bluestore_pextent_t(uint64_t o, uint64_t l) : AllocExtent(o, l) {}
  bluestore_pextent_t(const AllocExtent &ext) :
    AllocExtent(ext.offset, ext.length) { }

  bluestore_pextent_t& operator=(const AllocExtent &ext) {
    offset = ext.offset;
    length = ext.length;
    return *this;
  }
  bool is_valid() const {
    return offset != INVALID_OFFSET;
  }

  DENC(bluestore_pextent_t, v, p) {
    denc_lba(v.offset, p);
    denc_varint_lowz(v.length, p);
  }

  void dump(Formatter *f) const;
  static void generate_test_instances(list<bluestore_pextent_t*>& ls);
};
WRITE_CLASS_DENC(bluestore_pextent_t)

ostream& operator<<(ostream& out, const bluestore_pextent_t& o);
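
// A pextent whose offset is INVALID_OFFSET is a hole: it consumes logical
// space in a blob but maps to no disk blocks. Sketch:
//
//   bluestore_pextent_t hole(bluestore_pextent_t::INVALID_OFFSET, 0x2000);
//   assert(!hole.is_valid());          // unallocated
//   bluestore_pextent_t data(0x100000, 0x2000);
//   assert(data.is_valid());           // backed by disk at 0x100000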

typedef mempool::bluestore_cache_other::vector<bluestore_pextent_t> PExtentVector;

template<>
struct denc_traits<PExtentVector> {
  static constexpr bool supported = true;
  static constexpr bool bounded = false;
  static constexpr bool featured = false;
  static constexpr bool need_contiguous = true;
  static void bound_encode(const PExtentVector& v, size_t& p) {
    p += sizeof(uint32_t);
    const auto size = v.size();
    if (size) {
      size_t per = 0;
      denc(v.front(), per);
      p += per * size;
    }
  }
  static void encode(const PExtentVector& v,
                     bufferlist::contiguous_appender& p) {
    denc_varint(v.size(), p);
    for (auto& i : v) {
      denc(i, p);
    }
  }
  static void decode(PExtentVector& v, bufferptr::iterator& p) {
    unsigned num;
    denc_varint(num, p);
    v.clear();
    v.resize(num);
    for (unsigned i = 0; i < num; ++i) {
      denc(v[i], p);
    }
  }
};
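
// With these traits a PExtentVector can be handed straight to denc(): the
// element count is a varint and each element uses bluestore_pextent_t's
// lba/lowz varint encoding. A sketch via the generic denc.h helpers
// (illustrative):
//
//   PExtentVector v;
//   v.emplace_back(0x10000, 0x1000);
//   v.emplace_back(0x30000, 0x2000);
//   bufferlist bl;
//   ::encode(v, bl);
//   PExtentVector out;
//   bufferlist::iterator it = bl.begin();
//   ::decode(out, it);                 // out == v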

/// extent ref map: a map of reference counted extents
struct bluestore_extent_ref_map_t {
  struct record_t {
    uint32_t length;
    uint32_t refs;
    record_t(uint32_t l=0, uint32_t r=0) : length(l), refs(r) {}
    DENC(bluestore_extent_ref_map_t::record_t, v, p) {
      denc_varint_lowz(v.length, p);
      denc_varint(v.refs, p);
    }
  };

  typedef mempool::bluestore_cache_other::map<uint64_t,record_t> map_t;
  map_t ref_map;

  void _check() const;
  void _maybe_merge_left(map_t::iterator& p);

  void clear() {
    ref_map.clear();
  }
  bool empty() const {
    return ref_map.empty();
  }

  void get(uint64_t offset, uint32_t len);
  void put(uint64_t offset, uint32_t len, PExtentVector *release,
           bool *maybe_unshared);

  bool contains(uint64_t offset, uint32_t len) const;
  bool intersects(uint64_t offset, uint32_t len) const;

  void bound_encode(size_t& p) const {
    denc_varint((uint32_t)0, p);
    if (!ref_map.empty()) {
      size_t elem_size = 0;
      denc_varint_lowz((uint64_t)0, elem_size);
      ref_map.begin()->second.bound_encode(elem_size);
      p += elem_size * ref_map.size();
    }
  }
  void encode(bufferlist::contiguous_appender& p) const {
    uint32_t n = ref_map.size();
    denc_varint(n, p);
    if (n) {
      // the first offset is absolute; subsequent ones are deltas from the
      // previous offset
      auto i = ref_map.begin();
      denc_varint_lowz(i->first, p);
      i->second.encode(p);
      int64_t pos = i->first;
      while (--n) {
        ++i;
        denc_varint_lowz((int64_t)i->first - pos, p);
        i->second.encode(p);
        pos = i->first;
      }
    }
  }
  void decode(bufferptr::iterator& p) {
    uint32_t n;
    denc_varint(n, p);
    if (n) {
      int64_t pos;
      denc_varint_lowz(pos, p);
      ref_map[pos].decode(p);
      while (--n) {
        int64_t delta;
        denc_varint_lowz(delta, p);
        pos += delta;
        ref_map[pos].decode(p);
      }
    }
  }

  void dump(Formatter *f) const;
  static void generate_test_instances(list<bluestore_extent_ref_map_t*>& o);
};
WRITE_CLASS_DENC(bluestore_extent_ref_map_t)


ostream& operator<<(ostream& out, const bluestore_extent_ref_map_t& rm);
static inline bool operator==(const bluestore_extent_ref_map_t::record_t& l,
                              const bluestore_extent_ref_map_t::record_t& r) {
  return l.length == r.length && l.refs == r.refs;
}
static inline bool operator==(const bluestore_extent_ref_map_t& l,
                              const bluestore_extent_ref_map_t& r) {
  return l.ref_map == r.ref_map;
}
static inline bool operator!=(const bluestore_extent_ref_map_t& l,
                              const bluestore_extent_ref_map_t& r) {
  return !(l == r);
}
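
// Sketch of the ref-count lifecycle (illustrative; get()/put() live in the
// .cc):
//
//   bluestore_extent_ref_map_t m;
//   m.get(0x0, 0x4000);          // first ref on [0x0, 0x4000)
//   m.get(0x0, 0x2000);          // overlap splits the record:
//                                //   [0x0, 0x2000) refs == 2
//                                //   [0x2000, 0x4000) refs == 1
//   PExtentVector release;
//   bool maybe_unshared = false;
//   m.put(0x0, 0x4000, &release, &maybe_unshared);
//   // [0x2000, 0x4000) dropped to zero refs and is appended to release so
//   // the caller can free it; [0x0, 0x2000) remains with refs == 1.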

/// blob_use_tracker: a set of per-alloc-unit ref counters to track blob usage
struct bluestore_blob_use_tracker_t {
  // N.B.: there is no need to shrink au_size/num_au to the absolute minimum
  // (e.g. a single byte for au_size) since:
  // 1) the struct isn't packed, hence it's padded anyway (and even if it
  //    were packed, see 2);
  // 2) the memory manager has its own granularity, most probably >= 8 bytes.
  //
  uint32_t au_size; // allocation (= tracking) unit size,
                    // == 0 if uninitialized
  uint32_t num_au;  // number of allocation units tracked,
                    // == 0 if a single unit or the whole blob is tracked

  union {
    uint32_t* bytes_per_au;
    uint32_t total_bytes;
  };

  bluestore_blob_use_tracker_t()
    : au_size(0), num_au(0), bytes_per_au(nullptr) {
  }
  ~bluestore_blob_use_tracker_t() {
    clear();
  }

  void clear() {
    if (num_au != 0) {
      delete[] bytes_per_au;
    }
    bytes_per_au = nullptr;
    au_size = 0;
    num_au = 0;
  }

  uint32_t get_referenced_bytes() const {
    uint32_t total = 0;
    if (!num_au) {
      total = total_bytes;
    } else {
      for (size_t i = 0; i < num_au; ++i) {
        total += bytes_per_au[i];
      }
    }
    return total;
  }
  bool is_not_empty() const {
    if (!num_au) {
      return total_bytes != 0;
    } else {
      for (size_t i = 0; i < num_au; ++i) {
        if (bytes_per_au[i]) {
          return true;
        }
      }
    }
    return false;
  }
  bool is_empty() const {
    return !is_not_empty();
  }
  void prune_tail(uint32_t new_len) {
    if (num_au) {
      new_len = ROUND_UP_TO(new_len, au_size);
      uint32_t _num_au = new_len / au_size;
      assert(_num_au <= num_au);
      if (_num_au) {
        num_au = _num_au; // bytes_per_au array is left unmodified
      } else {
        clear();
      }
    }
  }
  void add_tail(uint32_t new_len, uint32_t _au_size) {
    auto full_size = au_size * (num_au ? num_au : 1);
    assert(new_len >= full_size);
    if (new_len == full_size) {
      return;
    }
    if (!num_au) {
      uint32_t old_total = total_bytes;
      total_bytes = 0;
      init(new_len, _au_size);
      assert(num_au);
      bytes_per_au[0] = old_total;
    } else {
      assert(_au_size == au_size);
      new_len = ROUND_UP_TO(new_len, au_size);
      uint32_t _num_au = new_len / au_size;
      assert(_num_au >= num_au);
      if (_num_au > num_au) {
        auto old_bytes = bytes_per_au;
        auto old_num_au = num_au;
        num_au = _num_au;
        allocate();
        for (size_t i = 0; i < old_num_au; i++) {
          bytes_per_au[i] = old_bytes[i];
        }
        for (size_t i = old_num_au; i < num_au; i++) {
          bytes_per_au[i] = 0;
        }
        delete[] old_bytes;
      }
    }
  }

  void init(
    uint32_t full_length,
    uint32_t _au_size);

  void get(
    uint32_t offset,
    uint32_t len);

  /// put: returns true if the blob has no references left after the call;
  /// in that case release is not filled, for the sake of performance.
  /// Returns false if some references to the blob remain; in that case
  /// release contains pextents (identified by their offsets relative to
  /// the blob start) that are no longer used and can be safely deallocated.
  bool put(
    uint32_t offset,
    uint32_t len,
    PExtentVector *release);

  bool can_split() const;
  bool can_split_at(uint32_t blob_offset) const;
  void split(
    uint32_t blob_offset,
    bluestore_blob_use_tracker_t* r);

  bool equal(
    const bluestore_blob_use_tracker_t& other) const;

  void bound_encode(size_t& p) const {
    denc_varint(au_size, p);
    if (au_size) {
      denc_varint(num_au, p);
      if (!num_au) {
        denc_varint(total_bytes, p);
      } else {
        size_t elem_size = 0;
        denc_varint((uint32_t)0, elem_size);
        p += elem_size * num_au;
      }
    }
  }
  void encode(bufferlist::contiguous_appender& p) const {
    denc_varint(au_size, p);
    if (au_size) {
      denc_varint(num_au, p);
      if (!num_au) {
        denc_varint(total_bytes, p);
      } else {
        for (size_t i = 0; i < num_au; ++i) {
          denc_varint(bytes_per_au[i], p);
        }
      }
    }
  }
  void decode(bufferptr::iterator& p) {
    clear();
    denc_varint(au_size, p);
    if (au_size) {
      denc_varint(num_au, p);
      if (!num_au) {
        denc_varint(total_bytes, p);
      } else {
        allocate();
        for (size_t i = 0; i < num_au; ++i) {
          denc_varint(bytes_per_au[i], p);
        }
      }
    }
  }

  void dump(Formatter *f) const;
  static void generate_test_instances(list<bluestore_blob_use_tracker_t*>& o);
private:
  void allocate();
};
WRITE_CLASS_DENC(bluestore_blob_use_tracker_t)
ostream& operator<<(ostream& out, const bluestore_blob_use_tracker_t& rm);
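
// Usage sketch (illustrative; init()/get()/put() are defined in the .cc):
//
//   bluestore_blob_use_tracker_t t;
//   t.init(0x10000, 0x10000);  // blob fits one AU: num_au stays 0 and the
//                              // single total_bytes counter is used
//   t.get(0x0, 0x8000);        // get_referenced_bytes() == 0x8000
//
//   bluestore_blob_use_tracker_t t2;
//   t2.init(0x20000, 0x10000); // two 64 KiB AUs: num_au == 2
//   t2.get(0x0, 0x20000);
//   PExtentVector release;
//   t2.put(0x0, 0x10000, &release);  // first AU drops to zero; its
//                                    // blob-relative range lands in release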

/// blob: a piece of data on disk
struct bluestore_blob_t {
private:
  PExtentVector extents;          ///< raw data position on device
  uint32_t logical_length = 0;    ///< original length of data stored in the blob
  uint32_t compressed_length = 0; ///< compressed length if any

public:
  enum {
    LEGACY_FLAG_MUTABLE = 1, ///< [legacy] blob can be overwritten or split
    FLAG_COMPRESSED = 2,     ///< blob is compressed
    FLAG_CSUM = 4,           ///< blob has checksums
    FLAG_HAS_UNUSED = 8,     ///< blob has unused map
    FLAG_SHARED = 16,        ///< blob is shared; see external SharedBlob
  };
  static string get_flags_string(unsigned flags);

  uint32_t flags = 0;        ///< FLAG_*

  typedef uint16_t unused_t;
  unused_t unused = 0;       ///< portion that has never been written to (bitmap)

  uint8_t csum_type = Checksummer::CSUM_NONE; ///< CSUM_*
  uint8_t csum_chunk_order = 0;  ///< csum block size is 1<<block_order bytes

  bufferptr csum_data;       ///< opaque vector of csum data

  bluestore_blob_t(uint32_t f = 0) : flags(f) {}

  const PExtentVector& get_extents() const {
    return extents;
  }

  DENC_HELPERS;
  void bound_encode(size_t& p, uint64_t struct_v) const {
    assert(struct_v == 1 || struct_v == 2);
    denc(extents, p);
    denc_varint(flags, p);
    denc_varint_lowz(logical_length, p);
    denc_varint_lowz(compressed_length, p);
    denc(csum_type, p);
    denc(csum_chunk_order, p);
    denc_varint(csum_data.length(), p);
    p += csum_data.length();
    p += sizeof(unused_t);
  }

  void encode(bufferlist::contiguous_appender& p, uint64_t struct_v) const {
    assert(struct_v == 1 || struct_v == 2);
    denc(extents, p);
    denc_varint(flags, p);
    if (is_compressed()) {
      denc_varint_lowz(logical_length, p);
      denc_varint_lowz(compressed_length, p);
    }
    if (has_csum()) {
      denc(csum_type, p);
      denc(csum_chunk_order, p);
      denc_varint(csum_data.length(), p);
      memcpy(p.get_pos_add(csum_data.length()), csum_data.c_str(),
             csum_data.length());
    }
    if (has_unused()) {
      denc(unused, p);
    }
  }

  void decode(bufferptr::iterator& p, uint64_t struct_v) {
    assert(struct_v == 1 || struct_v == 2);
    denc(extents, p);
    denc_varint(flags, p);
    if (is_compressed()) {
      denc_varint_lowz(logical_length, p);
      denc_varint_lowz(compressed_length, p);
    } else {
      logical_length = get_ondisk_length();
    }
    if (has_csum()) {
      denc(csum_type, p);
      denc(csum_chunk_order, p);
      int len;
      denc_varint(len, p);
      csum_data = p.get_ptr(len);
    }
    if (has_unused()) {
      denc(unused, p);
    }
  }

  bool can_split() const {
    return
      !has_flag(FLAG_SHARED) &&
      !has_flag(FLAG_COMPRESSED) &&
      !has_flag(FLAG_HAS_UNUSED); // splitting unused set is complex
  }
  bool can_split_at(uint32_t blob_offset) const {
    return !has_csum() || blob_offset % get_csum_chunk_size() == 0;
  }

  void dump(Formatter *f) const;
  static void generate_test_instances(list<bluestore_blob_t*>& ls);

  bool has_flag(unsigned f) const {
    return flags & f;
  }
  void set_flag(unsigned f) {
    flags |= f;
  }
  void clear_flag(unsigned f) {
    flags &= ~f;
  }
  string get_flags_string() const {
    return get_flags_string(flags);
  }

  void set_compressed(uint64_t clen_orig, uint64_t clen) {
    set_flag(FLAG_COMPRESSED);
    logical_length = clen_orig;
    compressed_length = clen;
  }
  bool is_mutable() const {
    return !is_compressed() && !is_shared();
  }
  bool is_compressed() const {
    return has_flag(FLAG_COMPRESSED);
  }
  bool has_csum() const {
    return has_flag(FLAG_CSUM);
  }
  bool has_unused() const {
    return has_flag(FLAG_HAS_UNUSED);
  }
  bool is_shared() const {
    return has_flag(FLAG_SHARED);
  }

  /// return chunk (i.e. min readable block) size for the blob
  uint64_t get_chunk_size(uint64_t dev_block_size) const {
    return has_csum() ?
      MAX(dev_block_size, get_csum_chunk_size()) : dev_block_size;
  }
  uint32_t get_csum_chunk_size() const {
    return 1 << csum_chunk_order;
  }
  uint32_t get_compressed_payload_length() const {
    return is_compressed() ? compressed_length : 0;
  }
  uint64_t calc_offset(uint64_t x_off, uint64_t *plen) const {
    auto p = extents.begin();
    assert(p != extents.end());
    while (x_off >= p->length) {
      x_off -= p->length;
      ++p;
      assert(p != extents.end());
    }
    if (plen)
      *plen = p->length - x_off;
    return p->offset + x_off;
  }

  // validate whether or not the status of pextents within the given range
  // meets the requirement (allocated or unallocated).
  bool _validate_range(uint64_t b_off, uint64_t b_len,
                       bool require_allocated) const {
    auto p = extents.begin();
    assert(p != extents.end());
    while (b_off >= p->length) {
      b_off -= p->length;
      ++p;
      assert(p != extents.end());
    }
    b_len += b_off;
    while (b_len) {
      assert(p != extents.end());
      if (require_allocated != p->is_valid()) {
        return false;
      }
      if (p->length >= b_len) {
        return true;
      }
      b_len -= p->length;
      ++p;
    }
    assert(0 == "we should not get here");
  }

  /// return true if the entire range is allocated
  /// (mapped to extents on disk)
  bool is_allocated(uint64_t b_off, uint64_t b_len) const {
    return _validate_range(b_off, b_len, true);
  }

  /// return true if the entire range is unallocated
  /// (not mapped to extents on disk)
  bool is_unallocated(uint64_t b_off, uint64_t b_len) const {
    return _validate_range(b_off, b_len, false);
  }

  /// return true if the logical range has never been used
  bool is_unused(uint64_t offset, uint64_t length) const {
    if (!has_unused()) {
      return false;
    }
    uint64_t blob_len = get_logical_length();
    assert((blob_len % (sizeof(unused)*8)) == 0);
    assert(offset + length <= blob_len);
    uint64_t chunk_size = blob_len / (sizeof(unused)*8);
    uint64_t start = offset / chunk_size;
    uint64_t end = ROUND_UP_TO(offset + length, chunk_size) / chunk_size;
    auto i = start;
    while (i < end && (unused & (1u << i))) {
      i++;
    }
    return i >= end;
  }
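
  // Worked example (illustrative): for a 0x10000-byte blob the 16-bit map
  // gives chunk_size == 0x1000, so bit i covers [i*0x1000, (i+1)*0x1000).
  // add_unused(0x2000, 0x2000) sets bits 2 and 3; is_unused(0x2000, 0x1000)
  // then only has to test bit 2 and returns true; mark_used(0x3000, 0x1000)
  // clears bit 3 again (and FLAG_HAS_UNUSED is dropped once no bits remain).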

  /// mark a range that has never been used
  void add_unused(uint64_t offset, uint64_t length) {
    uint64_t blob_len = get_logical_length();
    assert((blob_len % (sizeof(unused)*8)) == 0);
    assert(offset + length <= blob_len);
    uint64_t chunk_size = blob_len / (sizeof(unused)*8);
    uint64_t start = ROUND_UP_TO(offset, chunk_size) / chunk_size;
    uint64_t end = (offset + length) / chunk_size;
    for (auto i = start; i < end; ++i) {
      unused |= (1u << i);
    }
    if (start != end) {
      set_flag(FLAG_HAS_UNUSED);
    }
  }

  /// indicate that a range has (now) been used.
  void mark_used(uint64_t offset, uint64_t length) {
    if (has_unused()) {
      uint64_t blob_len = get_logical_length();
      assert((blob_len % (sizeof(unused)*8)) == 0);
      assert(offset + length <= blob_len);
      uint64_t chunk_size = blob_len / (sizeof(unused)*8);
      uint64_t start = offset / chunk_size;
      uint64_t end = ROUND_UP_TO(offset + length, chunk_size) / chunk_size;
      for (auto i = start; i < end; ++i) {
        unused &= ~(1u << i);
      }
      if (unused == 0) {
        clear_flag(FLAG_HAS_UNUSED);
      }
    }
  }

  int map(uint64_t x_off, uint64_t x_len,
          std::function<int(uint64_t,uint64_t)> f) const {
    auto p = extents.begin();
    assert(p != extents.end());
    while (x_off >= p->length) {
      x_off -= p->length;
      ++p;
      assert(p != extents.end());
    }
    while (x_len > 0) {
      assert(p != extents.end());
      uint64_t l = MIN(p->length - x_off, x_len);
      int r = f(p->offset + x_off, l);
      if (r < 0)
        return r;
      x_off = 0;
      x_len -= l;
      ++p;
    }
    return 0;
  }
  void map_bl(uint64_t x_off,
              bufferlist& bl,
              std::function<void(uint64_t,bufferlist&)> f) const {
    auto p = extents.begin();
    assert(p != extents.end());
    while (x_off >= p->length) {
      x_off -= p->length;
      ++p;
      assert(p != extents.end());
    }
    bufferlist::iterator it = bl.begin();
    uint64_t x_len = bl.length();
    while (x_len > 0) {
      assert(p != extents.end());
      uint64_t l = MIN(p->length - x_off, x_len);
      bufferlist t;
      it.copy(l, t);
      f(p->offset + x_off, t);
      x_off = 0;
      x_len -= l;
      ++p;
    }
  }

  uint32_t get_ondisk_length() const {
    uint32_t len = 0;
    for (auto &p : extents) {
      len += p.length;
    }
    return len;
  }

  uint32_t get_logical_length() const {
    return logical_length;
  }
  size_t get_csum_value_size() const;

  size_t get_csum_count() const {
    size_t vs = get_csum_value_size();
    if (!vs)
      return 0;
    return csum_data.length() / vs;
  }
  uint64_t get_csum_item(unsigned i) const {
    size_t cs = get_csum_value_size();
    const char *p = csum_data.c_str();
    switch (cs) {
    case 0:
      assert(0 == "no csum data, bad index");
    case 1:
      return reinterpret_cast<const uint8_t*>(p)[i];
    case 2:
      return reinterpret_cast<const __le16*>(p)[i];
    case 4:
      return reinterpret_cast<const __le32*>(p)[i];
    case 8:
      return reinterpret_cast<const __le64*>(p)[i];
    default:
      assert(0 == "unrecognized csum word size");
    }
  }
  const char *get_csum_item_ptr(unsigned i) const {
    size_t cs = get_csum_value_size();
    return csum_data.c_str() + (cs * i);
  }
  char *get_csum_item_ptr(unsigned i) {
    size_t cs = get_csum_value_size();
    return csum_data.c_str() + (cs * i);
  }

  void init_csum(unsigned type, unsigned order, unsigned len) {
    flags |= FLAG_CSUM;
    csum_type = type;
    csum_chunk_order = order;
    csum_data = buffer::create(get_csum_value_size() * len / get_csum_chunk_size());
    csum_data.zero();
  }

  /// calculate csum for the buffer at the given b_off
  void calc_csum(uint64_t b_off, const bufferlist& bl);

  /// verify csum: return -EOPNOTSUPP for an unsupported checksum type;
  /// return -1 and a valid (non-negative) b_bad_off on checksum error;
  /// return 0 if all is well.
  int verify_csum(uint64_t b_off, const bufferlist& bl, int* b_bad_off,
                  uint64_t *bad_csum) const;

  bool can_prune_tail() const {
    return
      extents.size() > 1 &&  // if it's all invalid it's not pruning.
      !extents.back().is_valid() &&
      !has_unused();
  }
  void prune_tail() {
    const auto &p = extents.back();
    logical_length -= p.length;
    extents.pop_back();
    if (has_csum()) {
      bufferptr t;
      t.swap(csum_data);
      csum_data = bufferptr(t.c_str(),
                            get_logical_length() / get_csum_chunk_size() *
                            get_csum_value_size());
    }
  }
  void add_tail(uint32_t new_len) {
    assert(is_mutable());
    assert(!has_unused());
    assert(new_len > logical_length);
    extents.emplace_back(
      bluestore_pextent_t(
        bluestore_pextent_t::INVALID_OFFSET,
        new_len - logical_length));
    logical_length = new_len;
    if (has_csum()) {
      bufferptr t;
      t.swap(csum_data);
      csum_data = buffer::create(
        get_csum_value_size() * logical_length / get_csum_chunk_size());
      csum_data.copy_in(0, t.length(), t.c_str());
      csum_data.zero(t.length(), csum_data.length() - t.length());
    }
  }
  uint32_t get_release_size(uint32_t min_alloc_size) const {
    if (is_compressed()) {
      return get_logical_length();
    }
    uint32_t res = get_csum_chunk_size();
    if (!has_csum() || res < min_alloc_size) {
      res = min_alloc_size;
    }
    return res;
  }

  void split(uint32_t blob_offset, bluestore_blob_t& rb);
  void allocated(uint32_t b_off, uint32_t length, const AllocExtentVector& allocs);
  void allocated_test(const bluestore_pextent_t& alloc); // intended for UT only

  /// updates the blob's pextents container and returns unused pextents
  /// eligible for release.
  /// all - indicates that the whole blob is to be released.
  /// logical - specifies the set of logical extents within the blob
  ///           to be released
  /// Returns true if the blob has no more valid pextents.
  bool release_extents(
    bool all,
    const PExtentVector& logical,
    PExtentVector* r);
};
WRITE_CLASS_DENC_FEATURED(bluestore_blob_t)

ostream& operator<<(ostream& out, const bluestore_blob_t& o);
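
// Sketch of the checksum path (illustrative; calc_csum()/verify_csum() are
// in the .cc, and allocated_test() is the unit-test helper declared above):
//
//   bluestore_blob_t b;
//   b.allocated_test(bluestore_pextent_t(0x10000, 0x8000)); // back 32 KiB
//   b.init_csum(Checksummer::CSUM_CRC32C, 12, 0x8000);      // 4 KiB chunks
//   // csum_data now holds 8 zeroed 4-byte crc32c slots
//
//   bufferlist data;
//   data.append(std::string(0x8000, 'x'));
//   b.calc_csum(0, data);
//
//   int bad_off;
//   uint64_t bad_csum;
//   int r = b.verify_csum(0, data, &bad_off, &bad_csum);    // expect r == 0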

/// shared blob state
struct bluestore_shared_blob_t {
  uint64_t sbid;                       ///< shared blob id
  bluestore_extent_ref_map_t ref_map;  ///< shared blob extents

  bluestore_shared_blob_t(uint64_t _sbid) : sbid(_sbid) {}

  DENC(bluestore_shared_blob_t, v, p) {
    DENC_START(1, 1, p);
    denc(v.ref_map, p);
    DENC_FINISH(p);
  }

  void dump(Formatter *f) const;
  static void generate_test_instances(list<bluestore_shared_blob_t*>& ls);

  bool empty() const {
    return ref_map.empty();
  }
};
WRITE_CLASS_DENC(bluestore_shared_blob_t)

ostream& operator<<(ostream& out, const bluestore_shared_blob_t& o);

/// onode: per-object metadata
struct bluestore_onode_t {
  uint64_t nid = 0;   ///< numeric id (locally unique)
  uint64_t size = 0;  ///< object size
  map<mempool::bluestore_cache_other::string, bufferptr> attrs;  ///< attrs

  struct shard_info {
    uint32_t offset = 0;  ///< logical offset for start of shard
    uint32_t bytes = 0;   ///< encoded bytes
    DENC(shard_info, v, p) {
      denc_varint(v.offset, p);
      denc_varint(v.bytes, p);
    }
    void dump(Formatter *f) const;
  };
  vector<shard_info> extent_map_shards; ///< extent map shards (if any)

  uint32_t expected_object_size = 0;
  uint32_t expected_write_size = 0;
  uint32_t alloc_hint_flags = 0;

  uint8_t flags = 0;

  enum {
    FLAG_OMAP = 1,
  };

  string get_flags_string() const {
    string s;
    if (flags & FLAG_OMAP) {
      s = "omap";
    }
    return s;
  }

  bool has_flag(unsigned f) const {
    return flags & f;
  }

  void set_flag(unsigned f) {
    flags |= f;
  }

  void clear_flag(unsigned f) {
    flags &= ~f;
  }

  bool has_omap() const {
    return has_flag(FLAG_OMAP);
  }

  void set_omap_flag() {
    set_flag(FLAG_OMAP);
  }

  void clear_omap_flag() {
    clear_flag(FLAG_OMAP);
  }

  DENC(bluestore_onode_t, v, p) {
    DENC_START(1, 1, p);
    denc_varint(v.nid, p);
    denc_varint(v.size, p);
    denc(v.attrs, p);
    denc(v.flags, p);
    denc(v.extent_map_shards, p);
    denc_varint(v.expected_object_size, p);
    denc_varint(v.expected_write_size, p);
    denc_varint(v.alloc_hint_flags, p);
    DENC_FINISH(p);
  }
  void dump(Formatter *f) const;
  static void generate_test_instances(list<bluestore_onode_t*>& o);
};
WRITE_CLASS_DENC(bluestore_onode_t::shard_info)
WRITE_CLASS_DENC(bluestore_onode_t)

ostream& operator<<(ostream& out, const bluestore_onode_t::shard_info& si);

/// write-ahead-logged op
struct bluestore_deferred_op_t {
  typedef enum {
    OP_WRITE = 1,
  } type_t;
  __u8 op = 0;

  PExtentVector extents;
  bufferlist data;

  DENC(bluestore_deferred_op_t, v, p) {
    DENC_START(1, 1, p);
    denc(v.op, p);
    denc(v.extents, p);
    denc(v.data, p);
    DENC_FINISH(p);
  }
  void dump(Formatter *f) const;
  static void generate_test_instances(list<bluestore_deferred_op_t*>& o);
};
WRITE_CLASS_DENC(bluestore_deferred_op_t)


/// write-ahead-logged transaction
struct bluestore_deferred_transaction_t {
  uint64_t seq = 0;
  list<bluestore_deferred_op_t> ops;
  interval_set<uint64_t> released; ///< allocations to release after tx

  bluestore_deferred_transaction_t() : seq(0) {}

  DENC(bluestore_deferred_transaction_t, v, p) {
    DENC_START(1, 1, p);
    denc(v.seq, p);
    denc(v.ops, p);
    denc(v.released, p);
    DENC_FINISH(p);
  }
  void dump(Formatter *f) const;
  static void generate_test_instances(list<bluestore_deferred_transaction_t*>& o);
};
WRITE_CLASS_DENC(bluestore_deferred_transaction_t)
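
// Sketch of queueing a deferred write (illustrative values):
//
//   bluestore_deferred_transaction_t txn;
//   txn.seq = 42;
//   txn.ops.emplace_back();
//   txn.ops.back().op = bluestore_deferred_op_t::OP_WRITE;
//   txn.ops.back().extents.emplace_back(0x200000, 0x1000);
//   txn.ops.back().data.append(std::string(0x1000, 'x'));
//   // the transaction is encoded via WRITE_CLASS_DENC above, persisted in
//   // the WAL, and replayed on startup; `released` lists allocations to
//   // free once it commits.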

struct bluestore_compression_header_t {
  uint8_t type = Compressor::COMP_ALG_NONE;
  uint32_t length = 0;

  bluestore_compression_header_t() {}
  bluestore_compression_header_t(uint8_t _type)
    : type(_type) {}

  DENC(bluestore_compression_header_t, v, p) {
    DENC_START(1, 1, p);
    denc(v.type, p);
    denc(v.length, p);
    DENC_FINISH(p);
  }
  void dump(Formatter *f) const;
  static void generate_test_instances(list<bluestore_compression_header_t*>& o);
};
WRITE_CLASS_DENC(bluestore_compression_header_t)


#endif