]> git.proxmox.com Git - ceph.git/blame - ceph/src/os/bluestore/bluestore_types.h
update sources to ceph Nautilus 14.2.1
[ceph.git] / ceph / src / os / bluestore / bluestore_types.h
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2014 Red Hat
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15#ifndef CEPH_OSD_BLUESTORE_BLUESTORE_TYPES_H
16#define CEPH_OSD_BLUESTORE_BLUESTORE_TYPES_H
17
18#include <ostream>
19#include <bitset>
94b18763 20#include <type_traits>
7c673cae
FG
21#include "include/types.h"
22#include "include/interval_set.h"
23#include "include/utime.h"
24#include "common/hobject.h"
25#include "compressor/Compressor.h"
26#include "common/Checksummer.h"
27#include "include/mempool.h"
28
29namespace ceph {
30 class Formatter;
31}
32
33/// label for block device
34struct bluestore_bdev_label_t {
35 uuid_d osd_uuid; ///< osd uuid
36 uint64_t size; ///< device size
37 utime_t btime; ///< birth time
38 string description; ///< device description
39
3efd9988
FG
40 map<string,string> meta; ///< {read,write}_meta() content from ObjectStore
41
7c673cae 42 void encode(bufferlist& bl) const;
11fdf7f2 43 void decode(bufferlist::const_iterator& p);
7c673cae
FG
44 void dump(Formatter *f) const;
45 static void generate_test_instances(list<bluestore_bdev_label_t*>& o);
46};
47WRITE_CLASS_ENCODER(bluestore_bdev_label_t)
48
49ostream& operator<<(ostream& out, const bluestore_bdev_label_t& l);
50
/// collection metadata
///
/// Persistent per-collection record: only the number of significant
/// pg-id bits is stored (used to map objects to collections).
struct bluestore_cnode_t {
  uint32_t bits;   ///< how many bits of coll pgid are significant

  explicit bluestore_cnode_t(int b=0) : bits(b) {}

  // On-disk format (versioned, v1/compat1). Field order is the wire
  // format; do not reorder.
  DENC(bluestore_cnode_t, v, p) {
    DENC_START(1, 1, p);
    denc(v.bits, p);
    DENC_FINISH(p);
  }
  void dump(Formatter *f) const;
  static void generate_test_instances(list<bluestore_cnode_t*>& o);
};
WRITE_CLASS_DENC(bluestore_cnode_t)

ostream& operator<<(ostream& out, const bluestore_cnode_t& l);
68
a8e16298
TL
69template <typename OFFS_TYPE, typename LEN_TYPE>
70struct bluestore_interval_t
71{
72 static const uint64_t INVALID_OFFSET = ~0ull;
7c673cae 73
a8e16298
TL
74 OFFS_TYPE offset = 0;
75 LEN_TYPE length = 0;
7c673cae 76
a8e16298
TL
77 bluestore_interval_t(){}
78 bluestore_interval_t(uint64_t o, uint64_t l) : offset(o), length(l) {}
7c673cae 79
a8e16298
TL
80 bool is_valid() const {
81 return offset != INVALID_OFFSET;
7c673cae 82 }
a8e16298
TL
83 uint64_t end() const {
84 return offset != INVALID_OFFSET ? offset + length : INVALID_OFFSET;
7c673cae
FG
85 }
86
a8e16298
TL
87 bool operator==(const bluestore_interval_t& other) const {
88 return offset == other.offset && length == other.length;
7c673cae
FG
89 }
90
7c673cae
FG
91};
92
/// pextent: physical extent
///
/// A contiguous range of raw device space. Inherits offset/length and
/// the INVALID_OFFSET "unallocated" sentinel from bluestore_interval_t.
struct bluestore_pextent_t : public bluestore_interval_t<uint64_t, uint32_t>
{
  bluestore_pextent_t() {}
  bluestore_pextent_t(uint64_t o, uint64_t l) : bluestore_interval_t(o, l) {}
  bluestore_pextent_t(const bluestore_interval_t &ext) :
    bluestore_interval_t(ext.offset, ext.length) {}

  // Wire format: LBA-optimized varint for the offset, low-zero varint
  // for the length (both typically block-aligned). Order matters.
  DENC(bluestore_pextent_t, v, p) {
    denc_lba(v.offset, p);
    denc_varint_lowz(v.length, p);
  }

  void dump(Formatter *f) const;
  static void generate_test_instances(list<bluestore_pextent_t*>& ls);
};
WRITE_CLASS_DENC(bluestore_pextent_t)

ostream& operator<<(ostream& out, const bluestore_pextent_t& o);
112
/// vector of physical extents, allocated from the bluestore cache mempool
typedef mempool::bluestore_cache_other::vector<bluestore_pextent_t> PExtentVector;

/// Custom denc specialization for PExtentVector: varint element count
/// followed by the elements themselves.
template<>
struct denc_traits<PExtentVector> {
  static constexpr bool supported = true;
  static constexpr bool bounded = false;
  static constexpr bool featured = false;
  static constexpr bool need_contiguous = true;
  static void bound_encode(const PExtentVector& v, size_t& p) {
    p += sizeof(uint32_t);
    const auto size = v.size();
    if (size) {
      // Upper bound estimated from the first element's bound; assumes
      // all elements bound-encode to the same size, which holds for
      // bluestore_pextent_t.
      size_t per = 0;
      denc(v.front(), per);
      p += per * size;
    }
  }
  static void encode(const PExtentVector& v,
		     bufferlist::contiguous_appender& p) {
    denc_varint(v.size(), p);
    for (auto& i : v) {
      denc(i, p);
    }
  }
  static void decode(PExtentVector& v, bufferptr::const_iterator& p) {
    unsigned num;
    denc_varint(num, p);
    // resize() then decode in place avoids per-element push_back growth.
    v.clear();
    v.resize(num);
    for (unsigned i=0; i<num; ++i) {
      denc(v[i], p);
    }
  }
};
147
7c673cae
FG
/// extent_map: a map of reference counted extents
///
/// Maps a starting offset to a (length, refcount) record. Used to track
/// sharing of blob extents across clones.
struct bluestore_extent_ref_map_t {
  struct record_t {
    uint32_t length;
    uint32_t refs;
    record_t(uint32_t l=0, uint32_t r=0) : length(l), refs(r) {}
    DENC(bluestore_extent_ref_map_t::record_t, v, p) {
      denc_varint_lowz(v.length, p);
      denc_varint(v.refs, p);
    }
  };

  typedef mempool::bluestore_cache_other::map<uint64_t,record_t> map_t;
  map_t ref_map;

  // Internal invariant check (out-of-line).
  void _check() const;
  // Merge entry at p with its left neighbor when contiguous and
  // equally referenced (out-of-line).
  void _maybe_merge_left(map_t::iterator& p);

  void clear() {
    ref_map.clear();
  }
  bool empty() const {
    return ref_map.empty();
  }

  /// take a reference on [offset, offset+len)
  void get(uint64_t offset, uint32_t len);
  /// drop a reference; extents whose count hits zero are appended to
  /// *release; *maybe_unshared is set if the map may be single-ref only
  void put(uint64_t offset, uint32_t len, PExtentVector *release,
	   bool *maybe_unshared);

  bool contains(uint64_t offset, uint32_t len) const;
  bool intersects(uint64_t offset, uint32_t len) const;

  void bound_encode(size_t& p) const {
    denc_varint((uint32_t)0, p);
    if (!ref_map.empty()) {
      size_t elem_size = 0;
      denc_varint_lowz((uint64_t)0, elem_size);
      ref_map.begin()->second.bound_encode(elem_size);
      p += elem_size * ref_map.size();
    }
  }
  // Wire format: count, then first absolute offset + record, then
  // delta-encoded offsets (signed lowz varint of the gap) + records.
  void encode(bufferlist::contiguous_appender& p) const {
    const uint32_t n = ref_map.size();
    denc_varint(n, p);
    if (n) {
      auto i = ref_map.begin();
      denc_varint_lowz(i->first, p);
      i->second.encode(p);
      int64_t pos = i->first;
      while (++i != ref_map.end()) {
	denc_varint_lowz((int64_t)i->first - pos, p);
	i->second.encode(p);
	pos = i->first;
      }
    }
  }
  void decode(bufferptr::const_iterator& p) {
    uint32_t n;
    denc_varint(n, p);
    if (n) {
      int64_t pos;
      denc_varint_lowz(pos, p);
      ref_map[pos].decode(p);
      while (--n) {
	int64_t delta;
	denc_varint_lowz(delta, p);
	pos += delta;
	ref_map[pos].decode(p);
      }
    }
  }

  void dump(Formatter *f) const;
  static void generate_test_instances(list<bluestore_extent_ref_map_t*>& o);
};
WRITE_CLASS_DENC(bluestore_extent_ref_map_t)


ostream& operator<<(ostream& out, const bluestore_extent_ref_map_t& rm);
227static inline bool operator==(const bluestore_extent_ref_map_t::record_t& l,
228 const bluestore_extent_ref_map_t::record_t& r) {
229 return l.length == r.length && l.refs == r.refs;
230}
231static inline bool operator==(const bluestore_extent_ref_map_t& l,
232 const bluestore_extent_ref_map_t& r) {
233 return l.ref_map == r.ref_map;
234}
235static inline bool operator!=(const bluestore_extent_ref_map_t& l,
236 const bluestore_extent_ref_map_t& r) {
237 return !(l == r);
238}
239
/// blob_use_tracker: a set of per-alloc unit ref counters to track blob usage
///
/// Two storage modes share one union:
///  - num_au == 0: a single inline counter (total_bytes) tracks the
///    whole blob;
///  - num_au  > 0: bytes_per_au points at a heap array of num_au
///    counters, one per allocation unit of au_size bytes.
///
/// NOTE(review): the implicitly-generated copy constructor/assignment
/// would alias bytes_per_au and lead to a double delete in ~tracker —
/// presumably callers never copy a tracker with num_au > 0; confirm.
struct bluestore_blob_use_tracker_t {
  // N.B.: There is no need to minimize au_size/num_au
  // as much as possible (e.g. have just a single byte for au_size) since:
  // 1) Struct isn't packed hence it's padded. And even if it's packed see 2)
  // 2) Mem manager has its own granularity, most probably >= 8 bytes
  //
  uint32_t au_size; // Allocation (=tracking) unit size,
                    // == 0 if uninitialized
  uint32_t num_au;  // Amount of allocation units tracked
                    // == 0 if single unit or the whole blob is tracked

  union {
    uint32_t* bytes_per_au;   // owned array of num_au counters (num_au > 0)
    uint32_t total_bytes;     // inline counter (num_au == 0)
  };

  bluestore_blob_use_tracker_t()
    : au_size(0), num_au(0), bytes_per_au(nullptr) {
  }
  ~bluestore_blob_use_tracker_t() {
    clear();
  }

  // Release the counter array (if any) and reset to the uninitialized
  // state.
  void clear() {
    if (num_au != 0) {
      delete[] bytes_per_au;
    }
    bytes_per_au = 0;
    au_size = 0;
    num_au = 0;
  }

  /// total referenced bytes across all tracked units
  uint32_t get_referenced_bytes() const {
    uint32_t total = 0;
    if (!num_au) {
      total = total_bytes;
    } else {
      for (size_t i = 0; i < num_au; ++i) {
        total += bytes_per_au[i];
      }
    }
    return total;
  }
  bool is_not_empty() const {
    if (!num_au) {
      return total_bytes != 0;
    } else {
      for (size_t i = 0; i < num_au; ++i) {
        if (bytes_per_au[i]) {
          return true;
        }
      }
    }
    return false;
  }
  bool is_empty() const {
    return !is_not_empty();
  }
  /// shrink tracking to new_len bytes (rounded up to an au boundary);
  /// resets entirely if nothing remains
  void prune_tail(uint32_t new_len) {
    if (num_au) {
      new_len = round_up_to(new_len, au_size);
      uint32_t _num_au = new_len / au_size;
      ceph_assert(_num_au <= num_au);
      if (_num_au) {
        num_au = _num_au; // bytes_per_au array is left unmodified

      } else {
        clear();
      }
    }
  }
  /// grow tracking to new_len bytes; newly added units start at zero.
  /// In single-counter mode this first converts to per-au mode,
  /// preserving the old total in the first unit.
  void add_tail(uint32_t new_len, uint32_t _au_size) {
    auto full_size = au_size * (num_au ? num_au : 1);
    ceph_assert(new_len >= full_size);
    if (new_len == full_size) {
      return;
    }
    if (!num_au) {
      uint32_t old_total = total_bytes;
      total_bytes = 0;
      init(new_len, _au_size);
      ceph_assert(num_au);
      bytes_per_au[0] = old_total;
    } else {
      ceph_assert(_au_size == au_size);
      new_len = round_up_to(new_len, au_size);
      uint32_t _num_au = new_len / au_size;
      ceph_assert(_num_au >= num_au);
      if (_num_au > num_au) {
        auto old_bytes = bytes_per_au;
        auto old_num_au = num_au;
        num_au = _num_au;
        allocate();
        for (size_t i = 0; i < old_num_au; i++) {
          bytes_per_au[i] = old_bytes[i];
        }
        for (size_t i = old_num_au; i < num_au; i++) {
          bytes_per_au[i] = 0;
        }
        delete[] old_bytes;
      }
    }
  }

  void init(
    uint32_t full_length,
    uint32_t _au_size);

  void get(
    uint32_t offset,
    uint32_t len);

  /// put: return true if the blob has no references any more after the call,
  /// no release_units is filled for the sake of performance.
  /// return false if there are some references to the blob,
  /// in this case release_units contains pextents
  /// (identified by their offsets relative to the blob start)
  /// that are not used any more and can be safely deallocated.
  bool put(
    uint32_t offset,
    uint32_t len,
    PExtentVector *release);

  bool can_split() const;
  bool can_split_at(uint32_t blob_offset) const;
  void split(
    uint32_t blob_offset,
    bluestore_blob_use_tracker_t* r);

  bool equal(
    const bluestore_blob_use_tracker_t& other) const;

  // Wire format: au_size, then (if nonzero) num_au, then either the
  // single total counter or num_au per-unit counters.
  void bound_encode(size_t& p) const {
    denc_varint(au_size, p);
    if (au_size) {
      denc_varint(num_au, p);
      if (!num_au) {
        denc_varint(total_bytes, p);
      } else {
        size_t elem_size = 0;
        denc_varint((uint32_t)0, elem_size);
        p += elem_size * num_au;
      }
    }
  }
  void encode(bufferlist::contiguous_appender& p) const {
    denc_varint(au_size, p);
    if (au_size) {
      denc_varint(num_au, p);
      if (!num_au) {
        denc_varint(total_bytes, p);
      } else {
        size_t elem_size = 0;
        denc_varint((uint32_t)0, elem_size);
        for (size_t i = 0; i < num_au; ++i) {
          denc_varint(bytes_per_au[i], p);
        }
      }
    }
  }
  void decode(bufferptr::const_iterator& p) {
    clear();
    denc_varint(au_size, p);
    if (au_size) {
      denc_varint(num_au, p);
      if (!num_au) {
        denc_varint(total_bytes, p);
      } else {
        allocate();
        for (size_t i = 0; i < num_au; ++i) {
          denc_varint(bytes_per_au[i], p);
        }
      }
    }
  }

  void dump(Formatter *f) const;
  static void generate_test_instances(list<bluestore_blob_use_tracker_t*>& o);
private:
  // allocate bytes_per_au for the current num_au (out-of-line)
  void allocate();
};
WRITE_CLASS_DENC(bluestore_blob_use_tracker_t)
ostream& operator<<(ostream& out, const bluestore_blob_use_tracker_t& rm);
424
/// blob: a piece of data on disk
///
/// Describes where a blob's data lives on the device (extents), whether
/// it is compressed/shared, optional per-chunk checksums, and a small
/// "never written" bitmap (unused). The encode/decode pair below is the
/// on-disk format; field order and flag-conditional sections must not
/// change without a version bump.
struct bluestore_blob_t {
private:
  PExtentVector extents;          ///< raw data position on device
  uint32_t logical_length = 0;    ///< original length of data stored in the blob
  uint32_t compressed_length = 0; ///< compressed length if any

public:
  enum {
    LEGACY_FLAG_MUTABLE = 1,  ///< [legacy] blob can be overwritten or split
    FLAG_COMPRESSED = 2,      ///< blob is compressed
    FLAG_CSUM = 4,            ///< blob has checksums
    FLAG_HAS_UNUSED = 8,      ///< blob has unused map
    FLAG_SHARED = 16,         ///< blob is shared; see external SharedBlob
  };
  static string get_flags_string(unsigned flags);

  uint32_t flags = 0;             ///< FLAG_*

  typedef uint16_t unused_t;
  unused_t unused = 0;            ///< portion that has never been written to (bitmap)

  uint8_t csum_type = Checksummer::CSUM_NONE;  ///< CSUM_*
  uint8_t csum_chunk_order = 0;   ///< csum block size is 1<<block_order bytes

  bufferptr csum_data;            ///< opaque vector of csum data

  bluestore_blob_t(uint32_t f = 0) : flags(f) {}

  const PExtentVector& get_extents() const {
    return extents;
  }
  PExtentVector& dirty_extents() {
    return extents;
  }

  DENC_HELPERS;
  // Pessimistic bound: counts all optional sections regardless of flags.
  void bound_encode(size_t& p, uint64_t struct_v) const {
    ceph_assert(struct_v == 1 || struct_v == 2);
    denc(extents, p);
    denc_varint(flags, p);
    denc_varint_lowz(logical_length, p);
    denc_varint_lowz(compressed_length, p);
    denc(csum_type, p);
    denc(csum_chunk_order, p);
    denc_varint(csum_data.length(), p);
    p += csum_data.length();
    p += sizeof(unused_t);
  }

  // Compressed-length, csum and unused sections are only present when
  // the corresponding flag is set.
  void encode(bufferlist::contiguous_appender& p, uint64_t struct_v) const {
    ceph_assert(struct_v == 1 || struct_v == 2);
    denc(extents, p);
    denc_varint(flags, p);
    if (is_compressed()) {
      denc_varint_lowz(logical_length, p);
      denc_varint_lowz(compressed_length, p);
    }
    if (has_csum()) {
      denc(csum_type, p);
      denc(csum_chunk_order, p);
      denc_varint(csum_data.length(), p);
      memcpy(p.get_pos_add(csum_data.length()), csum_data.c_str(),
	     csum_data.length());
    }
    if (has_unused()) {
      denc(unused, p);
    }
  }

  void decode(bufferptr::const_iterator& p, uint64_t struct_v) {
    ceph_assert(struct_v == 1 || struct_v == 2);
    denc(extents, p);
    denc_varint(flags, p);
    if (is_compressed()) {
      denc_varint_lowz(logical_length, p);
      denc_varint_lowz(compressed_length, p);
    } else {
      // uncompressed blobs don't persist the length; it equals the sum
      // of the extent lengths
      logical_length = get_ondisk_length();
    }
    if (has_csum()) {
      denc(csum_type, p);
      denc(csum_chunk_order, p);
      int len;
      denc_varint(len, p);
      csum_data = p.get_ptr(len);
      csum_data.reassign_to_mempool(mempool::mempool_bluestore_cache_other);
    }
    if (has_unused()) {
      denc(unused, p);
    }
  }

  bool can_split() const {
    return
      !has_flag(FLAG_SHARED) &&
      !has_flag(FLAG_COMPRESSED) &&
      !has_flag(FLAG_HAS_UNUSED); // splitting unused set is complex
  }
  /// splitting is only allowed on a csum-chunk boundary
  bool can_split_at(uint32_t blob_offset) const {
    return !has_csum() || blob_offset % get_csum_chunk_size() == 0;
  }

  void dump(Formatter *f) const;
  static void generate_test_instances(list<bluestore_blob_t*>& ls);

  bool has_flag(unsigned f) const {
    return flags & f;
  }
  void set_flag(unsigned f) {
    flags |= f;
  }
  void clear_flag(unsigned f) {
    flags &= ~f;
  }
  string get_flags_string() const {
    return get_flags_string(flags);
  }

  void set_compressed(uint64_t clen_orig, uint64_t clen) {
    set_flag(FLAG_COMPRESSED);
    logical_length = clen_orig;
    compressed_length = clen;
  }
  bool is_mutable() const {
    return !is_compressed() && !is_shared();
  }
  bool is_compressed() const {
    return has_flag(FLAG_COMPRESSED);
  }
  bool has_csum() const {
    return has_flag(FLAG_CSUM);
  }
  bool has_unused() const {
    return has_flag(FLAG_HAS_UNUSED);
  }
  bool is_shared() const {
    return has_flag(FLAG_SHARED);
  }

  /// return chunk (i.e. min readable block) size for the blob
  uint64_t get_chunk_size(uint64_t dev_block_size) const {
    return has_csum() ?
      std::max<uint64_t>(dev_block_size, get_csum_chunk_size()) : dev_block_size;
  }
  uint32_t get_csum_chunk_size() const {
    return 1 << csum_chunk_order;
  }
  uint32_t get_compressed_payload_length() const {
    return is_compressed() ? compressed_length : 0;
  }
  /// translate a blob-relative offset to a device offset; *plen (if
  /// given) receives the bytes remaining in the containing extent
  uint64_t calc_offset(uint64_t x_off, uint64_t *plen) const {
    auto p = extents.begin();
    ceph_assert(p != extents.end());
    while (x_off >= p->length) {
      x_off -= p->length;
      ++p;
      ceph_assert(p != extents.end());
    }
    if (plen)
      *plen = p->length - x_off;
    return p->offset + x_off;
  }

  // validate whether or not the status of pextents within the given range
  // meets the requirement(allocated or unallocated).
  bool _validate_range(uint64_t b_off, uint64_t b_len,
		       bool require_allocated) const {
    auto p = extents.begin();
    ceph_assert(p != extents.end());
    while (b_off >= p->length) {
      b_off -= p->length;
      ++p;
      ceph_assert(p != extents.end());
    }
    b_len += b_off;
    while (b_len) {
      ceph_assert(p != extents.end());
      if (require_allocated != p->is_valid()) {
        return false;
      }

      if (p->length >= b_len) {
        return true;
      }
      b_len -= p->length;
      ++p;
    }
    ceph_abort_msg("we should not get here");
    return false;
  }

  /// return true if the entire range is allocated
  /// (mapped to extents on disk)
  bool is_allocated(uint64_t b_off, uint64_t b_len) const {
    return _validate_range(b_off, b_len, true);
  }

  /// return true if the entire range is unallocated
  /// (not mapped to extents on disk)
  bool is_unallocated(uint64_t b_off, uint64_t b_len) const {
    return _validate_range(b_off, b_len, false);
  }

  /// return true if the logical range has never been used
  bool is_unused(uint64_t offset, uint64_t length) const {
    if (!has_unused()) {
      return false;
    }
    uint64_t blob_len = get_logical_length();
    ceph_assert((blob_len % (sizeof(unused)*8)) == 0);
    ceph_assert(offset + length <= blob_len);
    // the 16-bit bitmap divides the blob into sizeof(unused)*8 chunks
    uint64_t chunk_size = blob_len / (sizeof(unused)*8);
    uint64_t start = offset / chunk_size;
    uint64_t end = round_up_to(offset + length, chunk_size) / chunk_size;
    auto i = start;
    while (i < end && (unused & (1u << i))) {
      i++;
    }
    return i >= end;
  }

  /// mark a range that has never been used
  void add_unused(uint64_t offset, uint64_t length) {
    uint64_t blob_len = get_logical_length();
    ceph_assert((blob_len % (sizeof(unused)*8)) == 0);
    ceph_assert(offset + length <= blob_len);
    uint64_t chunk_size = blob_len / (sizeof(unused)*8);
    // only whole chunks inside [offset, offset+length) can be marked:
    // round start up, end down
    uint64_t start = round_up_to(offset, chunk_size) / chunk_size;
    uint64_t end = (offset + length) / chunk_size;
    for (auto i = start; i < end; ++i) {
      unused |= (1u << i);
    }
    if (start != end) {
      set_flag(FLAG_HAS_UNUSED);
    }
  }

  /// indicate that a range has (now) been used.
  void mark_used(uint64_t offset, uint64_t length) {
    if (has_unused()) {
      uint64_t blob_len = get_logical_length();
      ceph_assert((blob_len % (sizeof(unused)*8)) == 0);
      ceph_assert(offset + length <= blob_len);
      uint64_t chunk_size = blob_len / (sizeof(unused)*8);
      // any chunk touched by the range is no longer "unused": round
      // start down, end up
      uint64_t start = offset / chunk_size;
      uint64_t end = round_up_to(offset + length, chunk_size) / chunk_size;
      for (auto i = start; i < end; ++i) {
        unused &= ~(1u << i);
      }
      if (unused == 0) {
        clear_flag(FLAG_HAS_UNUSED);
      }
    }
  }

  /// invoke f(device_offset, len) for each device span backing
  /// [x_off, x_off+x_len); stops early if f returns < 0 and propagates
  /// that value, otherwise returns 0
  template<class F>
  int map(uint64_t x_off, uint64_t x_len, F&& f) const {
    static_assert(std::is_invocable_r_v<int, F, uint64_t, uint64_t>);

    auto p = extents.begin();
    ceph_assert(p != extents.end());
    while (x_off >= p->length) {
      x_off -= p->length;
      ++p;
      ceph_assert(p != extents.end());
    }
    while (x_len > 0) {
      ceph_assert(p != extents.end());
      uint64_t l = std::min(p->length - x_off, x_len);
      int r = f(p->offset + x_off, l);
      if (r < 0)
        return r;
      x_off = 0;
      x_len -= l;
      ++p;
    }
    return 0;
  }
  /// like map(), but carves bl into per-extent pieces and passes each
  /// to f(device_offset, piece)
  template<class F>
  void map_bl(uint64_t x_off,
	      bufferlist& bl,
	      F&& f) const {
    static_assert(std::is_invocable_v<F, uint64_t, bufferlist&>);

    auto p = extents.begin();
    ceph_assert(p != extents.end());
    while (x_off >= p->length) {
      x_off -= p->length;
      ++p;
      ceph_assert(p != extents.end());
    }
    bufferlist::iterator it = bl.begin();
    uint64_t x_len = bl.length();
    while (x_len > 0) {
      ceph_assert(p != extents.end());
      uint64_t l = std::min(p->length - x_off, x_len);
      bufferlist t;
      it.copy(l, t);
      f(p->offset + x_off, t);
      x_off = 0;
      x_len -= l;
      ++p;
    }
  }

  /// sum of all extent lengths (valid and invalid)
  uint32_t get_ondisk_length() const {
    uint32_t len = 0;
    for (auto &p : extents) {
      len += p.length;
    }
    return len;
  }

  uint32_t get_logical_length() const {
    return logical_length;
  }
  size_t get_csum_value_size() const;

  size_t get_csum_count() const {
    size_t vs = get_csum_value_size();
    if (!vs)
      return 0;
    return csum_data.length() / vs;
  }
  /// read the i-th checksum value, widened to 64 bits
  uint64_t get_csum_item(unsigned i) const {
    size_t cs = get_csum_value_size();
    const char *p = csum_data.c_str();
    switch (cs) {
    case 0:
      ceph_abort_msg("no csum data, bad index");
    case 1:
      return reinterpret_cast<const uint8_t*>(p)[i];
    case 2:
      return reinterpret_cast<const __le16*>(p)[i];
    case 4:
      return reinterpret_cast<const __le32*>(p)[i];
    case 8:
      return reinterpret_cast<const __le64*>(p)[i];
    default:
      ceph_abort_msg("unrecognized csum word size");
    }
  }
  const char *get_csum_item_ptr(unsigned i) const {
    size_t cs = get_csum_value_size();
    return csum_data.c_str() + (cs * i);
  }
  char *get_csum_item_ptr(unsigned i) {
    size_t cs = get_csum_value_size();
    return csum_data.c_str() + (cs * i);
  }

  /// enable checksums: one zeroed value per csum chunk of a len-byte blob
  void init_csum(unsigned type, unsigned order, unsigned len) {
    flags |= FLAG_CSUM;
    csum_type = type;
    csum_chunk_order = order;
    csum_data = buffer::create(get_csum_value_size() * len / get_csum_chunk_size());
    csum_data.zero();
    csum_data.reassign_to_mempool(mempool::mempool_bluestore_cache_other);
  }

  /// calculate csum for the buffer at the given b_off
  void calc_csum(uint64_t b_off, const bufferlist& bl);

  /// verify csum: return -EOPNOTSUPP for unsupported checksum type;
  /// return -1 and valid(nonnegative) b_bad_off for checksum error;
  /// return 0 if all is well.
  int verify_csum(uint64_t b_off, const bufferlist& bl, int* b_bad_off,
		  uint64_t *bad_csum) const;

  bool can_prune_tail() const {
    return
      extents.size() > 1 &&  // if it's all invalid it's not pruning.
      !extents.back().is_valid() &&
      !has_unused();
  }
  /// drop the trailing (invalid) extent and shrink csum_data to match
  void prune_tail() {
    const auto &p = extents.back();
    logical_length -= p.length;
    extents.pop_back();
    if (has_csum()) {
      bufferptr t;
      t.swap(csum_data);
      csum_data = bufferptr(t.c_str(),
			    get_logical_length() / get_csum_chunk_size() *
			    get_csum_value_size());
    }
  }
  /// extend the blob with an unallocated tail up to new_len, growing
  /// csum_data with zeroed values for the new chunks
  void add_tail(uint32_t new_len) {
    ceph_assert(is_mutable());
    ceph_assert(!has_unused());
    ceph_assert(new_len > logical_length);
    extents.emplace_back(
      bluestore_pextent_t(
        bluestore_pextent_t::INVALID_OFFSET,
        new_len - logical_length));
    logical_length = new_len;
    if (has_csum()) {
      bufferptr t;
      t.swap(csum_data);
      csum_data = buffer::create(
	get_csum_value_size() * logical_length / get_csum_chunk_size());
      csum_data.copy_in(0, t.length(), t.c_str());
      csum_data.zero(t.length(), csum_data.length() - t.length());
    }
  }
  /// granularity at which space can be released from this blob
  uint32_t get_release_size(uint32_t min_alloc_size) const {
    if (is_compressed()) {
      return get_logical_length();
    }
    uint32_t res = get_csum_chunk_size();
    if (!has_csum() || res < min_alloc_size) {
      res = min_alloc_size;
    }
    return res;
  }

  void split(uint32_t blob_offset, bluestore_blob_t& rb);
  void allocated(uint32_t b_off, uint32_t length, const PExtentVector& allocs);
  void allocated_test(const bluestore_pextent_t& alloc); // intended for UT only

  /// updates blob's pextents container and return unused pextents eligible
  /// for release.
  /// all - indicates that the whole blob to be released.
  /// logical - specifies set of logical extents within blob's
  ///           to be released
  /// Returns true if blob has no more valid pextents
  bool release_extents(
    bool all,
    const PExtentVector& logical,
    PExtentVector* r);
};
WRITE_CLASS_DENC_FEATURED(bluestore_blob_t)

ostream& operator<<(ostream& out, const bluestore_blob_t& o);
860
861
/// shared blob state
///
/// Persistent companion of a shared blob: tracks which extents are
/// referenced (and how many times) across clones. The id itself is not
/// part of the encoded payload — it is carried in the key.
struct bluestore_shared_blob_t {
  uint64_t sbid;                      ///< shared blob id
  bluestore_extent_ref_map_t ref_map; ///< shared blob extents

  bluestore_shared_blob_t(uint64_t _sbid) : sbid(_sbid) {}
  bluestore_shared_blob_t(uint64_t _sbid,
			  bluestore_extent_ref_map_t&& _ref_map )
    : sbid(_sbid), ref_map(std::move(_ref_map)) {}

  DENC(bluestore_shared_blob_t, v, p) {
    DENC_START(1, 1, p);
    denc(v.ref_map, p);
    DENC_FINISH(p);
  }


  void dump(Formatter *f) const;
  static void generate_test_instances(list<bluestore_shared_blob_t*>& ls);

  bool empty() const {
    return ref_map.empty();
  }
};
WRITE_CLASS_DENC(bluestore_shared_blob_t)

ostream& operator<<(ostream& out, const bluestore_shared_blob_t& o);
889
/// onode: per-object metadata
///
/// Root per-object record: numeric id, logical size, xattrs, the shard
/// layout of the extent map, allocation hints and omap flags.
struct bluestore_onode_t {
  uint64_t nid = 0;  ///< numeric id (locally unique)
  uint64_t size = 0; ///< object size
  map<mempool::bluestore_cache_other::string, bufferptr> attrs; ///< attrs

  /// descriptor for one shard of the (possibly sharded) extent map
  struct shard_info {
    uint32_t offset = 0; ///< logical offset for start of shard
    uint32_t bytes = 0;  ///< encoded bytes
    DENC(shard_info, v, p) {
      denc_varint(v.offset, p);
      denc_varint(v.bytes, p);
    }
    void dump(Formatter *f) const;
  };
  vector<shard_info> extent_map_shards; ///< extent map shards (if any)

  uint32_t expected_object_size = 0;
  uint32_t expected_write_size = 0;
  uint32_t alloc_hint_flags = 0;

  uint8_t flags = 0;

  enum {
    FLAG_OMAP = 1,        ///< object may have omap data
    FLAG_PGMETA_OMAP = 2, ///< omap data is in meta omap prefix
  };

  // NOTE(review): only FLAG_OMAP is rendered here; FLAG_PGMETA_OMAP is
  // not reflected in the string — presumably intentional for dump
  // compatibility, confirm before extending.
  string get_flags_string() const {
    string s;
    if (flags & FLAG_OMAP) {
      s = "omap";
    }
    return s;
  }

  bool has_flag(unsigned f) const {
    return flags & f;
  }

  void set_flag(unsigned f) {
    flags |= f;
  }

  void clear_flag(unsigned f) {
    flags &= ~f;
  }

  bool has_omap() const {
    return has_flag(FLAG_OMAP);
  }
  bool is_pgmeta_omap() const {
    return has_flag(FLAG_PGMETA_OMAP);
  }

  void set_omap_flag() {
    set_flag(FLAG_OMAP);
  }

  void clear_omap_flag() {
    clear_flag(FLAG_OMAP);
  }

  // On-disk format (versioned). Field order is the wire format.
  DENC(bluestore_onode_t, v, p) {
    DENC_START(1, 1, p);
    denc_varint(v.nid, p);
    denc_varint(v.size, p);
    denc(v.attrs, p);
    denc(v.flags, p);
    denc(v.extent_map_shards, p);
    denc_varint(v.expected_object_size, p);
    denc_varint(v.expected_write_size, p);
    denc_varint(v.alloc_hint_flags, p);
    DENC_FINISH(p);
  }
  void dump(Formatter *f) const;
  static void generate_test_instances(list<bluestore_onode_t*>& o);
};
WRITE_CLASS_DENC(bluestore_onode_t::shard_info)
WRITE_CLASS_DENC(bluestore_onode_t)

ostream& operator<<(ostream& out, const bluestore_onode_t::shard_info& si);
972
/// writeahead-logged op
///
/// A single deferred (write-ahead-logged) operation: an opcode plus the
/// target device extents and the data to write.
struct bluestore_deferred_op_t {
  typedef enum {
    OP_WRITE = 1,
  } type_t;
  __u8 op = 0;

  PExtentVector extents; ///< device extents the data applies to
  bufferlist data;       ///< payload to be (re)written

  DENC(bluestore_deferred_op_t, v, p) {
    DENC_START(1, 1, p);
    denc(v.op, p);
    denc(v.extents, p);
    denc(v.data, p);
    DENC_FINISH(p);
  }
  void dump(Formatter *f) const;
  static void generate_test_instances(list<bluestore_deferred_op_t*>& o);
};
WRITE_CLASS_DENC(bluestore_deferred_op_t)
994
995
/// writeahead-logged transaction
///
/// A sequenced batch of deferred ops plus the device ranges to release
/// once the transaction has been applied.
struct bluestore_deferred_transaction_t {
  uint64_t seq = 0;
  list<bluestore_deferred_op_t> ops;
  interval_set<uint64_t> released; ///< allocations to release after tx

  // note: seq(0) here is redundant with the member initializer above
  bluestore_deferred_transaction_t() : seq(0) {}

  DENC(bluestore_deferred_transaction_t, v, p) {
    DENC_START(1, 1, p);
    denc(v.seq, p);
    denc(v.ops, p);
    denc(v.released, p);
    DENC_FINISH(p);
  }
  void dump(Formatter *f) const;
  static void generate_test_instances(list<bluestore_deferred_transaction_t*>& o);
};
WRITE_CLASS_DENC(bluestore_deferred_transaction_t)
1015
/// header prepended to compressed blob payloads: records the
/// compression algorithm and the uncompressed data length
struct bluestore_compression_header_t {
  uint8_t type = Compressor::COMP_ALG_NONE; ///< Compressor::COMP_ALG_*
  uint32_t length = 0;                      ///< uncompressed payload length

  bluestore_compression_header_t() {}
  bluestore_compression_header_t(uint8_t _type)
    : type(_type) {}

  DENC(bluestore_compression_header_t, v, p) {
    DENC_START(1, 1, p);
    denc(v.type, p);
    denc(v.length, p);
    DENC_FINISH(p);
  }
  void dump(Formatter *f) const;
  static void generate_test_instances(list<bluestore_compression_header_t*>& o);
};
WRITE_CLASS_DENC(bluestore_compression_header_t)
1034
1035
1036#endif