// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
 * Ceph - scalable distributed file system
 *
 * Copyright (C) 2014 Red Hat
 *
 * This is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License version 2.1, as published by the Free Software
 * Foundation. See file COPYING.
 *
 */

#ifndef CEPH_OSD_BLUESTORE_BLUESTORE_TYPES_H
#define CEPH_OSD_BLUESTORE_BLUESTORE_TYPES_H

#include <bit>
#include <ostream>
#include <type_traits>
#include <vector>
#include <array>
#include "include/mempool.h"
#include "include/types.h"
#include "include/interval_set.h"
#include "include/utime.h"
#include "common/hobject.h"
#include "compressor/Compressor.h"
#include "common/Checksummer.h"
#include "include/ceph_hash.h"

namespace ceph {
  class Formatter;
}

/// label for block device
struct bluestore_bdev_label_t {
  uuid_d osd_uuid;         ///< osd uuid
  uint64_t size = 0;       ///< device size
  utime_t btime;           ///< birth time
  std::string description; ///< device description

  std::map<std::string,std::string> meta; ///< {read,write}_meta() content from ObjectStore

  void encode(ceph::buffer::list& bl) const;
  void decode(ceph::buffer::list::const_iterator& p);
  void dump(ceph::Formatter *f) const;
  static void generate_test_instances(std::list<bluestore_bdev_label_t*>& o);
};
WRITE_CLASS_ENCODER(bluestore_bdev_label_t)

std::ostream& operator<<(std::ostream& out, const bluestore_bdev_label_t& l);

/// collection metadata
struct bluestore_cnode_t {
  uint32_t bits; ///< how many bits of coll pgid are significant

  explicit bluestore_cnode_t(int b=0) : bits(b) {}

  DENC(bluestore_cnode_t, v, p) {
    DENC_START(1, 1, p);
    denc(v.bits, p);
    DENC_FINISH(p);
  }
  void dump(ceph::Formatter *f) const;
  static void generate_test_instances(std::list<bluestore_cnode_t*>& o);
};
WRITE_CLASS_DENC(bluestore_cnode_t)

std::ostream& operator<<(std::ostream& out, const bluestore_cnode_t& l);

template <typename OFFS_TYPE, typename LEN_TYPE>
struct bluestore_interval_t
{
  static const uint64_t INVALID_OFFSET = ~0ull;

  OFFS_TYPE offset = 0;
  LEN_TYPE length = 0;

  bluestore_interval_t() {}
  bluestore_interval_t(uint64_t o, uint64_t l) : offset(o), length(l) {}

  bool is_valid() const {
    return offset != INVALID_OFFSET;
  }
  uint64_t end() const {
    return offset != INVALID_OFFSET ? offset + length : INVALID_OFFSET;
  }

  bool operator==(const bluestore_interval_t& other) const {
    return offset == other.offset && length == other.length;
  }

};

/// pextent: physical extent
struct bluestore_pextent_t : public bluestore_interval_t<uint64_t, uint32_t>
{
  bluestore_pextent_t() {}
  bluestore_pextent_t(uint64_t o, uint64_t l) : bluestore_interval_t(o, l) {}
  bluestore_pextent_t(const bluestore_interval_t &ext) :
    bluestore_interval_t(ext.offset, ext.length) {}

  DENC(bluestore_pextent_t, v, p) {
    denc_lba(v.offset, p);
    denc_varint_lowz(v.length, p);
  }

  void dump(ceph::Formatter *f) const;
  static void generate_test_instances(std::list<bluestore_pextent_t*>& ls);
};
WRITE_CLASS_DENC(bluestore_pextent_t)

std::ostream& operator<<(std::ostream& out, const bluestore_pextent_t& o);

typedef mempool::bluestore_cache_other::vector<bluestore_pextent_t> PExtentVector;

template<>
struct denc_traits<PExtentVector> {
  static constexpr bool supported = true;
  static constexpr bool bounded = false;
  static constexpr bool featured = false;
  static constexpr bool need_contiguous = true;
  static void bound_encode(const PExtentVector& v, size_t& p) {
    p += sizeof(uint32_t);
    const auto size = v.size();
    if (size) {
      size_t per = 0;
      denc(v.front(), per);
      p += per * size;
    }
  }
  static void encode(const PExtentVector& v,
                     ceph::buffer::list::contiguous_appender& p) {
    denc_varint(v.size(), p);
    for (auto& i : v) {
      denc(i, p);
    }
  }
  static void decode(PExtentVector& v, ceph::buffer::ptr::const_iterator& p) {
    unsigned num;
    denc_varint(num, p);
    v.clear();
    v.resize(num);
    for (unsigned i=0; i<num; ++i) {
      denc(v[i], p);
    }
  }
};

/// extent_map: a map of reference counted extents
struct bluestore_extent_ref_map_t {
  struct record_t {
    uint32_t length;
    uint32_t refs;
    record_t(uint32_t l=0, uint32_t r=0) : length(l), refs(r) {}
    DENC(bluestore_extent_ref_map_t::record_t, v, p) {
      denc_varint_lowz(v.length, p);
      denc_varint(v.refs, p);
    }
  };

  typedef mempool::bluestore_cache_other::map<uint64_t,record_t> map_t;
  map_t ref_map;

  void _check() const;
  void _maybe_merge_left(map_t::iterator& p);

  void clear() {
    ref_map.clear();
  }
  bool empty() const {
    return ref_map.empty();
  }

  void get(uint64_t offset, uint32_t len);
  void put(uint64_t offset, uint32_t len, PExtentVector *release,
           bool *maybe_unshared);

  bool contains(uint64_t offset, uint32_t len) const;
  bool intersects(uint64_t offset, uint32_t len) const;

  void bound_encode(size_t& p) const {
    denc_varint((uint32_t)0, p);
    if (!ref_map.empty()) {
      size_t elem_size = 0;
      denc_varint_lowz((uint64_t)0, elem_size);
      ref_map.begin()->second.bound_encode(elem_size);
      p += elem_size * ref_map.size();
    }
  }
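  // On-disk layout note: entries are written as a varint count followed by
  // (offset, record) pairs; the first offset is absolute and each subsequent
  // offset is encoded as a delta from the previous one, which keeps the
  // common densely-packed case small.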
  void encode(ceph::buffer::list::contiguous_appender& p) const {
    const uint32_t n = ref_map.size();
    denc_varint(n, p);
    if (n) {
      auto i = ref_map.begin();
      denc_varint_lowz(i->first, p);
      i->second.encode(p);
      int64_t pos = i->first;
      while (++i != ref_map.end()) {
        denc_varint_lowz((int64_t)i->first - pos, p);
        i->second.encode(p);
        pos = i->first;
      }
    }
  }
  void decode(ceph::buffer::ptr::const_iterator& p) {
    uint32_t n;
    denc_varint(n, p);
    if (n) {
      int64_t pos;
      denc_varint_lowz(pos, p);
      ref_map[pos].decode(p);
      while (--n) {
        int64_t delta;
        denc_varint_lowz(delta, p);
        pos += delta;
        ref_map[pos].decode(p);
      }
    }
  }

  void dump(ceph::Formatter *f) const;
  static void generate_test_instances(std::list<bluestore_extent_ref_map_t*>& o);
};
WRITE_CLASS_DENC(bluestore_extent_ref_map_t)


std::ostream& operator<<(std::ostream& out, const bluestore_extent_ref_map_t& rm);
static inline bool operator==(const bluestore_extent_ref_map_t::record_t& l,
                              const bluestore_extent_ref_map_t::record_t& r) {
  return l.length == r.length && l.refs == r.refs;
}
static inline bool operator==(const bluestore_extent_ref_map_t& l,
                              const bluestore_extent_ref_map_t& r) {
  return l.ref_map == r.ref_map;
}
static inline bool operator!=(const bluestore_extent_ref_map_t& l,
                              const bluestore_extent_ref_map_t& r) {
  return !(l == r);
}
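
// A minimal usage sketch (illustrative only): reference a range twice, then
// release it twice; the second put() reports the extents that became free.
//   bluestore_extent_ref_map_t m;
//   m.get(0, 0x1000);
//   m.get(0, 0x1000);            // refcount 2 on [0, 0x1000)
//   PExtentVector released;
//   bool maybe_unshared = false;
//   m.put(0, 0x1000, &released, &maybe_unshared);  // still referenced
//   m.put(0, 0x1000, &released, &maybe_unshared);  // now released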

/// blob_use_tracker: a set of per-alloc unit ref buckets to track blob usage
struct bluestore_blob_use_tracker_t {
  // N.B.: There is no need to minimize au_size/num_au
  // as much as possible (e.g. have just a single byte for au_size) since:
  // 1) the struct isn't packed, hence it's padded; and even if it were packed, see 2)
  // 2) the memory manager has its own granularity, most probably >= 8 bytes
  //
  uint32_t au_size;  // Allocation (=tracking) unit size,
                     // == 0 if uninitialized
  uint32_t num_au;   // Amount of allocation units tracked
                     // == 0 if single unit or the whole blob is tracked
  uint32_t alloc_au; // Amount of allocation units allocated

  union {
    uint32_t* bytes_per_au;
    uint32_t total_bytes;
  };

  bluestore_blob_use_tracker_t()
    : au_size(0), num_au(0), alloc_au(0), bytes_per_au(nullptr) {
  }
  bluestore_blob_use_tracker_t(const bluestore_blob_use_tracker_t& tracker);
  bluestore_blob_use_tracker_t& operator=(const bluestore_blob_use_tracker_t& rhs);
  ~bluestore_blob_use_tracker_t() {
    clear();
  }

  void clear() {
    release(alloc_au, bytes_per_au);
    num_au = 0;
    alloc_au = 0;
    bytes_per_au = 0;
    au_size = 0;
  }

  uint32_t get_referenced_bytes() const {
    uint32_t total = 0;
    if (!num_au) {
      total = total_bytes;
    } else {
      for (size_t i = 0; i < num_au; ++i) {
        total += bytes_per_au[i];
      }
    }
    return total;
  }
  bool is_not_empty() const {
    if (!num_au) {
      return total_bytes != 0;
    } else {
      for (size_t i = 0; i < num_au; ++i) {
        if (bytes_per_au[i]) {
          return true;
        }
      }
    }
    return false;
  }
  bool is_empty() const {
    return !is_not_empty();
  }
  void prune_tail(uint32_t new_len) {
    if (num_au) {
      new_len = round_up_to(new_len, au_size);
      uint32_t _num_au = new_len / au_size;
      ceph_assert(_num_au <= num_au);
      if (_num_au) {
        num_au = _num_au; // bytes_per_au array is left unmodified
      } else {
        clear();
      }
    }
  }
  void add_tail(uint32_t new_len, uint32_t _au_size) {
    auto full_size = au_size * (num_au ? num_au : 1);
    ceph_assert(new_len >= full_size);
    if (new_len == full_size) {
      return;
    }
    if (!num_au) {
      uint32_t old_total = total_bytes;
      total_bytes = 0;
      init(new_len, _au_size);
      ceph_assert(num_au);
      bytes_per_au[0] = old_total;
    } else {
      ceph_assert(_au_size == au_size);
      new_len = round_up_to(new_len, au_size);
      uint32_t _num_au = new_len / au_size;
      ceph_assert(_num_au >= num_au);
      if (_num_au > num_au) {
        auto old_bytes = bytes_per_au;
        auto old_num_au = num_au;
        auto old_alloc_au = alloc_au;
        alloc_au = num_au = 0; // to bypass an assertion in allocate()
        bytes_per_au = nullptr;
        allocate(_num_au);
        for (size_t i = 0; i < old_num_au; i++) {
          bytes_per_au[i] = old_bytes[i];
        }
        for (size_t i = old_num_au; i < num_au; i++) {
          bytes_per_au[i] = 0;
        }
        release(old_alloc_au, old_bytes);
      }
    }
  }

  void init(
    uint32_t full_length,
    uint32_t _au_size);

  void get(
    uint32_t offset,
    uint32_t len);

  /// put: returns true if the blob has no references any more after the call;
  /// in that case release_units is left unfilled, for the sake of performance.
  /// Returns false if there are still some references to the blob;
  /// in this case release_units contains pextents
  /// (identified by their offsets relative to the blob start)
  /// that are not used any more and can be safely deallocated.
  bool put(
    uint32_t offset,
    uint32_t len,
    PExtentVector *release);

  bool can_split() const;
  bool can_split_at(uint32_t blob_offset) const;
  void split(
    uint32_t blob_offset,
    bluestore_blob_use_tracker_t* r);

  bool equal(
    const bluestore_blob_use_tracker_t& other) const;

  void bound_encode(size_t& p) const {
    denc_varint(au_size, p);
    if (au_size) {
      denc_varint(num_au, p);
      if (!num_au) {
        denc_varint(total_bytes, p);
      } else {
        size_t elem_size = 0;
        denc_varint((uint32_t)0, elem_size);
        p += elem_size * num_au;
      }
    }
  }
  void encode(ceph::buffer::list::contiguous_appender& p) const {
    denc_varint(au_size, p);
    if (au_size) {
      denc_varint(num_au, p);
      if (!num_au) {
        denc_varint(total_bytes, p);
      } else {
        size_t elem_size = 0;
        denc_varint((uint32_t)0, elem_size);
        for (size_t i = 0; i < num_au; ++i) {
          denc_varint(bytes_per_au[i], p);
        }
      }
    }
  }
  void decode(ceph::buffer::ptr::const_iterator& p) {
    clear();
    denc_varint(au_size, p);
    if (au_size) {
      uint32_t _num_au;
      denc_varint(_num_au, p);
      if (!_num_au) {
        num_au = 0;
        denc_varint(total_bytes, p);
      } else {
        allocate(_num_au);
        for (size_t i = 0; i < _num_au; ++i) {
          denc_varint(bytes_per_au[i], p);
        }
      }
    }
  }

  void dump(ceph::Formatter *f) const;
  static void generate_test_instances(std::list<bluestore_blob_use_tracker_t*>& o);
private:
  void allocate(uint32_t _num_au);
  void release(uint32_t _num_au, uint32_t* ptr);
};
WRITE_CLASS_DENC(bluestore_blob_use_tracker_t)
std::ostream& operator<<(std::ostream& out, const bluestore_blob_use_tracker_t& rm);
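
// A minimal usage sketch for the tracker (illustrative only; 0x1000 here is
// an assumed 4 KiB allocation unit, not a requirement of the class):
//   bluestore_blob_use_tracker_t t;
//   t.init(0x8000, 0x1000);      // 32 KiB blob tracked in 8 units
//   t.get(0, 0x2000);            // reference the first two units
//   PExtentVector release;
//   if (t.put(0, 0x2000, &release)) {
//     // no referenced bytes remain; caller may release the whole blob
//   }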

/// blob: a piece of data on disk
struct bluestore_blob_t {
private:
  PExtentVector extents;           ///< raw data position on device
  uint32_t logical_length = 0;     ///< original length of data stored in the blob
  uint32_t compressed_length = 0;  ///< compressed length if any

public:
  enum {
    LEGACY_FLAG_MUTABLE = 1, ///< [legacy] blob can be overwritten or split
    FLAG_COMPRESSED = 2,     ///< blob is compressed
    FLAG_CSUM = 4,           ///< blob has checksums
    FLAG_HAS_UNUSED = 8,     ///< blob has unused bitmap
    FLAG_SHARED = 16,        ///< blob is shared; see external SharedBlob
  };
  static std::string get_flags_string(unsigned flags);

  uint32_t flags = 0;   ///< FLAG_*

  typedef uint16_t unused_t;
  unused_t unused = 0;  ///< portion that has never been written to (bitmap)

  uint8_t csum_type = Checksummer::CSUM_NONE;  ///< CSUM_*
  uint8_t csum_chunk_order = 0;  ///< csum block size is 1<<block_order bytes

  ceph::buffer::ptr csum_data;   ///< opaque vector of csum data

  bluestore_blob_t(uint32_t f = 0) : flags(f) {}

  const PExtentVector& get_extents() const {
    return extents;
  }
  PExtentVector& dirty_extents() {
    return extents;
  }

  DENC_HELPERS;
  void bound_encode(size_t& p, uint64_t struct_v) const {
    ceph_assert(struct_v == 1 || struct_v == 2);
    denc(extents, p);
    denc_varint(flags, p);
    denc_varint_lowz(logical_length, p);
    denc_varint_lowz(compressed_length, p);
    denc(csum_type, p);
    denc(csum_chunk_order, p);
    denc_varint(csum_data.length(), p);
    p += csum_data.length();
    p += sizeof(unused_t);
  }

  void encode(ceph::buffer::list::contiguous_appender& p, uint64_t struct_v) const {
    ceph_assert(struct_v == 1 || struct_v == 2);
    denc(extents, p);
    denc_varint(flags, p);
    if (is_compressed()) {
      denc_varint_lowz(logical_length, p);
      denc_varint_lowz(compressed_length, p);
    }
    if (has_csum()) {
      denc(csum_type, p);
      denc(csum_chunk_order, p);
      denc_varint(csum_data.length(), p);
      memcpy(p.get_pos_add(csum_data.length()), csum_data.c_str(),
             csum_data.length());
    }
    if (has_unused()) {
      denc(unused, p);
    }
  }

  void decode(ceph::buffer::ptr::const_iterator& p, uint64_t struct_v) {
    ceph_assert(struct_v == 1 || struct_v == 2);
    denc(extents, p);
    denc_varint(flags, p);
    if (is_compressed()) {
      denc_varint_lowz(logical_length, p);
      denc_varint_lowz(compressed_length, p);
    } else {
      logical_length = get_ondisk_length();
    }
    if (has_csum()) {
      denc(csum_type, p);
      denc(csum_chunk_order, p);
      int len;
      denc_varint(len, p);
      csum_data = p.get_ptr(len);
      csum_data.reassign_to_mempool(mempool::mempool_bluestore_cache_other);
    }
    if (has_unused()) {
      denc(unused, p);
    }
  }

  bool can_split() const {
    return
      !has_flag(FLAG_SHARED) &&
      !has_flag(FLAG_COMPRESSED) &&
      !has_flag(FLAG_HAS_UNUSED); // splitting unused set is complex
  }
  bool can_split_at(uint32_t blob_offset) const {
    return !has_csum() || blob_offset % get_csum_chunk_size() == 0;
  }

  void dump(ceph::Formatter *f) const;
  static void generate_test_instances(std::list<bluestore_blob_t*>& ls);

  bool has_flag(unsigned f) const {
    return flags & f;
  }
  void set_flag(unsigned f) {
    flags |= f;
  }
  void clear_flag(unsigned f) {
    flags &= ~f;
  }
  std::string get_flags_string() const {
    return get_flags_string(flags);
  }

  void set_compressed(uint64_t clen_orig, uint64_t clen) {
    set_flag(FLAG_COMPRESSED);
    logical_length = clen_orig;
    compressed_length = clen;
  }
  bool is_mutable() const {
    return !is_compressed() && !is_shared();
  }
  bool is_compressed() const {
    return has_flag(FLAG_COMPRESSED);
  }
  bool has_csum() const {
    return has_flag(FLAG_CSUM);
  }
  bool has_unused() const {
    return has_flag(FLAG_HAS_UNUSED);
  }
  bool is_shared() const {
    return has_flag(FLAG_SHARED);
  }

  /// return chunk (i.e. min readable block) size for the blob
  uint64_t get_chunk_size(uint64_t dev_block_size) const {
    return has_csum() ?
      std::max<uint64_t>(dev_block_size, get_csum_chunk_size()) : dev_block_size;
  }
  uint32_t get_csum_chunk_size() const {
    return 1 << csum_chunk_order;
  }
  uint32_t get_compressed_payload_length() const {
    return is_compressed() ? compressed_length : 0;
  }
  uint64_t calc_offset(uint64_t x_off, uint64_t *plen) const {
    auto p = extents.begin();
    ceph_assert(p != extents.end());
    while (x_off >= p->length) {
      x_off -= p->length;
      ++p;
      ceph_assert(p != extents.end());
    }
    if (plen)
      *plen = p->length - x_off;
    return p->offset + x_off;
  }

  // validate whether or not the status of pextents within the given range
  // meets the requirement (allocated or unallocated).
  bool _validate_range(uint64_t b_off, uint64_t b_len,
                       bool require_allocated) const {
    auto p = extents.begin();
    ceph_assert(p != extents.end());
    while (b_off >= p->length) {
      b_off -= p->length;
      if (++p == extents.end())
        return false;
    }
    b_len += b_off;
    while (b_len) {
      if (require_allocated != p->is_valid()) {
        return false;
      }
      if (p->length >= b_len) {
        return true;
      }
      b_len -= p->length;
      if (++p == extents.end())
        return false;
    }
    ceph_abort_msg("we should not get here");
    return false;
  }

  /// return true if the entire range is allocated
  /// (mapped to extents on disk)
  bool is_allocated(uint64_t b_off, uint64_t b_len) const {
    return _validate_range(b_off, b_len, true);
  }

  /// return true if the entire range is unallocated
  /// (not mapped to extents on disk)
  bool is_unallocated(uint64_t b_off, uint64_t b_len) const {
    return _validate_range(b_off, b_len, false);
  }

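  // Worked example for the unused bitmap (illustrative only): unused_t is
  // 16 bits, so a 64 KiB blob is covered at 4 KiB per bit
  // (chunk_size = 0x10000 / 16 = 0x1000). add_unused(0x4000, 0x2000) sets
  // bits 4 and 5; is_unused(0x4000, 0x1000) then returns true, and
  // mark_used(0x4000, 0x1000) clears bit 4 again.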
  /// return true if the logical range has never been used
  bool is_unused(uint64_t offset, uint64_t length) const {
    if (!has_unused()) {
      return false;
    }
    ceph_assert(!is_compressed());
    uint64_t blob_len = get_logical_length();
    ceph_assert((blob_len % (sizeof(unused)*8)) == 0);
    ceph_assert(offset + length <= blob_len);
    uint64_t chunk_size = blob_len / (sizeof(unused)*8);
    uint64_t start = offset / chunk_size;
    uint64_t end = round_up_to(offset + length, chunk_size) / chunk_size;
    auto i = start;
    while (i < end && (unused & (1u << i))) {
      i++;
    }
    return i >= end;
  }

  /// mark a range that has never been used
  void add_unused(uint64_t offset, uint64_t length) {
    ceph_assert(!is_compressed());
    uint64_t blob_len = get_logical_length();
    ceph_assert((blob_len % (sizeof(unused)*8)) == 0);
    ceph_assert(offset + length <= blob_len);
    uint64_t chunk_size = blob_len / (sizeof(unused)*8);
    uint64_t start = round_up_to(offset, chunk_size) / chunk_size;
    uint64_t end = (offset + length) / chunk_size;
    for (auto i = start; i < end; ++i) {
      unused |= (1u << i);
    }
    if (start != end) {
      set_flag(FLAG_HAS_UNUSED);
    }
  }

  /// indicate that a range has (now) been used.
  void mark_used(uint64_t offset, uint64_t length) {
    if (has_unused()) {
      ceph_assert(!is_compressed());
      uint64_t blob_len = get_logical_length();
      ceph_assert((blob_len % (sizeof(unused)*8)) == 0);
      ceph_assert(offset + length <= blob_len);
      uint64_t chunk_size = blob_len / (sizeof(unused)*8);
      uint64_t start = offset / chunk_size;
      uint64_t end = round_up_to(offset + length, chunk_size) / chunk_size;
      for (auto i = start; i < end; ++i) {
        unused &= ~(1u << i);
      }
      if (unused == 0) {
        clear_flag(FLAG_HAS_UNUSED);
      }
    }
  }

  // map_f_invoke templates intended to mask parameters which are not expected
  // by the provided callback
  template<class F, typename std::enable_if<std::is_invocable_r_v<
    int,
    F,
    uint64_t,
    uint64_t>>::type* = nullptr>
  int map_f_invoke(uint64_t lo,
                   const bluestore_pextent_t& p,
                   uint64_t o,
                   uint64_t l, F&& f) const {
    return f(o, l);
  }

  template<class F, typename std::enable_if<std::is_invocable_r_v<
    int,
    F,
    uint64_t,
    uint64_t,
    uint64_t>>::type * = nullptr>
  int map_f_invoke(uint64_t lo,
                   const bluestore_pextent_t& p,
                   uint64_t o,
                   uint64_t l, F&& f) const {
    return f(lo, o, l);
  }

  template<class F, typename std::enable_if<std::is_invocable_r_v<
    int,
    F,
    const bluestore_pextent_t&,
    uint64_t,
    uint64_t>>::type * = nullptr>
  int map_f_invoke(uint64_t lo,
                   const bluestore_pextent_t& p,
                   uint64_t o,
                   uint64_t l, F&& f) const {
    return f(p, o, l);
  }

  template<class F>
  int map(uint64_t x_off, uint64_t x_len, F&& f) const {
    auto x_off0 = x_off;
    auto p = extents.begin();
    ceph_assert(p != extents.end());
    while (x_off >= p->length) {
      x_off -= p->length;
      ++p;
      ceph_assert(p != extents.end());
    }
    while (x_len > 0 && p != extents.end()) {
      uint64_t l = std::min(p->length - x_off, x_len);
      int r = map_f_invoke(x_off0, *p, p->offset + x_off, l, f);
      if (r < 0)
        return r;
      x_off = 0;
      x_len -= l;
      x_off0 += l;
      ++p;
    }
    return 0;
  }
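
  // Example (illustrative): walk the physical ranges backing a logical span.
  // The callback may take (offset, len), (logical_offset, offset, len) or
  // (pextent, offset, len); the map_f_invoke overloads above pick the match.
  //   int r = b.map(0, 0x2000,
  //     [&](uint64_t p_off, uint64_t p_len) {
  //       // issue I/O against device range [p_off, p_off + p_len)
  //       return 0;
  //     });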

  template<class F>
  void map_bl(uint64_t x_off,
              ceph::buffer::list& bl,
              F&& f) const {
    static_assert(std::is_invocable_v<F, uint64_t, ceph::buffer::list&>);

    auto p = extents.begin();
    ceph_assert(p != extents.end());
    while (x_off >= p->length) {
      x_off -= p->length;
      ++p;
      ceph_assert(p != extents.end());
    }
    ceph::buffer::list::iterator it = bl.begin();
    uint64_t x_len = bl.length();
    while (x_len > 0) {
      ceph_assert(p != extents.end());
      uint64_t l = std::min(p->length - x_off, x_len);
      ceph::buffer::list t;
      it.copy(l, t);
      f(p->offset + x_off, t);
      x_off = 0;
      x_len -= l;
      ++p;
    }
  }

  uint32_t get_ondisk_length() const {
    uint32_t len = 0;
    for (auto &p : extents) {
      len += p.length;
    }
    return len;
  }

  uint32_t get_logical_length() const {
    return logical_length;
  }
  size_t get_csum_value_size() const;

  size_t get_csum_count() const {
    size_t vs = get_csum_value_size();
    if (!vs)
      return 0;
    return csum_data.length() / vs;
  }
  uint64_t get_csum_item(unsigned i) const {
    size_t cs = get_csum_value_size();
    const char *p = csum_data.c_str();
    switch (cs) {
    case 0:
      ceph_abort_msg("no csum data, bad index");
    case 1:
      return reinterpret_cast<const uint8_t*>(p)[i];
    case 2:
      return reinterpret_cast<const ceph_le16*>(p)[i];
    case 4:
      return reinterpret_cast<const ceph_le32*>(p)[i];
    case 8:
      return reinterpret_cast<const ceph_le64*>(p)[i];
    default:
      ceph_abort_msg("unrecognized csum word size");
    }
  }
  const char *get_csum_item_ptr(unsigned i) const {
    size_t cs = get_csum_value_size();
    return csum_data.c_str() + (cs * i);
  }
  char *get_csum_item_ptr(unsigned i) {
    size_t cs = get_csum_value_size();
    return csum_data.c_str() + (cs * i);
  }

  void init_csum(unsigned type, unsigned order, unsigned len) {
    flags |= FLAG_CSUM;
    csum_type = type;
    csum_chunk_order = order;
    csum_data = ceph::buffer::create(get_csum_value_size() * len / get_csum_chunk_size());
    csum_data.zero();
    csum_data.reassign_to_mempool(mempool::mempool_bluestore_cache_other);
  }
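
  // Sizing example (illustrative, assuming crc32c with 4-byte checksum
  // values): init_csum(Checksummer::CSUM_CRC32C, 12, 0x10000) covers a
  // 64 KiB blob with 4 KiB chunks (order 12), i.e. 16 checksums occupying
  // 16 * 4 = 64 bytes in csum_data.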

  /// calculate csum for the buffer at the given b_off
  void calc_csum(uint64_t b_off, const ceph::buffer::list& bl);

  /// verify csum: return -EOPNOTSUPP for unsupported checksum type;
  /// return -1 and a valid (nonnegative) b_bad_off for checksum error;
  /// return 0 if all is well.
  int verify_csum(uint64_t b_off, const ceph::buffer::list& bl, int* b_bad_off,
                  uint64_t *bad_csum) const;

  bool can_prune_tail() const {
    return
      extents.size() > 1 &&  // if it's all invalid it's not pruning.
      !extents.back().is_valid() &&
      !has_unused();
  }
  void prune_tail() {
    const auto &p = extents.back();
    logical_length -= p.length;
    extents.pop_back();
    if (has_csum()) {
      ceph::buffer::ptr t;
      t.swap(csum_data);
      csum_data = ceph::buffer::ptr(t.c_str(),
                                    get_logical_length() / get_csum_chunk_size() *
                                    get_csum_value_size());
    }
  }
  void add_tail(uint32_t new_len) {
    ceph_assert(is_mutable());
    ceph_assert(!has_unused());
    ceph_assert(new_len > logical_length);
    extents.emplace_back(
      bluestore_pextent_t(
        bluestore_pextent_t::INVALID_OFFSET,
        new_len - logical_length));
    logical_length = new_len;
    if (has_csum()) {
      ceph::buffer::ptr t;
      t.swap(csum_data);
      csum_data = ceph::buffer::create(
        get_csum_value_size() * logical_length / get_csum_chunk_size());
      csum_data.copy_in(0, t.length(), t.c_str());
      csum_data.zero(t.length(), csum_data.length() - t.length());
    }
  }
  uint32_t get_release_size(uint32_t min_alloc_size) const {
    if (is_compressed()) {
      return get_logical_length();
    }
    uint32_t res = get_csum_chunk_size();
    if (!has_csum() || res < min_alloc_size) {
      res = min_alloc_size;
    }
    return res;
  }

  void split(uint32_t blob_offset, bluestore_blob_t& rb);
  void allocated(uint32_t b_off, uint32_t length, const PExtentVector& allocs);
  void allocated_test(const bluestore_pextent_t& alloc); // intended for UT only

  /// updates blob's pextents container and returns unused pextents eligible
  /// for release.
  /// all - indicates that the whole blob is to be released.
  /// logical - specifies the set of logical extents within the blob
  ///           to be released.
  /// Returns true if the blob has no more valid pextents.
  bool release_extents(
    bool all,
    const PExtentVector& logical,
    PExtentVector* r);
};
WRITE_CLASS_DENC_FEATURED(bluestore_blob_t)

std::ostream& operator<<(std::ostream& out, const bluestore_blob_t& o);


/// shared blob state
struct bluestore_shared_blob_t {
  MEMPOOL_CLASS_HELPERS();
  uint64_t sbid;                       ///< shared blob id
  bluestore_extent_ref_map_t ref_map;  ///< shared blob extents

  bluestore_shared_blob_t(uint64_t _sbid) : sbid(_sbid) {}
  bluestore_shared_blob_t(uint64_t _sbid,
                          bluestore_extent_ref_map_t&& _ref_map)
    : sbid(_sbid), ref_map(std::move(_ref_map)) {}

  DENC(bluestore_shared_blob_t, v, p) {
    DENC_START(1, 1, p);
    denc(v.ref_map, p);
    DENC_FINISH(p);
  }

  void dump(ceph::Formatter *f) const;
  static void generate_test_instances(std::list<bluestore_shared_blob_t*>& ls);

  bool empty() const {
    return ref_map.empty();
  }
};
WRITE_CLASS_DENC(bluestore_shared_blob_t)

std::ostream& operator<<(std::ostream& out, const bluestore_shared_blob_t& o);

/// onode: per-object metadata
struct bluestore_onode_t {
  uint64_t nid = 0;   ///< numeric id (locally unique)
  uint64_t size = 0;  ///< object size
  // mempool to be assigned to buffer::ptr manually
  std::map<mempool::bluestore_cache_meta::string, ceph::buffer::ptr> attrs;

  struct shard_info {
    uint32_t offset = 0;  ///< logical offset for start of shard
    uint32_t bytes = 0;   ///< encoded bytes
    DENC(shard_info, v, p) {
      denc_varint(v.offset, p);
      denc_varint(v.bytes, p);
    }
    void dump(ceph::Formatter *f) const;
  };
  std::vector<shard_info> extent_map_shards; ///< extent map shards (if any)

  uint32_t expected_object_size = 0;
  uint32_t expected_write_size = 0;
  uint32_t alloc_hint_flags = 0;

  uint8_t flags = 0;

  std::map<uint32_t, uint64_t> zone_offset_refs; ///< (zone, offset) refs to this onode

  enum {
    FLAG_OMAP = 1,         ///< object may have omap data
    FLAG_PGMETA_OMAP = 2,  ///< omap data is in meta omap prefix
    FLAG_PERPOOL_OMAP = 4, ///< omap data is in per-pool prefix; per-pool keys
    FLAG_PERPG_OMAP = 8,   ///< omap data is in per-pg prefix; per-pg keys
  };

  std::string get_flags_string() const {
    std::string s;
    if (flags & FLAG_OMAP) {
      s = "omap";
    }
    if (flags & FLAG_PGMETA_OMAP) {
      s += "+pgmeta_omap";
    }
    if (flags & FLAG_PERPOOL_OMAP) {
      s += "+per_pool_omap";
    }
    if (flags & FLAG_PERPG_OMAP) {
      s += "+per_pg_omap";
    }
    return s;
  }

  bool has_flag(unsigned f) const {
    return flags & f;
  }

  void set_flag(unsigned f) {
    flags |= f;
  }

  void clear_flag(unsigned f) {
    flags &= ~f;
  }

  bool has_omap() const {
    return has_flag(FLAG_OMAP);
  }

  static bool is_pgmeta_omap(uint8_t flags) {
    return flags & FLAG_PGMETA_OMAP;
  }
  static bool is_perpool_omap(uint8_t flags) {
    return flags & FLAG_PERPOOL_OMAP;
  }
  static bool is_perpg_omap(uint8_t flags) {
    return flags & FLAG_PERPG_OMAP;
  }
  bool is_pgmeta_omap() const {
    return has_flag(FLAG_PGMETA_OMAP);
  }
  bool is_perpool_omap() const {
    return has_flag(FLAG_PERPOOL_OMAP);
  }
  bool is_perpg_omap() const {
    return has_flag(FLAG_PERPG_OMAP);
  }

  void set_omap_flags(bool legacy) {
    set_flag(FLAG_OMAP | (legacy ? 0 : (FLAG_PERPOOL_OMAP | FLAG_PERPG_OMAP)));
  }
  void set_omap_flags_pgmeta() {
    set_flag(FLAG_OMAP | FLAG_PGMETA_OMAP);
  }

  void clear_omap_flag() {
    clear_flag(FLAG_OMAP |
               FLAG_PGMETA_OMAP |
               FLAG_PERPOOL_OMAP |
               FLAG_PERPG_OMAP);
  }

  DENC(bluestore_onode_t, v, p) {
    DENC_START(2, 1, p);
    denc_varint(v.nid, p);
    denc_varint(v.size, p);
    denc(v.attrs, p);
    denc(v.flags, p);
    denc(v.extent_map_shards, p);
    denc_varint(v.expected_object_size, p);
    denc_varint(v.expected_write_size, p);
    denc_varint(v.alloc_hint_flags, p);
    if (struct_v >= 2) {
      denc(v.zone_offset_refs, p);
    }
    DENC_FINISH(p);
  }
  void dump(ceph::Formatter *f) const;
  static void generate_test_instances(std::list<bluestore_onode_t*>& o);
};
WRITE_CLASS_DENC(bluestore_onode_t::shard_info)
WRITE_CLASS_DENC(bluestore_onode_t)

std::ostream& operator<<(std::ostream& out, const bluestore_onode_t::shard_info& si);

/// writeahead-logged op
struct bluestore_deferred_op_t {
  typedef enum {
    OP_WRITE = 1,
  } type_t;
  __u8 op = 0;

  PExtentVector extents;
  ceph::buffer::list data;

  DENC(bluestore_deferred_op_t, v, p) {
    DENC_START(1, 1, p);
    denc(v.op, p);
    denc(v.extents, p);
    denc(v.data, p);
    DENC_FINISH(p);
  }
  void dump(ceph::Formatter *f) const;
  static void generate_test_instances(std::list<bluestore_deferred_op_t*>& o);
};
WRITE_CLASS_DENC(bluestore_deferred_op_t)


/// writeahead-logged transaction
struct bluestore_deferred_transaction_t {
  uint64_t seq = 0;
  std::list<bluestore_deferred_op_t> ops;
  interval_set<uint64_t> released; ///< allocations to release after tx

  bluestore_deferred_transaction_t() : seq(0) {}

  DENC(bluestore_deferred_transaction_t, v, p) {
    DENC_START(1, 1, p);
    denc(v.seq, p);
    denc(v.ops, p);
    denc(v.released, p);
    DENC_FINISH(p);
  }
  void dump(ceph::Formatter *f) const;
  static void generate_test_instances(std::list<bluestore_deferred_transaction_t*>& o);
};
WRITE_CLASS_DENC(bluestore_deferred_transaction_t)

struct bluestore_compression_header_t {
  uint8_t type = Compressor::COMP_ALG_NONE;
  uint32_t length = 0;
  std::optional<int32_t> compressor_message;

  bluestore_compression_header_t() {}
  bluestore_compression_header_t(uint8_t _type)
    : type(_type) {}

  DENC(bluestore_compression_header_t, v, p) {
    DENC_START(2, 1, p);
    denc(v.type, p);
    denc(v.length, p);
    if (struct_v >= 2) {
      denc(v.compressor_message, p);
    }
    DENC_FINISH(p);
  }
  void dump(ceph::Formatter *f) const;
  static void generate_test_instances(std::list<bluestore_compression_header_t*>& o);
};
WRITE_CLASS_DENC(bluestore_compression_header_t)

template <template <typename> typename V, class COUNTER_TYPE = int32_t>
class ref_counter_2hash_tracker_t {
  size_t num_non_zero = 0;
  size_t num_buckets = 0;
  V<COUNTER_TYPE> buckets1;
  V<COUNTER_TYPE> buckets2;

public:
  ref_counter_2hash_tracker_t(uint64_t mem_cap) {
    num_buckets = mem_cap / sizeof(COUNTER_TYPE) / 2;
    ceph_assert(num_buckets);
    buckets1.resize(num_buckets);
    buckets2.resize(num_buckets);
    reset();
  }

  size_t get_num_buckets() const {
    return num_buckets;
  }

  void inc(const char* hash_val, size_t hash_val_len, int n) {
    auto h = ceph_str_hash_rjenkins((const char*)hash_val, hash_val_len) %
      num_buckets;
    if (buckets1[h] == 0 && n) {
      ++num_non_zero;
    } else if (buckets1[h] == -n) {
      --num_non_zero;
    }
    buckets1[h] += n;
    h = ceph_str_hash_linux((const char*)hash_val, hash_val_len) % num_buckets;
    if (buckets2[h] == 0 && n) {
      ++num_non_zero;
    } else if (buckets2[h] == -n) {
      --num_non_zero;
    }
    buckets2[h] += n;
  }

  bool test_hash_conflict(
    const char* hash_val1,
    const char* hash_val2,
    size_t hash_val_len) const {

    auto h1 = ceph_str_hash_rjenkins((const char*)hash_val1, hash_val_len);
    auto h2 = ceph_str_hash_rjenkins((const char*)hash_val2, hash_val_len);
    auto h3 = ceph_str_hash_linux((const char*)hash_val1, hash_val_len);
    auto h4 = ceph_str_hash_linux((const char*)hash_val2, hash_val_len);
    return ((h1 % num_buckets) == (h2 % num_buckets)) &&
           ((h3 % num_buckets) == (h4 % num_buckets));
  }

  bool test_all_zero(const char* hash_val, size_t hash_val_len) const {
    auto h = ceph_str_hash_rjenkins((const char*)hash_val, hash_val_len);
    if (buckets1[h % num_buckets] != 0) {
      return false;
    }
    h = ceph_str_hash_linux((const char*)hash_val, hash_val_len);
    return buckets2[h % num_buckets] == 0;
  }

  // returns number of mismatching buckets
  size_t count_non_zero() const {
    return num_non_zero;
  }
  void reset() {
    for (size_t i = 0; i < num_buckets; i++) {
      buckets1[i] = 0;
      buckets2[i] = 0;
    }
    num_non_zero = 0;
  }
};
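
// How the two-hash tracker is used (a sketch, not part of the API contract):
// each key contributes +n/-n to one counter per hash function, so matched
// increments and decrements cancel out. If count_non_zero() == 0 afterwards,
// the reference counts balanced; a nonzero result proves a mismatch, while
// a zero result is only probabilistic (hash collisions can mask errors,
// which is what test_hash_conflict() helps detect).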

class shared_blob_2hash_tracker_t
  : public ref_counter_2hash_tracker_t<mempool::bluestore_fsck::vector> {

  static const size_t hash_input_len = 3;

  typedef std::array<uint64_t, hash_input_len> hash_input_t;

  static size_t get_hash_input_size() {
    return hash_input_len * sizeof(hash_input_t::value_type);
  }

  inline hash_input_t build_hash_input(uint64_t sbid, uint64_t offset) const;

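  // Presumably (inferred from the name and the constructor below): the count
  // of low-order offset bits that are always zero at allocation-unit
  // granularity; offsets are shifted right by this amount before hashing.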
  size_t au_void_bits = 0;

public:
  shared_blob_2hash_tracker_t(uint64_t mem_cap, size_t alloc_unit)
    : ref_counter_2hash_tracker_t(mem_cap) {
    ceph_assert(alloc_unit);
    ceph_assert(std::has_single_bit(alloc_unit));
    au_void_bits = std::countr_zero(alloc_unit);
  }
  void inc(uint64_t sbid, uint64_t offset, int n);
  void inc_range(uint64_t sbid, uint64_t offset, uint32_t len, int n);

  bool test_hash_conflict(
    uint64_t sbid,
    uint64_t offset,
    uint64_t sbid2,
    uint64_t offset2) const;
  bool test_all_zero(
    uint64_t sbid,
    uint64_t offset) const;
  bool test_all_zero_range(
    uint64_t sbid,
    uint64_t offset,
    uint32_t len) const;
};

class sb_info_t {
  // subzero value indicates a (potentially) stray blob,
  // i.e. a blob that has no real references from onodes
  int64_t sbid = 0;

public:
  enum {
    INVALID_POOL_ID = INT64_MIN
  };

  int64_t pool_id = INVALID_POOL_ID;
  // subzero value indicates compressed_allocated as well
  int32_t allocated_chunks = 0;

  sb_info_t(int64_t _sbid = 0) : sbid(_sbid)
  {
  }
  bool operator< (const sb_info_t& other) const {
    return std::abs(sbid) < std::abs(other.sbid);
  }
  bool operator< (const uint64_t& other_sbid) const {
    return uint64_t(std::abs(sbid)) < other_sbid;
  }
  bool is_stray() const {
    return sbid < 0;
  }
  uint64_t get_sbid() const {
    return uint64_t(std::abs(sbid));
  }
  void adopt() {
    sbid = std::abs(sbid);
  }
} __attribute__((packed));

// Space-efficient container to keep a set of sb_info structures,
// given that the majority of entries are appended in a proper id-sorted
// order. Hence one can keep them in a regular vector and apply binary search
// whenever a specific entry is to be found.
// For the rare occasions when out-of-order append takes place, an auxiliary
// regular map is used.
struct sb_info_space_efficient_map_t {
  // large array sorted by the user
  mempool::bluestore_fsck::vector<sb_info_t> items;
  // small additional set of items we maintain sorting ourselves;
  // this would never keep an entry with id > items.back().id
  mempool::bluestore_fsck::vector<sb_info_t> aux_items;

  sb_info_t& add_maybe_stray(uint64_t sbid) {
    return _add(-int64_t(sbid));
  }
  sb_info_t& add_or_adopt(uint64_t sbid) {
    auto& r = _add(sbid);
    r.adopt();
    return r;
  }
  auto find(uint64_t id) {
    if (items.size() != 0) {
      auto it = std::lower_bound(
        items.begin(),
        items.end() - 1,
        id,
        [](const sb_info_t& a, const uint64_t& b) {
          return a < b;
        });
      if (it->get_sbid() == id) {
        return it;
      }
      if (aux_items.size() != 0) {
        auto it = std::lower_bound(
          aux_items.begin(),
          aux_items.end(),
          id,
          [](const sb_info_t& a, const uint64_t& b) {
            return a < b;
          });
        if (it->get_sbid() == id) {
          return it;
        }
      }
    }
    return items.end();
  }
  // enumerates strays, order isn't guaranteed.
  void foreach_stray(std::function<void(const sb_info_t&)> cb) {
    for (auto& sbi : items) {
      if (sbi.is_stray()) {
        cb(sbi);
      }
    }
    for (auto& sbi : aux_items) {
      if (sbi.is_stray()) {
        cb(sbi);
      }
    }
  }
  auto end() {
    return items.end();
  }

  void shrink() {
    items.shrink_to_fit();
    aux_items.shrink_to_fit();
  }
  void clear() {
    items.clear();
    aux_items.clear();
    shrink();
  }
private:
  sb_info_t& _add(int64_t id) {
    uint64_t n_id = uint64_t(std::abs(id));
    if (items.size() == 0 || n_id > items.back().get_sbid()) {
      return items.emplace_back(id);
    }
    auto it = find(n_id);
    if (it != items.end()) {
      return *it;
    }
    if (aux_items.size() == 0 || n_id > aux_items.back().get_sbid()) {
      return aux_items.emplace_back(id);
    }
    // do sorted insertion, may be expensive!
    it = std::upper_bound(
      aux_items.begin(),
      aux_items.end(),
      n_id,
      [](const uint64_t& a, const sb_info_t& b) {
        return a < b.get_sbid();
      });
    return *aux_items.emplace(it, id);
  }
};
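
// Typical fsck-style usage (a sketch; the exact call sites live in
// BlueStore's fsck code, not here):
//   sb_info_space_efficient_map_t sb_info;
//   sb_info.add_maybe_stray(sbid);   // seen a shared-blob record: negative
//                                    // id marks it as potentially stray
//   sb_info.add_or_adopt(sbid);      // seen a real reference: same entry,
//                                    // sign flipped via adopt()
//   sb_info.foreach_stray([&](const sb_info_t& s) {
//     // report / repair shared blobs that nothing references
//   });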

#endif