1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 #ifndef CEPH_OS_BLUESTORE_BLUEFS_TYPES_H
4 #define CEPH_OS_BLUESTORE_BLUEFS_TYPES_H
8 #include "bluestore_types.h"
9 #include "include/utime.h"
10 #include "include/encoding.h"
11 #include "include/denc.h"
// bluefs_extent_t: one extent, built from a uint8_t device id (bdev),
// a uint64_t offset and a uint32_t length.
13 class bluefs_extent_t
{
// Constructor arguments are ordered (bdev, offset, length); all default to 0.
19 bluefs_extent_t(uint8_t b
= 0, uint64_t o
= 0, uint32_t l
= 0)
20 : offset(o
), length(l
), bdev(b
) {}
// end(): offset + length, i.e. the first position past this extent.
22 uint64_t end() const { return offset
+ length
; }
// denc body: offset goes through denc_lba, length through denc_varint_lowz.
23 DENC(bluefs_extent_t
, v
, p
) {
25 denc_lba(v
.offset
, p
);
26 denc_varint_lowz(v
.length
, p
);
// Formatter dump and test-instance generator (declarations only).
31 void dump(ceph::Formatter
*f
) const;
32 static void generate_test_instances(std::list
<bluefs_extent_t
*>&);
// Hooks bluefs_extent_t into the denc framework
// (macro presumably provided by include/denc.h).
34 WRITE_CLASS_DENC(bluefs_extent_t
)
// Stream-output operator for bluefs_extent_t; declared here only.
36 std::ostream
& operator<<(std::ostream
& out
, const bluefs_extent_t
& e
);
// bluefs_fnode_delta_t: a change record for a file node. The visible parts are
// a file offset, a vector of extents, and the ino/size fields that the denc
// body encodes.
38 struct bluefs_fnode_delta_t
{
42 uint64_t offset
; // Contains offset in file of extents.
43 // Equal to 'allocated' when created.
44 // Used for consistency checking.
45 mempool::bluefs::vector
<bluefs_extent_t
> extents
;
// denc body: ino and size are both handled with denc_varint.
47 DENC(bluefs_fnode_delta_t
, v
, p
) {
49 denc_varint(v
.ino
, p
);
50 denc_varint(v
.size
, p
);
// Hooks bluefs_fnode_delta_t into the denc framework
// (macro presumably provided by include/denc.h).
57 WRITE_CLASS_DENC(bluefs_fnode_delta_t
)
// Stream-output operator for bluefs_fnode_delta_t; declared here only.
59 std::ostream
& operator<<(std::ostream
& out
, const bluefs_fnode_delta_t
& delta
);
// bluefs_fnode_t: per-file metadata. The visible members are the extent list,
// a parallel index of logical offsets, and allocation totals; ino and size are
// also initialized by the constructor and handled by the denc code.
61 struct bluefs_fnode_t
{
65 uint8_t __unused__
; // was prefer_bdev
66 mempool::bluefs::vector
<bluefs_extent_t
> extents
;
68 // precalculated logical offsets for extents vector entries
69 // allows fast lookup for extent index by the offset value via upper_bound()
70 mempool::bluefs::vector
<uint64_t> extents_index
;
73 uint64_t allocated_commited
;
// Default constructor: zero-initializes ino, size, __unused__, allocated and
// allocated_commited.
75 bluefs_fnode_t() : ino(0), size(0), __unused__(0), allocated(0), allocated_commited(0) {}
// get_allocated(): const accessor returning a uint64_t.
77 uint64_t get_allocated() const {
// recalc_allocated(): walks extents, appending the running total to
// extents_index before adding each extent's length to allocated, then copies
// the final total into allocated_commited.
81 void recalc_allocated() {
83 extents_index
.reserve(extents
.size());
84 for (auto& p
: extents
) {
85 extents_index
.emplace_back(allocated
);
86 allocated
+= p
.length
;
88 allocated_commited
= allocated
;
// bound_encode(), encode() and decode() all forward to the _denc_friend
// template so a single body serves the three denc passes.
92 void bound_encode(size_t& p
) const {
93 _denc_friend(*this, p
);
95 void encode(ceph::buffer::list::contiguous_appender
& p
) const {
96 DENC_DUMP_PRE(bluefs_fnode_t
);
97 _denc_friend(*this, p
);
99 void decode(ceph::buffer::ptr::const_iterator
& p
) {
100 _denc_friend(*this, p
);
// _denc_friend is enabled only when T, with const removed, is bluefs_fnode_t.
// It handles ino and size with denc_varint and __unused__ with plain denc.
103 template<typename T
, typename P
>
104 friend std::enable_if_t
<std::is_same_v
<bluefs_fnode_t
, std::remove_const_t
<T
>>>
105 _denc_friend(T
& v
, P
& p
) {
107 denc_varint(v
.ino
, p
);
108 denc_varint(v
.size
, p
);
110 denc(v
.__unused__
, p
);
// Bring the committed total in line with the current allocated total.
116 allocated_commited
= allocated
;
// claim_extents(): iterates over the caller-supplied vector.
// NOTE: the parameter name shadows the member named extents.
118 void claim_extents(mempool::bluefs::vector
<bluefs_extent_t
>& extents
) {
119 for (const auto& p
: extents
) {
// append_extent(): adds ext to the file. Merge case: when the last stored
// extent ends exactly at ext.offset, lives on the same bdev, and the combined
// length stays below 0xffffffff, the last extent is simply lengthened.
// The (uint64_t) casts presumably avoid 32-bit wraparound in the sum.
124 void append_extent(const bluefs_extent_t
& ext
) {
125 if (!extents
.empty() &&
126 extents
.back().end() == ext
.offset
&&
127 extents
.back().bdev
== ext
.bdev
&&
128 (uint64_t)extents
.back().length
+ (uint64_t)ext
.length
< 0xffffffff) {
129 extents
.back().length
+= ext
.length
;
// New-entry path: record the logical start (current allocated) in
// extents_index and store ext as a separate extent.
131 extents_index
.emplace_back(allocated
);
132 extents
.push_back(ext
);
// The allocated total grows by ext.length.
134 allocated
+= ext
.length
;
// pop_front_extent(): subtracts the first extent's length from allocated,
// drops the first extents_index entry, then visits the remaining entries.
137 void pop_front_extent() {
138 auto it
= extents
.begin();
139 allocated
-= it
->length
;
140 extents_index
.erase(extents_index
.begin());
141 for (auto& i
: extents_index
) {
// swap_extents(): exchanges extents, extents_index, allocated and
// allocated_commited with other.
147 void swap_extents(bluefs_fnode_t
& other
) {
148 other
.extents
.swap(extents
);
149 other
.extents_index
.swap(extents_index
);
150 std::swap(allocated
, other
.allocated
);
151 std::swap(allocated_commited
, other
.allocated_commited
);
// clear_extents(): empties extents_index and zeroes allocated_commited.
153 void clear_extents() {
154 extents_index
.clear();
157 allocated_commited
= 0;
// seek(): takes a uint64_t off and a uint64_t pointer x_off (presumably an
// output); returns an iterator over a vector of extents. Declaration only.
160 mempool::bluefs::vector
<bluefs_extent_t
>::iterator
seek(
161 uint64_t off
, uint64_t *x_off
);
// make_delta(): accepts and returns a bluefs_fnode_delta_t pointer.
// Declaration only.
162 bluefs_fnode_delta_t
* make_delta(bluefs_fnode_delta_t
* delta
);
// Formatter dump and test-instance generator (declarations only).
164 void dump(ceph::Formatter
*f
) const;
165 static void generate_test_instances(std::list
<bluefs_fnode_t
*>& ls
);
// Hooks bluefs_fnode_t into the denc framework
// (macro presumably provided by include/denc.h).
168 WRITE_CLASS_DENC(bluefs_fnode_t
)
// Stream-output operator for bluefs_fnode_t; declared here only.
170 std::ostream
& operator<<(std::ostream
& out
, const bluefs_fnode_t
& file
);
// bluefs_layout_t: records which bdev is shared and whether dedicated DB / WAL
// devices are present.
172 struct bluefs_layout_t
{
173 unsigned shared_bdev
= 0; ///< which bluefs bdev we are sharing
174 bool dedicated_db
= false; ///< whether block.db is present
175 bool dedicated_wal
= false; ///< whether block.wal is present
// True when neither dedicated_db nor dedicated_wal is set.
177 bool single_shared_device() const {
178 return !dedicated_db
&& !dedicated_wal
;
// Field-by-field equality over shared_bdev, dedicated_db and dedicated_wal.
181 bool operator==(const bluefs_layout_t
& other
) const {
182 return shared_bdev
== other
.shared_bdev
&&
183 dedicated_db
== other
.dedicated_db
&&
184 dedicated_wal
== other
.dedicated_wal
;
// Bufferlist encode/decode and Formatter dump (declarations only).
187 void encode(ceph::buffer::list
& bl
) const;
188 void decode(ceph::buffer::list::const_iterator
& p
);
189 void dump(ceph::Formatter
*f
) const;
// Presumably generates the free encode()/decode() wrappers for bluefs_layout_t
// from its member functions (macro from the encoding framework).
191 WRITE_CLASS_ENCODER(bluefs_layout_t
)
// bluefs_super_t: holds the instance uuid, the owning OSD's uuid, the log
// file's fnode and an optional memorized layout.
193 struct bluefs_super_t
{
194 uuid_d uuid
; ///< unique to this bluefs instance
195 uuid_d osd_uuid
; ///< matches the osd that owns us
199 bluefs_fnode_t log_fnode
;
201 std::optional
<bluefs_layout_t
> memorized_layout
;
// block_mask(): ~(block_size - 1) computed in 64 bits. This clears the low
// bits of a value when ANDed, assuming block_size is a power of two.
207 uint64_t block_mask() const {
208 return ~((uint64_t)block_size
- 1);
// Bufferlist encode/decode, Formatter dump and test-instance generator
// (declarations only).
211 void encode(ceph::buffer::list
& bl
) const;
212 void decode(ceph::buffer::list::const_iterator
& p
);
213 void dump(ceph::Formatter
*f
) const;
214 static void generate_test_instances(std::list
<bluefs_super_t
*>& ls
);
// Presumably generates the free encode()/decode() wrappers for bluefs_super_t
// from its member functions (macro from the encoding framework).
216 WRITE_CLASS_ENCODER(bluefs_super_t
)
// Stream-output operator for bluefs_super_t; declared here only.
218 std::ostream
& operator<<(std::ostream
&, const bluefs_super_t
& s
);
// bluefs_transaction_t: a buffer of encoded ops (op_bl) tagged with the fs
// uuid and a sequence number.
221 struct bluefs_transaction_t
{
// Op codes. Each op_* helper in this struct writes its code into op_bl as a
// __u8.
224 OP_INIT
, ///< initial (empty) file system marker
225 OP_ALLOC_ADD
, ///< OBSOLETE: add extent to available block storage (extent)
226 OP_ALLOC_RM
, ///< OBSOLETE: remove extent from available block storage (extent)
227 OP_DIR_LINK
, ///< (re)set a dir entry (dirname, filename, ino)
228 OP_DIR_UNLINK
, ///< remove a dir entry (dirname, filename)
229 OP_DIR_CREATE
, ///< create a dir (dirname)
230 OP_DIR_REMOVE
, ///< remove a dir (dirname)
231 OP_FILE_UPDATE
, ///< set/update file metadata (file)
232 OP_FILE_REMOVE
, ///< remove file (ino)
233 OP_JUMP
, ///< jump the seq # and offset
234 OP_JUMP_SEQ
, ///< jump the seq #
235 OP_FILE_UPDATE_INC
, ///< incremental update file metadata (file)
238 uuid_d uuid
; ///< fs uuid
239 uint64_t seq
; ///< sequence number
240 ceph::buffer::list op_bl
; ///< encoded transaction ops
// Default constructor: seq starts at 0.
242 bluefs_transaction_t() : seq(0) {}
// Reset this object to a default-constructed transaction.
245 *this = bluefs_transaction_t();
// True when op_bl holds no encoded data.
248 return op_bl
.length() == 0;
// Emit the OP_INIT code.
253 encode((__u8
)OP_INIT
, op_bl
);
// op_dir_create(dir): emits OP_DIR_CREATE.
255 void op_dir_create(std::string_view dir
) {
257 encode((__u8
)OP_DIR_CREATE
, op_bl
);
// op_dir_remove(dir): emits OP_DIR_REMOVE.
260 void op_dir_remove(std::string_view dir
) {
262 encode((__u8
)OP_DIR_REMOVE
, op_bl
);
// op_dir_link(dir, file, ino): emits OP_DIR_LINK.
265 void op_dir_link(std::string_view dir
, std::string_view file
, uint64_t ino
) {
267 encode((__u8
)OP_DIR_LINK
, op_bl
);
// op_dir_unlink(dir, file): emits OP_DIR_UNLINK.
272 void op_dir_unlink(std::string_view dir
, std::string_view file
) {
274 encode((__u8
)OP_DIR_UNLINK
, op_bl
);
// op_file_update(file): emits OP_FILE_UPDATE.
278 void op_file_update(bluefs_fnode_t
& file
) {
280 encode((__u8
)OP_FILE_UPDATE
, op_bl
);
284 /* streams update to bufferlist and clears update state */
// Builds a delta through file.make_delta(), then emits OP_FILE_UPDATE_INC
// followed by the encoded delta.
285 void op_file_update_inc(bluefs_fnode_t
& file
) {
287 bluefs_fnode_delta_t delta
;
288 file
.make_delta(&delta
); //also resets delta to zero
289 encode((__u8
)OP_FILE_UPDATE_INC
, op_bl
);
290 encode(delta
, op_bl
);
// op_file_remove(ino): emits OP_FILE_REMOVE.
292 void op_file_remove(uint64_t ino
) {
294 encode((__u8
)OP_FILE_REMOVE
, op_bl
);
// op_jump(next_seq, offset): emits OP_JUMP followed by next_seq and offset.
297 void op_jump(uint64_t next_seq
, uint64_t offset
) {
299 encode((__u8
)OP_JUMP
, op_bl
);
300 encode(next_seq
, op_bl
);
301 encode(offset
, op_bl
);
// op_jump_seq(next_seq): emits OP_JUMP_SEQ followed by next_seq.
303 void op_jump_seq(uint64_t next_seq
) {
305 encode((__u8
)OP_JUMP_SEQ
, op_bl
);
306 encode(next_seq
, op_bl
);
// claim_ops(from): appends from.op_bl to this transaction's op_bl through
// claim_append (which presumably leaves from.op_bl empty; confirm against
// the bufferlist API).
308 void claim_ops(bluefs_transaction_t
& from
) {
309 op_bl
.claim_append(from
.op_bl
);
// Bufferlist encode/decode, Formatter dump and test-instance generator
// (declarations only).
312 void encode(ceph::buffer::list
& bl
) const;
313 void decode(ceph::buffer::list::const_iterator
& p
);
314 void dump(ceph::Formatter
*f
) const;
315 static void generate_test_instances(std::list
<bluefs_transaction_t
*>& ls
);
// Presumably generates the free encode()/decode() wrappers for
// bluefs_transaction_t from its member functions (macro from the encoding
// framework).
317 WRITE_CLASS_ENCODER(bluefs_transaction_t
)
// Stream-output operator for bluefs_transaction_t; declared here only.
319 std::ostream
& operator<<(std::ostream
& out
, const bluefs_transaction_t
& t
);