1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 #include "BitmapFreelistManager.h"
5 #include "kv/KeyValueDB.h"
8 #include "common/debug.h"
10 #define dout_context cct
11 #define dout_subsys ceph_subsys_bluestore
13 #define dout_prefix *_dout << "freelist "
15 void make_offset_key(uint64_t offset
, std::string
*key
)
18 _key_encode_u64(offset
, key
);
21 struct XorMergeOperator
: public KeyValueDB::MergeOperator
{
22 void merge_nonexistent(
23 const char *rdata
, size_t rlen
, std::string
*new_value
) override
{
24 *new_value
= std::string(rdata
, rlen
);
27 const char *ldata
, size_t llen
,
28 const char *rdata
, size_t rlen
,
29 std::string
*new_value
) override
{
31 *new_value
= std::string(ldata
, llen
);
32 for (size_t i
= 0; i
< rlen
; ++i
) {
33 (*new_value
)[i
] ^= rdata
[i
];
36 // We use each operator name and each prefix to construct the
37 // overall RocksDB operator name for consistency check at open time.
38 string
name() const override
{
43 void BitmapFreelistManager::setup_merge_operator(KeyValueDB
*db
, string prefix
)
45 ceph::shared_ptr
<XorMergeOperator
> merge_op(new XorMergeOperator
);
46 db
->set_merge_operator(prefix
, merge_op
);
49 BitmapFreelistManager::BitmapFreelistManager(CephContext
* cct
,
53 : FreelistManager(cct
),
54 meta_prefix(meta_prefix
),
55 bitmap_prefix(bitmap_prefix
),
61 int BitmapFreelistManager::create(uint64_t new_size
, KeyValueDB::Transaction txn
)
63 bytes_per_block
= cct
->_conf
->bdev_block_size
;
64 assert(ISP2(bytes_per_block
));
65 size
= P2ALIGN(new_size
, bytes_per_block
);
66 blocks_per_key
= cct
->_conf
->bluestore_freelist_blocks_per_key
;
70 blocks
= size
/ bytes_per_block
;
71 if (blocks
/ blocks_per_key
* blocks_per_key
!= blocks
) {
72 blocks
= (blocks
/ blocks_per_key
+ 1) * blocks_per_key
;
73 dout(10) << __func__
<< " rounding blocks up from 0x" << std::hex
<< size
74 << " to 0x" << (blocks
* bytes_per_block
)
75 << " (0x" << blocks
<< " blocks)" << std::dec
<< dendl
;
76 // set past-eof blocks as allocated
77 _xor(size
, blocks
* bytes_per_block
- size
, txn
);
80 << " size 0x" << std::hex
<< size
81 << " bytes_per_block 0x" << bytes_per_block
82 << " blocks 0x" << blocks
83 << " blocks_per_key 0x" << blocks_per_key
87 ::encode(bytes_per_block
, bl
);
88 txn
->set(meta_prefix
, "bytes_per_block", bl
);
92 ::encode(blocks_per_key
, bl
);
93 txn
->set(meta_prefix
, "blocks_per_key", bl
);
98 txn
->set(meta_prefix
, "blocks", bl
);
103 txn
->set(meta_prefix
, "size", bl
);
108 int BitmapFreelistManager::init()
110 dout(1) << __func__
<< dendl
;
112 KeyValueDB::Iterator it
= kvdb
->get_iterator(meta_prefix
);
113 it
->lower_bound(string());
116 while (it
->valid()) {
117 string k
= it
->key();
118 if (k
== "bytes_per_block") {
119 bufferlist bl
= it
->value();
120 bufferlist::iterator p
= bl
.begin();
121 ::decode(bytes_per_block
, p
);
122 dout(10) << __func__
<< " bytes_per_block 0x" << std::hex
123 << bytes_per_block
<< std::dec
<< dendl
;
124 } else if (k
== "blocks") {
125 bufferlist bl
= it
->value();
126 bufferlist::iterator p
= bl
.begin();
128 dout(10) << __func__
<< " blocks 0x" << std::hex
<< blocks
<< std::dec
130 } else if (k
== "size") {
131 bufferlist bl
= it
->value();
132 bufferlist::iterator p
= bl
.begin();
134 dout(10) << __func__
<< " size 0x" << std::hex
<< size
<< std::dec
136 } else if (k
== "blocks_per_key") {
137 bufferlist bl
= it
->value();
138 bufferlist::iterator p
= bl
.begin();
139 ::decode(blocks_per_key
, p
);
140 dout(10) << __func__
<< " blocks_per_key 0x" << std::hex
<< blocks_per_key
141 << std::dec
<< dendl
;
143 derr
<< __func__
<< " unrecognized meta " << k
<< dendl
;
149 dout(10) << __func__
<< std::hex
150 << " size 0x" << size
151 << " bytes_per_block 0x" << bytes_per_block
152 << " blocks 0x" << blocks
153 << " blocks_per_key 0x" << blocks_per_key
154 << std::dec
<< dendl
;
159 void BitmapFreelistManager::_init_misc()
161 bufferptr
z(blocks_per_key
>> 3);
162 memset(z
.c_str(), 0xff, z
.length());
164 all_set_bl
.append(z
);
166 block_mask
= ~(bytes_per_block
- 1);
168 bytes_per_key
= bytes_per_block
* blocks_per_key
;
169 key_mask
= ~(bytes_per_key
- 1);
170 dout(10) << __func__
<< std::hex
<< " bytes_per_key 0x" << bytes_per_key
171 << ", key_mask 0x" << key_mask
<< std::dec
175 void BitmapFreelistManager::shutdown()
177 dout(1) << __func__
<< dendl
;
180 void BitmapFreelistManager::enumerate_reset()
182 std::lock_guard
<std::mutex
> l(lock
);
183 enumerate_offset
= 0;
184 enumerate_bl_pos
= 0;
185 enumerate_bl
.clear();
189 int get_next_clear_bit(bufferlist
& bl
, int start
)
191 const char *p
= bl
.c_str();
192 int bits
= bl
.length() << 3;
193 while (start
< bits
) {
194 // byte = start / 8 (or start >> 3)
195 // bit = start % 8 (or start & 7)
196 unsigned char byte_mask
= 1 << (start
& 7);
197 if ((p
[start
>> 3] & byte_mask
) == 0) {
202 return -1; // not found
205 int get_next_set_bit(bufferlist
& bl
, int start
)
207 const char *p
= bl
.c_str();
208 int bits
= bl
.length() << 3;
209 while (start
< bits
) {
210 int which_byte
= start
/ 8;
211 int which_bit
= start
% 8;
212 unsigned char byte_mask
= 1 << which_bit
;
213 if (p
[which_byte
] & byte_mask
) {
218 return -1; // not found
221 bool BitmapFreelistManager::enumerate_next(uint64_t *offset
, uint64_t *length
)
223 std::lock_guard
<std::mutex
> l(lock
);
225 // initial base case is a bit awkward
226 if (enumerate_offset
== 0 && enumerate_bl_pos
== 0) {
227 dout(10) << __func__
<< " start" << dendl
;
228 enumerate_p
= kvdb
->get_iterator(bitmap_prefix
);
229 enumerate_p
->lower_bound(string());
230 // we assert that the first block is always allocated; it's true,
231 // and it simplifies our lives a bit.
232 assert(enumerate_p
->valid());
233 string k
= enumerate_p
->key();
234 const char *p
= k
.c_str();
235 _key_decode_u64(p
, &enumerate_offset
);
236 enumerate_bl
= enumerate_p
->value();
237 assert(enumerate_offset
== 0);
238 assert(get_next_set_bit(enumerate_bl
, 0) == 0);
241 if (enumerate_offset
>= size
) {
242 dout(10) << __func__
<< " end" << dendl
;
246 // skip set bits to find offset
248 enumerate_bl_pos
= get_next_clear_bit(enumerate_bl
, enumerate_bl_pos
);
249 if (enumerate_bl_pos
>= 0) {
250 *offset
= _get_offset(enumerate_offset
, enumerate_bl_pos
);
251 dout(30) << __func__
<< " found clear bit, key 0x" << std::hex
252 << enumerate_offset
<< " bit 0x" << enumerate_bl_pos
253 << " offset 0x" << *offset
254 << std::dec
<< dendl
;
257 dout(30) << " no more clear bits in 0x" << std::hex
<< enumerate_offset
258 << std::dec
<< dendl
;
260 enumerate_bl
.clear();
261 if (!enumerate_p
->valid()) {
262 enumerate_offset
+= bytes_per_key
;
263 enumerate_bl_pos
= 0;
264 *offset
= _get_offset(enumerate_offset
, enumerate_bl_pos
);
267 string k
= enumerate_p
->key();
268 const char *p
= k
.c_str();
269 uint64_t next
= enumerate_offset
+ bytes_per_key
;
270 _key_decode_u64(p
, &enumerate_offset
);
271 enumerate_bl
= enumerate_p
->value();
272 enumerate_bl_pos
= 0;
273 if (enumerate_offset
> next
) {
274 dout(30) << " no key at 0x" << std::hex
<< next
<< ", got 0x"
275 << enumerate_offset
<< std::dec
<< dendl
;
281 // skip clear bits to find the end
283 if (enumerate_p
->valid()) {
285 enumerate_bl_pos
= get_next_set_bit(enumerate_bl
, enumerate_bl_pos
);
286 if (enumerate_bl_pos
>= 0) {
287 end
= _get_offset(enumerate_offset
, enumerate_bl_pos
);
288 dout(30) << __func__
<< " found set bit, key 0x" << std::hex
289 << enumerate_offset
<< " bit 0x" << enumerate_bl_pos
290 << " offset 0x" << end
<< std::dec
292 *length
= end
- *offset
;
293 assert((*offset
+ *length
) <= size
);
294 dout(10) << __func__
<< std::hex
<< " 0x" << *offset
<< "~" << *length
295 << std::dec
<< dendl
;
298 dout(30) << " no more set bits in 0x" << std::hex
<< enumerate_offset
299 << std::dec
<< dendl
;
301 enumerate_bl
.clear();
302 enumerate_bl_pos
= 0;
303 if (!enumerate_p
->valid()) {
306 string k
= enumerate_p
->key();
307 const char *p
= k
.c_str();
308 _key_decode_u64(p
, &enumerate_offset
);
309 enumerate_bl
= enumerate_p
->value();
314 if (enumerate_offset
< end
) {
315 *length
= end
- *offset
;
316 dout(10) << __func__
<< std::hex
<< " 0x" << *offset
<< "~" << *length
317 << std::dec
<< dendl
;
318 enumerate_offset
= end
;
319 enumerate_bl_pos
= blocks_per_key
;
320 assert((*offset
+ *length
) <= size
);
324 dout(10) << __func__
<< " end" << dendl
;
328 void BitmapFreelistManager::dump()
331 uint64_t offset
, length
;
332 while (enumerate_next(&offset
, &length
)) {
333 dout(20) << __func__
<< " 0x" << std::hex
<< offset
<< "~" << length
334 << std::dec
<< dendl
;
338 void BitmapFreelistManager::_verify_range(uint64_t offset
, uint64_t length
,
342 uint64_t first_key
= offset
& key_mask
;
343 uint64_t last_key
= (offset
+ length
- 1) & key_mask
;
344 if (first_key
== last_key
) {
346 make_offset_key(first_key
, &k
);
348 kvdb
->get(bitmap_prefix
, k
, &bl
);
349 if (bl
.length() > 0) {
350 const char *p
= bl
.c_str();
351 unsigned s
= (offset
& ~key_mask
) / bytes_per_block
;
352 unsigned e
= ((offset
+ length
- 1) & ~key_mask
) / bytes_per_block
;
353 for (unsigned i
= s
; i
<= e
; ++i
) {
354 int has
= !!(p
[i
>> 3] & (1ull << (i
& 7)));
356 derr
<< __func__
<< " key 0x" << std::hex
<< first_key
<< " bit 0x"
357 << i
<< " has 0x" << has
<< " expected 0x" << val
358 << std::dec
<< dendl
;
364 derr
<< __func__
<< " key 0x" << std::hex
<< first_key
365 << " not present, expected 0x" << val
<< std::dec
<< dendl
;
373 make_offset_key(first_key
, &k
);
375 kvdb
->get(bitmap_prefix
, k
, &bl
);
377 const char *p
= bl
.c_str();
378 unsigned s
= (offset
& ~key_mask
) / bytes_per_block
;
379 unsigned e
= blocks_per_key
;
380 for (unsigned i
= s
; i
< e
; ++i
) {
381 int has
= !!(p
[i
>> 3] & (1ull << (i
& 7)));
383 derr
<< __func__
<< " key 0x" << std::hex
<< first_key
<< " bit 0x"
384 << i
<< " has 0x" << has
<< " expected 0x" << val
<< std::dec
391 derr
<< __func__
<< " key 0x" << std::hex
<< first_key
392 << " not present, expected 0x" << val
<< std::dec
<< dendl
;
396 first_key
+= bytes_per_key
;
399 if (first_key
< last_key
) {
400 while (first_key
< last_key
) {
402 make_offset_key(first_key
, &k
);
404 kvdb
->get(bitmap_prefix
, k
, &bl
);
405 if (bl
.length() > 0) {
406 const char *p
= bl
.c_str();
407 for (unsigned i
= 0; i
< blocks_per_key
; ++i
) {
408 int has
= !!(p
[i
>> 3] & (1ull << (i
& 7)));
410 derr
<< __func__
<< " key 0x" << std::hex
<< first_key
<< " bit 0x"
411 << i
<< " has 0x" << has
<< " expected 0x" << val
412 << std::dec
<< dendl
;
418 derr
<< __func__
<< " key 0x" << std::hex
<< first_key
419 << " not present, expected 0x" << val
<< std::dec
<< dendl
;
423 first_key
+= bytes_per_key
;
426 assert(first_key
== last_key
);
429 make_offset_key(first_key
, &k
);
431 kvdb
->get(bitmap_prefix
, k
, &bl
);
432 if (bl
.length() > 0) {
433 const char *p
= bl
.c_str();
434 unsigned e
= ((offset
+ length
- 1) & ~key_mask
) / bytes_per_block
;
435 for (unsigned i
= 0; i
< e
; ++i
) {
436 int has
= !!(p
[i
>> 3] & (1ull << (i
& 7)));
438 derr
<< __func__
<< " key 0x" << std::hex
<< first_key
<< " bit 0x"
439 << i
<< " has 0x" << has
<< " expected 0x" << val
<< std::dec
446 derr
<< __func__
<< " key 0x" << std::hex
<< first_key
447 << " not present, expected 0x" << val
<< std::dec
<< dendl
;
454 derr
<< __func__
<< " saw " << errors
<< " errors" << dendl
;
455 assert(0 == "bitmap freelist errors");
459 void BitmapFreelistManager::allocate(
460 uint64_t offset
, uint64_t length
,
461 KeyValueDB::Transaction txn
)
463 dout(10) << __func__
<< " 0x" << std::hex
<< offset
<< "~" << length
464 << std::dec
<< dendl
;
465 if (cct
->_conf
->bluestore_debug_freelist
)
466 _verify_range(offset
, length
, 0);
467 _xor(offset
, length
, txn
);
470 void BitmapFreelistManager::release(
471 uint64_t offset
, uint64_t length
,
472 KeyValueDB::Transaction txn
)
474 dout(10) << __func__
<< " 0x" << std::hex
<< offset
<< "~" << length
475 << std::dec
<< dendl
;
476 if (cct
->_conf
->bluestore_debug_freelist
)
477 _verify_range(offset
, length
, 1);
478 _xor(offset
, length
, txn
);
481 void BitmapFreelistManager::_xor(
482 uint64_t offset
, uint64_t length
,
483 KeyValueDB::Transaction txn
)
485 // must be block aligned
486 assert((offset
& block_mask
) == offset
);
487 assert((length
& block_mask
) == length
);
489 uint64_t first_key
= offset
& key_mask
;
490 uint64_t last_key
= (offset
+ length
- 1) & key_mask
;
491 dout(20) << __func__
<< " first_key 0x" << std::hex
<< first_key
492 << " last_key 0x" << last_key
<< std::dec
<< dendl
;
494 if (first_key
== last_key
) {
495 bufferptr
p(blocks_per_key
>> 3);
497 unsigned s
= (offset
& ~key_mask
) / bytes_per_block
;
498 unsigned e
= ((offset
+ length
- 1) & ~key_mask
) / bytes_per_block
;
499 for (unsigned i
= s
; i
<= e
; ++i
) {
500 p
[i
>> 3] ^= 1ull << (i
& 7);
503 make_offset_key(first_key
, &k
);
506 dout(30) << __func__
<< " 0x" << std::hex
<< first_key
<< std::dec
<< ": ";
507 bl
.hexdump(*_dout
, false);
509 txn
->merge(bitmap_prefix
, k
, bl
);
513 bufferptr
p(blocks_per_key
>> 3);
515 unsigned s
= (offset
& ~key_mask
) / bytes_per_block
;
516 unsigned e
= blocks_per_key
;
517 for (unsigned i
= s
; i
< e
; ++i
) {
518 p
[i
>> 3] ^= 1ull << (i
& 7);
521 make_offset_key(first_key
, &k
);
524 dout(30) << __func__
<< " 0x" << std::hex
<< first_key
<< std::dec
<< ": ";
525 bl
.hexdump(*_dout
, false);
527 txn
->merge(bitmap_prefix
, k
, bl
);
528 first_key
+= bytes_per_key
;
531 while (first_key
< last_key
) {
533 make_offset_key(first_key
, &k
);
534 dout(30) << __func__
<< " 0x" << std::hex
<< first_key
<< std::dec
536 all_set_bl
.hexdump(*_dout
, false);
538 txn
->merge(bitmap_prefix
, k
, all_set_bl
);
539 first_key
+= bytes_per_key
;
541 assert(first_key
== last_key
);
543 bufferptr
p(blocks_per_key
>> 3);
545 unsigned e
= ((offset
+ length
- 1) & ~key_mask
) / bytes_per_block
;
546 for (unsigned i
= 0; i
<= e
; ++i
) {
547 p
[i
>> 3] ^= 1ull << (i
& 7);
550 make_offset_key(first_key
, &k
);
553 dout(30) << __func__
<< " 0x" << std::hex
<< first_key
<< std::dec
<< ": ";
554 bl
.hexdump(*_dout
, false);
556 txn
->merge(bitmap_prefix
, k
, bl
);