1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 #include "BitmapFreelistManager.h"
5 #include "kv/KeyValueDB.h"
8 #include "common/debug.h"
10 #define dout_context cct
11 #define dout_subsys ceph_subsys_bluestore
13 #define dout_prefix *_dout << "freelist "
15 void make_offset_key(uint64_t offset
, std::string
*key
)
18 _key_encode_u64(offset
, key
);
21 struct XorMergeOperator
: public KeyValueDB::MergeOperator
{
22 void merge_nonexistent(
23 const char *rdata
, size_t rlen
, std::string
*new_value
) override
{
24 *new_value
= std::string(rdata
, rlen
);
27 const char *ldata
, size_t llen
,
28 const char *rdata
, size_t rlen
,
29 std::string
*new_value
) override
{
31 *new_value
= std::string(ldata
, llen
);
32 for (size_t i
= 0; i
< rlen
; ++i
) {
33 (*new_value
)[i
] ^= rdata
[i
];
36 // We use each operator name and each prefix to construct the
37 // overall RocksDB operator name for consistency check at open time.
38 string
name() const override
{
43 void BitmapFreelistManager::setup_merge_operator(KeyValueDB
*db
, string prefix
)
45 ceph::shared_ptr
<XorMergeOperator
> merge_op(new XorMergeOperator
);
46 db
->set_merge_operator(prefix
, merge_op
);
49 BitmapFreelistManager::BitmapFreelistManager(CephContext
* cct
,
53 : FreelistManager(cct
),
54 meta_prefix(meta_prefix
),
55 bitmap_prefix(bitmap_prefix
),
61 int BitmapFreelistManager::create(uint64_t new_size
, KeyValueDB::Transaction txn
)
63 bytes_per_block
= cct
->_conf
->bdev_block_size
;
64 assert(ISP2(bytes_per_block
));
65 size
= P2ALIGN(new_size
, bytes_per_block
);
66 blocks_per_key
= cct
->_conf
->bluestore_freelist_blocks_per_key
;
70 blocks
= size
/ bytes_per_block
;
71 if (blocks
/ blocks_per_key
* blocks_per_key
!= blocks
) {
72 blocks
= (blocks
/ blocks_per_key
+ 1) * blocks_per_key
;
73 dout(10) << __func__
<< " rounding blocks up from 0x" << std::hex
<< size
74 << " to 0x" << (blocks
* bytes_per_block
)
75 << " (0x" << blocks
<< " blocks)" << std::dec
<< dendl
;
76 // set past-eof blocks as allocated
77 _xor(size
, blocks
* bytes_per_block
- size
, txn
);
80 << " size 0x" << std::hex
<< size
81 << " bytes_per_block 0x" << bytes_per_block
82 << " blocks 0x" << blocks
83 << " blocks_per_key 0x" << blocks_per_key
87 ::encode(bytes_per_block
, bl
);
88 txn
->set(meta_prefix
, "bytes_per_block", bl
);
92 ::encode(blocks_per_key
, bl
);
93 txn
->set(meta_prefix
, "blocks_per_key", bl
);
98 txn
->set(meta_prefix
, "blocks", bl
);
103 txn
->set(meta_prefix
, "size", bl
);
108 int BitmapFreelistManager::init()
110 dout(1) << __func__
<< dendl
;
112 KeyValueDB::Iterator it
= kvdb
->get_iterator(meta_prefix
);
113 it
->lower_bound(string());
116 while (it
->valid()) {
117 string k
= it
->key();
118 if (k
== "bytes_per_block") {
119 bufferlist bl
= it
->value();
120 bufferlist::iterator p
= bl
.begin();
121 ::decode(bytes_per_block
, p
);
122 dout(10) << __func__
<< " bytes_per_block 0x" << std::hex
123 << bytes_per_block
<< std::dec
<< dendl
;
124 } else if (k
== "blocks") {
125 bufferlist bl
= it
->value();
126 bufferlist::iterator p
= bl
.begin();
128 dout(10) << __func__
<< " blocks 0x" << std::hex
<< blocks
<< std::dec
130 } else if (k
== "size") {
131 bufferlist bl
= it
->value();
132 bufferlist::iterator p
= bl
.begin();
134 dout(10) << __func__
<< " size 0x" << std::hex
<< size
<< std::dec
136 } else if (k
== "blocks_per_key") {
137 bufferlist bl
= it
->value();
138 bufferlist::iterator p
= bl
.begin();
139 ::decode(blocks_per_key
, p
);
140 dout(10) << __func__
<< " blocks_per_key 0x" << std::hex
<< blocks_per_key
141 << std::dec
<< dendl
;
143 derr
<< __func__
<< " unrecognized meta " << k
<< dendl
;
149 dout(10) << __func__
<< std::hex
150 << " size 0x" << size
151 << " bytes_per_block 0x" << bytes_per_block
152 << " blocks 0x" << blocks
153 << " blocks_per_key 0x" << blocks_per_key
154 << std::dec
<< dendl
;
159 void BitmapFreelistManager::_init_misc()
161 bufferptr
z(blocks_per_key
>> 3);
162 memset(z
.c_str(), 0xff, z
.length());
164 all_set_bl
.append(z
);
166 block_mask
= ~(bytes_per_block
- 1);
168 bytes_per_key
= bytes_per_block
* blocks_per_key
;
169 key_mask
= ~(bytes_per_key
- 1);
170 dout(10) << __func__
<< std::hex
<< " bytes_per_key 0x" << bytes_per_key
171 << ", key_mask 0x" << key_mask
<< std::dec
175 void BitmapFreelistManager::shutdown()
177 dout(1) << __func__
<< dendl
;
180 void BitmapFreelistManager::enumerate_reset()
182 std::lock_guard
<std::mutex
> l(lock
);
183 enumerate_offset
= 0;
184 enumerate_bl_pos
= 0;
185 enumerate_bl
.clear();
188 int get_next_clear_bit(bufferlist
& bl
, int start
)
190 const char *p
= bl
.c_str();
191 int bits
= bl
.length() << 3;
192 while (start
< bits
) {
193 int which_byte
= start
/ 8;
194 int which_bit
= start
% 8;
195 unsigned char byte_mask
= 1 << which_bit
;
196 if ((p
[which_byte
] & byte_mask
) == 0) {
201 return -1; // not found
204 int get_next_set_bit(bufferlist
& bl
, int start
)
206 const char *p
= bl
.c_str();
207 int bits
= bl
.length() << 3;
208 while (start
< bits
) {
209 int which_byte
= start
/ 8;
210 int which_bit
= start
% 8;
211 unsigned char byte_mask
= 1 << which_bit
;
212 if (p
[which_byte
] & byte_mask
) {
217 return -1; // not found
220 bool BitmapFreelistManager::enumerate_next(uint64_t *offset
, uint64_t *length
)
222 std::lock_guard
<std::mutex
> l(lock
);
224 // initial base case is a bit awkward
225 if (enumerate_offset
== 0 && enumerate_bl_pos
== 0) {
226 dout(10) << __func__
<< " start" << dendl
;
227 enumerate_p
= kvdb
->get_iterator(bitmap_prefix
);
228 enumerate_p
->lower_bound(string());
229 // we assert that the first block is always allocated; it's true,
230 // and it simplifies our lives a bit.
231 assert(enumerate_p
->valid());
232 string k
= enumerate_p
->key();
233 const char *p
= k
.c_str();
234 _key_decode_u64(p
, &enumerate_offset
);
235 enumerate_bl
= enumerate_p
->value();
236 assert(enumerate_offset
== 0);
237 assert(get_next_set_bit(enumerate_bl
, 0) == 0);
240 if (enumerate_offset
>= size
) {
241 dout(10) << __func__
<< " end" << dendl
;
245 // skip set bits to find offset
247 enumerate_bl_pos
= get_next_clear_bit(enumerate_bl
, enumerate_bl_pos
);
248 if (enumerate_bl_pos
>= 0) {
249 *offset
= _get_offset(enumerate_offset
, enumerate_bl_pos
);
250 dout(30) << __func__
<< " found clear bit, key 0x" << std::hex
251 << enumerate_offset
<< " bit 0x" << enumerate_bl_pos
252 << " offset 0x" << *offset
253 << std::dec
<< dendl
;
256 dout(30) << " no more clear bits in 0x" << std::hex
<< enumerate_offset
257 << std::dec
<< dendl
;
259 enumerate_bl
.clear();
260 if (!enumerate_p
->valid()) {
261 enumerate_offset
+= bytes_per_key
;
262 enumerate_bl_pos
= 0;
263 *offset
= _get_offset(enumerate_offset
, enumerate_bl_pos
);
266 string k
= enumerate_p
->key();
267 const char *p
= k
.c_str();
268 uint64_t next
= enumerate_offset
+ bytes_per_key
;
269 _key_decode_u64(p
, &enumerate_offset
);
270 enumerate_bl
= enumerate_p
->value();
271 enumerate_bl_pos
= 0;
272 if (enumerate_offset
> next
) {
273 dout(30) << " no key at 0x" << std::hex
<< next
<< ", got 0x"
274 << enumerate_offset
<< std::dec
<< dendl
;
280 // skip clear bits to find the end
282 if (enumerate_p
->valid()) {
284 enumerate_bl_pos
= get_next_set_bit(enumerate_bl
, enumerate_bl_pos
);
285 if (enumerate_bl_pos
>= 0) {
286 end
= _get_offset(enumerate_offset
, enumerate_bl_pos
);
287 dout(30) << __func__
<< " found set bit, key 0x" << std::hex
288 << enumerate_offset
<< " bit 0x" << enumerate_bl_pos
289 << " offset 0x" << end
<< std::dec
291 *length
= end
- *offset
;
292 assert((*offset
+ *length
) <= size
);
293 dout(10) << __func__
<< std::hex
<< " 0x" << *offset
<< "~" << *length
294 << std::dec
<< dendl
;
297 dout(30) << " no more set bits in 0x" << std::hex
<< enumerate_offset
298 << std::dec
<< dendl
;
300 enumerate_bl
.clear();
301 enumerate_bl_pos
= 0;
302 if (!enumerate_p
->valid()) {
305 string k
= enumerate_p
->key();
306 const char *p
= k
.c_str();
307 _key_decode_u64(p
, &enumerate_offset
);
308 enumerate_bl
= enumerate_p
->value();
313 if (enumerate_offset
< end
) {
314 *length
= end
- *offset
;
315 dout(10) << __func__
<< std::hex
<< " 0x" << *offset
<< "~" << *length
316 << std::dec
<< dendl
;
317 enumerate_offset
= end
;
318 enumerate_bl_pos
= blocks_per_key
;
319 assert((*offset
+ *length
) <= size
);
323 dout(10) << __func__
<< " end" << dendl
;
327 void BitmapFreelistManager::dump()
330 uint64_t offset
, length
;
331 while (enumerate_next(&offset
, &length
)) {
332 dout(20) << __func__
<< " 0x" << std::hex
<< offset
<< "~" << length
333 << std::dec
<< dendl
;
337 void BitmapFreelistManager::_verify_range(uint64_t offset
, uint64_t length
,
341 uint64_t first_key
= offset
& key_mask
;
342 uint64_t last_key
= (offset
+ length
- 1) & key_mask
;
343 if (first_key
== last_key
) {
345 make_offset_key(first_key
, &k
);
347 kvdb
->get(bitmap_prefix
, k
, &bl
);
348 if (bl
.length() > 0) {
349 const char *p
= bl
.c_str();
350 unsigned s
= (offset
& ~key_mask
) / bytes_per_block
;
351 unsigned e
= ((offset
+ length
- 1) & ~key_mask
) / bytes_per_block
;
352 for (unsigned i
= s
; i
<= e
; ++i
) {
353 int has
= !!(p
[i
>> 3] & (1ull << (i
& 7)));
355 derr
<< __func__
<< " key 0x" << std::hex
<< first_key
<< " bit 0x"
356 << i
<< " has 0x" << has
<< " expected 0x" << val
357 << std::dec
<< dendl
;
363 derr
<< __func__
<< " key 0x" << std::hex
<< first_key
364 << " not present, expected 0x" << val
<< std::dec
<< dendl
;
372 make_offset_key(first_key
, &k
);
374 kvdb
->get(bitmap_prefix
, k
, &bl
);
376 const char *p
= bl
.c_str();
377 unsigned s
= (offset
& ~key_mask
) / bytes_per_block
;
378 unsigned e
= blocks_per_key
;
379 for (unsigned i
= s
; i
< e
; ++i
) {
380 int has
= !!(p
[i
>> 3] & (1ull << (i
& 7)));
382 derr
<< __func__
<< " key 0x" << std::hex
<< first_key
<< " bit 0x"
383 << i
<< " has 0x" << has
<< " expected 0x" << val
<< std::dec
390 derr
<< __func__
<< " key 0x" << std::hex
<< first_key
391 << " not present, expected 0x" << val
<< std::dec
<< dendl
;
395 first_key
+= bytes_per_key
;
398 if (first_key
< last_key
) {
399 while (first_key
< last_key
) {
401 make_offset_key(first_key
, &k
);
403 kvdb
->get(bitmap_prefix
, k
, &bl
);
404 if (bl
.length() > 0) {
405 const char *p
= bl
.c_str();
406 for (unsigned i
= 0; i
< blocks_per_key
; ++i
) {
407 int has
= !!(p
[i
>> 3] & (1ull << (i
& 7)));
409 derr
<< __func__
<< " key 0x" << std::hex
<< first_key
<< " bit 0x"
410 << i
<< " has 0x" << has
<< " expected 0x" << val
411 << std::dec
<< dendl
;
417 derr
<< __func__
<< " key 0x" << std::hex
<< first_key
418 << " not present, expected 0x" << val
<< std::dec
<< dendl
;
422 first_key
+= bytes_per_key
;
425 assert(first_key
== last_key
);
428 make_offset_key(first_key
, &k
);
430 kvdb
->get(bitmap_prefix
, k
, &bl
);
431 if (bl
.length() > 0) {
432 const char *p
= bl
.c_str();
433 unsigned e
= ((offset
+ length
- 1) & ~key_mask
) / bytes_per_block
;
434 for (unsigned i
= 0; i
< e
; ++i
) {
435 int has
= !!(p
[i
>> 3] & (1ull << (i
& 7)));
437 derr
<< __func__
<< " key 0x" << std::hex
<< first_key
<< " bit 0x"
438 << i
<< " has 0x" << has
<< " expected 0x" << val
<< std::dec
445 derr
<< __func__
<< " key 0x" << std::hex
<< first_key
446 << " not present, expected 0x" << val
<< std::dec
<< dendl
;
453 derr
<< __func__
<< " saw " << errors
<< " errors" << dendl
;
454 assert(0 == "bitmap freelist errors");
458 void BitmapFreelistManager::allocate(
459 uint64_t offset
, uint64_t length
,
460 KeyValueDB::Transaction txn
)
462 dout(10) << __func__
<< " 0x" << std::hex
<< offset
<< "~" << length
463 << std::dec
<< dendl
;
464 if (cct
->_conf
->bluestore_debug_freelist
)
465 _verify_range(offset
, length
, 0);
466 _xor(offset
, length
, txn
);
469 void BitmapFreelistManager::release(
470 uint64_t offset
, uint64_t length
,
471 KeyValueDB::Transaction txn
)
473 dout(10) << __func__
<< " 0x" << std::hex
<< offset
<< "~" << length
474 << std::dec
<< dendl
;
475 if (cct
->_conf
->bluestore_debug_freelist
)
476 _verify_range(offset
, length
, 1);
477 _xor(offset
, length
, txn
);
480 void BitmapFreelistManager::_xor(
481 uint64_t offset
, uint64_t length
,
482 KeyValueDB::Transaction txn
)
484 // must be block aligned
485 assert((offset
& block_mask
) == offset
);
486 assert((length
& block_mask
) == length
);
488 uint64_t first_key
= offset
& key_mask
;
489 uint64_t last_key
= (offset
+ length
- 1) & key_mask
;
490 dout(20) << __func__
<< " first_key 0x" << std::hex
<< first_key
491 << " last_key 0x" << last_key
<< std::dec
<< dendl
;
493 if (first_key
== last_key
) {
494 bufferptr
p(blocks_per_key
>> 3);
496 unsigned s
= (offset
& ~key_mask
) / bytes_per_block
;
497 unsigned e
= ((offset
+ length
- 1) & ~key_mask
) / bytes_per_block
;
498 for (unsigned i
= s
; i
<= e
; ++i
) {
499 p
[i
>> 3] ^= 1ull << (i
& 7);
502 make_offset_key(first_key
, &k
);
505 dout(30) << __func__
<< " 0x" << std::hex
<< first_key
<< std::dec
<< ": ";
506 bl
.hexdump(*_dout
, false);
508 txn
->merge(bitmap_prefix
, k
, bl
);
512 bufferptr
p(blocks_per_key
>> 3);
514 unsigned s
= (offset
& ~key_mask
) / bytes_per_block
;
515 unsigned e
= blocks_per_key
;
516 for (unsigned i
= s
; i
< e
; ++i
) {
517 p
[i
>> 3] ^= 1ull << (i
& 7);
520 make_offset_key(first_key
, &k
);
523 dout(30) << __func__
<< " 0x" << std::hex
<< first_key
<< std::dec
<< ": ";
524 bl
.hexdump(*_dout
, false);
526 txn
->merge(bitmap_prefix
, k
, bl
);
527 first_key
+= bytes_per_key
;
530 while (first_key
< last_key
) {
532 make_offset_key(first_key
, &k
);
533 dout(30) << __func__
<< " 0x" << std::hex
<< first_key
<< std::dec
535 all_set_bl
.hexdump(*_dout
, false);
537 txn
->merge(bitmap_prefix
, k
, all_set_bl
);
538 first_key
+= bytes_per_key
;
540 assert(first_key
== last_key
);
542 bufferptr
p(blocks_per_key
>> 3);
544 unsigned e
= ((offset
+ length
- 1) & ~key_mask
) / bytes_per_block
;
545 for (unsigned i
= 0; i
<= e
; ++i
) {
546 p
[i
>> 3] ^= 1ull << (i
& 7);
549 make_offset_key(first_key
, &k
);
552 dout(30) << __func__
<< " 0x" << std::hex
<< first_key
<< std::dec
<< ": ";
553 bl
.hexdump(*_dout
, false);
555 txn
->merge(bitmap_prefix
, k
, bl
);