]> git.proxmox.com Git - ceph.git/blob - ceph/src/os/bluestore/BitmapFreelistManager.cc
a3b3a5ec8a5905e4170337c9b81856e64c248fb9
[ceph.git] / ceph / src / os / bluestore / BitmapFreelistManager.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #include "BitmapFreelistManager.h"
5 #include "kv/KeyValueDB.h"
6 #include "os/kv.h"
7
8 #include "common/debug.h"
9
10 #define dout_context cct
11 #define dout_subsys ceph_subsys_bluestore
12 #undef dout_prefix
13 #define dout_prefix *_dout << "freelist "
14
15 void make_offset_key(uint64_t offset, std::string *key)
16 {
17 key->reserve(10);
18 _key_encode_u64(offset, key);
19 }
20
21 struct XorMergeOperator : public KeyValueDB::MergeOperator {
22 void merge_nonexistent(
23 const char *rdata, size_t rlen, std::string *new_value) override {
24 *new_value = std::string(rdata, rlen);
25 }
26 void merge(
27 const char *ldata, size_t llen,
28 const char *rdata, size_t rlen,
29 std::string *new_value) override {
30 assert(llen == rlen);
31 *new_value = std::string(ldata, llen);
32 for (size_t i = 0; i < rlen; ++i) {
33 (*new_value)[i] ^= rdata[i];
34 }
35 }
36 // We use each operator name and each prefix to construct the
37 // overall RocksDB operator name for consistency check at open time.
38 string name() const override {
39 return "bitwise_xor";
40 }
41 };
42
43 void BitmapFreelistManager::setup_merge_operator(KeyValueDB *db, string prefix)
44 {
45 ceph::shared_ptr<XorMergeOperator> merge_op(new XorMergeOperator);
46 db->set_merge_operator(prefix, merge_op);
47 }
48
49 BitmapFreelistManager::BitmapFreelistManager(CephContext* cct,
50 KeyValueDB *db,
51 string meta_prefix,
52 string bitmap_prefix)
53 : FreelistManager(cct),
54 meta_prefix(meta_prefix),
55 bitmap_prefix(bitmap_prefix),
56 kvdb(db),
57 enumerate_bl_pos(0)
58 {
59 }
60
61 int BitmapFreelistManager::create(uint64_t new_size, KeyValueDB::Transaction txn)
62 {
63 bytes_per_block = cct->_conf->bdev_block_size;
64 assert(ISP2(bytes_per_block));
65 size = P2ALIGN(new_size, bytes_per_block);
66 blocks_per_key = cct->_conf->bluestore_freelist_blocks_per_key;
67
68 _init_misc();
69
70 blocks = size / bytes_per_block;
71 if (blocks / blocks_per_key * blocks_per_key != blocks) {
72 blocks = (blocks / blocks_per_key + 1) * blocks_per_key;
73 dout(10) << __func__ << " rounding blocks up from 0x" << std::hex << size
74 << " to 0x" << (blocks * bytes_per_block)
75 << " (0x" << blocks << " blocks)" << std::dec << dendl;
76 // set past-eof blocks as allocated
77 _xor(size, blocks * bytes_per_block - size, txn);
78 }
79 dout(10) << __func__
80 << " size 0x" << std::hex << size
81 << " bytes_per_block 0x" << bytes_per_block
82 << " blocks 0x" << blocks
83 << " blocks_per_key 0x" << blocks_per_key
84 << std::dec << dendl;
85 {
86 bufferlist bl;
87 ::encode(bytes_per_block, bl);
88 txn->set(meta_prefix, "bytes_per_block", bl);
89 }
90 {
91 bufferlist bl;
92 ::encode(blocks_per_key, bl);
93 txn->set(meta_prefix, "blocks_per_key", bl);
94 }
95 {
96 bufferlist bl;
97 ::encode(blocks, bl);
98 txn->set(meta_prefix, "blocks", bl);
99 }
100 {
101 bufferlist bl;
102 ::encode(size, bl);
103 txn->set(meta_prefix, "size", bl);
104 }
105 return 0;
106 }
107
108 int BitmapFreelistManager::init()
109 {
110 dout(1) << __func__ << dendl;
111
112 KeyValueDB::Iterator it = kvdb->get_iterator(meta_prefix);
113 it->lower_bound(string());
114
115 // load meta
116 while (it->valid()) {
117 string k = it->key();
118 if (k == "bytes_per_block") {
119 bufferlist bl = it->value();
120 bufferlist::iterator p = bl.begin();
121 ::decode(bytes_per_block, p);
122 dout(10) << __func__ << " bytes_per_block 0x" << std::hex
123 << bytes_per_block << std::dec << dendl;
124 } else if (k == "blocks") {
125 bufferlist bl = it->value();
126 bufferlist::iterator p = bl.begin();
127 ::decode(blocks, p);
128 dout(10) << __func__ << " blocks 0x" << std::hex << blocks << std::dec
129 << dendl;
130 } else if (k == "size") {
131 bufferlist bl = it->value();
132 bufferlist::iterator p = bl.begin();
133 ::decode(size, p);
134 dout(10) << __func__ << " size 0x" << std::hex << size << std::dec
135 << dendl;
136 } else if (k == "blocks_per_key") {
137 bufferlist bl = it->value();
138 bufferlist::iterator p = bl.begin();
139 ::decode(blocks_per_key, p);
140 dout(10) << __func__ << " blocks_per_key 0x" << std::hex << blocks_per_key
141 << std::dec << dendl;
142 } else {
143 derr << __func__ << " unrecognized meta " << k << dendl;
144 return -EIO;
145 }
146 it->next();
147 }
148
149 dout(10) << __func__ << std::hex
150 << " size 0x" << size
151 << " bytes_per_block 0x" << bytes_per_block
152 << " blocks 0x" << blocks
153 << " blocks_per_key 0x" << blocks_per_key
154 << std::dec << dendl;
155 _init_misc();
156 return 0;
157 }
158
159 void BitmapFreelistManager::_init_misc()
160 {
161 bufferptr z(blocks_per_key >> 3);
162 memset(z.c_str(), 0xff, z.length());
163 all_set_bl.clear();
164 all_set_bl.append(z);
165
166 block_mask = ~(bytes_per_block - 1);
167
168 bytes_per_key = bytes_per_block * blocks_per_key;
169 key_mask = ~(bytes_per_key - 1);
170 dout(10) << __func__ << std::hex << " bytes_per_key 0x" << bytes_per_key
171 << ", key_mask 0x" << key_mask << std::dec
172 << dendl;
173 }
174
175 void BitmapFreelistManager::shutdown()
176 {
177 dout(1) << __func__ << dendl;
178 }
179
180 void BitmapFreelistManager::enumerate_reset()
181 {
182 std::lock_guard<std::mutex> l(lock);
183 enumerate_offset = 0;
184 enumerate_bl_pos = 0;
185 enumerate_bl.clear();
186 }
187
188 int get_next_clear_bit(bufferlist& bl, int start)
189 {
190 const char *p = bl.c_str();
191 int bits = bl.length() << 3;
192 while (start < bits) {
193 int which_byte = start / 8;
194 int which_bit = start % 8;
195 unsigned char byte_mask = 1 << which_bit;
196 if ((p[which_byte] & byte_mask) == 0) {
197 return start;
198 }
199 ++start;
200 }
201 return -1; // not found
202 }
203
204 int get_next_set_bit(bufferlist& bl, int start)
205 {
206 const char *p = bl.c_str();
207 int bits = bl.length() << 3;
208 while (start < bits) {
209 int which_byte = start / 8;
210 int which_bit = start % 8;
211 unsigned char byte_mask = 1 << which_bit;
212 if (p[which_byte] & byte_mask) {
213 return start;
214 }
215 ++start;
216 }
217 return -1; // not found
218 }
219
220 bool BitmapFreelistManager::enumerate_next(uint64_t *offset, uint64_t *length)
221 {
222 std::lock_guard<std::mutex> l(lock);
223
224 // initial base case is a bit awkward
225 if (enumerate_offset == 0 && enumerate_bl_pos == 0) {
226 dout(10) << __func__ << " start" << dendl;
227 enumerate_p = kvdb->get_iterator(bitmap_prefix);
228 enumerate_p->lower_bound(string());
229 // we assert that the first block is always allocated; it's true,
230 // and it simplifies our lives a bit.
231 assert(enumerate_p->valid());
232 string k = enumerate_p->key();
233 const char *p = k.c_str();
234 _key_decode_u64(p, &enumerate_offset);
235 enumerate_bl = enumerate_p->value();
236 assert(enumerate_offset == 0);
237 assert(get_next_set_bit(enumerate_bl, 0) == 0);
238 }
239
240 if (enumerate_offset >= size) {
241 dout(10) << __func__ << " end" << dendl;
242 return false;
243 }
244
245 // skip set bits to find offset
246 while (true) {
247 enumerate_bl_pos = get_next_clear_bit(enumerate_bl, enumerate_bl_pos);
248 if (enumerate_bl_pos >= 0) {
249 *offset = _get_offset(enumerate_offset, enumerate_bl_pos);
250 dout(30) << __func__ << " found clear bit, key 0x" << std::hex
251 << enumerate_offset << " bit 0x" << enumerate_bl_pos
252 << " offset 0x" << *offset
253 << std::dec << dendl;
254 break;
255 }
256 dout(30) << " no more clear bits in 0x" << std::hex << enumerate_offset
257 << std::dec << dendl;
258 enumerate_p->next();
259 enumerate_bl.clear();
260 if (!enumerate_p->valid()) {
261 enumerate_offset += bytes_per_key;
262 enumerate_bl_pos = 0;
263 *offset = _get_offset(enumerate_offset, enumerate_bl_pos);
264 break;
265 }
266 string k = enumerate_p->key();
267 const char *p = k.c_str();
268 uint64_t next = enumerate_offset + bytes_per_key;
269 _key_decode_u64(p, &enumerate_offset);
270 enumerate_bl = enumerate_p->value();
271 enumerate_bl_pos = 0;
272 if (enumerate_offset > next) {
273 dout(30) << " no key at 0x" << std::hex << next << ", got 0x"
274 << enumerate_offset << std::dec << dendl;
275 *offset = next;
276 break;
277 }
278 }
279
280 // skip clear bits to find the end
281 uint64_t end = 0;
282 if (enumerate_p->valid()) {
283 while (true) {
284 enumerate_bl_pos = get_next_set_bit(enumerate_bl, enumerate_bl_pos);
285 if (enumerate_bl_pos >= 0) {
286 end = _get_offset(enumerate_offset, enumerate_bl_pos);
287 dout(30) << __func__ << " found set bit, key 0x" << std::hex
288 << enumerate_offset << " bit 0x" << enumerate_bl_pos
289 << " offset 0x" << end << std::dec
290 << dendl;
291 *length = end - *offset;
292 assert((*offset + *length) <= size);
293 dout(10) << __func__ << std::hex << " 0x" << *offset << "~" << *length
294 << std::dec << dendl;
295 return true;
296 }
297 dout(30) << " no more set bits in 0x" << std::hex << enumerate_offset
298 << std::dec << dendl;
299 enumerate_p->next();
300 enumerate_bl.clear();
301 enumerate_bl_pos = 0;
302 if (!enumerate_p->valid()) {
303 break;
304 }
305 string k = enumerate_p->key();
306 const char *p = k.c_str();
307 _key_decode_u64(p, &enumerate_offset);
308 enumerate_bl = enumerate_p->value();
309 }
310 }
311
312 end = size;
313 if (enumerate_offset < end) {
314 *length = end - *offset;
315 dout(10) << __func__ << std::hex << " 0x" << *offset << "~" << *length
316 << std::dec << dendl;
317 enumerate_offset = end;
318 enumerate_bl_pos = blocks_per_key;
319 assert((*offset + *length) <= size);
320 return true;
321 }
322
323 dout(10) << __func__ << " end" << dendl;
324 return false;
325 }
326
327 void BitmapFreelistManager::dump()
328 {
329 enumerate_reset();
330 uint64_t offset, length;
331 while (enumerate_next(&offset, &length)) {
332 dout(20) << __func__ << " 0x" << std::hex << offset << "~" << length
333 << std::dec << dendl;
334 }
335 }
336
337 void BitmapFreelistManager::_verify_range(uint64_t offset, uint64_t length,
338 int val)
339 {
340 unsigned errors = 0;
341 uint64_t first_key = offset & key_mask;
342 uint64_t last_key = (offset + length - 1) & key_mask;
343 if (first_key == last_key) {
344 string k;
345 make_offset_key(first_key, &k);
346 bufferlist bl;
347 kvdb->get(bitmap_prefix, k, &bl);
348 if (bl.length() > 0) {
349 const char *p = bl.c_str();
350 unsigned s = (offset & ~key_mask) / bytes_per_block;
351 unsigned e = ((offset + length - 1) & ~key_mask) / bytes_per_block;
352 for (unsigned i = s; i <= e; ++i) {
353 int has = !!(p[i >> 3] & (1ull << (i & 7)));
354 if (has != val) {
355 derr << __func__ << " key 0x" << std::hex << first_key << " bit 0x"
356 << i << " has 0x" << has << " expected 0x" << val
357 << std::dec << dendl;
358 ++errors;
359 }
360 }
361 } else {
362 if (val) {
363 derr << __func__ << " key 0x" << std::hex << first_key
364 << " not present, expected 0x" << val << std::dec << dendl;
365 ++errors;
366 }
367 }
368 } else {
369 // first key
370 {
371 string k;
372 make_offset_key(first_key, &k);
373 bufferlist bl;
374 kvdb->get(bitmap_prefix, k, &bl);
375 if (bl.length()) {
376 const char *p = bl.c_str();
377 unsigned s = (offset & ~key_mask) / bytes_per_block;
378 unsigned e = blocks_per_key;
379 for (unsigned i = s; i < e; ++i) {
380 int has = !!(p[i >> 3] & (1ull << (i & 7)));
381 if (has != val) {
382 derr << __func__ << " key 0x" << std::hex << first_key << " bit 0x"
383 << i << " has 0x" << has << " expected 0x" << val << std::dec
384 << dendl;
385 ++errors;
386 }
387 }
388 } else {
389 if (val) {
390 derr << __func__ << " key 0x" << std::hex << first_key
391 << " not present, expected 0x" << val << std::dec << dendl;
392 ++errors;
393 }
394 }
395 first_key += bytes_per_key;
396 }
397 // middle keys
398 if (first_key < last_key) {
399 while (first_key < last_key) {
400 string k;
401 make_offset_key(first_key, &k);
402 bufferlist bl;
403 kvdb->get(bitmap_prefix, k, &bl);
404 if (bl.length() > 0) {
405 const char *p = bl.c_str();
406 for (unsigned i = 0; i < blocks_per_key; ++i) {
407 int has = !!(p[i >> 3] & (1ull << (i & 7)));
408 if (has != val) {
409 derr << __func__ << " key 0x" << std::hex << first_key << " bit 0x"
410 << i << " has 0x" << has << " expected 0x" << val
411 << std::dec << dendl;
412 ++errors;
413 }
414 }
415 } else {
416 if (val) {
417 derr << __func__ << " key 0x" << std::hex << first_key
418 << " not present, expected 0x" << val << std::dec << dendl;
419 ++errors;
420 }
421 }
422 first_key += bytes_per_key;
423 }
424 }
425 assert(first_key == last_key);
426 {
427 string k;
428 make_offset_key(first_key, &k);
429 bufferlist bl;
430 kvdb->get(bitmap_prefix, k, &bl);
431 if (bl.length() > 0) {
432 const char *p = bl.c_str();
433 unsigned e = ((offset + length - 1) & ~key_mask) / bytes_per_block;
434 for (unsigned i = 0; i < e; ++i) {
435 int has = !!(p[i >> 3] & (1ull << (i & 7)));
436 if (has != val) {
437 derr << __func__ << " key 0x" << std::hex << first_key << " bit 0x"
438 << i << " has 0x" << has << " expected 0x" << val << std::dec
439 << dendl;
440 ++errors;
441 }
442 }
443 } else {
444 if (val) {
445 derr << __func__ << " key 0x" << std::hex << first_key
446 << " not present, expected 0x" << val << std::dec << dendl;
447 ++errors;
448 }
449 }
450 }
451 }
452 if (errors) {
453 derr << __func__ << " saw " << errors << " errors" << dendl;
454 assert(0 == "bitmap freelist errors");
455 }
456 }
457
458 void BitmapFreelistManager::allocate(
459 uint64_t offset, uint64_t length,
460 KeyValueDB::Transaction txn)
461 {
462 dout(10) << __func__ << " 0x" << std::hex << offset << "~" << length
463 << std::dec << dendl;
464 if (cct->_conf->bluestore_debug_freelist)
465 _verify_range(offset, length, 0);
466 _xor(offset, length, txn);
467 }
468
469 void BitmapFreelistManager::release(
470 uint64_t offset, uint64_t length,
471 KeyValueDB::Transaction txn)
472 {
473 dout(10) << __func__ << " 0x" << std::hex << offset << "~" << length
474 << std::dec << dendl;
475 if (cct->_conf->bluestore_debug_freelist)
476 _verify_range(offset, length, 1);
477 _xor(offset, length, txn);
478 }
479
480 void BitmapFreelistManager::_xor(
481 uint64_t offset, uint64_t length,
482 KeyValueDB::Transaction txn)
483 {
484 // must be block aligned
485 assert((offset & block_mask) == offset);
486 assert((length & block_mask) == length);
487
488 uint64_t first_key = offset & key_mask;
489 uint64_t last_key = (offset + length - 1) & key_mask;
490 dout(20) << __func__ << " first_key 0x" << std::hex << first_key
491 << " last_key 0x" << last_key << std::dec << dendl;
492
493 if (first_key == last_key) {
494 bufferptr p(blocks_per_key >> 3);
495 p.zero();
496 unsigned s = (offset & ~key_mask) / bytes_per_block;
497 unsigned e = ((offset + length - 1) & ~key_mask) / bytes_per_block;
498 for (unsigned i = s; i <= e; ++i) {
499 p[i >> 3] ^= 1ull << (i & 7);
500 }
501 string k;
502 make_offset_key(first_key, &k);
503 bufferlist bl;
504 bl.append(p);
505 dout(30) << __func__ << " 0x" << std::hex << first_key << std::dec << ": ";
506 bl.hexdump(*_dout, false);
507 *_dout << dendl;
508 txn->merge(bitmap_prefix, k, bl);
509 } else {
510 // first key
511 {
512 bufferptr p(blocks_per_key >> 3);
513 p.zero();
514 unsigned s = (offset & ~key_mask) / bytes_per_block;
515 unsigned e = blocks_per_key;
516 for (unsigned i = s; i < e; ++i) {
517 p[i >> 3] ^= 1ull << (i & 7);
518 }
519 string k;
520 make_offset_key(first_key, &k);
521 bufferlist bl;
522 bl.append(p);
523 dout(30) << __func__ << " 0x" << std::hex << first_key << std::dec << ": ";
524 bl.hexdump(*_dout, false);
525 *_dout << dendl;
526 txn->merge(bitmap_prefix, k, bl);
527 first_key += bytes_per_key;
528 }
529 // middle keys
530 while (first_key < last_key) {
531 string k;
532 make_offset_key(first_key, &k);
533 dout(30) << __func__ << " 0x" << std::hex << first_key << std::dec
534 << ": ";
535 all_set_bl.hexdump(*_dout, false);
536 *_dout << dendl;
537 txn->merge(bitmap_prefix, k, all_set_bl);
538 first_key += bytes_per_key;
539 }
540 assert(first_key == last_key);
541 {
542 bufferptr p(blocks_per_key >> 3);
543 p.zero();
544 unsigned e = ((offset + length - 1) & ~key_mask) / bytes_per_block;
545 for (unsigned i = 0; i <= e; ++i) {
546 p[i >> 3] ^= 1ull << (i & 7);
547 }
548 string k;
549 make_offset_key(first_key, &k);
550 bufferlist bl;
551 bl.append(p);
552 dout(30) << __func__ << " 0x" << std::hex << first_key << std::dec << ": ";
553 bl.hexdump(*_dout, false);
554 *_dout << dendl;
555 txn->merge(bitmap_prefix, k, bl);
556 }
557 }
558 }