X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=ceph%2Fsrc%2Fos%2Fbluestore%2FBitmapFreelistManager.cc;h=adf85e98a264def8a46bd0999943a526168ce2d9;hb=1911f103e16ae0d04db10fb41db8217ef4c320d3;hp=7dcef4384adb1058f8832581b8e08eb69fe4ec57;hpb=78f773100ed5d2ebc9d99e65a3d7e3a6f541a97e;p=ceph.git diff --git a/ceph/src/os/bluestore/BitmapFreelistManager.cc b/ceph/src/os/bluestore/BitmapFreelistManager.cc index 7dcef4384..adf85e98a 100644 --- a/ceph/src/os/bluestore/BitmapFreelistManager.cc +++ b/ceph/src/os/bluestore/BitmapFreelistManager.cc @@ -4,6 +4,7 @@ #include "BitmapFreelistManager.h" #include "kv/KeyValueDB.h" #include "os/kv.h" +#include "include/stringify.h" #include "common/debug.h" @@ -66,9 +67,8 @@ int BitmapFreelistManager::create(uint64_t new_size, uint64_t granularity, _init_misc(); - blocks = size / bytes_per_block; - if (blocks / blocks_per_key * blocks_per_key != blocks) { - blocks = (blocks / blocks_per_key + 1) * blocks_per_key; + blocks = size_2_block_count(size); + if (blocks * bytes_per_block > size) { dout(10) << __func__ << " rounding blocks up from 0x" << std::hex << size << " to 0x" << (blocks * bytes_per_block) << " (0x" << blocks << " blocks)" << std::dec << dendl; @@ -104,28 +104,29 @@ int BitmapFreelistManager::create(uint64_t new_size, uint64_t granularity, return 0; } -int BitmapFreelistManager::expand(uint64_t new_size, KeyValueDB::Transaction txn) +int BitmapFreelistManager::_expand(uint64_t old_size, KeyValueDB* db) { - assert(new_size > size); + assert(old_size < size); ceph_assert(isp2(bytes_per_block)); - uint64_t blocks0 = size / bytes_per_block; - if (blocks0 / blocks_per_key * blocks_per_key != blocks0) { - blocks0 = (blocks / blocks_per_key + 1) * blocks_per_key; - dout(10) << __func__ << " rounding blocks up from 0x" << std::hex << size - << " to 0x" << (blocks0 * bytes_per_block) + KeyValueDB::Transaction txn; + txn = db->get_transaction(); + + auto blocks0 = size_2_block_count(old_size); + if (blocks0 * bytes_per_block > old_size) { + dout(10) << __func__ << " rounding1 blocks up from 0x" << std::hex + << old_size << " to 0x" << (blocks0 * bytes_per_block) << " (0x" << blocks0 << " blocks)" << std::dec << dendl; // reset past-eof blocks to unallocated - _xor(size, blocks0 * bytes_per_block - size, txn); + _xor(old_size, blocks0 * bytes_per_block - old_size, txn); } - size = p2align(new_size, bytes_per_block); - blocks = size / bytes_per_block; + size = p2align(size, bytes_per_block); + blocks = size_2_block_count(size); - if (blocks / blocks_per_key * blocks_per_key != blocks) { - blocks = (blocks / blocks_per_key + 1) * blocks_per_key; - dout(10) << __func__ << " rounding blocks up from 0x" << std::hex << size - << " to 0x" << (blocks * bytes_per_block) + if (blocks * bytes_per_block > size) { + dout(10) << __func__ << " rounding2 blocks up from 0x" << std::hex + << size << " to 0x" << (blocks * bytes_per_block) << " (0x" << blocks << " blocks)" << std::dec << dendl; // set past-eof blocks as allocated _xor(size, blocks * bytes_per_block - size, txn); @@ -147,13 +148,29 @@ int BitmapFreelistManager::expand(uint64_t new_size, KeyValueDB::Transaction txn encode(size, bl); txn->set(meta_prefix, "size", bl); } + db->submit_transaction_sync(txn); + return 0; } -int BitmapFreelistManager::init(KeyValueDB *kvdb) +int BitmapFreelistManager::read_size_meta_from_db(KeyValueDB* kvdb, + uint64_t* res) { - dout(1) << __func__ << dendl; + bufferlist v; + int r = kvdb->get(meta_prefix, "size", &v); + if (r < 0) { + derr << __func__ << " missing size meta in DB" << dendl; + return ENOENT; + } else { + auto p = v.cbegin(); + decode(*res, p); + r = 0; + } + return r; +} +void BitmapFreelistManager::_load_from_db(KeyValueDB* kvdb) +{ KeyValueDB::Iterator it = kvdb->get_iterator(meta_prefix); it->lower_bound(string()); @@ -165,31 +182,44 @@ int BitmapFreelistManager::init(KeyValueDB *kvdb) auto p = bl.cbegin(); decode(bytes_per_block, p); dout(10) << __func__ << " bytes_per_block 0x" << std::hex - << bytes_per_block << std::dec << dendl; + << bytes_per_block << std::dec << dendl; } else if (k == "blocks") { bufferlist bl = it->value(); auto p = bl.cbegin(); decode(blocks, p); dout(10) << __func__ << " blocks 0x" << std::hex << blocks << std::dec - << dendl; + << dendl; } else if (k == "size") { bufferlist bl = it->value(); auto p = bl.cbegin(); decode(size, p); dout(10) << __func__ << " size 0x" << std::hex << size << std::dec - << dendl; + << dendl; } else if (k == "blocks_per_key") { bufferlist bl = it->value(); auto p = bl.cbegin(); decode(blocks_per_key, p); dout(10) << __func__ << " blocks_per_key 0x" << std::hex << blocks_per_key - << std::dec << dendl; + << std::dec << dendl; } else { derr << __func__ << " unrecognized meta " << k << dendl; - return -EIO; } it->next(); } +} + + +int BitmapFreelistManager::init(const bluestore_bdev_label_t& label, + KeyValueDB *kvdb, + bool db_in_read_only) +{ + dout(1) << __func__ << dendl; + int r = _init_from_label(label); + if (r != 0) { + dout(1) << __func__ << " fall back to legacy meta repo" << dendl; + _load_from_db(kvdb); + } + _sync(kvdb, db_in_read_only); dout(10) << __func__ << std::hex << " size 0x" << size @@ -201,6 +231,73 @@ int BitmapFreelistManager::init(KeyValueDB *kvdb) return 0; } +int BitmapFreelistManager::_init_from_label(const bluestore_bdev_label_t& label) +{ + dout(1) << __func__ << dendl; + + int r = ENOENT; + string err; + + auto it = label.meta.find("bfm_size"); + auto end = label.meta.end(); + if (it != end) { + size = strict_iecstrtoll(it->second.c_str(), &err); + if (!err.empty()) { + derr << __func__ << " Failed to parse - " + << it->first << ":" << it->second + << ", error: " << err << dendl; + return r; + } + } else { + // this is expected for legacy deployed OSDs + dout(0) << __func__ << " bfm_size not found in bdev meta" << dendl; + return r; + } + + it = label.meta.find("bfm_blocks"); + if (it != end) { + blocks = strict_iecstrtoll(it->second.c_str(), &err); + if (!err.empty()) { + derr << __func__ << " Failed to parse - " + << it->first << ":" << it->second + << ", error: " << err << dendl; + return r; + } + } else { + derr << __func__ << " bfm_blocks not found in bdev meta" << dendl; + return r; + } + + it = label.meta.find("bfm_bytes_per_block"); + if (it != end) { + bytes_per_block = strict_iecstrtoll(it->second.c_str(), &err); + if (!err.empty()) { + derr << __func__ << " Failed to parse - " + << it->first << ":" << it->second + << ", error: " << err << dendl; + return r; + } + } else { + derr << __func__ << " bfm_bytes_per_block not found in bdev meta" << dendl; + return r; + } + it = label.meta.find("bfm_blocks_per_key"); + if (it != end) { + blocks_per_key = strict_iecstrtoll(it->second.c_str(), &err); + if (!err.empty()) { + derr << __func__ << " Failed to parse - " + << it->first << ":" << it->second + << ", error: " << err << dendl; + return r; + } + } else { + derr << __func__ << " bfm_blocks_per_key not found in bdev meta" << dendl; + return r; + } + r = 0; + return 0; +} + void BitmapFreelistManager::_init_misc() { bufferptr z(blocks_per_key >> 3); @@ -217,6 +314,31 @@ void BitmapFreelistManager::_init_misc() << dendl; } +void BitmapFreelistManager::sync(KeyValueDB* kvdb) +{ + _sync(kvdb, true); +} + +void BitmapFreelistManager::_sync(KeyValueDB* kvdb, bool read_only) +{ + dout(10) << __func__ << " checks if size sync is needed" << dendl; + uint64_t size_db = 0; + int r = read_size_meta_from_db(kvdb, &size_db); + ceph_assert(r >= 0); + if (!read_only && size_db < size) { + dout(1) << __func__ << " committing new size 0x" << std::hex << size + << std::dec << dendl; + r = _expand(size_db, kvdb); + ceph_assert(r == 0); + } else if (size_db > size) { + // this might hapen when OSD passed the following sequence: + // upgrade -> downgrade -> expand -> upgrade + // One needs to run expand once again to syncup + dout(1) << __func__ << " fall back to legacy meta repo" << dendl; + _load_from_db(kvdb); + } +} + void BitmapFreelistManager::shutdown() { dout(1) << __func__ << dendl; @@ -598,3 +720,30 @@ void BitmapFreelistManager::_xor( } } } + +uint64_t BitmapFreelistManager::size_2_block_count(uint64_t target_size) const +{ + auto target_blocks = target_size / bytes_per_block; + if (target_blocks / blocks_per_key * blocks_per_key != target_blocks) { + target_blocks = (target_blocks / blocks_per_key + 1) * blocks_per_key; + } + return target_blocks; +} + +void BitmapFreelistManager::get_meta( + uint64_t target_size, + std::vector>* res) const +{ + if (target_size == 0) { + res->emplace_back("bfm_blocks", stringify(blocks)); + res->emplace_back("bfm_size", stringify(size)); + } else { + target_size = p2align(target_size, bytes_per_block); + auto target_blocks = size_2_block_count(target_size); + + res->emplace_back("bfm_blocks", stringify(target_blocks)); + res->emplace_back("bfm_size", stringify(target_size)); + } + res->emplace_back("bfm_bytes_per_block", stringify(bytes_per_block)); + res->emplace_back("bfm_blocks_per_key", stringify(blocks_per_key)); +}