if (b.is_spanning()) {
out << " spanning " << b.id;
}
- out << " " << b.get_blob() << " " << b.get_blob_use_tracker()
- << " " << *b.shared_blob
- << ")";
+ out << " " << b.get_blob() << " " << b.get_blob_use_tracker();
+ if (b.shared_blob) {
+ out << " " << *b.shared_blob;
+ } else {
+ out << " (shared_blob=NULL)";
+ }
+ out << ")";
return out;
}
(double)1.0 - (double)cache_meta_ratio - (double)cache_kv_ratio;
if (cache_meta_ratio < 0 || cache_meta_ratio > 1.0) {
- derr << __func__ << "bluestore_cache_meta_ratio (" << cache_meta_ratio
+ derr << __func__ << " bluestore_cache_meta_ratio (" << cache_meta_ratio
<< ") must be in range [0,1.0]" << dendl;
return -EINVAL;
}
if (cache_kv_ratio < 0 || cache_kv_ratio > 1.0) {
- derr << __func__ << "bluestore_cache_kv_ratio (" << cache_kv_ratio
+ derr << __func__ << " bluestore_cache_kv_ratio (" << cache_kv_ratio
<< ") must be in range [0,1.0]" << dendl;
return -EINVAL;
}
if (cache_meta_ratio + cache_kv_ratio > 1.0) {
- derr << __func__ << "bluestore_cache_meta_ratio (" << cache_meta_ratio
+ derr << __func__ << " bluestore_cache_meta_ratio (" << cache_meta_ratio
<< ") + bluestore_cache_kv_ratio (" << cache_kv_ratio
<< ") = " << cache_meta_ratio + cache_kv_ratio << "; must be <= 1.0"
<< dendl;
return rotational;
}
+// Report whether the journal (WAL) device is rotational media.
+// Falls back to the main store's media type when BlueFS is not in use.
+bool BlueStore::is_journal_rotational()
+{
+ if (!bluefs) {
+ // No BlueFS means there is no separate WAL device; use the
+ // primary store device's rotational status instead.
+ dout(5) << __func__ << " bluefs disabled, default to store media type"
+ << dendl;
+ return is_rotational();
+ }
+ dout(10) << __func__ << " " << (int)bluefs->wal_is_rotational() << dendl;
+ return bluefs->wal_is_rotational();
+}
+
bool BlueStore::test_mount_in_use()
{
// most error conditions mean the mount is not in use (e.g., because
}
fm->enumerate_reset();
size_t count = used_blocks.count();
+ if (used_blocks.size() == count + 1) {
+ // this is due to http://tracker.ceph.com/issues/21089
+ bufferlist fm_bpb_bl, fm_blocks_bl, fm_bpk_bl;
+ db->get(PREFIX_ALLOC, "bytes_per_block", &fm_bpb_bl);
+ db->get(PREFIX_ALLOC, "blocks", &fm_blocks_bl);
+ db->get(PREFIX_ALLOC, "blocks_per_key", &fm_bpk_bl);
+ uint64_t fm_blocks = 0;
+ uint64_t fm_bsize = 1;
+ uint64_t fm_blocks_per_key = 1;
+ try {
+ auto p = fm_blocks_bl.begin();
+ ::decode(fm_blocks, p);
+ auto q = fm_bpb_bl.begin();
+ ::decode(fm_bsize, q);
+ auto r = fm_bpk_bl.begin();
+ ::decode(fm_blocks_per_key, r);
+ } catch (buffer::error& e) {
+ }
+ uint64_t dev_bsize = bdev->get_block_size();
+ uint64_t bad_size = bdev->get_size() & ~fm_bsize;
+ if (used_blocks.test(bad_size / dev_bsize) == 0) {
+ // this is the last block of the device that we previously
+ // (incorrectly) truncated off of the effective device size. this
+ // prevented BitmapFreelistManager from marking it as used along with
+ // the other "past-eof" blocks in the last key slot. mark it used
+ // now.
+ derr << __func__ << " warning: fixing leaked block 0x" << std::hex
+ << bad_size << "~" << fm_bsize << std::dec << " due to old bug"
+ << dendl;
+ KeyValueDB::Transaction t = db->get_transaction();
+ // fix freelistmanager metadata (the internal 'blocks' count is
+ // rounded up to include the trailing key, past eof)
+ uint64_t new_blocks = bdev->get_size() / fm_bsize;
+ if (new_blocks / fm_blocks_per_key * fm_blocks_per_key != new_blocks) {
+ new_blocks = (new_blocks / fm_blocks_per_key + 1) *
+ fm_blocks_per_key;
+ }
+ if (new_blocks != fm_blocks) {
+ // the fm block count increased
+ derr << __func__ << " freelist block and key count changed, fixing 0x"
+ << std::hex << bdev->get_size() << "~"
+ << ((new_blocks * fm_bsize) - bdev->get_size()) << std::dec
+ << dendl;
+ bufferlist bl;
+ ::encode(new_blocks, bl);
+ t->set(PREFIX_ALLOC, "blocks", bl);
+ fm->allocate(bdev->get_size(),
+ (new_blocks * fm_bsize) - bdev->get_size(),
+ t);
+ } else {
+ // block count is the same, but size changed; fix just the size
+ derr << __func__ << " fixing just the stray block at 0x"
+ << std::hex << bad_size << "~" << fm_bsize << std::dec << dendl;
+ fm->allocate(bad_size, fm_bsize, t);
+ }
+ bufferlist sizebl;
+ ::encode(bdev->get_size(), sizebl);
+ t->set(PREFIX_ALLOC, "size", sizebl);
+ int r = db->submit_transaction_sync(t);
+ assert(r == 0);
+
+ used_blocks.set(bad_size / dev_bsize);
+ ++count;
+ }
+ }
if (used_blocks.size() != count) {
assert(used_blocks.size() > count);
- derr << __func__ << " error: leaked some space;"
- << (used_blocks.size() - count) * min_alloc_size
- << " bytes leaked" << dendl;
++errors;
+ used_blocks.flip();
+ size_t start = used_blocks.find_first();
+ while (start != decltype(used_blocks)::npos) {
+ size_t cur = start;
+ while (true) {
+ size_t next = used_blocks.find_next(cur);
+ if (next != cur + 1) {
+ derr << __func__ << " error: leaked extent 0x" << std::hex
+ << ((uint64_t)start * block_size) << "~"
+ << ((cur + 1 - start) * block_size) << std::dec
+ << dendl;
+ start = next;
+ break;
+ }
+ cur = next;
+ }
+ }
+ used_blocks.flip();
}
}
if (txc->deferred_txn) {
// ensure we do not block here because of deferred writes
if (!throttle_deferred_bytes.get_or_fail(txc->cost)) {
+ dout(10) << __func__ << " failed get throttle_deferred_bytes, aggressive"
+ << dendl;
+ ++deferred_aggressive;
deferred_try_submit();
throttle_deferred_bytes.get(txc->cost);
- }
+ --deferred_aggressive;
+ }
}
utime_t tend = ceph_clock_now();
case Transaction::OP_TRUNCATE:
{
uint64_t off = op->off;
- _truncate(txc, c, o, off);
+ r = _truncate(txc, c, o, off);
}
break;
dout(15) << __func__ << " " << c->cid << " " << o->oid
<< " 0x" << std::hex << offset << "~" << length << std::dec
<< dendl;
- _assign_nid(txc, o);
- int r = _do_write(txc, c, o, offset, length, bl, fadvise_flags);
- txc->write_onode(o);
-
+ int r = 0;
+ if (offset + length >= OBJECT_MAX_SIZE) {
+ r = -E2BIG;
+ } else {
+ _assign_nid(txc, o);
+ r = _do_write(txc, c, o, offset, length, bl, fadvise_flags);
+ txc->write_onode(o);
+ }
dout(10) << __func__ << " " << c->cid << " " << o->oid
<< " 0x" << std::hex << offset << "~" << length << std::dec
<< " = " << r << dendl;
dout(15) << __func__ << " " << c->cid << " " << o->oid
<< " 0x" << std::hex << offset << "~" << length << std::dec
<< dendl;
- _assign_nid(txc, o);
- int r = _do_zero(txc, c, o, offset, length);
+ int r = 0;
+ if (offset + length >= OBJECT_MAX_SIZE) {
+ r = -E2BIG;
+ } else {
+ _assign_nid(txc, o);
+ r = _do_zero(txc, c, o, offset, length);
+ }
dout(10) << __func__ << " " << c->cid << " " << o->oid
<< " 0x" << std::hex << offset << "~" << length << std::dec
<< " = " << r << dendl;
txc->write_onode(o);
}
-void BlueStore::_truncate(TransContext *txc,
+int BlueStore::_truncate(TransContext *txc,
CollectionRef& c,
OnodeRef& o,
uint64_t offset)
dout(15) << __func__ << " " << c->cid << " " << o->oid
<< " 0x" << std::hex << offset << std::dec
<< dendl;
- _do_truncate(txc, c, o, offset);
+ int r = 0;
+ if (offset >= OBJECT_MAX_SIZE) {
+ r = -E2BIG;
+ } else {
+ _do_truncate(txc, c, o, offset);
+ }
+ dout(10) << __func__ << " " << c->cid << " " << o->oid
+ << " 0x" << std::hex << offset << std::dec
+ << " = " << r << dendl;
+ return r;
}
int BlueStore::_do_remove(
return 0;
}
- uint32_t b_start = OBJECT_MAX_SIZE;
- uint32_t b_end = 0;
for (auto& e : h->extent_map.extent_map) {
const bluestore_blob_t& b = e.blob->get_blob();
SharedBlob *sb = e.blob->shared_blob.get();
dout(20) << __func__ << " unsharing " << e << dendl;
bluestore_blob_t& blob = e.blob->dirty_blob();
blob.clear_flag(bluestore_blob_t::FLAG_SHARED);
- if (e.logical_offset < b_start) {
- b_start = e.logical_offset;
- }
- if (e.logical_end() > b_end) {
- b_end = e.logical_end();
- }
+ h->extent_map.dirty_range(e.logical_offset, 1);
}
}
-
- assert(b_end > b_start);
- h->extent_map.dirty_range(b_start, b_end - b_start);
txc->write_onode(h);
return 0;
uint64_t end = srcoff + length;
uint32_t dirty_range_begin = 0;
uint32_t dirty_range_end = 0;
+ bool src_dirty = false;
for (auto ep = oldo->extent_map.seek_lextent(srcoff);
ep != oldo->extent_map.extent_map.end();
++ep) {
// make sure it is shared
if (!blob.is_shared()) {
c->make_blob_shared(_assign_blobid(txc), e.blob);
- if (dirty_range_begin == 0) {
+ if (!src_dirty) {
+ src_dirty = true;
dirty_range_begin = e.logical_offset;
}
assert(e.logical_end() > 0);
dout(20) << __func__ << " dst " << *ne << dendl;
++n;
}
- if (dirty_range_end > dirty_range_begin) {
+ if (src_dirty) {
oldo->extent_map.dirty_range(dirty_range_begin,
dirty_range_end - dirty_range_begin);
txc->write_onode(oldo);
<< " to offset 0x" << dstoff << std::dec << dendl;
int r = 0;
+ if (srcoff + length >= OBJECT_MAX_SIZE ||
+ dstoff + length >= OBJECT_MAX_SIZE) {
+ r = -E2BIG;
+ goto out;
+ }
if (srcoff + length > oldo->onode.size) {
r = -EINVAL;
goto out;