<< " removing self from set " << get_parent()
<< dendl;
if (get_parent()) {
- if (get_parent()->try_remove(this)) {
- delete this;
- } else {
- ldout(coll->store->cct, 20)
- << __func__ << " " << this << " lost race to remove myself from set"
- << dendl;
- }
- } else {
- delete this;
+ get_parent()->remove(this);
}
+ delete this;
}
}
string bfn;
struct stat st;
- if (read_meta("path_block.db", &bfn) < 0) {
- bfn = path + "/block.db";
- }
+ bfn = path + "/block.db";
if (::stat(bfn.c_str(), &st) == 0) {
r = bluefs->add_block_device(BlueFS::BDEV_DB, bfn);
if (r < 0) {
}
bluefs_shared_bdev = BlueFS::BDEV_SLOW;
bluefs_single_shared_device = false;
- } else if (::lstat(bfn.c_str(), &st) == -1) {
- bluefs_shared_bdev = BlueFS::BDEV_DB;
} else {
- //symlink exist is bug
- derr << __func__ << " " << bfn << " link target doesn't exist" << dendl;
r = -errno;
- goto free_bluefs;
+ if (::lstat(bfn.c_str(), &st) == -1) {
+ r = 0;
+ bluefs_shared_bdev = BlueFS::BDEV_DB;
+ } else {
+ derr << __func__ << " " << bfn << " symlink exists but target unusable: "
+ << cpp_strerror(r) << dendl;
+ goto free_bluefs;
+ }
}
// shared device
- if (read_meta("path_block", &bfn) < 0) {
- bfn = path + "/block";
- }
+ bfn = path + "/block";
r = bluefs->add_block_device(bluefs_shared_bdev, bfn);
if (r < 0) {
derr << __func__ << " add block device(" << bfn << ") returned: "
bluefs_extents.insert(start, initial);
}
- if (read_meta("path_block.wal", &bfn) < 0) {
- bfn = path + "/block.wal";
- }
+ bfn = path + "/block.wal";
if (::stat(bfn.c_str(), &st) == 0) {
r = bluefs->add_block_device(BlueFS::BDEV_WAL, bfn);
if (r < 0) {
}
cct->_conf->set_val("rocksdb_separate_wal_dir", "true");
bluefs_single_shared_device = false;
- } else if (::lstat(bfn.c_str(), &st) == -1) {
- cct->_conf->set_val("rocksdb_separate_wal_dir", "false");
} else {
- //symlink exist is bug
- derr << __func__ << " " << bfn << " link target doesn't exist" << dendl;
r = -errno;
- goto free_bluefs;
+ if (::lstat(bfn.c_str(), &st) == -1) {
+ r = 0;
+ cct->_conf->set_val("rocksdb_separate_wal_dir", "false");
+ } else {
+ derr << __func__ << " " << bfn << " symlink exists but target unusable: "
+ << cpp_strerror(r) << dendl;
+ goto free_bluefs;
+ }
}
if (create) {
int64_t alloc_len = alloc->allocate(gift, cct->_conf->bluefs_alloc_size,
0, 0, &exts);
- if (alloc_len < (int64_t)gift) {
- derr << __func__ << " allocate failed on 0x" << std::hex << gift
- << " min_alloc_size 0x" << min_alloc_size << std::dec << dendl;
+ if (alloc_len <= 0) {
+ dout(1) << __func__ << " no allocate on 0x" << std::hex << gift
+ << " min_alloc_size 0x" << min_alloc_size << std::dec << dendl;
+ alloc->unreserve(gift);
+ alloc->dump();
+ return 0;
+ } else if (alloc_len < (int64_t)gift) {
+ dout(1) << __func__ << " insufficient allocate on 0x" << std::hex << gift
+ << " min_alloc_size 0x" << min_alloc_size
+ << " allocated 0x" << alloc_len
+ << std::dec << dendl;
+ alloc->unreserve(gift - alloc_len);
alloc->dump();
- assert(0 == "allocate failed, wtf");
- return -ENOSPC;
}
for (auto& p : exts) {
bluestore_pextent_t e = bluestore_pextent_t(p);
int BlueStore::_open_collections(int *errors)
{
+ dout(10) << __func__ << dendl;
assert(coll_map.empty());
KeyValueDB::Iterator it = db->get_iterator(PREFIX_COLL);
for (it->upper_bound(string());
<< pretty_binary_string(it->key()) << dendl;
return -EIO;
}
- dout(20) << __func__ << " opened " << cid << " " << c << dendl;
+ dout(20) << __func__ << " opened " << cid << " " << c
+ << " " << c->cnode << dendl;
coll_map[cid] = c;
} else {
derr << __func__ << " unrecognized collection " << it->key() << dendl;
}
if (cct->_conf->bluestore_block_preallocate_file) {
-#ifdef HAVE_POSIX_FALLOCATE
- r = ::posix_fallocate(fd, 0, size);
- if (r) {
+ r = ::ceph_posix_fallocate(fd, 0, size);
+ if (r > 0) {
derr << __func__ << " failed to prefallocate " << name << " file to "
<< size << ": " << cpp_strerror(r) << dendl;
VOID_TEMP_FAILURE_RETRY(::close(fd));
return -r;
}
-#else
- char data[1024*128];
- for (uint64_t off = 0; off < size; off += sizeof(data)) {
- if (off + sizeof(data) > size)
- r = ::write(fd, data, size - off);
- else
- r = ::write(fd, data, sizeof(data));
- if (r < 0) {
- r = -errno;
- derr << __func__ << " failed to prefallocate w/ write " << name << " file to "
- << size << ": " << cpp_strerror(r) << dendl;
- VOID_TEMP_FAILURE_RETRY(::close(fd));
- return r;
- }
- }
-#endif
}
dout(1) << __func__ << " resized " << name << " file to "
<< pretty_si_t(size) << "B" << dendl;
if (r < 0)
goto out_close_fsid;
- {
- string wal_path = cct->_conf->get_val<string>("bluestore_block_wal_path");
- if (wal_path.size()) {
- write_meta("path_block.wal", wal_path);
- }
- string db_path = cct->_conf->get_val<string>("bluestore_block_db_path");
- if (db_path.size()) {
- write_meta("path_block.db", db_path);
- }
- }
-
// choose min_alloc_size
if (cct->_conf->bluestore_min_alloc_size) {
min_alloc_size = cct->_conf->bluestore_min_alloc_size;
mempool_thread.init();
-
mounted = true;
return 0;
mempool_thread.shutdown();
dout(20) << __func__ << " stopping kv thread" << dendl;
_kv_stop();
- _reap_collections();
_flush_cache();
dout(20) << __func__ << " closing" << dendl;
continue;
}
c->cid.is_pg(&pgid);
- dout(20) << __func__ << " collection " << c->cid << dendl;
+ dout(20) << __func__ << " collection " << c->cid << " " << c->cnode
+ << dendl;
}
if (!expecting_shards.empty()) {
buf->available = alloc->get_free();
if (bluefs) {
- // part of our shared device is "free" according to BlueFS
- // Don't include bluestore_bluefs_min because that space can't
- // be used for any other purpose.
- buf->available += bluefs->get_free(bluefs_shared_bdev) - cct->_conf->bluestore_bluefs_min;
-
- // include dedicated db, too, if that isn't the shared device.
- if (bluefs_shared_bdev != BlueFS::BDEV_DB) {
- buf->total += bluefs->get_total(BlueFS::BDEV_DB);
+ // part of our shared device is "free" according to BlueFS, but
+ // bluestore_bluefs_min of it is reserved and cannot be used for
+ // anything else, so don't count that portion as available.
+ int64_t shared_available = std::min(
+ bluefs->get_free(bluefs_shared_bdev),
+ bluefs->get_total(bluefs_shared_bdev) - cct->_conf->bluestore_bluefs_min);
+ if (shared_available > 0) {
+ buf->available += shared_available;
}
}
void BlueStore::_queue_reap_collection(CollectionRef& c)
{
dout(10) << __func__ << " " << c << " " << c->cid << dendl;
- std::lock_guard<std::mutex> l(reap_lock);
+ // _reap_collections() runs in the same thread as this function,
+ // so no lock is needed here.
removed_collections.push_back(c);
}
void BlueStore::_reap_collections()
{
 list<CollectionRef> removed_colls;
{
- std::lock_guard<std::mutex> l(reap_lock);
- removed_colls.swap(removed_collections);
+ // _queue_reap_collection() runs in the same thread as this function,
+ // so no lock is needed here.
+ if (!removed_collections.empty())
+ removed_colls.swap(removed_collections);
+ else
+ return;
}
- bool all_reaped = true;
-
- for (list<CollectionRef>::iterator p = removed_colls.begin();
- p != removed_colls.end();
- ++p) {
+ list<CollectionRef>::iterator p = removed_colls.begin();
+ while (p != removed_colls.end()) {
CollectionRef c = *p;
dout(10) << __func__ << " " << c << " " << c->cid << dendl;
if (c->onode_map.map_any([&](OnodeRef o) {
if (o->flushing_count.load()) {
dout(10) << __func__ << " " << c << " " << c->cid << " " << o->oid
<< " flush_txns " << o->flushing_count << dendl;
- return false;
+ return true;
}
- return true;
+ return false;
})) {
- all_reaped = false;
+ ++p;
continue;
}
c->onode_map.clear();
+ p = removed_colls.erase(p);
dout(10) << __func__ << " " << c << " " << c->cid << " done" << dendl;
}
-
- if (all_reaped) {
+ if (removed_colls.empty()) {
dout(10) << __func__ << " all reaped" << dendl;
+ } else {
+ removed_collections.splice(removed_collections.begin(), removed_colls);
}
}
}
out:
- if (r == 0 && _debug_data_eio(oid)) {
+ if (r >= 0 && _debug_data_eio(oid)) {
r = -EIO;
derr << __func__ << " " << c->cid << " " << oid << " INJECT EIO" << dendl;
} else if (cct->_conf->bluestore_debug_random_read_err &&
pos += hole;
left -= hole;
}
- BlobRef bptr = lp->blob;
+ BlobRef& bptr = lp->blob;
unsigned l_off = pos - lp->logical_offset;
unsigned b_off = l_off + lp->blob_offset;
unsigned b_len = std::min(left, lp->length - l_off);
vector<bufferlist> compressed_blob_bls;
IOContext ioc(cct, NULL, true); // allow EIO
for (auto& p : blobs2read) {
- BlobRef bptr = p.first;
+ const BlobRef& bptr = p.first;
dout(20) << __func__ << " blob " << *bptr << std::hex
<< " need " << p.second << std::dec << dendl;
if (bptr->get_blob().is_compressed()) {
auto p = compressed_blob_bls.begin();
blobs2read_t::iterator b2r_it = blobs2read.begin();
while (b2r_it != blobs2read.end()) {
- BlobRef bptr = b2r_it->first;
+ const BlobRef& bptr = b2r_it->first;
dout(20) << __func__ << " blob " << *bptr << std::hex
<< " need 0x" << b2r_it->second << std::dec << dendl;
if (bptr->get_blob().is_compressed()) {
{
// update allocator with full released set
if (!cct->_conf->bluestore_debug_no_reuse_blocks) {
- dout(10) << __func__ << " " << txc << " " << txc->released << dendl;
+ dout(10) << __func__ << " " << txc << " " << std::hex
+ << txc->released << std::dec << dendl;
for (interval_set<uint64_t>::iterator p = txc->released.begin();
p != txc->released.end();
++p) {
}
kv_sync_thread.join();
kv_finalize_thread.join();
+ assert(removed_collections.empty());
{
std::lock_guard<std::mutex> l(kv_lock);
kv_stop = false;
return r;
}
-void BlueStore::_dump_onode(OnodeRef o, int log_level)
+void BlueStore::_dump_onode(const OnodeRef& o, int log_level)
{
if (!cct->_conf->subsys.should_gather(ceph_subsys_bluestore, log_level))
return;