f->close_section();
}
+// Debug/admin helper: for each configured block device, print its index,
+// total size, and the extent set this BlueFS instance owns on it (interval
+// set printed in hex). Devices that are not configured are skipped.
+void BlueFS::dump_block_extents(ostream& out)
+{
+ for (unsigned i = 0; i < MAX_BDEV; ++i) {
+ if (!bdev[i]) {
+ continue;
+ }
+ out << i << " : size 0x" << std::hex << bdev[i]->get_size()
+ << " : own 0x" << block_all[i] << std::dec << "\n";
+ }
+}
void BlueFS::get_usage(vector<pair<uint64_t,uint64_t>> *usage)
{
int r = _allocate(
log_file->fnode.prefer_bdev,
cct->_conf->bluefs_max_log_runway,
- &log_file->fnode.extents);
- log_file->fnode.recalc_allocated();
+ &log_file->fnode);
assert(r == 0);
log_writer = _create_writer(log_file);
}
file_map.erase(file->fnode.ino);
file->deleted = true;
- file->fnode.recalc_allocated();
+
if (file->dirty_seq) {
assert(file->dirty_seq > log_seq_stable);
assert(dirty_files.count(file->dirty_seq));
dout(20) << __func__ << " need " << need << dendl;
mempool::bluefs::vector<bluefs_extent_t> old_extents;
- old_extents.swap(log_file->fnode.extents);
- log_file->fnode.recalc_allocated();
+ uint64_t old_allocated = 0;
+ log_file->fnode.swap_extents(old_extents, old_allocated);
while (log_file->fnode.get_allocated() < need) {
int r = _allocate(log_file->fnode.prefer_bdev,
need - log_file->fnode.get_allocated(),
- &log_file->fnode.extents);
- log_file->fnode.recalc_allocated();
+ &log_file->fnode);
assert(r == 0);
}
assert(!new_log);
assert(!new_log_writer);
+ // create a new log [writer] so that we know compaction is in progress
+ // (see _should_compact_log)
+ new_log = new File;
+ new_log->fnode.ino = 0; // so that _flush_range won't try to log the fnode
+
+ // 0. wait for any racing flushes to complete. (We do not want to block
+ // in _flush_sync_log with jump_to set or else a racing thread might flush
+ // our entries and our jump_to update won't be correct.)
+ while (log_flushing) {
+ dout(10) << __func__ << " log is currently flushing, waiting" << dendl;
+ log_cond.wait(l);
+ }
+
// 1. allocate new log space and jump to it.
old_log_jump_to = log_file->fnode.get_allocated();
uint64_t need = old_log_jump_to + cct->_conf->bluefs_max_log_runway;
while (log_file->fnode.get_allocated() < need) {
int r = _allocate(log_file->fnode.prefer_bdev,
cct->_conf->bluefs_max_log_runway,
- &log_file->fnode.extents);
+ &log_file->fnode);
assert(r == 0);
- log_file->fnode.recalc_allocated();
}
dout(10) << __func__ << " log extents " << log_file->fnode.extents << dendl;
dout(10) << __func__ << " new_log_jump_to 0x" << std::hex << new_log_jump_to
<< std::dec << dendl;
- // create a new log [writer]
- new_log = new File;
- new_log->fnode.ino = 0; // so that _flush_range won't try to log the fnode
+ // allocate
int r = _allocate(BlueFS::BDEV_DB, new_log_jump_to,
- &new_log->fnode.extents);
+ &new_log->fnode);
assert(r == 0);
- new_log->fnode.recalc_allocated();
new_log_writer = _create_writer(new_log);
new_log_writer->append(bl);
if (discarded + e.length <= old_log_jump_to) {
dout(10) << __func__ << " remove old log extent " << e << dendl;
discarded += e.length;
- log_file->fnode.extents.erase(log_file->fnode.extents.begin());
+ log_file->fnode.pop_front_extent();
} else {
dout(10) << __func__ << " remove front of old log extent " << e << dendl;
uint64_t drop = old_log_jump_to - discarded;
}
old_extents.push_back(temp);
}
- new_log->fnode.extents.insert(new_log->fnode.extents.end(),
- log_file->fnode.extents.begin(),
- log_file->fnode.extents.end());
+ auto from = log_file->fnode.extents.begin();
+ auto to = log_file->fnode.extents.end();
+ while (from != to) {
+ new_log->fnode.append_extent(*from);
+ ++from;
+ }
// clear the extents from old log file, they are added to new log
- log_file->fnode.extents.clear();
-
+ log_file->fnode.clear_extents();
// swap the log files. New log file is the log file now.
- log_file->fnode.extents.swap(new_log->fnode.extents);
- log_file->fnode.recalc_allocated();
- new_log->fnode.recalc_allocated();
+ new_log->fnode.swap_extents(log_file->fnode);
+
log_writer->pos = log_writer->file->fnode.size =
log_writer->pos - old_log_jump_to + new_log_jump_to;
while (log_flushing) {
dout(10) << __func__ << " want_seq " << want_seq
<< " log is currently flushing, waiting" << dendl;
+ assert(!jump_to);
log_cond.wait(l);
}
if (want_seq && want_seq <= log_seq_stable) {
dout(10) << __func__ << " want_seq " << want_seq << " <= log_seq_stable "
<< log_seq_stable << ", done" << dendl;
+ assert(!jump_to);
return 0;
}
if (log_t.empty() && dirty_files.empty()) {
dout(10) << __func__ << " want_seq " << want_seq
<< " " << log_t << " not dirty, dirty_files empty, no-op" << dendl;
+ assert(!jump_to);
return 0;
}
}
int r = _allocate(log_writer->file->fnode.prefer_bdev,
cct->_conf->bluefs_max_log_runway,
- &log_writer->file->fnode.extents);
+ &log_writer->file->fnode);
assert(r == 0);
- log_writer->file->fnode.recalc_allocated();
log_t.op_file_update(log_writer->file->fnode);
}
assert(h->file->fnode.ino != 1);
int r = _allocate(h->file->fnode.prefer_bdev,
offset + length - allocated,
- &h->file->fnode.extents);
+ &h->file->fnode);
if (r < 0) {
derr << __func__ << " allocated: 0x" << std::hex << allocated
<< " offset: 0x" << offset << " length: 0x" << length << std::dec
<< dendl;
+ assert(0 == "bluefs enospc");
return r;
}
- h->file->fnode.recalc_allocated();
if (cct->_conf->bluefs_preextend_wal_files &&
h->writer_type == WRITER_WAL) {
// NOTE: this *requires* that rocksdb also has log recycling
}
int BlueFS::_allocate(uint8_t id, uint64_t len,
- mempool::bluefs::vector<bluefs_extent_t> *ev)
+ bluefs_fnode_t* node)
{
dout(10) << __func__ << " len 0x" << std::hex << len << std::dec
<< " from " << (int)id << dendl;
uint64_t left = ROUND_UP_TO(len, min_alloc_size);
int r = -ENOSPC;
+ int64_t alloc_len = 0;
+ AllocExtentVector extents;
+
if (alloc[id]) {
r = alloc[id]->reserve(left);
}
- if (r < 0) {
+
+ if (r == 0) {
+ uint64_t hint = 0;
+ if (!node->extents.empty() && node->extents.back().bdev == id) {
+ hint = node->extents.back().end();
+ }
+ extents.reserve(4); // 4 should be (more than) enough for most allocations
+ alloc_len = alloc[id]->allocate(left, min_alloc_size, hint, &extents);
+ }
+ if (r < 0 || (alloc_len < (int64_t)left)) {
+ if (r == 0) {
+ alloc[id]->unreserve(left - alloc_len);
+ for (auto& p : extents) {
+ alloc[id]->release(p.offset, p.length);
+ }
+ }
if (id != BDEV_SLOW) {
if (bdev[id]) {
dout(1) << __func__ << " failed to allocate 0x" << std::hex << left
<< "; fallback to bdev " << (int)id + 1
<< std::dec << dendl;
}
- return _allocate(id + 1, len, ev);
+ return _allocate(id + 1, len, node);
}
if (bdev[id])
derr << __func__ << " failed to allocate 0x" << std::hex << left
<< " on bdev " << (int)id
<< ", free 0x" << alloc[id]->get_free() << std::dec << dendl;
else
derr << __func__ << " failed to allocate 0x" << std::hex << left
<< " on bdev " << (int)id << ", dne" << std::dec << dendl;
- return r;
- }
-
- uint64_t hint = 0;
- if (!ev->empty()) {
- hint = ev->back().end();
- }
-
- AllocExtentVector extents;
- extents.reserve(4); // 4 should be (more than) enough for most allocations
- int64_t alloc_len = alloc[id]->allocate(left, min_alloc_size, hint,
- &extents);
- if (alloc_len < (int64_t)left) {
- derr << __func__ << " allocate failed on 0x" << std::hex << left
- << " min_alloc_size 0x" << min_alloc_size
- << " hint 0x" << hint << std::dec << dendl;
- alloc[id]->dump();
- assert(0 == "allocate failed... wtf");
+ if (alloc[id])
+ alloc[id]->dump();
return -ENOSPC;
}
for (auto& p : extents) {
- bluefs_extent_t e = bluefs_extent_t(id, p.offset, p.length);
- if (!ev->empty() &&
- ev->back().bdev == e.bdev &&
- ev->back().end() == (uint64_t) e.offset) {
- ev->back().length += e.length;
- } else {
- ev->push_back(e);
- }
+ node->append_extent(bluefs_extent_t(id, p.offset, p.length));
}
return 0;
uint64_t allocated = f->fnode.get_allocated();
if (off + len > allocated) {
uint64_t want = off + len - allocated;
- int r = _allocate(f->fnode.prefer_bdev, want, &f->fnode.extents);
+ int r = _allocate(f->fnode.prefer_bdev, want, &f->fnode);
if (r < 0)
return r;
- f->fnode.recalc_allocated();
log_t.op_file_update(f->fnode);
}
return 0;
for (auto& p : file->fnode.extents) {
pending_release[p.bdev].insert(p.offset, p.length);
}
- file->fnode.extents.clear();
- file->fnode.recalc_allocated();
+
+ file->fnode.clear_extents();
}
}
assert(file->fnode.ino > 1);
bool BlueFS::wal_is_rotational()
{
- if (!bdev[BDEV_WAL] || bdev[BDEV_WAL]->is_rotational())
- return true;
- return false;
+ // Report the media type that will actually service WAL writes: when no
+ // dedicated WAL device exists, WAL traffic falls through to the DB
+ // device, and failing that to the slow device, so follow that same
+ // fallback chain here instead of pessimistically reporting rotational.
+ if (bdev[BDEV_WAL]) {
+ return bdev[BDEV_WAL]->is_rotational();
+ } else if (bdev[BDEV_DB]) {
+ return bdev[BDEV_DB]->is_rotational();
+ }
+ return bdev[BDEV_SLOW]->is_rotational();
}