return _block_picker(t, cursor, size, align);
}
-namespace {
- struct dispose_rs {
- void operator()(range_seg_t* p)
- {
- delete p;
- }
- };
-}
-
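+// Return [start, start + size) to the free space map, merging with the
+// adjacent extents (rs_before/rs_after) whenever they touch the new range.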
void AvlAllocator::_add_to_tree(uint64_t start, uint64_t size)
{
- assert(size != 0);
+ ceph_assert(size != 0);
uint64_t end = start + size;
bool merge_after = (rs_after != range_tree.end() && rs_after->start == end);
if (merge_before && merge_after) {
- range_size_tree.erase(*rs_before);
- range_size_tree.erase(*rs_after);
+ _range_size_tree_rm(*rs_before);
+ _range_size_tree_rm(*rs_after);
rs_after->start = rs_before->start;
range_tree.erase_and_dispose(rs_before, dispose_rs{});
- range_size_tree.insert(*rs_after);
+ _range_size_tree_try_insert(*rs_after);
} else if (merge_before) {
- range_size_tree.erase(*rs_before);
+ _range_size_tree_rm(*rs_before);
rs_before->end = end;
- range_size_tree.insert(*rs_before);
+ _range_size_tree_try_insert(*rs_before);
} else if (merge_after) {
- range_size_tree.erase(*rs_after);
+ _range_size_tree_rm(*rs_after);
rs_after->start = start;
- range_size_tree.insert(*rs_after);
+ _range_size_tree_try_insert(*rs_after);
} else {
- auto new_rs = new range_seg_t{start, end};
- range_tree.insert_before(rs_after, *new_rs);
- range_size_tree.insert(*new_rs);
+ _try_insert_range(start, end, &rs_after);
}
- num_free += size;
}
-void AvlAllocator::_remove_from_tree(uint64_t start, uint64_t size)
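+// Carve [start, end) out of the extent pointed at by rs, splitting,
+// trimming or erasing it as appropriate. Shared by _remove_from_tree
+// and _try_remove_from_tree.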
+void AvlAllocator::_process_range_removal(uint64_t start, uint64_t end,
+ AvlAllocator::range_tree_t::iterator& rs)
{
- uint64_t end = start + size;
-
- assert(size != 0);
- assert(size <= num_free);
-
- auto rs = range_tree.find(range_t{start, end}, range_tree.key_comp());
- /* Make sure we completely overlap with someone */
- assert(rs != range_tree.end());
- assert(rs->start <= start);
- assert(rs->end >= end);
-
bool left_over = (rs->start != start);
bool right_over = (rs->end != end);
- range_size_tree.erase(*rs);
+ _range_size_tree_rm(*rs);
if (left_over && right_over) {
- auto new_seg = new range_seg_t{end, rs->end};
+ auto old_right_end = rs->end;
+ auto insert_pos = rs;
+ ceph_assert(insert_pos != range_tree.end());
+ ++insert_pos;
rs->end = start;
- range_tree.insert(rs, *new_seg);
- range_size_tree.insert(*new_seg);
- range_size_tree.insert(*rs);
+
+    // Insert the tail first to make sure insert_pos hasn't been disposed of.
+    // This won't dispose rs though, since rs is already out of range_size_tree.
+    // We don't care about the small chance of a 'not-the-best-choice-for-removal'
+    // case, which might happen if rs has the lowest size.
+ _try_insert_range(end, old_right_end, &insert_pos);
+ _range_size_tree_try_insert(*rs);
} else if (left_over) {
rs->end = start;
- range_size_tree.insert(*rs);
+ _range_size_tree_try_insert(*rs);
} else if (right_over) {
rs->start = end;
- range_size_tree.insert(*rs);
+ _range_size_tree_try_insert(*rs);
} else {
range_tree.erase_and_dispose(rs, dispose_rs{});
}
- assert(num_free >= size);
- num_free -= size;
+}
+
+void AvlAllocator::_remove_from_tree(uint64_t start, uint64_t size)
+{
+ uint64_t end = start + size;
+
+ ceph_assert(size != 0);
+ ceph_assert(size <= num_free);
+
+ auto rs = range_tree.find(range_t{start, end}, range_tree.key_comp());
+  /* Make sure the removed range is entirely contained in one extent */
+ ceph_assert(rs != range_tree.end());
+ ceph_assert(rs->start <= start);
+ ceph_assert(rs->end >= end);
+
+ _process_range_removal(start, end, rs);
+}
+
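+// Best-effort counterpart of _remove_from_tree: remove only the parts of
+// [start, start + size) that are actually present, reporting each sub-range
+// through cb(offset, length, found), where found tells whether that piece
+// was removed (true) or absent from the tree (false), e.g.:
+//   _try_remove_from_tree(off, len,
+//     [&](uint64_t o, uint64_t l, bool found) { if (found) claimed += l; });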
+void AvlAllocator::_try_remove_from_tree(uint64_t start, uint64_t size,
+ std::function<void(uint64_t, uint64_t, bool)> cb)
+{
+ uint64_t end = start + size;
+
+ ceph_assert(size != 0);
+
+  auto rs = range_tree.find(range_t{start, end}, range_tree.key_comp());
+
+ if (rs == range_tree.end() || rs->start >= end) {
+ cb(start, size, false);
+ return;
+ }
+
+ do {
+ auto next_rs = rs;
+ ++next_rs;
+
+ if (start < rs->start) {
+ cb(start, rs->start - start, false);
+ start = rs->start;
+ }
+ auto range_end = std::min(rs->end, end);
+ _process_range_removal(start, range_end, rs);
+ cb(start, range_end - start, true);
+ start = range_end;
+
+ rs = next_rs;
+ } while (rs != range_tree.end() && rs->start < end && start < end);
+ if (start < end) {
+ cb(start, end - start, false);
+ }
+}
+
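+// Core allocation loop: carve extents of at most max_alloc_size each until
+// 'want' bytes are gathered; return the number of bytes allocated, or
+// -ENOSPC if nothing could be allocated. Caller must hold 'lock'.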
+int64_t AvlAllocator::_allocate(
+ uint64_t want,
+ uint64_t unit,
+ uint64_t max_alloc_size,
+ int64_t hint, // unused, for now!
+ PExtentVector* extents)
+{
+ uint64_t allocated = 0;
+ while (allocated < want) {
+ uint64_t offset, length;
+ int r = _allocate(std::min(max_alloc_size, want - allocated),
+ unit, &offset, &length);
+ if (r < 0) {
+ // Allocation failed.
+ break;
+ }
+ extents->emplace_back(offset, length);
+ allocated += length;
+ }
+ return allocated ? allocated : -ENOSPC;
}
int AvlAllocator::_allocate(
uint64_t *offset,
uint64_t *length)
{
- std::lock_guard l(lock);
uint64_t max_size = 0;
if (auto p = range_size_tree.rbegin(); p != range_size_tree.rend()) {
max_size = p->end - p->start;
return -ENOSPC;
}
size = p2align(max_size, unit);
- assert(size > 0);
+ ceph_assert(size > 0);
force_range_size_alloc = true;
}
/*
* region.
*/
const uint64_t align = size & -size;
- assert(align != 0);
+ ceph_assert(align != 0);
uint64_t *cursor = &lbas[cbits(align) - 1];
const int free_pct = num_free * 100 / num_total;
return 0;
}
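+// Unlocked internals: the public entry points take 'lock' and delegate to
+// the _-prefixed helpers below.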
+void AvlAllocator::_release(const interval_set<uint64_t>& release_set)
+{
+ for (auto p = release_set.begin(); p != release_set.end(); ++p) {
+ const auto offset = p.get_start();
+ const auto length = p.get_len();
+ ldout(cct, 10) << __func__ << std::hex
+ << " offset 0x" << offset
+ << " length 0x" << length
+ << std::dec << dendl;
+ _add_to_tree(offset, length);
+ }
+}
+
+void AvlAllocator::_release(const PExtentVector& release_set)
+{
+ for (auto& e : release_set) {
+ ldout(cct, 10) << __func__ << std::hex
+ << " offset 0x" << e.offset
+ << " length 0x" << e.length
+ << std::dec << dendl;
+ _add_to_tree(e.offset, e.length);
+ }
+}
+
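+// Both trees hold the same intrusive range_seg_t nodes, so only range_tree
+// may dispose of the memory; range_size_tree is merely cleared.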
+void AvlAllocator::_shutdown()
+{
+ range_size_tree.clear();
+ range_tree.clear_and_dispose(dispose_rs{});
+}
+
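+// Extended constructor: max_mem bounds the allocator's memory footprint via
+// range_count_cap (max_mem / sizeof(range_seg_t)), the number of range_seg_t
+// entries that fit in that budget.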
+AvlAllocator::AvlAllocator(CephContext* cct,
+ int64_t device_size,
+ int64_t block_size,
+ uint64_t max_mem,
+ const std::string& name) :
+ Allocator(name),
+ num_total(device_size),
+ block_size(block_size),
+ range_size_alloc_threshold(
+ cct->_conf.get_val<uint64_t>("bluestore_avl_alloc_bf_threshold")),
+ range_size_alloc_free_pct(
+ cct->_conf.get_val<uint64_t>("bluestore_avl_alloc_bf_free_pct")),
+ range_count_cap(max_mem / sizeof(range_seg_t)),
+ cct(cct)
+{}
+
AvlAllocator::AvlAllocator(CephContext* cct,
int64_t device_size,
int64_t block_size,
cct(cct)
{}
+AvlAllocator::~AvlAllocator()
+{
+ shutdown();
+}
+
int64_t AvlAllocator::allocate(
uint64_t want,
uint64_t unit,
<< " max_alloc_size 0x" << max_alloc_size
<< " hint 0x" << hint
<< std::dec << dendl;
- assert(isp2(unit));
- assert(want % unit == 0);
+ ceph_assert(isp2(unit));
+ ceph_assert(want % unit == 0);
if (max_alloc_size == 0) {
max_alloc_size = want;
}
if (constexpr auto cap = std::numeric_limits<decltype(bluestore_pextent_t::length)>::max();
max_alloc_size >= cap) {
- max_alloc_size = cap;
- }
-
- uint64_t allocated = 0;
- while (allocated < want) {
- uint64_t offset, length;
- int r = _allocate(std::min(max_alloc_size, want - allocated),
- unit, &offset, &length);
- if (r < 0) {
- // Allocation failed.
- break;
- }
- extents->emplace_back(offset, length);
- allocated += length;
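+    // align the clamped cap down to block_size so every extent length stays
+    // a valid bluestore_pextent_t::length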
+ max_alloc_size = p2align(uint64_t(cap), (uint64_t)block_size);
}
- return allocated ? allocated : -ENOSPC;
+ std::lock_guard l(lock);
+ return _allocate(want, unit, max_alloc_size, hint, extents);
}
void AvlAllocator::release(const interval_set<uint64_t>& release_set)
{
std::lock_guard l(lock);
- for (auto p = release_set.begin(); p != release_set.end(); ++p) {
- const auto offset = p.get_start();
- const auto length = p.get_len();
- ldout(cct, 10) << __func__ << std::hex
- << " offset 0x" << offset
- << " length 0x" << length
- << std::dec << dendl;
- _add_to_tree(offset, length);
- }
+ _release(release_set);
}
uint64_t AvlAllocator::get_free()
double AvlAllocator::get_fragmentation()
{
std::lock_guard l(lock);
- auto free_blocks = p2align(num_free, block_size) / block_size;
- if (free_blocks <= 1) {
- return .0;
- }
- return (static_cast<double>(range_tree.size() - 1) / (free_blocks - 1));
+ return _get_fragmentation();
}
void AvlAllocator::dump()
{
std::lock_guard l(lock);
+ _dump();
+}
+
+void AvlAllocator::_dump() const
+{
ldout(cct, 0) << __func__ << " range_tree: " << dendl;
for (auto& rs : range_tree) {
ldout(cct, 0) << std::hex
- << "0x" << rs.start << "~" << rs.end
- << std::dec
- << dendl;
+ << "0x" << rs.start << "~" << rs.end
+ << std::dec
+ << dendl;
}
ldout(cct, 0) << __func__ << " range_size_tree: " << dendl;
for (auto& rs : range_size_tree) {
ldout(cct, 0) << std::hex
- << "0x" << rs.start << "~" << rs.end
- << std::dec
- << dendl;
+ << "0x" << rs.start << "~" << rs.end
+ << std::dec
+ << dendl;
}
}
void AvlAllocator::shutdown()
{
std::lock_guard l(lock);
- range_size_tree.clear();
- range_tree.clear_and_dispose(dispose_rs{});
+ _shutdown();
}