1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 #include "AvlAllocator.h"
8 #include "common/config_proxy.h"
9 #include "common/debug.h"
11 #define dout_context cct
12 #define dout_subsys ceph_subsys_bluestore
14 #define dout_prefix *_dout << "AvlAllocator "
16 MEMPOOL_DEFINE_OBJECT_FACTORY(range_seg_t
, range_seg_t
, bluestore_alloc
);
19 // a light-weight "range_seg_t", which is only used as the key when searching in
20 // range_tree and range_size_tree
28 * This is a helper function that can be used by the allocator to find
29 * a suitable block to allocate. This will search the specified AVL
30 * tree looking for a block that matches the specified criteria.
// Scan `t` for the first extent that can hold `size` bytes at `align`
// alignment, starting at *cursor; on success *cursor is advanced past the
// returned offset so the next search resumes after it.
// NOTE(review): garbled listing — the remaining parameter lines
// (cursor/size/align), the success return, and the *cursor == 0 give-up
// guard are not visible in this chunk; confirm against the full file.
33 uint64_t AvlAllocator::_block_picker(const Tree
& t
,
// Use the tree's own comparator so the lookup agrees with its ordering.
38 const auto compare
= t
.key_comp();
// Start at the first extent at/after *cursor with at least `size` bytes.
39 for (auto rs
= t
.lower_bound(range_t
{*cursor
, size
}, compare
);
40 rs
!= t
.end(); ++rs
) {
// Round the candidate start up to `align`; it fits only if the aligned
// span still ends inside this extent.
41 uint64_t offset
= p2roundup(rs
->start
, align
);
42 if (offset
+ size
<= rs
->end
) {
// Remember where to resume the next search.
43 *cursor
= offset
+ size
;
48 * If we know we've searched the whole tree (*cursor == 0), give up.
49 * Otherwise, reset the cursor to the beginning and try again.
// Wrap around once: retry the search from the beginning of the tree.
55 return _block_picker(t
, cursor
, size
, align
);
// Return [start, start+size) to the free-space maps, coalescing with the
// free extents immediately before and/or after when they touch.
// NOTE(review): garbled listing — opening brace, some closing braces, the
// merge_before tail and the plain-insert else branch are not visible in
// this chunk; confirm against the full file.
58 void AvlAllocator::_add_to_tree(uint64_t start
, uint64_t size
)
60 ceph_assert(size
!= 0);
62 uint64_t end
= start
+ size
;
// First free extent strictly after the new range (by the tree's order).
64 auto rs_after
= range_tree
.upper_bound(range_t
{start
, end
},
65 range_tree
.key_comp());
67 /* Make sure we don't overlap with either of our neighbors */
68 auto rs_before
= range_tree
.end();
69 if (rs_after
!= range_tree
.begin()) {
70 rs_before
= std::prev(rs_after
);
// Merge when the new range exactly abuts a neighbor on either side.
73 bool merge_before
= (rs_before
!= range_tree
.end() && rs_before
->end
== start
);
74 bool merge_after
= (rs_after
!= range_tree
.end() && rs_after
->start
== end
);
76 if (merge_before
&& merge_after
) {
// Both neighbors touch: fold all three into rs_after and drop rs_before.
// Remove both from the size tree first since their sizes change.
77 _range_size_tree_rm(*rs_before
);
78 _range_size_tree_rm(*rs_after
);
79 rs_after
->start
= rs_before
->start
;
80 range_tree
.erase_and_dispose(rs_before
, dispose_rs
{});
81 _range_size_tree_try_insert(*rs_after
);
82 } else if (merge_before
) {
// Only the left neighbor touches: extend it rightwards.
83 _range_size_tree_rm(*rs_before
);
85 _range_size_tree_try_insert(*rs_before
);
86 } else if (merge_after
) {
// Only the right neighbor touches: extend it leftwards.
87 _range_size_tree_rm(*rs_after
);
88 rs_after
->start
= start
;
89 _range_size_tree_try_insert(*rs_after
);
// No neighbor touches: insert a fresh extent before rs_after.
91 _try_insert_range(start
, end
, &rs_after
);
// Carve [start, end) out of the free extent *rs (which must fully contain
// it), handling the four cases: split into two, trim left, trim right, or
// remove the extent entirely.
// NOTE(review): garbled listing — the rs->end/rs->start trim assignments
// and the insert_pos advance are not visible in this chunk; confirm
// against the full file.
95 void AvlAllocator::_process_range_removal(uint64_t start
, uint64_t end
,
96 AvlAllocator::range_tree_t::iterator
& rs
)
// Leftover space on either side of the removed span?
98 bool left_over
= (rs
->start
!= start
);
99 bool right_over
= (rs
->end
!= end
);
// rs is about to shrink or disappear, so pull it from the size tree now.
101 _range_size_tree_rm(*rs
);
103 if (left_over
&& right_over
) {
// Middle removal: rs keeps the left part; a new node takes the right.
104 auto old_right_end
= rs
->end
;
105 auto insert_pos
= rs
;
106 ceph_assert(insert_pos
!= range_tree
.end());
110 // Insert tail first to be sure insert_pos hasn't been disposed.
111 // This wouldn't dispose rs though since it's out of range_size_tree.
112 // Don't care about a small chance of 'not-the-best-choice-for-removal' case
113 // which might happen if rs has the lowest size.
114 _try_insert_range(end
, old_right_end
, &insert_pos
);
115 _range_size_tree_try_insert(*rs
);
117 } else if (left_over
) {
119 _range_size_tree_try_insert(*rs
);
120 } else if (right_over
) {
122 _range_size_tree_try_insert(*rs
);
// Exact match: drop the extent from the offset tree and free the node.
124 range_tree
.erase_and_dispose(rs
, dispose_rs
{});
128 void AvlAllocator::_remove_from_tree(uint64_t start
, uint64_t size
)
130 uint64_t end
= start
+ size
;
132 ceph_assert(size
!= 0);
133 ceph_assert(size
<= num_free
);
135 auto rs
= range_tree
.find(range_t
{start
, end
}, range_tree
.key_comp());
136 /* Make sure we completely overlap with someone */
137 ceph_assert(rs
!= range_tree
.end());
138 ceph_assert(rs
->start
<= start
);
139 ceph_assert(rs
->end
>= end
);
141 _process_range_removal(start
, end
, rs
);
// Best-effort removal of [start, start+size): walk the free extents that
// overlap the interval, remove the overlapping pieces, and report each
// sub-interval to `cb` as (offset, length, removed-from-tree?).
// NOTE(review): garbled listing — the early return, the do-loop head, the
// loop-advance statements and the final leftover guard are not visible in
// this chunk; confirm against the full file.
144 void AvlAllocator::_try_remove_from_tree(uint64_t start
, uint64_t size
,
145 std::function
<void(uint64_t, uint64_t, bool)> cb
)
147 uint64_t end
= start
+ size
;
149 ceph_assert(size
!= 0);
// Find the first free extent overlapping (or past) [start, end).
151 auto rs
= range_tree
.find(range_t
{ start
, end
},
152 range_tree
.key_comp());
// Nothing of the interval is free: report it whole as not-removed.
154 if (rs
== range_tree
.end() || rs
->start
>= end
) {
155 cb(start
, size
, false);
// Gap before this free extent was never in the tree: report as skipped.
164 if (start
< rs
->start
) {
165 cb(start
, rs
->start
- start
, false);
// Remove the overlapping slice of this extent and report it as removed.
168 auto range_end
= std::min(rs
->end
, end
);
169 _process_range_removal(start
, range_end
, rs
);
170 cb(start
, range_end
- start
, true);
174 } while (rs
!= range_tree
.end() && rs
->start
< end
&& start
< end
);
// Trailing part past the last free extent: report as not-removed.
176 cb(start
, end
- start
, false);
// Allocate up to `want` bytes as a series of extents of at most
// max_alloc_size each, appending to *extents; returns the number of bytes
// allocated, or -ENOSPC if nothing could be allocated at all.
// NOTE(review): garbled listing — the leading want/unit parameters, the
// failure break after a negative `r`, and the `allocated` accumulation are
// not visible in this chunk; confirm against the full file.
180 int64_t AvlAllocator::_allocate(
183 uint64_t max_alloc_size
,
184 int64_t hint
, // unused, for now!
185 PExtentVector
* extents
)
187 uint64_t allocated
= 0;
// Keep carving chunks until the request is satisfied or we run dry.
188 while (allocated
< want
) {
189 uint64_t offset
, length
;
190 int r
= _allocate(std::min(max_alloc_size
, want
- allocated
),
191 unit
, &offset
, &length
)
;
193 // Allocation failed.
196 extents
->emplace_back(offset
, length
);
// Partial success still counts; only a completely empty result is ENOSPC.
199 return allocated
? allocated
: -ENOSPC
;
// Allocate one extent of (up to) `size` bytes, choosing best-fit via the
// size-ordered tree when space is scarce, first-fit via the offset tree
// (with per-alignment cursors) otherwise.
// NOTE(review): garbled listing — the parameter lines, the do-loop heads,
// several returns (including the -ENOSPC path) and the final bookkeeping
// are not visible in this chunk; confirm against the full file.
202 int AvlAllocator::_allocate(
// Size of the largest free extent (back of the size-ordered tree).
208 uint64_t max_size
= 0;
209 if (auto p
= range_size_tree
.rbegin(); p
!= range_size_tree
.rend()) {
210 max_size
= p
->end
- p
->start
;
213 bool force_range_size_alloc
= false;
// Request exceeds the largest free extent: shrink it to what can fit.
214 if (max_size
< size
) {
215 if (max_size
< unit
) {
218 size
= p2align(max_size
, unit
);
219 ceph_assert(size
> 0);
220 force_range_size_alloc
= true;
223 const int free_pct
= num_free
* 100 / num_total
;
226 * If we're running low on space switch to using the size
227 * sorted AVL tree (best-fit).
229 if (force_range_size_alloc
||
230 max_size
< range_size_alloc_threshold
||
231 free_pct
< range_size_alloc_free_pct
) {
// Best-fit path: cursor is irrelevant in the size-ordered tree.
232 uint64_t fake_cursor
= 0;
234 start
= _block_picker(range_size_tree
, &fake_cursor
, size
, unit
);
235 dout(20) << __func__
<< " best fit=" << start
<< " size=" << size
<< dendl
;
236 if (start
!= uint64_t(-1ULL)) {
239 // try to collect smaller extents as we could fail to retrieve
240 // that large block due to misaligned extents
241 size
= p2align(size
>> 1, unit
);
242 } while (size
>= unit
);
246 * Find the largest power of 2 block size that evenly divides the
247 * requested size. This is used to try to allocate blocks with similar
248 * alignment from the same area (i.e. same cursor bucket) but it does
249 * not guarantee that other allocation sizes may exist in the same
// align = lowest set bit of size; each power-of-two alignment gets its
// own first-fit cursor in lbas[].
252 uint64_t align
= size
& -size
;
253 ceph_assert(align
!= 0);
254 uint64_t* cursor
= &lbas
[cbits(align
) - 1];
256 start
= _block_picker(range_tree
, cursor
, size
, unit
);
257 dout(20) << __func__
<< " first fit=" << start
<< " size=" << size
<< dendl
;
258 if (start
!= uint64_t(-1ULL)) {
261 // try to collect smaller extents as we could fail to retrieve
262 // that large block due to misaligned extents
263 size
= p2align(size
>> 1, unit
);
264 } while (size
>= unit
);
266 if (start
== -1ULL) {
// Claim the chosen extent from the free-space maps.
270 _remove_from_tree(start
, size
);
277 void AvlAllocator::_release(const interval_set
<uint64_t>& release_set
)
279 for (auto p
= release_set
.begin(); p
!= release_set
.end(); ++p
) {
280 const auto offset
= p
.get_start();
281 const auto length
= p
.get_len();
282 ldout(cct
, 10) << __func__
<< std::hex
283 << " offset 0x" << offset
284 << " length 0x" << length
285 << std::dec
<< dendl
;
286 _add_to_tree(offset
, length
);
290 void AvlAllocator::_release(const PExtentVector
& release_set
) {
291 for (auto& e
: release_set
) {
292 ldout(cct
, 10) << __func__
<< std::hex
293 << " offset 0x" << e
.offset
294 << " length 0x" << e
.length
295 << std::dec
<< dendl
;
296 _add_to_tree(e
.offset
, e
.length
);
300 void AvlAllocator::_shutdown()
302 range_size_tree
.clear();
303 range_tree
.clear_and_dispose(dispose_rs
{});
// Constructor (memory-capped variant): range_count_cap limits how many
// range_seg_t nodes may live in the trees, derived from max_mem.
// NOTE(review): garbled listing — the device_size/block_size/max_mem
// parameter lines, the base-class initializer and the body are not
// visible in this chunk; confirm against the full file.
306 AvlAllocator::AvlAllocator(CephContext
* cct
,
310 const std::string
& name
) :
312 num_total(device_size
),
313 block_size(block_size
),
// Best-fit vs first-fit switch points come from config.
314 range_size_alloc_threshold(
315 cct
->_conf
.get_val
<uint64_t>("bluestore_avl_alloc_bf_threshold")),
316 range_size_alloc_free_pct(
317 cct
->_conf
.get_val
<uint64_t>("bluestore_avl_alloc_bf_free_pct")),
318 range_count_cap(max_mem
/ sizeof(range_seg_t
)),
// Constructor (uncapped variant): same as above but without a
// range_count_cap member initializer.
// NOTE(review): garbled listing — the device_size/block_size parameter
// lines, the base-class initializer and the body are not visible in this
// chunk; confirm against the full file.
322 AvlAllocator::AvlAllocator(CephContext
* cct
,
325 const std::string
& name
) :
327 num_total(device_size
),
328 block_size(block_size
),
329 range_size_alloc_threshold(
330 cct
->_conf
.get_val
<uint64_t>("bluestore_avl_alloc_bf_threshold")),
331 range_size_alloc_free_pct(
332 cct
->_conf
.get_val
<uint64_t>("bluestore_avl_alloc_bf_free_pct")),
// Destructor — body not visible in this chunk; presumably releases the
// tree nodes (e.g. via shutdown()). TODO(review): confirm against the
// full file.
336 AvlAllocator::~AvlAllocator()
// Public allocate entry point: validates the request, clamps
// max_alloc_size to what a bluestore_pextent_t length field can hold,
// then delegates to _allocate() under the lock.
// NOTE(review): garbled listing — the want/unit parameter lines, the
// opening brace and some closing braces are not visible in this chunk;
// confirm against the full file.
341 int64_t AvlAllocator::allocate(
344 uint64_t max_alloc_size
,
345 int64_t hint
, // unused, for now!
346 PExtentVector
* extents
)
348 ldout(cct
, 10) << __func__
<< std::hex
349 << " want 0x" << want
350 << " unit 0x" << unit
351 << " max_alloc_size 0x" << max_alloc_size
352 << " hint 0x" << hint
353 << std::dec
<< dendl
;
// Callers must request whole units of a power-of-two unit size.
354 ceph_assert(isp2(unit
));
355 ceph_assert(want
% unit
== 0);
// 0 means "no per-extent cap": fall back to the whole request.
357 if (max_alloc_size
== 0) {
358 max_alloc_size
= want
;
// Clamp so a single extent's length fits bluestore_pextent_t::length,
// rounded down to a block_size multiple.
360 if (constexpr auto cap
= std::numeric_limits
<decltype(bluestore_pextent_t::length
)>::max();
361 max_alloc_size
>= cap
) {
362 max_alloc_size
= p2align(uint64_t(cap
), (uint64_t)block_size
);
364 std::lock_guard
l(lock
);
365 return _allocate(want
, unit
, max_alloc_size
, hint
, extents
);
368 void AvlAllocator::release(const interval_set
<uint64_t>& release_set
) {
369 std::lock_guard
l(lock
);
370 _release(release_set
);
// Report the number of free bytes under the lock.
// NOTE(review): garbled listing — the return statement (presumably
// `return num_free;`) is not visible in this chunk; confirm against the
// full file.
373 uint64_t AvlAllocator::get_free()
375 std::lock_guard
l(lock
);
379 double AvlAllocator::get_fragmentation()
381 std::lock_guard
l(lock
);
382 return _get_fragmentation();
// Dump allocator state under the lock.
// NOTE(review): garbled listing — the body call (presumably `_dump();`)
// is not visible in this chunk; confirm against the full file.
385 void AvlAllocator::dump()
387 std::lock_guard
l(lock
);
// Log every extent in both trees at debug level 0, in hex, as
// "0x<start>~<end>".  Caller is expected to hold the lock.
// NOTE(review): garbled listing — the stream terminators, loop closings
// and the end of the second loop are not visible in this chunk; confirm
// against the full file.
391 void AvlAllocator::_dump() const
393 ldout(cct
, 0) << __func__
<< " range_tree: " << dendl
;
394 for (auto& rs
: range_tree
) {
395 ldout(cct
, 0) << std::hex
396 << "0x" << rs
.start
<< "~" << rs
.end
// Second pass: same extents as indexed by the size-ordered tree.
401 ldout(cct
, 0) << __func__
<< " range_size_tree: " << dendl
;
402 for (auto& rs
: range_size_tree
) {
403 ldout(cct
, 0) << std::hex
404 << "0x" << rs
.start
<< "~" << rs
.end
410 void AvlAllocator::dump(std::function
<void(uint64_t offset
, uint64_t length
)> notify
)
412 for (auto& rs
: range_tree
) {
413 notify(rs
.start
, rs
.end
- rs
.start
);
// Seed the allocator with a free extent [offset, offset+length).
// NOTE(review): garbled listing — the opening brace and any zero-length
// early-out before the lock are not visible in this chunk; confirm
// against the full file.
417 void AvlAllocator::init_add_free(uint64_t offset
, uint64_t length
)
421 std::lock_guard
l(lock
);
422 ldout(cct
, 10) << __func__
<< std::hex
423 << " offset 0x" << offset
424 << " length 0x" << length
425 << std::dec
<< dendl
;
426 _add_to_tree(offset
, length
);
// Mark [offset, offset+length) as allocated during initialization; the
// range must currently be free (enforced by _remove_from_tree asserts).
// NOTE(review): garbled listing — the opening brace and any zero-length
// early-out before the lock are not visible in this chunk; confirm
// against the full file.
429 void AvlAllocator::init_rm_free(uint64_t offset
, uint64_t length
)
433 std::lock_guard
l(lock
);
434 ldout(cct
, 10) << __func__
<< std::hex
435 << " offset 0x" << offset
436 << " length 0x" << length
437 << std::dec
<< dendl
;
438 _remove_from_tree(offset
, length
);
// Public shutdown entry point: takes the lock before tearing down state.
// NOTE(review): garbled listing — the body call (presumably
// `_shutdown();`) runs past this chunk's view; confirm against the full
// file.
441 void AvlAllocator::shutdown()
443 std::lock_guard
l(lock
);