1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 #include "AvlAllocator.h"
8 #include "common/config_proxy.h"
9 #include "common/debug.h"
11 #define dout_context cct
12 #define dout_subsys ceph_subsys_bluestore
14 #define dout_prefix *_dout << "AvlAllocator "
16 MEMPOOL_DEFINE_OBJECT_FACTORY(range_seg_t
, range_seg_t
, bluestore_alloc
);
// a light-weight "range_seg_t", which is only used as the key when searching
// in range_tree and range_size_tree
28 * This is a helper function that can be used by the allocator to find
29 * a suitable block to allocate. This will search the specified AVL
30 * tree looking for a block that matches the specified criteria.
33 uint64_t AvlAllocator::_block_picker(const Tree
& t
,
38 const auto compare
= t
.key_comp();
39 auto rs_start
= t
.lower_bound(range_t
{*cursor
, size
}, compare
);
40 for (auto rs
= rs_start
; rs
!= t
.end(); ++rs
) {
41 uint64_t offset
= p2roundup(rs
->start
, align
);
42 if (offset
+ size
<= rs
->end
) {
43 *cursor
= offset
+ size
;
48 * If we know we've searched the whole tree (*cursor == 0), give up.
49 * Otherwise, reset the cursor to the beginning and try again.
51 if (*cursor
== 0 || rs_start
== t
.begin()) {
55 return _block_picker(t
, cursor
, size
, align
);
58 void AvlAllocator::_add_to_tree(uint64_t start
, uint64_t size
)
60 ceph_assert(size
!= 0);
62 uint64_t end
= start
+ size
;
64 auto rs_after
= range_tree
.upper_bound(range_t
{start
, end
},
65 range_tree
.key_comp());
67 /* Make sure we don't overlap with either of our neighbors */
68 auto rs_before
= range_tree
.end();
69 if (rs_after
!= range_tree
.begin()) {
70 rs_before
= std::prev(rs_after
);
73 bool merge_before
= (rs_before
!= range_tree
.end() && rs_before
->end
== start
);
74 bool merge_after
= (rs_after
!= range_tree
.end() && rs_after
->start
== end
);
76 if (merge_before
&& merge_after
) {
77 _range_size_tree_rm(*rs_before
);
78 _range_size_tree_rm(*rs_after
);
79 rs_after
->start
= rs_before
->start
;
80 range_tree
.erase_and_dispose(rs_before
, dispose_rs
{});
81 _range_size_tree_try_insert(*rs_after
);
82 } else if (merge_before
) {
83 _range_size_tree_rm(*rs_before
);
85 _range_size_tree_try_insert(*rs_before
);
86 } else if (merge_after
) {
87 _range_size_tree_rm(*rs_after
);
88 rs_after
->start
= start
;
89 _range_size_tree_try_insert(*rs_after
);
91 _try_insert_range(start
, end
, &rs_after
);
95 void AvlAllocator::_process_range_removal(uint64_t start
, uint64_t end
,
96 AvlAllocator::range_tree_t::iterator
& rs
)
98 bool left_over
= (rs
->start
!= start
);
99 bool right_over
= (rs
->end
!= end
);
101 _range_size_tree_rm(*rs
);
103 if (left_over
&& right_over
) {
104 auto old_right_end
= rs
->end
;
105 auto insert_pos
= rs
;
106 ceph_assert(insert_pos
!= range_tree
.end());
110 // Insert tail first to be sure insert_pos hasn't been disposed.
111 // This woulnd't dispose rs though since it's out of range_size_tree.
112 // Don't care about a small chance of 'not-the-best-choice-for-removal' case
113 // which might happen if rs has the lowest size.
114 _try_insert_range(end
, old_right_end
, &insert_pos
);
115 _range_size_tree_try_insert(*rs
);
117 } else if (left_over
) {
119 _range_size_tree_try_insert(*rs
);
120 } else if (right_over
) {
122 _range_size_tree_try_insert(*rs
);
124 range_tree
.erase_and_dispose(rs
, dispose_rs
{});
128 void AvlAllocator::_remove_from_tree(uint64_t start
, uint64_t size
)
130 uint64_t end
= start
+ size
;
132 ceph_assert(size
!= 0);
133 ceph_assert(size
<= num_free
);
135 auto rs
= range_tree
.find(range_t
{start
, end
}, range_tree
.key_comp());
136 /* Make sure we completely overlap with someone */
137 ceph_assert(rs
!= range_tree
.end());
138 ceph_assert(rs
->start
<= start
);
139 ceph_assert(rs
->end
>= end
);
141 _process_range_removal(start
, end
, rs
);
144 void AvlAllocator::_try_remove_from_tree(uint64_t start
, uint64_t size
,
145 std::function
<void(uint64_t, uint64_t, bool)> cb
)
147 uint64_t end
= start
+ size
;
149 ceph_assert(size
!= 0);
151 auto rs
= range_tree
.find(range_t
{ start
, end
},
152 range_tree
.key_comp());
154 if (rs
== range_tree
.end() || rs
->start
>= end
) {
155 cb(start
, size
, false);
164 if (start
< rs
->start
) {
165 cb(start
, rs
->start
- start
, false);
168 auto range_end
= std::min(rs
->end
, end
);
169 _process_range_removal(start
, range_end
, rs
);
170 cb(start
, range_end
- start
, true);
174 } while (rs
!= range_tree
.end() && rs
->start
< end
&& start
< end
);
176 cb(start
, end
- start
, false);
180 int64_t AvlAllocator::_allocate(
183 uint64_t max_alloc_size
,
184 int64_t hint
, // unused, for now!
185 PExtentVector
* extents
)
187 uint64_t allocated
= 0;
188 while (allocated
< want
) {
189 uint64_t offset
, length
;
190 int r
= _allocate(std::min(max_alloc_size
, want
- allocated
),
191 unit
, &offset
, &length
);
193 // Allocation failed.
196 extents
->emplace_back(offset
, length
);
199 return allocated
? allocated
: -ENOSPC
;
202 int AvlAllocator::_allocate(
208 uint64_t max_size
= 0;
209 if (auto p
= range_size_tree
.rbegin(); p
!= range_size_tree
.rend()) {
210 max_size
= p
->end
- p
->start
;
213 bool force_range_size_alloc
= false;
214 if (max_size
< size
) {
215 if (max_size
< unit
) {
218 size
= p2align(max_size
, unit
);
219 ceph_assert(size
> 0);
220 force_range_size_alloc
= true;
223 * Find the largest power of 2 block size that evenly divides the
224 * requested size. This is used to try to allocate blocks with similar
225 * alignment from the same area (i.e. same cursor bucket) but it does
226 * not guarantee that other allocations sizes may exist in the same
229 const uint64_t align
= size
& -size
;
230 ceph_assert(align
!= 0);
231 uint64_t *cursor
= &lbas
[cbits(align
) - 1];
233 const int free_pct
= num_free
* 100 / num_total
;
236 * If we're running low on space switch to using the size
237 * sorted AVL tree (best-fit).
239 if (force_range_size_alloc
||
240 max_size
< range_size_alloc_threshold
||
241 free_pct
< range_size_alloc_free_pct
) {
244 start
= _block_picker(range_size_tree
, cursor
, size
, unit
);
245 if (start
!= -1ULL || !force_range_size_alloc
) {
248 // try to collect smaller extents as we could fail to retrieve
249 // that large block due to misaligned extents
250 size
= p2align(size
>> 1, unit
);
251 } while (size
>= unit
);
253 start
= _block_picker(range_tree
, cursor
, size
, unit
);
255 if (start
== -1ULL) {
259 _remove_from_tree(start
, size
);
266 void AvlAllocator::_release(const interval_set
<uint64_t>& release_set
)
268 for (auto p
= release_set
.begin(); p
!= release_set
.end(); ++p
) {
269 const auto offset
= p
.get_start();
270 const auto length
= p
.get_len();
271 ldout(cct
, 10) << __func__
<< std::hex
272 << " offset 0x" << offset
273 << " length 0x" << length
274 << std::dec
<< dendl
;
275 _add_to_tree(offset
, length
);
279 void AvlAllocator::_release(const PExtentVector
& release_set
) {
280 for (auto& e
: release_set
) {
281 ldout(cct
, 10) << __func__
<< std::hex
282 << " offset 0x" << e
.offset
283 << " length 0x" << e
.length
284 << std::dec
<< dendl
;
285 _add_to_tree(e
.offset
, e
.length
);
289 void AvlAllocator::_shutdown()
291 range_size_tree
.clear();
292 range_tree
.clear_and_dispose(dispose_rs
{});
295 AvlAllocator::AvlAllocator(CephContext
* cct
,
299 const std::string
& name
) :
300 Allocator(name
, device_size
, block_size
),
301 num_total(device_size
),
302 block_size(block_size
),
303 range_size_alloc_threshold(
304 cct
->_conf
.get_val
<uint64_t>("bluestore_avl_alloc_bf_threshold")),
305 range_size_alloc_free_pct(
306 cct
->_conf
.get_val
<uint64_t>("bluestore_avl_alloc_bf_free_pct")),
307 range_count_cap(max_mem
/ sizeof(range_seg_t
)),
311 AvlAllocator::AvlAllocator(CephContext
* cct
,
314 const std::string
& name
) :
315 Allocator(name
, device_size
, block_size
),
316 num_total(device_size
),
317 block_size(block_size
),
318 range_size_alloc_threshold(
319 cct
->_conf
.get_val
<uint64_t>("bluestore_avl_alloc_bf_threshold")),
320 range_size_alloc_free_pct(
321 cct
->_conf
.get_val
<uint64_t>("bluestore_avl_alloc_bf_free_pct")),
325 AvlAllocator::~AvlAllocator()
330 int64_t AvlAllocator::allocate(
333 uint64_t max_alloc_size
,
334 int64_t hint
, // unused, for now!
335 PExtentVector
* extents
)
337 ldout(cct
, 10) << __func__
<< std::hex
338 << " want 0x" << want
339 << " unit 0x" << unit
340 << " max_alloc_size 0x" << max_alloc_size
341 << " hint 0x" << hint
342 << std::dec
<< dendl
;
343 ceph_assert(isp2(unit
));
344 ceph_assert(want
% unit
== 0);
346 if (max_alloc_size
== 0) {
347 max_alloc_size
= want
;
349 if (constexpr auto cap
= std::numeric_limits
<decltype(bluestore_pextent_t::length
)>::max();
350 max_alloc_size
>= cap
) {
351 max_alloc_size
= p2align(uint64_t(cap
), (uint64_t)block_size
);
353 std::lock_guard
l(lock
);
354 return _allocate(want
, unit
, max_alloc_size
, hint
, extents
);
357 void AvlAllocator::release(const interval_set
<uint64_t>& release_set
) {
358 std::lock_guard
l(lock
);
359 _release(release_set
);
362 uint64_t AvlAllocator::get_free()
364 std::lock_guard
l(lock
);
368 double AvlAllocator::get_fragmentation()
370 std::lock_guard
l(lock
);
371 return _get_fragmentation();
374 void AvlAllocator::dump()
376 std::lock_guard
l(lock
);
380 void AvlAllocator::_dump() const
382 ldout(cct
, 0) << __func__
<< " range_tree: " << dendl
;
383 for (auto& rs
: range_tree
) {
384 ldout(cct
, 0) << std::hex
385 << "0x" << rs
.start
<< "~" << rs
.end
390 ldout(cct
, 0) << __func__
<< " range_size_tree: " << dendl
;
391 for (auto& rs
: range_size_tree
) {
392 ldout(cct
, 0) << std::hex
393 << "0x" << rs
.start
<< "~" << rs
.end
399 void AvlAllocator::dump(std::function
<void(uint64_t offset
, uint64_t length
)> notify
)
401 for (auto& rs
: range_tree
) {
402 notify(rs
.start
, rs
.end
- rs
.start
);
406 void AvlAllocator::init_add_free(uint64_t offset
, uint64_t length
)
408 std::lock_guard
l(lock
);
409 ldout(cct
, 10) << __func__
<< std::hex
410 << " offset 0x" << offset
411 << " length 0x" << length
412 << std::dec
<< dendl
;
413 _add_to_tree(offset
, length
);
416 void AvlAllocator::init_rm_free(uint64_t offset
, uint64_t length
)
418 std::lock_guard
l(lock
);
419 ldout(cct
, 10) << __func__
<< std::hex
420 << " offset 0x" << offset
421 << " length 0x" << length
422 << std::dec
<< dendl
;
423 _remove_from_tree(offset
, length
);
426 void AvlAllocator::shutdown()
428 std::lock_guard
l(lock
);