// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#include "AvlAllocator.h"

#include <limits>

#include "common/config_proxy.h"
#include "common/debug.h"

#define dout_context cct
#define dout_subsys ceph_subsys_bluestore
#undef dout_prefix
#define dout_prefix *_dout << "AvlAllocator "

MEMPOOL_DEFINE_OBJECT_FACTORY(range_seg_t, range_seg_t, bluestore_alloc);
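
/*
 * Free space is indexed by two AVL trees built over the same range_seg_t
 * nodes: range_tree, ordered by offset, and range_size_tree, ordered by
 * segment length. Every mutation below has to keep both trees consistent.
 */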

namespace {

// a light-weight "range_seg_t", which is only used as the key when searching
// in range_tree and range_size_tree
struct range_t {
  uint64_t start;
  uint64_t end;
};

}

/*
 * This is a helper function that can be used by the allocator to find
 * a suitable block to allocate. This will search the specified AVL
 * tree looking for a block that matches the specified criteria.
 */
template<class Tree>
uint64_t AvlAllocator::_block_picker(const Tree& t,
                                     uint64_t *cursor,
                                     uint64_t size,
                                     uint64_t align)
{
  const auto compare = t.key_comp();
  for (auto rs = t.lower_bound(range_t{*cursor, size}, compare);
       rs != t.end(); ++rs) {
    uint64_t offset = p2roundup(rs->start, align);
    if (offset + size <= rs->end) {
      *cursor = offset + size;
      return offset;
    }
  }
  /*
   * If we know we've searched the whole tree (*cursor == 0), give up.
   * Otherwise, reset the cursor to the beginning and try again.
   */
  if (*cursor == 0) {
    return -1ULL;
  }
  *cursor = 0;
  return _block_picker(t, cursor, size, align);
}

namespace {

// disposer for range_seg_t nodes removed from the intrusive trees
struct dispose_rs {
  void operator()(range_seg_t* p)
  {
    delete p;
  }
};

}
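
/*
 * Insert a free extent [start, start + size) back into both trees,
 * coalescing it with the neighboring segments when they are contiguous,
 * and account for it in num_free.
 */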
void AvlAllocator::_add_to_tree(uint64_t start, uint64_t size)
{
  assert(size != 0);

  uint64_t end = start + size;

  auto rs_after = range_tree.upper_bound(range_t{start, end},
                                         range_tree.key_comp());

  /* Make sure we don't overlap with either of our neighbors */
  auto rs_before = range_tree.end();
  if (rs_after != range_tree.begin()) {
    rs_before = std::prev(rs_after);
  }

  bool merge_before = (rs_before != range_tree.end() && rs_before->end == start);
  bool merge_after = (rs_after != range_tree.end() && rs_after->start == end);

  if (merge_before && merge_after) {
    // the new range bridges the gap between its neighbors: fold all three
    // into the surviving rs_after node
    range_size_tree.erase(*rs_before);
    range_size_tree.erase(*rs_after);
    rs_after->start = rs_before->start;
    range_tree.erase_and_dispose(rs_before, dispose_rs{});
    range_size_tree.insert(*rs_after);
  } else if (merge_before) {
    // extend the preceding segment forward
    range_size_tree.erase(*rs_before);
    rs_before->end = end;
    range_size_tree.insert(*rs_before);
  } else if (merge_after) {
    // extend the following segment backward
    range_size_tree.erase(*rs_after);
    rs_after->start = start;
    range_size_tree.insert(*rs_after);
  } else {
    // no adjacent segment: insert a brand new one
    auto new_rs = new range_seg_t{start, end};
    range_tree.insert_before(rs_after, *new_rs);
    range_size_tree.insert(*new_rs);
  }
  num_free += size;
}
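
/*
 * Carve [start, start + size) out of the free segment that fully contains
 * it.  Depending on where the range falls, the segment is trimmed on one
 * side, split in two, or removed entirely; num_free is reduced accordingly.
 */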
void AvlAllocator::_remove_from_tree(uint64_t start, uint64_t size)
{
  uint64_t end = start + size;

  assert(size != 0);
  assert(size <= num_free);

  auto rs = range_tree.find(range_t{start, end}, range_tree.key_comp());
  /* Make sure we completely overlap with someone */
  assert(rs != range_tree.end());
  assert(rs->start <= start);
  assert(rs->end >= end);

  bool left_over = (rs->start != start);
  bool right_over = (rs->end != end);

  range_size_tree.erase(*rs);

  if (left_over && right_over) {
    // the range sits in the middle of this segment: split it, keeping
    // [rs->start, start) in *rs and [end, old end) in a new segment
    auto new_seg = new range_seg_t{end, rs->end};
    rs->end = start;
    range_tree.insert(rs, *new_seg);
    range_size_tree.insert(*new_seg);
    range_size_tree.insert(*rs);
  } else if (left_over) {
    // trim the tail of this segment
    rs->end = start;
    range_size_tree.insert(*rs);
  } else if (right_over) {
    // trim the head of this segment
    rs->start = end;
    range_size_tree.insert(*rs);
  } else {
    // the range covers the whole segment: drop it entirely
    range_tree.erase_and_dispose(rs, dispose_rs{});
  }
  assert(num_free >= size);
  num_free -= size;
}
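
/*
 * Try to find a free extent of at least 'size' bytes aligned to 'unit'.
 * Normally this is a first-fit search of the offset-ordered range_tree,
 * resuming from a per-alignment cursor in 'lbas' so that similarly sized
 * allocations tend to come from the same region.  When space is tight
 * (largest segment below the threshold, or free space below the configured
 * percentage), it falls back to a best-fit search of range_size_tree.
 */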
int AvlAllocator::_allocate(
  uint64_t size,
  uint64_t unit,
  uint64_t *offset,
  uint64_t *length)
{
  std::lock_guard l(lock);
  uint64_t max_size = 0;
  if (auto p = range_size_tree.rbegin(); p != range_size_tree.rend()) {
    max_size = p->end - p->start;
  }

  bool force_range_size_alloc = false;
  if (max_size < size) {
    if (max_size < unit) {
      return -ENOSPC;
    }
    // not enough room for the full request: hand out the largest
    // unit-aligned chunk we still have
    size = p2align(max_size, unit);
    force_range_size_alloc = true;
  }
  /*
   * Find the largest power of 2 block size that evenly divides the
   * requested size. This is used to try to allocate blocks with similar
   * alignment from the same area (i.e. same cursor bucket), but it does
   * not guarantee that allocations of other sizes will not be placed in
   * the same region.
   */
  const uint64_t align = size & -size;
  uint64_t *cursor = &lbas[cbits(align) - 1];

  const int free_pct = num_free * 100 / num_total;
  uint64_t start = 0;
  /*
   * If we're running low on space, switch to using the size
   * sorted AVL tree (best-fit).
   */
  if (force_range_size_alloc ||
      max_size < range_size_alloc_threshold ||
      free_pct < range_size_alloc_free_pct) {
    *cursor = 0;
    start = _block_picker(range_size_tree, cursor, size, unit);
  } else {
    start = _block_picker(range_tree, cursor, size, unit);
  }
  if (start == -1ULL) {
    return -ENOSPC;
  }

  _remove_from_tree(start, size);

  *offset = start;
  *length = size;
  return 0;
}
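
/*
 * bluestore_avl_alloc_bf_threshold and bluestore_avl_alloc_bf_free_pct
 * control when _allocate() above switches from the cursor-based first-fit
 * search to the best-fit search of the size-ordered tree.
 */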
AvlAllocator::AvlAllocator(CephContext* cct,
                           int64_t device_size,
                           int64_t block_size,
                           const std::string& name) :
  Allocator(name),
  num_total(device_size),
  block_size(block_size),
  range_size_alloc_threshold(
    cct->_conf.get_val<uint64_t>("bluestore_avl_alloc_bf_threshold")),
  range_size_alloc_free_pct(
    cct->_conf.get_val<uint64_t>("bluestore_avl_alloc_bf_free_pct")),
  cct(cct)
{}
int64_t AvlAllocator::allocate(
  uint64_t want,
  uint64_t unit,
  uint64_t max_alloc_size,
  int64_t hint, // unused, for now!
  PExtentVector* extents)
{
  ldout(cct, 10) << __func__ << std::hex
                 << " want 0x" << want
                 << " unit 0x" << unit
                 << " max_alloc_size 0x" << max_alloc_size
                 << " hint 0x" << hint
                 << std::dec << dendl;
  assert(want % unit == 0);

  if (max_alloc_size == 0) {
    max_alloc_size = want;
  }
  if (constexpr auto cap = std::numeric_limits<decltype(bluestore_pextent_t::length)>::max();
      max_alloc_size >= cap) {
    max_alloc_size = cap;
  }

  uint64_t allocated = 0;
  while (allocated < want) {
    uint64_t offset, length;
    int r = _allocate(std::min(max_alloc_size, want - allocated),
                      unit, &offset, &length);
    if (r < 0) {
      // Allocation failed.
      break;
    }
    extents->emplace_back(offset, length);
    allocated += length;
  }
  return allocated ? allocated : -ENOSPC;
}

void AvlAllocator::release(const interval_set<uint64_t>& release_set)
{
  std::lock_guard l(lock);
  for (auto p = release_set.begin(); p != release_set.end(); ++p) {
    const auto offset = p.get_start();
    const auto length = p.get_len();
    ldout(cct, 10) << __func__ << std::hex
                   << " offset 0x" << offset
                   << " length 0x" << length
                   << std::dec << dendl;
    _add_to_tree(offset, length);
  }
}

uint64_t AvlAllocator::get_free()
{
  std::lock_guard l(lock);
  return num_free;
}
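
/*
 * A rough fragmentation score: the ratio of the number of free segments
 * (minus one) to the number of free blocks (minus one).  It is 0 when all
 * free space is one contiguous segment and approaches 1 when every free
 * block sits in its own segment.
 */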
double AvlAllocator::get_fragmentation()
{
  std::lock_guard l(lock);
  auto free_blocks = p2align(num_free, block_size) / block_size;
  if (free_blocks <= 1) {
    return .0;
  }
  return (static_cast<double>(range_tree.size() - 1) / (free_blocks - 1));
}

void AvlAllocator::dump()
{
  std::lock_guard l(lock);
  ldout(cct, 0) << __func__ << " range_tree: " << dendl;
  for (auto& rs : range_tree) {
    ldout(cct, 0) << std::hex
                  << "0x" << rs.start << "~" << rs.end
                  << std::dec << dendl;
  }

  ldout(cct, 0) << __func__ << " range_size_tree: " << dendl;
  for (auto& rs : range_size_tree) {
    ldout(cct, 0) << std::hex
                  << "0x" << rs.start << "~" << rs.end
                  << std::dec << dendl;
  }
}

void AvlAllocator::dump(std::function<void(uint64_t offset, uint64_t length)> notify)
{
  for (auto& rs : range_tree) {
    notify(rs.start, rs.end - rs.start);
  }
}

void AvlAllocator::init_add_free(uint64_t offset, uint64_t length)
{
  std::lock_guard l(lock);
  ldout(cct, 10) << __func__ << std::hex
                 << " offset 0x" << offset
                 << " length 0x" << length
                 << std::dec << dendl;
  _add_to_tree(offset, length);
}

void AvlAllocator::init_rm_free(uint64_t offset, uint64_t length)
{
  std::lock_guard l(lock);
  ldout(cct, 10) << __func__ << std::hex
                 << " offset 0x" << offset
                 << " length 0x" << length
                 << std::dec << dendl;
  _remove_from_tree(offset, length);
}

void AvlAllocator::shutdown()
{
  std::lock_guard l(lock);
  range_size_tree.clear();
  range_tree.clear_and_dispose(dispose_rs{});
}