]>
Commit | Line | Data |
---|---|---|
9f95a23c TL |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | #include "AvlAllocator.h" | |
5 | ||
6 | #include <limits> | |
7 | ||
8 | #include "common/config_proxy.h" | |
9 | #include "common/debug.h" | |
10 | ||
11 | #define dout_context cct | |
12 | #define dout_subsys ceph_subsys_bluestore | |
13 | #undef dout_prefix | |
14 | #define dout_prefix *_dout << "AvlAllocator " | |
15 | ||
16 | MEMPOOL_DEFINE_OBJECT_FACTORY(range_seg_t, range_seg_t, bluestore_alloc); | |
17 | ||
18 | namespace { | |
19 | // a light-weight "range_seg_t", which only used as the key when searching in | |
20 | // range_tree and range_size_tree | |
21 | struct range_t { | |
22 | uint64_t start; | |
23 | uint64_t end; | |
24 | }; | |
25 | } | |
26 | ||
27 | /* | |
28 | * This is a helper function that can be used by the allocator to find | |
29 | * a suitable block to allocate. This will search the specified AVL | |
30 | * tree looking for a block that matches the specified criteria. | |
31 | */ | |
32 | template<class Tree> | |
33 | uint64_t AvlAllocator::_block_picker(const Tree& t, | |
34 | uint64_t *cursor, | |
35 | uint64_t size, | |
36 | uint64_t align) | |
37 | { | |
38 | const auto compare = t.key_comp(); | |
39 | for (auto rs = t.lower_bound(range_t{*cursor, size}, compare); | |
40 | rs != t.end(); ++rs) { | |
41 | uint64_t offset = p2roundup(rs->start, align); | |
42 | if (offset + size <= rs->end) { | |
43 | *cursor = offset + size; | |
44 | return offset; | |
45 | } | |
46 | } | |
47 | /* | |
48 | * If we know we've searched the whole tree (*cursor == 0), give up. | |
49 | * Otherwise, reset the cursor to the beginning and try again. | |
50 | */ | |
51 | if (*cursor == 0) { | |
52 | return -1ULL; | |
53 | } | |
54 | *cursor = 0; | |
55 | return _block_picker(t, cursor, size, align); | |
56 | } | |
57 | ||
9f95a23c TL |
58 | void AvlAllocator::_add_to_tree(uint64_t start, uint64_t size) |
59 | { | |
e306af50 | 60 | ceph_assert(size != 0); |
9f95a23c TL |
61 | |
62 | uint64_t end = start + size; | |
63 | ||
64 | auto rs_after = range_tree.upper_bound(range_t{start, end}, | |
65 | range_tree.key_comp()); | |
66 | ||
67 | /* Make sure we don't overlap with either of our neighbors */ | |
68 | auto rs_before = range_tree.end(); | |
69 | if (rs_after != range_tree.begin()) { | |
70 | rs_before = std::prev(rs_after); | |
71 | } | |
72 | ||
73 | bool merge_before = (rs_before != range_tree.end() && rs_before->end == start); | |
74 | bool merge_after = (rs_after != range_tree.end() && rs_after->start == end); | |
75 | ||
76 | if (merge_before && merge_after) { | |
e306af50 TL |
77 | _range_size_tree_rm(*rs_before); |
78 | _range_size_tree_rm(*rs_after); | |
9f95a23c TL |
79 | rs_after->start = rs_before->start; |
80 | range_tree.erase_and_dispose(rs_before, dispose_rs{}); | |
e306af50 | 81 | _range_size_tree_try_insert(*rs_after); |
9f95a23c | 82 | } else if (merge_before) { |
e306af50 | 83 | _range_size_tree_rm(*rs_before); |
9f95a23c | 84 | rs_before->end = end; |
e306af50 | 85 | _range_size_tree_try_insert(*rs_before); |
9f95a23c | 86 | } else if (merge_after) { |
e306af50 | 87 | _range_size_tree_rm(*rs_after); |
9f95a23c | 88 | rs_after->start = start; |
e306af50 | 89 | _range_size_tree_try_insert(*rs_after); |
9f95a23c | 90 | } else { |
e306af50 | 91 | _try_insert_range(start, end, &rs_after); |
9f95a23c | 92 | } |
9f95a23c TL |
93 | } |
94 | ||
e306af50 TL |
95 | void AvlAllocator::_process_range_removal(uint64_t start, uint64_t end, |
96 | AvlAllocator::range_tree_t::iterator& rs) | |
9f95a23c | 97 | { |
9f95a23c TL |
98 | bool left_over = (rs->start != start); |
99 | bool right_over = (rs->end != end); | |
100 | ||
e306af50 | 101 | _range_size_tree_rm(*rs); |
9f95a23c TL |
102 | |
103 | if (left_over && right_over) { | |
e306af50 TL |
104 | auto old_right_end = rs->end; |
105 | auto insert_pos = rs; | |
106 | ceph_assert(insert_pos != range_tree.end()); | |
107 | ++insert_pos; | |
9f95a23c | 108 | rs->end = start; |
e306af50 TL |
109 | |
110 | // Insert tail first to be sure insert_pos hasn't been disposed. | |
111 | // This woulnd't dispose rs though since it's out of range_size_tree. | |
112 | // Don't care about a small chance of 'not-the-best-choice-for-removal' case | |
113 | // which might happen if rs has the lowest size. | |
114 | _try_insert_range(end, old_right_end, &insert_pos); | |
115 | _range_size_tree_try_insert(*rs); | |
116 | ||
9f95a23c TL |
117 | } else if (left_over) { |
118 | rs->end = start; | |
e306af50 | 119 | _range_size_tree_try_insert(*rs); |
9f95a23c TL |
120 | } else if (right_over) { |
121 | rs->start = end; | |
e306af50 | 122 | _range_size_tree_try_insert(*rs); |
9f95a23c TL |
123 | } else { |
124 | range_tree.erase_and_dispose(rs, dispose_rs{}); | |
125 | } | |
e306af50 TL |
126 | } |
127 | ||
128 | void AvlAllocator::_remove_from_tree(uint64_t start, uint64_t size) | |
129 | { | |
130 | uint64_t end = start + size; | |
131 | ||
132 | ceph_assert(size != 0); | |
133 | ceph_assert(size <= num_free); | |
134 | ||
135 | auto rs = range_tree.find(range_t{start, end}, range_tree.key_comp()); | |
136 | /* Make sure we completely overlap with someone */ | |
137 | ceph_assert(rs != range_tree.end()); | |
138 | ceph_assert(rs->start <= start); | |
139 | ceph_assert(rs->end >= end); | |
140 | ||
141 | _process_range_removal(start, end, rs); | |
142 | } | |
143 | ||
144 | void AvlAllocator::_try_remove_from_tree(uint64_t start, uint64_t size, | |
145 | std::function<void(uint64_t, uint64_t, bool)> cb) | |
146 | { | |
147 | uint64_t end = start + size; | |
148 | ||
149 | ceph_assert(size != 0); | |
150 | ||
151 | auto rs = range_tree.find(range_t{ start, end }, | |
152 | range_tree.key_comp()); | |
153 | ||
154 | if (rs == range_tree.end() || rs->start >= end) { | |
155 | cb(start, size, false); | |
156 | return; | |
157 | } | |
158 | ||
159 | do { | |
160 | ||
161 | auto next_rs = rs; | |
162 | ++next_rs; | |
163 | ||
164 | if (start < rs->start) { | |
165 | cb(start, rs->start - start, false); | |
166 | start = rs->start; | |
167 | } | |
168 | auto range_end = std::min(rs->end, end); | |
169 | _process_range_removal(start, range_end, rs); | |
170 | cb(start, range_end - start, true); | |
171 | start = range_end; | |
172 | ||
173 | rs = next_rs; | |
174 | } while (rs != range_tree.end() && rs->start < end && start < end); | |
175 | if (start < end) { | |
176 | cb(start, end - start, false); | |
177 | } | |
178 | } | |
179 | ||
180 | int64_t AvlAllocator::_allocate( | |
181 | uint64_t want, | |
182 | uint64_t unit, | |
183 | uint64_t max_alloc_size, | |
184 | int64_t hint, // unused, for now! | |
185 | PExtentVector* extents) | |
186 | { | |
187 | uint64_t allocated = 0; | |
188 | while (allocated < want) { | |
189 | uint64_t offset, length; | |
190 | int r = _allocate(std::min(max_alloc_size, want - allocated), | |
191 | unit, &offset, &length); | |
192 | if (r < 0) { | |
193 | // Allocation failed. | |
194 | break; | |
195 | } | |
196 | extents->emplace_back(offset, length); | |
197 | allocated += length; | |
198 | } | |
199 | return allocated ? allocated : -ENOSPC; | |
9f95a23c TL |
200 | } |
201 | ||
202 | int AvlAllocator::_allocate( | |
203 | uint64_t size, | |
204 | uint64_t unit, | |
205 | uint64_t *offset, | |
206 | uint64_t *length) | |
207 | { | |
9f95a23c TL |
208 | uint64_t max_size = 0; |
209 | if (auto p = range_size_tree.rbegin(); p != range_size_tree.rend()) { | |
210 | max_size = p->end - p->start; | |
211 | } | |
212 | ||
213 | bool force_range_size_alloc = false; | |
214 | if (max_size < size) { | |
215 | if (max_size < unit) { | |
216 | return -ENOSPC; | |
217 | } | |
218 | size = p2align(max_size, unit); | |
e306af50 | 219 | ceph_assert(size > 0); |
9f95a23c TL |
220 | force_range_size_alloc = true; |
221 | } | |
222 | /* | |
223 | * Find the largest power of 2 block size that evenly divides the | |
224 | * requested size. This is used to try to allocate blocks with similar | |
225 | * alignment from the same area (i.e. same cursor bucket) but it does | |
226 | * not guarantee that other allocations sizes may exist in the same | |
227 | * region. | |
228 | */ | |
229 | const uint64_t align = size & -size; | |
e306af50 | 230 | ceph_assert(align != 0); |
9f95a23c TL |
231 | uint64_t *cursor = &lbas[cbits(align) - 1]; |
232 | ||
233 | const int free_pct = num_free * 100 / num_total; | |
234 | uint64_t start = 0; | |
235 | /* | |
236 | * If we're running low on space switch to using the size | |
237 | * sorted AVL tree (best-fit). | |
238 | */ | |
239 | if (force_range_size_alloc || | |
240 | max_size < range_size_alloc_threshold || | |
241 | free_pct < range_size_alloc_free_pct) { | |
242 | *cursor = 0; | |
243 | start = _block_picker(range_size_tree, cursor, size, unit); | |
244 | } else { | |
245 | start = _block_picker(range_tree, cursor, size, unit); | |
246 | } | |
247 | if (start == -1ULL) { | |
248 | return -ENOSPC; | |
249 | } | |
250 | ||
251 | _remove_from_tree(start, size); | |
252 | ||
253 | *offset = start; | |
254 | *length = size; | |
255 | return 0; | |
256 | } | |
257 | ||
e306af50 TL |
258 | void AvlAllocator::_release(const interval_set<uint64_t>& release_set) |
259 | { | |
260 | for (auto p = release_set.begin(); p != release_set.end(); ++p) { | |
261 | const auto offset = p.get_start(); | |
262 | const auto length = p.get_len(); | |
263 | ldout(cct, 10) << __func__ << std::hex | |
264 | << " offset 0x" << offset | |
265 | << " length 0x" << length | |
266 | << std::dec << dendl; | |
267 | _add_to_tree(offset, length); | |
268 | } | |
269 | } | |
270 | ||
271 | void AvlAllocator::_release(const PExtentVector& release_set) { | |
272 | for (auto& e : release_set) { | |
273 | ldout(cct, 10) << __func__ << std::hex | |
274 | << " offset 0x" << e.offset | |
275 | << " length 0x" << e.length | |
276 | << std::dec << dendl; | |
277 | _add_to_tree(e.offset, e.length); | |
278 | } | |
279 | } | |
280 | ||
281 | void AvlAllocator::_shutdown() | |
282 | { | |
283 | range_size_tree.clear(); | |
284 | range_tree.clear_and_dispose(dispose_rs{}); | |
285 | } | |
286 | ||
287 | AvlAllocator::AvlAllocator(CephContext* cct, | |
288 | int64_t device_size, | |
289 | int64_t block_size, | |
290 | uint64_t max_mem, | |
291 | const std::string& name) : | |
292 | Allocator(name), | |
293 | num_total(device_size), | |
294 | block_size(block_size), | |
295 | range_size_alloc_threshold( | |
296 | cct->_conf.get_val<uint64_t>("bluestore_avl_alloc_bf_threshold")), | |
297 | range_size_alloc_free_pct( | |
298 | cct->_conf.get_val<uint64_t>("bluestore_avl_alloc_bf_free_pct")), | |
299 | range_count_cap(max_mem / sizeof(range_seg_t)), | |
300 | cct(cct) | |
301 | {} | |
302 | ||
9f95a23c TL |
303 | AvlAllocator::AvlAllocator(CephContext* cct, |
304 | int64_t device_size, | |
305 | int64_t block_size, | |
306 | const std::string& name) : | |
307 | Allocator(name), | |
308 | num_total(device_size), | |
309 | block_size(block_size), | |
310 | range_size_alloc_threshold( | |
311 | cct->_conf.get_val<uint64_t>("bluestore_avl_alloc_bf_threshold")), | |
312 | range_size_alloc_free_pct( | |
313 | cct->_conf.get_val<uint64_t>("bluestore_avl_alloc_bf_free_pct")), | |
314 | cct(cct) | |
315 | {} | |
316 | ||
e306af50 TL |
317 | AvlAllocator::~AvlAllocator() |
318 | { | |
319 | shutdown(); | |
320 | } | |
321 | ||
9f95a23c TL |
322 | int64_t AvlAllocator::allocate( |
323 | uint64_t want, | |
324 | uint64_t unit, | |
325 | uint64_t max_alloc_size, | |
326 | int64_t hint, // unused, for now! | |
327 | PExtentVector* extents) | |
328 | { | |
329 | ldout(cct, 10) << __func__ << std::hex | |
330 | << " want 0x" << want | |
331 | << " unit 0x" << unit | |
332 | << " max_alloc_size 0x" << max_alloc_size | |
333 | << " hint 0x" << hint | |
334 | << std::dec << dendl; | |
e306af50 TL |
335 | ceph_assert(isp2(unit)); |
336 | ceph_assert(want % unit == 0); | |
9f95a23c TL |
337 | |
338 | if (max_alloc_size == 0) { | |
339 | max_alloc_size = want; | |
340 | } | |
341 | if (constexpr auto cap = std::numeric_limits<decltype(bluestore_pextent_t::length)>::max(); | |
342 | max_alloc_size >= cap) { | |
e306af50 | 343 | max_alloc_size = p2align(uint64_t(cap), (uint64_t)block_size); |
9f95a23c | 344 | } |
e306af50 TL |
345 | std::lock_guard l(lock); |
346 | return _allocate(want, unit, max_alloc_size, hint, extents); | |
9f95a23c TL |
347 | } |
348 | ||
e306af50 | 349 | void AvlAllocator::release(const interval_set<uint64_t>& release_set) { |
9f95a23c | 350 | std::lock_guard l(lock); |
e306af50 | 351 | _release(release_set); |
9f95a23c TL |
352 | } |
353 | ||
354 | uint64_t AvlAllocator::get_free() | |
355 | { | |
356 | std::lock_guard l(lock); | |
357 | return num_free; | |
358 | } | |
359 | ||
360 | double AvlAllocator::get_fragmentation() | |
361 | { | |
362 | std::lock_guard l(lock); | |
e306af50 | 363 | return _get_fragmentation(); |
9f95a23c TL |
364 | } |
365 | ||
366 | void AvlAllocator::dump() | |
367 | { | |
368 | std::lock_guard l(lock); | |
e306af50 TL |
369 | _dump(); |
370 | } | |
371 | ||
372 | void AvlAllocator::_dump() const | |
373 | { | |
9f95a23c TL |
374 | ldout(cct, 0) << __func__ << " range_tree: " << dendl; |
375 | for (auto& rs : range_tree) { | |
376 | ldout(cct, 0) << std::hex | |
e306af50 TL |
377 | << "0x" << rs.start << "~" << rs.end |
378 | << std::dec | |
379 | << dendl; | |
9f95a23c TL |
380 | } |
381 | ||
382 | ldout(cct, 0) << __func__ << " range_size_tree: " << dendl; | |
383 | for (auto& rs : range_size_tree) { | |
384 | ldout(cct, 0) << std::hex | |
e306af50 TL |
385 | << "0x" << rs.start << "~" << rs.end |
386 | << std::dec | |
387 | << dendl; | |
9f95a23c TL |
388 | } |
389 | } | |
390 | ||
391 | void AvlAllocator::dump(std::function<void(uint64_t offset, uint64_t length)> notify) | |
392 | { | |
393 | for (auto& rs : range_tree) { | |
394 | notify(rs.start, rs.end - rs.start); | |
395 | } | |
396 | } | |
397 | ||
398 | void AvlAllocator::init_add_free(uint64_t offset, uint64_t length) | |
399 | { | |
400 | std::lock_guard l(lock); | |
401 | ldout(cct, 10) << __func__ << std::hex | |
402 | << " offset 0x" << offset | |
403 | << " length 0x" << length | |
404 | << std::dec << dendl; | |
405 | _add_to_tree(offset, length); | |
406 | } | |
407 | ||
408 | void AvlAllocator::init_rm_free(uint64_t offset, uint64_t length) | |
409 | { | |
410 | std::lock_guard l(lock); | |
411 | ldout(cct, 10) << __func__ << std::hex | |
412 | << " offset 0x" << offset | |
413 | << " length 0x" << length | |
414 | << std::dec << dendl; | |
415 | _remove_from_tree(offset, length); | |
416 | } | |
417 | ||
418 | void AvlAllocator::shutdown() | |
419 | { | |
420 | std::lock_guard l(lock); | |
e306af50 | 421 | _shutdown(); |
9f95a23c | 422 | } |