// ceph/src/os/bluestore/AvlAllocator.cc (ceph.git, import 15.2.4)
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3
4#include "AvlAllocator.h"
5
6#include <limits>
7
8#include "common/config_proxy.h"
9#include "common/debug.h"
10
11#define dout_context cct
12#define dout_subsys ceph_subsys_bluestore
13#undef dout_prefix
14#define dout_prefix *_dout << "AvlAllocator "
15
16MEMPOOL_DEFINE_OBJECT_FACTORY(range_seg_t, range_seg_t, bluestore_alloc);
17
namespace {
  // Light-weight stand-in for range_seg_t: used only as a lookup key
  // against range_tree and range_size_tree, so it carries just the two
  // fields the tree comparators examine.
  struct range_t {
    uint64_t start;
    uint64_t end;
  };
}
26
27/*
28 * This is a helper function that can be used by the allocator to find
29 * a suitable block to allocate. This will search the specified AVL
30 * tree looking for a block that matches the specified criteria.
31 */
32template<class Tree>
33uint64_t AvlAllocator::_block_picker(const Tree& t,
34 uint64_t *cursor,
35 uint64_t size,
36 uint64_t align)
37{
38 const auto compare = t.key_comp();
39 for (auto rs = t.lower_bound(range_t{*cursor, size}, compare);
40 rs != t.end(); ++rs) {
41 uint64_t offset = p2roundup(rs->start, align);
42 if (offset + size <= rs->end) {
43 *cursor = offset + size;
44 return offset;
45 }
46 }
47 /*
48 * If we know we've searched the whole tree (*cursor == 0), give up.
49 * Otherwise, reset the cursor to the beginning and try again.
50 */
51 if (*cursor == 0) {
52 return -1ULL;
53 }
54 *cursor = 0;
55 return _block_picker(t, cursor, size, align);
56}
57
9f95a23c
TL
58void AvlAllocator::_add_to_tree(uint64_t start, uint64_t size)
59{
e306af50 60 ceph_assert(size != 0);
9f95a23c
TL
61
62 uint64_t end = start + size;
63
64 auto rs_after = range_tree.upper_bound(range_t{start, end},
65 range_tree.key_comp());
66
67 /* Make sure we don't overlap with either of our neighbors */
68 auto rs_before = range_tree.end();
69 if (rs_after != range_tree.begin()) {
70 rs_before = std::prev(rs_after);
71 }
72
73 bool merge_before = (rs_before != range_tree.end() && rs_before->end == start);
74 bool merge_after = (rs_after != range_tree.end() && rs_after->start == end);
75
76 if (merge_before && merge_after) {
e306af50
TL
77 _range_size_tree_rm(*rs_before);
78 _range_size_tree_rm(*rs_after);
9f95a23c
TL
79 rs_after->start = rs_before->start;
80 range_tree.erase_and_dispose(rs_before, dispose_rs{});
e306af50 81 _range_size_tree_try_insert(*rs_after);
9f95a23c 82 } else if (merge_before) {
e306af50 83 _range_size_tree_rm(*rs_before);
9f95a23c 84 rs_before->end = end;
e306af50 85 _range_size_tree_try_insert(*rs_before);
9f95a23c 86 } else if (merge_after) {
e306af50 87 _range_size_tree_rm(*rs_after);
9f95a23c 88 rs_after->start = start;
e306af50 89 _range_size_tree_try_insert(*rs_after);
9f95a23c 90 } else {
e306af50 91 _try_insert_range(start, end, &rs_after);
9f95a23c 92 }
9f95a23c
TL
93}
94
e306af50
TL
95void AvlAllocator::_process_range_removal(uint64_t start, uint64_t end,
96 AvlAllocator::range_tree_t::iterator& rs)
9f95a23c 97{
9f95a23c
TL
98 bool left_over = (rs->start != start);
99 bool right_over = (rs->end != end);
100
e306af50 101 _range_size_tree_rm(*rs);
9f95a23c
TL
102
103 if (left_over && right_over) {
e306af50
TL
104 auto old_right_end = rs->end;
105 auto insert_pos = rs;
106 ceph_assert(insert_pos != range_tree.end());
107 ++insert_pos;
9f95a23c 108 rs->end = start;
e306af50
TL
109
110 // Insert tail first to be sure insert_pos hasn't been disposed.
111 // This woulnd't dispose rs though since it's out of range_size_tree.
112 // Don't care about a small chance of 'not-the-best-choice-for-removal' case
113 // which might happen if rs has the lowest size.
114 _try_insert_range(end, old_right_end, &insert_pos);
115 _range_size_tree_try_insert(*rs);
116
9f95a23c
TL
117 } else if (left_over) {
118 rs->end = start;
e306af50 119 _range_size_tree_try_insert(*rs);
9f95a23c
TL
120 } else if (right_over) {
121 rs->start = end;
e306af50 122 _range_size_tree_try_insert(*rs);
9f95a23c
TL
123 } else {
124 range_tree.erase_and_dispose(rs, dispose_rs{});
125 }
e306af50
TL
126}
127
128void AvlAllocator::_remove_from_tree(uint64_t start, uint64_t size)
129{
130 uint64_t end = start + size;
131
132 ceph_assert(size != 0);
133 ceph_assert(size <= num_free);
134
135 auto rs = range_tree.find(range_t{start, end}, range_tree.key_comp());
136 /* Make sure we completely overlap with someone */
137 ceph_assert(rs != range_tree.end());
138 ceph_assert(rs->start <= start);
139 ceph_assert(rs->end >= end);
140
141 _process_range_removal(start, end, rs);
142}
143
144void AvlAllocator::_try_remove_from_tree(uint64_t start, uint64_t size,
145 std::function<void(uint64_t, uint64_t, bool)> cb)
146{
147 uint64_t end = start + size;
148
149 ceph_assert(size != 0);
150
151 auto rs = range_tree.find(range_t{ start, end },
152 range_tree.key_comp());
153
154 if (rs == range_tree.end() || rs->start >= end) {
155 cb(start, size, false);
156 return;
157 }
158
159 do {
160
161 auto next_rs = rs;
162 ++next_rs;
163
164 if (start < rs->start) {
165 cb(start, rs->start - start, false);
166 start = rs->start;
167 }
168 auto range_end = std::min(rs->end, end);
169 _process_range_removal(start, range_end, rs);
170 cb(start, range_end - start, true);
171 start = range_end;
172
173 rs = next_rs;
174 } while (rs != range_tree.end() && rs->start < end && start < end);
175 if (start < end) {
176 cb(start, end - start, false);
177 }
178}
179
180int64_t AvlAllocator::_allocate(
181 uint64_t want,
182 uint64_t unit,
183 uint64_t max_alloc_size,
184 int64_t hint, // unused, for now!
185 PExtentVector* extents)
186{
187 uint64_t allocated = 0;
188 while (allocated < want) {
189 uint64_t offset, length;
190 int r = _allocate(std::min(max_alloc_size, want - allocated),
191 unit, &offset, &length);
192 if (r < 0) {
193 // Allocation failed.
194 break;
195 }
196 extents->emplace_back(offset, length);
197 allocated += length;
198 }
199 return allocated ? allocated : -ENOSPC;
9f95a23c
TL
200}
201
202int AvlAllocator::_allocate(
203 uint64_t size,
204 uint64_t unit,
205 uint64_t *offset,
206 uint64_t *length)
207{
9f95a23c
TL
208 uint64_t max_size = 0;
209 if (auto p = range_size_tree.rbegin(); p != range_size_tree.rend()) {
210 max_size = p->end - p->start;
211 }
212
213 bool force_range_size_alloc = false;
214 if (max_size < size) {
215 if (max_size < unit) {
216 return -ENOSPC;
217 }
218 size = p2align(max_size, unit);
e306af50 219 ceph_assert(size > 0);
9f95a23c
TL
220 force_range_size_alloc = true;
221 }
222 /*
223 * Find the largest power of 2 block size that evenly divides the
224 * requested size. This is used to try to allocate blocks with similar
225 * alignment from the same area (i.e. same cursor bucket) but it does
226 * not guarantee that other allocations sizes may exist in the same
227 * region.
228 */
229 const uint64_t align = size & -size;
e306af50 230 ceph_assert(align != 0);
9f95a23c
TL
231 uint64_t *cursor = &lbas[cbits(align) - 1];
232
233 const int free_pct = num_free * 100 / num_total;
234 uint64_t start = 0;
235 /*
236 * If we're running low on space switch to using the size
237 * sorted AVL tree (best-fit).
238 */
239 if (force_range_size_alloc ||
240 max_size < range_size_alloc_threshold ||
241 free_pct < range_size_alloc_free_pct) {
242 *cursor = 0;
243 start = _block_picker(range_size_tree, cursor, size, unit);
244 } else {
245 start = _block_picker(range_tree, cursor, size, unit);
246 }
247 if (start == -1ULL) {
248 return -ENOSPC;
249 }
250
251 _remove_from_tree(start, size);
252
253 *offset = start;
254 *length = size;
255 return 0;
256}
257
e306af50
TL
258void AvlAllocator::_release(const interval_set<uint64_t>& release_set)
259{
260 for (auto p = release_set.begin(); p != release_set.end(); ++p) {
261 const auto offset = p.get_start();
262 const auto length = p.get_len();
263 ldout(cct, 10) << __func__ << std::hex
264 << " offset 0x" << offset
265 << " length 0x" << length
266 << std::dec << dendl;
267 _add_to_tree(offset, length);
268 }
269}
270
271void AvlAllocator::_release(const PExtentVector& release_set) {
272 for (auto& e : release_set) {
273 ldout(cct, 10) << __func__ << std::hex
274 << " offset 0x" << e.offset
275 << " length 0x" << e.length
276 << std::dec << dendl;
277 _add_to_tree(e.offset, e.length);
278 }
279}
280
281void AvlAllocator::_shutdown()
282{
283 range_size_tree.clear();
284 range_tree.clear_and_dispose(dispose_rs{});
285}
286
287AvlAllocator::AvlAllocator(CephContext* cct,
288 int64_t device_size,
289 int64_t block_size,
290 uint64_t max_mem,
291 const std::string& name) :
292 Allocator(name),
293 num_total(device_size),
294 block_size(block_size),
295 range_size_alloc_threshold(
296 cct->_conf.get_val<uint64_t>("bluestore_avl_alloc_bf_threshold")),
297 range_size_alloc_free_pct(
298 cct->_conf.get_val<uint64_t>("bluestore_avl_alloc_bf_free_pct")),
299 range_count_cap(max_mem / sizeof(range_seg_t)),
300 cct(cct)
301{}
302
9f95a23c
TL
303AvlAllocator::AvlAllocator(CephContext* cct,
304 int64_t device_size,
305 int64_t block_size,
306 const std::string& name) :
307 Allocator(name),
308 num_total(device_size),
309 block_size(block_size),
310 range_size_alloc_threshold(
311 cct->_conf.get_val<uint64_t>("bluestore_avl_alloc_bf_threshold")),
312 range_size_alloc_free_pct(
313 cct->_conf.get_val<uint64_t>("bluestore_avl_alloc_bf_free_pct")),
314 cct(cct)
315{}
316
e306af50
TL
317AvlAllocator::~AvlAllocator()
318{
319 shutdown();
320}
321
9f95a23c
TL
322int64_t AvlAllocator::allocate(
323 uint64_t want,
324 uint64_t unit,
325 uint64_t max_alloc_size,
326 int64_t hint, // unused, for now!
327 PExtentVector* extents)
328{
329 ldout(cct, 10) << __func__ << std::hex
330 << " want 0x" << want
331 << " unit 0x" << unit
332 << " max_alloc_size 0x" << max_alloc_size
333 << " hint 0x" << hint
334 << std::dec << dendl;
e306af50
TL
335 ceph_assert(isp2(unit));
336 ceph_assert(want % unit == 0);
9f95a23c
TL
337
338 if (max_alloc_size == 0) {
339 max_alloc_size = want;
340 }
341 if (constexpr auto cap = std::numeric_limits<decltype(bluestore_pextent_t::length)>::max();
342 max_alloc_size >= cap) {
e306af50 343 max_alloc_size = p2align(uint64_t(cap), (uint64_t)block_size);
9f95a23c 344 }
e306af50
TL
345 std::lock_guard l(lock);
346 return _allocate(want, unit, max_alloc_size, hint, extents);
9f95a23c
TL
347}
348
e306af50 349void AvlAllocator::release(const interval_set<uint64_t>& release_set) {
9f95a23c 350 std::lock_guard l(lock);
e306af50 351 _release(release_set);
9f95a23c
TL
352}
353
354uint64_t AvlAllocator::get_free()
355{
356 std::lock_guard l(lock);
357 return num_free;
358}
359
360double AvlAllocator::get_fragmentation()
361{
362 std::lock_guard l(lock);
e306af50 363 return _get_fragmentation();
9f95a23c
TL
364}
365
366void AvlAllocator::dump()
367{
368 std::lock_guard l(lock);
e306af50
TL
369 _dump();
370}
371
372void AvlAllocator::_dump() const
373{
9f95a23c
TL
374 ldout(cct, 0) << __func__ << " range_tree: " << dendl;
375 for (auto& rs : range_tree) {
376 ldout(cct, 0) << std::hex
e306af50
TL
377 << "0x" << rs.start << "~" << rs.end
378 << std::dec
379 << dendl;
9f95a23c
TL
380 }
381
382 ldout(cct, 0) << __func__ << " range_size_tree: " << dendl;
383 for (auto& rs : range_size_tree) {
384 ldout(cct, 0) << std::hex
e306af50
TL
385 << "0x" << rs.start << "~" << rs.end
386 << std::dec
387 << dendl;
9f95a23c
TL
388 }
389}
390
391void AvlAllocator::dump(std::function<void(uint64_t offset, uint64_t length)> notify)
392{
393 for (auto& rs : range_tree) {
394 notify(rs.start, rs.end - rs.start);
395 }
396}
397
398void AvlAllocator::init_add_free(uint64_t offset, uint64_t length)
399{
400 std::lock_guard l(lock);
401 ldout(cct, 10) << __func__ << std::hex
402 << " offset 0x" << offset
403 << " length 0x" << length
404 << std::dec << dendl;
405 _add_to_tree(offset, length);
406}
407
408void AvlAllocator::init_rm_free(uint64_t offset, uint64_t length)
409{
410 std::lock_guard l(lock);
411 ldout(cct, 10) << __func__ << std::hex
412 << " offset 0x" << offset
413 << " length 0x" << length
414 << std::dec << dendl;
415 _remove_from_tree(offset, length);
416}
417
418void AvlAllocator::shutdown()
419{
420 std::lock_guard l(lock);
e306af50 421 _shutdown();
9f95a23c 422}