// ceph/src/os/bluestore/AvlAllocator.cc
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#include "AvlAllocator.h"

#include <limits>

#include "common/config_proxy.h"
#include "common/debug.h"

#define dout_context cct
#define dout_subsys ceph_subsys_bluestore
#undef dout_prefix
#define dout_prefix *_dout << "AvlAllocator "

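// range_seg_t nodes are allocated via the bluestore_alloc mempool, so the
// allocator's memory footprint shows up in mempool accounting.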
MEMPOOL_DEFINE_OBJECT_FACTORY(range_seg_t, range_seg_t, bluestore_alloc);

namespace {
// a light-weight "range_seg_t" that is only used as the key when searching
// in range_tree and range_size_tree
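// (both tree comparators only inspect the start/end members, so this plain
// struct can stand in for a full range_seg_t during lookups)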
struct range_t {
  uint64_t start;
  uint64_t end;
};
}

/*
 * This is a helper function that can be used by the allocator to find
 * a suitable block to allocate. This will search the specified AVL
 * tree looking for a block that matches the specified criteria.
 */
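/*
 * The per-alignment cursor lets a search resume where the previous one of
 * the same size class stopped, so allocations rotate through the address
 * space instead of clustering at the lowest offsets.
 */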
template<class Tree>
uint64_t AvlAllocator::_block_picker(const Tree& t,
				     uint64_t *cursor,
				     uint64_t size,
				     uint64_t align)
{
  const auto compare = t.key_comp();
  for (auto rs = t.lower_bound(range_t{*cursor, size}, compare);
       rs != t.end(); ++rs) {
    uint64_t offset = p2roundup(rs->start, align);
    if (offset + size <= rs->end) {
      *cursor = offset + size;
      return offset;
    }
  }
  /*
   * If we know we've searched the whole tree (*cursor == 0), give up.
   * Otherwise, reset the cursor to the beginning and try again.
   */
  if (*cursor == 0) {
    return -1ULL;
  }
  *cursor = 0;
  return _block_picker(t, cursor, size, align);
}

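// disposer for erase_and_dispose()/clear_and_dispose(): the intrusive trees
// only unlink their nodes, so the heap-allocated segments must be freed
// explicitly.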
namespace {
struct dispose_rs {
  void operator()(range_seg_t* p)
  {
    delete p;
  }
};
}

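/*
 * Return [start, start + size) to the free space map, merging with the
 * adjacent segment on either side when the ranges touch, so the trees
 * always hold maximal, non-overlapping extents.
 */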
void AvlAllocator::_add_to_tree(uint64_t start, uint64_t size)
{
  assert(size != 0);

  uint64_t end = start + size;

  auto rs_after = range_tree.upper_bound(range_t{start, end},
					 range_tree.key_comp());

  /* Make sure we don't overlap with either of our neighbors */
  auto rs_before = range_tree.end();
  if (rs_after != range_tree.begin()) {
    rs_before = std::prev(rs_after);
  }

  bool merge_before = (rs_before != range_tree.end() && rs_before->end == start);
  bool merge_after = (rs_after != range_tree.end() && rs_after->start == end);

  if (merge_before && merge_after) {
    range_size_tree.erase(*rs_before);
    range_size_tree.erase(*rs_after);
    rs_after->start = rs_before->start;
    range_tree.erase_and_dispose(rs_before, dispose_rs{});
    range_size_tree.insert(*rs_after);
  } else if (merge_before) {
    range_size_tree.erase(*rs_before);
    rs_before->end = end;
    range_size_tree.insert(*rs_before);
  } else if (merge_after) {
    range_size_tree.erase(*rs_after);
    rs_after->start = start;
    range_size_tree.insert(*rs_after);
  } else {
    auto new_rs = new range_seg_t{start, end};
    range_tree.insert_before(rs_after, *new_rs);
    range_size_tree.insert(*new_rs);
  }
  num_free += size;
}

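/*
 * Carve [start, start + size) out of the free space map. The range must be
 * fully contained in a single existing segment; depending on how it lines
 * up with that segment, the segment is trimmed, split in two, or removed.
 */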
void AvlAllocator::_remove_from_tree(uint64_t start, uint64_t size)
{
  uint64_t end = start + size;

  assert(size != 0);
  assert(size <= num_free);

  auto rs = range_tree.find(range_t{start, end}, range_tree.key_comp());
  /* Make sure we completely overlap with someone */
  assert(rs != range_tree.end());
  assert(rs->start <= start);
  assert(rs->end >= end);

  bool left_over = (rs->start != start);
  bool right_over = (rs->end != end);

  range_size_tree.erase(*rs);

  if (left_over && right_over) {
    auto new_seg = new range_seg_t{end, rs->end};
    rs->end = start;
    range_tree.insert(rs, *new_seg);
    range_size_tree.insert(*new_seg);
    range_size_tree.insert(*rs);
  } else if (left_over) {
    rs->end = start;
    range_size_tree.insert(*rs);
  } else if (right_over) {
    rs->start = end;
    range_size_tree.insert(*rs);
  } else {
    range_tree.erase_and_dispose(rs, dispose_rs{});
  }
  assert(num_free >= size);
  num_free -= size;
}

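/*
 * Pick and reserve a single extent of `size` bytes. Normally this walks the
 * offset-sorted tree from a per-alignment cursor (first-fit); when space is
 * tight it falls back to the size-sorted tree (best-fit), possibly shrinking
 * the request to the largest free segment, aligned down to `unit`.
 */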
int AvlAllocator::_allocate(
  uint64_t size,
  uint64_t unit,
  uint64_t *offset,
  uint64_t *length)
{
  std::lock_guard l(lock);
  uint64_t max_size = 0;
  if (auto p = range_size_tree.rbegin(); p != range_size_tree.rend()) {
    max_size = p->end - p->start;
  }

  bool force_range_size_alloc = false;
  if (max_size < size) {
    if (max_size < unit) {
      return -ENOSPC;
    }
    size = p2align(max_size, unit);
    assert(size > 0);
    force_range_size_alloc = true;
  }
  /*
   * Find the largest power of 2 block size that evenly divides the
   * requested size. This is used to try to allocate blocks with similar
   * alignment from the same area (i.e. same cursor bucket), but it does
   * not guarantee that allocations of other sizes will not be made in
   * the same region.
   */
  const uint64_t align = size & -size;
  assert(align != 0);
  uint64_t *cursor = &lbas[cbits(align) - 1];

  const int free_pct = num_free * 100 / num_total;
  uint64_t start = 0;
  /*
   * If we're running low on space, switch to using the size-sorted
   * AVL tree (best-fit).
   */
  if (force_range_size_alloc ||
      max_size < range_size_alloc_threshold ||
      free_pct < range_size_alloc_free_pct) {
    *cursor = 0;
    start = _block_picker(range_size_tree, cursor, size, unit);
  } else {
    start = _block_picker(range_tree, cursor, size, unit);
  }
  if (start == -1ULL) {
    return -ENOSPC;
  }

  _remove_from_tree(start, size);

  *offset = start;
  *length = size;
  return 0;
}

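/*
 * The best-fit cutovers are tunable: bluestore_avl_alloc_bf_threshold is the
 * largest-free-segment size below which _allocate() switches to best-fit,
 * and bluestore_avl_alloc_bf_free_pct is the free-space percentage below
 * which it does the same.
 */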
AvlAllocator::AvlAllocator(CephContext* cct,
			   int64_t device_size,
			   int64_t block_size,
			   const std::string& name) :
  Allocator(name),
  num_total(device_size),
  block_size(block_size),
  range_size_alloc_threshold(
    cct->_conf.get_val<uint64_t>("bluestore_avl_alloc_bf_threshold")),
  range_size_alloc_free_pct(
    cct->_conf.get_val<uint64_t>("bluestore_avl_alloc_bf_free_pct")),
  cct(cct)
{}

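/*
 * Satisfy `want` bytes with a series of extents, each no longer than
 * max_alloc_size. Returns the total bytes allocated, which may fall short
 * of `want`, or -ENOSPC if nothing could be allocated at all.
 */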
int64_t AvlAllocator::allocate(
  uint64_t want,
  uint64_t unit,
  uint64_t max_alloc_size,
  int64_t hint, // unused, for now!
  PExtentVector* extents)
{
  ldout(cct, 10) << __func__ << std::hex
                 << " want 0x" << want
                 << " unit 0x" << unit
                 << " max_alloc_size 0x" << max_alloc_size
                 << " hint 0x" << hint
                 << std::dec << dendl;
  assert(isp2(unit));
  assert(want % unit == 0);

  if (max_alloc_size == 0) {
    max_alloc_size = want;
  }
  if (constexpr auto cap = std::numeric_limits<decltype(bluestore_pextent_t::length)>::max();
      max_alloc_size >= cap) {
    max_alloc_size = cap;
  }

  uint64_t allocated = 0;
  while (allocated < want) {
    uint64_t offset, length;
    int r = _allocate(std::min(max_alloc_size, want - allocated),
		      unit, &offset, &length);
    if (r < 0) {
      // Allocation failed.
      break;
    }
    extents->emplace_back(offset, length);
    allocated += length;
  }
  return allocated ? allocated : -ENOSPC;
}

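// Return every extent in release_set to the free space map under a single
// lock hold.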
void AvlAllocator::release(const interval_set<uint64_t>& release_set)
{
  std::lock_guard l(lock);
  for (auto p = release_set.begin(); p != release_set.end(); ++p) {
    const auto offset = p.get_start();
    const auto length = p.get_len();
    ldout(cct, 10) << __func__ << std::hex
                   << " offset 0x" << offset
                   << " length 0x" << length
                   << std::dec << dendl;
    _add_to_tree(offset, length);
  }
}

uint64_t AvlAllocator::get_free()
{
  std::lock_guard l(lock);
  return num_free;
}

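/*
 * Fragmentation score in [0, 1]: the number of free segments beyond the
 * first, divided by the number of free blocks beyond the first. 0 means all
 * free space is one contiguous extent; the score approaches 1 as every free
 * block ends up in its own segment.
 */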
double AvlAllocator::get_fragmentation()
{
  std::lock_guard l(lock);
  auto free_blocks = p2align(num_free, block_size) / block_size;
  if (free_blocks <= 1) {
    return 0.0;
  }
  return (static_cast<double>(range_tree.size() - 1) / (free_blocks - 1));
}

void AvlAllocator::dump()
{
  std::lock_guard l(lock);
  ldout(cct, 0) << __func__ << " range_tree: " << dendl;
  for (auto& rs : range_tree) {
    ldout(cct, 0) << std::hex
                  << "0x" << rs.start << "~" << rs.end
                  << std::dec
                  << dendl;
  }

  ldout(cct, 0) << __func__ << " range_size_tree: " << dendl;
  for (auto& rs : range_size_tree) {
    ldout(cct, 0) << std::hex
                  << "0x" << rs.start << "~" << rs.end
                  << std::dec
                  << dendl;
  }
}

void AvlAllocator::dump(std::function<void(uint64_t offset, uint64_t length)> notify)
{
  // take the lock like the other public accessors, so the iteration cannot
  // race with a concurrent allocate/release mutating the tree
  std::lock_guard l(lock);
  for (auto& rs : range_tree) {
    notify(rs.start, rs.end - rs.start);
  }
}

void AvlAllocator::init_add_free(uint64_t offset, uint64_t length)
{
  std::lock_guard l(lock);
  ldout(cct, 10) << __func__ << std::hex
                 << " offset 0x" << offset
                 << " length 0x" << length
                 << std::dec << dendl;
  _add_to_tree(offset, length);
}

void AvlAllocator::init_rm_free(uint64_t offset, uint64_t length)
{
  std::lock_guard l(lock);
  ldout(cct, 10) << __func__ << std::hex
                 << " offset 0x" << offset
                 << " length 0x" << length
                 << std::dec << dendl;
  _remove_from_tree(offset, length);
}

void AvlAllocator::shutdown()
{
  std::lock_guard l(lock);
  range_size_tree.clear();
  range_tree.clear_and_dispose(dispose_rs{});
}
336 }