]>
Commit | Line | Data |
---|---|---|
a8e16298 TL |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Bitmap based in-memory allocator implementation. | |
5 | * Author: Igor Fedotov, ifedotov@suse.com | |
6 | * | |
7 | */ | |
8 | ||
9 | #ifndef __FAST_BITMAP_ALLOCATOR_IMPL_H | |
10 | #define __FAST_BITMAP_ALLOCATOR_IMPL_H | |
a8e16298 TL |
11 | #include "include/intarith.h" |
12 | ||
13 | #include <vector> | |
14 | #include <algorithm> | |
15 | #include <mutex> | |
16 | ||
17 | typedef uint64_t slot_t; | |
18 | ||
19 | #ifdef NON_CEPH_BUILD | |
20 | #include <assert.h> | |
21 | struct interval_t | |
22 | { | |
23 | uint64_t offset = 0; | |
24 | uint64_t length = 0; | |
25 | ||
26 | interval_t() {} | |
27 | interval_t(uint64_t o, uint64_t l) : offset(o), length(l) {} | |
28 | interval_t(const interval_t &ext) : | |
29 | offset(ext.offset), length(ext.length) {} | |
30 | }; | |
31 | typedef std::vector<interval_t> interval_vector_t; | |
32 | typedef std::vector<slot_t> slot_vector_t; | |
33 | #else | |
11fdf7f2 | 34 | #include "include/ceph_assert.h" |
a8e16298 TL |
35 | #include "common/likely.h" |
36 | #include "os/bluestore/bluestore_types.h" | |
37 | #include "include/mempool.h" | |
11fdf7f2 | 38 | #include "common/ceph_mutex.h" |
a8e16298 TL |
39 | |
40 | typedef bluestore_interval_t<uint64_t, uint64_t> interval_t; | |
41 | typedef PExtentVector interval_vector_t; | |
42 | ||
43 | typedef mempool::bluestore_alloc::vector<slot_t> slot_vector_t; | |
44 | ||
45 | #endif | |
46 | ||
47 | // fitting into cache line on x86_64 | |
9f95a23c TL |
48 | static const size_t slots_per_slotset = 8; // 8 slots per set |
49 | static const size_t slotset_bytes = sizeof(slot_t) * slots_per_slotset; | |
a8e16298 TL |
50 | static const size_t bits_per_slot = sizeof(slot_t) * 8; |
51 | static const size_t bits_per_slotset = slotset_bytes * 8; | |
52 | static const slot_t all_slot_set = 0xffffffffffffffff; | |
53 | static const slot_t all_slot_clear = 0; | |
54 | ||
55 | inline size_t find_next_set_bit(slot_t slot_val, size_t start_pos) | |
56 | { | |
57 | #ifdef __GNUC__ | |
58 | if (start_pos == 0) { | |
59 | start_pos = __builtin_ffsll(slot_val); | |
60 | return start_pos ? start_pos - 1 : bits_per_slot; | |
61 | } | |
62 | #endif | |
63 | slot_t mask = slot_t(1) << start_pos; | |
64 | while (start_pos < bits_per_slot && !(slot_val & mask)) { | |
65 | mask <<= 1; | |
66 | ++start_pos; | |
67 | } | |
68 | return start_pos; | |
69 | } | |
70 | ||
71 | ||
72 | class AllocatorLevel | |
73 | { | |
74 | protected: | |
75 | ||
76 | virtual uint64_t _children_per_slot() const = 0; | |
77 | virtual uint64_t _level_granularity() const = 0; | |
78 | ||
79 | public: | |
80 | static uint64_t l0_dives; | |
81 | static uint64_t l0_iterations; | |
82 | static uint64_t l0_inner_iterations; | |
83 | static uint64_t alloc_fragments; | |
84 | static uint64_t alloc_fragments_fast; | |
85 | static uint64_t l2_allocs; | |
86 | ||
87 | virtual ~AllocatorLevel() | |
88 | {} | |
89 | ||
90 | virtual void collect_stats( | |
91 | std::map<size_t, size_t>& bins_overall) = 0; | |
92 | ||
93 | }; | |
94 | ||
95 | class AllocatorLevel01 : public AllocatorLevel | |
96 | { | |
97 | protected: | |
98 | slot_vector_t l0; // set bit means free entry | |
99 | slot_vector_t l1; | |
100 | uint64_t l0_granularity = 0; // space per entry | |
101 | uint64_t l1_granularity = 0; // space per entry | |
102 | ||
103 | size_t partial_l1_count = 0; | |
104 | size_t unalloc_l1_count = 0; | |
105 | ||
106 | double get_fragmentation() const { | |
107 | double res = 0.0; | |
108 | auto total = unalloc_l1_count + partial_l1_count; | |
109 | if (total) { | |
110 | res = double(partial_l1_count) / double(total); | |
111 | } | |
112 | return res; | |
113 | } | |
114 | ||
115 | uint64_t _level_granularity() const override | |
116 | { | |
117 | return l1_granularity; | |
118 | } | |
119 | ||
120 | inline bool _is_slot_fully_allocated(uint64_t idx) const { | |
121 | return l1[idx] == all_slot_clear; | |
122 | } | |
123 | public: | |
124 | inline uint64_t get_min_alloc_size() const | |
125 | { | |
126 | return l0_granularity; | |
127 | } | |
128 | ||
129 | }; | |
130 | ||
131 | template <class T> | |
132 | class AllocatorLevel02; | |
133 | ||
134 | class AllocatorLevel01Loose : public AllocatorLevel01 | |
135 | { | |
136 | enum { | |
137 | L1_ENTRY_WIDTH = 2, | |
138 | L1_ENTRY_MASK = (1 << L1_ENTRY_WIDTH) - 1, | |
139 | L1_ENTRY_FULL = 0x00, | |
140 | L1_ENTRY_PARTIAL = 0x01, | |
141 | L1_ENTRY_NOT_USED = 0x02, | |
142 | L1_ENTRY_FREE = 0x03, | |
eafe8130 | 143 | L1_ENTRIES_PER_SLOT = bits_per_slot / L1_ENTRY_WIDTH, //32 |
9f95a23c | 144 | L0_ENTRIES_PER_SLOT = bits_per_slot, // 64 |
a8e16298 TL |
145 | }; |
146 | uint64_t _children_per_slot() const override | |
147 | { | |
9f95a23c | 148 | return L1_ENTRIES_PER_SLOT; |
a8e16298 TL |
149 | } |
150 | ||
151 | interval_t _get_longest_from_l0(uint64_t pos0, uint64_t pos1, | |
152 | uint64_t min_length, interval_t* tail) const; | |
153 | ||
154 | inline void _fragment_and_emplace(uint64_t max_length, uint64_t offset, | |
155 | uint64_t len, | |
156 | interval_vector_t* res) | |
157 | { | |
158 | auto it = res->rbegin(); | |
159 | if (max_length) { | |
160 | if (it != res->rend() && it->offset + it->length == offset) { | |
161 | auto l = max_length - it->length; | |
162 | if (l >= len) { | |
163 | it->length += len; | |
164 | return; | |
165 | } else { | |
166 | offset += l; | |
167 | len -= l; | |
168 | it->length += l; | |
169 | } | |
170 | } | |
171 | ||
172 | while (len > max_length) { | |
173 | res->emplace_back(offset, max_length); | |
174 | offset += max_length; | |
175 | len -= max_length; | |
176 | } | |
177 | res->emplace_back(offset, len); | |
178 | return; | |
179 | } | |
180 | ||
181 | if (it != res->rend() && it->offset + it->length == offset) { | |
182 | it->length += len; | |
183 | } else { | |
184 | res->emplace_back(offset, len); | |
185 | } | |
186 | } | |
187 | ||
188 | bool _allocate_l0(uint64_t length, | |
189 | uint64_t max_length, | |
190 | uint64_t l0_pos0, uint64_t l0_pos1, | |
191 | uint64_t* allocated, | |
192 | interval_vector_t* res) | |
193 | { | |
9f95a23c | 194 | uint64_t d0 = L0_ENTRIES_PER_SLOT; |
a8e16298 TL |
195 | |
196 | ++l0_dives; | |
197 | ||
11fdf7f2 TL |
198 | ceph_assert(l0_pos0 < l0_pos1); |
199 | ceph_assert(length > *allocated); | |
9f95a23c TL |
200 | ceph_assert(0 == (l0_pos0 % (slots_per_slotset * d0))); |
201 | ceph_assert(0 == (l0_pos1 % (slots_per_slotset * d0))); | |
11fdf7f2 | 202 | ceph_assert(((length - *allocated) % l0_granularity) == 0); |
a8e16298 TL |
203 | |
204 | uint64_t need_entries = (length - *allocated) / l0_granularity; | |
205 | ||
206 | for (auto idx = l0_pos0 / d0; (idx < l0_pos1 / d0) && (length > *allocated); | |
207 | ++idx) { | |
208 | ++l0_iterations; | |
209 | slot_t& slot_val = l0[idx]; | |
210 | auto base = idx * d0; | |
211 | if (slot_val == all_slot_clear) { | |
212 | continue; | |
213 | } else if (slot_val == all_slot_set) { | |
214 | uint64_t to_alloc = std::min(need_entries, d0); | |
215 | *allocated += to_alloc * l0_granularity; | |
216 | ++alloc_fragments; | |
217 | need_entries -= to_alloc; | |
218 | ||
219 | _fragment_and_emplace(max_length, base * l0_granularity, | |
220 | to_alloc * l0_granularity, res); | |
221 | ||
222 | if (to_alloc == d0) { | |
223 | slot_val = all_slot_clear; | |
224 | } else { | |
225 | _mark_alloc_l0(base, base + to_alloc); | |
226 | } | |
227 | continue; | |
228 | } | |
229 | ||
230 | auto free_pos = find_next_set_bit(slot_val, 0); | |
11fdf7f2 | 231 | ceph_assert(free_pos < bits_per_slot); |
a8e16298 TL |
232 | auto next_pos = free_pos + 1; |
233 | while (next_pos < bits_per_slot && | |
234 | (next_pos - free_pos) < need_entries) { | |
235 | ++l0_inner_iterations; | |
236 | ||
237 | if (0 == (slot_val & (slot_t(1) << next_pos))) { | |
238 | auto to_alloc = (next_pos - free_pos); | |
239 | *allocated += to_alloc * l0_granularity; | |
240 | ++alloc_fragments; | |
241 | need_entries -= to_alloc; | |
242 | _fragment_and_emplace(max_length, (base + free_pos) * l0_granularity, | |
243 | to_alloc * l0_granularity, res); | |
244 | _mark_alloc_l0(base + free_pos, base + next_pos); | |
245 | free_pos = find_next_set_bit(slot_val, next_pos + 1); | |
246 | next_pos = free_pos + 1; | |
247 | } else { | |
248 | ++next_pos; | |
249 | } | |
250 | } | |
251 | if (need_entries && free_pos < bits_per_slot) { | |
252 | auto to_alloc = std::min(need_entries, d0 - free_pos); | |
253 | *allocated += to_alloc * l0_granularity; | |
254 | ++alloc_fragments; | |
255 | need_entries -= to_alloc; | |
256 | _fragment_and_emplace(max_length, (base + free_pos) * l0_granularity, | |
257 | to_alloc * l0_granularity, res); | |
258 | _mark_alloc_l0(base + free_pos, base + free_pos + to_alloc); | |
259 | } | |
260 | } | |
261 | return _is_empty_l0(l0_pos0, l0_pos1); | |
262 | } | |
263 | ||
264 | protected: | |
265 | ||
266 | friend class AllocatorLevel02<AllocatorLevel01Loose>; | |
267 | ||
268 | void _init(uint64_t capacity, uint64_t _alloc_unit, bool mark_as_free = true) | |
269 | { | |
270 | l0_granularity = _alloc_unit; | |
271 | // 512 bits at L0 mapped to L1 entry | |
272 | l1_granularity = l0_granularity * bits_per_slotset; | |
273 | ||
274 | // capacity to have slot alignment at l1 | |
275 | auto aligned_capacity = | |
11fdf7f2 | 276 | p2roundup((int64_t)capacity, |
9f95a23c | 277 | int64_t(l1_granularity * slots_per_slotset * _children_per_slot())); |
a8e16298 TL |
278 | size_t slot_count = |
279 | aligned_capacity / l1_granularity / _children_per_slot(); | |
280 | // we use set bit(s) as a marker for (partially) free entry | |
281 | l1.resize(slot_count, mark_as_free ? all_slot_set : all_slot_clear); | |
282 | ||
283 | // l0 slot count | |
284 | size_t slot_count_l0 = aligned_capacity / _alloc_unit / bits_per_slot; | |
285 | // we use set bit(s) as a marker for (partially) free entry | |
286 | l0.resize(slot_count_l0, mark_as_free ? all_slot_set : all_slot_clear); | |
287 | ||
288 | partial_l1_count = unalloc_l1_count = 0; | |
289 | if (mark_as_free) { | |
290 | unalloc_l1_count = slot_count * _children_per_slot(); | |
11fdf7f2 | 291 | auto l0_pos_no_use = p2roundup((int64_t)capacity, (int64_t)l0_granularity) / l0_granularity; |
a8e16298 TL |
292 | _mark_alloc_l1_l0(l0_pos_no_use, aligned_capacity / l0_granularity); |
293 | } | |
294 | } | |
295 | ||
296 | struct search_ctx_t | |
297 | { | |
298 | size_t partial_count = 0; | |
299 | size_t free_count = 0; | |
300 | uint64_t free_l1_pos = 0; | |
301 | ||
302 | uint64_t min_affordable_len = 0; | |
303 | uint64_t min_affordable_offs = 0; | |
304 | uint64_t affordable_len = 0; | |
305 | uint64_t affordable_offs = 0; | |
306 | ||
307 | bool fully_processed = false; | |
308 | ||
309 | void reset() | |
310 | { | |
311 | *this = search_ctx_t(); | |
312 | } | |
313 | }; | |
314 | enum { | |
315 | NO_STOP, | |
316 | STOP_ON_EMPTY, | |
317 | STOP_ON_PARTIAL, | |
318 | }; | |
319 | void _analyze_partials(uint64_t pos_start, uint64_t pos_end, | |
320 | uint64_t length, uint64_t min_length, int mode, | |
321 | search_ctx_t* ctx); | |
322 | ||
323 | void _mark_l1_on_l0(int64_t l0_pos, int64_t l0_pos_end); | |
324 | void _mark_alloc_l0(int64_t l0_pos_start, int64_t l0_pos_end); | |
e306af50 TL |
325 | uint64_t _claim_free_to_left_l0(int64_t l0_pos_start); |
326 | uint64_t _claim_free_to_right_l0(int64_t l0_pos_start); | |
327 | ||
a8e16298 TL |
328 | |
329 | void _mark_alloc_l1_l0(int64_t l0_pos_start, int64_t l0_pos_end) | |
330 | { | |
331 | _mark_alloc_l0(l0_pos_start, l0_pos_end); | |
11fdf7f2 TL |
332 | l0_pos_start = p2align(l0_pos_start, int64_t(bits_per_slotset)); |
333 | l0_pos_end = p2roundup(l0_pos_end, int64_t(bits_per_slotset)); | |
a8e16298 TL |
334 | _mark_l1_on_l0(l0_pos_start, l0_pos_end); |
335 | } | |
336 | ||
337 | void _mark_free_l0(int64_t l0_pos_start, int64_t l0_pos_end) | |
338 | { | |
9f95a23c | 339 | auto d0 = L0_ENTRIES_PER_SLOT; |
a8e16298 TL |
340 | |
341 | auto pos = l0_pos_start; | |
342 | slot_t bits = (slot_t)1 << (l0_pos_start % d0); | |
81eedcae | 343 | slot_t* val_s = &l0[pos / d0]; |
11fdf7f2 TL |
344 | int64_t pos_e = std::min(l0_pos_end, |
345 | p2roundup<int64_t>(l0_pos_start + 1, d0)); | |
a8e16298 | 346 | while (pos < pos_e) { |
81eedcae | 347 | *val_s |= bits; |
a8e16298 TL |
348 | bits <<= 1; |
349 | pos++; | |
350 | } | |
11fdf7f2 | 351 | pos_e = std::min(l0_pos_end, p2align<int64_t>(l0_pos_end, d0)); |
a8e16298 | 352 | while (pos < pos_e) { |
81eedcae | 353 | *(++val_s) = all_slot_set; |
a8e16298 TL |
354 | pos += d0; |
355 | } | |
356 | bits = 1; | |
81eedcae | 357 | ++val_s; |
a8e16298 | 358 | while (pos < l0_pos_end) { |
81eedcae | 359 | *val_s |= bits; |
a8e16298 TL |
360 | bits <<= 1; |
361 | pos++; | |
362 | } | |
363 | } | |
364 | ||
365 | void _mark_free_l1_l0(int64_t l0_pos_start, int64_t l0_pos_end) | |
366 | { | |
367 | _mark_free_l0(l0_pos_start, l0_pos_end); | |
11fdf7f2 TL |
368 | l0_pos_start = p2align(l0_pos_start, int64_t(bits_per_slotset)); |
369 | l0_pos_end = p2roundup(l0_pos_end, int64_t(bits_per_slotset)); | |
a8e16298 TL |
370 | _mark_l1_on_l0(l0_pos_start, l0_pos_end); |
371 | } | |
372 | ||
373 | bool _is_empty_l0(uint64_t l0_pos, uint64_t l0_pos_end) | |
374 | { | |
375 | bool no_free = true; | |
9f95a23c | 376 | uint64_t d = slots_per_slotset * L0_ENTRIES_PER_SLOT; |
11fdf7f2 TL |
377 | ceph_assert(0 == (l0_pos % d)); |
378 | ceph_assert(0 == (l0_pos_end % d)); | |
a8e16298 | 379 | |
9f95a23c TL |
380 | auto idx = l0_pos / L0_ENTRIES_PER_SLOT; |
381 | auto idx_end = l0_pos_end / L0_ENTRIES_PER_SLOT; | |
a8e16298 TL |
382 | while (idx < idx_end && no_free) { |
383 | no_free = l0[idx] == all_slot_clear; | |
384 | ++idx; | |
385 | } | |
386 | return no_free; | |
387 | } | |
388 | bool _is_empty_l1(uint64_t l1_pos, uint64_t l1_pos_end) | |
389 | { | |
390 | bool no_free = true; | |
9f95a23c | 391 | uint64_t d = slots_per_slotset * _children_per_slot(); |
11fdf7f2 TL |
392 | ceph_assert(0 == (l1_pos % d)); |
393 | ceph_assert(0 == (l1_pos_end % d)); | |
a8e16298 | 394 | |
9f95a23c TL |
395 | auto idx = l1_pos / L1_ENTRIES_PER_SLOT; |
396 | auto idx_end = l1_pos_end / L1_ENTRIES_PER_SLOT; | |
a8e16298 TL |
397 | while (idx < idx_end && no_free) { |
398 | no_free = _is_slot_fully_allocated(idx); | |
399 | ++idx; | |
400 | } | |
401 | return no_free; | |
402 | } | |
403 | ||
404 | interval_t _allocate_l1_contiguous(uint64_t length, | |
405 | uint64_t min_length, uint64_t max_length, | |
406 | uint64_t pos_start, uint64_t pos_end); | |
407 | ||
408 | bool _allocate_l1(uint64_t length, | |
409 | uint64_t min_length, uint64_t max_length, | |
410 | uint64_t l1_pos_start, uint64_t l1_pos_end, | |
411 | uint64_t* allocated, | |
412 | interval_vector_t* res); | |
413 | ||
11fdf7f2 | 414 | uint64_t _mark_alloc_l1(uint64_t offset, uint64_t length) |
a8e16298 | 415 | { |
11fdf7f2 TL |
416 | uint64_t l0_pos_start = offset / l0_granularity; |
417 | uint64_t l0_pos_end = p2roundup(offset + length, l0_granularity) / l0_granularity; | |
a8e16298 TL |
418 | _mark_alloc_l1_l0(l0_pos_start, l0_pos_end); |
419 | return l0_granularity * (l0_pos_end - l0_pos_start); | |
420 | } | |
421 | ||
422 | uint64_t _free_l1(uint64_t offs, uint64_t len) | |
423 | { | |
424 | uint64_t l0_pos_start = offs / l0_granularity; | |
11fdf7f2 | 425 | uint64_t l0_pos_end = p2roundup(offs + len, l0_granularity) / l0_granularity; |
a8e16298 TL |
426 | _mark_free_l1_l0(l0_pos_start, l0_pos_end); |
427 | return l0_granularity * (l0_pos_end - l0_pos_start); | |
428 | } | |
429 | ||
e306af50 TL |
430 | uint64_t claim_free_to_left_l1(uint64_t offs) |
431 | { | |
432 | uint64_t l0_pos_end = offs / l0_granularity; | |
433 | uint64_t l0_pos_start = _claim_free_to_left_l0(l0_pos_end); | |
434 | if (l0_pos_start < l0_pos_end) { | |
435 | _mark_l1_on_l0( | |
436 | p2align(l0_pos_start, uint64_t(bits_per_slotset)), | |
437 | p2roundup(l0_pos_end, uint64_t(bits_per_slotset))); | |
438 | return l0_granularity * (l0_pos_end - l0_pos_start); | |
439 | } | |
440 | return 0; | |
441 | } | |
442 | ||
443 | uint64_t claim_free_to_right_l1(uint64_t offs) | |
444 | { | |
445 | uint64_t l0_pos_start = offs / l0_granularity; | |
446 | uint64_t l0_pos_end = _claim_free_to_right_l0(l0_pos_start); | |
447 | ||
448 | if (l0_pos_start < l0_pos_end) { | |
449 | _mark_l1_on_l0( | |
450 | p2align(l0_pos_start, uint64_t(bits_per_slotset)), | |
451 | p2roundup(l0_pos_end, uint64_t(bits_per_slotset))); | |
452 | return l0_granularity * (l0_pos_end - l0_pos_start); | |
453 | } | |
454 | return 0; | |
455 | } | |
456 | ||
457 | ||
a8e16298 TL |
458 | public: |
459 | uint64_t debug_get_allocated(uint64_t pos0 = 0, uint64_t pos1 = 0) | |
460 | { | |
461 | if (pos1 == 0) { | |
9f95a23c | 462 | pos1 = l1.size() * L1_ENTRIES_PER_SLOT; |
a8e16298 TL |
463 | } |
464 | auto avail = debug_get_free(pos0, pos1); | |
465 | return (pos1 - pos0) * l1_granularity - avail; | |
466 | } | |
467 | ||
468 | uint64_t debug_get_free(uint64_t l1_pos0 = 0, uint64_t l1_pos1 = 0) | |
469 | { | |
9f95a23c TL |
470 | ceph_assert(0 == (l1_pos0 % L1_ENTRIES_PER_SLOT)); |
471 | ceph_assert(0 == (l1_pos1 % L1_ENTRIES_PER_SLOT)); | |
a8e16298 | 472 | |
9f95a23c TL |
473 | auto idx0 = l1_pos0 * slots_per_slotset; |
474 | auto idx1 = l1_pos1 * slots_per_slotset; | |
a8e16298 TL |
475 | |
476 | if (idx1 == 0) { | |
477 | idx1 = l0.size(); | |
478 | } | |
479 | ||
480 | uint64_t res = 0; | |
481 | for (uint64_t i = idx0; i < idx1; ++i) { | |
482 | auto v = l0[i]; | |
483 | if (v == all_slot_set) { | |
9f95a23c | 484 | res += L0_ENTRIES_PER_SLOT; |
a8e16298 TL |
485 | } else if (v != all_slot_clear) { |
486 | size_t cnt = 0; | |
487 | #ifdef __GNUC__ | |
488 | cnt = __builtin_popcountll(v); | |
489 | #else | |
490 | // Kernighan's Alg to count set bits | |
491 | while (v) { | |
492 | v &= (v - 1); | |
493 | cnt++; | |
494 | } | |
495 | #endif | |
496 | res += cnt; | |
497 | } | |
498 | } | |
499 | return res * l0_granularity; | |
500 | } | |
501 | void collect_stats( | |
502 | std::map<size_t, size_t>& bins_overall) override; | |
eafe8130 TL |
503 | |
504 | static inline ssize_t count_0s(slot_t slot_val, size_t start_pos); | |
505 | static inline ssize_t count_1s(slot_t slot_val, size_t start_pos); | |
506 | void dump(std::function<void(uint64_t offset, uint64_t length)> notify); | |
a8e16298 TL |
507 | }; |
508 | ||
eafe8130 | 509 | |
a8e16298 TL |
510 | class AllocatorLevel01Compact : public AllocatorLevel01 |
511 | { | |
512 | uint64_t _children_per_slot() const override | |
513 | { | |
514 | return 8; | |
515 | } | |
516 | public: | |
517 | void collect_stats( | |
518 | std::map<size_t, size_t>& bins_overall) override | |
519 | { | |
520 | // not implemented | |
521 | } | |
522 | }; | |
523 | ||
524 | template <class L1> | |
525 | class AllocatorLevel02 : public AllocatorLevel | |
526 | { | |
527 | public: | |
528 | uint64_t debug_get_free(uint64_t pos0 = 0, uint64_t pos1 = 0) | |
529 | { | |
11fdf7f2 | 530 | std::lock_guard l(lock); |
a8e16298 TL |
531 | return l1.debug_get_free(pos0 * l1._children_per_slot() * bits_per_slot, |
532 | pos1 * l1._children_per_slot() * bits_per_slot); | |
533 | } | |
534 | uint64_t debug_get_allocated(uint64_t pos0 = 0, uint64_t pos1 = 0) | |
535 | { | |
11fdf7f2 | 536 | std::lock_guard l(lock); |
a8e16298 TL |
537 | return l1.debug_get_allocated(pos0 * l1._children_per_slot() * bits_per_slot, |
538 | pos1 * l1._children_per_slot() * bits_per_slot); | |
539 | } | |
540 | ||
541 | uint64_t get_available() | |
542 | { | |
11fdf7f2 | 543 | std::lock_guard l(lock); |
a8e16298 TL |
544 | return available; |
545 | } | |
546 | inline uint64_t get_min_alloc_size() const | |
547 | { | |
548 | return l1.get_min_alloc_size(); | |
549 | } | |
550 | void collect_stats( | |
551 | std::map<size_t, size_t>& bins_overall) override { | |
552 | ||
11fdf7f2 | 553 | std::lock_guard l(lock); |
a8e16298 TL |
554 | l1.collect_stats(bins_overall); |
555 | } | |
e306af50 TL |
556 | uint64_t claim_free_to_left(uint64_t offset) { |
557 | std::lock_guard l(lock); | |
558 | auto allocated = l1.claim_free_to_left_l1(offset); | |
559 | ceph_assert(available >= allocated); | |
560 | available -= allocated; | |
a8e16298 | 561 | |
e306af50 TL |
562 | uint64_t l2_pos = (offset - allocated) / l2_granularity; |
563 | uint64_t l2_pos_end = | |
564 | p2roundup(int64_t(offset), int64_t(l2_granularity)) / l2_granularity; | |
565 | _mark_l2_on_l1(l2_pos, l2_pos_end); | |
566 | return allocated; | |
567 | } | |
568 | ||
569 | uint64_t claim_free_to_right(uint64_t offset) { | |
570 | std::lock_guard l(lock); | |
571 | auto allocated = l1.claim_free_to_right_l1(offset); | |
572 | ceph_assert(available >= allocated); | |
573 | available -= allocated; | |
574 | ||
575 | uint64_t l2_pos = (offset) / l2_granularity; | |
576 | int64_t end = offset + allocated; | |
577 | uint64_t l2_pos_end = p2roundup(end, int64_t(l2_granularity)) / l2_granularity; | |
578 | _mark_l2_on_l1(l2_pos, l2_pos_end); | |
579 | return allocated; | |
580 | } | |
a8e16298 | 581 | protected: |
11fdf7f2 | 582 | ceph::mutex lock = ceph::make_mutex("AllocatorLevel02::lock"); |
a8e16298 TL |
583 | L1 l1; |
584 | slot_vector_t l2; | |
585 | uint64_t l2_granularity = 0; // space per entry | |
586 | uint64_t available = 0; | |
587 | uint64_t last_pos = 0; | |
588 | ||
589 | enum { | |
9f95a23c | 590 | L1_ENTRIES_PER_SLOT = bits_per_slot, // 64 |
a8e16298 TL |
591 | }; |
592 | ||
593 | uint64_t _children_per_slot() const override | |
594 | { | |
9f95a23c | 595 | return L1_ENTRIES_PER_SLOT; |
a8e16298 TL |
596 | } |
597 | uint64_t _level_granularity() const override | |
598 | { | |
599 | return l2_granularity; | |
600 | } | |
601 | ||
602 | void _init(uint64_t capacity, uint64_t _alloc_unit, bool mark_as_free = true) | |
603 | { | |
11fdf7f2 | 604 | ceph_assert(isp2(_alloc_unit)); |
a8e16298 TL |
605 | l1._init(capacity, _alloc_unit, mark_as_free); |
606 | ||
607 | l2_granularity = | |
9f95a23c | 608 | l1._level_granularity() * l1._children_per_slot() * slots_per_slotset; |
a8e16298 TL |
609 | |
610 | // capacity to have slot alignment at l2 | |
611 | auto aligned_capacity = | |
9f95a23c TL |
612 | p2roundup((int64_t)capacity, (int64_t)l2_granularity * L1_ENTRIES_PER_SLOT); |
613 | size_t elem_count = aligned_capacity / l2_granularity / L1_ENTRIES_PER_SLOT; | |
a8e16298 TL |
614 | // we use set bit(s) as a marker for (partially) free entry |
615 | l2.resize(elem_count, mark_as_free ? all_slot_set : all_slot_clear); | |
616 | ||
617 | if (mark_as_free) { | |
618 | // capacity to have slotset alignment at l1 | |
619 | auto l2_pos_no_use = | |
11fdf7f2 | 620 | p2roundup((int64_t)capacity, (int64_t)l2_granularity) / l2_granularity; |
a8e16298 | 621 | _mark_l2_allocated(l2_pos_no_use, aligned_capacity / l2_granularity); |
11fdf7f2 | 622 | available = p2align(capacity, _alloc_unit); |
a8e16298 TL |
623 | } else { |
624 | available = 0; | |
625 | } | |
626 | } | |
627 | ||
628 | void _mark_l2_allocated(int64_t l2_pos, int64_t l2_pos_end) | |
629 | { | |
9f95a23c | 630 | auto d = L1_ENTRIES_PER_SLOT; |
11fdf7f2 TL |
631 | ceph_assert(0 <= l2_pos_end); |
632 | ceph_assert((int64_t)l2.size() >= (l2_pos_end / d)); | |
a8e16298 TL |
633 | |
634 | while (l2_pos < l2_pos_end) { | |
635 | l2[l2_pos / d] &= ~(slot_t(1) << (l2_pos % d)); | |
636 | ++l2_pos; | |
637 | } | |
638 | } | |
639 | ||
640 | void _mark_l2_free(int64_t l2_pos, int64_t l2_pos_end) | |
641 | { | |
9f95a23c | 642 | auto d = L1_ENTRIES_PER_SLOT; |
11fdf7f2 TL |
643 | ceph_assert(0 <= l2_pos_end); |
644 | ceph_assert((int64_t)l2.size() >= (l2_pos_end / d)); | |
a8e16298 TL |
645 | |
646 | while (l2_pos < l2_pos_end) { | |
647 | l2[l2_pos / d] |= (slot_t(1) << (l2_pos % d)); | |
648 | ++l2_pos; | |
649 | } | |
650 | } | |
651 | ||
652 | void _mark_l2_on_l1(int64_t l2_pos, int64_t l2_pos_end) | |
653 | { | |
9f95a23c | 654 | auto d = L1_ENTRIES_PER_SLOT; |
11fdf7f2 TL |
655 | ceph_assert(0 <= l2_pos_end); |
656 | ceph_assert((int64_t)l2.size() >= (l2_pos_end / d)); | |
a8e16298 | 657 | |
9f95a23c TL |
658 | auto idx = l2_pos * slots_per_slotset; |
659 | auto idx_end = l2_pos_end * slots_per_slotset; | |
a8e16298 TL |
660 | bool all_allocated = true; |
661 | while (idx < idx_end) { | |
662 | if (!l1._is_slot_fully_allocated(idx)) { | |
663 | all_allocated = false; | |
9f95a23c | 664 | idx = p2roundup(int64_t(++idx), int64_t(slots_per_slotset)); |
a8e16298 TL |
665 | } |
666 | else { | |
667 | ++idx; | |
668 | } | |
9f95a23c | 669 | if ((idx % slots_per_slotset) == 0) { |
a8e16298 TL |
670 | if (all_allocated) { |
671 | l2[l2_pos / d] &= ~(slot_t(1) << (l2_pos % d)); | |
672 | } | |
673 | else { | |
674 | l2[l2_pos / d] |= (slot_t(1) << (l2_pos % d)); | |
675 | } | |
676 | all_allocated = true; | |
677 | ++l2_pos; | |
678 | } | |
679 | } | |
680 | } | |
681 | ||
682 | void _allocate_l2(uint64_t length, | |
683 | uint64_t min_length, | |
684 | uint64_t max_length, | |
685 | uint64_t hint, | |
686 | ||
687 | uint64_t* allocated, | |
688 | interval_vector_t* res) | |
689 | { | |
690 | uint64_t prev_allocated = *allocated; | |
9f95a23c | 691 | uint64_t d = L1_ENTRIES_PER_SLOT; |
11fdf7f2 TL |
692 | ceph_assert(min_length <= l2_granularity); |
693 | ceph_assert(max_length == 0 || max_length >= min_length); | |
694 | ceph_assert(max_length == 0 || (max_length % min_length) == 0); | |
695 | ceph_assert(length >= min_length); | |
696 | ceph_assert((length % min_length) == 0); | |
a8e16298 TL |
697 | |
698 | uint64_t cap = 1ull << 31; | |
699 | if (max_length == 0 || max_length >= cap) { | |
700 | max_length = cap; | |
701 | } | |
702 | ||
9f95a23c | 703 | uint64_t l1_w = slots_per_slotset * l1._children_per_slot(); |
a8e16298 | 704 | |
11fdf7f2 | 705 | std::lock_guard l(lock); |
a8e16298 TL |
706 | |
707 | if (available < min_length) { | |
708 | return; | |
709 | } | |
710 | if (hint != 0) { | |
11fdf7f2 | 711 | last_pos = (hint / d) < l2.size() ? p2align(hint, d) : 0; |
a8e16298 TL |
712 | } |
713 | auto l2_pos = last_pos; | |
714 | auto last_pos0 = last_pos; | |
715 | auto pos = last_pos / d; | |
716 | auto pos_end = l2.size(); | |
717 | // outer loop below is intended to optimize the performance by | |
718 | // avoiding 'modulo' operations inside the internal loop. | |
719 | // Looks like they have negative impact on the performance | |
720 | for (auto i = 0; i < 2; ++i) { | |
721 | for(; length > *allocated && pos < pos_end; ++pos) { | |
722 | slot_t& slot_val = l2[pos]; | |
723 | size_t free_pos = 0; | |
724 | bool all_set = false; | |
725 | if (slot_val == all_slot_clear) { | |
726 | l2_pos += d; | |
727 | last_pos = l2_pos; | |
728 | continue; | |
729 | } else if (slot_val == all_slot_set) { | |
730 | free_pos = 0; | |
731 | all_set = true; | |
732 | } else { | |
733 | free_pos = find_next_set_bit(slot_val, 0); | |
11fdf7f2 | 734 | ceph_assert(free_pos < bits_per_slot); |
a8e16298 TL |
735 | } |
736 | do { | |
11fdf7f2 | 737 | ceph_assert(length > *allocated); |
a8e16298 TL |
738 | bool empty = l1._allocate_l1(length, |
739 | min_length, | |
740 | max_length, | |
741 | (l2_pos + free_pos) * l1_w, | |
742 | (l2_pos + free_pos + 1) * l1_w, | |
743 | allocated, | |
744 | res); | |
745 | if (empty) { | |
746 | slot_val &= ~(slot_t(1) << free_pos); | |
747 | } | |
748 | if (length <= *allocated || slot_val == all_slot_clear) { | |
749 | break; | |
750 | } | |
751 | ++free_pos; | |
752 | if (!all_set) { | |
753 | free_pos = find_next_set_bit(slot_val, free_pos); | |
754 | } | |
755 | } while (free_pos < bits_per_slot); | |
756 | last_pos = l2_pos; | |
757 | l2_pos += d; | |
758 | } | |
759 | l2_pos = 0; | |
760 | pos = 0; | |
761 | pos_end = last_pos0 / d; | |
762 | } | |
763 | ||
764 | ++l2_allocs; | |
765 | auto allocated_here = *allocated - prev_allocated; | |
11fdf7f2 | 766 | ceph_assert(available >= allocated_here); |
a8e16298 TL |
767 | available -= allocated_here; |
768 | } | |
769 | ||
770 | #ifndef NON_CEPH_BUILD | |
771 | // to provide compatibility with BlueStore's allocator interface | |
772 | void _free_l2(const interval_set<uint64_t> & rr) | |
773 | { | |
774 | uint64_t released = 0; | |
11fdf7f2 | 775 | std::lock_guard l(lock); |
a8e16298 TL |
776 | for (auto r : rr) { |
777 | released += l1._free_l1(r.first, r.second); | |
778 | uint64_t l2_pos = r.first / l2_granularity; | |
11fdf7f2 | 779 | uint64_t l2_pos_end = p2roundup(int64_t(r.first + r.second), int64_t(l2_granularity)) / l2_granularity; |
a8e16298 TL |
780 | |
781 | _mark_l2_free(l2_pos, l2_pos_end); | |
782 | } | |
783 | available += released; | |
784 | } | |
785 | #endif | |
786 | ||
787 | template <typename T> | |
788 | void _free_l2(const T& rr) | |
789 | { | |
790 | uint64_t released = 0; | |
11fdf7f2 | 791 | std::lock_guard l(lock); |
a8e16298 TL |
792 | for (auto r : rr) { |
793 | released += l1._free_l1(r.offset, r.length); | |
794 | uint64_t l2_pos = r.offset / l2_granularity; | |
11fdf7f2 | 795 | uint64_t l2_pos_end = p2roundup(int64_t(r.offset + r.length), int64_t(l2_granularity)) / l2_granularity; |
a8e16298 TL |
796 | |
797 | _mark_l2_free(l2_pos, l2_pos_end); | |
798 | } | |
799 | available += released; | |
800 | } | |
801 | ||
802 | void _mark_allocated(uint64_t o, uint64_t len) | |
803 | { | |
804 | uint64_t l2_pos = o / l2_granularity; | |
11fdf7f2 | 805 | uint64_t l2_pos_end = p2roundup(int64_t(o + len), int64_t(l2_granularity)) / l2_granularity; |
a8e16298 | 806 | |
11fdf7f2 TL |
807 | std::lock_guard l(lock); |
808 | auto allocated = l1._mark_alloc_l1(o, len); | |
809 | ceph_assert(available >= allocated); | |
a8e16298 TL |
810 | available -= allocated; |
811 | _mark_l2_on_l1(l2_pos, l2_pos_end); | |
812 | } | |
813 | ||
814 | void _mark_free(uint64_t o, uint64_t len) | |
815 | { | |
816 | uint64_t l2_pos = o / l2_granularity; | |
11fdf7f2 | 817 | uint64_t l2_pos_end = p2roundup(int64_t(o + len), int64_t(l2_granularity)) / l2_granularity; |
a8e16298 | 818 | |
11fdf7f2 | 819 | std::lock_guard l(lock); |
a8e16298 TL |
820 | available += l1._free_l1(o, len); |
821 | _mark_l2_free(l2_pos, l2_pos_end); | |
822 | } | |
823 | void _shutdown() | |
824 | { | |
825 | last_pos = 0; | |
826 | } | |
827 | double _get_fragmentation() { | |
11fdf7f2 | 828 | std::lock_guard l(lock); |
a8e16298 TL |
829 | return l1.get_fragmentation(); |
830 | } | |
831 | }; | |
832 | ||
833 | #endif |