// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
 * Bitmap based in-memory allocator implementation.
 * Author: Igor Fedotov, ifedotov@suse.com
 *
 */

#include "fastbmap_allocator_impl.h"

uint64_t AllocatorLevel::l0_dives = 0;
uint64_t AllocatorLevel::l0_iterations = 0;
uint64_t AllocatorLevel::l0_inner_iterations = 0;
uint64_t AllocatorLevel::alloc_fragments = 0;
uint64_t AllocatorLevel::alloc_fragments_fast = 0;
uint64_t AllocatorLevel::l2_allocs = 0;

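// Aligns the free run [offset, offset + len) to min_length: the offset is
// rounded up and the remaining length rounded down to min_length multiples.
// Returns an empty interval when nothing usable remains after alignment.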
inline interval_t _align2units(uint64_t offset, uint64_t len, uint64_t min_length)
{
  interval_t res;
  if (len >= min_length) {
    res.offset = p2roundup(offset, min_length);
    auto delta_off = res.offset - offset;
    if (len > delta_off) {
      res.length = len - delta_off;
      res.length = p2align<uint64_t>(res.length, min_length);
      if (res.length) {
        return res;
      }
    }
  }
  return interval_t();
}

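// Scans L0 entries in [pos0, pos1) and returns the longest free extent
// (converted to bytes) that still satisfies min_length after alignment.
// '*tail' carries in a free run adjoining pos0 from the previous slot set
// and carries out the free run ending exactly at pos1, so the caller can
// stitch free space across slot set boundaries.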
interval_t AllocatorLevel01Loose::_get_longest_from_l0(uint64_t pos0,
  uint64_t pos1, uint64_t min_length, interval_t* tail) const
{
  interval_t res;
  if (pos0 >= pos1) {
    return res;
  }
  auto pos = pos0;

  interval_t res_candidate;
  if (tail->length != 0) {
    ceph_assert((tail->offset % l0_granularity) == 0);
    ceph_assert((tail->length % l0_granularity) == 0);
    res_candidate.offset = tail->offset / l0_granularity;
    res_candidate.length = tail->length / l0_granularity;
  }
  *tail = interval_t();

  auto d = bits_per_slot;
  slot_t bits = l0[pos / d];
  bits >>= pos % d;
  bool end_loop = false;
  auto min_granules = min_length / l0_granularity;

  do {
    if ((pos % d) == 0) {
      bits = l0[pos / d];
      if (pos1 - pos >= d) {
        switch(bits) {
          case all_slot_set:
            // slot is totally free
            if (!res_candidate.length) {
              res_candidate.offset = pos;
            }
            res_candidate.length += d;
            pos += d;
            end_loop = pos >= pos1;
            if (end_loop) {
              *tail = res_candidate;
              res_candidate = _align2units(res_candidate.offset,
                res_candidate.length, min_granules);
              if (res.length < res_candidate.length) {
                res = res_candidate;
              }
            }
            continue;
          case all_slot_clear:
            // slot is totally allocated
            res_candidate = _align2units(res_candidate.offset,
              res_candidate.length, min_granules);
            if (res.length < res_candidate.length) {
              res = res_candidate;
            }
            res_candidate = interval_t();
            pos += d;
            end_loop = pos >= pos1;
            continue;
        }
      }
    } //if ((pos % d) == 0)

    end_loop = ++pos >= pos1;
    if (bits & 1) {
      // item is free
      if (!res_candidate.length) {
        res_candidate.offset = pos - 1;
      }
      ++res_candidate.length;
      if (end_loop) {
        *tail = res_candidate;
        res_candidate = _align2units(res_candidate.offset,
          res_candidate.length, min_granules);
        if (res.length < res_candidate.length) {
          res = res_candidate;
        }
      }
    } else {
      res_candidate = _align2units(res_candidate.offset,
        res_candidate.length, min_granules);
      if (res.length < res_candidate.length) {
        res = res_candidate;
      }
      res_candidate = interval_t();
    }
    bits >>= 1;
  } while (!end_loop);
  res.offset *= l0_granularity;
  res.length *= l0_granularity;
  tail->offset *= l0_granularity;
  tail->length *= l0_granularity;
  return res;
}

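// Scans L1 entries in [pos_start, pos_end) and fills 'ctx' with allocation
// candidates: a run of fully free L1 entries (free_l1_pos/free_count), the
// smallest partial slot-set extent covering 'length' (affordable_*), and the
// smallest one of at least min_length (min_affordable_*). 'mode' allows
// returning early on the first free or partial entry.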
void AllocatorLevel01Loose::_analyze_partials(uint64_t pos_start,
  uint64_t pos_end, uint64_t length, uint64_t min_length, int mode,
  search_ctx_t* ctx)
{
  auto d = L1_ENTRIES_PER_SLOT;
  ceph_assert((pos_start % d) == 0);
  ceph_assert((pos_end % d) == 0);

  uint64_t l0_w = slots_per_slotset * L0_ENTRIES_PER_SLOT;

  uint64_t l1_pos = pos_start;
  const interval_t empty_tail;
  interval_t prev_tail;

  uint64_t next_free_l1_pos = 0;
  for (auto pos = pos_start / d; pos < pos_end / d; ++pos) {
    slot_t slot_val = l1[pos];
    // FIXME minor: code below can be optimized to check slot_val against
    // all_slot_set(_clear) value

    for (auto c = 0; c < d; c++) {
      switch (slot_val & L1_ENTRY_MASK) {
      case L1_ENTRY_FREE:
        prev_tail = empty_tail;
        if (!ctx->free_count) {
          ctx->free_l1_pos = l1_pos;
        } else if (l1_pos != next_free_l1_pos) {
          auto o = ctx->free_l1_pos * l1_granularity;
          auto l = ctx->free_count * l1_granularity;
          // check if already found extent fits min_length after alignment
          if (_align2units(o, l, min_length).length >= min_length) {
            break;
          }
          // if not - proceed with the next one
          ctx->free_l1_pos = l1_pos;
          ctx->free_count = 0;
        }
        next_free_l1_pos = l1_pos + 1;
        ++ctx->free_count;
        if (mode == STOP_ON_EMPTY) {
          return;
        }
        break;
      case L1_ENTRY_FULL:
        prev_tail = empty_tail;
        break;
      case L1_ENTRY_PARTIAL:
        interval_t longest;
        ++ctx->partial_count;

        longest = _get_longest_from_l0(l1_pos * l0_w, (l1_pos + 1) * l0_w, min_length, &prev_tail);

        if (longest.length >= length) {
          if ((ctx->affordable_len == 0) ||
              ((ctx->affordable_len != 0) &&
                (longest.length < ctx->affordable_len))) {
            ctx->affordable_len = longest.length;
            ctx->affordable_offs = longest.offset;
          }
        }
        if (longest.length >= min_length &&
            (ctx->min_affordable_len == 0 ||
              (longest.length < ctx->min_affordable_len))) {

          ctx->min_affordable_len = p2align<uint64_t>(longest.length, min_length);
          ctx->min_affordable_offs = longest.offset;
        }
        if (mode == STOP_ON_PARTIAL) {
          return;
        }
        break;
      }
      slot_val >>= L1_ENTRY_WIDTH;
      ++l1_pos;
    }
  }
  ctx->fully_processed = true;
}

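// Rebuilds the L1 entries covering the L0 range [l0_pos, l0_pos_end):
// each affected slot set is classified as FREE, FULL or PARTIAL, and the
// corresponding L1 entry plus the unalloc/partial counters are updated.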
void AllocatorLevel01Loose::_mark_l1_on_l0(int64_t l0_pos, int64_t l0_pos_end)
{
  if (l0_pos == l0_pos_end) {
    return;
  }
  auto d0 = bits_per_slotset;
  uint64_t l1_w = L1_ENTRIES_PER_SLOT;
  // this should be aligned with slotset boundaries
  ceph_assert(0 == (l0_pos % d0));
  ceph_assert(0 == (l0_pos_end % d0));

  int64_t idx = l0_pos / bits_per_slot;
  int64_t idx_end = l0_pos_end / bits_per_slot;
  slot_t mask_to_apply = L1_ENTRY_NOT_USED;

  auto l1_pos = l0_pos / d0;

  while (idx < idx_end) {
    if (l0[idx] == all_slot_clear) {
      // if not all prev slots are allocated then no need to check the
      // current slot set, it's partial
      ++idx;
      if (mask_to_apply == L1_ENTRY_NOT_USED) {
        mask_to_apply = L1_ENTRY_FULL;
      } else if (mask_to_apply != L1_ENTRY_FULL) {
        idx = p2roundup(idx, int64_t(slots_per_slotset));
        mask_to_apply = L1_ENTRY_PARTIAL;
      }
    } else if (l0[idx] == all_slot_set) {
      // if not all prev slots are free then no need to check the
      // current slot set, it's partial
      ++idx;
      if (mask_to_apply == L1_ENTRY_NOT_USED) {
        mask_to_apply = L1_ENTRY_FREE;
      } else if (mask_to_apply != L1_ENTRY_FREE) {
        idx = p2roundup(idx, int64_t(slots_per_slotset));
        mask_to_apply = L1_ENTRY_PARTIAL;
      }
    } else {
      // no need to check the current slot set, it's partial
      mask_to_apply = L1_ENTRY_PARTIAL;
      ++idx;
      idx = p2roundup(idx, int64_t(slots_per_slotset));
    }
    if ((idx % slots_per_slotset) == 0) {
      ceph_assert(mask_to_apply != L1_ENTRY_NOT_USED);
      uint64_t shift = (l1_pos % l1_w) * L1_ENTRY_WIDTH;
      slot_t& slot_val = l1[l1_pos / l1_w];
      auto mask = slot_t(L1_ENTRY_MASK) << shift;

      slot_t old_mask = (slot_val & mask) >> shift;
      switch(old_mask) {
      case L1_ENTRY_FREE:
        unalloc_l1_count--;
        break;
      case L1_ENTRY_PARTIAL:
        partial_l1_count--;
        break;
      }
      slot_val &= ~mask;
      slot_val |= slot_t(mask_to_apply) << shift;
      switch(mask_to_apply) {
      case L1_ENTRY_FREE:
        unalloc_l1_count++;
        break;
      case L1_ENTRY_PARTIAL:
        partial_l1_count++;
        break;
      }
      mask_to_apply = L1_ENTRY_NOT_USED;
      ++l1_pos;
    }
  }
}

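// Clears L0 bits in [l0_pos_start, l0_pos_end), i.e. marks the range as
// allocated. The unaligned head, whole slots in the middle and the
// unaligned tail are handled separately.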
void AllocatorLevel01Loose::_mark_alloc_l0(int64_t l0_pos_start,
  int64_t l0_pos_end)
{
  auto d0 = L0_ENTRIES_PER_SLOT;

  int64_t pos = l0_pos_start;
  slot_t bits = (slot_t)1 << (l0_pos_start % d0);
  slot_t* val_s = l0.data() + (pos / d0);
  int64_t pos_e = std::min(l0_pos_end, p2roundup<int64_t>(l0_pos_start + 1, d0));
  while (pos < pos_e) {
    (*val_s) &= ~bits;
    bits <<= 1;
    pos++;
  }
  pos_e = std::min(l0_pos_end, p2align<int64_t>(l0_pos_end, d0));
  while (pos < pos_e) {
    *(++val_s) = all_slot_clear;
    pos += d0;
  }
  bits = 1;
  ++val_s;
  while (pos < l0_pos_end) {
    (*val_s) &= ~bits;
    bits <<= 1;
    pos++;
  }
}

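// Tries to allocate a single contiguous extent of 'length' bytes within the
// L1 range [pos_start, pos_end); when a full-length extent is not available
// it may fall back to a shorter one of at least min_length. Returns an empty
// interval on failure.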
interval_t AllocatorLevel01Loose::_allocate_l1_contiguous(uint64_t length,
  uint64_t min_length, uint64_t max_length,
  uint64_t pos_start, uint64_t pos_end)
{
  interval_t res = { 0, 0 };
  uint64_t l0_w = slots_per_slotset * L0_ENTRIES_PER_SLOT;

  if (unlikely(length <= l0_granularity)) {
    search_ctx_t ctx;
    _analyze_partials(pos_start, pos_end, l0_granularity, l0_granularity,
      STOP_ON_PARTIAL, &ctx);

    // check partially free slot sets first (including neighboring),
    // full length match required.
    if (ctx.affordable_len) {
      // allocate as specified
      ceph_assert(ctx.affordable_len >= length);
      auto pos = ctx.affordable_offs / l0_granularity;
      _mark_alloc_l1_l0(pos, pos + 1);
      res = interval_t(ctx.affordable_offs, length);
      return res;
    }

    // allocate from free slot sets
    if (ctx.free_count) {
      auto l = std::min(length, ctx.free_count * l1_granularity);
      ceph_assert((l % l0_granularity) == 0);
      auto pos_end = ctx.free_l1_pos * l0_w + l / l0_granularity;

      _mark_alloc_l1_l0(ctx.free_l1_pos * l0_w, pos_end);
      res = interval_t(ctx.free_l1_pos * l1_granularity, l);
      return res;
    }
  } else if (unlikely(length == l1_granularity)) {
    search_ctx_t ctx;
    _analyze_partials(pos_start, pos_end, length, min_length, STOP_ON_EMPTY, &ctx);

    // allocate using contiguous extent found at l1 if any
    if (ctx.free_count) {

      auto l = std::min(length, ctx.free_count * l1_granularity);
      ceph_assert((l % l0_granularity) == 0);
      auto pos_end = ctx.free_l1_pos * l0_w + l / l0_granularity;

      _mark_alloc_l1_l0(ctx.free_l1_pos * l0_w, pos_end);
      res = interval_t(ctx.free_l1_pos * l1_granularity, l);

      return res;
    }

    // we can terminate earlier on free entry only
    ceph_assert(ctx.fully_processed);

    // check partially free slot sets first (including neighboring),
    // full length match required.
    if (ctx.affordable_len) {
      ceph_assert(ctx.affordable_len >= length);
      ceph_assert((length % l0_granularity) == 0);
      auto pos_start = ctx.affordable_offs / l0_granularity;
      auto pos_end = (ctx.affordable_offs + length) / l0_granularity;
      _mark_alloc_l1_l0(pos_start, pos_end);
      res = interval_t(ctx.affordable_offs, length);
      return res;
    }
    if (ctx.min_affordable_len) {
      auto pos_start = ctx.min_affordable_offs / l0_granularity;
      auto pos_end = (ctx.min_affordable_offs + ctx.min_affordable_len) / l0_granularity;
      _mark_alloc_l1_l0(pos_start, pos_end);
      return interval_t(ctx.min_affordable_offs, ctx.min_affordable_len);
    }
  } else {
    search_ctx_t ctx;
    _analyze_partials(pos_start, pos_end, length, min_length, NO_STOP, &ctx);
    ceph_assert(ctx.fully_processed);
    // check partially free slot sets first (including neighboring),
    // full length match required.
    if (ctx.affordable_len) {
      ceph_assert(ctx.affordable_len >= length);
      ceph_assert((length % l0_granularity) == 0);
      auto pos_start = ctx.affordable_offs / l0_granularity;
      auto pos_end = (ctx.affordable_offs + length) / l0_granularity;
      _mark_alloc_l1_l0(pos_start, pos_end);
      res = interval_t(ctx.affordable_offs, length);
      return res;
    }
    // allocate using contiguous extent found at l1 if affordable
    // align allocated extent with min_length
    if (ctx.free_count) {
      auto o = ctx.free_l1_pos * l1_granularity;
      auto l = ctx.free_count * l1_granularity;
      interval_t aligned_extent = _align2units(o, l, min_length);
      if (aligned_extent.length > 0) {
        aligned_extent.length = std::min(length,
          uint64_t(aligned_extent.length));
        ceph_assert((aligned_extent.offset % l0_granularity) == 0);
        ceph_assert((aligned_extent.length % l0_granularity) == 0);

        auto pos_start = aligned_extent.offset / l0_granularity;
        auto pos_end = (aligned_extent.offset + aligned_extent.length) / l0_granularity;

        _mark_alloc_l1_l0(pos_start, pos_end);
        return aligned_extent;
      }
    }
    if (ctx.min_affordable_len) {
      auto pos_start = ctx.min_affordable_offs / l0_granularity;
      auto pos_end = (ctx.min_affordable_offs + ctx.min_affordable_len) / l0_granularity;
      _mark_alloc_l1_l0(pos_start, pos_end);
      return interval_t(ctx.min_affordable_offs, ctx.min_affordable_len);
    }
  }
  return res;
}

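// Allocates up to 'length' bytes within the L1 range [l1_pos_start,
// l1_pos_end), appending the resulting extents to 'res' and advancing
// '*allocated'. When min_length != l0_granularity it repeatedly carves
// contiguous extents via _allocate_l1_contiguous(); otherwise it walks L1
// slots directly, descending into L0 for partial ones. Returns the result
// of _is_empty_l1() for the range so the L2 level can update its map.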
bool AllocatorLevel01Loose::_allocate_l1(uint64_t length,
  uint64_t min_length, uint64_t max_length,
  uint64_t l1_pos_start, uint64_t l1_pos_end,
  uint64_t* allocated,
  interval_vector_t* res)
{
  uint64_t d0 = L0_ENTRIES_PER_SLOT;
  uint64_t d1 = L1_ENTRIES_PER_SLOT;

  ceph_assert(0 == (l1_pos_start % (slots_per_slotset * d1)));
  ceph_assert(0 == (l1_pos_end % (slots_per_slotset * d1)));
  if (min_length != l0_granularity) {
    // probably not the most efficient way but
    // don't care much about that at the moment
    bool has_space = true;
    while (length > *allocated && has_space) {
      interval_t i =
        _allocate_l1_contiguous(length - *allocated, min_length, max_length,
          l1_pos_start, l1_pos_end);
      if (i.length == 0) {
        has_space = false;
      } else {
        _fragment_and_emplace(max_length, i.offset, i.length, res);
        *allocated += i.length;
      }
    }
  } else {
    uint64_t l0_w = slots_per_slotset * d0;

    for (auto idx = l1_pos_start / d1;
         idx < l1_pos_end / d1 && length > *allocated;
         ++idx) {
      slot_t& slot_val = l1[idx];
      if (slot_val == all_slot_clear) {
        continue;
      } else if (slot_val == all_slot_set) {
        uint64_t to_alloc = std::min(length - *allocated,
          l1_granularity * d1);
        *allocated += to_alloc;
        ++alloc_fragments_fast;
        _fragment_and_emplace(max_length, idx * d1 * l1_granularity, to_alloc,
          res);
        _mark_alloc_l1_l0(idx * d1 * bits_per_slotset,
          idx * d1 * bits_per_slotset + to_alloc / l0_granularity);
        continue;
      }
      auto free_pos = find_next_set_bit(slot_val, 0);
      ceph_assert(free_pos < bits_per_slot);
      do {
        ceph_assert(length > *allocated);

        bool empty;
        empty = _allocate_l0(length, max_length,
          (idx * d1 + free_pos / L1_ENTRY_WIDTH) * l0_w,
          (idx * d1 + free_pos / L1_ENTRY_WIDTH + 1) * l0_w,
          allocated,
          res);

        auto mask = slot_t(L1_ENTRY_MASK) << free_pos;

        slot_t old_mask = (slot_val & mask) >> free_pos;
        switch(old_mask) {
        case L1_ENTRY_FREE:
          unalloc_l1_count--;
          break;
        case L1_ENTRY_PARTIAL:
          partial_l1_count--;
          break;
        }
        slot_val &= ~mask;
        if (empty) {
          // the next line is a no-op with the current L1_ENTRY_FULL value but
          // is left as-is for the sake of uniformity and to avoid potential
          // errors in the future
          slot_val |= slot_t(L1_ENTRY_FULL) << free_pos;
        } else {
          slot_val |= slot_t(L1_ENTRY_PARTIAL) << free_pos;
          partial_l1_count++;
        }
        if (length <= *allocated || slot_val == all_slot_clear) {
          break;
        }
        free_pos = find_next_set_bit(slot_val, free_pos + L1_ENTRY_WIDTH);
      } while (free_pos < bits_per_slot);
    }
  }
  return _is_empty_l1(l1_pos_start, l1_pos_end);
}

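// Accumulates a histogram of free extent sizes over the whole L0 bitmap:
// every maximal run of free L0 entries bumps bins_overall[cbits(run) - 1].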
void AllocatorLevel01Loose::collect_stats(
  std::map<size_t, size_t>& bins_overall)
{
  size_t free_seq_cnt = 0;
  for (auto slot : l0) {
    if (slot == all_slot_set) {
      free_seq_cnt += L0_ENTRIES_PER_SLOT;
    } else if (slot != all_slot_clear) {
      size_t pos = 0;
      do {
        auto pos1 = find_next_set_bit(slot, pos);
        if (pos1 == pos) {
          free_seq_cnt++;
          pos = pos1 + 1;
        } else {
          if (free_seq_cnt) {
            bins_overall[cbits(free_seq_cnt) - 1]++;
            free_seq_cnt = 0;
          }
          if (pos1 < bits_per_slot) {
            free_seq_cnt = 1;
          }
          pos = pos1 + 1;
        }
      } while (pos < bits_per_slot);
    } else if (free_seq_cnt) {
      bins_overall[cbits(free_seq_cnt) - 1]++;
      free_seq_cnt = 0;
    }
  }
  if (free_seq_cnt) {
    bins_overall[cbits(free_seq_cnt) - 1]++;
  }
}

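// Returns the number of consecutive clear (allocated) bits in slot_val
// starting at start_pos; count_1s() below does the same for set (free) bits.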
inline ssize_t AllocatorLevel01Loose::count_0s(slot_t slot_val, size_t start_pos)
{
#ifdef __GNUC__
  size_t pos = __builtin_ffsll(slot_val >> start_pos);
  if (pos == 0)
    return sizeof(slot_t)*8 - start_pos;
  return pos - 1;
#else
  size_t pos = start_pos;
  slot_t mask = slot_t(1) << pos;
  while (pos < bits_per_slot && (slot_val & mask) == 0) {
    mask <<= 1;
    pos++;
  }
  return pos - start_pos;
#endif
}

inline ssize_t AllocatorLevel01Loose::count_1s(slot_t slot_val, size_t start_pos)
{
  return count_0s(~slot_val, start_pos);
}

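// Reports every free extent via 'notify'; offsets and lengths are expressed
// in L0 bitmap entries. FREE L1 entries contribute whole slot sets, PARTIAL
// ones are scanned bit by bit using count_0s()/count_1s().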
void AllocatorLevel01Loose::dump(
  std::function<void(uint64_t offset, uint64_t length)> notify)
{
  size_t len = 0;
  size_t off = 0;
  for (size_t i = 0; i < l1.size(); i++)
  {
    for (size_t j = 0; j < L1_ENTRIES_PER_SLOT * L1_ENTRY_WIDTH; j += L1_ENTRY_WIDTH)
    {
      size_t w = (l1[i] >> j) & L1_ENTRY_MASK;
      switch (w) {
        case L1_ENTRY_FULL:
          if (len > 0) {
            notify(off, len);
            len = 0;
          }
          break;
        case L1_ENTRY_FREE:
          if (len == 0)
            off = ((bits_per_slot * i + j) / L1_ENTRY_WIDTH) * slots_per_slotset * bits_per_slot;
          len += bits_per_slotset;
          break;
        case L1_ENTRY_PARTIAL:
          size_t pos = ((bits_per_slot * i + j) / L1_ENTRY_WIDTH) * slots_per_slotset;
          for (size_t t = 0; t < slots_per_slotset; t++) {
            size_t p = 0;
            slot_t allocation_pattern = l0[pos + t];
            while (p < bits_per_slot) {
              if (len == 0) {
                // continue to skip allocated space, meaning bits set to 0
                ssize_t alloc_count = count_0s(allocation_pattern, p);
                p += alloc_count;
                // now we are switched to expecting free space
                if (p < bits_per_slot) {
                  // now @p are 1s
                  ssize_t free_count = count_1s(allocation_pattern, p);
                  assert(free_count > 0);
                  len = free_count;
                  off = (pos + t) * bits_per_slot + p;
                  p += free_count;
                }
              } else {
                // continue free region
                ssize_t free_count = count_1s(allocation_pattern, p);
                if (free_count == 0) {
                  notify(off, len);
                  len = 0;
                } else {
                  p += free_count;
                  len += free_count;
                }
              }
            }
          }
          break;
      }
    }
  }
  if (len > 0)
    notify(off, len);
}

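// Claims (marks as allocated) the free L0 entries immediately to the left of
// l0_pos_start, stopping at the first allocated one. Returns the leftmost
// position claimed, i.e. l0_pos_start itself if nothing to the left was free.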
uint64_t AllocatorLevel01Loose::_claim_free_to_left_l0(int64_t l0_pos_start)
{
  int64_t d0 = L0_ENTRIES_PER_SLOT;

  int64_t pos = l0_pos_start - 1;
  slot_t bits = (slot_t)1 << (pos % d0);
  int64_t idx = pos / d0;
  slot_t* val_s = l0.data() + idx;

  int64_t pos_e = p2align<int64_t>(pos, d0);

  while (pos >= pos_e) {
    if (0 == ((*val_s) & bits))
      return pos + 1;
    (*val_s) &= ~bits;
    bits >>= 1;
    --pos;
  }
  --idx;
  val_s = l0.data() + idx;
  while (idx >= 0 && (*val_s) == all_slot_set) {
    *val_s = all_slot_clear;
    --idx;
    pos -= d0;
    val_s = l0.data() + idx;
  }

  if (idx >= 0 &&
      (*val_s) != all_slot_set && (*val_s) != all_slot_clear) {
    int64_t pos_e = p2align<int64_t>(pos, d0);
    slot_t bits = (slot_t)1 << (pos % d0);
    while (pos >= pos_e) {
      if (0 == ((*val_s) & bits))
        return pos + 1;
      (*val_s) &= ~bits;
      bits >>= 1;
      --pos;
    }
  }
  return pos + 1;
}

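// Mirror of _claim_free_to_left_l0(): claims free L0 entries starting at
// l0_pos_start and moving right, returning the position just past the last
// one claimed (l0_pos_start if that entry was already allocated).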
uint64_t AllocatorLevel01Loose::_claim_free_to_right_l0(int64_t l0_pos_start)
{
  auto d0 = L0_ENTRIES_PER_SLOT;

  int64_t pos = l0_pos_start;
  slot_t bits = (slot_t)1 << (pos % d0);
  size_t idx = pos / d0;
  slot_t* val_s = l0.data() + idx;

  int64_t pos_e = p2roundup<int64_t>(pos + 1, d0);

  while (pos < pos_e) {
    if (0 == ((*val_s) & bits))
      return pos;
    (*val_s) &= ~bits;
    bits <<= 1;
    ++pos;
  }
  ++idx;
  val_s = l0.data() + idx;
  while (idx < l0.size() && (*val_s) == all_slot_set) {
    *val_s = all_slot_clear;
    ++idx;
    pos += d0;
    val_s = l0.data() + idx;
  }

  if (idx < l0.size() &&
      (*val_s) != all_slot_set && (*val_s) != all_slot_clear) {
    int64_t pos_e = p2roundup<int64_t>(pos + 1, d0);
    slot_t bits = (slot_t)1 << (pos % d0);
    while (pos < pos_e) {
      if (0 == ((*val_s) & bits))
        return pos;
      (*val_s) &= ~bits;
      bits <<= 1;
      ++pos;
    }
  }
  return pos;
}