]>
Commit | Line | Data |
---|---|---|
a8e16298 TL |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Bitmap based in-memory allocator implementation. | |
5 | * Author: Igor Fedotov, ifedotov@suse.com | |
6 | * | |
7 | */ | |
8 | ||
9 | #include "fastbmap_allocator_impl.h" | |
10 | ||
// Definitions of the allocator-wide statistics counters declared in
// fastbmap_allocator_impl.h.  They are incremented at various points in the
// allocation paths (e.g. alloc_fragments_fast on the whole-slot fast path in
// _allocate_l1) and are presumably read for debugging/telemetry — the readers
// are not visible in this file.
uint64_t AllocatorLevel::l0_dives = 0;
uint64_t AllocatorLevel::l0_iterations = 0;
uint64_t AllocatorLevel::l0_inner_iterations = 0;
uint64_t AllocatorLevel::alloc_fragments = 0;
uint64_t AllocatorLevel::alloc_fragments_fast = 0;
uint64_t AllocatorLevel::l2_allocs = 0;
17 | ||
18 | inline interval_t _align2units(uint64_t offset, uint64_t len, uint64_t min_length) | |
19 | { | |
aee94f69 TL |
20 | return len >= min_length ? |
21 | interval_t(offset, p2align<uint64_t>(len, min_length)) : | |
22 | interval_t(); | |
a8e16298 TL |
23 | } |
24 | ||
// Scan L0 bitmap entries in [pos0, pos1) (positions are L0 granules; a set
// bit means "free") and return the longest free run, with its length rounded
// down to a multiple of min_length and converted to bytes.
//
// *tail is both input and output: on entry it may carry a free run ending
// exactly at pos0 (from a previous call on the preceding range), which seeds
// the current candidate so runs spanning range boundaries are found; on exit
// it holds the free run (if any) that touches pos1, in bytes.
interval_t AllocatorLevel01Loose::_get_longest_from_l0(uint64_t pos0,
  uint64_t pos1, uint64_t min_length, interval_t* tail) const
{
  interval_t res;
  if (pos0 >= pos1) {
    return res;
  }
  auto pos = pos0;

  interval_t res_candidate;
  if (tail->length != 0) {
    // incoming tail is expressed in bytes; convert to L0 granules
    ceph_assert((tail->offset % l0_granularity) == 0);
    ceph_assert((tail->length % l0_granularity) == 0);
    res_candidate.offset = tail->offset / l0_granularity;
    res_candidate.length = tail->length / l0_granularity;
  }
  *tail = interval_t();

  auto d = bits_per_slot;
  slot_t bits = l0[pos / d];
  bits >>= pos % d;
  bool end_loop = false;
  auto min_granules = min_length / l0_granularity;

  do {
    if ((pos % d) == 0) {
      // at a slot boundary: try to consume the whole slot at once
      bits = l0[pos / d];
      if (pos1 - pos >= d) {
        switch(bits) {
        case all_slot_set:
          // slot is totally free
          if (!res_candidate.length) {
            res_candidate.offset = pos;
          }
          res_candidate.length += d;
          pos += d;
          end_loop = pos >= pos1;
          if (end_loop) {
            // run reaches pos1 — report it via *tail as well
            *tail = res_candidate;
            res_candidate = _align2units(res_candidate.offset,
              res_candidate.length, min_granules);
            if(res.length < res_candidate.length) {
              res = res_candidate;
            }
          }
          continue;
        case all_slot_clear:
          // slot is totally allocated; close the current candidate run
          res_candidate = _align2units(res_candidate.offset,
            res_candidate.length, min_granules);
          if (res.length < res_candidate.length) {
            res = res_candidate;
          }
          res_candidate = interval_t();
          pos += d;
          end_loop = pos >= pos1;
          continue;
        }
      }
    } //if ((pos % d) == 0)

    // mixed slot (or a partial range at the end): walk bit by bit
    end_loop = ++pos >= pos1;
    if (bits & 1) {
      // item is free
      if (!res_candidate.length) {
        res_candidate.offset = pos - 1;
      }
      ++res_candidate.length;
      if (end_loop) {
        *tail = res_candidate;
        res_candidate = _align2units(res_candidate.offset,
          res_candidate.length, min_granules);
        if (res.length < res_candidate.length) {
          res = res_candidate;
        }
      }
    } else {
      // allocated item terminates the current run
      res_candidate = _align2units(res_candidate.offset,
        res_candidate.length, min_granules);
      if (res.length < res_candidate.length) {
        res = res_candidate;
      }
      res_candidate = interval_t();
    }
    bits >>= 1;
  } while (!end_loop);
  // convert results from L0 granules to bytes
  res.offset *= l0_granularity;
  res.length *= l0_granularity;
  tail->offset *= l0_granularity;
  tail->length *= l0_granularity;
  return res;
}
117 | ||
// Survey L1 entries in [pos_start, pos_end) and fill *ctx with allocation
// candidates:
//  - free_l1_pos/free_count: a run of fully-free L1 entries (possibly
//    restarted if an earlier run already satisfies min_length);
//  - affordable_offs/len: smallest L0 extent (within a partial L1 entry,
//    possibly joined with a neighboring one via the running tail) that can
//    hold 'length' in full;
//  - min_affordable_offs/len: smallest extent of at least min_length,
//    trimmed down to a min_length multiple.
// 'mode' allows early return: STOP_ON_EMPTY on the first free entry,
// STOP_ON_PARTIAL on the first partial one; ctx->fully_processed is set only
// when the whole range was examined.
void AllocatorLevel01Loose::_analyze_partials(uint64_t pos_start,
  uint64_t pos_end, uint64_t length, uint64_t min_length, int mode,
  search_ctx_t* ctx)
{
  auto d = L1_ENTRIES_PER_SLOT;
  ceph_assert((pos_start % d) == 0);
  ceph_assert((pos_end % d) == 0);

  // number of L0 bits covered by one L1 entry
  uint64_t l0_w = slots_per_slotset * L0_ENTRIES_PER_SLOT;

  uint64_t l1_pos = pos_start;
  const interval_t empty_tail;
  interval_t prev_tail;

  uint64_t next_free_l1_pos = 0;
  for (auto pos = pos_start / d; pos < pos_end / d; ++pos) {
    slot_t slot_val = l1[pos];
    // FIXME minor: code below can be optimized to check slot_val against
    // all_slot_set(_clear) value

    for (auto c = 0; c < d; c++) {
      switch (slot_val & L1_ENTRY_MASK) {
      case L1_ENTRY_FREE:
        prev_tail = empty_tail;
        if (!ctx->free_count) {
          ctx->free_l1_pos = l1_pos;
        } else if (l1_pos != next_free_l1_pos){
          // non-contiguous with the tracked free run
          auto o = ctx->free_l1_pos * l1_granularity;
          auto l = ctx->free_count * l1_granularity;
          // check if already found extent fits min_length after alignment
          if (_align2units(o, l, min_length).length >= min_length) {
            break;
          }
          // if not - proceed with the next one
          ctx->free_l1_pos = l1_pos;
          ctx->free_count = 0;
        }
        next_free_l1_pos = l1_pos + 1;
        ++ctx->free_count;
        if (mode == STOP_ON_EMPTY) {
          return;
        }
        break;
      case L1_ENTRY_FULL:
        // fully allocated entry breaks any cross-entry tail
        prev_tail = empty_tail;
        break;
      case L1_ENTRY_PARTIAL:
        interval_t longest;
        ++ctx->partial_count;

        // prev_tail lets a free run spanning adjacent L1 entries be seen
        longest = _get_longest_from_l0(l1_pos * l0_w, (l1_pos + 1) * l0_w, min_length, &prev_tail);

        if (longest.length >= length) {
          // track the tightest (smallest) extent that still fits 'length'
          if ((ctx->affordable_len == 0) ||
              ((ctx->affordable_len != 0) &&
                (longest.length < ctx->affordable_len))) {
            ctx->affordable_len = longest.length;
            ctx->affordable_offs = longest.offset;
          }
        }
        if (longest.length >= min_length &&
            (ctx->min_affordable_len == 0 ||
              (longest.length < ctx->min_affordable_len))) {

          ctx->min_affordable_len = p2align<uint64_t>(longest.length, min_length);
          ctx->min_affordable_offs = longest.offset;
        }
        if (mode == STOP_ON_PARTIAL) {
          return;
        }
        break;
      }
      slot_val >>= L1_ENTRY_WIDTH;
      ++l1_pos;
    }
  }
  ctx->fully_processed = true;
}
196 | ||
// Recompute the L1 entries covering the L0 range [l0_pos, l0_pos_end).
// The range must be aligned to slotset boundaries.  For each slotset the
// L0 slots are classified into FREE / FULL / PARTIAL; as soon as two slots
// in one slotset disagree the whole slotset is PARTIAL and the remaining
// slots of that set are skipped.  unalloc_l1_count / partial_l1_count are
// kept in sync with the old vs. new entry value.
void AllocatorLevel01Loose::_mark_l1_on_l0(int64_t l0_pos, int64_t l0_pos_end)
{
  if (l0_pos == l0_pos_end) {
    return;
  }
  auto d0 = bits_per_slotset;
  uint64_t l1_w = L1_ENTRIES_PER_SLOT;
  // this should be aligned with slotset boundaries
  ceph_assert(0 == (l0_pos % d0));
  ceph_assert(0 == (l0_pos_end % d0));

  int64_t idx = l0_pos / bits_per_slot;
  int64_t idx_end = l0_pos_end / bits_per_slot;
  slot_t mask_to_apply = L1_ENTRY_NOT_USED;  // accumulated verdict for the slotset

  auto l1_pos = l0_pos / d0;

  while (idx < idx_end) {
    if (l0[idx] == all_slot_clear) {
      // if not all prev slots are allocated then no need to check the
      // current slot set, it's partial
      ++idx;
      if (mask_to_apply == L1_ENTRY_NOT_USED) {
        mask_to_apply = L1_ENTRY_FULL;
      } else if (mask_to_apply != L1_ENTRY_FULL) {
        // mixed content: jump to the end of this slotset
        idx = p2roundup(idx, int64_t(slots_per_slotset));
        mask_to_apply = L1_ENTRY_PARTIAL;
      }
    } else if (l0[idx] == all_slot_set) {
      // if not all prev slots are free then no need to check the
      // current slot set, it's partial
      ++idx;
      if (mask_to_apply == L1_ENTRY_NOT_USED) {
        mask_to_apply = L1_ENTRY_FREE;
      } else if (mask_to_apply != L1_ENTRY_FREE) {
        idx = p2roundup(idx, int64_t(slots_per_slotset));
        mask_to_apply = L1_ENTRY_PARTIAL;
      }
    } else {
      // no need to check the current slot set, it's partial
      mask_to_apply = L1_ENTRY_PARTIAL;
      ++idx;
      idx = p2roundup(idx, int64_t(slots_per_slotset));
    }
    if ((idx % slots_per_slotset) == 0) {
      // slotset complete: write the verdict into the matching L1 entry
      ceph_assert(mask_to_apply != L1_ENTRY_NOT_USED);
      uint64_t shift = (l1_pos % l1_w) * L1_ENTRY_WIDTH;
      slot_t& slot_val = l1[l1_pos / l1_w];
      auto mask = slot_t(L1_ENTRY_MASK) << shift;

      slot_t old_mask = (slot_val & mask) >> shift;
      // adjust counters for the entry being overwritten...
      switch(old_mask) {
      case L1_ENTRY_FREE:
        unalloc_l1_count--;
        break;
      case L1_ENTRY_PARTIAL:
        partial_l1_count--;
        break;
      }
      slot_val &= ~mask;
      slot_val |= slot_t(mask_to_apply) << shift;
      // ...and for the new value
      switch(mask_to_apply) {
      case L1_ENTRY_FREE:
        unalloc_l1_count++;
        break;
      case L1_ENTRY_PARTIAL:
        partial_l1_count++;
        break;
      }
      mask_to_apply = L1_ENTRY_NOT_USED;
      ++l1_pos;
    }
  }
}
271 | ||
// Mark the L0 range [l0_pos_start, l0_pos_end) as allocated by clearing the
// corresponding bits (set bit = free, clear bit = allocated).  Works in three
// phases for speed: leading bits inside the first (possibly partial) slot,
// then whole slots at once, then trailing bits in the last slot.
// Note: L1 is NOT updated here; callers are expected to do that separately
// (e.g. via _mark_alloc_l1_l0 — not visible in this chunk).
void AllocatorLevel01Loose::_mark_alloc_l0(int64_t l0_pos_start,
  int64_t l0_pos_end)
{
  auto d0 = L0_ENTRIES_PER_SLOT;

  int64_t pos = l0_pos_start;
  slot_t bits = (slot_t)1 << (l0_pos_start % d0);
  slot_t* val_s = l0.data() + (pos / d0);
  // phase 1: bit-by-bit up to the first slot boundary (or the range end)
  int64_t pos_e = std::min(l0_pos_end, p2roundup<int64_t>(l0_pos_start + 1, d0));
  while (pos < pos_e) {
    (*val_s) &= ~bits;
    bits <<= 1;
    pos++;
  }
  // phase 2: whole slots cleared in one store each
  pos_e = std::min(l0_pos_end, p2align<int64_t>(l0_pos_end, d0));
  while (pos < pos_e) {
    *(++val_s) = all_slot_clear;
    pos += d0;
  }
  // phase 3: remaining bits in the final slot
  bits = 1;
  ++val_s;
  while (pos < l0_pos_end) {
    (*val_s) &= ~bits;
    bits <<= 1;
    pos++;
  }
}
299 | ||
// Allocate a single contiguous extent of up to 'length' bytes from the L1
// range [pos_start, pos_end), honoring min_length alignment.  Returns the
// allocated interval (bytes) or an empty one on failure.  Three strategies
// are chosen by request size:
//  - length <= l0_granularity: first fitting L0 extent, else a free L1 run;
//  - length == l1_granularity: free L1 run first, then exact L0 fit, then
//    the best sub-length extent trimmed to min_length;
//  - larger: full scan, exact L0 fit, then aligned free L1 run, then the
//    best sub-length extent.
// max_length is accepted but unused here — presumably handled by callers
// (_allocate_l1 fragments the result); TODO confirm.
interval_t AllocatorLevel01Loose::_allocate_l1_contiguous(uint64_t length,
  uint64_t min_length, uint64_t max_length,
  uint64_t pos_start, uint64_t pos_end)
{
  interval_t res = { 0, 0 };
  uint64_t l0_w = slots_per_slotset * L0_ENTRIES_PER_SLOT;

  if (unlikely(length <= l0_granularity)) {
    search_ctx_t ctx;
    _analyze_partials(pos_start, pos_end, l0_granularity, l0_granularity,
      STOP_ON_PARTIAL, &ctx);

    // check partially free slot sets first (including neighboring),
    // full length match required.
    if (ctx.affordable_len) {
      // allocate as specified
      ceph_assert(ctx.affordable_len >= length);
      auto pos = ctx.affordable_offs / l0_granularity;
      _mark_alloc_l1_l0(pos, pos + 1);
      res = interval_t(ctx.affordable_offs, length);
      return res;
    }

    // allocate from free slot sets
    if (ctx.free_count) {
      auto l = std::min(length, ctx.free_count * l1_granularity);
      ceph_assert((l % l0_granularity) == 0);
      auto pos_end = ctx.free_l1_pos * l0_w + l / l0_granularity;

      _mark_alloc_l1_l0(ctx.free_l1_pos * l0_w, pos_end);
      res = interval_t(ctx.free_l1_pos * l1_granularity, l);
      return res;
    }
  } else if (unlikely(length == l1_granularity)) {
    search_ctx_t ctx;
    _analyze_partials(pos_start, pos_end, length, min_length, STOP_ON_EMPTY, &ctx);

    // allocate using contiguous extent found at l1 if any
    if (ctx.free_count) {

      auto l = std::min(length, ctx.free_count * l1_granularity);
      ceph_assert((l % l0_granularity) == 0);
      auto pos_end = ctx.free_l1_pos * l0_w + l / l0_granularity;

      _mark_alloc_l1_l0(ctx.free_l1_pos * l0_w, pos_end);
      res = interval_t(ctx.free_l1_pos * l1_granularity, l);

      return res;
    }

    // we can terminate earlier on free entry only
    ceph_assert(ctx.fully_processed);

    // check partially free slot sets first (including neighboring),
    // full length match required.
    if (ctx.affordable_len) {
      ceph_assert(ctx.affordable_len >= length);
      ceph_assert((length % l0_granularity) == 0);
      auto pos_start = ctx.affordable_offs / l0_granularity;
      auto pos_end = (ctx.affordable_offs + length) / l0_granularity;
      _mark_alloc_l1_l0(pos_start, pos_end);
      res = interval_t(ctx.affordable_offs, length);
      return res;
    }
    // fall back to the best extent shorter than 'length' but >= min_length
    if (ctx.min_affordable_len) {
      auto pos_start = ctx.min_affordable_offs / l0_granularity;
      auto pos_end = (ctx.min_affordable_offs + ctx.min_affordable_len) / l0_granularity;
      _mark_alloc_l1_l0(pos_start, pos_end);
      return interval_t(ctx.min_affordable_offs, ctx.min_affordable_len);
    }
  } else {
    search_ctx_t ctx;
    _analyze_partials(pos_start, pos_end, length, min_length, NO_STOP, &ctx);
    ceph_assert(ctx.fully_processed);
    // check partially free slot sets first (including neighboring),
    // full length match required.
    if (ctx.affordable_len) {
      ceph_assert(ctx.affordable_len >= length);
      ceph_assert((length % l0_granularity) == 0);
      auto pos_start = ctx.affordable_offs / l0_granularity;
      auto pos_end = (ctx.affordable_offs + length) / l0_granularity;
      _mark_alloc_l1_l0(pos_start, pos_end);
      res = interval_t(ctx.affordable_offs, length);
      return res;
    }
    // allocate using contiguous extent found at l1 if affordable
    // align allocated extent with min_length
    if (ctx.free_count) {
      auto o = ctx.free_l1_pos * l1_granularity;
      auto l = ctx.free_count * l1_granularity;
      interval_t aligned_extent = _align2units(o, l, min_length);
      if (aligned_extent.length > 0) {
        aligned_extent.length = std::min(length,
          uint64_t(aligned_extent.length));
        ceph_assert((aligned_extent.offset % l0_granularity) == 0);
        ceph_assert((aligned_extent.length % l0_granularity) == 0);

        auto pos_start = aligned_extent.offset / l0_granularity;
        auto pos_end = (aligned_extent.offset + aligned_extent.length) / l0_granularity;

        _mark_alloc_l1_l0(pos_start, pos_end);
        return aligned_extent;
      }
    }
    // last resort: best extent >= min_length even though < length
    if (ctx.min_affordable_len) {
      auto pos_start = ctx.min_affordable_offs / l0_granularity;
      auto pos_end = (ctx.min_affordable_offs + ctx.min_affordable_len) / l0_granularity;
      _mark_alloc_l1_l0(pos_start, pos_end);
      return interval_t(ctx.min_affordable_offs, ctx.min_affordable_len);
    }
  }
  return res;
}
413 | ||
// Allocate up to 'length' bytes (possibly as multiple fragments, each capped
// at max_length via _fragment_and_emplace) from the L1 range
// [l1_pos_start, l1_pos_end).  *allocated is advanced by the number of bytes
// obtained; extents are appended to *res.  Returns whether the whole L1
// range ended up empty (see _is_empty_l1).
//
// Two paths: when min_length exceeds the L0 granularity, contiguous extents
// are taken repeatedly via _allocate_l1_contiguous; otherwise L1 slots are
// walked directly, with a fast path for fully-free slots and a per-entry
// descent into L0 for partial ones.
bool AllocatorLevel01Loose::_allocate_l1(uint64_t length,
  uint64_t min_length, uint64_t max_length,
  uint64_t l1_pos_start, uint64_t l1_pos_end,
  uint64_t* allocated,
  interval_vector_t* res)
{
  uint64_t d0 = L0_ENTRIES_PER_SLOT;
  uint64_t d1 = L1_ENTRIES_PER_SLOT;

  ceph_assert(0 == (l1_pos_start % (slots_per_slotset * d1)));
  ceph_assert(0 == (l1_pos_end % (slots_per_slotset * d1)));
  if (min_length != l0_granularity) {
    // probably not the most effecient way but
    // don't care much about that at the moment
    bool has_space = true;
    while (length > *allocated && has_space) {
      interval_t i =
        _allocate_l1_contiguous(length - *allocated, min_length, max_length,
          l1_pos_start, l1_pos_end);
      if (i.length == 0) {
        has_space = false;
      } else {
        _fragment_and_emplace(max_length, i.offset, i.length, res);
        *allocated += i.length;
      }
    }
  } else {
    uint64_t l0_w = slots_per_slotset * d0;

    for (auto idx = l1_pos_start / d1;
      idx < l1_pos_end / d1 && length > *allocated;
      ++idx) {
      slot_t& slot_val = l1[idx];
      if (slot_val == all_slot_clear) {
        // fully allocated slot - nothing to take
        continue;
      } else if (slot_val == all_slot_set) {
        // fast path: grab as much of the fully free slot as still needed
        uint64_t to_alloc = std::min(length - *allocated,
          l1_granularity * d1);
        *allocated += to_alloc;
        ++alloc_fragments_fast;
        _fragment_and_emplace(max_length, idx * d1 * l1_granularity, to_alloc,
          res);
        _mark_alloc_l1_l0(idx * d1 * bits_per_slotset,
          idx * d1 * bits_per_slotset + to_alloc / l0_granularity);
        continue;
      }
      // mixed slot: visit each non-FULL L1 entry and allocate from its L0 range
      auto free_pos = find_next_set_bit(slot_val, 0);
      ceph_assert(free_pos < bits_per_slot);
      do {
        ceph_assert(length > *allocated);

        bool empty;
        empty = _allocate_l0(length, max_length,
          (idx * d1 + free_pos / L1_ENTRY_WIDTH) * l0_w,
          (idx * d1 + free_pos / L1_ENTRY_WIDTH + 1) * l0_w,
          allocated,
          res);

        // update this entry's L1 state and the free/partial counters
        auto mask = slot_t(L1_ENTRY_MASK) << free_pos;

        slot_t old_mask = (slot_val & mask) >> free_pos;
        switch(old_mask) {
        case L1_ENTRY_FREE:
          unalloc_l1_count--;
          break;
        case L1_ENTRY_PARTIAL:
          partial_l1_count--;
          break;
        }
        slot_val &= ~mask;
        if (empty) {
          // the next line is no op with the current L1_ENTRY_FULL but left
          // as-is for the sake of uniformity and to avoid potential errors
          // in future
          slot_val |= slot_t(L1_ENTRY_FULL) << free_pos;
        } else {
          slot_val |= slot_t(L1_ENTRY_PARTIAL) << free_pos;
          partial_l1_count++;
        }
        if (length <= *allocated || slot_val == all_slot_clear) {
          break;
        }
        free_pos = find_next_set_bit(slot_val, free_pos + L1_ENTRY_WIDTH);
      } while (free_pos < bits_per_slot);
    }
  }
  return _is_empty_l1(l1_pos_start, l1_pos_end);
}
502 | ||
// Build a histogram of free-run sizes over the whole L0 bitmap.
// For every maximal run of free bits of length N, bins_overall[cbits(N)-1]
// is incremented, i.e. runs are bucketed by the bit-width of their length.
// Runs are allowed to span slot boundaries (free_seq_cnt carries over).
void AllocatorLevel01Loose::collect_stats(
  std::map<size_t, size_t>& bins_overall)
{
  size_t free_seq_cnt = 0;  // length of the free run currently being tracked
  for (auto slot : l0) {
    if (slot == all_slot_set) {
      // fully free slot extends the current run wholesale
      free_seq_cnt += L0_ENTRIES_PER_SLOT;
    } else if(slot != all_slot_clear) {
      // mixed slot: walk set (free) bits one by one
      size_t pos = 0;
      do {
        auto pos1 = find_next_set_bit(slot, pos);
        if (pos1 == pos) {
          // next bit is free and adjacent - run continues
          free_seq_cnt++;
          pos = pos1 + 1;
        } else {
          // gap of allocated bits - close the run, maybe start a new one
          if (free_seq_cnt) {
            bins_overall[cbits(free_seq_cnt) - 1]++;
            free_seq_cnt = 0;
          }
          if (pos1 < bits_per_slot) {
            free_seq_cnt = 1;
          }
          pos = pos1 + 1;
        }
      } while (pos < bits_per_slot);
    } else if (free_seq_cnt) {
      // fully allocated slot terminates any open run
      bins_overall[cbits(free_seq_cnt) - 1]++;
      free_seq_cnt = 0;
    }
  }
  // account for a run still open at the end of the bitmap
  if (free_seq_cnt) {
    bins_overall[cbits(free_seq_cnt) - 1]++;
  }
}
eafe8130 TL |
537 | |
538 | inline ssize_t AllocatorLevel01Loose::count_0s(slot_t slot_val, size_t start_pos) | |
539 | { | |
540 | #ifdef __GNUC__ | |
541 | size_t pos = __builtin_ffsll(slot_val >> start_pos); | |
542 | if (pos == 0) | |
543 | return sizeof(slot_t)*8 - start_pos; | |
544 | return pos - 1; | |
545 | #else | |
546 | size_t pos = start_pos; | |
547 | slot_t mask = slot_t(1) << pos; | |
548 | while (pos < bits_per_slot && (slot_val & mask) == 0) { | |
549 | mask <<= 1; | |
550 | pos++; | |
551 | } | |
552 | return pos - start_pos; | |
553 | #endif | |
554 | } | |
555 | ||
556 | inline ssize_t AllocatorLevel01Loose::count_1s(slot_t slot_val, size_t start_pos) | |
557 | { | |
558 | return count_0s(~slot_val, start_pos); | |
559 | } | |
1e59de90 | 560 | void AllocatorLevel01Loose::foreach_internal( |
eafe8130 TL |
561 | std::function<void(uint64_t offset, uint64_t length)> notify) |
562 | { | |
563 | size_t len = 0; | |
564 | size_t off = 0; | |
565 | for (size_t i = 0; i < l1.size(); i++) | |
566 | { | |
567 | for (size_t j = 0; j < L1_ENTRIES_PER_SLOT * L1_ENTRY_WIDTH; j += L1_ENTRY_WIDTH) | |
568 | { | |
569 | size_t w = (l1[i] >> j) & L1_ENTRY_MASK; | |
570 | switch (w) { | |
571 | case L1_ENTRY_FULL: | |
572 | if (len > 0) { | |
573 | notify(off, len); | |
574 | len = 0; | |
575 | } | |
576 | break; | |
577 | case L1_ENTRY_FREE: | |
578 | if (len == 0) | |
579 | off = ( ( bits_per_slot * i + j ) / L1_ENTRY_WIDTH ) * slots_per_slotset * bits_per_slot; | |
580 | len += bits_per_slotset; | |
581 | break; | |
582 | case L1_ENTRY_PARTIAL: | |
583 | size_t pos = ( ( bits_per_slot * i + j ) / L1_ENTRY_WIDTH ) * slots_per_slotset; | |
584 | for (size_t t = 0; t < slots_per_slotset; t++) { | |
585 | size_t p = 0; | |
586 | slot_t allocation_pattern = l0[pos + t]; | |
587 | while (p < bits_per_slot) { | |
588 | if (len == 0) { | |
589 | //continue to skip allocated space, meaning bits set to 0 | |
590 | ssize_t alloc_count = count_0s(allocation_pattern, p); | |
591 | p += alloc_count; | |
592 | //now we are switched to expecting free space | |
593 | if (p < bits_per_slot) { | |
594 | //now @p are 1s | |
595 | ssize_t free_count = count_1s(allocation_pattern, p); | |
596 | assert(free_count > 0); | |
597 | len = free_count; | |
598 | off = (pos + t) * bits_per_slot + p; | |
599 | p += free_count; | |
600 | } | |
601 | } else { | |
602 | //continue free region | |
603 | ssize_t free_count = count_1s(allocation_pattern, p); | |
604 | if (free_count == 0) { | |
605 | notify(off, len); | |
606 | len = 0; | |
607 | } else { | |
608 | p += free_count; | |
609 | len += free_count; | |
610 | } | |
611 | } | |
612 | } | |
613 | } | |
614 | break; | |
615 | } | |
616 | } | |
617 | } | |
618 | if (len > 0) | |
619 | notify(off, len); | |
620 | } | |
621 | ||
e306af50 TL |
// Claim (mark allocated) the contiguous free L0 bits immediately to the LEFT
// of l0_pos_start, walking backwards and clearing bits until an already
// allocated bit is met.  Returns the L0 position where the claimed region
// begins (== l0_pos_start if the bit just left of it was not free).
// Scans at most: the tail of the starting slot, then whole free slots, then
// the tail of one more partial slot.
// NOTE(review): assumes l0_pos_start > 0 — with 0 the initial 'pos % d0'
// would be negative; presumably guaranteed by callers, verify.
uint64_t AllocatorLevel01Loose::_claim_free_to_left_l0(int64_t l0_pos_start)
{
  int64_t d0 = L0_ENTRIES_PER_SLOT;

  int64_t pos = l0_pos_start - 1;
  slot_t bits = (slot_t)1 << (pos % d0);
  int64_t idx = pos / d0;
  slot_t* val_s = l0.data() + idx;

  // phase 1: walk bits down to the start of the current slot
  int64_t pos_e = p2align<int64_t>(pos, d0);

  while (pos >= pos_e) {
    if (0 == ((*val_s) & bits))
      return pos + 1;  // hit an allocated bit - stop here
    (*val_s) &= ~bits;
    bits >>= 1;
    --pos;
  }
  // phase 2: consume fully free slots wholesale
  --idx;
  val_s = l0.data() + idx;
  while (idx >= 0 && (*val_s) == all_slot_set) {
    *val_s = all_slot_clear;
    --idx;
    pos -= d0;
    val_s = l0.data() + idx;
  }

  // phase 3: partial slot - walk its bits until an allocated one
  if (idx >= 0 &&
      (*val_s) != all_slot_set && (*val_s) != all_slot_clear) {
    int64_t pos_e = p2align<int64_t>(pos, d0);
    slot_t bits = (slot_t)1 << (pos % d0);
    while (pos >= pos_e) {
      if (0 == ((*val_s) & bits))
        return pos + 1;
      (*val_s) &= ~bits;
      bits >>= 1;
      --pos;
    }
  }
  return pos + 1;
}
663 | ||
// Claim (mark allocated) the contiguous free L0 bits starting at
// l0_pos_start and extending to the RIGHT, clearing bits until an allocated
// bit or the end of the bitmap is met.  Returns the L0 position just past
// the claimed region (== l0_pos_start if that bit was not free).
// Mirror image of _claim_free_to_left_l0: starting-slot bits, then whole
// free slots, then one more partial slot.
uint64_t AllocatorLevel01Loose::_claim_free_to_right_l0(int64_t l0_pos_start)
{
  auto d0 = L0_ENTRIES_PER_SLOT;

  int64_t pos = l0_pos_start;
  slot_t bits = (slot_t)1 << (pos % d0);
  size_t idx = pos / d0;
  // starting past the bitmap - nothing to claim
  if (idx >= l0.size()) {
    return pos;
  }
  slot_t* val_s = l0.data() + idx;

  // phase 1: walk bits up to the end of the current slot
  int64_t pos_e = p2roundup<int64_t>(pos + 1, d0);

  while (pos < pos_e) {
    if (0 == ((*val_s) & bits))
      return pos;  // hit an allocated bit - stop here
    (*val_s) &= ~bits;
    bits <<= 1;
    ++pos;
  }
  // phase 2: consume fully free slots wholesale
  ++idx;
  val_s = l0.data() + idx;
  while (idx < l0.size() && (*val_s) == all_slot_set) {
    *val_s = all_slot_clear;
    ++idx;
    pos += d0;
    val_s = l0.data() + idx;
  }

  // phase 3: partial slot - walk its bits until an allocated one
  if (idx < l0.size() &&
      (*val_s) != all_slot_set && (*val_s) != all_slot_clear) {
    int64_t pos_e = p2roundup<int64_t>(pos + 1, d0);
    slot_t bits = (slot_t)1 << (pos % d0);
    while (pos < pos_e) {
      if (0 == ((*val_s) & bits))
        return pos;
      (*val_s) &= ~bits;
      bits <<= 1;
      ++pos;
    }
  }
  return pos;
}