// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
 * Bitmap based in-memory allocator implementation.
 * Author: Igor Fedotov, ifedotov@suse.com
 */

#include "fastbmap_allocator_impl.h"

uint64_t AllocatorLevel::l0_dives = 0;
uint64_t AllocatorLevel::l0_iterations = 0;
uint64_t AllocatorLevel::l0_inner_iterations = 0;
uint64_t AllocatorLevel::alloc_fragments = 0;
uint64_t AllocatorLevel::alloc_fragments_fast = 0;
uint64_t AllocatorLevel::l2_allocs = 0;
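
// _align2units() below trims [offset, offset + len) down to the largest
// sub-interval whose offset and length are both multiples of min_length
// (min_length is presumably a power of two, given the p2roundup/p2align
// helpers); an empty interval is returned when nothing fits.
// Illustrative numbers (not from the original sources):
//   offset = 3, len = 10, min_length = 4  ->  { offset = 4, length = 8 }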
inline interval_t _align2units(uint64_t offset, uint64_t len, uint64_t min_length)
{
  interval_t res;
  if (len >= min_length) {
    res.offset = p2roundup(offset, min_length);
    auto delta_off = res.offset - offset;
    if (len > delta_off) {
      res.length = len - delta_off;
      res.length = p2align<uint64_t>(res.length, min_length);
      if (!res.length) {
        res.offset = 0;
      }
    } else {
      res.offset = 0;
    }
  }
  return res;
}
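
// _get_longest_from_l0() scans L0 bits in [pos0, pos1) and returns the longest
// free extent (converted back to bytes) that still has a non-zero length after
// _align2units() with min_length; *tail is left holding the trailing free run,
// if any, that touches pos1, so the caller can stitch it to the next slot set.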
interval_t AllocatorLevel01Loose::_get_longest_from_l0(uint64_t pos0,
  uint64_t pos1, uint64_t min_length, interval_t* tail) const
{
  interval_t res;
  if (pos0 >= pos1) {
    return res;
  }
  auto pos = pos0;

  interval_t res_candidate;
  if (tail->length != 0) {
    ceph_assert((tail->offset % l0_granularity) == 0);
    ceph_assert((tail->length % l0_granularity) == 0);
    res_candidate.offset = tail->offset / l0_granularity;
    res_candidate.length = tail->length / l0_granularity;
  }
  *tail = interval_t();

  auto d = bits_per_slot;
  slot_t bits = l0[pos / d];
  bits >>= pos % d;
  bool end_loop = false;
  auto min_granules = min_length / l0_granularity;

  do {
    if ((pos % d) == 0) {
      bits = l0[pos / d];
      if (pos1 - pos >= d) {
        switch(bits) {
          case all_slot_set:
            // slot is totally free
            if (!res_candidate.length) {
              res_candidate.offset = pos;
            }
            res_candidate.length += d;
            pos += d;
            end_loop = pos >= pos1;
            if (end_loop) {
              *tail = res_candidate;
              res_candidate = _align2units(res_candidate.offset,
                res_candidate.length, min_granules);
              if (res.length < res_candidate.length) {
                res = res_candidate;
              }
            }
            continue;
          case all_slot_clear:
            // slot is totally allocated
            res_candidate = _align2units(res_candidate.offset,
              res_candidate.length, min_granules);
            if (res.length < res_candidate.length) {
              res = res_candidate;
            }
            res_candidate = interval_t();
            pos += d;
            end_loop = pos >= pos1;
            continue;
        }
      }
    } //if ((pos % d) == 0)

    end_loop = ++pos >= pos1;
    if (bits & 1) {
      // item is free
      if (!res_candidate.length) {
        res_candidate.offset = pos - 1;
      }
      ++res_candidate.length;
      if (end_loop) {
        *tail = res_candidate;
        res_candidate = _align2units(res_candidate.offset,
          res_candidate.length, min_granules);
        if (res.length < res_candidate.length) {
          res = res_candidate;
        }
      }
    } else {
      res_candidate = _align2units(res_candidate.offset,
        res_candidate.length, min_granules);
      if (res.length < res_candidate.length) {
        res = res_candidate;
      }
      res_candidate = interval_t();
    }
    bits >>= 1;
  } while (!end_loop);

  res.offset *= l0_granularity;
  res.length *= l0_granularity;
  tail->offset *= l0_granularity;
  tail->length *= l0_granularity;
  return res;
}
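
// _analyze_partials() walks the L1 entries in [pos_start, pos_end) and fills
// the search context with allocation candidates: a run of fully free L1
// entries (free_l1_pos/free_count), the smallest partial extent that covers
// the whole request (affordable_*), and the smallest one that still covers
// min_length (min_affordable_*). Depending on 'mode' the scan may stop at the
// first partial or at the first empty entry instead of processing everything.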
void AllocatorLevel01Loose::_analyze_partials(uint64_t pos_start,
  uint64_t pos_end, uint64_t length, uint64_t min_length, int mode,
  search_ctx_t* ctx)
{
  auto d = CHILD_PER_SLOT;
  ceph_assert((pos_start % d) == 0);
  ceph_assert((pos_end % d) == 0);

  uint64_t l0_w = slotset_width * CHILD_PER_SLOT_L0;

  uint64_t l1_pos = pos_start;
  const interval_t empty_tail;
  interval_t prev_tail;

  uint64_t next_free_l1_pos = 0;
  for (auto pos = pos_start / d; pos < pos_end / d; ++pos) {
    slot_t slot_val = l1[pos];
    // FIXME minor: code below can be optimized to check slot_val against
    // all_slot_set(_clear) value

    for (auto c = 0; c < d; c++) {
      switch (slot_val & L1_ENTRY_MASK) {
      case L1_ENTRY_FREE:
        prev_tail = empty_tail;
        if (!ctx->free_count) {
          ctx->free_l1_pos = l1_pos;
        } else if (l1_pos != next_free_l1_pos) {
          auto o = ctx->free_l1_pos * l1_granularity;
          auto l = ctx->free_count * l1_granularity;
          // check if already found extent fits min_length after alignment
          if (_align2units(o, l, min_length).length >= min_length) {
            break;
          }
          // if not - proceed with the next one
          ctx->free_l1_pos = l1_pos;
          ctx->free_count = 0;
        }
        next_free_l1_pos = l1_pos + 1;
        ++ctx->free_count;
        if (mode == STOP_ON_EMPTY) {
          return;
        }
        break;
      case L1_ENTRY_FULL:
        prev_tail = empty_tail;
        break;
      case L1_ENTRY_PARTIAL:
        interval_t longest;
        ++ctx->partial_count;

        longest = _get_longest_from_l0(l1_pos * l0_w, (l1_pos + 1) * l0_w,
          min_length, &prev_tail);

        if (longest.length >= length) {
          if ((ctx->affordable_len == 0) ||
              ((ctx->affordable_len != 0) &&
                (longest.length < ctx->affordable_len))) {
            ctx->affordable_len = longest.length;
            ctx->affordable_offs = longest.offset;
          }
        }
        if (longest.length >= min_length &&
            (ctx->min_affordable_len == 0 ||
              (longest.length < ctx->min_affordable_len))) {
          ctx->min_affordable_len = p2align<uint64_t>(longest.length, min_length);
          ctx->min_affordable_offs = longest.offset;
        }
        if (mode == STOP_ON_PARTIAL) {
          return;
        }
        break;
      }
      slot_val >>= L1_ENTRY_WIDTH;
      ++l1_pos;
    }
  }
  ctx->fully_processed = true;
}
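
// _mark_l1_on_l0() refreshes the L1 view of the slot sets covering
// [l0_pos, l0_pos_end): each slot set is classified from its L0 slots and its
// L1 entry is rewritten as L1_ENTRY_FREE, L1_ENTRY_FULL or L1_ENTRY_PARTIAL,
// keeping the per-L1 bookkeeping counters in sync.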
void AllocatorLevel01Loose::_mark_l1_on_l0(int64_t l0_pos, int64_t l0_pos_end)
{
  if (l0_pos == l0_pos_end) {
    return;
  }
  auto d0 = bits_per_slotset;
  uint64_t l1_w = CHILD_PER_SLOT;
  // this should be aligned with slotset boundaries
  ceph_assert(0 == (l0_pos % d0));
  ceph_assert(0 == (l0_pos_end % d0));

  int64_t idx = l0_pos / bits_per_slot;
  int64_t idx_end = l0_pos_end / bits_per_slot;
  slot_t mask_to_apply = L1_ENTRY_NOT_USED;

  auto l1_pos = l0_pos / d0;

  while (idx < idx_end) {
    if (l0[idx] == all_slot_clear) {
      // if not all prev slots are allocated then no need to check the
      // current slot set, it's partial
      ++idx;
      if (mask_to_apply == L1_ENTRY_NOT_USED) {
        mask_to_apply = L1_ENTRY_FULL;
      } else if (mask_to_apply != L1_ENTRY_FULL) {
        idx = p2roundup(idx, int64_t(slotset_width));
        mask_to_apply = L1_ENTRY_PARTIAL;
      }
    } else if (l0[idx] == all_slot_set) {
      // if not all prev slots are free then no need to check the
      // current slot set, it's partial
      ++idx;
      if (mask_to_apply == L1_ENTRY_NOT_USED) {
        mask_to_apply = L1_ENTRY_FREE;
      } else if (mask_to_apply != L1_ENTRY_FREE) {
        idx = p2roundup(idx, int64_t(slotset_width));
        mask_to_apply = L1_ENTRY_PARTIAL;
      }
    } else {
      // no need to check the current slot set, it's partial
      mask_to_apply = L1_ENTRY_PARTIAL;
      ++idx;
      idx = p2roundup(idx, int64_t(slotset_width));
    }
    if ((idx % slotset_width) == 0) {
      ceph_assert(mask_to_apply != L1_ENTRY_NOT_USED);
      uint64_t shift = (l1_pos % l1_w) * L1_ENTRY_WIDTH;
      slot_t& slot_val = l1[l1_pos / l1_w];
      auto mask = slot_t(L1_ENTRY_MASK) << shift;

      slot_t old_mask = (slot_val & mask) >> shift;
      switch(old_mask) {
      case L1_ENTRY_FREE:
        unalloc_l1_count--;
        break;
      case L1_ENTRY_PARTIAL:
        partial_l1_count--;
        break;
      }
      slot_val &= ~mask;
      slot_val |= slot_t(mask_to_apply) << shift;
      switch(mask_to_apply) {
      case L1_ENTRY_FREE:
        unalloc_l1_count++;
        break;
      case L1_ENTRY_PARTIAL:
        partial_l1_count++;
        break;
      }
      mask_to_apply = L1_ENTRY_NOT_USED;
      ++l1_pos;
    }
  }
}
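
// _mark_alloc_l0() clears (allocates) L0 bits in [l0_pos_start, l0_pos_end)
// in three steps: bit by bit over the unaligned head, whole slots over the
// aligned middle, then bit by bit over the unaligned tail.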
void AllocatorLevel01Loose::_mark_alloc_l0(int64_t l0_pos_start,
  int64_t l0_pos_end)
{
  auto d0 = CHILD_PER_SLOT_L0;

  int64_t pos = l0_pos_start;
  slot_t bits = (slot_t)1 << (l0_pos_start % d0);

  while (pos < std::min(l0_pos_end, p2roundup<int64_t>(l0_pos_start, d0))) {
    l0[pos / d0] &= ~bits;
    bits <<= 1;
    pos++;
  }

  while (pos < std::min(l0_pos_end, p2align<int64_t>(l0_pos_end, d0))) {
    l0[pos / d0] = all_slot_clear;
    pos += d0;
  }

  bits = 1;
  while (pos < l0_pos_end) {
    l0[pos / d0] &= ~bits;
    bits <<= 1;
    pos++;
  }
}
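
// _allocate_l1_contiguous() carves a single contiguous extent of 'length'
// bytes out of [pos_start, pos_end), picking the search strategy by request
// size: requests at or below l0_granularity stop at the first suitable
// partial slot set, requests of exactly l1_granularity stop at the first
// empty L1 entry, and larger requests scan the whole range, falling back to
// the best min_length-aligned extent when no full-length match is available.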
interval_t AllocatorLevel01Loose::_allocate_l1_contiguous(uint64_t length,
  uint64_t min_length, uint64_t max_length,
  uint64_t pos_start, uint64_t pos_end)
{
  interval_t res = { 0, 0 };
  uint64_t l0_w = slotset_width * CHILD_PER_SLOT_L0;

  if (unlikely(length <= l0_granularity)) {
    search_ctx_t ctx;
    _analyze_partials(pos_start, pos_end, l0_granularity, l0_granularity,
      STOP_ON_PARTIAL, &ctx);

    // check partially free slot sets first (including neighboring),
    // full length match required.
    if (ctx.affordable_len) {
      // allocate as specified
      ceph_assert(ctx.affordable_len >= length);
      auto pos = ctx.affordable_offs / l0_granularity;
      _mark_alloc_l1_l0(pos, pos + 1);
      res = interval_t(ctx.affordable_offs, length);
      return res;
    }

    // allocate from free slot sets
    if (ctx.free_count) {
      auto l = std::min(length, ctx.free_count * l1_granularity);
      ceph_assert((l % l0_granularity) == 0);
      auto pos_end = ctx.free_l1_pos * l0_w + l / l0_granularity;

      _mark_alloc_l1_l0(ctx.free_l1_pos * l0_w, pos_end);
      res = interval_t(ctx.free_l1_pos * l1_granularity, l);
      return res;
    }
  } else if (unlikely(length == l1_granularity)) {
    search_ctx_t ctx;
    _analyze_partials(pos_start, pos_end, length, min_length, STOP_ON_EMPTY, &ctx);

    // allocate using contiguous extent found at l1 if any
    if (ctx.free_count) {
      auto l = std::min(length, ctx.free_count * l1_granularity);
      ceph_assert((l % l0_granularity) == 0);
      auto pos_end = ctx.free_l1_pos * l0_w + l / l0_granularity;

      _mark_alloc_l1_l0(ctx.free_l1_pos * l0_w, pos_end);
      res = interval_t(ctx.free_l1_pos * l1_granularity, l);
      return res;
    }

    // we can terminate earlier on free entry only
    ceph_assert(ctx.fully_processed);

    // check partially free slot sets first (including neighboring),
    // full length match required.
    if (ctx.affordable_len) {
      ceph_assert(ctx.affordable_len >= length);
      ceph_assert((length % l0_granularity) == 0);
      auto pos_start = ctx.affordable_offs / l0_granularity;
      auto pos_end = (ctx.affordable_offs + length) / l0_granularity;
      _mark_alloc_l1_l0(pos_start, pos_end);
      res = interval_t(ctx.affordable_offs, length);
      return res;
    }
    if (ctx.min_affordable_len) {
      auto pos_start = ctx.min_affordable_offs / l0_granularity;
      auto pos_end = (ctx.min_affordable_offs + ctx.min_affordable_len) / l0_granularity;
      _mark_alloc_l1_l0(pos_start, pos_end);
      return interval_t(ctx.min_affordable_offs, ctx.min_affordable_len);
    }
  } else {
    search_ctx_t ctx;
    _analyze_partials(pos_start, pos_end, length, min_length, NO_STOP, &ctx);
    ceph_assert(ctx.fully_processed);
    // check partially free slot sets first (including neighboring),
    // full length match required.
    if (ctx.affordable_len) {
      ceph_assert(ctx.affordable_len >= length);
      ceph_assert((length % l0_granularity) == 0);
      auto pos_start = ctx.affordable_offs / l0_granularity;
      auto pos_end = (ctx.affordable_offs + length) / l0_granularity;
      _mark_alloc_l1_l0(pos_start, pos_end);
      res = interval_t(ctx.affordable_offs, length);
      return res;
    }
    // allocate using contiguous extent found at l1 if affordable
    // align allocated extent with min_length
    if (ctx.free_count) {
      auto o = ctx.free_l1_pos * l1_granularity;
      auto l = ctx.free_count * l1_granularity;
      interval_t aligned_extent = _align2units(o, l, min_length);
      if (aligned_extent.length > 0) {
        aligned_extent.length = std::min(length,
          uint64_t(aligned_extent.length));
        ceph_assert((aligned_extent.offset % l0_granularity) == 0);
        ceph_assert((aligned_extent.length % l0_granularity) == 0);

        auto pos_start = aligned_extent.offset / l0_granularity;
        auto pos_end = (aligned_extent.offset + aligned_extent.length) / l0_granularity;

        _mark_alloc_l1_l0(pos_start, pos_end);
        return aligned_extent;
      }
    }
    if (ctx.min_affordable_len) {
      auto pos_start = ctx.min_affordable_offs / l0_granularity;
      auto pos_end = (ctx.min_affordable_offs + ctx.min_affordable_len) / l0_granularity;
      _mark_alloc_l1_l0(pos_start, pos_end);
      return interval_t(ctx.min_affordable_offs, ctx.min_affordable_len);
    }
  }
  return res;
}
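
// _allocate_l1() is the general L1-level allocation routine: when min_length
// is coarser than l0_granularity it loops over _allocate_l1_contiguous(),
// otherwise it walks L1 slots directly, taking fully free slots in one shot
// and descending into _allocate_l0() for partial ones; it finally reports
// _is_empty_l1() over the processed range back to the caller.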
bool AllocatorLevel01Loose::_allocate_l1(uint64_t length,
  uint64_t min_length, uint64_t max_length,
  uint64_t l1_pos_start, uint64_t l1_pos_end,
  uint64_t* allocated,
  interval_vector_t* res)
{
  uint64_t d0 = CHILD_PER_SLOT_L0;
  uint64_t d1 = CHILD_PER_SLOT;

  ceph_assert(0 == (l1_pos_start % (slotset_width * d1)));
  ceph_assert(0 == (l1_pos_end % (slotset_width * d1)));
  if (min_length != l0_granularity) {
    // probably not the most efficient way but
    // don't care much about that at the moment
    bool has_space = true;
    while (length > *allocated && has_space) {
      interval_t i =
        _allocate_l1_contiguous(length - *allocated, min_length, max_length,
          l1_pos_start, l1_pos_end);
      if (i.length == 0) {
        has_space = false;
      } else {
        _fragment_and_emplace(max_length, i.offset, i.length, res);
        *allocated += i.length;
      }
    }
  } else {
    uint64_t l0_w = slotset_width * d0;

    for (auto idx = l1_pos_start / d1;
      idx < l1_pos_end / d1 && length > *allocated;
      ++idx) {
      slot_t& slot_val = l1[idx];
      if (slot_val == all_slot_clear) {
        continue;
      } else if (slot_val == all_slot_set) {
        uint64_t to_alloc = std::min(length - *allocated,
          l1_granularity * d1);
        *allocated += to_alloc;
        ++alloc_fragments_fast;
        _fragment_and_emplace(max_length, idx * d1 * l1_granularity, to_alloc,
          res);
        _mark_alloc_l1_l0(idx * d1 * bits_per_slotset,
          idx * d1 * bits_per_slotset + to_alloc / l0_granularity);
        continue;
      }
      auto free_pos = find_next_set_bit(slot_val, 0);
      ceph_assert(free_pos < bits_per_slot);
      do {
        ceph_assert(length > *allocated);

        bool empty;
        empty = _allocate_l0(length, max_length,
          (idx * d1 + free_pos / L1_ENTRY_WIDTH) * l0_w,
          (idx * d1 + free_pos / L1_ENTRY_WIDTH + 1) * l0_w,
          allocated,
          res);

        auto mask = slot_t(L1_ENTRY_MASK) << free_pos;

        slot_t old_mask = (slot_val & mask) >> free_pos;
        switch(old_mask) {
        case L1_ENTRY_FREE:
          unalloc_l1_count--;
          break;
        case L1_ENTRY_PARTIAL:
          partial_l1_count--;
          break;
        }
        slot_val &= ~mask;
        if (empty) {
          // the next line is no op with the current L1_ENTRY_FULL but left
          // as-is for the sake of uniformity and to avoid potential errors
          slot_val |= slot_t(L1_ENTRY_FULL) << free_pos;
        } else {
          slot_val |= slot_t(L1_ENTRY_PARTIAL) << free_pos;
          partial_l1_count++;
        }
        if (length <= *allocated || slot_val == all_slot_clear) {
          break;
        }
        free_pos = find_next_set_bit(slot_val, free_pos + L1_ENTRY_WIDTH);
      } while (free_pos < bits_per_slot);
    }
  }
  return _is_empty_l1(l1_pos_start, l1_pos_end);
}
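
// collect_stats() builds a histogram of contiguous free-bit runs found in L0:
// every completed run of free_seq_cnt bits bumps
// bins_overall[cbits(free_seq_cnt) - 1], i.e. runs are binned by the position
// of their highest set bit (roughly log2 of the run length).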
void AllocatorLevel01Loose::collect_stats(
  std::map<size_t, size_t>& bins_overall)
{
  size_t free_seq_cnt = 0;
  for (auto slot : l0) {
    if (slot == all_slot_set) {
      free_seq_cnt += CHILD_PER_SLOT_L0;
    } else if (slot != all_slot_clear) {
      size_t pos = 0;
      do {
        auto pos1 = find_next_set_bit(slot, pos);
        if (pos1 == pos) {
          free_seq_cnt++;
          pos = pos1 + 1;
        } else {
          if (free_seq_cnt) {
            bins_overall[cbits(free_seq_cnt) - 1]++;
            free_seq_cnt = 0;
          }
          if (pos1 < bits_per_slot) {
            free_seq_cnt = 1;
          }
          pos = pos1 + 1;
        }
      } while (pos < bits_per_slot);
    } else if (free_seq_cnt) {
      bins_overall[cbits(free_seq_cnt) - 1]++;
      free_seq_cnt = 0;
    }
  }
  if (free_seq_cnt) {
    bins_overall[cbits(free_seq_cnt) - 1]++;
  }
}