// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
 * Bitmap based in-memory allocator implementation.
 * Author: Igor Fedotov, ifedotov@suse.com
 *
 */

#ifndef __FAST_BITMAP_ALLOCATOR_IMPL_H
#define __FAST_BITMAP_ALLOCATOR_IMPL_H
#include "include/intarith.h"

#include <vector>
#include <algorithm>
#include <mutex>

typedef uint64_t slot_t;

#ifdef NON_CEPH_BUILD
#include <assert.h>
struct interval_t
{
  uint64_t offset = 0;
  uint64_t length = 0;

  interval_t() {}
  interval_t(uint64_t o, uint64_t l) : offset(o), length(l) {}
  interval_t(const interval_t &ext) :
    offset(ext.offset), length(ext.length) {}
};
typedef std::vector<interval_t> interval_vector_t;
typedef std::vector<slot_t> slot_vector_t;
#else
#include "include/ceph_assert.h"
#include "common/likely.h"
#include "os/bluestore/bluestore_types.h"
#include "include/mempool.h"
#include "common/ceph_mutex.h"

typedef bluestore_interval_t<uint64_t, uint64_t> interval_t;
typedef PExtentVector interval_vector_t;

typedef mempool::bluestore_alloc::vector<slot_t> slot_vector_t;

#endif

// fitting into cache line on x86_64
static const size_t slots_per_slotset = 8; // 8 slots per set
static const size_t slotset_bytes = sizeof(slot_t) * slots_per_slotset;
static const size_t bits_per_slot = sizeof(slot_t) * 8;
static const size_t bits_per_slotset = slotset_bytes * 8;
static const slot_t all_slot_set = 0xffffffffffffffff;
static const slot_t all_slot_clear = 0;

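// Derived sizes for reference: slot_t is 8 bytes, so a slotset of 8 slots
// occupies 64 bytes (one x86_64 cache line) and spans 8 * 64 = 512 bits,
// i.e. 512 L0 entries per slotset.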
inline size_t find_next_set_bit(slot_t slot_val, size_t start_pos)
{
#ifdef __GNUC__
  if (start_pos == 0) {
    start_pos = __builtin_ffsll(slot_val);
    return start_pos ? start_pos - 1 : bits_per_slot;
  }
#endif
  slot_t mask = slot_t(1) << start_pos;
  while (start_pos < bits_per_slot && !(slot_val & mask)) {
    mask <<= 1;
    ++start_pos;
  }
  return start_pos;
}
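// For example, find_next_set_bit(0b0100'1000, 0) returns 3 and
// find_next_set_bit(0b0100'1000, 4) returns 6; when there is no set bit at or
// after start_pos the function returns bits_per_slot (64).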


class AllocatorLevel
{
protected:

  virtual uint64_t _children_per_slot() const = 0;
  virtual uint64_t _level_granularity() const = 0;

public:
  static uint64_t l0_dives;
  static uint64_t l0_iterations;
  static uint64_t l0_inner_iterations;
  static uint64_t alloc_fragments;
  static uint64_t alloc_fragments_fast;
  static uint64_t l2_allocs;

  virtual ~AllocatorLevel()
  {}

  virtual void collect_stats(
    std::map<size_t, size_t>& bins_overall) = 0;

};

class AllocatorLevel01 : public AllocatorLevel
{
protected:
  slot_vector_t l0; // set bit means free entry
  slot_vector_t l1;
  uint64_t l0_granularity = 0; // space per entry
  uint64_t l1_granularity = 0; // space per entry

  size_t partial_l1_count = 0;
  size_t unalloc_l1_count = 0;

  double get_fragmentation() const {
    double res = 0.0;
    auto total = unalloc_l1_count + partial_l1_count;
    if (total) {
      res = double(partial_l1_count) / double(total);
    }
    return res;
  }

  uint64_t _level_granularity() const override
  {
    return l1_granularity;
  }

  inline bool _is_slot_fully_allocated(uint64_t idx) const {
    return l1[idx] == all_slot_clear;
  }
public:
  inline uint64_t get_min_alloc_size() const
  {
    return l0_granularity;
  }

};

template <class T>
class AllocatorLevel02;

class AllocatorLevel01Loose : public AllocatorLevel01
{
  enum {
    L1_ENTRY_WIDTH = 2,
    L1_ENTRY_MASK = (1 << L1_ENTRY_WIDTH) - 1,
    L1_ENTRY_FULL = 0x00,
    L1_ENTRY_PARTIAL = 0x01,
    L1_ENTRY_NOT_USED = 0x02,
    L1_ENTRY_FREE = 0x03,
    L1_ENTRIES_PER_SLOT = bits_per_slot / L1_ENTRY_WIDTH, // 32
    L0_ENTRIES_PER_SLOT = bits_per_slot, // 64
  };
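  // Each 2-bit L1 entry summarizes one 512-bit L0 slotset (l1_granularity
  // bytes of space): 0b00 = fully allocated, 0b01 = partially free,
  // 0b11 = fully free (0b10 is unused), so a single 64-bit L1 slot tracks
  // 32 slotsets.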
  uint64_t _children_per_slot() const override
  {
    return L1_ENTRIES_PER_SLOT;
  }

  interval_t _get_longest_from_l0(uint64_t pos0, uint64_t pos1,
    uint64_t min_length, interval_t* tail) const;

  inline void _fragment_and_emplace(uint64_t max_length, uint64_t offset,
    uint64_t len,
    interval_vector_t* res)
  {
    auto it = res->rbegin();
    if (max_length) {
      if (it != res->rend() && it->offset + it->length == offset) {
        auto l = max_length - it->length;
        if (l >= len) {
          it->length += len;
          return;
        } else {
          offset += l;
          len -= l;
          it->length += l;
        }
      }

      while (len > max_length) {
        res->emplace_back(offset, max_length);
        offset += max_length;
        len -= max_length;
      }
      res->emplace_back(offset, len);
      return;
    }

    if (it != res->rend() && it->offset + it->length == offset) {
      it->length += len;
    } else {
      res->emplace_back(offset, len);
    }
  }
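  // For example, with max_length = 0x10000 and res ending in {0x0, 0xc000},
  // _fragment_and_emplace(0x10000, 0xc000, 0x18000, &res) first grows the
  // tail interval to 0x10000 bytes and then appends {0x10000, 0x10000} and
  // {0x20000, 0x4000}; no emitted interval ever exceeds max_length.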

  bool _allocate_l0(uint64_t length,
    uint64_t max_length,
    uint64_t l0_pos0, uint64_t l0_pos1,
    uint64_t* allocated,
    interval_vector_t* res)
  {
    uint64_t d0 = L0_ENTRIES_PER_SLOT;

    ++l0_dives;

    ceph_assert(l0_pos0 < l0_pos1);
    ceph_assert(length > *allocated);
    ceph_assert(0 == (l0_pos0 % (slots_per_slotset * d0)));
    ceph_assert(0 == (l0_pos1 % (slots_per_slotset * d0)));
    ceph_assert(((length - *allocated) % l0_granularity) == 0);

    uint64_t need_entries = (length - *allocated) / l0_granularity;

    for (auto idx = l0_pos0 / d0; (idx < l0_pos1 / d0) && (length > *allocated);
      ++idx) {
      ++l0_iterations;
      slot_t& slot_val = l0[idx];
      auto base = idx * d0;
      if (slot_val == all_slot_clear) {
        continue;
      } else if (slot_val == all_slot_set) {
        uint64_t to_alloc = std::min(need_entries, d0);
        *allocated += to_alloc * l0_granularity;
        ++alloc_fragments;
        need_entries -= to_alloc;

        _fragment_and_emplace(max_length, base * l0_granularity,
          to_alloc * l0_granularity, res);

        if (to_alloc == d0) {
          slot_val = all_slot_clear;
        } else {
          _mark_alloc_l0(base, base + to_alloc);
        }
        continue;
      }

      auto free_pos = find_next_set_bit(slot_val, 0);
      ceph_assert(free_pos < bits_per_slot);
      auto next_pos = free_pos + 1;
      while (next_pos < bits_per_slot &&
        (next_pos - free_pos) < need_entries) {
        ++l0_inner_iterations;

        if (0 == (slot_val & (slot_t(1) << next_pos))) {
          auto to_alloc = (next_pos - free_pos);
          *allocated += to_alloc * l0_granularity;
          ++alloc_fragments;
          need_entries -= to_alloc;
          _fragment_and_emplace(max_length, (base + free_pos) * l0_granularity,
            to_alloc * l0_granularity, res);
          _mark_alloc_l0(base + free_pos, base + next_pos);
          free_pos = find_next_set_bit(slot_val, next_pos + 1);
          next_pos = free_pos + 1;
        } else {
          ++next_pos;
        }
      }
      if (need_entries && free_pos < bits_per_slot) {
        auto to_alloc = std::min(need_entries, d0 - free_pos);
        *allocated += to_alloc * l0_granularity;
        ++alloc_fragments;
        need_entries -= to_alloc;
        _fragment_and_emplace(max_length, (base + free_pos) * l0_granularity,
          to_alloc * l0_granularity, res);
        _mark_alloc_l0(base + free_pos, base + free_pos + to_alloc);
      }
    }
    return _is_empty_l0(l0_pos0, l0_pos1);
  }
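  // Returns true when the scanned L0 range [l0_pos0, l0_pos1) has no free
  // entries left, i.e. it is now fully allocated (see _is_empty_l0 below).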

protected:

  friend class AllocatorLevel02<AllocatorLevel01Loose>;

  void _init(uint64_t capacity, uint64_t _alloc_unit, bool mark_as_free = true)
  {
    l0_granularity = _alloc_unit;
    // 512 bits at L0 are mapped to a single L1 entry
    l1_granularity = l0_granularity * bits_per_slotset;

    // round capacity up so it has slot alignment at l1
    auto aligned_capacity =
      p2roundup((int64_t)capacity,
        int64_t(l1_granularity * slots_per_slotset * _children_per_slot()));
    size_t slot_count =
      aligned_capacity / l1_granularity / _children_per_slot();
    // we use set bit(s) as a marker for a (partially) free entry
    l1.resize(slot_count, mark_as_free ? all_slot_set : all_slot_clear);

    // l0 slot count
    size_t slot_count_l0 = aligned_capacity / _alloc_unit / bits_per_slot;
    // we use set bit(s) as a marker for a (partially) free entry
    l0.resize(slot_count_l0, mark_as_free ? all_slot_set : all_slot_clear);

    partial_l1_count = unalloc_l1_count = 0;
    if (mark_as_free) {
      unalloc_l1_count = slot_count * _children_per_slot();
      auto l0_pos_no_use = p2roundup((int64_t)capacity, (int64_t)l0_granularity) / l0_granularity;
      _mark_alloc_l1_l0(l0_pos_no_use, aligned_capacity / l0_granularity);
    }
  }
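  // For example, with _alloc_unit = 4 KiB this gives l0_granularity = 4 KiB
  // per L0 bit and l1_granularity = 4 KiB * 512 = 2 MiB per L1 entry; when
  // mark_as_free is set, any space between capacity and the slot-aligned
  // capacity is immediately marked allocated so it can never be handed out.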

  struct search_ctx_t
  {
    size_t partial_count = 0;
    size_t free_count = 0;
    uint64_t free_l1_pos = 0;

    uint64_t min_affordable_len = 0;
    uint64_t min_affordable_offs = 0;
    uint64_t affordable_len = 0;
    uint64_t affordable_offs = 0;

    bool fully_processed = false;

    void reset()
    {
      *this = search_ctx_t();
    }
  };
  enum {
    NO_STOP,
    STOP_ON_EMPTY,
    STOP_ON_PARTIAL,
  };
  void _analyze_partials(uint64_t pos_start, uint64_t pos_end,
    uint64_t length, uint64_t min_length, int mode,
    search_ctx_t* ctx);

  void _mark_l1_on_l0(int64_t l0_pos, int64_t l0_pos_end);
  void _mark_alloc_l0(int64_t l0_pos_start, int64_t l0_pos_end);
  uint64_t _claim_free_to_left_l0(int64_t l0_pos_start);
  uint64_t _claim_free_to_right_l0(int64_t l0_pos_start);


  void _mark_alloc_l1_l0(int64_t l0_pos_start, int64_t l0_pos_end)
  {
    _mark_alloc_l0(l0_pos_start, l0_pos_end);
    l0_pos_start = p2align(l0_pos_start, int64_t(bits_per_slotset));
    l0_pos_end = p2roundup(l0_pos_end, int64_t(bits_per_slotset));
    _mark_l1_on_l0(l0_pos_start, l0_pos_end);
  }

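  // _mark_free_l0 below sets the free bits in three phases: the partially
  // covered head slot bit by bit, any fully covered slots wholesale via
  // all_slot_set, and finally the partially covered tail slot bit by bit.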
  void _mark_free_l0(int64_t l0_pos_start, int64_t l0_pos_end)
  {
    auto d0 = L0_ENTRIES_PER_SLOT;

    auto pos = l0_pos_start;
    slot_t bits = (slot_t)1 << (l0_pos_start % d0);
    slot_t* val_s = &l0[pos / d0];
    int64_t pos_e = std::min(l0_pos_end,
      p2roundup<int64_t>(l0_pos_start + 1, d0));
    while (pos < pos_e) {
      *val_s |= bits;
      bits <<= 1;
      pos++;
    }
    pos_e = std::min(l0_pos_end, p2align<int64_t>(l0_pos_end, d0));
    while (pos < pos_e) {
      *(++val_s) = all_slot_set;
      pos += d0;
    }
    bits = 1;
    ++val_s;
    while (pos < l0_pos_end) {
      *val_s |= bits;
      bits <<= 1;
      pos++;
    }
  }

  void _mark_free_l1_l0(int64_t l0_pos_start, int64_t l0_pos_end)
  {
    _mark_free_l0(l0_pos_start, l0_pos_end);
    l0_pos_start = p2align(l0_pos_start, int64_t(bits_per_slotset));
    l0_pos_end = p2roundup(l0_pos_end, int64_t(bits_per_slotset));
    _mark_l1_on_l0(l0_pos_start, l0_pos_end);
  }

  bool _is_empty_l0(uint64_t l0_pos, uint64_t l0_pos_end)
  {
    bool no_free = true;
    uint64_t d = slots_per_slotset * L0_ENTRIES_PER_SLOT;
    ceph_assert(0 == (l0_pos % d));
    ceph_assert(0 == (l0_pos_end % d));

    auto idx = l0_pos / L0_ENTRIES_PER_SLOT;
    auto idx_end = l0_pos_end / L0_ENTRIES_PER_SLOT;
    while (idx < idx_end && no_free) {
      no_free = l0[idx] == all_slot_clear;
      ++idx;
    }
    return no_free;
  }
  bool _is_empty_l1(uint64_t l1_pos, uint64_t l1_pos_end)
  {
    bool no_free = true;
    uint64_t d = slots_per_slotset * _children_per_slot();
    ceph_assert(0 == (l1_pos % d));
    ceph_assert(0 == (l1_pos_end % d));

    auto idx = l1_pos / L1_ENTRIES_PER_SLOT;
    auto idx_end = l1_pos_end / L1_ENTRIES_PER_SLOT;
    while (idx < idx_end && no_free) {
      no_free = _is_slot_fully_allocated(idx);
      ++idx;
    }
    return no_free;
  }

  interval_t _allocate_l1_contiguous(uint64_t length,
    uint64_t min_length, uint64_t max_length,
    uint64_t pos_start, uint64_t pos_end);

  bool _allocate_l1(uint64_t length,
    uint64_t min_length, uint64_t max_length,
    uint64_t l1_pos_start, uint64_t l1_pos_end,
    uint64_t* allocated,
    interval_vector_t* res);

  uint64_t _mark_alloc_l1(uint64_t offset, uint64_t length)
  {
    uint64_t l0_pos_start = offset / l0_granularity;
    uint64_t l0_pos_end = p2roundup(offset + length, l0_granularity) / l0_granularity;
    _mark_alloc_l1_l0(l0_pos_start, l0_pos_end);
    return l0_granularity * (l0_pos_end - l0_pos_start);
  }

  uint64_t _free_l1(uint64_t offs, uint64_t len)
  {
    uint64_t l0_pos_start = offs / l0_granularity;
    uint64_t l0_pos_end = p2roundup(offs + len, l0_granularity) / l0_granularity;
    _mark_free_l1_l0(l0_pos_start, l0_pos_end);
    return l0_granularity * (l0_pos_end - l0_pos_start);
  }

  uint64_t claim_free_to_left_l1(uint64_t offs)
  {
    uint64_t l0_pos_end = offs / l0_granularity;
    uint64_t l0_pos_start = _claim_free_to_left_l0(l0_pos_end);
    if (l0_pos_start < l0_pos_end) {
      _mark_l1_on_l0(
        p2align(l0_pos_start, uint64_t(bits_per_slotset)),
        p2roundup(l0_pos_end, uint64_t(bits_per_slotset)));
      return l0_granularity * (l0_pos_end - l0_pos_start);
    }
    return 0;
  }

  uint64_t claim_free_to_right_l1(uint64_t offs)
  {
    uint64_t l0_pos_start = offs / l0_granularity;
    uint64_t l0_pos_end = _claim_free_to_right_l0(l0_pos_start);

    if (l0_pos_start < l0_pos_end) {
      _mark_l1_on_l0(
        p2align(l0_pos_start, uint64_t(bits_per_slotset)),
        p2roundup(l0_pos_end, uint64_t(bits_per_slotset)));
      return l0_granularity * (l0_pos_end - l0_pos_start);
    }
    return 0;
  }
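  // claim_free_to_left_l1/claim_free_to_right_l1 claim the contiguous run of
  // free L0 entries immediately adjacent to offs (via the _claim_free_to_*_l0
  // helpers defined elsewhere), refresh the covering L1 entries, and return
  // the number of bytes claimed, or 0 if nothing free borders offs.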


public:
  uint64_t debug_get_allocated(uint64_t pos0 = 0, uint64_t pos1 = 0)
  {
    if (pos1 == 0) {
      pos1 = l1.size() * L1_ENTRIES_PER_SLOT;
    }
    auto avail = debug_get_free(pos0, pos1);
    return (pos1 - pos0) * l1_granularity - avail;
  }

  uint64_t debug_get_free(uint64_t l1_pos0 = 0, uint64_t l1_pos1 = 0)
  {
    ceph_assert(0 == (l1_pos0 % L1_ENTRIES_PER_SLOT));
    ceph_assert(0 == (l1_pos1 % L1_ENTRIES_PER_SLOT));

    auto idx0 = l1_pos0 * slots_per_slotset;
    auto idx1 = l1_pos1 * slots_per_slotset;

    if (idx1 == 0) {
      idx1 = l0.size();
    }

    uint64_t res = 0;
    for (uint64_t i = idx0; i < idx1; ++i) {
      auto v = l0[i];
      if (v == all_slot_set) {
        res += L0_ENTRIES_PER_SLOT;
      } else if (v != all_slot_clear) {
        size_t cnt = 0;
#ifdef __GNUC__
        cnt = __builtin_popcountll(v);
#else
        // Kernighan's algorithm to count set bits
        while (v) {
          v &= (v - 1);
          cnt++;
        }
#endif
        res += cnt;
      }
    }
    return res * l0_granularity;
  }
  void collect_stats(
    std::map<size_t, size_t>& bins_overall) override;

  static inline ssize_t count_0s(slot_t slot_val, size_t start_pos);
  static inline ssize_t count_1s(slot_t slot_val, size_t start_pos);
  void dump(std::function<void(uint64_t offset, uint64_t length)> notify);
};


class AllocatorLevel01Compact : public AllocatorLevel01
{
  uint64_t _children_per_slot() const override
  {
    return 8;
  }
public:
  void collect_stats(
    std::map<size_t, size_t>& bins_overall) override
  {
    // not implemented
  }
};

template <class L1>
class AllocatorLevel02 : public AllocatorLevel
{
public:
  uint64_t debug_get_free(uint64_t pos0 = 0, uint64_t pos1 = 0)
  {
    std::lock_guard l(lock);
    return l1.debug_get_free(pos0 * l1._children_per_slot() * bits_per_slot,
      pos1 * l1._children_per_slot() * bits_per_slot);
  }
  uint64_t debug_get_allocated(uint64_t pos0 = 0, uint64_t pos1 = 0)
  {
    std::lock_guard l(lock);
    return l1.debug_get_allocated(pos0 * l1._children_per_slot() * bits_per_slot,
      pos1 * l1._children_per_slot() * bits_per_slot);
  }

  uint64_t get_available()
  {
    std::lock_guard l(lock);
    return available;
  }
  inline uint64_t get_min_alloc_size() const
  {
    return l1.get_min_alloc_size();
  }
  void collect_stats(
    std::map<size_t, size_t>& bins_overall) override {

    std::lock_guard l(lock);
    l1.collect_stats(bins_overall);
  }
  uint64_t claim_free_to_left(uint64_t offset) {
    std::lock_guard l(lock);
    auto allocated = l1.claim_free_to_left_l1(offset);
    ceph_assert(available >= allocated);
    available -= allocated;

    uint64_t l2_pos = (offset - allocated) / l2_granularity;
    uint64_t l2_pos_end =
      p2roundup(int64_t(offset), int64_t(l2_granularity)) / l2_granularity;
    _mark_l2_on_l1(l2_pos, l2_pos_end);
    return allocated;
  }

  uint64_t claim_free_to_right(uint64_t offset) {
    std::lock_guard l(lock);
    auto allocated = l1.claim_free_to_right_l1(offset);
    ceph_assert(available >= allocated);
    available -= allocated;

    uint64_t l2_pos = (offset) / l2_granularity;
    int64_t end = offset + allocated;
    uint64_t l2_pos_end = p2roundup(end, int64_t(l2_granularity)) / l2_granularity;
    _mark_l2_on_l1(l2_pos, l2_pos_end);
    return allocated;
  }
protected:
  ceph::mutex lock = ceph::make_mutex("AllocatorLevel02::lock");
  L1 l1;
  slot_vector_t l2;
  uint64_t l2_granularity = 0; // space per entry
  uint64_t available = 0;
  uint64_t last_pos = 0;

  enum {
    L1_ENTRIES_PER_SLOT = bits_per_slot, // 64
  };

  uint64_t _children_per_slot() const override
  {
    return L1_ENTRIES_PER_SLOT;
  }
  uint64_t _level_granularity() const override
  {
    return l2_granularity;
  }

  void _init(uint64_t capacity, uint64_t _alloc_unit, bool mark_as_free = true)
  {
    ceph_assert(isp2(_alloc_unit));
    l1._init(capacity, _alloc_unit, mark_as_free);

    l2_granularity =
      l1._level_granularity() * l1._children_per_slot() * slots_per_slotset;

    // round capacity up so it has slot alignment at l2
    auto aligned_capacity =
      p2roundup((int64_t)capacity, (int64_t)l2_granularity * L1_ENTRIES_PER_SLOT);
    size_t elem_count = aligned_capacity / l2_granularity / L1_ENTRIES_PER_SLOT;
    // we use set bit(s) as a marker for a (partially) free entry
    l2.resize(elem_count, mark_as_free ? all_slot_set : all_slot_clear);

    if (mark_as_free) {
      // mark the tail beyond the real capacity as allocated at l2
      auto l2_pos_no_use =
        p2roundup((int64_t)capacity, (int64_t)l2_granularity) / l2_granularity;
      _mark_l2_allocated(l2_pos_no_use, aligned_capacity / l2_granularity);
      available = p2align(capacity, _alloc_unit);
    } else {
      available = 0;
    }
  }
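  // For example, with AllocatorLevel01Loose as L1 (32 L1 entries per slot)
  // and a 4 KiB alloc unit, l2_granularity = 2 MiB * 32 * 8 = 512 MiB,
  // i.e. a single L2 bit tracks 512 MiB of space.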

  void _mark_l2_allocated(int64_t l2_pos, int64_t l2_pos_end)
  {
    auto d = L1_ENTRIES_PER_SLOT;
    ceph_assert(0 <= l2_pos_end);
    ceph_assert((int64_t)l2.size() >= (l2_pos_end / d));

    while (l2_pos < l2_pos_end) {
      l2[l2_pos / d] &= ~(slot_t(1) << (l2_pos % d));
      ++l2_pos;
    }
  }

  void _mark_l2_free(int64_t l2_pos, int64_t l2_pos_end)
  {
    auto d = L1_ENTRIES_PER_SLOT;
    ceph_assert(0 <= l2_pos_end);
    ceph_assert((int64_t)l2.size() >= (l2_pos_end / d));

    while (l2_pos < l2_pos_end) {
      l2[l2_pos / d] |= (slot_t(1) << (l2_pos % d));
      ++l2_pos;
    }
  }

  void _mark_l2_on_l1(int64_t l2_pos, int64_t l2_pos_end)
  {
    auto d = L1_ENTRIES_PER_SLOT;
    ceph_assert(0 <= l2_pos_end);
    ceph_assert((int64_t)l2.size() >= (l2_pos_end / d));

    auto idx = l2_pos * slots_per_slotset;
    auto idx_end = l2_pos_end * slots_per_slotset;
    bool all_allocated = true;
    while (idx < idx_end) {
      if (!l1._is_slot_fully_allocated(idx)) {
        all_allocated = false;
        idx = p2roundup(int64_t(++idx), int64_t(slots_per_slotset));
      }
      else {
        ++idx;
      }
      if ((idx % slots_per_slotset) == 0) {
        if (all_allocated) {
          l2[l2_pos / d] &= ~(slot_t(1) << (l2_pos % d));
        }
        else {
          l2[l2_pos / d] |= (slot_t(1) << (l2_pos % d));
        }
        all_allocated = true;
        ++l2_pos;
      }
    }
  }
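  // _mark_l2_on_l1 above recomputes the L2 bits for [l2_pos, l2_pos_end) from
  // the underlying L1 slots: a bit is cleared only when every one of the
  // slots_per_slotset L1 slots it covers is fully allocated, otherwise it is
  // set to indicate remaining free space.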

  void _allocate_l2(uint64_t length,
    uint64_t min_length,
    uint64_t max_length,
    uint64_t hint,

    uint64_t* allocated,
    interval_vector_t* res)
  {
    uint64_t prev_allocated = *allocated;
    uint64_t d = L1_ENTRIES_PER_SLOT;
    ceph_assert(min_length <= l2_granularity);
    ceph_assert(max_length == 0 || max_length >= min_length);
    ceph_assert(max_length == 0 || (max_length % min_length) == 0);
    ceph_assert(length >= min_length);
    ceph_assert((length % min_length) == 0);

    uint64_t cap = 1ull << 31;
    if (max_length == 0 || max_length >= cap) {
      max_length = cap;
    }

    uint64_t l1_w = slots_per_slotset * l1._children_per_slot();

    std::lock_guard l(lock);

    if (available < min_length) {
      return;
    }
    if (hint != 0) {
      last_pos = (hint / d) < l2.size() ? p2align(hint, d) : 0;
    }
    auto l2_pos = last_pos;
    auto last_pos0 = last_pos;
    auto pos = last_pos / d;
    auto pos_end = l2.size();
    // The outer loop below exists to avoid 'modulo' operations inside the
    // inner loop, since they appear to have a noticeable performance cost.
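    // The two passes also implement wrap-around: the first pass scans from
    // last_pos to the end of the L2 bitmap, the second restarts from 0 and
    // stops where the search originally began (last_pos0).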
    for (auto i = 0; i < 2; ++i) {
      for (; length > *allocated && pos < pos_end; ++pos) {
        slot_t& slot_val = l2[pos];
        size_t free_pos = 0;
        bool all_set = false;
        if (slot_val == all_slot_clear) {
          l2_pos += d;
          last_pos = l2_pos;
          continue;
        } else if (slot_val == all_slot_set) {
          free_pos = 0;
          all_set = true;
        } else {
          free_pos = find_next_set_bit(slot_val, 0);
          ceph_assert(free_pos < bits_per_slot);
        }
        do {
          ceph_assert(length > *allocated);
          bool empty = l1._allocate_l1(length,
            min_length,
            max_length,
            (l2_pos + free_pos) * l1_w,
            (l2_pos + free_pos + 1) * l1_w,
            allocated,
            res);
          if (empty) {
            slot_val &= ~(slot_t(1) << free_pos);
          }
          if (length <= *allocated || slot_val == all_slot_clear) {
            break;
          }
          ++free_pos;
          if (!all_set) {
            free_pos = find_next_set_bit(slot_val, free_pos);
          }
        } while (free_pos < bits_per_slot);
        last_pos = l2_pos;
        l2_pos += d;
      }
      l2_pos = 0;
      pos = 0;
      pos_end = last_pos0 / d;
    }

    ++l2_allocs;
    auto allocated_here = *allocated - prev_allocated;
    ceph_assert(available >= allocated_here);
    available -= allocated_here;
  }

#ifndef NON_CEPH_BUILD
  // to provide compatibility with BlueStore's allocator interface
  void _free_l2(const interval_set<uint64_t> & rr)
  {
    uint64_t released = 0;
    std::lock_guard l(lock);
    for (auto r : rr) {
      released += l1._free_l1(r.first, r.second);
      uint64_t l2_pos = r.first / l2_granularity;
      uint64_t l2_pos_end = p2roundup(int64_t(r.first + r.second), int64_t(l2_granularity)) / l2_granularity;

      _mark_l2_free(l2_pos, l2_pos_end);
    }
    available += released;
  }
#endif

  template <typename T>
  void _free_l2(const T& rr)
  {
    uint64_t released = 0;
    std::lock_guard l(lock);
    for (auto r : rr) {
      released += l1._free_l1(r.offset, r.length);
      uint64_t l2_pos = r.offset / l2_granularity;
      uint64_t l2_pos_end = p2roundup(int64_t(r.offset + r.length), int64_t(l2_granularity)) / l2_granularity;

      _mark_l2_free(l2_pos, l2_pos_end);
    }
    available += released;
  }

  void _mark_allocated(uint64_t o, uint64_t len)
  {
    uint64_t l2_pos = o / l2_granularity;
    uint64_t l2_pos_end = p2roundup(int64_t(o + len), int64_t(l2_granularity)) / l2_granularity;

    std::lock_guard l(lock);
    auto allocated = l1._mark_alloc_l1(o, len);
    ceph_assert(available >= allocated);
    available -= allocated;
    _mark_l2_on_l1(l2_pos, l2_pos_end);
  }

  void _mark_free(uint64_t o, uint64_t len)
  {
    uint64_t l2_pos = o / l2_granularity;
    uint64_t l2_pos_end = p2roundup(int64_t(o + len), int64_t(l2_granularity)) / l2_granularity;

    std::lock_guard l(lock);
    available += l1._free_l1(o, len);
    _mark_l2_free(l2_pos, l2_pos_end);
  }
  void _shutdown()
  {
    last_pos = 0;
  }
  double _get_fragmentation() {
    std::lock_guard l(lock);
    return l1.get_fragmentation();
  }
};

#endif