// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
 * Bitmap based in-memory allocator implementation.
 * Author: Igor Fedotov, ifedotov@suse.com
 *
 */

#ifndef __FAST_BITMAP_ALLOCATOR_IMPL_H
#define __FAST_BITMAP_ALLOCATOR_IMPL_H
#include "include/intarith.h"

#include <bit>
#include <vector>
#include <algorithm>
#include <mutex>

typedef uint64_t slot_t;

#ifdef NON_CEPH_BUILD
#include <assert.h>
struct interval_t
{
  uint64_t offset = 0;
  uint64_t length = 0;

  interval_t() {}
  interval_t(uint64_t o, uint64_t l) : offset(o), length(l) {}
  interval_t(const interval_t &ext) :
    offset(ext.offset), length(ext.length) {}
};
typedef std::vector<interval_t> interval_vector_t;
typedef std::vector<slot_t> slot_vector_t;
#else
#include "include/ceph_assert.h"
#include "common/likely.h"
#include "os/bluestore/bluestore_types.h"
#include "include/mempool.h"
#include "common/ceph_mutex.h"

typedef bluestore_interval_t<uint64_t, uint64_t> interval_t;
typedef PExtentVector interval_vector_t;

typedef mempool::bluestore_alloc::vector<slot_t> slot_vector_t;

#endif

// fitting into cache line on x86_64
static const size_t slots_per_slotset = 8; // 8 slots per set
static const size_t slotset_bytes = sizeof(slot_t) * slots_per_slotset;
static const size_t bits_per_slot = sizeof(slot_t) * 8;
static const size_t bits_per_slotset = slotset_bytes * 8;
static const slot_t all_slot_set = 0xffffffffffffffff;
static const slot_t all_slot_clear = 0;

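// Returns the position of the first set bit in slot_val at or after
// start_pos, or bits_per_slot when no such bit exists. Illustrative example:
//   find_next_set_bit(0b1000, 1) == 3, find_next_set_bit(0, 0) == 64.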
inline size_t find_next_set_bit(slot_t slot_val, size_t start_pos)
{
#ifdef __GNUC__
  if (start_pos == 0) {
    start_pos = __builtin_ffsll(slot_val);
    return start_pos ? start_pos - 1 : bits_per_slot;
  }
#endif
  slot_t mask = slot_t(1) << start_pos;
  while (start_pos < bits_per_slot && !(slot_val & mask)) {
    mask <<= 1;
    ++start_pos;
  }
  return start_pos;
}

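// Hierarchical bitmap: an L0 bitmap tracks individual allocation units
// (set bit == free unit), L1 summarizes each 512-bit L0 slotset with a
// 2-bit state, and AllocatorLevel02 keeps a per-L1-slotset L2 summary bit
// on top to speed up scanning for free space.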
class AllocatorLevel
{
protected:

  virtual uint64_t _children_per_slot() const = 0;
  virtual uint64_t _level_granularity() const = 0;

public:
  static uint64_t l0_dives;
  static uint64_t l0_iterations;
  static uint64_t l0_inner_iterations;
  static uint64_t alloc_fragments;
  static uint64_t alloc_fragments_fast;
  static uint64_t l2_allocs;

  virtual ~AllocatorLevel()
  {}

  virtual void collect_stats(
    std::map<size_t, size_t>& bins_overall) = 0;

};

class AllocatorLevel01 : public AllocatorLevel
{
protected:
  slot_vector_t l0; // set bit means free entry
  slot_vector_t l1;
  uint64_t l0_granularity = 0; // space per entry
  uint64_t l1_granularity = 0; // space per entry

  size_t partial_l1_count = 0;
  size_t unalloc_l1_count = 0;

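  // Fragmentation is reported as the share of L1 entries that are only
  // partially free among all entries that still have some free space,
  // i.e. partial / (partial + fully_free); 0.0 when nothing is free.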
  double get_fragmentation() const {
    double res = 0.0;
    auto total = unalloc_l1_count + partial_l1_count;
    if (total) {
      res = double(partial_l1_count) / double(total);
    }
    return res;
  }

  uint64_t _level_granularity() const override
  {
    return l1_granularity;
  }

  inline bool _is_slot_fully_allocated(uint64_t idx) const {
    return l1[idx] == all_slot_clear;
  }
public:
  inline uint64_t get_min_alloc_size() const
  {
    return l0_granularity;
  }

};

template <class T>
class AllocatorLevel02;

class AllocatorLevel01Loose : public AllocatorLevel01
{
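  // Each L1 entry is 2 bits wide and summarizes one 512-bit L0 slotset:
  // 00 = fully allocated, 01 = partially free, 11 = fully free
  // (0x02 is reserved/not used).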
  enum {
    L1_ENTRY_WIDTH = 2,
    L1_ENTRY_MASK = (1 << L1_ENTRY_WIDTH) - 1,
    L1_ENTRY_FULL = 0x00,
    L1_ENTRY_PARTIAL = 0x01,
    L1_ENTRY_NOT_USED = 0x02,
    L1_ENTRY_FREE = 0x03,
    L1_ENTRIES_PER_SLOT = bits_per_slot / L1_ENTRY_WIDTH, // 32
    L0_ENTRIES_PER_SLOT = bits_per_slot, // 64
  };
  uint64_t _children_per_slot() const override
  {
    return L1_ENTRIES_PER_SLOT;
  }

  interval_t _get_longest_from_l0(uint64_t pos0, uint64_t pos1,
    uint64_t min_length, interval_t* tail) const;

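  // Appends [offset, offset + len) to res, splitting it into pieces of at
  // most max_length bytes (0 = unlimited) and merging with the last interval
  // in res when the two are contiguous.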
  inline void _fragment_and_emplace(uint64_t max_length, uint64_t offset,
    uint64_t len,
    interval_vector_t* res)
  {
    auto it = res->rbegin();
    if (max_length) {
      if (it != res->rend() && it->offset + it->length == offset) {
        auto l = max_length - it->length;
        if (l >= len) {
          it->length += len;
          return;
        } else {
          offset += l;
          len -= l;
          it->length += l;
        }
      }

      while (len > max_length) {
        res->emplace_back(offset, max_length);
        offset += max_length;
        len -= max_length;
      }
      res->emplace_back(offset, len);
      return;
    }

    if (it != res->rend() && it->offset + it->length == offset) {
      it->length += len;
    } else {
      res->emplace_back(offset, len);
    }
  }

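  // Scans L0 slots in [l0_pos0, l0_pos1) and allocates up to
  // (length - *allocated) bytes, clearing the corresponding free bits and
  // appending the resulting extents to res via _fragment_and_emplace().
  // Returns true when the scanned range ends up fully allocated.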
  bool _allocate_l0(uint64_t length,
    uint64_t max_length,
    uint64_t l0_pos0, uint64_t l0_pos1,
    uint64_t* allocated,
    interval_vector_t* res)
  {
    uint64_t d0 = L0_ENTRIES_PER_SLOT;

    ++l0_dives;

    ceph_assert(l0_pos0 < l0_pos1);
    ceph_assert(length > *allocated);
    ceph_assert(0 == (l0_pos0 % (slots_per_slotset * d0)));
    ceph_assert(0 == (l0_pos1 % (slots_per_slotset * d0)));
    ceph_assert(((length - *allocated) % l0_granularity) == 0);

    uint64_t need_entries = (length - *allocated) / l0_granularity;

    for (auto idx = l0_pos0 / d0; (idx < l0_pos1 / d0) && (length > *allocated);
      ++idx) {
      ++l0_iterations;
      slot_t& slot_val = l0[idx];
      auto base = idx * d0;
      if (slot_val == all_slot_clear) {
        continue;
      } else if (slot_val == all_slot_set) {
        uint64_t to_alloc = std::min(need_entries, d0);
        *allocated += to_alloc * l0_granularity;
        ++alloc_fragments;
        need_entries -= to_alloc;

        _fragment_and_emplace(max_length, base * l0_granularity,
          to_alloc * l0_granularity, res);

        if (to_alloc == d0) {
          slot_val = all_slot_clear;
        } else {
          _mark_alloc_l0(base, base + to_alloc);
        }
        continue;
      }

      auto free_pos = find_next_set_bit(slot_val, 0);
      ceph_assert(free_pos < bits_per_slot);
      auto next_pos = free_pos + 1;
      while (next_pos < bits_per_slot &&
        (next_pos - free_pos) < need_entries) {
        ++l0_inner_iterations;

        if (0 == (slot_val & (slot_t(1) << next_pos))) {
          auto to_alloc = (next_pos - free_pos);
          *allocated += to_alloc * l0_granularity;
          ++alloc_fragments;
          need_entries -= to_alloc;
          _fragment_and_emplace(max_length, (base + free_pos) * l0_granularity,
            to_alloc * l0_granularity, res);
          _mark_alloc_l0(base + free_pos, base + next_pos);
          free_pos = find_next_set_bit(slot_val, next_pos + 1);
          next_pos = free_pos + 1;
        } else {
          ++next_pos;
        }
      }
      if (need_entries && free_pos < bits_per_slot) {
        auto to_alloc = std::min(need_entries, d0 - free_pos);
        *allocated += to_alloc * l0_granularity;
        ++alloc_fragments;
        need_entries -= to_alloc;
        _fragment_and_emplace(max_length, (base + free_pos) * l0_granularity,
          to_alloc * l0_granularity, res);
        _mark_alloc_l0(base + free_pos, base + free_pos + to_alloc);
      }
    }
    return _is_empty_l0(l0_pos0, l0_pos1);
  }

protected:

  friend class AllocatorLevel02<AllocatorLevel01Loose>;

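  // Sizes both bitmaps for the given capacity and allocation unit.
  // Illustrative example: capacity = 1 TiB with _alloc_unit = 4 KiB gives
  // l0_granularity = 4 KiB, l1_granularity = 4 KiB * 512 = 2 MiB, an L0
  // bitmap of 2^28 bits (32 MiB) and an L1 map of 2^19 2-bit entries
  // (128 KiB). Any tail beyond 'capacity' within the aligned size is
  // immediately marked as allocated so it can never be handed out.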
  void _init(uint64_t capacity, uint64_t _alloc_unit, bool mark_as_free = true)
  {
    l0_granularity = _alloc_unit;
    // 512 bits at L0 mapped to L1 entry
    l1_granularity = l0_granularity * bits_per_slotset;

    // capacity to have slot alignment at l1
    auto aligned_capacity =
      p2roundup((int64_t)capacity,
        int64_t(l1_granularity * slots_per_slotset * _children_per_slot()));
    size_t slot_count =
      aligned_capacity / l1_granularity / _children_per_slot();
    // we use set bit(s) as a marker for (partially) free entry
    l1.resize(slot_count, mark_as_free ? all_slot_set : all_slot_clear);

    // l0 slot count
    size_t slot_count_l0 = aligned_capacity / _alloc_unit / bits_per_slot;
    // we use set bit(s) as a marker for (partially) free entry
    l0.resize(slot_count_l0, mark_as_free ? all_slot_set : all_slot_clear);

    partial_l1_count = unalloc_l1_count = 0;
    if (mark_as_free) {
      unalloc_l1_count = slot_count * _children_per_slot();
      auto l0_pos_no_use = p2roundup((int64_t)capacity, (int64_t)l0_granularity) / l0_granularity;
      _mark_alloc_l1_l0(l0_pos_no_use, aligned_capacity / l0_granularity);
    }
  }

  struct search_ctx_t
  {
    size_t partial_count = 0;
    size_t free_count = 0;
    uint64_t free_l1_pos = 0;

    uint64_t min_affordable_len = 0;
    uint64_t min_affordable_offs = 0;
    uint64_t affordable_len = 0;
    uint64_t affordable_offs = 0;

    bool fully_processed = false;

    void reset()
    {
      *this = search_ctx_t();
    }
  };
  enum {
    NO_STOP,
    STOP_ON_EMPTY,
    STOP_ON_PARTIAL,
  };
  void _analyze_partials(uint64_t pos_start, uint64_t pos_end,
    uint64_t length, uint64_t min_length, int mode,
    search_ctx_t* ctx);

  void _mark_l1_on_l0(int64_t l0_pos, int64_t l0_pos_end);
  void _mark_alloc_l0(int64_t l0_pos_start, int64_t l0_pos_end);
  uint64_t _claim_free_to_left_l0(int64_t l0_pos_start);
  uint64_t _claim_free_to_right_l0(int64_t l0_pos_start);


  void _mark_alloc_l1_l0(int64_t l0_pos_start, int64_t l0_pos_end)
  {
    _mark_alloc_l0(l0_pos_start, l0_pos_end);
    l0_pos_start = p2align(l0_pos_start, int64_t(bits_per_slotset));
    l0_pos_end = p2roundup(l0_pos_end, int64_t(bits_per_slotset));
    _mark_l1_on_l0(l0_pos_start, l0_pos_end);
  }

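  // Sets (frees) L0 bits in [l0_pos_start, l0_pos_end) in three phases:
  // the partial head slot bit by bit, then whole slots at once, then the
  // partial tail slot bit by bit.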
  void _mark_free_l0(int64_t l0_pos_start, int64_t l0_pos_end)
  {
    auto d0 = L0_ENTRIES_PER_SLOT;

    auto pos = l0_pos_start;
    slot_t bits = (slot_t)1 << (l0_pos_start % d0);
    slot_t* val_s = &l0[pos / d0];
    int64_t pos_e = std::min(l0_pos_end,
      p2roundup<int64_t>(l0_pos_start + 1, d0));
    while (pos < pos_e) {
      *val_s |= bits;
      bits <<= 1;
      pos++;
    }
    pos_e = std::min(l0_pos_end, p2align<int64_t>(l0_pos_end, d0));
    while (pos < pos_e) {
      *(++val_s) = all_slot_set;
      pos += d0;
    }
    bits = 1;
    ++val_s;
    while (pos < l0_pos_end) {
      *val_s |= bits;
      bits <<= 1;
      pos++;
    }
  }

  void _mark_free_l1_l0(int64_t l0_pos_start, int64_t l0_pos_end)
  {
    _mark_free_l0(l0_pos_start, l0_pos_end);
    l0_pos_start = p2align(l0_pos_start, int64_t(bits_per_slotset));
    l0_pos_end = p2roundup(l0_pos_end, int64_t(bits_per_slotset));
    _mark_l1_on_l0(l0_pos_start, l0_pos_end);
  }

  bool _is_empty_l0(uint64_t l0_pos, uint64_t l0_pos_end)
  {
    bool no_free = true;
    uint64_t d = slots_per_slotset * L0_ENTRIES_PER_SLOT;
    ceph_assert(0 == (l0_pos % d));
    ceph_assert(0 == (l0_pos_end % d));

    auto idx = l0_pos / L0_ENTRIES_PER_SLOT;
    auto idx_end = l0_pos_end / L0_ENTRIES_PER_SLOT;
    while (idx < idx_end && no_free) {
      no_free = l0[idx] == all_slot_clear;
      ++idx;
    }
    return no_free;
  }
  bool _is_empty_l1(uint64_t l1_pos, uint64_t l1_pos_end)
  {
    bool no_free = true;
    uint64_t d = slots_per_slotset * _children_per_slot();
    ceph_assert(0 == (l1_pos % d));
    ceph_assert(0 == (l1_pos_end % d));

    auto idx = l1_pos / L1_ENTRIES_PER_SLOT;
    auto idx_end = l1_pos_end / L1_ENTRIES_PER_SLOT;
    while (idx < idx_end && no_free) {
      no_free = _is_slot_fully_allocated(idx);
      ++idx;
    }
    return no_free;
  }

  interval_t _allocate_l1_contiguous(uint64_t length,
    uint64_t min_length, uint64_t max_length,
    uint64_t pos_start, uint64_t pos_end);

  bool _allocate_l1(uint64_t length,
    uint64_t min_length, uint64_t max_length,
    uint64_t l1_pos_start, uint64_t l1_pos_end,
    uint64_t* allocated,
    interval_vector_t* res);

  uint64_t _mark_alloc_l1(uint64_t offset, uint64_t length)
  {
    uint64_t l0_pos_start = offset / l0_granularity;
    uint64_t l0_pos_end = p2roundup(offset + length, l0_granularity) / l0_granularity;
    _mark_alloc_l1_l0(l0_pos_start, l0_pos_end);
    return l0_granularity * (l0_pos_end - l0_pos_start);
  }

  uint64_t _free_l1(uint64_t offs, uint64_t len)
  {
    uint64_t l0_pos_start = offs / l0_granularity;
    uint64_t l0_pos_end = p2roundup(offs + len, l0_granularity) / l0_granularity;
    _mark_free_l1_l0(l0_pos_start, l0_pos_end);
    return l0_granularity * (l0_pos_end - l0_pos_start);
  }

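  // claim_free_to_left_l1/claim_free_to_right_l1 grab the contiguous free
  // space immediately before/after the given offset, mark it allocated and
  // return the number of bytes claimed (0 when the neighbor is not free).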
  uint64_t claim_free_to_left_l1(uint64_t offs)
  {
    uint64_t l0_pos_end = offs / l0_granularity;
    uint64_t l0_pos_start = _claim_free_to_left_l0(l0_pos_end);
    if (l0_pos_start < l0_pos_end) {
      _mark_l1_on_l0(
        p2align(l0_pos_start, uint64_t(bits_per_slotset)),
        p2roundup(l0_pos_end, uint64_t(bits_per_slotset)));
      return l0_granularity * (l0_pos_end - l0_pos_start);
    }
    return 0;
  }

  uint64_t claim_free_to_right_l1(uint64_t offs)
  {
    uint64_t l0_pos_start = offs / l0_granularity;
    uint64_t l0_pos_end = _claim_free_to_right_l0(l0_pos_start);

    if (l0_pos_start < l0_pos_end) {
      _mark_l1_on_l0(
        p2align(l0_pos_start, uint64_t(bits_per_slotset)),
        p2roundup(l0_pos_end, uint64_t(bits_per_slotset)));
      return l0_granularity * (l0_pos_end - l0_pos_start);
    }
    return 0;
  }


public:
  uint64_t debug_get_allocated(uint64_t pos0 = 0, uint64_t pos1 = 0)
  {
    if (pos1 == 0) {
      pos1 = l1.size() * L1_ENTRIES_PER_SLOT;
    }
    auto avail = debug_get_free(pos0, pos1);
    return (pos1 - pos0) * l1_granularity - avail;
  }

  uint64_t debug_get_free(uint64_t l1_pos0 = 0, uint64_t l1_pos1 = 0)
  {
    ceph_assert(0 == (l1_pos0 % L1_ENTRIES_PER_SLOT));
    ceph_assert(0 == (l1_pos1 % L1_ENTRIES_PER_SLOT));

    auto idx0 = l1_pos0 * slots_per_slotset;
    auto idx1 = l1_pos1 * slots_per_slotset;

    if (idx1 == 0) {
      idx1 = l0.size();
    }

    uint64_t res = 0;
    for (uint64_t i = idx0; i < idx1; ++i) {
      auto v = l0[i];
      if (v == all_slot_set) {
        res += L0_ENTRIES_PER_SLOT;
      } else if (v != all_slot_clear) {
        size_t cnt = 0;
#ifdef __GNUC__
        cnt = __builtin_popcountll(v);
#else
        // Kernighan's Alg to count set bits
        while (v) {
          v &= (v - 1);
          cnt++;
        }
#endif
        res += cnt;
      }
    }
    return res * l0_granularity;
  }
  void collect_stats(
    std::map<size_t, size_t>& bins_overall) override;

  static inline ssize_t count_0s(slot_t slot_val, size_t start_pos);
  static inline ssize_t count_1s(slot_t slot_val, size_t start_pos);
  void foreach_internal(std::function<void(uint64_t offset, uint64_t length)> notify);
};

class AllocatorLevel01Compact : public AllocatorLevel01
{
  uint64_t _children_per_slot() const override
  {
    return 8;
  }
public:
  void collect_stats(
    std::map<size_t, size_t>& bins_overall) override
  {
    // not implemented
  }
};

template <class L1>
class AllocatorLevel02 : public AllocatorLevel
{
public:
  uint64_t debug_get_free(uint64_t pos0 = 0, uint64_t pos1 = 0)
  {
    std::lock_guard l(lock);
    return l1.debug_get_free(pos0 * l1._children_per_slot() * bits_per_slot,
      pos1 * l1._children_per_slot() * bits_per_slot);
  }
  uint64_t debug_get_allocated(uint64_t pos0 = 0, uint64_t pos1 = 0)
  {
    std::lock_guard l(lock);
    return l1.debug_get_allocated(pos0 * l1._children_per_slot() * bits_per_slot,
      pos1 * l1._children_per_slot() * bits_per_slot);
  }

  uint64_t get_available()
  {
    std::lock_guard l(lock);
    return available;
  }
  inline uint64_t get_min_alloc_size() const
  {
    return l1.get_min_alloc_size();
  }
  void collect_stats(
    std::map<size_t, size_t>& bins_overall) override {

    std::lock_guard l(lock);
    l1.collect_stats(bins_overall);
  }
  uint64_t claim_free_to_left(uint64_t offset) {
    std::lock_guard l(lock);
    auto allocated = l1.claim_free_to_left_l1(offset);
    ceph_assert(available >= allocated);
    available -= allocated;

    uint64_t l2_pos = (offset - allocated) / l2_granularity;
    uint64_t l2_pos_end =
      p2roundup(int64_t(offset), int64_t(l2_granularity)) / l2_granularity;
    _mark_l2_on_l1(l2_pos, l2_pos_end);
    return allocated;
  }

  uint64_t claim_free_to_right(uint64_t offset) {
    std::lock_guard l(lock);
    auto allocated = l1.claim_free_to_right_l1(offset);
    ceph_assert(available >= allocated);
    available -= allocated;

    uint64_t l2_pos = (offset) / l2_granularity;
    int64_t end = offset + allocated;
    uint64_t l2_pos_end = p2roundup(end, int64_t(l2_granularity)) / l2_granularity;
    _mark_l2_on_l1(l2_pos, l2_pos_end);
    return allocated;
  }

  void foreach_internal(
    std::function<void(uint64_t offset, uint64_t length)> notify)
  {
    size_t alloc_size = get_min_alloc_size();
    auto multiply_by_alloc_size = [alloc_size, notify](size_t off, size_t len) {
      notify(off * alloc_size, len * alloc_size);
    };
    std::lock_guard l(lock);
    l1.foreach_internal(multiply_by_alloc_size);
  }
  double get_fragmentation_internal() {
    std::lock_guard l(lock);
    return l1.get_fragmentation();
  }

protected:
  ceph::mutex lock = ceph::make_mutex("AllocatorLevel02::lock");
  L1 l1;
  slot_vector_t l2;
  uint64_t l2_granularity = 0; // space per entry
  uint64_t available = 0;
  uint64_t last_pos = 0;

  enum {
    L1_ENTRIES_PER_SLOT = bits_per_slot, // 64
  };

  uint64_t _children_per_slot() const override
  {
    return L1_ENTRIES_PER_SLOT;
  }
  uint64_t _level_granularity() const override
  {
    return l2_granularity;
  }

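  // Builds the L2 summary on top of L1. One L2 bit covers a whole L1
  // slotset, i.e. l2_granularity = l1_granularity * 32 * 8 for the Loose L1
  // level; with a 4 KiB allocation unit that is 2 MiB * 256 = 512 MiB per
  // L2 bit (illustrative numbers, they scale with _alloc_unit).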
  void _init(uint64_t capacity, uint64_t _alloc_unit, bool mark_as_free = true)
  {
    ceph_assert(std::has_single_bit(_alloc_unit));
    l1._init(capacity, _alloc_unit, mark_as_free);

    l2_granularity =
      l1._level_granularity() * l1._children_per_slot() * slots_per_slotset;

    // capacity to have slot alignment at l2
    auto aligned_capacity =
      p2roundup((int64_t)capacity, (int64_t)l2_granularity * L1_ENTRIES_PER_SLOT);
    size_t elem_count = aligned_capacity / l2_granularity / L1_ENTRIES_PER_SLOT;
    // we use set bit(s) as a marker for (partially) free entry
    l2.resize(elem_count, mark_as_free ? all_slot_set : all_slot_clear);

    if (mark_as_free) {
      // capacity to have slotset alignment at l1
      auto l2_pos_no_use =
        p2roundup((int64_t)capacity, (int64_t)l2_granularity) / l2_granularity;
      _mark_l2_allocated(l2_pos_no_use, aligned_capacity / l2_granularity);
      available = p2align(capacity, _alloc_unit);
    } else {
      available = 0;
    }
  }

  void _mark_l2_allocated(int64_t l2_pos, int64_t l2_pos_end)
  {
    auto d = L1_ENTRIES_PER_SLOT;
    ceph_assert(0 <= l2_pos_end);
    ceph_assert((int64_t)l2.size() >= (l2_pos_end / d));

    while (l2_pos < l2_pos_end) {
      l2[l2_pos / d] &= ~(slot_t(1) << (l2_pos % d));
      ++l2_pos;
    }
  }

  void _mark_l2_free(int64_t l2_pos, int64_t l2_pos_end)
  {
    auto d = L1_ENTRIES_PER_SLOT;
    ceph_assert(0 <= l2_pos_end);
    ceph_assert((int64_t)l2.size() >= (l2_pos_end / d));

    while (l2_pos < l2_pos_end) {
      l2[l2_pos / d] |= (slot_t(1) << (l2_pos % d));
      ++l2_pos;
    }
  }

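  // Recomputes the L2 bits covering [l2_pos, l2_pos_end): an L2 bit is
  // cleared when every L1 slot in its slotset is fully allocated and set
  // otherwise.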
  void _mark_l2_on_l1(int64_t l2_pos, int64_t l2_pos_end)
  {
    auto d = L1_ENTRIES_PER_SLOT;
    ceph_assert(0 <= l2_pos_end);
    ceph_assert((int64_t)l2.size() >= (l2_pos_end / d));

    auto idx = l2_pos * slots_per_slotset;
    auto idx_end = l2_pos_end * slots_per_slotset;
    bool all_allocated = true;
    while (idx < idx_end) {
      if (!l1._is_slot_fully_allocated(idx)) {
        all_allocated = false;
        idx = p2roundup(int64_t(++idx), int64_t(slots_per_slotset));
      }
      else {
        ++idx;
      }
      if ((idx % slots_per_slotset) == 0) {
        if (all_allocated) {
          l2[l2_pos / d] &= ~(slot_t(1) << (l2_pos % d));
        }
        else {
          l2[l2_pos / d] |= (slot_t(1) << (l2_pos % d));
        }
        all_allocated = true;
        ++l2_pos;
      }
    }
  }

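  // Top-level allocation entry point: scans L2 starting from the position
  // implied by 'hint' (or from last_pos) and dives into L1/L0 for every L2
  // entry that still has free space; it wraps around to cover the range
  // before the starting position, so the whole L2 map is scanned at most
  // once per call. max_length is capped at 2 GiB per extent.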
  void _allocate_l2(uint64_t length,
    uint64_t min_length,
    uint64_t max_length,
    uint64_t hint,

    uint64_t* allocated,
    interval_vector_t* res)
  {
    uint64_t prev_allocated = *allocated;
    uint64_t d = L1_ENTRIES_PER_SLOT;
    ceph_assert(min_length <= l2_granularity);
    ceph_assert(max_length == 0 || max_length >= min_length);
    ceph_assert(max_length == 0 || (max_length % min_length) == 0);
    ceph_assert(length >= min_length);
    ceph_assert((length % min_length) == 0);

    uint64_t cap = 1ull << 31;
    if (max_length == 0 || max_length >= cap) {
      max_length = cap;
    }

    uint64_t l1_w = slots_per_slotset * l1._children_per_slot();

    std::lock_guard l(lock);

    if (available < min_length) {
      return;
    }
    if (hint != 0) {
      last_pos = (hint / (d * l2_granularity)) < l2.size() ? p2align(hint / l2_granularity, d) : 0;
    }
    auto l2_pos = last_pos;
    auto last_pos0 = last_pos;
    auto pos = last_pos / d;
    auto pos_end = l2.size();
    // outer loop below is intended to optimize the performance by
    // avoiding 'modulo' operations inside the internal loop.
    // Looks like they have negative impact on the performance
    for (auto i = 0; i < 2; ++i) {
      for (; length > *allocated && pos < pos_end; ++pos) {
        slot_t& slot_val = l2[pos];
        size_t free_pos = 0;
        bool all_set = false;
        if (slot_val == all_slot_clear) {
          l2_pos += d;
          last_pos = l2_pos;
          continue;
        } else if (slot_val == all_slot_set) {
          free_pos = 0;
          all_set = true;
        } else {
          free_pos = find_next_set_bit(slot_val, 0);
          ceph_assert(free_pos < bits_per_slot);
        }
        do {
          ceph_assert(length > *allocated);
          bool empty = l1._allocate_l1(length,
            min_length,
            max_length,
            (l2_pos + free_pos) * l1_w,
            (l2_pos + free_pos + 1) * l1_w,
            allocated,
            res);
          if (empty) {
            slot_val &= ~(slot_t(1) << free_pos);
          }
          if (length <= *allocated || slot_val == all_slot_clear) {
            break;
          }
          ++free_pos;
          if (!all_set) {
            free_pos = find_next_set_bit(slot_val, free_pos);
          }
        } while (free_pos < bits_per_slot);
        last_pos = l2_pos;
        l2_pos += d;
      }
      l2_pos = 0;
      pos = 0;
      pos_end = last_pos0 / d;
    }

    ++l2_allocs;
    auto allocated_here = *allocated - prev_allocated;
    ceph_assert(available >= allocated_here);
    available -= allocated_here;
  }

#ifndef NON_CEPH_BUILD
  // to provide compatibility with BlueStore's allocator interface
  void _free_l2(const interval_set<uint64_t> & rr)
  {
    uint64_t released = 0;
    std::lock_guard l(lock);
    for (auto r : rr) {
      released += l1._free_l1(r.first, r.second);
      uint64_t l2_pos = r.first / l2_granularity;
      uint64_t l2_pos_end = p2roundup(int64_t(r.first + r.second), int64_t(l2_granularity)) / l2_granularity;

      _mark_l2_free(l2_pos, l2_pos_end);
    }
    available += released;
  }
#endif

  template <typename T>
  void _free_l2(const T& rr)
  {
    uint64_t released = 0;
    std::lock_guard l(lock);
    for (auto r : rr) {
      released += l1._free_l1(r.offset, r.length);
      uint64_t l2_pos = r.offset / l2_granularity;
      uint64_t l2_pos_end = p2roundup(int64_t(r.offset + r.length), int64_t(l2_granularity)) / l2_granularity;

      _mark_l2_free(l2_pos, l2_pos_end);
    }
    available += released;
  }

  void _mark_allocated(uint64_t o, uint64_t len)
  {
    uint64_t l2_pos = o / l2_granularity;
    uint64_t l2_pos_end = p2roundup(int64_t(o + len), int64_t(l2_granularity)) / l2_granularity;

    std::lock_guard l(lock);
    auto allocated = l1._mark_alloc_l1(o, len);
    ceph_assert(available >= allocated);
    available -= allocated;
    _mark_l2_on_l1(l2_pos, l2_pos_end);
  }

  void _mark_free(uint64_t o, uint64_t len)
  {
    uint64_t l2_pos = o / l2_granularity;
    uint64_t l2_pos_end = p2roundup(int64_t(o + len), int64_t(l2_granularity)) / l2_granularity;

    std::lock_guard l(lock);
    available += l1._free_l1(o, len);
    _mark_l2_free(l2_pos, l2_pos_end);
  }
  void _shutdown()
  {
    last_pos = 0;
  }
};

#endif