1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Bitmap based in-memory allocator.
5 * Author: Ramesh Chander, Ramesh.Chander@sandisk.com
8 #ifndef CEPH_OS_BLUESTORE_BITALLOCATOR_H
9 #define CEPH_OS_BLUESTORE_BITALLOCATOR_H
18 #include "include/intarith.h"
19 #include "os/bluestore/bluestore_types.h"
21 #define alloc_assert assert
23 #ifdef BIT_ALLOCATOR_DEBUG
24 #define alloc_dbg_assert(x) assert(x)
26 #define alloc_dbg_assert(x) (static_cast<void> (0))
// NOTE(review): this chunk is a mangled extraction -- the file's own line
// numbers are fused into the code text and many lines are elided. Code is
// kept byte-identical below; only comments are added.
//
// BitAllocatorStats: lifetime counters for one allocator. Every counter
// is a std::atomic<int64_t> updated via std::atomic_fetch_add, so the
// add_*() helpers are safe to call from concurrent alloc/free paths
// without any external locking.
30 class BitAllocatorStats
{
// Call and block totals (alloc/free calls, blocks allocated/freed).
32 std::atomic
<int64_t> m_total_alloc_calls
;
33 std::atomic
<int64_t> m_total_free_calls
;
34 std::atomic
<int64_t> m_total_allocated
;
35 std::atomic
<int64_t> m_total_freed
;
// Scan totals: serialized vs. concurrent traversals of the allocator
// index, plus how many nodes those scans visited.
36 std::atomic
<int64_t> m_total_serial_scans
;
37 std::atomic
<int64_t> m_total_concurrent_scans
;
38 std::atomic
<int64_t> m_total_node_scanned
;
// Constructor body: zero every counter. The line that would zero
// m_total_freed (original line 44) is not visible in this extraction --
// presumably elided; TODO confirm against the full source.
41 m_total_alloc_calls
= 0;
42 m_total_free_calls
= 0;
43 m_total_allocated
= 0;
45 m_total_serial_scans
= 0;
46 m_total_concurrent_scans
= 0;
47 m_total_node_scanned
= 0;
// Each helper below atomically adds 'val' to its matching counter.
50 void add_alloc_calls(int64_t val
) {
51 std::atomic_fetch_add(&m_total_alloc_calls
, val
);
53 void add_free_calls(int64_t val
) {
54 std::atomic_fetch_add(&m_total_free_calls
, val
);
56 void add_allocated(int64_t val
) {
57 std::atomic_fetch_add(&m_total_allocated
, val
);
59 void add_freed(int64_t val
) {
60 std::atomic_fetch_add(&m_total_freed
, val
);
62 void add_serial_scans(int64_t val
) {
63 std::atomic_fetch_add(&m_total_serial_scans
, val
);
65 void add_concurrent_scans(int64_t val
) {
66 std::atomic_fetch_add(&m_total_concurrent_scans
, val
);
68 void add_node_scanned(int64_t val
) {
69 std::atomic_fetch_add(&m_total_node_scanned
, val
);
// BitMapEntityIter<BitMapEntity>: iterator over a mempool-backed vector
// of entities, with optional wrap-around. Starting index is remembered
// (m_start_idx) so a wrapping iteration can detect when it has come full
// circle. Several interior lines (members, parts of next()) are elided
// by the extraction; code is kept byte-identical.
73 template <class BitMapEntity
>
74 class BitMapEntityIter
{
75 typedef mempool::bluestore_alloc::vector
<BitMapEntity
> BitMapEntityVector
;
// Non-owning pointer to the vector being iterated.
76 BitMapEntityVector
*m_list
;
// Shared initializer used by both constructors; records the list,
// wrap flag (assignment not visible here) and the start position.
84 void init(BitMapEntityVector
*list
, bool wrap
, int64_t start_idx
) {
87 m_start_idx
= start_idx
;
88 m_cur_idx
= m_start_idx
;
// Convenience constructor: non-wrapping iteration from start_idx.
93 BitMapEntityIter(BitMapEntityVector
*list
, int64_t start_idx
) {
94 init(list
, false, start_idx
);
// Full constructor: caller chooses wrapping behavior.
96 BitMapEntityIter(BitMapEntityVector
*list
, int64_t start_idx
, bool wrap
) {
97 init(list
, wrap
, start_idx
);
// next(): return a pointer to the current element and advance, or stop
// when the iteration (wrapping or not) is exhausted. Surrounding
// conditions are partially elided; only visible fragments documented.
100 BitMapEntity
*next() {
101 int64_t cur_idx
= m_cur_idx
;
// Wrapped back to the starting index: iteration is complete.
104 cur_idx
== m_start_idx
) {
106 * End of wrap cycle + 1
110 return &(*m_list
)[cur_idx
];
// Reached the end of the vector; wrap-handling branch (partially
// elided) decides whether to restart from index 0.
116 if (m_cur_idx
== (int64_t)m_list
->size() &&
122 if (cur_idx
== (int64_t)m_list
->size()) {
// Sanity: cur_idx must be a valid element index before dereference.
129 alloc_assert(cur_idx
< (int64_t)m_list
->size());
130 return &(*m_list
)[cur_idx
];
// bmap_t: one machine word of the bitmap (unsigned long), plus a
// mempool-backed vector-of-words type.
138 typedef unsigned long bmap_t
;
139 typedef mempool::bluestore_alloc::vector
<bmap_t
> bmap_mask_vec_t
;
// --- BmapEntry interior (the 'class BmapEntry' header line and the
// m_bits member declaration are elided by the extraction). A BmapEntry
// wraps a single bmap_t word; bit 0 is the MOST significant bit (see
// bit_mask below). Code is kept byte-identical.
146 MEMPOOL_CLASS_HELPERS();
// All-ones mask (body elided here).
147 static bmap_t
full_bmask() {
// Number of bits in one word: sizeof(bmap_t) * 8.
150 static int64_t size() {
151 return (sizeof(bmap_t
) * 8);
// All-zeroes mask (body elided here).
153 static bmap_t
empty_bmask() {
// Mask whose top 'x' bits are set; saturates to all-ones when x covers
// the whole word (guards against UB from an oversized shift).
156 static bmap_t
align_mask(int x
) {
157 return ((x
) >= BmapEntry::size()? (bmap_t
) -1 : (~(((bmap_t
) -1) >> (x
))));
// Single-bit mask for bit_num, counted from the most significant bit.
159 static bmap_t
bit_mask(int bit_num
) {
160 return (bmap_t
) 0x1 << ((BmapEntry::size() - 1) - bit_num
);
// Atomically read the word (body elided here).
162 bmap_t
atomic_fetch() {
// Construct with all bits set to 'val'; defined out of line.
165 BmapEntry(CephContext
*, bool val
);
// Default-ish constructor (body elided here).
166 BmapEntry(CephContext
*) {
// Copy constructor: copies only the bit word.
169 BmapEntry(const BmapEntry
& bmap
) {
170 m_bits
= bmap
.m_bits
;
// Bit manipulation primitives (defined out of line).
173 void clear_bit(int bit
);
174 void clear_bits(int offset
, int num_bits
);
175 void set_bits(int offset
, int num_bits
);
// Test-and-set of one bit; returns whether the set succeeded.
176 bool check_n_set_bit(int bit
);
177 bool check_bit(int bit
);
178 bool is_allocated(int64_t start_bit
, int64_t num_bits
);
// Search helpers used by the allocation scan: find runs of contiguous
// set/free bits within this word (semantics defined out of line).
180 int find_n_cont_bits(int start_offset
, int64_t num_bits
);
181 int find_n_free_bits(int start_idx
, int64_t max_bits
,
182 int *free_bit
, int *end_idx
);
183 int find_first_set_bits(int64_t required_blocks
, int bit_offset
,
184 int *start_offset
, int64_t *scanned
);
// Debug dump of this entry's state.
186 void dump_state(CephContext
* cct
, const int& count
);
// --- BitMapArea interior (the 'class BitMapArea' header line is elided
// by the extraction). This is the abstract base for the allocator tree:
// zones and internal nodes override the pure-virtual lock/alloc/free
// interface below. Code is kept byte-identical.
193 int16_t m_area_index
;
196 MEMPOOL_CLASS_HELPERS();
// Static geometry helpers: zone/span sizes and tree level math come
// from configuration (cct); defined out of line.
197 static int64_t get_zone_size(CephContext
* cct
);
198 static int64_t get_span_size(CephContext
* cct
);
199 static int get_level(CephContext
* cct
, int64_t total_blocks
);
200 static int64_t get_level_factor(CephContext
* cct
, int level
);
// Core query interface implemented by every node type.
201 virtual bool is_allocated(int64_t start_block
, int64_t num_blocks
) = 0;
202 virtual bool is_exhausted() = 0;
// child_check_n_lock: test a child for 'required' free blocks and lock
// it if suitable. Default bodies are elided here; the 3-arg overload
// adds an explicit lock/no-lock choice.
203 virtual bool child_check_n_lock(BitMapArea
*child
, int64_t required
) {
207 virtual bool child_check_n_lock(BitMapArea
*child
, int64_t required
, bool lock
) {
211 virtual void child_unlock(BitMapArea
*child
) {
// Locking protocol for this node; shared/try variants have default
// bodies (elided here), exclusive lock and unlock are pure virtual.
215 virtual void lock_excl() = 0;
216 virtual bool lock_excl_try() {
220 virtual void lock_shared() {
224 virtual void unlock() = 0;
// Used/reserved block accounting, implemented by derived classes.
226 virtual int64_t sub_used_blocks(int64_t num_blocks
) = 0;
227 virtual int64_t add_used_blocks(int64_t num_blocks
) = 0;
228 virtual bool reserve_blocks(int64_t num_blocks
) = 0;
229 virtual void unreserve(int64_t num_blocks
, int64_t allocated
) = 0;
230 virtual int64_t get_reserved_blocks() = 0;
231 virtual int64_t get_used_blocks() = 0;
233 virtual void shutdown() = 0;
// Discontiguous allocation: gather up to num_blocks (in min_alloc
// units) into block_list, offset by blk_off. Default body elided.
235 virtual int64_t alloc_blocks_dis(int64_t num_blocks
, int64_t min_alloc
,
236 int64_t hint
, int64_t blk_off
, ExtentList
*block_list
) {
241 virtual void set_blocks_used(int64_t start_block
, int64_t num_blocks
) = 0;
242 virtual void free_blocks(int64_t start_block
, int64_t num_blocks
) = 0;
243 virtual int64_t size() = 0;
245 int64_t child_count();
// Debug dump; 'count' is in/out across the recursive walk.
248 virtual void dump_state(CephContext
* cct
, int& count
) = 0;
// Trivial constructor/destructor; dtor is virtual so derived nodes are
// destroyed correctly through a BitMapArea*.
249 BitMapArea(CephContext
*) { }
250 virtual ~BitMapArea() { }
// BitMapAreaList: thin owner-agnostic wrapper around a vector of
// BitMapArea pointers, giving indexed access to a node's children.
253 class BitMapAreaList
{
256 std::vector
<BitMapArea
*> m_items
;
259 /* Must be DefaultConstructible as BitMapAreaIN and derivatives employ
260 * a deferred init, sorry. */
261 BitMapAreaList() = default;
// Take ownership of the caller's vector by move; the parameter shadows
// the member name intentionally.
263 BitMapAreaList(std::vector
<BitMapArea
*>&& m_items
)
264 : m_items(std::move(m_items
)) {
// Indexed child access (body elided by the extraction).
267 BitMapArea
*get_nth_item(const int64_t idx
) {
271 /* FIXME: we really should use size_t. */
272 int64_t size() const {
273 return m_items
.size();
277 /* Intentionally inlined for the sake of BitMapAreaLeaf::alloc_blocks_dis_int. */
// BmapEntityListIter: iterator over a BitMapAreaList with optional
// wrap-around -- the BitMapArea-list analogue of BitMapEntityIter.
// Several interior lines are elided; code is kept byte-identical.
278 class BmapEntityListIter
{
279 BitMapAreaList
* m_list
;
// Constructor: remember list, start index, and wrap mode (member-init
// list partially elided here).
287 BmapEntityListIter(BitMapAreaList
* const list
,
288 const int64_t start_idx
,
289 const bool wrap
= false)
291 m_start_idx(start_idx
),
292 m_cur_idx(start_idx
),
// next(): return the current child and advance; stops when a wrapping
// walk returns to m_start_idx or a non-wrapping walk hits the end.
299 int64_t cur_idx
= m_cur_idx
;
302 cur_idx
== m_start_idx
) {
304 * End of wrap cycle + 1
308 return m_list
->get_nth_item(cur_idx
);
// End-of-list handling; the wrap branch is partially elided.
314 if (m_cur_idx
== m_list
->size() &&
319 if (cur_idx
== m_list
->size()) {
326 /* This method should be *really* fast as it's being executed over
327 * and over during traversal of allocators indexes. */
// Debug-only bound check (compiled out unless BIT_ALLOCATOR_DEBUG).
328 alloc_dbg_assert(cur_idx
< m_list
->size());
329 return m_list
->get_nth_item(cur_idx
);
// Mempool-backed vector of bitmap words owned by a zone.
335 typedef mempool::bluestore_alloc::vector
<BmapEntry
> BmapEntryVector
;
// BitMapZone: leaf node of the allocator tree. Holds the actual bit
// words (m_bmap_vec) plus an atomic used-block counter so concurrent
// scanners can check occupancy cheaply. Interior lines (access
// specifiers, lock member) are elided; code is kept byte-identical.
337 class BitMapZone
: public BitMapArea
{
340 std::atomic
<int32_t> m_used_blocks
;
341 BmapEntryVector m_bmap_vec
;
345 MEMPOOL_CLASS_HELPERS();
// Class-wide bookkeeping: number of zones created and the (uniform)
// blocks-per-zone count, shared by all instances.
346 static int64_t count
;
347 static int64_t total_blocks
;
348 static void incr_count() { count
++;}
349 static int64_t get_total_blocks() {return total_blocks
;}
// BitMapArea interface overrides (defined out of line).
350 bool is_allocated(int64_t start_block
, int64_t num_blocks
) override
;
351 bool is_exhausted() override final
;
354 int64_t sub_used_blocks(int64_t num_blocks
) override
;
355 int64_t add_used_blocks(int64_t num_blocks
) override
;
356 bool reserve_blocks(int64_t num_blocks
) override
;
357 void unreserve(int64_t num_blocks
, int64_t allocated
) override
;
358 int64_t get_reserved_blocks() override
;
359 int64_t get_used_blocks() override final
;
// Zone size is the shared static blocks-per-zone value.
360 int64_t size() override final
{
361 return get_total_blocks();
// Zone-level locking (implementation elided; defined out of line).
364 void lock_excl() override
;
365 bool lock_excl_try() override
;
366 void unlock() override
;
// Internal free path used once the zone lock is held.
369 void free_blocks_int(int64_t start_block
, int64_t num_blocks
);
// init: set up the word vector; 'def' chooses the default bit state
// -- presumably all-used vs. all-free; TODO confirm in the .cc.
370 void init(CephContext
* cct
, int64_t zone_num
, int64_t total_blocks
, bool def
);
372 BitMapZone(CephContext
* cct
, int64_t total_blocks
, int64_t zone_num
);
373 BitMapZone(CephContext
* cct
, int64_t total_blocks
, int64_t zone_num
, bool def
);
375 ~BitMapZone() override
;
376 void shutdown() override
;
// Discontiguous allocation within this zone; results appended to
// block_list with blk_off added to returned offsets.
377 int64_t alloc_blocks_dis(int64_t num_blocks
, int64_t min_alloc
, int64_t hint
,
378 int64_t blk_off
, ExtentList
*block_list
) override
;
379 void set_blocks_used(int64_t start_block
, int64_t num_blocks
) override
;
381 void free_blocks(int64_t start_block
, int64_t num_blocks
) override
;
382 void dump_state(CephContext
* cct
, int& count
) override
;
// BitMapAreaIN: internal (non-leaf) node of the allocator tree. Tracks
// aggregate used/reserved block counts under m_blocks_lock and fans
// allocation requests out across m_child_list. Interior lines (access
// specifiers, some default bodies) are elided; code is byte-identical.
385 class BitMapAreaIN
: public BitMapArea
{
// Geometry: per-child span and total blocks under this node.
388 int64_t m_child_size_blocks
;
389 int64_t m_total_blocks
;
// Aggregate accounting, protected by m_blocks_lock (plain int64_t,
// unlike BitMapZone's atomic counter).
392 int64_t m_used_blocks
;
393 int64_t m_reserved_blocks
;
394 std::mutex m_blocks_lock
;
395 BitMapAreaList m_child_list
;
397 bool is_allocated(int64_t start_block
, int64_t num_blocks
) override
;
398 bool is_exhausted() override
;
// Child lock negotiation; 3-arg body elided, 2-arg defined out of line.
400 bool child_check_n_lock(BitMapArea
*child
, int64_t required
, bool lock
) override
{
405 bool child_check_n_lock(BitMapArea
*child
, int64_t required
) override
;
406 void child_unlock(BitMapArea
*child
) override
;
// Node-level locking (bodies elided by the extraction).
408 void lock_excl() override
{
411 void lock_shared() override
{
414 void unlock() override
{
// Deferred initialization (see BitMapAreaList's DefaultConstructible
// note); init_common's trailing parameters are elided here.
418 void init(CephContext
* cct
, int64_t total_blocks
, int64_t zone_size_block
, bool def
);
419 void init_common(CephContext
* cct
,
420 int64_t total_blocks
,
421 int64_t zone_size_block
,
// Work-horse scan routines shared by the public alloc entry points.
423 int64_t alloc_blocks_dis_int_work(bool wrap
, int64_t num_blocks
, int64_t min_alloc
, int64_t hint
,
424 int64_t blk_off
, ExtentList
*block_list
);
426 int64_t alloc_blocks_int_work(bool wait
, bool wrap
,
427 int64_t num_blocks
, int64_t hint
, int64_t *start_block
);
430 MEMPOOL_CLASS_HELPERS();
431 BitMapAreaIN(CephContext
* cct
);
432 BitMapAreaIN(CephContext
* cct
, int64_t zone_num
, int64_t total_blocks
);
433 BitMapAreaIN(CephContext
* cct
, int64_t zone_num
, int64_t total_blocks
,
436 ~BitMapAreaIN() override
;
437 void shutdown() override
;
// Aggregate accounting overrides (defined out of line).
438 int64_t sub_used_blocks(int64_t num_blocks
) override
;
439 int64_t add_used_blocks(int64_t num_blocks
) override
;
440 bool reserve_blocks(int64_t num_blocks
) override
;
441 void unreserve(int64_t num_blocks
, int64_t allocated
) override
;
442 int64_t get_reserved_blocks() override
;
443 int64_t get_used_blocks() override
;
// "Adjusted" used-block count -- BitAllocator subtracts its padding
// blocks from this (see BitAllocator::get_used_blocks below).
444 virtual int64_t get_used_blocks_adj();
445 int64_t size() override
{
446 return m_total_blocks
;
448 using BitMapArea::alloc_blocks_dis
; //non-wait version
// Internal vs. public discontiguous allocation entry points.
450 virtual int64_t alloc_blocks_dis_int(int64_t num_blocks
, int64_t min_alloc
, int64_t hint
,
451 int64_t blk_off
, ExtentList
*block_list
);
452 int64_t alloc_blocks_dis(int64_t num_blocks
, int64_t min_alloc
, int64_t hint
,
453 int64_t blk_off
, ExtentList
*block_list
) override
;
// Internal (lock-held) vs. public mark-used / free paths.
454 virtual void set_blocks_used_int(int64_t start_block
, int64_t num_blocks
);
455 void set_blocks_used(int64_t start_block
, int64_t num_blocks
) override
;
457 virtual void free_blocks_int(int64_t start_block
, int64_t num_blocks
);
458 void free_blocks(int64_t start_block
, int64_t num_blocks
) override
;
459 void dump_state(CephContext
* cct
, int& count
) override
;
// BitMapAreaLeaf: last internal level of the tree; its children are
// BitMapZone leaves (note the BitMapZone* child_check_n_lock overload).
// Interior lines (access specifiers, init's trailing parameters) are
// elided by the extraction; code is kept byte-identical.
462 class BitMapAreaLeaf
: public BitMapAreaIN
{
465 void init(CephContext
* cct
, int64_t total_blocks
, int64_t zone_size_block
,
469 MEMPOOL_CLASS_HELPERS();
// Class-wide instance counter, mirroring BitMapZone::count.
470 static int64_t count
;
471 static void incr_count() { count
++;}
472 BitMapAreaLeaf(CephContext
* cct
) : BitMapAreaIN(cct
) { }
473 BitMapAreaLeaf(CephContext
* cct
, int64_t zone_num
, int64_t total_blocks
);
474 BitMapAreaLeaf(CephContext
* cct
, int64_t zone_num
, int64_t total_blocks
,
// Keep the base-class overloads visible alongside the override below.
477 using BitMapAreaIN::child_check_n_lock
;
478 bool child_check_n_lock(BitMapArea
*child
, int64_t required
) override
{
// Zone-typed variant (not an override; takes BitMapZone* directly).
483 bool child_check_n_lock(BitMapZone
* child
, int64_t required
, bool lock
);
// Contiguous and discontiguous allocation over the child zones.
485 int64_t alloc_blocks_int(int64_t num_blocks
, int64_t hint
, int64_t *start_block
);
486 int64_t alloc_blocks_dis_int(int64_t num_blocks
, int64_t min_alloc
, int64_t hint
,
487 int64_t blk_off
, ExtentList
*block_list
) override
;
488 void free_blocks_int(int64_t start_block
, int64_t num_blocks
) override
;
490 ~BitMapAreaLeaf() override
;
// Allocation-mode enum; its enumerators and the bmap_alloc_mode_t
// typedef name are elided by the extraction.
494 typedef enum bmap_alloc_mode
{
// BitAllocator: root of the tree and the public allocator API. Adds a
// serialization mutex, a pthread rwlock for shared/exclusive tree
// access, optional stats, and m_extra_blocks padding that rounds the
// device size up to the tree geometry (subtracted back out in
// total_blocks()/get_used_blocks()). Interior lines are elided; code is
// kept byte-identical.
499 class BitAllocator
:public BitMapAreaIN
{
501 CephContext
* const cct
;
502 bmap_alloc_mode_t m_alloc_mode
;
// m_serial_mutex serializes SERIAL-mode allocations; m_rw_lock guards
// shared vs. exclusive access to the whole tree.
503 std::mutex m_serial_mutex
;
504 pthread_rwlock_t m_rw_lock
;
// Optional stats sink; only meaningful when m_is_stats_on.
505 BitAllocatorStats
*m_stats
;
// Padding blocks added to round total size up; not real device blocks.
507 int64_t m_extra_blocks
;
510 return m_is_stats_on
;
513 using BitMapArea::child_check_n_lock
;
514 bool child_check_n_lock(BitMapArea
*child
, int64_t required
) override
;
515 void child_unlock(BitMapArea
*child
) override
;
// Serial-mode lock helpers plus the BitMapArea locking overrides,
// presumably backed by m_serial_mutex / m_rw_lock; confirm in the .cc.
518 bool try_serial_lock();
519 void serial_unlock();
520 void lock_excl() override
;
521 void lock_shared() override
;
523 void unlock() override
;
// Input validation for contiguous and discontiguous requests.
525 bool check_input(int64_t num_blocks
);
526 bool check_input_dis(int64_t num_blocks
);
527 void init_check(int64_t total_blocks
, int64_t zone_size_block
,
528 bmap_alloc_mode_t mode
, bool def
, bool stats_on
);
// Discontiguous allocation work-horse; 'reserved' indicates blocks
// were pre-reserved via reserve_blocks.
529 int64_t alloc_blocks_dis_work(int64_t num_blocks
, int64_t min_alloc
, int64_t hint
, ExtentList
*block_list
, bool reserved
);
531 int64_t alloc_blocks_dis_int(int64_t num_blocks
, int64_t min_alloc
,
532 int64_t hint
, int64_t area_blk_off
, ExtentList
*block_list
) override
;
535 MEMPOOL_CLASS_HELPERS();
// Constructors with progressively more options (default-state 'def',
// stats toggle); defined out of line.
537 BitAllocator(CephContext
* cct
, int64_t total_blocks
,
538 int64_t zone_size_block
, bmap_alloc_mode_t mode
);
539 BitAllocator(CephContext
* cct
, int64_t total_blocks
, int64_t zone_size_block
,
540 bmap_alloc_mode_t mode
, bool def
);
541 BitAllocator(CephContext
* cct
, int64_t total_blocks
, int64_t zone_size_block
,
542 bmap_alloc_mode_t mode
, bool def
, bool stats_on
);
543 ~BitAllocator() override
;
544 void shutdown() override
;
545 using BitMapAreaIN::alloc_blocks_dis
; //Wait version
547 void free_blocks(int64_t start_block
, int64_t num_blocks
) override
;
548 void set_blocks_used(int64_t start_block
, int64_t num_blocks
) override
;
549 void unreserve_blocks(int64_t blocks
);
// Allocate from a prior reservation (see reserve_blocks).
551 int64_t alloc_blocks_dis_res(int64_t num_blocks
, int64_t min_alloc
, int64_t hint
, ExtentList
*block_list
);
553 void free_blocks_dis(int64_t num_blocks
, ExtentList
*block_list
);
554 bool is_allocated_dis(ExtentList
*blocks
, int64_t num_blocks
);
// Public size/usage exclude the m_extra_blocks padding.
556 int64_t total_blocks() const {
557 return m_total_blocks
- m_extra_blocks
;
559 int64_t get_used_blocks() override
{
560 return (BitMapAreaIN::get_used_blocks_adj() - m_extra_blocks
);
// Stats accessor (body elided by the extraction).
563 BitAllocatorStats
*get_stats() {