1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
7 #include <boost/intrusive/avl_set.hpp>
10 #include "os/bluestore/bluestore_types.h"
11 #include "include/mempool.h"
14 MEMPOOL_CLASS_HELPERS(); ///< memory monitoring
15 uint64_t start
; ///< starting offset of this segment
16 uint64_t end
; ///< ending offset (non-inclusive)
18 range_seg_t(uint64_t start
, uint64_t end
)
// Tree is sorted by offset, greater offsets at the end of the tree.
//
// Comparator for the offset-ordered tree: lhs sorts strictly before rhs
// iff lhs ends at or before rhs begins (ranges are half-open, so two
// touching segments do not compare "equal"/overlapping).
struct before_t {
  template<typename KeyLeft, typename KeyRight>
  bool operator()(const KeyLeft& lhs, const KeyRight& rhs) const {
    return lhs.end <= rhs.start;
  }
};
29 boost::intrusive::avl_set_member_hook
<> offset_hook
;
31 // Tree is sorted by size, larger sizes at the end of the tree.
33 template<typename KeyType
>
34 bool operator()(const range_seg_t
& lhs
, const KeyType
& rhs
) const {
35 auto lhs_size
= lhs
.end
- lhs
.start
;
36 auto rhs_size
= rhs
.end
- rhs
.start
;
37 if (lhs_size
< rhs_size
) {
39 } else if (lhs_size
> rhs_size
) {
42 return lhs
.start
< rhs
.start
;
46 inline uint64_t length() const {
49 boost::intrusive::avl_set_member_hook
<> size_hook
;
52 class AvlAllocator
: public Allocator
{
54 void operator()(range_seg_t
* p
)
/**
 * Ctor intended for use by descendant class(es) which provide
 * handling for spilled-over entries
 * (when entry count >= max_entries).
 */
66 AvlAllocator(CephContext
* cct
, int64_t device_size
, int64_t block_size
,
68 const std::string
& name
);
71 AvlAllocator(CephContext
* cct
, int64_t device_size
, int64_t block_size
,
72 const std::string
& name
);
74 const char* get_type() const override
81 uint64_t max_alloc_size
,
83 PExtentVector
*extents
) override
;
84 void release(const interval_set
<uint64_t>& release_set
) override
;
85 int64_t get_capacity() const {
89 uint64_t get_block_size() const {
92 uint64_t get_free() override
;
93 double get_fragmentation() override
;
96 void dump(std::function
<void(uint64_t offset
, uint64_t length
)> notify
) override
;
97 void init_add_free(uint64_t offset
, uint64_t length
) override
;
98 void init_rm_free(uint64_t offset
, uint64_t length
) override
;
99 void shutdown() override
;
// Pick a free block of `size` bytes from tree `t`, scanning from
// *cursor (first-fit style); updates *cursor for the next search.
// NOTE(review): the declaration tail was lost in the corrupted source;
// a template header (parameter is `const Tree&`) and a trailing
// alignment parameter restored — confirm against the implementation.
template<class Tree>
uint64_t _block_picker(const Tree& t, uint64_t *cursor, uint64_t size,
                       uint64_t align);
112 boost::intrusive::avl_set
<
114 boost::intrusive::compare
<range_seg_t::before_t
>,
115 boost::intrusive::member_hook
<
117 boost::intrusive::avl_set_member_hook
<>,
118 &range_seg_t::offset_hook
>>;
119 range_tree_t range_tree
; ///< main range tree
121 * The range_size_tree should always contain the
122 * same number of segments as the range_tree.
123 * The only difference is that the range_size_tree
124 * is ordered by segment sizes.
126 using range_size_tree_t
=
127 boost::intrusive::avl_multiset
<
129 boost::intrusive::compare
<range_seg_t::shorter_t
>,
130 boost::intrusive::member_hook
<
132 boost::intrusive::avl_set_member_hook
<>,
133 &range_seg_t::size_hook
>,
134 boost::intrusive::constant_time_size
<true>>;
135 range_size_tree_t range_size_tree
;
137 const int64_t num_total
; ///< device size
138 const uint64_t block_size
; ///< block size
139 uint64_t num_free
= 0; ///< total bytes in freelist
142 * This value defines the number of elements in the ms_lbas array.
143 * The value of 64 was chosen as it covers all power of 2 buckets
145 * This is the equivalent of highest-bit of UINT64_MAX.
147 static constexpr unsigned MAX_LBAS
= 64;
148 uint64_t lbas
[MAX_LBAS
] = {0};
151 * Minimum size which forces the dynamic allocator to change
152 * it's allocation strategy. Once the allocator cannot satisfy
153 * an allocation of this size then it switches to using more
154 * aggressive strategy (i.e search by size rather than offset).
156 uint64_t range_size_alloc_threshold
= 0;
158 * The minimum free space, in percent, which must be available
159 * in allocator to continue allocations in a first-fit fashion.
160 * Once the allocator's free space drops below this level we dynamically
161 * switch to using best-fit allocations.
163 int range_size_alloc_free_pct
= 0;
166 * Max amount of range entries allowed. 0 - unlimited
168 uint64_t range_count_cap
= 0;
170 void _range_size_tree_rm(range_seg_t
& r
) {
171 ceph_assert(num_free
>= r
.length());
172 num_free
-= r
.length();
173 range_size_tree
.erase(r
);
176 void _range_size_tree_try_insert(range_seg_t
& r
) {
177 if (_try_insert_range(r
.start
, r
.end
)) {
178 range_size_tree
.insert(r
);
179 num_free
+= r
.length();
181 range_tree
.erase_and_dispose(r
, dispose_rs
{});
184 bool _try_insert_range(uint64_t start
,
186 range_tree_t::iterator
* insert_pos
= nullptr) {
187 bool res
= !range_count_cap
|| range_size_tree
.size() < range_count_cap
;
188 bool remove_lowest
= false;
190 if (end
- start
> _lowest_size_available()) {
191 remove_lowest
= true;
196 _spillover_range(start
, end
);
198 // NB: we should do insertion before the following removal
199 // to avoid potential iterator disposal insertion might depend on.
201 auto new_rs
= new range_seg_t
{ start
, end
};
202 range_tree
.insert_before(*insert_pos
, *new_rs
);
203 range_size_tree
.insert(*new_rs
);
204 num_free
+= new_rs
->length();
207 auto r
= range_size_tree
.begin();
208 _range_size_tree_rm(*r
);
209 _spillover_range(r
->start
, r
->end
);
210 range_tree
.erase_and_dispose(*r
, dispose_rs
{});
215 virtual void _spillover_range(uint64_t start
, uint64_t end
) {
216 // this should be overriden when range count cap is present,
217 // i.e. (range_count_cap > 0)
221 // called when extent to be released/marked free
222 virtual void _add_to_tree(uint64_t start
, uint64_t size
);
228 double _get_fragmentation() const {
229 auto free_blocks
= p2align(num_free
, block_size
) / block_size
;
230 if (free_blocks
<= 1) {
233 return (static_cast<double>(range_tree
.size() - 1) / (free_blocks
- 1));
237 uint64_t _lowest_size_available() {
238 auto rs
= range_size_tree
.begin();
239 return rs
!= range_size_tree
.end() ? rs
->length() : 0;
245 uint64_t max_alloc_size
,
247 PExtentVector
*extents
);
249 void _release(const interval_set
<uint64_t>& release_set
);
250 void _release(const PExtentVector
& release_set
);
253 void _process_range_removal(uint64_t start
, uint64_t end
, range_tree_t::iterator
& rs
);
254 void _remove_from_tree(uint64_t start
, uint64_t size
);
255 void _try_remove_from_tree(uint64_t start
, uint64_t size
,
256 std::function
<void(uint64_t offset
, uint64_t length
, bool found
)> cb
);
258 uint64_t _get_free() const {