// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#pragma once

#include <mutex>
#include <boost/intrusive/avl_set.hpp>

#include "Allocator.h"
#include "os/bluestore/bluestore_types.h"
#include "include/mempool.h"

struct range_seg_t {
  MEMPOOL_CLASS_HELPERS();  ///< memory monitoring
  uint64_t start;           ///< starting offset of this segment
  uint64_t end;             ///< ending offset (non-inclusive)

  range_seg_t(uint64_t start, uint64_t end)
    : start{start},
      end{end}
  {}
  // Tree is sorted by offset, greater offsets at the end of the tree.
  struct before_t {
    template<typename KeyLeft, typename KeyRight>
    bool operator()(const KeyLeft& lhs, const KeyRight& rhs) const {
      return lhs.end <= rhs.start;
    }
  };
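  // With before_t, two ranges compare "equivalent" exactly when they
  // overlap, so an ordinary lookup in the offset tree finds the segment
  // (if any) that intersects a probe range. A minimal sketch, assuming
  // the range_tree_t alias defined further down (values illustrative;
  // intrusive containers do not own their nodes, so the caller manages
  // node lifetime):
  //
  //   range_seg_t seg{0x1000, 0x2000};
  //   range_tree_t tree;
  //   tree.insert(seg);
  //   // [0x1800, 0x1801) is neither wholly before nor wholly after
  //   // seg, so find() returns the containing segment [0x1000, 0x2000):
  //   auto it = tree.find(range_seg_t{0x1800, 0x1801});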
  boost::intrusive::avl_set_member_hook<> offset_hook;

  // Tree is sorted by size, larger sizes at the end of the tree.
  struct shorter_t {
    template<typename KeyType>
    bool operator()(const range_seg_t& lhs, const KeyType& rhs) const {
      auto lhs_size = lhs.end - lhs.start;
      auto rhs_size = rhs.end - rhs.start;
      if (lhs_size < rhs_size) {
        return true;
      } else if (lhs_size > rhs_size) {
        return false;
      } else {
        return lhs.start < rhs.start;
      }
    }
  };
  boost::intrusive::avl_set_member_hook<> size_hook;
};
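
/*
 * Each free extent is one range_seg_t node linked into two trees at
 * once through its two member hooks: the offset-sorted range_tree and
 * the size-sorted range_size_tree declared below. A hedged sketch of
 * the pattern (the real bookkeeping lives in _add_to_tree):
 *
 *   auto* rs = new range_seg_t(offset, offset + length);
 *   range_tree.insert(*rs);       // linked via rs->offset_hook
 *   range_size_tree.insert(*rs);  // linked via rs->size_hook
 *
 * Intrusive containers never copy the element, so both trees point at
 * the same node, and removing a segment means erasing it from both.
 */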

class AvlAllocator final : public Allocator {
public:
  AvlAllocator(CephContext* cct, int64_t device_size, int64_t block_size,
               const std::string& name);
  int64_t allocate(
    uint64_t want,
    uint64_t unit,
    uint64_t max_alloc_size,
    int64_t hint,
    PExtentVector *extents) final;
  void release(const interval_set<uint64_t>& release_set) final;
  uint64_t get_free() final;
  double get_fragmentation() final;

  void dump() final;
  void dump(std::function<void(uint64_t offset, uint64_t length)> notify) final;
  void init_add_free(uint64_t offset, uint64_t length) final;
  void init_rm_free(uint64_t offset, uint64_t length) final;
  void shutdown() final;
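
  /*
   * A hedged usage sketch of this interface (locals and values are
   * illustrative; treating max_alloc_size == 0 as "no cap" is an
   * assumption here, and error handling is omitted):
   *
   *   AvlAllocator alloc(cct, device_size, block_size, "block");
   *   alloc.init_add_free(0, device_size);      // seed the free space
   *   PExtentVector extents;
   *   int64_t got = alloc.allocate(1 << 20,     // want 1 MiB...
   *                                block_size,  // ...in unit-sized steps
   *                                0,           // max_alloc_size
   *                                0,           // hint
   *                                &extents);
   *   // 'got' is the byte count actually allocated, spread over
   *   // 'extents'; hand the space back through release().
   */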

private:
  template<class Tree>
  uint64_t _block_picker(const Tree& t, uint64_t *cursor, uint64_t size,
                         uint64_t align);
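  /*
   * _block_picker is the first-fit scan; this header only declares it,
   * so the sketch below is an assumption modeled on the ZFS metaslab
   * block picker this allocator derives from, using Ceph's p2roundup()
   * from include/intarith.h:
   *
   *   for (auto rs = t.lower_bound(range_seg_t{*cursor, *cursor + size});
   *        rs != t.end(); ++rs) {
   *     uint64_t offset = p2roundup(rs->start, align);
   *     if (offset + size <= rs->end) {
   *       *cursor = offset + size;  // next same-class search resumes here
   *       return offset;
   *     }
   *   }
   *   // nothing past the cursor fits: wrap around once, then give up
   */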
  void _add_to_tree(uint64_t start, uint64_t size);
  void _remove_from_tree(uint64_t start, uint64_t size);
  int _allocate(
    uint64_t size,
    uint64_t unit,
    uint64_t *offset,
    uint64_t *length);

  using range_tree_t =
    boost::intrusive::avl_set<
      range_seg_t,
      boost::intrusive::compare<range_seg_t::before_t>,
      boost::intrusive::member_hook<
        range_seg_t,
        boost::intrusive::avl_set_member_hook<>,
        &range_seg_t::offset_hook>>;
  range_tree_t range_tree;    ///< main range tree
  /*
   * The range_size_tree should always contain the
   * same number of segments as the range_tree.
   * The only difference is that the range_size_tree
   * is ordered by segment sizes.
   */
  using range_size_tree_t =
    boost::intrusive::avl_multiset<
      range_seg_t,
      boost::intrusive::compare<range_seg_t::shorter_t>,
      boost::intrusive::member_hook<
        range_seg_t,
        boost::intrusive::avl_set_member_hook<>,
        &range_seg_t::size_hook>>;
  range_size_tree_t range_size_tree;

  const int64_t num_total;    ///< device size
  const uint64_t block_size;  ///< block size
  uint64_t num_free = 0;      ///< total bytes in freelist

  /*
   * This value defines the number of elements in the lbas array.
   * The value of 64 was chosen as it covers all power-of-2 buckets
   * up to UINT64_MAX: it is the position of the highest bit of
   * UINT64_MAX.
   */
  static constexpr unsigned MAX_LBAS = 64;
  uint64_t lbas[MAX_LBAS] = {0};
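  /*
   * One first-fit cursor per power-of-two size class, so requests of a
   * similar size resume scanning where the previous one stopped instead
   * of rescanning from offset 0. A sketch of the indexing, assuming
   * Ceph's cbits() (number of significant bits, i.e. index of the
   * highest set bit plus one) from include/intarith.h:
   *
   *   uint64_t* cursor = &lbas[cbits(size) - 1];
   *   uint64_t offset = _block_picker(range_tree, cursor, size, unit);
   */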

  /*
   * Minimum size which forces the dynamic allocator to change
   * its allocation strategy. Once the allocator cannot satisfy
   * an allocation of this size, it switches to a more aggressive
   * strategy (i.e., search by size rather than by offset).
   */
  uint64_t range_size_alloc_threshold = 0;
  /*
   * The minimum free space, in percent, which must be available
   * in the allocator to continue allocations in a first-fit fashion.
   * Once the allocator's free space drops below this level we
   * dynamically switch to using best-fit allocations.
   */
  int range_size_alloc_free_pct = 0;
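  /*
   * Taken together, the two knobs gate the strategy switch roughly as
   * follows (a hedged sketch after the ZFS metaslab allocator this code
   * follows; max_size stands for the size of the largest free segment):
   *
   *   uint64_t free_pct = num_free * 100 / num_total;
   *   if (max_size < range_size_alloc_threshold ||
   *       free_pct < range_size_alloc_free_pct) {
   *     // best-fit: take the smallest fitting segment from
   *     // range_size_tree, ignoring the cursors
   *   } else {
   *     // first-fit: scan range_tree from this size class's cursor
   *   }
   */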

  CephContext* cct;
  std::mutex lock;
};