]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | #include "Allocator.h" | |
5 | #include "StupidAllocator.h" | |
a8e16298 | 6 | #include "BitmapAllocator.h" |
9f95a23c | 7 | #include "AvlAllocator.h" |
20effc67 | 8 | #include "BtreeAllocator.h" |
e306af50 | 9 | #include "HybridAllocator.h" |
f67539c2 TL |
10 | #ifdef HAVE_LIBZBD |
11 | #include "ZonedAllocator.h" | |
12 | #endif | |
7c673cae | 13 | #include "common/debug.h" |
eafe8130 | 14 | #include "common/admin_socket.h" |
7c673cae FG |
15 | #define dout_subsys ceph_subsys_bluestore |
16 | ||
f67539c2 TL |
17 | using std::string; |
18 | using std::to_string; | |
19 | ||
20 | using ceph::bufferlist; | |
21 | using ceph::Formatter; | |
22 | ||
eafe8130 TL |
23 | class Allocator::SocketHook : public AdminSocketHook { |
24 | Allocator *alloc; | |
25 | ||
e306af50 | 26 | friend class Allocator; |
eafe8130 TL |
27 | std::string name; |
28 | public: | |
20effc67 | 29 | SocketHook(Allocator *alloc, std::string_view _name) : |
9f95a23c | 30 | alloc(alloc), name(_name) |
eafe8130 TL |
31 | { |
32 | AdminSocket *admin_socket = g_ceph_context->get_admin_socket(); | |
33 | if (name.empty()) { | |
34 | name = to_string((uintptr_t)this); | |
35 | } | |
36 | if (admin_socket) { | |
9f95a23c TL |
37 | int r = admin_socket->register_command( |
38 | ("bluestore allocator dump " + name).c_str(), | |
39 | this, | |
40 | "dump allocator free regions"); | |
eafe8130 TL |
41 | if (r != 0) |
42 | alloc = nullptr; //some collision, disable | |
43 | if (alloc) { | |
9f95a23c TL |
44 | r = admin_socket->register_command( |
45 | ("bluestore allocator score " + name).c_str(), | |
46 | this, | |
47 | "give score on allocator fragmentation (0-no fragmentation, 1-absolute fragmentation)"); | |
48 | ceph_assert(r == 0); | |
49 | r = admin_socket->register_command( | |
50 | ("bluestore allocator fragmentation " + name).c_str(), | |
51 | this, | |
52 | "give allocator fragmentation (0-no fragmentation, 1-absolute fragmentation)"); | |
eafe8130 TL |
53 | ceph_assert(r == 0); |
54 | } | |
55 | } | |
56 | } | |
57 | ~SocketHook() | |
58 | { | |
59 | AdminSocket *admin_socket = g_ceph_context->get_admin_socket(); | |
60 | if (admin_socket && alloc) { | |
9f95a23c | 61 | admin_socket->unregister_commands(this); |
eafe8130 TL |
62 | } |
63 | } | |
64 | ||
9f95a23c TL |
65 | int call(std::string_view command, |
66 | const cmdmap_t& cmdmap, | |
39ae355f | 67 | const bufferlist&, |
9f95a23c TL |
68 | Formatter *f, |
69 | std::ostream& ss, | |
70 | bufferlist& out) override { | |
71 | int r = 0; | |
eafe8130 | 72 | if (command == "bluestore allocator dump " + name) { |
f67539c2 TL |
73 | f->open_object_section("allocator_dump"); |
74 | f->dump_unsigned("capacity", alloc->get_capacity()); | |
75 | f->dump_unsigned("alloc_unit", alloc->get_block_size()); | |
76 | f->dump_string("alloc_type", alloc->get_type()); | |
77 | f->dump_string("alloc_name", name); | |
78 | ||
79 | f->open_array_section("extents"); | |
eafe8130 TL |
80 | auto iterated_allocation = [&](size_t off, size_t len) { |
81 | ceph_assert(len > 0); | |
82 | f->open_object_section("free"); | |
83 | char off_hex[30]; | |
84 | char len_hex[30]; | |
20effc67 TL |
85 | snprintf(off_hex, sizeof(off_hex) - 1, "0x%zx", off); |
86 | snprintf(len_hex, sizeof(len_hex) - 1, "0x%zx", len); | |
eafe8130 TL |
87 | f->dump_string("offset", off_hex); |
88 | f->dump_string("length", len_hex); | |
89 | f->close_section(); | |
90 | }; | |
91 | alloc->dump(iterated_allocation); | |
eafe8130 | 92 | f->close_section(); |
f67539c2 | 93 | f->close_section(); |
eafe8130 | 94 | } else if (command == "bluestore allocator score " + name) { |
eafe8130 TL |
95 | f->open_object_section("fragmentation_score"); |
96 | f->dump_float("fragmentation_rating", alloc->get_fragmentation_score()); | |
97 | f->close_section(); | |
9f95a23c TL |
98 | } else if (command == "bluestore allocator fragmentation " + name) { |
99 | f->open_object_section("fragmentation"); | |
100 | f->dump_float("fragmentation_rating", alloc->get_fragmentation()); | |
101 | f->close_section(); | |
eafe8130 TL |
102 | } else { |
103 | ss << "Invalid command" << std::endl; | |
9f95a23c | 104 | r = -ENOSYS; |
eafe8130 | 105 | } |
eafe8130 TL |
106 | return r; |
107 | } | |
108 | ||
109 | }; | |
20effc67 | 110 | Allocator::Allocator(std::string_view name, |
f67539c2 TL |
111 | int64_t _capacity, |
112 | int64_t _block_size) | |
20effc67 TL |
113 | : device_size(_capacity), |
114 | block_size(_block_size) | |
eafe8130 TL |
115 | { |
116 | asok_hook = new SocketHook(this, name); | |
117 | } | |
118 | ||
119 | ||
120 | Allocator::~Allocator() | |
121 | { | |
122 | delete asok_hook; | |
123 | } | |
124 | ||
e306af50 TL |
125 | const string& Allocator::get_name() const { |
126 | return asok_hook->name; | |
127 | } | |
eafe8130 | 128 | |
20effc67 TL |
129 | Allocator *Allocator::create( |
130 | CephContext* cct, | |
131 | std::string_view type, | |
132 | int64_t size, | |
133 | int64_t block_size, | |
134 | int64_t zone_size, | |
135 | int64_t first_sequential_zone, | |
136 | std::string_view name) | |
7c673cae | 137 | { |
eafe8130 | 138 | Allocator* alloc = nullptr; |
7c673cae | 139 | if (type == "stupid") { |
20effc67 | 140 | alloc = new StupidAllocator(cct, size, block_size, name); |
7c673cae | 141 | } else if (type == "bitmap") { |
eafe8130 | 142 | alloc = new BitmapAllocator(cct, size, block_size, name); |
9f95a23c TL |
143 | } else if (type == "avl") { |
144 | return new AvlAllocator(cct, size, block_size, name); | |
20effc67 TL |
145 | } else if (type == "btree") { |
146 | return new BtreeAllocator(cct, size, block_size, name); | |
e306af50 TL |
147 | } else if (type == "hybrid") { |
148 | return new HybridAllocator(cct, size, block_size, | |
149 | cct->_conf.get_val<uint64_t>("bluestore_hybrid_alloc_mem_cap"), | |
150 | name); | |
f67539c2 TL |
151 | #ifdef HAVE_LIBZBD |
152 | } else if (type == "zoned") { | |
20effc67 TL |
153 | return new ZonedAllocator(cct, size, block_size, zone_size, first_sequential_zone, |
154 | name); | |
f67539c2 | 155 | #endif |
7c673cae | 156 | } |
eafe8130 TL |
157 | if (alloc == nullptr) { |
158 | lderr(cct) << "Allocator::" << __func__ << " unknown alloc type " | |
7c673cae | 159 | << type << dendl; |
eafe8130 TL |
160 | } |
161 | return alloc; | |
7c673cae | 162 | } |
a8e16298 TL |
163 | |
164 | void Allocator::release(const PExtentVector& release_vec) | |
165 | { | |
166 | interval_set<uint64_t> release_set; | |
167 | for (auto e : release_vec) { | |
168 | release_set.insert(e.offset, e.length); | |
169 | } | |
170 | release(release_set); | |
171 | } | |
eafe8130 TL |
172 | |
173 | /** | |
174 | * Gives fragmentation a numeric value. | |
175 | * | |
176 | * Following algorithm applies value to each existing free unallocated block. | |
177 | * Value of single block is a multiply of size and per-byte-value. | |
178 | * Per-byte-value is greater for larger blocks. | |
179 | * Assume block size X has value per-byte p; then block size 2*X will have per-byte value 1.1*p. | |
180 | * | |
181 | * This could be expressed in logarithms, but for speed this is interpolated inside ranges. | |
182 | * [1] [2..3] [4..7] [8..15] ... | |
183 | * ^ ^ ^ ^ | |
184 | * 1.1 1.1^2 1.1^3 1.1^4 ... | |
185 | * | |
186 | * Final score is obtained by proportion between score that would have been obtained | |
187 | * in condition of absolute fragmentation and score in no fragmentation at all. | |
188 | */ | |
189 | double Allocator::get_fragmentation_score() | |
190 | { | |
191 | // this value represents how much worth is 2X bytes in one chunk then in X + X bytes | |
192 | static const double double_size_worth = 1.1 ; | |
193 | std::vector<double> scales{1}; | |
194 | double score_sum = 0; | |
195 | size_t sum = 0; | |
196 | ||
197 | auto get_score = [&](size_t v) -> double { | |
198 | size_t sc = sizeof(v) * 8 - clz(v) - 1; //assign to grade depending on log2(len) | |
199 | while (scales.size() <= sc + 1) { | |
200 | //unlikely expand scales vector | |
201 | scales.push_back(scales[scales.size() - 1] * double_size_worth); | |
202 | } | |
203 | ||
204 | size_t sc_shifted = size_t(1) << sc; | |
205 | double x = double(v - sc_shifted) / sc_shifted; //x is <0,1) in its scale grade | |
206 | // linear extrapolation in its scale grade | |
207 | double score = (sc_shifted ) * scales[sc] * (1-x) + | |
208 | (sc_shifted * 2) * scales[sc+1] * x; | |
209 | return score; | |
210 | }; | |
211 | ||
212 | auto iterated_allocation = [&](size_t off, size_t len) { | |
213 | ceph_assert(len > 0); | |
214 | score_sum += get_score(len); | |
215 | sum += len; | |
216 | }; | |
217 | dump(iterated_allocation); | |
218 | ||
219 | ||
220 | double ideal = get_score(sum); | |
221 | double terrible = sum * get_score(1); | |
222 | return (ideal - score_sum) / (ideal - terrible); | |
223 | } |