]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | #include "Allocator.h" | |
5 | #include "StupidAllocator.h" | |
a8e16298 | 6 | #include "BitmapAllocator.h" |
9f95a23c | 7 | #include "AvlAllocator.h" |
20effc67 | 8 | #include "BtreeAllocator.h" |
e306af50 | 9 | #include "HybridAllocator.h" |
f67539c2 TL |
10 | #ifdef HAVE_LIBZBD |
11 | #include "ZonedAllocator.h" | |
12 | #endif | |
7c673cae | 13 | #include "common/debug.h" |
eafe8130 | 14 | #include "common/admin_socket.h" |
7c673cae FG |
15 | #define dout_subsys ceph_subsys_bluestore |
16 | ||
f67539c2 TL |
17 | using std::string; |
18 | using std::to_string; | |
19 | ||
20 | using ceph::bufferlist; | |
21 | using ceph::Formatter; | |
22 | ||
eafe8130 TL |
23 | class Allocator::SocketHook : public AdminSocketHook { |
24 | Allocator *alloc; | |
25 | ||
e306af50 | 26 | friend class Allocator; |
eafe8130 TL |
27 | std::string name; |
28 | public: | |
20effc67 | 29 | SocketHook(Allocator *alloc, std::string_view _name) : |
9f95a23c | 30 | alloc(alloc), name(_name) |
eafe8130 TL |
31 | { |
32 | AdminSocket *admin_socket = g_ceph_context->get_admin_socket(); | |
33 | if (name.empty()) { | |
34 | name = to_string((uintptr_t)this); | |
35 | } | |
36 | if (admin_socket) { | |
9f95a23c TL |
37 | int r = admin_socket->register_command( |
38 | ("bluestore allocator dump " + name).c_str(), | |
39 | this, | |
40 | "dump allocator free regions"); | |
eafe8130 TL |
41 | if (r != 0) |
42 | alloc = nullptr; //some collision, disable | |
43 | if (alloc) { | |
9f95a23c TL |
44 | r = admin_socket->register_command( |
45 | ("bluestore allocator score " + name).c_str(), | |
46 | this, | |
47 | "give score on allocator fragmentation (0-no fragmentation, 1-absolute fragmentation)"); | |
48 | ceph_assert(r == 0); | |
49 | r = admin_socket->register_command( | |
50 | ("bluestore allocator fragmentation " + name).c_str(), | |
51 | this, | |
52 | "give allocator fragmentation (0-no fragmentation, 1-absolute fragmentation)"); | |
eafe8130 TL |
53 | ceph_assert(r == 0); |
54 | } | |
55 | } | |
56 | } | |
57 | ~SocketHook() | |
58 | { | |
59 | AdminSocket *admin_socket = g_ceph_context->get_admin_socket(); | |
60 | if (admin_socket && alloc) { | |
9f95a23c | 61 | admin_socket->unregister_commands(this); |
eafe8130 TL |
62 | } |
63 | } | |
64 | ||
9f95a23c TL |
65 | int call(std::string_view command, |
66 | const cmdmap_t& cmdmap, | |
67 | Formatter *f, | |
68 | std::ostream& ss, | |
69 | bufferlist& out) override { | |
70 | int r = 0; | |
eafe8130 | 71 | if (command == "bluestore allocator dump " + name) { |
f67539c2 TL |
72 | f->open_object_section("allocator_dump"); |
73 | f->dump_unsigned("capacity", alloc->get_capacity()); | |
74 | f->dump_unsigned("alloc_unit", alloc->get_block_size()); | |
75 | f->dump_string("alloc_type", alloc->get_type()); | |
76 | f->dump_string("alloc_name", name); | |
77 | ||
78 | f->open_array_section("extents"); | |
eafe8130 TL |
79 | auto iterated_allocation = [&](size_t off, size_t len) { |
80 | ceph_assert(len > 0); | |
81 | f->open_object_section("free"); | |
82 | char off_hex[30]; | |
83 | char len_hex[30]; | |
20effc67 TL |
84 | snprintf(off_hex, sizeof(off_hex) - 1, "0x%zx", off); |
85 | snprintf(len_hex, sizeof(len_hex) - 1, "0x%zx", len); | |
eafe8130 TL |
86 | f->dump_string("offset", off_hex); |
87 | f->dump_string("length", len_hex); | |
88 | f->close_section(); | |
89 | }; | |
90 | alloc->dump(iterated_allocation); | |
eafe8130 | 91 | f->close_section(); |
f67539c2 | 92 | f->close_section(); |
eafe8130 | 93 | } else if (command == "bluestore allocator score " + name) { |
eafe8130 TL |
94 | f->open_object_section("fragmentation_score"); |
95 | f->dump_float("fragmentation_rating", alloc->get_fragmentation_score()); | |
96 | f->close_section(); | |
9f95a23c TL |
97 | } else if (command == "bluestore allocator fragmentation " + name) { |
98 | f->open_object_section("fragmentation"); | |
99 | f->dump_float("fragmentation_rating", alloc->get_fragmentation()); | |
100 | f->close_section(); | |
eafe8130 TL |
101 | } else { |
102 | ss << "Invalid command" << std::endl; | |
9f95a23c | 103 | r = -ENOSYS; |
eafe8130 | 104 | } |
eafe8130 TL |
105 | return r; |
106 | } | |
107 | ||
108 | }; | |
20effc67 | 109 | Allocator::Allocator(std::string_view name, |
f67539c2 TL |
110 | int64_t _capacity, |
111 | int64_t _block_size) | |
20effc67 TL |
112 | : device_size(_capacity), |
113 | block_size(_block_size) | |
eafe8130 TL |
114 | { |
115 | asok_hook = new SocketHook(this, name); | |
116 | } | |
117 | ||
118 | ||
119 | Allocator::~Allocator() | |
120 | { | |
121 | delete asok_hook; | |
122 | } | |
123 | ||
e306af50 TL |
124 | const string& Allocator::get_name() const { |
125 | return asok_hook->name; | |
126 | } | |
eafe8130 | 127 | |
20effc67 TL |
128 | Allocator *Allocator::create( |
129 | CephContext* cct, | |
130 | std::string_view type, | |
131 | int64_t size, | |
132 | int64_t block_size, | |
133 | int64_t zone_size, | |
134 | int64_t first_sequential_zone, | |
135 | std::string_view name) | |
7c673cae | 136 | { |
eafe8130 | 137 | Allocator* alloc = nullptr; |
7c673cae | 138 | if (type == "stupid") { |
20effc67 | 139 | alloc = new StupidAllocator(cct, size, block_size, name); |
7c673cae | 140 | } else if (type == "bitmap") { |
eafe8130 | 141 | alloc = new BitmapAllocator(cct, size, block_size, name); |
9f95a23c TL |
142 | } else if (type == "avl") { |
143 | return new AvlAllocator(cct, size, block_size, name); | |
20effc67 TL |
144 | } else if (type == "btree") { |
145 | return new BtreeAllocator(cct, size, block_size, name); | |
e306af50 TL |
146 | } else if (type == "hybrid") { |
147 | return new HybridAllocator(cct, size, block_size, | |
148 | cct->_conf.get_val<uint64_t>("bluestore_hybrid_alloc_mem_cap"), | |
149 | name); | |
f67539c2 TL |
150 | #ifdef HAVE_LIBZBD |
151 | } else if (type == "zoned") { | |
20effc67 TL |
152 | return new ZonedAllocator(cct, size, block_size, zone_size, first_sequential_zone, |
153 | name); | |
f67539c2 | 154 | #endif |
7c673cae | 155 | } |
eafe8130 TL |
156 | if (alloc == nullptr) { |
157 | lderr(cct) << "Allocator::" << __func__ << " unknown alloc type " | |
7c673cae | 158 | << type << dendl; |
eafe8130 TL |
159 | } |
160 | return alloc; | |
7c673cae | 161 | } |
a8e16298 TL |
162 | |
163 | void Allocator::release(const PExtentVector& release_vec) | |
164 | { | |
165 | interval_set<uint64_t> release_set; | |
166 | for (auto e : release_vec) { | |
167 | release_set.insert(e.offset, e.length); | |
168 | } | |
169 | release(release_set); | |
170 | } | |
eafe8130 TL |
171 | |
172 | /** | |
173 | * Gives fragmentation a numeric value. | |
174 | * | |
175 | * Following algorithm applies value to each existing free unallocated block. | |
176 | * Value of single block is a multiply of size and per-byte-value. | |
177 | * Per-byte-value is greater for larger blocks. | |
178 | * Assume block size X has value per-byte p; then block size 2*X will have per-byte value 1.1*p. | |
179 | * | |
180 | * This could be expressed in logarithms, but for speed this is interpolated inside ranges. | |
181 | * [1] [2..3] [4..7] [8..15] ... | |
182 | * ^ ^ ^ ^ | |
183 | * 1.1 1.1^2 1.1^3 1.1^4 ... | |
184 | * | |
185 | * Final score is obtained by proportion between score that would have been obtained | |
186 | * in condition of absolute fragmentation and score in no fragmentation at all. | |
187 | */ | |
188 | double Allocator::get_fragmentation_score() | |
189 | { | |
190 | // this value represents how much worth is 2X bytes in one chunk then in X + X bytes | |
191 | static const double double_size_worth = 1.1 ; | |
192 | std::vector<double> scales{1}; | |
193 | double score_sum = 0; | |
194 | size_t sum = 0; | |
195 | ||
196 | auto get_score = [&](size_t v) -> double { | |
197 | size_t sc = sizeof(v) * 8 - clz(v) - 1; //assign to grade depending on log2(len) | |
198 | while (scales.size() <= sc + 1) { | |
199 | //unlikely expand scales vector | |
200 | scales.push_back(scales[scales.size() - 1] * double_size_worth); | |
201 | } | |
202 | ||
203 | size_t sc_shifted = size_t(1) << sc; | |
204 | double x = double(v - sc_shifted) / sc_shifted; //x is <0,1) in its scale grade | |
205 | // linear extrapolation in its scale grade | |
206 | double score = (sc_shifted ) * scales[sc] * (1-x) + | |
207 | (sc_shifted * 2) * scales[sc+1] * x; | |
208 | return score; | |
209 | }; | |
210 | ||
211 | auto iterated_allocation = [&](size_t off, size_t len) { | |
212 | ceph_assert(len > 0); | |
213 | score_sum += get_score(len); | |
214 | sum += len; | |
215 | }; | |
216 | dump(iterated_allocation); | |
217 | ||
218 | ||
219 | double ideal = get_score(sum); | |
220 | double terrible = sum * get_score(1); | |
221 | return (ideal - score_sum) / (ideal - terrible); | |
222 | } |