]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | #include "Allocator.h" | |
5 | #include "StupidAllocator.h" | |
a8e16298 | 6 | #include "BitmapAllocator.h" |
9f95a23c | 7 | #include "AvlAllocator.h" |
e306af50 | 8 | #include "HybridAllocator.h" |
f67539c2 TL |
9 | #ifdef HAVE_LIBZBD |
10 | #include "ZonedAllocator.h" | |
11 | #endif | |
7c673cae | 12 | #include "common/debug.h" |
eafe8130 | 13 | #include "common/admin_socket.h" |
7c673cae FG |
14 | #define dout_subsys ceph_subsys_bluestore |
15 | ||
f67539c2 TL |
16 | using std::string; |
17 | using std::to_string; | |
18 | ||
19 | using ceph::bufferlist; | |
20 | using ceph::Formatter; | |
21 | ||
eafe8130 TL |
22 | class Allocator::SocketHook : public AdminSocketHook { |
23 | Allocator *alloc; | |
24 | ||
e306af50 | 25 | friend class Allocator; |
eafe8130 TL |
26 | std::string name; |
27 | public: | |
9f95a23c TL |
28 | explicit SocketHook(Allocator *alloc, |
29 | const std::string& _name) : | |
30 | alloc(alloc), name(_name) | |
eafe8130 TL |
31 | { |
32 | AdminSocket *admin_socket = g_ceph_context->get_admin_socket(); | |
33 | if (name.empty()) { | |
34 | name = to_string((uintptr_t)this); | |
35 | } | |
36 | if (admin_socket) { | |
9f95a23c TL |
37 | int r = admin_socket->register_command( |
38 | ("bluestore allocator dump " + name).c_str(), | |
39 | this, | |
40 | "dump allocator free regions"); | |
eafe8130 TL |
41 | if (r != 0) |
42 | alloc = nullptr; //some collision, disable | |
43 | if (alloc) { | |
9f95a23c TL |
44 | r = admin_socket->register_command( |
45 | ("bluestore allocator score " + name).c_str(), | |
46 | this, | |
47 | "give score on allocator fragmentation (0-no fragmentation, 1-absolute fragmentation)"); | |
48 | ceph_assert(r == 0); | |
49 | r = admin_socket->register_command( | |
50 | ("bluestore allocator fragmentation " + name).c_str(), | |
51 | this, | |
52 | "give allocator fragmentation (0-no fragmentation, 1-absolute fragmentation)"); | |
eafe8130 TL |
53 | ceph_assert(r == 0); |
54 | } | |
55 | } | |
56 | } | |
57 | ~SocketHook() | |
58 | { | |
59 | AdminSocket *admin_socket = g_ceph_context->get_admin_socket(); | |
60 | if (admin_socket && alloc) { | |
9f95a23c | 61 | admin_socket->unregister_commands(this); |
eafe8130 TL |
62 | } |
63 | } | |
64 | ||
9f95a23c TL |
65 | int call(std::string_view command, |
66 | const cmdmap_t& cmdmap, | |
67 | Formatter *f, | |
68 | std::ostream& ss, | |
69 | bufferlist& out) override { | |
70 | int r = 0; | |
eafe8130 | 71 | if (command == "bluestore allocator dump " + name) { |
f67539c2 TL |
72 | f->open_object_section("allocator_dump"); |
73 | f->dump_unsigned("capacity", alloc->get_capacity()); | |
74 | f->dump_unsigned("alloc_unit", alloc->get_block_size()); | |
75 | f->dump_string("alloc_type", alloc->get_type()); | |
76 | f->dump_string("alloc_name", name); | |
77 | ||
78 | f->open_array_section("extents"); | |
eafe8130 TL |
79 | auto iterated_allocation = [&](size_t off, size_t len) { |
80 | ceph_assert(len > 0); | |
81 | f->open_object_section("free"); | |
82 | char off_hex[30]; | |
83 | char len_hex[30]; | |
84 | snprintf(off_hex, sizeof(off_hex) - 1, "0x%lx", off); | |
85 | snprintf(len_hex, sizeof(len_hex) - 1, "0x%lx", len); | |
86 | f->dump_string("offset", off_hex); | |
87 | f->dump_string("length", len_hex); | |
88 | f->close_section(); | |
89 | }; | |
90 | alloc->dump(iterated_allocation); | |
eafe8130 | 91 | f->close_section(); |
f67539c2 | 92 | f->close_section(); |
eafe8130 | 93 | } else if (command == "bluestore allocator score " + name) { |
eafe8130 TL |
94 | f->open_object_section("fragmentation_score"); |
95 | f->dump_float("fragmentation_rating", alloc->get_fragmentation_score()); | |
96 | f->close_section(); | |
9f95a23c TL |
97 | } else if (command == "bluestore allocator fragmentation " + name) { |
98 | f->open_object_section("fragmentation"); | |
99 | f->dump_float("fragmentation_rating", alloc->get_fragmentation()); | |
100 | f->close_section(); | |
eafe8130 TL |
101 | } else { |
102 | ss << "Invalid command" << std::endl; | |
9f95a23c | 103 | r = -ENOSYS; |
eafe8130 | 104 | } |
eafe8130 TL |
105 | return r; |
106 | } | |
107 | ||
108 | }; | |
f67539c2 TL |
109 | Allocator::Allocator(const std::string& name, |
110 | int64_t _capacity, | |
111 | int64_t _block_size) | |
112 | : capacity(_capacity), block_size(_block_size) | |
eafe8130 TL |
113 | { |
114 | asok_hook = new SocketHook(this, name); | |
115 | } | |
116 | ||
117 | ||
118 | Allocator::~Allocator() | |
119 | { | |
120 | delete asok_hook; | |
121 | } | |
122 | ||
e306af50 TL |
123 | const string& Allocator::get_name() const { |
124 | return asok_hook->name; | |
125 | } | |
eafe8130 | 126 | |
7c673cae | 127 | Allocator *Allocator::create(CephContext* cct, string type, |
eafe8130 | 128 | int64_t size, int64_t block_size, const std::string& name) |
7c673cae | 129 | { |
eafe8130 | 130 | Allocator* alloc = nullptr; |
7c673cae | 131 | if (type == "stupid") { |
f67539c2 | 132 | alloc = new StupidAllocator(cct, name, size, block_size); |
7c673cae | 133 | } else if (type == "bitmap") { |
eafe8130 | 134 | alloc = new BitmapAllocator(cct, size, block_size, name); |
9f95a23c TL |
135 | } else if (type == "avl") { |
136 | return new AvlAllocator(cct, size, block_size, name); | |
e306af50 TL |
137 | } else if (type == "hybrid") { |
138 | return new HybridAllocator(cct, size, block_size, | |
139 | cct->_conf.get_val<uint64_t>("bluestore_hybrid_alloc_mem_cap"), | |
140 | name); | |
f67539c2 TL |
141 | #ifdef HAVE_LIBZBD |
142 | } else if (type == "zoned") { | |
143 | return new ZonedAllocator(cct, size, block_size, name); | |
144 | #endif | |
7c673cae | 145 | } |
eafe8130 TL |
146 | if (alloc == nullptr) { |
147 | lderr(cct) << "Allocator::" << __func__ << " unknown alloc type " | |
7c673cae | 148 | << type << dendl; |
eafe8130 TL |
149 | } |
150 | return alloc; | |
7c673cae | 151 | } |
a8e16298 TL |
152 | |
153 | void Allocator::release(const PExtentVector& release_vec) | |
154 | { | |
155 | interval_set<uint64_t> release_set; | |
156 | for (auto e : release_vec) { | |
157 | release_set.insert(e.offset, e.length); | |
158 | } | |
159 | release(release_set); | |
160 | } | |
eafe8130 TL |
161 | |
162 | /** | |
163 | * Gives fragmentation a numeric value. | |
164 | * | |
165 | * Following algorithm applies value to each existing free unallocated block. | |
166 | * Value of single block is a multiply of size and per-byte-value. | |
167 | * Per-byte-value is greater for larger blocks. | |
168 | * Assume block size X has value per-byte p; then block size 2*X will have per-byte value 1.1*p. | |
169 | * | |
170 | * This could be expressed in logarithms, but for speed this is interpolated inside ranges. | |
171 | * [1] [2..3] [4..7] [8..15] ... | |
172 | * ^ ^ ^ ^ | |
173 | * 1.1 1.1^2 1.1^3 1.1^4 ... | |
174 | * | |
175 | * Final score is obtained by proportion between score that would have been obtained | |
176 | * in condition of absolute fragmentation and score in no fragmentation at all. | |
177 | */ | |
178 | double Allocator::get_fragmentation_score() | |
179 | { | |
180 | // this value represents how much worth is 2X bytes in one chunk then in X + X bytes | |
181 | static const double double_size_worth = 1.1 ; | |
182 | std::vector<double> scales{1}; | |
183 | double score_sum = 0; | |
184 | size_t sum = 0; | |
185 | ||
186 | auto get_score = [&](size_t v) -> double { | |
187 | size_t sc = sizeof(v) * 8 - clz(v) - 1; //assign to grade depending on log2(len) | |
188 | while (scales.size() <= sc + 1) { | |
189 | //unlikely expand scales vector | |
190 | scales.push_back(scales[scales.size() - 1] * double_size_worth); | |
191 | } | |
192 | ||
193 | size_t sc_shifted = size_t(1) << sc; | |
194 | double x = double(v - sc_shifted) / sc_shifted; //x is <0,1) in its scale grade | |
195 | // linear extrapolation in its scale grade | |
196 | double score = (sc_shifted ) * scales[sc] * (1-x) + | |
197 | (sc_shifted * 2) * scales[sc+1] * x; | |
198 | return score; | |
199 | }; | |
200 | ||
201 | auto iterated_allocation = [&](size_t off, size_t len) { | |
202 | ceph_assert(len > 0); | |
203 | score_sum += get_score(len); | |
204 | sum += len; | |
205 | }; | |
206 | dump(iterated_allocation); | |
207 | ||
208 | ||
209 | double ideal = get_score(sum); | |
210 | double terrible = sum * get_score(1); | |
211 | return (ideal - score_sum) / (ideal - terrible); | |
212 | } |