]> git.proxmox.com Git - ceph.git/blame - ceph/src/os/bluestore/Allocator.cc
import quincy beta 17.1.0
[ceph.git] / ceph / src / os / bluestore / Allocator.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3
4#include "Allocator.h"
5#include "StupidAllocator.h"
a8e16298 6#include "BitmapAllocator.h"
9f95a23c 7#include "AvlAllocator.h"
20effc67 8#include "BtreeAllocator.h"
e306af50 9#include "HybridAllocator.h"
f67539c2
TL
10#ifdef HAVE_LIBZBD
11#include "ZonedAllocator.h"
12#endif
7c673cae 13#include "common/debug.h"
eafe8130 14#include "common/admin_socket.h"
7c673cae
FG
15#define dout_subsys ceph_subsys_bluestore
16
f67539c2
TL
17using std::string;
18using std::to_string;
19
20using ceph::bufferlist;
21using ceph::Formatter;
22
eafe8130
TL
23class Allocator::SocketHook : public AdminSocketHook {
24 Allocator *alloc;
25
e306af50 26 friend class Allocator;
eafe8130
TL
27 std::string name;
28public:
20effc67 29 SocketHook(Allocator *alloc, std::string_view _name) :
9f95a23c 30 alloc(alloc), name(_name)
eafe8130
TL
31 {
32 AdminSocket *admin_socket = g_ceph_context->get_admin_socket();
33 if (name.empty()) {
34 name = to_string((uintptr_t)this);
35 }
36 if (admin_socket) {
9f95a23c
TL
37 int r = admin_socket->register_command(
38 ("bluestore allocator dump " + name).c_str(),
39 this,
40 "dump allocator free regions");
eafe8130
TL
41 if (r != 0)
42 alloc = nullptr; //some collision, disable
43 if (alloc) {
9f95a23c
TL
44 r = admin_socket->register_command(
45 ("bluestore allocator score " + name).c_str(),
46 this,
47 "give score on allocator fragmentation (0-no fragmentation, 1-absolute fragmentation)");
48 ceph_assert(r == 0);
49 r = admin_socket->register_command(
50 ("bluestore allocator fragmentation " + name).c_str(),
51 this,
52 "give allocator fragmentation (0-no fragmentation, 1-absolute fragmentation)");
eafe8130
TL
53 ceph_assert(r == 0);
54 }
55 }
56 }
57 ~SocketHook()
58 {
59 AdminSocket *admin_socket = g_ceph_context->get_admin_socket();
60 if (admin_socket && alloc) {
9f95a23c 61 admin_socket->unregister_commands(this);
eafe8130
TL
62 }
63 }
64
9f95a23c
TL
65 int call(std::string_view command,
66 const cmdmap_t& cmdmap,
67 Formatter *f,
68 std::ostream& ss,
69 bufferlist& out) override {
70 int r = 0;
eafe8130 71 if (command == "bluestore allocator dump " + name) {
f67539c2
TL
72 f->open_object_section("allocator_dump");
73 f->dump_unsigned("capacity", alloc->get_capacity());
74 f->dump_unsigned("alloc_unit", alloc->get_block_size());
75 f->dump_string("alloc_type", alloc->get_type());
76 f->dump_string("alloc_name", name);
77
78 f->open_array_section("extents");
eafe8130
TL
79 auto iterated_allocation = [&](size_t off, size_t len) {
80 ceph_assert(len > 0);
81 f->open_object_section("free");
82 char off_hex[30];
83 char len_hex[30];
20effc67
TL
84 snprintf(off_hex, sizeof(off_hex) - 1, "0x%zx", off);
85 snprintf(len_hex, sizeof(len_hex) - 1, "0x%zx", len);
eafe8130
TL
86 f->dump_string("offset", off_hex);
87 f->dump_string("length", len_hex);
88 f->close_section();
89 };
90 alloc->dump(iterated_allocation);
eafe8130 91 f->close_section();
f67539c2 92 f->close_section();
eafe8130 93 } else if (command == "bluestore allocator score " + name) {
eafe8130
TL
94 f->open_object_section("fragmentation_score");
95 f->dump_float("fragmentation_rating", alloc->get_fragmentation_score());
96 f->close_section();
9f95a23c
TL
97 } else if (command == "bluestore allocator fragmentation " + name) {
98 f->open_object_section("fragmentation");
99 f->dump_float("fragmentation_rating", alloc->get_fragmentation());
100 f->close_section();
eafe8130
TL
101 } else {
102 ss << "Invalid command" << std::endl;
9f95a23c 103 r = -ENOSYS;
eafe8130 104 }
eafe8130
TL
105 return r;
106 }
107
108};
20effc67 109Allocator::Allocator(std::string_view name,
f67539c2
TL
110 int64_t _capacity,
111 int64_t _block_size)
20effc67
TL
112 : device_size(_capacity),
113 block_size(_block_size)
eafe8130
TL
114{
115 asok_hook = new SocketHook(this, name);
116}
117
118
119Allocator::~Allocator()
120{
121 delete asok_hook;
122}
123
e306af50
TL
124const string& Allocator::get_name() const {
125 return asok_hook->name;
126}
eafe8130 127
20effc67
TL
128Allocator *Allocator::create(
129 CephContext* cct,
130 std::string_view type,
131 int64_t size,
132 int64_t block_size,
133 int64_t zone_size,
134 int64_t first_sequential_zone,
135 std::string_view name)
7c673cae 136{
eafe8130 137 Allocator* alloc = nullptr;
7c673cae 138 if (type == "stupid") {
20effc67 139 alloc = new StupidAllocator(cct, size, block_size, name);
7c673cae 140 } else if (type == "bitmap") {
eafe8130 141 alloc = new BitmapAllocator(cct, size, block_size, name);
9f95a23c
TL
142 } else if (type == "avl") {
143 return new AvlAllocator(cct, size, block_size, name);
20effc67
TL
144 } else if (type == "btree") {
145 return new BtreeAllocator(cct, size, block_size, name);
e306af50
TL
146 } else if (type == "hybrid") {
147 return new HybridAllocator(cct, size, block_size,
148 cct->_conf.get_val<uint64_t>("bluestore_hybrid_alloc_mem_cap"),
149 name);
f67539c2
TL
150#ifdef HAVE_LIBZBD
151 } else if (type == "zoned") {
20effc67
TL
152 return new ZonedAllocator(cct, size, block_size, zone_size, first_sequential_zone,
153 name);
f67539c2 154#endif
7c673cae 155 }
eafe8130
TL
156 if (alloc == nullptr) {
157 lderr(cct) << "Allocator::" << __func__ << " unknown alloc type "
7c673cae 158 << type << dendl;
eafe8130
TL
159 }
160 return alloc;
7c673cae 161}
a8e16298
TL
162
163void Allocator::release(const PExtentVector& release_vec)
164{
165 interval_set<uint64_t> release_set;
166 for (auto e : release_vec) {
167 release_set.insert(e.offset, e.length);
168 }
169 release(release_set);
170}
eafe8130
TL
171
172/**
173 * Gives fragmentation a numeric value.
174 *
175 * Following algorithm applies value to each existing free unallocated block.
176 * Value of single block is a multiply of size and per-byte-value.
177 * Per-byte-value is greater for larger blocks.
178 * Assume block size X has value per-byte p; then block size 2*X will have per-byte value 1.1*p.
179 *
180 * This could be expressed in logarithms, but for speed this is interpolated inside ranges.
181 * [1] [2..3] [4..7] [8..15] ...
182 * ^ ^ ^ ^
183 * 1.1 1.1^2 1.1^3 1.1^4 ...
184 *
185 * Final score is obtained by proportion between score that would have been obtained
186 * in condition of absolute fragmentation and score in no fragmentation at all.
187 */
188double Allocator::get_fragmentation_score()
189{
190 // this value represents how much worth is 2X bytes in one chunk then in X + X bytes
191 static const double double_size_worth = 1.1 ;
192 std::vector<double> scales{1};
193 double score_sum = 0;
194 size_t sum = 0;
195
196 auto get_score = [&](size_t v) -> double {
197 size_t sc = sizeof(v) * 8 - clz(v) - 1; //assign to grade depending on log2(len)
198 while (scales.size() <= sc + 1) {
199 //unlikely expand scales vector
200 scales.push_back(scales[scales.size() - 1] * double_size_worth);
201 }
202
203 size_t sc_shifted = size_t(1) << sc;
204 double x = double(v - sc_shifted) / sc_shifted; //x is <0,1) in its scale grade
205 // linear extrapolation in its scale grade
206 double score = (sc_shifted ) * scales[sc] * (1-x) +
207 (sc_shifted * 2) * scales[sc+1] * x;
208 return score;
209 };
210
211 auto iterated_allocation = [&](size_t off, size_t len) {
212 ceph_assert(len > 0);
213 score_sum += get_score(len);
214 sum += len;
215 };
216 dump(iterated_allocation);
217
218
219 double ideal = get_score(sum);
220 double terrible = sum * get_score(1);
221 return (ideal - score_sum) / (ideal - terrible);
222}