// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

//
// A simple allocator that just hands out space from the next empty zone. This
// is temporary, just to get the simplest append-only write workload to work.
//
// Copyright (C) 2020 Abutalib Aghayev
//

#include "ZonedAllocator.h"
#include "bluestore_types.h"
#include "zoned_types.h"
#include "common/debug.h"

#define dout_context cct
#define dout_subsys ceph_subsys_bluestore
#undef dout_prefix
#define dout_prefix *_dout << "ZonedAllocator(" << this << ") " << __func__ << " "

ZonedAllocator::ZonedAllocator(CephContext* cct,
                               int64_t size,
                               int64_t blk_size,
                               int64_t _zone_size,
                               int64_t _first_sequential_zone,
                               std::string_view name)
    : Allocator(name, size, blk_size),
      cct(cct),
      size(size),
      conventional_size(_first_sequential_zone * _zone_size),
      sequential_size(size - conventional_size),
      num_sequential_free(0),
      block_size(blk_size),
      zone_size(_zone_size),
      first_seq_zone_num(_first_sequential_zone),
      starting_zone_num(first_seq_zone_num),
      num_zones(size / zone_size)
{
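  // Zones [0, first_seq_zone_num) are conventional zones that this allocator
  // never hands out; only the sequential (append-only) zones that follow are
  // managed here.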
  ldout(cct, 10) << " size 0x" << std::hex << size
                 << ", zone size 0x" << zone_size
                 << ", number of zones 0x" << num_zones
                 << ", first sequential zone 0x" << starting_zone_num
                 << ", sequential size 0x" << sequential_size
                 << std::dec
                 << dendl;
  ceph_assert(size % zone_size == 0);

  zone_states.resize(num_zones);
}

ZonedAllocator::~ZonedAllocator()
{
}

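// Allocate want_size bytes contiguously from a single sequential zone; the
// request is never split across zones, so the call fails with -ENOSPC if no
// zone can fit it in one piece.  alloc_unit, max_alloc_size, and hint are
// ignored by this simple allocator.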
int64_t ZonedAllocator::allocate(
  uint64_t want_size,
  uint64_t alloc_unit,
  uint64_t max_alloc_size,
  int64_t hint,
  PExtentVector *extents)
{
  std::lock_guard l(lock);

  ceph_assert(want_size % 4096 == 0);

  ldout(cct, 10) << " trying to allocate 0x"
                 << std::hex << want_size << std::dec << dendl;

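  // Round-robin scan over the sequential zones: start at starting_zone_num,
  // wrap around to first_seq_zone_num, and skip the zone currently being
  // cleaned as well as any zone that cannot fit the whole request.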
  uint64_t left = num_zones - first_seq_zone_num;
  uint64_t zone_num = starting_zone_num;
  for ( ; left > 0; ++zone_num, --left) {
    if (zone_num == num_zones) {
      zone_num = first_seq_zone_num;
    }
    if (zone_num == cleaning_zone) {
      ldout(cct, 10) << " skipping zone 0x" << std::hex << zone_num
                     << " because we are cleaning it" << std::dec << dendl;
      continue;
    }
    if (!fits(want_size, zone_num)) {
      ldout(cct, 10) << " skipping zone 0x" << std::hex << zone_num
                     << " because there is not enough space: "
                     << " want_size = 0x" << want_size
                     << " available = 0x" << get_remaining_space(zone_num)
                     << std::dec
                     << dendl;
      continue;
    }
    break;
  }

  if (left == 0) {
    ldout(cct, 10) << " failed to allocate" << dendl;
    return -ENOSPC;
  }

  uint64_t offset = get_offset(zone_num);

  ldout(cct, 10) << " moving zone 0x" << std::hex
                 << zone_num << " write pointer from 0x" << offset
                 << " -> 0x" << offset + want_size
                 << std::dec << dendl;

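  // Advance the zone's write pointer and charge the allocation against the
  // sequential free-space counter; if the zone is now full, start the next
  // search at the following zone.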
  increment_write_pointer(zone_num, want_size);
  num_sequential_free -= want_size;
  if (get_remaining_space(zone_num) == 0) {
    starting_zone_num = zone_num + 1;
  }

  ldout(cct, 10) << " allocated 0x" << std::hex << offset << "~" << want_size
                 << " from zone 0x" << zone_num
                 << " and zone offset 0x" << (offset % zone_size)
                 << std::dec << dendl;

  extents->emplace_back(bluestore_pextent_t(offset, want_size));
  return want_size;
}

void ZonedAllocator::release(const interval_set<uint64_t>& release_set)
{
  std::lock_guard l(lock);
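  // Freed space in a sequential zone cannot be reused until the zone is
  // reset, so record the released bytes as dead in every zone the extent
  // touches; pick_zone_to_clean() later uses these counts to choose cleaning
  // candidates.
  //
  // For example (assuming zone_size = 0x100000): releasing 0x180000 bytes at
  // offset 0x80000 marks 0x80000 bytes dead in zone 0x0 and 0x100000 bytes
  // dead in zone 0x1.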
  for (auto p = cbegin(release_set); p != cend(release_set); ++p) {
    auto offset = p.get_start();
    auto length = p.get_len();
    uint64_t zone_num = offset / zone_size;
    ldout(cct, 10) << " 0x" << std::hex << offset << "~" << length
                   << " from zone 0x" << zone_num << std::dec << dendl;
    uint64_t num_dead = std::min(zone_size - offset % zone_size, length);
    for ( ; length; ++zone_num) {
      increment_num_dead_bytes(zone_num, num_dead);
      length -= num_dead;
      num_dead = std::min(zone_size, length);
    }
  }
}

uint64_t ZonedAllocator::get_free()
{
  return num_sequential_free;
}

void ZonedAllocator::dump()
{
  std::lock_guard l(lock);
}

void ZonedAllocator::dump(std::function<void(uint64_t offset,
                                             uint64_t length)> notify)
{
  std::lock_guard l(lock);
}

void ZonedAllocator::init_from_zone_pointers(
  std::vector<zone_state_t> &&_zone_states)
{
  // this is called once, based on the device's zone pointers
  std::lock_guard l(lock);
  ldout(cct, 10) << dendl;
  zone_states = std::move(_zone_states);
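  // Recompute the sequential free space from the zone write pointers.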
  num_sequential_free = 0;
  for (size_t i = first_seq_zone_num; i < num_zones; ++i) {
    num_sequential_free += zone_size - (zone_states[i].write_pointer % zone_size);
  }
  ldout(cct, 10) << "free 0x" << std::hex << num_sequential_free
                 << " / 0x" << sequential_size << std::dec
                 << dendl;
}

int64_t ZonedAllocator::pick_zone_to_clean(float min_score, uint64_t min_saved)
{
  std::lock_guard l(lock);
  int32_t best = -1;
  float best_score = 0.0;
  for (size_t i = first_seq_zone_num; i < num_zones; ++i) {
    // value (score) = benefit / cost
    // benefit = how much net free space we'll get (dead bytes)
    // cost = how many bytes we'll have to rewrite (live bytes)
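    // e.g. a zone that is half dead and half live scores roughly 1.0, while a
    // mostly live zone scores close to 0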
    // avoid divide by zero on a zone with no live bytes
    float score =
      (float)zone_states[i].num_dead_bytes /
      (float)(zone_states[i].get_num_live_bytes() + 1);
    if (score > 0) {
      ldout(cct, 20) << " zone 0x" << std::hex << i
                     << " dead 0x" << zone_states[i].num_dead_bytes
                     << " score " << score
                     << dendl;
    }
    if (zone_states[i].num_dead_bytes < min_saved) {
      continue;
    }
    if (best < 0 || score > best_score) {
      best = i;
      best_score = score;
    }
  }
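  // Only report a candidate if the best score clears min_score; zones with
  // fewer than min_saved dead bytes were already skipped above.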
  if (best >= 0 && best_score >= min_score) {
    ldout(cct, 10) << " zone 0x" << std::hex << best << " with score " << best_score
                   << ": 0x" << zone_states[best].num_dead_bytes
                   << " dead and 0x"
                   << zone_states[best].write_pointer - zone_states[best].num_dead_bytes
                   << " live bytes" << std::dec << dendl;
  } else if (best >= 0) {
    ldout(cct, 10) << " zone 0x" << std::hex << best << " with score " << best_score
                   << ": 0x" << zone_states[best].num_dead_bytes
                   << " dead and 0x"
                   << zone_states[best].write_pointer - zone_states[best].num_dead_bytes
                   << " live bytes" << std::dec
                   << " but below min_score " << min_score
                   << dendl;
    best = -1;
  } else {
    ldout(cct, 10) << " no zones found that are good cleaning candidates" << dendl;
  }
  return best;
}

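// Return the zone's written space to the sequential free counter and clear
// the allocator's in-memory state for the zone; resetting the zone on the
// device itself is not done here.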
void ZonedAllocator::reset_zone(uint32_t zone)
{
  num_sequential_free += zone_states[zone].write_pointer;
  zone_states[zone].reset();
}

bool ZonedAllocator::low_on_space(void)
{
  std::lock_guard l(lock);
  double free_ratio = static_cast<double>(num_sequential_free) / sequential_size;

  ldout(cct, 10) << " free 0x" << std::hex << num_sequential_free
                 << " / 0x" << sequential_size << std::dec
                 << ", free ratio is " << free_ratio << dendl;
  ceph_assert(num_sequential_free <= (int64_t)sequential_size);

  // TODO: make 0.25 tunable
  return free_ratio <= 0.25;
}

void ZonedAllocator::shutdown()
{
  ldout(cct, 1) << dendl;
}