]>
git.proxmox.com Git - ceph.git/blob - ceph/src/os/bluestore/ZonedAllocator.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
5 // A simple allocator that just hands out space from the next empty zone. This
6 // is temporary, just to get the simplest append-only write workload to work.
8 // Copyright (C) 2020 Abutalib Aghayev
11 #include "ZonedAllocator.h"
12 #include "bluestore_types.h"
13 #include "zoned_types.h"
14 #include "common/debug.h"
16 #define dout_context cct
17 #define dout_subsys ceph_subsys_bluestore
19 #define dout_prefix *_dout << "ZonedAllocator(" << this << ") " << __func__ << " "
// Constructor: derives the zone geometry (conventional/sequential sizes, zone
// count, first sequential zone) from its arguments, logs it, and sizes
// zone_states to one entry per zone.
// NOTE(review): this extract is missing several original lines (some
// parameters, some initializers, the braces and the tail of the log
// statement); the comments below describe only what is visible.
21 ZonedAllocator::ZonedAllocator(CephContext
* cct
,
25 int64_t _first_sequential_zone
,
26 std::string_view name
)
27 : Allocator(name
, size
, blk_size
),
// Bytes covered by the zones that precede the first sequential zone.
30 conventional_size(_first_sequential_zone
* _zone_size
),
// Everything after the conventional region.
31 sequential_size(size
- conventional_size
),
// Starts at 0; init_from_zone_pointers() recomputes it from the zone states.
32 num_sequential_free(0),
34 zone_size(_zone_size
),
35 first_seq_zone_num(_first_sequential_zone
),
// allocate() begins its scan at this zone.
36 starting_zone_num(first_seq_zone_num
),
37 num_zones(size
/ zone_size
)
// NOTE(review): std::dec is inserted after the zone size, so the following
// values are streamed in decimal although their labels still say "0x" —
// confirm whether std::dec was meant to come at the end of the statement.
39 ldout(cct
, 10) << " size 0x" << std::hex
<< size
40 << ", zone size 0x" << zone_size
<< std::dec
41 << ", number of zones 0x" << num_zones
42 << ", first sequential zone 0x" << starting_zone_num
43 << ", sequential size 0x" << sequential_size
// The size must be a whole number of zones (num_zones above was computed
// with integer division).
46 ceph_assert(size
% zone_size
== 0);
// One state entry per zone.
48 zone_states
.resize(num_zones
);
// Destructor. Its body is not visible in this extract.
51 ZonedAllocator::~ZonedAllocator()
// Allocates want_size bytes from a single sequential zone and appends the
// resulting extent to *extents.
//
// Zones are scanned starting at starting_zone_num, wrapping around to
// first_seq_zone_num after the last zone; the zone being cleaned and zones
// where the request does not fit are skipped.
//
// NOTE(review): the declaration of want_size, the continue/break statements
// and every return statement fall on lines missing from this extract, so the
// return value is not documented here.
55 int64_t ZonedAllocator::allocate(
58 uint64_t max_alloc_size
,
60 PExtentVector
*extents
)
// Held for the rest of the function.
62 std::lock_guard
l(lock
);
// Requests must be a multiple of 4096 bytes.
64 ceph_assert(want_size
% 4096 == 0);
66 ldout(cct
, 10) << " trying to allocate 0x"
67 << std::hex
<< want_size
<< std::dec
<< dendl
;
// Number of sequential zones: the most zones the scan below will examine.
69 uint64_t left
= num_zones
- first_seq_zone_num
;
70 uint64_t zone_num
= starting_zone_num
;
71 for ( ; left
> 0; ++zone_num
, --left
) {
// Ran past the last zone: wrap around to the first sequential zone.
72 if (zone_num
== num_zones
) {
73 zone_num
= first_seq_zone_num
;
// Never allocate from the zone that is currently being cleaned.
75 if (zone_num
== cleaning_zone
) {
76 ldout(cct
, 10) << " skipping zone 0x" << std::hex
<< zone_num
77 << " because we are cleaning it" << std::dec
<< dendl
;
// Skip zones for which fits() rejects this request.
80 if (!fits(want_size
, zone_num
)) {
81 ldout(cct
, 10) << " skipping zone 0x" << std::hex
<< zone_num
82 << " because there is not enough space: "
83 << " want_size = 0x" << want_size
84 << " available = 0x" << get_remaining_space(zone_num
)
// Logged when no zone could satisfy the request (the surrounding condition
// and the error return are not visible in this extract).
93 ldout(cct
, 10) << " failed to allocate" << dendl
;
// Offset at which the new extent will start, as reported by get_offset()
// for the chosen zone.
97 uint64_t offset
= get_offset(zone_num
);
99 ldout(cct
, 10) << " moving zone 0x" << std::hex
100 << zone_num
<< " write pointer from 0x" << offset
101 << " -> 0x" << offset
+ want_size
102 << std::dec
<< dendl
;
// Consume the space: advance the zone's write pointer and the free counter.
104 increment_write_pointer(zone_num
, want_size
);
105 num_sequential_free
-= want_size
;
// Zone is now full: start the next scan at the following zone. This may
// equal num_zones; the wrap-around check at the top of the loop handles it.
106 if (get_remaining_space(zone_num
) == 0) {
107 starting_zone_num
= zone_num
+ 1;
110 ldout(cct
, 10) << " allocated 0x" << std::hex
<< offset
<< "~" << want_size
111 << " from zone 0x" << zone_num
112 << " and zone offset 0x" << (offset
% zone_size
)
113 << std::dec
<< dendl
;
// Report the allocation to the caller as a single extent.
115 extents
->emplace_back(bluestore_pextent_t(offset
, want_size
));
// Records released extents as dead bytes in the zones they cover.
//
// For each (offset, length) interval in release_set, the starting zone is
// offset / zone_size; dead bytes are then charged zone by zone via
// increment_num_dead_bytes().
119 void ZonedAllocator::release(const interval_set
<uint64_t>& release_set
)
// Held for the rest of the function.
121 std::lock_guard
l(lock
);
122 for (auto p
= cbegin(release_set
); p
!= cend(release_set
); ++p
) {
123 auto offset
= p
.get_start();
124 auto length
= p
.get_len();
125 uint64_t zone_num
= offset
/ zone_size
;
126 ldout(cct
, 10) << " 0x" << std::hex
<< offset
<< "~" << length
127 << " from zone 0x" << zone_num
<< std::dec
<< dendl
;
// First chunk: at most the bytes from offset to the end of its zone.
128 uint64_t num_dead
= std::min(zone_size
- offset
% zone_size
, length
);
// NOTE(review): this loop ends only when length reaches 0; the statement
// that reduces length (original line 131) is not visible in this extract.
129 for ( ; length
; ++zone_num
) {
130 increment_num_dead_bytes(zone_num
, num_dead
);
// Later chunks: at most one full zone each.
132 num_dead
= std::min(zone_size
, length
);
// Returns num_sequential_free, the running free-byte counter that
// allocate() decrements and reset_zone() increments.
137 uint64_t ZonedAllocator::get_free()
139 return num_sequential_free
;
// In the visible code this only acquires the allocator lock; no dump output
// is visible in this extract.
142 void ZonedAllocator::dump()
144 std::lock_guard
l(lock
);
// Takes a callback of the form notify(offset, length). In the visible code
// this only acquires the allocator lock; no call to notify is visible in
// this extract.
147 void ZonedAllocator::foreach(
148 std::function
<void(uint64_t offset
, uint64_t length
)> notify
)
150 std::lock_guard
l(lock
);
// Adopts the supplied per-zone states (moved in, replacing zone_states) and
// recomputes num_sequential_free from the write pointers of the sequential
// zones.
153 void ZonedAllocator::init_from_zone_pointers(
154 std::vector
<zone_state_t
> &&_zone_states
)
156 // this is called once, based on the device's zone pointers
157 std::lock_guard
l(lock
);
158 ldout(cct
, 10) << dendl
;
159 zone_states
= std::move(_zone_states
);
// Rebuild the free counter from scratch.
160 num_sequential_free
= 0;
161 for (size_t i
= first_seq_zone_num
; i
< num_zones
; ++i
) {
// Free space in zone i = zone_size minus the write pointer's position
// within the zone.
// NOTE(review): because of the modulo, a write_pointer equal to zone_size
// yields 0 and the zone is counted as entirely free — confirm whether a
// full zone can report write_pointer == zone_size.
162 num_sequential_free
+= zone_size
- (zone_states
[i
].write_pointer
% zone_size
);
164 ldout(cct
, 10) << "free 0x" << std::hex
<< num_sequential_free
165 << " / 0x" << sequential_size
<< std::dec
// Scans the sequential zones for the best cleaning candidate.
//
// Each zone is scored as dead bytes / (live bytes + 1). The comparison
// against min_saved and the candidate update are visible; the statements
// inside those branches are not.
//
// min_score: the winning score is compared against this after the scan.
// min_saved: compared against each zone's num_dead_bytes during the scan.
//
// NOTE(review): the declarations of best and score, the bodies of the two
// if-statements in the loop and every return statement fall on lines missing
// from this extract, so the return value is not documented here.
169 int64_t ZonedAllocator::pick_zone_to_clean(float min_score
, uint64_t min_saved
)
// Held for the rest of the function.
171 std::lock_guard
l(lock
);
173 float best_score
= 0.0;
174 for (size_t i
= first_seq_zone_num
; i
< num_zones
; ++i
) {
175 // value (score) = benefit / cost
176 // benefit = how much net free space we'll get (dead bytes)
177 // cost = how many bytes we'll have to rewrite (live bytes)
178 // avoid divide by zero on a zone with no live bytes
180 (float)zone_states
[i
].num_dead_bytes
/
181 (float)(zone_states
[i
].get_num_live_bytes() + 1);
183 ldout(cct
, 20) << " zone 0x" << std::hex
<< i
184 << " dead 0x" << zone_states
[i
].num_dead_bytes
185 << " score " << score
// Per-zone check of dead bytes against min_saved (branch body not visible).
188 if (zone_states
[i
].num_dead_bytes
< min_saved
) {
// True for the first candidate (best < 0) or a strictly higher score.
191 if (best
< 0 || score
> best_score
) {
// NOTE(review): if no zone was selected, best_score is still 0.0; with
// min_score <= 0 this branch would index zone_states[best] using best's
// initial value (declaration not visible here) — confirm this cannot happen.
196 if (best_score
>= min_score
) {
197 ldout(cct
, 10) << " zone 0x" << std::hex
<< best
<< " with score " << best_score
198 << ": 0x" << zone_states
[best
].num_dead_bytes
200 << zone_states
[best
].write_pointer
- zone_states
[best
].num_dead_bytes
201 << " live bytes" << std::dec
<< dendl
;
// NOTE(review): `best > 0` excludes zone 0, whereas the loop above treats
// only negative values as "no candidate" (`best < 0`) — confirm whether
// `>= 0` was intended.
202 } else if (best
> 0) {
203 ldout(cct
, 10) << " zone 0x" << std::hex
<< best
<< " with score " << best_score
204 << ": 0x" << zone_states
[best
].num_dead_bytes
206 << zone_states
[best
].write_pointer
- zone_states
[best
].num_dead_bytes
207 << " live bytes" << std::dec
208 << " but below min_score " << min_score
212 ldout(cct
, 10) << " no zones found that are good cleaning candidates" << dendl
;
// Returns a zone's written space to the free pool: adds the zone's current
// write_pointer to num_sequential_free, then calls reset() on its state.
// NOTE(review): no lock_guard is visible here, unlike the other functions
// that modify num_sequential_free — presumably the caller serializes access;
// confirm.
217 void ZonedAllocator::reset_zone(uint32_t zone
)
// Must read write_pointer before reset() is called on the zone state.
219 num_sequential_free
+= zone_states
[zone
].write_pointer
;
220 zone_states
[zone
].reset();
// Returns true when the free fraction of the sequential region
// (num_sequential_free / sequential_size) is at or below 25%.
223 bool ZonedAllocator::low_on_space(void)
225 std::lock_guard
l(lock
);
// Floating-point division; the cast keeps it from truncating.
226 double free_ratio
= static_cast<double>(num_sequential_free
) / sequential_size
;
228 ldout(cct
, 10) << " free 0x" << std::hex
<< num_sequential_free
229 << "/ 0x" << sequential_size
<< std::dec
230 << ", free ratio is " << free_ratio
<< dendl
;
// Sanity check: free space can never exceed the sequential region. Note it
// runs after the ratio has already been computed and logged.
231 ceph_assert(num_sequential_free
<= (int64_t)sequential_size
);
233 // TODO: make 0.25 tunable
234 return free_ratio
<= 0.25;
// In the visible code this only emits an empty level-1 log line (the dout
// prefix supplies the function name).
237 void ZonedAllocator::shutdown()
239 ldout(cct
, 1) << dendl
;