diff --git a/ceph/src/os/bluestore/ZonedAllocator.cc b/ceph/src/os/bluestore/ZonedAllocator.cc
index 0ea278c37..0a535e361 100644
--- a/ceph/src/os/bluestore/ZonedAllocator.cc
+++ b/ceph/src/os/bluestore/ZonedAllocator.cc
@@ -16,160 +16,225 @@
 #define dout_context cct
 #define dout_subsys ceph_subsys_bluestore
 #undef dout_prefix
-#define dout_prefix *_dout << "ZonedAllocator " << this << " "
+#define dout_prefix *_dout << "ZonedAllocator(" << this << ") " << __func__ << " "
 
 ZonedAllocator::ZonedAllocator(CephContext* cct,
                                int64_t size,
-                               int64_t block_size,
-                               const std::string& name)
-    : Allocator(name, size, block_size),
+                               int64_t blk_size,
+                               int64_t _zone_size,
+                               int64_t _first_sequential_zone,
+                               std::string_view name)
+    : Allocator(name, size, blk_size),
       cct(cct),
-      num_free(0),
       size(size),
-      // To avoid interface changes, we piggyback zone size and the first
-      // sequential zone number onto the first 32 bits of 64-bit |block_size|.
-      // The last 32 bits of |block_size| is holding the actual block size.
-      block_size((block_size & 0x00000000ffffffff)),
-      zone_size(((block_size & 0x0000ffff00000000) >> 32) * 1024 * 1024),
-      starting_zone_num((block_size & 0xffff000000000000) >> 48),
-      num_zones(size / zone_size) {
-  ldout(cct, 10) << __func__ << " size 0x" << std::hex << size
-                 << " zone size 0x" << zone_size << std::dec
-                 << " number of zones " << num_zones
-                 << " first sequential zone " << starting_zone_num
+      conventional_size(_first_sequential_zone * _zone_size),
+      sequential_size(size - conventional_size),
+      num_sequential_free(0),
+      block_size(blk_size),
+      zone_size(_zone_size),
+      first_seq_zone_num(_first_sequential_zone),
+      starting_zone_num(first_seq_zone_num),
+      num_zones(size / zone_size)
+{
+  ldout(cct, 10) << " size 0x" << std::hex << size
+                 << ", zone size 0x" << zone_size << std::dec
+                 << ", number of zones 0x" << num_zones
+                 << ", first sequential zone 0x" << starting_zone_num
+                 << ", sequential size 0x" << sequential_size
+                 << std::dec
                  << dendl;
   ceph_assert(size % zone_size == 0);
+
+  zone_states.resize(num_zones);
 }
 
-ZonedAllocator::~ZonedAllocator() {}
+ZonedAllocator::~ZonedAllocator()
+{
+}
 
 int64_t ZonedAllocator::allocate(
   uint64_t want_size,
   uint64_t alloc_unit,
   uint64_t max_alloc_size,
   int64_t hint,
-  PExtentVector *extents) {
+  PExtentVector *extents)
+{
   std::lock_guard l(lock);
 
   ceph_assert(want_size % 4096 == 0);
 
-  ldout(cct, 10) << __func__ << " trying to allocate "
-                 << std::hex << want_size << dendl;
+  ldout(cct, 10) << " trying to allocate 0x"
                 << std::hex << want_size << std::dec << dendl;
 
+  uint64_t left = num_zones - first_seq_zone_num;
   uint64_t zone_num = starting_zone_num;
-  for ( ; zone_num < num_zones; ++zone_num) {
-    if (fits(want_size, zone_num)) {
-      break;
+  for ( ; left > 0; ++zone_num, --left) {
+    if (zone_num == num_zones) {
+      zone_num = first_seq_zone_num;
     }
-    ldout(cct, 10) << __func__ << " skipping zone " << zone_num
-                   << " because there is not enough space: "
-                   << " want_size = " << want_size
-                   << " available = " << get_remaining_space(zone_num)
-                   << dendl;
+    if (zone_num == cleaning_zone) {
+      ldout(cct, 10) << " skipping zone 0x" << std::hex << zone_num
+                     << " because we are cleaning it" << std::dec << dendl;
+      continue;
+    }
+    if (!fits(want_size, zone_num)) {
+      ldout(cct, 10) << " skipping zone 0x" << std::hex << zone_num
+                     << " because there is not enough space: "
+                     << " want_size = 0x" << want_size
+                     << " available = 0x" << get_remaining_space(zone_num)
+                     << std::dec
+                     << dendl;
+      continue;
+    }
+    break;
   }
 
-  if (zone_num == num_zones) {
-    ldout(cct, 10) << __func__ << " failed to allocate" << dendl;
+  if (left == 0) {
+    ldout(cct, 10) << " failed to allocate" << dendl;
     return -ENOSPC;
   }
 
   uint64_t offset = get_offset(zone_num);
 
-  ldout(cct, 10) << __func__ << " advancing zone " << std::hex
-                 << zone_num << " write pointer from " << offset
-                 << " to " << offset + want_size << dendl;
+  ldout(cct, 10) << " moving zone 0x" << std::hex
+                 << zone_num << " write pointer from 0x" << offset
+                 << " -> 0x" << offset + want_size
+                 << std::dec << dendl;
 
-  advance_write_pointer(zone_num, want_size);
+  increment_write_pointer(zone_num, want_size);
+  num_sequential_free -= want_size;
   if (get_remaining_space(zone_num) == 0) {
     starting_zone_num = zone_num + 1;
   }
 
-  ldout(cct, 10) << __func__ << std::hex << " zone " << zone_num
-                 << " offset is now " << get_write_pointer(zone_num) << dendl;
-
-  ldout(cct, 10) << __func__ << " allocated " << std::hex << want_size
-                 << " bytes at offset " << offset
-                 << " located at zone " << zone_num
-                 << " and zone offset " << offset % zone_size << dendl;
+  ldout(cct, 10) << " allocated 0x" << std::hex << offset << "~" << want_size
+                 << " from zone 0x" << zone_num
+                 << " and zone offset 0x" << (offset % zone_size)
+                 << std::dec << dendl;
 
   extents->emplace_back(bluestore_pextent_t(offset, want_size));
   return want_size;
 }
 
-void ZonedAllocator::release(const interval_set<uint64_t>& release_set) {
+void ZonedAllocator::release(const interval_set<uint64_t>& release_set)
+{
   std::lock_guard l(lock);
+  for (auto p = cbegin(release_set); p != cend(release_set); ++p) {
+    auto offset = p.get_start();
+    auto length = p.get_len();
+    uint64_t zone_num = offset / zone_size;
+    ldout(cct, 10) << " 0x" << std::hex << offset << "~" << length
+                   << " from zone 0x" << zone_num << std::dec << dendl;
+    uint64_t num_dead = std::min(zone_size - offset % zone_size, length);
+    for ( ; length; ++zone_num) {
+      increment_num_dead_bytes(zone_num, num_dead);
+      length -= num_dead;
+      num_dead = std::min(zone_size, length);
+    }
+  }
 }
 
-uint64_t ZonedAllocator::get_free() {
-  return num_free;
+uint64_t ZonedAllocator::get_free()
+{
+  return num_sequential_free;
 }
 
-void ZonedAllocator::dump() {
+void ZonedAllocator::dump()
+{
   std::lock_guard l(lock);
 }
 
 void ZonedAllocator::dump(std::function<void(uint64_t offset,
-                                             uint64_t length)> notify) {
+                                             uint64_t length)> notify)
+{
   std::lock_guard l(lock);
 }
 
-// This just increments |num_free|. The actual free space is added by
-// set_zone_states, as it updates the write pointer for each zone.
-void ZonedAllocator::init_add_free(uint64_t offset, uint64_t length) {
-  ldout(cct, 40) << __func__ << " " << std::hex
-                 << offset << "~" << length << dendl;
-
-  num_free += length;
-}
-
-void ZonedAllocator::init_rm_free(uint64_t offset, uint64_t length) {
+void ZonedAllocator::init_from_zone_pointers(
+  std::vector<zone_state_t> &&_zone_states)
+{
+  // this is called once, based on the device's zone pointers
   std::lock_guard l(lock);
-  ldout(cct, 40) << __func__ << " 0x" << std::hex
-                 << offset << "~" << length << dendl;
-
-  num_free -= length;
-  ceph_assert(num_free >= 0);
-
-  uint64_t zone_num = offset / zone_size;
-  uint64_t write_pointer = offset % zone_size;
-  uint64_t remaining_space = get_remaining_space(zone_num);
-
-  ceph_assert(get_write_pointer(zone_num) == write_pointer);
-  ceph_assert(remaining_space <= length);
-  advance_write_pointer(zone_num, remaining_space);
-
-  ldout(cct, 40) << __func__ << " set zone 0x" << std::hex
-                 << zone_num << " write pointer to 0x" << zone_size << dendl;
-
-  length -= remaining_space;
-  ceph_assert(length % zone_size == 0);
-
-  for ( ; length; length -= zone_size) {
-    advance_write_pointer(++zone_num, zone_size);
-    ldout(cct, 40) << __func__ << " set zone 0x" << std::hex
-                   << zone_num << " write pointer to 0x" << zone_size << dendl;
+  ldout(cct, 10) << dendl;
+  zone_states = std::move(_zone_states);
+  num_sequential_free = 0;
+  for (size_t i = first_seq_zone_num; i < num_zones; ++i) {
+    num_sequential_free += zone_size - (zone_states[i].write_pointer % zone_size);
   }
+  ldout(cct, 10) << "free 0x" << std::hex << num_sequential_free
+                 << " / 0x" << sequential_size << std::dec
+                 << dendl;
 }
 
-bool ZonedAllocator::zoned_get_zones_to_clean(std::deque<uint64_t> *zones_to_clean) {
-  // TODO: make 0.25 tunable
-  if (static_cast<double>(num_free) / size > 0.25) {
-    return false;
+int64_t ZonedAllocator::pick_zone_to_clean(float min_score, uint64_t min_saved)
+{
+  std::lock_guard l(lock);
+  int32_t best = -1;
+  float best_score = 0.0;
+  for (size_t i = first_seq_zone_num; i < num_zones; ++i) {
+    // value (score) = benefit / cost
+    //    benefit = how much net free space we'll get (dead bytes)
+    //    cost = how many bytes we'll have to rewrite (live bytes)
+    // avoid divide by zero on a zone with no live bytes
+    float score =
+      (float)zone_states[i].num_dead_bytes /
+      (float)(zone_states[i].get_num_live_bytes() + 1);
+    if (score > 0) {
+      ldout(cct, 20) << " zone 0x" << std::hex << i
+                     << " dead 0x" << zone_states[i].num_dead_bytes
+                     << " score " << score
+                     << dendl;
+    }
+    if (zone_states[i].num_dead_bytes < min_saved) {
+      continue;
+    }
+    if (best < 0 || score > best_score) {
+      best = i;
+      best_score = score;
+    }
   }
-  {
-    std::lock_guard l(lock);
-    // TODO: populate |zones_to_clean| with the numbers of zones that should be
-    // cleaned.
+  if (best_score >= min_score) {
+    ldout(cct, 10) << " zone 0x" << std::hex << best << " with score " << best_score
+                   << ": 0x" << zone_states[best].num_dead_bytes
+                   << " dead and 0x"
+                   << zone_states[best].write_pointer - zone_states[best].num_dead_bytes
+                   << " live bytes" << std::dec << dendl;
+  } else if (best > 0) {
+    ldout(cct, 10) << " zone 0x" << std::hex << best << " with score " << best_score
+                   << ": 0x" << zone_states[best].num_dead_bytes
+                   << " dead and 0x"
+                   << zone_states[best].write_pointer - zone_states[best].num_dead_bytes
+                   << " live bytes" << std::dec
+                   << " but below min_score " << min_score
+                   << dendl;
+    best = -1;
+  } else {
+    ldout(cct, 10) << " no zones found that are good cleaning candidates" << dendl;
   }
-  return true;
+  return best;
+}
+
+void ZonedAllocator::reset_zone(uint32_t zone)
+{
+  num_sequential_free += zone_states[zone].write_pointer;
+  zone_states[zone].reset();
 }
 
-void ZonedAllocator::zoned_set_zone_states(std::vector<zone_state_t> &&_zone_states) {
+bool ZonedAllocator::low_on_space(void)
+{
   std::lock_guard l(lock);
-  ldout(cct, 10) << __func__ << dendl;
-  zone_states = std::move(_zone_states);
+  double free_ratio = static_cast<double>(num_sequential_free) / sequential_size;
+
+  ldout(cct, 10) << " free 0x" << std::hex << num_sequential_free
+                 << "/ 0x" << sequential_size << std::dec
+                 << ", free ratio is " << free_ratio << dendl;
+  ceph_assert(num_sequential_free <= (int64_t)sequential_size);
+
+  // TODO: make 0.25 tunable
+  return free_ratio <= 0.25;
 }
 
-void ZonedAllocator::shutdown() {
-  ldout(cct, 1) << __func__ << dendl;
+void ZonedAllocator::shutdown()
+{
+  ldout(cct, 1) << dendl;
 }
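
The new release() path in this patch turns each freed extent into per-zone dead-byte counts, splitting the extent wherever it crosses a zone boundary. The standalone sketch below reproduces just that splitting loop; it assumes a plain std::vector<uint64_t> of per-zone counters in place of BlueStore's zone_state_t, and account_dead_bytes is a hypothetical helper name used only for this illustration, not a BlueStore function.

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

// Credit every zone touched by a released extent with the bytes that
// fall inside it, mirroring the inner loop of release() in the diff.
static void account_dead_bytes(std::vector<uint64_t>& dead_bytes,
                               uint64_t zone_size,
                               uint64_t offset, uint64_t length)
{
  uint64_t zone_num = offset / zone_size;
  // The first chunk may start mid-zone; later chunks are whole zones
  // (or the final tail).
  uint64_t num_dead = std::min(zone_size - offset % zone_size, length);
  for ( ; length; ++zone_num) {
    dead_bytes[zone_num] += num_dead;
    length -= num_dead;
    num_dead = std::min(zone_size, length);
  }
}

int main()
{
  std::vector<uint64_t> dead_bytes(4, 0);
  // Free 0x30000 bytes starting halfway through zone 1 (zone size 0x20000).
  account_dead_bytes(dead_bytes, 0x20000, 0x30000, 0x30000);
  for (size_t i = 0; i < dead_bytes.size(); ++i) {
    std::cout << "zone " << i << " dead 0x"
              << std::hex << dead_bytes[i] << std::dec << "\n";
  }
  // Expect: zone 1 gets 0x10000, zone 2 gets 0x20000, the others 0.
}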
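
The new pick_zone_to_clean() chooses a cleaning victim with a benefit/cost score: dead (reclaimable) bytes divided by live bytes plus one, where the +1 guards against division by zero in an all-dead zone. The sketch below exercises the same heuristic in isolation; zone_state_t here is a simplified local struct, and the zone data, min_score, and min_saved values in main() are invented for the example.

#include <cstdint>
#include <iostream>
#include <vector>

// Simplified stand-in for BlueStore's zone_state_t.
struct zone_state_t {
  uint64_t write_pointer = 0;   // bytes written into the zone so far
  uint64_t num_dead_bytes = 0;  // bytes already released (garbage)
  uint64_t live_bytes() const { return write_pointer - num_dead_bytes; }
};

// Same greedy heuristic as pick_zone_to_clean() in the diff:
// score = benefit / cost = dead bytes / (live bytes + 1).
int64_t pick_zone_to_clean(const std::vector<zone_state_t>& zones,
                           size_t first_seq_zone,
                           float min_score, uint64_t min_saved)
{
  int64_t best = -1;
  float best_score = 0.0f;
  for (size_t i = first_seq_zone; i < zones.size(); ++i) {
    if (zones[i].num_dead_bytes < min_saved) {
      continue;  // cleaning this zone would not reclaim enough space
    }
    float score = float(zones[i].num_dead_bytes) /
                  float(zones[i].live_bytes() + 1);
    if (best < 0 || score > best_score) {
      best = int64_t(i);
      best_score = score;
    }
  }
  return (best >= 0 && best_score >= min_score) ? best : -1;
}

int main()
{
  // Zone 0 is conventional; zones 1-3 are sequential, 1 MiB written each.
  std::vector<zone_state_t> zones = {
    {0, 0},
    {1 << 20, 900 << 10},   // mostly dead: cheap to clean, big payoff
    {1 << 20, 64 << 10},    // mostly live: expensive to clean
    {1 << 20, 0},           // nothing to reclaim
  };
  std::cout << "best zone: "
            << pick_zone_to_clean(zones, 1, /*min_score=*/0.5f,
                                  /*min_saved=*/64 << 10)
            << "\n";  // expect 1
}

In the patch itself the candidate scores are also logged at debug level 20 before the min_saved filter is applied, so zones that end up skipped still appear in the log; the sketch drops the logging for brevity.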