#include <seastar/core/cacheline.hh>
#include <seastar/core/memory.hh>
#include <seastar/core/reactor.hh>
+#include <seastar/core/print.hh>
#include <seastar/util/alloc_failure_injector.hh>
+#include <seastar/util/std-compat.hh>
#include <iostream>
namespace seastar {
+void* internal::allocate_aligned_buffer_impl(size_t size, size_t align) {
+ void *ret;
+ auto r = posix_memalign(&ret, align, size);
+ if (r == ENOMEM) {
+ throw std::bad_alloc();
+ } else if (r == EINVAL) {
+ throw std::runtime_error(format("Invalid alignment of {:d}; allocating {:d} bytes", align, size));
+ } else {
+ assert(r == 0);
+ return ret;
+ }
+}
+
namespace memory {
static thread_local int abort_on_alloc_failure_suppressed = 0;
--abort_on_alloc_failure_suppressed;
}
+static compat::polymorphic_allocator<char> static_malloc_allocator{compat::pmr_get_default_resource()};;
+compat::polymorphic_allocator<char>* malloc_allocator{&static_malloc_allocator};
+
}
}
seastar::logger seastar_memory_logger("seastar_memory");
-static allocation_site_ptr get_allocation_site() __attribute__((unused));
+[[gnu::unused]]
+static allocation_site_ptr get_allocation_site();
static void on_allocation_failure(size_t size);
page_list free_spans[nr_span_lists]; // contains aligned spans with span_size == 2^idx
small_pool_array small_pools;
alignas(seastar::cache_line_size) std::atomic<cross_cpu_free_item*> xcpu_freelist;
- alignas(seastar::cache_line_size) std::vector<physical_address> virt_to_phys_map;
static std::atomic<unsigned> cpu_id_gen;
static cpu_pages* all_cpus[max_cpus];
union asu {
bool is_initialized() const;
bool initialize();
- reclaiming_result run_reclaimers(reclaimer_scope);
+ reclaiming_result run_reclaimers(reclaimer_scope, size_t pages_to_reclaim);
void schedule_reclaim();
void set_reclaim_hook(std::function<void (std::function<void ()>)> hook);
void set_min_free_pages(size_t pages);
void resize(size_t new_size, allocate_system_memory_fn alloc_sys_mem);
void do_resize(size_t new_size, allocate_system_memory_fn alloc_sys_mem);
void replace_memory_backing(allocate_system_memory_fn alloc_sys_mem);
- void init_virt_to_phys_map();
void check_large_allocation(size_t size);
void warn_large_allocation(size_t size);
memory::memory_layout memory_layout();
- translation translate(const void* addr, size_t size);
~cpu_pages();
};
std::atomic<unsigned> cpu_pages::cpu_id_gen;
cpu_pages* cpu_pages::all_cpus[max_cpus];
+#ifdef SEASTAR_HEAPPROF
+
void set_heap_profiling_enabled(bool enable) {
bool is_enabled = cpu_mem.collect_backtrace;
if (enable) {
cpu_mem.collect_backtrace = enable;
}
+static thread_local int64_t scoped_heap_profiling_embed_count = 0;
+
+scoped_heap_profiling::scoped_heap_profiling() noexcept {
+ ++scoped_heap_profiling_embed_count;
+ set_heap_profiling_enabled(true);
+}
+
+scoped_heap_profiling::~scoped_heap_profiling() {
+ if (!--scoped_heap_profiling_embed_count) {
+ set_heap_profiling_enabled(false);
+ }
+}
+
+#else
+
+void set_heap_profiling_enabled(bool enable) {
+ seastar_logger.warn("Seastar compiled without heap profiling support, heap profiler not supported;"
+ " compile with the Seastar_HEAP_PROFILING=ON CMake option to add heap profiling support");
+}
+
+scoped_heap_profiling::scoped_heap_profiling() noexcept {
+ set_heap_profiling_enabled(true); // let it print the warning
+}
+
+scoped_heap_profiling::~scoped_heap_profiling() {
+}
+
+#endif
+
// Smallest index i such that all spans stored in the index are >= pages.
static inline
unsigned index_of(unsigned pages) {
if (span) {
return span;
}
- if (run_reclaimers(reclaimer_scope::sync) == reclaiming_result::reclaimed_nothing) {
+ if (run_reclaimers(reclaimer_scope::sync, n_pages) == reclaiming_result::reclaimed_nothing) {
return nullptr;
}
}
void cpu_pages::maybe_reclaim() {
if (nr_free_pages < current_min_free_pages) {
drain_cross_cpu_freelist();
- run_reclaimers(reclaimer_scope::sync);
+ if (nr_free_pages < current_min_free_pages) {
+ run_reclaimers(reclaimer_scope::sync, current_min_free_pages - nr_free_pages);
+ }
if (nr_free_pages < current_min_free_pages) {
schedule_reclaim();
}
void
cpu_pages::warn_large_allocation(size_t size) {
++g_large_allocs;
- seastar_memory_logger.warn("oversized allocation: {} bytes, please report: at {}", size, current_backtrace());
+ seastar_memory_logger.warn("oversized allocation: {} bytes. This is non-fatal, but could lead to latency and/or fragmentation issues. Please report: at {}", size, current_backtrace());
large_allocation_warning_threshold *= 1.618; // prevent spam
}
std::memcpy(old_mem, relocated_old_mem.get(), bytes);
}
-void cpu_pages::init_virt_to_phys_map() {
- auto nr_entries = nr_pages / (huge_page_size / page_size);
- virt_to_phys_map.resize(nr_entries);
- auto fd = file_desc::open("/proc/self/pagemap", O_RDONLY | O_CLOEXEC);
- for (size_t i = 0; i != nr_entries; ++i) {
- uint64_t entry = 0;
- auto phys = std::numeric_limits<physical_address>::max();
- auto pfn = reinterpret_cast<uintptr_t>(mem() + i * huge_page_size) / page_size;
- fd.pread(&entry, 8, pfn * 8);
- assert(entry & 0x8000'0000'0000'0000);
- phys = (entry & 0x007f'ffff'ffff'ffff) << page_bits;
- virt_to_phys_map[i] = phys;
- }
-}
-
-translation
-cpu_pages::translate(const void* addr, size_t size) {
- auto a = reinterpret_cast<uintptr_t>(addr) - reinterpret_cast<uintptr_t>(mem());
- auto pfn = a / huge_page_size;
- if (pfn >= virt_to_phys_map.size()) {
- return {};
- }
- auto phys = virt_to_phys_map[pfn];
- if (phys == std::numeric_limits<physical_address>::max()) {
- return {};
- }
- auto translation_size = align_up(a + 1, huge_page_size) - a;
- size = std::min(size, translation_size);
- phys += a & (huge_page_size - 1);
- return translation{phys, size};
-}
-
void cpu_pages::do_resize(size_t new_size, allocate_system_memory_fn alloc_sys_mem) {
auto new_pages = new_size / page_size;
if (new_pages <= nr_pages) {
}
}
-reclaiming_result cpu_pages::run_reclaimers(reclaimer_scope scope) {
- auto target = std::max(nr_free_pages + 1, min_free_pages);
+reclaiming_result cpu_pages::run_reclaimers(reclaimer_scope scope, size_t n_pages) {
+ auto target = std::max<size_t>(nr_free_pages + n_pages, min_free_pages);
reclaiming_result result = reclaiming_result::reclaimed_nothing;
while (nr_free_pages < target) {
bool made_progress = false;
++g_reclaims;
for (auto&& r : reclaimers) {
if (r->scope() >= scope) {
- made_progress |= r->do_reclaim() == reclaiming_result::reclaimed_something;
+ made_progress |= r->do_reclaim((target - nr_free_pages) * page_size) == reclaiming_result::reclaimed_something;
}
}
if (!made_progress) {
reclaim_hook([this] {
if (nr_free_pages < min_free_pages) {
try {
- run_reclaimers(reclaimer_scope::async);
+ run_reclaimers(reclaimer_scope::async, min_free_pages - nr_free_pages);
} catch (...) {
current_min_free_pages = min_free_pages;
throw;
return;
}
}
- _pages_in_use += span_size;
auto span = cpu_mem.to_page(data);
+ span_size = span->span_size;
+ _pages_in_use += span_size;
for (unsigned i = 0; i < span_size; ++i) {
span[i].offset_in_span = i;
span[i].pool = this;
return cpu_pages::all_cpus[object_cpu_id(ptr)]->object_size(ptr);
}
+// Mark as cold so that GCC8+ can move to .text.unlikely.
+[[gnu::cold]]
+static void init_cpu_mem_ptr(cpu_pages*& cpu_mem_ptr) {
+ cpu_mem_ptr = &cpu_mem;
+};
+
[[gnu::always_inline]]
static inline cpu_pages& get_cpu_mem()
{
// whether the object has already been constructed.
static thread_local cpu_pages* cpu_mem_ptr;
if (__builtin_expect(!bool(cpu_mem_ptr), false)) {
- // Mark as cold so that GCC8+ can move this part of the function
- // to .text.unlikely.
- [&] () [[gnu::cold]] {
- cpu_mem_ptr = &cpu_mem;
- }();
+ init_cpu_mem_ptr(cpu_mem_ptr);
}
return *cpu_mem_ptr;
}
cpu_mem.set_reclaim_hook(hook);
}
-reclaimer::reclaimer(reclaim_fn reclaim, reclaimer_scope scope)
+reclaimer::reclaimer(std::function<reclaiming_result ()> reclaim, reclaimer_scope scope)
+ : reclaimer([reclaim = std::move(reclaim)] (request) {
+ return reclaim();
+ }, scope) {
+}
+
+reclaimer::reclaimer(std::function<reclaiming_result (request)> reclaim, reclaimer_scope scope)
: _reclaim(std::move(reclaim))
, _scope(scope) {
cpu_mem.reclaimers.push_back(this);
#endif
pos += x.bytes;
}
- if (hugetlbfs_path) {
- cpu_mem.init_virt_to_phys_map();
- }
}
statistics stats() {
return cpu_mem.drain_cross_cpu_freelist();
}
-translation
-translate(const void* addr, size_t size) {
- auto cpu_id = object_cpu_id(addr);
- if (cpu_id >= max_cpus) {
- return {};
- }
- auto cp = cpu_pages::all_cpus[cpu_id];
- if (!cp) {
- return {};
- }
- return cp->translate(addr, size);
-}
-
memory_layout get_memory_layout() {
return cpu_mem.memory_layout();
}
extern "C"
[[gnu::visibility("default")]]
-[[gnu::externally_visible]]
+[[gnu::used]]
void* malloc(size_t n) throw () {
if (try_trigger_error_injector()) {
return nullptr;
extern "C"
[[gnu::alias("malloc")]]
[[gnu::visibility("default")]]
+[[gnu::malloc]]
+[[gnu::alloc_size(1)]]
+#ifndef __clang__
+[[gnu::leaf]]
+#endif
void* __libc_malloc(size_t n) throw ();
extern "C"
[[gnu::visibility("default")]]
-[[gnu::externally_visible]]
+[[gnu::used]]
void free(void* ptr) {
if (ptr) {
seastar::memory::free(ptr);
extern "C"
[[gnu::alias("free")]]
[[gnu::visibility("default")]]
+#ifndef __clang__
+[[gnu::leaf]]
+#endif
void __libc_free(void* obj) throw ();
extern "C"
extern "C"
[[gnu::alias("calloc")]]
[[gnu::visibility("default")]]
+[[gnu::alloc_size(1, 2)]]
+[[gnu::malloc]]
+#ifndef __clang__
+[[gnu::leaf]]
+#endif
void* __libc_calloc(size_t n, size_t m) throw ();
extern "C"
extern "C"
[[gnu::alias("realloc")]]
[[gnu::visibility("default")]]
+[[gnu::alloc_size(2)]]
+#ifndef __clang__
+[[gnu::leaf]]
+#endif
void* __libc_realloc(void* obj, size_t size) throw ();
extern "C"
[[gnu::visibility("default")]]
-[[gnu::externally_visible]]
-int posix_memalign(void** ptr, size_t align, size_t size) {
+[[gnu::used]]
+#ifndef __clang__
+[[gnu::leaf]]
+#endif
+[[gnu::nonnull(1)]]
+int posix_memalign(void** ptr, size_t align, size_t size) throw () {
if (try_trigger_error_injector()) {
return ENOMEM;
}
extern "C"
[[gnu::alias("posix_memalign")]]
[[gnu::visibility("default")]]
+#ifndef __clang__
+[[gnu::leaf]]
+#endif
+[[gnu::nonnull(1)]]
int __libc_posix_memalign(void** ptr, size_t align, size_t size) throw ();
extern "C"
[[gnu::visibility("default")]]
-void* memalign(size_t align, size_t size) {
+[[gnu::malloc]]
+#if defined(__GLIBC__) && __GLIBC_PREREQ(2, 30)
+[[gnu::alloc_size(2)]]
+#endif
+void* memalign(size_t align, size_t size) throw () {
if (try_trigger_error_injector()) {
return nullptr;
}
extern "C"
[[gnu::visibility("default")]]
-void *aligned_alloc(size_t align, size_t size) {
+void *aligned_alloc(size_t align, size_t size) throw () {
if (try_trigger_error_injector()) {
return nullptr;
}
extern "C"
[[gnu::alias("memalign")]]
[[gnu::visibility("default")]]
-void* __libc_memalign(size_t align, size_t size);
+[[gnu::malloc]]
+#if defined(__GLIBC__) && __GLIBC_PREREQ(2, 30)
+[[gnu::alloc_size(2)]]
+#endif
+void* __libc_memalign(size_t align, size_t size) throw ();
extern "C"
[[gnu::visibility("default")]]
-void cfree(void* obj) {
+void cfree(void* obj) throw () {
return ::free(obj);
}
extern "C"
[[gnu::alias("cfree")]]
[[gnu::visibility("default")]]
-void __libc_cfree(void* obj);
+void __libc_cfree(void* obj) throw ();
extern "C"
[[gnu::visibility("default")]]
seastar_logger.warn("Seastar compiled with default allocator, heap profiler not supported");
}
+scoped_heap_profiling::scoped_heap_profiling() noexcept {
+ set_heap_profiling_enabled(true); // let it print the warning
+}
+
+scoped_heap_profiling::~scoped_heap_profiling() {
+}
+
void enable_abort_on_allocation_failure() {
seastar_logger.warn("Seastar compiled with default allocator, will not abort on bad_alloc");
}
-reclaimer::reclaimer(reclaim_fn reclaim, reclaimer_scope) {
+reclaimer::reclaimer(std::function<reclaiming_result ()> reclaim, reclaimer_scope) {
+}
+
+reclaimer::reclaimer(std::function<reclaiming_result (request)> reclaim, reclaimer_scope) {
}
reclaimer::~reclaimer() {
return false;
}
-translation
-translate(const void* addr, size_t size) {
- return {};
-}
-
memory_layout get_memory_layout() {
throw std::runtime_error("get_memory_layout() not supported");
}
/// \endcond
}
-