// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

#include "util/jemalloc_nodump_allocator.h"

#include <string>
#include <thread>

#include "port/likely.h"
#include "port/port.h"
#include "util/string_util.h"

namespace rocksdb {

#ifdef ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR

std::atomic<extent_alloc_t*> JemallocNodumpAllocator::original_alloc_{nullptr};

JemallocNodumpAllocator::JemallocNodumpAllocator(
    JemallocAllocatorOptions& options,
    std::unique_ptr<extent_hooks_t>&& arena_hooks, unsigned arena_index)
    : options_(options),
      arena_hooks_(std::move(arena_hooks)),
      arena_index_(arena_index),
      tcache_(&JemallocNodumpAllocator::DestroyThreadSpecificCache) {}

int JemallocNodumpAllocator::GetThreadSpecificCache(size_t size) {
  // We always enable tcache. The only corner case is when a large number of
  // threads allocate at low frequency; the tcaches could then consume a lot
  // of memory (up to # threads * ~1MB) without bringing much benefit.
  if (options_.limit_tcache_size && (size <= options_.tcache_size_lower_bound ||
                                     size > options_.tcache_size_upper_bound)) {
    return MALLOCX_TCACHE_NONE;
  }
  unsigned* tcache_index = reinterpret_cast<unsigned*>(tcache_.Get());
  if (UNLIKELY(tcache_index == nullptr)) {
    // Instantiate tcache.
    tcache_index = new unsigned(0);
    size_t tcache_index_size = sizeof(unsigned);
    int ret =
        mallctl("tcache.create", tcache_index, &tcache_index_size, nullptr, 0);
    if (ret != 0) {
      // No good way to expose the error. Silently disable tcache.
      delete tcache_index;
      return MALLOCX_TCACHE_NONE;
    }
    tcache_.Reset(static_cast<void*>(tcache_index));
  }
  return MALLOCX_TCACHE(*tcache_index);
}

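// Allocate() routes the request to this allocator's dedicated arena via
// MALLOCX_ARENA(); the tcache flag returned by GetThreadSpecificCache()
// either selects the caller's explicit thread cache or bypasses caching for
// this request.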
void* JemallocNodumpAllocator::Allocate(size_t size) {
  int tcache_flag = GetThreadSpecificCache(size);
  return mallocx(size, MALLOCX_ARENA(arena_index_) | tcache_flag);
}

void JemallocNodumpAllocator::Deallocate(void* p) {
  // Obtain tcache.
  size_t size = 0;
  if (options_.limit_tcache_size) {
    size = malloc_usable_size(p);
  }
  int tcache_flag = GetThreadSpecificCache(size);
  // No need to pass arena index to dallocx(). Jemalloc will find arena index
  // from its own metadata.
  dallocx(p, tcache_flag);
}

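// Custom extent allocation hook. It delegates the actual mapping to
// jemalloc's original extent alloc hook and then marks the new range with
// MADV_DONTDUMP so that memory owned by this arena is excluded from core
// dumps.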
void* JemallocNodumpAllocator::Alloc(extent_hooks_t* extent, void* new_addr,
                                     size_t size, size_t alignment, bool* zero,
                                     bool* commit, unsigned arena_ind) {
  extent_alloc_t* original_alloc =
      original_alloc_.load(std::memory_order_relaxed);
  assert(original_alloc != nullptr);
  void* result = original_alloc(extent, new_addr, size, alignment, zero, commit,
                                arena_ind);
  if (result != nullptr) {
    int ret = madvise(result, size, MADV_DONTDUMP);
    if (ret != 0) {
      fprintf(stderr,
              "JemallocNodumpAllocator failed to set MADV_DONTDUMP, error "
              "code: %d\n",
              ret);
      assert(false);
    }
  }
  return result;
}

Status JemallocNodumpAllocator::DestroyArena(unsigned arena_index) {
  assert(arena_index != 0);
  std::string key = "arena." + ToString(arena_index) + ".destroy";
  int ret = mallctl(key.c_str(), nullptr, 0, nullptr, 0);
  if (ret != 0) {
    return Status::Incomplete("Failed to destroy jemalloc arena, error code: " +
                              ToString(ret));
  }
  return Status::OK();
}

void JemallocNodumpAllocator::DestroyThreadSpecificCache(void* ptr) {
  assert(ptr != nullptr);
  unsigned* tcache_index = static_cast<unsigned*>(ptr);
  size_t tcache_index_size = sizeof(unsigned);
  int ret __attribute__((__unused__)) =
      mallctl("tcache.destroy", nullptr, 0, tcache_index, tcache_index_size);
  // Silently ignore error.
  assert(ret == 0);
  delete tcache_index;
}

JemallocNodumpAllocator::~JemallocNodumpAllocator() {
  // Destroy tcache before destroying arena.
  autovector<void*> tcache_list;
  tcache_.Scrape(&tcache_list, nullptr);
  for (void* tcache_index : tcache_list) {
    DestroyThreadSpecificCache(tcache_index);
  }
  // Destroy arena. Silently ignore error.
  Status s __attribute__((__unused__)) = DestroyArena(arena_index_);
  assert(s.ok());
}

size_t JemallocNodumpAllocator::UsableSize(void* p,
                                           size_t /*allocation_size*/) const {
  return malloc_usable_size(static_cast<void*>(p));
}
#endif  // ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR

Status NewJemallocNodumpAllocator(
    JemallocAllocatorOptions& options,
    std::shared_ptr<MemoryAllocator>* memory_allocator) {
  *memory_allocator = nullptr;
  Status unsupported = Status::NotSupported(
      "JemallocNodumpAllocator is only available with jemalloc version >= 5 "
      "when MADV_DONTDUMP is available.");
#ifndef ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
  (void)options;
  return unsupported;
#else
  if (!HasJemalloc()) {
    return unsupported;
  }
  if (memory_allocator == nullptr) {
    return Status::InvalidArgument("memory_allocator must be non-null.");
  }
  if (options.limit_tcache_size &&
      options.tcache_size_lower_bound >= options.tcache_size_upper_bound) {
    return Status::InvalidArgument(
        "tcache_size_lower_bound is larger than or equal to "
        "tcache_size_upper_bound.");
  }

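  // The steps below: create a dedicated jemalloc arena, read its current
  // extent hooks, install a copy of those hooks with the alloc callback
  // replaced by JemallocNodumpAllocator::Alloc (which applies MADV_DONTDUMP),
  // and finally wrap the arena in a JemallocNodumpAllocator instance.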
  // Create arena.
  unsigned arena_index = 0;
  size_t arena_index_size = sizeof(arena_index);
  int ret =
      mallctl("arenas.create", &arena_index, &arena_index_size, nullptr, 0);
  if (ret != 0) {
    return Status::Incomplete("Failed to create jemalloc arena, error code: " +
                              ToString(ret));
  }
  assert(arena_index != 0);

  // Read existing hooks.
  std::string key = "arena." + ToString(arena_index) + ".extent_hooks";
  extent_hooks_t* hooks;
  size_t hooks_size = sizeof(hooks);
  ret = mallctl(key.c_str(), &hooks, &hooks_size, nullptr, 0);
  if (ret != 0) {
    JemallocNodumpAllocator::DestroyArena(arena_index);
    return Status::Incomplete("Failed to read existing hooks, error code: " +
                              ToString(ret));
  }

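  // original_alloc_ is a process-wide static shared by all allocator
  // instances, so every arena created here must report the same original
  // alloc hook; if a different hook is already recorded, creation fails.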
  // Store existing alloc.
  extent_alloc_t* original_alloc = hooks->alloc;
  extent_alloc_t* expected = nullptr;
  bool success =
      JemallocNodumpAllocator::original_alloc_.compare_exchange_strong(
          expected, original_alloc);
  if (!success && original_alloc != expected) {
    JemallocNodumpAllocator::DestroyArena(arena_index);
    return Status::Incomplete("Original alloc conflict.");
  }

  // Set the custom hook.
  std::unique_ptr<extent_hooks_t> new_hooks(new extent_hooks_t(*hooks));
  new_hooks->alloc = &JemallocNodumpAllocator::Alloc;
  extent_hooks_t* hooks_ptr = new_hooks.get();
  ret = mallctl(key.c_str(), nullptr, nullptr, &hooks_ptr, sizeof(hooks_ptr));
  if (ret != 0) {
    JemallocNodumpAllocator::DestroyArena(arena_index);
    return Status::Incomplete("Failed to set custom hook, error code: " +
                              ToString(ret));
  }

  // Create cache allocator.
  memory_allocator->reset(
      new JemallocNodumpAllocator(options, std::move(new_hooks), arena_index));
  return Status::OK();
#endif  // ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
}

}  // namespace rocksdb
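
// Illustrative usage sketch (not part of this translation unit). It shows how
// a caller might obtain the allocator through NewJemallocNodumpAllocator() and
// attach it to a block cache. LRUCacheOptions::memory_allocator and
// NewLRUCache(const LRUCacheOptions&) are assumed to exist in the RocksDB
// build being used; treat those names, and the option values, as assumptions
// rather than as part of this file.
//
//   rocksdb::JemallocAllocatorOptions jopts;
//   jopts.limit_tcache_size = true;          // only cache mid-sized requests
//   jopts.tcache_size_lower_bound = 1024;    // hypothetical bounds
//   jopts.tcache_size_upper_bound = 16 * 1024;
//
//   std::shared_ptr<rocksdb::MemoryAllocator> allocator;
//   rocksdb::Status s = rocksdb::NewJemallocNodumpAllocator(jopts, &allocator);
//   if (s.ok()) {
//     rocksdb::LRUCacheOptions cache_opts;   // assumed cache API
//     cache_opts.memory_allocator = allocator;
//     std::shared_ptr<rocksdb::Cache> cache = rocksdb::NewLRUCache(cache_opts);
//   }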