// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#include "memory/arena.h"
#ifndef OS_WIN
#include <sys/mman.h>
#endif
#include <algorithm>
#include "logging/logging.h"
#include "port/malloc.h"
#include "port/port.h"
#include "rocksdb/env.h"
#include "test_util/sync_point.h"

namespace ROCKSDB_NAMESPACE {

// MSVC complains that it is already defined since it is static in the header.
#ifndef _MSC_VER
const size_t Arena::kInlineSize;
#endif

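// kMinBlockSize and kMaxBlockSize bound the block sizes accepted by
// OptimizeBlockSize() below; 2u << 30 is 2 GiB.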
const size_t Arena::kMinBlockSize = 4096;
const size_t Arena::kMaxBlockSize = 2u << 30;
static const int kAlignUnit = alignof(max_align_t);

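// Illustrative examples (assuming kAlignUnit is 16, a common value of
// alignof(max_align_t) on x86-64): OptimizeBlockSize(5000) rounds up to 5008,
// while OptimizeBlockSize(100) is first clamped to kMinBlockSize (4096),
// which is already a multiple of the alignment unit.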
size_t OptimizeBlockSize(size_t block_size) {
  // Make sure block_size is in the optimal range
  block_size = std::max(Arena::kMinBlockSize, block_size);
  block_size = std::min(Arena::kMaxBlockSize, block_size);

  // Make sure block_size is a multiple of kAlignUnit
  if (block_size % kAlignUnit != 0) {
    block_size = (1 + block_size / kAlignUnit) * kAlignUnit;
  }

  return block_size;
}

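// Worked example for the MAP_HUGETLB rounding below (values illustrative):
// with huge_page_size of 2 MiB and kBlockSize of 5 MiB, hugetlb_size_ is
// rounded up to 6 MiB so that one huge-page mapping can hold a whole block.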
Arena::Arena(size_t block_size, AllocTracker* tracker, size_t huge_page_size)
    : kBlockSize(OptimizeBlockSize(block_size)), tracker_(tracker) {
  assert(kBlockSize >= kMinBlockSize && kBlockSize <= kMaxBlockSize &&
         kBlockSize % kAlignUnit == 0);
  TEST_SYNC_POINT_CALLBACK("Arena::Arena:0", const_cast<size_t*>(&kBlockSize));
  alloc_bytes_remaining_ = sizeof(inline_block_);
  blocks_memory_ += alloc_bytes_remaining_;
  aligned_alloc_ptr_ = inline_block_;
  unaligned_alloc_ptr_ = inline_block_ + alloc_bytes_remaining_;
#ifdef MAP_HUGETLB
  hugetlb_size_ = huge_page_size;
  if (hugetlb_size_ && kBlockSize > hugetlb_size_) {
    hugetlb_size_ = ((kBlockSize - 1U) / hugetlb_size_ + 1U) * hugetlb_size_;
  }
#else
  (void)huge_page_size;
#endif
  if (tracker_ != nullptr) {
    tracker_->Allocate(kInlineSize);
  }
}

Arena::~Arena() {
  if (tracker_ != nullptr) {
    assert(tracker_->is_freed());
    tracker_->FreeMem();
  }
  for (const auto& block : blocks_) {
    delete[] block;
  }

#ifdef MAP_HUGETLB
  for (const auto& mmap_info : huge_blocks_) {
    if (mmap_info.addr_ == nullptr) {
      continue;
    }
    auto ret = munmap(mmap_info.addr_, mmap_info.length_);
    if (ret != 0) {
      // TODO(sdong): Better handling
    }
  }
#endif
}

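// Called when the current block cannot satisfy a request. Aligned requests
// are carved from the front of a block (aligned_alloc_ptr_) and unaligned
// requests from the back (unaligned_alloc_ptr_), so both kinds can share one
// block without extra alignment bookkeeping on the unaligned side.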
char* Arena::AllocateFallback(size_t bytes, bool aligned) {
  if (bytes > kBlockSize / 4) {
    ++irregular_block_num;
    // Object is more than a quarter of our block size. Allocate it separately
    // to avoid wasting too much space in leftover bytes.
    return AllocateNewBlock(bytes);
  }

  // We waste the remaining space in the current block.
  size_t size = 0;
  char* block_head = nullptr;
#ifdef MAP_HUGETLB
  if (hugetlb_size_) {
    size = hugetlb_size_;
    block_head = AllocateFromHugePage(size);
  }
#endif
  if (!block_head) {
    size = kBlockSize;
    block_head = AllocateNewBlock(size);
  }
  alloc_bytes_remaining_ = size - bytes;

  if (aligned) {
    aligned_alloc_ptr_ = block_head + bytes;
    unaligned_alloc_ptr_ = block_head + size;
    return block_head;
  } else {
    aligned_alloc_ptr_ = block_head;
    unaligned_alloc_ptr_ = block_head + size - bytes;
    return unaligned_alloc_ptr_;
  }
}

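// Note: anonymous MAP_HUGETLB mappings generally succeed only when huge pages
// have been reserved in the kernel pool (e.g. via /proc/sys/vm/nr_hugepages);
// otherwise mmap() returns MAP_FAILED and callers fall back to regular blocks.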
char* Arena::AllocateFromHugePage(size_t bytes) {
#ifdef MAP_HUGETLB
  if (hugetlb_size_ == 0) {
    return nullptr;
  }
  // Reserve space in `huge_blocks_` before calling `mmap`.
  // Use `emplace_back()` instead of `reserve()` to let std::vector manage its
  // own memory and do fewer reallocations.
  //
  // - If `emplace_back` throws, no memory leaks because we haven't called
  //   `mmap` yet.
  // - If `mmap` fails, no memory leaks because nothing has been mapped; the
  //   null placeholder entry is skipped by the destructor.
  huge_blocks_.emplace_back(nullptr /* addr */, 0 /* length */);

  void* addr = mmap(nullptr, bytes, (PROT_READ | PROT_WRITE),
                    (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB), -1, 0);

  if (addr == MAP_FAILED) {
    return nullptr;
  }
  huge_blocks_.back() = MmapInfo(addr, bytes);
  blocks_memory_ += bytes;
  if (tracker_ != nullptr) {
    tracker_->Allocate(bytes);
  }
  return reinterpret_cast<char*>(addr);
#else
  (void)bytes;
  return nullptr;
#endif
}

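// When huge_page_size is non-zero, the request is first rounded up to a whole
// number of huge pages before calling AllocateFromHugePage. For illustration:
// bytes of 3 MiB with 2 MiB huge pages reserves 4 MiB.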
char* Arena::AllocateAligned(size_t bytes, size_t huge_page_size,
                             Logger* logger) {
  assert((kAlignUnit & (kAlignUnit - 1)) ==
         0);  // The alignment unit must be a power of 2

#ifdef MAP_HUGETLB
  if (huge_page_size > 0 && bytes > 0) {
    // Allocate from huge pages to reduce TLB pressure.
    assert(logger != nullptr);  // logger needs to be passed in.
    size_t reserved_size =
        ((bytes - 1U) / huge_page_size + 1U) * huge_page_size;
    assert(reserved_size >= bytes);

    char* addr = AllocateFromHugePage(reserved_size);
    if (addr == nullptr) {
      ROCKS_LOG_WARN(logger,
                     "AllocateAligned failed to allocate huge TLB pages: %s",
                     strerror(errno));
      // fall back to regular block allocation
    } else {
      return addr;
    }
  }
#else
  (void)huge_page_size;
  (void)logger;
#endif

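  // Example of the alignment math below, assuming kAlignUnit is 16: if
  // aligned_alloc_ptr_ currently ends in 0x...4, current_mod is 4, slop is 12,
  // and the returned pointer is advanced 12 bytes to the next 16-byte boundary.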
  size_t current_mod =
      reinterpret_cast<uintptr_t>(aligned_alloc_ptr_) & (kAlignUnit - 1);
  size_t slop = (current_mod == 0 ? 0 : kAlignUnit - current_mod);
  size_t needed = bytes + slop;
  char* result;
  if (needed <= alloc_bytes_remaining_) {
    result = aligned_alloc_ptr_ + slop;
    aligned_alloc_ptr_ += needed;
    alloc_bytes_remaining_ -= needed;
  } else {
    // AllocateFallback always returns aligned memory
    result = AllocateFallback(bytes, true /* aligned */);
  }
  assert((reinterpret_cast<uintptr_t>(result) & (kAlignUnit - 1)) == 0);
  return result;
}

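// When ROCKSDB_MALLOC_USABLE_SIZE is available, the arena charges the
// allocator-reported usable size of the new block (which may exceed
// block_bytes) to blocks_memory_ and to the tracker, not the requested size.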
char* Arena::AllocateNewBlock(size_t block_bytes) {
  // Reserve space in `blocks_` before allocating memory via new.
  // Use `emplace_back()` instead of `reserve()` to let std::vector manage its
  // own memory and do fewer reallocations.
  //
  // - If `emplace_back` throws, no memory leaks because we haven't called `new`
  //   yet.
  // - If `new` throws, no memory leaks because the vector will be cleaned up
  //   via RAII.
  blocks_.emplace_back(nullptr);

  char* block = new char[block_bytes];
  size_t allocated_size;
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
  allocated_size = malloc_usable_size(block);
#ifndef NDEBUG
  // It's hard to predict what malloc_usable_size() returns.
  // A callback allows tests to adjust the size that gets charged.
  std::pair<size_t*, size_t*> pair(&allocated_size, &block_bytes);
  TEST_SYNC_POINT_CALLBACK("Arena::AllocateNewBlock:0", &pair);
#endif  // NDEBUG
#else
  allocated_size = block_bytes;
#endif  // ROCKSDB_MALLOC_USABLE_SIZE
  blocks_memory_ += allocated_size;
  if (tracker_ != nullptr) {
    tracker_->Allocate(allocated_size);
  }
  blocks_.back() = block;
  return block;
}
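
// Minimal usage sketch (illustrative only; the default arguments are assumed
// to match the declarations in memory/arena.h and may differ across RocksDB
// versions):
//
//   Arena arena;                           // starts with the inline block
//   char* p = arena.AllocateAligned(128);  // carved from the front of a block
//   char* q = arena.Allocate(64);          // carved from the back of a block
//   // all blocks are freed when `arena` goes out of scope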

}  // namespace ROCKSDB_NAMESPACE