ceph/src/rocksdb/util/arena.cc
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#include "util/arena.h"
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
#ifdef OS_FREEBSD
#include <malloc_np.h>
#else
#include <malloc.h>
#endif
#endif
#ifndef OS_WIN
#include <sys/mman.h>
#endif
#include <algorithm>
#include "port/port.h"
#include "rocksdb/env.h"
#include "util/logging.h"
#include "util/sync_point.h"

namespace rocksdb {

// MSVC complains that it is already defined since it is static in the header.
#ifndef _MSC_VER
const size_t Arena::kInlineSize;
#endif

const size_t Arena::kMinBlockSize = 4096;
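// 2u << 30 is 2 GiB, the hard upper bound on a single arena block.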
const size_t Arena::kMaxBlockSize = 2u << 30;
static const int kAlignUnit = alignof(max_align_t);

size_t OptimizeBlockSize(size_t block_size) {
  // Make sure block_size is in the optimal range.
  block_size = std::max(Arena::kMinBlockSize, block_size);
  block_size = std::min(Arena::kMaxBlockSize, block_size);

  // Make sure block_size is a multiple of kAlignUnit.
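  // For example, with kAlignUnit == 16 (alignof(max_align_t) on typical
  // 64-bit platforms), a requested block_size of 5000 is rounded up to 5008.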
  if (block_size % kAlignUnit != 0) {
    block_size = (1 + block_size / kAlignUnit) * kAlignUnit;
  }

  return block_size;
}

Arena::Arena(size_t block_size, AllocTracker* tracker, size_t huge_page_size)
    : kBlockSize(OptimizeBlockSize(block_size)), tracker_(tracker) {
  assert(kBlockSize >= kMinBlockSize && kBlockSize <= kMaxBlockSize &&
         kBlockSize % kAlignUnit == 0);
  TEST_SYNC_POINT_CALLBACK("Arena::Arena:0", const_cast<size_t*>(&kBlockSize));
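  // Allocations are first carved out of the inline buffer embedded in the
  // Arena object itself; heap blocks are only allocated once it is exhausted.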
  alloc_bytes_remaining_ = sizeof(inline_block_);
  blocks_memory_ += alloc_bytes_remaining_;
  aligned_alloc_ptr_ = inline_block_;
  unaligned_alloc_ptr_ = inline_block_ + alloc_bytes_remaining_;
#ifdef MAP_HUGETLB
  hugetlb_size_ = huge_page_size;
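  // Round the block size up to a whole number of huge pages, e.g. with
  // 2 MiB huge pages and a 5 MiB kBlockSize, hugetlb_size_ becomes 6 MiB.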
  if (hugetlb_size_ && kBlockSize > hugetlb_size_) {
    hugetlb_size_ = ((kBlockSize - 1U) / hugetlb_size_ + 1U) * hugetlb_size_;
  }
#else
  (void)huge_page_size;
#endif
  if (tracker_ != nullptr) {
    tracker_->Allocate(kInlineSize);
  }
}

Arena::~Arena() {
  if (tracker_ != nullptr) {
    assert(tracker_->is_freed());
    tracker_->FreeMem();
  }
  for (const auto& block : blocks_) {
    delete[] block;
  }

#ifdef MAP_HUGETLB
  for (const auto& mmap_info : huge_blocks_) {
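    // Skip placeholder entries that were reserved in AllocateFromHugePage()
    // but never backed by a successful mmap().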
    if (mmap_info.addr_ == nullptr) {
      continue;
    }
    auto ret = munmap(mmap_info.addr_, mmap_info.length_);
    if (ret != 0) {
      // TODO(sdong): Better handling
    }
  }
#endif
}

char* Arena::AllocateFallback(size_t bytes, bool aligned) {
  if (bytes > kBlockSize / 4) {
    ++irregular_block_num;
    // Object is more than a quarter of our block size. Allocate it separately
    // to avoid wasting too much space in leftover bytes.
    return AllocateNewBlock(bytes);
  }

  // We waste the remaining space in the current block.
  size_t size = 0;
  char* block_head = nullptr;
#ifdef MAP_HUGETLB
  if (hugetlb_size_) {
    size = hugetlb_size_;
    block_head = AllocateFromHugePage(size);
  }
#endif
  if (!block_head) {
    size = kBlockSize;
    block_head = AllocateNewBlock(size);
  }
  alloc_bytes_remaining_ = size - bytes;

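  // Aligned requests are served from the front of the new block and unaligned
  // requests from the back, so both kinds can share the remaining space.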
  if (aligned) {
    aligned_alloc_ptr_ = block_head + bytes;
    unaligned_alloc_ptr_ = block_head + size;
    return block_head;
  } else {
    aligned_alloc_ptr_ = block_head;
    unaligned_alloc_ptr_ = block_head + size - bytes;
    return unaligned_alloc_ptr_;
  }
}

char* Arena::AllocateFromHugePage(size_t bytes) {
#ifdef MAP_HUGETLB
  if (hugetlb_size_ == 0) {
    return nullptr;
  }
  // Reserve space in `huge_blocks_` before calling `mmap`.
  // Use `emplace_back()` instead of `reserve()` to let std::vector manage its
  // own memory and do fewer reallocations.
  //
  // - If `emplace_back` throws, no memory leaks because we haven't called
  //   `mmap` yet.
  // - If `mmap` fails, we return nullptr; the placeholder entry (with a null
  //   addr) is simply skipped when the arena is destroyed, so nothing leaks.
  huge_blocks_.emplace_back(nullptr /* addr */, 0 /* length */);

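  // Note: MAP_HUGETLB allocations come from the kernel's reserved huge page
  // pool (e.g. vm.nr_hugepages on Linux); if no huge pages are available,
  // mmap() fails and callers fall back to regular blocks.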
  void* addr = mmap(nullptr, bytes, (PROT_READ | PROT_WRITE),
                    (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB), -1, 0);

  if (addr == MAP_FAILED) {
    return nullptr;
  }
  huge_blocks_.back() = MmapInfo(addr, bytes);
  blocks_memory_ += bytes;
  if (tracker_ != nullptr) {
    tracker_->Allocate(bytes);
  }
  return reinterpret_cast<char*>(addr);
#else
  (void)bytes;
  return nullptr;
#endif
}

char* Arena::AllocateAligned(size_t bytes, size_t huge_page_size,
                             Logger* logger) {
  assert((kAlignUnit & (kAlignUnit - 1)) ==
         0);  // kAlignUnit should be a power of 2

#ifdef MAP_HUGETLB
  if (huge_page_size > 0 && bytes > 0) {
    // Allocate from huge pages (reduces TLB pressure).
    assert(logger != nullptr);  // logger needs to be passed in.
    size_t reserved_size =
        ((bytes - 1U) / huge_page_size + 1U) * huge_page_size;
    assert(reserved_size >= bytes);

    char* addr = AllocateFromHugePage(reserved_size);
    if (addr == nullptr) {
      ROCKS_LOG_WARN(logger,
                     "AllocateAligned fail to allocate huge TLB pages: %s",
                     strerror(errno));
      // fall back to malloc
    } else {
      return addr;
    }
  }
#else
  (void)huge_page_size;
  (void)logger;
#endif

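  // Bump the aligned pointer up to the next kAlignUnit boundary, e.g. with
  // kAlignUnit == 16 and a pointer ending in 0x...8, current_mod is 8 and
  // slop is 8, so the returned address is 16-byte aligned.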
  size_t current_mod =
      reinterpret_cast<uintptr_t>(aligned_alloc_ptr_) & (kAlignUnit - 1);
  size_t slop = (current_mod == 0 ? 0 : kAlignUnit - current_mod);
  size_t needed = bytes + slop;
  char* result;
  if (needed <= alloc_bytes_remaining_) {
    result = aligned_alloc_ptr_ + slop;
    aligned_alloc_ptr_ += needed;
    alloc_bytes_remaining_ -= needed;
  } else {
    // AllocateFallback always returns aligned memory
    result = AllocateFallback(bytes, true /* aligned */);
  }
  assert((reinterpret_cast<uintptr_t>(result) & (kAlignUnit - 1)) == 0);
  return result;
}

char* Arena::AllocateNewBlock(size_t block_bytes) {
  // Reserve space in `blocks_` before allocating memory via new.
  // Use `emplace_back()` instead of `reserve()` to let std::vector manage its
  // own memory and do fewer reallocations.
  //
  // - If `emplace_back` throws, no memory leaks because we haven't called `new`
  //   yet.
  // - If `new` throws, no memory leaks because the vector will be cleaned up
  //   via RAII.
  blocks_.emplace_back(nullptr);

  char* block = new char[block_bytes];
  size_t allocated_size;
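  // When malloc_usable_size() is available, account for the real usable size
  // of the allocation, which may be slightly larger than block_bytes.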
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
  allocated_size = malloc_usable_size(block);
#ifndef NDEBUG
  // It's hard to predict what malloc_usable_size() returns.
  // A callback can allow users to change the charged size.
  std::pair<size_t*, size_t*> pair(&allocated_size, &block_bytes);
  TEST_SYNC_POINT_CALLBACK("Arena::AllocateNewBlock:0", &pair);
#endif  // NDEBUG
#else
  allocated_size = block_bytes;
#endif  // ROCKSDB_MALLOC_USABLE_SIZE
  blocks_memory_ += allocated_size;
  if (tracker_ != nullptr) {
    tracker_->Allocate(allocated_size);
  }
  blocks_.back() = block;
  return block;
}

}  // namespace rocksdb