// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#include "memory/arena.h"
#ifndef OS_WIN
#include <sys/mman.h>
#endif
#include <algorithm>

#include "logging/logging.h"
#include "port/malloc.h"
#include "port/port.h"
#include "rocksdb/env.h"
#include "test_util/sync_point.h"
#include "util/string_util.h"

namespace ROCKSDB_NAMESPACE {

// MSVC complains that it is already defined since it is static in the header.
#ifndef _MSC_VER
const size_t Arena::kInlineSize;
#endif

const size_t Arena::kMinBlockSize = 4096;
const size_t Arena::kMaxBlockSize = 2u << 30;
static const int kAlignUnit = alignof(max_align_t);

size_t OptimizeBlockSize(size_t block_size) {
  // Make sure block_size is in the optimal range.
  block_size = std::max(Arena::kMinBlockSize, block_size);
  block_size = std::min(Arena::kMaxBlockSize, block_size);

  // Make sure block_size is a multiple of kAlignUnit.
  if (block_size % kAlignUnit != 0) {
    block_size = (1 + block_size / kAlignUnit) * kAlignUnit;
  }
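  // Example: kAlignUnit is alignof(max_align_t) (typically 16 on 64-bit
  // platforms), so a requested block_size of 5000 stays within
  // [kMinBlockSize, kMaxBlockSize] and is rounded up to 5008.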

  return block_size;
}

Arena::Arena(size_t block_size, AllocTracker* tracker, size_t huge_page_size)
    : kBlockSize(OptimizeBlockSize(block_size)), tracker_(tracker) {
  assert(kBlockSize >= kMinBlockSize && kBlockSize <= kMaxBlockSize &&
         kBlockSize % kAlignUnit == 0);
  TEST_SYNC_POINT_CALLBACK("Arena::Arena:0", const_cast<size_t*>(&kBlockSize));
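  // The arena initially serves allocations out of inline_block_, a buffer of
  // kInlineSize bytes embedded in the Arena object itself; heap blocks are
  // only allocated once that space runs out.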
  alloc_bytes_remaining_ = sizeof(inline_block_);
  blocks_memory_ += alloc_bytes_remaining_;
  aligned_alloc_ptr_ = inline_block_;
  unaligned_alloc_ptr_ = inline_block_ + alloc_bytes_remaining_;
#ifdef MAP_HUGETLB
  hugetlb_size_ = huge_page_size;
  if (hugetlb_size_ && kBlockSize > hugetlb_size_) {
    hugetlb_size_ = ((kBlockSize - 1U) / hugetlb_size_ + 1U) * hugetlb_size_;
  }
#else
  (void)huge_page_size;
#endif
  if (tracker_ != nullptr) {
    tracker_->Allocate(kInlineSize);
  }
}

Arena::~Arena() {
  if (tracker_ != nullptr) {
    assert(tracker_->is_freed());
    tracker_->FreeMem();
  }
  for (const auto& block : blocks_) {
    delete[] block;
  }

#ifdef MAP_HUGETLB
  for (const auto& mmap_info : huge_blocks_) {
    if (mmap_info.addr_ == nullptr) {
      continue;
    }
    auto ret = munmap(mmap_info.addr_, mmap_info.length_);
    if (ret != 0) {
      // TODO(sdong): Better handling
    }
  }
#endif
}

char* Arena::AllocateFallback(size_t bytes, bool aligned) {
  if (bytes > kBlockSize / 4) {
    ++irregular_block_num;
    // Object is more than a quarter of our block size. Allocate it separately
    // to avoid wasting too much space in leftover bytes.
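    // For example, with the minimum 4096-byte block size any request larger
    // than 1024 bytes takes this path and gets its own dedicated block.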
    return AllocateNewBlock(bytes);
  }

  // We waste the remaining space in the current block.
  size_t size = 0;
  char* block_head = nullptr;
#ifdef MAP_HUGETLB
  if (hugetlb_size_) {
    size = hugetlb_size_;
    block_head = AllocateFromHugePage(size);
  }
#endif
  if (!block_head) {
    size = kBlockSize;
    block_head = AllocateNewBlock(size);
  }
  alloc_bytes_remaining_ = size - bytes;

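  // Aligned requests are carved from the front of the new block and unaligned
  // requests from the back, so both bump pointers can share one block.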
  if (aligned) {
    aligned_alloc_ptr_ = block_head + bytes;
    unaligned_alloc_ptr_ = block_head + size;
    return block_head;
  } else {
    aligned_alloc_ptr_ = block_head;
    unaligned_alloc_ptr_ = block_head + size - bytes;
    return unaligned_alloc_ptr_;
  }
}

char* Arena::AllocateFromHugePage(size_t bytes) {
#ifdef MAP_HUGETLB
  if (hugetlb_size_ == 0) {
    return nullptr;
  }
  // Reserve space in `huge_blocks_` before calling `mmap`.
  // Use `emplace_back()` instead of `reserve()` to let std::vector manage its
  // own memory and do fewer reallocations.
  //
  // - If `emplace_back` throws, no memory leaks because we haven't called
  //   `mmap` yet.
  // - If `mmap` fails, no memory leaks because the vector will be cleaned up
  //   via RAII.
  huge_blocks_.emplace_back(nullptr /* addr */, 0 /* length */);

  void* addr = mmap(nullptr, bytes, (PROT_READ | PROT_WRITE),
                    (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB), -1, 0);

  if (addr == MAP_FAILED) {
    return nullptr;
  }
  huge_blocks_.back() = MmapInfo(addr, bytes);
  blocks_memory_ += bytes;
  if (tracker_ != nullptr) {
    tracker_->Allocate(bytes);
  }
  return reinterpret_cast<char*>(addr);
#else
  (void)bytes;
  return nullptr;
#endif
}

char* Arena::AllocateAligned(size_t bytes, size_t huge_page_size,
                             Logger* logger) {
  assert((kAlignUnit & (kAlignUnit - 1)) ==
         0);  // kAlignUnit should be a power of 2

#ifdef MAP_HUGETLB
  if (huge_page_size > 0 && bytes > 0) {
    // Allocate from huge pages to reduce TLB pressure.
    size_t reserved_size =
        ((bytes - 1U) / huge_page_size + 1U) * huge_page_size;
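    // reserved_size rounds the request up to whole huge pages: e.g. with a
    // typical 2 MiB huge page size, a 3 MiB request reserves 4 MiB.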
    assert(reserved_size >= bytes);

    char* addr = AllocateFromHugePage(reserved_size);
    if (addr == nullptr) {
      ROCKS_LOG_WARN(logger,
                     "AllocateAligned fail to allocate huge TLB pages: %s",
                     errnoStr(errno).c_str());
      // fall back to malloc
    } else {
      return addr;
    }
  }
#else
  (void)huge_page_size;
  (void)logger;
#endif

  size_t current_mod =
      reinterpret_cast<uintptr_t>(aligned_alloc_ptr_) & (kAlignUnit - 1);
  size_t slop = (current_mod == 0 ? 0 : kAlignUnit - current_mod);
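  // e.g. with kAlignUnit == 16 and a bump pointer ending in 0x...8,
  // current_mod == 8 and slop == 8, so the returned address is 16-byte
  // aligned.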
  size_t needed = bytes + slop;
  char* result;
  if (needed <= alloc_bytes_remaining_) {
    result = aligned_alloc_ptr_ + slop;
    aligned_alloc_ptr_ += needed;
    alloc_bytes_remaining_ -= needed;
  } else {
    // AllocateFallback always returns aligned memory
    result = AllocateFallback(bytes, true /* aligned */);
  }
  assert((reinterpret_cast<uintptr_t>(result) & (kAlignUnit - 1)) == 0);
  return result;
}

char* Arena::AllocateNewBlock(size_t block_bytes) {
  // Reserve space in `blocks_` before allocating memory via new.
  // Use `emplace_back()` instead of `reserve()` to let std::vector manage its
  // own memory and do fewer reallocations.
  //
  // - If `emplace_back` throws, no memory leaks because we haven't called `new`
  //   yet.
  // - If `new` throws, no memory leaks because the vector will be cleaned up
  //   via RAII.
  blocks_.emplace_back(nullptr);

  char* block = new char[block_bytes];
  size_t allocated_size;
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
  allocated_size = malloc_usable_size(block);
#ifndef NDEBUG
  // It's hard to predict what malloc_usable_size() returns.
  // A callback allows users (e.g. tests) to override the size that gets
  // charged to the arena.
  std::pair<size_t*, size_t*> pair(&allocated_size, &block_bytes);
  TEST_SYNC_POINT_CALLBACK("Arena::AllocateNewBlock:0", &pair);
#endif  // NDEBUG
#else
  allocated_size = block_bytes;
#endif  // ROCKSDB_MALLOC_USABLE_SIZE
  blocks_memory_ += allocated_size;
  if (tracker_ != nullptr) {
    tracker_->Allocate(allocated_size);
  }
  blocks_.back() = block;
  return block;
}

}  // namespace ROCKSDB_NAMESPACE