// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#pragma once
#include <atomic>
#include <memory>
#include <utility>

#include "memory/allocator.h"
#include "memory/arena.h"
#include "port/likely.h"
#include "util/core_local.h"
#include "util/mutexlock.h"
#include "util/thread_local.h"

// Only generate the field-unused attribute for the padding arrays under
// clang, or the build under GCC 4.8.1 will fail.
#ifdef __clang__
#define ROCKSDB_FIELD_UNUSED __attribute__((__unused__))
#else
#define ROCKSDB_FIELD_UNUSED
#endif  // __clang__

namespace ROCKSDB_NAMESPACE {

class Logger;

// ConcurrentArena wraps an Arena. It makes it thread safe using a fast
// inlined spinlock, and adds small per-core allocation caches to avoid
// contention for small allocations. To avoid any memory waste from the
// per-core shards, they are kept small, they are lazily instantiated
// only if ConcurrentArena actually notices concurrent use, and they
// adjust their size so that there is no fragmentation waste when the
// shard blocks are allocated from the underlying main arena.
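//
// Illustrative usage sketch (added commentary, not part of the upstream
// header; the sizes shown are arbitrary): the class is used through the
// Allocator interface, and both calls below are safe to make concurrently
// from multiple threads.
//
//   ConcurrentArena arena(1 << 20 /* block_size */);
//   char* raw = arena.Allocate(100);        // any size, any alignment
//   char* ptr = arena.AllocateAligned(48);  // pointer-size aligned result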
class ConcurrentArena : public Allocator {
 public:
  // block_size and huge_page_size are the same as for Arena (and are
  // in fact just passed to the constructor of arena_). The core-local
  // shards compute their shard_block_size as a fraction of block_size
  // that varies according to the hardware concurrency level.
  explicit ConcurrentArena(size_t block_size = Arena::kMinBlockSize,
                           AllocTracker* tracker = nullptr,
                           size_t huge_page_size = 0);

  char* Allocate(size_t bytes) override {
    return AllocateImpl(bytes, false /*force_arena*/,
                        [=]() { return arena_.Allocate(bytes); });
  }

  char* AllocateAligned(size_t bytes, size_t huge_page_size = 0,
                        Logger* logger = nullptr) override {
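    // Round the request up to the next multiple of sizeof(void*); the bit
    // trick below relies on sizeof(void*) being a power of two.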
    size_t rounded_up = ((bytes - 1) | (sizeof(void*) - 1)) + 1;
    assert(rounded_up >= bytes && rounded_up < bytes + sizeof(void*) &&
           (rounded_up % sizeof(void*)) == 0);

    return AllocateImpl(rounded_up, huge_page_size != 0 /*force_arena*/, [=]() {
      return arena_.AllocateAligned(rounded_up, huge_page_size, logger);
    });
  }

  size_t ApproximateMemoryUsage() const {
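    // Take the arena mutex so the snapshot of arena_'s usage is consistent,
    // then subtract the bytes still sitting unused in the per-core shards.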
    std::unique_lock<SpinMutex> lock(arena_mutex_, std::defer_lock);
    lock.lock();
    return arena_.ApproximateMemoryUsage() - ShardAllocatedAndUnused();
  }

  size_t MemoryAllocatedBytes() const {
    return memory_allocated_bytes_.load(std::memory_order_relaxed);
  }

  size_t AllocatedAndUnused() const {
    return arena_allocated_and_unused_.load(std::memory_order_relaxed) +
           ShardAllocatedAndUnused();
  }

  size_t IrregularBlockNum() const {
    return irregular_block_num_.load(std::memory_order_relaxed);
  }

  size_t BlockSize() const override { return arena_.BlockSize(); }

 private:
  struct Shard {
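    // The padding appears intended to give each Shard its own cache line
    // within CoreLocalArray and so avoid false sharing between cores: the
    // 40 bytes plus the fields below total 64 bytes on a typical LP64 build.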
    char padding[40] ROCKSDB_FIELD_UNUSED;
    mutable SpinMutex mutex;
    char* free_begin_;
    std::atomic<size_t> allocated_and_unused_;

    Shard() : free_begin_(nullptr), allocated_and_unused_(0) {}
  };

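  // Per-thread hint of which core-local shard to use. It stays 0 ("never
  // repicked") until Repick(), defined in concurrent_arena.cc, stores a
  // non-zero value; without thread-local support it is a constant 0, so
  // every allocation goes to shard 0 or straight to the arena.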
#ifdef ROCKSDB_SUPPORT_THREAD_LOCAL
  static __thread size_t tls_cpuid;
#else
  enum ZeroFirstEnum : size_t { tls_cpuid = 0 };
#endif

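  // padding0 and padding1 appear intended to keep the frequently written
  // fields below on cache lines of their own, separate from neighbouring
  // data (an inference from the usual false-sharing pattern, not documented
  // in the original header).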
  char padding0[56] ROCKSDB_FIELD_UNUSED;

  size_t shard_block_size_;

  CoreLocalArray<Shard> shards_;

  Arena arena_;
  mutable SpinMutex arena_mutex_;
  std::atomic<size_t> arena_allocated_and_unused_;
  std::atomic<size_t> memory_allocated_bytes_;
  std::atomic<size_t> irregular_block_num_;

  char padding1[56] ROCKSDB_FIELD_UNUSED;

  Shard* Repick();

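  // Sum of the bytes handed to the per-core shards but not yet returned by
  // an allocation. Reads are relaxed, so the total is only approximate while
  // other threads are allocating.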
  size_t ShardAllocatedAndUnused() const {
    size_t total = 0;
    for (size_t i = 0; i < shards_.Size(); ++i) {
      total += shards_.AccessAtCore(i)->allocated_and_unused_.load(
          std::memory_order_relaxed);
    }
    return total;
  }

  template <typename Func>
  char* AllocateImpl(size_t bytes, bool force_arena, const Func& func) {
    size_t cpu;

    // Go directly to the arena if the allocation is too large, or if
    // we've never needed to Repick() and the arena mutex is available
    // with no waiting. This keeps the fragmentation penalty of
    // concurrency zero unless it might actually confer an advantage.
    std::unique_lock<SpinMutex> arena_lock(arena_mutex_, std::defer_lock);
    if (bytes > shard_block_size_ / 4 || force_arena ||
        ((cpu = tls_cpuid) == 0 &&
         !shards_.AccessAtCore(0)->allocated_and_unused_.load(
             std::memory_order_relaxed) &&
         arena_lock.try_lock())) {
      if (!arena_lock.owns_lock()) {
        arena_lock.lock();
      }
      auto rv = func();
      Fixup();
      return rv;
    }

    // pick a shard from which to allocate
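    // shards_.Size() is a power of two (CoreLocalArray rounds the core count
    // up), so the mask maps the thread's cpu hint onto a valid shard index.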
    Shard* s = shards_.AccessAtCore(cpu & (shards_.Size() - 1));
    if (!s->mutex.try_lock()) {
      s = Repick();
      s->mutex.lock();
    }
    std::unique_lock<SpinMutex> lock(s->mutex, std::adopt_lock);

    size_t avail = s->allocated_and_unused_.load(std::memory_order_relaxed);
    if (avail < bytes) {
      // reload
      std::lock_guard<SpinMutex> reload_lock(arena_mutex_);

      // If the arena's current block is within a factor of 2 of the right
      // size, we adjust our request to avoid arena waste.
      auto exact = arena_allocated_and_unused_.load(std::memory_order_relaxed);
      assert(exact == arena_.AllocatedAndUnused());

      if (exact >= bytes && arena_.IsInInlineBlock()) {
        // If we haven't exhausted arena's inline block yet, allocate from arena
        // directly. This ensures that we'll do the first few small allocations
        // without allocating any blocks.
        // In particular this prevents empty memtables from using a
        // disproportionately large amount of memory: a memtable allocates on
        // the order of 1 KB of memory when created; we wouldn't want to
        // allocate a full arena block (typically a few megabytes) for that,
        // especially if there are thousands of empty memtables.
        auto rv = func();
        Fixup();
        return rv;
      }

      avail = exact >= shard_block_size_ / 2 && exact < shard_block_size_ * 2
                  ? exact
                  : shard_block_size_;
      s->free_begin_ = arena_.AllocateAligned(avail);
      Fixup();
    }
    s->allocated_and_unused_.store(avail - bytes, std::memory_order_relaxed);

    char* rv;
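    // Pointer-size-multiple requests are carved from the front of the shard
    // block so free_begin_ stays aligned; other sizes come off the back and
    // leave that alignment untouched.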
    if ((bytes % sizeof(void*)) == 0) {
      // aligned allocation from the beginning
      rv = s->free_begin_;
      s->free_begin_ += bytes;
    } else {
      // unaligned from the end
      rv = s->free_begin_ + avail - bytes;
    }
    return rv;
  }

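  // Refresh the cached (atomic) copies of arena_'s statistics after arena_
  // has handed out or acquired memory.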
  void Fixup() {
    arena_allocated_and_unused_.store(arena_.AllocatedAndUnused(),
                                      std::memory_order_relaxed);
    memory_allocated_bytes_.store(arena_.MemoryAllocatedBytes(),
                                  std::memory_order_relaxed);
    irregular_block_num_.store(arena_.IrregularBlockNum(),
                               std::memory_order_relaxed);
  }

  ConcurrentArena(const ConcurrentArena&) = delete;
  ConcurrentArena& operator=(const ConcurrentArena&) = delete;
};

}  // namespace ROCKSDB_NAMESPACE