]> git.proxmox.com Git - ceph.git/blame - ceph/src/rocksdb/include/rocksdb/memory_allocator.h
update ceph source to reef 18.1.2
[ceph.git] / ceph / src / rocksdb / include / rocksdb / memory_allocator.h
CommitLineData
494da23a
TL
1// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2// This source code is licensed under both the GPLv2 (found in the
3// COPYING file in the root directory) and Apache 2.0 License
4// (found in the LICENSE.Apache file in the root directory).
5
6#pragma once
7
494da23a
TL
8#include <memory>
9
1e59de90
TL
10#include "rocksdb/customizable.h"
11#include "rocksdb/status.h"
12
f67539c2 13namespace ROCKSDB_NAMESPACE {
494da23a
TL
14
15// MemoryAllocator is an interface that a client can implement to supply custom
16// memory allocation and deallocation methods. See rocksdb/cache.h for more
17// information. It derives from Customizable and overrides GetId().
18// All methods should be thread-safe.
1e59de90 19class MemoryAllocator : public Customizable {
494da23a 20 public:
1e59de90
TL
// Returns the fixed type-name string "MemoryAllocator".
21 static const char* Type() { return "MemoryAllocator"; }
// Declared here only; the definition is not part of this header. Judging by
// the signature, it presumably builds an allocator described by `value` and
// stores it in `*result` -- confirm against the implementation.
22 static Status CreateFromString(const ConfigOptions& options,
23 const std::string& value,
24 std::shared_ptr<MemoryAllocator>* result);
494da23a
TL
25
26 // Allocate a block of at least `size`. Has to be thread-safe.
// Pure virtual: every concrete allocator must implement it.
27 virtual void* Allocate(size_t size) = 0;
28
29 // Deallocate a previously allocated block `p`. Has to be thread-safe.
// Pure virtual: every concrete allocator must implement it.
30 virtual void Deallocate(void* p) = 0;
31
32 // Returns the memory size of the block allocated at p. Overriding is
33 // optional: the default ignores p and returns allocation_size unchanged.
34 virtual size_t UsableSize(void* /*p*/, size_t allocation_size) const {
35 // p is unused here; hand back allocation_size as-is.
36 return allocation_size;
37 }
1e59de90
TL
38
// Overrides GetId() by forwarding to GenerateIndividualId(), which is not
// defined in this header.
39 std::string GetId() const override { return GenerateIndividualId(); }
494da23a
TL
40};
41
// Options describing how the jemalloc thread cache (tcache) is used. All
// fields carry in-class default values.
42struct JemallocAllocatorOptions {
// Returns the fixed name string "JemallocAllocatorOptions".
1e59de90 43 static const char* kName() { return "JemallocAllocatorOptions"; }
494da23a
TL
44 // Jemalloc tcache caches allocations by size class. For each size class,
45 // it caches between 20 (for large size classes) and 200 (for small size
46 // classes). To reduce tcache memory usage in case the allocator is accessed
47 // by a large number of threads, we can control whether to cache an
48 // allocation by its size. Defaults to false.
49 bool limit_tcache_size = false;
50
51 // Lower bound of allocation size to use tcache, if limit_tcache_size=true.
20effc67 52 // When used with block cache, it is recommended to set it to block_size/4.
// Defaults to 1024.
494da23a
TL
53 size_t tcache_size_lower_bound = 1024;
54
55 // Upper bound of allocation size to use tcache, if limit_tcache_size=true.
20effc67 56 // When used with block cache, it is recommended to set it to block_size.
// Defaults to 16 * 1024.
494da23a
TL
57 size_t tcache_size_upper_bound = 16 * 1024;
58};
59
20effc67
TL
60// Generates a memory allocator which allocates through jemalloc and utilizes
61// MADV_DONTDUMP through madvise to exclude cache items from core dumps.
494da23a
TL
62// Applications can use the allocator with block cache to exclude block cache
63// usage from core dumps.
64//
65// Implementation details:
20effc67
TL
66// The JemallocNodumpAllocator creates a dedicated jemalloc arena, and all
67// allocations of the JemallocNodumpAllocator go through that same arena.
68// The memory allocator hooks memory allocation of the arena, and calls
69// madvise() with the MADV_DONTDUMP flag to exclude the piece of memory from
70// core dumps. A side benefit of using a single arena would be a reduction of
71// jemalloc metadata for some workloads.
494da23a
TL
72//
73// To mitigate mutex contention from using one single arena, jemalloc tcache
74// (thread-local cache) is enabled to cache unused allocations for future use.
20effc67
TL
75// The tcache normally incurs 0.5M extra memory usage per thread. The usage
76// can be reduced by limiting the allocation sizes that are cached.
//
// options: tcache settings, see JemallocAllocatorOptions. Taken by non-const
//   reference; whether the callee modifies it is not visible from this
//   declaration.
// memory_allocator: out-parameter; presumably receives the new allocator on
//   success -- confirm against the implementation.
// Returns a Status; the specific failure conditions are not visible here.
494da23a
TL
77extern Status NewJemallocNodumpAllocator(
78 JemallocAllocatorOptions& options,
79 std::shared_ptr<MemoryAllocator>* memory_allocator);
80
f67539c2 81} // namespace ROCKSDB_NAMESPACE