[ceph.git] / ceph / src / rocksdb / include / rocksdb / memory_allocator.h

// Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).

#pragma once

#include <memory>

#include "rocksdb/customizable.h"
#include "rocksdb/status.h"

namespace ROCKSDB_NAMESPACE {

// Interface a client implements in order to plug in its own memory
// allocation and deallocation routines. rocksdb/cache.h has more
// information on how it is used.
// Every method is expected to be thread-safe.
class MemoryAllocator : public Customizable {
 public:
  // Name of this customizable type.
  static const char* Type() { return "MemoryAllocator"; }

  // Factory declared here and defined elsewhere. Judging by the signature it
  // takes configuration options plus a string `value`, hands the allocator
  // back through `result`, and reports the outcome as a Status.
  // NOTE(review): the accepted `value` formats are not visible from this
  // header -- confirm against the definition.
  static Status CreateFromString(const ConfigOptions& options,
                                 const std::string& value,
                                 std::shared_ptr<MemoryAllocator>* result);

  // Hands out a block whose size is at least `size`. Must be thread-safe.
  virtual void* Allocate(size_t size) = 0;

  // Releases a block obtained earlier from this allocator. Must be
  // thread-safe.
  virtual void Deallocate(void* p) = 0;

  // Reports the memory size of the block allocated at the given pointer.
  // Overriding is optional: echoing back the original allocation_size, as
  // done here, is an acceptable answer.
  virtual size_t UsableSize(void* /*block*/, size_t allocation_size) const {
    return allocation_size;
  }

  // The id is whatever GenerateIndividualId() (declared outside this class)
  // produces.
  std::string GetId() const override { return GenerateIndividualId(); }
};

// Tuning knobs for the jemalloc-backed allocator, all concerning how
// jemalloc's tcache is used.
struct JemallocAllocatorOptions {
  // Name of this options struct.
  static const char* kName() { return "JemallocAllocatorOptions"; }

  // The jemalloc tcache caches allocations per size class: somewhere between
  // 20 entries (large size classes) and 200 entries (small size classes) for
  // each class. When the allocator is accessed by a large number of threads,
  // that memory adds up, so this flag lets the size of an allocation decide
  // whether it is cached at all.
  bool limit_tcache_size{false};

  // Smallest allocation size that goes through the tcache; only consulted
  // when limit_tcache_size=true. With a block cache the recommended setting
  // is block_size/4.
  size_t tcache_size_lower_bound{1024};

  // Largest allocation size that goes through the tcache; only consulted
  // when limit_tcache_size=true. With a block cache the recommended setting
  // is block_size.
  size_t tcache_size_upper_bound{16 * 1024};
};

// Creates a memory allocator that allocates through Jemalloc and uses
// madvise() with MADV_DONTDUMP so that cached items are left out of core
// dumps. Pairing this allocator with the block cache keeps block cache
// memory out of core dumps.
//
// Implementation details:
// A JemallocNodumpAllocator owns one dedicated jemalloc arena and serves
// every allocation from it. It hooks the arena's memory allocation and
// calls madvise() with the MADV_DONTDUMP flag on the memory obtained, which
// excludes that memory from core dumps. As a side benefit, sticking to a
// single arena can reduce jemalloc metadata for some workloads.
//
// A single arena means potential mutex contention. To mitigate it, the
// jemalloc tcache (thread-local cache) is enabled so that unused allocations
// are kept around for future use. The tcache normally costs 0.5M of extra
// memory per thread; restricting which allocation sizes are cached (see
// JemallocAllocatorOptions) brings that down.
//
// `options` is taken by non-const reference and the resulting allocator is
// returned through `memory_allocator`; the returned Status reports the
// outcome.
// NOTE(review): whether `options` is ever modified is not visible from this
// declaration -- confirm against the definition.
Status NewJemallocNodumpAllocator(
    JemallocAllocatorOptions& options,
    std::shared_ptr<MemoryAllocator>* memory_allocator);

}  // namespace ROCKSDB_NAMESPACE
Commit	Line	Data
494da23a TL	1	// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
	2	// This source code is licensed under both the GPLv2 (found in the
	3	// COPYING file in the root directory) and Apache 2.0 License
	4	// (found in the LICENSE.Apache file in the root directory).
	5
	6	#pragma once
	7
494da23a TL	8	#include <memory>
494da23a TL	9
1e59de90 TL	10	#include "rocksdb/customizable.h"
	11	#include "rocksdb/status.h"
	12
f67539c2	13	namespace ROCKSDB_NAMESPACE {
494da23a TL	14
	15	// MemoryAllocator is an interface that a client can implement to supply custom
	16	// memory allocation and deallocation methods. See rocksdb/cache.h for more
	17	// information.
	18	// All methods should be thread-safe.
1e59de90	19	class MemoryAllocator : public Customizable {
494da23a	20	public:
1e59de90 TL	21	static const char* Type() { return "MemoryAllocator"; }
	22	static Status CreateFromString(const ConfigOptions& options,
	23	const std::string& value,
	24	std::shared_ptr<MemoryAllocator>* result);
494da23a TL	25
	26	// Allocate a block of at least size. Has to be thread-safe.
	27	virtual void* Allocate(size_t size) = 0;
	28
	29	// Deallocate previously allocated block. Has to be thread-safe.
	30	virtual void Deallocate(void* p) = 0;
	31
	32	// Returns the memory size of the block allocated at p. The default
	33	// implementation that just returns the original allocation_size is fine.
	34	virtual size_t UsableSize(void* /*p*/, size_t allocation_size) const {
	35	// default implementation just returns the allocation size
	36	return allocation_size;
	37	}
1e59de90 TL	38
1e59de90 TL	39	std::string GetId() const override { return GenerateIndividualId(); }
494da23a TL	40	};
	41
	42	struct JemallocAllocatorOptions {
1e59de90	43	static const char* kName() { return "JemallocAllocatorOptions"; }
494da23a TL	44	// Jemalloc tcache cache allocations by size class. For each size class,
	45	// it caches between 20 (for large size classes) to 200 (for small size
	46	// classes). To reduce tcache memory usage in case the allocator is access
	47	// by large number of threads, we can control whether to cache an allocation
	48	// by its size.
	49	bool limit_tcache_size = false;
	50
	51	// Lower bound of allocation size to use tcache, if limit_tcache_size=true.
20effc67	52	// When used with block cache, it is recommended to set it to block_size/4.
494da23a TL	53	size_t tcache_size_lower_bound = 1024;
	54
	55	// Upper bound of allocation size to use tcache, if limit_tcache_size=true.
20effc67	56	// When used with block cache, it is recommended to set it to block_size.
494da23a TL	57	size_t tcache_size_upper_bound = 16 * 1024;
	58	};
	59
20effc67 TL	60	// Generate memory allocator which allocates through Jemalloc and utilize
20effc67 TL	61	// MADV_DONTDUMP through madvise to exclude cache items from core dump.
494da23a TL	62	// Applications can use the allocator with block cache to exclude block cache
	63	// usage from core dump.
	64	//
	65	// Implementation details:
20effc67 TL	66	// The JemallocNodumpAllocator creates a dedicated jemalloc arena, and all
	67	// allocations of the JemallocNodumpAllocator are through the same arena.
	68	// The memory allocator hooks memory allocation of the arena, and calls
	69	// madvise() with MADV_DONTDUMP flag to exclude the piece of memory from
	70	// core dump. Side benefit of using single arena would be reduction of jemalloc
	71	// metadata for some workloads.
494da23a TL	72	//
	73	// To mitigate mutex contention for using one single arena, jemalloc tcache
	74	// (thread-local cache) is enabled to cache unused allocations for future use.
20effc67 TL	75	// The tcache normally incurs 0.5M extra memory usage per-thread. The usage
20effc67 TL	76	// can be reduced by limiting allocation sizes to cache.
494da23a TL	77	extern Status NewJemallocNodumpAllocator(
	78	JemallocAllocatorOptions& options,
	79	std::shared_ptr<MemoryAllocator>* memory_allocator);
	80
f67539c2	81	} // namespace ROCKSDB_NAMESPACE