]> git.proxmox.com Git - ceph.git/blame - ceph/src/rocksdb/include/rocksdb/write_buffer_manager.h
update ceph source to reef 18.1.2
[ceph.git] / ceph / src / rocksdb / include / rocksdb / write_buffer_manager.h
CommitLineData
7c673cae 1// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
11fdf7f2
TL
2// This source code is licensed under both the GPLv2 (found in the
3// COPYING file in the root directory) and Apache 2.0 License
4// (found in the LICENSE.Apache file in the root directory).
7c673cae
FG
5//
6// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7// Use of this source code is governed by a BSD-style license that can be
8// found in the LICENSE file. See the AUTHORS file for names of contributors.
9//
10// WriteBufferManager is for managing memory allocation for one or more
11// MemTables.
12
13#pragma once
14
15#include <atomic>
1e59de90 16#include <condition_variable>
7c673cae 17#include <cstddef>
1e59de90
TL
18#include <list>
19#include <mutex>
20
11fdf7f2 21#include "rocksdb/cache.h"
7c673cae 22
f67539c2 23namespace ROCKSDB_NAMESPACE {
1e59de90
TL
// Forward declaration; the full definition lives in RocksDB's cache
// reservation machinery (not visible in this header).
class CacheReservationManager;

// Interface to block and signal DB instances, intended for RocksDB
// internal use only. Each DB instance contains ptr to StallInterface.
class StallInterface {
 public:
  // Virtual destructor so implementations are destroyed correctly when
  // deleted through a StallInterface pointer. `= default` replaces the
  // hand-written empty body (modernize-use-equals-default).
  virtual ~StallInterface() = default;

  // Blocks the calling thread; presumably until Signal() is invoked on the
  // same instance — the contract is defined by implementations elsewhere.
  virtual void Block() = 0;

  // Wakes a thread blocked in Block().
  virtual void Signal() = 0;
};
36
37class WriteBufferManager final {
7c673cae 38 public:
1e59de90
TL
39 // Parameters:
40 // _buffer_size: _buffer_size = 0 indicates no limit. Memory won't be capped.
7c673cae 41 // memory_usage() won't be valid and ShouldFlush() will always return true.
1e59de90
TL
42 //
43 // cache_: if `cache` is provided, we'll put dummy entries in the cache and
44 // cost the memory allocated to the cache. It can be used even if _buffer_size
45 // = 0.
46 //
47 // allow_stall: if set true, it will enable stalling of writes when
48 // memory_usage() exceeds buffer_size. It will wait for flush to complete and
49 // memory usage to drop down.
11fdf7f2 50 explicit WriteBufferManager(size_t _buffer_size,
1e59de90
TL
51 std::shared_ptr<Cache> cache = {},
52 bool allow_stall = false);
f67539c2
TL
53 // No copying allowed
54 WriteBufferManager(const WriteBufferManager&) = delete;
55 WriteBufferManager& operator=(const WriteBufferManager&) = delete;
56
11fdf7f2 57 ~WriteBufferManager();
7c673cae 58
1e59de90
TL
59 // Returns true if buffer_limit is passed to limit the total memory usage and
60 // is greater than 0.
61 bool enabled() const { return buffer_size() > 0; }
7c673cae 62
1e59de90
TL
63 // Returns true if pointer to cache is passed.
64 bool cost_to_cache() const { return cache_res_mgr_ != nullptr; }
494da23a 65
1e59de90 66 // Returns the total memory used by memtables.
7c673cae
FG
67 // Only valid if enabled()
68 size_t memory_usage() const {
69 return memory_used_.load(std::memory_order_relaxed);
70 }
1e59de90
TL
71
72 // Returns the total memory used by active memtables.
11fdf7f2
TL
73 size_t mutable_memtable_memory_usage() const {
74 return memory_active_.load(std::memory_order_relaxed);
75 }
1e59de90
TL
76
77 size_t dummy_entries_in_cache_usage() const;
78
79 // Returns the buffer_size.
80 size_t buffer_size() const {
81 return buffer_size_.load(std::memory_order_relaxed);
82 }
83
84 void SetBufferSize(size_t new_size) {
85 buffer_size_.store(new_size, std::memory_order_relaxed);
86 mutable_limit_.store(new_size * 7 / 8, std::memory_order_relaxed);
87 // Check if stall is active and can be ended.
88 MaybeEndWriteStall();
89 }
90
91 // Below functions should be called by RocksDB internally.
7c673cae
FG
92
93 // Should only be called from write thread
94 bool ShouldFlush() const {
11fdf7f2 95 if (enabled()) {
1e59de90
TL
96 if (mutable_memtable_memory_usage() >
97 mutable_limit_.load(std::memory_order_relaxed)) {
11fdf7f2
TL
98 return true;
99 }
1e59de90
TL
100 size_t local_size = buffer_size();
101 if (memory_usage() >= local_size &&
102 mutable_memtable_memory_usage() >= local_size / 2) {
11fdf7f2
TL
103 // If the memory exceeds the buffer size, we trigger more aggressive
104 // flush. But if already more than half memory is being flushed,
105 // triggering more flush may not help. We will hold it instead.
106 return true;
107 }
108 }
109 return false;
7c673cae
FG
110 }
111
1e59de90
TL
112 // Returns true if total memory usage exceeded buffer_size.
113 // We stall the writes untill memory_usage drops below buffer_size. When the
114 // function returns true, all writer threads (including one checking this
115 // condition) across all DBs will be stalled. Stall is allowed only if user
116 // pass allow_stall = true during WriteBufferManager instance creation.
117 //
118 // Should only be called by RocksDB internally .
119 bool ShouldStall() const {
120 if (!allow_stall_ || !enabled()) {
121 return false;
11fdf7f2 122 }
1e59de90
TL
123
124 return IsStallActive() || IsStallThresholdExceeded();
7c673cae 125 }
1e59de90
TL
126
127 // Returns true if stall is active.
128 bool IsStallActive() const {
129 return stall_active_.load(std::memory_order_relaxed);
11fdf7f2 130 }
1e59de90
TL
131
132 // Returns true if stalling condition is met.
133 bool IsStallThresholdExceeded() const {
134 return memory_usage() >= buffer_size_;
7c673cae
FG
135 }
136
1e59de90
TL
137 void ReserveMem(size_t mem);
138
139 // We are in the process of freeing `mem` bytes, so it is not considered
140 // when checking the soft limit.
141 void ScheduleFreeMem(size_t mem);
142
143 void FreeMem(size_t mem);
144
145 // Add the DB instance to the queue and block the DB.
146 // Should only be called by RocksDB internally.
147 void BeginWriteStall(StallInterface* wbm_stall);
148
149 // If stall conditions have resolved, remove DB instances from queue and
150 // signal them to continue.
151 void MaybeEndWriteStall();
152
153 void RemoveDBFromQueue(StallInterface* wbm_stall);
154
7c673cae 155 private:
1e59de90
TL
156 std::atomic<size_t> buffer_size_;
157 std::atomic<size_t> mutable_limit_;
7c673cae 158 std::atomic<size_t> memory_used_;
11fdf7f2
TL
159 // Memory that hasn't been scheduled to free.
160 std::atomic<size_t> memory_active_;
1e59de90
TL
161 std::shared_ptr<CacheReservationManager> cache_res_mgr_;
162 // Protects cache_res_mgr_
163 std::mutex cache_res_mgr_mu_;
164
165 std::list<StallInterface*> queue_;
166 // Protects the queue_ and stall_active_.
167 std::mutex mu_;
168 bool allow_stall_;
169 // Value should only be changed by BeginWriteStall() and MaybeEndWriteStall()
170 // while holding mu_, but it can be read without a lock.
171 std::atomic<bool> stall_active_;
11fdf7f2
TL
172
173 void ReserveMemWithCache(size_t mem);
174 void FreeMemWithCache(size_t mem);
7c673cae 175};
f67539c2 176} // namespace ROCKSDB_NAMESPACE