//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
//
// WriteBufferManager is for managing memory allocation for one or more
// MemTables.

#pragma once

#include <atomic>
#include <condition_variable>
#include <cstddef>
#include <list>
#include <mutex>

#include "rocksdb/cache.h"

namespace ROCKSDB_NAMESPACE {
class CacheReservationManager;

// Interface to block and signal DB instances, intended for RocksDB
// internal use only. Each DB instance contains ptr to StallInterface.
class StallInterface {
 public:
  // Polymorphic base: virtual destructor so deleting through a
  // StallInterface* is well-defined. `= default` is the idiomatic
  // spelling of the original empty body.
  virtual ~StallInterface() = default;

  // Block the calling DB until Signal() wakes it up.
  virtual void Block() = 0;

  // Wake up a DB previously blocked via Block().
  virtual void Signal() = 0;
};
36 | ||
37 | class WriteBufferManager final { | |
7c673cae | 38 | public: |
1e59de90 TL |
39 | // Parameters: |
40 | // _buffer_size: _buffer_size = 0 indicates no limit. Memory won't be capped. | |
7c673cae | 41 | // memory_usage() won't be valid and ShouldFlush() will always return true. |
1e59de90 TL |
42 | // |
43 | // cache_: if `cache` is provided, we'll put dummy entries in the cache and | |
44 | // cost the memory allocated to the cache. It can be used even if _buffer_size | |
45 | // = 0. | |
46 | // | |
47 | // allow_stall: if set true, it will enable stalling of writes when | |
48 | // memory_usage() exceeds buffer_size. It will wait for flush to complete and | |
49 | // memory usage to drop down. | |
11fdf7f2 | 50 | explicit WriteBufferManager(size_t _buffer_size, |
1e59de90 TL |
51 | std::shared_ptr<Cache> cache = {}, |
52 | bool allow_stall = false); | |
f67539c2 TL |
53 | // No copying allowed |
54 | WriteBufferManager(const WriteBufferManager&) = delete; | |
55 | WriteBufferManager& operator=(const WriteBufferManager&) = delete; | |
56 | ||
11fdf7f2 | 57 | ~WriteBufferManager(); |
7c673cae | 58 | |
1e59de90 TL |
59 | // Returns true if buffer_limit is passed to limit the total memory usage and |
60 | // is greater than 0. | |
61 | bool enabled() const { return buffer_size() > 0; } | |
7c673cae | 62 | |
1e59de90 TL |
63 | // Returns true if pointer to cache is passed. |
64 | bool cost_to_cache() const { return cache_res_mgr_ != nullptr; } | |
494da23a | 65 | |
1e59de90 | 66 | // Returns the total memory used by memtables. |
7c673cae FG |
67 | // Only valid if enabled() |
68 | size_t memory_usage() const { | |
69 | return memory_used_.load(std::memory_order_relaxed); | |
70 | } | |
1e59de90 TL |
71 | |
72 | // Returns the total memory used by active memtables. | |
11fdf7f2 TL |
73 | size_t mutable_memtable_memory_usage() const { |
74 | return memory_active_.load(std::memory_order_relaxed); | |
75 | } | |
1e59de90 TL |
76 | |
77 | size_t dummy_entries_in_cache_usage() const; | |
78 | ||
79 | // Returns the buffer_size. | |
80 | size_t buffer_size() const { | |
81 | return buffer_size_.load(std::memory_order_relaxed); | |
82 | } | |
83 | ||
84 | void SetBufferSize(size_t new_size) { | |
85 | buffer_size_.store(new_size, std::memory_order_relaxed); | |
86 | mutable_limit_.store(new_size * 7 / 8, std::memory_order_relaxed); | |
87 | // Check if stall is active and can be ended. | |
88 | MaybeEndWriteStall(); | |
89 | } | |
90 | ||
91 | // Below functions should be called by RocksDB internally. | |
7c673cae FG |
92 | |
93 | // Should only be called from write thread | |
94 | bool ShouldFlush() const { | |
11fdf7f2 | 95 | if (enabled()) { |
1e59de90 TL |
96 | if (mutable_memtable_memory_usage() > |
97 | mutable_limit_.load(std::memory_order_relaxed)) { | |
11fdf7f2 TL |
98 | return true; |
99 | } | |
1e59de90 TL |
100 | size_t local_size = buffer_size(); |
101 | if (memory_usage() >= local_size && | |
102 | mutable_memtable_memory_usage() >= local_size / 2) { | |
11fdf7f2 TL |
103 | // If the memory exceeds the buffer size, we trigger more aggressive |
104 | // flush. But if already more than half memory is being flushed, | |
105 | // triggering more flush may not help. We will hold it instead. | |
106 | return true; | |
107 | } | |
108 | } | |
109 | return false; | |
7c673cae FG |
110 | } |
111 | ||
1e59de90 TL |
112 | // Returns true if total memory usage exceeded buffer_size. |
113 | // We stall the writes untill memory_usage drops below buffer_size. When the | |
114 | // function returns true, all writer threads (including one checking this | |
115 | // condition) across all DBs will be stalled. Stall is allowed only if user | |
116 | // pass allow_stall = true during WriteBufferManager instance creation. | |
117 | // | |
118 | // Should only be called by RocksDB internally . | |
119 | bool ShouldStall() const { | |
120 | if (!allow_stall_ || !enabled()) { | |
121 | return false; | |
11fdf7f2 | 122 | } |
1e59de90 TL |
123 | |
124 | return IsStallActive() || IsStallThresholdExceeded(); | |
7c673cae | 125 | } |
1e59de90 TL |
126 | |
127 | // Returns true if stall is active. | |
128 | bool IsStallActive() const { | |
129 | return stall_active_.load(std::memory_order_relaxed); | |
11fdf7f2 | 130 | } |
1e59de90 TL |
131 | |
132 | // Returns true if stalling condition is met. | |
133 | bool IsStallThresholdExceeded() const { | |
134 | return memory_usage() >= buffer_size_; | |
7c673cae FG |
135 | } |
136 | ||
1e59de90 TL |
137 | void ReserveMem(size_t mem); |
138 | ||
139 | // We are in the process of freeing `mem` bytes, so it is not considered | |
140 | // when checking the soft limit. | |
141 | void ScheduleFreeMem(size_t mem); | |
142 | ||
143 | void FreeMem(size_t mem); | |
144 | ||
145 | // Add the DB instance to the queue and block the DB. | |
146 | // Should only be called by RocksDB internally. | |
147 | void BeginWriteStall(StallInterface* wbm_stall); | |
148 | ||
149 | // If stall conditions have resolved, remove DB instances from queue and | |
150 | // signal them to continue. | |
151 | void MaybeEndWriteStall(); | |
152 | ||
153 | void RemoveDBFromQueue(StallInterface* wbm_stall); | |
154 | ||
7c673cae | 155 | private: |
1e59de90 TL |
156 | std::atomic<size_t> buffer_size_; |
157 | std::atomic<size_t> mutable_limit_; | |
7c673cae | 158 | std::atomic<size_t> memory_used_; |
11fdf7f2 TL |
159 | // Memory that hasn't been scheduled to free. |
160 | std::atomic<size_t> memory_active_; | |
1e59de90 TL |
161 | std::shared_ptr<CacheReservationManager> cache_res_mgr_; |
162 | // Protects cache_res_mgr_ | |
163 | std::mutex cache_res_mgr_mu_; | |
164 | ||
165 | std::list<StallInterface*> queue_; | |
166 | // Protects the queue_ and stall_active_. | |
167 | std::mutex mu_; | |
168 | bool allow_stall_; | |
169 | // Value should only be changed by BeginWriteStall() and MaybeEndWriteStall() | |
170 | // while holding mu_, but it can be read without a lock. | |
171 | std::atomic<bool> stall_active_; | |
11fdf7f2 TL |
172 | |
173 | void ReserveMemWithCache(size_t mem); | |
174 | void FreeMemWithCache(size_t mem); | |
7c673cae | 175 | }; |
}  // namespace ROCKSDB_NAMESPACE