]>
Commit | Line | Data |
---|---|---|
7c673cae | 1 | // Copyright (c) 2013, Facebook, Inc. All rights reserved. |
11fdf7f2 TL |
2 | // This source code is licensed under both the GPLv2 (found in the |
3 | // COPYING file in the root directory) and Apache 2.0 License | |
4 | // (found in the LICENSE.Apache file in the root directory). | |
7c673cae FG |
5 | // |
6 | #pragma once | |
7 | ||
8 | #ifndef ROCKSDB_LITE | |
9 | ||
10 | #include <limits> | |
11 | #include <list> | |
12 | #include <map> | |
13 | #include <string> | |
14 | #include <vector> | |
15 | ||
16 | #include "monitoring/histogram.h" | |
17 | #include "rocksdb/env.h" | |
18 | #include "rocksdb/persistent_cache.h" | |
19 | #include "rocksdb/status.h" | |
20 | ||
21 | // Persistent Cache | |
22 | // | |
23 | // Persistent cache is tiered key-value cache that can use persistent medium. It | |
24 | // is a generic design and can leverage any storage medium -- disk/SSD/NVM/RAM. | |
25 | // The code has been kept generic but significant benchmark/design/development | |
26 | // time has been spent to make sure the cache performs appropriately for | |
27 | // respective storage medium. | |
28 | // The file defines | |
29 | // PersistentCacheTier : Implementation that handles individual cache tier | |
30 | // PersistentTieredCache : Implementation that handles all tiers as a logical | |
31 | // unit | |
32 | // | |
33 | // PersistentTieredCache architecture: | |
34 | // +--------------------------+ PersistentCacheTier that handles multiple tiers | |
35 | // | +----------------+ | | |
36 | // | | RAM | PersistentCacheTier that handles RAM (VolatileCacheImpl) | |
37 | // | +----------------+ | | |
38 | // | | next | | |
39 | // | v | | |
40 | // | +----------------+ | | |
41 | // | | NVM | PersistentCacheTier implementation that handles NVM | |
42 | // | +----------------+ (BlockCacheImpl) | |
43 | // | | next | | |
44 | // | V | | |
45 | // | +----------------+ | | |
46 | // | | LE-SSD | PersistentCacheTier implementation that handles LE-SSD | |
47 | // | +----------------+ (BlockCacheImpl) | |
48 | // | | | | |
49 | // | V | | |
50 | // | null | | |
51 | // +--------------------------+ | |
52 | // | | |
53 | // V | |
54 | // null | |
55 | namespace rocksdb { | |
56 | ||
57 | // Persistent Cache Config | |
58 | // | |
59 | // This struct captures all the options that are used to configure persistent | |
60 | // cache. Some of the terminologies used in naming the options are | |
61 | // | |
62 | // dispatch size : | |
63 | // This is the size in which IO is dispatched to the device | |
64 | // | |
65 | // write buffer size : | |
66 | // This is the size of an individual write buffer. Write buffers are | |
67 | // grouped to form a buffered file. | |
68 | // | |
69 | // cache size : | |
70 | // This is the logical maximum for the cache size | |
71 | // | |
72 | // qdepth : | |
73 | // This is the max number of IOs that can be issued to the device in parallel | |
74 | // | |
75 | // pipelining : | |
76 | // The writer code path follows pipelined architecture, which means the | |
77 | // operations are handed off from one stage to another | |
78 | // | |
79 | // pipelining backlog size : | |
80 | // With the pipelined architecture, there can always be backlogging of ops in | |
81 | // pipeline queues. This is the maximum backlog size after which ops are dropped | |
82 | // from queue | |
83 | struct PersistentCacheConfig { | |
84 | explicit PersistentCacheConfig( | |
85 | Env* const _env, const std::string& _path, const uint64_t _cache_size, | |
86 | const std::shared_ptr<Logger>& _log, | |
87 | const uint32_t _write_buffer_size = 1 * 1024 * 1024 /*1MB*/) { | |
88 | env = _env; | |
89 | path = _path; | |
90 | log = _log; | |
91 | cache_size = _cache_size; | |
92 | writer_dispatch_size = write_buffer_size = _write_buffer_size; | |
93 | } | |
94 | ||
95 | // | |
96 | // Validate the settings. Our intentions are to catch erroneous settings ahead | |
97 | // of time instead going violating invariants or causing dead locks. | |
98 | // | |
99 | Status ValidateSettings() const { | |
100 | // (1) check pre-conditions for variables | |
101 | if (!env || path.empty()) { | |
102 | return Status::InvalidArgument("empty or null args"); | |
103 | } | |
104 | ||
105 | // (2) assert size related invariants | |
106 | // - cache size cannot be less than cache file size | |
107 | // - individual write buffer size cannot be greater than cache file size | |
108 | // - total write buffer size cannot be less than 2X cache file size | |
109 | if (cache_size < cache_file_size || write_buffer_size >= cache_file_size || | |
110 | write_buffer_size * write_buffer_count() < 2 * cache_file_size) { | |
111 | return Status::InvalidArgument("invalid cache size"); | |
112 | } | |
113 | ||
114 | // (2) check writer settings | |
115 | // - Queue depth cannot be 0 | |
116 | // - writer_dispatch_size cannot be greater than writer_buffer_size | |
117 | // - dispatch size and buffer size need to be aligned | |
118 | if (!writer_qdepth || writer_dispatch_size > write_buffer_size || | |
119 | write_buffer_size % writer_dispatch_size) { | |
120 | return Status::InvalidArgument("invalid writer settings"); | |
121 | } | |
122 | ||
123 | return Status::OK(); | |
124 | } | |
125 | ||
126 | // | |
127 | // Env abstraction to use for systmer level operations | |
128 | // | |
129 | Env* env; | |
130 | ||
131 | // | |
132 | // Path for the block cache where blocks are persisted | |
133 | // | |
134 | std::string path; | |
135 | ||
136 | // | |
137 | // Log handle for logging messages | |
138 | // | |
139 | std::shared_ptr<Logger> log; | |
140 | ||
141 | // | |
142 | // Enable direct IO for reading | |
143 | // | |
144 | bool enable_direct_reads = true; | |
145 | ||
146 | // | |
147 | // Enable direct IO for writing | |
148 | // | |
149 | bool enable_direct_writes = false; | |
150 | ||
151 | // | |
152 | // Logical cache size | |
153 | // | |
154 | uint64_t cache_size = std::numeric_limits<uint64_t>::max(); | |
155 | ||
156 | // cache-file-size | |
157 | // | |
158 | // Cache consists of multiples of small files. This parameter defines the | |
159 | // size of an individual cache file | |
160 | // | |
161 | // default: 1M | |
162 | uint32_t cache_file_size = 100ULL * 1024 * 1024; | |
163 | ||
164 | // writer-qdepth | |
165 | // | |
166 | // The writers can issues IO to the devices in parallel. This parameter | |
167 | // controls the max number if IOs that can issues in parallel to the block | |
168 | // device | |
169 | // | |
170 | // default :1 | |
171 | uint32_t writer_qdepth = 1; | |
172 | ||
173 | // pipeline-writes | |
174 | // | |
175 | // The write optionally follow pipelined architecture. This helps | |
176 | // avoid regression in the eviction code path of the primary tier. This | |
177 | // parameter defines if pipelining is enabled or disabled | |
178 | // | |
179 | // default: true | |
180 | bool pipeline_writes = true; | |
181 | ||
182 | // max-write-pipeline-backlog-size | |
183 | // | |
184 | // Max pipeline buffer size. This is the maximum backlog we can accumulate | |
185 | // while waiting for writes. After the limit, new ops will be dropped. | |
186 | // | |
187 | // Default: 1GiB | |
188 | uint64_t max_write_pipeline_backlog_size = 1ULL * 1024 * 1024 * 1024; | |
189 | ||
190 | // write-buffer-size | |
191 | // | |
192 | // This is the size in which buffer slabs are allocated. | |
193 | // | |
194 | // Default: 1M | |
195 | uint32_t write_buffer_size = 1ULL * 1024 * 1024; | |
196 | ||
197 | // write-buffer-count | |
198 | // | |
199 | // This is the total number of buffer slabs. This is calculated as a factor of | |
200 | // file size in order to avoid dead lock. | |
201 | size_t write_buffer_count() const { | |
202 | assert(write_buffer_size); | |
203 | return static_cast<size_t>((writer_qdepth + 1.2) * cache_file_size / | |
204 | write_buffer_size); | |
205 | } | |
206 | ||
207 | // writer-dispatch-size | |
208 | // | |
209 | // The writer thread will dispatch the IO at the specified IO size | |
210 | // | |
211 | // default: 1M | |
212 | uint64_t writer_dispatch_size = 1ULL * 1024 * 1024; | |
213 | ||
214 | // is_compressed | |
215 | // | |
216 | // This option determines if the cache will run in compressed mode or | |
217 | // uncompressed mode | |
218 | bool is_compressed = true; | |
219 | ||
220 | PersistentCacheConfig MakePersistentCacheConfig( | |
221 | const std::string& path, const uint64_t size, | |
222 | const std::shared_ptr<Logger>& log); | |
223 | ||
224 | std::string ToString() const; | |
225 | }; | |
226 | ||
227 | // Persistent Cache Tier | |
228 | // | |
229 | // This is a logical abstraction that defines a tier of the persistent cache. Tiers | |
230 | // can be stacked over one another. PersistentCache provides the basic definition | |
231 | // for accessing/storing in the cache. PersistentCacheTier extends the interface | |
232 | // to enable management and stacking of tiers. | |
233 | class PersistentCacheTier : public PersistentCache { | |
234 | public: | |
235 | typedef std::shared_ptr<PersistentCacheTier> Tier; | |
236 | ||
237 | virtual ~PersistentCacheTier() {} | |
238 | ||
239 | // Open the persistent cache tier | |
240 | virtual Status Open(); | |
241 | ||
242 | // Close the persistent cache tier | |
243 | virtual Status Close(); | |
244 | ||
245 | // Reserve space up to 'size' bytes | |
246 | virtual bool Reserve(const size_t size); | |
247 | ||
248 | // Erase a key from the cache | |
249 | virtual bool Erase(const Slice& key); | |
250 | ||
251 | // Print stats to string recursively | |
252 | virtual std::string PrintStats(); | |
253 | ||
11fdf7f2 | 254 | virtual PersistentCache::StatsType Stats() override; |
7c673cae FG |
255 | |
256 | // Insert to page cache | |
257 | virtual Status Insert(const Slice& page_key, const char* data, | |
11fdf7f2 | 258 | const size_t size) override = 0; |
7c673cae FG |
259 | |
260 | // Lookup page cache by page identifier | |
261 | virtual Status Lookup(const Slice& page_key, std::unique_ptr<char[]>* data, | |
11fdf7f2 | 262 | size_t* size) override = 0; |
7c673cae FG |
263 | |
264 | // Does it store compressed data ? | |
11fdf7f2 | 265 | virtual bool IsCompressed() override = 0; |
7c673cae | 266 | |
11fdf7f2 | 267 | virtual std::string GetPrintableOptions() const override = 0; |
7c673cae FG |
268 | |
269 | // Return a reference to next tier | |
270 | virtual Tier& next_tier() { return next_tier_; } | |
271 | ||
272 | // Set the value for next tier | |
273 | virtual void set_next_tier(const Tier& tier) { | |
274 | assert(!next_tier_); | |
275 | next_tier_ = tier; | |
276 | } | |
277 | ||
278 | virtual void TEST_Flush() { | |
279 | if (next_tier_) { | |
280 | next_tier_->TEST_Flush(); | |
281 | } | |
282 | } | |
283 | ||
284 | private: | |
285 | Tier next_tier_; // next tier | |
286 | }; | |
287 | ||
288 | // PersistentTieredCache | |
289 | // | |
290 | // Abstraction that helps you construct tiers of persistent caches as a | |
291 | // unified cache. The tier(s) of cache will act as a single tier for management | |
292 | // ease and support PersistentCache methods for accessing data. | |
293 | class PersistentTieredCache : public PersistentCacheTier { | |
294 | public: | |
295 | virtual ~PersistentTieredCache(); | |
296 | ||
297 | Status Open() override; | |
298 | Status Close() override; | |
299 | bool Erase(const Slice& key) override; | |
300 | std::string PrintStats() override; | |
301 | PersistentCache::StatsType Stats() override; | |
302 | Status Insert(const Slice& page_key, const char* data, | |
303 | const size_t size) override; | |
304 | Status Lookup(const Slice& page_key, std::unique_ptr<char[]>* data, | |
305 | size_t* size) override; | |
306 | bool IsCompressed() override; | |
307 | ||
308 | std::string GetPrintableOptions() const override { | |
309 | return "PersistentTieredCache"; | |
310 | } | |
311 | ||
312 | void AddTier(const Tier& tier); | |
313 | ||
314 | Tier& next_tier() override { | |
315 | auto it = tiers_.end(); | |
316 | return (*it)->next_tier(); | |
317 | } | |
318 | ||
319 | void set_next_tier(const Tier& tier) override { | |
320 | auto it = tiers_.end(); | |
321 | (*it)->set_next_tier(tier); | |
322 | } | |
323 | ||
324 | void TEST_Flush() override { | |
325 | assert(!tiers_.empty()); | |
326 | tiers_.front()->TEST_Flush(); | |
327 | PersistentCacheTier::TEST_Flush(); | |
328 | } | |
329 | ||
330 | protected: | |
331 | std::list<Tier> tiers_; // list of tiers top-down | |
332 | }; | |
333 | ||
334 | } // namespace rocksdb | |
335 | ||
336 | #endif |