]> git.proxmox.com Git - ceph.git/blame - ceph/src/rocksdb/utilities/persistent_cache/persistent_cache_tier.h
update sources to ceph Nautilus 14.2.1
[ceph.git] / ceph / src / rocksdb / utilities / persistent_cache / persistent_cache_tier.h
CommitLineData
7c673cae 1// Copyright (c) 2013, Facebook, Inc. All rights reserved.
11fdf7f2
TL
2// This source code is licensed under both the GPLv2 (found in the
3// COPYING file in the root directory) and Apache 2.0 License
4// (found in the LICENSE.Apache file in the root directory).
7c673cae
FG
5//
6#pragma once
7
8#ifndef ROCKSDB_LITE
9
10#include <limits>
11#include <list>
12#include <map>
13#include <string>
14#include <vector>
15
16#include "monitoring/histogram.h"
17#include "rocksdb/env.h"
18#include "rocksdb/persistent_cache.h"
19#include "rocksdb/status.h"
20
21// Persistent Cache
22//
23// Persistent cache is tiered key-value cache that can use persistent medium. It
24// is a generic design and can leverage any storage medium -- disk/SSD/NVM/RAM.
25// The code has been kept generic but significant benchmark/design/development
26// time has been spent to make sure the cache performs appropriately for
27// respective storage medium.
28// The file defines
29// PersistentCacheTier : Implementation that handles individual cache tier
30// PersistentTieredCache : Implementation that handles all tiers as a logical
31// unit
32//
33// PersistentTieredCache architecture:
34// +--------------------------+ PersistentCacheTier that handles multiple tiers
35// | +----------------+ |
36// | | RAM | PersistentCacheTier that handles RAM (VolatileCacheImpl)
37// | +----------------+ |
38// | | next |
39// | v |
40// | +----------------+ |
41// | | NVM | PersistentCacheTier implementation that handles NVM
42// | +----------------+ (BlockCacheImpl)
43// | | next |
44// | V |
45// | +----------------+ |
46// | | LE-SSD | PersistentCacheTier implementation that handles LE-SSD
47// | +----------------+ (BlockCacheImpl)
48// | | |
49// | V |
50// | null |
51// +--------------------------+
52// |
53// V
54// null
55namespace rocksdb {
56
57// Persistent Cache Config
58//
59// This struct captures all the options that are used to configure persistent
60// cache. Some of the terminologies used in naming the options are
61//
62// dispatch size :
63// This is the size in which IO is dispatched to the device
64//
65// write buffer size :
66// This is the size of an individual write buffer. Write buffers are
67// grouped to form a buffered file.
68//
69// cache size :
70// This is the logical maximum for the cache size
71//
72// qdepth :
73// This is the max number of IOs that can be issued to the device in parallel
74//
75// pipelining :
76// The writer code path follows pipelined architecture, which means the
77// operations are handed off from one stage to another
78//
79// pipelining backlog size :
80// With the pipelined architecture, there can always be backlogging of ops in
81// pipeline queues. This is the maximum backlog size after which ops are dropped
82// from queue
83struct PersistentCacheConfig {
84 explicit PersistentCacheConfig(
85 Env* const _env, const std::string& _path, const uint64_t _cache_size,
86 const std::shared_ptr<Logger>& _log,
87 const uint32_t _write_buffer_size = 1 * 1024 * 1024 /*1MB*/) {
88 env = _env;
89 path = _path;
90 log = _log;
91 cache_size = _cache_size;
92 writer_dispatch_size = write_buffer_size = _write_buffer_size;
93 }
94
95 //
96 // Validate the settings. Our intentions are to catch erroneous settings ahead
97 // of time instead going violating invariants or causing dead locks.
98 //
99 Status ValidateSettings() const {
100 // (1) check pre-conditions for variables
101 if (!env || path.empty()) {
102 return Status::InvalidArgument("empty or null args");
103 }
104
105 // (2) assert size related invariants
106 // - cache size cannot be less than cache file size
107 // - individual write buffer size cannot be greater than cache file size
108 // - total write buffer size cannot be less than 2X cache file size
109 if (cache_size < cache_file_size || write_buffer_size >= cache_file_size ||
110 write_buffer_size * write_buffer_count() < 2 * cache_file_size) {
111 return Status::InvalidArgument("invalid cache size");
112 }
113
114 // (2) check writer settings
115 // - Queue depth cannot be 0
116 // - writer_dispatch_size cannot be greater than writer_buffer_size
117 // - dispatch size and buffer size need to be aligned
118 if (!writer_qdepth || writer_dispatch_size > write_buffer_size ||
119 write_buffer_size % writer_dispatch_size) {
120 return Status::InvalidArgument("invalid writer settings");
121 }
122
123 return Status::OK();
124 }
125
126 //
127 // Env abstraction to use for systmer level operations
128 //
129 Env* env;
130
131 //
132 // Path for the block cache where blocks are persisted
133 //
134 std::string path;
135
136 //
137 // Log handle for logging messages
138 //
139 std::shared_ptr<Logger> log;
140
141 //
142 // Enable direct IO for reading
143 //
144 bool enable_direct_reads = true;
145
146 //
147 // Enable direct IO for writing
148 //
149 bool enable_direct_writes = false;
150
151 //
152 // Logical cache size
153 //
154 uint64_t cache_size = std::numeric_limits<uint64_t>::max();
155
156 // cache-file-size
157 //
158 // Cache consists of multiples of small files. This parameter defines the
159 // size of an individual cache file
160 //
161 // default: 1M
162 uint32_t cache_file_size = 100ULL * 1024 * 1024;
163
164 // writer-qdepth
165 //
166 // The writers can issues IO to the devices in parallel. This parameter
167 // controls the max number if IOs that can issues in parallel to the block
168 // device
169 //
170 // default :1
171 uint32_t writer_qdepth = 1;
172
173 // pipeline-writes
174 //
175 // The write optionally follow pipelined architecture. This helps
176 // avoid regression in the eviction code path of the primary tier. This
177 // parameter defines if pipelining is enabled or disabled
178 //
179 // default: true
180 bool pipeline_writes = true;
181
182 // max-write-pipeline-backlog-size
183 //
184 // Max pipeline buffer size. This is the maximum backlog we can accumulate
185 // while waiting for writes. After the limit, new ops will be dropped.
186 //
187 // Default: 1GiB
188 uint64_t max_write_pipeline_backlog_size = 1ULL * 1024 * 1024 * 1024;
189
190 // write-buffer-size
191 //
192 // This is the size in which buffer slabs are allocated.
193 //
194 // Default: 1M
195 uint32_t write_buffer_size = 1ULL * 1024 * 1024;
196
197 // write-buffer-count
198 //
199 // This is the total number of buffer slabs. This is calculated as a factor of
200 // file size in order to avoid dead lock.
201 size_t write_buffer_count() const {
202 assert(write_buffer_size);
203 return static_cast<size_t>((writer_qdepth + 1.2) * cache_file_size /
204 write_buffer_size);
205 }
206
207 // writer-dispatch-size
208 //
209 // The writer thread will dispatch the IO at the specified IO size
210 //
211 // default: 1M
212 uint64_t writer_dispatch_size = 1ULL * 1024 * 1024;
213
214 // is_compressed
215 //
216 // This option determines if the cache will run in compressed mode or
217 // uncompressed mode
218 bool is_compressed = true;
219
220 PersistentCacheConfig MakePersistentCacheConfig(
221 const std::string& path, const uint64_t size,
222 const std::shared_ptr<Logger>& log);
223
224 std::string ToString() const;
225};
226
227// Persistent Cache Tier
228//
229// This is a logical abstraction that defines a tier of the persistent cache. Tiers
230// can be stacked over one another. PersistentCache provides the basic definition
231// for accessing/storing in the cache. PersistentCacheTier extends the interface
232// to enable management and stacking of tiers.
233class PersistentCacheTier : public PersistentCache {
234 public:
235 typedef std::shared_ptr<PersistentCacheTier> Tier;
236
237 virtual ~PersistentCacheTier() {}
238
239 // Open the persistent cache tier
240 virtual Status Open();
241
242 // Close the persistent cache tier
243 virtual Status Close();
244
245 // Reserve space up to 'size' bytes
246 virtual bool Reserve(const size_t size);
247
248 // Erase a key from the cache
249 virtual bool Erase(const Slice& key);
250
251 // Print stats to string recursively
252 virtual std::string PrintStats();
253
11fdf7f2 254 virtual PersistentCache::StatsType Stats() override;
7c673cae
FG
255
256 // Insert to page cache
257 virtual Status Insert(const Slice& page_key, const char* data,
11fdf7f2 258 const size_t size) override = 0;
7c673cae
FG
259
260 // Lookup page cache by page identifier
261 virtual Status Lookup(const Slice& page_key, std::unique_ptr<char[]>* data,
11fdf7f2 262 size_t* size) override = 0;
7c673cae
FG
263
264 // Does it store compressed data ?
11fdf7f2 265 virtual bool IsCompressed() override = 0;
7c673cae 266
11fdf7f2 267 virtual std::string GetPrintableOptions() const override = 0;
7c673cae
FG
268
269 // Return a reference to next tier
270 virtual Tier& next_tier() { return next_tier_; }
271
272 // Set the value for next tier
273 virtual void set_next_tier(const Tier& tier) {
274 assert(!next_tier_);
275 next_tier_ = tier;
276 }
277
278 virtual void TEST_Flush() {
279 if (next_tier_) {
280 next_tier_->TEST_Flush();
281 }
282 }
283
284 private:
285 Tier next_tier_; // next tier
286};
287
288// PersistentTieredCache
289//
290// Abstraction that helps you construct tiers of persistent caches as a
291// unified cache. The tier(s) of cache will act as a single tier for management
292// ease and support PersistentCache methods for accessing data.
293class PersistentTieredCache : public PersistentCacheTier {
294 public:
295 virtual ~PersistentTieredCache();
296
297 Status Open() override;
298 Status Close() override;
299 bool Erase(const Slice& key) override;
300 std::string PrintStats() override;
301 PersistentCache::StatsType Stats() override;
302 Status Insert(const Slice& page_key, const char* data,
303 const size_t size) override;
304 Status Lookup(const Slice& page_key, std::unique_ptr<char[]>* data,
305 size_t* size) override;
306 bool IsCompressed() override;
307
308 std::string GetPrintableOptions() const override {
309 return "PersistentTieredCache";
310 }
311
312 void AddTier(const Tier& tier);
313
314 Tier& next_tier() override {
315 auto it = tiers_.end();
316 return (*it)->next_tier();
317 }
318
319 void set_next_tier(const Tier& tier) override {
320 auto it = tiers_.end();
321 (*it)->set_next_tier(tier);
322 }
323
324 void TEST_Flush() override {
325 assert(!tiers_.empty());
326 tiers_.front()->TEST_Flush();
327 PersistentCacheTier::TEST_Flush();
328 }
329
330 protected:
331 std::list<Tier> tiers_; // list of tiers top-down
332};
333
334} // namespace rocksdb
335
336#endif