]> git.proxmox.com Git - ceph.git/blame - ceph/src/rocksdb/utilities/blob_db/blob_db.h
update ceph source to reef 18.1.2
[ceph.git] / ceph / src / rocksdb / utilities / blob_db / blob_db.h
CommitLineData
7c673cae 1// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
11fdf7f2
TL
2// This source code is licensed under both the GPLv2 (found in the
3// COPYING file in the root directory) and Apache 2.0 License
4// (found in the LICENSE.Apache file in the root directory).
7c673cae
FG
5
6#pragma once
7
11fdf7f2
TL
8#ifndef ROCKSDB_LITE
9
10#include <functional>
20effc67 11#include <limits>
7c673cae 12#include <string>
11fdf7f2 13#include <vector>
20effc67 14
7c673cae
FG
15#include "rocksdb/db.h"
16#include "rocksdb/status.h"
11fdf7f2 17#include "rocksdb/utilities/stackable_db.h"
7c673cae 18
f67539c2 19namespace ROCKSDB_NAMESPACE {
11fdf7f2
TL
20
21namespace blob_db {
22
7c673cae
FG
23// A wrapped database which puts values of KV pairs in a separate log
24 // and stores the location in that log in the underlying DB.
7c673cae
FG
25//
26// The factory needs to be moved to include/rocksdb/utilities to allow
27// users to use blob DB.
11fdf7f2 28
20effc67
TL
29constexpr uint64_t kNoExpiration = std::numeric_limits<uint64_t>::max();
30
11fdf7f2 31struct BlobDBOptions {
20effc67
TL
32 // Name of the directory under the base DB where blobs will be stored. Using
33 // a directory where the base DB stores its SST files is not supported.
34 // Default is "blob_dir"
11fdf7f2
TL
35 std::string blob_dir = "blob_dir";
36
37 // whether the blob_dir path is relative or absolute.
38 bool path_relative = true;
39
40 // When max_db_size is reached, evict blob files to free up space
41 // instead of returning NoSpace error on write. Blob files will be
42 // evicted from oldest to newest, based on file creation time.
43 bool is_fifo = false;
44
45 // Maximum size of the database (including SST files and blob files).
46 //
47 // Default: 0 (no limits)
48 uint64_t max_db_size = 0;
49
50 // A new bucket is opened for each ttl_range. So if ttl_range is 600 seconds
51 // (10 minutes), and the first bucket starts at 1471542000
52 // then the blob buckets will be
53 // first bucket is 1471542000 - 1471542600
54 // second bucket is 1471542600 - 1471543200
55 // and so on
56 uint64_t ttl_range_secs = 3600;
57
494da23a 58 // The smallest value to store in blob log. Values smaller than this threshold
11fdf7f2
TL
59 // will be inlined in base DB together with the key.
60 uint64_t min_blob_size = 0;
61
62 // Allows OS to incrementally sync blob files to disk for every
63 // bytes_per_sync bytes written. Users shouldn't rely on it for
64 // persistency guarantee.
65 uint64_t bytes_per_sync = 512 * 1024;
66
67 // the target size of each blob file. File will become immutable
68 // after it exceeds that size
69 uint64_t blob_file_size = 256 * 1024 * 1024;
70
71 // Which compression to use for blobs.
72 CompressionType compression = kNoCompression;
73
f67539c2
TL
74 // If enabled, BlobDB cleans up stale blobs in non-TTL files during compaction
75 // by rewriting the remaining live blobs to new files.
11fdf7f2
TL
76 bool enable_garbage_collection = false;
77
f67539c2
TL
78 // The cutoff in terms of blob file age for garbage collection. Blobs in
79 // the oldest N non-TTL blob files will be rewritten when encountered during
80 // compaction, where N = garbage_collection_cutoff * number_of_non_TTL_files.
81 double garbage_collection_cutoff = 0.25;
82
11fdf7f2
TL
83 // Disable all background jobs. Used for tests only.
84 bool disable_background_tasks = false;
85
86 void Dump(Logger* log) const;
87};
88
89class BlobDB : public StackableDB {
90 public:
f67539c2 91 using ROCKSDB_NAMESPACE::StackableDB::Put;
11fdf7f2
TL
92 virtual Status Put(const WriteOptions& options, const Slice& key,
93 const Slice& value) override = 0;
94 virtual Status Put(const WriteOptions& options,
95 ColumnFamilyHandle* column_family, const Slice& key,
96 const Slice& value) override {
f67539c2 97 if (column_family->GetID() != DefaultColumnFamily()->GetID()) {
11fdf7f2
TL
98 return Status::NotSupported(
99 "Blob DB doesn't support non-default column family.");
100 }
101 return Put(options, key, value);
102 }
103
f67539c2 104 using ROCKSDB_NAMESPACE::StackableDB::Delete;
11fdf7f2
TL
105 virtual Status Delete(const WriteOptions& options,
106 ColumnFamilyHandle* column_family,
107 const Slice& key) override {
f67539c2 108 if (column_family->GetID() != DefaultColumnFamily()->GetID()) {
11fdf7f2
TL
109 return Status::NotSupported(
110 "Blob DB doesn't support non-default column family.");
111 }
112 assert(db_ != nullptr);
113 return db_->Delete(options, column_family, key);
114 }
115
116 virtual Status PutWithTTL(const WriteOptions& options, const Slice& key,
117 const Slice& value, uint64_t ttl) = 0;
118 virtual Status PutWithTTL(const WriteOptions& options,
119 ColumnFamilyHandle* column_family, const Slice& key,
120 const Slice& value, uint64_t ttl) {
f67539c2 121 if (column_family->GetID() != DefaultColumnFamily()->GetID()) {
11fdf7f2
TL
122 return Status::NotSupported(
123 "Blob DB doesn't support non-default column family.");
124 }
125 return PutWithTTL(options, key, value, ttl);
126 }
127
128 // Put with expiration. Key with expiration time equal to
129 // std::numeric_limits<uint64_t>::max() (kNoExpiration) means the key doesn't expire.
130 virtual Status PutUntil(const WriteOptions& options, const Slice& key,
131 const Slice& value, uint64_t expiration) = 0;
132 virtual Status PutUntil(const WriteOptions& options,
133 ColumnFamilyHandle* column_family, const Slice& key,
134 const Slice& value, uint64_t expiration) {
f67539c2 135 if (column_family->GetID() != DefaultColumnFamily()->GetID()) {
11fdf7f2
TL
136 return Status::NotSupported(
137 "Blob DB doesn't support non-default column family.");
138 }
139 return PutUntil(options, key, value, expiration);
140 }
141
f67539c2 142 using ROCKSDB_NAMESPACE::StackableDB::Get;
11fdf7f2
TL
143 virtual Status Get(const ReadOptions& options,
144 ColumnFamilyHandle* column_family, const Slice& key,
145 PinnableSlice* value) override = 0;
146
147 // Get value and expiration.
148 virtual Status Get(const ReadOptions& options,
149 ColumnFamilyHandle* column_family, const Slice& key,
150 PinnableSlice* value, uint64_t* expiration) = 0;
151 virtual Status Get(const ReadOptions& options, const Slice& key,
152 PinnableSlice* value, uint64_t* expiration) {
153 return Get(options, DefaultColumnFamily(), key, value, expiration);
154 }
155
f67539c2 156 using ROCKSDB_NAMESPACE::StackableDB::MultiGet;
11fdf7f2 157 virtual std::vector<Status> MultiGet(
1e59de90 158 const ReadOptions& options, const std::vector<Slice>& keys,
11fdf7f2
TL
159 std::vector<std::string>* values) override = 0;
160 virtual std::vector<Status> MultiGet(
161 const ReadOptions& options,
162 const std::vector<ColumnFamilyHandle*>& column_families,
163 const std::vector<Slice>& keys,
164 std::vector<std::string>* values) override {
165 for (auto column_family : column_families) {
f67539c2 166 if (column_family->GetID() != DefaultColumnFamily()->GetID()) {
11fdf7f2
TL
167 return std::vector<Status>(
168 column_families.size(),
169 Status::NotSupported(
170 "Blob DB doesn't support non-default column family."));
171 }
172 }
173 return MultiGet(options, keys, values);
174 }
f67539c2
TL
175 virtual void MultiGet(const ReadOptions& /*options*/,
176 ColumnFamilyHandle* /*column_family*/,
177 const size_t num_keys, const Slice* /*keys*/,
178 PinnableSlice* /*values*/, Status* statuses,
179 const bool /*sorted_input*/ = false) override {
180 for (size_t i = 0; i < num_keys; ++i) {
1e59de90
TL
181 statuses[i] =
182 Status::NotSupported("Blob DB doesn't support batched MultiGet");
f67539c2
TL
183 }
184 }
11fdf7f2 185
f67539c2 186 using ROCKSDB_NAMESPACE::StackableDB::SingleDelete;
11fdf7f2
TL
187 virtual Status SingleDelete(const WriteOptions& /*wopts*/,
188 ColumnFamilyHandle* /*column_family*/,
189 const Slice& /*key*/) override {
190 return Status::NotSupported("Not supported operation in blob db.");
191 }
192
f67539c2 193 using ROCKSDB_NAMESPACE::StackableDB::Merge;
11fdf7f2
TL
194 virtual Status Merge(const WriteOptions& /*options*/,
195 ColumnFamilyHandle* /*column_family*/,
196 const Slice& /*key*/, const Slice& /*value*/) override {
197 return Status::NotSupported("Not supported operation in blob db.");
198 }
199
200 virtual Status Write(const WriteOptions& opts,
201 WriteBatch* updates) override = 0;
1e59de90 202
f67539c2 203 using ROCKSDB_NAMESPACE::StackableDB::NewIterator;
11fdf7f2
TL
204 virtual Iterator* NewIterator(const ReadOptions& options) override = 0;
205 virtual Iterator* NewIterator(const ReadOptions& options,
206 ColumnFamilyHandle* column_family) override {
f67539c2 207 if (column_family->GetID() != DefaultColumnFamily()->GetID()) {
11fdf7f2
TL
208 // Blob DB doesn't support non-default column family.
209 return nullptr;
210 }
211 return NewIterator(options);
212 }
213
f67539c2
TL
214 Status CompactFiles(
215 const CompactionOptions& compact_options,
216 const std::vector<std::string>& input_file_names, const int output_level,
217 const int output_path_id = -1,
218 std::vector<std::string>* const output_file_names = nullptr,
219 CompactionJobInfo* compaction_job_info = nullptr) override = 0;
220 Status CompactFiles(
221 const CompactionOptions& compact_options,
222 ColumnFamilyHandle* column_family,
223 const std::vector<std::string>& input_file_names, const int output_level,
224 const int output_path_id = -1,
225 std::vector<std::string>* const output_file_names = nullptr,
226 CompactionJobInfo* compaction_job_info = nullptr) override {
227 if (column_family->GetID() != DefaultColumnFamily()->GetID()) {
228 return Status::NotSupported(
229 "Blob DB doesn't support non-default column family.");
230 }
231
232 return CompactFiles(compact_options, input_file_names, output_level,
233 output_path_id, output_file_names, compaction_job_info);
234 }
235
236 using ROCKSDB_NAMESPACE::StackableDB::Close;
11fdf7f2
TL
237 virtual Status Close() override = 0;
238
239 // Opening blob db.
240 static Status Open(const Options& options, const BlobDBOptions& bdb_options,
241 const std::string& dbname, BlobDB** blob_db);
242
243 static Status Open(const DBOptions& db_options,
244 const BlobDBOptions& bdb_options,
245 const std::string& dbname,
246 const std::vector<ColumnFamilyDescriptor>& column_families,
247 std::vector<ColumnFamilyHandle*>* handles,
248 BlobDB** blob_db);
249
250 virtual BlobDBOptions GetBlobDBOptions() const = 0;
251
252 virtual Status SyncBlobFiles() = 0;
253
254 virtual ~BlobDB() {}
255
256 protected:
257 explicit BlobDB();
258};
259
260// Destroy the content of the database.
261Status DestroyBlobDB(const std::string& dbname, const Options& options,
262 const BlobDBOptions& bdb_options);
263
264} // namespace blob_db
f67539c2 265} // namespace ROCKSDB_NAMESPACE
11fdf7f2 266#endif // ROCKSDB_LITE