]>
Commit | Line | Data |
---|---|---|
7c673cae | 1 | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
11fdf7f2 TL |
2 | // This source code is licensed under both the GPLv2 (found in the |
3 | // COPYING file in the root directory) and Apache 2.0 License | |
4 | // (found in the LICENSE.Apache file in the root directory). | |
7c673cae FG |
5 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. |
6 | // Use of this source code is governed by a BSD-style license that can be | |
7 | // found in the LICENSE file. See the AUTHORS file for names of contributors. | |
8 | ||
11fdf7f2 | 9 | #pragma once |
7c673cae FG |
10 | |
11 | #include <stdint.h> | |
12 | #include <stdio.h> | |
13 | #include <map> | |
14 | #include <memory> | |
15 | #include <string> | |
16 | #include <unordered_map> | |
17 | #include <vector> | |
18 | #include "rocksdb/iterator.h" | |
19 | #include "rocksdb/listener.h" | |
20 | #include "rocksdb/metadata.h" | |
21 | #include "rocksdb/options.h" | |
22 | #include "rocksdb/snapshot.h" | |
23 | #include "rocksdb/sst_file_writer.h" | |
24 | #include "rocksdb/thread_status.h" | |
25 | #include "rocksdb/transaction_log.h" | |
26 | #include "rocksdb/types.h" | |
27 | #include "rocksdb/version.h" | |
28 | ||
29 | #ifdef _WIN32 | |
30 | // Windows API macro interference | |
31 | #undef DeleteFile | |
32 | #endif | |
33 | ||
// ROCKSDB_DEPRECATED_FUNC is placed in front of a function declaration to
// mark it as deprecated using the compiler-specific spelling:
//   - GCC / Clang: __attribute__((__deprecated__))
//   - Windows toolchains: __declspec(deprecated)
// On any other toolchain the macro expands to nothing, so that declarations
// annotated with it still compile (just without the deprecation diagnostic).
#if defined(__GNUC__) || defined(__clang__)
#define ROCKSDB_DEPRECATED_FUNC __attribute__((__deprecated__))
#elif defined(_WIN32)
// Test for definedness rather than value so an undefined _WIN32 never has to
// be evaluated as an integer expression.
#define ROCKSDB_DEPRECATED_FUNC __declspec(deprecated)
#else
#define ROCKSDB_DEPRECATED_FUNC
#endif
39 | ||
40 | namespace rocksdb { | |
41 | ||
42 | struct Options; | |
43 | struct DBOptions; | |
44 | struct ColumnFamilyOptions; | |
45 | struct ReadOptions; | |
46 | struct WriteOptions; | |
47 | struct FlushOptions; | |
48 | struct CompactionOptions; | |
49 | struct CompactRangeOptions; | |
50 | struct TableProperties; | |
51 | struct ExternalSstFileInfo; | |
52 | class WriteBatch; | |
53 | class Env; | |
54 | class EventListener; | |
494da23a | 55 | class StatsHistoryIterator; |
11fdf7f2 | 56 | class TraceWriter; |
494da23a TL |
57 | #ifdef ROCKSDB_LITE |
58 | class CompactionJobInfo; | |
59 | #endif | |
7c673cae FG |
60 | |
61 | extern const std::string kDefaultColumnFamilyName; | |
62 | struct ColumnFamilyDescriptor { | |
63 | std::string name; | |
64 | ColumnFamilyOptions options; | |
65 | ColumnFamilyDescriptor() | |
66 | : name(kDefaultColumnFamilyName), options(ColumnFamilyOptions()) {} | |
67 | ColumnFamilyDescriptor(const std::string& _name, | |
68 | const ColumnFamilyOptions& _options) | |
69 | : name(_name), options(_options) {} | |
70 | }; | |
71 | ||
72 | class ColumnFamilyHandle { | |
73 | public: | |
74 | virtual ~ColumnFamilyHandle() {} | |
75 | // Returns the name of the column family associated with the current handle. | |
76 | virtual const std::string& GetName() const = 0; | |
77 | // Returns the ID of the column family associated with the current handle. | |
78 | virtual uint32_t GetID() const = 0; | |
79 | // Fills "*desc" with the up-to-date descriptor of the column family | |
80 | // associated with this handle. Since it fills "*desc" with the up-to-date | |
81 | // information, this call might internally lock and release DB mutex to | |
82 | // access the up-to-date CF options. In addition, all the pointer-typed | |
83 | // options cannot be referenced any longer than the original options exist. | |
84 | // | |
85 | // Note that this function is not supported in RocksDBLite. | |
86 | virtual Status GetDescriptor(ColumnFamilyDescriptor* desc) = 0; | |
87 | // Returns the comparator of the column family associated with the | |
88 | // current handle. | |
89 | virtual const Comparator* GetComparator() const = 0; | |
90 | }; | |
91 | ||
92 | static const int kMajorVersion = __ROCKSDB_MAJOR__; | |
93 | static const int kMinorVersion = __ROCKSDB_MINOR__; | |
94 | ||
95 | // A range of keys | |
96 | struct Range { | |
11fdf7f2 TL |
97 | Slice start; |
98 | Slice limit; | |
7c673cae | 99 | |
494da23a TL |
100 | Range() {} |
101 | Range(const Slice& s, const Slice& l) : start(s), limit(l) {} | |
7c673cae FG |
102 | }; |
103 | ||
11fdf7f2 TL |
104 | struct RangePtr { |
105 | const Slice* start; | |
106 | const Slice* limit; | |
107 | ||
494da23a TL |
108 | RangePtr() : start(nullptr), limit(nullptr) {} |
109 | RangePtr(const Slice* s, const Slice* l) : start(s), limit(l) {} | |
110 | }; | |
111 | ||
112 | struct IngestExternalFileArg { | |
113 | ColumnFamilyHandle* column_family = nullptr; | |
114 | std::vector<std::string> external_files; | |
115 | IngestExternalFileOptions options; | |
11fdf7f2 TL |
116 | }; |
117 | ||
7c673cae FG |
118 | // A collection of table properties objects, where |
119 | // key: is the table's file name. | |
120 | // value: the table properties object of the given table. | |
121 | typedef std::unordered_map<std::string, std::shared_ptr<const TableProperties>> | |
122 | TablePropertiesCollection; | |
123 | ||
124 | // A DB is a persistent ordered map from keys to values. | |
125 | // A DB is safe for concurrent access from multiple threads without | |
126 | // any external synchronization. | |
127 | class DB { | |
128 | public: | |
129 | // Open the database with the specified "name". | |
130 | // Stores a pointer to a heap-allocated database in *dbptr and returns | |
131 | // OK on success. | |
132 | // Stores nullptr in *dbptr and returns a non-OK status on error. | |
133 | // Caller should delete *dbptr when it is no longer needed. | |
494da23a | 134 | static Status Open(const Options& options, const std::string& name, |
7c673cae FG |
135 | DB** dbptr); |
136 | ||
137 | // Open the database for read only. All DB interfaces | |
138 | // that modify data, like put/delete, will return error. | |
139 | // If the db is opened in read only mode, then no compactions | |
140 | // will happen. | |
141 | // | |
142 | // Not supported in ROCKSDB_LITE, in which case the function will | |
143 | // return Status::NotSupported. | |
494da23a TL |
144 | static Status OpenForReadOnly(const Options& options, const std::string& name, |
145 | DB** dbptr, | |
146 | bool error_if_log_file_exist = false); | |
7c673cae FG |
147 | |
148 | // Open the database for read only with column families. When opening DB with | |
149 | // read only, you can specify only a subset of column families in the | |
150 | // database that should be opened. However, you always need to specify default | |
151 | // column family. The default column family name is 'default' and it's stored | |
152 | // in rocksdb::kDefaultColumnFamilyName | |
153 | // | |
154 | // Not supported in ROCKSDB_LITE, in which case the function will | |
155 | // return Status::NotSupported. | |
156 | static Status OpenForReadOnly( | |
157 | const DBOptions& db_options, const std::string& name, | |
158 | const std::vector<ColumnFamilyDescriptor>& column_families, | |
159 | std::vector<ColumnFamilyHandle*>* handles, DB** dbptr, | |
160 | bool error_if_log_file_exist = false); | |
161 | ||
494da23a TL |
162 | // The following OpenAsSecondary functions create a secondary instance that |
163 | // can dynamically tail the MANIFEST of a primary that must have already been | |
164 | // created. User can call TryCatchUpWithPrimary to make the secondary | |
165 | // instance catch up with primary (WAL tailing is NOT supported now) whenever | |
166 | // the user feels necessary. Column families created by the primary after the | |
167 | // secondary instance starts are currently ignored by the secondary instance. | |
168 | // Column families opened by secondary and dropped by the primary will be | |
169 | // dropped by secondary as well. However the user of the secondary instance | |
170 | // can still access the data of such dropped column family as long as they | |
171 | // do not destroy the corresponding column family handle. | |
172 | // WAL tailing is not supported at present, but will arrive soon. | |
173 | // | |
174 | // The options argument specifies the options to open the secondary instance. | |
175 | // The name argument specifies the name of the primary db that you have used | |
176 | // to open the primary instance. | |
177 | // The secondary_path argument points to a directory where the secondary | |
178 | // instance stores its info log. | |
179 | // The dbptr is an out-arg corresponding to the opened secondary instance. | |
180 | // The pointer points to a heap-allocated database, and the user should | |
181 | // delete it after use. | |
182 | // Open DB as secondary instance with only the default column family. | |
183 | // Return OK on success, non-OK on failures. | |
184 | static Status OpenAsSecondary(const Options& options, const std::string& name, | |
185 | const std::string& secondary_path, DB** dbptr); | |
186 | ||
187 | // Open DB as secondary instance with column families. You can open a subset | |
188 | // of column families in secondary mode. | |
189 | // The db_options specify the database specific options. | |
190 | // The name argument specifies the name of the primary db that you have used | |
191 | // to open the primary instance. | |
192 | // The secondary_path argument points to a directory where the secondary | |
193 | // instance stores its info log. | |
194 | // The column_families argument specifies a list of column families to open. | |
195 | // If any of the column families does not exist, the function returns non-OK | |
196 | // status. | |
197 | // The handles is an out-arg corresponding to the opened database column | |
198 | // family handles. | |
199 | // The dbptr is an out-arg corresponding to the opened secondary instance. | |
200 | // The pointer points to a heap-allocated database, and the caller should | |
201 | // delete it after use. Before deleting the dbptr, the user should also | |
202 | // delete the pointers stored in handles vector. | |
203 | // Return OK on success, non-OK on failures. | |
204 | static Status OpenAsSecondary( | |
205 | const DBOptions& db_options, const std::string& name, | |
206 | const std::string& secondary_path, | |
207 | const std::vector<ColumnFamilyDescriptor>& column_families, | |
208 | std::vector<ColumnFamilyHandle*>* handles, DB** dbptr); | |
209 | ||
7c673cae FG |
210 | // Open DB with column families. |
211 | // db_options specify database specific options | |
212 | // column_families is the vector of all column families in the database, | |
213 | // containing column family name and options. You need to open ALL column | |
214 | // families in the database. To get the list of column families, you can use | |
215 | // ListColumnFamilies(). Also, you can open only a subset of column families | |
216 | // for read-only access. | |
217 | // The default column family name is 'default' and it's stored | |
218 | // in rocksdb::kDefaultColumnFamilyName. | |
219 | // If everything is OK, handles will on return be the same size | |
220 | // as column_families --- handles[i] will be a handle that you | |
221 | // will use to operate on column family column_family[i]. | |
222 | // Before deleting the DB, you have to close all column families by calling | |
223 | // DestroyColumnFamilyHandle() with all the handles. | |
224 | static Status Open(const DBOptions& db_options, const std::string& name, | |
225 | const std::vector<ColumnFamilyDescriptor>& column_families, | |
226 | std::vector<ColumnFamilyHandle*>* handles, DB** dbptr); | |
227 | ||
11fdf7f2 TL |
228 | virtual Status Resume() { return Status::NotSupported(); } |
229 | ||
230 | // Close the DB by releasing resources, closing files etc. This should be | |
231 | // called before calling the destructor so that the caller can get back a | |
232 | // status in case there are any errors. This will not fsync the WAL files. | |
233 | // If syncing is required, the caller must first call SyncWAL(), or Write() | |
234 | // using an empty write batch with WriteOptions.sync=true. | |
235 | // Regardless of the return status, the DB must be freed. If the return | |
236 | // status is NotSupported(), then the DB implementation does cleanup in the | |
237 | // destructor | |
238 | virtual Status Close() { return Status::NotSupported(); } | |
239 | ||
7c673cae FG |
240 | // ListColumnFamilies will open the DB specified by argument name |
241 | // and return the list of all column families in that DB | |
242 | // through column_families argument. The ordering of | |
243 | // column families in column_families is unspecified. | |
244 | static Status ListColumnFamilies(const DBOptions& db_options, | |
245 | const std::string& name, | |
246 | std::vector<std::string>* column_families); | |
247 | ||
494da23a | 248 | DB() {} |
7c673cae FG |
249 | virtual ~DB(); |
250 | ||
251 | // Create a column_family and return the handle of column family | |
252 | // through the argument handle. | |
253 | virtual Status CreateColumnFamily(const ColumnFamilyOptions& options, | |
254 | const std::string& column_family_name, | |
255 | ColumnFamilyHandle** handle); | |
256 | ||
11fdf7f2 TL |
257 | // Bulk create column families with the same column family options. |
258 | // Return the handles of the column families through the argument handles. | |
259 | // In case of error, the request may succeed partially, and handles will | |
260 | // contain column family handles that it managed to create, and have size | |
261 | // equal to the number of created column families. | |
262 | virtual Status CreateColumnFamilies( | |
263 | const ColumnFamilyOptions& options, | |
264 | const std::vector<std::string>& column_family_names, | |
265 | std::vector<ColumnFamilyHandle*>* handles); | |
266 | ||
267 | // Bulk create column families. | |
268 | // Return the handles of the column families through the argument handles. | |
269 | // In case of error, the request may succeed partially, and handles will | |
270 | // contain column family handles that it managed to create, and have size | |
271 | // equal to the number of created column families. | |
272 | virtual Status CreateColumnFamilies( | |
273 | const std::vector<ColumnFamilyDescriptor>& column_families, | |
274 | std::vector<ColumnFamilyHandle*>* handles); | |
275 | ||
7c673cae FG |
276 | // Drop a column family specified by column_family handle. This call |
277 | // only records a drop record in the manifest and prevents the column | |
278 | // family from flushing and compacting. | |
279 | virtual Status DropColumnFamily(ColumnFamilyHandle* column_family); | |
11fdf7f2 TL |
280 | |
281 | // Bulk drop column families. This call only records drop records in the | |
282 | // manifest and prevents the column families from flushing and compacting. | |
283 | // In case of error, the request may succeed partially. User may call | |
284 | // ListColumnFamilies to check the result. | |
285 | virtual Status DropColumnFamilies( | |
286 | const std::vector<ColumnFamilyHandle*>& column_families); | |
287 | ||
7c673cae FG |
288 | // Close a column family specified by column_family handle and destroy |
289 | // the column family handle specified to avoid double deletion. This call | |
290 | // deletes the column family handle by default. Use this method to | |
291 | // close column family instead of deleting column family handle directly | |
292 | virtual Status DestroyColumnFamilyHandle(ColumnFamilyHandle* column_family); | |
293 | ||
294 | // Set the database entry for "key" to "value". | |
295 | // If "key" already exists, it will be overwritten. | |
296 | // Returns OK on success, and a non-OK status on error. | |
297 | // Note: consider setting options.sync = true. | |
298 | virtual Status Put(const WriteOptions& options, | |
299 | ColumnFamilyHandle* column_family, const Slice& key, | |
300 | const Slice& value) = 0; | |
301 | virtual Status Put(const WriteOptions& options, const Slice& key, | |
302 | const Slice& value) { | |
303 | return Put(options, DefaultColumnFamily(), key, value); | |
304 | } | |
305 | ||
306 | // Remove the database entry (if any) for "key". Returns OK on | |
307 | // success, and a non-OK status on error. It is not an error if "key" | |
308 | // did not exist in the database. | |
309 | // Note: consider setting options.sync = true. | |
310 | virtual Status Delete(const WriteOptions& options, | |
311 | ColumnFamilyHandle* column_family, | |
312 | const Slice& key) = 0; | |
313 | virtual Status Delete(const WriteOptions& options, const Slice& key) { | |
314 | return Delete(options, DefaultColumnFamily(), key); | |
315 | } | |
316 | ||
317 | // Remove the database entry for "key". Requires that the key exists | |
318 | // and was not overwritten. Returns OK on success, and a non-OK status | |
319 | // on error. It is not an error if "key" did not exist in the database. | |
320 | // | |
321 | // If a key is overwritten (by calling Put() multiple times), then the result | |
322 | // of calling SingleDelete() on this key is undefined. SingleDelete() only | |
323 | // behaves correctly if there has been only one Put() for this key since the | |
324 | // previous call to SingleDelete() for this key. | |
325 | // | |
326 | // This feature is currently an experimental performance optimization | |
327 | // for a very specific workload. It is up to the caller to ensure that | |
328 | // SingleDelete is only used for a key that is not deleted using Delete() or | |
329 | // written using Merge(). Mixing SingleDelete operations with Deletes and | |
330 | // Merges can result in undefined behavior. | |
331 | // | |
332 | // Note: consider setting options.sync = true. | |
333 | virtual Status SingleDelete(const WriteOptions& options, | |
334 | ColumnFamilyHandle* column_family, | |
335 | const Slice& key) = 0; | |
336 | virtual Status SingleDelete(const WriteOptions& options, const Slice& key) { | |
337 | return SingleDelete(options, DefaultColumnFamily(), key); | |
338 | } | |
339 | ||
340 | // Removes the database entries in the range ["begin_key", "end_key"), i.e., | |
341 | // including "begin_key" and excluding "end_key". Returns OK on success, and | |
342 | // a non-OK status on error. It is not an error if no keys exist in the range | |
343 | // ["begin_key", "end_key"). | |
344 | // | |
494da23a TL |
345 | // This feature is now usable in production, with the following caveats: |
346 | // 1) Accumulating many range tombstones in the memtable will degrade read | |
347 | // performance; this can be avoided by manually flushing occasionally. | |
348 | // 2) Limiting the maximum number of open files in the presence of range | |
349 | // tombstones can degrade read performance. To avoid this problem, set | |
350 | // max_open_files to -1 whenever possible. | |
7c673cae FG |
351 | virtual Status DeleteRange(const WriteOptions& options, |
352 | ColumnFamilyHandle* column_family, | |
353 | const Slice& begin_key, const Slice& end_key); | |
354 | ||
355 | // Merge the database entry for "key" with "value". Returns OK on success, | |
356 | // and a non-OK status on error. The semantics of this operation is | |
357 | // determined by the user provided merge_operator when opening DB. | |
358 | // Note: consider setting options.sync = true. | |
359 | virtual Status Merge(const WriteOptions& options, | |
360 | ColumnFamilyHandle* column_family, const Slice& key, | |
361 | const Slice& value) = 0; | |
362 | virtual Status Merge(const WriteOptions& options, const Slice& key, | |
363 | const Slice& value) { | |
364 | return Merge(options, DefaultColumnFamily(), key, value); | |
365 | } | |
366 | ||
367 | // Apply the specified updates to the database. | |
368 | // If `updates` contains no update, WAL will still be synced if | |
369 | // options.sync=true. | |
370 | // Returns OK on success, non-OK on failure. | |
371 | // Note: consider setting options.sync = true. | |
372 | virtual Status Write(const WriteOptions& options, WriteBatch* updates) = 0; | |
373 | ||
374 | // If the database contains an entry for "key" store the | |
375 | // corresponding value in *value and return OK. | |
376 | // | |
377 | // If there is no entry for "key" leave *value unchanged and return | |
378 | // a status for which Status::IsNotFound() returns true. | |
379 | // | |
380 | // May return some other Status on an error. | |
381 | virtual inline Status Get(const ReadOptions& options, | |
382 | ColumnFamilyHandle* column_family, const Slice& key, | |
383 | std::string* value) { | |
384 | assert(value != nullptr); | |
385 | PinnableSlice pinnable_val(value); | |
386 | assert(!pinnable_val.IsPinned()); | |
387 | auto s = Get(options, column_family, key, &pinnable_val); | |
388 | if (s.ok() && pinnable_val.IsPinned()) { | |
389 | value->assign(pinnable_val.data(), pinnable_val.size()); | |
390 | } // else value is already assigned | |
391 | return s; | |
392 | } | |
393 | virtual Status Get(const ReadOptions& options, | |
394 | ColumnFamilyHandle* column_family, const Slice& key, | |
395 | PinnableSlice* value) = 0; | |
494da23a TL |
396 | virtual Status Get(const ReadOptions& options, const Slice& key, |
397 | std::string* value) { | |
7c673cae FG |
398 | return Get(options, DefaultColumnFamily(), key, value); |
399 | } | |
400 | ||
401 | // If keys[i] does not exist in the database, then the i'th returned | |
402 | // status will be one for which Status::IsNotFound() is true, and | |
403 | // (*values)[i] will be set to some arbitrary value (often ""). Otherwise, | |
404 | // the i'th returned status will have Status::ok() true, and (*values)[i] | |
405 | // will store the value associated with keys[i]. | |
406 | // | |
407 | // (*values) will always be resized to be the same size as (keys). | |
408 | // Similarly, the number of returned statuses will be the number of keys. | |
409 | // Note: keys will not be "de-duplicated". Duplicate keys will return | |
410 | // duplicate values in order. | |
411 | virtual std::vector<Status> MultiGet( | |
412 | const ReadOptions& options, | |
413 | const std::vector<ColumnFamilyHandle*>& column_family, | |
414 | const std::vector<Slice>& keys, std::vector<std::string>* values) = 0; | |
415 | virtual std::vector<Status> MultiGet(const ReadOptions& options, | |
416 | const std::vector<Slice>& keys, | |
417 | std::vector<std::string>* values) { | |
494da23a TL |
418 | return MultiGet( |
419 | options, | |
420 | std::vector<ColumnFamilyHandle*>(keys.size(), DefaultColumnFamily()), | |
421 | keys, values); | |
7c673cae FG |
422 | } |
423 | ||
424 | // If the key definitely does not exist in the database, then this method | |
425 | // returns false, else true. If the caller wants to obtain value when the key | |
426 | // is found in memory, a bool for 'value_found' must be passed. 'value_found' | |
427 | // will be true on return if value has been set properly. | |
428 | // This check is potentially lighter-weight than invoking DB::Get(). One way | |
429 | // to make this lighter weight is to avoid doing any IOs. | |
430 | // Default implementation here returns true and sets 'value_found' to false | |
431 | virtual bool KeyMayExist(const ReadOptions& /*options*/, | |
432 | ColumnFamilyHandle* /*column_family*/, | |
433 | const Slice& /*key*/, std::string* /*value*/, | |
434 | bool* value_found = nullptr) { | |
435 | if (value_found != nullptr) { | |
436 | *value_found = false; | |
437 | } | |
438 | return true; | |
439 | } | |
440 | virtual bool KeyMayExist(const ReadOptions& options, const Slice& key, | |
441 | std::string* value, bool* value_found = nullptr) { | |
442 | return KeyMayExist(options, DefaultColumnFamily(), key, value, value_found); | |
443 | } | |
444 | ||
445 | // Return a heap-allocated iterator over the contents of the database. | |
446 | // The result of NewIterator() is initially invalid (caller must | |
447 | // call one of the Seek methods on the iterator before using it). | |
448 | // | |
449 | // Caller should delete the iterator when it is no longer needed. | |
450 | // The returned iterator should be deleted before this db is deleted. | |
451 | virtual Iterator* NewIterator(const ReadOptions& options, | |
452 | ColumnFamilyHandle* column_family) = 0; | |
453 | virtual Iterator* NewIterator(const ReadOptions& options) { | |
454 | return NewIterator(options, DefaultColumnFamily()); | |
455 | } | |
456 | // Returns iterators from a consistent database state across multiple | |
457 | // column families. Iterators are heap allocated and need to be deleted | |
458 | // before the db is deleted | |
459 | virtual Status NewIterators( | |
460 | const ReadOptions& options, | |
461 | const std::vector<ColumnFamilyHandle*>& column_families, | |
462 | std::vector<Iterator*>* iterators) = 0; | |
463 | ||
464 | // Return a handle to the current DB state. Iterators created with | |
465 | // this handle will all observe a stable snapshot of the current DB | |
466 | // state. The caller must call ReleaseSnapshot(result) when the | |
467 | // snapshot is no longer needed. | |
468 | // | |
469 | // nullptr will be returned if the DB fails to take a snapshot or does | |
470 | // not support snapshot. | |
471 | virtual const Snapshot* GetSnapshot() = 0; | |
472 | ||
473 | // Release a previously acquired snapshot. The caller must not | |
474 | // use "snapshot" after this call. | |
475 | virtual void ReleaseSnapshot(const Snapshot* snapshot) = 0; | |
476 | ||
477 | #ifndef ROCKSDB_LITE | |
478 | // Contains all valid property arguments for GetProperty(). | |
479 | // | |
480 | // NOTE: Property names cannot end in numbers since those are interpreted as | |
481 | // arguments, e.g., see kNumFilesAtLevelPrefix. | |
482 | struct Properties { | |
483 | // "rocksdb.num-files-at-level<N>" - returns string containing the number | |
484 | // of files at level <N>, where <N> is an ASCII representation of a | |
485 | // level number (e.g., "0"). | |
486 | static const std::string kNumFilesAtLevelPrefix; | |
487 | ||
488 | // "rocksdb.compression-ratio-at-level<N>" - returns string containing the | |
489 | // compression ratio of data at level <N>, where <N> is an ASCII | |
490 | // representation of a level number (e.g., "0"). Here, compression | |
491 | // ratio is defined as uncompressed data size / compressed file size. | |
492 | // Returns "-1.0" if no open files at level <N>. | |
493 | static const std::string kCompressionRatioAtLevelPrefix; | |
494 | ||
495 | // "rocksdb.stats" - returns a multi-line string containing the data | |
496 | // described by kCFStats followed by the data described by kDBStats. | |
497 | static const std::string kStats; | |
498 | ||
499 | // "rocksdb.sstables" - returns a multi-line string summarizing current | |
500 | // SST files. | |
501 | static const std::string kSSTables; | |
502 | ||
503 | // "rocksdb.cfstats" - Both of "rocksdb.cfstats-no-file-histogram" and | |
504 | // "rocksdb.cf-file-histogram" together. See below for description | |
505 | // of the two. | |
506 | static const std::string kCFStats; | |
507 | ||
508 | // "rocksdb.cfstats-no-file-histogram" - returns a multi-line string with | |
509 | // general column family stats per-level over db's lifetime ("L<n>"), | |
510 | // aggregated over db's lifetime ("Sum"), and aggregated over the | |
511 | // interval since the last retrieval ("Int"). | |
512 | // It could also be used to return the stats in the format of the map. | |
513 | // In this case there will be a pair of string to array of double for | |
514 | // each level as well as for "Sum". "Int" stats will not be affected | |
11fdf7f2 | 515 | // when this form of stats are retrieved. |
7c673cae FG |
516 | static const std::string kCFStatsNoFileHistogram; |
517 | ||
518 | // "rocksdb.cf-file-histogram" - print out how many file reads to every | |
519 | // level, as well as the histogram of latency of single requests. | |
520 | static const std::string kCFFileHistogram; | |
521 | ||
522 | // "rocksdb.dbstats" - returns a multi-line string with general database | |
523 | // stats, both cumulative (over the db's lifetime) and interval (since | |
524 | // the last retrieval of kDBStats). | |
525 | static const std::string kDBStats; | |
526 | ||
527 | // "rocksdb.levelstats" - returns multi-line string containing the number | |
528 | // of files per level and total size of each level (MB). | |
529 | static const std::string kLevelStats; | |
530 | ||
531 | // "rocksdb.num-immutable-mem-table" - returns number of immutable | |
532 | // memtables that have not yet been flushed. | |
533 | static const std::string kNumImmutableMemTable; | |
534 | ||
535 | // "rocksdb.num-immutable-mem-table-flushed" - returns number of immutable | |
536 | // memtables that have already been flushed. | |
537 | static const std::string kNumImmutableMemTableFlushed; | |
538 | ||
539 | // "rocksdb.mem-table-flush-pending" - returns 1 if a memtable flush is | |
540 | // pending; otherwise, returns 0. | |
541 | static const std::string kMemTableFlushPending; | |
542 | ||
543 | // "rocksdb.num-running-flushes" - returns the number of currently running | |
544 | // flushes. | |
545 | static const std::string kNumRunningFlushes; | |
546 | ||
547 | // "rocksdb.compaction-pending" - returns 1 if at least one compaction is | |
548 | // pending; otherwise, returns 0. | |
549 | static const std::string kCompactionPending; | |
550 | ||
551 | // "rocksdb.num-running-compactions" - returns the number of currently | |
552 | // running compactions. | |
553 | static const std::string kNumRunningCompactions; | |
554 | ||
555 | // "rocksdb.background-errors" - returns accumulated number of background | |
556 | // errors. | |
557 | static const std::string kBackgroundErrors; | |
558 | ||
559 | // "rocksdb.cur-size-active-mem-table" - returns approximate size of active | |
560 | // memtable (bytes). | |
561 | static const std::string kCurSizeActiveMemTable; | |
562 | ||
563 | // "rocksdb.cur-size-all-mem-tables" - returns approximate size of active | |
564 | // and unflushed immutable memtables (bytes). | |
565 | static const std::string kCurSizeAllMemTables; | |
566 | ||
567 | // "rocksdb.size-all-mem-tables" - returns approximate size of active, | |
568 | // unflushed immutable, and pinned immutable memtables (bytes). | |
569 | static const std::string kSizeAllMemTables; | |
570 | ||
571 | // "rocksdb.num-entries-active-mem-table" - returns total number of entries | |
572 | // in the active memtable. | |
573 | static const std::string kNumEntriesActiveMemTable; | |
574 | ||
575 | // "rocksdb.num-entries-imm-mem-tables" - returns total number of entries | |
576 | // in the unflushed immutable memtables. | |
577 | static const std::string kNumEntriesImmMemTables; | |
578 | ||
579 | // "rocksdb.num-deletes-active-mem-table" - returns total number of delete | |
580 | // entries in the active memtable. | |
581 | static const std::string kNumDeletesActiveMemTable; | |
582 | ||
583 | // "rocksdb.num-deletes-imm-mem-tables" - returns total number of delete | |
584 | // entries in the unflushed immutable memtables. | |
585 | static const std::string kNumDeletesImmMemTables; | |
586 | ||
587 | // "rocksdb.estimate-num-keys" - returns estimated number of total keys in | |
588 | // the active and unflushed immutable memtables and storage. | |
589 | static const std::string kEstimateNumKeys; | |
590 | ||
591 | // "rocksdb.estimate-table-readers-mem" - returns estimated memory used for | |
592 | // reading SST tables, excluding memory used in block cache (e.g., | |
593 | // filter and index blocks). | |
594 | static const std::string kEstimateTableReadersMem; | |
595 | ||
596 | // "rocksdb.is-file-deletions-enabled" - returns 0 if deletion of obsolete | |
597 | // files is enabled; otherwise, returns a non-zero number. | |
598 | static const std::string kIsFileDeletionsEnabled; | |
599 | ||
600 | // "rocksdb.num-snapshots" - returns number of unreleased snapshots of the | |
601 | // database. | |
602 | static const std::string kNumSnapshots; | |
603 | ||
604 | // "rocksdb.oldest-snapshot-time" - returns number representing unix | |
605 | // timestamp of oldest unreleased snapshot. | |
606 | static const std::string kOldestSnapshotTime; | |
607 | ||
608 | // "rocksdb.num-live-versions" - returns number of live versions. `Version` | |
609 | // is an internal data structure. See version_set.h for details. More | |
610 | // live versions often mean more SST files are held from being deleted, | |
611 | // by iterators or unfinished compactions. | |
612 | static const std::string kNumLiveVersions; | |
613 | ||
11fdf7f2 | 614 | // "rocksdb.current-super-version-number" - returns number of current LSM |
7c673cae FG |
615 | // version. It is a uint64_t integer number, incremented after there is |
616 | // any change to the LSM tree. The number is not preserved after restarting | |
617 | // the DB. After DB restart, it will start from 0 again. | |
618 | static const std::string kCurrentSuperVersionNumber; | |
619 | ||
620 | // "rocksdb.estimate-live-data-size" - returns an estimate of the amount of | |
621 | // live data in bytes. | |
622 | static const std::string kEstimateLiveDataSize; | |
623 | ||
11fdf7f2 | 624 | // "rocksdb.min-log-number-to-keep" - return the minimum log number of the |
7c673cae FG |
625 | // log files that should be kept. |
626 | static const std::string kMinLogNumberToKeep; | |
627 | ||
494da23a TL |
628 | // "rocksdb.min-obsolete-sst-number-to-keep" - return the minimum file |
629 | // number for an obsolete SST to be kept. The max value of `uint64_t` | |
630 | // will be returned if all obsolete files can be deleted. | |
631 | static const std::string kMinObsoleteSstNumberToKeep; | |
632 | ||
7c673cae FG |
633 | // "rocksdb.total-sst-files-size" - returns total size (bytes) of all SST |
634 | // files. | |
635 | // WARNING: may slow down online queries if there are too many files. | |
636 | static const std::string kTotalSstFilesSize; | |
637 | ||
11fdf7f2 TL |
638 | // "rocksdb.live-sst-files-size" - returns total size (bytes) of all SST |
639 | // files belonging to the latest LSM tree. | |
640 | static const std::string kLiveSstFilesSize; | |
641 | ||
7c673cae FG |
642 | // "rocksdb.base-level" - returns number of level to which L0 data will be |
643 | // compacted. | |
644 | static const std::string kBaseLevel; | |
645 | ||
646 | // "rocksdb.estimate-pending-compaction-bytes" - returns estimated total | |
647 | // number of bytes compaction needs to rewrite to get all levels down | |
648 | // to under target size. Not valid for other compactions than level- | |
649 | // based. | |
650 | static const std::string kEstimatePendingCompactionBytes; | |
651 | ||
652 | // "rocksdb.aggregated-table-properties" - returns a string representation | |
653 | // of the aggregated table properties of the target column family. | |
654 | static const std::string kAggregatedTableProperties; | |
655 | ||
656 | // "rocksdb.aggregated-table-properties-at-level<N>", same as the previous | |
657 | // one but only returns the aggregated table properties of the | |
658 | // specified level "N" at the target column family. | |
659 | static const std::string kAggregatedTablePropertiesAtLevel; | |
660 | ||
661 | // "rocksdb.actual-delayed-write-rate" - returns the current actual delayed | |
662 | // write rate. 0 means no delay. | |
663 | static const std::string kActualDelayedWriteRate; | |
664 | ||
665 | // "rocksdb.is-write-stopped" - Return 1 if write has been stopped. | |
666 | static const std::string kIsWriteStopped; | |
11fdf7f2 TL |
667 | |
668 | // "rocksdb.estimate-oldest-key-time" - returns an estimation of | |
669 | // oldest key timestamp in the DB. Currently only available for | |
670 | // FIFO compaction with | |
671 | // compaction_options_fifo.allow_compaction = false. | |
672 | static const std::string kEstimateOldestKeyTime; | |
673 | ||
674 | // "rocksdb.block-cache-capacity" - returns block cache capacity. | |
675 | static const std::string kBlockCacheCapacity; | |
676 | ||
677 | // "rocksdb.block-cache-usage" - returns the memory size for the entries | |
678 | // residing in block cache. | |
679 | static const std::string kBlockCacheUsage; | |
680 | ||
681 | // "rocksdb.block-cache-pinned-usage" - returns the memory size for the | |
682 | // entries being pinned. | |
683 | static const std::string kBlockCachePinnedUsage; | |
684 | ||
685 | // "rocksdb.options-statistics" - returns multi-line string | |
686 | // of options.statistics | |
687 | static const std::string kOptionsStatistics; | |
7c673cae FG |
688 | }; |
689 | #endif /* ROCKSDB_LITE */ | |
690 | ||
691 | // DB implementations can export properties about their state via this method. | |
692 | // If "property" is a valid property understood by this DB implementation (see | |
693 | // Properties struct above for valid options), fills "*value" with its current | |
694 | // value and returns true. Otherwise, returns false. | |
695 | virtual bool GetProperty(ColumnFamilyHandle* column_family, | |
696 | const Slice& property, std::string* value) = 0; | |
697 | virtual bool GetProperty(const Slice& property, std::string* value) { | |
698 | return GetProperty(DefaultColumnFamily(), property, value); | |
699 | } | |
700 | virtual bool GetMapProperty(ColumnFamilyHandle* column_family, | |
701 | const Slice& property, | |
11fdf7f2 | 702 | std::map<std::string, std::string>* value) = 0; |
7c673cae | 703 | virtual bool GetMapProperty(const Slice& property, |
11fdf7f2 | 704 | std::map<std::string, std::string>* value) { |
7c673cae FG |
705 | return GetMapProperty(DefaultColumnFamily(), property, value); |
706 | } | |
707 | ||
708 | // Similar to GetProperty(), but only works for a subset of properties whose | |
709 | // return value is an integer. Return the value by integer. Supported | |
710 | // properties: | |
711 | // "rocksdb.num-immutable-mem-table" | |
712 | // "rocksdb.mem-table-flush-pending" | |
713 | // "rocksdb.compaction-pending" | |
714 | // "rocksdb.background-errors" | |
715 | // "rocksdb.cur-size-active-mem-table" | |
716 | // "rocksdb.cur-size-all-mem-tables" | |
717 | // "rocksdb.size-all-mem-tables" | |
718 | // "rocksdb.num-entries-active-mem-table" | |
719 | // "rocksdb.num-entries-imm-mem-tables" | |
720 | // "rocksdb.num-deletes-active-mem-table" | |
721 | // "rocksdb.num-deletes-imm-mem-tables" | |
722 | // "rocksdb.estimate-num-keys" | |
723 | // "rocksdb.estimate-table-readers-mem" | |
724 | // "rocksdb.is-file-deletions-enabled" | |
725 | // "rocksdb.num-snapshots" | |
726 | // "rocksdb.oldest-snapshot-time" | |
727 | // "rocksdb.num-live-versions" | |
728 | // "rocksdb.current-super-version-number" | |
729 | // "rocksdb.estimate-live-data-size" | |
730 | // "rocksdb.min-log-number-to-keep" | |
494da23a | 731 | // "rocksdb.min-obsolete-sst-number-to-keep" |
7c673cae | 732 | // "rocksdb.total-sst-files-size" |
11fdf7f2 | 733 | // "rocksdb.live-sst-files-size" |
7c673cae FG |
734 | // "rocksdb.base-level" |
735 | // "rocksdb.estimate-pending-compaction-bytes" | |
736 | // "rocksdb.num-running-compactions" | |
737 | // "rocksdb.num-running-flushes" | |
738 | // "rocksdb.actual-delayed-write-rate" | |
739 | // "rocksdb.is-write-stopped" | |
11fdf7f2 TL |
740 | // "rocksdb.estimate-oldest-key-time" |
741 | // "rocksdb.block-cache-capacity" | |
742 | // "rocksdb.block-cache-usage" | |
743 | // "rocksdb.block-cache-pinned-usage" | |
7c673cae FG |
744 | virtual bool GetIntProperty(ColumnFamilyHandle* column_family, |
745 | const Slice& property, uint64_t* value) = 0; | |
746 | virtual bool GetIntProperty(const Slice& property, uint64_t* value) { | |
747 | return GetIntProperty(DefaultColumnFamily(), property, value); | |
748 | } | |
749 | ||
750 | // Reset internal stats for DB and all column families. | |
751 | // Note this doesn't reset options.statistics as it is not owned by | |
752 | // DB. | |
753 | virtual Status ResetStats() { | |
754 | return Status::NotSupported("Not implemented"); | |
755 | } | |
756 | ||
757 | // Same as GetIntProperty(), but this one returns the aggregated int | |
758 | // property from all column families. | |
759 | virtual bool GetAggregatedIntProperty(const Slice& property, | |
760 | uint64_t* value) = 0; | |
761 | ||
762 | // Flags for DB::GetApproximateSizes that specify whether memtable | |
763 | // stats should be included, or file stats approximation or both | |
764 | enum SizeApproximationFlags : uint8_t { | |
765 | NONE = 0, | |
766 | INCLUDE_MEMTABLES = 1, | |
767 | INCLUDE_FILES = 1 << 1 | |
768 | }; | |
769 | ||
770 | // For each i in [0,n-1], store in "sizes[i]", the approximate | |
771 | // file system space used by keys in "[range[i].start .. range[i].limit)". | |
772 | // | |
773 | // Note that the returned sizes measure file system space usage, so | |
774 | // if the user data compresses by a factor of ten, the returned | |
775 | // sizes will be one-tenth the size of the corresponding user data size. | |
776 | // | |
777 | // include_flags defines whether the returned size should include | |
778 | // the recently written data in the mem-tables (if | |
779 | // the mem-table type supports it), data serialized to disk, or both. | |
780 | // include_flags should be of type DB::SizeApproximationFlags | |
781 | virtual void GetApproximateSizes(ColumnFamilyHandle* column_family, | |
782 | const Range* range, int n, uint64_t* sizes, | |
494da23a | 783 | uint8_t include_flags = INCLUDE_FILES) = 0; |
7c673cae | 784 | virtual void GetApproximateSizes(const Range* range, int n, uint64_t* sizes, |
494da23a TL |
785 | uint8_t include_flags = INCLUDE_FILES) { |
786 | GetApproximateSizes(DefaultColumnFamily(), range, n, sizes, include_flags); | |
7c673cae FG |
787 | } |
788 | ||
789 | // The method is similar to GetApproximateSizes, except it | |
790 | // returns approximate number of records in memtables. | |
791 | virtual void GetApproximateMemTableStats(ColumnFamilyHandle* column_family, | |
792 | const Range& range, | |
793 | uint64_t* const count, | |
794 | uint64_t* const size) = 0; | |
795 | virtual void GetApproximateMemTableStats(const Range& range, | |
796 | uint64_t* const count, | |
797 | uint64_t* const size) { | |
798 | GetApproximateMemTableStats(DefaultColumnFamily(), range, count, size); | |
799 | } | |
800 | ||
801 | // Deprecated versions of GetApproximateSizes | |
802 | ROCKSDB_DEPRECATED_FUNC virtual void GetApproximateSizes( | |
494da23a | 803 | const Range* range, int n, uint64_t* sizes, bool include_memtable) { |
7c673cae FG |
804 | uint8_t include_flags = SizeApproximationFlags::INCLUDE_FILES; |
805 | if (include_memtable) { | |
806 | include_flags |= SizeApproximationFlags::INCLUDE_MEMTABLES; | |
807 | } | |
808 | GetApproximateSizes(DefaultColumnFamily(), range, n, sizes, include_flags); | |
809 | } | |
810 | ROCKSDB_DEPRECATED_FUNC virtual void GetApproximateSizes( | |
494da23a TL |
811 | ColumnFamilyHandle* column_family, const Range* range, int n, |
812 | uint64_t* sizes, bool include_memtable) { | |
7c673cae FG |
813 | uint8_t include_flags = SizeApproximationFlags::INCLUDE_FILES; |
814 | if (include_memtable) { | |
815 | include_flags |= SizeApproximationFlags::INCLUDE_MEMTABLES; | |
816 | } | |
817 | GetApproximateSizes(column_family, range, n, sizes, include_flags); | |
818 | } | |
819 | ||
820 | // Compact the underlying storage for the key range [*begin,*end]. | |
821 | // The actual compaction interval might be a superset of [*begin, *end]. | |
822 | // In particular, deleted and overwritten versions are discarded, | |
823 | // and the data is rearranged to reduce the cost of operations | |
824 | // needed to access the data. This operation should typically only | |
825 | // be invoked by users who understand the underlying implementation. | |
826 | // | |
827 | // begin==nullptr is treated as a key before all keys in the database. | |
828 | // end==nullptr is treated as a key after all keys in the database. | |
829 | // Therefore the following call will compact the entire database: | |
830 | // db->CompactRange(options, nullptr, nullptr); | |
831 | // Note that after the entire database is compacted, all data are pushed | |
832 | // down to the last level containing any data. If the total data size after | |
833 | // compaction is reduced, that level might not be appropriate for hosting all | |
834 | // the files. In this case, client could set options.change_level to true, to | |
835 | // move the files back to the minimum level capable of holding the data set | |
836 | // or a given level (specified by non-negative options.target_level). | |
837 | virtual Status CompactRange(const CompactRangeOptions& options, | |
838 | ColumnFamilyHandle* column_family, | |
839 | const Slice* begin, const Slice* end) = 0; | |
840 | virtual Status CompactRange(const CompactRangeOptions& options, | |
841 | const Slice* begin, const Slice* end) { | |
842 | return CompactRange(options, DefaultColumnFamily(), begin, end); | |
843 | } | |
844 | ||
845 | ROCKSDB_DEPRECATED_FUNC virtual Status CompactRange( | |
846 | ColumnFamilyHandle* column_family, const Slice* begin, const Slice* end, | |
847 | bool change_level = false, int target_level = -1, | |
848 | uint32_t target_path_id = 0) { | |
849 | CompactRangeOptions options; | |
850 | options.change_level = change_level; | |
851 | options.target_level = target_level; | |
852 | options.target_path_id = target_path_id; | |
853 | return CompactRange(options, column_family, begin, end); | |
854 | } | |
855 | ||
856 | ROCKSDB_DEPRECATED_FUNC virtual Status CompactRange( | |
857 | const Slice* begin, const Slice* end, bool change_level = false, | |
858 | int target_level = -1, uint32_t target_path_id = 0) { | |
859 | CompactRangeOptions options; | |
860 | options.change_level = change_level; | |
861 | options.target_level = target_level; | |
862 | options.target_path_id = target_path_id; | |
863 | return CompactRange(options, DefaultColumnFamily(), begin, end); | |
864 | } | |
865 | ||
866 | virtual Status SetOptions( | |
867 | ColumnFamilyHandle* /*column_family*/, | |
868 | const std::unordered_map<std::string, std::string>& /*new_options*/) { | |
869 | return Status::NotSupported("Not implemented"); | |
870 | } | |
871 | virtual Status SetOptions( | |
872 | const std::unordered_map<std::string, std::string>& new_options) { | |
873 | return SetOptions(DefaultColumnFamily(), new_options); | |
874 | } | |
875 | ||
876 | virtual Status SetDBOptions( | |
877 | const std::unordered_map<std::string, std::string>& new_options) = 0; | |
878 | ||
879 | // CompactFiles() inputs a list of files specified by file numbers and | |
880 | // compacts them to the specified level. Note that the behavior is different | |
881 | // from CompactRange() in that CompactFiles() performs the compaction job | |
882 | // using the CURRENT thread. | |
883 | // | |
884 | // @see GetDataBaseMetaData | |
885 | // @see GetColumnFamilyMetaData | |
886 | virtual Status CompactFiles( | |
887 | const CompactionOptions& compact_options, | |
888 | ColumnFamilyHandle* column_family, | |
494da23a TL |
889 | const std::vector<std::string>& input_file_names, const int output_level, |
890 | const int output_path_id = -1, | |
891 | std::vector<std::string>* const output_file_names = nullptr, | |
892 | CompactionJobInfo* compaction_job_info = nullptr) = 0; | |
7c673cae FG |
893 | |
894 | virtual Status CompactFiles( | |
895 | const CompactionOptions& compact_options, | |
494da23a TL |
896 | const std::vector<std::string>& input_file_names, const int output_level, |
897 | const int output_path_id = -1, | |
898 | std::vector<std::string>* const output_file_names = nullptr, | |
899 | CompactionJobInfo* compaction_job_info = nullptr) { | |
7c673cae | 900 | return CompactFiles(compact_options, DefaultColumnFamily(), |
11fdf7f2 | 901 | input_file_names, output_level, output_path_id, |
494da23a | 902 | output_file_names, compaction_job_info); |
7c673cae FG |
903 | } |
904 | ||
905 | // This function will wait until all currently running background processes | |
906 | // finish. After it returns, no background process will be run until | |
11fdf7f2 | 907 | // ContinueBackgroundWork is called |
7c673cae FG |
908 | virtual Status PauseBackgroundWork() = 0; |
909 | virtual Status ContinueBackgroundWork() = 0; | |
910 | ||
911 | // This function will enable automatic compactions for the given column | |
912 | // families if they were previously disabled. The function will first set the | |
913 | // disable_auto_compactions option for each column family to 'false', after | |
914 | // which it will schedule a flush/compaction. | |
915 | // | |
916 | // NOTE: Setting disable_auto_compactions to 'false' through SetOptions() API | |
917 | // does NOT schedule a flush/compaction afterwards, and only changes the | |
918 | // parameter itself within the column family option. | |
919 | // | |
920 | virtual Status EnableAutoCompaction( | |
921 | const std::vector<ColumnFamilyHandle*>& column_family_handles) = 0; | |
922 | ||
923 | // Number of levels used for this DB. | |
924 | virtual int NumberLevels(ColumnFamilyHandle* column_family) = 0; | |
925 | virtual int NumberLevels() { return NumberLevels(DefaultColumnFamily()); } | |
926 | ||
927 | // Maximum level to which a new compacted memtable is pushed if it | |
928 | // does not create overlap. | |
929 | virtual int MaxMemCompactionLevel(ColumnFamilyHandle* column_family) = 0; | |
930 | virtual int MaxMemCompactionLevel() { | |
931 | return MaxMemCompactionLevel(DefaultColumnFamily()); | |
932 | } | |
933 | ||
934 | // Number of files in level-0 that would stop writes. | |
935 | virtual int Level0StopWriteTrigger(ColumnFamilyHandle* column_family) = 0; | |
936 | virtual int Level0StopWriteTrigger() { | |
937 | return Level0StopWriteTrigger(DefaultColumnFamily()); | |
938 | } | |
939 | ||
940 | // Get DB name -- the exact same name that was provided as an argument to | |
941 | // DB::Open() | |
942 | virtual const std::string& GetName() const = 0; | |
943 | ||
944 | // Get Env object from the DB | |
945 | virtual Env* GetEnv() const = 0; | |
946 | ||
947 | // Get DB Options that we use. During the process of opening the | |
948 | // column family, the options provided when calling DB::Open() or | |
949 | // DB::CreateColumnFamily() will have been "sanitized" and transformed | |
950 | // in an implementation-defined manner. | |
951 | virtual Options GetOptions(ColumnFamilyHandle* column_family) const = 0; | |
952 | virtual Options GetOptions() const { | |
953 | return GetOptions(DefaultColumnFamily()); | |
954 | } | |
955 | ||
956 | virtual DBOptions GetDBOptions() const = 0; | |
957 | ||
958 | // Flush all mem-table data. | |
494da23a TL |
959 | // Flush a single column family, even when atomic flush is enabled. To flush |
960 | // multiple column families, use Flush(options, column_families). | |
7c673cae FG |
961 | virtual Status Flush(const FlushOptions& options, |
962 | ColumnFamilyHandle* column_family) = 0; | |
963 | virtual Status Flush(const FlushOptions& options) { | |
964 | return Flush(options, DefaultColumnFamily()); | |
965 | } | |
494da23a TL |
966 | // Flushes multiple column families. |
967 | // If atomic flush is not enabled, Flush(options, column_families) is | |
968 | // equivalent to calling Flush(options, column_family) multiple times. | |
969 | // If atomic flush is enabled, Flush(options, column_families) will flush all | |
970 | // column families specified in 'column_families' up to the latest sequence | |
971 | // number at the time when flush is requested. | |
972 | // Note that RocksDB 5.15 and earlier may not be able to open later versions | |
973 | // with atomic flush enabled. | |
974 | virtual Status Flush( | |
975 | const FlushOptions& options, | |
976 | const std::vector<ColumnFamilyHandle*>& column_families) = 0; | |
7c673cae | 977 | |
11fdf7f2 TL |
978 | // Flush the WAL memory buffer to the file. If sync is true, it calls SyncWAL |
979 | // afterwards. | |
980 | virtual Status FlushWAL(bool /*sync*/) { | |
981 | return Status::NotSupported("FlushWAL not implemented"); | |
982 | } | |
7c673cae FG |
983 | // Sync the wal. Note that Write() followed by SyncWAL() is not exactly the |
984 | // same as Write() with sync=true: in the latter case the changes won't be | |
985 | // visible until the sync is done. | |
986 | // Currently only works if allow_mmap_writes = false in Options. | |
987 | virtual Status SyncWAL() = 0; | |
988 | ||
494da23a TL |
989 | // Lock the WAL. Also flushes the WAL after locking. |
990 | virtual Status LockWAL() { | |
991 | return Status::NotSupported("LockWAL not implemented"); | |
992 | } | |
993 | ||
994 | // Unlock the WAL. | |
995 | virtual Status UnlockWAL() { | |
996 | return Status::NotSupported("UnlockWAL not implemented"); | |
997 | } | |
998 | ||
7c673cae FG |
999 | // The sequence number of the most recent transaction. |
1000 | virtual SequenceNumber GetLatestSequenceNumber() const = 0; | |
1001 | ||
11fdf7f2 TL |
1002 | // Instructs DB to preserve deletes with sequence numbers >= passed seqnum. |
1003 | // Has no effect if DBOptions.preserve_deletes is set to false. | |
1004 | // This function assumes that user calls this function with monotonically | |
1005 | // increasing seqnums (otherwise we can't guarantee that a particular delete | |
1006 | // hasn't been already processed); returns true if the value was successfully | |
1007 | // updated, false if user attempted to call it with seqnum <= current value. | |
1008 | virtual bool SetPreserveDeletesSequenceNumber(SequenceNumber seqnum) = 0; | |
1009 | ||
7c673cae FG |
1010 | #ifndef ROCKSDB_LITE |
1011 | ||
1012 | // Prevent file deletions. Compactions will continue to occur, | |
1013 | // but no obsolete files will be deleted. Calling this multiple | |
1014 | // times has the same effect as calling it once. | |
1015 | virtual Status DisableFileDeletions() = 0; | |
1016 | ||
1017 | // Allow compactions to delete obsolete files. | |
1018 | // If force == true, the call to EnableFileDeletions() will guarantee that | |
1019 | // file deletions are enabled after the call, even if DisableFileDeletions() | |
1020 | // was called multiple times before. | |
1021 | // If force == false, EnableFileDeletions will only enable file deletion | |
1022 | // after it's been called at least as many times as DisableFileDeletions(), | |
1023 | // enabling the two methods to be called by two threads concurrently without | |
1024 | // synchronization -- i.e., file deletions will be enabled only after both | |
1025 | // threads call EnableFileDeletions() | |
1026 | virtual Status EnableFileDeletions(bool force = true) = 0; | |
1027 | ||
1028 | // GetLiveFiles followed by GetSortedWalFiles can generate a lossless backup | |
1029 | ||
1030 | // Retrieve the list of all files in the database. The files are | |
11fdf7f2 TL |
1031 | // relative to the dbname and are not absolute paths. Despite being relative |
1032 | // paths, the file names begin with "/". The valid size of the manifest file | |
1033 | // is returned in manifest_file_size. The manifest file is an ever growing | |
1034 | // file, but only the portion specified by manifest_file_size is valid for | |
1035 | // this snapshot. Setting flush_memtable to true does Flush before recording | |
1036 | // the live files. Setting flush_memtable to false is useful when we don't | |
1037 | // want to wait for flush which may have to wait for compaction to complete | |
1038 | // taking an indeterminate time. | |
7c673cae FG |
1039 | // |
1040 | // In case you have multiple column families, even if flush_memtable is true, | |
1041 | // you still need to call GetSortedWalFiles after GetLiveFiles to compensate | |
1042 | // for new data that arrived to already-flushed column families while other | |
1043 | // column families were flushing | |
1044 | virtual Status GetLiveFiles(std::vector<std::string>&, | |
1045 | uint64_t* manifest_file_size, | |
1046 | bool flush_memtable = true) = 0; | |
1047 | ||
1048 | // Retrieve the sorted list of all wal files with earliest file first | |
1049 | virtual Status GetSortedWalFiles(VectorLogPtr& files) = 0; | |
1050 | ||
11fdf7f2 | 1051 | // Note: this API is not yet consistent with WritePrepared transactions. |
7c673cae FG |
1052 | // Sets iter to an iterator that is positioned at a write-batch containing |
1053 | // seq_number. If the sequence number is non existent, it returns an iterator | |
1054 | // at the first available seq_no after the requested seq_no | |
1055 | // Returns Status::OK if iterator is valid | |
1056 | // Must set WAL_ttl_seconds or WAL_size_limit_MB to large values to | |
1057 | // use this api, else the WAL files will get | |
1058 | // cleared aggressively and the iterator might keep getting invalid before | |
1059 | // an update is read. | |
1060 | virtual Status GetUpdatesSince( | |
494da23a TL |
1061 | SequenceNumber seq_number, std::unique_ptr<TransactionLogIterator>* iter, |
1062 | const TransactionLogIterator::ReadOptions& read_options = | |
1063 | TransactionLogIterator::ReadOptions()) = 0; | |
7c673cae FG |
1064 | |
1065 | // Windows API macro interference | |
1066 | #undef DeleteFile | |
1067 | // Delete the file name from the db directory and update the internal state to | |
1068 | // reflect that. Supports deletion of sst and log files only. 'name' must be | |
1069 | // path relative to the db directory. eg. 000001.sst, /archive/000003.log | |
1070 | virtual Status DeleteFile(std::string name) = 0; | |
1071 | ||
1072 | // Returns a list of all table files with their level, start key | |
1073 | // and end key | |
1074 | virtual void GetLiveFilesMetaData( | |
1075 | std::vector<LiveFileMetaData>* /*metadata*/) {} | |
1076 | ||
1077 | // Obtains the meta data of the specified column family of the DB. | |
7c673cae FG |
1078 | virtual void GetColumnFamilyMetaData(ColumnFamilyHandle* /*column_family*/, |
1079 | ColumnFamilyMetaData* /*metadata*/) {} | |
1080 | ||
1081 | // Get the metadata of the default column family. | |
494da23a | 1082 | void GetColumnFamilyMetaData(ColumnFamilyMetaData* metadata) { |
7c673cae FG |
1083 | GetColumnFamilyMetaData(DefaultColumnFamily(), metadata); |
1084 | } | |
1085 | ||
1086 | // IngestExternalFile() will load a list of external SST files (1) into the DB | |
11fdf7f2 TL |
1087 | // Two primary modes are supported: |
1088 | // - Duplicate keys in the new files will overwrite existing keys (default) | |
1089 | // - Duplicate keys will be skipped (set ingest_behind=true) | |
1090 | // In the first mode we will try to find the lowest possible level that | |
1091 | // the file can fit in, and ingest the file into this level (2). A file that | |
1092 | // has a key range that overlaps with the memtable key range will require us | |
1093 | // to Flush the memtable first before ingesting the file. | |
1094 | // In the second mode we will always ingest in the bottom most level (see | |
1095 | // docs to IngestExternalFileOptions::ingest_behind). | |
7c673cae FG |
1096 | // |
1097 | // (1) External SST files can be created using SstFileWriter | |
1098 | // (2) We will try to ingest the files to the lowest possible level | |
11fdf7f2 TL |
1099 | // even if the file compression doesn't match the level compression |
1100 | // (3) If IngestExternalFileOptions->ingest_behind is set to true, | |
1101 | // we always ingest at the bottommost level, which should be reserved | |
1102 | // for this purpose (see DBOptions::allow_ingest_behind flag). | |
7c673cae FG |
1103 | virtual Status IngestExternalFile( |
1104 | ColumnFamilyHandle* column_family, | |
1105 | const std::vector<std::string>& external_files, | |
1106 | const IngestExternalFileOptions& options) = 0; | |
1107 | ||
1108 | virtual Status IngestExternalFile( | |
1109 | const std::vector<std::string>& external_files, | |
1110 | const IngestExternalFileOptions& options) { | |
1111 | return IngestExternalFile(DefaultColumnFamily(), external_files, options); | |
1112 | } | |
1113 | ||
494da23a TL |
1114 | // IngestExternalFiles() will ingest files for multiple column families, and |
1115 | // record the result atomically to the MANIFEST. | |
1116 | // If this function returns OK, all column families' ingestion must succeed. | |
1117 | // If this function returns NOK, or the process crashes, then none of the | |
1118 | // files will be ingested into the database after recovery. | |
1119 | // Note that it is possible for application to observe a mixed state during | |
1120 | // the execution of this function. If the user performs range scan over the | |
1121 | // column families with iterators, iterator on one column family may return | |
1122 | // ingested data, while iterator on other column family returns old data. | |
1123 | // Users can use snapshot for a consistent view of data. | |
1124 | // If your db ingests multiple SST files using this API, i.e. args.size() | |
1125 | // > 1, then RocksDB 5.15 and earlier will not be able to open it. | |
1126 | // | |
1127 | // REQUIRES: each arg corresponds to a different column family: namely, for | |
1128 | // 0 <= i < j < len(args), args[i].column_family != args[j].column_family. | |
1129 | virtual Status IngestExternalFiles( | |
1130 | const std::vector<IngestExternalFileArg>& args) = 0; | |
1131 | ||
11fdf7f2 TL |
1132 | virtual Status VerifyChecksum() = 0; |
1133 | ||
7c673cae FG |
1134 | // AddFile() is deprecated, please use IngestExternalFile() |
1135 | ROCKSDB_DEPRECATED_FUNC virtual Status AddFile( | |
1136 | ColumnFamilyHandle* column_family, | |
1137 | const std::vector<std::string>& file_path_list, bool move_file = false, | |
1138 | bool skip_snapshot_check = false) { | |
1139 | IngestExternalFileOptions ifo; | |
1140 | ifo.move_files = move_file; | |
1141 | ifo.snapshot_consistency = !skip_snapshot_check; | |
1142 | ifo.allow_global_seqno = false; | |
1143 | ifo.allow_blocking_flush = false; | |
1144 | return IngestExternalFile(column_family, file_path_list, ifo); | |
1145 | } | |
1146 | ||
1147 | ROCKSDB_DEPRECATED_FUNC virtual Status AddFile( | |
1148 | const std::vector<std::string>& file_path_list, bool move_file = false, | |
1149 | bool skip_snapshot_check = false) { | |
1150 | IngestExternalFileOptions ifo; | |
1151 | ifo.move_files = move_file; | |
1152 | ifo.snapshot_consistency = !skip_snapshot_check; | |
1153 | ifo.allow_global_seqno = false; | |
1154 | ifo.allow_blocking_flush = false; | |
1155 | return IngestExternalFile(DefaultColumnFamily(), file_path_list, ifo); | |
1156 | } | |
1157 | ||
1158 | // AddFile() is deprecated, please use IngestExternalFile() | |
1159 | ROCKSDB_DEPRECATED_FUNC virtual Status AddFile( | |
1160 | ColumnFamilyHandle* column_family, const std::string& file_path, | |
1161 | bool move_file = false, bool skip_snapshot_check = false) { | |
1162 | IngestExternalFileOptions ifo; | |
1163 | ifo.move_files = move_file; | |
1164 | ifo.snapshot_consistency = !skip_snapshot_check; | |
1165 | ifo.allow_global_seqno = false; | |
1166 | ifo.allow_blocking_flush = false; | |
1167 | return IngestExternalFile(column_family, {file_path}, ifo); | |
1168 | } | |
1169 | ||
1170 | ROCKSDB_DEPRECATED_FUNC virtual Status AddFile( | |
1171 | const std::string& file_path, bool move_file = false, | |
1172 | bool skip_snapshot_check = false) { | |
1173 | IngestExternalFileOptions ifo; | |
1174 | ifo.move_files = move_file; | |
1175 | ifo.snapshot_consistency = !skip_snapshot_check; | |
1176 | ifo.allow_global_seqno = false; | |
1177 | ifo.allow_blocking_flush = false; | |
1178 | return IngestExternalFile(DefaultColumnFamily(), {file_path}, ifo); | |
1179 | } | |
1180 | ||
1181 | // Load table file with information "file_info" into "column_family" | |
1182 | ROCKSDB_DEPRECATED_FUNC virtual Status AddFile( | |
1183 | ColumnFamilyHandle* column_family, | |
1184 | const std::vector<ExternalSstFileInfo>& file_info_list, | |
1185 | bool move_file = false, bool skip_snapshot_check = false) { | |
1186 | std::vector<std::string> external_files; | |
1187 | for (const ExternalSstFileInfo& file_info : file_info_list) { | |
1188 | external_files.push_back(file_info.file_path); | |
1189 | } | |
1190 | IngestExternalFileOptions ifo; | |
1191 | ifo.move_files = move_file; | |
1192 | ifo.snapshot_consistency = !skip_snapshot_check; | |
1193 | ifo.allow_global_seqno = false; | |
1194 | ifo.allow_blocking_flush = false; | |
1195 | return IngestExternalFile(column_family, external_files, ifo); | |
1196 | } | |
1197 | ||
1198 | ROCKSDB_DEPRECATED_FUNC virtual Status AddFile( | |
1199 | const std::vector<ExternalSstFileInfo>& file_info_list, | |
1200 | bool move_file = false, bool skip_snapshot_check = false) { | |
1201 | std::vector<std::string> external_files; | |
1202 | for (const ExternalSstFileInfo& file_info : file_info_list) { | |
1203 | external_files.push_back(file_info.file_path); | |
1204 | } | |
1205 | IngestExternalFileOptions ifo; | |
1206 | ifo.move_files = move_file; | |
1207 | ifo.snapshot_consistency = !skip_snapshot_check; | |
1208 | ifo.allow_global_seqno = false; | |
1209 | ifo.allow_blocking_flush = false; | |
1210 | return IngestExternalFile(DefaultColumnFamily(), external_files, ifo); | |
1211 | } | |
1212 | ||
1213 | ROCKSDB_DEPRECATED_FUNC virtual Status AddFile( | |
1214 | ColumnFamilyHandle* column_family, const ExternalSstFileInfo* file_info, | |
1215 | bool move_file = false, bool skip_snapshot_check = false) { | |
1216 | IngestExternalFileOptions ifo; | |
1217 | ifo.move_files = move_file; | |
1218 | ifo.snapshot_consistency = !skip_snapshot_check; | |
1219 | ifo.allow_global_seqno = false; | |
1220 | ifo.allow_blocking_flush = false; | |
1221 | return IngestExternalFile(column_family, {file_info->file_path}, ifo); | |
1222 | } | |
1223 | ||
1224 | ROCKSDB_DEPRECATED_FUNC virtual Status AddFile( | |
1225 | const ExternalSstFileInfo* file_info, bool move_file = false, | |
1226 | bool skip_snapshot_check = false) { | |
1227 | IngestExternalFileOptions ifo; | |
1228 | ifo.move_files = move_file; | |
1229 | ifo.snapshot_consistency = !skip_snapshot_check; | |
1230 | ifo.allow_global_seqno = false; | |
1231 | ifo.allow_blocking_flush = false; | |
1232 | return IngestExternalFile(DefaultColumnFamily(), {file_info->file_path}, | |
1233 | ifo); | |
1234 | } | |
1235 | ||
1236 | #endif // ROCKSDB_LITE | |
1237 | ||
1238 | // Stores in `identity` the globally unique ID that was created at database | |
1239 | // creation time by invoking Env::GenerateUniqueId(). Returns Status::OK if | |
1240 | // `identity` could be set properly. | |
1241 | virtual Status GetDbIdentity(std::string& identity) const = 0; | |
1242 | ||
1243 | // Returns the default column family handle; overloads such as AddFile() and GetPropertiesOfAllTables() that take no column family pass it along. | |
1244 | virtual ColumnFamilyHandle* DefaultColumnFamily() const = 0; | |
1245 | ||
1246 | #ifndef ROCKSDB_LITE | |
// Pure virtual; must be provided by implementations.
// NOTE(review): presumably `props` is an output parameter that receives the
// properties of every table in `column_family` -- confirm against the
// implementation.
1247 | virtual Status GetPropertiesOfAllTables(ColumnFamilyHandle* column_family, | |
1248 | TablePropertiesCollection* props) = 0; | |
1249 | virtual Status GetPropertiesOfAllTables(TablePropertiesCollection* props) { | |
1250 | return GetPropertiesOfAllTables(DefaultColumnFamily(), props); | |
1251 | } | |
// Pure virtual; must be provided by implementations.
// NOTE(review): presumably `range` points to an array of `n` Range objects
// and `props` receives the properties of the tables in `column_family` that
// overlap those ranges -- confirm against the implementation.
1252 | virtual Status GetPropertiesOfTablesInRange( | |
1253 | ColumnFamilyHandle* column_family, const Range* range, std::size_t n, | |
1254 | TablePropertiesCollection* props) = 0; | |
11fdf7f2 TL |
1255 | |
1256 | virtual Status SuggestCompactRange(ColumnFamilyHandle* /*column_family*/, | |
1257 | const Slice* /*begin*/, | |
1258 | const Slice* /*end*/) { | |
1259 | return Status::NotSupported("SuggestCompactRange() is not implemented."); | |
1260 | } | |
1261 | ||
1262 | virtual Status PromoteL0(ColumnFamilyHandle* /*column_family*/, | |
1263 | int /*target_level*/) { | |
1264 | return Status::NotSupported("PromoteL0() is not implemented."); | |
1265 | } | |
1266 | ||
1267 | // Trace DB operations. Use EndTrace() to stop tracing. | |
1268 | virtual Status StartTrace(const TraceOptions& /*options*/, | |
1269 | std::unique_ptr<TraceWriter>&& /*trace_writer*/) { | |
1270 | return Status::NotSupported("StartTrace() is not implemented."); | |
1271 | } | |
1272 | ||
1273 | virtual Status EndTrace() { | |
1274 | return Status::NotSupported("EndTrace() is not implemented."); | |
1275 | } | |
7c673cae FG |
1276 | #endif // ROCKSDB_LITE |
1277 | ||
1278 | // Needed for StackableDB | |
1279 | virtual DB* GetRootDB() { return this; } | |
1280 | ||
494da23a TL |
1281 | // Given a time window, return an iterator for accessing stats history |
1282 | // User is responsible for deleting StatsHistoryIterator after use | |
1283 | virtual Status GetStatsHistory( | |
1284 | uint64_t /*start_time*/, uint64_t /*end_time*/, | |
1285 | std::unique_ptr<StatsHistoryIterator>* /*stats_iterator*/) { | |
1286 | return Status::NotSupported("GetStatsHistory() is not implemented."); | |
1287 | } | |
1288 | ||
1289 | #ifndef ROCKSDB_LITE | |
1290 | // Make the secondary instance catch up with the primary by tailing and | |
1291 | // replaying the MANIFEST and WAL of the primary. | |
1292 | // Column families created by the primary after the secondary instance starts | |
1293 | // will be ignored unless the secondary instance closes and restarts with the | |
1294 | // newly created column families. | |
1295 | // Column families that exist before secondary instance starts and dropped by | |
1296 | // the primary afterwards will be marked as dropped. However, as long as the | |
1297 | // secondary instance does not delete the corresponding column family | |
1298 | // handles, the data of the column family is still accessible to the | |
1299 | // secondary. | |
1300 | // TODO: we will support WAL tailing soon. | |
1301 | virtual Status TryCatchUpWithPrimary() { | |
1302 | return Status::NotSupported("Supported only by secondary instance"); | |
1303 | } | |
1304 | #endif // !ROCKSDB_LITE | |
1305 | ||
7c673cae FG |
1306 | private: |
1307 | // No copying allowed | |
1308 | DB(const DB&); | |
1309 | void operator=(const DB&); | |
1310 | }; | |
1311 | ||
1312 | // Destroy the contents of the specified database `name`. | |
1313 | // Be very careful using this method. `column_families` defaults to an empty list. | |
11fdf7f2 TL |
1314 | Status DestroyDB(const std::string& name, const Options& options, |
1315 | const std::vector<ColumnFamilyDescriptor>& column_families = | |
494da23a | 1316 | std::vector<ColumnFamilyDescriptor>()); |
7c673cae FG |
1317 | |
1318 | #ifndef ROCKSDB_LITE | |
1319 | // If a DB cannot be opened, you may attempt to call this function to | |
1320 | // resurrect as much of the contents of the database as possible. | |
1321 | // Some data may be lost, so be careful when calling this function | |
1322 | // on a database that contains important information. | |
1323 | // | |
1324 | // This overload warns about and skips data associated with column families | |
1325 | // that are not specified in column_families. | |
1326 | // | |
1327 | // @param column_families Descriptors for known column families | |
1328 | Status RepairDB(const std::string& dbname, const DBOptions& db_options, | |
1329 | const std::vector<ColumnFamilyDescriptor>& column_families); | |
1330 | ||
1331 | // @param unknown_cf_opts Options for any column family that is encountered | |
1332 | // during the repair but was not specified in column_families. | |
1333 | Status RepairDB(const std::string& dbname, const DBOptions& db_options, | |
1334 | const std::vector<ColumnFamilyDescriptor>& column_families, | |
1335 | const ColumnFamilyOptions& unknown_cf_opts); | |
1336 | ||
1337 | // @param options Used for the database itself and for ALL column families | |
1338 | // encountered during the repair. | |
1339 | Status RepairDB(const std::string& dbname, const Options& options); | |
1340 | ||
1341 | #endif | |
1342 | ||
1343 | } // namespace rocksdb |