]> git.proxmox.com Git - ceph.git/blame - ceph/src/rocksdb/include/rocksdb/db.h
import quincy beta 17.1.0
[ceph.git] / ceph / src / rocksdb / include / rocksdb / db.h
CommitLineData
7c673cae 1// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
11fdf7f2
TL
2// This source code is licensed under both the GPLv2 (found in the
3// COPYING file in the root directory) and Apache 2.0 License
4// (found in the LICENSE.Apache file in the root directory).
7c673cae
FG
5// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
6// Use of this source code is governed by a BSD-style license that can be
7// found in the LICENSE file. See the AUTHORS file for names of contributors.
8
11fdf7f2 9#pragma once
7c673cae
FG
10
11#include <stdint.h>
12#include <stdio.h>
13#include <map>
14#include <memory>
15#include <string>
16#include <unordered_map>
17#include <vector>
18#include "rocksdb/iterator.h"
19#include "rocksdb/listener.h"
20#include "rocksdb/metadata.h"
21#include "rocksdb/options.h"
22#include "rocksdb/snapshot.h"
23#include "rocksdb/sst_file_writer.h"
24#include "rocksdb/thread_status.h"
25#include "rocksdb/transaction_log.h"
26#include "rocksdb/types.h"
27#include "rocksdb/version.h"
28
29#ifdef _WIN32
30// Windows API macro interference
31#undef DeleteFile
32#endif
33
// ROCKSDB_DEPRECATED_FUNC marks a function declaration as deprecated using
// the spelling understood by the current compiler. On toolchains that match
// neither branch it expands to nothing, so declarations that use it still
// compile (without a deprecation warning).
#if defined(__GNUC__) || defined(__clang__)
#define ROCKSDB_DEPRECATED_FUNC __attribute__((__deprecated__))
#elif defined(_WIN32)
#define ROCKSDB_DEPRECATED_FUNC __declspec(deprecated)
#else
#define ROCKSDB_DEPRECATED_FUNC
#endif
39
f67539c2 40namespace ROCKSDB_NAMESPACE {
7c673cae
FG
41
42struct Options;
43struct DBOptions;
44struct ColumnFamilyOptions;
45struct ReadOptions;
46struct WriteOptions;
47struct FlushOptions;
48struct CompactionOptions;
49struct CompactRangeOptions;
50struct TableProperties;
51struct ExternalSstFileInfo;
52class WriteBatch;
53class Env;
54class EventListener;
494da23a 55class StatsHistoryIterator;
11fdf7f2 56class TraceWriter;
494da23a
TL
57#ifdef ROCKSDB_LITE
58class CompactionJobInfo;
59#endif
f67539c2 60class FileSystem;
7c673cae
FG
61
62extern const std::string kDefaultColumnFamilyName;
f67539c2 63extern const std::string kPersistentStatsColumnFamilyName;
7c673cae
FG
64struct ColumnFamilyDescriptor {
65 std::string name;
66 ColumnFamilyOptions options;
67 ColumnFamilyDescriptor()
68 : name(kDefaultColumnFamilyName), options(ColumnFamilyOptions()) {}
69 ColumnFamilyDescriptor(const std::string& _name,
70 const ColumnFamilyOptions& _options)
71 : name(_name), options(_options) {}
72};
73
74class ColumnFamilyHandle {
75 public:
76 virtual ~ColumnFamilyHandle() {}
77 // Returns the name of the column family associated with the current handle.
78 virtual const std::string& GetName() const = 0;
79 // Returns the ID of the column family associated with the current handle.
80 virtual uint32_t GetID() const = 0;
81 // Fills "*desc" with the up-to-date descriptor of the column family
82 // associated with this handle. Since it fills "*desc" with the up-to-date
83 // information, this call might internally lock and release DB mutex to
84 // access the up-to-date CF options. In addition, all the pointer-typed
85 // options cannot be referenced any longer than the original options exist.
86 //
87 // Note that this function is not supported in RocksDBLite.
88 virtual Status GetDescriptor(ColumnFamilyDescriptor* desc) = 0;
89 // Returns the comparator of the column family associated with the
90 // current handle.
91 virtual const Comparator* GetComparator() const = 0;
92};
93
94static const int kMajorVersion = __ROCKSDB_MAJOR__;
95static const int kMinorVersion = __ROCKSDB_MINOR__;
96
97// A range of keys
98struct Range {
11fdf7f2
TL
99 Slice start;
100 Slice limit;
7c673cae 101
494da23a
TL
102 Range() {}
103 Range(const Slice& s, const Slice& l) : start(s), limit(l) {}
7c673cae
FG
104};
105
11fdf7f2
TL
106struct RangePtr {
107 const Slice* start;
108 const Slice* limit;
109
494da23a
TL
110 RangePtr() : start(nullptr), limit(nullptr) {}
111 RangePtr(const Slice* s, const Slice* l) : start(s), limit(l) {}
112};
113
20effc67
TL
114// It is valid that files_checksums and files_checksum_func_names are both
115// empty (no checksum information is provided for ingestion). Otherwise,
116// their sizes should be the same as external_files. The file order should
117// be the same in three vectors and guaranteed by the caller.
494da23a
TL
118struct IngestExternalFileArg {
119 ColumnFamilyHandle* column_family = nullptr;
120 std::vector<std::string> external_files;
121 IngestExternalFileOptions options;
20effc67
TL
122 std::vector<std::string> files_checksums;
123 std::vector<std::string> files_checksum_func_names;
11fdf7f2
TL
124};
125
f67539c2
TL
// Options for DB::GetMergeOperands().
struct GetMergeOperandsOptions {
  // Number of operands the caller's output array is sized for; when the DB
  // holds more operands than this for the key, GetMergeOperands() is
  // documented to return Incomplete without returning any operand.
  int expected_max_number_of_operands{0};
};
129
7c673cae
FG
130// A collection of table properties objects, where
131// key: is the table's file name.
132// value: the table properties object of the given table.
133typedef std::unordered_map<std::string, std::shared_ptr<const TableProperties>>
134 TablePropertiesCollection;
135
20effc67 136// A DB is a persistent, versioned ordered map from keys to values.
7c673cae
FG
137// A DB is safe for concurrent access from multiple threads without
138// any external synchronization.
20effc67
TL
139// DB is an abstract base class with one primary implementation (DBImpl)
140// and a number of wrapper implementations.
7c673cae
FG
141class DB {
142 public:
143 // Open the database with the specified "name".
144 // Stores a pointer to a heap-allocated database in *dbptr and returns
145 // OK on success.
146 // Stores nullptr in *dbptr and returns a non-OK status on error.
147 // Caller should delete *dbptr when it is no longer needed.
494da23a 148 static Status Open(const Options& options, const std::string& name,
7c673cae
FG
149 DB** dbptr);
150
151 // Open the database for read only. All DB interfaces
152 // that modify data, like put/delete, will return error.
153 // If the db is opened in read only mode, then no compactions
154 // will happen.
155 //
156 // Not supported in ROCKSDB_LITE, in which case the function will
157 // return Status::NotSupported.
494da23a
TL
158 static Status OpenForReadOnly(const Options& options, const std::string& name,
159 DB** dbptr,
20effc67 160 bool error_if_wal_file_exists = false);
7c673cae
FG
161
162 // Open the database for read only with column families. When opening DB with
163 // read only, you can specify only a subset of column families in the
164 // database that should be opened. However, you always need to specify default
165 // column family. The default column family name is 'default' and it's stored
f67539c2 166 // in ROCKSDB_NAMESPACE::kDefaultColumnFamilyName
7c673cae
FG
167 //
168 // Not supported in ROCKSDB_LITE, in which case the function will
169 // return Status::NotSupported.
170 static Status OpenForReadOnly(
171 const DBOptions& db_options, const std::string& name,
172 const std::vector<ColumnFamilyDescriptor>& column_families,
173 std::vector<ColumnFamilyHandle*>* handles, DB** dbptr,
20effc67 174 bool error_if_wal_file_exists = false);
7c673cae 175
494da23a
TL
176 // The following OpenAsSecondary functions create a secondary instance that
177 // can dynamically tail the MANIFEST of a primary that must have already been
178 // created. User can call TryCatchUpWithPrimary to make the secondary
179 // instance catch up with primary (WAL tailing is NOT supported now) whenever
180 // the user feels necessary. Column families created by the primary after the
181 // secondary instance starts are currently ignored by the secondary instance.
182 // Column families opened by secondary and dropped by the primary will be
183 // dropped by secondary as well. However the user of the secondary instance
184 // can still access the data of such dropped column family as long as they
185 // do not destroy the corresponding column family handle.
186 // WAL tailing is not supported at present, but will arrive soon.
187 //
188 // The options argument specifies the options to open the secondary instance.
189 // The name argument specifies the name of the primary db that you have used
190 // to open the primary instance.
191 // The secondary_path argument points to a directory where the secondary
192 // instance stores its info log.
193 // The dbptr is an out-arg corresponding to the opened secondary instance.
194 // The pointer points to a heap-allocated database, and the user should
195 // delete it after use.
196 // Open DB as secondary instance with only the default column family.
197 // Return OK on success, non-OK on failures.
198 static Status OpenAsSecondary(const Options& options, const std::string& name,
199 const std::string& secondary_path, DB** dbptr);
200
201 // Open DB as secondary instance with column families. You can open a subset
202 // of column families in secondary mode.
203 // The db_options specify the database specific options.
204 // The name argument specifies the name of the primary db that you have used
205 // to open the primary instance.
206 // The secondary_path argument points to a directory where the secondary
207 // instance stores its info log.
208 // The column_families argument specifies a list of column families to open.
209 // If any of the column families does not exist, the function returns non-OK
210 // status.
211 // The handles is an out-arg corresponding to the opened database column
212 // family handles.
213 // The dbptr is an out-arg corresponding to the opened secondary instance.
214 // The pointer points to a heap-allocated database, and the caller should
215 // delete it after use. Before deleting the dbptr, the user should also
216 // delete the pointers stored in handles vector.
217 // Return OK on success, non-OK on failures.
218 static Status OpenAsSecondary(
219 const DBOptions& db_options, const std::string& name,
220 const std::string& secondary_path,
221 const std::vector<ColumnFamilyDescriptor>& column_families,
222 std::vector<ColumnFamilyHandle*>* handles, DB** dbptr);
223
7c673cae
FG
224 // Open DB with column families.
225 // db_options specify database specific options
226 // column_families is the vector of all column families in the database,
227 // containing column family name and options. You need to open ALL column
228 // families in the database. To get the list of column families, you can use
229 // ListColumnFamilies(). Also, you can open only a subset of column families
230 // for read-only access.
231 // The default column family name is 'default' and it's stored
f67539c2 232 // in ROCKSDB_NAMESPACE::kDefaultColumnFamilyName.
7c673cae
FG
233 // If everything is OK, handles will on return be the same size
234 // as column_families --- handles[i] will be a handle that you
235 // will use to operate on column family column_family[i].
236 // Before deleting the DB, you have to close all column families by calling
237 // DestroyColumnFamilyHandle() on each of the handles.
238 static Status Open(const DBOptions& db_options, const std::string& name,
239 const std::vector<ColumnFamilyDescriptor>& column_families,
240 std::vector<ColumnFamilyHandle*>* handles, DB** dbptr);
241
11fdf7f2
TL
242 virtual Status Resume() { return Status::NotSupported(); }
243
244 // Close the DB by releasing resources, closing files etc. This should be
245 // called before calling the destructor so that the caller can get back a
246 // status in case there are any errors. This will not fsync the WAL files.
247 // If syncing is required, the caller must first call SyncWAL(), or Write()
248 // using an empty write batch with WriteOptions.sync=true.
f67539c2
TL
249 // Regardless of the return status, the DB must be freed.
250 // If the return status is Aborted(), closing fails because there is
251 // unreleased snapshot in the system. In this case, users can release
252 // the unreleased snapshots and try again and expect it to succeed. For
253 // other status, recalling Close() will be no-op.
254 // If the return status is NotSupported(), then the DB implementation does
255 // cleanup in the destructor
11fdf7f2
TL
256 virtual Status Close() { return Status::NotSupported(); }
257
7c673cae
FG
258 // ListColumnFamilies will open the DB specified by argument name
259 // and return the list of all column families in that DB
260 // through column_families argument. The ordering of
261 // column families in column_families is unspecified.
262 static Status ListColumnFamilies(const DBOptions& db_options,
263 const std::string& name,
264 std::vector<std::string>* column_families);
265
20effc67 266 // Abstract class ctor
494da23a 267 DB() {}
f67539c2
TL
268 // No copying allowed
269 DB(const DB&) = delete;
270 void operator=(const DB&) = delete;
271
7c673cae
FG
272 virtual ~DB();
273
274 // Create a column_family and return the handle of column family
275 // through the argument handle.
276 virtual Status CreateColumnFamily(const ColumnFamilyOptions& options,
277 const std::string& column_family_name,
278 ColumnFamilyHandle** handle);
279
11fdf7f2
TL
280 // Bulk create column families with the same column family options.
281 // Return the handles of the column families through the argument handles.
282 // In case of error, the request may succeed partially, and handles will
283 // contain column family handles that it managed to create, and have size
284 // equal to the number of created column families.
285 virtual Status CreateColumnFamilies(
286 const ColumnFamilyOptions& options,
287 const std::vector<std::string>& column_family_names,
288 std::vector<ColumnFamilyHandle*>* handles);
289
290 // Bulk create column families.
291 // Return the handles of the column families through the argument handles.
292 // In case of error, the request may succeed partially, and handles will
293 // contain column family handles that it managed to create, and have size
294 // equal to the number of created column families.
295 virtual Status CreateColumnFamilies(
296 const std::vector<ColumnFamilyDescriptor>& column_families,
297 std::vector<ColumnFamilyHandle*>* handles);
298
7c673cae
FG
299 // Drop a column family specified by column_family handle. This call
300 // only records a drop record in the manifest and prevents the column
301 // family from flushing and compacting.
302 virtual Status DropColumnFamily(ColumnFamilyHandle* column_family);
11fdf7f2
TL
303
304 // Bulk drop column families. This call only records drop records in the
305 // manifest and prevents the column families from flushing and compacting.
306 // In case of error, the request may succeed partially. User may call
307 // ListColumnFamilies to check the result.
308 virtual Status DropColumnFamilies(
309 const std::vector<ColumnFamilyHandle*>& column_families);
310
7c673cae
FG
311 // Close a column family specified by column_family handle and destroy
312 // the column family handle specified to avoid double deletion. This call
313 // deletes the column family handle by default. Use this method to
314 // close column family instead of deleting column family handle directly
315 virtual Status DestroyColumnFamilyHandle(ColumnFamilyHandle* column_family);
316
317 // Set the database entry for "key" to "value".
318 // If "key" already exists, it will be overwritten.
319 // Returns OK on success, and a non-OK status on error.
320 // Note: consider setting options.sync = true.
321 virtual Status Put(const WriteOptions& options,
322 ColumnFamilyHandle* column_family, const Slice& key,
323 const Slice& value) = 0;
324 virtual Status Put(const WriteOptions& options, const Slice& key,
325 const Slice& value) {
326 return Put(options, DefaultColumnFamily(), key, value);
327 }
328
329 // Remove the database entry (if any) for "key". Returns OK on
330 // success, and a non-OK status on error. It is not an error if "key"
331 // did not exist in the database.
332 // Note: consider setting options.sync = true.
333 virtual Status Delete(const WriteOptions& options,
334 ColumnFamilyHandle* column_family,
335 const Slice& key) = 0;
336 virtual Status Delete(const WriteOptions& options, const Slice& key) {
337 return Delete(options, DefaultColumnFamily(), key);
338 }
339
340 // Remove the database entry for "key". Requires that the key exists
341 // and was not overwritten. Returns OK on success, and a non-OK status
342 // on error. It is not an error if "key" did not exist in the database.
343 //
344 // If a key is overwritten (by calling Put() multiple times), then the result
345 // of calling SingleDelete() on this key is undefined. SingleDelete() only
346 // behaves correctly if there has been only one Put() for this key since the
347 // previous call to SingleDelete() for this key.
348 //
349 // This feature is currently an experimental performance optimization
350 // for a very specific workload. It is up to the caller to ensure that
351 // SingleDelete is only used for a key that is not deleted using Delete() or
352 // written using Merge(). Mixing SingleDelete operations with Deletes and
353 // Merges can result in undefined behavior.
354 //
355 // Note: consider setting options.sync = true.
356 virtual Status SingleDelete(const WriteOptions& options,
357 ColumnFamilyHandle* column_family,
358 const Slice& key) = 0;
359 virtual Status SingleDelete(const WriteOptions& options, const Slice& key) {
360 return SingleDelete(options, DefaultColumnFamily(), key);
361 }
362
363 // Removes the database entries in the range ["begin_key", "end_key"), i.e.,
364 // including "begin_key" and excluding "end_key". Returns OK on success, and
20effc67
TL
365 // a non-OK status on error. It is not an error if the database does not
366 // contain any existing data in the range ["begin_key", "end_key").
367 //
368 // If "end_key" comes before "begin_key" according to the user's comparator,
369 // a `Status::InvalidArgument` is returned.
7c673cae 370 //
494da23a
TL
371 // This feature is now usable in production, with the following caveats:
372 // 1) Accumulating many range tombstones in the memtable will degrade read
373 // performance; this can be avoided by manually flushing occasionally.
374 // 2) Limiting the maximum number of open files in the presence of range
375 // tombstones can degrade read performance. To avoid this problem, set
376 // max_open_files to -1 whenever possible.
7c673cae
FG
377 virtual Status DeleteRange(const WriteOptions& options,
378 ColumnFamilyHandle* column_family,
379 const Slice& begin_key, const Slice& end_key);
380
381 // Merge the database entry for "key" with "value". Returns OK on success,
382 // and a non-OK status on error. The semantics of this operation is
383 // determined by the user provided merge_operator when opening DB.
384 // Note: consider setting options.sync = true.
385 virtual Status Merge(const WriteOptions& options,
386 ColumnFamilyHandle* column_family, const Slice& key,
387 const Slice& value) = 0;
388 virtual Status Merge(const WriteOptions& options, const Slice& key,
389 const Slice& value) {
390 return Merge(options, DefaultColumnFamily(), key, value);
391 }
392
393 // Apply the specified updates to the database.
394 // If `updates` contains no update, WAL will still be synced if
395 // options.sync=true.
396 // Returns OK on success, non-OK on failure.
397 // Note: consider setting options.sync = true.
398 virtual Status Write(const WriteOptions& options, WriteBatch* updates) = 0;
399
400 // If the database contains an entry for "key" store the
401 // corresponding value in *value and return OK.
402 //
20effc67
TL
403 // If timestamp is enabled and a non-null timestamp pointer is passed in,
404 // timestamp is returned.
405 //
7c673cae
FG
406 // If there is no entry for "key" leave *value unchanged and return
407 // a status for which Status::IsNotFound() returns true.
408 //
409 // May return some other Status on an error.
410 virtual inline Status Get(const ReadOptions& options,
411 ColumnFamilyHandle* column_family, const Slice& key,
412 std::string* value) {
413 assert(value != nullptr);
414 PinnableSlice pinnable_val(value);
415 assert(!pinnable_val.IsPinned());
416 auto s = Get(options, column_family, key, &pinnable_val);
417 if (s.ok() && pinnable_val.IsPinned()) {
418 value->assign(pinnable_val.data(), pinnable_val.size());
419 } // else value is already assigned
420 return s;
421 }
422 virtual Status Get(const ReadOptions& options,
423 ColumnFamilyHandle* column_family, const Slice& key,
424 PinnableSlice* value) = 0;
494da23a
TL
425 virtual Status Get(const ReadOptions& options, const Slice& key,
426 std::string* value) {
7c673cae
FG
427 return Get(options, DefaultColumnFamily(), key, value);
428 }
429
20effc67
TL
430 // Get() methods that return timestamp. Derived DB classes don't need to worry
431 // about this group of methods if they don't care about timestamp feature.
432 virtual inline Status Get(const ReadOptions& options,
433 ColumnFamilyHandle* column_family, const Slice& key,
434 std::string* value, std::string* timestamp) {
435 assert(value != nullptr);
436 PinnableSlice pinnable_val(value);
437 assert(!pinnable_val.IsPinned());
438 auto s = Get(options, column_family, key, &pinnable_val, timestamp);
439 if (s.ok() && pinnable_val.IsPinned()) {
440 value->assign(pinnable_val.data(), pinnable_val.size());
441 } // else value is already assigned
442 return s;
443 }
444 virtual Status Get(const ReadOptions& /*options*/,
445 ColumnFamilyHandle* /*column_family*/,
446 const Slice& /*key*/, PinnableSlice* /*value*/,
447 std::string* /*timestamp*/) {
448 return Status::NotSupported(
449 "Get() that returns timestamp is not implemented.");
450 }
451 virtual Status Get(const ReadOptions& options, const Slice& key,
452 std::string* value, std::string* timestamp) {
453 return Get(options, DefaultColumnFamily(), key, value, timestamp);
454 }
455
f67539c2
TL
456 // Returns all the merge operands corresponding to the key. If the
457 // number of merge operands in DB is greater than
458 // get_merge_operands_options->expected_max_number_of_operands,
459 // no merge operands are returned and status is Incomplete. Merge operands
460 // returned are in the order of insertion.
461 // merge_operands - Points to an array of at least
462 // get_merge_operands_options->expected_max_number_of_operands elements;
463 // the caller is responsible for allocating it. If the status
464 // returned is Incomplete then *number_of_operands will contain
465 // the total number of merge operands found in DB for key.
466 virtual Status GetMergeOperands(
467 const ReadOptions& options, ColumnFamilyHandle* column_family,
468 const Slice& key, PinnableSlice* merge_operands,
469 GetMergeOperandsOptions* get_merge_operands_options,
470 int* number_of_operands) = 0;
471
20effc67
TL
472 // Consistent Get of many keys across column families without the need
473 // for an explicit snapshot. NOTE: the implementation of this MultiGet API
474 // does not have the performance benefits of the void-returning MultiGet
475 // functions.
476 //
7c673cae
FG
477 // If keys[i] does not exist in the database, then the i'th returned
478 // status will be one for which Status::IsNotFound() is true, and
479 // (*values)[i] will be set to some arbitrary value (often ""). Otherwise,
480 // the i'th returned status will have Status::ok() true, and (*values)[i]
481 // will store the value associated with keys[i].
482 //
483 // (*values) will always be resized to be the same size as (keys).
484 // Similarly, the number of returned statuses will be the number of keys.
485 // Note: keys will not be "de-duplicated". Duplicate keys will return
486 // duplicate values in order.
487 virtual std::vector<Status> MultiGet(
488 const ReadOptions& options,
489 const std::vector<ColumnFamilyHandle*>& column_family,
490 const std::vector<Slice>& keys, std::vector<std::string>* values) = 0;
491 virtual std::vector<Status> MultiGet(const ReadOptions& options,
492 const std::vector<Slice>& keys,
493 std::vector<std::string>* values) {
494da23a
TL
494 return MultiGet(
495 options,
496 std::vector<ColumnFamilyHandle*>(keys.size(), DefaultColumnFamily()),
497 keys, values);
7c673cae
FG
498 }
499
20effc67
TL
500 virtual std::vector<Status> MultiGet(
501 const ReadOptions& /*options*/,
502 const std::vector<ColumnFamilyHandle*>& /*column_family*/,
503 const std::vector<Slice>& keys, std::vector<std::string>* /*values*/,
504 std::vector<std::string>* /*timestamps*/) {
505 return std::vector<Status>(
506 keys.size(), Status::NotSupported(
507 "MultiGet() returning timestamps not implemented."));
508 }
509 virtual std::vector<Status> MultiGet(const ReadOptions& options,
510 const std::vector<Slice>& keys,
511 std::vector<std::string>* values,
512 std::vector<std::string>* timestamps) {
513 return MultiGet(
514 options,
515 std::vector<ColumnFamilyHandle*>(keys.size(), DefaultColumnFamily()),
516 keys, values, timestamps);
517 }
518
f67539c2
TL
519 // Overloaded MultiGet API that improves performance by batching operations
520 // in the read path for greater efficiency. Currently, only the block based
521 // table format with full filters are supported. Other table formats such
522 // as plain table, block based table with block based filters and
523 // partitioned indexes will still work, but will not get any performance
524 // benefits.
525 // Parameters -
526 // options - ReadOptions
527 // column_family - ColumnFamilyHandle* that the keys belong to. All the keys
528 // passed to the API are restricted to a single column family
529 // num_keys - Number of keys to lookup
530 // keys - Pointer to C style array of key Slices with num_keys elements
531 // values - Pointer to C style array of PinnableSlices with num_keys elements
532 // statuses - Pointer to C style array of Status with num_keys elements
533 // sorted_input - If true, it means the input keys are already sorted by key
534 // order, so the MultiGet() API doesn't have to sort them
535 // again. If false, the keys will be copied and sorted
536 // internally by the API - the input array will not be
537 // modified
538 virtual void MultiGet(const ReadOptions& options,
539 ColumnFamilyHandle* column_family,
540 const size_t num_keys, const Slice* keys,
541 PinnableSlice* values, Status* statuses,
542 const bool /*sorted_input*/ = false) {
543 std::vector<ColumnFamilyHandle*> cf;
544 std::vector<Slice> user_keys;
545 std::vector<Status> status;
546 std::vector<std::string> vals;
547
548 for (size_t i = 0; i < num_keys; ++i) {
549 cf.emplace_back(column_family);
550 user_keys.emplace_back(keys[i]);
551 }
552 status = MultiGet(options, cf, user_keys, &vals);
553 std::copy(status.begin(), status.end(), statuses);
554 for (auto& value : vals) {
555 values->PinSelf(value);
556 values++;
557 }
558 }
559
20effc67
TL
560 virtual void MultiGet(const ReadOptions& options,
561 ColumnFamilyHandle* column_family,
562 const size_t num_keys, const Slice* keys,
563 PinnableSlice* values, std::string* timestamps,
564 Status* statuses, const bool /*sorted_input*/ = false) {
565 std::vector<ColumnFamilyHandle*> cf;
566 std::vector<Slice> user_keys;
567 std::vector<Status> status;
568 std::vector<std::string> vals;
569 std::vector<std::string> tss;
570
571 for (size_t i = 0; i < num_keys; ++i) {
572 cf.emplace_back(column_family);
573 user_keys.emplace_back(keys[i]);
574 }
575 status = MultiGet(options, cf, user_keys, &vals, &tss);
576 std::copy(status.begin(), status.end(), statuses);
577 std::copy(tss.begin(), tss.end(), timestamps);
578 for (auto& value : vals) {
579 values->PinSelf(value);
580 values++;
581 }
582 }
583
f67539c2
TL
584 // Overloaded MultiGet API that improves performance by batching operations
585 // in the read path for greater efficiency. Currently, only the block based
586 // table format with full filters are supported. Other table formats such
587 // as plain table, block based table with block based filters and
588 // partitioned indexes will still work, but will not get any performance
589 // benefits.
590 // Parameters -
591 // options - ReadOptions
592 // column_families - Pointer to C style array of ColumnFamilyHandle* with
593 // num_keys elements; column_families[i] is the column family of keys[i]
594 // num_keys - Number of keys to lookup
595 // keys - Pointer to C style array of key Slices with num_keys elements
596 // values - Pointer to C style array of PinnableSlices with num_keys elements
597 // statuses - Pointer to C style array of Status with num_keys elements
598 // sorted_input - If true, it means the input keys are already sorted by key
599 // order, so the MultiGet() API doesn't have to sort them
600 // again. If false, the keys will be copied and sorted
601 // internally by the API - the input array will not be
602 // modified
603 virtual void MultiGet(const ReadOptions& options, const size_t num_keys,
604 ColumnFamilyHandle** column_families, const Slice* keys,
605 PinnableSlice* values, Status* statuses,
606 const bool /*sorted_input*/ = false) {
607 std::vector<ColumnFamilyHandle*> cf;
608 std::vector<Slice> user_keys;
609 std::vector<Status> status;
610 std::vector<std::string> vals;
611
612 for (size_t i = 0; i < num_keys; ++i) {
613 cf.emplace_back(column_families[i]);
614 user_keys.emplace_back(keys[i]);
615 }
616 status = MultiGet(options, cf, user_keys, &vals);
617 std::copy(status.begin(), status.end(), statuses);
618 for (auto& value : vals) {
619 values->PinSelf(value);
620 values++;
621 }
622 }
20effc67
TL
623 virtual void MultiGet(const ReadOptions& options, const size_t num_keys,
624 ColumnFamilyHandle** column_families, const Slice* keys,
625 PinnableSlice* values, std::string* timestamps,
626 Status* statuses, const bool /*sorted_input*/ = false) {
627 std::vector<ColumnFamilyHandle*> cf;
628 std::vector<Slice> user_keys;
629 std::vector<Status> status;
630 std::vector<std::string> vals;
631 std::vector<std::string> tss;
632
633 for (size_t i = 0; i < num_keys; ++i) {
634 cf.emplace_back(column_families[i]);
635 user_keys.emplace_back(keys[i]);
636 }
637 status = MultiGet(options, cf, user_keys, &vals, &tss);
638 std::copy(status.begin(), status.end(), statuses);
639 std::copy(tss.begin(), tss.end(), timestamps);
640 for (auto& value : vals) {
641 values->PinSelf(value);
642 values++;
643 }
644 }
f67539c2 645
7c673cae
FG
646 // If the key definitely does not exist in the database, then this method
647 // returns false, else true. If the caller wants to obtain value when the key
648 // is found in memory, a bool for 'value_found' must be passed. 'value_found'
649 // will be true on return if value has been set properly.
650 // This check is potentially lighter-weight than invoking DB::Get(). One way
651 // to make this lighter weight is to avoid doing any IOs.
652 // Default implementation here returns true and sets 'value_found' to false
653 virtual bool KeyMayExist(const ReadOptions& /*options*/,
654 ColumnFamilyHandle* /*column_family*/,
655 const Slice& /*key*/, std::string* /*value*/,
20effc67 656 std::string* /*timestamp*/,
7c673cae
FG
657 bool* value_found = nullptr) {
658 if (value_found != nullptr) {
659 *value_found = false;
660 }
661 return true;
662 }
20effc67
TL
663
664 virtual bool KeyMayExist(const ReadOptions& options,
665 ColumnFamilyHandle* column_family, const Slice& key,
666 std::string* value, bool* value_found = nullptr) {
667 return KeyMayExist(options, column_family, key, value,
668 /*timestamp=*/nullptr, value_found);
669 }
670
7c673cae
FG
671 virtual bool KeyMayExist(const ReadOptions& options, const Slice& key,
672 std::string* value, bool* value_found = nullptr) {
673 return KeyMayExist(options, DefaultColumnFamily(), key, value, value_found);
674 }
675
20effc67
TL
676 virtual bool KeyMayExist(const ReadOptions& options, const Slice& key,
677 std::string* value, std::string* timestamp,
678 bool* value_found = nullptr) {
679 return KeyMayExist(options, DefaultColumnFamily(), key, value, timestamp,
680 value_found);
681 }
682
7c673cae
FG
683 // Return a heap-allocated iterator over the contents of the database.
684 // The result of NewIterator() is initially invalid (caller must
685 // call one of the Seek methods on the iterator before using it).
686 //
687 // Caller should delete the iterator when it is no longer needed.
688 // The returned iterator should be deleted before this db is deleted.
689 virtual Iterator* NewIterator(const ReadOptions& options,
690 ColumnFamilyHandle* column_family) = 0;
691 virtual Iterator* NewIterator(const ReadOptions& options) {
692 return NewIterator(options, DefaultColumnFamily());
693 }
694 // Returns iterators from a consistent database state across multiple
695 // column families. Iterators are heap allocated and need to be deleted
696 // before the db is deleted
697 virtual Status NewIterators(
698 const ReadOptions& options,
699 const std::vector<ColumnFamilyHandle*>& column_families,
700 std::vector<Iterator*>* iterators) = 0;
701
702 // Return a handle to the current DB state. Iterators created with
703 // this handle will all observe a stable snapshot of the current DB
704 // state. The caller must call ReleaseSnapshot(result) when the
705 // snapshot is no longer needed.
706 //
707 // nullptr will be returned if the DB fails to take a snapshot or does
708 // not support snapshot.
709 virtual const Snapshot* GetSnapshot() = 0;
710
711 // Release a previously acquired snapshot. The caller must not
712 // use "snapshot" after this call.
713 virtual void ReleaseSnapshot(const Snapshot* snapshot) = 0;
714
715#ifndef ROCKSDB_LITE
// Holds every valid property argument for GetProperty().
//
// NOTE: A property name cannot end in a number, because trailing numbers are
// interpreted as arguments; see kNumFilesAtLevelPrefix for an example.
struct Properties {
  // "rocksdb.num-files-at-level<N>" - string containing the number of files
  //      at level <N>, where <N> is an ASCII representation of a level
  //      number (e.g., "0").
  static const std::string kNumFilesAtLevelPrefix;

  // "rocksdb.compression-ratio-at-level<N>" - string containing the
  //      compression ratio of data at level <N>, where <N> is an ASCII
  //      representation of a level number (e.g., "0"). Compression ratio is
  //      defined here as uncompressed data size / compressed file size.
  //      Yields "-1.0" if there are no open files at level <N>.
  static const std::string kCompressionRatioAtLevelPrefix;

  // "rocksdb.stats" - multi-line string containing the data described by
  //      kCFStats followed by the data described by kDBStats.
  static const std::string kStats;

  // "rocksdb.sstables" - multi-line string summarizing the current SST
  //      files.
  static const std::string kSSTables;

  // "rocksdb.cfstats" - "rocksdb.cfstats-no-file-histogram" and
  //      "rocksdb.cf-file-histogram" together. Both are described below.
  static const std::string kCFStats;

  // "rocksdb.cfstats-no-file-histogram" - multi-line string with general
  //      column family stats per level over the db's lifetime ("L<n>"),
  //      aggregated over the db's lifetime ("Sum"), and aggregated over the
  //      interval since the last retrieval ("Int").
  //      It can also be used to return the stats in map form. In that case
  //      there will be a pair of string to array of double for each level
  //      as well as for "Sum". "Int" stats are not affected when the stats
  //      are retrieved in this form.
  static const std::string kCFStatsNoFileHistogram;

  // "rocksdb.cf-file-histogram" - prints how many file reads went to every
  //      level, as well as the histogram of latency of single requests.
  static const std::string kCFFileHistogram;

  // "rocksdb.dbstats" - multi-line string with general database stats, both
  //      cumulative (over the db's lifetime) and interval (since the last
  //      retrieval of kDBStats).
  static const std::string kDBStats;

  // "rocksdb.levelstats" - multi-line string containing the number of files
  //      per level and the total size of each level (MB).
  static const std::string kLevelStats;

  // "rocksdb.num-immutable-mem-table" - number of immutable memtables that
  //      have not yet been flushed.
  static const std::string kNumImmutableMemTable;

  // "rocksdb.num-immutable-mem-table-flushed" - number of immutable
  //      memtables that have already been flushed.
  static const std::string kNumImmutableMemTableFlushed;

  // "rocksdb.mem-table-flush-pending" - 1 if a memtable flush is pending;
  //      otherwise 0.
  static const std::string kMemTableFlushPending;

  // "rocksdb.num-running-flushes" - number of currently running flushes.
  static const std::string kNumRunningFlushes;

  // "rocksdb.compaction-pending" - 1 if at least one compaction is pending;
  //      otherwise 0.
  static const std::string kCompactionPending;

  // "rocksdb.num-running-compactions" - number of currently running
  //      compactions.
  static const std::string kNumRunningCompactions;

  // "rocksdb.background-errors" - accumulated number of background errors.
  static const std::string kBackgroundErrors;

  // "rocksdb.cur-size-active-mem-table" - approximate size of the active
  //      memtable (bytes).
  static const std::string kCurSizeActiveMemTable;

  // "rocksdb.cur-size-all-mem-tables" - approximate size of the active and
  //      unflushed immutable memtables (bytes).
  static const std::string kCurSizeAllMemTables;

  // "rocksdb.size-all-mem-tables" - approximate size of the active,
  //      unflushed immutable, and pinned immutable memtables (bytes).
  static const std::string kSizeAllMemTables;

  // "rocksdb.num-entries-active-mem-table" - total number of entries in the
  //      active memtable.
  static const std::string kNumEntriesActiveMemTable;

  // "rocksdb.num-entries-imm-mem-tables" - total number of entries in the
  //      unflushed immutable memtables.
  static const std::string kNumEntriesImmMemTables;

  // "rocksdb.num-deletes-active-mem-table" - total number of delete entries
  //      in the active memtable.
  static const std::string kNumDeletesActiveMemTable;

  // "rocksdb.num-deletes-imm-mem-tables" - total number of delete entries
  //      in the unflushed immutable memtables.
  static const std::string kNumDeletesImmMemTables;

  // "rocksdb.estimate-num-keys" - estimated number of total keys in the
  //      active and unflushed immutable memtables and storage.
  static const std::string kEstimateNumKeys;

  // "rocksdb.estimate-table-readers-mem" - estimated memory used for
  //      reading SST tables, excluding memory used in block cache (e.g.,
  //      filter and index blocks).
  static const std::string kEstimateTableReadersMem;

  // "rocksdb.is-file-deletions-enabled" - 0 if deletion of obsolete files
  //      is enabled; otherwise a non-zero number.
  static const std::string kIsFileDeletionsEnabled;

  // "rocksdb.num-snapshots" - number of unreleased snapshots of the
  //      database.
  static const std::string kNumSnapshots;

  // "rocksdb.oldest-snapshot-time" - number representing the unix timestamp
  //      of the oldest unreleased snapshot.
  static const std::string kOldestSnapshotTime;

  // "rocksdb.oldest-snapshot-sequence" - number representing the sequence
  //      number of the oldest unreleased snapshot.
  static const std::string kOldestSnapshotSequence;

  // "rocksdb.num-live-versions" - number of live versions. `Version` is an
  //      internal data structure; see version_set.h for details. More live
  //      versions often mean more SST files are held from being deleted, by
  //      iterators or unfinished compactions.
  static const std::string kNumLiveVersions;

  // "rocksdb.current-super-version-number" - number of the current LSM
  //      version. It is a uint64_t integer, incremented after any change to
  //      the LSM tree. The number is not preserved across a DB restart;
  //      after a restart it starts from 0 again.
  static const std::string kCurrentSuperVersionNumber;

  // "rocksdb.estimate-live-data-size" - estimate of the amount of live data
  //      in bytes.
  static const std::string kEstimateLiveDataSize;

  // "rocksdb.min-log-number-to-keep" - minimum log number of the log files
  //      that should be kept.
  static const std::string kMinLogNumberToKeep;

  // "rocksdb.min-obsolete-sst-number-to-keep" - minimum file number for an
  //      obsolete SST to be kept. The max value of `uint64_t` is returned
  //      if all obsolete files can be deleted.
  static const std::string kMinObsoleteSstNumberToKeep;

  // "rocksdb.total-sst-files-size" - total size (bytes) of all SST files.
  //      WARNING: may slow down online queries if there are too many files.
  static const std::string kTotalSstFilesSize;

  // "rocksdb.live-sst-files-size" - total size (bytes) of all SST files
  //      that belong to the latest LSM tree.
  static const std::string kLiveSstFilesSize;

  // "rocksdb.base-level" - number of the level to which L0 data will be
  //      compacted.
  static const std::string kBaseLevel;

  // "rocksdb.estimate-pending-compaction-bytes" - estimated total number of
  //      bytes compaction needs to rewrite to get all levels down to under
  //      target size. Not valid for compactions other than level-based.
  static const std::string kEstimatePendingCompactionBytes;

  // "rocksdb.aggregated-table-properties" - string representation of the
  //      aggregated table properties of the target column family.
  static const std::string kAggregatedTableProperties;

  // "rocksdb.aggregated-table-properties-at-level<N>" - same as the
  //      previous one, but only the aggregated table properties of the
  //      specified level "N" at the target column family.
  static const std::string kAggregatedTablePropertiesAtLevel;

  // "rocksdb.actual-delayed-write-rate" - the current actual delayed write
  //      rate. 0 means no delay.
  static const std::string kActualDelayedWriteRate;

  // "rocksdb.is-write-stopped" - 1 if write has been stopped.
  static const std::string kIsWriteStopped;

  // "rocksdb.estimate-oldest-key-time" - estimation of the oldest key
  //      timestamp in the DB. Currently only available for FIFO compaction
  //      with compaction_options_fifo.allow_compaction = false.
  static const std::string kEstimateOldestKeyTime;

  // "rocksdb.block-cache-capacity" - block cache capacity.
  static const std::string kBlockCacheCapacity;

  // "rocksdb.block-cache-usage" - memory size for the entries residing in
  //      block cache.
  static const std::string kBlockCacheUsage;

  // "rocksdb.block-cache-pinned-usage" - memory size for the entries being
  //      pinned.
  static const std::string kBlockCachePinnedUsage;

  // "rocksdb.options-statistics" - multi-line string of options.statistics.
  static const std::string kOptionsStatistics;
};
931#endif /* ROCKSDB_LITE */
932
933 // DB implementations can export properties about their state via this method.
934 // If "property" is a valid property understood by this DB implementation (see
935 // Properties struct above for valid options), fills "*value" with its current
936 // value and returns true. Otherwise, returns false.
937 virtual bool GetProperty(ColumnFamilyHandle* column_family,
938 const Slice& property, std::string* value) = 0;
939 virtual bool GetProperty(const Slice& property, std::string* value) {
940 return GetProperty(DefaultColumnFamily(), property, value);
941 }
942 virtual bool GetMapProperty(ColumnFamilyHandle* column_family,
943 const Slice& property,
11fdf7f2 944 std::map<std::string, std::string>* value) = 0;
7c673cae 945 virtual bool GetMapProperty(const Slice& property,
11fdf7f2 946 std::map<std::string, std::string>* value) {
7c673cae
FG
947 return GetMapProperty(DefaultColumnFamily(), property, value);
948 }
949
950 // Similar to GetProperty(), but only works for a subset of properties whose
951 // return value is an integer. Return the value by integer. Supported
952 // properties:
953 // "rocksdb.num-immutable-mem-table"
954 // "rocksdb.mem-table-flush-pending"
955 // "rocksdb.compaction-pending"
956 // "rocksdb.background-errors"
957 // "rocksdb.cur-size-active-mem-table"
958 // "rocksdb.cur-size-all-mem-tables"
959 // "rocksdb.size-all-mem-tables"
960 // "rocksdb.num-entries-active-mem-table"
961 // "rocksdb.num-entries-imm-mem-tables"
962 // "rocksdb.num-deletes-active-mem-table"
963 // "rocksdb.num-deletes-imm-mem-tables"
964 // "rocksdb.estimate-num-keys"
965 // "rocksdb.estimate-table-readers-mem"
966 // "rocksdb.is-file-deletions-enabled"
967 // "rocksdb.num-snapshots"
968 // "rocksdb.oldest-snapshot-time"
969 // "rocksdb.num-live-versions"
970 // "rocksdb.current-super-version-number"
971 // "rocksdb.estimate-live-data-size"
972 // "rocksdb.min-log-number-to-keep"
494da23a 973 // "rocksdb.min-obsolete-sst-number-to-keep"
7c673cae 974 // "rocksdb.total-sst-files-size"
11fdf7f2 975 // "rocksdb.live-sst-files-size"
7c673cae
FG
976 // "rocksdb.base-level"
977 // "rocksdb.estimate-pending-compaction-bytes"
978 // "rocksdb.num-running-compactions"
979 // "rocksdb.num-running-flushes"
980 // "rocksdb.actual-delayed-write-rate"
981 // "rocksdb.is-write-stopped"
11fdf7f2
TL
982 // "rocksdb.estimate-oldest-key-time"
983 // "rocksdb.block-cache-capacity"
984 // "rocksdb.block-cache-usage"
985 // "rocksdb.block-cache-pinned-usage"
7c673cae
FG
986 virtual bool GetIntProperty(ColumnFamilyHandle* column_family,
987 const Slice& property, uint64_t* value) = 0;
988 virtual bool GetIntProperty(const Slice& property, uint64_t* value) {
989 return GetIntProperty(DefaultColumnFamily(), property, value);
990 }
991
992 // Reset internal stats for DB and all column families.
993 // Note this doesn't reset options.statistics as it is not owned by
994 // DB.
995 virtual Status ResetStats() {
996 return Status::NotSupported("Not implemented");
997 }
998
999 // Same as GetIntProperty(), but this one returns the aggregated int
1000 // property from all column families.
1001 virtual bool GetAggregatedIntProperty(const Slice& property,
1002 uint64_t* value) = 0;
1003
// Bit flags for DB::GetSizeApproximation selecting what the approximation
// covers: memtable stats, file stats, or both.
enum SizeApproximationFlags : uint8_t {
  NONE = 0,
  INCLUDE_MEMTABLES = 0x01,  // bit 0
  INCLUDE_FILES = 0x02       // bit 1
};
1011
1012 // For each i in [0,n-1], store in "sizes[i]", the approximate
20effc67
TL
1013 // file system space used by keys in "[range[i].start .. range[i].limit)"
1014 // in a single column family.
7c673cae
FG
1015 //
1016 // Note that the returned sizes measure file system space usage, so
1017 // if the user data compresses by a factor of ten, the returned
1018 // sizes will be one-tenth the size of the corresponding user data size.
f67539c2
TL
1019 virtual Status GetApproximateSizes(const SizeApproximationOptions& options,
1020 ColumnFamilyHandle* column_family,
20effc67 1021 const Range* ranges, int n,
f67539c2
TL
1022 uint64_t* sizes) = 0;
1023
1024 // Simpler versions of the GetApproximateSizes() method above.
1025 // The include_flags argumenbt must of type DB::SizeApproximationFlags
1026 // and can not be NONE.
7c673cae 1027 virtual void GetApproximateSizes(ColumnFamilyHandle* column_family,
20effc67 1028 const Range* ranges, int n, uint64_t* sizes,
f67539c2
TL
1029 uint8_t include_flags = INCLUDE_FILES) {
1030 SizeApproximationOptions options;
1031 options.include_memtabtles =
1032 (include_flags & SizeApproximationFlags::INCLUDE_MEMTABLES) != 0;
1033 options.include_files =
1034 (include_flags & SizeApproximationFlags::INCLUDE_FILES) != 0;
20effc67 1035 GetApproximateSizes(options, column_family, ranges, n, sizes);
f67539c2 1036 }
20effc67 1037 virtual void GetApproximateSizes(const Range* ranges, int n, uint64_t* sizes,
494da23a 1038 uint8_t include_flags = INCLUDE_FILES) {
20effc67 1039 GetApproximateSizes(DefaultColumnFamily(), ranges, n, sizes, include_flags);
7c673cae
FG
1040 }
1041
1042 // The method is similar to GetApproximateSizes, except it
1043 // returns approximate number of records in memtables.
1044 virtual void GetApproximateMemTableStats(ColumnFamilyHandle* column_family,
1045 const Range& range,
1046 uint64_t* const count,
1047 uint64_t* const size) = 0;
1048 virtual void GetApproximateMemTableStats(const Range& range,
1049 uint64_t* const count,
1050 uint64_t* const size) {
1051 GetApproximateMemTableStats(DefaultColumnFamily(), range, count, size);
1052 }
1053
1054 // Deprecated versions of GetApproximateSizes
1055 ROCKSDB_DEPRECATED_FUNC virtual void GetApproximateSizes(
494da23a 1056 const Range* range, int n, uint64_t* sizes, bool include_memtable) {
7c673cae
FG
1057 uint8_t include_flags = SizeApproximationFlags::INCLUDE_FILES;
1058 if (include_memtable) {
1059 include_flags |= SizeApproximationFlags::INCLUDE_MEMTABLES;
1060 }
1061 GetApproximateSizes(DefaultColumnFamily(), range, n, sizes, include_flags);
1062 }
1063 ROCKSDB_DEPRECATED_FUNC virtual void GetApproximateSizes(
494da23a
TL
1064 ColumnFamilyHandle* column_family, const Range* range, int n,
1065 uint64_t* sizes, bool include_memtable) {
7c673cae
FG
1066 uint8_t include_flags = SizeApproximationFlags::INCLUDE_FILES;
1067 if (include_memtable) {
1068 include_flags |= SizeApproximationFlags::INCLUDE_MEMTABLES;
1069 }
1070 GetApproximateSizes(column_family, range, n, sizes, include_flags);
1071 }
1072
1073 // Compact the underlying storage for the key range [*begin,*end].
1074 // The actual compaction interval might be superset of [*begin, *end].
1075 // In particular, deleted and overwritten versions are discarded,
1076 // and the data is rearranged to reduce the cost of operations
1077 // needed to access the data. This operation should typically only
1078 // be invoked by users who understand the underlying implementation.
1079 //
1080 // begin==nullptr is treated as a key before all keys in the database.
1081 // end==nullptr is treated as a key after all keys in the database.
1082 // Therefore the following call will compact the entire database:
1083 // db->CompactRange(options, nullptr, nullptr);
1084 // Note that after the entire database is compacted, all data are pushed
1085 // down to the last level containing any data. If the total data size after
1086 // compaction is reduced, that level might not be appropriate for hosting all
1087 // the files. In this case, client could set options.change_level to true, to
1088 // move the files back to the minimum level capable of holding the data set
1089 // or a given level (specified by non-negative options.target_level).
1090 virtual Status CompactRange(const CompactRangeOptions& options,
1091 ColumnFamilyHandle* column_family,
1092 const Slice* begin, const Slice* end) = 0;
1093 virtual Status CompactRange(const CompactRangeOptions& options,
1094 const Slice* begin, const Slice* end) {
1095 return CompactRange(options, DefaultColumnFamily(), begin, end);
1096 }
1097
1098 ROCKSDB_DEPRECATED_FUNC virtual Status CompactRange(
1099 ColumnFamilyHandle* column_family, const Slice* begin, const Slice* end,
1100 bool change_level = false, int target_level = -1,
1101 uint32_t target_path_id = 0) {
1102 CompactRangeOptions options;
1103 options.change_level = change_level;
1104 options.target_level = target_level;
1105 options.target_path_id = target_path_id;
1106 return CompactRange(options, column_family, begin, end);
1107 }
1108
1109 ROCKSDB_DEPRECATED_FUNC virtual Status CompactRange(
1110 const Slice* begin, const Slice* end, bool change_level = false,
1111 int target_level = -1, uint32_t target_path_id = 0) {
1112 CompactRangeOptions options;
1113 options.change_level = change_level;
1114 options.target_level = target_level;
1115 options.target_path_id = target_path_id;
1116 return CompactRange(options, DefaultColumnFamily(), begin, end);
1117 }
1118
1119 virtual Status SetOptions(
1120 ColumnFamilyHandle* /*column_family*/,
1121 const std::unordered_map<std::string, std::string>& /*new_options*/) {
1122 return Status::NotSupported("Not implemented");
1123 }
1124 virtual Status SetOptions(
1125 const std::unordered_map<std::string, std::string>& new_options) {
1126 return SetOptions(DefaultColumnFamily(), new_options);
1127 }
1128
1129 virtual Status SetDBOptions(
1130 const std::unordered_map<std::string, std::string>& new_options) = 0;
1131
1132 // CompactFiles() inputs a list of files specified by file numbers and
1133 // compacts them to the specified level. Note that the behavior is different
1134 // from CompactRange() in that CompactFiles() performs the compaction job
1135 // using the CURRENT thread.
1136 //
1137 // @see GetDataBaseMetaData
1138 // @see GetColumnFamilyMetaData
1139 virtual Status CompactFiles(
1140 const CompactionOptions& compact_options,
1141 ColumnFamilyHandle* column_family,
494da23a
TL
1142 const std::vector<std::string>& input_file_names, const int output_level,
1143 const int output_path_id = -1,
1144 std::vector<std::string>* const output_file_names = nullptr,
1145 CompactionJobInfo* compaction_job_info = nullptr) = 0;
7c673cae
FG
1146
1147 virtual Status CompactFiles(
1148 const CompactionOptions& compact_options,
494da23a
TL
1149 const std::vector<std::string>& input_file_names, const int output_level,
1150 const int output_path_id = -1,
1151 std::vector<std::string>* const output_file_names = nullptr,
1152 CompactionJobInfo* compaction_job_info = nullptr) {
7c673cae 1153 return CompactFiles(compact_options, DefaultColumnFamily(),
11fdf7f2 1154 input_file_names, output_level, output_path_id,
494da23a 1155 output_file_names, compaction_job_info);
7c673cae
FG
1156 }
1157
1158 // This function will wait until all currently running background processes
1159 // finish. After it returns, no background process will be run until
20effc67
TL
1160 // ContinueBackgroundWork is called, once for each preceding OK-returning
1161 // call to PauseBackgroundWork.
7c673cae
FG
1162 virtual Status PauseBackgroundWork() = 0;
1163 virtual Status ContinueBackgroundWork() = 0;
1164
1165 // This function will enable automatic compactions for the given column
1166 // families if they were previously disabled. The function will first set the
1167 // disable_auto_compactions option for each column family to 'false', after
1168 // which it will schedule a flush/compaction.
1169 //
1170 // NOTE: Setting disable_auto_compactions to 'false' through SetOptions() API
1171 // does NOT schedule a flush/compaction afterwards, and only changes the
1172 // parameter itself within the column family option.
1173 //
1174 virtual Status EnableAutoCompaction(
1175 const std::vector<ColumnFamilyHandle*>& column_family_handles) = 0;
1176
f67539c2
TL
1177 virtual void DisableManualCompaction() = 0;
1178 virtual void EnableManualCompaction() = 0;
1179
7c673cae
FG
1180 // Number of levels used for this DB.
1181 virtual int NumberLevels(ColumnFamilyHandle* column_family) = 0;
1182 virtual int NumberLevels() { return NumberLevels(DefaultColumnFamily()); }
1183
1184 // Maximum level to which a new compacted memtable is pushed if it
1185 // does not create overlap.
1186 virtual int MaxMemCompactionLevel(ColumnFamilyHandle* column_family) = 0;
1187 virtual int MaxMemCompactionLevel() {
1188 return MaxMemCompactionLevel(DefaultColumnFamily());
1189 }
1190
1191 // Number of files in level-0 that would stop writes.
1192 virtual int Level0StopWriteTrigger(ColumnFamilyHandle* column_family) = 0;
1193 virtual int Level0StopWriteTrigger() {
1194 return Level0StopWriteTrigger(DefaultColumnFamily());
1195 }
1196
1197 // Get DB name -- the exact same name that was provided as an argument to
1198 // DB::Open()
1199 virtual const std::string& GetName() const = 0;
1200
1201 // Get Env object from the DB
1202 virtual Env* GetEnv() const = 0;
1203
f67539c2
TL
1204 virtual FileSystem* GetFileSystem() const;
1205
7c673cae
FG
1206 // Get DB Options that we use. During the process of opening the
1207 // column family, the options provided when calling DB::Open() or
1208 // DB::CreateColumnFamily() will have been "sanitized" and transformed
1209 // in an implementation-defined manner.
1210 virtual Options GetOptions(ColumnFamilyHandle* column_family) const = 0;
1211 virtual Options GetOptions() const {
1212 return GetOptions(DefaultColumnFamily());
1213 }
1214
1215 virtual DBOptions GetDBOptions() const = 0;
1216
1217 // Flush all mem-table data.
494da23a
TL
1218 // Flush a single column family, even when atomic flush is enabled. To flush
1219 // multiple column families, use Flush(options, column_families).
7c673cae
FG
1220 virtual Status Flush(const FlushOptions& options,
1221 ColumnFamilyHandle* column_family) = 0;
1222 virtual Status Flush(const FlushOptions& options) {
1223 return Flush(options, DefaultColumnFamily());
1224 }
494da23a
TL
1225 // Flushes multiple column families.
1226 // If atomic flush is not enabled, Flush(options, column_families) is
1227 // equivalent to calling Flush(options, column_family) multiple times.
1228 // If atomic flush is enabled, Flush(options, column_families) will flush all
1229 // column families specified in 'column_families' up to the latest sequence
1230 // number at the time when flush is requested.
1231 // Note that RocksDB 5.15 and earlier may not be able to open later versions
1232 // with atomic flush enabled.
1233 virtual Status Flush(
1234 const FlushOptions& options,
1235 const std::vector<ColumnFamilyHandle*>& column_families) = 0;
7c673cae 1236
11fdf7f2
TL
1237 // Flush the WAL memory buffer to the file. If sync is true, it calls SyncWAL
1238 // afterwards.
1239 virtual Status FlushWAL(bool /*sync*/) {
1240 return Status::NotSupported("FlushWAL not implemented");
1241 }
7c673cae
FG
1242 // Sync the wal. Note that Write() followed by SyncWAL() is not exactly the
1243 // same as Write() with sync=true: in the latter case the changes won't be
1244 // visible until the sync is done.
1245 // Currently only works if allow_mmap_writes = false in Options.
1246 virtual Status SyncWAL() = 0;
1247
494da23a
TL
1248 // Lock the WAL. Also flushes the WAL after locking.
1249 virtual Status LockWAL() {
1250 return Status::NotSupported("LockWAL not implemented");
1251 }
1252
1253 // Unlock the WAL.
1254 virtual Status UnlockWAL() {
1255 return Status::NotSupported("UnlockWAL not implemented");
1256 }
1257
7c673cae
FG
1258 // The sequence number of the most recent transaction.
1259 virtual SequenceNumber GetLatestSequenceNumber() const = 0;
1260
11fdf7f2
TL
1261 // Instructs DB to preserve deletes with sequence numbers >= passed seqnum.
1262 // Has no effect if DBOptions.preserve_deletes is set to false.
1263 // This function assumes that user calls this function with monotonically
1264 // increasing seqnums (otherwise we can't guarantee that a particular delete
1265 // hasn't been already processed); returns true if the value was successfully
1266 // updated, false if user attempted to call if with seqnum <= current value.
1267 virtual bool SetPreserveDeletesSequenceNumber(SequenceNumber seqnum) = 0;
1268
7c673cae
FG
1269 // Prevent file deletions. Compactions will continue to occur,
1270 // but no obsolete files will be deleted. Calling this multiple
1271 // times have the same effect as calling it once.
1272 virtual Status DisableFileDeletions() = 0;
1273
1274 // Allow compactions to delete obsolete files.
1275 // If force == true, the call to EnableFileDeletions() will guarantee that
1276 // file deletions are enabled after the call, even if DisableFileDeletions()
1277 // was called multiple times before.
1278 // If force == false, EnableFileDeletions will only enable file deletion
1279 // after it's been called at least as many times as DisableFileDeletions(),
1280 // enabling the two methods to be called by two threads concurrently without
1281 // synchronization -- i.e., file deletions will be enabled only after both
1282 // threads call EnableFileDeletions()
1283 virtual Status EnableFileDeletions(bool force = true) = 0;
1284
20effc67 1285#ifndef ROCKSDB_LITE
7c673cae
FG
1286 // GetLiveFiles followed by GetSortedWalFiles can generate a lossless backup
1287
1288 // Retrieve the list of all files in the database. The files are
11fdf7f2
TL
1289 // relative to the dbname and are not absolute paths. Despite being relative
1290 // paths, the file names begin with "/". The valid size of the manifest file
1291 // is returned in manifest_file_size. The manifest file is an ever growing
1292 // file, but only the portion specified by manifest_file_size is valid for
1293 // this snapshot. Setting flush_memtable to true does Flush before recording
1294 // the live files. Setting flush_memtable to false is useful when we don't
1295 // want to wait for flush which may have to wait for compaction to complete
1296 // taking an indeterminate time.
7c673cae
FG
1297 //
1298 // In case you have multiple column families, even if flush_memtable is true,
1299 // you still need to call GetSortedWalFiles after GetLiveFiles to compensate
1300 // for new data that arrived to already-flushed column families while other
1301 // column families were flushing
1302 virtual Status GetLiveFiles(std::vector<std::string>&,
1303 uint64_t* manifest_file_size,
1304 bool flush_memtable = true) = 0;
1305
1306 // Retrieve the sorted list of all wal files with earliest file first
1307 virtual Status GetSortedWalFiles(VectorLogPtr& files) = 0;
1308
f67539c2
TL
1309 // Retrieve information about the current wal file
1310 //
1311 // Note that the log might have rolled after this call in which case
1312 // the current_log_file would not point to the current log file.
1313 //
1314 // Additionally, for the sake of optimization current_log_file->StartSequence
1315 // would always be set to 0
1316 virtual Status GetCurrentWalFile(
1317 std::unique_ptr<LogFile>* current_log_file) = 0;
1318
1319 // Retrieves the creation time of the oldest file in the DB.
1320 // This API only works if max_open_files = -1, if it is not then
1321 // Status returned is Status::NotSupported()
1322 // The file creation time is set using the env provided to the DB.
1323 // If the DB was created from a very old release then its possible that
1324 // the SST files might not have file_creation_time property and even after
1325 // moving to a newer release its possible that some files never got compacted
1326 // and may not have file_creation_time property. In both the cases
1327 // file_creation_time is considered 0 which means this API will return
1328 // creation_time = 0 as there wouldn't be a timestamp lower than 0.
1329 virtual Status GetCreationTimeOfOldestFile(uint64_t* creation_time) = 0;
1330
11fdf7f2 1331 // Note: this API is not yet consistent with WritePrepared transactions.
7c673cae
FG
1332 // Sets iter to an iterator that is positioned at a write-batch containing
1333 // seq_number. If the sequence number is non existent, it returns an iterator
1334 // at the first available seq_no after the requested seq_no
1335 // Returns Status::OK if iterator is valid
1336 // Must set WAL_ttl_seconds or WAL_size_limit_MB to large values to
1337 // use this api, else the WAL files will get
1338 // cleared aggressively and the iterator might keep getting invalid before
1339 // an update is read.
1340 virtual Status GetUpdatesSince(
494da23a
TL
1341 SequenceNumber seq_number, std::unique_ptr<TransactionLogIterator>* iter,
1342 const TransactionLogIterator::ReadOptions& read_options =
1343 TransactionLogIterator::ReadOptions()) = 0;
7c673cae
FG
1344
1345// Windows API macro interference
1346#undef DeleteFile
20effc67
TL
1347 // WARNING: This API is planned for removal in RocksDB 7.0 since it does not
1348 // operate at the proper level of abstraction for a key-value store, and its
1349 // contract/restrictions are poorly documented. For example, it returns non-OK
1350 // `Status` for non-bottommost files and files undergoing compaction. Since we
1351 // do not plan to maintain it, the contract will likely remain underspecified
1352 // until its removal. Any user is encouraged to read the implementation
1353 // carefully and migrate away from it when possible.
1354 //
7c673cae
FG
1355 // Delete the file name from the db directory and update the internal state to
1356 // reflect that. Supports deletion of sst and log files only. 'name' must be
1357 // path relative to the db directory. eg. 000001.sst, /archive/000003.log
1358 virtual Status DeleteFile(std::string name) = 0;
1359
1360 // Returns a list of all table files with their level, start key
1361 // and end key
1362 virtual void GetLiveFilesMetaData(
1363 std::vector<LiveFileMetaData>* /*metadata*/) {}
1364
20effc67
TL
1365 // Return a list of all table file checksum info.
1366 // Note: This function might be of limited use because it cannot be
1367 // synchronized with GetLiveFiles.
1368 virtual Status GetLiveFilesChecksumInfo(FileChecksumList* checksum_list) = 0;
1369
7c673cae 1370 // Obtains the meta data of the specified column family of the DB.
7c673cae
FG
1371 virtual void GetColumnFamilyMetaData(ColumnFamilyHandle* /*column_family*/,
1372 ColumnFamilyMetaData* /*metadata*/) {}
1373
1374 // Get the metadata of the default column family.
494da23a 1375 void GetColumnFamilyMetaData(ColumnFamilyMetaData* metadata) {
7c673cae
FG
1376 GetColumnFamilyMetaData(DefaultColumnFamily(), metadata);
1377 }
1378
1379 // IngestExternalFile() will load a list of external SST files (1) into the DB
11fdf7f2
TL
1380 // Two primary modes are supported:
1381 // - Duplicate keys in the new files will overwrite exiting keys (default)
1382 // - Duplicate keys will be skipped (set ingest_behind=true)
1383 // In the first mode we will try to find the lowest possible level that
1384 // the file can fit in, and ingest the file into this level (2). A file that
1385 // have a key range that overlap with the memtable key range will require us
1386 // to Flush the memtable first before ingesting the file.
1387 // In the second mode we will always ingest in the bottom most level (see
1388 // docs to IngestExternalFileOptions::ingest_behind).
7c673cae
FG
1389 //
1390 // (1) External SST files can be created using SstFileWriter
1391 // (2) We will try to ingest the files to the lowest possible level
11fdf7f2
TL
1392 // even if the file compression doesn't match the level compression
1393 // (3) If IngestExternalFileOptions->ingest_behind is set to true,
1394 // we always ingest at the bottommost level, which should be reserved
1395 // for this purpose (see DBOPtions::allow_ingest_behind flag).
7c673cae
FG
1396 virtual Status IngestExternalFile(
1397 ColumnFamilyHandle* column_family,
1398 const std::vector<std::string>& external_files,
1399 const IngestExternalFileOptions& options) = 0;
1400
1401 virtual Status IngestExternalFile(
1402 const std::vector<std::string>& external_files,
1403 const IngestExternalFileOptions& options) {
1404 return IngestExternalFile(DefaultColumnFamily(), external_files, options);
1405 }
1406
494da23a
TL
1407 // IngestExternalFiles() will ingest files for multiple column families, and
1408 // record the result atomically to the MANIFEST.
1409 // If this function returns OK, all column families' ingestion must succeed.
1410 // If this function returns NOK, or the process crashes, then non-of the
1411 // files will be ingested into the database after recovery.
1412 // Note that it is possible for application to observe a mixed state during
1413 // the execution of this function. If the user performs range scan over the
1414 // column families with iterators, iterator on one column family may return
1415 // ingested data, while iterator on other column family returns old data.
1416 // Users can use snapshot for a consistent view of data.
1417 // If your db ingests multiple SST files using this API, i.e. args.size()
1418 // > 1, then RocksDB 5.15 and earlier will not be able to open it.
1419 //
1420 // REQUIRES: each arg corresponds to a different column family: namely, for
1421 // 0 <= i < j < len(args), args[i].column_family != args[j].column_family.
1422 virtual Status IngestExternalFiles(
1423 const std::vector<IngestExternalFileArg>& args) = 0;
1424
f67539c2
TL
1425 // CreateColumnFamilyWithImport() will create a new column family with
1426 // column_family_name and import external SST files specified in metadata into
1427 // this column family.
1428 // (1) External SST files can be created using SstFileWriter.
1429 // (2) External SST files can be exported from a particular column family in
1430 // an existing DB.
1431 // Option in import_options specifies whether the external files are copied or
1432 // moved (default is copy). When option specifies copy, managing files at
1433 // external_file_path is caller's responsibility. When option specifies a
1434 // move, the call ensures that the specified files at external_file_path are
1435 // deleted on successful return and files are not modified on any error
1436 // return.
1437 // On error return, column family handle returned will be nullptr.
1438 // ColumnFamily will be present on successful return and will not be present
1439 // on error return. ColumnFamily may be present on any crash during this call.
1440 virtual Status CreateColumnFamilyWithImport(
1441 const ColumnFamilyOptions& options, const std::string& column_family_name,
1442 const ImportColumnFamilyOptions& import_options,
1443 const ExportImportFilesMetaData& metadata,
1444 ColumnFamilyHandle** handle) = 0;
1445
20effc67
TL
1446 // Verify the checksums of files in db. Currently the whole-file checksum of
1447 // table files are checked.
1448 virtual Status VerifyFileChecksums(const ReadOptions& /*read_options*/) {
1449 return Status::NotSupported("File verification not supported");
1450 }
1451
1452 // Verify the block checksums of files in db. The block checksums of table
1453 // files are checked.
f67539c2
TL
1454 virtual Status VerifyChecksum(const ReadOptions& read_options) = 0;
1455
1456 virtual Status VerifyChecksum() { return VerifyChecksum(ReadOptions()); }
11fdf7f2 1457
7c673cae
FG
1458 // AddFile() is deprecated, please use IngestExternalFile()
1459 ROCKSDB_DEPRECATED_FUNC virtual Status AddFile(
1460 ColumnFamilyHandle* column_family,
1461 const std::vector<std::string>& file_path_list, bool move_file = false,
1462 bool skip_snapshot_check = false) {
1463 IngestExternalFileOptions ifo;
1464 ifo.move_files = move_file;
1465 ifo.snapshot_consistency = !skip_snapshot_check;
1466 ifo.allow_global_seqno = false;
1467 ifo.allow_blocking_flush = false;
1468 return IngestExternalFile(column_family, file_path_list, ifo);
1469 }
1470
1471 ROCKSDB_DEPRECATED_FUNC virtual Status AddFile(
1472 const std::vector<std::string>& file_path_list, bool move_file = false,
1473 bool skip_snapshot_check = false) {
1474 IngestExternalFileOptions ifo;
1475 ifo.move_files = move_file;
1476 ifo.snapshot_consistency = !skip_snapshot_check;
1477 ifo.allow_global_seqno = false;
1478 ifo.allow_blocking_flush = false;
1479 return IngestExternalFile(DefaultColumnFamily(), file_path_list, ifo);
1480 }
1481
1482 // AddFile() is deprecated, please use IngestExternalFile()
1483 ROCKSDB_DEPRECATED_FUNC virtual Status AddFile(
1484 ColumnFamilyHandle* column_family, const std::string& file_path,
1485 bool move_file = false, bool skip_snapshot_check = false) {
1486 IngestExternalFileOptions ifo;
1487 ifo.move_files = move_file;
1488 ifo.snapshot_consistency = !skip_snapshot_check;
1489 ifo.allow_global_seqno = false;
1490 ifo.allow_blocking_flush = false;
1491 return IngestExternalFile(column_family, {file_path}, ifo);
1492 }
1493
1494 ROCKSDB_DEPRECATED_FUNC virtual Status AddFile(
1495 const std::string& file_path, bool move_file = false,
1496 bool skip_snapshot_check = false) {
1497 IngestExternalFileOptions ifo;
1498 ifo.move_files = move_file;
1499 ifo.snapshot_consistency = !skip_snapshot_check;
1500 ifo.allow_global_seqno = false;
1501 ifo.allow_blocking_flush = false;
1502 return IngestExternalFile(DefaultColumnFamily(), {file_path}, ifo);
1503 }
1504
1505 // Load table file with information "file_info" into "column_family"
1506 ROCKSDB_DEPRECATED_FUNC virtual Status AddFile(
1507 ColumnFamilyHandle* column_family,
1508 const std::vector<ExternalSstFileInfo>& file_info_list,
1509 bool move_file = false, bool skip_snapshot_check = false) {
1510 std::vector<std::string> external_files;
1511 for (const ExternalSstFileInfo& file_info : file_info_list) {
1512 external_files.push_back(file_info.file_path);
1513 }
1514 IngestExternalFileOptions ifo;
1515 ifo.move_files = move_file;
1516 ifo.snapshot_consistency = !skip_snapshot_check;
1517 ifo.allow_global_seqno = false;
1518 ifo.allow_blocking_flush = false;
1519 return IngestExternalFile(column_family, external_files, ifo);
1520 }
1521
1522 ROCKSDB_DEPRECATED_FUNC virtual Status AddFile(
1523 const std::vector<ExternalSstFileInfo>& file_info_list,
1524 bool move_file = false, bool skip_snapshot_check = false) {
1525 std::vector<std::string> external_files;
1526 for (const ExternalSstFileInfo& file_info : file_info_list) {
1527 external_files.push_back(file_info.file_path);
1528 }
1529 IngestExternalFileOptions ifo;
1530 ifo.move_files = move_file;
1531 ifo.snapshot_consistency = !skip_snapshot_check;
1532 ifo.allow_global_seqno = false;
1533 ifo.allow_blocking_flush = false;
1534 return IngestExternalFile(DefaultColumnFamily(), external_files, ifo);
1535 }
1536
1537 ROCKSDB_DEPRECATED_FUNC virtual Status AddFile(
1538 ColumnFamilyHandle* column_family, const ExternalSstFileInfo* file_info,
1539 bool move_file = false, bool skip_snapshot_check = false) {
1540 IngestExternalFileOptions ifo;
1541 ifo.move_files = move_file;
1542 ifo.snapshot_consistency = !skip_snapshot_check;
1543 ifo.allow_global_seqno = false;
1544 ifo.allow_blocking_flush = false;
1545 return IngestExternalFile(column_family, {file_info->file_path}, ifo);
1546 }
1547
1548 ROCKSDB_DEPRECATED_FUNC virtual Status AddFile(
1549 const ExternalSstFileInfo* file_info, bool move_file = false,
1550 bool skip_snapshot_check = false) {
1551 IngestExternalFileOptions ifo;
1552 ifo.move_files = move_file;
1553 ifo.snapshot_consistency = !skip_snapshot_check;
1554 ifo.allow_global_seqno = false;
1555 ifo.allow_blocking_flush = false;
1556 return IngestExternalFile(DefaultColumnFamily(), {file_info->file_path},
1557 ifo);
1558 }
1559
1560#endif // ROCKSDB_LITE
1561
f67539c2
TL
1562 // Returns the unique ID which is read from IDENTITY file during the opening
1563 // of database by setting in the identity variable
1564 // Returns Status::OK if identity could be set properly
7c673cae
FG
1565 virtual Status GetDbIdentity(std::string& identity) const = 0;
1566
20effc67
TL
1567 // Return a unique identifier for each DB object that is opened
1568 // This DB session ID should be unique among all open DB instances on all
1569 // hosts, and should be unique among re-openings of the same or other DBs.
1570 // (Two open DBs have the same identity from other function GetDbIdentity when
1571 // one is physically copied from the other.)
1572 virtual Status GetDbSessionId(std::string& session_id) const = 0;
1573
7c673cae
FG
1574 // Returns default column family handle
1575 virtual ColumnFamilyHandle* DefaultColumnFamily() const = 0;
1576
1577#ifndef ROCKSDB_LITE
1578 virtual Status GetPropertiesOfAllTables(ColumnFamilyHandle* column_family,
1579 TablePropertiesCollection* props) = 0;
1580 virtual Status GetPropertiesOfAllTables(TablePropertiesCollection* props) {
1581 return GetPropertiesOfAllTables(DefaultColumnFamily(), props);
1582 }
1583 virtual Status GetPropertiesOfTablesInRange(
1584 ColumnFamilyHandle* column_family, const Range* range, std::size_t n,
1585 TablePropertiesCollection* props) = 0;
11fdf7f2
TL
1586
1587 virtual Status SuggestCompactRange(ColumnFamilyHandle* /*column_family*/,
1588 const Slice* /*begin*/,
1589 const Slice* /*end*/) {
1590 return Status::NotSupported("SuggestCompactRange() is not implemented.");
1591 }
1592
1593 virtual Status PromoteL0(ColumnFamilyHandle* /*column_family*/,
1594 int /*target_level*/) {
1595 return Status::NotSupported("PromoteL0() is not implemented.");
1596 }
1597
1598 // Trace DB operations. Use EndTrace() to stop tracing.
1599 virtual Status StartTrace(const TraceOptions& /*options*/,
1600 std::unique_ptr<TraceWriter>&& /*trace_writer*/) {
1601 return Status::NotSupported("StartTrace() is not implemented.");
1602 }
1603
1604 virtual Status EndTrace() {
1605 return Status::NotSupported("EndTrace() is not implemented.");
1606 }
f67539c2 1607
20effc67
TL
1608 // IO Tracing operations. Use EndIOTrace() to stop tracing.
1609 virtual Status StartIOTrace(Env* /*env*/, const TraceOptions& /*options*/,
1610 std::unique_ptr<TraceWriter>&& /*trace_writer*/) {
1611 return Status::NotSupported("StartIOTrace() is not implemented.");
1612 }
1613
1614 virtual Status EndIOTrace() {
1615 return Status::NotSupported("EndIOTrace() is not implemented.");
1616 }
1617
f67539c2
TL
1618 // Trace block cache accesses. Use EndBlockCacheTrace() to stop tracing.
1619 virtual Status StartBlockCacheTrace(
1620 const TraceOptions& /*options*/,
1621 std::unique_ptr<TraceWriter>&& /*trace_writer*/) {
1622 return Status::NotSupported("StartBlockCacheTrace() is not implemented.");
1623 }
1624
1625 virtual Status EndBlockCacheTrace() {
1626 return Status::NotSupported("EndBlockCacheTrace() is not implemented.");
1627 }
7c673cae
FG
1628#endif // ROCKSDB_LITE
1629
1630 // Needed for StackableDB
1631 virtual DB* GetRootDB() { return this; }
1632
f67539c2
TL
1633 // Given a window [start_time, end_time), setup a StatsHistoryIterator
1634 // to access stats history. Note the start_time and end_time are epoch
1635 // time measured in seconds, and end_time is an exclusive bound.
494da23a
TL
1636 virtual Status GetStatsHistory(
1637 uint64_t /*start_time*/, uint64_t /*end_time*/,
1638 std::unique_ptr<StatsHistoryIterator>* /*stats_iterator*/) {
1639 return Status::NotSupported("GetStatsHistory() is not implemented.");
1640 }
1641
1642#ifndef ROCKSDB_LITE
1643 // Make the secondary instance catch up with the primary by tailing and
1644 // replaying the MANIFEST and WAL of the primary.
1645 // Column families created by the primary after the secondary instance starts
1646 // will be ignored unless the secondary instance closes and restarts with the
1647 // newly created column families.
1648 // Column families that exist before secondary instance starts and dropped by
1649 // the primary afterwards will be marked as dropped. However, as long as the
1650 // secondary instance does not delete the corresponding column family
1651 // handles, the data of the column family is still accessible to the
1652 // secondary.
1653 // TODO: we will support WAL tailing soon.
1654 virtual Status TryCatchUpWithPrimary() {
1655 return Status::NotSupported("Supported only by secondary instance");
1656 }
1657#endif // !ROCKSDB_LITE
7c673cae
FG
1658};
1659
1660// Destroy the contents of the specified database.
1661// Be very careful using this method.
11fdf7f2
TL
1662Status DestroyDB(const std::string& name, const Options& options,
1663 const std::vector<ColumnFamilyDescriptor>& column_families =
494da23a 1664 std::vector<ColumnFamilyDescriptor>());
7c673cae
FG
1665
1666#ifndef ROCKSDB_LITE
1667// If a DB cannot be opened, you may attempt to call this method to
1668// resurrect as much of the contents of the database as possible.
1669// Some data may be lost, so be careful when calling this function
1670// on a database that contains important information.
1671//
1672// With this API, we will warn and skip data associated with column families not
1673// specified in column_families.
1674//
1675// @param column_families Descriptors for known column families
1676Status RepairDB(const std::string& dbname, const DBOptions& db_options,
1677 const std::vector<ColumnFamilyDescriptor>& column_families);
1678
1679// @param unknown_cf_opts Options for column families encountered during the
1680// repair that were not specified in column_families.
1681Status RepairDB(const std::string& dbname, const DBOptions& db_options,
1682 const std::vector<ColumnFamilyDescriptor>& column_families,
1683 const ColumnFamilyOptions& unknown_cf_opts);
1684
1685// @param options These options will be used for the database and for ALL column
1686// families encountered during the repair
1687Status RepairDB(const std::string& dbname, const Options& options);
1688
1689#endif
1690
f67539c2 1691} // namespace ROCKSDB_NAMESPACE