// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
-// This source code is licensed under the BSD-style license found in the
-// LICENSE file in the root directory of this source tree. An additional grant
-// of patent rights can be found in the PATENTS file in the same directory.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
-#ifndef STORAGE_ROCKSDB_INCLUDE_DB_H_
-#define STORAGE_ROCKSDB_INCLUDE_DB_H_
+#pragma once
#include <stdint.h>
#include <stdio.h>
class WriteBatch;
class Env;
class EventListener;
+class TraceWriter;
using std::unique_ptr;
// A range of keys
struct Range {
- Slice start; // Included in the range
- Slice limit; // Not included in the range
+ Slice start;  // Included in the range
+ Slice limit;  // Not included in the range
Range() { }
Range(const Slice& s, const Slice& l) : start(s), limit(l) { }
};
+struct RangePtr {  // A key range whose endpoints are held as pointers to Slice
+ const Slice* start;  // NOTE(review): presumably inclusive, mirroring Range::start - confirm
+ const Slice* limit;  // NOTE(review): presumably exclusive, mirroring Range::limit - confirm
+
+ RangePtr() : start(nullptr), limit(nullptr) { }  // Both endpoints start out null
+ RangePtr(const Slice* s, const Slice* l) : start(s), limit(l) { }  // Stores the pointers as given
+};
+
// A collection of table properties objects, where
// key: is the table's file name.
// value: the table properties object of the given table.
const std::vector<ColumnFamilyDescriptor>& column_families,
std::vector<ColumnFamilyHandle*>* handles, DB** dbptr);
+ virtual Status Resume() { return Status::NotSupported(); }  // Base-class default: reports NotSupported
+
+ // Close the DB by releasing resources, closing files etc. This should be
+ // called before calling the destructor so that the caller can get back a
+ // status in case there are any errors. This will not fsync the WAL files.
+ // If syncing is required, the caller must first call SyncWAL(), or Write()
+ // using an empty write batch with WriteOptions.sync=true.
+ // Regardless of the return status, the DB must be freed. If the return
+ // status is NotSupported(), then the DB implementation does cleanup in the
+ // destructor.
+ virtual Status Close() { return Status::NotSupported(); }  // Base-class default: reports NotSupported
+
// ListColumnFamilies will open the DB specified by argument name
// and return the list of all column families in that DB
// through column_families argument. The ordering of
const std::string& column_family_name,
ColumnFamilyHandle** handle);
+ // Bulk create column families with the same column family options.
+ // Return the handles of the column families through the argument handles.
+ // In case of error, the request may succeed partially, and handles will
+ // contain column family handles that it managed to create, and have size
+ // equal to the number of created column families.
+ virtual Status CreateColumnFamilies(
+ const ColumnFamilyOptions& options,
+ const std::vector<std::string>& column_family_names,
+ std::vector<ColumnFamilyHandle*>* handles);
+
+ // Bulk create column families.
+ // Return the handles of the column families through the argument handles.
+ // In case of error, the request may succeed partially, and handles will
+ // contain column family handles that it managed to create, and have size
+ // equal to the number of created column families.
+ virtual Status CreateColumnFamilies(
+ const std::vector<ColumnFamilyDescriptor>& column_families,
+ std::vector<ColumnFamilyHandle*>* handles);
+
// Drop a column family specified by column_family handle. This call
// only records a drop record in the manifest and prevents the column
// family from flushing and compacting.
virtual Status DropColumnFamily(ColumnFamilyHandle* column_family);
+
+ // Bulk drop column families. This call only records drop records in the
+ // manifest and prevents the column families from flushing and compacting.
+ // In case of error, the request may succeed partially. User may call
+ // ListColumnFamilies to check the result.
+ virtual Status DropColumnFamilies(
+ const std::vector<ColumnFamilyHandle*>& column_families);
+
// Close a column family specified by column_family handle and destroy
// the column family handle specified to avoid double deletion. This call
// deletes the column family handle by default. Use this method to
// It could also be used to return the stats in the format of the map.
// In this case there will be a pair of string to array of double for
// each level as well as for "Sum". "Int" stats will not be affected
- // when this form of stats are retrived.
+ // when this form of stats are retrieved.
static const std::string kCFStatsNoFileHistogram;
// "rocksdb.cf-file-histogram" - print out how many file reads to every
// by iterators or unfinished compactions.
static const std::string kNumLiveVersions;
- // "rocksdb.current-super-version-number" - returns number of curent LSM
+ // "rocksdb.current-super-version-number" - returns number of current LSM
// version. It is a uint64_t integer number, incremented after there is
// any change to the LSM tree. The number is not preserved after restarting
// the DB. After DB restart, it will start from 0 again.
// live data in bytes.
static const std::string kEstimateLiveDataSize;
- // "rocksdb.min-log-number-to-keep" - return the minmum log number of the
+ // "rocksdb.min-log-number-to-keep" - return the minimum log number of the
// log files that should be kept.
static const std::string kMinLogNumberToKeep;
// WARNING: may slow down online queries if there are too many files.
static const std::string kTotalSstFilesSize;
+ // "rocksdb.live-sst-files-size" - returns total size (bytes) of all SST
+ // files belonging to the latest LSM tree.
+ static const std::string kLiveSstFilesSize;
+
// "rocksdb.base-level" - returns number of level to which L0 data will be
// compacted.
static const std::string kBaseLevel;
// "rocksdb.is-write-stopped" - Return 1 if write has been stopped.
static const std::string kIsWriteStopped;
+
+ // "rocksdb.estimate-oldest-key-time" - returns an estimation of
+ // oldest key timestamp in the DB. Currently only available for
+ // FIFO compaction with
+ // compaction_options_fifo.allow_compaction = false.
+ static const std::string kEstimateOldestKeyTime;
+
+ // "rocksdb.block-cache-capacity" - returns block cache capacity.
+ static const std::string kBlockCacheCapacity;
+
+ // "rocksdb.block-cache-usage" - returns the memory size for the entries
+ // residing in block cache.
+ static const std::string kBlockCacheUsage;
+
+ // "rocksdb.block-cache-pinned-usage" - returns the memory size for the
+ // entries being pinned.
+ static const std::string kBlockCachePinnedUsage;
+
+ // "rocksdb.options-statistics" - returns multi-line string
+ // of options.statistics
+ static const std::string kOptionsStatistics;
};
#endif /* ROCKSDB_LITE */
}
virtual bool GetMapProperty(ColumnFamilyHandle* column_family,
const Slice& property,
- std::map<std::string, double>* value) = 0;
+ std::map<std::string, std::string>* value) = 0;
virtual bool GetMapProperty(const Slice& property,
- std::map<std::string, double>* value) {
+ std::map<std::string, std::string>* value) {  // Convenience overload for the default column family
return GetMapProperty(DefaultColumnFamily(), property, value);
}
// "rocksdb.estimate-live-data-size"
// "rocksdb.min-log-number-to-keep"
// "rocksdb.total-sst-files-size"
+ // "rocksdb.live-sst-files-size"
// "rocksdb.base-level"
// "rocksdb.estimate-pending-compaction-bytes"
// "rocksdb.num-running-compactions"
// "rocksdb.num-running-flushes"
// "rocksdb.actual-delayed-write-rate"
// "rocksdb.is-write-stopped"
+ // "rocksdb.estimate-oldest-key-time"
+ // "rocksdb.block-cache-capacity"
+ // "rocksdb.block-cache-usage"
+ // "rocksdb.block-cache-pinned-usage"
virtual bool GetIntProperty(ColumnFamilyHandle* column_family,
const Slice& property, uint64_t* value) = 0;
virtual bool GetIntProperty(const Slice& property, uint64_t* value) {
const CompactionOptions& compact_options,
ColumnFamilyHandle* column_family,
const std::vector<std::string>& input_file_names,
- const int output_level, const int output_path_id = -1) = 0;
+ const int output_level, const int output_path_id = -1,
+ std::vector<std::string>* const output_file_names = nullptr) = 0;
virtual Status CompactFiles(
const CompactionOptions& compact_options,
const std::vector<std::string>& input_file_names,
- const int output_level, const int output_path_id = -1) {
+ const int output_level, const int output_path_id = -1,
+ std::vector<std::string>* const output_file_names = nullptr) {  // New trailing parameter; defaults to nullptr
return CompactFiles(compact_options, DefaultColumnFamily(),
- input_file_names, output_level, output_path_id);
+ input_file_names, output_level, output_path_id,
+ output_file_names);  // Forwarded unchanged to the column-family overload
}
// This function will wait until all currently running background processes
// finish. After it returns, no background process will be run until
- // UnblockBackgroundWork is called
+ // ContinueBackgroundWork is called
virtual Status PauseBackgroundWork() = 0;
virtual Status ContinueBackgroundWork() = 0;
return Flush(options, DefaultColumnFamily());
}
+ // Flush the WAL memory buffer to the file. If sync is true, it calls SyncWAL
+ // afterwards.
+ virtual Status FlushWAL(bool /*sync*/) {  // Base-class default ignores sync and reports NotSupported
+ return Status::NotSupported("FlushWAL not implemented");
+ }
// Sync the wal. Note that Write() followed by SyncWAL() is not exactly the
// same as Write() with sync=true: in the latter case the changes won't be
// visible until the sync is done.
// The sequence number of the most recent transaction.
virtual SequenceNumber GetLatestSequenceNumber() const = 0;
+ // Instructs DB to preserve deletes with sequence numbers >= passed seqnum.
+ // Has no effect if DBOptions.preserve_deletes is set to false.
+ // This function assumes that user calls this function with monotonically
+ // increasing seqnums (otherwise we can't guarantee that a particular delete
+ // hasn't been already processed); returns true if the value was successfully
+ // updated, false if user attempted to call it with seqnum <= current value.
+ virtual bool SetPreserveDeletesSequenceNumber(SequenceNumber seqnum) = 0;
+
#ifndef ROCKSDB_LITE
// Prevent file deletions. Compactions will continue to occur,
// GetLiveFiles followed by GetSortedWalFiles can generate a lossless backup
// Retrieve the list of all files in the database. The files are
- // relative to the dbname and are not absolute paths. The valid size of the
- // manifest file is returned in manifest_file_size. The manifest file is an
- // ever growing file, but only the portion specified by manifest_file_size is
- // valid for this snapshot.
- // Setting flush_memtable to true does Flush before recording the live files.
- // Setting flush_memtable to false is useful when we don't want to wait for
- // flush which may have to wait for compaction to complete taking an
- // indeterminate time.
+ // relative to the dbname and are not absolute paths. Despite being relative
+ // paths, the file names begin with "/". The valid size of the manifest file
+ // is returned in manifest_file_size. The manifest file is an ever growing
+ // file, but only the portion specified by manifest_file_size is valid for
+ // this snapshot. Setting flush_memtable to true does Flush before recording
+ // the live files. Setting flush_memtable to false is useful when we don't
+ // want to wait for flush which may have to wait for compaction to complete
+ // taking an indeterminate time.
//
// In case you have multiple column families, even if flush_memtable is true,
// you still need to call GetSortedWalFiles after GetLiveFiles to compensate
// Retrieve the sorted list of all wal files with earliest file first
virtual Status GetSortedWalFiles(VectorLogPtr& files) = 0;
+ // Note: this API is not yet consistent with WritePrepared transactions.
// Sets iter to an iterator that is positioned at a write-batch containing
// seq_number. If the sequence number is non existent, it returns an iterator
// at the first available seq_no after the requested seq_no
std::vector<LiveFileMetaData>* /*metadata*/) {}
// Obtains the meta data of the specified column family of the DB.
- // Status::NotFound() will be returned if the current DB does not have
- // any column family match the specified name.
- //
- // If cf_name is not specified, then the metadata of the default
- // column family will be returned.
virtual void GetColumnFamilyMetaData(ColumnFamilyHandle* /*column_family*/,
ColumnFamilyMetaData* /*metadata*/) {}
}
// IngestExternalFile() will load a list of external SST files (1) into the DB
- // We will try to find the lowest possible level that the file can fit in, and
- // ingest the file into this level (2). A file that have a key range that
- // overlap with the memtable key range will require us to Flush the memtable
- // first before ingesting the file.
+ // Two primary modes are supported:
+ // - Duplicate keys in the new files will overwrite existing keys (default)
+ // - Duplicate keys will be skipped (set ingest_behind=true)
+ // In the first mode we will try to find the lowest possible level that
+ // the file can fit in, and ingest the file into this level (2). A file that
+ // has a key range that overlaps with the memtable key range will require us
+ // to Flush the memtable first before ingesting the file.
+ // In the second mode we will always ingest in the bottom most level (see
+ // docs to IngestExternalFileOptions::ingest_behind).
//
// (1) External SST files can be created using SstFileWriter
// (2) We will try to ingest the files to the lowest possible level
- // even if the file compression dont match the level compression
+ // even if the file compression doesn't match the level compression
+ // (3) If IngestExternalFileOptions->ingest_behind is set to true,
+ // we always ingest at the bottommost level, which should be reserved
+ // for this purpose (see DBOptions::allow_ingest_behind flag).
virtual Status IngestExternalFile(
ColumnFamilyHandle* column_family,
const std::vector<std::string>& external_files,
return IngestExternalFile(DefaultColumnFamily(), external_files, options);
}
+ virtual Status VerifyChecksum() = 0;  // NOTE(review): undocumented pure virtual - state what is verified and which Status signals a mismatch
+
// AddFile() is deprecated, please use IngestExternalFile()
ROCKSDB_DEPRECATED_FUNC virtual Status AddFile(
ColumnFamilyHandle* column_family,
virtual Status GetPropertiesOfTablesInRange(
ColumnFamilyHandle* column_family, const Range* range, std::size_t n,
TablePropertiesCollection* props) = 0;
+
+ virtual Status SuggestCompactRange(ColumnFamilyHandle* /*column_family*/,
+ const Slice* /*begin*/,
+ const Slice* /*end*/) {
+ return Status::NotSupported("SuggestCompactRange() is not implemented.");
+ }  // Base-class default: all arguments are ignored and NotSupported is returned
+
+ virtual Status PromoteL0(ColumnFamilyHandle* /*column_family*/,
+ int /*target_level*/) {
+ return Status::NotSupported("PromoteL0() is not implemented.");
+ }  // Base-class default: all arguments are ignored and NotSupported is returned
+
+ // Trace DB operations. Use EndTrace() to stop tracing.
+ virtual Status StartTrace(const TraceOptions& /*options*/,
+ std::unique_ptr<TraceWriter>&& /*trace_writer*/) {
+ return Status::NotSupported("StartTrace() is not implemented.");
+ }  // Base-class default: trace_writer is left untouched and NotSupported is returned
+
+ virtual Status EndTrace() {  // Counterpart of StartTrace(); base-class default reports NotSupported
+ return Status::NotSupported("EndTrace() is not implemented.");
+ }
#endif // ROCKSDB_LITE
// Needed for StackableDB
// Destroy the contents of the specified database.
// Be very careful using this method.
-Status DestroyDB(const std::string& name, const Options& options);
+Status DestroyDB(const std::string& name, const Options& options,
+ const std::vector<ColumnFamilyDescriptor>& column_families =
+ std::vector<ColumnFamilyDescriptor>());  // Defaults to an empty list, so two-argument calls keep compiling
#ifndef ROCKSDB_LITE
// If a DB cannot be opened, you may attempt to call this method to
#endif
} // namespace rocksdb
-
-#endif // STORAGE_ROCKSDB_INCLUDE_DB_H_