]> git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/include/rocksdb/db.h
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / rocksdb / include / rocksdb / db.h
1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under the BSD-style license found in the
3 // LICENSE file in the root directory of this source tree. An additional grant
4 // of patent rights can be found in the PATENTS file in the same directory.
5 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
6 // Use of this source code is governed by a BSD-style license that can be
7 // found in the LICENSE file. See the AUTHORS file for names of contributors.
8
9 #ifndef STORAGE_ROCKSDB_INCLUDE_DB_H_
10 #define STORAGE_ROCKSDB_INCLUDE_DB_H_
11
12 #include <stdint.h>
13 #include <stdio.h>
14 #include <map>
15 #include <memory>
16 #include <string>
17 #include <unordered_map>
18 #include <vector>
19 #include "rocksdb/iterator.h"
20 #include "rocksdb/listener.h"
21 #include "rocksdb/metadata.h"
22 #include "rocksdb/options.h"
23 #include "rocksdb/snapshot.h"
24 #include "rocksdb/sst_file_writer.h"
25 #include "rocksdb/thread_status.h"
26 #include "rocksdb/transaction_log.h"
27 #include "rocksdb/types.h"
28 #include "rocksdb/version.h"
29
30 #ifdef _WIN32
31 // Windows API macro interference
32 #undef DeleteFile
33 #endif
34
// ROCKSDB_DEPRECATED_FUNC marks a function declaration as deprecated so that
// callers get a compile-time diagnostic. It expands to the GCC/Clang attribute
// or to the MSVC declspec on Windows. On any other toolchain it expands to
// nothing, so that the declarations using it still compile there.
#if defined(__GNUC__) || defined(__clang__)
#define ROCKSDB_DEPRECATED_FUNC __attribute__((__deprecated__))
#elif defined(_WIN32)
#define ROCKSDB_DEPRECATED_FUNC __declspec(deprecated)
#else
#define ROCKSDB_DEPRECATED_FUNC
#endif
40
41 namespace rocksdb {
42
43 struct Options;
44 struct DBOptions;
45 struct ColumnFamilyOptions;
46 struct ReadOptions;
47 struct WriteOptions;
48 struct FlushOptions;
49 struct CompactionOptions;
50 struct CompactRangeOptions;
51 struct TableProperties;
52 struct ExternalSstFileInfo;
53 class WriteBatch;
54 class Env;
55 class EventListener;
56
57 using std::unique_ptr;
58
59 extern const std::string kDefaultColumnFamilyName;
60 struct ColumnFamilyDescriptor {
61 std::string name;
62 ColumnFamilyOptions options;
63 ColumnFamilyDescriptor()
64 : name(kDefaultColumnFamilyName), options(ColumnFamilyOptions()) {}
65 ColumnFamilyDescriptor(const std::string& _name,
66 const ColumnFamilyOptions& _options)
67 : name(_name), options(_options) {}
68 };
69
70 class ColumnFamilyHandle {
71 public:
72 virtual ~ColumnFamilyHandle() {}
73 // Returns the name of the column family associated with the current handle.
74 virtual const std::string& GetName() const = 0;
75 // Returns the ID of the column family associated with the current handle.
76 virtual uint32_t GetID() const = 0;
77 // Fills "*desc" with the up-to-date descriptor of the column family
78 // associated with this handle. Since it fills "*desc" with the up-to-date
79 // information, this call might internally lock and release DB mutex to
80 // access the up-to-date CF options. In addition, all the pointer-typed
81 // options cannot be referenced any longer than the original options exist.
82 //
83 // Note that this function is not supported in RocksDBLite.
84 virtual Status GetDescriptor(ColumnFamilyDescriptor* desc) = 0;
85 // Returns the comparator of the column family associated with the
86 // current handle.
87 virtual const Comparator* GetComparator() const = 0;
88 };
89
90 static const int kMajorVersion = __ROCKSDB_MAJOR__;
91 static const int kMinorVersion = __ROCKSDB_MINOR__;
92
93 // A range of keys
94 struct Range {
95 Slice start; // Included in the range
96 Slice limit; // Not included in the range
97
98 Range() { }
99 Range(const Slice& s, const Slice& l) : start(s), limit(l) { }
100 };
101
102 // A collections of table properties objects, where
103 // key: is the table's file name.
104 // value: the table properties object of the given table.
105 typedef std::unordered_map<std::string, std::shared_ptr<const TableProperties>>
106 TablePropertiesCollection;
107
108 // A DB is a persistent ordered map from keys to values.
109 // A DB is safe for concurrent access from multiple threads without
110 // any external synchronization.
111 class DB {
112 public:
113 // Open the database with the specified "name".
114 // Stores a pointer to a heap-allocated database in *dbptr and returns
115 // OK on success.
116 // Stores nullptr in *dbptr and returns a non-OK status on error.
117 // Caller should delete *dbptr when it is no longer needed.
118 static Status Open(const Options& options,
119 const std::string& name,
120 DB** dbptr);
121
122 // Open the database for read only. All DB interfaces
123 // that modify data, like put/delete, will return error.
124 // If the db is opened in read only mode, then no compactions
125 // will happen.
126 //
127 // Not supported in ROCKSDB_LITE, in which case the function will
128 // return Status::NotSupported.
129 static Status OpenForReadOnly(const Options& options,
130 const std::string& name, DB** dbptr,
131 bool error_if_log_file_exist = false);
132
133 // Open the database for read only with column families. When opening DB with
134 // read only, you can specify only a subset of column families in the
135 // database that should be opened. However, you always need to specify default
136 // column family. The default column family name is 'default' and it's stored
137 // in rocksdb::kDefaultColumnFamilyName
138 //
139 // Not supported in ROCKSDB_LITE, in which case the function will
140 // return Status::NotSupported.
141 static Status OpenForReadOnly(
142 const DBOptions& db_options, const std::string& name,
143 const std::vector<ColumnFamilyDescriptor>& column_families,
144 std::vector<ColumnFamilyHandle*>* handles, DB** dbptr,
145 bool error_if_log_file_exist = false);
146
147 // Open DB with column families.
148 // db_options specify database specific options
149 // column_families is the vector of all column families in the database,
150 // containing column family name and options. You need to open ALL column
151 // families in the database. To get the list of column families, you can use
152 // ListColumnFamilies(). Also, you can open only a subset of column families
153 // for read-only access.
154 // The default column family name is 'default' and it's stored
155 // in rocksdb::kDefaultColumnFamilyName.
156 // If everything is OK, handles will on return be the same size
157 // as column_families --- handles[i] will be a handle that you
158 // will use to operate on column family column_family[i].
159 // Before delete DB, you have to close All column families by calling
160 // DestroyColumnFamilyHandle() with all the handles.
161 static Status Open(const DBOptions& db_options, const std::string& name,
162 const std::vector<ColumnFamilyDescriptor>& column_families,
163 std::vector<ColumnFamilyHandle*>* handles, DB** dbptr);
164
165 // ListColumnFamilies will open the DB specified by argument name
166 // and return the list of all column families in that DB
167 // through column_families argument. The ordering of
168 // column families in column_families is unspecified.
169 static Status ListColumnFamilies(const DBOptions& db_options,
170 const std::string& name,
171 std::vector<std::string>* column_families);
172
173 DB() { }
174 virtual ~DB();
175
176 // Create a column_family and return the handle of column family
177 // through the argument handle.
178 virtual Status CreateColumnFamily(const ColumnFamilyOptions& options,
179 const std::string& column_family_name,
180 ColumnFamilyHandle** handle);
181
182 // Drop a column family specified by column_family handle. This call
183 // only records a drop record in the manifest and prevents the column
184 // family from flushing and compacting.
185 virtual Status DropColumnFamily(ColumnFamilyHandle* column_family);
186 // Close a column family specified by column_family handle and destroy
187 // the column family handle specified to avoid double deletion. This call
188 // deletes the column family handle by default. Use this method to
189 // close column family instead of deleting column family handle directly
190 virtual Status DestroyColumnFamilyHandle(ColumnFamilyHandle* column_family);
191
192 // Set the database entry for "key" to "value".
193 // If "key" already exists, it will be overwritten.
194 // Returns OK on success, and a non-OK status on error.
195 // Note: consider setting options.sync = true.
196 virtual Status Put(const WriteOptions& options,
197 ColumnFamilyHandle* column_family, const Slice& key,
198 const Slice& value) = 0;
199 virtual Status Put(const WriteOptions& options, const Slice& key,
200 const Slice& value) {
201 return Put(options, DefaultColumnFamily(), key, value);
202 }
203
204 // Remove the database entry (if any) for "key". Returns OK on
205 // success, and a non-OK status on error. It is not an error if "key"
206 // did not exist in the database.
207 // Note: consider setting options.sync = true.
208 virtual Status Delete(const WriteOptions& options,
209 ColumnFamilyHandle* column_family,
210 const Slice& key) = 0;
211 virtual Status Delete(const WriteOptions& options, const Slice& key) {
212 return Delete(options, DefaultColumnFamily(), key);
213 }
214
215 // Remove the database entry for "key". Requires that the key exists
216 // and was not overwritten. Returns OK on success, and a non-OK status
217 // on error. It is not an error if "key" did not exist in the database.
218 //
219 // If a key is overwritten (by calling Put() multiple times), then the result
220 // of calling SingleDelete() on this key is undefined. SingleDelete() only
221 // behaves correctly if there has been only one Put() for this key since the
222 // previous call to SingleDelete() for this key.
223 //
224 // This feature is currently an experimental performance optimization
225 // for a very specific workload. It is up to the caller to ensure that
226 // SingleDelete is only used for a key that is not deleted using Delete() or
227 // written using Merge(). Mixing SingleDelete operations with Deletes and
228 // Merges can result in undefined behavior.
229 //
230 // Note: consider setting options.sync = true.
231 virtual Status SingleDelete(const WriteOptions& options,
232 ColumnFamilyHandle* column_family,
233 const Slice& key) = 0;
234 virtual Status SingleDelete(const WriteOptions& options, const Slice& key) {
235 return SingleDelete(options, DefaultColumnFamily(), key);
236 }
237
238 // Removes the database entries in the range ["begin_key", "end_key"), i.e.,
239 // including "begin_key" and excluding "end_key". Returns OK on success, and
240 // a non-OK status on error. It is not an error if no keys exist in the range
241 // ["begin_key", "end_key").
242 //
243 // This feature is currently an experimental performance optimization for
244 // deleting very large ranges of contiguous keys. Invoking it many times or on
245 // small ranges may severely degrade read performance; in particular, the
246 // resulting performance can be worse than calling Delete() for each key in
247 // the range. Note also the degraded read performance affects keys outside the
248 // deleted ranges, and affects database operations involving scans, like flush
249 // and compaction.
250 //
251 // Consider setting ReadOptions::ignore_range_deletions = true to speed
252 // up reads for key(s) that are known to be unaffected by range deletions.
253 virtual Status DeleteRange(const WriteOptions& options,
254 ColumnFamilyHandle* column_family,
255 const Slice& begin_key, const Slice& end_key);
256
257 // Merge the database entry for "key" with "value". Returns OK on success,
258 // and a non-OK status on error. The semantics of this operation is
259 // determined by the user provided merge_operator when opening DB.
260 // Note: consider setting options.sync = true.
261 virtual Status Merge(const WriteOptions& options,
262 ColumnFamilyHandle* column_family, const Slice& key,
263 const Slice& value) = 0;
264 virtual Status Merge(const WriteOptions& options, const Slice& key,
265 const Slice& value) {
266 return Merge(options, DefaultColumnFamily(), key, value);
267 }
268
269 // Apply the specified updates to the database.
270 // If `updates` contains no update, WAL will still be synced if
271 // options.sync=true.
272 // Returns OK on success, non-OK on failure.
273 // Note: consider setting options.sync = true.
274 virtual Status Write(const WriteOptions& options, WriteBatch* updates) = 0;
275
276 // If the database contains an entry for "key" store the
277 // corresponding value in *value and return OK.
278 //
279 // If there is no entry for "key" leave *value unchanged and return
280 // a status for which Status::IsNotFound() returns true.
281 //
282 // May return some other Status on an error.
283 virtual inline Status Get(const ReadOptions& options,
284 ColumnFamilyHandle* column_family, const Slice& key,
285 std::string* value) {
286 assert(value != nullptr);
287 PinnableSlice pinnable_val(value);
288 assert(!pinnable_val.IsPinned());
289 auto s = Get(options, column_family, key, &pinnable_val);
290 if (s.ok() && pinnable_val.IsPinned()) {
291 value->assign(pinnable_val.data(), pinnable_val.size());
292 } // else value is already assigned
293 return s;
294 }
295 virtual Status Get(const ReadOptions& options,
296 ColumnFamilyHandle* column_family, const Slice& key,
297 PinnableSlice* value) = 0;
298 virtual Status Get(const ReadOptions& options, const Slice& key, std::string* value) {
299 return Get(options, DefaultColumnFamily(), key, value);
300 }
301
302 // If keys[i] does not exist in the database, then the i'th returned
303 // status will be one for which Status::IsNotFound() is true, and
304 // (*values)[i] will be set to some arbitrary value (often ""). Otherwise,
305 // the i'th returned status will have Status::ok() true, and (*values)[i]
306 // will store the value associated with keys[i].
307 //
308 // (*values) will always be resized to be the same size as (keys).
309 // Similarly, the number of returned statuses will be the number of keys.
310 // Note: keys will not be "de-duplicated". Duplicate keys will return
311 // duplicate values in order.
312 virtual std::vector<Status> MultiGet(
313 const ReadOptions& options,
314 const std::vector<ColumnFamilyHandle*>& column_family,
315 const std::vector<Slice>& keys, std::vector<std::string>* values) = 0;
316 virtual std::vector<Status> MultiGet(const ReadOptions& options,
317 const std::vector<Slice>& keys,
318 std::vector<std::string>* values) {
319 return MultiGet(options, std::vector<ColumnFamilyHandle*>(
320 keys.size(), DefaultColumnFamily()),
321 keys, values);
322 }
323
324 // If the key definitely does not exist in the database, then this method
325 // returns false, else true. If the caller wants to obtain value when the key
326 // is found in memory, a bool for 'value_found' must be passed. 'value_found'
327 // will be true on return if value has been set properly.
328 // This check is potentially lighter-weight than invoking DB::Get(). One way
329 // to make this lighter weight is to avoid doing any IOs.
330 // Default implementation here returns true and sets 'value_found' to false
331 virtual bool KeyMayExist(const ReadOptions& /*options*/,
332 ColumnFamilyHandle* /*column_family*/,
333 const Slice& /*key*/, std::string* /*value*/,
334 bool* value_found = nullptr) {
335 if (value_found != nullptr) {
336 *value_found = false;
337 }
338 return true;
339 }
340 virtual bool KeyMayExist(const ReadOptions& options, const Slice& key,
341 std::string* value, bool* value_found = nullptr) {
342 return KeyMayExist(options, DefaultColumnFamily(), key, value, value_found);
343 }
344
345 // Return a heap-allocated iterator over the contents of the database.
346 // The result of NewIterator() is initially invalid (caller must
347 // call one of the Seek methods on the iterator before using it).
348 //
349 // Caller should delete the iterator when it is no longer needed.
350 // The returned iterator should be deleted before this db is deleted.
351 virtual Iterator* NewIterator(const ReadOptions& options,
352 ColumnFamilyHandle* column_family) = 0;
353 virtual Iterator* NewIterator(const ReadOptions& options) {
354 return NewIterator(options, DefaultColumnFamily());
355 }
356 // Returns iterators from a consistent database state across multiple
357 // column families. Iterators are heap allocated and need to be deleted
358 // before the db is deleted
359 virtual Status NewIterators(
360 const ReadOptions& options,
361 const std::vector<ColumnFamilyHandle*>& column_families,
362 std::vector<Iterator*>* iterators) = 0;
363
364 // Return a handle to the current DB state. Iterators created with
365 // this handle will all observe a stable snapshot of the current DB
366 // state. The caller must call ReleaseSnapshot(result) when the
367 // snapshot is no longer needed.
368 //
369 // nullptr will be returned if the DB fails to take a snapshot or does
370 // not support snapshot.
371 virtual const Snapshot* GetSnapshot() = 0;
372
373 // Release a previously acquired snapshot. The caller must not
374 // use "snapshot" after this call.
375 virtual void ReleaseSnapshot(const Snapshot* snapshot) = 0;
376
#ifndef ROCKSDB_LITE
// Holds every valid property argument for GetProperty().
//
// NOTE: A property name cannot end in a number, because trailing numbers are
// interpreted as arguments; see kNumFilesAtLevelPrefix for an example.
struct Properties {
  //  "rocksdb.num-files-at-level<N>" - string holding the number of files at
  //      level <N>, where <N> is the ASCII representation of a level number
  //      (e.g., "0").
  static const std::string kNumFilesAtLevelPrefix;

  //  "rocksdb.compression-ratio-at-level<N>" - string holding the compression
  //      ratio of the data at level <N>, where <N> is the ASCII
  //      representation of a level number (e.g., "0"). Compression ratio
  //      means uncompressed data size / compressed file size. The value is
  //      "-1.0" if there are no open files at level <N>.
  static const std::string kCompressionRatioAtLevelPrefix;

  //  "rocksdb.stats" - multi-line string with the data described by kCFStats
  //      followed by the data described by kDBStats.
  static const std::string kStats;

  //  "rocksdb.sstables" - multi-line string summarizing the current SST
  //      files.
  static const std::string kSSTables;

  //  "rocksdb.cfstats" - "rocksdb.cfstats-no-file-histogram" and
  //      "rocksdb.cf-file-histogram" combined; both are described below.
  static const std::string kCFStats;

  //  "rocksdb.cfstats-no-file-histogram" - multi-line string with general
  //      column family stats: per level over the db's lifetime ("L<n>"),
  //      aggregated over the db's lifetime ("Sum"), and aggregated over the
  //      interval since the last retrieval ("Int").
  //      The stats can also be returned in map form. In that case there is a
  //      pair of string to array of double for each level and for "Sum".
  //      "Int" stats are not affected when the stats are retrieved in this
  //      form.
  static const std::string kCFStatsNoFileHistogram;

  //  "rocksdb.cf-file-histogram" - prints how many file reads went to every
  //      level, together with the latency histogram of single requests.
  static const std::string kCFFileHistogram;

  //  "rocksdb.dbstats" - multi-line string with general database stats, both
  //      cumulative (over the db's lifetime) and interval (since the last
  //      retrieval of kDBStats).
  static const std::string kDBStats;

  //  "rocksdb.levelstats" - multi-line string with the number of files per
  //      level and the total size of each level (MB).
  static const std::string kLevelStats;

  //  "rocksdb.num-immutable-mem-table" - number of immutable memtables that
  //      have not yet been flushed.
  static const std::string kNumImmutableMemTable;

  //  "rocksdb.num-immutable-mem-table-flushed" - number of immutable
  //      memtables that have already been flushed.
  static const std::string kNumImmutableMemTableFlushed;

  //  "rocksdb.mem-table-flush-pending" - 1 if a memtable flush is pending,
  //      0 otherwise.
  static const std::string kMemTableFlushPending;

  //  "rocksdb.num-running-flushes" - number of currently running flushes.
  static const std::string kNumRunningFlushes;

  //  "rocksdb.compaction-pending" - 1 if at least one compaction is pending,
  //      0 otherwise.
  static const std::string kCompactionPending;

  //  "rocksdb.num-running-compactions" - number of currently running
  //      compactions.
  static const std::string kNumRunningCompactions;

  //  "rocksdb.background-errors" - accumulated number of background errors.
  static const std::string kBackgroundErrors;

  //  "rocksdb.cur-size-active-mem-table" - approximate size of the active
  //      memtable (bytes).
  static const std::string kCurSizeActiveMemTable;

  //  "rocksdb.cur-size-all-mem-tables" - approximate size of the active and
  //      the unflushed immutable memtables (bytes).
  static const std::string kCurSizeAllMemTables;

  //  "rocksdb.size-all-mem-tables" - approximate size of the active, the
  //      unflushed immutable, and the pinned immutable memtables (bytes).
  static const std::string kSizeAllMemTables;

  //  "rocksdb.num-entries-active-mem-table" - total number of entries in the
  //      active memtable.
  static const std::string kNumEntriesActiveMemTable;

  //  "rocksdb.num-entries-imm-mem-tables" - total number of entries in the
  //      unflushed immutable memtables.
  static const std::string kNumEntriesImmMemTables;

  //  "rocksdb.num-deletes-active-mem-table" - total number of delete entries
  //      in the active memtable.
  static const std::string kNumDeletesActiveMemTable;

  //  "rocksdb.num-deletes-imm-mem-tables" - total number of delete entries in
  //      the unflushed immutable memtables.
  static const std::string kNumDeletesImmMemTables;

  //  "rocksdb.estimate-num-keys" - estimated number of total keys in the
  //      active and unflushed immutable memtables and in storage.
  static const std::string kEstimateNumKeys;

  //  "rocksdb.estimate-table-readers-mem" - estimated memory used for reading
  //      SST tables, excluding memory used in block cache (e.g., filter and
  //      index blocks).
  static const std::string kEstimateTableReadersMem;

  //  "rocksdb.is-file-deletions-enabled" - 0 if deletion of obsolete files is
  //      enabled; a non-zero number otherwise.
  static const std::string kIsFileDeletionsEnabled;

  //  "rocksdb.num-snapshots" - number of unreleased snapshots of the
  //      database.
  static const std::string kNumSnapshots;

  //  "rocksdb.oldest-snapshot-time" - number representing the unix timestamp
  //      of the oldest unreleased snapshot.
  static const std::string kOldestSnapshotTime;

  //  "rocksdb.num-live-versions" - number of live versions. `Version` is an
  //      internal data structure; see version_set.h for details. More live
  //      versions often mean that more SST files are held back from deletion,
  //      by iterators or unfinished compactions.
  static const std::string kNumLiveVersions;

  //  "rocksdb.current-super-version-number" - number of the current LSM
  //      version. It is a uint64_t integer that is incremented after any
  //      change to the LSM tree. The number is not preserved across a DB
  //      restart; after a restart it starts from 0 again.
  static const std::string kCurrentSuperVersionNumber;

  //  "rocksdb.estimate-live-data-size" - estimate of the amount of live data
  //      in bytes.
  static const std::string kEstimateLiveDataSize;

  //  "rocksdb.min-log-number-to-keep" - minimum log number of the log files
  //      that should be kept.
  static const std::string kMinLogNumberToKeep;

  //  "rocksdb.total-sst-files-size" - total size (bytes) of all SST files.
  //  WARNING: may slow down online queries if there are too many files.
  static const std::string kTotalSstFilesSize;

  //  "rocksdb.base-level" - number of the level to which L0 data will be
  //      compacted.
  static const std::string kBaseLevel;

  //  "rocksdb.estimate-pending-compaction-bytes" - estimated total number of
  //      bytes that compaction needs to rewrite to get all levels down to
  //      under their target size. Not valid for compactions other than
  //      level-based.
  static const std::string kEstimatePendingCompactionBytes;

  //  "rocksdb.aggregated-table-properties" - string representation of the
  //      aggregated table properties of the target column family.
  static const std::string kAggregatedTableProperties;

  //  "rocksdb.aggregated-table-properties-at-level<N>" - same as the previous
  //      property, restricted to the aggregated table properties of level
  //      "N" of the target column family.
  static const std::string kAggregatedTablePropertiesAtLevel;

  //  "rocksdb.actual-delayed-write-rate" - current actual delayed write rate.
  //      0 means no delay.
  static const std::string kActualDelayedWriteRate;

  //  "rocksdb.is-write-stopped" - 1 if write has been stopped.
  static const std::string kIsWriteStopped;
};
#endif /* ROCKSDB_LITE */
560
561 // DB implementations can export properties about their state via this method.
562 // If "property" is a valid property understood by this DB implementation (see
563 // Properties struct above for valid options), fills "*value" with its current
564 // value and returns true. Otherwise, returns false.
565 virtual bool GetProperty(ColumnFamilyHandle* column_family,
566 const Slice& property, std::string* value) = 0;
567 virtual bool GetProperty(const Slice& property, std::string* value) {
568 return GetProperty(DefaultColumnFamily(), property, value);
569 }
570 virtual bool GetMapProperty(ColumnFamilyHandle* column_family,
571 const Slice& property,
572 std::map<std::string, double>* value) = 0;
573 virtual bool GetMapProperty(const Slice& property,
574 std::map<std::string, double>* value) {
575 return GetMapProperty(DefaultColumnFamily(), property, value);
576 }
577
578 // Similar to GetProperty(), but only works for a subset of properties whose
579 // return value is an integer. Return the value by integer. Supported
580 // properties:
581 // "rocksdb.num-immutable-mem-table"
582 // "rocksdb.mem-table-flush-pending"
583 // "rocksdb.compaction-pending"
584 // "rocksdb.background-errors"
585 // "rocksdb.cur-size-active-mem-table"
586 // "rocksdb.cur-size-all-mem-tables"
587 // "rocksdb.size-all-mem-tables"
588 // "rocksdb.num-entries-active-mem-table"
589 // "rocksdb.num-entries-imm-mem-tables"
590 // "rocksdb.num-deletes-active-mem-table"
591 // "rocksdb.num-deletes-imm-mem-tables"
592 // "rocksdb.estimate-num-keys"
593 // "rocksdb.estimate-table-readers-mem"
594 // "rocksdb.is-file-deletions-enabled"
595 // "rocksdb.num-snapshots"
596 // "rocksdb.oldest-snapshot-time"
597 // "rocksdb.num-live-versions"
598 // "rocksdb.current-super-version-number"
599 // "rocksdb.estimate-live-data-size"
600 // "rocksdb.min-log-number-to-keep"
601 // "rocksdb.total-sst-files-size"
602 // "rocksdb.base-level"
603 // "rocksdb.estimate-pending-compaction-bytes"
604 // "rocksdb.num-running-compactions"
605 // "rocksdb.num-running-flushes"
606 // "rocksdb.actual-delayed-write-rate"
607 // "rocksdb.is-write-stopped"
608 virtual bool GetIntProperty(ColumnFamilyHandle* column_family,
609 const Slice& property, uint64_t* value) = 0;
610 virtual bool GetIntProperty(const Slice& property, uint64_t* value) {
611 return GetIntProperty(DefaultColumnFamily(), property, value);
612 }
613
614 // Reset internal stats for DB and all column families.
615 // Note this doesn't reset options.statistics as it is not owned by
616 // DB.
617 virtual Status ResetStats() {
618 return Status::NotSupported("Not implemented");
619 }
620
621 // Same as GetIntProperty(), but this one returns the aggregated int
622 // property from all column families.
623 virtual bool GetAggregatedIntProperty(const Slice& property,
624 uint64_t* value) = 0;
625
// Bit flags for DB::GetSizeApproximation. They select whether memtable
// stats, the file stats approximation, or both should be included.
enum SizeApproximationFlags : uint8_t {
  NONE = 0,
  INCLUDE_MEMTABLES = 1 << 0,
  INCLUDE_FILES = 1 << 1
};
633
634 // For each i in [0,n-1], store in "sizes[i]", the approximate
635 // file system space used by keys in "[range[i].start .. range[i].limit)".
636 //
637 // Note that the returned sizes measure file system space usage, so
638 // if the user data compresses by a factor of ten, the returned
639 // sizes will be one-tenth the size of the corresponding user data size.
640 //
641 // If include_flags defines whether the returned size should include
642 // the recently written data in the mem-tables (if
643 // the mem-table type supports it), data serialized to disk, or both.
644 // include_flags should be of type DB::SizeApproximationFlags
645 virtual void GetApproximateSizes(ColumnFamilyHandle* column_family,
646 const Range* range, int n, uint64_t* sizes,
647 uint8_t include_flags
648 = INCLUDE_FILES) = 0;
649 virtual void GetApproximateSizes(const Range* range, int n, uint64_t* sizes,
650 uint8_t include_flags
651 = INCLUDE_FILES) {
652 GetApproximateSizes(DefaultColumnFamily(), range, n, sizes,
653 include_flags);
654 }
655
656 // The method is similar to GetApproximateSizes, except it
657 // returns approximate number of records in memtables.
658 virtual void GetApproximateMemTableStats(ColumnFamilyHandle* column_family,
659 const Range& range,
660 uint64_t* const count,
661 uint64_t* const size) = 0;
662 virtual void GetApproximateMemTableStats(const Range& range,
663 uint64_t* const count,
664 uint64_t* const size) {
665 GetApproximateMemTableStats(DefaultColumnFamily(), range, count, size);
666 }
667
668 // Deprecated versions of GetApproximateSizes
669 ROCKSDB_DEPRECATED_FUNC virtual void GetApproximateSizes(
670 const Range* range, int n, uint64_t* sizes,
671 bool include_memtable) {
672 uint8_t include_flags = SizeApproximationFlags::INCLUDE_FILES;
673 if (include_memtable) {
674 include_flags |= SizeApproximationFlags::INCLUDE_MEMTABLES;
675 }
676 GetApproximateSizes(DefaultColumnFamily(), range, n, sizes, include_flags);
677 }
678 ROCKSDB_DEPRECATED_FUNC virtual void GetApproximateSizes(
679 ColumnFamilyHandle* column_family,
680 const Range* range, int n, uint64_t* sizes,
681 bool include_memtable) {
682 uint8_t include_flags = SizeApproximationFlags::INCLUDE_FILES;
683 if (include_memtable) {
684 include_flags |= SizeApproximationFlags::INCLUDE_MEMTABLES;
685 }
686 GetApproximateSizes(column_family, range, n, sizes, include_flags);
687 }
688
689 // Compact the underlying storage for the key range [*begin,*end].
690 // The actual compaction interval might be superset of [*begin, *end].
691 // In particular, deleted and overwritten versions are discarded,
692 // and the data is rearranged to reduce the cost of operations
693 // needed to access the data. This operation should typically only
694 // be invoked by users who understand the underlying implementation.
695 //
696 // begin==nullptr is treated as a key before all keys in the database.
697 // end==nullptr is treated as a key after all keys in the database.
698 // Therefore the following call will compact the entire database:
699 // db->CompactRange(options, nullptr, nullptr);
700 // Note that after the entire database is compacted, all data are pushed
701 // down to the last level containing any data. If the total data size after
702 // compaction is reduced, that level might not be appropriate for hosting all
703 // the files. In this case, client could set options.change_level to true, to
704 // move the files back to the minimum level capable of holding the data set
705 // or a given level (specified by non-negative options.target_level).
706 virtual Status CompactRange(const CompactRangeOptions& options,
707 ColumnFamilyHandle* column_family,
708 const Slice* begin, const Slice* end) = 0;
709 virtual Status CompactRange(const CompactRangeOptions& options,
710 const Slice* begin, const Slice* end) {
711 return CompactRange(options, DefaultColumnFamily(), begin, end);
712 }
713
714 ROCKSDB_DEPRECATED_FUNC virtual Status CompactRange(
715 ColumnFamilyHandle* column_family, const Slice* begin, const Slice* end,
716 bool change_level = false, int target_level = -1,
717 uint32_t target_path_id = 0) {
718 CompactRangeOptions options;
719 options.change_level = change_level;
720 options.target_level = target_level;
721 options.target_path_id = target_path_id;
722 return CompactRange(options, column_family, begin, end);
723 }
724
725 ROCKSDB_DEPRECATED_FUNC virtual Status CompactRange(
726 const Slice* begin, const Slice* end, bool change_level = false,
727 int target_level = -1, uint32_t target_path_id = 0) {
728 CompactRangeOptions options;
729 options.change_level = change_level;
730 options.target_level = target_level;
731 options.target_path_id = target_path_id;
732 return CompactRange(options, DefaultColumnFamily(), begin, end);
733 }
734
735 virtual Status SetOptions(
736 ColumnFamilyHandle* /*column_family*/,
737 const std::unordered_map<std::string, std::string>& /*new_options*/) {
738 return Status::NotSupported("Not implemented");
739 }
740 virtual Status SetOptions(
741 const std::unordered_map<std::string, std::string>& new_options) {
742 return SetOptions(DefaultColumnFamily(), new_options);
743 }
744
745 virtual Status SetDBOptions(
746 const std::unordered_map<std::string, std::string>& new_options) = 0;
747
748 // CompactFiles() inputs a list of files specified by file numbers and
749 // compacts them to the specified level. Note that the behavior is different
750 // from CompactRange() in that CompactFiles() performs the compaction job
751 // using the CURRENT thread.
752 //
753 // @see GetDataBaseMetaData
754 // @see GetColumnFamilyMetaData
755 virtual Status CompactFiles(
756 const CompactionOptions& compact_options,
757 ColumnFamilyHandle* column_family,
758 const std::vector<std::string>& input_file_names,
759 const int output_level, const int output_path_id = -1) = 0;
760
761 virtual Status CompactFiles(
762 const CompactionOptions& compact_options,
763 const std::vector<std::string>& input_file_names,
764 const int output_level, const int output_path_id = -1) {
765 return CompactFiles(compact_options, DefaultColumnFamily(),
766 input_file_names, output_level, output_path_id);
767 }
768
769 // This function will wait until all currently running background processes
770 // finish. After it returns, no background process will be run until
771 // UnblockBackgroundWork is called
772 virtual Status PauseBackgroundWork() = 0;
773 virtual Status ContinueBackgroundWork() = 0;
774
775 // This function will enable automatic compactions for the given column
776 // families if they were previously disabled. The function will first set the
777 // disable_auto_compactions option for each column family to 'false', after
778 // which it will schedule a flush/compaction.
779 //
780 // NOTE: Setting disable_auto_compactions to 'false' through SetOptions() API
781 // does NOT schedule a flush/compaction afterwards, and only changes the
782 // parameter itself within the column family option.
783 //
784 virtual Status EnableAutoCompaction(
785 const std::vector<ColumnFamilyHandle*>& column_family_handles) = 0;
786
787 // Number of levels used for this DB.
788 virtual int NumberLevels(ColumnFamilyHandle* column_family) = 0;
789 virtual int NumberLevels() { return NumberLevels(DefaultColumnFamily()); }
790
791 // Maximum level to which a new compacted memtable is pushed if it
792 // does not create overlap.
793 virtual int MaxMemCompactionLevel(ColumnFamilyHandle* column_family) = 0;
794 virtual int MaxMemCompactionLevel() {
795 return MaxMemCompactionLevel(DefaultColumnFamily());
796 }
797
798 // Number of files in level-0 that would stop writes.
799 virtual int Level0StopWriteTrigger(ColumnFamilyHandle* column_family) = 0;
800 virtual int Level0StopWriteTrigger() {
801 return Level0StopWriteTrigger(DefaultColumnFamily());
802 }
803
804 // Get DB name -- the exact same name that was provided as an argument to
805 // DB::Open()
806 virtual const std::string& GetName() const = 0;
807
808 // Get Env object from the DB
809 virtual Env* GetEnv() const = 0;
810
811 // Get DB Options that we use. During the process of opening the
812 // column family, the options provided when calling DB::Open() or
813 // DB::CreateColumnFamily() will have been "sanitized" and transformed
814 // in an implementation-defined manner.
815 virtual Options GetOptions(ColumnFamilyHandle* column_family) const = 0;
816 virtual Options GetOptions() const {
817 return GetOptions(DefaultColumnFamily());
818 }
819
820 virtual DBOptions GetDBOptions() const = 0;
821
822 // Flush all mem-table data.
823 virtual Status Flush(const FlushOptions& options,
824 ColumnFamilyHandle* column_family) = 0;
825 virtual Status Flush(const FlushOptions& options) {
826 return Flush(options, DefaultColumnFamily());
827 }
828
829 // Sync the wal. Note that Write() followed by SyncWAL() is not exactly the
830 // same as Write() with sync=true: in the latter case the changes won't be
831 // visible until the sync is done.
832 // Currently only works if allow_mmap_writes = false in Options.
833 virtual Status SyncWAL() = 0;
834
835 // The sequence number of the most recent transaction.
836 virtual SequenceNumber GetLatestSequenceNumber() const = 0;
837
838 #ifndef ROCKSDB_LITE
839
840 // Prevent file deletions. Compactions will continue to occur,
841 // but no obsolete files will be deleted. Calling this multiple
842 // times have the same effect as calling it once.
843 virtual Status DisableFileDeletions() = 0;
844
845 // Allow compactions to delete obsolete files.
846 // If force == true, the call to EnableFileDeletions() will guarantee that
847 // file deletions are enabled after the call, even if DisableFileDeletions()
848 // was called multiple times before.
849 // If force == false, EnableFileDeletions will only enable file deletion
850 // after it's been called at least as many times as DisableFileDeletions(),
851 // enabling the two methods to be called by two threads concurrently without
852 // synchronization -- i.e., file deletions will be enabled only after both
853 // threads call EnableFileDeletions()
854 virtual Status EnableFileDeletions(bool force = true) = 0;
855
856 // GetLiveFiles followed by GetSortedWalFiles can generate a lossless backup
857
858 // Retrieve the list of all files in the database. The files are
859 // relative to the dbname and are not absolute paths. The valid size of the
860 // manifest file is returned in manifest_file_size. The manifest file is an
861 // ever growing file, but only the portion specified by manifest_file_size is
862 // valid for this snapshot.
863 // Setting flush_memtable to true does Flush before recording the live files.
864 // Setting flush_memtable to false is useful when we don't want to wait for
865 // flush which may have to wait for compaction to complete taking an
866 // indeterminate time.
867 //
868 // In case you have multiple column families, even if flush_memtable is true,
869 // you still need to call GetSortedWalFiles after GetLiveFiles to compensate
870 // for new data that arrived to already-flushed column families while other
871 // column families were flushing
872 virtual Status GetLiveFiles(std::vector<std::string>&,
873 uint64_t* manifest_file_size,
874 bool flush_memtable = true) = 0;
875
876 // Retrieve the sorted list of all wal files with earliest file first
877 virtual Status GetSortedWalFiles(VectorLogPtr& files) = 0;
878
879 // Sets iter to an iterator that is positioned at a write-batch containing
880 // seq_number. If the sequence number is non existent, it returns an iterator
881 // at the first available seq_no after the requested seq_no
882 // Returns Status::OK if iterator is valid
883 // Must set WAL_ttl_seconds or WAL_size_limit_MB to large values to
884 // use this api, else the WAL files will get
885 // cleared aggressively and the iterator might keep getting invalid before
886 // an update is read.
887 virtual Status GetUpdatesSince(
888 SequenceNumber seq_number, unique_ptr<TransactionLogIterator>* iter,
889 const TransactionLogIterator::ReadOptions&
890 read_options = TransactionLogIterator::ReadOptions()) = 0;
891
892 // Windows API macro interference
893 #undef DeleteFile
894 // Delete the file name from the db directory and update the internal state to
895 // reflect that. Supports deletion of sst and log files only. 'name' must be
896 // path relative to the db directory. eg. 000001.sst, /archive/000003.log
897 virtual Status DeleteFile(std::string name) = 0;
898
899 // Returns a list of all table files with their level, start key
900 // and end key
901 virtual void GetLiveFilesMetaData(
902 std::vector<LiveFileMetaData>* /*metadata*/) {}
903
904 // Obtains the meta data of the specified column family of the DB.
905 // Status::NotFound() will be returned if the current DB does not have
906 // any column family match the specified name.
907 //
908 // If cf_name is not specified, then the metadata of the default
909 // column family will be returned.
910 virtual void GetColumnFamilyMetaData(ColumnFamilyHandle* /*column_family*/,
911 ColumnFamilyMetaData* /*metadata*/) {}
912
913 // Get the metadata of the default column family.
914 void GetColumnFamilyMetaData(
915 ColumnFamilyMetaData* metadata) {
916 GetColumnFamilyMetaData(DefaultColumnFamily(), metadata);
917 }
918
919 // IngestExternalFile() will load a list of external SST files (1) into the DB
920 // We will try to find the lowest possible level that the file can fit in, and
921 // ingest the file into this level (2). A file that have a key range that
922 // overlap with the memtable key range will require us to Flush the memtable
923 // first before ingesting the file.
924 //
925 // (1) External SST files can be created using SstFileWriter
926 // (2) We will try to ingest the files to the lowest possible level
927 // even if the file compression dont match the level compression
928 virtual Status IngestExternalFile(
929 ColumnFamilyHandle* column_family,
930 const std::vector<std::string>& external_files,
931 const IngestExternalFileOptions& options) = 0;
932
933 virtual Status IngestExternalFile(
934 const std::vector<std::string>& external_files,
935 const IngestExternalFileOptions& options) {
936 return IngestExternalFile(DefaultColumnFamily(), external_files, options);
937 }
938
939 // AddFile() is deprecated, please use IngestExternalFile()
940 ROCKSDB_DEPRECATED_FUNC virtual Status AddFile(
941 ColumnFamilyHandle* column_family,
942 const std::vector<std::string>& file_path_list, bool move_file = false,
943 bool skip_snapshot_check = false) {
944 IngestExternalFileOptions ifo;
945 ifo.move_files = move_file;
946 ifo.snapshot_consistency = !skip_snapshot_check;
947 ifo.allow_global_seqno = false;
948 ifo.allow_blocking_flush = false;
949 return IngestExternalFile(column_family, file_path_list, ifo);
950 }
951
952 ROCKSDB_DEPRECATED_FUNC virtual Status AddFile(
953 const std::vector<std::string>& file_path_list, bool move_file = false,
954 bool skip_snapshot_check = false) {
955 IngestExternalFileOptions ifo;
956 ifo.move_files = move_file;
957 ifo.snapshot_consistency = !skip_snapshot_check;
958 ifo.allow_global_seqno = false;
959 ifo.allow_blocking_flush = false;
960 return IngestExternalFile(DefaultColumnFamily(), file_path_list, ifo);
961 }
962
963 // AddFile() is deprecated, please use IngestExternalFile()
964 ROCKSDB_DEPRECATED_FUNC virtual Status AddFile(
965 ColumnFamilyHandle* column_family, const std::string& file_path,
966 bool move_file = false, bool skip_snapshot_check = false) {
967 IngestExternalFileOptions ifo;
968 ifo.move_files = move_file;
969 ifo.snapshot_consistency = !skip_snapshot_check;
970 ifo.allow_global_seqno = false;
971 ifo.allow_blocking_flush = false;
972 return IngestExternalFile(column_family, {file_path}, ifo);
973 }
974
975 ROCKSDB_DEPRECATED_FUNC virtual Status AddFile(
976 const std::string& file_path, bool move_file = false,
977 bool skip_snapshot_check = false) {
978 IngestExternalFileOptions ifo;
979 ifo.move_files = move_file;
980 ifo.snapshot_consistency = !skip_snapshot_check;
981 ifo.allow_global_seqno = false;
982 ifo.allow_blocking_flush = false;
983 return IngestExternalFile(DefaultColumnFamily(), {file_path}, ifo);
984 }
985
986 // Load table file with information "file_info" into "column_family"
987 ROCKSDB_DEPRECATED_FUNC virtual Status AddFile(
988 ColumnFamilyHandle* column_family,
989 const std::vector<ExternalSstFileInfo>& file_info_list,
990 bool move_file = false, bool skip_snapshot_check = false) {
991 std::vector<std::string> external_files;
992 for (const ExternalSstFileInfo& file_info : file_info_list) {
993 external_files.push_back(file_info.file_path);
994 }
995 IngestExternalFileOptions ifo;
996 ifo.move_files = move_file;
997 ifo.snapshot_consistency = !skip_snapshot_check;
998 ifo.allow_global_seqno = false;
999 ifo.allow_blocking_flush = false;
1000 return IngestExternalFile(column_family, external_files, ifo);
1001 }
1002
1003 ROCKSDB_DEPRECATED_FUNC virtual Status AddFile(
1004 const std::vector<ExternalSstFileInfo>& file_info_list,
1005 bool move_file = false, bool skip_snapshot_check = false) {
1006 std::vector<std::string> external_files;
1007 for (const ExternalSstFileInfo& file_info : file_info_list) {
1008 external_files.push_back(file_info.file_path);
1009 }
1010 IngestExternalFileOptions ifo;
1011 ifo.move_files = move_file;
1012 ifo.snapshot_consistency = !skip_snapshot_check;
1013 ifo.allow_global_seqno = false;
1014 ifo.allow_blocking_flush = false;
1015 return IngestExternalFile(DefaultColumnFamily(), external_files, ifo);
1016 }
1017
1018 ROCKSDB_DEPRECATED_FUNC virtual Status AddFile(
1019 ColumnFamilyHandle* column_family, const ExternalSstFileInfo* file_info,
1020 bool move_file = false, bool skip_snapshot_check = false) {
1021 IngestExternalFileOptions ifo;
1022 ifo.move_files = move_file;
1023 ifo.snapshot_consistency = !skip_snapshot_check;
1024 ifo.allow_global_seqno = false;
1025 ifo.allow_blocking_flush = false;
1026 return IngestExternalFile(column_family, {file_info->file_path}, ifo);
1027 }
1028
1029 ROCKSDB_DEPRECATED_FUNC virtual Status AddFile(
1030 const ExternalSstFileInfo* file_info, bool move_file = false,
1031 bool skip_snapshot_check = false) {
1032 IngestExternalFileOptions ifo;
1033 ifo.move_files = move_file;
1034 ifo.snapshot_consistency = !skip_snapshot_check;
1035 ifo.allow_global_seqno = false;
1036 ifo.allow_blocking_flush = false;
1037 return IngestExternalFile(DefaultColumnFamily(), {file_info->file_path},
1038 ifo);
1039 }
1040
1041 #endif // ROCKSDB_LITE
1042
1043 // Sets the globally unique ID created at database creation time by invoking
1044 // Env::GenerateUniqueId(), in identity. Returns Status::OK if identity could
1045 // be set properly
1046 virtual Status GetDbIdentity(std::string& identity) const = 0;
1047
1048 // Returns default column family handle
1049 virtual ColumnFamilyHandle* DefaultColumnFamily() const = 0;
1050
1051 #ifndef ROCKSDB_LITE
1052 virtual Status GetPropertiesOfAllTables(ColumnFamilyHandle* column_family,
1053 TablePropertiesCollection* props) = 0;
1054 virtual Status GetPropertiesOfAllTables(TablePropertiesCollection* props) {
1055 return GetPropertiesOfAllTables(DefaultColumnFamily(), props);
1056 }
1057 virtual Status GetPropertiesOfTablesInRange(
1058 ColumnFamilyHandle* column_family, const Range* range, std::size_t n,
1059 TablePropertiesCollection* props) = 0;
1060 #endif // ROCKSDB_LITE
1061
1062 // Needed for StackableDB
1063 virtual DB* GetRootDB() { return this; }
1064
1065 private:
1066 // No copying allowed
1067 DB(const DB&);
1068 void operator=(const DB&);
1069 };
1070
1071 // Destroy the contents of the specified database.
1072 // Be very careful using this method.
1073 Status DestroyDB(const std::string& name, const Options& options);
1074
1075 #ifndef ROCKSDB_LITE
1076 // If a DB cannot be opened, you may attempt to call this method to
1077 // resurrect as much of the contents of the database as possible.
1078 // Some data may be lost, so be careful when calling this function
1079 // on a database that contains important information.
1080 //
1081 // With this API, we will warn and skip data associated with column families not
1082 // specified in column_families.
1083 //
1084 // @param column_families Descriptors for known column families
1085 Status RepairDB(const std::string& dbname, const DBOptions& db_options,
1086 const std::vector<ColumnFamilyDescriptor>& column_families);
1087
1088 // @param unknown_cf_opts Options for column families encountered during the
1089 // repair that were not specified in column_families.
1090 Status RepairDB(const std::string& dbname, const DBOptions& db_options,
1091 const std::vector<ColumnFamilyDescriptor>& column_families,
1092 const ColumnFamilyOptions& unknown_cf_opts);
1093
1094 // @param options These options will be used for the database and for ALL column
1095 // families encountered during the repair
1096 Status RepairDB(const std::string& dbname, const Options& options);
1097
1098 #endif
1099
1100 } // namespace rocksdb
1101
1102 #endif // STORAGE_ROCKSDB_INCLUDE_DB_H_