]> git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/include/rocksdb/db.h
import 14.2.4 nautilus point release
[ceph.git] / ceph / src / rocksdb / include / rocksdb / db.h
1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
6 // Use of this source code is governed by a BSD-style license that can be
7 // found in the LICENSE file. See the AUTHORS file for names of contributors.
8
9 #pragma once
10
11 #include <stdint.h>
12 #include <stdio.h>
13 #include <map>
14 #include <memory>
15 #include <string>
16 #include <unordered_map>
17 #include <vector>
18 #include "rocksdb/iterator.h"
19 #include "rocksdb/listener.h"
20 #include "rocksdb/metadata.h"
21 #include "rocksdb/options.h"
22 #include "rocksdb/snapshot.h"
23 #include "rocksdb/sst_file_writer.h"
24 #include "rocksdb/thread_status.h"
25 #include "rocksdb/transaction_log.h"
26 #include "rocksdb/types.h"
27 #include "rocksdb/version.h"
28
29 #ifdef _WIN32
30 // Windows API macro interference
31 #undef DeleteFile
32 #endif
33
// ROCKSDB_DEPRECATED_FUNC marks a function declaration as deprecated, using
// whichever spelling the current compiler understands.
#if defined(__GNUC__) || defined(__clang__)
#define ROCKSDB_DEPRECATED_FUNC __attribute__((__deprecated__))
#elif defined(_WIN32)
#define ROCKSDB_DEPRECATED_FUNC __declspec(deprecated)
#else
// Unknown toolchain: expand to nothing so that declarations using the macro
// still compile (without a deprecation diagnostic) instead of failing on an
// undefined identifier.
#define ROCKSDB_DEPRECATED_FUNC
#endif
39
40 namespace rocksdb {
41
42 struct Options;
43 struct DBOptions;
44 struct ColumnFamilyOptions;
45 struct ReadOptions;
46 struct WriteOptions;
47 struct FlushOptions;
48 struct CompactionOptions;
49 struct CompactRangeOptions;
50 struct TableProperties;
51 struct ExternalSstFileInfo;
52 class WriteBatch;
53 class Env;
54 class EventListener;
55 class StatsHistoryIterator;
56 class TraceWriter;
57 #ifdef ROCKSDB_LITE
58 class CompactionJobInfo;
59 #endif
60
61 extern const std::string kDefaultColumnFamilyName;
62 struct ColumnFamilyDescriptor {
63 std::string name;
64 ColumnFamilyOptions options;
65 ColumnFamilyDescriptor()
66 : name(kDefaultColumnFamilyName), options(ColumnFamilyOptions()) {}
67 ColumnFamilyDescriptor(const std::string& _name,
68 const ColumnFamilyOptions& _options)
69 : name(_name), options(_options) {}
70 };
71
72 class ColumnFamilyHandle {
73 public:
74 virtual ~ColumnFamilyHandle() {}
75 // Returns the name of the column family associated with the current handle.
76 virtual const std::string& GetName() const = 0;
77 // Returns the ID of the column family associated with the current handle.
78 virtual uint32_t GetID() const = 0;
79 // Fills "*desc" with the up-to-date descriptor of the column family
80 // associated with this handle. Since it fills "*desc" with the up-to-date
81 // information, this call might internally lock and release DB mutex to
82 // access the up-to-date CF options. In addition, all the pointer-typed
83 // options cannot be referenced any longer than the original options exist.
84 //
85 // Note that this function is not supported in RocksDBLite.
86 virtual Status GetDescriptor(ColumnFamilyDescriptor* desc) = 0;
87 // Returns the comparator of the column family associated with the
88 // current handle.
89 virtual const Comparator* GetComparator() const = 0;
90 };
91
// Major and minor version numbers, taken from the __ROCKSDB_MAJOR__ and
// __ROCKSDB_MINOR__ macros (presumably provided by rocksdb/version.h, which
// is included above -- confirm). Being namespace-scope "static const"
// objects in a header, each including translation unit gets its own copy.
static const int kMajorVersion = __ROCKSDB_MAJOR__;
static const int kMinorVersion = __ROCKSDB_MINOR__;
94
95 // A range of keys
96 struct Range {
97 Slice start;
98 Slice limit;
99
100 Range() {}
101 Range(const Slice& s, const Slice& l) : start(s), limit(l) {}
102 };
103
104 struct RangePtr {
105 const Slice* start;
106 const Slice* limit;
107
108 RangePtr() : start(nullptr), limit(nullptr) {}
109 RangePtr(const Slice* s, const Slice* l) : start(s), limit(l) {}
110 };
111
112 struct IngestExternalFileArg {
113 ColumnFamilyHandle* column_family = nullptr;
114 std::vector<std::string> external_files;
115 IngestExternalFileOptions options;
116 };
117
118 // A collections of table properties objects, where
119 // key: is the table's file name.
120 // value: the table properties object of the given table.
121 typedef std::unordered_map<std::string, std::shared_ptr<const TableProperties>>
122 TablePropertiesCollection;
123
124 // A DB is a persistent ordered map from keys to values.
125 // A DB is safe for concurrent access from multiple threads without
126 // any external synchronization.
127 class DB {
128 public:
129 // Open the database with the specified "name".
130 // Stores a pointer to a heap-allocated database in *dbptr and returns
131 // OK on success.
132 // Stores nullptr in *dbptr and returns a non-OK status on error.
133 // Caller should delete *dbptr when it is no longer needed.
134 static Status Open(const Options& options, const std::string& name,
135 DB** dbptr);
136
137 // Open the database for read only. All DB interfaces
138 // that modify data, like put/delete, will return error.
139 // If the db is opened in read only mode, then no compactions
140 // will happen.
141 //
142 // Not supported in ROCKSDB_LITE, in which case the function will
143 // return Status::NotSupported.
144 static Status OpenForReadOnly(const Options& options, const std::string& name,
145 DB** dbptr,
146 bool error_if_log_file_exist = false);
147
148 // Open the database for read only with column families. When opening DB with
149 // read only, you can specify only a subset of column families in the
150 // database that should be opened. However, you always need to specify default
151 // column family. The default column family name is 'default' and it's stored
152 // in rocksdb::kDefaultColumnFamilyName
153 //
154 // Not supported in ROCKSDB_LITE, in which case the function will
155 // return Status::NotSupported.
156 static Status OpenForReadOnly(
157 const DBOptions& db_options, const std::string& name,
158 const std::vector<ColumnFamilyDescriptor>& column_families,
159 std::vector<ColumnFamilyHandle*>* handles, DB** dbptr,
160 bool error_if_log_file_exist = false);
161
162 // The following OpenAsSecondary functions create a secondary instance that
163 // can dynamically tail the MANIFEST of a primary that must have already been
164 // created. User can call TryCatchUpWithPrimary to make the secondary
165 // instance catch up with primary (WAL tailing is NOT supported now) whenever
166 // the user feels necessary. Column families created by the primary after the
167 // secondary instance starts are currently ignored by the secondary instance.
168 // Column families opened by secondary and dropped by the primary will be
169 // dropped by secondary as well. However the user of the secondary instance
170 // can still access the data of such dropped column family as long as they
171 // do not destroy the corresponding column family handle.
172 // WAL tailing is not supported at present, but will arrive soon.
173 //
174 // The options argument specifies the options to open the secondary instance.
175 // The name argument specifies the name of the primary db that you have used
176 // to open the primary instance.
177 // The secondary_path argument points to a directory where the secondary
178 // instance stores its info log.
179 // The dbptr is an out-arg corresponding to the opened secondary instance.
180 // The pointer points to a heap-allocated database, and the user should
181 // delete it after use.
182 // Open DB as secondary instance with only the default column family.
183 // Return OK on success, non-OK on failures.
184 static Status OpenAsSecondary(const Options& options, const std::string& name,
185 const std::string& secondary_path, DB** dbptr);
186
187 // Open DB as secondary instance with column families. You can open a subset
188 // of column families in secondary mode.
189 // The db_options specify the database specific options.
190 // The name argument specifies the name of the primary db that you have used
191 // to open the primary instance.
192 // The secondary_path argument points to a directory where the secondary
193 // instance stores its info log.
194 // The column_families argument specifies a list of column families to open.
195 // If any of the column families does not exist, the function returns non-OK
196 // status.
197 // The handles is an out-arg corresponding to the opened database column
198 // family handles.
199 // The dbptr is an out-arg corresponding to the opened secondary instance.
200 // The pointer points to a heap-allocated database, and the caller should
201 // delete it after use. Before deleting the dbptr, the user should also
202 // delete the pointers stored in handles vector.
203 // Return OK on success, non-OK on failures.
204 static Status OpenAsSecondary(
205 const DBOptions& db_options, const std::string& name,
206 const std::string& secondary_path,
207 const std::vector<ColumnFamilyDescriptor>& column_families,
208 std::vector<ColumnFamilyHandle*>* handles, DB** dbptr);
209
210 // Open DB with column families.
211 // db_options specify database specific options
212 // column_families is the vector of all column families in the database,
213 // containing column family name and options. You need to open ALL column
214 // families in the database. To get the list of column families, you can use
215 // ListColumnFamilies(). Also, you can open only a subset of column families
216 // for read-only access.
217 // The default column family name is 'default' and it's stored
218 // in rocksdb::kDefaultColumnFamilyName.
219 // If everything is OK, handles will on return be the same size
220 // as column_families --- handles[i] will be a handle that you
221 // will use to operate on column family column_family[i].
222 // Before deleting the DB, you have to close all column families by calling
223 // DestroyColumnFamilyHandle() with all the handles.
224 static Status Open(const DBOptions& db_options, const std::string& name,
225 const std::vector<ColumnFamilyDescriptor>& column_families,
226 std::vector<ColumnFamilyHandle*>* handles, DB** dbptr);
227
// Default implementation: does nothing and returns Status::NotSupported().
// NOTE(review): presumably overridden by DB implementations that can resume
// operation after an error -- confirm against the implementing class.
virtual Status Resume() { return Status::NotSupported(); }
229
// Close the DB by releasing resources, closing files etc. This should be
// called before calling the destructor so that the caller can get back a
// status in case there are any errors. This will not fsync the WAL files.
// If syncing is required, the caller must first call SyncWAL(), or Write()
// using an empty write batch with WriteOptions.sync=true.
// Regardless of the return status, the DB must be freed. If the return
// status is NotSupported(), then the DB implementation does cleanup in the
// destructor.
//
// The default implementation below does nothing and returns
// Status::NotSupported().
virtual Status Close() { return Status::NotSupported(); }
239
240 // ListColumnFamilies will open the DB specified by argument name
241 // and return the list of all column families in that DB
242 // through column_families argument. The ordering of
243 // column families in column_families is unspecified.
244 static Status ListColumnFamilies(const DBOptions& db_options,
245 const std::string& name,
246 std::vector<std::string>* column_families);
247
// Default constructor; the body is empty.
DB() {}
// Virtual destructor. Only declared here; the definition lives outside this
// class body.
virtual ~DB();
250
251 // Create a column_family and return the handle of column family
252 // through the argument handle.
253 virtual Status CreateColumnFamily(const ColumnFamilyOptions& options,
254 const std::string& column_family_name,
255 ColumnFamilyHandle** handle);
256
257 // Bulk create column families with the same column family options.
258 // Return the handles of the column families through the argument handles.
259 // In case of error, the request may succeed partially, and handles will
260 // contain column family handles that it managed to create, and have size
261 // equal to the number of created column families.
262 virtual Status CreateColumnFamilies(
263 const ColumnFamilyOptions& options,
264 const std::vector<std::string>& column_family_names,
265 std::vector<ColumnFamilyHandle*>* handles);
266
267 // Bulk create column families.
268 // Return the handles of the column families through the argument handles.
269 // In case of error, the request may succeed partially, and handles will
270 // contain column family handles that it managed to create, and have size
271 // equal to the number of created column families.
272 virtual Status CreateColumnFamilies(
273 const std::vector<ColumnFamilyDescriptor>& column_families,
274 std::vector<ColumnFamilyHandle*>* handles);
275
276 // Drop a column family specified by column_family handle. This call
277 // only records a drop record in the manifest and prevents the column
278 // family from flushing and compacting.
279 virtual Status DropColumnFamily(ColumnFamilyHandle* column_family);
280
281 // Bulk drop column families. This call only records drop records in the
282 // manifest and prevents the column families from flushing and compacting.
283 // In case of error, the request may succeed partially. User may call
284 // ListColumnFamilies to check the result.
285 virtual Status DropColumnFamilies(
286 const std::vector<ColumnFamilyHandle*>& column_families);
287
288 // Close a column family specified by column_family handle and destroy
289 // the column family handle specified to avoid double deletion. This call
290 // deletes the column family handle by default. Use this method to
291 // close column family instead of deleting column family handle directly
292 virtual Status DestroyColumnFamilyHandle(ColumnFamilyHandle* column_family);
293
294 // Set the database entry for "key" to "value".
295 // If "key" already exists, it will be overwritten.
296 // Returns OK on success, and a non-OK status on error.
297 // Note: consider setting options.sync = true.
298 virtual Status Put(const WriteOptions& options,
299 ColumnFamilyHandle* column_family, const Slice& key,
300 const Slice& value) = 0;
301 virtual Status Put(const WriteOptions& options, const Slice& key,
302 const Slice& value) {
303 return Put(options, DefaultColumnFamily(), key, value);
304 }
305
306 // Remove the database entry (if any) for "key". Returns OK on
307 // success, and a non-OK status on error. It is not an error if "key"
308 // did not exist in the database.
309 // Note: consider setting options.sync = true.
310 virtual Status Delete(const WriteOptions& options,
311 ColumnFamilyHandle* column_family,
312 const Slice& key) = 0;
313 virtual Status Delete(const WriteOptions& options, const Slice& key) {
314 return Delete(options, DefaultColumnFamily(), key);
315 }
316
317 // Remove the database entry for "key". Requires that the key exists
318 // and was not overwritten. Returns OK on success, and a non-OK status
319 // on error. It is not an error if "key" did not exist in the database.
320 //
321 // If a key is overwritten (by calling Put() multiple times), then the result
322 // of calling SingleDelete() on this key is undefined. SingleDelete() only
323 // behaves correctly if there has been only one Put() for this key since the
324 // previous call to SingleDelete() for this key.
325 //
326 // This feature is currently an experimental performance optimization
327 // for a very specific workload. It is up to the caller to ensure that
328 // SingleDelete is only used for a key that is not deleted using Delete() or
329 // written using Merge(). Mixing SingleDelete operations with Deletes and
330 // Merges can result in undefined behavior.
331 //
332 // Note: consider setting options.sync = true.
333 virtual Status SingleDelete(const WriteOptions& options,
334 ColumnFamilyHandle* column_family,
335 const Slice& key) = 0;
336 virtual Status SingleDelete(const WriteOptions& options, const Slice& key) {
337 return SingleDelete(options, DefaultColumnFamily(), key);
338 }
339
340 // Removes the database entries in the range ["begin_key", "end_key"), i.e.,
341 // including "begin_key" and excluding "end_key". Returns OK on success, and
342 // a non-OK status on error. It is not an error if no keys exist in the range
343 // ["begin_key", "end_key").
344 //
345 // This feature is now usable in production, with the following caveats:
346 // 1) Accumulating many range tombstones in the memtable will degrade read
347 // performance; this can be avoided by manually flushing occasionally.
348 // 2) Limiting the maximum number of open files in the presence of range
349 // tombstones can degrade read performance. To avoid this problem, set
350 // max_open_files to -1 whenever possible.
351 virtual Status DeleteRange(const WriteOptions& options,
352 ColumnFamilyHandle* column_family,
353 const Slice& begin_key, const Slice& end_key);
354
355 // Merge the database entry for "key" with "value". Returns OK on success,
356 // and a non-OK status on error. The semantics of this operation is
357 // determined by the user provided merge_operator when opening DB.
358 // Note: consider setting options.sync = true.
359 virtual Status Merge(const WriteOptions& options,
360 ColumnFamilyHandle* column_family, const Slice& key,
361 const Slice& value) = 0;
362 virtual Status Merge(const WriteOptions& options, const Slice& key,
363 const Slice& value) {
364 return Merge(options, DefaultColumnFamily(), key, value);
365 }
366
367 // Apply the specified updates to the database.
368 // If `updates` contains no update, WAL will still be synced if
369 // options.sync=true.
370 // Returns OK on success, non-OK on failure.
371 // Note: consider setting options.sync = true.
372 virtual Status Write(const WriteOptions& options, WriteBatch* updates) = 0;
373
374 // If the database contains an entry for "key" store the
375 // corresponding value in *value and return OK.
376 //
377 // If there is no entry for "key" leave *value unchanged and return
378 // a status for which Status::IsNotFound() returns true.
379 //
380 // May return some other Status on an error.
381 virtual inline Status Get(const ReadOptions& options,
382 ColumnFamilyHandle* column_family, const Slice& key,
383 std::string* value) {
384 assert(value != nullptr);
385 PinnableSlice pinnable_val(value);
386 assert(!pinnable_val.IsPinned());
387 auto s = Get(options, column_family, key, &pinnable_val);
388 if (s.ok() && pinnable_val.IsPinned()) {
389 value->assign(pinnable_val.data(), pinnable_val.size());
390 } // else value is already assigned
391 return s;
392 }
393 virtual Status Get(const ReadOptions& options,
394 ColumnFamilyHandle* column_family, const Slice& key,
395 PinnableSlice* value) = 0;
396 virtual Status Get(const ReadOptions& options, const Slice& key,
397 std::string* value) {
398 return Get(options, DefaultColumnFamily(), key, value);
399 }
400
401 // If keys[i] does not exist in the database, then the i'th returned
402 // status will be one for which Status::IsNotFound() is true, and
403 // (*values)[i] will be set to some arbitrary value (often ""). Otherwise,
404 // the i'th returned status will have Status::ok() true, and (*values)[i]
405 // will store the value associated with keys[i].
406 //
407 // (*values) will always be resized to be the same size as (keys).
408 // Similarly, the number of returned statuses will be the number of keys.
409 // Note: keys will not be "de-duplicated". Duplicate keys will return
410 // duplicate values in order.
411 virtual std::vector<Status> MultiGet(
412 const ReadOptions& options,
413 const std::vector<ColumnFamilyHandle*>& column_family,
414 const std::vector<Slice>& keys, std::vector<std::string>* values) = 0;
415 virtual std::vector<Status> MultiGet(const ReadOptions& options,
416 const std::vector<Slice>& keys,
417 std::vector<std::string>* values) {
418 return MultiGet(
419 options,
420 std::vector<ColumnFamilyHandle*>(keys.size(), DefaultColumnFamily()),
421 keys, values);
422 }
423
424 // If the key definitely does not exist in the database, then this method
425 // returns false, else true. If the caller wants to obtain value when the key
426 // is found in memory, a bool for 'value_found' must be passed. 'value_found'
427 // will be true on return if value has been set properly.
428 // This check is potentially lighter-weight than invoking DB::Get(). One way
429 // to make this lighter weight is to avoid doing any IOs.
430 // Default implementation here returns true and sets 'value_found' to false
431 virtual bool KeyMayExist(const ReadOptions& /*options*/,
432 ColumnFamilyHandle* /*column_family*/,
433 const Slice& /*key*/, std::string* /*value*/,
434 bool* value_found = nullptr) {
435 if (value_found != nullptr) {
436 *value_found = false;
437 }
438 return true;
439 }
440 virtual bool KeyMayExist(const ReadOptions& options, const Slice& key,
441 std::string* value, bool* value_found = nullptr) {
442 return KeyMayExist(options, DefaultColumnFamily(), key, value, value_found);
443 }
444
445 // Return a heap-allocated iterator over the contents of the database.
446 // The result of NewIterator() is initially invalid (caller must
447 // call one of the Seek methods on the iterator before using it).
448 //
449 // Caller should delete the iterator when it is no longer needed.
450 // The returned iterator should be deleted before this db is deleted.
451 virtual Iterator* NewIterator(const ReadOptions& options,
452 ColumnFamilyHandle* column_family) = 0;
453 virtual Iterator* NewIterator(const ReadOptions& options) {
454 return NewIterator(options, DefaultColumnFamily());
455 }
456 // Returns iterators from a consistent database state across multiple
457 // column families. Iterators are heap allocated and need to be deleted
458 // before the db is deleted
459 virtual Status NewIterators(
460 const ReadOptions& options,
461 const std::vector<ColumnFamilyHandle*>& column_families,
462 std::vector<Iterator*>* iterators) = 0;
463
464 // Return a handle to the current DB state. Iterators created with
465 // this handle will all observe a stable snapshot of the current DB
466 // state. The caller must call ReleaseSnapshot(result) when the
467 // snapshot is no longer needed.
468 //
469 // nullptr will be returned if the DB fails to take a snapshot or does
470 // not support snapshot.
471 virtual const Snapshot* GetSnapshot() = 0;
472
473 // Release a previously acquired snapshot. The caller must not
474 // use "snapshot" after this call.
475 virtual void ReleaseSnapshot(const Snapshot* snapshot) = 0;
476
477 #ifndef ROCKSDB_LITE
478 // Contains all valid property arguments for GetProperty().
479 //
480 // NOTE: Property names cannot end in numbers since those are interpreted as
481 // arguments, e.g., see kNumFilesAtLevelPrefix.
482 struct Properties {
483 // "rocksdb.num-files-at-level<N>" - returns string containing the number
484 // of files at level <N>, where <N> is an ASCII representation of a
485 // level number (e.g., "0").
486 static const std::string kNumFilesAtLevelPrefix;
487
488 // "rocksdb.compression-ratio-at-level<N>" - returns string containing the
489 // compression ratio of data at level <N>, where <N> is an ASCII
490 // representation of a level number (e.g., "0"). Here, compression
491 // ratio is defined as uncompressed data size / compressed file size.
492 // Returns "-1.0" if no open files at level <N>.
493 static const std::string kCompressionRatioAtLevelPrefix;
494
495 // "rocksdb.stats" - returns a multi-line string containing the data
496 // described by kCFStats followed by the data described by kDBStats.
497 static const std::string kStats;
498
499 // "rocksdb.sstables" - returns a multi-line string summarizing current
500 // SST files.
501 static const std::string kSSTables;
502
503 // "rocksdb.cfstats" - Both of "rocksdb.cfstats-no-file-histogram" and
504 // "rocksdb.cf-file-histogram" together. See below for description
505 // of the two.
506 static const std::string kCFStats;
507
508 // "rocksdb.cfstats-no-file-histogram" - returns a multi-line string with
509 // general column family stats per-level over db's lifetime ("L<n>"),
510 // aggregated over db's lifetime ("Sum"), and aggregated over the
511 // interval since the last retrieval ("Int").
512 // It could also be used to return the stats in the format of the map.
513 // In this case there will be a pair of string to array of double for
514 // each level as well as for "Sum". "Int" stats will not be affected
515 // when this form of stats are retrieved.
516 static const std::string kCFStatsNoFileHistogram;
517
518 // "rocksdb.cf-file-histogram" - print out how many file reads to every
519 // level, as well as the histogram of latency of single requests.
520 static const std::string kCFFileHistogram;
521
522 // "rocksdb.dbstats" - returns a multi-line string with general database
523 // stats, both cumulative (over the db's lifetime) and interval (since
524 // the last retrieval of kDBStats).
525 static const std::string kDBStats;
526
527 // "rocksdb.levelstats" - returns multi-line string containing the number
528 // of files per level and total size of each level (MB).
529 static const std::string kLevelStats;
530
531 // "rocksdb.num-immutable-mem-table" - returns number of immutable
532 // memtables that have not yet been flushed.
533 static const std::string kNumImmutableMemTable;
534
535 // "rocksdb.num-immutable-mem-table-flushed" - returns number of immutable
536 // memtables that have already been flushed.
537 static const std::string kNumImmutableMemTableFlushed;
538
539 // "rocksdb.mem-table-flush-pending" - returns 1 if a memtable flush is
540 // pending; otherwise, returns 0.
541 static const std::string kMemTableFlushPending;
542
543 // "rocksdb.num-running-flushes" - returns the number of currently running
544 // flushes.
545 static const std::string kNumRunningFlushes;
546
547 // "rocksdb.compaction-pending" - returns 1 if at least one compaction is
548 // pending; otherwise, returns 0.
549 static const std::string kCompactionPending;
550
551 // "rocksdb.num-running-compactions" - returns the number of currently
552 // running compactions.
553 static const std::string kNumRunningCompactions;
554
555 // "rocksdb.background-errors" - returns accumulated number of background
556 // errors.
557 static const std::string kBackgroundErrors;
558
559 // "rocksdb.cur-size-active-mem-table" - returns approximate size of active
560 // memtable (bytes).
561 static const std::string kCurSizeActiveMemTable;
562
563 // "rocksdb.cur-size-all-mem-tables" - returns approximate size of active
564 // and unflushed immutable memtables (bytes).
565 static const std::string kCurSizeAllMemTables;
566
567 // "rocksdb.size-all-mem-tables" - returns approximate size of active,
568 // unflushed immutable, and pinned immutable memtables (bytes).
569 static const std::string kSizeAllMemTables;
570
571 // "rocksdb.num-entries-active-mem-table" - returns total number of entries
572 // in the active memtable.
573 static const std::string kNumEntriesActiveMemTable;
574
575 // "rocksdb.num-entries-imm-mem-tables" - returns total number of entries
576 // in the unflushed immutable memtables.
577 static const std::string kNumEntriesImmMemTables;
578
579 // "rocksdb.num-deletes-active-mem-table" - returns total number of delete
580 // entries in the active memtable.
581 static const std::string kNumDeletesActiveMemTable;
582
583 // "rocksdb.num-deletes-imm-mem-tables" - returns total number of delete
584 // entries in the unflushed immutable memtables.
585 static const std::string kNumDeletesImmMemTables;
586
587 // "rocksdb.estimate-num-keys" - returns estimated number of total keys in
588 // the active and unflushed immutable memtables and storage.
589 static const std::string kEstimateNumKeys;
590
591 // "rocksdb.estimate-table-readers-mem" - returns estimated memory used for
592 // reading SST tables, excluding memory used in block cache (e.g.,
593 // filter and index blocks).
594 static const std::string kEstimateTableReadersMem;
595
596 // "rocksdb.is-file-deletions-enabled" - returns 0 if deletion of obsolete
597 // files is enabled; otherwise, returns a non-zero number.
598 static const std::string kIsFileDeletionsEnabled;
599
600 // "rocksdb.num-snapshots" - returns number of unreleased snapshots of the
601 // database.
602 static const std::string kNumSnapshots;
603
604 // "rocksdb.oldest-snapshot-time" - returns number representing unix
605 // timestamp of oldest unreleased snapshot.
606 static const std::string kOldestSnapshotTime;
607
608 // "rocksdb.num-live-versions" - returns number of live versions. `Version`
609 // is an internal data structure. See version_set.h for details. More
610 // live versions often mean more SST files are held from being deleted,
611 // by iterators or unfinished compactions.
612 static const std::string kNumLiveVersions;
613
614 // "rocksdb.current-super-version-number" - returns number of current LSM
615 // version. It is a uint64_t integer number, incremented after there is
616 // any change to the LSM tree. The number is not preserved after restarting
617 // the DB. After DB restart, it will start from 0 again.
618 static const std::string kCurrentSuperVersionNumber;
619
620 // "rocksdb.estimate-live-data-size" - returns an estimate of the amount of
621 // live data in bytes.
622 static const std::string kEstimateLiveDataSize;
623
624 // "rocksdb.min-log-number-to-keep" - return the minimum log number of the
625 // log files that should be kept.
626 static const std::string kMinLogNumberToKeep;
627
628 // "rocksdb.min-obsolete-sst-number-to-keep" - return the minimum file
629 // number for an obsolete SST to be kept. The max value of `uint64_t`
630 // will be returned if all obsolete files can be deleted.
631 static const std::string kMinObsoleteSstNumberToKeep;
632
633 // "rocksdb.total-sst-files-size" - returns total size (bytes) of all SST
634 // files.
635 // WARNING: may slow down online queries if there are too many files.
636 static const std::string kTotalSstFilesSize;
637
638 // "rocksdb.live-sst-files-size" - returns total size (bytes) of all SST
639 // files belonging to the latest LSM tree.
640 static const std::string kLiveSstFilesSize;
641
642 // "rocksdb.base-level" - returns number of level to which L0 data will be
643 // compacted.
644 static const std::string kBaseLevel;
645
646 // "rocksdb.estimate-pending-compaction-bytes" - returns estimated total
647 // number of bytes compaction needs to rewrite to get all levels down
648 // to under target size. Not valid for other compactions than level-
649 // based.
650 static const std::string kEstimatePendingCompactionBytes;
651
652 // "rocksdb.aggregated-table-properties" - returns a string representation
653 // of the aggregated table properties of the target column family.
654 static const std::string kAggregatedTableProperties;
655
656 // "rocksdb.aggregated-table-properties-at-level<N>", same as the previous
657 // one but only returns the aggregated table properties of the
658 // specified level "N" at the target column family.
659 static const std::string kAggregatedTablePropertiesAtLevel;
660
661 // "rocksdb.actual-delayed-write-rate" - returns the current actual delayed
662 // write rate. 0 means no delay.
663 static const std::string kActualDelayedWriteRate;
664
665 // "rocksdb.is-write-stopped" - Return 1 if write has been stopped.
666 static const std::string kIsWriteStopped;
667
668 // "rocksdb.estimate-oldest-key-time" - returns an estimation of
669 // oldest key timestamp in the DB. Currently only available for
670 // FIFO compaction with
671 // compaction_options_fifo.allow_compaction = false.
672 static const std::string kEstimateOldestKeyTime;
673
674 // "rocksdb.block-cache-capacity" - returns block cache capacity.
675 static const std::string kBlockCacheCapacity;
676
677 // "rocksdb.block-cache-usage" - returns the memory size for the entries
678 // residing in block cache.
679 static const std::string kBlockCacheUsage;
680
681 // "rocksdb.block-cache-pinned-usage" - returns the memory size for the
682 // entries being pinned.
683 static const std::string kBlockCachePinnedUsage;
684
685 // "rocksdb.options-statistics" - returns multi-line string
686 // of options.statistics
687 static const std::string kOptionsStatistics;
688 };
689 #endif /* ROCKSDB_LITE */
690
691 // DB implementations can export properties about their state via this method.
692 // If "property" is a valid property understood by this DB implementation (see
693 // Properties struct above for valid options), fills "*value" with its current
694 // value and returns true. Otherwise, returns false.
695 virtual bool GetProperty(ColumnFamilyHandle* column_family,
696 const Slice& property, std::string* value) = 0;
697 virtual bool GetProperty(const Slice& property, std::string* value) {
698 return GetProperty(DefaultColumnFamily(), property, value);
699 }
700 virtual bool GetMapProperty(ColumnFamilyHandle* column_family,
701 const Slice& property,
702 std::map<std::string, std::string>* value) = 0;
703 virtual bool GetMapProperty(const Slice& property,
704 std::map<std::string, std::string>* value) {
705 return GetMapProperty(DefaultColumnFamily(), property, value);
706 }
707
708 // Similar to GetProperty(), but only works for a subset of properties whose
709 // return value is an integer. Return the value by integer. Supported
710 // properties:
711 // "rocksdb.num-immutable-mem-table"
712 // "rocksdb.mem-table-flush-pending"
713 // "rocksdb.compaction-pending"
714 // "rocksdb.background-errors"
715 // "rocksdb.cur-size-active-mem-table"
716 // "rocksdb.cur-size-all-mem-tables"
717 // "rocksdb.size-all-mem-tables"
718 // "rocksdb.num-entries-active-mem-table"
719 // "rocksdb.num-entries-imm-mem-tables"
720 // "rocksdb.num-deletes-active-mem-table"
721 // "rocksdb.num-deletes-imm-mem-tables"
722 // "rocksdb.estimate-num-keys"
723 // "rocksdb.estimate-table-readers-mem"
724 // "rocksdb.is-file-deletions-enabled"
725 // "rocksdb.num-snapshots"
726 // "rocksdb.oldest-snapshot-time"
727 // "rocksdb.num-live-versions"
728 // "rocksdb.current-super-version-number"
729 // "rocksdb.estimate-live-data-size"
730 // "rocksdb.min-log-number-to-keep"
731 // "rocksdb.min-obsolete-sst-number-to-keep"
732 // "rocksdb.total-sst-files-size"
733 // "rocksdb.live-sst-files-size"
734 // "rocksdb.base-level"
735 // "rocksdb.estimate-pending-compaction-bytes"
736 // "rocksdb.num-running-compactions"
737 // "rocksdb.num-running-flushes"
738 // "rocksdb.actual-delayed-write-rate"
739 // "rocksdb.is-write-stopped"
740 // "rocksdb.estimate-oldest-key-time"
741 // "rocksdb.block-cache-capacity"
742 // "rocksdb.block-cache-usage"
743 // "rocksdb.block-cache-pinned-usage"
744 virtual bool GetIntProperty(ColumnFamilyHandle* column_family,
745 const Slice& property, uint64_t* value) = 0;
746 virtual bool GetIntProperty(const Slice& property, uint64_t* value) {
747 return GetIntProperty(DefaultColumnFamily(), property, value);
748 }
749
750 // Reset internal stats for DB and all column families.
751 // Note this doesn't reset options.statistics as it is not owned by
752 // DB.
753 virtual Status ResetStats() {
754 return Status::NotSupported("Not implemented");
755 }
756
757 // Same as GetIntProperty(), but this one returns the aggregated int
758 // property from all column families.
759 virtual bool GetAggregatedIntProperty(const Slice& property,
760 uint64_t* value) = 0;
761
// Bit flags for the include_flags argument of DB::GetApproximateSizes. They
// select whether memtable stats, the file stats approximation, or both are
// included.
enum SizeApproximationFlags : uint8_t {
  NONE = 0,
  INCLUDE_MEMTABLES = 1 << 0,
  INCLUDE_FILES = 1 << 1
};
769
770 // For each i in [0,n-1], store in "sizes[i]", the approximate
771 // file system space used by keys in "[range[i].start .. range[i].limit)".
772 //
773 // Note that the returned sizes measure file system space usage, so
774 // if the user data compresses by a factor of ten, the returned
775 // sizes will be one-tenth the size of the corresponding user data size.
776 //
777 // If include_flags defines whether the returned size should include
778 // the recently written data in the mem-tables (if
779 // the mem-table type supports it), data serialized to disk, or both.
780 // include_flags should be of type DB::SizeApproximationFlags
781 virtual void GetApproximateSizes(ColumnFamilyHandle* column_family,
782 const Range* range, int n, uint64_t* sizes,
783 uint8_t include_flags = INCLUDE_FILES) = 0;
784 virtual void GetApproximateSizes(const Range* range, int n, uint64_t* sizes,
785 uint8_t include_flags = INCLUDE_FILES) {
786 GetApproximateSizes(DefaultColumnFamily(), range, n, sizes, include_flags);
787 }
788
789 // The method is similar to GetApproximateSizes, except it
790 // returns approximate number of records in memtables.
791 virtual void GetApproximateMemTableStats(ColumnFamilyHandle* column_family,
792 const Range& range,
793 uint64_t* const count,
794 uint64_t* const size) = 0;
795 virtual void GetApproximateMemTableStats(const Range& range,
796 uint64_t* const count,
797 uint64_t* const size) {
798 GetApproximateMemTableStats(DefaultColumnFamily(), range, count, size);
799 }
800
801 // Deprecated versions of GetApproximateSizes
802 ROCKSDB_DEPRECATED_FUNC virtual void GetApproximateSizes(
803 const Range* range, int n, uint64_t* sizes, bool include_memtable) {
804 uint8_t include_flags = SizeApproximationFlags::INCLUDE_FILES;
805 if (include_memtable) {
806 include_flags |= SizeApproximationFlags::INCLUDE_MEMTABLES;
807 }
808 GetApproximateSizes(DefaultColumnFamily(), range, n, sizes, include_flags);
809 }
810 ROCKSDB_DEPRECATED_FUNC virtual void GetApproximateSizes(
811 ColumnFamilyHandle* column_family, const Range* range, int n,
812 uint64_t* sizes, bool include_memtable) {
813 uint8_t include_flags = SizeApproximationFlags::INCLUDE_FILES;
814 if (include_memtable) {
815 include_flags |= SizeApproximationFlags::INCLUDE_MEMTABLES;
816 }
817 GetApproximateSizes(column_family, range, n, sizes, include_flags);
818 }
819
820 // Compact the underlying storage for the key range [*begin,*end].
821 // The actual compaction interval might be superset of [*begin, *end].
822 // In particular, deleted and overwritten versions are discarded,
823 // and the data is rearranged to reduce the cost of operations
824 // needed to access the data. This operation should typically only
825 // be invoked by users who understand the underlying implementation.
826 //
827 // begin==nullptr is treated as a key before all keys in the database.
828 // end==nullptr is treated as a key after all keys in the database.
829 // Therefore the following call will compact the entire database:
830 // db->CompactRange(options, nullptr, nullptr);
831 // Note that after the entire database is compacted, all data are pushed
832 // down to the last level containing any data. If the total data size after
833 // compaction is reduced, that level might not be appropriate for hosting all
834 // the files. In this case, client could set options.change_level to true, to
835 // move the files back to the minimum level capable of holding the data set
836 // or a given level (specified by non-negative options.target_level).
837 virtual Status CompactRange(const CompactRangeOptions& options,
838 ColumnFamilyHandle* column_family,
839 const Slice* begin, const Slice* end) = 0;
840 virtual Status CompactRange(const CompactRangeOptions& options,
841 const Slice* begin, const Slice* end) {
842 return CompactRange(options, DefaultColumnFamily(), begin, end);
843 }
844
845 ROCKSDB_DEPRECATED_FUNC virtual Status CompactRange(
846 ColumnFamilyHandle* column_family, const Slice* begin, const Slice* end,
847 bool change_level = false, int target_level = -1,
848 uint32_t target_path_id = 0) {
849 CompactRangeOptions options;
850 options.change_level = change_level;
851 options.target_level = target_level;
852 options.target_path_id = target_path_id;
853 return CompactRange(options, column_family, begin, end);
854 }
855
856 ROCKSDB_DEPRECATED_FUNC virtual Status CompactRange(
857 const Slice* begin, const Slice* end, bool change_level = false,
858 int target_level = -1, uint32_t target_path_id = 0) {
859 CompactRangeOptions options;
860 options.change_level = change_level;
861 options.target_level = target_level;
862 options.target_path_id = target_path_id;
863 return CompactRange(options, DefaultColumnFamily(), begin, end);
864 }
865
866 virtual Status SetOptions(
867 ColumnFamilyHandle* /*column_family*/,
868 const std::unordered_map<std::string, std::string>& /*new_options*/) {
869 return Status::NotSupported("Not implemented");
870 }
871 virtual Status SetOptions(
872 const std::unordered_map<std::string, std::string>& new_options) {
873 return SetOptions(DefaultColumnFamily(), new_options);
874 }
875
876 virtual Status SetDBOptions(
877 const std::unordered_map<std::string, std::string>& new_options) = 0;
878
879 // CompactFiles() inputs a list of files specified by file numbers and
880 // compacts them to the specified level. Note that the behavior is different
881 // from CompactRange() in that CompactFiles() performs the compaction job
882 // using the CURRENT thread.
883 //
884 // @see GetDataBaseMetaData
885 // @see GetColumnFamilyMetaData
886 virtual Status CompactFiles(
887 const CompactionOptions& compact_options,
888 ColumnFamilyHandle* column_family,
889 const std::vector<std::string>& input_file_names, const int output_level,
890 const int output_path_id = -1,
891 std::vector<std::string>* const output_file_names = nullptr,
892 CompactionJobInfo* compaction_job_info = nullptr) = 0;
893
894 virtual Status CompactFiles(
895 const CompactionOptions& compact_options,
896 const std::vector<std::string>& input_file_names, const int output_level,
897 const int output_path_id = -1,
898 std::vector<std::string>* const output_file_names = nullptr,
899 CompactionJobInfo* compaction_job_info = nullptr) {
900 return CompactFiles(compact_options, DefaultColumnFamily(),
901 input_file_names, output_level, output_path_id,
902 output_file_names, compaction_job_info);
903 }
904
905 // This function will wait until all currently running background processes
906 // finish. After it returns, no background process will be run until
907 // ContinueBackgroundWork is called
908 virtual Status PauseBackgroundWork() = 0;
909 virtual Status ContinueBackgroundWork() = 0;
910
911 // This function will enable automatic compactions for the given column
912 // families if they were previously disabled. The function will first set the
913 // disable_auto_compactions option for each column family to 'false', after
914 // which it will schedule a flush/compaction.
915 //
916 // NOTE: Setting disable_auto_compactions to 'false' through SetOptions() API
917 // does NOT schedule a flush/compaction afterwards, and only changes the
918 // parameter itself within the column family option.
919 //
920 virtual Status EnableAutoCompaction(
921 const std::vector<ColumnFamilyHandle*>& column_family_handles) = 0;
922
923 // Number of levels used for this DB.
924 virtual int NumberLevels(ColumnFamilyHandle* column_family) = 0;
925 virtual int NumberLevels() { return NumberLevels(DefaultColumnFamily()); }
926
927 // Maximum level to which a new compacted memtable is pushed if it
928 // does not create overlap.
929 virtual int MaxMemCompactionLevel(ColumnFamilyHandle* column_family) = 0;
930 virtual int MaxMemCompactionLevel() {
931 return MaxMemCompactionLevel(DefaultColumnFamily());
932 }
933
934 // Number of files in level-0 that would stop writes.
935 virtual int Level0StopWriteTrigger(ColumnFamilyHandle* column_family) = 0;
936 virtual int Level0StopWriteTrigger() {
937 return Level0StopWriteTrigger(DefaultColumnFamily());
938 }
939
940 // Get DB name -- the exact same name that was provided as an argument to
941 // DB::Open()
942 virtual const std::string& GetName() const = 0;
943
944 // Get Env object from the DB
945 virtual Env* GetEnv() const = 0;
946
947 // Get DB Options that we use. During the process of opening the
948 // column family, the options provided when calling DB::Open() or
949 // DB::CreateColumnFamily() will have been "sanitized" and transformed
950 // in an implementation-defined manner.
951 virtual Options GetOptions(ColumnFamilyHandle* column_family) const = 0;
952 virtual Options GetOptions() const {
953 return GetOptions(DefaultColumnFamily());
954 }
955
956 virtual DBOptions GetDBOptions() const = 0;
957
958 // Flush all mem-table data.
959 // Flush a single column family, even when atomic flush is enabled. To flush
960 // multiple column families, use Flush(options, column_families).
961 virtual Status Flush(const FlushOptions& options,
962 ColumnFamilyHandle* column_family) = 0;
963 virtual Status Flush(const FlushOptions& options) {
964 return Flush(options, DefaultColumnFamily());
965 }
966 // Flushes multiple column families.
967 // If atomic flush is not enabled, Flush(options, column_families) is
968 // equivalent to calling Flush(options, column_family) multiple times.
969 // If atomic flush is enabled, Flush(options, column_families) will flush all
970 // column families specified in 'column_families' up to the latest sequence
971 // number at the time when flush is requested.
972 // Note that RocksDB 5.15 and earlier may not be able to open later versions
973 // with atomic flush enabled.
974 virtual Status Flush(
975 const FlushOptions& options,
976 const std::vector<ColumnFamilyHandle*>& column_families) = 0;
977
978 // Flush the WAL memory buffer to the file. If sync is true, it calls SyncWAL
979 // afterwards.
980 virtual Status FlushWAL(bool /*sync*/) {
981 return Status::NotSupported("FlushWAL not implemented");
982 }
983 // Sync the wal. Note that Write() followed by SyncWAL() is not exactly the
984 // same as Write() with sync=true: in the latter case the changes won't be
985 // visible until the sync is done.
986 // Currently only works if allow_mmap_writes = false in Options.
987 virtual Status SyncWAL() = 0;
988
989 // Lock the WAL. Also flushes the WAL after locking.
990 virtual Status LockWAL() {
991 return Status::NotSupported("LockWAL not implemented");
992 }
993
994 // Unlock the WAL.
995 virtual Status UnlockWAL() {
996 return Status::NotSupported("UnlockWAL not implemented");
997 }
998
999 // The sequence number of the most recent transaction.
1000 virtual SequenceNumber GetLatestSequenceNumber() const = 0;
1001
1002 // Instructs DB to preserve deletes with sequence numbers >= passed seqnum.
1003 // Has no effect if DBOptions.preserve_deletes is set to false.
1004 // This function assumes that user calls this function with monotonically
1005 // increasing seqnums (otherwise we can't guarantee that a particular delete
1006 // hasn't been already processed); returns true if the value was successfully
1007 // updated, false if user attempted to call if with seqnum <= current value.
1008 virtual bool SetPreserveDeletesSequenceNumber(SequenceNumber seqnum) = 0;
1009
1010 #ifndef ROCKSDB_LITE
1011
1012 // Prevent file deletions. Compactions will continue to occur,
1013 // but no obsolete files will be deleted. Calling this multiple
1014 // times have the same effect as calling it once.
1015 virtual Status DisableFileDeletions() = 0;
1016
1017 // Allow compactions to delete obsolete files.
1018 // If force == true, the call to EnableFileDeletions() will guarantee that
1019 // file deletions are enabled after the call, even if DisableFileDeletions()
1020 // was called multiple times before.
1021 // If force == false, EnableFileDeletions will only enable file deletion
1022 // after it's been called at least as many times as DisableFileDeletions(),
1023 // enabling the two methods to be called by two threads concurrently without
1024 // synchronization -- i.e., file deletions will be enabled only after both
1025 // threads call EnableFileDeletions()
1026 virtual Status EnableFileDeletions(bool force = true) = 0;
1027
1028 // GetLiveFiles followed by GetSortedWalFiles can generate a lossless backup
1029
1030 // Retrieve the list of all files in the database. The files are
1031 // relative to the dbname and are not absolute paths. Despite being relative
1032 // paths, the file names begin with "/". The valid size of the manifest file
1033 // is returned in manifest_file_size. The manifest file is an ever growing
1034 // file, but only the portion specified by manifest_file_size is valid for
1035 // this snapshot. Setting flush_memtable to true does Flush before recording
1036 // the live files. Setting flush_memtable to false is useful when we don't
1037 // want to wait for flush which may have to wait for compaction to complete
1038 // taking an indeterminate time.
1039 //
1040 // In case you have multiple column families, even if flush_memtable is true,
1041 // you still need to call GetSortedWalFiles after GetLiveFiles to compensate
1042 // for new data that arrived to already-flushed column families while other
1043 // column families were flushing
1044 virtual Status GetLiveFiles(std::vector<std::string>&,
1045 uint64_t* manifest_file_size,
1046 bool flush_memtable = true) = 0;
1047
1048 // Retrieve the sorted list of all wal files with earliest file first
1049 virtual Status GetSortedWalFiles(VectorLogPtr& files) = 0;
1050
1051 // Note: this API is not yet consistent with WritePrepared transactions.
1052 // Sets iter to an iterator that is positioned at a write-batch containing
1053 // seq_number. If the sequence number is non existent, it returns an iterator
1054 // at the first available seq_no after the requested seq_no
1055 // Returns Status::OK if iterator is valid
1056 // Must set WAL_ttl_seconds or WAL_size_limit_MB to large values to
1057 // use this api, else the WAL files will get
1058 // cleared aggressively and the iterator might keep getting invalid before
1059 // an update is read.
1060 virtual Status GetUpdatesSince(
1061 SequenceNumber seq_number, std::unique_ptr<TransactionLogIterator>* iter,
1062 const TransactionLogIterator::ReadOptions& read_options =
1063 TransactionLogIterator::ReadOptions()) = 0;
1064
1065 // Windows API macro interference
1066 #undef DeleteFile
1067 // Delete the file name from the db directory and update the internal state to
1068 // reflect that. Supports deletion of sst and log files only. 'name' must be
1069 // path relative to the db directory. eg. 000001.sst, /archive/000003.log
1070 virtual Status DeleteFile(std::string name) = 0;
1071
1072 // Returns a list of all table files with their level, start key
1073 // and end key
1074 virtual void GetLiveFilesMetaData(
1075 std::vector<LiveFileMetaData>* /*metadata*/) {}
1076
1077 // Obtains the meta data of the specified column family of the DB.
1078 virtual void GetColumnFamilyMetaData(ColumnFamilyHandle* /*column_family*/,
1079 ColumnFamilyMetaData* /*metadata*/) {}
1080
1081 // Get the metadata of the default column family.
1082 void GetColumnFamilyMetaData(ColumnFamilyMetaData* metadata) {
1083 GetColumnFamilyMetaData(DefaultColumnFamily(), metadata);
1084 }
1085
1086 // IngestExternalFile() will load a list of external SST files (1) into the DB
1087 // Two primary modes are supported:
1088 // - Duplicate keys in the new files will overwrite exiting keys (default)
1089 // - Duplicate keys will be skipped (set ingest_behind=true)
1090 // In the first mode we will try to find the lowest possible level that
1091 // the file can fit in, and ingest the file into this level (2). A file that
1092 // have a key range that overlap with the memtable key range will require us
1093 // to Flush the memtable first before ingesting the file.
1094 // In the second mode we will always ingest in the bottom most level (see
1095 // docs to IngestExternalFileOptions::ingest_behind).
1096 //
1097 // (1) External SST files can be created using SstFileWriter
1098 // (2) We will try to ingest the files to the lowest possible level
1099 // even if the file compression doesn't match the level compression
1100 // (3) If IngestExternalFileOptions->ingest_behind is set to true,
1101 // we always ingest at the bottommost level, which should be reserved
1102 // for this purpose (see DBOPtions::allow_ingest_behind flag).
1103 virtual Status IngestExternalFile(
1104 ColumnFamilyHandle* column_family,
1105 const std::vector<std::string>& external_files,
1106 const IngestExternalFileOptions& options) = 0;
1107
1108 virtual Status IngestExternalFile(
1109 const std::vector<std::string>& external_files,
1110 const IngestExternalFileOptions& options) {
1111 return IngestExternalFile(DefaultColumnFamily(), external_files, options);
1112 }
1113
1114 // IngestExternalFiles() will ingest files for multiple column families, and
1115 // record the result atomically to the MANIFEST.
1116 // If this function returns OK, all column families' ingestion must succeed.
1117 // If this function returns NOK, or the process crashes, then non-of the
1118 // files will be ingested into the database after recovery.
1119 // Note that it is possible for application to observe a mixed state during
1120 // the execution of this function. If the user performs range scan over the
1121 // column families with iterators, iterator on one column family may return
1122 // ingested data, while iterator on other column family returns old data.
1123 // Users can use snapshot for a consistent view of data.
1124 // If your db ingests multiple SST files using this API, i.e. args.size()
1125 // > 1, then RocksDB 5.15 and earlier will not be able to open it.
1126 //
1127 // REQUIRES: each arg corresponds to a different column family: namely, for
1128 // 0 <= i < j < len(args), args[i].column_family != args[j].column_family.
1129 virtual Status IngestExternalFiles(
1130 const std::vector<IngestExternalFileArg>& args) = 0;
1131
1132 virtual Status VerifyChecksum() = 0;
1133
1134 // AddFile() is deprecated, please use IngestExternalFile()
1135 ROCKSDB_DEPRECATED_FUNC virtual Status AddFile(
1136 ColumnFamilyHandle* column_family,
1137 const std::vector<std::string>& file_path_list, bool move_file = false,
1138 bool skip_snapshot_check = false) {
1139 IngestExternalFileOptions ifo;
1140 ifo.move_files = move_file;
1141 ifo.snapshot_consistency = !skip_snapshot_check;
1142 ifo.allow_global_seqno = false;
1143 ifo.allow_blocking_flush = false;
1144 return IngestExternalFile(column_family, file_path_list, ifo);
1145 }
1146
1147 ROCKSDB_DEPRECATED_FUNC virtual Status AddFile(
1148 const std::vector<std::string>& file_path_list, bool move_file = false,
1149 bool skip_snapshot_check = false) {
1150 IngestExternalFileOptions ifo;
1151 ifo.move_files = move_file;
1152 ifo.snapshot_consistency = !skip_snapshot_check;
1153 ifo.allow_global_seqno = false;
1154 ifo.allow_blocking_flush = false;
1155 return IngestExternalFile(DefaultColumnFamily(), file_path_list, ifo);
1156 }
1157
1158 // AddFile() is deprecated, please use IngestExternalFile()
1159 ROCKSDB_DEPRECATED_FUNC virtual Status AddFile(
1160 ColumnFamilyHandle* column_family, const std::string& file_path,
1161 bool move_file = false, bool skip_snapshot_check = false) {
1162 IngestExternalFileOptions ifo;
1163 ifo.move_files = move_file;
1164 ifo.snapshot_consistency = !skip_snapshot_check;
1165 ifo.allow_global_seqno = false;
1166 ifo.allow_blocking_flush = false;
1167 return IngestExternalFile(column_family, {file_path}, ifo);
1168 }
1169
1170 ROCKSDB_DEPRECATED_FUNC virtual Status AddFile(
1171 const std::string& file_path, bool move_file = false,
1172 bool skip_snapshot_check = false) {
1173 IngestExternalFileOptions ifo;
1174 ifo.move_files = move_file;
1175 ifo.snapshot_consistency = !skip_snapshot_check;
1176 ifo.allow_global_seqno = false;
1177 ifo.allow_blocking_flush = false;
1178 return IngestExternalFile(DefaultColumnFamily(), {file_path}, ifo);
1179 }
1180
1181 // Load table file with information "file_info" into "column_family"
1182 ROCKSDB_DEPRECATED_FUNC virtual Status AddFile(
1183 ColumnFamilyHandle* column_family,
1184 const std::vector<ExternalSstFileInfo>& file_info_list,
1185 bool move_file = false, bool skip_snapshot_check = false) {
1186 std::vector<std::string> external_files;
1187 for (const ExternalSstFileInfo& file_info : file_info_list) {
1188 external_files.push_back(file_info.file_path);
1189 }
1190 IngestExternalFileOptions ifo;
1191 ifo.move_files = move_file;
1192 ifo.snapshot_consistency = !skip_snapshot_check;
1193 ifo.allow_global_seqno = false;
1194 ifo.allow_blocking_flush = false;
1195 return IngestExternalFile(column_family, external_files, ifo);
1196 }
1197
1198 ROCKSDB_DEPRECATED_FUNC virtual Status AddFile(
1199 const std::vector<ExternalSstFileInfo>& file_info_list,
1200 bool move_file = false, bool skip_snapshot_check = false) {
1201 std::vector<std::string> external_files;
1202 for (const ExternalSstFileInfo& file_info : file_info_list) {
1203 external_files.push_back(file_info.file_path);
1204 }
1205 IngestExternalFileOptions ifo;
1206 ifo.move_files = move_file;
1207 ifo.snapshot_consistency = !skip_snapshot_check;
1208 ifo.allow_global_seqno = false;
1209 ifo.allow_blocking_flush = false;
1210 return IngestExternalFile(DefaultColumnFamily(), external_files, ifo);
1211 }
1212
1213 ROCKSDB_DEPRECATED_FUNC virtual Status AddFile(
1214 ColumnFamilyHandle* column_family, const ExternalSstFileInfo* file_info,
1215 bool move_file = false, bool skip_snapshot_check = false) {
1216 IngestExternalFileOptions ifo;
1217 ifo.move_files = move_file;
1218 ifo.snapshot_consistency = !skip_snapshot_check;
1219 ifo.allow_global_seqno = false;
1220 ifo.allow_blocking_flush = false;
1221 return IngestExternalFile(column_family, {file_info->file_path}, ifo);
1222 }
1223
1224 ROCKSDB_DEPRECATED_FUNC virtual Status AddFile(
1225 const ExternalSstFileInfo* file_info, bool move_file = false,
1226 bool skip_snapshot_check = false) {
1227 IngestExternalFileOptions ifo;
1228 ifo.move_files = move_file;
1229 ifo.snapshot_consistency = !skip_snapshot_check;
1230 ifo.allow_global_seqno = false;
1231 ifo.allow_blocking_flush = false;
1232 return IngestExternalFile(DefaultColumnFamily(), {file_info->file_path},
1233 ifo);
1234 }
1235
1236 #endif // ROCKSDB_LITE
1237
1238 // Sets the globally unique ID created at database creation time by invoking
1239 // Env::GenerateUniqueId(), in identity. Returns Status::OK if identity could
1240 // be set properly
1241 virtual Status GetDbIdentity(std::string& identity) const = 0;
1242
1243 // Returns default column family handle
1244 virtual ColumnFamilyHandle* DefaultColumnFamily() const = 0;
1245
1246 #ifndef ROCKSDB_LITE
1247 virtual Status GetPropertiesOfAllTables(ColumnFamilyHandle* column_family,
1248 TablePropertiesCollection* props) = 0;
1249 virtual Status GetPropertiesOfAllTables(TablePropertiesCollection* props) {
1250 return GetPropertiesOfAllTables(DefaultColumnFamily(), props);
1251 }
1252 virtual Status GetPropertiesOfTablesInRange(
1253 ColumnFamilyHandle* column_family, const Range* range, std::size_t n,
1254 TablePropertiesCollection* props) = 0;
1255
1256 virtual Status SuggestCompactRange(ColumnFamilyHandle* /*column_family*/,
1257 const Slice* /*begin*/,
1258 const Slice* /*end*/) {
1259 return Status::NotSupported("SuggestCompactRange() is not implemented.");
1260 }
1261
1262 virtual Status PromoteL0(ColumnFamilyHandle* /*column_family*/,
1263 int /*target_level*/) {
1264 return Status::NotSupported("PromoteL0() is not implemented.");
1265 }
1266
1267 // Trace DB operations. Use EndTrace() to stop tracing.
1268 virtual Status StartTrace(const TraceOptions& /*options*/,
1269 std::unique_ptr<TraceWriter>&& /*trace_writer*/) {
1270 return Status::NotSupported("StartTrace() is not implemented.");
1271 }
1272
1273 virtual Status EndTrace() {
1274 return Status::NotSupported("EndTrace() is not implemented.");
1275 }
1276 #endif // ROCKSDB_LITE
1277
1278 // Needed for StackableDB
1279 virtual DB* GetRootDB() { return this; }
1280
1281 // Given a time window, return an iterator for accessing stats history
1282 // User is responsible for deleting StatsHistoryIterator after use
1283 virtual Status GetStatsHistory(
1284 uint64_t /*start_time*/, uint64_t /*end_time*/,
1285 std::unique_ptr<StatsHistoryIterator>* /*stats_iterator*/) {
1286 return Status::NotSupported("GetStatsHistory() is not implemented.");
1287 }
1288
1289 #ifndef ROCKSDB_LITE
1290 // Make the secondary instance catch up with the primary by tailing and
1291 // replaying the MANIFEST and WAL of the primary.
1292 // Column families created by the primary after the secondary instance starts
1293 // will be ignored unless the secondary instance closes and restarts with the
1294 // newly created column families.
1295 // Column families that exist before secondary instance starts and dropped by
1296 // the primary afterwards will be marked as dropped. However, as long as the
1297 // secondary instance does not delete the corresponding column family
1298 // handles, the data of the column family is still accessible to the
1299 // secondary.
1300 // TODO: we will support WAL tailing soon.
1301 virtual Status TryCatchUpWithPrimary() {
1302 return Status::NotSupported("Supported only by secondary instance");
1303 }
1304 #endif // !ROCKSDB_LITE
1305
1306 private:
1307 // No copying allowed
1308 DB(const DB&);
1309 void operator=(const DB&);
1310 };
1311
1312 // Destroy the contents of the specified database.
1313 // Be very careful using this method.
1314 Status DestroyDB(const std::string& name, const Options& options,
1315 const std::vector<ColumnFamilyDescriptor>& column_families =
1316 std::vector<ColumnFamilyDescriptor>());
1317
1318 #ifndef ROCKSDB_LITE
1319 // If a DB cannot be opened, you may attempt to call this method to
1320 // resurrect as much of the contents of the database as possible.
1321 // Some data may be lost, so be careful when calling this function
1322 // on a database that contains important information.
1323 //
1324 // With this API, we will warn and skip data associated with column families not
1325 // specified in column_families.
1326 //
1327 // @param column_families Descriptors for known column families
1328 Status RepairDB(const std::string& dbname, const DBOptions& db_options,
1329 const std::vector<ColumnFamilyDescriptor>& column_families);
1330
1331 // @param unknown_cf_opts Options for column families encountered during the
1332 // repair that were not specified in column_families.
1333 Status RepairDB(const std::string& dbname, const DBOptions& db_options,
1334 const std::vector<ColumnFamilyDescriptor>& column_families,
1335 const ColumnFamilyOptions& unknown_cf_opts);
1336
1337 // @param options These options will be used for the database and for ALL column
1338 // families encountered during the repair
1339 Status RepairDB(const std::string& dbname, const Options& options);
1340
1341 #endif
1342
1343 } // namespace rocksdb