]>
Commit | Line | Data |
---|---|---|
1 | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. | |
2 | // This source code is licensed under both the GPLv2 (found in the | |
3 | // COPYING file in the root directory) and Apache 2.0 License | |
4 | // (found in the LICENSE.Apache file in the root directory). | |
5 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |
6 | // Use of this source code is governed by a BSD-style license that can be | |
7 | // found in the LICENSE file. See the AUTHORS file for names of contributors. | |
8 | ||
9 | #pragma once | |
10 | ||
11 | #include <stdint.h> | |
12 | #include <stdio.h> | |
13 | #include <map> | |
14 | #include <memory> | |
15 | #include <string> | |
16 | #include <unordered_map> | |
17 | #include <vector> | |
18 | #include "rocksdb/iterator.h" | |
19 | #include "rocksdb/listener.h" | |
20 | #include "rocksdb/metadata.h" | |
21 | #include "rocksdb/options.h" | |
22 | #include "rocksdb/snapshot.h" | |
23 | #include "rocksdb/sst_file_writer.h" | |
24 | #include "rocksdb/thread_status.h" | |
25 | #include "rocksdb/transaction_log.h" | |
26 | #include "rocksdb/types.h" | |
27 | #include "rocksdb/version.h" | |
28 | ||
29 | #ifdef _WIN32 | |
30 | // Windows API macro interference | |
31 | #undef DeleteFile | |
32 | #endif | |
33 | ||
// ROCKSDB_DEPRECATED_FUNC expands to the compiler-specific attribute that
// marks a function as deprecated. On compilers that are neither GCC/Clang nor
// a Windows toolchain it expands to nothing, so declarations that use the
// macro still compile (without a deprecation warning).
#if defined(__GNUC__) || defined(__clang__)
#define ROCKSDB_DEPRECATED_FUNC __attribute__((__deprecated__))
#elif defined(_WIN32)
#define ROCKSDB_DEPRECATED_FUNC __declspec(deprecated)
#else
#define ROCKSDB_DEPRECATED_FUNC
#endif
39 | ||
40 | namespace ROCKSDB_NAMESPACE { | |
41 | ||
42 | struct Options; | |
43 | struct DBOptions; | |
44 | struct ColumnFamilyOptions; | |
45 | struct ReadOptions; | |
46 | struct WriteOptions; | |
47 | struct FlushOptions; | |
48 | struct CompactionOptions; | |
49 | struct CompactRangeOptions; | |
50 | struct TableProperties; | |
51 | struct ExternalSstFileInfo; | |
52 | class WriteBatch; | |
53 | class Env; | |
54 | class EventListener; | |
55 | class StatsHistoryIterator; | |
56 | class TraceWriter; | |
57 | #ifdef ROCKSDB_LITE | |
58 | class CompactionJobInfo; | |
59 | #endif | |
60 | class FileSystem; | |
61 | ||
62 | extern const std::string kDefaultColumnFamilyName; | |
63 | extern const std::string kPersistentStatsColumnFamilyName; | |
64 | struct ColumnFamilyDescriptor { | |
65 | std::string name; | |
66 | ColumnFamilyOptions options; | |
67 | ColumnFamilyDescriptor() | |
68 | : name(kDefaultColumnFamilyName), options(ColumnFamilyOptions()) {} | |
69 | ColumnFamilyDescriptor(const std::string& _name, | |
70 | const ColumnFamilyOptions& _options) | |
71 | : name(_name), options(_options) {} | |
72 | }; | |
73 | ||
74 | class ColumnFamilyHandle { | |
75 | public: | |
76 | virtual ~ColumnFamilyHandle() {} | |
77 | // Returns the name of the column family associated with the current handle. | |
78 | virtual const std::string& GetName() const = 0; | |
79 | // Returns the ID of the column family associated with the current handle. | |
80 | virtual uint32_t GetID() const = 0; | |
81 | // Fills "*desc" with the up-to-date descriptor of the column family | |
82 | // associated with this handle. Since it fills "*desc" with the up-to-date | |
83 | // information, this call might internally lock and release DB mutex to | |
84 | // access the up-to-date CF options. In addition, all the pointer-typed | |
85 | // options cannot be referenced any longer than the original options exist. | |
86 | // | |
87 | // Note that this function is not supported in RocksDBLite. | |
88 | virtual Status GetDescriptor(ColumnFamilyDescriptor* desc) = 0; | |
89 | // Returns the comparator of the column family associated with the | |
90 | // current handle. | |
91 | virtual const Comparator* GetComparator() const = 0; | |
92 | }; | |
93 | ||
94 | static const int kMajorVersion = __ROCKSDB_MAJOR__; | |
95 | static const int kMinorVersion = __ROCKSDB_MINOR__; | |
96 | ||
97 | // A range of keys | |
98 | struct Range { | |
99 | Slice start; | |
100 | Slice limit; | |
101 | ||
102 | Range() {} | |
103 | Range(const Slice& s, const Slice& l) : start(s), limit(l) {} | |
104 | }; | |
105 | ||
106 | struct RangePtr { | |
107 | const Slice* start; | |
108 | const Slice* limit; | |
109 | ||
110 | RangePtr() : start(nullptr), limit(nullptr) {} | |
111 | RangePtr(const Slice* s, const Slice* l) : start(s), limit(l) {} | |
112 | }; | |
113 | ||
114 | // It is valid that files_checksums and files_checksum_func_names are both | |
115 | // empty (no checksum informaiton is provided for ingestion). Otherwise, | |
116 | // their sizes should be the same as external_files. The file order should | |
117 | // be the same in three vectors and guaranteed by the caller. | |
118 | struct IngestExternalFileArg { | |
119 | ColumnFamilyHandle* column_family = nullptr; | |
120 | std::vector<std::string> external_files; | |
121 | IngestExternalFileOptions options; | |
122 | std::vector<std::string> files_checksums; | |
123 | std::vector<std::string> files_checksum_func_names; | |
124 | }; | |
125 | ||
// Options passed to DB::GetMergeOperands().
struct GetMergeOperandsOptions {
  // Defaults to 0.
  int expected_max_number_of_operands{0};
};
129 | ||
130 | // A collections of table properties objects, where | |
131 | // key: is the table's file name. | |
132 | // value: the table properties object of the given table. | |
133 | typedef std::unordered_map<std::string, std::shared_ptr<const TableProperties>> | |
134 | TablePropertiesCollection; | |
135 | ||
136 | // A DB is a persistent, versioned ordered map from keys to values. | |
137 | // A DB is safe for concurrent access from multiple threads without | |
138 | // any external synchronization. | |
139 | // DB is an abstract base class with one primary implementation (DBImpl) | |
140 | // and a number of wrapper implementations. | |
141 | class DB { | |
142 | public: | |
143 | // Open the database with the specified "name". | |
144 | // Stores a pointer to a heap-allocated database in *dbptr and returns | |
145 | // OK on success. | |
146 | // Stores nullptr in *dbptr and returns a non-OK status on error. | |
147 | // Caller should delete *dbptr when it is no longer needed. | |
148 | static Status Open(const Options& options, const std::string& name, | |
149 | DB** dbptr); | |
150 | ||
151 | // Open the database for read only. All DB interfaces | |
152 | // that modify data, like put/delete, will return error. | |
153 | // If the db is opened in read only mode, then no compactions | |
154 | // will happen. | |
155 | // | |
156 | // Not supported in ROCKSDB_LITE, in which case the function will | |
157 | // return Status::NotSupported. | |
158 | static Status OpenForReadOnly(const Options& options, const std::string& name, | |
159 | DB** dbptr, | |
160 | bool error_if_wal_file_exists = false); | |
161 | ||
162 | // Open the database for read only with column families. When opening DB with | |
163 | // read only, you can specify only a subset of column families in the | |
164 | // database that should be opened. However, you always need to specify default | |
165 | // column family. The default column family name is 'default' and it's stored | |
166 | // in ROCKSDB_NAMESPACE::kDefaultColumnFamilyName | |
167 | // | |
168 | // Not supported in ROCKSDB_LITE, in which case the function will | |
169 | // return Status::NotSupported. | |
170 | static Status OpenForReadOnly( | |
171 | const DBOptions& db_options, const std::string& name, | |
172 | const std::vector<ColumnFamilyDescriptor>& column_families, | |
173 | std::vector<ColumnFamilyHandle*>* handles, DB** dbptr, | |
174 | bool error_if_wal_file_exists = false); | |
175 | ||
176 | // The following OpenAsSecondary functions create a secondary instance that | |
177 | // can dynamically tail the MANIFEST of a primary that must have already been | |
178 | // created. User can call TryCatchUpWithPrimary to make the secondary | |
179 | // instance catch up with primary (WAL tailing is NOT supported now) whenever | |
180 | // the user feels necessary. Column families created by the primary after the | |
181 | // secondary instance starts are currently ignored by the secondary instance. | |
182 | // Column families opened by secondary and dropped by the primary will be | |
183 | // dropped by secondary as well. However the user of the secondary instance | |
184 | // can still access the data of such dropped column family as long as they | |
185 | // do not destroy the corresponding column family handle. | |
186 | // WAL tailing is not supported at present, but will arrive soon. | |
187 | // | |
188 | // The options argument specifies the options to open the secondary instance. | |
189 | // The name argument specifies the name of the primary db that you have used | |
190 | // to open the primary instance. | |
191 | // The secondary_path argument points to a directory where the secondary | |
192 | // instance stores its info log. | |
193 | // The dbptr is an out-arg corresponding to the opened secondary instance. | |
194 | // The pointer points to a heap-allocated database, and the user should | |
195 | // delete it after use. | |
196 | // Open DB as secondary instance with only the default column family. | |
197 | // Return OK on success, non-OK on failures. | |
198 | static Status OpenAsSecondary(const Options& options, const std::string& name, | |
199 | const std::string& secondary_path, DB** dbptr); | |
200 | ||
201 | // Open DB as secondary instance with column families. You can open a subset | |
202 | // of column families in secondary mode. | |
203 | // The db_options specify the database specific options. | |
204 | // The name argument specifies the name of the primary db that you have used | |
205 | // to open the primary instance. | |
206 | // The secondary_path argument points to a directory where the secondary | |
207 | // instance stores its info log. | |
208 | // The column_families argument specifies a list of column families to open. | |
209 | // If any of the column families does not exist, the function returns non-OK | |
210 | // status. | |
211 | // The handles is an out-arg corresponding to the opened database column | |
212 | // family handles. | |
213 | // The dbptr is an out-arg corresponding to the opened secondary instance. | |
214 | // The pointer points to a heap-allocated database, and the caller should | |
215 | // delete it after use. Before deleting the dbptr, the user should also | |
216 | // delete the pointers stored in handles vector. | |
217 | // Return OK on success, non-OK on failures. | |
218 | static Status OpenAsSecondary( | |
219 | const DBOptions& db_options, const std::string& name, | |
220 | const std::string& secondary_path, | |
221 | const std::vector<ColumnFamilyDescriptor>& column_families, | |
222 | std::vector<ColumnFamilyHandle*>* handles, DB** dbptr); | |
223 | ||
224 | // Open DB with column families. | |
225 | // db_options specify database specific options | |
226 | // column_families is the vector of all column families in the database, | |
227 | // containing column family name and options. You need to open ALL column | |
228 | // families in the database. To get the list of column families, you can use | |
229 | // ListColumnFamilies(). Also, you can open only a subset of column families | |
230 | // for read-only access. | |
231 | // The default column family name is 'default' and it's stored | |
232 | // in ROCKSDB_NAMESPACE::kDefaultColumnFamilyName. | |
233 | // If everything is OK, handles will on return be the same size | |
234 | // as column_families --- handles[i] will be a handle that you | |
235 | // will use to operate on column family column_family[i]. | |
236 | // Before deleting the DB, you have to close all column families by calling | |
237 | // DestroyColumnFamilyHandle() with all the handles. | |
238 | static Status Open(const DBOptions& db_options, const std::string& name, | |
239 | const std::vector<ColumnFamilyDescriptor>& column_families, | |
240 | std::vector<ColumnFamilyHandle*>* handles, DB** dbptr); | |
241 | ||
242 | virtual Status Resume() { return Status::NotSupported(); } | |
243 | ||
244 | // Close the DB by releasing resources, closing files etc. This should be | |
245 | // called before calling the destructor so that the caller can get back a | |
246 | // status in case there are any errors. This will not fsync the WAL files. | |
247 | // If syncing is required, the caller must first call SyncWAL(), or Write() | |
248 | // using an empty write batch with WriteOptions.sync=true. | |
249 | // Regardless of the return status, the DB must be freed. | |
250 | // If the return status is Aborted(), closing fails because there is | |
251 | // unreleased snapshot in the system. In this case, users can release | |
252 | // the unreleased snapshots and try again and expect it to succeed. For | |
253 | // other status, recalling Close() will be no-op. | |
254 | // If the return status is NotSupported(), then the DB implementation does | |
255 | // cleanup in the destructor | |
256 | virtual Status Close() { return Status::NotSupported(); } | |
257 | ||
258 | // ListColumnFamilies will open the DB specified by argument name | |
259 | // and return the list of all column families in that DB | |
260 | // through column_families argument. The ordering of | |
261 | // column families in column_families is unspecified. | |
262 | static Status ListColumnFamilies(const DBOptions& db_options, | |
263 | const std::string& name, | |
264 | std::vector<std::string>* column_families); | |
265 | ||
266 | // Abstract class ctor | |
267 | DB() {} | |
268 | // No copying allowed | |
269 | DB(const DB&) = delete; | |
270 | void operator=(const DB&) = delete; | |
271 | ||
272 | virtual ~DB(); | |
273 | ||
274 | // Create a column_family and return the handle of column family | |
275 | // through the argument handle. | |
276 | virtual Status CreateColumnFamily(const ColumnFamilyOptions& options, | |
277 | const std::string& column_family_name, | |
278 | ColumnFamilyHandle** handle); | |
279 | ||
280 | // Bulk create column families with the same column family options. | |
281 | // Return the handles of the column families through the argument handles. | |
282 | // In case of error, the request may succeed partially, and handles will | |
283 | // contain column family handles that it managed to create, and have size | |
284 | // equal to the number of created column families. | |
285 | virtual Status CreateColumnFamilies( | |
286 | const ColumnFamilyOptions& options, | |
287 | const std::vector<std::string>& column_family_names, | |
288 | std::vector<ColumnFamilyHandle*>* handles); | |
289 | ||
290 | // Bulk create column families. | |
291 | // Return the handles of the column families through the argument handles. | |
292 | // In case of error, the request may succeed partially, and handles will | |
293 | // contain column family handles that it managed to create, and have size | |
294 | // equal to the number of created column families. | |
295 | virtual Status CreateColumnFamilies( | |
296 | const std::vector<ColumnFamilyDescriptor>& column_families, | |
297 | std::vector<ColumnFamilyHandle*>* handles); | |
298 | ||
299 | // Drop a column family specified by column_family handle. This call | |
300 | // only records a drop record in the manifest and prevents the column | |
301 | // family from flushing and compacting. | |
302 | virtual Status DropColumnFamily(ColumnFamilyHandle* column_family); | |
303 | ||
304 | // Bulk drop column families. This call only records drop records in the | |
305 | // manifest and prevents the column families from flushing and compacting. | |
306 | // In case of error, the request may succeed partially. User may call | |
307 | // ListColumnFamilies to check the result. | |
308 | virtual Status DropColumnFamilies( | |
309 | const std::vector<ColumnFamilyHandle*>& column_families); | |
310 | ||
311 | // Close a column family specified by column_family handle and destroy | |
312 | // the column family handle specified to avoid double deletion. This call | |
313 | // deletes the column family handle by default. Use this method to | |
314 | // close column family instead of deleting column family handle directly | |
315 | virtual Status DestroyColumnFamilyHandle(ColumnFamilyHandle* column_family); | |
316 | ||
317 | // Set the database entry for "key" to "value". | |
318 | // If "key" already exists, it will be overwritten. | |
319 | // Returns OK on success, and a non-OK status on error. | |
320 | // Note: consider setting options.sync = true. | |
321 | virtual Status Put(const WriteOptions& options, | |
322 | ColumnFamilyHandle* column_family, const Slice& key, | |
323 | const Slice& value) = 0; | |
324 | virtual Status Put(const WriteOptions& options, const Slice& key, | |
325 | const Slice& value) { | |
326 | return Put(options, DefaultColumnFamily(), key, value); | |
327 | } | |
328 | ||
329 | // Remove the database entry (if any) for "key". Returns OK on | |
330 | // success, and a non-OK status on error. It is not an error if "key" | |
331 | // did not exist in the database. | |
332 | // Note: consider setting options.sync = true. | |
333 | virtual Status Delete(const WriteOptions& options, | |
334 | ColumnFamilyHandle* column_family, | |
335 | const Slice& key) = 0; | |
336 | virtual Status Delete(const WriteOptions& options, const Slice& key) { | |
337 | return Delete(options, DefaultColumnFamily(), key); | |
338 | } | |
339 | ||
340 | // Remove the database entry for "key". Requires that the key exists | |
341 | // and was not overwritten. Returns OK on success, and a non-OK status | |
342 | // on error. It is not an error if "key" did not exist in the database. | |
343 | // | |
344 | // If a key is overwritten (by calling Put() multiple times), then the result | |
345 | // of calling SingleDelete() on this key is undefined. SingleDelete() only | |
346 | // behaves correctly if there has been only one Put() for this key since the | |
347 | // previous call to SingleDelete() for this key. | |
348 | // | |
349 | // This feature is currently an experimental performance optimization | |
350 | // for a very specific workload. It is up to the caller to ensure that | |
351 | // SingleDelete is only used for a key that is not deleted using Delete() or | |
352 | // written using Merge(). Mixing SingleDelete operations with Deletes and | |
353 | // Merges can result in undefined behavior. | |
354 | // | |
355 | // Note: consider setting options.sync = true. | |
356 | virtual Status SingleDelete(const WriteOptions& options, | |
357 | ColumnFamilyHandle* column_family, | |
358 | const Slice& key) = 0; | |
359 | virtual Status SingleDelete(const WriteOptions& options, const Slice& key) { | |
360 | return SingleDelete(options, DefaultColumnFamily(), key); | |
361 | } | |
362 | ||
363 | // Removes the database entries in the range ["begin_key", "end_key"), i.e., | |
364 | // including "begin_key" and excluding "end_key". Returns OK on success, and | |
365 | // a non-OK status on error. It is not an error if the database does not | |
366 | // contain any existing data in the range ["begin_key", "end_key"). | |
367 | // | |
368 | // If "end_key" comes before "begin_key" according to the user's comparator, | |
369 | // a `Status::InvalidArgument` is returned. | |
370 | // | |
371 | // This feature is now usable in production, with the following caveats: | |
372 | // 1) Accumulating many range tombstones in the memtable will degrade read | |
373 | // performance; this can be avoided by manually flushing occasionally. | |
374 | // 2) Limiting the maximum number of open files in the presence of range | |
375 | // tombstones can degrade read performance. To avoid this problem, set | |
376 | // max_open_files to -1 whenever possible. | |
377 | virtual Status DeleteRange(const WriteOptions& options, | |
378 | ColumnFamilyHandle* column_family, | |
379 | const Slice& begin_key, const Slice& end_key); | |
380 | ||
381 | // Merge the database entry for "key" with "value". Returns OK on success, | |
382 | // and a non-OK status on error. The semantics of this operation is | |
383 | // determined by the user provided merge_operator when opening DB. | |
384 | // Note: consider setting options.sync = true. | |
385 | virtual Status Merge(const WriteOptions& options, | |
386 | ColumnFamilyHandle* column_family, const Slice& key, | |
387 | const Slice& value) = 0; | |
388 | virtual Status Merge(const WriteOptions& options, const Slice& key, | |
389 | const Slice& value) { | |
390 | return Merge(options, DefaultColumnFamily(), key, value); | |
391 | } | |
392 | ||
393 | // Apply the specified updates to the database. | |
394 | // If `updates` contains no update, WAL will still be synced if | |
395 | // options.sync=true. | |
396 | // Returns OK on success, non-OK on failure. | |
397 | // Note: consider setting options.sync = true. | |
398 | virtual Status Write(const WriteOptions& options, WriteBatch* updates) = 0; | |
399 | ||
400 | // If the database contains an entry for "key" store the | |
401 | // corresponding value in *value and return OK. | |
402 | // | |
403 | // If timestamp is enabled and a non-null timestamp pointer is passed in, | |
404 | // timestamp is returned. | |
405 | // | |
406 | // If there is no entry for "key" leave *value unchanged and return | |
407 | // a status for which Status::IsNotFound() returns true. | |
408 | // | |
409 | // May return some other Status on an error. | |
410 | virtual inline Status Get(const ReadOptions& options, | |
411 | ColumnFamilyHandle* column_family, const Slice& key, | |
412 | std::string* value) { | |
413 | assert(value != nullptr); | |
414 | PinnableSlice pinnable_val(value); | |
415 | assert(!pinnable_val.IsPinned()); | |
416 | auto s = Get(options, column_family, key, &pinnable_val); | |
417 | if (s.ok() && pinnable_val.IsPinned()) { | |
418 | value->assign(pinnable_val.data(), pinnable_val.size()); | |
419 | } // else value is already assigned | |
420 | return s; | |
421 | } | |
422 | virtual Status Get(const ReadOptions& options, | |
423 | ColumnFamilyHandle* column_family, const Slice& key, | |
424 | PinnableSlice* value) = 0; | |
425 | virtual Status Get(const ReadOptions& options, const Slice& key, | |
426 | std::string* value) { | |
427 | return Get(options, DefaultColumnFamily(), key, value); | |
428 | } | |
429 | ||
430 | // Get() methods that return timestamp. Derived DB classes don't need to worry | |
431 | // about this group of methods if they don't care about timestamp feature. | |
432 | virtual inline Status Get(const ReadOptions& options, | |
433 | ColumnFamilyHandle* column_family, const Slice& key, | |
434 | std::string* value, std::string* timestamp) { | |
435 | assert(value != nullptr); | |
436 | PinnableSlice pinnable_val(value); | |
437 | assert(!pinnable_val.IsPinned()); | |
438 | auto s = Get(options, column_family, key, &pinnable_val, timestamp); | |
439 | if (s.ok() && pinnable_val.IsPinned()) { | |
440 | value->assign(pinnable_val.data(), pinnable_val.size()); | |
441 | } // else value is already assigned | |
442 | return s; | |
443 | } | |
444 | virtual Status Get(const ReadOptions& /*options*/, | |
445 | ColumnFamilyHandle* /*column_family*/, | |
446 | const Slice& /*key*/, PinnableSlice* /*value*/, | |
447 | std::string* /*timestamp*/) { | |
448 | return Status::NotSupported( | |
449 | "Get() that returns timestamp is not implemented."); | |
450 | } | |
451 | virtual Status Get(const ReadOptions& options, const Slice& key, | |
452 | std::string* value, std::string* timestamp) { | |
453 | return Get(options, DefaultColumnFamily(), key, value, timestamp); | |
454 | } | |
455 | ||
456 | // Returns all the merge operands corresponding to the key. If the | |
457 | // number of merge operands in DB is greater than | |
458 | // get_merge_operands_options->expected_max_number_of_operands | |
459 | // no merge operands are returned and status is Incomplete. Merge operands | |
460 | // returned are in the order of insertion. | |
461 | // merge_operands- Points to an array of at-least | |
462 | // get_merge_operands_options->expected_max_number_of_operands and the | |
463 | // caller is responsible for allocating it. If the status | |
464 | // returned is Incomplete then number_of_operands will contain | |
465 | // the total number of merge operands found in DB for key. | |
466 | virtual Status GetMergeOperands( | |
467 | const ReadOptions& options, ColumnFamilyHandle* column_family, | |
468 | const Slice& key, PinnableSlice* merge_operands, | |
469 | GetMergeOperandsOptions* get_merge_operands_options, | |
470 | int* number_of_operands) = 0; | |
471 | ||
472 | // Consistent Get of many keys across column families without the need | |
473 | // for an explicit snapshot. NOTE: the implementation of this MultiGet API | |
474 | // does not have the performance benefits of the void-returning MultiGet | |
475 | // functions. | |
476 | // | |
477 | // If keys[i] does not exist in the database, then the i'th returned | |
478 | // status will be one for which Status::IsNotFound() is true, and | |
479 | // (*values)[i] will be set to some arbitrary value (often ""). Otherwise, | |
480 | // the i'th returned status will have Status::ok() true, and (*values)[i] | |
481 | // will store the value associated with keys[i]. | |
482 | // | |
483 | // (*values) will always be resized to be the same size as (keys). | |
484 | // Similarly, the number of returned statuses will be the number of keys. | |
485 | // Note: keys will not be "de-duplicated". Duplicate keys will return | |
486 | // duplicate values in order. | |
487 | virtual std::vector<Status> MultiGet( | |
488 | const ReadOptions& options, | |
489 | const std::vector<ColumnFamilyHandle*>& column_family, | |
490 | const std::vector<Slice>& keys, std::vector<std::string>* values) = 0; | |
491 | virtual std::vector<Status> MultiGet(const ReadOptions& options, | |
492 | const std::vector<Slice>& keys, | |
493 | std::vector<std::string>* values) { | |
494 | return MultiGet( | |
495 | options, | |
496 | std::vector<ColumnFamilyHandle*>(keys.size(), DefaultColumnFamily()), | |
497 | keys, values); | |
498 | } | |
499 | ||
500 | virtual std::vector<Status> MultiGet( | |
501 | const ReadOptions& /*options*/, | |
502 | const std::vector<ColumnFamilyHandle*>& /*column_family*/, | |
503 | const std::vector<Slice>& keys, std::vector<std::string>* /*values*/, | |
504 | std::vector<std::string>* /*timestamps*/) { | |
505 | return std::vector<Status>( | |
506 | keys.size(), Status::NotSupported( | |
507 | "MultiGet() returning timestamps not implemented.")); | |
508 | } | |
509 | virtual std::vector<Status> MultiGet(const ReadOptions& options, | |
510 | const std::vector<Slice>& keys, | |
511 | std::vector<std::string>* values, | |
512 | std::vector<std::string>* timestamps) { | |
513 | return MultiGet( | |
514 | options, | |
515 | std::vector<ColumnFamilyHandle*>(keys.size(), DefaultColumnFamily()), | |
516 | keys, values, timestamps); | |
517 | } | |
518 | ||
519 | // Overloaded MultiGet API that improves performance by batching operations | |
520 | // in the read path for greater efficiency. Currently, only the block based | |
521 | // table format with full filters are supported. Other table formats such | |
522 | // as plain table, block based table with block based filters and | |
523 | // partitioned indexes will still work, but will not get any performance | |
524 | // benefits. | |
525 | // Parameters - | |
526 | // options - ReadOptions | |
527 | // column_family - ColumnFamilyHandle* that the keys belong to. All the keys | |
528 | // passed to the API are restricted to a single column family | |
529 | // num_keys - Number of keys to lookup | |
530 | // keys - Pointer to C style array of key Slices with num_keys elements | |
531 | // values - Pointer to C style array of PinnableSlices with num_keys elements | |
532 | // statuses - Pointer to C style array of Status with num_keys elements | |
533 | // sorted_input - If true, it means the input keys are already sorted by key | |
534 | // order, so the MultiGet() API doesn't have to sort them | |
535 | // again. If false, the keys will be copied and sorted | |
536 | // internally by the API - the input array will not be | |
537 | // modified | |
538 | virtual void MultiGet(const ReadOptions& options, | |
539 | ColumnFamilyHandle* column_family, | |
540 | const size_t num_keys, const Slice* keys, | |
541 | PinnableSlice* values, Status* statuses, | |
542 | const bool /*sorted_input*/ = false) { | |
543 | std::vector<ColumnFamilyHandle*> cf; | |
544 | std::vector<Slice> user_keys; | |
545 | std::vector<Status> status; | |
546 | std::vector<std::string> vals; | |
547 | ||
548 | for (size_t i = 0; i < num_keys; ++i) { | |
549 | cf.emplace_back(column_family); | |
550 | user_keys.emplace_back(keys[i]); | |
551 | } | |
552 | status = MultiGet(options, cf, user_keys, &vals); | |
553 | std::copy(status.begin(), status.end(), statuses); | |
554 | for (auto& value : vals) { | |
555 | values->PinSelf(value); | |
556 | values++; | |
557 | } | |
558 | } | |
559 | ||
560 | virtual void MultiGet(const ReadOptions& options, | |
561 | ColumnFamilyHandle* column_family, | |
562 | const size_t num_keys, const Slice* keys, | |
563 | PinnableSlice* values, std::string* timestamps, | |
564 | Status* statuses, const bool /*sorted_input*/ = false) { | |
565 | std::vector<ColumnFamilyHandle*> cf; | |
566 | std::vector<Slice> user_keys; | |
567 | std::vector<Status> status; | |
568 | std::vector<std::string> vals; | |
569 | std::vector<std::string> tss; | |
570 | ||
571 | for (size_t i = 0; i < num_keys; ++i) { | |
572 | cf.emplace_back(column_family); | |
573 | user_keys.emplace_back(keys[i]); | |
574 | } | |
575 | status = MultiGet(options, cf, user_keys, &vals, &tss); | |
576 | std::copy(status.begin(), status.end(), statuses); | |
577 | std::copy(tss.begin(), tss.end(), timestamps); | |
578 | for (auto& value : vals) { | |
579 | values->PinSelf(value); | |
580 | values++; | |
581 | } | |
582 | } | |
583 | ||
584 | // Overloaded MultiGet API that improves performance by batching operations | |
585 | // in the read path for greater efficiency. Currently, only the block based | |
586 | // table format with full filters are supported. Other table formats such | |
587 | // as plain table, block based table with block based filters and | |
588 | // partitioned indexes will still work, but will not get any performance | |
589 | // benefits. | |
590 | // Parameters - | |
591 | // options - ReadOptions | |
592 | // column_family - ColumnFamilyHandle* that the keys belong to. All the keys | |
593 | // passed to the API are restricted to a single column family | |
594 | // num_keys - Number of keys to lookup | |
595 | // keys - Pointer to C style array of key Slices with num_keys elements | |
596 | // values - Pointer to C style array of PinnableSlices with num_keys elements | |
597 | // statuses - Pointer to C style array of Status with num_keys elements | |
598 | // sorted_input - If true, it means the input keys are already sorted by key | |
599 | // order, so the MultiGet() API doesn't have to sort them | |
600 | // again. If false, the keys will be copied and sorted | |
601 | // internally by the API - the input array will not be | |
602 | // modified | |
603 | virtual void MultiGet(const ReadOptions& options, const size_t num_keys, | |
604 | ColumnFamilyHandle** column_families, const Slice* keys, | |
605 | PinnableSlice* values, Status* statuses, | |
606 | const bool /*sorted_input*/ = false) { | |
607 | std::vector<ColumnFamilyHandle*> cf; | |
608 | std::vector<Slice> user_keys; | |
609 | std::vector<Status> status; | |
610 | std::vector<std::string> vals; | |
611 | ||
612 | for (size_t i = 0; i < num_keys; ++i) { | |
613 | cf.emplace_back(column_families[i]); | |
614 | user_keys.emplace_back(keys[i]); | |
615 | } | |
616 | status = MultiGet(options, cf, user_keys, &vals); | |
617 | std::copy(status.begin(), status.end(), statuses); | |
618 | for (auto& value : vals) { | |
619 | values->PinSelf(value); | |
620 | values++; | |
621 | } | |
622 | } | |
623 | virtual void MultiGet(const ReadOptions& options, const size_t num_keys, | |
624 | ColumnFamilyHandle** column_families, const Slice* keys, | |
625 | PinnableSlice* values, std::string* timestamps, | |
626 | Status* statuses, const bool /*sorted_input*/ = false) { | |
627 | std::vector<ColumnFamilyHandle*> cf; | |
628 | std::vector<Slice> user_keys; | |
629 | std::vector<Status> status; | |
630 | std::vector<std::string> vals; | |
631 | std::vector<std::string> tss; | |
632 | ||
633 | for (size_t i = 0; i < num_keys; ++i) { | |
634 | cf.emplace_back(column_families[i]); | |
635 | user_keys.emplace_back(keys[i]); | |
636 | } | |
637 | status = MultiGet(options, cf, user_keys, &vals, &tss); | |
638 | std::copy(status.begin(), status.end(), statuses); | |
639 | std::copy(tss.begin(), tss.end(), timestamps); | |
640 | for (auto& value : vals) { | |
641 | values->PinSelf(value); | |
642 | values++; | |
643 | } | |
644 | } | |
645 | ||
646 | // If the key definitely does not exist in the database, then this method | |
647 | // returns false, else true. If the caller wants to obtain value when the key | |
648 | // is found in memory, a bool for 'value_found' must be passed. 'value_found' | |
649 | // will be true on return if value has been set properly. | |
650 | // This check is potentially lighter-weight than invoking DB::Get(). One way | |
651 | // to make this lighter weight is to avoid doing any IOs. | |
652 | // Default implementation here returns true and sets 'value_found' to false | |
653 | virtual bool KeyMayExist(const ReadOptions& /*options*/, | |
654 | ColumnFamilyHandle* /*column_family*/, | |
655 | const Slice& /*key*/, std::string* /*value*/, | |
656 | std::string* /*timestamp*/, | |
657 | bool* value_found = nullptr) { | |
658 | if (value_found != nullptr) { | |
659 | *value_found = false; | |
660 | } | |
661 | return true; | |
662 | } | |
663 | ||
664 | virtual bool KeyMayExist(const ReadOptions& options, | |
665 | ColumnFamilyHandle* column_family, const Slice& key, | |
666 | std::string* value, bool* value_found = nullptr) { | |
667 | return KeyMayExist(options, column_family, key, value, | |
668 | /*timestamp=*/nullptr, value_found); | |
669 | } | |
670 | ||
671 | virtual bool KeyMayExist(const ReadOptions& options, const Slice& key, | |
672 | std::string* value, bool* value_found = nullptr) { | |
673 | return KeyMayExist(options, DefaultColumnFamily(), key, value, value_found); | |
674 | } | |
675 | ||
676 | virtual bool KeyMayExist(const ReadOptions& options, const Slice& key, | |
677 | std::string* value, std::string* timestamp, | |
678 | bool* value_found = nullptr) { | |
679 | return KeyMayExist(options, DefaultColumnFamily(), key, value, timestamp, | |
680 | value_found); | |
681 | } | |
682 | ||
683 | // Return a heap-allocated iterator over the contents of the database. | |
684 | // The result of NewIterator() is initially invalid (caller must | |
685 | // call one of the Seek methods on the iterator before using it). | |
686 | // | |
687 | // Caller should delete the iterator when it is no longer needed. | |
688 | // The returned iterator should be deleted before this db is deleted. | |
689 | virtual Iterator* NewIterator(const ReadOptions& options, | |
690 | ColumnFamilyHandle* column_family) = 0; | |
691 | virtual Iterator* NewIterator(const ReadOptions& options) { | |
692 | return NewIterator(options, DefaultColumnFamily()); | |
693 | } | |
  // Returns iterators from a consistent database state across multiple
  // column families. The iterators are heap allocated, are stored through
  // the `iterators` out-parameter, and need to be deleted before the db is
  // deleted.
  // NOTE(review): presumably one iterator is produced per entry of
  // `column_families`, in the same order - confirm against the implementation.
  virtual Status NewIterators(
      const ReadOptions& options,
      const std::vector<ColumnFamilyHandle*>& column_families,
      std::vector<Iterator*>* iterators) = 0;
701 | ||
  // Returns a handle to the current DB state. Iterators created with
  // this handle will all observe a stable snapshot of the current DB
  // state. The caller must call ReleaseSnapshot(result) when the
  // snapshot is no longer needed.
  //
  // Returns nullptr if the DB fails to take a snapshot or does not
  // support snapshots, so callers must check the result.
  virtual const Snapshot* GetSnapshot() = 0;
710 | ||
  // Releases a snapshot previously acquired with GetSnapshot(). The caller
  // must not use "snapshot" after this call.
  virtual void ReleaseSnapshot(const Snapshot* snapshot) = 0;
714 | ||
715 | #ifndef ROCKSDB_LITE | |
  // Contains all valid property arguments for GetProperty().
  //
  // Each member is a string constant holding the property name quoted in the
  // comment above it; the constants are only declared here and are defined
  // elsewhere.
  //
  // NOTE: Property names cannot end in numbers since those are interpreted as
  //       arguments, e.g., see kNumFilesAtLevelPrefix.
  struct Properties {
    //  "rocksdb.num-files-at-level<N>" - returns string containing the number
    //      of files at level <N>, where <N> is an ASCII representation of a
    //      level number (e.g., "0").
    static const std::string kNumFilesAtLevelPrefix;

    //  "rocksdb.compression-ratio-at-level<N>" - returns string containing the
    //      compression ratio of data at level <N>, where <N> is an ASCII
    //      representation of a level number (e.g., "0"). Here, compression
    //      ratio is defined as uncompressed data size / compressed file size.
    //      Returns "-1.0" if no open files at level <N>.
    static const std::string kCompressionRatioAtLevelPrefix;

    //  "rocksdb.stats" - returns a multi-line string containing the data
    //      described by kCFStats followed by the data described by kDBStats.
    static const std::string kStats;

    //  "rocksdb.sstables" - returns a multi-line string summarizing current
    //      SST files.
    static const std::string kSSTables;

    //  "rocksdb.cfstats" - Both of "rocksdb.cfstats-no-file-histogram" and
    //      "rocksdb.cf-file-histogram" together. See below for description
    //      of the two.
    static const std::string kCFStats;

    //  "rocksdb.cfstats-no-file-histogram" - returns a multi-line string with
    //      general column family stats per-level over db's lifetime ("L<n>"),
    //      aggregated over db's lifetime ("Sum"), and aggregated over the
    //      interval since the last retrieval ("Int").
    //  It could also be used to return the stats in the format of the map.
    //  In this case there will be a pair of string to array of double for
    //  each level as well as for "Sum". "Int" stats will not be affected
    //  when this form of stats is retrieved.
    static const std::string kCFStatsNoFileHistogram;

    //  "rocksdb.cf-file-histogram" - print out how many file reads to every
    //      level, as well as the histogram of latency of single requests.
    static const std::string kCFFileHistogram;

    //  "rocksdb.dbstats" - returns a multi-line string with general database
    //      stats, both cumulative (over the db's lifetime) and interval (since
    //      the last retrieval of kDBStats).
    static const std::string kDBStats;

    //  "rocksdb.levelstats" - returns multi-line string containing the number
    //      of files per level and total size of each level (MB).
    static const std::string kLevelStats;

    //  "rocksdb.num-immutable-mem-table" - returns number of immutable
    //      memtables that have not yet been flushed.
    static const std::string kNumImmutableMemTable;

    //  "rocksdb.num-immutable-mem-table-flushed" - returns number of immutable
    //      memtables that have already been flushed.
    static const std::string kNumImmutableMemTableFlushed;

    //  "rocksdb.mem-table-flush-pending" - returns 1 if a memtable flush is
    //      pending; otherwise, returns 0.
    static const std::string kMemTableFlushPending;

    //  "rocksdb.num-running-flushes" - returns the number of currently running
    //      flushes.
    static const std::string kNumRunningFlushes;

    //  "rocksdb.compaction-pending" - returns 1 if at least one compaction is
    //      pending; otherwise, returns 0.
    static const std::string kCompactionPending;

    //  "rocksdb.num-running-compactions" - returns the number of currently
    //      running compactions.
    static const std::string kNumRunningCompactions;

    //  "rocksdb.background-errors" - returns accumulated number of background
    //      errors.
    static const std::string kBackgroundErrors;

    //  "rocksdb.cur-size-active-mem-table" - returns approximate size of active
    //      memtable (bytes).
    static const std::string kCurSizeActiveMemTable;

    //  "rocksdb.cur-size-all-mem-tables" - returns approximate size of active
    //      and unflushed immutable memtables (bytes).
    static const std::string kCurSizeAllMemTables;

    //  "rocksdb.size-all-mem-tables" - returns approximate size of active,
    //      unflushed immutable, and pinned immutable memtables (bytes).
    static const std::string kSizeAllMemTables;

    //  "rocksdb.num-entries-active-mem-table" - returns total number of entries
    //      in the active memtable.
    static const std::string kNumEntriesActiveMemTable;

    //  "rocksdb.num-entries-imm-mem-tables" - returns total number of entries
    //      in the unflushed immutable memtables.
    static const std::string kNumEntriesImmMemTables;

    //  "rocksdb.num-deletes-active-mem-table" - returns total number of delete
    //      entries in the active memtable.
    static const std::string kNumDeletesActiveMemTable;

    //  "rocksdb.num-deletes-imm-mem-tables" - returns total number of delete
    //      entries in the unflushed immutable memtables.
    static const std::string kNumDeletesImmMemTables;

    //  "rocksdb.estimate-num-keys" - returns estimated number of total keys in
    //      the active and unflushed immutable memtables and storage.
    static const std::string kEstimateNumKeys;

    //  "rocksdb.estimate-table-readers-mem" - returns estimated memory used for
    //      reading SST tables, excluding memory used in block cache (e.g.,
    //      filter and index blocks).
    static const std::string kEstimateTableReadersMem;

    //  "rocksdb.is-file-deletions-enabled" - returns 0 if deletion of obsolete
    //      files is enabled; otherwise, returns a non-zero number.
    static const std::string kIsFileDeletionsEnabled;

    //  "rocksdb.num-snapshots" - returns number of unreleased snapshots of the
    //      database.
    static const std::string kNumSnapshots;

    //  "rocksdb.oldest-snapshot-time" - returns number representing unix
    //      timestamp of oldest unreleased snapshot.
    static const std::string kOldestSnapshotTime;

    //  "rocksdb.oldest-snapshot-sequence" - returns number representing
    //      sequence number of oldest unreleased snapshot.
    static const std::string kOldestSnapshotSequence;

    //  "rocksdb.num-live-versions" - returns number of live versions. `Version`
    //      is an internal data structure. See version_set.h for details. More
    //      live versions often mean more SST files are held from being deleted,
    //      by iterators or unfinished compactions.
    static const std::string kNumLiveVersions;

    //  "rocksdb.current-super-version-number" - returns number of current LSM
    //  version. It is a uint64_t integer number, incremented after there is
    //  any change to the LSM tree. The number is not preserved after restarting
    //  the DB. After DB restart, it will start from 0 again.
    static const std::string kCurrentSuperVersionNumber;

    //  "rocksdb.estimate-live-data-size" - returns an estimate of the amount of
    //      live data in bytes.
    static const std::string kEstimateLiveDataSize;

    //  "rocksdb.min-log-number-to-keep" - returns the minimum log number of the
    //      log files that should be kept.
    static const std::string kMinLogNumberToKeep;

    //  "rocksdb.min-obsolete-sst-number-to-keep" - returns the minimum file
    //      number for an obsolete SST to be kept. The max value of `uint64_t`
    //      will be returned if all obsolete files can be deleted.
    static const std::string kMinObsoleteSstNumberToKeep;

    //  "rocksdb.total-sst-files-size" - returns total size (bytes) of all SST
    //      files.
    //  WARNING: may slow down online queries if there are too many files.
    static const std::string kTotalSstFilesSize;

    //  "rocksdb.live-sst-files-size" - returns total size (bytes) of all SST
    //      files belonging to the latest LSM tree.
    static const std::string kLiveSstFilesSize;

    //  "rocksdb.base-level" - returns number of level to which L0 data will be
    //      compacted.
    static const std::string kBaseLevel;

    //  "rocksdb.estimate-pending-compaction-bytes" - returns estimated total
    //      number of bytes compaction needs to rewrite to get all levels down
    //      to under target size. Not valid for other compactions than level-
    //      based.
    static const std::string kEstimatePendingCompactionBytes;

    //  "rocksdb.aggregated-table-properties" - returns a string representation
    //      of the aggregated table properties of the target column family.
    static const std::string kAggregatedTableProperties;

    //  "rocksdb.aggregated-table-properties-at-level<N>", same as the previous
    //      one but only returns the aggregated table properties of the
    //      specified level "N" at the target column family.
    static const std::string kAggregatedTablePropertiesAtLevel;

    //  "rocksdb.actual-delayed-write-rate" - returns the current actual delayed
    //      write rate. 0 means no delay.
    static const std::string kActualDelayedWriteRate;

    //  "rocksdb.is-write-stopped" - returns 1 if write has been stopped.
    static const std::string kIsWriteStopped;

    //  "rocksdb.estimate-oldest-key-time" - returns an estimation of
    //      oldest key timestamp in the DB. Currently only available for
    //      FIFO compaction with
    //      compaction_options_fifo.allow_compaction = false.
    static const std::string kEstimateOldestKeyTime;

    //  "rocksdb.block-cache-capacity" - returns block cache capacity.
    static const std::string kBlockCacheCapacity;

    //  "rocksdb.block-cache-usage" - returns the memory size for the entries
    //      residing in block cache.
    static const std::string kBlockCacheUsage;

    // "rocksdb.block-cache-pinned-usage" - returns the memory size for the
    //      entries being pinned.
    static const std::string kBlockCachePinnedUsage;

    // "rocksdb.options-statistics" - returns multi-line string
    //      of options.statistics
    static const std::string kOptionsStatistics;
  };
931 | #endif /* ROCKSDB_LITE */ | |
932 | ||
933 | // DB implementations can export properties about their state via this method. | |
934 | // If "property" is a valid property understood by this DB implementation (see | |
935 | // Properties struct above for valid options), fills "*value" with its current | |
936 | // value and returns true. Otherwise, returns false. | |
937 | virtual bool GetProperty(ColumnFamilyHandle* column_family, | |
938 | const Slice& property, std::string* value) = 0; | |
939 | virtual bool GetProperty(const Slice& property, std::string* value) { | |
940 | return GetProperty(DefaultColumnFamily(), property, value); | |
941 | } | |
942 | virtual bool GetMapProperty(ColumnFamilyHandle* column_family, | |
943 | const Slice& property, | |
944 | std::map<std::string, std::string>* value) = 0; | |
945 | virtual bool GetMapProperty(const Slice& property, | |
946 | std::map<std::string, std::string>* value) { | |
947 | return GetMapProperty(DefaultColumnFamily(), property, value); | |
948 | } | |
949 | ||
950 | // Similar to GetProperty(), but only works for a subset of properties whose | |
951 | // return value is an integer. Return the value by integer. Supported | |
952 | // properties: | |
953 | // "rocksdb.num-immutable-mem-table" | |
954 | // "rocksdb.mem-table-flush-pending" | |
955 | // "rocksdb.compaction-pending" | |
956 | // "rocksdb.background-errors" | |
957 | // "rocksdb.cur-size-active-mem-table" | |
958 | // "rocksdb.cur-size-all-mem-tables" | |
959 | // "rocksdb.size-all-mem-tables" | |
960 | // "rocksdb.num-entries-active-mem-table" | |
961 | // "rocksdb.num-entries-imm-mem-tables" | |
962 | // "rocksdb.num-deletes-active-mem-table" | |
963 | // "rocksdb.num-deletes-imm-mem-tables" | |
964 | // "rocksdb.estimate-num-keys" | |
965 | // "rocksdb.estimate-table-readers-mem" | |
966 | // "rocksdb.is-file-deletions-enabled" | |
967 | // "rocksdb.num-snapshots" | |
968 | // "rocksdb.oldest-snapshot-time" | |
969 | // "rocksdb.num-live-versions" | |
970 | // "rocksdb.current-super-version-number" | |
971 | // "rocksdb.estimate-live-data-size" | |
972 | // "rocksdb.min-log-number-to-keep" | |
973 | // "rocksdb.min-obsolete-sst-number-to-keep" | |
974 | // "rocksdb.total-sst-files-size" | |
975 | // "rocksdb.live-sst-files-size" | |
976 | // "rocksdb.base-level" | |
977 | // "rocksdb.estimate-pending-compaction-bytes" | |
978 | // "rocksdb.num-running-compactions" | |
979 | // "rocksdb.num-running-flushes" | |
980 | // "rocksdb.actual-delayed-write-rate" | |
981 | // "rocksdb.is-write-stopped" | |
982 | // "rocksdb.estimate-oldest-key-time" | |
983 | // "rocksdb.block-cache-capacity" | |
984 | // "rocksdb.block-cache-usage" | |
985 | // "rocksdb.block-cache-pinned-usage" | |
986 | virtual bool GetIntProperty(ColumnFamilyHandle* column_family, | |
987 | const Slice& property, uint64_t* value) = 0; | |
988 | virtual bool GetIntProperty(const Slice& property, uint64_t* value) { | |
989 | return GetIntProperty(DefaultColumnFamily(), property, value); | |
990 | } | |
991 | ||
992 | // Reset internal stats for DB and all column families. | |
993 | // Note this doesn't reset options.statistics as it is not owned by | |
994 | // DB. | |
995 | virtual Status ResetStats() { | |
996 | return Status::NotSupported("Not implemented"); | |
997 | } | |
998 | ||
  // Same as GetIntProperty(), but this one returns the aggregated int
  // property from all column families. The result is written to "*value";
  // the bool return reports success.
  virtual bool GetAggregatedIntProperty(const Slice& property,
                                        uint64_t* value) = 0;
1003 | ||
1004 | // Flags for DB::GetSizeApproximation that specify whether memtable | |
1005 | // stats should be included, or file stats approximation or both | |
1006 | enum SizeApproximationFlags : uint8_t { | |
1007 | NONE = 0, | |
1008 | INCLUDE_MEMTABLES = 1 << 0, | |
1009 | INCLUDE_FILES = 1 << 1 | |
1010 | }; | |
1011 | ||
  // For each i in [0,n-1], store in "sizes[i]", the approximate
  // file system space used by keys in "[ranges[i].start .. ranges[i].limit)"
  // in a single column family.
  //
  // Note that the returned sizes measure file system space usage, so
  // if the user data compresses by a factor of ten, the returned
  // sizes will be one-tenth the size of the corresponding user data size.
  virtual Status GetApproximateSizes(const SizeApproximationOptions& options,
                                     ColumnFamilyHandle* column_family,
                                     const Range* ranges, int n,
                                     uint64_t* sizes) = 0;
1023 | ||
1024 | // Simpler versions of the GetApproximateSizes() method above. | |
1025 | // The include_flags argumenbt must of type DB::SizeApproximationFlags | |
1026 | // and can not be NONE. | |
1027 | virtual void GetApproximateSizes(ColumnFamilyHandle* column_family, | |
1028 | const Range* ranges, int n, uint64_t* sizes, | |
1029 | uint8_t include_flags = INCLUDE_FILES) { | |
1030 | SizeApproximationOptions options; | |
1031 | options.include_memtabtles = | |
1032 | (include_flags & SizeApproximationFlags::INCLUDE_MEMTABLES) != 0; | |
1033 | options.include_files = | |
1034 | (include_flags & SizeApproximationFlags::INCLUDE_FILES) != 0; | |
1035 | GetApproximateSizes(options, column_family, ranges, n, sizes); | |
1036 | } | |
1037 | virtual void GetApproximateSizes(const Range* ranges, int n, uint64_t* sizes, | |
1038 | uint8_t include_flags = INCLUDE_FILES) { | |
1039 | GetApproximateSizes(DefaultColumnFamily(), ranges, n, sizes, include_flags); | |
1040 | } | |
1041 | ||
1042 | // The method is similar to GetApproximateSizes, except it | |
1043 | // returns approximate number of records in memtables. | |
1044 | virtual void GetApproximateMemTableStats(ColumnFamilyHandle* column_family, | |
1045 | const Range& range, | |
1046 | uint64_t* const count, | |
1047 | uint64_t* const size) = 0; | |
1048 | virtual void GetApproximateMemTableStats(const Range& range, | |
1049 | uint64_t* const count, | |
1050 | uint64_t* const size) { | |
1051 | GetApproximateMemTableStats(DefaultColumnFamily(), range, count, size); | |
1052 | } | |
1053 | ||
1054 | // Deprecated versions of GetApproximateSizes | |
1055 | ROCKSDB_DEPRECATED_FUNC virtual void GetApproximateSizes( | |
1056 | const Range* range, int n, uint64_t* sizes, bool include_memtable) { | |
1057 | uint8_t include_flags = SizeApproximationFlags::INCLUDE_FILES; | |
1058 | if (include_memtable) { | |
1059 | include_flags |= SizeApproximationFlags::INCLUDE_MEMTABLES; | |
1060 | } | |
1061 | GetApproximateSizes(DefaultColumnFamily(), range, n, sizes, include_flags); | |
1062 | } | |
1063 | ROCKSDB_DEPRECATED_FUNC virtual void GetApproximateSizes( | |
1064 | ColumnFamilyHandle* column_family, const Range* range, int n, | |
1065 | uint64_t* sizes, bool include_memtable) { | |
1066 | uint8_t include_flags = SizeApproximationFlags::INCLUDE_FILES; | |
1067 | if (include_memtable) { | |
1068 | include_flags |= SizeApproximationFlags::INCLUDE_MEMTABLES; | |
1069 | } | |
1070 | GetApproximateSizes(column_family, range, n, sizes, include_flags); | |
1071 | } | |
1072 | ||
1073 | // Compact the underlying storage for the key range [*begin,*end]. | |
1074 | // The actual compaction interval might be superset of [*begin, *end]. | |
1075 | // In particular, deleted and overwritten versions are discarded, | |
1076 | // and the data is rearranged to reduce the cost of operations | |
1077 | // needed to access the data. This operation should typically only | |
1078 | // be invoked by users who understand the underlying implementation. | |
1079 | // | |
1080 | // begin==nullptr is treated as a key before all keys in the database. | |
1081 | // end==nullptr is treated as a key after all keys in the database. | |
1082 | // Therefore the following call will compact the entire database: | |
1083 | // db->CompactRange(options, nullptr, nullptr); | |
1084 | // Note that after the entire database is compacted, all data are pushed | |
1085 | // down to the last level containing any data. If the total data size after | |
1086 | // compaction is reduced, that level might not be appropriate for hosting all | |
1087 | // the files. In this case, client could set options.change_level to true, to | |
1088 | // move the files back to the minimum level capable of holding the data set | |
1089 | // or a given level (specified by non-negative options.target_level). | |
1090 | virtual Status CompactRange(const CompactRangeOptions& options, | |
1091 | ColumnFamilyHandle* column_family, | |
1092 | const Slice* begin, const Slice* end) = 0; | |
1093 | virtual Status CompactRange(const CompactRangeOptions& options, | |
1094 | const Slice* begin, const Slice* end) { | |
1095 | return CompactRange(options, DefaultColumnFamily(), begin, end); | |
1096 | } | |
1097 | ||
1098 | ROCKSDB_DEPRECATED_FUNC virtual Status CompactRange( | |
1099 | ColumnFamilyHandle* column_family, const Slice* begin, const Slice* end, | |
1100 | bool change_level = false, int target_level = -1, | |
1101 | uint32_t target_path_id = 0) { | |
1102 | CompactRangeOptions options; | |
1103 | options.change_level = change_level; | |
1104 | options.target_level = target_level; | |
1105 | options.target_path_id = target_path_id; | |
1106 | return CompactRange(options, column_family, begin, end); | |
1107 | } | |
1108 | ||
1109 | ROCKSDB_DEPRECATED_FUNC virtual Status CompactRange( | |
1110 | const Slice* begin, const Slice* end, bool change_level = false, | |
1111 | int target_level = -1, uint32_t target_path_id = 0) { | |
1112 | CompactRangeOptions options; | |
1113 | options.change_level = change_level; | |
1114 | options.target_level = target_level; | |
1115 | options.target_path_id = target_path_id; | |
1116 | return CompactRange(options, DefaultColumnFamily(), begin, end); | |
1117 | } | |
1118 | ||
1119 | virtual Status SetOptions( | |
1120 | ColumnFamilyHandle* /*column_family*/, | |
1121 | const std::unordered_map<std::string, std::string>& /*new_options*/) { | |
1122 | return Status::NotSupported("Not implemented"); | |
1123 | } | |
1124 | virtual Status SetOptions( | |
1125 | const std::unordered_map<std::string, std::string>& new_options) { | |
1126 | return SetOptions(DefaultColumnFamily(), new_options); | |
1127 | } | |
1128 | ||
  // DB-wide counterpart of SetOptions(): takes a map of option name to
  // option value, both as strings.
  // NOTE(review): which options may be changed this way is not visible
  // here - confirm against the implementation.
  virtual Status SetDBOptions(
      const std::unordered_map<std::string, std::string>& new_options) = 0;
1131 | ||
1132 | // CompactFiles() inputs a list of files specified by file numbers and | |
1133 | // compacts them to the specified level. Note that the behavior is different | |
1134 | // from CompactRange() in that CompactFiles() performs the compaction job | |
1135 | // using the CURRENT thread. | |
1136 | // | |
1137 | // @see GetDataBaseMetaData | |
1138 | // @see GetColumnFamilyMetaData | |
1139 | virtual Status CompactFiles( | |
1140 | const CompactionOptions& compact_options, | |
1141 | ColumnFamilyHandle* column_family, | |
1142 | const std::vector<std::string>& input_file_names, const int output_level, | |
1143 | const int output_path_id = -1, | |
1144 | std::vector<std::string>* const output_file_names = nullptr, | |
1145 | CompactionJobInfo* compaction_job_info = nullptr) = 0; | |
1146 | ||
1147 | virtual Status CompactFiles( | |
1148 | const CompactionOptions& compact_options, | |
1149 | const std::vector<std::string>& input_file_names, const int output_level, | |
1150 | const int output_path_id = -1, | |
1151 | std::vector<std::string>* const output_file_names = nullptr, | |
1152 | CompactionJobInfo* compaction_job_info = nullptr) { | |
1153 | return CompactFiles(compact_options, DefaultColumnFamily(), | |
1154 | input_file_names, output_level, output_path_id, | |
1155 | output_file_names, compaction_job_info); | |
1156 | } | |
1157 | ||
  // PauseBackgroundWork() waits until all currently running background
  // processes finish. After it returns, no background process will be run
  // until ContinueBackgroundWork() is called, once for each preceding
  // OK-returning call to PauseBackgroundWork().
  virtual Status PauseBackgroundWork() = 0;
  virtual Status ContinueBackgroundWork() = 0;
1164 | ||
  // This function will enable automatic compactions for the given column
  // families if they were previously disabled. The function will first set the
  // disable_auto_compactions option for each column family to 'false', after
  // which it will schedule a flush/compaction.
  //
  // NOTE: Setting disable_auto_compactions to 'false' through the SetOptions()
  // API does NOT schedule a flush/compaction afterwards, and only changes the
  // parameter itself within the column family option.
  //
  virtual Status EnableAutoCompaction(
      const std::vector<ColumnFamilyHandle*>& column_family_handles) = 0;
1176 | ||
  // Switch manual compaction off / back on for this DB.
  // NOTE(review): presumably affects CompactRange()/CompactFiles(); whether
  // in-flight manual compactions are cancelled or waited for is not visible
  // here - confirm against the implementation.
  virtual void DisableManualCompaction() = 0;
  virtual void EnableManualCompaction() = 0;
1179 | ||
1180 | // Number of levels used for this DB. | |
1181 | virtual int NumberLevels(ColumnFamilyHandle* column_family) = 0; | |
1182 | virtual int NumberLevels() { return NumberLevels(DefaultColumnFamily()); } | |
1183 | ||
1184 | // Maximum level to which a new compacted memtable is pushed if it | |
1185 | // does not create overlap. | |
1186 | virtual int MaxMemCompactionLevel(ColumnFamilyHandle* column_family) = 0; | |
1187 | virtual int MaxMemCompactionLevel() { | |
1188 | return MaxMemCompactionLevel(DefaultColumnFamily()); | |
1189 | } | |
1190 | ||
1191 | // Number of files in level-0 that would stop writes. | |
1192 | virtual int Level0StopWriteTrigger(ColumnFamilyHandle* column_family) = 0; | |
1193 | virtual int Level0StopWriteTrigger() { | |
1194 | return Level0StopWriteTrigger(DefaultColumnFamily()); | |
1195 | } | |
1196 | ||
  // Get DB name -- the exact same name that was provided as an argument to
  // DB::Open(). Returned by const reference.
  virtual const std::string& GetName() const = 0;

  // Get Env object from the DB
  virtual Env* GetEnv() const = 0;

  // Get the FileSystem object used by the DB. Unlike its neighbours this is
  // not pure virtual; it is only declared here and defined elsewhere.
  // NOTE(review): presumably derived from the Env by default - confirm.
  virtual FileSystem* GetFileSystem() const;
1205 | ||
1206 | // Get DB Options that we use. During the process of opening the | |
1207 | // column family, the options provided when calling DB::Open() or | |
1208 | // DB::CreateColumnFamily() will have been "sanitized" and transformed | |
1209 | // in an implementation-defined manner. | |
1210 | virtual Options GetOptions(ColumnFamilyHandle* column_family) const = 0; | |
1211 | virtual Options GetOptions() const { | |
1212 | return GetOptions(DefaultColumnFamily()); | |
1213 | } | |
1214 | ||
  // Get the DB-wide options in use, returned by value.
  // NOTE(review): presumably sanitized like the result of GetOptions() -
  // confirm against the implementation.
  virtual DBOptions GetDBOptions() const = 0;
1216 | ||
1217 | // Flush all mem-table data. | |
1218 | // Flush a single column family, even when atomic flush is enabled. To flush | |
1219 | // multiple column families, use Flush(options, column_families). | |
1220 | virtual Status Flush(const FlushOptions& options, | |
1221 | ColumnFamilyHandle* column_family) = 0; | |
1222 | virtual Status Flush(const FlushOptions& options) { | |
1223 | return Flush(options, DefaultColumnFamily()); | |
1224 | } | |
  // Flushes multiple column families.
  // If atomic flush is not enabled, Flush(options, column_families) is
  // equivalent to calling Flush(options, column_family) multiple times.
  // If atomic flush is enabled, Flush(options, column_families) will flush all
  // column families specified in 'column_families' up to the latest sequence
  // number at the time when flush is requested.
  // Note that RocksDB 5.15 and earlier may not be able to open DBs written by
  // later versions with atomic flush enabled.
  virtual Status Flush(
      const FlushOptions& options,
      const std::vector<ColumnFamilyHandle*>& column_families) = 0;
1236 | ||
1237 | // Flush the WAL memory buffer to the file. If sync is true, it calls SyncWAL | |
1238 | // afterwards. | |
1239 | virtual Status FlushWAL(bool /*sync*/) { | |
1240 | return Status::NotSupported("FlushWAL not implemented"); | |
1241 | } | |
1242 | // Sync the wal. Note that Write() followed by SyncWAL() is not exactly the | |
1243 | // same as Write() with sync=true: in the latter case the changes won't be | |
1244 | // visible until the sync is done. | |
1245 | // Currently only works if allow_mmap_writes = false in Options. | |
1246 | virtual Status SyncWAL() = 0; | |
1247 | ||
1248 | // Lock the WAL. Also flushes the WAL after locking. | |
1249 | virtual Status LockWAL() { | |
1250 | return Status::NotSupported("LockWAL not implemented"); | |
1251 | } | |
1252 | ||
1253 | // Unlock the WAL. | |
1254 | virtual Status UnlockWAL() { | |
1255 | return Status::NotSupported("UnlockWAL not implemented"); | |
1256 | } | |
1257 | ||
1258 | // The sequence number of the most recent transaction. | |
1259 | virtual SequenceNumber GetLatestSequenceNumber() const = 0; | |
1260 | ||
1261 | // Instructs DB to preserve deletes with sequence numbers >= passed seqnum. | |
1262 | // Has no effect if DBOptions.preserve_deletes is set to false. | |
1263 | // This function assumes that user calls this function with monotonically | |
1264 | // increasing seqnums (otherwise we can't guarantee that a particular delete | |
1265 | // hasn't been already processed); returns true if the value was successfully | |
1266 | // updated, false if user attempted to call if with seqnum <= current value. | |
1267 | virtual bool SetPreserveDeletesSequenceNumber(SequenceNumber seqnum) = 0; | |
1268 | ||
1269 | // Prevent file deletions. Compactions will continue to occur, | |
1270 | // but no obsolete files will be deleted. Calling this multiple | |
1271 | // times have the same effect as calling it once. | |
1272 | virtual Status DisableFileDeletions() = 0; | |
1273 | ||
1274 | // Allow compactions to delete obsolete files. | |
1275 | // If force == true, the call to EnableFileDeletions() will guarantee that | |
1276 | // file deletions are enabled after the call, even if DisableFileDeletions() | |
1277 | // was called multiple times before. | |
1278 | // If force == false, EnableFileDeletions will only enable file deletion | |
1279 | // after it's been called at least as many times as DisableFileDeletions(), | |
1280 | // enabling the two methods to be called by two threads concurrently without | |
1281 | // synchronization -- i.e., file deletions will be enabled only after both | |
1282 | // threads call EnableFileDeletions() | |
1283 | virtual Status EnableFileDeletions(bool force = true) = 0; | |
1284 | ||
1285 | #ifndef ROCKSDB_LITE | |
1286 | // GetLiveFiles followed by GetSortedWalFiles can generate a lossless backup | |
1287 | ||
1288 | // Retrieve the list of all files in the database. The files are | |
1289 | // relative to the dbname and are not absolute paths. Despite being relative | |
1290 | // paths, the file names begin with "/". The valid size of the manifest file | |
1291 | // is returned in manifest_file_size. The manifest file is an ever growing | |
1292 | // file, but only the portion specified by manifest_file_size is valid for | |
1293 | // this snapshot. Setting flush_memtable to true does Flush before recording | |
1294 | // the live files. Setting flush_memtable to false is useful when we don't | |
1295 | // want to wait for flush which may have to wait for compaction to complete | |
1296 | // taking an indeterminate time. | |
1297 | // | |
1298 | // In case you have multiple column families, even if flush_memtable is true, | |
1299 | // you still need to call GetSortedWalFiles after GetLiveFiles to compensate | |
1300 | // for new data that arrived to already-flushed column families while other | |
1301 | // column families were flushing | |
1302 | virtual Status GetLiveFiles(std::vector<std::string>&, | |
1303 | uint64_t* manifest_file_size, | |
1304 | bool flush_memtable = true) = 0; | |
1305 | ||
1306 | // Retrieve the sorted list of all wal files with earliest file first | |
1307 | virtual Status GetSortedWalFiles(VectorLogPtr& files) = 0; | |
1308 | ||
1309 | // Retrieve information about the current wal file | |
1310 | // | |
1311 | // Note that the log might have rolled after this call in which case | |
1312 | // the current_log_file would not point to the current log file. | |
1313 | // | |
1314 | // Additionally, for the sake of optimization current_log_file->StartSequence | |
1315 | // would always be set to 0 | |
1316 | virtual Status GetCurrentWalFile( | |
1317 | std::unique_ptr<LogFile>* current_log_file) = 0; | |
1318 | ||
1319 | // Retrieves the creation time of the oldest file in the DB. | |
1320 | // This API only works if max_open_files = -1, if it is not then | |
1321 | // Status returned is Status::NotSupported() | |
1322 | // The file creation time is set using the env provided to the DB. | |
1323 | // If the DB was created from a very old release then its possible that | |
1324 | // the SST files might not have file_creation_time property and even after | |
1325 | // moving to a newer release its possible that some files never got compacted | |
1326 | // and may not have file_creation_time property. In both the cases | |
1327 | // file_creation_time is considered 0 which means this API will return | |
1328 | // creation_time = 0 as there wouldn't be a timestamp lower than 0. | |
1329 | virtual Status GetCreationTimeOfOldestFile(uint64_t* creation_time) = 0; | |
1330 | ||
1331 | // Note: this API is not yet consistent with WritePrepared transactions. | |
1332 | // Sets iter to an iterator that is positioned at a write-batch containing | |
1333 | // seq_number. If the sequence number is non existent, it returns an iterator | |
1334 | // at the first available seq_no after the requested seq_no | |
1335 | // Returns Status::OK if iterator is valid | |
1336 | // Must set WAL_ttl_seconds or WAL_size_limit_MB to large values to | |
1337 | // use this api, else the WAL files will get | |
1338 | // cleared aggressively and the iterator might keep getting invalid before | |
1339 | // an update is read. | |
1340 | virtual Status GetUpdatesSince( | |
1341 | SequenceNumber seq_number, std::unique_ptr<TransactionLogIterator>* iter, | |
1342 | const TransactionLogIterator::ReadOptions& read_options = | |
1343 | TransactionLogIterator::ReadOptions()) = 0; | |
1344 | ||
1345 | // Windows API macro interference | |
1346 | #undef DeleteFile | |
1347 | // WARNING: This API is planned for removal in RocksDB 7.0 since it does not | |
1348 | // operate at the proper level of abstraction for a key-value store, and its | |
1349 | // contract/restrictions are poorly documented. For example, it returns non-OK | |
1350 | // `Status` for non-bottommost files and files undergoing compaction. Since we | |
1351 | // do not plan to maintain it, the contract will likely remain underspecified | |
1352 | // until its removal. Any user is encouraged to read the implementation | |
1353 | // carefully and migrate away from it when possible. | |
1354 | // | |
1355 | // Delete the file name from the db directory and update the internal state to | |
1356 | // reflect that. Supports deletion of sst and log files only. 'name' must be | |
1357 | // path relative to the db directory. eg. 000001.sst, /archive/000003.log | |
1358 | virtual Status DeleteFile(std::string name) = 0; | |
1359 | ||
1360 | // Returns a list of all table files with their level, start key | |
1361 | // and end key | |
1362 | virtual void GetLiveFilesMetaData( | |
1363 | std::vector<LiveFileMetaData>* /*metadata*/) {} | |
1364 | ||
1365 | // Return a list of all table file checksum info. | |
1366 | // Note: This function might be of limited use because it cannot be | |
1367 | // synchronized with GetLiveFiles. | |
1368 | virtual Status GetLiveFilesChecksumInfo(FileChecksumList* checksum_list) = 0; | |
1369 | ||
1370 | // Obtains the meta data of the specified column family of the DB. | |
1371 | virtual void GetColumnFamilyMetaData(ColumnFamilyHandle* /*column_family*/, | |
1372 | ColumnFamilyMetaData* /*metadata*/) {} | |
1373 | ||
1374 | // Get the metadata of the default column family. | |
1375 | void GetColumnFamilyMetaData(ColumnFamilyMetaData* metadata) { | |
1376 | GetColumnFamilyMetaData(DefaultColumnFamily(), metadata); | |
1377 | } | |
1378 | ||
1379 | // IngestExternalFile() will load a list of external SST files (1) into the DB | |
1380 | // Two primary modes are supported: | |
1381 | // - Duplicate keys in the new files will overwrite exiting keys (default) | |
1382 | // - Duplicate keys will be skipped (set ingest_behind=true) | |
1383 | // In the first mode we will try to find the lowest possible level that | |
1384 | // the file can fit in, and ingest the file into this level (2). A file that | |
1385 | // have a key range that overlap with the memtable key range will require us | |
1386 | // to Flush the memtable first before ingesting the file. | |
1387 | // In the second mode we will always ingest in the bottom most level (see | |
1388 | // docs to IngestExternalFileOptions::ingest_behind). | |
1389 | // | |
1390 | // (1) External SST files can be created using SstFileWriter | |
1391 | // (2) We will try to ingest the files to the lowest possible level | |
1392 | // even if the file compression doesn't match the level compression | |
1393 | // (3) If IngestExternalFileOptions->ingest_behind is set to true, | |
1394 | // we always ingest at the bottommost level, which should be reserved | |
1395 | // for this purpose (see DBOPtions::allow_ingest_behind flag). | |
1396 | virtual Status IngestExternalFile( | |
1397 | ColumnFamilyHandle* column_family, | |
1398 | const std::vector<std::string>& external_files, | |
1399 | const IngestExternalFileOptions& options) = 0; | |
1400 | ||
1401 | virtual Status IngestExternalFile( | |
1402 | const std::vector<std::string>& external_files, | |
1403 | const IngestExternalFileOptions& options) { | |
1404 | return IngestExternalFile(DefaultColumnFamily(), external_files, options); | |
1405 | } | |
1406 | ||
1407 | // IngestExternalFiles() will ingest files for multiple column families, and | |
1408 | // record the result atomically to the MANIFEST. | |
1409 | // If this function returns OK, all column families' ingestion must succeed. | |
1410 | // If this function returns NOK, or the process crashes, then non-of the | |
1411 | // files will be ingested into the database after recovery. | |
1412 | // Note that it is possible for application to observe a mixed state during | |
1413 | // the execution of this function. If the user performs range scan over the | |
1414 | // column families with iterators, iterator on one column family may return | |
1415 | // ingested data, while iterator on other column family returns old data. | |
1416 | // Users can use snapshot for a consistent view of data. | |
1417 | // If your db ingests multiple SST files using this API, i.e. args.size() | |
1418 | // > 1, then RocksDB 5.15 and earlier will not be able to open it. | |
1419 | // | |
1420 | // REQUIRES: each arg corresponds to a different column family: namely, for | |
1421 | // 0 <= i < j < len(args), args[i].column_family != args[j].column_family. | |
1422 | virtual Status IngestExternalFiles( | |
1423 | const std::vector<IngestExternalFileArg>& args) = 0; | |
1424 | ||
1425 | // CreateColumnFamilyWithImport() will create a new column family with | |
1426 | // column_family_name and import external SST files specified in metadata into | |
1427 | // this column family. | |
1428 | // (1) External SST files can be created using SstFileWriter. | |
1429 | // (2) External SST files can be exported from a particular column family in | |
1430 | // an existing DB. | |
1431 | // Option in import_options specifies whether the external files are copied or | |
1432 | // moved (default is copy). When option specifies copy, managing files at | |
1433 | // external_file_path is caller's responsibility. When option specifies a | |
1434 | // move, the call ensures that the specified files at external_file_path are | |
1435 | // deleted on successful return and files are not modified on any error | |
1436 | // return. | |
1437 | // On error return, column family handle returned will be nullptr. | |
1438 | // ColumnFamily will be present on successful return and will not be present | |
1439 | // on error return. ColumnFamily may be present on any crash during this call. | |
1440 | virtual Status CreateColumnFamilyWithImport( | |
1441 | const ColumnFamilyOptions& options, const std::string& column_family_name, | |
1442 | const ImportColumnFamilyOptions& import_options, | |
1443 | const ExportImportFilesMetaData& metadata, | |
1444 | ColumnFamilyHandle** handle) = 0; | |
1445 | ||
1446 | // Verify the checksums of files in db. Currently the whole-file checksum of | |
1447 | // table files are checked. | |
1448 | virtual Status VerifyFileChecksums(const ReadOptions& /*read_options*/) { | |
1449 | return Status::NotSupported("File verification not supported"); | |
1450 | } | |
1451 | ||
1452 | // Verify the block checksums of files in db. The block checksums of table | |
1453 | // files are checked. | |
1454 | virtual Status VerifyChecksum(const ReadOptions& read_options) = 0; | |
1455 | ||
1456 | virtual Status VerifyChecksum() { return VerifyChecksum(ReadOptions()); } | |
1457 | ||
1458 | // AddFile() is deprecated, please use IngestExternalFile() | |
1459 | ROCKSDB_DEPRECATED_FUNC virtual Status AddFile( | |
1460 | ColumnFamilyHandle* column_family, | |
1461 | const std::vector<std::string>& file_path_list, bool move_file = false, | |
1462 | bool skip_snapshot_check = false) { | |
1463 | IngestExternalFileOptions ifo; | |
1464 | ifo.move_files = move_file; | |
1465 | ifo.snapshot_consistency = !skip_snapshot_check; | |
1466 | ifo.allow_global_seqno = false; | |
1467 | ifo.allow_blocking_flush = false; | |
1468 | return IngestExternalFile(column_family, file_path_list, ifo); | |
1469 | } | |
1470 | ||
1471 | ROCKSDB_DEPRECATED_FUNC virtual Status AddFile( | |
1472 | const std::vector<std::string>& file_path_list, bool move_file = false, | |
1473 | bool skip_snapshot_check = false) { | |
1474 | IngestExternalFileOptions ifo; | |
1475 | ifo.move_files = move_file; | |
1476 | ifo.snapshot_consistency = !skip_snapshot_check; | |
1477 | ifo.allow_global_seqno = false; | |
1478 | ifo.allow_blocking_flush = false; | |
1479 | return IngestExternalFile(DefaultColumnFamily(), file_path_list, ifo); | |
1480 | } | |
1481 | ||
1482 | // AddFile() is deprecated, please use IngestExternalFile() | |
1483 | ROCKSDB_DEPRECATED_FUNC virtual Status AddFile( | |
1484 | ColumnFamilyHandle* column_family, const std::string& file_path, | |
1485 | bool move_file = false, bool skip_snapshot_check = false) { | |
1486 | IngestExternalFileOptions ifo; | |
1487 | ifo.move_files = move_file; | |
1488 | ifo.snapshot_consistency = !skip_snapshot_check; | |
1489 | ifo.allow_global_seqno = false; | |
1490 | ifo.allow_blocking_flush = false; | |
1491 | return IngestExternalFile(column_family, {file_path}, ifo); | |
1492 | } | |
1493 | ||
1494 | ROCKSDB_DEPRECATED_FUNC virtual Status AddFile( | |
1495 | const std::string& file_path, bool move_file = false, | |
1496 | bool skip_snapshot_check = false) { | |
1497 | IngestExternalFileOptions ifo; | |
1498 | ifo.move_files = move_file; | |
1499 | ifo.snapshot_consistency = !skip_snapshot_check; | |
1500 | ifo.allow_global_seqno = false; | |
1501 | ifo.allow_blocking_flush = false; | |
1502 | return IngestExternalFile(DefaultColumnFamily(), {file_path}, ifo); | |
1503 | } | |
1504 | ||
1505 | // Load table file with information "file_info" into "column_family" | |
1506 | ROCKSDB_DEPRECATED_FUNC virtual Status AddFile( | |
1507 | ColumnFamilyHandle* column_family, | |
1508 | const std::vector<ExternalSstFileInfo>& file_info_list, | |
1509 | bool move_file = false, bool skip_snapshot_check = false) { | |
1510 | std::vector<std::string> external_files; | |
1511 | for (const ExternalSstFileInfo& file_info : file_info_list) { | |
1512 | external_files.push_back(file_info.file_path); | |
1513 | } | |
1514 | IngestExternalFileOptions ifo; | |
1515 | ifo.move_files = move_file; | |
1516 | ifo.snapshot_consistency = !skip_snapshot_check; | |
1517 | ifo.allow_global_seqno = false; | |
1518 | ifo.allow_blocking_flush = false; | |
1519 | return IngestExternalFile(column_family, external_files, ifo); | |
1520 | } | |
1521 | ||
1522 | ROCKSDB_DEPRECATED_FUNC virtual Status AddFile( | |
1523 | const std::vector<ExternalSstFileInfo>& file_info_list, | |
1524 | bool move_file = false, bool skip_snapshot_check = false) { | |
1525 | std::vector<std::string> external_files; | |
1526 | for (const ExternalSstFileInfo& file_info : file_info_list) { | |
1527 | external_files.push_back(file_info.file_path); | |
1528 | } | |
1529 | IngestExternalFileOptions ifo; | |
1530 | ifo.move_files = move_file; | |
1531 | ifo.snapshot_consistency = !skip_snapshot_check; | |
1532 | ifo.allow_global_seqno = false; | |
1533 | ifo.allow_blocking_flush = false; | |
1534 | return IngestExternalFile(DefaultColumnFamily(), external_files, ifo); | |
1535 | } | |
1536 | ||
1537 | ROCKSDB_DEPRECATED_FUNC virtual Status AddFile( | |
1538 | ColumnFamilyHandle* column_family, const ExternalSstFileInfo* file_info, | |
1539 | bool move_file = false, bool skip_snapshot_check = false) { | |
1540 | IngestExternalFileOptions ifo; | |
1541 | ifo.move_files = move_file; | |
1542 | ifo.snapshot_consistency = !skip_snapshot_check; | |
1543 | ifo.allow_global_seqno = false; | |
1544 | ifo.allow_blocking_flush = false; | |
1545 | return IngestExternalFile(column_family, {file_info->file_path}, ifo); | |
1546 | } | |
1547 | ||
1548 | ROCKSDB_DEPRECATED_FUNC virtual Status AddFile( | |
1549 | const ExternalSstFileInfo* file_info, bool move_file = false, | |
1550 | bool skip_snapshot_check = false) { | |
1551 | IngestExternalFileOptions ifo; | |
1552 | ifo.move_files = move_file; | |
1553 | ifo.snapshot_consistency = !skip_snapshot_check; | |
1554 | ifo.allow_global_seqno = false; | |
1555 | ifo.allow_blocking_flush = false; | |
1556 | return IngestExternalFile(DefaultColumnFamily(), {file_info->file_path}, | |
1557 | ifo); | |
1558 | } | |
1559 | ||
1560 | #endif // ROCKSDB_LITE | |
1561 | ||
1562 | // Returns the unique ID which is read from IDENTITY file during the opening | |
1563 | // of database by setting in the identity variable | |
1564 | // Returns Status::OK if identity could be set properly | |
1565 | virtual Status GetDbIdentity(std::string& identity) const = 0; | |
1566 | ||
1567 | // Return a unique identifier for each DB object that is opened | |
1568 | // This DB session ID should be unique among all open DB instances on all | |
1569 | // hosts, and should be unique among re-openings of the same or other DBs. | |
1570 | // (Two open DBs have the same identity from other function GetDbIdentity when | |
1571 | // one is physically copied from the other.) | |
1572 | virtual Status GetDbSessionId(std::string& session_id) const = 0; | |
1573 | ||
1574 | // Returns default column family handle | |
1575 | virtual ColumnFamilyHandle* DefaultColumnFamily() const = 0; | |
1576 | ||
1577 | #ifndef ROCKSDB_LITE | |
1578 | virtual Status GetPropertiesOfAllTables(ColumnFamilyHandle* column_family, | |
1579 | TablePropertiesCollection* props) = 0; | |
1580 | virtual Status GetPropertiesOfAllTables(TablePropertiesCollection* props) { | |
1581 | return GetPropertiesOfAllTables(DefaultColumnFamily(), props); | |
1582 | } | |
1583 | virtual Status GetPropertiesOfTablesInRange( | |
1584 | ColumnFamilyHandle* column_family, const Range* range, std::size_t n, | |
1585 | TablePropertiesCollection* props) = 0; | |
1586 | ||
1587 | virtual Status SuggestCompactRange(ColumnFamilyHandle* /*column_family*/, | |
1588 | const Slice* /*begin*/, | |
1589 | const Slice* /*end*/) { | |
1590 | return Status::NotSupported("SuggestCompactRange() is not implemented."); | |
1591 | } | |
1592 | ||
1593 | virtual Status PromoteL0(ColumnFamilyHandle* /*column_family*/, | |
1594 | int /*target_level*/) { | |
1595 | return Status::NotSupported("PromoteL0() is not implemented."); | |
1596 | } | |
1597 | ||
1598 | // Trace DB operations. Use EndTrace() to stop tracing. | |
1599 | virtual Status StartTrace(const TraceOptions& /*options*/, | |
1600 | std::unique_ptr<TraceWriter>&& /*trace_writer*/) { | |
1601 | return Status::NotSupported("StartTrace() is not implemented."); | |
1602 | } | |
1603 | ||
1604 | virtual Status EndTrace() { | |
1605 | return Status::NotSupported("EndTrace() is not implemented."); | |
1606 | } | |
1607 | ||
1608 | // IO Tracing operations. Use EndIOTrace() to stop tracing. | |
1609 | virtual Status StartIOTrace(Env* /*env*/, const TraceOptions& /*options*/, | |
1610 | std::unique_ptr<TraceWriter>&& /*trace_writer*/) { | |
1611 | return Status::NotSupported("StartIOTrace() is not implemented."); | |
1612 | } | |
1613 | ||
1614 | virtual Status EndIOTrace() { | |
1615 | return Status::NotSupported("EndIOTrace() is not implemented."); | |
1616 | } | |
1617 | ||
1618 | // Trace block cache accesses. Use EndBlockCacheTrace() to stop tracing. | |
1619 | virtual Status StartBlockCacheTrace( | |
1620 | const TraceOptions& /*options*/, | |
1621 | std::unique_ptr<TraceWriter>&& /*trace_writer*/) { | |
1622 | return Status::NotSupported("StartBlockCacheTrace() is not implemented."); | |
1623 | } | |
1624 | ||
1625 | virtual Status EndBlockCacheTrace() { | |
1626 | return Status::NotSupported("EndBlockCacheTrace() is not implemented."); | |
1627 | } | |
1628 | #endif // ROCKSDB_LITE | |
1629 | ||
1630 | // Needed for StackableDB | |
1631 | virtual DB* GetRootDB() { return this; } | |
1632 | ||
1633 | // Given a window [start_time, end_time), setup a StatsHistoryIterator | |
1634 | // to access stats history. Note the start_time and end_time are epoch | |
1635 | // time measured in seconds, and end_time is an exclusive bound. | |
1636 | virtual Status GetStatsHistory( | |
1637 | uint64_t /*start_time*/, uint64_t /*end_time*/, | |
1638 | std::unique_ptr<StatsHistoryIterator>* /*stats_iterator*/) { | |
1639 | return Status::NotSupported("GetStatsHistory() is not implemented."); | |
1640 | } | |
1641 | ||
1642 | #ifndef ROCKSDB_LITE | |
1643 | // Make the secondary instance catch up with the primary by tailing and | |
1644 | // replaying the MANIFEST and WAL of the primary. | |
1645 | // Column families created by the primary after the secondary instance starts | |
1646 | // will be ignored unless the secondary instance closes and restarts with the | |
1647 | // newly created column families. | |
1648 | // Column families that exist before secondary instance starts and dropped by | |
1649 | // the primary afterwards will be marked as dropped. However, as long as the | |
1650 | // secondary instance does not delete the corresponding column family | |
1651 | // handles, the data of the column family is still accessible to the | |
1652 | // secondary. | |
1653 | // TODO: we will support WAL tailing soon. | |
1654 | virtual Status TryCatchUpWithPrimary() { | |
1655 | return Status::NotSupported("Supported only by secondary instance"); | |
1656 | } | |
1657 | #endif // !ROCKSDB_LITE | |
1658 | }; | |
1659 | ||
1660 | // Destroy the contents of the specified database. | |
1661 | // Be very careful using this method. | |
1662 | Status DestroyDB(const std::string& name, const Options& options, | |
1663 | const std::vector<ColumnFamilyDescriptor>& column_families = | |
1664 | std::vector<ColumnFamilyDescriptor>()); | |
1665 | ||
1666 | #ifndef ROCKSDB_LITE | |
1667 | // If a DB cannot be opened, you may attempt to call this method to | |
1668 | // resurrect as much of the contents of the database as possible. | |
1669 | // Some data may be lost, so be careful when calling this function | |
1670 | // on a database that contains important information. | |
1671 | // | |
1672 | // With this API, we will warn and skip data associated with column families not | |
1673 | // specified in column_families. | |
1674 | // | |
1675 | // @param column_families Descriptors for known column families | |
1676 | Status RepairDB(const std::string& dbname, const DBOptions& db_options, | |
1677 | const std::vector<ColumnFamilyDescriptor>& column_families); | |
1678 | ||
1679 | // @param unknown_cf_opts Options for column families encountered during the | |
1680 | // repair that were not specified in column_families. | |
1681 | Status RepairDB(const std::string& dbname, const DBOptions& db_options, | |
1682 | const std::vector<ColumnFamilyDescriptor>& column_families, | |
1683 | const ColumnFamilyOptions& unknown_cf_opts); | |
1684 | ||
1685 | // @param options These options will be used for the database and for ALL column | |
1686 | // families encountered during the repair | |
1687 | Status RepairDB(const std::string& dbname, const Options& options); | |
1688 | ||
1689 | #endif | |
1690 | ||
1691 | } // namespace ROCKSDB_NAMESPACE |