// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
import java.util.Collection;
import java.util.List;
11 public interface DBOptionsInterface
<T
extends DBOptionsInterface
> {
14 * Use this if your DB is very small (like under 1GB) and you don't want to
15 * spend lots of memory for memtables.
17 * @return the instance of the current object.
19 T
optimizeForSmallDb();
22 * Use the specified object to interact with the environment,
23 * e.g. to read/write files, schedule background work, etc.
24 * Default: {@link Env#getDefault()}
26 * @param env {@link Env} instance.
27 * @return the instance of the current Options.
29 T
setEnv(final Env env
);
32 * Returns the set RocksEnv instance.
34 * @return {@link RocksEnv} instance set in the options.
39 * <p>By default, RocksDB uses only one background thread for flush and
40 * compaction. Calling this function will set it up such that total of
41 * `total_threads` is used.</p>
43 * <p>You almost definitely want to call this function if your system is
44 * bottlenecked by RocksDB.</p>
46 * @param totalThreads The total number of threads to be used by RocksDB.
47 * A good value is the number of cores.
49 * @return the instance of the current Options
51 T
setIncreaseParallelism(int totalThreads
);
54 * If this value is set to true, then the database will be created
55 * if it is missing during {@code RocksDB.open()}.
58 * @param flag a flag indicating whether to create a database the
59 * specified database in {@link RocksDB#open(org.rocksdb.Options, String)} operation
61 * @return the instance of the current Options
62 * @see RocksDB#open(org.rocksdb.Options, String)
64 T
setCreateIfMissing(boolean flag
);
67 * Return true if the create_if_missing flag is set to true.
68 * If true, the database will be created if it is missing.
70 * @return true if the createIfMissing option is set to true.
71 * @see #setCreateIfMissing(boolean)
73 boolean createIfMissing();
76 * <p>If true, missing column families will be automatically created</p>
78 * <p>Default: false</p>
80 * @param flag a flag indicating if missing column families shall be
81 * created automatically.
82 * @return true if missing column families shall be created automatically
85 T
setCreateMissingColumnFamilies(boolean flag
);
88 * Return true if the create_missing_column_families flag is set
89 * to true. If true column families be created if missing.
91 * @return true if the createMissingColumnFamilies is set to
93 * @see #setCreateMissingColumnFamilies(boolean)
95 boolean createMissingColumnFamilies();
98 * If true, an error will be thrown during RocksDB.open() if the
99 * database already exists.
102 * @param errorIfExists if true, an exception will be thrown
103 * during {@code RocksDB.open()} if the database already exists.
104 * @return the reference to the current option.
105 * @see RocksDB#open(org.rocksdb.Options, String)
107 T
setErrorIfExists(boolean errorIfExists
);
110 * If true, an error will be thrown during RocksDB.open() if the
111 * database already exists.
113 * @return if true, an error is raised when the specified database
114 * already exists before open.
116 boolean errorIfExists();
119 * If true, the implementation will do aggressive checking of the
120 * data it is processing and will stop early if it detects any
121 * errors. This may have unforeseen ramifications: for example, a
122 * corruption of one DB entry may cause a large number of entries to
123 * become unreadable or for the entire DB to become unopenable.
124 * If any of the writes to the database fails (Put, Delete, Merge, Write),
125 * the database will switch to read-only mode and fail all other
129 * @param paranoidChecks a flag to indicate whether paranoid-check
131 * @return the reference to the current option.
133 T
setParanoidChecks(boolean paranoidChecks
);
136 * If true, the implementation will do aggressive checking of the
137 * data it is processing and will stop early if it detects any
138 * errors. This may have unforeseen ramifications: for example, a
139 * corruption of one DB entry may cause a large number of entries to
140 * become unreadable or for the entire DB to become unopenable.
141 * If any of the writes to the database fails (Put, Delete, Merge, Write),
142 * the database will switch to read-only mode and fail all other
145 * @return a boolean indicating whether paranoid-check is on.
147 boolean paranoidChecks();
150 * Use to control write rate of flush and compaction. Flush has higher
151 * priority than compaction. Rate limiting is disabled if nullptr.
154 * @param rateLimiter {@link org.rocksdb.RateLimiter} instance.
155 * @return the instance of the current object.
159 T
setRateLimiter(RateLimiter rateLimiter
);
162 * Use to track SST files and control their file deletion rate.
165 * - Throttle the deletion rate of the SST files.
166 * - Keep track the total size of all SST files.
167 * - Set a maximum allowed space limit for SST files that when reached
168 * the DB wont do any further flushes or compactions and will set the
170 * - Can be shared between multiple dbs.
173 * - Only track and throttle deletes of SST files in
174 * first db_path (db_name if db_paths is empty).
176 * @param sstFileManager The SST File Manager for the db.
177 * @return the instance of the current object.
179 T
setSstFileManager(SstFileManager sstFileManager
);
182 * <p>Any internal progress/error information generated by
183 * the db will be written to the Logger if it is non-nullptr,
184 * or to a file stored in the same directory as the DB
185 * contents if info_log is nullptr.</p>
187 * <p>Default: nullptr</p>
189 * @param logger {@link Logger} instance.
190 * @return the instance of the current object.
192 T
setLogger(Logger logger
);
195 * <p>Sets the RocksDB log level. Default level is INFO</p>
197 * @param infoLogLevel log level to set.
198 * @return the instance of the current object.
200 T
setInfoLogLevel(InfoLogLevel infoLogLevel
);
203 * <p>Returns currently set log level.</p>
204 * @return {@link org.rocksdb.InfoLogLevel} instance.
206 InfoLogLevel
infoLogLevel();
209 * If {@link MutableDBOptionsInterface#maxOpenFiles()} is -1, DB will open
210 * all files on DB::Open(). You can use this option to increase the number
211 * of threads used to open the files.
215 * @param maxFileOpeningThreads the maximum number of threads to use to
218 * @return the reference to the current options.
220 T
setMaxFileOpeningThreads(int maxFileOpeningThreads
);
223 * If {@link MutableDBOptionsInterface#maxOpenFiles()} is -1, DB will open all
224 * files on DB::Open(). You can use this option to increase the number of
225 * threads used to open the files.
229 * @return the maximum number of threads to use to open files
231 int maxFileOpeningThreads();
234 * <p>Sets the statistics object which collects metrics about database operations.
235 * Statistics objects should not be shared between DB instances as
236 * it does not use any locks to prevent concurrent updates.</p>
238 * @param statistics The statistics to set
240 * @return the instance of the current object.
242 * @see RocksDB#open(org.rocksdb.Options, String)
244 T
setStatistics(final Statistics statistics
);
247 * <p>Returns statistics object.</p>
249 * @return the instance of the statistics object or null if there is no
252 * @see #setStatistics(Statistics)
254 Statistics
statistics();
257 * <p>If true, then every store to stable storage will issue a fsync.</p>
258 * <p>If false, then every store to stable storage will issue a fdatasync.
259 * This parameter should be set to true while storing data to
260 * filesystem like ext3 that can lose files after a reboot.</p>
261 * <p>Default: false</p>
263 * @param useFsync a boolean flag to specify whether to use fsync
264 * @return the instance of the current object.
266 T
setUseFsync(boolean useFsync
);
269 * <p>If true, then every store to stable storage will issue a fsync.</p>
270 * <p>If false, then every store to stable storage will issue a fdatasync.
271 * This parameter should be set to true while storing data to
272 * filesystem like ext3 that can lose files after a reboot.</p>
274 * @return boolean value indicating if fsync is used.
279 * A list of paths where SST files can be put into, with its target size.
280 * Newer data is placed into paths specified earlier in the vector while
281 * older data gradually moves to paths specified later in the vector.
283 * For example, you have a flash device with 10GB allocated for the DB,
284 * as well as a hard drive of 2TB, you should config it to be:
285 * [{"/flash_path", 10GB}, {"/hard_drive", 2TB}]
287 * The system will try to guarantee data under each path is close to but
288 * not larger than the target size. But current and future file sizes used
289 * by determining where to place a file are based on best-effort estimation,
290 * which means there is a chance that the actual size under the directory
291 * is slightly more than target size under some workloads. User should give
292 * some buffer room for those cases.
294 * If none of the paths has sufficient room to place a file, the file will
295 * be placed to the last path anyway, despite to the target size.
297 * Placing newer data to earlier paths is also best-efforts. User should
298 * expect user files to be placed in higher levels in some extreme cases.
300 * If left empty, only one path will be used, which is db_name passed when
305 * @param dbPaths the paths and target sizes
307 * @return the reference to the current options
309 T
setDbPaths(final Collection
<DbPath
> dbPaths
);
312 * A list of paths where SST files can be put into, with its target size.
313 * Newer data is placed into paths specified earlier in the vector while
314 * older data gradually moves to paths specified later in the vector.
316 * For example, you have a flash device with 10GB allocated for the DB,
317 * as well as a hard drive of 2TB, you should config it to be:
318 * [{"/flash_path", 10GB}, {"/hard_drive", 2TB}]
320 * The system will try to guarantee data under each path is close to but
321 * not larger than the target size. But current and future file sizes used
322 * by determining where to place a file are based on best-effort estimation,
323 * which means there is a chance that the actual size under the directory
324 * is slightly more than target size under some workloads. User should give
325 * some buffer room for those cases.
327 * If none of the paths has sufficient room to place a file, the file will
328 * be placed to the last path anyway, despite to the target size.
330 * Placing newer data to earlier paths is also best-efforts. User should
331 * expect user files to be placed in higher levels in some extreme cases.
333 * If left empty, only one path will be used, which is db_name passed when
336 * Default: {@link java.util.Collections#emptyList()}
338 * @return dbPaths the paths and target sizes
340 List
<DbPath
> dbPaths();
343 * This specifies the info LOG dir.
344 * If it is empty, the log files will be in the same dir as data.
345 * If it is non empty, the log files will be in the specified dir,
346 * and the db data dir's absolute path will be used as the log file
349 * @param dbLogDir the path to the info log directory
350 * @return the instance of the current object.
352 T
setDbLogDir(String dbLogDir
);
355 * Returns the directory of info log.
357 * If it is empty, the log files will be in the same dir as data.
358 * If it is non empty, the log files will be in the specified dir,
359 * and the db data dir's absolute path will be used as the log file
362 * @return the path to the info log directory
367 * This specifies the absolute dir path for write-ahead logs (WAL).
368 * If it is empty, the log files will be in the same dir as data,
369 * dbname is used as the data dir by default
370 * If it is non empty, the log files will be in kept the specified dir.
371 * When destroying the db,
372 * all log files in wal_dir and the dir itself is deleted
374 * @param walDir the path to the write-ahead-log directory.
375 * @return the instance of the current object.
377 T
setWalDir(String walDir
);
380 * Returns the path to the write-ahead-logs (WAL) directory.
382 * If it is empty, the log files will be in the same dir as data,
383 * dbname is used as the data dir by default
384 * If it is non empty, the log files will be in kept the specified dir.
385 * When destroying the db,
386 * all log files in wal_dir and the dir itself is deleted
388 * @return the path to the write-ahead-logs (WAL) directory.
393 * The periodicity when obsolete files get deleted. The default
394 * value is 6 hours. The files that get out of scope by compaction
395 * process will still get automatically delete on every compaction,
396 * regardless of this setting
398 * @param micros the time interval in micros
399 * @return the instance of the current object.
401 T
setDeleteObsoleteFilesPeriodMicros(long micros
);
404 * The periodicity when obsolete files get deleted. The default
405 * value is 6 hours. The files that get out of scope by compaction
406 * process will still get automatically delete on every compaction,
407 * regardless of this setting
409 * @return the time interval in micros when obsolete files will be deleted.
411 long deleteObsoleteFilesPeriodMicros();
414 * This value represents the maximum number of threads that will
415 * concurrently perform a compaction job by breaking it into multiple,
416 * smaller ones that are run simultaneously.
417 * Default: 1 (i.e. no subcompactions)
419 * @param maxSubcompactions The maximum number of threads that will
420 * concurrently perform a compaction job
422 * @return the instance of the current object.
424 T
setMaxSubcompactions(int maxSubcompactions
);
427 * This value represents the maximum number of threads that will
428 * concurrently perform a compaction job by breaking it into multiple,
429 * smaller ones that are run simultaneously.
430 * Default: 1 (i.e. no subcompactions)
432 * @return The maximum number of threads that will concurrently perform a
435 int maxSubcompactions();
438 * Specifies the maximum number of concurrent background flush jobs.
439 * If you're increasing this, also consider increasing number of threads in
440 * HIGH priority thread pool. For more information, see
443 * @param maxBackgroundFlushes number of max concurrent flush jobs
444 * @return the instance of the current object.
446 * @see RocksEnv#setBackgroundThreads(int)
447 * @see RocksEnv#setBackgroundThreads(int, Priority)
448 * @see MutableDBOptionsInterface#maxBackgroundCompactions()
450 * @deprecated Use {@link MutableDBOptionsInterface#setMaxBackgroundJobs(int)}
453 T
setMaxBackgroundFlushes(int maxBackgroundFlushes
);
456 * Returns the maximum number of concurrent background flush jobs.
457 * If you're increasing this, also consider increasing number of threads in
458 * HIGH priority thread pool. For more information, see
461 * @return the maximum number of concurrent background flush jobs.
462 * @see RocksEnv#setBackgroundThreads(int)
463 * @see RocksEnv#setBackgroundThreads(int, Priority)
466 int maxBackgroundFlushes();
469 * Specifies the maximum size of a info log file. If the current log file
470 * is larger than `max_log_file_size`, a new info log file will
472 * If 0, all logs will be written to one log file.
474 * @param maxLogFileSize the maximum size of a info log file.
475 * @return the instance of the current object.
476 * @throws java.lang.IllegalArgumentException thrown on 32-Bit platforms
477 * while overflowing the underlying platform specific value.
479 T
setMaxLogFileSize(long maxLogFileSize
);
482 * Returns the maximum size of a info log file. If the current log file
483 * is larger than this size, a new info log file will be created.
484 * If 0, all logs will be written to one log file.
486 * @return the maximum size of the info log file.
488 long maxLogFileSize();
491 * Specifies the time interval for the info log file to roll (in seconds).
492 * If specified with non-zero value, log file will be rolled
493 * if it has been active longer than `log_file_time_to_roll`.
494 * Default: 0 (disabled)
496 * @param logFileTimeToRoll the time interval in seconds.
497 * @return the instance of the current object.
498 * @throws java.lang.IllegalArgumentException thrown on 32-Bit platforms
499 * while overflowing the underlying platform specific value.
501 T
setLogFileTimeToRoll(long logFileTimeToRoll
);
504 * Returns the time interval for the info log file to roll (in seconds).
505 * If specified with non-zero value, log file will be rolled
506 * if it has been active longer than `log_file_time_to_roll`.
507 * Default: 0 (disabled)
509 * @return the time interval in seconds.
511 long logFileTimeToRoll();
514 * Specifies the maximum number of info log files to be kept.
517 * @param keepLogFileNum the maximum number of info log files to be kept.
518 * @return the instance of the current object.
519 * @throws java.lang.IllegalArgumentException thrown on 32-Bit platforms
520 * while overflowing the underlying platform specific value.
522 T
setKeepLogFileNum(long keepLogFileNum
);
525 * Returns the maximum number of info log files to be kept.
528 * @return the maximum number of info log files to be kept.
530 long keepLogFileNum();
535 * If non-zero, we will reuse previously written log files for new
536 * logs, overwriting the old data. The value indicates how many
537 * such files we will keep around at any point in time for later
540 * This is more efficient because the blocks are already
541 * allocated and fdatasync does not need to update the inode after
546 * @param recycleLogFileNum the number of log files to keep for recycling
548 * @return the reference to the current options
550 T
setRecycleLogFileNum(long recycleLogFileNum
);
555 * If non-zero, we will reuse previously written log files for new
556 * logs, overwriting the old data. The value indicates how many
557 * such files we will keep around at any point in time for later
560 * This is more efficient because the blocks are already
561 * allocated and fdatasync does not need to update the inode after
566 * @return the number of log files kept for recycling
568 long recycleLogFileNum();
571 * Manifest file is rolled over on reaching this limit.
572 * The older manifest file be deleted.
573 * The default value is MAX_INT so that roll-over does not take place.
575 * @param maxManifestFileSize the size limit of a manifest file.
576 * @return the instance of the current object.
578 T
setMaxManifestFileSize(long maxManifestFileSize
);
581 * Manifest file is rolled over on reaching this limit.
582 * The older manifest file be deleted.
583 * The default value is MAX_INT so that roll-over does not take place.
585 * @return the size limit of a manifest file.
587 long maxManifestFileSize();
590 * Number of shards used for table cache.
592 * @param tableCacheNumshardbits the number of chards
593 * @return the instance of the current object.
595 T
setTableCacheNumshardbits(int tableCacheNumshardbits
);
598 * Number of shards used for table cache.
600 * @return the number of shards used for table cache.
602 int tableCacheNumshardbits();
605 * {@link #walTtlSeconds()} and {@link #walSizeLimitMB()} affect how archived logs
608 * <li>If both set to 0, logs will be deleted asap and will not get into
610 * <li>If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0,
611 * WAL files will be checked every 10 min and if total size is greater
612 * then WAL_size_limit_MB, they will be deleted starting with the
613 * earliest until size_limit is met. All empty files will be deleted.</li>
614 * <li>If WAL_ttl_seconds is not 0 and WAL_size_limit_MB is 0, then
615 * WAL files will be checked every WAL_ttl_secondsi / 2 and those that
616 * are older than WAL_ttl_seconds will be deleted.</li>
617 * <li>If both are not 0, WAL files will be checked every 10 min and both
618 * checks will be performed with ttl being first.</li>
621 * @param walTtlSeconds the ttl seconds
622 * @return the instance of the current object.
623 * @see #setWalSizeLimitMB(long)
625 T
setWalTtlSeconds(long walTtlSeconds
);
628 * WalTtlSeconds() and walSizeLimitMB() affect how archived logs
631 * <li>If both set to 0, logs will be deleted asap and will not get into
633 * <li>If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0,
634 * WAL files will be checked every 10 min and if total size is greater
635 * then WAL_size_limit_MB, they will be deleted starting with the
636 * earliest until size_limit is met. All empty files will be deleted.</li>
637 * <li>If WAL_ttl_seconds is not 0 and WAL_size_limit_MB is 0, then
638 * WAL files will be checked every WAL_ttl_secondsi / 2 and those that
639 * are older than WAL_ttl_seconds will be deleted.</li>
640 * <li>If both are not 0, WAL files will be checked every 10 min and both
641 * checks will be performed with ttl being first.</li>
644 * @return the wal-ttl seconds
645 * @see #walSizeLimitMB()
647 long walTtlSeconds();
650 * WalTtlSeconds() and walSizeLimitMB() affect how archived logs
653 * <li>If both set to 0, logs will be deleted asap and will not get into
655 * <li>If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0,
656 * WAL files will be checked every 10 min and if total size is greater
657 * then WAL_size_limit_MB, they will be deleted starting with the
658 * earliest until size_limit is met. All empty files will be deleted.</li>
659 * <li>If WAL_ttl_seconds is not 0 and WAL_size_limit_MB is 0, then
660 * WAL files will be checked every WAL_ttl_secondsi / 2 and those that
661 * are older than WAL_ttl_seconds will be deleted.</li>
662 * <li>If both are not 0, WAL files will be checked every 10 min and both
663 * checks will be performed with ttl being first.</li>
666 * @param sizeLimitMB size limit in mega-bytes.
667 * @return the instance of the current object.
668 * @see #setWalSizeLimitMB(long)
670 T
setWalSizeLimitMB(long sizeLimitMB
);
673 * {@link #walTtlSeconds()} and {@code #walSizeLimitMB()} affect how archived logs
676 * <li>If both set to 0, logs will be deleted asap and will not get into
678 * <li>If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0,
679 * WAL files will be checked every 10 min and if total size is greater
680 * then WAL_size_limit_MB, they will be deleted starting with the
681 * earliest until size_limit is met. All empty files will be deleted.</li>
682 * <li>If WAL_ttl_seconds is not 0 and WAL_size_limit_MB is 0, then
683 * WAL files will be checked every WAL_ttl_seconds i / 2 and those that
684 * are older than WAL_ttl_seconds will be deleted.</li>
685 * <li>If both are not 0, WAL files will be checked every 10 min and both
686 * checks will be performed with ttl being first.</li>
688 * @return size limit in mega-bytes.
689 * @see #walSizeLimitMB()
691 long walSizeLimitMB();
694 * Number of bytes to preallocate (via fallocate) the manifest
695 * files. Default is 4mb, which is reasonable to reduce random IO
696 * as well as prevent overallocation for mounts that preallocate
697 * large amounts of data (such as xfs's allocsize option).
699 * @param size the size in byte
700 * @return the instance of the current object.
701 * @throws java.lang.IllegalArgumentException thrown on 32-Bit platforms
702 * while overflowing the underlying platform specific value.
704 T
setManifestPreallocationSize(long size
);
707 * Number of bytes to preallocate (via fallocate) the manifest
708 * files. Default is 4mb, which is reasonable to reduce random IO
709 * as well as prevent overallocation for mounts that preallocate
710 * large amounts of data (such as xfs's allocsize option).
712 * @return size in bytes.
714 long manifestPreallocationSize();
717 * Enable the OS to use direct I/O for reading sst tables.
720 * @param useDirectReads if true, then direct read is enabled
721 * @return the instance of the current object.
723 T
setUseDirectReads(boolean useDirectReads
);
726 * Enable the OS to use direct I/O for reading sst tables.
729 * @return if true, then direct reads are enabled
731 boolean useDirectReads();
734 * Enable the OS to use direct reads and writes in flush and
738 * @param useDirectIoForFlushAndCompaction if true, then direct
739 * I/O will be enabled for background flush and compactions
740 * @return the instance of the current object.
742 T
setUseDirectIoForFlushAndCompaction(boolean useDirectIoForFlushAndCompaction
);
745 * Enable the OS to use direct reads and writes in flush and
748 * @return if true, then direct I/O is enabled for flush and
751 boolean useDirectIoForFlushAndCompaction();
754 * Whether fallocate calls are allowed
756 * @param allowFAllocate false if fallocate() calls are bypassed
758 * @return the reference to the current options.
760 T
setAllowFAllocate(boolean allowFAllocate
);
763 * Whether fallocate calls are allowed
765 * @return false if fallocate() calls are bypassed
767 boolean allowFAllocate();
770 * Allow the OS to mmap file for reading sst tables.
773 * @param allowMmapReads true if mmap reads are allowed.
774 * @return the instance of the current object.
776 T
setAllowMmapReads(boolean allowMmapReads
);
779 * Allow the OS to mmap file for reading sst tables.
782 * @return true if mmap reads are allowed.
784 boolean allowMmapReads();
787 * Allow the OS to mmap file for writing. Default: false
789 * @param allowMmapWrites true if mmap writes are allowd.
790 * @return the instance of the current object.
792 T
setAllowMmapWrites(boolean allowMmapWrites
);
795 * Allow the OS to mmap file for writing. Default: false
797 * @return true if mmap writes are allowed.
799 boolean allowMmapWrites();
802 * Disable child process inherit open files. Default: true
804 * @param isFdCloseOnExec true if child process inheriting open
806 * @return the instance of the current object.
808 T
setIsFdCloseOnExec(boolean isFdCloseOnExec
);
811 * Disable child process inherit open files. Default: true
813 * @return true if child process inheriting open files is disabled.
815 boolean isFdCloseOnExec();
818 * If set true, will hint the underlying file system that the file
819 * access pattern is random, when a sst file is opened.
822 * @param adviseRandomOnOpen true if hinting random access is on.
823 * @return the instance of the current object.
825 T
setAdviseRandomOnOpen(boolean adviseRandomOnOpen
);
828 * If set true, will hint the underlying file system that the file
829 * access pattern is random, when a sst file is opened.
832 * @return true if hinting random access is on.
834 boolean adviseRandomOnOpen();
837 * Amount of data to build up in memtables across all column
838 * families before writing to disk.
840 * This is distinct from {@link ColumnFamilyOptions#writeBufferSize()},
841 * which enforces a limit for a single memtable.
843 * This feature is disabled by default. Specify a non-zero value
846 * Default: 0 (disabled)
848 * @param dbWriteBufferSize the size of the write buffer
850 * @return the reference to the current options.
852 T
setDbWriteBufferSize(long dbWriteBufferSize
);
855 * Use passed {@link WriteBufferManager} to control memory usage across
856 * multiple column families and/or DB instances.
858 * Check <a href="https://github.com/facebook/rocksdb/wiki/Write-Buffer-Manager">
859 * https://github.com/facebook/rocksdb/wiki/Write-Buffer-Manager</a>
860 * for more details on when to use it
862 * @param writeBufferManager The WriteBufferManager to use
863 * @return the reference of the current options.
865 T
setWriteBufferManager(final WriteBufferManager writeBufferManager
);
868 * Reference to {@link WriteBufferManager} used by it. <br>
870 * Default: null (Disabled)
872 * @return a reference to WriteBufferManager
874 WriteBufferManager
writeBufferManager();
877 * Amount of data to build up in memtables across all column
878 * families before writing to disk.
880 * This is distinct from {@link ColumnFamilyOptions#writeBufferSize()},
881 * which enforces a limit for a single memtable.
883 * This feature is disabled by default. Specify a non-zero value
886 * Default: 0 (disabled)
888 * @return the size of the write buffer
890 long dbWriteBufferSize();
893 * Specify the file access pattern once a compaction is started.
894 * It will be applied to all input files of a compaction.
896 * Default: {@link AccessHint#NORMAL}
898 * @param accessHint The access hint
900 * @return the reference to the current options.
902 T
setAccessHintOnCompactionStart(final AccessHint accessHint
);
905 * Specify the file access pattern once a compaction is started.
906 * It will be applied to all input files of a compaction.
908 * Default: {@link AccessHint#NORMAL}
910 * @return The access hint
912 AccessHint
accessHintOnCompactionStart();
915 * If true, always create a new file descriptor and new table reader
916 * for compaction inputs. Turn this parameter on may introduce extra
917 * memory usage in the table reader, if it allocates extra memory
918 * for indexes. This will allow file descriptor prefetch options
919 * to be set for compaction input files and not to impact file
920 * descriptors for the same file used by user queries.
921 * Suggest to enable {@link BlockBasedTableConfig#cacheIndexAndFilterBlocks()}
922 * for this mode if using block-based table.
926 * @param newTableReaderForCompactionInputs true if a new file descriptor and
927 * table reader should be created for compaction inputs
929 * @return the reference to the current options.
931 T
setNewTableReaderForCompactionInputs(
932 boolean newTableReaderForCompactionInputs
);
935 * If true, always create a new file descriptor and new table reader
936 * for compaction inputs. Turn this parameter on may introduce extra
937 * memory usage in the table reader, if it allocates extra memory
938 * for indexes. This will allow file descriptor prefetch options
939 * to be set for compaction input files and not to impact file
940 * descriptors for the same file used by user queries.
941 * Suggest to enable {@link BlockBasedTableConfig#cacheIndexAndFilterBlocks()}
942 * for this mode if using block-based table.
946 * @return true if a new file descriptor and table reader are created for
949 boolean newTableReaderForCompactionInputs();
952 * This is a maximum buffer size that is used by WinMmapReadableFile in
953 * unbuffered disk I/O mode. We need to maintain an aligned buffer for
954 * reads. We allow the buffer to grow until the specified value and then
955 * for bigger requests allocate one shot buffers. In unbuffered mode we
956 * always bypass read-ahead buffer at ReadaheadRandomAccessFile
957 * When read-ahead is required we then make use of
958 * {@link MutableDBOptionsInterface#compactionReadaheadSize()} value and
959 * always try to read ahead.
960 * With read-ahead we always pre-allocate buffer to the size instead of
961 * growing it up to a limit.
963 * This option is currently honored only on Windows
967 * Special value: 0 - means do not maintain per instance buffer. Allocate
968 * per request buffer and avoid locking.
970 * @param randomAccessMaxBufferSize the maximum size of the random access
973 * @return the reference to the current options.
975 T
setRandomAccessMaxBufferSize(long randomAccessMaxBufferSize
);
  /**
   * This is a maximum buffer size that is used by WinMmapReadableFile in
   * unbuffered disk I/O mode. We need to maintain an aligned buffer for
   * reads. We allow the buffer to grow until the specified value and then
   * for bigger requests allocate one shot buffers. In unbuffered mode we
   * always bypass read-ahead buffer at ReadaheadRandomAccessFile
   * When read-ahead is required we then make use of
   * {@link MutableDBOptionsInterface#compactionReadaheadSize()} value and
   * always try to read ahead. With read-ahead we always pre-allocate buffer
   * to the size instead of growing it up to a limit.
   *
   * This option is currently honored only on Windows.
   *
   * Special value: 0 - means do not maintain per instance buffer. Allocate
   * per request buffer and avoid locking.
   *
   * @return the maximum size of the random access buffer
   */
  long randomAccessMaxBufferSize();
  /**
   * Use adaptive mutex, which spins in the user space before resorting
   * to kernel. This could reduce context switch when the mutex is not
   * heavily contended. However, if the mutex is hot, we could end up
   * wasting spin time.
   *
   * @param useAdaptiveMutex true if adaptive mutex is used.
   * @return the instance of the current object.
   */
  T setUseAdaptiveMutex(boolean useAdaptiveMutex);
  /**
   * Use adaptive mutex, which spins in the user space before resorting
   * to kernel. This could reduce context switch when the mutex is not
   * heavily contended. However, if the mutex is hot, we could end up
   * wasting spin time.
   *
   * @return true if adaptive mutex is used.
   */
  boolean useAdaptiveMutex();
1024 // * Sets the {@link EventListener}s whose callback functions
1025 // * will be called when specific RocksDB event happens.
1027 // * @param listeners the listeners who should be notified on various events.
1029 // * @return the instance of the current object.
1031 // T setListeners(final List<EventListener> listeners);
1034 // * Gets the {@link EventListener}s whose callback functions
1035 // * will be called when specific RocksDB event happens.
1037 // * @return a collection of Event listeners.
1039 // Collection<EventListener> listeners();
  /**
   * If true, then the status of the threads involved in this DB will
   * be tracked and available via GetThreadList() API.
   *
   * @param enableThreadTracking true to enable tracking
   *
   * @return the reference to the current options.
   */
  T setEnableThreadTracking(boolean enableThreadTracking);
  /**
   * If true, then the status of the threads involved in this DB will
   * be tracked and available via GetThreadList() API.
   *
   * @return true if tracking is enabled
   */
  boolean enableThreadTracking();
  /**
   * By default, a single write thread queue is maintained. The thread gets
   * to the head of the queue becomes write batch group leader and responsible
   * for writing to WAL and memtable for the batch group.
   *
   * If {@link #enablePipelinedWrite()} is true, separate write thread queue is
   * maintained for WAL write and memtable write. A write thread first enters the
   * WAL writer queue and then the memtable writer queue. Pending threads on the
   * WAL writer queue thus only have to wait for previous writers to finish their
   * WAL writing but not the memtable writing. Enabling the feature may improve
   * write throughput and reduce latency of the prepare phase of a two-phase
   * commit.
   *
   * @param enablePipelinedWrite true to enable pipelined writes
   *
   * @return the reference to the current options.
   */
  T setEnablePipelinedWrite(final boolean enablePipelinedWrite);
  /**
   * Returns true if pipelined writes are enabled.
   * See {@link #setEnablePipelinedWrite(boolean)}.
   *
   * @return true if pipelined writes are enabled, false otherwise.
   */
  boolean enablePipelinedWrite();
  /**
   * If true, allow multi-writers to update mem tables in parallel.
   * Only some memtable factories support concurrent writes; currently it
   * is implemented only for SkipListFactory. Concurrent memtable writes
   * are not compatible with inplace_update_support or filter_deletes.
   * It is strongly recommended to set
   * {@link #setEnableWriteThreadAdaptiveYield(boolean)} if you are going to use
   * this feature.
   *
   * @param allowConcurrentMemtableWrite true to enable concurrent writes
   *     for the memtable
   *
   * @return the reference to the current options.
   */
  T setAllowConcurrentMemtableWrite(boolean allowConcurrentMemtableWrite);
  /**
   * If true, allow multi-writers to update mem tables in parallel.
   * Only some memtable factories support concurrent writes; currently it
   * is implemented only for SkipListFactory. Concurrent memtable writes
   * are not compatible with inplace_update_support or filter_deletes.
   * It is strongly recommended to set
   * {@link #setEnableWriteThreadAdaptiveYield(boolean)} if you are going to use
   * this feature.
   *
   * @return true if concurrent writes are enabled for the memtable
   */
  boolean allowConcurrentMemtableWrite();
  /**
   * If true, threads synchronizing with the write batch group leader will
   * wait for up to {@link #writeThreadMaxYieldUsec()} before blocking on a
   * mutex. This can substantially improve throughput for concurrent workloads,
   * regardless of whether {@link #allowConcurrentMemtableWrite()} is enabled.
   *
   * @param enableWriteThreadAdaptiveYield true to enable adaptive yield for the
   *     writing threads
   *
   * @return the reference to the current options.
   */
  T setEnableWriteThreadAdaptiveYield(boolean enableWriteThreadAdaptiveYield);
  /**
   * If true, threads synchronizing with the write batch group leader will
   * wait for up to {@link #writeThreadMaxYieldUsec()} before blocking on a
   * mutex. This can substantially improve throughput for concurrent workloads,
   * regardless of whether {@link #allowConcurrentMemtableWrite()} is enabled.
   *
   * @return true if adaptive yield is enabled
   *     for the writing threads
   */
  boolean enableWriteThreadAdaptiveYield();
  /**
   * The maximum number of microseconds that a write operation will use
   * a yielding spin loop to coordinate with other write threads before
   * blocking on a mutex. (Assuming {@link #writeThreadSlowYieldUsec()} is
   * set properly) increasing this value is likely to increase RocksDB
   * throughput at the expense of increased CPU usage.
   *
   * @param writeThreadMaxYieldUsec maximum number of microseconds
   *
   * @return the reference to the current options.
   */
  T setWriteThreadMaxYieldUsec(long writeThreadMaxYieldUsec);
  /**
   * The maximum number of microseconds that a write operation will use
   * a yielding spin loop to coordinate with other write threads before
   * blocking on a mutex. (Assuming {@link #writeThreadSlowYieldUsec()} is
   * set properly) increasing this value is likely to increase RocksDB
   * throughput at the expense of increased CPU usage.
   *
   * @return the maximum number of microseconds
   */
  long writeThreadMaxYieldUsec();
  /**
   * The latency in microseconds after which a std::this_thread::yield
   * call (sched_yield on Linux) is considered to be a signal that
   * other processes or threads would like to use the current core.
   * Increasing this makes writer threads more likely to take CPU
   * by spinning, which will show up as an increase in the number of
   * involuntary context switches.
   *
   * @param writeThreadSlowYieldUsec the latency in microseconds
   *
   * @return the reference to the current options.
   */
  T setWriteThreadSlowYieldUsec(long writeThreadSlowYieldUsec);
  /**
   * The latency in microseconds after which a std::this_thread::yield
   * call (sched_yield on Linux) is considered to be a signal that
   * other processes or threads would like to use the current core.
   * Increasing this makes writer threads more likely to take CPU
   * by spinning, which will show up as an increase in the number of
   * involuntary context switches.
   *
   * @return the latency in microseconds
   */
  long writeThreadSlowYieldUsec();
  /**
   * If true, then DB::Open() will not update the statistics used to optimize
   * compaction decision by loading table properties from many files.
   * Turning off this feature will improve DBOpen time especially in
   * disk environment.
   *
   * @param skipStatsUpdateOnDbOpen true if updating stats will be skipped
   *
   * @return the reference to the current options.
   */
  T setSkipStatsUpdateOnDbOpen(boolean skipStatsUpdateOnDbOpen);
  /**
   * If true, then DB::Open() will not update the statistics used to optimize
   * compaction decision by loading table properties from many files.
   * Turning off this feature will improve DBOpen time especially in
   * disk environment.
   *
   * @return true if updating stats will be skipped
   */
  boolean skipStatsUpdateOnDbOpen();
  /**
   * Recovery mode to control the consistency while replaying WAL.
   *
   * Default: {@link WALRecoveryMode#PointInTimeRecovery}
   *
   * @param walRecoveryMode The WAL recover mode
   *
   * @return the reference to the current options.
   */
  T setWalRecoveryMode(WALRecoveryMode walRecoveryMode);
  /**
   * Recovery mode to control the consistency while replaying WAL.
   *
   * Default: {@link WALRecoveryMode#PointInTimeRecovery}
   *
   * @return The WAL recover mode
   */
  WALRecoveryMode walRecoveryMode();
  /**
   * If set to false then recovery will fail when a prepared
   * transaction is encountered in the WAL.
   *
   * @param allow2pc true if two-phase-commit is enabled
   *
   * @return the reference to the current options.
   */
  T setAllow2pc(boolean allow2pc);
1263 * if set to false then recovery will fail when a prepared
1264 * transaction is encountered in the WAL
1268 * @return true if two-phase-commit is enabled
  /**
   * A global cache for table-level rows.
   *
   * Default: null (disabled)
   *
   * @param rowCache The global row cache
   *
   * @return the reference to the current options.
   */
  T setRowCache(final Cache rowCache);
1284 * A global cache for table-level rows.
1286 * Default: null (disabled)
1288 * @return The global row cache
  /**
   * A filter object supplied to be invoked while processing write-ahead-logs
   * (WALs) during recovery. The filter provides a way to inspect log
   * records, ignoring a particular record or skipping replay.
   * The filter is invoked at startup and is invoked from a single-thread.
   *
   * @param walFilter the filter for processing WALs during recovery.
   *
   * @return the reference to the current options.
   */
  T setWalFilter(final AbstractWalFilter walFilter);
  /**
   * Gets the filter for processing WALs during recovery.
   * See {@link #setWalFilter(AbstractWalFilter)}.
   *
   * @return the filter used for processing WALs during recovery.
   */
  WalFilter walFilter();
  /**
   * If true, then DB::Open / CreateColumnFamily / DropColumnFamily
   * / SetOptions will fail if options file is not detected or properly
   * persisted.
   *
   * @param failIfOptionsFileError true if we should fail if there is an error
   *     in the options file
   *
   * @return the reference to the current options.
   */
  T setFailIfOptionsFileError(boolean failIfOptionsFileError);
  /**
   * If true, then DB::Open / CreateColumnFamily / DropColumnFamily
   * / SetOptions will fail if options file is not detected or properly
   * persisted.
   *
   * @return true if we should fail if there is an error in the options file
   */
  boolean failIfOptionsFileError();
  /**
   * If true, then print malloc stats together with rocksdb.stats
   * when printing to LOG.
   *
   * @param dumpMallocStats true if malloc stats should be printed to LOG
   *
   * @return the reference to the current options.
   */
  T setDumpMallocStats(boolean dumpMallocStats);
  /**
   * If true, then print malloc stats together with rocksdb.stats
   * when printing to LOG.
   *
   * @return true if malloc stats should be printed to LOG
   */
  boolean dumpMallocStats();
  /**
   * By default RocksDB replays WAL logs and flushes them on DB open, which may
   * create very small SST files. If this option is enabled, RocksDB will try
   * to avoid (but not guarantee not to) flush during recovery. Also, existing
   * WAL logs will be kept, so that if crash happened before flush, we still
   * have logs to recover from.
   *
   * @param avoidFlushDuringRecovery true to try to avoid (but not guarantee
   *     not to) flush during recovery
   *
   * @return the reference to the current options.
   */
  T setAvoidFlushDuringRecovery(boolean avoidFlushDuringRecovery);
  /**
   * By default RocksDB replays WAL logs and flushes them on DB open, which may
   * create very small SST files. If this option is enabled, RocksDB will try
   * to avoid (but not guarantee not to) flush during recovery. Also, existing
   * WAL logs will be kept, so that if crash happened before flush, we still
   * have logs to recover from.
   *
   * @return true to try to avoid (but not guarantee not to) flush during
   *     recovery
   */
  boolean avoidFlushDuringRecovery();
  /**
   * Set this option to true during creation of database if you want
   * to be able to ingest behind (call IngestExternalFile() skipping keys
   * that already exist, rather than overwriting matching keys).
   * Setting this option to true will affect 3 things:
   * 1) Disable some internal optimizations around SST file compression
   * 2) Reserve bottom-most level for ingested files only.
   * 3) Note that num_levels should be &gt;= 3 if this option is turned on.
   *
   * @param allowIngestBehind true to allow ingest behind, false to disallow.
   *
   * @return the reference to the current options.
   */
  T setAllowIngestBehind(final boolean allowIngestBehind);
  /**
   * Returns true if ingest behind is allowed.
   * See {@link #setAllowIngestBehind(boolean)}.
   *
   * @return true if ingest behind is allowed, false otherwise.
   */
  boolean allowIngestBehind();
  /**
   * Needed to support differential snapshots.
   * If set to true then DB will only process deletes with sequence number
   * less than what was set by SetPreserveDeletesSequenceNumber(uint64_t ts).
   * Clients are responsible to periodically call this method to advance
   * the cutoff time. If this method is never called and preserve_deletes
   * is set to true NO deletes will ever be processed.
   * At the moment this only keeps normal deletes, SingleDeletes will
   * not be preserved.
   *
   * @param preserveDeletes true to preserve deletes.
   *
   * @return the reference to the current options.
   */
  T setPreserveDeletes(final boolean preserveDeletes);
  /**
   * Returns true if deletes are preserved.
   * See {@link #setPreserveDeletes(boolean)}.
   *
   * @return true if deletes are preserved, false otherwise.
   */
  boolean preserveDeletes();
  /**
   * If enabled it uses two queues for writes, one for the ones with
   * disable_memtable and one for the ones that also write to memtable. This
   * allows the memtable writes not to lag behind other writes. It can be used
   * to optimize MySQL 2PC in which only the commits, which are serial, write to
   * memtable.
   *
   * @param twoWriteQueues true to enable two write queues, false otherwise.
   *
   * @return the reference to the current options.
   */
  T setTwoWriteQueues(final boolean twoWriteQueues);
  /**
   * Returns true if two write queues are enabled.
   *
   * @return true if two write queues are enabled, false otherwise.
   */
  boolean twoWriteQueues();
  /**
   * If true WAL is not flushed automatically after each write. Instead it
   * relies on manual invocation of FlushWAL to write the WAL buffer to its
   * file.
   *
   * @param manualWalFlush true to disable automatic WAL flushing,
   *     false otherwise.
   *
   * @return the reference to the current options.
   */
  T setManualWalFlush(final boolean manualWalFlush);
  /**
   * Returns true if automatic WAL flushing is disabled.
   * See {@link #setManualWalFlush(boolean)}.
   *
   * @return true if automatic WAL flushing is disabled, false otherwise.
   */
  boolean manualWalFlush();
  /**
   * If true, RocksDB supports flushing multiple column families and committing
   * their results atomically to MANIFEST. Note that it is not
   * necessary to set atomic_flush to true if WAL is always enabled since WAL
   * allows the database to be restored to the last persistent state in WAL.
   * This option is useful when there are column families with writes NOT
   * protected by the WAL.
   * For manual flush, application has to specify which column families to
   * flush atomically in {@link RocksDB#flush(FlushOptions, List)}.
   * For auto-triggered flush, RocksDB atomically flushes ALL column families.
   *
   * Currently, any WAL-enabled writes after atomic flush may be replayed
   * independently if the process crashes later and tries to recover.
   *
   * @param atomicFlush true to enable atomic flush of multiple column families.
   *
   * @return the reference to the current options.
   */
  T setAtomicFlush(final boolean atomicFlush);
  /**
   * Determine if atomic flush of multiple column families is enabled.
   *
   * See {@link #setAtomicFlush(boolean)}.
   *
   * @return true if atomic flush is enabled.
   */
  boolean atomicFlush();