// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
import java.util.Collection;
import java.util.List;
11 public interface DBOptionsInterface
<T
extends DBOptionsInterface
> {
14 * Use this if your DB is very small (like under 1GB) and you don't want to
15 * spend lots of memory for memtables.
17 * @return the instance of the current object.
19 T
optimizeForSmallDb();
22 * Use the specified object to interact with the environment,
23 * e.g. to read/write files, schedule background work, etc.
24 * Default: {@link Env#getDefault()}
26 * @param env {@link Env} instance.
27 * @return the instance of the current Options.
29 T
setEnv(final Env env
);
32 * Returns the set RocksEnv instance.
34 * @return {@link RocksEnv} instance set in the options.
39 * <p>By default, RocksDB uses only one background thread for flush and
40 * compaction. Calling this function will set it up such that total of
41 * `total_threads` is used.</p>
43 * <p>You almost definitely want to call this function if your system is
44 * bottlenecked by RocksDB.</p>
46 * @param totalThreads The total number of threads to be used by RocksDB.
47 * A good value is the number of cores.
49 * @return the instance of the current Options
51 T
setIncreaseParallelism(int totalThreads
);
54 * If this value is set to true, then the database will be created
55 * if it is missing during {@code RocksDB.open()}.
58 * @param flag a flag indicating whether to create a database the
59 * specified database in {@link RocksDB#open(org.rocksdb.Options, String)} operation
61 * @return the instance of the current Options
62 * @see RocksDB#open(org.rocksdb.Options, String)
64 T
setCreateIfMissing(boolean flag
);
67 * Return true if the create_if_missing flag is set to true.
68 * If true, the database will be created if it is missing.
70 * @return true if the createIfMissing option is set to true.
71 * @see #setCreateIfMissing(boolean)
73 boolean createIfMissing();
76 * <p>If true, missing column families will be automatically created</p>
78 * <p>Default: false</p>
80 * @param flag a flag indicating if missing column families shall be
81 * created automatically.
82 * @return true if missing column families shall be created automatically
85 T
setCreateMissingColumnFamilies(boolean flag
);
88 * Return true if the create_missing_column_families flag is set
89 * to true. If true column families be created if missing.
91 * @return true if the createMissingColumnFamilies is set to
93 * @see #setCreateMissingColumnFamilies(boolean)
95 boolean createMissingColumnFamilies();
98 * If true, an error will be thrown during RocksDB.open() if the
99 * database already exists.
102 * @param errorIfExists if true, an exception will be thrown
103 * during {@code RocksDB.open()} if the database already exists.
104 * @return the reference to the current option.
105 * @see RocksDB#open(org.rocksdb.Options, String)
107 T
setErrorIfExists(boolean errorIfExists
);
110 * If true, an error will be thrown during RocksDB.open() if the
111 * database already exists.
113 * @return if true, an error is raised when the specified database
114 * already exists before open.
116 boolean errorIfExists();
119 * If true, the implementation will do aggressive checking of the
120 * data it is processing and will stop early if it detects any
121 * errors. This may have unforeseen ramifications: for example, a
122 * corruption of one DB entry may cause a large number of entries to
123 * become unreadable or for the entire DB to become unopenable.
124 * If any of the writes to the database fails (Put, Delete, Merge, Write),
125 * the database will switch to read-only mode and fail all other
129 * @param paranoidChecks a flag to indicate whether paranoid-check
131 * @return the reference to the current option.
133 T
setParanoidChecks(boolean paranoidChecks
);
136 * If true, the implementation will do aggressive checking of the
137 * data it is processing and will stop early if it detects any
138 * errors. This may have unforeseen ramifications: for example, a
139 * corruption of one DB entry may cause a large number of entries to
140 * become unreadable or for the entire DB to become unopenable.
141 * If any of the writes to the database fails (Put, Delete, Merge, Write),
142 * the database will switch to read-only mode and fail all other
145 * @return a boolean indicating whether paranoid-check is on.
147 boolean paranoidChecks();
150 * Use to control write rate of flush and compaction. Flush has higher
151 * priority than compaction. Rate limiting is disabled if nullptr.
154 * @param rateLimiter {@link org.rocksdb.RateLimiter} instance.
155 * @return the instance of the current object.
159 T
setRateLimiter(RateLimiter rateLimiter
);
162 * <p>Any internal progress/error information generated by
163 * the db will be written to the Logger if it is non-nullptr,
164 * or to a file stored in the same directory as the DB
165 * contents if info_log is nullptr.</p>
167 * <p>Default: nullptr</p>
169 * @param logger {@link Logger} instance.
170 * @return the instance of the current object.
172 T
setLogger(Logger logger
);
175 * <p>Sets the RocksDB log level. Default level is INFO</p>
177 * @param infoLogLevel log level to set.
178 * @return the instance of the current object.
180 T
setInfoLogLevel(InfoLogLevel infoLogLevel
);
183 * <p>Returns currently set log level.</p>
184 * @return {@link org.rocksdb.InfoLogLevel} instance.
186 InfoLogLevel
infoLogLevel();
189 * Number of open files that can be used by the DB. You may need to
190 * increase this if your database has a large working set. Value -1 means
191 * files opened are always kept open. You can estimate number of files based
192 * on {@code target_file_size_base} and {@code target_file_size_multiplier}
193 * for level-based compaction. For universal-style compaction, you can usually
197 * @param maxOpenFiles the maximum number of open files.
198 * @return the instance of the current object.
200 T
setMaxOpenFiles(int maxOpenFiles
);
203 * Number of open files that can be used by the DB. You may need to
204 * increase this if your database has a large working set. Value -1 means
205 * files opened are always kept open. You can estimate number of files based
206 * on {@code target_file_size_base} and {@code target_file_size_multiplier}
207 * for level-based compaction. For universal-style compaction, you can usually
210 * @return the maximum number of open files.
215 * If {@link #maxOpenFiles()} is -1, DB will open all files on DB::Open(). You
216 * can use this option to increase the number of threads used to open the
221 * @param maxFileOpeningThreads the maximum number of threads to use to
224 * @return the reference to the current options.
226 T
setMaxFileOpeningThreads(int maxFileOpeningThreads
);
229 * If {@link #maxOpenFiles()} is -1, DB will open all files on DB::Open(). You
230 * can use this option to increase the number of threads used to open the
235 * @return the maximum number of threads to use to open files
237 int maxFileOpeningThreads();
240 * <p>Once write-ahead logs exceed this size, we will start forcing the
241 * flush of column families whose memtables are backed by the oldest live
242 * WAL file (i.e. the ones that are causing all the space amplification).
244 * <p>If set to 0 (default), we will dynamically choose the WAL size limit to
245 * be [sum of all write_buffer_size * max_write_buffer_number] * 2</p>
248 * @param maxTotalWalSize max total wal size.
249 * @return the instance of the current object.
251 T
setMaxTotalWalSize(long maxTotalWalSize
);
254 * <p>Returns the max total wal size. Once write-ahead logs exceed this size,
255 * we will start forcing the flush of column families whose memtables are
256 * backed by the oldest live WAL file (i.e. the ones that are causing all
257 * the space amplification).</p>
259 * <p>If set to 0 (default), we will dynamically choose the WAL size limit
260 * to be [sum of all write_buffer_size * max_write_buffer_number] * 2
263 * @return max total wal size
265 long maxTotalWalSize();
268 * <p>Creates statistics object which collects metrics about database operations.
269 * Statistics objects should not be shared between DB instances as
270 * it does not use any locks to prevent concurrent updates.</p>
272 * @return the instance of the current object.
273 * @see RocksDB#open(org.rocksdb.Options, String)
275 T
createStatistics();
278 * <p>Returns statistics object. Calls {@link #createStatistics()} if
279 * C++ returns {@code nullptr} for statistics.</p>
281 * @return the instance of the statistics object.
282 * @see #createStatistics()
284 Statistics
statisticsPtr();
287 * <p>If true, then every store to stable storage will issue a fsync.</p>
288 * <p>If false, then every store to stable storage will issue a fdatasync.
289 * This parameter should be set to true while storing data to
290 * filesystem like ext3 that can lose files after a reboot.</p>
291 * <p>Default: false</p>
293 * @param useFsync a boolean flag to specify whether to use fsync
294 * @return the instance of the current object.
296 T
setUseFsync(boolean useFsync
);
299 * <p>If true, then every store to stable storage will issue a fsync.</p>
300 * <p>If false, then every store to stable storage will issue a fdatasync.
301 * This parameter should be set to true while storing data to
302 * filesystem like ext3 that can lose files after a reboot.</p>
304 * @return boolean value indicating if fsync is used.
309 * A list of paths where SST files can be put into, with its target size.
310 * Newer data is placed into paths specified earlier in the vector while
311 * older data gradually moves to paths specified later in the vector.
313 * For example, you have a flash device with 10GB allocated for the DB,
314 * as well as a hard drive of 2TB, you should config it to be:
315 * [{"/flash_path", 10GB}, {"/hard_drive", 2TB}]
317 * The system will try to guarantee data under each path is close to but
318 * not larger than the target size. But current and future file sizes used
319 * by determining where to place a file are based on best-effort estimation,
320 * which means there is a chance that the actual size under the directory
321 * is slightly more than target size under some workloads. User should give
322 * some buffer room for those cases.
324 * If none of the paths has sufficient room to place a file, the file will
325 * be placed to the last path anyway, despite to the target size.
327 * Placing newer data to earlier paths is also best-efforts. User should
328 * expect user files to be placed in higher levels in some extreme cases.
330 * If left empty, only one path will be used, which is db_name passed when
335 * @param dbPaths the paths and target sizes
337 * @return the reference to the current options
339 T
setDbPaths(final Collection
<DbPath
> dbPaths
);
342 * A list of paths where SST files can be put into, with its target size.
343 * Newer data is placed into paths specified earlier in the vector while
344 * older data gradually moves to paths specified later in the vector.
346 * For example, you have a flash device with 10GB allocated for the DB,
347 * as well as a hard drive of 2TB, you should config it to be:
348 * [{"/flash_path", 10GB}, {"/hard_drive", 2TB}]
350 * The system will try to guarantee data under each path is close to but
351 * not larger than the target size. But current and future file sizes used
352 * by determining where to place a file are based on best-effort estimation,
353 * which means there is a chance that the actual size under the directory
354 * is slightly more than target size under some workloads. User should give
355 * some buffer room for those cases.
357 * If none of the paths has sufficient room to place a file, the file will
358 * be placed to the last path anyway, despite to the target size.
360 * Placing newer data to earlier paths is also best-efforts. User should
361 * expect user files to be placed in higher levels in some extreme cases.
363 * If left empty, only one path will be used, which is db_name passed when
366 * Default: {@link java.util.Collections#emptyList()}
368 * @return dbPaths the paths and target sizes
370 List
<DbPath
> dbPaths();
373 * This specifies the info LOG dir.
374 * If it is empty, the log files will be in the same dir as data.
375 * If it is non empty, the log files will be in the specified dir,
376 * and the db data dir's absolute path will be used as the log file
379 * @param dbLogDir the path to the info log directory
380 * @return the instance of the current object.
382 T
setDbLogDir(String dbLogDir
);
385 * Returns the directory of info log.
387 * If it is empty, the log files will be in the same dir as data.
388 * If it is non empty, the log files will be in the specified dir,
389 * and the db data dir's absolute path will be used as the log file
392 * @return the path to the info log directory
397 * This specifies the absolute dir path for write-ahead logs (WAL).
398 * If it is empty, the log files will be in the same dir as data,
399 * dbname is used as the data dir by default
400 * If it is non empty, the log files will be in kept the specified dir.
401 * When destroying the db,
402 * all log files in wal_dir and the dir itself is deleted
404 * @param walDir the path to the write-ahead-log directory.
405 * @return the instance of the current object.
407 T
setWalDir(String walDir
);
410 * Returns the path to the write-ahead-logs (WAL) directory.
412 * If it is empty, the log files will be in the same dir as data,
413 * dbname is used as the data dir by default
414 * If it is non empty, the log files will be in kept the specified dir.
415 * When destroying the db,
416 * all log files in wal_dir and the dir itself is deleted
418 * @return the path to the write-ahead-logs (WAL) directory.
423 * The periodicity when obsolete files get deleted. The default
424 * value is 6 hours. The files that get out of scope by compaction
425 * process will still get automatically delete on every compaction,
426 * regardless of this setting
428 * @param micros the time interval in micros
429 * @return the instance of the current object.
431 T
setDeleteObsoleteFilesPeriodMicros(long micros
);
434 * The periodicity when obsolete files get deleted. The default
435 * value is 6 hours. The files that get out of scope by compaction
436 * process will still get automatically delete on every compaction,
437 * regardless of this setting
439 * @return the time interval in micros when obsolete files will be deleted.
441 long deleteObsoleteFilesPeriodMicros();
444 * Suggested number of concurrent background compaction jobs, submitted to
445 * the default LOW priority thread pool.
448 * @param baseBackgroundCompactions Suggested number of background compaction
451 void setBaseBackgroundCompactions(int baseBackgroundCompactions
);
454 * Suggested number of concurrent background compaction jobs, submitted to
455 * the default LOW priority thread pool.
458 * @return Suggested number of background compaction jobs
460 int baseBackgroundCompactions();
463 * Specifies the maximum number of concurrent background compaction jobs,
464 * submitted to the default LOW priority thread pool.
465 * If you're increasing this, also consider increasing number of threads in
466 * LOW priority thread pool. For more information, see
469 * @param maxBackgroundCompactions the maximum number of background
471 * @return the instance of the current object.
473 * @see RocksEnv#setBackgroundThreads(int)
474 * @see RocksEnv#setBackgroundThreads(int, int)
475 * @see #maxBackgroundFlushes()
477 T
setMaxBackgroundCompactions(int maxBackgroundCompactions
);
480 * Returns the maximum number of concurrent background compaction jobs,
481 * submitted to the default LOW priority thread pool.
482 * When increasing this number, we may also want to consider increasing
483 * number of threads in LOW priority thread pool.
486 * @return the maximum number of concurrent background compaction jobs.
487 * @see RocksEnv#setBackgroundThreads(int)
488 * @see RocksEnv#setBackgroundThreads(int, int)
490 int maxBackgroundCompactions();
493 * This value represents the maximum number of threads that will
494 * concurrently perform a compaction job by breaking it into multiple,
495 * smaller ones that are run simultaneously.
496 * Default: 1 (i.e. no subcompactions)
498 * @param maxSubcompactions The maximum number of threads that will
499 * concurrently perform a compaction job
501 void setMaxSubcompactions(int maxSubcompactions
);
504 * This value represents the maximum number of threads that will
505 * concurrently perform a compaction job by breaking it into multiple,
506 * smaller ones that are run simultaneously.
507 * Default: 1 (i.e. no subcompactions)
509 * @return The maximum number of threads that will concurrently perform a
512 int maxSubcompactions();
515 * Specifies the maximum number of concurrent background flush jobs.
516 * If you're increasing this, also consider increasing number of threads in
517 * HIGH priority thread pool. For more information, see
520 * @param maxBackgroundFlushes number of max concurrent flush jobs
521 * @return the instance of the current object.
523 * @see RocksEnv#setBackgroundThreads(int)
524 * @see RocksEnv#setBackgroundThreads(int, int)
525 * @see #maxBackgroundCompactions()
527 T
setMaxBackgroundFlushes(int maxBackgroundFlushes
);
530 * Returns the maximum number of concurrent background flush jobs.
531 * If you're increasing this, also consider increasing number of threads in
532 * HIGH priority thread pool. For more information, see
535 * @return the maximum number of concurrent background flush jobs.
536 * @see RocksEnv#setBackgroundThreads(int)
537 * @see RocksEnv#setBackgroundThreads(int, int)
539 int maxBackgroundFlushes();
542 * Specifies the maximum size of a info log file. If the current log file
543 * is larger than `max_log_file_size`, a new info log file will
545 * If 0, all logs will be written to one log file.
547 * @param maxLogFileSize the maximum size of a info log file.
548 * @return the instance of the current object.
549 * @throws java.lang.IllegalArgumentException thrown on 32-Bit platforms
550 * while overflowing the underlying platform specific value.
552 T
setMaxLogFileSize(long maxLogFileSize
);
555 * Returns the maximum size of a info log file. If the current log file
556 * is larger than this size, a new info log file will be created.
557 * If 0, all logs will be written to one log file.
559 * @return the maximum size of the info log file.
561 long maxLogFileSize();
564 * Specifies the time interval for the info log file to roll (in seconds).
565 * If specified with non-zero value, log file will be rolled
566 * if it has been active longer than `log_file_time_to_roll`.
567 * Default: 0 (disabled)
569 * @param logFileTimeToRoll the time interval in seconds.
570 * @return the instance of the current object.
571 * @throws java.lang.IllegalArgumentException thrown on 32-Bit platforms
572 * while overflowing the underlying platform specific value.
574 T
setLogFileTimeToRoll(long logFileTimeToRoll
);
577 * Returns the time interval for the info log file to roll (in seconds).
578 * If specified with non-zero value, log file will be rolled
579 * if it has been active longer than `log_file_time_to_roll`.
580 * Default: 0 (disabled)
582 * @return the time interval in seconds.
584 long logFileTimeToRoll();
587 * Specifies the maximum number of info log files to be kept.
590 * @param keepLogFileNum the maximum number of info log files to be kept.
591 * @return the instance of the current object.
592 * @throws java.lang.IllegalArgumentException thrown on 32-Bit platforms
593 * while overflowing the underlying platform specific value.
595 T
setKeepLogFileNum(long keepLogFileNum
);
598 * Returns the maximum number of info log files to be kept.
601 * @return the maximum number of info log files to be kept.
603 long keepLogFileNum();
608 * If non-zero, we will reuse previously written log files for new
609 * logs, overwriting the old data. The value indicates how many
610 * such files we will keep around at any point in time for later
613 * This is more efficient because the blocks are already
614 * allocated and fdatasync does not need to update the inode after
619 * @param recycleLogFileNum the number of log files to keep for recycling
621 * @return the reference to the current options
623 T
setRecycleLogFileNum(long recycleLogFileNum
);
628 * If non-zero, we will reuse previously written log files for new
629 * logs, overwriting the old data. The value indicates how many
630 * such files we will keep around at any point in time for later
633 * This is more efficient because the blocks are already
634 * allocated and fdatasync does not need to update the inode after
639 * @return the number of log files kept for recycling
641 long recycleLogFileNum();
644 * Manifest file is rolled over on reaching this limit.
645 * The older manifest file be deleted.
646 * The default value is MAX_INT so that roll-over does not take place.
648 * @param maxManifestFileSize the size limit of a manifest file.
649 * @return the instance of the current object.
651 T
setMaxManifestFileSize(long maxManifestFileSize
);
654 * Manifest file is rolled over on reaching this limit.
655 * The older manifest file be deleted.
656 * The default value is MAX_INT so that roll-over does not take place.
658 * @return the size limit of a manifest file.
660 long maxManifestFileSize();
663 * Number of shards used for table cache.
665 * @param tableCacheNumshardbits the number of chards
666 * @return the instance of the current object.
668 T
setTableCacheNumshardbits(int tableCacheNumshardbits
);
671 * Number of shards used for table cache.
673 * @return the number of shards used for table cache.
675 int tableCacheNumshardbits();
678 * {@link #walTtlSeconds()} and {@link #walSizeLimitMB()} affect how archived logs
681 * <li>If both set to 0, logs will be deleted asap and will not get into
683 * <li>If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0,
684 * WAL files will be checked every 10 min and if total size is greater
685 * then WAL_size_limit_MB, they will be deleted starting with the
686 * earliest until size_limit is met. All empty files will be deleted.</li>
687 * <li>If WAL_ttl_seconds is not 0 and WAL_size_limit_MB is 0, then
688 * WAL files will be checked every WAL_ttl_secondsi / 2 and those that
689 * are older than WAL_ttl_seconds will be deleted.</li>
690 * <li>If both are not 0, WAL files will be checked every 10 min and both
691 * checks will be performed with ttl being first.</li>
694 * @param walTtlSeconds the ttl seconds
695 * @return the instance of the current object.
696 * @see #setWalSizeLimitMB(long)
698 T
setWalTtlSeconds(long walTtlSeconds
);
701 * WalTtlSeconds() and walSizeLimitMB() affect how archived logs
704 * <li>If both set to 0, logs will be deleted asap and will not get into
706 * <li>If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0,
707 * WAL files will be checked every 10 min and if total size is greater
708 * then WAL_size_limit_MB, they will be deleted starting with the
709 * earliest until size_limit is met. All empty files will be deleted.</li>
710 * <li>If WAL_ttl_seconds is not 0 and WAL_size_limit_MB is 0, then
711 * WAL files will be checked every WAL_ttl_secondsi / 2 and those that
712 * are older than WAL_ttl_seconds will be deleted.</li>
713 * <li>If both are not 0, WAL files will be checked every 10 min and both
714 * checks will be performed with ttl being first.</li>
717 * @return the wal-ttl seconds
718 * @see #walSizeLimitMB()
720 long walTtlSeconds();
723 * WalTtlSeconds() and walSizeLimitMB() affect how archived logs
726 * <li>If both set to 0, logs will be deleted asap and will not get into
728 * <li>If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0,
729 * WAL files will be checked every 10 min and if total size is greater
730 * then WAL_size_limit_MB, they will be deleted starting with the
731 * earliest until size_limit is met. All empty files will be deleted.</li>
732 * <li>If WAL_ttl_seconds is not 0 and WAL_size_limit_MB is 0, then
733 * WAL files will be checked every WAL_ttl_secondsi / 2 and those that
734 * are older than WAL_ttl_seconds will be deleted.</li>
735 * <li>If both are not 0, WAL files will be checked every 10 min and both
736 * checks will be performed with ttl being first.</li>
739 * @param sizeLimitMB size limit in mega-bytes.
740 * @return the instance of the current object.
741 * @see #setWalSizeLimitMB(long)
743 T
setWalSizeLimitMB(long sizeLimitMB
);
746 * {@link #walTtlSeconds()} and {@code #walSizeLimitMB()} affect how archived logs
749 * <li>If both set to 0, logs will be deleted asap and will not get into
751 * <li>If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0,
752 * WAL files will be checked every 10 min and if total size is greater
753 * then WAL_size_limit_MB, they will be deleted starting with the
754 * earliest until size_limit is met. All empty files will be deleted.</li>
755 * <li>If WAL_ttl_seconds is not 0 and WAL_size_limit_MB is 0, then
756 * WAL files will be checked every WAL_ttl_seconds i / 2 and those that
757 * are older than WAL_ttl_seconds will be deleted.</li>
758 * <li>If both are not 0, WAL files will be checked every 10 min and both
759 * checks will be performed with ttl being first.</li>
761 * @return size limit in mega-bytes.
762 * @see #walSizeLimitMB()
764 long walSizeLimitMB();
767 * Number of bytes to preallocate (via fallocate) the manifest
768 * files. Default is 4mb, which is reasonable to reduce random IO
769 * as well as prevent overallocation for mounts that preallocate
770 * large amounts of data (such as xfs's allocsize option).
772 * @param size the size in byte
773 * @return the instance of the current object.
774 * @throws java.lang.IllegalArgumentException thrown on 32-Bit platforms
775 * while overflowing the underlying platform specific value.
777 T
setManifestPreallocationSize(long size
);
780 * Number of bytes to preallocate (via fallocate) the manifest
781 * files. Default is 4mb, which is reasonable to reduce random IO
782 * as well as prevent overallocation for mounts that preallocate
783 * large amounts of data (such as xfs's allocsize option).
785 * @return size in bytes.
787 long manifestPreallocationSize();
790 * Enable the OS to use direct I/O for reading sst tables.
793 * @param useDirectReads if true, then direct read is enabled
794 * @return the instance of the current object.
796 T
setUseDirectReads(boolean useDirectReads
);
799 * Enable the OS to use direct I/O for reading sst tables.
802 * @return if true, then direct reads are enabled
804 boolean useDirectReads();
807 * Enable the OS to use direct reads and writes in flush and
811 * @param useDirectIoForFlushAndCompaction if true, then direct
812 * I/O will be enabled for background flush and compactions
813 * @return the instance of the current object.
815 T
setUseDirectIoForFlushAndCompaction(boolean useDirectIoForFlushAndCompaction
);
818 * Enable the OS to use direct reads and writes in flush and
821 * @return if true, then direct I/O is enabled for flush and
824 boolean useDirectIoForFlushAndCompaction();
827 * Whether fallocate calls are allowed
829 * @param allowFAllocate false if fallocate() calls are bypassed
831 * @return the reference to the current options.
833 T
setAllowFAllocate(boolean allowFAllocate
);
836 * Whether fallocate calls are allowed
838 * @return false if fallocate() calls are bypassed
840 boolean allowFAllocate();
843 * Allow the OS to mmap file for reading sst tables.
846 * @param allowMmapReads true if mmap reads are allowed.
847 * @return the instance of the current object.
849 T
setAllowMmapReads(boolean allowMmapReads
);
852 * Allow the OS to mmap file for reading sst tables.
855 * @return true if mmap reads are allowed.
857 boolean allowMmapReads();
860 * Allow the OS to mmap file for writing. Default: false
862 * @param allowMmapWrites true if mmap writes are allowd.
863 * @return the instance of the current object.
865 T
setAllowMmapWrites(boolean allowMmapWrites
);
868 * Allow the OS to mmap file for writing. Default: false
870 * @return true if mmap writes are allowed.
872 boolean allowMmapWrites();
875 * Disable child process inherit open files. Default: true
877 * @param isFdCloseOnExec true if child process inheriting open
879 * @return the instance of the current object.
881 T
setIsFdCloseOnExec(boolean isFdCloseOnExec
);
884 * Disable child process inherit open files. Default: true
886 * @return true if child process inheriting open files is disabled.
888 boolean isFdCloseOnExec();
891 * if not zero, dump rocksdb.stats to LOG every stats_dump_period_sec
892 * Default: 600 (10 minutes)
894 * @param statsDumpPeriodSec time interval in seconds.
895 * @return the instance of the current object.
897 T
setStatsDumpPeriodSec(int statsDumpPeriodSec
);
900 * If not zero, dump rocksdb.stats to LOG every stats_dump_period_sec
901 * Default: 600 (10 minutes)
903 * @return time interval in seconds.
905 int statsDumpPeriodSec();
908 * If set true, will hint the underlying file system that the file
909 * access pattern is random, when a sst file is opened.
912 * @param adviseRandomOnOpen true if hinting random access is on.
913 * @return the instance of the current object.
915 T
setAdviseRandomOnOpen(boolean adviseRandomOnOpen
);
918 * If set true, will hint the underlying file system that the file
919 * access pattern is random, when a sst file is opened.
922 * @return true if hinting random access is on.
924 boolean adviseRandomOnOpen();
927 * Amount of data to build up in memtables across all column
928 * families before writing to disk.
930 * This is distinct from {@link ColumnFamilyOptions#writeBufferSize()},
931 * which enforces a limit for a single memtable.
933 * This feature is disabled by default. Specify a non-zero value
936 * Default: 0 (disabled)
938 * @param dbWriteBufferSize the size of the write buffer
940 * @return the reference to the current options.
942 T
setDbWriteBufferSize(long dbWriteBufferSize
);
945 * Amount of data to build up in memtables across all column
946 * families before writing to disk.
948 * This is distinct from {@link ColumnFamilyOptions#writeBufferSize()},
949 * which enforces a limit for a single memtable.
951 * This feature is disabled by default. Specify a non-zero value
954 * Default: 0 (disabled)
956 * @return the size of the write buffer
958 long dbWriteBufferSize();
961 * Specify the file access pattern once a compaction is started.
962 * It will be applied to all input files of a compaction.
964 * Default: {@link AccessHint#NORMAL}
966 * @param accessHint The access hint
968 * @return the reference to the current options.
970 T
setAccessHintOnCompactionStart(final AccessHint accessHint
);
973 * Specify the file access pattern once a compaction is started.
974 * It will be applied to all input files of a compaction.
976 * Default: {@link AccessHint#NORMAL}
978 * @return The access hint
980 AccessHint
accessHintOnCompactionStart();
983 * If true, always create a new file descriptor and new table reader
984 * for compaction inputs. Turn this parameter on may introduce extra
985 * memory usage in the table reader, if it allocates extra memory
986 * for indexes. This will allow file descriptor prefetch options
987 * to be set for compaction input files and not to impact file
988 * descriptors for the same file used by user queries.
989 * Suggest to enable {@link BlockBasedTableConfig#cacheIndexAndFilterBlocks()}
990 * for this mode if using block-based table.
994 * @param newTableReaderForCompactionInputs true if a new file descriptor and
995 * table reader should be created for compaction inputs
997 * @return the reference to the current options.
999 T
setNewTableReaderForCompactionInputs(
1000 boolean newTableReaderForCompactionInputs
);
1003 * If true, always create a new file descriptor and new table reader
1004 * for compaction inputs. Turn this parameter on may introduce extra
1005 * memory usage in the table reader, if it allocates extra memory
1006 * for indexes. This will allow file descriptor prefetch options
1007 * to be set for compaction input files and not to impact file
1008 * descriptors for the same file used by user queries.
1009 * Suggest to enable {@link BlockBasedTableConfig#cacheIndexAndFilterBlocks()}
1010 * for this mode if using block-based table.
1014 * @return true if a new file descriptor and table reader are created for
1017 boolean newTableReaderForCompactionInputs();
1020 * If non-zero, we perform bigger reads when doing compaction. If you're
1021 * running RocksDB on spinning disks, you should set this to at least 2MB.
1023 * That way RocksDB's compaction is doing sequential instead of random reads.
1024 * When non-zero, we also force {@link #newTableReaderForCompactionInputs()}
1029 * @param compactionReadaheadSize The compaction read-ahead size
1031 * @return the reference to the current options.
1033 T
setCompactionReadaheadSize(final long compactionReadaheadSize
);
1036 * If non-zero, we perform bigger reads when doing compaction. If you're
1037 * running RocksDB on spinning disks, you should set this to at least 2MB.
1039 * That way RocksDB's compaction is doing sequential instead of random reads.
1040 * When non-zero, we also force {@link #newTableReaderForCompactionInputs()}
1045 * @return The compaction read-ahead size
1047 long compactionReadaheadSize();
1050 * This is a maximum buffer size that is used by WinMmapReadableFile in
1051 * unbuffered disk I/O mode. We need to maintain an aligned buffer for
1052 * reads. We allow the buffer to grow until the specified value and then
1053 * for bigger requests allocate one shot buffers. In unbuffered mode we
1054 * always bypass read-ahead buffer at ReadaheadRandomAccessFile
1055 * When read-ahead is required we then make use of
1056 * {@link #compactionReadaheadSize()} value and always try to read ahead.
1057 * With read-ahead we always pre-allocate buffer to the size instead of
1058 * growing it up to a limit.
1060 * This option is currently honored only on Windows
1064 * Special value: 0 - means do not maintain per instance buffer. Allocate
1065 * per request buffer and avoid locking.
1067 * @param randomAccessMaxBufferSize the maximum size of the random access
1070 * @return the reference to the current options.
1072 T
setRandomAccessMaxBufferSize(long randomAccessMaxBufferSize
);
1075 * This is a maximum buffer size that is used by WinMmapReadableFile in
1076 * unbuffered disk I/O mode. We need to maintain an aligned buffer for
1077 * reads. We allow the buffer to grow until the specified value and then
1078 * for bigger requests allocate one shot buffers. In unbuffered mode we
1079 * always bypass read-ahead buffer at ReadaheadRandomAccessFile
1080 * When read-ahead is required we then make use of
1081 * {@link #compactionReadaheadSize()} value and always try to read ahead.
1082 * With read-ahead we always pre-allocate buffer to the size instead of
1083 * growing it up to a limit.
1085 * This option is currently honored only on Windows
1089 * Special value: 0 - means do not maintain per instance buffer. Allocate
1090 * per request buffer and avoid locking.
1092 * @return the maximum size of the random access buffer
1094 long randomAccessMaxBufferSize();
1097 * This is the maximum buffer size that is used by WritableFileWriter.
1098 * On Windows, we need to maintain an aligned buffer for writes.
1099 * We allow the buffer to grow until it's size hits the limit.
1101 * Default: 1024 * 1024 (1 MB)
1103 * @param writableFileMaxBufferSize the maximum buffer size
1105 * @return the reference to the current options.
1107 T
setWritableFileMaxBufferSize(long writableFileMaxBufferSize
);
1110 * This is the maximum buffer size that is used by WritableFileWriter.
1111 * On Windows, we need to maintain an aligned buffer for writes.
1112 * We allow the buffer to grow until it's size hits the limit.
1114 * Default: 1024 * 1024 (1 MB)
1116 * @return the maximum buffer size
1118 long writableFileMaxBufferSize();
1121 * Use adaptive mutex, which spins in the user space before resorting
1122 * to kernel. This could reduce context switch when the mutex is not
1123 * heavily contended. However, if the mutex is hot, we could end up
1124 * wasting spin time.
1127 * @param useAdaptiveMutex true if adaptive mutex is used.
1128 * @return the instance of the current object.
1130 T
setUseAdaptiveMutex(boolean useAdaptiveMutex
);
1133 * Use adaptive mutex, which spins in the user space before resorting
1134 * to kernel. This could reduce context switch when the mutex is not
1135 * heavily contended. However, if the mutex is hot, we could end up
1136 * wasting spin time.
1139 * @return true if adaptive mutex is used.
1141 boolean useAdaptiveMutex();
1144 * Allows OS to incrementally sync files to disk while they are being
1145 * written, asynchronously, in the background.
1146 * Issue one request for every bytes_per_sync written. 0 turns it off.
1149 * @param bytesPerSync size in bytes
1150 * @return the instance of the current object.
1152 T
setBytesPerSync(long bytesPerSync
);
1155 * Allows OS to incrementally sync files to disk while they are being
1156 * written, asynchronously, in the background.
1157 * Issue one request for every bytes_per_sync written. 0 turns it off.
1160 * @return size in bytes
1162 long bytesPerSync();
1165 * Same as {@link #setBytesPerSync(long)} , but applies to WAL files
1167 * Default: 0, turned off
1169 * @param walBytesPerSync size in bytes
1170 * @return the instance of the current object.
1172 T
setWalBytesPerSync(long walBytesPerSync
);
1175 * Same as {@link #bytesPerSync()} , but applies to WAL files
1177 * Default: 0, turned off
1179 * @return size in bytes
1181 long walBytesPerSync();
1184 * If true, then the status of the threads involved in this DB will
1185 * be tracked and available via GetThreadList() API.
1189 * @param enableThreadTracking true to enable tracking
1191 * @return the reference to the current options.
1193 T
setEnableThreadTracking(boolean enableThreadTracking
);
1196 * If true, then the status of the threads involved in this DB will
1197 * be tracked and available via GetThreadList() API.
1201 * @return true if tracking is enabled
1203 boolean enableThreadTracking();
1206 * The limited write rate to DB if
1207 * {@link ColumnFamilyOptions#softPendingCompactionBytesLimit()} or
1208 * {@link ColumnFamilyOptions#level0SlowdownWritesTrigger()} is triggered,
1209 * or we are writing to the last mem table allowed and we allow more than 3
1210 * mem tables. It is calculated using size of user write requests before
1211 * compression. RocksDB may decide to slow down more if the compaction still
1212 * gets behind further.
1214 * Unit: bytes per second.
1218 * @param delayedWriteRate the rate in bytes per second
1220 * @return the reference to the current options.
1222 T
setDelayedWriteRate(long delayedWriteRate
);
1225 * The limited write rate to DB if
1226 * {@link ColumnFamilyOptions#softPendingCompactionBytesLimit()} or
1227 * {@link ColumnFamilyOptions#level0SlowdownWritesTrigger()} is triggered,
1228 * or we are writing to the last mem table allowed and we allow more than 3
1229 * mem tables. It is calculated using size of user write requests before
1230 * compression. RocksDB may decide to slow down more if the compaction still
1231 * gets behind further.
1233 * Unit: bytes per second.
1237 * @return the rate in bytes per second
1239 long delayedWriteRate();
1242 * If true, allow multi-writers to update mem tables in parallel.
1243 * Only some memtable factorys support concurrent writes; currently it
1244 * is implemented only for SkipListFactory. Concurrent memtable writes
1245 * are not compatible with inplace_update_support or filter_deletes.
1246 * It is strongly recommended to set
1247 * {@link #setEnableWriteThreadAdaptiveYield(boolean)} if you are going to use
1251 * @param allowConcurrentMemtableWrite true to enable concurrent writes
1254 * @return the reference to the current options.
1256 T
setAllowConcurrentMemtableWrite(boolean allowConcurrentMemtableWrite
);
1259 * If true, allow multi-writers to update mem tables in parallel.
1260 * Only some memtable factorys support concurrent writes; currently it
1261 * is implemented only for SkipListFactory. Concurrent memtable writes
1262 * are not compatible with inplace_update_support or filter_deletes.
1263 * It is strongly recommended to set
1264 * {@link #setEnableWriteThreadAdaptiveYield(boolean)} if you are going to use
1268 * @return true if concurrent writes are enabled for the memtable
1270 boolean allowConcurrentMemtableWrite();
1273 * If true, threads synchronizing with the write batch group leader will
1274 * wait for up to {@link #writeThreadMaxYieldUsec()} before blocking on a
1275 * mutex. This can substantially improve throughput for concurrent workloads,
1276 * regardless of whether {@link #allowConcurrentMemtableWrite()} is enabled.
1279 * @param enableWriteThreadAdaptiveYield true to enable adaptive yield for the
1282 * @return the reference to the current options.
1284 T
setEnableWriteThreadAdaptiveYield(
1285 boolean enableWriteThreadAdaptiveYield
);
1288 * If true, threads synchronizing with the write batch group leader will
1289 * wait for up to {@link #writeThreadMaxYieldUsec()} before blocking on a
1290 * mutex. This can substantially improve throughput for concurrent workloads,
1291 * regardless of whether {@link #allowConcurrentMemtableWrite()} is enabled.
1294 * @return true if adaptive yield is enabled
1295 * for the writing threads
1297 boolean enableWriteThreadAdaptiveYield();
1300 * The maximum number of microseconds that a write operation will use
1301 * a yielding spin loop to coordinate with other write threads before
1302 * blocking on a mutex. (Assuming {@link #writeThreadSlowYieldUsec()} is
1303 * set properly) increasing this value is likely to increase RocksDB
1304 * throughput at the expense of increased CPU usage.
1307 * @param writeThreadMaxYieldUsec maximum number of microseconds
1309 * @return the reference to the current options.
1311 T
setWriteThreadMaxYieldUsec(long writeThreadMaxYieldUsec
);
1314 * The maximum number of microseconds that a write operation will use
1315 * a yielding spin loop to coordinate with other write threads before
1316 * blocking on a mutex. (Assuming {@link #writeThreadSlowYieldUsec()} is
1317 * set properly) increasing this value is likely to increase RocksDB
1318 * throughput at the expense of increased CPU usage.
1321 * @return the maximum number of microseconds
1323 long writeThreadMaxYieldUsec();
1326 * The latency in microseconds after which a std::this_thread::yield
1327 * call (sched_yield on Linux) is considered to be a signal that
1328 * other processes or threads would like to use the current core.
1329 * Increasing this makes writer threads more likely to take CPU
1330 * by spinning, which will show up as an increase in the number of
1331 * involuntary context switches.
1334 * @param writeThreadSlowYieldUsec the latency in microseconds
1336 * @return the reference to the current options.
1338 T
setWriteThreadSlowYieldUsec(long writeThreadSlowYieldUsec
);
1341 * The latency in microseconds after which a std::this_thread::yield
1342 * call (sched_yield on Linux) is considered to be a signal that
1343 * other processes or threads would like to use the current core.
1344 * Increasing this makes writer threads more likely to take CPU
1345 * by spinning, which will show up as an increase in the number of
1346 * involuntary context switches.
1349 * @return writeThreadSlowYieldUsec the latency in microseconds
1351 long writeThreadSlowYieldUsec();
1354 * If true, then DB::Open() will not update the statistics used to optimize
1355 * compaction decision by loading table properties from many files.
1356 * Turning off this feature will improve DBOpen time especially in
1361 * @param skipStatsUpdateOnDbOpen true if updating stats will be skipped
1363 * @return the reference to the current options.
1365 T
setSkipStatsUpdateOnDbOpen(boolean skipStatsUpdateOnDbOpen
);
1368 * If true, then DB::Open() will not update the statistics used to optimize
1369 * compaction decision by loading table properties from many files.
1370 * Turning off this feature will improve DBOpen time especially in
1375 * @return true if updating stats will be skipped
1377 boolean skipStatsUpdateOnDbOpen();
1380 * Recovery mode to control the consistency while replaying WAL
1382 * Default: {@link WALRecoveryMode#PointInTimeRecovery}
1384 * @param walRecoveryMode The WAL recover mode
1386 * @return the reference to the current options.
1388 T
setWalRecoveryMode(WALRecoveryMode walRecoveryMode
);
1391 * Recovery mode to control the consistency while replaying WAL
1393 * Default: {@link WALRecoveryMode#PointInTimeRecovery}
1395 * @return The WAL recover mode
1397 WALRecoveryMode
walRecoveryMode();
1400 * if set to false then recovery will fail when a prepared
1401 * transaction is encountered in the WAL
1405 * @param allow2pc true if two-phase-commit is enabled
1407 * @return the reference to the current options.
1409 T
setAllow2pc(boolean allow2pc
);
1412 * if set to false then recovery will fail when a prepared
1413 * transaction is encountered in the WAL
1417 * @return true if two-phase-commit is enabled
1422 * A global cache for table-level rows.
1424 * Default: null (disabled)
1426 * @param rowCache The global row cache
1428 * @return the reference to the current options.
1430 T
setRowCache(final Cache rowCache
);
1433 * A global cache for table-level rows.
1435 * Default: null (disabled)
1437 * @return The global row cache
1442 * If true, then DB::Open / CreateColumnFamily / DropColumnFamily
1443 * / SetOptions will fail if options file is not detected or properly
1448 * @param failIfOptionsFileError true if we should fail if there is an error
1449 * in the options file
1451 * @return the reference to the current options.
1453 T
setFailIfOptionsFileError(boolean failIfOptionsFileError
);
1456 * If true, then DB::Open / CreateColumnFamily / DropColumnFamily
1457 * / SetOptions will fail if options file is not detected or properly
1462 * @return true if we should fail if there is an error in the options file
1464 boolean failIfOptionsFileError();
1467 * If true, then print malloc stats together with rocksdb.stats
1468 * when printing to LOG.
1472 * @param dumpMallocStats true if malloc stats should be printed to LOG
1474 * @return the reference to the current options.
1476 T
setDumpMallocStats(boolean dumpMallocStats
);
1479 * If true, then print malloc stats together with rocksdb.stats
1480 * when printing to LOG.
1484 * @return true if malloc stats should be printed to LOG
1486 boolean dumpMallocStats();
1489 * By default RocksDB replay WAL logs and flush them on DB open, which may
1490 * create very small SST files. If this option is enabled, RocksDB will try
1491 * to avoid (but not guarantee not to) flush during recovery. Also, existing
1492 * WAL logs will be kept, so that if crash happened before flush, we still
1493 * have logs to recover from.
1497 * @param avoidFlushDuringRecovery true to try to avoid (but not guarantee
1498 * not to) flush during recovery
1500 * @return the reference to the current options.
1502 T
setAvoidFlushDuringRecovery(boolean avoidFlushDuringRecovery
);
1505 * By default RocksDB replay WAL logs and flush them on DB open, which may
1506 * create very small SST files. If this option is enabled, RocksDB will try
1507 * to avoid (but not guarantee not to) flush during recovery. Also, existing
1508 * WAL logs will be kept, so that if crash happened before flush, we still
1509 * have logs to recover from.
1513 * @return true to try to avoid (but not guarantee not to) flush during
1516 boolean avoidFlushDuringRecovery();
1519 * By default RocksDB will flush all memtables on DB close if there are
1520 * unpersisted data (i.e. with WAL disabled) The flush can be skip to speedup
1521 * DB close. Unpersisted data WILL BE LOST.
1525 * Dynamically changeable through
1526 * {@link RocksDB#setOptions(ColumnFamilyHandle, MutableColumnFamilyOptions)}
1529 * @param avoidFlushDuringShutdown true if we should avoid flush during
1532 * @return the reference to the current options.
1534 T
setAvoidFlushDuringShutdown(boolean avoidFlushDuringShutdown
);
1537 * By default RocksDB will flush all memtables on DB close if there are
1538 * unpersisted data (i.e. with WAL disabled) The flush can be skip to speedup
1539 * DB close. Unpersisted data WILL BE LOST.
1543 * Dynamically changeable through
1544 * {@link RocksDB#setOptions(ColumnFamilyHandle, MutableColumnFamilyOptions)}
1547 * @return true if we should avoid flush during shutdown
1549 boolean avoidFlushDuringShutdown();