// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

package org.rocksdb;

import java.util.Collection;
import java.util.List;
public interface DBOptionsInterface<T extends DBOptionsInterface<T>> {
  /**
   * Use this if your DB is very small (like under 1GB) and you don't want to
   * spend lots of memory for memtables.
   *
   * @return the instance of the current object.
   */
  T optimizeForSmallDb();

  /**
   * Use the specified object to interact with the environment,
   * e.g. to read/write files, schedule background work, etc.
   * Default: {@link Env#getDefault()}
   *
   * @param env {@link Env} instance.
   * @return the instance of the current Options.
   */
  T setEnv(final Env env);

  /**
   * Returns the {@link Env} instance set in the options.
   *
   * @return {@link RocksEnv} instance set in the options.
   */
  Env getEnv();

  /**
   * <p>By default, RocksDB uses only one background thread for flush and
   * compaction. Calling this function will set it up such that a total of
   * {@code totalThreads} threads is used.</p>
   *
   * <p>You almost definitely want to call this function if your system is
   * bottlenecked by RocksDB.</p>
   *
   * @param totalThreads The total number of threads to be used by RocksDB.
   *     A good value is the number of cores.
   *
   * @return the instance of the current Options
   */
  T setIncreaseParallelism(int totalThreads);
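
  /* A minimal usage sketch (illustrative, not part of this interface):
   * raising background parallelism to the core count when opening a DB.
   * The database path is an arbitrary placeholder, and the snippet assumes
   * a caller that handles RocksDBException.
   *
   *   try (final Options options = new Options()
   *            .setCreateIfMissing(true)
   *            .setIncreaseParallelism(Runtime.getRuntime().availableProcessors());
   *        final RocksDB db = RocksDB.open(options, "/path/to/db")) {
   *     db.put("key".getBytes(), "value".getBytes());
   *   }
   */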

  /**
   * If this value is set to true, then the database will be created
   * if it is missing during {@code RocksDB.open()}.
   * Default: false
   *
   * @param flag a flag indicating whether to create the database if it is
   *     missing during the
   *     {@link RocksDB#open(org.rocksdb.Options, String)} operation.
   * @return the instance of the current Options
   * @see RocksDB#open(org.rocksdb.Options, String)
   */
  T setCreateIfMissing(boolean flag);

  /**
   * Return true if the create_if_missing flag is set to true.
   * If true, the database will be created if it is missing.
   *
   * @return true if the createIfMissing option is set to true.
   * @see #setCreateIfMissing(boolean)
   */
  boolean createIfMissing();

  /**
   * <p>If true, missing column families will be automatically created.</p>
   *
   * <p>Default: false</p>
   *
   * @param flag a flag indicating if missing column families shall be
   *     created automatically.
   * @return the instance of the current Options
   */
  T setCreateMissingColumnFamilies(boolean flag);

  /**
   * Return true if the create_missing_column_families flag is set
   * to true. If true, missing column families will be created on open.
   *
   * @return true if createMissingColumnFamilies is set to
   *     true.
   * @see #setCreateMissingColumnFamilies(boolean)
   */
  boolean createMissingColumnFamilies();
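
  /* Illustrative sketch: creating the database and a missing column family
   * automatically on open. The path and column-family name are placeholders;
   * assumes a caller that handles RocksDBException.
   *
   *   final List<ColumnFamilyDescriptor> descriptors = Arrays.asList(
   *       new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY),
   *       new ColumnFamilyDescriptor("my_cf".getBytes()));
   *   final List<ColumnFamilyHandle> handles = new ArrayList<>();
   *   try (final DBOptions options = new DBOptions()
   *            .setCreateIfMissing(true)
   *            .setCreateMissingColumnFamilies(true);
   *        final RocksDB db = RocksDB.open(
   *            options, "/path/to/db", descriptors, handles)) {
   *     // handles.get(1) now refers to "my_cf"
   *   }
   */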

  /**
   * If true, an error will be thrown during {@code RocksDB.open()} if the
   * database already exists.
   * Default: false
   *
   * @param errorIfExists if true, an exception will be thrown
   *     during {@code RocksDB.open()} if the database already exists.
   * @return the reference to the current option.
   * @see RocksDB#open(org.rocksdb.Options, String)
   */
  T setErrorIfExists(boolean errorIfExists);

  /**
   * If true, an error will be thrown during {@code RocksDB.open()} if the
   * database already exists.
   *
   * @return if true, an error is raised when the specified database
   *     already exists before open.
   */
  boolean errorIfExists();

  /**
   * If true, the implementation will do aggressive checking of the
   * data it is processing and will stop early if it detects any
   * errors. This may have unforeseen ramifications: for example, a
   * corruption of one DB entry may cause a large number of entries to
   * become unreadable or the entire DB to become unopenable.
   * If any of the writes to the database fails (Put, Delete, Merge, Write),
   * the database will switch to read-only mode and fail all other
   * Write operations.
   * Default: true
   *
   * @param paranoidChecks a flag to indicate whether paranoid-check
   *     is on.
   * @return the reference to the current option.
   */
  T setParanoidChecks(boolean paranoidChecks);

  /**
   * If true, the implementation will do aggressive checking of the
   * data it is processing and will stop early if it detects any
   * errors. This may have unforeseen ramifications: for example, a
   * corruption of one DB entry may cause a large number of entries to
   * become unreadable or the entire DB to become unopenable.
   * If any of the writes to the database fails (Put, Delete, Merge, Write),
   * the database will switch to read-only mode and fail all other
   * Write operations.
   *
   * @return a boolean indicating whether paranoid-check is on.
   */
  boolean paranoidChecks();

  /**
   * Use to control the write rate of flush and compaction. Flush has higher
   * priority than compaction. Rate limiting is disabled if null.
   * Default: null
   *
   * @param rateLimiter {@link org.rocksdb.RateLimiter} instance.
   * @return the instance of the current object.
   *
   * @since 3.10.0
   */
  T setRateLimiter(RateLimiter rateLimiter);
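
  /* Illustrative sketch: limiting the write rate of flush and compaction to
   * roughly 10 MB/s. The RateLimiter owns native resources, so it is managed
   * with try-with-resources here.
   *
   *   try (final RateLimiter rateLimiter = new RateLimiter(10 * 1024 * 1024);
   *        final Options options = new Options()
   *            .setCreateIfMissing(true)
   *            .setRateLimiter(rateLimiter)) {
   *     // open the DB with these options ...
   *   }
   */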

  /**
   * Use to track SST files and control their file deletion rate.
   *
   * Features:
   *  - Throttle the deletion rate of the SST files.
   *  - Keep track of the total size of all SST files.
   *  - Set a maximum allowed space limit for SST files; when it is reached
   *    the DB won't do any further flushes or compactions and will set the
   *    background error.
   *  - Can be shared between multiple dbs.
   *
   * Limitations:
   *  - Only tracks and throttles deletes of SST files in
   *    the first db_path (db_name if db_paths is empty).
   *
   * @param sstFileManager The SST File Manager for the db.
   * @return the instance of the current object.
   */
  T setSstFileManager(SstFileManager sstFileManager);
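
  /* Illustrative sketch: attaching an SstFileManager so SST deletions can be
   * throttled and the total SST size tracked; the same manager could be shared
   * by several option sets. Assumes a caller that handles RocksDBException.
   *
   *   try (final SstFileManager sstFileManager = new SstFileManager(Env.getDefault());
   *        final Options options = new Options()
   *            .setCreateIfMissing(true)
   *            .setSstFileManager(sstFileManager)) {
   *     // open one or more DBs with these options ...
   *   }
   */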

  /**
   * <p>Any internal progress/error information generated by
   * the db will be written to the Logger if it is non-null,
   * or to a file stored in the same directory as the DB
   * contents if info_log is null.</p>
   *
   * <p>Default: null</p>
   *
   * @param logger {@link Logger} instance.
   * @return the instance of the current object.
   */
  T setLogger(Logger logger);

  /**
   * <p>Sets the RocksDB log level. The default level is INFO.</p>
   *
   * @param infoLogLevel log level to set.
   * @return the instance of the current object.
   */
  T setInfoLogLevel(InfoLogLevel infoLogLevel);

  /**
   * <p>Returns the currently set log level.</p>
   *
   * @return {@link org.rocksdb.InfoLogLevel} instance.
   */
  InfoLogLevel infoLogLevel();

  /**
   * If {@link MutableDBOptionsInterface#maxOpenFiles()} is -1, the DB will open
   * all files on DB::Open(). You can use this option to increase the number
   * of threads used to open the files.
   *
   * Default: 16
   *
   * @param maxFileOpeningThreads the maximum number of threads to use to
   *     open files
   *
   * @return the reference to the current options.
   */
  T setMaxFileOpeningThreads(int maxFileOpeningThreads);

  /**
   * If {@link MutableDBOptionsInterface#maxOpenFiles()} is -1, the DB will open all
   * files on DB::Open(). You can use this option to increase the number of
   * threads used to open the files.
   *
   * Default: 16
   *
   * @return the maximum number of threads to use to open files
   */
  int maxFileOpeningThreads();

  /**
   * <p>Sets the statistics object which collects metrics about database operations.
   * A statistics object should not be shared between DB instances, as
   * it does not use any locks to prevent concurrent updates.</p>
   *
   * @param statistics The statistics to set
   *
   * @return the instance of the current object.
   *
   * @see RocksDB#open(org.rocksdb.Options, String)
   */
  T setStatistics(final Statistics statistics);

  /**
   * <p>Returns the statistics object.</p>
   *
   * @return the instance of the statistics object or null if there is no
   *     statistics object.
   *
   * @see #setStatistics(Statistics)
   */
  Statistics statistics();
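
  /* Illustrative sketch: attaching a Statistics object and reading a ticker
   * after some writes. TickerType.BLOCK_CACHE_MISS is just one example metric;
   * the path is a placeholder, and a caller handling RocksDBException is assumed.
   *
   *   try (final Statistics statistics = new Statistics();
   *        final Options options = new Options()
   *            .setCreateIfMissing(true)
   *            .setStatistics(statistics);
   *        final RocksDB db = RocksDB.open(options, "/path/to/db")) {
   *     db.put("key".getBytes(), "value".getBytes());
   *     final long misses = statistics.getTickerCount(TickerType.BLOCK_CACHE_MISS);
   *   }
   */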

  /**
   * <p>If true, then every store to stable storage will issue an fsync.</p>
   * <p>If false, then every store to stable storage will issue an fdatasync.
   * This parameter should be set to true while storing data to
   * a filesystem like ext3 that can lose files after a reboot.</p>
   * <p>Default: false</p>
   *
   * @param useFsync a boolean flag to specify whether to use fsync
   * @return the instance of the current object.
   */
  T setUseFsync(boolean useFsync);

  /**
   * <p>If true, then every store to stable storage will issue an fsync.</p>
   * <p>If false, then every store to stable storage will issue an fdatasync.
   * This parameter should be set to true while storing data to
   * a filesystem like ext3 that can lose files after a reboot.</p>
   *
   * @return boolean value indicating if fsync is used.
   */
  boolean useFsync();

  /**
   * A list of paths where SST files can be put into, each with its target size.
   * Newer data is placed into paths specified earlier in the vector while
   * older data gradually moves to paths specified later in the vector.
   *
   * For example, if you have a flash device with 10GB allocated for the DB,
   * as well as a hard drive of 2TB, you should configure it to be:
   *   [{"/flash_path", 10GB}, {"/hard_drive", 2TB}]
   *
   * The system will try to guarantee data under each path is close to but
   * not larger than the target size. But current and future file sizes used
   * when determining where to place a file are based on best-effort
   * estimation, which means there is a chance that the actual size under the
   * directory is slightly more than the target size under some workloads.
   * Users should give some buffer room for those cases.
   *
   * If none of the paths has sufficient room to place a file, the file will
   * be placed in the last path anyway, regardless of the target size.
   *
   * Placing newer data in earlier paths is also best-effort. Users should
   * expect user files to be placed in higher levels in some extreme cases.
   *
   * If left empty, only one path will be used, which is the db_name passed
   * when opening the DB.
   *
   * Default: empty
   *
   * @param dbPaths the paths and target sizes
   *
   * @return the reference to the current options
   */
  T setDbPaths(final Collection<DbPath> dbPaths);

  /**
   * A list of paths where SST files can be put into, each with its target size.
   * Newer data is placed into paths specified earlier in the vector while
   * older data gradually moves to paths specified later in the vector.
   *
   * For example, if you have a flash device with 10GB allocated for the DB,
   * as well as a hard drive of 2TB, you should configure it to be:
   *   [{"/flash_path", 10GB}, {"/hard_drive", 2TB}]
   *
   * The system will try to guarantee data under each path is close to but
   * not larger than the target size. But current and future file sizes used
   * when determining where to place a file are based on best-effort
   * estimation, which means there is a chance that the actual size under the
   * directory is slightly more than the target size under some workloads.
   * Users should give some buffer room for those cases.
   *
   * If none of the paths has sufficient room to place a file, the file will
   * be placed in the last path anyway, regardless of the target size.
   *
   * Placing newer data in earlier paths is also best-effort. Users should
   * expect user files to be placed in higher levels in some extreme cases.
   *
   * If left empty, only one path will be used, which is the db_name passed
   * when opening the DB.
   *
   * Default: {@link java.util.Collections#emptyList()}
   *
   * @return the paths and target sizes
   */
  List<DbPath> dbPaths();
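
  /* Illustrative sketch of the flash-plus-hard-drive layout described above;
   * the two mount points are placeholders.
   *
   *   options.setDbPaths(Arrays.asList(
   *       new DbPath(Paths.get("/flash_path"), 10L * 1024 * 1024 * 1024),
   *       new DbPath(Paths.get("/hard_drive"), 2L * 1024 * 1024 * 1024 * 1024)));
   */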

  /**
   * This specifies the info LOG dir.
   * If it is empty, the log files will be in the same dir as data.
   * If it is non empty, the log files will be in the specified dir,
   * and the db data dir's absolute path will be used as the log file
   * name's prefix.
   *
   * @param dbLogDir the path to the info log directory
   * @return the instance of the current object.
   */
  T setDbLogDir(String dbLogDir);

  /**
   * Returns the directory of the info log.
   *
   * If it is empty, the log files will be in the same dir as data.
   * If it is non empty, the log files will be in the specified dir,
   * and the db data dir's absolute path will be used as the log file
   * name's prefix.
   *
   * @return the path to the info log directory
   */
  String dbLogDir();

  /**
   * This specifies the absolute dir path for write-ahead logs (WAL).
   * If it is empty, the log files will be in the same dir as data;
   * dbname is used as the data dir by default.
   * If it is non empty, the log files will be kept in the specified dir.
   * When destroying the db,
   * all log files in wal_dir and the dir itself are deleted.
   *
   * @param walDir the path to the write-ahead-log directory.
   * @return the instance of the current object.
   */
  T setWalDir(String walDir);

  /**
   * Returns the path to the write-ahead-logs (WAL) directory.
   *
   * If it is empty, the log files will be in the same dir as data;
   * dbname is used as the data dir by default.
   * If it is non empty, the log files will be kept in the specified dir.
   * When destroying the db,
   * all log files in wal_dir and the dir itself are deleted.
   *
   * @return the path to the write-ahead-logs (WAL) directory.
   */
  String walDir();
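
  /* Illustrative sketch: keeping info logs and WALs on devices separate from
   * the data dir. Both paths are placeholders.
   *
   *   options.setDbLogDir("/var/log/rocksdb")
   *          .setWalDir("/fast_disk/rocksdb_wal");
   */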

  /**
   * The periodicity, in microseconds, with which obsolete files get deleted.
   * The default value is 6 hours. Files that go out of scope during the
   * compaction process will still be deleted automatically on every
   * compaction, regardless of this setting.
   *
   * @param micros the time interval in micros
   * @return the instance of the current object.
   */
  T setDeleteObsoleteFilesPeriodMicros(long micros);

  /**
   * The periodicity, in microseconds, with which obsolete files get deleted.
   * The default value is 6 hours. Files that go out of scope during the
   * compaction process will still be deleted automatically on every
   * compaction, regardless of this setting.
   *
   * @return the time interval in micros when obsolete files will be deleted.
   */
  long deleteObsoleteFilesPeriodMicros();

  /**
   * This value represents the maximum number of threads that will
   * concurrently perform a compaction job by breaking it into multiple,
   * smaller ones that are run simultaneously.
   * Default: 1 (i.e. no subcompactions)
   *
   * @param maxSubcompactions The maximum number of threads that will
   *     concurrently perform a compaction job
   *
   * @return the instance of the current object.
   */
  T setMaxSubcompactions(int maxSubcompactions);

  /**
   * This value represents the maximum number of threads that will
   * concurrently perform a compaction job by breaking it into multiple,
   * smaller ones that are run simultaneously.
   * Default: 1 (i.e. no subcompactions)
   *
   * @return The maximum number of threads that will concurrently perform a
   *     compaction job
   */
  int maxSubcompactions();

  /**
   * NOT SUPPORTED ANYMORE: RocksDB automatically decides this based on the
   * value of max_background_jobs. For backwards compatibility we will set
   * `max_background_jobs = max_background_compactions + max_background_flushes`
   * in the case where the user sets at least one of `max_background_compactions`
   * or `max_background_flushes`.
   *
   * Specifies the maximum number of concurrent background flush jobs.
   * If you're increasing this, also consider increasing the number of threads
   * in the HIGH priority thread pool. For more information, see
   * {@link RocksEnv#setBackgroundThreads(int, Priority)}.
   * Default: -1
   *
   * @param maxBackgroundFlushes number of max concurrent flush jobs
   * @return the instance of the current object.
   *
   * @see RocksEnv#setBackgroundThreads(int)
   * @see RocksEnv#setBackgroundThreads(int, Priority)
   * @see MutableDBOptionsInterface#maxBackgroundCompactions()
   *
   * @deprecated Use {@link MutableDBOptionsInterface#setMaxBackgroundJobs(int)}
   */
  @Deprecated
  T setMaxBackgroundFlushes(int maxBackgroundFlushes);

  /**
   * NOT SUPPORTED ANYMORE: RocksDB automatically decides this based on the
   * value of max_background_jobs. For backwards compatibility we will set
   * `max_background_jobs = max_background_compactions + max_background_flushes`
   * in the case where the user sets at least one of `max_background_compactions`
   * or `max_background_flushes`.
   *
   * Returns the maximum number of concurrent background flush jobs.
   * If you're increasing this, also consider increasing the number of threads
   * in the HIGH priority thread pool. For more information, see
   * {@link RocksEnv#setBackgroundThreads(int, Priority)}.
   * Default: -1
   *
   * @return the maximum number of concurrent background flush jobs.
   * @see RocksEnv#setBackgroundThreads(int)
   * @see RocksEnv#setBackgroundThreads(int, Priority)
   */
  @Deprecated
  int maxBackgroundFlushes();

  /**
   * Specifies the maximum size of an info log file. If the current log file
   * is larger than `max_log_file_size`, a new info log file will
   * be created.
   * If 0, all logs will be written to one log file.
   *
   * @param maxLogFileSize the maximum size of an info log file.
   * @return the instance of the current object.
   * @throws java.lang.IllegalArgumentException thrown on 32-Bit platforms
   *     while overflowing the underlying platform specific value.
   */
  T setMaxLogFileSize(long maxLogFileSize);

  /**
   * Returns the maximum size of an info log file. If the current log file
   * is larger than this size, a new info log file will be created.
   * If 0, all logs will be written to one log file.
   *
   * @return the maximum size of the info log file.
   */
  long maxLogFileSize();

  /**
   * Specifies the time interval for the info log file to roll (in seconds).
   * If specified with a non-zero value, the log file will be rolled
   * if it has been active longer than `log_file_time_to_roll`.
   * Default: 0 (disabled)
   *
   * @param logFileTimeToRoll the time interval in seconds.
   * @return the instance of the current object.
   * @throws java.lang.IllegalArgumentException thrown on 32-Bit platforms
   *     while overflowing the underlying platform specific value.
   */
  T setLogFileTimeToRoll(long logFileTimeToRoll);

  /**
   * Returns the time interval for the info log file to roll (in seconds).
   * If specified with a non-zero value, the log file will be rolled
   * if it has been active longer than `log_file_time_to_roll`.
   * Default: 0 (disabled)
   *
   * @return the time interval in seconds.
   */
  long logFileTimeToRoll();

  /**
   * Specifies the maximum number of info log files to be kept.
   * Default: 1000
   *
   * @param keepLogFileNum the maximum number of info log files to be kept.
   * @return the instance of the current object.
   * @throws java.lang.IllegalArgumentException thrown on 32-Bit platforms
   *     while overflowing the underlying platform specific value.
   */
  T setKeepLogFileNum(long keepLogFileNum);

  /**
   * Returns the maximum number of info log files to be kept.
   * Default: 1000
   *
   * @return the maximum number of info log files to be kept.
   */
  long keepLogFileNum();

  /**
   * Recycle log files.
   *
   * If non-zero, we will reuse previously written log files for new
   * logs, overwriting the old data. The value indicates how many
   * such files we will keep around at any point in time for later
   * use.
   *
   * This is more efficient because the blocks are already
   * allocated and fdatasync does not need to update the inode after
   * each write.
   *
   * Default: 0
   *
   * @param recycleLogFileNum the number of log files to keep for recycling
   *
   * @return the reference to the current options
   */
  T setRecycleLogFileNum(long recycleLogFileNum);

  /**
   * Recycle log files.
   *
   * If non-zero, we will reuse previously written log files for new
   * logs, overwriting the old data. The value indicates how many
   * such files we will keep around at any point in time for later
   * use.
   *
   * This is more efficient because the blocks are already
   * allocated and fdatasync does not need to update the inode after
   * each write.
   *
   * Default: 0
   *
   * @return the number of log files kept for recycling
   */
  long recycleLogFileNum();

  /**
   * The manifest file is rolled over on reaching this limit.
   * The older manifest file will be deleted.
   * The default value is 1GB so that the manifest file can grow, but not
   * reach the limit of storage capacity.
   *
   * @param maxManifestFileSize the size limit of a manifest file.
   * @return the instance of the current object.
   */
  T setMaxManifestFileSize(long maxManifestFileSize);

  /**
   * The manifest file is rolled over on reaching this limit.
   * The older manifest file will be deleted.
   * The default value is 1GB so that the manifest file can grow, but not
   * reach the limit of storage capacity.
   *
   * @return the size limit of a manifest file.
   */
  long maxManifestFileSize();

  /**
   * Number of shards used for the table cache.
   *
   * @param tableCacheNumshardbits the number of shards
   * @return the instance of the current object.
   */
  T setTableCacheNumshardbits(int tableCacheNumshardbits);

  /**
   * Number of shards used for the table cache.
   *
   * @return the number of shards used for the table cache.
   */
  int tableCacheNumshardbits();

  /**
   * {@link #walTtlSeconds()} and {@link #walSizeLimitMB()} affect how archived logs
   * will be deleted.
   * <ol>
   * <li>If both set to 0, logs will be deleted asap and will not get into
   * the archive.</li>
   * <li>If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0,
   * WAL files will be checked every 10 min and if total size is greater
   * than WAL_size_limit_MB, they will be deleted starting with the
   * earliest until size_limit is met. All empty files will be deleted.</li>
   * <li>If WAL_ttl_seconds is not 0 and WAL_size_limit_MB is 0, then
   * WAL files will be checked every WAL_ttl_seconds / 2 and those that
   * are older than WAL_ttl_seconds will be deleted.</li>
   * <li>If both are not 0, WAL files will be checked every 10 min and both
   * checks will be performed with ttl being first.</li>
   * </ol>
   *
   * @param walTtlSeconds the ttl seconds
   * @return the instance of the current object.
   * @see #setWalSizeLimitMB(long)
   */
  T setWalTtlSeconds(long walTtlSeconds);

  /**
   * {@link #walTtlSeconds()} and {@link #walSizeLimitMB()} affect how archived logs
   * will be deleted.
   * <ol>
   * <li>If both set to 0, logs will be deleted asap and will not get into
   * the archive.</li>
   * <li>If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0,
   * WAL files will be checked every 10 min and if total size is greater
   * than WAL_size_limit_MB, they will be deleted starting with the
   * earliest until size_limit is met. All empty files will be deleted.</li>
   * <li>If WAL_ttl_seconds is not 0 and WAL_size_limit_MB is 0, then
   * WAL files will be checked every WAL_ttl_seconds / 2 and those that
   * are older than WAL_ttl_seconds will be deleted.</li>
   * <li>If both are not 0, WAL files will be checked every 10 min and both
   * checks will be performed with ttl being first.</li>
   * </ol>
   *
   * @return the wal-ttl seconds
   * @see #walSizeLimitMB()
   */
  long walTtlSeconds();

  /**
   * {@link #walTtlSeconds()} and {@link #walSizeLimitMB()} affect how archived logs
   * will be deleted.
   * <ol>
   * <li>If both set to 0, logs will be deleted asap and will not get into
   * the archive.</li>
   * <li>If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0,
   * WAL files will be checked every 10 min and if total size is greater
   * than WAL_size_limit_MB, they will be deleted starting with the
   * earliest until size_limit is met. All empty files will be deleted.</li>
   * <li>If WAL_ttl_seconds is not 0 and WAL_size_limit_MB is 0, then
   * WAL files will be checked every WAL_ttl_seconds / 2 and those that
   * are older than WAL_ttl_seconds will be deleted.</li>
   * <li>If both are not 0, WAL files will be checked every 10 min and both
   * checks will be performed with ttl being first.</li>
   * </ol>
   *
   * @param sizeLimitMB size limit in mega-bytes.
   * @return the instance of the current object.
   * @see #setWalTtlSeconds(long)
   */
  T setWalSizeLimitMB(long sizeLimitMB);

  /**
   * {@link #walTtlSeconds()} and {@link #walSizeLimitMB()} affect how archived logs
   * will be deleted.
   * <ol>
   * <li>If both set to 0, logs will be deleted asap and will not get into
   * the archive.</li>
   * <li>If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0,
   * WAL files will be checked every 10 min and if total size is greater
   * than WAL_size_limit_MB, they will be deleted starting with the
   * earliest until size_limit is met. All empty files will be deleted.</li>
   * <li>If WAL_ttl_seconds is not 0 and WAL_size_limit_MB is 0, then
   * WAL files will be checked every WAL_ttl_seconds / 2 and those that
   * are older than WAL_ttl_seconds will be deleted.</li>
   * <li>If both are not 0, WAL files will be checked every 10 min and both
   * checks will be performed with ttl being first.</li>
   * </ol>
   *
   * @return size limit in mega-bytes.
   * @see #walTtlSeconds()
   */
  long walSizeLimitMB();
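
  /* Illustrative sketch: keeping archived WALs for up to one hour, or until
   * the archive exceeds 1 GB, with the ttl check performed first as described
   * above. The values are placeholders.
   *
   *   options.setWalTtlSeconds(60 * 60)
   *          .setWalSizeLimitMB(1024);
   */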

  /**
   * The maximum limit of the number of bytes that are written in a single
   * batch of WAL or memtable writes. It is followed when the leader write size
   * is larger than 1/8 of this limit.
   *
   * Default: 1 MB
   *
   * @param maxWriteBatchGroupSizeBytes the maximum limit in bytes, see
   *     description.
   * @return the instance of the current object.
   */
  T setMaxWriteBatchGroupSizeBytes(final long maxWriteBatchGroupSizeBytes);

  /**
   * The maximum limit of the number of bytes that are written in a single
   * batch of WAL or memtable writes. It is followed when the leader write size
   * is larger than 1/8 of this limit.
   *
   * Default: 1 MB
   *
   * @return the maximum limit in bytes, see description.
   */
  long maxWriteBatchGroupSizeBytes();

  /**
   * Number of bytes to preallocate (via fallocate) for the manifest
   * files. Default is 4 MB, which is reasonable to reduce random IO
   * as well as prevent overallocation for mounts that preallocate
   * large amounts of data (such as xfs's allocsize option).
   *
   * @param size the size in bytes
   * @return the instance of the current object.
   * @throws java.lang.IllegalArgumentException thrown on 32-Bit platforms
   *     while overflowing the underlying platform specific value.
   */
  T setManifestPreallocationSize(long size);

  /**
   * Number of bytes to preallocate (via fallocate) for the manifest
   * files. Default is 4 MB, which is reasonable to reduce random IO
   * as well as prevent overallocation for mounts that preallocate
   * large amounts of data (such as xfs's allocsize option).
   *
   * @return size in bytes.
   */
  long manifestPreallocationSize();

  /**
   * Enable the OS to use direct I/O for reading sst tables.
   * Default: false
   *
   * @param useDirectReads if true, then direct reads are enabled
   * @return the instance of the current object.
   */
  T setUseDirectReads(boolean useDirectReads);

  /**
   * Enable the OS to use direct I/O for reading sst tables.
   * Default: false
   *
   * @return if true, then direct reads are enabled
   */
  boolean useDirectReads();

  /**
   * Enable the OS to use direct reads and writes in flush and
   * compaction.
   * Default: false
   *
   * @param useDirectIoForFlushAndCompaction if true, then direct
   *     I/O will be enabled for background flushes and compactions
   * @return the instance of the current object.
   */
  T setUseDirectIoForFlushAndCompaction(boolean useDirectIoForFlushAndCompaction);

  /**
   * Enable the OS to use direct reads and writes in flush and
   * compaction.
   *
   * @return if true, then direct I/O is enabled for flush and
   *     compaction
   */
  boolean useDirectIoForFlushAndCompaction();

  /**
   * Whether fallocate calls are allowed.
   *
   * @param allowFAllocate false if fallocate() calls are bypassed
   *
   * @return the reference to the current options.
   */
  T setAllowFAllocate(boolean allowFAllocate);

  /**
   * Whether fallocate calls are allowed.
   *
   * @return false if fallocate() calls are bypassed
   */
  boolean allowFAllocate();

  /**
   * Allow the OS to mmap files for reading sst tables.
   * Default: false
   *
   * @param allowMmapReads true if mmap reads are allowed.
   * @return the instance of the current object.
   */
  T setAllowMmapReads(boolean allowMmapReads);

  /**
   * Allow the OS to mmap files for reading sst tables.
   * Default: false
   *
   * @return true if mmap reads are allowed.
   */
  boolean allowMmapReads();

  /**
   * Allow the OS to mmap files for writing. Default: false
   *
   * @param allowMmapWrites true if mmap writes are allowed.
   * @return the instance of the current object.
   */
  T setAllowMmapWrites(boolean allowMmapWrites);

  /**
   * Allow the OS to mmap files for writing. Default: false
   *
   * @return true if mmap writes are allowed.
   */
  boolean allowMmapWrites();

  /**
   * Disable child processes from inheriting open files. Default: true
   *
   * @param isFdCloseOnExec true if child processes inheriting open
   *     files is disabled.
   * @return the instance of the current object.
   */
  T setIsFdCloseOnExec(boolean isFdCloseOnExec);

  /**
   * Disable child processes from inheriting open files. Default: true
   *
   * @return true if child processes inheriting open files is disabled.
   */
  boolean isFdCloseOnExec();

  /**
   * If set to true, will hint the underlying file system that the file
   * access pattern is random when an sst file is opened.
   * Default: true
   *
   * @param adviseRandomOnOpen true if hinting random access is on.
   * @return the instance of the current object.
   */
  T setAdviseRandomOnOpen(boolean adviseRandomOnOpen);

  /**
   * If set to true, will hint the underlying file system that the file
   * access pattern is random when an sst file is opened.
   * Default: true
   *
   * @return true if hinting random access is on.
   */
  boolean adviseRandomOnOpen();

  /**
   * Amount of data to build up in memtables across all column
   * families before writing to disk.
   *
   * This is distinct from {@link ColumnFamilyOptions#writeBufferSize()},
   * which enforces a limit for a single memtable.
   *
   * This feature is disabled by default. Specify a non-zero value
   * to enable it.
   *
   * Default: 0 (disabled)
   *
   * @param dbWriteBufferSize the size of the write buffer
   *
   * @return the reference to the current options.
   */
  T setDbWriteBufferSize(long dbWriteBufferSize);

  /**
   * Use the passed {@link WriteBufferManager} to control memory usage across
   * multiple column families and/or DB instances.
   *
   * See <a href="https://github.com/facebook/rocksdb/wiki/Write-Buffer-Manager">
   * https://github.com/facebook/rocksdb/wiki/Write-Buffer-Manager</a>
   * for more details on when to use it.
   *
   * @param writeBufferManager The WriteBufferManager to use
   * @return the reference to the current options.
   */
  T setWriteBufferManager(final WriteBufferManager writeBufferManager);

  /**
   * Reference to the {@link WriteBufferManager} used by the DB. <br>
   *
   * Default: null (disabled)
   *
   * @return a reference to the WriteBufferManager
   */
  WriteBufferManager writeBufferManager();
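
  /* Illustrative sketch: capping memtable memory across DB instances with a
   * WriteBufferManager that charges its memory to an LRU cache. The sizes are
   * placeholders.
   *
   *   try (final Cache cache = new LRUCache(512 * 1024 * 1024);
   *        final WriteBufferManager writeBufferManager =
   *            new WriteBufferManager(256 * 1024 * 1024, cache);
   *        final DBOptions options = new DBOptions()
   *            .setCreateIfMissing(true)
   *            .setWriteBufferManager(writeBufferManager)) {
   *     // open one or more DBs with these options ...
   *   }
   */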

  /**
   * Amount of data to build up in memtables across all column
   * families before writing to disk.
   *
   * This is distinct from {@link ColumnFamilyOptions#writeBufferSize()},
   * which enforces a limit for a single memtable.
   *
   * This feature is disabled by default. Specify a non-zero value
   * to enable it.
   *
   * Default: 0 (disabled)
   *
   * @return the size of the write buffer
   */
  long dbWriteBufferSize();

  /**
   * Specify the file access pattern once a compaction is started.
   * It will be applied to all input files of a compaction.
   *
   * Default: {@link AccessHint#NORMAL}
   *
   * @param accessHint The access hint
   *
   * @return the reference to the current options.
   */
  T setAccessHintOnCompactionStart(final AccessHint accessHint);

  /**
   * Specify the file access pattern once a compaction is started.
   * It will be applied to all input files of a compaction.
   *
   * Default: {@link AccessHint#NORMAL}
   *
   * @return The access hint
   */
  AccessHint accessHintOnCompactionStart();

  /**
   * If true, always create a new file descriptor and new table reader
   * for compaction inputs. Turning this parameter on may introduce extra
   * memory usage in the table reader, if it allocates extra memory
   * for indexes. This will allow file descriptor prefetch options
   * to be set for compaction input files and not to impact file
   * descriptors for the same file used by user queries.
   * We suggest enabling {@link BlockBasedTableConfig#cacheIndexAndFilterBlocks()}
   * for this mode if using block-based table.
   *
   * Default: false
   *
   * @param newTableReaderForCompactionInputs true if a new file descriptor and
   *     table reader should be created for compaction inputs
   *
   * @return the reference to the current options.
   */
  T setNewTableReaderForCompactionInputs(
      boolean newTableReaderForCompactionInputs);

  /**
   * If true, always create a new file descriptor and new table reader
   * for compaction inputs. Turning this parameter on may introduce extra
   * memory usage in the table reader, if it allocates extra memory
   * for indexes. This will allow file descriptor prefetch options
   * to be set for compaction input files and not to impact file
   * descriptors for the same file used by user queries.
   * We suggest enabling {@link BlockBasedTableConfig#cacheIndexAndFilterBlocks()}
   * for this mode if using block-based table.
   *
   * Default: false
   *
   * @return true if a new file descriptor and table reader are created for
   *     compaction inputs
   */
  boolean newTableReaderForCompactionInputs();

  /**
   * This is the maximum buffer size that is used by WinMmapReadableFile in
   * unbuffered disk I/O mode. We need to maintain an aligned buffer for
   * reads. We allow the buffer to grow until the specified value and then
   * for bigger requests allocate one-shot buffers. In unbuffered mode we
   * always bypass the read-ahead buffer at ReadaheadRandomAccessFile.
   * When read-ahead is required we then make use of the
   * {@link MutableDBOptionsInterface#compactionReadaheadSize()} value and
   * always try to read ahead.
   * With read-ahead we always pre-allocate the buffer to the size instead of
   * growing it up to a limit.
   *
   * This option is currently honored only on Windows.
   *
   * Default: 1 MB
   *
   * Special value: 0 - means do not maintain a per-instance buffer. Allocate
   * a per-request buffer and avoid locking.
   *
   * @param randomAccessMaxBufferSize the maximum size of the random access
   *     buffer
   *
   * @return the reference to the current options.
   */
  T setRandomAccessMaxBufferSize(long randomAccessMaxBufferSize);

  /**
   * This is the maximum buffer size that is used by WinMmapReadableFile in
   * unbuffered disk I/O mode. We need to maintain an aligned buffer for
   * reads. We allow the buffer to grow until the specified value and then
   * for bigger requests allocate one-shot buffers. In unbuffered mode we
   * always bypass the read-ahead buffer at ReadaheadRandomAccessFile.
   * When read-ahead is required we then make use of the
   * {@link MutableDBOptionsInterface#compactionReadaheadSize()} value and
   * always try to read ahead. With read-ahead we always pre-allocate the
   * buffer to the size instead of growing it up to a limit.
   *
   * This option is currently honored only on Windows.
   *
   * Default: 1 MB
   *
   * Special value: 0 - means do not maintain a per-instance buffer. Allocate
   * a per-request buffer and avoid locking.
   *
   * @return the maximum size of the random access buffer
   */
  long randomAccessMaxBufferSize();

  /**
   * Use adaptive mutexes, which spin in user space before resorting
   * to the kernel. This could reduce context switches when the mutex is not
   * heavily contended. However, if the mutex is hot, we could end up
   * wasting spin time.
   * Default: false
   *
   * @param useAdaptiveMutex true if adaptive mutexes are used.
   * @return the instance of the current object.
   */
  T setUseAdaptiveMutex(boolean useAdaptiveMutex);

  /**
   * Use adaptive mutexes, which spin in user space before resorting
   * to the kernel. This could reduce context switches when the mutex is not
   * heavily contended. However, if the mutex is hot, we could end up
   * wasting spin time.
   * Default: false
   *
   * @return true if adaptive mutexes are used.
   */
  boolean useAdaptiveMutex();

  /**
   * Sets the {@link EventListener}s whose callback functions
   * will be called when a specific RocksDB event happens.
   *
   * Note: the RocksJava API currently only supports EventListeners implemented in Java.
   * It could be extended in future to also support adding/removing EventListeners implemented in
   * C++.
   *
   * @param listeners the listeners who should be notified on various events.
   *
   * @return the instance of the current object.
   */
  T setListeners(final List<AbstractEventListener> listeners);

  /**
   * Gets the {@link EventListener}s whose callback functions
   * will be called when a specific RocksDB event happens.
   *
   * Note: the RocksJava API currently only supports EventListeners implemented in Java.
   * It could be extended in future to also support adding/removing EventListeners implemented in
   * C++.
   *
   * @return the listeners that are notified on various events.
   */
  List<AbstractEventListener> listeners();
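
  /* Illustrative sketch: registering a listener that reports flush
   * completions. onFlushCompleted is one of several callbacks offered by
   * AbstractEventListener; the println body is a placeholder.
   *
   *   options.setListeners(Collections.singletonList(
   *       new AbstractEventListener() {
   *         @Override
   *         public void onFlushCompleted(final RocksDB db,
   *             final FlushJobInfo flushJobInfo) {
   *           System.out.println("flush completed: " + flushJobInfo);
   *         }
   *       }));
   */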

  /**
   * If true, then the status of the threads involved in this DB will
   * be tracked and available via the GetThreadList() API.
   *
   * Default: false
   *
   * @param enableThreadTracking true to enable tracking
   *
   * @return the reference to the current options.
   */
  T setEnableThreadTracking(boolean enableThreadTracking);

  /**
   * If true, then the status of the threads involved in this DB will
   * be tracked and available via the GetThreadList() API.
   *
   * Default: false
   *
   * @return true if tracking is enabled
   */
  boolean enableThreadTracking();

  /**
   * By default, a single write thread queue is maintained. The thread that
   * gets to the head of the queue becomes the write batch group leader and is
   * responsible for writing to the WAL and memtable for the batch group.
   *
   * If {@link #enablePipelinedWrite()} is true, separate write thread queues are
   * maintained for WAL writes and memtable writes. A write thread first enters
   * the WAL writer queue and then the memtable writer queue. Pending threads
   * on the WAL writer queue thus only have to wait for previous writers to
   * finish their WAL writing but not the memtable writing. Enabling the
   * feature may improve write throughput and reduce latency of the prepare
   * phase of two-phase commit.
   *
   * Default: false
   *
   * @param enablePipelinedWrite true to enable pipelined writes
   *
   * @return the reference to the current options.
   */
  T setEnablePipelinedWrite(final boolean enablePipelinedWrite);

  /**
   * Returns true if pipelined writes are enabled.
   * See {@link #setEnablePipelinedWrite(boolean)}.
   *
   * @return true if pipelined writes are enabled, false otherwise.
   */
  boolean enablePipelinedWrite();

  /**
   * Setting {@link #unorderedWrite()} to true trades higher write throughput
   * for relaxing the immutability guarantee of snapshots. This violates the
   * repeatability one expects from ::Get from a snapshot, as well as
   * ::MultiGet and Iterator's consistent-point-in-time view property.
   * If the application cannot tolerate the relaxed guarantees, it can implement
   * its own mechanisms to work around that and yet benefit from the higher
   * throughput. Using TransactionDB with WRITE_PREPARED write policy and
   * {@link #twoWriteQueues()} true is one way to achieve immutable snapshots despite
   * unordered_write.
   *
   * By default, i.e. when it is false, rocksdb does not advance the sequence
   * number for new snapshots unless all the writes with lower sequence numbers
   * are already finished. This provides the immutability that we expect from
   * snapshots. Moreover, since Iterator and MultiGet internally depend on
   * snapshots, the snapshot immutability results in Iterator and MultiGet
   * offering a consistent-point-in-time view. If set to true, although
   * Read-Your-Own-Write property is still provided, the snapshot immutability
   * property is relaxed: the writes issued after the snapshot is obtained (with
   * larger sequence numbers) will still not be visible to reads from that
   * snapshot; however, there still might be pending writes (with lower
   * sequence numbers) that will change the state visible to the snapshot after
   * they land in the memtable.
   *
   * @param unorderedWrite true to enable unordered writes
   *
   * @return the reference to the current options.
   */
  T setUnorderedWrite(final boolean unorderedWrite);

  /**
   * Returns true if unordered writes are enabled.
   * See {@link #setUnorderedWrite(boolean)}.
   *
   * @return true if unordered writes are enabled, false otherwise.
   */
  boolean unorderedWrite();

  /**
   * If true, allow multi-writers to update mem tables in parallel.
   * Only some memtable factories support concurrent writes; currently it
   * is implemented only for SkipListFactory. Concurrent memtable writes
   * are not compatible with inplace_update_support or filter_deletes.
   * It is strongly recommended to set
   * {@link #setEnableWriteThreadAdaptiveYield(boolean)} if you are going to use
   * this feature.
   * Default: true
   *
   * @param allowConcurrentMemtableWrite true to enable concurrent writes
   *     for the memtable
   *
   * @return the reference to the current options.
   */
  T setAllowConcurrentMemtableWrite(boolean allowConcurrentMemtableWrite);

  /**
   * If true, allow multi-writers to update mem tables in parallel.
   * Only some memtable factories support concurrent writes; currently it
   * is implemented only for SkipListFactory. Concurrent memtable writes
   * are not compatible with inplace_update_support or filter_deletes.
   * It is strongly recommended to set
   * {@link #setEnableWriteThreadAdaptiveYield(boolean)} if you are going to use
   * this feature.
   * Default: true
   *
   * @return true if concurrent writes are enabled for the memtable
   */
  boolean allowConcurrentMemtableWrite();

  /**
   * If true, threads synchronizing with the write batch group leader will
   * wait for up to {@link #writeThreadMaxYieldUsec()} before blocking on a
   * mutex. This can substantially improve throughput for concurrent workloads,
   * regardless of whether {@link #allowConcurrentMemtableWrite()} is enabled.
   * Default: true
   *
   * @param enableWriteThreadAdaptiveYield true to enable adaptive yield for the
   *     write threads
   *
   * @return the reference to the current options.
   */
  T setEnableWriteThreadAdaptiveYield(
      boolean enableWriteThreadAdaptiveYield);

  /**
   * If true, threads synchronizing with the write batch group leader will
   * wait for up to {@link #writeThreadMaxYieldUsec()} before blocking on a
   * mutex. This can substantially improve throughput for concurrent workloads,
   * regardless of whether {@link #allowConcurrentMemtableWrite()} is enabled.
   * Default: true
   *
   * @return true if adaptive yield is enabled
   *     for the writing threads
   */
  boolean enableWriteThreadAdaptiveYield();

  /**
   * The maximum number of microseconds that a write operation will use
   * a yielding spin loop to coordinate with other write threads before
   * blocking on a mutex. (Assuming {@link #writeThreadSlowYieldUsec()} is
   * set properly) increasing this value is likely to increase RocksDB
   * throughput at the expense of increased CPU usage.
   * Default: 100
   *
   * @param writeThreadMaxYieldUsec maximum number of microseconds
   *
   * @return the reference to the current options.
   */
  T setWriteThreadMaxYieldUsec(long writeThreadMaxYieldUsec);

  /**
   * The maximum number of microseconds that a write operation will use
   * a yielding spin loop to coordinate with other write threads before
   * blocking on a mutex. (Assuming {@link #writeThreadSlowYieldUsec()} is
   * set properly) increasing this value is likely to increase RocksDB
   * throughput at the expense of increased CPU usage.
   * Default: 100
   *
   * @return the maximum number of microseconds
   */
  long writeThreadMaxYieldUsec();

  /**
   * The latency in microseconds after which a std::this_thread::yield
   * call (sched_yield on Linux) is considered to be a signal that
   * other processes or threads would like to use the current core.
   * Increasing this makes writer threads more likely to take CPU
   * by spinning, which will show up as an increase in the number of
   * involuntary context switches.
   * Default: 3
   *
   * @param writeThreadSlowYieldUsec the latency in microseconds
   *
   * @return the reference to the current options.
   */
  T setWriteThreadSlowYieldUsec(long writeThreadSlowYieldUsec);

  /**
   * The latency in microseconds after which a std::this_thread::yield
   * call (sched_yield on Linux) is considered to be a signal that
   * other processes or threads would like to use the current core.
   * Increasing this makes writer threads more likely to take CPU
   * by spinning, which will show up as an increase in the number of
   * involuntary context switches.
   * Default: 3
   *
   * @return the latency in microseconds
   */
  long writeThreadSlowYieldUsec();

  /**
   * If true, then DB::Open() will not update the statistics used to optimize
   * compaction decisions by loading table properties from many files.
   * Turning off this feature will improve DBOpen time, especially in
   * disk environments.
   *
   * Default: false
   *
   * @param skipStatsUpdateOnDbOpen true if updating stats will be skipped
   *
   * @return the reference to the current options.
   */
  T setSkipStatsUpdateOnDbOpen(boolean skipStatsUpdateOnDbOpen);

  /**
   * If true, then DB::Open() will not update the statistics used to optimize
   * compaction decisions by loading table properties from many files.
   * Turning off this feature will improve DBOpen time, especially in
   * disk environments.
   *
   * Default: false
   *
   * @return true if updating stats will be skipped
   */
  boolean skipStatsUpdateOnDbOpen();

  /**
   * If true, then {@link RocksDB#open(String)} will not fetch and check sizes of all sst files.
   * This may significantly speed up startup if there are many sst files,
   * especially when using a non-default Env with expensive GetFileSize().
   * We'll still check that all required sst files exist.
   * If {@code paranoid_checks} is false, this option is ignored, and sst files are
   * not checked at all.
   *
   * Default: false
   *
   * @param skipCheckingSstFileSizesOnDbOpen if true, then SST file sizes will not be checked
   *     when calling {@link RocksDB#open(String)}.
   * @return the reference to the current options.
   */
  T setSkipCheckingSstFileSizesOnDbOpen(final boolean skipCheckingSstFileSizesOnDbOpen);

  /**
   * If true, then {@link RocksDB#open(String)} will not fetch and check sizes of all sst files.
   * This may significantly speed up startup if there are many sst files,
   * especially when using a non-default Env with expensive GetFileSize().
   * We'll still check that all required sst files exist.
   * If {@code paranoid_checks} is false, this option is ignored, and sst files are
   * not checked at all.
   *
   * Default: false
   *
   * @return true, if file sizes will not be checked when calling {@link RocksDB#open(String)}.
   */
  boolean skipCheckingSstFileSizesOnDbOpen();

  /**
   * Recovery mode to control the consistency while replaying the WAL.
   *
   * Default: {@link WALRecoveryMode#PointInTimeRecovery}
   *
   * @param walRecoveryMode The WAL recovery mode
   *
   * @return the reference to the current options.
   */
  T setWalRecoveryMode(WALRecoveryMode walRecoveryMode);

  /**
   * Recovery mode to control the consistency while replaying the WAL.
   *
   * Default: {@link WALRecoveryMode#PointInTimeRecovery}
   *
   * @return The WAL recovery mode
   */
  WALRecoveryMode walRecoveryMode();
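
  /* Illustrative sketch: requiring an entirely consistent WAL on open. The
   * other modes are TolerateCorruptedTailRecords, PointInTimeRecovery (the
   * default) and SkipAnyCorruptedRecords.
   *
   *   options.setWalRecoveryMode(WALRecoveryMode.AbsoluteConsistency);
   */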

  /**
   * If set to false then recovery will fail when a prepared
   * transaction is encountered in the WAL.
   *
   * Default: false
   *
   * @param allow2pc true if two-phase-commit is enabled
   *
   * @return the reference to the current options.
   */
  T setAllow2pc(boolean allow2pc);

  /**
   * If set to false then recovery will fail when a prepared
   * transaction is encountered in the WAL.
   *
   * Default: false
   *
   * @return true if two-phase-commit is enabled
   */
  boolean allow2pc();

  /**
   * A global cache for table-level rows.
   *
   * Default: null (disabled)
   *
   * @param rowCache The global row cache
   *
   * @return the reference to the current options.
   */
  T setRowCache(final Cache rowCache);

  /**
   * A global cache for table-level rows.
   *
   * Default: null (disabled)
   *
   * @return The global row cache
   */
  Cache rowCache();

  /**
   * A filter object supplied to be invoked while processing write-ahead-logs
   * (WALs) during recovery. The filter provides a way to inspect log
   * records, ignoring a particular record or skipping replay.
   * The filter is invoked at startup and is currently invoked from a
   * single thread.
   *
   * @param walFilter the filter for processing WALs during recovery.
   *
   * @return the reference to the current options.
   */
  T setWalFilter(final AbstractWalFilter walFilter);

  /**
   * Gets the filter for processing WALs during recovery.
   * See {@link #setWalFilter(AbstractWalFilter)}.
   *
   * @return the filter used for processing WALs during recovery.
   */
  WalFilter walFilter();

  /**
   * If true, then DB::Open / CreateColumnFamily / DropColumnFamily
   * / SetOptions will fail if the options file is not detected or not
   * properly persisted.
   *
   * DEFAULT: false
   *
   * @param failIfOptionsFileError true if we should fail if there is an error
   *     in the options file
   *
   * @return the reference to the current options.
   */
  T setFailIfOptionsFileError(boolean failIfOptionsFileError);

  /**
   * If true, then DB::Open / CreateColumnFamily / DropColumnFamily
   * / SetOptions will fail if the options file is not detected or not
   * properly persisted.
   *
   * DEFAULT: false
   *
   * @return true if we should fail if there is an error in the options file
   */
  boolean failIfOptionsFileError();

  /**
   * If true, then print malloc stats together with rocksdb.stats
   * when printing to LOG.
   *
   * DEFAULT: false
   *
   * @param dumpMallocStats true if malloc stats should be printed to LOG
   *
   * @return the reference to the current options.
   */
  T setDumpMallocStats(boolean dumpMallocStats);

  /**
   * If true, then print malloc stats together with rocksdb.stats
   * when printing to LOG.
   *
   * DEFAULT: false
   *
   * @return true if malloc stats should be printed to LOG
   */
  boolean dumpMallocStats();

  /**
   * By default RocksDB replays WAL logs and flushes them on DB open, which may
   * create very small SST files. If this option is enabled, RocksDB will try
   * to avoid (but not guarantee not to) flush during recovery. Also, existing
   * WAL logs will be kept, so that if a crash happens before flush, we still
   * have logs to recover from.
   *
   * DEFAULT: false
   *
   * @param avoidFlushDuringRecovery true to try to avoid (but not guarantee
   *     not to) flush during recovery
   *
   * @return the reference to the current options.
   */
  T setAvoidFlushDuringRecovery(boolean avoidFlushDuringRecovery);

  /**
   * By default RocksDB replays WAL logs and flushes them on DB open, which may
   * create very small SST files. If this option is enabled, RocksDB will try
   * to avoid (but not guarantee not to) flush during recovery. Also, existing
   * WAL logs will be kept, so that if a crash happens before flush, we still
   * have logs to recover from.
   *
   * DEFAULT: false
   *
   * @return true to try to avoid (but not guarantee not to) flush during
   *     recovery
   */
  boolean avoidFlushDuringRecovery();

  /**
   * Set this option to true during creation of the database if you want
   * to be able to ingest behind (call IngestExternalFile() skipping keys
   * that already exist, rather than overwriting matching keys).
   * Setting this option to true will affect three things:
   * 1) Disable some internal optimizations around SST file compression.
   * 2) Reserve the bottom-most level for ingested files only.
   * 3) Note that num_levels should be &gt;= 3 if this option is turned on.
   *
   * DEFAULT: false
   *
   * @param allowIngestBehind true to allow ingest behind, false to disallow.
   *
   * @return the reference to the current options.
   */
  T setAllowIngestBehind(final boolean allowIngestBehind);

  /**
   * Returns true if ingest behind is allowed.
   * See {@link #setAllowIngestBehind(boolean)}.
   *
   * @return true if ingest behind is allowed, false otherwise.
   */
  boolean allowIngestBehind();
1525
/**
 * Needed to support differential snapshots.
 * If set to true then the DB will only process deletes with a sequence
 * number less than what was set by SetPreserveDeletesSequenceNumber(uint64_t ts).
 * Clients are responsible for periodically calling that method to advance
 * the cutoff time. If it is never called and preserve_deletes
 * is set to true, NO deletes will ever be processed.
 * At the moment this only keeps normal deletes; SingleDeletes will
 * not be preserved.
 *
 * DEFAULT: false
 *
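 * <p>A sketch of advancing the cutoff (this assumes the Java binding exposes
 * setPreserveDeletesSequenceNumber; the cutoff choice is illustrative):</p>
 * <pre>{@code
 * // DB opened with options.setPreserveDeletes(true)
 * final long cutoff = db.getLatestSequenceNumber();
 * db.setPreserveDeletesSequenceNumber(cutoff); // deletes below cutoff may now be processed
 * }</pre>
 *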
 * @param preserveDeletes true to preserve deletes.
 *
 * @return the reference to the current options.
 */
T setPreserveDeletes(final boolean preserveDeletes);

/**
 * Returns true if deletes are preserved.
 * See {@link #setPreserveDeletes(boolean)}.
 *
 * @return true if deletes are preserved, false otherwise.
 */
boolean preserveDeletes();

/**
 * If enabled it uses two queues for writes, one for writes with
 * disable_memtable and one for writes that also go to the memtable. This
 * allows the memtable writes not to lag behind other writes. It can be used
 * to optimize MySQL 2PC, in which only the commits, which are serial, write
 * to the memtable.
 *
 * DEFAULT: false
 *
 * @param twoWriteQueues true to enable two write queues, false otherwise.
 *
 * @return the reference to the current options.
 */
T setTwoWriteQueues(final boolean twoWriteQueues);

/**
 * Returns true if two write queues are enabled.
 *
 * @return true if two write queues are enabled, false otherwise.
 */
boolean twoWriteQueues();

/**
 * If true, the WAL is not flushed automatically after each write. Instead it
 * relies on manual invocation of FlushWAL to write the WAL buffer to its
 * file.
 *
 * DEFAULT: false
 *
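 * <p>A sketch of manual WAL flushing (the key/value bytes are illustrative):</p>
 * <pre>{@code
 * // DB opened with options.setManualWalFlush(true)
 * db.put("key".getBytes(), "value".getBytes());
 * db.flushWal(true); // true also syncs the WAL file to storage
 * }</pre>
 *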
 * @param manualWalFlush true to disable automatic WAL flushing,
 *     false otherwise.
 *
 * @return the reference to the current options.
 */
T setManualWalFlush(final boolean manualWalFlush);

/**
 * Returns true if automatic WAL flushing is disabled.
 * See {@link #setManualWalFlush(boolean)}.
 *
 * @return true if automatic WAL flushing is disabled, false otherwise.
 */
boolean manualWalFlush();

/**
 * If true, RocksDB supports flushing multiple column families and committing
 * their results atomically to MANIFEST. Note that it is not
 * necessary to set atomic_flush to true if WAL is always enabled, since the
 * WAL allows the database to be restored to the last persistent state in it.
 * This option is useful when there are column families with writes NOT
 * protected by the WAL.
 * For manual flush, the application has to specify which column families to
 * flush atomically in {@link RocksDB#flush(FlushOptions, List)}.
 * For auto-triggered flush, RocksDB atomically flushes ALL column families.
 *
 * Currently, any WAL-enabled writes after an atomic flush may be replayed
 * independently if the process crashes later and tries to recover.
 *
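 * <p>A sketch of a manual atomic flush across column families (the handles
 * are assumed to come from the open call):</p>
 * <pre>{@code
 * try (final FlushOptions flushOptions = new FlushOptions().setWaitForFlush(true)) {
 *   db.flush(flushOptions, Arrays.asList(cfHandle1, cfHandle2));
 * }
 * }</pre>
 *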
 * @param atomicFlush true to enable atomic flush of multiple column families.
 *
 * @return the reference to the current options.
 */
T setAtomicFlush(final boolean atomicFlush);

/**
 * Determine if atomic flush of multiple column families is enabled.
 *
 * See {@link #setAtomicFlush(boolean)}.
 *
 * @return true if atomic flush is enabled.
 */
boolean atomicFlush();

/**
 * If true, working threads may avoid doing unnecessary and long-latency
 * operations (such as deleting obsolete files directly or deleting
 * memtables) and will instead schedule a background job to do it.
 * Use it if you're latency-sensitive.
 * If set to true, takes precedence over
 * {@link ReadOptions#setBackgroundPurgeOnIteratorCleanup(boolean)}.
 *
 * @param avoidUnnecessaryBlockingIO if true, working threads may avoid doing
 *     unnecessary operations.
 * @return the reference to the current options.
 */
T setAvoidUnnecessaryBlockingIO(final boolean avoidUnnecessaryBlockingIO);

/**
 * If true, working threads may avoid doing unnecessary and long-latency
 * operations (such as deleting obsolete files directly or deleting
 * memtables) and will instead schedule a background job to do it.
 * Use it if you're latency-sensitive.
 * If set to true, takes precedence over
 * {@link ReadOptions#setBackgroundPurgeOnIteratorCleanup(boolean)}.
 *
 * @return true, if working threads may avoid doing unnecessary operations.
 */
boolean avoidUnnecessaryBlockingIO();

/**
 * If true, automatically persist stats to a hidden column family (column
 * family name: ___rocksdb_stats_history___) every
 * stats_persist_period_sec seconds; otherwise, write to an in-memory
 * struct. Users can query the history through the `GetStatsHistory` API.
 * If a user attempts to create a column family with the same name on a DB
 * which has previously set persist_stats_to_disk to true, the column family
 * creation will fail, but the hidden column family will survive, as well as
 * the previously persisted statistics.
 * When persisting stats to disk, the stat name will be limited to 100 bytes.
 * Default: false
 *
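 * <p>A sketch of enabling on-disk stats persistence (this assumes the
 * stats-persist-period setter on DBOptions; the interval is illustrative):</p>
 * <pre>{@code
 * final DBOptions dbOptions = new DBOptions()
 *     .setCreateIfMissing(true)
 *     .setPersistStatsToDisk(true)
 *     .setStatsPersistPeriodSec(600); // persist a sample every 10 minutes
 * }</pre>
 *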
 * @param persistStatsToDisk true if stats should be persisted to the hidden
 *     column family.
 * @return the instance of the current object.
 */
T setPersistStatsToDisk(final boolean persistStatsToDisk);

/**
 * If true, automatically persist stats to a hidden column family (column
 * family name: ___rocksdb_stats_history___) every
 * stats_persist_period_sec seconds; otherwise, write to an in-memory
 * struct. Users can query the history through the `GetStatsHistory` API.
 * If a user attempts to create a column family with the same name on a DB
 * which has previously set persist_stats_to_disk to true, the column family
 * creation will fail, but the hidden column family will survive, as well as
 * the previously persisted statistics.
 * When persisting stats to disk, the stat name will be limited to 100 bytes.
 * Default: false
 *
 * @return true if stats should be persisted to the hidden column family.
 */
boolean persistStatsToDisk();

/**
 * Historically the DB ID has always been stored in the Identity file in the
 * DB folder. If this flag is true, the DB ID is written to the Manifest file
 * in addition to the Identity file. Doing so solves two problems:
 * 1. The Identity file is not checksummed, whereas the Manifest file is.
 * 2. Since the Manifest file is the source of truth for the DB, the DB ID
 *    will sit with the source of truth. Previously the Identity file could
 *    be copied independently of the Manifest, and that could result in a
 *    wrong DB ID.
 * We recommend setting this flag to true.
 * Default: false
 *
 * @param writeDbidToManifest if true, then the DB ID will be written to the
 *     Manifest file.
 * @return the instance of the current object.
 */
T setWriteDbidToManifest(final boolean writeDbidToManifest);

/**
 * Historically the DB ID has always been stored in the Identity file in the
 * DB folder. If this flag is true, the DB ID is written to the Manifest file
 * in addition to the Identity file. Doing so solves two problems:
 * 1. The Identity file is not checksummed, whereas the Manifest file is.
 * 2. Since the Manifest file is the source of truth for the DB, the DB ID
 *    will sit with the source of truth. Previously the Identity file could
 *    be copied independently of the Manifest, and that could result in a
 *    wrong DB ID.
 * We recommend setting this flag to true.
 * Default: false
 *
 * @return true, if the DB ID will be written to the Manifest file.
 */
boolean writeDbidToManifest();

/**
 * The number of bytes to prefetch when reading the log. This is mostly useful
 * for reading a remotely located log, as it can reduce the number of
 * round-trips. If 0, then prefetching is disabled.
 *
 * Default: 0
 *
 * @param logReadaheadSize the number of bytes to prefetch when reading the log.
 * @return the instance of the current object.
 */
T setLogReadaheadSize(final long logReadaheadSize);

/**
 * The number of bytes to prefetch when reading the log. This is mostly useful
 * for reading a remotely located log, as it can reduce the number of
 * round-trips. If 0, then prefetching is disabled.
 *
 * Default: 0
 *
 * @return the number of bytes to prefetch when reading the log.
 */
long logReadaheadSize();

/**
 * By default, RocksDB recovery fails if any table file referenced in the
 * MANIFEST is missing after scanning the MANIFEST.
 * Best-efforts recovery is another recovery mode that
 * tries to restore the database to the most recent point in time for which
 * no files are missing.
 * Currently not compatible with atomic flush. Furthermore, WAL files will
 * not be used for recovery if best_efforts_recovery is true.
 * Default: false
 *
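 * <p>A sketch of opening in best-efforts mode (the path is illustrative;
 * note that WAL contents are not replayed in this mode):</p>
 * <pre>{@code
 * try (final Options options = new Options().setBestEffortsRecovery(true);
 *      final RocksDB db = RocksDB.open(options, "/var/lib/mydb")) {
 *   // db is restored to the most recent point that requires no missing files
 * } catch (final RocksDBException e) {
 *   // handle open/recovery failure
 * }
 * }</pre>
 *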
 * @param bestEffortsRecovery if true, RocksDB will use best-efforts mode when recovering.
 * @return the instance of the current object.
 */
T setBestEffortsRecovery(final boolean bestEffortsRecovery);

/**
 * By default, RocksDB recovery fails if any table file referenced in the
 * MANIFEST is missing after scanning the MANIFEST.
 * Best-efforts recovery is another recovery mode that
 * tries to restore the database to the most recent point in time for which
 * no files are missing.
 * Currently not compatible with atomic flush. Furthermore, WAL files will
 * not be used for recovery if best_efforts_recovery is true.
 * Default: false
 *
 * @return true, if RocksDB uses best-efforts mode when recovering.
 */
boolean bestEffortsRecovery();

/**
 * Defines how many times DB resume is called by a separate thread when
 * a background retryable IO error happens. When a background retryable IO
 * error happens, SetBGError is called to deal with the error. If the error
 * can be auto-recovered (e.g., a retryable IO error during flush or WAL
 * write), then DB resume is called in the background to recover from the
 * error. If this value is 0 or negative, DB resume will not be called.
 *
 * Default: INT_MAX
 *
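 * <p>A sketch of tuning auto-resume together with the retry interval below
 * (the values are illustrative):</p>
 * <pre>{@code
 * final DBOptions dbOptions = new DBOptions()
 *     .setMaxBgErrorResumeCount(10)                // stop retrying after 10 attempts
 *     .setBgerrorResumeRetryInterval(2_000_000L);  // wait 2 seconds between attempts
 * }</pre>
 *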
 * @param maxBgerrorResumeCount maximum number of times DB resume should be
 *     called when an IO error happens.
 * @return the instance of the current object.
 */
T setMaxBgErrorResumeCount(final int maxBgerrorResumeCount);

/**
 * Defines how many times DB resume is called by a separate thread when
 * a background retryable IO error happens. When a background retryable IO
 * error happens, SetBGError is called to deal with the error. If the error
 * can be auto-recovered (e.g., a retryable IO error during flush or WAL
 * write), then DB resume is called in the background to recover from the
 * error. If this value is 0 or negative, DB resume will not be called.
 *
 * Default: INT_MAX
 *
 * @return maximum number of times DB resume should be called when an IO
 *     error happens.
 */
int maxBgerrorResumeCount();

/**
 * If max_bgerror_resume_count is &ge; 2, DB resume is called multiple times.
 * This option decides how long to wait to retry the next resume if the
 * previous resume fails and the conditions for retrying the resume are
 * satisfied.
 *
 * Default: 1000000 (microseconds).
 *
 * @param bgerrorResumeRetryInterval how many microseconds to wait between DB resume attempts.
 * @return the instance of the current object.
 */
T setBgerrorResumeRetryInterval(final long bgerrorResumeRetryInterval);

/**
 * If max_bgerror_resume_count is &ge; 2, DB resume is called multiple times.
 * This option decides how long to wait to retry the next resume if the
 * previous resume fails and the conditions for retrying the resume are
 * satisfied.
 *
 * Default: 1000000 (microseconds).
 *
 * @return the number of microseconds to wait between DB resume attempts.
 */
long bgerrorResumeRetryInterval();
}