ceph/src/rocksdb/java/src/main/java/org/rocksdb/DBOptionsInterface.java

   1 // Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
   2 //  This source code is licensed under both the GPLv2 (found in the
   3 //  COPYING file in the root directory) and Apache 2.0 License
   4 //  (found in the LICENSE.Apache file in the root directory).
   5
   6 package org.rocksdb;
   7
   8 import java.util.Collection;
   9 import java.util.List;
  10
  11 public interface DBOptionsInterface<T extends DBOptionsInterface<T>> {
  12   /**
  13    * Use this if your DB is very small (like under 1GB) and you don't want to
  14    * spend lots of memory for memtables.
  15    *
  16    * @return the instance of the current object.
  17    */
  18   T optimizeForSmallDb();
  19
  20   /**
  21    * Use the specified object to interact with the environment,
  22    * e.g. to read/write files, schedule background work, etc.
  23    * Default: {@link Env#getDefault()}
  24    *
  25    * @param env {@link Env} instance.
  26    * @return the instance of the current Options.
  27    */
  28   T setEnv(final Env env);
  29
  30   /**
  31    * Returns the set RocksEnv instance.
  32    *
  33    * @return {@link RocksEnv} instance set in the options.
  34    */
  35   Env getEnv();
  36
  37   /**
  38    * <p>By default, RocksDB uses only one background thread for flush and
  39    * compaction. Calling this function will set it up such that total of
  40    * `total_threads` is used.</p>
  41    *
  42    * <p>You almost definitely want to call this function if your system is
  43    * bottlenecked by RocksDB.</p>
  44    *
  45    * @param totalThreads The total number of threads to be used by RocksDB.
  46    *     A good value is the number of cores.
  47    *
  48    * @return the instance of the current Options
  49    */
  50   T setIncreaseParallelism(int totalThreads);
  51
  52   /**
  53    * If this value is set to true, then the database will be created
  54    * if it is missing during {@code RocksDB.open()}.
  55    * Default: false
  56    *
  57    * @param flag a flag indicating whether to create a database the
  58    *     specified database in {@link RocksDB#open(org.rocksdb.Options, String)} operation
  59    *     is missing.
  60    * @return the instance of the current Options
  61    * @see RocksDB#open(org.rocksdb.Options, String)
  62    */
  63   T setCreateIfMissing(boolean flag);
  64
  65   /**
  66    * Return true if the create_if_missing flag is set to true.
  67    * If true, the database will be created if it is missing.
  68    *
  69    * @return true if the createIfMissing option is set to true.
  70    * @see #setCreateIfMissing(boolean)
  71    */
  72   boolean createIfMissing();
  73
  74   /**
  75    * <p>If true, missing column families will be automatically created</p>
  76    *
  77    * <p>Default: false</p>
  78    *
  79    * @param flag a flag indicating if missing column families shall be
  80    *     created automatically.
  81    * @return true if missing column families shall be created automatically
  82    *     on open.
  83    */
  84   T setCreateMissingColumnFamilies(boolean flag);
  85
  86   /**
  87    * Return true if the create_missing_column_families flag is set
  88    * to true. If true column families be created if missing.
  89    *
  90    * @return true if the createMissingColumnFamilies is set to
  91    *     true.
  92    * @see #setCreateMissingColumnFamilies(boolean)
  93    */
  94   boolean createMissingColumnFamilies();
  95
  96   /**
  97    * If true, an error will be thrown during RocksDB.open() if the
  98    * database already exists.
  99    * Default: false
 100    *
 101    * @param errorIfExists if true, an exception will be thrown
 102    *     during {@code RocksDB.open()} if the database already exists.
 103    * @return the reference to the current option.
 104    * @see RocksDB#open(org.rocksdb.Options, String)
 105    */
 106   T setErrorIfExists(boolean errorIfExists);
 107
 108   /**
 109    * If true, an error will be thrown during RocksDB.open() if the
 110    * database already exists.
 111    *
 112    * @return if true, an error is raised when the specified database
 113    *    already exists before open.
 114    */
 115   boolean errorIfExists();
 116
 117   /**
 118    * If true, the implementation will do aggressive checking of the
 119    * data it is processing and will stop early if it detects any
 120    * errors.  This may have unforeseen ramifications: for example, a
 121    * corruption of one DB entry may cause a large number of entries to
 122    * become unreadable or for the entire DB to become unopenable.
 123    * If any of the  writes to the database fails (Put, Delete, Merge, Write),
 124    * the database will switch to read-only mode and fail all other
 125    * Write operations.
 126    * Default: true
 127    *
 128    * @param paranoidChecks a flag to indicate whether paranoid-check
 129    *     is on.
 130    * @return the reference to the current option.
 131    */
 132   T setParanoidChecks(boolean paranoidChecks);
 133
 134   /**
 135    * If true, the implementation will do aggressive checking of the
 136    * data it is processing and will stop early if it detects any
 137    * errors.  This may have unforeseen ramifications: for example, a
 138    * corruption of one DB entry may cause a large number of entries to
 139    * become unreadable or for the entire DB to become unopenable.
 140    * If any of the  writes to the database fails (Put, Delete, Merge, Write),
 141    * the database will switch to read-only mode and fail all other
 142    * Write operations.
 143    *
 144    * @return a boolean indicating whether paranoid-check is on.
 145    */
 146   boolean paranoidChecks();
 147
 148   /**
 149    * Use to control write rate of flush and compaction. Flush has higher
 150    * priority than compaction. Rate limiting is disabled if nullptr.
 151    * Default: nullptr
 152    *
 153    * @param rateLimiter {@link org.rocksdb.RateLimiter} instance.
 154    * @return the instance of the current object.
 155    *
 156    * @since 3.10.0
 157    */
 158   T setRateLimiter(RateLimiter rateLimiter);
 159
 160   /**
 161    * Use to track SST files and control their file deletion rate.
 162    *
 163    * Features:
 164    *  - Throttle the deletion rate of the SST files.
 165    *  - Keep track the total size of all SST files.
 166    *  - Set a maximum allowed space limit for SST files that when reached
 167    *    the DB wont do any further flushes or compactions and will set the
 168    *    background error.
 169    *  - Can be shared between multiple dbs.
 170    *
 171    *  Limitations:
 172    *  - Only track and throttle deletes of SST files in
 173    *    first db_path (db_name if db_paths is empty).
 174    *
 175    * @param sstFileManager The SST File Manager for the db.
 176    * @return the instance of the current object.
 177    */
 178   T setSstFileManager(SstFileManager sstFileManager);
 179
 180   /**
 181    * <p>Any internal progress/error information generated by
 182    * the db will be written to the Logger if it is non-nullptr,
 183    * or to a file stored in the same directory as the DB
 184    * contents if info_log is nullptr.</p>
 185    *
 186    * <p>Default: nullptr</p>
 187    *
 188    * @param logger {@link Logger} instance.
 189    * @return the instance of the current object.
 190    */
 191   T setLogger(Logger logger);
 192
 193   /**
 194    * <p>Sets the RocksDB log level. Default level is INFO</p>
 195    *
 196    * @param infoLogLevel log level to set.
 197    * @return the instance of the current object.
 198    */
 199   T setInfoLogLevel(InfoLogLevel infoLogLevel);
 200
 201   /**
 202    * <p>Returns currently set log level.</p>
 203    * @return {@link org.rocksdb.InfoLogLevel} instance.
 204    */
 205   InfoLogLevel infoLogLevel();
 206
 207   /**
 208    * If {@link MutableDBOptionsInterface#maxOpenFiles()} is -1, DB will open
 209    * all files on DB::Open(). You can use this option to increase the number
 210    * of threads used to open the files.
 211    *
 212    * Default: 16
 213    *
 214    * @param maxFileOpeningThreads the maximum number of threads to use to
 215    *     open files
 216    *
 217    * @return the reference to the current options.
 218    */
 219   T setMaxFileOpeningThreads(int maxFileOpeningThreads);
 220
 221   /**
 222    * If {@link MutableDBOptionsInterface#maxOpenFiles()} is -1, DB will open all
 223    * files on DB::Open(). You can use this option to increase the number of
 224    * threads used to open the files.
 225    *
 226    * Default: 16
 227    *
 228    * @return the maximum number of threads to use to open files
 229    */
 230   int maxFileOpeningThreads();
 231
 232   /**
 233    * <p>Sets the statistics object which collects metrics about database operations.
 234    * Statistics objects should not be shared between DB instances as
 235    * it does not use any locks to prevent concurrent updates.</p>
 236    *
 237    * @param statistics The statistics to set
 238    *
 239    * @return the instance of the current object.
 240    *
 241    * @see RocksDB#open(org.rocksdb.Options, String)
 242    */
 243   T setStatistics(final Statistics statistics);
 244
 245   /**
 246    * <p>Returns statistics object.</p>
 247    *
 248    * @return the instance of the statistics object or null if there is no
 249    * statistics object.
 250    *
 251    * @see #setStatistics(Statistics)
 252    */
 253   Statistics statistics();
 254
 255   /**
 256    * <p>If true, then every store to stable storage will issue a fsync.</p>
 257    * <p>If false, then every store to stable storage will issue a fdatasync.
 258    * This parameter should be set to true while storing data to
 259    * filesystem like ext3 that can lose files after a reboot.</p>
 260    * <p>Default: false</p>
 261    *
 262    * @param useFsync a boolean flag to specify whether to use fsync
 263    * @return the instance of the current object.
 264    */
 265   T setUseFsync(boolean useFsync);
 266
 267   /**
 268    * <p>If true, then every store to stable storage will issue a fsync.</p>
 269    * <p>If false, then every store to stable storage will issue a fdatasync.
 270    * This parameter should be set to true while storing data to
 271    * filesystem like ext3 that can lose files after a reboot.</p>
 272    *
 273    * @return boolean value indicating if fsync is used.
 274    */
 275   boolean useFsync();
 276
 277   /**
 278    * A list of paths where SST files can be put into, with its target size.
 279    * Newer data is placed into paths specified earlier in the vector while
 280    * older data gradually moves to paths specified later in the vector.
 281    *
 282    * For example, you have a flash device with 10GB allocated for the DB,
 283    * as well as a hard drive of 2TB, you should config it to be:
 284    *    [{"/flash_path", 10GB}, {"/hard_drive", 2TB}]
 285    *
 286    * The system will try to guarantee data under each path is close to but
 287    * not larger than the target size. But current and future file sizes used
 288    * by determining where to place a file are based on best-effort estimation,
 289    * which means there is a chance that the actual size under the directory
 290    * is slightly more than target size under some workloads. User should give
 291    * some buffer room for those cases.
 292    *
 293    * If none of the paths has sufficient room to place a file, the file will
 294    * be placed to the last path anyway, despite to the target size.
 295    *
 296    * Placing newer data to earlier paths is also best-efforts. User should
 297    * expect user files to be placed in higher levels in some extreme cases.
 298    *
 299    * If left empty, only one path will be used, which is db_name passed when
 300    * opening the DB.
 301    *
 302    * Default: empty
 303    *
 304    * @param dbPaths the paths and target sizes
 305    *
 306    * @return the reference to the current options
 307    */
 308   T setDbPaths(final Collection<DbPath> dbPaths);
 309
 310   /**
 311    * A list of paths where SST files can be put into, with its target size.
 312    * Newer data is placed into paths specified earlier in the vector while
 313    * older data gradually moves to paths specified later in the vector.
 314    *
 315    * For example, you have a flash device with 10GB allocated for the DB,
 316    * as well as a hard drive of 2TB, you should config it to be:
 317    *    [{"/flash_path", 10GB}, {"/hard_drive", 2TB}]
 318    *
 319    * The system will try to guarantee data under each path is close to but
 320    * not larger than the target size. But current and future file sizes used
 321    * by determining where to place a file are based on best-effort estimation,
 322    * which means there is a chance that the actual size under the directory
 323    * is slightly more than target size under some workloads. User should give
 324    * some buffer room for those cases.
 325    *
 326    * If none of the paths has sufficient room to place a file, the file will
 327    * be placed to the last path anyway, despite to the target size.
 328    *
 329    * Placing newer data to earlier paths is also best-efforts. User should
 330    * expect user files to be placed in higher levels in some extreme cases.
 331    *
 332    * If left empty, only one path will be used, which is db_name passed when
 333    * opening the DB.
 334    *
 335    * Default: {@link java.util.Collections#emptyList()}
 336    *
 337    * @return dbPaths the paths and target sizes
 338    */
 339   List<DbPath> dbPaths();
 340
 341   /**
 342    * This specifies the info LOG dir.
 343    * If it is empty, the log files will be in the same dir as data.
 344    * If it is non empty, the log files will be in the specified dir,
 345    * and the db data dir's absolute path will be used as the log file
 346    * name's prefix.
 347    *
 348    * @param dbLogDir the path to the info log directory
 349    * @return the instance of the current object.
 350    */
 351   T setDbLogDir(String dbLogDir);
 352
 353   /**
 354    * Returns the directory of info log.
 355    *
 356    * If it is empty, the log files will be in the same dir as data.
 357    * If it is non empty, the log files will be in the specified dir,
 358    * and the db data dir's absolute path will be used as the log file
 359    * name's prefix.
 360    *
 361    * @return the path to the info log directory
 362    */
 363   String dbLogDir();
 364
 365   /**
 366    * This specifies the absolute dir path for write-ahead logs (WAL).
 367    * If it is empty, the log files will be in the same dir as data,
 368    *   dbname is used as the data dir by default
 369    * If it is non empty, the log files will be in kept the specified dir.
 370    * When destroying the db,
 371    *   all log files in wal_dir and the dir itself is deleted
 372    *
 373    * @param walDir the path to the write-ahead-log directory.
 374    * @return the instance of the current object.
 375    */
 376   T setWalDir(String walDir);
 377
 378   /**
 379    * Returns the path to the write-ahead-logs (WAL) directory.
 380    *
 381    * If it is empty, the log files will be in the same dir as data,
 382    *   dbname is used as the data dir by default
 383    * If it is non empty, the log files will be in kept the specified dir.
 384    * When destroying the db,
 385    *   all log files in wal_dir and the dir itself is deleted
 386    *
 387    * @return the path to the write-ahead-logs (WAL) directory.
 388    */
 389   String walDir();
 390
 391   /**
 392    * The periodicity when obsolete files get deleted. The default
 393    * value is 6 hours. The files that get out of scope by compaction
 394    * process will still get automatically delete on every compaction,
 395    * regardless of this setting
 396    *
 397    * @param micros the time interval in micros
 398    * @return the instance of the current object.
 399    */
 400   T setDeleteObsoleteFilesPeriodMicros(long micros);
 401
 402   /**
 403    * The periodicity when obsolete files get deleted. The default
 404    * value is 6 hours. The files that get out of scope by compaction
 405    * process will still get automatically delete on every compaction,
 406    * regardless of this setting
 407    *
 408    * @return the time interval in micros when obsolete files will be deleted.
 409    */
 410   long deleteObsoleteFilesPeriodMicros();
 411
 412   /**
 413    * This value represents the maximum number of threads that will
 414    * concurrently perform a compaction job by breaking it into multiple,
 415    * smaller ones that are run simultaneously.
 416    * Default: 1 (i.e. no subcompactions)
 417    *
 418    * @param maxSubcompactions The maximum number of threads that will
 419    *     concurrently perform a compaction job
 420    *
 421    * @return the instance of the current object.
 422    */
 423   T setMaxSubcompactions(int maxSubcompactions);
 424
 425   /**
 426    * This value represents the maximum number of threads that will
 427    * concurrently perform a compaction job by breaking it into multiple,
 428    * smaller ones that are run simultaneously.
 429    * Default: 1 (i.e. no subcompactions)
 430    *
 431    * @return The maximum number of threads that will concurrently perform a
 432    *     compaction job
 433    */
 434   int maxSubcompactions();
 435
 436   /**
 437    * NOT SUPPORTED ANYMORE: RocksDB automatically decides this based on the
 438    * value of max_background_jobs. For backwards compatibility we will set
 439    * `max_background_jobs = max_background_compactions + max_background_flushes`
 440    * in the case where user sets at least one of `max_background_compactions` or
 441    * `max_background_flushes`.
 442    *
 443    * Specifies the maximum number of concurrent background flush jobs.
 444    * If you're increasing this, also consider increasing number of threads in
 445    * HIGH priority thread pool. For more information, see
 446    * Default: -1
 447    *
 448    * @param maxBackgroundFlushes number of max concurrent flush jobs
 449    * @return the instance of the current object.
 450    *
 451    * @see RocksEnv#setBackgroundThreads(int)
 452    * @see RocksEnv#setBackgroundThreads(int, Priority)
 453    * @see MutableDBOptionsInterface#maxBackgroundCompactions()
 454    *
 455    * @deprecated Use {@link MutableDBOptionsInterface#setMaxBackgroundJobs(int)}
 456    */
 457   @Deprecated
 458   T setMaxBackgroundFlushes(int maxBackgroundFlushes);
 459
 460   /**
 461    * NOT SUPPORTED ANYMORE: RocksDB automatically decides this based on the
 462    * value of max_background_jobs. For backwards compatibility we will set
 463    * `max_background_jobs = max_background_compactions + max_background_flushes`
 464    * in the case where user sets at least one of `max_background_compactions` or
 465    * `max_background_flushes`.
 466    *
 467    * Returns the maximum number of concurrent background flush jobs.
 468    * If you're increasing this, also consider increasing number of threads in
 469    * HIGH priority thread pool. For more information, see
 470    * Default: -1
 471    *
 472    * @return the maximum number of concurrent background flush jobs.
 473    * @see RocksEnv#setBackgroundThreads(int)
 474    * @see RocksEnv#setBackgroundThreads(int, Priority)
 475    */
 476   @Deprecated
 477   int maxBackgroundFlushes();
 478
 479   /**
 480    * Specifies the maximum size of a info log file. If the current log file
 481    * is larger than `max_log_file_size`, a new info log file will
 482    * be created.
 483    * If 0, all logs will be written to one log file.
 484    *
 485    * @param maxLogFileSize the maximum size of a info log file.
 486    * @return the instance of the current object.
 487    * @throws java.lang.IllegalArgumentException thrown on 32-Bit platforms
 488    *   while overflowing the underlying platform specific value.
 489    */
 490   T setMaxLogFileSize(long maxLogFileSize);
 491
 492   /**
 493    * Returns the maximum size of a info log file. If the current log file
 494    * is larger than this size, a new info log file will be created.
 495    * If 0, all logs will be written to one log file.
 496    *
 497    * @return the maximum size of the info log file.
 498    */
 499   long maxLogFileSize();
 500
 501   /**
 502    * Specifies the time interval for the info log file to roll (in seconds).
 503    * If specified with non-zero value, log file will be rolled
 504    * if it has been active longer than `log_file_time_to_roll`.
 505    * Default: 0 (disabled)
 506    *
 507    * @param logFileTimeToRoll the time interval in seconds.
 508    * @return the instance of the current object.
 509    * @throws java.lang.IllegalArgumentException thrown on 32-Bit platforms
 510    *   while overflowing the underlying platform specific value.
 511    */
 512   T setLogFileTimeToRoll(long logFileTimeToRoll);
 513
 514   /**
 515    * Returns the time interval for the info log file to roll (in seconds).
 516    * If specified with non-zero value, log file will be rolled
 517    * if it has been active longer than `log_file_time_to_roll`.
 518    * Default: 0 (disabled)
 519    *
 520    * @return the time interval in seconds.
 521    */
 522   long logFileTimeToRoll();
 523
 524   /**
 525    * Specifies the maximum number of info log files to be kept.
 526    * Default: 1000
 527    *
 528    * @param keepLogFileNum the maximum number of info log files to be kept.
 529    * @return the instance of the current object.
 530    * @throws java.lang.IllegalArgumentException thrown on 32-Bit platforms
 531    *   while overflowing the underlying platform specific value.
 532    */
 533   T setKeepLogFileNum(long keepLogFileNum);
 534
 535   /**
 536    * Returns the maximum number of info log files to be kept.
 537    * Default: 1000
 538    *
 539    * @return the maximum number of info log files to be kept.
 540    */
 541   long keepLogFileNum();
 542
 543   /**
 544    * Recycle log files.
 545    *
 546    * If non-zero, we will reuse previously written log files for new
 547    * logs, overwriting the old data.  The value indicates how many
 548    * such files we will keep around at any point in time for later
 549    * use.
 550    *
 551    * This is more efficient because the blocks are already
 552    * allocated and fdatasync does not need to update the inode after
 553    * each write.
 554    *
 555    * Default: 0
 556    *
 557    * @param recycleLogFileNum the number of log files to keep for recycling
 558    *
 559    * @return the reference to the current options
 560    */
 561   T setRecycleLogFileNum(long recycleLogFileNum);
 562
 563   /**
 564    * Recycle log files.
 565    *
 566    * If non-zero, we will reuse previously written log files for new
 567    * logs, overwriting the old data.  The value indicates how many
 568    * such files we will keep around at any point in time for later
 569    * use.
 570    *
 571    * This is more efficient because the blocks are already
 572    * allocated and fdatasync does not need to update the inode after
 573    * each write.
 574    *
 575    * Default: 0
 576    *
 577    * @return the number of log files kept for recycling
 578    */
 579   long recycleLogFileNum();
 580
 581   /**
 582    * Manifest file is rolled over on reaching this limit.
 583    * The older manifest file be deleted.
 584    * The default value is 1GB so that the manifest file can grow, but not
 585    * reach the limit of storage capacity.
 586    *
 587    * @param maxManifestFileSize the size limit of a manifest file.
 588    * @return the instance of the current object.
 589    */
 590   T setMaxManifestFileSize(long maxManifestFileSize);
 591
 592   /**
 593    * Manifest file is rolled over on reaching this limit.
 594    * The older manifest file be deleted.
 595    * The default value is 1GB so that the manifest file can grow, but not
 596    * reach the limit of storage capacity.
 597    *
 598    * @return the size limit of a manifest file.
 599    */
 600   long maxManifestFileSize();
 601
 602   /**
 603    * Number of shards used for table cache.
 604    *
 605    * @param tableCacheNumshardbits the number of chards
 606    * @return the instance of the current object.
 607    */
 608   T setTableCacheNumshardbits(int tableCacheNumshardbits);
 609
 610   /**
 611    * Number of shards used for table cache.
 612    *
 613    * @return the number of shards used for table cache.
 614    */
 615   int tableCacheNumshardbits();
 616
 617   /**
 618    * {@link #walTtlSeconds()} and {@link #walSizeLimitMB()} affect how archived logs
 619    * will be deleted.
 620    * <ol>
 621    * <li>If both set to 0, logs will be deleted asap and will not get into
 622    * the archive.</li>
 623    * <li>If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0,
 624    *    WAL files will be checked every 10 min and if total size is greater
 625    *    then WAL_size_limit_MB, they will be deleted starting with the
 626    *    earliest until size_limit is met. All empty files will be deleted.</li>
 627    * <li>If WAL_ttl_seconds is not 0 and WAL_size_limit_MB is 0, then
 628    *    WAL files will be checked every WAL_ttl_seconds / 2 and those that
 629    *    are older than WAL_ttl_seconds will be deleted.</li>
 630    * <li>If both are not 0, WAL files will be checked every 10 min and both
 631    *    checks will be performed with ttl being first.</li>
 632    * </ol>
 633    *
 634    * @param walTtlSeconds the ttl seconds
 635    * @return the instance of the current object.
 636    * @see #setWalSizeLimitMB(long)
 637    */
 638   T setWalTtlSeconds(long walTtlSeconds);
 639
 640   /**
 641    * WalTtlSeconds() and walSizeLimitMB() affect how archived logs
 642    * will be deleted.
 643    * <ol>
 644    * <li>If both set to 0, logs will be deleted asap and will not get into
 645    * the archive.</li>
 646    * <li>If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0,
 647    * WAL files will be checked every 10 min and if total size is greater
 648    * then WAL_size_limit_MB, they will be deleted starting with the
 649    * earliest until size_limit is met. All empty files will be deleted.</li>
 650    * <li>If WAL_ttl_seconds is not 0 and WAL_size_limit_MB is 0, then
 651    * WAL files will be checked every WAL_ttl_seconds / 2 and those that
 652    * are older than WAL_ttl_seconds will be deleted.</li>
 653    * <li>If both are not 0, WAL files will be checked every 10 min and both
 654    * checks will be performed with ttl being first.</li>
 655    * </ol>
 656    *
 657    * @return the wal-ttl seconds
 658    * @see #walSizeLimitMB()
 659    */
 660   long walTtlSeconds();
 661
 662   /**
 663    * WalTtlSeconds() and walSizeLimitMB() affect how archived logs
 664    * will be deleted.
 665    * <ol>
 666    * <li>If both set to 0, logs will be deleted asap and will not get into
 667    *    the archive.</li>
 668    * <li>If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0,
 669    *    WAL files will be checked every 10 min and if total size is greater
 670    *    then WAL_size_limit_MB, they will be deleted starting with the
 671    *    earliest until size_limit is met. All empty files will be deleted.</li>
 672    * <li>If WAL_ttl_seconds is not 0 and WAL_size_limit_MB is 0, then
 673    *    WAL files will be checked every WAL_ttl_secondsi / 2 and those that
 674    *    are older than WAL_ttl_seconds will be deleted.</li>
 675    * <li>If both are not 0, WAL files will be checked every 10 min and both
 676    *    checks will be performed with ttl being first.</li>
 677    * </ol>
 678    *
 679    * @param sizeLimitMB size limit in mega-bytes.
 680    * @return the instance of the current object.
 681    * @see #setWalSizeLimitMB(long)
 682    */
 683   T setWalSizeLimitMB(long sizeLimitMB);
 684
 685   /**
 686    * {@link #walTtlSeconds()} and {@code #walSizeLimitMB()} affect how archived logs
 687    * will be deleted.
 688    * <ol>
 689    * <li>If both set to 0, logs will be deleted asap and will not get into
 690    *    the archive.</li>
 691    * <li>If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0,
 692    *    WAL files will be checked every 10 min and if total size is greater
 693    *    then WAL_size_limit_MB, they will be deleted starting with the
 694    *    earliest until size_limit is met. All empty files will be deleted.</li>
 695    * <li>If WAL_ttl_seconds is not 0 and WAL_size_limit_MB is 0, then
 696    *    WAL files will be checked every WAL_ttl_seconds i / 2 and those that
 697    *    are older than WAL_ttl_seconds will be deleted.</li>
 698    * <li>If both are not 0, WAL files will be checked every 10 min and both
 699    *    checks will be performed with ttl being first.</li>
 700    * </ol>
 701    * @return size limit in mega-bytes.
 702    * @see #walSizeLimitMB()
 703    */
 704   long walSizeLimitMB();
 705
 706   /**
 707    * The maximum limit of number of bytes that are written in a single batch
 708    * of WAL or memtable write. It is followed when the leader write size
 709    * is larger than 1/8 of this limit.
 710    *
 711    * Default: 1 MB
 712    *
 713    * @param maxWriteBatchGroupSizeBytes the maximum limit of number of bytes, see description.
 714    * @return the instance of the current object.
 715    */
 716   T setMaxWriteBatchGroupSizeBytes(final long maxWriteBatchGroupSizeBytes);
 717
 718   /**
 719    * The maximum limit of number of bytes that are written in a single batch
 720    * of WAL or memtable write. It is followed when the leader write size
 721    * is larger than 1/8 of this limit.
 722    *
 723    * Default: 1 MB
 724    *
 725    * @return the maximum limit of number of bytes, see description.
 726    */
 727   long maxWriteBatchGroupSizeBytes();
 728
 729   /**
 730    * Number of bytes to preallocate (via fallocate) the manifest
 731    * files.  Default is 4mb, which is reasonable to reduce random IO
 732    * as well as prevent overallocation for mounts that preallocate
 733    * large amounts of data (such as xfs's allocsize option).
 734    *
 735    * @param size the size in byte
 736    * @return the instance of the current object.
 737    * @throws java.lang.IllegalArgumentException thrown on 32-Bit platforms
 738    *   while overflowing the underlying platform specific value.
 739    */
 740   T setManifestPreallocationSize(long size);
 741
 742   /**
 743    * Number of bytes to preallocate (via fallocate) the manifest
 744    * files.  Default is 4mb, which is reasonable to reduce random IO
 745    * as well as prevent overallocation for mounts that preallocate
 746    * large amounts of data (such as xfs's allocsize option).
 747    *
 748    * @return size in bytes.
 749    */
 750   long manifestPreallocationSize();
 751
 752   /**
 753    * Enable the OS to use direct I/O for reading sst tables.
 754    * Default: false
 755    *
 756    * @param useDirectReads if true, then direct read is enabled
 757    * @return the instance of the current object.
 758    */
 759   T setUseDirectReads(boolean useDirectReads);
 760
 761   /**
 762    * Enable the OS to use direct I/O for reading sst tables.
 763    * Default: false
 764    *
 765    * @return if true, then direct reads are enabled
 766    */
 767   boolean useDirectReads();
 768
 769   /**
 770    * Enable the OS to use direct reads and writes in flush and
 771    * compaction
 772    * Default: false
 773    *
 774    * @param useDirectIoForFlushAndCompaction if true, then direct
 775    *        I/O will be enabled for background flush and compactions
 776    * @return the instance of the current object.
 777    */
 778   T setUseDirectIoForFlushAndCompaction(boolean useDirectIoForFlushAndCompaction);
 779
 780   /**
 781    * Enable the OS to use direct reads and writes in flush and
 782    * compaction
 783    *
 784    * @return if true, then direct I/O is enabled for flush and
 785    *         compaction
 786    */
 787   boolean useDirectIoForFlushAndCompaction();
 788
 789   /**
 790    * Whether fallocate calls are allowed
 791    *
 792    * @param allowFAllocate false if fallocate() calls are bypassed
 793    *
 794    * @return the reference to the current options.
 795    */
 796   T setAllowFAllocate(boolean allowFAllocate);
 797
 798   /**
 799    * Whether fallocate calls are allowed
 800    *
 801    * @return false if fallocate() calls are bypassed
 802    */
 803   boolean allowFAllocate();
 804
 805   /**
 806    * Allow the OS to mmap file for reading sst tables.
 807    * Default: false
 808    *
 809    * @param allowMmapReads true if mmap reads are allowed.
 810    * @return the instance of the current object.
 811    */
 812   T setAllowMmapReads(boolean allowMmapReads);
 813
 814   /**
 815    * Allow the OS to mmap file for reading sst tables.
 816    * Default: false
 817    *
 818    * @return true if mmap reads are allowed.
 819    */
 820   boolean allowMmapReads();
 821
 822   /**
 823    * Allow the OS to mmap file for writing. Default: false
 824    *
 825    * @param allowMmapWrites true if mmap writes are allowd.
 826    * @return the instance of the current object.
 827    */
 828   T setAllowMmapWrites(boolean allowMmapWrites);
 829
 830   /**
 831    * Allow the OS to mmap file for writing. Default: false
 832    *
 833    * @return true if mmap writes are allowed.
 834    */
 835   boolean allowMmapWrites();
 836
 837   /**
 838    * Disable child process inherit open files. Default: true
 839    *
 840    * @param isFdCloseOnExec true if child process inheriting open
 841    *     files is disabled.
 842    * @return the instance of the current object.
 843    */
 844   T setIsFdCloseOnExec(boolean isFdCloseOnExec);
 845
 846   /**
 847    * Disable child process inherit open files. Default: true
 848    *
 849    * @return true if child process inheriting open files is disabled.
 850    */
 851   boolean isFdCloseOnExec();
 852
 853   /**
 854    * If set true, will hint the underlying file system that the file
 855    * access pattern is random, when a sst file is opened.
 856    * Default: true
 857    *
 858    * @param adviseRandomOnOpen true if hinting random access is on.
 859    * @return the instance of the current object.
 860    */
 861   T setAdviseRandomOnOpen(boolean adviseRandomOnOpen);
 862
 863   /**
 864    * If set true, will hint the underlying file system that the file
 865    * access pattern is random, when a sst file is opened.
 866    * Default: true
 867    *
 868    * @return true if hinting random access is on.
 869    */
 870   boolean adviseRandomOnOpen();
 871
 872   /**
 873    * Amount of data to build up in memtables across all column
 874    * families before writing to disk.
 875    *
 876    * This is distinct from {@link ColumnFamilyOptions#writeBufferSize()},
 877    * which enforces a limit for a single memtable.
 878    *
 879    * This feature is disabled by default. Specify a non-zero value
 880    * to enable it.
 881    *
 882    * Default: 0 (disabled)
 883    *
 884    * @param dbWriteBufferSize the size of the write buffer
 885    *
 886    * @return the reference to the current options.
 887    */
 888   T setDbWriteBufferSize(long dbWriteBufferSize);
 889
 890   /**
 891    * Use passed {@link WriteBufferManager} to control memory usage across
 892    * multiple column families and/or DB instances.
 893    *
 894    * Check <a href="https://github.com/facebook/rocksdb/wiki/Write-Buffer-Manager">
 895    *     https://github.com/facebook/rocksdb/wiki/Write-Buffer-Manager</a>
 896    * for more details on when to use it
 897    *
 898    * @param writeBufferManager The WriteBufferManager to use
 899    * @return the reference of the current options.
 900    */
 901   T setWriteBufferManager(final WriteBufferManager writeBufferManager);
 902
 903   /**
 904    * Reference to {@link WriteBufferManager} used by it. <br>
 905    *
 906    * Default: null (Disabled)
 907    *
 908    * @return a reference to WriteBufferManager
 909    */
 910   WriteBufferManager writeBufferManager();
 911
 912   /**
 913    * Amount of data to build up in memtables across all column
 914    * families before writing to disk.
 915    *
 916    * This is distinct from {@link ColumnFamilyOptions#writeBufferSize()},
 917    * which enforces a limit for a single memtable.
 918    *
 919    * This feature is disabled by default. Specify a non-zero value
 920    * to enable it.
 921    *
 922    * Default: 0 (disabled)
 923    *
 924    * @return the size of the write buffer
 925    */
 926   long dbWriteBufferSize();
 927
 928   /**
 929    * Specify the file access pattern once a compaction is started.
 930    * It will be applied to all input files of a compaction.
 931    *
 932    * Default: {@link AccessHint#NORMAL}
 933    *
 934    * @param accessHint The access hint
 935    *
 936    * @return the reference to the current options.
 937    */
 938   T setAccessHintOnCompactionStart(final AccessHint accessHint);
 939
 940   /**
 941    * Specify the file access pattern once a compaction is started.
 942    * It will be applied to all input files of a compaction.
 943    *
 944    * Default: {@link AccessHint#NORMAL}
 945    *
 946    * @return The access hint
 947    */
 948   AccessHint accessHintOnCompactionStart();
 949
 950   /**
 951    * This is a maximum buffer size that is used by WinMmapReadableFile in
 952    * unbuffered disk I/O mode. We need to maintain an aligned buffer for
 953    * reads. We allow the buffer to grow until the specified value and then
 954    * for bigger requests allocate one shot buffers. In unbuffered mode we
 955    * always bypass read-ahead buffer at ReadaheadRandomAccessFile
 956    * When read-ahead is required we then make use of
 957    * {@link MutableDBOptionsInterface#compactionReadaheadSize()} value and
 958    * always try to read ahead.
 959    * With read-ahead we always pre-allocate buffer to the size instead of
 960    * growing it up to a limit.
 961    *
 962    * This option is currently honored only on Windows
 963    *
 964    * Default: 1 Mb
 965    *
 966    * Special value: 0 - means do not maintain per instance buffer. Allocate
 967    *                per request buffer and avoid locking.
 968    *
 969    * @param randomAccessMaxBufferSize the maximum size of the random access
 970    *     buffer
 971    *
 972    * @return the reference to the current options.
 973    */
 974   T setRandomAccessMaxBufferSize(long randomAccessMaxBufferSize);
 975
 976   /**
 977    * This is a maximum buffer size that is used by WinMmapReadableFile in
 978    * unbuffered disk I/O mode. We need to maintain an aligned buffer for
 979    * reads. We allow the buffer to grow until the specified value and then
 980    * for bigger requests allocate one shot buffers. In unbuffered mode we
 981    * always bypass read-ahead buffer at ReadaheadRandomAccessFile
 982    * When read-ahead is required we then make use of
 983    * {@link MutableDBOptionsInterface#compactionReadaheadSize()} value and
 984    * always try to read ahead. With read-ahead we always pre-allocate buffer
 985    * to the size instead of growing it up to a limit.
 986    *
 987    * This option is currently honored only on Windows
 988    *
 989    * Default: 1 Mb
 990    *
 991    * Special value: 0 - means do not maintain per instance buffer. Allocate
 992    *                per request buffer and avoid locking.
 993    *
 994    * @return the maximum size of the random access buffer
 995    */
 996   long randomAccessMaxBufferSize();
 997
 998   /**
 999    * Use adaptive mutex, which spins in the user space before resorting
1000    * to kernel. This could reduce context switch when the mutex is not
1001    * heavily contended. However, if the mutex is hot, we could end up
1002    * wasting spin time.
1003    * Default: false
1004    *
1005    * @param useAdaptiveMutex true if adaptive mutex is used.
1006    * @return the instance of the current object.
1007    */
1008   T setUseAdaptiveMutex(boolean useAdaptiveMutex);
1009
1010   /**
1011    * Use adaptive mutex, which spins in the user space before resorting
1012    * to kernel. This could reduce context switch when the mutex is not
1013    * heavily contended. However, if the mutex is hot, we could end up
1014    * wasting spin time.
1015    * Default: false
1016    *
1017    * @return true if adaptive mutex is used.
1018    */
1019   boolean useAdaptiveMutex();
1020
1021   /**
1022    * Sets the {@link EventListener}s whose callback functions
1023    * will be called when specific RocksDB event happens.
1024    *
1025    * Note: the RocksJava API currently only supports EventListeners implemented in Java.
1026    * It could be extended in future to also support adding/removing EventListeners implemented in
1027    * C++.
1028    *
1029    * @param listeners the listeners who should be notified on various events.
1030    *
1031    * @return the instance of the current object.
1032    */
1033   T setListeners(final List<AbstractEventListener> listeners);
1034
1035   /**
1036    * Sets the {@link EventListener}s whose callback functions
1037    * will be called when specific RocksDB event happens.
1038    *
1039    * Note: the RocksJava API currently only supports EventListeners implemented in Java.
1040    * It could be extended in future to also support adding/removing EventListeners implemented in
1041    * C++.
1042    *
1043    * @return the instance of the current object.
1044    */
1045   List<AbstractEventListener> listeners();
1046
1047   /**
1048    * If true, then the status of the threads involved in this DB will
1049    * be tracked and available via GetThreadList() API.
1050    *
1051    * Default: false
1052    *
1053    * @param enableThreadTracking true to enable tracking
1054    *
1055    * @return the reference to the current options.
1056    */
1057   T setEnableThreadTracking(boolean enableThreadTracking);
1058
1059   /**
1060    * If true, then the status of the threads involved in this DB will
1061    * be tracked and available via GetThreadList() API.
1062    *
1063    * Default: false
1064    *
1065    * @return true if tracking is enabled
1066    */
1067   boolean enableThreadTracking();
1068
1069   /**
1070    * By default, a single write thread queue is maintained. The thread gets
1071    * to the head of the queue becomes write batch group leader and responsible
1072    * for writing to WAL and memtable for the batch group.
1073    *
1074    * If {@link #enablePipelinedWrite()} is true, separate write thread queue is
1075    * maintained for WAL write and memtable write. A write thread first enter WAL
1076    * writer queue and then memtable writer queue. Pending thread on the WAL
1077    * writer queue thus only have to wait for previous writers to finish their
1078    * WAL writing but not the memtable writing. Enabling the feature may improve
1079    * write throughput and reduce latency of the prepare phase of two-phase
1080    * commit.
1081    *
1082    * Default: false
1083    *
1084    * @param enablePipelinedWrite true to enabled pipelined writes
1085    *
1086    * @return the reference to the current options.
1087    */
1088   T setEnablePipelinedWrite(final boolean enablePipelinedWrite);
1089
1090   /**
1091    * Returns true if pipelined writes are enabled.
1092    * See {@link #setEnablePipelinedWrite(boolean)}.
1093    *
1094    * @return true if pipelined writes are enabled, false otherwise.
1095    */
1096   boolean enablePipelinedWrite();
1097
1098   /**
1099    * Setting {@link #unorderedWrite()} to true trades higher write throughput with
1100    * relaxing the immutability guarantee of snapshots. This violates the
1101    * repeatability one expects from ::Get from a snapshot, as well as
1102    * ::MultiGet and Iterator's consistent-point-in-time view property.
1103    * If the application cannot tolerate the relaxed guarantees, it can implement
1104    * its own mechanisms to work around that and yet benefit from the higher
1105    * throughput. Using TransactionDB with WRITE_PREPARED write policy and
1106    * {@link #twoWriteQueues()} true is one way to achieve immutable snapshots despite
1107    * unordered_write.
1108    *
1109    * By default, i.e., when it is false, rocksdb does not advance the sequence
1110    * number for new snapshots unless all the writes with lower sequence numbers
1111    * are already finished. This provides the immutability that we except from
1112    * snapshots. Moreover, since Iterator and MultiGet internally depend on
1113    * snapshots, the snapshot immutability results into Iterator and MultiGet
1114    * offering consistent-point-in-time view. If set to true, although
1115    * Read-Your-Own-Write property is still provided, the snapshot immutability
1116    * property is relaxed: the writes issued after the snapshot is obtained (with
1117    * larger sequence numbers) will be still not visible to the reads from that
1118    * snapshot, however, there still might be pending writes (with lower sequence
1119    * number) that will change the state visible to the snapshot after they are
1120    * landed to the memtable.
1121    *
1122    * @param unorderedWrite true to enabled unordered write
1123    *
1124    * @return the reference to the current options.
1125    */
1126   T setUnorderedWrite(final boolean unorderedWrite);
1127
1128   /**
1129    * Returns true if unordered write are enabled.
1130    * See {@link #setUnorderedWrite(boolean)}.
1131    *
1132    * @return true if unordered write are enabled, false otherwise.
1133    */
1134   boolean unorderedWrite();
1135
1136   /**
1137    * If true, allow multi-writers to update mem tables in parallel.
1138    * Only some memtable factorys support concurrent writes; currently it
1139    * is implemented only for SkipListFactory.  Concurrent memtable writes
1140    * are not compatible with inplace_update_support or filter_deletes.
1141    * It is strongly recommended to set
1142    * {@link #setEnableWriteThreadAdaptiveYield(boolean)} if you are going to use
1143    * this feature.
1144    * Default: true
1145    *
1146    * @param allowConcurrentMemtableWrite true to enable concurrent writes
1147    *     for the memtable
1148    *
1149    * @return the reference to the current options.
1150    */
1151   T setAllowConcurrentMemtableWrite(boolean allowConcurrentMemtableWrite);
1152
1153   /**
1154    * If true, allow multi-writers to update mem tables in parallel.
1155    * Only some memtable factorys support concurrent writes; currently it
1156    * is implemented only for SkipListFactory.  Concurrent memtable writes
1157    * are not compatible with inplace_update_support or filter_deletes.
1158    * It is strongly recommended to set
1159    * {@link #setEnableWriteThreadAdaptiveYield(boolean)} if you are going to use
1160    * this feature.
1161    * Default: true
1162    *
1163    * @return true if concurrent writes are enabled for the memtable
1164    */
1165   boolean allowConcurrentMemtableWrite();
1166
1167   /**
1168    * If true, threads synchronizing with the write batch group leader will
1169    * wait for up to {@link #writeThreadMaxYieldUsec()} before blocking on a
1170    * mutex. This can substantially improve throughput for concurrent workloads,
1171    * regardless of whether {@link #allowConcurrentMemtableWrite()} is enabled.
1172    * Default: true
1173    *
1174    * @param enableWriteThreadAdaptiveYield true to enable adaptive yield for the
1175    *     write threads
1176    *
1177    * @return the reference to the current options.
1178    */
1179   T setEnableWriteThreadAdaptiveYield(
1180       boolean enableWriteThreadAdaptiveYield);
1181
1182   /**
1183    * If true, threads synchronizing with the write batch group leader will
1184    * wait for up to {@link #writeThreadMaxYieldUsec()} before blocking on a
1185    * mutex. This can substantially improve throughput for concurrent workloads,
1186    * regardless of whether {@link #allowConcurrentMemtableWrite()} is enabled.
1187    * Default: true
1188    *
1189    * @return true if adaptive yield is enabled
1190    *    for the writing threads
1191    */
1192   boolean enableWriteThreadAdaptiveYield();
1193
1194   /**
1195    * The maximum number of microseconds that a write operation will use
1196    * a yielding spin loop to coordinate with other write threads before
1197    * blocking on a mutex.  (Assuming {@link #writeThreadSlowYieldUsec()} is
1198    * set properly) increasing this value is likely to increase RocksDB
1199    * throughput at the expense of increased CPU usage.
1200    * Default: 100
1201    *
1202    * @param writeThreadMaxYieldUsec maximum number of microseconds
1203    *
1204    * @return the reference to the current options.
1205    */
1206   T setWriteThreadMaxYieldUsec(long writeThreadMaxYieldUsec);
1207
1208   /**
1209    * The maximum number of microseconds that a write operation will use
1210    * a yielding spin loop to coordinate with other write threads before
1211    * blocking on a mutex.  (Assuming {@link #writeThreadSlowYieldUsec()} is
1212    * set properly) increasing this value is likely to increase RocksDB
1213    * throughput at the expense of increased CPU usage.
1214    * Default: 100
1215    *
1216    * @return the maximum number of microseconds
1217    */
1218   long writeThreadMaxYieldUsec();
1219
1220   /**
1221    * The latency in microseconds after which a std::this_thread::yield
1222    * call (sched_yield on Linux) is considered to be a signal that
1223    * other processes or threads would like to use the current core.
1224    * Increasing this makes writer threads more likely to take CPU
1225    * by spinning, which will show up as an increase in the number of
1226    * involuntary context switches.
1227    * Default: 3
1228    *
1229    * @param writeThreadSlowYieldUsec the latency in microseconds
1230    *
1231    * @return the reference to the current options.
1232    */
1233   T setWriteThreadSlowYieldUsec(long writeThreadSlowYieldUsec);
1234
1235   /**
1236    * The latency in microseconds after which a std::this_thread::yield
1237    * call (sched_yield on Linux) is considered to be a signal that
1238    * other processes or threads would like to use the current core.
1239    * Increasing this makes writer threads more likely to take CPU
1240    * by spinning, which will show up as an increase in the number of
1241    * involuntary context switches.
1242    * Default: 3
1243    *
1244    * @return writeThreadSlowYieldUsec the latency in microseconds
1245    */
1246   long writeThreadSlowYieldUsec();
1247
1248   /**
1249    * If true, then DB::Open() will not update the statistics used to optimize
1250    * compaction decision by loading table properties from many files.
1251    * Turning off this feature will improve DBOpen time especially in
1252    * disk environment.
1253    *
1254    * Default: false
1255    *
1256    * @param skipStatsUpdateOnDbOpen true if updating stats will be skipped
1257    *
1258    * @return the reference to the current options.
1259    */
1260   T setSkipStatsUpdateOnDbOpen(boolean skipStatsUpdateOnDbOpen);
1261
1262   /**
1263    * If true, then DB::Open() will not update the statistics used to optimize
1264    * compaction decision by loading table properties from many files.
1265    * Turning off this feature will improve DBOpen time especially in
1266    * disk environment.
1267    *
1268    * Default: false
1269    *
1270    * @return true if updating stats will be skipped
1271    */
1272   boolean skipStatsUpdateOnDbOpen();
1273
1274   /**
1275    * If true, then {@link RocksDB#open(String)} will not fetch and check sizes of all sst files.
1276    * This may significantly speed up startup if there are many sst files,
1277    * especially when using non-default Env with expensive GetFileSize().
1278    * We'll still check that all required sst files exist.
1279    * If {@code paranoid_checks} is false, this option is ignored, and sst files are
1280    * not checked at all.
1281    *
1282    * Default: false
1283    *
1284    * @param skipCheckingSstFileSizesOnDbOpen if true, then SST file sizes will not be checked
1285    *                                         when calling {@link RocksDB#open(String)}.
1286    * @return the reference to the current options.
1287    */
1288   T setSkipCheckingSstFileSizesOnDbOpen(final boolean skipCheckingSstFileSizesOnDbOpen);
1289
1290   /**
1291    * If true, then {@link RocksDB#open(String)} will not fetch and check sizes of all sst files.
1292    * This may significantly speed up startup if there are many sst files,
1293    * especially when using non-default Env with expensive GetFileSize().
1294    * We'll still check that all required sst files exist.
1295    * If {@code paranoid_checks} is false, this option is ignored, and sst files are
1296    * not checked at all.
1297    *
1298    * Default: false
1299    *
1300    * @return true, if file sizes will not be checked when calling {@link RocksDB#open(String)}.
1301    */
1302   boolean skipCheckingSstFileSizesOnDbOpen();
1303
1304   /**
1305    * Recovery mode to control the consistency while replaying WAL
1306    *
1307    * Default: {@link WALRecoveryMode#PointInTimeRecovery}
1308    *
1309    * @param walRecoveryMode The WAL recover mode
1310    *
1311    * @return the reference to the current options.
1312    */
1313   T setWalRecoveryMode(WALRecoveryMode walRecoveryMode);
1314
1315   /**
1316    * Recovery mode to control the consistency while replaying WAL
1317    *
1318    * Default: {@link WALRecoveryMode#PointInTimeRecovery}
1319    *
1320    * @return The WAL recover mode
1321    */
1322   WALRecoveryMode walRecoveryMode();
1323
1324   /**
1325    * if set to false then recovery will fail when a prepared
1326    * transaction is encountered in the WAL
1327    *
1328    * Default: false
1329    *
1330    * @param allow2pc true if two-phase-commit is enabled
1331    *
1332    * @return the reference to the current options.
1333    */
1334   T setAllow2pc(boolean allow2pc);
1335
1336   /**
1337    * if set to false then recovery will fail when a prepared
1338    * transaction is encountered in the WAL
1339    *
1340    * Default: false
1341    *
1342    * @return true if two-phase-commit is enabled
1343    */
1344   boolean allow2pc();
1345
1346   /**
1347    * A global cache for table-level rows.
1348    *
1349    * Default: null (disabled)
1350    *
1351    * @param rowCache The global row cache
1352    *
1353    * @return the reference to the current options.
1354    */
1355   T setRowCache(final Cache rowCache);
1356
1357   /**
1358    * A global cache for table-level rows.
1359    *
1360    * Default: null (disabled)
1361    *
1362    * @return The global row cache
1363    */
1364   Cache rowCache();
1365
1366   /**
1367    * A filter object supplied to be invoked while processing write-ahead-logs
1368    * (WALs) during recovery. The filter provides a way to inspect log
1369    * records, ignoring a particular record or skipping replay.
1370    * The filter is invoked at startup and is invoked from a single-thread
1371    * currently.
1372    *
1373    * @param walFilter the filter for processing WALs during recovery.
1374    *
1375    * @return the reference to the current options.
1376    */
1377   T setWalFilter(final AbstractWalFilter walFilter);
1378
1379   /**
1380    * Get's the filter for processing WALs during recovery.
1381    * See {@link #setWalFilter(AbstractWalFilter)}.
1382    *
1383    * @return the filter used for processing WALs during recovery.
1384    */
1385   WalFilter walFilter();
1386
1387   /**
1388    * If true, then DB::Open / CreateColumnFamily / DropColumnFamily
1389    * / SetOptions will fail if options file is not detected or properly
1390    * persisted.
1391    *
1392    * DEFAULT: false
1393    *
1394    * @param failIfOptionsFileError true if we should fail if there is an error
1395    *     in the options file
1396    *
1397    * @return the reference to the current options.
1398    */
1399   T setFailIfOptionsFileError(boolean failIfOptionsFileError);
1400
1401   /**
1402    * If true, then DB::Open / CreateColumnFamily / DropColumnFamily
1403    * / SetOptions will fail if options file is not detected or properly
1404    * persisted.
1405    *
1406    * DEFAULT: false
1407    *
1408    * @return true if we should fail if there is an error in the options file
1409    */
1410   boolean failIfOptionsFileError();
1411
1412   /**
1413    * If true, then print malloc stats together with rocksdb.stats
1414    * when printing to LOG.
1415    *
1416    * DEFAULT: false
1417    *
1418    * @param dumpMallocStats true if malloc stats should be printed to LOG
1419    *
1420    * @return the reference to the current options.
1421    */
1422   T setDumpMallocStats(boolean dumpMallocStats);
1423
1424   /**
1425    * If true, then print malloc stats together with rocksdb.stats
1426    * when printing to LOG.
1427    *
1428    * DEFAULT: false
1429    *
1430    * @return true if malloc stats should be printed to LOG
1431    */
1432   boolean dumpMallocStats();
1433
1434   /**
1435    * By default RocksDB replay WAL logs and flush them on DB open, which may
1436    * create very small SST files. If this option is enabled, RocksDB will try
1437    * to avoid (but not guarantee not to) flush during recovery. Also, existing
1438    * WAL logs will be kept, so that if crash happened before flush, we still
1439    * have logs to recover from.
1440    *
1441    * DEFAULT: false
1442    *
1443    * @param avoidFlushDuringRecovery true to try to avoid (but not guarantee
1444    *     not to) flush during recovery
1445    *
1446    * @return the reference to the current options.
1447    */
1448   T setAvoidFlushDuringRecovery(boolean avoidFlushDuringRecovery);
1449
1450   /**
1451    * By default RocksDB replay WAL logs and flush them on DB open, which may
1452    * create very small SST files. If this option is enabled, RocksDB will try
1453    * to avoid (but not guarantee not to) flush during recovery. Also, existing
1454    * WAL logs will be kept, so that if crash happened before flush, we still
1455    * have logs to recover from.
1456    *
1457    * DEFAULT: false
1458    *
1459    * @return true to try to avoid (but not guarantee not to) flush during
1460    *     recovery
1461    */
1462   boolean avoidFlushDuringRecovery();
1463
1464   /**
1465    * Set this option to true during creation of database if you want
1466    * to be able to ingest behind (call IngestExternalFile() skipping keys
1467    * that already exist, rather than overwriting matching keys).
1468    * Setting this option to true will affect 2 things:
1469    *     1) Disable some internal optimizations around SST file compression
1470    *     2) Reserve bottom-most level for ingested files only.
1471    *     3) Note that num_levels should be &gt;= 3 if this option is turned on.
1472    *
1473    * DEFAULT: false
1474    *
1475    * @param allowIngestBehind true to allow ingest behind, false to disallow.
1476    *
1477    * @return the reference to the current options.
1478    */
1479   T setAllowIngestBehind(final boolean allowIngestBehind);
1480
1481   /**
1482    * Returns true if ingest behind is allowed.
1483    * See {@link #setAllowIngestBehind(boolean)}.
1484    *
1485    * @return true if ingest behind is allowed, false otherwise.
1486    */
1487   boolean allowIngestBehind();
1488
1489   /**
1490    * If enabled it uses two queues for writes, one for the ones with
1491    * disable_memtable and one for the ones that also write to memtable. This
1492    * allows the memtable writes not to lag behind other writes. It can be used
1493    * to optimize MySQL 2PC in which only the commits, which are serial, write to
1494    * memtable.
1495    *
1496    * DEFAULT: false
1497    *
1498    * @param twoWriteQueues true to enable two write queues, false otherwise.
1499    *
1500    * @return the reference to the current options.
1501    */
1502   T setTwoWriteQueues(final boolean twoWriteQueues);
1503
1504   /**
1505    * Returns true if two write queues are enabled.
1506    *
1507    * @return true if two write queues are enabled, false otherwise.
1508    */
1509   boolean twoWriteQueues();
1510
1511   /**
1512    * If true WAL is not flushed automatically after each write. Instead it
1513    * relies on manual invocation of FlushWAL to write the WAL buffer to its
1514    * file.
1515    *
1516    * DEFAULT: false
1517    *
1518    * @param manualWalFlush true to set disable automatic WAL flushing,
1519    *     false otherwise.
1520    *
1521    * @return the reference to the current options.
1522    */
1523   T setManualWalFlush(final boolean manualWalFlush);
1524
1525   /**
1526    * Returns true if automatic WAL flushing is disabled.
1527    * See {@link #setManualWalFlush(boolean)}.
1528    *
1529    * @return true if automatic WAL flushing is disabled, false otherwise.
1530    */
1531   boolean manualWalFlush();
1532
1533   /**
1534    * If true, RocksDB supports flushing multiple column families and committing
1535    * their results atomically to MANIFEST. Note that it is not
1536    * necessary to set atomic_flush to true if WAL is always enabled since WAL
1537    * allows the database to be restored to the last persistent state in WAL.
1538    * This option is useful when there are column families with writes NOT
1539    * protected by WAL.
1540    * For manual flush, application has to specify which column families to
1541    * flush atomically in {@link RocksDB#flush(FlushOptions, List)}.
1542    * For auto-triggered flush, RocksDB atomically flushes ALL column families.
1543    *
1544    * Currently, any WAL-enabled writes after atomic flush may be replayed
1545    * independently if the process crashes later and tries to recover.
1546    *
1547    * @param atomicFlush true to enable atomic flush of multiple column families.
1548    *
1549    * @return the reference to the current options.
1550    */
1551   T setAtomicFlush(final boolean atomicFlush);
1552
1553   /**
1554    * Determine if atomic flush of multiple column families is enabled.
1555    *
1556    * See {@link #setAtomicFlush(boolean)}.
1557    *
1558    * @return true if atomic flush is enabled.
1559    */
1560   boolean atomicFlush();
1561
1562   /**
1563    * If true, working thread may avoid doing unnecessary and long-latency
1564    * operation (such as deleting obsolete files directly or deleting memtable)
1565    * and will instead schedule a background job to do it.
1566    * Use it if you're latency-sensitive.
1567    * If set to true, takes precedence over
1568    * {@link ReadOptions#setBackgroundPurgeOnIteratorCleanup(boolean)}.
1569    *
1570    * @param avoidUnnecessaryBlockingIO If true, working thread may avoid doing unnecessary
1571    *     operation.
1572    * @return the reference to the current options.
1573    */
1574   T setAvoidUnnecessaryBlockingIO(final boolean avoidUnnecessaryBlockingIO);
1575
1576   /**
1577    * If true, working thread may avoid doing unnecessary and long-latency
1578    * operation (such as deleting obsolete files directly or deleting memtable)
1579    * and will instead schedule a background job to do it.
1580    * Use it if you're latency-sensitive.
1581    * If set to true, takes precedence over
1582    * {@link ReadOptions#setBackgroundPurgeOnIteratorCleanup(boolean)}.
1583    *
1584    * @return true, if working thread may avoid doing unnecessary operation.
1585    */
1586   boolean avoidUnnecessaryBlockingIO();
1587
1588   /**
1589    * If true, automatically persist stats to a hidden column family (column
1590    * family name: ___rocksdb_stats_history___) every
1591    * stats_persist_period_sec seconds; otherwise, write to an in-memory
1592    * struct. User can query through `GetStatsHistory` API.
1593    * If user attempts to create a column family with the same name on a DB
1594    * which have previously set persist_stats_to_disk to true, the column family
1595    * creation will fail, but the hidden column family will survive, as well as
1596    * the previously persisted statistics.
1597    * When peristing stats to disk, the stat name will be limited at 100 bytes.
1598    * Default: false
1599    *
1600    * @param persistStatsToDisk true if stats should be persisted to hidden column family.
1601    * @return the instance of the current object.
1602    */
1603   T setPersistStatsToDisk(final boolean persistStatsToDisk);
1604
1605   /**
1606    * If true, automatically persist stats to a hidden column family (column
1607    * family name: ___rocksdb_stats_history___) every
1608    * stats_persist_period_sec seconds; otherwise, write to an in-memory
1609    * struct. User can query through `GetStatsHistory` API.
1610    * If user attempts to create a column family with the same name on a DB
1611    * which have previously set persist_stats_to_disk to true, the column family
1612    * creation will fail, but the hidden column family will survive, as well as
1613    * the previously persisted statistics.
1614    * When peristing stats to disk, the stat name will be limited at 100 bytes.
1615    * Default: false
1616    *
1617    * @return true if stats should be persisted to hidden column family.
1618    */
1619   boolean persistStatsToDisk();
1620
1621   /**
1622    * Historically DB ID has always been stored in Identity File in DB folder.
1623    * If this flag is true, the DB ID is written to Manifest file in addition
1624    * to the Identity file. By doing this 2 problems are solved
1625    * 1. We don't checksum the Identity file where as Manifest file is.
1626    * 2. Since the source of truth for DB is Manifest file DB ID will sit with
1627    *    the source of truth. Previously the Identity file could be copied
1628    *    independent of Manifest and that can result in wrong DB ID.
1629    * We recommend setting this flag to true.
1630    * Default: false
1631    *
1632    * @param writeDbidToManifest if true, then DB ID will be written to Manifest file.
1633    * @return the instance of the current object.
1634    */
1635   T setWriteDbidToManifest(final boolean writeDbidToManifest);
1636
1637   /**
1638    * Historically DB ID has always been stored in Identity File in DB folder.
1639    * If this flag is true, the DB ID is written to Manifest file in addition
1640    * to the Identity file. By doing this 2 problems are solved
1641    * 1. We don't checksum the Identity file where as Manifest file is.
1642    * 2. Since the source of truth for DB is Manifest file DB ID will sit with
1643    *    the source of truth. Previously the Identity file could be copied
1644    *    independent of Manifest and that can result in wrong DB ID.
1645    * We recommend setting this flag to true.
1646    * Default: false
1647    *
1648    * @return true, if DB ID will be written to Manifest file.
1649    */
1650   boolean writeDbidToManifest();
1651
1652   /**
1653    * The number of bytes to prefetch when reading the log. This is mostly useful
1654    * for reading a remotely located log, as it can save the number of
1655    * round-trips. If 0, then the prefetching is disabled.
1656    *
1657    * Default: 0
1658    *
1659    * @param logReadaheadSize the number of bytes to prefetch when reading the log.
1660    * @return the instance of the current object.
1661    */
1662   T setLogReadaheadSize(final long logReadaheadSize);
1663
1664   /**
1665    * The number of bytes to prefetch when reading the log. This is mostly useful
1666    * for reading a remotely located log, as it can save the number of
1667    * round-trips. If 0, then the prefetching is disabled.
1668    *
1669    * Default: 0
1670    *
1671    * @return the number of bytes to prefetch when reading the log.
1672    */
1673   long logReadaheadSize();
1674
1675   /**
1676    * By default, RocksDB recovery fails if any table file referenced in
1677    * MANIFEST are missing after scanning the MANIFEST.
1678    * Best-efforts recovery is another recovery mode that
1679    * tries to restore the database to the most recent point in time without
1680    * missing file.
1681    * Currently not compatible with atomic flush. Furthermore, WAL files will
1682    * not be used for recovery if best_efforts_recovery is true.
1683    * Default: false
1684    *
1685    * @param bestEffortsRecovery if true, RocksDB will use best-efforts mode when recovering.
1686    * @return the instance of the current object.
1687    */
1688   T setBestEffortsRecovery(final boolean bestEffortsRecovery);
1689
1690   /**
1691    * By default, RocksDB recovery fails if any table file referenced in
1692    * MANIFEST are missing after scanning the MANIFEST.
1693    * Best-efforts recovery is another recovery mode that
1694    * tries to restore the database to the most recent point in time without
1695    * missing file.
1696    * Currently not compatible with atomic flush. Furthermore, WAL files will
1697    * not be used for recovery if best_efforts_recovery is true.
1698    * Default: false
1699    *
1700    * @return true, if RocksDB uses best-efforts mode when recovering.
1701    */
1702   boolean bestEffortsRecovery();
1703
1704   /**
1705    * It defines how many times db resume is called by a separate thread when
1706    * background retryable IO Error happens. When background retryable IO
1707    * Error happens, SetBGError is called to deal with the error. If the error
1708    * can be auto-recovered (e.g., retryable IO Error during Flush or WAL write),
1709    * then db resume is called in background to recover from the error. If this
1710    * value is 0 or negative, db resume will not be called.
1711    *
1712    * Default: INT_MAX
1713    *
1714    * @param maxBgerrorResumeCount maximum number of times db resume should be called when IO Error
1715    *     happens.
1716    * @return the instance of the current object.
1717    */
1718   T setMaxBgErrorResumeCount(final int maxBgerrorResumeCount);
1719
1720   /**
1721    * It defines how many times db resume is called by a separate thread when
1722    * background retryable IO Error happens. When background retryable IO
1723    * Error happens, SetBGError is called to deal with the error. If the error
1724    * can be auto-recovered (e.g., retryable IO Error during Flush or WAL write),
1725    * then db resume is called in background to recover from the error. If this
1726    * value is 0 or negative, db resume will not be called.
1727    *
1728    * Default: INT_MAX
1729    *
1730    * @return maximum number of times db resume should be called when IO Error happens.
1731    */
1732   int maxBgerrorResumeCount();
1733
1734   /**
1735    * If max_bgerror_resume_count is &ge; 2, db resume is called multiple times.
1736    * This option decides how long to wait to retry the next resume if the
1737    * previous resume fails and satisfy redo resume conditions.
1738    *
1739    * Default: 1000000 (microseconds).
1740    *
1741    * @param bgerrorResumeRetryInterval how many microseconds to wait between DB resume attempts.
1742    * @return the instance of the current object.
1743    */
1744   T setBgerrorResumeRetryInterval(final long bgerrorResumeRetryInterval);
1745
1746   /**
1747    * If max_bgerror_resume_count is &ge; 2, db resume is called multiple times.
1748    * This option decides how long to wait to retry the next resume if the
1749    * previous resume fails and satisfy redo resume conditions.
1750    *
1751    * Default: 1000000 (microseconds).
1752    *
1753    * @return the instance of the current object.
1754    */
1755   long bgerrorResumeRetryInterval();
1756 }