// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
package org.rocksdb;

public interface MutableDBOptionsInterface<T extends MutableDBOptionsInterface<T>> {
  /**
   * Specifies the maximum number of concurrent background jobs (both flushes
   * and compactions combined).
   * Default: 2
   *
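   * <p>As an illustrative sketch (the database path and values below are
   * placeholders, not part of the original documentation), the limit can be
   * set at open time and later raised through
   * {@link RocksDB#setDBOptions(MutableDBOptions)}:
   * <pre>{@code
   * RocksDB.loadLibrary();
   * try (final Options options = new Options()
   *          .setCreateIfMissing(true)
   *          .setMaxBackgroundJobs(4); // allow 4 concurrent flushes/compactions
   *      final RocksDB db = RocksDB.open(options, "/tmp/rocksdb-example")) {
   *   // later, raise the limit dynamically on the live database
   *   db.setDBOptions(MutableDBOptions.builder().setMaxBackgroundJobs(8).build());
   * }
   * }</pre>
   *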
   * @param maxBackgroundJobs number of max concurrent background jobs
   * @return the instance of the current object.
   */
  T setMaxBackgroundJobs(int maxBackgroundJobs);

  /**
   * Returns the maximum number of concurrent background jobs (both flushes
   * and compactions combined).
   * Default: 2
   *
   * @return the maximum number of concurrent background jobs.
   */
  int maxBackgroundJobs();

  /**
   * NOT SUPPORTED ANYMORE: RocksDB automatically decides this based on the
   * value of max_background_jobs. For backwards compatibility we will set
   * `max_background_jobs = max_background_compactions + max_background_flushes`
   * in the case where the user sets at least one of `max_background_compactions`
   * or `max_background_flushes` (we replace -1 by 1 in case one option is unset).
   *
   * Specifies the maximum number of concurrent background compaction jobs,
   * submitted to the default LOW priority thread pool.
   * If you're increasing this, also consider increasing the number of threads
   * in the LOW priority thread pool. For more information, see
   * {@link RocksEnv#setBackgroundThreads(int, Priority)}.
   * Default: -1
   *
   * @param maxBackgroundCompactions the maximum number of background
   *     compaction jobs.
   * @return the instance of the current object.
   *
   * @see RocksEnv#setBackgroundThreads(int)
   * @see RocksEnv#setBackgroundThreads(int, Priority)
   * @see DBOptionsInterface#maxBackgroundFlushes()
   * @deprecated Use {@link #setMaxBackgroundJobs(int)}
   */
  @Deprecated
  T setMaxBackgroundCompactions(int maxBackgroundCompactions);

  /**
   * NOT SUPPORTED ANYMORE: RocksDB automatically decides this based on the
   * value of max_background_jobs. For backwards compatibility we will set
   * `max_background_jobs = max_background_compactions + max_background_flushes`
   * in the case where the user sets at least one of `max_background_compactions`
   * or `max_background_flushes` (we replace -1 by 1 in case one option is unset).
   *
   * Returns the maximum number of concurrent background compaction jobs,
   * submitted to the default LOW priority thread pool.
   * When increasing this number, we may also want to consider increasing the
   * number of threads in the LOW priority thread pool.
   * Default: -1
   *
   * @return the maximum number of concurrent background compaction jobs.
   * @see RocksEnv#setBackgroundThreads(int)
   * @see RocksEnv#setBackgroundThreads(int, Priority)
   *
   * @deprecated Use {@link #setMaxBackgroundJobs(int)}
   */
  @Deprecated
  int maxBackgroundCompactions();

  /**
   * By default RocksDB will flush all memtables on DB close if there is
   * unpersisted data (i.e. with WAL disabled). The flush can be skipped to
   * speed up DB close. Unpersisted data WILL BE LOST.
   *
   * DEFAULT: false
   *
   * Dynamically changeable through the
   * {@link RocksDB#setDBOptions(MutableDBOptions)} API.
   *
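   * <p>A hedged sketch (assumes an already-open {@code RocksDB db} handle;
   * not part of the original documentation):
   * <pre>{@code
   * // skip the final flush on close; only safe if losing unpersisted
   * // (WAL-disabled) writes is acceptable
   * db.setDBOptions(
   *     MutableDBOptions.builder().setAvoidFlushDuringShutdown(true).build());
   * }</pre>
   *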
   * @param avoidFlushDuringShutdown true if we should avoid flush during
   *     shutdown
   *
   * @return the reference to the current options.
   */
  T setAvoidFlushDuringShutdown(boolean avoidFlushDuringShutdown);

  /**
   * By default RocksDB will flush all memtables on DB close if there is
   * unpersisted data (i.e. with WAL disabled). The flush can be skipped to
   * speed up DB close. Unpersisted data WILL BE LOST.
   *
   * DEFAULT: false
   *
   * Dynamically changeable through the
   * {@link RocksDB#setDBOptions(MutableDBOptions)} API.
   *
   * @return true if we should avoid flush during shutdown
   */
  boolean avoidFlushDuringShutdown();

  /**
   * This is the maximum buffer size that is used by WritableFileWriter.
   * On Windows, we need to maintain an aligned buffer for writes.
   * We allow the buffer to grow until its size hits the limit.
   *
   * Default: 1024 * 1024 (1 MB)
   *
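   * <p>For example (an illustrative sketch, not from the original docs;
   * assumes {@code options} is a DBOptions or Options instance):
   * <pre>{@code
   * // quadruple the writer buffer cap from the 1 MB default to 4 MB
   * options.setWritableFileMaxBufferSize(4L * 1024 * 1024);
   * }</pre>
   *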
   * @param writableFileMaxBufferSize the maximum buffer size
   *
   * @return the reference to the current options.
   */
  T setWritableFileMaxBufferSize(long writableFileMaxBufferSize);

  /**
   * This is the maximum buffer size that is used by WritableFileWriter.
   * On Windows, we need to maintain an aligned buffer for writes.
   * We allow the buffer to grow until its size hits the limit.
   *
   * Default: 1024 * 1024 (1 MB)
   *
   * @return the maximum buffer size
   */
  long writableFileMaxBufferSize();

  /**
   * The limited write rate to DB if
   * {@link ColumnFamilyOptions#softPendingCompactionBytesLimit()} or
   * {@link ColumnFamilyOptions#level0SlowdownWritesTrigger()} is triggered,
   * or we are writing to the last mem table allowed and we allow more than 3
   * mem tables. It is calculated using the size of user write requests before
   * compression. RocksDB may decide to slow down more if the compaction still
   * falls further behind.
   * If the value is 0, we will infer a value from the `rate_limiter` value
   * if it is not empty, or 16MB if `rate_limiter` is empty. Note that
   * if users change the rate in `rate_limiter` after the DB is opened,
   * `delayed_write_rate` won't be adjusted.
   *
   * Unit: bytes per second.
   *
   * Default: 0
   *
   * Dynamically changeable through {@link RocksDB#setDBOptions(MutableDBOptions)}.
   *
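   * <p>A hedged sketch of tuning this at runtime (assumes an open
   * {@code RocksDB db}; the 16 MB/s figure mirrors the fallback above):
   * <pre>{@code
   * // throttle delayed writes to 16 MB per second
   * db.setDBOptions(MutableDBOptions.builder()
   *     .setDelayedWriteRate(16L * 1024 * 1024)
   *     .build());
   * }</pre>
   *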
   * @param delayedWriteRate the rate in bytes per second
   *
   * @return the reference to the current options.
   */
  T setDelayedWriteRate(long delayedWriteRate);

  /**
   * The limited write rate to DB if
   * {@link ColumnFamilyOptions#softPendingCompactionBytesLimit()} or
   * {@link ColumnFamilyOptions#level0SlowdownWritesTrigger()} is triggered,
   * or we are writing to the last mem table allowed and we allow more than 3
   * mem tables. It is calculated using the size of user write requests before
   * compression. RocksDB may decide to slow down more if the compaction still
   * falls further behind.
   * If the value is 0, we will infer a value from the `rate_limiter` value
   * if it is not empty, or 16MB if `rate_limiter` is empty. Note that
   * if users change the rate in `rate_limiter` after the DB is opened,
   * `delayed_write_rate` won't be adjusted.
   *
   * Unit: bytes per second.
   *
   * Default: 0
   *
   * Dynamically changeable through {@link RocksDB#setDBOptions(MutableDBOptions)}.
   *
   * @return the rate in bytes per second
   */
  long delayedWriteRate();

  /**
   * <p>Set the max total write-ahead log size. Once write-ahead logs exceed this size, we will
   * start forcing the flush of column families whose memtables are backed by the oldest live WAL
   * file.
   * </p>
   * <p>The oldest WAL files are the ones that are causing all the space amplification.
   * </p>
   * <p>For example, with 15 column families, each with
   * <code>write_buffer_size = 128 MB</code> and
   * <code>max_write_buffer_number = 6</code>,
   * <code>max_total_wal_size</code> will be calculated to be
   * <code>[15 * 128MB * 6] * 4 = 45GB</code>.
   * </p>
   * <p>
   * The RocksDB wiki has some discussion about how the WAL interacts
   * with memtables and flushing of column families, at
   * <a href="https://github.com/facebook/rocksdb/wiki/Column-Families">...</a>
   * </p>
   * <p>If set to 0 (default), we will dynamically choose the WAL size limit to
   * be [sum of all write_buffer_size * max_write_buffer_number] * 4</p>
   * <p>This option takes effect only when there is more than one column family, as
   * otherwise the WAL size is dictated by the write_buffer_size.</p>
   * <p>Default: 0</p>
   *
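   * <p>A hedged sketch with illustrative numbers only, following the formula
   * above (assumes {@code options} is a DBOptions or Options instance):
   * <pre>{@code
   * // e.g. 4 column families * 64 MB write_buffer_size * 3 buffers, times 4:
   * // (4 * 64MB * 3) * 4 = 3 GB
   * options.setMaxTotalWalSize(3L * 1024 * 1024 * 1024);
   * }</pre>
   *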
   * @param maxTotalWalSize max total wal size.
   * @return the instance of the current object.
   */
  T setMaxTotalWalSize(long maxTotalWalSize);

  /**
   * <p>Returns the max total write-ahead log size. Once write-ahead logs exceed this size,
   * we will start forcing the flush of column families whose memtables are
   * backed by the oldest live WAL file.</p>
   * <p>The oldest WAL files are the ones that are causing all the space amplification.
   * </p>
   * <p>For example, with 15 column families, each with
   * <code>write_buffer_size = 128 MB</code> and
   * <code>max_write_buffer_number = 6</code>,
   * <code>max_total_wal_size</code> will be calculated to be
   * <code>[15 * 128MB * 6] * 4 = 45GB</code>.
   * </p>
   * <p>
   * The RocksDB wiki has some discussion about how the WAL interacts
   * with memtables and flushing of column families, at
   * <a href="https://github.com/facebook/rocksdb/wiki/Column-Families">...</a>
   * </p>
   * <p>If set to 0 (default), we will dynamically choose the WAL size limit to
   * be [sum of all write_buffer_size * max_write_buffer_number] * 4</p>
   * <p>This option takes effect only when there is more than one column family, as
   * otherwise the WAL size is dictated by the write_buffer_size.</p>
   * <p>Default: 0</p>
   *
   * @return max total wal size
   */
  long maxTotalWalSize();

  /**
   * The periodicity when obsolete files get deleted. The default
   * value is 6 hours. The files that get out of scope by the compaction
   * process will still get automatically deleted on every compaction,
   * regardless of this setting.
   *
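   * <p>A hedged sketch (assumes {@code options} is a DBOptions or Options
   * instance; {@code java.util.concurrent.TimeUnit} just converts the units):
   * <pre>{@code
   * // check for obsolete files every hour instead of every 6 hours
   * options.setDeleteObsoleteFilesPeriodMicros(TimeUnit.HOURS.toMicros(1));
   * }</pre>
   *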
   * @param micros the time interval in micros
   * @return the instance of the current object.
   */
  T setDeleteObsoleteFilesPeriodMicros(long micros);

  /**
   * The periodicity when obsolete files get deleted. The default
   * value is 6 hours. The files that get out of scope by the compaction
   * process will still get automatically deleted on every compaction,
   * regardless of this setting.
   *
   * @return the time interval in micros when obsolete files will be deleted.
   */
  long deleteObsoleteFilesPeriodMicros();

  /**
   * If not zero, dump rocksdb.stats to LOG every stats_dump_period_sec
   * Default: 600 (10 minutes)
   *
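   * <p>A hedged sketch combining the stats-related options on this interface
   * (illustrative values; assumes an open {@code RocksDB db}):
   * <pre>{@code
   * db.setDBOptions(MutableDBOptions.builder()
   *     .setStatsDumpPeriodSec(120)                  // dump stats to LOG every 2 minutes
   *     .setStatsPersistPeriodSec(120)               // persist stats every 2 minutes
   *     .setStatsHistoryBufferSize(2L * 1024 * 1024) // cap stats snapshots at 2 MB
   *     .build());
   * }</pre>
   *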
   * @param statsDumpPeriodSec time interval in seconds.
   * @return the instance of the current object.
   */
  T setStatsDumpPeriodSec(int statsDumpPeriodSec);

  /**
   * If not zero, dump rocksdb.stats to LOG every stats_dump_period_sec
   * Default: 600 (10 minutes)
   *
   * @return time interval in seconds.
   */
  int statsDumpPeriodSec();

  /**
   * If not zero, dump rocksdb.stats to RocksDB every
   * {@code statsPersistPeriodSec}
   *
   * Default: 600
   *
   * @param statsPersistPeriodSec time interval in seconds.
   * @return the instance of the current object.
   */
  T setStatsPersistPeriodSec(int statsPersistPeriodSec);

  /**
   * If not zero, dump rocksdb.stats to RocksDB every
   * {@code statsPersistPeriodSec}
   *
   * Default: 600
   *
   * @return time interval in seconds.
   */
  int statsPersistPeriodSec();

  /**
   * If not zero, periodically take stats snapshots and store them in memory;
   * the memory size for stats snapshots is capped at
   * {@code statsHistoryBufferSize}
   *
   * Default: 1MB
   *
   * @param statsHistoryBufferSize the size of the buffer.
   * @return the instance of the current object.
   */
  T setStatsHistoryBufferSize(long statsHistoryBufferSize);

  /**
   * If not zero, periodically take stats snapshots and store them in memory;
   * the memory size for stats snapshots is capped at
   * {@code statsHistoryBufferSize}
   *
   * Default: 1MB
   *
   * @return the size of the buffer.
   */
  long statsHistoryBufferSize();

  /**
   * Number of open files that can be used by the DB. You may need to
   * increase this if your database has a large working set. Value -1 means
   * files opened are always kept open. You can estimate the number of files
   * based on {@code target_file_size_base} and {@code target_file_size_multiplier}
   * for level-based compaction. For universal-style compaction, you can usually
   * set it to -1.
   * Default: -1
   *
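   * <p>A hedged sketch (an illustrative cap; assumes an open {@code RocksDB db}):
   * <pre>{@code
   * // bound the table-file cache instead of keeping every file open (-1)
   * db.setDBOptions(MutableDBOptions.builder().setMaxOpenFiles(5000).build());
   * }</pre>
   *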
   * @param maxOpenFiles the maximum number of open files.
   * @return the instance of the current object.
   */
  T setMaxOpenFiles(int maxOpenFiles);

  /**
   * Number of open files that can be used by the DB. You may need to
   * increase this if your database has a large working set. Value -1 means
   * files opened are always kept open. You can estimate the number of files
   * based on {@code target_file_size_base} and {@code target_file_size_multiplier}
   * for level-based compaction. For universal-style compaction, you can usually
   * set it to -1.
   * Default: -1
   *
   * @return the maximum number of open files.
   */
  int maxOpenFiles();

  /**
   * Allows the OS to incrementally sync files to disk while they are being
   * written, asynchronously, in the background.
   * Issues one request for every {@code bytes_per_sync} written. 0 turns it off.
   * Default: 0
   *
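   * <p>A hedged sketch (illustrative value; assumes {@code options} is a
   * DBOptions or Options instance):
   * <pre>{@code
   * // request a background range sync after each 1 MB written to SST files
   * options.setBytesPerSync(1024 * 1024);
   * }</pre>
   *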
   * @param bytesPerSync size in bytes
   * @return the instance of the current object.
   */
  T setBytesPerSync(long bytesPerSync);

  /**
   * Allows the OS to incrementally sync files to disk while they are being
   * written, asynchronously, in the background.
   * Issues one request for every {@code bytes_per_sync} written. 0 turns it off.
   * Default: 0
   *
   * @return size in bytes
   */
  long bytesPerSync();

  /**
   * Same as {@link #setBytesPerSync(long)}, but applies to WAL files.
   *
   * Default: 0, turned off
   *
   * @param walBytesPerSync size in bytes
   * @return the instance of the current object.
   */
  T setWalBytesPerSync(long walBytesPerSync);

  /**
   * Same as {@link #bytesPerSync()}, but applies to WAL files.
   *
   * Default: 0, turned off
   *
   * @return size in bytes
   */
  long walBytesPerSync();

  /**
   * When true, guarantees WAL files have at most {@link #walBytesPerSync()}
   * bytes submitted for writeback at any given time, and SST files have at most
   * {@link #bytesPerSync()} bytes pending writeback at any given time. This
   * can be used to handle cases where processing speed exceeds I/O speed
   * during file generation, which can lead to a huge sync when the file is
   * finished, even with {@link #bytesPerSync()} / {@link #walBytesPerSync()}
   * properly configured.
   *
   * - If `sync_file_range` is supported it achieves this by waiting for any
   *   prior `sync_file_range`s to finish before proceeding. In this way,
   *   processing (compression, etc.) can proceed uninhibited in the gap
   *   between `sync_file_range`s, and we block only when I/O falls
   *   behind.
   * - Otherwise the `WritableFile::Sync` method is used. Note this mechanism
   *   always blocks, thus preventing the interleaving of I/O and processing.
   *
   * Note: Enabling this option does not provide any additional persistence
   * guarantees, as it may use `sync_file_range`, which does not write out
   * metadata.
   *
   * Default: false
   *
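   * <p>A hedged sketch pairing this with {@link #setBytesPerSync(long)}
   * (illustrative values; assumes an open {@code RocksDB db}):
   * <pre>{@code
   * db.setDBOptions(MutableDBOptions.builder()
   *     .setBytesPerSync(1024 * 1024) // sync SST writes in 1 MB chunks
   *     .setStrictBytesPerSync(true)  // and cap pending writeback at that size
   *     .build());
   * }</pre>
   *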
   * @param strictBytesPerSync true to enforce the writeback limits
   * @return the instance of the current object.
   */
  T setStrictBytesPerSync(boolean strictBytesPerSync);

  /**
   * Returns whether the strict bytes-per-sync limit is enforced.
   *
   * See {@link #setStrictBytesPerSync(boolean)}
   *
   * @return true if the limit is enforced
   */
  boolean strictBytesPerSync();

  /**
   * If non-zero, we perform bigger reads when doing compaction. If you're
   * running RocksDB on spinning disks, you should set this to at least 2MB.
   *
   * That way RocksDB's compaction does sequential instead of random reads.
   *
   * Default: 0
   *
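   * <p>A hedged sketch (the 2 MB value follows the guidance above; assumes
   * {@code options} is a DBOptions or Options instance):
   * <pre>{@code
   * // favour sequential compaction reads on spinning disks
   * options.setCompactionReadaheadSize(2L * 1024 * 1024);
   * }</pre>
   *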
   * @param compactionReadaheadSize The compaction read-ahead size
   *
   * @return the reference to the current options.
   */
  T setCompactionReadaheadSize(final long compactionReadaheadSize);

  /**
   * If non-zero, we perform bigger reads when doing compaction. If you're
   * running RocksDB on spinning disks, you should set this to at least 2MB.
   *
   * That way RocksDB's compaction does sequential instead of random reads.
   *
   * Default: 0
   *
   * @return The compaction read-ahead size
   */
  long compactionReadaheadSize();
}