ceph/src/rocksdb/java/src/main/java/org/rocksdb/AdvancedColumnFamilyOptionsInterface.java

   1 // Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
   2 //  This source code is licensed under both the GPLv2 (found in the
   3 //  COPYING file in the root directory) and Apache 2.0 License
   4 //  (found in the LICENSE.Apache file in the root directory).
   5
   6 package org.rocksdb;
   7
   8 import java.util.List;
   9
  10 /**
  11  * Advanced Column Family Options which are not
  12  * mutable (i.e. present in {@link AdvancedMutableColumnFamilyOptionsInterface}
  13  *
  14  * Taken from include/rocksdb/advanced_options.h
  15  */
  16 public interface AdvancedColumnFamilyOptionsInterface
  17     <T extends AdvancedColumnFamilyOptionsInterface> {
  18
  19   /**
  20    * The minimum number of write buffers that will be merged together
  21    * before writing to storage.  If set to 1, then
  22    * all write buffers are flushed to L0 as individual files and this increases
  23    * read amplification because a get request has to check in all of these
  24    * files. Also, an in-memory merge may result in writing lesser
  25    * data to storage if there are duplicate records in each of these
  26    * individual write buffers.  Default: 1
  27    *
  28    * @param minWriteBufferNumberToMerge the minimum number of write buffers
  29    *     that will be merged together.
  30    * @return the reference to the current options.
  31    */
  32   T setMinWriteBufferNumberToMerge(
  33       int minWriteBufferNumberToMerge);
  34
  35   /**
  36    * The minimum number of write buffers that will be merged together
  37    * before writing to storage.  If set to 1, then
  38    * all write buffers are flushed to L0 as individual files and this increases
  39    * read amplification because a get request has to check in all of these
  40    * files. Also, an in-memory merge may result in writing lesser
  41    * data to storage if there are duplicate records in each of these
  42    * individual write buffers.  Default: 1
  43    *
  44    * @return the minimum number of write buffers that will be merged together.
  45    */
  46   int minWriteBufferNumberToMerge();
  47
  48   /**
  49    * The total maximum number of write buffers to maintain in memory including
  50    * copies of buffers that have already been flushed.  Unlike
  51    * {@link AdvancedMutableColumnFamilyOptionsInterface#maxWriteBufferNumber()},
  52    * this parameter does not affect flushing.
  53    * This controls the minimum amount of write history that will be available
  54    * in memory for conflict checking when Transactions are used.
  55    *
  56    * When using an OptimisticTransactionDB:
  57    * If this value is too low, some transactions may fail at commit time due
  58    * to not being able to determine whether there were any write conflicts.
  59    *
  60    * When using a TransactionDB:
  61    * If Transaction::SetSnapshot is used, TransactionDB will read either
  62    * in-memory write buffers or SST files to do write-conflict checking.
  63    * Increasing this value can reduce the number of reads to SST files
  64    * done for conflict detection.
  65    *
  66    * Setting this value to 0 will cause write buffers to be freed immediately
  67    * after they are flushed.
  68    * If this value is set to -1,
  69    * {@link AdvancedMutableColumnFamilyOptionsInterface#maxWriteBufferNumber()}
  70    * will be used.
  71    *
  72    * Default:
  73    * If using a TransactionDB/OptimisticTransactionDB, the default value will
  74    * be set to the value of
  75    * {@link AdvancedMutableColumnFamilyOptionsInterface#maxWriteBufferNumber()}
  76    * if it is not explicitly set by the user. Otherwise, the default is 0.
  77    *
  78    * @param maxWriteBufferNumberToMaintain The maximum number of write
  79    *     buffers to maintain
  80    *
  81    * @return the reference to the current options.
  82    */
  83   T setMaxWriteBufferNumberToMaintain(
  84       int maxWriteBufferNumberToMaintain);
  85
  86   /**
  87    * The total maximum number of write buffers to maintain in memory including
  88    * copies of buffers that have already been flushed.
  89    *
  90    * @return maxWriteBufferNumberToMaintain The maximum number of write buffers
  91    *     to maintain
  92    */
  93   int maxWriteBufferNumberToMaintain();
  94
  95   /**
  96    * Allows thread-safe inplace updates.
  97    * If inplace_callback function is not set,
  98    *   Put(key, new_value) will update inplace the existing_value iff
  99    *   * key exists in current memtable
 100    *   * new sizeof(new_value) &le; sizeof(existing_value)
 101    *   * existing_value for that key is a put i.e. kTypeValue
 102    * If inplace_callback function is set, check doc for inplace_callback.
 103    * Default: false.
 104    *
 105    * @param inplaceUpdateSupport true if thread-safe inplace updates
 106    *     are allowed.
 107    * @return the reference to the current options.
 108    */
 109   T setInplaceUpdateSupport(
 110       boolean inplaceUpdateSupport);
 111
 112   /**
 113    * Allows thread-safe inplace updates.
 114    * If inplace_callback function is not set,
 115    *   Put(key, new_value) will update inplace the existing_value iff
 116    *   * key exists in current memtable
 117    *   * new sizeof(new_value) &le; sizeof(existing_value)
 118    *   * existing_value for that key is a put i.e. kTypeValue
 119    * If inplace_callback function is set, check doc for inplace_callback.
 120    * Default: false.
 121    *
 122    * @return true if thread-safe inplace updates are allowed.
 123    */
 124   boolean inplaceUpdateSupport();
 125
 126   /**
 127    * Control locality of bloom filter probes to improve cache miss rate.
 128    * This option only applies to memtable prefix bloom and plaintable
 129    * prefix bloom. It essentially limits the max number of cache lines each
 130    * bloom filter check can touch.
 131    * This optimization is turned off when set to 0. The number should never
 132    * be greater than number of probes. This option can boost performance
 133    * for in-memory workload but should use with care since it can cause
 134    * higher false positive rate.
 135    * Default: 0
 136    *
 137    * @param bloomLocality the level of locality of bloom-filter probes.
 138    * @return the reference to the current options.
 139    */
 140   T setBloomLocality(int bloomLocality);
 141
 142   /**
 143    * Control locality of bloom filter probes to improve cache miss rate.
 144    * This option only applies to memtable prefix bloom and plaintable
 145    * prefix bloom. It essentially limits the max number of cache lines each
 146    * bloom filter check can touch.
 147    * This optimization is turned off when set to 0. The number should never
 148    * be greater than number of probes. This option can boost performance
 149    * for in-memory workload but should use with care since it can cause
 150    * higher false positive rate.
 151    * Default: 0
 152    *
 153    * @return the level of locality of bloom-filter probes.
 154    * @see #setBloomLocality(int)
 155    */
 156   int bloomLocality();
 157
 158   /**
 159    * <p>Different levels can have different compression
 160    * policies. There are cases where most lower levels
 161    * would like to use quick compression algorithms while
 162    * the higher levels (which have more data) use
 163    * compression algorithms that have better compression
 164    * but could be slower. This array, if non-empty, should
 165    * have an entry for each level of the database;
 166    * these override the value specified in the previous
 167    * field 'compression'.</p>
 168    *
 169    * <strong>NOTICE</strong>
 170    * <p>If {@code level_compaction_dynamic_level_bytes=true},
 171    * {@code compression_per_level[0]} still determines {@code L0},
 172    * but other elements of the array are based on base level
 173    * (the level {@code L0} files are merged to), and may not
 174    * match the level users see from info log for metadata.
 175    * </p>
 176    * <p>If {@code L0} files are merged to {@code level - n},
 177    * then, for {@code i&gt;0}, {@code compression_per_level[i]}
 178    * determines compaction type for level {@code n+i-1}.</p>
 179    *
 180    * <strong>Example</strong>
 181    * <p>For example, if we have 5 levels, and we determine to
 182    * merge {@code L0} data to {@code L4} (which means {@code L1..L3}
 183    * will be empty), then the new files go to {@code L4} uses
 184    * compression type {@code compression_per_level[1]}.</p>
 185    *
 186    * <p>If now {@code L0} is merged to {@code L2}. Data goes to
 187    * {@code L2} will be compressed according to
 188    * {@code compression_per_level[1]}, {@code L3} using
 189    * {@code compression_per_level[2]}and {@code L4} using
 190    * {@code compression_per_level[3]}. Compaction for each
 191    * level can change when data grows.</p>
 192    *
 193    * <p><strong>Default:</strong> empty</p>
 194    *
 195    * @param compressionLevels list of
 196    *     {@link org.rocksdb.CompressionType} instances.
 197    *
 198    * @return the reference to the current options.
 199    */
 200   T setCompressionPerLevel(
 201       List<CompressionType> compressionLevels);
 202
 203   /**
 204    * <p>Return the currently set {@link org.rocksdb.CompressionType}
 205    * per instances.</p>
 206    *
 207    * <p>See: {@link #setCompressionPerLevel(java.util.List)}</p>
 208    *
 209    * @return list of {@link org.rocksdb.CompressionType}
 210    *     instances.
 211    */
 212   List<CompressionType> compressionPerLevel();
 213
 214   /**
 215    * Set the number of levels for this database
 216    * If level-styled compaction is used, then this number determines
 217    * the total number of levels.
 218    *
 219    * @param numLevels the number of levels.
 220    * @return the reference to the current options.
 221    */
 222   T setNumLevels(int numLevels);
 223
 224   /**
 225    * If level-styled compaction is used, then this number determines
 226    * the total number of levels.
 227    *
 228    * @return the number of levels.
 229    */
 230   int numLevels();
 231
 232   /**
 233    * <p>If {@code true}, RocksDB will pick target size of each level
 234    * dynamically. We will pick a base level b &gt;= 1. L0 will be
 235    * directly merged into level b, instead of always into level 1.
 236    * Level 1 to b-1 need to be empty. We try to pick b and its target
 237    * size so that</p>
 238    *
 239    * <ol>
 240    * <li>target size is in the range of
 241    *   (max_bytes_for_level_base / max_bytes_for_level_multiplier,
 242    *    max_bytes_for_level_base]</li>
 243    * <li>target size of the last level (level num_levels-1) equals to extra size
 244    *    of the level.</li>
 245    * </ol>
 246    *
 247    * <p>At the same time max_bytes_for_level_multiplier and
 248    * max_bytes_for_level_multiplier_additional are still satisfied.</p>
 249    *
 250    * <p>With this option on, from an empty DB, we make last level the base
 251    * level, which means merging L0 data into the last level, until it exceeds
 252    * max_bytes_for_level_base. And then we make the second last level to be
 253    * base level, to start to merge L0 data to second last level, with its
 254    * target size to be {@code 1/max_bytes_for_level_multiplier} of the last
 255    * levels extra size. After the data accumulates more so that we need to
 256    * move the base level to the third last one, and so on.</p>
 257    *
 258    * <h2>Example</h2>
 259    * <p>For example, assume {@code max_bytes_for_level_multiplier=10},
 260    * {@code num_levels=6}, and {@code max_bytes_for_level_base=10MB}.</p>
 261    *
 262    * <p>Target sizes of level 1 to 5 starts with:</p>
 263    * {@code [- - - - 10MB]}
 264    * <p>with base level is level. Target sizes of level 1 to 4 are not applicable
 265    * because they will not be used.
 266    * Until the size of Level 5 grows to more than 10MB, say 11MB, we make
 267    * base target to level 4 and now the targets looks like:</p>
 268    * {@code [- - - 1.1MB 11MB]}
 269    * <p>While data are accumulated, size targets are tuned based on actual data
 270    * of level 5. When level 5 has 50MB of data, the target is like:</p>
 271    * {@code [- - - 5MB 50MB]}
 272    * <p>Until level 5's actual size is more than 100MB, say 101MB. Now if we
 273    * keep level 4 to be the base level, its target size needs to be 10.1MB,
 274    * which doesn't satisfy the target size range. So now we make level 3
 275    * the target size and the target sizes of the levels look like:</p>
 276    * {@code [- - 1.01MB 10.1MB 101MB]}
 277    * <p>In the same way, while level 5 further grows, all levels' targets grow,
 278    * like</p>
 279    * {@code [- - 5MB 50MB 500MB]}
 280    * <p>Until level 5 exceeds 1000MB and becomes 1001MB, we make level 2 the
 281    * base level and make levels' target sizes like this:</p>
 282    * {@code [- 1.001MB 10.01MB 100.1MB 1001MB]}
 283    * <p>and go on...</p>
 284    *
 285    * <p>By doing it, we give {@code max_bytes_for_level_multiplier} a priority
 286    * against {@code max_bytes_for_level_base}, for a more predictable LSM tree
 287    * shape. It is useful to limit worse case space amplification.</p>
 288    *
 289    * <p>{@code max_bytes_for_level_multiplier_additional} is ignored with
 290    * this flag on.</p>
 291    *
 292    * <p>Turning this feature on or off for an existing DB can cause unexpected
 293    * LSM tree structure so it's not recommended.</p>
 294    *
 295    * <p><strong>Caution</strong>: this option is experimental</p>
 296    *
 297    * <p>Default: false</p>
 298    *
 299    * @param enableLevelCompactionDynamicLevelBytes boolean value indicating
 300    *     if {@code LevelCompactionDynamicLevelBytes} shall be enabled.
 301    * @return the reference to the current options.
 302    */
 303   @Experimental("Turning this feature on or off for an existing DB can cause" +
 304       "unexpected LSM tree structure so it's not recommended")
 305   T setLevelCompactionDynamicLevelBytes(
 306       boolean enableLevelCompactionDynamicLevelBytes);
 307
 308   /**
 309    * <p>Return if {@code LevelCompactionDynamicLevelBytes} is enabled.
 310    * </p>
 311    *
 312    * <p>For further information see
 313    * {@link #setLevelCompactionDynamicLevelBytes(boolean)}</p>
 314    *
 315    * @return boolean value indicating if
 316    *    {@code levelCompactionDynamicLevelBytes} is enabled.
 317    */
 318   @Experimental("Caution: this option is experimental")
 319   boolean levelCompactionDynamicLevelBytes();
 320
 321   /**
 322    * Maximum size of each compaction (not guarantee)
 323    *
 324    * @param maxCompactionBytes the compaction size limit
 325    * @return the reference to the current options.
 326    */
 327   T setMaxCompactionBytes(
 328       long maxCompactionBytes);
 329
 330   /**
 331    * Control maximum size of each compaction (not guaranteed)
 332    *
 333    * @return compaction size threshold
 334    */
 335   long maxCompactionBytes();
 336
 337   /**
 338    * Set compaction style for DB.
 339    *
 340    * Default: LEVEL.
 341    *
 342    * @param compactionStyle Compaction style.
 343    * @return the reference to the current options.
 344    */
 345   ColumnFamilyOptionsInterface setCompactionStyle(
 346       CompactionStyle compactionStyle);
 347
 348   /**
 349    * Compaction style for DB.
 350    *
 351    * @return Compaction style.
 352    */
 353   CompactionStyle compactionStyle();
 354
 355   /**
 356    * If level {@link #compactionStyle()} == {@link CompactionStyle#LEVEL},
 357    * for each level, which files are prioritized to be picked to compact.
 358    *
 359    * Default: {@link CompactionPriority#ByCompensatedSize}
 360    *
 361    * @param compactionPriority The compaction priority
 362    *
 363    * @return the reference to the current options.
 364    */
 365   T setCompactionPriority(
 366       CompactionPriority compactionPriority);
 367
 368   /**
 369    * Get the Compaction priority if level compaction
 370    * is used for all levels
 371    *
 372    * @return The compaction priority
 373    */
 374   CompactionPriority compactionPriority();
 375
 376   /**
 377    * Set the options needed to support Universal Style compactions
 378    *
 379    * @param compactionOptionsUniversal The Universal Style compaction options
 380    *
 381    * @return the reference to the current options.
 382    */
 383   T setCompactionOptionsUniversal(
 384       CompactionOptionsUniversal compactionOptionsUniversal);
 385
 386   /**
 387    * The options needed to support Universal Style compactions
 388    *
 389    * @return The Universal Style compaction options
 390    */
 391   CompactionOptionsUniversal compactionOptionsUniversal();
 392
 393   /**
 394    * The options for FIFO compaction style
 395    *
 396    * @param compactionOptionsFIFO The FIFO compaction options
 397    *
 398    * @return the reference to the current options.
 399    */
 400   T setCompactionOptionsFIFO(
 401       CompactionOptionsFIFO compactionOptionsFIFO);
 402
 403   /**
 404    * The options for FIFO compaction style
 405    *
 406    * @return The FIFO compaction options
 407    */
 408   CompactionOptionsFIFO compactionOptionsFIFO();
 409
 410   /**
 411    * <p>This flag specifies that the implementation should optimize the filters
 412    * mainly for cases where keys are found rather than also optimize for keys
 413    * missed. This would be used in cases where the application knows that
 414    * there are very few misses or the performance in the case of misses is not
 415    * important.</p>
 416    *
 417    * <p>For now, this flag allows us to not store filters for the last level i.e
 418    * the largest level which contains data of the LSM store. For keys which
 419    * are hits, the filters in this level are not useful because we will search
 420    * for the data anyway.</p>
 421    *
 422    * <p><strong>NOTE</strong>: the filters in other levels are still useful
 423    * even for key hit because they tell us whether to look in that level or go
 424    * to the higher level.</p>
 425    *
 426    * <p>Default: false<p>
 427    *
 428    * @param optimizeFiltersForHits boolean value indicating if this flag is set.
 429    * @return the reference to the current options.
 430    */
 431   T setOptimizeFiltersForHits(
 432       boolean optimizeFiltersForHits);
 433
 434   /**
 435    * <p>Returns the current state of the {@code optimize_filters_for_hits}
 436    * setting.</p>
 437    *
 438    * @return boolean value indicating if the flag
 439    *     {@code optimize_filters_for_hits} was set.
 440    */
 441   boolean optimizeFiltersForHits();
 442
 443   /**
 444    * In debug mode, RocksDB run consistency checks on the LSM every time the LSM
 445    * change (Flush, Compaction, AddFile). These checks are disabled in release
 446    * mode, use this option to enable them in release mode as well.
 447    *
 448    * Default: false
 449    *
 450    * @param forceConsistencyChecks true to force consistency checks
 451    *
 452    * @return the reference to the current options.
 453    */
 454   T setForceConsistencyChecks(
 455       boolean forceConsistencyChecks);
 456
 457   /**
 458    * In debug mode, RocksDB run consistency checks on the LSM every time the LSM
 459    * change (Flush, Compaction, AddFile). These checks are disabled in release
 460    * mode.
 461    *
 462    * @return true if consistency checks are enforced
 463    */
 464   boolean forceConsistencyChecks();
 465 }