// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

package org.rocksdb;

import java.util.List;

/**
 * Advanced Column Family Options which are not
 * mutable (i.e. not present in
 * {@link AdvancedMutableColumnFamilyOptionsInterface}).
 *
 * Taken from include/rocksdb/advanced_options.h
 */
public interface AdvancedColumnFamilyOptionsInterface<
    T extends AdvancedColumnFamilyOptionsInterface<T>> {
  /**
   * The minimum number of write buffers that will be merged together
   * before writing to storage. If set to 1, then
   * all write buffers are flushed to L0 as individual files and this increases
   * read amplification because a get request has to check in all of these
   * files. Also, an in-memory merge may result in writing less
   * data to storage if there are duplicate records in each of these
   * individual write buffers. Default: 1
   *
   * @param minWriteBufferNumberToMerge the minimum number of write buffers
   *     that will be merged together.
   * @return the reference to the current options.
   */
  T setMinWriteBufferNumberToMerge(
      int minWriteBufferNumberToMerge);

  /**
   * The minimum number of write buffers that will be merged together
   * before writing to storage. If set to 1, then
   * all write buffers are flushed to L0 as individual files and this increases
   * read amplification because a get request has to check in all of these
   * files. Also, an in-memory merge may result in writing less
   * data to storage if there are duplicate records in each of these
   * individual write buffers. Default: 1
   *
   * @return the minimum number of write buffers that will be merged together.
   */
  int minWriteBufferNumberToMerge();
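
  // Illustrative usage (editor's note, not part of this interface): a minimal
  // sketch assuming org.rocksdb.ColumnFamilyOptions, which implements this
  // interface; the numeric values are arbitrary.
  //
  //   try (final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions()
  //            .setMaxWriteBufferNumber(4)
  //            .setMinWriteBufferNumberToMerge(2)) {
  //     // At least two immutable memtables are merged before a flush, so
  //     // duplicate keys across those buffers are written to L0 only once.
  //   }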

  /**
   * The total maximum number of write buffers to maintain in memory including
   * copies of buffers that have already been flushed. Unlike
   * {@link AdvancedMutableColumnFamilyOptionsInterface#maxWriteBufferNumber()},
   * this parameter does not affect flushing.
   * This controls the minimum amount of write history that will be available
   * in memory for conflict checking when Transactions are used.
   *
   * When using an OptimisticTransactionDB:
   * If this value is too low, some transactions may fail at commit time due
   * to not being able to determine whether there were any write conflicts.
   *
   * When using a TransactionDB:
   * If Transaction::SetSnapshot is used, TransactionDB will read either
   * in-memory write buffers or SST files to do write-conflict checking.
   * Increasing this value can reduce the number of reads to SST files
   * done for conflict detection.
   *
   * Setting this value to 0 will cause write buffers to be freed immediately
   * after they are flushed.
   * If this value is set to -1,
   * {@link AdvancedMutableColumnFamilyOptionsInterface#maxWriteBufferNumber()}
   * will be used.
   *
   * Default:
   * If using a TransactionDB/OptimisticTransactionDB, the default value will
   * be set to the value of
   * {@link AdvancedMutableColumnFamilyOptionsInterface#maxWriteBufferNumber()}
   * if it is not explicitly set by the user. Otherwise, the default is 0.
   *
   * @param maxWriteBufferNumberToMaintain The maximum number of write
   *     buffers to maintain
   *
   * @return the reference to the current options.
   */
  T setMaxWriteBufferNumberToMaintain(
      int maxWriteBufferNumberToMaintain);

  /**
   * The total maximum number of write buffers to maintain in memory including
   * copies of buffers that have already been flushed.
   *
   * @return The maximum number of write buffers to maintain
   */
  int maxWriteBufferNumberToMaintain();
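
  // Illustrative usage (editor's note, not part of this interface): a sketch of
  // keeping extra flushed memtables around for TransactionDB conflict checking,
  // assuming org.rocksdb.ColumnFamilyOptions; the value 4 is arbitrary.
  //
  //   try (final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions()
  //            .setMaxWriteBufferNumberToMaintain(4)) {
  //     // Up to 4 write buffers (including already-flushed copies) are kept in
  //     // memory, so more write-conflict checks can be answered without
  //     // reading SST files.
  //   }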

  /**
   * Allows thread-safe inplace updates.
   * If inplace_callback function is not set,
   * Put(key, new_value) will update inplace the existing_value iff
   * * key exists in current memtable
   * * sizeof(new_value) ≤ sizeof(existing_value)
   * * existing_value for that key is a put i.e. kTypeValue
   * If inplace_callback function is set, check doc for inplace_callback.
   * Default: false.
   *
   * @param inplaceUpdateSupport true if thread-safe inplace updates
   *     are allowed.
   * @return the reference to the current options.
   */
  T setInplaceUpdateSupport(
      boolean inplaceUpdateSupport);

  /**
   * Allows thread-safe inplace updates.
   * If inplace_callback function is not set,
   * Put(key, new_value) will update inplace the existing_value iff
   * * key exists in current memtable
   * * sizeof(new_value) ≤ sizeof(existing_value)
   * * existing_value for that key is a put i.e. kTypeValue
   * If inplace_callback function is set, check doc for inplace_callback.
   * Default: false.
   *
   * @return true if thread-safe inplace updates are allowed.
   */
  boolean inplaceUpdateSupport();
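
  // Illustrative usage (editor's note, not part of this interface): a sketch
  // assuming org.rocksdb.ColumnFamilyOptions and a fixed-width value encoding
  // chosen by the application.
  //
  //   try (final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions()
  //            .setInplaceUpdateSupport(true)) {
  //     // A Put() that overwrites a key already in the memtable with a value
  //     // of equal or smaller size is applied in place instead of appending a
  //     // new memtable entry.
  //   }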

  /**
   * Control locality of bloom filter probes to improve cache miss rate.
   * This option only applies to memtable prefix bloom and plaintable
   * prefix bloom. It essentially limits the max number of cache lines each
   * bloom filter check can touch.
   * This optimization is turned off when set to 0. The number should never
   * be greater than the number of probes. This option can boost performance
   * for in-memory workloads but should be used with care since it can cause
   * a higher false positive rate.
   * Default: 0
   *
   * @param bloomLocality the level of locality of bloom-filter probes.
   * @return the reference to the current options.
   */
  T setBloomLocality(int bloomLocality);

  /**
   * Control locality of bloom filter probes to improve cache miss rate.
   * This option only applies to memtable prefix bloom and plaintable
   * prefix bloom. It essentially limits the max number of cache lines each
   * bloom filter check can touch.
   * This optimization is turned off when set to 0. The number should never
   * be greater than the number of probes. This option can boost performance
   * for in-memory workloads but should be used with care since it can cause
   * a higher false positive rate.
   * Default: 0
   *
   * @return the level of locality of bloom-filter probes.
   * @see #setBloomLocality(int)
   */
  int bloomLocality();
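
  // Illustrative usage (editor's note, not part of this interface): a sketch of
  // an in-memory, prefix-bloom workload assuming org.rocksdb.ColumnFamilyOptions;
  // the prefix length and bloom size ratio are arbitrary.
  //
  //   try (final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions()
  //            .useFixedLengthPrefixExtractor(8)
  //            .setMemtablePrefixBloomSizeRatio(0.1)
  //            .setBloomLocality(1)) {
  //     // Each memtable prefix-bloom check is limited to a single cache line,
  //     // trading a slightly higher false positive rate for fewer cache misses.
  //   }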

  /**
   * <p>Different levels can have different compression
   * policies. There are cases where most lower levels
   * would like to use quick compression algorithms while
   * the higher levels (which have more data) use
   * compression algorithms that have better compression
   * but could be slower. This array, if non-empty, should
   * have an entry for each level of the database;
   * these override the value specified in the previous
   * field 'compression'.</p>
   *
   * <strong>NOTICE</strong>
   * <p>If {@code level_compaction_dynamic_level_bytes=true},
   * {@code compression_per_level[0]} still determines {@code L0},
   * but other elements of the array are based on the base level
   * (the level {@code L0} files are merged to), and may not
   * match the level users see from the info log for metadata.
   * </p>
   * <p>If {@code L0} files are merged to level {@code n},
   * then, for {@code i > 0}, {@code compression_per_level[i]}
   * determines the compression type for level {@code n+i-1}.</p>
   *
   * <strong>Example</strong>
   * <p>For example, if we have 5 levels, and we determine to
   * merge {@code L0} data to {@code L4} (which means {@code L1..L3}
   * will be empty), then new files written to {@code L4} use
   * compression type {@code compression_per_level[1]}.</p>
   *
   * <p>If {@code L0} is now merged to {@code L2}, data going to
   * {@code L2} will be compressed according to
   * {@code compression_per_level[1]}, {@code L3} using
   * {@code compression_per_level[2]} and {@code L4} using
   * {@code compression_per_level[3]}. Compression for each
   * level can change as data grows.</p>
   *
   * <p><strong>Default:</strong> empty</p>
   *
   * @param compressionLevels list of
   *     {@link org.rocksdb.CompressionType} instances.
   *
   * @return the reference to the current options.
   */
  T setCompressionPerLevel(
      List<CompressionType> compressionLevels);

  /**
   * <p>Return the currently set list of {@link org.rocksdb.CompressionType}
   * instances, one per level.</p>
   *
   * <p>See: {@link #setCompressionPerLevel(java.util.List)}</p>
   *
   * @return list of {@link org.rocksdb.CompressionType}
   *     instances.
   */
  List<CompressionType> compressionPerLevel();
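
  // Illustrative usage (editor's note, not part of this interface): a sketch
  // that leaves the upper levels uncompressed and compresses deeper levels,
  // assuming org.rocksdb.ColumnFamilyOptions and java.util.Arrays; the level
  // count and codecs are arbitrary.
  //
  //   try (final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions()
  //            .setNumLevels(4)
  //            .setCompressionPerLevel(Arrays.asList(
  //                CompressionType.NO_COMPRESSION,
  //                CompressionType.NO_COMPRESSION,
  //                CompressionType.LZ4_COMPRESSION,
  //                CompressionType.ZSTD_COMPRESSION))) {
  //     // Entry 0 applies to L0; with dynamic level bytes enabled the
  //     // remaining entries are interpreted relative to the base level.
  //   }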

  /**
   * Set the number of levels for this database.
   * If level-styled compaction is used, then this number determines
   * the total number of levels.
   *
   * @param numLevels the number of levels.
   * @return the reference to the current options.
   */
  T setNumLevels(int numLevels);

  /**
   * If level-styled compaction is used, then this number determines
   * the total number of levels.
   *
   * @return the number of levels.
   */
  int numLevels();

  /**
   * <p>If {@code true}, RocksDB will pick target size of each level
   * dynamically. We will pick a base level b >= 1. L0 will be
   * directly merged into level b, instead of always into level 1.
   * Levels 1 to b-1 need to be empty. We try to pick b and its target
   * size so that</p>
   *
   * <ol>
   * <li>target size is in the range of
   *   (max_bytes_for_level_base / max_bytes_for_level_multiplier,
   *   max_bytes_for_level_base]</li>
   * <li>target size of the last level (level num_levels-1) equals the actual
   *   size of the level.</li>
   * </ol>
   *
   * <p>At the same time max_bytes_for_level_multiplier and
   * max_bytes_for_level_multiplier_additional are still satisfied.</p>
   *
   * <p>With this option on, from an empty DB, we make the last level the base
   * level, which means merging L0 data into the last level, until it exceeds
   * max_bytes_for_level_base. Then we make the second last level the base
   * level and start merging L0 data into it, with its target size being
   * {@code 1/max_bytes_for_level_multiplier} of the last level's actual size.
   * As the data accumulates further, the base level moves to the third last
   * level, and so on.</p>
   *
   * <p><b>Example</b></p>
   *
   * <p>For example, assume {@code max_bytes_for_level_multiplier=10},
   * {@code num_levels=6}, and {@code max_bytes_for_level_base=10MB}.</p>
   *
   * <p>Target sizes of levels 1 to 5 start as:</p>
   * {@code [- - - - 10MB]}
   * <p>with the base level being level 5. Target sizes of levels 1 to 4 are
   * not applicable because they will not be used.
   * Once the size of level 5 grows to more than 10MB, say 11MB, we move the
   * base level to level 4 and the targets become:</p>
   * {@code [- - - 1.1MB 11MB]}
   * <p>As data accumulates, size targets are tuned based on the actual data
   * in level 5. When level 5 has 50MB of data, the targets are:</p>
   * {@code [- - - 5MB 50MB]}
   * <p>Until level 5's actual size exceeds 100MB, say reaching 101MB. If we
   * kept level 4 as the base level, its target size would need to be 10.1MB,
   * which doesn't satisfy the target size range. So now we make level 3
   * the base level and the target sizes of the levels look like:</p>
   * {@code [- - 1.01MB 10.1MB 101MB]}
   * <p>In the same way, while level 5 further grows, all levels' targets grow,
   * like</p>
   * {@code [- - 5MB 50MB 500MB]}
   * <p>Once level 5 exceeds 1000MB and becomes 1001MB, we make level 2 the
   * base level and make the levels' target sizes like this:</p>
   * {@code [- 1.001MB 10.01MB 100.1MB 1001MB]}
   * <p>and so on...</p>
   *
   * <p>By doing this, we give {@code max_bytes_for_level_multiplier} priority
   * over {@code max_bytes_for_level_base}, for a more predictable LSM tree
   * shape. It is useful to limit worst-case space amplification.</p>
   *
   * <p>{@code max_bytes_for_level_multiplier_additional} is ignored with
   * this flag on.</p>
   *
   * <p>Turning this feature on or off for an existing DB can cause an
   * unexpected LSM tree structure, so it's not recommended.</p>
   *
   * <p><strong>Caution</strong>: this option is experimental</p>
   *
   * <p>Default: false</p>
   *
   * @param enableLevelCompactionDynamicLevelBytes boolean value indicating
   *     if {@code LevelCompactionDynamicLevelBytes} shall be enabled.
   * @return the reference to the current options.
   */
  @Experimental("Turning this feature on or off for an existing DB can cause" +
      " unexpected LSM tree structure so it's not recommended")
  T setLevelCompactionDynamicLevelBytes(
      boolean enableLevelCompactionDynamicLevelBytes);

  /**
   * <p>Return if {@code LevelCompactionDynamicLevelBytes} is enabled.
   * </p>
   *
   * <p>For further information see
   * {@link #setLevelCompactionDynamicLevelBytes(boolean)}</p>
   *
   * @return boolean value indicating if
   *     {@code levelCompactionDynamicLevelBytes} is enabled.
   */
  @Experimental("Caution: this option is experimental")
  boolean levelCompactionDynamicLevelBytes();
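
  // Illustrative usage (editor's note, not part of this interface): a sketch of
  // a dynamically-sized level layout, assuming org.rocksdb.ColumnFamilyOptions;
  // the sizes mirror the example above and are otherwise arbitrary.
  //
  //   try (final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions()
  //            .setNumLevels(6)
  //            .setMaxBytesForLevelBase(10 * 1024 * 1024)   // 10MB
  //            .setMaxBytesForLevelMultiplier(10)
  //            .setLevelCompactionDynamicLevelBytes(true)) {
  //     // Level target sizes are derived from the size of the last level, and
  //     // max_bytes_for_level_multiplier_additional is ignored.
  //   }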

  /**
   * Maximum size of each compaction (not guaranteed).
   *
   * @param maxCompactionBytes the compaction size limit
   * @return the reference to the current options.
   */
  T setMaxCompactionBytes(
      long maxCompactionBytes);

  /**
   * Control maximum size of each compaction (not guaranteed).
   *
   * @return the compaction size threshold
   */
  long maxCompactionBytes();

  /**
   * Set compaction style for DB.
   *
   * Default: LEVEL.
   *
   * @param compactionStyle Compaction style.
   * @return the reference to the current options.
   */
  ColumnFamilyOptionsInterface setCompactionStyle(
      CompactionStyle compactionStyle);

  /**
   * Compaction style for DB.
   *
   * @return Compaction style.
   */
  CompactionStyle compactionStyle();

  /**
   * If {@link #compactionStyle()} == {@link CompactionStyle#LEVEL},
   * for each level, this determines which files are prioritized to be
   * picked for compaction.
   *
   * Default: {@link CompactionPriority#ByCompensatedSize}
   *
   * @param compactionPriority The compaction priority
   *
   * @return the reference to the current options.
   */
  T setCompactionPriority(
      CompactionPriority compactionPriority);

  /**
   * Get the compaction priority used if level compaction
   * is used for all levels.
   *
   * @return The compaction priority
   */
  CompactionPriority compactionPriority();
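
  // Illustrative usage (editor's note, not part of this interface): a sketch
  // combining the style and priority settings, assuming
  // org.rocksdb.ColumnFamilyOptions.
  //
  //   try (final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions()
  //            .setCompactionStyle(CompactionStyle.LEVEL)
  //            .setCompactionPriority(CompactionPriority.MinOverlappingRatio)) {
  //     // Within each level, files with the smallest overlapping ratio with
  //     // the next level are picked for compaction first.
  //   }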

  /**
   * Set the options needed to support Universal Style compactions
   *
   * @param compactionOptionsUniversal The Universal Style compaction options
   *
   * @return the reference to the current options.
   */
  T setCompactionOptionsUniversal(
      CompactionOptionsUniversal compactionOptionsUniversal);

  /**
   * The options needed to support Universal Style compactions
   *
   * @return The Universal Style compaction options
   */
  CompactionOptionsUniversal compactionOptionsUniversal();

  /**
   * The options for FIFO compaction style
   *
   * @param compactionOptionsFIFO The FIFO compaction options
   *
   * @return the reference to the current options.
   */
  T setCompactionOptionsFIFO(
      CompactionOptionsFIFO compactionOptionsFIFO);

  /**
   * The options for FIFO compaction style
   *
   * @return The FIFO compaction options
   */
  CompactionOptionsFIFO compactionOptionsFIFO();
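
  // Illustrative usage (editor's note, not part of this interface): a sketch of
  // a FIFO-compacted column family, assuming org.rocksdb.ColumnFamilyOptions
  // and org.rocksdb.CompactionOptionsFIFO; the size cap is arbitrary.
  //
  //   try (final CompactionOptionsFIFO fifoOpts = new CompactionOptionsFIFO()
  //            .setMaxTableFilesSize(1024L * 1024 * 1024);   // 1GB cap
  //        final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions()
  //            .setCompactionStyle(CompactionStyle.FIFO)
  //            .setCompactionOptionsFIFO(fifoOpts)) {
  //     // Once the total SST size exceeds the cap, the oldest files are dropped.
  //   }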

  /**
   * <p>This flag specifies that the implementation should optimize the filters
   * mainly for cases where keys are found rather than also optimize for keys
   * missed. This would be used in cases where the application knows that
   * there are very few misses or the performance in the case of misses is not
   * important.</p>
   *
   * <p>For now, this flag allows us to not store filters for the last level,
   * i.e. the largest level which contains data of the LSM store. For keys
   * which are hits, the filters in this level are not useful because we will
   * search for the data anyway.</p>
   *
   * <p><strong>NOTE</strong>: the filters in other levels are still useful
   * even for key hits because they tell us whether to look in that level or go
   * to the higher level.</p>
   *
   * <p>Default: false</p>
   *
   * @param optimizeFiltersForHits boolean value indicating if this flag is set.
   * @return the reference to the current options.
   */
  T setOptimizeFiltersForHits(
      boolean optimizeFiltersForHits);

  /**
   * <p>Returns the current state of the {@code optimize_filters_for_hits}
   * setting.</p>
   *
   * @return boolean value indicating if the flag
   *     {@code optimize_filters_for_hits} was set.
   */
  boolean optimizeFiltersForHits();
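
  // Illustrative usage (editor's note, not part of this interface): a sketch for
  // a read-mostly workload where lookups almost always hit, assuming
  // org.rocksdb.ColumnFamilyOptions.
  //
  //   try (final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions()
  //            .setOptimizeFiltersForHits(true)) {
  //     // No filters are stored for the last (largest) level, saving memory;
  //     // misses that reach that level pay an extra data lookup instead.
  //   }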

  /**
   * By default, RocksDB runs consistency checks on the LSM every time the LSM
   * changes (Flush, Compaction, AddFile). Use this option if you need to
   * disable them.
   *
   * Default: true
   *
   * @param forceConsistencyChecks false to disable consistency checks
   *
   * @return the reference to the current options.
   */
  T setForceConsistencyChecks(
      boolean forceConsistencyChecks);

  /**
   * By default, RocksDB runs consistency checks on the LSM every time the LSM
   * changes (Flush, Compaction, AddFile).
   *
   * @return true if consistency checks are enforced
   */
  boolean forceConsistencyChecks();
}