]>
Commit | Line | Data |
---|---|---|
7c673cae | 1 | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
11fdf7f2 TL |
2 | // This source code is licensed under both the GPLv2 (found in the |
3 | // COPYING file in the root directory) and Apache 2.0 License | |
4 | // (found in the LICENSE.Apache file in the root directory). | |
7c673cae FG |
5 | |
6 | package org.rocksdb; | |
7 | ||
8 | import java.util.Collection; | |
9 | import java.util.List; | |
10 | ||
f67539c2 | 11 | public interface DBOptionsInterface<T extends DBOptionsInterface<T>> { |
7c673cae FG |
12 | /** |
13 | * Use this if your DB is very small (like under 1GB) and you don't want to | |
14 | * spend lots of memory for memtables. | |
15 | * | |
16 | * @return the instance of the current object. | |
17 | */ | |
18 | T optimizeForSmallDb(); | |
19 | ||
20 | /** | |
21 | * Use the specified object to interact with the environment, | |
22 | * e.g. to read/write files, schedule background work, etc. | |
23 | * Default: {@link Env#getDefault()} | |
24 | * | |
25 | * @param env {@link Env} instance. | |
26 | * @return the instance of the current Options. | |
27 | */ | |
28 | T setEnv(final Env env); | |
29 | ||
30 | /** | |
31 | * Returns the set RocksEnv instance. | |
32 | * | |
33 | * @return {@link RocksEnv} instance set in the options. | |
34 | */ | |
35 | Env getEnv(); | |
36 | ||
37 | /** | |
38 | * <p>By default, RocksDB uses only one background thread for flush and | |
39 | * compaction. Calling this function will set it up such that total of | |
40 | * `total_threads` is used.</p> | |
41 | * | |
42 | * <p>You almost definitely want to call this function if your system is | |
43 | * bottlenecked by RocksDB.</p> | |
44 | * | |
45 | * @param totalThreads The total number of threads to be used by RocksDB. | |
46 | * A good value is the number of cores. | |
47 | * | |
48 | * @return the instance of the current Options | |
49 | */ | |
50 | T setIncreaseParallelism(int totalThreads); | |
51 | ||
52 | /** | |
53 | * If this value is set to true, then the database will be created | |
54 | * if it is missing during {@code RocksDB.open()}. | |
55 | * Default: false | |
56 | * | |
57 | * @param flag a flag indicating whether to create the database if it is | |
58 | * missing when calling {@link RocksDB#open(org.rocksdb.Options, String)}. | |
59 | * | |
60 | * @return the instance of the current Options | |
61 | * @see RocksDB#open(org.rocksdb.Options, String) | |
62 | */ | |
63 | T setCreateIfMissing(boolean flag); | |
64 | ||
65 | /** | |
66 | * Return true if the create_if_missing flag is set to true. | |
67 | * If true, the database will be created if it is missing. | |
68 | * | |
69 | * @return true if the createIfMissing option is set to true. | |
70 | * @see #setCreateIfMissing(boolean) | |
71 | */ | |
72 | boolean createIfMissing(); | |
73 | ||
74 | /** | |
75 | * <p>If true, missing column families will be automatically created</p> | |
76 | * | |
77 | * <p>Default: false</p> | |
78 | * | |
79 | * @param flag a flag indicating if missing column families shall be | |
80 | * created automatically. | |
81 | * @return the reference to the current options. | |
82 | * | |
83 | */ | |
84 | T setCreateMissingColumnFamilies(boolean flag); | |
85 | ||
86 | /** | |
87 | * Return true if the create_missing_column_families flag is set | |
88 | * to true. If true, column families will be created if missing. | |
89 | * | |
90 | * @return true if the createMissingColumnFamilies is set to | |
91 | * true. | |
92 | * @see #setCreateMissingColumnFamilies(boolean) | |
93 | */ | |
94 | boolean createMissingColumnFamilies(); | |
95 | ||
96 | /** | |
97 | * If true, an error will be thrown during RocksDB.open() if the | |
98 | * database already exists. | |
99 | * Default: false | |
100 | * | |
101 | * @param errorIfExists if true, an exception will be thrown | |
102 | * during {@code RocksDB.open()} if the database already exists. | |
103 | * @return the reference to the current option. | |
104 | * @see RocksDB#open(org.rocksdb.Options, String) | |
105 | */ | |
106 | T setErrorIfExists(boolean errorIfExists); | |
107 | ||
108 | /** | |
109 | * If true, an error will be thrown during RocksDB.open() if the | |
110 | * database already exists. | |
111 | * | |
112 | * @return if true, an error is raised when the specified database | |
113 | * already exists before open. | |
114 | */ | |
115 | boolean errorIfExists(); | |
116 | ||
117 | /** | |
118 | * If true, the implementation will do aggressive checking of the | |
119 | * data it is processing and will stop early if it detects any | |
120 | * errors. This may have unforeseen ramifications: for example, a | |
121 | * corruption of one DB entry may cause a large number of entries to | |
122 | * become unreadable or for the entire DB to become unopenable. | |
123 | * If any of the writes to the database fails (Put, Delete, Merge, Write), | |
124 | * the database will switch to read-only mode and fail all other | |
125 | * Write operations. | |
126 | * Default: true | |
127 | * | |
128 | * @param paranoidChecks a flag to indicate whether paranoid-check | |
129 | * is on. | |
130 | * @return the reference to the current option. | |
131 | */ | |
132 | T setParanoidChecks(boolean paranoidChecks); | |
133 | ||
134 | /** | |
135 | * If true, the implementation will do aggressive checking of the | |
136 | * data it is processing and will stop early if it detects any | |
137 | * errors. This may have unforeseen ramifications: for example, a | |
138 | * corruption of one DB entry may cause a large number of entries to | |
139 | * become unreadable or for the entire DB to become unopenable. | |
140 | * If any of the writes to the database fails (Put, Delete, Merge, Write), | |
141 | * the database will switch to read-only mode and fail all other | |
142 | * Write operations. | |
143 | * | |
144 | * @return a boolean indicating whether paranoid-check is on. | |
145 | */ | |
146 | boolean paranoidChecks(); | |
147 | ||
148 | /** | |
149 | * Use to control write rate of flush and compaction. Flush has higher | |
150 | * priority than compaction. Rate limiting is disabled if nullptr. | |
151 | * Default: nullptr | |
152 | * | |
153 | * @param rateLimiter {@link org.rocksdb.RateLimiter} instance. | |
154 | * @return the instance of the current object. | |
155 | * | |
156 | * @since 3.10.0 | |
157 | */ | |
158 | T setRateLimiter(RateLimiter rateLimiter); | |
159 | ||
11fdf7f2 TL |
160 | /** |
161 | * Use to track SST files and control their file deletion rate. | |
162 | * | |
163 | * Features: | |
164 | * - Throttle the deletion rate of the SST files. | |
165 | * - Keep track of the total size of all SST files. | |
166 | * - Set a maximum allowed space limit for SST files that when reached | |
167 | * the DB won't do any further flushes or compactions and will set the | |
168 | * background error. | |
169 | * - Can be shared between multiple dbs. | |
170 | * | |
171 | * Limitations: | |
172 | * - Only track and throttle deletes of SST files in | |
173 | * first db_path (db_name if db_paths is empty). | |
174 | * | |
175 | * @param sstFileManager The SST File Manager for the db. | |
494da23a | 176 | * @return the instance of the current object. |
11fdf7f2 TL |
177 | */ |
178 | T setSstFileManager(SstFileManager sstFileManager); | |
179 | ||
7c673cae FG |
180 | /** |
181 | * <p>Any internal progress/error information generated by | |
182 | * the db will be written to the Logger if it is non-nullptr, | |
183 | * or to a file stored in the same directory as the DB | |
184 | * contents if info_log is nullptr.</p> | |
185 | * | |
186 | * <p>Default: nullptr</p> | |
187 | * | |
188 | * @param logger {@link Logger} instance. | |
189 | * @return the instance of the current object. | |
190 | */ | |
191 | T setLogger(Logger logger); | |
192 | ||
193 | /** | |
194 | * <p>Sets the RocksDB log level. Default level is INFO</p> | |
195 | * | |
196 | * @param infoLogLevel log level to set. | |
197 | * @return the instance of the current object. | |
198 | */ | |
199 | T setInfoLogLevel(InfoLogLevel infoLogLevel); | |
200 | ||
201 | /** | |
202 | * <p>Returns currently set log level.</p> | |
203 | * @return {@link org.rocksdb.InfoLogLevel} instance. | |
204 | */ | |
205 | InfoLogLevel infoLogLevel(); | |
206 | ||
207 | /** | |
494da23a TL |
208 | * If {@link MutableDBOptionsInterface#maxOpenFiles()} is -1, DB will open |
209 | * all files on DB::Open(). You can use this option to increase the number | |
210 | * of threads used to open the files. | |
7c673cae FG |
211 | * |
212 | * Default: 16 | |
213 | * | |
214 | * @param maxFileOpeningThreads the maximum number of threads to use to | |
215 | * open files | |
216 | * | |
217 | * @return the reference to the current options. | |
218 | */ | |
219 | T setMaxFileOpeningThreads(int maxFileOpeningThreads); | |
220 | ||
221 | /** | |
494da23a TL |
222 | * If {@link MutableDBOptionsInterface#maxOpenFiles()} is -1, DB will open all |
223 | * files on DB::Open(). You can use this option to increase the number of | |
224 | * threads used to open the files. | |
7c673cae FG |
225 | * |
226 | * Default: 16 | |
227 | * | |
228 | * @return the maximum number of threads to use to open files | |
229 | */ | |
230 | int maxFileOpeningThreads(); | |
231 | ||
7c673cae | 232 | /** |
11fdf7f2 | 233 | * <p>Sets the statistics object which collects metrics about database operations. |
7c673cae FG |
234 | * Statistics objects should not be shared between DB instances as |
235 | * it does not use any locks to prevent concurrent updates.</p> | |
236 | * | |
11fdf7f2 TL |
237 | * @param statistics The statistics to set |
238 | * | |
7c673cae | 239 | * @return the instance of the current object. |
11fdf7f2 | 240 | * |
7c673cae FG |
241 | * @see RocksDB#open(org.rocksdb.Options, String) |
242 | */ | |
11fdf7f2 | 243 | T setStatistics(final Statistics statistics); |
7c673cae FG |
244 | |
245 | /** | |
11fdf7f2 TL |
246 | * <p>Returns statistics object.</p> |
247 | * | |
248 | * @return the instance of the statistics object or null if there is no | |
249 | * statistics object. | |
7c673cae | 250 | * |
11fdf7f2 | 251 | * @see #setStatistics(Statistics) |
7c673cae | 252 | */ |
11fdf7f2 | 253 | Statistics statistics(); |
7c673cae FG |
254 | |
255 | /** | |
256 | * <p>If true, then every store to stable storage will issue a fsync.</p> | |
257 | * <p>If false, then every store to stable storage will issue a fdatasync. | |
258 | * This parameter should be set to true while storing data to | |
259 | * filesystem like ext3 that can lose files after a reboot.</p> | |
260 | * <p>Default: false</p> | |
261 | * | |
262 | * @param useFsync a boolean flag to specify whether to use fsync | |
263 | * @return the instance of the current object. | |
264 | */ | |
265 | T setUseFsync(boolean useFsync); | |
266 | ||
267 | /** | |
268 | * <p>If true, then every store to stable storage will issue a fsync.</p> | |
269 | * <p>If false, then every store to stable storage will issue a fdatasync. | |
270 | * This parameter should be set to true while storing data to | |
271 | * filesystem like ext3 that can lose files after a reboot.</p> | |
272 | * | |
273 | * @return boolean value indicating if fsync is used. | |
274 | */ | |
275 | boolean useFsync(); | |
276 | ||
277 | /** | |
278 | * A list of paths where SST files can be put into, with its target size. | |
279 | * Newer data is placed into paths specified earlier in the vector while | |
280 | * older data gradually moves to paths specified later in the vector. | |
281 | * | |
282 | * For example, you have a flash device with 10GB allocated for the DB, | |
283 | * as well as a hard drive of 2TB, you should config it to be: | |
284 | * [{"/flash_path", 10GB}, {"/hard_drive", 2TB}] | |
285 | * | |
286 | * The system will try to guarantee data under each path is close to but | |
287 | * not larger than the target size. But current and future file sizes used | |
288 | * by determining where to place a file are based on best-effort estimation, | |
289 | * which means there is a chance that the actual size under the directory | |
290 | * is slightly more than target size under some workloads. User should give | |
291 | * some buffer room for those cases. | |
292 | * | |
293 | * If none of the paths has sufficient room to place a file, the file will | |
294 | * be placed to the last path anyway, despite to the target size. | |
295 | * | |
296 | * Placing newer data to earlier paths is also best-efforts. User should | |
297 | * expect user files to be placed in higher levels in some extreme cases. | |
298 | * | |
299 | * If left empty, only one path will be used, which is db_name passed when | |
300 | * opening the DB. | |
301 | * | |
302 | * Default: empty | |
303 | * | |
304 | * @param dbPaths the paths and target sizes | |
305 | * | |
306 | * @return the reference to the current options | |
307 | */ | |
308 | T setDbPaths(final Collection<DbPath> dbPaths); | |
309 | ||
310 | /** | |
311 | * A list of paths where SST files can be put into, with its target size. | |
312 | * Newer data is placed into paths specified earlier in the vector while | |
313 | * older data gradually moves to paths specified later in the vector. | |
314 | * | |
315 | * For example, you have a flash device with 10GB allocated for the DB, | |
316 | * as well as a hard drive of 2TB, you should config it to be: | |
317 | * [{"/flash_path", 10GB}, {"/hard_drive", 2TB}] | |
318 | * | |
319 | * The system will try to guarantee data under each path is close to but | |
320 | * not larger than the target size. But current and future file sizes used | |
321 | * by determining where to place a file are based on best-effort estimation, | |
322 | * which means there is a chance that the actual size under the directory | |
323 | * is slightly more than target size under some workloads. User should give | |
324 | * some buffer room for those cases. | |
325 | * | |
326 | * If none of the paths has sufficient room to place a file, the file will | |
327 | * be placed to the last path anyway, despite to the target size. | |
328 | * | |
329 | * Placing newer data to earlier paths is also best-efforts. User should | |
330 | * expect user files to be placed in higher levels in some extreme cases. | |
331 | * | |
332 | * If left empty, only one path will be used, which is db_name passed when | |
333 | * opening the DB. | |
334 | * | |
335 | * Default: {@link java.util.Collections#emptyList()} | |
336 | * | |
337 | * @return dbPaths the paths and target sizes | |
338 | */ | |
339 | List<DbPath> dbPaths(); | |
340 | ||
341 | /** | |
342 | * This specifies the info LOG dir. | |
343 | * If it is empty, the log files will be in the same dir as data. | |
344 | * If it is non empty, the log files will be in the specified dir, | |
345 | * and the db data dir's absolute path will be used as the log file | |
346 | * name's prefix. | |
347 | * | |
348 | * @param dbLogDir the path to the info log directory | |
349 | * @return the instance of the current object. | |
350 | */ | |
351 | T setDbLogDir(String dbLogDir); | |
352 | ||
353 | /** | |
354 | * Returns the directory of info log. | |
355 | * | |
356 | * If it is empty, the log files will be in the same dir as data. | |
357 | * If it is non empty, the log files will be in the specified dir, | |
358 | * and the db data dir's absolute path will be used as the log file | |
359 | * name's prefix. | |
360 | * | |
361 | * @return the path to the info log directory | |
362 | */ | |
363 | String dbLogDir(); | |
364 | ||
365 | /** | |
366 | * This specifies the absolute dir path for write-ahead logs (WAL). | |
367 | * If it is empty, the log files will be in the same dir as data, | |
368 | * dbname is used as the data dir by default | |
369 | * If it is non empty, the log files will be kept in the specified dir. | |
370 | * When destroying the db, | |
371 | * all log files in wal_dir and the dir itself is deleted | |
372 | * | |
373 | * @param walDir the path to the write-ahead-log directory. | |
374 | * @return the instance of the current object. | |
375 | */ | |
376 | T setWalDir(String walDir); | |
377 | ||
378 | /** | |
379 | * Returns the path to the write-ahead-logs (WAL) directory. | |
380 | * | |
381 | * If it is empty, the log files will be in the same dir as data, | |
382 | * dbname is used as the data dir by default | |
383 | * If it is non empty, the log files will be kept in the specified dir. | |
384 | * When destroying the db, | |
385 | * all log files in wal_dir and the dir itself is deleted | |
386 | * | |
387 | * @return the path to the write-ahead-logs (WAL) directory. | |
388 | */ | |
389 | String walDir(); | |
390 | ||
391 | /** | |
392 | * The periodicity when obsolete files get deleted. The default | |
393 | * value is 6 hours. The files that get out of scope by compaction | |
394 | * process will still get automatically deleted on every compaction, | |
395 | * regardless of this setting | |
396 | * | |
397 | * @param micros the time interval in micros | |
398 | * @return the instance of the current object. | |
399 | */ | |
400 | T setDeleteObsoleteFilesPeriodMicros(long micros); | |
401 | ||
402 | /** | |
403 | * The periodicity when obsolete files get deleted. The default | |
404 | * value is 6 hours. The files that get out of scope by compaction | |
405 | * process will still get automatically deleted on every compaction, | |
406 | * regardless of this setting | |
407 | * | |
408 | * @return the time interval in micros when obsolete files will be deleted. | |
409 | */ | |
410 | long deleteObsoleteFilesPeriodMicros(); | |
411 | ||
7c673cae FG |
412 | /** |
413 | * This value represents the maximum number of threads that will | |
414 | * concurrently perform a compaction job by breaking it into multiple, | |
415 | * smaller ones that are run simultaneously. | |
416 | * Default: 1 (i.e. no subcompactions) | |
417 | * | |
418 | * @param maxSubcompactions The maximum number of threads that will | |
419 | * concurrently perform a compaction job | |
494da23a TL |
420 | * |
421 | * @return the instance of the current object. | |
7c673cae | 422 | */ |
494da23a | 423 | T setMaxSubcompactions(int maxSubcompactions); |
7c673cae FG |
424 | |
425 | /** | |
426 | * This value represents the maximum number of threads that will | |
427 | * concurrently perform a compaction job by breaking it into multiple, | |
428 | * smaller ones that are run simultaneously. | |
429 | * Default: 1 (i.e. no subcompactions) | |
430 | * | |
431 | * @return The maximum number of threads that will concurrently perform a | |
432 | * compaction job | |
433 | */ | |
434 | int maxSubcompactions(); | |
435 | ||
436 | /** | |
f67539c2 TL |
437 | * NOT SUPPORTED ANYMORE: RocksDB automatically decides this based on the |
438 | * value of max_background_jobs. For backwards compatibility we will set | |
439 | * `max_background_jobs = max_background_compactions + max_background_flushes` | |
440 | * in the case where user sets at least one of `max_background_compactions` or | |
441 | * `max_background_flushes`. | |
442 | * | |
7c673cae FG |
443 | * Specifies the maximum number of concurrent background flush jobs. |
444 | * If you're increasing this, also consider increasing number of threads in | |
445 | * HIGH priority thread pool. For more information, see | |
f67539c2 | 446 | * Default: -1 |
7c673cae FG |
447 | * |
448 | * @param maxBackgroundFlushes number of max concurrent flush jobs | |
449 | * @return the instance of the current object. | |
450 | * | |
451 | * @see RocksEnv#setBackgroundThreads(int) | |
494da23a TL |
452 | * @see RocksEnv#setBackgroundThreads(int, Priority) |
453 | * @see MutableDBOptionsInterface#maxBackgroundCompactions() | |
11fdf7f2 | 454 | * |
494da23a | 455 | * @deprecated Use {@link MutableDBOptionsInterface#setMaxBackgroundJobs(int)} |
7c673cae | 456 | */ |
494da23a | 457 | @Deprecated |
7c673cae FG |
458 | T setMaxBackgroundFlushes(int maxBackgroundFlushes); |
459 | ||
460 | /** | |
f67539c2 TL |
461 | * NOT SUPPORTED ANYMORE: RocksDB automatically decides this based on the |
462 | * value of max_background_jobs. For backwards compatibility we will set | |
463 | * `max_background_jobs = max_background_compactions + max_background_flushes` | |
464 | * in the case where user sets at least one of `max_background_compactions` or | |
465 | * `max_background_flushes`. | |
466 | * | |
7c673cae FG |
467 | * Returns the maximum number of concurrent background flush jobs. |
468 | * If you're increasing this, also consider increasing number of threads in | |
469 | * HIGH priority thread pool. For more information, see | |
f67539c2 | 470 | * Default: -1 |
7c673cae FG |
471 | * |
472 | * @return the maximum number of concurrent background flush jobs. | |
473 | * @see RocksEnv#setBackgroundThreads(int) | |
494da23a | 474 | * @see RocksEnv#setBackgroundThreads(int, Priority) |
7c673cae | 475 | */ |
494da23a | 476 | @Deprecated |
7c673cae FG |
477 | int maxBackgroundFlushes(); |
478 | ||
479 | /** | |
480 | * Specifies the maximum size of an info log file. If the current log file | |
481 | * is larger than `max_log_file_size`, a new info log file will | |
482 | * be created. | |
483 | * If 0, all logs will be written to one log file. | |
484 | * | |
485 | * @param maxLogFileSize the maximum size of a info log file. | |
486 | * @return the instance of the current object. | |
487 | * @throws java.lang.IllegalArgumentException thrown on 32-Bit platforms | |
488 | * while overflowing the underlying platform specific value. | |
489 | */ | |
490 | T setMaxLogFileSize(long maxLogFileSize); | |
491 | ||
492 | /** | |
493 | * Returns the maximum size of an info log file. If the current log file | |
494 | * is larger than this size, a new info log file will be created. | |
495 | * If 0, all logs will be written to one log file. | |
496 | * | |
497 | * @return the maximum size of the info log file. | |
498 | */ | |
499 | long maxLogFileSize(); | |
500 | ||
501 | /** | |
502 | * Specifies the time interval for the info log file to roll (in seconds). | |
503 | * If specified with non-zero value, log file will be rolled | |
504 | * if it has been active longer than `log_file_time_to_roll`. | |
505 | * Default: 0 (disabled) | |
506 | * | |
507 | * @param logFileTimeToRoll the time interval in seconds. | |
508 | * @return the instance of the current object. | |
509 | * @throws java.lang.IllegalArgumentException thrown on 32-Bit platforms | |
510 | * while overflowing the underlying platform specific value. | |
511 | */ | |
512 | T setLogFileTimeToRoll(long logFileTimeToRoll); | |
513 | ||
514 | /** | |
515 | * Returns the time interval for the info log file to roll (in seconds). | |
516 | * If specified with non-zero value, log file will be rolled | |
517 | * if it has been active longer than `log_file_time_to_roll`. | |
518 | * Default: 0 (disabled) | |
519 | * | |
520 | * @return the time interval in seconds. | |
521 | */ | |
522 | long logFileTimeToRoll(); | |
523 | ||
524 | /** | |
525 | * Specifies the maximum number of info log files to be kept. | |
526 | * Default: 1000 | |
527 | * | |
528 | * @param keepLogFileNum the maximum number of info log files to be kept. | |
529 | * @return the instance of the current object. | |
530 | * @throws java.lang.IllegalArgumentException thrown on 32-Bit platforms | |
531 | * while overflowing the underlying platform specific value. | |
532 | */ | |
533 | T setKeepLogFileNum(long keepLogFileNum); | |
534 | ||
535 | /** | |
536 | * Returns the maximum number of info log files to be kept. | |
537 | * Default: 1000 | |
538 | * | |
539 | * @return the maximum number of info log files to be kept. | |
540 | */ | |
541 | long keepLogFileNum(); | |
542 | ||
543 | /** | |
544 | * Recycle log files. | |
545 | * | |
546 | * If non-zero, we will reuse previously written log files for new | |
547 | * logs, overwriting the old data. The value indicates how many | |
548 | * such files we will keep around at any point in time for later | |
549 | * use. | |
550 | * | |
551 | * This is more efficient because the blocks are already | |
552 | * allocated and fdatasync does not need to update the inode after | |
553 | * each write. | |
554 | * | |
555 | * Default: 0 | |
556 | * | |
557 | * @param recycleLogFileNum the number of log files to keep for recycling | |
558 | * | |
559 | * @return the reference to the current options | |
560 | */ | |
561 | T setRecycleLogFileNum(long recycleLogFileNum); | |
562 | ||
563 | /** | |
564 | * Recycle log files. | |
565 | * | |
566 | * If non-zero, we will reuse previously written log files for new | |
567 | * logs, overwriting the old data. The value indicates how many | |
568 | * such files we will keep around at any point in time for later | |
569 | * use. | |
570 | * | |
571 | * This is more efficient because the blocks are already | |
572 | * allocated and fdatasync does not need to update the inode after | |
573 | * each write. | |
574 | * | |
575 | * Default: 0 | |
576 | * | |
577 | * @return the number of log files kept for recycling | |
578 | */ | |
579 | long recycleLogFileNum(); | |
580 | ||
581 | /** | |
582 | * Manifest file is rolled over on reaching this limit. | |
583 | * The older manifest file will be deleted. | |
f67539c2 TL |
584 | * The default value is 1GB so that the manifest file can grow, but not |
585 | * reach the limit of storage capacity. | |
7c673cae FG |
586 | * |
587 | * @param maxManifestFileSize the size limit of a manifest file. | |
588 | * @return the instance of the current object. | |
589 | */ | |
590 | T setMaxManifestFileSize(long maxManifestFileSize); | |
591 | ||
592 | /** | |
593 | * Manifest file is rolled over on reaching this limit. | |
594 | * The older manifest file will be deleted. | |
f67539c2 TL |
595 | * The default value is 1GB so that the manifest file can grow, but not |
596 | * reach the limit of storage capacity. | |
7c673cae FG |
597 | * |
598 | * @return the size limit of a manifest file. | |
599 | */ | |
600 | long maxManifestFileSize(); | |
601 | ||
602 | /** | |
603 | * Number of shards used for table cache. | |
604 | * | |
605 | * @param tableCacheNumshardbits the number of shards | |
606 | * @return the instance of the current object. | |
607 | */ | |
608 | T setTableCacheNumshardbits(int tableCacheNumshardbits); | |
609 | ||
610 | /** | |
611 | * Number of shards used for table cache. | |
612 | * | |
613 | * @return the number of shards used for table cache. | |
614 | */ | |
615 | int tableCacheNumshardbits(); | |
616 | ||
617 | /** | |
618 | * {@link #walTtlSeconds()} and {@link #walSizeLimitMB()} affect how archived logs | |
619 | * will be deleted. | |
620 | * <ol> | |
621 | * <li>If both set to 0, logs will be deleted asap and will not get into | |
622 | * the archive.</li> | |
623 | * <li>If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0, | |
624 | * WAL files will be checked every 10 min and if total size is greater | |
625 | * than WAL_size_limit_MB, they will be deleted starting with the | |
626 | * earliest until size_limit is met. All empty files will be deleted.</li> | |
627 | * <li>If WAL_ttl_seconds is not 0 and WAL_size_limit_MB is 0, then | |
628 | * WAL files will be checked every WAL_ttl_seconds / 2 and those that | |
629 | * are older than WAL_ttl_seconds will be deleted.</li> | |
630 | * <li>If both are not 0, WAL files will be checked every 10 min and both | |
631 | * checks will be performed with ttl being first.</li> | |
632 | * </ol> | |
633 | * | |
634 | * @param walTtlSeconds the ttl seconds | |
635 | * @return the instance of the current object. | |
636 | * @see #setWalSizeLimitMB(long) | |
637 | */ | |
638 | T setWalTtlSeconds(long walTtlSeconds); | |
639 | ||
640 | /** | |
641 | * WalTtlSeconds() and walSizeLimitMB() affect how archived logs | |
642 | * will be deleted. | |
643 | * <ol> | |
644 | * <li>If both set to 0, logs will be deleted asap and will not get into | |
645 | * the archive.</li> | |
646 | * <li>If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0, | |
647 | * WAL files will be checked every 10 min and if total size is greater | |
648 | * than WAL_size_limit_MB, they will be deleted starting with the | |
649 | * earliest until size_limit is met. All empty files will be deleted.</li> | |
650 | * <li>If WAL_ttl_seconds is not 0 and WAL_size_limit_MB is 0, then | |
651 | * WAL files will be checked every WAL_ttl_seconds / 2 and those that | |
652 | * are older than WAL_ttl_seconds will be deleted.</li> | |
653 | * <li>If both are not 0, WAL files will be checked every 10 min and both | |
654 | * checks will be performed with ttl being first.</li> | |
655 | * </ol> | |
656 | * | |
657 | * @return the wal-ttl seconds | |
658 | * @see #walSizeLimitMB() | |
659 | */ | |
660 | long walTtlSeconds(); | |
661 | ||
662 | /** | |
663 | * WalTtlSeconds() and walSizeLimitMB() affect how archived logs | |
664 | * will be deleted. | |
665 | * <ol> | |
666 | * <li>If both set to 0, logs will be deleted asap and will not get into | |
667 | * the archive.</li> | |
668 | * <li>If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0, | |
669 | * WAL files will be checked every 10 min and if total size is greater | |
670 | * than WAL_size_limit_MB, they will be deleted starting with the | |
671 | * earliest until size_limit is met. All empty files will be deleted.</li> | |
672 | * <li>If WAL_ttl_seconds is not 0 and WAL_size_limit_MB is 0, then | |
673 | * WAL files will be checked every WAL_ttl_seconds / 2 and those that | |
674 | * are older than WAL_ttl_seconds will be deleted.</li> | |
675 | * <li>If both are not 0, WAL files will be checked every 10 min and both | |
676 | * checks will be performed with ttl being first.</li> | |
677 | * </ol> | |
678 | * | |
679 | * @param sizeLimitMB size limit in mega-bytes. | |
680 | * @return the instance of the current object. | |
681 | * @see #setWalSizeLimitMB(long) | |
682 | */ | |
683 | T setWalSizeLimitMB(long sizeLimitMB); | |
684 | ||
685 | /** | |
686 | * {@link #walTtlSeconds()} and {@link #walSizeLimitMB()} affect how archived logs | |
687 | * will be deleted. | |
688 | * <ol> | |
689 | * <li>If both set to 0, logs will be deleted asap and will not get into | |
690 | * the archive.</li> | |
691 | * <li>If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0, | |
692 | * WAL files will be checked every 10 min and if total size is greater | |
693 | * than WAL_size_limit_MB, they will be deleted starting with the | |
694 | * earliest until size_limit is met. All empty files will be deleted.</li> | |
695 | * <li>If WAL_ttl_seconds is not 0 and WAL_size_limit_MB is 0, then | |
696 | * WAL files will be checked every WAL_ttl_seconds / 2 and those that | |
697 | * are older than WAL_ttl_seconds will be deleted.</li> | |
698 | * <li>If both are not 0, WAL files will be checked every 10 min and both | |
699 | * checks will be performed with ttl being first.</li> | |
700 | * </ol> | |
701 | * @return size limit in mega-bytes. | |
702 | * @see #walSizeLimitMB() | |
703 | */ | |
704 | long walSizeLimitMB(); | |
705 | ||
/**
 * Number of bytes to preallocate (via fallocate) the manifest
 * files. Default is 4mb, which is reasonable to reduce random IO
 * as well as prevent overallocation for mounts that preallocate
 * large amounts of data (such as xfs's allocsize option).
 *
 * @param size the size in byte
 * @return the instance of the current object.
 * @throws java.lang.IllegalArgumentException thrown on 32-Bit platforms
 *     while overflowing the underlying platform specific value.
 */
T setManifestPreallocationSize(long size);

/**
 * Number of bytes to preallocate (via fallocate) the manifest
 * files. Default is 4mb, which is reasonable to reduce random IO
 * as well as prevent overallocation for mounts that preallocate
 * large amounts of data (such as xfs's allocsize option).
 *
 * @return size in bytes.
 */
long manifestPreallocationSize();
728 | ||
/**
 * Enable the OS to use direct I/O for reading sst tables.
 * Default: false
 *
 * @param useDirectReads if true, then direct read is enabled
 * @return the instance of the current object.
 */
T setUseDirectReads(boolean useDirectReads);

/**
 * Enable the OS to use direct I/O for reading sst tables.
 * Default: false
 *
 * @return if true, then direct reads are enabled
 */
boolean useDirectReads();
745 | ||
/**
 * Enable the OS to use direct reads and writes in flush and
 * compaction.
 * Default: false
 *
 * @param useDirectIoForFlushAndCompaction if true, then direct
 *     I/O will be enabled for background flush and compactions
 * @return the instance of the current object.
 */
T setUseDirectIoForFlushAndCompaction(boolean useDirectIoForFlushAndCompaction);

/**
 * Enable the OS to use direct reads and writes in flush and
 * compaction.
 *
 * @return if true, then direct I/O is enabled for flush and
 *     compaction
 */
boolean useDirectIoForFlushAndCompaction();
765 | ||
/**
 * Whether fallocate calls are allowed.
 *
 * @param allowFAllocate false if fallocate() calls are bypassed
 *
 * @return the reference to the current options.
 */
T setAllowFAllocate(boolean allowFAllocate);

/**
 * Whether fallocate calls are allowed.
 *
 * @return false if fallocate() calls are bypassed
 */
boolean allowFAllocate();
781 | ||
/**
 * Allow the OS to mmap file for reading sst tables.
 * Default: false
 *
 * @param allowMmapReads true if mmap reads are allowed.
 * @return the instance of the current object.
 */
T setAllowMmapReads(boolean allowMmapReads);

/**
 * Allow the OS to mmap file for reading sst tables.
 * Default: false
 *
 * @return true if mmap reads are allowed.
 */
boolean allowMmapReads();
798 | ||
/**
 * Allow the OS to mmap file for writing. Default: false
 *
 * @param allowMmapWrites true if mmap writes are allowed.
 * @return the instance of the current object.
 */
T setAllowMmapWrites(boolean allowMmapWrites);

/**
 * Allow the OS to mmap file for writing. Default: false
 *
 * @return true if mmap writes are allowed.
 */
boolean allowMmapWrites();
813 | ||
/**
 * Disable child process inherit open files. Default: true
 *
 * @param isFdCloseOnExec true if child process inheriting open
 *     files is disabled.
 * @return the instance of the current object.
 */
T setIsFdCloseOnExec(boolean isFdCloseOnExec);

/**
 * Disable child process inherit open files. Default: true
 *
 * @return true if child process inheriting open files is disabled.
 */
boolean isFdCloseOnExec();
829 | ||
7c673cae FG |
/**
 * If set true, will hint the underlying file system that the file
 * access pattern is random, when a sst file is opened.
 * Default: true
 *
 * @param adviseRandomOnOpen true if hinting random access is on.
 * @return the instance of the current object.
 */
T setAdviseRandomOnOpen(boolean adviseRandomOnOpen);

/**
 * If set true, will hint the underlying file system that the file
 * access pattern is random, when a sst file is opened.
 * Default: true
 *
 * @return true if hinting random access is on.
 */
boolean adviseRandomOnOpen();
848 | ||
/**
 * Amount of data to build up in memtables across all column
 * families before writing to disk.
 *
 * This is distinct from {@link ColumnFamilyOptions#writeBufferSize()},
 * which enforces a limit for a single memtable.
 *
 * This feature is disabled by default. Specify a non-zero value
 * to enable it.
 *
 * Default: 0 (disabled)
 *
 * @param dbWriteBufferSize the size of the write buffer
 *
 * @return the reference to the current options.
 */
T setDbWriteBufferSize(long dbWriteBufferSize);
866 | ||
494da23a TL |
/**
 * Use passed {@link WriteBufferManager} to control memory usage across
 * multiple column families and/or DB instances.
 *
 * Check <a href="https://github.com/facebook/rocksdb/wiki/Write-Buffer-Manager">
 * https://github.com/facebook/rocksdb/wiki/Write-Buffer-Manager</a>
 * for more details on when to use it.
 *
 * @param writeBufferManager The WriteBufferManager to use
 * @return the reference of the current options.
 */
T setWriteBufferManager(final WriteBufferManager writeBufferManager);

/**
 * Reference to {@link WriteBufferManager} used by it. <br>
 *
 * Default: null (Disabled)
 *
 * @return a reference to WriteBufferManager
 */
WriteBufferManager writeBufferManager();
888 | ||
7c673cae FG |
/**
 * Amount of data to build up in memtables across all column
 * families before writing to disk.
 *
 * This is distinct from {@link ColumnFamilyOptions#writeBufferSize()},
 * which enforces a limit for a single memtable.
 *
 * This feature is disabled by default. Specify a non-zero value
 * to enable it.
 *
 * Default: 0 (disabled)
 *
 * @return the size of the write buffer
 */
long dbWriteBufferSize();
904 | ||
/**
 * Specify the file access pattern once a compaction is started.
 * It will be applied to all input files of a compaction.
 *
 * Default: {@link AccessHint#NORMAL}
 *
 * @param accessHint The access hint
 *
 * @return the reference to the current options.
 */
T setAccessHintOnCompactionStart(final AccessHint accessHint);

/**
 * Specify the file access pattern once a compaction is started.
 * It will be applied to all input files of a compaction.
 *
 * Default: {@link AccessHint#NORMAL}
 *
 * @return The access hint
 */
AccessHint accessHintOnCompactionStart();
926 | ||
/**
 * If true, always create a new file descriptor and new table reader
 * for compaction inputs. Turning this parameter on may introduce extra
 * memory usage in the table reader, if it allocates extra memory
 * for indexes. This will allow file descriptor prefetch options
 * to be set for compaction input files and not to impact file
 * descriptors for the same file used by user queries.
 * Suggest to enable {@link BlockBasedTableConfig#cacheIndexAndFilterBlocks()}
 * for this mode if using block-based table.
 *
 * Default: false
 *
 * @param newTableReaderForCompactionInputs true if a new file descriptor and
 *     table reader should be created for compaction inputs
 *
 * @return the reference to the current options.
 */
T setNewTableReaderForCompactionInputs(
    boolean newTableReaderForCompactionInputs);

/**
 * If true, always create a new file descriptor and new table reader
 * for compaction inputs. Turning this parameter on may introduce extra
 * memory usage in the table reader, if it allocates extra memory
 * for indexes. This will allow file descriptor prefetch options
 * to be set for compaction input files and not to impact file
 * descriptors for the same file used by user queries.
 * Suggest to enable {@link BlockBasedTableConfig#cacheIndexAndFilterBlocks()}
 * for this mode if using block-based table.
 *
 * Default: false
 *
 * @return true if a new file descriptor and table reader are created for
 *     compaction inputs
 */
boolean newTableReaderForCompactionInputs();
963 | ||
7c673cae FG |
/**
 * This is a maximum buffer size that is used by WinMmapReadableFile in
 * unbuffered disk I/O mode. We need to maintain an aligned buffer for
 * reads. We allow the buffer to grow until the specified value and then
 * for bigger requests allocate one shot buffers. In unbuffered mode we
 * always bypass read-ahead buffer at ReadaheadRandomAccessFile.
 * When read-ahead is required we then make use of
 * {@link MutableDBOptionsInterface#compactionReadaheadSize()} value and
 * always try to read ahead.
 * With read-ahead we always pre-allocate buffer to the size instead of
 * growing it up to a limit.
 *
 * This option is currently honored only on Windows.
 *
 * Default: 1 Mb
 *
 * Special value: 0 - means do not maintain per instance buffer. Allocate
 * per request buffer and avoid locking.
 *
 * @param randomAccessMaxBufferSize the maximum size of the random access
 *     buffer
 *
 * @return the reference to the current options.
 */
T setRandomAccessMaxBufferSize(long randomAccessMaxBufferSize);

/**
 * This is a maximum buffer size that is used by WinMmapReadableFile in
 * unbuffered disk I/O mode. We need to maintain an aligned buffer for
 * reads. We allow the buffer to grow until the specified value and then
 * for bigger requests allocate one shot buffers. In unbuffered mode we
 * always bypass read-ahead buffer at ReadaheadRandomAccessFile.
 * When read-ahead is required we then make use of
 * {@link MutableDBOptionsInterface#compactionReadaheadSize()} value and
 * always try to read ahead. With read-ahead we always pre-allocate buffer
 * to the size instead of growing it up to a limit.
 *
 * This option is currently honored only on Windows.
 *
 * Default: 1 Mb
 *
 * Special value: 0 - means do not maintain per instance buffer. Allocate
 * per request buffer and avoid locking.
 *
 * @return the maximum size of the random access buffer
 */
long randomAccessMaxBufferSize();
1011 | ||
7c673cae FG |
/**
 * Use adaptive mutex, which spins in the user space before resorting
 * to kernel. This could reduce context switch when the mutex is not
 * heavily contended. However, if the mutex is hot, we could end up
 * wasting spin time.
 * Default: false
 *
 * @param useAdaptiveMutex true if adaptive mutex is used.
 * @return the instance of the current object.
 */
T setUseAdaptiveMutex(boolean useAdaptiveMutex);

/**
 * Use adaptive mutex, which spins in the user space before resorting
 * to kernel. This could reduce context switch when the mutex is not
 * heavily contended. However, if the mutex is hot, we could end up
 * wasting spin time.
 * Default: false
 *
 * @return true if adaptive mutex is used.
 */
boolean useAdaptiveMutex();
1034 | ||
494da23a TL |
1035 | //TODO(AR) NOW |
1036 | // /** | |
1037 | // * Sets the {@link EventListener}s whose callback functions | |
1038 | // * will be called when specific RocksDB event happens. | |
1039 | // * | |
1040 | // * @param listeners the listeners who should be notified on various events. | |
1041 | // * | |
1042 | // * @return the instance of the current object. | |
1043 | // */ | |
1044 | // T setListeners(final List<EventListener> listeners); | |
1045 | // | |
1046 | // /** | |
1047 | // * Gets the {@link EventListener}s whose callback functions | |
1048 | // * will be called when specific RocksDB event happens. | |
1049 | // * | |
1050 | // * @return a collection of Event listeners. | |
1051 | // */ | |
1052 | // Collection<EventListener> listeners(); | |
7c673cae FG |
1053 | |
/**
 * If true, then the status of the threads involved in this DB will
 * be tracked and available via GetThreadList() API.
 *
 * Default: false
 *
 * @param enableThreadTracking true to enable tracking
 *
 * @return the reference to the current options.
 */
T setEnableThreadTracking(boolean enableThreadTracking);

/**
 * If true, then the status of the threads involved in this DB will
 * be tracked and available via GetThreadList() API.
 *
 * Default: false
 *
 * @return true if tracking is enabled
 */
boolean enableThreadTracking();
1075 | ||
/**
 * By default, a single write thread queue is maintained. The thread that
 * gets to the head of the queue becomes the write batch group leader and is
 * responsible for writing to WAL and memtable for the batch group.
 *
 * If {@link #enablePipelinedWrite()} is true, separate write thread queues are
 * maintained for WAL write and memtable write. A write thread first enters the
 * WAL writer queue and then the memtable writer queue. Pending threads on the
 * WAL writer queue thus only have to wait for previous writers to finish their
 * WAL writing but not the memtable writing. Enabling the feature may improve
 * write throughput and reduce latency of the prepare phase of two-phase
 * commit.
 *
 * Default: false
 *
 * @param enablePipelinedWrite true to enable pipelined writes
 *
 * @return the reference to the current options.
 */
T setEnablePipelinedWrite(final boolean enablePipelinedWrite);

/**
 * Returns true if pipelined writes are enabled.
 * See {@link #setEnablePipelinedWrite(boolean)}.
 *
 * @return true if pipelined writes are enabled, false otherwise.
 */
boolean enablePipelinedWrite();
7c673cae | 1104 | |
f67539c2 TL |
/**
 * Setting {@link #unorderedWrite()} to true trades higher write throughput with
 * relaxing the immutability guarantee of snapshots. This violates the
 * repeatability one expects from ::Get from a snapshot, as well as
 * ::MultiGet and Iterator's consistent-point-in-time view property.
 * If the application cannot tolerate the relaxed guarantees, it can implement
 * its own mechanisms to work around that and yet benefit from the higher
 * throughput. Using TransactionDB with WRITE_PREPARED write policy and
 * {@link #twoWriteQueues()} true is one way to achieve immutable snapshots despite
 * unordered_write.
 *
 * By default, i.e., when it is false, rocksdb does not advance the sequence
 * number for new snapshots unless all the writes with lower sequence numbers
 * are already finished. This provides the immutability that we expect from
 * snapshots. Moreover, since Iterator and MultiGet internally depend on
 * snapshots, the snapshot immutability results into Iterator and MultiGet
 * offering consistent-point-in-time view. If set to true, although
 * Read-Your-Own-Write property is still provided, the snapshot immutability
 * property is relaxed: the writes issued after the snapshot is obtained (with
 * larger sequence numbers) will be still not visible to the reads from that
 * snapshot, however, there still might be pending writes (with lower sequence
 * number) that will change the state visible to the snapshot after they are
 * landed to the memtable.
 *
 * @param unorderedWrite true to enable unordered writes
 *
 * @return the reference to the current options.
 */
T setUnorderedWrite(final boolean unorderedWrite);

/**
 * Returns true if unordered writes are enabled.
 * See {@link #setUnorderedWrite(boolean)}.
 *
 * @return true if unordered writes are enabled, false otherwise.
 */
boolean unorderedWrite();
1142 | ||
7c673cae FG |
/**
 * If true, allow multi-writers to update mem tables in parallel.
 * Only some memtable factories support concurrent writes; currently it
 * is implemented only for SkipListFactory. Concurrent memtable writes
 * are not compatible with inplace_update_support or filter_deletes.
 * It is strongly recommended to set
 * {@link #setEnableWriteThreadAdaptiveYield(boolean)} if you are going to use
 * this feature.
 * Default: true
 *
 * @param allowConcurrentMemtableWrite true to enable concurrent writes
 *     for the memtable
 *
 * @return the reference to the current options.
 */
T setAllowConcurrentMemtableWrite(boolean allowConcurrentMemtableWrite);

/**
 * If true, allow multi-writers to update mem tables in parallel.
 * Only some memtable factories support concurrent writes; currently it
 * is implemented only for SkipListFactory. Concurrent memtable writes
 * are not compatible with inplace_update_support or filter_deletes.
 * It is strongly recommended to set
 * {@link #setEnableWriteThreadAdaptiveYield(boolean)} if you are going to use
 * this feature.
 * Default: true
 *
 * @return true if concurrent writes are enabled for the memtable
 */
boolean allowConcurrentMemtableWrite();
1173 | ||
/**
 * If true, threads synchronizing with the write batch group leader will
 * wait for up to {@link #writeThreadMaxYieldUsec()} before blocking on a
 * mutex. This can substantially improve throughput for concurrent workloads,
 * regardless of whether {@link #allowConcurrentMemtableWrite()} is enabled.
 * Default: true
 *
 * @param enableWriteThreadAdaptiveYield true to enable adaptive yield for the
 *     write threads
 *
 * @return the reference to the current options.
 */
T setEnableWriteThreadAdaptiveYield(
    boolean enableWriteThreadAdaptiveYield);

/**
 * If true, threads synchronizing with the write batch group leader will
 * wait for up to {@link #writeThreadMaxYieldUsec()} before blocking on a
 * mutex. This can substantially improve throughput for concurrent workloads,
 * regardless of whether {@link #allowConcurrentMemtableWrite()} is enabled.
 * Default: true
 *
 * @return true if adaptive yield is enabled
 *     for the writing threads
 */
boolean enableWriteThreadAdaptiveYield();
1200 | ||
/**
 * The maximum number of microseconds that a write operation will use
 * a yielding spin loop to coordinate with other write threads before
 * blocking on a mutex. (Assuming {@link #writeThreadSlowYieldUsec()} is
 * set properly) increasing this value is likely to increase RocksDB
 * throughput at the expense of increased CPU usage.
 * Default: 100
 *
 * @param writeThreadMaxYieldUsec maximum number of microseconds
 *
 * @return the reference to the current options.
 */
T setWriteThreadMaxYieldUsec(long writeThreadMaxYieldUsec);

/**
 * The maximum number of microseconds that a write operation will use
 * a yielding spin loop to coordinate with other write threads before
 * blocking on a mutex. (Assuming {@link #writeThreadSlowYieldUsec()} is
 * set properly) increasing this value is likely to increase RocksDB
 * throughput at the expense of increased CPU usage.
 * Default: 100
 *
 * @return the maximum number of microseconds
 */
long writeThreadMaxYieldUsec();
1226 | ||
/**
 * The latency in microseconds after which a std::this_thread::yield
 * call (sched_yield on Linux) is considered to be a signal that
 * other processes or threads would like to use the current core.
 * Increasing this makes writer threads more likely to take CPU
 * by spinning, which will show up as an increase in the number of
 * involuntary context switches.
 * Default: 3
 *
 * @param writeThreadSlowYieldUsec the latency in microseconds
 *
 * @return the reference to the current options.
 */
T setWriteThreadSlowYieldUsec(long writeThreadSlowYieldUsec);

/**
 * The latency in microseconds after which a std::this_thread::yield
 * call (sched_yield on Linux) is considered to be a signal that
 * other processes or threads would like to use the current core.
 * Increasing this makes writer threads more likely to take CPU
 * by spinning, which will show up as an increase in the number of
 * involuntary context switches.
 * Default: 3
 *
 * @return the latency in microseconds
 */
long writeThreadSlowYieldUsec();
1254 | ||
/**
 * If true, then DB::Open() will not update the statistics used to optimize
 * compaction decision by loading table properties from many files.
 * Turning off this feature will improve DBOpen time especially in
 * disk environment.
 *
 * Default: false
 *
 * @param skipStatsUpdateOnDbOpen true if updating stats will be skipped
 *
 * @return the reference to the current options.
 */
T setSkipStatsUpdateOnDbOpen(boolean skipStatsUpdateOnDbOpen);

/**
 * If true, then DB::Open() will not update the statistics used to optimize
 * compaction decision by loading table properties from many files.
 * Turning off this feature will improve DBOpen time especially in
 * disk environment.
 *
 * Default: false
 *
 * @return true if updating stats will be skipped
 */
boolean skipStatsUpdateOnDbOpen();
1280 | ||
/**
 * Recovery mode to control the consistency while replaying WAL.
 *
 * Default: {@link WALRecoveryMode#PointInTimeRecovery}
 *
 * @param walRecoveryMode The WAL recovery mode
 *
 * @return the reference to the current options.
 */
T setWalRecoveryMode(WALRecoveryMode walRecoveryMode);

/**
 * Recovery mode to control the consistency while replaying WAL.
 *
 * Default: {@link WALRecoveryMode#PointInTimeRecovery}
 *
 * @return The WAL recovery mode
 */
WALRecoveryMode walRecoveryMode();
1300 | ||
/**
 * If set to false then recovery will fail when a prepared
 * transaction is encountered in the WAL.
 *
 * Default: false
 *
 * @param allow2pc true if two-phase-commit is enabled
 *
 * @return the reference to the current options.
 */
T setAllow2pc(boolean allow2pc);

/**
 * If set to false then recovery will fail when a prepared
 * transaction is encountered in the WAL.
 *
 * Default: false
 *
 * @return true if two-phase-commit is enabled
 */
boolean allow2pc();
1322 | ||
/**
 * A global cache for table-level rows.
 *
 * Default: null (disabled)
 *
 * @param rowCache The global row cache
 *
 * @return the reference to the current options.
 */
T setRowCache(final Cache rowCache);

/**
 * A global cache for table-level rows.
 *
 * Default: null (disabled)
 *
 * @return The global row cache
 */
Cache rowCache();
1342 | ||
494da23a TL |
/**
 * A filter object supplied to be invoked while processing write-ahead-logs
 * (WALs) during recovery. The filter provides a way to inspect log
 * records, ignoring a particular record or skipping replay.
 * The filter is invoked at startup and is invoked from a single-thread
 * currently.
 *
 * @param walFilter the filter for processing WALs during recovery.
 *
 * @return the reference to the current options.
 */
T setWalFilter(final AbstractWalFilter walFilter);

/**
 * Gets the filter for processing WALs during recovery.
 * See {@link #setWalFilter(AbstractWalFilter)}.
 *
 * @return the filter used for processing WALs during recovery.
 */
WalFilter walFilter();
1363 | ||
7c673cae FG |
/**
 * If true, then DB::Open / CreateColumnFamily / DropColumnFamily
 * / SetOptions will fail if options file is not detected or properly
 * persisted.
 *
 * DEFAULT: false
 *
 * @param failIfOptionsFileError true if we should fail if there is an error
 *     in the options file
 *
 * @return the reference to the current options.
 */
T setFailIfOptionsFileError(boolean failIfOptionsFileError);

/**
 * If true, then DB::Open / CreateColumnFamily / DropColumnFamily
 * / SetOptions will fail if options file is not detected or properly
 * persisted.
 *
 * DEFAULT: false
 *
 * @return true if we should fail if there is an error in the options file
 */
boolean failIfOptionsFileError();
1388 | ||
/**
 * If true, then print malloc stats together with rocksdb.stats
 * when printing to LOG.
 *
 * DEFAULT: false
 *
 * @param dumpMallocStats true if malloc stats should be printed to LOG
 *
 * @return the reference to the current options.
 */
T setDumpMallocStats(boolean dumpMallocStats);

/**
 * If true, then print malloc stats together with rocksdb.stats
 * when printing to LOG.
 *
 * DEFAULT: false
 *
 * @return true if malloc stats should be printed to LOG
 */
boolean dumpMallocStats();
1410 | ||
/**
 * By default RocksDB replays WAL logs and flushes them on DB open, which may
 * create very small SST files. If this option is enabled, RocksDB will try
 * to avoid (but not guarantee not to) flush during recovery. Also, existing
 * WAL logs will be kept, so that if crash happened before flush, we still
 * have logs to recover from.
 *
 * DEFAULT: false
 *
 * @param avoidFlushDuringRecovery true to try to avoid (but not guarantee
 *     not to) flush during recovery
 *
 * @return the reference to the current options.
 */
T setAvoidFlushDuringRecovery(boolean avoidFlushDuringRecovery);

/**
 * By default RocksDB replays WAL logs and flushes them on DB open, which may
 * create very small SST files. If this option is enabled, RocksDB will try
 * to avoid (but not guarantee not to) flush during recovery. Also, existing
 * WAL logs will be kept, so that if crash happened before flush, we still
 * have logs to recover from.
 *
 * DEFAULT: false
 *
 * @return true to try to avoid (but not guarantee not to) flush during
 *     recovery
 */
boolean avoidFlushDuringRecovery();
1440 | ||
/**
 * Set this option to true during creation of database if you want
 * to be able to ingest behind (call IngestExternalFile() skipping keys
 * that already exist, rather than overwriting matching keys).
 * Setting this option to true will affect 3 things:
 * 1) Disable some internal optimizations around SST file compression.
 * 2) Reserve bottom-most level for ingested files only.
 * 3) Note that num_levels should be &gt;= 3 if this option is turned on.
 *
 * DEFAULT: false
 *
 * @param allowIngestBehind true to allow ingest behind, false to disallow.
 *
 * @return the reference to the current options.
 */
T setAllowIngestBehind(final boolean allowIngestBehind);

/**
 * Returns true if ingest behind is allowed.
 * See {@link #setAllowIngestBehind(boolean)}.
 *
 * @return true if ingest behind is allowed, false otherwise.
 */
boolean allowIngestBehind();
1465 | ||
  /**
   * Needed to support differential snapshots.
   * If set to true then DB will only process deletes with sequence number
   * less than what was set by SetPreserveDeletesSequenceNumber(uint64_t ts).
   * Clients are responsible to periodically call this method to advance
   * the cutoff time. If this method is never called and preserve_deletes
   * is set to true NO deletes will ever be processed.
   * At the moment this only keeps normal deletes, SingleDeletes will
   * not be preserved.
   *
   * DEFAULT: false
   *
   * @param preserveDeletes true to preserve deletes.
   *
   * @return the reference to the current options.
   */
  T setPreserveDeletes(final boolean preserveDeletes);

  /**
   * Returns true if deletes are preserved.
   * See {@link #setPreserveDeletes(boolean)}.
   *
   * @return true if deletes are preserved, false otherwise.
   */
  boolean preserveDeletes();

  /**
   * If enabled it uses two queues for writes, one for the ones with
   * disable_memtable and one for the ones that also write to memtable. This
   * allows the memtable writes not to lag behind other writes. It can be used
   * to optimize MySQL 2PC in which only the commits, which are serial, write to
   * memtable.
   *
   * DEFAULT: false
   *
   * @param twoWriteQueues true to enable two write queues, false otherwise.
   *
   * @return the reference to the current options.
   */
  T setTwoWriteQueues(final boolean twoWriteQueues);

  /**
   * Returns true if two write queues are enabled.
   * See {@link #setTwoWriteQueues(boolean)}.
   *
   * @return true if two write queues are enabled, false otherwise.
   */
  boolean twoWriteQueues();

  /**
   * If true WAL is not flushed automatically after each write. Instead it
   * relies on manual invocation of FlushWAL to write the WAL buffer to its
   * file.
   *
   * DEFAULT: false
   *
   * @param manualWalFlush true to disable automatic WAL flushing,
   *     false otherwise.
   *
   * @return the reference to the current options.
   */
  T setManualWalFlush(final boolean manualWalFlush);

  /**
   * Returns true if automatic WAL flushing is disabled.
   * See {@link #setManualWalFlush(boolean)}.
   *
   * @return true if automatic WAL flushing is disabled, false otherwise.
   */
  boolean manualWalFlush();

  /**
   * If true, RocksDB supports flushing multiple column families and committing
   * their results atomically to MANIFEST. Note that it is not
   * necessary to set atomic_flush to true if WAL is always enabled since WAL
   * allows the database to be restored to the last persistent state in WAL.
   * This option is useful when there are column families with writes NOT
   * protected by WAL.
   * For manual flush, application has to specify which column families to
   * flush atomically in {@link RocksDB#flush(FlushOptions, List)}.
   * For auto-triggered flush, RocksDB atomically flushes ALL column families.
   *
   * Currently, any WAL-enabled writes after atomic flush may be replayed
   * independently if the process crashes later and tries to recover.
   *
   * DEFAULT: false
   *
   * @param atomicFlush true to enable atomic flush of multiple column families.
   *
   * @return the reference to the current options.
   */
  T setAtomicFlush(final boolean atomicFlush);

  /**
   * Determine if atomic flush of multiple column families is enabled.
   *
   * See {@link #setAtomicFlush(boolean)}.
   *
   * @return true if atomic flush is enabled.
   */
  boolean atomicFlush();
7c673cae | 1564 | } |