]> git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/java/src/main/java/org/rocksdb/DBOptionsInterface.java
import 14.2.4 nautilus point release
[ceph.git] / ceph / src / rocksdb / java / src / main / java / org / rocksdb / DBOptionsInterface.java
1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5
6 package org.rocksdb;
7
8 import java.util.Collection;
9 import java.util.List;
10
11 public interface DBOptionsInterface<T extends DBOptionsInterface> {
12
13 /**
14 * Use this if your DB is very small (like under 1GB) and you don't want to
15 * spend lots of memory for memtables.
16 *
17 * @return the instance of the current object.
18 */
19 T optimizeForSmallDb();
20
21 /**
22 * Use the specified object to interact with the environment,
23 * e.g. to read/write files, schedule background work, etc.
24 * Default: {@link Env#getDefault()}
25 *
26 * @param env {@link Env} instance.
27 * @return the instance of the current Options.
28 */
29 T setEnv(final Env env);
30
  /**
   * Returns the {@link Env} instance set in the options
   * (e.g. via {@link #setEnv(Env)}), or the default environment.
   *
   * @return {@link Env} instance set in the options.
   */
  Env getEnv();
37
38 /**
39 * <p>By default, RocksDB uses only one background thread for flush and
40 * compaction. Calling this function will set it up such that total of
41 * `total_threads` is used.</p>
42 *
43 * <p>You almost definitely want to call this function if your system is
44 * bottlenecked by RocksDB.</p>
45 *
46 * @param totalThreads The total number of threads to be used by RocksDB.
47 * A good value is the number of cores.
48 *
49 * @return the instance of the current Options
50 */
51 T setIncreaseParallelism(int totalThreads);
52
  /**
   * If this value is set to true, then the database will be created
   * if it is missing during {@code RocksDB.open()}.
   * Default: false
   *
   * @param flag a flag indicating whether to create the database if it is
   *     missing when opened via
   *     {@link RocksDB#open(org.rocksdb.Options, String)}.
   * @return the instance of the current Options
   * @see RocksDB#open(org.rocksdb.Options, String)
   */
  T setCreateIfMissing(boolean flag);
65
66 /**
67 * Return true if the create_if_missing flag is set to true.
68 * If true, the database will be created if it is missing.
69 *
70 * @return true if the createIfMissing option is set to true.
71 * @see #setCreateIfMissing(boolean)
72 */
73 boolean createIfMissing();
74
  /**
   * <p>If true, missing column families will be automatically created
   * on open.</p>
   *
   * <p>Default: false</p>
   *
   * @param flag a flag indicating if missing column families shall be
   *     created automatically.
   * @return the reference to the current options.
   */
  T setCreateMissingColumnFamilies(boolean flag);
86
  /**
   * Return true if the create_missing_column_families flag is set
   * to true. If true, missing column families will be created on open.
   *
   * @return true if the createMissingColumnFamilies option is set to
   *     true.
   * @see #setCreateMissingColumnFamilies(boolean)
   */
  boolean createMissingColumnFamilies();
96
97 /**
98 * If true, an error will be thrown during RocksDB.open() if the
99 * database already exists.
100 * Default: false
101 *
102 * @param errorIfExists if true, an exception will be thrown
103 * during {@code RocksDB.open()} if the database already exists.
104 * @return the reference to the current option.
105 * @see RocksDB#open(org.rocksdb.Options, String)
106 */
107 T setErrorIfExists(boolean errorIfExists);
108
109 /**
110 * If true, an error will be thrown during RocksDB.open() if the
111 * database already exists.
112 *
113 * @return if true, an error is raised when the specified database
114 * already exists before open.
115 */
116 boolean errorIfExists();
117
118 /**
119 * If true, the implementation will do aggressive checking of the
120 * data it is processing and will stop early if it detects any
121 * errors. This may have unforeseen ramifications: for example, a
122 * corruption of one DB entry may cause a large number of entries to
123 * become unreadable or for the entire DB to become unopenable.
124 * If any of the writes to the database fails (Put, Delete, Merge, Write),
125 * the database will switch to read-only mode and fail all other
126 * Write operations.
127 * Default: true
128 *
129 * @param paranoidChecks a flag to indicate whether paranoid-check
130 * is on.
131 * @return the reference to the current option.
132 */
133 T setParanoidChecks(boolean paranoidChecks);
134
135 /**
136 * If true, the implementation will do aggressive checking of the
137 * data it is processing and will stop early if it detects any
138 * errors. This may have unforeseen ramifications: for example, a
139 * corruption of one DB entry may cause a large number of entries to
140 * become unreadable or for the entire DB to become unopenable.
141 * If any of the writes to the database fails (Put, Delete, Merge, Write),
142 * the database will switch to read-only mode and fail all other
143 * Write operations.
144 *
145 * @return a boolean indicating whether paranoid-check is on.
146 */
147 boolean paranoidChecks();
148
149 /**
150 * Use to control write rate of flush and compaction. Flush has higher
151 * priority than compaction. Rate limiting is disabled if nullptr.
152 * Default: nullptr
153 *
154 * @param rateLimiter {@link org.rocksdb.RateLimiter} instance.
155 * @return the instance of the current object.
156 *
157 * @since 3.10.0
158 */
159 T setRateLimiter(RateLimiter rateLimiter);
160
161 /**
162 * Use to track SST files and control their file deletion rate.
163 *
164 * Features:
165 * - Throttle the deletion rate of the SST files.
166 * - Keep track the total size of all SST files.
167 * - Set a maximum allowed space limit for SST files that when reached
168 * the DB wont do any further flushes or compactions and will set the
169 * background error.
170 * - Can be shared between multiple dbs.
171 *
172 * Limitations:
173 * - Only track and throttle deletes of SST files in
174 * first db_path (db_name if db_paths is empty).
175 *
176 * @param sstFileManager The SST File Manager for the db.
177 * @return the instance of the current object.
178 */
179 T setSstFileManager(SstFileManager sstFileManager);
180
181 /**
182 * <p>Any internal progress/error information generated by
183 * the db will be written to the Logger if it is non-nullptr,
184 * or to a file stored in the same directory as the DB
185 * contents if info_log is nullptr.</p>
186 *
187 * <p>Default: nullptr</p>
188 *
189 * @param logger {@link Logger} instance.
190 * @return the instance of the current object.
191 */
192 T setLogger(Logger logger);
193
194 /**
195 * <p>Sets the RocksDB log level. Default level is INFO</p>
196 *
197 * @param infoLogLevel log level to set.
198 * @return the instance of the current object.
199 */
200 T setInfoLogLevel(InfoLogLevel infoLogLevel);
201
202 /**
203 * <p>Returns currently set log level.</p>
204 * @return {@link org.rocksdb.InfoLogLevel} instance.
205 */
206 InfoLogLevel infoLogLevel();
207
208 /**
209 * If {@link MutableDBOptionsInterface#maxOpenFiles()} is -1, DB will open
210 * all files on DB::Open(). You can use this option to increase the number
211 * of threads used to open the files.
212 *
213 * Default: 16
214 *
215 * @param maxFileOpeningThreads the maximum number of threads to use to
216 * open files
217 *
218 * @return the reference to the current options.
219 */
220 T setMaxFileOpeningThreads(int maxFileOpeningThreads);
221
222 /**
223 * If {@link MutableDBOptionsInterface#maxOpenFiles()} is -1, DB will open all
224 * files on DB::Open(). You can use this option to increase the number of
225 * threads used to open the files.
226 *
227 * Default: 16
228 *
229 * @return the maximum number of threads to use to open files
230 */
231 int maxFileOpeningThreads();
232
233 /**
234 * <p>Sets the statistics object which collects metrics about database operations.
235 * Statistics objects should not be shared between DB instances as
236 * it does not use any locks to prevent concurrent updates.</p>
237 *
238 * @param statistics The statistics to set
239 *
240 * @return the instance of the current object.
241 *
242 * @see RocksDB#open(org.rocksdb.Options, String)
243 */
244 T setStatistics(final Statistics statistics);
245
246 /**
247 * <p>Returns statistics object.</p>
248 *
249 * @return the instance of the statistics object or null if there is no
250 * statistics object.
251 *
252 * @see #setStatistics(Statistics)
253 */
254 Statistics statistics();
255
256 /**
257 * <p>If true, then every store to stable storage will issue a fsync.</p>
258 * <p>If false, then every store to stable storage will issue a fdatasync.
259 * This parameter should be set to true while storing data to
260 * filesystem like ext3 that can lose files after a reboot.</p>
261 * <p>Default: false</p>
262 *
263 * @param useFsync a boolean flag to specify whether to use fsync
264 * @return the instance of the current object.
265 */
266 T setUseFsync(boolean useFsync);
267
268 /**
269 * <p>If true, then every store to stable storage will issue a fsync.</p>
270 * <p>If false, then every store to stable storage will issue a fdatasync.
271 * This parameter should be set to true while storing data to
272 * filesystem like ext3 that can lose files after a reboot.</p>
273 *
274 * @return boolean value indicating if fsync is used.
275 */
276 boolean useFsync();
277
278 /**
279 * A list of paths where SST files can be put into, with its target size.
280 * Newer data is placed into paths specified earlier in the vector while
281 * older data gradually moves to paths specified later in the vector.
282 *
283 * For example, you have a flash device with 10GB allocated for the DB,
284 * as well as a hard drive of 2TB, you should config it to be:
285 * [{"/flash_path", 10GB}, {"/hard_drive", 2TB}]
286 *
287 * The system will try to guarantee data under each path is close to but
288 * not larger than the target size. But current and future file sizes used
289 * by determining where to place a file are based on best-effort estimation,
290 * which means there is a chance that the actual size under the directory
291 * is slightly more than target size under some workloads. User should give
292 * some buffer room for those cases.
293 *
294 * If none of the paths has sufficient room to place a file, the file will
295 * be placed to the last path anyway, despite to the target size.
296 *
297 * Placing newer data to earlier paths is also best-efforts. User should
298 * expect user files to be placed in higher levels in some extreme cases.
299 *
300 * If left empty, only one path will be used, which is db_name passed when
301 * opening the DB.
302 *
303 * Default: empty
304 *
305 * @param dbPaths the paths and target sizes
306 *
307 * @return the reference to the current options
308 */
309 T setDbPaths(final Collection<DbPath> dbPaths);
310
311 /**
312 * A list of paths where SST files can be put into, with its target size.
313 * Newer data is placed into paths specified earlier in the vector while
314 * older data gradually moves to paths specified later in the vector.
315 *
316 * For example, you have a flash device with 10GB allocated for the DB,
317 * as well as a hard drive of 2TB, you should config it to be:
318 * [{"/flash_path", 10GB}, {"/hard_drive", 2TB}]
319 *
320 * The system will try to guarantee data under each path is close to but
321 * not larger than the target size. But current and future file sizes used
322 * by determining where to place a file are based on best-effort estimation,
323 * which means there is a chance that the actual size under the directory
324 * is slightly more than target size under some workloads. User should give
325 * some buffer room for those cases.
326 *
327 * If none of the paths has sufficient room to place a file, the file will
328 * be placed to the last path anyway, despite to the target size.
329 *
330 * Placing newer data to earlier paths is also best-efforts. User should
331 * expect user files to be placed in higher levels in some extreme cases.
332 *
333 * If left empty, only one path will be used, which is db_name passed when
334 * opening the DB.
335 *
336 * Default: {@link java.util.Collections#emptyList()}
337 *
338 * @return dbPaths the paths and target sizes
339 */
340 List<DbPath> dbPaths();
341
342 /**
343 * This specifies the info LOG dir.
344 * If it is empty, the log files will be in the same dir as data.
345 * If it is non empty, the log files will be in the specified dir,
346 * and the db data dir's absolute path will be used as the log file
347 * name's prefix.
348 *
349 * @param dbLogDir the path to the info log directory
350 * @return the instance of the current object.
351 */
352 T setDbLogDir(String dbLogDir);
353
354 /**
355 * Returns the directory of info log.
356 *
357 * If it is empty, the log files will be in the same dir as data.
358 * If it is non empty, the log files will be in the specified dir,
359 * and the db data dir's absolute path will be used as the log file
360 * name's prefix.
361 *
362 * @return the path to the info log directory
363 */
364 String dbLogDir();
365
366 /**
367 * This specifies the absolute dir path for write-ahead logs (WAL).
368 * If it is empty, the log files will be in the same dir as data,
369 * dbname is used as the data dir by default
370 * If it is non empty, the log files will be in kept the specified dir.
371 * When destroying the db,
372 * all log files in wal_dir and the dir itself is deleted
373 *
374 * @param walDir the path to the write-ahead-log directory.
375 * @return the instance of the current object.
376 */
377 T setWalDir(String walDir);
378
379 /**
380 * Returns the path to the write-ahead-logs (WAL) directory.
381 *
382 * If it is empty, the log files will be in the same dir as data,
383 * dbname is used as the data dir by default
384 * If it is non empty, the log files will be in kept the specified dir.
385 * When destroying the db,
386 * all log files in wal_dir and the dir itself is deleted
387 *
388 * @return the path to the write-ahead-logs (WAL) directory.
389 */
390 String walDir();
391
392 /**
393 * The periodicity when obsolete files get deleted. The default
394 * value is 6 hours. The files that get out of scope by compaction
395 * process will still get automatically delete on every compaction,
396 * regardless of this setting
397 *
398 * @param micros the time interval in micros
399 * @return the instance of the current object.
400 */
401 T setDeleteObsoleteFilesPeriodMicros(long micros);
402
403 /**
404 * The periodicity when obsolete files get deleted. The default
405 * value is 6 hours. The files that get out of scope by compaction
406 * process will still get automatically delete on every compaction,
407 * regardless of this setting
408 *
409 * @return the time interval in micros when obsolete files will be deleted.
410 */
411 long deleteObsoleteFilesPeriodMicros();
412
413 /**
414 * This value represents the maximum number of threads that will
415 * concurrently perform a compaction job by breaking it into multiple,
416 * smaller ones that are run simultaneously.
417 * Default: 1 (i.e. no subcompactions)
418 *
419 * @param maxSubcompactions The maximum number of threads that will
420 * concurrently perform a compaction job
421 *
422 * @return the instance of the current object.
423 */
424 T setMaxSubcompactions(int maxSubcompactions);
425
426 /**
427 * This value represents the maximum number of threads that will
428 * concurrently perform a compaction job by breaking it into multiple,
429 * smaller ones that are run simultaneously.
430 * Default: 1 (i.e. no subcompactions)
431 *
432 * @return The maximum number of threads that will concurrently perform a
433 * compaction job
434 */
435 int maxSubcompactions();
436
  /**
   * Specifies the maximum number of concurrent background flush jobs.
   * If you're increasing this, also consider increasing the number of
   * threads in the HIGH priority thread pool.
   * Default: 1
   *
   * @param maxBackgroundFlushes number of max concurrent flush jobs
   * @return the instance of the current object.
   *
   * @see RocksEnv#setBackgroundThreads(int)
   * @see RocksEnv#setBackgroundThreads(int, Priority)
   * @see MutableDBOptionsInterface#maxBackgroundCompactions()
   *
   * @deprecated Use {@link MutableDBOptionsInterface#setMaxBackgroundJobs(int)}
   */
  @Deprecated
  T setMaxBackgroundFlushes(int maxBackgroundFlushes);
454
  /**
   * Returns the maximum number of concurrent background flush jobs.
   * If you're increasing this, also consider increasing the number of
   * threads in the HIGH priority thread pool.
   * Default: 1
   *
   * @return the maximum number of concurrent background flush jobs.
   * @see RocksEnv#setBackgroundThreads(int)
   * @see RocksEnv#setBackgroundThreads(int, Priority)
   *
   * @deprecated Use {@link MutableDBOptionsInterface#setMaxBackgroundJobs(int)}
   */
  @Deprecated
  int maxBackgroundFlushes();
467
  /**
   * Specifies the maximum size of an info log file. If the current log
   * file is larger than `max_log_file_size`, a new info log file will
   * be created.
   * If 0, all logs will be written to one log file.
   *
   * @param maxLogFileSize the maximum size of an info log file.
   * @return the instance of the current object.
   * @throws java.lang.IllegalArgumentException thrown on 32-Bit platforms
   *     while overflowing the underlying platform specific value.
   */
  T setMaxLogFileSize(long maxLogFileSize);
480
  /**
   * Returns the maximum size of an info log file. If the current log
   * file is larger than this size, a new info log file will be created.
   * If 0, all logs will be written to one log file.
   *
   * @return the maximum size of the info log file.
   */
  long maxLogFileSize();
489
490 /**
491 * Specifies the time interval for the info log file to roll (in seconds).
492 * If specified with non-zero value, log file will be rolled
493 * if it has been active longer than `log_file_time_to_roll`.
494 * Default: 0 (disabled)
495 *
496 * @param logFileTimeToRoll the time interval in seconds.
497 * @return the instance of the current object.
498 * @throws java.lang.IllegalArgumentException thrown on 32-Bit platforms
499 * while overflowing the underlying platform specific value.
500 */
501 T setLogFileTimeToRoll(long logFileTimeToRoll);
502
503 /**
504 * Returns the time interval for the info log file to roll (in seconds).
505 * If specified with non-zero value, log file will be rolled
506 * if it has been active longer than `log_file_time_to_roll`.
507 * Default: 0 (disabled)
508 *
509 * @return the time interval in seconds.
510 */
511 long logFileTimeToRoll();
512
513 /**
514 * Specifies the maximum number of info log files to be kept.
515 * Default: 1000
516 *
517 * @param keepLogFileNum the maximum number of info log files to be kept.
518 * @return the instance of the current object.
519 * @throws java.lang.IllegalArgumentException thrown on 32-Bit platforms
520 * while overflowing the underlying platform specific value.
521 */
522 T setKeepLogFileNum(long keepLogFileNum);
523
524 /**
525 * Returns the maximum number of info log files to be kept.
526 * Default: 1000
527 *
528 * @return the maximum number of info log files to be kept.
529 */
530 long keepLogFileNum();
531
532 /**
533 * Recycle log files.
534 *
535 * If non-zero, we will reuse previously written log files for new
536 * logs, overwriting the old data. The value indicates how many
537 * such files we will keep around at any point in time for later
538 * use.
539 *
540 * This is more efficient because the blocks are already
541 * allocated and fdatasync does not need to update the inode after
542 * each write.
543 *
544 * Default: 0
545 *
546 * @param recycleLogFileNum the number of log files to keep for recycling
547 *
548 * @return the reference to the current options
549 */
550 T setRecycleLogFileNum(long recycleLogFileNum);
551
552 /**
553 * Recycle log files.
554 *
555 * If non-zero, we will reuse previously written log files for new
556 * logs, overwriting the old data. The value indicates how many
557 * such files we will keep around at any point in time for later
558 * use.
559 *
560 * This is more efficient because the blocks are already
561 * allocated and fdatasync does not need to update the inode after
562 * each write.
563 *
564 * Default: 0
565 *
566 * @return the number of log files kept for recycling
567 */
568 long recycleLogFileNum();
569
  /**
   * Manifest file is rolled over on reaching this limit.
   * The older manifest file will be deleted.
   * The default value is MAX_INT so that roll-over does not take place.
   *
   * @param maxManifestFileSize the size limit of a manifest file.
   * @return the instance of the current object.
   */
  T setMaxManifestFileSize(long maxManifestFileSize);
579
  /**
   * Manifest file is rolled over on reaching this limit.
   * The older manifest file will be deleted.
   * The default value is MAX_INT so that roll-over does not take place.
   *
   * @return the size limit of a manifest file.
   */
  long maxManifestFileSize();
588
  /**
   * Number of shards used for table cache.
   *
   * @param tableCacheNumshardbits the number of shards
   * @return the instance of the current object.
   */
  T setTableCacheNumshardbits(int tableCacheNumshardbits);
596
597 /**
598 * Number of shards used for table cache.
599 *
600 * @return the number of shards used for table cache.
601 */
602 int tableCacheNumshardbits();
603
  /**
   * {@link #walTtlSeconds()} and {@link #walSizeLimitMB()} affect how archived logs
   * will be deleted.
   * <ol>
   * <li>If both set to 0, logs will be deleted asap and will not get into
   * the archive.</li>
   * <li>If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0,
   * WAL files will be checked every 10 min and if total size is greater
   * than WAL_size_limit_MB, they will be deleted starting with the
   * earliest until size_limit is met. All empty files will be deleted.</li>
   * <li>If WAL_ttl_seconds is not 0 and WAL_size_limit_MB is 0, then
   * WAL files will be checked every WAL_ttl_seconds / 2 and those that
   * are older than WAL_ttl_seconds will be deleted.</li>
   * <li>If both are not 0, WAL files will be checked every 10 min and both
   * checks will be performed with ttl being first.</li>
   * </ol>
   *
   * @param walTtlSeconds the ttl seconds
   * @return the instance of the current object.
   * @see #setWalSizeLimitMB(long)
   */
  T setWalTtlSeconds(long walTtlSeconds);
626
  /**
   * {@link #walTtlSeconds()} and {@link #walSizeLimitMB()} affect how archived logs
   * will be deleted.
   * <ol>
   * <li>If both set to 0, logs will be deleted asap and will not get into
   * the archive.</li>
   * <li>If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0,
   * WAL files will be checked every 10 min and if total size is greater
   * than WAL_size_limit_MB, they will be deleted starting with the
   * earliest until size_limit is met. All empty files will be deleted.</li>
   * <li>If WAL_ttl_seconds is not 0 and WAL_size_limit_MB is 0, then
   * WAL files will be checked every WAL_ttl_seconds / 2 and those that
   * are older than WAL_ttl_seconds will be deleted.</li>
   * <li>If both are not 0, WAL files will be checked every 10 min and both
   * checks will be performed with ttl being first.</li>
   * </ol>
   *
   * @return the wal-ttl seconds
   * @see #walSizeLimitMB()
   */
  long walTtlSeconds();
648
  /**
   * {@link #walTtlSeconds()} and {@link #walSizeLimitMB()} affect how archived logs
   * will be deleted.
   * <ol>
   * <li>If both set to 0, logs will be deleted asap and will not get into
   * the archive.</li>
   * <li>If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0,
   * WAL files will be checked every 10 min and if total size is greater
   * than WAL_size_limit_MB, they will be deleted starting with the
   * earliest until size_limit is met. All empty files will be deleted.</li>
   * <li>If WAL_ttl_seconds is not 0 and WAL_size_limit_MB is 0, then
   * WAL files will be checked every WAL_ttl_seconds / 2 and those that
   * are older than WAL_ttl_seconds will be deleted.</li>
   * <li>If both are not 0, WAL files will be checked every 10 min and both
   * checks will be performed with ttl being first.</li>
   * </ol>
   *
   * @param sizeLimitMB size limit in mega-bytes.
   * @return the instance of the current object.
   * @see #setWalTtlSeconds(long)
   */
  T setWalSizeLimitMB(long sizeLimitMB);
671
  /**
   * {@link #walTtlSeconds()} and {@link #walSizeLimitMB()} affect how archived logs
   * will be deleted.
   * <ol>
   * <li>If both set to 0, logs will be deleted asap and will not get into
   * the archive.</li>
   * <li>If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0,
   * WAL files will be checked every 10 min and if total size is greater
   * than WAL_size_limit_MB, they will be deleted starting with the
   * earliest until size_limit is met. All empty files will be deleted.</li>
   * <li>If WAL_ttl_seconds is not 0 and WAL_size_limit_MB is 0, then
   * WAL files will be checked every WAL_ttl_seconds / 2 and those that
   * are older than WAL_ttl_seconds will be deleted.</li>
   * <li>If both are not 0, WAL files will be checked every 10 min and both
   * checks will be performed with ttl being first.</li>
   * </ol>
   * @return size limit in mega-bytes.
   * @see #setWalSizeLimitMB(long)
   */
  long walSizeLimitMB();
692
693 /**
694 * Number of bytes to preallocate (via fallocate) the manifest
695 * files. Default is 4mb, which is reasonable to reduce random IO
696 * as well as prevent overallocation for mounts that preallocate
697 * large amounts of data (such as xfs's allocsize option).
698 *
699 * @param size the size in byte
700 * @return the instance of the current object.
701 * @throws java.lang.IllegalArgumentException thrown on 32-Bit platforms
702 * while overflowing the underlying platform specific value.
703 */
704 T setManifestPreallocationSize(long size);
705
706 /**
707 * Number of bytes to preallocate (via fallocate) the manifest
708 * files. Default is 4mb, which is reasonable to reduce random IO
709 * as well as prevent overallocation for mounts that preallocate
710 * large amounts of data (such as xfs's allocsize option).
711 *
712 * @return size in bytes.
713 */
714 long manifestPreallocationSize();
715
716 /**
717 * Enable the OS to use direct I/O for reading sst tables.
718 * Default: false
719 *
720 * @param useDirectReads if true, then direct read is enabled
721 * @return the instance of the current object.
722 */
723 T setUseDirectReads(boolean useDirectReads);
724
725 /**
726 * Enable the OS to use direct I/O for reading sst tables.
727 * Default: false
728 *
729 * @return if true, then direct reads are enabled
730 */
731 boolean useDirectReads();
732
733 /**
734 * Enable the OS to use direct reads and writes in flush and
735 * compaction
736 * Default: false
737 *
738 * @param useDirectIoForFlushAndCompaction if true, then direct
739 * I/O will be enabled for background flush and compactions
740 * @return the instance of the current object.
741 */
742 T setUseDirectIoForFlushAndCompaction(boolean useDirectIoForFlushAndCompaction);
743
744 /**
745 * Enable the OS to use direct reads and writes in flush and
746 * compaction
747 *
748 * @return if true, then direct I/O is enabled for flush and
749 * compaction
750 */
751 boolean useDirectIoForFlushAndCompaction();
752
753 /**
754 * Whether fallocate calls are allowed
755 *
756 * @param allowFAllocate false if fallocate() calls are bypassed
757 *
758 * @return the reference to the current options.
759 */
760 T setAllowFAllocate(boolean allowFAllocate);
761
762 /**
763 * Whether fallocate calls are allowed
764 *
765 * @return false if fallocate() calls are bypassed
766 */
767 boolean allowFAllocate();
768
769 /**
770 * Allow the OS to mmap file for reading sst tables.
771 * Default: false
772 *
773 * @param allowMmapReads true if mmap reads are allowed.
774 * @return the instance of the current object.
775 */
776 T setAllowMmapReads(boolean allowMmapReads);
777
778 /**
779 * Allow the OS to mmap file for reading sst tables.
780 * Default: false
781 *
782 * @return true if mmap reads are allowed.
783 */
784 boolean allowMmapReads();
785
  /**
   * Allow the OS to mmap file for writing. Default: false
   *
   * @param allowMmapWrites true if mmap writes are allowed.
   * @return the instance of the current object.
   */
  T setAllowMmapWrites(boolean allowMmapWrites);
793
794 /**
795 * Allow the OS to mmap file for writing. Default: false
796 *
797 * @return true if mmap writes are allowed.
798 */
799 boolean allowMmapWrites();
800
801 /**
802 * Disable child process inherit open files. Default: true
803 *
804 * @param isFdCloseOnExec true if child process inheriting open
805 * files is disabled.
806 * @return the instance of the current object.
807 */
808 T setIsFdCloseOnExec(boolean isFdCloseOnExec);
809
810 /**
811 * Disable child process inherit open files. Default: true
812 *
813 * @return true if child process inheriting open files is disabled.
814 */
815 boolean isFdCloseOnExec();
816
817 /**
818 * If set true, will hint the underlying file system that the file
819 * access pattern is random, when a sst file is opened.
820 * Default: true
821 *
822 * @param adviseRandomOnOpen true if hinting random access is on.
823 * @return the instance of the current object.
824 */
825 T setAdviseRandomOnOpen(boolean adviseRandomOnOpen);
826
827 /**
828 * If set true, will hint the underlying file system that the file
829 * access pattern is random, when a sst file is opened.
830 * Default: true
831 *
832 * @return true if hinting random access is on.
833 */
834 boolean adviseRandomOnOpen();
835
836 /**
837 * Amount of data to build up in memtables across all column
838 * families before writing to disk.
839 *
840 * This is distinct from {@link ColumnFamilyOptions#writeBufferSize()},
841 * which enforces a limit for a single memtable.
842 *
843 * This feature is disabled by default. Specify a non-zero value
844 * to enable it.
845 *
846 * Default: 0 (disabled)
847 *
848 * @param dbWriteBufferSize the size of the write buffer
849 *
850 * @return the reference to the current options.
851 */
852 T setDbWriteBufferSize(long dbWriteBufferSize);
853
854 /**
855 * Use passed {@link WriteBufferManager} to control memory usage across
856 * multiple column families and/or DB instances.
857 *
858 * Check <a href="https://github.com/facebook/rocksdb/wiki/Write-Buffer-Manager">
859 * https://github.com/facebook/rocksdb/wiki/Write-Buffer-Manager</a>
860 * for more details on when to use it
861 *
862 * @param writeBufferManager The WriteBufferManager to use
863 * @return the reference of the current options.
864 */
865 T setWriteBufferManager(final WriteBufferManager writeBufferManager);
866
867 /**
868 * Reference to {@link WriteBufferManager} used by it. <br>
869 *
870 * Default: null (Disabled)
871 *
872 * @return a reference to WriteBufferManager
873 */
874 WriteBufferManager writeBufferManager();
875
876 /**
877 * Amount of data to build up in memtables across all column
878 * families before writing to disk.
879 *
880 * This is distinct from {@link ColumnFamilyOptions#writeBufferSize()},
881 * which enforces a limit for a single memtable.
882 *
883 * This feature is disabled by default. Specify a non-zero value
884 * to enable it.
885 *
886 * Default: 0 (disabled)
887 *
888 * @return the size of the write buffer
889 */
890 long dbWriteBufferSize();
891
  /**
   * Specify the file access pattern once a compaction is started.
   * It will be applied to all input files of a compaction.
   *
   * Default: {@link AccessHint#NORMAL}
   *
   * @param accessHint The access hint
   *
   * @return the reference to the current options.
   */
  T setAccessHintOnCompactionStart(final AccessHint accessHint);
903
  /**
   * Specify the file access pattern once a compaction is started.
   * It will be applied to all input files of a compaction.
   *
   * Default: {@link AccessHint#NORMAL}
   *
   * @return The access hint
   */
  AccessHint accessHintOnCompactionStart();
913
  /**
   * If true, always create a new file descriptor and new table reader
   * for compaction inputs. Turning this parameter on may introduce extra
   * memory usage in the table reader, if it allocates extra memory
   * for indexes. This will allow file descriptor prefetch options
   * to be set for compaction input files and not to impact file
   * descriptors for the same file used by user queries.
   * Suggest to enable {@link BlockBasedTableConfig#cacheIndexAndFilterBlocks()}
   * for this mode if using block-based table.
   *
   * Default: false
   *
   * @param newTableReaderForCompactionInputs true if a new file descriptor and
   *     table reader should be created for compaction inputs
   *
   * @return the reference to the current options.
   */
  T setNewTableReaderForCompactionInputs(
      boolean newTableReaderForCompactionInputs);
933
  /**
   * If true, always create a new file descriptor and new table reader
   * for compaction inputs. Turning this parameter on may introduce extra
   * memory usage in the table reader, if it allocates extra memory
   * for indexes. This will allow file descriptor prefetch options
   * to be set for compaction input files and not to impact file
   * descriptors for the same file used by user queries.
   * Suggest to enable {@link BlockBasedTableConfig#cacheIndexAndFilterBlocks()}
   * for this mode if using block-based table.
   *
   * Default: false
   *
   * @return true if a new file descriptor and table reader are created for
   *     compaction inputs
   */
  boolean newTableReaderForCompactionInputs();
950
  /**
   * This is a maximum buffer size that is used by WinMmapReadableFile in
   * unbuffered disk I/O mode. We need to maintain an aligned buffer for
   * reads. We allow the buffer to grow until the specified value and then
   * for bigger requests allocate one shot buffers. In unbuffered mode we
   * always bypass the read-ahead buffer at ReadaheadRandomAccessFile.
   * When read-ahead is required we then make use of the
   * {@link MutableDBOptionsInterface#compactionReadaheadSize()} value and
   * always try to read ahead.
   * With read-ahead we always pre-allocate the buffer to the size instead of
   * growing it up to a limit.
   *
   * This option is currently honored only on Windows.
   *
   * Default: 1 Mb
   *
   * Special value: 0 - means do not maintain per instance buffer. Allocate
   * per request buffer and avoid locking.
   *
   * @param randomAccessMaxBufferSize the maximum size of the random access
   *     buffer
   *
   * @return the reference to the current options.
   */
  T setRandomAccessMaxBufferSize(long randomAccessMaxBufferSize);
976
  /**
   * This is a maximum buffer size that is used by WinMmapReadableFile in
   * unbuffered disk I/O mode. We need to maintain an aligned buffer for
   * reads. We allow the buffer to grow until the specified value and then
   * for bigger requests allocate one shot buffers. In unbuffered mode we
   * always bypass the read-ahead buffer at ReadaheadRandomAccessFile.
   * When read-ahead is required we then make use of the
   * {@link MutableDBOptionsInterface#compactionReadaheadSize()} value and
   * always try to read ahead. With read-ahead we always pre-allocate the
   * buffer to the size instead of growing it up to a limit.
   *
   * This option is currently honored only on Windows.
   *
   * Default: 1 Mb
   *
   * Special value: 0 - means do not maintain per instance buffer. Allocate
   * per request buffer and avoid locking.
   *
   * @return the maximum size of the random access buffer
   */
  long randomAccessMaxBufferSize();
998
  /**
   * Use adaptive mutex, which spins in the user space before resorting
   * to the kernel. This could reduce context switches when the mutex is not
   * heavily contended. However, if the mutex is hot, we could end up
   * wasting spin time.
   * Default: false
   *
   * @param useAdaptiveMutex true if adaptive mutex is used.
   * @return the instance of the current object.
   */
  T setUseAdaptiveMutex(boolean useAdaptiveMutex);
1010
  /**
   * Use adaptive mutex, which spins in the user space before resorting
   * to the kernel. This could reduce context switches when the mutex is not
   * heavily contended. However, if the mutex is hot, we could end up
   * wasting spin time.
   * Default: false
   *
   * @return true if adaptive mutex is used.
   */
  boolean useAdaptiveMutex();
1021
1022 //TODO(AR) NOW
1023 // /**
1024 // * Sets the {@link EventListener}s whose callback functions
1025 // * will be called when specific RocksDB event happens.
1026 // *
1027 // * @param listeners the listeners who should be notified on various events.
1028 // *
1029 // * @return the instance of the current object.
1030 // */
1031 // T setListeners(final List<EventListener> listeners);
1032 //
1033 // /**
1034 // * Gets the {@link EventListener}s whose callback functions
1035 // * will be called when specific RocksDB event happens.
1036 // *
1037 // * @return a collection of Event listeners.
1038 // */
1039 // Collection<EventListener> listeners();
1040
  /**
   * If true, then the status of the threads involved in this DB will
   * be tracked and available via the GetThreadList() API.
   *
   * Default: false
   *
   * @param enableThreadTracking true to enable tracking
   *
   * @return the reference to the current options.
   */
  T setEnableThreadTracking(boolean enableThreadTracking);
1052
  /**
   * If true, then the status of the threads involved in this DB will
   * be tracked and available via the GetThreadList() API.
   *
   * Default: false
   *
   * @return true if tracking is enabled
   */
  boolean enableThreadTracking();
1062
  /**
   * By default, a single write thread queue is maintained. The thread that
   * gets to the head of the queue becomes the write batch group leader and is
   * responsible for writing to the WAL and memtable for the batch group.
   *
   * If {@link #enablePipelinedWrite()} is true, separate write thread queues
   * are maintained for WAL writes and memtable writes. A write thread first
   * enters the WAL writer queue and then the memtable writer queue. A pending
   * thread on the WAL writer queue thus only has to wait for previous writers
   * to finish their WAL writing but not the memtable writing. Enabling the
   * feature may improve write throughput and reduce latency of the prepare
   * phase of two-phase commit.
   *
   * Default: false
   *
   * @param enablePipelinedWrite true to enable pipelined writes
   *
   * @return the reference to the current options.
   */
  T setEnablePipelinedWrite(final boolean enablePipelinedWrite);
1083
  /**
   * Returns true if pipelined writes are enabled.
   * See {@link #setEnablePipelinedWrite(boolean)}.
   *
   * @return true if pipelined writes are enabled, false otherwise.
   */
  boolean enablePipelinedWrite();
1091
  /**
   * If true, allow multi-writers to update mem tables in parallel.
   * Only some memtable factories support concurrent writes; currently it
   * is implemented only for SkipListFactory. Concurrent memtable writes
   * are not compatible with inplace_update_support or filter_deletes.
   * It is strongly recommended to set
   * {@link #setEnableWriteThreadAdaptiveYield(boolean)} if you are going to use
   * this feature.
   * Default: false
   *
   * @param allowConcurrentMemtableWrite true to enable concurrent writes
   *     for the memtable
   *
   * @return the reference to the current options.
   */
  T setAllowConcurrentMemtableWrite(boolean allowConcurrentMemtableWrite);
1108
  /**
   * If true, allow multi-writers to update mem tables in parallel.
   * Only some memtable factories support concurrent writes; currently it
   * is implemented only for SkipListFactory. Concurrent memtable writes
   * are not compatible with inplace_update_support or filter_deletes.
   * It is strongly recommended to set
   * {@link #setEnableWriteThreadAdaptiveYield(boolean)} if you are going to use
   * this feature.
   * Default: false
   *
   * @return true if concurrent writes are enabled for the memtable
   */
  boolean allowConcurrentMemtableWrite();
1122
  /**
   * If true, threads synchronizing with the write batch group leader will
   * wait for up to {@link #writeThreadMaxYieldUsec()} before blocking on a
   * mutex. This can substantially improve throughput for concurrent workloads,
   * regardless of whether {@link #allowConcurrentMemtableWrite()} is enabled.
   * Default: false
   *
   * @param enableWriteThreadAdaptiveYield true to enable adaptive yield for the
   *     write threads
   *
   * @return the reference to the current options.
   */
  T setEnableWriteThreadAdaptiveYield(
      boolean enableWriteThreadAdaptiveYield);
1137
  /**
   * If true, threads synchronizing with the write batch group leader will
   * wait for up to {@link #writeThreadMaxYieldUsec()} before blocking on a
   * mutex. This can substantially improve throughput for concurrent workloads,
   * regardless of whether {@link #allowConcurrentMemtableWrite()} is enabled.
   * Default: false
   *
   * @return true if adaptive yield is enabled
   *     for the writing threads
   */
  boolean enableWriteThreadAdaptiveYield();
1149
  /**
   * The maximum number of microseconds that a write operation will use
   * a yielding spin loop to coordinate with other write threads before
   * blocking on a mutex. (Assuming {@link #writeThreadSlowYieldUsec()} is
   * set properly) increasing this value is likely to increase RocksDB
   * throughput at the expense of increased CPU usage.
   * Default: 100
   *
   * @param writeThreadMaxYieldUsec maximum number of microseconds
   *
   * @return the reference to the current options.
   */
  T setWriteThreadMaxYieldUsec(long writeThreadMaxYieldUsec);
1163
  /**
   * The maximum number of microseconds that a write operation will use
   * a yielding spin loop to coordinate with other write threads before
   * blocking on a mutex. (Assuming {@link #writeThreadSlowYieldUsec()} is
   * set properly) increasing this value is likely to increase RocksDB
   * throughput at the expense of increased CPU usage.
   * Default: 100
   *
   * @return the maximum number of microseconds
   */
  long writeThreadMaxYieldUsec();
1175
  /**
   * The latency in microseconds after which a std::this_thread::yield
   * call (sched_yield on Linux) is considered to be a signal that
   * other processes or threads would like to use the current core.
   * Increasing this makes writer threads more likely to take CPU
   * by spinning, which will show up as an increase in the number of
   * involuntary context switches.
   * Default: 3
   *
   * @param writeThreadSlowYieldUsec the latency in microseconds
   *
   * @return the reference to the current options.
   */
  T setWriteThreadSlowYieldUsec(long writeThreadSlowYieldUsec);
1190
  /**
   * The latency in microseconds after which a std::this_thread::yield
   * call (sched_yield on Linux) is considered to be a signal that
   * other processes or threads would like to use the current core.
   * Increasing this makes writer threads more likely to take CPU
   * by spinning, which will show up as an increase in the number of
   * involuntary context switches.
   * Default: 3
   *
   * @return the latency in microseconds
   */
  long writeThreadSlowYieldUsec();
1203
  /**
   * If true, then DB::Open() will not update the statistics used to optimize
   * compaction decisions by loading table properties from many files.
   * Turning off this feature will improve DBOpen time especially in
   * disk environments.
   *
   * Default: false
   *
   * @param skipStatsUpdateOnDbOpen true if updating stats will be skipped
   *
   * @return the reference to the current options.
   */
  T setSkipStatsUpdateOnDbOpen(boolean skipStatsUpdateOnDbOpen);
1217
  /**
   * If true, then DB::Open() will not update the statistics used to optimize
   * compaction decisions by loading table properties from many files.
   * Turning off this feature will improve DBOpen time especially in
   * disk environments.
   *
   * Default: false
   *
   * @return true if updating stats will be skipped
   */
  boolean skipStatsUpdateOnDbOpen();
1229
  /**
   * Recovery mode to control the consistency while replaying the WAL.
   *
   * Default: {@link WALRecoveryMode#PointInTimeRecovery}
   *
   * @param walRecoveryMode The WAL recovery mode
   *
   * @return the reference to the current options.
   */
  T setWalRecoveryMode(WALRecoveryMode walRecoveryMode);
1240
  /**
   * Recovery mode to control the consistency while replaying the WAL.
   *
   * Default: {@link WALRecoveryMode#PointInTimeRecovery}
   *
   * @return The WAL recovery mode
   */
  WALRecoveryMode walRecoveryMode();
1249
  /**
   * If set to false then recovery will fail when a prepared
   * transaction is encountered in the WAL.
   *
   * Default: false
   *
   * @param allow2pc true if two-phase-commit is enabled
   *
   * @return the reference to the current options.
   */
  T setAllow2pc(boolean allow2pc);
1261
  /**
   * If set to false then recovery will fail when a prepared
   * transaction is encountered in the WAL.
   *
   * Default: false
   *
   * @return true if two-phase-commit is enabled
   */
  boolean allow2pc();
1271
  /**
   * A global cache for table-level rows.
   *
   * Default: null (disabled)
   *
   * @param rowCache The global row cache
   *
   * @return the reference to the current options.
   */
  T setRowCache(final Cache rowCache);
1282
  /**
   * A global cache for table-level rows.
   *
   * Default: null (disabled)
   *
   * @return The global row cache
   */
  Cache rowCache();
1291
  /**
   * A filter object supplied to be invoked while processing write-ahead-logs
   * (WALs) during recovery. The filter provides a way to inspect log
   * records, ignoring a particular record or skipping replay.
   * The filter is invoked at startup and is currently invoked from a
   * single thread.
   *
   * @param walFilter the filter for processing WALs during recovery.
   *
   * @return the reference to the current options.
   */
  T setWalFilter(final AbstractWalFilter walFilter);
1304
  /**
   * Gets the filter for processing WALs during recovery.
   * See {@link #setWalFilter(AbstractWalFilter)}.
   *
   * @return the filter used for processing WALs during recovery.
   */
  WalFilter walFilter();
1312
  /**
   * If true, then DB::Open / CreateColumnFamily / DropColumnFamily
   * / SetOptions will fail if the options file is not detected or properly
   * persisted.
   *
   * DEFAULT: false
   *
   * @param failIfOptionsFileError true if we should fail if there is an error
   *     in the options file
   *
   * @return the reference to the current options.
   */
  T setFailIfOptionsFileError(boolean failIfOptionsFileError);
1326
  /**
   * If true, then DB::Open / CreateColumnFamily / DropColumnFamily
   * / SetOptions will fail if the options file is not detected or properly
   * persisted.
   *
   * DEFAULT: false
   *
   * @return true if we should fail if there is an error in the options file
   */
  boolean failIfOptionsFileError();
1337
  /**
   * If true, then print malloc stats together with rocksdb.stats
   * when printing to LOG.
   *
   * DEFAULT: false
   *
   * @param dumpMallocStats true if malloc stats should be printed to LOG
   *
   * @return the reference to the current options.
   */
  T setDumpMallocStats(boolean dumpMallocStats);
1349
  /**
   * If true, then print malloc stats together with rocksdb.stats
   * when printing to LOG.
   *
   * DEFAULT: false
   *
   * @return true if malloc stats should be printed to LOG
   */
  boolean dumpMallocStats();
1359
  /**
   * By default RocksDB replays WAL logs and flushes them on DB open, which may
   * create very small SST files. If this option is enabled, RocksDB will try
   * to avoid (but not guarantee not to) flush during recovery. Also, existing
   * WAL logs will be kept, so that if a crash happened before flush, we still
   * have logs to recover from.
   *
   * DEFAULT: false
   *
   * @param avoidFlushDuringRecovery true to try to avoid (but not guarantee
   *     not to) flush during recovery
   *
   * @return the reference to the current options.
   */
  T setAvoidFlushDuringRecovery(boolean avoidFlushDuringRecovery);
1375
  /**
   * By default RocksDB replays WAL logs and flushes them on DB open, which may
   * create very small SST files. If this option is enabled, RocksDB will try
   * to avoid (but not guarantee not to) flush during recovery. Also, existing
   * WAL logs will be kept, so that if a crash happened before flush, we still
   * have logs to recover from.
   *
   * DEFAULT: false
   *
   * @return true to try to avoid (but not guarantee not to) flush during
   *     recovery
   */
  boolean avoidFlushDuringRecovery();
1389
  /**
   * Set this option to true during creation of database if you want
   * to be able to ingest behind (call IngestExternalFile() skipping keys
   * that already exist, rather than overwriting matching keys).
   * Setting this option to true will affect 3 things:
   * 1) Disable some internal optimizations around SST file compression.
   * 2) Reserve the bottom-most level for ingested files only.
   * 3) Note that num_levels should be &gt;= 3 if this option is turned on.
   *
   * DEFAULT: false
   *
   * @param allowIngestBehind true to allow ingest behind, false to disallow.
   *
   * @return the reference to the current options.
   */
  T setAllowIngestBehind(final boolean allowIngestBehind);
1406
  /**
   * Returns true if ingest behind is allowed.
   * See {@link #setAllowIngestBehind(boolean)}.
   *
   * @return true if ingest behind is allowed, false otherwise.
   */
  boolean allowIngestBehind();
1414
  /**
   * Needed to support differential snapshots.
   * If set to true then the DB will only process deletes with a sequence
   * number less than what was set by SetPreserveDeletesSequenceNumber(uint64_t ts).
   * Clients are responsible to periodically call this method to advance
   * the cutoff time. If this method is never called and preserve_deletes
   * is set to true NO deletes will ever be processed.
   * At the moment this only keeps normal deletes; SingleDeletes will
   * not be preserved.
   *
   * DEFAULT: false
   *
   * @param preserveDeletes true to preserve deletes.
   *
   * @return the reference to the current options.
   */
  T setPreserveDeletes(final boolean preserveDeletes);
1432
  /**
   * Returns true if deletes are preserved.
   * See {@link #setPreserveDeletes(boolean)}.
   *
   * @return true if deletes are preserved, false otherwise.
   */
  boolean preserveDeletes();
1440
  /**
   * If enabled it uses two queues for writes, one for the ones with
   * disable_memtable and one for the ones that also write to the memtable.
   * This allows the memtable writes not to lag behind other writes. It can be
   * used to optimize MySQL 2PC in which only the commits, which are serial,
   * write to the memtable.
   *
   * DEFAULT: false
   *
   * @param twoWriteQueues true to enable two write queues, false otherwise.
   *
   * @return the reference to the current options.
   */
  T setTwoWriteQueues(final boolean twoWriteQueues);
1455
  /**
   * Returns true if two write queues are enabled.
   *
   * @return true if two write queues are enabled, false otherwise.
   */
  boolean twoWriteQueues();
1462
  /**
   * If true the WAL is not flushed automatically after each write. Instead it
   * relies on manual invocation of FlushWAL to write the WAL buffer to its
   * file.
   *
   * DEFAULT: false
   *
   * @param manualWalFlush true to disable automatic WAL flushing,
   *     false otherwise.
   *
   * @return the reference to the current options.
   */
  T setManualWalFlush(final boolean manualWalFlush);
1476
  /**
   * Returns true if automatic WAL flushing is disabled.
   * See {@link #setManualWalFlush(boolean)}.
   *
   * @return true if automatic WAL flushing is disabled, false otherwise.
   */
  boolean manualWalFlush();
1484
  /**
   * If true, RocksDB supports flushing multiple column families and committing
   * their results atomically to MANIFEST. Note that it is not
   * necessary to set atomic_flush to true if WAL is always enabled since WAL
   * allows the database to be restored to the last persistent state in WAL.
   * This option is useful when there are column families with writes NOT
   * protected by WAL.
   * For manual flush, the application has to specify which column families to
   * flush atomically in {@link RocksDB#flush(FlushOptions, List)}.
   * For auto-triggered flush, RocksDB atomically flushes ALL column families.
   *
   * Currently, any WAL-enabled writes after an atomic flush may be replayed
   * independently if the process crashes later and tries to recover.
   *
   * @param atomicFlush true to enable atomic flush of multiple column families.
   *
   * @return the reference to the current options.
   */
  T setAtomicFlush(final boolean atomicFlush);
1504
  /**
   * Determine if atomic flush of multiple column families is enabled.
   *
   * See {@link #setAtomicFlush(boolean)}.
   *
   * @return true if atomic flush is enabled.
   */
  boolean atomicFlush();
1513 }