// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

package org.rocksdb;

import java.util.Collection;
import java.util.List;

public interface DBOptionsInterface<T extends DBOptionsInterface<T>> {

  /**
   * Use this if your DB is very small (like under 1GB) and you don't want to
   * spend lots of memory for memtables.
   *
   * @return the instance of the current object.
   */
  T optimizeForSmallDb();

  /**
   * Use the specified object to interact with the environment,
   * e.g. to read/write files, schedule background work, etc.
   * Default: {@link Env#getDefault()}
   *
   * @param env {@link Env} instance.
   * @return the instance of the current Options.
   */
  T setEnv(final Env env);

  /**
   * Returns the set RocksEnv instance.
   *
   * @return {@link RocksEnv} instance set in the options.
   */
  Env getEnv();

  /**
   * <p>By default, RocksDB uses only one background thread for flush and
   * compaction. Calling this function will set it up such that a total of
   * `totalThreads` threads is used.</p>
   *
   * <p>You almost definitely want to call this function if your system is
   * bottlenecked by RocksDB.</p>
   *
   * @param totalThreads The total number of threads to be used by RocksDB.
   *     A good value is the number of cores.
   *
   * @return the instance of the current Options
   */
  T setIncreaseParallelism(int totalThreads);
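
  // Usage sketch (illustrative only; the variable name "options" and the
  // thread count are assumptions, not taken from this file):
  //
  //   final DBOptions options = new DBOptions()
  //       .setCreateIfMissing(true)
  //       .setIncreaseParallelism(8); // roughly one thread per core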

  /**
   * If this value is set to true, then the database will be created
   * if it is missing during {@code RocksDB.open()}.
   * Default: false
   *
   * @param flag a flag indicating whether to create the database if it is
   *     missing when it is opened via
   *     {@link RocksDB#open(org.rocksdb.Options, String)}.
   * @return the instance of the current Options
   * @see RocksDB#open(org.rocksdb.Options, String)
   */
  T setCreateIfMissing(boolean flag);

  /**
   * Return true if the create_if_missing flag is set to true.
   * If true, the database will be created if it is missing.
   *
   * @return true if the createIfMissing option is set to true.
   * @see #setCreateIfMissing(boolean)
   */
  boolean createIfMissing();

  /**
   * <p>If true, missing column families will be automatically created.</p>
   *
   * <p>Default: false</p>
   *
   * @param flag a flag indicating if missing column families shall be
   *     created automatically.
   * @return the instance of the current Options
   */
  T setCreateMissingColumnFamilies(boolean flag);

  /**
   * Return true if the create_missing_column_families flag is set
   * to true. If true, missing column families will be created on open.
   *
   * @return true if the createMissingColumnFamilies option is set to
   *     true.
   * @see #setCreateMissingColumnFamilies(boolean)
   */
  boolean createMissingColumnFamilies();

  /**
   * If true, an error will be thrown during RocksDB.open() if the
   * database already exists.
   * Default: false
   *
   * @param errorIfExists if true, an exception will be thrown
   *     during {@code RocksDB.open()} if the database already exists.
   * @return the reference to the current option.
   * @see RocksDB#open(org.rocksdb.Options, String)
   */
  T setErrorIfExists(boolean errorIfExists);

  /**
   * If true, an error will be thrown during RocksDB.open() if the
   * database already exists.
   *
   * @return if true, an error is raised when the specified database
   *     already exists before open.
   */
  boolean errorIfExists();

  /**
   * If true, the implementation will do aggressive checking of the
   * data it is processing and will stop early if it detects any
   * errors. This may have unforeseen ramifications: for example, a
   * corruption of one DB entry may cause a large number of entries to
   * become unreadable or for the entire DB to become unopenable.
   * If any of the writes to the database fails (Put, Delete, Merge, Write),
   * the database will switch to read-only mode and fail all other
   * Write operations.
   * Default: true
   *
   * @param paranoidChecks a flag to indicate whether paranoid-check
   *     is on.
   * @return the reference to the current option.
   */
  T setParanoidChecks(boolean paranoidChecks);

  /**
   * If true, the implementation will do aggressive checking of the
   * data it is processing and will stop early if it detects any
   * errors. This may have unforeseen ramifications: for example, a
   * corruption of one DB entry may cause a large number of entries to
   * become unreadable or for the entire DB to become unopenable.
   * If any of the writes to the database fails (Put, Delete, Merge, Write),
   * the database will switch to read-only mode and fail all other
   * Write operations.
   *
   * @return a boolean indicating whether paranoid-check is on.
   */
  boolean paranoidChecks();

  /**
   * Use to control write rate of flush and compaction. Flush has higher
   * priority than compaction. Rate limiting is disabled if nullptr.
   * Default: nullptr
   *
   * @param rateLimiter {@link org.rocksdb.RateLimiter} instance.
   * @return the instance of the current object.
   *
   * @since 3.10.0
   */
  T setRateLimiter(RateLimiter rateLimiter);
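
  // Sketch: limiting background I/O to roughly 10 MiB/s with a RateLimiter
  // (the "options" instance and the rate value are assumptions):
  //
  //   final RateLimiter rateLimiter = new RateLimiter(10L * 1024 * 1024);
  //   options.setRateLimiter(rateLimiter);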

  /**
   * Use to track SST files and control their file deletion rate.
   *
   * Features:
   *  - Throttle the deletion rate of the SST files.
   *  - Keep track of the total size of all SST files.
   *  - Set a maximum allowed space limit for SST files that, when reached,
   *    the DB won't do any further flushes or compactions and will set the
   *    background error.
   *  - Can be shared between multiple dbs.
   *
   * Limitations:
   *  - Only track and throttle deletes of SST files in
   *    first db_path (db_name if db_paths is empty).
   *
   * @param sstFileManager The SST File Manager for the db.
   * @return the instance of the current object.
   */
  T setSstFileManager(SstFileManager sstFileManager);
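
  // Sketch: tracking SST files with an SstFileManager built on the default
  // Env (the "options" instance is an assumption; construction may throw
  // RocksDBException):
  //
  //   final SstFileManager sstFileManager = new SstFileManager(Env.getDefault());
  //   options.setSstFileManager(sstFileManager);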

  /**
   * <p>Any internal progress/error information generated by
   * the db will be written to the Logger if it is non-nullptr,
   * or to a file stored in the same directory as the DB
   * contents if info_log is nullptr.</p>
   *
   * <p>Default: nullptr</p>
   *
   * @param logger {@link Logger} instance.
   * @return the instance of the current object.
   */
  T setLogger(Logger logger);

  /**
   * <p>Sets the RocksDB log level. Default level is INFO</p>
   *
   * @param infoLogLevel log level to set.
   * @return the instance of the current object.
   */
  T setInfoLogLevel(InfoLogLevel infoLogLevel);
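
  // Sketch: raising the info log threshold to warnings and above
  // (the "options" instance is an assumption):
  //
  //   options.setInfoLogLevel(InfoLogLevel.WARN_LEVEL);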

  /**
   * <p>Returns currently set log level.</p>
   * @return {@link org.rocksdb.InfoLogLevel} instance.
   */
  InfoLogLevel infoLogLevel();

  /**
   * If {@link MutableDBOptionsInterface#maxOpenFiles()} is -1, DB will open
   * all files on DB::Open(). You can use this option to increase the number
   * of threads used to open the files.
   *
   * Default: 16
   *
   * @param maxFileOpeningThreads the maximum number of threads to use to
   *     open files
   *
   * @return the reference to the current options.
   */
  T setMaxFileOpeningThreads(int maxFileOpeningThreads);

  /**
   * If {@link MutableDBOptionsInterface#maxOpenFiles()} is -1, DB will open all
   * files on DB::Open(). You can use this option to increase the number of
   * threads used to open the files.
   *
   * Default: 16
   *
   * @return the maximum number of threads to use to open files
   */
  int maxFileOpeningThreads();

  /**
   * <p>Sets the statistics object which collects metrics about database operations.
   * Statistics objects should not be shared between DB instances as
   * they do not use any locks to prevent concurrent updates.</p>
   *
   * @param statistics The statistics to set
   *
   * @return the instance of the current object.
   *
   * @see RocksDB#open(org.rocksdb.Options, String)
   */
  T setStatistics(final Statistics statistics);
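
  // Sketch: attaching a Statistics object so metrics can be read back later
  // (the "options" instance and the chosen ticker are assumptions):
  //
  //   final Statistics stats = new Statistics();
  //   options.setStatistics(stats);
  //   // after some work: stats.getTickerCount(TickerType.BYTES_WRITTEN);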

  /**
   * <p>Returns statistics object.</p>
   *
   * @return the instance of the statistics object or null if there is no
   *     statistics object.
   *
   * @see #setStatistics(Statistics)
   */
  Statistics statistics();

  /**
   * <p>If true, then every store to stable storage will issue a fsync.</p>
   * <p>If false, then every store to stable storage will issue a fdatasync.
   * This parameter should be set to true while storing data to
   * filesystem like ext3 that can lose files after a reboot.</p>
   * <p>Default: false</p>
   *
   * @param useFsync a boolean flag to specify whether to use fsync
   * @return the instance of the current object.
   */
  T setUseFsync(boolean useFsync);

  /**
   * <p>If true, then every store to stable storage will issue a fsync.</p>
   * <p>If false, then every store to stable storage will issue a fdatasync.
   * This parameter should be set to true while storing data to
   * filesystem like ext3 that can lose files after a reboot.</p>
   *
   * @return boolean value indicating if fsync is used.
   */
  boolean useFsync();

  /**
   * A list of paths where SST files can be put into, with its target size.
   * Newer data is placed into paths specified earlier in the vector while
   * older data gradually moves to paths specified later in the vector.
   *
   * For example, if you have a flash device with 10GB allocated for the DB,
   * as well as a hard drive of 2TB, you should configure it to be:
   *    [{"/flash_path", 10GB}, {"/hard_drive", 2TB}]
   *
   * The system will try to guarantee data under each path is close to but
   * not larger than the target size. But current and future file sizes used
   * in determining where to place a file are based on best-effort estimation,
   * which means there is a chance that the actual size under the directory
   * is slightly more than the target size under some workloads. The user
   * should give some buffer room for those cases.
   *
   * If none of the paths has sufficient room to place a file, the file will
   * be placed in the last path anyway, regardless of the target size.
   *
   * Placing newer data in earlier paths is also best-effort. Users should
   * expect user files to be placed in higher levels in some extreme cases.
   *
   * If left empty, only one path will be used, which is the db_name passed
   * when opening the DB.
   *
   * Default: empty
   *
   * @param dbPaths the paths and target sizes
   *
   * @return the reference to the current options
   */
  T setDbPaths(final Collection<DbPath> dbPaths);
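
  // Sketch of the flash + hard-drive layout described above (paths, sizes and
  // the "options" instance are assumptions; uses java.util.Arrays and
  // java.nio.file.Paths):
  //
  //   options.setDbPaths(Arrays.asList(
  //       new DbPath(Paths.get("/flash_path"), 10L * 1024 * 1024 * 1024),
  //       new DbPath(Paths.get("/hard_drive"), 2L * 1024 * 1024 * 1024 * 1024)));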

  /**
   * A list of paths where SST files can be put into, with its target size.
   * Newer data is placed into paths specified earlier in the vector while
   * older data gradually moves to paths specified later in the vector.
   *
   * For example, if you have a flash device with 10GB allocated for the DB,
   * as well as a hard drive of 2TB, you should configure it to be:
   *    [{"/flash_path", 10GB}, {"/hard_drive", 2TB}]
   *
   * The system will try to guarantee data under each path is close to but
   * not larger than the target size. But current and future file sizes used
   * in determining where to place a file are based on best-effort estimation,
   * which means there is a chance that the actual size under the directory
   * is slightly more than the target size under some workloads. The user
   * should give some buffer room for those cases.
   *
   * If none of the paths has sufficient room to place a file, the file will
   * be placed in the last path anyway, regardless of the target size.
   *
   * Placing newer data in earlier paths is also best-effort. Users should
   * expect user files to be placed in higher levels in some extreme cases.
   *
   * If left empty, only one path will be used, which is the db_name passed
   * when opening the DB.
   *
   * Default: {@link java.util.Collections#emptyList()}
   *
   * @return dbPaths the paths and target sizes
   */
  List<DbPath> dbPaths();

  /**
   * This specifies the info LOG dir.
   * If it is empty, the log files will be in the same dir as data.
   * If it is non empty, the log files will be in the specified dir,
   * and the db data dir's absolute path will be used as the log file
   * name's prefix.
   *
   * @param dbLogDir the path to the info log directory
   * @return the instance of the current object.
   */
  T setDbLogDir(String dbLogDir);

  /**
   * Returns the directory of info log.
   *
   * If it is empty, the log files will be in the same dir as data.
   * If it is non empty, the log files will be in the specified dir,
   * and the db data dir's absolute path will be used as the log file
   * name's prefix.
   *
   * @return the path to the info log directory
   */
  String dbLogDir();

  /**
   * This specifies the absolute dir path for write-ahead logs (WAL).
   * If it is empty, the log files will be in the same dir as data,
   * dbname is used as the data dir by default.
   * If it is non empty, the log files will be kept in the specified dir.
   * When destroying the db, all log files in wal_dir and the dir itself
   * are deleted.
   *
   * @param walDir the path to the write-ahead-log directory.
   * @return the instance of the current object.
   */
  T setWalDir(String walDir);

  /**
   * Returns the path to the write-ahead-logs (WAL) directory.
   *
   * If it is empty, the log files will be in the same dir as data,
   * dbname is used as the data dir by default.
   * If it is non empty, the log files will be kept in the specified dir.
   * When destroying the db, all log files in wal_dir and the dir itself
   * are deleted.
   *
   * @return the path to the write-ahead-logs (WAL) directory.
   */
  String walDir();

  /**
   * The periodicity when obsolete files get deleted. The default
   * value is 6 hours. The files that get out of scope by compaction
   * process will still get automatically deleted on every compaction,
   * regardless of this setting.
   *
   * @param micros the time interval in micros
   * @return the instance of the current object.
   */
  T setDeleteObsoleteFilesPeriodMicros(long micros);

  /**
   * The periodicity when obsolete files get deleted. The default
   * value is 6 hours. The files that get out of scope by compaction
   * process will still get automatically deleted on every compaction,
   * regardless of this setting.
   *
   * @return the time interval in micros when obsolete files will be deleted.
   */
  long deleteObsoleteFilesPeriodMicros();

  /**
   * This value represents the maximum number of threads that will
   * concurrently perform a compaction job by breaking it into multiple,
   * smaller ones that are run simultaneously.
   * Default: 1 (i.e. no subcompactions)
   *
   * @param maxSubcompactions The maximum number of threads that will
   *     concurrently perform a compaction job
   *
   * @return the instance of the current object.
   */
  T setMaxSubcompactions(int maxSubcompactions);

  /**
   * This value represents the maximum number of threads that will
   * concurrently perform a compaction job by breaking it into multiple,
   * smaller ones that are run simultaneously.
   * Default: 1 (i.e. no subcompactions)
   *
   * @return The maximum number of threads that will concurrently perform a
   *     compaction job
   */
  int maxSubcompactions();

  /**
   * NOT SUPPORTED ANYMORE: RocksDB automatically decides this based on the
   * value of max_background_jobs. For backwards compatibility we will set
   * `max_background_jobs = max_background_compactions + max_background_flushes`
   * in the case where the user sets at least one of `max_background_compactions`
   * or `max_background_flushes`.
   *
   * Specifies the maximum number of concurrent background flush jobs.
   * If you're increasing this, also consider increasing the number of threads
   * in the HIGH priority thread pool.
   * Default: -1
   *
   * @param maxBackgroundFlushes number of max concurrent flush jobs
   * @return the instance of the current object.
   *
   * @see RocksEnv#setBackgroundThreads(int)
   * @see RocksEnv#setBackgroundThreads(int, Priority)
   * @see MutableDBOptionsInterface#maxBackgroundCompactions()
   *
   * @deprecated Use {@link MutableDBOptionsInterface#setMaxBackgroundJobs(int)}
   */
  @Deprecated
  T setMaxBackgroundFlushes(int maxBackgroundFlushes);

  /**
   * NOT SUPPORTED ANYMORE: RocksDB automatically decides this based on the
   * value of max_background_jobs. For backwards compatibility we will set
   * `max_background_jobs = max_background_compactions + max_background_flushes`
   * in the case where the user sets at least one of `max_background_compactions`
   * or `max_background_flushes`.
   *
   * Returns the maximum number of concurrent background flush jobs.
   * If you're increasing this, also consider increasing the number of threads
   * in the HIGH priority thread pool.
   * Default: -1
   *
   * @return the maximum number of concurrent background flush jobs.
   * @see RocksEnv#setBackgroundThreads(int)
   * @see RocksEnv#setBackgroundThreads(int, Priority)
   */
  @Deprecated
  int maxBackgroundFlushes();

  /**
   * Specifies the maximum size of an info log file. If the current log file
   * is larger than `max_log_file_size`, a new info log file will
   * be created.
   * If 0, all logs will be written to one log file.
   *
   * @param maxLogFileSize the maximum size of an info log file.
   * @return the instance of the current object.
   * @throws java.lang.IllegalArgumentException thrown on 32-Bit platforms
   *     while overflowing the underlying platform specific value.
   */
  T setMaxLogFileSize(long maxLogFileSize);

  /**
   * Returns the maximum size of an info log file. If the current log file
   * is larger than this size, a new info log file will be created.
   * If 0, all logs will be written to one log file.
   *
   * @return the maximum size of the info log file.
   */
  long maxLogFileSize();

  /**
   * Specifies the time interval for the info log file to roll (in seconds).
   * If specified with non-zero value, log file will be rolled
   * if it has been active longer than `log_file_time_to_roll`.
   * Default: 0 (disabled)
   *
   * @param logFileTimeToRoll the time interval in seconds.
   * @return the instance of the current object.
   * @throws java.lang.IllegalArgumentException thrown on 32-Bit platforms
   *     while overflowing the underlying platform specific value.
   */
  T setLogFileTimeToRoll(long logFileTimeToRoll);

  /**
   * Returns the time interval for the info log file to roll (in seconds).
   * If specified with non-zero value, log file will be rolled
   * if it has been active longer than `log_file_time_to_roll`.
   * Default: 0 (disabled)
   *
   * @return the time interval in seconds.
   */
  long logFileTimeToRoll();

  /**
   * Specifies the maximum number of info log files to be kept.
   * Default: 1000
   *
   * @param keepLogFileNum the maximum number of info log files to be kept.
   * @return the instance of the current object.
   * @throws java.lang.IllegalArgumentException thrown on 32-Bit platforms
   *     while overflowing the underlying platform specific value.
   */
  T setKeepLogFileNum(long keepLogFileNum);

  /**
   * Returns the maximum number of info log files to be kept.
   * Default: 1000
   *
   * @return the maximum number of info log files to be kept.
   */
  long keepLogFileNum();

  /**
   * Recycle log files.
   *
   * If non-zero, we will reuse previously written log files for new
   * logs, overwriting the old data. The value indicates how many
   * such files we will keep around at any point in time for later
   * use.
   *
   * This is more efficient because the blocks are already
   * allocated and fdatasync does not need to update the inode after
   * each write.
   *
   * Default: 0
   *
   * @param recycleLogFileNum the number of log files to keep for recycling
   *
   * @return the reference to the current options
   */
  T setRecycleLogFileNum(long recycleLogFileNum);

  /**
   * Recycle log files.
   *
   * If non-zero, we will reuse previously written log files for new
   * logs, overwriting the old data. The value indicates how many
   * such files we will keep around at any point in time for later
   * use.
   *
   * This is more efficient because the blocks are already
   * allocated and fdatasync does not need to update the inode after
   * each write.
   *
   * Default: 0
   *
   * @return the number of log files kept for recycling
   */
  long recycleLogFileNum();

  /**
   * Manifest file is rolled over on reaching this limit.
   * The older manifest file will be deleted.
   * The default value is 1GB so that the manifest file can grow, but not
   * reach the limit of storage capacity.
   *
   * @param maxManifestFileSize the size limit of a manifest file.
   * @return the instance of the current object.
   */
  T setMaxManifestFileSize(long maxManifestFileSize);

  /**
   * Manifest file is rolled over on reaching this limit.
   * The older manifest file will be deleted.
   * The default value is 1GB so that the manifest file can grow, but not
   * reach the limit of storage capacity.
   *
   * @return the size limit of a manifest file.
   */
  long maxManifestFileSize();

  /**
   * Number of shards used for table cache.
   *
   * @param tableCacheNumshardbits the number of shards
   * @return the instance of the current object.
   */
  T setTableCacheNumshardbits(int tableCacheNumshardbits);

  /**
   * Number of shards used for table cache.
   *
   * @return the number of shards used for table cache.
   */
  int tableCacheNumshardbits();

  /**
   * {@link #walTtlSeconds()} and {@link #walSizeLimitMB()} affect how archived
   * logs will be deleted.
   * <ol>
   * <li>If both set to 0, logs will be deleted asap and will not get into
   * the archive.</li>
   * <li>If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0,
   * WAL files will be checked every 10 min and if total size is greater
   * than WAL_size_limit_MB, they will be deleted starting with the
   * earliest until size_limit is met. All empty files will be deleted.</li>
   * <li>If WAL_ttl_seconds is not 0 and WAL_size_limit_MB is 0, then
   * WAL files will be checked every WAL_ttl_seconds / 2 and those that
   * are older than WAL_ttl_seconds will be deleted.</li>
   * <li>If both are not 0, WAL files will be checked every 10 min and both
   * checks will be performed with ttl being first.</li>
   * </ol>
   *
   * @param walTtlSeconds the ttl seconds
   * @return the instance of the current object.
   * @see #setWalSizeLimitMB(long)
   */
  T setWalTtlSeconds(long walTtlSeconds);

  /**
   * walTtlSeconds() and walSizeLimitMB() affect how archived logs
   * will be deleted.
   * <ol>
   * <li>If both set to 0, logs will be deleted asap and will not get into
   * the archive.</li>
   * <li>If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0,
   * WAL files will be checked every 10 min and if total size is greater
   * than WAL_size_limit_MB, they will be deleted starting with the
   * earliest until size_limit is met. All empty files will be deleted.</li>
   * <li>If WAL_ttl_seconds is not 0 and WAL_size_limit_MB is 0, then
   * WAL files will be checked every WAL_ttl_seconds / 2 and those that
   * are older than WAL_ttl_seconds will be deleted.</li>
   * <li>If both are not 0, WAL files will be checked every 10 min and both
   * checks will be performed with ttl being first.</li>
   * </ol>
   *
   * @return the wal-ttl seconds
   * @see #walSizeLimitMB()
   */
  long walTtlSeconds();

  /**
   * walTtlSeconds() and walSizeLimitMB() affect how archived logs
   * will be deleted.
   * <ol>
   * <li>If both set to 0, logs will be deleted asap and will not get into
   * the archive.</li>
   * <li>If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0,
   * WAL files will be checked every 10 min and if total size is greater
   * than WAL_size_limit_MB, they will be deleted starting with the
   * earliest until size_limit is met. All empty files will be deleted.</li>
   * <li>If WAL_ttl_seconds is not 0 and WAL_size_limit_MB is 0, then
   * WAL files will be checked every WAL_ttl_seconds / 2 and those that
   * are older than WAL_ttl_seconds will be deleted.</li>
   * <li>If both are not 0, WAL files will be checked every 10 min and both
   * checks will be performed with ttl being first.</li>
   * </ol>
   *
   * @param sizeLimitMB size limit in mega-bytes.
   * @return the instance of the current object.
   * @see #setWalTtlSeconds(long)
   */
  T setWalSizeLimitMB(long sizeLimitMB);
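
  // Sketch: bounding archived WAL files by both a one-hour TTL and a 1 GB
  // size limit (the "options" instance and both values are assumptions):
  //
  //   options.setWalTtlSeconds(60 * 60);
  //   options.setWalSizeLimitMB(1024);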

  /**
   * walTtlSeconds() and walSizeLimitMB() affect how archived logs
   * will be deleted.
   * <ol>
   * <li>If both set to 0, logs will be deleted asap and will not get into
   * the archive.</li>
   * <li>If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0,
   * WAL files will be checked every 10 min and if total size is greater
   * than WAL_size_limit_MB, they will be deleted starting with the
   * earliest until size_limit is met. All empty files will be deleted.</li>
   * <li>If WAL_ttl_seconds is not 0 and WAL_size_limit_MB is 0, then
   * WAL files will be checked every WAL_ttl_seconds / 2 and those that
   * are older than WAL_ttl_seconds will be deleted.</li>
   * <li>If both are not 0, WAL files will be checked every 10 min and both
   * checks will be performed with ttl being first.</li>
   * </ol>
   * @return size limit in mega-bytes.
   * @see #walTtlSeconds()
   */
  long walSizeLimitMB();

  /**
   * Number of bytes to preallocate (via fallocate) the manifest
   * files. Default is 4mb, which is reasonable to reduce random IO
   * as well as prevent overallocation for mounts that preallocate
   * large amounts of data (such as xfs's allocsize option).
   *
   * @param size the size in byte
   * @return the instance of the current object.
   * @throws java.lang.IllegalArgumentException thrown on 32-Bit platforms
   *     while overflowing the underlying platform specific value.
   */
  T setManifestPreallocationSize(long size);

  /**
   * Number of bytes to preallocate (via fallocate) the manifest
   * files. Default is 4mb, which is reasonable to reduce random IO
   * as well as prevent overallocation for mounts that preallocate
   * large amounts of data (such as xfs's allocsize option).
   *
   * @return size in bytes.
   */
  long manifestPreallocationSize();

  /**
   * Enable the OS to use direct I/O for reading sst tables.
   * Default: false
   *
   * @param useDirectReads if true, then direct read is enabled
   * @return the instance of the current object.
   */
  T setUseDirectReads(boolean useDirectReads);

  /**
   * Enable the OS to use direct I/O for reading sst tables.
   * Default: false
   *
   * @return if true, then direct reads are enabled
   */
  boolean useDirectReads();

  /**
   * Enable the OS to use direct reads and writes in flush and
   * compaction.
   * Default: false
   *
   * @param useDirectIoForFlushAndCompaction if true, then direct
   *     I/O will be enabled for background flush and compactions
   * @return the instance of the current object.
   */
  T setUseDirectIoForFlushAndCompaction(boolean useDirectIoForFlushAndCompaction);
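
  // Sketch: enabling direct I/O for both user reads and background
  // flush/compaction (the "options" instance is an assumption):
  //
  //   options.setUseDirectReads(true)
  //          .setUseDirectIoForFlushAndCompaction(true);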

  /**
   * Enable the OS to use direct reads and writes in flush and
   * compaction.
   *
   * @return if true, then direct I/O is enabled for flush and
   *     compaction
   */
  boolean useDirectIoForFlushAndCompaction();

  /**
   * Whether fallocate calls are allowed.
   *
   * @param allowFAllocate false if fallocate() calls are bypassed
   *
   * @return the reference to the current options.
   */
  T setAllowFAllocate(boolean allowFAllocate);

  /**
   * Whether fallocate calls are allowed.
   *
   * @return false if fallocate() calls are bypassed
   */
  boolean allowFAllocate();

  /**
   * Allow the OS to mmap file for reading sst tables.
   * Default: false
   *
   * @param allowMmapReads true if mmap reads are allowed.
   * @return the instance of the current object.
   */
  T setAllowMmapReads(boolean allowMmapReads);

  /**
   * Allow the OS to mmap file for reading sst tables.
   * Default: false
   *
   * @return true if mmap reads are allowed.
   */
  boolean allowMmapReads();

  /**
   * Allow the OS to mmap file for writing. Default: false
   *
   * @param allowMmapWrites true if mmap writes are allowed.
   * @return the instance of the current object.
   */
  T setAllowMmapWrites(boolean allowMmapWrites);

  /**
   * Allow the OS to mmap file for writing. Default: false
   *
   * @return true if mmap writes are allowed.
   */
  boolean allowMmapWrites();

  /**
   * Disable child processes inheriting open files. Default: true
   *
   * @param isFdCloseOnExec true if child process inheriting open
   *     files is disabled.
   * @return the instance of the current object.
   */
  T setIsFdCloseOnExec(boolean isFdCloseOnExec);

  /**
   * Disable child processes inheriting open files. Default: true
   *
   * @return true if child process inheriting open files is disabled.
   */
  boolean isFdCloseOnExec();

  /**
   * If set true, will hint the underlying file system that the file
   * access pattern is random, when an sst file is opened.
   * Default: true
   *
   * @param adviseRandomOnOpen true if hinting random access is on.
   * @return the instance of the current object.
   */
  T setAdviseRandomOnOpen(boolean adviseRandomOnOpen);

  /**
   * If set true, will hint the underlying file system that the file
   * access pattern is random, when an sst file is opened.
   * Default: true
   *
   * @return true if hinting random access is on.
   */
  boolean adviseRandomOnOpen();

  /**
   * Amount of data to build up in memtables across all column
   * families before writing to disk.
   *
   * This is distinct from {@link ColumnFamilyOptions#writeBufferSize()},
   * which enforces a limit for a single memtable.
   *
   * This feature is disabled by default. Specify a non-zero value
   * to enable it.
   *
   * Default: 0 (disabled)
   *
   * @param dbWriteBufferSize the size of the write buffer
   *
   * @return the reference to the current options.
   */
  T setDbWriteBufferSize(long dbWriteBufferSize);

  /**
   * Use passed {@link WriteBufferManager} to control memory usage across
   * multiple column families and/or DB instances.
   *
   * Check <a href="https://github.com/facebook/rocksdb/wiki/Write-Buffer-Manager">
   * https://github.com/facebook/rocksdb/wiki/Write-Buffer-Manager</a>
   * for more details on when to use it.
   *
   * @param writeBufferManager The WriteBufferManager to use
   * @return the reference of the current options.
   */
  T setWriteBufferManager(final WriteBufferManager writeBufferManager);
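
  // Sketch: capping memtable memory across column families at 512 MB,
  // accounted against an LRU block cache (sizes and the "options" instance
  // are assumptions):
  //
  //   final Cache cache = new LRUCache(1024L * 1024 * 1024);
  //   final WriteBufferManager wbm =
  //       new WriteBufferManager(512L * 1024 * 1024, cache);
  //   options.setWriteBufferManager(wbm);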

  /**
   * Reference to the {@link WriteBufferManager} used by it. <br>
   *
   * Default: null (Disabled)
   *
   * @return a reference to WriteBufferManager
   */
  WriteBufferManager writeBufferManager();

  /**
   * Amount of data to build up in memtables across all column
   * families before writing to disk.
   *
   * This is distinct from {@link ColumnFamilyOptions#writeBufferSize()},
   * which enforces a limit for a single memtable.
   *
   * This feature is disabled by default. Specify a non-zero value
   * to enable it.
   *
   * Default: 0 (disabled)
   *
   * @return the size of the write buffer
   */
  long dbWriteBufferSize();

  /**
   * Specify the file access pattern once a compaction is started.
   * It will be applied to all input files of a compaction.
   *
   * Default: {@link AccessHint#NORMAL}
   *
   * @param accessHint The access hint
   *
   * @return the reference to the current options.
   */
  T setAccessHintOnCompactionStart(final AccessHint accessHint);

  /**
   * Specify the file access pattern once a compaction is started.
   * It will be applied to all input files of a compaction.
   *
   * Default: {@link AccessHint#NORMAL}
   *
   * @return The access hint
   */
  AccessHint accessHintOnCompactionStart();

  /**
   * If true, always create a new file descriptor and new table reader
   * for compaction inputs. Turning this parameter on may introduce extra
   * memory usage in the table reader, if it allocates extra memory
   * for indexes. This will allow file descriptor prefetch options
   * to be set for compaction input files and not to impact file
   * descriptors for the same file used by user queries.
   * Suggest to enable {@link BlockBasedTableConfig#cacheIndexAndFilterBlocks()}
   * for this mode if using block-based table.
   *
   * Default: false
   *
   * @param newTableReaderForCompactionInputs true if a new file descriptor and
   *     table reader should be created for compaction inputs
   *
   * @return the reference to the current options.
   */
  T setNewTableReaderForCompactionInputs(
      boolean newTableReaderForCompactionInputs);

  /**
   * If true, always create a new file descriptor and new table reader
   * for compaction inputs. Turning this parameter on may introduce extra
   * memory usage in the table reader, if it allocates extra memory
   * for indexes. This will allow file descriptor prefetch options
   * to be set for compaction input files and not to impact file
   * descriptors for the same file used by user queries.
   * Suggest to enable {@link BlockBasedTableConfig#cacheIndexAndFilterBlocks()}
   * for this mode if using block-based table.
   *
   * Default: false
   *
   * @return true if a new file descriptor and table reader are created for
   *     compaction inputs
   */
  boolean newTableReaderForCompactionInputs();

  /**
   * This is the maximum buffer size that is used by WinMmapReadableFile in
   * unbuffered disk I/O mode. We need to maintain an aligned buffer for
   * reads. We allow the buffer to grow until the specified value and then
   * for bigger requests allocate one shot buffers. In unbuffered mode we
   * always bypass the read-ahead buffer at ReadaheadRandomAccessFile.
   * When read-ahead is required we then make use of the
   * {@link MutableDBOptionsInterface#compactionReadaheadSize()} value and
   * always try to read ahead.
   * With read-ahead we always pre-allocate the buffer to the size instead of
   * growing it up to a limit.
   *
   * This option is currently honored only on Windows.
   *
   * Default: 1 Mb
   *
   * Special value: 0 - means do not maintain per instance buffer. Allocate
   * per request buffer and avoid locking.
   *
   * @param randomAccessMaxBufferSize the maximum size of the random access
   *     buffer
   *
   * @return the reference to the current options.
   */
  T setRandomAccessMaxBufferSize(long randomAccessMaxBufferSize);

  /**
   * This is the maximum buffer size that is used by WinMmapReadableFile in
   * unbuffered disk I/O mode. We need to maintain an aligned buffer for
   * reads. We allow the buffer to grow until the specified value and then
   * for bigger requests allocate one shot buffers. In unbuffered mode we
   * always bypass the read-ahead buffer at ReadaheadRandomAccessFile.
   * When read-ahead is required we then make use of the
   * {@link MutableDBOptionsInterface#compactionReadaheadSize()} value and
   * always try to read ahead. With read-ahead we always pre-allocate the
   * buffer to the size instead of growing it up to a limit.
   *
   * This option is currently honored only on Windows.
   *
   * Default: 1 Mb
   *
   * Special value: 0 - means do not maintain per instance buffer. Allocate
   * per request buffer and avoid locking.
   *
   * @return the maximum size of the random access buffer
   */
  long randomAccessMaxBufferSize();

  /**
   * Use adaptive mutex, which spins in the user space before resorting
   * to kernel. This could reduce context switch when the mutex is not
   * heavily contended. However, if the mutex is hot, we could end up
   * wasting spin time.
   * Default: false
   *
   * @param useAdaptiveMutex true if adaptive mutex is used.
   * @return the instance of the current object.
   */
  T setUseAdaptiveMutex(boolean useAdaptiveMutex);

  /**
   * Use adaptive mutex, which spins in the user space before resorting
   * to kernel. This could reduce context switch when the mutex is not
   * heavily contended. However, if the mutex is hot, we could end up
   * wasting spin time.
   * Default: false
   *
   * @return true if adaptive mutex is used.
   */
  boolean useAdaptiveMutex();

  //TODO(AR) NOW
//  /**
//   * Sets the {@link EventListener}s whose callback functions
//   * will be called when specific RocksDB event happens.
//   *
//   * @param listeners the listeners who should be notified on various events.
//   *
//   * @return the instance of the current object.
//   */
//  T setListeners(final List<EventListener> listeners);
//
//  /**
//   * Gets the {@link EventListener}s whose callback functions
//   * will be called when specific RocksDB event happens.
//   *
//   * @return a collection of Event listeners.
//   */
//  Collection<EventListener> listeners();

  /**
   * If true, then the status of the threads involved in this DB will
   * be tracked and available via GetThreadList() API.
   *
   * Default: false
   *
   * @param enableThreadTracking true to enable tracking
   *
   * @return the reference to the current options.
   */
  T setEnableThreadTracking(boolean enableThreadTracking);

  /**
   * If true, then the status of the threads involved in this DB will
   * be tracked and available via GetThreadList() API.
   *
   * Default: false
   *
   * @return true if tracking is enabled
   */
  boolean enableThreadTracking();

  /**
   * By default, a single write thread queue is maintained. The thread that
   * gets to the head of the queue becomes the write batch group leader and is
   * responsible for writing to the WAL and the memtable for the batch group.
   *
   * If {@link #enablePipelinedWrite()} is true, a separate write thread queue
   * is maintained for WAL write and memtable write. A write thread first
   * enters the WAL writer queue and then the memtable writer queue. Pending
   * threads on the WAL writer queue thus only have to wait for previous
   * writers to finish their WAL writing but not the memtable writing.
   * Enabling the feature may improve write throughput and reduce latency of
   * the prepare phase of two-phase commit.
   *
   * Default: false
   *
   * @param enablePipelinedWrite true to enable pipelined writes
   *
   * @return the reference to the current options.
   */
  T setEnablePipelinedWrite(final boolean enablePipelinedWrite);

  /**
   * Returns true if pipelined writes are enabled.
   * See {@link #setEnablePipelinedWrite(boolean)}.
   *
   * @return true if pipelined writes are enabled, false otherwise.
   */
  boolean enablePipelinedWrite();

  /**
   * Setting {@link #unorderedWrite()} to true trades higher write throughput
   * for relaxing the immutability guarantee of snapshots. This violates the
   * repeatability one expects from ::Get from a snapshot, as well as
   * ::MultiGet and Iterator's consistent-point-in-time view property.
   * If the application cannot tolerate the relaxed guarantees, it can implement
   * its own mechanisms to work around that and yet benefit from the higher
   * throughput. Using TransactionDB with WRITE_PREPARED write policy and
   * {@link #twoWriteQueues()} true is one way to achieve immutable snapshots
   * despite unordered_write.
   *
   * By default, i.e., when it is false, rocksdb does not advance the sequence
   * number for new snapshots unless all the writes with lower sequence numbers
   * are already finished. This provides the immutability that we expect from
   * snapshots. Moreover, since Iterator and MultiGet internally depend on
   * snapshots, the snapshot immutability results in Iterator and MultiGet
   * offering a consistent-point-in-time view. If set to true, although
   * Read-Your-Own-Write property is still provided, the snapshot immutability
   * property is relaxed: the writes issued after the snapshot is obtained (with
   * larger sequence numbers) will still not be visible to the reads from that
   * snapshot, however, there still might be pending writes (with lower sequence
   * numbers) that will change the state visible to the snapshot after they are
   * landed to the memtable.
   *
   * @param unorderedWrite true to enable unordered writes
   *
   * @return the reference to the current options.
   */
  T setUnorderedWrite(final boolean unorderedWrite);

  /**
   * Returns true if unordered writes are enabled.
   * See {@link #setUnorderedWrite(boolean)}.
   *
   * @return true if unordered writes are enabled, false otherwise.
   */
  boolean unorderedWrite();

  /**
   * If true, allow multi-writers to update mem tables in parallel.
   * Only some memtable factories support concurrent writes; currently it
   * is implemented only for SkipListFactory. Concurrent memtable writes
   * are not compatible with inplace_update_support or filter_deletes.
   * It is strongly recommended to set
   * {@link #setEnableWriteThreadAdaptiveYield(boolean)} if you are going to use
   * this feature.
   * Default: true
   *
   * @param allowConcurrentMemtableWrite true to enable concurrent writes
   *     for the memtable
   *
   * @return the reference to the current options.
   */
  T setAllowConcurrentMemtableWrite(boolean allowConcurrentMemtableWrite);

  /**
   * If true, allow multi-writers to update mem tables in parallel.
   * Only some memtable factories support concurrent writes; currently it
   * is implemented only for SkipListFactory. Concurrent memtable writes
   * are not compatible with inplace_update_support or filter_deletes.
   * It is strongly recommended to set
   * {@link #setEnableWriteThreadAdaptiveYield(boolean)} if you are going to use
   * this feature.
   * Default: true
   *
   * @return true if concurrent writes are enabled for the memtable
   */
  boolean allowConcurrentMemtableWrite();

  /**
   * If true, threads synchronizing with the write batch group leader will
   * wait for up to {@link #writeThreadMaxYieldUsec()} before blocking on a
   * mutex. This can substantially improve throughput for concurrent workloads,
   * regardless of whether {@link #allowConcurrentMemtableWrite()} is enabled.
   * Default: true
   *
   * @param enableWriteThreadAdaptiveYield true to enable adaptive yield for the
   *     write threads
   *
   * @return the reference to the current options.
   */
  T setEnableWriteThreadAdaptiveYield(
      boolean enableWriteThreadAdaptiveYield);

  /**
   * If true, threads synchronizing with the write batch group leader will
   * wait for up to {@link #writeThreadMaxYieldUsec()} before blocking on a
   * mutex. This can substantially improve throughput for concurrent workloads,
   * regardless of whether {@link #allowConcurrentMemtableWrite()} is enabled.
   * Default: true
   *
   * @return true if adaptive yield is enabled
   *     for the writing threads
   */
  boolean enableWriteThreadAdaptiveYield();

  /**
   * The maximum number of microseconds that a write operation will use
   * a yielding spin loop to coordinate with other write threads before
   * blocking on a mutex. (Assuming {@link #writeThreadSlowYieldUsec()} is
   * set properly) increasing this value is likely to increase RocksDB
   * throughput at the expense of increased CPU usage.
   * Default: 100
   *
   * @param writeThreadMaxYieldUsec maximum number of microseconds
   *
   * @return the reference to the current options.
   */
  T setWriteThreadMaxYieldUsec(long writeThreadMaxYieldUsec);

  /**
   * The maximum number of microseconds that a write operation will use
   * a yielding spin loop to coordinate with other write threads before
   * blocking on a mutex. (Assuming {@link #writeThreadSlowYieldUsec()} is
   * set properly) increasing this value is likely to increase RocksDB
   * throughput at the expense of increased CPU usage.
   * Default: 100
   *
   * @return the maximum number of microseconds
   */
  long writeThreadMaxYieldUsec();

  /**
   * The latency in microseconds after which a std::this_thread::yield
   * call (sched_yield on Linux) is considered to be a signal that
   * other processes or threads would like to use the current core.
   * Increasing this makes writer threads more likely to take CPU
   * by spinning, which will show up as an increase in the number of
   * involuntary context switches.
   * Default: 3
   *
   * @param writeThreadSlowYieldUsec the latency in microseconds
   *
   * @return the reference to the current options.
   */
  T setWriteThreadSlowYieldUsec(long writeThreadSlowYieldUsec);

  /**
   * The latency in microseconds after which a std::this_thread::yield
   * call (sched_yield on Linux) is considered to be a signal that
   * other processes or threads would like to use the current core.
   * Increasing this makes writer threads more likely to take CPU
   * by spinning, which will show up as an increase in the number of
   * involuntary context switches.
   * Default: 3
   *
   * @return the latency in microseconds
   */
  long writeThreadSlowYieldUsec();

  /**
   * If true, then DB::Open() will not update the statistics used to optimize
   * compaction decisions by loading table properties from many files.
   * Turning off this feature will improve DBOpen time especially in
   * disk environment.
   *
   * Default: false
   *
   * @param skipStatsUpdateOnDbOpen true if updating stats will be skipped
   *
   * @return the reference to the current options.
   */
  T setSkipStatsUpdateOnDbOpen(boolean skipStatsUpdateOnDbOpen);

  /**
   * If true, then DB::Open() will not update the statistics used to optimize
   * compaction decisions by loading table properties from many files.
   * Turning off this feature will improve DBOpen time especially in
   * disk environment.
   *
   * Default: false
   *
   * @return true if updating stats will be skipped
   */
  boolean skipStatsUpdateOnDbOpen();

  /**
   * Recovery mode to control the consistency while replaying WAL.
   *
   * Default: {@link WALRecoveryMode#PointInTimeRecovery}
   *
   * @param walRecoveryMode The WAL recovery mode
   *
   * @return the reference to the current options.
   */
  T setWalRecoveryMode(WALRecoveryMode walRecoveryMode);
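
  // Sketch: requiring fully consistent WAL replay on open
  // (the "options" instance is an assumption):
  //
  //   options.setWalRecoveryMode(WALRecoveryMode.AbsoluteConsistency);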

  /**
   * Recovery mode to control the consistency while replaying WAL.
   *
   * Default: {@link WALRecoveryMode#PointInTimeRecovery}
   *
   * @return The WAL recovery mode
   */
  WALRecoveryMode walRecoveryMode();

  /**
   * If set to false then recovery will fail when a prepared
   * transaction is encountered in the WAL.
   *
   * Default: false
   *
   * @param allow2pc true if two-phase-commit is enabled
   *
   * @return the reference to the current options.
   */
  T setAllow2pc(boolean allow2pc);

  /**
   * If set to false then recovery will fail when a prepared
   * transaction is encountered in the WAL.
   *
   * Default: false
   *
   * @return true if two-phase-commit is enabled
   */
  boolean allow2pc();

  /**
   * A global cache for table-level rows.
   *
   * Default: null (disabled)
   *
   * @param rowCache The global row cache
   *
   * @return the reference to the current options.
   */
  T setRowCache(final Cache rowCache);
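
  // Sketch: enabling a 64 MB row cache (the size and the "options" instance
  // are assumptions):
  //
  //   options.setRowCache(new LRUCache(64L * 1024 * 1024));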

  /**
   * A global cache for table-level rows.
   *
   * Default: null (disabled)
   *
   * @return The global row cache
   */
  Cache rowCache();

  /**
   * A filter object supplied to be invoked while processing write-ahead-logs
   * (WALs) during recovery. The filter provides a way to inspect log
   * records, ignoring a particular record or skipping replay.
   * The filter is invoked at startup and is currently invoked from a
   * single thread.
   *
   * @param walFilter the filter for processing WALs during recovery.
   *
   * @return the reference to the current options.
   */
  T setWalFilter(final AbstractWalFilter walFilter);

  /**
   * Gets the filter for processing WALs during recovery.
   * See {@link #setWalFilter(AbstractWalFilter)}.
   *
   * @return the filter used for processing WALs during recovery.
   */
  WalFilter walFilter();

  /**
   * If true, then DB::Open / CreateColumnFamily / DropColumnFamily
   * / SetOptions will fail if the options file is not detected or properly
   * persisted.
   *
   * DEFAULT: false
   *
   * @param failIfOptionsFileError true if we should fail if there is an error
   *     in the options file
   *
   * @return the reference to the current options.
   */
  T setFailIfOptionsFileError(boolean failIfOptionsFileError);

  /**
   * If true, then DB::Open / CreateColumnFamily / DropColumnFamily
   * / SetOptions will fail if the options file is not detected or properly
   * persisted.
   *
   * DEFAULT: false
   *
   * @return true if we should fail if there is an error in the options file
   */
  boolean failIfOptionsFileError();

  /**
   * If true, then print malloc stats together with rocksdb.stats
   * when printing to LOG.
   *
   * DEFAULT: false
   *
   * @param dumpMallocStats true if malloc stats should be printed to LOG
   *
   * @return the reference to the current options.
   */
  T setDumpMallocStats(boolean dumpMallocStats);

  /**
   * If true, then print malloc stats together with rocksdb.stats
   * when printing to LOG.
   *
   * DEFAULT: false
   *
   * @return true if malloc stats should be printed to LOG
   */
  boolean dumpMallocStats();

  /**
   * By default RocksDB replays WAL logs and flushes them on DB open, which may
   * create very small SST files. If this option is enabled, RocksDB will try
   * to avoid (but not guarantee not to) flush during recovery. Also, existing
   * WAL logs will be kept, so that if a crash happened before flush, we still
   * have logs to recover from.
   *
   * DEFAULT: false
   *
   * @param avoidFlushDuringRecovery true to try to avoid (but not guarantee
   *     not to) flush during recovery
   *
   * @return the reference to the current options.
   */
  T setAvoidFlushDuringRecovery(boolean avoidFlushDuringRecovery);

  /**
   * By default RocksDB replays WAL logs and flushes them on DB open, which may
   * create very small SST files. If this option is enabled, RocksDB will try
   * to avoid (but not guarantee not to) flush during recovery. Also, existing
   * WAL logs will be kept, so that if a crash happened before flush, we still
   * have logs to recover from.
   *
   * DEFAULT: false
   *
   * @return true to try to avoid (but not guarantee not to) flush during
   *     recovery
   */
  boolean avoidFlushDuringRecovery();

  /**
   * Set this option to true during creation of the database if you want
   * to be able to ingest behind (call IngestExternalFile() skipping keys
   * that already exist, rather than overwriting matching keys).
   * Setting this option to true will affect 2 things:
   *   1) Disable some internal optimizations around SST file compression.
   *   2) Reserve the bottom-most level for ingested files only.
   *   3) Note that num_levels should be &gt;= 3 if this option is turned on.
   *
   * DEFAULT: false
   *
   * @param allowIngestBehind true to allow ingest behind, false to disallow.
   *
   * @return the reference to the current options.
   */
  T setAllowIngestBehind(final boolean allowIngestBehind);

  /**
   * Returns true if ingest behind is allowed.
   * See {@link #setAllowIngestBehind(boolean)}.
   *
   * @return true if ingest behind is allowed, false otherwise.
   */
  boolean allowIngestBehind();

  /**
   * Needed to support differential snapshots.
   * If set to true then DB will only process deletes with sequence number
   * less than what was set by SetPreserveDeletesSequenceNumber(uint64_t ts).
   * Clients are responsible for periodically calling this method to advance
   * the cutoff time. If this method is never called and preserve_deletes
   * is set to true NO deletes will ever be processed.
   * At the moment this only keeps normal deletes, SingleDeletes will
   * not be preserved.
   *
   * DEFAULT: false
   *
   * @param preserveDeletes true to preserve deletes.
   *
   * @return the reference to the current options.
   */
  T setPreserveDeletes(final boolean preserveDeletes);

  /**
   * Returns true if deletes are preserved.
   * See {@link #setPreserveDeletes(boolean)}.
   *
   * @return true if deletes are preserved, false otherwise.
   */
  boolean preserveDeletes();

  /**
   * If enabled it uses two queues for writes, one for the ones with
   * disable_memtable and one for the ones that also write to memtable. This
   * allows the memtable writes not to lag behind other writes. It can be used
   * to optimize MySQL 2PC in which only the commits, which are serial, write to
   * memtable.
   *
   * DEFAULT: false
   *
   * @param twoWriteQueues true to enable two write queues, false otherwise.
   *
   * @return the reference to the current options.
   */
  T setTwoWriteQueues(final boolean twoWriteQueues);

  /**
   * Returns true if two write queues are enabled.
   *
   * @return true if two write queues are enabled, false otherwise.
   */
  boolean twoWriteQueues();

  /**
   * If true the WAL is not flushed automatically after each write. Instead it
   * relies on manual invocation of FlushWAL to write the WAL buffer to its
   * file.
   *
   * DEFAULT: false
   *
   * @param manualWalFlush true to disable automatic WAL flushing,
   *     false otherwise.
   *
   * @return the reference to the current options.
   */
  T setManualWalFlush(final boolean manualWalFlush);

  /**
   * Returns true if automatic WAL flushing is disabled.
   * See {@link #setManualWalFlush(boolean)}.
   *
   * @return true if automatic WAL flushing is disabled, false otherwise.
   */
  boolean manualWalFlush();

  /**
   * If true, RocksDB supports flushing multiple column families and committing
   * their results atomically to MANIFEST. Note that it is not
   * necessary to set atomic_flush to true if WAL is always enabled since WAL
   * allows the database to be restored to the last persistent state in WAL.
   * This option is useful when there are column families with writes NOT
   * protected by WAL.
   * For manual flush, application has to specify which column families to
   * flush atomically in {@link RocksDB#flush(FlushOptions, List)}.
   * For auto-triggered flush, RocksDB atomically flushes ALL column families.
   *
   * Currently, any WAL-enabled writes after atomic flush may be replayed
   * independently if the process crashes later and tries to recover.
   *
   * @param atomicFlush true to enable atomic flush of multiple column families.
   *
   * @return the reference to the current options.
   */
  T setAtomicFlush(final boolean atomicFlush);

  /**
   * Determine if atomic flush of multiple column families is enabled.
   *
   * See {@link #setAtomicFlush(boolean)}.
   *
   * @return true if atomic flush is enabled.
   */
  boolean atomicFlush();
}