// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#pragma once

#include <cstdint>
#include <memory>
#include <string>
#include <vector>

#include "rocksdb/memtablerep.h"
#include "rocksdb/universal_compaction.h"

namespace rocksdb {

class Slice;
class SliceTransform;
enum CompressionType : unsigned char;
class TablePropertiesCollectorFactory;
class TableFactory;
struct Options;

enum CompactionStyle : char {
  // level based compaction style
  kCompactionStyleLevel = 0x0,
  // Universal compaction style
  // Not supported in ROCKSDB_LITE.
  kCompactionStyleUniversal = 0x1,
  // FIFO compaction style
  // Not supported in ROCKSDB_LITE
  kCompactionStyleFIFO = 0x2,
  // Disable background compaction. Compaction jobs are submitted
  // via CompactFiles().
  // Not supported in ROCKSDB_LITE
  kCompactionStyleNone = 0x3,
};

// In level-based compaction, this determines which file from a level is
// picked to merge into the next level. We suggest trying kMinOverlappingRatio
// first when tuning your database.
enum CompactionPri : char {
  // Slightly prioritize larger files by size compensated by #deletes
  kByCompensatedSize = 0x0,
  // First compact files whose data's latest update time is oldest.
  // Try this if you only update some hot keys in small ranges.
  kOldestLargestSeqFirst = 0x1,
  // First compact files whose range hasn't been compacted to the next level
  // for the longest. If your updates are random across the key space,
  // write amplification is slightly better with this option.
  kOldestSmallestSeqFirst = 0x2,
  // First compact files whose ratio between overlapping size in the next
  // level and its own size is the smallest. In many cases this can minimize
  // write amplification.
  kMinOverlappingRatio = 0x3,
};

struct CompactionOptionsFIFO {
  // Once the total size of table files exceeds this value, the oldest table
  // file will be deleted.
  // Default: 1GB
  uint64_t max_table_files_size;

  // Drop files older than TTL. TTL based deletion will take precedence over
  // size based deletion if ttl > 0.
  // delete if sst_file_creation_time < (current_time - ttl)
  // unit: seconds. Ex: 1 day = 1 * 24 * 60 * 60
  // Default: 0 (disabled)
  uint64_t ttl = 0;

  // If true, try to do compaction to compact smaller files into larger ones.
  // The minimum number of files to compact follows
  // options.level0_file_num_compaction_trigger, and compaction won't trigger
  // if the average compacted bytes per deleted file is larger than
  // options.write_buffer_size. This is to protect large files from being
  // compacted again.
  // Default: false
  bool allow_compaction = false;

  CompactionOptionsFIFO() : max_table_files_size(1 * 1024 * 1024 * 1024) {}
  CompactionOptionsFIFO(uint64_t _max_table_files_size, bool _allow_compaction,
                        uint64_t _ttl = 0)
      : max_table_files_size(_max_table_files_size),
        ttl(_ttl),
        allow_compaction(_allow_compaction) {}
};
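
// Example (a minimal sketch; assumes a ColumnFamilyOptions instance named
// `options`, the values themselves are illustrative): configuring a column
// family for FIFO compaction using the fields documented above.
//
//   rocksdb::ColumnFamilyOptions options;
//   options.compaction_style = rocksdb::kCompactionStyleFIFO;
//   options.compaction_options_fifo.max_table_files_size =
//       2ULL * 1024 * 1024 * 1024;               // keep roughly 2GB of SST files
//   options.compaction_options_fifo.ttl = 7 * 24 * 60 * 60;  // drop after 7 days
//   options.compaction_options_fifo.allow_compaction = true;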

// Compression options for different compression algorithms like Zlib
struct CompressionOptions {
  // RocksDB's generic default compression level. Internally it'll be
  // translated to the default compression level specific to the library being
  // used (see comment above `ColumnFamilyOptions::compression`).
  //
  // The default value is the maximum 16-bit int, as it'll be written out in
  // the OPTIONS file, which should be portable.
  const static int kDefaultCompressionLevel = 32767;

  int window_bits;
  int level;
  int strategy;

  // Maximum size of dictionaries used to prime the compression library.
  // Enabling dictionary can improve compression ratios when there are
  // repetitions across data blocks.
  //
  // The dictionary is created by sampling the SST file data. If
  // `zstd_max_train_bytes` is nonzero, the samples are passed through zstd's
  // dictionary generator. Otherwise, the random samples are used directly as
  // the dictionary.
  //
  // When compression dictionary is disabled, we compress and write each block
  // before buffering data for the next one. When compression dictionary is
  // enabled, we buffer all SST file data in-memory so we can sample it, as
  // data can only be compressed and written after the dictionary has been
  // finalized. So users of this feature may see increased memory usage.
  //
  // Default: 0.
  uint32_t max_dict_bytes;

  // Maximum size of training data passed to zstd's dictionary trainer. Using
  // zstd's dictionary trainer can achieve even better compression ratio
  // improvements than using `max_dict_bytes` alone.
  //
  // The training data will be used to generate a dictionary of max_dict_bytes.
  //
  // Default: 0.
  uint32_t zstd_max_train_bytes;

  // This flag is set to true when the compression options are set by the
  // user. For bottommost_compression_opts, the user must set enabled=true to
  // enable it; otherwise, bottommost compression will use compression_opts as
  // the default compression options.
  //
  // For compression_opts, even if compression_opts.enabled=false, it is still
  // used as the compression options for the compression process.
  //
  // Default: false.
  bool enabled;

  CompressionOptions()
      : window_bits(-14),
        level(kDefaultCompressionLevel),
        strategy(0),
        max_dict_bytes(0),
        zstd_max_train_bytes(0),
        enabled(false) {}
  CompressionOptions(int wbits, int _lev, int _strategy, int _max_dict_bytes,
                     int _zstd_max_train_bytes, bool _enabled)
      : window_bits(wbits),
        level(_lev),
        strategy(_strategy),
        max_dict_bytes(_max_dict_bytes),
        zstd_max_train_bytes(_zstd_max_train_bytes),
        enabled(_enabled) {}
};
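
// Example (a minimal sketch; assumes an Options/ColumnFamilyOptions instance
// named `options` and a build with zstd support): enabling dictionary
// compression as described above for `max_dict_bytes` and
// `zstd_max_train_bytes`. The byte counts are illustrative only.
//
//   options.compression = rocksdb::kZSTD;
//   options.compression_opts.max_dict_bytes = 16 * 1024;              // 16KB dictionary
//   options.compression_opts.zstd_max_train_bytes = 100 * 16 * 1024;  // training sample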

enum UpdateStatus {     // Return status for inplace update callback
  UPDATE_FAILED = 0,    // Nothing to update
  UPDATED_INPLACE = 1,  // Value updated inplace
  UPDATED = 2,          // No inplace update. Merged value set
};


struct AdvancedColumnFamilyOptions {
  // The maximum number of write buffers that are built up in memory.
  // The default and the minimum number is 2, so that when 1 write buffer
  // is being flushed to storage, new writes can continue to the other
  // write buffer.
  // If max_write_buffer_number > 3, writing will be slowed down to
  // options.delayed_write_rate if we are writing to the last write buffer
  // allowed.
  //
  // Default: 2
  //
  // Dynamically changeable through SetOptions() API
  int max_write_buffer_number = 2;

  // The minimum number of write buffers that will be merged together
  // before writing to storage. If set to 1, then all write buffers are
  // flushed to L0 as individual files, and this increases read amplification
  // because a get request has to check all of these files. Also, an in-memory
  // merge may result in writing less data to storage if there are duplicate
  // records in each of these individual write buffers.
  // Default: 1
  int min_write_buffer_number_to_merge = 1;

  // The total maximum number of write buffers to maintain in memory including
  // copies of buffers that have already been flushed. Unlike
  // max_write_buffer_number, this parameter does not affect flushing.
  // This controls the minimum amount of write history that will be available
  // in memory for conflict checking when Transactions are used.
  //
  // When using an OptimisticTransactionDB:
  // If this value is too low, some transactions may fail at commit time due
  // to not being able to determine whether there were any write conflicts.
  //
  // When using a TransactionDB:
  // If Transaction::SetSnapshot is used, TransactionDB will read either
  // in-memory write buffers or SST files to do write-conflict checking.
  // Increasing this value can reduce the number of reads to SST files
  // done for conflict detection.
  //
  // Setting this value to 0 will cause write buffers to be freed immediately
  // after they are flushed.
  // If this value is set to -1, 'max_write_buffer_number' will be used.
  //
  // Default:
  // If using a TransactionDB/OptimisticTransactionDB, the default value will
  // be set to the value of 'max_write_buffer_number' if it is not explicitly
  // set by the user. Otherwise, the default is 0.
  int max_write_buffer_number_to_maintain = 0;

  // Allows thread-safe inplace updates. If this is true, there is no way to
  // achieve point-in-time consistency using snapshot or iterator (assuming
  // concurrent updates). Hence iterator and multi-get will return results
  // which are not consistent as of any point-in-time.
  // If the inplace_callback function is not set,
  //   Put(key, new_value) will update the existing_value inplace iff
  //   * key exists in the current memtable
  //   * sizeof(new_value) <= sizeof(existing_value)
  //   * existing_value for that key is a put i.e. kTypeValue
  // If the inplace_callback function is set, check the doc for
  // inplace_callback.
  // Default: false.
  bool inplace_update_support = false;

  // Number of locks used for inplace update
  // Default: 10000, if inplace_update_support = true, else 0.
  //
  // Dynamically changeable through SetOptions() API
  size_t inplace_update_num_locks = 10000;

  // existing_value - pointer to previous value (from both memtable and sst).
  //                  nullptr if key doesn't exist
  // existing_value_size - pointer to size of existing_value.
  //                       nullptr if key doesn't exist
  // delta_value - Delta value to be merged with the existing_value.
  //               Stored in transaction logs.
  // merged_value - Set when delta is applied on the previous value.

  // Applicable only when inplace_update_support is true,
  // this callback function is called at the time of updating the memtable
  // as part of a Put operation, let's say Put(key, delta_value). It allows the
  // 'delta_value' specified as part of the Put operation to be merged with
  // an 'existing_value' of the key in the database.

  // If the merged value is smaller in size than the 'existing_value',
  // then this function can update the 'existing_value' buffer inplace and
  // the corresponding 'existing_value_size' pointer, if it wishes to.
  // The callback should return UpdateStatus::UPDATED_INPLACE in this case.
  // (In this case, the snapshot-semantics of the rocksdb Iterator is not
  // atomic anymore).

  // If the merged value is larger in size than the 'existing_value' or the
  // application does not wish to modify the 'existing_value' buffer inplace,
  // then the merged value should be returned via *merged_value. It is set by
  // merging the 'existing_value' and the Put 'delta_value'. The callback
  // should return UpdateStatus::UPDATED in this case. This merged value will
  // be added to the memtable.

  // If merging fails or the application does not wish to take any action,
  // then the callback should return UpdateStatus::UPDATE_FAILED.

  // Please remember that the original call from the application is Put(key,
  // delta_value). So the transaction log (if enabled) will still contain (key,
  // delta_value). The 'merged_value' is not stored in the transaction log.
  // Hence the inplace_callback function should be consistent across db
  // reopens.

  // Default: nullptr
  UpdateStatus (*inplace_callback)(char* existing_value,
                                   uint32_t* existing_value_size,
                                   Slice delta_value,
                                   std::string* merged_value) = nullptr;
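
  // Example (a hedged sketch; `UpdateValueInPlace` is a hypothetical callback,
  // not a RocksDB API): overwrite the existing value in place when the new
  // value fits, otherwise hand back a full replacement via *merged_value.
  //
  //   UpdateStatus UpdateValueInPlace(char* existing_value,
  //                                   uint32_t* existing_value_size,
  //                                   Slice delta_value,
  //                                   std::string* merged_value) {
  //     if (existing_value != nullptr &&
  //         delta_value.size() <= *existing_value_size) {
  //       memcpy(existing_value, delta_value.data(), delta_value.size());
  //       *existing_value_size = static_cast<uint32_t>(delta_value.size());
  //       return UpdateStatus::UPDATED_INPLACE;  // updated without reallocation
  //     }
  //     merged_value->assign(delta_value.data(), delta_value.size());
  //     return UpdateStatus::UPDATED;            // merged value goes to memtable
  //   }
  //
  //   options.inplace_update_support = true;
  //   options.inplace_callback = UpdateValueInPlace;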

  // If prefix_extractor is set and memtable_prefix_bloom_size_ratio is not 0,
  // a prefix bloom filter is created for the memtable, with a size of
  // write_buffer_size * memtable_prefix_bloom_size_ratio.
  // If it is larger than 0.25, it is sanitized to 0.25.
  //
  // Default: 0 (disable)
  //
  // Dynamically changeable through SetOptions() API
  double memtable_prefix_bloom_size_ratio = 0.0;
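
  // Example (a minimal sketch; assumes an Options instance named `options`):
  // the memtable prefix bloom only takes effect when prefix_extractor is also
  // set on the same options object.
  //
  //   options.prefix_extractor.reset(rocksdb::NewFixedPrefixTransform(4));
  //   options.memtable_prefix_bloom_size_ratio = 0.1;  // 10% of write_buffer_size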

  // Page size for huge page for the arena used by the memtable. If <=0, it
  // won't allocate from huge pages but from malloc.
  // Users are responsible for reserving huge pages for it to be allocated.
  // For example:
  //      sysctl -w vm.nr_hugepages=20
  // See linux doc Documentation/vm/hugetlbpage.txt
  // If there aren't enough free huge pages available, it will fall back to
  // malloc.
  //
  // Dynamically changeable through SetOptions() API
  size_t memtable_huge_page_size = 0;

  // If non-nullptr, memtable will use the specified function to extract
  // prefixes for keys, and for each prefix maintain a hint of insert location
  // to reduce CPU usage for inserting keys with the prefix. Keys outside the
  // domain of the prefix extractor will be inserted without using hints.
  //
  // Currently only the default skiplist based memtable implements the feature.
  // All other memtable implementations will ignore the option. It incurs ~250
  // additional bytes of memory overhead to store a hint for each prefix.
  // Also concurrent writes (when allow_concurrent_memtable_write is true) will
  // ignore the option.
  //
  // The option is best suited for workloads where keys are likely to be
  // inserted at a location close to the last inserted key with the same
  // prefix. One example could be inserting keys of the form
  // (prefix + timestamp), where keys of the same prefix always come in time
  // order. Another example would be updating the same key over and over
  // again, in which case the prefix can be the key itself.
  //
  // Default: nullptr (disable)
  std::shared_ptr<const SliceTransform>
      memtable_insert_with_hint_prefix_extractor = nullptr;
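
  // Example (a minimal sketch; assumes an Options instance named `options`
  // whose keys start with a fixed 8-byte prefix, e.g. prefix + timestamp):
  //
  //   options.memtable_insert_with_hint_prefix_extractor.reset(
  //       rocksdb::NewFixedPrefixTransform(8));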

  // Control locality of bloom filter probes to improve cache miss rate.
  // This option only applies to memtable prefix bloom and plaintable
  // prefix bloom. It essentially limits every bloom checking to one cache
  // line. This optimization is turned off when set to 0; set it to a positive
  // number to turn it on.
  // Default: 0
  uint32_t bloom_locality = 0;

  // Size of one block in arena memory allocation.
  // If <= 0, a proper value is automatically calculated (usually 1/8 of
  // write_buffer_size, rounded up to a multiple of 4KB).
  //
  // There are two additional restrictions on the specified size:
  // (1) size should be in the range of [4096, 2 << 30] and
  // (2) it should be a multiple of the CPU word size (which helps with the
  //     memory alignment).
  //
  // We'll automatically check and adjust the size number to make sure it
  // conforms to the restrictions.
  //
  // Default: 0
  //
  // Dynamically changeable through SetOptions() API
  size_t arena_block_size = 0;

  // Different levels can have different compression policies. There
  // are cases where most lower levels would like to use quick compression
  // algorithms while the higher levels (which have more data) use
  // compression algorithms that have better compression but could
  // be slower. This array, if non-empty, should have an entry for
  // each level of the database; these override the value specified in
  // the previous field 'compression'.
  //
  // NOTICE if level_compaction_dynamic_level_bytes=true,
  // compression_per_level[0] still determines L0, but other elements
  // of the array are based on the base level (the level L0 files are merged
  // to), and may not match the level users see from the info log for
  // metadata. If L0 files are merged to level-n, then, for i>0,
  // compression_per_level[i] determines the compression type for level n+i-1.
  // For example, if we have five levels, and we determine to merge L0
  // data to L4 (which means L1..L3 will be empty), then new files going to
  // L4 use compression type compression_per_level[1].
  // If later L0 is merged to L2, data going to L2 will be compressed
  // according to compression_per_level[1], L3 using compression_per_level[2]
  // and L4 using compression_per_level[3]. Compaction for each level can
  // change when data grows.
  std::vector<CompressionType> compression_per_level;
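
  // Example (a minimal sketch; assumes num_levels = 7 and an Options instance
  // named `options`): no compression for L0/L1, LZ4 for the middle levels and
  // ZSTD for the bottommost level.
  //
  //   options.compression_per_level = {
  //       rocksdb::kNoCompression,  rocksdb::kNoCompression,
  //       rocksdb::kLZ4Compression, rocksdb::kLZ4Compression,
  //       rocksdb::kLZ4Compression, rocksdb::kLZ4Compression,
  //       rocksdb::kZSTD};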

  // Number of levels for this database
  int num_levels = 7;

  // Soft limit on number of level-0 files. We start slowing down writes at
  // this point. A value < 0 means that no write slowdown will be triggered by
  // the number of files in level-0.
  //
  // Default: 20
  //
  // Dynamically changeable through SetOptions() API
  int level0_slowdown_writes_trigger = 20;

  // Maximum number of level-0 files. We stop writes at this point.
  //
  // Default: 36
  //
  // Dynamically changeable through SetOptions() API
  int level0_stop_writes_trigger = 36;

  // Target file size for compaction.
  // target_file_size_base is per-file size for level-1.
  // Target file size for level L can be calculated by
  // target_file_size_base * (target_file_size_multiplier ^ (L-1))
  // For example, if target_file_size_base is 2MB and
  // target_file_size_multiplier is 10, then each file on level-1 will
  // be 2MB, and each file on level-2 will be 20MB,
  // and each file on level-3 will be 200MB.
  //
  // Default: 64MB.
  //
  // Dynamically changeable through SetOptions() API
  uint64_t target_file_size_base = 64 * 1048576;

  // By default target_file_size_multiplier is 1, which means
  // by default files in different levels will have similar size.
  //
  // Dynamically changeable through SetOptions() API
  int target_file_size_multiplier = 1;

  // If true, RocksDB will pick target size of each level dynamically.
  // We will pick a base level b >= 1. L0 will be directly merged into level b,
  // instead of always into level 1. Level 1 to b-1 need to be empty.
  // We try to pick b and its target size so that
  // 1. target size is in the range of
  //   (max_bytes_for_level_base / max_bytes_for_level_multiplier,
  //    max_bytes_for_level_base]
  // 2. target size of the last level (level num_levels-1) equals the actual
  //    size of the level.
  // At the same time max_bytes_for_level_multiplier and
  // max_bytes_for_level_multiplier_additional are still satisfied.
  //
  // With this option on, starting from an empty DB, we make the last level
  // the base level, which means merging L0 data into the last level, until
  // it exceeds max_bytes_for_level_base. We then make the second-to-last
  // level the base level and start merging L0 data into it, with its target
  // size being 1/max_bytes_for_level_multiplier of the last level's actual
  // size. As the data accumulates further, we move the base level to the
  // third-to-last level, and so on.
  //
  // For example, assume max_bytes_for_level_multiplier=10, num_levels=6,
  // and max_bytes_for_level_base=10MB.
  // Target sizes of level 1 to 5 start with:
  // [- - - - 10MB]
  // with the base level being level 5. Target sizes of levels 1 to 4 are not
  // applicable because they will not be used.
  // Once the size of level 5 grows to more than 10MB, say 11MB, we make
  // level 4 the base level and now the targets look like:
  // [- - - 1.1MB 11MB]
  // While data is accumulated, size targets are tuned based on the actual
  // data of level 5. When level 5 has 50MB of data, the targets are like:
  // [- - - 5MB 50MB]
  // Until level 5's actual size is more than 100MB, say 101MB. Now if we keep
  // level 4 as the base level, its target size needs to be 10.1MB, which
  // doesn't satisfy the target size range. So now we make level 3 the base
  // level and the target sizes of the levels look like:
  // [- - 1.01MB 10.1MB 101MB]
  // In the same way, while level 5 further grows, all levels' targets grow,
  // like
  // [- - 5MB 50MB 500MB]
  // Until level 5 exceeds 1000MB and becomes 1001MB, we make level 2 the
  // base level and make the levels' target sizes like this:
  // [- 1.001MB 10.01MB 100.1MB 1001MB]
  // and so on...
  //
  // By doing this, we give max_bytes_for_level_multiplier priority over
  // max_bytes_for_level_base, for a more predictable LSM tree shape. It is
  // useful to limit worst-case space amplification.
  //
  // max_bytes_for_level_multiplier_additional is ignored with this flag on.
  //
  // Turning this feature on or off for an existing DB can cause an unexpected
  // LSM tree structure so it's not recommended.
  //
  // Default: false
  bool level_compaction_dynamic_level_bytes = false;

  // Default: 10.
  //
  // Dynamically changeable through SetOptions() API
  double max_bytes_for_level_multiplier = 10;

  // Different max-size multipliers for different levels.
  // These are multiplied by max_bytes_for_level_multiplier to arrive
  // at the max-size of each level.
  //
  // Default: 1
  //
  // Dynamically changeable through SetOptions() API
  std::vector<int> max_bytes_for_level_multiplier_additional =
      std::vector<int>(num_levels, 1);
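
  // Example (illustrative arithmetic only, assuming max_bytes_for_level_base =
  // 256MB and max_bytes_for_level_multiplier = 10): with
  // max_bytes_for_level_multiplier_additional = {1, 5, 1, 1, 1, 1}, level-1
  // targets 256MB, level-2 targets roughly 256MB * 10 * 5 = 12.8GB, and
  // level-3 targets roughly 12.8GB * 10 * 1 = 128GB.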

  // We try to limit the number of bytes in one compaction to be lower than
  // this threshold. But it's not guaranteed.
  // Value 0 will be sanitized.
  //
  // Default: result.target_file_size_base * 25
  uint64_t max_compaction_bytes = 0;

  // All writes will be slowed down to at least delayed_write_rate if the
  // estimated bytes needing compaction exceed this threshold.
  //
  // Default: 64GB
  uint64_t soft_pending_compaction_bytes_limit = 64 * 1073741824ull;

  // All writes are stopped if the estimated bytes needing compaction exceed
  // this threshold.
  //
  // Default: 256GB
  uint64_t hard_pending_compaction_bytes_limit = 256 * 1073741824ull;

  // The compaction style. Default: kCompactionStyleLevel
  CompactionStyle compaction_style = kCompactionStyleLevel;

  // If compaction_style = kCompactionStyleLevel, for each level, this
  // determines which files are prioritized to be picked for compaction.
  // Default: kByCompensatedSize
  CompactionPri compaction_pri = kByCompensatedSize;

  // The options needed to support Universal Style compactions
  CompactionOptionsUniversal compaction_options_universal;

  // The options for FIFO compaction style
  //
  // Dynamically changeable through SetOptions() API
  // Dynamic change example:
  // SetOptions("compaction_options_fifo", "{max_table_files_size=100;ttl=2;}")
  CompactionOptionsFIFO compaction_options_fifo;

  // An iterator->Next() sequentially skips over keys with the same
  // user key unless this option is set. This number specifies the number
  // of keys (with the same user key) that will be sequentially
  // skipped before a reseek is issued.
  //
  // Default: 8
  //
  // Dynamically changeable through SetOptions() API
  uint64_t max_sequential_skip_in_iterations = 8;

  // This is a factory that provides MemTableRep objects.
  // Default: a factory that provides a skip-list-based implementation of
  // MemTableRep.
  std::shared_ptr<MemTableRepFactory> memtable_factory =
      std::shared_ptr<SkipListFactory>(new SkipListFactory);
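
  // Example (a hedged sketch; assumes an Options instance named `options` and
  // a non-LITE build): switching to the hash-skiplist memtable, which also
  // requires a prefix extractor to be configured.
  //
  //   options.prefix_extractor.reset(rocksdb::NewFixedPrefixTransform(4));
  //   options.memtable_factory.reset(rocksdb::NewHashSkipListRepFactory());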

  // Block-based table related options are moved to BlockBasedTableOptions.
  // Related options that were originally here but now moved include:
  //   no_block_cache
  //   block_cache
  //   block_cache_compressed
  //   block_size
  //   block_size_deviation
  //   block_restart_interval
  //   filter_policy
  //   whole_key_filtering
  // If you'd like to customize some of these options, you will need to
  // use NewBlockBasedTableFactory() to construct a new table factory.

  // This option allows users to collect their own interested statistics of
  // the tables.
  // Default: empty vector -- no user-defined statistics collection will be
  // performed.
  typedef std::vector<std::shared_ptr<TablePropertiesCollectorFactory>>
      TablePropertiesCollectorFactories;
  TablePropertiesCollectorFactories table_properties_collector_factories;

  // Maximum number of successive merge operations on a key in the memtable.
  //
  // When a merge operation is added to the memtable and the maximum number of
  // successive merges is reached, the value of the key will be calculated and
  // inserted into the memtable instead of the merge operation. This will
  // ensure that there are never more than max_successive_merges merge
  // operations in the memtable.
  //
  // Default: 0 (disabled)
  //
  // Dynamically changeable through SetOptions() API
  size_t max_successive_merges = 0;

  // This flag specifies that the implementation should optimize the filters
  // mainly for cases where keys are found rather than also optimize for keys
  // missed. This would be used in cases where the application knows that
  // there are very few misses or the performance in the case of misses is not
  // important.
  //
  // For now, this flag allows us to not store filters for the last level,
  // i.e. the largest level which contains data of the LSM store. For keys
  // which are hits, the filters in this level are not useful because we will
  // search for the data anyway. NOTE: the filters in other levels are still
  // useful even for key hits because they tell us whether to look in that
  // level or go to the higher level.
  //
  // Default: false
  bool optimize_filters_for_hits = false;

  // After writing every SST file, reopen it and read all the keys.
  // Default: false
  bool paranoid_file_checks = false;

  // In debug mode, RocksDB runs consistency checks on the LSM every time the
  // LSM changes (Flush, Compaction, AddFile). These checks are disabled in
  // release mode; use this option to enable them in release mode as well.
  // Default: false
  bool force_consistency_checks = false;

  // Measure IO stats in compactions and flushes, if true.
  // Default: false
  bool report_bg_io_stats = false;

  // Non-bottom-level files older than TTL will go through the compaction
  // process. This requires max_open_files to be set to -1.
  // Enabled only for level compaction for now.
  //
  // Default: 0 (disabled)
  //
  // Dynamically changeable through SetOptions() API
  uint64_t ttl = 0;

  // Create ColumnFamilyOptions with default values for all fields
  AdvancedColumnFamilyOptions();
  // Create ColumnFamilyOptions from Options
  explicit AdvancedColumnFamilyOptions(const Options& options);

  // ---------------- OPTIONS NOT SUPPORTED ANYMORE ----------------

  // NOT SUPPORTED ANYMORE
  // This does not do anything anymore.
  int max_mem_compaction_level;

  // NOT SUPPORTED ANYMORE -- this option is no longer used
  // Puts are delayed to options.delayed_write_rate when any level has a
  // compaction score that exceeds soft_rate_limit. This is ignored when == 0.0.
  //
  // Default: 0 (disabled)
  //
  // Dynamically changeable through SetOptions() API
  double soft_rate_limit = 0.0;

  // NOT SUPPORTED ANYMORE -- this option is no longer used
  double hard_rate_limit = 0.0;

  // NOT SUPPORTED ANYMORE -- this option is no longer used
  unsigned int rate_limit_delay_max_milliseconds = 100;

  // NOT SUPPORTED ANYMORE
  // Does not have any effect.
  bool purge_redundant_kvs_while_flush = true;
};

}  // namespace rocksdb