// Source: ceph.git (git.proxmox.com), ceph/src/rocksdb/options/options.cc,
// as imported with "quincy beta 17.1.0".
//
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

10#include "rocksdb/options.h"
11
f67539c2 12#include <cinttypes>
7c673cae
FG
13#include <limits>
14
15#include "monitoring/statistics.h"
16#include "options/db_options.h"
17#include "options/options_helper.h"
18#include "rocksdb/cache.h"
19#include "rocksdb/compaction_filter.h"
20#include "rocksdb/comparator.h"
21#include "rocksdb/env.h"
22#include "rocksdb/memtablerep.h"
23#include "rocksdb/merge_operator.h"
24#include "rocksdb/slice.h"
25#include "rocksdb/slice_transform.h"
26#include "rocksdb/sst_file_manager.h"
20effc67 27#include "rocksdb/sst_partitioner.h"
7c673cae
FG
28#include "rocksdb/table.h"
29#include "rocksdb/table_properties.h"
30#include "rocksdb/wal_filter.h"
f67539c2 31#include "table/block_based/block_based_table_factory.h"
7c673cae
FG
32#include "util/compression.h"
33
f67539c2 34namespace ROCKSDB_NAMESPACE {
7c673cae
FG
35
36AdvancedColumnFamilyOptions::AdvancedColumnFamilyOptions() {
37 assert(memtable_factory.get() != nullptr);
38}
39
40AdvancedColumnFamilyOptions::AdvancedColumnFamilyOptions(const Options& options)
41 : max_write_buffer_number(options.max_write_buffer_number),
42 min_write_buffer_number_to_merge(
43 options.min_write_buffer_number_to_merge),
44 max_write_buffer_number_to_maintain(
45 options.max_write_buffer_number_to_maintain),
f67539c2
TL
46 max_write_buffer_size_to_maintain(
47 options.max_write_buffer_size_to_maintain),
7c673cae
FG
48 inplace_update_support(options.inplace_update_support),
49 inplace_update_num_locks(options.inplace_update_num_locks),
50 inplace_callback(options.inplace_callback),
51 memtable_prefix_bloom_size_ratio(
52 options.memtable_prefix_bloom_size_ratio),
494da23a 53 memtable_whole_key_filtering(options.memtable_whole_key_filtering),
7c673cae
FG
54 memtable_huge_page_size(options.memtable_huge_page_size),
55 memtable_insert_with_hint_prefix_extractor(
56 options.memtable_insert_with_hint_prefix_extractor),
57 bloom_locality(options.bloom_locality),
58 arena_block_size(options.arena_block_size),
59 compression_per_level(options.compression_per_level),
60 num_levels(options.num_levels),
61 level0_slowdown_writes_trigger(options.level0_slowdown_writes_trigger),
62 level0_stop_writes_trigger(options.level0_stop_writes_trigger),
63 target_file_size_base(options.target_file_size_base),
64 target_file_size_multiplier(options.target_file_size_multiplier),
65 level_compaction_dynamic_level_bytes(
66 options.level_compaction_dynamic_level_bytes),
67 max_bytes_for_level_multiplier(options.max_bytes_for_level_multiplier),
68 max_bytes_for_level_multiplier_additional(
69 options.max_bytes_for_level_multiplier_additional),
70 max_compaction_bytes(options.max_compaction_bytes),
71 soft_pending_compaction_bytes_limit(
72 options.soft_pending_compaction_bytes_limit),
73 hard_pending_compaction_bytes_limit(
74 options.hard_pending_compaction_bytes_limit),
75 compaction_style(options.compaction_style),
76 compaction_pri(options.compaction_pri),
77 compaction_options_universal(options.compaction_options_universal),
78 compaction_options_fifo(options.compaction_options_fifo),
79 max_sequential_skip_in_iterations(
80 options.max_sequential_skip_in_iterations),
81 memtable_factory(options.memtable_factory),
82 table_properties_collector_factories(
83 options.table_properties_collector_factories),
84 max_successive_merges(options.max_successive_merges),
85 optimize_filters_for_hits(options.optimize_filters_for_hits),
86 paranoid_file_checks(options.paranoid_file_checks),
87 force_consistency_checks(options.force_consistency_checks),
11fdf7f2 88 report_bg_io_stats(options.report_bg_io_stats),
494da23a 89 ttl(options.ttl),
f67539c2 90 periodic_compaction_seconds(options.periodic_compaction_seconds),
20effc67
TL
91 sample_for_compression(options.sample_for_compression),
92 enable_blob_files(options.enable_blob_files),
93 min_blob_size(options.min_blob_size),
94 blob_file_size(options.blob_file_size),
95 blob_compression_type(options.blob_compression_type),
96 enable_blob_garbage_collection(options.enable_blob_garbage_collection),
97 blob_garbage_collection_age_cutoff(
98 options.blob_garbage_collection_age_cutoff) {
7c673cae
FG
99 assert(memtable_factory.get() != nullptr);
100 if (max_bytes_for_level_multiplier_additional.size() <
101 static_cast<unsigned int>(num_levels)) {
102 max_bytes_for_level_multiplier_additional.resize(num_levels, 1);
103 }
104}
105
106ColumnFamilyOptions::ColumnFamilyOptions()
107 : compression(Snappy_Supported() ? kSnappyCompression : kNoCompression),
108 table_factory(
109 std::shared_ptr<TableFactory>(new BlockBasedTableFactory())) {}
110
111ColumnFamilyOptions::ColumnFamilyOptions(const Options& options)
11fdf7f2 112 : ColumnFamilyOptions(*static_cast<const ColumnFamilyOptions*>(&options)) {}
7c673cae
FG
113
114DBOptions::DBOptions() {}
7c673cae 115DBOptions::DBOptions(const Options& options)
11fdf7f2 116 : DBOptions(*static_cast<const DBOptions*>(&options)) {}
7c673cae
FG
117
118void DBOptions::Dump(Logger* log) const {
119 ImmutableDBOptions(*this).Dump(log);
120 MutableDBOptions(*this).Dump(log);
121} // DBOptions::Dump
122
123void ColumnFamilyOptions::Dump(Logger* log) const {
124 ROCKS_LOG_HEADER(log, " Options.comparator: %s",
125 comparator->Name());
126 ROCKS_LOG_HEADER(log, " Options.merge_operator: %s",
127 merge_operator ? merge_operator->Name() : "None");
128 ROCKS_LOG_HEADER(log, " Options.compaction_filter: %s",
129 compaction_filter ? compaction_filter->Name() : "None");
130 ROCKS_LOG_HEADER(
131 log, " Options.compaction_filter_factory: %s",
132 compaction_filter_factory ? compaction_filter_factory->Name() : "None");
20effc67
TL
133 ROCKS_LOG_HEADER(
134 log, " Options.sst_partitioner_factory: %s",
135 sst_partitioner_factory ? sst_partitioner_factory->Name() : "None");
7c673cae
FG
136 ROCKS_LOG_HEADER(log, " Options.memtable_factory: %s",
137 memtable_factory->Name());
138 ROCKS_LOG_HEADER(log, " Options.table_factory: %s",
139 table_factory->Name());
140 ROCKS_LOG_HEADER(log, " table_factory options: %s",
20effc67 141 table_factory->GetPrintableOptions().c_str());
7c673cae
FG
142 ROCKS_LOG_HEADER(log, " Options.write_buffer_size: %" ROCKSDB_PRIszt,
143 write_buffer_size);
144 ROCKS_LOG_HEADER(log, " Options.max_write_buffer_number: %d",
145 max_write_buffer_number);
146 if (!compression_per_level.empty()) {
147 for (unsigned int i = 0; i < compression_per_level.size(); i++) {
148 ROCKS_LOG_HEADER(
149 log, " Options.compression[%d]: %s", i,
150 CompressionTypeToString(compression_per_level[i]).c_str());
151 }
152 } else {
153 ROCKS_LOG_HEADER(log, " Options.compression: %s",
154 CompressionTypeToString(compression).c_str());
155 }
156 ROCKS_LOG_HEADER(
157 log, " Options.bottommost_compression: %s",
158 bottommost_compression == kDisableCompressionOption
159 ? "Disabled"
160 : CompressionTypeToString(bottommost_compression).c_str());
161 ROCKS_LOG_HEADER(
162 log, " Options.prefix_extractor: %s",
163 prefix_extractor == nullptr ? "nullptr" : prefix_extractor->Name());
164 ROCKS_LOG_HEADER(log,
165 " Options.memtable_insert_with_hint_prefix_extractor: %s",
166 memtable_insert_with_hint_prefix_extractor == nullptr
167 ? "nullptr"
168 : memtable_insert_with_hint_prefix_extractor->Name());
169 ROCKS_LOG_HEADER(log, " Options.num_levels: %d", num_levels);
170 ROCKS_LOG_HEADER(log, " Options.min_write_buffer_number_to_merge: %d",
171 min_write_buffer_number_to_merge);
172 ROCKS_LOG_HEADER(log, " Options.max_write_buffer_number_to_maintain: %d",
173 max_write_buffer_number_to_maintain);
f67539c2
TL
174 ROCKS_LOG_HEADER(log,
175 " Options.max_write_buffer_size_to_maintain: %" PRIu64,
176 max_write_buffer_size_to_maintain);
11fdf7f2
TL
177 ROCKS_LOG_HEADER(
178 log, " Options.bottommost_compression_opts.window_bits: %d",
179 bottommost_compression_opts.window_bits);
180 ROCKS_LOG_HEADER(
181 log, " Options.bottommost_compression_opts.level: %d",
182 bottommost_compression_opts.level);
183 ROCKS_LOG_HEADER(
184 log, " Options.bottommost_compression_opts.strategy: %d",
185 bottommost_compression_opts.strategy);
186 ROCKS_LOG_HEADER(
187 log,
188 " Options.bottommost_compression_opts.max_dict_bytes: "
494da23a 189 "%" PRIu32,
11fdf7f2
TL
190 bottommost_compression_opts.max_dict_bytes);
191 ROCKS_LOG_HEADER(
192 log,
193 " Options.bottommost_compression_opts.zstd_max_train_bytes: "
494da23a 194 "%" PRIu32,
11fdf7f2 195 bottommost_compression_opts.zstd_max_train_bytes);
20effc67
TL
196 ROCKS_LOG_HEADER(
197 log,
198 " Options.bottommost_compression_opts.parallel_threads: "
199 "%" PRIu32,
200 bottommost_compression_opts.parallel_threads);
11fdf7f2
TL
201 ROCKS_LOG_HEADER(
202 log, " Options.bottommost_compression_opts.enabled: %s",
203 bottommost_compression_opts.enabled ? "true" : "false");
7c673cae
FG
204 ROCKS_LOG_HEADER(log, " Options.compression_opts.window_bits: %d",
205 compression_opts.window_bits);
206 ROCKS_LOG_HEADER(log, " Options.compression_opts.level: %d",
207 compression_opts.level);
208 ROCKS_LOG_HEADER(log, " Options.compression_opts.strategy: %d",
209 compression_opts.strategy);
210 ROCKS_LOG_HEADER(
211 log,
494da23a 212 " Options.compression_opts.max_dict_bytes: %" PRIu32,
7c673cae 213 compression_opts.max_dict_bytes);
11fdf7f2
TL
214 ROCKS_LOG_HEADER(log,
215 " Options.compression_opts.zstd_max_train_bytes: "
494da23a 216 "%" PRIu32,
11fdf7f2 217 compression_opts.zstd_max_train_bytes);
20effc67
TL
218 ROCKS_LOG_HEADER(log,
219 " Options.compression_opts.parallel_threads: "
220 "%" PRIu32,
221 compression_opts.parallel_threads);
11fdf7f2
TL
222 ROCKS_LOG_HEADER(log,
223 " Options.compression_opts.enabled: %s",
224 compression_opts.enabled ? "true" : "false");
7c673cae
FG
225 ROCKS_LOG_HEADER(log, " Options.level0_file_num_compaction_trigger: %d",
226 level0_file_num_compaction_trigger);
227 ROCKS_LOG_HEADER(log, " Options.level0_slowdown_writes_trigger: %d",
228 level0_slowdown_writes_trigger);
229 ROCKS_LOG_HEADER(log, " Options.level0_stop_writes_trigger: %d",
230 level0_stop_writes_trigger);
231 ROCKS_LOG_HEADER(
232 log, " Options.target_file_size_base: %" PRIu64,
233 target_file_size_base);
234 ROCKS_LOG_HEADER(log, " Options.target_file_size_multiplier: %d",
235 target_file_size_multiplier);
236 ROCKS_LOG_HEADER(
237 log, " Options.max_bytes_for_level_base: %" PRIu64,
238 max_bytes_for_level_base);
239 ROCKS_LOG_HEADER(log, "Options.level_compaction_dynamic_level_bytes: %d",
240 level_compaction_dynamic_level_bytes);
241 ROCKS_LOG_HEADER(log, " Options.max_bytes_for_level_multiplier: %f",
242 max_bytes_for_level_multiplier);
243 for (size_t i = 0; i < max_bytes_for_level_multiplier_additional.size();
244 i++) {
245 ROCKS_LOG_HEADER(
246 log, "Options.max_bytes_for_level_multiplier_addtl[%" ROCKSDB_PRIszt
247 "]: %d",
248 i, max_bytes_for_level_multiplier_additional[i]);
249 }
250 ROCKS_LOG_HEADER(
251 log, " Options.max_sequential_skip_in_iterations: %" PRIu64,
252 max_sequential_skip_in_iterations);
253 ROCKS_LOG_HEADER(
254 log, " Options.max_compaction_bytes: %" PRIu64,
255 max_compaction_bytes);
256 ROCKS_LOG_HEADER(
257 log,
258 " Options.arena_block_size: %" ROCKSDB_PRIszt,
259 arena_block_size);
260 ROCKS_LOG_HEADER(log,
261 " Options.soft_pending_compaction_bytes_limit: %" PRIu64,
262 soft_pending_compaction_bytes_limit);
263 ROCKS_LOG_HEADER(log,
264 " Options.hard_pending_compaction_bytes_limit: %" PRIu64,
265 hard_pending_compaction_bytes_limit);
266 ROCKS_LOG_HEADER(log, " Options.rate_limit_delay_max_milliseconds: %u",
267 rate_limit_delay_max_milliseconds);
268 ROCKS_LOG_HEADER(log, " Options.disable_auto_compactions: %d",
269 disable_auto_compactions);
270
271 const auto& it_compaction_style =
272 compaction_style_to_string.find(compaction_style);
273 std::string str_compaction_style;
274 if (it_compaction_style == compaction_style_to_string.end()) {
275 assert(false);
276 str_compaction_style = "unknown_" + std::to_string(compaction_style);
277 } else {
278 str_compaction_style = it_compaction_style->second;
279 }
280 ROCKS_LOG_HEADER(log,
11fdf7f2 281 " Options.compaction_style: %s",
7c673cae
FG
282 str_compaction_style.c_str());
283
284 const auto& it_compaction_pri =
285 compaction_pri_to_string.find(compaction_pri);
286 std::string str_compaction_pri;
287 if (it_compaction_pri == compaction_pri_to_string.end()) {
288 assert(false);
289 str_compaction_pri = "unknown_" + std::to_string(compaction_pri);
290 } else {
291 str_compaction_pri = it_compaction_pri->second;
292 }
293 ROCKS_LOG_HEADER(log,
11fdf7f2 294 " Options.compaction_pri: %s",
7c673cae
FG
295 str_compaction_pri.c_str());
296 ROCKS_LOG_HEADER(log,
11fdf7f2 297 "Options.compaction_options_universal.size_ratio: %u",
7c673cae
FG
298 compaction_options_universal.size_ratio);
299 ROCKS_LOG_HEADER(log,
300 "Options.compaction_options_universal.min_merge_width: %u",
301 compaction_options_universal.min_merge_width);
302 ROCKS_LOG_HEADER(log,
303 "Options.compaction_options_universal.max_merge_width: %u",
304 compaction_options_universal.max_merge_width);
305 ROCKS_LOG_HEADER(
306 log,
307 "Options.compaction_options_universal."
308 "max_size_amplification_percent: %u",
309 compaction_options_universal.max_size_amplification_percent);
310 ROCKS_LOG_HEADER(
311 log,
312 "Options.compaction_options_universal.compression_size_percent: %d",
313 compaction_options_universal.compression_size_percent);
11fdf7f2
TL
314 const auto& it_compaction_stop_style = compaction_stop_style_to_string.find(
315 compaction_options_universal.stop_style);
316 std::string str_compaction_stop_style;
317 if (it_compaction_stop_style == compaction_stop_style_to_string.end()) {
318 assert(false);
319 str_compaction_stop_style =
320 "unknown_" + std::to_string(compaction_options_universal.stop_style);
321 } else {
322 str_compaction_stop_style = it_compaction_stop_style->second;
323 }
324 ROCKS_LOG_HEADER(log,
325 "Options.compaction_options_universal.stop_style: %s",
326 str_compaction_stop_style.c_str());
7c673cae
FG
327 ROCKS_LOG_HEADER(
328 log, "Options.compaction_options_fifo.max_table_files_size: %" PRIu64,
329 compaction_options_fifo.max_table_files_size);
11fdf7f2
TL
330 ROCKS_LOG_HEADER(log,
331 "Options.compaction_options_fifo.allow_compaction: %d",
332 compaction_options_fifo.allow_compaction);
20effc67 333 std::ostringstream collector_info;
7c673cae 334 for (const auto& collector_factory : table_properties_collector_factories) {
20effc67 335 collector_info << collector_factory->ToString() << ';';
7c673cae
FG
336 }
337 ROCKS_LOG_HEADER(
338 log, " Options.table_properties_collectors: %s",
20effc67 339 collector_info.str().c_str());
7c673cae
FG
340 ROCKS_LOG_HEADER(log,
341 " Options.inplace_update_support: %d",
342 inplace_update_support);
343 ROCKS_LOG_HEADER(
344 log,
345 " Options.inplace_update_num_locks: %" ROCKSDB_PRIszt,
346 inplace_update_num_locks);
347 // TODO: easier config for bloom (maybe based on avg key/value size)
348 ROCKS_LOG_HEADER(
349 log, " Options.memtable_prefix_bloom_size_ratio: %f",
350 memtable_prefix_bloom_size_ratio);
494da23a
TL
351 ROCKS_LOG_HEADER(log,
352 " Options.memtable_whole_key_filtering: %d",
353 memtable_whole_key_filtering);
7c673cae
FG
354
355 ROCKS_LOG_HEADER(log, " Options.memtable_huge_page_size: %" ROCKSDB_PRIszt,
356 memtable_huge_page_size);
357 ROCKS_LOG_HEADER(log,
358 " Options.bloom_locality: %d",
359 bloom_locality);
360
361 ROCKS_LOG_HEADER(
362 log,
363 " Options.max_successive_merges: %" ROCKSDB_PRIszt,
364 max_successive_merges);
365 ROCKS_LOG_HEADER(log,
366 " Options.optimize_filters_for_hits: %d",
367 optimize_filters_for_hits);
368 ROCKS_LOG_HEADER(log, " Options.paranoid_file_checks: %d",
369 paranoid_file_checks);
370 ROCKS_LOG_HEADER(log, " Options.force_consistency_checks: %d",
371 force_consistency_checks);
372 ROCKS_LOG_HEADER(log, " Options.report_bg_io_stats: %d",
373 report_bg_io_stats);
494da23a
TL
374 ROCKS_LOG_HEADER(log, " Options.ttl: %" PRIu64,
375 ttl);
f67539c2
TL
376 ROCKS_LOG_HEADER(log,
377 " Options.periodic_compaction_seconds: %" PRIu64,
378 periodic_compaction_seconds);
20effc67
TL
379 ROCKS_LOG_HEADER(log, " Options.enable_blob_files: %s",
380 enable_blob_files ? "true" : "false");
381 ROCKS_LOG_HEADER(log,
382 " Options.min_blob_size: %" PRIu64,
383 min_blob_size);
384 ROCKS_LOG_HEADER(log,
385 " Options.blob_file_size: %" PRIu64,
386 blob_file_size);
387 ROCKS_LOG_HEADER(log, " Options.blob_compression_type: %s",
388 CompressionTypeToString(blob_compression_type).c_str());
389 ROCKS_LOG_HEADER(log, " Options.enable_blob_garbage_collection: %s",
390 enable_blob_garbage_collection ? "true" : "false");
391 ROCKS_LOG_HEADER(log, " Options.blob_garbage_collection_age_cutoff: %f",
392 blob_garbage_collection_age_cutoff);
7c673cae
FG
393} // ColumnFamilyOptions::Dump
394
395void Options::Dump(Logger* log) const {
396 DBOptions::Dump(log);
397 ColumnFamilyOptions::Dump(log);
398} // Options::Dump
399
400void Options::DumpCFOptions(Logger* log) const {
401 ColumnFamilyOptions::Dump(log);
402} // Options::DumpCFOptions
403
404//
405// The goal of this method is to create a configuration that
406// allows an application to write all files into L0 and
407// then do a single compaction to output all files into L1.
408Options*
409Options::PrepareForBulkLoad()
410{
411 // never slowdown ingest.
412 level0_file_num_compaction_trigger = (1<<30);
413 level0_slowdown_writes_trigger = (1<<30);
414 level0_stop_writes_trigger = (1<<30);
415 soft_pending_compaction_bytes_limit = 0;
416 hard_pending_compaction_bytes_limit = 0;
417
418 // no auto compactions please. The application should issue a
419 // manual compaction after all data is loaded into L0.
420 disable_auto_compactions = true;
421 // A manual compaction run should pick all files in L0 in
422 // a single compaction run.
423 max_compaction_bytes = (static_cast<uint64_t>(1) << 60);
424
425 // It is better to have only 2 levels, otherwise a manual
426 // compaction would compact at every possible level, thereby
427 // increasing the total time needed for compactions.
428 num_levels = 2;
429
430 // Need to allow more write buffers to allow more parallism
431 // of flushes.
432 max_write_buffer_number = 6;
433 min_write_buffer_number_to_merge = 1;
434
435 // When compaction is disabled, more parallel flush threads can
436 // help with write throughput.
437 max_background_flushes = 4;
438
439 // Prevent a memtable flush to automatically promote files
440 // to L1. This is helpful so that all files that are
441 // input to the manual compaction are all at L0.
442 max_background_compactions = 2;
7c673cae
FG
443
444 // The compaction would create large files in L1.
445 target_file_size_base = 256 * 1024 * 1024;
446 return this;
447}
448
449Options* Options::OptimizeForSmallDb() {
f67539c2
TL
450 // 16MB block cache
451 std::shared_ptr<Cache> cache = NewLRUCache(16 << 20);
452
453 ColumnFamilyOptions::OptimizeForSmallDb(&cache);
454 DBOptions::OptimizeForSmallDb(&cache);
7c673cae
FG
455 return this;
456}
457
458Options* Options::OldDefaults(int rocksdb_major_version,
459 int rocksdb_minor_version) {
460 ColumnFamilyOptions::OldDefaults(rocksdb_major_version,
461 rocksdb_minor_version);
462 DBOptions::OldDefaults(rocksdb_major_version, rocksdb_minor_version);
463 return this;
464}
465
466DBOptions* DBOptions::OldDefaults(int rocksdb_major_version,
467 int rocksdb_minor_version) {
468 if (rocksdb_major_version < 4 ||
469 (rocksdb_major_version == 4 && rocksdb_minor_version < 7)) {
470 max_file_opening_threads = 1;
471 table_cache_numshardbits = 4;
472 }
473 if (rocksdb_major_version < 5 ||
474 (rocksdb_major_version == 5 && rocksdb_minor_version < 2)) {
475 delayed_write_rate = 2 * 1024U * 1024U;
11fdf7f2
TL
476 } else if (rocksdb_major_version < 5 ||
477 (rocksdb_major_version == 5 && rocksdb_minor_version < 6)) {
478 delayed_write_rate = 16 * 1024U * 1024U;
7c673cae 479 }
7c673cae 480 max_open_files = 5000;
7c673cae
FG
481 wal_recovery_mode = WALRecoveryMode::kTolerateCorruptedTailRecords;
482 return this;
483}
484
485ColumnFamilyOptions* ColumnFamilyOptions::OldDefaults(
486 int rocksdb_major_version, int rocksdb_minor_version) {
494da23a
TL
487 if (rocksdb_major_version < 5 ||
488 (rocksdb_major_version == 5 && rocksdb_minor_version <= 18)) {
489 compaction_pri = CompactionPri::kByCompensatedSize;
490 }
7c673cae
FG
491 if (rocksdb_major_version < 4 ||
492 (rocksdb_major_version == 4 && rocksdb_minor_version < 7)) {
493 write_buffer_size = 4 << 20;
494 target_file_size_base = 2 * 1048576;
495 max_bytes_for_level_base = 10 * 1048576;
496 soft_pending_compaction_bytes_limit = 0;
497 hard_pending_compaction_bytes_limit = 0;
498 }
499 if (rocksdb_major_version < 5) {
500 level0_stop_writes_trigger = 24;
501 } else if (rocksdb_major_version == 5 && rocksdb_minor_version < 2) {
502 level0_stop_writes_trigger = 30;
503 }
7c673cae
FG
504
505 return this;
506}
507
508// Optimization functions
f67539c2 509DBOptions* DBOptions::OptimizeForSmallDb(std::shared_ptr<Cache>* cache) {
7c673cae
FG
510 max_file_opening_threads = 1;
511 max_open_files = 5000;
f67539c2
TL
512
513 // Cost memtable to block cache too.
514 std::shared_ptr<ROCKSDB_NAMESPACE::WriteBufferManager> wbm =
515 std::make_shared<ROCKSDB_NAMESPACE::WriteBufferManager>(
516 0, (cache != nullptr) ? *cache : std::shared_ptr<Cache>());
517 write_buffer_manager = wbm;
518
7c673cae
FG
519 return this;
520}
521
f67539c2
TL
522ColumnFamilyOptions* ColumnFamilyOptions::OptimizeForSmallDb(
523 std::shared_ptr<Cache>* cache) {
7c673cae
FG
524 write_buffer_size = 2 << 20;
525 target_file_size_base = 2 * 1048576;
526 max_bytes_for_level_base = 10 * 1048576;
527 soft_pending_compaction_bytes_limit = 256 * 1048576;
528 hard_pending_compaction_bytes_limit = 1073741824ul;
f67539c2
TL
529
530 BlockBasedTableOptions table_options;
531 table_options.block_cache =
532 (cache != nullptr) ? *cache : std::shared_ptr<Cache>();
533 table_options.cache_index_and_filter_blocks = true;
534 // Two level iterator to avoid LRU cache imbalance
535 table_options.index_type =
536 BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
537 table_factory.reset(new BlockBasedTableFactory(table_options));
538
7c673cae
FG
539 return this;
540}
541
542#ifndef ROCKSDB_LITE
543ColumnFamilyOptions* ColumnFamilyOptions::OptimizeForPointLookup(
544 uint64_t block_cache_size_mb) {
7c673cae 545 BlockBasedTableOptions block_based_options;
11fdf7f2
TL
546 block_based_options.data_block_index_type =
547 BlockBasedTableOptions::kDataBlockBinaryAndHash;
548 block_based_options.data_block_hash_table_util_ratio = 0.75;
7c673cae
FG
549 block_based_options.filter_policy.reset(NewBloomFilterPolicy(10));
550 block_based_options.block_cache =
551 NewLRUCache(static_cast<size_t>(block_cache_size_mb * 1024 * 1024));
552 table_factory.reset(new BlockBasedTableFactory(block_based_options));
553 memtable_prefix_bloom_size_ratio = 0.02;
f67539c2 554 memtable_whole_key_filtering = true;
7c673cae
FG
555 return this;
556}
557
558ColumnFamilyOptions* ColumnFamilyOptions::OptimizeLevelStyleCompaction(
559 uint64_t memtable_memory_budget) {
560 write_buffer_size = static_cast<size_t>(memtable_memory_budget / 4);
561 // merge two memtables when flushing to L0
562 min_write_buffer_number_to_merge = 2;
563 // this means we'll use 50% extra memory in the worst case, but will reduce
564 // write stalls.
565 max_write_buffer_number = 6;
566 // start flushing L0->L1 as soon as possible. each file on level0 is
567 // (memtable_memory_budget / 2). This will flush level 0 when it's bigger than
568 // memtable_memory_budget.
569 level0_file_num_compaction_trigger = 2;
570 // doesn't really matter much, but we don't want to create too many files
571 target_file_size_base = memtable_memory_budget / 8;
572 // make Level1 size equal to Level0 size, so that L0->L1 compactions are fast
573 max_bytes_for_level_base = memtable_memory_budget;
574
575 // level style compaction
576 compaction_style = kCompactionStyleLevel;
577
578 // only compress levels >= 2
579 compression_per_level.resize(num_levels);
580 for (int i = 0; i < num_levels; ++i) {
581 if (i < 2) {
582 compression_per_level[i] = kNoCompression;
583 } else {
f67539c2
TL
584 compression_per_level[i] =
585 LZ4_Supported()
586 ? kLZ4Compression
587 : (Snappy_Supported() ? kSnappyCompression : kNoCompression);
7c673cae
FG
588 }
589 }
590 return this;
591}
592
593ColumnFamilyOptions* ColumnFamilyOptions::OptimizeUniversalStyleCompaction(
594 uint64_t memtable_memory_budget) {
595 write_buffer_size = static_cast<size_t>(memtable_memory_budget / 4);
596 // merge two memtables when flushing to L0
597 min_write_buffer_number_to_merge = 2;
598 // this means we'll use 50% extra memory in the worst case, but will reduce
599 // write stalls.
600 max_write_buffer_number = 6;
601 // universal style compaction
602 compaction_style = kCompactionStyleUniversal;
603 compaction_options_universal.compression_size_percent = 80;
604 return this;
605}
606
607DBOptions* DBOptions::IncreaseParallelism(int total_threads) {
11fdf7f2 608 max_background_jobs = total_threads;
7c673cae
FG
609 env->SetBackgroundThreads(total_threads, Env::LOW);
610 env->SetBackgroundThreads(1, Env::HIGH);
611 return this;
612}
613
614#endif // !ROCKSDB_LITE
615
616ReadOptions::ReadOptions()
11fdf7f2
TL
617 : snapshot(nullptr),
618 iterate_lower_bound(nullptr),
7c673cae 619 iterate_upper_bound(nullptr),
11fdf7f2
TL
620 readahead_size(0),
621 max_skippable_internal_keys(0),
7c673cae 622 read_tier(kReadAllTier),
11fdf7f2
TL
623 verify_checksums(true),
624 fill_cache(true),
7c673cae
FG
625 tailing(false),
626 managed(false),
627 total_order_seek(false),
f67539c2 628 auto_prefix_mode(false),
7c673cae
FG
629 prefix_same_as_start(false),
630 pin_data(false),
631 background_purge_on_iterator_cleanup(false),
7c673cae 632 ignore_range_deletions(false),
f67539c2 633 iter_start_seqnum(0),
20effc67
TL
634 timestamp(nullptr),
635 iter_start_ts(nullptr),
636 deadline(std::chrono::microseconds::zero()),
637 io_timeout(std::chrono::microseconds::zero()),
638 value_size_soft_limit(std::numeric_limits<uint64_t>::max()) {}
7c673cae
FG
639
640ReadOptions::ReadOptions(bool cksum, bool cache)
11fdf7f2
TL
641 : snapshot(nullptr),
642 iterate_lower_bound(nullptr),
7c673cae 643 iterate_upper_bound(nullptr),
11fdf7f2
TL
644 readahead_size(0),
645 max_skippable_internal_keys(0),
7c673cae 646 read_tier(kReadAllTier),
11fdf7f2
TL
647 verify_checksums(cksum),
648 fill_cache(cache),
7c673cae
FG
649 tailing(false),
650 managed(false),
651 total_order_seek(false),
f67539c2 652 auto_prefix_mode(false),
7c673cae
FG
653 prefix_same_as_start(false),
654 pin_data(false),
655 background_purge_on_iterator_cleanup(false),
7c673cae 656 ignore_range_deletions(false),
f67539c2 657 iter_start_seqnum(0),
20effc67
TL
658 timestamp(nullptr),
659 iter_start_ts(nullptr),
660 deadline(std::chrono::microseconds::zero()),
661 io_timeout(std::chrono::microseconds::zero()),
662 value_size_soft_limit(std::numeric_limits<uint64_t>::max()) {}
7c673cae 663
f67539c2 664} // namespace ROCKSDB_NAMESPACE