]> git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/options/options.cc
update ceph source to reef 18.1.2
[ceph.git] / ceph / src / rocksdb / options / options.cc
1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9
10 #include "rocksdb/options.h"
11
12 #include <cinttypes>
13 #include <limits>
14
15 #include "logging/logging.h"
16 #include "monitoring/statistics.h"
17 #include "options/db_options.h"
18 #include "options/options_helper.h"
19 #include "rocksdb/cache.h"
20 #include "rocksdb/compaction_filter.h"
21 #include "rocksdb/comparator.h"
22 #include "rocksdb/env.h"
23 #include "rocksdb/filter_policy.h"
24 #include "rocksdb/memtablerep.h"
25 #include "rocksdb/merge_operator.h"
26 #include "rocksdb/slice.h"
27 #include "rocksdb/slice_transform.h"
28 #include "rocksdb/sst_file_manager.h"
29 #include "rocksdb/sst_partitioner.h"
30 #include "rocksdb/table.h"
31 #include "rocksdb/table_properties.h"
32 #include "rocksdb/wal_filter.h"
33 #include "table/block_based/block_based_table_factory.h"
34 #include "util/compression.h"
35
36 namespace ROCKSDB_NAMESPACE {
37
38 AdvancedColumnFamilyOptions::AdvancedColumnFamilyOptions() {
39 assert(memtable_factory.get() != nullptr);
40 }
41
42 AdvancedColumnFamilyOptions::AdvancedColumnFamilyOptions(const Options& options)
43 : max_write_buffer_number(options.max_write_buffer_number),
44 min_write_buffer_number_to_merge(
45 options.min_write_buffer_number_to_merge),
46 max_write_buffer_number_to_maintain(
47 options.max_write_buffer_number_to_maintain),
48 max_write_buffer_size_to_maintain(
49 options.max_write_buffer_size_to_maintain),
50 inplace_update_support(options.inplace_update_support),
51 inplace_update_num_locks(options.inplace_update_num_locks),
52 experimental_mempurge_threshold(options.experimental_mempurge_threshold),
53 inplace_callback(options.inplace_callback),
54 memtable_prefix_bloom_size_ratio(
55 options.memtable_prefix_bloom_size_ratio),
56 memtable_whole_key_filtering(options.memtable_whole_key_filtering),
57 memtable_huge_page_size(options.memtable_huge_page_size),
58 memtable_insert_with_hint_prefix_extractor(
59 options.memtable_insert_with_hint_prefix_extractor),
60 bloom_locality(options.bloom_locality),
61 arena_block_size(options.arena_block_size),
62 compression_per_level(options.compression_per_level),
63 num_levels(options.num_levels),
64 level0_slowdown_writes_trigger(options.level0_slowdown_writes_trigger),
65 level0_stop_writes_trigger(options.level0_stop_writes_trigger),
66 target_file_size_base(options.target_file_size_base),
67 target_file_size_multiplier(options.target_file_size_multiplier),
68 level_compaction_dynamic_level_bytes(
69 options.level_compaction_dynamic_level_bytes),
70 max_bytes_for_level_multiplier(options.max_bytes_for_level_multiplier),
71 max_bytes_for_level_multiplier_additional(
72 options.max_bytes_for_level_multiplier_additional),
73 max_compaction_bytes(options.max_compaction_bytes),
74 ignore_max_compaction_bytes_for_input(
75 options.ignore_max_compaction_bytes_for_input),
76 soft_pending_compaction_bytes_limit(
77 options.soft_pending_compaction_bytes_limit),
78 hard_pending_compaction_bytes_limit(
79 options.hard_pending_compaction_bytes_limit),
80 compaction_style(options.compaction_style),
81 compaction_pri(options.compaction_pri),
82 compaction_options_universal(options.compaction_options_universal),
83 compaction_options_fifo(options.compaction_options_fifo),
84 max_sequential_skip_in_iterations(
85 options.max_sequential_skip_in_iterations),
86 memtable_factory(options.memtable_factory),
87 table_properties_collector_factories(
88 options.table_properties_collector_factories),
89 max_successive_merges(options.max_successive_merges),
90 optimize_filters_for_hits(options.optimize_filters_for_hits),
91 paranoid_file_checks(options.paranoid_file_checks),
92 force_consistency_checks(options.force_consistency_checks),
93 report_bg_io_stats(options.report_bg_io_stats),
94 ttl(options.ttl),
95 periodic_compaction_seconds(options.periodic_compaction_seconds),
96 sample_for_compression(options.sample_for_compression),
97 preclude_last_level_data_seconds(
98 options.preclude_last_level_data_seconds),
99 preserve_internal_time_seconds(options.preserve_internal_time_seconds),
100 enable_blob_files(options.enable_blob_files),
101 min_blob_size(options.min_blob_size),
102 blob_file_size(options.blob_file_size),
103 blob_compression_type(options.blob_compression_type),
104 enable_blob_garbage_collection(options.enable_blob_garbage_collection),
105 blob_garbage_collection_age_cutoff(
106 options.blob_garbage_collection_age_cutoff),
107 blob_garbage_collection_force_threshold(
108 options.blob_garbage_collection_force_threshold),
109 blob_compaction_readahead_size(options.blob_compaction_readahead_size),
110 blob_file_starting_level(options.blob_file_starting_level),
111 blob_cache(options.blob_cache),
112 prepopulate_blob_cache(options.prepopulate_blob_cache) {
113 assert(memtable_factory.get() != nullptr);
114 if (max_bytes_for_level_multiplier_additional.size() <
115 static_cast<unsigned int>(num_levels)) {
116 max_bytes_for_level_multiplier_additional.resize(num_levels, 1);
117 }
118 }
119
120 ColumnFamilyOptions::ColumnFamilyOptions()
121 : compression(Snappy_Supported() ? kSnappyCompression : kNoCompression),
122 table_factory(
123 std::shared_ptr<TableFactory>(new BlockBasedTableFactory())) {}
124
125 ColumnFamilyOptions::ColumnFamilyOptions(const Options& options)
126 : ColumnFamilyOptions(*static_cast<const ColumnFamilyOptions*>(&options)) {}
127
128 DBOptions::DBOptions() {}
129 DBOptions::DBOptions(const Options& options)
130 : DBOptions(*static_cast<const DBOptions*>(&options)) {}
131
132 void DBOptions::Dump(Logger* log) const {
133 ImmutableDBOptions(*this).Dump(log);
134 MutableDBOptions(*this).Dump(log);
135 } // DBOptions::Dump
136
137 void ColumnFamilyOptions::Dump(Logger* log) const {
138 ROCKS_LOG_HEADER(log, " Options.comparator: %s",
139 comparator->Name());
140 ROCKS_LOG_HEADER(log, " Options.merge_operator: %s",
141 merge_operator ? merge_operator->Name() : "None");
142 ROCKS_LOG_HEADER(log, " Options.compaction_filter: %s",
143 compaction_filter ? compaction_filter->Name() : "None");
144 ROCKS_LOG_HEADER(
145 log, " Options.compaction_filter_factory: %s",
146 compaction_filter_factory ? compaction_filter_factory->Name() : "None");
147 ROCKS_LOG_HEADER(
148 log, " Options.sst_partitioner_factory: %s",
149 sst_partitioner_factory ? sst_partitioner_factory->Name() : "None");
150 ROCKS_LOG_HEADER(log, " Options.memtable_factory: %s",
151 memtable_factory->Name());
152 ROCKS_LOG_HEADER(log, " Options.table_factory: %s",
153 table_factory->Name());
154 ROCKS_LOG_HEADER(log, " table_factory options: %s",
155 table_factory->GetPrintableOptions().c_str());
156 ROCKS_LOG_HEADER(log, " Options.write_buffer_size: %" ROCKSDB_PRIszt,
157 write_buffer_size);
158 ROCKS_LOG_HEADER(log, " Options.max_write_buffer_number: %d",
159 max_write_buffer_number);
160 if (!compression_per_level.empty()) {
161 for (unsigned int i = 0; i < compression_per_level.size(); i++) {
162 ROCKS_LOG_HEADER(
163 log, " Options.compression[%d]: %s", i,
164 CompressionTypeToString(compression_per_level[i]).c_str());
165 }
166 } else {
167 ROCKS_LOG_HEADER(log, " Options.compression: %s",
168 CompressionTypeToString(compression).c_str());
169 }
170 ROCKS_LOG_HEADER(
171 log, " Options.bottommost_compression: %s",
172 bottommost_compression == kDisableCompressionOption
173 ? "Disabled"
174 : CompressionTypeToString(bottommost_compression).c_str());
175 ROCKS_LOG_HEADER(
176 log, " Options.prefix_extractor: %s",
177 prefix_extractor == nullptr ? "nullptr" : prefix_extractor->Name());
178 ROCKS_LOG_HEADER(log,
179 " Options.memtable_insert_with_hint_prefix_extractor: %s",
180 memtable_insert_with_hint_prefix_extractor == nullptr
181 ? "nullptr"
182 : memtable_insert_with_hint_prefix_extractor->Name());
183 ROCKS_LOG_HEADER(log, " Options.num_levels: %d", num_levels);
184 ROCKS_LOG_HEADER(log, " Options.min_write_buffer_number_to_merge: %d",
185 min_write_buffer_number_to_merge);
186 ROCKS_LOG_HEADER(log, " Options.max_write_buffer_number_to_maintain: %d",
187 max_write_buffer_number_to_maintain);
188 ROCKS_LOG_HEADER(log,
189 " Options.max_write_buffer_size_to_maintain: %" PRIu64,
190 max_write_buffer_size_to_maintain);
191 ROCKS_LOG_HEADER(
192 log, " Options.bottommost_compression_opts.window_bits: %d",
193 bottommost_compression_opts.window_bits);
194 ROCKS_LOG_HEADER(
195 log, " Options.bottommost_compression_opts.level: %d",
196 bottommost_compression_opts.level);
197 ROCKS_LOG_HEADER(
198 log, " Options.bottommost_compression_opts.strategy: %d",
199 bottommost_compression_opts.strategy);
200 ROCKS_LOG_HEADER(
201 log,
202 " Options.bottommost_compression_opts.max_dict_bytes: "
203 "%" PRIu32,
204 bottommost_compression_opts.max_dict_bytes);
205 ROCKS_LOG_HEADER(
206 log,
207 " Options.bottommost_compression_opts.zstd_max_train_bytes: "
208 "%" PRIu32,
209 bottommost_compression_opts.zstd_max_train_bytes);
210 ROCKS_LOG_HEADER(
211 log,
212 " Options.bottommost_compression_opts.parallel_threads: "
213 "%" PRIu32,
214 bottommost_compression_opts.parallel_threads);
215 ROCKS_LOG_HEADER(
216 log, " Options.bottommost_compression_opts.enabled: %s",
217 bottommost_compression_opts.enabled ? "true" : "false");
218 ROCKS_LOG_HEADER(
219 log,
220 " Options.bottommost_compression_opts.max_dict_buffer_bytes: "
221 "%" PRIu64,
222 bottommost_compression_opts.max_dict_buffer_bytes);
223 ROCKS_LOG_HEADER(
224 log,
225 " Options.bottommost_compression_opts.use_zstd_dict_trainer: %s",
226 bottommost_compression_opts.use_zstd_dict_trainer ? "true" : "false");
227 ROCKS_LOG_HEADER(log, " Options.compression_opts.window_bits: %d",
228 compression_opts.window_bits);
229 ROCKS_LOG_HEADER(log, " Options.compression_opts.level: %d",
230 compression_opts.level);
231 ROCKS_LOG_HEADER(log, " Options.compression_opts.strategy: %d",
232 compression_opts.strategy);
233 ROCKS_LOG_HEADER(
234 log,
235 " Options.compression_opts.max_dict_bytes: %" PRIu32,
236 compression_opts.max_dict_bytes);
237 ROCKS_LOG_HEADER(log,
238 " Options.compression_opts.zstd_max_train_bytes: "
239 "%" PRIu32,
240 compression_opts.zstd_max_train_bytes);
241 ROCKS_LOG_HEADER(
242 log, " Options.compression_opts.use_zstd_dict_trainer: %s",
243 compression_opts.use_zstd_dict_trainer ? "true" : "false");
244 ROCKS_LOG_HEADER(log,
245 " Options.compression_opts.parallel_threads: "
246 "%" PRIu32,
247 compression_opts.parallel_threads);
248 ROCKS_LOG_HEADER(log,
249 " Options.compression_opts.enabled: %s",
250 compression_opts.enabled ? "true" : "false");
251 ROCKS_LOG_HEADER(log,
252 " Options.compression_opts.max_dict_buffer_bytes: "
253 "%" PRIu64,
254 compression_opts.max_dict_buffer_bytes);
255 ROCKS_LOG_HEADER(log, " Options.level0_file_num_compaction_trigger: %d",
256 level0_file_num_compaction_trigger);
257 ROCKS_LOG_HEADER(log, " Options.level0_slowdown_writes_trigger: %d",
258 level0_slowdown_writes_trigger);
259 ROCKS_LOG_HEADER(log, " Options.level0_stop_writes_trigger: %d",
260 level0_stop_writes_trigger);
261 ROCKS_LOG_HEADER(
262 log, " Options.target_file_size_base: %" PRIu64,
263 target_file_size_base);
264 ROCKS_LOG_HEADER(log, " Options.target_file_size_multiplier: %d",
265 target_file_size_multiplier);
266 ROCKS_LOG_HEADER(
267 log, " Options.max_bytes_for_level_base: %" PRIu64,
268 max_bytes_for_level_base);
269 ROCKS_LOG_HEADER(log, "Options.level_compaction_dynamic_level_bytes: %d",
270 level_compaction_dynamic_level_bytes);
271 ROCKS_LOG_HEADER(log, " Options.max_bytes_for_level_multiplier: %f",
272 max_bytes_for_level_multiplier);
273 for (size_t i = 0; i < max_bytes_for_level_multiplier_additional.size();
274 i++) {
275 ROCKS_LOG_HEADER(
276 log, "Options.max_bytes_for_level_multiplier_addtl[%" ROCKSDB_PRIszt
277 "]: %d",
278 i, max_bytes_for_level_multiplier_additional[i]);
279 }
280 ROCKS_LOG_HEADER(
281 log, " Options.max_sequential_skip_in_iterations: %" PRIu64,
282 max_sequential_skip_in_iterations);
283 ROCKS_LOG_HEADER(
284 log, " Options.max_compaction_bytes: %" PRIu64,
285 max_compaction_bytes);
286 ROCKS_LOG_HEADER(log, " Options.ignore_max_compaction_bytes_for_input: %s",
287 ignore_max_compaction_bytes_for_input ? "true" : "false");
288 ROCKS_LOG_HEADER(
289 log,
290 " Options.arena_block_size: %" ROCKSDB_PRIszt,
291 arena_block_size);
292 ROCKS_LOG_HEADER(log,
293 " Options.soft_pending_compaction_bytes_limit: %" PRIu64,
294 soft_pending_compaction_bytes_limit);
295 ROCKS_LOG_HEADER(log,
296 " Options.hard_pending_compaction_bytes_limit: %" PRIu64,
297 hard_pending_compaction_bytes_limit);
298 ROCKS_LOG_HEADER(log, " Options.disable_auto_compactions: %d",
299 disable_auto_compactions);
300
301 const auto& it_compaction_style =
302 compaction_style_to_string.find(compaction_style);
303 std::string str_compaction_style;
304 if (it_compaction_style == compaction_style_to_string.end()) {
305 assert(false);
306 str_compaction_style = "unknown_" + std::to_string(compaction_style);
307 } else {
308 str_compaction_style = it_compaction_style->second;
309 }
310 ROCKS_LOG_HEADER(log,
311 " Options.compaction_style: %s",
312 str_compaction_style.c_str());
313
314 const auto& it_compaction_pri =
315 compaction_pri_to_string.find(compaction_pri);
316 std::string str_compaction_pri;
317 if (it_compaction_pri == compaction_pri_to_string.end()) {
318 assert(false);
319 str_compaction_pri = "unknown_" + std::to_string(compaction_pri);
320 } else {
321 str_compaction_pri = it_compaction_pri->second;
322 }
323 ROCKS_LOG_HEADER(log,
324 " Options.compaction_pri: %s",
325 str_compaction_pri.c_str());
326 ROCKS_LOG_HEADER(log,
327 "Options.compaction_options_universal.size_ratio: %u",
328 compaction_options_universal.size_ratio);
329 ROCKS_LOG_HEADER(log,
330 "Options.compaction_options_universal.min_merge_width: %u",
331 compaction_options_universal.min_merge_width);
332 ROCKS_LOG_HEADER(log,
333 "Options.compaction_options_universal.max_merge_width: %u",
334 compaction_options_universal.max_merge_width);
335 ROCKS_LOG_HEADER(
336 log,
337 "Options.compaction_options_universal."
338 "max_size_amplification_percent: %u",
339 compaction_options_universal.max_size_amplification_percent);
340 ROCKS_LOG_HEADER(
341 log,
342 "Options.compaction_options_universal.compression_size_percent: %d",
343 compaction_options_universal.compression_size_percent);
344 const auto& it_compaction_stop_style = compaction_stop_style_to_string.find(
345 compaction_options_universal.stop_style);
346 std::string str_compaction_stop_style;
347 if (it_compaction_stop_style == compaction_stop_style_to_string.end()) {
348 assert(false);
349 str_compaction_stop_style =
350 "unknown_" + std::to_string(compaction_options_universal.stop_style);
351 } else {
352 str_compaction_stop_style = it_compaction_stop_style->second;
353 }
354 ROCKS_LOG_HEADER(log,
355 "Options.compaction_options_universal.stop_style: %s",
356 str_compaction_stop_style.c_str());
357 ROCKS_LOG_HEADER(
358 log, "Options.compaction_options_fifo.max_table_files_size: %" PRIu64,
359 compaction_options_fifo.max_table_files_size);
360 ROCKS_LOG_HEADER(log,
361 "Options.compaction_options_fifo.allow_compaction: %d",
362 compaction_options_fifo.allow_compaction);
363 std::ostringstream collector_info;
364 for (const auto& collector_factory : table_properties_collector_factories) {
365 collector_info << collector_factory->ToString() << ';';
366 }
367 ROCKS_LOG_HEADER(
368 log, " Options.table_properties_collectors: %s",
369 collector_info.str().c_str());
370 ROCKS_LOG_HEADER(log,
371 " Options.inplace_update_support: %d",
372 inplace_update_support);
373 ROCKS_LOG_HEADER(
374 log,
375 " Options.inplace_update_num_locks: %" ROCKSDB_PRIszt,
376 inplace_update_num_locks);
377 // TODO: easier config for bloom (maybe based on avg key/value size)
378 ROCKS_LOG_HEADER(
379 log, " Options.memtable_prefix_bloom_size_ratio: %f",
380 memtable_prefix_bloom_size_ratio);
381 ROCKS_LOG_HEADER(log,
382 " Options.memtable_whole_key_filtering: %d",
383 memtable_whole_key_filtering);
384
385 ROCKS_LOG_HEADER(log, " Options.memtable_huge_page_size: %" ROCKSDB_PRIszt,
386 memtable_huge_page_size);
387 ROCKS_LOG_HEADER(log,
388 " Options.bloom_locality: %d",
389 bloom_locality);
390
391 ROCKS_LOG_HEADER(
392 log,
393 " Options.max_successive_merges: %" ROCKSDB_PRIszt,
394 max_successive_merges);
395 ROCKS_LOG_HEADER(log,
396 " Options.optimize_filters_for_hits: %d",
397 optimize_filters_for_hits);
398 ROCKS_LOG_HEADER(log, " Options.paranoid_file_checks: %d",
399 paranoid_file_checks);
400 ROCKS_LOG_HEADER(log, " Options.force_consistency_checks: %d",
401 force_consistency_checks);
402 ROCKS_LOG_HEADER(log, " Options.report_bg_io_stats: %d",
403 report_bg_io_stats);
404 ROCKS_LOG_HEADER(log, " Options.ttl: %" PRIu64,
405 ttl);
406 ROCKS_LOG_HEADER(log,
407 " Options.periodic_compaction_seconds: %" PRIu64,
408 periodic_compaction_seconds);
409 ROCKS_LOG_HEADER(log, " Options.preclude_last_level_data_seconds: %" PRIu64,
410 preclude_last_level_data_seconds);
411 ROCKS_LOG_HEADER(log, " Options.preserve_internal_time_seconds: %" PRIu64,
412 preserve_internal_time_seconds);
413 ROCKS_LOG_HEADER(log, " Options.enable_blob_files: %s",
414 enable_blob_files ? "true" : "false");
415 ROCKS_LOG_HEADER(
416 log, " Options.min_blob_size: %" PRIu64,
417 min_blob_size);
418 ROCKS_LOG_HEADER(
419 log, " Options.blob_file_size: %" PRIu64,
420 blob_file_size);
421 ROCKS_LOG_HEADER(log, " Options.blob_compression_type: %s",
422 CompressionTypeToString(blob_compression_type).c_str());
423 ROCKS_LOG_HEADER(log, " Options.enable_blob_garbage_collection: %s",
424 enable_blob_garbage_collection ? "true" : "false");
425 ROCKS_LOG_HEADER(log, " Options.blob_garbage_collection_age_cutoff: %f",
426 blob_garbage_collection_age_cutoff);
427 ROCKS_LOG_HEADER(log, "Options.blob_garbage_collection_force_threshold: %f",
428 blob_garbage_collection_force_threshold);
429 ROCKS_LOG_HEADER(
430 log, " Options.blob_compaction_readahead_size: %" PRIu64,
431 blob_compaction_readahead_size);
432 ROCKS_LOG_HEADER(log, " Options.blob_file_starting_level: %d",
433 blob_file_starting_level);
434 if (blob_cache) {
435 ROCKS_LOG_HEADER(log, " Options.blob_cache: %s",
436 blob_cache->Name());
437 ROCKS_LOG_HEADER(log, " blob_cache options: %s",
438 blob_cache->GetPrintableOptions().c_str());
439 ROCKS_LOG_HEADER(
440 log, " blob_cache prepopulated: %s",
441 prepopulate_blob_cache == PrepopulateBlobCache::kFlushOnly
442 ? "flush only"
443 : "disabled");
444 }
445 ROCKS_LOG_HEADER(log, "Options.experimental_mempurge_threshold: %f",
446 experimental_mempurge_threshold);
447 } // ColumnFamilyOptions::Dump
448
449 void Options::Dump(Logger* log) const {
450 DBOptions::Dump(log);
451 ColumnFamilyOptions::Dump(log);
452 } // Options::Dump
453
454 void Options::DumpCFOptions(Logger* log) const {
455 ColumnFamilyOptions::Dump(log);
456 } // Options::DumpCFOptions
457
458 //
459 // The goal of this method is to create a configuration that
460 // allows an application to write all files into L0 and
461 // then do a single compaction to output all files into L1.
462 Options*
463 Options::PrepareForBulkLoad()
464 {
465 // never slowdown ingest.
466 level0_file_num_compaction_trigger = (1<<30);
467 level0_slowdown_writes_trigger = (1<<30);
468 level0_stop_writes_trigger = (1<<30);
469 soft_pending_compaction_bytes_limit = 0;
470 hard_pending_compaction_bytes_limit = 0;
471
472 // no auto compactions please. The application should issue a
473 // manual compaction after all data is loaded into L0.
474 disable_auto_compactions = true;
475 // A manual compaction run should pick all files in L0 in
476 // a single compaction run.
477 max_compaction_bytes = (static_cast<uint64_t>(1) << 60);
478
479 // It is better to have only 2 levels, otherwise a manual
480 // compaction would compact at every possible level, thereby
481 // increasing the total time needed for compactions.
482 num_levels = 2;
483
484 // Need to allow more write buffers to allow more parallism
485 // of flushes.
486 max_write_buffer_number = 6;
487 min_write_buffer_number_to_merge = 1;
488
489 // When compaction is disabled, more parallel flush threads can
490 // help with write throughput.
491 max_background_flushes = 4;
492
493 // Prevent a memtable flush to automatically promote files
494 // to L1. This is helpful so that all files that are
495 // input to the manual compaction are all at L0.
496 max_background_compactions = 2;
497
498 // The compaction would create large files in L1.
499 target_file_size_base = 256 * 1024 * 1024;
500 return this;
501 }
502
503 Options* Options::OptimizeForSmallDb() {
504 // 16MB block cache
505 std::shared_ptr<Cache> cache = NewLRUCache(16 << 20);
506
507 ColumnFamilyOptions::OptimizeForSmallDb(&cache);
508 DBOptions::OptimizeForSmallDb(&cache);
509 return this;
510 }
511
512 Options* Options::DisableExtraChecks() {
513 // See https://github.com/facebook/rocksdb/issues/9354
514 force_consistency_checks = false;
515 // Considered but no clear performance impact seen:
516 // * check_flush_compaction_key_order
517 // * paranoid_checks
518 // * flush_verify_memtable_count
519 // By current API contract, not including
520 // * verify_checksums
521 // because checking storage data integrity is a more standard practice.
522 return this;
523 }
524
525 Options* Options::OldDefaults(int rocksdb_major_version,
526 int rocksdb_minor_version) {
527 ColumnFamilyOptions::OldDefaults(rocksdb_major_version,
528 rocksdb_minor_version);
529 DBOptions::OldDefaults(rocksdb_major_version, rocksdb_minor_version);
530 return this;
531 }
532
533 DBOptions* DBOptions::OldDefaults(int rocksdb_major_version,
534 int rocksdb_minor_version) {
535 if (rocksdb_major_version < 4 ||
536 (rocksdb_major_version == 4 && rocksdb_minor_version < 7)) {
537 max_file_opening_threads = 1;
538 table_cache_numshardbits = 4;
539 }
540 if (rocksdb_major_version < 5 ||
541 (rocksdb_major_version == 5 && rocksdb_minor_version < 2)) {
542 delayed_write_rate = 2 * 1024U * 1024U;
543 } else if (rocksdb_major_version < 5 ||
544 (rocksdb_major_version == 5 && rocksdb_minor_version < 6)) {
545 delayed_write_rate = 16 * 1024U * 1024U;
546 }
547 max_open_files = 5000;
548 wal_recovery_mode = WALRecoveryMode::kTolerateCorruptedTailRecords;
549 return this;
550 }
551
552 ColumnFamilyOptions* ColumnFamilyOptions::OldDefaults(
553 int rocksdb_major_version, int rocksdb_minor_version) {
554 if (rocksdb_major_version < 5 ||
555 (rocksdb_major_version == 5 && rocksdb_minor_version <= 18)) {
556 compaction_pri = CompactionPri::kByCompensatedSize;
557 }
558 if (rocksdb_major_version < 4 ||
559 (rocksdb_major_version == 4 && rocksdb_minor_version < 7)) {
560 write_buffer_size = 4 << 20;
561 target_file_size_base = 2 * 1048576;
562 max_bytes_for_level_base = 10 * 1048576;
563 soft_pending_compaction_bytes_limit = 0;
564 hard_pending_compaction_bytes_limit = 0;
565 }
566 if (rocksdb_major_version < 5) {
567 level0_stop_writes_trigger = 24;
568 } else if (rocksdb_major_version == 5 && rocksdb_minor_version < 2) {
569 level0_stop_writes_trigger = 30;
570 }
571
572 return this;
573 }
574
575 // Optimization functions
576 DBOptions* DBOptions::OptimizeForSmallDb(std::shared_ptr<Cache>* cache) {
577 max_file_opening_threads = 1;
578 max_open_files = 5000;
579
580 // Cost memtable to block cache too.
581 std::shared_ptr<ROCKSDB_NAMESPACE::WriteBufferManager> wbm =
582 std::make_shared<ROCKSDB_NAMESPACE::WriteBufferManager>(
583 0, (cache != nullptr) ? *cache : std::shared_ptr<Cache>());
584 write_buffer_manager = wbm;
585
586 return this;
587 }
588
589 ColumnFamilyOptions* ColumnFamilyOptions::OptimizeForSmallDb(
590 std::shared_ptr<Cache>* cache) {
591 write_buffer_size = 2 << 20;
592 target_file_size_base = 2 * 1048576;
593 max_bytes_for_level_base = 10 * 1048576;
594 soft_pending_compaction_bytes_limit = 256 * 1048576;
595 hard_pending_compaction_bytes_limit = 1073741824ul;
596
597 BlockBasedTableOptions table_options;
598 table_options.block_cache =
599 (cache != nullptr) ? *cache : std::shared_ptr<Cache>();
600 table_options.cache_index_and_filter_blocks = true;
601 // Two level iterator to avoid LRU cache imbalance
602 table_options.index_type =
603 BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
604 table_factory.reset(new BlockBasedTableFactory(table_options));
605
606 return this;
607 }
608
609 #ifndef ROCKSDB_LITE
610 ColumnFamilyOptions* ColumnFamilyOptions::OptimizeForPointLookup(
611 uint64_t block_cache_size_mb) {
612 BlockBasedTableOptions block_based_options;
613 block_based_options.data_block_index_type =
614 BlockBasedTableOptions::kDataBlockBinaryAndHash;
615 block_based_options.data_block_hash_table_util_ratio = 0.75;
616 block_based_options.filter_policy.reset(NewBloomFilterPolicy(10));
617 block_based_options.block_cache =
618 NewLRUCache(static_cast<size_t>(block_cache_size_mb * 1024 * 1024));
619 table_factory.reset(new BlockBasedTableFactory(block_based_options));
620 memtable_prefix_bloom_size_ratio = 0.02;
621 memtable_whole_key_filtering = true;
622 return this;
623 }
624
625 ColumnFamilyOptions* ColumnFamilyOptions::OptimizeLevelStyleCompaction(
626 uint64_t memtable_memory_budget) {
627 write_buffer_size = static_cast<size_t>(memtable_memory_budget / 4);
628 // merge two memtables when flushing to L0
629 min_write_buffer_number_to_merge = 2;
630 // this means we'll use 50% extra memory in the worst case, but will reduce
631 // write stalls.
632 max_write_buffer_number = 6;
633 // start flushing L0->L1 as soon as possible. each file on level0 is
634 // (memtable_memory_budget / 2). This will flush level 0 when it's bigger than
635 // memtable_memory_budget.
636 level0_file_num_compaction_trigger = 2;
637 // doesn't really matter much, but we don't want to create too many files
638 target_file_size_base = memtable_memory_budget / 8;
639 // make Level1 size equal to Level0 size, so that L0->L1 compactions are fast
640 max_bytes_for_level_base = memtable_memory_budget;
641
642 // level style compaction
643 compaction_style = kCompactionStyleLevel;
644
645 // only compress levels >= 2
646 compression_per_level.resize(num_levels);
647 for (int i = 0; i < num_levels; ++i) {
648 if (i < 2) {
649 compression_per_level[i] = kNoCompression;
650 } else {
651 compression_per_level[i] =
652 LZ4_Supported()
653 ? kLZ4Compression
654 : (Snappy_Supported() ? kSnappyCompression : kNoCompression);
655 }
656 }
657 return this;
658 }
659
660 ColumnFamilyOptions* ColumnFamilyOptions::OptimizeUniversalStyleCompaction(
661 uint64_t memtable_memory_budget) {
662 write_buffer_size = static_cast<size_t>(memtable_memory_budget / 4);
663 // merge two memtables when flushing to L0
664 min_write_buffer_number_to_merge = 2;
665 // this means we'll use 50% extra memory in the worst case, but will reduce
666 // write stalls.
667 max_write_buffer_number = 6;
668 // universal style compaction
669 compaction_style = kCompactionStyleUniversal;
670 compaction_options_universal.compression_size_percent = 80;
671 return this;
672 }
673
674 DBOptions* DBOptions::IncreaseParallelism(int total_threads) {
675 max_background_jobs = total_threads;
676 env->SetBackgroundThreads(total_threads, Env::LOW);
677 env->SetBackgroundThreads(1, Env::HIGH);
678 return this;
679 }
680
681 #endif // !ROCKSDB_LITE
682
683 ReadOptions::ReadOptions()
684 : snapshot(nullptr),
685 iterate_lower_bound(nullptr),
686 iterate_upper_bound(nullptr),
687 readahead_size(0),
688 max_skippable_internal_keys(0),
689 read_tier(kReadAllTier),
690 verify_checksums(true),
691 fill_cache(true),
692 tailing(false),
693 managed(false),
694 total_order_seek(false),
695 auto_prefix_mode(false),
696 prefix_same_as_start(false),
697 pin_data(false),
698 background_purge_on_iterator_cleanup(false),
699 ignore_range_deletions(false),
700 timestamp(nullptr),
701 iter_start_ts(nullptr),
702 deadline(std::chrono::microseconds::zero()),
703 io_timeout(std::chrono::microseconds::zero()),
704 value_size_soft_limit(std::numeric_limits<uint64_t>::max()),
705 adaptive_readahead(false),
706 async_io(false),
707 optimize_multiget_for_io(true) {}
708
709 ReadOptions::ReadOptions(bool cksum, bool cache)
710 : snapshot(nullptr),
711 iterate_lower_bound(nullptr),
712 iterate_upper_bound(nullptr),
713 readahead_size(0),
714 max_skippable_internal_keys(0),
715 read_tier(kReadAllTier),
716 verify_checksums(cksum),
717 fill_cache(cache),
718 tailing(false),
719 managed(false),
720 total_order_seek(false),
721 auto_prefix_mode(false),
722 prefix_same_as_start(false),
723 pin_data(false),
724 background_purge_on_iterator_cleanup(false),
725 ignore_range_deletions(false),
726 timestamp(nullptr),
727 iter_start_ts(nullptr),
728 deadline(std::chrono::microseconds::zero()),
729 io_timeout(std::chrono::microseconds::zero()),
730 value_size_soft_limit(std::numeric_limits<uint64_t>::max()),
731 adaptive_readahead(false),
732 async_io(false),
733 optimize_multiget_for_io(true) {}
734
735 } // namespace ROCKSDB_NAMESPACE