/* note: no header guard */
// General options: host/addresses, REST API, crash and log destinations, plugin directories.
OPTION(host, OPT_STR) // "" means that ceph will use short hostname
OPTION(public_addr, OPT_ADDR)
+OPTION(public_addrv, OPT_ADDRVEC)
OPTION(public_bind_addr, OPT_ADDR)
OPTION(cluster_addr, OPT_ADDR)
OPTION(public_network, OPT_STR)
OPTION(restapi_log_level, OPT_STR) // default set by Python code
OPTION(restapi_base_url, OPT_STR) // ditto: default set by Python code
OPTION(fatal_signal_handlers, OPT_BOOL)
+OPTION(crash_dir, OPT_STR)
SAFE_OPTION(erasure_code_dir, OPT_STR) // default location for erasure-code plugins
OPTION(log_file, OPT_STR) // default changed by common_preinit()
OPTION(log_max_new, OPT_INT) // default changed by common_preinit()
OPTION(log_max_recent, OPT_INT) // default changed by common_preinit()
+OPTION(log_to_file, OPT_BOOL)
OPTION(log_to_stderr, OPT_BOOL) // default changed by common_preinit()
OPTION(err_to_stderr, OPT_BOOL) // default changed by common_preinit()
OPTION(log_to_syslog, OPT_BOOL)
OPTION(mon_cluster_log_to_syslog, OPT_STR)
OPTION(mon_cluster_log_to_syslog_level, OPT_STR) // this level and above
OPTION(mon_cluster_log_to_syslog_facility, OPT_STR)
+OPTION(mon_cluster_log_to_file, OPT_BOOL)
OPTION(mon_cluster_log_file, OPT_STR)
OPTION(mon_cluster_log_file_level, OPT_STR)
OPTION(mon_cluster_log_to_graylog, OPT_STR)
SAFE_OPTION(plugin_dir, OPT_STR)
-OPTION(xio_trace_mempool, OPT_BOOL) // mempool allocation counters
-OPTION(xio_trace_msgcnt, OPT_BOOL) // incoming/outgoing msg counters
-OPTION(xio_trace_xcon, OPT_BOOL) // Xio message encode/decode trace
-OPTION(xio_queue_depth, OPT_INT) // depth of Accelio msg queue
-OPTION(xio_mp_min, OPT_INT) // default min mempool size
-OPTION(xio_mp_max_64, OPT_INT) // max 64-byte chunks (buffer is 40)
-OPTION(xio_mp_max_256, OPT_INT) // max 256-byte chunks
-OPTION(xio_mp_max_1k, OPT_INT) // max 1K chunks
-OPTION(xio_mp_max_page, OPT_INT) // max 1K chunks
-OPTION(xio_mp_max_hint, OPT_INT) // max size-hint chunks
-OPTION(xio_portal_threads, OPT_INT) // xio portal threads per messenger
-OPTION(xio_max_conns_per_portal, OPT_INT) // max xio_connections per portal/ctx
-OPTION(xio_transport_type, OPT_STR) // xio transport type: {rdma or tcp}
-OPTION(xio_max_send_inline, OPT_INT) // xio maximum threshold to send inline
-
OPTION(compressor_zlib_isal, OPT_BOOL)
OPTION(compressor_zlib_level, OPT_INT) // regular zlib compression level; not applicable to the isa-l optimized version
+OPTION(compressor_zstd_level, OPT_INT) // regular zstd compression level
-OPTION(async_compressor_enabled, OPT_BOOL)
-OPTION(async_compressor_type, OPT_STR)
-OPTION(async_compressor_threads, OPT_INT)
-OPTION(async_compressor_thread_timeout, OPT_INT)
-OPTION(async_compressor_thread_suicide_timeout, OPT_INT)
+OPTION(qat_compressor_enabled, OPT_BOOL)
OPTION(plugin_crypto_accelerator, OPT_STR)
// ms_* (messenger) options: backend type, TCP, bind, throttling, fault injection, async workers.
SAFE_OPTION(ms_type, OPT_STR) // messenger backend; it is modified at runtime, hence SAFE_OPTION
OPTION(ms_public_type, OPT_STR) // messenger backend
OPTION(ms_cluster_type, OPT_STR) // messenger backend
+OPTION(ms_learn_addr_from_peer, OPT_BOOL)
OPTION(ms_tcp_nodelay, OPT_BOOL)
OPTION(ms_tcp_rcvbuf, OPT_INT)
-OPTION(ms_tcp_prefetch_max_size, OPT_INT) // max prefetch size, we limit this to avoid extra memcpy
+OPTION(ms_tcp_prefetch_max_size, OPT_U32) // max prefetch size, we limit this to avoid extra memcpy
OPTION(ms_initial_backoff, OPT_DOUBLE)
OPTION(ms_max_backoff, OPT_DOUBLE)
OPTION(ms_crc_data, OPT_BOOL)
OPTION(ms_die_on_unhandled_msg, OPT_BOOL)
OPTION(ms_die_on_old_message, OPT_BOOL) // assert if we get a dup incoming message and shouldn't have (may be triggered by pre-541cd3c64be0dfa04e8a2df39422e0eb9541a428 code)
OPTION(ms_die_on_skipped_message, OPT_BOOL) // assert if we skip a seq (kernel client does this intentionally)
+OPTION(ms_die_on_bug, OPT_BOOL)
OPTION(ms_dispatch_throttle_bytes, OPT_U64)
OPTION(ms_bind_ipv6, OPT_BOOL)
OPTION(ms_bind_port_min, OPT_INT)
OPTION(ms_bind_port_max, OPT_INT)
OPTION(ms_bind_retry_count, OPT_INT) // If binding fails, how many times do we retry to bind
-OPTION(ms_bind_retry_delay, OPT_INT) // Delay between attemps to bind
+OPTION(ms_bind_retry_delay, OPT_INT) // Delay between attempts to bind
OPTION(ms_bind_before_connect, OPT_BOOL)
OPTION(ms_tcp_listen_backlog, OPT_INT)
-OPTION(ms_rwthread_stack_bytes, OPT_U64)
-OPTION(ms_tcp_read_timeout, OPT_U64)
+OPTION(ms_connection_ready_timeout, OPT_U64)
+OPTION(ms_connection_idle_timeout, OPT_U64)
OPTION(ms_pq_max_tokens_per_priority, OPT_U64)
OPTION(ms_pq_min_cost, OPT_U64)
OPTION(ms_inject_socket_failures, OPT_U64)
OPTION(ms_inject_delay_max, OPT_DOUBLE) // seconds
OPTION(ms_inject_delay_probability, OPT_DOUBLE) // range [0, 1]
OPTION(ms_inject_internal_delays, OPT_DOUBLE) // seconds
+OPTION(ms_blackhole_osd, OPT_BOOL)
+OPTION(ms_blackhole_mon, OPT_BOOL)
+OPTION(ms_blackhole_mds, OPT_BOOL)
+OPTION(ms_blackhole_mgr, OPT_BOOL)
+OPTION(ms_blackhole_client, OPT_BOOL)
OPTION(ms_dump_on_send, OPT_BOOL) // hexdump msg to log on send
OPTION(ms_dump_corrupt_message_level, OPT_INT) // debug level to hexdump undecodable messages at
OPTION(ms_async_op_threads, OPT_U64) // number of worker processing threads for async messenger created on init
OPTION(ms_async_max_op_threads, OPT_U64) // max number of worker processing threads for async messenger
-OPTION(ms_async_set_affinity, OPT_BOOL)
-// example: ms_async_affinity_cores = 0,1
-// The number of coreset is expected to equal to ms_async_op_threads, otherwise
-// extra op threads will loop ms_async_affinity_cores again.
-// If ms_async_affinity_cores is empty, all threads will be bind to current running
-// core
-OPTION(ms_async_affinity_cores, OPT_STR)
// ms_async_rdma_* and ms_dpdk_* options, plus the accept-failure limit.
OPTION(ms_async_rdma_device_name, OPT_STR)
OPTION(ms_async_rdma_enable_hugepage, OPT_BOOL)
OPTION(ms_async_rdma_buffer_size, OPT_INT)
OPTION(ms_async_rdma_send_buffers, OPT_U32)
+// size of the receive buffer pool; 0 means unlimited
OPTION(ms_async_rdma_receive_buffers, OPT_U32)
+// max number of wr in srq
+OPTION(ms_async_rdma_receive_queue_len, OPT_U32)
+// support srq
+OPTION(ms_async_rdma_support_srq, OPT_BOOL)
OPTION(ms_async_rdma_port_num, OPT_U32)
OPTION(ms_async_rdma_polling_us, OPT_U32)
OPTION(ms_async_rdma_local_gid, OPT_STR) // GID format: "fe80:0000:0000:0000:7efe:90ff:fe72:6efe", no zero folding
OPTION(ms_async_rdma_sl, OPT_INT) // in RoCE, this means PCP
OPTION(ms_async_rdma_dscp, OPT_INT) // in RoCE, this means DSCP
+// rdma connection management
+OPTION(ms_async_rdma_cm, OPT_BOOL)
+OPTION(ms_async_rdma_type, OPT_STR)
+
+// When there have been enough accept failures to indicate an unrecoverable failure,
+// just call ceph_abort(). This option makes that limit configurable.
+OPTION(ms_max_accept_failures, OPT_INT)
+
OPTION(ms_dpdk_port_id, OPT_INT)
-SAFE_OPTION(ms_dpdk_coremask, OPT_STR) // it is modified in unittest so that use SAFE_OPTION to declare
+SAFE_OPTION(ms_dpdk_coremask, OPT_STR) // it is modified in unit tests, so it is declared with SAFE_OPTION
OPTION(ms_dpdk_memory_channel, OPT_STR)
OPTION(ms_dpdk_hugepages, OPT_STR)
OPTION(ms_dpdk_pmd, OPT_STR)
// mon_* options: osdmap cache/memory, OSD up/in/out policy, pg_temp priming, election, lease and timecheck.
OPTION(mon_compact_on_trim, OPT_BOOL) // compact (a prefix) when we trim old states
OPTION(mon_osd_cache_size, OPT_INT) // the size of osdmaps cache, not to rely on underlying store's cache
+OPTION(mon_osd_cache_size_min, OPT_U64) // minimum amount of memory to cache osdmaps
+OPTION(mon_memory_target, OPT_U64) // amount of mapped memory for osdmaps
+OPTION(mon_memory_autotune, OPT_BOOL) // autotune cache memory for osdmap
OPTION(mon_cpu_threads, OPT_INT)
OPTION(mon_osd_mapping_pgs_per_chunk, OPT_INT)
+OPTION(mon_clean_pg_upmaps_per_chunk, OPT_U64)
OPTION(mon_osd_max_creating_pgs, OPT_INT)
OPTION(mon_tick_interval, OPT_INT)
OPTION(mon_session_timeout, OPT_INT) // must send keepalive or subscribe
OPTION(mon_osd_auto_mark_new_in, OPT_BOOL) // mark booting new osds 'in'
OPTION(mon_osd_destroyed_out_interval, OPT_INT) // seconds
OPTION(mon_osd_down_out_interval, OPT_INT) // seconds
-OPTION(mon_osd_down_out_subtree_limit, OPT_STR) // smallest crush unit/type that we will not automatically mark out
OPTION(mon_osd_min_up_ratio, OPT_DOUBLE) // min osds required to be up to mark things down
OPTION(mon_osd_min_in_ratio, OPT_DOUBLE) // min osds required to be in to mark things out
OPTION(mon_osd_warn_op_age, OPT_DOUBLE) // max op age before we generate a warning (make it a power of 2)
OPTION(mon_osd_err_op_age_ratio, OPT_DOUBLE) // when to generate an error, as multiple of mon_osd_warn_op_age
-OPTION(mon_osd_max_split_count, OPT_INT) // largest number of PGs per "involved" OSD to let split create
-OPTION(mon_osd_allow_primary_temp, OPT_BOOL) // allow primary_temp to be set in the osdmap
-OPTION(mon_osd_allow_primary_affinity, OPT_BOOL) // allow primary_affinity to be set in the osdmap
OPTION(mon_osd_prime_pg_temp, OPT_BOOL) // prime osdmap with pg mapping changes
OPTION(mon_osd_prime_pg_temp_max_time, OPT_FLOAT) // max time to spend priming
OPTION(mon_osd_prime_pg_temp_max_estimate, OPT_FLOAT) // max estimate of pg total before we do all pgs in parallel
-OPTION(mon_osd_pool_ec_fast_read, OPT_BOOL) // whether turn on fast read on the pool or not
OPTION(mon_election_timeout, OPT_FLOAT) // on election proposer, max waiting time for all ACKs
OPTION(mon_lease, OPT_FLOAT) // lease interval
OPTION(mon_lease_renew_interval_factor, OPT_FLOAT) // on leader, to renew the lease
OPTION(mon_clock_drift_warn_backoff, OPT_FLOAT) // exponential backoff for clock drift warnings
OPTION(mon_timecheck_interval, OPT_FLOAT) // on leader, timecheck (clock drift check) interval (seconds)
OPTION(mon_timecheck_skew_interval, OPT_FLOAT) // on leader, timecheck (clock drift check) interval when in presence of a skew (seconds)
-OPTION(mon_pg_min_inactive, OPT_U64) // the number of PGs which have to be inactive longer than 'mon_pg_stuck_threshold' before health goes into ERR. 0 means disabled, never go into ERR.
-OPTION(mon_pg_warn_max_object_skew, OPT_FLOAT) // max skew few average in objects per pg
-OPTION(mon_pg_warn_min_objects, OPT_INT) // do not warn below this object #
-OPTION(mon_pg_warn_min_pool_objects, OPT_INT) // do not warn on pools below this object #
OPTION(mon_pg_check_down_all_threshold, OPT_FLOAT) // threshold of down osds after which we check all pgs
OPTION(mon_cache_target_full_warn_ratio, OPT_FLOAT) // position between pool cache_target_full and max where we start warning
OPTION(mon_osd_full_ratio, OPT_FLOAT) // what % full makes an OSD "full"
OPTION(mon_fake_pool_delete, OPT_BOOL) // fake pool deletion (add _DELETED suffix)
OPTION(mon_globalid_prealloc, OPT_U32) // how many globalids to prealloc
OPTION(mon_osd_report_timeout, OPT_INT) // grace period before declaring unresponsive OSDs dead
-OPTION(mon_force_standby_active, OPT_BOOL) // should mons force standby-replay mds to be active
OPTION(mon_warn_on_legacy_crush_tunables, OPT_BOOL) // warn if crush tunables are too old (older than mon_crush_min_required_version)
OPTION(mon_crush_min_required_version, OPT_STR)
OPTION(mon_warn_on_crush_straw_calc_version_zero, OPT_BOOL) // warn if crush straw_calc_version==0
OPTION(mon_warn_on_osd_down_out_interval_zero, OPT_BOOL) // warn if 'mon_osd_down_out_interval == 0'
OPTION(mon_warn_on_cache_pools_without_hit_sets, OPT_BOOL)
+OPTION(mon_warn_on_misplaced, OPT_BOOL)
OPTION(mon_min_osdmap_epochs, OPT_INT)
-OPTION(mon_max_pgmap_epochs, OPT_INT)
OPTION(mon_max_log_epochs, OPT_INT)
OPTION(mon_max_mdsmap_epochs, OPT_INT)
OPTION(mon_max_osd, OPT_INT)
OPTION(mon_reweight_min_bytes_per_osd, OPT_U64) // min bytes per osd for reweight-by-utilization command
OPTION(mon_reweight_max_osds, OPT_INT) // max osds to change per reweight-by-* command
OPTION(mon_reweight_max_change, OPT_DOUBLE)
-OPTION(mon_health_data_update_interval, OPT_FLOAT)
OPTION(mon_health_to_clog, OPT_BOOL)
OPTION(mon_health_to_clog_interval, OPT_INT)
OPTION(mon_health_to_clog_tick_interval, OPT_DOUBLE)
-OPTION(mon_health_preluminous_compat, OPT_BOOL)
+OPTION(mon_health_detail_to_clog, OPT_BOOL)
OPTION(mon_data_avail_crit, OPT_INT)
OPTION(mon_data_avail_warn, OPT_INT)
OPTION(mon_data_size_warn, OPT_U64) // issue a warning when the monitor's data store goes over 15GB (in bytes)
-OPTION(mon_warn_not_scrubbed, OPT_INT)
-OPTION(mon_warn_not_deep_scrubbed, OPT_INT)
+OPTION(mon_warn_pg_not_scrubbed_ratio, OPT_FLOAT)
+OPTION(mon_warn_pg_not_deep_scrubbed_ratio, OPT_FLOAT)
OPTION(mon_scrub_interval, OPT_INT) // once a day
OPTION(mon_scrub_timeout, OPT_INT) // let's give it 5 minutes; why not.
OPTION(mon_scrub_max_keys, OPT_INT) // max number of keys to scrub each time
OPTION(mon_scrub_inject_missing_keys, OPT_DOUBLE) // probability of injected missing keys [0.0, 1.0]
OPTION(mon_config_key_max_entry_size, OPT_INT) // max num bytes per config-key entry
// mon sync / debug options, followed by mon_client_* options.
OPTION(mon_sync_timeout, OPT_DOUBLE)
-OPTION(mon_sync_max_payload_size, OPT_U32) // max size for a sync chunk payload (say)
+OPTION(mon_sync_max_payload_size, OPT_SIZE) // max size for a sync chunk payload
+OPTION(mon_sync_max_payload_keys, OPT_INT)
OPTION(mon_sync_debug, OPT_BOOL) // enable sync-specific debug
OPTION(mon_inject_sync_get_chunk_delay, OPT_DOUBLE) // inject N second delay on each get_chunk request
-OPTION(mon_osd_min_down_reporters, OPT_INT) // number of OSDs from different subtrees who need to report a down OSD for it to count
-OPTION(mon_osd_reporter_subtree_level , OPT_STR) // in which level of parent bucket the reporters are counted
OPTION(mon_osd_force_trim_to, OPT_INT) // force mon to trim maps to this point, regardless of min_last_epoch_clean (dangerous)
OPTION(mon_mds_force_trim_to, OPT_INT) // force mon to trim mdsmaps to this point (dangerous)
OPTION(mon_mds_skip_sanity, OPT_BOOL) // skip safety assertions on FSMap (in case of bugs where we want to continue anyway)
OPTION(mon_debug_dump_transactions, OPT_BOOL)
OPTION(mon_debug_dump_json, OPT_BOOL)
OPTION(mon_debug_dump_location, OPT_STR)
-OPTION(mon_debug_no_require_luminous, OPT_BOOL)
OPTION(mon_debug_no_require_bluestore_for_ec_overwrites, OPT_BOOL)
OPTION(mon_debug_no_initial_persistent_features, OPT_BOOL)
OPTION(mon_inject_transaction_delay_max, OPT_DOUBLE) // seconds
OPTION(auth_debug, OPT_BOOL) // if true, assert when weird things happen
OPTION(mon_client_hunt_parallel, OPT_U32) // how many mons to try to connect to in parallel during hunt
OPTION(mon_client_hunt_interval, OPT_DOUBLE) // try new mon every N seconds until we connect
+OPTION(mon_client_log_interval, OPT_DOUBLE) // send logs every N seconds
OPTION(mon_client_ping_interval, OPT_DOUBLE) // ping every N seconds
OPTION(mon_client_ping_timeout, OPT_DOUBLE) // fail if we don't hear back
OPTION(mon_client_hunt_interval_backoff, OPT_DOUBLE) // each time we reconnect to a monitor, double our timeout
OPTION(mon_client_hunt_interval_max_multiple, OPT_DOUBLE) // up to a max of 10*default (30 seconds)
OPTION(mon_client_max_log_entries_per_message, OPT_INT)
-OPTION(mon_max_pool_pg_num, OPT_INT)
-OPTION(mon_pool_quota_warn_threshold, OPT_INT) // percent of quota at which to issue warnings
-OPTION(mon_pool_quota_crit_threshold, OPT_INT) // percent of quota at which to issue errors
+OPTION(mon_client_directed_command_retry, OPT_INT)
// client_* options (cache, readahead, mount ids, permissions) and crush_location settings.
OPTION(client_cache_size, OPT_INT)
OPTION(client_cache_mid, OPT_FLOAT)
OPTION(client_use_random_mds, OPT_BOOL)
OPTION(client_readahead_min, OPT_LONGLONG) // readahead at _least_ this much.
OPTION(client_readahead_max_bytes, OPT_LONGLONG) // default unlimited
OPTION(client_readahead_max_periods, OPT_LONGLONG) // as multiple of file layout period (object size * num stripes)
-OPTION(client_reconnect_stale, OPT_BOOL) // automatically reconnect stale session
OPTION(client_snapdir, OPT_STR)
-OPTION(client_mountpoint, OPT_STR)
OPTION(client_mount_uid, OPT_INT)
OPTION(client_mount_gid, OPT_INT)
OPTION(client_notify_timeout, OPT_INT) // in seconds
OPTION(client_permissions, OPT_BOOL)
OPTION(client_dirsize_rbytes, OPT_BOOL)
-// note: the max amount of "in flight" dirty data is roughly (max - target)
-OPTION(fuse_use_invalidate_cb, OPT_BOOL) // use fuse 2.8+ invalidate callback to keep page cache consistent
-OPTION(fuse_disable_pagecache, OPT_BOOL)
-OPTION(fuse_allow_other, OPT_BOOL)
-OPTION(fuse_default_permissions, OPT_BOOL)
-OPTION(fuse_big_writes, OPT_BOOL)
-OPTION(fuse_atomic_o_trunc, OPT_BOOL)
-OPTION(fuse_debug, OPT_BOOL)
-OPTION(fuse_multithreaded, OPT_BOOL)
-OPTION(fuse_require_active_mds, OPT_BOOL) // if ceph_fuse requires active mds server
-OPTION(fuse_syncfs_on_mksnap, OPT_BOOL)
-
OPTION(client_try_dentry_invalidate, OPT_BOOL) // the client should try to use dentry invalidation instead of remounting, on kernels it believes that will work for
OPTION(client_check_pool_perm, OPT_BOOL)
OPTION(client_use_faked_inos, OPT_BOOL)
-OPTION(client_mds_namespace, OPT_STR)
OPTION(crush_location, OPT_STR) // whitespace-separated list of key=value pairs describing crush location
OPTION(crush_location_hook, OPT_STR)
// Max number of truncate ops issued at once in a single Filer::truncate call
OPTION(filer_max_truncate_ops, OPT_U32)
-OPTION(journaler_write_head_interval, OPT_INT)
-OPTION(journaler_prefetch_periods, OPT_INT) // * journal object size
-OPTION(journaler_prezero_periods, OPT_INT) // * journal object size
OPTION(mds_data, OPT_STR)
-OPTION(mds_max_file_size, OPT_U64) // Used when creating new CephFS. Change with 'ceph mds set max_file_size <size>' afterwards
// max xattr kv pairs size for each dir/file
OPTION(mds_max_xattr_pairs_size, OPT_U32)
OPTION(mds_max_file_recover, OPT_U32)
OPTION(mds_beacon_grace, OPT_FLOAT)
OPTION(mds_enforce_unique_name, OPT_BOOL)
-OPTION(mds_session_timeout, OPT_FLOAT) // cap bits and leases time out if client unresponsive or not returning its caps
OPTION(mds_session_blacklist_on_timeout, OPT_BOOL) // whether to blacklist clients whose sessions are dropped due to timeout
OPTION(mds_session_blacklist_on_evict, OPT_BOOL) // whether to blacklist clients whose sessions are dropped via admin commands
OPTION(mds_sessionmap_keys_per_op, OPT_U32) // how many sessions should I try to load/store in a single OMAP operation?
-OPTION(mds_recall_state_timeout, OPT_FLOAT) // detect clients which aren't trimming caps
OPTION(mds_freeze_tree_timeout, OPT_FLOAT) // detecting freeze tree deadlock
-OPTION(mds_session_autoclose, OPT_FLOAT) // autoclose idle session
OPTION(mds_health_summarize_threshold, OPT_INT) // collapse N-client health metrics to a single 'many'
OPTION(mds_reconnect_timeout, OPT_FLOAT) // seconds to wait for clients during mds restart
- // make it (mds_session_timeout - mds_beacon_grace)
+ // make it (mdsmap.session_timeout - mds_beacon_grace)
OPTION(mds_tick_interval, OPT_FLOAT)
OPTION(mds_dirstat_min_interval, OPT_FLOAT) // try to avoid propagating more often than this
OPTION(mds_scatter_nudge_interval, OPT_FLOAT) // how quickly dirstat changes propagate up the hierarchy
OPTION(mds_bal_sample_interval, OPT_DOUBLE) // every 3 seconds
OPTION(mds_bal_replicate_threshold, OPT_FLOAT)
OPTION(mds_bal_unreplicate_threshold, OPT_FLOAT)
-OPTION(mds_bal_frag, OPT_BOOL)
OPTION(mds_bal_split_size, OPT_INT)
OPTION(mds_bal_split_rd, OPT_FLOAT)
OPTION(mds_bal_split_wr, OPT_FLOAT)
OPTION(mds_bal_split_bits, OPT_INT)
OPTION(mds_bal_merge_size, OPT_INT)
-OPTION(mds_bal_interval, OPT_INT) // seconds
-OPTION(mds_bal_fragment_interval, OPT_INT) // seconds
OPTION(mds_bal_fragment_size_max, OPT_INT) // order of magnitude higher than split size
OPTION(mds_bal_fragment_fast_factor, OPT_FLOAT) // multiple of size_max that triggers immediate split
OPTION(mds_bal_idle_threshold, OPT_FLOAT)
OPTION(mds_wipe_sessions, OPT_BOOL)
OPTION(mds_wipe_ino_prealloc, OPT_BOOL)
OPTION(mds_skip_ino, OPT_INT)
-OPTION(mds_standby_for_name, OPT_STR)
-OPTION(mds_standby_for_rank, OPT_INT)
-OPTION(mds_standby_for_fscid, OPT_INT)
-OPTION(mds_standby_replay, OPT_BOOL)
OPTION(mds_enable_op_tracker, OPT_BOOL) // enable/disable MDS op tracking
OPTION(mds_op_history_size, OPT_U32) // Max number of completed ops to track
OPTION(mds_op_history_duration, OPT_U32) // Oldest completed op to track
OPTION(mds_op_log_threshold, OPT_INT) // how many op log messages to show in one go
OPTION(mds_snap_min_uid, OPT_U32) // The minimum UID required to create a snapshot
OPTION(mds_snap_max_uid, OPT_U32) // The maximum UID allowed to create a snapshot
-OPTION(mds_snap_rstat, OPT_BOOL) // enable/disbale nested stat for snapshot
+OPTION(mds_snap_rstat, OPT_BOOL) // enable/disable nested stat for snapshot
OPTION(mds_verify_backtrace, OPT_U32)
// detect clients which aren't trimming completed requests
OPTION(mds_max_completed_flushes, OPT_U32)
// osd_* options: journal, client message caps, crush updates, pool defaults.
OPTION(osd_journal, OPT_STR)
OPTION(osd_journal_size, OPT_INT) // in mb
OPTION(osd_journal_flush_on_shutdown, OPT_BOOL) // Flush journal to data store on shutdown
-// flags for specific control purpose during osd mount() process.
+// flags for specific control purpose during osd mount() process.
// e.g., can be 1 to skip over replaying journal
// or 2 to skip over mounting omap or 3 to skip over both.
// This might be helpful in case the journal is totally corrupted
// NOTE(review): no option follows the comment above in this list (osd_max_pgls below has its own comment) - confirm whether it is stale.
OPTION(osd_max_pgls, OPT_U64) // max number of pgls entries to return
OPTION(osd_client_message_size_cap, OPT_U64) // client data allowed in-memory (in bytes)
OPTION(osd_client_message_cap, OPT_U64) // num client messages allowed in-memory
-OPTION(osd_pg_bits, OPT_INT) // bits per osd
-OPTION(osd_pgp_bits, OPT_INT) // bits per osd
OPTION(osd_crush_update_weight_set, OPT_BOOL) // update weight set while updating weights
OPTION(osd_crush_chooseleaf_type, OPT_INT) // 1 = host
OPTION(osd_pool_use_gmt_hitset, OPT_BOOL) // try to use gmt for hitset archive names if all osds in cluster support it.
OPTION(osd_crush_update_on_start, OPT_BOOL)
OPTION(osd_class_update_on_start, OPT_BOOL) // automatically set device class on start
OPTION(osd_crush_initial_weight, OPT_DOUBLE) // if >=0, the initial weight is for newly added osds.
-OPTION(osd_pool_default_crush_rule, OPT_INT)
-OPTION(osd_pool_erasure_code_stripe_unit, OPT_U32) // in bytes
-OPTION(osd_pool_default_size, OPT_INT)
-OPTION(osd_pool_default_min_size, OPT_INT) // 0 means no specific default; ceph will use size-size/2
-OPTION(osd_pool_default_pg_num, OPT_INT) // number of PGs for new pools. Configure in global or mon section of ceph.conf
-OPTION(osd_pool_default_pgp_num, OPT_INT) // number of PGs for placement purposes. Should be equal to pg_num
-OPTION(osd_pool_default_type, OPT_STR)
-OPTION(osd_pool_default_erasure_code_profile, OPT_STR) // default properties of osd pool create
OPTION(osd_erasure_code_plugins, OPT_STR) // list of erasure code plugins
// Allows the "peered" state for recovery and backfill below min_size
OPTION(osd_allow_recovery_below_min_size, OPT_BOOL)
+OPTION(osd_pool_default_ec_fast_read, OPT_BOOL) // whether to turn on fast read on the pool or not
OPTION(osd_pool_default_flags, OPT_INT) // default flags for new pools
OPTION(osd_pool_default_flag_hashpspool, OPT_BOOL) // use new pg hashing to prevent pool/pg overlap
OPTION(osd_pool_default_flag_nodelete, OPT_BOOL) // pool can't be deleted
OPTION(osd_pool_default_cache_min_flush_age, OPT_INT) // seconds
OPTION(osd_pool_default_cache_min_evict_age, OPT_INT) // seconds
OPTION(osd_pool_default_cache_max_evict_check_size, OPT_INT) // max size to check for eviction
+OPTION(osd_pool_default_read_lease_ratio, OPT_FLOAT)
// osd_* options: hit sets, tier promotion, osdmap handling, op queue.
OPTION(osd_hit_set_min_size, OPT_INT) // min target size for a HitSet
OPTION(osd_hit_set_max_size, OPT_INT) // max target size for a HitSet
OPTION(osd_hit_set_namespace, OPT_STR) // rados namespace for hit_set tracking
OPTION(osd_tier_promote_max_objects_sec, OPT_U64)
OPTION(osd_tier_promote_max_bytes_sec, OPT_U64)
-OPTION(osd_tier_default_cache_mode, OPT_STR)
-OPTION(osd_tier_default_cache_hit_set_count, OPT_INT)
-OPTION(osd_tier_default_cache_hit_set_period, OPT_INT)
-OPTION(osd_tier_default_cache_hit_set_type, OPT_STR)
-OPTION(osd_tier_default_cache_min_read_recency_for_promote, OPT_INT) // number of recent HitSets the object must appear in to be promoted (on read)
-OPTION(osd_tier_default_cache_min_write_recency_for_promote, OPT_INT) // number of recent HitSets the object must appear in to be promoted (on write)
-OPTION(osd_tier_default_cache_hit_set_grade_decay_rate, OPT_INT)
-OPTION(osd_tier_default_cache_hit_set_search_last_n, OPT_INT)
+OPTION(osd_objecter_finishers, OPT_INT)
OPTION(osd_map_dedup, OPT_BOOL)
-OPTION(osd_map_max_advance, OPT_INT) // make this < cache_size!
OPTION(osd_map_cache_size, OPT_INT)
OPTION(osd_map_message_max, OPT_INT) // max maps per MOSDMap message
+OPTION(osd_map_message_max_bytes, OPT_SIZE) // max bytes of maps per MOSDMap message (going by the option name - confirm)
OPTION(osd_map_share_max_epochs, OPT_INT) // cap on # of inc maps we send to peers, clients
OPTION(osd_inject_bad_map_crc_probability, OPT_FLOAT)
OPTION(osd_inject_failure_on_pg_removal, OPT_BOOL)
OPTION(osd_max_markdown_period , OPT_INT)
OPTION(osd_max_markdown_count, OPT_INT)
-OPTION(osd_peering_wq_threads, OPT_INT)
-OPTION(osd_peering_wq_batch_size, OPT_U64)
OPTION(osd_op_pq_max_tokens_per_priority, OPT_U64)
OPTION(osd_op_pq_min_cost, OPT_U64)
-OPTION(osd_disk_threads, OPT_INT)
-OPTION(osd_disk_thread_ioprio_class, OPT_STR) // rt realtime be best effort idle
-OPTION(osd_disk_thread_ioprio_priority, OPT_INT) // 0-7
OPTION(osd_recover_clone_overlap, OPT_BOOL) // preserve clone_overlap during recovery/migration
OPTION(osd_op_num_threads_per_shard, OPT_INT)
OPTION(osd_op_num_threads_per_shard_hdd, OPT_INT)
OPTION(osd_op_queue_cut_off, OPT_STR) // Min priority to go to strict queue. (low, high)
-// mClock priority queue parameters for five types of ops
-OPTION(osd_op_queue_mclock_client_op_res, OPT_DOUBLE)
-OPTION(osd_op_queue_mclock_client_op_wgt, OPT_DOUBLE)
-OPTION(osd_op_queue_mclock_client_op_lim, OPT_DOUBLE)
-OPTION(osd_op_queue_mclock_osd_subop_res, OPT_DOUBLE)
-OPTION(osd_op_queue_mclock_osd_subop_wgt, OPT_DOUBLE)
-OPTION(osd_op_queue_mclock_osd_subop_lim, OPT_DOUBLE)
-OPTION(osd_op_queue_mclock_snap_res, OPT_DOUBLE)
-OPTION(osd_op_queue_mclock_snap_wgt, OPT_DOUBLE)
-OPTION(osd_op_queue_mclock_snap_lim, OPT_DOUBLE)
-OPTION(osd_op_queue_mclock_recov_res, OPT_DOUBLE)
-OPTION(osd_op_queue_mclock_recov_wgt, OPT_DOUBLE)
-OPTION(osd_op_queue_mclock_recov_lim, OPT_DOUBLE)
-OPTION(osd_op_queue_mclock_scrub_res, OPT_DOUBLE)
-OPTION(osd_op_queue_mclock_scrub_wgt, OPT_DOUBLE)
-OPTION(osd_op_queue_mclock_scrub_lim, OPT_DOUBLE)
-
OPTION(osd_ignore_stale_divergent_priors, OPT_BOOL) // do not assert on divergent_prior entries which aren't in the log and whose on-disk objects are newer
// Set to true for testing. Users should NOT set this.
// Presumably: even after reading enough shards to decode the object, any error
// will be reported (the original comment was truncated - confirm).
OPTION(osd_read_ec_check_for_errors, OPT_BOOL) // return error if any ec shard has an error
-// Only use clone_overlap for recovery if there are fewer than
-// osd_recover_clone_overlap_limit entries in the overlap set
-OPTION(osd_recover_clone_overlap_limit, OPT_INT)
+OPTION(osd_debug_feed_pullee, OPT_INT)
OPTION(osd_backfill_scan_min, OPT_INT)
OPTION(osd_backfill_scan_max, OPT_INT)
OPTION(osd_op_thread_timeout, OPT_INT)
OPTION(osd_op_thread_suicide_timeout, OPT_INT)
-OPTION(osd_recovery_thread_timeout, OPT_INT)
-OPTION(osd_recovery_thread_suicide_timeout, OPT_INT)
OPTION(osd_recovery_sleep, OPT_FLOAT) // seconds to sleep between recovery ops
OPTION(osd_recovery_sleep_hdd, OPT_FLOAT)
OPTION(osd_recovery_sleep_ssd, OPT_FLOAT)
OPTION(osd_snap_trim_sleep, OPT_DOUBLE)
OPTION(osd_scrub_invalid_stats, OPT_BOOL)
-OPTION(osd_remove_thread_timeout, OPT_INT)
-OPTION(osd_remove_thread_suicide_timeout, OPT_INT)
OPTION(osd_command_thread_timeout, OPT_INT)
OPTION(osd_command_thread_suicide_timeout, OPT_INT)
-OPTION(osd_heartbeat_addr, OPT_ADDR)
OPTION(osd_heartbeat_interval, OPT_INT) // (seconds) how often we ping peers
// (seconds) how long before we decide a peer has failed
// NOTE(review): the comment above has no matching option in this list; it does not appear to describe the ratio below - confirm.
OPTION(osd_heartbeat_min_healthy_ratio, OPT_FLOAT)
OPTION(osd_mon_heartbeat_interval, OPT_INT) // (seconds) how often to ping monitor if no peers
-OPTION(osd_mon_report_interval_max, OPT_INT)
-OPTION(osd_mon_report_interval_min, OPT_INT) // pg stats, failures, up_thru, boot.
+OPTION(osd_mon_report_interval, OPT_INT) // failures, up_thru, boot.
OPTION(osd_mon_report_max_in_flight, OPT_INT) // max updates in flight
OPTION(osd_beacon_report_interval, OPT_INT) // (seconds) how often to send beacon message to monitor
OPTION(osd_pg_stat_report_interval_max, OPT_INT) // report pg stats for any given pg at least this often
OPTION(osd_auto_mark_unfound_lost, OPT_BOOL)
OPTION(osd_recovery_delay_start, OPT_FLOAT)
OPTION(osd_recovery_max_active, OPT_U64)
+OPTION(osd_recovery_max_active_hdd, OPT_U64)
+OPTION(osd_recovery_max_active_ssd, OPT_U64)
OPTION(osd_recovery_max_single_start, OPT_U64)
OPTION(osd_recovery_max_chunk, OPT_U64) // max size of push chunk
OPTION(osd_recovery_max_omap_entries_per_chunk, OPT_U64) // max number of omap entries per chunk; 0 to disable limit
OPTION(osd_push_per_object_cost, OPT_U64) // push cost per object
OPTION(osd_max_push_cost, OPT_U64) // max size of push message
OPTION(osd_max_push_objects, OPT_U64) // max objects in single push op
-OPTION(osd_recovery_forget_lost_objects, OPT_BOOL) // off for now
// osd_* options: scrub scheduling/repair, deep-scrub thresholds, pg log sizing.
OPTION(osd_max_scrubs, OPT_INT)
OPTION(osd_scrub_during_recovery, OPT_BOOL) // Allow new scrubs to start while recovery is active on the OSD
+OPTION(osd_repair_during_recovery, OPT_BOOL) // Allow new requested repairs to start while recovery is active on the OSD
OPTION(osd_scrub_begin_hour, OPT_INT)
OPTION(osd_scrub_end_hour, OPT_INT)
OPTION(osd_scrub_begin_week_day, OPT_INT)
OPTION(osd_scrub_chunk_min, OPT_INT)
OPTION(osd_scrub_chunk_max, OPT_INT)
OPTION(osd_scrub_sleep, OPT_FLOAT) // sleep between [deep]scrub ops
+OPTION(osd_scrub_extended_sleep, OPT_FLOAT) // more sleep between [deep]scrub ops
OPTION(osd_scrub_auto_repair, OPT_BOOL) // whether to auto-repair inconsistencies upon deep-scrubbing
OPTION(osd_scrub_auto_repair_num_errors, OPT_U32) // only auto-repair when number of errors is below this threshold
OPTION(osd_deep_scrub_interval, OPT_FLOAT) // once a week
OPTION(osd_deep_scrub_keys, OPT_INT)
OPTION(osd_deep_scrub_update_digest_min_age, OPT_INT) // objects must be this old (seconds) before we update the whole-object digest on scrub
OPTION(osd_skip_data_digest, OPT_BOOL)
-OPTION(osd_distrust_data_digest, OPT_BOOL)
OPTION(osd_deep_scrub_large_omap_object_key_threshold, OPT_U64)
OPTION(osd_deep_scrub_large_omap_object_value_sum_threshold, OPT_U64)
OPTION(osd_class_dir, OPT_STR) // where rados plugins are stored
// Bounds how infrequently a new map epoch will be persisted for a pg
OPTION(osd_pg_epoch_persisted_max_stale, OPT_U32) // make this < map_cache_size!
+OPTION(osd_target_pg_log_entries_per_osd, OPT_U32)
OPTION(osd_min_pg_log_entries, OPT_U32) // number of entries to keep in the pg log when trimming it
OPTION(osd_max_pg_log_entries, OPT_U32) // max entries, say when degraded, before we trim
OPTION(osd_pg_log_dups_tracked, OPT_U32) // how many versions back to track combined in both pglog's regular + dup logs
+OPTION(osd_object_clean_region_max_num_intervals, OPT_INT) // number of intervals in clean_offsets
OPTION(osd_force_recovery_pg_log_entries_factor, OPT_FLOAT) // max entries factor before force recovery
OPTION(osd_pg_log_trim_min, OPT_U32)
OPTION(osd_pg_log_trim_max, OPT_U32)
OPTION(osd_command_max_records, OPT_INT)
OPTION(osd_max_pg_blocked_by, OPT_U32) // max peer osds to report that are blocking our progress
OPTION(osd_op_log_threshold, OPT_INT) // how many op log messages to show in one go
-OPTION(osd_verify_sparse_read_holes, OPT_BOOL) // read fiemap-reported holes and verify they are zeros
OPTION(osd_backoff_on_unfound, OPT_BOOL) // object unfound
OPTION(osd_backoff_on_degraded, OPT_BOOL) // [mainly for debug?] object unreadable/writeable
-OPTION(osd_backoff_on_down, OPT_BOOL) // pg in down/incomplete state
OPTION(osd_backoff_on_peering, OPT_BOOL) // [debug] pg peering
OPTION(osd_debug_crash_on_ignored_backoff, OPT_BOOL) // crash osd if client ignores a backoff; useful for debugging
OPTION(osd_debug_inject_dispatch_delay_probability, OPT_DOUBLE)
OPTION(osd_debug_random_push_read_error, OPT_DOUBLE)
OPTION(osd_debug_verify_cached_snaps, OPT_BOOL)
OPTION(osd_debug_deep_scrub_sleep, OPT_FLOAT)
+OPTION(osd_debug_no_acting_change, OPT_BOOL)
+OPTION(osd_debug_pretend_recovery_active, OPT_BOOL)
OPTION(osd_enable_op_tracker, OPT_BOOL) // enable/disable OSD op tracking
OPTION(osd_num_op_tracker_shard, OPT_U32) // The number of shards for holding the ops
OPTION(osd_op_history_size, OPT_U32) // Max number of completed ops to track
OPTION(osd_op_history_slow_op_threshold, OPT_DOUBLE) // track the op if over this threshold
OPTION(osd_target_transaction_size, OPT_INT) // to adjust various transactions that batch smaller items
OPTION(osd_failsafe_full_ratio, OPT_FLOAT) // what % full makes an OSD "full" (failsafe)
+OPTION(osd_fast_shutdown, OPT_BOOL)
OPTION(osd_fast_fail_on_connection_refused, OPT_BOOL) // immediately mark OSDs as down once they refuse to accept connections
OPTION(osd_pg_object_context_cache_count, OPT_INT)
// determines whether PGLog::check() compares written out log to stored log
OPTION(osd_debug_pg_log_writeout, OPT_BOOL)
OPTION(osd_loop_before_reset_tphandle, OPT_U32) // Max number of loops before we reset thread-pool's handle
+OPTION(osd_max_snap_prune_intervals_per_epoch, OPT_U64) // Max number of snap intervals to report to mgr in pg_stat_t
+
// default timeout while calling WaitInterval on an empty queue
OPTION(threadpool_default_timeout, OPT_INT)
// default wait time for an empty queue before pinging the hb timeout
OPTION(leveldb_log, OPT_STR) // enable leveldb log file
OPTION(leveldb_compact_on_mount, OPT_BOOL)
-OPTION(kinetic_host, OPT_STR) // hostname or ip address of a kinetic drive to use
-OPTION(kinetic_port, OPT_INT) // port number of the kinetic drive
-OPTION(kinetic_user_id, OPT_INT) // kinetic user to authenticate as
-OPTION(kinetic_hmac_key, OPT_STR) // kinetic key to authenticate with
-OPTION(kinetic_use_ssl, OPT_BOOL) // whether to secure kinetic traffic with TLS
-
-
-OPTION(rocksdb_separate_wal_dir, OPT_BOOL) // use $path.wal for wal
-SAFE_OPTION(rocksdb_db_paths, OPT_STR) // path,size( path,size)*
OPTION(rocksdb_log_to_ceph_log, OPT_BOOL) // log to ceph log
OPTION(rocksdb_cache_size, OPT_U64) // rocksdb cache size (unless set by bluestore/etc)
OPTION(rocksdb_cache_row_ratio, OPT_FLOAT) // ratio of cache for row (vs block)
OPTION(rocksdb_collect_compaction_stats, OPT_BOOL) // For rocksdb, this behavior will be an overhead of 5%~10%; collected only if rocksdb_perf is enabled.
OPTION(rocksdb_collect_extended_stats, OPT_BOOL) // For rocksdb, this behavior will be an overhead of 5%~10%; collected only if rocksdb_perf is enabled.
OPTION(rocksdb_collect_memory_stats, OPT_BOOL) // For rocksdb, this behavior will be an overhead of 5%~10%; collected only if rocksdb_perf is enabled.
-OPTION(rocksdb_enable_rmrange, OPT_BOOL) // see https://github.com/facebook/rocksdb/blob/master/include/rocksdb/db.h#L253
// rocksdb options that will be used for omap(if omap_backend is rocksdb)
OPTION(filestore_rocksdb_options, OPT_STR)
*/
OPTION(osd_client_op_priority, OPT_U32)
OPTION(osd_recovery_op_priority, OPT_U32)
+OPTION(osd_peering_op_priority, OPT_U32)
OPTION(osd_snap_trim_priority, OPT_U32)
OPTION(osd_snap_trim_cost, OPT_U32) // set default cost equal to 1MB io
OPTION(osd_scrub_priority, OPT_U32)
// set default cost equal to 50MB io
-OPTION(osd_scrub_cost, OPT_U32)
+OPTION(osd_scrub_cost, OPT_U32)
// set requested scrub priority higher than scrub priority to make the
// requested scrubs jump the queue of scheduled scrubs
OPTION(osd_requested_scrub_priority, OPT_U32)
+OPTION(osd_pg_delete_priority, OPT_U32)
+OPTION(osd_pg_delete_cost, OPT_U32) // set default cost equal to 1MB io
+
OPTION(osd_recovery_priority, OPT_U32)
// set default cost equal to 20MB io
OPTION(osd_recovery_cost, OPT_U32)
/**
- * osd_recovery_op_warn_multiple scales the normal warning threshhold,
+ * osd_recovery_op_warn_multiple scales the normal warning threshold,
* osd_op_complaint_time, so that slow recovery ops won't cause noise
*/
OPTION(osd_recovery_op_warn_multiple, OPT_U32)
OPTION(osd_max_omap_entries_per_request, OPT_U64)
OPTION(osd_max_omap_bytes_per_request, OPT_U64)
+OPTION(osd_max_write_op_reply_len, OPT_U64)
OPTION(osd_objectstore, OPT_STR) // ObjectStore backend type
OPTION(osd_objectstore_tracing, OPT_BOOL) // true if LTTng-UST tracepoints should be enabled
OPTION(memstore_device_bytes, OPT_U64)
OPTION(memstore_page_set, OPT_BOOL)
OPTION(memstore_page_size, OPT_U64)
+OPTION(memstore_debug_omit_block_device_write, OPT_BOOL)
OPTION(bdev_debug_inflight_ios, OPT_BOOL)
OPTION(bdev_inject_crash, OPT_INT) // if N>0, then ~ 1/N IOs will complete before we crash on flush.
OPTION(bdev_block_size, OPT_INT)
OPTION(bdev_debug_aio, OPT_BOOL)
OPTION(bdev_debug_aio_suicide_timeout, OPT_FLOAT)
+OPTION(bdev_debug_aio_log_age, OPT_DOUBLE)
// if yes, osd will unbind all NVMe devices from kernel driver and bind them
// to the uio_pci_generic driver. The purpose is to prevent the case where
// NVMe driver is loaded while osd is running.
OPTION(bdev_nvme_unbind_from_kernel, OPT_BOOL)
OPTION(bdev_nvme_retry_count, OPT_INT) // -1 means by default which is 4
+OPTION(bdev_enable_discard, OPT_BOOL)
+OPTION(bdev_async_discard, OPT_BOOL)
OPTION(objectstore_blackhole, OPT_BOOL)
OPTION(bluefs_alloc_size, OPT_U64)
+OPTION(bluefs_shared_alloc_size, OPT_U64)
OPTION(bluefs_max_prefetch, OPT_U64)
OPTION(bluefs_min_log_runway, OPT_U64) // alloc when we get this low
OPTION(bluefs_max_log_runway, OPT_U64) // alloc this much at a time
OPTION(bluefs_buffered_io, OPT_BOOL)
OPTION(bluefs_sync_write, OPT_BOOL)
OPTION(bluefs_allocator, OPT_STR) // stupid | bitmap
-OPTION(bluefs_preextend_wal_files, OPT_BOOL) // this *requires* that rocksdb has recycling enabled
+OPTION(bluefs_log_replay_check_allocations, OPT_BOOL)
+OPTION(bluefs_replay_recovery, OPT_BOOL)
+OPTION(bluefs_replay_recovery_disable_compact, OPT_BOOL)
OPTION(bluestore_bluefs, OPT_BOOL)
OPTION(bluestore_bluefs_env_mirror, OPT_BOOL) // mirror to normal Env for debug
OPTION(bluestore_bluefs_gift_ratio, OPT_FLOAT) // how much to add at a time
OPTION(bluestore_bluefs_reclaim_ratio, OPT_FLOAT) // how much to reclaim at a time
OPTION(bluestore_bluefs_balance_interval, OPT_FLOAT) // how often (sec) to balance free space between bluefs and bluestore
+// how often (sec) to dump allocator on allocation failure
+OPTION(bluestore_bluefs_alloc_failure_dump_interval, OPT_FLOAT)
+
+// Enforces db sync with legacy bluefs extents information on close.
+// Enables downgrades to pre-nautilus releases
+OPTION(bluestore_bluefs_db_compatibility, OPT_BOOL)
+
// If you want to use spdk driver, you need to specify NVMe serial number here
// with "spdk:" prefix.
// Users can use 'lspci -vvv -d 8086:0953 | grep "Device Serial Number"' to
// get the serial number of Intel(R) Fultondale NVMe controllers.
// Example:
// bluestore_block_path = spdk:55cd2e404bd73932
-// If you want to run multiple SPDK instances per node, you must specify the
-// amount of dpdk memory size in MB each instance will use, to make sure each
-// instance uses its own dpdk memory
-OPTION(bluestore_spdk_mem, OPT_U32)
-// A hexadecimal bit mask of the cores to run on. Note the core numbering can change between platforms and should be determined beforehand.
-OPTION(bluestore_spdk_coremask, OPT_STR)
-// Specify the maximal I/Os to be batched completed while checking queue pair completions.
-// Default value 0 means that let SPDK nvme library determine the value.
-OPTION(bluestore_spdk_max_io_completion, OPT_U32)
OPTION(bluestore_block_path, OPT_STR)
OPTION(bluestore_block_size, OPT_U64) // 10gb for testing
OPTION(bluestore_block_create, OPT_BOOL)
OPTION(bluestore_block_wal_size, OPT_U64) // rocksdb wal
OPTION(bluestore_block_wal_create, OPT_BOOL)
OPTION(bluestore_block_preallocate_file, OPT_BOOL) // whether to preallocate space if block/db_path/wal_path is a file rather than a block device.
+OPTION(bluestore_ignore_data_csum, OPT_BOOL)
OPTION(bluestore_csum_type, OPT_STR) // none|xxhash32|xxhash64|crc32c|crc32c_16|crc32c_8
-OPTION(bluestore_csum_min_block, OPT_U32)
-OPTION(bluestore_csum_max_block, OPT_U32)
+OPTION(bluestore_retry_disk_reads, OPT_U64)
OPTION(bluestore_min_alloc_size, OPT_U32)
OPTION(bluestore_min_alloc_size_hdd, OPT_U32)
OPTION(bluestore_min_alloc_size_ssd, OPT_U32)
/*
* Specifies minimum expected amount of saved allocation units
* per single blob to enable compressed blobs garbage collection
- *
+ *
*/
-OPTION(bluestore_gc_enable_blob_threshold, OPT_INT)
+OPTION(bluestore_gc_enable_blob_threshold, OPT_INT)
/*
* Specifies minimum expected amount of saved allocation units
* per all blobs to enable compressed blobs garbage collection
- *
+ *
*/
-OPTION(bluestore_gc_enable_total_threshold, OPT_INT)
+OPTION(bluestore_gc_enable_total_threshold, OPT_INT)
OPTION(bluestore_max_blob_size, OPT_U32)
OPTION(bluestore_max_blob_size_hdd, OPT_U32)
OPTION(bluestore_cache_size_ssd, OPT_U64)
OPTION(bluestore_cache_meta_ratio, OPT_DOUBLE)
OPTION(bluestore_cache_kv_ratio, OPT_DOUBLE)
-OPTION(bluestore_cache_kv_max, OPT_INT) // limit the maximum amount of cache for the kv store
+OPTION(bluestore_alloc_stats_dump_interval, OPT_DOUBLE)
OPTION(bluestore_kvbackend, OPT_STR)
OPTION(bluestore_allocator, OPT_STR) // stupid | bitmap
OPTION(bluestore_freelist_blocks_per_key, OPT_INT)
OPTION(bluestore_bitmapallocator_blocks_per_zone, OPT_INT) // must be power of 2 aligned, e.g., 512, 1024, 2048...
OPTION(bluestore_bitmapallocator_span_size, OPT_INT) // must be power of 2 aligned, e.g., 512, 1024, 2048...
OPTION(bluestore_max_deferred_txc, OPT_U64)
+OPTION(bluestore_max_defer_interval, OPT_U64)
OPTION(bluestore_rocksdb_options, OPT_STR)
OPTION(bluestore_fsck_on_mount, OPT_BOOL)
OPTION(bluestore_fsck_on_mount_deep, OPT_BOOL)
+OPTION(bluestore_fsck_quick_fix_on_mount, OPT_BOOL)
OPTION(bluestore_fsck_on_umount, OPT_BOOL)
OPTION(bluestore_fsck_on_umount_deep, OPT_BOOL)
OPTION(bluestore_fsck_on_mkfs, OPT_BOOL)
OPTION(bluestore_fsck_on_mkfs_deep, OPT_BOOL)
OPTION(bluestore_sync_submit_transaction, OPT_BOOL) // submit kv txn in queueing thread (not kv_sync_thread)
+OPTION(bluestore_fsck_read_bytes_cap, OPT_U64)
+OPTION(bluestore_fsck_quick_fix_threads, OPT_INT)
OPTION(bluestore_throttle_bytes, OPT_U64)
OPTION(bluestore_throttle_deferred_bytes, OPT_U64)
OPTION(bluestore_throttle_cost_per_io_hdd, OPT_U64)
OPTION(bluestore_debug_misc, OPT_BOOL)
OPTION(bluestore_debug_no_reuse_blocks, OPT_BOOL)
OPTION(bluestore_debug_small_allocations, OPT_INT)
+OPTION(bluestore_debug_too_many_blobs_threshold, OPT_INT)
OPTION(bluestore_debug_freelist, OPT_BOOL)
OPTION(bluestore_debug_prefill, OPT_FLOAT)
OPTION(bluestore_debug_prefragment_max, OPT_INT)
OPTION(bluestore_debug_fsck_abort, OPT_BOOL)
OPTION(bluestore_debug_omit_kv_commit, OPT_BOOL)
OPTION(bluestore_debug_permit_any_bdev_label, OPT_BOOL)
-OPTION(bluestore_shard_finishers, OPT_BOOL)
OPTION(bluestore_debug_random_read_err, OPT_DOUBLE)
+OPTION(bluestore_debug_inject_bug21040, OPT_BOOL)
+OPTION(bluestore_debug_inject_csum_err_probability, OPT_FLOAT)
+OPTION(bluestore_fsck_error_on_no_per_pool_stats, OPT_BOOL)
+OPTION(bluestore_warn_on_bluefs_spillover, OPT_BOOL)
+OPTION(bluestore_warn_on_legacy_statfs, OPT_BOOL)
+OPTION(bluestore_fsck_error_on_no_per_pool_omap, OPT_BOOL)
+OPTION(bluestore_warn_on_no_per_pool_omap, OPT_BOOL)
+OPTION(bluestore_log_op_age, OPT_DOUBLE)
+OPTION(bluestore_log_omap_iterator_age, OPT_DOUBLE)
+OPTION(bluestore_log_collection_list_age, OPT_DOUBLE)
+OPTION(bluestore_debug_enforce_settings, OPT_STR)
+OPTION(bluestore_volume_selection_policy, OPT_STR)
+OPTION(bluestore_volume_selection_reserved_factor, OPT_DOUBLE)
+OPTION(bluestore_volume_selection_reserved, OPT_INT)
OPTION(kstore_max_ops, OPT_U64)
OPTION(kstore_max_bytes, OPT_U64)
// (try to) use extsize for alloc hint NOTE: extsize seems to trigger
// data corruption in xfs prior to kernel 3.5. filestore will
-// implicity disable this if it cannot confirm the kernel is newer
+// implicitly disable this if it cannot confirm the kernel is newer
// than that.
// NOTE: This option involves a tradeoff: When disabled, fragmentation is
// worse, but large sequential writes are faster. When enabled, large
/// Filestore high delay multiple. Defaults to 0 (disabled)
OPTION(filestore_queue_high_delay_multiple, OPT_DOUBLE)
+/// Filestore max delay multiple bytes. Defaults to 0 (disabled)
+OPTION(filestore_queue_max_delay_multiple_bytes, OPT_DOUBLE)
+/// Filestore high delay multiple bytes. Defaults to 0 (disabled)
+OPTION(filestore_queue_high_delay_multiple_bytes, OPT_DOUBLE)
+
+/// Filestore max delay multiple ops. Defaults to 0 (disabled)
+OPTION(filestore_queue_max_delay_multiple_ops, OPT_DOUBLE)
+/// Filestore high delay multiple ops. Defaults to 0 (disabled)
+OPTION(filestore_queue_high_delay_multiple_ops, OPT_DOUBLE)
+
/// Use above to inject delays intended to keep the op queue between low and high
OPTION(filestore_queue_low_threshhold, OPT_DOUBLE)
OPTION(filestore_queue_high_threshhold, OPT_DOUBLE)
OPTION(journal_force_aio, OPT_BOOL)
OPTION(journal_block_size, OPT_INT)
-// max bytes to search ahead in journal searching for corruption
-OPTION(journal_max_corrupt_search, OPT_U64)
OPTION(journal_block_align, OPT_BOOL)
OPTION(journal_write_header_frequency, OPT_U64)
OPTION(journal_max_write_bytes, OPT_INT)
OPTION(fio_dir, OPT_STR) // fio data directory for fio-objectstore
-OPTION(rados_mon_op_timeout, OPT_DOUBLE) // how many seconds to wait for a response from the monitor before returning an error from a rados operation. 0 means no limit.
-OPTION(rados_osd_op_timeout, OPT_DOUBLE) // how many seconds to wait for a response from osds before returning an error from a rados operation. 0 means no limit.
OPTION(rados_tracing, OPT_BOOL) // true if LTTng-UST tracepoints should be enabled
-OPTION(nss_db_path, OPT_STR) // path to nss db
+OPTION(rgw_max_attr_name_len, OPT_SIZE)
+OPTION(rgw_max_attr_size, OPT_SIZE)
+OPTION(rgw_max_attrs_num_in_req, OPT_U64)
OPTION(rgw_max_chunk_size, OPT_INT)
OPTION(rgw_put_obj_min_window_size, OPT_INT)
OPTION(rgw_enable_gc_threads, OPT_BOOL)
OPTION(rgw_enable_lc_threads, OPT_BOOL)
+/* overrides for librgw/nfs */
+OPTION(rgw_nfs_run_gc_threads, OPT_BOOL)
+OPTION(rgw_nfs_run_lc_threads, OPT_BOOL)
+OPTION(rgw_nfs_run_quota_threads, OPT_BOOL)
+OPTION(rgw_nfs_run_sync_thread, OPT_BOOL)
OPTION(rgw_data, OPT_STR)
OPTION(rgw_enable_apis, OPT_STR)
OPTION(rgw_port, OPT_STR) // port to listen, format as "8080" "5000", if not specified, rgw will not run external fcgi
OPTION(rgw_dns_name, OPT_STR) // hostname suffix on buckets
OPTION(rgw_dns_s3website_name, OPT_STR) // hostname suffix on buckets for s3-website endpoint
+OPTION(rgw_service_provider_name, OPT_STR) //service provider name which is contained in http response headers
OPTION(rgw_content_length_compat, OPT_BOOL) // Check both HTTP_CONTENT_LENGTH and CONTENT_LENGTH in fcgi env
OPTION(rgw_lifecycle_work_time, OPT_STR) //job process lc at 00:00-06:00s
OPTION(rgw_lc_lock_max_time, OPT_INT) // total run time for a single lc processor work
+OPTION(rgw_lc_max_worker, OPT_INT) // number of (parallelized) LCWorker threads
+OPTION(rgw_lc_max_wp_worker, OPT_INT) // number of per-LCWorker pool threads
OPTION(rgw_lc_max_objs, OPT_INT)
+OPTION(rgw_lc_max_rules, OPT_U32) // Max rules set on one bucket
OPTION(rgw_lc_debug_interval, OPT_INT) // Debug run interval, in seconds
OPTION(rgw_script_uri, OPT_STR) // alternative value for SCRIPT_URI if not set in request
OPTION(rgw_request_uri, OPT_STR) // alternative value for REQUEST_URI if not set in request
OPTION(rgw_swift_enforce_content_length, OPT_BOOL) // enforce generation of Content-Length even in cost of performance or scalability
OPTION(rgw_keystone_url, OPT_STR) // url for keystone server
OPTION(rgw_keystone_admin_token, OPT_STR) // keystone admin token (shared secret)
+OPTION(rgw_keystone_admin_token_path, OPT_STR) // path to keystone admin token (shared secret)
OPTION(rgw_keystone_admin_user, OPT_STR) // keystone admin user name
OPTION(rgw_keystone_admin_password, OPT_STR) // keystone admin user password
+OPTION(rgw_keystone_admin_password_path, OPT_STR) // path to keystone admin user password
OPTION(rgw_keystone_admin_tenant, OPT_STR) // keystone admin user tenant (for keystone v2.0)
OPTION(rgw_keystone_admin_project, OPT_STR) // keystone admin user project (for keystone v3)
OPTION(rgw_keystone_admin_domain, OPT_STR) // keystone admin user domain
OPTION(rgw_keystone_accepted_roles, OPT_STR) // roles required to serve requests
OPTION(rgw_keystone_accepted_admin_roles, OPT_STR) // list of roles allowing a user to gain admin privileges
OPTION(rgw_keystone_token_cache_size, OPT_INT) // max number of entries in keystone token cache
-OPTION(rgw_keystone_revocation_interval, OPT_INT) // seconds between tokens revocation check
OPTION(rgw_keystone_verify_ssl, OPT_BOOL) // should we try to verify keystone's ssl
OPTION(rgw_cross_domain_policy, OPT_STR)
OPTION(rgw_healthcheck_disabling_path, OPT_STR) // path whose existence causes the healthcheck to respond 503
OPTION(rgw_s3_auth_use_rados, OPT_BOOL) // should we try to use the internal credentials for s3?
OPTION(rgw_s3_auth_use_keystone, OPT_BOOL) // should we try to use keystone for s3?
+OPTION(rgw_s3_auth_order, OPT_STR) // s3 authentication order to try
OPTION(rgw_barbican_url, OPT_STR) // url for barbican server
+OPTION(rgw_opa_url, OPT_STR) // url for OPA server
+OPTION(rgw_opa_token, OPT_STR) // Bearer token OPA uses to authenticate client requests
+OPTION(rgw_opa_verify_ssl, OPT_BOOL) // should we try to verify OPA's ssl
+OPTION(rgw_use_opa_authz, OPT_BOOL) // should we use OPA to authorize client requests?
/* OpenLDAP-style LDAP parameter strings */
/* rgw_ldap_uri space-separated list of LDAP servers in URI format */
OPTION(rgw_op_thread_suicide_timeout, OPT_INT)
OPTION(rgw_thread_pool_size, OPT_INT)
OPTION(rgw_num_control_oids, OPT_INT)
-OPTION(rgw_num_rados_handles, OPT_U32)
OPTION(rgw_verify_ssl, OPT_BOOL) // should http_client try to verify ssl when sent https request
/* The following are tunables for caches of RGW NFS (and other file
OPTION(rgw_nfs_write_completion_interval_s, OPT_INT) /* stateless (V3)
* commit
* delay */
+OPTION(rgw_nfs_s3_fast_attrs, OPT_BOOL) /* use fast S3 attrs from
+ * bucket index--currently
+ * assumes NFS mounts are
+ * immutable */
OPTION(rgw_zone, OPT_STR) // zone name
OPTION(rgw_zone_root_pool, OPT_STR) // pool where zone specific info is stored
OPTION(rgw_init_timeout, OPT_INT) // time in seconds
OPTION(rgw_mime_types_file, OPT_STR)
OPTION(rgw_gc_max_objs, OPT_INT)
-OPTION(rgw_gc_obj_min_wait, OPT_INT) // wait time before object may be handled by gc
+OPTION(rgw_gc_obj_min_wait, OPT_INT) // wait time before object may be handled by gc, recommended lower limit is 30 mins
OPTION(rgw_gc_processor_max_time, OPT_INT) // total run time for a single gc processor work
OPTION(rgw_gc_processor_period, OPT_INT) // gc processor cycle time
+OPTION(rgw_gc_max_concurrent_io, OPT_INT) // NOTE(review): presumably the max number of concurrent gc IO operations; the earlier text duplicated the rgw_gc_processor_period comment -- confirm
+OPTION(rgw_gc_max_trim_chunk, OPT_INT) // gc trim chunk size
OPTION(rgw_s3_success_create_obj_status, OPT_INT) // alternative success status response for create-obj (0 - default)
OPTION(rgw_resolve_cname, OPT_BOOL) // should rgw try to resolve hostname as a dns cname record
OPTION(rgw_obj_stripe_size, OPT_INT)
OPTION(rgw_defer_to_bucket_acls, OPT_STR) // if the user has bucket perms)
OPTION(rgw_list_buckets_max_chunk, OPT_INT) // max buckets to retrieve in a single op when listing user buckets
OPTION(rgw_md_log_max_shards, OPT_INT) // max shards for metadata log
-OPTION(rgw_num_zone_opstate_shards, OPT_INT) // max shards for keeping inter-region copy progress info
-OPTION(rgw_opstate_ratelimit_sec, OPT_INT) // min time between opstate updates on a single upload (0 for disabling ratelimit)
OPTION(rgw_curl_wait_timeout_ms, OPT_INT) // timeout for certain curl calls
+OPTION(rgw_curl_low_speed_limit, OPT_INT) // low speed limit for certain curl calls
+OPTION(rgw_curl_low_speed_time, OPT_INT) // low speed time for certain curl calls
OPTION(rgw_copy_obj_progress, OPT_BOOL) // should dump progress during long copy operations?
OPTION(rgw_copy_obj_progress_every_bytes, OPT_INT) // min bytes between copy progress output
OPTION(rgw_obj_tombstone_cache_size, OPT_INT) // how many objects in tombstone cache, which is used in multi-zone sync to keep
OPTION(rgw_data_log_changes_size, OPT_INT) // number of in-memory entries to hold for data changes log
OPTION(rgw_data_log_num_shards, OPT_INT) // number of objects to keep data changes log on
OPTION(rgw_data_log_obj_prefix, OPT_STR) //
-OPTION(rgw_replica_log_obj_prefix, OPT_STR) //
OPTION(rgw_bucket_quota_ttl, OPT_INT) // time for cached bucket stats to be cached within rgw instance
OPTION(rgw_bucket_quota_soft_threshold, OPT_DOUBLE) // threshold from which we don't rely on cached info for quota decisions
OPTION(rgw_sync_data_inject_err_probability, OPT_DOUBLE) // range [0, 1]
OPTION(rgw_sync_meta_inject_err_probability, OPT_DOUBLE) // range [0, 1]
+OPTION(rgw_sync_trace_history_size, OPT_INT) // max number of complete sync trace entries to keep
+OPTION(rgw_sync_trace_per_node_log_size, OPT_INT) // how many log entries to keep per node
+OPTION(rgw_sync_trace_servicemap_update_interval, OPT_INT) // interval in seconds between sync trace servicemap update
OPTION(rgw_period_push_interval, OPT_DOUBLE) // seconds to wait before retrying "period push"
OPTION(rgw_swift_versioning_enabled, OPT_BOOL) // whether swift object versioning feature is enabled
+OPTION(rgw_trust_forwarded_https, OPT_BOOL) // trust Forwarded and X-Forwarded-Proto headers for ssl termination
OPTION(rgw_crypt_require_ssl, OPT_BOOL) // requests including encryption key headers must be sent over ssl
OPTION(rgw_crypt_default_encryption_key, OPT_STR) // base64 encoded key for encryption of rgw objects
+
+OPTION(rgw_crypt_s3_kms_backend, OPT_STR) // Where SSE-KMS encryption keys are stored
+OPTION(rgw_crypt_vault_auth, OPT_STR) // Type of authentication method to be used with Vault
+OPTION(rgw_crypt_vault_token_file, OPT_STR) // Path to the token file for Vault authentication
+OPTION(rgw_crypt_vault_addr, OPT_STR) // Vault server base address
+OPTION(rgw_crypt_vault_prefix, OPT_STR) // Optional URL prefix to Vault secret path
+OPTION(rgw_crypt_vault_secret_engine, OPT_STR) // kv, transit or other supported secret engines
+OPTION(rgw_crypt_vault_namespace, OPT_STR) // Vault Namespace (only available in Vault Enterprise Version)
+
OPTION(rgw_crypt_s3_kms_encryption_keys, OPT_STR) // extra keys that may be used for aws:kms
// defined as map "key1=YmluCmJvb3N0CmJvb3N0LQ== key2=b3V0CnNyYwpUZXN0aW5nCg=="
OPTION(rgw_crypt_suppress_logs, OPT_BOOL) // suppress logs that might print customer key
OPTION(rgw_rest_getusage_op_compat, OPT_BOOL) // dump description of total stats for s3 GetUsage API
-OPTION(mutex_perf_counter, OPT_BOOL) // enable/disable mutex perf counter
OPTION(throttler_perf_counter, OPT_BOOL) // enable/disable throttler perf counter
/* The following are tunables for torrent data */
OPTION(rgw_torrent_flag, OPT_BOOL) // produce torrent function flag
-OPTION(rgw_torrent_tracker, OPT_STR) // torrent field annouce and annouce list
+OPTION(rgw_torrent_tracker, OPT_STR) // torrent field announce and announce list
OPTION(rgw_torrent_createby, OPT_STR) // torrent field created by
OPTION(rgw_torrent_comment, OPT_STR) // torrent field comment
OPTION(rgw_torrent_encoding, OPT_STR) // torrent field encoding
OPTION(event_tracing, OPT_BOOL) // true if LTTng-UST tracepoints should be enabled
-// This will be set to true when it is safe to start threads.
-// Once it is true, it will never change.
-OPTION(internal_safe_to_start_threads, OPT_BOOL)
-
OPTION(debug_deliberately_leak_memory, OPT_BOOL)
+OPTION(debug_asok_assert_abort, OPT_BOOL)
OPTION(rgw_swift_custom_header, OPT_STR) // option to enable swift custom headers
OPTION(rgw_swift_need_stats, OPT_BOOL) // option to enable stats on bucket listing for swift
-/* resharding tunables */
-OPTION(rgw_reshard_num_logs, OPT_INT)
-OPTION(rgw_reshard_bucket_lock_duration, OPT_INT) // duration of lock on bucket obj during resharding
-OPTION(rgw_dynamic_resharding, OPT_BOOL)
-OPTION(rgw_max_objs_per_shard, OPT_INT)
-OPTION(rgw_reshard_thread_interval, OPT_U32) // maximum time between rounds of reshard thread processing
-
OPTION(rgw_acl_grants_max_num, OPT_INT) // According to AWS S3 (http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html), an ACL can have up to 100 grants.
+OPTION(rgw_cors_rules_max_num, OPT_INT) // According to AWS S3 (http://docs.aws.amazon.com/AmazonS3/latest/dev/cors.html), a CORS configuration can have up to 100 rules.
+OPTION(rgw_delete_multi_obj_max_num, OPT_INT) // According to AWS S3 (https://docs.aws.amazon.com/AmazonS3/latest/dev/DeletingObjects.html), the Multi-Object Delete API can be used to delete up to 1000 objects in a single HTTP request.
+OPTION(rgw_website_routing_rules_max_num, OPT_INT) // According to AWS S3, a website routing config can have up to 50 rules.
+OPTION(rgw_sts_entry, OPT_STR)
+OPTION(rgw_sts_key, OPT_STR)
+OPTION(rgw_s3_auth_use_sts, OPT_BOOL) // should we try to use sts for s3?
+OPTION(rgw_sts_max_session_duration, OPT_U64) // Max duration in seconds for which the session token is valid.
+OPTION(fake_statfs_for_testing, OPT_INT) // Set a value for kb and compute kb_used from total of num_bytes
+OPTION(rgw_sts_token_introspection_url, OPT_STR) // url for introspecting web tokens
+OPTION(rgw_sts_client_id, OPT_STR) // Client Id
+OPTION(rgw_sts_client_secret, OPT_STR) // Client Secret
+OPTION(debug_allow_any_pool_priority, OPT_BOOL)
+OPTION(rgw_gc_max_deferred_entries_size, OPT_U64) // GC deferred entries size in queue head
+OPTION(rgw_gc_max_queue_size, OPT_U64) // GC max queue size
+OPTION(rgw_gc_max_deferred, OPT_U64) // GC max number of deferred entries