OPTION(host, OPT_STR, "") // "" means that ceph will use short hostname
OPTION(fsid, OPT_UUID, uuid_d())
OPTION(public_addr, OPT_ADDR, entity_addr_t())
+OPTION(public_bind_addr, OPT_ADDR, entity_addr_t())
OPTION(cluster_addr, OPT_ADDR, entity_addr_t())
OPTION(public_network, OPT_STR, "")
OPTION(cluster_network, OPT_STR, "")
OPTION(run_dir, OPT_STR, "/var/run/ceph") // the "/var/run/ceph" dir, created on daemon startup
OPTION(admin_socket, OPT_STR, "$run_dir/$cluster-$name.asok") // default changed by common_preinit()
OPTION(admin_socket_mode, OPT_STR, "") // permission bits to set for admin socket file, e.g., "0775", "0755"
-OPTION(crushtool, OPT_STR, "crushtool") // crushtool utility path
OPTION(daemonize, OPT_BOOL, false) // default changed by common_preinit()
OPTION(setuser, OPT_STR, "") // uid or user name
OPTION(ms_bind_retry_delay, OPT_INT, 6) // Delay between attemps to bind
#endif
OPTION(ms_bind_before_connect, OPT_BOOL, false)
+OPTION(ms_tcp_listen_backlog, OPT_INT, 512)
OPTION(ms_rwthread_stack_bytes, OPT_U64, 1024 << 10)
OPTION(ms_tcp_read_timeout, OPT_U64, 900)
OPTION(ms_pq_max_tokens_per_priority, OPT_U64, 16777216)
OPTION(mon_clock_drift_warn_backoff, OPT_FLOAT, 5) // exponential backoff for clock drift warnings
OPTION(mon_timecheck_interval, OPT_FLOAT, 300.0) // on leader, timecheck (clock drift check) interval (seconds)
OPTION(mon_timecheck_skew_interval, OPT_FLOAT, 30.0) // on leader, timecheck (clock drift check) interval when in presence of a skew (seconds)
-OPTION(mon_pg_stuck_threshold, OPT_INT, 300) // number of seconds after which pgs can be considered inactive, unclean, or stale (see doc/control.rst under dump_stuck for more info)
-OPTION(mon_health_max_detail, OPT_INT, 50) // max detailed pgs to report in health detail
+OPTION(mon_pg_stuck_threshold, OPT_INT, 60) // number of seconds after which pgs can be considered stuck inactive, unclean, etc (see doc/control.rst under dump_stuck for more info)
OPTION(mon_pg_min_inactive, OPT_U64, 1) // the number of PGs which have to be inactive longer than 'mon_pg_stuck_threshold' before health goes into ERR. 0 means disabled, never go into ERR.
OPTION(mon_pg_warn_min_per_osd, OPT_INT, 30) // min # pgs per (in) osd before we warn the admin
OPTION(mon_pg_warn_max_per_osd, OPT_INT, 300) // max # pgs per (in) osd before we warn the admin
OPTION(mon_health_to_clog, OPT_BOOL, true)
OPTION(mon_health_to_clog_interval, OPT_INT, 3600)
OPTION(mon_health_to_clog_tick_interval, OPT_DOUBLE, 60.0)
+OPTION(mon_health_preluminous_compat, OPT_BOOL, false)
+OPTION(mon_health_max_detail, OPT_INT, 50) // max detailed pgs to report in health detail
OPTION(mon_data_avail_crit, OPT_INT, 5)
OPTION(mon_data_avail_warn, OPT_INT, 30)
OPTION(mon_data_size_warn, OPT_U64, 15*1024*1024*1024) // issue a warning when the monitor's data store goes over 15GB (in bytes)
// UNSAFE -- TESTING ONLY! Allows addition of a cache tier with preexisting snaps
OPTION(mon_debug_unsafe_allow_tier_with_nonempty_snaps, OPT_BOOL, false)
+OPTION(mon_osd_blacklist_default_expire, OPT_DOUBLE, 60*60) // default one hour
+OPTION(mon_osd_crush_smoke_test, OPT_BOOL, true)
OPTION(paxos_stash_full_interval, OPT_INT, 25) // how often (in commits) to stash a full copy of the PaxosService state
OPTION(paxos_max_join_drift, OPT_INT, 10) // max paxos iterations before we must first sync the monitor stores
OPTION(osd_crush_chooseleaf_type, OPT_INT, 1) // 1 = host
OPTION(osd_pool_use_gmt_hitset, OPT_BOOL, true) // try to use gmt for hitset archive names if all osds in cluster support it.
OPTION(osd_crush_update_on_start, OPT_BOOL, true)
+OPTION(osd_class_update_on_start, OPT_BOOL, true) // automatically set device class on start
OPTION(osd_crush_initial_weight, OPT_DOUBLE, -1) // if >=0, the initial weight is for newly added osds.
OPTION(osd_pool_default_crush_rule, OPT_INT, -1)
OPTION(osd_pool_erasure_code_stripe_unit, OPT_U32, 4096) // in bytes
OPTION(osd_pool_default_min_size, OPT_INT, 0) // 0 means no specific default; ceph will use size-size/2
OPTION(osd_pool_default_pg_num, OPT_INT, 8) // number of PGs for new pools. Configure in global or mon section of ceph.conf
OPTION(osd_pool_default_pgp_num, OPT_INT, 8) // number of PGs for placement purposes. Should be equal to pg_num
+OPTION(osd_pool_default_type, OPT_STR, "replicated")
OPTION(osd_pool_default_erasure_code_profile,
OPT_STR,
"plugin=jerasure "
OPTION(osd_op_num_shards, OPT_INT, 0)
OPTION(osd_op_num_shards_hdd, OPT_INT, 5)
OPTION(osd_op_num_shards_ssd, OPT_INT, 8)
-OPTION(osd_op_queue, OPT_STR, "wpq") // PrioritzedQueue (prio), Weighted Priority Queue (wpq), or debug_random
+
+// PrioritzedQueue (prio), Weighted Priority Queue (wpq ; default),
+// mclock_opclass, mclock_client, or debug_random. "mclock_opclass"
+// and "mclock_client" are based on the mClock/dmClock algorithm
+// (Gulati, et al. 2010). "mclock_opclass" prioritizes based on the
+// class the operation belongs to. "mclock_client" does the same but
+// also works to ienforce fairness between clients. "debug_random"
+// chooses among all four with equal probability.
+OPTION(osd_op_queue, OPT_STR, "wpq")
+
OPTION(osd_op_queue_cut_off, OPT_STR, "low") // Min priority to go to strict queue. (low, high, debug_random)
+// mClock priority queue parameters for five types of ops
+OPTION(osd_op_queue_mclock_client_op_res, OPT_DOUBLE, 1000.0)
+OPTION(osd_op_queue_mclock_client_op_wgt, OPT_DOUBLE, 500.0)
+OPTION(osd_op_queue_mclock_client_op_lim, OPT_DOUBLE, 0.0)
+OPTION(osd_op_queue_mclock_osd_subop_res, OPT_DOUBLE, 1000.0)
+OPTION(osd_op_queue_mclock_osd_subop_wgt, OPT_DOUBLE, 500.0)
+OPTION(osd_op_queue_mclock_osd_subop_lim, OPT_DOUBLE, 0.0)
+OPTION(osd_op_queue_mclock_snap_res, OPT_DOUBLE, 0.0)
+OPTION(osd_op_queue_mclock_snap_wgt, OPT_DOUBLE, 1.0)
+OPTION(osd_op_queue_mclock_snap_lim, OPT_DOUBLE, 0.001)
+OPTION(osd_op_queue_mclock_recov_res, OPT_DOUBLE, 0.0)
+OPTION(osd_op_queue_mclock_recov_wgt, OPT_DOUBLE, 1.0)
+OPTION(osd_op_queue_mclock_recov_lim, OPT_DOUBLE, 0.001)
+OPTION(osd_op_queue_mclock_scrub_res, OPT_DOUBLE, 0.0)
+OPTION(osd_op_queue_mclock_scrub_wgt, OPT_DOUBLE, 1.0)
+OPTION(osd_op_queue_mclock_scrub_lim, OPT_DOUBLE, 0.001)
+
OPTION(osd_ignore_stale_divergent_priors, OPT_BOOL, false) // do not assert on divergent_prior entries which aren't in the log and whose on-disk objects are newer
// Set to true for testing. Users should NOT set this.
OPTION(osd_stats_ack_timeout_factor, OPT_DOUBLE, 2.0) // multiples of mon_ack_timeout
OPTION(osd_stats_ack_timeout_decay, OPT_DOUBLE, .9)
OPTION(osd_default_data_pool_replay_window, OPT_INT, 45)
-OPTION(osd_preserve_trimmed_log, OPT_BOOL, false)
OPTION(osd_auto_mark_unfound_lost, OPT_BOOL, false)
OPTION(osd_recovery_delay_start, OPT_FLOAT, 0)
OPTION(osd_recovery_max_active, OPT_U64, 3)
OPTION(osd_deep_scrub_randomize_ratio, OPT_FLOAT, 0.15) // scrubs will randomly become deep scrubs at this rate (0.15 -> 15% of scrubs are deep)
OPTION(osd_deep_scrub_stride, OPT_INT, 524288)
OPTION(osd_deep_scrub_update_digest_min_age, OPT_INT, 2*60*60) // objects must be this old (seconds) before we update the whole-object digest on scrub
-OPTION(osd_scan_list_ping_tp_interval, OPT_U64, 100)
OPTION(osd_class_dir, OPT_STR, CEPH_LIBDIR "/rados-classes") // where rados plugins are stored
OPTION(osd_open_classes_on_start, OPT_BOOL, true)
OPTION(osd_class_load_list, OPT_STR, "cephfs hello journal lock log numops "
OPTION(osd_debug_inject_copyfrom_error, OPT_BOOL, false) // inject failure during copyfrom completion
OPTION(osd_debug_misdirected_ops, OPT_BOOL, false)
OPTION(osd_debug_skip_full_check_in_recovery, OPT_BOOL, false)
+OPTION(osd_debug_random_push_read_error, OPT_DOUBLE, 0)
OPTION(osd_debug_verify_cached_snaps, OPT_BOOL, false)
OPTION(osd_enable_op_tracker, OPT_BOOL, true) // enable/disable OSD op tracking
OPTION(osd_num_op_tracker_shard, OPT_U32, 32) // The number of shards for holding the ops
OPTION(rocksdb_separate_wal_dir, OPT_BOOL, false) // use $path.wal for wal
SAFE_OPTION(rocksdb_db_paths, OPT_STR, "") // path,size( path,size)*
OPTION(rocksdb_log_to_ceph_log, OPT_BOOL, true) // log to ceph log
-OPTION(rocksdb_cache_size, OPT_U64, 128*1024*1024) // default rocksdb cache size
-OPTION(rocksdb_cache_row_ratio, OPT_FLOAT, .2) // ratio of cache for row (vs block)
+OPTION(rocksdb_cache_size, OPT_U64, 128*1024*1024) // rocksdb cache size (unless set by bluestore/etc)
+OPTION(rocksdb_cache_row_ratio, OPT_FLOAT, 0) // ratio of cache for row (vs block)
OPTION(rocksdb_cache_shard_bits, OPT_INT, 4) // rocksdb block cache shard bits, 4 bit -> 16 shards
OPTION(rocksdb_cache_type, OPT_STR, "lru") // 'lru' or 'clock'
OPTION(rocksdb_block_size, OPT_INT, 4*1024) // default rocksdb block size
OPTION(bdev_aio, OPT_BOOL, true)
OPTION(bdev_aio_poll_ms, OPT_INT, 250) // milliseconds
OPTION(bdev_aio_max_queue_depth, OPT_INT, 1024)
+OPTION(bdev_aio_reap_max, OPT_INT, 16)
OPTION(bdev_block_size, OPT_INT, 4096)
OPTION(bdev_debug_aio, OPT_BOOL, false)
OPTION(bdev_debug_aio_suicide_timeout, OPT_FLOAT, 60.0)
OPTION(bluestore_cache_type, OPT_STR, "2q") // lru, 2q
OPTION(bluestore_2q_cache_kin_ratio, OPT_DOUBLE, .5) // kin page slot size / max page slot size
OPTION(bluestore_2q_cache_kout_ratio, OPT_DOUBLE, .5) // number of kout page slot / total number of page slot
-OPTION(bluestore_cache_size, OPT_U64, 1024*1024*1024)
-OPTION(bluestore_cache_meta_ratio, OPT_DOUBLE, .7)
-OPTION(bluestore_cache_kv_ratio, OPT_DOUBLE, .2)
+OPTION(bluestore_cache_size, OPT_U64, 0)
+OPTION(bluestore_cache_size_hdd, OPT_U64, 1*1024*1024*1024)
+OPTION(bluestore_cache_size_ssd, OPT_U64, 3*1024*1024*1024)
+OPTION(bluestore_cache_meta_ratio, OPT_DOUBLE, .01)
+OPTION(bluestore_cache_kv_ratio, OPT_DOUBLE, .99)
+OPTION(bluestore_cache_kv_max, OPT_U64, 512*1024*1024) // limit the maximum amount of cache for the kv store
OPTION(bluestore_kvbackend, OPT_STR, "rocksdb")
OPTION(bluestore_allocator, OPT_STR, "bitmap") // stupid | bitmap
OPTION(bluestore_freelist_blocks_per_key, OPT_INT, 128)
OPTION(bluestore_debug_omit_kv_commit, OPT_BOOL, false)
OPTION(bluestore_debug_permit_any_bdev_label, OPT_BOOL, false)
OPTION(bluestore_shard_finishers, OPT_BOOL, false)
+OPTION(bluestore_debug_random_read_err, OPT_DOUBLE, 0)
OPTION(kstore_max_ops, OPT_U64, 512)
OPTION(kstore_max_bytes, OPT_U64, 64*1024*1024)
OPTION(kstore_sync_transaction, OPT_BOOL, false)
OPTION(kstore_sync_submit_transaction, OPT_BOOL, false)
OPTION(kstore_onode_map_size, OPT_U64, 1024)
-OPTION(kstore_cache_tails, OPT_BOOL, true)
OPTION(kstore_default_stripe_size, OPT_INT, 65536)
OPTION(filestore_omap_backend, OPT_STR, "rocksdb")
// Allow object read error injection
OPTION(filestore_debug_inject_read_err, OPT_BOOL, false)
+OPTION(filestore_debug_random_read_err, OPT_DOUBLE, 0)
OPTION(filestore_debug_omap_check, OPT_BOOL, false) // Expensive debugging check on sync
OPTION(filestore_omap_header_cache_size, OPT_INT, 1024)
OPTION(filestore_fiemap_threshold, OPT_INT, 4096)
OPTION(filestore_merge_threshold, OPT_INT, 10)
OPTION(filestore_split_multiple, OPT_INT, 2)
+OPTION(filestore_split_rand_factor, OPT_U32, 20) // randomize the split threshold by adding 16 * [0, rand_factor)
OPTION(filestore_update_to, OPT_INT, 1000)
OPTION(filestore_blackhole, OPT_BOOL, false) // drop any new transactions on the floor
OPTION(filestore_fd_cache_size, OPT_INT, 128) // FD lru size
OPTION(rgw_swift_versioning_enabled, OPT_BOOL, false) // whether swift object versioning feature is enabled
OPTION(mgr_module_path, OPT_STR, CEPH_PKGLIBDIR "/mgr") // where to load python modules from
-OPTION(mgr_modules, OPT_STR, "restful") // Which modules to load
+OPTION(mgr_initial_modules, OPT_STR, "restful status") // Which modules to load
OPTION(mgr_data, OPT_STR, "/var/lib/ceph/mgr/$cluster-$id") // where to find keyring etc
OPTION(mgr_tick_period, OPT_INT, 2) // How frequently to tick
OPTION(mgr_stats_period, OPT_INT, 5) // How frequently clients send stats
OPTION(mgr_mon_messages, OPT_U64, 128) // messages from mons
OPTION(mgr_connect_retry_interval, OPT_DOUBLE, 1.0)
+OPTION(mgr_service_beacon_grace, OPT_DOUBLE, 60.0)
OPTION(mon_mgr_digest_period, OPT_INT, 5) // How frequently to send digests
OPTION(mon_mgr_beacon_grace, OPT_INT, 30) // How long to wait to failover
OPTION(mon_mgr_inactive_grace, OPT_INT, 60) // How long before health WARN -> ERR
+OPTION(mon_mgr_mkfs_grace, OPT_INT, 60) // How long before we complain about MGR_DOWN
OPTION(rgw_crypt_require_ssl, OPT_BOOL, true) // requests including encryption key headers must be sent over ssl
OPTION(rgw_crypt_default_encryption_key, OPT_STR, "") // base64 encoded key for encryption of rgw objects
OPTION(rgw_crypt_s3_kms_encryption_keys, OPT_STR, "") // extra keys that may be used for aws:kms
OPTION(rgw_swift_custom_header, OPT_STR, "") // option to enable swift custom headers
+OPTION(rgw_swift_need_stats, OPT_BOOL, true) // option to enable stats on bucket listing for swift
+
/* resharding tunables */
OPTION(rgw_reshard_num_logs, OPT_INT, 16)
OPTION(rgw_reshard_bucket_lock_duration, OPT_INT, 120) // duration of lock on bucket obj during resharding