update sources to v12.1.1

[ceph.git] / ceph / src / common / config_opts.h
diff --git a/ceph/src/common/config_opts.h b/ceph/src/common/config_opts.h

index 443ef8c1a8719afd17a6b6ad3e4c76b5d14a4dc0..b7348a02fb70b49464a704d75fb891ea83f32a4a 100644 (file)
--- a/ceph/src/common/config_opts.h
+++ b/ceph/src/common/config_opts.h
@@ -16,6 +16,7 @@
  OPTION(host, OPT_STR, "") // "" means that ceph will use short hostname
  OPTION(fsid, OPT_UUID, uuid_d())
  OPTION(public_addr, OPT_ADDR, entity_addr_t())
+OPTION(public_bind_addr, OPT_ADDR, entity_addr_t())
  OPTION(cluster_addr, OPT_ADDR, entity_addr_t())
  OPTION(public_network, OPT_STR, "")
  OPTION(cluster_network, OPT_STR, "")
@@ -28,7 +29,6 @@ OPTION(lockdep_force_backtrace, OPT_BOOL, false) // always gather current backtr
  OPTION(run_dir, OPT_STR, "/var/run/ceph")       // the "/var/run/ceph" dir, created on daemon startup
  OPTION(admin_socket, OPT_STR, "$run_dir/$cluster-$name.asok") // default changed by common_preinit()
  OPTION(admin_socket_mode, OPT_STR, "") // permission bits to set for admin socket file, e.g., "0775", "0755"
-OPTION(crushtool, OPT_STR, "crushtool") // crushtool utility path
  
  OPTION(daemonize, OPT_BOOL, false) // default changed by common_preinit()
  OPTION(setuser, OPT_STR, "")        // uid or user name
@@ -212,6 +212,7 @@ OPTION(ms_bind_retry_count, OPT_INT, 6) // If binding fails, how many times do w
  OPTION(ms_bind_retry_delay, OPT_INT, 6) // Delay between attemps to bind
  #endif
  OPTION(ms_bind_before_connect, OPT_BOOL, false)
+OPTION(ms_tcp_listen_backlog, OPT_INT, 512)
  OPTION(ms_rwthread_stack_bytes, OPT_U64, 1024 << 10)
  OPTION(ms_tcp_read_timeout, OPT_U64, 900)
  OPTION(ms_pq_max_tokens_per_priority, OPT_U64, 16777216)
@@ -308,8 +309,7 @@ OPTION(mon_clock_drift_allowed, OPT_FLOAT, .050) // allowed clock drift between
  OPTION(mon_clock_drift_warn_backoff, OPT_FLOAT, 5) // exponential backoff for clock drift warnings
  OPTION(mon_timecheck_interval, OPT_FLOAT, 300.0) // on leader, timecheck (clock drift check) interval (seconds)
  OPTION(mon_timecheck_skew_interval, OPT_FLOAT, 30.0) // on leader, timecheck (clock drift check) interval when in presence of a skew (seconds)
-OPTION(mon_pg_stuck_threshold, OPT_INT, 300) // number of seconds after which pgs can be considered inactive, unclean, or stale (see doc/control.rst under dump_stuck for more info)
-OPTION(mon_health_max_detail, OPT_INT, 50) // max detailed pgs to report in health detail
+OPTION(mon_pg_stuck_threshold, OPT_INT, 60) // number of seconds after which pgs can be considered stuck inactive, unclean, etc (see doc/control.rst under dump_stuck for more info)
  OPTION(mon_pg_min_inactive, OPT_U64, 1) // the number of PGs which have to be inactive longer than 'mon_pg_stuck_threshold' before health goes into ERR. 0 means disabled, never go into ERR.
  OPTION(mon_pg_warn_min_per_osd, OPT_INT, 30)  // min # pgs per (in) osd before we warn the admin
  OPTION(mon_pg_warn_max_per_osd, OPT_INT, 300)  // max # pgs per (in) osd before we warn the admin
@@ -352,6 +352,8 @@ OPTION(mon_health_data_update_interval, OPT_FLOAT, 60.0)
  OPTION(mon_health_to_clog, OPT_BOOL, true)
  OPTION(mon_health_to_clog_interval, OPT_INT, 3600)
  OPTION(mon_health_to_clog_tick_interval, OPT_DOUBLE, 60.0)
+OPTION(mon_health_preluminous_compat, OPT_BOOL, false)
+OPTION(mon_health_max_detail, OPT_INT, 50) // max detailed pgs to report in health detail
  OPTION(mon_data_avail_crit, OPT_INT, 5)
  OPTION(mon_data_avail_warn, OPT_INT, 30)
  OPTION(mon_data_size_warn, OPT_U64, 15*1024*1024*1024) // issue a warning when the monitor's data store goes over 15GB (in bytes)
@@ -393,6 +395,8 @@ OPTION(mon_keyvaluedb, OPT_STR, "rocksdb")   // type of keyvaluedb backend
  
  // UNSAFE -- TESTING ONLY! Allows addition of a cache tier with preexisting snaps
  OPTION(mon_debug_unsafe_allow_tier_with_nonempty_snaps, OPT_BOOL, false)
+OPTION(mon_osd_blacklist_default_expire, OPT_DOUBLE, 60*60) // default one hour
+OPTION(mon_osd_crush_smoke_test, OPT_BOOL, true)
  
  OPTION(paxos_stash_full_interval, OPT_INT, 25)   // how often (in commits) to stash a full copy of the PaxosService state
  OPTION(paxos_max_join_drift, OPT_INT, 10) // max paxos iterations before we must first sync the monitor stores
@@ -688,6 +692,7 @@ OPTION(osd_crush_update_weight_set, OPT_BOOL, true) // update weight set while u
  OPTION(osd_crush_chooseleaf_type, OPT_INT, 1) // 1 = host
  OPTION(osd_pool_use_gmt_hitset, OPT_BOOL, true) // try to use gmt for hitset archive names if all osds in cluster support it.
  OPTION(osd_crush_update_on_start, OPT_BOOL, true)
+OPTION(osd_class_update_on_start, OPT_BOOL, true) // automatically set device class on start
  OPTION(osd_crush_initial_weight, OPT_DOUBLE, -1) // if >=0, the initial weight is for newly added osds.
  OPTION(osd_pool_default_crush_rule, OPT_INT, -1)
  OPTION(osd_pool_erasure_code_stripe_unit, OPT_U32, 4096) // in bytes
@@ -695,6 +700,7 @@ OPTION(osd_pool_default_size, OPT_INT, 3)
  OPTION(osd_pool_default_min_size, OPT_INT, 0)  // 0 means no specific default; ceph will use size-size/2
  OPTION(osd_pool_default_pg_num, OPT_INT, 8) // number of PGs for new pools. Configure in global or mon section of ceph.conf
  OPTION(osd_pool_default_pgp_num, OPT_INT, 8) // number of PGs for placement purposes. Should be equal to pg_num
+OPTION(osd_pool_default_type, OPT_STR, "replicated")
  OPTION(osd_pool_default_erasure_code_profile,
         OPT_STR,
         "plugin=jerasure "
@@ -767,9 +773,35 @@ OPTION(osd_op_num_threads_per_shard_ssd, OPT_INT, 2)
  OPTION(osd_op_num_shards, OPT_INT, 0)
  OPTION(osd_op_num_shards_hdd, OPT_INT, 5)
  OPTION(osd_op_num_shards_ssd, OPT_INT, 8)
-OPTION(osd_op_queue, OPT_STR, "wpq") // PrioritzedQueue (prio), Weighted Priority Queue (wpq), or debug_random
+
+// PrioritizedQueue (prio), Weighted Priority Queue (wpq; default),
+// mclock_opclass, mclock_client, or debug_random. "mclock_opclass"
+// and "mclock_client" are based on the mClock/dmClock algorithm
+// (Gulati, et al. 2010). "mclock_opclass" prioritizes based on the
+// class the operation belongs to. "mclock_client" does the same but
+// also works to enforce fairness between clients. "debug_random"
+// chooses among all four with equal probability.
+OPTION(osd_op_queue, OPT_STR, "wpq")
+
  OPTION(osd_op_queue_cut_off, OPT_STR, "low") // Min priority to go to strict queue. (low, high, debug_random)
  
+// mClock priority queue parameters for five types of ops
+OPTION(osd_op_queue_mclock_client_op_res, OPT_DOUBLE, 1000.0)
+OPTION(osd_op_queue_mclock_client_op_wgt, OPT_DOUBLE, 500.0)
+OPTION(osd_op_queue_mclock_client_op_lim, OPT_DOUBLE, 0.0)
+OPTION(osd_op_queue_mclock_osd_subop_res, OPT_DOUBLE, 1000.0)
+OPTION(osd_op_queue_mclock_osd_subop_wgt, OPT_DOUBLE, 500.0)
+OPTION(osd_op_queue_mclock_osd_subop_lim, OPT_DOUBLE, 0.0)
+OPTION(osd_op_queue_mclock_snap_res, OPT_DOUBLE, 0.0)
+OPTION(osd_op_queue_mclock_snap_wgt, OPT_DOUBLE, 1.0)
+OPTION(osd_op_queue_mclock_snap_lim, OPT_DOUBLE, 0.001)
+OPTION(osd_op_queue_mclock_recov_res, OPT_DOUBLE, 0.0)
+OPTION(osd_op_queue_mclock_recov_wgt, OPT_DOUBLE, 1.0)
+OPTION(osd_op_queue_mclock_recov_lim, OPT_DOUBLE, 0.001)
+OPTION(osd_op_queue_mclock_scrub_res, OPT_DOUBLE, 0.0)
+OPTION(osd_op_queue_mclock_scrub_wgt, OPT_DOUBLE, 1.0)
+OPTION(osd_op_queue_mclock_scrub_lim, OPT_DOUBLE, 0.001)
+
  OPTION(osd_ignore_stale_divergent_priors, OPT_BOOL, false) // do not assert on divergent_prior entries which aren't in the log and whose on-disk objects are newer
  
  // Set to true for testing.  Users should NOT set this.
@@ -823,7 +855,6 @@ OPTION(osd_mon_ack_timeout, OPT_DOUBLE, 30.0) // time out a mon if it doesn't ac
  OPTION(osd_stats_ack_timeout_factor, OPT_DOUBLE, 2.0) // multiples of mon_ack_timeout
  OPTION(osd_stats_ack_timeout_decay, OPT_DOUBLE, .9)
  OPTION(osd_default_data_pool_replay_window, OPT_INT, 45)
-OPTION(osd_preserve_trimmed_log, OPT_BOOL, false)
  OPTION(osd_auto_mark_unfound_lost, OPT_BOOL, false)
  OPTION(osd_recovery_delay_start, OPT_FLOAT, 0)
  OPTION(osd_recovery_max_active, OPT_U64, 3)
@@ -853,7 +884,6 @@ OPTION(osd_deep_scrub_interval, OPT_FLOAT, 60*60*24*7) // once a week
  OPTION(osd_deep_scrub_randomize_ratio, OPT_FLOAT, 0.15) // scrubs will randomly become deep scrubs at this rate (0.15 -> 15% of scrubs are deep)
  OPTION(osd_deep_scrub_stride, OPT_INT, 524288)
  OPTION(osd_deep_scrub_update_digest_min_age, OPT_INT, 2*60*60)   // objects must be this old (seconds) before we update the whole-object digest on scrub
-OPTION(osd_scan_list_ping_tp_interval, OPT_U64, 100)
  OPTION(osd_class_dir, OPT_STR, CEPH_LIBDIR "/rados-classes") // where rados plugins are stored
  OPTION(osd_open_classes_on_start, OPT_BOOL, true)
  OPTION(osd_class_load_list, OPT_STR, "cephfs hello journal lock log numops "
@@ -897,6 +927,7 @@ OPTION(osd_debug_reject_backfill_probability, OPT_DOUBLE, 0)
  OPTION(osd_debug_inject_copyfrom_error, OPT_BOOL, false)  // inject failure during copyfrom completion
  OPTION(osd_debug_misdirected_ops, OPT_BOOL, false)
  OPTION(osd_debug_skip_full_check_in_recovery, OPT_BOOL, false)
+OPTION(osd_debug_random_push_read_error, OPT_DOUBLE, 0)
  OPTION(osd_debug_verify_cached_snaps, OPT_BOOL, false)
  OPTION(osd_enable_op_tracker, OPT_BOOL, true) // enable/disable OSD op tracking
  OPTION(osd_num_op_tracker_shard, OPT_U32, 32) // The number of shards for holding the ops
@@ -943,8 +974,8 @@ OPTION(kinetic_use_ssl, OPT_BOOL, false) // whether to secure kinetic traffic wi
  OPTION(rocksdb_separate_wal_dir, OPT_BOOL, false) // use $path.wal for wal
  SAFE_OPTION(rocksdb_db_paths, OPT_STR, "")   // path,size( path,size)*
  OPTION(rocksdb_log_to_ceph_log, OPT_BOOL, true)  // log to ceph log
-OPTION(rocksdb_cache_size, OPT_U64, 128*1024*1024)  // default rocksdb cache size
-OPTION(rocksdb_cache_row_ratio, OPT_FLOAT, .2)   // ratio of cache for row (vs block)
+OPTION(rocksdb_cache_size, OPT_U64, 128*1024*1024)  // rocksdb cache size (unless set by bluestore/etc)
+OPTION(rocksdb_cache_row_ratio, OPT_FLOAT, 0)   // ratio of cache for row (vs block)
  OPTION(rocksdb_cache_shard_bits, OPT_INT, 4)  // rocksdb block cache shard bits, 4 bit -> 16 shards
  OPTION(rocksdb_cache_type, OPT_STR, "lru") // 'lru' or 'clock'
  OPTION(rocksdb_block_size, OPT_INT, 4*1024)  // default rocksdb block size
@@ -1027,6 +1058,7 @@ OPTION(bdev_inject_crash_flush_delay, OPT_INT, 2) // wait N more seconds on flus
  OPTION(bdev_aio, OPT_BOOL, true)
  OPTION(bdev_aio_poll_ms, OPT_INT, 250)  // milliseconds
  OPTION(bdev_aio_max_queue_depth, OPT_INT, 1024)
+OPTION(bdev_aio_reap_max, OPT_INT, 16)
  OPTION(bdev_block_size, OPT_INT, 4096)
  OPTION(bdev_debug_aio, OPT_BOOL, false)
  OPTION(bdev_debug_aio_suicide_timeout, OPT_FLOAT, 60.0)
@@ -1135,9 +1167,12 @@ OPTION(bluestore_cache_trim_max_skip_pinned, OPT_U32, 64) // skip this many onod
  OPTION(bluestore_cache_type, OPT_STR, "2q")   // lru, 2q
  OPTION(bluestore_2q_cache_kin_ratio, OPT_DOUBLE, .5)    // kin page slot size / max page slot size
  OPTION(bluestore_2q_cache_kout_ratio, OPT_DOUBLE, .5)   // number of kout page slot / total number of page slot
-OPTION(bluestore_cache_size, OPT_U64, 1024*1024*1024)
-OPTION(bluestore_cache_meta_ratio, OPT_DOUBLE, .7)
-OPTION(bluestore_cache_kv_ratio, OPT_DOUBLE, .2)
+OPTION(bluestore_cache_size, OPT_U64, 0)
+OPTION(bluestore_cache_size_hdd, OPT_U64, 1*1024*1024*1024)
+OPTION(bluestore_cache_size_ssd, OPT_U64, 3*1024*1024*1024)
+OPTION(bluestore_cache_meta_ratio, OPT_DOUBLE, .01)
+OPTION(bluestore_cache_kv_ratio, OPT_DOUBLE, .99)
+OPTION(bluestore_cache_kv_max, OPT_U64, 512*1024*1024) // limit the maximum amount of cache for the kv store
  OPTION(bluestore_kvbackend, OPT_STR, "rocksdb")
  OPTION(bluestore_allocator, OPT_STR, "bitmap")     // stupid | bitmap
  OPTION(bluestore_freelist_blocks_per_key, OPT_INT, 128)
@@ -1178,6 +1213,7 @@ OPTION(bluestore_debug_fsck_abort, OPT_BOOL, false)
  OPTION(bluestore_debug_omit_kv_commit, OPT_BOOL, false)
  OPTION(bluestore_debug_permit_any_bdev_label, OPT_BOOL, false)
  OPTION(bluestore_shard_finishers, OPT_BOOL, false)
+OPTION(bluestore_debug_random_read_err, OPT_DOUBLE, 0)
  
  OPTION(kstore_max_ops, OPT_U64, 512)
  OPTION(kstore_max_bytes, OPT_U64, 64*1024*1024)
@@ -1190,7 +1226,6 @@ OPTION(kstore_nid_prealloc, OPT_U64, 1024)
  OPTION(kstore_sync_transaction, OPT_BOOL, false)
  OPTION(kstore_sync_submit_transaction, OPT_BOOL, false)
  OPTION(kstore_onode_map_size, OPT_U64, 1024)
-OPTION(kstore_cache_tails, OPT_BOOL, true)
  OPTION(kstore_default_stripe_size, OPT_INT, 65536)
  
  OPTION(filestore_omap_backend, OPT_STR, "rocksdb")
@@ -1221,6 +1256,7 @@ OPTION(filestore_index_retry_probability, OPT_DOUBLE, 0)
  
  // Allow object read error injection
  OPTION(filestore_debug_inject_read_err, OPT_BOOL, false)
+OPTION(filestore_debug_random_read_err, OPT_DOUBLE, 0)
  
  OPTION(filestore_debug_omap_check, OPT_BOOL, false) // Expensive debugging check on sync
  OPTION(filestore_omap_header_cache_size, OPT_INT, 1024)
@@ -1307,6 +1343,7 @@ OPTION(filestore_commit_timeout, OPT_FLOAT, 600)
  OPTION(filestore_fiemap_threshold, OPT_INT, 4096)
  OPTION(filestore_merge_threshold, OPT_INT, 10)
  OPTION(filestore_split_multiple, OPT_INT, 2)
+OPTION(filestore_split_rand_factor, OPT_U32, 20) // randomize the split threshold by adding 16 * [0, rand_factor)
  OPTION(filestore_update_to, OPT_INT, 1000)
  OPTION(filestore_blackhole, OPT_BOOL, false)     // drop any new transactions on the floor
  OPTION(filestore_fd_cache_size, OPT_INT, 128)    // FD lru size
@@ -1702,7 +1739,7 @@ OPTION(rgw_shard_warning_threshold, OPT_DOUBLE, 90) // pct of safe max
  OPTION(rgw_swift_versioning_enabled, OPT_BOOL, false) // whether swift object versioning feature is enabled
  
  OPTION(mgr_module_path, OPT_STR, CEPH_PKGLIBDIR "/mgr") // where to load python modules from
-OPTION(mgr_modules, OPT_STR, "restful")  // Which modules to load
+OPTION(mgr_initial_modules, OPT_STR, "restful status")  // Which modules to load
  OPTION(mgr_data, OPT_STR, "/var/lib/ceph/mgr/$cluster-$id") // where to find keyring etc
  OPTION(mgr_tick_period, OPT_INT, 2)  // How frequently to tick
  OPTION(mgr_stats_period, OPT_INT, 5) // How frequently clients send stats
@@ -1716,10 +1753,12 @@ OPTION(mgr_mon_bytes, OPT_U64, 128*1048576)   // bytes from mons
  OPTION(mgr_mon_messages, OPT_U64, 128)        // messages from mons
  
  OPTION(mgr_connect_retry_interval, OPT_DOUBLE, 1.0)
+OPTION(mgr_service_beacon_grace, OPT_DOUBLE, 60.0)
  
  OPTION(mon_mgr_digest_period, OPT_INT, 5)  // How frequently to send digests
  OPTION(mon_mgr_beacon_grace, OPT_INT, 30)  // How long to wait to failover
  OPTION(mon_mgr_inactive_grace, OPT_INT, 60) // How long before health WARN -> ERR
+OPTION(mon_mgr_mkfs_grace, OPT_INT, 60) // How long before we complain about MGR_DOWN
  OPTION(rgw_crypt_require_ssl, OPT_BOOL, true) // requests including encryption key headers must be sent over ssl
  OPTION(rgw_crypt_default_encryption_key, OPT_STR, "") // base64 encoded key for encryption of rgw objects
  OPTION(rgw_crypt_s3_kms_encryption_keys, OPT_STR, "") // extra keys that may be used for aws:kms
@@ -1751,6 +1790,8 @@ OPTION(debug_deliberately_leak_memory, OPT_BOOL, false)
  
  OPTION(rgw_swift_custom_header, OPT_STR, "") // option to enable swift custom headers
  
+OPTION(rgw_swift_need_stats, OPT_BOOL, true) // option to enable stats on bucket listing for swift
+
  /* resharding tunables */
  OPTION(rgw_reshard_num_logs, OPT_INT, 16)
  OPTION(rgw_reshard_bucket_lock_duration, OPT_INT, 120) // duration of lock on bucket obj during resharding