]> git.proxmox.com Git - ceph.git/blame - ceph/src/common/config_opts.h
update sources to v12.1.1
[ceph.git] / ceph / src / common / config_opts.h
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15/* note: no header guard */
16OPTION(host, OPT_STR, "") // "" means that ceph will use short hostname
17OPTION(fsid, OPT_UUID, uuid_d())
18OPTION(public_addr, OPT_ADDR, entity_addr_t())
224ce89b 19OPTION(public_bind_addr, OPT_ADDR, entity_addr_t())
7c673cae
FG
20OPTION(cluster_addr, OPT_ADDR, entity_addr_t())
21OPTION(public_network, OPT_STR, "")
22OPTION(cluster_network, OPT_STR, "")
23OPTION(num_client, OPT_INT, 1)
24OPTION(monmap, OPT_STR, "")
25OPTION(mon_host, OPT_STR, "")
26OPTION(mon_dns_srv_name, OPT_STR, "ceph-mon")
27OPTION(lockdep, OPT_BOOL, false)
28OPTION(lockdep_force_backtrace, OPT_BOOL, false) // always gather current backtrace at every lock
29OPTION(run_dir, OPT_STR, "/var/run/ceph") // the "/var/run/ceph" dir, created on daemon startup
30OPTION(admin_socket, OPT_STR, "$run_dir/$cluster-$name.asok") // default changed by common_preinit()
31OPTION(admin_socket_mode, OPT_STR, "") // permission bits to set for admin socket file, e.g., "0775", "0755"
7c673cae
FG
32
33OPTION(daemonize, OPT_BOOL, false) // default changed by common_preinit()
34OPTION(setuser, OPT_STR, "") // uid or user name
35OPTION(setgroup, OPT_STR, "") // gid or group name
36OPTION(setuser_match_path, OPT_STR, "") // make setuser/group conditional on this path matching ownership
37OPTION(pid_file, OPT_STR, "") // default changed by common_preinit()
38OPTION(chdir, OPT_STR, "/")
39OPTION(max_open_files, OPT_LONGLONG, 0)
40OPTION(restapi_log_level, OPT_STR, "") // default set by Python code
41OPTION(restapi_base_url, OPT_STR, "") // default set by Python code
42OPTION(fatal_signal_handlers, OPT_BOOL, true)
43SAFE_OPTION(erasure_code_dir, OPT_STR, CEPH_PKGLIBDIR"/erasure-code") // default location for erasure-code plugins
44
45OPTION(log_file, OPT_STR, "/var/log/ceph/$cluster-$name.log") // default changed by common_preinit()
46OPTION(log_max_new, OPT_INT, 1000) // default changed by common_preinit()
47OPTION(log_max_recent, OPT_INT, 10000) // default changed by common_preinit()
48OPTION(log_to_stderr, OPT_BOOL, true) // default changed by common_preinit()
49OPTION(err_to_stderr, OPT_BOOL, true) // default changed by common_preinit()
50OPTION(log_to_syslog, OPT_BOOL, false)
51OPTION(err_to_syslog, OPT_BOOL, false)
52OPTION(log_flush_on_exit, OPT_BOOL, true) // default changed by common_preinit()
53OPTION(log_stop_at_utilization, OPT_FLOAT, .97) // stop logging at (near) full
54OPTION(log_to_graylog, OPT_BOOL, false)
55OPTION(err_to_graylog, OPT_BOOL, false)
56OPTION(log_graylog_host, OPT_STR, "127.0.0.1")
57OPTION(log_graylog_port, OPT_INT, 12201)
58
59// options will take k/v pairs, or single-item that will be assumed as general
60// default for all, regardless of channel.
61// e.g., "info" would be taken as the same as "default=info"
62// also, "default=daemon audit=local0" would mean
63// "default all to 'daemon', override 'audit' with 'local0'"
64OPTION(clog_to_monitors, OPT_STR, "default=true")
65OPTION(clog_to_syslog, OPT_STR, "false")
66OPTION(clog_to_syslog_level, OPT_STR, "info") // this level and above
67OPTION(clog_to_syslog_facility, OPT_STR, "default=daemon audit=local0")
68OPTION(clog_to_graylog, OPT_STR, "false")
69OPTION(clog_to_graylog_host, OPT_STR, "127.0.0.1")
70OPTION(clog_to_graylog_port, OPT_STR, "12201")
71
72OPTION(mon_cluster_log_to_syslog, OPT_STR, "default=false")
73OPTION(mon_cluster_log_to_syslog_level, OPT_STR, "info") // this level and above
74OPTION(mon_cluster_log_to_syslog_facility, OPT_STR, "daemon")
75OPTION(mon_cluster_log_file, OPT_STR,
76 "default=/var/log/ceph/$cluster.$channel.log cluster=/var/log/ceph/$cluster.log")
77OPTION(mon_cluster_log_file_level, OPT_STR, "info")
78OPTION(mon_cluster_log_to_graylog, OPT_STR, "false")
79OPTION(mon_cluster_log_to_graylog_host, OPT_STR, "127.0.0.1")
80OPTION(mon_cluster_log_to_graylog_port, OPT_STR, "12201")
81
82OPTION(enable_experimental_unrecoverable_data_corrupting_features, OPT_STR, "")
83
84SAFE_OPTION(plugin_dir, OPT_STR, CEPH_PKGLIBDIR)
85
86OPTION(xio_trace_mempool, OPT_BOOL, false) // mempool allocation counters
87OPTION(xio_trace_msgcnt, OPT_BOOL, false) // incoming/outgoing msg counters
88OPTION(xio_trace_xcon, OPT_BOOL, false) // Xio message encode/decode trace
89OPTION(xio_queue_depth, OPT_INT, 128) // depth of Accelio msg queue
90OPTION(xio_mp_min, OPT_INT, 128) // default min mempool size
91OPTION(xio_mp_max_64, OPT_INT, 65536) // max 64-byte chunks (buffer is 40)
92OPTION(xio_mp_max_256, OPT_INT, 8192) // max 256-byte chunks
93OPTION(xio_mp_max_1k, OPT_INT, 8192) // max 1K chunks
94OPTION(xio_mp_max_page, OPT_INT, 4096) // presumably max page-size chunks; comment previously read "max 1K chunks", apparently copied from xio_mp_max_1k -- TODO confirm
95OPTION(xio_mp_max_hint, OPT_INT, 4096) // max size-hint chunks
96OPTION(xio_portal_threads, OPT_INT, 2) // xio portal threads per messenger
97OPTION(xio_max_conns_per_portal, OPT_INT, 32) // max xio_connections per portal/ctx
98OPTION(xio_transport_type, OPT_STR, "rdma") // xio transport type: {rdma or tcp}
99OPTION(xio_max_send_inline, OPT_INT, 512) // xio maximum threshold to send inline
100
101OPTION(compressor_zlib_isal, OPT_BOOL, false)
102OPTION(compressor_zlib_level, OPT_INT, 5) //regular zlib compression level, not applicable to isa-l optimized version
103
104OPTION(async_compressor_enabled, OPT_BOOL, false)
105OPTION(async_compressor_type, OPT_STR, "snappy")
106OPTION(async_compressor_threads, OPT_INT, 2)
107OPTION(async_compressor_thread_timeout, OPT_INT, 5)
108OPTION(async_compressor_thread_suicide_timeout, OPT_INT, 30)
109
110OPTION(plugin_crypto_accelerator, OPT_STR, "crypto_isal")
111
112OPTION(mempool_debug, OPT_BOOL, false)
113
114DEFAULT_SUBSYS(0, 5)
115SUBSYS(lockdep, 0, 1)
116SUBSYS(context, 0, 1)
117SUBSYS(crush, 1, 1)
118SUBSYS(mds, 1, 5)
119SUBSYS(mds_balancer, 1, 5)
120SUBSYS(mds_locker, 1, 5)
121SUBSYS(mds_log, 1, 5)
122SUBSYS(mds_log_expire, 1, 5)
123SUBSYS(mds_migrator, 1, 5)
124SUBSYS(buffer, 0, 1)
125SUBSYS(timer, 0, 1)
126SUBSYS(filer, 0, 1)
127SUBSYS(striper, 0, 1)
128SUBSYS(objecter, 0, 1)
129SUBSYS(rados, 0, 5)
130SUBSYS(rbd, 0, 5)
131SUBSYS(rbd_mirror, 0, 5)
132SUBSYS(rbd_replay, 0, 5)
133SUBSYS(journaler, 0, 5)
134SUBSYS(objectcacher, 0, 5)
135SUBSYS(client, 0, 5)
136SUBSYS(osd, 1, 5)
137SUBSYS(optracker, 0, 5)
138SUBSYS(objclass, 0, 5)
139SUBSYS(filestore, 1, 3)
140SUBSYS(journal, 1, 3)
141SUBSYS(ms, 0, 5)
142SUBSYS(mon, 1, 5)
143SUBSYS(monc, 0, 10)
144SUBSYS(paxos, 1, 5)
145SUBSYS(tp, 0, 5)
146SUBSYS(auth, 1, 5)
147SUBSYS(crypto, 1, 5)
148SUBSYS(finisher, 1, 1)
149SUBSYS(heartbeatmap, 1, 5)
150SUBSYS(perfcounter, 1, 5)
151SUBSYS(rgw, 1, 5) // log level for the Rados gateway
152SUBSYS(civetweb, 1, 10)
153SUBSYS(javaclient, 1, 5)
154SUBSYS(asok, 1, 5)
155SUBSYS(throttle, 1, 1)
156SUBSYS(refs, 0, 0)
157SUBSYS(xio, 1, 5)
158SUBSYS(compressor, 1, 5)
159SUBSYS(bluestore, 1, 5)
160SUBSYS(bluefs, 1, 5)
161SUBSYS(bdev, 1, 3)
162SUBSYS(kstore, 1, 5)
163SUBSYS(rocksdb, 4, 5)
164SUBSYS(leveldb, 4, 5)
165SUBSYS(memdb, 4, 5)
166SUBSYS(kinetic, 1, 5)
167SUBSYS(fuse, 1, 5)
168SUBSYS(mgr, 1, 5)
169SUBSYS(mgrc, 1, 5)
170SUBSYS(dpdk, 1, 5)
171SUBSYS(eventtrace, 1, 5)
172
173OPTION(key, OPT_STR, "")
174OPTION(keyfile, OPT_STR, "")
175OPTION(keyring, OPT_STR,
176 // default changed by common_preinit() for mds and osd; the /usr/local/etc/ceph paths are appended only on FreeBSD
177 "/etc/ceph/$cluster.$name.keyring,/etc/ceph/$cluster.keyring,/etc/ceph/keyring,/etc/ceph/keyring.bin,"
178#if defined(__FreeBSD__) // compiler-predefined macro on FreeBSD (same spelling as the ms_bind_retry_* guard)
179 "/usr/local/etc/ceph/$cluster.$name.keyring,/usr/local/etc/ceph/$cluster.keyring,"
180 "/usr/local/etc/ceph/keyring,/usr/local/etc/ceph/keyring.bin,"
181#endif
182 )
183OPTION(heartbeat_interval, OPT_INT, 5)
184OPTION(heartbeat_file, OPT_STR, "")
185OPTION(heartbeat_inject_failure, OPT_INT, 0) // force an unhealthy heartbeat for N seconds
186OPTION(perf, OPT_BOOL, true) // enable internal perf counters
187
188SAFE_OPTION(ms_type, OPT_STR, "async+posix") // messenger backend. It will be modified in runtime, so use SAFE_OPTION
189OPTION(ms_public_type, OPT_STR, "") // messenger backend
190OPTION(ms_cluster_type, OPT_STR, "") // messenger backend
191OPTION(ms_tcp_nodelay, OPT_BOOL, true)
192OPTION(ms_tcp_rcvbuf, OPT_INT, 0)
193OPTION(ms_tcp_prefetch_max_size, OPT_INT, 4096) // max prefetch size, we limit this to avoid extra memcpy
194OPTION(ms_initial_backoff, OPT_DOUBLE, .2)
195OPTION(ms_max_backoff, OPT_DOUBLE, 15.0)
196OPTION(ms_crc_data, OPT_BOOL, true)
197OPTION(ms_crc_header, OPT_BOOL, true)
198OPTION(ms_die_on_bad_msg, OPT_BOOL, false)
199OPTION(ms_die_on_unhandled_msg, OPT_BOOL, false)
200OPTION(ms_die_on_old_message, OPT_BOOL, false) // assert if we get a dup incoming message and shouldn't have (may be triggered by pre-541cd3c64be0dfa04e8a2df39422e0eb9541a428 code)
201OPTION(ms_die_on_skipped_message, OPT_BOOL, false) // assert if we skip a seq (kernel client does this intentionally)
202OPTION(ms_dispatch_throttle_bytes, OPT_U64, 100 << 20)
203OPTION(ms_bind_ipv6, OPT_BOOL, false)
204OPTION(ms_bind_port_min, OPT_INT, 6800)
205OPTION(ms_bind_port_max, OPT_INT, 7300)
206#if !defined(__FreeBSD__)
207OPTION(ms_bind_retry_count, OPT_INT, 3) // If binding fails, how many times do we retry to bind
208OPTION(ms_bind_retry_delay, OPT_INT, 5) // Delay between attempts to bind
209#else
210// FreeBSD does not use SO_REUSEADDR, so allow for a bit more time by default
211OPTION(ms_bind_retry_count, OPT_INT, 6) // If binding fails, how many times do we retry to bind
212OPTION(ms_bind_retry_delay, OPT_INT, 6) // Delay between attempts to bind
213#endif
31f18b77 214OPTION(ms_bind_before_connect, OPT_BOOL, false)
224ce89b 215OPTION(ms_tcp_listen_backlog, OPT_INT, 512)
7c673cae
FG
216OPTION(ms_rwthread_stack_bytes, OPT_U64, 1024 << 10)
217OPTION(ms_tcp_read_timeout, OPT_U64, 900)
218OPTION(ms_pq_max_tokens_per_priority, OPT_U64, 16777216)
219OPTION(ms_pq_min_cost, OPT_U64, 65536)
220OPTION(ms_inject_socket_failures, OPT_U64, 0)
221SAFE_OPTION(ms_inject_delay_type, OPT_STR, "") // "osd mds mon client" allowed
222OPTION(ms_inject_delay_msg_type, OPT_STR, "") // the type of message to delay, as returned by Message::get_type_name(). This is an additional restriction on the general type filter ms_inject_delay_type.
223OPTION(ms_inject_delay_max, OPT_DOUBLE, 1) // seconds
224OPTION(ms_inject_delay_probability, OPT_DOUBLE, 0) // range [0, 1]
225OPTION(ms_inject_internal_delays, OPT_DOUBLE, 0) // seconds
226OPTION(ms_dump_on_send, OPT_BOOL, false) // hexdump msg to log on send
227OPTION(ms_dump_corrupt_message_level, OPT_INT, 1) // debug level to hexdump undecodeable messages at
228OPTION(ms_async_op_threads, OPT_U64, 3) // number of worker processing threads for async messenger created on init
229OPTION(ms_async_max_op_threads, OPT_U64, 5) // max number of worker processing threads for async messenger
230OPTION(ms_async_set_affinity, OPT_BOOL, true)
231// example: ms_async_affinity_cores = 0,1
232// The number of cores listed is expected to equal ms_async_op_threads; otherwise
233// extra op threads will cycle through ms_async_affinity_cores again.
234// If ms_async_affinity_cores is empty, all threads will be bound to the currently
235// running core.
236OPTION(ms_async_affinity_cores, OPT_STR, "")
7c673cae
FG
237OPTION(ms_async_rdma_device_name, OPT_STR, "")
238OPTION(ms_async_rdma_enable_hugepage, OPT_BOOL, false)
239OPTION(ms_async_rdma_buffer_size, OPT_INT, 128 << 10)
240OPTION(ms_async_rdma_send_buffers, OPT_U32, 1024)
241OPTION(ms_async_rdma_receive_buffers, OPT_U32, 1024)
242OPTION(ms_async_rdma_port_num, OPT_U32, 1)
243OPTION(ms_async_rdma_polling_us, OPT_U32, 1000)
244OPTION(ms_async_rdma_local_gid, OPT_STR, "") // GID format: "fe80:0000:0000:0000:7efe:90ff:fe72:6efe", no zero folding
245OPTION(ms_async_rdma_roce_ver, OPT_INT, 1) // 0=RoCEv1, 1=RoCEv2, 2=RoCEv1.5
246OPTION(ms_async_rdma_sl, OPT_INT, 3) // in RoCE, this means PCP
31f18b77 247OPTION(ms_async_rdma_dscp, OPT_INT, 96) // in RoCE, this means DSCP
7c673cae
FG
248
249OPTION(ms_dpdk_port_id, OPT_INT, 0)
250SAFE_OPTION(ms_dpdk_coremask, OPT_STR, "1") // it is modified in unittest so that use SAFE_OPTION to declare
251OPTION(ms_dpdk_memory_channel, OPT_STR, "4")
252OPTION(ms_dpdk_hugepages, OPT_STR, "")
253OPTION(ms_dpdk_pmd, OPT_STR, "")
254SAFE_OPTION(ms_dpdk_host_ipv4_addr, OPT_STR, "")
255SAFE_OPTION(ms_dpdk_gateway_ipv4_addr, OPT_STR, "")
256SAFE_OPTION(ms_dpdk_netmask_ipv4_addr, OPT_STR, "")
257OPTION(ms_dpdk_lro, OPT_BOOL, true)
258OPTION(ms_dpdk_hw_flow_control, OPT_BOOL, true)
259// Weighting of a hardware network queue relative to a software queue (0=no work, 1=equal share)
260OPTION(ms_dpdk_hw_queue_weight, OPT_FLOAT, 1)
261OPTION(ms_dpdk_debug_allow_loopback, OPT_BOOL, false)
262OPTION(ms_dpdk_rx_buffer_count_per_core, OPT_INT, 8192)
263
264OPTION(inject_early_sigterm, OPT_BOOL, false)
265
266OPTION(mon_data, OPT_STR, "/var/lib/ceph/mon/$cluster-$id")
267OPTION(mon_initial_members, OPT_STR, "") // list of initial cluster mon ids; if specified, need majority to form initial quorum and create new cluster
7c673cae
FG
268OPTION(mon_compact_on_start, OPT_BOOL, false) // compact leveldb on ceph-mon start
269OPTION(mon_compact_on_bootstrap, OPT_BOOL, false) // trigger leveldb compaction on bootstrap
270OPTION(mon_compact_on_trim, OPT_BOOL, true) // compact (a prefix) when we trim old states
271OPTION(mon_osd_cache_size, OPT_INT, 10) // the size of osdmaps cache, not to rely on underlying store's cache
272
273OPTION(mon_cpu_threads, OPT_INT, 4)
274OPTION(mon_osd_mapping_pgs_per_chunk, OPT_INT, 4096)
31f18b77 275OPTION(mon_osd_max_creating_pgs, OPT_INT, 1024)
7c673cae
FG
276OPTION(mon_tick_interval, OPT_INT, 5)
277OPTION(mon_session_timeout, OPT_INT, 300) // must send keepalive or subscribe
278OPTION(mon_subscribe_interval, OPT_DOUBLE, 24*3600) // for legacy clients only
279OPTION(mon_delta_reset_interval, OPT_DOUBLE, 10) // seconds of inactivity before we reset the pg delta to 0
280OPTION(mon_osd_laggy_halflife, OPT_INT, 60*60) // (seconds) how quickly our laggy estimations decay
281OPTION(mon_osd_laggy_weight, OPT_DOUBLE, .3) // weight for new 'samples's in laggy estimations
282OPTION(mon_osd_laggy_max_interval, OPT_INT, 300) // maximum value of laggy_interval in laggy estimations
283OPTION(mon_osd_adjust_heartbeat_grace, OPT_BOOL, true) // true if we should scale based on laggy estimations
284OPTION(mon_osd_adjust_down_out_interval, OPT_BOOL, true) // true if we should scale based on laggy estimations
285OPTION(mon_osd_auto_mark_in, OPT_BOOL, false) // mark any booting osds 'in'
286OPTION(mon_osd_auto_mark_auto_out_in, OPT_BOOL, true) // mark booting auto-marked-out osds 'in'
287OPTION(mon_osd_auto_mark_new_in, OPT_BOOL, true) // mark booting new osds 'in'
288OPTION(mon_osd_down_out_interval, OPT_INT, 600) // seconds
289OPTION(mon_osd_down_out_subtree_limit, OPT_STR, "rack") // smallest crush unit/type that we will not automatically mark out
290OPTION(mon_osd_min_up_ratio, OPT_DOUBLE, .3) // min osds required to be up to mark things down
291OPTION(mon_osd_min_in_ratio, OPT_DOUBLE, .75) // min osds required to be in to mark things out
31f18b77
FG
292OPTION(mon_osd_warn_op_age, OPT_DOUBLE, 32) // max op age before we generate a warning (make it a power of 2)
293OPTION(mon_osd_err_op_age_ratio, OPT_DOUBLE, 128) // when to generate an error, as multiple of mon_osd_warn_op_age
7c673cae
FG
294OPTION(mon_osd_max_split_count, OPT_INT, 32) // largest number of PGs per "involved" OSD to let split create
295OPTION(mon_osd_allow_primary_temp, OPT_BOOL, false) // allow primary_temp to be set in the osdmap
296OPTION(mon_osd_allow_primary_affinity, OPT_BOOL, false) // allow primary_affinity to be set in the osdmap
297OPTION(mon_osd_prime_pg_temp, OPT_BOOL, true) // prime osdmap with pg mapping changes
298OPTION(mon_osd_prime_pg_temp_max_time, OPT_FLOAT, .5) // max time to spend priming
299OPTION(mon_osd_prime_pg_temp_max_estimate, OPT_FLOAT, .25) // max estimate of pg total before we do all pgs in parallel
300OPTION(mon_osd_pool_ec_fast_read, OPT_BOOL, false) // whether turn on fast read on the pool or not
301OPTION(mon_stat_smooth_intervals, OPT_INT, 6) // smooth stats over last N PGMap maps
302OPTION(mon_election_timeout, OPT_FLOAT, 5) // on election proposer, max waiting time for all ACKs
303OPTION(mon_lease, OPT_FLOAT, 5) // lease interval
304OPTION(mon_lease_renew_interval_factor, OPT_FLOAT, .6) // on leader, to renew the lease
305OPTION(mon_lease_ack_timeout_factor, OPT_FLOAT, 2.0) // on leader, if lease isn't acked by all peons
306OPTION(mon_accept_timeout_factor, OPT_FLOAT, 2.0) // on leader, if paxos update isn't accepted
307
308OPTION(mon_clock_drift_allowed, OPT_FLOAT, .050) // allowed clock drift between monitors
309OPTION(mon_clock_drift_warn_backoff, OPT_FLOAT, 5) // exponential backoff for clock drift warnings
310OPTION(mon_timecheck_interval, OPT_FLOAT, 300.0) // on leader, timecheck (clock drift check) interval (seconds)
311OPTION(mon_timecheck_skew_interval, OPT_FLOAT, 30.0) // on leader, timecheck (clock drift check) interval when in presence of a skew (seconds)
224ce89b 312OPTION(mon_pg_stuck_threshold, OPT_INT, 60) // number of seconds after which pgs can be considered stuck inactive, unclean, etc (see doc/control.rst under dump_stuck for more info)
7c673cae
FG
313OPTION(mon_pg_min_inactive, OPT_U64, 1) // the number of PGs which have to be inactive longer than 'mon_pg_stuck_threshold' before health goes into ERR. 0 means disabled, never go into ERR.
314OPTION(mon_pg_warn_min_per_osd, OPT_INT, 30) // min # pgs per (in) osd before we warn the admin
315OPTION(mon_pg_warn_max_per_osd, OPT_INT, 300) // max # pgs per (in) osd before we warn the admin
316OPTION(mon_pg_warn_max_object_skew, OPT_FLOAT, 10.0) // max skew from average in objects per pg
317OPTION(mon_pg_warn_min_objects, OPT_INT, 10000) // do not warn below this object #
318OPTION(mon_pg_warn_min_pool_objects, OPT_INT, 1000) // do not warn on pools below this object #
319OPTION(mon_pg_check_down_all_threshold, OPT_FLOAT, .5) // threshold of down osds after which we check all pgs
320OPTION(mon_cache_target_full_warn_ratio, OPT_FLOAT, .66) // position between pool cache_target_full and max where we start warning
321OPTION(mon_osd_full_ratio, OPT_FLOAT, .95) // what % full makes an OSD "full"
322OPTION(mon_osd_backfillfull_ratio, OPT_FLOAT, .90) // what % full makes an OSD backfill full (backfill halted)
323OPTION(mon_osd_nearfull_ratio, OPT_FLOAT, .85) // what % full makes an OSD near full
31f18b77 324OPTION(mon_osd_initial_require_min_compat_client, OPT_STR, "jewel")
7c673cae
FG
325OPTION(mon_allow_pool_delete, OPT_BOOL, false) // allow pool deletion
326OPTION(mon_fake_pool_delete, OPT_BOOL, false) // fake pool deletion (add _DELETED suffix)
327OPTION(mon_globalid_prealloc, OPT_U32, 10000) // how many globalids to prealloc
328OPTION(mon_osd_report_timeout, OPT_INT, 900) // grace period before declaring unresponsive OSDs dead
329OPTION(mon_force_standby_active, OPT_BOOL, true) // should mons force standby-replay mds to be active
330OPTION(mon_warn_on_legacy_crush_tunables, OPT_BOOL, true) // warn if crush tunables are too old (older than mon_crush_min_required_version)
331OPTION(mon_crush_min_required_version, OPT_STR, "firefly")
332OPTION(mon_warn_on_crush_straw_calc_version_zero, OPT_BOOL, true) // warn if crush straw_calc_version==0
333OPTION(mon_warn_on_osd_down_out_interval_zero, OPT_BOOL, true) // warn if 'mon_osd_down_out_interval == 0'
334OPTION(mon_warn_on_cache_pools_without_hit_sets, OPT_BOOL, true)
335OPTION(mon_warn_osd_usage_min_max_delta, OPT_FLOAT, .40) // warn if difference between min and max OSD utilizations exceeds specified amount
336OPTION(mon_min_osdmap_epochs, OPT_INT, 500)
337OPTION(mon_max_pgmap_epochs, OPT_INT, 500)
338OPTION(mon_max_log_epochs, OPT_INT, 500)
339OPTION(mon_max_mdsmap_epochs, OPT_INT, 500)
340OPTION(mon_max_osd, OPT_INT, 10000)
341OPTION(mon_probe_timeout, OPT_DOUBLE, 2.0)
7c673cae
FG
342OPTION(mon_client_bytes, OPT_U64, 100ul << 20) // client msg data allowed in memory (in bytes)
343OPTION(mon_mgr_proxy_client_bytes_ratio, OPT_FLOAT, .3) // ratio of mon_client_bytes that can be consumed by proxied mgr commands before we error out to client
31f18b77 344OPTION(mon_log_max_summary, OPT_U64, 50)
7c673cae
FG
345OPTION(mon_daemon_bytes, OPT_U64, 400ul << 20) // mds, osd message memory cap (in bytes)
346OPTION(mon_max_log_entries_per_event, OPT_INT, 4096)
347OPTION(mon_reweight_min_pgs_per_osd, OPT_U64, 10) // min pgs per osd for reweight-by-pg command
348OPTION(mon_reweight_min_bytes_per_osd, OPT_U64, 100*1024*1024) // min bytes per osd for reweight-by-utilization command
349OPTION(mon_reweight_max_osds, OPT_INT, 4) // max osds to change per reweight-by-* command
350OPTION(mon_reweight_max_change, OPT_DOUBLE, 0.05)
351OPTION(mon_health_data_update_interval, OPT_FLOAT, 60.0)
352OPTION(mon_health_to_clog, OPT_BOOL, true)
353OPTION(mon_health_to_clog_interval, OPT_INT, 3600)
354OPTION(mon_health_to_clog_tick_interval, OPT_DOUBLE, 60.0)
224ce89b
WB
355OPTION(mon_health_preluminous_compat, OPT_BOOL, false)
356OPTION(mon_health_max_detail, OPT_INT, 50) // max detailed pgs to report in health detail
7c673cae
FG
357OPTION(mon_data_avail_crit, OPT_INT, 5)
358OPTION(mon_data_avail_warn, OPT_INT, 30)
359OPTION(mon_data_size_warn, OPT_U64, 15ULL*1024*1024*1024) // issue a warning when the monitor's data store goes over 15GB (in bytes); ULL keeps the product out of 32-bit int arithmetic, where it would overflow
360OPTION(mon_warn_not_scrubbed, OPT_INT, 0)
361OPTION(mon_warn_not_deep_scrubbed, OPT_INT, 0)
362OPTION(mon_scrub_interval, OPT_INT, 3600*24) // once a day
363OPTION(mon_scrub_timeout, OPT_INT, 60*5) // let's give it 5 minutes; why not.
364OPTION(mon_scrub_max_keys, OPT_INT, 100) // max number of keys to scrub each time
365OPTION(mon_scrub_inject_crc_mismatch, OPT_DOUBLE, 0.0) // probability of injected crc mismatch [0.0, 1.0]
366OPTION(mon_scrub_inject_missing_keys, OPT_DOUBLE, 0.0) // probability of injected missing keys [0.0, 1.0]
367OPTION(mon_config_key_max_entry_size, OPT_INT, 4096) // max num bytes per config-key entry
368OPTION(mon_sync_timeout, OPT_DOUBLE, 60.0)
369OPTION(mon_sync_max_payload_size, OPT_U32, 1048576) // max size for a sync chunk payload (say, 1MB)
370OPTION(mon_sync_debug, OPT_BOOL, false) // enable sync-specific debug
7c673cae
FG
371OPTION(mon_inject_sync_get_chunk_delay, OPT_DOUBLE, 0) // inject N second delay on each get_chunk request
372OPTION(mon_osd_min_down_reporters, OPT_INT, 2) // number of OSDs from different subtrees who need to report a down OSD for it to count
373OPTION(mon_osd_reporter_subtree_level , OPT_STR, "host") // in which level of parent bucket the reporters are counted
374OPTION(mon_osd_force_trim_to, OPT_INT, 0) // force mon to trim maps to this point, regardless of min_last_epoch_clean (dangerous, use with care)
375OPTION(mon_mds_force_trim_to, OPT_INT, 0) // force mon to trim mdsmaps to this point (dangerous, use with care)
376OPTION(mon_mds_skip_sanity, OPT_BOOL, false) // skip safety assertions on FSMap (in case of bugs where we want to continue anyway)
377
378// monitor debug options
379OPTION(mon_debug_deprecated_as_obsolete, OPT_BOOL, false) // consider deprecated commands as obsolete
380
381// dump transactions
382OPTION(mon_debug_dump_transactions, OPT_BOOL, false)
383OPTION(mon_debug_dump_json, OPT_BOOL, false)
384OPTION(mon_debug_dump_location, OPT_STR, "/var/log/ceph/$cluster-$name.tdump")
385OPTION(mon_debug_no_require_luminous, OPT_BOOL, false)
31f18b77
FG
386OPTION(mon_debug_no_require_bluestore_for_ec_overwrites, OPT_BOOL, false)
387OPTION(mon_debug_no_initial_persistent_features, OPT_BOOL, false)
7c673cae
FG
388OPTION(mon_inject_transaction_delay_max, OPT_DOUBLE, 10.0) // seconds
389OPTION(mon_inject_transaction_delay_probability, OPT_DOUBLE, 0) // range [0, 1]
390
391OPTION(mon_sync_provider_kill_at, OPT_INT, 0) // kill the sync provider at a specific point in the work flow
392OPTION(mon_sync_requester_kill_at, OPT_INT, 0) // kill the sync requester at a specific point in the work flow
393OPTION(mon_force_quorum_join, OPT_BOOL, false) // force monitor to join quorum even if it has been previously removed from the map
394OPTION(mon_keyvaluedb, OPT_STR, "rocksdb") // type of keyvaluedb backend
395
396// UNSAFE -- TESTING ONLY! Allows addition of a cache tier with preexisting snaps
397OPTION(mon_debug_unsafe_allow_tier_with_nonempty_snaps, OPT_BOOL, false)
224ce89b
WB
398OPTION(mon_osd_blacklist_default_expire, OPT_DOUBLE, 60*60) // default one hour
399OPTION(mon_osd_crush_smoke_test, OPT_BOOL, true)
7c673cae
FG
400
401OPTION(paxos_stash_full_interval, OPT_INT, 25) // how often (in commits) to stash a full copy of the PaxosService state
402OPTION(paxos_max_join_drift, OPT_INT, 10) // max paxos iterations before we must first sync the monitor stores
403OPTION(paxos_propose_interval, OPT_DOUBLE, 1.0) // gather updates for this long before proposing a map update
404OPTION(paxos_min_wait, OPT_DOUBLE, 0.05) // min time to gather updates for after period of inactivity
405OPTION(paxos_min, OPT_INT, 500) // minimum number of paxos states to keep around
406OPTION(paxos_trim_min, OPT_INT, 250) // number of extra proposals tolerated before trimming
407OPTION(paxos_trim_max, OPT_INT, 500) // max number of extra proposals to trim at a time
408OPTION(paxos_service_trim_min, OPT_INT, 250) // minimum amount of versions to trigger a trim (0 disables it)
409OPTION(paxos_service_trim_max, OPT_INT, 500) // maximum amount of versions to trim during a single proposal (0 disables it)
410OPTION(paxos_kill_at, OPT_INT, 0)
411OPTION(auth_cluster_required, OPT_STR, "cephx") // required of mon, mds, osd daemons
412OPTION(auth_service_required, OPT_STR, "cephx") // required by daemons of clients
413OPTION(auth_client_required, OPT_STR, "cephx, none") // what clients require of daemons
414OPTION(auth_supported, OPT_STR, "") // deprecated; default value for above if they are not defined.
415OPTION(max_rotating_auth_attempts, OPT_INT, 10)
416OPTION(cephx_require_signatures, OPT_BOOL, false) // If true, don't talk to Cephx partners if they don't support message signing; off by default
417OPTION(cephx_cluster_require_signatures, OPT_BOOL, false)
418OPTION(cephx_service_require_signatures, OPT_BOOL, false)
419OPTION(cephx_sign_messages, OPT_BOOL, true) // Default to signing session messages if supported
420OPTION(auth_mon_ticket_ttl, OPT_DOUBLE, 60*60*12)
421OPTION(auth_service_ticket_ttl, OPT_DOUBLE, 60*60)
422OPTION(auth_debug, OPT_BOOL, false) // if true, assert when weird things happen
423OPTION(mon_client_hunt_parallel, OPT_U32, 2) // how many mons to try to connect to in parallel during hunt
424OPTION(mon_client_hunt_interval, OPT_DOUBLE, 3.0) // try new mon every N seconds until we connect
425OPTION(mon_client_ping_interval, OPT_DOUBLE, 10.0) // ping every N seconds
426OPTION(mon_client_ping_timeout, OPT_DOUBLE, 30.0) // fail if we don't hear back
427OPTION(mon_client_hunt_interval_backoff, OPT_DOUBLE, 2.0) // each time we reconnect to a monitor, double our timeout
428OPTION(mon_client_hunt_interval_max_multiple, OPT_DOUBLE, 10.0) // up to a max of 10*default (30 seconds)
429OPTION(mon_client_max_log_entries_per_message, OPT_INT, 1000)
430OPTION(mon_max_pool_pg_num, OPT_INT, 65536)
431OPTION(mon_pool_quota_warn_threshold, OPT_INT, 0) // percent of quota at which to issue warnings
432OPTION(mon_pool_quota_crit_threshold, OPT_INT, 0) // percent of quota at which to issue errors
433OPTION(client_cache_size, OPT_INT, 16384)
434OPTION(client_cache_mid, OPT_FLOAT, .75)
435OPTION(client_use_random_mds, OPT_BOOL, false)
436OPTION(client_mount_timeout, OPT_DOUBLE, 300.0)
437OPTION(client_tick_interval, OPT_DOUBLE, 1.0)
438OPTION(client_trace, OPT_STR, "")
439OPTION(client_readahead_min, OPT_LONGLONG, 128*1024) // readahead at _least_ this much.
440OPTION(client_readahead_max_bytes, OPT_LONGLONG, 0) // default unlimited
441OPTION(client_readahead_max_periods, OPT_LONGLONG, 4) // as multiple of file layout period (object size * num stripes)
442OPTION(client_reconnect_stale, OPT_BOOL, false) // automatically reconnect stale session
443OPTION(client_snapdir, OPT_STR, ".snap")
444OPTION(client_mountpoint, OPT_STR, "/")
445OPTION(client_mount_uid, OPT_INT, -1)
446OPTION(client_mount_gid, OPT_INT, -1)
447OPTION(client_notify_timeout, OPT_INT, 10) // in seconds
448OPTION(osd_client_watch_timeout, OPT_INT, 30) // in seconds
449OPTION(client_caps_release_delay, OPT_INT, 5) // in seconds
450OPTION(client_quota_df, OPT_BOOL, true) // use quota for df on subdir mounts
451OPTION(client_oc, OPT_BOOL, true)
452OPTION(client_oc_size, OPT_INT, 1024*1024* 200) // MB * n
453OPTION(client_oc_max_dirty, OPT_INT, 1024*1024* 100) // MB * n (dirty OR tx.. bigish)
454OPTION(client_oc_target_dirty, OPT_INT, 1024*1024* 8) // target dirty (keep this smallish)
455OPTION(client_oc_max_dirty_age, OPT_DOUBLE, 5.0) // max age in cache before writeback
456OPTION(client_oc_max_objects, OPT_INT, 1000) // max objects in cache
457OPTION(client_debug_getattr_caps, OPT_BOOL, false) // check if MDS reply contains wanted caps
458OPTION(client_debug_force_sync_read, OPT_BOOL, false) // always read synchronously (go to osds)
459OPTION(client_debug_inject_tick_delay, OPT_INT, 0) // delay the client tick for a number of seconds
460OPTION(client_max_inline_size, OPT_U64, 4096)
461OPTION(client_inject_release_failure, OPT_BOOL, false) // synthetic client bug for testing
462OPTION(client_inject_fixed_oldest_tid, OPT_BOOL, false) // synthetic client bug for testing
463OPTION(client_metadata, OPT_STR, "")
464OPTION(client_acl_type, OPT_STR, "")
465OPTION(client_permissions, OPT_BOOL, true)
466OPTION(client_dirsize_rbytes, OPT_BOOL, true)
467
468// note: the max amount of "in flight" dirty data is roughly (max - target)
469OPTION(fuse_use_invalidate_cb, OPT_BOOL, true) // use fuse 2.8+ invalidate callback to keep page cache consistent
470OPTION(fuse_disable_pagecache, OPT_BOOL, false)
471OPTION(fuse_allow_other, OPT_BOOL, true)
472OPTION(fuse_default_permissions, OPT_BOOL, false)
473OPTION(fuse_big_writes, OPT_BOOL, true)
474OPTION(fuse_atomic_o_trunc, OPT_BOOL, true)
475OPTION(fuse_debug, OPT_BOOL, false)
476OPTION(fuse_multithreaded, OPT_BOOL, true)
477OPTION(fuse_require_active_mds, OPT_BOOL, true) // if ceph_fuse requires active mds server
478OPTION(fuse_syncfs_on_mksnap, OPT_BOOL, true)
479OPTION(fuse_set_user_groups, OPT_BOOL, false) // if ceph_fuse fills in group lists or not
480
481OPTION(client_try_dentry_invalidate, OPT_BOOL, true) // the client should try to use dentry invalidation instead of remounting, on kernels it believes that will work for
482OPTION(client_die_on_failed_remount, OPT_BOOL, true)
483OPTION(client_check_pool_perm, OPT_BOOL, true)
484OPTION(client_use_faked_inos, OPT_BOOL, false)
485OPTION(client_mds_namespace, OPT_STR, "")
486
487OPTION(crush_location, OPT_STR, "") // whitespace-separated list of key=value pairs describing crush location
488OPTION(crush_location_hook, OPT_STR, "")
489OPTION(crush_location_hook_timeout, OPT_INT, 10)
490
491OPTION(objecter_tick_interval, OPT_DOUBLE, 5.0)
492OPTION(objecter_timeout, OPT_DOUBLE, 10.0) // before we ask for a map
493OPTION(objecter_inflight_op_bytes, OPT_U64, 1024*1024*100) // max in-flight data (both directions)
494OPTION(objecter_inflight_ops, OPT_U64, 1024) // max in-flight ios
495OPTION(objecter_completion_locks_per_session, OPT_U64, 32) // num of completion locks per each session, for serializing same object responses
496OPTION(objecter_inject_no_watch_ping, OPT_BOOL, false) // suppress watch pings
497OPTION(objecter_retry_writes_after_first_reply, OPT_BOOL, false) // ignore the first reply for each write, and resend the osd op instead
498OPTION(objecter_debug_inject_relock_delay, OPT_BOOL, false)
499
500// Max number of deletes at once in a single Filer::purge call
501OPTION(filer_max_purge_ops, OPT_U32, 10)
502// Max number of truncate at once in a single Filer::truncate call
503OPTION(filer_max_truncate_ops, OPT_U32, 128)
504
505OPTION(journaler_write_head_interval, OPT_INT, 15)
506OPTION(journaler_prefetch_periods, OPT_INT, 10) // * journal object size
507OPTION(journaler_prezero_periods, OPT_INT, 5) // * journal object size
508OPTION(mds_data, OPT_STR, "/var/lib/ceph/mds/$cluster-$id")
509OPTION(mds_max_file_size, OPT_U64, 1ULL << 40) // Used when creating new CephFS. Change with 'ceph mds set max_file_size <size>' afterwards
510// max xattr kv pairs size for each dir/file
511OPTION(mds_max_xattr_pairs_size, OPT_U32, 64 << 10)
512OPTION(mds_cache_size, OPT_INT, 100000)
513OPTION(mds_cache_mid, OPT_FLOAT, .7)
514OPTION(mds_max_file_recover, OPT_U32, 32)
515OPTION(mds_dir_max_commit_size, OPT_INT, 10) // MB
516OPTION(mds_dir_keys_per_op, OPT_INT, 16384)
517OPTION(mds_decay_halflife, OPT_FLOAT, 5)
518OPTION(mds_beacon_interval, OPT_FLOAT, 4)
519OPTION(mds_beacon_grace, OPT_FLOAT, 15)
520OPTION(mds_enforce_unique_name, OPT_BOOL, true)
521OPTION(mds_blacklist_interval, OPT_FLOAT, 24.0*60.0) // how long to blacklist failed nodes
31f18b77 522
7c673cae 523OPTION(mds_session_timeout, OPT_FLOAT, 60) // cap bits and leases time out if client idle
31f18b77
FG
524OPTION(mds_session_blacklist_on_timeout, OPT_BOOL, true) // whether to blacklist clients whose sessions are dropped due to timeout
525OPTION(mds_session_blacklist_on_evict, OPT_BOOL, true) // whether to blacklist clients whose sessions are dropped via admin commands
526
7c673cae
FG
527OPTION(mds_sessionmap_keys_per_op, OPT_U32, 1024) // how many sessions should I try to load/store in a single OMAP operation?
528OPTION(mds_revoke_cap_timeout, OPT_FLOAT, 60) // detect clients which aren't revoking caps
529OPTION(mds_recall_state_timeout, OPT_FLOAT, 60) // detect clients which aren't trimming caps
530OPTION(mds_freeze_tree_timeout, OPT_FLOAT, 30) // detecting freeze tree deadlock
531OPTION(mds_session_autoclose, OPT_FLOAT, 300) // autoclose idle session
532OPTION(mds_health_summarize_threshold, OPT_INT, 10) // collapse N-client health metrics to a single 'many'
533OPTION(mds_health_cache_threshold, OPT_FLOAT, 1.5) // warn on cache size if it exceeds mds_cache_size by this factor
534OPTION(mds_reconnect_timeout, OPT_FLOAT, 45) // seconds to wait for clients during mds restart
535 // make it (mds_session_timeout - mds_beacon_grace)
536OPTION(mds_tick_interval, OPT_FLOAT, 5)
537OPTION(mds_dirstat_min_interval, OPT_FLOAT, 1) // try to avoid propagating more often than this
538OPTION(mds_scatter_nudge_interval, OPT_FLOAT, 5) // how quickly dirstat changes propagate up the hierarchy
539OPTION(mds_client_prealloc_inos, OPT_INT, 1000)
540OPTION(mds_early_reply, OPT_BOOL, true)
541OPTION(mds_default_dir_hash, OPT_INT, CEPH_STR_HASH_RJENKINS)
542OPTION(mds_log_pause, OPT_BOOL, false)
543OPTION(mds_log_skip_corrupt_events, OPT_BOOL, false)
544OPTION(mds_log_max_events, OPT_INT, -1)
545OPTION(mds_log_events_per_segment, OPT_INT, 1024)
546OPTION(mds_log_segment_size, OPT_INT, 0) // segment size for mds log, default to default file_layout_t
547OPTION(mds_log_max_segments, OPT_U32, 30)
548OPTION(mds_log_max_expiring, OPT_INT, 20)
549OPTION(mds_bal_export_pin, OPT_BOOL, true) // allow clients to pin directory trees to ranks
550OPTION(mds_bal_sample_interval, OPT_DOUBLE, 3.0) // every 3 seconds
551OPTION(mds_bal_replicate_threshold, OPT_FLOAT, 8000)
552OPTION(mds_bal_unreplicate_threshold, OPT_FLOAT, 0)
553OPTION(mds_bal_frag, OPT_BOOL, true)
554OPTION(mds_bal_split_size, OPT_INT, 10000)
555OPTION(mds_bal_split_rd, OPT_FLOAT, 25000)
556OPTION(mds_bal_split_wr, OPT_FLOAT, 10000)
557OPTION(mds_bal_split_bits, OPT_INT, 3)
558OPTION(mds_bal_merge_size, OPT_INT, 50)
559OPTION(mds_bal_interval, OPT_INT, 10) // seconds
560OPTION(mds_bal_fragment_interval, OPT_INT, 5) // seconds
561OPTION(mds_bal_fragment_size_max, OPT_INT, 10000*10) // order of magnitude higher than split size
562OPTION(mds_bal_fragment_fast_factor, OPT_FLOAT, 1.5) // multiple of size_max that triggers immediate split
563OPTION(mds_bal_idle_threshold, OPT_FLOAT, 0)
564OPTION(mds_bal_max, OPT_INT, -1)
565OPTION(mds_bal_max_until, OPT_INT, -1)
566OPTION(mds_bal_mode, OPT_INT, 0)
567OPTION(mds_bal_min_rebalance, OPT_FLOAT, .1) // must be this much above average before we export anything
568OPTION(mds_bal_min_start, OPT_FLOAT, .2) // if we need less than this, we don't do anything
569OPTION(mds_bal_need_min, OPT_FLOAT, .8) // take within this range of what we need
570OPTION(mds_bal_need_max, OPT_FLOAT, 1.2)
571OPTION(mds_bal_midchunk, OPT_FLOAT, .3) // any sub bigger than this taken in full
572OPTION(mds_bal_minchunk, OPT_FLOAT, .001) // never take anything smaller than this
573OPTION(mds_bal_target_decay, OPT_DOUBLE, 10.0) // target decay half-life in MDSMap (2x larger is approx. 2x slower)
574OPTION(mds_replay_interval, OPT_FLOAT, 1.0) // time to wait before starting replay again
575OPTION(mds_shutdown_check, OPT_INT, 0)
576OPTION(mds_thrash_exports, OPT_INT, 0)
577OPTION(mds_thrash_fragments, OPT_INT, 0)
578OPTION(mds_dump_cache_on_map, OPT_BOOL, false)
579OPTION(mds_dump_cache_after_rejoin, OPT_BOOL, false)
580OPTION(mds_verify_scatter, OPT_BOOL, false)
581OPTION(mds_debug_scatterstat, OPT_BOOL, false)
582OPTION(mds_debug_frag, OPT_BOOL, false)
583OPTION(mds_debug_auth_pins, OPT_BOOL, false)
584OPTION(mds_debug_subtrees, OPT_BOOL, false)
585OPTION(mds_kill_mdstable_at, OPT_INT, 0)
586OPTION(mds_kill_export_at, OPT_INT, 0)
587OPTION(mds_kill_import_at, OPT_INT, 0)
588OPTION(mds_kill_link_at, OPT_INT, 0)
589OPTION(mds_kill_rename_at, OPT_INT, 0)
590OPTION(mds_kill_openc_at, OPT_INT, 0)
591OPTION(mds_kill_journal_at, OPT_INT, 0)
592OPTION(mds_kill_journal_expire_at, OPT_INT, 0)
593OPTION(mds_kill_journal_replay_at, OPT_INT, 0)
594OPTION(mds_journal_format, OPT_U32, 1) // Default to most recent JOURNAL_FORMAT_*
595OPTION(mds_kill_create_at, OPT_INT, 0)
596OPTION(mds_inject_traceless_reply_probability, OPT_DOUBLE, 0) /* probability,
597 in the range [0-1], of skipping the trace when
598 sending an MDS modify reply to the client */
599OPTION(mds_wipe_sessions, OPT_BOOL, false) // off by default
600OPTION(mds_wipe_ino_prealloc, OPT_BOOL, false) // off by default
601OPTION(mds_skip_ino, OPT_INT, 0)
602OPTION(mds_standby_for_name, OPT_STR, "")
603OPTION(mds_standby_for_rank, OPT_INT, -1)
604OPTION(mds_standby_for_fscid, OPT_INT, -1)
605OPTION(mds_standby_replay, OPT_BOOL, false)
606OPTION(mds_enable_op_tracker, OPT_BOOL, true) // enable/disable MDS op tracking
607OPTION(mds_op_history_size, OPT_U32, 20) // Max number of completed ops to track
608OPTION(mds_op_history_duration, OPT_U32, 600) // Oldest completed op to track
609OPTION(mds_op_complaint_time, OPT_FLOAT, 30) // how many seconds old makes an op complaint-worthy
610OPTION(mds_op_log_threshold, OPT_INT, 5) // how many op log messages to show in one go
611OPTION(mds_snap_min_uid, OPT_U32, 0) // The minimum UID required to create a snapshot
612OPTION(mds_snap_max_uid, OPT_U32, 4294967294) // The maximum UID allowed to create a snapshot
613OPTION(mds_snap_rstat, OPT_BOOL, false) // enable/disable nested stat for snapshot
614OPTION(mds_verify_backtrace, OPT_U32, 1)
615// detect clients which aren't trimming completed requests
616OPTION(mds_max_completed_flushes, OPT_U32, 100000)
617OPTION(mds_max_completed_requests, OPT_U32, 100000)
618
619OPTION(mds_action_on_write_error, OPT_U32, 1) // 0: ignore; 1: force readonly; 2: crash
620OPTION(mds_mon_shutdown_timeout, OPT_DOUBLE, 5)
621
622// Maximum number of concurrent stray files to purge
623OPTION(mds_max_purge_files, OPT_U32, 64)
624// Maximum number of concurrent RADOS ops to issue in purging
625OPTION(mds_max_purge_ops, OPT_U32, 8192)
626// Maximum number of concurrent RADOS ops to issue in purging, scaled by PG count
627OPTION(mds_max_purge_ops_per_pg, OPT_FLOAT, 0.5)
628
629OPTION(mds_purge_queue_busy_flush_period, OPT_FLOAT, 1.0)
630
631OPTION(mds_root_ino_uid, OPT_INT, 0) // The UID of / on new filesystems
632OPTION(mds_root_ino_gid, OPT_INT, 0) // The GID of / on new filesystems
633
634OPTION(mds_max_scrub_ops_in_progress, OPT_INT, 5) // the number of simultaneous scrubs allowed
635
636// Maximum number of damaged frags/dentries before whole MDS rank goes damaged
637OPTION(mds_damage_table_max_entries, OPT_INT, 10000)
638
31f18b77
FG
639// Maximum increment for client writable range, counted by number of objects
640OPTION(mds_client_writeable_range_max_inc_objs, OPT_U32, 1024)
641
7c673cae
FG
642// verify backend can support configured max object name length
643OPTION(osd_check_max_object_name_len_on_startup, OPT_BOOL, true)
644
645// Maximum number of backfills to or from a single osd
646OPTION(osd_max_backfills, OPT_U64, 1)
647
648// Minimum recovery priority (255 = max, smaller = lower)
649OPTION(osd_min_recovery_priority, OPT_INT, 0)
650
651// Seconds to wait before retrying refused backfills
652OPTION(osd_backfill_retry_interval, OPT_DOUBLE, 30.0)
653
654// Seconds to wait before retrying refused recovery
655OPTION(osd_recovery_retry_interval, OPT_DOUBLE, 30.0)
656
657// max agent flush ops
658OPTION(osd_agent_max_ops, OPT_INT, 4)
659OPTION(osd_agent_max_low_ops, OPT_INT, 2)
660OPTION(osd_agent_min_evict_effort, OPT_FLOAT, .1)
661OPTION(osd_agent_quantize_effort, OPT_FLOAT, .1)
662OPTION(osd_agent_delay_time, OPT_FLOAT, 5.0)
663
664// osd ignore history.last_epoch_started in find_best_info
665OPTION(osd_find_best_info_ignore_history_les, OPT_BOOL, false)
666
667// decay atime and hist histograms after how many objects go by
668OPTION(osd_agent_hist_halflife, OPT_INT, 1000)
669
670// must be this amount over the threshold to enable,
671// this amount below the threshold to disable.
672OPTION(osd_agent_slop, OPT_FLOAT, .02)
673
674OPTION(osd_uuid, OPT_UUID, uuid_d())
675OPTION(osd_data, OPT_STR, "/var/lib/ceph/osd/$cluster-$id")
676OPTION(osd_journal, OPT_STR, "/var/lib/ceph/osd/$cluster-$id/journal")
677OPTION(osd_journal_size, OPT_INT, 5120) // in mb
678OPTION(osd_journal_flush_on_shutdown, OPT_BOOL, true) // Flush journal to data store on shutdown
679// flags for specific control purpose during osd mount() process.
680// e.g., can be 1 to skip over replaying journal
681// or 2 to skip over mounting omap or 3 to skip over both.
682// This might be helpful in case the journal is totally corrupted
683// and we still want to bring the osd daemon back normally, etc.
684OPTION(osd_os_flags, OPT_U32, 0)
685OPTION(osd_max_write_size, OPT_INT, 90)
686OPTION(osd_max_pgls, OPT_U64, 1024) // max number of pgls entries to return
687OPTION(osd_client_message_size_cap, OPT_U64, 500*1024L*1024L) // client data allowed in-memory (in bytes)
688OPTION(osd_client_message_cap, OPT_U64, 100) // num client messages allowed in-memory
689OPTION(osd_pg_bits, OPT_INT, 6) // bits per osd
690OPTION(osd_pgp_bits, OPT_INT, 6) // bits per osd
31f18b77 691OPTION(osd_crush_update_weight_set, OPT_BOOL, true) // update weight set while updating weights
7c673cae
FG
692OPTION(osd_crush_chooseleaf_type, OPT_INT, 1) // 1 = host
693OPTION(osd_pool_use_gmt_hitset, OPT_BOOL, true) // try to use gmt for hitset archive names if all osds in cluster support it.
694OPTION(osd_crush_update_on_start, OPT_BOOL, true)
224ce89b 695OPTION(osd_class_update_on_start, OPT_BOOL, true) // automatically set device class on start
7c673cae 696OPTION(osd_crush_initial_weight, OPT_DOUBLE, -1) // if >=0, the initial weight is for newly added osds.
31f18b77 697OPTION(osd_pool_default_crush_rule, OPT_INT, -1)
7c673cae
FG
698OPTION(osd_pool_erasure_code_stripe_unit, OPT_U32, 4096) // in bytes
699OPTION(osd_pool_default_size, OPT_INT, 3)
700OPTION(osd_pool_default_min_size, OPT_INT, 0) // 0 means no specific default; ceph will use size-size/2
701OPTION(osd_pool_default_pg_num, OPT_INT, 8) // number of PGs for new pools. Configure in global or mon section of ceph.conf
702OPTION(osd_pool_default_pgp_num, OPT_INT, 8) // number of PGs for placement purposes. Should be equal to pg_num
224ce89b 703OPTION(osd_pool_default_type, OPT_STR, "replicated")
7c673cae
FG
704OPTION(osd_pool_default_erasure_code_profile,
705 OPT_STR,
706 "plugin=jerasure "
707 "technique=reed_sol_van "
708 "k=2 "
709 "m=1 "
710 ) // default properties of osd pool create
711OPTION(osd_erasure_code_plugins, OPT_STR,
712 "jerasure"
713 " lrc"
714#ifdef HAVE_BETTER_YASM_ELF64
715 " isa"
716#endif
717 ) // list of erasure code plugins
718
719// Allows the "peered" state for recovery and backfill below min_size
720OPTION(osd_allow_recovery_below_min_size, OPT_BOOL, true)
721
722OPTION(osd_pool_default_flags, OPT_INT, 0) // default flags for new pools
723OPTION(osd_pool_default_flag_hashpspool, OPT_BOOL, true) // use new pg hashing to prevent pool/pg overlap
724OPTION(osd_pool_default_flag_nodelete, OPT_BOOL, false) // pool can't be deleted
725OPTION(osd_pool_default_flag_nopgchange, OPT_BOOL, false) // pool's pg and pgp num can't be changed
726OPTION(osd_pool_default_flag_nosizechange, OPT_BOOL, false) // pool's size and min size can't be changed
727OPTION(osd_pool_default_hit_set_bloom_fpp, OPT_FLOAT, .05)
728OPTION(osd_pool_default_cache_target_dirty_ratio, OPT_FLOAT, .4)
729OPTION(osd_pool_default_cache_target_dirty_high_ratio, OPT_FLOAT, .6)
730OPTION(osd_pool_default_cache_target_full_ratio, OPT_FLOAT, .8)
731OPTION(osd_pool_default_cache_min_flush_age, OPT_INT, 0) // seconds
732OPTION(osd_pool_default_cache_min_evict_age, OPT_INT, 0) // seconds
733OPTION(osd_pool_default_cache_max_evict_check_size, OPT_INT, 10) // max size to check for eviction
734OPTION(osd_hit_set_min_size, OPT_INT, 1000) // min target size for a HitSet
735OPTION(osd_hit_set_max_size, OPT_INT, 100000) // max target size for a HitSet
736OPTION(osd_hit_set_namespace, OPT_STR, ".ceph-internal") // rados namespace for hit_set tracking
737
738// conservative default throttling values
739OPTION(osd_tier_promote_max_objects_sec, OPT_U64, 25)
740OPTION(osd_tier_promote_max_bytes_sec, OPT_U64, 5 * 1024*1024)
741
742OPTION(osd_tier_default_cache_mode, OPT_STR, "writeback")
743OPTION(osd_tier_default_cache_hit_set_count, OPT_INT, 4)
744OPTION(osd_tier_default_cache_hit_set_period, OPT_INT, 1200)
745OPTION(osd_tier_default_cache_hit_set_type, OPT_STR, "bloom")
746OPTION(osd_tier_default_cache_min_read_recency_for_promote, OPT_INT, 1) // number of recent HitSets the object must appear in to be promoted (on read)
747OPTION(osd_tier_default_cache_min_write_recency_for_promote, OPT_INT, 1) // number of recent HitSets the object must appear in to be promoted (on write)
748OPTION(osd_tier_default_cache_hit_set_grade_decay_rate, OPT_INT, 20)
749OPTION(osd_tier_default_cache_hit_set_search_last_n, OPT_INT, 1)
750
751OPTION(osd_map_dedup, OPT_BOOL, true)
31f18b77
FG
752OPTION(osd_map_max_advance, OPT_INT, 40) // make this < cache_size!
753OPTION(osd_map_cache_size, OPT_INT, 50)
754OPTION(osd_map_message_max, OPT_INT, 40) // max maps per MOSDMap message
755OPTION(osd_map_share_max_epochs, OPT_INT, 40) // cap on # of inc maps we send to peers, clients
7c673cae
FG
756OPTION(osd_inject_bad_map_crc_probability, OPT_FLOAT, 0)
757OPTION(osd_inject_failure_on_pg_removal, OPT_BOOL, false)
758// shut down the OSD if its status flips more than osd_max_markdown_count times within the most recent osd_max_markdown_period seconds
759OPTION(osd_max_markdown_period , OPT_INT, 600)
760OPTION(osd_max_markdown_count, OPT_INT, 5)
761
31f18b77 762OPTION(osd_peering_wq_threads, OPT_INT, 2)
7c673cae
FG
763OPTION(osd_peering_wq_batch_size, OPT_U64, 20)
764OPTION(osd_op_pq_max_tokens_per_priority, OPT_U64, 4194304)
765OPTION(osd_op_pq_min_cost, OPT_U64, 65536)
766OPTION(osd_disk_threads, OPT_INT, 1)
767OPTION(osd_disk_thread_ioprio_class, OPT_STR, "") // "rt" (realtime), "be" (best effort) or "idle"
768OPTION(osd_disk_thread_ioprio_priority, OPT_INT, -1) // 0-7
769OPTION(osd_recover_clone_overlap, OPT_BOOL, true) // preserve clone_overlap during recovery/migration
31f18b77
FG
770OPTION(osd_op_num_threads_per_shard, OPT_INT, 0)
771OPTION(osd_op_num_threads_per_shard_hdd, OPT_INT, 1)
772OPTION(osd_op_num_threads_per_shard_ssd, OPT_INT, 2)
773OPTION(osd_op_num_shards, OPT_INT, 0)
774OPTION(osd_op_num_shards_hdd, OPT_INT, 5)
775OPTION(osd_op_num_shards_ssd, OPT_INT, 8)
224ce89b
WB
776
777// PrioritizedQueue (prio), Weighted Priority Queue (wpq ; default),
778// mclock_opclass, mclock_client, or debug_random. "mclock_opclass"
779// and "mclock_client" are based on the mClock/dmClock algorithm
780// (Gulati, et al. 2010). "mclock_opclass" prioritizes based on the
781// class the operation belongs to. "mclock_client" does the same but
782// also works to enforce fairness between clients. "debug_random"
783// chooses among all four with equal probability.
784OPTION(osd_op_queue, OPT_STR, "wpq")
785
7c673cae
FG
786OPTION(osd_op_queue_cut_off, OPT_STR, "low") // Min priority to go to strict queue. (low, high, debug_random)
787
224ce89b
WB
788// mClock priority queue parameters for five types of ops
789OPTION(osd_op_queue_mclock_client_op_res, OPT_DOUBLE, 1000.0)
790OPTION(osd_op_queue_mclock_client_op_wgt, OPT_DOUBLE, 500.0)
791OPTION(osd_op_queue_mclock_client_op_lim, OPT_DOUBLE, 0.0)
792OPTION(osd_op_queue_mclock_osd_subop_res, OPT_DOUBLE, 1000.0)
793OPTION(osd_op_queue_mclock_osd_subop_wgt, OPT_DOUBLE, 500.0)
794OPTION(osd_op_queue_mclock_osd_subop_lim, OPT_DOUBLE, 0.0)
795OPTION(osd_op_queue_mclock_snap_res, OPT_DOUBLE, 0.0)
796OPTION(osd_op_queue_mclock_snap_wgt, OPT_DOUBLE, 1.0)
797OPTION(osd_op_queue_mclock_snap_lim, OPT_DOUBLE, 0.001)
798OPTION(osd_op_queue_mclock_recov_res, OPT_DOUBLE, 0.0)
799OPTION(osd_op_queue_mclock_recov_wgt, OPT_DOUBLE, 1.0)
800OPTION(osd_op_queue_mclock_recov_lim, OPT_DOUBLE, 0.001)
801OPTION(osd_op_queue_mclock_scrub_res, OPT_DOUBLE, 0.0)
802OPTION(osd_op_queue_mclock_scrub_wgt, OPT_DOUBLE, 1.0)
803OPTION(osd_op_queue_mclock_scrub_lim, OPT_DOUBLE, 0.001)
804
7c673cae
FG
805OPTION(osd_ignore_stale_divergent_priors, OPT_BOOL, false) // do not assert on divergent_prior entries which aren't in the log and whose on-disk objects are newer
806
807// Set to true for testing. Users should NOT set this.
808// If set to true even after reading enough shards to
809// decode the object, any error will be reported.
810OPTION(osd_read_ec_check_for_errors, OPT_BOOL, false) // return error if any ec shard has an error
811
812// Only use clone_overlap for recovery if there are fewer than
813// osd_recover_clone_overlap_limit entries in the overlap set
814OPTION(osd_recover_clone_overlap_limit, OPT_INT, 10)
815
816OPTION(osd_backfill_scan_min, OPT_INT, 64)
817OPTION(osd_backfill_scan_max, OPT_INT, 512)
818OPTION(osd_op_thread_timeout, OPT_INT, 15)
819OPTION(osd_op_thread_suicide_timeout, OPT_INT, 150)
820OPTION(osd_recovery_thread_timeout, OPT_INT, 30)
821OPTION(osd_recovery_thread_suicide_timeout, OPT_INT, 300)
31f18b77 822OPTION(osd_recovery_sleep, OPT_FLOAT, 0.01) // seconds to sleep between recovery ops
7c673cae
FG
823OPTION(osd_snap_trim_sleep, OPT_DOUBLE, 0)
824OPTION(osd_scrub_invalid_stats, OPT_BOOL, true)
825OPTION(osd_remove_thread_timeout, OPT_INT, 60*60)
826OPTION(osd_remove_thread_suicide_timeout, OPT_INT, 10*60*60)
827OPTION(osd_command_thread_timeout, OPT_INT, 10*60)
828OPTION(osd_command_thread_suicide_timeout, OPT_INT, 15*60)
829OPTION(osd_heartbeat_addr, OPT_ADDR, entity_addr_t())
830OPTION(osd_heartbeat_interval, OPT_INT, 6) // (seconds) how often we ping peers
831
832// (seconds) how long before we decide a peer has failed
833// This setting is read by both the MONs and the OSDs, so it must be set to the same value for both in the configuration
834OPTION(osd_heartbeat_grace, OPT_INT, 20)
835OPTION(osd_heartbeat_min_peers, OPT_INT, 10) // minimum number of peers
836OPTION(osd_heartbeat_use_min_delay_socket, OPT_BOOL, false) // prio the heartbeat tcp socket and set dscp as CS6 on it if true
31f18b77 837OPTION(osd_heartbeat_min_size, OPT_INT, 2000) // the minimum size of OSD heartbeat messages to send
7c673cae
FG
838
839// max number of parallel snap trims/pg
840OPTION(osd_pg_max_concurrent_snap_trims, OPT_U64, 2)
841// max number of trimming pgs
842OPTION(osd_max_trimming_pgs, OPT_U64, 2)
843
844// minimum number of peers that must be reachable to mark ourselves
845// back up after being wrongly marked down.
846OPTION(osd_heartbeat_min_healthy_ratio, OPT_FLOAT, .33)
847
848OPTION(osd_mon_heartbeat_interval, OPT_INT, 30) // (seconds) how often to ping monitor if no peers
849OPTION(osd_mon_report_interval_max, OPT_INT, 600)
850OPTION(osd_mon_report_interval_min, OPT_INT, 5) // pg stats, failures, up_thru, boot.
851OPTION(osd_mon_report_max_in_flight, OPT_INT, 2) // max updates in flight
852OPTION(osd_beacon_report_interval, OPT_INT, 300) // (seconds) how often to send beacon message to monitor
853OPTION(osd_pg_stat_report_interval_max, OPT_INT, 500) // report pg stats for any given pg at least this often
854OPTION(osd_mon_ack_timeout, OPT_DOUBLE, 30.0) // time out a mon if it doesn't ack stats
855OPTION(osd_stats_ack_timeout_factor, OPT_DOUBLE, 2.0) // multiples of mon_ack_timeout
856OPTION(osd_stats_ack_timeout_decay, OPT_DOUBLE, .9)
857OPTION(osd_default_data_pool_replay_window, OPT_INT, 45)
7c673cae
FG
858OPTION(osd_auto_mark_unfound_lost, OPT_BOOL, false)
859OPTION(osd_recovery_delay_start, OPT_FLOAT, 0)
860OPTION(osd_recovery_max_active, OPT_U64, 3)
861OPTION(osd_recovery_max_single_start, OPT_U64, 1)
862OPTION(osd_recovery_max_chunk, OPT_U64, 8<<20) // max size of push chunk
863OPTION(osd_recovery_max_omap_entries_per_chunk, OPT_U64, 64000) // max number of omap entries per chunk; 0 to disable limit
864OPTION(osd_copyfrom_max_chunk, OPT_U64, 8<<20) // max size of a COPYFROM chunk
865OPTION(osd_push_per_object_cost, OPT_U64, 1000) // push cost per object
866OPTION(osd_max_push_cost, OPT_U64, 8<<20) // max size of push message
867OPTION(osd_max_push_objects, OPT_U64, 10) // max objects in single push op
868OPTION(osd_recovery_forget_lost_objects, OPT_BOOL, false) // off for now
869OPTION(osd_max_scrubs, OPT_INT, 1)
870OPTION(osd_scrub_during_recovery, OPT_BOOL, false) // Allow new scrubs to start while recovery is active on the OSD
871OPTION(osd_scrub_begin_hour, OPT_INT, 0)
872OPTION(osd_scrub_end_hour, OPT_INT, 24)
873OPTION(osd_scrub_load_threshold, OPT_FLOAT, 0.5)
874OPTION(osd_scrub_min_interval, OPT_FLOAT, 60*60*24) // if load is low
875OPTION(osd_scrub_max_interval, OPT_FLOAT, 7*60*60*24) // regardless of load
876OPTION(osd_scrub_interval_randomize_ratio, OPT_FLOAT, 0.5) // randomize the scheduled scrub in the span of [min,min*(1+randomize_ratio))
877OPTION(osd_scrub_backoff_ratio, OPT_DOUBLE, .66) // the probability to back off the scheduled scrub
878OPTION(osd_scrub_chunk_min, OPT_INT, 5)
879OPTION(osd_scrub_chunk_max, OPT_INT, 25)
880OPTION(osd_scrub_sleep, OPT_FLOAT, 0) // sleep between [deep]scrub ops
881OPTION(osd_scrub_auto_repair, OPT_BOOL, false) // whether to auto-repair inconsistencies upon deep-scrubbing
882OPTION(osd_scrub_auto_repair_num_errors, OPT_U32, 5) // only auto-repair when number of errors is below this threshold
883OPTION(osd_deep_scrub_interval, OPT_FLOAT, 60*60*24*7) // once a week
884OPTION(osd_deep_scrub_randomize_ratio, OPT_FLOAT, 0.15) // scrubs will randomly become deep scrubs at this rate (0.15 -> 15% of scrubs are deep)
885OPTION(osd_deep_scrub_stride, OPT_INT, 524288)
886OPTION(osd_deep_scrub_update_digest_min_age, OPT_INT, 2*60*60) // objects must be this old (seconds) before we update the whole-object digest on scrub
7c673cae
FG
887OPTION(osd_class_dir, OPT_STR, CEPH_LIBDIR "/rados-classes") // where rados plugins are stored
888OPTION(osd_open_classes_on_start, OPT_BOOL, true)
889OPTION(osd_class_load_list, OPT_STR, "cephfs hello journal lock log numops "
890 "rbd refcount replica_log rgw statelog timeindex user version") // list of object classes allowed to be loaded (allow all: *)
891OPTION(osd_class_default_list, OPT_STR, "cephfs hello journal lock log numops "
892 "rbd refcount replica_log rgw statelog timeindex user version") // list of object classes with default execute perm (allow all: *)
893OPTION(osd_check_for_log_corruption, OPT_BOOL, false)
894OPTION(osd_use_stale_snap, OPT_BOOL, false)
895OPTION(osd_rollback_to_cluster_snap, OPT_STR, "")
896OPTION(osd_default_notify_timeout, OPT_U32, 30) // default notify timeout in seconds
897OPTION(osd_kill_backfill_at, OPT_INT, 0)
898
899// Bounds how infrequently a new map epoch will be persisted for a pg
31f18b77 900OPTION(osd_pg_epoch_persisted_max_stale, OPT_U32, 40) // make this < map_cache_size!
7c673cae
FG
901
902OPTION(osd_min_pg_log_entries, OPT_U32, 3000) // number of entries to keep in the pg log when trimming it
903OPTION(osd_max_pg_log_entries, OPT_U32, 10000) // max entries, say when degraded, before we trim
904OPTION(osd_force_recovery_pg_log_entries_factor, OPT_FLOAT, 1.3) // max entries factor before force recovery
905OPTION(osd_pg_log_trim_min, OPT_U32, 100)
906OPTION(osd_op_complaint_time, OPT_FLOAT, 30) // how many seconds old makes an op complaint-worthy
907OPTION(osd_command_max_records, OPT_INT, 256)
908OPTION(osd_max_pg_blocked_by, OPT_U32, 16) // max peer osds to report that are blocking our progress
909OPTION(osd_op_log_threshold, OPT_INT, 5) // how many op log messages to show in one go
910OPTION(osd_verify_sparse_read_holes, OPT_BOOL, false) // read fiemap-reported holes and verify they are zeros
911OPTION(osd_backoff_on_unfound, OPT_BOOL, true) // object unfound
912OPTION(osd_backoff_on_degraded, OPT_BOOL, false) // [mainly for debug?] object unreadable/writeable
913OPTION(osd_backoff_on_down, OPT_BOOL, true) // pg in down/incomplete state
914OPTION(osd_backoff_on_peering, OPT_BOOL, false) // [debug] pg peering
915OPTION(osd_debug_crash_on_ignored_backoff, OPT_BOOL, false) // crash osd if client ignores a backoff; useful for debugging
916OPTION(osd_debug_inject_dispatch_delay_probability, OPT_DOUBLE, 0)
917OPTION(osd_debug_inject_dispatch_delay_duration, OPT_DOUBLE, .1)
918OPTION(osd_debug_drop_ping_probability, OPT_DOUBLE, 0)
919OPTION(osd_debug_drop_ping_duration, OPT_INT, 0)
920OPTION(osd_debug_op_order, OPT_BOOL, false)
921OPTION(osd_debug_verify_missing_on_start, OPT_BOOL, false)
922OPTION(osd_debug_scrub_chance_rewrite_digest, OPT_U64, 0)
923OPTION(osd_debug_verify_snaps_on_info, OPT_BOOL, false)
924OPTION(osd_debug_verify_stray_on_activate, OPT_BOOL, false)
925OPTION(osd_debug_skip_full_check_in_backfill_reservation, OPT_BOOL, false)
926OPTION(osd_debug_reject_backfill_probability, OPT_DOUBLE, 0)
927OPTION(osd_debug_inject_copyfrom_error, OPT_BOOL, false) // inject failure during copyfrom completion
928OPTION(osd_debug_misdirected_ops, OPT_BOOL, false)
929OPTION(osd_debug_skip_full_check_in_recovery, OPT_BOOL, false)
224ce89b 930OPTION(osd_debug_random_push_read_error, OPT_DOUBLE, 0)
7c673cae
FG
931OPTION(osd_debug_verify_cached_snaps, OPT_BOOL, false)
932OPTION(osd_enable_op_tracker, OPT_BOOL, true) // enable/disable OSD op tracking
933OPTION(osd_num_op_tracker_shard, OPT_U32, 32) // The number of shards for holding the ops
934OPTION(osd_op_history_size, OPT_U32, 20) // Max number of completed ops to track
935OPTION(osd_op_history_duration, OPT_U32, 600) // Oldest completed op to track
936OPTION(osd_op_history_slow_op_size, OPT_U32, 20) // Max number of slow ops to track
937OPTION(osd_op_history_slow_op_threshold, OPT_DOUBLE, 10.0) // track the op if over this threshold
938OPTION(osd_target_transaction_size, OPT_INT, 30) // to adjust various transactions that batch smaller items
939OPTION(osd_failsafe_full_ratio, OPT_FLOAT, .97) // what % full makes an OSD "full" (failsafe)
940OPTION(osd_fast_fail_on_connection_refused, OPT_BOOL, true) // immediately mark OSDs as down once they refuse to accept connections
941
942OPTION(osd_pg_object_context_cache_count, OPT_INT, 64)
943OPTION(osd_tracing, OPT_BOOL, false) // true if LTTng-UST tracepoints should be enabled
31f18b77 944OPTION(osd_function_tracing, OPT_BOOL, false) // true if function instrumentation should use LTTng
7c673cae
FG
945
946OPTION(osd_fast_info, OPT_BOOL, true) // use fast info attr, if we can
947
948// determines whether PGLog::check() compares written out log to stored log
949OPTION(osd_debug_pg_log_writeout, OPT_BOOL, false)
950OPTION(osd_loop_before_reset_tphandle, OPT_U32, 64) // Max number of loops before we reset thread-pool's handle
951// default timeout while calling WaitInterval on an empty queue
952OPTION(threadpool_default_timeout, OPT_INT, 60)
953// default wait time for an empty queue before pinging the hb timeout
954OPTION(threadpool_empty_queue_max_wait, OPT_INT, 2)
955
956OPTION(leveldb_log_to_ceph_log, OPT_BOOL, true)
957OPTION(leveldb_write_buffer_size, OPT_U64, 8 *1024*1024) // leveldb write buffer size
958OPTION(leveldb_cache_size, OPT_U64, 128 *1024*1024) // leveldb cache size
959OPTION(leveldb_block_size, OPT_U64, 0) // leveldb block size
960OPTION(leveldb_bloom_size, OPT_INT, 0) // leveldb bloom bits per entry
961OPTION(leveldb_max_open_files, OPT_INT, 0) // leveldb max open files
962OPTION(leveldb_compression, OPT_BOOL, true) // leveldb uses compression
963OPTION(leveldb_paranoid, OPT_BOOL, false) // leveldb paranoid flag
964OPTION(leveldb_log, OPT_STR, "/dev/null") // enable leveldb log file
965OPTION(leveldb_compact_on_mount, OPT_BOOL, false)
966
967OPTION(kinetic_host, OPT_STR, "") // hostname or ip address of a kinetic drive to use
968OPTION(kinetic_port, OPT_INT, 8123) // port number of the kinetic drive
969OPTION(kinetic_user_id, OPT_INT, 1) // kinetic user to authenticate as
970OPTION(kinetic_hmac_key, OPT_STR, "asdfasdf") // kinetic key to authenticate with
971OPTION(kinetic_use_ssl, OPT_BOOL, false) // whether to secure kinetic traffic with TLS
972
973
974OPTION(rocksdb_separate_wal_dir, OPT_BOOL, false) // use $path.wal for wal
975SAFE_OPTION(rocksdb_db_paths, OPT_STR, "") // path,size( path,size)*
976OPTION(rocksdb_log_to_ceph_log, OPT_BOOL, true) // log to ceph log
224ce89b
WB
977OPTION(rocksdb_cache_size, OPT_U64, 128*1024*1024) // rocksdb cache size (unless set by bluestore/etc)
978OPTION(rocksdb_cache_row_ratio, OPT_FLOAT, 0) // ratio of cache for row (vs block)
7c673cae 979OPTION(rocksdb_cache_shard_bits, OPT_INT, 4) // rocksdb block cache shard bits, 4 bit -> 16 shards
31f18b77 980OPTION(rocksdb_cache_type, OPT_STR, "lru") // 'lru' or 'clock'
7c673cae
FG
981OPTION(rocksdb_block_size, OPT_INT, 4*1024) // default rocksdb block size
982OPTION(rocksdb_perf, OPT_BOOL, false) // Enabling this will have 5-10% impact on performance for the stats collection
983OPTION(rocksdb_collect_compaction_stats, OPT_BOOL, false) // adds roughly 5%~10% overhead; collected only when rocksdb_perf is enabled
984OPTION(rocksdb_collect_extended_stats, OPT_BOOL, false) // adds roughly 5%~10% overhead; collected only when rocksdb_perf is enabled
985OPTION(rocksdb_collect_memory_stats, OPT_BOOL, false) // adds roughly 5%~10% overhead; collected only when rocksdb_perf is enabled
986OPTION(rocksdb_enable_rmrange, OPT_BOOL, false) // see https://github.com/facebook/rocksdb/blob/master/include/rocksdb/db.h#L253
987
988// rocksdb options that will be used for omap(if omap_backend is rocksdb)
989OPTION(filestore_rocksdb_options, OPT_STR, "")
990// rocksdb options that will be used in monstore
991OPTION(mon_rocksdb_options, OPT_STR, "write_buffer_size=33554432,compression=kNoCompression")
992
993/**
994 * osd_*_priority adjust the relative priority of client io, recovery io,
995 * snaptrim io, etc
996 *
997 * osd_*_priority determines the ratio of available io between client and
998 * recovery. Each option may be set between
999 * 1..63.
1000 */
1001OPTION(osd_client_op_priority, OPT_U32, 63)
1002OPTION(osd_recovery_op_priority, OPT_U32, 3)
1003
1004OPTION(osd_snap_trim_priority, OPT_U32, 5)
1005OPTION(osd_snap_trim_cost, OPT_U32, 1<<20) // set default cost equal to 1MB io
1006
1007OPTION(osd_scrub_priority, OPT_U32, 5)
1008// set default cost equal to 50MB io
1009OPTION(osd_scrub_cost, OPT_U32, 50<<20)
1010// set requested scrub priority higher than scrub priority to make the
1011// requested scrubs jump the queue of scheduled scrubs
1012OPTION(osd_requested_scrub_priority, OPT_U32, 120)
1013
1014OPTION(osd_recovery_priority, OPT_U32, 5)
1015// set default cost equal to 20MB io
1016OPTION(osd_recovery_cost, OPT_U32, 20<<20)
1017
1018/**
1019 * osd_recovery_op_warn_multiple scales the normal warning threshold,
1020 * osd_op_complaint_time, so that slow recovery ops won't cause noise
1021 */
1022OPTION(osd_recovery_op_warn_multiple, OPT_U32, 16)
1023
1024// Max time to wait between notifying mon of shutdown and shutting down
1025OPTION(osd_mon_shutdown_timeout, OPT_DOUBLE, 5)
31f18b77 1026OPTION(osd_shutdown_pgref_assert, OPT_BOOL, false) // crash if the OSD has stray PG refs on shutdown
7c673cae 1027
31f18b77 1028OPTION(osd_max_object_size, OPT_U64, 128*1024L*1024L) // OSD's maximum object size
7c673cae
FG
1029OPTION(osd_max_object_name_len, OPT_U32, 2048) // max rados object name len
1030OPTION(osd_max_object_namespace_len, OPT_U32, 256) // max rados object namespace len
1031OPTION(osd_max_attr_name_len, OPT_U32, 100) // max rados attr name len; cannot go higher than 100 chars for file system backends
1032OPTION(osd_max_attr_size, OPT_U64, 0)
1033
1034OPTION(osd_max_omap_entries_per_request, OPT_U64, 131072)
1035OPTION(osd_max_omap_bytes_per_request, OPT_U64, 1<<30)
1036
1037OPTION(osd_objectstore, OPT_STR, "filestore") // ObjectStore backend type
1038OPTION(osd_objectstore_tracing, OPT_BOOL, false) // true if LTTng-UST tracepoints should be enabled
7c673cae
FG
1039OPTION(osd_objectstore_fuse, OPT_BOOL, false)
1040
1041OPTION(osd_bench_small_size_max_iops, OPT_U32, 100) // 100 IOPS
1042OPTION(osd_bench_large_size_max_throughput, OPT_U64, 100 << 20) // 100 MB/s
1043OPTION(osd_bench_max_block_size, OPT_U64, 64 << 20) // cap the block size at 64MB
1044OPTION(osd_bench_duration, OPT_U32, 30) // duration of 'osd bench', capped at 30s to avoid triggering timeouts
1045
1046OPTION(osd_blkin_trace_all, OPT_BOOL, false) // create a blkin trace for all osd requests
1047OPTION(osdc_blkin_trace_all, OPT_BOOL, false) // create a blkin trace for all objecter requests
1048
1049OPTION(osd_discard_disconnected_ops, OPT_BOOL, true)
1050
1051OPTION(memstore_device_bytes, OPT_U64, 1024*1024*1024)
1052OPTION(memstore_page_set, OPT_BOOL, true)
1053OPTION(memstore_page_size, OPT_U64, 64 << 10)
1054
1055OPTION(bdev_debug_inflight_ios, OPT_BOOL, false)
1056OPTION(bdev_inject_crash, OPT_INT, 0) // if N>0, then ~ 1/N IOs will complete before we crash on flush.
1057OPTION(bdev_inject_crash_flush_delay, OPT_INT, 2) // wait N more seconds on flush
1058OPTION(bdev_aio, OPT_BOOL, true)
1059OPTION(bdev_aio_poll_ms, OPT_INT, 250) // milliseconds
1060OPTION(bdev_aio_max_queue_depth, OPT_INT, 1024)
224ce89b 1061OPTION(bdev_aio_reap_max, OPT_INT, 16)
7c673cae
FG
1062OPTION(bdev_block_size, OPT_INT, 4096)
1063OPTION(bdev_debug_aio, OPT_BOOL, false)
1064OPTION(bdev_debug_aio_suicide_timeout, OPT_FLOAT, 60.0)
1065
1066// if yes, osd will unbind all NVMe devices from kernel driver and bind them
1067// to the uio_pci_generic driver. The purpose is to prevent the case where
1068// NVMe driver is loaded while osd is running.
1069OPTION(bdev_nvme_unbind_from_kernel, OPT_BOOL, false)
1070OPTION(bdev_nvme_retry_count, OPT_INT, -1) // -1 means by default which is 4
1071
1072OPTION(objectstore_blackhole, OPT_BOOL, false)
1073
1074OPTION(bluefs_alloc_size, OPT_U64, 1048576)
1075OPTION(bluefs_max_prefetch, OPT_U64, 1048576)
1076OPTION(bluefs_min_log_runway, OPT_U64, 1048576) // alloc when we get this low
1077OPTION(bluefs_max_log_runway, OPT_U64, 4194304) // alloc this much at a time
1078OPTION(bluefs_log_compact_min_ratio, OPT_FLOAT, 5.0) // before we consider
1079OPTION(bluefs_log_compact_min_size, OPT_U64, 16*1048576) // before we consider
1080OPTION(bluefs_min_flush_size, OPT_U64, 524288) // ignore flush until its this big
1081OPTION(bluefs_compact_log_sync, OPT_BOOL, false) // sync or async log compaction?
1082OPTION(bluefs_buffered_io, OPT_BOOL, false)
1083OPTION(bluefs_sync_write, OPT_BOOL, false)
1084OPTION(bluefs_allocator, OPT_STR, "bitmap") // stupid | bitmap
1085OPTION(bluefs_preextend_wal_files, OPT_BOOL, false) // this *requires* that rocksdb has recycling enabled
1086
1087OPTION(bluestore_bluefs, OPT_BOOL, true)
1088OPTION(bluestore_bluefs_env_mirror, OPT_BOOL, false) // mirror to normal Env for debug
1089OPTION(bluestore_bluefs_min, OPT_U64, 1*1024*1024*1024) // 1gb
1090OPTION(bluestore_bluefs_min_ratio, OPT_FLOAT, .02) // min fs free / total free
1091OPTION(bluestore_bluefs_max_ratio, OPT_FLOAT, .90) // max fs free / total free
1092OPTION(bluestore_bluefs_gift_ratio, OPT_FLOAT, .02) // how much to add at a time
1093OPTION(bluestore_bluefs_reclaim_ratio, OPT_FLOAT, .20) // how much to reclaim at a time
1094OPTION(bluestore_bluefs_balance_interval, OPT_FLOAT, 1) // how often (sec) to balance free space between bluefs and bluestore
1095// If you want to use spdk driver, you need to specify NVMe serial number here
1096// with "spdk:" prefix.
1097// Users can use 'lspci -vvv -d 8086:0953 | grep "Device Serial Number"' to
1098// get the serial number of Intel(R) Fultondale NVMe controllers.
1099// Example:
1100// bluestore_block_path = spdk:55cd2e404bd73932
1101// If you want to run multiple SPDK instances per node, you must specify the
1102// amount of dpdk memory size in MB each instance will use, to make sure each
1103// instance uses its own dpdk memory
1104OPTION(bluestore_spdk_mem, OPT_U32, 512)
1105// A hexadecimal bit mask of the cores to run on. Note the core numbering can change between platforms and should be determined beforehand.
1106OPTION(bluestore_spdk_coremask, OPT_STR, "0x3")
1107// Specify the maximum number of I/Os to complete in one batch while checking queue pair completions.
1108// The default value 0 lets the SPDK nvme library determine the value.
1109OPTION(bluestore_spdk_max_io_completion, OPT_U32, 0)
1110OPTION(bluestore_block_path, OPT_STR, "")
1111OPTION(bluestore_block_size, OPT_U64, 10ULL * 1024*1024*1024) // 10gb for testing; ULL keeps the product out of 32-bit int arithmetic
1112OPTION(bluestore_block_create, OPT_BOOL, true)
1113OPTION(bluestore_block_db_path, OPT_STR, "")
1114OPTION(bluestore_block_db_size, OPT_U64, 0) // rocksdb ssts (hot/warm)
1115OPTION(bluestore_block_db_create, OPT_BOOL, false)
1116OPTION(bluestore_block_wal_path, OPT_STR, "")
1117OPTION(bluestore_block_wal_size, OPT_U64, 96 * 1024*1024) // rocksdb wal
1118OPTION(bluestore_block_wal_create, OPT_BOOL, false)
1119OPTION(bluestore_block_preallocate_file, OPT_BOOL, false) // whether to preallocate space if block/db_path/wal_path is a file rather than a block device.
1120OPTION(bluestore_csum_type, OPT_STR, "crc32c") // none|xxhash32|xxhash64|crc32c|crc32c_16|crc32c_8
1121OPTION(bluestore_csum_min_block, OPT_U32, 4096)
1122OPTION(bluestore_csum_max_block, OPT_U32, 64*1024)
1123OPTION(bluestore_min_alloc_size, OPT_U32, 0)
1124OPTION(bluestore_min_alloc_size_hdd, OPT_U32, 64*1024)
1125OPTION(bluestore_min_alloc_size_ssd, OPT_U32, 16*1024)
1126OPTION(bluestore_max_alloc_size, OPT_U32, 0)
1127OPTION(bluestore_prefer_deferred_size, OPT_U32, 0)
1128OPTION(bluestore_prefer_deferred_size_hdd, OPT_U32, 32768)
1129OPTION(bluestore_prefer_deferred_size_ssd, OPT_U32, 0)
1130OPTION(bluestore_compression_mode, OPT_STR, "none") // force|aggressive|passive|none
1131OPTION(bluestore_compression_algorithm, OPT_STR, "snappy")
1132OPTION(bluestore_compression_min_blob_size, OPT_U32, 0)
1133OPTION(bluestore_compression_min_blob_size_hdd, OPT_U32, 128*1024)
1134OPTION(bluestore_compression_min_blob_size_ssd, OPT_U32, 8*1024)
1135OPTION(bluestore_compression_max_blob_size, OPT_U32, 0)
1136OPTION(bluestore_compression_max_blob_size_hdd, OPT_U32, 512*1024)
1137OPTION(bluestore_compression_max_blob_size_ssd, OPT_U32, 64*1024)
1138/*
1139 * Specifies minimum expected amount of saved allocation units
1140 * per single blob to enable compressed blobs garbage collection
1141 *
1142 */
1143OPTION(bluestore_gc_enable_blob_threshold, OPT_INT, 0)
1144/*
1145 * Specifies minimum expected amount of saved allocation units
1146 * across all blobs to enable compressed blobs garbage collection
1147 *
1148 */
1149OPTION(bluestore_gc_enable_total_threshold, OPT_INT, 0)
1150
1151OPTION(bluestore_max_blob_size, OPT_U32, 0)
1152OPTION(bluestore_max_blob_size_hdd, OPT_U32, 512*1024)
1153OPTION(bluestore_max_blob_size_ssd, OPT_U32, 64*1024)
1154/*
1155 * Require the net gain of compression at least to be at this ratio,
1156 * otherwise we don't compress.
1157 * And ask for compressing at least 12.5%(1/8) off, by default.
1158 */
1159OPTION(bluestore_compression_required_ratio, OPT_DOUBLE, .875)
1160OPTION(bluestore_extent_map_shard_max_size, OPT_U32, 1200)
1161OPTION(bluestore_extent_map_shard_target_size, OPT_U32, 500)
1162OPTION(bluestore_extent_map_shard_min_size, OPT_U32, 150)
1163OPTION(bluestore_extent_map_shard_target_size_slop, OPT_DOUBLE, .2)
1164OPTION(bluestore_extent_map_inline_shard_prealloc_size, OPT_U32, 256)
31f18b77 1165OPTION(bluestore_cache_trim_interval, OPT_DOUBLE, .2)
7c673cae
FG
1166OPTION(bluestore_cache_trim_max_skip_pinned, OPT_U32, 64) // skip this many onodes pinned in cache before we give up
1167OPTION(bluestore_cache_type, OPT_STR, "2q") // lru, 2q
1168OPTION(bluestore_2q_cache_kin_ratio, OPT_DOUBLE, .5) // kin page slot size / max page slot size
1169OPTION(bluestore_2q_cache_kout_ratio, OPT_DOUBLE, .5) // number of kout page slot / total number of page slot
224ce89b
WB
1170OPTION(bluestore_cache_size, OPT_U64, 0)
1171OPTION(bluestore_cache_size_hdd, OPT_U64, 1*1024*1024*1024)
1172OPTION(bluestore_cache_size_ssd, OPT_U64, 3ULL*1024*1024*1024) // 3gb; ULL keeps the product out of 32-bit int arithmetic
1173OPTION(bluestore_cache_meta_ratio, OPT_DOUBLE, .01)
1174OPTION(bluestore_cache_kv_ratio, OPT_DOUBLE, .99)
1175OPTION(bluestore_cache_kv_max, OPT_U64, 512*1024*1024) // limit the maximum amount of cache for the kv store
7c673cae
FG
1176OPTION(bluestore_kvbackend, OPT_STR, "rocksdb")
1177OPTION(bluestore_allocator, OPT_STR, "bitmap") // stupid | bitmap
1178OPTION(bluestore_freelist_blocks_per_key, OPT_INT, 128)
1179OPTION(bluestore_bitmapallocator_blocks_per_zone, OPT_INT, 1024) // must be power of 2 aligned, e.g., 512, 1024, 2048...
1180OPTION(bluestore_bitmapallocator_span_size, OPT_INT, 1024) // must be power of 2 aligned, e.g., 512, 1024, 2048...
31f18b77 1181OPTION(bluestore_max_deferred_txc, OPT_U64, 32)
7c673cae
FG
1182OPTION(bluestore_rocksdb_options, OPT_STR, "compression=kNoCompression,max_write_buffer_number=4,min_write_buffer_number_to_merge=1,recycle_log_file_num=4,write_buffer_size=268435456,writable_file_max_buffer_size=0,compaction_readahead_size=2097152")
1183OPTION(bluestore_fsck_on_mount, OPT_BOOL, false)
1184OPTION(bluestore_fsck_on_mount_deep, OPT_BOOL, true)
1185OPTION(bluestore_fsck_on_umount, OPT_BOOL, false)
1186OPTION(bluestore_fsck_on_umount_deep, OPT_BOOL, true)
1187OPTION(bluestore_fsck_on_mkfs, OPT_BOOL, true)
1188OPTION(bluestore_fsck_on_mkfs_deep, OPT_BOOL, false)
1189OPTION(bluestore_sync_submit_transaction, OPT_BOOL, false) // submit kv txn in queueing thread (not kv_sync_thread)
1190OPTION(bluestore_throttle_bytes, OPT_U64, 64*1024*1024)
1191OPTION(bluestore_throttle_deferred_bytes, OPT_U64, 128*1024*1024)
31f18b77 1192OPTION(bluestore_throttle_cost_per_io_hdd, OPT_U64, 670000)
7c673cae
FG
1193OPTION(bluestore_throttle_cost_per_io_ssd, OPT_U64, 4000)
1194OPTION(bluestore_throttle_cost_per_io, OPT_U64, 0)
1195OPTION(bluestore_deferred_batch_ops, OPT_U64, 0)
1196OPTION(bluestore_deferred_batch_ops_hdd, OPT_U64, 64)
1197OPTION(bluestore_deferred_batch_ops_ssd, OPT_U64, 16)
1198OPTION(bluestore_nid_prealloc, OPT_INT, 1024)
1199OPTION(bluestore_blobid_prealloc, OPT_U64, 10240)
1200OPTION(bluestore_clone_cow, OPT_BOOL, true) // do copy-on-write for clones
1201OPTION(bluestore_default_buffered_read, OPT_BOOL, true)
1202OPTION(bluestore_default_buffered_write, OPT_BOOL, false)
1203OPTION(bluestore_debug_misc, OPT_BOOL, false)
1204OPTION(bluestore_debug_no_reuse_blocks, OPT_BOOL, false)
1205OPTION(bluestore_debug_small_allocations, OPT_INT, 0)
1206OPTION(bluestore_debug_freelist, OPT_BOOL, false)
1207OPTION(bluestore_debug_prefill, OPT_FLOAT, 0)
1208OPTION(bluestore_debug_prefragment_max, OPT_INT, 1048576)
1209OPTION(bluestore_debug_inject_read_err, OPT_BOOL, false)
1210OPTION(bluestore_debug_randomize_serial_transaction, OPT_INT, 0)
1211OPTION(bluestore_debug_omit_block_device_write, OPT_BOOL, false)
31f18b77
FG
1212OPTION(bluestore_debug_fsck_abort, OPT_BOOL, false)
1213OPTION(bluestore_debug_omit_kv_commit, OPT_BOOL, false)
1214OPTION(bluestore_debug_permit_any_bdev_label, OPT_BOOL, false)
7c673cae 1215OPTION(bluestore_shard_finishers, OPT_BOOL, false)
224ce89b 1216OPTION(bluestore_debug_random_read_err, OPT_DOUBLE, 0)
7c673cae
FG
1217
1218OPTION(kstore_max_ops, OPT_U64, 512)
1219OPTION(kstore_max_bytes, OPT_U64, 64*1024*1024)
1220OPTION(kstore_backend, OPT_STR, "rocksdb")
1221OPTION(kstore_rocksdb_options, OPT_STR, "compression=kNoCompression")
1222OPTION(kstore_rocksdb_bloom_bits_per_key, OPT_INT, 0)
1223OPTION(kstore_fsck_on_mount, OPT_BOOL, false)
1224OPTION(kstore_fsck_on_mount_deep, OPT_BOOL, true)
1225OPTION(kstore_nid_prealloc, OPT_U64, 1024)
1226OPTION(kstore_sync_transaction, OPT_BOOL, false)
1227OPTION(kstore_sync_submit_transaction, OPT_BOOL, false)
1228OPTION(kstore_onode_map_size, OPT_U64, 1024)
7c673cae
FG
1229OPTION(kstore_default_stripe_size, OPT_INT, 65536)
1230
1231OPTION(filestore_omap_backend, OPT_STR, "rocksdb")
1232OPTION(filestore_omap_backend_path, OPT_STR, "")
1233
1234/// filestore wb throttle limits
1235OPTION(filestore_wbthrottle_enable, OPT_BOOL, true)
1236OPTION(filestore_wbthrottle_btrfs_bytes_start_flusher, OPT_U64, 41943040)
1237OPTION(filestore_wbthrottle_btrfs_bytes_hard_limit, OPT_U64, 419430400)
1238OPTION(filestore_wbthrottle_btrfs_ios_start_flusher, OPT_U64, 500)
1239OPTION(filestore_wbthrottle_btrfs_ios_hard_limit, OPT_U64, 5000)
1240OPTION(filestore_wbthrottle_btrfs_inodes_start_flusher, OPT_U64, 500)
1241OPTION(filestore_wbthrottle_xfs_bytes_start_flusher, OPT_U64, 41943040)
1242OPTION(filestore_wbthrottle_xfs_bytes_hard_limit, OPT_U64, 419430400)
1243OPTION(filestore_wbthrottle_xfs_ios_start_flusher, OPT_U64, 500)
1244OPTION(filestore_wbthrottle_xfs_ios_hard_limit, OPT_U64, 5000)
1245OPTION(filestore_wbthrottle_xfs_inodes_start_flusher, OPT_U64, 500)
1246
1247/// These must be less than the fd limit
1248OPTION(filestore_wbthrottle_btrfs_inodes_hard_limit, OPT_U64, 5000)
1249OPTION(filestore_wbthrottle_xfs_inodes_hard_limit, OPT_U64, 5000)
1250
1251//Introduce a O_DSYNC write in the filestore
1252OPTION(filestore_odsync_write, OPT_BOOL, false)
1253
1254// Tests index failure paths
1255OPTION(filestore_index_retry_probability, OPT_DOUBLE, 0)
1256
1257// Allow object read error injection
1258OPTION(filestore_debug_inject_read_err, OPT_BOOL, false)
224ce89b 1259OPTION(filestore_debug_random_read_err, OPT_DOUBLE, 0)
7c673cae 1260
31f18b77 1261OPTION(filestore_debug_omap_check, OPT_BOOL, false) // Expensive debugging check on sync
7c673cae
FG
1262OPTION(filestore_omap_header_cache_size, OPT_INT, 1024)
1263
1264// Use omap for xattrs for attrs over
1265// filestore_max_inline_xattr_size or
1266OPTION(filestore_max_inline_xattr_size, OPT_U32, 0) //Override
1267OPTION(filestore_max_inline_xattr_size_xfs, OPT_U32, 65536)
1268OPTION(filestore_max_inline_xattr_size_btrfs, OPT_U32, 2048)
1269OPTION(filestore_max_inline_xattr_size_other, OPT_U32, 512)
1270
1271// for more than filestore_max_inline_xattrs attrs
1272OPTION(filestore_max_inline_xattrs, OPT_U32, 0) //Override
1273OPTION(filestore_max_inline_xattrs_xfs, OPT_U32, 10)
1274OPTION(filestore_max_inline_xattrs_btrfs, OPT_U32, 10)
1275OPTION(filestore_max_inline_xattrs_other, OPT_U32, 2)
1276
1277// max xattr value size
1278OPTION(filestore_max_xattr_value_size, OPT_U32, 0) //Override
1279OPTION(filestore_max_xattr_value_size_xfs, OPT_U32, 64<<10)
1280OPTION(filestore_max_xattr_value_size_btrfs, OPT_U32, 64<<10)
1281// ext4 allows 4k xattrs total including some smallish extra fields and the
1282// keys. We're allowing 2 512 inline attrs in addition to some filestore
1283// replay attrs. After accounting for those, we still need to fit up to
1284// two attrs of this value. That means we need this value to be around 1k
1285// to be safe. This is hacky, but it's not worth complicating the code
1286// to work around ext4's total xattr limit.
1287OPTION(filestore_max_xattr_value_size_other, OPT_U32, 1<<10)
1288
1289OPTION(filestore_sloppy_crc, OPT_BOOL, false) // track sloppy crcs
1290OPTION(filestore_sloppy_crc_block_size, OPT_INT, 65536)
1291
1292OPTION(filestore_max_alloc_hint_size, OPT_U64, 1ULL << 20) // bytes
1293
1294OPTION(filestore_max_sync_interval, OPT_DOUBLE, 5) // seconds
1295OPTION(filestore_min_sync_interval, OPT_DOUBLE, .01) // seconds
1296OPTION(filestore_btrfs_snap, OPT_BOOL, true)
1297OPTION(filestore_btrfs_clone_range, OPT_BOOL, true)
1298OPTION(filestore_zfs_snap, OPT_BOOL, false) // zfsonlinux is still unstable
1299OPTION(filestore_fsync_flushes_journal_data, OPT_BOOL, false)
1300OPTION(filestore_fiemap, OPT_BOOL, false) // (try to) use fiemap
1301OPTION(filestore_punch_hole, OPT_BOOL, false)
1302OPTION(filestore_seek_data_hole, OPT_BOOL, false) // (try to) use seek_data/hole
1303OPTION(filestore_splice, OPT_BOOL, false)
1304OPTION(filestore_fadvise, OPT_BOOL, true)
1305//collect device partition information for management application to use
1306OPTION(filestore_collect_device_partition_information, OPT_BOOL, true)
1307
1308// (try to) use extsize for alloc hint NOTE: extsize seems to trigger
1309// data corruption in xfs prior to kernel 3.5. filestore will
1310// implicitly disable this if it cannot confirm the kernel is newer
1311// than that.
1312// NOTE: This option involves a tradeoff: When disabled, fragmentation is
1313// worse, but large sequential writes are faster. When enabled, large
1314// sequential writes are slower, but fragmentation is reduced.
1315OPTION(filestore_xfs_extsize, OPT_BOOL, false)
1316
1317OPTION(filestore_journal_parallel, OPT_BOOL, false)
1318OPTION(filestore_journal_writeahead, OPT_BOOL, false)
1319OPTION(filestore_journal_trailing, OPT_BOOL, false)
1320OPTION(filestore_queue_max_ops, OPT_U64, 50)
1321OPTION(filestore_queue_max_bytes, OPT_U64, 100 << 20)
1322
1323OPTION(filestore_caller_concurrency, OPT_INT, 10)
1324
1325/// Expected filestore throughput in B/s
1326OPTION(filestore_expected_throughput_bytes, OPT_DOUBLE, 200 << 20)
1327/// Expected filestore throughput in ops/s
1328OPTION(filestore_expected_throughput_ops, OPT_DOUBLE, 200)
1329
1330/// Filestore max delay multiple. Defaults to 0 (disabled)
1331OPTION(filestore_queue_max_delay_multiple, OPT_DOUBLE, 0)
1332/// Filestore high delay multiple. Defaults to 0 (disabled)
1333OPTION(filestore_queue_high_delay_multiple, OPT_DOUBLE, 0)
1334
1335/// Use above to inject delays intended to keep the op queue between low and high
1336OPTION(filestore_queue_low_threshhold, OPT_DOUBLE, 0.3)
1337OPTION(filestore_queue_high_threshhold, OPT_DOUBLE, 0.9)
1338
1339OPTION(filestore_op_threads, OPT_INT, 2)
1340OPTION(filestore_op_thread_timeout, OPT_INT, 60)
1341OPTION(filestore_op_thread_suicide_timeout, OPT_INT, 180)
1342OPTION(filestore_commit_timeout, OPT_FLOAT, 600)
1343OPTION(filestore_fiemap_threshold, OPT_INT, 4096)
1344OPTION(filestore_merge_threshold, OPT_INT, 10)
1345OPTION(filestore_split_multiple, OPT_INT, 2)
224ce89b 1346OPTION(filestore_split_rand_factor, OPT_U32, 20) // randomize the split threshold by adding 16 * [0, rand_factor)
7c673cae
FG
1347OPTION(filestore_update_to, OPT_INT, 1000)
1348OPTION(filestore_blackhole, OPT_BOOL, false) // drop any new transactions on the floor
1349OPTION(filestore_fd_cache_size, OPT_INT, 128) // FD lru size
1350OPTION(filestore_fd_cache_shards, OPT_INT, 16) // FD number of shards
1351OPTION(filestore_ondisk_finisher_threads, OPT_INT, 1)
1352OPTION(filestore_apply_finisher_threads, OPT_INT, 1)
1353OPTION(filestore_dump_file, OPT_STR, "") // file onto which store transaction dumps
1354OPTION(filestore_kill_at, OPT_INT, 0) // inject a failure at the n'th opportunity
1355OPTION(filestore_inject_stall, OPT_INT, 0) // artificially stall for N seconds in op queue thread
1356OPTION(filestore_fail_eio, OPT_BOOL, true) // fail/crash on EIO
1357OPTION(filestore_debug_verify_split, OPT_BOOL, false)
1358OPTION(journal_dio, OPT_BOOL, true)
1359OPTION(journal_aio, OPT_BOOL, true)
1360OPTION(journal_force_aio, OPT_BOOL, false)
1361OPTION(journal_block_size, OPT_INT, 4096)
1362
1363// max bytes to search ahead in journal searching for corruption
1364OPTION(journal_max_corrupt_search, OPT_U64, 10<<20)
1365OPTION(journal_block_align, OPT_BOOL, true)
1366OPTION(journal_write_header_frequency, OPT_U64, 0)
1367OPTION(journal_max_write_bytes, OPT_INT, 10 << 20)
1368OPTION(journal_max_write_entries, OPT_INT, 100)
1369
1370/// Target range for journal fullness
1371OPTION(journal_throttle_low_threshhold, OPT_DOUBLE, 0.6)
1372OPTION(journal_throttle_high_threshhold, OPT_DOUBLE, 0.9)
1373
1374/// Multiple over expected at high_threshhold. Defaults to 0 (disabled).
1375OPTION(journal_throttle_high_multiple, OPT_DOUBLE, 0)
1376/// Multiple over expected at max. Defaults to 0 (disabled).
1377OPTION(journal_throttle_max_multiple, OPT_DOUBLE, 0)
1378
1379OPTION(journal_align_min_size, OPT_INT, 64 << 10) // align data payloads >= this.
1380OPTION(journal_replay_from, OPT_INT, 0)
1381OPTION(journal_zero_on_create, OPT_BOOL, false)
1382OPTION(journal_ignore_corruption, OPT_BOOL, false) // assume journal is not corrupt
1383OPTION(journal_discard, OPT_BOOL, false) // when using an ssd as the journal, whether to discard journal data that is no longer in use.
1384
1385OPTION(fio_dir, OPT_STR, "/tmp/fio") // fio data directory for fio-objectstore
1386
31f18b77 1387OPTION(rados_mon_op_timeout, OPT_DOUBLE, 0) // how many seconds to wait for a response from the monitor before returning an error from a rados operation. 0 means no limit.
7c673cae
FG
1388OPTION(rados_osd_op_timeout, OPT_DOUBLE, 0) // how many seconds to wait for a response from osds before returning an error from a rados operation. 0 means no limit.
1389OPTION(rados_tracing, OPT_BOOL, false) // true if LTTng-UST tracepoints should be enabled
1390
1391OPTION(rbd_op_threads, OPT_INT, 1)
1392OPTION(rbd_op_thread_timeout, OPT_INT, 60)
1393OPTION(rbd_non_blocking_aio, OPT_BOOL, true) // process AIO ops from a worker thread to prevent blocking
1394OPTION(rbd_cache, OPT_BOOL, true) // whether to enable caching (writeback unless rbd_cache_max_dirty is 0)
1395OPTION(rbd_cache_writethrough_until_flush, OPT_BOOL, true) // whether to make writeback caching writethrough until flush is called, to be sure the user of librbd will send flushes so that writeback is safe
1396OPTION(rbd_cache_size, OPT_LONGLONG, 32<<20) // cache size in bytes
1397OPTION(rbd_cache_max_dirty, OPT_LONGLONG, 24<<20) // dirty limit in bytes - set to 0 for write-through caching
1398OPTION(rbd_cache_target_dirty, OPT_LONGLONG, 16<<20) // target dirty limit in bytes
1399OPTION(rbd_cache_max_dirty_age, OPT_FLOAT, 1.0) // seconds in cache before writeback starts
1400OPTION(rbd_cache_max_dirty_object, OPT_INT, 0) // dirty limit for objects - set to 0 for auto calculate from rbd_cache_size
1401OPTION(rbd_cache_block_writes_upfront, OPT_BOOL, false) // whether to block writes to the cache before the aio_write call completes (true), or block before the aio completion is called (false)
1402OPTION(rbd_concurrent_management_ops, OPT_INT, 10) // how many operations can be in flight for a management operation like deleting or resizing an image
1403OPTION(rbd_balance_snap_reads, OPT_BOOL, false)
1404OPTION(rbd_localize_snap_reads, OPT_BOOL, false)
1405OPTION(rbd_balance_parent_reads, OPT_BOOL, false)
1406OPTION(rbd_localize_parent_reads, OPT_BOOL, true)
1407OPTION(rbd_readahead_trigger_requests, OPT_INT, 10) // number of sequential requests necessary to trigger readahead
1408OPTION(rbd_readahead_max_bytes, OPT_LONGLONG, 512 * 1024) // set to 0 to disable readahead
1409OPTION(rbd_readahead_disable_after_bytes, OPT_LONGLONG, 50 * 1024 * 1024) // how many bytes are read in total before readahead is disabled
1410OPTION(rbd_clone_copy_on_read, OPT_BOOL, false)
1411OPTION(rbd_blacklist_on_break_lock, OPT_BOOL, true) // whether to blacklist clients whose lock was broken
1412OPTION(rbd_blacklist_expire_seconds, OPT_INT, 0) // number of seconds to blacklist - set to 0 for OSD default
1413OPTION(rbd_request_timed_out_seconds, OPT_INT, 30) // number of seconds before maint request times out
1414OPTION(rbd_skip_partial_discard, OPT_BOOL, false) // when trying to discard a range inside an object, set to true to skip zeroing the range.
1415OPTION(rbd_enable_alloc_hint, OPT_BOOL, true) // when writing an object, issue a hint to the osd backend indicating the expected object size
1416OPTION(rbd_tracing, OPT_BOOL, false) // true if LTTng-UST tracepoints should be enabled
31f18b77 1417OPTION(rbd_blkin_trace_all, OPT_BOOL, false) // create a blkin trace for all RBD requests
7c673cae
FG
1418OPTION(rbd_validate_pool, OPT_BOOL, true) // true if empty pools should be validated for RBD compatibility
1419OPTION(rbd_validate_names, OPT_BOOL, true) // true if image specs should be validated
1420OPTION(rbd_auto_exclusive_lock_until_manual_request, OPT_BOOL, true) // whether to automatically acquire/release exclusive lock until it is explicitly requested, i.e. before we know the user of librbd is properly using the lock API
1421OPTION(rbd_mirroring_resync_after_disconnect, OPT_BOOL, false) // automatically start image resync after mirroring is disconnected due to being laggy
1422OPTION(rbd_mirroring_replay_delay, OPT_INT, 0) // time-delay in seconds for rbd-mirror asynchronous replication
1423
31f18b77
FG
1424OPTION(rbd_default_pool, OPT_STR, "rbd") // default pool for storing images
1425OPTION_VALIDATOR(rbd_default_pool)
1426
7c673cae
FG
1427/*
1428 * The following options change the behavior for librbd's image creation methods that
1429 * don't require all of the parameters. These are provided so that older programs
1430 * can take advantage of newer features without being rewritten to use new versions
1431 * of the image creation functions.
1432 *
1433 * rbd_create()/RBD::create() are affected by all of these options.
1434 *
1435 * rbd_create2()/RBD::create2() and rbd_clone()/RBD::clone() are affected by:
1436 * - rbd_default_order
1437 * - rbd_default_stripe_count
1438 * - rbd_default_stripe_unit
1439 *
1440 * rbd_create3()/RBD::create3() and rbd_clone2/RBD::clone2() are only
1441 * affected by rbd_default_order.
1442 */
1443OPTION(rbd_default_format, OPT_INT, 2)
1444OPTION(rbd_default_order, OPT_INT, 22)
1445OPTION(rbd_default_stripe_count, OPT_U64, 0) // changing requires stripingv2 feature
1446OPTION(rbd_default_stripe_unit, OPT_U64, 0) // changing to non-object size requires stripingv2 feature
1447OPTION(rbd_default_data_pool, OPT_STR, "") // optional default pool for storing image data blocks
31f18b77 1448OPTION_VALIDATOR(rbd_default_data_pool)
7c673cae
FG
1449
1450/**
1451 * RBD features are only applicable for v2 images. This setting accepts either
1452 * an integer bitmask value or comma-delimited string of RBD feature names.
1453 * This setting is always internally stored as an integer bitmask value. The
1454 * mapping between feature bitmask value and feature name is as follows:
1455 *
1456 * +1 -> layering
1457 * +2 -> striping
1458 * +4 -> exclusive-lock
1459 * +8 -> object-map
1460 * +16 -> fast-diff
1461 * +32 -> deep-flatten
1462 * +64 -> journaling
1463 * +128 -> data-pool
1464 */
1465SAFE_OPTION(rbd_default_features, OPT_STR, "layering,exclusive-lock,object-map,fast-diff,deep-flatten")
1466OPTION_VALIDATOR(rbd_default_features)
1467
1468OPTION(rbd_default_map_options, OPT_STR, "") // default rbd map -o / --options
1469
1470/**
1471 * RBD journal options.
1472 */
1473OPTION(rbd_journal_order, OPT_U32, 24) // bits to shift to compute journal object max size, between 12 and 64
1474OPTION(rbd_journal_splay_width, OPT_U32, 4) // number of active journal objects
1475OPTION(rbd_journal_commit_age, OPT_DOUBLE, 5) // commit time interval, seconds
1476OPTION(rbd_journal_object_flush_interval, OPT_INT, 0) // maximum number of pending commits per journal object
1477OPTION(rbd_journal_object_flush_bytes, OPT_INT, 0) // maximum number of pending bytes per journal object
1478OPTION(rbd_journal_object_flush_age, OPT_DOUBLE, 0) // maximum age (in seconds) for pending commits
1479OPTION(rbd_journal_pool, OPT_STR, "") // pool for journal objects
1480OPTION(rbd_journal_max_payload_bytes, OPT_U32, 16384) // maximum journal payload size before splitting
1481OPTION(rbd_journal_max_concurrent_object_sets, OPT_INT, 0) // maximum number of object sets a journal client can be behind before it is automatically unregistered
1482
1483/**
1484 * RBD Mirror options
1485 */
1486OPTION(rbd_mirror_journal_commit_age, OPT_DOUBLE, 5) // commit time interval, seconds
1487OPTION(rbd_mirror_journal_poll_age, OPT_DOUBLE, 5) // maximum age (in seconds) between successive journal polls
1488OPTION(rbd_mirror_journal_max_fetch_bytes, OPT_U32, 32768) // maximum bytes to read from each journal data object per fetch
1489OPTION(rbd_mirror_sync_point_update_age, OPT_DOUBLE, 30) // number of seconds between each update of the image sync point object number
1490OPTION(rbd_mirror_concurrent_image_syncs, OPT_U32, 5) // maximum number of image syncs in parallel
1491OPTION(rbd_mirror_pool_replayers_refresh_interval, OPT_INT, 30) // interval to refresh peers in rbd-mirror daemon
1492OPTION(rbd_mirror_delete_retry_interval, OPT_DOUBLE, 30) // interval to check and retry the failed requests in deleter
1493OPTION(rbd_mirror_image_state_check_interval, OPT_INT, 30) // interval to get images from pool watcher and set sources in replayer
1494OPTION(rbd_mirror_leader_heartbeat_interval, OPT_INT, 5) // interval (in seconds) between mirror leader heartbeats
1495OPTION(rbd_mirror_leader_max_missed_heartbeats, OPT_INT, 2) // number of missed heartbeats for non-lock owner to attempt to acquire lock
1496OPTION(rbd_mirror_leader_max_acquire_attempts_before_break, OPT_INT, 3) // number of failed attempts to acquire lock after missing heartbeats before breaking lock
1497
1498OPTION(nss_db_path, OPT_STR, "") // path to nss db
1499
1500
1501OPTION(rgw_max_chunk_size, OPT_INT, 4 * 1024 * 1024)
1502OPTION(rgw_put_obj_min_window_size, OPT_INT, 16 * 1024 * 1024)
1503OPTION(rgw_put_obj_max_window_size, OPT_INT, 64 * 1024 * 1024)
1504OPTION(rgw_max_put_size, OPT_U64, 5ULL*1024*1024*1024)
1505OPTION(rgw_max_put_param_size, OPT_U64, 1 * 1024 * 1024) // max input size for PUT requests accepting json/xml params
1506
1507/**
1508 * override max bucket index shards in zone configuration (if not zero)
1509 *
1510 * Represents the number of shards for the bucket index object, a value of zero
1511 * indicates there is no sharding. By default (no sharding), the name of the object
1512 * is '.dir.{marker}'; with sharding, the name is '.dir.{marker}.{sharding_id}',
1513 * sharding_id is zero-based value. It is not recommended to set a too large value
1514 * (e.g. thousand) as it increases the cost for bucket listing.
1515 */
1516OPTION(rgw_override_bucket_index_max_shards, OPT_U32, 0)
1517
1518/**
1519 * Represents the maximum AIO pending requests for the bucket index object shards.
1520 */
1521OPTION(rgw_bucket_index_max_aio, OPT_U32, 8)
1522
1523/**
1524 * whether or not the quota/gc threads should be started
1525 */
1526OPTION(rgw_enable_quota_threads, OPT_BOOL, true)
1527OPTION(rgw_enable_gc_threads, OPT_BOOL, true)
1528OPTION(rgw_enable_lc_threads, OPT_BOOL, true)
1529
1530
1531OPTION(rgw_data, OPT_STR, "/var/lib/ceph/radosgw/$cluster-$id")
1532OPTION(rgw_enable_apis, OPT_STR, "s3, s3website, swift, swift_auth, admin")
1533OPTION(rgw_cache_enabled, OPT_BOOL, true) // rgw cache enabled
1534OPTION(rgw_cache_lru_size, OPT_INT, 10000) // num of entries in rgw cache
1535OPTION(rgw_socket_path, OPT_STR, "") // path to unix domain socket, if not specified, rgw will not run as external fcgi
1536OPTION(rgw_host, OPT_STR, "") // host for radosgw, can be an IP, default is 0.0.0.0
1537OPTION(rgw_port, OPT_STR, "") // port to listen, format as "8080" "5000", if not specified, rgw will not run external fcgi
1538OPTION(rgw_dns_name, OPT_STR, "") // hostname suffix on buckets
1539OPTION(rgw_dns_s3website_name, OPT_STR, "") // hostname suffix on buckets for s3-website endpoint
1540OPTION(rgw_content_length_compat, OPT_BOOL, false) // Check both HTTP_CONTENT_LENGTH and CONTENT_LENGTH in fcgi env
1541OPTION(rgw_lifecycle_work_time, OPT_STR, "00:00-06:00") // time window in which the lc job runs (default 00:00-06:00)
1542OPTION(rgw_lc_lock_max_time, OPT_INT, 60) // total run time for a single lc processor work
1543OPTION(rgw_lc_max_objs, OPT_INT, 32)
1544OPTION(rgw_lc_debug_interval, OPT_INT, -1) // Debug run interval, in seconds
1545OPTION(rgw_script_uri, OPT_STR, "") // alternative value for SCRIPT_URI if not set in request
1546OPTION(rgw_request_uri, OPT_STR, "") // alternative value for REQUEST_URI if not set in request
1547OPTION(rgw_swift_url, OPT_STR, "") // the swift url, being published by the internal swift auth
1548OPTION(rgw_swift_url_prefix, OPT_STR, "swift") // entry point for which a url is considered a swift url
1549OPTION(rgw_swift_auth_url, OPT_STR, "") // default URL to go and verify tokens for v1 auth (if not using internal swift auth)
1550OPTION(rgw_swift_auth_entry, OPT_STR, "auth") // entry point for which a url is considered a swift auth url
1551OPTION(rgw_swift_tenant_name, OPT_STR, "") // tenant name to use for swift access
1552OPTION(rgw_swift_account_in_url, OPT_BOOL, false) // assume that the URL always contains the account (aka tenant) part
1553OPTION(rgw_swift_enforce_content_length, OPT_BOOL, false) // enforce generation of Content-Length even at the cost of performance or scalability
1554OPTION(rgw_keystone_url, OPT_STR, "") // url for keystone server
1555OPTION(rgw_keystone_admin_token, OPT_STR, "") // keystone admin token (shared secret)
1556OPTION(rgw_keystone_admin_user, OPT_STR, "") // keystone admin user name
1557OPTION(rgw_keystone_admin_password, OPT_STR, "") // keystone admin user password
1558OPTION(rgw_keystone_admin_tenant, OPT_STR, "") // keystone admin user tenant (for keystone v2.0)
1559OPTION(rgw_keystone_admin_project, OPT_STR, "") // keystone admin user project (for keystone v3)
1560OPTION(rgw_keystone_admin_domain, OPT_STR, "") // keystone admin user domain
1561OPTION(rgw_keystone_barbican_user, OPT_STR, "") // keystone user to access barbican secrets
1562OPTION(rgw_keystone_barbican_password, OPT_STR, "") // keystone password for barbican user
1563OPTION(rgw_keystone_barbican_tenant, OPT_STR, "") // keystone barbican user tenant (for keystone v2.0)
1564OPTION(rgw_keystone_barbican_project, OPT_STR, "") // keystone barbican user project (for keystone v3)
1565OPTION(rgw_keystone_barbican_domain, OPT_STR, "") // keystone barbican user domain
1566OPTION(rgw_keystone_api_version, OPT_INT, 2) // Version of Keystone API to use (2 or 3)
1567OPTION(rgw_keystone_accepted_roles, OPT_STR, "Member, admin") // roles required to serve requests
1568OPTION(rgw_keystone_accepted_admin_roles, OPT_STR, "") // list of roles allowing a user to gain admin privileges
1569OPTION(rgw_keystone_token_cache_size, OPT_INT, 10000) // max number of entries in keystone token cache
1570OPTION(rgw_keystone_revocation_interval, OPT_INT, 15 * 60) // seconds between token revocation checks
1571OPTION(rgw_keystone_verify_ssl, OPT_BOOL, true) // should we try to verify keystone's ssl
1572OPTION(rgw_keystone_implicit_tenants, OPT_BOOL, false) // create new users in their own tenants of the same name
1573OPTION(rgw_cross_domain_policy, OPT_STR, "<allow-access-from domain=\"*\" secure=\"false\" />")
1574OPTION(rgw_healthcheck_disabling_path, OPT_STR, "") // path whose existence causes the healthcheck to respond 503
1575OPTION(rgw_s3_auth_use_rados, OPT_BOOL, true) // should we try to use the internal credentials for s3?
1576OPTION(rgw_s3_auth_use_keystone, OPT_BOOL, false) // should we try to use keystone for s3?
1577OPTION(rgw_s3_auth_aws4_force_boto2_compat, OPT_BOOL, true) // force aws4 auth boto2 compatibility
1578OPTION(rgw_barbican_url, OPT_STR, "") // url for barbican server
1579
1580/* OpenLDAP-style LDAP parameter strings */
1581/* rgw_ldap_uri: space-separated list of LDAP servers in URI format */
1582OPTION(rgw_ldap_uri, OPT_STR, "ldaps://<ldap.your.domain>")
1583/* rgw_ldap_binddn: LDAP entry RGW will bind with (user match) */
1584OPTION(rgw_ldap_binddn, OPT_STR, "uid=admin,cn=users,dc=example,dc=com")
1585/* rgw_ldap_searchdn: LDAP search base (basedn) */
1586OPTION(rgw_ldap_searchdn, OPT_STR, "cn=users,cn=accounts,dc=example,dc=com")
1587/* rgw_ldap_dnattr: LDAP attribute containing RGW user names (to form binddns) */
1588OPTION(rgw_ldap_dnattr, OPT_STR, "uid")
1589/* rgw_ldap_secret: file containing credentials for rgw_ldap_binddn */
1590OPTION(rgw_ldap_secret, OPT_STR, "/etc/openldap/secret")
1591/* rgw_s3_auth_use_ldap: use LDAP for RGW auth? (off by default) */
1592OPTION(rgw_s3_auth_use_ldap, OPT_BOOL, false)
1593/* rgw_ldap_searchfilter: LDAP search filter (empty by default) */
1594OPTION(rgw_ldap_searchfilter, OPT_STR, "")
1595
1596OPTION(rgw_admin_entry, OPT_STR, "admin") // entry point for which a url is considered an admin request
1597OPTION(rgw_enforce_swift_acls, OPT_BOOL, true)
1598OPTION(rgw_swift_token_expiration, OPT_INT, 24 * 3600) // time in seconds for swift token expiration (default is 24 hours)
1599OPTION(rgw_print_continue, OPT_BOOL, true) // enable if 100-Continue works
1600OPTION(rgw_print_prohibited_content_length, OPT_BOOL, false) // violate RFC 7230 and send Content-Length in 204 and 304
1601OPTION(rgw_remote_addr_param, OPT_STR, "REMOTE_ADDR") // e.g. X-Forwarded-For, if you have a reverse proxy
1602OPTION(rgw_op_thread_timeout, OPT_INT, 10*60) // NOTE(review): unit not stated; presumably seconds -- confirm
1603OPTION(rgw_op_thread_suicide_timeout, OPT_INT, 0)
1604OPTION(rgw_thread_pool_size, OPT_INT, 100)
1605OPTION(rgw_num_control_oids, OPT_INT, 8)
1606OPTION(rgw_num_rados_handles, OPT_U32, 1)
31f18b77 1607OPTION(rgw_verify_ssl, OPT_BOOL, true) // should http_client try to verify ssl when sending an https request
7c673cae
FG
1608
1609/* The following are tunables for caches of RGW NFS (and other file
1610 * client) objects.
1611 *
1612 * The file handle cache is a partitioned hash table
1613 * (fhcache_partitions), each with a closed hash part and backing
1614 * b-tree mapping. The number of partitions is expected to be a small
1615 * prime, the cache size something larger but less than 5K; the total
1616 * size of the cache is n_part * cache_size.
1617 */
1618OPTION(rgw_nfs_lru_lanes, OPT_INT, 5)
1619OPTION(rgw_nfs_lru_lane_hiwat, OPT_INT, 911)
1620OPTION(rgw_nfs_fhcache_partitions, OPT_INT, 3)
1621OPTION(rgw_nfs_fhcache_size, OPT_INT, 2017) /* 3*2017=6051 */
1622OPTION(rgw_nfs_namespace_expire_secs, OPT_INT, 300) /* namespace invalidate
1623 * timer */
1624OPTION(rgw_nfs_max_gc, OPT_INT, 300) /* max gc events per cycle */
1625OPTION(rgw_nfs_write_completion_interval_s, OPT_INT, 10) /* stateless (V3)
1626 * commit
1627 * delay */
1628
/* zone / region / zonegroup / realm / period naming; every *_root_pool option below defaults to .rgw.root */
1629OPTION(rgw_zone, OPT_STR, "") // zone name
1630OPTION(rgw_zone_root_pool, OPT_STR, ".rgw.root") // pool where zone specific info is stored
1631OPTION(rgw_default_zone_info_oid, OPT_STR, "default.zone") // oid where default zone info is stored
1632OPTION(rgw_region, OPT_STR, "") // region name
1633OPTION(rgw_region_root_pool, OPT_STR, ".rgw.root") // pool where all region info is stored
1634OPTION(rgw_default_region_info_oid, OPT_STR, "default.region") // oid where default region info is stored
1635OPTION(rgw_zonegroup, OPT_STR, "") // zone group name
1636OPTION(rgw_zonegroup_root_pool, OPT_STR, ".rgw.root") // pool where all zone group info is stored
1637OPTION(rgw_default_zonegroup_info_oid, OPT_STR, "default.zonegroup") // oid where default zone group info is stored
1638OPTION(rgw_realm, OPT_STR, "") // realm name
1639OPTION(rgw_realm_root_pool, OPT_STR, ".rgw.root") // pool where all realm info is stored
1640OPTION(rgw_default_realm_info_oid, OPT_STR, "default.realm") // oid where default realm info is stored
1641OPTION(rgw_period_root_pool, OPT_STR, ".rgw.root") // pool where all period info is stored
1642OPTION(rgw_period_latest_epoch_info_oid, OPT_STR, ".latest_epoch") // oid where current period info is stored
1643OPTION(rgw_log_nonexistent_bucket, OPT_BOOL, false)
1644OPTION(rgw_log_object_name, OPT_STR, "%Y-%m-%d-%H-%i-%n") // see the date man page for the format codes (only a subset is supported)
1645OPTION(rgw_log_object_name_utc, OPT_BOOL, false)
1646OPTION(rgw_usage_max_shards, OPT_INT, 32)
1647OPTION(rgw_usage_max_user_shards, OPT_INT, 1)
1648OPTION(rgw_enable_ops_log, OPT_BOOL, false) // enable logging every rgw operation
1649OPTION(rgw_enable_usage_log, OPT_BOOL, false) // enable logging bandwidth usage
1650OPTION(rgw_ops_log_rados, OPT_BOOL, true) // whether ops log should go to rados
1651OPTION(rgw_ops_log_socket_path, OPT_STR, "") // path to unix domain socket where ops log can go
1652OPTION(rgw_ops_log_data_backlog, OPT_INT, 5 << 20) // max data backlog for ops log (default 5 * 2^20)
1653OPTION(rgw_fcgi_socket_backlog, OPT_INT, 1024) // socket backlog for fcgi
1654OPTION(rgw_usage_log_flush_threshold, OPT_INT, 1024) // threshold to flush pending log data
1655OPTION(rgw_usage_log_tick_interval, OPT_INT, 30) // flush pending log data every X seconds
1656OPTION(rgw_intent_log_object_name, OPT_STR, "%Y-%m-%d-%i-%n") // see the date man page for the format codes (only a subset is supported)
1657OPTION(rgw_intent_log_object_name_utc, OPT_BOOL, false)
1658OPTION(rgw_init_timeout, OPT_INT, 300) // time in seconds
1659OPTION(rgw_mime_types_file, OPT_STR, "/etc/mime.types")
1660OPTION(rgw_gc_max_objs, OPT_INT, 32)
1661OPTION(rgw_gc_obj_min_wait, OPT_INT, 2 * 3600) // wait time before object may be handled by gc (presumably seconds, i.e. 2 hours -- TODO confirm)
1662OPTION(rgw_gc_processor_max_time, OPT_INT, 3600) // total run time for a single gc processor work
1663OPTION(rgw_gc_processor_period, OPT_INT, 3600) // gc processor cycle time
1664OPTION(rgw_s3_success_create_obj_status, OPT_INT, 0) // alternative success status response for create-obj (0 - default)
1665OPTION(rgw_resolve_cname, OPT_BOOL, false) // should rgw try to resolve hostname as a dns cname record
1666OPTION(rgw_obj_stripe_size, OPT_INT, 4 << 20)
1667OPTION(rgw_extended_http_attrs, OPT_STR, "") // list of extended attrs that can be set on objects (beyond the default)
1668OPTION(rgw_exit_timeout_secs, OPT_INT, 120) // how many seconds to wait for process to go down before exiting unconditionally
1669OPTION(rgw_get_obj_window_size, OPT_INT, 16 << 20) // window size in bytes for single get obj request
1670OPTION(rgw_get_obj_max_req_size, OPT_INT, 4 << 20) // max length of a single get obj rados op
1671OPTION(rgw_relaxed_s3_bucket_names, OPT_BOOL, false) // enable relaxed bucket name rules for US region buckets
1672OPTION(rgw_defer_to_bucket_acls, OPT_STR, "") // if the user has bucket perms, use those before key perms (recurse and full_control)
1673OPTION(rgw_list_buckets_max_chunk, OPT_INT, 1000) // max buckets to retrieve in a single op when listing user buckets
1674OPTION(rgw_md_log_max_shards, OPT_INT, 64) // max shards for metadata log
1675OPTION(rgw_num_zone_opstate_shards, OPT_INT, 128) // max shards for keeping inter-region copy progress info
1676OPTION(rgw_opstate_ratelimit_sec, OPT_INT, 30) // min time between opstate updates on a single upload (0 disables the ratelimit)
1677OPTION(rgw_curl_wait_timeout_ms, OPT_INT, 1000) // timeout for certain curl calls
1678OPTION(rgw_copy_obj_progress, OPT_BOOL, true) // should dump progress during long copy operations?
1679OPTION(rgw_copy_obj_progress_every_bytes, OPT_INT, 1024 * 1024) // min bytes between copy progress output
1680OPTION(rgw_obj_tombstone_cache_size, OPT_INT, 1000) // how many objects in tombstone cache, which is used in multi-zone sync to keep
1681 // track of removed objects' mtime
1682
1683OPTION(rgw_data_log_window, OPT_INT, 30) // data log entries window (in seconds)
1684OPTION(rgw_data_log_changes_size, OPT_INT, 1000) // number of in-memory entries to hold for data changes log
1685OPTION(rgw_data_log_num_shards, OPT_INT, 128) // number of objects to keep data changes log on
1686OPTION(rgw_data_log_obj_prefix, OPT_STR, "data_log") // NOTE(review): presumably the name prefix of the data log objects -- confirm
1687OPTION(rgw_replica_log_obj_prefix, OPT_STR, "replica_log") // NOTE(review): presumably the name prefix of the replica log objects -- confirm
1688
1689OPTION(rgw_bucket_quota_ttl, OPT_INT, 600) // how long bucket stats stay cached within an rgw instance
1690OPTION(rgw_bucket_quota_soft_threshold, OPT_DOUBLE, 0.95) // threshold from which we don't rely on cached info for quota decisions
1691OPTION(rgw_bucket_quota_cache_size, OPT_INT, 10000) // number of entries in bucket quota cache
1692OPTION(rgw_bucket_default_quota_max_objects, OPT_INT, -1) // number of objects allowed
1693OPTION(rgw_bucket_default_quota_max_size, OPT_LONGLONG, -1) // Max size of object in bytes. NOTE(review): as a bucket quota this is presumably the total size per bucket, not per object -- confirm
1694
1695OPTION(rgw_expose_bucket, OPT_BOOL, false) // Return the bucket name in the 'Bucket' response header
1696
31f18b77 1697OPTION(rgw_frontends, OPT_STR, "civetweb port=7480") // rgw front ends
7c673cae
FG
1698
1699OPTION(rgw_user_quota_bucket_sync_interval, OPT_INT, 180) // time period for accumulating modified buckets before syncing stats
1700OPTION(rgw_user_quota_sync_interval, OPT_INT, 3600 * 24) // time period for accumulating modified buckets before syncing entire user stats
1701OPTION(rgw_user_quota_sync_idle_users, OPT_BOOL, false) // whether stats for idle users should be fully synced
1702OPTION(rgw_user_quota_sync_wait_time, OPT_INT, 3600 * 24) // min time between two full stats syncs for non-idle users
1703OPTION(rgw_user_default_quota_max_objects, OPT_INT, -1) // number of objects allowed
1704OPTION(rgw_user_default_quota_max_size, OPT_LONGLONG, -1) // Max size of object in bytes. NOTE(review): as a user quota this is presumably the total size per user, not per object -- confirm
1705
1706OPTION(rgw_multipart_min_part_size, OPT_INT, 5 * 1024 * 1024) // min size for each part (except for the last one) in a multipart upload
1707OPTION(rgw_multipart_part_upload_limit, OPT_INT, 10000) // parts limit in multipart upload
1708
1709OPTION(rgw_max_slo_entries, OPT_INT, 1000) // default number of max entries in slo
1710
1711OPTION(rgw_olh_pending_timeout_sec, OPT_INT, 3600) // time until we retire a pending olh change
1712OPTION(rgw_user_max_buckets, OPT_INT, 1000) // global option to set max buckets count for all users
1713
1714OPTION(rgw_objexp_gc_interval, OPT_U32, 60 * 10) // maximum time between rounds of expired-object garbage collection
1715OPTION(rgw_objexp_time_step, OPT_U32, 4096) // number of seconds for rounding the timestamps
1716OPTION(rgw_objexp_hints_num_shards, OPT_U32, 127) // maximum number of parts in which the hint index is stored
1717OPTION(rgw_objexp_chunk_size, OPT_U32, 100) // maximum number of entries in a single operation when processing objexp data
1718
1719OPTION(rgw_enable_static_website, OPT_BOOL, false) // enable static website feature
1720OPTION(rgw_log_http_headers, OPT_STR, "" ) // list of HTTP headers to log when seen, ignores case (e.g., http_x_forwarded_for)
1721
1722OPTION(rgw_num_async_rados_threads, OPT_INT, 32) // num of threads to use for async rados operations
1723OPTION(rgw_md_notify_interval_msec, OPT_INT, 200) // metadata changes notification interval to followers
1724OPTION(rgw_run_sync_thread, OPT_BOOL, true) // whether radosgw (not radosgw-admin) spawns the sync thread
1725OPTION(rgw_sync_lease_period, OPT_INT, 120) // time in seconds for the lease that rgw takes on a specific log (or log shard)
1726OPTION(rgw_sync_log_trim_interval, OPT_INT, 1200) // time in seconds between attempts to trim sync logs
1727
1728OPTION(rgw_sync_data_inject_err_probability, OPT_DOUBLE, 0) // range [0, 1]
1729OPTION(rgw_sync_meta_inject_err_probability, OPT_DOUBLE, 0) // range [0, 1]
1730
1731
1732OPTION(rgw_period_push_interval, OPT_DOUBLE, 2) // seconds to wait before retrying "period push"
1733OPTION(rgw_period_push_interval_max, OPT_DOUBLE, 30) // maximum interval after exponential backoff
1734
1735OPTION(rgw_safe_max_objects_per_shard, OPT_INT, 100*1024) // safe max loading
1736OPTION(rgw_shard_warning_threshold, OPT_DOUBLE, 90) // pct of safe max
1737 // at which to warn
1738
1739OPTION(rgw_swift_versioning_enabled, OPT_BOOL, false) // whether swift object versioning feature is enabled
1740
1741OPTION(mgr_module_path, OPT_STR, CEPH_PKGLIBDIR "/mgr") // where to load python modules from
224ce89b 1742OPTION(mgr_initial_modules, OPT_STR, "restful status") // Which modules to load
7c673cae 1743OPTION(mgr_data, OPT_STR, "/var/lib/ceph/mgr/$cluster-$id") // where to find keyring etc
31f18b77
FG
1744OPTION(mgr_tick_period, OPT_INT, 2) // How frequently to tick
1745OPTION(mgr_stats_period, OPT_INT, 5) // How frequently clients send stats
7c673cae
FG
1746OPTION(mgr_client_bytes, OPT_U64, 128*1048576) // bytes from clients (default 128 * 2^20)
1747OPTION(mgr_client_messages, OPT_U64, 512) // messages from clients
1748OPTION(mgr_osd_bytes, OPT_U64, 512*1048576) // bytes from osds (default 512 * 2^20)
1749OPTION(mgr_osd_messages, OPT_U64, 8192) // messages from osds
1750OPTION(mgr_mds_bytes, OPT_U64, 128*1048576) // bytes from mdss (default 128 * 2^20)
1751OPTION(mgr_mds_messages, OPT_U64, 128) // messages from mdss
1752OPTION(mgr_mon_bytes, OPT_U64, 128*1048576) // bytes from mons (default 128 * 2^20)
1753OPTION(mgr_mon_messages, OPT_U64, 128) // messages from mons
1754
1755OPTION(mgr_connect_retry_interval, OPT_DOUBLE, 1.0) // NOTE(review): unit not stated; presumably seconds -- confirm
224ce89b 1756OPTION(mgr_service_beacon_grace, OPT_DOUBLE, 60.0) // NOTE(review): unit not stated; presumably seconds -- confirm
7c673cae
FG
1757
1758OPTION(mon_mgr_digest_period, OPT_INT, 5) // How frequently to send digests
1759OPTION(mon_mgr_beacon_grace, OPT_INT, 30) // How long to wait before failing over
1760OPTION(mon_mgr_inactive_grace, OPT_INT, 60) // How long before health WARN -> ERR
224ce89b 1761OPTION(mon_mgr_mkfs_grace, OPT_INT, 60) // How long before we complain about MGR_DOWN
7c673cae
FG
1762OPTION(rgw_crypt_require_ssl, OPT_BOOL, true) // requests including encryption key headers must be sent over ssl
1763OPTION(rgw_crypt_default_encryption_key, OPT_STR, "") // base64-encoded key for encryption of rgw objects
1764OPTION(rgw_crypt_s3_kms_encryption_keys, OPT_STR, "") // extra keys that may be used for aws:kms,
1765 // defined as a map, e.g. "key1=YmluCmJvb3N0CmJvb3N0LQ== key2=b3V0CnNyYwpUZXN0aW5nCg=="
1766OPTION(rgw_crypt_suppress_logs, OPT_BOOL, true) // suppress logs that might print customer key
1767OPTION(rgw_list_bucket_min_readahead, OPT_INT, 1000) // minimum number of entries to read from rados for bucket listing
1768
1769OPTION(rgw_rest_getusage_op_compat, OPT_BOOL, false) // dump description of total stats for s3 GetUsage API
1770
1771OPTION(mutex_perf_counter, OPT_BOOL, false) // enable/disable mutex perf counter
1772OPTION(throttler_perf_counter, OPT_BOOL, true) // enable/disable throttler perf counter
1773
1774/* The following are tunables for torrent data */
1775OPTION(rgw_torrent_flag, OPT_BOOL, false) // produce torrent function flag
1776OPTION(rgw_torrent_tracker, OPT_STR, "") // torrent field announce and announce list
1777OPTION(rgw_torrent_createby, OPT_STR, "") // torrent field created by
1778OPTION(rgw_torrent_comment, OPT_STR, "") // torrent field comment
1779OPTION(rgw_torrent_encoding, OPT_STR, "") // torrent field encoding
1780OPTION(rgw_torrent_origin, OPT_STR, "") // torrent origin
1781OPTION(rgw_torrent_sha_unit, OPT_INT, 512*1024) // torrent field piece length 512K
1782
1783OPTION(event_tracing, OPT_BOOL, false) // true if LTTng-UST tracepoints should be enabled
1784
1785// This will be set to true when it is safe to start threads.
1786// Once it is true, it will never change.
1787OPTION(internal_safe_to_start_threads, OPT_BOOL, false)
1788
1789OPTION(debug_deliberately_leak_memory, OPT_BOOL, false) // NOTE(review): debugging aid per the name; its exact effect is not visible here
1790
1791OPTION(rgw_swift_custom_header, OPT_STR, "") // option to enable swift custom headers
31f18b77 1792
224ce89b
WB
1793OPTION(rgw_swift_need_stats, OPT_BOOL, true) // option to enable stats on bucket listing for swift
1794
31f18b77
FG
1795/* resharding tunables */
1796OPTION(rgw_reshard_num_logs, OPT_INT, 16)
1797OPTION(rgw_reshard_bucket_lock_duration, OPT_INT, 120) // duration of lock on bucket obj during resharding (unit not stated; presumably seconds -- TODO confirm)
1798OPTION(rgw_dynamic_resharding, OPT_BOOL, true) // NOTE(review): presumably turns dynamic resharding on or off; on by default -- confirm
1799OPTION(rgw_max_objs_per_shard, OPT_INT, 100000) // NOTE(review): presumably the per-shard object count considered by resharding -- confirm
1800OPTION(rgw_reshard_thread_interval, OPT_U32, 60 * 10) // maximum time between rounds of reshard thread processing