]> git.proxmox.com Git - ceph.git/blob - ceph/src/common/config_opts.h
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / common / config_opts.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15 /* note: no header guard */
// Each OPTION(name, type, default) line declares one config option. The OPTION
// macro is not defined in this file; the includer is expected to provide it.
16 OPTION(host, OPT_STR, "") // "" means that ceph will use short hostname
17 OPTION(fsid, OPT_UUID, uuid_d())
18 OPTION(public_addr, OPT_ADDR, entity_addr_t())
19 OPTION(cluster_addr, OPT_ADDR, entity_addr_t())
20 OPTION(public_network, OPT_STR, "")
21 OPTION(cluster_network, OPT_STR, "")
22 OPTION(num_client, OPT_INT, 1)
23 OPTION(monmap, OPT_STR, "")
24 OPTION(mon_host, OPT_STR, "")
25 OPTION(mon_dns_srv_name, OPT_STR, "ceph-mon")
26 OPTION(lockdep, OPT_BOOL, false)
27 OPTION(lockdep_force_backtrace, OPT_BOOL, false) // always gather current backtrace at every lock
28 OPTION(run_dir, OPT_STR, "/var/run/ceph") // the "/var/run/ceph" dir, created on daemon startup
29 OPTION(admin_socket, OPT_STR, "$run_dir/$cluster-$name.asok") // default changed by common_preinit()
30 OPTION(admin_socket_mode, OPT_STR, "") // permission bits to set for admin socket file, e.g., "0775", "0755"
31 OPTION(crushtool, OPT_STR, "crushtool") // crushtool utility path
32
33 OPTION(daemonize, OPT_BOOL, false) // default changed by common_preinit()
34 OPTION(setuser, OPT_STR, "") // uid or user name
35 OPTION(setgroup, OPT_STR, "") // gid or group name
36 OPTION(setuser_match_path, OPT_STR, "") // make setuser/group conditional on this path matching ownership
37 OPTION(pid_file, OPT_STR, "") // default changed by common_preinit()
38 OPTION(chdir, OPT_STR, "/")
39 OPTION(max_open_files, OPT_LONGLONG, 0)
40 OPTION(restapi_log_level, OPT_STR, "") // default set by Python code
41 OPTION(restapi_base_url, OPT_STR, "") // default set by Python code (same as restapi_log_level)
42 OPTION(fatal_signal_handlers, OPT_BOOL, true)
43 SAFE_OPTION(erasure_code_dir, OPT_STR, CEPH_PKGLIBDIR"/erasure-code") // default location for erasure-code plugins; CEPH_PKGLIBDIR is a string macro defined outside this file
44
// Local logging destinations: file, stderr, syslog, graylog.
45 OPTION(log_file, OPT_STR, "/var/log/ceph/$cluster-$name.log") // default changed by common_preinit()
46 OPTION(log_max_new, OPT_INT, 1000) // default changed by common_preinit()
47 OPTION(log_max_recent, OPT_INT, 10000) // default changed by common_preinit()
48 OPTION(log_to_stderr, OPT_BOOL, true) // default changed by common_preinit()
49 OPTION(err_to_stderr, OPT_BOOL, true) // default changed by common_preinit()
50 OPTION(log_to_syslog, OPT_BOOL, false)
51 OPTION(err_to_syslog, OPT_BOOL, false)
52 OPTION(log_flush_on_exit, OPT_BOOL, true) // default changed by common_preinit()
53 OPTION(log_stop_at_utilization, OPT_FLOAT, .97) // stop logging at (near) full
54 OPTION(log_to_graylog, OPT_BOOL, false)
55 OPTION(err_to_graylog, OPT_BOOL, false)
56 OPTION(log_graylog_host, OPT_STR, "127.0.0.1")
57 OPTION(log_graylog_port, OPT_INT, 12201)
58
59 // Each option takes k/v pairs, or a single item that is treated as the
60 // default for all channels.
61 // e.g., "info" is taken to be the same as "default=info";
62 // "default=daemon audit=local0" means
63 // "default all to 'daemon', override 'audit' with 'local0'".
64 OPTION(clog_to_monitors, OPT_STR, "default=true")
65 OPTION(clog_to_syslog, OPT_STR, "false")
66 OPTION(clog_to_syslog_level, OPT_STR, "info") // this level and above
67 OPTION(clog_to_syslog_facility, OPT_STR, "default=daemon audit=local0")
68 OPTION(clog_to_graylog, OPT_STR, "false")
69 OPTION(clog_to_graylog_host, OPT_STR, "127.0.0.1")
70 OPTION(clog_to_graylog_port, OPT_STR, "12201")
71
// Monitor-side cluster log destinations.
// NOTE(review): the "default=..." values suggest the same per-channel k/v
// string format as the clog_* options -- confirm.
72 OPTION(mon_cluster_log_to_syslog, OPT_STR, "default=false")
73 OPTION(mon_cluster_log_to_syslog_level, OPT_STR, "info") // this level and above
74 OPTION(mon_cluster_log_to_syslog_facility, OPT_STR, "daemon")
75 OPTION(mon_cluster_log_file, OPT_STR,
76 "default=/var/log/ceph/$cluster.$channel.log cluster=/var/log/ceph/$cluster.log")
77 OPTION(mon_cluster_log_file_level, OPT_STR, "info")
78 OPTION(mon_cluster_log_to_graylog, OPT_STR, "false")
79 OPTION(mon_cluster_log_to_graylog_host, OPT_STR, "127.0.0.1")
80 OPTION(mon_cluster_log_to_graylog_port, OPT_STR, "12201")
81
82 OPTION(enable_experimental_unrecoverable_data_corrupting_features, OPT_STR, "")
83
84 SAFE_OPTION(plugin_dir, OPT_STR, CEPH_PKGLIBDIR) // CEPH_PKGLIBDIR is defined outside this file; NOTE(review): presumably the default directory for loadable plugins -- confirm
85
86 OPTION(xio_trace_mempool, OPT_BOOL, false) // mempool allocation counters
87 OPTION(xio_trace_msgcnt, OPT_BOOL, false) // incoming/outgoing msg counters
88 OPTION(xio_trace_xcon, OPT_BOOL, false) // Xio message encode/decode trace
89 OPTION(xio_queue_depth, OPT_INT, 128) // depth of Accelio msg queue
90 OPTION(xio_mp_min, OPT_INT, 128) // default min mempool size
91 OPTION(xio_mp_max_64, OPT_INT, 65536) // max 64-byte chunks (buffer is 40)
92 OPTION(xio_mp_max_256, OPT_INT, 8192) // max 256-byte chunks
93 OPTION(xio_mp_max_1k, OPT_INT, 8192) // max 1K chunks
94 OPTION(xio_mp_max_page, OPT_INT, 4096) // max page-sized chunks -- NOTE(review): comment previously read "max 1K chunks", apparently copied from xio_mp_max_1k; confirm
95 OPTION(xio_mp_max_hint, OPT_INT, 4096) // max size-hint chunks
96 OPTION(xio_portal_threads, OPT_INT, 2) // xio portal threads per messenger
97 OPTION(xio_max_conns_per_portal, OPT_INT, 32) // max xio_connections per portal/ctx
98 OPTION(xio_transport_type, OPT_STR, "rdma") // xio transport type: {rdma or tcp}
99 OPTION(xio_max_send_inline, OPT_INT, 512) // xio maximum threshold to send inline
100
101 OPTION(compressor_zlib_isal, OPT_BOOL, false)
102 OPTION(compressor_zlib_level, OPT_INT, 5) // regular zlib compression level; not applicable to the isa-l optimized version
103
104 OPTION(async_compressor_enabled, OPT_BOOL, false)
105 OPTION(async_compressor_type, OPT_STR, "snappy")
106 OPTION(async_compressor_threads, OPT_INT, 2)
107 OPTION(async_compressor_thread_timeout, OPT_INT, 5) // NOTE(review): units not stated; presumably seconds -- confirm
108 OPTION(async_compressor_thread_suicide_timeout, OPT_INT, 30) // NOTE(review): units not stated; presumably seconds -- confirm
109
110 OPTION(plugin_crypto_accelerator, OPT_STR, "crypto_isal")
111
112 OPTION(mempool_debug, OPT_BOOL, false)
113
// Per-subsystem debug levels. NOTE(review): the two integers are presumably the
// default log level and the in-memory gather level; DEFAULT_SUBSYS/SUBSYS are
// defined outside this file -- confirm against their definitions.
114 DEFAULT_SUBSYS(0, 5)
115 SUBSYS(lockdep, 0, 1)
116 SUBSYS(context, 0, 1)
117 SUBSYS(crush, 1, 1)
118 SUBSYS(mds, 1, 5)
119 SUBSYS(mds_balancer, 1, 5)
120 SUBSYS(mds_locker, 1, 5)
121 SUBSYS(mds_log, 1, 5)
122 SUBSYS(mds_log_expire, 1, 5)
123 SUBSYS(mds_migrator, 1, 5)
124 SUBSYS(buffer, 0, 1)
125 SUBSYS(timer, 0, 1)
126 SUBSYS(filer, 0, 1)
127 SUBSYS(striper, 0, 1)
128 SUBSYS(objecter, 0, 1)
129 SUBSYS(rados, 0, 5)
130 SUBSYS(rbd, 0, 5)
131 SUBSYS(rbd_mirror, 0, 5)
132 SUBSYS(rbd_replay, 0, 5)
133 SUBSYS(journaler, 0, 5)
134 SUBSYS(objectcacher, 0, 5)
135 SUBSYS(client, 0, 5)
136 SUBSYS(osd, 1, 5)
137 SUBSYS(optracker, 0, 5)
138 SUBSYS(objclass, 0, 5)
139 SUBSYS(filestore, 1, 3)
140 SUBSYS(journal, 1, 3)
141 SUBSYS(ms, 0, 5)
142 SUBSYS(mon, 1, 5)
143 SUBSYS(monc, 0, 10)
144 SUBSYS(paxos, 1, 5)
145 SUBSYS(tp, 0, 5)
146 SUBSYS(auth, 1, 5)
147 SUBSYS(crypto, 1, 5)
148 SUBSYS(finisher, 1, 1)
149 SUBSYS(heartbeatmap, 1, 5)
150 SUBSYS(perfcounter, 1, 5)
151 SUBSYS(rgw, 1, 5) // log level for the Rados gateway
152 SUBSYS(civetweb, 1, 10)
153 SUBSYS(javaclient, 1, 5)
154 SUBSYS(asok, 1, 5)
155 SUBSYS(throttle, 1, 1)
156 SUBSYS(refs, 0, 0)
157 SUBSYS(xio, 1, 5)
158 SUBSYS(compressor, 1, 5)
159 SUBSYS(bluestore, 1, 5)
160 SUBSYS(bluefs, 1, 5)
161 SUBSYS(bdev, 1, 3)
162 SUBSYS(kstore, 1, 5)
163 SUBSYS(rocksdb, 4, 5)
164 SUBSYS(leveldb, 4, 5)
165 SUBSYS(memdb, 4, 5)
166 SUBSYS(kinetic, 1, 5)
167 SUBSYS(fuse, 1, 5)
168 SUBSYS(mgr, 1, 5)
169 SUBSYS(mgrc, 1, 5)
170 SUBSYS(dpdk, 1, 5)
171 SUBSYS(eventtrace, 1, 5)
172
173 OPTION(key, OPT_STR, "")
174 OPTION(keyfile, OPT_STR, "")
// keyring: comma-separated default keyring search path, built by string-literal
// concatenation. The /usr/local/etc/ceph entries are appended only when
// __FreeBSD__ is defined -- the same predefined macro the ms_bind_retry_* block
// tests. The guard previously tested __FreeBSD (no trailing underscores), which
// compilers do not predefine, so the FreeBSD paths were never included.
175 OPTION(keyring, OPT_STR,
176 // default changed by common_preinit() for mds and osd
177 "/etc/ceph/$cluster.$name.keyring,/etc/ceph/$cluster.keyring,/etc/ceph/keyring,/etc/ceph/keyring.bin,"
178 #if defined(__FreeBSD__)
179 "/usr/local/etc/ceph/$cluster.$name.keyring,/usr/local/etc/ceph/$cluster.keyring,"
180 "/usr/local/etc/ceph/keyring,/usr/local/etc/ceph/keyring.bin,"
181 #endif
182 )
183 OPTION(heartbeat_interval, OPT_INT, 5) // NOTE(review): units not stated; presumably seconds -- confirm
184 OPTION(heartbeat_file, OPT_STR, "")
185 OPTION(heartbeat_inject_failure, OPT_INT, 0) // force an unhealthy heartbeat for N seconds
186 OPTION(perf, OPT_BOOL, true) // enable internal perf counters
187
188 SAFE_OPTION(ms_type, OPT_STR, "async+posix") // messenger backend. It is modified at runtime, hence SAFE_OPTION
189 OPTION(ms_public_type, OPT_STR, "") // messenger backend
190 OPTION(ms_cluster_type, OPT_STR, "") // messenger backend
191 OPTION(ms_tcp_nodelay, OPT_BOOL, true)
192 OPTION(ms_tcp_rcvbuf, OPT_INT, 0)
193 OPTION(ms_tcp_prefetch_max_size, OPT_INT, 4096) // max prefetch size, we limit this to avoid extra memcpy
194 OPTION(ms_initial_backoff, OPT_DOUBLE, .2)
195 OPTION(ms_max_backoff, OPT_DOUBLE, 15.0)
196 OPTION(ms_crc_data, OPT_BOOL, true)
197 OPTION(ms_crc_header, OPT_BOOL, true)
198 OPTION(ms_die_on_bad_msg, OPT_BOOL, false)
199 OPTION(ms_die_on_unhandled_msg, OPT_BOOL, false)
200 OPTION(ms_die_on_old_message, OPT_BOOL, false) // assert if we get a dup incoming message and shouldn't have (may be triggered by pre-541cd3c64be0dfa04e8a2df39422e0eb9541a428 code)
201 OPTION(ms_die_on_skipped_message, OPT_BOOL, false) // assert if we skip a seq (kernel client does this intentionally)
202 OPTION(ms_dispatch_throttle_bytes, OPT_U64, 100 << 20) // 100 << 20 = 104857600 (100 MiB)
203 OPTION(ms_bind_ipv6, OPT_BOOL, false)
204 OPTION(ms_bind_port_min, OPT_INT, 6800)
205 OPTION(ms_bind_port_max, OPT_INT, 7300)
206 #if !defined(__FreeBSD__)
207 OPTION(ms_bind_retry_count, OPT_INT, 3) // If binding fails, how many times do we retry to bind
208 OPTION(ms_bind_retry_delay, OPT_INT, 5) // Delay between attempts to bind
209 #else
210 // FreeBSD does not use SO_REUSEADDR so allow for a bit more time per default
211 OPTION(ms_bind_retry_count, OPT_INT, 6) // If binding fails, how many times do we retry to bind
212 OPTION(ms_bind_retry_delay, OPT_INT, 6) // Delay between attempts to bind
213 #endif
214 OPTION(ms_bind_before_connect, OPT_BOOL, true)
215 OPTION(ms_rwthread_stack_bytes, OPT_U64, 1024 << 10) // 1024 << 10 = 1048576 (1 MiB)
216 OPTION(ms_tcp_read_timeout, OPT_U64, 900)
217 OPTION(ms_pq_max_tokens_per_priority, OPT_U64, 16777216)
218 OPTION(ms_pq_min_cost, OPT_U64, 65536)
219 OPTION(ms_inject_socket_failures, OPT_U64, 0)
220 SAFE_OPTION(ms_inject_delay_type, OPT_STR, "") // "osd mds mon client" allowed
221 OPTION(ms_inject_delay_msg_type, OPT_STR, "") // the type of message to delay, as returned by Message::get_type_name(). This is an additional restriction on the general type filter ms_inject_delay_type.
222 OPTION(ms_inject_delay_max, OPT_DOUBLE, 1) // seconds
223 OPTION(ms_inject_delay_probability, OPT_DOUBLE, 0) // range [0, 1]
224 OPTION(ms_inject_internal_delays, OPT_DOUBLE, 0) // seconds
225 OPTION(ms_dump_on_send, OPT_BOOL, false) // hexdump msg to log on send
226 OPTION(ms_dump_corrupt_message_level, OPT_INT, 1) // debug level to hexdump undecodeable messages at
227 OPTION(ms_async_op_threads, OPT_U64, 3) // number of worker processing threads for async messenger created on init
228 OPTION(ms_async_max_op_threads, OPT_U64, 5) // max number of worker processing threads for async messenger
229 OPTION(ms_async_set_affinity, OPT_BOOL, true)
230 // example: ms_async_affinity_cores = 0,1
231 // The number of cores listed is expected to equal ms_async_op_threads; otherwise
232 // extra op threads will loop over ms_async_affinity_cores again.
233 // If ms_async_affinity_cores is empty, all threads will be bound to the currently
234 // running core
235 OPTION(ms_async_affinity_cores, OPT_STR, "")
236 OPTION(ms_async_send_inline, OPT_BOOL, false)
237 OPTION(ms_async_rdma_device_name, OPT_STR, "")
238 OPTION(ms_async_rdma_enable_hugepage, OPT_BOOL, false)
239 OPTION(ms_async_rdma_buffer_size, OPT_INT, 128 << 10) // 128 << 10 = 131072 (128 KiB)
240 OPTION(ms_async_rdma_send_buffers, OPT_U32, 1024)
241 OPTION(ms_async_rdma_receive_buffers, OPT_U32, 1024)
242 OPTION(ms_async_rdma_port_num, OPT_U32, 1)
243 OPTION(ms_async_rdma_polling_us, OPT_U32, 1000)
244 OPTION(ms_async_rdma_local_gid, OPT_STR, "") // GID format: "fe80:0000:0000:0000:7efe:90ff:fe72:6efe", no zero folding
245 OPTION(ms_async_rdma_roce_ver, OPT_INT, 1) // 0=RoCEv1, 1=RoCEv2, 2=RoCEv1.5
246 OPTION(ms_async_rdma_sl, OPT_INT, 3) // in RoCE, this means PCP
247
248 OPTION(ms_dpdk_port_id, OPT_INT, 0)
249 SAFE_OPTION(ms_dpdk_coremask, OPT_STR, "1") // modified in unit tests, hence declared with SAFE_OPTION
250 OPTION(ms_dpdk_memory_channel, OPT_STR, "4")
251 OPTION(ms_dpdk_hugepages, OPT_STR, "")
252 OPTION(ms_dpdk_pmd, OPT_STR, "")
253 SAFE_OPTION(ms_dpdk_host_ipv4_addr, OPT_STR, "")
254 SAFE_OPTION(ms_dpdk_gateway_ipv4_addr, OPT_STR, "")
255 SAFE_OPTION(ms_dpdk_netmask_ipv4_addr, OPT_STR, "")
256 OPTION(ms_dpdk_lro, OPT_BOOL, true)
257 OPTION(ms_dpdk_hw_flow_control, OPT_BOOL, true)
258 // Weighting of a hardware network queue relative to a software queue (0=no work, 1=equal share)
259 OPTION(ms_dpdk_hw_queue_weight, OPT_FLOAT, 1)
260 OPTION(ms_dpdk_debug_allow_loopback, OPT_BOOL, false)
261 OPTION(ms_dpdk_rx_buffer_count_per_core, OPT_INT, 8192)
262
263 OPTION(inject_early_sigterm, OPT_BOOL, false)
264
265 OPTION(mon_data, OPT_STR, "/var/lib/ceph/mon/$cluster-$id")
266 OPTION(mon_initial_members, OPT_STR, "") // list of initial cluster mon ids; if specified, need majority to form initial quorum and create new cluster
267 OPTION(mon_sync_fs_threshold, OPT_INT, 5) // sync() when writing this many objects; 0 to disable.
268 OPTION(mon_compact_on_start, OPT_BOOL, false) // compact leveldb on ceph-mon start (NOTE(review): mon_keyvaluedb defaults to "rocksdb" in this file; confirm this applies to that backend too)
269 OPTION(mon_compact_on_bootstrap, OPT_BOOL, false) // trigger leveldb compaction on bootstrap (NOTE(review): same leveldb/rocksdb question as mon_compact_on_start)
270 OPTION(mon_compact_on_trim, OPT_BOOL, true) // compact (a prefix) when we trim old states
271 OPTION(mon_osd_cache_size, OPT_INT, 10) // the size of osdmaps cache, not to rely on underlying store's cache
272
273 OPTION(mon_cpu_threads, OPT_INT, 4)
274 OPTION(mon_osd_mapping_pgs_per_chunk, OPT_INT, 4096)
275 OPTION(mon_tick_interval, OPT_INT, 5)
276 OPTION(mon_session_timeout, OPT_INT, 300) // must send keepalive or subscribe
277 OPTION(mon_subscribe_interval, OPT_DOUBLE, 24*3600) // for legacy clients only; 24*3600 = 86400
278 OPTION(mon_delta_reset_interval, OPT_DOUBLE, 10) // seconds of inactivity before we reset the pg delta to 0
279 OPTION(mon_osd_laggy_halflife, OPT_INT, 60*60) // (seconds) how quickly our laggy estimations decay
280 OPTION(mon_osd_laggy_weight, OPT_DOUBLE, .3) // weight for new samples in laggy estimations
281 OPTION(mon_osd_laggy_max_interval, OPT_INT, 300) // maximum value of laggy_interval in laggy estimations
282 OPTION(mon_osd_adjust_heartbeat_grace, OPT_BOOL, true) // true if we should scale based on laggy estimations
283 OPTION(mon_osd_adjust_down_out_interval, OPT_BOOL, true) // true if we should scale based on laggy estimations
284 OPTION(mon_osd_auto_mark_in, OPT_BOOL, false) // mark any booting osds 'in'
285 OPTION(mon_osd_auto_mark_auto_out_in, OPT_BOOL, true) // mark booting auto-marked-out osds 'in'
286 OPTION(mon_osd_auto_mark_new_in, OPT_BOOL, true) // mark booting new osds 'in'
287 OPTION(mon_osd_down_out_interval, OPT_INT, 600) // seconds
288 OPTION(mon_osd_down_out_subtree_limit, OPT_STR, "rack") // smallest crush unit/type that we will not automatically mark out
289 OPTION(mon_osd_min_up_ratio, OPT_DOUBLE, .3) // min osds required to be up to mark things down
290 OPTION(mon_osd_min_in_ratio, OPT_DOUBLE, .75) // min osds required to be in to mark things out
291 OPTION(mon_osd_max_op_age, OPT_DOUBLE, 32) // max op age before we get concerned (make it a power of 2)
292 OPTION(mon_osd_max_split_count, OPT_INT, 32) // largest number of PGs per "involved" OSD to let split create
293 OPTION(mon_osd_allow_primary_temp, OPT_BOOL, false) // allow primary_temp to be set in the osdmap
294 OPTION(mon_osd_allow_primary_affinity, OPT_BOOL, false) // allow primary_affinity to be set in the osdmap
295 OPTION(mon_osd_prime_pg_temp, OPT_BOOL, true) // prime osdmap with pg mapping changes
296 OPTION(mon_osd_prime_pg_temp_max_time, OPT_FLOAT, .5) // max time to spend priming
297 OPTION(mon_osd_prime_pg_temp_max_estimate, OPT_FLOAT, .25) // max estimate of pg total before we do all pgs in parallel
298 OPTION(mon_osd_pool_ec_fast_read, OPT_BOOL, false) // whether to turn on fast read on the pool or not
299 OPTION(mon_stat_smooth_intervals, OPT_INT, 6) // smooth stats over last N PGMap maps
300 OPTION(mon_election_timeout, OPT_FLOAT, 5) // on election proposer, max waiting time for all ACKs
301 OPTION(mon_lease, OPT_FLOAT, 5) // lease interval
302 OPTION(mon_lease_renew_interval_factor, OPT_FLOAT, .6) // on leader, to renew the lease
303 OPTION(mon_lease_ack_timeout_factor, OPT_FLOAT, 2.0) // on leader, if lease isn't acked by all peons
304 OPTION(mon_accept_timeout_factor, OPT_FLOAT, 2.0) // on leader, if paxos update isn't accepted
305
306 OPTION(mon_clock_drift_allowed, OPT_FLOAT, .050) // allowed clock drift between monitors
307 OPTION(mon_clock_drift_warn_backoff, OPT_FLOAT, 5) // exponential backoff for clock drift warnings
308 OPTION(mon_timecheck_interval, OPT_FLOAT, 300.0) // on leader, timecheck (clock drift check) interval (seconds)
309 OPTION(mon_timecheck_skew_interval, OPT_FLOAT, 30.0) // on leader, timecheck (clock drift check) interval when in presence of a skew (seconds)
310 OPTION(mon_pg_stuck_threshold, OPT_INT, 300) // number of seconds after which pgs can be considered inactive, unclean, or stale (see doc/control.rst under dump_stuck for more info)
311 OPTION(mon_pg_min_inactive, OPT_U64, 1) // the number of PGs which have to be inactive longer than 'mon_pg_stuck_threshold' before health goes into ERR. 0 means disabled, never go into ERR.
312 OPTION(mon_pg_warn_min_per_osd, OPT_INT, 30) // min # pgs per (in) osd before we warn the admin
313 OPTION(mon_pg_warn_max_per_osd, OPT_INT, 300) // max # pgs per (in) osd before we warn the admin
314 OPTION(mon_pg_warn_max_object_skew, OPT_FLOAT, 10.0) // max skew from average in objects per pg (NOTE(review): original read "skew few average"; confirm intended wording)
315 OPTION(mon_pg_warn_min_objects, OPT_INT, 10000) // do not warn below this object #
316 OPTION(mon_pg_warn_min_pool_objects, OPT_INT, 1000) // do not warn on pools below this object #
317 OPTION(mon_pg_check_down_all_threshold, OPT_FLOAT, .5) // threshold of down osds after which we check all pgs
318 OPTION(mon_cache_target_full_warn_ratio, OPT_FLOAT, .66) // position between pool cache_target_full and max where we start warning
319 OPTION(mon_osd_full_ratio, OPT_FLOAT, .95) // what % full makes an OSD "full"
320 OPTION(mon_osd_backfillfull_ratio, OPT_FLOAT, .90) // what % full makes an OSD backfill full (backfill halted)
321 OPTION(mon_osd_nearfull_ratio, OPT_FLOAT, .85) // what % full makes an OSD near full
322 OPTION(mon_osd_initial_require_min_compat_client, OPT_STR, "hammer")
323 OPTION(mon_allow_pool_delete, OPT_BOOL, false) // allow pool deletion
324 OPTION(mon_fake_pool_delete, OPT_BOOL, false) // fake pool deletion (add _DELETED suffix)
325 OPTION(mon_globalid_prealloc, OPT_U32, 10000) // how many globalids to prealloc
326 OPTION(mon_osd_report_timeout, OPT_INT, 900) // grace period before declaring unresponsive OSDs dead
327 OPTION(mon_force_standby_active, OPT_BOOL, true) // should mons force standby-replay mds to be active
328 OPTION(mon_warn_on_legacy_crush_tunables, OPT_BOOL, true) // warn if crush tunables are too old (older than mon_crush_min_required_version)
329 OPTION(mon_crush_min_required_version, OPT_STR, "firefly")
330 OPTION(mon_warn_on_crush_straw_calc_version_zero, OPT_BOOL, true) // warn if crush straw_calc_version==0
331 OPTION(mon_warn_on_osd_down_out_interval_zero, OPT_BOOL, true) // warn if 'mon_osd_down_out_interval == 0'
332 OPTION(mon_warn_on_cache_pools_without_hit_sets, OPT_BOOL, true)
333 OPTION(mon_warn_osd_usage_min_max_delta, OPT_FLOAT, .40) // warn if difference between min and max OSD utilizations exceeds specified amount
334 OPTION(mon_min_osdmap_epochs, OPT_INT, 500)
335 OPTION(mon_max_pgmap_epochs, OPT_INT, 500)
336 OPTION(mon_max_log_epochs, OPT_INT, 500)
337 OPTION(mon_max_mdsmap_epochs, OPT_INT, 500)
338 OPTION(mon_max_osd, OPT_INT, 10000)
339 OPTION(mon_probe_timeout, OPT_DOUBLE, 2.0)
340 OPTION(mon_slurp_timeout, OPT_DOUBLE, 10.0)
341 OPTION(mon_slurp_bytes, OPT_INT, 256*1024) // limit size of slurp messages
342 OPTION(mon_client_bytes, OPT_U64, 100ul << 20) // client msg data allowed in memory (in bytes)
343 OPTION(mon_mgr_proxy_client_bytes_ratio, OPT_FLOAT, .3) // ratio of mon_client_bytes that can be consumed by proxied mgr commands before we error out to client
344 OPTION(mon_daemon_bytes, OPT_U64, 400ul << 20) // mds, osd message memory cap (in bytes)
345 OPTION(mon_max_log_entries_per_event, OPT_INT, 4096)
346 OPTION(mon_reweight_min_pgs_per_osd, OPT_U64, 10) // min pgs per osd for reweight-by-pg command
347 OPTION(mon_reweight_min_bytes_per_osd, OPT_U64, 100*1024*1024) // min bytes per osd for reweight-by-utilization command
348 OPTION(mon_reweight_max_osds, OPT_INT, 4) // max osds to change per reweight-by-* command
349 OPTION(mon_reweight_max_change, OPT_DOUBLE, 0.05)
350 OPTION(mon_health_data_update_interval, OPT_FLOAT, 60.0)
351 OPTION(mon_health_to_clog, OPT_BOOL, true)
352 OPTION(mon_health_to_clog_interval, OPT_INT, 3600)
353 OPTION(mon_health_to_clog_tick_interval, OPT_DOUBLE, 60.0)
354 OPTION(mon_data_avail_crit, OPT_INT, 5)
355 OPTION(mon_data_avail_warn, OPT_INT, 30)
356 OPTION(mon_data_size_warn, OPT_U64, 15ULL*1024*1024*1024) // issue a warning when the monitor's data store goes over 15GB (in bytes); the ULL suffix keeps the product out of int arithmetic, where 15*2^30 overflows a 32-bit int
357 OPTION(mon_warn_not_scrubbed, OPT_INT, 0)
358 OPTION(mon_warn_not_deep_scrubbed, OPT_INT, 0)
359 OPTION(mon_scrub_interval, OPT_INT, 3600*24) // once a day (3600*24 = 86400)
360 OPTION(mon_scrub_timeout, OPT_INT, 60*5) // let's give it 5 minutes; why not. (60*5 = 300)
361 OPTION(mon_scrub_max_keys, OPT_INT, 100) // max number of keys to scrub each time
362 OPTION(mon_scrub_inject_crc_mismatch, OPT_DOUBLE, 0.0) // probability of injected crc mismatch [0.0, 1.0]
363 OPTION(mon_scrub_inject_missing_keys, OPT_DOUBLE, 0.0) // probability of injected missing keys [0.0, 1.0]
364 OPTION(mon_config_key_max_entry_size, OPT_INT, 4096) // max num bytes per config-key entry
365 OPTION(mon_sync_timeout, OPT_DOUBLE, 60.0)
366 OPTION(mon_sync_max_payload_size, OPT_U32, 1048576) // max size for a sync chunk payload (say, 1MB)
367 OPTION(mon_sync_debug, OPT_BOOL, false) // enable sync-specific debug
368 OPTION(mon_sync_debug_leader, OPT_INT, -1) // monitor to be used as the sync leader
369 OPTION(mon_sync_debug_provider, OPT_INT, -1) // monitor to be used as the sync provider
370 OPTION(mon_sync_debug_provider_fallback, OPT_INT, -1) // monitor to be used as fallback if sync provider fails
371 OPTION(mon_inject_sync_get_chunk_delay, OPT_DOUBLE, 0) // inject N second delay on each get_chunk request
372 OPTION(mon_osd_min_down_reporters, OPT_INT, 2) // number of OSDs from different subtrees who need to report a down OSD for it to count
373 OPTION(mon_osd_reporter_subtree_level , OPT_STR, "host") // the level of parent bucket at which the reporters are counted
374 OPTION(mon_osd_force_trim_to, OPT_INT, 0) // force mon to trim maps to this point, regardless of min_last_epoch_clean (dangerous, use with care)
375 OPTION(mon_mds_force_trim_to, OPT_INT, 0) // force mon to trim mdsmaps to this point (dangerous, use with care)
376 OPTION(mon_mds_skip_sanity, OPT_BOOL, false) // skip safety assertions on FSMap (in case of bugs where we want to continue anyway)
377
378 // monitor debug options
379 OPTION(mon_debug_deprecated_as_obsolete, OPT_BOOL, false) // consider deprecated commands as obsolete
380
381 // dump transactions
382 OPTION(mon_debug_dump_transactions, OPT_BOOL, false)
383 OPTION(mon_debug_dump_json, OPT_BOOL, false)
384 OPTION(mon_debug_dump_location, OPT_STR, "/var/log/ceph/$cluster-$name.tdump")
385 OPTION(mon_debug_no_require_luminous, OPT_BOOL, false)
386 OPTION(mon_inject_transaction_delay_max, OPT_DOUBLE, 10.0) // seconds
387 OPTION(mon_inject_transaction_delay_probability, OPT_DOUBLE, 0) // range [0, 1]
388
389 OPTION(mon_sync_provider_kill_at, OPT_INT, 0) // kill the sync provider at a specific point in the workflow
390 OPTION(mon_sync_requester_kill_at, OPT_INT, 0) // kill the sync requester at a specific point in the workflow
391 OPTION(mon_force_quorum_join, OPT_BOOL, false) // force monitor to join quorum even if it has been previously removed from the map
392 OPTION(mon_keyvaluedb, OPT_STR, "rocksdb") // type of keyvaluedb backend
393
394 // UNSAFE -- TESTING ONLY! Allows addition of a cache tier with preexisting snaps
395 OPTION(mon_debug_unsafe_allow_tier_with_nonempty_snaps, OPT_BOOL, false)
396
397 OPTION(paxos_stash_full_interval, OPT_INT, 25) // how often (in commits) to stash a full copy of the PaxosService state
398 OPTION(paxos_max_join_drift, OPT_INT, 10) // max paxos iterations before we must first sync the monitor stores
399 OPTION(paxos_propose_interval, OPT_DOUBLE, 1.0) // gather updates for this long before proposing a map update
400 OPTION(paxos_min_wait, OPT_DOUBLE, 0.05) // min time to gather updates for after period of inactivity
401 OPTION(paxos_min, OPT_INT, 500) // minimum number of paxos states to keep around
402 OPTION(paxos_trim_min, OPT_INT, 250) // number of extra proposals tolerated before trimming
403 OPTION(paxos_trim_max, OPT_INT, 500) // max number of extra proposals to trim at a time
404 OPTION(paxos_service_trim_min, OPT_INT, 250) // minimum number of versions to trigger a trim (0 disables it)
405 OPTION(paxos_service_trim_max, OPT_INT, 500) // maximum number of versions to trim during a single proposal (0 disables it)
406 OPTION(paxos_kill_at, OPT_INT, 0)
407 OPTION(auth_cluster_required, OPT_STR, "cephx") // required of mon, mds, osd daemons
408 OPTION(auth_service_required, OPT_STR, "cephx") // required by daemons of clients
409 OPTION(auth_client_required, OPT_STR, "cephx, none") // what clients require of daemons
410 OPTION(auth_supported, OPT_STR, "") // deprecated; default value for above if they are not defined.
411 OPTION(max_rotating_auth_attempts, OPT_INT, 10)
412 OPTION(cephx_require_signatures, OPT_BOOL, false) // If true, don't talk to Cephx partners if they don't support message signing; off by default
413 OPTION(cephx_cluster_require_signatures, OPT_BOOL, false)
414 OPTION(cephx_service_require_signatures, OPT_BOOL, false)
415 OPTION(cephx_sign_messages, OPT_BOOL, true) // Default to signing session messages if supported
416 OPTION(auth_mon_ticket_ttl, OPT_DOUBLE, 60*60*12) // 60*60*12 = 43200 (NOTE(review): 12 hours if the unit is seconds -- confirm)
417 OPTION(auth_service_ticket_ttl, OPT_DOUBLE, 60*60) // 60*60 = 3600 (NOTE(review): 1 hour if the unit is seconds -- confirm)
418 OPTION(auth_debug, OPT_BOOL, false) // if true, assert when weird things happen
419 OPTION(mon_client_hunt_parallel, OPT_U32, 2) // how many mons to try to connect to in parallel during hunt
420 OPTION(mon_client_hunt_interval, OPT_DOUBLE, 3.0) // try new mon every N seconds until we connect
421 OPTION(mon_client_ping_interval, OPT_DOUBLE, 10.0) // ping every N seconds
422 OPTION(mon_client_ping_timeout, OPT_DOUBLE, 30.0) // fail if we don't hear back
423 OPTION(mon_client_hunt_interval_backoff, OPT_DOUBLE, 2.0) // each time we reconnect to a monitor, double our timeout
424 OPTION(mon_client_hunt_interval_max_multiple, OPT_DOUBLE, 10.0) // up to a max of 10 * mon_client_hunt_interval (30 seconds with the 3.0 default)
425 OPTION(mon_client_max_log_entries_per_message, OPT_INT, 1000)
426 OPTION(mon_max_pool_pg_num, OPT_INT, 65536)
427 OPTION(mon_pool_quota_warn_threshold, OPT_INT, 0) // percent of quota at which to issue warnings
428 OPTION(mon_pool_quota_crit_threshold, OPT_INT, 0) // percent of quota at which to issue errors
429 OPTION(client_cache_size, OPT_INT, 16384)
430 OPTION(client_cache_mid, OPT_FLOAT, .75)
431 OPTION(client_use_random_mds, OPT_BOOL, false)
432 OPTION(client_mount_timeout, OPT_DOUBLE, 300.0)
433 OPTION(client_tick_interval, OPT_DOUBLE, 1.0)
434 OPTION(client_trace, OPT_STR, "")
435 OPTION(client_readahead_min, OPT_LONGLONG, 128*1024) // readahead at _least_ this much.
436 OPTION(client_readahead_max_bytes, OPT_LONGLONG, 0) // default unlimited
437 OPTION(client_readahead_max_periods, OPT_LONGLONG, 4) // as multiple of file layout period (object size * num stripes)
438 OPTION(client_reconnect_stale, OPT_BOOL, false) // automatically reconnect stale session
439 OPTION(client_snapdir, OPT_STR, ".snap")
440 OPTION(client_mountpoint, OPT_STR, "/")
441 OPTION(client_mount_uid, OPT_INT, -1)
442 OPTION(client_mount_gid, OPT_INT, -1)
443 OPTION(client_notify_timeout, OPT_INT, 10) // in seconds
444 OPTION(osd_client_watch_timeout, OPT_INT, 30) // in seconds
445 OPTION(client_caps_release_delay, OPT_INT, 5) // in seconds
446 OPTION(client_quota_df, OPT_BOOL, true) // use quota for df on subdir mounts
447 OPTION(client_oc, OPT_BOOL, true)
448 OPTION(client_oc_size, OPT_INT, 1024*1024* 200) // MB * n (= 200 MiB)
449 OPTION(client_oc_max_dirty, OPT_INT, 1024*1024* 100) // MB * n (= 100 MiB; dirty OR tx.. bigish)
450 OPTION(client_oc_target_dirty, OPT_INT, 1024*1024* 8) // MB * n (= 8 MiB); target dirty (keep this smallish)
451 OPTION(client_oc_max_dirty_age, OPT_DOUBLE, 5.0) // max age in cache before writeback
452 OPTION(client_oc_max_objects, OPT_INT, 1000) // max objects in cache
453 OPTION(client_debug_getattr_caps, OPT_BOOL, false) // check if MDS reply contains wanted caps
454 OPTION(client_debug_force_sync_read, OPT_BOOL, false) // always read synchronously (go to osds)
455 OPTION(client_debug_inject_tick_delay, OPT_INT, 0) // delay the client tick for a number of seconds
456 OPTION(client_max_inline_size, OPT_U64, 4096)
457 OPTION(client_inject_release_failure, OPT_BOOL, false) // synthetic client bug for testing
458 OPTION(client_inject_fixed_oldest_tid, OPT_BOOL, false) // synthetic client bug for testing
459 OPTION(client_metadata, OPT_STR, "")
460 OPTION(client_acl_type, OPT_STR, "")
461 OPTION(client_permissions, OPT_BOOL, true)
462 OPTION(client_dirsize_rbytes, OPT_BOOL, true)
463
464 // note: the max amount of "in flight" dirty data is roughly (max - target) -- NOTE(review): presumably client_oc_max_dirty - client_oc_target_dirty; confirm
465 OPTION(fuse_use_invalidate_cb, OPT_BOOL, true) // use fuse 2.8+ invalidate callback to keep page cache consistent
466 OPTION(fuse_disable_pagecache, OPT_BOOL, false)
467 OPTION(fuse_allow_other, OPT_BOOL, true)
468 OPTION(fuse_default_permissions, OPT_BOOL, false)
469 OPTION(fuse_big_writes, OPT_BOOL, true)
470 OPTION(fuse_atomic_o_trunc, OPT_BOOL, true)
471 OPTION(fuse_debug, OPT_BOOL, false)
472 OPTION(fuse_multithreaded, OPT_BOOL, true)
473 OPTION(fuse_require_active_mds, OPT_BOOL, true) // whether ceph_fuse requires an active mds server
474 OPTION(fuse_syncfs_on_mksnap, OPT_BOOL, true)
475 OPTION(fuse_set_user_groups, OPT_BOOL, false) // whether ceph_fuse fills in group lists or not
476
477 OPTION(client_try_dentry_invalidate, OPT_BOOL, true) // the client should try to use dentry invalidation instead of remounting, on kernels where it believes that will work
478 OPTION(client_die_on_failed_remount, OPT_BOOL, true)
479 OPTION(client_check_pool_perm, OPT_BOOL, true)
480 OPTION(client_use_faked_inos, OPT_BOOL, false)
481 OPTION(client_mds_namespace, OPT_STR, "")
482
483 OPTION(crush_location, OPT_STR, "") // whitespace-separated list of key=value pairs describing crush location
484 OPTION(crush_location_hook, OPT_STR, "")
485 OPTION(crush_location_hook_timeout, OPT_INT, 10) // NOTE(review): units not stated; presumably seconds -- confirm
486
487 OPTION(objecter_tick_interval, OPT_DOUBLE, 5.0)
488 OPTION(objecter_timeout, OPT_DOUBLE, 10.0) // before we ask for a map
489 OPTION(objecter_inflight_op_bytes, OPT_U64, 1024*1024*100) // max in-flight data (both directions); 1024*1024*100 = 100 MiB
490 OPTION(objecter_inflight_ops, OPT_U64, 1024) // max in-flight ios
491 OPTION(objecter_completion_locks_per_session, OPT_U64, 32) // number of completion locks per session, for serializing same-object responses
492 OPTION(objecter_inject_no_watch_ping, OPT_BOOL, false) // suppress watch pings
493 OPTION(objecter_retry_writes_after_first_reply, OPT_BOOL, false) // ignore the first reply for each write, and resend the osd op instead
494 OPTION(objecter_debug_inject_relock_delay, OPT_BOOL, false)
495
496 // Max number of deletes at once in a single Filer::purge call
497 OPTION(filer_max_purge_ops, OPT_U32, 10)
498 // Max number of truncate at once in a single Filer::truncate call
499 OPTION(filer_max_truncate_ops, OPT_U32, 128)
500
501 OPTION(journaler_write_head_interval, OPT_INT, 15)
502 OPTION(journaler_prefetch_periods, OPT_INT, 10) // * journal object size
503 OPTION(journaler_prezero_periods, OPT_INT, 5) // * journal object size
504 OPTION(mds_data, OPT_STR, "/var/lib/ceph/mds/$cluster-$id")
505 OPTION(mds_max_file_size, OPT_U64, 1ULL << 40) // Used when creating new CephFS. Change with 'ceph mds set max_file_size <size>' afterwards
506 // max xattr kv pairs size for each dir/file
507 OPTION(mds_max_xattr_pairs_size, OPT_U32, 64 << 10)
508 OPTION(mds_cache_size, OPT_INT, 100000)
509 OPTION(mds_cache_mid, OPT_FLOAT, .7)
510 OPTION(mds_max_file_recover, OPT_U32, 32)
511 OPTION(mds_dir_max_commit_size, OPT_INT, 10) // MB
512 OPTION(mds_dir_keys_per_op, OPT_INT, 16384)
513 OPTION(mds_decay_halflife, OPT_FLOAT, 5)
514 OPTION(mds_beacon_interval, OPT_FLOAT, 4)
515 OPTION(mds_beacon_grace, OPT_FLOAT, 15)
516 OPTION(mds_enforce_unique_name, OPT_BOOL, true)
517 OPTION(mds_blacklist_interval, OPT_FLOAT, 24.0*60.0) // how long to blacklist failed nodes
518 OPTION(mds_session_timeout, OPT_FLOAT, 60) // cap bits and leases time out if client idle
519 OPTION(mds_sessionmap_keys_per_op, OPT_U32, 1024) // how many sessions should I try to load/store in a single OMAP operation?
520 OPTION(mds_revoke_cap_timeout, OPT_FLOAT, 60) // detect clients which aren't revoking caps
521 OPTION(mds_recall_state_timeout, OPT_FLOAT, 60) // detect clients which aren't trimming caps
522 OPTION(mds_freeze_tree_timeout, OPT_FLOAT, 30) // detecting freeze tree deadlock
523 OPTION(mds_session_autoclose, OPT_FLOAT, 300) // autoclose idle session
524 OPTION(mds_health_summarize_threshold, OPT_INT, 10) // collapse N-client health metrics to a single 'many'
525 OPTION(mds_health_cache_threshold, OPT_FLOAT, 1.5) // warn on cache size if it exceeds mds_cache_size by this factor
526 OPTION(mds_reconnect_timeout, OPT_FLOAT, 45) // seconds to wait for clients during mds restart
527 // make it (mds_session_timeout - mds_beacon_grace)
528 OPTION(mds_tick_interval, OPT_FLOAT, 5)
529 OPTION(mds_dirstat_min_interval, OPT_FLOAT, 1) // try to avoid propagating more often than this
530 OPTION(mds_scatter_nudge_interval, OPT_FLOAT, 5) // how quickly dirstat changes propagate up the hierarchy
531 OPTION(mds_client_prealloc_inos, OPT_INT, 1000)
532 OPTION(mds_early_reply, OPT_BOOL, true)
533 OPTION(mds_default_dir_hash, OPT_INT, CEPH_STR_HASH_RJENKINS)
534 OPTION(mds_log_pause, OPT_BOOL, false)
535 OPTION(mds_log_skip_corrupt_events, OPT_BOOL, false)
536 OPTION(mds_log_max_events, OPT_INT, -1)
537 OPTION(mds_log_events_per_segment, OPT_INT, 1024)
538 OPTION(mds_log_segment_size, OPT_INT, 0) // segment size for mds log, default to default file_layout_t
539 OPTION(mds_log_max_segments, OPT_U32, 30)
540 OPTION(mds_log_max_expiring, OPT_INT, 20)
541 OPTION(mds_bal_export_pin, OPT_BOOL, true) // allow clients to pin directory trees to ranks
542 OPTION(mds_bal_sample_interval, OPT_DOUBLE, 3.0) // every 3 seconds
543 OPTION(mds_bal_replicate_threshold, OPT_FLOAT, 8000)
544 OPTION(mds_bal_unreplicate_threshold, OPT_FLOAT, 0)
545 OPTION(mds_bal_frag, OPT_BOOL, true)
546 OPTION(mds_bal_split_size, OPT_INT, 10000)
547 OPTION(mds_bal_split_rd, OPT_FLOAT, 25000)
548 OPTION(mds_bal_split_wr, OPT_FLOAT, 10000)
549 OPTION(mds_bal_split_bits, OPT_INT, 3)
550 OPTION(mds_bal_merge_size, OPT_INT, 50)
551 OPTION(mds_bal_interval, OPT_INT, 10) // seconds
552 OPTION(mds_bal_fragment_interval, OPT_INT, 5) // seconds
553 OPTION(mds_bal_fragment_size_max, OPT_INT, 10000*10) // order of magnitude higher than split size
554 OPTION(mds_bal_fragment_fast_factor, OPT_FLOAT, 1.5) // multiple of size_max that triggers immediate split
555 OPTION(mds_bal_idle_threshold, OPT_FLOAT, 0)
556 OPTION(mds_bal_max, OPT_INT, -1)
557 OPTION(mds_bal_max_until, OPT_INT, -1)
558 OPTION(mds_bal_mode, OPT_INT, 0)
559 OPTION(mds_bal_min_rebalance, OPT_FLOAT, .1) // must be this much above average before we export anything
560 OPTION(mds_bal_min_start, OPT_FLOAT, .2) // if we need less than this, we don't do anything
561 OPTION(mds_bal_need_min, OPT_FLOAT, .8) // take within this range of what we need
562 OPTION(mds_bal_need_max, OPT_FLOAT, 1.2)
563 OPTION(mds_bal_midchunk, OPT_FLOAT, .3) // any sub bigger than this taken in full
564 OPTION(mds_bal_minchunk, OPT_FLOAT, .001) // never take anything smaller than this
565 OPTION(mds_bal_target_decay, OPT_DOUBLE, 10.0) // target decay half-life in MDSMap (2x larger is approx. 2x slower)
566 OPTION(mds_replay_interval, OPT_FLOAT, 1.0) // time to wait before starting replay again
567 OPTION(mds_shutdown_check, OPT_INT, 0)
568 OPTION(mds_thrash_exports, OPT_INT, 0)
569 OPTION(mds_thrash_fragments, OPT_INT, 0)
570 OPTION(mds_dump_cache_on_map, OPT_BOOL, false)
571 OPTION(mds_dump_cache_after_rejoin, OPT_BOOL, false)
572 OPTION(mds_verify_scatter, OPT_BOOL, false)
573 OPTION(mds_debug_scatterstat, OPT_BOOL, false)
574 OPTION(mds_debug_frag, OPT_BOOL, false)
575 OPTION(mds_debug_auth_pins, OPT_BOOL, false)
576 OPTION(mds_debug_subtrees, OPT_BOOL, false)
577 OPTION(mds_kill_mdstable_at, OPT_INT, 0)
578 OPTION(mds_kill_export_at, OPT_INT, 0)
579 OPTION(mds_kill_import_at, OPT_INT, 0)
580 OPTION(mds_kill_link_at, OPT_INT, 0)
581 OPTION(mds_kill_rename_at, OPT_INT, 0)
582 OPTION(mds_kill_openc_at, OPT_INT, 0)
583 OPTION(mds_kill_journal_at, OPT_INT, 0)
584 OPTION(mds_kill_journal_expire_at, OPT_INT, 0)
585 OPTION(mds_kill_journal_replay_at, OPT_INT, 0)
586 OPTION(mds_journal_format, OPT_U32, 1) // Default to most recent JOURNAL_FORMAT_*
587 OPTION(mds_kill_create_at, OPT_INT, 0)
588 OPTION(mds_inject_traceless_reply_probability, OPT_DOUBLE, 0) /* fraction
589 of MDS modify replies, in [0, 1], for which
590 we skip sending the client a trace */
591 OPTION(mds_wipe_sessions, OPT_BOOL, false)
592 OPTION(mds_wipe_ino_prealloc, OPT_BOOL, false)
593 OPTION(mds_skip_ino, OPT_INT, 0)
594 OPTION(mds_standby_for_name, OPT_STR, "")
595 OPTION(mds_standby_for_rank, OPT_INT, -1)
596 OPTION(mds_standby_for_fscid, OPT_INT, -1)
597 OPTION(mds_standby_replay, OPT_BOOL, false)
598 OPTION(mds_enable_op_tracker, OPT_BOOL, true) // enable/disable MDS op tracking
599 OPTION(mds_op_history_size, OPT_U32, 20) // Max number of completed ops to track
600 OPTION(mds_op_history_duration, OPT_U32, 600) // Oldest completed op to track
601 OPTION(mds_op_complaint_time, OPT_FLOAT, 30) // how many seconds old makes an op complaint-worthy
602 OPTION(mds_op_log_threshold, OPT_INT, 5) // how many op log messages to show in one go
603 OPTION(mds_snap_min_uid, OPT_U32, 0) // The minimum UID required to create a snapshot
604 OPTION(mds_snap_max_uid, OPT_U32, 4294967294) // The maximum UID allowed to create a snapshot
605 OPTION(mds_snap_rstat, OPT_BOOL, false) // enable/disable nested stat for snapshot
606 OPTION(mds_verify_backtrace, OPT_U32, 1)
607 // detect clients which aren't trimming completed requests
608 OPTION(mds_max_completed_flushes, OPT_U32, 100000)
609 OPTION(mds_max_completed_requests, OPT_U32, 100000)
610
611 OPTION(mds_action_on_write_error, OPT_U32, 1) // 0: ignore; 1: force readonly; 2: crash
612 OPTION(mds_mon_shutdown_timeout, OPT_DOUBLE, 5)
613
614 // Maximum number of concurrent stray files to purge
615 OPTION(mds_max_purge_files, OPT_U32, 64)
616 // Maximum number of concurrent RADOS ops to issue in purging
617 OPTION(mds_max_purge_ops, OPT_U32, 8192)
618 // Maximum number of concurrent RADOS ops to issue in purging, scaled by PG count
619 OPTION(mds_max_purge_ops_per_pg, OPT_FLOAT, 0.5)
620
621 OPTION(mds_purge_queue_busy_flush_period, OPT_FLOAT, 1.0)
622
623 OPTION(mds_root_ino_uid, OPT_INT, 0) // The UID of / on new filesystems
624 OPTION(mds_root_ino_gid, OPT_INT, 0) // The GID of / on new filesystems
625
626 OPTION(mds_max_scrub_ops_in_progress, OPT_INT, 5) // the number of simultaneous scrubs allowed
627
628 // Maximum number of damaged frags/dentries before whole MDS rank goes damaged
629 OPTION(mds_damage_table_max_entries, OPT_INT, 10000)
630
631 // verify backend can support configured max object name length
632 OPTION(osd_check_max_object_name_len_on_startup, OPT_BOOL, true)
633
634 // Maximum number of backfills to or from a single osd
635 OPTION(osd_max_backfills, OPT_U64, 1)
636
637 // Minimum recovery priority (255 = max, smaller = lower)
638 OPTION(osd_min_recovery_priority, OPT_INT, 0)
639
640 // Seconds to wait before retrying refused backfills
641 OPTION(osd_backfill_retry_interval, OPT_DOUBLE, 30.0)
642
643 // Seconds to wait before retrying refused recovery
644 OPTION(osd_recovery_retry_interval, OPT_DOUBLE, 30.0)
645
646 // max agent flush ops
647 OPTION(osd_agent_max_ops, OPT_INT, 4)
648 OPTION(osd_agent_max_low_ops, OPT_INT, 2)
649 OPTION(osd_agent_min_evict_effort, OPT_FLOAT, .1)
650 OPTION(osd_agent_quantize_effort, OPT_FLOAT, .1)
651 OPTION(osd_agent_delay_time, OPT_FLOAT, 5.0)
652
653 // osd ignore history.last_epoch_started in find_best_info
654 OPTION(osd_find_best_info_ignore_history_les, OPT_BOOL, false)
655
656 // decay atime and hist histograms after how many objects go by
657 OPTION(osd_agent_hist_halflife, OPT_INT, 1000)
658
659 // must be this amount over the threshold to enable,
660 // this amount below the threshold to disable.
661 OPTION(osd_agent_slop, OPT_FLOAT, .02)
662
663 OPTION(osd_uuid, OPT_UUID, uuid_d())
664 OPTION(osd_data, OPT_STR, "/var/lib/ceph/osd/$cluster-$id")
665 OPTION(osd_journal, OPT_STR, "/var/lib/ceph/osd/$cluster-$id/journal")
666 OPTION(osd_journal_size, OPT_INT, 5120) // in mb
667 OPTION(osd_journal_flush_on_shutdown, OPT_BOOL, true) // Flush journal to data store on shutdown
668 // flags for specific control purpose during osd mount() process.
669 // e.g., can be 1 to skip over replaying journal
670 // or 2 to skip over mounting omap or 3 to skip over both.
671 // This might be helpful in case the journal is totally corrupted
672 // and we still want to bring the osd daemon back normally, etc.
673 OPTION(osd_os_flags, OPT_U32, 0)
674 OPTION(osd_max_write_size, OPT_INT, 90)
675 OPTION(osd_max_pgls, OPT_U64, 1024) // max number of pgls entries to return
676 OPTION(osd_client_message_size_cap, OPT_U64, 500*1024L*1024L) // client data allowed in-memory (in bytes)
677 OPTION(osd_client_message_cap, OPT_U64, 100) // num client messages allowed in-memory
678 OPTION(osd_pg_bits, OPT_INT, 6) // bits per osd
679 OPTION(osd_pgp_bits, OPT_INT, 6) // bits per osd
680 OPTION(osd_crush_chooseleaf_type, OPT_INT, 1) // 1 = host
681 OPTION(osd_pool_use_gmt_hitset, OPT_BOOL, true) // try to use gmt for hitset archive names if all osds in cluster support it.
682 OPTION(osd_crush_update_on_start, OPT_BOOL, true)
683 OPTION(osd_crush_initial_weight, OPT_DOUBLE, -1) // if >=0, the initial weight is for newly added osds.
684 OPTION(osd_pool_default_crush_rule, OPT_INT, -1) // deprecated for osd_pool_default_crush_replicated_ruleset
685 OPTION(osd_pool_default_crush_replicated_ruleset, OPT_INT, CEPH_DEFAULT_CRUSH_REPLICATED_RULESET)
686 OPTION(osd_pool_erasure_code_stripe_unit, OPT_U32, 4096) // in bytes
687 OPTION(osd_pool_default_size, OPT_INT, 3)
688 OPTION(osd_pool_default_min_size, OPT_INT, 0) // 0 means no specific default; ceph will use size-size/2
689 OPTION(osd_pool_default_pg_num, OPT_INT, 8) // number of PGs for new pools. Configure in global or mon section of ceph.conf
690 OPTION(osd_pool_default_pgp_num, OPT_INT, 8) // number of PGs for placement purposes. Should be equal to pg_num
691 OPTION(osd_pool_default_erasure_code_profile,
692 OPT_STR,
693 "plugin=jerasure "
694 "technique=reed_sol_van "
695 "k=2 "
696 "m=1 "
697 ) // default properties of osd pool create
698 OPTION(osd_erasure_code_plugins, OPT_STR,
699 "jerasure"
700 " lrc"
701 #ifdef HAVE_BETTER_YASM_ELF64
702 " isa"
703 #endif
704 ) // list of erasure code plugins
705
706 // Allows the "peered" state for recovery and backfill below min_size
707 OPTION(osd_allow_recovery_below_min_size, OPT_BOOL, true)
708
709 OPTION(osd_pool_default_flags, OPT_INT, 0) // default flags for new pools
710 OPTION(osd_pool_default_flag_hashpspool, OPT_BOOL, true) // use new pg hashing to prevent pool/pg overlap
711 OPTION(osd_pool_default_flag_nodelete, OPT_BOOL, false) // pool can't be deleted
712 OPTION(osd_pool_default_flag_nopgchange, OPT_BOOL, false) // pool's pg and pgp num can't be changed
713 OPTION(osd_pool_default_flag_nosizechange, OPT_BOOL, false) // pool's size and min size can't be changed
714 OPTION(osd_pool_default_hit_set_bloom_fpp, OPT_FLOAT, .05)
715 OPTION(osd_pool_default_cache_target_dirty_ratio, OPT_FLOAT, .4)
716 OPTION(osd_pool_default_cache_target_dirty_high_ratio, OPT_FLOAT, .6)
717 OPTION(osd_pool_default_cache_target_full_ratio, OPT_FLOAT, .8)
718 OPTION(osd_pool_default_cache_min_flush_age, OPT_INT, 0) // seconds
719 OPTION(osd_pool_default_cache_min_evict_age, OPT_INT, 0) // seconds
720 OPTION(osd_pool_default_cache_max_evict_check_size, OPT_INT, 10) // max size to check for eviction
721 OPTION(osd_hit_set_min_size, OPT_INT, 1000) // min target size for a HitSet
722 OPTION(osd_hit_set_max_size, OPT_INT, 100000) // max target size for a HitSet
723 OPTION(osd_hit_set_namespace, OPT_STR, ".ceph-internal") // rados namespace for hit_set tracking
724
725 // conservative default throttling values
726 OPTION(osd_tier_promote_max_objects_sec, OPT_U64, 25)
727 OPTION(osd_tier_promote_max_bytes_sec, OPT_U64, 5 * 1024*1024)
728
729 OPTION(osd_tier_default_cache_mode, OPT_STR, "writeback")
730 OPTION(osd_tier_default_cache_hit_set_count, OPT_INT, 4)
731 OPTION(osd_tier_default_cache_hit_set_period, OPT_INT, 1200)
732 OPTION(osd_tier_default_cache_hit_set_type, OPT_STR, "bloom")
733 OPTION(osd_tier_default_cache_min_read_recency_for_promote, OPT_INT, 1) // number of recent HitSets the object must appear in to be promoted (on read)
734 OPTION(osd_tier_default_cache_min_write_recency_for_promote, OPT_INT, 1) // number of recent HitSets the object must appear in to be promoted (on write)
735 OPTION(osd_tier_default_cache_hit_set_grade_decay_rate, OPT_INT, 20)
736 OPTION(osd_tier_default_cache_hit_set_search_last_n, OPT_INT, 1)
737
738 OPTION(osd_map_dedup, OPT_BOOL, true)
739 OPTION(osd_map_max_advance, OPT_INT, 150) // make this < cache_size!
740 OPTION(osd_map_cache_size, OPT_INT, 200)
741 OPTION(osd_map_message_max, OPT_INT, 100) // max maps per MOSDMap message
742 OPTION(osd_map_share_max_epochs, OPT_INT, 100) // cap on # of inc maps we send to peers, clients
743 OPTION(osd_inject_bad_map_crc_probability, OPT_FLOAT, 0)
744 OPTION(osd_inject_failure_on_pg_removal, OPT_BOOL, false)
745 // shut down the OSD if its status flips more than osd_max_markdown_count times within the most recent osd_max_markdown_period seconds
746 OPTION(osd_max_markdown_period , OPT_INT, 600)
747 OPTION(osd_max_markdown_count, OPT_INT, 5)
748
749 OPTION(osd_op_threads, OPT_INT, 2) // 0 == no threading
750 OPTION(osd_peering_wq_batch_size, OPT_U64, 20)
751 OPTION(osd_op_pq_max_tokens_per_priority, OPT_U64, 4194304)
752 OPTION(osd_op_pq_min_cost, OPT_U64, 65536)
753 OPTION(osd_disk_threads, OPT_INT, 1)
754 OPTION(osd_disk_thread_ioprio_class, OPT_STR, "") // rt (realtime), be (best effort), or idle
755 OPTION(osd_disk_thread_ioprio_priority, OPT_INT, -1) // 0-7
756 OPTION(osd_recover_clone_overlap, OPT_BOOL, true) // preserve clone_overlap during recovery/migration
757 OPTION(osd_op_num_threads_per_shard, OPT_INT, 2)
758 OPTION(osd_op_num_shards, OPT_INT, 5)
759 OPTION(osd_op_queue, OPT_STR, "wpq") // PrioritizedQueue (prio), Weighted Priority Queue (wpq), or debug_random
760 OPTION(osd_op_queue_cut_off, OPT_STR, "low") // Min priority to go to strict queue. (low, high, debug_random)
761
762 OPTION(osd_ignore_stale_divergent_priors, OPT_BOOL, false) // do not assert on divergent_prior entries which aren't in the log and whose on-disk objects are newer
763
764 // Set to true for testing. Users should NOT set this.
765 // If set to true even after reading enough shards to
766 // decode the object, any error will be reported.
767 OPTION(osd_read_ec_check_for_errors, OPT_BOOL, false) // return error if any ec shard has an error
768
769 // Only use clone_overlap for recovery if there are fewer than
770 // osd_recover_clone_overlap_limit entries in the overlap set
771 OPTION(osd_recover_clone_overlap_limit, OPT_INT, 10)
772
773 OPTION(osd_backfill_scan_min, OPT_INT, 64)
774 OPTION(osd_backfill_scan_max, OPT_INT, 512)
775 OPTION(osd_op_thread_timeout, OPT_INT, 15)
776 OPTION(osd_op_thread_suicide_timeout, OPT_INT, 150)
777 OPTION(osd_recovery_thread_timeout, OPT_INT, 30)
778 OPTION(osd_recovery_thread_suicide_timeout, OPT_INT, 300)
779 OPTION(osd_recovery_sleep, OPT_FLOAT, 0) // seconds to sleep between recovery ops
780 OPTION(osd_snap_trim_sleep, OPT_DOUBLE, 0)
781 OPTION(osd_scrub_invalid_stats, OPT_BOOL, true)
782 OPTION(osd_remove_thread_timeout, OPT_INT, 60*60)
783 OPTION(osd_remove_thread_suicide_timeout, OPT_INT, 10*60*60)
784 OPTION(osd_command_thread_timeout, OPT_INT, 10*60)
785 OPTION(osd_command_thread_suicide_timeout, OPT_INT, 15*60)
786 OPTION(osd_heartbeat_addr, OPT_ADDR, entity_addr_t())
787 OPTION(osd_heartbeat_interval, OPT_INT, 6) // (seconds) how often we ping peers
788
789 // (seconds) how long before we decide a peer has failed
790 // This setting is read by both the MONs and the OSDs, so it has to be set to the same value for both in the configuration
791 OPTION(osd_heartbeat_grace, OPT_INT, 20)
792 OPTION(osd_heartbeat_min_peers, OPT_INT, 10) // minimum number of peers
793 OPTION(osd_heartbeat_use_min_delay_socket, OPT_BOOL, false) // if true, prioritize the heartbeat tcp socket and set its dscp to CS6
794
795 // max number of parallel snap trims/pg
796 OPTION(osd_pg_max_concurrent_snap_trims, OPT_U64, 2)
797 // max number of trimming pgs
798 OPTION(osd_max_trimming_pgs, OPT_U64, 2)
799
800 // minimum number of peers that must be reachable to mark ourselves
801 // back up after being wrongly marked down.
802 OPTION(osd_heartbeat_min_healthy_ratio, OPT_FLOAT, .33)
803
804 OPTION(osd_mon_heartbeat_interval, OPT_INT, 30) // (seconds) how often to ping monitor if no peers
805 OPTION(osd_mon_report_interval_max, OPT_INT, 600)
806 OPTION(osd_mon_report_interval_min, OPT_INT, 5) // pg stats, failures, up_thru, boot.
807 OPTION(osd_mon_report_max_in_flight, OPT_INT, 2) // max updates in flight
808 OPTION(osd_beacon_report_interval, OPT_INT, 300) // (seconds) how often to send a beacon message to the monitor
809 OPTION(osd_pg_stat_report_interval_max, OPT_INT, 500) // report pg stats for any given pg at least this often
810 OPTION(osd_mon_ack_timeout, OPT_DOUBLE, 30.0) // time out a mon if it doesn't ack stats
811 OPTION(osd_stats_ack_timeout_factor, OPT_DOUBLE, 2.0) // multiples of mon_ack_timeout
812 OPTION(osd_stats_ack_timeout_decay, OPT_DOUBLE, .9)
813 OPTION(osd_default_data_pool_replay_window, OPT_INT, 45)
814 OPTION(osd_preserve_trimmed_log, OPT_BOOL, false)
815 OPTION(osd_auto_mark_unfound_lost, OPT_BOOL, false)
816 OPTION(osd_recovery_delay_start, OPT_FLOAT, 0)
817 OPTION(osd_recovery_max_active, OPT_U64, 3)
818 OPTION(osd_recovery_max_single_start, OPT_U64, 1)
819 OPTION(osd_recovery_max_chunk, OPT_U64, 8<<20) // max size of push chunk
820 OPTION(osd_recovery_max_omap_entries_per_chunk, OPT_U64, 64000) // max number of omap entries per chunk; 0 to disable limit
821 OPTION(osd_copyfrom_max_chunk, OPT_U64, 8<<20) // max size of a COPYFROM chunk
822 OPTION(osd_push_per_object_cost, OPT_U64, 1000) // push cost per object
823 OPTION(osd_max_push_cost, OPT_U64, 8<<20) // max size of push message
824 OPTION(osd_max_push_objects, OPT_U64, 10) // max objects in single push op
825 OPTION(osd_recovery_forget_lost_objects, OPT_BOOL, false) // off for now
826 OPTION(osd_max_scrubs, OPT_INT, 1)
827 OPTION(osd_scrub_during_recovery, OPT_BOOL, false) // Allow new scrubs to start while recovery is active on the OSD
828 OPTION(osd_scrub_begin_hour, OPT_INT, 0)
829 OPTION(osd_scrub_end_hour, OPT_INT, 24)
830 OPTION(osd_scrub_load_threshold, OPT_FLOAT, 0.5)
831 OPTION(osd_scrub_min_interval, OPT_FLOAT, 60*60*24) // if load is low
832 OPTION(osd_scrub_max_interval, OPT_FLOAT, 7*60*60*24) // regardless of load
833 OPTION(osd_scrub_interval_randomize_ratio, OPT_FLOAT, 0.5) // randomize the scheduled scrub in the span of [min,min*(1+randomize_ratio))
834 OPTION(osd_scrub_backoff_ratio, OPT_DOUBLE, .66) // the probability to back off the scheduled scrub
835 OPTION(osd_scrub_chunk_min, OPT_INT, 5)
836 OPTION(osd_scrub_chunk_max, OPT_INT, 25)
837 OPTION(osd_scrub_sleep, OPT_FLOAT, 0) // sleep between [deep]scrub ops
838 OPTION(osd_scrub_auto_repair, OPT_BOOL, false) // whether to auto-repair inconsistencies upon deep-scrubbing
839 OPTION(osd_scrub_auto_repair_num_errors, OPT_U32, 5) // only auto-repair when number of errors is below this threshold
840 OPTION(osd_deep_scrub_interval, OPT_FLOAT, 60*60*24*7) // once a week
841 OPTION(osd_deep_scrub_randomize_ratio, OPT_FLOAT, 0.15) // scrubs will randomly become deep scrubs at this rate (0.15 -> 15% of scrubs are deep)
842 OPTION(osd_deep_scrub_stride, OPT_INT, 524288)
843 OPTION(osd_deep_scrub_update_digest_min_age, OPT_INT, 2*60*60) // objects must be this old (seconds) before we update the whole-object digest on scrub
844 OPTION(osd_scan_list_ping_tp_interval, OPT_U64, 100)
845 OPTION(osd_class_dir, OPT_STR, CEPH_LIBDIR "/rados-classes") // where rados plugins are stored
846 OPTION(osd_open_classes_on_start, OPT_BOOL, true)
847 OPTION(osd_class_load_list, OPT_STR, "cephfs hello journal lock log numops "
848 "rbd refcount replica_log rgw statelog timeindex user version") // list of object classes allowed to be loaded (allow all: *)
849 OPTION(osd_class_default_list, OPT_STR, "cephfs hello journal lock log numops "
850 "rbd refcount replica_log rgw statelog timeindex user version") // list of object classes with default execute perm (allow all: *)
851 OPTION(osd_check_for_log_corruption, OPT_BOOL, false)
852 OPTION(osd_use_stale_snap, OPT_BOOL, false)
853 OPTION(osd_rollback_to_cluster_snap, OPT_STR, "")
854 OPTION(osd_default_notify_timeout, OPT_U32, 30) // default notify timeout in seconds
855 OPTION(osd_kill_backfill_at, OPT_INT, 0)
856
857 // Bounds how infrequently a new map epoch will be persisted for a pg
858 OPTION(osd_pg_epoch_persisted_max_stale, OPT_U32, 150) // make this < map_cache_size!
859
860 OPTION(osd_min_pg_log_entries, OPT_U32, 3000) // number of entries to keep in the pg log when trimming it
861 OPTION(osd_max_pg_log_entries, OPT_U32, 10000) // max entries, say when degraded, before we trim
862 OPTION(osd_force_recovery_pg_log_entries_factor, OPT_FLOAT, 1.3) // max entries factor before force recovery
863 OPTION(osd_pg_log_trim_min, OPT_U32, 100)
864 OPTION(osd_op_complaint_time, OPT_FLOAT, 30) // how many seconds old makes an op complaint-worthy
865 OPTION(osd_command_max_records, OPT_INT, 256)
866 OPTION(osd_max_pg_blocked_by, OPT_U32, 16) // max peer osds to report that are blocking our progress
867 OPTION(osd_op_log_threshold, OPT_INT, 5) // how many op log messages to show in one go
868 OPTION(osd_verify_sparse_read_holes, OPT_BOOL, false) // read fiemap-reported holes and verify they are zeros
869 OPTION(osd_backoff_on_unfound, OPT_BOOL, true) // object unfound
870 OPTION(osd_backoff_on_degraded, OPT_BOOL, false) // [mainly for debug?] object unreadable/writeable
871 OPTION(osd_backoff_on_down, OPT_BOOL, true) // pg in down/incomplete state
872 OPTION(osd_backoff_on_peering, OPT_BOOL, false) // [debug] pg peering
873 OPTION(osd_debug_crash_on_ignored_backoff, OPT_BOOL, false) // crash osd if client ignores a backoff; useful for debugging
874 OPTION(osd_debug_inject_dispatch_delay_probability, OPT_DOUBLE, 0)
875 OPTION(osd_debug_inject_dispatch_delay_duration, OPT_DOUBLE, .1)
876 OPTION(osd_debug_drop_ping_probability, OPT_DOUBLE, 0)
877 OPTION(osd_debug_drop_ping_duration, OPT_INT, 0)
878 OPTION(osd_debug_op_order, OPT_BOOL, false)
879 OPTION(osd_debug_verify_missing_on_start, OPT_BOOL, false)
880 OPTION(osd_debug_scrub_chance_rewrite_digest, OPT_U64, 0)
881 OPTION(osd_debug_verify_snaps_on_info, OPT_BOOL, false)
882 OPTION(osd_debug_verify_stray_on_activate, OPT_BOOL, false)
883 OPTION(osd_debug_skip_full_check_in_backfill_reservation, OPT_BOOL, false)
884 OPTION(osd_debug_reject_backfill_probability, OPT_DOUBLE, 0)
885 OPTION(osd_debug_inject_copyfrom_error, OPT_BOOL, false) // inject failure during copyfrom completion
886 OPTION(osd_debug_misdirected_ops, OPT_BOOL, false)
887 OPTION(osd_debug_skip_full_check_in_recovery, OPT_BOOL, false)
888 OPTION(osd_enxio_on_misdirected_op, OPT_BOOL, false)
889 OPTION(osd_debug_verify_cached_snaps, OPT_BOOL, false)
890 OPTION(osd_enable_op_tracker, OPT_BOOL, true) // enable/disable OSD op tracking
891 OPTION(osd_num_op_tracker_shard, OPT_U32, 32) // The number of shards for holding the ops
892 OPTION(osd_op_history_size, OPT_U32, 20) // Max number of completed ops to track
893 OPTION(osd_op_history_duration, OPT_U32, 600) // Oldest completed op to track
894 OPTION(osd_op_history_slow_op_size, OPT_U32, 20) // Max number of slow ops to track
895 OPTION(osd_op_history_slow_op_threshold, OPT_DOUBLE, 10.0) // track the op if over this threshold
896 OPTION(osd_target_transaction_size, OPT_INT, 30) // to adjust various transactions that batch smaller items
897 OPTION(osd_failsafe_full_ratio, OPT_FLOAT, .97) // what % full makes an OSD "full" (failsafe)
898 OPTION(osd_fast_fail_on_connection_refused, OPT_BOOL, true) // immediately mark OSDs as down once they refuse to accept connections
899
900 OPTION(osd_pg_object_context_cache_count, OPT_INT, 64)
901 OPTION(osd_tracing, OPT_BOOL, false) // true if LTTng-UST tracepoints should be enabled
902
903 OPTION(osd_fast_info, OPT_BOOL, true) // use fast info attr, if we can
904
905 // determines whether PGLog::check() compares written out log to stored log
906 OPTION(osd_debug_pg_log_writeout, OPT_BOOL, false)
907 OPTION(osd_loop_before_reset_tphandle, OPT_U32, 64) // Max number of loops before we reset thread-pool's handle
908 // default timeout while calling WaitInterval on an empty queue
909 OPTION(threadpool_default_timeout, OPT_INT, 60)
910 // default wait time for an empty queue before pinging the hb timeout
911 OPTION(threadpool_empty_queue_max_wait, OPT_INT, 2)
912
913 OPTION(leveldb_log_to_ceph_log, OPT_BOOL, true)
914 OPTION(leveldb_write_buffer_size, OPT_U64, 8 *1024*1024) // leveldb write buffer size
915 OPTION(leveldb_cache_size, OPT_U64, 128 *1024*1024) // leveldb cache size
916 OPTION(leveldb_block_size, OPT_U64, 0) // leveldb block size
917 OPTION(leveldb_bloom_size, OPT_INT, 0) // leveldb bloom bits per entry
918 OPTION(leveldb_max_open_files, OPT_INT, 0) // leveldb max open files
919 OPTION(leveldb_compression, OPT_BOOL, true) // leveldb uses compression
920 OPTION(leveldb_paranoid, OPT_BOOL, false) // leveldb paranoid flag
921 OPTION(leveldb_log, OPT_STR, "/dev/null") // enable leveldb log file
922 OPTION(leveldb_compact_on_mount, OPT_BOOL, false)
923
924 OPTION(kinetic_host, OPT_STR, "") // hostname or ip address of a kinetic drive to use
925 OPTION(kinetic_port, OPT_INT, 8123) // port number of the kinetic drive
926 OPTION(kinetic_user_id, OPT_INT, 1) // kinetic user to authenticate as
927 OPTION(kinetic_hmac_key, OPT_STR, "asdfasdf") // kinetic key to authenticate with
928 OPTION(kinetic_use_ssl, OPT_BOOL, false) // whether to secure kinetic traffic with TLS
929
930
931 OPTION(rocksdb_separate_wal_dir, OPT_BOOL, false) // use $path.wal for wal
932 SAFE_OPTION(rocksdb_db_paths, OPT_STR, "") // path,size( path,size)*
933 OPTION(rocksdb_log_to_ceph_log, OPT_BOOL, true) // log to ceph log
934 OPTION(rocksdb_cache_size, OPT_U64, 128*1024*1024) // default rocksdb cache size
935 OPTION(rocksdb_cache_shard_bits, OPT_INT, 4) // rocksdb block cache shard bits, 4 bit -> 16 shards
936 OPTION(rocksdb_block_size, OPT_INT, 4*1024) // default rocksdb block size
937 OPTION(rocksdb_perf, OPT_BOOL, false) // Enabling this will have 5-10% impact on performance for the stats collection
938 OPTION(rocksdb_collect_compaction_stats, OPT_BOOL, false) // For rocksdb, this adds an overhead of 5%~10%; collected only if rocksdb_perf is enabled.
939 OPTION(rocksdb_collect_extended_stats, OPT_BOOL, false) // For rocksdb, this adds an overhead of 5%~10%; collected only if rocksdb_perf is enabled.
940 OPTION(rocksdb_collect_memory_stats, OPT_BOOL, false) // For rocksdb, this adds an overhead of 5%~10%; collected only if rocksdb_perf is enabled.
941 OPTION(rocksdb_enable_rmrange, OPT_BOOL, false) // see https://github.com/facebook/rocksdb/blob/master/include/rocksdb/db.h#L253
942
943 // rocksdb options that will be used for omap(if omap_backend is rocksdb)
944 OPTION(filestore_rocksdb_options, OPT_STR, "")
945 // rocksdb options that will be used in monstore
946 OPTION(mon_rocksdb_options, OPT_STR, "write_buffer_size=33554432,compression=kNoCompression")
947
948 /**
949 * osd_*_priority adjust the relative priority of client io, recovery io,
950 * snaptrim io, etc
951 *
952 * osd_*_priority determines the ratio of available io between client and
953 * recovery. Each option may be set between
954 * 1..63.
955 */
956 OPTION(osd_client_op_priority, OPT_U32, 63)
957 OPTION(osd_recovery_op_priority, OPT_U32, 3)
958
959 OPTION(osd_snap_trim_priority, OPT_U32, 5)
960 OPTION(osd_snap_trim_cost, OPT_U32, 1<<20) // set default cost equal to 1MB io
961
962 OPTION(osd_scrub_priority, OPT_U32, 5)
963 // set default cost equal to 50MB io
964 OPTION(osd_scrub_cost, OPT_U32, 50<<20)
965 // set requested scrub priority higher than scrub priority to make the
966 // requested scrubs jump the queue of scheduled scrubs
967 OPTION(osd_requested_scrub_priority, OPT_U32, 120)
968
969 OPTION(osd_recovery_priority, OPT_U32, 5)
970 // set default cost equal to 20MB io
971 OPTION(osd_recovery_cost, OPT_U32, 20<<20)
972
973 /**
974 * osd_recovery_op_warn_multiple scales the normal warning threshold,
975 * osd_op_complaint_time, so that slow recovery ops won't cause noise
976 */
977 OPTION(osd_recovery_op_warn_multiple, OPT_U32, 16)
978
979 // Max time to wait between notifying mon of shutdown and shutting down
980 OPTION(osd_mon_shutdown_timeout, OPT_DOUBLE, 5)
981
982 OPTION(osd_max_object_size, OPT_U64, 100*1024ULL*1024*1024) // OSD's maximum object size; ULL keeps the product 64-bit even where long is 32 bits
983 OPTION(osd_max_object_name_len, OPT_U32, 2048) // max rados object name len
984 OPTION(osd_max_object_namespace_len, OPT_U32, 256) // max rados object namespace len
985 OPTION(osd_max_attr_name_len, OPT_U32, 100) // max rados attr name len; cannot go higher than 100 chars for file system backends
986 OPTION(osd_max_attr_size, OPT_U64, 0)
987
988 OPTION(osd_max_omap_entries_per_request, OPT_U64, 131072)
989 OPTION(osd_max_omap_bytes_per_request, OPT_U64, 1<<30)
990
991 OPTION(osd_objectstore, OPT_STR, "filestore") // ObjectStore backend type
992 OPTION(osd_objectstore_tracing, OPT_BOOL, false) // true if LTTng-UST tracepoints should be enabled
993 // Override maintaining compatibility with older OSDs
994 // Set to true for testing. Users should NOT set this.
995 OPTION(osd_debug_override_acting_compat, OPT_BOOL, false)
996 OPTION(osd_objectstore_fuse, OPT_BOOL, false)
997
998 OPTION(osd_bench_small_size_max_iops, OPT_U32, 100) // 100 IOPS
999 OPTION(osd_bench_large_size_max_throughput, OPT_U64, 100 << 20) // 100 MB/s
1000 OPTION(osd_bench_max_block_size, OPT_U64, 64 << 20) // cap the block size at 64MB
1001 OPTION(osd_bench_duration, OPT_U32, 30) // duration of 'osd bench', capped at 30s to avoid triggering timeouts
1002
1003 OPTION(osd_blkin_trace_all, OPT_BOOL, false) // create a blkin trace for all osd requests
1004 OPTION(osdc_blkin_trace_all, OPT_BOOL, false) // create a blkin trace for all objecter requests
1005
1006 OPTION(osd_discard_disconnected_ops, OPT_BOOL, true)
1007
1008 OPTION(memstore_device_bytes, OPT_U64, 1024*1024*1024)
1009 OPTION(memstore_page_set, OPT_BOOL, true)
1010 OPTION(memstore_page_size, OPT_U64, 64 << 10)
1011
1012 OPTION(bdev_debug_inflight_ios, OPT_BOOL, false)
1013 OPTION(bdev_inject_crash, OPT_INT, 0) // if N>0, then ~ 1/N IOs will complete before we crash on flush.
1014 OPTION(bdev_inject_crash_flush_delay, OPT_INT, 2) // wait N more seconds on flush
1015 OPTION(bdev_aio, OPT_BOOL, true)
1016 OPTION(bdev_aio_poll_ms, OPT_INT, 250) // milliseconds
1017 OPTION(bdev_aio_max_queue_depth, OPT_INT, 1024)
1018 OPTION(bdev_block_size, OPT_INT, 4096)
1019 OPTION(bdev_debug_aio, OPT_BOOL, false)
1020 OPTION(bdev_debug_aio_suicide_timeout, OPT_FLOAT, 60.0)
1021
1022 // if yes, osd will unbind all NVMe devices from kernel driver and bind them
1023 // to the uio_pci_generic driver. The purpose is to prevent the case where
1024 // NVMe driver is loaded while osd is running.
1025 OPTION(bdev_nvme_unbind_from_kernel, OPT_BOOL, false)
1026 OPTION(bdev_nvme_retry_count, OPT_INT, -1) // -1 means use the default retry count, which is 4
1027
1028 OPTION(objectstore_blackhole, OPT_BOOL, false)
1029
1030 OPTION(bluefs_alloc_size, OPT_U64, 1048576)
1031 OPTION(bluefs_max_prefetch, OPT_U64, 1048576)
1032 OPTION(bluefs_min_log_runway, OPT_U64, 1048576) // alloc when we get this low
1033 OPTION(bluefs_max_log_runway, OPT_U64, 4194304) // alloc this much at a time
1034 OPTION(bluefs_log_compact_min_ratio, OPT_FLOAT, 5.0) // before we consider
1035 OPTION(bluefs_log_compact_min_size, OPT_U64, 16*1048576) // before we consider
1036 OPTION(bluefs_min_flush_size, OPT_U64, 524288) // ignore flush until its this big
1037 OPTION(bluefs_compact_log_sync, OPT_BOOL, false) // sync or async log compaction?
1038 OPTION(bluefs_buffered_io, OPT_BOOL, false)
1039 OPTION(bluefs_sync_write, OPT_BOOL, false)
1040 OPTION(bluefs_allocator, OPT_STR, "bitmap") // stupid | bitmap
1041 OPTION(bluefs_preextend_wal_files, OPT_BOOL, false) // this *requires* that rocksdb has recycling enabled
1042
1043 OPTION(bluestore_bluefs, OPT_BOOL, true)
1044 OPTION(bluestore_bluefs_env_mirror, OPT_BOOL, false) // mirror to normal Env for debug
1045 OPTION(bluestore_bluefs_min, OPT_U64, 1*1024*1024*1024) // 1gb
1046 OPTION(bluestore_bluefs_min_ratio, OPT_FLOAT, .02) // min fs free / total free
1047 OPTION(bluestore_bluefs_max_ratio, OPT_FLOAT, .90) // max fs free / total free
1048 OPTION(bluestore_bluefs_gift_ratio, OPT_FLOAT, .02) // how much to add at a time
1049 OPTION(bluestore_bluefs_reclaim_ratio, OPT_FLOAT, .20) // how much to reclaim at a time
1050 OPTION(bluestore_bluefs_balance_interval, OPT_FLOAT, 1) // how often (sec) to balance free space between bluefs and bluestore
1051 // If you want to use spdk driver, you need to specify NVMe serial number here
1052 // with "spdk:" prefix.
1053 // Users can use 'lspci -vvv -d 8086:0953 | grep "Device Serial Number"' to
1054 // get the serial number of Intel(R) Fultondale NVMe controllers.
1055 // Example:
1056 // bluestore_block_path = spdk:55cd2e404bd73932
1057 // If you want to run multiple SPDK instances per node, you must specify the
1058 // amount of dpdk memory size in MB each instance will use, to make sure each
1059 // instance uses its own dpdk memory
1060 OPTION(bluestore_spdk_mem, OPT_U32, 512)
1061 // A hexadecimal bit mask of the cores to run on. Note the core numbering can change between platforms and should be determined beforehand.
1062 OPTION(bluestore_spdk_coremask, OPT_STR, "0x3")
1063 // Specify the maximum number of I/Os to complete in one batch while checking queue pair completions.
1064 // The default value 0 lets the SPDK nvme library determine the value.
1065 OPTION(bluestore_spdk_max_io_completion, OPT_U32, 0)
1066 OPTION(bluestore_block_path, OPT_STR, "")
1067 OPTION(bluestore_block_size, OPT_U64, 10ULL * 1024*1024*1024) // 10gb for testing; ULL is required because 10 GiB overflows a 32-bit int
1068 OPTION(bluestore_block_create, OPT_BOOL, true)
1069 OPTION(bluestore_block_db_path, OPT_STR, "")
1070 OPTION(bluestore_block_db_size, OPT_U64, 0) // rocksdb ssts (hot/warm)
1071 OPTION(bluestore_block_db_create, OPT_BOOL, false)
1072 OPTION(bluestore_block_wal_path, OPT_STR, "")
1073 OPTION(bluestore_block_wal_size, OPT_U64, 96 * 1024*1024) // rocksdb wal
1074 OPTION(bluestore_block_wal_create, OPT_BOOL, false)
1075 OPTION(bluestore_block_preallocate_file, OPT_BOOL, false) // whether to preallocate space if block/db_path/wal_path is a file rather than a block device
1076 OPTION(bluestore_csum_type, OPT_STR, "crc32c") // none|xxhash32|xxhash64|crc32c|crc32c_16|crc32c_8
1077 OPTION(bluestore_csum_min_block, OPT_U32, 4096)
1078 OPTION(bluestore_csum_max_block, OPT_U32, 64*1024)
1079 OPTION(bluestore_min_alloc_size, OPT_U32, 0)
1080 OPTION(bluestore_min_alloc_size_hdd, OPT_U32, 64*1024)
1081 OPTION(bluestore_min_alloc_size_ssd, OPT_U32, 16*1024)
1082 OPTION(bluestore_max_alloc_size, OPT_U32, 0)
1083 OPTION(bluestore_prefer_deferred_size, OPT_U32, 0)
1084 OPTION(bluestore_prefer_deferred_size_hdd, OPT_U32, 32768)
1085 OPTION(bluestore_prefer_deferred_size_ssd, OPT_U32, 0)
1086 OPTION(bluestore_compression_mode, OPT_STR, "none") // force|aggressive|passive|none
1087 OPTION(bluestore_compression_algorithm, OPT_STR, "snappy")
1088 OPTION(bluestore_compression_min_blob_size, OPT_U32, 0)
1089 OPTION(bluestore_compression_min_blob_size_hdd, OPT_U32, 128*1024)
1090 OPTION(bluestore_compression_min_blob_size_ssd, OPT_U32, 8*1024)
1091 OPTION(bluestore_compression_max_blob_size, OPT_U32, 0)
1092 OPTION(bluestore_compression_max_blob_size_hdd, OPT_U32, 512*1024)
1093 OPTION(bluestore_compression_max_blob_size_ssd, OPT_U32, 64*1024)
1094 /*
1095 * Specifies minimum expected amount of saved allocation units
1096 * per single blob to enable compressed blobs garbage collection
1097 *
1098 */
1099 OPTION(bluestore_gc_enable_blob_threshold, OPT_INT, 0)
1100 /*
1101 * Specifies minimum expected amount of saved allocation units
1102 * per all blobs to enable compressed blobs garbage collection
1103 *
1104 */
1105 OPTION(bluestore_gc_enable_total_threshold, OPT_INT, 0)
1106
1107 OPTION(bluestore_max_blob_size, OPT_U32, 0)
1108 OPTION(bluestore_max_blob_size_hdd, OPT_U32, 512*1024)
1109 OPTION(bluestore_max_blob_size_ssd, OPT_U32, 64*1024)
1110 /*
1111 * Require the net gain of compression at least to be at this ratio,
1112 * otherwise we don't compress.
1113 * And ask for compressing at least 12.5%(1/8) off, by default.
1114 */
1115 OPTION(bluestore_compression_required_ratio, OPT_DOUBLE, .875)
1116 OPTION(bluestore_extent_map_shard_max_size, OPT_U32, 1200)
1117 OPTION(bluestore_extent_map_shard_target_size, OPT_U32, 500)
1118 OPTION(bluestore_extent_map_shard_min_size, OPT_U32, 150)
1119 OPTION(bluestore_extent_map_shard_target_size_slop, OPT_DOUBLE, .2)
1120 OPTION(bluestore_extent_map_inline_shard_prealloc_size, OPT_U32, 256)
1121 OPTION(bluestore_cache_trim_interval, OPT_DOUBLE, .1)
1122 OPTION(bluestore_cache_trim_max_skip_pinned, OPT_U32, 64) // skip this many onodes pinned in cache before we give up
1123 OPTION(bluestore_cache_type, OPT_STR, "2q") // lru, 2q
1124 OPTION(bluestore_2q_cache_kin_ratio, OPT_DOUBLE, .5) // kin page slot size / max page slot size
1125 OPTION(bluestore_2q_cache_kout_ratio, OPT_DOUBLE, .5) // number of kout page slot / total number of page slot
1126 OPTION(bluestore_cache_size, OPT_U64, 1024*1024*1024)
1127 OPTION(bluestore_cache_meta_ratio, OPT_DOUBLE, .9)
1128 OPTION(bluestore_kvbackend, OPT_STR, "rocksdb")
1129 OPTION(bluestore_allocator, OPT_STR, "bitmap") // stupid | bitmap
1130 OPTION(bluestore_freelist_blocks_per_key, OPT_INT, 128)
1131 OPTION(bluestore_bitmapallocator_blocks_per_zone, OPT_INT, 1024) // must be power of 2 aligned, e.g., 512, 1024, 2048...
1132 OPTION(bluestore_bitmapallocator_span_size, OPT_INT, 1024) // must be power of 2 aligned, e.g., 512, 1024, 2048...
1133 OPTION(bluestore_rocksdb_options, OPT_STR, "compression=kNoCompression,max_write_buffer_number=4,min_write_buffer_number_to_merge=1,recycle_log_file_num=4,write_buffer_size=268435456,writable_file_max_buffer_size=0,compaction_readahead_size=2097152")
1134 OPTION(bluestore_fsck_on_mount, OPT_BOOL, false)
1135 OPTION(bluestore_fsck_on_mount_deep, OPT_BOOL, true)
1136 OPTION(bluestore_fsck_on_umount, OPT_BOOL, false)
1137 OPTION(bluestore_fsck_on_umount_deep, OPT_BOOL, true)
1138 OPTION(bluestore_fsck_on_mkfs, OPT_BOOL, true)
1139 OPTION(bluestore_fsck_on_mkfs_deep, OPT_BOOL, false)
1140 OPTION(bluestore_sync_submit_transaction, OPT_BOOL, false) // submit kv txn in queueing thread (not kv_sync_thread)
1141 OPTION(bluestore_throttle_bytes, OPT_U64, 64*1024*1024)
1142 OPTION(bluestore_throttle_deferred_bytes, OPT_U64, 128*1024*1024)
1143 OPTION(bluestore_throttle_cost_per_io_hdd, OPT_U64, 1500000)
1144 OPTION(bluestore_throttle_cost_per_io_ssd, OPT_U64, 4000)
1145 OPTION(bluestore_throttle_cost_per_io, OPT_U64, 0)
1146 OPTION(bluestore_deferred_batch_ops, OPT_U64, 0)
1147 OPTION(bluestore_deferred_batch_ops_hdd, OPT_U64, 64)
1148 OPTION(bluestore_deferred_batch_ops_ssd, OPT_U64, 16)
1149 OPTION(bluestore_nid_prealloc, OPT_INT, 1024)
1150 OPTION(bluestore_blobid_prealloc, OPT_U64, 10240)
1151 OPTION(bluestore_clone_cow, OPT_BOOL, true) // do copy-on-write for clones
1152 OPTION(bluestore_default_buffered_read, OPT_BOOL, true)
1153 OPTION(bluestore_default_buffered_write, OPT_BOOL, false)
1154 OPTION(bluestore_debug_misc, OPT_BOOL, false)
1155 OPTION(bluestore_debug_no_reuse_blocks, OPT_BOOL, false)
1156 OPTION(bluestore_debug_small_allocations, OPT_INT, 0)
1157 OPTION(bluestore_debug_freelist, OPT_BOOL, false)
1158 OPTION(bluestore_debug_prefill, OPT_FLOAT, 0)
1159 OPTION(bluestore_debug_prefragment_max, OPT_INT, 1048576)
1160 OPTION(bluestore_debug_inject_read_err, OPT_BOOL, false)
1161 OPTION(bluestore_debug_randomize_serial_transaction, OPT_INT, 0)
1162 OPTION(bluestore_debug_omit_block_device_write, OPT_BOOL, false)
1163 OPTION(bluestore_shard_finishers, OPT_BOOL, false)
1164
1165 OPTION(kstore_max_ops, OPT_U64, 512)
1166 OPTION(kstore_max_bytes, OPT_U64, 64*1024*1024)
1167 OPTION(kstore_backend, OPT_STR, "rocksdb")
1168 OPTION(kstore_rocksdb_options, OPT_STR, "compression=kNoCompression")
1169 OPTION(kstore_rocksdb_bloom_bits_per_key, OPT_INT, 0)
1170 OPTION(kstore_fsck_on_mount, OPT_BOOL, false)
1171 OPTION(kstore_fsck_on_mount_deep, OPT_BOOL, true)
1172 OPTION(kstore_nid_prealloc, OPT_U64, 1024)
1173 OPTION(kstore_sync_transaction, OPT_BOOL, false)
1174 OPTION(kstore_sync_submit_transaction, OPT_BOOL, false)
1175 OPTION(kstore_onode_map_size, OPT_U64, 1024)
1176 OPTION(kstore_cache_tails, OPT_BOOL, true)
1177 OPTION(kstore_default_stripe_size, OPT_INT, 65536)
1178
1179 OPTION(filestore_omap_backend, OPT_STR, "rocksdb")
1180 OPTION(filestore_omap_backend_path, OPT_STR, "")
1181
1182 /// filestore wb throttle limits
1183 OPTION(filestore_wbthrottle_enable, OPT_BOOL, true)
1184 OPTION(filestore_wbthrottle_btrfs_bytes_start_flusher, OPT_U64, 41943040)
1185 OPTION(filestore_wbthrottle_btrfs_bytes_hard_limit, OPT_U64, 419430400)
1186 OPTION(filestore_wbthrottle_btrfs_ios_start_flusher, OPT_U64, 500)
1187 OPTION(filestore_wbthrottle_btrfs_ios_hard_limit, OPT_U64, 5000)
1188 OPTION(filestore_wbthrottle_btrfs_inodes_start_flusher, OPT_U64, 500)
1189 OPTION(filestore_wbthrottle_xfs_bytes_start_flusher, OPT_U64, 41943040)
1190 OPTION(filestore_wbthrottle_xfs_bytes_hard_limit, OPT_U64, 419430400)
1191 OPTION(filestore_wbthrottle_xfs_ios_start_flusher, OPT_U64, 500)
1192 OPTION(filestore_wbthrottle_xfs_ios_hard_limit, OPT_U64, 5000)
1193 OPTION(filestore_wbthrottle_xfs_inodes_start_flusher, OPT_U64, 500)
1194
1195 /// These must be less than the fd limit
1196 OPTION(filestore_wbthrottle_btrfs_inodes_hard_limit, OPT_U64, 5000)
1197 OPTION(filestore_wbthrottle_xfs_inodes_hard_limit, OPT_U64, 5000)
1198
1199 //Introduce a O_DSYNC write in the filestore
1200 OPTION(filestore_odsync_write, OPT_BOOL, false)
1201
1202 // Tests index failure paths
1203 OPTION(filestore_index_retry_probability, OPT_DOUBLE, 0)
1204
1205 // Allow object read error injection
1206 OPTION(filestore_debug_inject_read_err, OPT_BOOL, false)
1207
1208 OPTION(filestore_debug_omap_check, OPT_BOOL, false) // Expensive debugging check on sync
1209 OPTION(filestore_omap_header_cache_size, OPT_INT, 1024)
1210
1211 // Use omap for xattrs for attrs over
1212 // filestore_max_inline_xattr_size or
1213 OPTION(filestore_max_inline_xattr_size, OPT_U32, 0) //Override
1214 OPTION(filestore_max_inline_xattr_size_xfs, OPT_U32, 65536)
1215 OPTION(filestore_max_inline_xattr_size_btrfs, OPT_U32, 2048)
1216 OPTION(filestore_max_inline_xattr_size_other, OPT_U32, 512)
1217
1218 // for more than filestore_max_inline_xattrs attrs
1219 OPTION(filestore_max_inline_xattrs, OPT_U32, 0) //Override
1220 OPTION(filestore_max_inline_xattrs_xfs, OPT_U32, 10)
1221 OPTION(filestore_max_inline_xattrs_btrfs, OPT_U32, 10)
1222 OPTION(filestore_max_inline_xattrs_other, OPT_U32, 2)
1223
1224 // max xattr value size
1225 OPTION(filestore_max_xattr_value_size, OPT_U32, 0) //Override
1226 OPTION(filestore_max_xattr_value_size_xfs, OPT_U32, 64<<10)
1227 OPTION(filestore_max_xattr_value_size_btrfs, OPT_U32, 64<<10)
1228 // ext4 allows 4k xattrs total including some smallish extra fields and the
1229 // keys. We're allowing two 512-byte inline attrs in addition to some filestore
1230 // replay attrs. After accounting for those, we still need to fit up to
1231 // two attrs of this value. That means we need this value to be around 1k
1232 // to be safe. This is hacky, but it's not worth complicating the code
1233 // to work around ext4's total xattr limit.
1234 OPTION(filestore_max_xattr_value_size_other, OPT_U32, 1<<10)
1235
1236 OPTION(filestore_sloppy_crc, OPT_BOOL, false) // track sloppy crcs
1237 OPTION(filestore_sloppy_crc_block_size, OPT_INT, 65536)
1238
1239 OPTION(filestore_max_alloc_hint_size, OPT_U64, 1ULL << 20) // bytes
1240
1241 OPTION(filestore_max_sync_interval, OPT_DOUBLE, 5) // seconds
1242 OPTION(filestore_min_sync_interval, OPT_DOUBLE, .01) // seconds
1243 OPTION(filestore_btrfs_snap, OPT_BOOL, true)
1244 OPTION(filestore_btrfs_clone_range, OPT_BOOL, true)
1245 OPTION(filestore_zfs_snap, OPT_BOOL, false) // zfsonlinux is still unstable
1246 OPTION(filestore_fsync_flushes_journal_data, OPT_BOOL, false)
1247 OPTION(filestore_fiemap, OPT_BOOL, false) // (try to) use fiemap
1248 OPTION(filestore_punch_hole, OPT_BOOL, false)
1249 OPTION(filestore_seek_data_hole, OPT_BOOL, false) // (try to) use seek_data/hole
1250 OPTION(filestore_splice, OPT_BOOL, false)
1251 OPTION(filestore_fadvise, OPT_BOOL, true)
1252 //collect device partition information for management application to use
1253 OPTION(filestore_collect_device_partition_information, OPT_BOOL, true)
1254
1255 // (try to) use extsize for alloc hint NOTE: extsize seems to trigger
1256 // data corruption in xfs prior to kernel 3.5. filestore will
1257 // implicitly disable this if it cannot confirm the kernel is newer
1258 // than that.
1259 // NOTE: This option involves a tradeoff: When disabled, fragmentation is
1260 // worse, but large sequential writes are faster. When enabled, large
1261 // sequential writes are slower, but fragmentation is reduced.
1262 OPTION(filestore_xfs_extsize, OPT_BOOL, false)
1263
1264 OPTION(filestore_journal_parallel, OPT_BOOL, false)
1265 OPTION(filestore_journal_writeahead, OPT_BOOL, false)
1266 OPTION(filestore_journal_trailing, OPT_BOOL, false)
1267 OPTION(filestore_queue_max_ops, OPT_U64, 50)
1268 OPTION(filestore_queue_max_bytes, OPT_U64, 100 << 20)
1269
1270 OPTION(filestore_caller_concurrency, OPT_INT, 10)
1271
1272 /// Expected filestore throughput in B/s
1273 OPTION(filestore_expected_throughput_bytes, OPT_DOUBLE, 200 << 20)
1274 /// Expected filestore throughput in ops/s
1275 OPTION(filestore_expected_throughput_ops, OPT_DOUBLE, 200)
1276
1277 /// Filestore max delay multiple. Defaults to 0 (disabled)
1278 OPTION(filestore_queue_max_delay_multiple, OPT_DOUBLE, 0)
1279 /// Filestore high delay multiple. Defaults to 0 (disabled)
1280 OPTION(filestore_queue_high_delay_multiple, OPT_DOUBLE, 0)
1281
1282 /// Use above to inject delays intended to keep the op queue between low and high
1283 OPTION(filestore_queue_low_threshhold, OPT_DOUBLE, 0.3)
1284 OPTION(filestore_queue_high_threshhold, OPT_DOUBLE, 0.9)
1285
1286 OPTION(filestore_op_threads, OPT_INT, 2)
1287 OPTION(filestore_op_thread_timeout, OPT_INT, 60)
1288 OPTION(filestore_op_thread_suicide_timeout, OPT_INT, 180)
1289 OPTION(filestore_commit_timeout, OPT_FLOAT, 600)
1290 OPTION(filestore_fiemap_threshold, OPT_INT, 4096)
1291 OPTION(filestore_merge_threshold, OPT_INT, 10)
1292 OPTION(filestore_split_multiple, OPT_INT, 2)
1293 OPTION(filestore_update_to, OPT_INT, 1000)
1294 OPTION(filestore_blackhole, OPT_BOOL, false) // drop any new transactions on the floor
1295 OPTION(filestore_fd_cache_size, OPT_INT, 128) // FD lru size
1296 OPTION(filestore_fd_cache_shards, OPT_INT, 16) // FD number of shards
1297 OPTION(filestore_ondisk_finisher_threads, OPT_INT, 1)
1298 OPTION(filestore_apply_finisher_threads, OPT_INT, 1)
1299 OPTION(filestore_dump_file, OPT_STR, "") // file onto which store transaction dumps
1300 OPTION(filestore_kill_at, OPT_INT, 0) // inject a failure at the n'th opportunity
1301 OPTION(filestore_inject_stall, OPT_INT, 0) // artificially stall for N seconds in op queue thread
1302 OPTION(filestore_fail_eio, OPT_BOOL, true) // fail/crash on EIO
1303 OPTION(filestore_debug_verify_split, OPT_BOOL, false)
1304 OPTION(journal_dio, OPT_BOOL, true)
1305 OPTION(journal_aio, OPT_BOOL, true)
1306 OPTION(journal_force_aio, OPT_BOOL, false)
1307 OPTION(journal_block_size, OPT_INT, 4096)
1308
1309 // max bytes to search ahead in journal searching for corruption
1310 OPTION(journal_max_corrupt_search, OPT_U64, 10<<20)
1311 OPTION(journal_block_align, OPT_BOOL, true)
1312 OPTION(journal_write_header_frequency, OPT_U64, 0)
1313 OPTION(journal_max_write_bytes, OPT_INT, 10 << 20)
1314 OPTION(journal_max_write_entries, OPT_INT, 100)
1315
1316 /// Target range for journal fullness
1317 OPTION(journal_throttle_low_threshhold, OPT_DOUBLE, 0.6)
1318 OPTION(journal_throttle_high_threshhold, OPT_DOUBLE, 0.9)
1319
1320 /// Multiple over expected at high_threshhold. Defaults to 0 (disabled).
1321 OPTION(journal_throttle_high_multiple, OPT_DOUBLE, 0)
1322 /// Multiple over expected at max. Defaults to 0 (disabled).
1323 OPTION(journal_throttle_max_multiple, OPT_DOUBLE, 0)
1324
1325 OPTION(journal_align_min_size, OPT_INT, 64 << 10) // align data payloads >= this.
1326 OPTION(journal_replay_from, OPT_INT, 0)
1327 OPTION(journal_zero_on_create, OPT_BOOL, false)
1328 OPTION(journal_ignore_corruption, OPT_BOOL, false) // assume journal is not corrupt
1329 OPTION(journal_discard, OPT_BOOL, false) // when an SSD is used as the journal, whether to discard journal data that is no longer in use
1330
1331 OPTION(fio_dir, OPT_STR, "/tmp/fio") // fio data directory for fio-objectstore
1332
1333 OPTION(rados_mon_op_timeout, OPT_DOUBLE, 0) // how many seconds to wait for a response from the monitor before returning an error from a rados operation. 0 means no limit.
1334 OPTION(rados_osd_op_timeout, OPT_DOUBLE, 0) // how many seconds to wait for a response from osds before returning an error from a rados operation. 0 means no limit.
1335 OPTION(rados_tracing, OPT_BOOL, false) // true if LTTng-UST tracepoints should be enabled
1336
1337 OPTION(rbd_op_threads, OPT_INT, 1)
1338 OPTION(rbd_op_thread_timeout, OPT_INT, 60)
1339 OPTION(rbd_non_blocking_aio, OPT_BOOL, true) // process AIO ops from a worker thread to prevent blocking
1340 OPTION(rbd_cache, OPT_BOOL, true) // whether to enable caching (writeback unless rbd_cache_max_dirty is 0)
1341 OPTION(rbd_cache_writethrough_until_flush, OPT_BOOL, true) // whether to make writeback caching writethrough until flush is called, to be sure the user of librbd will send flushes so that writeback is safe
1342 OPTION(rbd_cache_size, OPT_LONGLONG, 32<<20) // cache size in bytes
1343 OPTION(rbd_cache_max_dirty, OPT_LONGLONG, 24<<20) // dirty limit in bytes - set to 0 for write-through caching
1344 OPTION(rbd_cache_target_dirty, OPT_LONGLONG, 16<<20) // target dirty limit in bytes
1345 OPTION(rbd_cache_max_dirty_age, OPT_FLOAT, 1.0) // seconds in cache before writeback starts
1346 OPTION(rbd_cache_max_dirty_object, OPT_INT, 0) // dirty limit for objects - set to 0 for auto calculate from rbd_cache_size
1347 OPTION(rbd_cache_block_writes_upfront, OPT_BOOL, false) // whether to block writes to the cache before the aio_write call completes (true), or block before the aio completion is called (false)
1348 OPTION(rbd_concurrent_management_ops, OPT_INT, 10) // how many operations can be in flight for a management operation like deleting or resizing an image
1349 OPTION(rbd_balance_snap_reads, OPT_BOOL, false)
1350 OPTION(rbd_localize_snap_reads, OPT_BOOL, false)
1351 OPTION(rbd_balance_parent_reads, OPT_BOOL, false)
1352 OPTION(rbd_localize_parent_reads, OPT_BOOL, true)
1353 OPTION(rbd_readahead_trigger_requests, OPT_INT, 10) // number of sequential requests necessary to trigger readahead
1354 OPTION(rbd_readahead_max_bytes, OPT_LONGLONG, 512 * 1024) // set to 0 to disable readahead
1355 OPTION(rbd_readahead_disable_after_bytes, OPT_LONGLONG, 50 * 1024 * 1024) // how many bytes are read in total before readahead is disabled
1356 OPTION(rbd_clone_copy_on_read, OPT_BOOL, false)
1357 OPTION(rbd_blacklist_on_break_lock, OPT_BOOL, true) // whether to blacklist clients whose lock was broken
1358 OPTION(rbd_blacklist_expire_seconds, OPT_INT, 0) // number of seconds to blacklist - set to 0 for OSD default
1359 OPTION(rbd_request_timed_out_seconds, OPT_INT, 30) // number of seconds before maint request times out
1360 OPTION(rbd_skip_partial_discard, OPT_BOOL, false) // when trying to discard a range inside an object, set to true to skip zeroing the range.
1361 OPTION(rbd_enable_alloc_hint, OPT_BOOL, true) // when writing an object, issue a hint to the osd backend indicating the expected object size
1362 OPTION(rbd_tracing, OPT_BOOL, false) // true if LTTng-UST tracepoints should be enabled
1363 OPTION(rbd_validate_pool, OPT_BOOL, true) // true if empty pools should be validated for RBD compatibility
1364 OPTION(rbd_validate_names, OPT_BOOL, true) // true if image specs should be validated
1365 OPTION(rbd_auto_exclusive_lock_until_manual_request, OPT_BOOL, true) // whether to automatically acquire/release exclusive lock until it is explicitly requested, i.e. before we know the user of librbd is properly using the lock API
1366 OPTION(rbd_mirroring_resync_after_disconnect, OPT_BOOL, false) // automatically start image resync after mirroring is disconnected due to being laggy
1367 OPTION(rbd_mirroring_replay_delay, OPT_INT, 0) // time-delay in seconds for rbd-mirror asynchronous replication
1368
1369 /*
1370 * The following options change the behavior for librbd's image creation methods that
1371 * don't require all of the parameters. These are provided so that older programs
1372 * can take advantage of newer features without being rewritten to use new versions
1373 * of the image creation functions.
1374 *
1375 * rbd_create()/RBD::create() are affected by all of these options.
1376 *
1377 * rbd_create2()/RBD::create2() and rbd_clone()/RBD::clone() are affected by:
1378 * - rbd_default_order
1379 * - rbd_default_stripe_count
1380 * - rbd_default_stripe_unit
1381 *
1382 * rbd_create3()/RBD::create3() and rbd_clone2/RBD::clone2() are only
1383 * affected by rbd_default_order.
1384 */
1385 OPTION(rbd_default_format, OPT_INT, 2)
1386 OPTION(rbd_default_order, OPT_INT, 22)
1387 OPTION(rbd_default_stripe_count, OPT_U64, 0) // changing requires stripingv2 feature
1388 OPTION(rbd_default_stripe_unit, OPT_U64, 0) // changing to non-object size requires stripingv2 feature
1389 OPTION(rbd_default_data_pool, OPT_STR, "") // optional default pool for storing image data blocks
1390
1391 /**
1392 * RBD features are only applicable for v2 images. This setting accepts either
1393 * an integer bitmask value or comma-delimited string of RBD feature names.
1394 * This setting is always internally stored as an integer bitmask value. The
1395 * mapping between feature bitmask value and feature name is as follows:
1396 *
1397 * +1 -> layering
1398 * +2 -> striping
1399 * +4 -> exclusive-lock
1400 * +8 -> object-map
1401 * +16 -> fast-diff
1402 * +32 -> deep-flatten
1403 * +64 -> journaling
1404 * +128 -> data-pool
1405 */
1406 SAFE_OPTION(rbd_default_features, OPT_STR, "layering,exclusive-lock,object-map,fast-diff,deep-flatten")
1407 OPTION_VALIDATOR(rbd_default_features)
1408
1409 OPTION(rbd_default_map_options, OPT_STR, "") // default rbd map -o / --options
1410
1411 /**
1412 * RBD journal options.
1413 */
1414 OPTION(rbd_journal_order, OPT_U32, 24) // bits to shift to compute journal object max size, between 12 and 64
1415 OPTION(rbd_journal_splay_width, OPT_U32, 4) // number of active journal objects
1416 OPTION(rbd_journal_commit_age, OPT_DOUBLE, 5) // commit time interval, seconds
1417 OPTION(rbd_journal_object_flush_interval, OPT_INT, 0) // maximum number of pending commits per journal object
1418 OPTION(rbd_journal_object_flush_bytes, OPT_INT, 0) // maximum number of pending bytes per journal object
1419 OPTION(rbd_journal_object_flush_age, OPT_DOUBLE, 0) // maximum age (in seconds) for pending commits
1420 OPTION(rbd_journal_pool, OPT_STR, "") // pool for journal objects
1421 OPTION(rbd_journal_max_payload_bytes, OPT_U32, 16384) // maximum journal payload size before splitting
1422 OPTION(rbd_journal_max_concurrent_object_sets, OPT_INT, 0) // maximum number of object sets a journal client can be behind before it is automatically unregistered
1423
1424 /**
1425 * RBD Mirror options
1426 */
1427 OPTION(rbd_mirror_journal_commit_age, OPT_DOUBLE, 5) // commit time interval, seconds
1428 OPTION(rbd_mirror_journal_poll_age, OPT_DOUBLE, 5) // maximum age (in seconds) between successive journal polls
1429 OPTION(rbd_mirror_journal_max_fetch_bytes, OPT_U32, 32768) // maximum bytes to read from each journal data object per fetch
1430 OPTION(rbd_mirror_sync_point_update_age, OPT_DOUBLE, 30) // number of seconds between each update of the image sync point object number
1431 OPTION(rbd_mirror_concurrent_image_syncs, OPT_U32, 5) // maximum number of image syncs in parallel
1432 OPTION(rbd_mirror_pool_replayers_refresh_interval, OPT_INT, 30) // interval to refresh peers in rbd-mirror daemon
1433 OPTION(rbd_mirror_delete_retry_interval, OPT_DOUBLE, 30) // interval to check and retry the failed requests in deleter
1434 OPTION(rbd_mirror_image_state_check_interval, OPT_INT, 30) // interval to get images from pool watcher and set sources in replayer
1435 OPTION(rbd_mirror_leader_heartbeat_interval, OPT_INT, 5) // interval (in seconds) between mirror leader heartbeats
1436 OPTION(rbd_mirror_leader_max_missed_heartbeats, OPT_INT, 2) // number of missed heartbeats for non-lock owner to attempt to acquire lock
1437 OPTION(rbd_mirror_leader_max_acquire_attempts_before_break, OPT_INT, 3) // number of failed attempts to acquire lock after missing heartbeats before breaking lock
1438
1439 OPTION(nss_db_path, OPT_STR, "") // path to nss db
1440
1441
1442 OPTION(rgw_max_chunk_size, OPT_INT, 4 * 1024 * 1024)
1443 OPTION(rgw_put_obj_min_window_size, OPT_INT, 16 * 1024 * 1024)
1444 OPTION(rgw_put_obj_max_window_size, OPT_INT, 64 * 1024 * 1024)
1445 OPTION(rgw_max_put_size, OPT_U64, 5ULL*1024*1024*1024)
1446 OPTION(rgw_max_put_param_size, OPT_U64, 1 * 1024 * 1024) // max input size for PUT requests accepting json/xml params
1447
1448 /**
1449 * override max bucket index shards in zone configuration (if not zero)
1450 *
1451 * Represents the number of shards for the bucket index object, a value of zero
1452 * indicates there is no sharding. By default (no sharding), the name of the object
1453 * is '.dir.{marker}'; with sharding, the name is '.dir.{marker}.{sharding_id}', where
1454 * sharding_id is a zero-based value. It is not recommended to set too large a value
1455 * (e.g. a thousand) as it increases the cost of bucket listing.
1456 */
1457 OPTION(rgw_override_bucket_index_max_shards, OPT_U32, 0)
1458
1459 /**
1460 * Represents the maximum AIO pending requests for the bucket index object shards.
1461 */
1462 OPTION(rgw_bucket_index_max_aio, OPT_U32, 8)
1463
1464 /**
1465 * whether or not the quota/gc threads should be started
1466 */
1467 OPTION(rgw_enable_quota_threads, OPT_BOOL, true)
1468 OPTION(rgw_enable_gc_threads, OPT_BOOL, true)
1469 OPTION(rgw_enable_lc_threads, OPT_BOOL, true)
1470
1471
1472 OPTION(rgw_data, OPT_STR, "/var/lib/ceph/radosgw/$cluster-$id")
1473 OPTION(rgw_enable_apis, OPT_STR, "s3, s3website, swift, swift_auth, admin")
1474 OPTION(rgw_cache_enabled, OPT_BOOL, true) // rgw cache enabled
1475 OPTION(rgw_cache_lru_size, OPT_INT, 10000) // num of entries in rgw cache
1476 OPTION(rgw_socket_path, OPT_STR, "") // path to unix domain socket, if not specified, rgw will not run as external fcgi
1477 OPTION(rgw_host, OPT_STR, "") // host for radosgw, can be an IP, default is 0.0.0.0
1478 OPTION(rgw_port, OPT_STR, "") // port to listen, format as "8080" "5000", if not specified, rgw will not run external fcgi
1479 OPTION(rgw_dns_name, OPT_STR, "") // hostname suffix on buckets
1480 OPTION(rgw_dns_s3website_name, OPT_STR, "") // hostname suffix on buckets for s3-website endpoint
1481 OPTION(rgw_content_length_compat, OPT_BOOL, false) // Check both HTTP_CONTENT_LENGTH and CONTENT_LENGTH in fcgi env
1482 OPTION(rgw_lifecycle_work_time, OPT_STR, "00:00-06:00") // time window in which the lifecycle (lc) job is processed; 00:00-06:00 by default
1483 OPTION(rgw_lc_lock_max_time, OPT_INT, 60) // total run time for a single lc processor work
1484 OPTION(rgw_lc_max_objs, OPT_INT, 32)
1485 OPTION(rgw_lc_debug_interval, OPT_INT, -1) // Debug run interval, in seconds
1486 OPTION(rgw_script_uri, OPT_STR, "") // alternative value for SCRIPT_URI if not set in request
1487 OPTION(rgw_request_uri, OPT_STR, "") // alternative value for REQUEST_URI if not set in request
1488 OPTION(rgw_swift_url, OPT_STR, "") // the swift url, being published by the internal swift auth
1489 OPTION(rgw_swift_url_prefix, OPT_STR, "swift") // entry point for which a url is considered a swift url
1490 OPTION(rgw_swift_auth_url, OPT_STR, "") // default URL to go and verify tokens for v1 auth (if not using internal swift auth)
1491 OPTION(rgw_swift_auth_entry, OPT_STR, "auth") // entry point for which a url is considered a swift auth url
1492 OPTION(rgw_swift_tenant_name, OPT_STR, "") // tenant name to use for swift access
1493 OPTION(rgw_swift_account_in_url, OPT_BOOL, false) // assume that the URL always contains the account (aka tenant) part
1494 OPTION(rgw_swift_enforce_content_length, OPT_BOOL, false) // enforce generation of Content-Length even at the cost of performance or scalability
1495 OPTION(rgw_keystone_url, OPT_STR, "") // url for keystone server
1496 OPTION(rgw_keystone_admin_token, OPT_STR, "") // keystone admin token (shared secret)
1497 OPTION(rgw_keystone_admin_user, OPT_STR, "") // keystone admin user name
1498 OPTION(rgw_keystone_admin_password, OPT_STR, "") // keystone admin user password
1499 OPTION(rgw_keystone_admin_tenant, OPT_STR, "") // keystone admin user tenant (for keystone v2.0)
1500 OPTION(rgw_keystone_admin_project, OPT_STR, "") // keystone admin user project (for keystone v3)
1501 OPTION(rgw_keystone_admin_domain, OPT_STR, "") // keystone admin user domain
1502 OPTION(rgw_keystone_barbican_user, OPT_STR, "") // keystone user to access barbican secrets
1503 OPTION(rgw_keystone_barbican_password, OPT_STR, "") // keystone password for barbican user
1504 OPTION(rgw_keystone_barbican_tenant, OPT_STR, "") // keystone barbican user tenant (for keystone v2.0)
1505 OPTION(rgw_keystone_barbican_project, OPT_STR, "") // keystone barbican user project (for keystone v3)
1506 OPTION(rgw_keystone_barbican_domain, OPT_STR, "") // keystone barbican user domain
1507 OPTION(rgw_keystone_api_version, OPT_INT, 2) // Version of Keystone API to use (2 or 3)
1508 OPTION(rgw_keystone_accepted_roles, OPT_STR, "Member, admin") // roles required to serve requests
1509 OPTION(rgw_keystone_accepted_admin_roles, OPT_STR, "") // list of roles allowing a user to gain admin privileges
1510 OPTION(rgw_keystone_token_cache_size, OPT_INT, 10000) // max number of entries in keystone token cache
1511 OPTION(rgw_keystone_revocation_interval, OPT_INT, 15 * 60) // seconds between token revocation checks (default is 15 minutes)
1512 OPTION(rgw_keystone_verify_ssl, OPT_BOOL, true) // should we try to verify keystone's ssl
1513 OPTION(rgw_keystone_implicit_tenants, OPT_BOOL, false) // create new users in their own tenants of the same name
1514 OPTION(rgw_cross_domain_policy, OPT_STR, "<allow-access-from domain=\"*\" secure=\"false\" />")
1515 OPTION(rgw_healthcheck_disabling_path, OPT_STR, "") // path whose existence causes the healthcheck to respond 503
1516 OPTION(rgw_s3_auth_use_rados, OPT_BOOL, true) // should we try to use the internal credentials for s3?
1517 OPTION(rgw_s3_auth_use_keystone, OPT_BOOL, false) // should we try to use keystone for s3?
1518 OPTION(rgw_s3_auth_aws4_force_boto2_compat, OPT_BOOL, true) // force aws4 auth boto2 compatibility
1519 OPTION(rgw_barbican_url, OPT_STR, "") // url for barbican server
1520
1521 /* OpenLDAP-style LDAP parameter strings */
1522 /* rgw_ldap_uri space-separated list of LDAP servers in URI format */
1523 OPTION(rgw_ldap_uri, OPT_STR, "ldaps://<ldap.your.domain>")
1524 /* rgw_ldap_binddn LDAP entry RGW will bind with (user match) */
1525 OPTION(rgw_ldap_binddn, OPT_STR, "uid=admin,cn=users,dc=example,dc=com")
1526 /* rgw_ldap_searchdn LDAP search base (basedn) */
1527 OPTION(rgw_ldap_searchdn, OPT_STR, "cn=users,cn=accounts,dc=example,dc=com")
1528 /* rgw_ldap_dnattr LDAP attribute containing RGW user names (to form binddns)*/
1529 OPTION(rgw_ldap_dnattr, OPT_STR, "uid")
1530 /* rgw_ldap_secret file containing credentials for rgw_ldap_binddn */
1531 OPTION(rgw_ldap_secret, OPT_STR, "/etc/openldap/secret")
1532 /* rgw_s3_auth_use_ldap use LDAP for RGW auth? */
1533 OPTION(rgw_s3_auth_use_ldap, OPT_BOOL, false)
1534 /* rgw_ldap_searchfilter LDAP search filter */
1535 OPTION(rgw_ldap_searchfilter, OPT_STR, "")
1536
1537 OPTION(rgw_admin_entry, OPT_STR, "admin") // entry point for which a url is considered an admin request
1538 OPTION(rgw_enforce_swift_acls, OPT_BOOL, true)
1539 OPTION(rgw_swift_token_expiration, OPT_INT, 24 * 3600) // time in seconds for swift token expiration
1540 OPTION(rgw_print_continue, OPT_BOOL, true) // enable if 100-Continue works
1541 OPTION(rgw_print_prohibited_content_length, OPT_BOOL, false) // violate RFC 7230 and send Content-Length in 204 and 304
1542 OPTION(rgw_remote_addr_param, OPT_STR, "REMOTE_ADDR") // e.g. X-Forwarded-For, if you have a reverse proxy
1543 OPTION(rgw_op_thread_timeout, OPT_INT, 10*60)
1544 OPTION(rgw_op_thread_suicide_timeout, OPT_INT, 0)
1545 OPTION(rgw_thread_pool_size, OPT_INT, 100)
1546 OPTION(rgw_num_control_oids, OPT_INT, 8)
1547 OPTION(rgw_num_rados_handles, OPT_U32, 1)
1548
1549 /* The following are tunables for caches of RGW NFS (and other file
1550 * client) objects.
1551 *
1552 * The file handle cache is a partitioned hash table
1553 * (fhcache_partitions), each with a closed hash part and backing
1554 * b-tree mapping. The number of partitions is expected to be a small
1555 * prime, the cache size something larger but less than 5K, the total
1556 * size of the cache is n_part * cache_size.
1557 */
1558 OPTION(rgw_nfs_lru_lanes, OPT_INT, 5)
1559 OPTION(rgw_nfs_lru_lane_hiwat, OPT_INT, 911)
1560 OPTION(rgw_nfs_fhcache_partitions, OPT_INT, 3)
1561 OPTION(rgw_nfs_fhcache_size, OPT_INT, 2017) /* 3*2017=6051 */
1562 OPTION(rgw_nfs_namespace_expire_secs, OPT_INT, 300) /* namespace invalidate
1563 * timer */
1564 OPTION(rgw_nfs_max_gc, OPT_INT, 300) /* max gc events per cycle */
1565 OPTION(rgw_nfs_write_completion_interval_s, OPT_INT, 10) /* stateless (V3)
1566 * commit
1567 * delay */
1568
1569 OPTION(rgw_zone, OPT_STR, "") // zone name
1570 OPTION(rgw_zone_root_pool, OPT_STR, ".rgw.root") // pool where zone specific info is stored
1571 OPTION(rgw_default_zone_info_oid, OPT_STR, "default.zone") // oid where default zone info is stored
1572 OPTION(rgw_region, OPT_STR, "") // region name
1573 OPTION(rgw_region_root_pool, OPT_STR, ".rgw.root") // pool where all region info is stored
1574 OPTION(rgw_default_region_info_oid, OPT_STR, "default.region") // oid where default region info is stored
1575 OPTION(rgw_zonegroup, OPT_STR, "") // zone group name
1576 OPTION(rgw_zonegroup_root_pool, OPT_STR, ".rgw.root") // pool where all zone group info is stored
1577 OPTION(rgw_default_zonegroup_info_oid, OPT_STR, "default.zonegroup") // oid where default zone group info is stored
1578 OPTION(rgw_realm, OPT_STR, "") // realm name
1579 OPTION(rgw_realm_root_pool, OPT_STR, ".rgw.root") // pool where all realm info is stored
1580 OPTION(rgw_default_realm_info_oid, OPT_STR, "default.realm") // oid where default realm info is stored
1581 OPTION(rgw_period_root_pool, OPT_STR, ".rgw.root") // pool where all period info is stored
1582 OPTION(rgw_period_latest_epoch_info_oid, OPT_STR, ".latest_epoch") // oid where current period info is stored
1583 OPTION(rgw_log_nonexistent_bucket, OPT_BOOL, false)
1584 OPTION(rgw_log_object_name, OPT_STR, "%Y-%m-%d-%H-%i-%n") // see `man date` for the format codes (only a subset is supported)
1585 OPTION(rgw_log_object_name_utc, OPT_BOOL, false)
1586 OPTION(rgw_usage_max_shards, OPT_INT, 32)
1587 OPTION(rgw_usage_max_user_shards, OPT_INT, 1)
1588 OPTION(rgw_enable_ops_log, OPT_BOOL, false) // enable logging every rgw operation
1589 OPTION(rgw_enable_usage_log, OPT_BOOL, false) // enable logging bandwidth usage
1590 OPTION(rgw_ops_log_rados, OPT_BOOL, true) // whether ops log should go to rados
1591 OPTION(rgw_ops_log_socket_path, OPT_STR, "") // path to unix domain socket where ops log can go
1592 OPTION(rgw_ops_log_data_backlog, OPT_INT, 5 << 20) // max data backlog for ops log
1593 OPTION(rgw_fcgi_socket_backlog, OPT_INT, 1024) // socket backlog for fcgi
1594 OPTION(rgw_usage_log_flush_threshold, OPT_INT, 1024) // threshold to flush pending log data
1595 OPTION(rgw_usage_log_tick_interval, OPT_INT, 30) // flush pending log data every X seconds
1596 OPTION(rgw_intent_log_object_name, OPT_STR, "%Y-%m-%d-%i-%n") // see `man date` for the format codes (only a subset is supported)
1597 OPTION(rgw_intent_log_object_name_utc, OPT_BOOL, false)
1598 OPTION(rgw_init_timeout, OPT_INT, 300) // time in seconds
1599 OPTION(rgw_mime_types_file, OPT_STR, "/etc/mime.types")
1600 OPTION(rgw_gc_max_objs, OPT_INT, 32)
1601 OPTION(rgw_gc_obj_min_wait, OPT_INT, 2 * 3600) // wait time before object may be handled by gc
1602 OPTION(rgw_gc_processor_max_time, OPT_INT, 3600) // total run time for a single gc processor work
1603 OPTION(rgw_gc_processor_period, OPT_INT, 3600) // gc processor cycle time
1604 OPTION(rgw_s3_success_create_obj_status, OPT_INT, 0) // alternative success status response for create-obj (0 - default)
1605 OPTION(rgw_resolve_cname, OPT_BOOL, false) // should rgw try to resolve hostname as a dns cname record
1606 OPTION(rgw_obj_stripe_size, OPT_INT, 4 << 20)
1607 OPTION(rgw_extended_http_attrs, OPT_STR, "") // list of extended attrs that can be set on objects (beyond the default)
1608 OPTION(rgw_exit_timeout_secs, OPT_INT, 120) // how many seconds to wait for process to go down before exiting unconditionally
1609 OPTION(rgw_get_obj_window_size, OPT_INT, 16 << 20) // window size in bytes for single get obj request
1610 OPTION(rgw_get_obj_max_req_size, OPT_INT, 4 << 20) // max length of a single get obj rados op
1611 OPTION(rgw_relaxed_s3_bucket_names, OPT_BOOL, false) // enable relaxed bucket name rules for US region buckets
1612 OPTION(rgw_defer_to_bucket_acls, OPT_STR, "") // if the user has bucket perms, use those before key perms (recurse and full_control)
1613 OPTION(rgw_list_buckets_max_chunk, OPT_INT, 1000) // max buckets to retrieve in a single op when listing user buckets
1614 OPTION(rgw_md_log_max_shards, OPT_INT, 64) // max shards for metadata log
1615 OPTION(rgw_num_zone_opstate_shards, OPT_INT, 128) // max shards for keeping inter-region copy progress info
1616 OPTION(rgw_opstate_ratelimit_sec, OPT_INT, 30) // min time between opstate updates on a single upload (0 for disabling ratelimit)
1617 OPTION(rgw_curl_wait_timeout_ms, OPT_INT, 1000) // timeout for certain curl calls
1618 OPTION(rgw_copy_obj_progress, OPT_BOOL, true) // should dump progress during long copy operations?
1619 OPTION(rgw_copy_obj_progress_every_bytes, OPT_INT, 1024 * 1024) // min bytes between copy progress output
1620 OPTION(rgw_obj_tombstone_cache_size, OPT_INT, 1000) // how many objects in tombstone cache, which is used in multi-zone sync to keep
1621 // track of removed objects' mtime
1622
1623 OPTION(rgw_data_log_window, OPT_INT, 30) // data log entries window (in seconds)
1624 OPTION(rgw_data_log_changes_size, OPT_INT, 1000) // number of in-memory entries to hold for data changes log
1625 OPTION(rgw_data_log_num_shards, OPT_INT, 128) // number of objects to keep data changes log on
1626 OPTION(rgw_data_log_obj_prefix, OPT_STR, "data_log") //
1627 OPTION(rgw_replica_log_obj_prefix, OPT_STR, "replica_log") //
1628
1629 OPTION(rgw_bucket_quota_ttl, OPT_INT, 600) // time that bucket stats remain cached within an rgw instance
1630 OPTION(rgw_bucket_quota_soft_threshold, OPT_DOUBLE, 0.95) // threshold from which we don't rely on cached info for quota decisions
1631 OPTION(rgw_bucket_quota_cache_size, OPT_INT, 10000) // number of entries in bucket quota cache
1632 OPTION(rgw_bucket_default_quota_max_objects, OPT_INT, -1) // number of objects allowed
1633 OPTION(rgw_bucket_default_quota_max_size, OPT_LONGLONG, -1) // max size in bytes allowed by the default bucket quota (per the option name; not a per-object size)
1634
1635 OPTION(rgw_expose_bucket, OPT_BOOL, false) // Return the bucket name in the 'Bucket' response header
1636
1637 OPTION(rgw_frontends, OPT_STR, "fastcgi, civetweb port=7480") // rgw front ends
1638
1639 OPTION(rgw_user_quota_bucket_sync_interval, OPT_INT, 180) // time period for accumulating modified buckets before syncing stats
1640 OPTION(rgw_user_quota_sync_interval, OPT_INT, 3600 * 24) // time period for accumulating modified buckets before syncing entire user stats
1641 OPTION(rgw_user_quota_sync_idle_users, OPT_BOOL, false) // whether stats for idle users should be fully synced
1642 OPTION(rgw_user_quota_sync_wait_time, OPT_INT, 3600 * 24) // min time between two full stats syncs for non-idle users
1643 OPTION(rgw_user_default_quota_max_objects, OPT_INT, -1) // number of objects allowed
1644 OPTION(rgw_user_default_quota_max_size, OPT_LONGLONG, -1) // max size in bytes allowed by the default user quota (per the option name; not a per-object size)
1645
1646 OPTION(rgw_multipart_min_part_size, OPT_INT, 5 * 1024 * 1024) // min size for each part (except for last one) in multipart upload
1647 OPTION(rgw_multipart_part_upload_limit, OPT_INT, 10000) // parts limit in multipart upload
1648
1649 OPTION(rgw_max_slo_entries, OPT_INT, 1000) // default number of max entries in slo
1650
1651 OPTION(rgw_olh_pending_timeout_sec, OPT_INT, 3600) // time until we retire a pending olh change
1652 OPTION(rgw_user_max_buckets, OPT_INT, 1000) // global option to set max buckets count for all users
1653
1654 OPTION(rgw_objexp_gc_interval, OPT_U32, 60 * 10) // maximum time between rounds of expired-object garbage collection
1655 OPTION(rgw_objexp_time_step, OPT_U32, 4096) // number of seconds for rounding the timestamps
1656 OPTION(rgw_objexp_hints_num_shards, OPT_U32, 127) // maximum number of parts in which the hint index is stored
1657 OPTION(rgw_objexp_chunk_size, OPT_U32, 100) // maximum number of entries in a single operation when processing objexp data
1658
1659 OPTION(rgw_enable_static_website, OPT_BOOL, false) // enable static website feature
1660 OPTION(rgw_log_http_headers, OPT_STR, "" ) // list of HTTP headers to log when seen, ignores case (e.g., http_x_forwarded_for)
1661
1662 OPTION(rgw_num_async_rados_threads, OPT_INT, 32) // num of threads to use for async rados operations
1663 OPTION(rgw_md_notify_interval_msec, OPT_INT, 200) // metadata changes notification interval to followers
1664 OPTION(rgw_run_sync_thread, OPT_BOOL, true) // whether radosgw (not radosgw-admin) spawns the sync thread
1665 OPTION(rgw_sync_lease_period, OPT_INT, 120) // time in seconds for the lease that rgw takes on a specific log (or log shard)
1666 OPTION(rgw_sync_log_trim_interval, OPT_INT, 1200) // time in seconds between attempts to trim sync logs
1667
1668 OPTION(rgw_sync_data_inject_err_probability, OPT_DOUBLE, 0) // range [0, 1]
1669 OPTION(rgw_sync_meta_inject_err_probability, OPT_DOUBLE, 0) // range [0, 1]
1670
1671
1672 OPTION(rgw_period_push_interval, OPT_DOUBLE, 2) // seconds to wait before retrying "period push"
1673 OPTION(rgw_period_push_interval_max, OPT_DOUBLE, 30) // maximum interval after exponential backoff
1674
1675 OPTION(rgw_safe_max_objects_per_shard, OPT_INT, 100*1024) // safe max loading
1676 OPTION(rgw_shard_warning_threshold, OPT_DOUBLE, 90) // pct of safe max
1677 // at which to warn
1678
1679 OPTION(rgw_swift_versioning_enabled, OPT_BOOL, false) // whether swift object versioning feature is enabled
1680
1681 OPTION(mgr_module_path, OPT_STR, CEPH_PKGLIBDIR "/mgr") // where to load python modules from
1682 OPTION(mgr_modules, OPT_STR, "rest") // Which modules to load
1683 OPTION(mgr_data, OPT_STR, "/var/lib/ceph/mgr/$cluster-$id") // where to find keyring etc
1684 OPTION(mgr_beacon_period, OPT_INT, 5) // How frequently to send beacon
1685 OPTION(mgr_stats_period, OPT_INT, 5) // How frequently to send stats
1686 OPTION(mgr_client_bytes, OPT_U64, 128*1048576) // bytes from clients
1687 OPTION(mgr_client_messages, OPT_U64, 512) // messages from clients
1688 OPTION(mgr_osd_bytes, OPT_U64, 512*1048576) // bytes from osds
1689 OPTION(mgr_osd_messages, OPT_U64, 8192) // messages from osds
1690 OPTION(mgr_mds_bytes, OPT_U64, 128*1048576) // bytes from mdss
1691 OPTION(mgr_mds_messages, OPT_U64, 128) // messages from mdss
1692 OPTION(mgr_mon_bytes, OPT_U64, 128*1048576) // bytes from mons
1693 OPTION(mgr_mon_messages, OPT_U64, 128) // messages from mons
1694
1695 OPTION(mgr_connect_retry_interval, OPT_DOUBLE, 1.0)
1696
1697 OPTION(mon_mgr_digest_period, OPT_INT, 5) // How frequently to send digests
1698 OPTION(mon_mgr_beacon_grace, OPT_INT, 30) // How long to wait to failover
1699 OPTION(mon_mgr_inactive_grace, OPT_INT, 60) // How long before health WARN -> ERR
1700 OPTION(rgw_crypt_require_ssl, OPT_BOOL, true) // requests including encryption key headers must be sent over ssl
1701 OPTION(rgw_crypt_default_encryption_key, OPT_STR, "") // base64 encoded key for encryption of rgw objects
1702 OPTION(rgw_crypt_s3_kms_encryption_keys, OPT_STR, "") // extra keys that may be used for aws:kms
1703 // defined as map "key1=YmluCmJvb3N0CmJvb3N0LQ== key2=b3V0CnNyYwpUZXN0aW5nCg=="
1704 OPTION(rgw_crypt_suppress_logs, OPT_BOOL, true) // suppress logs that might print customer key
1705 OPTION(rgw_list_bucket_min_readahead, OPT_INT, 1000) // minimum number of entries to read from rados for bucket listing
1706
1707 OPTION(rgw_rest_getusage_op_compat, OPT_BOOL, false) // dump description of total stats for s3 GetUsage API
1708
1709 OPTION(mutex_perf_counter, OPT_BOOL, false) // enable/disable mutex perf counter
1710 OPTION(throttler_perf_counter, OPT_BOOL, true) // enable/disable throttler perf counter
1711
1712 /* The following are tunables for torrent data */
1713 OPTION(rgw_torrent_flag, OPT_BOOL, false) // produce torrent function flag
1714 OPTION(rgw_torrent_tracker, OPT_STR, "") // torrent field announce and announce list
1715 OPTION(rgw_torrent_createby, OPT_STR, "") // torrent field created by
1716 OPTION(rgw_torrent_comment, OPT_STR, "") // torrent field comment
1717 OPTION(rgw_torrent_encoding, OPT_STR, "") // torrent field encoding
1718 OPTION(rgw_torrent_origin, OPT_STR, "") // torrent origin
1719 OPTION(rgw_torrent_sha_unit, OPT_INT, 512*1024) // torrent field piece length 512K
1720
1721 OPTION(event_tracing, OPT_BOOL, false) // true if LTTng-UST tracepoints should be enabled
1722
1723 // This will be set to true when it is safe to start threads.
1724 // Once it is true, it will never change.
1725 OPTION(internal_safe_to_start_threads, OPT_BOOL, false)
1726
1727 OPTION(debug_deliberately_leak_memory, OPT_BOOL, false)
1728
1729 OPTION(rgw_swift_custom_header, OPT_STR, "") // option to enable swift custom headers