]> git.proxmox.com Git - ceph.git/blob - ceph/src/common/config_opts.h
update sources to v12.1.1
[ceph.git] / ceph / src / common / config_opts.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15 /* note: no header guard */
16 OPTION(host, OPT_STR, "") // "" means that ceph will use short hostname
17 OPTION(fsid, OPT_UUID, uuid_d())
18 OPTION(public_addr, OPT_ADDR, entity_addr_t())
19 OPTION(public_bind_addr, OPT_ADDR, entity_addr_t())
20 OPTION(cluster_addr, OPT_ADDR, entity_addr_t())
21 OPTION(public_network, OPT_STR, "")
22 OPTION(cluster_network, OPT_STR, "")
23 OPTION(num_client, OPT_INT, 1)
24 OPTION(monmap, OPT_STR, "")
25 OPTION(mon_host, OPT_STR, "")
26 OPTION(mon_dns_srv_name, OPT_STR, "ceph-mon")
27 OPTION(lockdep, OPT_BOOL, false)
28 OPTION(lockdep_force_backtrace, OPT_BOOL, false) // always gather current backtrace at every lock
29 OPTION(run_dir, OPT_STR, "/var/run/ceph") // the "/var/run/ceph" dir, created on daemon startup
30 OPTION(admin_socket, OPT_STR, "$run_dir/$cluster-$name.asok") // default changed by common_preinit()
31 OPTION(admin_socket_mode, OPT_STR, "") // permission bits to set for admin socket file, e.g., "0775", "0755"
32
33 OPTION(daemonize, OPT_BOOL, false) // default changed by common_preinit()
34 OPTION(setuser, OPT_STR, "") // uid or user name
35 OPTION(setgroup, OPT_STR, "") // gid or group name
36 OPTION(setuser_match_path, OPT_STR, "") // make setuser/group conditional on this path matching ownership
37 OPTION(pid_file, OPT_STR, "") // default changed by common_preinit()
38 OPTION(chdir, OPT_STR, "/")
39 OPTION(max_open_files, OPT_LONGLONG, 0)
40 OPTION(restapi_log_level, OPT_STR, "") // default set by Python code
41 OPTION(restapi_base_url, OPT_STR, "") // default set by Python code
42 OPTION(fatal_signal_handlers, OPT_BOOL, true)
43 SAFE_OPTION(erasure_code_dir, OPT_STR, CEPH_PKGLIBDIR"/erasure-code") // default location for erasure-code plugins
44
45 OPTION(log_file, OPT_STR, "/var/log/ceph/$cluster-$name.log") // default changed by common_preinit()
46 OPTION(log_max_new, OPT_INT, 1000) // default changed by common_preinit()
47 OPTION(log_max_recent, OPT_INT, 10000) // default changed by common_preinit()
48 OPTION(log_to_stderr, OPT_BOOL, true) // default changed by common_preinit()
49 OPTION(err_to_stderr, OPT_BOOL, true) // default changed by common_preinit()
50 OPTION(log_to_syslog, OPT_BOOL, false)
51 OPTION(err_to_syslog, OPT_BOOL, false)
52 OPTION(log_flush_on_exit, OPT_BOOL, true) // default changed by common_preinit()
53 OPTION(log_stop_at_utilization, OPT_FLOAT, .97) // stop logging at (near) full
54 OPTION(log_to_graylog, OPT_BOOL, false)
55 OPTION(err_to_graylog, OPT_BOOL, false)
56 OPTION(log_graylog_host, OPT_STR, "127.0.0.1")
57 OPTION(log_graylog_port, OPT_INT, 12201)
58
59 // options will take k/v pairs, or single-item that will be assumed as general
60 // default for all, regardless of channel.
61 // e.g., "info" would be taken as the same as "default=info"
62 // also, "default=daemon audit=local0" would mean
63 // "default all to 'daemon', override 'audit' with 'local0'"
64 OPTION(clog_to_monitors, OPT_STR, "default=true")
65 OPTION(clog_to_syslog, OPT_STR, "false")
66 OPTION(clog_to_syslog_level, OPT_STR, "info") // this level and above
67 OPTION(clog_to_syslog_facility, OPT_STR, "default=daemon audit=local0")
68 OPTION(clog_to_graylog, OPT_STR, "false")
69 OPTION(clog_to_graylog_host, OPT_STR, "127.0.0.1")
70 OPTION(clog_to_graylog_port, OPT_STR, "12201")
71
72 OPTION(mon_cluster_log_to_syslog, OPT_STR, "default=false")
73 OPTION(mon_cluster_log_to_syslog_level, OPT_STR, "info") // this level and above
74 OPTION(mon_cluster_log_to_syslog_facility, OPT_STR, "daemon")
75 OPTION(mon_cluster_log_file, OPT_STR,
76 "default=/var/log/ceph/$cluster.$channel.log cluster=/var/log/ceph/$cluster.log")
77 OPTION(mon_cluster_log_file_level, OPT_STR, "info")
78 OPTION(mon_cluster_log_to_graylog, OPT_STR, "false")
79 OPTION(mon_cluster_log_to_graylog_host, OPT_STR, "127.0.0.1")
80 OPTION(mon_cluster_log_to_graylog_port, OPT_STR, "12201")
81
82 OPTION(enable_experimental_unrecoverable_data_corrupting_features, OPT_STR, "")
83
84 SAFE_OPTION(plugin_dir, OPT_STR, CEPH_PKGLIBDIR)
85
86 OPTION(xio_trace_mempool, OPT_BOOL, false) // mempool allocation counters
87 OPTION(xio_trace_msgcnt, OPT_BOOL, false) // incoming/outgoing msg counters
88 OPTION(xio_trace_xcon, OPT_BOOL, false) // Xio message encode/decode trace
89 OPTION(xio_queue_depth, OPT_INT, 128) // depth of Accelio msg queue
90 OPTION(xio_mp_min, OPT_INT, 128) // default min mempool size
91 OPTION(xio_mp_max_64, OPT_INT, 65536) // max 64-byte chunks (buffer is 40)
92 OPTION(xio_mp_max_256, OPT_INT, 8192) // max 256-byte chunks
93 OPTION(xio_mp_max_1k, OPT_INT, 8192) // max 1K chunks
94 OPTION(xio_mp_max_page, OPT_INT, 4096) // max page-size chunks -- NOTE(review): inferred from the option name; confirm
95 OPTION(xio_mp_max_hint, OPT_INT, 4096) // max size-hint chunks
96 OPTION(xio_portal_threads, OPT_INT, 2) // xio portal threads per messenger
97 OPTION(xio_max_conns_per_portal, OPT_INT, 32) // max xio_connections per portal/ctx
98 OPTION(xio_transport_type, OPT_STR, "rdma") // xio transport type: {rdma or tcp}
99 OPTION(xio_max_send_inline, OPT_INT, 512) // xio maximum threshold to send inline
100
101 OPTION(compressor_zlib_isal, OPT_BOOL, false)
102 OPTION(compressor_zlib_level, OPT_INT, 5) //regular zlib compression level, not applicable to isa-l optimized version
103
104 OPTION(async_compressor_enabled, OPT_BOOL, false)
105 OPTION(async_compressor_type, OPT_STR, "snappy")
106 OPTION(async_compressor_threads, OPT_INT, 2)
107 OPTION(async_compressor_thread_timeout, OPT_INT, 5)
108 OPTION(async_compressor_thread_suicide_timeout, OPT_INT, 30)
109
110 OPTION(plugin_crypto_accelerator, OPT_STR, "crypto_isal")
111
112 OPTION(mempool_debug, OPT_BOOL, false)
113
114 DEFAULT_SUBSYS(0, 5)
115 SUBSYS(lockdep, 0, 1)
116 SUBSYS(context, 0, 1)
117 SUBSYS(crush, 1, 1)
118 SUBSYS(mds, 1, 5)
119 SUBSYS(mds_balancer, 1, 5)
120 SUBSYS(mds_locker, 1, 5)
121 SUBSYS(mds_log, 1, 5)
122 SUBSYS(mds_log_expire, 1, 5)
123 SUBSYS(mds_migrator, 1, 5)
124 SUBSYS(buffer, 0, 1)
125 SUBSYS(timer, 0, 1)
126 SUBSYS(filer, 0, 1)
127 SUBSYS(striper, 0, 1)
128 SUBSYS(objecter, 0, 1)
129 SUBSYS(rados, 0, 5)
130 SUBSYS(rbd, 0, 5)
131 SUBSYS(rbd_mirror, 0, 5)
132 SUBSYS(rbd_replay, 0, 5)
133 SUBSYS(journaler, 0, 5)
134 SUBSYS(objectcacher, 0, 5)
135 SUBSYS(client, 0, 5)
136 SUBSYS(osd, 1, 5)
137 SUBSYS(optracker, 0, 5)
138 SUBSYS(objclass, 0, 5)
139 SUBSYS(filestore, 1, 3)
140 SUBSYS(journal, 1, 3)
141 SUBSYS(ms, 0, 5)
142 SUBSYS(mon, 1, 5)
143 SUBSYS(monc, 0, 10)
144 SUBSYS(paxos, 1, 5)
145 SUBSYS(tp, 0, 5)
146 SUBSYS(auth, 1, 5)
147 SUBSYS(crypto, 1, 5)
148 SUBSYS(finisher, 1, 1)
149 SUBSYS(heartbeatmap, 1, 5)
150 SUBSYS(perfcounter, 1, 5)
151 SUBSYS(rgw, 1, 5) // log level for the Rados gateway
152 SUBSYS(civetweb, 1, 10)
153 SUBSYS(javaclient, 1, 5)
154 SUBSYS(asok, 1, 5)
155 SUBSYS(throttle, 1, 1)
156 SUBSYS(refs, 0, 0)
157 SUBSYS(xio, 1, 5)
158 SUBSYS(compressor, 1, 5)
159 SUBSYS(bluestore, 1, 5)
160 SUBSYS(bluefs, 1, 5)
161 SUBSYS(bdev, 1, 3)
162 SUBSYS(kstore, 1, 5)
163 SUBSYS(rocksdb, 4, 5)
164 SUBSYS(leveldb, 4, 5)
165 SUBSYS(memdb, 4, 5)
166 SUBSYS(kinetic, 1, 5)
167 SUBSYS(fuse, 1, 5)
168 SUBSYS(mgr, 1, 5)
169 SUBSYS(mgrc, 1, 5)
170 SUBSYS(dpdk, 1, 5)
171 SUBSYS(eventtrace, 1, 5)
172
173 OPTION(key, OPT_STR, "")
174 OPTION(keyfile, OPT_STR, "")
175 OPTION(keyring, OPT_STR,
176 // default changed by common_preinit() for mds and osd; FreeBSD builds also append the /usr/local/etc/ceph paths below
177 "/etc/ceph/$cluster.$name.keyring,/etc/ceph/$cluster.keyring,/etc/ceph/keyring,/etc/ceph/keyring.bin,"
178 #if defined(__FreeBSD__) // __FreeBSD__ (with trailing underscores) is the macro tested elsewhere in this file
179 "/usr/local/etc/ceph/$cluster.$name.keyring,/usr/local/etc/ceph/$cluster.keyring,"
180 "/usr/local/etc/ceph/keyring,/usr/local/etc/ceph/keyring.bin,"
181 #endif
182 )
183 OPTION(heartbeat_interval, OPT_INT, 5)
184 OPTION(heartbeat_file, OPT_STR, "")
185 OPTION(heartbeat_inject_failure, OPT_INT, 0) // force an unhealthy heartbeat for N seconds
186 OPTION(perf, OPT_BOOL, true) // enable internal perf counters
187
188 SAFE_OPTION(ms_type, OPT_STR, "async+posix") // messenger backend. It will be modified in runtime, so use SAFE_OPTION
189 OPTION(ms_public_type, OPT_STR, "") // messenger backend
190 OPTION(ms_cluster_type, OPT_STR, "") // messenger backend
191 OPTION(ms_tcp_nodelay, OPT_BOOL, true)
192 OPTION(ms_tcp_rcvbuf, OPT_INT, 0)
193 OPTION(ms_tcp_prefetch_max_size, OPT_INT, 4096) // max prefetch size, we limit this to avoid extra memcpy
194 OPTION(ms_initial_backoff, OPT_DOUBLE, .2)
195 OPTION(ms_max_backoff, OPT_DOUBLE, 15.0)
196 OPTION(ms_crc_data, OPT_BOOL, true)
197 OPTION(ms_crc_header, OPT_BOOL, true)
198 OPTION(ms_die_on_bad_msg, OPT_BOOL, false)
199 OPTION(ms_die_on_unhandled_msg, OPT_BOOL, false)
200 OPTION(ms_die_on_old_message, OPT_BOOL, false) // assert if we get a dup incoming message and shouldn't have (may be triggered by pre-541cd3c64be0dfa04e8a2df39422e0eb9541a428 code)
201 OPTION(ms_die_on_skipped_message, OPT_BOOL, false) // assert if we skip a seq (kernel client does this intentionally)
202 OPTION(ms_dispatch_throttle_bytes, OPT_U64, 100 << 20)
203 OPTION(ms_bind_ipv6, OPT_BOOL, false)
204 OPTION(ms_bind_port_min, OPT_INT, 6800)
205 OPTION(ms_bind_port_max, OPT_INT, 7300)
206 #if !defined(__FreeBSD__)
207 OPTION(ms_bind_retry_count, OPT_INT, 3) // If binding fails, how many times do we retry to bind
208 OPTION(ms_bind_retry_delay, OPT_INT, 5) // Delay between attempts to bind
209 #else
210 // FreeBSD does not use SO_REUSEADDR, so allow a bit more time by default
211 OPTION(ms_bind_retry_count, OPT_INT, 6) // If binding fails, how many times do we retry to bind
212 OPTION(ms_bind_retry_delay, OPT_INT, 6) // Delay between attempts to bind
213 #endif
214 OPTION(ms_bind_before_connect, OPT_BOOL, false)
215 OPTION(ms_tcp_listen_backlog, OPT_INT, 512)
216 OPTION(ms_rwthread_stack_bytes, OPT_U64, 1024 << 10)
217 OPTION(ms_tcp_read_timeout, OPT_U64, 900)
218 OPTION(ms_pq_max_tokens_per_priority, OPT_U64, 16777216)
219 OPTION(ms_pq_min_cost, OPT_U64, 65536)
220 OPTION(ms_inject_socket_failures, OPT_U64, 0)
221 SAFE_OPTION(ms_inject_delay_type, OPT_STR, "") // "osd mds mon client" allowed
222 OPTION(ms_inject_delay_msg_type, OPT_STR, "") // the type of message to delay, as returned by Message::get_type_name(). This is an additional restriction on the general type filter ms_inject_delay_type.
223 OPTION(ms_inject_delay_max, OPT_DOUBLE, 1) // seconds
224 OPTION(ms_inject_delay_probability, OPT_DOUBLE, 0) // range [0, 1]
225 OPTION(ms_inject_internal_delays, OPT_DOUBLE, 0) // seconds
226 OPTION(ms_dump_on_send, OPT_BOOL, false) // hexdump msg to log on send
227 OPTION(ms_dump_corrupt_message_level, OPT_INT, 1) // debug level to hexdump undecodeable messages at
228 OPTION(ms_async_op_threads, OPT_U64, 3) // number of worker processing threads for async messenger created on init
229 OPTION(ms_async_max_op_threads, OPT_U64, 5) // max number of worker processing threads for async messenger
230 OPTION(ms_async_set_affinity, OPT_BOOL, true)
231 // example: ms_async_affinity_cores = 0,1
232 // The number of cores listed is expected to equal ms_async_op_threads; otherwise
233 // extra op threads will cycle through ms_async_affinity_cores again.
234 // If ms_async_affinity_cores is empty, all threads will be bound to the currently
235 // running core.
236 OPTION(ms_async_affinity_cores, OPT_STR, "")
237 OPTION(ms_async_rdma_device_name, OPT_STR, "")
238 OPTION(ms_async_rdma_enable_hugepage, OPT_BOOL, false)
239 OPTION(ms_async_rdma_buffer_size, OPT_INT, 128 << 10)
240 OPTION(ms_async_rdma_send_buffers, OPT_U32, 1024)
241 OPTION(ms_async_rdma_receive_buffers, OPT_U32, 1024)
242 OPTION(ms_async_rdma_port_num, OPT_U32, 1)
243 OPTION(ms_async_rdma_polling_us, OPT_U32, 1000)
244 OPTION(ms_async_rdma_local_gid, OPT_STR, "") // GID format: "fe80:0000:0000:0000:7efe:90ff:fe72:6efe", no zero folding
245 OPTION(ms_async_rdma_roce_ver, OPT_INT, 1) // 0=RoCEv1, 1=RoCEv2, 2=RoCEv1.5
246 OPTION(ms_async_rdma_sl, OPT_INT, 3) // in RoCE, this means PCP
247 OPTION(ms_async_rdma_dscp, OPT_INT, 96) // in RoCE, this means DSCP
248
249 OPTION(ms_dpdk_port_id, OPT_INT, 0)
250 SAFE_OPTION(ms_dpdk_coremask, OPT_STR, "1") // it is modified in unittest so that use SAFE_OPTION to declare
251 OPTION(ms_dpdk_memory_channel, OPT_STR, "4")
252 OPTION(ms_dpdk_hugepages, OPT_STR, "")
253 OPTION(ms_dpdk_pmd, OPT_STR, "")
254 SAFE_OPTION(ms_dpdk_host_ipv4_addr, OPT_STR, "")
255 SAFE_OPTION(ms_dpdk_gateway_ipv4_addr, OPT_STR, "")
256 SAFE_OPTION(ms_dpdk_netmask_ipv4_addr, OPT_STR, "")
257 OPTION(ms_dpdk_lro, OPT_BOOL, true)
258 OPTION(ms_dpdk_hw_flow_control, OPT_BOOL, true)
259 // Weighting of a hardware network queue relative to a software queue (0=no work, 1=equal share)
260 OPTION(ms_dpdk_hw_queue_weight, OPT_FLOAT, 1)
261 OPTION(ms_dpdk_debug_allow_loopback, OPT_BOOL, false)
262 OPTION(ms_dpdk_rx_buffer_count_per_core, OPT_INT, 8192)
263
264 OPTION(inject_early_sigterm, OPT_BOOL, false)
265
266 OPTION(mon_data, OPT_STR, "/var/lib/ceph/mon/$cluster-$id")
267 OPTION(mon_initial_members, OPT_STR, "") // list of initial cluster mon ids; if specified, need majority to form initial quorum and create new cluster
268 OPTION(mon_compact_on_start, OPT_BOOL, false) // compact leveldb on ceph-mon start
269 OPTION(mon_compact_on_bootstrap, OPT_BOOL, false) // trigger leveldb compaction on bootstrap
270 OPTION(mon_compact_on_trim, OPT_BOOL, true) // compact (a prefix) when we trim old states
271 OPTION(mon_osd_cache_size, OPT_INT, 10) // the size of osdmaps cache, not to rely on underlying store's cache
272
273 OPTION(mon_cpu_threads, OPT_INT, 4)
274 OPTION(mon_osd_mapping_pgs_per_chunk, OPT_INT, 4096)
275 OPTION(mon_osd_max_creating_pgs, OPT_INT, 1024)
276 OPTION(mon_tick_interval, OPT_INT, 5)
277 OPTION(mon_session_timeout, OPT_INT, 300) // must send keepalive or subscribe
278 OPTION(mon_subscribe_interval, OPT_DOUBLE, 24*3600) // for legacy clients only
279 OPTION(mon_delta_reset_interval, OPT_DOUBLE, 10) // seconds of inactivity before we reset the pg delta to 0
280 OPTION(mon_osd_laggy_halflife, OPT_INT, 60*60) // (seconds) how quickly our laggy estimations decay
281 OPTION(mon_osd_laggy_weight, OPT_DOUBLE, .3) // weight for new 'samples's in laggy estimations
282 OPTION(mon_osd_laggy_max_interval, OPT_INT, 300) // maximum value of laggy_interval in laggy estimations
283 OPTION(mon_osd_adjust_heartbeat_grace, OPT_BOOL, true) // true if we should scale based on laggy estimations
284 OPTION(mon_osd_adjust_down_out_interval, OPT_BOOL, true) // true if we should scale based on laggy estimations
285 OPTION(mon_osd_auto_mark_in, OPT_BOOL, false) // mark any booting osds 'in'
286 OPTION(mon_osd_auto_mark_auto_out_in, OPT_BOOL, true) // mark booting auto-marked-out osds 'in'
287 OPTION(mon_osd_auto_mark_new_in, OPT_BOOL, true) // mark booting new osds 'in'
288 OPTION(mon_osd_down_out_interval, OPT_INT, 600) // seconds
289 OPTION(mon_osd_down_out_subtree_limit, OPT_STR, "rack") // smallest crush unit/type that we will not automatically mark out
290 OPTION(mon_osd_min_up_ratio, OPT_DOUBLE, .3) // min osds required to be up to mark things down
291 OPTION(mon_osd_min_in_ratio, OPT_DOUBLE, .75) // min osds required to be in to mark things out
292 OPTION(mon_osd_warn_op_age, OPT_DOUBLE, 32) // max op age before we generate a warning (make it a power of 2)
293 OPTION(mon_osd_err_op_age_ratio, OPT_DOUBLE, 128) // when to generate an error, as multiple of mon_osd_warn_op_age
294 OPTION(mon_osd_max_split_count, OPT_INT, 32) // largest number of PGs per "involved" OSD to let split create
295 OPTION(mon_osd_allow_primary_temp, OPT_BOOL, false) // allow primary_temp to be set in the osdmap
296 OPTION(mon_osd_allow_primary_affinity, OPT_BOOL, false) // allow primary_affinity to be set in the osdmap
297 OPTION(mon_osd_prime_pg_temp, OPT_BOOL, true) // prime osdmap with pg mapping changes
298 OPTION(mon_osd_prime_pg_temp_max_time, OPT_FLOAT, .5) // max time to spend priming
299 OPTION(mon_osd_prime_pg_temp_max_estimate, OPT_FLOAT, .25) // max estimate of pg total before we do all pgs in parallel
300 OPTION(mon_osd_pool_ec_fast_read, OPT_BOOL, false) // whether turn on fast read on the pool or not
301 OPTION(mon_stat_smooth_intervals, OPT_INT, 6) // smooth stats over last N PGMap maps
302 OPTION(mon_election_timeout, OPT_FLOAT, 5) // on election proposer, max waiting time for all ACKs
303 OPTION(mon_lease, OPT_FLOAT, 5) // lease interval
304 OPTION(mon_lease_renew_interval_factor, OPT_FLOAT, .6) // on leader, to renew the lease
305 OPTION(mon_lease_ack_timeout_factor, OPT_FLOAT, 2.0) // on leader, if lease isn't acked by all peons
306 OPTION(mon_accept_timeout_factor, OPT_FLOAT, 2.0) // on leader, if paxos update isn't accepted
307
308 OPTION(mon_clock_drift_allowed, OPT_FLOAT, .050) // allowed clock drift between monitors
309 OPTION(mon_clock_drift_warn_backoff, OPT_FLOAT, 5) // exponential backoff for clock drift warnings
310 OPTION(mon_timecheck_interval, OPT_FLOAT, 300.0) // on leader, timecheck (clock drift check) interval (seconds)
311 OPTION(mon_timecheck_skew_interval, OPT_FLOAT, 30.0) // on leader, timecheck (clock drift check) interval when in presence of a skew (seconds)
312 OPTION(mon_pg_stuck_threshold, OPT_INT, 60) // number of seconds after which pgs can be considered stuck inactive, unclean, etc (see doc/control.rst under dump_stuck for more info)
313 OPTION(mon_pg_min_inactive, OPT_U64, 1) // the number of PGs which have to be inactive longer than 'mon_pg_stuck_threshold' before health goes into ERR. 0 means disabled, never go into ERR.
314 OPTION(mon_pg_warn_min_per_osd, OPT_INT, 30) // min # pgs per (in) osd before we warn the admin
315 OPTION(mon_pg_warn_max_per_osd, OPT_INT, 300) // max # pgs per (in) osd before we warn the admin
316 OPTION(mon_pg_warn_max_object_skew, OPT_FLOAT, 10.0) // max skew from average in objects per pg
317 OPTION(mon_pg_warn_min_objects, OPT_INT, 10000) // do not warn below this object #
318 OPTION(mon_pg_warn_min_pool_objects, OPT_INT, 1000) // do not warn on pools below this object #
319 OPTION(mon_pg_check_down_all_threshold, OPT_FLOAT, .5) // threshold of down osds after which we check all pgs
320 OPTION(mon_cache_target_full_warn_ratio, OPT_FLOAT, .66) // position between pool cache_target_full and max where we start warning
321 OPTION(mon_osd_full_ratio, OPT_FLOAT, .95) // what % full makes an OSD "full"
322 OPTION(mon_osd_backfillfull_ratio, OPT_FLOAT, .90) // what % full makes an OSD backfill full (backfill halted)
323 OPTION(mon_osd_nearfull_ratio, OPT_FLOAT, .85) // what % full makes an OSD near full
324 OPTION(mon_osd_initial_require_min_compat_client, OPT_STR, "jewel")
325 OPTION(mon_allow_pool_delete, OPT_BOOL, false) // allow pool deletion
326 OPTION(mon_fake_pool_delete, OPT_BOOL, false) // fake pool deletion (add _DELETED suffix)
327 OPTION(mon_globalid_prealloc, OPT_U32, 10000) // how many globalids to prealloc
328 OPTION(mon_osd_report_timeout, OPT_INT, 900) // grace period before declaring unresponsive OSDs dead
329 OPTION(mon_force_standby_active, OPT_BOOL, true) // should mons force standby-replay mds to be active
330 OPTION(mon_warn_on_legacy_crush_tunables, OPT_BOOL, true) // warn if crush tunables are too old (older than mon_crush_min_required_version)
331 OPTION(mon_crush_min_required_version, OPT_STR, "firefly")
332 OPTION(mon_warn_on_crush_straw_calc_version_zero, OPT_BOOL, true) // warn if crush straw_calc_version==0
333 OPTION(mon_warn_on_osd_down_out_interval_zero, OPT_BOOL, true) // warn if 'mon_osd_down_out_interval == 0'
334 OPTION(mon_warn_on_cache_pools_without_hit_sets, OPT_BOOL, true)
335 OPTION(mon_warn_osd_usage_min_max_delta, OPT_FLOAT, .40) // warn if difference between min and max OSD utilizations exceeds specified amount
336 OPTION(mon_min_osdmap_epochs, OPT_INT, 500)
337 OPTION(mon_max_pgmap_epochs, OPT_INT, 500)
338 OPTION(mon_max_log_epochs, OPT_INT, 500)
339 OPTION(mon_max_mdsmap_epochs, OPT_INT, 500)
340 OPTION(mon_max_osd, OPT_INT, 10000)
341 OPTION(mon_probe_timeout, OPT_DOUBLE, 2.0)
342 OPTION(mon_client_bytes, OPT_U64, 100ul << 20) // client msg data allowed in memory (in bytes)
343 OPTION(mon_mgr_proxy_client_bytes_ratio, OPT_FLOAT, .3) // ratio of mon_client_bytes that can be consumed by proxied mgr commands before we error out to client
344 OPTION(mon_log_max_summary, OPT_U64, 50)
345 OPTION(mon_daemon_bytes, OPT_U64, 400ul << 20) // mds, osd message memory cap (in bytes)
346 OPTION(mon_max_log_entries_per_event, OPT_INT, 4096)
347 OPTION(mon_reweight_min_pgs_per_osd, OPT_U64, 10) // min pgs per osd for reweight-by-pg command
348 OPTION(mon_reweight_min_bytes_per_osd, OPT_U64, 100*1024*1024) // min bytes per osd for reweight-by-utilization command
349 OPTION(mon_reweight_max_osds, OPT_INT, 4) // max osds to change per reweight-by-* command
350 OPTION(mon_reweight_max_change, OPT_DOUBLE, 0.05)
351 OPTION(mon_health_data_update_interval, OPT_FLOAT, 60.0)
352 OPTION(mon_health_to_clog, OPT_BOOL, true)
353 OPTION(mon_health_to_clog_interval, OPT_INT, 3600)
354 OPTION(mon_health_to_clog_tick_interval, OPT_DOUBLE, 60.0)
355 OPTION(mon_health_preluminous_compat, OPT_BOOL, false)
356 OPTION(mon_health_max_detail, OPT_INT, 50) // max detailed pgs to report in health detail
357 OPTION(mon_data_avail_crit, OPT_INT, 5)
358 OPTION(mon_data_avail_warn, OPT_INT, 30)
359 OPTION(mon_data_size_warn, OPT_U64, 15ULL*1024*1024*1024) // issue a warning when the monitor's data store goes over 15GB (in bytes); ULL keeps the product out of int arithmetic, where 15*2^30 would overflow
360 OPTION(mon_warn_not_scrubbed, OPT_INT, 0)
361 OPTION(mon_warn_not_deep_scrubbed, OPT_INT, 0)
362 OPTION(mon_scrub_interval, OPT_INT, 3600*24) // once a day
363 OPTION(mon_scrub_timeout, OPT_INT, 60*5) // let's give it 5 minutes; why not.
364 OPTION(mon_scrub_max_keys, OPT_INT, 100) // max number of keys to scrub each time
365 OPTION(mon_scrub_inject_crc_mismatch, OPT_DOUBLE, 0.0) // probability of injected crc mismatch [0.0, 1.0]
366 OPTION(mon_scrub_inject_missing_keys, OPT_DOUBLE, 0.0) // probability of injected missing keys [0.0, 1.0]
367 OPTION(mon_config_key_max_entry_size, OPT_INT, 4096) // max num bytes per config-key entry
368 OPTION(mon_sync_timeout, OPT_DOUBLE, 60.0)
369 OPTION(mon_sync_max_payload_size, OPT_U32, 1048576) // max size for a sync chunk payload (say, 1MB)
370 OPTION(mon_sync_debug, OPT_BOOL, false) // enable sync-specific debug
371 OPTION(mon_inject_sync_get_chunk_delay, OPT_DOUBLE, 0) // inject N second delay on each get_chunk request
372 OPTION(mon_osd_min_down_reporters, OPT_INT, 2) // number of OSDs from different subtrees who need to report a down OSD for it to count
373 OPTION(mon_osd_reporter_subtree_level , OPT_STR, "host") // in which level of parent bucket the reporters are counted
374 OPTION(mon_osd_force_trim_to, OPT_INT, 0) // force mon to trim maps to this point, regardless of min_last_epoch_clean (dangerous, use with care)
375 OPTION(mon_mds_force_trim_to, OPT_INT, 0) // force mon to trim mdsmaps to this point (dangerous, use with care)
376 OPTION(mon_mds_skip_sanity, OPT_BOOL, false) // skip safety assertions on FSMap (in case of bugs where we want to continue anyway)
377
378 // monitor debug options
379 OPTION(mon_debug_deprecated_as_obsolete, OPT_BOOL, false) // consider deprecated commands as obsolete
380
381 // dump transactions
382 OPTION(mon_debug_dump_transactions, OPT_BOOL, false)
383 OPTION(mon_debug_dump_json, OPT_BOOL, false)
384 OPTION(mon_debug_dump_location, OPT_STR, "/var/log/ceph/$cluster-$name.tdump")
385 OPTION(mon_debug_no_require_luminous, OPT_BOOL, false)
386 OPTION(mon_debug_no_require_bluestore_for_ec_overwrites, OPT_BOOL, false)
387 OPTION(mon_debug_no_initial_persistent_features, OPT_BOOL, false)
388 OPTION(mon_inject_transaction_delay_max, OPT_DOUBLE, 10.0) // seconds
389 OPTION(mon_inject_transaction_delay_probability, OPT_DOUBLE, 0) // range [0, 1]
390
391 OPTION(mon_sync_provider_kill_at, OPT_INT, 0) // kill the sync provider at a specific point in the work flow
392 OPTION(mon_sync_requester_kill_at, OPT_INT, 0) // kill the sync requester at a specific point in the work flow
393 OPTION(mon_force_quorum_join, OPT_BOOL, false) // force monitor to join quorum even if it has been previously removed from the map
394 OPTION(mon_keyvaluedb, OPT_STR, "rocksdb") // type of keyvaluedb backend
395
396 // UNSAFE -- TESTING ONLY! Allows addition of a cache tier with preexisting snaps
397 OPTION(mon_debug_unsafe_allow_tier_with_nonempty_snaps, OPT_BOOL, false)
398 OPTION(mon_osd_blacklist_default_expire, OPT_DOUBLE, 60*60) // default one hour
399 OPTION(mon_osd_crush_smoke_test, OPT_BOOL, true)
400
401 OPTION(paxos_stash_full_interval, OPT_INT, 25) // how often (in commits) to stash a full copy of the PaxosService state
402 OPTION(paxos_max_join_drift, OPT_INT, 10) // max paxos iterations before we must first sync the monitor stores
403 OPTION(paxos_propose_interval, OPT_DOUBLE, 1.0) // gather updates for this long before proposing a map update
404 OPTION(paxos_min_wait, OPT_DOUBLE, 0.05) // min time to gather updates for after period of inactivity
405 OPTION(paxos_min, OPT_INT, 500) // minimum number of paxos states to keep around
406 OPTION(paxos_trim_min, OPT_INT, 250) // number of extra proposals tolerated before trimming
407 OPTION(paxos_trim_max, OPT_INT, 500) // max number of extra proposals to trim at a time
408 OPTION(paxos_service_trim_min, OPT_INT, 250) // minimum amount of versions to trigger a trim (0 disables it)
409 OPTION(paxos_service_trim_max, OPT_INT, 500) // maximum amount of versions to trim during a single proposal (0 disables it)
410 OPTION(paxos_kill_at, OPT_INT, 0)
411 OPTION(auth_cluster_required, OPT_STR, "cephx") // required of mon, mds, osd daemons
412 OPTION(auth_service_required, OPT_STR, "cephx") // required by daemons of clients
413 OPTION(auth_client_required, OPT_STR, "cephx, none") // what clients require of daemons
414 OPTION(auth_supported, OPT_STR, "") // deprecated; default value for above if they are not defined.
415 OPTION(max_rotating_auth_attempts, OPT_INT, 10)
416 OPTION(cephx_require_signatures, OPT_BOOL, false) // If true, don't talk to Cephx partners if they don't support message signing; off by default
417 OPTION(cephx_cluster_require_signatures, OPT_BOOL, false)
418 OPTION(cephx_service_require_signatures, OPT_BOOL, false)
419 OPTION(cephx_sign_messages, OPT_BOOL, true) // Default to signing session messages if supported
420 OPTION(auth_mon_ticket_ttl, OPT_DOUBLE, 60*60*12)
421 OPTION(auth_service_ticket_ttl, OPT_DOUBLE, 60*60)
422 OPTION(auth_debug, OPT_BOOL, false) // if true, assert when weird things happen
423 OPTION(mon_client_hunt_parallel, OPT_U32, 2) // how many mons to try to connect to in parallel during hunt
424 OPTION(mon_client_hunt_interval, OPT_DOUBLE, 3.0) // try new mon every N seconds until we connect
425 OPTION(mon_client_ping_interval, OPT_DOUBLE, 10.0) // ping every N seconds
426 OPTION(mon_client_ping_timeout, OPT_DOUBLE, 30.0) // fail if we don't hear back
427 OPTION(mon_client_hunt_interval_backoff, OPT_DOUBLE, 2.0) // each time we reconnect to a monitor, double our timeout
428 OPTION(mon_client_hunt_interval_max_multiple, OPT_DOUBLE, 10.0) // up to a max of 10*default (30 seconds)
429 OPTION(mon_client_max_log_entries_per_message, OPT_INT, 1000)
430 OPTION(mon_max_pool_pg_num, OPT_INT, 65536)
431 OPTION(mon_pool_quota_warn_threshold, OPT_INT, 0) // percent of quota at which to issue warnings
432 OPTION(mon_pool_quota_crit_threshold, OPT_INT, 0) // percent of quota at which to issue errors
433 OPTION(client_cache_size, OPT_INT, 16384)
434 OPTION(client_cache_mid, OPT_FLOAT, .75)
435 OPTION(client_use_random_mds, OPT_BOOL, false)
436 OPTION(client_mount_timeout, OPT_DOUBLE, 300.0)
437 OPTION(client_tick_interval, OPT_DOUBLE, 1.0)
438 OPTION(client_trace, OPT_STR, "")
439 OPTION(client_readahead_min, OPT_LONGLONG, 128*1024) // readahead at _least_ this much.
440 OPTION(client_readahead_max_bytes, OPT_LONGLONG, 0) // default unlimited
441 OPTION(client_readahead_max_periods, OPT_LONGLONG, 4) // as multiple of file layout period (object size * num stripes)
442 OPTION(client_reconnect_stale, OPT_BOOL, false) // automatically reconnect stale session
443 OPTION(client_snapdir, OPT_STR, ".snap")
444 OPTION(client_mountpoint, OPT_STR, "/")
445 OPTION(client_mount_uid, OPT_INT, -1)
446 OPTION(client_mount_gid, OPT_INT, -1)
447 OPTION(client_notify_timeout, OPT_INT, 10) // in seconds
448 OPTION(osd_client_watch_timeout, OPT_INT, 30) // in seconds
449 OPTION(client_caps_release_delay, OPT_INT, 5) // in seconds
450 OPTION(client_quota_df, OPT_BOOL, true) // use quota for df on subdir mounts
451 OPTION(client_oc, OPT_BOOL, true)
452 OPTION(client_oc_size, OPT_INT, 1024*1024* 200) // MB * n
453 OPTION(client_oc_max_dirty, OPT_INT, 1024*1024* 100) // MB * n (dirty OR tx.. bigish)
454 OPTION(client_oc_target_dirty, OPT_INT, 1024*1024* 8) // target dirty (keep this smallish)
455 OPTION(client_oc_max_dirty_age, OPT_DOUBLE, 5.0) // max age in cache before writeback
456 OPTION(client_oc_max_objects, OPT_INT, 1000) // max objects in cache
457 OPTION(client_debug_getattr_caps, OPT_BOOL, false) // check if MDS reply contains wanted caps
458 OPTION(client_debug_force_sync_read, OPT_BOOL, false) // always read synchronously (go to osds)
459 OPTION(client_debug_inject_tick_delay, OPT_INT, 0) // delay the client tick for a number of seconds
460 OPTION(client_max_inline_size, OPT_U64, 4096)
461 OPTION(client_inject_release_failure, OPT_BOOL, false) // synthetic client bug for testing
462 OPTION(client_inject_fixed_oldest_tid, OPT_BOOL, false) // synthetic client bug for testing
463 OPTION(client_metadata, OPT_STR, "")
464 OPTION(client_acl_type, OPT_STR, "")
465 OPTION(client_permissions, OPT_BOOL, true)
466 OPTION(client_dirsize_rbytes, OPT_BOOL, true)
467
468 // note (client_oc_* options above): the max amount of "in flight" dirty data is roughly (client_oc_max_dirty - client_oc_target_dirty)
469 OPTION(fuse_use_invalidate_cb, OPT_BOOL, true) // use fuse 2.8+ invalidate callback to keep page cache consistent
470 OPTION(fuse_disable_pagecache, OPT_BOOL, false)
471 OPTION(fuse_allow_other, OPT_BOOL, true)
472 OPTION(fuse_default_permissions, OPT_BOOL, false)
473 OPTION(fuse_big_writes, OPT_BOOL, true)
474 OPTION(fuse_atomic_o_trunc, OPT_BOOL, true)
475 OPTION(fuse_debug, OPT_BOOL, false)
476 OPTION(fuse_multithreaded, OPT_BOOL, true)
477 OPTION(fuse_require_active_mds, OPT_BOOL, true) // if ceph_fuse requires active mds server
478 OPTION(fuse_syncfs_on_mksnap, OPT_BOOL, true)
479 OPTION(fuse_set_user_groups, OPT_BOOL, false) // if ceph_fuse fills in group lists or not
480
481 OPTION(client_try_dentry_invalidate, OPT_BOOL, true) // the client should try to use dentry invalidation instead of remounting, on kernels where it believes that will work
482 OPTION(client_die_on_failed_remount, OPT_BOOL, true)
483 OPTION(client_check_pool_perm, OPT_BOOL, true)
484 OPTION(client_use_faked_inos, OPT_BOOL, false)
485 OPTION(client_mds_namespace, OPT_STR, "")
486
487 OPTION(crush_location, OPT_STR, "") // whitespace-separated list of key=value pairs describing crush location
488 OPTION(crush_location_hook, OPT_STR, "")
489 OPTION(crush_location_hook_timeout, OPT_INT, 10)
490
491 OPTION(objecter_tick_interval, OPT_DOUBLE, 5.0)
492 OPTION(objecter_timeout, OPT_DOUBLE, 10.0) // before we ask for a map
493 OPTION(objecter_inflight_op_bytes, OPT_U64, 1024*1024*100) // max in-flight data (both directions)
494 OPTION(objecter_inflight_ops, OPT_U64, 1024) // max in-flight ios
495 OPTION(objecter_completion_locks_per_session, OPT_U64, 32) // num of completion locks per each session, for serializing same object responses
496 OPTION(objecter_inject_no_watch_ping, OPT_BOOL, false) // suppress watch pings
497 OPTION(objecter_retry_writes_after_first_reply, OPT_BOOL, false) // ignore the first reply for each write, and resend the osd op instead
498 OPTION(objecter_debug_inject_relock_delay, OPT_BOOL, false)
499
500 // Max number of deletes at once in a single Filer::purge call
501 OPTION(filer_max_purge_ops, OPT_U32, 10)
502 // Max number of truncate at once in a single Filer::truncate call
503 OPTION(filer_max_truncate_ops, OPT_U32, 128)
504
505 OPTION(journaler_write_head_interval, OPT_INT, 15)
506 OPTION(journaler_prefetch_periods, OPT_INT, 10) // * journal object size
507 OPTION(journaler_prezero_periods, OPT_INT, 5) // * journal object size
508 OPTION(mds_data, OPT_STR, "/var/lib/ceph/mds/$cluster-$id")
509 OPTION(mds_max_file_size, OPT_U64, 1ULL << 40) // Used when creating new CephFS. Change with 'ceph mds set max_file_size <size>' afterwards
510 // max xattr kv pairs size for each dir/file
511 OPTION(mds_max_xattr_pairs_size, OPT_U32, 64 << 10)
512 OPTION(mds_cache_size, OPT_INT, 100000)
513 OPTION(mds_cache_mid, OPT_FLOAT, .7)
514 OPTION(mds_max_file_recover, OPT_U32, 32)
515 OPTION(mds_dir_max_commit_size, OPT_INT, 10) // MB
516 OPTION(mds_dir_keys_per_op, OPT_INT, 16384)
517 OPTION(mds_decay_halflife, OPT_FLOAT, 5)
518 OPTION(mds_beacon_interval, OPT_FLOAT, 4)
519 OPTION(mds_beacon_grace, OPT_FLOAT, 15)
520 OPTION(mds_enforce_unique_name, OPT_BOOL, true)
521 OPTION(mds_blacklist_interval, OPT_FLOAT, 24.0*60.0) // how long to blacklist failed nodes
522
523 OPTION(mds_session_timeout, OPT_FLOAT, 60) // cap bits and leases time out if client idle
524 OPTION(mds_session_blacklist_on_timeout, OPT_BOOL, true) // whether to blacklist clients whose sessions are dropped due to timeout
525 OPTION(mds_session_blacklist_on_evict, OPT_BOOL, true) // whether to blacklist clients whose sessions are dropped via admin commands
526
527 OPTION(mds_sessionmap_keys_per_op, OPT_U32, 1024) // how many sessions should I try to load/store in a single OMAP operation?
528 OPTION(mds_revoke_cap_timeout, OPT_FLOAT, 60) // detect clients which aren't revoking caps
529 OPTION(mds_recall_state_timeout, OPT_FLOAT, 60) // detect clients which aren't trimming caps
530 OPTION(mds_freeze_tree_timeout, OPT_FLOAT, 30) // detecting freeze tree deadlock
531 OPTION(mds_session_autoclose, OPT_FLOAT, 300) // autoclose idle session
532 OPTION(mds_health_summarize_threshold, OPT_INT, 10) // collapse N-client health metrics to a single 'many'
533 OPTION(mds_health_cache_threshold, OPT_FLOAT, 1.5) // warn on cache size if it exceeds mds_cache_size by this factor
534 OPTION(mds_reconnect_timeout, OPT_FLOAT, 45) // seconds to wait for clients during mds restart
535 // ^ make mds_reconnect_timeout equal to (mds_session_timeout - mds_beacon_grace); the defaults give 60 - 15 = 45
536 OPTION(mds_tick_interval, OPT_FLOAT, 5)
537 OPTION(mds_dirstat_min_interval, OPT_FLOAT, 1) // try to avoid propagating more often than this
538 OPTION(mds_scatter_nudge_interval, OPT_FLOAT, 5) // how quickly dirstat changes propagate up the hierarchy
539 OPTION(mds_client_prealloc_inos, OPT_INT, 1000)
540 OPTION(mds_early_reply, OPT_BOOL, true)
541 OPTION(mds_default_dir_hash, OPT_INT, CEPH_STR_HASH_RJENKINS)
542 OPTION(mds_log_pause, OPT_BOOL, false)
543 OPTION(mds_log_skip_corrupt_events, OPT_BOOL, false)
544 OPTION(mds_log_max_events, OPT_INT, -1)
545 OPTION(mds_log_events_per_segment, OPT_INT, 1024)
546 OPTION(mds_log_segment_size, OPT_INT, 0) // segment size for mds log, default to default file_layout_t
547 OPTION(mds_log_max_segments, OPT_U32, 30)
548 OPTION(mds_log_max_expiring, OPT_INT, 20)
549 OPTION(mds_bal_export_pin, OPT_BOOL, true) // allow clients to pin directory trees to ranks
550 OPTION(mds_bal_sample_interval, OPT_DOUBLE, 3.0) // every 3 seconds
551 OPTION(mds_bal_replicate_threshold, OPT_FLOAT, 8000)
552 OPTION(mds_bal_unreplicate_threshold, OPT_FLOAT, 0)
553 OPTION(mds_bal_frag, OPT_BOOL, true)
554 OPTION(mds_bal_split_size, OPT_INT, 10000)
555 OPTION(mds_bal_split_rd, OPT_FLOAT, 25000)
556 OPTION(mds_bal_split_wr, OPT_FLOAT, 10000)
557 OPTION(mds_bal_split_bits, OPT_INT, 3)
558 OPTION(mds_bal_merge_size, OPT_INT, 50)
559 OPTION(mds_bal_interval, OPT_INT, 10) // seconds
560 OPTION(mds_bal_fragment_interval, OPT_INT, 5) // seconds
561 OPTION(mds_bal_fragment_size_max, OPT_INT, 10000*10) // order of magnitude higher than split size
562 OPTION(mds_bal_fragment_fast_factor, OPT_FLOAT, 1.5) // multiple of size_max that triggers immediate split
563 OPTION(mds_bal_idle_threshold, OPT_FLOAT, 0)
564 OPTION(mds_bal_max, OPT_INT, -1)
565 OPTION(mds_bal_max_until, OPT_INT, -1)
566 OPTION(mds_bal_mode, OPT_INT, 0)
567 OPTION(mds_bal_min_rebalance, OPT_FLOAT, .1) // must be this much above average before we export anything
568 OPTION(mds_bal_min_start, OPT_FLOAT, .2) // if we need less than this, we don't do anything
569 OPTION(mds_bal_need_min, OPT_FLOAT, .8) // take within this range of what we need
570 OPTION(mds_bal_need_max, OPT_FLOAT, 1.2)
571 OPTION(mds_bal_midchunk, OPT_FLOAT, .3) // any sub bigger than this taken in full
572 OPTION(mds_bal_minchunk, OPT_FLOAT, .001) // never take anything smaller than this
573 OPTION(mds_bal_target_decay, OPT_DOUBLE, 10.0) // target decay half-life in MDSMap (2x larger is approx. 2x slower)
574 OPTION(mds_replay_interval, OPT_FLOAT, 1.0) // time to wait before starting replay again
575 OPTION(mds_shutdown_check, OPT_INT, 0)
576 OPTION(mds_thrash_exports, OPT_INT, 0)
577 OPTION(mds_thrash_fragments, OPT_INT, 0)
578 OPTION(mds_dump_cache_on_map, OPT_BOOL, false)
579 OPTION(mds_dump_cache_after_rejoin, OPT_BOOL, false)
580 OPTION(mds_verify_scatter, OPT_BOOL, false)
581 OPTION(mds_debug_scatterstat, OPT_BOOL, false)
582 OPTION(mds_debug_frag, OPT_BOOL, false)
583 OPTION(mds_debug_auth_pins, OPT_BOOL, false)
584 OPTION(mds_debug_subtrees, OPT_BOOL, false)
585 OPTION(mds_kill_mdstable_at, OPT_INT, 0)
586 OPTION(mds_kill_export_at, OPT_INT, 0)
587 OPTION(mds_kill_import_at, OPT_INT, 0)
588 OPTION(mds_kill_link_at, OPT_INT, 0)
589 OPTION(mds_kill_rename_at, OPT_INT, 0)
590 OPTION(mds_kill_openc_at, OPT_INT, 0)
591 OPTION(mds_kill_journal_at, OPT_INT, 0)
592 OPTION(mds_kill_journal_expire_at, OPT_INT, 0)
593 OPTION(mds_kill_journal_replay_at, OPT_INT, 0)
594 OPTION(mds_journal_format, OPT_U32, 1) // Default to most recent JOURNAL_FORMAT_*
595 OPTION(mds_kill_create_at, OPT_INT, 0)
596 OPTION(mds_inject_traceless_reply_probability, OPT_DOUBLE, 0) /* fraction
597 (in the range [0, 1]) of MDS modify replies for
598 which we skip sending the client a trace */
599 OPTION(mds_wipe_sessions, OPT_BOOL, false) // NOTE(review): presumably wipes the MDS session state when set -- confirm against the MDS code before enabling
600 OPTION(mds_wipe_ino_prealloc, OPT_BOOL, false) // NOTE(review): presumably wipes preallocated inode numbers when set -- confirm against the MDS code before enabling
601 OPTION(mds_skip_ino, OPT_INT, 0)
602 OPTION(mds_standby_for_name, OPT_STR, "")
603 OPTION(mds_standby_for_rank, OPT_INT, -1)
604 OPTION(mds_standby_for_fscid, OPT_INT, -1)
605 OPTION(mds_standby_replay, OPT_BOOL, false)
606 OPTION(mds_enable_op_tracker, OPT_BOOL, true) // enable/disable MDS op tracking
607 OPTION(mds_op_history_size, OPT_U32, 20) // Max number of completed ops to track
608 OPTION(mds_op_history_duration, OPT_U32, 600) // Oldest completed op to track
609 OPTION(mds_op_complaint_time, OPT_FLOAT, 30) // how many seconds old makes an op complaint-worthy
610 OPTION(mds_op_log_threshold, OPT_INT, 5) // how many op log messages to show in one go
611 OPTION(mds_snap_min_uid, OPT_U32, 0) // The minimum UID required to create a snapshot
612 OPTION(mds_snap_max_uid, OPT_U32, 4294967294) // The maximum UID allowed to create a snapshot
613 OPTION(mds_snap_rstat, OPT_BOOL, false) // enable/disable nested stat for snapshot
614 OPTION(mds_verify_backtrace, OPT_U32, 1)
615 // detect clients which aren't trimming completed requests
616 OPTION(mds_max_completed_flushes, OPT_U32, 100000)
617 OPTION(mds_max_completed_requests, OPT_U32, 100000)
618
619 OPTION(mds_action_on_write_error, OPT_U32, 1) // 0: ignore; 1: force readonly; 2: crash
620 OPTION(mds_mon_shutdown_timeout, OPT_DOUBLE, 5)
621
622 // Maximum number of concurrent stray files to purge
623 OPTION(mds_max_purge_files, OPT_U32, 64)
624 // Maximum number of concurrent RADOS ops to issue in purging
625 OPTION(mds_max_purge_ops, OPT_U32, 8192)
626 // Maximum number of concurrent RADOS ops to issue in purging, scaled by PG count
627 OPTION(mds_max_purge_ops_per_pg, OPT_FLOAT, 0.5)
628
629 OPTION(mds_purge_queue_busy_flush_period, OPT_FLOAT, 1.0)
630
631 OPTION(mds_root_ino_uid, OPT_INT, 0) // The UID of / on new filesystems
632 OPTION(mds_root_ino_gid, OPT_INT, 0) // The GID of / on new filesystems
633
634 OPTION(mds_max_scrub_ops_in_progress, OPT_INT, 5) // the number of simultaneous scrubs allowed
635
636 // Maximum number of damaged frags/dentries before whole MDS rank goes damaged
637 OPTION(mds_damage_table_max_entries, OPT_INT, 10000)
638
639 // Maximum increment for client writable range, counted by number of objects
640 OPTION(mds_client_writeable_range_max_inc_objs, OPT_U32, 1024)
641
642 // verify backend can support configured max object name length
643 OPTION(osd_check_max_object_name_len_on_startup, OPT_BOOL, true)
644
645 // Maximum number of backfills to or from a single osd
646 OPTION(osd_max_backfills, OPT_U64, 1)
647
648 // Minimum recovery priority (255 = max, smaller = lower)
649 OPTION(osd_min_recovery_priority, OPT_INT, 0)
650
651 // Seconds to wait before retrying refused backfills
652 OPTION(osd_backfill_retry_interval, OPT_DOUBLE, 30.0)
653
654 // Seconds to wait before retrying refused recovery
655 OPTION(osd_recovery_retry_interval, OPT_DOUBLE, 30.0)
656
657 // max agent flush ops
658 OPTION(osd_agent_max_ops, OPT_INT, 4)
659 OPTION(osd_agent_max_low_ops, OPT_INT, 2)
660 OPTION(osd_agent_min_evict_effort, OPT_FLOAT, .1)
661 OPTION(osd_agent_quantize_effort, OPT_FLOAT, .1)
662 OPTION(osd_agent_delay_time, OPT_FLOAT, 5.0)
663
664 // osd ignore history.last_epoch_started in find_best_info
665 OPTION(osd_find_best_info_ignore_history_les, OPT_BOOL, false)
666
667 // decay atime and hist histograms after how many objects go by
668 OPTION(osd_agent_hist_halflife, OPT_INT, 1000)
669
670 // must be this amount over the threshold to enable,
671 // this amount below the threshold to disable.
672 OPTION(osd_agent_slop, OPT_FLOAT, .02)
673
674 OPTION(osd_uuid, OPT_UUID, uuid_d())
675 OPTION(osd_data, OPT_STR, "/var/lib/ceph/osd/$cluster-$id")
676 OPTION(osd_journal, OPT_STR, "/var/lib/ceph/osd/$cluster-$id/journal")
677 OPTION(osd_journal_size, OPT_INT, 5120) // in mb
678 OPTION(osd_journal_flush_on_shutdown, OPT_BOOL, true) // Flush journal to data store on shutdown
679 // flags for specific control purpose during osd mount() process.
680 // e.g., can be 1 to skip over replaying journal
681 // or 2 to skip over mounting omap or 3 to skip over both.
682 // This might be helpful in case the journal is totally corrupted
683 // and we still want to bring the osd daemon back normally, etc.
684 OPTION(osd_os_flags, OPT_U32, 0)
685 OPTION(osd_max_write_size, OPT_INT, 90)
686 OPTION(osd_max_pgls, OPT_U64, 1024) // max number of pgls entries to return
687 OPTION(osd_client_message_size_cap, OPT_U64, 500*1024L*1024L) // client data allowed in-memory (in bytes)
688 OPTION(osd_client_message_cap, OPT_U64, 100) // num client messages allowed in-memory
689 OPTION(osd_pg_bits, OPT_INT, 6) // bits per osd
690 OPTION(osd_pgp_bits, OPT_INT, 6) // bits per osd
691 OPTION(osd_crush_update_weight_set, OPT_BOOL, true) // update weight set while updating weights
692 OPTION(osd_crush_chooseleaf_type, OPT_INT, 1) // 1 = host
693 OPTION(osd_pool_use_gmt_hitset, OPT_BOOL, true) // try to use gmt for hitset archive names if all osds in cluster support it.
694 OPTION(osd_crush_update_on_start, OPT_BOOL, true)
695 OPTION(osd_class_update_on_start, OPT_BOOL, true) // automatically set device class on start
696 OPTION(osd_crush_initial_weight, OPT_DOUBLE, -1) // if >=0, the initial weight is for newly added osds.
697 OPTION(osd_pool_default_crush_rule, OPT_INT, -1)
698 OPTION(osd_pool_erasure_code_stripe_unit, OPT_U32, 4096) // in bytes
699 OPTION(osd_pool_default_size, OPT_INT, 3)
700 OPTION(osd_pool_default_min_size, OPT_INT, 0) // 0 means no specific default; ceph will use size-size/2
701 OPTION(osd_pool_default_pg_num, OPT_INT, 8) // number of PGs for new pools. Configure in global or mon section of ceph.conf
702 OPTION(osd_pool_default_pgp_num, OPT_INT, 8) // number of PGs for placement purposes. Should be equal to pg_num
703 OPTION(osd_pool_default_type, OPT_STR, "replicated")
704 OPTION(osd_pool_default_erasure_code_profile,
705 OPT_STR,
706 "plugin=jerasure "
707 "technique=reed_sol_van "
708 "k=2 "
709 "m=1 "
710 ) // default properties of osd pool create
711 OPTION(osd_erasure_code_plugins, OPT_STR,
712 "jerasure"
713 " lrc"
714 #ifdef HAVE_BETTER_YASM_ELF64
715 " isa"
716 #endif
717 ) // list of erasure code plugins
718
719 // Allows the "peered" state for recovery and backfill below min_size
720 OPTION(osd_allow_recovery_below_min_size, OPT_BOOL, true)
721
722 OPTION(osd_pool_default_flags, OPT_INT, 0) // default flags for new pools
723 OPTION(osd_pool_default_flag_hashpspool, OPT_BOOL, true) // use new pg hashing to prevent pool/pg overlap
724 OPTION(osd_pool_default_flag_nodelete, OPT_BOOL, false) // pool can't be deleted
725 OPTION(osd_pool_default_flag_nopgchange, OPT_BOOL, false) // pool's pg and pgp num can't be changed
726 OPTION(osd_pool_default_flag_nosizechange, OPT_BOOL, false) // pool's size and min size can't be changed
727 OPTION(osd_pool_default_hit_set_bloom_fpp, OPT_FLOAT, .05)
728 OPTION(osd_pool_default_cache_target_dirty_ratio, OPT_FLOAT, .4)
729 OPTION(osd_pool_default_cache_target_dirty_high_ratio, OPT_FLOAT, .6)
730 OPTION(osd_pool_default_cache_target_full_ratio, OPT_FLOAT, .8)
731 OPTION(osd_pool_default_cache_min_flush_age, OPT_INT, 0) // seconds
732 OPTION(osd_pool_default_cache_min_evict_age, OPT_INT, 0) // seconds
733 OPTION(osd_pool_default_cache_max_evict_check_size, OPT_INT, 10) // max size to check for eviction
734 OPTION(osd_hit_set_min_size, OPT_INT, 1000) // min target size for a HitSet
735 OPTION(osd_hit_set_max_size, OPT_INT, 100000) // max target size for a HitSet
736 OPTION(osd_hit_set_namespace, OPT_STR, ".ceph-internal") // rados namespace for hit_set tracking
737
738 // conservative default throttling values
739 OPTION(osd_tier_promote_max_objects_sec, OPT_U64, 25)
740 OPTION(osd_tier_promote_max_bytes_sec, OPT_U64, 5 * 1024*1024)
741
742 OPTION(osd_tier_default_cache_mode, OPT_STR, "writeback")
743 OPTION(osd_tier_default_cache_hit_set_count, OPT_INT, 4)
744 OPTION(osd_tier_default_cache_hit_set_period, OPT_INT, 1200)
745 OPTION(osd_tier_default_cache_hit_set_type, OPT_STR, "bloom")
746 OPTION(osd_tier_default_cache_min_read_recency_for_promote, OPT_INT, 1) // number of recent HitSets the object must appear in to be promoted (on read)
747 OPTION(osd_tier_default_cache_min_write_recency_for_promote, OPT_INT, 1) // number of recent HitSets the object must appear in to be promoted (on write)
748 OPTION(osd_tier_default_cache_hit_set_grade_decay_rate, OPT_INT, 20)
749 OPTION(osd_tier_default_cache_hit_set_search_last_n, OPT_INT, 1)
750
751 OPTION(osd_map_dedup, OPT_BOOL, true)
752 OPTION(osd_map_max_advance, OPT_INT, 40) // make this < cache_size!
753 OPTION(osd_map_cache_size, OPT_INT, 50)
754 OPTION(osd_map_message_max, OPT_INT, 40) // max maps per MOSDMap message
755 OPTION(osd_map_share_max_epochs, OPT_INT, 40) // cap on # of inc maps we send to peers, clients
756 OPTION(osd_inject_bad_map_crc_probability, OPT_FLOAT, 0)
757 OPTION(osd_inject_failure_on_pg_removal, OPT_BOOL, false)
758 // shut down the OSD if its status flips more than osd_max_markdown_count times within the most recent osd_max_markdown_period seconds
759 OPTION(osd_max_markdown_period , OPT_INT, 600)
760 OPTION(osd_max_markdown_count, OPT_INT, 5)
761
762 OPTION(osd_peering_wq_threads, OPT_INT, 2)
763 OPTION(osd_peering_wq_batch_size, OPT_U64, 20)
764 OPTION(osd_op_pq_max_tokens_per_priority, OPT_U64, 4194304)
765 OPTION(osd_op_pq_min_cost, OPT_U64, 65536)
766 OPTION(osd_disk_threads, OPT_INT, 1)
767 OPTION(osd_disk_thread_ioprio_class, OPT_STR, "") // rt realtime be best effort idle
768 OPTION(osd_disk_thread_ioprio_priority, OPT_INT, -1) // 0-7
769 OPTION(osd_recover_clone_overlap, OPT_BOOL, true) // preserve clone_overlap during recovery/migration
770 OPTION(osd_op_num_threads_per_shard, OPT_INT, 0)
771 OPTION(osd_op_num_threads_per_shard_hdd, OPT_INT, 1)
772 OPTION(osd_op_num_threads_per_shard_ssd, OPT_INT, 2)
773 OPTION(osd_op_num_shards, OPT_INT, 0)
774 OPTION(osd_op_num_shards_hdd, OPT_INT, 5)
775 OPTION(osd_op_num_shards_ssd, OPT_INT, 8)
776
777 // PrioritizedQueue (prio), Weighted Priority Queue (wpq ; default),
778 // mclock_opclass, mclock_client, or debug_random. "mclock_opclass"
779 // and "mclock_client" are based on the mClock/dmClock algorithm
780 // (Gulati, et al. 2010). "mclock_opclass" prioritizes based on the
781 // class the operation belongs to. "mclock_client" does the same but
782 // also works to enforce fairness between clients. "debug_random"
783 // chooses among all four with equal probability.
784 OPTION(osd_op_queue, OPT_STR, "wpq")
785
786 OPTION(osd_op_queue_cut_off, OPT_STR, "low") // Min priority to go to strict queue. (low, high, debug_random)
787
788 // mClock priority queue parameters for five types of ops
789 OPTION(osd_op_queue_mclock_client_op_res, OPT_DOUBLE, 1000.0)
790 OPTION(osd_op_queue_mclock_client_op_wgt, OPT_DOUBLE, 500.0)
791 OPTION(osd_op_queue_mclock_client_op_lim, OPT_DOUBLE, 0.0)
792 OPTION(osd_op_queue_mclock_osd_subop_res, OPT_DOUBLE, 1000.0)
793 OPTION(osd_op_queue_mclock_osd_subop_wgt, OPT_DOUBLE, 500.0)
794 OPTION(osd_op_queue_mclock_osd_subop_lim, OPT_DOUBLE, 0.0)
795 OPTION(osd_op_queue_mclock_snap_res, OPT_DOUBLE, 0.0)
796 OPTION(osd_op_queue_mclock_snap_wgt, OPT_DOUBLE, 1.0)
797 OPTION(osd_op_queue_mclock_snap_lim, OPT_DOUBLE, 0.001)
798 OPTION(osd_op_queue_mclock_recov_res, OPT_DOUBLE, 0.0)
799 OPTION(osd_op_queue_mclock_recov_wgt, OPT_DOUBLE, 1.0)
800 OPTION(osd_op_queue_mclock_recov_lim, OPT_DOUBLE, 0.001)
801 OPTION(osd_op_queue_mclock_scrub_res, OPT_DOUBLE, 0.0)
802 OPTION(osd_op_queue_mclock_scrub_wgt, OPT_DOUBLE, 1.0)
803 OPTION(osd_op_queue_mclock_scrub_lim, OPT_DOUBLE, 0.001)
804
805 OPTION(osd_ignore_stale_divergent_priors, OPT_BOOL, false) // do not assert on divergent_prior entries which aren't in the log and whose on-disk objects are newer
806
807 // Set to true for testing. Users should NOT set this.
808 // If set to true even after reading enough shards to
809 // decode the object, any error will be reported.
810 OPTION(osd_read_ec_check_for_errors, OPT_BOOL, false) // return error if any ec shard has an error
811
812 // Only use clone_overlap for recovery if there are fewer than
813 // osd_recover_clone_overlap_limit entries in the overlap set
814 OPTION(osd_recover_clone_overlap_limit, OPT_INT, 10)
815
816 OPTION(osd_backfill_scan_min, OPT_INT, 64)
817 OPTION(osd_backfill_scan_max, OPT_INT, 512)
818 OPTION(osd_op_thread_timeout, OPT_INT, 15)
819 OPTION(osd_op_thread_suicide_timeout, OPT_INT, 150)
820 OPTION(osd_recovery_thread_timeout, OPT_INT, 30)
821 OPTION(osd_recovery_thread_suicide_timeout, OPT_INT, 300)
822 OPTION(osd_recovery_sleep, OPT_FLOAT, 0.01) // seconds to sleep between recovery ops
823 OPTION(osd_snap_trim_sleep, OPT_DOUBLE, 0)
824 OPTION(osd_scrub_invalid_stats, OPT_BOOL, true)
825 OPTION(osd_remove_thread_timeout, OPT_INT, 60*60)
826 OPTION(osd_remove_thread_suicide_timeout, OPT_INT, 10*60*60)
827 OPTION(osd_command_thread_timeout, OPT_INT, 10*60)
828 OPTION(osd_command_thread_suicide_timeout, OPT_INT, 15*60)
829 OPTION(osd_heartbeat_addr, OPT_ADDR, entity_addr_t())
830 OPTION(osd_heartbeat_interval, OPT_INT, 6) // (seconds) how often we ping peers
831
832 // (seconds) how long before we decide a peer has failed
833 // This setting is read by both the MONs and the OSDs, so it has to be set to the same value for both in the configuration
834 OPTION(osd_heartbeat_grace, OPT_INT, 20)
835 OPTION(osd_heartbeat_min_peers, OPT_INT, 10) // minimum number of peers
836 OPTION(osd_heartbeat_use_min_delay_socket, OPT_BOOL, false) // prio the heartbeat tcp socket and set dscp as CS6 on it if true
837 OPTION(osd_heartbeat_min_size, OPT_INT, 2000) // the minimum size of OSD heartbeat messages to send
838
839 // max number of parallel snap trims/pg
840 OPTION(osd_pg_max_concurrent_snap_trims, OPT_U64, 2)
841 // max number of trimming pgs
842 OPTION(osd_max_trimming_pgs, OPT_U64, 2)
843
844 // minimum number of peers that must be reachable to mark ourselves
845 // back up after being wrongly marked down.
846 OPTION(osd_heartbeat_min_healthy_ratio, OPT_FLOAT, .33)
847
848 OPTION(osd_mon_heartbeat_interval, OPT_INT, 30) // (seconds) how often to ping monitor if no peers
849 OPTION(osd_mon_report_interval_max, OPT_INT, 600)
850 OPTION(osd_mon_report_interval_min, OPT_INT, 5) // pg stats, failures, up_thru, boot.
851 OPTION(osd_mon_report_max_in_flight, OPT_INT, 2) // max updates in flight
852 OPTION(osd_beacon_report_interval, OPT_INT, 300) // (seconds) how often to send beacon message to monitor
853 OPTION(osd_pg_stat_report_interval_max, OPT_INT, 500) // report pg stats for any given pg at least this often
854 OPTION(osd_mon_ack_timeout, OPT_DOUBLE, 30.0) // time out a mon if it doesn't ack stats
855 OPTION(osd_stats_ack_timeout_factor, OPT_DOUBLE, 2.0) // multiples of mon_ack_timeout
856 OPTION(osd_stats_ack_timeout_decay, OPT_DOUBLE, .9)
857 OPTION(osd_default_data_pool_replay_window, OPT_INT, 45)
858 OPTION(osd_auto_mark_unfound_lost, OPT_BOOL, false)
859 OPTION(osd_recovery_delay_start, OPT_FLOAT, 0)
860 OPTION(osd_recovery_max_active, OPT_U64, 3)
861 OPTION(osd_recovery_max_single_start, OPT_U64, 1)
862 OPTION(osd_recovery_max_chunk, OPT_U64, 8<<20) // max size of push chunk
863 OPTION(osd_recovery_max_omap_entries_per_chunk, OPT_U64, 64000) // max number of omap entries per chunk; 0 to disable limit
864 OPTION(osd_copyfrom_max_chunk, OPT_U64, 8<<20) // max size of a COPYFROM chunk
865 OPTION(osd_push_per_object_cost, OPT_U64, 1000) // push cost per object
866 OPTION(osd_max_push_cost, OPT_U64, 8<<20) // max size of push message
867 OPTION(osd_max_push_objects, OPT_U64, 10) // max objects in single push op
868 OPTION(osd_recovery_forget_lost_objects, OPT_BOOL, false) // off for now
869 OPTION(osd_max_scrubs, OPT_INT, 1)
870 OPTION(osd_scrub_during_recovery, OPT_BOOL, false) // Allow new scrubs to start while recovery is active on the OSD
871 OPTION(osd_scrub_begin_hour, OPT_INT, 0)
872 OPTION(osd_scrub_end_hour, OPT_INT, 24)
873 OPTION(osd_scrub_load_threshold, OPT_FLOAT, 0.5)
874 OPTION(osd_scrub_min_interval, OPT_FLOAT, 60*60*24) // if load is low
875 OPTION(osd_scrub_max_interval, OPT_FLOAT, 7*60*60*24) // regardless of load
876 OPTION(osd_scrub_interval_randomize_ratio, OPT_FLOAT, 0.5) // randomize the scheduled scrub in the span of [min,min*(1+randomize_ratio))
877 OPTION(osd_scrub_backoff_ratio, OPT_DOUBLE, .66) // the probability to back off the scheduled scrub
878 OPTION(osd_scrub_chunk_min, OPT_INT, 5)
879 OPTION(osd_scrub_chunk_max, OPT_INT, 25)
880 OPTION(osd_scrub_sleep, OPT_FLOAT, 0) // sleep between [deep]scrub ops
881 OPTION(osd_scrub_auto_repair, OPT_BOOL, false) // whether auto-repair inconsistencies upon deep-scrubbing
882 OPTION(osd_scrub_auto_repair_num_errors, OPT_U32, 5) // only auto-repair when number of errors is below this threshold
883 OPTION(osd_deep_scrub_interval, OPT_FLOAT, 60*60*24*7) // once a week
884 OPTION(osd_deep_scrub_randomize_ratio, OPT_FLOAT, 0.15) // scrubs will randomly become deep scrubs at this rate (0.15 -> 15% of scrubs are deep)
885 OPTION(osd_deep_scrub_stride, OPT_INT, 524288)
886 OPTION(osd_deep_scrub_update_digest_min_age, OPT_INT, 2*60*60) // objects must be this old (seconds) before we update the whole-object digest on scrub
887 OPTION(osd_class_dir, OPT_STR, CEPH_LIBDIR "/rados-classes") // where rados plugins are stored
888 OPTION(osd_open_classes_on_start, OPT_BOOL, true)
889 OPTION(osd_class_load_list, OPT_STR, "cephfs hello journal lock log numops "
890 "rbd refcount replica_log rgw statelog timeindex user version") // list of object classes allowed to be loaded (allow all: *)
891 OPTION(osd_class_default_list, OPT_STR, "cephfs hello journal lock log numops "
892 "rbd refcount replica_log rgw statelog timeindex user version") // list of object classes with default execute perm (allow all: *)
893 OPTION(osd_check_for_log_corruption, OPT_BOOL, false)
894 OPTION(osd_use_stale_snap, OPT_BOOL, false)
895 OPTION(osd_rollback_to_cluster_snap, OPT_STR, "")
896 OPTION(osd_default_notify_timeout, OPT_U32, 30) // default notify timeout in seconds
897 OPTION(osd_kill_backfill_at, OPT_INT, 0)
898
899 // Bounds how infrequently a new map epoch will be persisted for a pg
900 OPTION(osd_pg_epoch_persisted_max_stale, OPT_U32, 40) // make this < map_cache_size!
901
902 OPTION(osd_min_pg_log_entries, OPT_U32, 3000) // number of entries to keep in the pg log when trimming it
903 OPTION(osd_max_pg_log_entries, OPT_U32, 10000) // max entries, say when degraded, before we trim
904 OPTION(osd_force_recovery_pg_log_entries_factor, OPT_FLOAT, 1.3) // max entries factor before force recovery
905 OPTION(osd_pg_log_trim_min, OPT_U32, 100)
906 OPTION(osd_op_complaint_time, OPT_FLOAT, 30) // how many seconds old makes an op complaint-worthy
907 OPTION(osd_command_max_records, OPT_INT, 256)
908 OPTION(osd_max_pg_blocked_by, OPT_U32, 16) // max peer osds to report that are blocking our progress
909 OPTION(osd_op_log_threshold, OPT_INT, 5) // how many op log messages to show in one go
910 OPTION(osd_verify_sparse_read_holes, OPT_BOOL, false) // read fiemap-reported holes and verify they are zeros
911 OPTION(osd_backoff_on_unfound, OPT_BOOL, true) // object unfound
912 OPTION(osd_backoff_on_degraded, OPT_BOOL, false) // [mainly for debug?] object unreadable/writeable
913 OPTION(osd_backoff_on_down, OPT_BOOL, true) // pg in down/incomplete state
914 OPTION(osd_backoff_on_peering, OPT_BOOL, false) // [debug] pg peering
915 OPTION(osd_debug_crash_on_ignored_backoff, OPT_BOOL, false) // crash osd if client ignores a backoff; useful for debugging
916 OPTION(osd_debug_inject_dispatch_delay_probability, OPT_DOUBLE, 0)
917 OPTION(osd_debug_inject_dispatch_delay_duration, OPT_DOUBLE, .1)
918 OPTION(osd_debug_drop_ping_probability, OPT_DOUBLE, 0)
919 OPTION(osd_debug_drop_ping_duration, OPT_INT, 0)
920 OPTION(osd_debug_op_order, OPT_BOOL, false)
921 OPTION(osd_debug_verify_missing_on_start, OPT_BOOL, false)
922 OPTION(osd_debug_scrub_chance_rewrite_digest, OPT_U64, 0)
923 OPTION(osd_debug_verify_snaps_on_info, OPT_BOOL, false)
924 OPTION(osd_debug_verify_stray_on_activate, OPT_BOOL, false)
925 OPTION(osd_debug_skip_full_check_in_backfill_reservation, OPT_BOOL, false)
926 OPTION(osd_debug_reject_backfill_probability, OPT_DOUBLE, 0)
927 OPTION(osd_debug_inject_copyfrom_error, OPT_BOOL, false) // inject failure during copyfrom completion
928 OPTION(osd_debug_misdirected_ops, OPT_BOOL, false)
929 OPTION(osd_debug_skip_full_check_in_recovery, OPT_BOOL, false)
930 OPTION(osd_debug_random_push_read_error, OPT_DOUBLE, 0)
931 OPTION(osd_debug_verify_cached_snaps, OPT_BOOL, false)
932 OPTION(osd_enable_op_tracker, OPT_BOOL, true) // enable/disable OSD op tracking
933 OPTION(osd_num_op_tracker_shard, OPT_U32, 32) // The number of shards for holding the ops
934 OPTION(osd_op_history_size, OPT_U32, 20) // Max number of completed ops to track
935 OPTION(osd_op_history_duration, OPT_U32, 600) // Oldest completed op to track
936 OPTION(osd_op_history_slow_op_size, OPT_U32, 20) // Max number of slow ops to track
937 OPTION(osd_op_history_slow_op_threshold, OPT_DOUBLE, 10.0) // track the op if over this threshold
938 OPTION(osd_target_transaction_size, OPT_INT, 30) // to adjust various transactions that batch smaller items
939 OPTION(osd_failsafe_full_ratio, OPT_FLOAT, .97) // what % full makes an OSD "full" (failsafe)
940 OPTION(osd_fast_fail_on_connection_refused, OPT_BOOL, true) // immediately mark OSDs as down once they refuse to accept connections
941
942 OPTION(osd_pg_object_context_cache_count, OPT_INT, 64)
943 OPTION(osd_tracing, OPT_BOOL, false) // true if LTTng-UST tracepoints should be enabled
944 OPTION(osd_function_tracing, OPT_BOOL, false) // true if function instrumentation should use LTTng
945
946 OPTION(osd_fast_info, OPT_BOOL, true) // use fast info attr, if we can
947
948 // determines whether PGLog::check() compares written out log to stored log
949 OPTION(osd_debug_pg_log_writeout, OPT_BOOL, false)
950 OPTION(osd_loop_before_reset_tphandle, OPT_U32, 64) // Max number of loops before we reset thread-pool's handle
951 // default timeout while calling WaitInterval on an empty queue
952 OPTION(threadpool_default_timeout, OPT_INT, 60)
953 // default wait time for an empty queue before pinging the hb timeout
954 OPTION(threadpool_empty_queue_max_wait, OPT_INT, 2)
955
956 OPTION(leveldb_log_to_ceph_log, OPT_BOOL, true)
957 OPTION(leveldb_write_buffer_size, OPT_U64, 8 *1024*1024) // leveldb write buffer size
958 OPTION(leveldb_cache_size, OPT_U64, 128 *1024*1024) // leveldb cache size
959 OPTION(leveldb_block_size, OPT_U64, 0) // leveldb block size
960 OPTION(leveldb_bloom_size, OPT_INT, 0) // leveldb bloom bits per entry
961 OPTION(leveldb_max_open_files, OPT_INT, 0) // leveldb max open files
962 OPTION(leveldb_compression, OPT_BOOL, true) // leveldb uses compression
963 OPTION(leveldb_paranoid, OPT_BOOL, false) // leveldb paranoid flag
964 OPTION(leveldb_log, OPT_STR, "/dev/null") // enable leveldb log file
965 OPTION(leveldb_compact_on_mount, OPT_BOOL, false)
966
967 OPTION(kinetic_host, OPT_STR, "") // hostname or ip address of a kinetic drive to use
968 OPTION(kinetic_port, OPT_INT, 8123) // port number of the kinetic drive
969 OPTION(kinetic_user_id, OPT_INT, 1) // kinetic user to authenticate as
970 OPTION(kinetic_hmac_key, OPT_STR, "asdfasdf") // kinetic key to authenticate with
971 OPTION(kinetic_use_ssl, OPT_BOOL, false) // whether to secure kinetic traffic with TLS
972
973
974 OPTION(rocksdb_separate_wal_dir, OPT_BOOL, false) // use $path.wal for wal
975 SAFE_OPTION(rocksdb_db_paths, OPT_STR, "") // path,size( path,size)*
976 OPTION(rocksdb_log_to_ceph_log, OPT_BOOL, true) // log to ceph log
977 OPTION(rocksdb_cache_size, OPT_U64, 128*1024*1024) // rocksdb cache size (unless set by bluestore/etc)
978 OPTION(rocksdb_cache_row_ratio, OPT_FLOAT, 0) // ratio of cache for row (vs block)
979 OPTION(rocksdb_cache_shard_bits, OPT_INT, 4) // rocksdb block cache shard bits, 4 bit -> 16 shards
980 OPTION(rocksdb_cache_type, OPT_STR, "lru") // 'lru' or 'clock'
981 OPTION(rocksdb_block_size, OPT_INT, 4*1024) // default rocksdb block size
982 OPTION(rocksdb_perf, OPT_BOOL, false) // Enabling this will have 5-10% impact on performance for the stats collection
983 OPTION(rocksdb_collect_compaction_stats, OPT_BOOL, false) // For rocksdb, this adds an overhead of 5%~10%; only collected if rocksdb_perf is enabled.
984 OPTION(rocksdb_collect_extended_stats, OPT_BOOL, false) // For rocksdb, this adds an overhead of 5%~10%; only collected if rocksdb_perf is enabled.
985 OPTION(rocksdb_collect_memory_stats, OPT_BOOL, false) // For rocksdb, this adds an overhead of 5%~10%; only collected if rocksdb_perf is enabled.
986 OPTION(rocksdb_enable_rmrange, OPT_BOOL, false) // see https://github.com/facebook/rocksdb/blob/master/include/rocksdb/db.h#L253
987
988 // rocksdb options that will be used for omap(if omap_backend is rocksdb)
989 OPTION(filestore_rocksdb_options, OPT_STR, "")
990 // rocksdb options that will be used in monstore
991 OPTION(mon_rocksdb_options, OPT_STR, "write_buffer_size=33554432,compression=kNoCompression")
992
993 /**
994 * osd_*_priority adjust the relative priority of client io, recovery io,
995 * snaptrim io, etc
996 *
997 * osd_*_priority determines the ratio of available io between client and
998 * recovery. Each option may be set between
999 * 1..63.
1000 */
1001 OPTION(osd_client_op_priority, OPT_U32, 63)
1002 OPTION(osd_recovery_op_priority, OPT_U32, 3)
1003
1004 OPTION(osd_snap_trim_priority, OPT_U32, 5)
1005 OPTION(osd_snap_trim_cost, OPT_U32, 1<<20) // set default cost equal to 1MB io
1006
1007 OPTION(osd_scrub_priority, OPT_U32, 5)
1008 // set default cost equal to 50MB io
1009 OPTION(osd_scrub_cost, OPT_U32, 50<<20)
1010 // set requested scrub priority higher than scrub priority to make the
1011 // requested scrubs jump the queue of scheduled scrubs
1012 OPTION(osd_requested_scrub_priority, OPT_U32, 120)
1013
1014 OPTION(osd_recovery_priority, OPT_U32, 5)
1015 // set default cost equal to 20MB io
1016 OPTION(osd_recovery_cost, OPT_U32, 20<<20)
1017
1018 /**
1019 * osd_recovery_op_warn_multiple scales the normal warning threshold,
1020 * osd_op_complaint_time, so that slow recovery ops won't cause noise
1021 */
1022 OPTION(osd_recovery_op_warn_multiple, OPT_U32, 16)
1023
1024 // Max time to wait between notifying mon of shutdown and shutting down
1025 OPTION(osd_mon_shutdown_timeout, OPT_DOUBLE, 5)
1026 OPTION(osd_shutdown_pgref_assert, OPT_BOOL, false) // crash if the OSD has stray PG refs on shutdown
1027
1028 OPTION(osd_max_object_size, OPT_U64, 128*1024L*1024L) // OSD's maximum object size
1029 OPTION(osd_max_object_name_len, OPT_U32, 2048) // max rados object name len
1030 OPTION(osd_max_object_namespace_len, OPT_U32, 256) // max rados object namespace len
1031 OPTION(osd_max_attr_name_len, OPT_U32, 100) // max rados attr name len; cannot go higher than 100 chars for file system backends
1032 OPTION(osd_max_attr_size, OPT_U64, 0)
1033
1034 OPTION(osd_max_omap_entries_per_request, OPT_U64, 131072)
1035 OPTION(osd_max_omap_bytes_per_request, OPT_U64, 1<<30)
1036
1037 OPTION(osd_objectstore, OPT_STR, "filestore") // ObjectStore backend type
1038 OPTION(osd_objectstore_tracing, OPT_BOOL, false) // true if LTTng-UST tracepoints should be enabled
1039 OPTION(osd_objectstore_fuse, OPT_BOOL, false)
1040
1041 OPTION(osd_bench_small_size_max_iops, OPT_U32, 100) // 100 IOPS
1042 OPTION(osd_bench_large_size_max_throughput, OPT_U64, 100 << 20) // 100 MB/s
1043 OPTION(osd_bench_max_block_size, OPT_U64, 64 << 20) // cap the block size at 64MB
1044 OPTION(osd_bench_duration, OPT_U32, 30) // duration of 'osd bench', capped at 30s to avoid triggering timeouts
1045
1046 OPTION(osd_blkin_trace_all, OPT_BOOL, false) // create a blkin trace for all osd requests
1047 OPTION(osdc_blkin_trace_all, OPT_BOOL, false) // create a blkin trace for all objecter requests
1048
1049 OPTION(osd_discard_disconnected_ops, OPT_BOOL, true)
1050
1051 OPTION(memstore_device_bytes, OPT_U64, 1024*1024*1024)
1052 OPTION(memstore_page_set, OPT_BOOL, true)
1053 OPTION(memstore_page_size, OPT_U64, 64 << 10)
1054
1055 OPTION(bdev_debug_inflight_ios, OPT_BOOL, false)
1056 OPTION(bdev_inject_crash, OPT_INT, 0) // if N>0, then ~ 1/N IOs will complete before we crash on flush.
1057 OPTION(bdev_inject_crash_flush_delay, OPT_INT, 2) // wait N more seconds on flush
1058 OPTION(bdev_aio, OPT_BOOL, true)
1059 OPTION(bdev_aio_poll_ms, OPT_INT, 250) // milliseconds
1060 OPTION(bdev_aio_max_queue_depth, OPT_INT, 1024)
1061 OPTION(bdev_aio_reap_max, OPT_INT, 16)
1062 OPTION(bdev_block_size, OPT_INT, 4096)
1063 OPTION(bdev_debug_aio, OPT_BOOL, false)
1064 OPTION(bdev_debug_aio_suicide_timeout, OPT_FLOAT, 60.0)
1065
1066 // if yes, osd will unbind all NVMe devices from kernel driver and bind them
1067 // to the uio_pci_generic driver. The purpose is to prevent the case where
1068 // NVMe driver is loaded while osd is running.
1069 OPTION(bdev_nvme_unbind_from_kernel, OPT_BOOL, false)
1070 OPTION(bdev_nvme_retry_count, OPT_INT, -1) // -1 means by default which is 4
1071
1072 OPTION(objectstore_blackhole, OPT_BOOL, false)
1073
1074 OPTION(bluefs_alloc_size, OPT_U64, 1048576)
1075 OPTION(bluefs_max_prefetch, OPT_U64, 1048576)
1076 OPTION(bluefs_min_log_runway, OPT_U64, 1048576) // alloc when we get this low
1077 OPTION(bluefs_max_log_runway, OPT_U64, 4194304) // alloc this much at a time
1078 OPTION(bluefs_log_compact_min_ratio, OPT_FLOAT, 5.0) // before we consider
1079 OPTION(bluefs_log_compact_min_size, OPT_U64, 16*1048576) // before we consider
1080 OPTION(bluefs_min_flush_size, OPT_U64, 524288) // ignore flush until its this big
1081 OPTION(bluefs_compact_log_sync, OPT_BOOL, false) // sync or async log compaction?
1082 OPTION(bluefs_buffered_io, OPT_BOOL, false)
1083 OPTION(bluefs_sync_write, OPT_BOOL, false)
1084 OPTION(bluefs_allocator, OPT_STR, "bitmap") // stupid | bitmap
1085 OPTION(bluefs_preextend_wal_files, OPT_BOOL, false) // this *requires* that rocksdb has recycling enabled
1086
1087 OPTION(bluestore_bluefs, OPT_BOOL, true)
1088 OPTION(bluestore_bluefs_env_mirror, OPT_BOOL, false) // mirror to normal Env for debug
1089 OPTION(bluestore_bluefs_min, OPT_U64, 1*1024*1024*1024) // 1gb
1090 OPTION(bluestore_bluefs_min_ratio, OPT_FLOAT, .02) // min fs free / total free
1091 OPTION(bluestore_bluefs_max_ratio, OPT_FLOAT, .90) // max fs free / total free
1092 OPTION(bluestore_bluefs_gift_ratio, OPT_FLOAT, .02) // how much to add at a time
1093 OPTION(bluestore_bluefs_reclaim_ratio, OPT_FLOAT, .20) // how much to reclaim at a time
1094 OPTION(bluestore_bluefs_balance_interval, OPT_FLOAT, 1) // how often (sec) to balance free space between bluefs and bluestore
1095 // If you want to use spdk driver, you need to specify NVMe serial number here
1096 // with "spdk:" prefix.
1097 // Users can use 'lspci -vvv -d 8086:0953 | grep "Device Serial Number"' to
1098 // get the serial number of Intel(R) Fultondale NVMe controllers.
1099 // Example:
1100 // bluestore_block_path = spdk:55cd2e404bd73932
1101 // If you want to run multiple SPDK instances per node, you must specify the
1102 // amount of dpdk memory size in MB each instance will use, to make sure each
1103 // instance uses its own dpdk memory
1104 OPTION(bluestore_spdk_mem, OPT_U32, 512)
1105 // A hexadecimal bit mask of the cores to run on. Note the core numbering can change between platforms and should be determined beforehand.
1106 OPTION(bluestore_spdk_coremask, OPT_STR, "0x3")
1107 // Specify the maximal I/Os to be batched completed while checking queue pair completions.
1108 // Default value 0 means that let SPDK nvme library determine the value.
1109 OPTION(bluestore_spdk_max_io_completion, OPT_U32, 0)
1110 OPTION(bluestore_block_path, OPT_STR, "")
1111 OPTION(bluestore_block_size, OPT_U64, 10ULL * 1024*1024*1024) // 10gb for testing; ULL keeps the product out of int arithmetic, where it would overflow
1112 OPTION(bluestore_block_create, OPT_BOOL, true)
1113 OPTION(bluestore_block_db_path, OPT_STR, "")
1114 OPTION(bluestore_block_db_size, OPT_U64, 0) // rocksdb ssts (hot/warm)
1115 OPTION(bluestore_block_db_create, OPT_BOOL, false)
1116 OPTION(bluestore_block_wal_path, OPT_STR, "")
1117 OPTION(bluestore_block_wal_size, OPT_U64, 96 * 1024*1024) // rocksdb wal
1118 OPTION(bluestore_block_wal_create, OPT_BOOL, false)
1119 OPTION(bluestore_block_preallocate_file, OPT_BOOL, false) // whether to preallocate space if block/db_path/wal_path is a file rather than a block device
1120 OPTION(bluestore_csum_type, OPT_STR, "crc32c") // none|xxhash32|xxhash64|crc32c|crc32c_16|crc32c_8
1121 OPTION(bluestore_csum_min_block, OPT_U32, 4096)
1122 OPTION(bluestore_csum_max_block, OPT_U32, 64*1024)
1123 OPTION(bluestore_min_alloc_size, OPT_U32, 0)
1124 OPTION(bluestore_min_alloc_size_hdd, OPT_U32, 64*1024)
1125 OPTION(bluestore_min_alloc_size_ssd, OPT_U32, 16*1024)
1126 OPTION(bluestore_max_alloc_size, OPT_U32, 0)
1127 OPTION(bluestore_prefer_deferred_size, OPT_U32, 0)
1128 OPTION(bluestore_prefer_deferred_size_hdd, OPT_U32, 32768)
1129 OPTION(bluestore_prefer_deferred_size_ssd, OPT_U32, 0)
1130 OPTION(bluestore_compression_mode, OPT_STR, "none") // force|aggressive|passive|none
1131 OPTION(bluestore_compression_algorithm, OPT_STR, "snappy")
1132 OPTION(bluestore_compression_min_blob_size, OPT_U32, 0)
1133 OPTION(bluestore_compression_min_blob_size_hdd, OPT_U32, 128*1024)
1134 OPTION(bluestore_compression_min_blob_size_ssd, OPT_U32, 8*1024)
1135 OPTION(bluestore_compression_max_blob_size, OPT_U32, 0)
1136 OPTION(bluestore_compression_max_blob_size_hdd, OPT_U32, 512*1024)
1137 OPTION(bluestore_compression_max_blob_size_ssd, OPT_U32, 64*1024)
1138 /*
1139 * Specifies minimum expected amount of saved allocation units
1140 * per single blob to enable compressed blobs garbage collection
1141 *
1142 */
1143 OPTION(bluestore_gc_enable_blob_threshold, OPT_INT, 0)
1144 /*
1145 * Specifies minimum expected amount of saved allocation units
1146 * across all blobs to enable compressed blobs garbage collection
1147 *
1148 */
1149 OPTION(bluestore_gc_enable_total_threshold, OPT_INT, 0)
1150
1151 OPTION(bluestore_max_blob_size, OPT_U32, 0)
1152 OPTION(bluestore_max_blob_size_hdd, OPT_U32, 512*1024)
1153 OPTION(bluestore_max_blob_size_ssd, OPT_U32, 64*1024)
1154 /*
1155 * Require the net gain of compression at least to be at this ratio,
1156 * otherwise we don't compress.
1157 * And ask for compressing at least 12.5%(1/8) off, by default.
1158 */
1159 OPTION(bluestore_compression_required_ratio, OPT_DOUBLE, .875)
1160 OPTION(bluestore_extent_map_shard_max_size, OPT_U32, 1200)
1161 OPTION(bluestore_extent_map_shard_target_size, OPT_U32, 500)
1162 OPTION(bluestore_extent_map_shard_min_size, OPT_U32, 150)
1163 OPTION(bluestore_extent_map_shard_target_size_slop, OPT_DOUBLE, .2)
1164 OPTION(bluestore_extent_map_inline_shard_prealloc_size, OPT_U32, 256)
1165 OPTION(bluestore_cache_trim_interval, OPT_DOUBLE, .2)
1166 OPTION(bluestore_cache_trim_max_skip_pinned, OPT_U32, 64) // skip this many onodes pinned in cache before we give up
1167 OPTION(bluestore_cache_type, OPT_STR, "2q") // lru, 2q
1168 OPTION(bluestore_2q_cache_kin_ratio, OPT_DOUBLE, .5) // kin page slot size / max page slot size
1169 OPTION(bluestore_2q_cache_kout_ratio, OPT_DOUBLE, .5) // number of kout page slot / total number of page slot
1170 OPTION(bluestore_cache_size, OPT_U64, 0)
1171 OPTION(bluestore_cache_size_hdd, OPT_U64, 1*1024*1024*1024)
1172 OPTION(bluestore_cache_size_ssd, OPT_U64, 3ULL*1024*1024*1024) // 3gb; ULL keeps the product out of int arithmetic, where it would overflow
1173 OPTION(bluestore_cache_meta_ratio, OPT_DOUBLE, .01)
1174 OPTION(bluestore_cache_kv_ratio, OPT_DOUBLE, .99)
1175 OPTION(bluestore_cache_kv_max, OPT_U64, 512*1024*1024) // limit the maximum amount of cache for the kv store
1176 OPTION(bluestore_kvbackend, OPT_STR, "rocksdb")
1177 OPTION(bluestore_allocator, OPT_STR, "bitmap") // stupid | bitmap
1178 OPTION(bluestore_freelist_blocks_per_key, OPT_INT, 128)
1179 OPTION(bluestore_bitmapallocator_blocks_per_zone, OPT_INT, 1024) // must be power of 2 aligned, e.g., 512, 1024, 2048...
1180 OPTION(bluestore_bitmapallocator_span_size, OPT_INT, 1024) // must be power of 2 aligned, e.g., 512, 1024, 2048...
1181 OPTION(bluestore_max_deferred_txc, OPT_U64, 32)
1182 OPTION(bluestore_rocksdb_options, OPT_STR, "compression=kNoCompression,max_write_buffer_number=4,min_write_buffer_number_to_merge=1,recycle_log_file_num=4,write_buffer_size=268435456,writable_file_max_buffer_size=0,compaction_readahead_size=2097152")
1183 OPTION(bluestore_fsck_on_mount, OPT_BOOL, false)
1184 OPTION(bluestore_fsck_on_mount_deep, OPT_BOOL, true)
1185 OPTION(bluestore_fsck_on_umount, OPT_BOOL, false)
1186 OPTION(bluestore_fsck_on_umount_deep, OPT_BOOL, true)
1187 OPTION(bluestore_fsck_on_mkfs, OPT_BOOL, true)
1188 OPTION(bluestore_fsck_on_mkfs_deep, OPT_BOOL, false)
1189 OPTION(bluestore_sync_submit_transaction, OPT_BOOL, false) // submit kv txn in queueing thread (not kv_sync_thread)
1190 OPTION(bluestore_throttle_bytes, OPT_U64, 64*1024*1024)
1191 OPTION(bluestore_throttle_deferred_bytes, OPT_U64, 128*1024*1024)
1192 OPTION(bluestore_throttle_cost_per_io_hdd, OPT_U64, 670000)
1193 OPTION(bluestore_throttle_cost_per_io_ssd, OPT_U64, 4000)
1194 OPTION(bluestore_throttle_cost_per_io, OPT_U64, 0)
1195 OPTION(bluestore_deferred_batch_ops, OPT_U64, 0)
1196 OPTION(bluestore_deferred_batch_ops_hdd, OPT_U64, 64)
1197 OPTION(bluestore_deferred_batch_ops_ssd, OPT_U64, 16)
1198 OPTION(bluestore_nid_prealloc, OPT_INT, 1024)
1199 OPTION(bluestore_blobid_prealloc, OPT_U64, 10240)
1200 OPTION(bluestore_clone_cow, OPT_BOOL, true) // do copy-on-write for clones
1201 OPTION(bluestore_default_buffered_read, OPT_BOOL, true)
1202 OPTION(bluestore_default_buffered_write, OPT_BOOL, false)
1203 OPTION(bluestore_debug_misc, OPT_BOOL, false)
1204 OPTION(bluestore_debug_no_reuse_blocks, OPT_BOOL, false)
1205 OPTION(bluestore_debug_small_allocations, OPT_INT, 0)
1206 OPTION(bluestore_debug_freelist, OPT_BOOL, false)
1207 OPTION(bluestore_debug_prefill, OPT_FLOAT, 0)
1208 OPTION(bluestore_debug_prefragment_max, OPT_INT, 1048576)
1209 OPTION(bluestore_debug_inject_read_err, OPT_BOOL, false)
1210 OPTION(bluestore_debug_randomize_serial_transaction, OPT_INT, 0)
1211 OPTION(bluestore_debug_omit_block_device_write, OPT_BOOL, false)
1212 OPTION(bluestore_debug_fsck_abort, OPT_BOOL, false)
1213 OPTION(bluestore_debug_omit_kv_commit, OPT_BOOL, false)
1214 OPTION(bluestore_debug_permit_any_bdev_label, OPT_BOOL, false)
1215 OPTION(bluestore_shard_finishers, OPT_BOOL, false)
1216 OPTION(bluestore_debug_random_read_err, OPT_DOUBLE, 0)
1217
1218 OPTION(kstore_max_ops, OPT_U64, 512)
1219 OPTION(kstore_max_bytes, OPT_U64, 64*1024*1024)
1220 OPTION(kstore_backend, OPT_STR, "rocksdb")
1221 OPTION(kstore_rocksdb_options, OPT_STR, "compression=kNoCompression")
1222 OPTION(kstore_rocksdb_bloom_bits_per_key, OPT_INT, 0)
1223 OPTION(kstore_fsck_on_mount, OPT_BOOL, false)
1224 OPTION(kstore_fsck_on_mount_deep, OPT_BOOL, true)
1225 OPTION(kstore_nid_prealloc, OPT_U64, 1024)
1226 OPTION(kstore_sync_transaction, OPT_BOOL, false)
1227 OPTION(kstore_sync_submit_transaction, OPT_BOOL, false)
1228 OPTION(kstore_onode_map_size, OPT_U64, 1024)
1229 OPTION(kstore_default_stripe_size, OPT_INT, 65536)
1230
1231 OPTION(filestore_omap_backend, OPT_STR, "rocksdb")
1232 OPTION(filestore_omap_backend_path, OPT_STR, "")
1233
1234 /// filestore wb throttle limits
1235 OPTION(filestore_wbthrottle_enable, OPT_BOOL, true)
1236 OPTION(filestore_wbthrottle_btrfs_bytes_start_flusher, OPT_U64, 41943040)
1237 OPTION(filestore_wbthrottle_btrfs_bytes_hard_limit, OPT_U64, 419430400)
1238 OPTION(filestore_wbthrottle_btrfs_ios_start_flusher, OPT_U64, 500)
1239 OPTION(filestore_wbthrottle_btrfs_ios_hard_limit, OPT_U64, 5000)
1240 OPTION(filestore_wbthrottle_btrfs_inodes_start_flusher, OPT_U64, 500)
1241 OPTION(filestore_wbthrottle_xfs_bytes_start_flusher, OPT_U64, 41943040)
1242 OPTION(filestore_wbthrottle_xfs_bytes_hard_limit, OPT_U64, 419430400)
1243 OPTION(filestore_wbthrottle_xfs_ios_start_flusher, OPT_U64, 500)
1244 OPTION(filestore_wbthrottle_xfs_ios_hard_limit, OPT_U64, 5000)
1245 OPTION(filestore_wbthrottle_xfs_inodes_start_flusher, OPT_U64, 500)
1246
1247 /// These must be less than the fd limit
1248 OPTION(filestore_wbthrottle_btrfs_inodes_hard_limit, OPT_U64, 5000)
1249 OPTION(filestore_wbthrottle_xfs_inodes_hard_limit, OPT_U64, 5000)
1250
1251 //Introduce a O_DSYNC write in the filestore
1252 OPTION(filestore_odsync_write, OPT_BOOL, false)
1253
1254 // Tests index failure paths
1255 OPTION(filestore_index_retry_probability, OPT_DOUBLE, 0)
1256
1257 // Allow object read error injection
1258 OPTION(filestore_debug_inject_read_err, OPT_BOOL, false)
1259 OPTION(filestore_debug_random_read_err, OPT_DOUBLE, 0)
1260
1261 OPTION(filestore_debug_omap_check, OPT_BOOL, false) // Expensive debugging check on sync
1262 OPTION(filestore_omap_header_cache_size, OPT_INT, 1024)
1263
1264 // Use omap for xattrs for attrs over
1265 // filestore_max_inline_xattr_size or
1266 OPTION(filestore_max_inline_xattr_size, OPT_U32, 0) //Override
1267 OPTION(filestore_max_inline_xattr_size_xfs, OPT_U32, 65536)
1268 OPTION(filestore_max_inline_xattr_size_btrfs, OPT_U32, 2048)
1269 OPTION(filestore_max_inline_xattr_size_other, OPT_U32, 512)
1270
1271 // for more than filestore_max_inline_xattrs attrs
1272 OPTION(filestore_max_inline_xattrs, OPT_U32, 0) //Override
1273 OPTION(filestore_max_inline_xattrs_xfs, OPT_U32, 10)
1274 OPTION(filestore_max_inline_xattrs_btrfs, OPT_U32, 10)
1275 OPTION(filestore_max_inline_xattrs_other, OPT_U32, 2)
1276
1277 // max xattr value size
1278 OPTION(filestore_max_xattr_value_size, OPT_U32, 0) //Override
1279 OPTION(filestore_max_xattr_value_size_xfs, OPT_U32, 64<<10)
1280 OPTION(filestore_max_xattr_value_size_btrfs, OPT_U32, 64<<10)
1281 // ext4 allows 4k xattrs total including some smallish extra fields and the
1282 // keys. We're allowing two 512-byte inline attrs in addition to some filestore
1283 // replay attrs. After accounting for those, we still need to fit up to
1284 // two attrs of this value. That means we need this value to be around 1k
1285 // to be safe. This is hacky, but it's not worth complicating the code
1286 // to work around ext4's total xattr limit.
1287 OPTION(filestore_max_xattr_value_size_other, OPT_U32, 1<<10)
1288
1289 OPTION(filestore_sloppy_crc, OPT_BOOL, false) // track sloppy crcs
1290 OPTION(filestore_sloppy_crc_block_size, OPT_INT, 65536)
1291
1292 OPTION(filestore_max_alloc_hint_size, OPT_U64, 1ULL << 20) // bytes
1293
1294 OPTION(filestore_max_sync_interval, OPT_DOUBLE, 5) // seconds
1295 OPTION(filestore_min_sync_interval, OPT_DOUBLE, .01) // seconds
1296 OPTION(filestore_btrfs_snap, OPT_BOOL, true)
1297 OPTION(filestore_btrfs_clone_range, OPT_BOOL, true)
1298 OPTION(filestore_zfs_snap, OPT_BOOL, false) // zfsonlinux is still unstable
1299 OPTION(filestore_fsync_flushes_journal_data, OPT_BOOL, false)
1300 OPTION(filestore_fiemap, OPT_BOOL, false) // (try to) use fiemap
1301 OPTION(filestore_punch_hole, OPT_BOOL, false)
1302 OPTION(filestore_seek_data_hole, OPT_BOOL, false) // (try to) use seek_data/hole
1303 OPTION(filestore_splice, OPT_BOOL, false)
1304 OPTION(filestore_fadvise, OPT_BOOL, true)
1305 //collect device partition information for management application to use
1306 OPTION(filestore_collect_device_partition_information, OPT_BOOL, true)
1307
1308 // (try to) use extsize for alloc hint NOTE: extsize seems to trigger
1309 // data corruption in xfs prior to kernel 3.5. filestore will
1310 // implicitly disable this if it cannot confirm the kernel is newer
1311 // than that.
1312 // NOTE: This option involves a tradeoff: When disabled, fragmentation is
1313 // worse, but large sequential writes are faster. When enabled, large
1314 // sequential writes are slower, but fragmentation is reduced.
1315 OPTION(filestore_xfs_extsize, OPT_BOOL, false)
1316
1317 OPTION(filestore_journal_parallel, OPT_BOOL, false)
1318 OPTION(filestore_journal_writeahead, OPT_BOOL, false)
1319 OPTION(filestore_journal_trailing, OPT_BOOL, false)
1320 OPTION(filestore_queue_max_ops, OPT_U64, 50)
1321 OPTION(filestore_queue_max_bytes, OPT_U64, 100 << 20)
1322
1323 OPTION(filestore_caller_concurrency, OPT_INT, 10)
1324
1325 /// Expected filestore throughput in B/s
1326 OPTION(filestore_expected_throughput_bytes, OPT_DOUBLE, 200 << 20)
1327 /// Expected filestore throughput in ops/s
1328 OPTION(filestore_expected_throughput_ops, OPT_DOUBLE, 200)
1329
1330 /// Filestore max delay multiple. Defaults to 0 (disabled)
1331 OPTION(filestore_queue_max_delay_multiple, OPT_DOUBLE, 0)
1332 /// Filestore high delay multiple. Defaults to 0 (disabled)
1333 OPTION(filestore_queue_high_delay_multiple, OPT_DOUBLE, 0)
1334
1335 /// Use above to inject delays intended to keep the op queue between low and high
1336 OPTION(filestore_queue_low_threshhold, OPT_DOUBLE, 0.3)
1337 OPTION(filestore_queue_high_threshhold, OPT_DOUBLE, 0.9)
1338
1339 OPTION(filestore_op_threads, OPT_INT, 2)
1340 OPTION(filestore_op_thread_timeout, OPT_INT, 60)
1341 OPTION(filestore_op_thread_suicide_timeout, OPT_INT, 180)
1342 OPTION(filestore_commit_timeout, OPT_FLOAT, 600)
1343 OPTION(filestore_fiemap_threshold, OPT_INT, 4096)
1344 OPTION(filestore_merge_threshold, OPT_INT, 10)
1345 OPTION(filestore_split_multiple, OPT_INT, 2)
1346 OPTION(filestore_split_rand_factor, OPT_U32, 20) // randomize the split threshold by adding 16 * [0, rand_factor)
1347 OPTION(filestore_update_to, OPT_INT, 1000)
1348 OPTION(filestore_blackhole, OPT_BOOL, false) // drop any new transactions on the floor
1349 OPTION(filestore_fd_cache_size, OPT_INT, 128) // FD lru size
1350 OPTION(filestore_fd_cache_shards, OPT_INT, 16) // FD number of shards
1351 OPTION(filestore_ondisk_finisher_threads, OPT_INT, 1)
1352 OPTION(filestore_apply_finisher_threads, OPT_INT, 1)
1353 OPTION(filestore_dump_file, OPT_STR, "") // file onto which store transaction dumps
1354 OPTION(filestore_kill_at, OPT_INT, 0) // inject a failure at the n'th opportunity
1355 OPTION(filestore_inject_stall, OPT_INT, 0) // artificially stall for N seconds in op queue thread
1356 OPTION(filestore_fail_eio, OPT_BOOL, true) // fail/crash on EIO
1357 OPTION(filestore_debug_verify_split, OPT_BOOL, false)
1358 OPTION(journal_dio, OPT_BOOL, true)
1359 OPTION(journal_aio, OPT_BOOL, true)
1360 OPTION(journal_force_aio, OPT_BOOL, false)
1361 OPTION(journal_block_size, OPT_INT, 4096)
1362
1363 // max bytes to search ahead in journal searching for corruption
1364 OPTION(journal_max_corrupt_search, OPT_U64, 10<<20)
1365 OPTION(journal_block_align, OPT_BOOL, true)
1366 OPTION(journal_write_header_frequency, OPT_U64, 0)
1367 OPTION(journal_max_write_bytes, OPT_INT, 10 << 20)
1368 OPTION(journal_max_write_entries, OPT_INT, 100)
1369
1370 /// Target range for journal fullness
1371 OPTION(journal_throttle_low_threshhold, OPT_DOUBLE, 0.6)
1372 OPTION(journal_throttle_high_threshhold, OPT_DOUBLE, 0.9)
1373
1374 /// Multiple over expected at high_threshhold. Defaults to 0 (disabled).
1375 OPTION(journal_throttle_high_multiple, OPT_DOUBLE, 0)
1376 /// Multiple over expected at max. Defaults to 0 (disabled).
1377 OPTION(journal_throttle_max_multiple, OPT_DOUBLE, 0)
1378
1379 OPTION(journal_align_min_size, OPT_INT, 64 << 10) // align data payloads >= this.
1380 OPTION(journal_replay_from, OPT_INT, 0)
1381 OPTION(journal_zero_on_create, OPT_BOOL, false)
1382 OPTION(journal_ignore_corruption, OPT_BOOL, false) // assume journal is not corrupt
1383 OPTION(journal_discard, OPT_BOOL, false) // when using an SSD as the journal, whether to discard unused journal data
1384
1385 OPTION(fio_dir, OPT_STR, "/tmp/fio") // fio data directory for fio-objectstore
1386
1387 OPTION(rados_mon_op_timeout, OPT_DOUBLE, 0) // how many seconds to wait for a response from the monitor before returning an error from a rados operation. 0 means no limit.
1388 OPTION(rados_osd_op_timeout, OPT_DOUBLE, 0) // how many seconds to wait for a response from osds before returning an error from a rados operation. 0 means no limit.
1389 OPTION(rados_tracing, OPT_BOOL, false) // true if LTTng-UST tracepoints should be enabled
1390
1391 OPTION(rbd_op_threads, OPT_INT, 1)
1392 OPTION(rbd_op_thread_timeout, OPT_INT, 60)
1393 OPTION(rbd_non_blocking_aio, OPT_BOOL, true) // process AIO ops from a worker thread to prevent blocking
1394 OPTION(rbd_cache, OPT_BOOL, true) // whether to enable caching (writeback unless rbd_cache_max_dirty is 0)
1395 OPTION(rbd_cache_writethrough_until_flush, OPT_BOOL, true) // whether to make writeback caching writethrough until flush is called, to be sure the user of librbd will send flushes so that writeback is safe
1396 OPTION(rbd_cache_size, OPT_LONGLONG, 32<<20) // cache size in bytes
1397 OPTION(rbd_cache_max_dirty, OPT_LONGLONG, 24<<20) // dirty limit in bytes - set to 0 for write-through caching
1398 OPTION(rbd_cache_target_dirty, OPT_LONGLONG, 16<<20) // target dirty limit in bytes
1399 OPTION(rbd_cache_max_dirty_age, OPT_FLOAT, 1.0) // seconds in cache before writeback starts
1400 OPTION(rbd_cache_max_dirty_object, OPT_INT, 0) // dirty limit for objects - set to 0 for auto calculate from rbd_cache_size
1401 OPTION(rbd_cache_block_writes_upfront, OPT_BOOL, false) // whether to block writes to the cache before the aio_write call completes (true), or block before the aio completion is called (false)
1402 OPTION(rbd_concurrent_management_ops, OPT_INT, 10) // how many operations can be in flight for a management operation like deleting or resizing an image
1403 OPTION(rbd_balance_snap_reads, OPT_BOOL, false)
1404 OPTION(rbd_localize_snap_reads, OPT_BOOL, false)
1405 OPTION(rbd_balance_parent_reads, OPT_BOOL, false)
1406 OPTION(rbd_localize_parent_reads, OPT_BOOL, true)
1407 OPTION(rbd_readahead_trigger_requests, OPT_INT, 10) // number of sequential requests necessary to trigger readahead
1408 OPTION(rbd_readahead_max_bytes, OPT_LONGLONG, 512 * 1024) // set to 0 to disable readahead
1409 OPTION(rbd_readahead_disable_after_bytes, OPT_LONGLONG, 50 * 1024 * 1024) // how many bytes are read in total before readahead is disabled
1410 OPTION(rbd_clone_copy_on_read, OPT_BOOL, false)
1411 OPTION(rbd_blacklist_on_break_lock, OPT_BOOL, true) // whether to blacklist clients whose lock was broken
1412 OPTION(rbd_blacklist_expire_seconds, OPT_INT, 0) // number of seconds to blacklist - set to 0 for OSD default
1413 OPTION(rbd_request_timed_out_seconds, OPT_INT, 30) // number of seconds before maint request times out
1414 OPTION(rbd_skip_partial_discard, OPT_BOOL, false) // when trying to discard a range inside an object, set to true to skip zeroing the range.
1415 OPTION(rbd_enable_alloc_hint, OPT_BOOL, true) // when writing an object, issue a hint to the osd backend indicating the expected object size
1416 OPTION(rbd_tracing, OPT_BOOL, false) // true if LTTng-UST tracepoints should be enabled
1417 OPTION(rbd_blkin_trace_all, OPT_BOOL, false) // create a blkin trace for all RBD requests
1418 OPTION(rbd_validate_pool, OPT_BOOL, true) // true if empty pools should be validated for RBD compatibility
1419 OPTION(rbd_validate_names, OPT_BOOL, true) // true if image specs should be validated
1420 OPTION(rbd_auto_exclusive_lock_until_manual_request, OPT_BOOL, true) // whether to automatically acquire/release exclusive lock until it is explicitly requested, i.e. before we know the user of librbd is properly using the lock API
1421 OPTION(rbd_mirroring_resync_after_disconnect, OPT_BOOL, false) // automatically start image resync after mirroring is disconnected due to being laggy
1422 OPTION(rbd_mirroring_replay_delay, OPT_INT, 0) // time-delay in seconds for rbd-mirror asynchronous replication
1423
1424 OPTION(rbd_default_pool, OPT_STR, "rbd") // default pool for storing images
1425 OPTION_VALIDATOR(rbd_default_pool)
1426
1427 /*
1428 * The following options change the behavior for librbd's image creation methods that
1429 * don't require all of the parameters. These are provided so that older programs
1430 * can take advantage of newer features without being rewritten to use new versions
1431 * of the image creation functions.
1432 *
1433 * rbd_create()/RBD::create() are affected by all of these options.
1434 *
1435 * rbd_create2()/RBD::create2() and rbd_clone()/RBD::clone() are affected by:
1436 * - rbd_default_order
1437 * - rbd_default_stripe_count
1438 * - rbd_default_stripe_unit
1439 *
1440 * rbd_create3()/RBD::create3() and rbd_clone2/RBD::clone2() are only
1441 * affected by rbd_default_order.
1442 */
1443 OPTION(rbd_default_format, OPT_INT, 2)
1444 OPTION(rbd_default_order, OPT_INT, 22)
1445 OPTION(rbd_default_stripe_count, OPT_U64, 0) // changing requires stripingv2 feature
1446 OPTION(rbd_default_stripe_unit, OPT_U64, 0) // changing to non-object size requires stripingv2 feature
1447 OPTION(rbd_default_data_pool, OPT_STR, "") // optional default pool for storing image data blocks
1448 OPTION_VALIDATOR(rbd_default_data_pool)
1449
1450 /**
1451 * RBD features are only applicable for v2 images. This setting accepts either
1452 * an integer bitmask value or comma-delimited string of RBD feature names.
1453 * This setting is always internally stored as an integer bitmask value. The
1454 * mapping between feature bitmask value and feature name is as follows:
1455 *
1456 * +1 -> layering
1457 * +2 -> striping
1458 * +4 -> exclusive-lock
1459 * +8 -> object-map
1460 * +16 -> fast-diff
1461 * +32 -> deep-flatten
1462 * +64 -> journaling
1463 * +128 -> data-pool
1464 */
1465 SAFE_OPTION(rbd_default_features, OPT_STR, "layering,exclusive-lock,object-map,fast-diff,deep-flatten")
1466 OPTION_VALIDATOR(rbd_default_features)
1467
1468 OPTION(rbd_default_map_options, OPT_STR, "") // default rbd map -o / --options
1469
1470 /**
1471 * RBD journal options.
1472 */
1473 OPTION(rbd_journal_order, OPT_U32, 24) // bits to shift to compute journal object max size, between 12 and 64
1474 OPTION(rbd_journal_splay_width, OPT_U32, 4) // number of active journal objects
1475 OPTION(rbd_journal_commit_age, OPT_DOUBLE, 5) // commit time interval, seconds
1476 OPTION(rbd_journal_object_flush_interval, OPT_INT, 0) // maximum number of pending commits per journal object
1477 OPTION(rbd_journal_object_flush_bytes, OPT_INT, 0) // maximum number of pending bytes per journal object
1478 OPTION(rbd_journal_object_flush_age, OPT_DOUBLE, 0) // maximum age (in seconds) for pending commits
1479 OPTION(rbd_journal_pool, OPT_STR, "") // pool for journal objects
1480 OPTION(rbd_journal_max_payload_bytes, OPT_U32, 16384) // maximum journal payload size before splitting
1481 OPTION(rbd_journal_max_concurrent_object_sets, OPT_INT, 0) // maximum number of object sets a journal client can be behind before it is automatically unregistered
1482
1483 /**
1484 * RBD Mirror options
1485 */
1486 OPTION(rbd_mirror_journal_commit_age, OPT_DOUBLE, 5) // commit time interval, seconds
1487 OPTION(rbd_mirror_journal_poll_age, OPT_DOUBLE, 5) // maximum age (in seconds) between successive journal polls
1488 OPTION(rbd_mirror_journal_max_fetch_bytes, OPT_U32, 32768) // maximum bytes to read from each journal data object per fetch
1489 OPTION(rbd_mirror_sync_point_update_age, OPT_DOUBLE, 30) // number of seconds between each update of the image sync point object number
1490 OPTION(rbd_mirror_concurrent_image_syncs, OPT_U32, 5) // maximum number of image syncs in parallel
1491 OPTION(rbd_mirror_pool_replayers_refresh_interval, OPT_INT, 30) // interval to refresh peers in rbd-mirror daemon
1492 OPTION(rbd_mirror_delete_retry_interval, OPT_DOUBLE, 30) // interval to check and retry the failed requests in deleter
1493 OPTION(rbd_mirror_image_state_check_interval, OPT_INT, 30) // interval to get images from pool watcher and set sources in replayer
1494 OPTION(rbd_mirror_leader_heartbeat_interval, OPT_INT, 5) // interval (in seconds) between mirror leader heartbeats
1495 OPTION(rbd_mirror_leader_max_missed_heartbeats, OPT_INT, 2) // number of missed heartbeats for non-lock owner to attempt to acquire lock
1496 OPTION(rbd_mirror_leader_max_acquire_attempts_before_break, OPT_INT, 3) // number of failed attempts to acquire lock after missing heartbeats before breaking lock
1497
1498 OPTION(nss_db_path, OPT_STR, "") // path to nss db
1499
1500
1501 OPTION(rgw_max_chunk_size, OPT_INT, 4 * 1024 * 1024)
1502 OPTION(rgw_put_obj_min_window_size, OPT_INT, 16 * 1024 * 1024)
1503 OPTION(rgw_put_obj_max_window_size, OPT_INT, 64 * 1024 * 1024)
1504 OPTION(rgw_max_put_size, OPT_U64, 5ULL*1024*1024*1024)
1505 OPTION(rgw_max_put_param_size, OPT_U64, 1 * 1024 * 1024) // max input size for PUT requests accepting json/xml params
1506
1507 /**
1508 * override max bucket index shards in zone configuration (if not zero)
1509 *
1510 * Represents the number of shards for the bucket index object, a value of zero
1511 * indicates there is no sharding. By default (no sharding), the name of the object
1512 * is '.dir.{marker}'; with sharding, the name is '.dir.{marker}.{sharding_id}',
1513 * where sharding_id is a zero-based value. It is not recommended to set too large a value
1514 * (e.g. thousand) as it increases the cost for bucket listing.
1515 */
1516 OPTION(rgw_override_bucket_index_max_shards, OPT_U32, 0)
1517
1518 /**
1519 * Represents the maximum AIO pending requests for the bucket index object shards.
1520 */
1521 OPTION(rgw_bucket_index_max_aio, OPT_U32, 8)
1522
1523 /**
1524 * whether or not the quota/gc/lc threads should be started (all default to true)
1525 */
1526 OPTION(rgw_enable_quota_threads, OPT_BOOL, true)
1527 OPTION(rgw_enable_gc_threads, OPT_BOOL, true)
1528 OPTION(rgw_enable_lc_threads, OPT_BOOL, true)
1529
1530
1531 OPTION(rgw_data, OPT_STR, "/var/lib/ceph/radosgw/$cluster-$id") // default path contains the $cluster and $id placeholders
1532 OPTION(rgw_enable_apis, OPT_STR, "s3, s3website, swift, swift_auth, admin") // comma-separated list in the default (NOTE(review): presumably the APIs to serve - confirm)
1533 OPTION(rgw_cache_enabled, OPT_BOOL, true) // rgw cache enabled
1534 OPTION(rgw_cache_lru_size, OPT_INT, 10000) // number of entries in rgw cache
1535 OPTION(rgw_socket_path, OPT_STR, "") // path to unix domain socket; if not specified, rgw will not run as external fcgi
1536 OPTION(rgw_host, OPT_STR, "") // host for radosgw, can be an IP, default is 0.0.0.0
1537 OPTION(rgw_port, OPT_STR, "") // port to listen, format as "8080" "5000", if not specified, rgw will not run external fcgi
1538 OPTION(rgw_dns_name, OPT_STR, "") // hostname suffix on buckets
1539 OPTION(rgw_dns_s3website_name, OPT_STR, "") // hostname suffix on buckets for s3-website endpoint
1540 OPTION(rgw_content_length_compat, OPT_BOOL, false) // Check both HTTP_CONTENT_LENGTH and CONTENT_LENGTH in fcgi env
1541 OPTION(rgw_lifecycle_work_time, OPT_STR, "00:00-06:00") // time window in which lifecycle (lc) jobs are processed; default is 00:00-06:00
1542 OPTION(rgw_lc_lock_max_time, OPT_INT, 60) // total run time for a single lc processor work
1543 OPTION(rgw_lc_max_objs, OPT_INT, 32)
1544 OPTION(rgw_lc_debug_interval, OPT_INT, -1) // Debug run interval, in seconds
1545 OPTION(rgw_script_uri, OPT_STR, "") // alternative value for SCRIPT_URI if not set in request
1546 OPTION(rgw_request_uri, OPT_STR, "") // alternative value for REQUEST_URI if not set in request
1547 OPTION(rgw_swift_url, OPT_STR, "") // the swift url, being published by the internal swift auth
1548 OPTION(rgw_swift_url_prefix, OPT_STR, "swift") // entry point for which a url is considered a swift url
1549 OPTION(rgw_swift_auth_url, OPT_STR, "") // default URL to go and verify tokens for v1 auth (if not using internal swift auth)
1550 OPTION(rgw_swift_auth_entry, OPT_STR, "auth") // entry point for which a url is considered a swift auth url
1551 OPTION(rgw_swift_tenant_name, OPT_STR, "") // tenant name to use for swift access
1552 OPTION(rgw_swift_account_in_url, OPT_BOOL, false) // assume that the URL always contains the account (aka tenant) part
1553 OPTION(rgw_swift_enforce_content_length, OPT_BOOL, false) // enforce generation of Content-Length even at the cost of performance or scalability
1554 OPTION(rgw_keystone_url, OPT_STR, "") // url for keystone server
1555 OPTION(rgw_keystone_admin_token, OPT_STR, "") // keystone admin token (shared secret)
1556 OPTION(rgw_keystone_admin_user, OPT_STR, "") // keystone admin user name
1557 OPTION(rgw_keystone_admin_password, OPT_STR, "") // keystone admin user password
1558 OPTION(rgw_keystone_admin_tenant, OPT_STR, "") // keystone admin user tenant (for keystone v2.0)
1559 OPTION(rgw_keystone_admin_project, OPT_STR, "") // keystone admin user project (for keystone v3)
1560 OPTION(rgw_keystone_admin_domain, OPT_STR, "") // keystone admin user domain
1561 OPTION(rgw_keystone_barbican_user, OPT_STR, "") // keystone user to access barbican secrets
1562 OPTION(rgw_keystone_barbican_password, OPT_STR, "") // keystone password for barbican user
1563 OPTION(rgw_keystone_barbican_tenant, OPT_STR, "") // keystone barbican user tenant (for keystone v2.0)
1564 OPTION(rgw_keystone_barbican_project, OPT_STR, "") // keystone barbican user project (for keystone v3)
1565 OPTION(rgw_keystone_barbican_domain, OPT_STR, "") // keystone barbican user domain
1566 OPTION(rgw_keystone_api_version, OPT_INT, 2) // Version of Keystone API to use (2 or 3)
1567 OPTION(rgw_keystone_accepted_roles, OPT_STR, "Member, admin") // roles required to serve requests
1568 OPTION(rgw_keystone_accepted_admin_roles, OPT_STR, "") // list of roles allowing a user to gain admin privileges
1569 OPTION(rgw_keystone_token_cache_size, OPT_INT, 10000) // max number of entries in keystone token cache
1570 OPTION(rgw_keystone_revocation_interval, OPT_INT, 15 * 60) // seconds between token revocation checks (default 900)
1571 OPTION(rgw_keystone_verify_ssl, OPT_BOOL, true) // should we try to verify keystone's ssl
1572 OPTION(rgw_keystone_implicit_tenants, OPT_BOOL, false) // create new users in their own tenants of the same name
1573 OPTION(rgw_cross_domain_policy, OPT_STR, "<allow-access-from domain=\"*\" secure=\"false\" />")
1574 OPTION(rgw_healthcheck_disabling_path, OPT_STR, "") // path whose existence causes the healthcheck to respond 503
1575 OPTION(rgw_s3_auth_use_rados, OPT_BOOL, true) // should we try to use the internal credentials for s3?
1576 OPTION(rgw_s3_auth_use_keystone, OPT_BOOL, false) // should we try to use keystone for s3?
1577 OPTION(rgw_s3_auth_aws4_force_boto2_compat, OPT_BOOL, true) // force aws4 auth boto2 compatibility
1578 OPTION(rgw_barbican_url, OPT_STR, "") // url for barbican server
1579
1580 /* OpenLDAP-style LDAP parameter strings */
1581 /* rgw_ldap_uri: space-separated list of LDAP servers in URI format */
1582 OPTION(rgw_ldap_uri, OPT_STR, "ldaps://<ldap.your.domain>")
1583 /* rgw_ldap_binddn: LDAP entry RGW will bind with (user match) */
1584 OPTION(rgw_ldap_binddn, OPT_STR, "uid=admin,cn=users,dc=example,dc=com")
1585 /* rgw_ldap_searchdn: LDAP search base (basedn) */
1586 OPTION(rgw_ldap_searchdn, OPT_STR, "cn=users,cn=accounts,dc=example,dc=com")
1587 /* rgw_ldap_dnattr: LDAP attribute containing RGW user names (to form binddns) */
1588 OPTION(rgw_ldap_dnattr, OPT_STR, "uid")
1589 /* rgw_ldap_secret: file containing credentials for rgw_ldap_binddn */
1590 OPTION(rgw_ldap_secret, OPT_STR, "/etc/openldap/secret")
1591 /* rgw_s3_auth_use_ldap: use LDAP for RGW auth? (off by default) */
1592 OPTION(rgw_s3_auth_use_ldap, OPT_BOOL, false)
1593 /* rgw_ldap_searchfilter: LDAP search filter (empty by default) */
1594 OPTION(rgw_ldap_searchfilter, OPT_STR, "")
1595
1596 OPTION(rgw_admin_entry, OPT_STR, "admin") // entry point for which a url is considered an admin request
1597 OPTION(rgw_enforce_swift_acls, OPT_BOOL, true)
1598 OPTION(rgw_swift_token_expiration, OPT_INT, 24 * 3600) // time in seconds for swift token expiration (default 86400, i.e. 24 hours)
1599 OPTION(rgw_print_continue, OPT_BOOL, true) // enable if 100-Continue works
1600 OPTION(rgw_print_prohibited_content_length, OPT_BOOL, false) // violate RFC 7230 and send Content-Length in 204 and 304
1601 OPTION(rgw_remote_addr_param, OPT_STR, "REMOTE_ADDR") // e.g. X-Forwarded-For, if you have a reverse proxy
1602 OPTION(rgw_op_thread_timeout, OPT_INT, 10*60) // default 600 (NOTE(review): presumably seconds - confirm)
1603 OPTION(rgw_op_thread_suicide_timeout, OPT_INT, 0)
1604 OPTION(rgw_thread_pool_size, OPT_INT, 100)
1605 OPTION(rgw_num_control_oids, OPT_INT, 8)
1606 OPTION(rgw_num_rados_handles, OPT_U32, 1)
1607 OPTION(rgw_verify_ssl, OPT_BOOL, true) // should http_client try to verify ssl when sending an https request
1608
1609 /* The following are tunables for caches of RGW NFS (and other file
1610 * client) objects.
1611 *
1612 * The file handle cache is a partitioned hash table
1613 * (fhcache_partitions), each with a closed hash part and backing
1614 * b-tree mapping. The number of partitions is expected to be a small
1615 * prime, the cache size something larger but less than 5K; the total
1616 * size of the cache is n_part * cache_size.
1617 */
1618 OPTION(rgw_nfs_lru_lanes, OPT_INT, 5)
1619 OPTION(rgw_nfs_lru_lane_hiwat, OPT_INT, 911)
1620 OPTION(rgw_nfs_fhcache_partitions, OPT_INT, 3)
1621 OPTION(rgw_nfs_fhcache_size, OPT_INT, 2017) /* 3*2017=6051 */
1622 OPTION(rgw_nfs_namespace_expire_secs, OPT_INT, 300) /* namespace invalidate
1623 * timer */
1624 OPTION(rgw_nfs_max_gc, OPT_INT, 300) /* max gc events per cycle */
1625 OPTION(rgw_nfs_write_completion_interval_s, OPT_INT, 10) /* stateless (V3)
1626 * commit
1627 * delay */
1628
1629 OPTION(rgw_zone, OPT_STR, "") // zone name
1630 OPTION(rgw_zone_root_pool, OPT_STR, ".rgw.root") // pool where zone-specific info is stored
1631 OPTION(rgw_default_zone_info_oid, OPT_STR, "default.zone") // oid where default zone info is stored
1632 OPTION(rgw_region, OPT_STR, "") // region name
1633 OPTION(rgw_region_root_pool, OPT_STR, ".rgw.root") // pool where all region info is stored
1634 OPTION(rgw_default_region_info_oid, OPT_STR, "default.region") // oid where default region info is stored
1635 OPTION(rgw_zonegroup, OPT_STR, "") // zone group name
1636 OPTION(rgw_zonegroup_root_pool, OPT_STR, ".rgw.root") // pool where all zone group info is stored
1637 OPTION(rgw_default_zonegroup_info_oid, OPT_STR, "default.zonegroup") // oid where default zone group info is stored
1638 OPTION(rgw_realm, OPT_STR, "") // realm name
1639 OPTION(rgw_realm_root_pool, OPT_STR, ".rgw.root") // pool where all realm info is stored
1640 OPTION(rgw_default_realm_info_oid, OPT_STR, "default.realm") // oid where default realm info is stored
1641 OPTION(rgw_period_root_pool, OPT_STR, ".rgw.root") // pool where all period info is stored
1642 OPTION(rgw_period_latest_epoch_info_oid, OPT_STR, ".latest_epoch") // oid where current period info is stored
1643 OPTION(rgw_log_nonexistent_bucket, OPT_BOOL, false)
1644 OPTION(rgw_log_object_name, OPT_STR, "%Y-%m-%d-%H-%i-%n") // see date(1) for the format codes (only a subset is supported)
1645 OPTION(rgw_log_object_name_utc, OPT_BOOL, false)
1646 OPTION(rgw_usage_max_shards, OPT_INT, 32)
1647 OPTION(rgw_usage_max_user_shards, OPT_INT, 1)
1648 OPTION(rgw_enable_ops_log, OPT_BOOL, false) // enable logging every rgw operation
1649 OPTION(rgw_enable_usage_log, OPT_BOOL, false) // enable logging bandwidth usage
1650 OPTION(rgw_ops_log_rados, OPT_BOOL, true) // whether ops log should go to rados
1651 OPTION(rgw_ops_log_socket_path, OPT_STR, "") // path to unix domain socket where ops log can go
1652 OPTION(rgw_ops_log_data_backlog, OPT_INT, 5 << 20) // max data backlog for ops log (default 5 << 20 = 5242880)
1653 OPTION(rgw_fcgi_socket_backlog, OPT_INT, 1024) // socket backlog for fcgi
1654 OPTION(rgw_usage_log_flush_threshold, OPT_INT, 1024) // threshold to flush pending log data
1655 OPTION(rgw_usage_log_tick_interval, OPT_INT, 30) // flush pending log data every X seconds
1656 OPTION(rgw_intent_log_object_name, OPT_STR, "%Y-%m-%d-%i-%n") // see date(1) for the format codes (only a subset is supported)
1657 OPTION(rgw_intent_log_object_name_utc, OPT_BOOL, false)
1658 OPTION(rgw_init_timeout, OPT_INT, 300) // time in seconds
1659 OPTION(rgw_mime_types_file, OPT_STR, "/etc/mime.types")
1660 OPTION(rgw_gc_max_objs, OPT_INT, 32)
1661 OPTION(rgw_gc_obj_min_wait, OPT_INT, 2 * 3600) // wait time before object may be handled by gc (default 7200; NOTE(review): presumably seconds - confirm)
1662 OPTION(rgw_gc_processor_max_time, OPT_INT, 3600) // total run time for a single gc processor work
1663 OPTION(rgw_gc_processor_period, OPT_INT, 3600) // gc processor cycle time
1664 OPTION(rgw_s3_success_create_obj_status, OPT_INT, 0) // alternative success status response for create-obj (0 - default)
1665 OPTION(rgw_resolve_cname, OPT_BOOL, false) // should rgw try to resolve hostname as a dns cname record
1666 OPTION(rgw_obj_stripe_size, OPT_INT, 4 << 20) // default 4 << 20 = 4194304 (NOTE(review): presumably bytes - confirm)
1667 OPTION(rgw_extended_http_attrs, OPT_STR, "") // list of extended attrs that can be set on objects (beyond the default)
1668 OPTION(rgw_exit_timeout_secs, OPT_INT, 120) // how many seconds to wait for process to go down before exiting unconditionally
1669 OPTION(rgw_get_obj_window_size, OPT_INT, 16 << 20) // window size in bytes for single get obj request
1670 OPTION(rgw_get_obj_max_req_size, OPT_INT, 4 << 20) // max length of a single get obj rados op
1671 OPTION(rgw_relaxed_s3_bucket_names, OPT_BOOL, false) // enable relaxed bucket name rules for US region buckets
1672 OPTION(rgw_defer_to_bucket_acls, OPT_STR, "") // if the user has bucket perms, use those before key perms (recurse and full_control)
1673 OPTION(rgw_list_buckets_max_chunk, OPT_INT, 1000) // max buckets to retrieve in a single op when listing user buckets
1674 OPTION(rgw_md_log_max_shards, OPT_INT, 64) // max shards for metadata log
1675 OPTION(rgw_num_zone_opstate_shards, OPT_INT, 128) // max shards for keeping inter-region copy progress info
1676 OPTION(rgw_opstate_ratelimit_sec, OPT_INT, 30) // min time between opstate updates on a single upload (0 for disabling ratelimit)
1677 OPTION(rgw_curl_wait_timeout_ms, OPT_INT, 1000) // timeout for certain curl calls (milliseconds, going by the option name)
1678 OPTION(rgw_copy_obj_progress, OPT_BOOL, true) // should dump progress during long copy operations?
1679 OPTION(rgw_copy_obj_progress_every_bytes, OPT_INT, 1024 * 1024) // min bytes between copy progress output
1680 OPTION(rgw_obj_tombstone_cache_size, OPT_INT, 1000) // how many objects in tombstone cache, which is used in multi-zone sync to keep
1681 // track of removed objects' mtime
1682
1683 OPTION(rgw_data_log_window, OPT_INT, 30) // data log entries window (in seconds)
1684 OPTION(rgw_data_log_changes_size, OPT_INT, 1000) // number of in-memory entries to hold for data changes log
1685 OPTION(rgw_data_log_num_shards, OPT_INT, 128) // number of objects to keep data changes log on
1686 OPTION(rgw_data_log_obj_prefix, OPT_STR, "data_log") // NOTE(review): presumably the name prefix of data log objects - confirm
1687 OPTION(rgw_replica_log_obj_prefix, OPT_STR, "replica_log") // NOTE(review): presumably the name prefix of replica log objects - confirm
1688
1689 OPTION(rgw_bucket_quota_ttl, OPT_INT, 600) // time for cached bucket stats to be cached within rgw instance
1690 OPTION(rgw_bucket_quota_soft_threshold, OPT_DOUBLE, 0.95) // threshold from which we don't rely on cached info for quota decisions
1691 OPTION(rgw_bucket_quota_cache_size, OPT_INT, 10000) // number of entries in bucket quota cache
1692 OPTION(rgw_bucket_default_quota_max_objects, OPT_INT, -1) // number of objects allowed
1693 OPTION(rgw_bucket_default_quota_max_size, OPT_LONGLONG, -1) // max size in bytes for the default bucket quota (NOTE(review): original comment said "size of object"; confirm this is the per-bucket total)
1694
1695 OPTION(rgw_expose_bucket, OPT_BOOL, false) // Return the bucket name in the 'Bucket' response header
1696
1697 OPTION(rgw_frontends, OPT_STR, "civetweb port=7480") // rgw front ends (default: civetweb on port 7480)
1698
1699 OPTION(rgw_user_quota_bucket_sync_interval, OPT_INT, 180) // time period for accumulating modified buckets before syncing stats
1700 OPTION(rgw_user_quota_sync_interval, OPT_INT, 3600 * 24) // time period for accumulating modified buckets before syncing entire user stats (default 86400)
1701 OPTION(rgw_user_quota_sync_idle_users, OPT_BOOL, false) // whether stats for idle users should be fully synced
1702 OPTION(rgw_user_quota_sync_wait_time, OPT_INT, 3600 * 24) // min time between two full stats syncs for non-idle users (default 86400)
1703 OPTION(rgw_user_default_quota_max_objects, OPT_INT, -1) // number of objects allowed
1704 OPTION(rgw_user_default_quota_max_size, OPT_LONGLONG, -1) // max size in bytes for the default user quota (NOTE(review): original comment said "size of object"; confirm this is the per-user total)
1705
1706 OPTION(rgw_multipart_min_part_size, OPT_INT, 5 * 1024 * 1024) // min size for each part (except for last one) in multipart upload (default 5 * 2^20 = 5242880)
1707 OPTION(rgw_multipart_part_upload_limit, OPT_INT, 10000) // parts limit in multipart upload
1708
1709 OPTION(rgw_max_slo_entries, OPT_INT, 1000) // default number of max entries in slo
1710
1711 OPTION(rgw_olh_pending_timeout_sec, OPT_INT, 3600) // time until we retire a pending olh change
1712 OPTION(rgw_user_max_buckets, OPT_INT, 1000) // global option to set max buckets count for all users
1713
1714 OPTION(rgw_objexp_gc_interval, OPT_U32, 60 * 10) // maximum time between rounds of expired-object garbage collection (default 600)
1715 OPTION(rgw_objexp_time_step, OPT_U32, 4096) // number of seconds for rounding the timestamps
1716 OPTION(rgw_objexp_hints_num_shards, OPT_U32, 127) // maximum number of parts in which the hint index is stored
1717 OPTION(rgw_objexp_chunk_size, OPT_U32, 100) // maximum number of entries in a single operation when processing objexp data
1718
1719 OPTION(rgw_enable_static_website, OPT_BOOL, false) // enable static website feature
1720 OPTION(rgw_log_http_headers, OPT_STR, "" ) // list of HTTP headers to log when seen, ignores case (e.g., http_x_forwarded_for)
1721
1722 OPTION(rgw_num_async_rados_threads, OPT_INT, 32) // number of threads to use for async rados operations
1723 OPTION(rgw_md_notify_interval_msec, OPT_INT, 200) // metadata changes notification interval to followers
1724 OPTION(rgw_run_sync_thread, OPT_BOOL, true) // whether radosgw (not radosgw-admin) spawns the sync thread
1725 OPTION(rgw_sync_lease_period, OPT_INT, 120) // time in seconds for the lease that rgw takes on a specific log (or log shard)
1726 OPTION(rgw_sync_log_trim_interval, OPT_INT, 1200) // time in seconds between attempts to trim sync logs
1727
1728 OPTION(rgw_sync_data_inject_err_probability, OPT_DOUBLE, 0) // range [0, 1]
1729 OPTION(rgw_sync_meta_inject_err_probability, OPT_DOUBLE, 0) // range [0, 1]
1730
1731
1732 OPTION(rgw_period_push_interval, OPT_DOUBLE, 2) // seconds to wait before retrying "period push"
1733 OPTION(rgw_period_push_interval_max, OPT_DOUBLE, 30) // maximum interval after exponential backoff
1734
1735 OPTION(rgw_safe_max_objects_per_shard, OPT_INT, 100*1024) // safe max loading (default 102400)
1736 OPTION(rgw_shard_warning_threshold, OPT_DOUBLE, 90) // pct of safe max
1737 // at which to warn
1738
1739 OPTION(rgw_swift_versioning_enabled, OPT_BOOL, false) // whether swift object versioning feature is enabled
1740
1741 OPTION(mgr_module_path, OPT_STR, CEPH_PKGLIBDIR "/mgr") // where to load python modules from
1742 OPTION(mgr_initial_modules, OPT_STR, "restful status") // Which modules to load (NOTE(review): default looks space-separated - confirm)
1743 OPTION(mgr_data, OPT_STR, "/var/lib/ceph/mgr/$cluster-$id") // where to find keyring etc
1744 OPTION(mgr_tick_period, OPT_INT, 2) // How frequently to tick
1745 OPTION(mgr_stats_period, OPT_INT, 5) // How frequently clients send stats
1746 OPTION(mgr_client_bytes, OPT_U64, 128*1048576) // bytes from clients (default 128 * 2^20)
1747 OPTION(mgr_client_messages, OPT_U64, 512) // messages from clients
1748 OPTION(mgr_osd_bytes, OPT_U64, 512*1048576) // bytes from osds (default 512 * 2^20)
1749 OPTION(mgr_osd_messages, OPT_U64, 8192) // messages from osds
1750 OPTION(mgr_mds_bytes, OPT_U64, 128*1048576) // bytes from mdss (default 128 * 2^20)
1751 OPTION(mgr_mds_messages, OPT_U64, 128) // messages from mdss
1752 OPTION(mgr_mon_bytes, OPT_U64, 128*1048576) // bytes from mons (default 128 * 2^20)
1753 OPTION(mgr_mon_messages, OPT_U64, 128) // messages from mons
1754
1755 OPTION(mgr_connect_retry_interval, OPT_DOUBLE, 1.0)
1756 OPTION(mgr_service_beacon_grace, OPT_DOUBLE, 60.0)
1757
1758 OPTION(mon_mgr_digest_period, OPT_INT, 5) // How frequently to send digests
1759 OPTION(mon_mgr_beacon_grace, OPT_INT, 30) // How long to wait to failover
1760 OPTION(mon_mgr_inactive_grace, OPT_INT, 60) // How long before health WARN -> ERR
1761 OPTION(mon_mgr_mkfs_grace, OPT_INT, 60) // How long before we complain about MGR_DOWN
1762 OPTION(rgw_crypt_require_ssl, OPT_BOOL, true) // requests including encryption key headers must be sent over ssl
1763 OPTION(rgw_crypt_default_encryption_key, OPT_STR, "") // base64 encoded key for encryption of rgw objects
1764 OPTION(rgw_crypt_s3_kms_encryption_keys, OPT_STR, "") // extra keys that may be used for aws:kms
1765 // defined as a map of name=value pairs, e.g. "key1=YmluCmJvb3N0CmJvb3N0LQ== key2=b3V0CnNyYwpUZXN0aW5nCg=="
1766 OPTION(rgw_crypt_suppress_logs, OPT_BOOL, true) // suppress logs that might print customer key
1767 OPTION(rgw_list_bucket_min_readahead, OPT_INT, 1000) // minimum number of entries to read from rados for bucket listing
1768
1769 OPTION(rgw_rest_getusage_op_compat, OPT_BOOL, false) // dump description of total stats for s3 GetUsage API
1770
1771 OPTION(mutex_perf_counter, OPT_BOOL, false) // enable/disable mutex perf counter (disabled by default)
1772 OPTION(throttler_perf_counter, OPT_BOOL, true) // enable/disable throttler perf counter (enabled by default)
1773
1774 /* The following are tunables for torrent data */
1775 OPTION(rgw_torrent_flag, OPT_BOOL, false) // produce torrent function flag
1776 OPTION(rgw_torrent_tracker, OPT_STR, "") // torrent field announce and announce list
1777 OPTION(rgw_torrent_createby, OPT_STR, "") // torrent field created by
1778 OPTION(rgw_torrent_comment, OPT_STR, "") // torrent field comment
1779 OPTION(rgw_torrent_encoding, OPT_STR, "") // torrent field encoding
1780 OPTION(rgw_torrent_origin, OPT_STR, "") // torrent origin
1781 OPTION(rgw_torrent_sha_unit, OPT_INT, 512*1024) // torrent field piece length 512K
1782
1783 OPTION(event_tracing, OPT_BOOL, false) // true if LTTng-UST tracepoints should be enabled
1784
1785 // This will be set to true when it is safe to start threads.
1786 // Once it is true, it will never change back.
1787 OPTION(internal_safe_to_start_threads, OPT_BOOL, false)
1788
1789 OPTION(debug_deliberately_leak_memory, OPT_BOOL, false)
1790
1791 OPTION(rgw_swift_custom_header, OPT_STR, "") // option to enable swift custom headers (empty by default)
1792
1793 OPTION(rgw_swift_need_stats, OPT_BOOL, true) // option to enable stats on bucket listing for swift (enabled by default)
1794
1795 /* resharding tunables */
1796 OPTION(rgw_reshard_num_logs, OPT_INT, 16)
1797 OPTION(rgw_reshard_bucket_lock_duration, OPT_INT, 120) // duration of lock on bucket obj during resharding
1798 OPTION(rgw_dynamic_resharding, OPT_BOOL, true) // enabled by default
1799 OPTION(rgw_max_objs_per_shard, OPT_INT, 100000)
1800 OPTION(rgw_reshard_thread_interval, OPT_U32, 60 * 10) // maximum time between rounds of reshard thread processing (default 600)