]> git.proxmox.com Git - ceph.git/blob - ceph/src/common/config_opts.h
443ef8c1a8719afd17a6b6ad3e4c76b5d14a4dc0
[ceph.git] / ceph / src / common / config_opts.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15 /* note: no header guard */
16 OPTION(host, OPT_STR, "") // "" means that ceph will use short hostname
17 OPTION(fsid, OPT_UUID, uuid_d())
18 OPTION(public_addr, OPT_ADDR, entity_addr_t())
19 OPTION(cluster_addr, OPT_ADDR, entity_addr_t())
20 OPTION(public_network, OPT_STR, "")
21 OPTION(cluster_network, OPT_STR, "")
22 OPTION(num_client, OPT_INT, 1)
23 OPTION(monmap, OPT_STR, "")
24 OPTION(mon_host, OPT_STR, "")
25 OPTION(mon_dns_srv_name, OPT_STR, "ceph-mon")
26 OPTION(lockdep, OPT_BOOL, false)
27 OPTION(lockdep_force_backtrace, OPT_BOOL, false) // always gather current backtrace at every lock
28 OPTION(run_dir, OPT_STR, "/var/run/ceph") // the "/var/run/ceph" dir, created on daemon startup
29 OPTION(admin_socket, OPT_STR, "$run_dir/$cluster-$name.asok") // default changed by common_preinit()
30 OPTION(admin_socket_mode, OPT_STR, "") // permission bits to set for admin socket file, e.g., "0775", "0755"
31 OPTION(crushtool, OPT_STR, "crushtool") // crushtool utility path
32
33 OPTION(daemonize, OPT_BOOL, false) // default changed by common_preinit()
34 OPTION(setuser, OPT_STR, "") // uid or user name
35 OPTION(setgroup, OPT_STR, "") // gid or group name
36 OPTION(setuser_match_path, OPT_STR, "") // make setuser/group conditional on this path matching ownership
37 OPTION(pid_file, OPT_STR, "") // default changed by common_preinit()
38 OPTION(chdir, OPT_STR, "/")
39 OPTION(max_open_files, OPT_LONGLONG, 0)
40 OPTION(restapi_log_level, OPT_STR, "") // default set by Python code
41 OPTION(restapi_base_url, OPT_STR, "") // default set by Python code
42 OPTION(fatal_signal_handlers, OPT_BOOL, true)
43 SAFE_OPTION(erasure_code_dir, OPT_STR, CEPH_PKGLIBDIR"/erasure-code") // default location for erasure-code plugins
44
45 OPTION(log_file, OPT_STR, "/var/log/ceph/$cluster-$name.log") // default changed by common_preinit()
46 OPTION(log_max_new, OPT_INT, 1000) // default changed by common_preinit()
47 OPTION(log_max_recent, OPT_INT, 10000) // default changed by common_preinit()
48 OPTION(log_to_stderr, OPT_BOOL, true) // default changed by common_preinit()
49 OPTION(err_to_stderr, OPT_BOOL, true) // default changed by common_preinit()
50 OPTION(log_to_syslog, OPT_BOOL, false)
51 OPTION(err_to_syslog, OPT_BOOL, false)
52 OPTION(log_flush_on_exit, OPT_BOOL, true) // default changed by common_preinit()
53 OPTION(log_stop_at_utilization, OPT_FLOAT, .97) // stop logging at (near) full
54 OPTION(log_to_graylog, OPT_BOOL, false)
55 OPTION(err_to_graylog, OPT_BOOL, false)
56 OPTION(log_graylog_host, OPT_STR, "127.0.0.1")
57 OPTION(log_graylog_port, OPT_INT, 12201)
58
59 // These options take k/v pairs, or a single item that is taken as the general
60 // default for all channels.
61 // e.g., "info" would be taken as the same as "default=info"
62 // also, "default=daemon audit=local0" would mean
63 // "default all to 'daemon', override 'audit' with 'local0'"
64 OPTION(clog_to_monitors, OPT_STR, "default=true")
65 OPTION(clog_to_syslog, OPT_STR, "false")
66 OPTION(clog_to_syslog_level, OPT_STR, "info") // this level and above
67 OPTION(clog_to_syslog_facility, OPT_STR, "default=daemon audit=local0")
68 OPTION(clog_to_graylog, OPT_STR, "false")
69 OPTION(clog_to_graylog_host, OPT_STR, "127.0.0.1")
70 OPTION(clog_to_graylog_port, OPT_STR, "12201")
71
72 OPTION(mon_cluster_log_to_syslog, OPT_STR, "default=false")
73 OPTION(mon_cluster_log_to_syslog_level, OPT_STR, "info") // this level and above
74 OPTION(mon_cluster_log_to_syslog_facility, OPT_STR, "daemon")
75 OPTION(mon_cluster_log_file, OPT_STR,
76 "default=/var/log/ceph/$cluster.$channel.log cluster=/var/log/ceph/$cluster.log")
77 OPTION(mon_cluster_log_file_level, OPT_STR, "info")
78 OPTION(mon_cluster_log_to_graylog, OPT_STR, "false")
79 OPTION(mon_cluster_log_to_graylog_host, OPT_STR, "127.0.0.1")
80 OPTION(mon_cluster_log_to_graylog_port, OPT_STR, "12201")
81
82 OPTION(enable_experimental_unrecoverable_data_corrupting_features, OPT_STR, "")
83
84 SAFE_OPTION(plugin_dir, OPT_STR, CEPH_PKGLIBDIR)
85
86 OPTION(xio_trace_mempool, OPT_BOOL, false) // mempool allocation counters
87 OPTION(xio_trace_msgcnt, OPT_BOOL, false) // incoming/outgoing msg counters
88 OPTION(xio_trace_xcon, OPT_BOOL, false) // Xio message encode/decode trace
89 OPTION(xio_queue_depth, OPT_INT, 128) // depth of Accelio msg queue
90 OPTION(xio_mp_min, OPT_INT, 128) // default min mempool size
91 OPTION(xio_mp_max_64, OPT_INT, 65536) // max 64-byte chunks (buffer is 40)
92 OPTION(xio_mp_max_256, OPT_INT, 8192) // max 256-byte chunks
93 OPTION(xio_mp_max_1k, OPT_INT, 8192) // max 1K chunks
94 OPTION(xio_mp_max_page, OPT_INT, 4096) // max page-sized chunks (presumably; TODO confirm)
95 OPTION(xio_mp_max_hint, OPT_INT, 4096) // max size-hint chunks
96 OPTION(xio_portal_threads, OPT_INT, 2) // xio portal threads per messenger
97 OPTION(xio_max_conns_per_portal, OPT_INT, 32) // max xio_connections per portal/ctx
98 OPTION(xio_transport_type, OPT_STR, "rdma") // xio transport type: {rdma or tcp}
99 OPTION(xio_max_send_inline, OPT_INT, 512) // xio maximum threshold to send inline
100
101 OPTION(compressor_zlib_isal, OPT_BOOL, false)
102 OPTION(compressor_zlib_level, OPT_INT, 5) //regular zlib compression level, not applicable to isa-l optimized version
103
104 OPTION(async_compressor_enabled, OPT_BOOL, false)
105 OPTION(async_compressor_type, OPT_STR, "snappy")
106 OPTION(async_compressor_threads, OPT_INT, 2)
107 OPTION(async_compressor_thread_timeout, OPT_INT, 5)
108 OPTION(async_compressor_thread_suicide_timeout, OPT_INT, 30)
109
110 OPTION(plugin_crypto_accelerator, OPT_STR, "crypto_isal")
111
112 OPTION(mempool_debug, OPT_BOOL, false)
113
114 DEFAULT_SUBSYS(0, 5)
115 SUBSYS(lockdep, 0, 1)
116 SUBSYS(context, 0, 1)
117 SUBSYS(crush, 1, 1)
118 SUBSYS(mds, 1, 5)
119 SUBSYS(mds_balancer, 1, 5)
120 SUBSYS(mds_locker, 1, 5)
121 SUBSYS(mds_log, 1, 5)
122 SUBSYS(mds_log_expire, 1, 5)
123 SUBSYS(mds_migrator, 1, 5)
124 SUBSYS(buffer, 0, 1)
125 SUBSYS(timer, 0, 1)
126 SUBSYS(filer, 0, 1)
127 SUBSYS(striper, 0, 1)
128 SUBSYS(objecter, 0, 1)
129 SUBSYS(rados, 0, 5)
130 SUBSYS(rbd, 0, 5)
131 SUBSYS(rbd_mirror, 0, 5)
132 SUBSYS(rbd_replay, 0, 5)
133 SUBSYS(journaler, 0, 5)
134 SUBSYS(objectcacher, 0, 5)
135 SUBSYS(client, 0, 5)
136 SUBSYS(osd, 1, 5)
137 SUBSYS(optracker, 0, 5)
138 SUBSYS(objclass, 0, 5)
139 SUBSYS(filestore, 1, 3)
140 SUBSYS(journal, 1, 3)
141 SUBSYS(ms, 0, 5)
142 SUBSYS(mon, 1, 5)
143 SUBSYS(monc, 0, 10)
144 SUBSYS(paxos, 1, 5)
145 SUBSYS(tp, 0, 5)
146 SUBSYS(auth, 1, 5)
147 SUBSYS(crypto, 1, 5)
148 SUBSYS(finisher, 1, 1)
149 SUBSYS(heartbeatmap, 1, 5)
150 SUBSYS(perfcounter, 1, 5)
151 SUBSYS(rgw, 1, 5) // log level for the Rados gateway
152 SUBSYS(civetweb, 1, 10)
153 SUBSYS(javaclient, 1, 5)
154 SUBSYS(asok, 1, 5)
155 SUBSYS(throttle, 1, 1)
156 SUBSYS(refs, 0, 0)
157 SUBSYS(xio, 1, 5)
158 SUBSYS(compressor, 1, 5)
159 SUBSYS(bluestore, 1, 5)
160 SUBSYS(bluefs, 1, 5)
161 SUBSYS(bdev, 1, 3)
162 SUBSYS(kstore, 1, 5)
163 SUBSYS(rocksdb, 4, 5)
164 SUBSYS(leveldb, 4, 5)
165 SUBSYS(memdb, 4, 5)
166 SUBSYS(kinetic, 1, 5)
167 SUBSYS(fuse, 1, 5)
168 SUBSYS(mgr, 1, 5)
169 SUBSYS(mgrc, 1, 5)
170 SUBSYS(dpdk, 1, 5)
171 SUBSYS(eventtrace, 1, 5)
172
173 OPTION(key, OPT_STR, "")
174 OPTION(keyfile, OPT_STR, "")
175 OPTION(keyring, OPT_STR,
176 // default changed by common_preinit() for mds and osd; FreeBSD builds also search /usr/local/etc/ceph
177 "/etc/ceph/$cluster.$name.keyring,/etc/ceph/$cluster.keyring,/etc/ceph/keyring,/etc/ceph/keyring.bin,"
178 #if defined(__FreeBSD__)
179 "/usr/local/etc/ceph/$cluster.$name.keyring,/usr/local/etc/ceph/$cluster.keyring,"
180 "/usr/local/etc/ceph/keyring,/usr/local/etc/ceph/keyring.bin,"
181 #endif
182 )
183 OPTION(heartbeat_interval, OPT_INT, 5)
184 OPTION(heartbeat_file, OPT_STR, "")
185 OPTION(heartbeat_inject_failure, OPT_INT, 0) // force an unhealthy heartbeat for N seconds
186 OPTION(perf, OPT_BOOL, true) // enable internal perf counters
187
188 SAFE_OPTION(ms_type, OPT_STR, "async+posix") // messenger backend. It will be modified in runtime, so use SAFE_OPTION
189 OPTION(ms_public_type, OPT_STR, "") // messenger backend
190 OPTION(ms_cluster_type, OPT_STR, "") // messenger backend
191 OPTION(ms_tcp_nodelay, OPT_BOOL, true)
192 OPTION(ms_tcp_rcvbuf, OPT_INT, 0)
193 OPTION(ms_tcp_prefetch_max_size, OPT_INT, 4096) // max prefetch size, we limit this to avoid extra memcpy
194 OPTION(ms_initial_backoff, OPT_DOUBLE, .2)
195 OPTION(ms_max_backoff, OPT_DOUBLE, 15.0)
196 OPTION(ms_crc_data, OPT_BOOL, true)
197 OPTION(ms_crc_header, OPT_BOOL, true)
198 OPTION(ms_die_on_bad_msg, OPT_BOOL, false)
199 OPTION(ms_die_on_unhandled_msg, OPT_BOOL, false)
200 OPTION(ms_die_on_old_message, OPT_BOOL, false) // assert if we get a dup incoming message and shouldn't have (may be triggered by pre-541cd3c64be0dfa04e8a2df39422e0eb9541a428 code)
201 OPTION(ms_die_on_skipped_message, OPT_BOOL, false) // assert if we skip a seq (kernel client does this intentionally)
202 OPTION(ms_dispatch_throttle_bytes, OPT_U64, 100 << 20)
203 OPTION(ms_bind_ipv6, OPT_BOOL, false)
204 OPTION(ms_bind_port_min, OPT_INT, 6800)
205 OPTION(ms_bind_port_max, OPT_INT, 7300)
206 #if !defined(__FreeBSD__)
207 OPTION(ms_bind_retry_count, OPT_INT, 3) // If binding fails, how many times do we retry to bind
208 OPTION(ms_bind_retry_delay, OPT_INT, 5) // Delay between attempts to bind
209 #else
210 // FreeBSD does not use SO_REUSEADDR so allow for a bit more time by default
211 OPTION(ms_bind_retry_count, OPT_INT, 6) // If binding fails, how many times do we retry to bind
212 OPTION(ms_bind_retry_delay, OPT_INT, 6) // Delay between attempts to bind
213 #endif
214 OPTION(ms_bind_before_connect, OPT_BOOL, false)
215 OPTION(ms_rwthread_stack_bytes, OPT_U64, 1024 << 10)
216 OPTION(ms_tcp_read_timeout, OPT_U64, 900)
217 OPTION(ms_pq_max_tokens_per_priority, OPT_U64, 16777216)
218 OPTION(ms_pq_min_cost, OPT_U64, 65536)
219 OPTION(ms_inject_socket_failures, OPT_U64, 0)
220 SAFE_OPTION(ms_inject_delay_type, OPT_STR, "") // "osd mds mon client" allowed
221 OPTION(ms_inject_delay_msg_type, OPT_STR, "") // the type of message to delay, as returned by Message::get_type_name(). This is an additional restriction on the general type filter ms_inject_delay_type.
222 OPTION(ms_inject_delay_max, OPT_DOUBLE, 1) // seconds
223 OPTION(ms_inject_delay_probability, OPT_DOUBLE, 0) // range [0, 1]
224 OPTION(ms_inject_internal_delays, OPT_DOUBLE, 0) // seconds
225 OPTION(ms_dump_on_send, OPT_BOOL, false) // hexdump msg to log on send
226 OPTION(ms_dump_corrupt_message_level, OPT_INT, 1) // debug level to hexdump undecodeable messages at
227 OPTION(ms_async_op_threads, OPT_U64, 3) // number of worker processing threads for async messenger created on init
228 OPTION(ms_async_max_op_threads, OPT_U64, 5) // max number of worker processing threads for async messenger
229 OPTION(ms_async_set_affinity, OPT_BOOL, true)
230 // example: ms_async_affinity_cores = 0,1
231 // The number of cores listed is expected to equal ms_async_op_threads; otherwise
232 // extra op threads will loop over ms_async_affinity_cores again.
233 // If ms_async_affinity_cores is empty, all threads will be bound to the currently running
234 // core
235 OPTION(ms_async_affinity_cores, OPT_STR, "")
236 OPTION(ms_async_rdma_device_name, OPT_STR, "")
237 OPTION(ms_async_rdma_enable_hugepage, OPT_BOOL, false)
238 OPTION(ms_async_rdma_buffer_size, OPT_INT, 128 << 10)
239 OPTION(ms_async_rdma_send_buffers, OPT_U32, 1024)
240 OPTION(ms_async_rdma_receive_buffers, OPT_U32, 1024)
241 OPTION(ms_async_rdma_port_num, OPT_U32, 1)
242 OPTION(ms_async_rdma_polling_us, OPT_U32, 1000)
243 OPTION(ms_async_rdma_local_gid, OPT_STR, "") // GID format: "fe80:0000:0000:0000:7efe:90ff:fe72:6efe", no zero folding
244 OPTION(ms_async_rdma_roce_ver, OPT_INT, 1) // 0=RoCEv1, 1=RoCEv2, 2=RoCEv1.5
245 OPTION(ms_async_rdma_sl, OPT_INT, 3) // in RoCE, this means PCP
246 OPTION(ms_async_rdma_dscp, OPT_INT, 96) // in RoCE, this means DSCP
247
248 OPTION(ms_dpdk_port_id, OPT_INT, 0)
249 SAFE_OPTION(ms_dpdk_coremask, OPT_STR, "1") // it is modified in unittest so that use SAFE_OPTION to declare
250 OPTION(ms_dpdk_memory_channel, OPT_STR, "4")
251 OPTION(ms_dpdk_hugepages, OPT_STR, "")
252 OPTION(ms_dpdk_pmd, OPT_STR, "")
253 SAFE_OPTION(ms_dpdk_host_ipv4_addr, OPT_STR, "")
254 SAFE_OPTION(ms_dpdk_gateway_ipv4_addr, OPT_STR, "")
255 SAFE_OPTION(ms_dpdk_netmask_ipv4_addr, OPT_STR, "")
256 OPTION(ms_dpdk_lro, OPT_BOOL, true)
257 OPTION(ms_dpdk_hw_flow_control, OPT_BOOL, true)
258 // Weighting of a hardware network queue relative to a software queue (0=no work, 1=equal share)
259 OPTION(ms_dpdk_hw_queue_weight, OPT_FLOAT, 1)
260 OPTION(ms_dpdk_debug_allow_loopback, OPT_BOOL, false)
261 OPTION(ms_dpdk_rx_buffer_count_per_core, OPT_INT, 8192)
262
263 OPTION(inject_early_sigterm, OPT_BOOL, false)
264
265 OPTION(mon_data, OPT_STR, "/var/lib/ceph/mon/$cluster-$id")
266 OPTION(mon_initial_members, OPT_STR, "") // list of initial cluster mon ids; if specified, need majority to form initial quorum and create new cluster
267 OPTION(mon_compact_on_start, OPT_BOOL, false) // compact leveldb on ceph-mon start
268 OPTION(mon_compact_on_bootstrap, OPT_BOOL, false) // trigger leveldb compaction on bootstrap
269 OPTION(mon_compact_on_trim, OPT_BOOL, true) // compact (a prefix) when we trim old states
270 OPTION(mon_osd_cache_size, OPT_INT, 10) // the size of osdmaps cache, not to rely on underlying store's cache
271
272 OPTION(mon_cpu_threads, OPT_INT, 4)
273 OPTION(mon_osd_mapping_pgs_per_chunk, OPT_INT, 4096)
274 OPTION(mon_osd_max_creating_pgs, OPT_INT, 1024)
275 OPTION(mon_tick_interval, OPT_INT, 5)
276 OPTION(mon_session_timeout, OPT_INT, 300) // must send keepalive or subscribe
277 OPTION(mon_subscribe_interval, OPT_DOUBLE, 24*3600) // for legacy clients only
278 OPTION(mon_delta_reset_interval, OPT_DOUBLE, 10) // seconds of inactivity before we reset the pg delta to 0
279 OPTION(mon_osd_laggy_halflife, OPT_INT, 60*60) // (seconds) how quickly our laggy estimations decay
280 OPTION(mon_osd_laggy_weight, OPT_DOUBLE, .3) // weight for new 'samples's in laggy estimations
281 OPTION(mon_osd_laggy_max_interval, OPT_INT, 300) // maximum value of laggy_interval in laggy estimations
282 OPTION(mon_osd_adjust_heartbeat_grace, OPT_BOOL, true) // true if we should scale based on laggy estimations
283 OPTION(mon_osd_adjust_down_out_interval, OPT_BOOL, true) // true if we should scale based on laggy estimations
284 OPTION(mon_osd_auto_mark_in, OPT_BOOL, false) // mark any booting osds 'in'
285 OPTION(mon_osd_auto_mark_auto_out_in, OPT_BOOL, true) // mark booting auto-marked-out osds 'in'
286 OPTION(mon_osd_auto_mark_new_in, OPT_BOOL, true) // mark booting new osds 'in'
287 OPTION(mon_osd_down_out_interval, OPT_INT, 600) // seconds
288 OPTION(mon_osd_down_out_subtree_limit, OPT_STR, "rack") // smallest crush unit/type that we will not automatically mark out
289 OPTION(mon_osd_min_up_ratio, OPT_DOUBLE, .3) // min osds required to be up to mark things down
290 OPTION(mon_osd_min_in_ratio, OPT_DOUBLE, .75) // min osds required to be in to mark things out
291 OPTION(mon_osd_warn_op_age, OPT_DOUBLE, 32) // max op age before we generate a warning (make it a power of 2)
292 OPTION(mon_osd_err_op_age_ratio, OPT_DOUBLE, 128) // when to generate an error, as multiple of mon_osd_warn_op_age
293 OPTION(mon_osd_max_split_count, OPT_INT, 32) // largest number of PGs per "involved" OSD to let split create
294 OPTION(mon_osd_allow_primary_temp, OPT_BOOL, false) // allow primary_temp to be set in the osdmap
295 OPTION(mon_osd_allow_primary_affinity, OPT_BOOL, false) // allow primary_affinity to be set in the osdmap
296 OPTION(mon_osd_prime_pg_temp, OPT_BOOL, true) // prime osdmap with pg mapping changes
297 OPTION(mon_osd_prime_pg_temp_max_time, OPT_FLOAT, .5) // max time to spend priming
298 OPTION(mon_osd_prime_pg_temp_max_estimate, OPT_FLOAT, .25) // max estimate of pg total before we do all pgs in parallel
299 OPTION(mon_osd_pool_ec_fast_read, OPT_BOOL, false) // whether turn on fast read on the pool or not
300 OPTION(mon_stat_smooth_intervals, OPT_INT, 6) // smooth stats over last N PGMap maps
301 OPTION(mon_election_timeout, OPT_FLOAT, 5) // on election proposer, max waiting time for all ACKs
302 OPTION(mon_lease, OPT_FLOAT, 5) // lease interval
303 OPTION(mon_lease_renew_interval_factor, OPT_FLOAT, .6) // on leader, to renew the lease
304 OPTION(mon_lease_ack_timeout_factor, OPT_FLOAT, 2.0) // on leader, if lease isn't acked by all peons
305 OPTION(mon_accept_timeout_factor, OPT_FLOAT, 2.0) // on leader, if paxos update isn't accepted
306
307 OPTION(mon_clock_drift_allowed, OPT_FLOAT, .050) // allowed clock drift between monitors
308 OPTION(mon_clock_drift_warn_backoff, OPT_FLOAT, 5) // exponential backoff for clock drift warnings
309 OPTION(mon_timecheck_interval, OPT_FLOAT, 300.0) // on leader, timecheck (clock drift check) interval (seconds)
310 OPTION(mon_timecheck_skew_interval, OPT_FLOAT, 30.0) // on leader, timecheck (clock drift check) interval when in presence of a skew (seconds)
311 OPTION(mon_pg_stuck_threshold, OPT_INT, 300) // number of seconds after which pgs can be considered inactive, unclean, or stale (see doc/control.rst under dump_stuck for more info)
312 OPTION(mon_health_max_detail, OPT_INT, 50) // max detailed pgs to report in health detail
313 OPTION(mon_pg_min_inactive, OPT_U64, 1) // the number of PGs which have to be inactive longer than 'mon_pg_stuck_threshold' before health goes into ERR. 0 means disabled, never go into ERR.
314 OPTION(mon_pg_warn_min_per_osd, OPT_INT, 30) // min # pgs per (in) osd before we warn the admin
315 OPTION(mon_pg_warn_max_per_osd, OPT_INT, 300) // max # pgs per (in) osd before we warn the admin
316 OPTION(mon_pg_warn_max_object_skew, OPT_FLOAT, 10.0) // max skew from average in objects per pg
317 OPTION(mon_pg_warn_min_objects, OPT_INT, 10000) // do not warn below this object #
318 OPTION(mon_pg_warn_min_pool_objects, OPT_INT, 1000) // do not warn on pools below this object #
319 OPTION(mon_pg_check_down_all_threshold, OPT_FLOAT, .5) // threshold of down osds after which we check all pgs
320 OPTION(mon_cache_target_full_warn_ratio, OPT_FLOAT, .66) // position between pool cache_target_full and max where we start warning
321 OPTION(mon_osd_full_ratio, OPT_FLOAT, .95) // what % full makes an OSD "full"
322 OPTION(mon_osd_backfillfull_ratio, OPT_FLOAT, .90) // what % full makes an OSD backfill full (backfill halted)
323 OPTION(mon_osd_nearfull_ratio, OPT_FLOAT, .85) // what % full makes an OSD near full
324 OPTION(mon_osd_initial_require_min_compat_client, OPT_STR, "jewel")
325 OPTION(mon_allow_pool_delete, OPT_BOOL, false) // allow pool deletion
326 OPTION(mon_fake_pool_delete, OPT_BOOL, false) // fake pool deletion (add _DELETED suffix)
327 OPTION(mon_globalid_prealloc, OPT_U32, 10000) // how many globalids to prealloc
328 OPTION(mon_osd_report_timeout, OPT_INT, 900) // grace period before declaring unresponsive OSDs dead
329 OPTION(mon_force_standby_active, OPT_BOOL, true) // should mons force standby-replay mds to be active
330 OPTION(mon_warn_on_legacy_crush_tunables, OPT_BOOL, true) // warn if crush tunables are too old (older than mon_crush_min_required_version)
331 OPTION(mon_crush_min_required_version, OPT_STR, "firefly")
332 OPTION(mon_warn_on_crush_straw_calc_version_zero, OPT_BOOL, true) // warn if crush straw_calc_version==0
333 OPTION(mon_warn_on_osd_down_out_interval_zero, OPT_BOOL, true) // warn if 'mon_osd_down_out_interval == 0'
334 OPTION(mon_warn_on_cache_pools_without_hit_sets, OPT_BOOL, true)
335 OPTION(mon_warn_osd_usage_min_max_delta, OPT_FLOAT, .40) // warn if difference between min and max OSD utilizations exceeds specified amount
336 OPTION(mon_min_osdmap_epochs, OPT_INT, 500)
337 OPTION(mon_max_pgmap_epochs, OPT_INT, 500)
338 OPTION(mon_max_log_epochs, OPT_INT, 500)
339 OPTION(mon_max_mdsmap_epochs, OPT_INT, 500)
340 OPTION(mon_max_osd, OPT_INT, 10000)
341 OPTION(mon_probe_timeout, OPT_DOUBLE, 2.0)
342 OPTION(mon_client_bytes, OPT_U64, 100ul << 20) // client msg data allowed in memory (in bytes)
343 OPTION(mon_mgr_proxy_client_bytes_ratio, OPT_FLOAT, .3) // ratio of mon_client_bytes that can be consumed by proxied mgr commands before we error out to client
344 OPTION(mon_log_max_summary, OPT_U64, 50)
345 OPTION(mon_daemon_bytes, OPT_U64, 400ul << 20) // mds, osd message memory cap (in bytes)
346 OPTION(mon_max_log_entries_per_event, OPT_INT, 4096)
347 OPTION(mon_reweight_min_pgs_per_osd, OPT_U64, 10) // min pgs per osd for reweight-by-pg command
348 OPTION(mon_reweight_min_bytes_per_osd, OPT_U64, 100*1024*1024) // min bytes per osd for reweight-by-utilization command
349 OPTION(mon_reweight_max_osds, OPT_INT, 4) // max osds to change per reweight-by-* command
350 OPTION(mon_reweight_max_change, OPT_DOUBLE, 0.05)
351 OPTION(mon_health_data_update_interval, OPT_FLOAT, 60.0)
352 OPTION(mon_health_to_clog, OPT_BOOL, true)
353 OPTION(mon_health_to_clog_interval, OPT_INT, 3600)
354 OPTION(mon_health_to_clog_tick_interval, OPT_DOUBLE, 60.0)
355 OPTION(mon_data_avail_crit, OPT_INT, 5)
356 OPTION(mon_data_avail_warn, OPT_INT, 30)
357 OPTION(mon_data_size_warn, OPT_U64, 15ULL*1024*1024*1024) // issue a warning when the monitor's data store goes over 15GB (in bytes); ULL keeps the product out of int arithmetic, where it would overflow
358 OPTION(mon_warn_not_scrubbed, OPT_INT, 0)
359 OPTION(mon_warn_not_deep_scrubbed, OPT_INT, 0)
360 OPTION(mon_scrub_interval, OPT_INT, 3600*24) // once a day
361 OPTION(mon_scrub_timeout, OPT_INT, 60*5) // let's give it 5 minutes; why not.
362 OPTION(mon_scrub_max_keys, OPT_INT, 100) // max number of keys to scrub each time
363 OPTION(mon_scrub_inject_crc_mismatch, OPT_DOUBLE, 0.0) // probability of injected crc mismatch [0.0, 1.0]
364 OPTION(mon_scrub_inject_missing_keys, OPT_DOUBLE, 0.0) // probability of injected missing keys [0.0, 1.0]
365 OPTION(mon_config_key_max_entry_size, OPT_INT, 4096) // max num bytes per config-key entry
366 OPTION(mon_sync_timeout, OPT_DOUBLE, 60.0)
367 OPTION(mon_sync_max_payload_size, OPT_U32, 1048576) // max size for a sync chunk payload (say, 1MB)
368 OPTION(mon_sync_debug, OPT_BOOL, false) // enable sync-specific debug
369 OPTION(mon_inject_sync_get_chunk_delay, OPT_DOUBLE, 0) // inject N second delay on each get_chunk request
370 OPTION(mon_osd_min_down_reporters, OPT_INT, 2) // number of OSDs from different subtrees who need to report a down OSD for it to count
371 OPTION(mon_osd_reporter_subtree_level , OPT_STR, "host") // in which level of parent bucket the reporters are counted
372 OPTION(mon_osd_force_trim_to, OPT_INT, 0) // force mon to trim maps to this point, regardless of min_last_epoch_clean (dangerous, use with care)
373 OPTION(mon_mds_force_trim_to, OPT_INT, 0) // force mon to trim mdsmaps to this point (dangerous, use with care)
374 OPTION(mon_mds_skip_sanity, OPT_BOOL, false) // skip safety assertions on FSMap (in case of bugs where we want to continue anyway)
375
376 // monitor debug options
377 OPTION(mon_debug_deprecated_as_obsolete, OPT_BOOL, false) // consider deprecated commands as obsolete
378
379 // dump transactions
380 OPTION(mon_debug_dump_transactions, OPT_BOOL, false)
381 OPTION(mon_debug_dump_json, OPT_BOOL, false)
382 OPTION(mon_debug_dump_location, OPT_STR, "/var/log/ceph/$cluster-$name.tdump")
383 OPTION(mon_debug_no_require_luminous, OPT_BOOL, false)
384 OPTION(mon_debug_no_require_bluestore_for_ec_overwrites, OPT_BOOL, false)
385 OPTION(mon_debug_no_initial_persistent_features, OPT_BOOL, false)
386 OPTION(mon_inject_transaction_delay_max, OPT_DOUBLE, 10.0) // seconds
387 OPTION(mon_inject_transaction_delay_probability, OPT_DOUBLE, 0) // range [0, 1]
388
389 OPTION(mon_sync_provider_kill_at, OPT_INT, 0) // kill the sync provider at a specific point in the work flow
390 OPTION(mon_sync_requester_kill_at, OPT_INT, 0) // kill the sync requester at a specific point in the work flow
391 OPTION(mon_force_quorum_join, OPT_BOOL, false) // force monitor to join quorum even if it has been previously removed from the map
392 OPTION(mon_keyvaluedb, OPT_STR, "rocksdb") // type of keyvaluedb backend
393
394 // UNSAFE -- TESTING ONLY! Allows addition of a cache tier with preexisting snaps
395 OPTION(mon_debug_unsafe_allow_tier_with_nonempty_snaps, OPT_BOOL, false)
396
397 OPTION(paxos_stash_full_interval, OPT_INT, 25) // how often (in commits) to stash a full copy of the PaxosService state
398 OPTION(paxos_max_join_drift, OPT_INT, 10) // max paxos iterations before we must first sync the monitor stores
399 OPTION(paxos_propose_interval, OPT_DOUBLE, 1.0) // gather updates for this long before proposing a map update
400 OPTION(paxos_min_wait, OPT_DOUBLE, 0.05) // min time to gather updates for after period of inactivity
401 OPTION(paxos_min, OPT_INT, 500) // minimum number of paxos states to keep around
402 OPTION(paxos_trim_min, OPT_INT, 250) // number of extra proposals tolerated before trimming
403 OPTION(paxos_trim_max, OPT_INT, 500) // max number of extra proposals to trim at a time
404 OPTION(paxos_service_trim_min, OPT_INT, 250) // minimum amount of versions to trigger a trim (0 disables it)
405 OPTION(paxos_service_trim_max, OPT_INT, 500) // maximum amount of versions to trim during a single proposal (0 disables it)
406 OPTION(paxos_kill_at, OPT_INT, 0)
407 OPTION(auth_cluster_required, OPT_STR, "cephx") // required of mon, mds, osd daemons
408 OPTION(auth_service_required, OPT_STR, "cephx") // required by daemons of clients
409 OPTION(auth_client_required, OPT_STR, "cephx, none") // what clients require of daemons
410 OPTION(auth_supported, OPT_STR, "") // deprecated; default value for above if they are not defined.
411 OPTION(max_rotating_auth_attempts, OPT_INT, 10)
412 OPTION(cephx_require_signatures, OPT_BOOL, false) // If true, don't talk to Cephx partners if they don't support message signing; off by default
413 OPTION(cephx_cluster_require_signatures, OPT_BOOL, false)
414 OPTION(cephx_service_require_signatures, OPT_BOOL, false)
415 OPTION(cephx_sign_messages, OPT_BOOL, true) // Default to signing session messages if supported
416 OPTION(auth_mon_ticket_ttl, OPT_DOUBLE, 60*60*12)
417 OPTION(auth_service_ticket_ttl, OPT_DOUBLE, 60*60)
418 OPTION(auth_debug, OPT_BOOL, false) // if true, assert when weird things happen
419 OPTION(mon_client_hunt_parallel, OPT_U32, 2) // how many mons to try to connect to in parallel during hunt
420 OPTION(mon_client_hunt_interval, OPT_DOUBLE, 3.0) // try new mon every N seconds until we connect
421 OPTION(mon_client_ping_interval, OPT_DOUBLE, 10.0) // ping every N seconds
422 OPTION(mon_client_ping_timeout, OPT_DOUBLE, 30.0) // fail if we don't hear back
423 OPTION(mon_client_hunt_interval_backoff, OPT_DOUBLE, 2.0) // each time we reconnect to a monitor, double our timeout
424 OPTION(mon_client_hunt_interval_max_multiple, OPT_DOUBLE, 10.0) // up to a max of 10*default (30 seconds)
425 OPTION(mon_client_max_log_entries_per_message, OPT_INT, 1000)
426 OPTION(mon_max_pool_pg_num, OPT_INT, 65536)
427 OPTION(mon_pool_quota_warn_threshold, OPT_INT, 0) // percent of quota at which to issue warnings
428 OPTION(mon_pool_quota_crit_threshold, OPT_INT, 0) // percent of quota at which to issue errors
429 OPTION(client_cache_size, OPT_INT, 16384)
430 OPTION(client_cache_mid, OPT_FLOAT, .75)
431 OPTION(client_use_random_mds, OPT_BOOL, false)
432 OPTION(client_mount_timeout, OPT_DOUBLE, 300.0)
433 OPTION(client_tick_interval, OPT_DOUBLE, 1.0)
434 OPTION(client_trace, OPT_STR, "")
435 OPTION(client_readahead_min, OPT_LONGLONG, 128*1024) // readahead at _least_ this much.
436 OPTION(client_readahead_max_bytes, OPT_LONGLONG, 0) // default unlimited
437 OPTION(client_readahead_max_periods, OPT_LONGLONG, 4) // as multiple of file layout period (object size * num stripes)
438 OPTION(client_reconnect_stale, OPT_BOOL, false) // automatically reconnect stale session
439 OPTION(client_snapdir, OPT_STR, ".snap")
440 OPTION(client_mountpoint, OPT_STR, "/")
441 OPTION(client_mount_uid, OPT_INT, -1)
442 OPTION(client_mount_gid, OPT_INT, -1)
443 OPTION(client_notify_timeout, OPT_INT, 10) // in seconds
444 OPTION(osd_client_watch_timeout, OPT_INT, 30) // in seconds
445 OPTION(client_caps_release_delay, OPT_INT, 5) // in seconds
446 OPTION(client_quota_df, OPT_BOOL, true) // use quota for df on subdir mounts
447 OPTION(client_oc, OPT_BOOL, true)
448 OPTION(client_oc_size, OPT_INT, 1024*1024* 200) // MB * n
449 OPTION(client_oc_max_dirty, OPT_INT, 1024*1024* 100) // MB * n (dirty OR tx.. bigish)
450 OPTION(client_oc_target_dirty, OPT_INT, 1024*1024* 8) // target dirty (keep this smallish)
451 OPTION(client_oc_max_dirty_age, OPT_DOUBLE, 5.0) // max age in cache before writeback
452 OPTION(client_oc_max_objects, OPT_INT, 1000) // max objects in cache
453 OPTION(client_debug_getattr_caps, OPT_BOOL, false) // check if MDS reply contains wanted caps
454 OPTION(client_debug_force_sync_read, OPT_BOOL, false) // always read synchronously (go to osds)
455 OPTION(client_debug_inject_tick_delay, OPT_INT, 0) // delay the client tick for a number of seconds
456 OPTION(client_max_inline_size, OPT_U64, 4096)
457 OPTION(client_inject_release_failure, OPT_BOOL, false) // synthetic client bug for testing
458 OPTION(client_inject_fixed_oldest_tid, OPT_BOOL, false) // synthetic client bug for testing
459 OPTION(client_metadata, OPT_STR, "")
460 OPTION(client_acl_type, OPT_STR, "")
461 OPTION(client_permissions, OPT_BOOL, true)
462 OPTION(client_dirsize_rbytes, OPT_BOOL, true)
463
464 // note: the max amount of "in flight" dirty data is roughly (max - target)
465 OPTION(fuse_use_invalidate_cb, OPT_BOOL, true) // use fuse 2.8+ invalidate callback to keep page cache consistent
466 OPTION(fuse_disable_pagecache, OPT_BOOL, false)
467 OPTION(fuse_allow_other, OPT_BOOL, true)
468 OPTION(fuse_default_permissions, OPT_BOOL, false)
469 OPTION(fuse_big_writes, OPT_BOOL, true)
470 OPTION(fuse_atomic_o_trunc, OPT_BOOL, true)
471 OPTION(fuse_debug, OPT_BOOL, false)
472 OPTION(fuse_multithreaded, OPT_BOOL, true)
473 OPTION(fuse_require_active_mds, OPT_BOOL, true) // if ceph_fuse requires active mds server
474 OPTION(fuse_syncfs_on_mksnap, OPT_BOOL, true)
475 OPTION(fuse_set_user_groups, OPT_BOOL, false) // if ceph_fuse fills in group lists or not
476
477 OPTION(client_try_dentry_invalidate, OPT_BOOL, true) // the client should try to use dentry invalidation instead of remounting, on kernels it believes that will work for
478 OPTION(client_die_on_failed_remount, OPT_BOOL, true)
479 OPTION(client_check_pool_perm, OPT_BOOL, true)
480 OPTION(client_use_faked_inos, OPT_BOOL, false)
481 OPTION(client_mds_namespace, OPT_STR, "")
482
483 OPTION(crush_location, OPT_STR, "") // whitespace-separated list of key=value pairs describing crush location
484 OPTION(crush_location_hook, OPT_STR, "")
485 OPTION(crush_location_hook_timeout, OPT_INT, 10)
486
487 OPTION(objecter_tick_interval, OPT_DOUBLE, 5.0)
488 OPTION(objecter_timeout, OPT_DOUBLE, 10.0) // before we ask for a map
489 OPTION(objecter_inflight_op_bytes, OPT_U64, 1024*1024*100) // max in-flight data (both directions)
490 OPTION(objecter_inflight_ops, OPT_U64, 1024) // max in-flight ios
491 OPTION(objecter_completion_locks_per_session, OPT_U64, 32) // num of completion locks per each session, for serializing same object responses
492 OPTION(objecter_inject_no_watch_ping, OPT_BOOL, false) // suppress watch pings
493 OPTION(objecter_retry_writes_after_first_reply, OPT_BOOL, false) // ignore the first reply for each write, and resend the osd op instead
494 OPTION(objecter_debug_inject_relock_delay, OPT_BOOL, false)
495
496 // Max number of deletes at once in a single Filer::purge call
497 OPTION(filer_max_purge_ops, OPT_U32, 10)
498 // Max number of truncates at once in a single Filer::truncate call
499 OPTION(filer_max_truncate_ops, OPT_U32, 128)
500
501 OPTION(journaler_write_head_interval, OPT_INT, 15)
502 OPTION(journaler_prefetch_periods, OPT_INT, 10) // * journal object size
503 OPTION(journaler_prezero_periods, OPT_INT, 5) // * journal object size
504 OPTION(mds_data, OPT_STR, "/var/lib/ceph/mds/$cluster-$id")
505 OPTION(mds_max_file_size, OPT_U64, 1ULL << 40) // Used when creating new CephFS. Change with 'ceph mds set max_file_size <size>' afterwards
506 // max xattr kv pairs size for each dir/file
507 OPTION(mds_max_xattr_pairs_size, OPT_U32, 64 << 10)
508 OPTION(mds_cache_size, OPT_INT, 100000)
509 OPTION(mds_cache_mid, OPT_FLOAT, .7)
510 OPTION(mds_max_file_recover, OPT_U32, 32)
511 OPTION(mds_dir_max_commit_size, OPT_INT, 10) // MB
512 OPTION(mds_dir_keys_per_op, OPT_INT, 16384)
513 OPTION(mds_decay_halflife, OPT_FLOAT, 5)
514 OPTION(mds_beacon_interval, OPT_FLOAT, 4)
515 OPTION(mds_beacon_grace, OPT_FLOAT, 15)
516 OPTION(mds_enforce_unique_name, OPT_BOOL, true)
517 OPTION(mds_blacklist_interval, OPT_FLOAT, 24.0*60.0) // how long to blacklist failed nodes
518
519 OPTION(mds_session_timeout, OPT_FLOAT, 60) // cap bits and leases time out if client idle
520 OPTION(mds_session_blacklist_on_timeout, OPT_BOOL, true) // whether to blacklist clients whose sessions are dropped due to timeout
521 OPTION(mds_session_blacklist_on_evict, OPT_BOOL, true) // whether to blacklist clients whose sessions are dropped via admin commands
522
523 OPTION(mds_sessionmap_keys_per_op, OPT_U32, 1024) // how many sessions should I try to load/store in a single OMAP operation?
524 OPTION(mds_revoke_cap_timeout, OPT_FLOAT, 60) // detect clients which aren't revoking caps
525 OPTION(mds_recall_state_timeout, OPT_FLOAT, 60) // detect clients which aren't trimming caps
526 OPTION(mds_freeze_tree_timeout, OPT_FLOAT, 30) // detecting freeze tree deadlock
527 OPTION(mds_session_autoclose, OPT_FLOAT, 300) // autoclose idle session
528 OPTION(mds_health_summarize_threshold, OPT_INT, 10) // collapse N-client health metrics to a single 'many'
529 OPTION(mds_health_cache_threshold, OPT_FLOAT, 1.5) // warn on cache size if it exceeds mds_cache_size by this factor
530 OPTION(mds_reconnect_timeout, OPT_FLOAT, 45) // seconds to wait for clients during mds restart
531 // make it (mds_session_timeout - mds_beacon_grace)
532 OPTION(mds_tick_interval, OPT_FLOAT, 5)
533 OPTION(mds_dirstat_min_interval, OPT_FLOAT, 1) // try to avoid propagating more often than this
534 OPTION(mds_scatter_nudge_interval, OPT_FLOAT, 5) // how quickly dirstat changes propagate up the hierarchy
535 OPTION(mds_client_prealloc_inos, OPT_INT, 1000)
536 OPTION(mds_early_reply, OPT_BOOL, true)
537 OPTION(mds_default_dir_hash, OPT_INT, CEPH_STR_HASH_RJENKINS)
538 OPTION(mds_log_pause, OPT_BOOL, false)
539 OPTION(mds_log_skip_corrupt_events, OPT_BOOL, false)
540 OPTION(mds_log_max_events, OPT_INT, -1)
541 OPTION(mds_log_events_per_segment, OPT_INT, 1024)
542 OPTION(mds_log_segment_size, OPT_INT, 0) // segment size for mds log, default to default file_layout_t
543 OPTION(mds_log_max_segments, OPT_U32, 30)
544 OPTION(mds_log_max_expiring, OPT_INT, 20)
545 OPTION(mds_bal_export_pin, OPT_BOOL, true) // allow clients to pin directory trees to ranks
546 OPTION(mds_bal_sample_interval, OPT_DOUBLE, 3.0) // every 3 seconds
547 OPTION(mds_bal_replicate_threshold, OPT_FLOAT, 8000)
548 OPTION(mds_bal_unreplicate_threshold, OPT_FLOAT, 0)
549 OPTION(mds_bal_frag, OPT_BOOL, true)
550 OPTION(mds_bal_split_size, OPT_INT, 10000)
551 OPTION(mds_bal_split_rd, OPT_FLOAT, 25000)
552 OPTION(mds_bal_split_wr, OPT_FLOAT, 10000)
553 OPTION(mds_bal_split_bits, OPT_INT, 3)
554 OPTION(mds_bal_merge_size, OPT_INT, 50)
555 OPTION(mds_bal_interval, OPT_INT, 10) // seconds
556 OPTION(mds_bal_fragment_interval, OPT_INT, 5) // seconds
557 OPTION(mds_bal_fragment_size_max, OPT_INT, 10000*10) // order of magnitude higher than split size
558 OPTION(mds_bal_fragment_fast_factor, OPT_FLOAT, 1.5) // multiple of size_max that triggers immediate split
559 OPTION(mds_bal_idle_threshold, OPT_FLOAT, 0)
560 OPTION(mds_bal_max, OPT_INT, -1)
561 OPTION(mds_bal_max_until, OPT_INT, -1)
562 OPTION(mds_bal_mode, OPT_INT, 0)
563 OPTION(mds_bal_min_rebalance, OPT_FLOAT, .1) // must be this much above average before we export anything
564 OPTION(mds_bal_min_start, OPT_FLOAT, .2) // if we need less than this, we don't do anything
565 OPTION(mds_bal_need_min, OPT_FLOAT, .8) // take within this range of what we need
566 OPTION(mds_bal_need_max, OPT_FLOAT, 1.2)
567 OPTION(mds_bal_midchunk, OPT_FLOAT, .3) // any sub bigger than this taken in full
568 OPTION(mds_bal_minchunk, OPT_FLOAT, .001) // never take anything smaller than this
569 OPTION(mds_bal_target_decay, OPT_DOUBLE, 10.0) // target decay half-life in MDSMap (2x larger is approx. 2x slower)
570 OPTION(mds_replay_interval, OPT_FLOAT, 1.0) // time to wait before starting replay again
571 OPTION(mds_shutdown_check, OPT_INT, 0)
572 OPTION(mds_thrash_exports, OPT_INT, 0)
573 OPTION(mds_thrash_fragments, OPT_INT, 0)
574 OPTION(mds_dump_cache_on_map, OPT_BOOL, false)
575 OPTION(mds_dump_cache_after_rejoin, OPT_BOOL, false)
576 OPTION(mds_verify_scatter, OPT_BOOL, false)
577 OPTION(mds_debug_scatterstat, OPT_BOOL, false)
578 OPTION(mds_debug_frag, OPT_BOOL, false)
579 OPTION(mds_debug_auth_pins, OPT_BOOL, false)
580 OPTION(mds_debug_subtrees, OPT_BOOL, false)
581 OPTION(mds_kill_mdstable_at, OPT_INT, 0)
582 OPTION(mds_kill_export_at, OPT_INT, 0)
583 OPTION(mds_kill_import_at, OPT_INT, 0)
584 OPTION(mds_kill_link_at, OPT_INT, 0)
585 OPTION(mds_kill_rename_at, OPT_INT, 0)
586 OPTION(mds_kill_openc_at, OPT_INT, 0)
587 OPTION(mds_kill_journal_at, OPT_INT, 0)
588 OPTION(mds_kill_journal_expire_at, OPT_INT, 0)
589 OPTION(mds_kill_journal_replay_at, OPT_INT, 0)
590 OPTION(mds_journal_format, OPT_U32, 1) // Default to most recent JOURNAL_FORMAT_*
591 OPTION(mds_kill_create_at, OPT_INT, 0)
592 OPTION(mds_inject_traceless_reply_probability, OPT_DOUBLE, 0) /* percentage
593 of MDS modify replies to skip sending the
594 client a trace on [0-1]*/
595 OPTION(mds_wipe_sessions, OPT_BOOL, false)
596 OPTION(mds_wipe_ino_prealloc, OPT_BOOL, false)
597 OPTION(mds_skip_ino, OPT_INT, 0)
598 OPTION(mds_standby_for_name, OPT_STR, "")
599 OPTION(mds_standby_for_rank, OPT_INT, -1)
600 OPTION(mds_standby_for_fscid, OPT_INT, -1)
601 OPTION(mds_standby_replay, OPT_BOOL, false)
602 OPTION(mds_enable_op_tracker, OPT_BOOL, true) // enable/disable MDS op tracking
603 OPTION(mds_op_history_size, OPT_U32, 20) // Max number of completed ops to track
604 OPTION(mds_op_history_duration, OPT_U32, 600) // Oldest completed op to track
605 OPTION(mds_op_complaint_time, OPT_FLOAT, 30) // how many seconds old makes an op complaint-worthy
606 OPTION(mds_op_log_threshold, OPT_INT, 5) // how many op log messages to show in one go
607 OPTION(mds_snap_min_uid, OPT_U32, 0) // The minimum UID required to create a snapshot
608 OPTION(mds_snap_max_uid, OPT_U32, 4294967294) // The maximum UID allowed to create a snapshot
609 OPTION(mds_snap_rstat, OPT_BOOL, false) // enable/disable nested stat for snapshot
610 OPTION(mds_verify_backtrace, OPT_U32, 1)
611 // detect clients which aren't trimming completed requests
612 OPTION(mds_max_completed_flushes, OPT_U32, 100000)
613 OPTION(mds_max_completed_requests, OPT_U32, 100000)
614
615 OPTION(mds_action_on_write_error, OPT_U32, 1) // 0: ignore; 1: force readonly; 2: crash
616 OPTION(mds_mon_shutdown_timeout, OPT_DOUBLE, 5)
617
618 // Maximum number of concurrent stray files to purge
619 OPTION(mds_max_purge_files, OPT_U32, 64)
620 // Maximum number of concurrent RADOS ops to issue in purging
621 OPTION(mds_max_purge_ops, OPT_U32, 8192)
622 // Maximum number of concurrent RADOS ops to issue in purging, scaled by PG count
623 OPTION(mds_max_purge_ops_per_pg, OPT_FLOAT, 0.5)
624
625 OPTION(mds_purge_queue_busy_flush_period, OPT_FLOAT, 1.0)
626
627 OPTION(mds_root_ino_uid, OPT_INT, 0) // The UID of / on new filesystems
628 OPTION(mds_root_ino_gid, OPT_INT, 0) // The GID of / on new filesystems
629
630 OPTION(mds_max_scrub_ops_in_progress, OPT_INT, 5) // the number of simultaneous scrubs allowed
631
632 // Maximum number of damaged frags/dentries before whole MDS rank goes damaged
633 OPTION(mds_damage_table_max_entries, OPT_INT, 10000)
634
635 // Maximum increment for client writable range, counted by number of objects
636 OPTION(mds_client_writeable_range_max_inc_objs, OPT_U32, 1024)
637
638 // verify backend can support configured max object name length
639 OPTION(osd_check_max_object_name_len_on_startup, OPT_BOOL, true)
640
641 // Maximum number of backfills to or from a single osd
642 OPTION(osd_max_backfills, OPT_U64, 1)
643
644 // Minimum recovery priority (255 = max, smaller = lower)
645 OPTION(osd_min_recovery_priority, OPT_INT, 0)
646
647 // Seconds to wait before retrying refused backfills
648 OPTION(osd_backfill_retry_interval, OPT_DOUBLE, 30.0)
649
650 // Seconds to wait before retrying refused recovery
651 OPTION(osd_recovery_retry_interval, OPT_DOUBLE, 30.0)
652
653 // max agent flush ops
654 OPTION(osd_agent_max_ops, OPT_INT, 4)
655 OPTION(osd_agent_max_low_ops, OPT_INT, 2)
656 OPTION(osd_agent_min_evict_effort, OPT_FLOAT, .1)
657 OPTION(osd_agent_quantize_effort, OPT_FLOAT, .1)
658 OPTION(osd_agent_delay_time, OPT_FLOAT, 5.0)
659
660 // osd ignore history.last_epoch_started in find_best_info
661 OPTION(osd_find_best_info_ignore_history_les, OPT_BOOL, false)
662
663 // decay atime and hist histograms after how many objects go by
664 OPTION(osd_agent_hist_halflife, OPT_INT, 1000)
665
666 // must be this amount over the threshold to enable,
667 // this amount below the threshold to disable.
668 OPTION(osd_agent_slop, OPT_FLOAT, .02)
669
670 OPTION(osd_uuid, OPT_UUID, uuid_d())
671 OPTION(osd_data, OPT_STR, "/var/lib/ceph/osd/$cluster-$id")
672 OPTION(osd_journal, OPT_STR, "/var/lib/ceph/osd/$cluster-$id/journal")
673 OPTION(osd_journal_size, OPT_INT, 5120) // in mb
674 OPTION(osd_journal_flush_on_shutdown, OPT_BOOL, true) // Flush journal to data store on shutdown
675 // flags for specific control purpose during osd mount() process.
676 // e.g., can be 1 to skip over replaying journal
677 // or 2 to skip over mounting omap or 3 to skip over both.
678 // This might be helpful in case the journal is totally corrupted
679 // and we still want to bring the osd daemon back normally, etc.
680 OPTION(osd_os_flags, OPT_U32, 0)
681 OPTION(osd_max_write_size, OPT_INT, 90)
682 OPTION(osd_max_pgls, OPT_U64, 1024) // max number of pgls entries to return
683 OPTION(osd_client_message_size_cap, OPT_U64, 500*1024L*1024L) // client data allowed in-memory (in bytes)
684 OPTION(osd_client_message_cap, OPT_U64, 100) // num client messages allowed in-memory
685 OPTION(osd_pg_bits, OPT_INT, 6) // bits per osd
686 OPTION(osd_pgp_bits, OPT_INT, 6) // bits per osd
687 OPTION(osd_crush_update_weight_set, OPT_BOOL, true) // update weight set while updating weights
688 OPTION(osd_crush_chooseleaf_type, OPT_INT, 1) // 1 = host
689 OPTION(osd_pool_use_gmt_hitset, OPT_BOOL, true) // try to use gmt for hitset archive names if all osds in cluster support it.
690 OPTION(osd_crush_update_on_start, OPT_BOOL, true)
691 OPTION(osd_crush_initial_weight, OPT_DOUBLE, -1) // if >=0, the initial weight is for newly added osds.
692 OPTION(osd_pool_default_crush_rule, OPT_INT, -1)
693 OPTION(osd_pool_erasure_code_stripe_unit, OPT_U32, 4096) // in bytes
694 OPTION(osd_pool_default_size, OPT_INT, 3)
695 OPTION(osd_pool_default_min_size, OPT_INT, 0) // 0 means no specific default; ceph will use size-size/2
696 OPTION(osd_pool_default_pg_num, OPT_INT, 8) // number of PGs for new pools. Configure in global or mon section of ceph.conf
697 OPTION(osd_pool_default_pgp_num, OPT_INT, 8) // number of PGs for placement purposes. Should be equal to pg_num
698 OPTION(osd_pool_default_erasure_code_profile,
699 OPT_STR,
700 "plugin=jerasure "
701 "technique=reed_sol_van "
702 "k=2 "
703 "m=1 "
704 ) // default properties of osd pool create
705 OPTION(osd_erasure_code_plugins, OPT_STR,
706 "jerasure"
707 " lrc"
708 #ifdef HAVE_BETTER_YASM_ELF64
709 " isa"
710 #endif
711 ) // list of erasure code plugins
712
713 // Allows the "peered" state for recovery and backfill below min_size
714 OPTION(osd_allow_recovery_below_min_size, OPT_BOOL, true)
715
716 OPTION(osd_pool_default_flags, OPT_INT, 0) // default flags for new pools
717 OPTION(osd_pool_default_flag_hashpspool, OPT_BOOL, true) // use new pg hashing to prevent pool/pg overlap
718 OPTION(osd_pool_default_flag_nodelete, OPT_BOOL, false) // pool can't be deleted
719 OPTION(osd_pool_default_flag_nopgchange, OPT_BOOL, false) // pool's pg and pgp num can't be changed
720 OPTION(osd_pool_default_flag_nosizechange, OPT_BOOL, false) // pool's size and min size can't be changed
721 OPTION(osd_pool_default_hit_set_bloom_fpp, OPT_FLOAT, .05)
722 OPTION(osd_pool_default_cache_target_dirty_ratio, OPT_FLOAT, .4)
723 OPTION(osd_pool_default_cache_target_dirty_high_ratio, OPT_FLOAT, .6)
724 OPTION(osd_pool_default_cache_target_full_ratio, OPT_FLOAT, .8)
725 OPTION(osd_pool_default_cache_min_flush_age, OPT_INT, 0) // seconds
726 OPTION(osd_pool_default_cache_min_evict_age, OPT_INT, 0) // seconds
727 OPTION(osd_pool_default_cache_max_evict_check_size, OPT_INT, 10) // max size to check for eviction
728 OPTION(osd_hit_set_min_size, OPT_INT, 1000) // min target size for a HitSet
729 OPTION(osd_hit_set_max_size, OPT_INT, 100000) // max target size for a HitSet
730 OPTION(osd_hit_set_namespace, OPT_STR, ".ceph-internal") // rados namespace for hit_set tracking
731
732 // conservative default throttling values
733 OPTION(osd_tier_promote_max_objects_sec, OPT_U64, 25)
734 OPTION(osd_tier_promote_max_bytes_sec, OPT_U64, 5 * 1024*1024)
735
736 OPTION(osd_tier_default_cache_mode, OPT_STR, "writeback")
737 OPTION(osd_tier_default_cache_hit_set_count, OPT_INT, 4)
738 OPTION(osd_tier_default_cache_hit_set_period, OPT_INT, 1200)
739 OPTION(osd_tier_default_cache_hit_set_type, OPT_STR, "bloom")
740 OPTION(osd_tier_default_cache_min_read_recency_for_promote, OPT_INT, 1) // number of recent HitSets the object must appear in to be promoted (on read)
741 OPTION(osd_tier_default_cache_min_write_recency_for_promote, OPT_INT, 1) // number of recent HitSets the object must appear in to be promoted (on write)
742 OPTION(osd_tier_default_cache_hit_set_grade_decay_rate, OPT_INT, 20)
743 OPTION(osd_tier_default_cache_hit_set_search_last_n, OPT_INT, 1)
744
745 OPTION(osd_map_dedup, OPT_BOOL, true)
746 OPTION(osd_map_max_advance, OPT_INT, 40) // make this < cache_size!
747 OPTION(osd_map_cache_size, OPT_INT, 50)
748 OPTION(osd_map_message_max, OPT_INT, 40) // max maps per MOSDMap message
749 OPTION(osd_map_share_max_epochs, OPT_INT, 40) // cap on # of inc maps we send to peers, clients
750 OPTION(osd_inject_bad_map_crc_probability, OPT_FLOAT, 0)
751 OPTION(osd_inject_failure_on_pg_removal, OPT_BOOL, false)
752 // shut down the OSD if its status flips more than max_markdown_count times in the most recent max_markdown_period seconds
753 OPTION(osd_max_markdown_period , OPT_INT, 600)
754 OPTION(osd_max_markdown_count, OPT_INT, 5)
755
756 OPTION(osd_peering_wq_threads, OPT_INT, 2)
757 OPTION(osd_peering_wq_batch_size, OPT_U64, 20)
758 OPTION(osd_op_pq_max_tokens_per_priority, OPT_U64, 4194304)
759 OPTION(osd_op_pq_min_cost, OPT_U64, 65536)
760 OPTION(osd_disk_threads, OPT_INT, 1)
761 OPTION(osd_disk_thread_ioprio_class, OPT_STR, "") // rt (realtime), be (best effort), idle
762 OPTION(osd_disk_thread_ioprio_priority, OPT_INT, -1) // 0-7
763 OPTION(osd_recover_clone_overlap, OPT_BOOL, true) // preserve clone_overlap during recovery/migration
764 OPTION(osd_op_num_threads_per_shard, OPT_INT, 0)
765 OPTION(osd_op_num_threads_per_shard_hdd, OPT_INT, 1)
766 OPTION(osd_op_num_threads_per_shard_ssd, OPT_INT, 2)
767 OPTION(osd_op_num_shards, OPT_INT, 0)
768 OPTION(osd_op_num_shards_hdd, OPT_INT, 5)
769 OPTION(osd_op_num_shards_ssd, OPT_INT, 8)
770 OPTION(osd_op_queue, OPT_STR, "wpq") // PrioritizedQueue (prio), Weighted Priority Queue (wpq), or debug_random
771 OPTION(osd_op_queue_cut_off, OPT_STR, "low") // Min priority to go to strict queue. (low, high, debug_random)
772
773 OPTION(osd_ignore_stale_divergent_priors, OPT_BOOL, false) // do not assert on divergent_prior entries which aren't in the log and whose on-disk objects are newer
774
775 // Set to true for testing. Users should NOT set this.
776 // If set to true even after reading enough shards to
777 // decode the object, any error will be reported.
778 OPTION(osd_read_ec_check_for_errors, OPT_BOOL, false) // return error if any ec shard has an error
779
780 // Only use clone_overlap for recovery if there are fewer than
781 // osd_recover_clone_overlap_limit entries in the overlap set
782 OPTION(osd_recover_clone_overlap_limit, OPT_INT, 10)
783
784 OPTION(osd_backfill_scan_min, OPT_INT, 64)
785 OPTION(osd_backfill_scan_max, OPT_INT, 512)
786 OPTION(osd_op_thread_timeout, OPT_INT, 15)
787 OPTION(osd_op_thread_suicide_timeout, OPT_INT, 150)
788 OPTION(osd_recovery_thread_timeout, OPT_INT, 30)
789 OPTION(osd_recovery_thread_suicide_timeout, OPT_INT, 300)
790 OPTION(osd_recovery_sleep, OPT_FLOAT, 0.01) // seconds to sleep between recovery ops
791 OPTION(osd_snap_trim_sleep, OPT_DOUBLE, 0)
792 OPTION(osd_scrub_invalid_stats, OPT_BOOL, true)
793 OPTION(osd_remove_thread_timeout, OPT_INT, 60*60)
794 OPTION(osd_remove_thread_suicide_timeout, OPT_INT, 10*60*60)
795 OPTION(osd_command_thread_timeout, OPT_INT, 10*60)
796 OPTION(osd_command_thread_suicide_timeout, OPT_INT, 15*60)
797 OPTION(osd_heartbeat_addr, OPT_ADDR, entity_addr_t())
798 OPTION(osd_heartbeat_interval, OPT_INT, 6) // (seconds) how often we ping peers
799
800 // (seconds) how long before we decide a peer has failed
801 // This setting is read by the MONs and OSDs and has to be set to an equal value in both settings of the configuration
802 OPTION(osd_heartbeat_grace, OPT_INT, 20)
803 OPTION(osd_heartbeat_min_peers, OPT_INT, 10) // minimum number of peers
804 OPTION(osd_heartbeat_use_min_delay_socket, OPT_BOOL, false) // prio the heartbeat tcp socket and set dscp as CS6 on it if true
805 OPTION(osd_heartbeat_min_size, OPT_INT, 2000) // the minimum size of OSD heartbeat messages to send
806
807 // max number of parallel snap trims/pg
808 OPTION(osd_pg_max_concurrent_snap_trims, OPT_U64, 2)
809 // max number of trimming pgs
810 OPTION(osd_max_trimming_pgs, OPT_U64, 2)
811
812 // minimum number of peers that must be reachable to mark ourselves
813 // back up after being wrongly marked down.
814 OPTION(osd_heartbeat_min_healthy_ratio, OPT_FLOAT, .33)
815
816 OPTION(osd_mon_heartbeat_interval, OPT_INT, 30) // (seconds) how often to ping monitor if no peers
817 OPTION(osd_mon_report_interval_max, OPT_INT, 600)
818 OPTION(osd_mon_report_interval_min, OPT_INT, 5) // pg stats, failures, up_thru, boot.
819 OPTION(osd_mon_report_max_in_flight, OPT_INT, 2) // max updates in flight
820 OPTION(osd_beacon_report_interval, OPT_INT, 300) // (seconds) how often to send beacon message to monitor
821 OPTION(osd_pg_stat_report_interval_max, OPT_INT, 500) // report pg stats for any given pg at least this often
822 OPTION(osd_mon_ack_timeout, OPT_DOUBLE, 30.0) // time out a mon if it doesn't ack stats
823 OPTION(osd_stats_ack_timeout_factor, OPT_DOUBLE, 2.0) // multiples of mon_ack_timeout
824 OPTION(osd_stats_ack_timeout_decay, OPT_DOUBLE, .9)
825 OPTION(osd_default_data_pool_replay_window, OPT_INT, 45)
826 OPTION(osd_preserve_trimmed_log, OPT_BOOL, false)
827 OPTION(osd_auto_mark_unfound_lost, OPT_BOOL, false)
828 OPTION(osd_recovery_delay_start, OPT_FLOAT, 0)
829 OPTION(osd_recovery_max_active, OPT_U64, 3)
830 OPTION(osd_recovery_max_single_start, OPT_U64, 1)
831 OPTION(osd_recovery_max_chunk, OPT_U64, 8<<20) // max size of push chunk
832 OPTION(osd_recovery_max_omap_entries_per_chunk, OPT_U64, 64000) // max number of omap entries per chunk; 0 to disable limit
833 OPTION(osd_copyfrom_max_chunk, OPT_U64, 8<<20) // max size of a COPYFROM chunk
834 OPTION(osd_push_per_object_cost, OPT_U64, 1000) // push cost per object
835 OPTION(osd_max_push_cost, OPT_U64, 8<<20) // max size of push message
836 OPTION(osd_max_push_objects, OPT_U64, 10) // max objects in single push op
837 OPTION(osd_recovery_forget_lost_objects, OPT_BOOL, false) // off for now
838 OPTION(osd_max_scrubs, OPT_INT, 1)
839 OPTION(osd_scrub_during_recovery, OPT_BOOL, false) // Allow new scrubs to start while recovery is active on the OSD
840 OPTION(osd_scrub_begin_hour, OPT_INT, 0)
841 OPTION(osd_scrub_end_hour, OPT_INT, 24)
842 OPTION(osd_scrub_load_threshold, OPT_FLOAT, 0.5)
843 OPTION(osd_scrub_min_interval, OPT_FLOAT, 60*60*24) // if load is low
844 OPTION(osd_scrub_max_interval, OPT_FLOAT, 7*60*60*24) // regardless of load
845 OPTION(osd_scrub_interval_randomize_ratio, OPT_FLOAT, 0.5) // randomize the scheduled scrub in the span of [min,min*(1+randomize_ratio))
846 OPTION(osd_scrub_backoff_ratio, OPT_DOUBLE, .66) // the probability to back off the scheduled scrub
847 OPTION(osd_scrub_chunk_min, OPT_INT, 5)
848 OPTION(osd_scrub_chunk_max, OPT_INT, 25)
849 OPTION(osd_scrub_sleep, OPT_FLOAT, 0) // sleep between [deep]scrub ops
850 OPTION(osd_scrub_auto_repair, OPT_BOOL, false) // whether auto-repair inconsistencies upon deep-scrubbing
851 OPTION(osd_scrub_auto_repair_num_errors, OPT_U32, 5) // only auto-repair when number of errors is below this threshold
852 OPTION(osd_deep_scrub_interval, OPT_FLOAT, 60*60*24*7) // once a week
853 OPTION(osd_deep_scrub_randomize_ratio, OPT_FLOAT, 0.15) // scrubs will randomly become deep scrubs at this rate (0.15 -> 15% of scrubs are deep)
854 OPTION(osd_deep_scrub_stride, OPT_INT, 524288)
855 OPTION(osd_deep_scrub_update_digest_min_age, OPT_INT, 2*60*60) // objects must be this old (seconds) before we update the whole-object digest on scrub
856 OPTION(osd_scan_list_ping_tp_interval, OPT_U64, 100)
857 OPTION(osd_class_dir, OPT_STR, CEPH_LIBDIR "/rados-classes") // where rados plugins are stored
858 OPTION(osd_open_classes_on_start, OPT_BOOL, true)
859 OPTION(osd_class_load_list, OPT_STR, "cephfs hello journal lock log numops "
860 "rbd refcount replica_log rgw statelog timeindex user version") // list of object classes allowed to be loaded (allow all: *)
861 OPTION(osd_class_default_list, OPT_STR, "cephfs hello journal lock log numops "
862 "rbd refcount replica_log rgw statelog timeindex user version") // list of object classes with default execute perm (allow all: *)
863 OPTION(osd_check_for_log_corruption, OPT_BOOL, false)
864 OPTION(osd_use_stale_snap, OPT_BOOL, false)
865 OPTION(osd_rollback_to_cluster_snap, OPT_STR, "")
866 OPTION(osd_default_notify_timeout, OPT_U32, 30) // default notify timeout in seconds
867 OPTION(osd_kill_backfill_at, OPT_INT, 0)
868
869 // Bounds how infrequently a new map epoch will be persisted for a pg
870 OPTION(osd_pg_epoch_persisted_max_stale, OPT_U32, 40) // make this < map_cache_size!
871
872 OPTION(osd_min_pg_log_entries, OPT_U32, 3000) // number of entries to keep in the pg log when trimming it
873 OPTION(osd_max_pg_log_entries, OPT_U32, 10000) // max entries, say when degraded, before we trim
874 OPTION(osd_force_recovery_pg_log_entries_factor, OPT_FLOAT, 1.3) // max entries factor before force recovery
875 OPTION(osd_pg_log_trim_min, OPT_U32, 100)
876 OPTION(osd_op_complaint_time, OPT_FLOAT, 30) // how many seconds old makes an op complaint-worthy
877 OPTION(osd_command_max_records, OPT_INT, 256)
878 OPTION(osd_max_pg_blocked_by, OPT_U32, 16) // max peer osds to report that are blocking our progress
879 OPTION(osd_op_log_threshold, OPT_INT, 5) // how many op log messages to show in one go
880 OPTION(osd_verify_sparse_read_holes, OPT_BOOL, false) // read fiemap-reported holes and verify they are zeros
881 OPTION(osd_backoff_on_unfound, OPT_BOOL, true) // object unfound
882 OPTION(osd_backoff_on_degraded, OPT_BOOL, false) // [mainly for debug?] object unreadable/writeable
883 OPTION(osd_backoff_on_down, OPT_BOOL, true) // pg in down/incomplete state
884 OPTION(osd_backoff_on_peering, OPT_BOOL, false) // [debug] pg peering
885 OPTION(osd_debug_crash_on_ignored_backoff, OPT_BOOL, false) // crash osd if client ignores a backoff; useful for debugging
886 OPTION(osd_debug_inject_dispatch_delay_probability, OPT_DOUBLE, 0)
887 OPTION(osd_debug_inject_dispatch_delay_duration, OPT_DOUBLE, .1)
888 OPTION(osd_debug_drop_ping_probability, OPT_DOUBLE, 0)
889 OPTION(osd_debug_drop_ping_duration, OPT_INT, 0)
890 OPTION(osd_debug_op_order, OPT_BOOL, false)
891 OPTION(osd_debug_verify_missing_on_start, OPT_BOOL, false)
892 OPTION(osd_debug_scrub_chance_rewrite_digest, OPT_U64, 0)
893 OPTION(osd_debug_verify_snaps_on_info, OPT_BOOL, false)
894 OPTION(osd_debug_verify_stray_on_activate, OPT_BOOL, false)
895 OPTION(osd_debug_skip_full_check_in_backfill_reservation, OPT_BOOL, false)
896 OPTION(osd_debug_reject_backfill_probability, OPT_DOUBLE, 0)
897 OPTION(osd_debug_inject_copyfrom_error, OPT_BOOL, false) // inject failure during copyfrom completion
898 OPTION(osd_debug_misdirected_ops, OPT_BOOL, false)
899 OPTION(osd_debug_skip_full_check_in_recovery, OPT_BOOL, false)
900 OPTION(osd_debug_verify_cached_snaps, OPT_BOOL, false)
901 OPTION(osd_enable_op_tracker, OPT_BOOL, true) // enable/disable OSD op tracking
902 OPTION(osd_num_op_tracker_shard, OPT_U32, 32) // The number of shards for holding the ops
903 OPTION(osd_op_history_size, OPT_U32, 20) // Max number of completed ops to track
904 OPTION(osd_op_history_duration, OPT_U32, 600) // Oldest completed op to track
905 OPTION(osd_op_history_slow_op_size, OPT_U32, 20) // Max number of slow ops to track
906 OPTION(osd_op_history_slow_op_threshold, OPT_DOUBLE, 10.0) // track the op if over this threshold
907 OPTION(osd_target_transaction_size, OPT_INT, 30) // to adjust various transactions that batch smaller items
908 OPTION(osd_failsafe_full_ratio, OPT_FLOAT, .97) // what % full makes an OSD "full" (failsafe)
909 OPTION(osd_fast_fail_on_connection_refused, OPT_BOOL, true) // immediately mark OSDs as down once they refuse to accept connections
910
911 OPTION(osd_pg_object_context_cache_count, OPT_INT, 64)
912 OPTION(osd_tracing, OPT_BOOL, false) // true if LTTng-UST tracepoints should be enabled
913 OPTION(osd_function_tracing, OPT_BOOL, false) // true if function instrumentation should use LTTng
914
915 OPTION(osd_fast_info, OPT_BOOL, true) // use fast info attr, if we can
916
917 // determines whether PGLog::check() compares written out log to stored log
918 OPTION(osd_debug_pg_log_writeout, OPT_BOOL, false)
919 OPTION(osd_loop_before_reset_tphandle, OPT_U32, 64) // Max number of loops before we reset thread-pool's handle
920 // default timeout while calling WaitInterval on an empty queue
921 OPTION(threadpool_default_timeout, OPT_INT, 60)
922 // default wait time for an empty queue before pinging the hb timeout
923 OPTION(threadpool_empty_queue_max_wait, OPT_INT, 2)
924
925 OPTION(leveldb_log_to_ceph_log, OPT_BOOL, true)
926 OPTION(leveldb_write_buffer_size, OPT_U64, 8 *1024*1024) // leveldb write buffer size
927 OPTION(leveldb_cache_size, OPT_U64, 128 *1024*1024) // leveldb cache size
928 OPTION(leveldb_block_size, OPT_U64, 0) // leveldb block size
929 OPTION(leveldb_bloom_size, OPT_INT, 0) // leveldb bloom bits per entry
930 OPTION(leveldb_max_open_files, OPT_INT, 0) // leveldb max open files
931 OPTION(leveldb_compression, OPT_BOOL, true) // leveldb uses compression
932 OPTION(leveldb_paranoid, OPT_BOOL, false) // leveldb paranoid flag
933 OPTION(leveldb_log, OPT_STR, "/dev/null") // enable leveldb log file
934 OPTION(leveldb_compact_on_mount, OPT_BOOL, false)
935
936 OPTION(kinetic_host, OPT_STR, "") // hostname or ip address of a kinetic drive to use
937 OPTION(kinetic_port, OPT_INT, 8123) // port number of the kinetic drive
938 OPTION(kinetic_user_id, OPT_INT, 1) // kinetic user to authenticate as
939 OPTION(kinetic_hmac_key, OPT_STR, "asdfasdf") // kinetic key to authenticate with
940 OPTION(kinetic_use_ssl, OPT_BOOL, false) // whether to secure kinetic traffic with TLS
941
942
943 OPTION(rocksdb_separate_wal_dir, OPT_BOOL, false) // use $path.wal for wal
944 SAFE_OPTION(rocksdb_db_paths, OPT_STR, "") // path,size( path,size)*
945 OPTION(rocksdb_log_to_ceph_log, OPT_BOOL, true) // log to ceph log
946 OPTION(rocksdb_cache_size, OPT_U64, 128*1024*1024) // default rocksdb cache size
947 OPTION(rocksdb_cache_row_ratio, OPT_FLOAT, .2) // ratio of cache for row (vs block)
948 OPTION(rocksdb_cache_shard_bits, OPT_INT, 4) // rocksdb block cache shard bits, 4 bit -> 16 shards
949 OPTION(rocksdb_cache_type, OPT_STR, "lru") // 'lru' or 'clock'
950 OPTION(rocksdb_block_size, OPT_INT, 4*1024) // default rocksdb block size
951 OPTION(rocksdb_perf, OPT_BOOL, false) // Enabling this will have 5-10% impact on performance for the stats collection
952 OPTION(rocksdb_collect_compaction_stats, OPT_BOOL, false) // For rocksdb, this behavior will be an overhead of 5%~10%, collected only if rocksdb_perf is enabled.
953 OPTION(rocksdb_collect_extended_stats, OPT_BOOL, false) // For rocksdb, this behavior will be an overhead of 5%~10%, collected only if rocksdb_perf is enabled.
954 OPTION(rocksdb_collect_memory_stats, OPT_BOOL, false) // For rocksdb, this behavior will be an overhead of 5%~10%, collected only if rocksdb_perf is enabled.
955 OPTION(rocksdb_enable_rmrange, OPT_BOOL, false) // see https://github.com/facebook/rocksdb/blob/master/include/rocksdb/db.h#L253
956
957 // rocksdb options that will be used for omap(if omap_backend is rocksdb)
958 OPTION(filestore_rocksdb_options, OPT_STR, "")
959 // rocksdb options that will be used in monstore
960 OPTION(mon_rocksdb_options, OPT_STR, "write_buffer_size=33554432,compression=kNoCompression")
961
962 /**
963 * osd_*_priority adjust the relative priority of client io, recovery io,
964 * snaptrim io, etc
965 *
966 * osd_*_priority determines the ratio of available io between client and
967 * recovery. Each option may be set between
968 * 1..63.
969 */
970 OPTION(osd_client_op_priority, OPT_U32, 63)
971 OPTION(osd_recovery_op_priority, OPT_U32, 3)
972
973 OPTION(osd_snap_trim_priority, OPT_U32, 5)
974 OPTION(osd_snap_trim_cost, OPT_U32, 1<<20) // set default cost equal to 1MB io
975
976 OPTION(osd_scrub_priority, OPT_U32, 5)
977 // set default cost equal to 50MB io
978 OPTION(osd_scrub_cost, OPT_U32, 50<<20)
979 // set requested scrub priority higher than scrub priority to make the
980 // requested scrubs jump the queue of scheduled scrubs
981 OPTION(osd_requested_scrub_priority, OPT_U32, 120)
982
983 OPTION(osd_recovery_priority, OPT_U32, 5)
984 // set default cost equal to 20MB io
985 OPTION(osd_recovery_cost, OPT_U32, 20<<20)
986
987 /**
988 * osd_recovery_op_warn_multiple scales the normal warning threshold,
989 * osd_op_complaint_time, so that slow recovery ops won't cause noise
990 */
991 OPTION(osd_recovery_op_warn_multiple, OPT_U32, 16)
992
993 // Max time to wait between notifying mon of shutdown and shutting down
994 OPTION(osd_mon_shutdown_timeout, OPT_DOUBLE, 5)
995 OPTION(osd_shutdown_pgref_assert, OPT_BOOL, false) // crash if the OSD has stray PG refs on shutdown
996
997 OPTION(osd_max_object_size, OPT_U64, 128*1024L*1024L) // OSD's maximum object size
998 OPTION(osd_max_object_name_len, OPT_U32, 2048) // max rados object name len
999 OPTION(osd_max_object_namespace_len, OPT_U32, 256) // max rados object namespace len
1000 OPTION(osd_max_attr_name_len, OPT_U32, 100) // max rados attr name len; cannot go higher than 100 chars for file system backends
1001 OPTION(osd_max_attr_size, OPT_U64, 0)
1002
1003 OPTION(osd_max_omap_entries_per_request, OPT_U64, 131072)
1004 OPTION(osd_max_omap_bytes_per_request, OPT_U64, 1<<30)
1005
1006 OPTION(osd_objectstore, OPT_STR, "filestore") // ObjectStore backend type
1007 OPTION(osd_objectstore_tracing, OPT_BOOL, false) // true if LTTng-UST tracepoints should be enabled
1008 OPTION(osd_objectstore_fuse, OPT_BOOL, false)
1009
1010 OPTION(osd_bench_small_size_max_iops, OPT_U32, 100) // 100 IOPS
1011 OPTION(osd_bench_large_size_max_throughput, OPT_U64, 100 << 20) // 100 MB/s
1012 OPTION(osd_bench_max_block_size, OPT_U64, 64 << 20) // cap the block size at 64MB
1013 OPTION(osd_bench_duration, OPT_U32, 30) // duration of 'osd bench', capped at 30s to avoid triggering timeouts
1014
1015 OPTION(osd_blkin_trace_all, OPT_BOOL, false) // create a blkin trace for all osd requests
1016 OPTION(osdc_blkin_trace_all, OPT_BOOL, false) // create a blkin trace for all objecter requests
1017
1018 OPTION(osd_discard_disconnected_ops, OPT_BOOL, true)
1019
1020 OPTION(memstore_device_bytes, OPT_U64, 1024*1024*1024)
1021 OPTION(memstore_page_set, OPT_BOOL, true)
1022 OPTION(memstore_page_size, OPT_U64, 64 << 10)
1023
1024 OPTION(bdev_debug_inflight_ios, OPT_BOOL, false)
1025 OPTION(bdev_inject_crash, OPT_INT, 0) // if N>0, then ~ 1/N IOs will complete before we crash on flush.
1026 OPTION(bdev_inject_crash_flush_delay, OPT_INT, 2) // wait N more seconds on flush
1027 OPTION(bdev_aio, OPT_BOOL, true)
1028 OPTION(bdev_aio_poll_ms, OPT_INT, 250) // milliseconds
1029 OPTION(bdev_aio_max_queue_depth, OPT_INT, 1024)
1030 OPTION(bdev_block_size, OPT_INT, 4096)
1031 OPTION(bdev_debug_aio, OPT_BOOL, false)
1032 OPTION(bdev_debug_aio_suicide_timeout, OPT_FLOAT, 60.0)
1033
1034 // if yes, osd will unbind all NVMe devices from kernel driver and bind them
1035 // to the uio_pci_generic driver. The purpose is to prevent the case where
1036 // NVMe driver is loaded while osd is running.
1037 OPTION(bdev_nvme_unbind_from_kernel, OPT_BOOL, false)
1038 OPTION(bdev_nvme_retry_count, OPT_INT, -1) // -1 means by default which is 4
1039
1040 OPTION(objectstore_blackhole, OPT_BOOL, false)
1041
1042 OPTION(bluefs_alloc_size, OPT_U64, 1048576)
1043 OPTION(bluefs_max_prefetch, OPT_U64, 1048576)
1044 OPTION(bluefs_min_log_runway, OPT_U64, 1048576) // alloc when we get this low
1045 OPTION(bluefs_max_log_runway, OPT_U64, 4194304) // alloc this much at a time
1046 OPTION(bluefs_log_compact_min_ratio, OPT_FLOAT, 5.0) // before we consider
1047 OPTION(bluefs_log_compact_min_size, OPT_U64, 16*1048576) // before we consider
1048 OPTION(bluefs_min_flush_size, OPT_U64, 524288) // ignore flush until its this big
1049 OPTION(bluefs_compact_log_sync, OPT_BOOL, false) // sync or async log compaction?
1050 OPTION(bluefs_buffered_io, OPT_BOOL, false)
1051 OPTION(bluefs_sync_write, OPT_BOOL, false)
1052 OPTION(bluefs_allocator, OPT_STR, "bitmap") // stupid | bitmap
1053 OPTION(bluefs_preextend_wal_files, OPT_BOOL, false) // this *requires* that rocksdb has recycling enabled
1054
1055 OPTION(bluestore_bluefs, OPT_BOOL, true)
1056 OPTION(bluestore_bluefs_env_mirror, OPT_BOOL, false) // mirror to normal Env for debug
1057 OPTION(bluestore_bluefs_min, OPT_U64, 1*1024*1024*1024) // 1gb
1058 OPTION(bluestore_bluefs_min_ratio, OPT_FLOAT, .02) // min fs free / total free
1059 OPTION(bluestore_bluefs_max_ratio, OPT_FLOAT, .90) // max fs free / total free
1060 OPTION(bluestore_bluefs_gift_ratio, OPT_FLOAT, .02) // how much to add at a time
1061 OPTION(bluestore_bluefs_reclaim_ratio, OPT_FLOAT, .20) // how much to reclaim at a time
1062 OPTION(bluestore_bluefs_balance_interval, OPT_FLOAT, 1) // how often (sec) to balance free space between bluefs and bluestore
1063 // If you want to use spdk driver, you need to specify NVMe serial number here
1064 // with "spdk:" prefix.
1065 // Users can use 'lspci -vvv -d 8086:0953 | grep "Device Serial Number"' to
1066 // get the serial number of Intel(R) Fultondale NVMe controllers.
1067 // Example:
1068 // bluestore_block_path = spdk:55cd2e404bd73932
1069 // If you want to run multiple SPDK instances per node, you must specify the
1070 // amount of dpdk memory size in MB each instance will use, to make sure each
1071 // instance uses its own dpdk memory
1072 OPTION(bluestore_spdk_mem, OPT_U32, 512)
1073 // A hexadecimal bit mask of the cores to run on. Note the core numbering can change between platforms and should be determined beforehand.
1074 OPTION(bluestore_spdk_coremask, OPT_STR, "0x3")
1075 // Specify the maximum number of I/Os to complete in one batch while checking queue pair completions.
1076 // The default value 0 lets the SPDK nvme library determine the value.
1077 OPTION(bluestore_spdk_max_io_completion, OPT_U32, 0)
1078 OPTION(bluestore_block_path, OPT_STR, "")
1079 OPTION(bluestore_block_size, OPT_U64, 10ULL * 1024*1024*1024) // 10gb for testing; ULL keeps the product out of int arithmetic (10 GiB > INT_MAX)
1080 OPTION(bluestore_block_create, OPT_BOOL, true)
1081 OPTION(bluestore_block_db_path, OPT_STR, "")
1082 OPTION(bluestore_block_db_size, OPT_U64, 0) // rocksdb ssts (hot/warm)
1083 OPTION(bluestore_block_db_create, OPT_BOOL, false)
1084 OPTION(bluestore_block_wal_path, OPT_STR, "")
1085 OPTION(bluestore_block_wal_size, OPT_U64, 96 * 1024*1024) // rocksdb wal
1086 OPTION(bluestore_block_wal_create, OPT_BOOL, false)
1087 OPTION(bluestore_block_preallocate_file, OPT_BOOL, false) // whether to preallocate space if block/db_path/wal_path is a file rather than a block device.
1088 OPTION(bluestore_csum_type, OPT_STR, "crc32c") // none|xxhash32|xxhash64|crc32c|crc32c_16|crc32c_8
1089 OPTION(bluestore_csum_min_block, OPT_U32, 4096)
1090 OPTION(bluestore_csum_max_block, OPT_U32, 64*1024)
1091 OPTION(bluestore_min_alloc_size, OPT_U32, 0)
1092 OPTION(bluestore_min_alloc_size_hdd, OPT_U32, 64*1024)
1093 OPTION(bluestore_min_alloc_size_ssd, OPT_U32, 16*1024)
1094 OPTION(bluestore_max_alloc_size, OPT_U32, 0)
1095 OPTION(bluestore_prefer_deferred_size, OPT_U32, 0)
1096 OPTION(bluestore_prefer_deferred_size_hdd, OPT_U32, 32768)
1097 OPTION(bluestore_prefer_deferred_size_ssd, OPT_U32, 0)
1098 OPTION(bluestore_compression_mode, OPT_STR, "none") // force|aggressive|passive|none
1099 OPTION(bluestore_compression_algorithm, OPT_STR, "snappy")
1100 OPTION(bluestore_compression_min_blob_size, OPT_U32, 0)
1101 OPTION(bluestore_compression_min_blob_size_hdd, OPT_U32, 128*1024)
1102 OPTION(bluestore_compression_min_blob_size_ssd, OPT_U32, 8*1024)
1103 OPTION(bluestore_compression_max_blob_size, OPT_U32, 0)
1104 OPTION(bluestore_compression_max_blob_size_hdd, OPT_U32, 512*1024)
1105 OPTION(bluestore_compression_max_blob_size_ssd, OPT_U32, 64*1024)
1106 /*
1107 * Specifies minimum expected amount of saved allocation units
1108 * per single blob to enable compressed blobs garbage collection
1109 *
1110 */
1111 OPTION(bluestore_gc_enable_blob_threshold, OPT_INT, 0)
1112 /*
1113 * Specifies minimum expected amount of saved allocation units
1114 * per all blobs to enable compressed blobs garbage collection
1115 *
1116 */
1117 OPTION(bluestore_gc_enable_total_threshold, OPT_INT, 0)
1118
1119 OPTION(bluestore_max_blob_size, OPT_U32, 0)
1120 OPTION(bluestore_max_blob_size_hdd, OPT_U32, 512*1024)
1121 OPTION(bluestore_max_blob_size_ssd, OPT_U32, 64*1024)
1122 /*
1123 * Require the net gain of compression at least to be at this ratio,
1124 * otherwise we don't compress.
1125 * And ask for compressing at least 12.5%(1/8) off, by default.
1126 */
1127 OPTION(bluestore_compression_required_ratio, OPT_DOUBLE, .875)
1128 OPTION(bluestore_extent_map_shard_max_size, OPT_U32, 1200)
1129 OPTION(bluestore_extent_map_shard_target_size, OPT_U32, 500)
1130 OPTION(bluestore_extent_map_shard_min_size, OPT_U32, 150)
1131 OPTION(bluestore_extent_map_shard_target_size_slop, OPT_DOUBLE, .2)
1132 OPTION(bluestore_extent_map_inline_shard_prealloc_size, OPT_U32, 256)
1133 OPTION(bluestore_cache_trim_interval, OPT_DOUBLE, .2)
1134 OPTION(bluestore_cache_trim_max_skip_pinned, OPT_U32, 64) // skip this many onodes pinned in cache before we give up
1135 OPTION(bluestore_cache_type, OPT_STR, "2q") // lru, 2q
1136 OPTION(bluestore_2q_cache_kin_ratio, OPT_DOUBLE, .5) // kin page slot size / max page slot size
1137 OPTION(bluestore_2q_cache_kout_ratio, OPT_DOUBLE, .5) // number of kout page slot / total number of page slot
1138 OPTION(bluestore_cache_size, OPT_U64, 1024*1024*1024)
1139 OPTION(bluestore_cache_meta_ratio, OPT_DOUBLE, .7)
1140 OPTION(bluestore_cache_kv_ratio, OPT_DOUBLE, .2)
1141 OPTION(bluestore_kvbackend, OPT_STR, "rocksdb")
1142 OPTION(bluestore_allocator, OPT_STR, "bitmap") // stupid | bitmap
1143 OPTION(bluestore_freelist_blocks_per_key, OPT_INT, 128)
1144 OPTION(bluestore_bitmapallocator_blocks_per_zone, OPT_INT, 1024) // must be power of 2 aligned, e.g., 512, 1024, 2048...
1145 OPTION(bluestore_bitmapallocator_span_size, OPT_INT, 1024) // must be power of 2 aligned, e.g., 512, 1024, 2048...
1146 OPTION(bluestore_max_deferred_txc, OPT_U64, 32)
1147 OPTION(bluestore_rocksdb_options, OPT_STR, "compression=kNoCompression,max_write_buffer_number=4,min_write_buffer_number_to_merge=1,recycle_log_file_num=4,write_buffer_size=268435456,writable_file_max_buffer_size=0,compaction_readahead_size=2097152")
1148 OPTION(bluestore_fsck_on_mount, OPT_BOOL, false)
1149 OPTION(bluestore_fsck_on_mount_deep, OPT_BOOL, true)
1150 OPTION(bluestore_fsck_on_umount, OPT_BOOL, false)
1151 OPTION(bluestore_fsck_on_umount_deep, OPT_BOOL, true)
1152 OPTION(bluestore_fsck_on_mkfs, OPT_BOOL, true)
1153 OPTION(bluestore_fsck_on_mkfs_deep, OPT_BOOL, false)
1154 OPTION(bluestore_sync_submit_transaction, OPT_BOOL, false) // submit kv txn in queueing thread (not kv_sync_thread)
1155 OPTION(bluestore_throttle_bytes, OPT_U64, 64*1024*1024)
1156 OPTION(bluestore_throttle_deferred_bytes, OPT_U64, 128*1024*1024)
1157 OPTION(bluestore_throttle_cost_per_io_hdd, OPT_U64, 670000)
1158 OPTION(bluestore_throttle_cost_per_io_ssd, OPT_U64, 4000)
1159 OPTION(bluestore_throttle_cost_per_io, OPT_U64, 0)
1160 OPTION(bluestore_deferred_batch_ops, OPT_U64, 0)
1161 OPTION(bluestore_deferred_batch_ops_hdd, OPT_U64, 64)
1162 OPTION(bluestore_deferred_batch_ops_ssd, OPT_U64, 16)
1163 OPTION(bluestore_nid_prealloc, OPT_INT, 1024)
1164 OPTION(bluestore_blobid_prealloc, OPT_U64, 10240)
1165 OPTION(bluestore_clone_cow, OPT_BOOL, true) // do copy-on-write for clones
1166 OPTION(bluestore_default_buffered_read, OPT_BOOL, true)
1167 OPTION(bluestore_default_buffered_write, OPT_BOOL, false)
1168 OPTION(bluestore_debug_misc, OPT_BOOL, false)
1169 OPTION(bluestore_debug_no_reuse_blocks, OPT_BOOL, false)
1170 OPTION(bluestore_debug_small_allocations, OPT_INT, 0)
1171 OPTION(bluestore_debug_freelist, OPT_BOOL, false)
1172 OPTION(bluestore_debug_prefill, OPT_FLOAT, 0)
1173 OPTION(bluestore_debug_prefragment_max, OPT_INT, 1048576)
1174 OPTION(bluestore_debug_inject_read_err, OPT_BOOL, false)
1175 OPTION(bluestore_debug_randomize_serial_transaction, OPT_INT, 0)
1176 OPTION(bluestore_debug_omit_block_device_write, OPT_BOOL, false)
1177 OPTION(bluestore_debug_fsck_abort, OPT_BOOL, false)
1178 OPTION(bluestore_debug_omit_kv_commit, OPT_BOOL, false)
1179 OPTION(bluestore_debug_permit_any_bdev_label, OPT_BOOL, false)
1180 OPTION(bluestore_shard_finishers, OPT_BOOL, false)
1181
1182 OPTION(kstore_max_ops, OPT_U64, 512)
1183 OPTION(kstore_max_bytes, OPT_U64, 64*1024*1024)
1184 OPTION(kstore_backend, OPT_STR, "rocksdb")
1185 OPTION(kstore_rocksdb_options, OPT_STR, "compression=kNoCompression")
1186 OPTION(kstore_rocksdb_bloom_bits_per_key, OPT_INT, 0)
1187 OPTION(kstore_fsck_on_mount, OPT_BOOL, false)
1188 OPTION(kstore_fsck_on_mount_deep, OPT_BOOL, true)
1189 OPTION(kstore_nid_prealloc, OPT_U64, 1024)
1190 OPTION(kstore_sync_transaction, OPT_BOOL, false)
1191 OPTION(kstore_sync_submit_transaction, OPT_BOOL, false)
1192 OPTION(kstore_onode_map_size, OPT_U64, 1024)
1193 OPTION(kstore_cache_tails, OPT_BOOL, true)
1194 OPTION(kstore_default_stripe_size, OPT_INT, 65536)
1195
1196 OPTION(filestore_omap_backend, OPT_STR, "rocksdb")
1197 OPTION(filestore_omap_backend_path, OPT_STR, "")
1198
1199 /// filestore wb throttle limits
1200 OPTION(filestore_wbthrottle_enable, OPT_BOOL, true)
1201 OPTION(filestore_wbthrottle_btrfs_bytes_start_flusher, OPT_U64, 41943040)
1202 OPTION(filestore_wbthrottle_btrfs_bytes_hard_limit, OPT_U64, 419430400)
1203 OPTION(filestore_wbthrottle_btrfs_ios_start_flusher, OPT_U64, 500)
1204 OPTION(filestore_wbthrottle_btrfs_ios_hard_limit, OPT_U64, 5000)
1205 OPTION(filestore_wbthrottle_btrfs_inodes_start_flusher, OPT_U64, 500)
1206 OPTION(filestore_wbthrottle_xfs_bytes_start_flusher, OPT_U64, 41943040)
1207 OPTION(filestore_wbthrottle_xfs_bytes_hard_limit, OPT_U64, 419430400)
1208 OPTION(filestore_wbthrottle_xfs_ios_start_flusher, OPT_U64, 500)
1209 OPTION(filestore_wbthrottle_xfs_ios_hard_limit, OPT_U64, 5000)
1210 OPTION(filestore_wbthrottle_xfs_inodes_start_flusher, OPT_U64, 500)
1211
1212 /// These must be less than the fd limit
1213 OPTION(filestore_wbthrottle_btrfs_inodes_hard_limit, OPT_U64, 5000)
1214 OPTION(filestore_wbthrottle_xfs_inodes_hard_limit, OPT_U64, 5000)
1215
1216 //Introduce a O_DSYNC write in the filestore
1217 OPTION(filestore_odsync_write, OPT_BOOL, false)
1218
1219 // Tests index failure paths
1220 OPTION(filestore_index_retry_probability, OPT_DOUBLE, 0)
1221
1222 // Allow object read error injection
1223 OPTION(filestore_debug_inject_read_err, OPT_BOOL, false)
1224
1225 OPTION(filestore_debug_omap_check, OPT_BOOL, false) // Expensive debugging check on sync
1226 OPTION(filestore_omap_header_cache_size, OPT_INT, 1024)
1227
1228 // Use omap for xattrs for attrs over
1229 // filestore_max_inline_xattr_size or
1230 OPTION(filestore_max_inline_xattr_size, OPT_U32, 0) //Override
1231 OPTION(filestore_max_inline_xattr_size_xfs, OPT_U32, 65536)
1232 OPTION(filestore_max_inline_xattr_size_btrfs, OPT_U32, 2048)
1233 OPTION(filestore_max_inline_xattr_size_other, OPT_U32, 512)
1234
1235 // for more than filestore_max_inline_xattrs attrs
1236 OPTION(filestore_max_inline_xattrs, OPT_U32, 0) //Override
1237 OPTION(filestore_max_inline_xattrs_xfs, OPT_U32, 10)
1238 OPTION(filestore_max_inline_xattrs_btrfs, OPT_U32, 10)
1239 OPTION(filestore_max_inline_xattrs_other, OPT_U32, 2)
1240
1241 // max xattr value size
1242 OPTION(filestore_max_xattr_value_size, OPT_U32, 0) //Override
1243 OPTION(filestore_max_xattr_value_size_xfs, OPT_U32, 64<<10)
1244 OPTION(filestore_max_xattr_value_size_btrfs, OPT_U32, 64<<10)
1245 // ext4 allows 4k xattrs total including some smallish extra fields and the
1246 // keys. We're allowing 2 512 inline attrs in addition to some filestore
1247 // replay attrs. After accounting for those, we still need to fit up to
1248 // two attrs of this value. That means we need this value to be around 1k
1249 // to be safe. This is hacky, but it's not worth complicating the code
1250 // to work around ext4's total xattr limit.
1251 OPTION(filestore_max_xattr_value_size_other, OPT_U32, 1<<10)
1252
1253 OPTION(filestore_sloppy_crc, OPT_BOOL, false) // track sloppy crcs
1254 OPTION(filestore_sloppy_crc_block_size, OPT_INT, 65536)
1255
1256 OPTION(filestore_max_alloc_hint_size, OPT_U64, 1ULL << 20) // bytes
1257
1258 OPTION(filestore_max_sync_interval, OPT_DOUBLE, 5) // seconds
1259 OPTION(filestore_min_sync_interval, OPT_DOUBLE, .01) // seconds
1260 OPTION(filestore_btrfs_snap, OPT_BOOL, true)
1261 OPTION(filestore_btrfs_clone_range, OPT_BOOL, true)
1262 OPTION(filestore_zfs_snap, OPT_BOOL, false) // zfsonlinux is still unstable
1263 OPTION(filestore_fsync_flushes_journal_data, OPT_BOOL, false)
1264 OPTION(filestore_fiemap, OPT_BOOL, false) // (try to) use fiemap
1265 OPTION(filestore_punch_hole, OPT_BOOL, false)
1266 OPTION(filestore_seek_data_hole, OPT_BOOL, false) // (try to) use seek_data/hole
1267 OPTION(filestore_splice, OPT_BOOL, false)
1268 OPTION(filestore_fadvise, OPT_BOOL, true)
1269 //collect device partition information for management application to use
1270 OPTION(filestore_collect_device_partition_information, OPT_BOOL, true)
1271
1272 // (try to) use extsize for alloc hint NOTE: extsize seems to trigger
1273 // data corruption in xfs prior to kernel 3.5. filestore will
1274 // implicitly disable this if it cannot confirm the kernel is newer
1275 // than that.
1276 // NOTE: This option involves a tradeoff: When disabled, fragmentation is
1277 // worse, but large sequential writes are faster. When enabled, large
1278 // sequential writes are slower, but fragmentation is reduced.
1279 OPTION(filestore_xfs_extsize, OPT_BOOL, false)
1280
1281 OPTION(filestore_journal_parallel, OPT_BOOL, false)
1282 OPTION(filestore_journal_writeahead, OPT_BOOL, false)
1283 OPTION(filestore_journal_trailing, OPT_BOOL, false)
1284 OPTION(filestore_queue_max_ops, OPT_U64, 50)
1285 OPTION(filestore_queue_max_bytes, OPT_U64, 100 << 20)
1286
1287 OPTION(filestore_caller_concurrency, OPT_INT, 10)
1288
1289 /// Expected filestore throughput in B/s
1290 OPTION(filestore_expected_throughput_bytes, OPT_DOUBLE, 200 << 20)
1291 /// Expected filestore throughput in ops/s
1292 OPTION(filestore_expected_throughput_ops, OPT_DOUBLE, 200)
1293
1294 /// Filestore max delay multiple. Defaults to 0 (disabled)
1295 OPTION(filestore_queue_max_delay_multiple, OPT_DOUBLE, 0)
1296 /// Filestore high delay multiple. Defaults to 0 (disabled)
1297 OPTION(filestore_queue_high_delay_multiple, OPT_DOUBLE, 0)
1298
1299 /// Use above to inject delays intended to keep the op queue between low and high
1300 OPTION(filestore_queue_low_threshhold, OPT_DOUBLE, 0.3)
1301 OPTION(filestore_queue_high_threshhold, OPT_DOUBLE, 0.9)
1302
1303 OPTION(filestore_op_threads, OPT_INT, 2)
1304 OPTION(filestore_op_thread_timeout, OPT_INT, 60)
1305 OPTION(filestore_op_thread_suicide_timeout, OPT_INT, 180)
1306 OPTION(filestore_commit_timeout, OPT_FLOAT, 600)
1307 OPTION(filestore_fiemap_threshold, OPT_INT, 4096)
1308 OPTION(filestore_merge_threshold, OPT_INT, 10)
1309 OPTION(filestore_split_multiple, OPT_INT, 2)
1310 OPTION(filestore_update_to, OPT_INT, 1000)
1311 OPTION(filestore_blackhole, OPT_BOOL, false) // drop any new transactions on the floor
1312 OPTION(filestore_fd_cache_size, OPT_INT, 128) // FD lru size
1313 OPTION(filestore_fd_cache_shards, OPT_INT, 16) // FD number of shards
1314 OPTION(filestore_ondisk_finisher_threads, OPT_INT, 1)
1315 OPTION(filestore_apply_finisher_threads, OPT_INT, 1)
1316 OPTION(filestore_dump_file, OPT_STR, "") // file onto which store transaction dumps
1317 OPTION(filestore_kill_at, OPT_INT, 0) // inject a failure at the n'th opportunity
1318 OPTION(filestore_inject_stall, OPT_INT, 0) // artificially stall for N seconds in op queue thread
1319 OPTION(filestore_fail_eio, OPT_BOOL, true) // fail/crash on EIO
1320 OPTION(filestore_debug_verify_split, OPT_BOOL, false)
1321 OPTION(journal_dio, OPT_BOOL, true)
1322 OPTION(journal_aio, OPT_BOOL, true)
1323 OPTION(journal_force_aio, OPT_BOOL, false)
1324 OPTION(journal_block_size, OPT_INT, 4096)
1325
1326 // max bytes to search ahead in journal searching for corruption
1327 OPTION(journal_max_corrupt_search, OPT_U64, 10<<20)
1328 OPTION(journal_block_align, OPT_BOOL, true)
1329 OPTION(journal_write_header_frequency, OPT_U64, 0)
1330 OPTION(journal_max_write_bytes, OPT_INT, 10 << 20)
1331 OPTION(journal_max_write_entries, OPT_INT, 100)
1332
1333 /// Target range for journal fullness
1334 OPTION(journal_throttle_low_threshhold, OPT_DOUBLE, 0.6)
1335 OPTION(journal_throttle_high_threshhold, OPT_DOUBLE, 0.9)
1336
1337 /// Multiple over expected at high_threshhold. Defaults to 0 (disabled).
1338 OPTION(journal_throttle_high_multiple, OPT_DOUBLE, 0)
1339 /// Multiple over expected at max. Defaults to 0 (disabled).
1340 OPTION(journal_throttle_max_multiple, OPT_DOUBLE, 0)
1341
1342 OPTION(journal_align_min_size, OPT_INT, 64 << 10) // align data payloads >= this.
1343 OPTION(journal_replay_from, OPT_INT, 0)
1344 OPTION(journal_zero_on_create, OPT_BOOL, false)
1345 OPTION(journal_ignore_corruption, OPT_BOOL, false) // assume journal is not corrupt
1346 OPTION(journal_discard, OPT_BOOL, false) // when using an ssd disk as the journal, whether to support discarding unused journal data.
1347
1348 OPTION(fio_dir, OPT_STR, "/tmp/fio") // fio data directory for fio-objectstore
1349
1350 OPTION(rados_mon_op_timeout, OPT_DOUBLE, 0) // how many seconds to wait for a response from the monitor before returning an error from a rados operation. 0 means no limit.
1351 OPTION(rados_osd_op_timeout, OPT_DOUBLE, 0) // how many seconds to wait for a response from osds before returning an error from a rados operation. 0 means no limit.
1352 OPTION(rados_tracing, OPT_BOOL, false) // true if LTTng-UST tracepoints should be enabled
1353
1354 OPTION(rbd_op_threads, OPT_INT, 1)
1355 OPTION(rbd_op_thread_timeout, OPT_INT, 60)
1356 OPTION(rbd_non_blocking_aio, OPT_BOOL, true) // process AIO ops from a worker thread to prevent blocking
1357 OPTION(rbd_cache, OPT_BOOL, true) // whether to enable caching (writeback unless rbd_cache_max_dirty is 0)
1358 OPTION(rbd_cache_writethrough_until_flush, OPT_BOOL, true) // whether to make writeback caching writethrough until flush is called, to be sure the user of librbd will send flushes so that writeback is safe
1359 OPTION(rbd_cache_size, OPT_LONGLONG, 32<<20) // cache size in bytes
1360 OPTION(rbd_cache_max_dirty, OPT_LONGLONG, 24<<20) // dirty limit in bytes - set to 0 for write-through caching
1361 OPTION(rbd_cache_target_dirty, OPT_LONGLONG, 16<<20) // target dirty limit in bytes
1362 OPTION(rbd_cache_max_dirty_age, OPT_FLOAT, 1.0) // seconds in cache before writeback starts
1363 OPTION(rbd_cache_max_dirty_object, OPT_INT, 0) // dirty limit for objects - set to 0 for auto calculate from rbd_cache_size
1364 OPTION(rbd_cache_block_writes_upfront, OPT_BOOL, false) // whether to block writes to the cache before the aio_write call completes (true), or block before the aio completion is called (false)
1365 OPTION(rbd_concurrent_management_ops, OPT_INT, 10) // how many operations can be in flight for a management operation like deleting or resizing an image
1366 OPTION(rbd_balance_snap_reads, OPT_BOOL, false)
1367 OPTION(rbd_localize_snap_reads, OPT_BOOL, false)
1368 OPTION(rbd_balance_parent_reads, OPT_BOOL, false)
1369 OPTION(rbd_localize_parent_reads, OPT_BOOL, true)
1370 OPTION(rbd_readahead_trigger_requests, OPT_INT, 10) // number of sequential requests necessary to trigger readahead
1371 OPTION(rbd_readahead_max_bytes, OPT_LONGLONG, 512 * 1024) // set to 0 to disable readahead
1372 OPTION(rbd_readahead_disable_after_bytes, OPT_LONGLONG, 50 * 1024 * 1024) // how many bytes are read in total before readahead is disabled
1373 OPTION(rbd_clone_copy_on_read, OPT_BOOL, false)
1374 OPTION(rbd_blacklist_on_break_lock, OPT_BOOL, true) // whether to blacklist clients whose lock was broken
1375 OPTION(rbd_blacklist_expire_seconds, OPT_INT, 0) // number of seconds to blacklist - set to 0 for OSD default
1376 OPTION(rbd_request_timed_out_seconds, OPT_INT, 30) // number of seconds before maint request times out
1377 OPTION(rbd_skip_partial_discard, OPT_BOOL, false) // when trying to discard a range inside an object, set to true to skip zeroing the range.
1378 OPTION(rbd_enable_alloc_hint, OPT_BOOL, true) // when writing an object, issue a hint to the osd backend indicating the expected object size
1379 OPTION(rbd_tracing, OPT_BOOL, false) // true if LTTng-UST tracepoints should be enabled
1380 OPTION(rbd_blkin_trace_all, OPT_BOOL, false) // create a blkin trace for all RBD requests
1381 OPTION(rbd_validate_pool, OPT_BOOL, true) // true if empty pools should be validated for RBD compatibility
1382 OPTION(rbd_validate_names, OPT_BOOL, true) // true if image specs should be validated
1383 OPTION(rbd_auto_exclusive_lock_until_manual_request, OPT_BOOL, true) // whether to automatically acquire/release exclusive lock until it is explicitly requested, i.e. before we know the user of librbd is properly using the lock API
1384 OPTION(rbd_mirroring_resync_after_disconnect, OPT_BOOL, false) // automatically start image resync after mirroring is disconnected due to being laggy
1385 OPTION(rbd_mirroring_replay_delay, OPT_INT, 0) // time-delay in seconds for rbd-mirror asynchronous replication
1386
1387 OPTION(rbd_default_pool, OPT_STR, "rbd") // default pool for storing images
1388 OPTION_VALIDATOR(rbd_default_pool)
1389
1390 /*
1391 * The following options change the behavior for librbd's image creation methods that
1392 * don't require all of the parameters. These are provided so that older programs
1393 * can take advantage of newer features without being rewritten to use new versions
1394 * of the image creation functions.
1395 *
1396 * rbd_create()/RBD::create() are affected by all of these options.
1397 *
1398 * rbd_create2()/RBD::create2() and rbd_clone()/RBD::clone() are affected by:
1399 * - rbd_default_order
1400 * - rbd_default_stripe_count
1401 * - rbd_default_stripe_unit
1402 *
1403 * rbd_create3()/RBD::create3() and rbd_clone2/RBD::clone2() are only
1404 * affected by rbd_default_order.
1405 */
1406 OPTION(rbd_default_format, OPT_INT, 2)
1407 OPTION(rbd_default_order, OPT_INT, 22)
1408 OPTION(rbd_default_stripe_count, OPT_U64, 0) // changing requires stripingv2 feature
1409 OPTION(rbd_default_stripe_unit, OPT_U64, 0) // changing to non-object size requires stripingv2 feature
1410 OPTION(rbd_default_data_pool, OPT_STR, "") // optional default pool for storing image data blocks
1411 OPTION_VALIDATOR(rbd_default_data_pool)
1412
1413 /**
1414 * RBD features are only applicable for v2 images. This setting accepts either
1415 * an integer bitmask value or comma-delimited string of RBD feature names.
1416 * This setting is always internally stored as an integer bitmask value. The
1417 * mapping between feature bitmask value and feature name is as follows:
1418 *
1419 * +1 -> layering
1420 * +2 -> striping
1421 * +4 -> exclusive-lock
1422 * +8 -> object-map
1423 * +16 -> fast-diff
1424 * +32 -> deep-flatten
1425 * +64 -> journaling
1426 * +128 -> data-pool
1427 */
1428 SAFE_OPTION(rbd_default_features, OPT_STR, "layering,exclusive-lock,object-map,fast-diff,deep-flatten")
1429 OPTION_VALIDATOR(rbd_default_features)
1430
1431 OPTION(rbd_default_map_options, OPT_STR, "") // default rbd map -o / --options
1432
1433 /**
1434 * RBD journal options.
1435 */
1436 OPTION(rbd_journal_order, OPT_U32, 24) // bits to shift to compute journal object max size, between 12 and 64
1437 OPTION(rbd_journal_splay_width, OPT_U32, 4) // number of active journal objects
1438 OPTION(rbd_journal_commit_age, OPT_DOUBLE, 5) // commit time interval, seconds
1439 OPTION(rbd_journal_object_flush_interval, OPT_INT, 0) // maximum number of pending commits per journal object
1440 OPTION(rbd_journal_object_flush_bytes, OPT_INT, 0) // maximum number of pending bytes per journal object
1441 OPTION(rbd_journal_object_flush_age, OPT_DOUBLE, 0) // maximum age (in seconds) for pending commits
1442 OPTION(rbd_journal_pool, OPT_STR, "") // pool for journal objects
1443 OPTION(rbd_journal_max_payload_bytes, OPT_U32, 16384) // maximum journal payload size before splitting
1444 OPTION(rbd_journal_max_concurrent_object_sets, OPT_INT, 0) // maximum number of object sets a journal client can be behind before it is automatically unregistered
1445
1446 /**
1447 * RBD Mirror options
1448 */
1449 OPTION(rbd_mirror_journal_commit_age, OPT_DOUBLE, 5) // commit time interval, seconds
1450 OPTION(rbd_mirror_journal_poll_age, OPT_DOUBLE, 5) // maximum age (in seconds) between successive journal polls
1451 OPTION(rbd_mirror_journal_max_fetch_bytes, OPT_U32, 32768) // maximum bytes to read from each journal data object per fetch
1452 OPTION(rbd_mirror_sync_point_update_age, OPT_DOUBLE, 30) // number of seconds between each update of the image sync point object number
1453 OPTION(rbd_mirror_concurrent_image_syncs, OPT_U32, 5) // maximum number of image syncs in parallel
1454 OPTION(rbd_mirror_pool_replayers_refresh_interval, OPT_INT, 30) // interval to refresh peers in rbd-mirror daemon
1455 OPTION(rbd_mirror_delete_retry_interval, OPT_DOUBLE, 30) // interval to check and retry the failed requests in deleter
1456 OPTION(rbd_mirror_image_state_check_interval, OPT_INT, 30) // interval to get images from pool watcher and set sources in replayer
1457 OPTION(rbd_mirror_leader_heartbeat_interval, OPT_INT, 5) // interval (in seconds) between mirror leader heartbeats
1458 OPTION(rbd_mirror_leader_max_missed_heartbeats, OPT_INT, 2) // number of missed heartbeats for non-lock owner to attempt to acquire lock
1459 OPTION(rbd_mirror_leader_max_acquire_attempts_before_break, OPT_INT, 3) // number of failed attempts to acquire lock after missing heartbeats before breaking lock
1460
1461 OPTION(nss_db_path, OPT_STR, "") // path to nss db
1462
1463
1464 OPTION(rgw_max_chunk_size, OPT_INT, 4 * 1024 * 1024)
1465 OPTION(rgw_put_obj_min_window_size, OPT_INT, 16 * 1024 * 1024)
1466 OPTION(rgw_put_obj_max_window_size, OPT_INT, 64 * 1024 * 1024)
1467 OPTION(rgw_max_put_size, OPT_U64, 5ULL*1024*1024*1024)
1468 OPTION(rgw_max_put_param_size, OPT_U64, 1 * 1024 * 1024) // max input size for PUT requests accepting json/xml params
1469
1470 /**
1471 * override max bucket index shards in zone configuration (if not zero)
1472 *
1473 * Represents the number of shards for the bucket index object, a value of zero
1474 * indicates there is no sharding. By default (no sharding), the name of the object
1475 * is '.dir.{marker}'; with sharding, the name is '.dir.{marker}.{sharding_id}',
1476 * where sharding_id is a zero-based value. It is not recommended to set too large a
1477 * value (e.g. a thousand) as it increases the cost of bucket listing.
1478 */
1479 OPTION(rgw_override_bucket_index_max_shards, OPT_U32, 0)
1480
1481 /**
1482 * Represents the maximum AIO pending requests for the bucket index object shards.
1483 */
1484 OPTION(rgw_bucket_index_max_aio, OPT_U32, 8)
1485
1486 /**
1487 * whether or not the quota/gc threads should be started
1488 */
1489 OPTION(rgw_enable_quota_threads, OPT_BOOL, true)
1490 OPTION(rgw_enable_gc_threads, OPT_BOOL, true)
1491 OPTION(rgw_enable_lc_threads, OPT_BOOL, true)
1492
1493
1494 OPTION(rgw_data, OPT_STR, "/var/lib/ceph/radosgw/$cluster-$id")
1495 OPTION(rgw_enable_apis, OPT_STR, "s3, s3website, swift, swift_auth, admin")
1496 OPTION(rgw_cache_enabled, OPT_BOOL, true) // rgw cache enabled
1497 OPTION(rgw_cache_lru_size, OPT_INT, 10000) // num of entries in rgw cache
1498 OPTION(rgw_socket_path, OPT_STR, "") // path to unix domain socket, if not specified, rgw will not run as external fcgi
1499 OPTION(rgw_host, OPT_STR, "") // host for radosgw, can be an IP, default is 0.0.0.0
1500 OPTION(rgw_port, OPT_STR, "") // port to listen, format as "8080" "5000", if not specified, rgw will not run external fcgi
1501 OPTION(rgw_dns_name, OPT_STR, "") // hostname suffix on buckets
1502 OPTION(rgw_dns_s3website_name, OPT_STR, "") // hostname suffix on buckets for s3-website endpoint
1503 OPTION(rgw_content_length_compat, OPT_BOOL, false) // Check both HTTP_CONTENT_LENGTH and CONTENT_LENGTH in fcgi env
1504 OPTION(rgw_lifecycle_work_time, OPT_STR, "00:00-06:00") // time window in which lifecycle (lc) jobs are processed; default is 00:00-06:00
1505 OPTION(rgw_lc_lock_max_time, OPT_INT, 60) // total run time for a single lc processor work
1506 OPTION(rgw_lc_max_objs, OPT_INT, 32)
1507 OPTION(rgw_lc_debug_interval, OPT_INT, -1) // Debug run interval, in seconds
1508 OPTION(rgw_script_uri, OPT_STR, "") // alternative value for SCRIPT_URI if not set in request
1509 OPTION(rgw_request_uri, OPT_STR, "") // alternative value for REQUEST_URI if not set in request
1510 OPTION(rgw_swift_url, OPT_STR, "") // the swift url, being published by the internal swift auth
1511 OPTION(rgw_swift_url_prefix, OPT_STR, "swift") // entry point for which a url is considered a swift url
1512 OPTION(rgw_swift_auth_url, OPT_STR, "") // default URL to go and verify tokens for v1 auth (if not using internal swift auth)
1513 OPTION(rgw_swift_auth_entry, OPT_STR, "auth") // entry point for which a url is considered a swift auth url
1514 OPTION(rgw_swift_tenant_name, OPT_STR, "") // tenant name to use for swift access
1515 OPTION(rgw_swift_account_in_url, OPT_BOOL, false) // assume that the URL always contains the account (aka tenant) part
1516 OPTION(rgw_swift_enforce_content_length, OPT_BOOL, false) // enforce generation of Content-Length even at the cost of performance or scalability
1517 OPTION(rgw_keystone_url, OPT_STR, "") // url for keystone server
1518 OPTION(rgw_keystone_admin_token, OPT_STR, "") // keystone admin token (shared secret)
1519 OPTION(rgw_keystone_admin_user, OPT_STR, "") // keystone admin user name
1520 OPTION(rgw_keystone_admin_password, OPT_STR, "") // keystone admin user password
1521 OPTION(rgw_keystone_admin_tenant, OPT_STR, "") // keystone admin user tenant (for keystone v2.0)
1522 OPTION(rgw_keystone_admin_project, OPT_STR, "") // keystone admin user project (for keystone v3)
1523 OPTION(rgw_keystone_admin_domain, OPT_STR, "") // keystone admin user domain
1524 OPTION(rgw_keystone_barbican_user, OPT_STR, "") // keystone user to access barbican secrets
1525 OPTION(rgw_keystone_barbican_password, OPT_STR, "") // keystone password for barbican user
1526 OPTION(rgw_keystone_barbican_tenant, OPT_STR, "") // keystone barbican user tenant (for keystone v2.0)
1527 OPTION(rgw_keystone_barbican_project, OPT_STR, "") // keystone barbican user project (for keystone v3)
1528 OPTION(rgw_keystone_barbican_domain, OPT_STR, "") // keystone barbican user domain
1529 OPTION(rgw_keystone_api_version, OPT_INT, 2) // Version of Keystone API to use (2 or 3)
1530 OPTION(rgw_keystone_accepted_roles, OPT_STR, "Member, admin") // roles required to serve requests
1531 OPTION(rgw_keystone_accepted_admin_roles, OPT_STR, "") // list of roles allowing a user to gain admin privileges
1532 OPTION(rgw_keystone_token_cache_size, OPT_INT, 10000) // max number of entries in keystone token cache
1533 OPTION(rgw_keystone_revocation_interval, OPT_INT, 15 * 60) // seconds between token revocation checks
1534 OPTION(rgw_keystone_verify_ssl, OPT_BOOL, true) // should we try to verify keystone's ssl
1535 OPTION(rgw_keystone_implicit_tenants, OPT_BOOL, false) // create new users in their own tenants of the same name
1536 OPTION(rgw_cross_domain_policy, OPT_STR, "<allow-access-from domain=\"*\" secure=\"false\" />")
1537 OPTION(rgw_healthcheck_disabling_path, OPT_STR, "") // path whose existence causes the healthcheck to respond 503
1538 OPTION(rgw_s3_auth_use_rados, OPT_BOOL, true) // should we try to use the internal credentials for s3?
1539 OPTION(rgw_s3_auth_use_keystone, OPT_BOOL, false) // should we try to use keystone for s3?
1540 OPTION(rgw_s3_auth_aws4_force_boto2_compat, OPT_BOOL, true) // force aws4 auth boto2 compatibility
1541 OPTION(rgw_barbican_url, OPT_STR, "") // url for barbican server
1542
1543 /* OpenLDAP-style LDAP parameter strings */
1544 /* rgw_ldap_uri space-separated list of LDAP servers in URI format (the default below is a placeholder host) */
1545 OPTION(rgw_ldap_uri, OPT_STR, "ldaps://<ldap.your.domain>")
1546 /* rgw_ldap_binddn LDAP entry RGW will bind with (user match) */
1547 OPTION(rgw_ldap_binddn, OPT_STR, "uid=admin,cn=users,dc=example,dc=com")
1548 /* rgw_ldap_searchdn LDAP search base (basedn) */
1549 OPTION(rgw_ldap_searchdn, OPT_STR, "cn=users,cn=accounts,dc=example,dc=com")
1550 /* rgw_ldap_dnattr LDAP attribute containing RGW user names (to form binddns) */
1551 OPTION(rgw_ldap_dnattr, OPT_STR, "uid")
1552 /* rgw_ldap_secret file containing credentials for rgw_ldap_binddn */
1553 OPTION(rgw_ldap_secret, OPT_STR, "/etc/openldap/secret")
1554 /* rgw_s3_auth_use_ldap use LDAP for RGW auth? */
1555 OPTION(rgw_s3_auth_use_ldap, OPT_BOOL, false)
1556 /* rgw_ldap_searchfilter LDAP search filter */
1557 OPTION(rgw_ldap_searchfilter, OPT_STR, "")
1558
1559 OPTION(rgw_admin_entry, OPT_STR, "admin") // entry point for which a url is considered an admin request
1560 OPTION(rgw_enforce_swift_acls, OPT_BOOL, true)
1561 OPTION(rgw_swift_token_expiration, OPT_INT, 24 * 3600) // time in seconds for swift token expiration (24 hours by default)
1562 OPTION(rgw_print_continue, OPT_BOOL, true) // enable if 100-Continue works
1563 OPTION(rgw_print_prohibited_content_length, OPT_BOOL, false) // violate RFC 7230 and send Content-Length in 204 and 304
1564 OPTION(rgw_remote_addr_param, OPT_STR, "REMOTE_ADDR") // e.g. X-Forwarded-For, if you have a reverse proxy
1565 OPTION(rgw_op_thread_timeout, OPT_INT, 10*60) // presumably seconds (10 minutes by default) -- TODO confirm
1566 OPTION(rgw_op_thread_suicide_timeout, OPT_INT, 0)
1567 OPTION(rgw_thread_pool_size, OPT_INT, 100)
1568 OPTION(rgw_num_control_oids, OPT_INT, 8)
1569 OPTION(rgw_num_rados_handles, OPT_U32, 1)
1570 OPTION(rgw_verify_ssl, OPT_BOOL, true) // should http_client try to verify ssl when sending an https request
1571
1572 /* The following are tunables for caches of RGW NFS (and other file
1573 * client) objects.
1574 *
1575 * The file handle cache is a partitioned hash table
1576 * (fhcache_partitions), each with a closed hash part and backing
1577 * b-tree mapping. The number of partitions is expected to be a small
1578 * prime, the cache size something larger but less than 5K, the total
1579 * size of the cache is n_part * cache_size.
1580 */
1581 OPTION(rgw_nfs_lru_lanes, OPT_INT, 5)
1582 OPTION(rgw_nfs_lru_lane_hiwat, OPT_INT, 911)
1583 OPTION(rgw_nfs_fhcache_partitions, OPT_INT, 3)
1584 OPTION(rgw_nfs_fhcache_size, OPT_INT, 2017) /* total with default partitions: 3*2017=6051 */
1585 OPTION(rgw_nfs_namespace_expire_secs, OPT_INT, 300) /* namespace invalidate
1586 * timer */
1587 OPTION(rgw_nfs_max_gc, OPT_INT, 300) /* max gc events per cycle */
1588 OPTION(rgw_nfs_write_completion_interval_s, OPT_INT, 10) /* stateless (V3)
1589 * commit
1590 * delay */
1591
/* names, root pools and default-info oids for zone, region, zonegroup, realm and period */
1592 OPTION(rgw_zone, OPT_STR, "") // zone name
1593 OPTION(rgw_zone_root_pool, OPT_STR, ".rgw.root") // pool where zone-specific info is stored
1594 OPTION(rgw_default_zone_info_oid, OPT_STR, "default.zone") // oid where default zone info is stored
1595 OPTION(rgw_region, OPT_STR, "") // region name
1596 OPTION(rgw_region_root_pool, OPT_STR, ".rgw.root") // pool where all region info is stored
1597 OPTION(rgw_default_region_info_oid, OPT_STR, "default.region") // oid where default region info is stored
1598 OPTION(rgw_zonegroup, OPT_STR, "") // zone group name
1599 OPTION(rgw_zonegroup_root_pool, OPT_STR, ".rgw.root") // pool where all zone group info is stored
1600 OPTION(rgw_default_zonegroup_info_oid, OPT_STR, "default.zonegroup") // oid where default zone group info is stored
1601 OPTION(rgw_realm, OPT_STR, "") // realm name
1602 OPTION(rgw_realm_root_pool, OPT_STR, ".rgw.root") // pool where all realm info is stored
1603 OPTION(rgw_default_realm_info_oid, OPT_STR, "default.realm") // oid where default realm info is stored
1604 OPTION(rgw_period_root_pool, OPT_STR, ".rgw.root") // pool where all period info is stored
1605 OPTION(rgw_period_latest_epoch_info_oid, OPT_STR, ".latest_epoch") // oid where current period info is stored
1606 OPTION(rgw_log_nonexistent_bucket, OPT_BOOL, false)
1607 OPTION(rgw_log_object_name, OPT_STR, "%Y-%m-%d-%H-%i-%n") // see `man date` for the format codes (only a subset is supported)
1608 OPTION(rgw_log_object_name_utc, OPT_BOOL, false)
1609 OPTION(rgw_usage_max_shards, OPT_INT, 32)
1610 OPTION(rgw_usage_max_user_shards, OPT_INT, 1)
1611 OPTION(rgw_enable_ops_log, OPT_BOOL, false) // enable logging every rgw operation
1612 OPTION(rgw_enable_usage_log, OPT_BOOL, false) // enable logging bandwidth usage
1613 OPTION(rgw_ops_log_rados, OPT_BOOL, true) // whether ops log should go to rados
1614 OPTION(rgw_ops_log_socket_path, OPT_STR, "") // path to unix domain socket where ops log can go
1615 OPTION(rgw_ops_log_data_backlog, OPT_INT, 5 << 20) // max data backlog for ops log
1616 OPTION(rgw_fcgi_socket_backlog, OPT_INT, 1024) // socket backlog for fcgi
1617 OPTION(rgw_usage_log_flush_threshold, OPT_INT, 1024) // threshold to flush pending log data
1618 OPTION(rgw_usage_log_tick_interval, OPT_INT, 30) // flush pending log data every X seconds
1619 OPTION(rgw_intent_log_object_name, OPT_STR, "%Y-%m-%d-%i-%n") // see `man date` for the format codes (only a subset is supported)
1620 OPTION(rgw_intent_log_object_name_utc, OPT_BOOL, false)
1621 OPTION(rgw_init_timeout, OPT_INT, 300) // time in seconds
1622 OPTION(rgw_mime_types_file, OPT_STR, "/etc/mime.types")
1623 OPTION(rgw_gc_max_objs, OPT_INT, 32)
1624 OPTION(rgw_gc_obj_min_wait, OPT_INT, 2 * 3600) // wait time before object may be handled by gc
1625 OPTION(rgw_gc_processor_max_time, OPT_INT, 3600) // total run time for a single gc processor work
1626 OPTION(rgw_gc_processor_period, OPT_INT, 3600) // gc processor cycle time
1627 OPTION(rgw_s3_success_create_obj_status, OPT_INT, 0) // alternative success status response for create-obj (0 - default)
1628 OPTION(rgw_resolve_cname, OPT_BOOL, false) // should rgw try to resolve hostname as a dns cname record
1629 OPTION(rgw_obj_stripe_size, OPT_INT, 4 << 20) // presumably bytes (4 << 20 = 4 MiB) -- TODO confirm
1630 OPTION(rgw_extended_http_attrs, OPT_STR, "") // list of extended attrs that can be set on objects (beyond the default)
1631 OPTION(rgw_exit_timeout_secs, OPT_INT, 120) // how many seconds to wait for process to go down before exiting unconditionally
1632 OPTION(rgw_get_obj_window_size, OPT_INT, 16 << 20) // window size in bytes for single get obj request (16 MiB by default)
1633 OPTION(rgw_get_obj_max_req_size, OPT_INT, 4 << 20) // max length of a single get obj rados op
1634 OPTION(rgw_relaxed_s3_bucket_names, OPT_BOOL, false) // enable relaxed bucket name rules for US region buckets
1635 OPTION(rgw_defer_to_bucket_acls, OPT_STR, "") // if the user has bucket perms, use those before key perms (recurse and full_control)
1636 OPTION(rgw_list_buckets_max_chunk, OPT_INT, 1000) // max buckets to retrieve in a single op when listing user buckets
1637 OPTION(rgw_md_log_max_shards, OPT_INT, 64) // max shards for metadata log
1638 OPTION(rgw_num_zone_opstate_shards, OPT_INT, 128) // max shards for keeping inter-region copy progress info
1639 OPTION(rgw_opstate_ratelimit_sec, OPT_INT, 30) // min time between opstate updates on a single upload (0 for disabling ratelimit)
1640 OPTION(rgw_curl_wait_timeout_ms, OPT_INT, 1000) // timeout for certain curl calls
1641 OPTION(rgw_copy_obj_progress, OPT_BOOL, true) // should dump progress during long copy operations?
1642 OPTION(rgw_copy_obj_progress_every_bytes, OPT_INT, 1024 * 1024) // min bytes between copy progress output
1643 OPTION(rgw_obj_tombstone_cache_size, OPT_INT, 1000) // how many objects in tombstone cache, which is used in multi-zone sync to keep
1644 // track of removed objects' mtime
1645
1646 OPTION(rgw_data_log_window, OPT_INT, 30) // data log entries window (in seconds)
1647 OPTION(rgw_data_log_changes_size, OPT_INT, 1000) // number of in-memory entries to hold for data changes log
1648 OPTION(rgw_data_log_num_shards, OPT_INT, 128) // number of objects to keep data changes log on
1649 OPTION(rgw_data_log_obj_prefix, OPT_STR, "data_log") // presumably the name prefix of the data log objects -- TODO confirm
1650 OPTION(rgw_replica_log_obj_prefix, OPT_STR, "replica_log") // presumably the name prefix of the replica log objects -- TODO confirm
1651
1652 OPTION(rgw_bucket_quota_ttl, OPT_INT, 600) // how long bucket stats stay cached within rgw instance
1653 OPTION(rgw_bucket_quota_soft_threshold, OPT_DOUBLE, 0.95) // threshold from which we don't rely on cached info for quota decisions
1654 OPTION(rgw_bucket_quota_cache_size, OPT_INT, 10000) // number of entries in bucket quota cache
1655 OPTION(rgw_bucket_default_quota_max_objects, OPT_INT, -1) // number of objects allowed
1656 OPTION(rgw_bucket_default_quota_max_size, OPT_LONGLONG, -1) // Max size of object in bytes; NOTE(review): the option name suggests a per-bucket total rather than a per-object size -- confirm
1657
1658 OPTION(rgw_expose_bucket, OPT_BOOL, false) // Return the bucket name in the 'Bucket' response header
1659
1660 OPTION(rgw_frontends, OPT_STR, "civetweb port=7480") // rgw front ends; the default is civetweb with port=7480
1661
1662 OPTION(rgw_user_quota_bucket_sync_interval, OPT_INT, 180) // time period for accumulating modified buckets before syncing stats
1663 OPTION(rgw_user_quota_sync_interval, OPT_INT, 3600 * 24) // time period for accumulating modified buckets before syncing entire user stats
1664 OPTION(rgw_user_quota_sync_idle_users, OPT_BOOL, false) // whether stats for idle users should be fully synced
1665 OPTION(rgw_user_quota_sync_wait_time, OPT_INT, 3600 * 24) // min time between two full stats syncs for non-idle users
1666 OPTION(rgw_user_default_quota_max_objects, OPT_INT, -1) // number of objects allowed
1667 OPTION(rgw_user_default_quota_max_size, OPT_LONGLONG, -1) // Max size of object in bytes; NOTE(review): the option name suggests a per-user total rather than a per-object size -- confirm
1668
1669 OPTION(rgw_multipart_min_part_size, OPT_INT, 5 * 1024 * 1024) // min size for each part (except for last one) in multipart upload
1670 OPTION(rgw_multipart_part_upload_limit, OPT_INT, 10000) // parts limit in multipart upload
1671
1672 OPTION(rgw_max_slo_entries, OPT_INT, 1000) // default number of max entries in slo
1673
1674 OPTION(rgw_olh_pending_timeout_sec, OPT_INT, 3600) // time until we retire a pending olh change
1675 OPTION(rgw_user_max_buckets, OPT_INT, 1000) // global option to set max buckets count for all users
1676
1677 OPTION(rgw_objexp_gc_interval, OPT_U32, 60 * 10) // maximum time between rounds of expired-object garbage collection
1678 OPTION(rgw_objexp_time_step, OPT_U32, 4096) // number of seconds for rounding the timestamps
1679 OPTION(rgw_objexp_hints_num_shards, OPT_U32, 127) // maximum number of parts in which the hint index is stored
1680 OPTION(rgw_objexp_chunk_size, OPT_U32, 100) // maximum number of entries in a single operation when processing objexp data
1681
1682 OPTION(rgw_enable_static_website, OPT_BOOL, false) // enable static website feature
1683 OPTION(rgw_log_http_headers, OPT_STR, "" ) // list of HTTP headers to log when seen, ignores case (e.g., http_x_forwarded_for)
1684
1685 OPTION(rgw_num_async_rados_threads, OPT_INT, 32) // num of threads to use for async rados operations
1686 OPTION(rgw_md_notify_interval_msec, OPT_INT, 200) // metadata changes notification interval to followers
1687 OPTION(rgw_run_sync_thread, OPT_BOOL, true) // whether radosgw (not radosgw-admin) spawns the sync thread
1688 OPTION(rgw_sync_lease_period, OPT_INT, 120) // time in seconds for lease that rgw takes on a specific log (or log shard)
1689 OPTION(rgw_sync_log_trim_interval, OPT_INT, 1200) // time in seconds between attempts to trim sync logs
1690
1691 OPTION(rgw_sync_data_inject_err_probability, OPT_DOUBLE, 0) // range [0, 1]
1692 OPTION(rgw_sync_meta_inject_err_probability, OPT_DOUBLE, 0) // range [0, 1]
1693
1694
1695 OPTION(rgw_period_push_interval, OPT_DOUBLE, 2) // seconds to wait before retrying "period push"
1696 OPTION(rgw_period_push_interval_max, OPT_DOUBLE, 30) // maximum interval after exponential backoff
1697
1698 OPTION(rgw_safe_max_objects_per_shard, OPT_INT, 100*1024) // safe max loading (100*1024 = 102400 by default)
1699 OPTION(rgw_shard_warning_threshold, OPT_DOUBLE, 90) // percentage of safe max
1700 // at which to warn
1701
1702 OPTION(rgw_swift_versioning_enabled, OPT_BOOL, false) // whether swift object versioning feature is enabled
1703
1704 OPTION(mgr_module_path, OPT_STR, CEPH_PKGLIBDIR "/mgr") // where to load python modules from
1705 OPTION(mgr_modules, OPT_STR, "restful") // Which modules to load
1706 OPTION(mgr_data, OPT_STR, "/var/lib/ceph/mgr/$cluster-$id") // where to find keyring etc
1707 OPTION(mgr_tick_period, OPT_INT, 2) // How frequently to tick
1708 OPTION(mgr_stats_period, OPT_INT, 5) // How frequently clients send stats
1709 OPTION(mgr_client_bytes, OPT_U64, 128*1048576) // bytes from clients (128 MiB by default)
1710 OPTION(mgr_client_messages, OPT_U64, 512) // messages from clients
1711 OPTION(mgr_osd_bytes, OPT_U64, 512*1048576) // bytes from osds (512 MiB by default)
1712 OPTION(mgr_osd_messages, OPT_U64, 8192) // messages from osds
1713 OPTION(mgr_mds_bytes, OPT_U64, 128*1048576) // bytes from mdss (128 MiB by default)
1714 OPTION(mgr_mds_messages, OPT_U64, 128) // messages from mdss
1715 OPTION(mgr_mon_bytes, OPT_U64, 128*1048576) // bytes from mons (128 MiB by default)
1716 OPTION(mgr_mon_messages, OPT_U64, 128) // messages from mons
1717
1718 OPTION(mgr_connect_retry_interval, OPT_DOUBLE, 1.0)
1719
1720 OPTION(mon_mgr_digest_period, OPT_INT, 5) // How frequently to send digests
1721 OPTION(mon_mgr_beacon_grace, OPT_INT, 30) // How long to wait to failover
1722 OPTION(mon_mgr_inactive_grace, OPT_INT, 60) // How long before health WARN -> ERR
1723 OPTION(rgw_crypt_require_ssl, OPT_BOOL, true) // requests including encryption key headers must be sent over ssl
1724 OPTION(rgw_crypt_default_encryption_key, OPT_STR, "") // base64 encoded key for encryption of rgw objects
1725 OPTION(rgw_crypt_s3_kms_encryption_keys, OPT_STR, "") // extra keys that may be used for aws:kms
1726 // defined as a space-separated map of name=value pairs, e.g. "key1=YmluCmJvb3N0CmJvb3N0LQ== key2=b3V0CnNyYwpUZXN0aW5nCg=="
1727 OPTION(rgw_crypt_suppress_logs, OPT_BOOL, true) // suppress logs that might print customer key
1728 OPTION(rgw_list_bucket_min_readahead, OPT_INT, 1000) // minimum number of entries to read from rados for bucket listing
1729
1730 OPTION(rgw_rest_getusage_op_compat, OPT_BOOL, false) // dump description of total stats for s3 GetUsage API
1731
1732 OPTION(mutex_perf_counter, OPT_BOOL, false) // enable/disable mutex perf counter (disabled by default)
1733 OPTION(throttler_perf_counter, OPT_BOOL, true) // enable/disable throttler perf counter (enabled by default)
1734
1735 /* The following are tunables for torrent data */
1736 OPTION(rgw_torrent_flag, OPT_BOOL, false) // produce torrent function flag
1737 OPTION(rgw_torrent_tracker, OPT_STR, "") // torrent field announce and announce list
1738 OPTION(rgw_torrent_createby, OPT_STR, "") // torrent field created by
1739 OPTION(rgw_torrent_comment, OPT_STR, "") // torrent field comment
1740 OPTION(rgw_torrent_encoding, OPT_STR, "") // torrent field encoding
1741 OPTION(rgw_torrent_origin, OPT_STR, "") // torrent origin
1742 OPTION(rgw_torrent_sha_unit, OPT_INT, 512*1024) // torrent field piece length 512K
1743
1744 OPTION(event_tracing, OPT_BOOL, false) // true if LTTng-UST tracepoints should be enabled
1745
1746 // This will be set to true when it is safe to start threads.
1747 // Once it is true, it will never change.
1748 OPTION(internal_safe_to_start_threads, OPT_BOOL, false)
1749
1750 OPTION(debug_deliberately_leak_memory, OPT_BOOL, false) // debugging aid; presumably makes the process leak memory on purpose -- TODO confirm
1751
1752 OPTION(rgw_swift_custom_header, OPT_STR, "") // option to enable swift custom headers
1753
1754 /* resharding tunables */
1755 OPTION(rgw_reshard_num_logs, OPT_INT, 16)
1756 OPTION(rgw_reshard_bucket_lock_duration, OPT_INT, 120) // duration of lock on bucket obj during resharding
1757 OPTION(rgw_dynamic_resharding, OPT_BOOL, true) // dynamic resharding switch, enabled by default
1758 OPTION(rgw_max_objs_per_shard, OPT_INT, 100000) // presumably the per-shard object count considered by resharding -- TODO confirm
1759 OPTION(rgw_reshard_thread_interval, OPT_U32, 60 * 10) // maximum time between rounds of reshard thread processing