Commit | Line | Data
---|---|---
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | /* note: no header guard */ | |
16 | OPTION(host, OPT_STR, "") // "" means that ceph will use short hostname | |
17 | OPTION(fsid, OPT_UUID, uuid_d()) | |
18 | OPTION(public_addr, OPT_ADDR, entity_addr_t()) | |
19 | OPTION(cluster_addr, OPT_ADDR, entity_addr_t()) | |
20 | OPTION(public_network, OPT_STR, "") | |
21 | OPTION(cluster_network, OPT_STR, "") | |
22 | OPTION(num_client, OPT_INT, 1) | |
23 | OPTION(monmap, OPT_STR, "") | |
24 | OPTION(mon_host, OPT_STR, "") | |
25 | OPTION(mon_dns_srv_name, OPT_STR, "ceph-mon") | |
26 | OPTION(lockdep, OPT_BOOL, false) | |
27 | OPTION(lockdep_force_backtrace, OPT_BOOL, false) // always gather current backtrace at every lock | |
28 | OPTION(run_dir, OPT_STR, "/var/run/ceph") // the "/var/run/ceph" dir, created on daemon startup | |
29 | OPTION(admin_socket, OPT_STR, "$run_dir/$cluster-$name.asok") // default changed by common_preinit() | |
30 | OPTION(admin_socket_mode, OPT_STR, "") // permission bits to set for admin socket file, e.g., "0775", "0755" | |
31 | OPTION(crushtool, OPT_STR, "crushtool") // crushtool utility path | |
32 | ||
33 | OPTION(daemonize, OPT_BOOL, false) // default changed by common_preinit() | |
34 | OPTION(setuser, OPT_STR, "") // uid or user name | |
35 | OPTION(setgroup, OPT_STR, "") // gid or group name | |
36 | OPTION(setuser_match_path, OPT_STR, "") // make setuser/group conditional on this path matching ownership | |
37 | OPTION(pid_file, OPT_STR, "") // default changed by common_preinit() | |
38 | OPTION(chdir, OPT_STR, "/") | |
39 | OPTION(max_open_files, OPT_LONGLONG, 0) | |
40 | OPTION(restapi_log_level, OPT_STR, "") // default set by Python code | |
41 | OPTION(restapi_base_url, OPT_STR, "") // " | |
42 | OPTION(fatal_signal_handlers, OPT_BOOL, true) | |
43 | SAFE_OPTION(erasure_code_dir, OPT_STR, CEPH_PKGLIBDIR"/erasure-code") // default location for erasure-code plugins | |
44 | ||
45 | OPTION(log_file, OPT_STR, "/var/log/ceph/$cluster-$name.log") // default changed by common_preinit() | |
46 | OPTION(log_max_new, OPT_INT, 1000) // default changed by common_preinit() | |
47 | OPTION(log_max_recent, OPT_INT, 10000) // default changed by common_preinit() | |
48 | OPTION(log_to_stderr, OPT_BOOL, true) // default changed by common_preinit() | |
49 | OPTION(err_to_stderr, OPT_BOOL, true) // default changed by common_preinit() | |
50 | OPTION(log_to_syslog, OPT_BOOL, false) | |
51 | OPTION(err_to_syslog, OPT_BOOL, false) | |
52 | OPTION(log_flush_on_exit, OPT_BOOL, true) // default changed by common_preinit() | |
53 | OPTION(log_stop_at_utilization, OPT_FLOAT, .97) // stop logging at (near) full | |
54 | OPTION(log_to_graylog, OPT_BOOL, false) | |
55 | OPTION(err_to_graylog, OPT_BOOL, false) | |
56 | OPTION(log_graylog_host, OPT_STR, "127.0.0.1") | |
57 | OPTION(log_graylog_port, OPT_INT, 12201) | |
58 | ||
59 | // options will take k/v pairs, or single-item that will be assumed as general | |
60 | // default for all, regardless of channel. | |
61 | // e.g., "info" would be taken as the same as "default=info" | |
62 | // also, "default=daemon audit=local0" would mean | |
63 | // "default all to 'daemon', override 'audit' with 'local0' | |
64 | OPTION(clog_to_monitors, OPT_STR, "default=true") | |
65 | OPTION(clog_to_syslog, OPT_STR, "false") | |
66 | OPTION(clog_to_syslog_level, OPT_STR, "info") // this level and above | |
67 | OPTION(clog_to_syslog_facility, OPT_STR, "default=daemon audit=local0") | |
68 | OPTION(clog_to_graylog, OPT_STR, "false") | |
69 | OPTION(clog_to_graylog_host, OPT_STR, "127.0.0.1") | |
70 | OPTION(clog_to_graylog_port, OPT_STR, "12201") | |
71 | ||
72 | OPTION(mon_cluster_log_to_syslog, OPT_STR, "default=false") | |
73 | OPTION(mon_cluster_log_to_syslog_level, OPT_STR, "info") // this level and above | |
74 | OPTION(mon_cluster_log_to_syslog_facility, OPT_STR, "daemon") | |
75 | OPTION(mon_cluster_log_file, OPT_STR, | |
76 | "default=/var/log/ceph/$cluster.$channel.log cluster=/var/log/ceph/$cluster.log") | |
77 | OPTION(mon_cluster_log_file_level, OPT_STR, "info") | |
78 | OPTION(mon_cluster_log_to_graylog, OPT_STR, "false") | |
79 | OPTION(mon_cluster_log_to_graylog_host, OPT_STR, "127.0.0.1") | |
80 | OPTION(mon_cluster_log_to_graylog_port, OPT_STR, "12201") | |
81 | ||
82 | OPTION(enable_experimental_unrecoverable_data_corrupting_features, OPT_STR, "") | |
83 | ||
84 | SAFE_OPTION(plugin_dir, OPT_STR, CEPH_PKGLIBDIR) | |
85 | ||
86 | OPTION(xio_trace_mempool, OPT_BOOL, false) // mempool allocation counters | |
87 | OPTION(xio_trace_msgcnt, OPT_BOOL, false) // incoming/outgoing msg counters | |
88 | OPTION(xio_trace_xcon, OPT_BOOL, false) // Xio message encode/decode trace | |
89 | OPTION(xio_queue_depth, OPT_INT, 128) // depth of Accelio msg queue | |
90 | OPTION(xio_mp_min, OPT_INT, 128) // default min mempool size | |
91 | OPTION(xio_mp_max_64, OPT_INT, 65536) // max 64-byte chunks (buffer is 40) | |
92 | OPTION(xio_mp_max_256, OPT_INT, 8192) // max 256-byte chunks | |
93 | OPTION(xio_mp_max_1k, OPT_INT, 8192) // max 1K chunks | |
 94 | OPTION(xio_mp_max_page, OPT_INT, 4096) // max page-size chunks | |
95 | OPTION(xio_mp_max_hint, OPT_INT, 4096) // max size-hint chunks | |
96 | OPTION(xio_portal_threads, OPT_INT, 2) // xio portal threads per messenger | |
97 | OPTION(xio_max_conns_per_portal, OPT_INT, 32) // max xio_connections per portal/ctx | |
98 | OPTION(xio_transport_type, OPT_STR, "rdma") // xio transport type: {rdma or tcp} | |
99 | OPTION(xio_max_send_inline, OPT_INT, 512) // xio maximum threshold to send inline | |
100 | ||
101 | OPTION(compressor_zlib_isal, OPT_BOOL, false) | |
102 | OPTION(compressor_zlib_level, OPT_INT, 5) //regular zlib compression level, not applicable to isa-l optimized version | |
103 | ||
104 | OPTION(async_compressor_enabled, OPT_BOOL, false) | |
105 | OPTION(async_compressor_type, OPT_STR, "snappy") | |
106 | OPTION(async_compressor_threads, OPT_INT, 2) | |
107 | OPTION(async_compressor_thread_timeout, OPT_INT, 5) | |
108 | OPTION(async_compressor_thread_suicide_timeout, OPT_INT, 30) | |
109 | ||
110 | OPTION(plugin_crypto_accelerator, OPT_STR, "crypto_isal") | |
111 | ||
112 | OPTION(mempool_debug, OPT_BOOL, false) | |
113 | ||
114 | DEFAULT_SUBSYS(0, 5) | |
115 | SUBSYS(lockdep, 0, 1) | |
116 | SUBSYS(context, 0, 1) | |
117 | SUBSYS(crush, 1, 1) | |
118 | SUBSYS(mds, 1, 5) | |
119 | SUBSYS(mds_balancer, 1, 5) | |
120 | SUBSYS(mds_locker, 1, 5) | |
121 | SUBSYS(mds_log, 1, 5) | |
122 | SUBSYS(mds_log_expire, 1, 5) | |
123 | SUBSYS(mds_migrator, 1, 5) | |
124 | SUBSYS(buffer, 0, 1) | |
125 | SUBSYS(timer, 0, 1) | |
126 | SUBSYS(filer, 0, 1) | |
127 | SUBSYS(striper, 0, 1) | |
128 | SUBSYS(objecter, 0, 1) | |
129 | SUBSYS(rados, 0, 5) | |
130 | SUBSYS(rbd, 0, 5) | |
131 | SUBSYS(rbd_mirror, 0, 5) | |
132 | SUBSYS(rbd_replay, 0, 5) | |
133 | SUBSYS(journaler, 0, 5) | |
134 | SUBSYS(objectcacher, 0, 5) | |
135 | SUBSYS(client, 0, 5) | |
136 | SUBSYS(osd, 1, 5) | |
137 | SUBSYS(optracker, 0, 5) | |
138 | SUBSYS(objclass, 0, 5) | |
139 | SUBSYS(filestore, 1, 3) | |
140 | SUBSYS(journal, 1, 3) | |
141 | SUBSYS(ms, 0, 5) | |
142 | SUBSYS(mon, 1, 5) | |
143 | SUBSYS(monc, 0, 10) | |
144 | SUBSYS(paxos, 1, 5) | |
145 | SUBSYS(tp, 0, 5) | |
146 | SUBSYS(auth, 1, 5) | |
147 | SUBSYS(crypto, 1, 5) | |
148 | SUBSYS(finisher, 1, 1) | |
149 | SUBSYS(heartbeatmap, 1, 5) | |
150 | SUBSYS(perfcounter, 1, 5) | |
151 | SUBSYS(rgw, 1, 5) // log level for the Rados gateway | |
152 | SUBSYS(civetweb, 1, 10) | |
153 | SUBSYS(javaclient, 1, 5) | |
154 | SUBSYS(asok, 1, 5) | |
155 | SUBSYS(throttle, 1, 1) | |
156 | SUBSYS(refs, 0, 0) | |
157 | SUBSYS(xio, 1, 5) | |
158 | SUBSYS(compressor, 1, 5) | |
159 | SUBSYS(bluestore, 1, 5) | |
160 | SUBSYS(bluefs, 1, 5) | |
161 | SUBSYS(bdev, 1, 3) | |
162 | SUBSYS(kstore, 1, 5) | |
163 | SUBSYS(rocksdb, 4, 5) | |
164 | SUBSYS(leveldb, 4, 5) | |
165 | SUBSYS(memdb, 4, 5) | |
166 | SUBSYS(kinetic, 1, 5) | |
167 | SUBSYS(fuse, 1, 5) | |
168 | SUBSYS(mgr, 1, 5) | |
169 | SUBSYS(mgrc, 1, 5) | |
170 | SUBSYS(dpdk, 1, 5) | |
171 | SUBSYS(eventtrace, 1, 5) | |
172 | ||
173 | OPTION(key, OPT_STR, "") | |
174 | OPTION(keyfile, OPT_STR, "") | |
175 | OPTION(keyring, OPT_STR, | |
176 | // default changed by common_preinit() for mds and osd | |
177 | "/etc/ceph/$cluster.$name.keyring,/etc/ceph/$cluster.keyring,/etc/ceph/keyring,/etc/ceph/keyring.bin," | |
178 | #if defined(__FreeBSD) | |
179 | "/usr/local/etc/ceph/$cluster.$name.keyring,/usr/local/etc/ceph/$cluster.keyring," | |
180 | "/usr/local/etc/ceph/keyring,/usr/local/etc/ceph/keyring.bin," | |
181 | #endif | |
182 | ) | |
183 | OPTION(heartbeat_interval, OPT_INT, 5) | |
184 | OPTION(heartbeat_file, OPT_STR, "") | |
185 | OPTION(heartbeat_inject_failure, OPT_INT, 0) // force an unhealthy heartbeat for N seconds | |
186 | OPTION(perf, OPT_BOOL, true) // enable internal perf counters | |
187 | ||
188 | SAFE_OPTION(ms_type, OPT_STR, "async+posix") // messenger backend. It will be modified in runtime, so use SAFE_OPTION | |
189 | OPTION(ms_public_type, OPT_STR, "") // messenger backend | |
190 | OPTION(ms_cluster_type, OPT_STR, "") // messenger backend | |
191 | OPTION(ms_tcp_nodelay, OPT_BOOL, true) | |
192 | OPTION(ms_tcp_rcvbuf, OPT_INT, 0) | |
193 | OPTION(ms_tcp_prefetch_max_size, OPT_INT, 4096) // max prefetch size, we limit this to avoid extra memcpy | |
194 | OPTION(ms_initial_backoff, OPT_DOUBLE, .2) | |
195 | OPTION(ms_max_backoff, OPT_DOUBLE, 15.0) | |
196 | OPTION(ms_crc_data, OPT_BOOL, true) | |
197 | OPTION(ms_crc_header, OPT_BOOL, true) | |
198 | OPTION(ms_die_on_bad_msg, OPT_BOOL, false) | |
199 | OPTION(ms_die_on_unhandled_msg, OPT_BOOL, false) | |
200 | OPTION(ms_die_on_old_message, OPT_BOOL, false) // assert if we get a dup incoming message and shouldn't have (may be triggered by pre-541cd3c64be0dfa04e8a2df39422e0eb9541a428 code) | |
201 | OPTION(ms_die_on_skipped_message, OPT_BOOL, false) // assert if we skip a seq (kernel client does this intentionally) | |
202 | OPTION(ms_dispatch_throttle_bytes, OPT_U64, 100 << 20) | |
203 | OPTION(ms_bind_ipv6, OPT_BOOL, false) | |
204 | OPTION(ms_bind_port_min, OPT_INT, 6800) | |
205 | OPTION(ms_bind_port_max, OPT_INT, 7300) | |
 206 | #if !defined(__FreeBSD__) | |
 207 | OPTION(ms_bind_retry_count, OPT_INT, 3) // If binding fails, how many times do we retry to bind | |
 208 | OPTION(ms_bind_retry_delay, OPT_INT, 5) // Delay between attempts to bind | |
 209 | #else | |
 210 | // FreeBSD does not use SO_REUSEADDR so allow for a bit more time per default | |
 211 | OPTION(ms_bind_retry_count, OPT_INT, 6) // If binding fails, how many times do we retry to bind | |
 212 | OPTION(ms_bind_retry_delay, OPT_INT, 6) // Delay between attempts to bind | |
 213 | #endif | |
214 | OPTION(ms_bind_before_connect, OPT_BOOL, true) | |
215 | OPTION(ms_rwthread_stack_bytes, OPT_U64, 1024 << 10) | |
216 | OPTION(ms_tcp_read_timeout, OPT_U64, 900) | |
217 | OPTION(ms_pq_max_tokens_per_priority, OPT_U64, 16777216) | |
218 | OPTION(ms_pq_min_cost, OPT_U64, 65536) | |
219 | OPTION(ms_inject_socket_failures, OPT_U64, 0) | |
220 | SAFE_OPTION(ms_inject_delay_type, OPT_STR, "") // "osd mds mon client" allowed | |
221 | OPTION(ms_inject_delay_msg_type, OPT_STR, "") // the type of message to delay, as returned by Message::get_type_name(). This is an additional restriction on the general type filter ms_inject_delay_type. | |
222 | OPTION(ms_inject_delay_max, OPT_DOUBLE, 1) // seconds | |
223 | OPTION(ms_inject_delay_probability, OPT_DOUBLE, 0) // range [0, 1] | |
224 | OPTION(ms_inject_internal_delays, OPT_DOUBLE, 0) // seconds | |
225 | OPTION(ms_dump_on_send, OPT_BOOL, false) // hexdump msg to log on send | |
226 | OPTION(ms_dump_corrupt_message_level, OPT_INT, 1) // debug level to hexdump undecodeable messages at | |
227 | OPTION(ms_async_op_threads, OPT_U64, 3) // number of worker processing threads for async messenger created on init | |
228 | OPTION(ms_async_max_op_threads, OPT_U64, 5) // max number of worker processing threads for async messenger | |
229 | OPTION(ms_async_set_affinity, OPT_BOOL, true) | |
230 | // example: ms_async_affinity_cores = 0,1 | |
231 | // The number of coreset is expected to equal to ms_async_op_threads, otherwise | |
232 | // extra op threads will loop ms_async_affinity_cores again. | |
233 | // If ms_async_affinity_cores is empty, all threads will be bind to current running | |
234 | // core | |
235 | OPTION(ms_async_affinity_cores, OPT_STR, "") | |
236 | OPTION(ms_async_send_inline, OPT_BOOL, false) | |
237 | OPTION(ms_async_rdma_device_name, OPT_STR, "") | |
238 | OPTION(ms_async_rdma_enable_hugepage, OPT_BOOL, false) | |
239 | OPTION(ms_async_rdma_buffer_size, OPT_INT, 128 << 10) | |
240 | OPTION(ms_async_rdma_send_buffers, OPT_U32, 1024) | |
241 | OPTION(ms_async_rdma_receive_buffers, OPT_U32, 1024) | |
242 | OPTION(ms_async_rdma_port_num, OPT_U32, 1) | |
243 | OPTION(ms_async_rdma_polling_us, OPT_U32, 1000) | |
244 | OPTION(ms_async_rdma_local_gid, OPT_STR, "") // GID format: "fe80:0000:0000:0000:7efe:90ff:fe72:6efe", no zero folding | |
245 | OPTION(ms_async_rdma_roce_ver, OPT_INT, 1) // 0=RoCEv1, 1=RoCEv2, 2=RoCEv1.5 | |
246 | OPTION(ms_async_rdma_sl, OPT_INT, 3) // in RoCE, this means PCP | |
247 | ||
248 | OPTION(ms_dpdk_port_id, OPT_INT, 0) | |
249 | SAFE_OPTION(ms_dpdk_coremask, OPT_STR, "1") // it is modified in unittest so that use SAFE_OPTION to declare | |
250 | OPTION(ms_dpdk_memory_channel, OPT_STR, "4") | |
251 | OPTION(ms_dpdk_hugepages, OPT_STR, "") | |
252 | OPTION(ms_dpdk_pmd, OPT_STR, "") | |
253 | SAFE_OPTION(ms_dpdk_host_ipv4_addr, OPT_STR, "") | |
254 | SAFE_OPTION(ms_dpdk_gateway_ipv4_addr, OPT_STR, "") | |
255 | SAFE_OPTION(ms_dpdk_netmask_ipv4_addr, OPT_STR, "") | |
256 | OPTION(ms_dpdk_lro, OPT_BOOL, true) | |
257 | OPTION(ms_dpdk_hw_flow_control, OPT_BOOL, true) | |
258 | // Weighing of a hardware network queue relative to a software queue (0=no work, 1= equal share)") | |
259 | OPTION(ms_dpdk_hw_queue_weight, OPT_FLOAT, 1) | |
260 | OPTION(ms_dpdk_debug_allow_loopback, OPT_BOOL, false) | |
261 | OPTION(ms_dpdk_rx_buffer_count_per_core, OPT_INT, 8192) | |
262 | ||
263 | OPTION(inject_early_sigterm, OPT_BOOL, false) | |
264 | ||
265 | OPTION(mon_data, OPT_STR, "/var/lib/ceph/mon/$cluster-$id") | |
266 | OPTION(mon_initial_members, OPT_STR, "") // list of initial cluster mon ids; if specified, need majority to form initial quorum and create new cluster | |
267 | OPTION(mon_sync_fs_threshold, OPT_INT, 5) // sync() when writing this many objects; 0 to disable. | |
268 | OPTION(mon_compact_on_start, OPT_BOOL, false) // compact leveldb on ceph-mon start | |
269 | OPTION(mon_compact_on_bootstrap, OPT_BOOL, false) // trigger leveldb compaction on bootstrap | |
270 | OPTION(mon_compact_on_trim, OPT_BOOL, true) // compact (a prefix) when we trim old states | |
271 | OPTION(mon_osd_cache_size, OPT_INT, 10) // the size of osdmaps cache, not to rely on underlying store's cache | |
272 | ||
273 | OPTION(mon_cpu_threads, OPT_INT, 4) | |
274 | OPTION(mon_osd_mapping_pgs_per_chunk, OPT_INT, 4096) | |
275 | OPTION(mon_tick_interval, OPT_INT, 5) | |
276 | OPTION(mon_session_timeout, OPT_INT, 300) // must send keepalive or subscribe | |
277 | OPTION(mon_subscribe_interval, OPT_DOUBLE, 24*3600) // for legacy clients only | |
278 | OPTION(mon_delta_reset_interval, OPT_DOUBLE, 10) // seconds of inactivity before we reset the pg delta to 0 | |
279 | OPTION(mon_osd_laggy_halflife, OPT_INT, 60*60) // (seconds) how quickly our laggy estimations decay | |
280 | OPTION(mon_osd_laggy_weight, OPT_DOUBLE, .3) // weight for new 'samples's in laggy estimations | |
281 | OPTION(mon_osd_laggy_max_interval, OPT_INT, 300) // maximum value of laggy_interval in laggy estimations | |
282 | OPTION(mon_osd_adjust_heartbeat_grace, OPT_BOOL, true) // true if we should scale based on laggy estimations | |
283 | OPTION(mon_osd_adjust_down_out_interval, OPT_BOOL, true) // true if we should scale based on laggy estimations | |
284 | OPTION(mon_osd_auto_mark_in, OPT_BOOL, false) // mark any booting osds 'in' | |
285 | OPTION(mon_osd_auto_mark_auto_out_in, OPT_BOOL, true) // mark booting auto-marked-out osds 'in' | |
286 | OPTION(mon_osd_auto_mark_new_in, OPT_BOOL, true) // mark booting new osds 'in' | |
287 | OPTION(mon_osd_down_out_interval, OPT_INT, 600) // seconds | |
288 | OPTION(mon_osd_down_out_subtree_limit, OPT_STR, "rack") // smallest crush unit/type that we will not automatically mark out | |
289 | OPTION(mon_osd_min_up_ratio, OPT_DOUBLE, .3) // min osds required to be up to mark things down | |
290 | OPTION(mon_osd_min_in_ratio, OPT_DOUBLE, .75) // min osds required to be in to mark things out | |
291 | OPTION(mon_osd_max_op_age, OPT_DOUBLE, 32) // max op age before we get concerned (make it a power of 2) | |
292 | OPTION(mon_osd_max_split_count, OPT_INT, 32) // largest number of PGs per "involved" OSD to let split create | |
293 | OPTION(mon_osd_allow_primary_temp, OPT_BOOL, false) // allow primary_temp to be set in the osdmap | |
294 | OPTION(mon_osd_allow_primary_affinity, OPT_BOOL, false) // allow primary_affinity to be set in the osdmap | |
295 | OPTION(mon_osd_prime_pg_temp, OPT_BOOL, true) // prime osdmap with pg mapping changes | |
296 | OPTION(mon_osd_prime_pg_temp_max_time, OPT_FLOAT, .5) // max time to spend priming | |
297 | OPTION(mon_osd_prime_pg_temp_max_estimate, OPT_FLOAT, .25) // max estimate of pg total before we do all pgs in parallel | |
298 | OPTION(mon_osd_pool_ec_fast_read, OPT_BOOL, false) // whether turn on fast read on the pool or not | |
299 | OPTION(mon_stat_smooth_intervals, OPT_INT, 6) // smooth stats over last N PGMap maps | |
300 | OPTION(mon_election_timeout, OPT_FLOAT, 5) // on election proposer, max waiting time for all ACKs | |
301 | OPTION(mon_lease, OPT_FLOAT, 5) // lease interval | |
302 | OPTION(mon_lease_renew_interval_factor, OPT_FLOAT, .6) // on leader, to renew the lease | |
303 | OPTION(mon_lease_ack_timeout_factor, OPT_FLOAT, 2.0) // on leader, if lease isn't acked by all peons | |
304 | OPTION(mon_accept_timeout_factor, OPT_FLOAT, 2.0) // on leader, if paxos update isn't accepted | |
305 | ||
306 | OPTION(mon_clock_drift_allowed, OPT_FLOAT, .050) // allowed clock drift between monitors | |
307 | OPTION(mon_clock_drift_warn_backoff, OPT_FLOAT, 5) // exponential backoff for clock drift warnings | |
308 | OPTION(mon_timecheck_interval, OPT_FLOAT, 300.0) // on leader, timecheck (clock drift check) interval (seconds) | |
309 | OPTION(mon_timecheck_skew_interval, OPT_FLOAT, 30.0) // on leader, timecheck (clock drift check) interval when in presence of a skew (seconds) | |
310 | OPTION(mon_pg_stuck_threshold, OPT_INT, 300) // number of seconds after which pgs can be considered inactive, unclean, or stale (see doc/control.rst under dump_stuck for more info) | |
311 | OPTION(mon_pg_min_inactive, OPT_U64, 1) // the number of PGs which have to be inactive longer than 'mon_pg_stuck_threshold' before health goes into ERR. 0 means disabled, never go into ERR. | |
312 | OPTION(mon_pg_warn_min_per_osd, OPT_INT, 30) // min # pgs per (in) osd before we warn the admin | |
313 | OPTION(mon_pg_warn_max_per_osd, OPT_INT, 300) // max # pgs per (in) osd before we warn the admin | |
 314 | OPTION(mon_pg_warn_max_object_skew, OPT_FLOAT, 10.0) // max skew from average in objects per pg | |
315 | OPTION(mon_pg_warn_min_objects, OPT_INT, 10000) // do not warn below this object # | |
316 | OPTION(mon_pg_warn_min_pool_objects, OPT_INT, 1000) // do not warn on pools below this object # | |
317 | OPTION(mon_pg_check_down_all_threshold, OPT_FLOAT, .5) // threshold of down osds after which we check all pgs | |
318 | OPTION(mon_cache_target_full_warn_ratio, OPT_FLOAT, .66) // position between pool cache_target_full and max where we start warning | |
319 | OPTION(mon_osd_full_ratio, OPT_FLOAT, .95) // what % full makes an OSD "full" | |
320 | OPTION(mon_osd_backfillfull_ratio, OPT_FLOAT, .90) // what % full makes an OSD backfill full (backfill halted) | |
321 | OPTION(mon_osd_nearfull_ratio, OPT_FLOAT, .85) // what % full makes an OSD near full | |
322 | OPTION(mon_osd_initial_require_min_compat_client, OPT_STR, "hammer") | |
323 | OPTION(mon_allow_pool_delete, OPT_BOOL, false) // allow pool deletion | |
324 | OPTION(mon_fake_pool_delete, OPT_BOOL, false) // fake pool deletion (add _DELETED suffix) | |
325 | OPTION(mon_globalid_prealloc, OPT_U32, 10000) // how many globalids to prealloc | |
326 | OPTION(mon_osd_report_timeout, OPT_INT, 900) // grace period before declaring unresponsive OSDs dead | |
327 | OPTION(mon_force_standby_active, OPT_BOOL, true) // should mons force standby-replay mds to be active | |
328 | OPTION(mon_warn_on_legacy_crush_tunables, OPT_BOOL, true) // warn if crush tunables are too old (older than mon_min_crush_required_version) | |
329 | OPTION(mon_crush_min_required_version, OPT_STR, "firefly") | |
330 | OPTION(mon_warn_on_crush_straw_calc_version_zero, OPT_BOOL, true) // warn if crush straw_calc_version==0 | |
331 | OPTION(mon_warn_on_osd_down_out_interval_zero, OPT_BOOL, true) // warn if 'mon_osd_down_out_interval == 0' | |
332 | OPTION(mon_warn_on_cache_pools_without_hit_sets, OPT_BOOL, true) | |
333 | OPTION(mon_warn_osd_usage_min_max_delta, OPT_FLOAT, .40) // warn if difference between min and max OSD utilizations exceeds specified amount | |
334 | OPTION(mon_min_osdmap_epochs, OPT_INT, 500) | |
335 | OPTION(mon_max_pgmap_epochs, OPT_INT, 500) | |
336 | OPTION(mon_max_log_epochs, OPT_INT, 500) | |
337 | OPTION(mon_max_mdsmap_epochs, OPT_INT, 500) | |
338 | OPTION(mon_max_osd, OPT_INT, 10000) | |
339 | OPTION(mon_probe_timeout, OPT_DOUBLE, 2.0) | |
340 | OPTION(mon_slurp_timeout, OPT_DOUBLE, 10.0) | |
341 | OPTION(mon_slurp_bytes, OPT_INT, 256*1024) // limit size of slurp messages | |
342 | OPTION(mon_client_bytes, OPT_U64, 100ul << 20) // client msg data allowed in memory (in bytes) | |
343 | OPTION(mon_mgr_proxy_client_bytes_ratio, OPT_FLOAT, .3) // ratio of mon_client_bytes that can be consumed by proxied mgr commands before we error out to client | |
344 | OPTION(mon_daemon_bytes, OPT_U64, 400ul << 20) // mds, osd message memory cap (in bytes) | |
345 | OPTION(mon_max_log_entries_per_event, OPT_INT, 4096) | |
346 | OPTION(mon_reweight_min_pgs_per_osd, OPT_U64, 10) // min pgs per osd for reweight-by-pg command | |
347 | OPTION(mon_reweight_min_bytes_per_osd, OPT_U64, 100*1024*1024) // min bytes per osd for reweight-by-utilization command | |
348 | OPTION(mon_reweight_max_osds, OPT_INT, 4) // max osds to change per reweight-by-* command | |
349 | OPTION(mon_reweight_max_change, OPT_DOUBLE, 0.05) | |
350 | OPTION(mon_health_data_update_interval, OPT_FLOAT, 60.0) | |
351 | OPTION(mon_health_to_clog, OPT_BOOL, true) | |
352 | OPTION(mon_health_to_clog_interval, OPT_INT, 3600) | |
353 | OPTION(mon_health_to_clog_tick_interval, OPT_DOUBLE, 60.0) | |
354 | OPTION(mon_data_avail_crit, OPT_INT, 5) | |
355 | OPTION(mon_data_avail_warn, OPT_INT, 30) | |
356 | OPTION(mon_data_size_warn, OPT_U64, 15*1024*1024*1024) // issue a warning when the monitor's data store goes over 15GB (in bytes) | |
357 | OPTION(mon_warn_not_scrubbed, OPT_INT, 0) | |
358 | OPTION(mon_warn_not_deep_scrubbed, OPT_INT, 0) | |
359 | OPTION(mon_scrub_interval, OPT_INT, 3600*24) // once a day | |
360 | OPTION(mon_scrub_timeout, OPT_INT, 60*5) // let's give it 5 minutes; why not. | |
361 | OPTION(mon_scrub_max_keys, OPT_INT, 100) // max number of keys to scrub each time | |
362 | OPTION(mon_scrub_inject_crc_mismatch, OPT_DOUBLE, 0.0) // probability of injected crc mismatch [0.0, 1.0] | |
363 | OPTION(mon_scrub_inject_missing_keys, OPT_DOUBLE, 0.0) // probability of injected missing keys [0.0, 1.0] | |
364 | OPTION(mon_config_key_max_entry_size, OPT_INT, 4096) // max num bytes per config-key entry | |
365 | OPTION(mon_sync_timeout, OPT_DOUBLE, 60.0) | |
366 | OPTION(mon_sync_max_payload_size, OPT_U32, 1048576) // max size for a sync chunk payload (say, 1MB) | |
367 | OPTION(mon_sync_debug, OPT_BOOL, false) // enable sync-specific debug | |
368 | OPTION(mon_sync_debug_leader, OPT_INT, -1) // monitor to be used as the sync leader | |
369 | OPTION(mon_sync_debug_provider, OPT_INT, -1) // monitor to be used as the sync provider | |
370 | OPTION(mon_sync_debug_provider_fallback, OPT_INT, -1) // monitor to be used as fallback if sync provider fails | |
371 | OPTION(mon_inject_sync_get_chunk_delay, OPT_DOUBLE, 0) // inject N second delay on each get_chunk request | |
372 | OPTION(mon_osd_min_down_reporters, OPT_INT, 2) // number of OSDs from different subtrees who need to report a down OSD for it to count | |
373 | OPTION(mon_osd_reporter_subtree_level , OPT_STR, "host") // in which level of parent bucket the reporters are counted | |
374 | OPTION(mon_osd_force_trim_to, OPT_INT, 0) // force mon to trim maps to this point, regardless of min_last_epoch_clean (dangerous, use with care) | |
375 | OPTION(mon_mds_force_trim_to, OPT_INT, 0) // force mon to trim mdsmaps to this point (dangerous, use with care) | |
376 | OPTION(mon_mds_skip_sanity, OPT_BOOL, false) // skip safety assertions on FSMap (in case of bugs where we want to continue anyway) | |
377 | ||
378 | // monitor debug options | |
379 | OPTION(mon_debug_deprecated_as_obsolete, OPT_BOOL, false) // consider deprecated commands as obsolete | |
380 | ||
381 | // dump transactions | |
382 | OPTION(mon_debug_dump_transactions, OPT_BOOL, false) | |
383 | OPTION(mon_debug_dump_json, OPT_BOOL, false) | |
384 | OPTION(mon_debug_dump_location, OPT_STR, "/var/log/ceph/$cluster-$name.tdump") | |
385 | OPTION(mon_debug_no_require_luminous, OPT_BOOL, false) | |
386 | OPTION(mon_inject_transaction_delay_max, OPT_DOUBLE, 10.0) // seconds | |
387 | OPTION(mon_inject_transaction_delay_probability, OPT_DOUBLE, 0) // range [0, 1] | |
388 | ||
389 | OPTION(mon_sync_provider_kill_at, OPT_INT, 0) // kill the sync provider at a specific point in the work flow | |
390 | OPTION(mon_sync_requester_kill_at, OPT_INT, 0) // kill the sync requester at a specific point in the work flow | |
391 | OPTION(mon_force_quorum_join, OPT_BOOL, false) // force monitor to join quorum even if it has been previously removed from the map | |
392 | OPTION(mon_keyvaluedb, OPT_STR, "rocksdb") // type of keyvaluedb backend | |
393 | ||
394 | // UNSAFE -- TESTING ONLY! Allows addition of a cache tier with preexisting snaps | |
395 | OPTION(mon_debug_unsafe_allow_tier_with_nonempty_snaps, OPT_BOOL, false) | |
396 | ||
397 | OPTION(paxos_stash_full_interval, OPT_INT, 25) // how often (in commits) to stash a full copy of the PaxosService state | |
398 | OPTION(paxos_max_join_drift, OPT_INT, 10) // max paxos iterations before we must first sync the monitor stores | |
399 | OPTION(paxos_propose_interval, OPT_DOUBLE, 1.0) // gather updates for this long before proposing a map update | |
400 | OPTION(paxos_min_wait, OPT_DOUBLE, 0.05) // min time to gather updates for after period of inactivity | |
401 | OPTION(paxos_min, OPT_INT, 500) // minimum number of paxos states to keep around | |
402 | OPTION(paxos_trim_min, OPT_INT, 250) // number of extra proposals tolerated before trimming | |
403 | OPTION(paxos_trim_max, OPT_INT, 500) // max number of extra proposals to trim at a time | |
404 | OPTION(paxos_service_trim_min, OPT_INT, 250) // minimum amount of versions to trigger a trim (0 disables it) | |
405 | OPTION(paxos_service_trim_max, OPT_INT, 500) // maximum amount of versions to trim during a single proposal (0 disables it) | |
406 | OPTION(paxos_kill_at, OPT_INT, 0) | |
407 | OPTION(auth_cluster_required, OPT_STR, "cephx") // required of mon, mds, osd daemons | |
408 | OPTION(auth_service_required, OPT_STR, "cephx") // required by daemons of clients | |
409 | OPTION(auth_client_required, OPT_STR, "cephx, none") // what clients require of daemons | |
410 | OPTION(auth_supported, OPT_STR, "") // deprecated; default value for above if they are not defined. | |
411 | OPTION(max_rotating_auth_attempts, OPT_INT, 10) | |
412 | OPTION(cephx_require_signatures, OPT_BOOL, false) // If true, don't talk to Cephx partners if they don't support message signing; off by default | |
413 | OPTION(cephx_cluster_require_signatures, OPT_BOOL, false) | |
414 | OPTION(cephx_service_require_signatures, OPT_BOOL, false) | |
415 | OPTION(cephx_sign_messages, OPT_BOOL, true) // Default to signing session messages if supported | |
416 | OPTION(auth_mon_ticket_ttl, OPT_DOUBLE, 60*60*12) | |
417 | OPTION(auth_service_ticket_ttl, OPT_DOUBLE, 60*60) | |
418 | OPTION(auth_debug, OPT_BOOL, false) // if true, assert when weird things happen | |
419 | OPTION(mon_client_hunt_parallel, OPT_U32, 2) // how many mons to try to connect to in parallel during hunt | |
420 | OPTION(mon_client_hunt_interval, OPT_DOUBLE, 3.0) // try new mon every N seconds until we connect | |
421 | OPTION(mon_client_ping_interval, OPT_DOUBLE, 10.0) // ping every N seconds | |
422 | OPTION(mon_client_ping_timeout, OPT_DOUBLE, 30.0) // fail if we don't hear back | |
423 | OPTION(mon_client_hunt_interval_backoff, OPT_DOUBLE, 2.0) // each time we reconnect to a monitor, double our timeout | |
424 | OPTION(mon_client_hunt_interval_max_multiple, OPT_DOUBLE, 10.0) // up to a max of 10*default (30 seconds) | |
425 | OPTION(mon_client_max_log_entries_per_message, OPT_INT, 1000) | |
426 | OPTION(mon_max_pool_pg_num, OPT_INT, 65536) | |
427 | OPTION(mon_pool_quota_warn_threshold, OPT_INT, 0) // percent of quota at which to issue warnings | |
428 | OPTION(mon_pool_quota_crit_threshold, OPT_INT, 0) // percent of quota at which to issue errors | |
429 | OPTION(client_cache_size, OPT_INT, 16384) | |
430 | OPTION(client_cache_mid, OPT_FLOAT, .75) | |
431 | OPTION(client_use_random_mds, OPT_BOOL, false) | |
432 | OPTION(client_mount_timeout, OPT_DOUBLE, 300.0) | |
433 | OPTION(client_tick_interval, OPT_DOUBLE, 1.0) | |
434 | OPTION(client_trace, OPT_STR, "") | |
435 | OPTION(client_readahead_min, OPT_LONGLONG, 128*1024) // readahead at _least_ this much. | |
436 | OPTION(client_readahead_max_bytes, OPT_LONGLONG, 0) // default unlimited | |
437 | OPTION(client_readahead_max_periods, OPT_LONGLONG, 4) // as multiple of file layout period (object size * num stripes) | |
438 | OPTION(client_reconnect_stale, OPT_BOOL, false) // automatically reconnect stale session | |
439 | OPTION(client_snapdir, OPT_STR, ".snap") | |
440 | OPTION(client_mountpoint, OPT_STR, "/") | |
441 | OPTION(client_mount_uid, OPT_INT, -1) | |
442 | OPTION(client_mount_gid, OPT_INT, -1) | |
443 | OPTION(client_notify_timeout, OPT_INT, 10) // in seconds | |
444 | OPTION(osd_client_watch_timeout, OPT_INT, 30) // in seconds | |
445 | OPTION(client_caps_release_delay, OPT_INT, 5) // in seconds | |
446 | OPTION(client_quota_df, OPT_BOOL, true) // use quota for df on subdir mounts | |
447 | OPTION(client_oc, OPT_BOOL, true) | |
448 | OPTION(client_oc_size, OPT_INT, 1024*1024* 200) // MB * n | |
449 | OPTION(client_oc_max_dirty, OPT_INT, 1024*1024* 100) // MB * n (dirty OR tx.. bigish) | |
450 | OPTION(client_oc_target_dirty, OPT_INT, 1024*1024* 8) // target dirty (keep this smallish) | |
451 | OPTION(client_oc_max_dirty_age, OPT_DOUBLE, 5.0) // max age in cache before writeback | |
452 | OPTION(client_oc_max_objects, OPT_INT, 1000) // max objects in cache | |
453 | OPTION(client_debug_getattr_caps, OPT_BOOL, false) // check if MDS reply contains wanted caps | |
454 | OPTION(client_debug_force_sync_read, OPT_BOOL, false) // always read synchronously (go to osds) | |
455 | OPTION(client_debug_inject_tick_delay, OPT_INT, 0) // delay the client tick for a number of seconds | |
456 | OPTION(client_max_inline_size, OPT_U64, 4096) | |
457 | OPTION(client_inject_release_failure, OPT_BOOL, false) // synthetic client bug for testing | |
458 | OPTION(client_inject_fixed_oldest_tid, OPT_BOOL, false) // synthetic client bug for testing | |
459 | OPTION(client_metadata, OPT_STR, "") | |
460 | OPTION(client_acl_type, OPT_STR, "") | |
461 | OPTION(client_permissions, OPT_BOOL, true) | |
462 | OPTION(client_dirsize_rbytes, OPT_BOOL, true) | |
463 | ||
464 | // note: the max amount of "in flight" dirty data is roughly (max - target) | |
465 | OPTION(fuse_use_invalidate_cb, OPT_BOOL, true) // use fuse 2.8+ invalidate callback to keep page cache consistent | |
466 | OPTION(fuse_disable_pagecache, OPT_BOOL, false) | |
467 | OPTION(fuse_allow_other, OPT_BOOL, true) | |
468 | OPTION(fuse_default_permissions, OPT_BOOL, false) | |
469 | OPTION(fuse_big_writes, OPT_BOOL, true) | |
470 | OPTION(fuse_atomic_o_trunc, OPT_BOOL, true) | |
471 | OPTION(fuse_debug, OPT_BOOL, false) | |
472 | OPTION(fuse_multithreaded, OPT_BOOL, true) | |
473 | OPTION(fuse_require_active_mds, OPT_BOOL, true) // if ceph_fuse requires active mds server | |
474 | OPTION(fuse_syncfs_on_mksnap, OPT_BOOL, true) | |
475 | OPTION(fuse_set_user_groups, OPT_BOOL, false) // if ceph_fuse fills in group lists or not | |
476 | ||
477 | OPTION(client_try_dentry_invalidate, OPT_BOOL, true) // the client should try to use dentry invaldation instead of remounting, on kernels it believes that will work for | |
478 | OPTION(client_die_on_failed_remount, OPT_BOOL, true) | |
479 | OPTION(client_check_pool_perm, OPT_BOOL, true) | |
480 | OPTION(client_use_faked_inos, OPT_BOOL, false) | |
481 | OPTION(client_mds_namespace, OPT_STR, "") | |
482 | ||
483 | OPTION(crush_location, OPT_STR, "") // whitespace-separated list of key=value pairs describing crush location | |
484 | OPTION(crush_location_hook, OPT_STR, "") | |
485 | OPTION(crush_location_hook_timeout, OPT_INT, 10) | |
486 | ||
487 | OPTION(objecter_tick_interval, OPT_DOUBLE, 5.0) | |
488 | OPTION(objecter_timeout, OPT_DOUBLE, 10.0) // before we ask for a map | |
489 | OPTION(objecter_inflight_op_bytes, OPT_U64, 1024*1024*100) // max in-flight data (both directions) | |
490 | OPTION(objecter_inflight_ops, OPT_U64, 1024) // max in-flight ios | |
491 | OPTION(objecter_completion_locks_per_session, OPT_U64, 32) // num of completion locks per each session, for serializing same object responses | |
492 | OPTION(objecter_inject_no_watch_ping, OPT_BOOL, false) // suppress watch pings | |
493 | OPTION(objecter_retry_writes_after_first_reply, OPT_BOOL, false) // ignore the first reply for each write, and resend the osd op instead | |
494 | OPTION(objecter_debug_inject_relock_delay, OPT_BOOL, false) | |
495 | ||
496 | // Max number of deletes at once in a single Filer::purge call | |
497 | OPTION(filer_max_purge_ops, OPT_U32, 10) | |
498 | // Max number of truncate at once in a single Filer::truncate call | |
499 | OPTION(filer_max_truncate_ops, OPT_U32, 128) | |
500 | ||
501 | OPTION(journaler_write_head_interval, OPT_INT, 15) | |
502 | OPTION(journaler_prefetch_periods, OPT_INT, 10) // * journal object size | |
503 | OPTION(journaler_prezero_periods, OPT_INT, 5) // * journal object size | |
504 | OPTION(mds_data, OPT_STR, "/var/lib/ceph/mds/$cluster-$id") | |
505 | OPTION(mds_max_file_size, OPT_U64, 1ULL << 40) // Used when creating new CephFS. Change with 'ceph mds set max_file_size <size>' afterwards | |
506 | // max xattr kv pairs size for each dir/file | |
507 | OPTION(mds_max_xattr_pairs_size, OPT_U32, 64 << 10) | |
508 | OPTION(mds_cache_size, OPT_INT, 100000) | |
509 | OPTION(mds_cache_mid, OPT_FLOAT, .7) | |
510 | OPTION(mds_max_file_recover, OPT_U32, 32) | |
511 | OPTION(mds_dir_max_commit_size, OPT_INT, 10) // MB | |
512 | OPTION(mds_dir_keys_per_op, OPT_INT, 16384) | |
513 | OPTION(mds_decay_halflife, OPT_FLOAT, 5) | |
514 | OPTION(mds_beacon_interval, OPT_FLOAT, 4) | |
515 | OPTION(mds_beacon_grace, OPT_FLOAT, 15) | |
516 | OPTION(mds_enforce_unique_name, OPT_BOOL, true) | |
517 | OPTION(mds_blacklist_interval, OPT_FLOAT, 24.0*60.0) // how long to blacklist failed nodes | |
518 | OPTION(mds_session_timeout, OPT_FLOAT, 60) // cap bits and leases time out if client idle | |
519 | OPTION(mds_sessionmap_keys_per_op, OPT_U32, 1024) // how many sessions should I try to load/store in a single OMAP operation? | |
520 | OPTION(mds_revoke_cap_timeout, OPT_FLOAT, 60) // detect clients which aren't revoking caps | |
521 | OPTION(mds_recall_state_timeout, OPT_FLOAT, 60) // detect clients which aren't trimming caps | |
522 | OPTION(mds_freeze_tree_timeout, OPT_FLOAT, 30) // detecting freeze tree deadlock | |
523 | OPTION(mds_session_autoclose, OPT_FLOAT, 300) // autoclose idle session | |
524 | OPTION(mds_health_summarize_threshold, OPT_INT, 10) // collapse N-client health metrics to a single 'many' | |
525 | OPTION(mds_health_cache_threshold, OPT_FLOAT, 1.5) // warn on cache size if it exceeds mds_cache_size by this factor | |
526 | OPTION(mds_reconnect_timeout, OPT_FLOAT, 45) // seconds to wait for clients during mds restart | |
527 | // make it (mds_session_timeout - mds_beacon_grace) | |
528 | OPTION(mds_tick_interval, OPT_FLOAT, 5) | |
529 | OPTION(mds_dirstat_min_interval, OPT_FLOAT, 1) // try to avoid propagating more often than this | |
530 | OPTION(mds_scatter_nudge_interval, OPT_FLOAT, 5) // how quickly dirstat changes propagate up the hierarchy | |
531 | OPTION(mds_client_prealloc_inos, OPT_INT, 1000) | |
532 | OPTION(mds_early_reply, OPT_BOOL, true) | |
533 | OPTION(mds_default_dir_hash, OPT_INT, CEPH_STR_HASH_RJENKINS) | |
534 | OPTION(mds_log_pause, OPT_BOOL, false) | |
535 | OPTION(mds_log_skip_corrupt_events, OPT_BOOL, false) | |
536 | OPTION(mds_log_max_events, OPT_INT, -1) | |
537 | OPTION(mds_log_events_per_segment, OPT_INT, 1024) | |
538 | OPTION(mds_log_segment_size, OPT_INT, 0) // segment size for mds log, default to default file_layout_t | |
539 | OPTION(mds_log_max_segments, OPT_U32, 30) | |
540 | OPTION(mds_log_max_expiring, OPT_INT, 20) | |
541 | OPTION(mds_bal_export_pin, OPT_BOOL, true) // allow clients to pin directory trees to ranks | |
542 | OPTION(mds_bal_sample_interval, OPT_DOUBLE, 3.0) // every 3 seconds | |
543 | OPTION(mds_bal_replicate_threshold, OPT_FLOAT, 8000) | |
544 | OPTION(mds_bal_unreplicate_threshold, OPT_FLOAT, 0) | |
545 | OPTION(mds_bal_frag, OPT_BOOL, true) | |
546 | OPTION(mds_bal_split_size, OPT_INT, 10000) | |
547 | OPTION(mds_bal_split_rd, OPT_FLOAT, 25000) | |
548 | OPTION(mds_bal_split_wr, OPT_FLOAT, 10000) | |
549 | OPTION(mds_bal_split_bits, OPT_INT, 3) | |
550 | OPTION(mds_bal_merge_size, OPT_INT, 50) | |
551 | OPTION(mds_bal_interval, OPT_INT, 10) // seconds | |
552 | OPTION(mds_bal_fragment_interval, OPT_INT, 5) // seconds | |
553 | OPTION(mds_bal_fragment_size_max, OPT_INT, 10000*10) // order of magnitude higher than split size | |
554 | OPTION(mds_bal_fragment_fast_factor, OPT_FLOAT, 1.5) // multiple of size_max that triggers immediate split | |
555 | OPTION(mds_bal_idle_threshold, OPT_FLOAT, 0) | |
556 | OPTION(mds_bal_max, OPT_INT, -1) | |
557 | OPTION(mds_bal_max_until, OPT_INT, -1) | |
558 | OPTION(mds_bal_mode, OPT_INT, 0) | |
559 | OPTION(mds_bal_min_rebalance, OPT_FLOAT, .1) // must be this much above average before we export anything | |
560 | OPTION(mds_bal_min_start, OPT_FLOAT, .2) // if we need less than this, we don't do anything | |
561 | OPTION(mds_bal_need_min, OPT_FLOAT, .8) // take within this range of what we need | |
562 | OPTION(mds_bal_need_max, OPT_FLOAT, 1.2) | |
563 | OPTION(mds_bal_midchunk, OPT_FLOAT, .3) // any sub bigger than this taken in full | |
564 | OPTION(mds_bal_minchunk, OPT_FLOAT, .001) // never take anything smaller than this | |
565 | OPTION(mds_bal_target_decay, OPT_DOUBLE, 10.0) // target decay half-life in MDSMap (2x larger is approx. 2x slower) | |
566 | OPTION(mds_replay_interval, OPT_FLOAT, 1.0) // time to wait before starting replay again | |
567 | OPTION(mds_shutdown_check, OPT_INT, 0) | |
568 | OPTION(mds_thrash_exports, OPT_INT, 0) | |
569 | OPTION(mds_thrash_fragments, OPT_INT, 0) | |
570 | OPTION(mds_dump_cache_on_map, OPT_BOOL, false) | |
571 | OPTION(mds_dump_cache_after_rejoin, OPT_BOOL, false) | |
572 | OPTION(mds_verify_scatter, OPT_BOOL, false) | |
573 | OPTION(mds_debug_scatterstat, OPT_BOOL, false) | |
574 | OPTION(mds_debug_frag, OPT_BOOL, false) | |
575 | OPTION(mds_debug_auth_pins, OPT_BOOL, false) | |
576 | OPTION(mds_debug_subtrees, OPT_BOOL, false) | |
577 | OPTION(mds_kill_mdstable_at, OPT_INT, 0) | |
578 | OPTION(mds_kill_export_at, OPT_INT, 0) | |
579 | OPTION(mds_kill_import_at, OPT_INT, 0) | |
580 | OPTION(mds_kill_link_at, OPT_INT, 0) | |
581 | OPTION(mds_kill_rename_at, OPT_INT, 0) | |
582 | OPTION(mds_kill_openc_at, OPT_INT, 0) | |
583 | OPTION(mds_kill_journal_at, OPT_INT, 0) | |
584 | OPTION(mds_kill_journal_expire_at, OPT_INT, 0) | |
585 | OPTION(mds_kill_journal_replay_at, OPT_INT, 0) | |
586 | OPTION(mds_journal_format, OPT_U32, 1) // Default to most recent JOURNAL_FORMAT_* | |
587 | OPTION(mds_kill_create_at, OPT_INT, 0) | |
588 | OPTION(mds_inject_traceless_reply_probability, OPT_DOUBLE, 0) /* percentage | |
589 | of MDS modify replies to skip sending the | |
590 | client a trace on [0-1]*/ | |
591 | OPTION(mds_wipe_sessions, OPT_BOOL, 0) | |
592 | OPTION(mds_wipe_ino_prealloc, OPT_BOOL, 0) | |
593 | OPTION(mds_skip_ino, OPT_INT, 0) | |
594 | OPTION(mds_standby_for_name, OPT_STR, "") | |
595 | OPTION(mds_standby_for_rank, OPT_INT, -1) | |
596 | OPTION(mds_standby_for_fscid, OPT_INT, -1) | |
597 | OPTION(mds_standby_replay, OPT_BOOL, false) | |
598 | OPTION(mds_enable_op_tracker, OPT_BOOL, true) // enable/disable MDS op tracking | |
599 | OPTION(mds_op_history_size, OPT_U32, 20) // Max number of completed ops to track | |
600 | OPTION(mds_op_history_duration, OPT_U32, 600) // Oldest completed op to track | |
601 | OPTION(mds_op_complaint_time, OPT_FLOAT, 30) // how many seconds old makes an op complaint-worthy | |
602 | OPTION(mds_op_log_threshold, OPT_INT, 5) // how many op log messages to show in one go | |
603 | OPTION(mds_snap_min_uid, OPT_U32, 0) // The minimum UID required to create a snapshot | |
604 | OPTION(mds_snap_max_uid, OPT_U32, 4294967294) // The maximum UID allowed to create a snapshot | |
605 | OPTION(mds_snap_rstat, OPT_BOOL, false) // enable/disbale nested stat for snapshot | |
606 | OPTION(mds_verify_backtrace, OPT_U32, 1) | |
607 | // detect clients which aren't trimming completed requests | |
608 | OPTION(mds_max_completed_flushes, OPT_U32, 100000) | |
609 | OPTION(mds_max_completed_requests, OPT_U32, 100000) | |
610 | ||
611 | OPTION(mds_action_on_write_error, OPT_U32, 1) // 0: ignore; 1: force readonly; 2: crash | |
612 | OPTION(mds_mon_shutdown_timeout, OPT_DOUBLE, 5) | |
613 | ||
614 | // Maximum number of concurrent stray files to purge | |
615 | OPTION(mds_max_purge_files, OPT_U32, 64) | |
616 | // Maximum number of concurrent RADOS ops to issue in purging | |
617 | OPTION(mds_max_purge_ops, OPT_U32, 8192) | |
618 | // Maximum number of concurrent RADOS ops to issue in purging, scaled by PG count | |
619 | OPTION(mds_max_purge_ops_per_pg, OPT_FLOAT, 0.5) | |
620 | ||
621 | OPTION(mds_purge_queue_busy_flush_period, OPT_FLOAT, 1.0) | |
622 | ||
623 | OPTION(mds_root_ino_uid, OPT_INT, 0) // The UID of / on new filesystems | |
624 | OPTION(mds_root_ino_gid, OPT_INT, 0) // The GID of / on new filesystems | |
625 | ||
626 | OPTION(mds_max_scrub_ops_in_progress, OPT_INT, 5) // the number of simultaneous scrubs allowed | |
627 | ||
628 | // Maximum number of damaged frags/dentries before whole MDS rank goes damaged | |
629 | OPTION(mds_damage_table_max_entries, OPT_INT, 10000) | |
630 | ||
631 | // verify backend can support configured max object name length | |
632 | OPTION(osd_check_max_object_name_len_on_startup, OPT_BOOL, true) | |
633 | ||
634 | // Maximum number of backfills to or from a single osd | |
635 | OPTION(osd_max_backfills, OPT_U64, 1) | |
636 | ||
637 | // Minimum recovery priority (255 = max, smaller = lower) | |
638 | OPTION(osd_min_recovery_priority, OPT_INT, 0) | |
639 | ||
640 | // Seconds to wait before retrying refused backfills | |
641 | OPTION(osd_backfill_retry_interval, OPT_DOUBLE, 30.0) | |
642 | ||
643 | // Seconds to wait before retrying refused recovery | |
644 | OPTION(osd_recovery_retry_interval, OPT_DOUBLE, 30.0) | |
645 | ||
646 | // max agent flush ops | |
647 | OPTION(osd_agent_max_ops, OPT_INT, 4) | |
648 | OPTION(osd_agent_max_low_ops, OPT_INT, 2) | |
649 | OPTION(osd_agent_min_evict_effort, OPT_FLOAT, .1) | |
650 | OPTION(osd_agent_quantize_effort, OPT_FLOAT, .1) | |
651 | OPTION(osd_agent_delay_time, OPT_FLOAT, 5.0) | |
652 | ||
653 | // osd ignore history.last_epoch_started in find_best_info | |
654 | OPTION(osd_find_best_info_ignore_history_les, OPT_BOOL, false) | |
655 | ||
656 | // decay atime and hist histograms after how many objects go by | |
657 | OPTION(osd_agent_hist_halflife, OPT_INT, 1000) | |
658 | ||
659 | // must be this amount over the threshold to enable, | |
660 | // this amount below the threshold to disable. | |
661 | OPTION(osd_agent_slop, OPT_FLOAT, .02) | |
662 | ||
663 | OPTION(osd_uuid, OPT_UUID, uuid_d()) | |
664 | OPTION(osd_data, OPT_STR, "/var/lib/ceph/osd/$cluster-$id") | |
665 | OPTION(osd_journal, OPT_STR, "/var/lib/ceph/osd/$cluster-$id/journal") | |
666 | OPTION(osd_journal_size, OPT_INT, 5120) // in mb | |
667 | OPTION(osd_journal_flush_on_shutdown, OPT_BOOL, true) // Flush journal to data store on shutdown | |
668 | // flags for specific control purpose during osd mount() process. | |
669 | // e.g., can be 1 to skip over replaying journal | |
670 | // or 2 to skip over mounting omap or 3 to skip over both. | |
671 | // This might be helpful in case the journal is totally corrupted | |
672 | // and we still want to bring the osd daemon back normally, etc. | |
673 | OPTION(osd_os_flags, OPT_U32, 0) | |
674 | OPTION(osd_max_write_size, OPT_INT, 90) | |
675 | OPTION(osd_max_pgls, OPT_U64, 1024) // max number of pgls entries to return | |
676 | OPTION(osd_client_message_size_cap, OPT_U64, 500*1024L*1024L) // client data allowed in-memory (in bytes) | |
677 | OPTION(osd_client_message_cap, OPT_U64, 100) // num client messages allowed in-memory | |
678 | OPTION(osd_pg_bits, OPT_INT, 6) // bits per osd | |
679 | OPTION(osd_pgp_bits, OPT_INT, 6) // bits per osd | |
680 | OPTION(osd_crush_chooseleaf_type, OPT_INT, 1) // 1 = host | |
681 | OPTION(osd_pool_use_gmt_hitset, OPT_BOOL, true) // try to use gmt for hitset archive names if all osds in cluster support it. | |
682 | OPTION(osd_crush_update_on_start, OPT_BOOL, true) | |
683 | OPTION(osd_crush_initial_weight, OPT_DOUBLE, -1) // if >=0, the initial weight is for newly added osds. | |
684 | OPTION(osd_pool_default_crush_rule, OPT_INT, -1) // deprecated for osd_pool_default_crush_replicated_ruleset | |
685 | OPTION(osd_pool_default_crush_replicated_ruleset, OPT_INT, CEPH_DEFAULT_CRUSH_REPLICATED_RULESET) | |
686 | OPTION(osd_pool_erasure_code_stripe_unit, OPT_U32, 4096) // in bytes | |
687 | OPTION(osd_pool_default_size, OPT_INT, 3) | |
688 | OPTION(osd_pool_default_min_size, OPT_INT, 0) // 0 means no specific default; ceph will use size-size/2 | |
689 | OPTION(osd_pool_default_pg_num, OPT_INT, 8) // number of PGs for new pools. Configure in global or mon section of ceph.conf | |
690 | OPTION(osd_pool_default_pgp_num, OPT_INT, 8) // number of PGs for placement purposes. Should be equal to pg_num | |
691 | OPTION(osd_pool_default_erasure_code_profile, | |
692 | OPT_STR, | |
693 | "plugin=jerasure " | |
694 | "technique=reed_sol_van " | |
695 | "k=2 " | |
696 | "m=1 " | |
697 | ) // default properties of osd pool create | |
698 | OPTION(osd_erasure_code_plugins, OPT_STR, | |
699 | "jerasure" | |
700 | " lrc" | |
701 | #ifdef HAVE_BETTER_YASM_ELF64 | |
702 | " isa" | |
703 | #endif | |
704 | ) // list of erasure code plugins | |
705 | ||
706 | // Allows the "peered" state for recovery and backfill below min_size | |
707 | OPTION(osd_allow_recovery_below_min_size, OPT_BOOL, true) | |
708 | ||
709 | OPTION(osd_pool_default_flags, OPT_INT, 0) // default flags for new pools | |
710 | OPTION(osd_pool_default_flag_hashpspool, OPT_BOOL, true) // use new pg hashing to prevent pool/pg overlap | |
711 | OPTION(osd_pool_default_flag_nodelete, OPT_BOOL, false) // pool can't be deleted | |
712 | OPTION(osd_pool_default_flag_nopgchange, OPT_BOOL, false) // pool's pg and pgp num can't be changed | |
713 | OPTION(osd_pool_default_flag_nosizechange, OPT_BOOL, false) // pool's size and min size can't be changed | |
714 | OPTION(osd_pool_default_hit_set_bloom_fpp, OPT_FLOAT, .05) | |
715 | OPTION(osd_pool_default_cache_target_dirty_ratio, OPT_FLOAT, .4) | |
716 | OPTION(osd_pool_default_cache_target_dirty_high_ratio, OPT_FLOAT, .6) | |
717 | OPTION(osd_pool_default_cache_target_full_ratio, OPT_FLOAT, .8) | |
718 | OPTION(osd_pool_default_cache_min_flush_age, OPT_INT, 0) // seconds | |
719 | OPTION(osd_pool_default_cache_min_evict_age, OPT_INT, 0) // seconds | |
720 | OPTION(osd_pool_default_cache_max_evict_check_size, OPT_INT, 10) // max size to check for eviction | |
721 | OPTION(osd_hit_set_min_size, OPT_INT, 1000) // min target size for a HitSet | |
722 | OPTION(osd_hit_set_max_size, OPT_INT, 100000) // max target size for a HitSet | |
723 | OPTION(osd_hit_set_namespace, OPT_STR, ".ceph-internal") // rados namespace for hit_set tracking | |
724 | ||
725 | // conservative default throttling values | |
726 | OPTION(osd_tier_promote_max_objects_sec, OPT_U64, 25) | |
727 | OPTION(osd_tier_promote_max_bytes_sec, OPT_U64, 5 * 1024*1024) | |
728 | ||
729 | OPTION(osd_tier_default_cache_mode, OPT_STR, "writeback") | |
730 | OPTION(osd_tier_default_cache_hit_set_count, OPT_INT, 4) | |
731 | OPTION(osd_tier_default_cache_hit_set_period, OPT_INT, 1200) | |
732 | OPTION(osd_tier_default_cache_hit_set_type, OPT_STR, "bloom") | |
733 | OPTION(osd_tier_default_cache_min_read_recency_for_promote, OPT_INT, 1) // number of recent HitSets the object must appear in to be promoted (on read) | |
734 | OPTION(osd_tier_default_cache_min_write_recency_for_promote, OPT_INT, 1) // number of recent HitSets the object must appear in to be promoted (on write) | |
735 | OPTION(osd_tier_default_cache_hit_set_grade_decay_rate, OPT_INT, 20) | |
736 | OPTION(osd_tier_default_cache_hit_set_search_last_n, OPT_INT, 1) | |
737 | ||
738 | OPTION(osd_map_dedup, OPT_BOOL, true) | |
739 | OPTION(osd_map_max_advance, OPT_INT, 150) // make this < cache_size! | |
740 | OPTION(osd_map_cache_size, OPT_INT, 200) | |
741 | OPTION(osd_map_message_max, OPT_INT, 100) // max maps per MOSDMap message | |
742 | OPTION(osd_map_share_max_epochs, OPT_INT, 100) // cap on # of inc maps we send to peers, clients | |
743 | OPTION(osd_inject_bad_map_crc_probability, OPT_FLOAT, 0) | |
744 | OPTION(osd_inject_failure_on_pg_removal, OPT_BOOL, false) | |
745 | // shutdown the OSD if stuatus flipping more than max_markdown_count times in recent max_markdown_period seconds | |
746 | OPTION(osd_max_markdown_period , OPT_INT, 600) | |
747 | OPTION(osd_max_markdown_count, OPT_INT, 5) | |
748 | ||
749 | OPTION(osd_op_threads, OPT_INT, 2) // 0 == no threading | |
750 | OPTION(osd_peering_wq_batch_size, OPT_U64, 20) | |
751 | OPTION(osd_op_pq_max_tokens_per_priority, OPT_U64, 4194304) | |
752 | OPTION(osd_op_pq_min_cost, OPT_U64, 65536) | |
753 | OPTION(osd_disk_threads, OPT_INT, 1) | |
754 | OPTION(osd_disk_thread_ioprio_class, OPT_STR, "") // rt realtime be best effort idle | |
755 | OPTION(osd_disk_thread_ioprio_priority, OPT_INT, -1) // 0-7 | |
756 | OPTION(osd_recover_clone_overlap, OPT_BOOL, true) // preserve clone_overlap during recovery/migration | |
757 | OPTION(osd_op_num_threads_per_shard, OPT_INT, 2) | |
758 | OPTION(osd_op_num_shards, OPT_INT, 5) | |
759 | OPTION(osd_op_queue, OPT_STR, "wpq") // PrioritzedQueue (prio), Weighted Priority Queue (wpq), or debug_random | |
760 | OPTION(osd_op_queue_cut_off, OPT_STR, "low") // Min priority to go to strict queue. (low, high, debug_random) | |
761 | ||
762 | OPTION(osd_ignore_stale_divergent_priors, OPT_BOOL, false) // do not assert on divergent_prior entries which aren't in the log and whose on-disk objects are newer | |
763 | ||
764 | // Set to true for testing. Users should NOT set this. | |
765 | // If set to true even after reading enough shards to | |
766 | // decode the object, any error will be reported. | |
767 | OPTION(osd_read_ec_check_for_errors, OPT_BOOL, false) // return error if any ec shard has an error | |
768 | ||
769 | // Only use clone_overlap for recovery if there are fewer than | |
770 | // osd_recover_clone_overlap_limit entries in the overlap set | |
771 | OPTION(osd_recover_clone_overlap_limit, OPT_INT, 10) | |
772 | ||
773 | OPTION(osd_backfill_scan_min, OPT_INT, 64) | |
774 | OPTION(osd_backfill_scan_max, OPT_INT, 512) | |
775 | OPTION(osd_op_thread_timeout, OPT_INT, 15) | |
776 | OPTION(osd_op_thread_suicide_timeout, OPT_INT, 150) | |
777 | OPTION(osd_recovery_thread_timeout, OPT_INT, 30) | |
778 | OPTION(osd_recovery_thread_suicide_timeout, OPT_INT, 300) | |
779 | OPTION(osd_recovery_sleep, OPT_FLOAT, 0) // seconds to sleep between recovery ops | |
780 | OPTION(osd_snap_trim_sleep, OPT_DOUBLE, 0) | |
781 | OPTION(osd_scrub_invalid_stats, OPT_BOOL, true) | |
782 | OPTION(osd_remove_thread_timeout, OPT_INT, 60*60) | |
783 | OPTION(osd_remove_thread_suicide_timeout, OPT_INT, 10*60*60) | |
784 | OPTION(osd_command_thread_timeout, OPT_INT, 10*60) | |
785 | OPTION(osd_command_thread_suicide_timeout, OPT_INT, 15*60) | |
786 | OPTION(osd_heartbeat_addr, OPT_ADDR, entity_addr_t()) | |
787 | OPTION(osd_heartbeat_interval, OPT_INT, 6) // (seconds) how often we ping peers | |
788 | ||
789 | // (seconds) how long before we decide a peer has failed | |
790 | // This setting is read by the MONs and OSDs and has to be set to a equal value in both settings of the configuration | |
791 | OPTION(osd_heartbeat_grace, OPT_INT, 20) | |
792 | OPTION(osd_heartbeat_min_peers, OPT_INT, 10) // minimum number of peers | |
793 | OPTION(osd_heartbeat_use_min_delay_socket, OPT_BOOL, false) // prio the heartbeat tcp socket and set dscp as CS6 on it if true | |
794 | ||
795 | // max number of parallel snap trims/pg | |
796 | OPTION(osd_pg_max_concurrent_snap_trims, OPT_U64, 2) | |
797 | // max number of trimming pgs | |
798 | OPTION(osd_max_trimming_pgs, OPT_U64, 2) | |
799 | ||
800 | // minimum number of peers that must be reachable to mark ourselves | |
801 | // back up after being wrongly marked down. | |
802 | OPTION(osd_heartbeat_min_healthy_ratio, OPT_FLOAT, .33) | |
803 | ||
804 | OPTION(osd_mon_heartbeat_interval, OPT_INT, 30) // (seconds) how often to ping monitor if no peers | |
805 | OPTION(osd_mon_report_interval_max, OPT_INT, 600) | |
806 | OPTION(osd_mon_report_interval_min, OPT_INT, 5) // pg stats, failures, up_thru, boot. | |
807 | OPTION(osd_mon_report_max_in_flight, OPT_INT, 2) // max updates in flight | |
808 | OPTION(osd_beacon_report_interval, OPT_INT, 300) // (second) how often to send beacon message to monitor | |
809 | OPTION(osd_pg_stat_report_interval_max, OPT_INT, 500) // report pg stats for any given pg at least this often | |
810 | OPTION(osd_mon_ack_timeout, OPT_DOUBLE, 30.0) // time out a mon if it doesn't ack stats | |
811 | OPTION(osd_stats_ack_timeout_factor, OPT_DOUBLE, 2.0) // multiples of mon_ack_timeout | |
812 | OPTION(osd_stats_ack_timeout_decay, OPT_DOUBLE, .9) | |
813 | OPTION(osd_default_data_pool_replay_window, OPT_INT, 45) | |
814 | OPTION(osd_preserve_trimmed_log, OPT_BOOL, false) | |
815 | OPTION(osd_auto_mark_unfound_lost, OPT_BOOL, false) | |
816 | OPTION(osd_recovery_delay_start, OPT_FLOAT, 0) | |
817 | OPTION(osd_recovery_max_active, OPT_U64, 3) | |
818 | OPTION(osd_recovery_max_single_start, OPT_U64, 1) | |
819 | OPTION(osd_recovery_max_chunk, OPT_U64, 8<<20) // max size of push chunk | |
820 | OPTION(osd_recovery_max_omap_entries_per_chunk, OPT_U64, 64000) // max number of omap entries per chunk; 0 to disable limit | |
821 | OPTION(osd_copyfrom_max_chunk, OPT_U64, 8<<20) // max size of a COPYFROM chunk | |
822 | OPTION(osd_push_per_object_cost, OPT_U64, 1000) // push cost per object | |
823 | OPTION(osd_max_push_cost, OPT_U64, 8<<20) // max size of push message | |
824 | OPTION(osd_max_push_objects, OPT_U64, 10) // max objects in single push op | |
825 | OPTION(osd_recovery_forget_lost_objects, OPT_BOOL, false) // off for now | |
826 | OPTION(osd_max_scrubs, OPT_INT, 1) | |
827 | OPTION(osd_scrub_during_recovery, OPT_BOOL, false) // Allow new scrubs to start while recovery is active on the OSD | |
828 | OPTION(osd_scrub_begin_hour, OPT_INT, 0) | |
829 | OPTION(osd_scrub_end_hour, OPT_INT, 24) | |
830 | OPTION(osd_scrub_load_threshold, OPT_FLOAT, 0.5) | |
831 | OPTION(osd_scrub_min_interval, OPT_FLOAT, 60*60*24) // if load is low | |
832 | OPTION(osd_scrub_max_interval, OPT_FLOAT, 7*60*60*24) // regardless of load | |
833 | OPTION(osd_scrub_interval_randomize_ratio, OPT_FLOAT, 0.5) // randomize the scheduled scrub in the span of [min,min*(1+randomize_ratio)) | |
834 | OPTION(osd_scrub_backoff_ratio, OPT_DOUBLE, .66) // the probability to back off the scheduled scrub | |
835 | OPTION(osd_scrub_chunk_min, OPT_INT, 5) | |
836 | OPTION(osd_scrub_chunk_max, OPT_INT, 25) | |
837 | OPTION(osd_scrub_sleep, OPT_FLOAT, 0) // sleep between [deep]scrub ops | |
838 | OPTION(osd_scrub_auto_repair, OPT_BOOL, false) // whether auto-repair inconsistencies upon deep-scrubbing | |
839 | OPTION(osd_scrub_auto_repair_num_errors, OPT_U32, 5) // only auto-repair when number of errors is below this threshold | |
840 | OPTION(osd_deep_scrub_interval, OPT_FLOAT, 60*60*24*7) // once a week | |
841 | OPTION(osd_deep_scrub_randomize_ratio, OPT_FLOAT, 0.15) // scrubs will randomly become deep scrubs at this rate (0.15 -> 15% of scrubs are deep) | |
842 | OPTION(osd_deep_scrub_stride, OPT_INT, 524288) | |
843 | OPTION(osd_deep_scrub_update_digest_min_age, OPT_INT, 2*60*60) // objects must be this old (seconds) before we update the whole-object digest on scrub | |
844 | OPTION(osd_scan_list_ping_tp_interval, OPT_U64, 100) | |
845 | OPTION(osd_class_dir, OPT_STR, CEPH_LIBDIR "/rados-classes") // where rados plugins are stored | |
846 | OPTION(osd_open_classes_on_start, OPT_BOOL, true) | |
847 | OPTION(osd_class_load_list, OPT_STR, "cephfs hello journal lock log numops " | |
848 | "rbd refcount replica_log rgw statelog timeindex user version") // list of object classes allowed to be loaded (allow all: *) | |
849 | OPTION(osd_class_default_list, OPT_STR, "cephfs hello journal lock log numops " | |
850 | "rbd refcount replica_log rgw statelog timeindex user version") // list of object classes with default execute perm (allow all: *) | |
851 | OPTION(osd_check_for_log_corruption, OPT_BOOL, false) | |
852 | OPTION(osd_use_stale_snap, OPT_BOOL, false) | |
853 | OPTION(osd_rollback_to_cluster_snap, OPT_STR, "") | |
854 | OPTION(osd_default_notify_timeout, OPT_U32, 30) // default notify timeout in seconds | |
855 | OPTION(osd_kill_backfill_at, OPT_INT, 0) | |
856 | ||
857 | // Bounds how infrequently a new map epoch will be persisted for a pg | |
858 | OPTION(osd_pg_epoch_persisted_max_stale, OPT_U32, 150) // make this < map_cache_size! | |
859 | ||
860 | OPTION(osd_min_pg_log_entries, OPT_U32, 3000) // number of entries to keep in the pg log when trimming it | |
861 | OPTION(osd_max_pg_log_entries, OPT_U32, 10000) // max entries, say when degraded, before we trim | |
862 | OPTION(osd_force_recovery_pg_log_entries_factor, OPT_FLOAT, 1.3) // max entries factor before force recovery | |
863 | OPTION(osd_pg_log_trim_min, OPT_U32, 100) | |
864 | OPTION(osd_op_complaint_time, OPT_FLOAT, 30) // how many seconds old makes an op complaint-worthy | |
865 | OPTION(osd_command_max_records, OPT_INT, 256) | |
866 | OPTION(osd_max_pg_blocked_by, OPT_U32, 16) // max peer osds to report that are blocking our progress | |
867 | OPTION(osd_op_log_threshold, OPT_INT, 5) // how many op log messages to show in one go | |
868 | OPTION(osd_verify_sparse_read_holes, OPT_BOOL, false) // read fiemap-reported holes and verify they are zeros | |
869 | OPTION(osd_backoff_on_unfound, OPT_BOOL, true) // object unfound | |
870 | OPTION(osd_backoff_on_degraded, OPT_BOOL, false) // [mainly for debug?] object unreadable/writeable | |
871 | OPTION(osd_backoff_on_down, OPT_BOOL, true) // pg in down/incomplete state | |
872 | OPTION(osd_backoff_on_peering, OPT_BOOL, false) // [debug] pg peering | |
873 | OPTION(osd_debug_crash_on_ignored_backoff, OPT_BOOL, false) // crash osd if client ignores a backoff; useful for debugging | |
874 | OPTION(osd_debug_inject_dispatch_delay_probability, OPT_DOUBLE, 0) | |
875 | OPTION(osd_debug_inject_dispatch_delay_duration, OPT_DOUBLE, .1) | |
876 | OPTION(osd_debug_drop_ping_probability, OPT_DOUBLE, 0) | |
877 | OPTION(osd_debug_drop_ping_duration, OPT_INT, 0) | |
878 | OPTION(osd_debug_op_order, OPT_BOOL, false) | |
879 | OPTION(osd_debug_verify_missing_on_start, OPT_BOOL, false) | |
880 | OPTION(osd_debug_scrub_chance_rewrite_digest, OPT_U64, 0) | |
881 | OPTION(osd_debug_verify_snaps_on_info, OPT_BOOL, false) | |
882 | OPTION(osd_debug_verify_stray_on_activate, OPT_BOOL, false) | |
883 | OPTION(osd_debug_skip_full_check_in_backfill_reservation, OPT_BOOL, false) | |
884 | OPTION(osd_debug_reject_backfill_probability, OPT_DOUBLE, 0) | |
885 | OPTION(osd_debug_inject_copyfrom_error, OPT_BOOL, false) // inject failure during copyfrom completion | |
886 | OPTION(osd_debug_misdirected_ops, OPT_BOOL, false) | |
887 | OPTION(osd_debug_skip_full_check_in_recovery, OPT_BOOL, false) | |
888 | OPTION(osd_enxio_on_misdirected_op, OPT_BOOL, false) | |
889 | OPTION(osd_debug_verify_cached_snaps, OPT_BOOL, false) | |
890 | OPTION(osd_enable_op_tracker, OPT_BOOL, true) // enable/disable OSD op tracking | |
891 | OPTION(osd_num_op_tracker_shard, OPT_U32, 32) // The number of shards for holding the ops | |
892 | OPTION(osd_op_history_size, OPT_U32, 20) // Max number of completed ops to track | |
893 | OPTION(osd_op_history_duration, OPT_U32, 600) // Oldest completed op to track | |
894 | OPTION(osd_op_history_slow_op_size, OPT_U32, 20) // Max number of slow ops to track | |
895 | OPTION(osd_op_history_slow_op_threshold, OPT_DOUBLE, 10.0) // track the op if over this threshold | |
896 | OPTION(osd_target_transaction_size, OPT_INT, 30) // to adjust various transactions that batch smaller items | |
897 | OPTION(osd_failsafe_full_ratio, OPT_FLOAT, .97) // what % full makes an OSD "full" (failsafe) | |
898 | OPTION(osd_fast_fail_on_connection_refused, OPT_BOOL, true) // immediately mark OSDs as down once they refuse to accept connections | |
899 | ||
900 | OPTION(osd_pg_object_context_cache_count, OPT_INT, 64) | |
901 | OPTION(osd_tracing, OPT_BOOL, false) // true if LTTng-UST tracepoints should be enabled | |
902 | ||
903 | OPTION(osd_fast_info, OPT_BOOL, true) // use fast info attr, if we can | |
904 | ||
905 | // determines whether PGLog::check() compares written out log to stored log | |
906 | OPTION(osd_debug_pg_log_writeout, OPT_BOOL, false) | |
907 | OPTION(osd_loop_before_reset_tphandle, OPT_U32, 64) // Max number of loop before we reset thread-pool's handle | |
908 | // default timeout while caling WaitInterval on an empty queue | |
909 | OPTION(threadpool_default_timeout, OPT_INT, 60) | |
910 | // default wait time for an empty queue before pinging the hb timeout | |
911 | OPTION(threadpool_empty_queue_max_wait, OPT_INT, 2) | |
912 | ||
913 | OPTION(leveldb_log_to_ceph_log, OPT_BOOL, true) | |
914 | OPTION(leveldb_write_buffer_size, OPT_U64, 8 *1024*1024) // leveldb write buffer size | |
915 | OPTION(leveldb_cache_size, OPT_U64, 128 *1024*1024) // leveldb cache size | |
916 | OPTION(leveldb_block_size, OPT_U64, 0) // leveldb block size | |
917 | OPTION(leveldb_bloom_size, OPT_INT, 0) // leveldb bloom bits per entry | |
918 | OPTION(leveldb_max_open_files, OPT_INT, 0) // leveldb max open files | |
919 | OPTION(leveldb_compression, OPT_BOOL, true) // leveldb uses compression | |
920 | OPTION(leveldb_paranoid, OPT_BOOL, false) // leveldb paranoid flag | |
921 | OPTION(leveldb_log, OPT_STR, "/dev/null") // enable leveldb log file | |
922 | OPTION(leveldb_compact_on_mount, OPT_BOOL, false) | |
923 | ||
924 | OPTION(kinetic_host, OPT_STR, "") // hostname or ip address of a kinetic drive to use | |
925 | OPTION(kinetic_port, OPT_INT, 8123) // port number of the kinetic drive | |
926 | OPTION(kinetic_user_id, OPT_INT, 1) // kinetic user to authenticate as | |
927 | OPTION(kinetic_hmac_key, OPT_STR, "asdfasdf") // kinetic key to authenticate with | |
928 | OPTION(kinetic_use_ssl, OPT_BOOL, false) // whether to secure kinetic traffic with TLS | |
929 | ||
930 | ||
931 | OPTION(rocksdb_separate_wal_dir, OPT_BOOL, false) // use $path.wal for wal | |
932 | SAFE_OPTION(rocksdb_db_paths, OPT_STR, "") // path,size( path,size)* | |
933 | OPTION(rocksdb_log_to_ceph_log, OPT_BOOL, true) // log to ceph log | |
934 | OPTION(rocksdb_cache_size, OPT_U64, 128*1024*1024) // default rocksdb cache size | |
935 | OPTION(rocksdb_cache_shard_bits, OPT_INT, 4) // rocksdb block cache shard bits, 4 bit -> 16 shards | |
936 | OPTION(rocksdb_block_size, OPT_INT, 4*1024) // default rocksdb block size | |
937 | OPTION(rocksdb_perf, OPT_BOOL, false) // Enabling this will have 5-10% impact on performance for the stats collection | |
938 | OPTION(rocksdb_collect_compaction_stats, OPT_BOOL, false) //For rocksdb, this adds an overhead of 5%~10%; collected only if rocksdb_perf is enabled. | |
939 | OPTION(rocksdb_collect_extended_stats, OPT_BOOL, false) //For rocksdb, this adds an overhead of 5%~10%; collected only if rocksdb_perf is enabled. | |
940 | OPTION(rocksdb_collect_memory_stats, OPT_BOOL, false) //For rocksdb, this adds an overhead of 5%~10%; collected only if rocksdb_perf is enabled. | |
941 | OPTION(rocksdb_enable_rmrange, OPT_BOOL, false) // see https://github.com/facebook/rocksdb/blob/master/include/rocksdb/db.h#L253 | |
942 | ||
943 | // rocksdb options that will be used for omap(if omap_backend is rocksdb) | |
944 | OPTION(filestore_rocksdb_options, OPT_STR, "") | |
945 | // rocksdb options that will be used in monstore | |
946 | OPTION(mon_rocksdb_options, OPT_STR, "write_buffer_size=33554432,compression=kNoCompression") | |
947 | ||
948 | /** | |
949 | * osd_*_priority adjust the relative priority of client io, recovery io, | |
950 | * snaptrim io, etc | |
951 | * | |
952 | * osd_*_priority determines the ratio of available io between client and | |
953 | * recovery. Each option may be set between | |
954 | * 1..63. | |
955 | */ | |
956 | OPTION(osd_client_op_priority, OPT_U32, 63) | |
957 | OPTION(osd_recovery_op_priority, OPT_U32, 3) | |
958 | ||
959 | OPTION(osd_snap_trim_priority, OPT_U32, 5) | |
960 | OPTION(osd_snap_trim_cost, OPT_U32, 1<<20) // set default cost equal to 1MB io | |
961 | ||
962 | OPTION(osd_scrub_priority, OPT_U32, 5) | |
963 | // set default cost equal to 50MB io | |
964 | OPTION(osd_scrub_cost, OPT_U32, 50<<20) | |
965 | // set requested scrub priority higher than scrub priority to make the | |
966 | // requested scrubs jump the queue of scheduled scrubs | |
967 | OPTION(osd_requested_scrub_priority, OPT_U32, 120) | |
968 | ||
969 | OPTION(osd_recovery_priority, OPT_U32, 5) | |
970 | // set default cost equal to 20MB io | |
971 | OPTION(osd_recovery_cost, OPT_U32, 20<<20) | |
972 | ||
973 | /** | |
974 | * osd_recovery_op_warn_multiple scales the normal warning threshold, | |
975 | * osd_op_complaint_time, so that slow recovery ops won't cause noise | |
976 | */ | |
977 | OPTION(osd_recovery_op_warn_multiple, OPT_U32, 16) | |
978 | ||
979 | // Max time to wait between notifying mon of shutdown and shutting down | |
980 | OPTION(osd_mon_shutdown_timeout, OPT_DOUBLE, 5) | |
981 | ||
982 | OPTION(osd_max_object_size, OPT_U64, 100*1024L*1024L*1024L) // OSD's maximum object size | |
983 | OPTION(osd_max_object_name_len, OPT_U32, 2048) // max rados object name len | |
984 | OPTION(osd_max_object_namespace_len, OPT_U32, 256) // max rados object namespace len | |
985 | OPTION(osd_max_attr_name_len, OPT_U32, 100) // max rados attr name len; cannot go higher than 100 chars for file system backends | |
986 | OPTION(osd_max_attr_size, OPT_U64, 0) | |
987 | ||
988 | OPTION(osd_max_omap_entries_per_request, OPT_U64, 131072) | |
989 | OPTION(osd_max_omap_bytes_per_request, OPT_U64, 1<<30) | |
990 | ||
991 | OPTION(osd_objectstore, OPT_STR, "filestore") // ObjectStore backend type | |
992 | OPTION(osd_objectstore_tracing, OPT_BOOL, false) // true if LTTng-UST tracepoints should be enabled | |
993 | // Override maintaining compatibility with older OSDs | |
994 | // Set to true for testing. Users should NOT set this. | |
995 | OPTION(osd_debug_override_acting_compat, OPT_BOOL, false) | |
996 | OPTION(osd_objectstore_fuse, OPT_BOOL, false) | |
997 | ||
998 | OPTION(osd_bench_small_size_max_iops, OPT_U32, 100) // 100 IOPS | |
999 | OPTION(osd_bench_large_size_max_throughput, OPT_U64, 100 << 20) // 100 MB/s | |
1000 | OPTION(osd_bench_max_block_size, OPT_U64, 64 << 20) // cap the block size at 64MB | |
1001 | OPTION(osd_bench_duration, OPT_U32, 30) // duration of 'osd bench', capped at 30s to avoid triggering timeouts | |
1002 | ||
1003 | OPTION(osd_blkin_trace_all, OPT_BOOL, false) // create a blkin trace for all osd requests | |
1004 | OPTION(osdc_blkin_trace_all, OPT_BOOL, false) // create a blkin trace for all objecter requests | |
1005 | ||
1006 | OPTION(osd_discard_disconnected_ops, OPT_BOOL, true) | |
1007 | ||
1008 | OPTION(memstore_device_bytes, OPT_U64, 1024*1024*1024) | |
1009 | OPTION(memstore_page_set, OPT_BOOL, true) | |
1010 | OPTION(memstore_page_size, OPT_U64, 64 << 10) | |
1011 | ||
1012 | OPTION(bdev_debug_inflight_ios, OPT_BOOL, false) | |
1013 | OPTION(bdev_inject_crash, OPT_INT, 0) // if N>0, then ~ 1/N IOs will complete before we crash on flush. | |
1014 | OPTION(bdev_inject_crash_flush_delay, OPT_INT, 2) // wait N more seconds on flush | |
1015 | OPTION(bdev_aio, OPT_BOOL, true) | |
1016 | OPTION(bdev_aio_poll_ms, OPT_INT, 250) // milliseconds | |
1017 | OPTION(bdev_aio_max_queue_depth, OPT_INT, 1024) | |
1018 | OPTION(bdev_block_size, OPT_INT, 4096) | |
1019 | OPTION(bdev_debug_aio, OPT_BOOL, false) | |
1020 | OPTION(bdev_debug_aio_suicide_timeout, OPT_FLOAT, 60.0) | |
1021 | ||
1022 | // if yes, osd will unbind all NVMe devices from kernel driver and bind them | |
1023 | // to the uio_pci_generic driver. The purpose is to prevent the case where | |
1024 | // NVMe driver is loaded while osd is running. | |
1025 | OPTION(bdev_nvme_unbind_from_kernel, OPT_BOOL, false) | |
1026 | OPTION(bdev_nvme_retry_count, OPT_INT, -1) // -1 means use the default, which is 4 | |
1027 | ||
1028 | OPTION(objectstore_blackhole, OPT_BOOL, false) | |
1029 | ||
1030 | OPTION(bluefs_alloc_size, OPT_U64, 1048576) | |
1031 | OPTION(bluefs_max_prefetch, OPT_U64, 1048576) | |
1032 | OPTION(bluefs_min_log_runway, OPT_U64, 1048576) // alloc when we get this low | |
1033 | OPTION(bluefs_max_log_runway, OPT_U64, 4194304) // alloc this much at a time | |
1034 | OPTION(bluefs_log_compact_min_ratio, OPT_FLOAT, 5.0) // before we consider | |
1035 | OPTION(bluefs_log_compact_min_size, OPT_U64, 16*1048576) // before we consider | |
1036 | OPTION(bluefs_min_flush_size, OPT_U64, 524288) // ignore flush until its this big | |
1037 | OPTION(bluefs_compact_log_sync, OPT_BOOL, false) // sync or async log compaction? | |
1038 | OPTION(bluefs_buffered_io, OPT_BOOL, false) | |
1039 | OPTION(bluefs_sync_write, OPT_BOOL, false) | |
1040 | OPTION(bluefs_allocator, OPT_STR, "bitmap") // stupid | bitmap | |
1041 | OPTION(bluefs_preextend_wal_files, OPT_BOOL, false) // this *requires* that rocksdb has recycling enabled | |
1042 | ||
1043 | OPTION(bluestore_bluefs, OPT_BOOL, true) | |
1044 | OPTION(bluestore_bluefs_env_mirror, OPT_BOOL, false) // mirror to normal Env for debug | |
1045 | OPTION(bluestore_bluefs_min, OPT_U64, 1*1024*1024*1024) // 1gb | |
1046 | OPTION(bluestore_bluefs_min_ratio, OPT_FLOAT, .02) // min fs free / total free | |
1047 | OPTION(bluestore_bluefs_max_ratio, OPT_FLOAT, .90) // max fs free / total free | |
1048 | OPTION(bluestore_bluefs_gift_ratio, OPT_FLOAT, .02) // how much to add at a time | |
1049 | OPTION(bluestore_bluefs_reclaim_ratio, OPT_FLOAT, .20) // how much to reclaim at a time | |
1050 | OPTION(bluestore_bluefs_balance_interval, OPT_FLOAT, 1) // how often (sec) to balance free space between bluefs and bluestore | |
1051 | // If you want to use spdk driver, you need to specify NVMe serial number here | |
1052 | // with "spdk:" prefix. | |
1053 | // Users can use 'lspci -vvv -d 8086:0953 | grep "Device Serial Number"' to | |
1054 | // get the serial number of Intel(R) Fultondale NVMe controllers. | |
1055 | // Example: | |
1056 | // bluestore_block_path = spdk:55cd2e404bd73932 | |
1057 | // If you want to run multiple SPDK instances per node, you must specify the | |
1058 | // amount of dpdk memory size in MB each instance will use, to make sure each | |
1059 | // instance uses its own dpdk memory | |
1060 | OPTION(bluestore_spdk_mem, OPT_U32, 512) | |
1061 | // A hexadecimal bit mask of the cores to run on. Note the core numbering can change between platforms and should be determined beforehand. | |
1062 | OPTION(bluestore_spdk_coremask, OPT_STR, "0x3") | |
1063 | // Specify the maximal I/Os to be batched completed while checking queue pair completions. | |
1064 | // Default value 0 means that let SPDK nvme library determine the value. | |
1065 | OPTION(bluestore_spdk_max_io_completion, OPT_U32, 0) | |
1066 | OPTION(bluestore_block_path, OPT_STR, "") | |
1067 | OPTION(bluestore_block_size, OPT_U64, 10 * 1024*1024*1024) // 10gb for testing | |
1068 | OPTION(bluestore_block_create, OPT_BOOL, true) | |
1069 | OPTION(bluestore_block_db_path, OPT_STR, "") | |
1070 | OPTION(bluestore_block_db_size, OPT_U64, 0) // rocksdb ssts (hot/warm) | |
1071 | OPTION(bluestore_block_db_create, OPT_BOOL, false) | |
1072 | OPTION(bluestore_block_wal_path, OPT_STR, "") | |
1073 | OPTION(bluestore_block_wal_size, OPT_U64, 96 * 1024*1024) // rocksdb wal | |
1074 | OPTION(bluestore_block_wal_create, OPT_BOOL, false) | |
1075 | OPTION(bluestore_block_preallocate_file, OPT_BOOL, false) //whether to preallocate space if block/db_path/wal_path is a file rather than a block device. | |
1076 | OPTION(bluestore_csum_type, OPT_STR, "crc32c") // none|xxhash32|xxhash64|crc32c|crc32c_16|crc32c_8 | |
1077 | OPTION(bluestore_csum_min_block, OPT_U32, 4096) | |
1078 | OPTION(bluestore_csum_max_block, OPT_U32, 64*1024) | |
1079 | OPTION(bluestore_min_alloc_size, OPT_U32, 0) | |
1080 | OPTION(bluestore_min_alloc_size_hdd, OPT_U32, 64*1024) | |
1081 | OPTION(bluestore_min_alloc_size_ssd, OPT_U32, 16*1024) | |
1082 | OPTION(bluestore_max_alloc_size, OPT_U32, 0) | |
1083 | OPTION(bluestore_prefer_deferred_size, OPT_U32, 0) | |
1084 | OPTION(bluestore_prefer_deferred_size_hdd, OPT_U32, 32768) | |
1085 | OPTION(bluestore_prefer_deferred_size_ssd, OPT_U32, 0) | |
1086 | OPTION(bluestore_compression_mode, OPT_STR, "none") // force|aggressive|passive|none | |
1087 | OPTION(bluestore_compression_algorithm, OPT_STR, "snappy") | |
1088 | OPTION(bluestore_compression_min_blob_size, OPT_U32, 0) | |
1089 | OPTION(bluestore_compression_min_blob_size_hdd, OPT_U32, 128*1024) | |
1090 | OPTION(bluestore_compression_min_blob_size_ssd, OPT_U32, 8*1024) | |
1091 | OPTION(bluestore_compression_max_blob_size, OPT_U32, 0) | |
1092 | OPTION(bluestore_compression_max_blob_size_hdd, OPT_U32, 512*1024) | |
1093 | OPTION(bluestore_compression_max_blob_size_ssd, OPT_U32, 64*1024) | |
1094 | /* | |
1095 | * Specifies minimum expected amount of saved allocation units | |
1096 | * per single blob to enable compressed blobs garbage collection | |
1097 | * | |
1098 | */ | |
1099 | OPTION(bluestore_gc_enable_blob_threshold, OPT_INT, 0) | |
1100 | /* | |
1101 | * Specifies minimum expected amount of saved allocation units | |
1102 | * across all blobs to enable compressed blobs garbage collection | |
1103 | * | |
1104 | */ | |
1105 | OPTION(bluestore_gc_enable_total_threshold, OPT_INT, 0) | |
1106 | ||
1107 | OPTION(bluestore_max_blob_size, OPT_U32, 0) | |
1108 | OPTION(bluestore_max_blob_size_hdd, OPT_U32, 512*1024) | |
1109 | OPTION(bluestore_max_blob_size_ssd, OPT_U32, 64*1024) | |
1110 | /* | |
1111 | * Require the net gain of compression at least to be at this ratio, | |
1112 | * otherwise we don't compress. | |
1113 | * And ask for compressing at least 12.5%(1/8) off, by default. | |
1114 | */ | |
1115 | OPTION(bluestore_compression_required_ratio, OPT_DOUBLE, .875) | |
1116 | OPTION(bluestore_extent_map_shard_max_size, OPT_U32, 1200) | |
1117 | OPTION(bluestore_extent_map_shard_target_size, OPT_U32, 500) | |
1118 | OPTION(bluestore_extent_map_shard_min_size, OPT_U32, 150) | |
1119 | OPTION(bluestore_extent_map_shard_target_size_slop, OPT_DOUBLE, .2) | |
1120 | OPTION(bluestore_extent_map_inline_shard_prealloc_size, OPT_U32, 256) | |
1121 | OPTION(bluestore_cache_trim_interval, OPT_DOUBLE, .1) | |
1122 | OPTION(bluestore_cache_trim_max_skip_pinned, OPT_U32, 64) // skip this many onodes pinned in cache before we give up | |
1123 | OPTION(bluestore_cache_type, OPT_STR, "2q") // lru, 2q | |
1124 | OPTION(bluestore_2q_cache_kin_ratio, OPT_DOUBLE, .5) // kin page slot size / max page slot size | |
1125 | OPTION(bluestore_2q_cache_kout_ratio, OPT_DOUBLE, .5) // number of kout page slot / total number of page slot | |
1126 | OPTION(bluestore_cache_size, OPT_U64, 1024*1024*1024) | |
1127 | OPTION(bluestore_cache_meta_ratio, OPT_DOUBLE, .9) | |
1128 | OPTION(bluestore_kvbackend, OPT_STR, "rocksdb") | |
1129 | OPTION(bluestore_allocator, OPT_STR, "bitmap") // stupid | bitmap | |
1130 | OPTION(bluestore_freelist_blocks_per_key, OPT_INT, 128) | |
1131 | OPTION(bluestore_bitmapallocator_blocks_per_zone, OPT_INT, 1024) // must be power of 2 aligned, e.g., 512, 1024, 2048... | |
1132 | OPTION(bluestore_bitmapallocator_span_size, OPT_INT, 1024) // must be power of 2 aligned, e.g., 512, 1024, 2048... | |
1133 | OPTION(bluestore_rocksdb_options, OPT_STR, "compression=kNoCompression,max_write_buffer_number=4,min_write_buffer_number_to_merge=1,recycle_log_file_num=4,write_buffer_size=268435456,writable_file_max_buffer_size=0,compaction_readahead_size=2097152") | |
1134 | OPTION(bluestore_fsck_on_mount, OPT_BOOL, false) | |
1135 | OPTION(bluestore_fsck_on_mount_deep, OPT_BOOL, true) | |
1136 | OPTION(bluestore_fsck_on_umount, OPT_BOOL, false) | |
1137 | OPTION(bluestore_fsck_on_umount_deep, OPT_BOOL, true) | |
1138 | OPTION(bluestore_fsck_on_mkfs, OPT_BOOL, true) | |
1139 | OPTION(bluestore_fsck_on_mkfs_deep, OPT_BOOL, false) | |
1140 | OPTION(bluestore_sync_submit_transaction, OPT_BOOL, false) // submit kv txn in queueing thread (not kv_sync_thread) | |
1141 | OPTION(bluestore_throttle_bytes, OPT_U64, 64*1024*1024) | |
1142 | OPTION(bluestore_throttle_deferred_bytes, OPT_U64, 128*1024*1024) | |
1143 | OPTION(bluestore_throttle_cost_per_io_hdd, OPT_U64, 1500000) | |
1144 | OPTION(bluestore_throttle_cost_per_io_ssd, OPT_U64, 4000) | |
1145 | OPTION(bluestore_throttle_cost_per_io, OPT_U64, 0) | |
1146 | OPTION(bluestore_deferred_batch_ops, OPT_U64, 0) | |
1147 | OPTION(bluestore_deferred_batch_ops_hdd, OPT_U64, 64) | |
1148 | OPTION(bluestore_deferred_batch_ops_ssd, OPT_U64, 16) | |
1149 | OPTION(bluestore_nid_prealloc, OPT_INT, 1024) | |
1150 | OPTION(bluestore_blobid_prealloc, OPT_U64, 10240) | |
1151 | OPTION(bluestore_clone_cow, OPT_BOOL, true) // do copy-on-write for clones | |
1152 | OPTION(bluestore_default_buffered_read, OPT_BOOL, true) | |
1153 | OPTION(bluestore_default_buffered_write, OPT_BOOL, false) | |
1154 | OPTION(bluestore_debug_misc, OPT_BOOL, false) | |
1155 | OPTION(bluestore_debug_no_reuse_blocks, OPT_BOOL, false) | |
1156 | OPTION(bluestore_debug_small_allocations, OPT_INT, 0) | |
1157 | OPTION(bluestore_debug_freelist, OPT_BOOL, false) | |
1158 | OPTION(bluestore_debug_prefill, OPT_FLOAT, 0) | |
1159 | OPTION(bluestore_debug_prefragment_max, OPT_INT, 1048576) | |
1160 | OPTION(bluestore_debug_inject_read_err, OPT_BOOL, false) | |
1161 | OPTION(bluestore_debug_randomize_serial_transaction, OPT_INT, 0) | |
1162 | OPTION(bluestore_debug_omit_block_device_write, OPT_BOOL, false) | |
1163 | OPTION(bluestore_shard_finishers, OPT_BOOL, false) | |
1164 | ||
1165 | OPTION(kstore_max_ops, OPT_U64, 512) | |
1166 | OPTION(kstore_max_bytes, OPT_U64, 64*1024*1024) | |
1167 | OPTION(kstore_backend, OPT_STR, "rocksdb") | |
1168 | OPTION(kstore_rocksdb_options, OPT_STR, "compression=kNoCompression") | |
1169 | OPTION(kstore_rocksdb_bloom_bits_per_key, OPT_INT, 0) | |
1170 | OPTION(kstore_fsck_on_mount, OPT_BOOL, false) | |
1171 | OPTION(kstore_fsck_on_mount_deep, OPT_BOOL, true) | |
1172 | OPTION(kstore_nid_prealloc, OPT_U64, 1024) | |
1173 | OPTION(kstore_sync_transaction, OPT_BOOL, false) | |
1174 | OPTION(kstore_sync_submit_transaction, OPT_BOOL, false) | |
1175 | OPTION(kstore_onode_map_size, OPT_U64, 1024) | |
1176 | OPTION(kstore_cache_tails, OPT_BOOL, true) | |
1177 | OPTION(kstore_default_stripe_size, OPT_INT, 65536) | |
1178 | ||
1179 | OPTION(filestore_omap_backend, OPT_STR, "rocksdb") | |
1180 | OPTION(filestore_omap_backend_path, OPT_STR, "") | |
1181 | ||
1182 | /// filestore wb throttle limits | |
1183 | OPTION(filestore_wbthrottle_enable, OPT_BOOL, true) | |
1184 | OPTION(filestore_wbthrottle_btrfs_bytes_start_flusher, OPT_U64, 41943040) | |
1185 | OPTION(filestore_wbthrottle_btrfs_bytes_hard_limit, OPT_U64, 419430400) | |
1186 | OPTION(filestore_wbthrottle_btrfs_ios_start_flusher, OPT_U64, 500) | |
1187 | OPTION(filestore_wbthrottle_btrfs_ios_hard_limit, OPT_U64, 5000) | |
1188 | OPTION(filestore_wbthrottle_btrfs_inodes_start_flusher, OPT_U64, 500) | |
1189 | OPTION(filestore_wbthrottle_xfs_bytes_start_flusher, OPT_U64, 41943040) | |
1190 | OPTION(filestore_wbthrottle_xfs_bytes_hard_limit, OPT_U64, 419430400) | |
1191 | OPTION(filestore_wbthrottle_xfs_ios_start_flusher, OPT_U64, 500) | |
1192 | OPTION(filestore_wbthrottle_xfs_ios_hard_limit, OPT_U64, 5000) | |
1193 | OPTION(filestore_wbthrottle_xfs_inodes_start_flusher, OPT_U64, 500) | |
1194 | ||
1195 | /// These must be less than the fd limit | |
1196 | OPTION(filestore_wbthrottle_btrfs_inodes_hard_limit, OPT_U64, 5000) | |
1197 | OPTION(filestore_wbthrottle_xfs_inodes_hard_limit, OPT_U64, 5000) | |
1198 | ||
1199 | //Introduce an O_DSYNC write in the filestore | |
1200 | OPTION(filestore_odsync_write, OPT_BOOL, false) | |
1201 | ||
1202 | // Tests index failure paths | |
1203 | OPTION(filestore_index_retry_probability, OPT_DOUBLE, 0) | |
1204 | ||
1205 | // Allow object read error injection | |
1206 | OPTION(filestore_debug_inject_read_err, OPT_BOOL, false) | |
1207 | ||
1208 | OPTION(filestore_debug_omap_check, OPT_BOOL, false) // Expensive debugging check on sync | |
1209 | OPTION(filestore_omap_header_cache_size, OPT_INT, 1024) | |
1210 | ||
1211 | // Use omap for xattrs for attrs over | |
1212 | // filestore_max_inline_xattr_size or | |
1213 | OPTION(filestore_max_inline_xattr_size, OPT_U32, 0) //Override | |
1214 | OPTION(filestore_max_inline_xattr_size_xfs, OPT_U32, 65536) | |
1215 | OPTION(filestore_max_inline_xattr_size_btrfs, OPT_U32, 2048) | |
1216 | OPTION(filestore_max_inline_xattr_size_other, OPT_U32, 512) | |
1217 | ||
1218 | // for more than filestore_max_inline_xattrs attrs | |
1219 | OPTION(filestore_max_inline_xattrs, OPT_U32, 0) //Override | |
1220 | OPTION(filestore_max_inline_xattrs_xfs, OPT_U32, 10) | |
1221 | OPTION(filestore_max_inline_xattrs_btrfs, OPT_U32, 10) | |
1222 | OPTION(filestore_max_inline_xattrs_other, OPT_U32, 2) | |
1223 | ||
1224 | // max xattr value size | |
1225 | OPTION(filestore_max_xattr_value_size, OPT_U32, 0) //Override | |
1226 | OPTION(filestore_max_xattr_value_size_xfs, OPT_U32, 64<<10) | |
1227 | OPTION(filestore_max_xattr_value_size_btrfs, OPT_U32, 64<<10) | |
1228 | // ext4 allows 4k xattrs total including some smallish extra fields and the | |
1229 | // keys. We're allowing 2 512-byte inline attrs in addition to some filestore | |
1230 | // replay attrs. After accounting for those, we still need to fit up to | |
1231 | // two attrs of this value. That means we need this value to be around 1k | |
1232 | // to be safe. This is hacky, but it's not worth complicating the code | |
1233 | // to work around ext4's total xattr limit. | |
1234 | OPTION(filestore_max_xattr_value_size_other, OPT_U32, 1<<10) | |
1235 | ||
1236 | OPTION(filestore_sloppy_crc, OPT_BOOL, false) // track sloppy crcs | |
1237 | OPTION(filestore_sloppy_crc_block_size, OPT_INT, 65536) | |
1238 | ||
1239 | OPTION(filestore_max_alloc_hint_size, OPT_U64, 1ULL << 20) // bytes | |
1240 | ||
1241 | OPTION(filestore_max_sync_interval, OPT_DOUBLE, 5) // seconds | |
1242 | OPTION(filestore_min_sync_interval, OPT_DOUBLE, .01) // seconds | |
1243 | OPTION(filestore_btrfs_snap, OPT_BOOL, true) | |
1244 | OPTION(filestore_btrfs_clone_range, OPT_BOOL, true) | |
1245 | OPTION(filestore_zfs_snap, OPT_BOOL, false) // zfsonlinux is still unstable | |
1246 | OPTION(filestore_fsync_flushes_journal_data, OPT_BOOL, false) | |
1247 | OPTION(filestore_fiemap, OPT_BOOL, false) // (try to) use fiemap | |
1248 | OPTION(filestore_punch_hole, OPT_BOOL, false) | |
1249 | OPTION(filestore_seek_data_hole, OPT_BOOL, false) // (try to) use seek_data/hole | |
1250 | OPTION(filestore_splice, OPT_BOOL, false) | |
1251 | OPTION(filestore_fadvise, OPT_BOOL, true) | |
1252 | //collect device partition information for management application to use | |
1253 | OPTION(filestore_collect_device_partition_information, OPT_BOOL, true) | |
1254 | ||
1255 | // (try to) use extsize for alloc hint NOTE: extsize seems to trigger | |
1256 | // data corruption in xfs prior to kernel 3.5. filestore will | |
1257 | // implicitly disable this if it cannot confirm the kernel is newer | |
1258 | // than that. | |
1259 | // NOTE: This option involves a tradeoff: When disabled, fragmentation is | |
1260 | // worse, but large sequential writes are faster. When enabled, large | |
1261 | // sequential writes are slower, but fragmentation is reduced. | |
1262 | OPTION(filestore_xfs_extsize, OPT_BOOL, false) | |
1263 | ||
1264 | OPTION(filestore_journal_parallel, OPT_BOOL, false) | |
1265 | OPTION(filestore_journal_writeahead, OPT_BOOL, false) | |
1266 | OPTION(filestore_journal_trailing, OPT_BOOL, false) | |
1267 | OPTION(filestore_queue_max_ops, OPT_U64, 50) | |
1268 | OPTION(filestore_queue_max_bytes, OPT_U64, 100 << 20) | |
1269 | ||
1270 | OPTION(filestore_caller_concurrency, OPT_INT, 10) | |
1271 | ||
1272 | /// Expected filestore throughput in B/s | |
1273 | OPTION(filestore_expected_throughput_bytes, OPT_DOUBLE, 200 << 20) | |
1274 | /// Expected filestore throughput in ops/s | |
1275 | OPTION(filestore_expected_throughput_ops, OPT_DOUBLE, 200) | |
1276 | ||
1277 | /// Filestore max delay multiple. Defaults to 0 (disabled) | |
1278 | OPTION(filestore_queue_max_delay_multiple, OPT_DOUBLE, 0) | |
1279 | /// Filestore high delay multiple. Defaults to 0 (disabled) | |
1280 | OPTION(filestore_queue_high_delay_multiple, OPT_DOUBLE, 0) | |
1281 | ||
1282 | /// Use above to inject delays intended to keep the op queue between low and high | |
1283 | OPTION(filestore_queue_low_threshhold, OPT_DOUBLE, 0.3) | |
1284 | OPTION(filestore_queue_high_threshhold, OPT_DOUBLE, 0.9) | |
1285 | ||
1286 | OPTION(filestore_op_threads, OPT_INT, 2) | |
1287 | OPTION(filestore_op_thread_timeout, OPT_INT, 60) | |
1288 | OPTION(filestore_op_thread_suicide_timeout, OPT_INT, 180) | |
1289 | OPTION(filestore_commit_timeout, OPT_FLOAT, 600) | |
1290 | OPTION(filestore_fiemap_threshold, OPT_INT, 4096) | |
1291 | OPTION(filestore_merge_threshold, OPT_INT, 10) | |
1292 | OPTION(filestore_split_multiple, OPT_INT, 2) | |
1293 | OPTION(filestore_update_to, OPT_INT, 1000) | |
1294 | OPTION(filestore_blackhole, OPT_BOOL, false) // drop any new transactions on the floor | |
1295 | OPTION(filestore_fd_cache_size, OPT_INT, 128) // FD lru size | |
1296 | OPTION(filestore_fd_cache_shards, OPT_INT, 16) // FD number of shards | |
1297 | OPTION(filestore_ondisk_finisher_threads, OPT_INT, 1) | |
1298 | OPTION(filestore_apply_finisher_threads, OPT_INT, 1) | |
1299 | OPTION(filestore_dump_file, OPT_STR, "") // file onto which store transaction dumps | |
1300 | OPTION(filestore_kill_at, OPT_INT, 0) // inject a failure at the n'th opportunity | |
1301 | OPTION(filestore_inject_stall, OPT_INT, 0) // artificially stall for N seconds in op queue thread | |
1302 | OPTION(filestore_fail_eio, OPT_BOOL, true) // fail/crash on EIO | |
1303 | OPTION(filestore_debug_verify_split, OPT_BOOL, false) | |
1304 | OPTION(journal_dio, OPT_BOOL, true) | |
1305 | OPTION(journal_aio, OPT_BOOL, true) | |
1306 | OPTION(journal_force_aio, OPT_BOOL, false) | |
1307 | OPTION(journal_block_size, OPT_INT, 4096) | |
1308 | ||
1309 | // max bytes to search ahead in journal searching for corruption | |
1310 | OPTION(journal_max_corrupt_search, OPT_U64, 10<<20) | |
1311 | OPTION(journal_block_align, OPT_BOOL, true) | |
1312 | OPTION(journal_write_header_frequency, OPT_U64, 0) | |
1313 | OPTION(journal_max_write_bytes, OPT_INT, 10 << 20) | |
1314 | OPTION(journal_max_write_entries, OPT_INT, 100) | |
1315 | ||
1316 | /// Target range for journal fullness | |
1317 | OPTION(journal_throttle_low_threshhold, OPT_DOUBLE, 0.6) | |
1318 | OPTION(journal_throttle_high_threshhold, OPT_DOUBLE, 0.9) | |
1319 | ||
1320 | /// Multiple over expected at high_threshhold. Defaults to 0 (disabled). | |
1321 | OPTION(journal_throttle_high_multiple, OPT_DOUBLE, 0) | |
1322 | /// Multiple over expected at max. Defaults to 0 (disabled). | |
1323 | OPTION(journal_throttle_max_multiple, OPT_DOUBLE, 0) | |
1324 | ||
1325 | OPTION(journal_align_min_size, OPT_INT, 64 << 10) // align data payloads >= this. | |
1326 | OPTION(journal_replay_from, OPT_INT, 0) | |
1327 | OPTION(journal_zero_on_create, OPT_BOOL, false) | |
1328 | OPTION(journal_ignore_corruption, OPT_BOOL, false) // assume journal is not corrupt | |
1329 | OPTION(journal_discard, OPT_BOOL, false) // when using an ssd disk as the journal, whether to issue discard for unused journal data. | |
1330 | ||
1331 | OPTION(fio_dir, OPT_STR, "/tmp/fio") // fio data directory for fio-objectstore | |
1332 | ||
1333 | OPTION(rados_mon_op_timeout, OPT_DOUBLE, 0) // how many seconds to wait for a response from the monitor before returning an error from a rados operation. 0 means no limit. | |
1334 | OPTION(rados_osd_op_timeout, OPT_DOUBLE, 0) // how many seconds to wait for a response from osds before returning an error from a rados operation. 0 means no limit. | |
1335 | OPTION(rados_tracing, OPT_BOOL, false) // true if LTTng-UST tracepoints should be enabled | |
1336 | ||
1337 | OPTION(rbd_op_threads, OPT_INT, 1) | |
1338 | OPTION(rbd_op_thread_timeout, OPT_INT, 60) | |
1339 | OPTION(rbd_non_blocking_aio, OPT_BOOL, true) // process AIO ops from a worker thread to prevent blocking | |
1340 | OPTION(rbd_cache, OPT_BOOL, true) // whether to enable caching (writeback unless rbd_cache_max_dirty is 0) | |
1341 | OPTION(rbd_cache_writethrough_until_flush, OPT_BOOL, true) // whether to make writeback caching writethrough until flush is called, to be sure the user of librbd will send flushes so that writeback is safe | |
1342 | OPTION(rbd_cache_size, OPT_LONGLONG, 32<<20) // cache size in bytes | |
1343 | OPTION(rbd_cache_max_dirty, OPT_LONGLONG, 24<<20) // dirty limit in bytes - set to 0 for write-through caching | |
1344 | OPTION(rbd_cache_target_dirty, OPT_LONGLONG, 16<<20) // target dirty limit in bytes | |
1345 | OPTION(rbd_cache_max_dirty_age, OPT_FLOAT, 1.0) // seconds in cache before writeback starts | |
1346 | OPTION(rbd_cache_max_dirty_object, OPT_INT, 0) // dirty limit for objects - set to 0 for auto calculate from rbd_cache_size | |
1347 | OPTION(rbd_cache_block_writes_upfront, OPT_BOOL, false) // whether to block writes to the cache before the aio_write call completes (true), or block before the aio completion is called (false) | |
1348 | OPTION(rbd_concurrent_management_ops, OPT_INT, 10) // how many operations can be in flight for a management operation like deleting or resizing an image | |
1349 | OPTION(rbd_balance_snap_reads, OPT_BOOL, false) | |
1350 | OPTION(rbd_localize_snap_reads, OPT_BOOL, false) | |
1351 | OPTION(rbd_balance_parent_reads, OPT_BOOL, false) | |
1352 | OPTION(rbd_localize_parent_reads, OPT_BOOL, true) | |
1353 | OPTION(rbd_readahead_trigger_requests, OPT_INT, 10) // number of sequential requests necessary to trigger readahead | |
1354 | OPTION(rbd_readahead_max_bytes, OPT_LONGLONG, 512 * 1024) // set to 0 to disable readahead | |
1355 | OPTION(rbd_readahead_disable_after_bytes, OPT_LONGLONG, 50 * 1024 * 1024) // how many bytes are read in total before readahead is disabled | |
1356 | OPTION(rbd_clone_copy_on_read, OPT_BOOL, false) | |
1357 | OPTION(rbd_blacklist_on_break_lock, OPT_BOOL, true) // whether to blacklist clients whose lock was broken | |
1358 | OPTION(rbd_blacklist_expire_seconds, OPT_INT, 0) // number of seconds to blacklist - set to 0 for OSD default | |
1359 | OPTION(rbd_request_timed_out_seconds, OPT_INT, 30) // number of seconds before maint request times out | |
1360 | OPTION(rbd_skip_partial_discard, OPT_BOOL, false) // when trying to discard a range inside an object, set to true to skip zeroing the range. | |
1361 | OPTION(rbd_enable_alloc_hint, OPT_BOOL, true) // when writing an object, issue a hint to the osd backend to indicate the expected object size | |
1362 | OPTION(rbd_tracing, OPT_BOOL, false) // true if LTTng-UST tracepoints should be enabled | |
1363 | OPTION(rbd_validate_pool, OPT_BOOL, true) // true if empty pools should be validated for RBD compatibility | |
1364 | OPTION(rbd_validate_names, OPT_BOOL, true) // true if image specs should be validated | |
1365 | OPTION(rbd_auto_exclusive_lock_until_manual_request, OPT_BOOL, true) // whether to automatically acquire/release exclusive lock until it is explicitly requested, i.e. before we know the user of librbd is properly using the lock API | |
1366 | OPTION(rbd_mirroring_resync_after_disconnect, OPT_BOOL, false) // automatically start image resync after mirroring is disconnected due to being laggy | |
1367 | OPTION(rbd_mirroring_replay_delay, OPT_INT, 0) // time-delay in seconds for rbd-mirror asynchronous replication | |
1368 | ||
1369 | /* | |
1370 | * The following options change the behavior for librbd's image creation methods that | |
1371 | * don't require all of the parameters. These are provided so that older programs | |
1372 | * can take advantage of newer features without being rewritten to use new versions | |
1373 | * of the image creation functions. | |
1374 | * | |
1375 | * rbd_create()/RBD::create() are affected by all of these options. | |
1376 | * | |
1377 | * rbd_create2()/RBD::create2() and rbd_clone()/RBD::clone() are affected by: | |
1378 | * - rbd_default_order | |
1379 | * - rbd_default_stripe_count | |
1380 | * - rbd_default_stripe_size | |
1381 | * | |
1382 | * rbd_create3()/RBD::create3() and rbd_clone2/RBD::clone2() are only | |
1383 | * affected by rbd_default_order. | |
1384 | */ | |
1385 | OPTION(rbd_default_format, OPT_INT, 2) | |
1386 | OPTION(rbd_default_order, OPT_INT, 22) | |
1387 | OPTION(rbd_default_stripe_count, OPT_U64, 0) // changing requires stripingv2 feature | |
1388 | OPTION(rbd_default_stripe_unit, OPT_U64, 0) // changing to non-object size requires stripingv2 feature | |
1389 | OPTION(rbd_default_data_pool, OPT_STR, "") // optional default pool for storing image data blocks | |
1390 | ||
1391 | /** | |
1392 | * RBD features are only applicable for v2 images. This setting accepts either | |
1393 | * an integer bitmask value or comma-delimited string of RBD feature names. | |
1394 | * This setting is always internally stored as an integer bitmask value. The | |
1395 | * mapping between feature bitmask value and feature name is as follows: | |
1396 | * | |
1397 | * +1 -> layering | |
1398 | * +2 -> striping | |
1399 | * +4 -> exclusive-lock | |
1400 | * +8 -> object-map | |
1401 | * +16 -> fast-diff | |
1402 | * +32 -> deep-flatten | |
1403 | * +64 -> journaling | |
1404 | * +128 -> data-pool | |
1405 | */ | |
1406 | SAFE_OPTION(rbd_default_features, OPT_STR, "layering,exclusive-lock,object-map,fast-diff,deep-flatten") | |
1407 | OPTION_VALIDATOR(rbd_default_features) | |
1408 | ||
1409 | OPTION(rbd_default_map_options, OPT_STR, "") // default rbd map -o / --options | |
1410 | ||
1411 | /** | |
1412 | * RBD journal options. | |
1413 | */ | |
1414 | OPTION(rbd_journal_order, OPT_U32, 24) // bits to shift to compute journal object max size, between 12 and 64 | |
1415 | OPTION(rbd_journal_splay_width, OPT_U32, 4) // number of active journal objects | |
1416 | OPTION(rbd_journal_commit_age, OPT_DOUBLE, 5) // commit time interval, seconds | |
1417 | OPTION(rbd_journal_object_flush_interval, OPT_INT, 0) // maximum number of pending commits per journal object | |
1418 | OPTION(rbd_journal_object_flush_bytes, OPT_INT, 0) // maximum number of pending bytes per journal object | |
1419 | OPTION(rbd_journal_object_flush_age, OPT_DOUBLE, 0) // maximum age (in seconds) for pending commits | |
1420 | OPTION(rbd_journal_pool, OPT_STR, "") // pool for journal objects | |
1421 | OPTION(rbd_journal_max_payload_bytes, OPT_U32, 16384) // maximum journal payload size before splitting | |
1422 | OPTION(rbd_journal_max_concurrent_object_sets, OPT_INT, 0) // maximum number of object sets a journal client can be behind before it is automatically unregistered | |
1423 | ||
1424 | /** | |
1425 | * RBD Mirror options | |
1426 | */ | |
1427 | OPTION(rbd_mirror_journal_commit_age, OPT_DOUBLE, 5) // commit time interval, seconds | |
1428 | OPTION(rbd_mirror_journal_poll_age, OPT_DOUBLE, 5) // maximum age (in seconds) between successive journal polls | |
1429 | OPTION(rbd_mirror_journal_max_fetch_bytes, OPT_U32, 32768) // maximum bytes to read from each journal data object per fetch | |
1430 | OPTION(rbd_mirror_sync_point_update_age, OPT_DOUBLE, 30) // number of seconds between each update of the image sync point object number | |
1431 | OPTION(rbd_mirror_concurrent_image_syncs, OPT_U32, 5) // maximum number of image syncs in parallel | |
1432 | OPTION(rbd_mirror_pool_replayers_refresh_interval, OPT_INT, 30) // interval to refresh peers in rbd-mirror daemon | |
1433 | OPTION(rbd_mirror_delete_retry_interval, OPT_DOUBLE, 30) // interval to check and retry the failed requests in deleter | |
1434 | OPTION(rbd_mirror_image_state_check_interval, OPT_INT, 30) // interval to get images from pool watcher and set sources in replayer | |
1435 | OPTION(rbd_mirror_leader_heartbeat_interval, OPT_INT, 5) // interval (in seconds) between mirror leader heartbeats | |
1436 | OPTION(rbd_mirror_leader_max_missed_heartbeats, OPT_INT, 2) // number of missed heartbeats for non-lock owner to attempt to acquire lock | |
1437 | OPTION(rbd_mirror_leader_max_acquire_attempts_before_break, OPT_INT, 3) // number of failed attempts to acquire lock after missing heartbeats before breaking lock | |
1438 | ||
1439 | OPTION(nss_db_path, OPT_STR, "") // path to nss db | |
1440 | ||
1441 | ||
1442 | OPTION(rgw_max_chunk_size, OPT_INT, 4 * 1024 * 1024) | |
1443 | OPTION(rgw_put_obj_min_window_size, OPT_INT, 16 * 1024 * 1024) | |
1444 | OPTION(rgw_put_obj_max_window_size, OPT_INT, 64 * 1024 * 1024) | |
1445 | OPTION(rgw_max_put_size, OPT_U64, 5ULL*1024*1024*1024) | |
1446 | OPTION(rgw_max_put_param_size, OPT_U64, 1 * 1024 * 1024) // max input size for PUT requests accepting json/xml params | |
1447 | ||
1448 | /** | |
1449 | * override max bucket index shards in zone configuration (if not zero) | |
1450 | * | |
1451 | * Represents the number of shards for the bucket index object; a value of zero | |
1452 | * indicates there is no sharding. By default (no sharding), the name of the object | |
1453 | * is '.dir.{marker}'; with sharding, the name is '.dir.{marker}.{sharding_id}', | |
1454 | * where sharding_id is a zero-based value. It is not recommended to set too large a | |
1455 | * value (e.g. a thousand) as it increases the cost of bucket listing. | |
1456 | */ | |
1457 | OPTION(rgw_override_bucket_index_max_shards, OPT_U32, 0) | |
1458 | ||
1459 | /** | |
1460 | * Represents the maximum AIO pending requests for the bucket index object shards. | |
1461 | */ | |
1462 | OPTION(rgw_bucket_index_max_aio, OPT_U32, 8) | |
1463 | ||
1464 | /** | |
1465 | * whether or not the quota/gc threads should be started | |
1466 | */ | |
1467 | OPTION(rgw_enable_quota_threads, OPT_BOOL, true) | |
1468 | OPTION(rgw_enable_gc_threads, OPT_BOOL, true) | |
1469 | OPTION(rgw_enable_lc_threads, OPT_BOOL, true) | |
1470 | ||
1471 | ||
1472 | OPTION(rgw_data, OPT_STR, "/var/lib/ceph/radosgw/$cluster-$id") | |
1473 | OPTION(rgw_enable_apis, OPT_STR, "s3, s3website, swift, swift_auth, admin") | |
1474 | OPTION(rgw_cache_enabled, OPT_BOOL, true) // rgw cache enabled | |
1475 | OPTION(rgw_cache_lru_size, OPT_INT, 10000) // num of entries in rgw cache | |
1476 | OPTION(rgw_socket_path, OPT_STR, "") // path to unix domain socket, if not specified, rgw will not run as external fcgi | |
1477 | OPTION(rgw_host, OPT_STR, "") // host for radosgw, can be an IP, default is 0.0.0.0 | |
1478 | OPTION(rgw_port, OPT_STR, "") // port to listen, format as "8080" "5000", if not specified, rgw will not run external fcgi | |
1479 | OPTION(rgw_dns_name, OPT_STR, "") // hostname suffix on buckets | |
1480 | OPTION(rgw_dns_s3website_name, OPT_STR, "") // hostname suffix on buckets for s3-website endpoint | |
1481 | OPTION(rgw_content_length_compat, OPT_BOOL, false) // Check both HTTP_CONTENT_LENGTH and CONTENT_LENGTH in fcgi env | |
1482 | OPTION(rgw_lifecycle_work_time, OPT_STR, "00:00-06:00") //job process lc at 00:00-06:00s | |
1483 | OPTION(rgw_lc_lock_max_time, OPT_INT, 60) // total run time for a single lc processor work | |
1484 | OPTION(rgw_lc_max_objs, OPT_INT, 32) | |
1485 | OPTION(rgw_lc_debug_interval, OPT_INT, -1) // Debug run interval, in seconds | |
1486 | OPTION(rgw_script_uri, OPT_STR, "") // alternative value for SCRIPT_URI if not set in request | |
1487 | OPTION(rgw_request_uri, OPT_STR, "") // alternative value for REQUEST_URI if not set in request | |
1488 | OPTION(rgw_swift_url, OPT_STR, "") // the swift url, being published by the internal swift auth | |
1489 | OPTION(rgw_swift_url_prefix, OPT_STR, "swift") // entry point for which a url is considered a swift url | |
1490 | OPTION(rgw_swift_auth_url, OPT_STR, "") // default URL to go and verify tokens for v1 auth (if not using internal swift auth) | |
1491 | OPTION(rgw_swift_auth_entry, OPT_STR, "auth") // entry point for which a url is considered a swift auth url | |
1492 | OPTION(rgw_swift_tenant_name, OPT_STR, "") // tenant name to use for swift access | |
1493 | OPTION(rgw_swift_account_in_url, OPT_BOOL, false) // assume that URL always contain the account (aka tenant) part | |
1494 | OPTION(rgw_swift_enforce_content_length, OPT_BOOL, false) // enforce generation of Content-Length even in cost of performance or scalability | |
1495 | OPTION(rgw_keystone_url, OPT_STR, "") // url for keystone server | |
1496 | OPTION(rgw_keystone_admin_token, OPT_STR, "") // keystone admin token (shared secret) | |
1497 | OPTION(rgw_keystone_admin_user, OPT_STR, "") // keystone admin user name | |
1498 | OPTION(rgw_keystone_admin_password, OPT_STR, "") // keystone admin user password | |
1499 | OPTION(rgw_keystone_admin_tenant, OPT_STR, "") // keystone admin user tenant (for keystone v2.0) | |
1500 | OPTION(rgw_keystone_admin_project, OPT_STR, "") // keystone admin user project (for keystone v3) | |
1501 | OPTION(rgw_keystone_admin_domain, OPT_STR, "") // keystone admin user domain | |
1502 | OPTION(rgw_keystone_barbican_user, OPT_STR, "") // keystone user to access barbican secrets | |
1503 | OPTION(rgw_keystone_barbican_password, OPT_STR, "") // keystone password for barbican user | |
1504 | OPTION(rgw_keystone_barbican_tenant, OPT_STR, "") // keystone barbican user tenant (for keystone v2.0) | |
1505 | OPTION(rgw_keystone_barbican_project, OPT_STR, "") // keystone barbican user project (for keystone v3) | |
1506 | OPTION(rgw_keystone_barbican_domain, OPT_STR, "") // keystone barbican user domain | |
1507 | OPTION(rgw_keystone_api_version, OPT_INT, 2) // Version of Keystone API to use (2 or 3) | |
1508 | OPTION(rgw_keystone_accepted_roles, OPT_STR, "Member, admin") // roles required to serve requests | |
1509 | OPTION(rgw_keystone_accepted_admin_roles, OPT_STR, "") // list of roles allowing an user to gain admin privileges | |
1510 | OPTION(rgw_keystone_token_cache_size, OPT_INT, 10000) // max number of entries in keystone token cache | |
1511 | OPTION(rgw_keystone_revocation_interval, OPT_INT, 15 * 60) // seconds between tokens revocation check | |
1512 | OPTION(rgw_keystone_verify_ssl, OPT_BOOL, true) // should we try to verify keystone's ssl | |
1513 | OPTION(rgw_keystone_implicit_tenants, OPT_BOOL, false) // create new users in their own tenants of the same name | |
1514 | OPTION(rgw_cross_domain_policy, OPT_STR, "<allow-access-from domain=\"*\" secure=\"false\" />") | |
1515 | OPTION(rgw_healthcheck_disabling_path, OPT_STR, "") // path that existence causes the healthcheck to respond 503 | |
1516 | OPTION(rgw_s3_auth_use_rados, OPT_BOOL, true) // should we try to use the internal credentials for s3? | |
1517 | OPTION(rgw_s3_auth_use_keystone, OPT_BOOL, false) // should we try to use keystone for s3? | |
1518 | OPTION(rgw_s3_auth_aws4_force_boto2_compat, OPT_BOOL, true) // force aws4 auth boto2 compatibility | |
1519 | OPTION(rgw_barbican_url, OPT_STR, "") // url for barbican server | |
1520 | ||
1521 | /* OpenLDAP-style LDAP parameter strings */ | |
1522 | /* rgw_ldap_uri space-separated list of LDAP servers in URI format */ | |
1523 | OPTION(rgw_ldap_uri, OPT_STR, "ldaps://<ldap.your.domain>") | |
1524 | /* rgw_ldap_binddn LDAP entry RGW will bind with (user match) */ | |
1525 | OPTION(rgw_ldap_binddn, OPT_STR, "uid=admin,cn=users,dc=example,dc=com") | |
1526 | /* rgw_ldap_searchdn LDAP search base (basedn) */ | |
1527 | OPTION(rgw_ldap_searchdn, OPT_STR, "cn=users,cn=accounts,dc=example,dc=com") | |
1528 | /* rgw_ldap_dnattr LDAP attribute containing RGW user names (to form binddns)*/ | |
1529 | OPTION(rgw_ldap_dnattr, OPT_STR, "uid") | |
1530 | /* rgw_ldap_secret file containing credentials for rgw_ldap_binddn */ | |
1531 | OPTION(rgw_ldap_secret, OPT_STR, "/etc/openldap/secret") | |
1532 | /* rgw_s3_auth_use_ldap use LDAP for RGW auth? */ | |
1533 | OPTION(rgw_s3_auth_use_ldap, OPT_BOOL, false) | |
1534 | /* rgw_ldap_searchfilter LDAP search filter */ | |
1535 | OPTION(rgw_ldap_searchfilter, OPT_STR, "") | |
1536 | ||
1537 | OPTION(rgw_admin_entry, OPT_STR, "admin") // entry point for which a url is considered an admin request | |
1538 | OPTION(rgw_enforce_swift_acls, OPT_BOOL, true) | |
1539 | OPTION(rgw_swift_token_expiration, OPT_INT, 24 * 3600) // time in seconds for swift token expiration | |
1540 | OPTION(rgw_print_continue, OPT_BOOL, true) // enable if 100-Continue works | |
1541 | OPTION(rgw_print_prohibited_content_length, OPT_BOOL, false) // violate RFC 7230 and send Content-Length in 204 and 304 | |
1542 | OPTION(rgw_remote_addr_param, OPT_STR, "REMOTE_ADDR") // e.g. X-Forwarded-For, if you have a reverse proxy | |
1543 | OPTION(rgw_op_thread_timeout, OPT_INT, 10*60) | |
1544 | OPTION(rgw_op_thread_suicide_timeout, OPT_INT, 0) | |
1545 | OPTION(rgw_thread_pool_size, OPT_INT, 100) | |
1546 | OPTION(rgw_num_control_oids, OPT_INT, 8) | |
1547 | OPTION(rgw_num_rados_handles, OPT_U32, 1) | |
1548 | ||
1549 | /* The following are tunables for caches of RGW NFS (and other file | |
1550 | * client) objects. | |
1551 | * | |
1552 | * The file handle cache is a partitioned hash table | |
1553 | * (fhcache_partitions), each with a closed hash part and backing | |
1554 | * b-tree mapping. The number of partitions is expected to be a small | |
1555 | * prime, the cache size something larger but less than 5K, the total | |
1556 | * size of the cache is n_part * cache_size. | |
1557 | */ | |
1558 | OPTION(rgw_nfs_lru_lanes, OPT_INT, 5) | |
1559 | OPTION(rgw_nfs_lru_lane_hiwat, OPT_INT, 911) | |
1560 | OPTION(rgw_nfs_fhcache_partitions, OPT_INT, 3) | |
1561 | OPTION(rgw_nfs_fhcache_size, OPT_INT, 2017) /* 3*2017=6051 */ | |
1562 | OPTION(rgw_nfs_namespace_expire_secs, OPT_INT, 300) /* namespace invalidate | |
1563 | * timer */ | |
1564 | OPTION(rgw_nfs_max_gc, OPT_INT, 300) /* max gc events per cycle */ | |
1565 | OPTION(rgw_nfs_write_completion_interval_s, OPT_INT, 10) /* stateless (V3) | |
1566 | * commit | |
1567 | * delay */ | |
1568 | ||
1569 | OPTION(rgw_zone, OPT_STR, "") // zone name | |
1570 | OPTION(rgw_zone_root_pool, OPT_STR, ".rgw.root") // pool where zone specific info is stored | |
1571 | OPTION(rgw_default_zone_info_oid, OPT_STR, "default.zone") // oid where default zone info is stored | |
1572 | OPTION(rgw_region, OPT_STR, "") // region name | |
1573 | OPTION(rgw_region_root_pool, OPT_STR, ".rgw.root") // pool where all region info is stored | |
1574 | OPTION(rgw_default_region_info_oid, OPT_STR, "default.region") // oid where default region info is stored | |
1575 | OPTION(rgw_zonegroup, OPT_STR, "") // zone group name | |
1576 | OPTION(rgw_zonegroup_root_pool, OPT_STR, ".rgw.root") // pool where all zone group info is stored | |
1577 | OPTION(rgw_default_zonegroup_info_oid, OPT_STR, "default.zonegroup") // oid where default zone group info is stored | |
1578 | OPTION(rgw_realm, OPT_STR, "") // realm name | |
1579 | OPTION(rgw_realm_root_pool, OPT_STR, ".rgw.root") // pool where all realm info is stored | |
1580 | OPTION(rgw_default_realm_info_oid, OPT_STR, "default.realm") // oid where default realm info is stored | |
1581 | OPTION(rgw_period_root_pool, OPT_STR, ".rgw.root") // pool where all period info is stored | |
1582 | OPTION(rgw_period_latest_epoch_info_oid, OPT_STR, ".latest_epoch") // oid where current period info is stored | |
1583 | OPTION(rgw_log_nonexistent_bucket, OPT_BOOL, false) | |
1584 | OPTION(rgw_log_object_name, OPT_STR, "%Y-%m-%d-%H-%i-%n") // man date to see codes (a subset are supported) | |
1585 | OPTION(rgw_log_object_name_utc, OPT_BOOL, false) | |
1586 | OPTION(rgw_usage_max_shards, OPT_INT, 32) | |
1587 | OPTION(rgw_usage_max_user_shards, OPT_INT, 1) | |
1588 | OPTION(rgw_enable_ops_log, OPT_BOOL, false) // enable logging every rgw operation | |
1589 | OPTION(rgw_enable_usage_log, OPT_BOOL, false) // enable logging bandwidth usage | |
1590 | OPTION(rgw_ops_log_rados, OPT_BOOL, true) // whether ops log should go to rados | |
1591 | OPTION(rgw_ops_log_socket_path, OPT_STR, "") // path to unix domain socket where ops log can go | |
1592 | OPTION(rgw_ops_log_data_backlog, OPT_INT, 5 << 20) // max data backlog for ops log | |
1593 | OPTION(rgw_fcgi_socket_backlog, OPT_INT, 1024) // socket backlog for fcgi | |
1594 | OPTION(rgw_usage_log_flush_threshold, OPT_INT, 1024) // threshold to flush pending log data | |
1595 | OPTION(rgw_usage_log_tick_interval, OPT_INT, 30) // flush pending log data every X seconds | |
1596 | OPTION(rgw_intent_log_object_name, OPT_STR, "%Y-%m-%d-%i-%n") // man date to see codes (a subset are supported) | |
1597 | OPTION(rgw_intent_log_object_name_utc, OPT_BOOL, false) | |
1598 | OPTION(rgw_init_timeout, OPT_INT, 300) // time in seconds | |
1599 | OPTION(rgw_mime_types_file, OPT_STR, "/etc/mime.types") | |
1600 | OPTION(rgw_gc_max_objs, OPT_INT, 32) | |
1601 | OPTION(rgw_gc_obj_min_wait, OPT_INT, 2 * 3600) // wait time before object may be handled by gc | |
1602 | OPTION(rgw_gc_processor_max_time, OPT_INT, 3600) // total run time for a single gc processor work | |
1603 | OPTION(rgw_gc_processor_period, OPT_INT, 3600) // gc processor cycle time | |
1604 | OPTION(rgw_s3_success_create_obj_status, OPT_INT, 0) // alternative success status response for create-obj (0 - default) | |
1605 | OPTION(rgw_resolve_cname, OPT_BOOL, false) // should rgw try to resolve hostname as a dns cname record | |
1606 | OPTION(rgw_obj_stripe_size, OPT_INT, 4 << 20) | |
1607 | OPTION(rgw_extended_http_attrs, OPT_STR, "") // list of extended attrs that can be set on objects (beyond the default) | |
1608 | OPTION(rgw_exit_timeout_secs, OPT_INT, 120) // how many seconds to wait for process to go down before exiting unconditionally | |
1609 | OPTION(rgw_get_obj_window_size, OPT_INT, 16 << 20) // window size in bytes for single get obj request | |
1610 | OPTION(rgw_get_obj_max_req_size, OPT_INT, 4 << 20) // max length of a single get obj rados op | |
1611 | OPTION(rgw_relaxed_s3_bucket_names, OPT_BOOL, false) // enable relaxed bucket name rules for US region buckets | |
1612 | OPTION(rgw_defer_to_bucket_acls, OPT_STR, "") // if the user has bucket perms, use those before key perms (recurse and full_control) | |
1613 | OPTION(rgw_list_buckets_max_chunk, OPT_INT, 1000) // max buckets to retrieve in a single op when listing user buckets | |
1614 | OPTION(rgw_md_log_max_shards, OPT_INT, 64) // max shards for metadata log | |
1615 | OPTION(rgw_num_zone_opstate_shards, OPT_INT, 128) // max shards for keeping inter-region copy progress info | |
1616 | OPTION(rgw_opstate_ratelimit_sec, OPT_INT, 30) // min time between opstate updates on a single upload (0 for disabling ratelimit) | |
1617 | OPTION(rgw_curl_wait_timeout_ms, OPT_INT, 1000) // timeout for certain curl calls | |
1618 | OPTION(rgw_copy_obj_progress, OPT_BOOL, true) // should dump progress during long copy operations? | |
1619 | OPTION(rgw_copy_obj_progress_every_bytes, OPT_INT, 1024 * 1024) // min bytes between copy progress output | |
1620 | OPTION(rgw_obj_tombstone_cache_size, OPT_INT, 1000) // how many objects in tombstone cache, which is used in multi-zone sync to keep | |
1621 | // track of removed objects' mtime | |
1622 | ||
1623 | OPTION(rgw_data_log_window, OPT_INT, 30) // data log entries window (in seconds) | |
1624 | OPTION(rgw_data_log_changes_size, OPT_INT, 1000) // number of in-memory entries to hold for data changes log | |
1625 | OPTION(rgw_data_log_num_shards, OPT_INT, 128) // number of objects to keep data changes log on | |
1626 | OPTION(rgw_data_log_obj_prefix, OPT_STR, "data_log") // | |
1627 | OPTION(rgw_replica_log_obj_prefix, OPT_STR, "replica_log") // | |
1628 | ||
1629 | OPTION(rgw_bucket_quota_ttl, OPT_INT, 600) // time for cached bucket stats to be cached within rgw instance | |
1630 | OPTION(rgw_bucket_quota_soft_threshold, OPT_DOUBLE, 0.95) // threshold from which we don't rely on cached info for quota decisions | |
1631 | OPTION(rgw_bucket_quota_cache_size, OPT_INT, 10000) // number of entries in bucket quota cache | |
1632 | OPTION(rgw_bucket_default_quota_max_objects, OPT_INT, -1) // number of objects allowed | |
1633 | OPTION(rgw_bucket_default_quota_max_size, OPT_LONGLONG, -1) // Max size of object in bytes | |
1634 | ||
1635 | OPTION(rgw_expose_bucket, OPT_BOOL, false) // Return the bucket name in the 'Bucket' response header | |
1636 | ||
1637 | OPTION(rgw_frontends, OPT_STR, "fastcgi, civetweb port=7480") // rgw front ends | |
1638 | ||
1639 | OPTION(rgw_user_quota_bucket_sync_interval, OPT_INT, 180) // time period for accumulating modified buckets before syncing stats | |
1640 | OPTION(rgw_user_quota_sync_interval, OPT_INT, 3600 * 24) // time period for accumulating modified buckets before syncing entire user stats | |
1641 | OPTION(rgw_user_quota_sync_idle_users, OPT_BOOL, false) // whether stats for idle users be fully synced | |
1642 | OPTION(rgw_user_quota_sync_wait_time, OPT_INT, 3600 * 24) // min time between two full stats sync for non-idle users | |
1643 | OPTION(rgw_user_default_quota_max_objects, OPT_INT, -1) // number of objects allowed | |
1644 | OPTION(rgw_user_default_quota_max_size, OPT_LONGLONG, -1) // Max size of object in bytes | |
1645 | ||
1646 | OPTION(rgw_multipart_min_part_size, OPT_INT, 5 * 1024 * 1024) // min size for each part (except for last one) in multipart upload | |
1647 | OPTION(rgw_multipart_part_upload_limit, OPT_INT, 10000) // parts limit in multipart upload | |
1648 | ||
1649 | OPTION(rgw_max_slo_entries, OPT_INT, 1000) // default number of max entries in slo | |
1650 | ||
1651 | OPTION(rgw_olh_pending_timeout_sec, OPT_INT, 3600) // time until we retire a pending olh change | |
1652 | OPTION(rgw_user_max_buckets, OPT_INT, 1000) // global option to set max buckets count for all user | |
1653 | ||
1654 | OPTION(rgw_objexp_gc_interval, OPT_U32, 60 * 10) // maximum time between round of expired objects garbage collecting | |
1655 | OPTION(rgw_objexp_time_step, OPT_U32, 4096) // number of seconds for rounding the timestamps | |
1656 | OPTION(rgw_objexp_hints_num_shards, OPT_U32, 127) // maximum number of parts in which the hint index is stored in | |
1657 | OPTION(rgw_objexp_chunk_size, OPT_U32, 100) // maximum number of entries in a single operation when processing objexp data | |
1658 | ||
1659 | OPTION(rgw_enable_static_website, OPT_BOOL, false) // enable static website feature | |
1660 | OPTION(rgw_log_http_headers, OPT_STR, "" ) // list of HTTP headers to log when seen, ignores case (e.g., http_x_forwarded_for | |
1661 | ||
1662 | OPTION(rgw_num_async_rados_threads, OPT_INT, 32) // num of threads to use for async rados operations | |
1663 | OPTION(rgw_md_notify_interval_msec, OPT_INT, 200) // metadata changes notification interval to followers | |
1664 | OPTION(rgw_run_sync_thread, OPT_BOOL, true) // whether radosgw (not radosgw-admin) spawns the sync thread | |
1665 | OPTION(rgw_sync_lease_period, OPT_INT, 120) // time in second for lease that rgw takes on a specific log (or log shard) | |
1666 | OPTION(rgw_sync_log_trim_interval, OPT_INT, 1200) // time in seconds between attempts to trim sync logs | |
1667 | ||
1668 | OPTION(rgw_sync_data_inject_err_probability, OPT_DOUBLE, 0) // range [0, 1] | |
1669 | OPTION(rgw_sync_meta_inject_err_probability, OPT_DOUBLE, 0) // range [0, 1] | |
1670 | ||
1671 | ||
1672 | OPTION(rgw_period_push_interval, OPT_DOUBLE, 2) // seconds to wait before retrying "period push" | |
1673 | OPTION(rgw_period_push_interval_max, OPT_DOUBLE, 30) // maximum interval after exponential backoff | |
1674 | ||
1675 | OPTION(rgw_safe_max_objects_per_shard, OPT_INT, 100*1024) // safe max loading | |
1676 | OPTION(rgw_shard_warning_threshold, OPT_DOUBLE, 90) // pct of safe max | |
1677 | // at which to warn | |
1678 | ||
1679 | OPTION(rgw_swift_versioning_enabled, OPT_BOOL, false) // whether swift object versioning feature is enabled | |
1680 | ||
1681 | OPTION(mgr_module_path, OPT_STR, CEPH_PKGLIBDIR "/mgr") // where to load python modules from | |
1682 | OPTION(mgr_modules, OPT_STR, "rest") // Which modules to load | |
1683 | OPTION(mgr_data, OPT_STR, "/var/lib/ceph/mgr/$cluster-$id") // where to find keyring etc | |
1684 | OPTION(mgr_beacon_period, OPT_INT, 5) // How frequently to send beacon | |
1685 | OPTION(mgr_stats_period, OPT_INT, 5) // How frequently to send stats | |
1686 | OPTION(mgr_client_bytes, OPT_U64, 128*1048576) // bytes from clients | |
1687 | OPTION(mgr_client_messages, OPT_U64, 512) // messages from clients | |
1688 | OPTION(mgr_osd_bytes, OPT_U64, 512*1048576) // bytes from osds | |
1689 | OPTION(mgr_osd_messages, OPT_U64, 8192) // messages from osds | |
1690 | OPTION(mgr_mds_bytes, OPT_U64, 128*1048576) // bytes from mdss | |
1691 | OPTION(mgr_mds_messages, OPT_U64, 128) // messages from mdss | |
1692 | OPTION(mgr_mon_bytes, OPT_U64, 128*1048576) // bytes from mons | |
1693 | OPTION(mgr_mon_messages, OPT_U64, 128) // messages from mons | |
1694 | ||
1695 | OPTION(mgr_connect_retry_interval, OPT_DOUBLE, 1.0) | |
1696 | ||
1697 | OPTION(mon_mgr_digest_period, OPT_INT, 5) // How frequently to send digests | |
1698 | OPTION(mon_mgr_beacon_grace, OPT_INT, 30) // How long to wait to failover | |
1699 | OPTION(mon_mgr_inactive_grace, OPT_INT, 60) // How long before health WARN -> ERR | |
1700 | OPTION(rgw_crypt_require_ssl, OPT_BOOL, true) // requests including encryption key headers must be sent over ssl | |
1701 | OPTION(rgw_crypt_default_encryption_key, OPT_STR, "") // base64 encoded key for encryption of rgw objects | |
1702 | OPTION(rgw_crypt_s3_kms_encryption_keys, OPT_STR, "") // extra keys that may be used for aws:kms | |
1703 | // defined as map "key1=YmluCmJvb3N0CmJvb3N0LQ== key2=b3V0CnNyYwpUZXN0aW5nCg==" | |
1704 | OPTION(rgw_crypt_suppress_logs, OPT_BOOL, true) // suppress logs that might print customer key | |
1705 | OPTION(rgw_list_bucket_min_readahead, OPT_INT, 1000) // minimum number of entries to read from rados for bucket listing | |
1706 | ||
1707 | OPTION(rgw_rest_getusage_op_compat, OPT_BOOL, false) // dump description of total stats for s3 GetUsage API | |
1708 | ||
1709 | OPTION(mutex_perf_counter, OPT_BOOL, false) // enable/disable mutex perf counter | |
1710 | OPTION(throttler_perf_counter, OPT_BOOL, true) // enable/disable throttler perf counter | |
1711 | ||
1712 | /* The following are tunables for torrent data */ | |
1713 | OPTION(rgw_torrent_flag, OPT_BOOL, false) // produce torrent function flag | |
1714 | OPTION(rgw_torrent_tracker, OPT_STR, "") // torrent field annouce and annouce list | |
1715 | OPTION(rgw_torrent_createby, OPT_STR, "") // torrent field created by | |
1716 | OPTION(rgw_torrent_comment, OPT_STR, "") // torrent field comment | |
1717 | OPTION(rgw_torrent_encoding, OPT_STR, "") // torrent field encoding | |
1718 | OPTION(rgw_torrent_origin, OPT_STR, "") // torrent origin | |
1719 | OPTION(rgw_torrent_sha_unit, OPT_INT, 512*1024) // torrent field piece length 512K | |
1720 | ||
1721 | OPTION(event_tracing, OPT_BOOL, false) // true if LTTng-UST tracepoints should be enabled | |
1722 | ||
1723 | // This will be set to true when it is safe to start threads. | |
1724 | // Once it is true, it will never change. | |
1725 | OPTION(internal_safe_to_start_threads, OPT_BOOL, false) | |
1726 | ||
1727 | OPTION(debug_deliberately_leak_memory, OPT_BOOL, false) | |
1728 | ||
1729 | OPTION(rgw_swift_custom_header, OPT_STR, "") // option to enable swift custom headers |