# -*- mode: YAML -*-
---

options:
- name: osd_numa_prefer_iface
  type: bool
  level: advanced
  desc: prefer IP on network interface on same numa node as storage
  default: true
  see_also:
  - osd_numa_auto_affinity
  flags:
  - startup
- name: osd_numa_auto_affinity
  type: bool
  level: advanced
  desc: automatically set affinity to numa node when storage and network match
  default: true
  flags:
  - startup
- name: osd_numa_node
  type: int
  level: advanced
  desc: set affinity to a numa node (-1 for none)
  default: -1
  see_also:
  - osd_numa_auto_affinity
  flags:
  - startup
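# Illustrative example (not part of the option schema): the NUMA options above
# carry the "startup" flag, so they are typically set in ceph.conf or in the
# monitor config store before the OSD starts, e.g.
#   [osd]
#   osd_numa_node = 0
#   osd_numa_auto_affinity = false
# or equivalently "ceph config set osd.0 osd_numa_node 0" followed by a restart
# of the OSD.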
- name: set_keepcaps
  type: bool
  level: advanced
  desc: set the keepcaps flag before changing UID, preserving the permitted capability set
  long_desc: When Ceph switches from root to the ceph uid, all capabilities in all sets are erased. If
    a component that is capability aware needs a specific capability, the keepcaps flag maintains
    the permitted capability set, allowing the capabilities in the effective set to be activated as needed.
  default: false
  flags:
  - startup
- name: osd_smart_report_timeout
  type: uint
  level: advanced
  desc: Timeout (in seconds) for smartctl to run, default is set to 5
  default: 5
# verify backend can support configured max object name length
- name: osd_check_max_object_name_len_on_startup
  type: bool
  level: dev
  default: true
  with_legacy: true
- name: osd_max_backfills
  type: uint
  level: advanced
  desc: Maximum number of concurrent local and remote backfills or recoveries per
    OSD
  long_desc: There can be osd_max_backfills local reservations AND the same remote
    reservations per OSD. So a value of 1 lets this OSD participate as 1 PG primary
    in recovery and 1 shard of another recovering PG.
  fmt_desc: The maximum number of backfills allowed to or from a single OSD.
    Note that this is applied separately for read and write operations.
  default: 1
  flags:
  - runtime
  with_legacy: true
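# Illustrative example: osd_max_backfills carries the "runtime" flag, so it can
# be raised on a live cluster, e.g.
#   ceph config set osd osd_max_backfills 2
# With the default mclock_scheduler, this value is only honoured when
# osd_mclock_override_recovery_settings (defined further below) is enabled.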
# Minimum recovery priority (255 = max, smaller = lower)
- name: osd_min_recovery_priority
  type: int
  level: advanced
  desc: Minimum priority below which recovery is not performed
  long_desc: The purpose here is to prevent the cluster from doing *any* lower priority
    work (e.g., rebalancing) below this threshold and focus solely on higher priority
    work (e.g., replicating degraded objects).
  default: 0
  with_legacy: true
- name: osd_backfill_retry_interval
  type: float
  level: advanced
  desc: how frequently to retry backfill reservations after being denied (e.g., due
    to a full OSD)
  fmt_desc: The number of seconds to wait before retrying backfill requests.
  default: 30
  with_legacy: true
- name: osd_recovery_retry_interval
  type: float
  level: advanced
  desc: how frequently to retry recovery reservations after being denied (e.g., due
    to a full OSD)
  default: 30
  with_legacy: true
- name: osd_recovery_sleep
  type: float
  level: advanced
  desc: Time in seconds to sleep before next recovery or backfill op. This setting
    overrides _ssd, _hdd, and _hybrid if non-zero.
  fmt_desc: Time in seconds to sleep before the next recovery or backfill op.
    Increasing this value will slow down recovery operations while
    client operations will be less impacted.
  default: 0
  flags:
  - runtime
  with_legacy: true
- name: osd_recovery_sleep_hdd
  type: float
  level: advanced
  desc: Time in seconds to sleep before next recovery or backfill op for HDDs
  fmt_desc: Time in seconds to sleep before next recovery or backfill op
    for HDDs.
  default: 0.1
  flags:
  - runtime
  with_legacy: true
- name: osd_recovery_sleep_ssd
  type: float
  level: advanced
  desc: Time in seconds to sleep before next recovery or backfill op for SSDs
  fmt_desc: Time in seconds to sleep before the next recovery or backfill op
    for SSDs.
  default: 0
  see_also:
  - osd_recovery_sleep
  flags:
  - runtime
  with_legacy: true
- name: osd_recovery_sleep_hybrid
  type: float
  level: advanced
  desc: Time in seconds to sleep before next recovery or backfill op when data is
    on HDD and journal is on SSD
  fmt_desc: Time in seconds to sleep before the next recovery or backfill op
    when OSD data is on HDD and OSD journal / WAL+DB is on SSD.
  default: 0.025
  see_also:
  - osd_recovery_sleep
  flags:
  - runtime
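# Illustrative example: the generic osd_recovery_sleep overrides the
# device-specific variants only when non-zero, so
#   ceph config set osd osd_recovery_sleep 0.1
# throttles recovery on all OSDs regardless of media, while leaving it at 0
# keeps the per-device _hdd/_ssd/_hybrid defaults in effect.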
- name: osd_snap_trim_sleep
  type: float
  level: advanced
  desc: Time in seconds to sleep before next snap trim. This setting overrides _ssd,
    _hdd, and _hybrid if non-zero.
  fmt_desc: Time in seconds to sleep before next snap trim op.
    Increasing this value will slow down snap trimming.
    This option overrides backend specific variants.
  default: 0
  flags:
  - runtime
  with_legacy: true
- name: osd_snap_trim_sleep_hdd
  type: float
  level: advanced
  desc: Time in seconds to sleep before next snap trim for HDDs
  default: 5
  flags:
  - runtime
- name: osd_snap_trim_sleep_ssd
  type: float
  level: advanced
  desc: Time in seconds to sleep before next snap trim for SSDs
  fmt_desc: Time in seconds to sleep before next snap trim op
    for SSD OSDs (including NVMe).
  default: 0
  flags:
  - runtime
- name: osd_snap_trim_sleep_hybrid
  type: float
  level: advanced
  desc: Time in seconds to sleep before next snap trim when data is on HDD and journal
    is on SSD
  fmt_desc: Time in seconds to sleep before next snap trim op
    when OSD data is on an HDD and the OSD journal or WAL+DB is on an SSD.
  default: 2
  flags:
  - runtime
- name: osd_scrub_invalid_stats
  type: bool
  level: advanced
  default: true
  with_legacy: true
- name: osd_max_scrubs
  type: int
  level: advanced
  desc: Maximum concurrent scrubs on a single OSD
  fmt_desc: The maximum number of simultaneous scrub operations for
    a Ceph OSD Daemon.
  default: 1
  with_legacy: true
- name: osd_scrub_during_recovery
  type: bool
  level: advanced
  desc: Allow scrubbing when PGs on the OSD are undergoing recovery
  fmt_desc: Allow scrub during recovery. Setting this to ``false`` will disable
    scheduling new scrubs (and deep-scrubs) while there is active recovery.
    Already running scrubs will be continued. This might be useful to reduce
    load on busy clusters.
  default: false
  with_legacy: true
- name: osd_repair_during_recovery
  type: bool
  level: advanced
  desc: Allow requested repairing when PGs on the OSD are undergoing recovery
  default: false
  with_legacy: true
- name: osd_scrub_begin_hour
  type: int
  level: advanced
  desc: Restrict scrubbing to this hour of the day or later
  long_desc: Use osd_scrub_begin_hour=0 and osd_scrub_end_hour=0 for the entire day.
  fmt_desc: This restricts scrubbing to this hour of the day or later.
    Use ``osd_scrub_begin_hour = 0`` and ``osd_scrub_end_hour = 0``
    to allow scrubbing the entire day. Along with ``osd_scrub_end_hour``, they
    define a time window in which scrubs can happen. But a scrub will be
    performed no matter whether the time window allows it or not, as long as
    the placement group's scrub interval exceeds ``osd_scrub_max_interval``.
  default: 0
  see_also:
  - osd_scrub_end_hour
  min: 0
  max: 23
  with_legacy: true
- name: osd_scrub_end_hour
  type: int
  level: advanced
  desc: Restrict scrubbing to hours of the day earlier than this
  long_desc: Use osd_scrub_begin_hour=0 and osd_scrub_end_hour=0 for the entire day.
  fmt_desc: This restricts scrubbing to hours of the day earlier than this.
    Use ``osd_scrub_begin_hour = 0`` and ``osd_scrub_end_hour = 0`` to allow scrubbing
    for the entire day. Along with ``osd_scrub_begin_hour``, they define a time
    window in which scrubs can happen. But a scrub will be performed
    no matter whether the time window allows it or not, as long as the placement
    group's scrub interval exceeds ``osd_scrub_max_interval``.
  default: 0
  see_also:
  - osd_scrub_begin_hour
  min: 0
  max: 23
  with_legacy: true
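# Illustrative example: limiting scrubs to an off-peak window of 01:00-07:00
# could be done with
#   ceph config set osd osd_scrub_begin_hour 1
#   ceph config set osd osd_scrub_end_hour 7
# Note that a PG whose scrub interval has already exceeded
# osd_scrub_max_interval is scrubbed even outside this window.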
- name: osd_scrub_begin_week_day
  type: int
  level: advanced
  desc: Restrict scrubbing to this day of the week or later
  long_desc: 0 = Sunday, 1 = Monday, etc. Use osd_scrub_begin_week_day=0 osd_scrub_end_week_day=0
    for the entire week.
  fmt_desc: This restricts scrubbing to this day of the week or later.
    0 = Sunday, 1 = Monday, etc. Use ``osd_scrub_begin_week_day = 0``
    and ``osd_scrub_end_week_day = 0`` to allow scrubbing for the entire week.
    Along with ``osd_scrub_end_week_day``, they define a time window in which
    scrubs can happen. But a scrub will be performed
    no matter whether the time window allows it or not, when the PG's
    scrub interval exceeds ``osd_scrub_max_interval``.
  default: 0
  see_also:
  - osd_scrub_end_week_day
  min: 0
  max: 6
  with_legacy: true
- name: osd_scrub_end_week_day
  type: int
  level: advanced
  desc: Restrict scrubbing to days of the week earlier than this
  long_desc: 0 = Sunday, 1 = Monday, etc. Use osd_scrub_begin_week_day=0 osd_scrub_end_week_day=0
    for the entire week.
  fmt_desc: This restricts scrubbing to days of the week earlier than this.
    0 = Sunday, 1 = Monday, etc. Use ``osd_scrub_begin_week_day = 0``
    and ``osd_scrub_end_week_day = 0`` to allow scrubbing for the entire week.
    Along with ``osd_scrub_begin_week_day``, they define a time
    window in which scrubs can happen. But a scrub will be performed
    no matter whether the time window allows it or not, as long as the placement
    group's scrub interval exceeds ``osd_scrub_max_interval``.
  default: 0
  see_also:
  - osd_scrub_begin_week_day
  min: 0
  max: 6
  with_legacy: true
- name: osd_scrub_load_threshold
  type: float
  level: advanced
  desc: Allow scrubbing when system load divided by number of CPUs is below this value
  fmt_desc: The normalized maximum load. Ceph will not scrub when the system load
    (as defined by ``getloadavg() / number of online CPUs``) is higher than this number.
    Default is ``0.5``.
  default: 0.5
  with_legacy: true
# if load is low
- name: osd_scrub_min_interval
  type: float
  level: advanced
  desc: Scrub each PG no more often than this interval
  fmt_desc: The minimal interval in seconds for scrubbing the Ceph OSD Daemon
    when the Ceph Storage Cluster load is low.
  default: 1_day
  see_also:
  - osd_scrub_max_interval
  with_legacy: true
# regardless of load
- name: osd_scrub_max_interval
  type: float
  level: advanced
  desc: Scrub each PG no less often than this interval
  fmt_desc: The maximum interval in seconds for scrubbing the Ceph OSD Daemon
    irrespective of cluster load.
  default: 7_day
  see_also:
  - osd_scrub_min_interval
  with_legacy: true
# randomize the scheduled scrub in the span of [min,min*(1+randomize_ratio))
- name: osd_scrub_interval_randomize_ratio
  type: float
  level: advanced
  desc: Ratio of scrub interval to randomly vary
  long_desc: This prevents a scrub 'stampede' by randomly varying the scrub intervals
    so that they are soon uniformly distributed over the week
  fmt_desc: Add a random delay to ``osd_scrub_min_interval`` when scheduling
    the next scrub job for a PG. The delay is a random
    value less than ``osd_scrub_min_interval`` \*
    ``osd_scrub_interval_randomize_ratio``. The default setting
    spreads scrubs throughout the allowed time
    window of ``[1, 1.5]`` \* ``osd_scrub_min_interval``.
  default: 0.5
  see_also:
  - osd_scrub_min_interval
  with_legacy: true
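# Worked example (derived from the fmt_desc above): with the defaults
# osd_scrub_min_interval = 1 day and osd_scrub_interval_randomize_ratio = 0.5,
# the next scrub of a PG is scheduled at a random point between 1 day and
# 1.5 days after the previous one.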
# the probability to back off the scheduled scrub
- name: osd_scrub_backoff_ratio
  type: float
  level: dev
  desc: Backoff ratio for scheduling scrubs
  long_desc: This is the percentage of ticks that do NOT schedule scrubs; 66% means
    that 1 out of 3 ticks will schedule scrubs
  default: 0.66
  with_legacy: true
- name: osd_scrub_chunk_min
  type: int
  level: advanced
  desc: Minimum number of objects to deep-scrub in a single chunk
  fmt_desc: The minimal number of object store chunks to scrub during a single operation.
    Ceph blocks writes to a single chunk during scrub.
  default: 5
  see_also:
  - osd_scrub_chunk_max
  with_legacy: true
- name: osd_scrub_chunk_max
  type: int
  level: advanced
  desc: Maximum number of objects to deep-scrub in a single chunk
  fmt_desc: The maximum number of object store chunks to scrub during a single operation.
  default: 25
  see_also:
  - osd_scrub_chunk_min
  with_legacy: true
- name: osd_shallow_scrub_chunk_min
  type: int
  level: advanced
  desc: Minimum number of objects to scrub in a single chunk
  fmt_desc: The minimum number of object store chunks to scrub during a single operation.
    Not applicable to deep scrubs.
    Ceph blocks writes to a single chunk during scrub.
  default: 50
  see_also:
  - osd_shallow_scrub_chunk_max
  - osd_scrub_chunk_min
  with_legacy: true
- name: osd_shallow_scrub_chunk_max
  type: int
  level: advanced
  desc: Maximum number of objects to scrub in a single chunk
  fmt_desc: The maximum number of object store chunks to scrub during a single operation.
    Not applicable to deep scrubs.
  default: 100
  see_also:
  - osd_shallow_scrub_chunk_min
  - osd_scrub_chunk_max
  with_legacy: true
# sleep between [deep]scrub ops
- name: osd_scrub_sleep
  type: float
  level: advanced
  desc: Duration to inject a delay during scrubbing
  fmt_desc: Time to sleep before scrubbing the next group of chunks. Increasing this value will slow
    down the overall rate of scrubbing so that client operations will be less impacted.
  default: 0
  flags:
  - runtime
  with_legacy: true
# more sleep between [deep]scrub ops
- name: osd_scrub_extended_sleep
  type: float
  level: advanced
  desc: Duration to inject a delay during scrubbing out of scrubbing hours
  default: 0
  see_also:
  - osd_scrub_begin_hour
  - osd_scrub_end_hour
  - osd_scrub_begin_week_day
  - osd_scrub_end_week_day
  with_legacy: true
# whether to auto-repair inconsistencies upon deep-scrubbing
- name: osd_scrub_auto_repair
  type: bool
  level: advanced
  desc: Automatically repair damaged objects detected during scrub
  fmt_desc: Setting this to ``true`` will enable automatic PG repair when errors
    are found by scrubs or deep-scrubs. However, if more than
    ``osd_scrub_auto_repair_num_errors`` errors are found, a repair is NOT performed.
  default: false
  with_legacy: true
# only auto-repair when number of errors is below this threshold
- name: osd_scrub_auto_repair_num_errors
  type: uint
  level: advanced
  desc: Maximum number of detected errors to automatically repair
  fmt_desc: Auto repair will not occur if more than this many errors are found.
  default: 5
  see_also:
  - osd_scrub_auto_repair
  with_legacy: true
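# Illustrative example: conservative self-healing, where scrubs repair a PG
# automatically as long as no more than the configured number of errors is found:
#   ceph config set osd osd_scrub_auto_repair true
#   ceph config set osd osd_scrub_auto_repair_num_errors 5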
- name: osd_scrub_max_preemptions
  type: uint
  level: advanced
  desc: Set the maximum number of times we will preempt a deep scrub due to a client
    operation before blocking client IO to complete the scrub
  default: 5
  min: 0
  max: 30
- name: osd_deep_scrub_interval
  type: float
  level: advanced
  desc: Deep scrub each PG (i.e., verify data checksums) at least this often
  fmt_desc: The interval for "deep" scrubbing (fully reading all data). The
    ``osd_scrub_load_threshold`` does not affect this setting.
  default: 7_day
  with_legacy: true
- name: osd_deep_scrub_randomize_ratio
  type: float
  level: advanced
  desc: Scrubs will randomly become deep scrubs at this rate (0.15 -> 15% of scrubs
    are deep)
  long_desc: This prevents a deep scrub 'stampede' by spreading deep scrubs so they
    are uniformly distributed over the week
  default: 0.15
  with_legacy: true
- name: osd_deep_scrub_stride
  type: size
  level: advanced
  desc: Number of bytes to read from an object at a time during deep scrub
  fmt_desc: Read size when doing a deep scrub.
  default: 512_K
  with_legacy: true
- name: osd_deep_scrub_keys
  type: int
  level: advanced
  desc: Number of keys to read from an object at a time during deep scrub
  default: 1024
  with_legacy: true
# objects must be this old (seconds) before we update the whole-object digest on scrub
- name: osd_deep_scrub_update_digest_min_age
  type: int
  level: advanced
  desc: Update overall object digest only if object was last modified longer ago than
    this
  default: 2_hr
  with_legacy: true
- name: osd_deep_scrub_large_omap_object_key_threshold
  type: uint
  level: advanced
  desc: Warn when we encounter an object with more omap keys than this
  default: 200000
  services:
  - osd
  - mds
  see_also:
  - osd_deep_scrub_large_omap_object_value_sum_threshold
  with_legacy: true
- name: osd_deep_scrub_large_omap_object_value_sum_threshold
  type: size
  level: advanced
  desc: Warn when we encounter an object with more omap key bytes than this
  default: 1_G
  services:
  - osd
  see_also:
  - osd_deep_scrub_large_omap_object_key_threshold
  with_legacy: true
# when scrubbing blocks on a locked object
- name: osd_blocked_scrub_grace_period
  type: int
  level: advanced
  desc: Time (seconds) before issuing a cluster-log warning
  long_desc: Waiting too long for an object in the scrubbed chunk to be unlocked.
  default: 120
  with_legacy: true
# timely updates to the 'pg dump' output, esp. re scrub scheduling
- name: osd_stats_update_period_scrubbing
  type: int
  level: advanced
  desc: Stats update period (seconds) when scrubbing
  long_desc: A PG actively scrubbing (or blocked while scrubbing) publishes its
    stats (inc. scrub/block duration) every this many seconds.
  default: 15
  with_legacy: false
- name: osd_stats_update_period_not_scrubbing
  type: int
  level: advanced
  desc: Stats update period (seconds) when not scrubbing
  long_desc: A PG we are a primary of publishes its
    stats (inc. scrub/block duration) every this many seconds.
  default: 120
  with_legacy: false
# when replicas are slow to respond to scrub resource reservations
# Note: disable by using a very large value
- name: osd_scrub_slow_reservation_response
  type: millisecs
  level: advanced
  desc: Duration before issuing a cluster-log warning
  long_desc: Waiting too long for a replica to respond (after at least half of the
    replicas have responded).
  default: 2200
  min: 500
  see_also:
  - osd_scrub_reservation_timeout
  with_legacy: false
# when a replica does not respond to scrub resource request
# Note: disable by using a very large value
- name: osd_scrub_reservation_timeout
  type: millisecs
  level: advanced
  desc: Duration before aborting the scrub session
  long_desc: Waiting too long for some replicas to respond to
    scrub reservation requests.
  default: 5000
  min: 2000
  see_also:
  - osd_scrub_slow_reservation_response
  with_legacy: false
# where rados plugins are stored
- name: osd_class_dir
  type: str
  level: advanced
  default: @CMAKE_INSTALL_LIBDIR@/rados-classes
  fmt_desc: The class path for RADOS class plug-ins.
  with_legacy: true
- name: osd_open_classes_on_start
  type: bool
  level: advanced
  default: true
  with_legacy: true
# list of object classes allowed to be loaded (allow all: *)
- name: osd_class_load_list
  type: str
  level: advanced
  default: cephfs hello journal lock log numops otp rbd refcount rgw rgw_gc timeindex
    user version cas cmpomap queue 2pc_queue fifo
  with_legacy: true
# list of object classes with default execute perm (allow all: *)
- name: osd_class_default_list
  type: str
  level: advanced
  default: cephfs hello journal lock log numops otp rbd refcount rgw rgw_gc timeindex
    user version cas cmpomap queue 2pc_queue fifo
  with_legacy: true
- name: osd_agent_max_ops
  type: int
  level: advanced
  desc: maximum concurrent tiering operations for tiering agent
  fmt_desc: The maximum number of simultaneous flushing ops per tiering agent
    in the high speed mode.
  default: 4
  with_legacy: true
- name: osd_agent_max_low_ops
  type: int
  level: advanced
  desc: maximum concurrent low-priority tiering operations for tiering agent
  fmt_desc: The maximum number of simultaneous flushing ops per tiering agent
    in the low speed mode.
  default: 2
  with_legacy: true
- name: osd_agent_min_evict_effort
  type: float
  level: advanced
  desc: minimum effort to expend evicting clean objects
  default: 0.1
  min: 0
  max: 0.99
  with_legacy: true
- name: osd_agent_quantize_effort
  type: float
  level: advanced
  desc: size of quantize unit for eviction effort
  default: 0.1
  with_legacy: true
- name: osd_agent_delay_time
  type: float
  level: advanced
  desc: how long agent should sleep if it has no work to do
  default: 5
  with_legacy: true
# decay atime and temp histograms after how many objects go by
- name: osd_agent_hist_halflife
  type: int
  level: advanced
  desc: halflife of agent atime and temp histograms
  default: 1000
  with_legacy: true
# slop factor to avoid flapping between tiering flush and eviction mode
- name: osd_agent_slop
  type: float
  level: advanced
  desc: slop factor to avoid switching tiering flush and eviction mode
  default: 0.02
  with_legacy: true
- name: osd_find_best_info_ignore_history_les
  type: bool
  level: dev
  desc: ignore last_epoch_started value when peering AND PROBABLY LOSE DATA
  long_desc: THIS IS AN EXTREMELY DANGEROUS OPTION THAT SHOULD ONLY BE USED AT THE
    DIRECTION OF A DEVELOPER. It makes peering ignore the last_epoch_started value
    when peering, which can allow the OSD to believe an OSD has an authoritative view
    of a PG's contents even when it is in fact old and stale, typically leading to
    data loss (by believing a stale PG is up to date).
  default: false
  with_legacy: true
- name: osd_uuid
  type: uuid
  level: advanced
  desc: uuid label for a new OSD
  fmt_desc: The universally unique identifier (UUID) for the Ceph OSD Daemon.
  note: The ``osd_uuid`` applies to a single Ceph OSD Daemon. The ``fsid``
    applies to the entire cluster.
  flags:
  - create
  with_legacy: true
- name: osd_data
  type: str
  level: advanced
  desc: path to OSD data
  fmt_desc: The path to the OSD's data. You must create the directory when
    deploying Ceph. You should mount a drive for OSD data at this
    mount point. We do not recommend changing the default.
  default: /var/lib/ceph/osd/$cluster-$id
  flags:
  - no_mon_update
  with_legacy: true
- name: osd_journal
  type: str
  level: advanced
  desc: path to OSD journal (when FileStore backend is in use)
  fmt_desc: The path to the OSD's journal. This may be a path to a file or a
    block device (such as a partition of an SSD). If it is a file,
    you must create the directory to contain it. We recommend using a
    separate fast device when the ``osd_data`` drive is an HDD.
  default: /var/lib/ceph/osd/$cluster-$id/journal
  flags:
  - no_mon_update
  with_legacy: true
- name: osd_journal_size
  type: size
  level: advanced
  desc: size of FileStore journal (in MiB)
  fmt_desc: The size of the journal in megabytes.
  default: 5_K
  flags:
  - create
  with_legacy: true
- name: osd_journal_flush_on_shutdown
  type: bool
  level: advanced
  desc: flush FileStore journal contents during clean OSD shutdown
  default: true
  with_legacy: true
- name: osd_compact_on_start
  type: bool
  level: advanced
  desc: compact OSD's object store's OMAP on start
  default: false
# flags for specific control purpose during osd mount() process.
# e.g., can be 1 to skip over replaying journal
# or 2 to skip over mounting omap or 3 to skip over both.
# This might be helpful in case the journal is totally corrupted
# and we still want to bring the osd daemon back normally, etc.
- name: osd_os_flags
  type: uint
  level: dev
  desc: flags to skip filestore omap or journal initialization
  default: 0
- name: osd_max_write_size
  type: size
  level: advanced
  desc: Maximum size of a RADOS write operation in megabytes
  long_desc: This setting prevents clients from doing very large writes to RADOS. If
    you set this to a value below what clients expect, they will receive an error
    when attempting to write to the cluster.
  fmt_desc: The maximum size of a write in megabytes.
  default: 90
  min: 4
  with_legacy: true
- name: osd_max_pgls
  type: uint
  level: advanced
  desc: maximum number of results when listing objects in a pool
  fmt_desc: The maximum number of objects to return when listing the contents
    of a placement group. A client requesting a large number can tie up the
    Ceph OSD Daemon.
  default: 1_K
  with_legacy: true
- name: osd_client_message_size_cap
  type: size
  level: advanced
  desc: maximum memory to devote to in-flight client requests
  long_desc: If this value is exceeded, the OSD will not read any new client data
    off of the network until memory is freed.
  fmt_desc: The largest client data message allowed in memory.
  default: 500_M
  with_legacy: true
- name: osd_client_message_cap
  type: uint
  level: advanced
  desc: maximum number of in-flight client requests
  default: 256
  with_legacy: true
- name: osd_crush_update_on_start
  type: bool
  level: advanced
  desc: update OSD CRUSH location on startup
  default: true
  with_legacy: true
- name: osd_class_update_on_start
  type: bool
  level: advanced
  desc: set OSD device class on startup
  default: true
  with_legacy: true
- name: osd_crush_initial_weight
  type: float
  level: advanced
  desc: if >= 0, initial CRUSH weight for newly created OSDs
  long_desc: If this value is negative, the size of the OSD in TiB is used.
  fmt_desc: The initial CRUSH weight for newly added OSDs. By default,
    the initial CRUSH weight for a newly added OSD is set to its device size in
    TB. See `Weighting Bucket Items`_ for details.
  default: -1
  with_legacy: true
# Allows the "peered" state for recovery and backfill below min_size
- name: osd_allow_recovery_below_min_size
  type: bool
  level: dev
  desc: allow replicated pools to recover with < min_size active members
  default: true
  services:
  - osd
  with_legacy: true
# cap on # of inc maps we send to peers, clients
- name: osd_map_share_max_epochs
  type: int
  level: advanced
  default: 40
  with_legacy: true
- name: osd_map_cache_size
  type: int
  level: advanced
  default: 50
  fmt_desc: The number of OSD maps to keep cached.
  with_legacy: true
- name: osd_pg_epoch_max_lag_factor
  type: float
  level: advanced
  desc: Max multiple of the map cache that PGs can lag before we throttle map ingest
  default: 2
  see_also:
  - osd_map_cache_size
- name: osd_inject_bad_map_crc_probability
  type: float
  level: dev
  default: 0
  with_legacy: true
- name: osd_inject_failure_on_pg_removal
  type: bool
  level: dev
  default: false
  with_legacy: true
# shut down the OSD if its status flips more than max_markdown_count times within the most recent max_markdown_period seconds
- name: osd_max_markdown_period
  type: int
  level: advanced
  default: 10_min
  with_legacy: true
- name: osd_max_markdown_count
  type: int
  level: advanced
  default: 5
  with_legacy: true
- name: osd_op_thread_timeout
  type: int
  level: advanced
  default: 15
  fmt_desc: The Ceph OSD Daemon operation thread timeout in seconds.
  with_legacy: true
- name: osd_op_thread_suicide_timeout
  type: int
  level: advanced
  default: 150
  with_legacy: true
- name: osd_op_pq_max_tokens_per_priority
  type: uint
  level: advanced
  default: 4_M
  with_legacy: true
- name: osd_op_pq_min_cost
  type: size
  level: advanced
  default: 64_K
  with_legacy: true
# preserve clone_overlap during recovery/migration
- name: osd_recover_clone_overlap
  type: bool
  level: advanced
  default: true
  fmt_desc: Preserves clone overlap during recovery. Should always be set
    to ``true``.
  with_legacy: true
- name: osd_num_cache_shards
  type: size
  level: advanced
  desc: The number of cache shards to use in the object store.
  default: 32
  flags:
  - startup
- name: osd_aggregated_slow_ops_logging
  type: bool
  level: advanced
  desc: Allow the OSD daemon to send aggregated slow ops to the cluster log
  fmt_desc: If set to ``true``, the OSD daemon will send slow ops information in
    an aggregated format to the cluster log; otherwise it sends every slow op
    to the cluster log individually.
  default: true
  with_legacy: true
- name: osd_op_num_threads_per_shard
  type: int
  level: advanced
  default: 0
  flags:
  - startup
  with_legacy: true
- name: osd_op_num_threads_per_shard_hdd
  type: int
  level: advanced
  default: 1
  see_also:
  - osd_op_num_threads_per_shard
  flags:
  - startup
  with_legacy: true
- name: osd_op_num_threads_per_shard_ssd
  type: int
  level: advanced
  default: 2
  see_also:
  - osd_op_num_threads_per_shard
  flags:
  - startup
  with_legacy: true
- name: osd_op_num_shards
  type: int
  level: advanced
  fmt_desc: The number of shards allocated for a given OSD. Each shard has its own processing queue.
    PGs on the OSD are distributed evenly across these shards. This setting overrides _ssd and _hdd if
    non-zero.
  default: 0
  flags:
  - startup
  with_legacy: true
- name: osd_op_num_shards_hdd
  type: int
  level: advanced
  fmt_desc: the number of shards allocated for a given OSD (for rotational media).
  default: 5
  see_also:
  - osd_op_num_shards
  flags:
  - startup
  with_legacy: true
- name: osd_op_num_shards_ssd
  type: int
  level: advanced
  fmt_desc: the number of shards allocated for a given OSD (for solid state media).
  default: 8
  see_also:
  - osd_op_num_shards
  flags:
  - startup
  with_legacy: true
- name: osd_skip_data_digest
  type: bool
  level: dev
  desc: Do not store full-object checksums if the backend (bluestore) does its own
    checksums. Only usable with all BlueStore OSDs.
  default: false
# PrioritizedQueue (prio), Weighted Priority Queue (wpq ; default),
# mclock_opclass, mclock_client, or debug_random. "mclock_opclass"
# and "mclock_client" are based on the mClock/dmClock algorithm
# (Gulati, et al. 2010). "mclock_opclass" prioritizes based on the
# class the operation belongs to. "mclock_client" does the same but
# also works to enforce fairness between clients. "debug_random"
# chooses among all four with equal probability.
- name: osd_op_queue
  type: str
  level: advanced
  desc: which operation priority queue algorithm to use
  long_desc: which operation priority queue algorithm to use
  fmt_desc: This sets the type of queue to be used for prioritizing ops
    within each OSD. Both queues feature a strict sub-queue which is
    dequeued before the normal queue. The normal queue is different
    between implementations. The WeightedPriorityQueue (``wpq``)
    dequeues operations in relation to their priorities to prevent
    starvation of any queue. WPQ should help in cases where a few OSDs
    are more overloaded than others. The mClockQueue
    (``mclock_scheduler``) prioritizes operations based on which class
    they belong to (recovery, scrub, snaptrim, client op, osd subop).
    See `QoS Based on mClock`_. Requires a restart.
  default: mclock_scheduler
  see_also:
  - osd_op_queue_cut_off
  enum_values:
  - wpq
  - mclock_scheduler
  - debug_random
  with_legacy: true
# Min priority to go to strict queue. (low, high)
- name: osd_op_queue_cut_off
  type: str
  level: advanced
  desc: the threshold between high priority ops and low priority ops
  long_desc: the threshold between high priority ops that use strict priority ordering
    and low priority ops that use a fairness algorithm that may or may not incorporate
    priority
  fmt_desc: This selects which priority ops will be sent to the strict
    queue versus the normal queue. The ``low`` setting sends all
    replication ops and higher to the strict queue, while the ``high``
    option sends only replication acknowledgment ops and higher to
    the strict queue. Setting this to ``high`` should help when a few
    OSDs in the cluster are very busy especially when combined with
    ``wpq`` in the ``osd_op_queue`` setting. OSDs that are very busy
    handling replication traffic could starve primary client traffic
    on these OSDs without these settings. Requires a restart.
  default: high
  see_also:
  - osd_op_queue
  enum_values:
  - low
  - high
  - debug_random
  with_legacy: true
- name: osd_mclock_scheduler_client_res
  type: float
  level: advanced
  desc: IO proportion reserved for each client (default). The default value
    of 0 specifies the lowest possible reservation. Any value greater than
    0 and up to 1.0 specifies the minimum IO proportion to reserve for each
    client in terms of a fraction of the OSD's maximum IOPS capacity.
  long_desc: Only considered for osd_op_queue = mclock_scheduler
  fmt_desc: IO proportion reserved for each client (default).
  default: 0
  min: 0
  max: 1.0
  see_also:
  - osd_op_queue
- name: osd_mclock_scheduler_client_wgt
  type: uint
  level: advanced
  desc: IO share for each client (default) over reservation
  long_desc: Only considered for osd_op_queue = mclock_scheduler
  fmt_desc: IO share for each client (default) over reservation.
  default: 1
  see_also:
  - osd_op_queue
- name: osd_mclock_scheduler_client_lim
  type: float
  level: advanced
  desc: IO limit for each client (default) over reservation. The default
    value of 0 specifies no limit enforcement, which means each client can
    use the maximum possible IOPS capacity of the OSD. Any value greater
    than 0 and up to 1.0 specifies the upper IO limit over reservation
    that each client receives in terms of a fraction of the OSD's
    maximum IOPS capacity.
  long_desc: Only considered for osd_op_queue = mclock_scheduler
  fmt_desc: IO limit for each client (default) over reservation.
  default: 0
  min: 0
  max: 1.0
  see_also:
  - osd_op_queue
- name: osd_mclock_scheduler_background_recovery_res
  type: float
  level: advanced
  desc: IO proportion reserved for background recovery (default). The
    default value of 0 specifies the lowest possible reservation. Any value
    greater than 0 and up to 1.0 specifies the minimum IO proportion to
    reserve for background recovery operations in terms of a fraction of
    the OSD's maximum IOPS capacity.
  long_desc: Only considered for osd_op_queue = mclock_scheduler
  fmt_desc: IO proportion reserved for background recovery (default).
  default: 0
  min: 0
  max: 1.0
  see_also:
  - osd_op_queue
- name: osd_mclock_scheduler_background_recovery_wgt
  type: uint
  level: advanced
  desc: IO share for each background recovery over reservation
  long_desc: Only considered for osd_op_queue = mclock_scheduler
  fmt_desc: IO share for each background recovery over reservation.
  default: 1
  see_also:
  - osd_op_queue
- name: osd_mclock_scheduler_background_recovery_lim
  type: float
  level: advanced
  desc: IO limit for background recovery over reservation. The default
    value of 0 specifies no limit enforcement, which means background
    recovery operation can use the maximum possible IOPS capacity of the
    OSD. Any value greater than 0 and up to 1.0 specifies the upper IO
    limit over reservation that background recovery operation receives in
    terms of a fraction of the OSD's maximum IOPS capacity.
  long_desc: Only considered for osd_op_queue = mclock_scheduler
  fmt_desc: IO limit for background recovery over reservation.
  default: 0
  min: 0
  max: 1.0
  see_also:
  - osd_op_queue
- name: osd_mclock_scheduler_background_best_effort_res
  type: float
  level: advanced
  desc: IO proportion reserved for background best_effort (default). The
    default value of 0 specifies the lowest possible reservation. Any value
    greater than 0 and up to 1.0 specifies the minimum IO proportion to
    reserve for background best_effort operations in terms of a fraction
    of the OSD's maximum IOPS capacity.
  long_desc: Only considered for osd_op_queue = mclock_scheduler
  fmt_desc: IO proportion reserved for background best_effort (default).
  default: 0
  min: 0
  max: 1.0
  see_also:
  - osd_op_queue
- name: osd_mclock_scheduler_background_best_effort_wgt
  type: uint
  level: advanced
  desc: IO share for each background best_effort over reservation
  long_desc: Only considered for osd_op_queue = mclock_scheduler
  fmt_desc: IO share for each background best_effort over reservation.
  default: 1
  see_also:
  - osd_op_queue
- name: osd_mclock_scheduler_background_best_effort_lim
  type: float
  level: advanced
  desc: IO limit for background best_effort over reservation. The default
    value of 0 specifies no limit enforcement, which means background
    best_effort operation can use the maximum possible IOPS capacity of the
    OSD. Any value greater than 0 and up to 1.0 specifies the upper IO
    limit over reservation that background best_effort operation receives
    in terms of a fraction of the OSD's maximum IOPS capacity.
  long_desc: Only considered for osd_op_queue = mclock_scheduler
  fmt_desc: IO limit for background best_effort over reservation.
  default: 0
  min: 0
  max: 1.0
  see_also:
  - osd_op_queue
- name: osd_mclock_scheduler_anticipation_timeout
  type: float
  level: advanced
  desc: mclock anticipation timeout in seconds
  long_desc: the amount of time that mclock waits until the unused resource is forfeited
  default: 0
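# Illustrative example: the per-class reservation/weight/limit knobs above are
# normally managed by the built-in mclock profiles; they are only meant to be
# tuned by hand under the custom profile, e.g.
#   ceph config set osd osd_mclock_profile custom
#   ceph config set osd osd_mclock_scheduler_client_res 0.4
#   ceph config set osd osd_mclock_scheduler_background_recovery_lim 0.2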
- name: osd_mclock_max_sequential_bandwidth_hdd
  type: size
  level: basic
  desc: The maximum sequential bandwidth in bytes/second of the OSD (for
    rotational media)
  long_desc: This option specifies the maximum sequential bandwidth to consider
    for an OSD whose underlying device type is rotational media. This is
    considered by the mclock scheduler to derive the cost factor to be used in
    QoS calculations. Only considered for osd_op_queue = mclock_scheduler
  fmt_desc: The maximum sequential bandwidth in bytes/second to consider for the
    OSD (for rotational media)
  default: 150_M
  flags:
  - runtime
- name: osd_mclock_max_sequential_bandwidth_ssd
  type: size
  level: basic
  desc: The maximum sequential bandwidth in bytes/second of the OSD (for
    solid state media)
  long_desc: This option specifies the maximum sequential bandwidth to consider
    for an OSD whose underlying device type is solid state media. This is
    considered by the mclock scheduler to derive the cost factor to be used in
    QoS calculations. Only considered for osd_op_queue = mclock_scheduler
  fmt_desc: The maximum sequential bandwidth in bytes/second to consider for the
    OSD (for solid state media)
  default: 1200_M
  flags:
  - runtime
- name: osd_mclock_max_capacity_iops_hdd
  type: float
  level: basic
  desc: Max random write IOPS capacity (at 4 KiB block size) to consider per OSD
    (for rotational media)
  long_desc: This option specifies the max OSD random write IOPS capacity per
    OSD. Contributes to QoS calculations when enabling a dmclock profile. Only
    considered for osd_op_queue = mclock_scheduler
  fmt_desc: Max random write IOPS capacity (at 4 KiB block size) to consider per
    OSD (for rotational media)
  default: 315
  flags:
  - runtime
- name: osd_mclock_max_capacity_iops_ssd
  type: float
  level: basic
  desc: Max random write IOPS capacity (at 4 KiB block size) to consider per OSD
    (for solid state media)
  long_desc: This option specifies the max OSD random write IOPS capacity per
    OSD. Contributes to QoS calculations when enabling a dmclock profile. Only
    considered for osd_op_queue = mclock_scheduler
  fmt_desc: Max random write IOPS capacity (at 4 KiB block size) to consider per
    OSD (for solid state media)
  default: 21500
  flags:
  - runtime
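# Illustrative example: the IOPS capacity an OSD is currently using can be
# inspected with "ceph config show osd.0 osd_mclock_max_capacity_iops_ssd" and,
# if the benchmarked value looks unrealistic for the device, overridden with
#   ceph config set osd.0 osd_mclock_max_capacity_iops_ssd 15000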
- name: osd_mclock_force_run_benchmark_on_init
  type: bool
  level: advanced
  desc: Force run the OSD benchmark on OSD initialization/boot-up
  long_desc: This option specifies whether the OSD benchmark must be run during
    the OSD boot-up sequence even if historical data about the OSD iops capacity
    is available in the MON config store. Enable this to refresh the OSD iops
    capacity if the underlying device's performance characteristics have changed
    significantly. Only considered for osd_op_queue = mclock_scheduler.
  fmt_desc: Force run the OSD benchmark on OSD initialization/boot-up
  default: false
  see_also:
  - osd_mclock_max_capacity_iops_hdd
  - osd_mclock_max_capacity_iops_ssd
  flags:
  - startup
- name: osd_mclock_skip_benchmark
  type: bool
  level: dev
  desc: Skip the OSD benchmark on OSD initialization/boot-up
  long_desc: This option specifies whether the OSD benchmark must be skipped during
    the OSD boot-up sequence. Only considered for osd_op_queue = mclock_scheduler.
  fmt_desc: Skip the OSD benchmark on OSD initialization/boot-up
  default: false
  see_also:
  - osd_mclock_max_capacity_iops_hdd
  - osd_mclock_max_capacity_iops_ssd
  flags:
  - runtime
- name: osd_mclock_profile
  type: str
  level: advanced
  desc: Which mclock profile to use
  long_desc: This option specifies the mclock profile to enable - one among the set
    of built-in profiles or a custom profile. Only considered for osd_op_queue = mclock_scheduler
  fmt_desc: |
    This sets the type of mclock profile to use for providing QoS
    based on operations belonging to different classes (background
    recovery, scrub, snaptrim, client op, osd subop). Once a built-in
    profile is enabled, the lower level mclock resource control
    parameters [*reservation, weight, limit*] and some Ceph
    configuration parameters are set transparently. Note that the
    above does not apply for the *custom* profile.
  default: balanced
  see_also:
  - osd_op_queue
  enum_values:
  - balanced
  - high_recovery_ops
  - high_client_ops
  - custom
  flags:
  - runtime
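# Illustrative example: shifting the QoS bias of all OSDs, e.g. to favour
# recovery during a maintenance window:
#   ceph config set osd osd_mclock_profile high_recovery_ops
# and back to the default afterwards:
#   ceph config set osd osd_mclock_profile balanced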
- name: osd_mclock_override_recovery_settings
  type: bool
  level: advanced
  desc: Setting this option enables the override of recovery/backfill limits
    for the mClock scheduler.
  long_desc: When set, this option allows the max recovery active and max
    backfills limits to be overridden while the mClock scheduler is active.
    These options are otherwise not modifiable when the mClock scheduler is
    active, and any attempt to modify them without setting this option will
    reset the recovery or backfill option back to its default value.
  fmt_desc: Setting this option will enable the override of the
    recovery/backfill limits for the mClock scheduler as defined by the
    ``osd_recovery_max_active_hdd``, ``osd_recovery_max_active_ssd`` and
    ``osd_max_backfills`` options.
  default: false
  see_also:
  - osd_recovery_max_active_hdd
  - osd_recovery_max_active_ssd
  - osd_max_backfills
  flags:
  - runtime
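# Illustrative example: with the mClock scheduler active, recovery/backfill
# limits only take effect after enabling the override switch, e.g.
#   ceph config set osd osd_mclock_override_recovery_settings true
#   ceph config set osd osd_max_backfills 3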
- name: osd_mclock_iops_capacity_threshold_hdd
  type: float
  level: basic
  desc: The threshold IOPS capacity (at 4KiB block size) beyond which to ignore
    the OSD bench results for an OSD (for rotational media)
  long_desc: This option specifies the threshold IOPS capacity for an OSD under
    which the OSD bench results can be considered for QoS calculations. Only
    considered for osd_op_queue = mclock_scheduler
  fmt_desc: The threshold IOPS capacity (at 4KiB block size) beyond which to
    ignore OSD bench results for an OSD (for rotational media)
  default: 500
  flags:
  - runtime
- name: osd_mclock_iops_capacity_threshold_ssd
  type: float
  level: basic
  desc: The threshold IOPS capacity (at 4KiB block size) beyond which to ignore
    the OSD bench results for an OSD (for solid state media)
  long_desc: This option specifies the threshold IOPS capacity for an OSD under
    which the OSD bench results can be considered for QoS calculations. Only
    considered for osd_op_queue = mclock_scheduler
  fmt_desc: The threshold IOPS capacity (at 4KiB block size) beyond which to
    ignore OSD bench results for an OSD (for solid state media)
  default: 80000
  flags:
  - runtime
# Set to true for testing. Users should NOT set this.
# If set to true, any error will be reported even after enough shards
# have been read to decode the object.
- name: osd_read_ec_check_for_errors
  type: bool
  level: advanced
  default: false
  with_legacy: true
- name: osd_recovery_delay_start
  type: float
  level: advanced
  default: 0
  fmt_desc: After peering completes, Ceph will delay for the specified number
    of seconds before starting to recover RADOS objects.
  with_legacy: true
- name: osd_recovery_max_active
  type: uint
  level: advanced
  desc: Number of simultaneous active recovery operations per OSD (overrides _ssd
    and _hdd if non-zero)
  fmt_desc: The number of active recovery requests per OSD at one time. More
    requests will accelerate recovery, but the requests place an
    increased load on the cluster.
  note: This value is only used if it is non-zero. Normally it
    is ``0``, which means that the ``hdd`` or ``ssd`` values
    (below) are used, depending on the type of the primary
    device backing the OSD.
  default: 0
  see_also:
  - osd_recovery_max_active_hdd
  - osd_recovery_max_active_ssd
  flags:
  - runtime
  with_legacy: true
- name: osd_recovery_max_active_hdd
  type: uint
  level: advanced
  desc: Number of simultaneous active recovery operations per OSD (for rotational
    devices)
  fmt_desc: The number of active recovery requests per OSD at one time, if the
    primary device is rotational.
  default: 3
  see_also:
  - osd_recovery_max_active
  - osd_recovery_max_active_ssd
  flags:
  - runtime
  with_legacy: true
- name: osd_recovery_max_active_ssd
  type: uint
  level: advanced
  desc: Number of simultaneous active recovery operations per OSD (for non-rotational
    solid state devices)
  fmt_desc: The number of active recovery requests per OSD at one time, if the
    primary device is non-rotational (i.e., an SSD).
  default: 10
  see_also:
  - osd_recovery_max_active
  - osd_recovery_max_active_hdd
  flags:
  - runtime
  with_legacy: true
- name: osd_recovery_max_single_start
  type: uint
  level: advanced
  default: 1
  fmt_desc: The maximum number of recovery operations per OSD that will be
    newly started when an OSD is recovering.
  with_legacy: true
# max size of push chunk
- name: osd_recovery_max_chunk
  type: size
  level: advanced
  default: 8_M
  fmt_desc: the maximum total size of data chunks a recovery op can carry.
  with_legacy: true
# max number of omap entries per chunk; 0 to disable limit
- name: osd_recovery_max_omap_entries_per_chunk
  type: uint
  level: advanced
  default: 8096
  with_legacy: true
# max size of a COPYFROM chunk
- name: osd_copyfrom_max_chunk
  type: size
  level: advanced
  default: 8_M
  with_legacy: true
# push cost per object
- name: osd_push_per_object_cost
  type: size
  level: advanced
  default: 1000
  fmt_desc: the overhead for serving a push op
  with_legacy: true
# max size of push message
- name: osd_max_push_cost
  type: size
  level: advanced
  default: 8_M
  with_legacy: true
# max objects in single push op
- name: osd_max_push_objects
  type: uint
  level: advanced
  default: 10
  with_legacy: true
# Only use clone_overlap for recovery if there are fewer than
# osd_recover_clone_overlap_limit entries in the overlap set
- name: osd_recover_clone_overlap_limit
  type: uint
  level: advanced
  default: 10
  flags:
  - runtime
- name: osd_debug_feed_pullee
  type: int
  level: dev
  desc: Feed a pullee, and force primary to pull a currently missing object from it
  default: -1
  with_legacy: true
- name: osd_backfill_scan_min
  type: int
  level: advanced
  default: 64
  fmt_desc: The minimum number of objects per backfill scan.
  with_legacy: true
- name: osd_backfill_scan_max
  type: int
  level: advanced
  default: 512
  fmt_desc: The maximum number of objects per backfill scan.
  with_legacy: true
- name: osd_extblkdev_plugins
  type: str
  level: advanced
  desc: extended block device plugins to load, provide compression feedback at runtime
  default: vdo
  flags:
  - startup
# minimum number of peers
- name: osd_heartbeat_min_peers
  type: int
  level: advanced
  default: 10
  with_legacy: true
- name: osd_delete_sleep
  type: float
  level: advanced
  desc: Time in seconds to sleep before next removal transaction. This setting
    overrides _ssd, _hdd, and _hybrid if non-zero.
  fmt_desc: Time in seconds to sleep before the next removal transaction. This
    throttles the PG deletion process.
  default: 0
  flags:
  - runtime
- name: osd_delete_sleep_hdd
  type: float
  level: advanced
  desc: Time in seconds to sleep before next removal transaction for HDDs
  default: 5
  flags:
  - runtime
- name: osd_delete_sleep_ssd
  type: float
  level: advanced
  desc: Time in seconds to sleep before next removal transaction for SSDs
  default: 1
  flags:
  - runtime
- name: osd_delete_sleep_hybrid
  type: float
  level: advanced
  desc: Time in seconds to sleep before next removal transaction when OSD data is on HDD
    and OSD journal or WAL+DB is on SSD
  default: 1
  flags:
  - runtime
- name: osd_rocksdb_iterator_bounds_enabled
  desc: Whether omap iterator bounds are applied to rocksdb iterator ReadOptions
  type: bool
  level: dev
  default: true
  with_legacy: true