1 '\" te
2 .\" Copyright (c) 2013 by Turbo Fredriksson <turbo@bayour.com>. All rights reserved.
3 .\" The contents of this file are subject to the terms of the Common Development
4 .\" and Distribution License (the "License"). You may not use this file except
5 .\" in compliance with the License. You can obtain a copy of the license at
6 .\" usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing.
7 .\"
8 .\" See the License for the specific language governing permissions and
9 .\" limitations under the License. When distributing Covered Code, include this
10 .\" CDDL HEADER in each file and include the License file at
11 .\" usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this
12 .\" CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your
13 .\" own identifying information:
14 .\" Portions Copyright [yyyy] [name of copyright owner]
15 .TH ZFS-MODULE-PARAMETERS 5 "Nov 16, 2013"
16 .SH NAME
17 zfs\-module\-parameters \- ZFS module parameters
18 .SH DESCRIPTION
19 .sp
20 .LP
21 Description of the different parameters to the ZFS module.
22
23 .SS "Module parameters"
24 .sp
25 .LP
26
27 .sp
28 .ne 2
29 .na
30 \fBl2arc_feed_again\fR (int)
31 .ad
32 .RS 12n
33 Turbo L2ARC warmup
34 .sp
35 Use \fB1\fR for yes (default) and \fB0\fR to disable.
36 .RE
37
38 .sp
39 .ne 2
40 .na
41 \fBl2arc_feed_min_ms\fR (ulong)
42 .ad
43 .RS 12n
44 Min feed interval in milliseconds
45 .sp
46 Default value: \fB200\fR.
47 .RE
48
49 .sp
50 .ne 2
51 .na
52 \fBl2arc_feed_secs\fR (ulong)
53 .ad
54 .RS 12n
55 Seconds between L2ARC writing
56 .sp
57 Default value: \fB1\fR.
58 .RE
59
60 .sp
61 .ne 2
62 .na
63 \fBl2arc_headroom\fR (ulong)
64 .ad
65 .RS 12n
66 Number of max device writes to precache
67 .sp
68 Default value: \fB2\fR.
69 .RE
70
71 .sp
72 .ne 2
73 .na
74 \fBl2arc_headroom_boost\fR (ulong)
75 .ad
76 .RS 12n
77 Compressed l2arc_headroom multiplier
78 .sp
79 Default value: \fB200\fR.
80 .RE
81
82 .sp
83 .ne 2
84 .na
85 \fBl2arc_nocompress\fR (int)
86 .ad
87 .RS 12n
88 Skip compressing L2ARC buffers
89 .sp
90 Use \fB1\fR for yes and \fB0\fR for no (default).
91 .RE
92
93 .sp
94 .ne 2
95 .na
96 \fBl2arc_noprefetch\fR (int)
97 .ad
98 .RS 12n
99 Skip caching prefetched buffers
100 .sp
101 Use \fB1\fR for yes (default) and \fB0\fR to disable.
102 .RE
103
104 .sp
105 .ne 2
106 .na
107 \fBl2arc_norw\fR (int)
108 .ad
109 .RS 12n
110 No reads during writes
111 .sp
112 Use \fB1\fR for yes and \fB0\fR for no (default).
113 .RE
114
115 .sp
116 .ne 2
117 .na
118 \fBl2arc_write_boost\fR (ulong)
119 .ad
120 .RS 12n
121 Extra write bytes during device warmup
122 .sp
123 Default value: \fB8,388,608\fR.
124 .RE
125
126 .sp
127 .ne 2
128 .na
129 \fBl2arc_write_max\fR (ulong)
130 .ad
131 .RS 12n
132 Max write bytes per interval
133 .sp
134 Default value: \fB8,388,608\fR.
135 .RE
136
137 .sp
138 .ne 2
139 .na
140 \fBmetaslab_debug_load\fR (int)
141 .ad
142 .RS 12n
143 Load all metaslabs during pool import.
144 .sp
145 Use \fB1\fR for yes and \fB0\fR for no (default).
146 .RE
147
148 .sp
149 .ne 2
150 .na
151 \fBmetaslab_debug_unload\fR (int)
152 .ad
153 .RS 12n
154 Prevent metaslabs from being unloaded.
155 .sp
156 Use \fB1\fR for yes and \fB0\fR for no (default).
157 .RE
158
159 .sp
160 .ne 2
161 .na
162 \fBspa_config_path\fR (charp)
163 .ad
164 .RS 12n
165 SPA config file
166 .sp
167 Default value: \fB/etc/zfs/zpool.cache\fR.
168 .RE
169
170 .sp
171 .ne 2
172 .na
173 \fBspa_asize_inflation\fR (int)
174 .ad
175 .RS 12n
176 Multiplication factor used to estimate actual disk consumption from the
177 size of data being written. The default value is a worst case estimate,
178 but lower values may be valid for a given pool depending on its
179 configuration. Pool administrators who understand the factors involved
180 may wish to specify a more realistic inflation factor, particularly if
181 they operate close to quota or capacity limits.
182 .sp
183 Default value: \fB24\fR.
184 .RE
185
186 .sp
187 .ne 2
188 .na
189 \fBzfetch_array_rd_sz\fR (ulong)
190 .ad
191 .RS 12n
192 If prefetching is enabled, disable prefetching for reads larger than this size.
193 .sp
194 Default value: \fB1,048,576\fR.
195 .RE
196
197 .sp
198 .ne 2
199 .na
200 \fBzfetch_block_cap\fR (uint)
201 .ad
202 .RS 12n
203 Max number of blocks to prefetch at a time
204 .sp
205 Default value: \fB256\fR.
206 .RE
207
208 .sp
209 .ne 2
210 .na
211 \fBzfetch_max_streams\fR (uint)
212 .ad
213 .RS 12n
214 Max number of streams per zfetch (prefetch streams per file).
215 .sp
216 Default value: \fB8\fR.
217 .RE
218
219 .sp
220 .ne 2
221 .na
222 \fBzfetch_min_sec_reap\fR (uint)
223 .ad
224 .RS 12n
225 Min time before an active prefetch stream can be reclaimed
226 .sp
227 Default value: \fB2\fR.
228 .RE
229
230 .sp
231 .ne 2
232 .na
233 \fBzfs_arc_grow_retry\fR (int)
234 .ad
235 .RS 12n
236 Seconds before growing arc size
237 .sp
238 Default value: \fB5\fR.
239 .RE
240
241 .sp
242 .ne 2
243 .na
244 \fBzfs_arc_max\fR (ulong)
245 .ad
246 .RS 12n
247 Max arc size
248 .sp
249 Default value: \fB0\fR.
250 .RE
251
252 .sp
253 .ne 2
254 .na
255 \fBzfs_arc_memory_throttle_disable\fR (int)
256 .ad
257 .RS 12n
258 Disable memory throttle
259 .sp
260 Use \fB1\fR for yes (default) and \fB0\fR to disable.
261 .RE
262
263 .sp
264 .ne 2
265 .na
266 \fBzfs_arc_meta_limit\fR (ulong)
267 .ad
268 .RS 12n
269 Metadata limit for arc size
270 .sp
271 Default value: \fB0\fR.
272 .RE
273
274 .sp
275 .ne 2
276 .na
277 \fBzfs_arc_meta_prune\fR (int)
278 .ad
279 .RS 12n
280 Bytes of metadata to prune
281 .sp
282 Default value: \fB1,048,576\fR.
283 .RE
284
285 .sp
286 .ne 2
287 .na
288 \fBzfs_arc_min\fR (ulong)
289 .ad
290 .RS 12n
291 Min arc size
292 .sp
293 Default value: \fB100\fR.
294 .RE
295
296 .sp
297 .ne 2
298 .na
299 \fBzfs_arc_min_prefetch_lifespan\fR (int)
300 .ad
301 .RS 12n
302 Min life of prefetch block
303 .sp
304 Default value: \fB100\fR.
305 .RE
306
307 .sp
308 .ne 2
309 .na
310 \fBzfs_arc_p_aggressive_disable\fR (int)
311 .ad
312 .RS 12n
313 Disable aggressive arc_p growth
314 .sp
315 Use \fB1\fR for yes (default) and \fB0\fR to disable.
316 .RE
317
318 .sp
319 .ne 2
320 .na
321 \fBzfs_arc_p_dampener_disable\fR (int)
322 .ad
323 .RS 12n
324 Disable arc_p adapt dampener
325 .sp
326 Use \fB1\fR for yes (default) and \fB0\fR to disable.
327 .RE
328
329 .sp
330 .ne 2
331 .na
332 \fBzfs_arc_shrink_shift\fR (int)
333 .ad
334 .RS 12n
335 log2(fraction of arc to reclaim)
336 .sp
337 Default value: \fB5\fR.
338 .RE
339
340 .sp
341 .ne 2
342 .na
343 \fBzfs_autoimport_disable\fR (int)
344 .ad
345 .RS 12n
346 Disable pool import at module load by ignoring the cache file (typically \fB/etc/zfs/zpool.cache\fR).
347 .sp
348 Use \fB1\fR for yes and \fB0\fR for no (default).
349 .RE
350
351 .sp
352 .ne 2
353 .na
354 \fBzfs_dbuf_state_index\fR (int)
355 .ad
356 .RS 12n
357 Calculate arc header index
358 .sp
359 Default value: \fB0\fR.
360 .RE
361
362 .sp
363 .ne 2
364 .na
365 \fBzfs_deadman_enabled\fR (int)
366 .ad
367 .RS 12n
368 Enable deadman timer
369 .sp
370 Use \fB1\fR for yes (default) and \fB0\fR to disable.
371 .RE
372
373 .sp
374 .ne 2
375 .na
376 \fBzfs_deadman_synctime_ms\fR (ulong)
377 .ad
378 .RS 12n
379 Expiration time in milliseconds. This value has two meanings. First, it is
380 used to determine when the spa_deadman() logic should fire. By default,
381 spa_deadman() will fire if spa_sync() has not completed in 1000 seconds.
382 Second, the value determines if an I/O is considered "hung". Any I/O that
383 has not completed within \fBzfs_deadman_synctime_ms\fR is considered "hung",
384 resulting in a zevent being logged.
385 .sp
386 Default value: \fB1,000,000\fR.
387 .RE
388
389 .sp
390 .ne 2
391 .na
392 \fBzfs_dedup_prefetch\fR (int)
393 .ad
394 .RS 12n
395 Enable prefetching of deduplicated blocks
396 .sp
397 Use \fB1\fR for yes (default) and \fB0\fR to disable.
398 .RE
399
400 .sp
401 .ne 2
402 .na
403 \fBzfs_delay_min_dirty_percent\fR (int)
404 .ad
405 .RS 12n
406 Start to delay each transaction once there is this amount of dirty data,
407 expressed as a percentage of \fBzfs_dirty_data_max\fR.
408 This value should be >= \fBzfs_vdev_async_write_active_max_dirty_percent\fR.
409 See the section "ZFS TRANSACTION DELAY".
410 .sp
411 Default value: \fB60\fR.
412 .RE
413
414 .sp
415 .ne 2
416 .na
417 \fBzfs_delay_scale\fR (int)
418 .ad
419 .RS 12n
420 This controls how quickly the transaction delay approaches infinity.
421 Larger values cause longer delays for a given amount of dirty data.
422 .sp
423 For the smoothest delay, this value should be about 1 billion divided
424 by the maximum number of operations per second. This will smoothly
425 handle between 10x and 1/10th of this number.
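.sp
For example, assuming storage that can sustain roughly 2,000 write
operations per second, this guideline yields the default value:
.sp
.nf
	zfs_delay_scale = 1,000,000,000 / 2,000 = 500,000
.fi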
426 .sp
427 See the section "ZFS TRANSACTION DELAY".
428 .sp
429 Note: \fBzfs_delay_scale\fR * \fBzfs_dirty_data_max\fR must be < 2^64.
430 .sp
431 Default value: \fB500,000\fR.
432 .RE
433
434 .sp
435 .ne 2
436 .na
437 \fBzfs_dirty_data_max\fR (int)
438 .ad
439 .RS 12n
440 Determines the dirty space limit in bytes. Once this limit is exceeded, new
441 writes are halted until space frees up. This parameter takes precedence
442 over \fBzfs_dirty_data_max_percent\fR.
443 See the section "ZFS TRANSACTION DELAY".
444 .sp
445 Default value: 10 percent of all memory, capped at \fBzfs_dirty_data_max_max\fR.
446 .RE
447
448 .sp
449 .ne 2
450 .na
451 \fBzfs_dirty_data_max_max\fR (int)
452 .ad
453 .RS 12n
454 Maximum allowable value of \fBzfs_dirty_data_max\fR, expressed in bytes.
455 This limit is only enforced at module load time, and will be ignored if
456 \fBzfs_dirty_data_max\fR is later changed. This parameter takes
457 precedence over \fBzfs_dirty_data_max_max_percent\fR. See the section
458 "ZFS TRANSACTION DELAY".
459 .sp
460 Default value: 25% of physical RAM.
461 .RE
462
463 .sp
464 .ne 2
465 .na
466 \fBzfs_dirty_data_max_max_percent\fR (int)
467 .ad
468 .RS 12n
469 Maximum allowable value of \fBzfs_dirty_data_max\fR, expressed as a
470 percentage of physical RAM. This limit is only enforced at module load
471 time, and will be ignored if \fBzfs_dirty_data_max\fR is later changed.
472 The parameter \fBzfs_dirty_data_max_max\fR takes precedence over this
473 one. See the section "ZFS TRANSACTION DELAY".
474 .sp
475 Default value: \fB25\fR.
476 .RE
477
478 .sp
479 .ne 2
480 .na
481 \fBzfs_dirty_data_max_percent\fR (int)
482 .ad
483 .RS 12n
484 Determines the dirty space limit, expressed as a percentage of all
485 memory. Once this limit is exceeded, new writes are halted until space frees
486 up. The parameter \fBzfs_dirty_data_max\fR takes precedence over this
487 one. See the section "ZFS TRANSACTION DELAY".
488 .sp
489 Default value: 10%, subject to \fBzfs_dirty_data_max_max\fR.
490 .RE
491
492 .sp
493 .ne 2
494 .na
495 \fBzfs_dirty_data_sync\fR (int)
496 .ad
497 .RS 12n
498 Start syncing out a transaction group if there is at least this much dirty data.
499 .sp
500 Default value: \fB67,108,864\fR.
501 .RE
502
503 .sp
504 .ne 2
505 .na
506 \fBzfs_vdev_async_read_max_active\fR (int)
507 .ad
508 .RS 12n
509 Maximum asynchronous read I/Os active to each device.
510 See the section "ZFS I/O SCHEDULER".
511 .sp
512 Default value: \fB3\fR.
513 .RE
514
515 .sp
516 .ne 2
517 .na
518 \fBzfs_vdev_async_read_min_active\fR (int)
519 .ad
520 .RS 12n
521 Minimum asynchronous read I/Os active to each device.
522 See the section "ZFS I/O SCHEDULER".
523 .sp
524 Default value: \fB1\fR.
525 .RE
526
527 .sp
528 .ne 2
529 .na
530 \fBzfs_vdev_async_write_active_max_dirty_percent\fR (int)
531 .ad
532 .RS 12n
533 When the pool has more than
534 \fBzfs_vdev_async_write_active_max_dirty_percent\fR dirty data, use
535 \fBzfs_vdev_async_write_max_active\fR to limit active async writes. If
536 the dirty data is between min and max, the active I/O limit is linearly
537 interpolated. See the section "ZFS I/O SCHEDULER".
538 .sp
539 Default value: \fB60\fR.
540 .RE
541
542 .sp
543 .ne 2
544 .na
545 \fBzfs_vdev_async_write_active_min_dirty_percent\fR (int)
546 .ad
547 .RS 12n
548 When the pool has less than
549 \fBzfs_vdev_async_write_active_min_dirty_percent\fR dirty data, use
550 \fBzfs_vdev_async_write_min_active\fR to limit active async writes. If
551 the dirty data is between min and max, the active I/O limit is linearly
552 interpolated. See the section "ZFS I/O SCHEDULER".
553 .sp
554 Default value: \fB30\fR.
555 .RE
556
557 .sp
558 .ne 2
559 .na
560 \fBzfs_vdev_async_write_max_active\fR (int)
561 .ad
562 .RS 12n
563 Maximum asynchronous write I/Os active to each device.
564 See the section "ZFS I/O SCHEDULER".
565 .sp
566 Default value: \fB10\fR.
567 .RE
568
569 .sp
570 .ne 2
571 .na
572 \fBzfs_vdev_async_write_min_active\fR (int)
573 .ad
574 .RS 12n
575 Minimum asynchronous write I/Os active to each device.
576 See the section "ZFS I/O SCHEDULER".
577 .sp
578 Default value: \fB1\fR.
579 .RE
580
581 .sp
582 .ne 2
583 .na
584 \fBzfs_vdev_max_active\fR (int)
585 .ad
586 .RS 12n
587 The maximum number of I/Os active to each device. Ideally, this will be >=
588 the sum of each queue's max_active. It must be at least the sum of each
589 queue's min_active. See the section "ZFS I/O SCHEDULER".
590 .sp
591 Default value: \fB1,000\fR.
592 .RE
593
594 .sp
595 .ne 2
596 .na
597 \fBzfs_vdev_scrub_max_active\fR (int)
598 .ad
599 .RS 12n
600 Maximum scrub I/Os active to each device.
601 See the section "ZFS I/O SCHEDULER".
602 .sp
603 Default value: \fB2\fR.
604 .RE
605
606 .sp
607 .ne 2
608 .na
609 \fBzfs_vdev_scrub_min_active\fR (int)
610 .ad
611 .RS 12n
612 Minimum scrub I/Os active to each device.
613 See the section "ZFS I/O SCHEDULER".
614 .sp
615 Default value: \fB1\fR.
616 .RE
617
618 .sp
619 .ne 2
620 .na
621 \fBzfs_vdev_sync_read_max_active\fR (int)
622 .ad
623 .RS 12n
624 Maximum synchronous read I/Os active to each device.
625 See the section "ZFS I/O SCHEDULER".
626 .sp
627 Default value: \fB10\fR.
628 .RE
629
630 .sp
631 .ne 2
632 .na
633 \fBzfs_vdev_sync_read_min_active\fR (int)
634 .ad
635 .RS 12n
636 Minimum synchronous read I/Os active to each device.
637 See the section "ZFS I/O SCHEDULER".
638 .sp
639 Default value: \fB10\fR.
640 .RE
641
642 .sp
643 .ne 2
644 .na
645 \fBzfs_vdev_sync_write_max_active\fR (int)
646 .ad
647 .RS 12n
648 Maximum synchronous write I/Os active to each device.
649 See the section "ZFS I/O SCHEDULER".
650 .sp
651 Default value: \fB10\fR.
652 .RE
653
654 .sp
655 .ne 2
656 .na
657 \fBzfs_vdev_sync_write_min_active\fR (int)
658 .ad
659 .RS 12n
660 Minimum synchronous write I/Os active to each device.
661 See the section "ZFS I/O SCHEDULER".
662 .sp
663 Default value: \fB10\fR.
664 .RE
665
666 .sp
667 .ne 2
668 .na
669 \fBzfs_disable_dup_eviction\fR (int)
670 .ad
671 .RS 12n
672 Disable duplicate buffer eviction
673 .sp
674 Use \fB1\fR for yes and \fB0\fR for no (default).
675 .RE
676
677 .sp
678 .ne 2
679 .na
680 \fBzfs_expire_snapshot\fR (int)
681 .ad
682 .RS 12n
683 Seconds to expire .zfs/snapshot
684 .sp
685 Default value: \fB300\fR.
686 .RE
687
688 .sp
689 .ne 2
690 .na
691 \fBzfs_flags\fR (int)
692 .ad
693 .RS 12n
694 Set additional debugging flags
695 .sp
696 Default value: \fB1\fR.
697 .RE
698
699 .sp
700 .ne 2
701 .na
702 \fBzfs_free_min_time_ms\fR (int)
703 .ad
704 .RS 12n
705 Min milliseconds to free per txg
706 .sp
707 Default value: \fB1,000\fR.
708 .RE
709
710 .sp
711 .ne 2
712 .na
713 \fBzfs_immediate_write_sz\fR (long)
714 .ad
715 .RS 12n
716 Largest data block to write to the ZIL
717 .sp
718 Default value: \fB32,768\fR.
719 .RE
720
721 .sp
722 .ne 2
723 .na
724 \fBzfs_mdcomp_disable\fR (int)
725 .ad
726 .RS 12n
727 Disable metadata compression
728 .sp
729 Use \fB1\fR for yes and \fB0\fR for no (default).
730 .RE
731
732 .sp
733 .ne 2
734 .na
735 \fBzfs_no_scrub_io\fR (int)
736 .ad
737 .RS 12n
738 Set to disable scrub I/O
739 .sp
740 Use \fB1\fR for yes and \fB0\fR for no (default).
741 .RE
742
743 .sp
744 .ne 2
745 .na
746 \fBzfs_no_scrub_prefetch\fR (int)
747 .ad
748 .RS 12n
749 Set to disable scrub prefetching
750 .sp
751 Use \fB1\fR for yes and \fB0\fR for no (default).
752 .RE
753
754 .sp
755 .ne 2
756 .na
757 \fBzfs_nocacheflush\fR (int)
758 .ad
759 .RS 12n
760 Disable cache flushes
761 .sp
762 Use \fB1\fR for yes and \fB0\fR for no (default).
763 .RE
764
765 .sp
766 .ne 2
767 .na
768 \fBzfs_nopwrite_enabled\fR (int)
769 .ad
770 .RS 12n
771 Enable NOP writes
772 .sp
773 Use \fB1\fR for yes (default) and \fB0\fR to disable.
774 .RE
775
776 .sp
777 .ne 2
778 .na
779 \fBzfs_pd_blks_max\fR (int)
780 .ad
781 .RS 12n
782 Max number of blocks to prefetch
783 .sp
784 Default value: \fB100\fR.
785 .RE
786
787 .sp
788 .ne 2
789 .na
790 \fBzfs_prefetch_disable\fR (int)
791 .ad
792 .RS 12n
793 Disable all ZFS prefetching
794 .sp
795 Use \fB1\fR for yes and \fB0\fR for no (default).
796 .RE
797
798 .sp
799 .ne 2
800 .na
801 \fBzfs_read_chunk_size\fR (long)
802 .ad
803 .RS 12n
804 Bytes to read per chunk
805 .sp
806 Default value: \fB1,048,576\fR.
807 .RE
808
809 .sp
810 .ne 2
811 .na
812 \fBzfs_read_history\fR (int)
813 .ad
814 .RS 12n
815 Historic statistics for the last N reads
816 .sp
817 Default value: \fB0\fR.
818 .RE
819
820 .sp
821 .ne 2
822 .na
823 \fBzfs_read_history_hits\fR (int)
824 .ad
825 .RS 12n
826 Include cache hits in read history
827 .sp
828 Use \fB1\fR for yes and \fB0\fR for no (default).
829 .RE
830
831 .sp
832 .ne 2
833 .na
834 \fBzfs_recover\fR (int)
835 .ad
836 .RS 12n
837 Set to attempt to recover from fatal errors. This should only be used as a
838 last resort, as it typically results in leaked space, or worse.
839 .sp
840 Use \fB1\fR for yes and \fB0\fR for no (default).
841 .RE
842
843 .sp
844 .ne 2
845 .na
846 \fBzfs_resilver_delay\fR (int)
847 .ad
848 .RS 12n
849 Number of ticks to delay prior to issuing a resilver I/O operation when
850 a non-resilver or non-scrub I/O operation has occurred within the past
851 \fBzfs_scan_idle\fR ticks.
852 .sp
853 Default value: \fB2\fR.
854 .RE
855
856 .sp
857 .ne 2
858 .na
859 \fBzfs_resilver_min_time_ms\fR (int)
860 .ad
861 .RS 12n
862 Min milliseconds to resilver per txg
863 .sp
864 Default value: \fB3,000\fR.
865 .RE
866
867 .sp
868 .ne 2
869 .na
870 \fBzfs_scan_idle\fR (int)
871 .ad
872 .RS 12n
873 Idle window in clock ticks. During a scrub or a resilver, if
874 a non-scrub or non-resilver I/O operation has occurred during this
875 window, the next scrub or resilver operation is delayed by
876 \fBzfs_scrub_delay\fR or \fBzfs_resilver_delay\fR ticks, respectively.
877 .sp
878 Default value: \fB50\fR.
879 .RE
880
881 .sp
882 .ne 2
883 .na
884 \fBzfs_scan_min_time_ms\fR (int)
885 .ad
886 .RS 12n
887 Min milliseconds to scrub per txg
888 .sp
889 Default value: \fB1,000\fR.
890 .RE
891
892 .sp
893 .ne 2
894 .na
895 \fBzfs_scrub_delay\fR (int)
896 .ad
897 .RS 12n
898 Number of ticks to delay prior to issuing a scrub I/O operation when
899 a non-scrub or non-resilver I/O operation has occurred within the past
900 \fBzfs_scan_idle\fR ticks.
901 .sp
902 Default value: \fB4\fR.
903 .RE
904
905 .sp
906 .ne 2
907 .na
908 \fBzfs_send_corrupt_data\fR (int)
909 .ad
910 .RS 12n
911 Allow sending of corrupt data (ignore read and checksum errors when sending data)
912 .sp
913 Use \fB1\fR for yes and \fB0\fR for no (default).
914 .RE
915
916 .sp
917 .ne 2
918 .na
919 \fBzfs_sync_pass_deferred_free\fR (int)
920 .ad
921 .RS 12n
922 Defer frees starting in this pass
923 .sp
924 Default value: \fB2\fR.
925 .RE
926
927 .sp
928 .ne 2
929 .na
930 \fBzfs_sync_pass_dont_compress\fR (int)
931 .ad
932 .RS 12n
933 Don't compress starting in this pass
934 .sp
935 Default value: \fB5\fR.
936 .RE
937
938 .sp
939 .ne 2
940 .na
941 \fBzfs_sync_pass_rewrite\fR (int)
942 .ad
943 .RS 12n
944 Rewrite new block pointers starting in this pass
945 .sp
946 Default value: \fB2\fR.
947 .RE
948
949 .sp
950 .ne 2
951 .na
952 \fBzfs_top_maxinflight\fR (int)
953 .ad
954 .RS 12n
955 Max I/Os per top-level vdev during scrub or resilver operations.
956 .sp
957 Default value: \fB32\fR.
958 .RE
959
960 .sp
961 .ne 2
962 .na
963 \fBzfs_txg_history\fR (int)
964 .ad
965 .RS 12n
966 Historic statistics for the last N txgs
967 .sp
968 Default value: \fB0\fR.
969 .RE
970
971 .sp
972 .ne 2
973 .na
974 \fBzfs_txg_timeout\fR (int)
975 .ad
976 .RS 12n
977 Max seconds worth of delta per txg
978 .sp
979 Default value: \fB5\fR.
980 .RE
981
982 .sp
983 .ne 2
984 .na
985 \fBzfs_vdev_aggregation_limit\fR (int)
986 .ad
987 .RS 12n
988 Max vdev I/O aggregation size
989 .sp
990 Default value: \fB131,072\fR.
991 .RE
992
993 .sp
994 .ne 2
995 .na
996 \fBzfs_vdev_cache_bshift\fR (int)
997 .ad
998 .RS 12n
999 Shift size to inflate reads to
1000 .sp
1001 Default value: \fB16\fR.
1002 .RE
1003
1004 .sp
1005 .ne 2
1006 .na
1007 \fBzfs_vdev_cache_max\fR (int)
1008 .ad
1009 .RS 12n
1010 Inflate reads smaller than this value
1011 .RE
1012
1013 .sp
1014 .ne 2
1015 .na
1016 \fBzfs_vdev_cache_size\fR (int)
1017 .ad
1018 .RS 12n
1019 Total size of the per-disk cache
1020 .sp
1021 Default value: \fB0\fR.
1022 .RE
1023
1024 .sp
1025 .ne 2
1026 .na
1027 \fBzfs_vdev_mirror_switch_us\fR (int)
1028 .ad
1029 .RS 12n
1030 Switch mirrors every N microseconds
1031 .sp
1032 Default value: \fB10,000\fR.
1033 .RE
1034
1035 .sp
1036 .ne 2
1037 .na
1038 \fBzfs_vdev_read_gap_limit\fR (int)
1039 .ad
1040 .RS 12n
1041 Aggregate read I/O over gap
1042 .sp
1043 Default value: \fB32,768\fR.
1044 .RE
1045
1046 .sp
1047 .ne 2
1048 .na
1049 \fBzfs_vdev_scheduler\fR (charp)
1050 .ad
1051 .RS 12n
1052 I/O scheduler
1053 .sp
1054 Default value: \fBnoop\fR.
1055 .RE
1056
1057 .sp
1058 .ne 2
1059 .na
1060 \fBzfs_vdev_write_gap_limit\fR (int)
1061 .ad
1062 .RS 12n
1063 Aggregate write I/O over gap
1064 .sp
1065 Default value: \fB4,096\fR.
1066 .RE
1067
1068 .sp
1069 .ne 2
1070 .na
1071 \fBzfs_zevent_cols\fR (int)
1072 .ad
1073 .RS 12n
1074 Max event column width
1075 .sp
1076 Default value: \fB80\fR.
1077 .RE
1078
1079 .sp
1080 .ne 2
1081 .na
1082 \fBzfs_zevent_console\fR (int)
1083 .ad
1084 .RS 12n
1085 Log events to the console
1086 .sp
1087 Use \fB1\fR for yes and \fB0\fR for no (default).
1088 .RE
1089
1090 .sp
1091 .ne 2
1092 .na
1093 \fBzfs_zevent_len_max\fR (int)
1094 .ad
1095 .RS 12n
1096 Max event queue length
1097 .sp
1098 Default value: \fB0\fR.
1099 .RE
1100
1101 .sp
1102 .ne 2
1103 .na
1104 \fBzil_replay_disable\fR (int)
1105 .ad
1106 .RS 12n
1107 Disable intent logging replay
1108 .sp
1109 Use \fB1\fR for yes and \fB0\fR for no (default).
1110 .RE
1111
1112 .sp
1113 .ne 2
1114 .na
1115 \fBzil_slog_limit\fR (ulong)
1116 .ad
1117 .RS 12n
1118 Max commit bytes to separate log device
1119 .sp
1120 Default value: \fB1,048,576\fR.
1121 .RE
1122
1123 .sp
1124 .ne 2
1125 .na
1126 \fBzio_bulk_flags\fR (int)
1127 .ad
1128 .RS 12n
1129 Additional flags to pass to bulk buffers
1130 .sp
1131 Default value: \fB0\fR.
1132 .RE
1133
1134 .sp
1135 .ne 2
1136 .na
1137 \fBzio_delay_max\fR (int)
1138 .ad
1139 .RS 12n
1140 Max zio delay in milliseconds before posting an event
1141 .sp
1142 Default value: \fB30,000\fR.
1143 .RE
1144
1145 .sp
1146 .ne 2
1147 .na
1148 \fBzio_injection_enabled\fR (int)
1149 .ad
1150 .RS 12n
1151 Enable fault injection
1152 .sp
1153 Use \fB1\fR for yes and \fB0\fR for no (default).
1154 .RE
1155
1156 .sp
1157 .ne 2
1158 .na
1159 \fBzio_requeue_io_start_cut_in_line\fR (int)
1160 .ad
1161 .RS 12n
1162 Prioritize requeued I/O
1163 .sp
1164 Default value: \fB0\fR.
1165 .RE
1166
1167 .sp
1168 .ne 2
1169 .na
1170 \fBzvol_inhibit_dev\fR (uint)
1171 .ad
1172 .RS 12n
1173 Do not create zvol device nodes
1174 .sp
1175 Use \fB1\fR for yes and \fB0\fR for no (default).
1176 .RE
1177
1178 .sp
1179 .ne 2
1180 .na
1181 \fBzvol_major\fR (uint)
1182 .ad
1183 .RS 12n
1184 Major number for zvol device
1185 .sp
1186 Default value: \fB230\fR.
1187 .RE
1188
1189 .sp
1190 .ne 2
1191 .na
1192 \fBzvol_max_discard_blocks\fR (ulong)
1193 .ad
1194 .RS 12n
1195 Max number of blocks to discard at once
1196 .sp
1197 Default value: \fB16,384\fR.
1198 .RE
1199
1200 .sp
1201 .ne 2
1202 .na
1203 \fBzvol_threads\fR (uint)
1204 .ad
1205 .RS 12n
1206 Number of threads for zvol device
1207 .sp
1208 Default value: \fB32\fR.
1209 .RE
1210
1211 .SH ZFS I/O SCHEDULER
1212 ZFS issues I/O operations to leaf vdevs to satisfy and complete I/Os.
1213 The I/O scheduler determines when and in what order those operations are
1214 issued. The I/O scheduler divides operations into five I/O classes
1215 prioritized in the following order: sync read, sync write, async read,
1216 async write, and scrub/resilver. Each queue defines the minimum and
1217 maximum number of concurrent operations that may be issued to the
1218 device. In addition, the device has an aggregate maximum,
1219 \fBzfs_vdev_max_active\fR. Note that the sum of the per-queue minimums
1220 must not exceed the aggregate maximum. If the sum of the per-queue
1221 maximums exceeds the aggregate maximum, then the number of active I/Os
1222 may reach \fBzfs_vdev_max_active\fR, in which case no further I/Os will
1223 be issued regardless of whether all per-queue minimums have been met.
1224 .sp
1225 For many physical devices, throughput increases with the number of
1226 concurrent operations, but latency typically suffers. Further, physical
1227 devices typically have a limit at which more concurrent operations have no
1228 effect on throughput or can actually cause it to decrease.
1229 .sp
1230 The scheduler selects the next operation to issue by first looking for an
1231 I/O class whose minimum has not been satisfied. Once all are satisfied and
1232 the aggregate maximum has not been hit, the scheduler looks for classes
1233 whose maximum has not been satisfied. Iteration through the I/O classes is
1234 done in the order specified above. No further operations are issued if the
1235 aggregate maximum number of concurrent operations has been hit or if there
1236 are no operations queued for an I/O class that has not hit its maximum.
1237 Every time an I/O is queued or an operation completes, the I/O scheduler
1238 looks for new operations to issue.
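.sp
The selection logic can be summarized by the following user-space C sketch.
It is illustrative only: the structure and function names are invented for
the example, the real scheduler lives in the kernel and tracks more state,
and the per-class limits shown are simply the documented defaults.
.sp
.nf
#include <assert.h>

/* I/O classes in priority order, as described above. */
enum zio_class { SYNC_READ, SYNC_WRITE, ASYNC_READ, ASYNC_WRITE, SCRUB,
    NCLASSES };

/* Hypothetical per-class queue state, invented for this sketch. */
struct class_queue {
	int	min_active;	/* zfs_vdev_<class>_min_active */
	int	max_active;	/* zfs_vdev_<class>_max_active */
	int	active;		/* I/Os currently issued to the device */
	int	pending;	/* I/Os queued but not yet issued */
};

/*
 * Return the class to issue from next, or -1 if nothing may be issued:
 * first any class below its minimum, then any class below its maximum,
 * always respecting the aggregate zfs_vdev_max_active limit.
 */
static int
pick_class(const struct class_queue q[NCLASSES], int total_active,
    int vdev_max_active)
{
	if (total_active >= vdev_max_active)
		return (-1);

	for (int c = 0; c < NCLASSES; c++)
		if (q[c].pending > 0 && q[c].active < q[c].min_active)
			return (c);

	for (int c = 0; c < NCLASSES; c++)
		if (q[c].pending > 0 && q[c].active < q[c].max_active)
			return (c);

	return (-1);
}

int
main(void)
{
	/* Default limits per class; three I/Os already active in total. */
	struct class_queue q[NCLASSES] = {
		{ 10, 10, 0, 4 },	/* sync read: 4 queued, none active */
		{ 10, 10, 0, 0 },	/* sync write: idle */
		{  1,  3, 2, 8 },	/* async read: already above its min */
		{  1, 10, 1, 8 },	/* async write */
		{  1,  2, 0, 1 },	/* scrub/resilver */
	};

	/* Sync reads win: below their minimum and highest priority. */
	assert(pick_class(q, 3, 1000) == SYNC_READ);
	return (0);
}
.fi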
1239 .sp
1240 In general, smaller values of max_active will lead to lower latency for
1241 synchronous operations. Larger values of max_active may lead to higher
1242 overall throughput, depending on the underlying storage.
1243 .sp
1244 The ratio of the queues' max_active values determines the balance of
1245 performance between reads, writes, and scrubs. For example, increasing
1246 \fBzfs_vdev_scrub_max_active\fR will cause a scrub or resilver to complete
1247 more quickly, but will cause reads and writes to have higher latency and lower throughput.
1248 .sp
1249 All I/O classes have a fixed maximum number of outstanding operations
1250 except for the async write class. Asynchronous writes represent the data
1251 that is committed to stable storage during the syncing stage for
1252 transaction groups. Transaction groups enter the syncing state
1253 periodically so the number of queued async writes will quickly burst up
1254 and then bleed down to zero. Rather than servicing them as quickly as
1255 possible, the I/O scheduler changes the maximum number of active async
1256 write I/Os according to the amount of dirty data in the pool. Since
1257 both throughput and latency typically increase with the number of
1258 concurrent operations issued to physical devices, reducing the
1259 burstiness in the number of concurrent operations also stabilizes the
1260 response time of operations from other -- and in particular synchronous
1261 -- queues. In broad strokes, the I/O scheduler will issue more
1262 concurrent operations from the async write queue as there's more dirty
1263 data in the pool.
1264 .sp
1265 Async Writes
1266 .sp
1267 The number of concurrent operations issued for the async write I/O class
1268 follows a piece-wise linear function defined by a few adjustable points.
1269 .nf
1270
1271 | o---------| <-- zfs_vdev_async_write_max_active
1272 ^ | /^ |
1273 | | / | |
1274 active | / | |
1275 I/O | / | |
1276 count | / | |
1277 | / | |
1278 |-------o | | <-- zfs_vdev_async_write_min_active
1279 0|_______^______|_________|
1280 0% | | 100% of zfs_dirty_data_max
1281 | |
1282 | `-- zfs_vdev_async_write_active_max_dirty_percent
1283 `--------- zfs_vdev_async_write_active_min_dirty_percent
1284
1285 .fi
1286 Until the amount of dirty data exceeds a minimum percentage of the dirty
1287 data allowed in the pool, the I/O scheduler will limit the number of
1288 concurrent operations to the minimum. As that threshold is crossed, the
1289 number of concurrent operations issued increases linearly to the maximum at
1290 the specified maximum percentage of the dirty data allowed in the pool.
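.sp
As a further illustration, the ramp pictured above corresponds to the
following user-space C sketch. It is not the in-kernel code; the constants
are simply the documented defaults of the corresponding tunables.
.sp
.nf
#include <assert.h>

/*
 * Sketch of the piece-wise linear function pictured above.  The argument
 * is the amount of dirty data as a percentage of zfs_dirty_data_max.
 */
static int
async_write_max_active(int dirty_pct)
{
	const int min_active = 1;	/* zfs_vdev_async_write_min_active */
	const int max_active = 10;	/* zfs_vdev_async_write_max_active */
	const int min_dirty = 30;	/* ..._active_min_dirty_percent */
	const int max_dirty = 60;	/* ..._active_max_dirty_percent */

	if (dirty_pct <= min_dirty)
		return (min_active);
	if (dirty_pct >= max_dirty)
		return (max_active);

	/* Linear interpolation between the two corner points of the graph. */
	return (min_active + (max_active - min_active) *
	    (dirty_pct - min_dirty) / (max_dirty - min_dirty));
}

int
main(void)
{
	assert(async_write_max_active(10) == 1);	/* below the ramp */
	assert(async_write_max_active(45) == 5);	/* on the ramp */
	assert(async_write_max_active(80) == 10);	/* above the ramp */
	return (0);
}
.fi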
1291 .sp
1292 Ideally, the amount of dirty data on a busy pool will stay in the sloped
1293 part of the function between \fBzfs_vdev_async_write_active_min_dirty_percent\fR
1294 and \fBzfs_vdev_async_write_active_max_dirty_percent\fR. If it exceeds the
1295 maximum percentage, this indicates that the rate of incoming data is
1296 greater than the rate that the backend storage can handle. In this case, we
1297 must further throttle incoming writes, as described in the next section.
1298
1299 .SH ZFS TRANSACTION DELAY
1300 We delay transactions when we've determined that the backend storage
1301 isn't able to accommodate the rate of incoming writes.
1302 .sp
1303 If there is already a transaction waiting, we delay relative to when
1304 that transaction will finish waiting. This way the calculated delay time
1305 is independent of the number of threads concurrently executing
1306 transactions.
1307 .sp
1308 If we are the only waiter, wait relative to when the transaction
1309 started, rather than the current time. This credits the transaction for
1310 "time already served", e.g. reading indirect blocks.
1311 .sp
1312 The minimum time for a transaction to take is calculated as:
1313 .nf
1314 min_time = zfs_delay_scale * (dirty - min) / (max - dirty)
1315 min_time is then capped at 100 milliseconds.
1316 .fi
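.sp
The calculation can be sketched in user-space C as follows. This is
illustrative only and not the in-kernel implementation; here \fImin\fR is
taken to be \fBzfs_delay_min_dirty_percent\fR of \fBzfs_dirty_data_max\fR
and the result is expressed in nanoseconds.
.sp
.nf
#include <assert.h>
#include <stdint.h>

static uint64_t
tx_delay_ns(uint64_t dirty, uint64_t dirty_max, uint64_t min_dirty_pct,
    uint64_t delay_scale)
{
	const uint64_t cap = 100000000ULL;	/* capped at 100 ms */
	uint64_t min_bytes = dirty_max * min_dirty_pct / 100;

	if (dirty <= min_bytes)
		return (0);		/* below the delay threshold */
	if (dirty >= dirty_max)
		return (cap);

	/* min_time = zfs_delay_scale * (dirty - min) / (max - dirty) */
	uint64_t ns = delay_scale * (dirty - min_bytes) / (dirty_max - dirty);
	return (ns > cap ? cap : ns);
}

int
main(void)
{
	/* Assume zfs_dirty_data_max of 1 GB and the default tunables. */
	const uint64_t dmax = 1000000000ULL;

	/* No delay until zfs_delay_min_dirty_percent (60%) is reached. */
	assert(tx_delay_ns(dmax * 50 / 100, dmax, 60, 500000) == 0);

	/* Midway between 60% and 100% dirty the delay equals zfs_delay_scale. */
	assert(tx_delay_ns(dmax * 80 / 100, dmax, 60, 500000) == 500000);
	return (0);
}
.fi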
1317 .sp
1318 The delay has two degrees of freedom that can be adjusted via tunables. The
1319 percentage of dirty data at which we start to delay is defined by
1320 \fBzfs_delay_min_dirty_percent\fR. This should typically be at or above
1321 \fBzfs_vdev_async_write_active_max_dirty_percent\fR so that we only start to
1322 delay after writing at full speed has failed to keep up with the incoming write
1323 rate. The scale of the curve is defined by \fBzfs_delay_scale\fR. Roughly speaking,
1324 this variable determines the amount of delay at the midpoint of the curve.
1325 .sp
1326 .nf
1327 delay
1328 10ms +-------------------------------------------------------------*+
1329 | *|
1330 9ms + *+
1331 | *|
1332 8ms + *+
1333 | * |
1334 7ms + * +
1335 | * |
1336 6ms + * +
1337 | * |
1338 5ms + * +
1339 | * |
1340 4ms + * +
1341 | * |
1342 3ms + * +
1343 | * |
1344 2ms + (midpoint) * +
1345 | | ** |
1346 1ms + v *** +
1347 | zfs_delay_scale ----------> ******** |
1348 0 +-------------------------------------*********----------------+
1349 0% <- zfs_dirty_data_max -> 100%
1350 .fi
1351 .sp
1352 Note that since the delay is added to the outstanding time remaining on the
1353 most recent transaction, the delay is effectively the inverse of IOPS.
1354 Here the midpoint of 500us translates to 2000 IOPS. The shape of the curve
1355 was chosen such that small changes in the amount of accumulated dirty data
1356 in the first 3/4 of the curve yield relatively small differences in the
1357 amount of delay.
1358 .sp
1359 The effects can be easier to understand when the amount of delay is
1360 represented on a log scale:
1361 .sp
1362 .nf
1363 delay
1364 100ms +-------------------------------------------------------------++
1365 + +
1366 | |
1367 + *+
1368 10ms + *+
1369 + ** +
1370 | (midpoint) ** |
1371 + | ** +
1372 1ms + v **** +
1373 + zfs_delay_scale ----------> ***** +
1374 | **** |
1375 + **** +
1376 100us + ** +
1377 + * +
1378 | * |
1379 + * +
1380 10us + * +
1381 + +
1382 | |
1383 + +
1384 +--------------------------------------------------------------+
1385 0% <- zfs_dirty_data_max -> 100%
1386 .fi
1387 .sp
1388 Note here that only as the amount of dirty data approaches its limit does
1389 the delay start to increase rapidly. The goal of a properly tuned system
1390 should be to keep the amount of dirty data out of that range by first
1391 ensuring that the appropriate limits are set for the I/O scheduler to reach
1392 optimal throughput on the backend storage, and then by changing the value
1393 of \fBzfs_delay_scale\fR to increase the steepness of the curve.