1 '\" te
2 .\" Copyright (c) 2013 by Turbo Fredriksson <turbo@bayour.com>. All rights reserved.
3 .\" The contents of this file are subject to the terms of the Common Development
4 .\" and Distribution License (the "License"). You may not use this file except
5 .\" in compliance with the License. You can obtain a copy of the license at
6 .\" usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing.
7 .\"
8 .\" See the License for the specific language governing permissions and
9 .\" limitations under the License. When distributing Covered Code, include this
10 .\" CDDL HEADER in each file and include the License file at
11 .\" usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this
12 .\" CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your
13 .\" own identifying information:
14 .\" Portions Copyright [yyyy] [name of copyright owner]
15 .TH ZFS-MODULE-PARAMETERS 5 "Nov 16, 2013"
16 .SH NAME
17 zfs\-module\-parameters \- ZFS module parameters
18 .SH DESCRIPTION
19 .sp
20 .LP
21 Description of the different parameters to the ZFS module.
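For example, assuming the standard Linux module parameter interfaces (which
are not described in this page, and not every parameter is writable at
runtime), a parameter such as \fBzfs_txg_timeout\fR can typically be
inspected and changed through sysfs, or set persistently from a modprobe
configuration file such as \fB/etc/modprobe.d/zfs.conf\fR:
.sp
.nf
# Read the current value.
cat /sys/module/zfs/parameters/zfs_txg_timeout

# Change the value at runtime.
echo 10 > /sys/module/zfs/parameters/zfs_txg_timeout

# Apply the value on every module load.
echo "options zfs zfs_txg_timeout=10" >> /etc/modprobe.d/zfs.conf
.fi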
22
23 .SS "Module parameters"
24 .sp
25 .LP
26
27 .sp
28 .ne 2
29 .na
30 \fBl2arc_feed_again\fR (int)
31 .ad
32 .RS 12n
33 Turbo L2ARC warmup
34 .sp
35 Use \fB1\fR for yes (default) and \fB0\fR to disable.
36 .RE
37
38 .sp
39 .ne 2
40 .na
41 \fBl2arc_feed_min_ms\fR (ulong)
42 .ad
43 .RS 12n
44 Min feed interval in milliseconds
45 .sp
46 Default value: \fB200\fR.
47 .RE
48
49 .sp
50 .ne 2
51 .na
52 \fBl2arc_feed_secs\fR (ulong)
53 .ad
54 .RS 12n
55 Seconds between L2ARC writing
56 .sp
57 Default value: \fB1\fR.
58 .RE
59
60 .sp
61 .ne 2
62 .na
63 \fBl2arc_headroom\fR (ulong)
64 .ad
65 .RS 12n
66 Number of max device writes to precache
67 .sp
68 Default value: \fB2\fR.
69 .RE
70
71 .sp
72 .ne 2
73 .na
74 \fBl2arc_headroom_boost\fR (ulong)
75 .ad
76 .RS 12n
77 Compressed l2arc_headroom multiplier
78 .sp
79 Default value: \fB200\fR.
80 .RE
81
82 .sp
83 .ne 2
84 .na
85 \fBl2arc_nocompress\fR (int)
86 .ad
87 .RS 12n
88 Skip compressing L2ARC buffers
89 .sp
90 Use \fB1\fR for yes and \fB0\fR for no (default).
91 .RE
92
93 .sp
94 .ne 2
95 .na
96 \fBl2arc_noprefetch\fR (int)
97 .ad
98 .RS 12n
99 Skip caching prefetched buffers
100 .sp
101 Use \fB1\fR for yes (default) and \fB0\fR to disable.
102 .RE
103
104 .sp
105 .ne 2
106 .na
107 \fBl2arc_norw\fR (int)
108 .ad
109 .RS 12n
110 No reads during writes
111 .sp
112 Use \fB1\fR for yes and \fB0\fR for no (default).
113 .RE
114
115 .sp
116 .ne 2
117 .na
118 \fBl2arc_write_boost\fR (ulong)
119 .ad
120 .RS 12n
121 Extra write bytes during device warmup
122 .sp
123 Default value: \fB8,388,608\fR.
124 .RE
125
126 .sp
127 .ne 2
128 .na
129 \fBl2arc_write_max\fR (ulong)
130 .ad
131 .RS 12n
132 Max write bytes per interval
133 .sp
134 Default value: \fB8,388,608\fR.
135 .RE
136
137 .sp
138 .ne 2
139 .na
140 \fBmetaslab_debug_load\fR (int)
141 .ad
142 .RS 12n
143 Load all metaslabs during pool import.
144 .sp
145 Use \fB1\fR for yes and \fB0\fR for no (default).
146 .RE
147
148 .sp
149 .ne 2
150 .na
151 \fBmetaslab_debug_unload\fR (int)
152 .ad
153 .RS 12n
154 Prevent metaslabs from being unloaded.
155 .sp
156 Use \fB1\fR for yes and \fB0\fR for no (default).
157 .RE
158
159 .sp
160 .ne 2
161 .na
162 \fBspa_config_path\fR (charp)
163 .ad
164 .RS 12n
165 SPA config file
166 .sp
167 Default value: \fB/etc/zfs/zpool.cache\fR.
168 .RE
169
170 .sp
171 .ne 2
172 .na
173 \fBspa_asize_inflation\fR (int)
174 .ad
175 .RS 12n
176 Multiplication factor used to estimate actual disk consumption from the
177 size of data being written. The default value is a worst case estimate,
178 but lower values may be valid for a given pool depending on its
179 configuration. Pool administrators who understand the factors involved
180 may wish to specify a more realistic inflation factor, particularly if
181 they operate close to quota or capacity limits.
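.sp
As a rough worked illustration (the numbers are chosen only as an example;
the appropriate factor depends on the pool configuration), the space
estimate is simply the write size multiplied by this factor:
.sp
.nf
estimated consumption = bytes written * spa_asize_inflation
                      = 1,048,576 * 24
                      = 25,165,824 bytes for a 1 MiB write
.fi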
182 .sp
183 Default value: \fB24\fR.
184 .RE
185
186 .sp
187 .ne 2
188 .na
189 \fBzfetch_array_rd_sz\fR (ulong)
190 .ad
191 .RS 12n
192 If prefetching is enabled, disable prefetching for reads larger than this size.
193 .sp
194 Default value: \fB1,048,576\fR.
195 .RE
196
197 .sp
198 .ne 2
199 .na
200 \fBzfetch_block_cap\fR (uint)
201 .ad
202 .RS 12n
203 Max number of blocks to prefetch at a time
204 .sp
205 Default value: \fB256\fR.
206 .RE
207
208 .sp
209 .ne 2
210 .na
211 \fBzfetch_max_streams\fR (uint)
212 .ad
213 .RS 12n
214 Max number of streams per zfetch (prefetch streams per file).
215 .sp
216 Default value: \fB8\fR.
217 .RE
218
219 .sp
220 .ne 2
221 .na
222 \fBzfetch_min_sec_reap\fR (uint)
223 .ad
224 .RS 12n
225 Min time before an active prefetch stream can be reclaimed
226 .sp
227 Default value: \fB2\fR.
228 .RE
229
230 .sp
231 .ne 2
232 .na
233 \fBzfs_arc_grow_retry\fR (int)
234 .ad
235 .RS 12n
236 Seconds before growing arc size
237 .sp
238 Default value: \fB5\fR.
239 .RE
240
241 .sp
242 .ne 2
243 .na
244 \fBzfs_arc_max\fR (ulong)
245 .ad
246 .RS 12n
247 Max arc size
248 .sp
249 Default value: \fB0\fR.
250 .RE
251
252 .sp
253 .ne 2
254 .na
255 \fBzfs_arc_memory_throttle_disable\fR (int)
256 .ad
257 .RS 12n
258 Disable memory throttle
259 .sp
260 Use \fB1\fR for yes (default) and \fB0\fR to disable.
261 .RE
262
263 .sp
264 .ne 2
265 .na
266 \fBzfs_arc_meta_limit\fR (ulong)
267 .ad
268 .RS 12n
269 Meta limit for arc size
270 .sp
271 Default value: \fB0\fR.
272 .RE
273
274 .sp
275 .ne 2
276 .na
277 \fBzfs_arc_meta_prune\fR (int)
278 .ad
279 .RS 12n
280 Bytes of meta data to prune
281 .sp
282 Default value: \fB1,048,576\fR.
283 .RE
284
285 .sp
286 .ne 2
287 .na
288 \fBzfs_arc_min\fR (ulong)
289 .ad
290 .RS 12n
291 Min arc size
292 .sp
293 Default value: \fB100\fR.
294 .RE
295
296 .sp
297 .ne 2
298 .na
299 \fBzfs_arc_min_prefetch_lifespan\fR (int)
300 .ad
301 .RS 12n
302 Min life of prefetch block
303 .sp
304 Default value: \fB100\fR.
305 .RE
306
307 .sp
308 .ne 2
309 .na
310 \fBzfs_arc_p_aggressive_disable\fR (int)
311 .ad
312 .RS 12n
313 Disable aggressive arc_p growth
314 .sp
315 Use \fB1\fR for yes (default) and \fB0\fR to disable.
316 .RE
317
318 .sp
319 .ne 2
320 .na
321 \fBzfs_arc_p_dampener_disable\fR (int)
322 .ad
323 .RS 12n
324 Disable arc_p adapt dampener
325 .sp
326 Use \fB1\fR for yes (default) and \fB0\fR to disable.
327 .RE
328
329 .sp
330 .ne 2
331 .na
332 \fBzfs_arc_shrink_shift\fR (int)
333 .ad
334 .RS 12n
335 log2(fraction of arc to reclaim)
336 .sp
337 Default value: \fB5\fR.
338 .RE
339
340 .sp
341 .ne 2
342 .na
343 \fBzfs_autoimport_disable\fR (int)
344 .ad
345 .RS 12n
346 Disable pool import at module load by ignoring the cache file (typically \fB/etc/zfs/zpool.cache\fR).
347 .sp
348 Use \fB1\fR for yes and \fB0\fR for no (default).
349 .RE
350
351 .sp
352 .ne 2
353 .na
354 \fBzfs_dbuf_state_index\fR (int)
355 .ad
356 .RS 12n
357 Calculate arc header index
358 .sp
359 Default value: \fB0\fR.
360 .RE
361
362 .sp
363 .ne 2
364 .na
365 \fBzfs_deadman_enabled\fR (int)
366 .ad
367 .RS 12n
368 Enable deadman timer
369 .sp
370 Use \fB1\fR for yes (default) and \fB0\fR to disable.
371 .RE
372
373 .sp
374 .ne 2
375 .na
376 \fBzfs_deadman_synctime_ms\fR (ulong)
377 .ad
378 .RS 12n
379 Expiration time in milliseconds. This value has two meanings. First, it is
380 used to determine when the spa_deadman() logic should fire. By default the
381 spa_deadman() will fire if spa_sync() has not completed in 1000 seconds.
382 Secondly, the value determines if an I/O is considered "hung". Any I/O that
383 has not completed within \fBzfs_deadman_synctime_ms\fR is considered "hung",
384 resulting in a zevent being logged.
385 .sp
386 Default value: \fB1,000,000\fR.
387 .RE
388
389 .sp
390 .ne 2
391 .na
392 \fBzfs_dedup_prefetch\fR (int)
393 .ad
394 .RS 12n
395 Enable prefetching of deduplicated blocks
396 .sp
397 Use \fB1\fR for yes (default) and \fB0\fR to disable.
398 .RE
399
400 .sp
401 .ne 2
402 .na
403 \fBzfs_delay_min_dirty_percent\fR (int)
404 .ad
405 .RS 12n
406 Start to delay each transaction once there is this amount of dirty data,
407 expressed as a percentage of \fBzfs_dirty_data_max\fR.
408 This value should be >= \fBzfs_vdev_async_write_active_max_dirty_percent\fR.
409 See the section "ZFS TRANSACTION DELAY".
410 .sp
411 Default value: \fB60\fR.
412 .RE
413
414 .sp
415 .ne 2
416 .na
417 \fBzfs_delay_scale\fR (int)
418 .ad
419 .RS 12n
420 This controls how quickly the transaction delay approaches infinity.
421 Larger values cause longer delays for a given amount of dirty data.
422 .sp
423 For the smoothest delay, this value should be about 1 billion divided
424 by the maximum number of operations per second. This will smoothly
425 handle between 10x and 1/10th this number.
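.sp
For example, for a pool assumed to sustain roughly 20,000 write operations
per second (an illustrative figure only):
.sp
.nf
zfs_delay_scale = 1,000,000,000 / 20,000 = 50,000
.fi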
426 .sp
427 See the section "ZFS TRANSACTION DELAY".
428 .sp
429 Note: \fBzfs_delay_scale\fR * \fBzfs_dirty_data_max\fR must be < 2^64.
430 .sp
431 Default value: \fB500,000\fR.
432 .RE
433
434 .sp
435 .ne 2
436 .na
437 \fBzfs_dirty_data_max\fR (int)
438 .ad
439 .RS 12n
440 Determines the dirty space limit in bytes. Once this limit is exceeded, new
441 writes are halted until space frees up. This parameter takes precedence
442 over \fBzfs_dirty_data_max_percent\fR.
443 See the section "ZFS TRANSACTION DELAY".
444 .sp
445 Default value: 10 percent of all memory, capped at \fBzfs_dirty_data_max_max\fR.
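.sp
As an illustration, on a hypothetical system with 32 GiB of physical memory:
.sp
.nf
10% of memory          = 3.2 GiB
zfs_dirty_data_max_max = 25% of memory = 8 GiB
zfs_dirty_data_max     = min(3.2 GiB, 8 GiB) = 3.2 GiB
.fi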
446 .RE
447
448 .sp
449 .ne 2
450 .na
451 \fBzfs_dirty_data_max_max\fR (int)
452 .ad
453 .RS 12n
454 Maximum allowable value of \fBzfs_dirty_data_max\fR, expressed in bytes.
455 This limit is only enforced at module load time, and will be ignored if
456 \fBzfs_dirty_data_max\fR is later changed. This parameter takes
457 precedence over \fBzfs_dirty_data_max_max_percent\fR. See the section
458 "ZFS TRANSACTION DELAY".
459 .sp
460 Default value: 25% of physical RAM.
461 .RE
462
463 .sp
464 .ne 2
465 .na
466 \fBzfs_dirty_data_max_max_percent\fR (int)
467 .ad
468 .RS 12n
469 Maximum allowable value of \fBzfs_dirty_data_max\fR, expressed as a
470 percentage of physical RAM. This limit is only enforced at module load
471 time, and will be ignored if \fBzfs_dirty_data_max\fR is later changed.
472 The parameter \fBzfs_dirty_data_max_max\fR takes precedence over this
473 one. See the section "ZFS TRANSACTION DELAY".
474 .sp
475 Default value: \fB25\fR.
476 .RE
477
478 .sp
479 .ne 2
480 .na
481 \fBzfs_dirty_data_max_percent\fR (int)
482 .ad
483 .RS 12n
484 Determines the dirty space limit, expressed as a percentage of all
485 memory. Once this limit is exceeded, new writes are halted until space frees
486 up. The parameter \fBzfs_dirty_data_max\fR takes precedence over this
487 one. See the section "ZFS TRANSACTION DELAY".
488 .sp
489 Default value: 10%, subject to \fBzfs_dirty_data_max_max\fR.
490 .RE
491
492 .sp
493 .ne 2
494 .na
495 \fBzfs_dirty_data_sync\fR (int)
496 .ad
497 .RS 12n
498 Start syncing out a transaction group if there is at least this much dirty data.
499 .sp
500 Default value: \fB67,108,864\fR.
501 .RE
502
503 .sp
504 .ne 2
505 .na
506 \fBzfs_vdev_async_read_max_active\fR (int)
507 .ad
508 .RS 12n
509 Maximum asynchronous read I/Os active to each device.
510 See the section "ZFS I/O SCHEDULER".
511 .sp
512 Default value: \fB3\fR.
513 .RE
514
515 .sp
516 .ne 2
517 .na
518 \fBzfs_vdev_async_read_min_active\fR (int)
519 .ad
520 .RS 12n
521 Minimum asynchronous read I/Os active to each device.
522 See the section "ZFS I/O SCHEDULER".
523 .sp
524 Default value: \fB1\fR.
525 .RE
526
527 .sp
528 .ne 2
529 .na
530 \fBzfs_vdev_async_write_active_max_dirty_percent\fR (int)
531 .ad
532 .RS 12n
533 When the pool has more than
534 \fBzfs_vdev_async_write_active_max_dirty_percent\fR dirty data, use
535 \fBzfs_vdev_async_write_max_active\fR to limit active async writes. If
536 the dirty data is between min and max, the active I/O limit is linearly
537 interpolated. See the section "ZFS I/O SCHEDULER".
538 .sp
539 Default value: \fB60\fR.
540 .RE
541
542 .sp
543 .ne 2
544 .na
545 \fBzfs_vdev_async_write_active_min_dirty_percent\fR (int)
546 .ad
547 .RS 12n
548 When the pool has less than
549 \fBzfs_vdev_async_write_active_min_dirty_percent\fR dirty data, use
550 \fBzfs_vdev_async_write_min_active\fR to limit active async writes. If
551 the dirty data is between min and max, the active I/O limit is linearly
552 interpolated. See the section "ZFS I/O SCHEDULER".
553 .sp
554 Default value: \fB30\fR.
555 .RE
556
557 .sp
558 .ne 2
559 .na
560 \fBzfs_vdev_async_write_max_active\fR (int)
561 .ad
562 .RS 12n
563 Maximum asynchronous write I/Os active to each device.
564 See the section "ZFS I/O SCHEDULER".
565 .sp
566 Default value: \fB10\fR.
567 .RE
568
569 .sp
570 .ne 2
571 .na
572 \fBzfs_vdev_async_write_min_active\fR (int)
573 .ad
574 .RS 12n
575 Minimum asynchronous write I/Os active to each device.
576 See the section "ZFS I/O SCHEDULER".
577 .sp
578 Default value: \fB1\fR.
579 .RE
580
581 .sp
582 .ne 2
583 .na
584 \fBzfs_vdev_max_active\fR (int)
585 .ad
586 .RS 12n
587 The maximum number of I/Os active to each device. Ideally, this will be >=
588 the sum of each queue's max_active. It must be at least the sum of each
589 queue's min_active. See the section "ZFS I/O SCHEDULER".
590 .sp
591 Default value: \fB1,000\fR.
592 .RE
593
594 .sp
595 .ne 2
596 .na
597 \fBzfs_vdev_scrub_max_active\fR (int)
598 .ad
599 .RS 12n
600 Maximum scrub I/Os active to each device.
601 See the section "ZFS I/O SCHEDULER".
602 .sp
603 Default value: \fB2\fR.
604 .RE
605
606 .sp
607 .ne 2
608 .na
609 \fBzfs_vdev_scrub_min_active\fR (int)
610 .ad
611 .RS 12n
612 Minimum scrub I/Os active to each device.
613 See the section "ZFS I/O SCHEDULER".
614 .sp
615 Default value: \fB1\fR.
616 .RE
617
618 .sp
619 .ne 2
620 .na
621 \fBzfs_vdev_sync_read_max_active\fR (int)
622 .ad
623 .RS 12n
624 Maximum synchronous read I/Os active to each device.
625 See the section "ZFS I/O SCHEDULER".
626 .sp
627 Default value: \fB10\fR.
628 .RE
629
630 .sp
631 .ne 2
632 .na
633 \fBzfs_vdev_sync_read_min_active\fR (int)
634 .ad
635 .RS 12n
636 Minimum synchronous read I/Os active to each device.
637 See the section "ZFS I/O SCHEDULER".
638 .sp
639 Default value: \fB10\fR.
640 .RE
641
642 .sp
643 .ne 2
644 .na
645 \fBzfs_vdev_sync_write_max_active\fR (int)
646 .ad
647 .RS 12n
648 Maximum synchronous write I/Os active to each device.
649 See the section "ZFS I/O SCHEDULER".
650 .sp
651 Default value: \fB10\fR.
652 .RE
653
654 .sp
655 .ne 2
656 .na
657 \fBzfs_vdev_sync_write_min_active\fR (int)
658 .ad
659 .RS 12n
660 Minimum synchronous write I/Os active to each device.
661 See the section "ZFS I/O SCHEDULER".
662 .sp
663 Default value: \fB10\fR.
664 .RE
665
666 .sp
667 .ne 2
668 .na
669 \fBzfs_disable_dup_eviction\fR (int)
670 .ad
671 .RS 12n
672 Disable duplicate buffer eviction
673 .sp
674 Use \fB1\fR for yes and \fB0\fR for no (default).
675 .RE
676
677 .sp
678 .ne 2
679 .na
680 \fBzfs_expire_snapshot\fR (int)
681 .ad
682 .RS 12n
683 Seconds to expire .zfs/snapshot
684 .sp
685 Default value: \fB300\fR.
686 .RE
687
688 .sp
689 .ne 2
690 .na
691 \fBzfs_flags\fR (int)
692 .ad
693 .RS 12n
694 Set additional debugging flags
695 .sp
696 Default value: \fB1\fR.
697 .RE
698
699 .sp
700 .ne 2
701 .na
702 \fBzfs_free_min_time_ms\fR (int)
703 .ad
704 .RS 12n
705 Minimum milliseconds to free per txg
706 .sp
707 Default value: \fB1,000\fR.
708 .RE
709
710 .sp
711 .ne 2
712 .na
713 \fBzfs_immediate_write_sz\fR (long)
714 .ad
715 .RS 12n
716 Largest data block to write to zil
717 .sp
718 Default value: \fB32,768\fR.
719 .RE
720
721 .sp
722 .ne 2
723 .na
724 \fBzfs_mdcomp_disable\fR (int)
725 .ad
726 .RS 12n
727 Disable meta data compression
728 .sp
729 Use \fB1\fR for yes and \fB0\fR for no (default).
730 .RE
731
732 .sp
733 .ne 2
734 .na
735 \fBzfs_mg_noalloc_threshold\fR (int)
736 .ad
737 .RS 12n
738 Defines a threshold at which metaslab groups should be eligible for
739 allocations. The value is expressed as a percentage of free space
740 beyond which a metaslab group is always eligible for allocations.
741 If a metaslab group's free space is less than or equal to the
742 threshold, the allocator will avoid allocating to that group
743 unless all groups in the pool have reached the threshold. Once all
744 groups have reached the threshold, all groups are allowed to accept
745 allocations. The default value of 0 disables the feature and causes
746 all metaslab groups to be eligible for allocations.
747
748 This parameter makes it possible to deal with pools having heavily
749 imbalanced vdevs, such as would be the case when a new vdev has been added.
750 Setting the threshold to a non-zero percentage will stop allocations
751 from being made to vdevs that aren't filled to the specified percentage
752 and allow less-filled vdevs to acquire more allocations than they
753 otherwise would under the old \fBzfs_mg_alloc_failures\fR facility.
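.sp
For instance, after attaching a new vdev to a nearly full pool, the threshold
could be raised so that metaslab groups with 10 percent or less free space
stop receiving allocations while the emptier groups catch up (a sketch
assuming the standard module parameter interfaces; the percentage is only an
example):
.sp
.nf
echo 10 > /sys/module/zfs/parameters/zfs_mg_noalloc_threshold
.fi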
754 .sp
755 Default value: \fB0\fR.
756 .RE
757
758 .sp
759 .ne 2
760 .na
761 \fBzfs_no_scrub_io\fR (int)
762 .ad
763 .RS 12n
764 Set for no scrub I/O
765 .sp
766 Use \fB1\fR for yes and \fB0\fR for no (default).
767 .RE
768
769 .sp
770 .ne 2
771 .na
772 \fBzfs_no_scrub_prefetch\fR (int)
773 .ad
774 .RS 12n
775 Set for no scrub prefetching
776 .sp
777 Use \fB1\fR for yes and \fB0\fR for no (default).
778 .RE
779
780 .sp
781 .ne 2
782 .na
783 \fBzfs_nocacheflush\fR (int)
784 .ad
785 .RS 12n
786 Disable cache flushes
787 .sp
788 Use \fB1\fR for yes and \fB0\fR for no (default).
789 .RE
790
791 .sp
792 .ne 2
793 .na
794 \fBzfs_nopwrite_enabled\fR (int)
795 .ad
796 .RS 12n
797 Enable NOP writes
798 .sp
799 Use \fB1\fR for yes (default) and \fB0\fR to disable.
800 .RE
801
802 .sp
803 .ne 2
804 .na
805 \fBzfs_pd_blks_max\fR (int)
806 .ad
807 .RS 12n
808 Max number of blocks to prefetch
809 .sp
810 Default value: \fB100\fR.
811 .RE
812
813 .sp
814 .ne 2
815 .na
816 \fBzfs_prefetch_disable\fR (int)
817 .ad
818 .RS 12n
819 Disable all ZFS prefetching
820 .sp
821 Use \fB1\fR for yes and \fB0\fR for no (default).
822 .RE
823
824 .sp
825 .ne 2
826 .na
827 \fBzfs_read_chunk_size\fR (long)
828 .ad
829 .RS 12n
830 Bytes to read per chunk
831 .sp
832 Default value: \fB1,048,576\fR.
833 .RE
834
835 .sp
836 .ne 2
837 .na
838 \fBzfs_read_history\fR (int)
839 .ad
840 .RS 12n
841 Historic statistics for the last N reads
842 .sp
843 Default value: \fB0\fR.
844 .RE
845
846 .sp
847 .ne 2
848 .na
849 \fBzfs_read_history_hits\fR (int)
850 .ad
851 .RS 12n
852 Include cache hits in read history
853 .sp
854 Use \fB1\fR for yes and \fB0\fR for no (default).
855 .RE
856
857 .sp
858 .ne 2
859 .na
860 \fBzfs_recover\fR (int)
861 .ad
862 .RS 12n
863 Set to attempt to recover from fatal errors. This should only be used as a
864 last resort, as it typically results in leaked space, or worse.
865 .sp
866 Use \fB1\fR for yes and \fB0\fR for no (default).
867 .RE
868
869 .sp
870 .ne 2
871 .na
872 \fBzfs_resilver_delay\fR (int)
873 .ad
874 .RS 12n
875 Number of ticks to delay prior to issuing a resilver I/O operation when
876 a non-resilver or non-scrub I/O operation has occurred within the past
877 \fBzfs_scan_idle\fR ticks.
878 .sp
879 Default value: \fB2\fR.
880 .RE
881
882 .sp
883 .ne 2
884 .na
885 \fBzfs_resilver_min_time_ms\fR (int)
886 .ad
887 .RS 12n
888 Minimum milliseconds to resilver per txg
889 .sp
890 Default value: \fB3,000\fR.
891 .RE
892
893 .sp
894 .ne 2
895 .na
896 \fBzfs_scan_idle\fR (int)
897 .ad
898 .RS 12n
899 Idle window in clock ticks. During a scrub or a resilver, if
900 a non-scrub or non-resilver I/O operation has occurred during this
901 window, the next scrub or resilver operation is delayed by
902 \fBzfs_scrub_delay\fR or \fBzfs_resilver_delay\fR ticks, respectively.
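.sp
To put the tick values in perspective (a rough illustration only; the length
of a tick depends on the kernel's HZ setting, commonly 100, 250 or 1000),
with HZ=100 the defaults translate to:
.sp
.nf
zfs_scan_idle      = 50 ticks = 500 ms idle window
zfs_scrub_delay    =  4 ticks =  40 ms delay per scrub I/O
zfs_resilver_delay =  2 ticks =  20 ms delay per resilver I/O
.fi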
903 .sp
904 Default value: \fB50\fR.
905 .RE
906
907 .sp
908 .ne 2
909 .na
910 \fBzfs_scan_min_time_ms\fR (int)
911 .ad
912 .RS 12n
913 Minimum milliseconds to scrub per txg
914 .sp
915 Default value: \fB1,000\fR.
916 .RE
917
918 .sp
919 .ne 2
920 .na
921 \fBzfs_scrub_delay\fR (int)
922 .ad
923 .RS 12n
924 Number of ticks to delay prior to issuing a scrub I/O operation when
925 a non-scrub or non-resilver I/O operation has occurred within the past
926 \fBzfs_scan_idle\fR ticks.
927 .sp
928 Default value: \fB4\fR.
929 .RE
930
931 .sp
932 .ne 2
933 .na
934 \fBzfs_send_corrupt_data\fR (int)
935 .ad
936 .RS 12n
937 Allow sending corrupt data (ignore read/checksum errors when sending data)
938 .sp
939 Use \fB1\fR for yes and \fB0\fR for no (default).
940 .RE
941
942 .sp
943 .ne 2
944 .na
945 \fBzfs_sync_pass_deferred_free\fR (int)
946 .ad
947 .RS 12n
948 Defer frees starting in this pass
949 .sp
950 Default value: \fB2\fR.
951 .RE
952
953 .sp
954 .ne 2
955 .na
956 \fBzfs_sync_pass_dont_compress\fR (int)
957 .ad
958 .RS 12n
959 Don't compress starting in this pass
960 .sp
961 Default value: \fB5\fR.
962 .RE
963
964 .sp
965 .ne 2
966 .na
967 \fBzfs_sync_pass_rewrite\fR (int)
968 .ad
969 .RS 12n
970 Rewrite new block pointers starting in this pass
971 .sp
972 Default value: \fB2\fR.
973 .RE
974
975 .sp
976 .ne 2
977 .na
978 \fBzfs_top_maxinflight\fR (int)
979 .ad
980 .RS 12n
981 Max I/Os per top-level vdev during scrub or resilver operations.
982 .sp
983 Default value: \fB32\fR.
984 .RE
985
986 .sp
987 .ne 2
988 .na
989 \fBzfs_txg_history\fR (int)
990 .ad
991 .RS 12n
992 Historic statistics for the last N txgs
993 .sp
994 Default value: \fB0\fR.
995 .RE
996
997 .sp
998 .ne 2
999 .na
1000 \fBzfs_txg_timeout\fR (int)
1001 .ad
1002 .RS 12n
1003 Max seconds worth of delta per txg
1004 .sp
1005 Default value: \fB5\fR.
1006 .RE
1007
1008 .sp
1009 .ne 2
1010 .na
1011 \fBzfs_vdev_aggregation_limit\fR (int)
1012 .ad
1013 .RS 12n
1014 Max vdev I/O aggregation size
1015 .sp
1016 Default value: \fB131,072\fR.
1017 .RE
1018
1019 .sp
1020 .ne 2
1021 .na
1022 \fBzfs_vdev_cache_bshift\fR (int)
1023 .ad
1024 .RS 12n
1025 Shift size to inflate reads to
1026 .sp
1027 Default value: \fB16\fR.
1028 .RE
1029
1030 .sp
1031 .ne 2
1032 .na
1033 \fBzfs_vdev_cache_max\fR (int)
1034 .ad
1035 .RS 12n
1036 Inflate reads smaller than this value
1037 .RE
1038
1039 .sp
1040 .ne 2
1041 .na
1042 \fBzfs_vdev_cache_size\fR (int)
1043 .ad
1044 .RS 12n
1045 Total size of the per-disk cache
1046 .sp
1047 Default value: \fB0\fR.
1048 .RE
1049
1050 .sp
1051 .ne 2
1052 .na
1053 \fBzfs_vdev_mirror_switch_us\fR (int)
1054 .ad
1055 .RS 12n
1056 Switch mirrors every N microseconds
1057 .sp
1058 Default value: \fB10,000\fR.
1059 .RE
1060
1061 .sp
1062 .ne 2
1063 .na
1064 \fBzfs_vdev_read_gap_limit\fR (int)
1065 .ad
1066 .RS 12n
1067 Aggregate read I/O over gap
1068 .sp
1069 Default value: \fB32,768\fR.
1070 .RE
1071
1072 .sp
1073 .ne 2
1074 .na
1075 \fBzfs_vdev_scheduler\fR (charp)
1076 .ad
1077 .RS 12n
1078 I/O scheduler
1079 .sp
1080 Default value: \fBnoop\fR.
1081 .RE
1082
1083 .sp
1084 .ne 2
1085 .na
1086 \fBzfs_vdev_write_gap_limit\fR (int)
1087 .ad
1088 .RS 12n
1089 Aggregate write I/O over gap
1090 .sp
1091 Default value: \fB4,096\fR.
1092 .RE
1093
1094 .sp
1095 .ne 2
1096 .na
1097 \fBzfs_zevent_cols\fR (int)
1098 .ad
1099 .RS 12n
1100 Max event column width
1101 .sp
1102 Default value: \fB80\fR.
1103 .RE
1104
1105 .sp
1106 .ne 2
1107 .na
1108 \fBzfs_zevent_console\fR (int)
1109 .ad
1110 .RS 12n
1111 Log events to the console
1112 .sp
1113 Use \fB1\fR for yes and \fB0\fR for no (default).
1114 .RE
1115
1116 .sp
1117 .ne 2
1118 .na
1119 \fBzfs_zevent_len_max\fR (int)
1120 .ad
1121 .RS 12n
1122 Max event queue length
1123 .sp
1124 Default value: \fB0\fR.
1125 .RE
1126
1127 .sp
1128 .ne 2
1129 .na
1130 \fBzil_replay_disable\fR (int)
1131 .ad
1132 .RS 12n
1133 Disable intent logging replay
1134 .sp
1135 Use \fB1\fR for yes and \fB0\fR for no (default).
1136 .RE
1137
1138 .sp
1139 .ne 2
1140 .na
1141 \fBzil_slog_limit\fR (ulong)
1142 .ad
1143 .RS 12n
1144 Max commit bytes to separate log device
1145 .sp
1146 Default value: \fB1,048,576\fR.
1147 .RE
1148
1149 .sp
1150 .ne 2
1151 .na
1152 \fBzio_bulk_flags\fR (int)
1153 .ad
1154 .RS 12n
1155 Additional flags to pass to bulk buffers
1156 .sp
1157 Default value: \fB0\fR.
1158 .RE
1159
1160 .sp
1161 .ne 2
1162 .na
1163 \fBzio_delay_max\fR (int)
1164 .ad
1165 .RS 12n
1166 Max zio delay in milliseconds before posting an event
1167 .sp
1168 Default value: \fB30,000\fR.
1169 .RE
1170
1171 .sp
1172 .ne 2
1173 .na
1174 \fBzio_injection_enabled\fR (int)
1175 .ad
1176 .RS 12n
1177 Enable fault injection
1178 .sp
1179 Use \fB1\fR for yes and \fB0\fR for no (default).
1180 .RE
1181
1182 .sp
1183 .ne 2
1184 .na
1185 \fBzio_requeue_io_start_cut_in_line\fR (int)
1186 .ad
1187 .RS 12n
1188 Prioritize requeued I/O
1189 .sp
1190 Default value: \fB0\fR.
1191 .RE
1192
1193 .sp
1194 .ne 2
1195 .na
1196 \fBzvol_inhibit_dev\fR (uint)
1197 .ad
1198 .RS 12n
1199 Do not create zvol device nodes
1200 .sp
1201 Use \fB1\fR for yes and \fB0\fR for no (default).
1202 .RE
1203
1204 .sp
1205 .ne 2
1206 .na
1207 \fBzvol_major\fR (uint)
1208 .ad
1209 .RS 12n
1210 Major number for zvol device
1211 .sp
1212 Default value: \fB230\fR.
1213 .RE
1214
1215 .sp
1216 .ne 2
1217 .na
1218 \fBzvol_max_discard_blocks\fR (ulong)
1219 .ad
1220 .RS 12n
1221 Max number of blocks to discard at once
1222 .sp
1223 Default value: \fB16,384\fR.
1224 .RE
1225
1226 .sp
1227 .ne 2
1228 .na
1229 \fBzvol_threads\fR (uint)
1230 .ad
1231 .RS 12n
1232 Number of threads for zvol device
1233 .sp
1234 Default value: \fB32\fR.
1235 .RE
1236
1237 .SH ZFS I/O SCHEDULER
1238 ZFS issues I/O operations to leaf vdevs to satisfy and complete I/Os.
1239 The I/O scheduler determines when and in what order those operations are
1240 issued. The I/O scheduler divides operations into five I/O classes
1241 prioritized in the following order: sync read, sync write, async read,
1242 async write, and scrub/resilver. Each queue defines the minimum and
1243 maximum number of concurrent operations that may be issued to the
1244 device. In addition, the device has an aggregate maximum,
1245 \fBzfs_vdev_max_active\fR. Note that the sum of the per-queue minimums
1246 must not exceed the aggregate maximum. If the sum of the per-queue
1247 maximums exceeds the aggregate maximum, then the number of active I/Os
1248 may reach \fBzfs_vdev_max_active\fR, in which case no further I/Os will
1249 be issued regardless of whether all per-queue minimums have been met.
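.sp
As an illustration using the default values documented above (assuming none
of them have been tuned), the per-queue limits fit comfortably within the
aggregate maximum:
.sp
.nf
(sync read, sync write, async read, async write, scrub)
sum of min_active = 10 + 10 + 1 + 1 + 1  =   23
sum of max_active = 10 + 10 + 3 + 10 + 2 =   35
zfs_vdev_max_active                      = 1000
.fi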
1250 .sp
1251 For many physical devices, throughput increases with the number of
1252 concurrent operations, but latency typically suffers. Further, physical
1253 devices typically have a limit at which more concurrent operations have no
1254 effect on throughput or can actually cause it to decrease.
1255 .sp
1256 The scheduler selects the next operation to issue by first looking for an
1257 I/O class whose minimum has not been satisfied. Once all are satisfied and
1258 the aggregate maximum has not been hit, the scheduler looks for classes
1259 whose maximum has not been satisfied. Iteration through the I/O classes is
1260 done in the order specified above. No further operations are issued if the
1261 aggregate maximum number of concurrent operations has been hit or if there
1262 are no operations queued for an I/O class that has not hit its maximum.
1263 Every time an I/O is queued or an operation completes, the I/O scheduler
1264 looks for new operations to issue.
1265 .sp
1266 In general, smaller max_active values will lead to lower latency of synchronous
1267 operations. Larger max_active values may lead to higher overall throughput,
1268 depending on underlying storage.
1269 .sp
1270 The ratio of the queues' max_active values determines the balance of performance
1271 between reads, writes, and scrubs. E.g., increasing
1272 \fBzfs_vdev_scrub_max_active\fR will cause the scrub or resilver to complete
1273 more quickly, but will cause reads and writes to have higher latency and lower throughput.
1274 .sp
1275 All I/O classes have a fixed maximum number of outstanding operations
1276 except for the async write class. Asynchronous writes represent the data
1277 that is committed to stable storage during the syncing stage for
1278 transaction groups. Transaction groups enter the syncing state
1279 periodically so the number of queued async writes will quickly burst up
1280 and then bleed down to zero. Rather than servicing them as quickly as
1281 possible, the I/O scheduler changes the maximum number of active async
1282 write I/Os according to the amount of dirty data in the pool. Since
1283 both throughput and latency typically increase with the number of
1284 concurrent operations issued to physical devices, reducing the
1285 burstiness in the number of concurrent operations also stabilizes the
1286 response time of operations from other -- and in particular synchronous
1287 -- queues. In broad strokes, the I/O scheduler will issue more
1288 concurrent operations from the async write queue as there's more dirty
1289 data in the pool.
1290 .sp
1291 Async Writes
1292 .sp
1293 The number of concurrent operations issued for the async write I/O class
1294 follows a piece-wise linear function defined by a few adjustable points.
1295 .nf
1296
1297 | o---------| <-- zfs_vdev_async_write_max_active
1298 ^ | /^ |
1299 | | / | |
1300 active | / | |
1301 I/O | / | |
1302 count | / | |
1303 | / | |
1304 |-------o | | <-- zfs_vdev_async_write_min_active
1305 0|_______^______|_________|
1306 0% | | 100% of zfs_dirty_data_max
1307 | |
1308 | `-- zfs_vdev_async_write_active_max_dirty_percent
1309 `--------- zfs_vdev_async_write_active_min_dirty_percent
1310
1311 .fi
1312 Until the amount of dirty data exceeds a minimum percentage of the dirty
1313 data allowed in the pool, the I/O scheduler will limit the number of
1314 concurrent operations to the minimum. As that threshold is crossed, the
1315 number of concurrent operations issued increases linearly to the maximum at
1316 the specified maximum percentage of the dirty data allowed in the pool.
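.sp
For example, with the defaults documented above (min_active 1, max_active 10,
ramping between 30% and 60% of \fBzfs_dirty_data_max\fR), a pool holding 45%
of its allowed dirty data would be issued roughly the midpoint number of
concurrent async writes (a sketch of the linear interpolation described here,
ignoring integer rounding):
.sp
.nf
limit = 1 + (45 - 30) / (60 - 30) * (10 - 1)
      = 1 + 0.5 * 9
      = 5.5, or about 5 concurrent async write I/Os
.fi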
1317 .sp
1318 Ideally, the amount of dirty data on a busy pool will stay in the sloped
1319 part of the function between \fBzfs_vdev_async_write_active_min_dirty_percent\fR
1320 and \fBzfs_vdev_async_write_active_max_dirty_percent\fR. If it exceeds the
1321 maximum percentage, this indicates that the rate of incoming data is
1322 greater than the rate that the backend storage can handle. In this case, we
1323 must further throttle incoming writes, as described in the next section.
1324
1325 .SH ZFS TRANSACTION DELAY
1326 We delay transactions when we've determined that the backend storage
1327 isn't able to accommodate the rate of incoming writes.
1328 .sp
1329 If there is already a transaction waiting, we delay relative to when
1330 that transaction will finish waiting. This way the calculated delay time
1331 is independent of the number of threads concurrently executing
1332 transactions.
1333 .sp
1334 If we are the only waiter, wait relative to when the transaction
1335 started, rather than the current time. This credits the transaction for
1336 "time already served", e.g. reading indirect blocks.
1337 .sp
1338 The minimum time for a transaction to take is calculated as:
1339 .nf
1340 min_time = zfs_delay_scale * (dirty - min) / (max - dirty)
1341 min_time is then capped at 100 milliseconds.
1342 .fi
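.sp
In the formula above, "min" can be read as the amount of dirty data at which
delays begin (\fBzfs_delay_min_dirty_percent\fR of \fBzfs_dirty_data_max\fR)
and "max" as \fBzfs_dirty_data_max\fR itself. As a worked example
(illustrative numbers; \fBzfs_delay_scale\fR is treated as a delay in
nanoseconds, which matches the 500us midpoint discussed below), assume
\fBzfs_dirty_data_max\fR is 4 GB, \fBzfs_delay_min_dirty_percent\fR is 60 so
that delays begin at 2.4 GB, and 3.2 GB is currently dirty:
.sp
.nf
min_time = 500,000 * (3.2 GB - 2.4 GB) / (4.0 GB - 3.2 GB)
         = 500,000 * 1
         = 500,000 ns, i.e. 500 us of delay per transaction
.fi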
1343 .sp
1344 The delay has two degrees of freedom that can be adjusted via tunables. The
1345 percentage of dirty data at which we start to delay is defined by
1346 \fBzfs_delay_min_dirty_percent\fR. This should typically be at or above
1347 \fBzfs_vdev_async_write_active_max_dirty_percent\fR so that we only start to
1348 delay after writing at full speed has failed to keep up with the incoming write
1349 rate. The scale of the curve is defined by \fBzfs_delay_scale\fR. Roughly speaking,
1350 this variable determines the amount of delay at the midpoint of the curve.
1351 .sp
1352 .nf
1353 delay
1354 10ms +-------------------------------------------------------------*+
1355 | *|
1356 9ms + *+
1357 | *|
1358 8ms + *+
1359 | * |
1360 7ms + * +
1361 | * |
1362 6ms + * +
1363 | * |
1364 5ms + * +
1365 | * |
1366 4ms + * +
1367 | * |
1368 3ms + * +
1369 | * |
1370 2ms + (midpoint) * +
1371 | | ** |
1372 1ms + v *** +
1373 | zfs_delay_scale ----------> ******** |
1374 0 +-------------------------------------*********----------------+
1375 0% <- zfs_dirty_data_max -> 100%
1376 .fi
1377 .sp
1378 Note that since the delay is added to the outstanding time remaining on the
1379 most recent transaction, the delay is effectively the inverse of IOPS.
1380 Here the midpoint of 500us translates to 2000 IOPS. The shape of the curve
1381 was chosen such that small changes in the amount of accumulated dirty data
1382 in the first 3/4 of the curve yield relatively small differences in the
1383 amount of delay.
1384 .sp
1385 The effects can be easier to understand when the amount of delay is
1386 represented on a log scale:
1387 .sp
1388 .nf
1389 delay
1390 100ms +-------------------------------------------------------------++
1391 + +
1392 | |
1393 + *+
1394 10ms + *+
1395 + ** +
1396 | (midpoint) ** |
1397 + | ** +
1398 1ms + v **** +
1399 + zfs_delay_scale ----------> ***** +
1400 | **** |
1401 + **** +
1402 100us + ** +
1403 + * +
1404 | * |
1405 + * +
1406 10us + * +
1407 + +
1408 | |
1409 + +
1410 +--------------------------------------------------------------+
1411 0% <- zfs_dirty_data_max -> 100%
1412 .fi
1413 .sp
1414 Note here that only as the amount of dirty data approaches its limit does
1415 the delay start to increase rapidly. The goal of a properly tuned system
1416 should be to keep the amount of dirty data out of that range by first
1417 ensuring that the appropriate limits are set for the I/O scheduler to reach
1418 optimal throughput on the backend storage, and then by changing the value
1419 of \fBzfs_delay_scale\fR to increase the steepness of the curve.