2 .\" Copyright (c) 2013 by Turbo Fredriksson <turbo@bayour.com>. All rights reserved.
3 .\" The contents of this file are subject to the terms of the Common Development
4 .\" and Distribution License (the "License"). You may not use this file except
5 .\" in compliance with the License. You can obtain a copy of the license at
6 .\" usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing.
8 .\" See the License for the specific language governing permissions and
9 .\" limitations under the License. When distributing Covered Code, include this
10 .\" CDDL HEADER in each file and include the License file at
11 .\" usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this
12 .\" CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your
13 .\" own identifying information:
14 .\" Portions Copyright [yyyy] [name of copyright owner]
.TH ZFS-MODULE-PARAMETERS 5 "Nov 16, 2013"
.SH NAME
zfs\-module\-parameters \- ZFS module parameters
.SH DESCRIPTION
Description of the different parameters to the ZFS module.
.SS "Module parameters"
.TP
\fBl2arc_feed_again\fR (int)
Turbo L2ARC warmup.
Use \fB1\fR for yes (default) and \fB0\fR to disable.
.TP
\fBl2arc_feed_min_ms\fR (ulong)
Min feed interval in milliseconds.
Default value: \fB200\fR.
.TP
\fBl2arc_feed_secs\fR (ulong)
Seconds between L2ARC writing.
Default value: \fB1\fR.
.TP
\fBl2arc_headroom\fR (ulong)
Number of max device writes to precache.
Default value: \fB2\fR.
.TP
\fBl2arc_headroom_boost\fR (ulong)
Compressed l2arc_headroom multiplier.
Default value: \fB200\fR.
.TP
\fBl2arc_nocompress\fR (int)
Skip compressing L2ARC buffers.
Use \fB1\fR for yes and \fB0\fR for no (default).
.TP
\fBl2arc_noprefetch\fR (int)
Skip caching prefetched buffers.
Use \fB1\fR for yes (default) and \fB0\fR to disable.
.TP
\fBl2arc_norw\fR (int)
No reads during writes.
Use \fB1\fR for yes and \fB0\fR for no (default).
.TP
\fBl2arc_write_boost\fR (ulong)
Extra write bytes during device warmup.
Default value: \fB8,388,608\fR.
.TP
\fBl2arc_write_max\fR (ulong)
Max write bytes per interval.
Default value: \fB8,388,608\fR.
.TP
\fBmetaslab_debug_load\fR (int)
Load all metaslabs during pool import.
Use \fB1\fR for yes and \fB0\fR for no (default).
.TP
\fBmetaslab_debug_unload\fR (int)
Prevent metaslabs from being unloaded.
Use \fB1\fR for yes and \fB0\fR for no (default).
.TP
\fBspa_config_path\fR (charp)
SPA config file.
Default value: \fB/etc/zfs/zpool.cache\fR.
.TP
\fBspa_asize_inflation\fR (int)
Multiplication factor used to estimate actual disk consumption from the
size of data being written. The default value is a worst-case estimate,
but lower values may be valid for a given pool depending on its
configuration. Pool administrators who understand the factors involved
may wish to specify a more realistic inflation factor, particularly if
they operate close to quota or capacity limits.
Default value: \fB24\fR.
.TP
\fBzfetch_array_rd_sz\fR (ulong)
If prefetching is enabled, disable prefetching for reads larger than this size.
Default value: \fB1,048,576\fR.
.TP
\fBzfetch_block_cap\fR (uint)
Max number of blocks to prefetch at a time.
Default value: \fB256\fR.
.TP
\fBzfetch_max_streams\fR (uint)
Max number of streams per zfetch (prefetch streams per file).
Default value: \fB8\fR.
.TP
\fBzfetch_min_sec_reap\fR (uint)
Min time before an active prefetch stream can be reclaimed.
Default value: \fB2\fR.
.TP
\fBzfs_arc_grow_retry\fR (int)
Seconds before growing arc size.
Default value: \fB5\fR.
.TP
\fBzfs_arc_max\fR (ulong)
Max arc size.
Default value: \fB0\fR.
.TP
\fBzfs_arc_memory_throttle_disable\fR (int)
Disable memory throttle.
Use \fB1\fR for yes (default) and \fB0\fR to disable.
.TP
\fBzfs_arc_meta_limit\fR (ulong)
Meta limit for arc size.
Default value: \fB0\fR.
.TP
\fBzfs_arc_meta_prune\fR (int)
Bytes of meta data to prune.
Default value: \fB1,048,576\fR.
.TP
\fBzfs_arc_min\fR (ulong)
Min arc size.
Default value: \fB100\fR.
.TP
\fBzfs_arc_min_prefetch_lifespan\fR (int)
Min life of prefetch block.
Default value: \fB100\fR.
.TP
\fBzfs_arc_p_aggressive_disable\fR (int)
Disable aggressive arc_p growth.
Use \fB1\fR for yes (default) and \fB0\fR to disable.
.TP
\fBzfs_arc_p_dampener_disable\fR (int)
Disable arc_p adapt dampener.
Use \fB1\fR for yes (default) and \fB0\fR to disable.
.TP
\fBzfs_arc_shrink_shift\fR (int)
log2(fraction of arc to reclaim).
Default value: \fB5\fR.
.TP
\fBzfs_autoimport_disable\fR (int)
Disable pool import at module load by ignoring the cache file (typically \fB/etc/zfs/zpool.cache\fR).
Use \fB1\fR for yes and \fB0\fR for no (default).
.TP
\fBzfs_dbuf_state_index\fR (int)
Calculate arc header index.
Default value: \fB0\fR.
.TP
\fBzfs_deadman_enabled\fR (int)
Enable deadman timer.
Use \fB1\fR for yes (default) and \fB0\fR to disable.
.TP
\fBzfs_deadman_synctime_ms\fR (ulong)
Expiration time in milliseconds. This value has two meanings. First, it
determines when the spa_deadman() logic should fire: by default,
spa_deadman() fires if spa_sync() has not completed in 1,000 seconds.
Second, it determines whether an I/O is considered "hung": any I/O that
has not completed within \fBzfs_deadman_synctime_ms\fR is considered
"hung", and a zevent is logged for it.
Default value: \fB1,000,000\fR.
.TP
\fBzfs_dedup_prefetch\fR (int)
Enable prefetching of dedup-ed blocks.
Use \fB1\fR for yes (default) and \fB0\fR to disable.
.TP
\fBzfs_delay_min_dirty_percent\fR (int)
Start to delay each transaction once there is this amount of dirty data,
expressed as a percentage of \fBzfs_dirty_data_max\fR.
This value should be >= \fBzfs_vdev_async_write_active_max_dirty_percent\fR.
See the section "ZFS TRANSACTION DELAY".
Default value: \fB60\fR.
.TP
\fBzfs_delay_scale\fR (int)
This controls how quickly the transaction delay approaches infinity.
Larger values cause longer delays for a given amount of dirty data.
For the smoothest delay, this value should be about 1 billion divided
by the maximum number of operations per second. This will smoothly
handle between 10x and 1/10th this number.
See the section "ZFS TRANSACTION DELAY".
Note: \fBzfs_delay_scale\fR * \fBzfs_dirty_data_max\fR must be < 2^64.
Default value: \fB500,000\fR.
.TP
\fBzfs_dirty_data_max\fR (int)
Determines the dirty space limit in bytes. Once this limit is exceeded, new
writes are halted until space frees up. This parameter takes precedence
over \fBzfs_dirty_data_max_percent\fR.
See the section "ZFS TRANSACTION DELAY".
Default value: 10 percent of all memory, capped at \fBzfs_dirty_data_max_max\fR.
.TP
\fBzfs_dirty_data_max_max\fR (int)
Maximum allowable value of \fBzfs_dirty_data_max\fR, expressed in bytes.
This limit is only enforced at module load time, and will be ignored if
\fBzfs_dirty_data_max\fR is later changed. This parameter takes
precedence over \fBzfs_dirty_data_max_max_percent\fR. See the section
"ZFS TRANSACTION DELAY".
Default value: 25% of physical RAM.
.TP
\fBzfs_dirty_data_max_max_percent\fR (int)
Maximum allowable value of \fBzfs_dirty_data_max\fR, expressed as a
percentage of physical RAM. This limit is only enforced at module load
time, and will be ignored if \fBzfs_dirty_data_max\fR is later changed.
The parameter \fBzfs_dirty_data_max_max\fR takes precedence over this
one. See the section "ZFS TRANSACTION DELAY".
Default value: \fB25\fR.
.TP
\fBzfs_dirty_data_max_percent\fR (int)
Determines the dirty space limit, expressed as a percentage of all
memory. Once this limit is exceeded, new writes are halted until space frees
up. The parameter \fBzfs_dirty_data_max\fR takes precedence over this
one. See the section "ZFS TRANSACTION DELAY".
Default value: 10%, subject to \fBzfs_dirty_data_max_max\fR.
.TP
\fBzfs_dirty_data_sync\fR (int)
Start syncing out a transaction group if there is at least this much dirty data.
Default value: \fB67,108,864\fR.
.TP
\fBzfs_vdev_async_read_max_active\fR (int)
Maximum asynchronous read I/Os active to each device.
See the section "ZFS I/O SCHEDULER".
Default value: \fB3\fR.
.TP
\fBzfs_vdev_async_read_min_active\fR (int)
Minimum asynchronous read I/Os active to each device.
See the section "ZFS I/O SCHEDULER".
Default value: \fB1\fR.
.TP
\fBzfs_vdev_async_write_active_max_dirty_percent\fR (int)
When the pool has more than
\fBzfs_vdev_async_write_active_max_dirty_percent\fR dirty data, use
\fBzfs_vdev_async_write_max_active\fR to limit active async writes. If
the dirty data is between min and max, the active I/O limit is linearly
interpolated. See the section "ZFS I/O SCHEDULER".
Default value: \fB60\fR.
.TP
\fBzfs_vdev_async_write_active_min_dirty_percent\fR (int)
When the pool has less than
\fBzfs_vdev_async_write_active_min_dirty_percent\fR dirty data, use
\fBzfs_vdev_async_write_min_active\fR to limit active async writes. If
the dirty data is between min and max, the active I/O limit is linearly
interpolated. See the section "ZFS I/O SCHEDULER".
Default value: \fB30\fR.
.TP
\fBzfs_vdev_async_write_max_active\fR (int)
Maximum asynchronous write I/Os active to each device.
See the section "ZFS I/O SCHEDULER".
Default value: \fB10\fR.
.TP
\fBzfs_vdev_async_write_min_active\fR (int)
Minimum asynchronous write I/Os active to each device.
See the section "ZFS I/O SCHEDULER".
Default value: \fB1\fR.
.TP
\fBzfs_vdev_max_active\fR (int)
The maximum number of I/Os active to each device. Ideally, this will be >=
the sum of each queue's max_active. It must be at least the sum of each
queue's min_active. See the section "ZFS I/O SCHEDULER".
Default value: \fB1,000\fR.
.TP
\fBzfs_vdev_scrub_max_active\fR (int)
Maximum scrub I/Os active to each device.
See the section "ZFS I/O SCHEDULER".
Default value: \fB2\fR.
.TP
\fBzfs_vdev_scrub_min_active\fR (int)
Minimum scrub I/Os active to each device.
See the section "ZFS I/O SCHEDULER".
Default value: \fB1\fR.
.TP
\fBzfs_vdev_sync_read_max_active\fR (int)
Maximum synchronous read I/Os active to each device.
See the section "ZFS I/O SCHEDULER".
Default value: \fB10\fR.
.TP
\fBzfs_vdev_sync_read_min_active\fR (int)
Minimum synchronous read I/Os active to each device.
See the section "ZFS I/O SCHEDULER".
Default value: \fB10\fR.
.TP
\fBzfs_vdev_sync_write_max_active\fR (int)
Maximum synchronous write I/Os active to each device.
See the section "ZFS I/O SCHEDULER".
Default value: \fB10\fR.
.TP
\fBzfs_vdev_sync_write_min_active\fR (int)
Minimum synchronous write I/Os active to each device.
See the section "ZFS I/O SCHEDULER".
Default value: \fB10\fR.
.TP
\fBzfs_disable_dup_eviction\fR (int)
Disable duplicate buffer eviction.
Use \fB1\fR for yes and \fB0\fR for no (default).
.TP
\fBzfs_expire_snapshot\fR (int)
Seconds to expire .zfs/snapshot.
Default value: \fB300\fR.
.TP
\fBzfs_flags\fR (int)
Set additional debugging flags.
Default value: \fB1\fR.
.TP
\fBzfs_free_min_time_ms\fR (int)
Min milliseconds to free per txg.
Default value: \fB1,000\fR.
.TP
\fBzfs_immediate_write_sz\fR (long)
Largest data block to write to the ZIL.
Default value: \fB32,768\fR.
.TP
\fBzfs_mdcomp_disable\fR (int)
Disable metadata compression.
Use \fB1\fR for yes and \fB0\fR for no (default).
.TP
\fBzfs_mg_noalloc_threshold\fR (int)
Defines a threshold at which metaslab groups should be eligible for
allocations. The value is expressed as a percentage of free space
beyond which a metaslab group is always eligible for allocations.
If a metaslab group's free space is less than or equal to the
threshold, the allocator will avoid allocating to that group
unless all groups in the pool have reached the threshold. Once all
groups have reached the threshold, all groups are allowed to accept
allocations. The default value of 0 disables the feature and causes
all metaslab groups to be eligible for allocations.
.sp
This parameter makes it possible to deal with pools having heavily imbalanced
vdevs such as would be the case when a new vdev has been added.
Setting the threshold to a non-zero percentage will stop allocations
from being made to vdevs that aren't filled to the specified percentage
and allow lesser filled vdevs to acquire more allocations than they
otherwise would under the old \fBzfs_mg_alloc_failures\fR facility.
Default value: \fB0\fR.
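The eligibility rule can be sketched in a few lines of Python. This is an
illustrative model only, not the in-kernel allocator; the function and group
names are hypothetical:

```python
def eligible_groups(free_pct_by_group, threshold):
    """Return the metaslab groups an allocation may target under a
    zfs_mg_noalloc_threshold-style rule (simplified model).

    free_pct_by_group maps a group name to its free space in percent.
    """
    # A threshold of 0 disables the feature: every group is eligible.
    if threshold == 0:
        return list(free_pct_by_group)
    # Groups whose free space exceeds the threshold are preferred.
    above = [g for g, free in free_pct_by_group.items() if free > threshold]
    # Only when every group is at or below the threshold do all
    # groups become eligible again.
    return above if above else list(free_pct_by_group)

# A newly added vdev (90% free) absorbs allocations; the nearly full
# one is skipped until both drop to the threshold.
groups = {"old-vdev": 5, "new-vdev": 90}
print(eligible_groups(groups, 30))   # ['new-vdev']
print(eligible_groups(groups, 0))    # both eligible (feature disabled)
```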
.TP
\fBzfs_no_scrub_io\fR (int)
Set for no scrub I/O.
Use \fB1\fR for yes and \fB0\fR for no (default).
.TP
\fBzfs_no_scrub_prefetch\fR (int)
Set for no scrub prefetching.
Use \fB1\fR for yes and \fB0\fR for no (default).
.TP
\fBzfs_nocacheflush\fR (int)
Disable cache flushes.
Use \fB1\fR for yes and \fB0\fR for no (default).
.TP
\fBzfs_nopwrite_enabled\fR (int)
Enable NOP writes.
Use \fB1\fR for yes (default) and \fB0\fR to disable.
.TP
\fBzfs_pd_blks_max\fR (int)
Max number of blocks to prefetch.
Default value: \fB100\fR.
.TP
\fBzfs_prefetch_disable\fR (int)
Disable all ZFS prefetching.
Use \fB1\fR for yes and \fB0\fR for no (default).
.TP
\fBzfs_read_chunk_size\fR (long)
Bytes to read per chunk.
Default value: \fB1,048,576\fR.
.TP
\fBzfs_read_history\fR (int)
Historic statistics for the last N reads.
Default value: \fB0\fR.
.TP
\fBzfs_read_history_hits\fR (int)
Include cache hits in read history.
Use \fB1\fR for yes and \fB0\fR for no (default).
.TP
\fBzfs_recover\fR (int)
Set to attempt to recover from fatal errors. This should only be used as a
last resort, as it typically results in leaked space, or worse.
Use \fB1\fR for yes and \fB0\fR for no (default).
.TP
\fBzfs_resilver_delay\fR (int)
Number of ticks to delay prior to issuing a resilver I/O operation when
a non-resilver or non-scrub I/O operation has occurred within the past
\fBzfs_scan_idle\fR ticks.
Default value: \fB2\fR.
.TP
\fBzfs_resilver_min_time_ms\fR (int)
Min milliseconds to resilver per txg.
Default value: \fB3,000\fR.
.TP
\fBzfs_scan_idle\fR (int)
Idle window in clock ticks. During a scrub or a resilver, if
a non-scrub or non-resilver I/O operation has occurred during this
window, the next scrub or resilver operation is delayed by
\fBzfs_scrub_delay\fR or \fBzfs_resilver_delay\fR ticks, respectively.
Default value: \fB50\fR.
.TP
\fBzfs_scan_min_time_ms\fR (int)
Min milliseconds to scrub per txg.
Default value: \fB1,000\fR.
.TP
\fBzfs_scrub_delay\fR (int)
Number of ticks to delay prior to issuing a scrub I/O operation when
a non-scrub or non-resilver I/O operation has occurred within the past
\fBzfs_scan_idle\fR ticks.
Default value: \fB4\fR.
.TP
\fBzfs_send_corrupt_data\fR (int)
Allow sending of corrupt data (ignore read/checksum errors when sending data).
Use \fB1\fR for yes and \fB0\fR for no (default).
.TP
\fBzfs_sync_pass_deferred_free\fR (int)
Defer frees starting in this pass.
Default value: \fB2\fR.
.TP
\fBzfs_sync_pass_dont_compress\fR (int)
Don't compress starting in this pass.
Default value: \fB5\fR.
.TP
\fBzfs_sync_pass_rewrite\fR (int)
Rewrite new bps starting in this pass.
Default value: \fB2\fR.
.TP
\fBzfs_top_maxinflight\fR (int)
Max I/Os per top-level vdev during scrub or resilver operations.
Default value: \fB32\fR.
.TP
\fBzfs_txg_history\fR (int)
Historic statistics for the last N txgs.
Default value: \fB0\fR.
.TP
\fBzfs_txg_timeout\fR (int)
Max seconds worth of delta per txg.
Default value: \fB5\fR.
.TP
\fBzfs_vdev_aggregation_limit\fR (int)
Max vdev I/O aggregation size.
Default value: \fB131,072\fR.
.TP
\fBzfs_vdev_cache_bshift\fR (int)
Shift size to inflate reads to.
Default value: \fB16\fR.
.TP
\fBzfs_vdev_cache_max\fR (int)
Inflate reads smaller than this value.
Default value: \fB16,384\fR.
.TP
\fBzfs_vdev_cache_size\fR (int)
Total size of the per-disk cache.
Default value: \fB0\fR.
.TP
\fBzfs_vdev_mirror_switch_us\fR (int)
Switch mirrors every N usecs.
Default value: \fB10,000\fR.
.TP
\fBzfs_vdev_read_gap_limit\fR (int)
Aggregate read I/O over gap.
Default value: \fB32,768\fR.
.TP
\fBzfs_vdev_scheduler\fR (charp)
I/O scheduler.
Default value: \fBnoop\fR.
.TP
\fBzfs_vdev_write_gap_limit\fR (int)
Aggregate write I/O over gap.
Default value: \fB4,096\fR.
.TP
\fBzfs_zevent_cols\fR (int)
Max event column width.
Default value: \fB80\fR.
.TP
\fBzfs_zevent_console\fR (int)
Log events to the console.
Use \fB1\fR for yes and \fB0\fR for no (default).
.TP
\fBzfs_zevent_len_max\fR (int)
Max event queue length.
Default value: \fB0\fR.
.TP
\fBzil_replay_disable\fR (int)
Disable intent logging replay.
Use \fB1\fR for yes and \fB0\fR for no (default).
.TP
\fBzil_slog_limit\fR (ulong)
Max commit bytes to separate log device.
Default value: \fB1,048,576\fR.
.TP
\fBzio_bulk_flags\fR (int)
Additional flags to pass to bulk buffers.
Default value: \fB0\fR.
.TP
\fBzio_delay_max\fR (int)
Max zio millisecond delay before posting an event.
Default value: \fB30,000\fR.
.TP
\fBzio_injection_enabled\fR (int)
Enable fault injection.
Use \fB1\fR for yes and \fB0\fR for no (default).
.TP
\fBzio_requeue_io_start_cut_in_line\fR (int)
Prioritize requeued I/O.
Default value: \fB0\fR.
.TP
\fBzvol_inhibit_dev\fR (uint)
Do not create zvol device nodes.
Use \fB1\fR for yes and \fB0\fR for no (default).
.TP
\fBzvol_major\fR (uint)
Major number for zvol device.
Default value: \fB230\fR.
.TP
\fBzvol_max_discard_blocks\fR (ulong)
Max number of blocks to discard at once.
Default value: \fB16,384\fR.
.TP
\fBzvol_threads\fR (uint)
Number of threads for zvol device.
Default value: \fB32\fR.
.SH ZFS I/O SCHEDULER
ZFS issues I/O operations to leaf vdevs to satisfy and complete I/Os.
The I/O scheduler determines when and in what order those operations are
issued. The I/O scheduler divides operations into five I/O classes
prioritized in the following order: sync read, sync write, async read,
async write, and scrub/resilver. Each queue defines the minimum and
maximum number of concurrent operations that may be issued to the
device. In addition, the device has an aggregate maximum,
\fBzfs_vdev_max_active\fR. Note that the sum of the per-queue minimums
must not exceed the aggregate maximum. If the sum of the per-queue
maximums exceeds the aggregate maximum, then the number of active I/Os
may reach \fBzfs_vdev_max_active\fR, in which case no further I/Os will
be issued regardless of whether all per-queue minimums have been met.
.sp
For many physical devices, throughput increases with the number of
concurrent operations, but latency typically suffers. Further, physical
devices typically have a limit at which more concurrent operations have no
effect on throughput or can actually cause it to decrease.
.sp
The scheduler selects the next operation to issue by first looking for an
I/O class whose minimum has not been satisfied. Once all are satisfied and
the aggregate maximum has not been hit, the scheduler looks for classes
whose maximum has not been satisfied. Iteration through the I/O classes is
done in the order specified above. No further operations are issued if the
aggregate maximum number of concurrent operations has been hit or if there
are no operations queued for an I/O class that has not hit its maximum.
Every time an I/O is queued or an operation completes, the I/O scheduler
looks for new operations to issue.
.sp
In general, smaller max_active's will lead to lower latency of synchronous
operations. Larger max_active's may lead to higher overall throughput,
depending on underlying storage.
.sp
The ratio of the queues' max_actives determines the balance of performance
between reads, writes, and scrubs. E.g., increasing
\fBzfs_vdev_scrub_max_active\fR will cause the scrub or resilver to complete
more quickly, but reads and writes to have higher latency and lower throughput.
.sp
All I/O classes have a fixed maximum number of outstanding operations
except for the async write class. Asynchronous writes represent the data
that is committed to stable storage during the syncing stage for
transaction groups. Transaction groups enter the syncing state
periodically so the number of queued async writes will quickly burst up
and then bleed down to zero. Rather than servicing them as quickly as
possible, the I/O scheduler changes the maximum number of active async
write I/Os according to the amount of dirty data in the pool. Since
both throughput and latency typically increase with the number of
concurrent operations issued to physical devices, reducing the
burstiness in the number of concurrent operations also stabilizes the
response time of operations from other -- and in particular synchronous
-- queues. In broad strokes, the I/O scheduler will issue more
concurrent operations from the async write queue as there's more dirty
data in the pool.
.sp
The number of concurrent operations issued for the async write I/O class
follows a piece-wise linear function defined by a few adjustable points.
.sp
.nf
       |              o---------| <-- zfs_vdev_async_write_max_active
  ^    |             /^         |
  |    |            / |         |
active |           /  |         |
 I/O   |          /   |         |
count  |         /    |         |
       |        /     |         |
       |-------o      |         | <-- zfs_vdev_async_write_min_active
      0|_______^______|_________|
       0%      |      |        100% of zfs_dirty_data_max
               |      |
               |      `-- zfs_vdev_async_write_active_max_dirty_percent
               `--------- zfs_vdev_async_write_active_min_dirty_percent
.fi
.sp
Until the amount of dirty data exceeds a minimum percentage of the dirty
data allowed in the pool, the I/O scheduler will limit the number of
concurrent operations to the minimum. As that threshold is crossed, the
number of concurrent operations issued increases linearly to the maximum at
the specified maximum percentage of the dirty data allowed in the pool.
.sp
Ideally, the amount of dirty data on a busy pool will stay in the sloped
part of the function between \fBzfs_vdev_async_write_active_min_dirty_percent\fR
and \fBzfs_vdev_async_write_active_max_dirty_percent\fR. If it exceeds the
maximum percentage, this indicates that the rate of incoming data is
greater than the rate that the backend storage can handle. In this case, we
must further throttle incoming writes, as described in the next section.
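The scaling described above can be modeled as a simple piece-wise linear
interpolation. This is an illustrative sketch using the default tunable
values, not the in-kernel implementation:

```python
def async_write_max_active(dirty_pct,
                           min_active=1,    # zfs_vdev_async_write_min_active
                           max_active=10,   # zfs_vdev_async_write_max_active
                           min_dirty=30,    # ..._active_min_dirty_percent
                           max_dirty=60):   # ..._active_max_dirty_percent
    """Active async write I/Os allowed for a given amount of dirty data,
    expressed as a percent of zfs_dirty_data_max (simplified model)."""
    if dirty_pct <= min_dirty:
        return min_active
    if dirty_pct >= max_dirty:
        return max_active
    # Linear interpolation between the two breakpoints.
    span = (dirty_pct - min_dirty) / (max_dirty - min_dirty)
    return min_active + int(span * (max_active - min_active))

print(async_write_max_active(10))   # 1  (below min_dirty: use min_active)
print(async_write_max_active(45))   # 5  (midway up the slope)
print(async_write_max_active(80))   # 10 (above max_dirty: use max_active)
```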
.SH ZFS TRANSACTION DELAY
We delay transactions when we've determined that the backend storage
isn't able to accommodate the rate of incoming writes.
.sp
If there is already a transaction waiting, we delay relative to when
that transaction will finish waiting. This way the calculated delay time
is independent of the number of threads concurrently executing
transactions.
.sp
If we are the only waiter, wait relative to when the transaction
started, rather than the current time. This credits the transaction for
"time already served", e.g. reading indirect blocks.
.sp
The minimum time for a transaction to take is calculated as:
.sp
.nf
    min_time = zfs_delay_scale * (dirty - min) / (max - dirty)
    min_time is then capped at 100 milliseconds.
.fi
.sp
The delay has two degrees of freedom that can be adjusted via tunables. The
percentage of dirty data at which we start to delay is defined by
\fBzfs_delay_min_dirty_percent\fR. This should typically be at or above
\fBzfs_vdev_async_write_active_max_dirty_percent\fR so that we only start to
delay after writing at full speed has failed to keep up with the incoming write
rate. The scale of the curve is defined by \fBzfs_delay_scale\fR. Roughly speaking,
this variable determines the amount of delay at the midpoint of the curve.
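As a sketch, the formula above can be evaluated numerically. This is an
illustrative model, not the in-kernel code: dirty_min stands for
\fBzfs_delay_min_dirty_percent\fR of \fBzfs_dirty_data_max\fR and dirty_max
for \fBzfs_dirty_data_max\fR itself, here both expressed in percent, with
\fBzfs_delay_scale\fR in nanoseconds:

```python
def tx_delay_ns(dirty, dirty_min, dirty_max, delay_scale=500_000):
    """Minimum transaction time in nanoseconds:
    min_time = zfs_delay_scale * (dirty - min) / (max - dirty),
    capped at 100 milliseconds (simplified model)."""
    if dirty <= dirty_min:
        return 0                    # below the delay threshold: no delay
    if dirty >= dirty_max:
        return 100_000_000          # cap: 100 ms in nanoseconds
    delay = delay_scale * (dirty - dirty_min) // (dirty_max - dirty)
    return min(delay, 100_000_000)

# With zfs_delay_min_dirty_percent=60, the midpoint of the curve
# (dirty = 80%) yields zfs_delay_scale itself:
print(tx_delay_ns(80, 60, 100))     # 500000 ns = 500us, i.e. ~2000 IOPS
```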
.sp
.nf
10ms +-------------------------------------------------------------*+
     |                                                             * |
 2ms +                                              (midpoint)    *  +
     |       zfs_delay_scale ---------->           ********          |
   0 +-------------------------------------*********----------------+
     0%                  <- zfs_dirty_data_max ->                100%
.fi
.sp
Note that since the delay is added to the outstanding time remaining on the
most recent transaction, the delay is effectively the inverse of IOPS.
Here the midpoint of 500us translates to 2000 IOPS. The shape of the curve
was chosen such that small changes in the amount of accumulated dirty data
in the first 3/4 of the curve yield relatively small differences in the
amount of delay.
.sp
The effects can be easier to understand when the amount of delay is
represented on a log scale:
.sp
.nf
100ms +------------------------------------------------------------*+
      |                                                            * |
      |       zfs_delay_scale ---------->               *****        |
      +--------------------------------------------------------------+
      0%                  <- zfs_dirty_data_max ->                100%
.fi
.sp
Note here that only as the amount of dirty data approaches its limit does
the delay start to increase rapidly. The goal of a properly tuned system
should be to keep the amount of dirty data out of that range by first
ensuring that the appropriate limits are set for the I/O scheduler to reach
optimal throughput on the backend storage, and then by changing the value
of \fBzfs_delay_scale\fR to increase the steepness of the curve.