* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* since the physical block is about to be rewritten. The new data contents
* will be contained in the arc_buf_t. As the I/O pipeline performs the write,
* it may compress the data before writing it to disk. The ARC will be called
- * with the transformed data and will bcopy the transformed on-disk block into
+ * with the transformed data and will memcpy the transformed on-disk block into
* a newly allocated b_pabd. Writes are always done into buffers which have
* either been loaned (and hence are new and don't have other readers) or
* buffers which have been released (and hence have their own hdr, if there
#include <sys/vdev.h>
#include <sys/vdev_impl.h>
#include <sys/dsl_pool.h>
-#include <sys/zio_checksum.h>
#include <sys/multilist.h>
#include <sys/abd.h>
#include <sys/zil.h>
#include <sys/arc_impl.h>
#include <sys/trace_zfs.h>
#include <sys/aggsum.h>
+#include <sys/wmsum.h>
#include <cityhash.h>
#include <sys/vdev_trim.h>
+#include <sys/zfs_racct.h>
#include <sys/zstd/zstd.h>
#ifndef _KERNEL
* arc_evict(), which improves arc_is_overflowing().
*/
static zthr_t *arc_evict_zthr;
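+/*
+ * Marker headers preallocated for the eviction thread (arc_evict_zthr).
+ * Other callers of arc_evict_state() allocate their own markers via
+ * arc_state_alloc_markers().
+ */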
+static arc_buf_hdr_t **arc_state_evict_markers;
+static int arc_state_evict_marker_count;
static kmutex_t arc_evict_lock;
static boolean_t arc_evict_needed = B_FALSE;
* can still happen, even during the potentially long time that arc_size is
* more than arc_c.
*/
-int zfs_arc_eviction_pct = 200;
+static uint_t zfs_arc_eviction_pct = 200;
/*
* The number of headers to evict in arc_evict_state_impl() before
* oldest header in the arc state), but comes with higher overhead
* (i.e. more invocations of arc_evict_state_impl()).
*/
-int zfs_arc_evict_batch_limit = 10;
+static uint_t zfs_arc_evict_batch_limit = 10;
/* number of seconds before growing cache again */
-int arc_grow_retry = 5;
+uint_t arc_grow_retry = 5;
/*
* Minimum time between calls to arc_kmem_reap_soon().
*/
-int arc_kmem_cache_reap_retry_ms = 1000;
+static const int arc_kmem_cache_reap_retry_ms = 1000;
/* shift of arc_c for calculating overflow limit in arc_get_data_impl */
-int zfs_arc_overflow_shift = 8;
+static int zfs_arc_overflow_shift = 8;
/* shift of arc_c for calculating both min and max arc_p */
-int arc_p_min_shift = 4;
+static uint_t arc_p_min_shift = 4;
/* log2(fraction of arc to reclaim) */
-int arc_shrink_shift = 7;
+uint_t arc_shrink_shift = 7;
/* percent of pagecache to reclaim arc to */
#ifdef _KERNEL
* This must be less than arc_shrink_shift, so that when we shrink the ARC,
* we will still not allow it to grow.
*/
-int arc_no_grow_shift = 5;
+uint_t arc_no_grow_shift = 5;
/*
* minimum lifespan of a prefetch block in clock ticks
* (initialized in arc_init())
*/
-static int arc_min_prefetch_ms;
-static int arc_min_prescient_prefetch_ms;
+static uint_t arc_min_prefetch_ms;
+static uint_t arc_min_prescient_prefetch_ms;
/*
* If this percent of memory is free, don't throttle.
*/
-int arc_lotsfree_percent = 10;
+uint_t arc_lotsfree_percent = 10;
/*
* The arc has filled available memory and has now warmed up.
unsigned long zfs_arc_min = 0;
unsigned long zfs_arc_meta_limit = 0;
unsigned long zfs_arc_meta_min = 0;
-unsigned long zfs_arc_dnode_limit = 0;
-unsigned long zfs_arc_dnode_reduce_percent = 10;
-int zfs_arc_grow_retry = 0;
-int zfs_arc_shrink_shift = 0;
-int zfs_arc_p_min_shift = 0;
-int zfs_arc_average_blocksize = 8 * 1024; /* 8KB */
+static unsigned long zfs_arc_dnode_limit = 0;
+static unsigned long zfs_arc_dnode_reduce_percent = 10;
+static uint_t zfs_arc_grow_retry = 0;
+static uint_t zfs_arc_shrink_shift = 0;
+static uint_t zfs_arc_p_min_shift = 0;
+uint_t zfs_arc_average_blocksize = 8 * 1024; /* 8KB */
/*
- * ARC dirty data constraints for arc_tempreserve_space() throttle.
+ * ARC dirty data constraints for arc_tempreserve_space() throttle:
+ * * total dirty data limit
+ * * anon block dirty limit
+ * * each pool's anon allowance
*/
-unsigned long zfs_arc_dirty_limit_percent = 50; /* total dirty data limit */
-unsigned long zfs_arc_anon_limit_percent = 25; /* anon block dirty limit */
-unsigned long zfs_arc_pool_dirty_percent = 20; /* each pool's anon allowance */
+static const unsigned long zfs_arc_dirty_limit_percent = 50;
+static const unsigned long zfs_arc_anon_limit_percent = 25;
+static const unsigned long zfs_arc_pool_dirty_percent = 20;
/*
* Enable or disable compressed arc buffers.
* ARC will evict meta buffers that exceed arc_meta_limit. This
 * tunable makes arc_meta_limit adjustable for different workloads.
*/
-unsigned long zfs_arc_meta_limit_percent = 75;
+static unsigned long zfs_arc_meta_limit_percent = 75;
/*
* Percentage that can be consumed by dnodes of ARC meta buffers.
*/
-unsigned long zfs_arc_dnode_limit_percent = 10;
+static unsigned long zfs_arc_dnode_limit_percent = 10;
+
+/*
+ * These tunables are Linux-specific
+ */
+static unsigned long zfs_arc_sys_free = 0;
+static uint_t zfs_arc_min_prefetch_ms = 0;
+static uint_t zfs_arc_min_prescient_prefetch_ms = 0;
+static int zfs_arc_p_dampener_disable = 1;
+static uint_t zfs_arc_meta_prune = 10000;
+static uint_t zfs_arc_meta_strategy = ARC_STRATEGY_META_BALANCED;
+static uint_t zfs_arc_meta_adjust_restarts = 4096;
+static uint_t zfs_arc_lotsfree_percent = 10;
/*
- * These tunables are Linux specific
+ * Number of arc_prune threads
*/
-unsigned long zfs_arc_sys_free = 0;
-int zfs_arc_min_prefetch_ms = 0;
-int zfs_arc_min_prescient_prefetch_ms = 0;
-int zfs_arc_p_dampener_disable = 1;
-int zfs_arc_meta_prune = 10000;
-int zfs_arc_meta_strategy = ARC_STRATEGY_META_BALANCED;
-int zfs_arc_meta_adjust_restarts = 4096;
-int zfs_arc_lotsfree_percent = 10;
+static int zfs_arc_prune_task_threads = 1;
/* The 6 states: */
arc_state_t ARC_anon;
{ "abd_chunk_waste_size", KSTAT_DATA_UINT64 },
};
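+/*
+ * Frequently updated ARC statistics are kept in arc_sums (aggsum/wmsum
+ * counters) rather than in arc_stats itself, and are folded into the kstat
+ * values above only from the kstat update callback.
+ */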
+arc_sums_t arc_sums;
+
#define ARCSTAT_MAX(stat, val) { \
uint64_t m; \
while ((val) > (m = arc_stats.stat.value.ui64) && \
continue; \
}
-#define ARCSTAT_MAXSTAT(stat) \
- ARCSTAT_MAX(stat##_max, arc_stats.stat.value.ui64)
-
/*
* We define a macro to allow ARC hits/misses to be easily broken down by
* two separate conditions, giving a total of four different subtypes for
x = x - x / ARCSTAT_F_AVG_FACTOR + \
(value) / ARCSTAT_F_AVG_FACTOR; \
ARCSTAT(stat) = x; \
- _NOTE(CONSTCOND) \
} while (0)
-kstat_t *arc_ksp;
-static arc_state_t *arc_anon;
-static arc_state_t *arc_mru_ghost;
-static arc_state_t *arc_mfu_ghost;
-static arc_state_t *arc_l2c_only;
-
-arc_state_t *arc_mru;
-arc_state_t *arc_mfu;
+static kstat_t *arc_ksp;
/*
* There are several ARC variables that are critical to export as kstats --
/* max size for dnodes */
#define arc_dnode_size_limit ARCSTAT(arcstat_dnode_limit)
#define arc_meta_min ARCSTAT(arcstat_meta_min) /* min size for metadata */
-#define arc_meta_max ARCSTAT(arcstat_meta_max) /* max size of metadata */
#define arc_need_free ARCSTAT(arcstat_need_free) /* waiting to be evicted */
-/* size of all b_rabd's in entire arc */
-#define arc_raw_size ARCSTAT(arcstat_raw_size)
-/* compressed size of entire arc */
-#define arc_compressed_size ARCSTAT(arcstat_compressed_size)
-/* uncompressed size of entire arc */
-#define arc_uncompressed_size ARCSTAT(arcstat_uncompressed_size)
-/* number of bytes in the arc from arc_buf_t's */
-#define arc_overhead_size ARCSTAT(arcstat_overhead_size)
-
-/*
- * There are also some ARC variables that we want to export, but that are
- * updated so often that having the canonical representation be the statistic
- * variable causes a performance bottleneck. We want to use aggsum_t's for these
- * instead, but still be able to export the kstat in the same way as before.
- * The solution is to always use the aggsum version, except in the kstat update
- * callback.
- */
-aggsum_t arc_size;
-aggsum_t arc_meta_used;
-aggsum_t astat_data_size;
-aggsum_t astat_metadata_size;
-aggsum_t astat_dbuf_size;
-aggsum_t astat_dnode_size;
-aggsum_t astat_bonus_size;
-aggsum_t astat_hdr_size;
-aggsum_t astat_l2_hdr_size;
-aggsum_t astat_abd_chunk_waste_size;
-
hrtime_t arc_growtime;
list_t arc_prune_list;
kmutex_t arc_prune_mtx;
* Hash table routines
*/
-#define HT_LOCK_ALIGN 64
-#define HT_LOCK_PAD (P2NPHASE(sizeof (kmutex_t), (HT_LOCK_ALIGN)))
-
-struct ht_lock {
- kmutex_t ht_lock;
-#ifdef _KERNEL
- unsigned char pad[HT_LOCK_PAD];
-#endif
-};
-
-#define BUF_LOCKS 8192
+#define BUF_LOCKS 2048
typedef struct buf_hash_table {
uint64_t ht_mask;
arc_buf_hdr_t **ht_table;
- struct ht_lock ht_locks[BUF_LOCKS];
+ kmutex_t ht_locks[BUF_LOCKS] ____cacheline_aligned;
} buf_hash_table_t;
static buf_hash_table_t buf_hash_table;
#define BUF_HASH_INDEX(spa, dva, birth) \
(buf_hash(spa, dva, birth) & buf_hash_table.ht_mask)
-#define BUF_HASH_LOCK_NTRY(idx) (buf_hash_table.ht_locks[idx & (BUF_LOCKS-1)])
-#define BUF_HASH_LOCK(idx) (&(BUF_HASH_LOCK_NTRY(idx).ht_lock))
+#define BUF_HASH_LOCK(idx) (&buf_hash_table.ht_locks[idx & (BUF_LOCKS-1)])
#define HDR_LOCK(hdr) \
(BUF_HASH_LOCK(BUF_HASH_INDEX(hdr->b_spa, &hdr->b_dva, hdr->b_birth)))
*/
#define L2ARC_FEED_TYPES 4
-#define l2arc_writes_sent ARCSTAT(arcstat_l2_writes_sent)
-#define l2arc_writes_done ARCSTAT(arcstat_l2_writes_done)
-
/* L2ARC Performance Tunables */
unsigned long l2arc_write_max = L2ARC_WRITE_SIZE; /* def max write size */
unsigned long l2arc_write_boost = L2ARC_WRITE_SIZE; /* extra warmup write */
int l2arc_noprefetch = B_TRUE; /* don't cache prefetch bufs */
int l2arc_feed_again = B_TRUE; /* turbo warmup */
int l2arc_norw = B_FALSE; /* no reads during writes */
-int l2arc_meta_percent = 33; /* limit on headers size */
+static uint_t l2arc_meta_percent = 33; /* limit on headers size */
/*
* L2ARC Internals
ARC_FILL_IN_PLACE = 1 << 4 /* fill in place (special case) */
} arc_fill_flags_t;
+typedef enum arc_ovf_level {
+ ARC_OVF_NONE, /* ARC within target size. */
+ ARC_OVF_SOME, /* ARC is slightly overflowed. */
+ ARC_OVF_SEVERE /* ARC is severely overflowed. */
+} arc_ovf_level_t;
+
static kmutex_t l2arc_feed_thr_lock;
static kcondvar_t l2arc_feed_thr_cv;
static uint8_t l2arc_thread_exit;
enum arc_hdr_alloc_flags {
ARC_HDR_ALLOC_RDATA = 0x1,
ARC_HDR_DO_ADAPT = 0x2,
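+	/*
+	 * ARC_HDR_USE_RESERVE allows an allocation to overflow the ARC target
+	 * further before it must block on eviction; see the use_reserve
+	 * handling in arc_is_overflowing() and arc_wait_for_eviction().
+	 */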
+ ARC_HDR_USE_RESERVE = 0x4,
};
-static abd_t *arc_get_data_abd(arc_buf_hdr_t *, uint64_t, void *, boolean_t);
-static void *arc_get_data_buf(arc_buf_hdr_t *, uint64_t, void *);
-static void arc_get_data_impl(arc_buf_hdr_t *, uint64_t, void *, boolean_t);
-static void arc_free_data_abd(arc_buf_hdr_t *, abd_t *, uint64_t, void *);
-static void arc_free_data_buf(arc_buf_hdr_t *, void *, uint64_t, void *);
-static void arc_free_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag);
+static abd_t *arc_get_data_abd(arc_buf_hdr_t *, uint64_t, const void *, int);
+static void *arc_get_data_buf(arc_buf_hdr_t *, uint64_t, const void *);
+static void arc_get_data_impl(arc_buf_hdr_t *, uint64_t, const void *, int);
+static void arc_free_data_abd(arc_buf_hdr_t *, abd_t *, uint64_t, const void *);
+static void arc_free_data_buf(arc_buf_hdr_t *, void *, uint64_t, const void *);
+static void arc_free_data_impl(arc_buf_hdr_t *hdr, uint64_t size,
+ const void *tag);
static void arc_hdr_free_abd(arc_buf_hdr_t *, boolean_t);
static void arc_hdr_alloc_abd(arc_buf_hdr_t *, int);
static void arc_access(arc_buf_hdr_t *, kmutex_t *);
#define l2arc_hdr_arcstats_decrement_state(hdr) \
l2arc_hdr_arcstats_update((hdr), B_FALSE, B_TRUE)
+/*
+ * l2arc_exclude_special : A ZFS module parameter that controls whether buffers
+ * present on special vdevs are eligible for caching in L2ARC. If
+ * set to 1, exclude dbufs on special vdevs from being cached to
+ * L2ARC.
+ */
+int l2arc_exclude_special = 0;
+
/*
* l2arc_mfuonly : A ZFS module parameter that controls whether only MFU
* metadata and data are cached from ARC into L2ARC.
*/
-int l2arc_mfuonly = 0;
+static int l2arc_mfuonly = 0;
/*
* L2ARC TRIM
 * will vary depending on how well the specific device handles
* these commands.
*/
-unsigned long l2arc_trim_ahead = 0;
+static unsigned long l2arc_trim_ahead = 0;
/*
* Performance tuning of L2ARC persistence:
* data. In this case do not write log blocks in L2ARC in order
* not to waste space.
*/
-int l2arc_rebuild_enabled = B_TRUE;
-unsigned long l2arc_rebuild_blocks_min_l2size = 1024 * 1024 * 1024;
+static int l2arc_rebuild_enabled = B_TRUE;
+static unsigned long l2arc_rebuild_blocks_min_l2size = 1024 * 1024 * 1024;
/* L2ARC persistence rebuild control routines. */
void l2arc_rebuild_vdev(vdev_t *vd, boolean_t reopen);
-static void l2arc_dev_rebuild_thread(void *arg);
+static __attribute__((noreturn)) void l2arc_dev_rebuild_thread(void *arg);
static int l2arc_rebuild(l2arc_dev_t *dev);
/* L2ARC persistence read I/O routines. */
ARCSTAT_MAX(arcstat_hash_chain_max, i);
}
-
- ARCSTAT_BUMP(arcstat_hash_elements);
- ARCSTAT_MAXSTAT(arcstat_hash_elements);
+ uint64_t he = atomic_inc_64_nv(
+ &arc_stats.arcstat_hash_elements.value.ui64);
+ ARCSTAT_MAX(arcstat_hash_elements_max, he);
return (NULL);
}
arc_hdr_clear_flags(hdr, ARC_FLAG_IN_HASH_TABLE);
/* collect some hash table performance data */
- ARCSTAT_BUMPDOWN(arcstat_hash_elements);
+ atomic_dec_64(&arc_stats.arcstat_hash_elements.value.ui64);
if (buf_hash_table.ht_table[idx] &&
buf_hash_table.ht_table[idx]->b_hash_next == NULL)
static void
buf_fini(void)
{
- int i;
-
#if defined(_KERNEL)
/*
* Large allocations which do not require contiguous pages
kmem_free(buf_hash_table.ht_table,
(buf_hash_table.ht_mask + 1) * sizeof (void *));
#endif
- for (i = 0; i < BUF_LOCKS; i++)
- mutex_destroy(&buf_hash_table.ht_locks[i].ht_lock);
+ for (int i = 0; i < BUF_LOCKS; i++)
+ mutex_destroy(BUF_HASH_LOCK(i));
kmem_cache_destroy(hdr_full_cache);
kmem_cache_destroy(hdr_full_crypt_cache);
kmem_cache_destroy(hdr_l2only_cache);
* Constructor callback - called when the cache is empty
* and a new buf is requested.
*/
-/* ARGSUSED */
static int
hdr_full_cons(void *vbuf, void *unused, int kmflag)
{
+ (void) unused, (void) kmflag;
arc_buf_hdr_t *hdr = vbuf;
- bzero(hdr, HDR_FULL_SIZE);
+ memset(hdr, 0, HDR_FULL_SIZE);
hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS;
cv_init(&hdr->b_l1hdr.b_cv, NULL, CV_DEFAULT, NULL);
zfs_refcount_create(&hdr->b_l1hdr.b_refcnt);
return (0);
}
-/* ARGSUSED */
static int
hdr_full_crypt_cons(void *vbuf, void *unused, int kmflag)
{
+ (void) unused;
arc_buf_hdr_t *hdr = vbuf;
hdr_full_cons(vbuf, unused, kmflag);
- bzero(&hdr->b_crypt_hdr, sizeof (hdr->b_crypt_hdr));
+ memset(&hdr->b_crypt_hdr, 0, sizeof (hdr->b_crypt_hdr));
arc_space_consume(sizeof (hdr->b_crypt_hdr), ARC_SPACE_HDRS);
return (0);
}
-/* ARGSUSED */
static int
hdr_l2only_cons(void *vbuf, void *unused, int kmflag)
{
+ (void) unused, (void) kmflag;
arc_buf_hdr_t *hdr = vbuf;
- bzero(hdr, HDR_L2ONLY_SIZE);
+ memset(hdr, 0, HDR_L2ONLY_SIZE);
arc_space_consume(HDR_L2ONLY_SIZE, ARC_SPACE_L2HDRS);
return (0);
}
-/* ARGSUSED */
static int
buf_cons(void *vbuf, void *unused, int kmflag)
{
+ (void) unused, (void) kmflag;
arc_buf_t *buf = vbuf;
- bzero(buf, sizeof (arc_buf_t));
+ memset(buf, 0, sizeof (arc_buf_t));
mutex_init(&buf->b_evict_lock, NULL, MUTEX_DEFAULT, NULL);
arc_space_consume(sizeof (arc_buf_t), ARC_SPACE_HDRS);
* Destructor callback - called when a cached buf is
* no longer required.
*/
-/* ARGSUSED */
static void
hdr_full_dest(void *vbuf, void *unused)
{
+ (void) unused;
arc_buf_hdr_t *hdr = vbuf;
ASSERT(HDR_EMPTY(hdr));
arc_space_return(HDR_FULL_SIZE, ARC_SPACE_HDRS);
}
-/* ARGSUSED */
static void
hdr_full_crypt_dest(void *vbuf, void *unused)
{
- arc_buf_hdr_t *hdr = vbuf;
+ (void) vbuf, (void) unused;
hdr_full_dest(vbuf, unused);
- arc_space_return(sizeof (hdr->b_crypt_hdr), ARC_SPACE_HDRS);
+ arc_space_return(sizeof (((arc_buf_hdr_t *)NULL)->b_crypt_hdr),
+ ARC_SPACE_HDRS);
}
-/* ARGSUSED */
static void
hdr_l2only_dest(void *vbuf, void *unused)
{
- arc_buf_hdr_t *hdr __maybe_unused = vbuf;
+ (void) unused;
+ arc_buf_hdr_t *hdr = vbuf;
ASSERT(HDR_EMPTY(hdr));
arc_space_return(HDR_L2ONLY_SIZE, ARC_SPACE_L2HDRS);
}
-/* ARGSUSED */
static void
buf_dest(void *vbuf, void *unused)
{
+ (void) unused;
arc_buf_t *buf = vbuf;
mutex_destroy(&buf->b_evict_lock);
for (ct = zfs_crc64_table + i, *ct = i, j = 8; j > 0; j--)
*ct = (*ct >> 1) ^ (-(*ct & 1) & ZFS_CRC64_POLY);
- for (i = 0; i < BUF_LOCKS; i++) {
- mutex_init(&buf_hash_table.ht_locks[i].ht_lock,
- NULL, MUTEX_DEFAULT, NULL);
- }
+ for (i = 0; i < BUF_LOCKS; i++)
+ mutex_init(BUF_HASH_LOCK(i), NULL, MUTEX_DEFAULT, NULL);
}
#define ARC_MINTIME (hz>>4) /* 62 ms */
ASSERT(HDR_PROTECTED(hdr));
- bcopy(hdr->b_crypt_hdr.b_salt, salt, ZIO_DATA_SALT_LEN);
- bcopy(hdr->b_crypt_hdr.b_iv, iv, ZIO_DATA_IV_LEN);
- bcopy(hdr->b_crypt_hdr.b_mac, mac, ZIO_DATA_MAC_LEN);
+ memcpy(salt, hdr->b_crypt_hdr.b_salt, ZIO_DATA_SALT_LEN);
+ memcpy(iv, hdr->b_crypt_hdr.b_iv, ZIO_DATA_IV_LEN);
+ memcpy(mac, hdr->b_crypt_hdr.b_mac, ZIO_DATA_MAC_LEN);
*byteorder = (hdr->b_l1hdr.b_byteswap == DMU_BSWAP_NUMFUNCS) ?
ZFS_HOST_BYTEORDER : !ZFS_HOST_BYTEORDER;
}
void
arc_buf_sigsegv(int sig, siginfo_t *si, void *unused)
{
+ (void) sig, (void) unused;
panic("Got SIGSEGV at address: 0x%lx\n", (long)si->si_addr);
}
#endif
-/* ARGSUSED */
static void
arc_buf_unwatch(arc_buf_t *buf)
{
ASSERT0(mprotect(buf->b_data, arc_buf_size(buf),
PROT_READ | PROT_WRITE));
}
+#else
+ (void) buf;
#endif
}
-/* ARGSUSED */
static void
arc_buf_watch(arc_buf_t *buf)
{
if (arc_watch)
ASSERT0(mprotect(buf->b_data, arc_buf_size(buf),
PROT_READ));
+#else
+ (void) buf;
#endif
}
}
if (!ARC_BUF_COMPRESSED(from)) {
- bcopy(from->b_data, buf->b_data, arc_buf_size(buf));
+ memcpy(buf->b_data, from->b_data, arc_buf_size(buf));
copied = B_TRUE;
break;
}
* and then loan a buffer from it, rather than allocating a
* linear buffer and wrapping it in an abd later.
*/
- cabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr, B_TRUE);
+ cabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr,
+ ARC_HDR_DO_ADAPT);
tmp = abd_borrow_buf(cabd, arc_hdr_size(hdr));
ret = zio_decompress_data(HDR_GET_COMPRESS(hdr),
* arc_buf_fill().
*/
static void
-arc_buf_untransform_in_place(arc_buf_t *buf, kmutex_t *hash_lock)
+arc_buf_untransform_in_place(arc_buf_t *buf)
{
arc_buf_hdr_t *hdr = buf->b_hdr;
if (hash_lock != NULL)
mutex_enter(hash_lock);
- arc_buf_untransform_in_place(buf, hash_lock);
+ arc_buf_untransform_in_place(buf);
if (hash_lock != NULL)
mutex_exit(hash_lock);
} else {
ASSERT(hdr_compressed);
ASSERT(!compressed);
- ASSERT3U(HDR_GET_LSIZE(hdr), !=, HDR_GET_PSIZE(hdr));
/*
* If the buf is sharing its data with the hdr, unlink it and
return;
}
- ASSERT(!GHOST_STATE(state));
if (hdr->b_l1hdr.b_pabd != NULL) {
(void) zfs_refcount_add_many(&state->arcs_esize[type],
arc_hdr_size(hdr), hdr);
return;
}
- ASSERT(!GHOST_STATE(state));
if (hdr->b_l1hdr.b_pabd != NULL) {
(void) zfs_refcount_remove_many(&state->arcs_esize[type],
arc_hdr_size(hdr), hdr);
* it is not evictable.
*/
static void
-add_reference(arc_buf_hdr_t *hdr, void *tag)
+add_reference(arc_buf_hdr_t *hdr, const void *tag)
{
arc_state_t *state;
(state != arc_anon)) {
/* We don't use the L2-only state list. */
if (state != arc_l2c_only) {
- multilist_remove(state->arcs_list[arc_buf_type(hdr)],
+ multilist_remove(&state->arcs_list[arc_buf_type(hdr)],
hdr);
arc_evictable_space_decrement(hdr, state);
}
* list making it eligible for eviction.
*/
static int
-remove_reference(arc_buf_hdr_t *hdr, kmutex_t *hash_lock, void *tag)
+remove_reference(arc_buf_hdr_t *hdr, kmutex_t *hash_lock, const void *tag)
{
int cnt;
arc_state_t *state = hdr->b_l1hdr.b_state;
*/
if (((cnt = zfs_refcount_remove(&hdr->b_l1hdr.b_refcnt, tag)) == 0) &&
(state != arc_anon)) {
- multilist_insert(state->arcs_list[arc_buf_type(hdr)], hdr);
+ multilist_insert(&state->arcs_list[arc_buf_type(hdr)], hdr);
ASSERT3U(hdr->b_l1hdr.b_bufcnt, >, 0);
arc_evictable_space_increment(hdr, state);
}
void
arc_buf_info(arc_buf_t *ab, arc_buf_info_t *abi, int state_index)
{
+ (void) state_index;
arc_buf_hdr_t *hdr = ab->b_hdr;
l1arc_buf_hdr_t *l1hdr = NULL;
l2arc_buf_hdr_t *l2hdr = NULL;
if (refcnt == 0) {
if (old_state != arc_anon && old_state != arc_l2c_only) {
ASSERT(HDR_HAS_L1HDR(hdr));
- multilist_remove(old_state->arcs_list[buftype], hdr);
+ multilist_remove(&old_state->arcs_list[buftype], hdr);
if (GHOST_STATE(old_state)) {
ASSERT0(bufcnt);
* beforehand.
*/
ASSERT(HDR_HAS_L1HDR(hdr));
- multilist_insert(new_state->arcs_list[buftype], hdr);
+ multilist_insert(&new_state->arcs_list[buftype], hdr);
if (GHOST_STATE(new_state)) {
ASSERT0(bufcnt);
l2arc_hdr_arcstats_increment_state(hdr);
}
}
-
- /*
- * L2 headers should never be on the L2 state list since they don't
- * have L1 headers allocated.
- */
- ASSERT(multilist_is_empty(arc_l2c_only->arcs_list[ARC_BUFC_DATA]) &&
- multilist_is_empty(arc_l2c_only->arcs_list[ARC_BUFC_METADATA]));
}
void
default:
break;
case ARC_SPACE_DATA:
- aggsum_add(&astat_data_size, space);
+ ARCSTAT_INCR(arcstat_data_size, space);
break;
case ARC_SPACE_META:
- aggsum_add(&astat_metadata_size, space);
+ ARCSTAT_INCR(arcstat_metadata_size, space);
break;
case ARC_SPACE_BONUS:
- aggsum_add(&astat_bonus_size, space);
+ ARCSTAT_INCR(arcstat_bonus_size, space);
break;
case ARC_SPACE_DNODE:
- aggsum_add(&astat_dnode_size, space);
+ aggsum_add(&arc_sums.arcstat_dnode_size, space);
break;
case ARC_SPACE_DBUF:
- aggsum_add(&astat_dbuf_size, space);
+ ARCSTAT_INCR(arcstat_dbuf_size, space);
break;
case ARC_SPACE_HDRS:
- aggsum_add(&astat_hdr_size, space);
+ ARCSTAT_INCR(arcstat_hdr_size, space);
break;
case ARC_SPACE_L2HDRS:
- aggsum_add(&astat_l2_hdr_size, space);
+ aggsum_add(&arc_sums.arcstat_l2_hdr_size, space);
break;
case ARC_SPACE_ABD_CHUNK_WASTE:
/*
* scatter ABD's come from the ARC, because other users are
* very short-lived.
*/
- aggsum_add(&astat_abd_chunk_waste_size, space);
+ ARCSTAT_INCR(arcstat_abd_chunk_waste_size, space);
break;
}
if (type != ARC_SPACE_DATA && type != ARC_SPACE_ABD_CHUNK_WASTE)
- aggsum_add(&arc_meta_used, space);
+ aggsum_add(&arc_sums.arcstat_meta_used, space);
- aggsum_add(&arc_size, space);
+ aggsum_add(&arc_sums.arcstat_size, space);
}
void
default:
break;
case ARC_SPACE_DATA:
- aggsum_add(&astat_data_size, -space);
+ ARCSTAT_INCR(arcstat_data_size, -space);
break;
case ARC_SPACE_META:
- aggsum_add(&astat_metadata_size, -space);
+ ARCSTAT_INCR(arcstat_metadata_size, -space);
break;
case ARC_SPACE_BONUS:
- aggsum_add(&astat_bonus_size, -space);
+ ARCSTAT_INCR(arcstat_bonus_size, -space);
break;
case ARC_SPACE_DNODE:
- aggsum_add(&astat_dnode_size, -space);
+ aggsum_add(&arc_sums.arcstat_dnode_size, -space);
break;
case ARC_SPACE_DBUF:
- aggsum_add(&astat_dbuf_size, -space);
+ ARCSTAT_INCR(arcstat_dbuf_size, -space);
break;
case ARC_SPACE_HDRS:
- aggsum_add(&astat_hdr_size, -space);
+ ARCSTAT_INCR(arcstat_hdr_size, -space);
break;
case ARC_SPACE_L2HDRS:
- aggsum_add(&astat_l2_hdr_size, -space);
+ aggsum_add(&arc_sums.arcstat_l2_hdr_size, -space);
break;
case ARC_SPACE_ABD_CHUNK_WASTE:
- aggsum_add(&astat_abd_chunk_waste_size, -space);
+ ARCSTAT_INCR(arcstat_abd_chunk_waste_size, -space);
break;
}
if (type != ARC_SPACE_DATA && type != ARC_SPACE_ABD_CHUNK_WASTE) {
- ASSERT(aggsum_compare(&arc_meta_used, space) >= 0);
- /*
- * We use the upper bound here rather than the precise value
- * because the arc_meta_max value doesn't need to be
- * precise. It's only consumed by humans via arcstats.
- */
- if (arc_meta_max < aggsum_upper_bound(&arc_meta_used))
- arc_meta_max = aggsum_upper_bound(&arc_meta_used);
- aggsum_add(&arc_meta_used, -space);
+ ASSERT(aggsum_compare(&arc_sums.arcstat_meta_used,
+ space) >= 0);
+ ARCSTAT_MAX(arcstat_meta_max,
+ aggsum_upper_bound(&arc_sums.arcstat_meta_used));
+ aggsum_add(&arc_sums.arcstat_meta_used, -space);
}
- ASSERT(aggsum_compare(&arc_size, space) >= 0);
- aggsum_add(&arc_size, -space);
+ ASSERT(aggsum_compare(&arc_sums.arcstat_size, space) >= 0);
+ aggsum_add(&arc_sums.arcstat_size, -space);
}
/*
*/
static int
arc_buf_alloc_impl(arc_buf_hdr_t *hdr, spa_t *spa, const zbookmark_phys_t *zb,
- void *tag, boolean_t encrypted, boolean_t compressed, boolean_t noauth,
- boolean_t fill, arc_buf_t **ret)
+ const void *tag, boolean_t encrypted, boolean_t compressed,
+ boolean_t noauth, boolean_t fill, arc_buf_t **ret)
{
arc_buf_t *buf;
arc_fill_flags_t flags = ARC_FILL_LOCKED;
ASSERT3P(*ret, ==, NULL);
IMPLY(encrypted, compressed);
- hdr->b_l1hdr.b_mru_hits = 0;
- hdr->b_l1hdr.b_mru_ghost_hits = 0;
- hdr->b_l1hdr.b_mfu_hits = 0;
- hdr->b_l1hdr.b_mfu_ghost_hits = 0;
- hdr->b_l1hdr.b_l2_hits = 0;
-
buf = *ret = kmem_cache_alloc(buf_cache, KM_PUSHPAGE);
buf->b_hdr = hdr;
buf->b_data = NULL;
return (0);
}
-static char *arc_onloan_tag = "onloan";
+static const char *arc_onloan_tag = "onloan";
static inline void
arc_loaned_bytes_update(int64_t delta)
* Return a loaned arc buffer to the arc.
*/
void
-arc_return_buf(arc_buf_t *buf, void *tag)
+arc_return_buf(arc_buf_t *buf, const void *tag)
{
arc_buf_hdr_t *hdr = buf->b_hdr;
/* Detach an arc_buf from a dbuf (tag) */
void
-arc_loan_inuse_buf(arc_buf_t *buf, void *tag)
+arc_loan_inuse_buf(arc_buf_t *buf, const void *tag)
{
arc_buf_hdr_t *hdr = buf->b_hdr;
arc_hdr_size(hdr), hdr, buf);
arc_hdr_clear_flags(hdr, ARC_FLAG_SHARED_DATA);
abd_release_ownership_of_buf(hdr->b_l1hdr.b_pabd);
- abd_put(hdr->b_l1hdr.b_pabd);
+ abd_free(hdr->b_l1hdr.b_pabd);
hdr->b_l1hdr.b_pabd = NULL;
buf->b_flags &= ~ARC_BUF_FLAG_SHARED;
{
uint64_t size;
boolean_t alloc_rdata = ((alloc_flags & ARC_HDR_ALLOC_RDATA) != 0);
- boolean_t do_adapt = ((alloc_flags & ARC_HDR_DO_ADAPT) != 0);
ASSERT3U(HDR_GET_LSIZE(hdr), >, 0);
ASSERT(HDR_HAS_L1HDR(hdr));
size = HDR_GET_PSIZE(hdr);
ASSERT3P(hdr->b_crypt_hdr.b_rabd, ==, NULL);
hdr->b_crypt_hdr.b_rabd = arc_get_data_abd(hdr, size, hdr,
- do_adapt);
+ alloc_flags);
ASSERT3P(hdr->b_crypt_hdr.b_rabd, !=, NULL);
ARCSTAT_INCR(arcstat_raw_size, size);
} else {
size = arc_hdr_size(hdr);
ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
hdr->b_l1hdr.b_pabd = arc_get_data_abd(hdr, size, hdr,
- do_adapt);
+ alloc_flags);
ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
}
ARCSTAT_INCR(arcstat_uncompressed_size, -HDR_GET_LSIZE(hdr));
}
+/*
+ * Allocate an empty anonymous ARC header. The header will get its identity
+ * assigned and buffers attached later as part of read or write operations.
+ *
+ * In case of read, arc_read() assigns the header its identity (b_dva +
+ * b_birth), inserts it into the ARC hash to become globally visible, and
+ * allocates a physical (b_pabd) or raw (b_rabd) ABD buffer to read into from
+ * disk. On disk read completion, arc_read_done() allocates ARC buffer(s) as
+ * needed, potentially sharing one of them with the physical ABD buffer.
+ *
+ * In case of write, arc_alloc_buf() allocates an ARC buffer to be filled with
+ * data. Then, after compression and/or encryption, arc_write_ready()
+ * allocates and fills (or potentially shares) the physical (b_pabd) or raw
+ * (b_rabd) ABD buffer. On disk write completion, arc_write_done() assigns the
+ * header its new identity (b_dva + b_birth) and inserts it into the ARC hash.
+ *
+ * In case of partial overwrite, the old data is read first as described.
+ * Then arc_release() either allocates a new anonymous ARC header and moves
+ * the ARC buffer to it, or reuses the old ARC header by discarding its
+ * identity and removing it from the ARC hash. After buffer modification, the
+ * normal write process follows as described.
+ */
static arc_buf_hdr_t *
arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize,
boolean_t protected, enum zio_compress compression_type, uint8_t complevel,
- arc_buf_contents_t type, boolean_t alloc_rdata)
+ arc_buf_contents_t type)
{
arc_buf_hdr_t *hdr;
- int flags = ARC_HDR_DO_ADAPT;
VERIFY(type == ARC_BUFC_DATA || type == ARC_BUFC_METADATA);
if (protected) {
} else {
hdr = kmem_cache_alloc(hdr_full_cache, KM_PUSHPAGE);
}
- flags |= alloc_rdata ? ARC_HDR_ALLOC_RDATA : 0;
ASSERT(HDR_EMPTY(hdr));
ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL);
hdr->b_l1hdr.b_state = arc_anon;
hdr->b_l1hdr.b_arc_access = 0;
+ hdr->b_l1hdr.b_mru_hits = 0;
+ hdr->b_l1hdr.b_mru_ghost_hits = 0;
+ hdr->b_l1hdr.b_mfu_hits = 0;
+ hdr->b_l1hdr.b_mfu_ghost_hits = 0;
hdr->b_l1hdr.b_bufcnt = 0;
hdr->b_l1hdr.b_buf = NULL;
- /*
- * Allocate the hdr's buffer. This will contain either
- * the compressed or uncompressed data depending on the block
- * it references and compressed arc enablement.
- */
- arc_hdr_alloc_abd(hdr, flags);
ASSERT(zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
return (hdr);
ASSERT(MUTEX_HELD(HDR_LOCK(hdr)));
buf_hash_remove(hdr);
- bcopy(hdr, nhdr, HDR_L2ONLY_SIZE);
+ memcpy(nhdr, hdr, HDR_L2ONLY_SIZE);
if (new == hdr_full_cache || new == hdr_full_crypt_cache) {
arc_hdr_set_flags(nhdr, ARC_FLAG_HAS_L1HDR);
arc_buf_hdr_t *nhdr;
arc_buf_t *buf;
kmem_cache_t *ncache, *ocache;
- unsigned nsize, osize;
/*
* This function requires that hdr is in the arc_anon state.
if (need_crypt) {
ncache = hdr_full_crypt_cache;
- nsize = sizeof (hdr->b_crypt_hdr);
ocache = hdr_full_cache;
- osize = HDR_FULL_SIZE;
} else {
ncache = hdr_full_cache;
- nsize = HDR_FULL_SIZE;
ocache = hdr_full_crypt_cache;
- osize = sizeof (hdr->b_crypt_hdr);
}
nhdr = kmem_cache_alloc(ncache, KM_PUSHPAGE);
nhdr->b_l1hdr.b_mru_ghost_hits = hdr->b_l1hdr.b_mru_ghost_hits;
nhdr->b_l1hdr.b_mfu_hits = hdr->b_l1hdr.b_mfu_hits;
nhdr->b_l1hdr.b_mfu_ghost_hits = hdr->b_l1hdr.b_mfu_ghost_hits;
- nhdr->b_l1hdr.b_l2_hits = hdr->b_l1hdr.b_l2_hits;
nhdr->b_l1hdr.b_acb = hdr->b_l1hdr.b_acb;
nhdr->b_l1hdr.b_pabd = hdr->b_l1hdr.b_pabd;
}
/* unset all members of the original hdr */
- bzero(&hdr->b_dva, sizeof (dva_t));
+ memset(&hdr->b_dva, 0, sizeof (dva_t));
hdr->b_birth = 0;
hdr->b_type = ARC_BUFC_INVALID;
hdr->b_flags = 0;
hdr->b_l1hdr.b_mru_ghost_hits = 0;
hdr->b_l1hdr.b_mfu_hits = 0;
hdr->b_l1hdr.b_mfu_ghost_hits = 0;
- hdr->b_l1hdr.b_l2_hits = 0;
hdr->b_l1hdr.b_acb = NULL;
hdr->b_l1hdr.b_pabd = NULL;
hdr->b_crypt_hdr.b_ot = DMU_OT_NONE;
hdr->b_crypt_hdr.b_ebufcnt = 0;
hdr->b_crypt_hdr.b_dsobj = 0;
- bzero(hdr->b_crypt_hdr.b_salt, ZIO_DATA_SALT_LEN);
- bzero(hdr->b_crypt_hdr.b_iv, ZIO_DATA_IV_LEN);
- bzero(hdr->b_crypt_hdr.b_mac, ZIO_DATA_MAC_LEN);
+ memset(hdr->b_crypt_hdr.b_salt, 0, ZIO_DATA_SALT_LEN);
+ memset(hdr->b_crypt_hdr.b_iv, 0, ZIO_DATA_IV_LEN);
+ memset(hdr->b_crypt_hdr.b_mac, 0, ZIO_DATA_MAC_LEN);
}
buf_discard_identity(hdr);
arc_cksum_free(hdr);
if (salt != NULL)
- bcopy(salt, hdr->b_crypt_hdr.b_salt, ZIO_DATA_SALT_LEN);
+ memcpy(hdr->b_crypt_hdr.b_salt, salt, ZIO_DATA_SALT_LEN);
if (iv != NULL)
- bcopy(iv, hdr->b_crypt_hdr.b_iv, ZIO_DATA_IV_LEN);
+ memcpy(hdr->b_crypt_hdr.b_iv, iv, ZIO_DATA_IV_LEN);
if (mac != NULL)
- bcopy(mac, hdr->b_crypt_hdr.b_mac, ZIO_DATA_MAC_LEN);
+ memcpy(hdr->b_crypt_hdr.b_mac, mac, ZIO_DATA_MAC_LEN);
}
/*
* The buf is returned thawed since we expect the consumer to modify it.
*/
arc_buf_t *
-arc_alloc_buf(spa_t *spa, void *tag, arc_buf_contents_t type, int32_t size)
+arc_alloc_buf(spa_t *spa, const void *tag, arc_buf_contents_t type,
+ int32_t size)
{
arc_buf_hdr_t *hdr = arc_hdr_alloc(spa_load_guid(spa), size, size,
- B_FALSE, ZIO_COMPRESS_OFF, 0, type, B_FALSE);
+ B_FALSE, ZIO_COMPRESS_OFF, 0, type);
arc_buf_t *buf = NULL;
VERIFY0(arc_buf_alloc_impl(hdr, spa, NULL, tag, B_FALSE, B_FALSE,
* for bufs containing metadata.
*/
arc_buf_t *
-arc_alloc_compressed_buf(spa_t *spa, void *tag, uint64_t psize, uint64_t lsize,
- enum zio_compress compression_type, uint8_t complevel)
+arc_alloc_compressed_buf(spa_t *spa, const void *tag, uint64_t psize,
+ uint64_t lsize, enum zio_compress compression_type, uint8_t complevel)
{
ASSERT3U(lsize, >, 0);
ASSERT3U(lsize, >=, psize);
ASSERT3U(compression_type, <, ZIO_COMPRESS_FUNCTIONS);
arc_buf_hdr_t *hdr = arc_hdr_alloc(spa_load_guid(spa), psize, lsize,
- B_FALSE, compression_type, complevel, ARC_BUFC_DATA, B_FALSE);
+ B_FALSE, compression_type, complevel, ARC_BUFC_DATA);
arc_buf_t *buf = NULL;
VERIFY0(arc_buf_alloc_impl(hdr, spa, NULL, tag, B_FALSE,
arc_buf_thaw(buf);
ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL);
- if (!arc_buf_is_shared(buf)) {
- /*
- * To ensure that the hdr has the correct data in it if we call
- * arc_untransform() on this buf before it's been written to
- * disk, it's easiest if we just set up sharing between the
- * buf and the hdr.
- */
- arc_hdr_free_abd(hdr, B_FALSE);
- arc_share_buf(hdr, buf);
- }
+ /*
+ * To ensure that the hdr has the correct data in it if we call
+ * arc_untransform() on this buf before it's been written to disk,
+ * it's easiest if we just set up sharing between the buf and the hdr.
+ */
+ arc_share_buf(hdr, buf);
return (buf);
}
arc_buf_t *
-arc_alloc_raw_buf(spa_t *spa, void *tag, uint64_t dsobj, boolean_t byteorder,
- const uint8_t *salt, const uint8_t *iv, const uint8_t *mac,
- dmu_object_type_t ot, uint64_t psize, uint64_t lsize,
+arc_alloc_raw_buf(spa_t *spa, const void *tag, uint64_t dsobj,
+ boolean_t byteorder, const uint8_t *salt, const uint8_t *iv,
+ const uint8_t *mac, dmu_object_type_t ot, uint64_t psize, uint64_t lsize,
enum zio_compress compression_type, uint8_t complevel)
{
arc_buf_hdr_t *hdr;
ASSERT3U(compression_type, <, ZIO_COMPRESS_FUNCTIONS);
hdr = arc_hdr_alloc(spa_load_guid(spa), psize, lsize, B_TRUE,
- compression_type, complevel, type, B_TRUE);
+ compression_type, complevel, type);
hdr->b_crypt_hdr.b_dsobj = dsobj;
hdr->b_crypt_hdr.b_ot = ot;
hdr->b_l1hdr.b_byteswap = (byteorder == ZFS_HOST_BYTEORDER) ?
DMU_BSWAP_NUMFUNCS : DMU_OT_BYTESWAP(ot);
- bcopy(salt, hdr->b_crypt_hdr.b_salt, ZIO_DATA_SALT_LEN);
- bcopy(iv, hdr->b_crypt_hdr.b_iv, ZIO_DATA_IV_LEN);
- bcopy(mac, hdr->b_crypt_hdr.b_mac, ZIO_DATA_MAC_LEN);
+ memcpy(hdr->b_crypt_hdr.b_salt, salt, ZIO_DATA_SALT_LEN);
+ memcpy(hdr->b_crypt_hdr.b_iv, iv, ZIO_DATA_IV_LEN);
+ memcpy(hdr->b_crypt_hdr.b_mac, mac, ZIO_DATA_MAC_LEN);
/*
* This buffer will be considered encrypted even if the ot is not an
* to acquire the l2ad_mtx. If that happens, we don't
* want to re-destroy the header's L2 portion.
*/
- if (HDR_HAS_L2HDR(hdr))
+ if (HDR_HAS_L2HDR(hdr)) {
+ if (!HDR_EMPTY(hdr))
+ buf_discard_identity(hdr);
+
arc_hdr_l2hdr_destroy(hdr);
+ }
if (!buflist_held)
mutex_exit(&dev->l2ad_mtx);
}
void
-arc_buf_destroy(arc_buf_t *buf, void* tag)
+arc_buf_destroy(arc_buf_t *buf, const void *tag)
{
arc_buf_hdr_t *hdr = buf->b_hdr;
* - arc_mru_ghost -> deleted
* - arc_mfu_ghost -> arc_l2c_only
* - arc_mfu_ghost -> deleted
+ *
+ * Return the total size of the evicted data buffers for eviction progress
+ * tracking. When evicting from ghost states, return the logical buffer size
+ * to make eviction progress at the same (or at least a comparable) rate as
+ * from non-ghost states.
+ *
+ * Set *real_evicted to the actual ARC size reduction, used to wake up threads
+ * waiting for it. For non-ghost states it includes the size of the evicted
+ * data buffers (the headers are not freed there). For ghost states it
+ * includes only the size of the evicted headers.
*/
static int64_t
-arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
+arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock, uint64_t *real_evicted)
{
arc_state_t *evicted_state, *state;
int64_t bytes_evicted = 0;
- int min_lifetime = HDR_PRESCIENT_PREFETCH(hdr) ?
+ uint_t min_lifetime = HDR_PRESCIENT_PREFETCH(hdr) ?
arc_min_prescient_prefetch_ms : arc_min_prefetch_ms;
ASSERT(MUTEX_HELD(hash_lock));
ASSERT(HDR_HAS_L1HDR(hdr));
+ *real_evicted = 0;
state = hdr->b_l1hdr.b_state;
if (GHOST_STATE(state)) {
ASSERT(!HDR_IO_IN_PROGRESS(hdr));
*/
hdr = arc_hdr_realloc(hdr, hdr_full_cache,
hdr_l2only_cache);
+ *real_evicted += HDR_FULL_SIZE - HDR_L2ONLY_SIZE;
} else {
arc_change_state(arc_anon, hdr, hash_lock);
arc_hdr_destroy(hdr);
+ *real_evicted += HDR_FULL_SIZE;
}
return (bytes_evicted);
}
ARCSTAT_BUMP(arcstat_mutex_miss);
break;
}
- if (buf->b_data != NULL)
+ if (buf->b_data != NULL) {
bytes_evicted += HDR_GET_LSIZE(hdr);
+ *real_evicted += HDR_GET_LSIZE(hdr);
+ }
mutex_exit(&buf->b_evict_lock);
arc_buf_destroy_impl(buf);
}
arc_cksum_free(hdr);
bytes_evicted += arc_hdr_size(hdr);
+ *real_evicted += arc_hdr_size(hdr);
/*
* If this hdr is being evicted and has a compressed
static uint64_t
arc_evict_state_impl(multilist_t *ml, int idx, arc_buf_hdr_t *marker,
- uint64_t spa, int64_t bytes)
+ uint64_t spa, uint64_t bytes)
{
multilist_sublist_t *mls;
- uint64_t bytes_evicted = 0;
+ uint64_t bytes_evicted = 0, real_evicted = 0;
arc_buf_hdr_t *hdr;
kmutex_t *hash_lock;
- int evict_count = 0;
+ uint_t evict_count = zfs_arc_evict_batch_limit;
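+	/* evict_count counts down from the batch limit to bound each pass. */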
ASSERT3P(marker, !=, NULL);
- IMPLY(bytes < 0, bytes == ARC_EVICT_ALL);
mls = multilist_sublist_lock(ml, idx);
- for (hdr = multilist_sublist_prev(mls, marker); hdr != NULL;
+ for (hdr = multilist_sublist_prev(mls, marker); likely(hdr != NULL);
hdr = multilist_sublist_prev(mls, marker)) {
- if ((bytes != ARC_EVICT_ALL && bytes_evicted >= bytes) ||
- (evict_count >= zfs_arc_evict_batch_limit))
+ if ((evict_count == 0) || (bytes_evicted >= bytes))
break;
/*
ASSERT(!MUTEX_HELD(hash_lock));
if (mutex_tryenter(hash_lock)) {
- uint64_t evicted = arc_evict_hdr(hdr, hash_lock);
+ uint64_t revicted;
+ uint64_t evicted = arc_evict_hdr(hdr, hash_lock,
+ &revicted);
mutex_exit(hash_lock);
bytes_evicted += evicted;
+ real_evicted += revicted;
/*
* If evicted is zero, arc_evict_hdr() must have
* evict_count in this case.
*/
if (evicted != 0)
- evict_count++;
+ evict_count--;
} else {
ARCSTAT_BUMP(arcstat_mutex_miss);
* 1/64th of RAM). See the comments in arc_wait_for_eviction().
*/
mutex_enter(&arc_evict_lock);
- arc_evict_count += bytes_evicted;
+ arc_evict_count += real_evicted;
- if ((int64_t)(arc_free_memory() - arc_sys_free / 2) > 0) {
+ if (arc_free_memory() > arc_sys_free / 2) {
arc_evict_waiter_t *aw;
while ((aw = list_head(&arc_evict_waiters)) != NULL &&
aw->aew_count <= arc_evict_count) {
* this CPU are able to make progress, make a voluntary preemption
* call here.
*/
- cond_resched();
+ kpreempt(KPREEMPT_SYNC);
return (bytes_evicted);
}
+/*
+ * Allocate an array of buffer headers used as placeholders during arc state
+ * eviction.
+ */
+static arc_buf_hdr_t **
+arc_state_alloc_markers(int count)
+{
+ arc_buf_hdr_t **markers;
+
+ markers = kmem_zalloc(sizeof (*markers) * count, KM_SLEEP);
+ for (int i = 0; i < count; i++) {
+ markers[i] = kmem_cache_alloc(hdr_full_cache, KM_SLEEP);
+
+ /*
+ * A b_spa of 0 is used to indicate that this header is
+ * a marker. This fact is used in arc_evict_type() and
+ * arc_evict_state_impl().
+ */
+ markers[i]->b_spa = 0;
+ }
+ return (markers);
+}
+
+static void
+arc_state_free_markers(arc_buf_hdr_t **markers, int count)
+{
+ for (int i = 0; i < count; i++)
+ kmem_cache_free(hdr_full_cache, markers[i]);
+ kmem_free(markers, sizeof (*markers) * count);
+}
+
/*
* Evict buffers from the given arc state, until we've removed the
* specified number of bytes. Move the removed buffers to the
* the given arc state; which is used by arc_flush().
*/
static uint64_t
-arc_evict_state(arc_state_t *state, uint64_t spa, int64_t bytes,
+arc_evict_state(arc_state_t *state, uint64_t spa, uint64_t bytes,
arc_buf_contents_t type)
{
uint64_t total_evicted = 0;
- multilist_t *ml = state->arcs_list[type];
+ multilist_t *ml = &state->arcs_list[type];
int num_sublists;
arc_buf_hdr_t **markers;
- IMPLY(bytes < 0, bytes == ARC_EVICT_ALL);
-
num_sublists = multilist_get_num_sublists(ml);
/*
* pick up where we left off for each individual sublist, rather
* than starting from the tail each time.
*/
- markers = kmem_zalloc(sizeof (*markers) * num_sublists, KM_SLEEP);
+ if (zthr_iscurthread(arc_evict_zthr)) {
+ markers = arc_state_evict_markers;
+ ASSERT3S(num_sublists, <=, arc_state_evict_marker_count);
+ } else {
+ markers = arc_state_alloc_markers(num_sublists);
+ }
for (int i = 0; i < num_sublists; i++) {
multilist_sublist_t *mls;
- markers[i] = kmem_cache_alloc(hdr_full_cache, KM_SLEEP);
-
- /*
- * A b_spa of 0 is used to indicate that this header is
- * a marker. This fact is used in arc_evict_type() and
- * arc_evict_state_impl().
- */
- markers[i]->b_spa = 0;
-
mls = multilist_sublist_lock(ml, i);
multilist_sublist_insert_tail(mls, markers[i]);
multilist_sublist_unlock(mls);
* While we haven't hit our target number of bytes to evict, or
* we're evicting all available buffers.
*/
- while (total_evicted < bytes || bytes == ARC_EVICT_ALL) {
+ while (total_evicted < bytes) {
int sublist_idx = multilist_get_random_index(ml);
uint64_t scan_evicted = 0;
* Request that 10% of the LRUs be scanned by the superblock
* shrinker.
*/
- if (type == ARC_BUFC_DATA && aggsum_compare(&astat_dnode_size,
- arc_dnode_size_limit) > 0) {
- arc_prune_async((aggsum_upper_bound(&astat_dnode_size) -
+ if (type == ARC_BUFC_DATA && aggsum_compare(
+ &arc_sums.arcstat_dnode_size, arc_dnode_size_limit) > 0) {
+ arc_prune_async((aggsum_upper_bound(
+ &arc_sums.arcstat_dnode_size) -
arc_dnode_size_limit) / sizeof (dnode_t) /
zfs_arc_dnode_reduce_percent);
}
uint64_t bytes_remaining;
uint64_t bytes_evicted;
- if (bytes == ARC_EVICT_ALL)
- bytes_remaining = ARC_EVICT_ALL;
- else if (total_evicted < bytes)
+ if (total_evicted < bytes)
bytes_remaining = bytes - total_evicted;
else
break;
multilist_sublist_t *mls = multilist_sublist_lock(ml, i);
multilist_sublist_remove(mls, markers[i]);
multilist_sublist_unlock(mls);
-
- kmem_cache_free(hdr_full_cache, markers[i]);
}
- kmem_free(markers, sizeof (*markers) * num_sublists);
+ if (markers != arc_state_evict_markers)
+ arc_state_free_markers(markers, num_sublists);
return (total_evicted);
}
arc_evict_impl(arc_state_t *state, uint64_t spa, int64_t bytes,
arc_buf_contents_t type)
{
- int64_t delta;
+ uint64_t delta;
if (bytes > 0 && zfs_refcount_count(&state->arcs_esize[type]) > 0) {
delta = MIN(zfs_refcount_count(&state->arcs_esize[type]),
static uint64_t
arc_evict_meta_balanced(uint64_t meta_used)
{
- int64_t delta, prune = 0, adjustmnt;
- uint64_t total_evicted = 0;
+ int64_t delta, adjustmnt;
+ uint64_t total_evicted = 0, prune = 0;
arc_buf_contents_t type = ARC_BUFC_DATA;
- int restarts = MAX(zfs_arc_meta_adjust_restarts, 0);
+ uint_t restarts = zfs_arc_meta_adjust_restarts;
restart:
/*
}
/*
- * Evict metadata buffers from the cache, such that arc_meta_used is
+ * Evict metadata buffers from the cache, such that arcstat_meta_used is
* capped by the arc_meta_limit tunable.
*/
static uint64_t
static arc_buf_contents_t
arc_evict_type(arc_state_t *state)
{
- multilist_t *data_ml = state->arcs_list[ARC_BUFC_DATA];
- multilist_t *meta_ml = state->arcs_list[ARC_BUFC_METADATA];
+ multilist_t *data_ml = &state->arcs_list[ARC_BUFC_DATA];
+ multilist_t *meta_ml = &state->arcs_list[ARC_BUFC_METADATA];
int data_idx = multilist_get_random_index(data_ml);
int meta_idx = multilist_get_random_index(meta_ml);
multilist_sublist_t *data_mls;
}
/*
- * Evict buffers from the cache, such that arc_size is capped by arc_c.
+ * Evict buffers from the cache, such that arcstat_size is capped by arc_c.
*/
static uint64_t
arc_evict(void)
uint64_t total_evicted = 0;
uint64_t bytes;
int64_t target;
- uint64_t asize = aggsum_value(&arc_size);
- uint64_t ameta = aggsum_value(&arc_meta_used);
+ uint64_t asize = aggsum_value(&arc_sums.arcstat_size);
+ uint64_t ameta = aggsum_value(&arc_sums.arcstat_meta_used);
/*
* If we're over arc_meta_limit, we want to correct that before
/*
* Re-sum ARC stats after the first round of evictions.
*/
- asize = aggsum_value(&arc_size);
- ameta = aggsum_value(&arc_meta_used);
+ asize = aggsum_value(&arc_sums.arcstat_size);
+ ameta = aggsum_value(&arc_sums.arcstat_meta_used);
/*
void
arc_reduce_target_size(int64_t to_free)
{
- uint64_t asize = aggsum_value(&arc_size);
+ uint64_t asize = aggsum_value(&arc_sums.arcstat_size);
/*
* All callers want the ARC to actually evict (at least) this much
size_t i;
kmem_cache_t *prev_cache = NULL;
kmem_cache_t *prev_data_cache = NULL;
- extern kmem_cache_t *zio_buf_cache[];
- extern kmem_cache_t *zio_data_buf_cache[];
#ifdef _KERNEL
- if ((aggsum_compare(&arc_meta_used, arc_meta_limit) >= 0) &&
- zfs_arc_meta_prune) {
+ if ((aggsum_compare(&arc_sums.arcstat_meta_used,
+ arc_meta_limit) >= 0) && zfs_arc_meta_prune) {
/*
* We are exceeding our meta-data cache limit.
* Prune some entries to release holds on meta-data.
abd_cache_reap_now();
}
-/* ARGSUSED */
static boolean_t
arc_evict_cb_check(void *arg, zthr_t *zthr)
{
- /*
- * This is necessary so that any changes which may have been made to
- * many of the zfs_arc_* module parameters will be propagated to
- * their actual internal variable counterparts. Without this,
- * changing those module params at runtime would have no effect.
- */
- arc_tuning_update(B_FALSE);
+ (void) arg, (void) zthr;
+#ifdef ZFS_DEBUG
/*
* This is necessary in order to keep the kstat information
* up to date for tools that display kstat data such as the
* typically do not call kstat's update function, but simply
* dump out stats from the most recent update. Without
* this call, these commands may show stale stats for the
- * anon, mru, mru_ghost, mfu, and mfu_ghost lists. Even
- * with this change, the data might be up to 1 second
- * out of date(the arc_evict_zthr has a maximum sleep
- * time of 1 second); but that should suffice. The
- * arc_state_t structures can be queried directly if more
- * accurate information is needed.
+ * anon, mru, mru_ghost, mfu, and mfu_ghost lists. Even
+ * with this call, the data might be out of date if the
+ * evict thread hasn't been woken recently; but that should
+ * suffice. The arc_state_t structures can be queried
+ * directly if more accurate information is needed.
*/
if (arc_ksp != NULL)
arc_ksp->ks_update(arc_ksp, KSTAT_READ);
+#endif
/*
* We have to rely on arc_wait_for_eviction() to tell us when to
* Keep arc_size under arc_c by running arc_evict which evicts data
* from the ARC.
*/
-/* ARGSUSED */
static void
arc_evict_cb(void *arg, zthr_t *zthr)
{
+ (void) arg, (void) zthr;
+
uint64_t evicted = 0;
fstrans_cookie_t cookie = spl_fstrans_mark();
*/
mutex_enter(&arc_evict_lock);
arc_evict_needed = !zthr_iscancelled(arc_evict_zthr) &&
- evicted > 0 && aggsum_compare(&arc_size, arc_c) > 0;
+ evicted > 0 && aggsum_compare(&arc_sums.arcstat_size, arc_c) > 0;
if (!arc_evict_needed) {
/*
* We're either no longer overflowing, or we
spl_fstrans_unmark(cookie);
}
-/* ARGSUSED */
static boolean_t
arc_reap_cb_check(void *arg, zthr_t *zthr)
{
+ (void) arg, (void) zthr;
+
int64_t free_memory = arc_available_memory();
static int reap_cb_check_counter = 0;
* target size of the cache (arc_c), causing the arc_evict_cb()
* to free more buffers.
*/
-/* ARGSUSED */
static void
arc_reap_cb(void *arg, zthr_t *zthr)
{
+ (void) arg, (void) zthr;
+
int64_t free_memory;
fstrans_cookie_t cookie = spl_fstrans_mark();
* memory in the system at a fraction of the arc_size (1/128th by
* default). If oversubscribed (free_memory < 0) then reduce the
* target arc_size by the deficit amount plus the fractional
- * amount. If free memory is positive but less then the fractional
+ * amount. If free memory is positive but less than the fractional
* amount, reduce by what is needed to hit the fractional amount.
*/
free_memory = arc_available_memory();
- int64_t to_free =
- (arc_c >> arc_shrink_shift) - free_memory;
- if (to_free > 0) {
- arc_reduce_target_size(to_free);
+ int64_t can_free = arc_c - arc_c_min;
+ if (can_free > 0) {
+ int64_t to_free = (can_free >> arc_shrink_shift) - free_memory;
+ if (to_free > 0)
+ arc_reduce_target_size(to_free);
}
spl_fstrans_unmark(cookie);
}
* cache size, increment the target cache size
*/
ASSERT3U(arc_c, >=, 2ULL << SPA_MAXBLOCKSHIFT);
- if (aggsum_upper_bound(&arc_size) >=
+ if (aggsum_upper_bound(&arc_sums.arcstat_size) >=
arc_c - (2ULL << SPA_MAXBLOCKSHIFT)) {
atomic_add_64(&arc_c, (int64_t)bytes);
if (arc_c > arc_c_max)
* Check if arc_size has grown past our upper threshold, determined by
* zfs_arc_overflow_shift.
*/
-boolean_t
-arc_is_overflowing(void)
+static arc_ovf_level_t
+arc_is_overflowing(boolean_t use_reserve)
{
/* Always allow at least one block of overflow */
int64_t overflow = MAX(SPA_MAXBLOCKSIZE,
* in the ARC. In practice, that's in the tens of MB, which is low
* enough to be safe.
*/
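+	/*
+	 * Below arc_c + overflow / 2 there is no overflow at all. Above that,
+	 * callers without ARC_HDR_USE_RESERVE see only half of the remaining
+	 * margin and therefore reach ARC_OVF_SEVERE (and block for eviction)
+	 * sooner than callers that pass use_reserve.
+	 */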
- return (aggsum_lower_bound(&arc_size) >= (int64_t)arc_c + overflow);
+ int64_t over = aggsum_lower_bound(&arc_sums.arcstat_size) -
+ arc_c - overflow / 2;
+ if (!use_reserve)
+ overflow /= 2;
+ return (over < 0 ? ARC_OVF_NONE :
+ over < overflow ? ARC_OVF_SOME : ARC_OVF_SEVERE);
}
static abd_t *
-arc_get_data_abd(arc_buf_hdr_t *hdr, uint64_t size, void *tag,
- boolean_t do_adapt)
+arc_get_data_abd(arc_buf_hdr_t *hdr, uint64_t size, const void *tag,
+ int alloc_flags)
{
arc_buf_contents_t type = arc_buf_type(hdr);
- arc_get_data_impl(hdr, size, tag, do_adapt);
+ arc_get_data_impl(hdr, size, tag, alloc_flags);
if (type == ARC_BUFC_METADATA) {
return (abd_alloc(size, B_TRUE));
} else {
}
static void *
-arc_get_data_buf(arc_buf_hdr_t *hdr, uint64_t size, void *tag)
+arc_get_data_buf(arc_buf_hdr_t *hdr, uint64_t size, const void *tag)
{
arc_buf_contents_t type = arc_buf_type(hdr);
- arc_get_data_impl(hdr, size, tag, B_TRUE);
+ arc_get_data_impl(hdr, size, tag, ARC_HDR_DO_ADAPT);
if (type == ARC_BUFC_METADATA) {
return (zio_buf_alloc(size));
} else {
* of ARC behavior and settings. See arc_lowmem_init().
*/
void
-arc_wait_for_eviction(uint64_t amount)
+arc_wait_for_eviction(uint64_t amount, boolean_t use_reserve)
{
- mutex_enter(&arc_evict_lock);
- if (arc_is_overflowing()) {
- arc_evict_needed = B_TRUE;
- zthr_wakeup(arc_evict_zthr);
-
- if (amount != 0) {
- arc_evict_waiter_t aw;
- list_link_init(&aw.aew_node);
- cv_init(&aw.aew_cv, NULL, CV_DEFAULT, NULL);
+ switch (arc_is_overflowing(use_reserve)) {
+ case ARC_OVF_NONE:
+ return;
+ case ARC_OVF_SOME:
+ /*
+ * This is a bit racy without taking arc_evict_lock, but the
+ * worst that can happen is we either call zthr_wakeup() extra
+ * time due to race with other thread here, or the set flag
+ * get cleared by arc_evict_cb(), which is unlikely due to
+ * big hysteresis, but also not important since at this level
+ * of overflow the eviction is purely advisory. Same time
+ * taking the global lock here every time without waiting for
+ * the actual eviction creates a significant lock contention.
+ */
+ if (!arc_evict_needed) {
+ arc_evict_needed = B_TRUE;
+ zthr_wakeup(arc_evict_zthr);
+ }
+ return;
+ case ARC_OVF_SEVERE:
+ default:
+ {
+ arc_evict_waiter_t aw;
+ list_link_init(&aw.aew_node);
+ cv_init(&aw.aew_cv, NULL, CV_DEFAULT, NULL);
+ uint64_t last_count = 0;
+ mutex_enter(&arc_evict_lock);
+ if (!list_is_empty(&arc_evict_waiters)) {
arc_evict_waiter_t *last =
list_tail(&arc_evict_waiters);
- if (last != NULL) {
- ASSERT3U(last->aew_count, >, arc_evict_count);
- aw.aew_count = last->aew_count + amount;
- } else {
- aw.aew_count = arc_evict_count + amount;
- }
+ last_count = last->aew_count;
+ } else if (!arc_evict_needed) {
+ arc_evict_needed = B_TRUE;
+ zthr_wakeup(arc_evict_zthr);
+ }
+ /*
+ * Note, the last waiter's count may be less than
+ * arc_evict_count if we are low on memory in which
+ * case arc_evict_state_impl() may have deferred
+ * wakeups (but still incremented arc_evict_count).
+ */
+ aw.aew_count = MAX(last_count, arc_evict_count) + amount;
- list_insert_tail(&arc_evict_waiters, &aw);
+ list_insert_tail(&arc_evict_waiters, &aw);
- arc_set_need_free();
+ arc_set_need_free();
- DTRACE_PROBE3(arc__wait__for__eviction,
- uint64_t, amount,
- uint64_t, arc_evict_count,
- uint64_t, aw.aew_count);
+ DTRACE_PROBE3(arc__wait__for__eviction,
+ uint64_t, amount,
+ uint64_t, arc_evict_count,
+ uint64_t, aw.aew_count);
- /*
- * We will be woken up either when arc_evict_count
- * reaches aew_count, or when the ARC is no longer
- * overflowing and eviction completes.
- */
+ /*
+ * We will be woken up either when arc_evict_count reaches
+ * aew_count, or when the ARC is no longer overflowing and
+ * eviction completes.
+ * In case of "false" wakeup, we will still be on the list.
+ */
+ do {
cv_wait(&aw.aew_cv, &arc_evict_lock);
+ } while (list_link_active(&aw.aew_node));
+ mutex_exit(&arc_evict_lock);
- /*
- * In case of "false" wakeup, we will still be on the
- * list.
- */
- if (list_link_active(&aw.aew_node))
- list_remove(&arc_evict_waiters, &aw);
-
- cv_destroy(&aw.aew_cv);
- }
+ cv_destroy(&aw.aew_cv);
+ }
}
- mutex_exit(&arc_evict_lock);
}
/*
* limit, we'll only signal the reclaim thread and continue on.
*/
static void
-arc_get_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag,
- boolean_t do_adapt)
+arc_get_data_impl(arc_buf_hdr_t *hdr, uint64_t size, const void *tag,
+ int alloc_flags)
{
arc_state_t *state = hdr->b_l1hdr.b_state;
arc_buf_contents_t type = arc_buf_type(hdr);
- if (do_adapt)
+ if (alloc_flags & ARC_HDR_DO_ADAPT)
arc_adapt(size, state);
/*
* requested size to be evicted. This should be more than 100%, to
 * ensure that progress is also made towards getting arc_size
* under arc_c. See the comment above zfs_arc_eviction_pct.
- *
- * We do the overflowing check without holding the arc_evict_lock to
- * reduce lock contention in this hot path. Note that
- * arc_wait_for_eviction() will acquire the lock and check again to
- * ensure we are truly overflowing before blocking.
*/
- if (arc_is_overflowing()) {
- arc_wait_for_eviction(size *
- zfs_arc_eviction_pct / 100);
- }
+ arc_wait_for_eviction(size * zfs_arc_eviction_pct / 100,
+ alloc_flags & ARC_HDR_USE_RESERVE);
VERIFY3U(hdr->b_type, ==, type);
if (type == ARC_BUFC_METADATA) {
* If we are growing the cache, and we are adding anonymous
* data, and we have outgrown arc_p, update arc_p
*/
- if (aggsum_upper_bound(&arc_size) < arc_c &&
+ if (aggsum_upper_bound(&arc_sums.arcstat_size) < arc_c &&
hdr->b_l1hdr.b_state == arc_anon &&
(zfs_refcount_count(&arc_anon->arcs_size) +
zfs_refcount_count(&arc_mru->arcs_size) > arc_p))
}
static void
-arc_free_data_abd(arc_buf_hdr_t *hdr, abd_t *abd, uint64_t size, void *tag)
+arc_free_data_abd(arc_buf_hdr_t *hdr, abd_t *abd, uint64_t size,
+ const void *tag)
{
arc_free_data_impl(hdr, size, tag);
abd_free(abd);
}
static void
-arc_free_data_buf(arc_buf_hdr_t *hdr, void *buf, uint64_t size, void *tag)
+arc_free_data_buf(arc_buf_hdr_t *hdr, void *buf, uint64_t size, const void *tag)
{
arc_buf_contents_t type = arc_buf_type(hdr);
* Free the arc data buffer.
*/
static void
-arc_free_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag)
+arc_free_data_impl(arc_buf_hdr_t *hdr, uint64_t size, const void *tag)
{
arc_state_t *state = hdr->b_l1hdr.b_state;
arc_buf_contents_t type = arc_buf_type(hdr);
arc_hdr_clear_flags(hdr,
ARC_FLAG_PREFETCH |
ARC_FLAG_PRESCIENT_PREFETCH);
- atomic_inc_32(&hdr->b_l1hdr.b_mru_hits);
+ hdr->b_l1hdr.b_mru_hits++;
ARCSTAT_BUMP(arcstat_mru_hits);
if (HDR_HAS_L2HDR(hdr))
l2arc_hdr_arcstats_increment_state(hdr);
DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, hdr);
arc_change_state(arc_mfu, hdr, hash_lock);
}
- atomic_inc_32(&hdr->b_l1hdr.b_mru_hits);
+ hdr->b_l1hdr.b_mru_hits++;
ARCSTAT_BUMP(arcstat_mru_hits);
} else if (hdr->b_l1hdr.b_state == arc_mru_ghost) {
arc_state_t *new_state;
hdr->b_l1hdr.b_arc_access = ddi_get_lbolt();
arc_change_state(new_state, hdr, hash_lock);
- atomic_inc_32(&hdr->b_l1hdr.b_mru_ghost_hits);
+ hdr->b_l1hdr.b_mru_ghost_hits++;
ARCSTAT_BUMP(arcstat_mru_ghost_hits);
} else if (hdr->b_l1hdr.b_state == arc_mfu) {
/*
* the head of the list now.
*/
- atomic_inc_32(&hdr->b_l1hdr.b_mfu_hits);
+ hdr->b_l1hdr.b_mfu_hits++;
ARCSTAT_BUMP(arcstat_mfu_hits);
hdr->b_l1hdr.b_arc_access = ddi_get_lbolt();
} else if (hdr->b_l1hdr.b_state == arc_mfu_ghost) {
DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, hdr);
arc_change_state(new_state, hdr, hash_lock);
- atomic_inc_32(&hdr->b_l1hdr.b_mfu_ghost_hits);
+ hdr->b_l1hdr.b_mfu_ghost_hits++;
ARCSTAT_BUMP(arcstat_mfu_ghost_hits);
} else if (hdr->b_l1hdr.b_state == arc_l2c_only) {
/*
}
/* a generic arc_read_done_func_t which you can use */
-/* ARGSUSED */
void
arc_bcopy_func(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp,
arc_buf_t *buf, void *arg)
{
+ (void) zio, (void) zb, (void) bp;
+
if (buf == NULL)
return;
- bcopy(buf->b_data, arg, arc_buf_size(buf));
+ memcpy(arg, buf->b_data, arc_buf_size(buf));
arc_buf_destroy(buf, arg);
}
/* a generic arc_read_done_func_t */
-/* ARGSUSED */
void
arc_getbuf_func(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp,
arc_buf_t *buf, void *arg)
{
+ (void) zb, (void) bp;
arc_buf_t **bufp = arg;
if (buf == NULL) {
zio_crypt_decode_params_bp(bp, hdr->b_crypt_hdr.b_salt,
hdr->b_crypt_hdr.b_iv);
- if (BP_GET_TYPE(bp) == DMU_OT_INTENT_LOG) {
- void *tmpbuf;
-
- tmpbuf = abd_borrow_buf_copy(zio->io_abd,
- sizeof (zil_chain_t));
- zio_crypt_decode_mac_zil(tmpbuf,
- hdr->b_crypt_hdr.b_mac);
- abd_return_buf(zio->io_abd, tmpbuf,
- sizeof (zil_chain_t));
- } else {
- zio_crypt_decode_mac_bp(bp, hdr->b_crypt_hdr.b_mac);
+ if (zio->io_error == 0) {
+ if (BP_GET_TYPE(bp) == DMU_OT_INTENT_LOG) {
+ void *tmpbuf;
+
+ tmpbuf = abd_borrow_buf_copy(zio->io_abd,
+ sizeof (zil_chain_t));
+ zio_crypt_decode_mac_zil(tmpbuf,
+ hdr->b_crypt_hdr.b_mac);
+ abd_return_buf(zio->io_abd, tmpbuf,
+ sizeof (zil_chain_t));
+ } else {
+ zio_crypt_decode_mac_bp(bp,
+ hdr->b_crypt_hdr.b_mac);
+ }
}
}
*/
int callback_cnt = 0;
for (acb = callback_list; acb != NULL; acb = acb->acb_next) {
- if (!acb->acb_done)
+ if (!acb->acb_done || acb->acb_nobuf)
continue;
callback_cnt++;
*/
fstrans_cookie_t cookie = spl_fstrans_mark();
top:
+ /*
+ * Verify the block pointer contents are reasonable. This should
+ * always be the case since the blkptr is protected by a checksum.
+ * However, if there is damage, it's desirable to detect this early
+ * and treat it as a checksum error. This allows an alternate blkptr
+ * to be tried when one is available (e.g. ditto blocks).
+ */
+ if (!zfs_blkptr_verify(spa, bp, zio_flags & ZIO_FLAG_CONFIG_WRITER,
+ BLK_VERIFY_LOG)) {
+ rc = SET_ERROR(ECKSUM);
+ goto out;
+ }
+
if (!embedded_bp) {
/*
* Embedded BP's have no DVA and require no I/O to "read".
ARC_FLAG_PREDICTIVE_PREFETCH);
}
+ /*
+ * If there are multiple threads reading the same block
+ * and that block is not yet in the ARC, then only one
+ * thread will do the physical I/O and all other
+ * threads will wait until that I/O completes.
+ * Synchronous reads use the b_cv whereas nowait reads
+ * register a callback. Both are signalled/called in
+ * arc_read_done.
+ *
+ * Errors of the physical I/O may need to be propagated
+ * to the pio. For synchronous reads, we simply restart
+ * this function and it will reassess. Nowait reads
+ * attach the acb_zio_dummy zio to pio and
+ * arc_read_done propagates the physical I/O's io_error
+ * to acb_zio_dummy, and thereby to pio.
+ */
+
if (*arc_flags & ARC_FLAG_WAIT) {
cv_wait(&hdr->b_l1hdr.b_cv, hash_lock);
mutex_exit(hash_lock);
}
ASSERT(*arc_flags & ARC_FLAG_NOWAIT);
- if (done && !no_buf) {
+ if (done) {
arc_callback_t *acb = NULL;
acb = kmem_zalloc(sizeof (arc_callback_t),
acb->acb_compressed = compressed_read;
acb->acb_encrypted = encrypted_read;
acb->acb_noauth = noauth_read;
+ acb->acb_nobuf = no_buf;
acb->acb_zb = *zb;
if (pio != NULL)
acb->acb_zio_dummy = zio_null(pio,
acb->acb_zio_head = head_zio;
acb->acb_next = hdr->b_l1hdr.b_acb;
hdr->b_l1hdr.b_acb = acb;
- mutex_exit(hash_lock);
- goto out;
}
mutex_exit(hash_lock);
goto out;
goto out;
}
- /*
- * Gracefully handle a damaged logical block size as a
- * checksum error.
- */
- if (lsize > spa_maxblocksize(spa)) {
- rc = SET_ERROR(ECKSUM);
- if (hash_lock != NULL)
- mutex_exit(hash_lock);
- goto out;
- }
-
if (hdr == NULL) {
/*
* This block is not in the cache or it has
arc_buf_hdr_t *exists = NULL;
arc_buf_contents_t type = BP_GET_BUFC_TYPE(bp);
hdr = arc_hdr_alloc(spa_load_guid(spa), psize, lsize,
- BP_IS_PROTECTED(bp), BP_GET_COMPRESS(bp), 0, type,
- encrypted_read);
+ BP_IS_PROTECTED(bp), BP_GET_COMPRESS(bp), 0, type);
if (!embedded_bp) {
hdr->b_dva = *BP_IDENTITY(bp);
arc_hdr_destroy(hdr);
goto top; /* restart the IO request */
}
+ alloc_flags |= ARC_HDR_DO_ADAPT;
} else {
/*
* This block is in the ghost cache or encrypted data
*/
arc_adapt(arc_hdr_size(hdr), hdr->b_l1hdr.b_state);
arc_access(hdr, hash_lock);
- arc_hdr_alloc_abd(hdr, alloc_flags);
}
+ arc_hdr_alloc_abd(hdr, alloc_flags);
if (encrypted_read) {
ASSERT(HDR_HAS_RABD(hdr));
size = HDR_GET_PSIZE(hdr);
ARCSTAT_CONDSTAT(!HDR_PREFETCH(hdr),
demand, prefetch, !HDR_ISTYPE_METADATA(hdr), data,
metadata, misses);
+ zfs_racct_read(size, 1);
}
/* Check if the spa even has l2 configured */
DTRACE_PROBE1(l2arc__hit, arc_buf_hdr_t *, hdr);
ARCSTAT_BUMP(arcstat_l2_hits);
- atomic_inc_32(&hdr->b_l2hdr.b_hits);
+ hdr->b_l2hdr.b_hits++;
cb = kmem_zalloc(sizeof (l2arc_read_callback_t),
KM_SLEEP);
* a new hdr for the buffer.
*/
void
-arc_release(arc_buf_t *buf, void *tag)
+arc_release(arc_buf_t *buf, const void *tag)
{
arc_buf_hdr_t *hdr = buf->b_hdr;
ASSERT(!HDR_IO_IN_PROGRESS(hdr));
ASSERT(!HDR_IN_HASH_TABLE(hdr));
ASSERT(!HDR_HAS_L2HDR(hdr));
- ASSERT(HDR_EMPTY(hdr));
ASSERT3U(hdr->b_l1hdr.b_bufcnt, ==, 1);
ASSERT3S(zfs_refcount_count(&hdr->b_l1hdr.b_refcnt), ==, 1);
* buffer which will be freed in arc_write().
*/
nhdr = arc_hdr_alloc(spa, psize, lsize, protected,
- compress, hdr->b_complevel, type, HDR_HAS_RABD(hdr));
+ compress, hdr->b_complevel, type);
ASSERT3P(nhdr->b_l1hdr.b_buf, ==, NULL);
ASSERT0(nhdr->b_l1hdr.b_bufcnt);
ASSERT0(zfs_refcount_count(&nhdr->b_l1hdr.b_refcnt));
nhdr->b_l1hdr.b_bufcnt = 1;
if (ARC_BUF_ENCRYPTED(buf))
nhdr->b_crypt_hdr.b_ebufcnt = 1;
- nhdr->b_l1hdr.b_mru_hits = 0;
- nhdr->b_l1hdr.b_mru_ghost_hits = 0;
- nhdr->b_l1hdr.b_mfu_hits = 0;
- nhdr->b_l1hdr.b_mfu_ghost_hits = 0;
- nhdr->b_l1hdr.b_l2_hits = 0;
(void) zfs_refcount_add(&nhdr->b_l1hdr.b_refcnt, tag);
buf->b_hdr = nhdr;
hdr->b_l1hdr.b_mru_ghost_hits = 0;
hdr->b_l1hdr.b_mfu_hits = 0;
hdr->b_l1hdr.b_mfu_ghost_hits = 0;
- hdr->b_l1hdr.b_l2_hits = 0;
arc_change_state(arc_anon, hdr, hash_lock);
hdr->b_l1hdr.b_arc_access = 0;
if (ARC_BUF_ENCRYPTED(buf)) {
ASSERT3U(psize, >, 0);
ASSERT(ARC_BUF_COMPRESSED(buf));
- arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT|ARC_HDR_ALLOC_RDATA);
+ arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT | ARC_HDR_ALLOC_RDATA |
+ ARC_HDR_USE_RESERVE);
abd_copy(hdr->b_crypt_hdr.b_rabd, zio->io_abd, psize);
- } else if (zfs_abd_scatter_enabled || !arc_can_share(hdr, buf)) {
+ } else if (!abd_size_alloc_linear(arc_buf_size(buf)) ||
+ !arc_can_share(hdr, buf)) {
/*
* Ideally, we would always copy the io_abd into b_pabd, but the
* user may have disabled compressed ARC, thus we must check the
*/
if (BP_IS_ENCRYPTED(bp)) {
ASSERT3U(psize, >, 0);
- arc_hdr_alloc_abd(hdr,
- ARC_HDR_DO_ADAPT|ARC_HDR_ALLOC_RDATA);
+ arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT |
+ ARC_HDR_ALLOC_RDATA | ARC_HDR_USE_RESERVE);
abd_copy(hdr->b_crypt_hdr.b_rabd, zio->io_abd, psize);
} else if (arc_hdr_get_compress(hdr) != ZIO_COMPRESS_OFF &&
!ARC_BUF_COMPRESSED(buf)) {
ASSERT3U(psize, >, 0);
- arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT);
+ arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT |
+ ARC_HDR_USE_RESERVE);
abd_copy(hdr->b_l1hdr.b_pabd, zio->io_abd, psize);
} else {
ASSERT3U(zio->io_orig_size, ==, arc_hdr_size(hdr));
- arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT);
+ arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT |
+ ARC_HDR_USE_RESERVE);
abd_copy_from_buf(hdr->b_l1hdr.b_pabd, buf->b_data,
arc_buf_size(buf));
}
ASSERT(!zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
callback->awcb_done(zio, buf, callback->awcb_private);
- abd_put(zio->io_abd);
+ abd_free(zio->io_abd);
kmem_free(callback, sizeof (arc_write_callback_t));
}
localprop.zp_byteorder =
(hdr->b_l1hdr.b_byteswap == DMU_BSWAP_NUMFUNCS) ?
ZFS_HOST_BYTEORDER : !ZFS_HOST_BYTEORDER;
- bcopy(hdr->b_crypt_hdr.b_salt, localprop.zp_salt,
+ memcpy(localprop.zp_salt, hdr->b_crypt_hdr.b_salt,
ZIO_DATA_SALT_LEN);
- bcopy(hdr->b_crypt_hdr.b_iv, localprop.zp_iv,
+ memcpy(localprop.zp_iv, hdr->b_crypt_hdr.b_iv,
ZIO_DATA_IV_LEN);
- bcopy(hdr->b_crypt_hdr.b_mac, localprop.zp_mac,
+ memcpy(localprop.zp_mac, hdr->b_crypt_hdr.b_mac,
ZIO_DATA_MAC_LEN);
if (DMU_OT_IS_ENCRYPTED(localprop.zp_type)) {
localprop.zp_nopwrite = B_FALSE;
zfs_refcount_count(&arc_anon->arcs_esize[ARC_BUFC_DATA]);
dprintf("failing, arc_tempreserve=%lluK anon_meta=%lluK "
"anon_data=%lluK tempreserve=%lluK rarc_c=%lluK\n",
- arc_tempreserve >> 10, meta_esize >> 10,
- data_esize >> 10, reserve >> 10, rarc_c >> 10);
+ (u_longlong_t)arc_tempreserve >> 10,
+ (u_longlong_t)meta_esize >> 10,
+ (u_longlong_t)data_esize >> 10,
+ (u_longlong_t)reserve >> 10,
+ (u_longlong_t)rarc_c >> 10);
#endif
DMU_TX_STAT_BUMP(dmu_tx_dirty_throttle);
return (SET_ERROR(ERESTART));
{
arc_stats_t *as = ksp->ks_data;
- if (rw == KSTAT_WRITE) {
+ if (rw == KSTAT_WRITE)
return (SET_ERROR(EACCES));
- } else {
- arc_kstat_update_state(arc_anon,
- &as->arcstat_anon_size,
- &as->arcstat_anon_evictable_data,
- &as->arcstat_anon_evictable_metadata);
- arc_kstat_update_state(arc_mru,
- &as->arcstat_mru_size,
- &as->arcstat_mru_evictable_data,
- &as->arcstat_mru_evictable_metadata);
- arc_kstat_update_state(arc_mru_ghost,
- &as->arcstat_mru_ghost_size,
- &as->arcstat_mru_ghost_evictable_data,
- &as->arcstat_mru_ghost_evictable_metadata);
- arc_kstat_update_state(arc_mfu,
- &as->arcstat_mfu_size,
- &as->arcstat_mfu_evictable_data,
- &as->arcstat_mfu_evictable_metadata);
- arc_kstat_update_state(arc_mfu_ghost,
- &as->arcstat_mfu_ghost_size,
- &as->arcstat_mfu_ghost_evictable_data,
- &as->arcstat_mfu_ghost_evictable_metadata);
-
- ARCSTAT(arcstat_size) = aggsum_value(&arc_size);
- ARCSTAT(arcstat_meta_used) = aggsum_value(&arc_meta_used);
- ARCSTAT(arcstat_data_size) = aggsum_value(&astat_data_size);
- ARCSTAT(arcstat_metadata_size) =
- aggsum_value(&astat_metadata_size);
- ARCSTAT(arcstat_hdr_size) = aggsum_value(&astat_hdr_size);
- ARCSTAT(arcstat_l2_hdr_size) = aggsum_value(&astat_l2_hdr_size);
- ARCSTAT(arcstat_dbuf_size) = aggsum_value(&astat_dbuf_size);
+
+ as->arcstat_hits.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_hits);
+ as->arcstat_misses.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_misses);
+ as->arcstat_demand_data_hits.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_demand_data_hits);
+ as->arcstat_demand_data_misses.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_demand_data_misses);
+ as->arcstat_demand_metadata_hits.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_demand_metadata_hits);
+ as->arcstat_demand_metadata_misses.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_demand_metadata_misses);
+ as->arcstat_prefetch_data_hits.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_prefetch_data_hits);
+ as->arcstat_prefetch_data_misses.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_prefetch_data_misses);
+ as->arcstat_prefetch_metadata_hits.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_prefetch_metadata_hits);
+ as->arcstat_prefetch_metadata_misses.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_prefetch_metadata_misses);
+ as->arcstat_mru_hits.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_mru_hits);
+ as->arcstat_mru_ghost_hits.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_mru_ghost_hits);
+ as->arcstat_mfu_hits.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_mfu_hits);
+ as->arcstat_mfu_ghost_hits.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_mfu_ghost_hits);
+ as->arcstat_deleted.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_deleted);
+ as->arcstat_mutex_miss.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_mutex_miss);
+ as->arcstat_access_skip.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_access_skip);
+ as->arcstat_evict_skip.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_evict_skip);
+ as->arcstat_evict_not_enough.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_evict_not_enough);
+ as->arcstat_evict_l2_cached.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_evict_l2_cached);
+ as->arcstat_evict_l2_eligible.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_evict_l2_eligible);
+ as->arcstat_evict_l2_eligible_mfu.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_evict_l2_eligible_mfu);
+ as->arcstat_evict_l2_eligible_mru.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_evict_l2_eligible_mru);
+ as->arcstat_evict_l2_ineligible.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_evict_l2_ineligible);
+ as->arcstat_evict_l2_skip.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_evict_l2_skip);
+ as->arcstat_hash_collisions.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_hash_collisions);
+ as->arcstat_hash_chains.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_hash_chains);
+ as->arcstat_size.value.ui64 =
+ aggsum_value(&arc_sums.arcstat_size);
+ as->arcstat_compressed_size.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_compressed_size);
+ as->arcstat_uncompressed_size.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_uncompressed_size);
+ as->arcstat_overhead_size.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_overhead_size);
+ as->arcstat_hdr_size.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_hdr_size);
+ as->arcstat_data_size.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_data_size);
+ as->arcstat_metadata_size.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_metadata_size);
+ as->arcstat_dbuf_size.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_dbuf_size);
#if defined(COMPAT_FREEBSD11)
- ARCSTAT(arcstat_other_size) = aggsum_value(&astat_bonus_size) +
- aggsum_value(&astat_dnode_size) +
- aggsum_value(&astat_dbuf_size);
+ as->arcstat_other_size.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_bonus_size) +
+ aggsum_value(&arc_sums.arcstat_dnode_size) +
+ wmsum_value(&arc_sums.arcstat_dbuf_size);
#endif
- ARCSTAT(arcstat_dnode_size) = aggsum_value(&astat_dnode_size);
- ARCSTAT(arcstat_bonus_size) = aggsum_value(&astat_bonus_size);
- ARCSTAT(arcstat_abd_chunk_waste_size) =
- aggsum_value(&astat_abd_chunk_waste_size);
- as->arcstat_memory_all_bytes.value.ui64 =
- arc_all_memory();
- as->arcstat_memory_free_bytes.value.ui64 =
- arc_free_memory();
- as->arcstat_memory_available_bytes.value.i64 =
- arc_available_memory();
- }
+ arc_kstat_update_state(arc_anon,
+ &as->arcstat_anon_size,
+ &as->arcstat_anon_evictable_data,
+ &as->arcstat_anon_evictable_metadata);
+ arc_kstat_update_state(arc_mru,
+ &as->arcstat_mru_size,
+ &as->arcstat_mru_evictable_data,
+ &as->arcstat_mru_evictable_metadata);
+ arc_kstat_update_state(arc_mru_ghost,
+ &as->arcstat_mru_ghost_size,
+ &as->arcstat_mru_ghost_evictable_data,
+ &as->arcstat_mru_ghost_evictable_metadata);
+ arc_kstat_update_state(arc_mfu,
+ &as->arcstat_mfu_size,
+ &as->arcstat_mfu_evictable_data,
+ &as->arcstat_mfu_evictable_metadata);
+ arc_kstat_update_state(arc_mfu_ghost,
+ &as->arcstat_mfu_ghost_size,
+ &as->arcstat_mfu_ghost_evictable_data,
+ &as->arcstat_mfu_ghost_evictable_metadata);
+
+ as->arcstat_dnode_size.value.ui64 =
+ aggsum_value(&arc_sums.arcstat_dnode_size);
+ as->arcstat_bonus_size.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_bonus_size);
+ as->arcstat_l2_hits.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_l2_hits);
+ as->arcstat_l2_misses.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_l2_misses);
+ as->arcstat_l2_prefetch_asize.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_l2_prefetch_asize);
+ as->arcstat_l2_mru_asize.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_l2_mru_asize);
+ as->arcstat_l2_mfu_asize.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_l2_mfu_asize);
+ as->arcstat_l2_bufc_data_asize.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_l2_bufc_data_asize);
+ as->arcstat_l2_bufc_metadata_asize.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_l2_bufc_metadata_asize);
+ as->arcstat_l2_feeds.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_l2_feeds);
+ as->arcstat_l2_rw_clash.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_l2_rw_clash);
+ as->arcstat_l2_read_bytes.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_l2_read_bytes);
+ as->arcstat_l2_write_bytes.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_l2_write_bytes);
+ as->arcstat_l2_writes_sent.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_l2_writes_sent);
+ as->arcstat_l2_writes_done.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_l2_writes_done);
+ as->arcstat_l2_writes_error.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_l2_writes_error);
+ as->arcstat_l2_writes_lock_retry.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_l2_writes_lock_retry);
+ as->arcstat_l2_evict_lock_retry.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_l2_evict_lock_retry);
+ as->arcstat_l2_evict_reading.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_l2_evict_reading);
+ as->arcstat_l2_evict_l1cached.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_l2_evict_l1cached);
+ as->arcstat_l2_free_on_write.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_l2_free_on_write);
+ as->arcstat_l2_abort_lowmem.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_l2_abort_lowmem);
+ as->arcstat_l2_cksum_bad.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_l2_cksum_bad);
+ as->arcstat_l2_io_error.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_l2_io_error);
+ as->arcstat_l2_lsize.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_l2_lsize);
+ as->arcstat_l2_psize.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_l2_psize);
+ as->arcstat_l2_hdr_size.value.ui64 =
+ aggsum_value(&arc_sums.arcstat_l2_hdr_size);
+ as->arcstat_l2_log_blk_writes.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_l2_log_blk_writes);
+ as->arcstat_l2_log_blk_asize.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_l2_log_blk_asize);
+ as->arcstat_l2_log_blk_count.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_l2_log_blk_count);
+ as->arcstat_l2_rebuild_success.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_l2_rebuild_success);
+ as->arcstat_l2_rebuild_abort_unsupported.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_l2_rebuild_abort_unsupported);
+ as->arcstat_l2_rebuild_abort_io_errors.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_l2_rebuild_abort_io_errors);
+ as->arcstat_l2_rebuild_abort_dh_errors.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_l2_rebuild_abort_dh_errors);
+ as->arcstat_l2_rebuild_abort_cksum_lb_errors.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_l2_rebuild_abort_cksum_lb_errors);
+ as->arcstat_l2_rebuild_abort_lowmem.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_l2_rebuild_abort_lowmem);
+ as->arcstat_l2_rebuild_size.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_l2_rebuild_size);
+ as->arcstat_l2_rebuild_asize.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_l2_rebuild_asize);
+ as->arcstat_l2_rebuild_bufs.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_l2_rebuild_bufs);
+ as->arcstat_l2_rebuild_bufs_precached.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_l2_rebuild_bufs_precached);
+ as->arcstat_l2_rebuild_log_blks.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_l2_rebuild_log_blks);
+ as->arcstat_memory_throttle_count.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_memory_throttle_count);
+ as->arcstat_memory_direct_count.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_memory_direct_count);
+ as->arcstat_memory_indirect_count.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_memory_indirect_count);
+
+ as->arcstat_memory_all_bytes.value.ui64 =
+ arc_all_memory();
+ as->arcstat_memory_free_bytes.value.ui64 =
+ arc_free_memory();
+ as->arcstat_memory_available_bytes.value.i64 =
+ arc_available_memory();
+
+ as->arcstat_prune.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_prune);
+ as->arcstat_meta_used.value.ui64 =
+ aggsum_value(&arc_sums.arcstat_meta_used);
+ as->arcstat_async_upgrade_sync.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_async_upgrade_sync);
+ as->arcstat_demand_hit_predictive_prefetch.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_demand_hit_predictive_prefetch);
+ as->arcstat_demand_hit_prescient_prefetch.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_demand_hit_prescient_prefetch);
+ as->arcstat_raw_size.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_raw_size);
+ as->arcstat_cached_only_in_progress.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_cached_only_in_progress);
+ as->arcstat_abd_chunk_waste_size.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_abd_chunk_waste_size);
return (0);
}
* Also, the low order bits of the hash value are thought to be
* distributed evenly. Otherwise, in the case that the multilist
* has a power of two number of sublists, each sublist's usage
- * would not be evenly distributed.
+ * would not be evenly distributed. In this context a full 64-bit
+ * division would be a waste of time, so limit it to 32 bits.
*/
- return (buf_hash(hdr->b_spa, &hdr->b_dva, hdr->b_birth) %
+ return ((unsigned int)buf_hash(hdr->b_spa, &hdr->b_dva, hdr->b_birth) %
multilist_get_num_sublists(ml));
}
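/*
 * A minimal sketch of the idea above: given a 64-bit hash whose low-order
 * bits are evenly distributed,
 *
 *	idx = (unsigned int)hash % num_sublists;
 *
 * spreads headers across the sublists just as evenly as a full 64-bit
 * modulo, while only paying for a 32-bit division (which is considerably
 * cheaper, especially on 32-bit platforms).
 */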
+static unsigned int
+arc_state_l2c_multilist_index_func(multilist_t *ml, void *obj)
+{
+ panic("Header %p insert into arc_l2c_only %p", obj, ml);
+}
+
#define WARN_IF_TUNING_IGNORED(tuning, value, do_warn) do { \
if ((do_warn) && (tuning) && ((tuning) != (value))) { \
cmn_err(CE_WARN, \
"ignoring tunable %s (using %llu instead)", \
- (#tuning), (value)); \
+ (#tuning), (u_longlong_t)(value)); \
} \
} while (0)
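/*
 * Roughly how this is used below: the warning fires only when warnings are
 * requested (do_warn), the tunable is nonzero (i.e. the user set it), and
 * the value actually in effect differs from it. A rejected
 * zfs_arc_lotsfree_percent setting, for example, would log
 * "ignoring tunable zfs_arc_lotsfree_percent (using <effective value>
 * instead)".
 */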
/* Valid range: 64M - <all physical memory> */
if ((zfs_arc_max) && (zfs_arc_max != arc_c_max) &&
- (zfs_arc_max >= 64 << 20) && (zfs_arc_max < allmem) &&
+ (zfs_arc_max >= MIN_ARC_MAX) && (zfs_arc_max < allmem) &&
(zfs_arc_max > arc_c_min)) {
arc_c_max = zfs_arc_max;
arc_c = MIN(arc_c, arc_c_max);
}
/* Valid range: 0 - 100 */
- if ((zfs_arc_lotsfree_percent >= 0) &&
- (zfs_arc_lotsfree_percent <= 100))
+ if (zfs_arc_lotsfree_percent <= 100)
arc_lotsfree_percent = zfs_arc_lotsfree_percent;
WARN_IF_TUNING_IGNORED(zfs_arc_lotsfree_percent, arc_lotsfree_percent,
verbose);
/* Valid range: 0 - <all physical memory> */
if ((zfs_arc_sys_free) && (zfs_arc_sys_free != arc_sys_free))
- arc_sys_free = MIN(MAX(zfs_arc_sys_free, 0), allmem);
+ arc_sys_free = MIN(zfs_arc_sys_free, allmem);
WARN_IF_TUNING_IGNORED(zfs_arc_sys_free, arc_sys_free, verbose);
}
+static void
+arc_state_multilist_init(multilist_t *ml,
+ multilist_sublist_index_func_t *index_func, int *maxcountp)
+{
+ multilist_create(ml, sizeof (arc_buf_hdr_t),
+ offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), index_func);
+ *maxcountp = MAX(*maxcountp, multilist_get_num_sublists(ml));
+}
+
static void
arc_state_init(void)
{
- arc_anon = &ARC_anon;
- arc_mru = &ARC_mru;
- arc_mru_ghost = &ARC_mru_ghost;
- arc_mfu = &ARC_mfu;
- arc_mfu_ghost = &ARC_mfu_ghost;
- arc_l2c_only = &ARC_l2c_only;
-
- arc_mru->arcs_list[ARC_BUFC_METADATA] =
- multilist_create(sizeof (arc_buf_hdr_t),
- offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
- arc_state_multilist_index_func);
- arc_mru->arcs_list[ARC_BUFC_DATA] =
- multilist_create(sizeof (arc_buf_hdr_t),
- offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
- arc_state_multilist_index_func);
- arc_mru_ghost->arcs_list[ARC_BUFC_METADATA] =
- multilist_create(sizeof (arc_buf_hdr_t),
- offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
- arc_state_multilist_index_func);
- arc_mru_ghost->arcs_list[ARC_BUFC_DATA] =
- multilist_create(sizeof (arc_buf_hdr_t),
- offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
- arc_state_multilist_index_func);
- arc_mfu->arcs_list[ARC_BUFC_METADATA] =
- multilist_create(sizeof (arc_buf_hdr_t),
- offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
- arc_state_multilist_index_func);
- arc_mfu->arcs_list[ARC_BUFC_DATA] =
- multilist_create(sizeof (arc_buf_hdr_t),
- offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
- arc_state_multilist_index_func);
- arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA] =
- multilist_create(sizeof (arc_buf_hdr_t),
- offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
- arc_state_multilist_index_func);
- arc_mfu_ghost->arcs_list[ARC_BUFC_DATA] =
- multilist_create(sizeof (arc_buf_hdr_t),
- offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
- arc_state_multilist_index_func);
- arc_l2c_only->arcs_list[ARC_BUFC_METADATA] =
- multilist_create(sizeof (arc_buf_hdr_t),
- offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
- arc_state_multilist_index_func);
- arc_l2c_only->arcs_list[ARC_BUFC_DATA] =
- multilist_create(sizeof (arc_buf_hdr_t),
- offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
- arc_state_multilist_index_func);
+ int num_sublists = 0;
+
+ arc_state_multilist_init(&arc_mru->arcs_list[ARC_BUFC_METADATA],
+ arc_state_multilist_index_func, &num_sublists);
+ arc_state_multilist_init(&arc_mru->arcs_list[ARC_BUFC_DATA],
+ arc_state_multilist_index_func, &num_sublists);
+ arc_state_multilist_init(&arc_mru_ghost->arcs_list[ARC_BUFC_METADATA],
+ arc_state_multilist_index_func, &num_sublists);
+ arc_state_multilist_init(&arc_mru_ghost->arcs_list[ARC_BUFC_DATA],
+ arc_state_multilist_index_func, &num_sublists);
+ arc_state_multilist_init(&arc_mfu->arcs_list[ARC_BUFC_METADATA],
+ arc_state_multilist_index_func, &num_sublists);
+ arc_state_multilist_init(&arc_mfu->arcs_list[ARC_BUFC_DATA],
+ arc_state_multilist_index_func, &num_sublists);
+ arc_state_multilist_init(&arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA],
+ arc_state_multilist_index_func, &num_sublists);
+ arc_state_multilist_init(&arc_mfu_ghost->arcs_list[ARC_BUFC_DATA],
+ arc_state_multilist_index_func, &num_sublists);
+
+ /*
+ * L2 headers should never be on the L2 state list since they don't
+ * have L1 headers allocated. A special index function asserts that.
+ */
+ arc_state_multilist_init(&arc_l2c_only->arcs_list[ARC_BUFC_METADATA],
+ arc_state_l2c_multilist_index_func, &num_sublists);
+ arc_state_multilist_init(&arc_l2c_only->arcs_list[ARC_BUFC_DATA],
+ arc_state_l2c_multilist_index_func, &num_sublists);
+
+ /*
+ * Keep track of the number of markers needed to reclaim buffers from
+ * any ARC state. The markers will be pre-allocated so as to minimize
+ * the number of memory allocations performed by the eviction thread.
+ */
+ arc_state_evict_marker_count = num_sublists;
zfs_refcount_create(&arc_anon->arcs_esize[ARC_BUFC_METADATA]);
zfs_refcount_create(&arc_anon->arcs_esize[ARC_BUFC_DATA]);
zfs_refcount_create(&arc_mfu_ghost->arcs_size);
zfs_refcount_create(&arc_l2c_only->arcs_size);
- aggsum_init(&arc_meta_used, 0);
- aggsum_init(&arc_size, 0);
- aggsum_init(&astat_data_size, 0);
- aggsum_init(&astat_metadata_size, 0);
- aggsum_init(&astat_hdr_size, 0);
- aggsum_init(&astat_l2_hdr_size, 0);
- aggsum_init(&astat_bonus_size, 0);
- aggsum_init(&astat_dnode_size, 0);
- aggsum_init(&astat_dbuf_size, 0);
- aggsum_init(&astat_abd_chunk_waste_size, 0);
+ wmsum_init(&arc_sums.arcstat_hits, 0);
+ wmsum_init(&arc_sums.arcstat_misses, 0);
+ wmsum_init(&arc_sums.arcstat_demand_data_hits, 0);
+ wmsum_init(&arc_sums.arcstat_demand_data_misses, 0);
+ wmsum_init(&arc_sums.arcstat_demand_metadata_hits, 0);
+ wmsum_init(&arc_sums.arcstat_demand_metadata_misses, 0);
+ wmsum_init(&arc_sums.arcstat_prefetch_data_hits, 0);
+ wmsum_init(&arc_sums.arcstat_prefetch_data_misses, 0);
+ wmsum_init(&arc_sums.arcstat_prefetch_metadata_hits, 0);
+ wmsum_init(&arc_sums.arcstat_prefetch_metadata_misses, 0);
+ wmsum_init(&arc_sums.arcstat_mru_hits, 0);
+ wmsum_init(&arc_sums.arcstat_mru_ghost_hits, 0);
+ wmsum_init(&arc_sums.arcstat_mfu_hits, 0);
+ wmsum_init(&arc_sums.arcstat_mfu_ghost_hits, 0);
+ wmsum_init(&arc_sums.arcstat_deleted, 0);
+ wmsum_init(&arc_sums.arcstat_mutex_miss, 0);
+ wmsum_init(&arc_sums.arcstat_access_skip, 0);
+ wmsum_init(&arc_sums.arcstat_evict_skip, 0);
+ wmsum_init(&arc_sums.arcstat_evict_not_enough, 0);
+ wmsum_init(&arc_sums.arcstat_evict_l2_cached, 0);
+ wmsum_init(&arc_sums.arcstat_evict_l2_eligible, 0);
+ wmsum_init(&arc_sums.arcstat_evict_l2_eligible_mfu, 0);
+ wmsum_init(&arc_sums.arcstat_evict_l2_eligible_mru, 0);
+ wmsum_init(&arc_sums.arcstat_evict_l2_ineligible, 0);
+ wmsum_init(&arc_sums.arcstat_evict_l2_skip, 0);
+ wmsum_init(&arc_sums.arcstat_hash_collisions, 0);
+ wmsum_init(&arc_sums.arcstat_hash_chains, 0);
+ aggsum_init(&arc_sums.arcstat_size, 0);
+ wmsum_init(&arc_sums.arcstat_compressed_size, 0);
+ wmsum_init(&arc_sums.arcstat_uncompressed_size, 0);
+ wmsum_init(&arc_sums.arcstat_overhead_size, 0);
+ wmsum_init(&arc_sums.arcstat_hdr_size, 0);
+ wmsum_init(&arc_sums.arcstat_data_size, 0);
+ wmsum_init(&arc_sums.arcstat_metadata_size, 0);
+ wmsum_init(&arc_sums.arcstat_dbuf_size, 0);
+ aggsum_init(&arc_sums.arcstat_dnode_size, 0);
+ wmsum_init(&arc_sums.arcstat_bonus_size, 0);
+ wmsum_init(&arc_sums.arcstat_l2_hits, 0);
+ wmsum_init(&arc_sums.arcstat_l2_misses, 0);
+ wmsum_init(&arc_sums.arcstat_l2_prefetch_asize, 0);
+ wmsum_init(&arc_sums.arcstat_l2_mru_asize, 0);
+ wmsum_init(&arc_sums.arcstat_l2_mfu_asize, 0);
+ wmsum_init(&arc_sums.arcstat_l2_bufc_data_asize, 0);
+ wmsum_init(&arc_sums.arcstat_l2_bufc_metadata_asize, 0);
+ wmsum_init(&arc_sums.arcstat_l2_feeds, 0);
+ wmsum_init(&arc_sums.arcstat_l2_rw_clash, 0);
+ wmsum_init(&arc_sums.arcstat_l2_read_bytes, 0);
+ wmsum_init(&arc_sums.arcstat_l2_write_bytes, 0);
+ wmsum_init(&arc_sums.arcstat_l2_writes_sent, 0);
+ wmsum_init(&arc_sums.arcstat_l2_writes_done, 0);
+ wmsum_init(&arc_sums.arcstat_l2_writes_error, 0);
+ wmsum_init(&arc_sums.arcstat_l2_writes_lock_retry, 0);
+ wmsum_init(&arc_sums.arcstat_l2_evict_lock_retry, 0);
+ wmsum_init(&arc_sums.arcstat_l2_evict_reading, 0);
+ wmsum_init(&arc_sums.arcstat_l2_evict_l1cached, 0);
+ wmsum_init(&arc_sums.arcstat_l2_free_on_write, 0);
+ wmsum_init(&arc_sums.arcstat_l2_abort_lowmem, 0);
+ wmsum_init(&arc_sums.arcstat_l2_cksum_bad, 0);
+ wmsum_init(&arc_sums.arcstat_l2_io_error, 0);
+ wmsum_init(&arc_sums.arcstat_l2_lsize, 0);
+ wmsum_init(&arc_sums.arcstat_l2_psize, 0);
+ aggsum_init(&arc_sums.arcstat_l2_hdr_size, 0);
+ wmsum_init(&arc_sums.arcstat_l2_log_blk_writes, 0);
+ wmsum_init(&arc_sums.arcstat_l2_log_blk_asize, 0);
+ wmsum_init(&arc_sums.arcstat_l2_log_blk_count, 0);
+ wmsum_init(&arc_sums.arcstat_l2_rebuild_success, 0);
+ wmsum_init(&arc_sums.arcstat_l2_rebuild_abort_unsupported, 0);
+ wmsum_init(&arc_sums.arcstat_l2_rebuild_abort_io_errors, 0);
+ wmsum_init(&arc_sums.arcstat_l2_rebuild_abort_dh_errors, 0);
+ wmsum_init(&arc_sums.arcstat_l2_rebuild_abort_cksum_lb_errors, 0);
+ wmsum_init(&arc_sums.arcstat_l2_rebuild_abort_lowmem, 0);
+ wmsum_init(&arc_sums.arcstat_l2_rebuild_size, 0);
+ wmsum_init(&arc_sums.arcstat_l2_rebuild_asize, 0);
+ wmsum_init(&arc_sums.arcstat_l2_rebuild_bufs, 0);
+ wmsum_init(&arc_sums.arcstat_l2_rebuild_bufs_precached, 0);
+ wmsum_init(&arc_sums.arcstat_l2_rebuild_log_blks, 0);
+ wmsum_init(&arc_sums.arcstat_memory_throttle_count, 0);
+ wmsum_init(&arc_sums.arcstat_memory_direct_count, 0);
+ wmsum_init(&arc_sums.arcstat_memory_indirect_count, 0);
+ wmsum_init(&arc_sums.arcstat_prune, 0);
+ aggsum_init(&arc_sums.arcstat_meta_used, 0);
+ wmsum_init(&arc_sums.arcstat_async_upgrade_sync, 0);
+ wmsum_init(&arc_sums.arcstat_demand_hit_predictive_prefetch, 0);
+ wmsum_init(&arc_sums.arcstat_demand_hit_prescient_prefetch, 0);
+ wmsum_init(&arc_sums.arcstat_raw_size, 0);
+ wmsum_init(&arc_sums.arcstat_cached_only_in_progress, 0);
+ wmsum_init(&arc_sums.arcstat_abd_chunk_waste_size, 0);
arc_anon->arcs_state = ARC_STATE_ANON;
arc_mru->arcs_state = ARC_STATE_MRU;
zfs_refcount_destroy(&arc_mfu_ghost->arcs_size);
zfs_refcount_destroy(&arc_l2c_only->arcs_size);
- multilist_destroy(arc_mru->arcs_list[ARC_BUFC_METADATA]);
- multilist_destroy(arc_mru_ghost->arcs_list[ARC_BUFC_METADATA]);
- multilist_destroy(arc_mfu->arcs_list[ARC_BUFC_METADATA]);
- multilist_destroy(arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA]);
- multilist_destroy(arc_mru->arcs_list[ARC_BUFC_DATA]);
- multilist_destroy(arc_mru_ghost->arcs_list[ARC_BUFC_DATA]);
- multilist_destroy(arc_mfu->arcs_list[ARC_BUFC_DATA]);
- multilist_destroy(arc_mfu_ghost->arcs_list[ARC_BUFC_DATA]);
- multilist_destroy(arc_l2c_only->arcs_list[ARC_BUFC_METADATA]);
- multilist_destroy(arc_l2c_only->arcs_list[ARC_BUFC_DATA]);
-
- aggsum_fini(&arc_meta_used);
- aggsum_fini(&arc_size);
- aggsum_fini(&astat_data_size);
- aggsum_fini(&astat_metadata_size);
- aggsum_fini(&astat_hdr_size);
- aggsum_fini(&astat_l2_hdr_size);
- aggsum_fini(&astat_bonus_size);
- aggsum_fini(&astat_dnode_size);
- aggsum_fini(&astat_dbuf_size);
- aggsum_fini(&astat_abd_chunk_waste_size);
+ multilist_destroy(&arc_mru->arcs_list[ARC_BUFC_METADATA]);
+ multilist_destroy(&arc_mru_ghost->arcs_list[ARC_BUFC_METADATA]);
+ multilist_destroy(&arc_mfu->arcs_list[ARC_BUFC_METADATA]);
+ multilist_destroy(&arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA]);
+ multilist_destroy(&arc_mru->arcs_list[ARC_BUFC_DATA]);
+ multilist_destroy(&arc_mru_ghost->arcs_list[ARC_BUFC_DATA]);
+ multilist_destroy(&arc_mfu->arcs_list[ARC_BUFC_DATA]);
+ multilist_destroy(&arc_mfu_ghost->arcs_list[ARC_BUFC_DATA]);
+ multilist_destroy(&arc_l2c_only->arcs_list[ARC_BUFC_METADATA]);
+ multilist_destroy(&arc_l2c_only->arcs_list[ARC_BUFC_DATA]);
+
+ wmsum_fini(&arc_sums.arcstat_hits);
+ wmsum_fini(&arc_sums.arcstat_misses);
+ wmsum_fini(&arc_sums.arcstat_demand_data_hits);
+ wmsum_fini(&arc_sums.arcstat_demand_data_misses);
+ wmsum_fini(&arc_sums.arcstat_demand_metadata_hits);
+ wmsum_fini(&arc_sums.arcstat_demand_metadata_misses);
+ wmsum_fini(&arc_sums.arcstat_prefetch_data_hits);
+ wmsum_fini(&arc_sums.arcstat_prefetch_data_misses);
+ wmsum_fini(&arc_sums.arcstat_prefetch_metadata_hits);
+ wmsum_fini(&arc_sums.arcstat_prefetch_metadata_misses);
+ wmsum_fini(&arc_sums.arcstat_mru_hits);
+ wmsum_fini(&arc_sums.arcstat_mru_ghost_hits);
+ wmsum_fini(&arc_sums.arcstat_mfu_hits);
+ wmsum_fini(&arc_sums.arcstat_mfu_ghost_hits);
+ wmsum_fini(&arc_sums.arcstat_deleted);
+ wmsum_fini(&arc_sums.arcstat_mutex_miss);
+ wmsum_fini(&arc_sums.arcstat_access_skip);
+ wmsum_fini(&arc_sums.arcstat_evict_skip);
+ wmsum_fini(&arc_sums.arcstat_evict_not_enough);
+ wmsum_fini(&arc_sums.arcstat_evict_l2_cached);
+ wmsum_fini(&arc_sums.arcstat_evict_l2_eligible);
+ wmsum_fini(&arc_sums.arcstat_evict_l2_eligible_mfu);
+ wmsum_fini(&arc_sums.arcstat_evict_l2_eligible_mru);
+ wmsum_fini(&arc_sums.arcstat_evict_l2_ineligible);
+ wmsum_fini(&arc_sums.arcstat_evict_l2_skip);
+ wmsum_fini(&arc_sums.arcstat_hash_collisions);
+ wmsum_fini(&arc_sums.arcstat_hash_chains);
+ aggsum_fini(&arc_sums.arcstat_size);
+ wmsum_fini(&arc_sums.arcstat_compressed_size);
+ wmsum_fini(&arc_sums.arcstat_uncompressed_size);
+ wmsum_fini(&arc_sums.arcstat_overhead_size);
+ wmsum_fini(&arc_sums.arcstat_hdr_size);
+ wmsum_fini(&arc_sums.arcstat_data_size);
+ wmsum_fini(&arc_sums.arcstat_metadata_size);
+ wmsum_fini(&arc_sums.arcstat_dbuf_size);
+ aggsum_fini(&arc_sums.arcstat_dnode_size);
+ wmsum_fini(&arc_sums.arcstat_bonus_size);
+ wmsum_fini(&arc_sums.arcstat_l2_hits);
+ wmsum_fini(&arc_sums.arcstat_l2_misses);
+ wmsum_fini(&arc_sums.arcstat_l2_prefetch_asize);
+ wmsum_fini(&arc_sums.arcstat_l2_mru_asize);
+ wmsum_fini(&arc_sums.arcstat_l2_mfu_asize);
+ wmsum_fini(&arc_sums.arcstat_l2_bufc_data_asize);
+ wmsum_fini(&arc_sums.arcstat_l2_bufc_metadata_asize);
+ wmsum_fini(&arc_sums.arcstat_l2_feeds);
+ wmsum_fini(&arc_sums.arcstat_l2_rw_clash);
+ wmsum_fini(&arc_sums.arcstat_l2_read_bytes);
+ wmsum_fini(&arc_sums.arcstat_l2_write_bytes);
+ wmsum_fini(&arc_sums.arcstat_l2_writes_sent);
+ wmsum_fini(&arc_sums.arcstat_l2_writes_done);
+ wmsum_fini(&arc_sums.arcstat_l2_writes_error);
+ wmsum_fini(&arc_sums.arcstat_l2_writes_lock_retry);
+ wmsum_fini(&arc_sums.arcstat_l2_evict_lock_retry);
+ wmsum_fini(&arc_sums.arcstat_l2_evict_reading);
+ wmsum_fini(&arc_sums.arcstat_l2_evict_l1cached);
+ wmsum_fini(&arc_sums.arcstat_l2_free_on_write);
+ wmsum_fini(&arc_sums.arcstat_l2_abort_lowmem);
+ wmsum_fini(&arc_sums.arcstat_l2_cksum_bad);
+ wmsum_fini(&arc_sums.arcstat_l2_io_error);
+ wmsum_fini(&arc_sums.arcstat_l2_lsize);
+ wmsum_fini(&arc_sums.arcstat_l2_psize);
+ aggsum_fini(&arc_sums.arcstat_l2_hdr_size);
+ wmsum_fini(&arc_sums.arcstat_l2_log_blk_writes);
+ wmsum_fini(&arc_sums.arcstat_l2_log_blk_asize);
+ wmsum_fini(&arc_sums.arcstat_l2_log_blk_count);
+ wmsum_fini(&arc_sums.arcstat_l2_rebuild_success);
+ wmsum_fini(&arc_sums.arcstat_l2_rebuild_abort_unsupported);
+ wmsum_fini(&arc_sums.arcstat_l2_rebuild_abort_io_errors);
+ wmsum_fini(&arc_sums.arcstat_l2_rebuild_abort_dh_errors);
+ wmsum_fini(&arc_sums.arcstat_l2_rebuild_abort_cksum_lb_errors);
+ wmsum_fini(&arc_sums.arcstat_l2_rebuild_abort_lowmem);
+ wmsum_fini(&arc_sums.arcstat_l2_rebuild_size);
+ wmsum_fini(&arc_sums.arcstat_l2_rebuild_asize);
+ wmsum_fini(&arc_sums.arcstat_l2_rebuild_bufs);
+ wmsum_fini(&arc_sums.arcstat_l2_rebuild_bufs_precached);
+ wmsum_fini(&arc_sums.arcstat_l2_rebuild_log_blks);
+ wmsum_fini(&arc_sums.arcstat_memory_throttle_count);
+ wmsum_fini(&arc_sums.arcstat_memory_direct_count);
+ wmsum_fini(&arc_sums.arcstat_memory_indirect_count);
+ wmsum_fini(&arc_sums.arcstat_prune);
+ aggsum_fini(&arc_sums.arcstat_meta_used);
+ wmsum_fini(&arc_sums.arcstat_async_upgrade_sync);
+ wmsum_fini(&arc_sums.arcstat_demand_hit_predictive_prefetch);
+ wmsum_fini(&arc_sums.arcstat_demand_hit_prescient_prefetch);
+ wmsum_fini(&arc_sums.arcstat_raw_size);
+ wmsum_fini(&arc_sums.arcstat_cached_only_in_progress);
+ wmsum_fini(&arc_sums.arcstat_abd_chunk_waste_size);
}
uint64_t
return (arc_c);
}
+void
+arc_set_limits(uint64_t allmem)
+{
+ /* Set min cache to 1/32 of all memory, or 32MB, whichever is more. */
+ arc_c_min = MAX(allmem / 32, 2ULL << SPA_MAXBLOCKSHIFT);
+
+ /* How to set default max varies by platform. */
+ arc_c_max = arc_default_max(arc_c_min, allmem);
+}
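/*
 * An illustrative calculation for the defaults above, assuming
 * SPA_MAXBLOCKSHIFT is 24 (16 MiB maximum block size), so the floor
 * 2ULL << SPA_MAXBLOCKSHIFT is 32 MiB:
 *
 *	allmem = 16 GiB   ->  arc_c_min = MAX(512 MiB, 32 MiB) = 512 MiB
 *	allmem = 512 MiB  ->  arc_c_min = MAX(16 MiB, 32 MiB)  = 32 MiB
 *
 * arc_default_max() then derives arc_c_max from arc_c_min and allmem in
 * a platform-specific way.
 */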
void
arc_init(void)
{
arc_lowmem_init();
#endif
- /* Set min cache to 1/32 of all memory, or 32MB, whichever is more. */
- arc_c_min = MAX(allmem / 32, 2ULL << SPA_MAXBLOCKSHIFT);
-
- /* How to set default max varies by platform. */
- arc_c_max = arc_default_max(arc_c_min, allmem);
+ arc_set_limits(allmem);
-#ifndef _KERNEL
+#ifdef _KERNEL
+ /*
+ * If zfs_arc_max is non-zero at init, meaning it was set in the kernel
+ * environment before the module was loaded, don't refuse to set the
+ * maximum just because it is less than arc_c_min; instead, reset
+ * arc_c_min to a lower value.
+ * zfs_arc_min will be handled by arc_tuning_update().
+ */
+ if (zfs_arc_max != 0 && zfs_arc_max >= MIN_ARC_MAX &&
+ zfs_arc_max < allmem) {
+ arc_c_max = zfs_arc_max;
+ if (arc_c_min >= arc_c_max) {
+ arc_c_min = MAX(zfs_arc_max / 2,
+ 2ULL << SPA_MAXBLOCKSHIFT);
+ }
+ }
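	/*
	 * For example (illustrative numbers): on a 128 GiB machine
	 * arc_set_limits() picks arc_c_min = 4 GiB. If zfs_arc_max was set
	 * to 1 GiB in the kernel environment, the block above accepts it
	 * and lowers arc_c_min to MAX(512 MiB, 32 MiB) = 512 MiB instead of
	 * rejecting the requested maximum for being below arc_c_min.
	 */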
+#else
/*
* In userland, there's only the memory pressure that we artificially
* create (see arc_available_memory()). Don't let arc_c get too
/* Set min to 1/2 of arc_c_min */
arc_meta_min = 1ULL << SPA_MAXBLOCKSHIFT;
- /* Initialize maximum observed usage to zero */
- arc_meta_max = 0;
/*
* Set arc_meta_limit to a percent of arc_c_max with a floor of
* arc_meta_min, and a ceiling of arc_c_max.
if (arc_c < arc_c_min)
arc_c = arc_c_min;
+ arc_register_hotplug();
+
arc_state_init();
buf_init();
offsetof(arc_prune_t, p_node));
mutex_init(&arc_prune_mtx, NULL, MUTEX_DEFAULT, NULL);
- arc_prune_taskq = taskq_create("arc_prune", boot_ncpus, defclsyspri,
- boot_ncpus, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
+ arc_prune_taskq = taskq_create("arc_prune", zfs_arc_prune_task_threads,
+ defclsyspri, 100, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
arc_ksp = kstat_create("zfs", 0, "arcstats", "misc", KSTAT_TYPE_NAMED,
sizeof (arc_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
kstat_install(arc_ksp);
}
- arc_evict_zthr = zthr_create_timer("arc_evict",
- arc_evict_cb_check, arc_evict_cb, NULL, SEC2NSEC(1));
+ arc_state_evict_markers =
+ arc_state_alloc_markers(arc_state_evict_marker_count);
+ arc_evict_zthr = zthr_create("arc_evict",
+ arc_evict_cb_check, arc_evict_cb, NULL, defclsyspri);
arc_reap_zthr = zthr_create_timer("arc_reap",
- arc_reap_cb_check, arc_reap_cb, NULL, SEC2NSEC(1));
+ arc_reap_cb_check, arc_reap_cb, NULL, SEC2NSEC(1), minclsyspri);
arc_warm = B_FALSE;
zfs_dirty_data_max = MIN(zfs_dirty_data_max,
zfs_dirty_data_max_max);
}
+
+ if (zfs_wrlog_data_max == 0) {
+
+ /*
+ * dp_wrlog_total is reduced for each txg at the end of
+ * spa_sync(). However, dp_dirty_total is reduced every time
+ * a block is written out. Thus, under normal operation,
+ * dp_wrlog_total could grow to twice the size of
+ * zfs_dirty_data_max.
+ */
+ zfs_wrlog_data_max = zfs_dirty_data_max * 2;
+ }
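	/*
	 * As a concrete default, assuming zfs_dirty_data_max resolved to
	 * its usual cap of 4 GiB on a large machine, zfs_wrlog_data_max
	 * would default to 8 GiB here: enough for the roughly two txgs
	 * worth of outstanding log writes described above.
	 */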
}
void
(void) zthr_cancel(arc_evict_zthr);
(void) zthr_cancel(arc_reap_zthr);
+ arc_state_free_markers(arc_state_evict_markers,
+ arc_state_evict_marker_count);
mutex_destroy(&arc_evict_lock);
list_destroy(&arc_evict_waiters);
buf_fini();
arc_state_fini();
+ arc_unregister_hotplug();
+
/*
* We destroy the zthrs after all the ARC state has been
* torn down to avoid the case of them receiving any
"plus the overhead of log blocks (persistent L2ARC, "
"%llu bytes) exceeds the size of the cache device "
"(guid %llu), resetting them to the default (%d)",
- l2arc_log_blk_overhead(size, dev),
- dev->l2ad_vdev->vdev_guid, L2ARC_WRITE_SIZE);
+ (u_longlong_t)l2arc_log_blk_overhead(size, dev),
+ (u_longlong_t)dev->l2ad_vdev->vdev_guid, L2ARC_WRITE_SIZE);
size = l2arc_write_max = l2arc_write_boost = L2ARC_WRITE_SIZE;
if (arc_warm == B_FALSE)
* block pointer in the header.
*/
if (i == 0) {
- bzero(l2dhdr, dev->l2ad_dev_hdr_asize);
+ memset(l2dhdr, 0,
+ dev->l2ad_dev_hdr_asize);
} else {
- bzero(&l2dhdr->dh_start_lbps[i],
+ memset(&l2dhdr->dh_start_lbps[i], 0,
sizeof (l2arc_log_blkptr_t));
}
break;
}
- bcopy(lb_ptr_buf->lb_ptr, &l2dhdr->dh_start_lbps[i],
+ memcpy(&l2dhdr->dh_start_lbps[i], lb_ptr_buf->lb_ptr,
sizeof (l2arc_log_blkptr_t));
lb_ptr_buf = list_next(&dev->l2ad_lbptr_list,
lb_ptr_buf);
}
}
- atomic_inc_64(&l2arc_writes_done);
+ ARCSTAT_BUMP(arcstat_l2_writes_done);
list_remove(buflist, head);
ASSERT(!HDR_HAS_L1HDR(head));
kmem_cache_free(hdr_l2only_cache, head);
*/
if (BP_IS_ENCRYPTED(bp)) {
abd_t *eabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr,
- B_TRUE);
+ ARC_HDR_DO_ADAPT | ARC_HDR_USE_RESERVE);
zio_crypt_decode_params_bp(bp, salt, iv);
zio_crypt_decode_mac_bp(bp, mac);
if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF &&
!HDR_COMPRESSION_ENABLED(hdr)) {
abd_t *cabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr,
- B_TRUE);
+ ARC_HDR_DO_ADAPT | ARC_HDR_USE_RESERVE);
void *tmp = abd_borrow_buf(cabd, arc_hdr_size(hdr));
ret = zio_decompress_data(HDR_GET_COMPRESS(hdr),
switch (list_num) {
case 0:
- ml = arc_mfu->arcs_list[ARC_BUFC_METADATA];
+ ml = &arc_mfu->arcs_list[ARC_BUFC_METADATA];
break;
case 1:
- ml = arc_mru->arcs_list[ARC_BUFC_METADATA];
+ ml = &arc_mru->arcs_list[ARC_BUFC_METADATA];
break;
case 2:
- ml = arc_mfu->arcs_list[ARC_BUFC_DATA];
+ ml = &arc_mfu->arcs_list[ARC_BUFC_DATA];
break;
case 3:
- ml = arc_mru->arcs_list[ARC_BUFC_DATA];
+ ml = &arc_mru->arcs_list[ARC_BUFC_DATA];
break;
default:
return (NULL);
}
if (compress != ZIO_COMPRESS_OFF && !HDR_COMPRESSION_ENABLED(hdr)) {
- cabd = abd_alloc_for_io(asize, ismd);
- tmp = abd_borrow_buf(cabd, asize);
+ /*
+ * In some cases, we can wind up with size > asize, so
+ * we need to make the larger allocation here.
+ *
+ * (We also need abd_return_buf_copy in all cases because
+ * modifying the buffer before returning it with
+ * arc_return_buf() trips an ASSERT(), and nearly all of the
+ * compressors write to the buffer before deciding to fail
+ * compression.)
+ */
+ cabd = abd_alloc_for_io(size, ismd);
+ tmp = abd_borrow_buf(cabd, size);
psize = zio_compress_data(compress, to_write, tmp, size,
hdr->b_complevel);
- if (psize >= size) {
- abd_return_buf(cabd, tmp, asize);
+ if (psize >= asize) {
+ psize = HDR_GET_PSIZE(hdr);
+ abd_return_buf_copy(cabd, tmp, size);
HDR_SET_COMPRESS(hdr, ZIO_COMPRESS_OFF);
to_write = cabd;
- abd_copy(to_write, hdr->b_l1hdr.b_pabd, size);
- if (size != asize)
- abd_zero_off(to_write, size, asize - size);
+ abd_copy(to_write, hdr->b_l1hdr.b_pabd, psize);
+ if (psize != asize)
+ abd_zero_off(to_write, psize, asize - psize);
goto encrypt;
}
ASSERT3U(psize, <=, HDR_GET_PSIZE(hdr));
if (psize < asize)
- bzero((char *)tmp + psize, asize - psize);
+ memset((char *)tmp + psize, 0, asize - psize);
psize = HDR_GET_PSIZE(hdr);
- abd_return_buf_copy(cabd, tmp, asize);
+ abd_return_buf_copy(cabd, tmp, size);
to_write = cabd;
}
abd_zero_off(eabd, psize, asize - psize);
/* assert that the MAC we got here matches the one we saved */
- ASSERT0(bcmp(mac, hdr->b_crypt_hdr.b_mac, ZIO_DATA_MAC_LEN));
+ ASSERT0(memcmp(mac, hdr->b_crypt_hdr.b_mac, ZIO_DATA_MAC_LEN));
spa_keystore_dsl_key_rele(spa, dck, FTAG);
if (to_write == cabd)
cb = zio->io_private;
if (cb->l2rcb_abd != NULL)
- abd_put(cb->l2rcb_abd);
+ abd_free(cb->l2rcb_abd);
kmem_free(cb, sizeof (l2arc_read_callback_t));
}
l2arc_write_callback_t *cb = NULL;
zio_t *pio, *wzio;
uint64_t guid = spa_load_guid(spa);
+ l2arc_dev_hdr_phys_t *l2dhdr = dev->l2ad_dev_hdr;
ASSERT3P(dev->l2ad_vdev, !=, NULL);
/*
* Copy buffers for L2ARC writing.
*/
- for (int try = 0; try < L2ARC_FEED_TYPES; try++) {
+ for (int pass = 0; pass < L2ARC_FEED_TYPES; pass++) {
/*
- * If try == 1 or 3, we cache MRU metadata and data
+ * If pass == 1 or 3, we cache MRU metadata and data
* respectively.
*/
if (l2arc_mfuonly) {
- if (try == 1 || try == 3)
+ if (pass == 1 || pass == 3)
continue;
}
- multilist_sublist_t *mls = l2arc_sublist_lock(try);
+ multilist_sublist_t *mls = l2arc_sublist_lock(pass);
uint64_t passed_sz = 0;
VERIFY3P(mls, !=, NULL);
continue;
}
- /*
- * We rely on the L1 portion of the header below, so
- * it's invalid for this header to have been evicted out
- * of the ghost cache, prior to being written out. The
- * ARC_FLAG_L2_WRITING bit ensures this won't happen.
- */
ASSERT(HDR_HAS_L1HDR(hdr));
ASSERT3U(HDR_GET_PSIZE(hdr), >, 0);
* ARC_FLAG_L2_WRITING bit ensures this won't happen.
*/
arc_hdr_set_flags(hdr, ARC_FLAG_L2_WRITING);
- ASSERT(HDR_HAS_L1HDR(hdr));
-
- ASSERT3U(HDR_GET_PSIZE(hdr), >, 0);
- ASSERT(hdr->b_l1hdr.b_pabd != NULL ||
- HDR_HAS_RABD(hdr));
- ASSERT3U(arc_hdr_size(hdr), >, 0);
/*
* If this header has b_rabd, we can use this since it
* Although we did not write any buffers l2ad_evict may
* have advanced.
*/
- l2arc_dev_hdr_update(dev);
+ if (dev->l2ad_evict != l2dhdr->dh_evict)
+ l2arc_dev_hdr_update(dev);
return (0);
}
static boolean_t
l2arc_hdr_limit_reached(void)
{
- int64_t s = aggsum_upper_bound(&astat_l2_hdr_size);
+ int64_t s = aggsum_upper_bound(&arc_sums.arcstat_l2_hdr_size);
return (arc_reclaim_needed() || (s > arc_meta_limit * 3 / 4) ||
(s > (arc_warm ? arc_c : arc_c_max) * l2arc_meta_percent / 100));
* This thread feeds the L2ARC at regular intervals. This is the beating
* heart of the L2ARC.
*/
-/* ARGSUSED */
-static void
+static __attribute__((noreturn)) void
l2arc_feed_thread(void *unused)
{
+ (void) unused;
callb_cpr_t cpr;
l2arc_dev_t *dev;
spa_t *spa;
return (dev);
}
+static void
+l2arc_rebuild_dev(l2arc_dev_t *dev, boolean_t reopen)
+{
+ l2arc_dev_hdr_phys_t *l2dhdr = dev->l2ad_dev_hdr;
+ uint64_t l2dhdr_asize = dev->l2ad_dev_hdr_asize;
+ spa_t *spa = dev->l2ad_spa;
+
+ /*
+ * The L2ARC has to hold at least the payload of one log block for
+ * log blocks to be restored (persistent L2ARC). The payload of a
+ * log block depends on the number of its log entries. We always
+ * write log blocks with 1022 entries. How many of them are committed
+ * or restored depends on the size of the L2ARC device. Thus the
+ * maximum payload of one log block is 1022 * SPA_MAXBLOCKSIZE = 16GB.
+ * If the L2ARC device is smaller than that, we reduce the number of
+ * committed and restored log entries per block so as to enable
+ * persistence.
+ */
+ if (dev->l2ad_end < l2arc_rebuild_blocks_min_l2size) {
+ dev->l2ad_log_entries = 0;
+ } else {
+ dev->l2ad_log_entries = MIN((dev->l2ad_end -
+ dev->l2ad_start) >> SPA_MAXBLOCKSHIFT,
+ L2ARC_LOG_BLK_MAX_ENTRIES);
+ }
+
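	/*
	 * A worked example of the sizing above, assuming SPA_MAXBLOCKSHIFT
	 * is 24 (16 MiB) and L2ARC_LOG_BLK_MAX_ENTRIES is 1022:
	 *
	 *	usable space = 16 GiB -> 16 GiB >> 24 = 1024, capped at 1022
	 *	usable space =  4 GiB ->  4 GiB >> 24 = 256 entries per block
	 *
	 * so devices smaller than ~16 GiB (1022 * 16 MiB) commit and restore
	 * proportionally fewer log entries per log block.
	 */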
+ /*
+ * Read the device header; if an error is returned, do not rebuild L2ARC.
+ */
+ if (l2arc_dev_hdr_read(dev) == 0 && dev->l2ad_log_entries > 0) {
+ /*
+ * If we are onlining a cache device (vdev_reopen) that was
+ * still present (l2arc_vdev_present()) and rebuild is enabled,
+ * we should evict all ARC buffers and pointers to log blocks
+ * and reclaim their space before restoring its contents to
+ * L2ARC.
+ */
+ if (reopen) {
+ if (!l2arc_rebuild_enabled) {
+ return;
+ } else {
+ l2arc_evict(dev, 0, B_TRUE);
+ /* start a new log block */
+ dev->l2ad_log_ent_idx = 0;
+ dev->l2ad_log_blk_payload_asize = 0;
+ dev->l2ad_log_blk_payload_start = 0;
+ }
+ }
+ /*
+ * Just mark the device as pending for a rebuild. We won't
+ * be starting a rebuild in line here as it would block pool
+ * import. Instead spa_load_impl will hand that off to an
+ * async task which will call l2arc_spa_rebuild_start.
+ */
+ dev->l2ad_rebuild = B_TRUE;
+ } else if (spa_writeable(spa)) {
+ /*
+ * In this case TRIM the whole device if l2arc_trim_ahead > 0,
+ * otherwise create a new header. We zero out the memory holding
+ * the header to reset dh_start_lbps. If we TRIM the whole
+ * device the new header will be written by
+ * vdev_trim_l2arc_thread() at the end of the TRIM to update the
+ * trim_state in the header too. When reading the header, if
+ * trim_state is not VDEV_TRIM_COMPLETE and l2arc_trim_ahead > 0
+ * we opt to TRIM the whole device again.
+ */
+ if (l2arc_trim_ahead > 0) {
+ dev->l2ad_trim_all = B_TRUE;
+ } else {
+ memset(l2dhdr, 0, l2dhdr_asize);
+ l2arc_dev_hdr_update(dev);
+ }
+ }
+}
+
/*
* Add a vdev for use by the L2ARC. By this point the spa has already
* validated the vdev and opened it.
zfs_refcount_create(&adddev->l2ad_lb_asize);
zfs_refcount_create(&adddev->l2ad_lb_count);
+ /*
+ * Decide if dev is eligible for L2ARC rebuild or whole device
+ * trimming. This has to happen before the device is added in the
+ * cache device list and l2arc_dev_mtx is released. Otherwise
+ * l2arc_feed_thread() might already start writing on the
+ * device.
+ */
+ l2arc_rebuild_dev(adddev, B_FALSE);
+
/*
* Add device to global list
*/
list_insert_head(l2arc_dev_list, adddev);
atomic_inc_64(&l2arc_ndev);
mutex_exit(&l2arc_dev_mtx);
-
- /*
- * Decide if vdev is eligible for L2ARC rebuild
- */
- l2arc_rebuild_vdev(adddev->l2ad_vdev, B_FALSE);
}
+/*
+ * Decide if a vdev is eligible for L2ARC rebuild, called from vdev_reopen()
+ * in case of onlining a cache device.
+ */
void
l2arc_rebuild_vdev(vdev_t *vd, boolean_t reopen)
{
l2arc_dev_t *dev = NULL;
- l2arc_dev_hdr_phys_t *l2dhdr;
- uint64_t l2dhdr_asize;
- spa_t *spa;
dev = l2arc_vdev_get(vd);
ASSERT3P(dev, !=, NULL);
- spa = dev->l2ad_spa;
- l2dhdr = dev->l2ad_dev_hdr;
- l2dhdr_asize = dev->l2ad_dev_hdr_asize;
-
- /*
- * The L2ARC has to hold at least the payload of one log block for
- * them to be restored (persistent L2ARC). The payload of a log block
- * depends on the amount of its log entries. We always write log blocks
- * with 1022 entries. How many of them are committed or restored depends
- * on the size of the L2ARC device. Thus the maximum payload of
- * one log block is 1022 * SPA_MAXBLOCKSIZE = 16GB. If the L2ARC device
- * is less than that, we reduce the amount of committed and restored
- * log entries per block so as to enable persistence.
- */
- if (dev->l2ad_end < l2arc_rebuild_blocks_min_l2size) {
- dev->l2ad_log_entries = 0;
- } else {
- dev->l2ad_log_entries = MIN((dev->l2ad_end -
- dev->l2ad_start) >> SPA_MAXBLOCKSHIFT,
- L2ARC_LOG_BLK_MAX_ENTRIES);
- }
/*
- * Read the device header, if an error is returned do not rebuild L2ARC.
+ * In contrast to l2arc_add_vdev() we do not have to worry about
+ * l2arc_feed_thread() invalidating previous content when onlining a
+ * cache device. The device parameters (l2ad*) are not cleared when
+ * offlining the device and writing new buffers will not invalidate
+ * all previous content. In the worst case only buffers that have not
+ * had their log block written to the device will be lost.
+ * When onlining the cache device (i.e. offline->online without exporting
+ * the pool in between) this happens:
+ * vdev_reopen() -> vdev_open() -> l2arc_rebuild_vdev()
+ * | |
+ * vdev_is_dead() = B_FALSE l2ad_rebuild = B_TRUE
+ * During the time where vdev_is_dead = B_FALSE and until l2ad_rebuild
+ * is set to B_TRUE we might write additional buffers to the device.
*/
- if (l2arc_dev_hdr_read(dev) == 0 && dev->l2ad_log_entries > 0) {
- /*
- * If we are onlining a cache device (vdev_reopen) that was
- * still present (l2arc_vdev_present()) and rebuild is enabled,
- * we should evict all ARC buffers and pointers to log blocks
- * and reclaim their space before restoring its contents to
- * L2ARC.
- */
- if (reopen) {
- if (!l2arc_rebuild_enabled) {
- return;
- } else {
- l2arc_evict(dev, 0, B_TRUE);
- /* start a new log block */
- dev->l2ad_log_ent_idx = 0;
- dev->l2ad_log_blk_payload_asize = 0;
- dev->l2ad_log_blk_payload_start = 0;
- }
- }
- /*
- * Just mark the device as pending for a rebuild. We won't
- * be starting a rebuild in line here as it would block pool
- * import. Instead spa_load_impl will hand that off to an
- * async task which will call l2arc_spa_rebuild_start.
- */
- dev->l2ad_rebuild = B_TRUE;
- } else if (spa_writeable(spa)) {
- /*
- * In this case TRIM the whole device if l2arc_trim_ahead > 0,
- * otherwise create a new header. We zero out the memory holding
- * the header to reset dh_start_lbps. If we TRIM the whole
- * device the new header will be written by
- * vdev_trim_l2arc_thread() at the end of the TRIM to update the
- * trim_state in the header too. When reading the header, if
- * trim_state is not VDEV_TRIM_COMPLETE and l2arc_trim_ahead > 0
- * we opt to TRIM the whole device again.
- */
- if (l2arc_trim_ahead > 0) {
- dev->l2ad_trim_all = B_TRUE;
- } else {
- bzero(l2dhdr, l2dhdr_asize);
- l2arc_dev_hdr_update(dev);
- }
- }
+ l2arc_rebuild_dev(dev, reopen);
}
/*
{
l2arc_thread_exit = 0;
l2arc_ndev = 0;
- l2arc_writes_sent = 0;
- l2arc_writes_done = 0;
mutex_init(&l2arc_feed_thr_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&l2arc_feed_thr_cv, NULL, CV_DEFAULT, NULL);
/*
* Main entry point for L2ARC rebuilding.
*/
-static void
+static __attribute__((noreturn)) void
l2arc_dev_rebuild_thread(void *arg)
{
l2arc_dev_t *dev = arg;
goto out;
/* Prepare the rebuild process */
- bcopy(l2dhdr->dh_start_lbps, lbps, sizeof (lbps));
+ memcpy(lbps, l2dhdr->dh_start_lbps, sizeof (lbps));
/* Start the rebuild process */
for (;;) {
lb_ptr_buf = kmem_zalloc(sizeof (l2arc_lb_ptr_buf_t), KM_SLEEP);
lb_ptr_buf->lb_ptr = kmem_zalloc(sizeof (l2arc_log_blkptr_t),
KM_SLEEP);
- bcopy(&lbps[0], lb_ptr_buf->lb_ptr,
+ memcpy(lb_ptr_buf->lb_ptr, &lbps[0],
sizeof (l2arc_log_blkptr_t));
mutex_enter(&dev->l2ad_mtx);
list_insert_tail(&dev->l2ad_lbptr_list, lb_ptr_buf);
!dev->l2ad_first)
goto out;
- cond_resched();
+ kpreempt(KPREEMPT_SYNC);
for (;;) {
mutex_enter(&l2arc_rebuild_thr_lock);
if (dev->l2ad_rebuild_cancel) {
*/
spa_history_log_internal(spa, "L2ARC rebuild", NULL,
"no valid log blocks");
- bzero(l2dhdr, dev->l2ad_dev_hdr_asize);
+ memset(l2dhdr, 0, dev->l2ad_dev_hdr_asize);
l2arc_dev_hdr_update(dev);
} else if (err == ECANCELED) {
/*
* log as the pool may be in the process of being removed.
*/
zfs_dbgmsg("L2ARC rebuild aborted, restored %llu blocks",
- zfs_refcount_count(&dev->l2ad_lb_count));
+ (u_longlong_t)zfs_refcount_count(&dev->l2ad_lb_count));
} else if (err != 0) {
spa_history_log_internal(spa, "L2ARC rebuild", NULL,
"aborted, restored %llu blocks",
ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY |
ZIO_FLAG_SPECULATIVE, B_FALSE));
- abd_put(abd);
+ abd_free(abd);
if (err != 0) {
ARCSTAT_BUMP(arcstat_l2_rebuild_abort_dh_errors);
zfs_dbgmsg("L2ARC IO error (%d) while reading device header, "
- "vdev guid: %llu", err, dev->l2ad_vdev->vdev_guid);
+ "vdev guid: %llu", err,
+ (u_longlong_t)dev->l2ad_vdev->vdev_guid);
return (err);
}
if ((err = zio_wait(this_io)) != 0) {
ARCSTAT_BUMP(arcstat_l2_rebuild_abort_io_errors);
zfs_dbgmsg("L2ARC IO error (%d) while reading log block, "
- "offset: %llu, vdev guid: %llu", err, this_lbp->lbp_daddr,
- dev->l2ad_vdev->vdev_guid);
+ "offset: %llu, vdev guid: %llu", err,
+ (u_longlong_t)this_lbp->lbp_daddr,
+ (u_longlong_t)dev->l2ad_vdev->vdev_guid);
goto cleanup;
}
ARCSTAT_BUMP(arcstat_l2_rebuild_abort_cksum_lb_errors);
zfs_dbgmsg("L2ARC log block cksum failed, offset: %llu, "
"vdev guid: %llu, l2ad_hand: %llu, l2ad_evict: %llu",
- this_lbp->lbp_daddr, dev->l2ad_vdev->vdev_guid,
- dev->l2ad_hand, dev->l2ad_evict);
+ (u_longlong_t)this_lbp->lbp_daddr,
+ (u_longlong_t)dev->l2ad_vdev->vdev_guid,
+ (u_longlong_t)dev->l2ad_hand,
+ (u_longlong_t)dev->l2ad_evict);
err = SET_ERROR(ECKSUM);
goto cleanup;
}
VDEV_LABEL_START_SIZE, l2dhdr_asize, abd, ZIO_CHECKSUM_LABEL, NULL,
NULL, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_CANFAIL, B_FALSE));
- abd_put(abd);
+ abd_free(abd);
if (err != 0) {
zfs_dbgmsg("L2ARC IO error (%d) while writing device header, "
- "vdev guid: %llu", err, dev->l2ad_vdev->vdev_guid);
+ "vdev guid: %llu", err,
+ (u_longlong_t)dev->l2ad_vdev->vdev_guid);
}
}
dev->l2ad_log_blk_payload_asize;
l2dhdr->dh_start_lbps[0].lbp_payload_start =
dev->l2ad_log_blk_payload_start;
- _NOTE(CONSTCOND)
L2BLK_SET_LSIZE(
(&l2dhdr->dh_start_lbps[0])->lbp_prop, sizeof (*lb));
L2BLK_SET_PSIZE(
ZIO_CHECKSUM_FLETCHER_4);
if (asize < sizeof (*lb)) {
/* compression succeeded */
- bzero(tmpbuf + psize, asize - psize);
+ memset(tmpbuf + psize, 0, asize - psize);
L2BLK_SET_COMPRESS(
(&l2dhdr->dh_start_lbps[0])->lbp_prop,
ZIO_COMPRESS_LZ4);
} else {
/* compression failed */
- bcopy(lb, tmpbuf, sizeof (*lb));
+ memcpy(tmpbuf, lb, sizeof (*lb));
L2BLK_SET_COMPRESS(
(&l2dhdr->dh_start_lbps[0])->lbp_prop,
ZIO_COMPRESS_OFF);
fletcher_4_native(tmpbuf, asize, NULL,
&l2dhdr->dh_start_lbps[0].lbp_cksum);
- abd_put(abd_buf->abd);
+ abd_free(abd_buf->abd);
/* perform the write itself */
abd_buf->abd = abd_get_from_buf(tmpbuf, sizeof (*lb));
* Include the committed log block's pointer in the list of pointers
* to log blocks present in the L2ARC device.
*/
- bcopy(&l2dhdr->dh_start_lbps[0], lb_ptr_buf->lb_ptr,
+ memcpy(lb_ptr_buf->lb_ptr, &l2dhdr->dh_start_lbps[0],
sizeof (l2arc_log_blkptr_t));
mutex_enter(&dev->l2ad_mtx);
list_insert_head(&dev->l2ad_lbptr_list, lb_ptr_buf);
ASSERT(HDR_HAS_L2HDR(hdr));
le = &lb->lb_entries[index];
- bzero(le, sizeof (*le));
+ memset(le, 0, sizeof (*le));
le->le_dva = hdr->b_dva;
le->le_birth = hdr->b_birth;
le->le_daddr = hdr->b_l2hdr.b_daddr;
EXPORT_SYMBOL(arc_add_prune_callback);
EXPORT_SYMBOL(arc_remove_prune_callback);
-/* BEGIN CSTYLED */
-ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, min, param_set_arc_long,
- param_get_long, ZMOD_RW, "Min arc size");
+ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, min, param_set_arc_min,
+ param_get_ulong, ZMOD_RW, "Minimum ARC size in bytes");
-ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, max, param_set_arc_long,
- param_get_long, ZMOD_RW, "Max arc size");
+ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, max, param_set_arc_max,
+ param_get_ulong, ZMOD_RW, "Maximum ARC size in bytes");
ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, meta_limit, param_set_arc_long,
- param_get_long, ZMOD_RW, "Metadata limit for arc size");
+ param_get_ulong, ZMOD_RW, "Metadata limit for ARC size in bytes");
ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, meta_limit_percent,
- param_set_arc_long, param_get_long, ZMOD_RW,
- "Percent of arc size for arc meta limit");
+ param_set_arc_long, param_get_ulong, ZMOD_RW,
+ "Percent of ARC size for ARC meta limit");
ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, meta_min, param_set_arc_long,
- param_get_long, ZMOD_RW, "Min arc metadata");
+ param_get_ulong, ZMOD_RW, "Minimum ARC metadata size in bytes");
ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, meta_prune, INT, ZMOD_RW,
"Meta objects to scan for prune");
-ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, meta_adjust_restarts, INT, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, meta_adjust_restarts, UINT, ZMOD_RW,
"Limit number of restarts in arc_evict_meta");
-ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, meta_strategy, INT, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, meta_strategy, UINT, ZMOD_RW,
"Meta reclaim strategy");
ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, grow_retry, param_set_arc_int,
- param_get_int, ZMOD_RW, "Seconds before growing arc size");
+ param_get_uint, ZMOD_RW, "Seconds before growing ARC size");
ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, p_dampener_disable, INT, ZMOD_RW,
"Disable arc_p adapt dampener");
ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, shrink_shift, param_set_arc_int,
- param_get_int, ZMOD_RW, "log2(fraction of arc to reclaim)");
+ param_get_uint, ZMOD_RW, "log2(fraction of ARC to reclaim)");
ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, pc_percent, UINT, ZMOD_RW,
- "Percent of pagecache to reclaim arc to");
+ "Percent of pagecache to reclaim ARC to");
ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, p_min_shift, param_set_arc_int,
- param_get_int, ZMOD_RW, "arc_c shift to calc min/max arc_p");
+ param_get_uint, ZMOD_RW, "arc_c shift to calc min/max arc_p");
-ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, average_blocksize, INT, ZMOD_RD,
+ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, average_blocksize, UINT, ZMOD_RD,
"Target average block size");
ZFS_MODULE_PARAM(zfs, zfs_, compressed_arc_enabled, INT, ZMOD_RW,
- "Disable compressed arc buffers");
+ "Disable compressed ARC buffers");
ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, min_prefetch_ms, param_set_arc_int,
- param_get_int, ZMOD_RW, "Min life of prefetch block in ms");
+ param_get_uint, ZMOD_RW, "Min life of prefetch block in ms");
ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, min_prescient_prefetch_ms,
- param_set_arc_int, param_get_int, ZMOD_RW,
+ param_set_arc_int, param_get_uint, ZMOD_RW,
"Min life of prescient prefetched block in ms");
ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, write_max, ULONG, ZMOD_RW,
ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, norw, INT, ZMOD_RW,
"No reads during writes");
-ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, meta_percent, INT, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, meta_percent, UINT, ZMOD_RW,
"Percent of ARC size allowed for L2ARC-only headers");
ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, rebuild_enabled, INT, ZMOD_RW,
ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, mfuonly, INT, ZMOD_RW,
"Cache only MFU data from ARC into L2ARC");
+ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, exclude_special, INT, ZMOD_RW,
+ "Exclude dbufs on special vdevs from being cached to L2ARC if set.");
+
ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, lotsfree_percent, param_set_arc_int,
- param_get_int, ZMOD_RW, "System free memory I/O throttle in bytes");
+ param_get_uint, ZMOD_RW, "System free memory I/O throttle in bytes");
ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, sys_free, param_set_arc_long,
- param_get_long, ZMOD_RW, "System free memory target size in bytes");
+ param_get_ulong, ZMOD_RW, "System free memory target size in bytes");
ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, dnode_limit, param_set_arc_long,
- param_get_long, ZMOD_RW, "Minimum bytes of dnodes in arc");
+ param_get_ulong, ZMOD_RW, "Minimum bytes of dnodes in ARC");
ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, dnode_limit_percent,
- param_set_arc_long, param_get_long, ZMOD_RW,
+ param_set_arc_long, param_get_ulong, ZMOD_RW,
"Percent of ARC meta buffers for dnodes");
ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, dnode_reduce_percent, ULONG, ZMOD_RW,
"Percentage of excess dnodes to try to unpin");
-ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, eviction_pct, INT, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, eviction_pct, UINT, ZMOD_RW,
"When full, ARC allocation waits for eviction of this % of alloc size");
-ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, evict_batch_limit, INT, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, evict_batch_limit, UINT, ZMOD_RW,
"The number of headers to evict per sublist before moving to the next");
-/* END CSTYLED */
+
+ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, prune_task_threads, INT, ZMOD_RW,
+ "Number of arc_prune threads");