Cleanup: Specify unsignedness on things that should not be signed
diff --git a/module/zfs/arc.c b/module/zfs/arc.c
index efc6bb138dc88e69c5837942b95fb8c2b6e247e3..33865f715b0f7850a430d2e07cc83faa44201262 100644
--- a/module/zfs/arc.c
+++ b/module/zfs/arc.c
@@ -6,7 +6,7 @@
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * since the physical block is about to be rewritten. The new data contents
  * will be contained in the arc_buf_t. As the I/O pipeline performs the write,
  * it may compress the data before writing it to disk. The ARC will be called
- * with the transformed data and will bcopy the transformed on-disk block into
+ * with the transformed data and will memcpy the transformed on-disk block into
  * a newly allocated b_pabd. Writes are always done into buffers which have
  * either been loaned (and hence are new and don't have other readers) or
  * buffers which have been released (and hence have their own hdr, if there
 #include <sys/vdev.h>
 #include <sys/vdev_impl.h>
 #include <sys/dsl_pool.h>
-#include <sys/zio_checksum.h>
 #include <sys/multilist.h>
 #include <sys/abd.h>
 #include <sys/zil.h>
 #include <sys/arc_impl.h>
 #include <sys/trace_zfs.h>
 #include <sys/aggsum.h>
+#include <sys/wmsum.h>
 #include <cityhash.h>
 #include <sys/vdev_trim.h>
+#include <sys/zfs_racct.h>
 #include <sys/zstd/zstd.h>
 
 #ifndef _KERNEL
@@ -327,6 +328,8 @@ static zthr_t *arc_reap_zthr;
  * arc_evict(), which improves arc_is_overflowing().
  */
 static zthr_t *arc_evict_zthr;
+static arc_buf_hdr_t **arc_state_evict_markers;
+static int arc_state_evict_marker_count;
 
 static kmutex_t arc_evict_lock;
 static boolean_t arc_evict_needed = B_FALSE;
@@ -351,7 +354,7 @@ static list_t arc_evict_waiters;
  * can still happen, even during the potentially long time that arc_size is
  * more than arc_c.
  */
-int zfs_arc_eviction_pct = 200;
+static uint_t zfs_arc_eviction_pct = 200;
 
 /*
  * The number of headers to evict in arc_evict_state_impl() before
@@ -360,24 +363,24 @@ int zfs_arc_eviction_pct = 200;
  * oldest header in the arc state), but comes with higher overhead
  * (i.e. more invocations of arc_evict_state_impl()).
  */
-int zfs_arc_evict_batch_limit = 10;
+static uint_t zfs_arc_evict_batch_limit = 10;
 
 /* number of seconds before growing cache again */
-int arc_grow_retry = 5;
+uint_t arc_grow_retry = 5;
 
 /*
  * Minimum time between calls to arc_kmem_reap_soon().
  */
-int arc_kmem_cache_reap_retry_ms = 1000;
+static const int arc_kmem_cache_reap_retry_ms = 1000;
 
 /* shift of arc_c for calculating overflow limit in arc_get_data_impl */
-int zfs_arc_overflow_shift = 8;
+static int zfs_arc_overflow_shift = 8;
 
 /* shift of arc_c for calculating both min and max arc_p */
-int arc_p_min_shift = 4;
+static uint_t arc_p_min_shift = 4;
 
 /* log2(fraction of arc to reclaim) */
-int arc_shrink_shift = 7;
+uint_t arc_shrink_shift = 7;
 
 /* percent of pagecache to reclaim arc to */
 #ifdef _KERNEL
@@ -393,20 +396,20 @@ uint_t zfs_arc_pc_percent = 0;
  * This must be less than arc_shrink_shift, so that when we shrink the ARC,
  * we will still not allow it to grow.
  */
-int                    arc_no_grow_shift = 5;
+uint_t         arc_no_grow_shift = 5;
 
 
 /*
  * minimum lifespan of a prefetch block in clock ticks
  * (initialized in arc_init())
  */
-static int             arc_min_prefetch_ms;
-static int             arc_min_prescient_prefetch_ms;
+static uint_t          arc_min_prefetch_ms;
+static uint_t          arc_min_prescient_prefetch_ms;
 
 /*
  * If this percent of memory is free, don't throttle.
  */
-int arc_lotsfree_percent = 10;
+uint_t arc_lotsfree_percent = 10;
 
 /*
  * The arc has filled available memory and has now warmed up.
@@ -420,19 +423,22 @@ unsigned long zfs_arc_max = 0;
 unsigned long zfs_arc_min = 0;
 unsigned long zfs_arc_meta_limit = 0;
 unsigned long zfs_arc_meta_min = 0;
-unsigned long zfs_arc_dnode_limit = 0;
-unsigned long zfs_arc_dnode_reduce_percent = 10;
-int zfs_arc_grow_retry = 0;
-int zfs_arc_shrink_shift = 0;
-int zfs_arc_p_min_shift = 0;
-int zfs_arc_average_blocksize = 8 * 1024; /* 8KB */
+static unsigned long zfs_arc_dnode_limit = 0;
+static unsigned long zfs_arc_dnode_reduce_percent = 10;
+static uint_t zfs_arc_grow_retry = 0;
+static uint_t zfs_arc_shrink_shift = 0;
+static uint_t zfs_arc_p_min_shift = 0;
+uint_t zfs_arc_average_blocksize = 8 * 1024; /* 8KB */
 
 /*
- * ARC dirty data constraints for arc_tempreserve_space() throttle.
+ * ARC dirty data constraints for arc_tempreserve_space() throttle:
+ * * total dirty data limit
+ * * anon block dirty limit
+ * * each pool's anon allowance
  */
-unsigned long zfs_arc_dirty_limit_percent = 50;        /* total dirty data limit */
-unsigned long zfs_arc_anon_limit_percent = 25; /* anon block dirty limit */
-unsigned long zfs_arc_pool_dirty_percent = 20; /* each pool's anon allowance */
+static const unsigned long zfs_arc_dirty_limit_percent = 50;
+static const unsigned long zfs_arc_anon_limit_percent = 25;
+static const unsigned long zfs_arc_pool_dirty_percent = 20;
 
 /*
  * Enable or disable compressed arc buffers.
@@ -443,24 +449,29 @@ int zfs_compressed_arc_enabled = B_TRUE;
  * ARC will evict meta buffers that exceed arc_meta_limit. This
  * tunable makes arc_meta_limit adjustable for different workloads.
  */
-unsigned long zfs_arc_meta_limit_percent = 75;
+static unsigned long zfs_arc_meta_limit_percent = 75;
 
 /*
  * Percentage that can be consumed by dnodes of ARC meta buffers.
  */
-unsigned long zfs_arc_dnode_limit_percent = 10;
+static unsigned long zfs_arc_dnode_limit_percent = 10;
+
+/*
+ * These tunables are Linux-specific
+ */
+static unsigned long zfs_arc_sys_free = 0;
+static uint_t zfs_arc_min_prefetch_ms = 0;
+static uint_t zfs_arc_min_prescient_prefetch_ms = 0;
+static int zfs_arc_p_dampener_disable = 1;
+static uint_t zfs_arc_meta_prune = 10000;
+static uint_t zfs_arc_meta_strategy = ARC_STRATEGY_META_BALANCED;
+static uint_t zfs_arc_meta_adjust_restarts = 4096;
+static uint_t zfs_arc_lotsfree_percent = 10;
 
 /*
- * These tunables are Linux specific
+ * Number of arc_prune threads
  */
-unsigned long zfs_arc_sys_free = 0;
-int zfs_arc_min_prefetch_ms = 0;
-int zfs_arc_min_prescient_prefetch_ms = 0;
-int zfs_arc_p_dampener_disable = 1;
-int zfs_arc_meta_prune = 10000;
-int zfs_arc_meta_strategy = ARC_STRATEGY_META_BALANCED;
-int zfs_arc_meta_adjust_restarts = 4096;
-int zfs_arc_lotsfree_percent = 10;
+static int zfs_arc_prune_task_threads = 1;
 
 /* The 6 states: */
 arc_state_t ARC_anon;
@@ -599,6 +610,8 @@ arc_stats_t arc_stats = {
        { "abd_chunk_waste_size",       KSTAT_DATA_UINT64 },
 };
 
+arc_sums_t arc_sums;
+
 #define        ARCSTAT_MAX(stat, val) {                                        \
        uint64_t m;                                                     \
        while ((val) > (m = arc_stats.stat.value.ui64) &&               \
@@ -606,9 +619,6 @@ arc_stats_t arc_stats = {
                continue;                                               \
 }
 
-#define        ARCSTAT_MAXSTAT(stat) \
-       ARCSTAT_MAX(stat##_max, arc_stats.stat.value.ui64)
-
 /*
  * We define a macro to allow ARC hits/misses to be easily broken down by
  * two separate conditions, giving a total of four different subtypes for
@@ -644,17 +654,9 @@ arc_stats_t arc_stats = {
                x = x - x / ARCSTAT_F_AVG_FACTOR + \
                    (value) / ARCSTAT_F_AVG_FACTOR; \
                ARCSTAT(stat) = x; \
-               _NOTE(CONSTCOND) \
        } while (0)
 
-kstat_t                        *arc_ksp;
-static arc_state_t     *arc_anon;
-static arc_state_t     *arc_mru_ghost;
-static arc_state_t     *arc_mfu_ghost;
-static arc_state_t     *arc_l2c_only;
-
-arc_state_t    *arc_mru;
-arc_state_t    *arc_mfu;
+static kstat_t                 *arc_ksp;
 
 /*
  * There are several ARC variables that are critical to export as kstats --
@@ -670,37 +672,8 @@ arc_state_t        *arc_mfu;
 /* max size for dnodes */
 #define        arc_dnode_size_limit    ARCSTAT(arcstat_dnode_limit)
 #define        arc_meta_min    ARCSTAT(arcstat_meta_min) /* min size for metadata */
-#define        arc_meta_max    ARCSTAT(arcstat_meta_max) /* max size of metadata */
 #define        arc_need_free   ARCSTAT(arcstat_need_free) /* waiting to be evicted */
 
-/* size of all b_rabd's in entire arc */
-#define        arc_raw_size    ARCSTAT(arcstat_raw_size)
-/* compressed size of entire arc */
-#define        arc_compressed_size     ARCSTAT(arcstat_compressed_size)
-/* uncompressed size of entire arc */
-#define        arc_uncompressed_size   ARCSTAT(arcstat_uncompressed_size)
-/* number of bytes in the arc from arc_buf_t's */
-#define        arc_overhead_size       ARCSTAT(arcstat_overhead_size)
-
-/*
- * There are also some ARC variables that we want to export, but that are
- * updated so often that having the canonical representation be the statistic
- * variable causes a performance bottleneck. We want to use aggsum_t's for these
- * instead, but still be able to export the kstat in the same way as before.
- * The solution is to always use the aggsum version, except in the kstat update
- * callback.
- */
-aggsum_t arc_size;
-aggsum_t arc_meta_used;
-aggsum_t astat_data_size;
-aggsum_t astat_metadata_size;
-aggsum_t astat_dbuf_size;
-aggsum_t astat_dnode_size;
-aggsum_t astat_bonus_size;
-aggsum_t astat_hdr_size;
-aggsum_t astat_l2_hdr_size;
-aggsum_t astat_abd_chunk_waste_size;
-
 hrtime_t arc_growtime;
 list_t arc_prune_list;
 kmutex_t arc_prune_mtx;
@@ -769,29 +742,18 @@ taskq_t *arc_prune_taskq;
  * Hash table routines
  */
 
-#define        HT_LOCK_ALIGN   64
-#define        HT_LOCK_PAD     (P2NPHASE(sizeof (kmutex_t), (HT_LOCK_ALIGN)))
-
-struct ht_lock {
-       kmutex_t        ht_lock;
-#ifdef _KERNEL
-       unsigned char   pad[HT_LOCK_PAD];
-#endif
-};
-
-#define        BUF_LOCKS 8192
+#define        BUF_LOCKS 2048
 typedef struct buf_hash_table {
        uint64_t ht_mask;
        arc_buf_hdr_t **ht_table;
-       struct ht_lock ht_locks[BUF_LOCKS];
+       kmutex_t ht_locks[BUF_LOCKS] ____cacheline_aligned;
 } buf_hash_table_t;
 
 static buf_hash_table_t buf_hash_table;
 
 #define        BUF_HASH_INDEX(spa, dva, birth) \
        (buf_hash(spa, dva, birth) & buf_hash_table.ht_mask)
-#define        BUF_HASH_LOCK_NTRY(idx) (buf_hash_table.ht_locks[idx & (BUF_LOCKS-1)])
-#define        BUF_HASH_LOCK(idx)      (&(BUF_HASH_LOCK_NTRY(idx).ht_lock))
+#define        BUF_HASH_LOCK(idx)      (&buf_hash_table.ht_locks[idx & (BUF_LOCKS-1)])
 #define        HDR_LOCK(hdr) \
        (BUF_HASH_LOCK(BUF_HASH_INDEX(hdr->b_spa, &hdr->b_dva, hdr->b_birth)))
 
@@ -818,9 +780,6 @@ uint64_t zfs_crc64_table[256];
  */
 #define        L2ARC_FEED_TYPES        4
 
-#define        l2arc_writes_sent       ARCSTAT(arcstat_l2_writes_sent)
-#define        l2arc_writes_done       ARCSTAT(arcstat_l2_writes_done)
-
 /* L2ARC Performance Tunables */
 unsigned long l2arc_write_max = L2ARC_WRITE_SIZE;      /* def max write size */
 unsigned long l2arc_write_boost = L2ARC_WRITE_SIZE;    /* extra warmup write */
@@ -831,7 +790,7 @@ unsigned long l2arc_feed_min_ms = L2ARC_FEED_MIN_MS;        /* min interval msecs */
 int l2arc_noprefetch = B_TRUE;                 /* don't cache prefetch bufs */
 int l2arc_feed_again = B_TRUE;                 /* turbo warmup */
 int l2arc_norw = B_FALSE;                      /* no reads during writes */
-int l2arc_meta_percent = 33;                   /* limit on headers size */
+static uint_t l2arc_meta_percent = 33; /* limit on headers size */
 
 /*
  * L2ARC Internals
@@ -869,6 +828,12 @@ typedef enum arc_fill_flags {
        ARC_FILL_IN_PLACE       = 1 << 4  /* fill in place (special case) */
 } arc_fill_flags_t;
 
+typedef enum arc_ovf_level {
+       ARC_OVF_NONE,                   /* ARC within target size. */
+       ARC_OVF_SOME,                   /* ARC is slightly overflowed. */
+       ARC_OVF_SEVERE                  /* ARC is severely overflowed. */
+} arc_ovf_level_t;
+
 static kmutex_t l2arc_feed_thr_lock;
 static kcondvar_t l2arc_feed_thr_cv;
 static uint8_t l2arc_thread_exit;
@@ -879,15 +844,17 @@ static kcondvar_t l2arc_rebuild_thr_cv;
 enum arc_hdr_alloc_flags {
        ARC_HDR_ALLOC_RDATA = 0x1,
        ARC_HDR_DO_ADAPT = 0x2,
+       ARC_HDR_USE_RESERVE = 0x4,
 };
 
 
-static abd_t *arc_get_data_abd(arc_buf_hdr_t *, uint64_t, void *, boolean_t);
-static void *arc_get_data_buf(arc_buf_hdr_t *, uint64_t, void *);
-static void arc_get_data_impl(arc_buf_hdr_t *, uint64_t, void *, boolean_t);
-static void arc_free_data_abd(arc_buf_hdr_t *, abd_t *, uint64_t, void *);
-static void arc_free_data_buf(arc_buf_hdr_t *, void *, uint64_t, void *);
-static void arc_free_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag);
+static abd_t *arc_get_data_abd(arc_buf_hdr_t *, uint64_t, const void *, int);
+static void *arc_get_data_buf(arc_buf_hdr_t *, uint64_t, const void *);
+static void arc_get_data_impl(arc_buf_hdr_t *, uint64_t, const void *, int);
+static void arc_free_data_abd(arc_buf_hdr_t *, abd_t *, uint64_t, const void *);
+static void arc_free_data_buf(arc_buf_hdr_t *, void *, uint64_t, const void *);
+static void arc_free_data_impl(arc_buf_hdr_t *hdr, uint64_t size,
+    const void *tag);
 static void arc_hdr_free_abd(arc_buf_hdr_t *, boolean_t);
 static void arc_hdr_alloc_abd(arc_buf_hdr_t *, int);
 static void arc_access(arc_buf_hdr_t *, kmutex_t *);
@@ -913,11 +880,19 @@ static void l2arc_hdr_arcstats_update(arc_buf_hdr_t *hdr, boolean_t incr,
 #define        l2arc_hdr_arcstats_decrement_state(hdr) \
        l2arc_hdr_arcstats_update((hdr), B_FALSE, B_TRUE)
 
+/*
+ * l2arc_exclude_special : A ZFS module parameter that controls whether buffers
+ *             present on special vdevs are eligible for caching in L2ARC. If
+ *             set to 1, exclude dbufs on special vdevs from being cached to
+ *             L2ARC.
+ */
+int l2arc_exclude_special = 0;
+
 /*
  * l2arc_mfuonly : A ZFS module parameter that controls whether only MFU
  *             metadata and data are cached from ARC into L2ARC.
  */
-int l2arc_mfuonly = 0;
+static int l2arc_mfuonly = 0;
 
 /*
  * L2ARC TRIM
@@ -934,7 +909,7 @@ int l2arc_mfuonly = 0;
  *             will vary depending of how well the specific device handles
  *             these commands.
  */
-unsigned long l2arc_trim_ahead = 0;
+static unsigned long l2arc_trim_ahead = 0;
 
 /*
  * Performance tuning of L2ARC persistence:
@@ -949,12 +924,12 @@ unsigned long l2arc_trim_ahead = 0;
  *             data. In this case do not write log blocks in L2ARC in order
  *             not to waste space.
  */
-int l2arc_rebuild_enabled = B_TRUE;
-unsigned long l2arc_rebuild_blocks_min_l2size = 1024 * 1024 * 1024;
+static int l2arc_rebuild_enabled = B_TRUE;
+static unsigned long l2arc_rebuild_blocks_min_l2size = 1024 * 1024 * 1024;
 
 /* L2ARC persistence rebuild control routines. */
 void l2arc_rebuild_vdev(vdev_t *vd, boolean_t reopen);
-static void l2arc_dev_rebuild_thread(void *arg);
+static __attribute__((noreturn)) void l2arc_dev_rebuild_thread(void *arg);
 static int l2arc_rebuild(l2arc_dev_t *dev);
 
 /* L2ARC persistence read I/O routines. */
@@ -1084,9 +1059,9 @@ buf_hash_insert(arc_buf_hdr_t *hdr, kmutex_t **lockp)
 
                ARCSTAT_MAX(arcstat_hash_chain_max, i);
        }
-
-       ARCSTAT_BUMP(arcstat_hash_elements);
-       ARCSTAT_MAXSTAT(arcstat_hash_elements);
+       uint64_t he = atomic_inc_64_nv(
+           &arc_stats.arcstat_hash_elements.value.ui64);
+       ARCSTAT_MAX(arcstat_hash_elements_max, he);
 
        return (NULL);
 }
@@ -1110,7 +1085,7 @@ buf_hash_remove(arc_buf_hdr_t *hdr)
        arc_hdr_clear_flags(hdr, ARC_FLAG_IN_HASH_TABLE);
 
        /* collect some hash table performance data */
-       ARCSTAT_BUMPDOWN(arcstat_hash_elements);
+       atomic_dec_64(&arc_stats.arcstat_hash_elements.value.ui64);
 
        if (buf_hash_table.ht_table[idx] &&
            buf_hash_table.ht_table[idx]->b_hash_next == NULL)
@@ -1129,8 +1104,6 @@ static kmem_cache_t *buf_cache;
 static void
 buf_fini(void)
 {
-       int i;
-
 #if defined(_KERNEL)
        /*
         * Large allocations which do not require contiguous pages
@@ -1142,8 +1115,8 @@ buf_fini(void)
        kmem_free(buf_hash_table.ht_table,
            (buf_hash_table.ht_mask + 1) * sizeof (void *));
 #endif
-       for (i = 0; i < BUF_LOCKS; i++)
-               mutex_destroy(&buf_hash_table.ht_locks[i].ht_lock);
+       for (int i = 0; i < BUF_LOCKS; i++)
+               mutex_destroy(BUF_HASH_LOCK(i));
        kmem_cache_destroy(hdr_full_cache);
        kmem_cache_destroy(hdr_full_crypt_cache);
        kmem_cache_destroy(hdr_l2only_cache);
@@ -1154,13 +1127,13 @@ buf_fini(void)
  * Constructor callback - called when the cache is empty
  * and a new buf is requested.
  */
-/* ARGSUSED */
 static int
 hdr_full_cons(void *vbuf, void *unused, int kmflag)
 {
+       (void) unused, (void) kmflag;
        arc_buf_hdr_t *hdr = vbuf;
 
-       bzero(hdr, HDR_FULL_SIZE);
+       memset(hdr, 0, HDR_FULL_SIZE);
        hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS;
        cv_init(&hdr->b_l1hdr.b_cv, NULL, CV_DEFAULT, NULL);
        zfs_refcount_create(&hdr->b_l1hdr.b_refcnt);
@@ -1173,38 +1146,38 @@ hdr_full_cons(void *vbuf, void *unused, int kmflag)
        return (0);
 }
 
-/* ARGSUSED */
 static int
 hdr_full_crypt_cons(void *vbuf, void *unused, int kmflag)
 {
+       (void) unused;
        arc_buf_hdr_t *hdr = vbuf;
 
        hdr_full_cons(vbuf, unused, kmflag);
-       bzero(&hdr->b_crypt_hdr, sizeof (hdr->b_crypt_hdr));
+       memset(&hdr->b_crypt_hdr, 0, sizeof (hdr->b_crypt_hdr));
        arc_space_consume(sizeof (hdr->b_crypt_hdr), ARC_SPACE_HDRS);
 
        return (0);
 }
 
-/* ARGSUSED */
 static int
 hdr_l2only_cons(void *vbuf, void *unused, int kmflag)
 {
+       (void) unused, (void) kmflag;
        arc_buf_hdr_t *hdr = vbuf;
 
-       bzero(hdr, HDR_L2ONLY_SIZE);
+       memset(hdr, 0, HDR_L2ONLY_SIZE);
        arc_space_consume(HDR_L2ONLY_SIZE, ARC_SPACE_L2HDRS);
 
        return (0);
 }
 
-/* ARGSUSED */
 static int
 buf_cons(void *vbuf, void *unused, int kmflag)
 {
+       (void) unused, (void) kmflag;
        arc_buf_t *buf = vbuf;
 
-       bzero(buf, sizeof (arc_buf_t));
+       memset(buf, 0, sizeof (arc_buf_t));
        mutex_init(&buf->b_evict_lock, NULL, MUTEX_DEFAULT, NULL);
        arc_space_consume(sizeof (arc_buf_t), ARC_SPACE_HDRS);
 
@@ -1215,10 +1188,10 @@ buf_cons(void *vbuf, void *unused, int kmflag)
  * Destructor callback - called when a cached buf is
  * no longer required.
  */
-/* ARGSUSED */
 static void
 hdr_full_dest(void *vbuf, void *unused)
 {
+       (void) unused;
        arc_buf_hdr_t *hdr = vbuf;
 
        ASSERT(HDR_EMPTY(hdr));
@@ -1229,30 +1202,30 @@ hdr_full_dest(void *vbuf, void *unused)
        arc_space_return(HDR_FULL_SIZE, ARC_SPACE_HDRS);
 }
 
-/* ARGSUSED */
 static void
 hdr_full_crypt_dest(void *vbuf, void *unused)
 {
-       arc_buf_hdr_t *hdr = vbuf;
+       (void) vbuf, (void) unused;
 
        hdr_full_dest(vbuf, unused);
-       arc_space_return(sizeof (hdr->b_crypt_hdr), ARC_SPACE_HDRS);
+       arc_space_return(sizeof (((arc_buf_hdr_t *)NULL)->b_crypt_hdr),
+           ARC_SPACE_HDRS);
 }
 
-/* ARGSUSED */
 static void
 hdr_l2only_dest(void *vbuf, void *unused)
 {
-       arc_buf_hdr_t *hdr __maybe_unused = vbuf;
+       (void) unused;
+       arc_buf_hdr_t *hdr = vbuf;
 
        ASSERT(HDR_EMPTY(hdr));
        arc_space_return(HDR_L2ONLY_SIZE, ARC_SPACE_L2HDRS);
 }
 
-/* ARGSUSED */
 static void
 buf_dest(void *vbuf, void *unused)
 {
+       (void) unused;
        arc_buf_t *buf = vbuf;
 
        mutex_destroy(&buf->b_evict_lock);
@@ -1308,10 +1281,8 @@ retry:
                for (ct = zfs_crc64_table + i, *ct = i, j = 8; j > 0; j--)
                        *ct = (*ct >> 1) ^ (-(*ct & 1) & ZFS_CRC64_POLY);
 
-       for (i = 0; i < BUF_LOCKS; i++) {
-               mutex_init(&buf_hash_table.ht_locks[i].ht_lock,
-                   NULL, MUTEX_DEFAULT, NULL);
-       }
+       for (i = 0; i < BUF_LOCKS; i++)
+               mutex_init(BUF_HASH_LOCK(i), NULL, MUTEX_DEFAULT, NULL);
 }
 
 #define        ARC_MINTIME     (hz>>4) /* 62 ms */
@@ -1362,9 +1333,9 @@ arc_get_raw_params(arc_buf_t *buf, boolean_t *byteorder, uint8_t *salt,
 
        ASSERT(HDR_PROTECTED(hdr));
 
-       bcopy(hdr->b_crypt_hdr.b_salt, salt, ZIO_DATA_SALT_LEN);
-       bcopy(hdr->b_crypt_hdr.b_iv, iv, ZIO_DATA_IV_LEN);
-       bcopy(hdr->b_crypt_hdr.b_mac, mac, ZIO_DATA_MAC_LEN);
+       memcpy(salt, hdr->b_crypt_hdr.b_salt, ZIO_DATA_SALT_LEN);
+       memcpy(iv, hdr->b_crypt_hdr.b_iv, ZIO_DATA_IV_LEN);
+       memcpy(mac, hdr->b_crypt_hdr.b_mac, ZIO_DATA_MAC_LEN);
        *byteorder = (hdr->b_l1hdr.b_byteswap == DMU_BSWAP_NUMFUNCS) ?
            ZFS_HOST_BYTEORDER : !ZFS_HOST_BYTEORDER;
 }
@@ -1550,11 +1521,11 @@ arc_cksum_compute(arc_buf_t *buf)
 void
 arc_buf_sigsegv(int sig, siginfo_t *si, void *unused)
 {
+       (void) sig, (void) unused;
        panic("Got SIGSEGV at address: 0x%lx\n", (long)si->si_addr);
 }
 #endif
 
-/* ARGSUSED */
 static void
 arc_buf_unwatch(arc_buf_t *buf)
 {
@@ -1563,10 +1534,11 @@ arc_buf_unwatch(arc_buf_t *buf)
                ASSERT0(mprotect(buf->b_data, arc_buf_size(buf),
                    PROT_READ | PROT_WRITE));
        }
+#else
+       (void) buf;
 #endif
 }
 
-/* ARGSUSED */
 static void
 arc_buf_watch(arc_buf_t *buf)
 {
@@ -1574,6 +1546,8 @@ arc_buf_watch(arc_buf_t *buf)
        if (arc_watch)
                ASSERT0(mprotect(buf->b_data, arc_buf_size(buf),
                    PROT_READ));
+#else
+       (void) buf;
 #endif
 }
 
@@ -1719,7 +1693,7 @@ arc_buf_try_copy_decompressed_data(arc_buf_t *buf)
                }
 
                if (!ARC_BUF_COMPRESSED(from)) {
-                       bcopy(from->b_data, buf->b_data, arc_buf_size(buf));
+                       memcpy(buf->b_data, from->b_data, arc_buf_size(buf));
                        copied = B_TRUE;
                        break;
                }
@@ -1901,7 +1875,8 @@ arc_hdr_decrypt(arc_buf_hdr_t *hdr, spa_t *spa, const zbookmark_phys_t *zb)
                 * and then loan a buffer from it, rather than allocating a
                 * linear buffer and wrapping it in an abd later.
                 */
-               cabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr, B_TRUE);
+               cabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr,
+                   ARC_HDR_DO_ADAPT);
                tmp = abd_borrow_buf(cabd, arc_hdr_size(hdr));
 
                ret = zio_decompress_data(HDR_GET_COMPRESS(hdr),
@@ -1984,7 +1959,7 @@ error:
  * arc_buf_fill().
  */
 static void
-arc_buf_untransform_in_place(arc_buf_t *buf, kmutex_t *hash_lock)
+arc_buf_untransform_in_place(arc_buf_t *buf)
 {
        arc_buf_hdr_t *hdr = buf->b_hdr;
 
@@ -2088,7 +2063,7 @@ arc_buf_fill(arc_buf_t *buf, spa_t *spa, const zbookmark_phys_t *zb,
 
                        if (hash_lock != NULL)
                                mutex_enter(hash_lock);
-                       arc_buf_untransform_in_place(buf, hash_lock);
+                       arc_buf_untransform_in_place(buf);
                        if (hash_lock != NULL)
                                mutex_exit(hash_lock);
 
@@ -2107,7 +2082,6 @@ arc_buf_fill(arc_buf_t *buf, spa_t *spa, const zbookmark_phys_t *zb,
        } else {
                ASSERT(hdr_compressed);
                ASSERT(!compressed);
-               ASSERT3U(HDR_GET_LSIZE(hdr), !=, HDR_GET_PSIZE(hdr));
 
                /*
                 * If the buf is sharing its data with the hdr, unlink it and
@@ -2242,7 +2216,6 @@ arc_evictable_space_increment(arc_buf_hdr_t *hdr, arc_state_t *state)
                return;
        }
 
-       ASSERT(!GHOST_STATE(state));
        if (hdr->b_l1hdr.b_pabd != NULL) {
                (void) zfs_refcount_add_many(&state->arcs_esize[type],
                    arc_hdr_size(hdr), hdr);
@@ -2283,7 +2256,6 @@ arc_evictable_space_decrement(arc_buf_hdr_t *hdr, arc_state_t *state)
                return;
        }
 
-       ASSERT(!GHOST_STATE(state));
        if (hdr->b_l1hdr.b_pabd != NULL) {
                (void) zfs_refcount_remove_many(&state->arcs_esize[type],
                    arc_hdr_size(hdr), hdr);
@@ -2309,7 +2281,7 @@ arc_evictable_space_decrement(arc_buf_hdr_t *hdr, arc_state_t *state)
  * it is not evictable.
  */
 static void
-add_reference(arc_buf_hdr_t *hdr, void *tag)
+add_reference(arc_buf_hdr_t *hdr, const void *tag)
 {
        arc_state_t *state;
 
@@ -2326,7 +2298,7 @@ add_reference(arc_buf_hdr_t *hdr, void *tag)
            (state != arc_anon)) {
                /* We don't use the L2-only state list. */
                if (state != arc_l2c_only) {
-                       multilist_remove(state->arcs_list[arc_buf_type(hdr)],
+                       multilist_remove(&state->arcs_list[arc_buf_type(hdr)],
                            hdr);
                        arc_evictable_space_decrement(hdr, state);
                }
@@ -2345,7 +2317,7 @@ add_reference(arc_buf_hdr_t *hdr, void *tag)
  * list making it eligible for eviction.
  */
 static int
-remove_reference(arc_buf_hdr_t *hdr, kmutex_t *hash_lock, void *tag)
+remove_reference(arc_buf_hdr_t *hdr, kmutex_t *hash_lock, const void *tag)
 {
        int cnt;
        arc_state_t *state = hdr->b_l1hdr.b_state;
@@ -2360,7 +2332,7 @@ remove_reference(arc_buf_hdr_t *hdr, kmutex_t *hash_lock, void *tag)
         */
        if (((cnt = zfs_refcount_remove(&hdr->b_l1hdr.b_refcnt, tag)) == 0) &&
            (state != arc_anon)) {
-               multilist_insert(state->arcs_list[arc_buf_type(hdr)], hdr);
+               multilist_insert(&state->arcs_list[arc_buf_type(hdr)], hdr);
                ASSERT3U(hdr->b_l1hdr.b_bufcnt, >, 0);
                arc_evictable_space_increment(hdr, state);
        }
@@ -2377,6 +2349,7 @@ remove_reference(arc_buf_hdr_t *hdr, kmutex_t *hash_lock, void *tag)
 void
 arc_buf_info(arc_buf_t *ab, arc_buf_info_t *abi, int state_index)
 {
+       (void) state_index;
        arc_buf_hdr_t *hdr = ab->b_hdr;
        l1arc_buf_hdr_t *l1hdr = NULL;
        l2arc_buf_hdr_t *l2hdr = NULL;
@@ -2463,7 +2436,7 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr,
        if (refcnt == 0) {
                if (old_state != arc_anon && old_state != arc_l2c_only) {
                        ASSERT(HDR_HAS_L1HDR(hdr));
-                       multilist_remove(old_state->arcs_list[buftype], hdr);
+                       multilist_remove(&old_state->arcs_list[buftype], hdr);
 
                        if (GHOST_STATE(old_state)) {
                                ASSERT0(bufcnt);
@@ -2480,7 +2453,7 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr,
                         * beforehand.
                         */
                        ASSERT(HDR_HAS_L1HDR(hdr));
-                       multilist_insert(new_state->arcs_list[buftype], hdr);
+                       multilist_insert(&new_state->arcs_list[buftype], hdr);
 
                        if (GHOST_STATE(new_state)) {
                                ASSERT0(bufcnt);
@@ -2627,13 +2600,6 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr,
                        l2arc_hdr_arcstats_increment_state(hdr);
                }
        }
-
-       /*
-        * L2 headers should never be on the L2 state list since they don't
-        * have L1 headers allocated.
-        */
-       ASSERT(multilist_is_empty(arc_l2c_only->arcs_list[ARC_BUFC_DATA]) &&
-           multilist_is_empty(arc_l2c_only->arcs_list[ARC_BUFC_METADATA]));
 }
 
 void
@@ -2645,25 +2611,25 @@ arc_space_consume(uint64_t space, arc_space_type_t type)
        default:
                break;
        case ARC_SPACE_DATA:
-               aggsum_add(&astat_data_size, space);
+               ARCSTAT_INCR(arcstat_data_size, space);
                break;
        case ARC_SPACE_META:
-               aggsum_add(&astat_metadata_size, space);
+               ARCSTAT_INCR(arcstat_metadata_size, space);
                break;
        case ARC_SPACE_BONUS:
-               aggsum_add(&astat_bonus_size, space);
+               ARCSTAT_INCR(arcstat_bonus_size, space);
                break;
        case ARC_SPACE_DNODE:
-               aggsum_add(&astat_dnode_size, space);
+               aggsum_add(&arc_sums.arcstat_dnode_size, space);
                break;
        case ARC_SPACE_DBUF:
-               aggsum_add(&astat_dbuf_size, space);
+               ARCSTAT_INCR(arcstat_dbuf_size, space);
                break;
        case ARC_SPACE_HDRS:
-               aggsum_add(&astat_hdr_size, space);
+               ARCSTAT_INCR(arcstat_hdr_size, space);
                break;
        case ARC_SPACE_L2HDRS:
-               aggsum_add(&astat_l2_hdr_size, space);
+               aggsum_add(&arc_sums.arcstat_l2_hdr_size, space);
                break;
        case ARC_SPACE_ABD_CHUNK_WASTE:
                /*
@@ -2672,14 +2638,14 @@ arc_space_consume(uint64_t space, arc_space_type_t type)
                 * scatter ABD's come from the ARC, because other users are
                 * very short-lived.
                 */
-               aggsum_add(&astat_abd_chunk_waste_size, space);
+               ARCSTAT_INCR(arcstat_abd_chunk_waste_size, space);
                break;
        }
 
        if (type != ARC_SPACE_DATA && type != ARC_SPACE_ABD_CHUNK_WASTE)
-               aggsum_add(&arc_meta_used, space);
+               aggsum_add(&arc_sums.arcstat_meta_used, space);
 
-       aggsum_add(&arc_size, space);
+       aggsum_add(&arc_sums.arcstat_size, space);
 }
 
 void
@@ -2691,45 +2657,41 @@ arc_space_return(uint64_t space, arc_space_type_t type)
        default:
                break;
        case ARC_SPACE_DATA:
-               aggsum_add(&astat_data_size, -space);
+               ARCSTAT_INCR(arcstat_data_size, -space);
                break;
        case ARC_SPACE_META:
-               aggsum_add(&astat_metadata_size, -space);
+               ARCSTAT_INCR(arcstat_metadata_size, -space);
                break;
        case ARC_SPACE_BONUS:
-               aggsum_add(&astat_bonus_size, -space);
+               ARCSTAT_INCR(arcstat_bonus_size, -space);
                break;
        case ARC_SPACE_DNODE:
-               aggsum_add(&astat_dnode_size, -space);
+               aggsum_add(&arc_sums.arcstat_dnode_size, -space);
                break;
        case ARC_SPACE_DBUF:
-               aggsum_add(&astat_dbuf_size, -space);
+               ARCSTAT_INCR(arcstat_dbuf_size, -space);
                break;
        case ARC_SPACE_HDRS:
-               aggsum_add(&astat_hdr_size, -space);
+               ARCSTAT_INCR(arcstat_hdr_size, -space);
                break;
        case ARC_SPACE_L2HDRS:
-               aggsum_add(&astat_l2_hdr_size, -space);
+               aggsum_add(&arc_sums.arcstat_l2_hdr_size, -space);
                break;
        case ARC_SPACE_ABD_CHUNK_WASTE:
-               aggsum_add(&astat_abd_chunk_waste_size, -space);
+               ARCSTAT_INCR(arcstat_abd_chunk_waste_size, -space);
                break;
        }
 
        if (type != ARC_SPACE_DATA && type != ARC_SPACE_ABD_CHUNK_WASTE) {
-               ASSERT(aggsum_compare(&arc_meta_used, space) >= 0);
-               /*
-                * We use the upper bound here rather than the precise value
-                * because the arc_meta_max value doesn't need to be
-                * precise. It's only consumed by humans via arcstats.
-                */
-               if (arc_meta_max < aggsum_upper_bound(&arc_meta_used))
-                       arc_meta_max = aggsum_upper_bound(&arc_meta_used);
-               aggsum_add(&arc_meta_used, -space);
+               ASSERT(aggsum_compare(&arc_sums.arcstat_meta_used,
+                   space) >= 0);
+               ARCSTAT_MAX(arcstat_meta_max,
+                   aggsum_upper_bound(&arc_sums.arcstat_meta_used));
+               aggsum_add(&arc_sums.arcstat_meta_used, -space);
        }
 
-       ASSERT(aggsum_compare(&arc_size, space) >= 0);
-       aggsum_add(&arc_size, -space);
+       ASSERT(aggsum_compare(&arc_sums.arcstat_size, space) >= 0);
+       aggsum_add(&arc_sums.arcstat_size, -space);
 }
 
 /*
@@ -2779,8 +2741,8 @@ arc_can_share(arc_buf_hdr_t *hdr, arc_buf_t *buf)
  */
 static int
 arc_buf_alloc_impl(arc_buf_hdr_t *hdr, spa_t *spa, const zbookmark_phys_t *zb,
-    void *tag, boolean_t encrypted, boolean_t compressed, boolean_t noauth,
-    boolean_t fill, arc_buf_t **ret)
+    const void *tag, boolean_t encrypted, boolean_t compressed,
+    boolean_t noauth, boolean_t fill, arc_buf_t **ret)
 {
        arc_buf_t *buf;
        arc_fill_flags_t flags = ARC_FILL_LOCKED;
@@ -2793,12 +2755,6 @@ arc_buf_alloc_impl(arc_buf_hdr_t *hdr, spa_t *spa, const zbookmark_phys_t *zb,
        ASSERT3P(*ret, ==, NULL);
        IMPLY(encrypted, compressed);
 
-       hdr->b_l1hdr.b_mru_hits = 0;
-       hdr->b_l1hdr.b_mru_ghost_hits = 0;
-       hdr->b_l1hdr.b_mfu_hits = 0;
-       hdr->b_l1hdr.b_mfu_ghost_hits = 0;
-       hdr->b_l1hdr.b_l2_hits = 0;
-
        buf = *ret = kmem_cache_alloc(buf_cache, KM_PUSHPAGE);
        buf->b_hdr = hdr;
        buf->b_data = NULL;
@@ -2886,7 +2842,7 @@ arc_buf_alloc_impl(arc_buf_hdr_t *hdr, spa_t *spa, const zbookmark_phys_t *zb,
        return (0);
 }
 
-static char *arc_onloan_tag = "onloan";
+static const char *arc_onloan_tag = "onloan";
 
 static inline void
 arc_loaned_bytes_update(int64_t delta)
@@ -2945,7 +2901,7 @@ arc_loan_raw_buf(spa_t *spa, uint64_t dsobj, boolean_t byteorder,
  * Return a loaned arc buffer to the arc.
  */
 void
-arc_return_buf(arc_buf_t *buf, void *tag)
+arc_return_buf(arc_buf_t *buf, const void *tag)
 {
        arc_buf_hdr_t *hdr = buf->b_hdr;
 
@@ -2959,7 +2915,7 @@ arc_return_buf(arc_buf_t *buf, void *tag)
 
 /* Detach an arc_buf from a dbuf (tag) */
 void
-arc_loan_inuse_buf(arc_buf_t *buf, void *tag)
+arc_loan_inuse_buf(arc_buf_t *buf, const void *tag)
 {
        arc_buf_hdr_t *hdr = buf->b_hdr;
 
@@ -3065,7 +3021,7 @@ arc_unshare_buf(arc_buf_hdr_t *hdr, arc_buf_t *buf)
            arc_hdr_size(hdr), hdr, buf);
        arc_hdr_clear_flags(hdr, ARC_FLAG_SHARED_DATA);
        abd_release_ownership_of_buf(hdr->b_l1hdr.b_pabd);
-       abd_put(hdr->b_l1hdr.b_pabd);
+       abd_free(hdr->b_l1hdr.b_pabd);
        hdr->b_l1hdr.b_pabd = NULL;
        buf->b_flags &= ~ARC_BUF_FLAG_SHARED;
 
@@ -3235,7 +3191,6 @@ arc_hdr_alloc_abd(arc_buf_hdr_t *hdr, int alloc_flags)
 {
        uint64_t size;
        boolean_t alloc_rdata = ((alloc_flags & ARC_HDR_ALLOC_RDATA) != 0);
-       boolean_t do_adapt = ((alloc_flags & ARC_HDR_DO_ADAPT) != 0);
 
        ASSERT3U(HDR_GET_LSIZE(hdr), >, 0);
        ASSERT(HDR_HAS_L1HDR(hdr));
@@ -3246,14 +3201,14 @@ arc_hdr_alloc_abd(arc_buf_hdr_t *hdr, int alloc_flags)
                size = HDR_GET_PSIZE(hdr);
                ASSERT3P(hdr->b_crypt_hdr.b_rabd, ==, NULL);
                hdr->b_crypt_hdr.b_rabd = arc_get_data_abd(hdr, size, hdr,
-                   do_adapt);
+                   alloc_flags);
                ASSERT3P(hdr->b_crypt_hdr.b_rabd, !=, NULL);
                ARCSTAT_INCR(arcstat_raw_size, size);
        } else {
                size = arc_hdr_size(hdr);
                ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
                hdr->b_l1hdr.b_pabd = arc_get_data_abd(hdr, size, hdr,
-                   do_adapt);
+                   alloc_flags);
                ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
        }
 
@@ -3299,13 +3254,34 @@ arc_hdr_free_abd(arc_buf_hdr_t *hdr, boolean_t free_rdata)
        ARCSTAT_INCR(arcstat_uncompressed_size, -HDR_GET_LSIZE(hdr));
 }
 
+/*
+ * Allocate empty anonymous ARC header.  The header will get its identity
+ * assigned and buffers attached later as part of read or write operations.
+ *
+ * In case of read arc_read() assigns header its identity (b_dva + b_birth),
+ * inserts it into ARC hash to become globally visible and allocates physical
+ * (b_pabd) or raw (b_rabd) ABD buffer to read into from disk.  On disk read
+ * completion arc_read_done() allocates ARC buffer(s) as needed, potentially
+ * sharing one of them with the physical ABD buffer.
+ *
+ * In case of write arc_alloc_buf() allocates ARC buffer to be filled with
+ * data.  Then after compression and/or encryption arc_write_ready() allocates
+ * and fills (or potentially shares) physical (b_pabd) or raw (b_rabd) ABD
+ * buffer.  On disk write completion arc_write_done() assigns the header its
+ * new identity (b_dva + b_birth) and inserts into ARC hash.
+ *
+ * In case of partial overwrite the old data is read first as described. Then
+ * arc_release() either allocates new anonymous ARC header and moves the ARC
+ * buffer to it, or reuses the old ARC header by discarding its identity and
+ * removing it from ARC hash.  After buffer modification normal write process
+ * follows as described.
+ */
 static arc_buf_hdr_t *
 arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize,
     boolean_t protected, enum zio_compress compression_type, uint8_t complevel,
-    arc_buf_contents_t type, boolean_t alloc_rdata)
+    arc_buf_contents_t type)
 {
        arc_buf_hdr_t *hdr;
-       int flags = ARC_HDR_DO_ADAPT;
 
        VERIFY(type == ARC_BUFC_DATA || type == ARC_BUFC_METADATA);
        if (protected) {
@@ -3313,7 +3289,6 @@ arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize,
        } else {
                hdr = kmem_cache_alloc(hdr_full_cache, KM_PUSHPAGE);
        }
-       flags |= alloc_rdata ? ARC_HDR_ALLOC_RDATA : 0;
 
        ASSERT(HDR_EMPTY(hdr));
        ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL);
@@ -3330,15 +3305,13 @@ arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize,
 
        hdr->b_l1hdr.b_state = arc_anon;
        hdr->b_l1hdr.b_arc_access = 0;
+       hdr->b_l1hdr.b_mru_hits = 0;
+       hdr->b_l1hdr.b_mru_ghost_hits = 0;
+       hdr->b_l1hdr.b_mfu_hits = 0;
+       hdr->b_l1hdr.b_mfu_ghost_hits = 0;
        hdr->b_l1hdr.b_bufcnt = 0;
        hdr->b_l1hdr.b_buf = NULL;
 
-       /*
-        * Allocate the hdr's buffer. This will contain either
-        * the compressed or uncompressed data depending on the block
-        * it references and compressed arc enablement.
-        */
-       arc_hdr_alloc_abd(hdr, flags);
        ASSERT(zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
 
        return (hdr);
@@ -3377,7 +3350,7 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new)
        ASSERT(MUTEX_HELD(HDR_LOCK(hdr)));
        buf_hash_remove(hdr);
 
-       bcopy(hdr, nhdr, HDR_L2ONLY_SIZE);
+       memcpy(nhdr, hdr, HDR_L2ONLY_SIZE);
 
        if (new == hdr_full_cache || new == hdr_full_crypt_cache) {
                arc_hdr_set_flags(nhdr, ARC_FLAG_HAS_L1HDR);
@@ -3468,7 +3441,6 @@ arc_hdr_realloc_crypt(arc_buf_hdr_t *hdr, boolean_t need_crypt)
        arc_buf_hdr_t *nhdr;
        arc_buf_t *buf;
        kmem_cache_t *ncache, *ocache;
-       unsigned nsize, osize;
 
        /*
         * This function requires that hdr is in the arc_anon state.
@@ -3485,14 +3457,10 @@ arc_hdr_realloc_crypt(arc_buf_hdr_t *hdr, boolean_t need_crypt)
 
        if (need_crypt) {
                ncache = hdr_full_crypt_cache;
-               nsize = sizeof (hdr->b_crypt_hdr);
                ocache = hdr_full_cache;
-               osize = HDR_FULL_SIZE;
        } else {
                ncache = hdr_full_cache;
-               nsize = HDR_FULL_SIZE;
                ocache = hdr_full_crypt_cache;
-               osize = sizeof (hdr->b_crypt_hdr);
        }
 
        nhdr = kmem_cache_alloc(ncache, KM_PUSHPAGE);
@@ -3518,7 +3486,6 @@ arc_hdr_realloc_crypt(arc_buf_hdr_t *hdr, boolean_t need_crypt)
        nhdr->b_l1hdr.b_mru_ghost_hits = hdr->b_l1hdr.b_mru_ghost_hits;
        nhdr->b_l1hdr.b_mfu_hits = hdr->b_l1hdr.b_mfu_hits;
        nhdr->b_l1hdr.b_mfu_ghost_hits = hdr->b_l1hdr.b_mfu_ghost_hits;
-       nhdr->b_l1hdr.b_l2_hits = hdr->b_l1hdr.b_l2_hits;
        nhdr->b_l1hdr.b_acb = hdr->b_l1hdr.b_acb;
        nhdr->b_l1hdr.b_pabd = hdr->b_l1hdr.b_pabd;
 
@@ -3546,7 +3513,7 @@ arc_hdr_realloc_crypt(arc_buf_hdr_t *hdr, boolean_t need_crypt)
        }
 
        /* unset all members of the original hdr */
-       bzero(&hdr->b_dva, sizeof (dva_t));
+       memset(&hdr->b_dva, 0, sizeof (dva_t));
        hdr->b_birth = 0;
        hdr->b_type = ARC_BUFC_INVALID;
        hdr->b_flags = 0;
@@ -3563,7 +3530,6 @@ arc_hdr_realloc_crypt(arc_buf_hdr_t *hdr, boolean_t need_crypt)
        hdr->b_l1hdr.b_mru_ghost_hits = 0;
        hdr->b_l1hdr.b_mfu_hits = 0;
        hdr->b_l1hdr.b_mfu_ghost_hits = 0;
-       hdr->b_l1hdr.b_l2_hits = 0;
        hdr->b_l1hdr.b_acb = NULL;
        hdr->b_l1hdr.b_pabd = NULL;
 
@@ -3572,9 +3538,9 @@ arc_hdr_realloc_crypt(arc_buf_hdr_t *hdr, boolean_t need_crypt)
                hdr->b_crypt_hdr.b_ot = DMU_OT_NONE;
                hdr->b_crypt_hdr.b_ebufcnt = 0;
                hdr->b_crypt_hdr.b_dsobj = 0;
-               bzero(hdr->b_crypt_hdr.b_salt, ZIO_DATA_SALT_LEN);
-               bzero(hdr->b_crypt_hdr.b_iv, ZIO_DATA_IV_LEN);
-               bzero(hdr->b_crypt_hdr.b_mac, ZIO_DATA_MAC_LEN);
+               memset(hdr->b_crypt_hdr.b_salt, 0, ZIO_DATA_SALT_LEN);
+               memset(hdr->b_crypt_hdr.b_iv, 0, ZIO_DATA_IV_LEN);
+               memset(hdr->b_crypt_hdr.b_mac, 0, ZIO_DATA_MAC_LEN);
        }
 
        buf_discard_identity(hdr);
@@ -3612,11 +3578,11 @@ arc_convert_to_raw(arc_buf_t *buf, uint64_t dsobj, boolean_t byteorder,
                arc_cksum_free(hdr);
 
        if (salt != NULL)
-               bcopy(salt, hdr->b_crypt_hdr.b_salt, ZIO_DATA_SALT_LEN);
+               memcpy(hdr->b_crypt_hdr.b_salt, salt, ZIO_DATA_SALT_LEN);
        if (iv != NULL)
-               bcopy(iv, hdr->b_crypt_hdr.b_iv, ZIO_DATA_IV_LEN);
+               memcpy(hdr->b_crypt_hdr.b_iv, iv, ZIO_DATA_IV_LEN);
        if (mac != NULL)
-               bcopy(mac, hdr->b_crypt_hdr.b_mac, ZIO_DATA_MAC_LEN);
+               memcpy(hdr->b_crypt_hdr.b_mac, mac, ZIO_DATA_MAC_LEN);
 }
 
 /*
@@ -3624,10 +3590,11 @@ arc_convert_to_raw(arc_buf_t *buf, uint64_t dsobj, boolean_t byteorder,
  * The buf is returned thawed since we expect the consumer to modify it.
  */
 arc_buf_t *
-arc_alloc_buf(spa_t *spa, void *tag, arc_buf_contents_t type, int32_t size)
+arc_alloc_buf(spa_t *spa, const void *tag, arc_buf_contents_t type,
+    int32_t size)
 {
        arc_buf_hdr_t *hdr = arc_hdr_alloc(spa_load_guid(spa), size, size,
-           B_FALSE, ZIO_COMPRESS_OFF, 0, type, B_FALSE);
+           B_FALSE, ZIO_COMPRESS_OFF, 0, type);
 
        arc_buf_t *buf = NULL;
        VERIFY0(arc_buf_alloc_impl(hdr, spa, NULL, tag, B_FALSE, B_FALSE,
@@ -3642,8 +3609,8 @@ arc_alloc_buf(spa_t *spa, void *tag, arc_buf_contents_t type, int32_t size)
  * for bufs containing metadata.
  */
 arc_buf_t *
-arc_alloc_compressed_buf(spa_t *spa, void *tag, uint64_t psize, uint64_t lsize,
-    enum zio_compress compression_type, uint8_t complevel)
+arc_alloc_compressed_buf(spa_t *spa, const void *tag, uint64_t psize,
+    uint64_t lsize, enum zio_compress compression_type, uint8_t complevel)
 {
        ASSERT3U(lsize, >, 0);
        ASSERT3U(lsize, >=, psize);
@@ -3651,7 +3618,7 @@ arc_alloc_compressed_buf(spa_t *spa, void *tag, uint64_t psize, uint64_t lsize,
        ASSERT3U(compression_type, <, ZIO_COMPRESS_FUNCTIONS);
 
        arc_buf_hdr_t *hdr = arc_hdr_alloc(spa_load_guid(spa), psize, lsize,
-           B_FALSE, compression_type, complevel, ARC_BUFC_DATA, B_FALSE);
+           B_FALSE, compression_type, complevel, ARC_BUFC_DATA);
 
        arc_buf_t *buf = NULL;
        VERIFY0(arc_buf_alloc_impl(hdr, spa, NULL, tag, B_FALSE,
@@ -3659,24 +3626,20 @@ arc_alloc_compressed_buf(spa_t *spa, void *tag, uint64_t psize, uint64_t lsize,
        arc_buf_thaw(buf);
        ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL);
 
-       if (!arc_buf_is_shared(buf)) {
-               /*
-                * To ensure that the hdr has the correct data in it if we call
-                * arc_untransform() on this buf before it's been written to
-                * disk, it's easiest if we just set up sharing between the
-                * buf and the hdr.
-                */
-               arc_hdr_free_abd(hdr, B_FALSE);
-               arc_share_buf(hdr, buf);
-       }
+       /*
+        * To ensure that the hdr has the correct data in it if we call
+        * arc_untransform() on this buf before it's been written to disk,
+        * it's easiest if we just set up sharing between the buf and the hdr.
+        */
+       arc_share_buf(hdr, buf);
 
        return (buf);
 }
 
 arc_buf_t *
-arc_alloc_raw_buf(spa_t *spa, void *tag, uint64_t dsobj, boolean_t byteorder,
-    const uint8_t *salt, const uint8_t *iv, const uint8_t *mac,
-    dmu_object_type_t ot, uint64_t psize, uint64_t lsize,
+arc_alloc_raw_buf(spa_t *spa, const void *tag, uint64_t dsobj,
+    boolean_t byteorder, const uint8_t *salt, const uint8_t *iv,
+    const uint8_t *mac, dmu_object_type_t ot, uint64_t psize, uint64_t lsize,
     enum zio_compress compression_type, uint8_t complevel)
 {
        arc_buf_hdr_t *hdr;
@@ -3690,15 +3653,15 @@ arc_alloc_raw_buf(spa_t *spa, void *tag, uint64_t dsobj, boolean_t byteorder,
        ASSERT3U(compression_type, <, ZIO_COMPRESS_FUNCTIONS);
 
        hdr = arc_hdr_alloc(spa_load_guid(spa), psize, lsize, B_TRUE,
-           compression_type, complevel, type, B_TRUE);
+           compression_type, complevel, type);
 
        hdr->b_crypt_hdr.b_dsobj = dsobj;
        hdr->b_crypt_hdr.b_ot = ot;
        hdr->b_l1hdr.b_byteswap = (byteorder == ZFS_HOST_BYTEORDER) ?
            DMU_BSWAP_NUMFUNCS : DMU_OT_BYTESWAP(ot);
-       bcopy(salt, hdr->b_crypt_hdr.b_salt, ZIO_DATA_SALT_LEN);
-       bcopy(iv, hdr->b_crypt_hdr.b_iv, ZIO_DATA_IV_LEN);
-       bcopy(mac, hdr->b_crypt_hdr.b_mac, ZIO_DATA_MAC_LEN);
+       memcpy(hdr->b_crypt_hdr.b_salt, salt, ZIO_DATA_SALT_LEN);
+       memcpy(hdr->b_crypt_hdr.b_iv, iv, ZIO_DATA_IV_LEN);
+       memcpy(hdr->b_crypt_hdr.b_mac, mac, ZIO_DATA_MAC_LEN);
 
        /*
         * This buffer will be considered encrypted even if the ot is not an
@@ -3833,8 +3796,13 @@ arc_hdr_destroy(arc_buf_hdr_t *hdr)
                 * to acquire the l2ad_mtx. If that happens, we don't
                 * want to re-destroy the header's L2 portion.
                 */
-               if (HDR_HAS_L2HDR(hdr))
+               if (HDR_HAS_L2HDR(hdr)) {
+
+                       if (!HDR_EMPTY(hdr))
+                               buf_discard_identity(hdr);
+
                        arc_hdr_l2hdr_destroy(hdr);
+               }
 
                if (!buflist_held)
                        mutex_exit(&dev->l2ad_mtx);
@@ -3878,7 +3846,7 @@ arc_hdr_destroy(arc_buf_hdr_t *hdr)
 }
 
 void
-arc_buf_destroy(arc_buf_t *buf, void* tag)
+arc_buf_destroy(arc_buf_t *buf, const void *tag)
 {
        arc_buf_hdr_t *hdr = buf->b_hdr;
 
@@ -3915,18 +3883,28 @@ arc_buf_destroy(arc_buf_t *buf, void* tag)
  *    - arc_mru_ghost -> deleted
  *    - arc_mfu_ghost -> arc_l2c_only
  *    - arc_mfu_ghost -> deleted
+ *
+ * Return total size of evicted data buffers for eviction progress tracking.
+ * When evicting from ghost states return logical buffer size to make eviction
+ * progress at the same (or at least comparable) rate as from non-ghost states.
+ *
+ * Return *real_evicted for actual ARC size reduction to wake up threads
+ * waiting for it.  For non-ghost states it includes size of evicted data
+ * buffers (the headers are not freed there).  For ghost states it includes
+ * only the evicted headers size.
  */
 static int64_t
-arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
+arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock, uint64_t *real_evicted)
 {
        arc_state_t *evicted_state, *state;
        int64_t bytes_evicted = 0;
-       int min_lifetime = HDR_PRESCIENT_PREFETCH(hdr) ?
+       uint_t min_lifetime = HDR_PRESCIENT_PREFETCH(hdr) ?
            arc_min_prescient_prefetch_ms : arc_min_prefetch_ms;
 
        ASSERT(MUTEX_HELD(hash_lock));
        ASSERT(HDR_HAS_L1HDR(hdr));
 
+       *real_evicted = 0;
        state = hdr->b_l1hdr.b_state;
        if (GHOST_STATE(state)) {
                ASSERT(!HDR_IO_IN_PROGRESS(hdr));
@@ -3963,9 +3941,11 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
                         */
                        hdr = arc_hdr_realloc(hdr, hdr_full_cache,
                            hdr_l2only_cache);
+                       *real_evicted += HDR_FULL_SIZE - HDR_L2ONLY_SIZE;
                } else {
                        arc_change_state(arc_anon, hdr, hash_lock);
                        arc_hdr_destroy(hdr);
+                       *real_evicted += HDR_FULL_SIZE;
                }
                return (bytes_evicted);
        }
@@ -3989,8 +3969,10 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
                        ARCSTAT_BUMP(arcstat_mutex_miss);
                        break;
                }
-               if (buf->b_data != NULL)
+               if (buf->b_data != NULL) {
                        bytes_evicted += HDR_GET_LSIZE(hdr);
+                       *real_evicted += HDR_GET_LSIZE(hdr);
+               }
                mutex_exit(&buf->b_evict_lock);
                arc_buf_destroy_impl(buf);
        }
@@ -4026,6 +4008,7 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
                arc_cksum_free(hdr);
 
                bytes_evicted += arc_hdr_size(hdr);
+               *real_evicted += arc_hdr_size(hdr);
 
                /*
                 * If this hdr is being evicted and has a compressed
@@ -4064,23 +4047,21 @@ arc_set_need_free(void)
 
 static uint64_t
 arc_evict_state_impl(multilist_t *ml, int idx, arc_buf_hdr_t *marker,
-    uint64_t spa, int64_t bytes)
+    uint64_t spa, uint64_t bytes)
 {
        multilist_sublist_t *mls;
-       uint64_t bytes_evicted = 0;
+       uint64_t bytes_evicted = 0, real_evicted = 0;
        arc_buf_hdr_t *hdr;
        kmutex_t *hash_lock;
-       int evict_count = 0;
+       uint_t evict_count = zfs_arc_evict_batch_limit;
 
        ASSERT3P(marker, !=, NULL);
-       IMPLY(bytes < 0, bytes == ARC_EVICT_ALL);
 
        mls = multilist_sublist_lock(ml, idx);
 
-       for (hdr = multilist_sublist_prev(mls, marker); hdr != NULL;
+       for (hdr = multilist_sublist_prev(mls, marker); likely(hdr != NULL);
            hdr = multilist_sublist_prev(mls, marker)) {
-               if ((bytes != ARC_EVICT_ALL && bytes_evicted >= bytes) ||
-                   (evict_count >= zfs_arc_evict_batch_limit))
+               if ((evict_count == 0) || (bytes_evicted >= bytes))
                        break;
 
                /*
@@ -4128,10 +4109,13 @@ arc_evict_state_impl(multilist_t *ml, int idx, arc_buf_hdr_t *marker,
                ASSERT(!MUTEX_HELD(hash_lock));
 
                if (mutex_tryenter(hash_lock)) {
-                       uint64_t evicted = arc_evict_hdr(hdr, hash_lock);
+                       uint64_t revicted;
+                       uint64_t evicted = arc_evict_hdr(hdr, hash_lock,
+                           &revicted);
                        mutex_exit(hash_lock);
 
                        bytes_evicted += evicted;
+                       real_evicted += revicted;
 
                        /*
                         * If evicted is zero, arc_evict_hdr() must have
@@ -4139,7 +4123,7 @@ arc_evict_state_impl(multilist_t *ml, int idx, arc_buf_hdr_t *marker,
                         * evict_count in this case.
                         */
                        if (evicted != 0)
-                               evict_count++;
+                               evict_count--;
 
                } else {
                        ARCSTAT_BUMP(arcstat_mutex_miss);
@@ -4161,9 +4145,9 @@ arc_evict_state_impl(multilist_t *ml, int idx, arc_buf_hdr_t *marker,
         * 1/64th of RAM).  See the comments in arc_wait_for_eviction().
         */
        mutex_enter(&arc_evict_lock);
-       arc_evict_count += bytes_evicted;
+       arc_evict_count += real_evicted;
 
-       if ((int64_t)(arc_free_memory() - arc_sys_free / 2) > 0) {
+       if (arc_free_memory() > arc_sys_free / 2) {
                arc_evict_waiter_t *aw;
                while ((aw = list_head(&arc_evict_waiters)) != NULL &&
                    aw->aew_count <= arc_evict_count) {
@@ -4181,11 +4165,43 @@ arc_evict_state_impl(multilist_t *ml, int idx, arc_buf_hdr_t *marker,
         * this CPU are able to make progress, make a voluntary preemption
         * call here.
         */
-       cond_resched();
+       kpreempt(KPREEMPT_SYNC);
 
        return (bytes_evicted);
 }
 
+/*
+ * Allocate an array of buffer headers used as placeholders during arc state
+ * eviction.
+ */
+static arc_buf_hdr_t **
+arc_state_alloc_markers(int count)
+{
+       arc_buf_hdr_t **markers;
+
+       markers = kmem_zalloc(sizeof (*markers) * count, KM_SLEEP);
+       for (int i = 0; i < count; i++) {
+               markers[i] = kmem_cache_alloc(hdr_full_cache, KM_SLEEP);
+
+               /*
+                * A b_spa of 0 is used to indicate that this header is
+                * a marker. This fact is used in arc_evict_type() and
+                * arc_evict_state_impl().
+                */
+               markers[i]->b_spa = 0;
+
+       }
+       return (markers);
+}
+
+static void
+arc_state_free_markers(arc_buf_hdr_t **markers, int count)
+{
+       for (int i = 0; i < count; i++)
+               kmem_cache_free(hdr_full_cache, markers[i]);
+       kmem_free(markers, sizeof (*markers) * count);
+}
+
 /*
  * Evict buffers from the given arc state, until we've removed the
  * specified number of bytes. Move the removed buffers to the
@@ -4200,16 +4216,14 @@ arc_evict_state_impl(multilist_t *ml, int idx, arc_buf_hdr_t *marker,
  * the given arc state; which is used by arc_flush().
  */
 static uint64_t
-arc_evict_state(arc_state_t *state, uint64_t spa, int64_t bytes,
+arc_evict_state(arc_state_t *state, uint64_t spa, uint64_t bytes,
     arc_buf_contents_t type)
 {
        uint64_t total_evicted = 0;
-       multilist_t *ml = state->arcs_list[type];
+       multilist_t *ml = &state->arcs_list[type];
        int num_sublists;
        arc_buf_hdr_t **markers;
 
-       IMPLY(bytes < 0, bytes == ARC_EVICT_ALL);
-
        num_sublists = multilist_get_num_sublists(ml);
 
        /*
@@ -4219,19 +4233,15 @@ arc_evict_state(arc_state_t *state, uint64_t spa, int64_t bytes,
         * pick up where we left off for each individual sublist, rather
         * than starting from the tail each time.
         */
-       markers = kmem_zalloc(sizeof (*markers) * num_sublists, KM_SLEEP);
+       if (zthr_iscurthread(arc_evict_zthr)) {
+               markers = arc_state_evict_markers;
+               ASSERT3S(num_sublists, <=, arc_state_evict_marker_count);
+       } else {
+               markers = arc_state_alloc_markers(num_sublists);
+       }
        for (int i = 0; i < num_sublists; i++) {
                multilist_sublist_t *mls;
 
-               markers[i] = kmem_cache_alloc(hdr_full_cache, KM_SLEEP);
-
-               /*
-                * A b_spa of 0 is used to indicate that this header is
-                * a marker. This fact is used in arc_evict_type() and
-                * arc_evict_state_impl().
-                */
-               markers[i]->b_spa = 0;
-
                mls = multilist_sublist_lock(ml, i);
                multilist_sublist_insert_tail(mls, markers[i]);
                multilist_sublist_unlock(mls);
@@ -4241,7 +4251,7 @@ arc_evict_state(arc_state_t *state, uint64_t spa, int64_t bytes,
         * While we haven't hit our target number of bytes to evict, or
         * we're evicting all available buffers.
         */
-       while (total_evicted < bytes || bytes == ARC_EVICT_ALL) {
+       while (total_evicted < bytes) {
                int sublist_idx = multilist_get_random_index(ml);
                uint64_t scan_evicted = 0;
 
@@ -4250,9 +4260,10 @@ arc_evict_state(arc_state_t *state, uint64_t spa, int64_t bytes,
                 * Request that 10% of the LRUs be scanned by the superblock
                 * shrinker.
                 */
-               if (type == ARC_BUFC_DATA && aggsum_compare(&astat_dnode_size,
-                   arc_dnode_size_limit) > 0) {
-                       arc_prune_async((aggsum_upper_bound(&astat_dnode_size) -
+               if (type == ARC_BUFC_DATA && aggsum_compare(
+                   &arc_sums.arcstat_dnode_size, arc_dnode_size_limit) > 0) {
+                       arc_prune_async((aggsum_upper_bound(
+                           &arc_sums.arcstat_dnode_size) -
                            arc_dnode_size_limit) / sizeof (dnode_t) /
                            zfs_arc_dnode_reduce_percent);
                }
@@ -4268,9 +4279,7 @@ arc_evict_state(arc_state_t *state, uint64_t spa, int64_t bytes,
                        uint64_t bytes_remaining;
                        uint64_t bytes_evicted;
 
-                       if (bytes == ARC_EVICT_ALL)
-                               bytes_remaining = ARC_EVICT_ALL;
-                       else if (total_evicted < bytes)
+                       if (total_evicted < bytes)
                                bytes_remaining = bytes - total_evicted;
                        else
                                break;
@@ -4314,10 +4323,9 @@ arc_evict_state(arc_state_t *state, uint64_t spa, int64_t bytes,
                multilist_sublist_t *mls = multilist_sublist_lock(ml, i);
                multilist_sublist_remove(mls, markers[i]);
                multilist_sublist_unlock(mls);
-
-               kmem_cache_free(hdr_full_cache, markers[i]);
        }
-       kmem_free(markers, sizeof (*markers) * num_sublists);
+       if (markers != arc_state_evict_markers)
+               arc_state_free_markers(markers, num_sublists);
 
        return (total_evicted);
 }
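
To make the marker-based scan above concrete, here is a minimal standalone sketch (plain C, not part of this diff; the node type, helper names and id field are invented for illustration). It shows how a sentinel node inserted at the tail of an LRU list lets an eviction pass resume exactly where it left off after dropping locks, the same role the pre-allocated arc_buf_hdr_t markers with b_spa == 0 play in arc_evict_state() and arc_evict_state_impl():

/*
 * Sketch only: a sentinel "marker" node in a doubly linked LRU list.
 * New entries are inserted at the head (MRU end); eviction walks from
 * the tail (LRU end) using the marker as a resumable cursor, skipping
 * nodes whose id is 0, much as the ARC skips headers with b_spa == 0.
 */
#include <stdio.h>

struct node {
        struct node *prev, *next;
        int id;                                 /* 0 identifies a marker */
};

static void
insert_before(struct node *pos, struct node *n)
{
        n->prev = pos->prev;
        n->next = pos;
        pos->prev->next = n;
        pos->prev = n;
}

static void
remove_node(struct node *n)
{
        n->prev->next = n->next;
        n->next->prev = n->prev;
}

int
main(void)
{
        struct node head = { &head, &head, -1 };        /* circular list head */
        struct node bufs[5], marker = { NULL, NULL, 0 };

        for (int i = 0; i < 5; i++) {
                bufs[i].id = i + 1;
                insert_before(head.next, &bufs[i]);     /* newest at the head */
        }
        insert_before(&head, &marker);                  /* cursor at the tail */

        for (int evicted = 0; evicted < 3; ) {
                struct node *cand = marker.prev;        /* oldest unvisited */

                if (cand == &head)
                        break;                          /* list exhausted */
                remove_node(&marker);
                insert_before(cand, &marker);           /* resume point */
                if (cand->id == 0)
                        continue;                       /* another marker */
                remove_node(cand);                      /* "evict" it */
                printf("evicted buf %d\n", cand->id);
                evicted++;
        }
        return (0);
}

Running the sketch evicts buffers 1, 2 and 3 (the oldest entries), and the marker is left sitting where the scan stopped, ready for the next pass.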
@@ -4365,7 +4373,7 @@ static uint64_t
 arc_evict_impl(arc_state_t *state, uint64_t spa, int64_t bytes,
     arc_buf_contents_t type)
 {
-       int64_t delta;
+       uint64_t delta;
 
        if (bytes > 0 && zfs_refcount_count(&state->arcs_esize[type]) > 0) {
                delta = MIN(zfs_refcount_count(&state->arcs_esize[type]),
@@ -4396,10 +4404,10 @@ arc_evict_impl(arc_state_t *state, uint64_t spa, int64_t bytes,
 static uint64_t
 arc_evict_meta_balanced(uint64_t meta_used)
 {
-       int64_t delta, prune = 0, adjustmnt;
-       uint64_t total_evicted = 0;
+       int64_t delta, adjustmnt;
+       uint64_t total_evicted = 0, prune = 0;
        arc_buf_contents_t type = ARC_BUFC_DATA;
-       int restarts = MAX(zfs_arc_meta_adjust_restarts, 0);
+       uint_t restarts = zfs_arc_meta_adjust_restarts;
 
 restart:
        /*
@@ -4482,7 +4490,7 @@ restart:
 }
 
 /*
- * Evict metadata buffers from the cache, such that arc_meta_used is
+ * Evict metadata buffers from the cache, such that arcstat_meta_used is
  * capped by the arc_meta_limit tunable.
  */
 static uint64_t
@@ -4538,8 +4546,8 @@ arc_evict_meta(uint64_t meta_used)
 static arc_buf_contents_t
 arc_evict_type(arc_state_t *state)
 {
-       multilist_t *data_ml = state->arcs_list[ARC_BUFC_DATA];
-       multilist_t *meta_ml = state->arcs_list[ARC_BUFC_METADATA];
+       multilist_t *data_ml = &state->arcs_list[ARC_BUFC_DATA];
+       multilist_t *meta_ml = &state->arcs_list[ARC_BUFC_METADATA];
        int data_idx = multilist_get_random_index(data_ml);
        int meta_idx = multilist_get_random_index(meta_ml);
        multilist_sublist_t *data_mls;
@@ -4603,7 +4611,7 @@ arc_evict_type(arc_state_t *state)
 }
 
 /*
- * Evict buffers from the cache, such that arc_size is capped by arc_c.
+ * Evict buffers from the cache, such that arcstat_size is capped by arc_c.
  */
 static uint64_t
 arc_evict(void)
@@ -4611,8 +4619,8 @@ arc_evict(void)
        uint64_t total_evicted = 0;
        uint64_t bytes;
        int64_t target;
-       uint64_t asize = aggsum_value(&arc_size);
-       uint64_t ameta = aggsum_value(&arc_meta_used);
+       uint64_t asize = aggsum_value(&arc_sums.arcstat_size);
+       uint64_t ameta = aggsum_value(&arc_sums.arcstat_meta_used);
 
        /*
         * If we're over arc_meta_limit, we want to correct that before
@@ -4672,8 +4680,8 @@ arc_evict(void)
        /*
         * Re-sum ARC stats after the first round of evictions.
         */
-       asize = aggsum_value(&arc_size);
-       ameta = aggsum_value(&arc_meta_used);
+       asize = aggsum_value(&arc_sums.arcstat_size);
+       ameta = aggsum_value(&arc_sums.arcstat_meta_used);
 
 
        /*
@@ -4787,7 +4795,7 @@ arc_flush(spa_t *spa, boolean_t retry)
 void
 arc_reduce_target_size(int64_t to_free)
 {
-       uint64_t asize = aggsum_value(&arc_size);
+       uint64_t asize = aggsum_value(&arc_sums.arcstat_size);
 
        /*
         * All callers want the ARC to actually evict (at least) this much
@@ -4836,12 +4844,10 @@ arc_kmem_reap_soon(void)
        size_t                  i;
        kmem_cache_t            *prev_cache = NULL;
        kmem_cache_t            *prev_data_cache = NULL;
-       extern kmem_cache_t     *zio_buf_cache[];
-       extern kmem_cache_t     *zio_data_buf_cache[];
 
 #ifdef _KERNEL
-       if ((aggsum_compare(&arc_meta_used, arc_meta_limit) >= 0) &&
-           zfs_arc_meta_prune) {
+       if ((aggsum_compare(&arc_sums.arcstat_meta_used,
+           arc_meta_limit) >= 0) && zfs_arc_meta_prune) {
                /*
                 * We are exceeding our meta-data cache limit.
                 * Prune some entries to release holds on meta-data.
@@ -4878,18 +4884,12 @@ arc_kmem_reap_soon(void)
        abd_cache_reap_now();
 }
 
-/* ARGSUSED */
 static boolean_t
 arc_evict_cb_check(void *arg, zthr_t *zthr)
 {
-       /*
-        * This is necessary so that any changes which may have been made to
-        * many of the zfs_arc_* module parameters will be propagated to
-        * their actual internal variable counterparts. Without this,
-        * changing those module params at runtime would have no effect.
-        */
-       arc_tuning_update(B_FALSE);
+       (void) arg, (void) zthr;
 
+#ifdef ZFS_DEBUG
        /*
         * This is necessary in order to keep the kstat information
         * up to date for tools that display kstat data such as the
@@ -4897,15 +4897,15 @@ arc_evict_cb_check(void *arg, zthr_t *zthr)
         * typically do not call kstat's update function, but simply
         * dump out stats from the most recent update.  Without
         * this call, these commands may show stale stats for the
-        * anon, mru, mru_ghost, mfu, and mfu_ghost lists. Even
-        * with this change, the data might be up to 1 second
-        * out of date(the arc_evict_zthr has a maximum sleep
-        * time of 1 second); but that should suffice.  The
-        * arc_state_t structures can be queried directly if more
-        * accurate information is needed.
+        * anon, mru, mru_ghost, mfu, and mfu_ghost lists.  Even
+        * with this call, the data might be out of date if the
+        * evict thread hasn't been woken recently; but that should
+        * suffice.  The arc_state_t structures can be queried
+        * directly if more accurate information is needed.
         */
        if (arc_ksp != NULL)
                arc_ksp->ks_update(arc_ksp, KSTAT_READ);
+#endif
 
        /*
         * We have to rely on arc_wait_for_eviction() to tell us when to
@@ -4928,10 +4928,11 @@ arc_evict_cb_check(void *arg, zthr_t *zthr)
  * Keep arc_size under arc_c by running arc_evict which evicts data
  * from the ARC.
  */
-/* ARGSUSED */
 static void
 arc_evict_cb(void *arg, zthr_t *zthr)
 {
+       (void) arg, (void) zthr;
+
        uint64_t evicted = 0;
        fstrans_cookie_t cookie = spl_fstrans_mark();
 
@@ -4951,7 +4952,7 @@ arc_evict_cb(void *arg, zthr_t *zthr)
         */
        mutex_enter(&arc_evict_lock);
        arc_evict_needed = !zthr_iscancelled(arc_evict_zthr) &&
-           evicted > 0 && aggsum_compare(&arc_size, arc_c) > 0;
+           evicted > 0 && aggsum_compare(&arc_sums.arcstat_size, arc_c) > 0;
        if (!arc_evict_needed) {
                /*
                 * We're either no longer overflowing, or we
@@ -4968,10 +4969,11 @@ arc_evict_cb(void *arg, zthr_t *zthr)
        spl_fstrans_unmark(cookie);
 }
 
-/* ARGSUSED */
 static boolean_t
 arc_reap_cb_check(void *arg, zthr_t *zthr)
 {
+       (void) arg, (void) zthr;
+
        int64_t free_memory = arc_available_memory();
        static int reap_cb_check_counter = 0;
 
@@ -5015,10 +5017,11 @@ arc_reap_cb_check(void *arg, zthr_t *zthr)
  * target size of the cache (arc_c), causing the arc_evict_cb()
  * to free more buffers.
  */
-/* ARGSUSED */
 static void
 arc_reap_cb(void *arg, zthr_t *zthr)
 {
+       (void) arg, (void) zthr;
+
        int64_t free_memory;
        fstrans_cookie_t cookie = spl_fstrans_mark();
 
@@ -5043,15 +5046,16 @@ arc_reap_cb(void *arg, zthr_t *zthr)
         * memory in the system at a fraction of the arc_size (1/128th by
         * default).  If oversubscribed (free_memory < 0) then reduce the
         * target arc_size by the deficit amount plus the fractional
-        * amount.  If free memory is positive but less then the fractional
+        * amount.  If free memory is positive but less than the fractional
         * amount, reduce by what is needed to hit the fractional amount.
         */
        free_memory = arc_available_memory();
 
-       int64_t to_free =
-           (arc_c >> arc_shrink_shift) - free_memory;
-       if (to_free > 0) {
-               arc_reduce_target_size(to_free);
+       int64_t can_free = arc_c - arc_c_min;
+       if (can_free > 0) {
+               int64_t to_free = (can_free >> arc_shrink_shift) - free_memory;
+               if (to_free > 0)
+                       arc_reduce_target_size(to_free);
        }
        spl_fstrans_unmark(cookie);
 }
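
As a worked example of the new reap target (numbers assumed purely for illustration): with arc_c = 4 GiB, arc_c_min = 1 GiB, arc_shrink_shift = 7 and free_memory = -64 MiB, can_free = 3 GiB and to_free = (3 GiB >> 7) + 64 MiB = 24 MiB + 64 MiB = 88 MiB. The previous formula, based on arc_c alone, would have asked for (4 GiB >> 7) + 64 MiB = 96 MiB and kept asking even once arc_c had been driven down to arc_c_min; the new code bases the fractional part on what can actually be freed above arc_c_min and skips the reduction entirely when nothing can.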
@@ -5164,7 +5168,7 @@ arc_adapt(int bytes, arc_state_t *state)
         * cache size, increment the target cache size
         */
        ASSERT3U(arc_c, >=, 2ULL << SPA_MAXBLOCKSHIFT);
-       if (aggsum_upper_bound(&arc_size) >=
+       if (aggsum_upper_bound(&arc_sums.arcstat_size) >=
            arc_c - (2ULL << SPA_MAXBLOCKSHIFT)) {
                atomic_add_64(&arc_c, (int64_t)bytes);
                if (arc_c > arc_c_max)
@@ -5181,8 +5185,8 @@ arc_adapt(int bytes, arc_state_t *state)
  * Check if arc_size has grown past our upper threshold, determined by
  * zfs_arc_overflow_shift.
  */
-boolean_t
-arc_is_overflowing(void)
+static arc_ovf_level_t
+arc_is_overflowing(boolean_t use_reserve)
 {
        /* Always allow at least one block of overflow */
        int64_t overflow = MAX(SPA_MAXBLOCKSIZE,
@@ -5197,16 +5201,21 @@ arc_is_overflowing(void)
         * in the ARC. In practice, that's in the tens of MB, which is low
         * enough to be safe.
         */
-       return (aggsum_lower_bound(&arc_size) >= (int64_t)arc_c + overflow);
+       int64_t over = aggsum_lower_bound(&arc_sums.arcstat_size) -
+           arc_c - overflow / 2;
+       if (!use_reserve)
+               overflow /= 2;
+       return (over < 0 ? ARC_OVF_NONE :
+           over < overflow ? ARC_OVF_SOME : ARC_OVF_SEVERE);
 }
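
To illustrate the three overflow levels above, here is a small standalone sketch (plain C, not part of this diff). The constants are assumptions made for illustration only: 16 MiB stands in for SPA_MAXBLOCKSIZE and a hard-coded shift of 8 for zfs_arc_overflow_shift; the enum names mirror the ARC_OVF_* values but the helper itself is invented.

/*
 * Sketch only: the same three-level classification as above, with
 * illustrative constants in place of the ARC tunables.
 */
#include <stdio.h>
#include <stdint.h>

enum ovf { OVF_NONE, OVF_SOME, OVF_SEVERE };

static enum ovf
classify(int64_t size, int64_t c, int use_reserve)
{
        /* Always allow at least one maximum-sized block of overflow. */
        int64_t overflow = (c >> 8) > (16LL << 20) ? (c >> 8) : (16LL << 20);
        int64_t over = size - c - overflow / 2;

        if (!use_reserve)
                overflow /= 2;
        return (over < 0 ? OVF_NONE :
            over < overflow ? OVF_SOME : OVF_SEVERE);
}

int
main(void)
{
        int64_t c = 1LL << 30;                          /* 1 GiB target */

        printf("%d\n", classify(c + (4LL << 20), c, 0));        /* 0: none */
        printf("%d\n", classify(c + (12LL << 20), c, 0));       /* 1: some */
        printf("%d\n", classify(c + (20LL << 20), c, 0));       /* 2: severe */
        printf("%d\n", classify(c + (20LL << 20), c, 1));       /* 1: reserve */
        return (0);
}

With these numbers the boundaries work out to: no action below arc_c + 8 MiB, an advisory wakeup of the eviction thread up to arc_c + 16 MiB (or + 24 MiB when the reserve may be used), and a blocking wait in arc_wait_for_eviction() beyond that.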
 
 static abd_t *
-arc_get_data_abd(arc_buf_hdr_t *hdr, uint64_t size, void *tag,
-    boolean_t do_adapt)
+arc_get_data_abd(arc_buf_hdr_t *hdr, uint64_t size, const void *tag,
+    int alloc_flags)
 {
        arc_buf_contents_t type = arc_buf_type(hdr);
 
-       arc_get_data_impl(hdr, size, tag, do_adapt);
+       arc_get_data_impl(hdr, size, tag, alloc_flags);
        if (type == ARC_BUFC_METADATA) {
                return (abd_alloc(size, B_TRUE));
        } else {
@@ -5216,11 +5225,11 @@ arc_get_data_abd(arc_buf_hdr_t *hdr, uint64_t size, void *tag,
 }
 
 static void *
-arc_get_data_buf(arc_buf_hdr_t *hdr, uint64_t size, void *tag)
+arc_get_data_buf(arc_buf_hdr_t *hdr, uint64_t size, const void *tag)
 {
        arc_buf_contents_t type = arc_buf_type(hdr);
 
-       arc_get_data_impl(hdr, size, tag, B_TRUE);
+       arc_get_data_impl(hdr, size, tag, ARC_HDR_DO_ADAPT);
        if (type == ARC_BUFC_METADATA) {
                return (zio_buf_alloc(size));
        } else {
@@ -5237,54 +5246,75 @@ arc_get_data_buf(arc_buf_hdr_t *hdr, uint64_t size, void *tag)
  * of ARC behavior and settings.  See arc_lowmem_init().
  */
 void
-arc_wait_for_eviction(uint64_t amount)
+arc_wait_for_eviction(uint64_t amount, boolean_t use_reserve)
 {
-       mutex_enter(&arc_evict_lock);
-       if (arc_is_overflowing()) {
-               arc_evict_needed = B_TRUE;
-               zthr_wakeup(arc_evict_zthr);
-
-               if (amount != 0) {
-                       arc_evict_waiter_t aw;
-                       list_link_init(&aw.aew_node);
-                       cv_init(&aw.aew_cv, NULL, CV_DEFAULT, NULL);
+       switch (arc_is_overflowing(use_reserve)) {
+       case ARC_OVF_NONE:
+               return;
+       case ARC_OVF_SOME:
+               /*
+                * This is a bit racy without taking arc_evict_lock, but the
+                * worst that can happen is that we either call zthr_wakeup()
+                * an extra time due to a race with another thread, or the
+                * flag we just set gets cleared by arc_evict_cb().  That is
+                * unlikely given the big hysteresis, and unimportant anyway,
+                * since at this level of overflow the eviction is purely
+                * advisory.  Meanwhile, taking the global lock here on every
+                * call, even without waiting for the actual eviction, would
+                * create significant lock contention.
+                */
+               if (!arc_evict_needed) {
+                       arc_evict_needed = B_TRUE;
+                       zthr_wakeup(arc_evict_zthr);
+               }
+               return;
+       case ARC_OVF_SEVERE:
+       default:
+       {
+               arc_evict_waiter_t aw;
+               list_link_init(&aw.aew_node);
+               cv_init(&aw.aew_cv, NULL, CV_DEFAULT, NULL);
 
+               uint64_t last_count = 0;
+               mutex_enter(&arc_evict_lock);
+               if (!list_is_empty(&arc_evict_waiters)) {
                        arc_evict_waiter_t *last =
                            list_tail(&arc_evict_waiters);
-                       if (last != NULL) {
-                               ASSERT3U(last->aew_count, >, arc_evict_count);
-                               aw.aew_count = last->aew_count + amount;
-                       } else {
-                               aw.aew_count = arc_evict_count + amount;
-                       }
+                       last_count = last->aew_count;
+               } else if (!arc_evict_needed) {
+                       arc_evict_needed = B_TRUE;
+                       zthr_wakeup(arc_evict_zthr);
+               }
+               /*
+                * Note, the last waiter's count may be less than
+                * arc_evict_count if we are low on memory in which
+                * case arc_evict_state_impl() may have deferred
+                * wakeups (but still incremented arc_evict_count).
+                */
+               aw.aew_count = MAX(last_count, arc_evict_count) + amount;
 
-                       list_insert_tail(&arc_evict_waiters, &aw);
+               list_insert_tail(&arc_evict_waiters, &aw);
 
-                       arc_set_need_free();
+               arc_set_need_free();
 
-                       DTRACE_PROBE3(arc__wait__for__eviction,
-                           uint64_t, amount,
-                           uint64_t, arc_evict_count,
-                           uint64_t, aw.aew_count);
+               DTRACE_PROBE3(arc__wait__for__eviction,
+                   uint64_t, amount,
+                   uint64_t, arc_evict_count,
+                   uint64_t, aw.aew_count);
 
-                       /*
-                        * We will be woken up either when arc_evict_count
-                        * reaches aew_count, or when the ARC is no longer
-                        * overflowing and eviction completes.
-                        */
+               /*
+                * We will be woken up either when arc_evict_count reaches
+                * aew_count, or when the ARC is no longer overflowing and
+                * eviction completes.
+                * In case of "false" wakeup, we will still be on the list.
+                */
+               do {
                        cv_wait(&aw.aew_cv, &arc_evict_lock);
+               } while (list_link_active(&aw.aew_node));
+               mutex_exit(&arc_evict_lock);
 
-                       /*
-                        * In case of "false" wakeup, we will still be on the
-                        * list.
-                        */
-                       if (list_link_active(&aw.aew_node))
-                               list_remove(&arc_evict_waiters, &aw);
-
-                       cv_destroy(&aw.aew_cv);
-               }
+               cv_destroy(&aw.aew_cv);
+       }
        }
-       mutex_exit(&arc_evict_lock);
 }
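
As a concrete reading of the waiter bookkeeping above (values invented for illustration): if arc_evict_count is currently 900, the last queued waiter carries aew_count = 1000, and a new caller requests amount = 100, the new waiter registers aew_count = MAX(1000, 900) + 100 = 1100, appends itself to arc_evict_waiters and sleeps. It is woken once arc_evict_count reaches 1100 or eviction completes, and the do/while loop re-checks list_link_active() so a spurious wakeup simply goes back to sleep.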
 
 /*
@@ -5294,13 +5324,13 @@ arc_wait_for_eviction(uint64_t amount)
  * limit, we'll only signal the reclaim thread and continue on.
  */
 static void
-arc_get_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag,
-    boolean_t do_adapt)
+arc_get_data_impl(arc_buf_hdr_t *hdr, uint64_t size, const void *tag,
+    int alloc_flags)
 {
        arc_state_t *state = hdr->b_l1hdr.b_state;
        arc_buf_contents_t type = arc_buf_type(hdr);
 
-       if (do_adapt)
+       if (alloc_flags & ARC_HDR_DO_ADAPT)
                arc_adapt(size, state);
 
        /*
@@ -5315,16 +5345,9 @@ arc_get_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag,
         * requested size to be evicted.  This should be more than 100%, to
         * ensure that progress is also made towards getting arc_size
         * under arc_c.  See the comment above zfs_arc_eviction_pct.
-        *
-        * We do the overflowing check without holding the arc_evict_lock to
-        * reduce lock contention in this hot path.  Note that
-        * arc_wait_for_eviction() will acquire the lock and check again to
-        * ensure we are truly overflowing before blocking.
         */
-       if (arc_is_overflowing()) {
-               arc_wait_for_eviction(size *
-                   zfs_arc_eviction_pct / 100);
-       }
+       arc_wait_for_eviction(size * zfs_arc_eviction_pct / 100,
+           alloc_flags & ARC_HDR_USE_RESERVE);
 
        VERIFY3U(hdr->b_type, ==, type);
        if (type == ARC_BUFC_METADATA) {
@@ -5360,7 +5383,7 @@ arc_get_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag,
                 * If we are growing the cache, and we are adding anonymous
                 * data, and we have outgrown arc_p, update arc_p
                 */
-               if (aggsum_upper_bound(&arc_size) < arc_c &&
+               if (aggsum_upper_bound(&arc_sums.arcstat_size) < arc_c &&
                    hdr->b_l1hdr.b_state == arc_anon &&
                    (zfs_refcount_count(&arc_anon->arcs_size) +
                    zfs_refcount_count(&arc_mru->arcs_size) > arc_p))
@@ -5369,14 +5392,15 @@ arc_get_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag,
 }
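
For example, with zfs_arc_eviction_pct at its default of 200, a 128 KiB allocation that finds the ARC overflowing asks arc_wait_for_eviction() for 128 KiB * 200 / 100 = 256 KiB of eviction, so each allocation drives out more than it adds and arc_size is pulled back under arc_c rather than merely held level.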
 
 static void
-arc_free_data_abd(arc_buf_hdr_t *hdr, abd_t *abd, uint64_t size, void *tag)
+arc_free_data_abd(arc_buf_hdr_t *hdr, abd_t *abd, uint64_t size,
+    const void *tag)
 {
        arc_free_data_impl(hdr, size, tag);
        abd_free(abd);
 }
 
 static void
-arc_free_data_buf(arc_buf_hdr_t *hdr, void *buf, uint64_t size, void *tag)
+arc_free_data_buf(arc_buf_hdr_t *hdr, void *buf, uint64_t size, const void *tag)
 {
        arc_buf_contents_t type = arc_buf_type(hdr);
 
@@ -5393,7 +5417,7 @@ arc_free_data_buf(arc_buf_hdr_t *hdr, void *buf, uint64_t size, void *tag)
  * Free the arc data buffer.
  */
 static void
-arc_free_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag)
+arc_free_data_impl(arc_buf_hdr_t *hdr, uint64_t size, const void *tag)
 {
        arc_state_t *state = hdr->b_l1hdr.b_state;
        arc_buf_contents_t type = arc_buf_type(hdr);
@@ -5463,7 +5487,7 @@ arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
                                arc_hdr_clear_flags(hdr,
                                    ARC_FLAG_PREFETCH |
                                    ARC_FLAG_PRESCIENT_PREFETCH);
-                               atomic_inc_32(&hdr->b_l1hdr.b_mru_hits);
+                               hdr->b_l1hdr.b_mru_hits++;
                                ARCSTAT_BUMP(arcstat_mru_hits);
                                if (HDR_HAS_L2HDR(hdr))
                                        l2arc_hdr_arcstats_increment_state(hdr);
@@ -5488,7 +5512,7 @@ arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
                        DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, hdr);
                        arc_change_state(arc_mfu, hdr, hash_lock);
                }
-               atomic_inc_32(&hdr->b_l1hdr.b_mru_hits);
+               hdr->b_l1hdr.b_mru_hits++;
                ARCSTAT_BUMP(arcstat_mru_hits);
        } else if (hdr->b_l1hdr.b_state == arc_mru_ghost) {
                arc_state_t     *new_state;
@@ -5517,7 +5541,7 @@ arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
                hdr->b_l1hdr.b_arc_access = ddi_get_lbolt();
                arc_change_state(new_state, hdr, hash_lock);
 
-               atomic_inc_32(&hdr->b_l1hdr.b_mru_ghost_hits);
+               hdr->b_l1hdr.b_mru_ghost_hits++;
                ARCSTAT_BUMP(arcstat_mru_ghost_hits);
        } else if (hdr->b_l1hdr.b_state == arc_mfu) {
                /*
@@ -5530,7 +5554,7 @@ arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
                 * the head of the list now.
                 */
 
-               atomic_inc_32(&hdr->b_l1hdr.b_mfu_hits);
+               hdr->b_l1hdr.b_mfu_hits++;
                ARCSTAT_BUMP(arcstat_mfu_hits);
                hdr->b_l1hdr.b_arc_access = ddi_get_lbolt();
        } else if (hdr->b_l1hdr.b_state == arc_mfu_ghost) {
@@ -5553,7 +5577,7 @@ arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
                DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, hdr);
                arc_change_state(new_state, hdr, hash_lock);
 
-               atomic_inc_32(&hdr->b_l1hdr.b_mfu_ghost_hits);
+               hdr->b_l1hdr.b_mfu_ghost_hits++;
                ARCSTAT_BUMP(arcstat_mfu_ghost_hits);
        } else if (hdr->b_l1hdr.b_state == arc_l2c_only) {
                /*
@@ -5614,24 +5638,25 @@ arc_buf_access(arc_buf_t *buf)
 }
 
 /* a generic arc_read_done_func_t which you can use */
-/* ARGSUSED */
 void
 arc_bcopy_func(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp,
     arc_buf_t *buf, void *arg)
 {
+       (void) zio, (void) zb, (void) bp;
+
        if (buf == NULL)
                return;
 
-       bcopy(buf->b_data, arg, arc_buf_size(buf));
+       memcpy(arg, buf->b_data, arc_buf_size(buf));
        arc_buf_destroy(buf, arg);
 }
 
 /* a generic arc_read_done_func_t */
-/* ARGSUSED */
 void
 arc_getbuf_func(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp,
     arc_buf_t *buf, void *arg)
 {
+       (void) zb, (void) bp;
        arc_buf_t **bufp = arg;
 
        if (buf == NULL) {
@@ -5702,17 +5727,20 @@ arc_read_done(zio_t *zio)
                zio_crypt_decode_params_bp(bp, hdr->b_crypt_hdr.b_salt,
                    hdr->b_crypt_hdr.b_iv);
 
-               if (BP_GET_TYPE(bp) == DMU_OT_INTENT_LOG) {
-                       void *tmpbuf;
-
-                       tmpbuf = abd_borrow_buf_copy(zio->io_abd,
-                           sizeof (zil_chain_t));
-                       zio_crypt_decode_mac_zil(tmpbuf,
-                           hdr->b_crypt_hdr.b_mac);
-                       abd_return_buf(zio->io_abd, tmpbuf,
-                           sizeof (zil_chain_t));
-               } else {
-                       zio_crypt_decode_mac_bp(bp, hdr->b_crypt_hdr.b_mac);
+               if (zio->io_error == 0) {
+                       if (BP_GET_TYPE(bp) == DMU_OT_INTENT_LOG) {
+                               void *tmpbuf;
+
+                               tmpbuf = abd_borrow_buf_copy(zio->io_abd,
+                                   sizeof (zil_chain_t));
+                               zio_crypt_decode_mac_zil(tmpbuf,
+                                   hdr->b_crypt_hdr.b_mac);
+                               abd_return_buf(zio->io_abd, tmpbuf,
+                                   sizeof (zil_chain_t));
+                       } else {
+                               zio_crypt_decode_mac_bp(bp,
+                                   hdr->b_crypt_hdr.b_mac);
+                       }
                }
        }
 
@@ -5759,7 +5787,7 @@ arc_read_done(zio_t *zio)
         */
        int callback_cnt = 0;
        for (acb = callback_list; acb != NULL; acb = acb->acb_next) {
-               if (!acb->acb_done)
+               if (!acb->acb_done || acb->acb_nobuf)
                        continue;
 
                callback_cnt++;
@@ -5944,6 +5972,19 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
         */
        fstrans_cookie_t cookie = spl_fstrans_mark();
 top:
+       /*
+        * Verify the block pointer contents are reasonable.  This should
+        * always be the case since the blkptr is protected by a checksum.
+        * However, if there is damage it's desirable to detect this early
+        * and treat it as a checksum error.  This allows an alternate blkptr
+        * to be tried when one is available (e.g. ditto blocks).
+        */
+       if (!zfs_blkptr_verify(spa, bp, zio_flags & ZIO_FLAG_CONFIG_WRITER,
+           BLK_VERIFY_LOG)) {
+               rc = SET_ERROR(ECKSUM);
+               goto out;
+       }
+
        if (!embedded_bp) {
                /*
                 * Embedded BP's have no DVA and require no I/O to "read".
@@ -5992,6 +6033,23 @@ top:
                                    ARC_FLAG_PREDICTIVE_PREFETCH);
                        }
 
+                       /*
+                        * If there are multiple threads reading the same block
+                        * and that block is not yet in the ARC, then only one
+                        * thread will do the physical I/O and all other
+                        * threads will wait until that I/O completes.
+                        * Synchronous reads use the b_cv whereas nowait reads
+                        * register a callback. Both are signalled/called in
+                        * arc_read_done.
+                        *
+                        * Errors of the physical I/O may need to be propagated
+                        * to the pio. For synchronous reads, we simply restart
+                        * this function and it will reassess.  Nowait reads
+                        * attach the acb_zio_dummy zio to pio and
+                        * arc_read_done propagates the physical I/O's io_error
+                        * to acb_zio_dummy, and thereby to pio.
+                        */
+
                        if (*arc_flags & ARC_FLAG_WAIT) {
                                cv_wait(&hdr->b_l1hdr.b_cv, hash_lock);
                                mutex_exit(hash_lock);
@@ -5999,7 +6057,7 @@ top:
                        }
                        ASSERT(*arc_flags & ARC_FLAG_NOWAIT);
 
-                       if (done && !no_buf) {
+                       if (done) {
                                arc_callback_t *acb = NULL;
 
                                acb = kmem_zalloc(sizeof (arc_callback_t),
@@ -6009,6 +6067,7 @@ top:
                                acb->acb_compressed = compressed_read;
                                acb->acb_encrypted = encrypted_read;
                                acb->acb_noauth = noauth_read;
+                               acb->acb_nobuf = no_buf;
                                acb->acb_zb = *zb;
                                if (pio != NULL)
                                        acb->acb_zio_dummy = zio_null(pio,
@@ -6018,8 +6077,6 @@ top:
                                acb->acb_zio_head = head_zio;
                                acb->acb_next = hdr->b_l1hdr.b_acb;
                                hdr->b_l1hdr.b_acb = acb;
-                               mutex_exit(hash_lock);
-                               goto out;
                        }
                        mutex_exit(hash_lock);
                        goto out;
@@ -6121,17 +6178,6 @@ top:
                        goto out;
                }
 
-               /*
-                * Gracefully handle a damaged logical block size as a
-                * checksum error.
-                */
-               if (lsize > spa_maxblocksize(spa)) {
-                       rc = SET_ERROR(ECKSUM);
-                       if (hash_lock != NULL)
-                               mutex_exit(hash_lock);
-                       goto out;
-               }
-
                if (hdr == NULL) {
                        /*
                         * This block is not in the cache or it has
@@ -6140,8 +6186,7 @@ top:
                        arc_buf_hdr_t *exists = NULL;
                        arc_buf_contents_t type = BP_GET_BUFC_TYPE(bp);
                        hdr = arc_hdr_alloc(spa_load_guid(spa), psize, lsize,
-                           BP_IS_PROTECTED(bp), BP_GET_COMPRESS(bp), 0, type,
-                           encrypted_read);
+                           BP_IS_PROTECTED(bp), BP_GET_COMPRESS(bp), 0, type);
 
                        if (!embedded_bp) {
                                hdr->b_dva = *BP_IDENTITY(bp);
@@ -6155,6 +6200,7 @@ top:
                                arc_hdr_destroy(hdr);
                                goto top; /* restart the IO request */
                        }
+                       alloc_flags |= ARC_HDR_DO_ADAPT;
                } else {
                        /*
                         * This block is in the ghost cache or encrypted data
@@ -6202,9 +6248,9 @@ top:
                         */
                        arc_adapt(arc_hdr_size(hdr), hdr->b_l1hdr.b_state);
                        arc_access(hdr, hash_lock);
-                       arc_hdr_alloc_abd(hdr, alloc_flags);
                }
 
+               arc_hdr_alloc_abd(hdr, alloc_flags);
                if (encrypted_read) {
                        ASSERT(HDR_HAS_RABD(hdr));
                        size = HDR_GET_PSIZE(hdr);
@@ -6304,6 +6350,7 @@ top:
                        ARCSTAT_CONDSTAT(!HDR_PREFETCH(hdr),
                            demand, prefetch, !HDR_ISTYPE_METADATA(hdr), data,
                            metadata, misses);
+                       zfs_racct_read(size, 1);
                }
 
                /* Check if the spa even has l2 configured */
@@ -6329,7 +6376,7 @@ top:
 
                                DTRACE_PROBE1(l2arc__hit, arc_buf_hdr_t *, hdr);
                                ARCSTAT_BUMP(arcstat_l2_hits);
-                               atomic_inc_32(&hdr->b_l2hdr.b_hits);
+                               hdr->b_l2hdr.b_hits++;
 
                                cb = kmem_zalloc(sizeof (l2arc_read_callback_t),
                                    KM_SLEEP);
@@ -6549,7 +6596,7 @@ arc_freed(spa_t *spa, const blkptr_t *bp)
  * a new hdr for the buffer.
  */
 void
-arc_release(arc_buf_t *buf, void *tag)
+arc_release(arc_buf_t *buf, const void *tag)
 {
        arc_buf_hdr_t *hdr = buf->b_hdr;
 
@@ -6573,7 +6620,6 @@ arc_release(arc_buf_t *buf, void *tag)
                ASSERT(!HDR_IO_IN_PROGRESS(hdr));
                ASSERT(!HDR_IN_HASH_TABLE(hdr));
                ASSERT(!HDR_HAS_L2HDR(hdr));
-               ASSERT(HDR_EMPTY(hdr));
 
                ASSERT3U(hdr->b_l1hdr.b_bufcnt, ==, 1);
                ASSERT3S(zfs_refcount_count(&hdr->b_l1hdr.b_refcnt), ==, 1);
@@ -6725,7 +6771,7 @@ arc_release(arc_buf_t *buf, void *tag)
                 * buffer which will be freed in arc_write().
                 */
                nhdr = arc_hdr_alloc(spa, psize, lsize, protected,
-                   compress, hdr->b_complevel, type, HDR_HAS_RABD(hdr));
+                   compress, hdr->b_complevel, type);
                ASSERT3P(nhdr->b_l1hdr.b_buf, ==, NULL);
                ASSERT0(nhdr->b_l1hdr.b_bufcnt);
                ASSERT0(zfs_refcount_count(&nhdr->b_l1hdr.b_refcnt));
@@ -6736,11 +6782,6 @@ arc_release(arc_buf_t *buf, void *tag)
                nhdr->b_l1hdr.b_bufcnt = 1;
                if (ARC_BUF_ENCRYPTED(buf))
                        nhdr->b_crypt_hdr.b_ebufcnt = 1;
-               nhdr->b_l1hdr.b_mru_hits = 0;
-               nhdr->b_l1hdr.b_mru_ghost_hits = 0;
-               nhdr->b_l1hdr.b_mfu_hits = 0;
-               nhdr->b_l1hdr.b_mfu_ghost_hits = 0;
-               nhdr->b_l1hdr.b_l2_hits = 0;
                (void) zfs_refcount_add(&nhdr->b_l1hdr.b_refcnt, tag);
                buf->b_hdr = nhdr;
 
@@ -6757,7 +6798,6 @@ arc_release(arc_buf_t *buf, void *tag)
                hdr->b_l1hdr.b_mru_ghost_hits = 0;
                hdr->b_l1hdr.b_mfu_hits = 0;
                hdr->b_l1hdr.b_mfu_ghost_hits = 0;
-               hdr->b_l1hdr.b_l2_hits = 0;
                arc_change_state(arc_anon, hdr, hash_lock);
                hdr->b_l1hdr.b_arc_access = 0;
 
@@ -6911,9 +6951,11 @@ arc_write_ready(zio_t *zio)
        if (ARC_BUF_ENCRYPTED(buf)) {
                ASSERT3U(psize, >, 0);
                ASSERT(ARC_BUF_COMPRESSED(buf));
-               arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT|ARC_HDR_ALLOC_RDATA);
+               arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT | ARC_HDR_ALLOC_RDATA |
+                   ARC_HDR_USE_RESERVE);
                abd_copy(hdr->b_crypt_hdr.b_rabd, zio->io_abd, psize);
-       } else if (zfs_abd_scatter_enabled || !arc_can_share(hdr, buf)) {
+       } else if (!abd_size_alloc_linear(arc_buf_size(buf)) ||
+           !arc_can_share(hdr, buf)) {
                /*
                 * Ideally, we would always copy the io_abd into b_pabd, but the
                 * user may have disabled compressed ARC, thus we must check the
@@ -6921,17 +6963,19 @@ arc_write_ready(zio_t *zio)
                 */
                if (BP_IS_ENCRYPTED(bp)) {
                        ASSERT3U(psize, >, 0);
-                       arc_hdr_alloc_abd(hdr,
-                           ARC_HDR_DO_ADAPT|ARC_HDR_ALLOC_RDATA);
+                       arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT |
+                           ARC_HDR_ALLOC_RDATA | ARC_HDR_USE_RESERVE);
                        abd_copy(hdr->b_crypt_hdr.b_rabd, zio->io_abd, psize);
                } else if (arc_hdr_get_compress(hdr) != ZIO_COMPRESS_OFF &&
                    !ARC_BUF_COMPRESSED(buf)) {
                        ASSERT3U(psize, >, 0);
-                       arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT);
+                       arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT |
+                           ARC_HDR_USE_RESERVE);
                        abd_copy(hdr->b_l1hdr.b_pabd, zio->io_abd, psize);
                } else {
                        ASSERT3U(zio->io_orig_size, ==, arc_hdr_size(hdr));
-                       arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT);
+                       arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT |
+                           ARC_HDR_USE_RESERVE);
                        abd_copy_from_buf(hdr->b_l1hdr.b_pabd, buf->b_data,
                            arc_buf_size(buf));
                }
@@ -7049,7 +7093,7 @@ arc_write_done(zio_t *zio)
        ASSERT(!zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
        callback->awcb_done(zio, buf, callback->awcb_private);
 
-       abd_put(zio->io_abd);
+       abd_free(zio->io_abd);
        kmem_free(callback, sizeof (arc_write_callback_t));
 }
 
@@ -7083,11 +7127,11 @@ arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
                localprop.zp_byteorder =
                    (hdr->b_l1hdr.b_byteswap == DMU_BSWAP_NUMFUNCS) ?
                    ZFS_HOST_BYTEORDER : !ZFS_HOST_BYTEORDER;
-               bcopy(hdr->b_crypt_hdr.b_salt, localprop.zp_salt,
+               memcpy(localprop.zp_salt, hdr->b_crypt_hdr.b_salt,
                    ZIO_DATA_SALT_LEN);
-               bcopy(hdr->b_crypt_hdr.b_iv, localprop.zp_iv,
+               memcpy(localprop.zp_iv, hdr->b_crypt_hdr.b_iv,
                    ZIO_DATA_IV_LEN);
-               bcopy(hdr->b_crypt_hdr.b_mac, localprop.zp_mac,
+               memcpy(localprop.zp_mac, hdr->b_crypt_hdr.b_mac,
                    ZIO_DATA_MAC_LEN);
                if (DMU_OT_IS_ENCRYPTED(localprop.zp_type)) {
                        localprop.zp_nopwrite = B_FALSE;
@@ -7224,8 +7268,11 @@ arc_tempreserve_space(spa_t *spa, uint64_t reserve, uint64_t txg)
                    zfs_refcount_count(&arc_anon->arcs_esize[ARC_BUFC_DATA]);
                dprintf("failing, arc_tempreserve=%lluK anon_meta=%lluK "
                    "anon_data=%lluK tempreserve=%lluK rarc_c=%lluK\n",
-                   arc_tempreserve >> 10, meta_esize >> 10,
-                   data_esize >> 10, reserve >> 10, rarc_c >> 10);
+                   (u_longlong_t)arc_tempreserve >> 10,
+                   (u_longlong_t)meta_esize >> 10,
+                   (u_longlong_t)data_esize >> 10,
+                   (u_longlong_t)reserve >> 10,
+                   (u_longlong_t)rarc_c >> 10);
 #endif
                DMU_TX_STAT_BUMP(dmu_tx_dirty_throttle);
                return (SET_ERROR(ERESTART));
@@ -7250,55 +7297,219 @@ arc_kstat_update(kstat_t *ksp, int rw)
 {
        arc_stats_t *as = ksp->ks_data;
 
-       if (rw == KSTAT_WRITE) {
+       if (rw == KSTAT_WRITE)
                return (SET_ERROR(EACCES));
-       } else {
-               arc_kstat_update_state(arc_anon,
-                   &as->arcstat_anon_size,
-                   &as->arcstat_anon_evictable_data,
-                   &as->arcstat_anon_evictable_metadata);
-               arc_kstat_update_state(arc_mru,
-                   &as->arcstat_mru_size,
-                   &as->arcstat_mru_evictable_data,
-                   &as->arcstat_mru_evictable_metadata);
-               arc_kstat_update_state(arc_mru_ghost,
-                   &as->arcstat_mru_ghost_size,
-                   &as->arcstat_mru_ghost_evictable_data,
-                   &as->arcstat_mru_ghost_evictable_metadata);
-               arc_kstat_update_state(arc_mfu,
-                   &as->arcstat_mfu_size,
-                   &as->arcstat_mfu_evictable_data,
-                   &as->arcstat_mfu_evictable_metadata);
-               arc_kstat_update_state(arc_mfu_ghost,
-                   &as->arcstat_mfu_ghost_size,
-                   &as->arcstat_mfu_ghost_evictable_data,
-                   &as->arcstat_mfu_ghost_evictable_metadata);
-
-               ARCSTAT(arcstat_size) = aggsum_value(&arc_size);
-               ARCSTAT(arcstat_meta_used) = aggsum_value(&arc_meta_used);
-               ARCSTAT(arcstat_data_size) = aggsum_value(&astat_data_size);
-               ARCSTAT(arcstat_metadata_size) =
-                   aggsum_value(&astat_metadata_size);
-               ARCSTAT(arcstat_hdr_size) = aggsum_value(&astat_hdr_size);
-               ARCSTAT(arcstat_l2_hdr_size) = aggsum_value(&astat_l2_hdr_size);
-               ARCSTAT(arcstat_dbuf_size) = aggsum_value(&astat_dbuf_size);
+
+       as->arcstat_hits.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_hits);
+       as->arcstat_misses.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_misses);
+       as->arcstat_demand_data_hits.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_demand_data_hits);
+       as->arcstat_demand_data_misses.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_demand_data_misses);
+       as->arcstat_demand_metadata_hits.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_demand_metadata_hits);
+       as->arcstat_demand_metadata_misses.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_demand_metadata_misses);
+       as->arcstat_prefetch_data_hits.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_prefetch_data_hits);
+       as->arcstat_prefetch_data_misses.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_prefetch_data_misses);
+       as->arcstat_prefetch_metadata_hits.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_prefetch_metadata_hits);
+       as->arcstat_prefetch_metadata_misses.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_prefetch_metadata_misses);
+       as->arcstat_mru_hits.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_mru_hits);
+       as->arcstat_mru_ghost_hits.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_mru_ghost_hits);
+       as->arcstat_mfu_hits.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_mfu_hits);
+       as->arcstat_mfu_ghost_hits.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_mfu_ghost_hits);
+       as->arcstat_deleted.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_deleted);
+       as->arcstat_mutex_miss.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_mutex_miss);
+       as->arcstat_access_skip.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_access_skip);
+       as->arcstat_evict_skip.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_evict_skip);
+       as->arcstat_evict_not_enough.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_evict_not_enough);
+       as->arcstat_evict_l2_cached.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_evict_l2_cached);
+       as->arcstat_evict_l2_eligible.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_evict_l2_eligible);
+       as->arcstat_evict_l2_eligible_mfu.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_evict_l2_eligible_mfu);
+       as->arcstat_evict_l2_eligible_mru.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_evict_l2_eligible_mru);
+       as->arcstat_evict_l2_ineligible.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_evict_l2_ineligible);
+       as->arcstat_evict_l2_skip.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_evict_l2_skip);
+       as->arcstat_hash_collisions.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_hash_collisions);
+       as->arcstat_hash_chains.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_hash_chains);
+       as->arcstat_size.value.ui64 =
+           aggsum_value(&arc_sums.arcstat_size);
+       as->arcstat_compressed_size.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_compressed_size);
+       as->arcstat_uncompressed_size.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_uncompressed_size);
+       as->arcstat_overhead_size.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_overhead_size);
+       as->arcstat_hdr_size.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_hdr_size);
+       as->arcstat_data_size.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_data_size);
+       as->arcstat_metadata_size.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_metadata_size);
+       as->arcstat_dbuf_size.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_dbuf_size);
 #if defined(COMPAT_FREEBSD11)
-               ARCSTAT(arcstat_other_size) = aggsum_value(&astat_bonus_size) +
-                   aggsum_value(&astat_dnode_size) +
-                   aggsum_value(&astat_dbuf_size);
+       as->arcstat_other_size.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_bonus_size) +
+           aggsum_value(&arc_sums.arcstat_dnode_size) +
+           wmsum_value(&arc_sums.arcstat_dbuf_size);
 #endif
-               ARCSTAT(arcstat_dnode_size) = aggsum_value(&astat_dnode_size);
-               ARCSTAT(arcstat_bonus_size) = aggsum_value(&astat_bonus_size);
-               ARCSTAT(arcstat_abd_chunk_waste_size) =
-                   aggsum_value(&astat_abd_chunk_waste_size);
 
-               as->arcstat_memory_all_bytes.value.ui64 =
-                   arc_all_memory();
-               as->arcstat_memory_free_bytes.value.ui64 =
-                   arc_free_memory();
-               as->arcstat_memory_available_bytes.value.i64 =
-                   arc_available_memory();
-       }
+       arc_kstat_update_state(arc_anon,
+           &as->arcstat_anon_size,
+           &as->arcstat_anon_evictable_data,
+           &as->arcstat_anon_evictable_metadata);
+       arc_kstat_update_state(arc_mru,
+           &as->arcstat_mru_size,
+           &as->arcstat_mru_evictable_data,
+           &as->arcstat_mru_evictable_metadata);
+       arc_kstat_update_state(arc_mru_ghost,
+           &as->arcstat_mru_ghost_size,
+           &as->arcstat_mru_ghost_evictable_data,
+           &as->arcstat_mru_ghost_evictable_metadata);
+       arc_kstat_update_state(arc_mfu,
+           &as->arcstat_mfu_size,
+           &as->arcstat_mfu_evictable_data,
+           &as->arcstat_mfu_evictable_metadata);
+       arc_kstat_update_state(arc_mfu_ghost,
+           &as->arcstat_mfu_ghost_size,
+           &as->arcstat_mfu_ghost_evictable_data,
+           &as->arcstat_mfu_ghost_evictable_metadata);
+
+       as->arcstat_dnode_size.value.ui64 =
+           aggsum_value(&arc_sums.arcstat_dnode_size);
+       as->arcstat_bonus_size.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_bonus_size);
+       as->arcstat_l2_hits.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_l2_hits);
+       as->arcstat_l2_misses.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_l2_misses);
+       as->arcstat_l2_prefetch_asize.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_l2_prefetch_asize);
+       as->arcstat_l2_mru_asize.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_l2_mru_asize);
+       as->arcstat_l2_mfu_asize.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_l2_mfu_asize);
+       as->arcstat_l2_bufc_data_asize.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_l2_bufc_data_asize);
+       as->arcstat_l2_bufc_metadata_asize.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_l2_bufc_metadata_asize);
+       as->arcstat_l2_feeds.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_l2_feeds);
+       as->arcstat_l2_rw_clash.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_l2_rw_clash);
+       as->arcstat_l2_read_bytes.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_l2_read_bytes);
+       as->arcstat_l2_write_bytes.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_l2_write_bytes);
+       as->arcstat_l2_writes_sent.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_l2_writes_sent);
+       as->arcstat_l2_writes_done.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_l2_writes_done);
+       as->arcstat_l2_writes_error.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_l2_writes_error);
+       as->arcstat_l2_writes_lock_retry.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_l2_writes_lock_retry);
+       as->arcstat_l2_evict_lock_retry.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_l2_evict_lock_retry);
+       as->arcstat_l2_evict_reading.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_l2_evict_reading);
+       as->arcstat_l2_evict_l1cached.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_l2_evict_l1cached);
+       as->arcstat_l2_free_on_write.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_l2_free_on_write);
+       as->arcstat_l2_abort_lowmem.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_l2_abort_lowmem);
+       as->arcstat_l2_cksum_bad.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_l2_cksum_bad);
+       as->arcstat_l2_io_error.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_l2_io_error);
+       as->arcstat_l2_lsize.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_l2_lsize);
+       as->arcstat_l2_psize.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_l2_psize);
+       as->arcstat_l2_hdr_size.value.ui64 =
+           aggsum_value(&arc_sums.arcstat_l2_hdr_size);
+       as->arcstat_l2_log_blk_writes.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_l2_log_blk_writes);
+       as->arcstat_l2_log_blk_asize.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_l2_log_blk_asize);
+       as->arcstat_l2_log_blk_count.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_l2_log_blk_count);
+       as->arcstat_l2_rebuild_success.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_l2_rebuild_success);
+       as->arcstat_l2_rebuild_abort_unsupported.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_l2_rebuild_abort_unsupported);
+       as->arcstat_l2_rebuild_abort_io_errors.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_l2_rebuild_abort_io_errors);
+       as->arcstat_l2_rebuild_abort_dh_errors.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_l2_rebuild_abort_dh_errors);
+       as->arcstat_l2_rebuild_abort_cksum_lb_errors.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_l2_rebuild_abort_cksum_lb_errors);
+       as->arcstat_l2_rebuild_abort_lowmem.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_l2_rebuild_abort_lowmem);
+       as->arcstat_l2_rebuild_size.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_l2_rebuild_size);
+       as->arcstat_l2_rebuild_asize.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_l2_rebuild_asize);
+       as->arcstat_l2_rebuild_bufs.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_l2_rebuild_bufs);
+       as->arcstat_l2_rebuild_bufs_precached.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_l2_rebuild_bufs_precached);
+       as->arcstat_l2_rebuild_log_blks.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_l2_rebuild_log_blks);
+       as->arcstat_memory_throttle_count.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_memory_throttle_count);
+       as->arcstat_memory_direct_count.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_memory_direct_count);
+       as->arcstat_memory_indirect_count.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_memory_indirect_count);
+
+       as->arcstat_memory_all_bytes.value.ui64 =
+           arc_all_memory();
+       as->arcstat_memory_free_bytes.value.ui64 =
+           arc_free_memory();
+       as->arcstat_memory_available_bytes.value.i64 =
+           arc_available_memory();
+
+       as->arcstat_prune.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_prune);
+       as->arcstat_meta_used.value.ui64 =
+           aggsum_value(&arc_sums.arcstat_meta_used);
+       as->arcstat_async_upgrade_sync.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_async_upgrade_sync);
+       as->arcstat_demand_hit_predictive_prefetch.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_demand_hit_predictive_prefetch);
+       as->arcstat_demand_hit_prescient_prefetch.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_demand_hit_prescient_prefetch);
+       as->arcstat_raw_size.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_raw_size);
+       as->arcstat_cached_only_in_progress.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_cached_only_in_progress);
+       as->arcstat_abd_chunk_waste_size.value.ui64 =
+           wmsum_value(&arc_sums.arcstat_abd_chunk_waste_size);
 
        return (0);
 }
@@ -7332,17 +7543,24 @@ arc_state_multilist_index_func(multilist_t *ml, void *obj)
         * Also, the low order bits of the hash value are thought to be
         * distributed evenly. Otherwise, in the case that the multilist
         * has a power of two number of sublists, each sublists' usage
-        * would not be evenly distributed.
+        * would not be evenly distributed. In this context a full 64-bit
+        * division would be a waste of time, so limit it to 32 bits.
         */
-       return (buf_hash(hdr->b_spa, &hdr->b_dva, hdr->b_birth) %
+       return ((unsigned int)buf_hash(hdr->b_spa, &hdr->b_dva, hdr->b_birth) %
            multilist_get_num_sublists(ml));
 }
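
For context on the cast above (a general code-generation observation, not something stated in this diff): buf_hash() produces a 64-bit value, and taking a 64-bit modulo of it requires a full 64-bit division, which on 32-bit targets typically means a call into a library helper such as __umoddi3. Truncating to unsigned int first discards the high bits, which is harmless because the low bits of the hash are evenly distributed, and lets the compiler emit a single native 32-bit division.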
 
+static unsigned int
+arc_state_l2c_multilist_index_func(multilist_t *ml, void *obj)
+{
+       panic("Header %p insert into arc_l2c_only %p", obj, ml);
+}
+
 #define        WARN_IF_TUNING_IGNORED(tuning, value, do_warn) do {     \
        if ((do_warn) && (tuning) && ((tuning) != (value))) {   \
                cmn_err(CE_WARN,                                \
                    "ignoring tunable %s (using %llu instead)", \
-                   (#tuning), (value));                        \
+                   (#tuning), (u_longlong_t)(value));  \
        }                                                       \
 } while (0)
 
@@ -7370,7 +7588,7 @@ arc_tuning_update(boolean_t verbose)
 
        /* Valid range: 64M - <all physical memory> */
        if ((zfs_arc_max) && (zfs_arc_max != arc_c_max) &&
-           (zfs_arc_max >= 64 << 20) && (zfs_arc_max < allmem) &&
+           (zfs_arc_max >= MIN_ARC_MAX) && (zfs_arc_max < allmem) &&
            (zfs_arc_max > arc_c_min)) {
                arc_c_max = zfs_arc_max;
                arc_c = MIN(arc_c, arc_c_max);
@@ -7438,68 +7656,63 @@ arc_tuning_update(boolean_t verbose)
        }
 
        /* Valid range: 0 - 100 */
-       if ((zfs_arc_lotsfree_percent >= 0) &&
-           (zfs_arc_lotsfree_percent <= 100))
+       if (zfs_arc_lotsfree_percent <= 100)
                arc_lotsfree_percent = zfs_arc_lotsfree_percent;
        WARN_IF_TUNING_IGNORED(zfs_arc_lotsfree_percent, arc_lotsfree_percent,
            verbose);
 
        /* Valid range: 0 - <all physical memory> */
        if ((zfs_arc_sys_free) && (zfs_arc_sys_free != arc_sys_free))
-               arc_sys_free = MIN(MAX(zfs_arc_sys_free, 0), allmem);
+               arc_sys_free = MIN(zfs_arc_sys_free, allmem);
        WARN_IF_TUNING_IGNORED(zfs_arc_sys_free, arc_sys_free, verbose);
 }
 
+static void
+arc_state_multilist_init(multilist_t *ml,
+    multilist_sublist_index_func_t *index_func, int *maxcountp)
+{
+       multilist_create(ml, sizeof (arc_buf_hdr_t),
+           offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), index_func);
+       *maxcountp = MAX(*maxcountp, multilist_get_num_sublists(ml));
+}
+
 static void
 arc_state_init(void)
 {
-       arc_anon = &ARC_anon;
-       arc_mru = &ARC_mru;
-       arc_mru_ghost = &ARC_mru_ghost;
-       arc_mfu = &ARC_mfu;
-       arc_mfu_ghost = &ARC_mfu_ghost;
-       arc_l2c_only = &ARC_l2c_only;
-
-       arc_mru->arcs_list[ARC_BUFC_METADATA] =
-           multilist_create(sizeof (arc_buf_hdr_t),
-           offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
-           arc_state_multilist_index_func);
-       arc_mru->arcs_list[ARC_BUFC_DATA] =
-           multilist_create(sizeof (arc_buf_hdr_t),
-           offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
-           arc_state_multilist_index_func);
-       arc_mru_ghost->arcs_list[ARC_BUFC_METADATA] =
-           multilist_create(sizeof (arc_buf_hdr_t),
-           offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
-           arc_state_multilist_index_func);
-       arc_mru_ghost->arcs_list[ARC_BUFC_DATA] =
-           multilist_create(sizeof (arc_buf_hdr_t),
-           offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
-           arc_state_multilist_index_func);
-       arc_mfu->arcs_list[ARC_BUFC_METADATA] =
-           multilist_create(sizeof (arc_buf_hdr_t),
-           offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
-           arc_state_multilist_index_func);
-       arc_mfu->arcs_list[ARC_BUFC_DATA] =
-           multilist_create(sizeof (arc_buf_hdr_t),
-           offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
-           arc_state_multilist_index_func);
-       arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA] =
-           multilist_create(sizeof (arc_buf_hdr_t),
-           offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
-           arc_state_multilist_index_func);
-       arc_mfu_ghost->arcs_list[ARC_BUFC_DATA] =
-           multilist_create(sizeof (arc_buf_hdr_t),
-           offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
-           arc_state_multilist_index_func);
-       arc_l2c_only->arcs_list[ARC_BUFC_METADATA] =
-           multilist_create(sizeof (arc_buf_hdr_t),
-           offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
-           arc_state_multilist_index_func);
-       arc_l2c_only->arcs_list[ARC_BUFC_DATA] =
-           multilist_create(sizeof (arc_buf_hdr_t),
-           offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
-           arc_state_multilist_index_func);
+       int num_sublists = 0;
+
+       arc_state_multilist_init(&arc_mru->arcs_list[ARC_BUFC_METADATA],
+           arc_state_multilist_index_func, &num_sublists);
+       arc_state_multilist_init(&arc_mru->arcs_list[ARC_BUFC_DATA],
+           arc_state_multilist_index_func, &num_sublists);
+       arc_state_multilist_init(&arc_mru_ghost->arcs_list[ARC_BUFC_METADATA],
+           arc_state_multilist_index_func, &num_sublists);
+       arc_state_multilist_init(&arc_mru_ghost->arcs_list[ARC_BUFC_DATA],
+           arc_state_multilist_index_func, &num_sublists);
+       arc_state_multilist_init(&arc_mfu->arcs_list[ARC_BUFC_METADATA],
+           arc_state_multilist_index_func, &num_sublists);
+       arc_state_multilist_init(&arc_mfu->arcs_list[ARC_BUFC_DATA],
+           arc_state_multilist_index_func, &num_sublists);
+       arc_state_multilist_init(&arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA],
+           arc_state_multilist_index_func, &num_sublists);
+       arc_state_multilist_init(&arc_mfu_ghost->arcs_list[ARC_BUFC_DATA],
+           arc_state_multilist_index_func, &num_sublists);
+
+       /*
+        * L2 headers should never be on the L2 state list since they don't
+        * have L1 headers allocated.  A special index function asserts that.
+        */
+       arc_state_multilist_init(&arc_l2c_only->arcs_list[ARC_BUFC_METADATA],
+           arc_state_l2c_multilist_index_func, &num_sublists);
+       arc_state_multilist_init(&arc_l2c_only->arcs_list[ARC_BUFC_DATA],
+           arc_state_l2c_multilist_index_func, &num_sublists);
+
+       /*
+        * Keep track of the number of markers needed to reclaim buffers from
+        * any ARC state.  The markers will be pre-allocated so as to minimize
+        * the number of memory allocations performed by the eviction thread.
+        */
+       arc_state_evict_marker_count = num_sublists;
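To illustrate the pre-allocation described above, here is a minimal userland sketch of what arc_state_alloc_markers()/arc_state_free_markers() (used in the arc_init()/arc_fini() hunks further down) could look like; the marker_hdr_t type and both helpers are illustrative stand-ins, not the kernel implementation.

#include <stdio.h>
#include <stdlib.h>

/* Stand-in for arc_buf_hdr_t; only its identity matters for a marker. */
typedef struct marker_hdr {
	int mh_unused;
} marker_hdr_t;

/* Sketch of arc_state_alloc_markers(): allocate 'count' markers up front. */
static marker_hdr_t **
markers_alloc(int count)
{
	marker_hdr_t **markers = calloc(count, sizeof (*markers));

	for (int i = 0; i < count; i++)
		markers[i] = calloc(1, sizeof (**markers));
	return (markers);
}

/* Sketch of arc_state_free_markers(): release the pre-allocated markers. */
static void
markers_free(marker_hdr_t **markers, int count)
{
	for (int i = 0; i < count; i++)
		free(markers[i]);
	free(markers);
}

int
main(void)
{
	int count = 16;	/* stands in for arc_state_evict_marker_count */
	marker_hdr_t **markers = markers_alloc(count);

	printf("pre-allocated %d eviction markers\n", count);
	markers_free(markers, count);
	return (0);
}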
 
        zfs_refcount_create(&arc_anon->arcs_esize[ARC_BUFC_METADATA]);
        zfs_refcount_create(&arc_anon->arcs_esize[ARC_BUFC_DATA]);
@@ -7521,16 +7734,93 @@ arc_state_init(void)
        zfs_refcount_create(&arc_mfu_ghost->arcs_size);
        zfs_refcount_create(&arc_l2c_only->arcs_size);
 
-       aggsum_init(&arc_meta_used, 0);
-       aggsum_init(&arc_size, 0);
-       aggsum_init(&astat_data_size, 0);
-       aggsum_init(&astat_metadata_size, 0);
-       aggsum_init(&astat_hdr_size, 0);
-       aggsum_init(&astat_l2_hdr_size, 0);
-       aggsum_init(&astat_bonus_size, 0);
-       aggsum_init(&astat_dnode_size, 0);
-       aggsum_init(&astat_dbuf_size, 0);
-       aggsum_init(&astat_abd_chunk_waste_size, 0);
+       wmsum_init(&arc_sums.arcstat_hits, 0);
+       wmsum_init(&arc_sums.arcstat_misses, 0);
+       wmsum_init(&arc_sums.arcstat_demand_data_hits, 0);
+       wmsum_init(&arc_sums.arcstat_demand_data_misses, 0);
+       wmsum_init(&arc_sums.arcstat_demand_metadata_hits, 0);
+       wmsum_init(&arc_sums.arcstat_demand_metadata_misses, 0);
+       wmsum_init(&arc_sums.arcstat_prefetch_data_hits, 0);
+       wmsum_init(&arc_sums.arcstat_prefetch_data_misses, 0);
+       wmsum_init(&arc_sums.arcstat_prefetch_metadata_hits, 0);
+       wmsum_init(&arc_sums.arcstat_prefetch_metadata_misses, 0);
+       wmsum_init(&arc_sums.arcstat_mru_hits, 0);
+       wmsum_init(&arc_sums.arcstat_mru_ghost_hits, 0);
+       wmsum_init(&arc_sums.arcstat_mfu_hits, 0);
+       wmsum_init(&arc_sums.arcstat_mfu_ghost_hits, 0);
+       wmsum_init(&arc_sums.arcstat_deleted, 0);
+       wmsum_init(&arc_sums.arcstat_mutex_miss, 0);
+       wmsum_init(&arc_sums.arcstat_access_skip, 0);
+       wmsum_init(&arc_sums.arcstat_evict_skip, 0);
+       wmsum_init(&arc_sums.arcstat_evict_not_enough, 0);
+       wmsum_init(&arc_sums.arcstat_evict_l2_cached, 0);
+       wmsum_init(&arc_sums.arcstat_evict_l2_eligible, 0);
+       wmsum_init(&arc_sums.arcstat_evict_l2_eligible_mfu, 0);
+       wmsum_init(&arc_sums.arcstat_evict_l2_eligible_mru, 0);
+       wmsum_init(&arc_sums.arcstat_evict_l2_ineligible, 0);
+       wmsum_init(&arc_sums.arcstat_evict_l2_skip, 0);
+       wmsum_init(&arc_sums.arcstat_hash_collisions, 0);
+       wmsum_init(&arc_sums.arcstat_hash_chains, 0);
+       aggsum_init(&arc_sums.arcstat_size, 0);
+       wmsum_init(&arc_sums.arcstat_compressed_size, 0);
+       wmsum_init(&arc_sums.arcstat_uncompressed_size, 0);
+       wmsum_init(&arc_sums.arcstat_overhead_size, 0);
+       wmsum_init(&arc_sums.arcstat_hdr_size, 0);
+       wmsum_init(&arc_sums.arcstat_data_size, 0);
+       wmsum_init(&arc_sums.arcstat_metadata_size, 0);
+       wmsum_init(&arc_sums.arcstat_dbuf_size, 0);
+       aggsum_init(&arc_sums.arcstat_dnode_size, 0);
+       wmsum_init(&arc_sums.arcstat_bonus_size, 0);
+       wmsum_init(&arc_sums.arcstat_l2_hits, 0);
+       wmsum_init(&arc_sums.arcstat_l2_misses, 0);
+       wmsum_init(&arc_sums.arcstat_l2_prefetch_asize, 0);
+       wmsum_init(&arc_sums.arcstat_l2_mru_asize, 0);
+       wmsum_init(&arc_sums.arcstat_l2_mfu_asize, 0);
+       wmsum_init(&arc_sums.arcstat_l2_bufc_data_asize, 0);
+       wmsum_init(&arc_sums.arcstat_l2_bufc_metadata_asize, 0);
+       wmsum_init(&arc_sums.arcstat_l2_feeds, 0);
+       wmsum_init(&arc_sums.arcstat_l2_rw_clash, 0);
+       wmsum_init(&arc_sums.arcstat_l2_read_bytes, 0);
+       wmsum_init(&arc_sums.arcstat_l2_write_bytes, 0);
+       wmsum_init(&arc_sums.arcstat_l2_writes_sent, 0);
+       wmsum_init(&arc_sums.arcstat_l2_writes_done, 0);
+       wmsum_init(&arc_sums.arcstat_l2_writes_error, 0);
+       wmsum_init(&arc_sums.arcstat_l2_writes_lock_retry, 0);
+       wmsum_init(&arc_sums.arcstat_l2_evict_lock_retry, 0);
+       wmsum_init(&arc_sums.arcstat_l2_evict_reading, 0);
+       wmsum_init(&arc_sums.arcstat_l2_evict_l1cached, 0);
+       wmsum_init(&arc_sums.arcstat_l2_free_on_write, 0);
+       wmsum_init(&arc_sums.arcstat_l2_abort_lowmem, 0);
+       wmsum_init(&arc_sums.arcstat_l2_cksum_bad, 0);
+       wmsum_init(&arc_sums.arcstat_l2_io_error, 0);
+       wmsum_init(&arc_sums.arcstat_l2_lsize, 0);
+       wmsum_init(&arc_sums.arcstat_l2_psize, 0);
+       aggsum_init(&arc_sums.arcstat_l2_hdr_size, 0);
+       wmsum_init(&arc_sums.arcstat_l2_log_blk_writes, 0);
+       wmsum_init(&arc_sums.arcstat_l2_log_blk_asize, 0);
+       wmsum_init(&arc_sums.arcstat_l2_log_blk_count, 0);
+       wmsum_init(&arc_sums.arcstat_l2_rebuild_success, 0);
+       wmsum_init(&arc_sums.arcstat_l2_rebuild_abort_unsupported, 0);
+       wmsum_init(&arc_sums.arcstat_l2_rebuild_abort_io_errors, 0);
+       wmsum_init(&arc_sums.arcstat_l2_rebuild_abort_dh_errors, 0);
+       wmsum_init(&arc_sums.arcstat_l2_rebuild_abort_cksum_lb_errors, 0);
+       wmsum_init(&arc_sums.arcstat_l2_rebuild_abort_lowmem, 0);
+       wmsum_init(&arc_sums.arcstat_l2_rebuild_size, 0);
+       wmsum_init(&arc_sums.arcstat_l2_rebuild_asize, 0);
+       wmsum_init(&arc_sums.arcstat_l2_rebuild_bufs, 0);
+       wmsum_init(&arc_sums.arcstat_l2_rebuild_bufs_precached, 0);
+       wmsum_init(&arc_sums.arcstat_l2_rebuild_log_blks, 0);
+       wmsum_init(&arc_sums.arcstat_memory_throttle_count, 0);
+       wmsum_init(&arc_sums.arcstat_memory_direct_count, 0);
+       wmsum_init(&arc_sums.arcstat_memory_indirect_count, 0);
+       wmsum_init(&arc_sums.arcstat_prune, 0);
+       aggsum_init(&arc_sums.arcstat_meta_used, 0);
+       wmsum_init(&arc_sums.arcstat_async_upgrade_sync, 0);
+       wmsum_init(&arc_sums.arcstat_demand_hit_predictive_prefetch, 0);
+       wmsum_init(&arc_sums.arcstat_demand_hit_prescient_prefetch, 0);
+       wmsum_init(&arc_sums.arcstat_raw_size, 0);
+       wmsum_init(&arc_sums.arcstat_cached_only_in_progress, 0);
+       wmsum_init(&arc_sums.arcstat_abd_chunk_waste_size, 0);
 
        arc_anon->arcs_state = ARC_STATE_ANON;
        arc_mru->arcs_state = ARC_STATE_MRU;
@@ -7563,27 +7853,104 @@ arc_state_fini(void)
        zfs_refcount_destroy(&arc_mfu_ghost->arcs_size);
        zfs_refcount_destroy(&arc_l2c_only->arcs_size);
 
-       multilist_destroy(arc_mru->arcs_list[ARC_BUFC_METADATA]);
-       multilist_destroy(arc_mru_ghost->arcs_list[ARC_BUFC_METADATA]);
-       multilist_destroy(arc_mfu->arcs_list[ARC_BUFC_METADATA]);
-       multilist_destroy(arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA]);
-       multilist_destroy(arc_mru->arcs_list[ARC_BUFC_DATA]);
-       multilist_destroy(arc_mru_ghost->arcs_list[ARC_BUFC_DATA]);
-       multilist_destroy(arc_mfu->arcs_list[ARC_BUFC_DATA]);
-       multilist_destroy(arc_mfu_ghost->arcs_list[ARC_BUFC_DATA]);
-       multilist_destroy(arc_l2c_only->arcs_list[ARC_BUFC_METADATA]);
-       multilist_destroy(arc_l2c_only->arcs_list[ARC_BUFC_DATA]);
-
-       aggsum_fini(&arc_meta_used);
-       aggsum_fini(&arc_size);
-       aggsum_fini(&astat_data_size);
-       aggsum_fini(&astat_metadata_size);
-       aggsum_fini(&astat_hdr_size);
-       aggsum_fini(&astat_l2_hdr_size);
-       aggsum_fini(&astat_bonus_size);
-       aggsum_fini(&astat_dnode_size);
-       aggsum_fini(&astat_dbuf_size);
-       aggsum_fini(&astat_abd_chunk_waste_size);
+       multilist_destroy(&arc_mru->arcs_list[ARC_BUFC_METADATA]);
+       multilist_destroy(&arc_mru_ghost->arcs_list[ARC_BUFC_METADATA]);
+       multilist_destroy(&arc_mfu->arcs_list[ARC_BUFC_METADATA]);
+       multilist_destroy(&arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA]);
+       multilist_destroy(&arc_mru->arcs_list[ARC_BUFC_DATA]);
+       multilist_destroy(&arc_mru_ghost->arcs_list[ARC_BUFC_DATA]);
+       multilist_destroy(&arc_mfu->arcs_list[ARC_BUFC_DATA]);
+       multilist_destroy(&arc_mfu_ghost->arcs_list[ARC_BUFC_DATA]);
+       multilist_destroy(&arc_l2c_only->arcs_list[ARC_BUFC_METADATA]);
+       multilist_destroy(&arc_l2c_only->arcs_list[ARC_BUFC_DATA]);
+
+       wmsum_fini(&arc_sums.arcstat_hits);
+       wmsum_fini(&arc_sums.arcstat_misses);
+       wmsum_fini(&arc_sums.arcstat_demand_data_hits);
+       wmsum_fini(&arc_sums.arcstat_demand_data_misses);
+       wmsum_fini(&arc_sums.arcstat_demand_metadata_hits);
+       wmsum_fini(&arc_sums.arcstat_demand_metadata_misses);
+       wmsum_fini(&arc_sums.arcstat_prefetch_data_hits);
+       wmsum_fini(&arc_sums.arcstat_prefetch_data_misses);
+       wmsum_fini(&arc_sums.arcstat_prefetch_metadata_hits);
+       wmsum_fini(&arc_sums.arcstat_prefetch_metadata_misses);
+       wmsum_fini(&arc_sums.arcstat_mru_hits);
+       wmsum_fini(&arc_sums.arcstat_mru_ghost_hits);
+       wmsum_fini(&arc_sums.arcstat_mfu_hits);
+       wmsum_fini(&arc_sums.arcstat_mfu_ghost_hits);
+       wmsum_fini(&arc_sums.arcstat_deleted);
+       wmsum_fini(&arc_sums.arcstat_mutex_miss);
+       wmsum_fini(&arc_sums.arcstat_access_skip);
+       wmsum_fini(&arc_sums.arcstat_evict_skip);
+       wmsum_fini(&arc_sums.arcstat_evict_not_enough);
+       wmsum_fini(&arc_sums.arcstat_evict_l2_cached);
+       wmsum_fini(&arc_sums.arcstat_evict_l2_eligible);
+       wmsum_fini(&arc_sums.arcstat_evict_l2_eligible_mfu);
+       wmsum_fini(&arc_sums.arcstat_evict_l2_eligible_mru);
+       wmsum_fini(&arc_sums.arcstat_evict_l2_ineligible);
+       wmsum_fini(&arc_sums.arcstat_evict_l2_skip);
+       wmsum_fini(&arc_sums.arcstat_hash_collisions);
+       wmsum_fini(&arc_sums.arcstat_hash_chains);
+       aggsum_fini(&arc_sums.arcstat_size);
+       wmsum_fini(&arc_sums.arcstat_compressed_size);
+       wmsum_fini(&arc_sums.arcstat_uncompressed_size);
+       wmsum_fini(&arc_sums.arcstat_overhead_size);
+       wmsum_fini(&arc_sums.arcstat_hdr_size);
+       wmsum_fini(&arc_sums.arcstat_data_size);
+       wmsum_fini(&arc_sums.arcstat_metadata_size);
+       wmsum_fini(&arc_sums.arcstat_dbuf_size);
+       aggsum_fini(&arc_sums.arcstat_dnode_size);
+       wmsum_fini(&arc_sums.arcstat_bonus_size);
+       wmsum_fini(&arc_sums.arcstat_l2_hits);
+       wmsum_fini(&arc_sums.arcstat_l2_misses);
+       wmsum_fini(&arc_sums.arcstat_l2_prefetch_asize);
+       wmsum_fini(&arc_sums.arcstat_l2_mru_asize);
+       wmsum_fini(&arc_sums.arcstat_l2_mfu_asize);
+       wmsum_fini(&arc_sums.arcstat_l2_bufc_data_asize);
+       wmsum_fini(&arc_sums.arcstat_l2_bufc_metadata_asize);
+       wmsum_fini(&arc_sums.arcstat_l2_feeds);
+       wmsum_fini(&arc_sums.arcstat_l2_rw_clash);
+       wmsum_fini(&arc_sums.arcstat_l2_read_bytes);
+       wmsum_fini(&arc_sums.arcstat_l2_write_bytes);
+       wmsum_fini(&arc_sums.arcstat_l2_writes_sent);
+       wmsum_fini(&arc_sums.arcstat_l2_writes_done);
+       wmsum_fini(&arc_sums.arcstat_l2_writes_error);
+       wmsum_fini(&arc_sums.arcstat_l2_writes_lock_retry);
+       wmsum_fini(&arc_sums.arcstat_l2_evict_lock_retry);
+       wmsum_fini(&arc_sums.arcstat_l2_evict_reading);
+       wmsum_fini(&arc_sums.arcstat_l2_evict_l1cached);
+       wmsum_fini(&arc_sums.arcstat_l2_free_on_write);
+       wmsum_fini(&arc_sums.arcstat_l2_abort_lowmem);
+       wmsum_fini(&arc_sums.arcstat_l2_cksum_bad);
+       wmsum_fini(&arc_sums.arcstat_l2_io_error);
+       wmsum_fini(&arc_sums.arcstat_l2_lsize);
+       wmsum_fini(&arc_sums.arcstat_l2_psize);
+       aggsum_fini(&arc_sums.arcstat_l2_hdr_size);
+       wmsum_fini(&arc_sums.arcstat_l2_log_blk_writes);
+       wmsum_fini(&arc_sums.arcstat_l2_log_blk_asize);
+       wmsum_fini(&arc_sums.arcstat_l2_log_blk_count);
+       wmsum_fini(&arc_sums.arcstat_l2_rebuild_success);
+       wmsum_fini(&arc_sums.arcstat_l2_rebuild_abort_unsupported);
+       wmsum_fini(&arc_sums.arcstat_l2_rebuild_abort_io_errors);
+       wmsum_fini(&arc_sums.arcstat_l2_rebuild_abort_dh_errors);
+       wmsum_fini(&arc_sums.arcstat_l2_rebuild_abort_cksum_lb_errors);
+       wmsum_fini(&arc_sums.arcstat_l2_rebuild_abort_lowmem);
+       wmsum_fini(&arc_sums.arcstat_l2_rebuild_size);
+       wmsum_fini(&arc_sums.arcstat_l2_rebuild_asize);
+       wmsum_fini(&arc_sums.arcstat_l2_rebuild_bufs);
+       wmsum_fini(&arc_sums.arcstat_l2_rebuild_bufs_precached);
+       wmsum_fini(&arc_sums.arcstat_l2_rebuild_log_blks);
+       wmsum_fini(&arc_sums.arcstat_memory_throttle_count);
+       wmsum_fini(&arc_sums.arcstat_memory_direct_count);
+       wmsum_fini(&arc_sums.arcstat_memory_indirect_count);
+       wmsum_fini(&arc_sums.arcstat_prune);
+       aggsum_fini(&arc_sums.arcstat_meta_used);
+       wmsum_fini(&arc_sums.arcstat_async_upgrade_sync);
+       wmsum_fini(&arc_sums.arcstat_demand_hit_predictive_prefetch);
+       wmsum_fini(&arc_sums.arcstat_demand_hit_prescient_prefetch);
+       wmsum_fini(&arc_sums.arcstat_raw_size);
+       wmsum_fini(&arc_sums.arcstat_cached_only_in_progress);
+       wmsum_fini(&arc_sums.arcstat_abd_chunk_waste_size);
 }
 
 uint64_t
@@ -7592,6 +7959,15 @@ arc_target_bytes(void)
        return (arc_c);
 }
 
+void
+arc_set_limits(uint64_t allmem)
+{
+       /* Set min cache to 1/32 of all memory, or 32MB, whichever is more. */
+       arc_c_min = MAX(allmem / 32, 2ULL << SPA_MAXBLOCKSHIFT);
+
+       /* How to set default max varies by platform. */
+       arc_c_max = arc_default_max(arc_c_min, allmem);
+}
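As a rough, self-contained illustration of the sizing rule above (and of the boot-time zfs_arc_max clamp in the arc_init() hunk just below), here is a userland sketch; the 8 GiB and 128 MiB figures are made-up inputs, and SPA_MAXBLOCKSHIFT is hard-coded to 24 only because the "32MB" comment above implies 2ULL << SPA_MAXBLOCKSHIFT is 32 MiB.

#include <stdio.h>
#include <stdint.h>

#define	MAX(a, b)		((a) > (b) ? (a) : (b))
#define	SPA_MAXBLOCKSHIFT	24	/* assumed; 2ULL << 24 == 32 MiB */

int
main(void)
{
	uint64_t allmem = 8ULL << 30;	/* hypothetical 8 GiB of memory */
	uint64_t arc_c_min = MAX(allmem / 32, 2ULL << SPA_MAXBLOCKSHIFT);

	/* 8 GiB / 32 = 256 MiB, which wins over the 32 MiB floor. */
	printf("arc_c_min = %llu MiB\n", (unsigned long long)(arc_c_min >> 20));

	/*
	 * Boot-time clamp from arc_init() below: with zfs_arc_max set to,
	 * say, 128 MiB before module load (and assuming it passes the
	 * MIN_ARC_MAX check), arc_c_min would exceed arc_c_max, so it is
	 * lowered to MAX(zfs_arc_max / 2, 32 MiB) = 64 MiB.
	 */
	uint64_t zfs_arc_max = 128ULL << 20;
	uint64_t arc_c_max = zfs_arc_max;
	if (arc_c_min >= arc_c_max)
		arc_c_min = MAX(zfs_arc_max / 2, 2ULL << SPA_MAXBLOCKSHIFT);
	printf("arc_c_min = %llu MiB, arc_c_max = %llu MiB\n",
	    (unsigned long long)(arc_c_min >> 20),
	    (unsigned long long)(arc_c_max >> 20));
	return (0);
}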
 void
 arc_init(void)
 {
@@ -7607,13 +7983,25 @@ arc_init(void)
        arc_lowmem_init();
 #endif
 
-       /* Set min cache to 1/32 of all memory, or 32MB, whichever is more. */
-       arc_c_min = MAX(allmem / 32, 2ULL << SPA_MAXBLOCKSHIFT);
-
-       /* How to set default max varies by platform. */
-       arc_c_max = arc_default_max(arc_c_min, allmem);
+       arc_set_limits(allmem);
 
-#ifndef _KERNEL
+#ifdef _KERNEL
+       /*
+        * If zfs_arc_max is non-zero at init, meaning it was set in the kernel
+        * environment before the module was loaded, don't reject the requested
+        * maximum just because it is less than arc_c_min; instead, reset
+        * arc_c_min to a lower value.
+        * zfs_arc_min will be handled by arc_tuning_update().
+        */
+       if (zfs_arc_max != 0 && zfs_arc_max >= MIN_ARC_MAX &&
+           zfs_arc_max < allmem) {
+               arc_c_max = zfs_arc_max;
+               if (arc_c_min >= arc_c_max) {
+                       arc_c_min = MAX(zfs_arc_max / 2,
+                           2ULL << SPA_MAXBLOCKSHIFT);
+               }
+       }
+#else
        /*
         * In userland, there's only the memory pressure that we artificially
         * create (see arc_available_memory()).  Don't let arc_c get too
@@ -7628,8 +8016,6 @@ arc_init(void)
 
        /* Set min to 1/2 of arc_c_min */
        arc_meta_min = 1ULL << SPA_MAXBLOCKSHIFT;
-       /* Initialize maximum observed usage to zero */
-       arc_meta_max = 0;
        /*
         * Set arc_meta_limit to a percent of arc_c_max with a floor of
         * arc_meta_min, and a ceiling of arc_c_max.
@@ -7648,6 +8034,8 @@ arc_init(void)
        if (arc_c < arc_c_min)
                arc_c = arc_c_min;
 
+       arc_register_hotplug();
+
        arc_state_init();
 
        buf_init();
@@ -7656,8 +8044,8 @@ arc_init(void)
            offsetof(arc_prune_t, p_node));
        mutex_init(&arc_prune_mtx, NULL, MUTEX_DEFAULT, NULL);
 
-       arc_prune_taskq = taskq_create("arc_prune", boot_ncpus, defclsyspri,
-           boot_ncpus, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
+       arc_prune_taskq = taskq_create("arc_prune", zfs_arc_prune_task_threads,
+           defclsyspri, 100, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
 
        arc_ksp = kstat_create("zfs", 0, "arcstats", "misc", KSTAT_TYPE_NAMED,
            sizeof (arc_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
@@ -7668,10 +8056,12 @@ arc_init(void)
                kstat_install(arc_ksp);
        }
 
-       arc_evict_zthr = zthr_create_timer("arc_evict",
-           arc_evict_cb_check, arc_evict_cb, NULL, SEC2NSEC(1));
+       arc_state_evict_markers =
+           arc_state_alloc_markers(arc_state_evict_marker_count);
+       arc_evict_zthr = zthr_create("arc_evict",
+           arc_evict_cb_check, arc_evict_cb, NULL, defclsyspri);
        arc_reap_zthr = zthr_create_timer("arc_reap",
-           arc_reap_cb_check, arc_reap_cb, NULL, SEC2NSEC(1));
+           arc_reap_cb_check, arc_reap_cb, NULL, SEC2NSEC(1), minclsyspri);
 
        arc_warm = B_FALSE;
 
@@ -7699,6 +8089,18 @@ arc_init(void)
                zfs_dirty_data_max = MIN(zfs_dirty_data_max,
                    zfs_dirty_data_max_max);
        }
+
+       if (zfs_wrlog_data_max == 0) {
+
+               /*
+                * dp_wrlog_total is reduced for each txg at the end of
+                * spa_sync(). However, dp_dirty_total is reduced every time
+                * a block is written out. Thus under normal operation,
+                * dp_wrlog_total could grow to twice the size of
+                * zfs_dirty_data_max.
+                */
+               zfs_wrlog_data_max = zfs_dirty_data_max * 2;
+       }
 }
 
 void
@@ -7735,6 +8137,8 @@ arc_fini(void)
 
        (void) zthr_cancel(arc_evict_zthr);
        (void) zthr_cancel(arc_reap_zthr);
+       arc_state_free_markers(arc_state_evict_markers,
+           arc_state_evict_marker_count);
 
        mutex_destroy(&arc_evict_lock);
        list_destroy(&arc_evict_waiters);
@@ -7754,6 +8158,8 @@ arc_fini(void)
        buf_fini();
        arc_state_fini();
 
+       arc_unregister_hotplug();
+
        /*
         * We destroy the zthrs after all the ARC state has been
         * torn down to avoid the case of them receiving any
@@ -8060,8 +8466,8 @@ l2arc_write_size(l2arc_dev_t *dev)
                    "plus the overhead of log blocks (persistent L2ARC, "
                    "%llu bytes) exceeds the size of the cache device "
                    "(guid %llu), resetting them to the default (%d)",
-                   l2arc_log_blk_overhead(size, dev),
-                   dev->l2ad_vdev->vdev_guid, L2ARC_WRITE_SIZE);
+                   (u_longlong_t)l2arc_log_blk_overhead(size, dev),
+                   (u_longlong_t)dev->l2ad_vdev->vdev_guid, L2ARC_WRITE_SIZE);
                size = l2arc_write_max = l2arc_write_boost = L2ARC_WRITE_SIZE;
 
                if (arc_warm == B_FALSE)
@@ -8336,21 +8742,22 @@ top:
                                 * block pointer in the header.
                                 */
                                if (i == 0) {
-                                       bzero(l2dhdr, dev->l2ad_dev_hdr_asize);
+                                       memset(l2dhdr, 0,
+                                           dev->l2ad_dev_hdr_asize);
                                } else {
-                                       bzero(&l2dhdr->dh_start_lbps[i],
+                                       memset(&l2dhdr->dh_start_lbps[i], 0,
                                            sizeof (l2arc_log_blkptr_t));
                                }
                                break;
                        }
-                       bcopy(lb_ptr_buf->lb_ptr, &l2dhdr->dh_start_lbps[i],
+                       memcpy(&l2dhdr->dh_start_lbps[i], lb_ptr_buf->lb_ptr,
                            sizeof (l2arc_log_blkptr_t));
                        lb_ptr_buf = list_next(&dev->l2ad_lbptr_list,
                            lb_ptr_buf);
                }
        }
 
-       atomic_inc_64(&l2arc_writes_done);
+       ARCSTAT_BUMP(arcstat_l2_writes_done);
        list_remove(buflist, head);
        ASSERT(!HDR_HAS_L1HDR(head));
        kmem_cache_free(hdr_l2only_cache, head);
@@ -8392,7 +8799,7 @@ l2arc_untransform(zio_t *zio, l2arc_read_callback_t *cb)
         */
        if (BP_IS_ENCRYPTED(bp)) {
                abd_t *eabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr,
-                   B_TRUE);
+                   ARC_HDR_DO_ADAPT | ARC_HDR_USE_RESERVE);
 
                zio_crypt_decode_params_bp(bp, salt, iv);
                zio_crypt_decode_mac_bp(bp, mac);
@@ -8429,7 +8836,7 @@ l2arc_untransform(zio_t *zio, l2arc_read_callback_t *cb)
        if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF &&
            !HDR_COMPRESSION_ENABLED(hdr)) {
                abd_t *cabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr,
-                   B_TRUE);
+                   ARC_HDR_DO_ADAPT | ARC_HDR_USE_RESERVE);
                void *tmp = abd_borrow_buf(cabd, arc_hdr_size(hdr));
 
                ret = zio_decompress_data(HDR_GET_COMPRESS(hdr),
@@ -8619,16 +9026,16 @@ l2arc_sublist_lock(int list_num)
 
        switch (list_num) {
        case 0:
-               ml = arc_mfu->arcs_list[ARC_BUFC_METADATA];
+               ml = &arc_mfu->arcs_list[ARC_BUFC_METADATA];
                break;
        case 1:
-               ml = arc_mru->arcs_list[ARC_BUFC_METADATA];
+               ml = &arc_mru->arcs_list[ARC_BUFC_METADATA];
                break;
        case 2:
-               ml = arc_mfu->arcs_list[ARC_BUFC_DATA];
+               ml = &arc_mfu->arcs_list[ARC_BUFC_DATA];
                break;
        case 3:
-               ml = arc_mru->arcs_list[ARC_BUFC_DATA];
+               ml = &arc_mru->arcs_list[ARC_BUFC_DATA];
                break;
        default:
                return (NULL);
@@ -8950,26 +9357,37 @@ l2arc_apply_transforms(spa_t *spa, arc_buf_hdr_t *hdr, uint64_t asize,
        }
 
        if (compress != ZIO_COMPRESS_OFF && !HDR_COMPRESSION_ENABLED(hdr)) {
-               cabd = abd_alloc_for_io(asize, ismd);
-               tmp = abd_borrow_buf(cabd, asize);
+               /*
+                * In some cases, we can wind up with size > asize, so
+                * we need to opt for the larger allocation option here.
+                *
+                * (We also need abd_return_buf_copy in all cases, because
+                * modifying the borrowed buffer and then returning it with
+                * abd_return_buf() trips an ASSERT(), and nearly every
+                * compressor writes into the output buffer before deciding
+                * to fail compression.)
+                */
+               cabd = abd_alloc_for_io(size, ismd);
+               tmp = abd_borrow_buf(cabd, size);
 
                psize = zio_compress_data(compress, to_write, tmp, size,
                    hdr->b_complevel);
 
-               if (psize >= size) {
-                       abd_return_buf(cabd, tmp, asize);
+               if (psize >= asize) {
+                       psize = HDR_GET_PSIZE(hdr);
+                       abd_return_buf_copy(cabd, tmp, size);
                        HDR_SET_COMPRESS(hdr, ZIO_COMPRESS_OFF);
                        to_write = cabd;
-                       abd_copy(to_write, hdr->b_l1hdr.b_pabd, size);
-                       if (size != asize)
-                               abd_zero_off(to_write, size, asize - size);
+                       abd_copy(to_write, hdr->b_l1hdr.b_pabd, psize);
+                       if (psize != asize)
+                               abd_zero_off(to_write, psize, asize - psize);
                        goto encrypt;
                }
                ASSERT3U(psize, <=, HDR_GET_PSIZE(hdr));
                if (psize < asize)
-                       bzero((char *)tmp + psize, asize - psize);
+                       memset((char *)tmp + psize, 0, asize - psize);
                psize = HDR_GET_PSIZE(hdr);
-               abd_return_buf_copy(cabd, tmp, asize);
+               abd_return_buf_copy(cabd, tmp, size);
                to_write = cabd;
        }
 
@@ -9002,7 +9420,7 @@ encrypt:
                        abd_zero_off(eabd, psize, asize - psize);
 
                /* assert that the MAC we got here matches the one we saved */
-               ASSERT0(bcmp(mac, hdr->b_crypt_hdr.b_mac, ZIO_DATA_MAC_LEN));
+               ASSERT0(memcmp(mac, hdr->b_crypt_hdr.b_mac, ZIO_DATA_MAC_LEN));
                spa_keystore_dsl_key_rele(spa, dck, FTAG);
 
                if (to_write == cabd)
@@ -9035,7 +9453,7 @@ l2arc_blk_fetch_done(zio_t *zio)
 
        cb = zio->io_private;
        if (cb->l2rcb_abd != NULL)
-               abd_put(cb->l2rcb_abd);
+               abd_free(cb->l2rcb_abd);
        kmem_free(cb, sizeof (l2arc_read_callback_t));
 }
 
@@ -9060,6 +9478,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
        l2arc_write_callback_t  *cb = NULL;
        zio_t                   *pio, *wzio;
        uint64_t                guid = spa_load_guid(spa);
+       l2arc_dev_hdr_phys_t    *l2dhdr = dev->l2ad_dev_hdr;
 
        ASSERT3P(dev->l2ad_vdev, !=, NULL);
 
@@ -9072,17 +9491,17 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
        /*
         * Copy buffers for L2ARC writing.
         */
-       for (int try = 0; try < L2ARC_FEED_TYPES; try++) {
+       for (int pass = 0; pass < L2ARC_FEED_TYPES; pass++) {
                /*
-                * If try == 1 or 3, we cache MRU metadata and data
+                * If pass == 1 or 3, we cache MRU metadata and data
                 * respectively.
                 */
                if (l2arc_mfuonly) {
-                       if (try == 1 || try == 3)
+                       if (pass == 1 || pass == 3)
                                continue;
                }
 
-               multilist_sublist_t *mls = l2arc_sublist_lock(try);
+               multilist_sublist_t *mls = l2arc_sublist_lock(pass);
                uint64_t passed_sz = 0;
 
                VERIFY3P(mls, !=, NULL);
@@ -9133,12 +9552,6 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
                                continue;
                        }
 
-                       /*
-                        * We rely on the L1 portion of the header below, so
-                        * it's invalid for this header to have been evicted out
-                        * of the ghost cache, prior to being written out. The
-                        * ARC_FLAG_L2_WRITING bit ensures this won't happen.
-                        */
                        ASSERT(HDR_HAS_L1HDR(hdr));
 
                        ASSERT3U(HDR_GET_PSIZE(hdr), >, 0);
@@ -9162,12 +9575,6 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
                         * ARC_FLAG_L2_WRITING bit ensures this won't happen.
                         */
                        arc_hdr_set_flags(hdr, ARC_FLAG_L2_WRITING);
-                       ASSERT(HDR_HAS_L1HDR(hdr));
-
-                       ASSERT3U(HDR_GET_PSIZE(hdr), >, 0);
-                       ASSERT(hdr->b_l1hdr.b_pabd != NULL ||
-                           HDR_HAS_RABD(hdr));
-                       ASSERT3U(arc_hdr_size(hdr), >, 0);
 
                        /*
                         * If this header has b_rabd, we can use this since it
@@ -9291,7 +9698,8 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
                 * Although we did not write any buffers l2ad_evict may
                 * have advanced.
                 */
-               l2arc_dev_hdr_update(dev);
+               if (dev->l2ad_evict != l2dhdr->dh_evict)
+                       l2arc_dev_hdr_update(dev);
 
                return (0);
        }
@@ -9320,7 +9728,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
 static boolean_t
 l2arc_hdr_limit_reached(void)
 {
-       int64_t s = aggsum_upper_bound(&astat_l2_hdr_size);
+       int64_t s = aggsum_upper_bound(&arc_sums.arcstat_l2_hdr_size);
 
        return (arc_reclaim_needed() || (s > arc_meta_limit * 3 / 4) ||
            (s > (arc_warm ? arc_c : arc_c_max) * l2arc_meta_percent / 100));
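To make the two thresholds above concrete, here is a hedged standalone evaluation of the same predicate; every number, including the 33 used for l2arc_meta_percent, is an illustrative input rather than a default taken from this file.

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	int64_t l2_hdr_size = 900LL << 20;	/* 900 MiB of L2-only headers */
	uint64_t arc_meta_limit = 1ULL << 30;	/* 1 GiB */
	uint64_t arc_c_max = 4ULL << 30;	/* 4 GiB */
	uint64_t arc_c = 2ULL << 30;		/* 2 GiB */
	int arc_warm = 0;			/* B_FALSE */
	uint64_t l2arc_meta_percent = 33;	/* made-up tunable value */
	int reclaim_needed = 0;			/* stands in for arc_reclaim_needed() */

	int limit_reached = reclaim_needed ||
	    (l2_hdr_size > (int64_t)(arc_meta_limit * 3 / 4)) ||
	    (l2_hdr_size > (int64_t)((arc_warm ? arc_c : arc_c_max) *
	    l2arc_meta_percent / 100));

	/* 900 MiB > 768 MiB (3/4 of arc_meta_limit), so feeding would stop. */
	printf("l2arc_hdr_limit_reached = %d\n", limit_reached);
	return (0);
}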
@@ -9330,10 +9738,10 @@ l2arc_hdr_limit_reached(void)
  * This thread feeds the L2ARC at regular intervals.  This is the beating
  * heart of the L2ARC.
  */
-/* ARGSUSED */
-static void
+static __attribute__((noreturn)) void
 l2arc_feed_thread(void *unused)
 {
+       (void) unused;
        callb_cpr_t cpr;
        l2arc_dev_t *dev;
        spa_t *spa;
@@ -9453,6 +9861,80 @@ l2arc_vdev_get(vdev_t *vd)
        return (dev);
 }
 
+static void
+l2arc_rebuild_dev(l2arc_dev_t *dev, boolean_t reopen)
+{
+       l2arc_dev_hdr_phys_t *l2dhdr = dev->l2ad_dev_hdr;
+       uint64_t l2dhdr_asize = dev->l2ad_dev_hdr_asize;
+       spa_t *spa = dev->l2ad_spa;
+
+       /*
+        * The L2ARC has to hold at least the payload of one log block for log
+        * blocks to be restored (persistent L2ARC). The payload of a log block
+        * depends on the number of its log entries. We always write log blocks
+        * with 1022 entries. How many of them are committed or restored depends
+        * on the size of the L2ARC device. Thus the maximum payload of
+        * one log block is 1022 * SPA_MAXBLOCKSIZE = 16GB. If the L2ARC device
+        * is less than that, we reduce the amount of committed and restored
+        * log entries per block so as to enable persistence.
+        */
+       if (dev->l2ad_end < l2arc_rebuild_blocks_min_l2size) {
+               dev->l2ad_log_entries = 0;
+       } else {
+               dev->l2ad_log_entries = MIN((dev->l2ad_end -
+                   dev->l2ad_start) >> SPA_MAXBLOCKSHIFT,
+                   L2ARC_LOG_BLK_MAX_ENTRIES);
+       }
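A worked version of the rule above as a standalone sketch; the 4 GiB device span is a made-up input, and the two constants are hard-coded from the 1022-entry and 16GB figures quoted in the comment.

#include <stdio.h>
#include <stdint.h>

#define	MIN(a, b)			((a) < (b) ? (a) : (b))
#define	SPA_MAXBLOCKSHIFT		24	/* 16 MiB max block, per "16GB" above */
#define	L2ARC_LOG_BLK_MAX_ENTRIES	1022	/* "1022 entries" above */

int
main(void)
{
	uint64_t l2ad_start = 0;
	uint64_t l2ad_end = 4ULL << 30;	/* hypothetical 4 GiB usable range */

	uint64_t log_entries = MIN((l2ad_end - l2ad_start) >>
	    SPA_MAXBLOCKSHIFT, L2ARC_LOG_BLK_MAX_ENTRIES);

	/* (4 GiB >> 24) == 256, so each log block carries 256 entries here. */
	printf("l2ad_log_entries = %llu\n", (unsigned long long)log_entries);
	return (0);
}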
+
+       /*
+        * Read the device header, if an error is returned do not rebuild L2ARC.
+        */
+       if (l2arc_dev_hdr_read(dev) == 0 && dev->l2ad_log_entries > 0) {
+               /*
+                * If we are onlining a cache device (vdev_reopen) that was
+                * still present (l2arc_vdev_present()) and rebuild is enabled,
+                * we should evict all ARC buffers and pointers to log blocks
+                * and reclaim their space before restoring its contents to
+                * L2ARC.
+                */
+               if (reopen) {
+                       if (!l2arc_rebuild_enabled) {
+                               return;
+                       } else {
+                               l2arc_evict(dev, 0, B_TRUE);
+                               /* start a new log block */
+                               dev->l2ad_log_ent_idx = 0;
+                               dev->l2ad_log_blk_payload_asize = 0;
+                               dev->l2ad_log_blk_payload_start = 0;
+                       }
+               }
+               /*
+                * Just mark the device as pending for a rebuild. We won't
+        * be starting a rebuild inline here, as it would block pool
+                * import. Instead spa_load_impl will hand that off to an
+                * async task which will call l2arc_spa_rebuild_start.
+                */
+               dev->l2ad_rebuild = B_TRUE;
+       } else if (spa_writeable(spa)) {
+               /*
+                * In this case TRIM the whole device if l2arc_trim_ahead > 0,
+                * otherwise create a new header. We zero out the memory holding
+                * the header to reset dh_start_lbps. If we TRIM the whole
+                * device the new header will be written by
+                * vdev_trim_l2arc_thread() at the end of the TRIM to update the
+                * trim_state in the header too. When reading the header, if
+                * trim_state is not VDEV_TRIM_COMPLETE and l2arc_trim_ahead > 0
+                * we opt to TRIM the whole device again.
+                */
+               if (l2arc_trim_ahead > 0) {
+                       dev->l2ad_trim_all = B_TRUE;
+               } else {
+                       memset(l2dhdr, 0, l2dhdr_asize);
+                       l2arc_dev_hdr_update(dev);
+               }
+       }
+}
+
 /*
  * Add a vdev for use by the L2ARC.  By this point the spa has already
  * validated the vdev and opened it.
@@ -9505,6 +9987,15 @@ l2arc_add_vdev(spa_t *spa, vdev_t *vd)
        zfs_refcount_create(&adddev->l2ad_lb_asize);
        zfs_refcount_create(&adddev->l2ad_lb_count);
 
+       /*
+        * Decide if dev is eligible for L2ARC rebuild or whole device
+        * trimming. This has to happen before the device is added to the
+        * cache device list and l2arc_dev_mtx is released. Otherwise
+        * l2arc_feed_thread() might already start writing on the
+        * device.
+        */
+       l2arc_rebuild_dev(adddev, B_FALSE);
+
        /*
         * Add device to global list
         */
@@ -9512,92 +10003,36 @@ l2arc_add_vdev(spa_t *spa, vdev_t *vd)
        list_insert_head(l2arc_dev_list, adddev);
        atomic_inc_64(&l2arc_ndev);
        mutex_exit(&l2arc_dev_mtx);
-
-       /*
-        * Decide if vdev is eligible for L2ARC rebuild
-        */
-       l2arc_rebuild_vdev(adddev->l2ad_vdev, B_FALSE);
 }
 
+/*
+ * Decide if a vdev is eligible for L2ARC rebuild, called from vdev_reopen()
+ * in case of onlining a cache device.
+ */
 void
 l2arc_rebuild_vdev(vdev_t *vd, boolean_t reopen)
 {
        l2arc_dev_t             *dev = NULL;
-       l2arc_dev_hdr_phys_t    *l2dhdr;
-       uint64_t                l2dhdr_asize;
-       spa_t                   *spa;
 
        dev = l2arc_vdev_get(vd);
        ASSERT3P(dev, !=, NULL);
-       spa = dev->l2ad_spa;
-       l2dhdr = dev->l2ad_dev_hdr;
-       l2dhdr_asize = dev->l2ad_dev_hdr_asize;
-
-       /*
-        * The L2ARC has to hold at least the payload of one log block for
-        * them to be restored (persistent L2ARC). The payload of a log block
-        * depends on the amount of its log entries. We always write log blocks
-        * with 1022 entries. How many of them are committed or restored depends
-        * on the size of the L2ARC device. Thus the maximum payload of
-        * one log block is 1022 * SPA_MAXBLOCKSIZE = 16GB. If the L2ARC device
-        * is less than that, we reduce the amount of committed and restored
-        * log entries per block so as to enable persistence.
-        */
-       if (dev->l2ad_end < l2arc_rebuild_blocks_min_l2size) {
-               dev->l2ad_log_entries = 0;
-       } else {
-               dev->l2ad_log_entries = MIN((dev->l2ad_end -
-                   dev->l2ad_start) >> SPA_MAXBLOCKSHIFT,
-                   L2ARC_LOG_BLK_MAX_ENTRIES);
-       }
 
        /*
-        * Read the device header, if an error is returned do not rebuild L2ARC.
+        * In contrast to l2arc_add_vdev() we do not have to worry about
+        * l2arc_feed_thread() invalidating previous content when onlining a
+        * cache device. The device parameters (l2ad*) are not cleared when
+        * offlining the device, and writing new buffers will not invalidate
+        * all previous content. In the worst case, only buffers that have not
+        * had their log block written to the device will be lost.
+        * When onlining the cache device (i.e. offline->online without exporting
+        * the pool in between) this happens:
+        * vdev_reopen() -> vdev_open() -> l2arc_rebuild_vdev()
+        *                      |                       |
+        *              vdev_is_dead() = B_FALSE        l2ad_rebuild = B_TRUE
+        * During the time where vdev_is_dead = B_FALSE and until l2ad_rebuild
+        * is set to B_TRUE we might write additional buffers to the device.
         */
-       if (l2arc_dev_hdr_read(dev) == 0 && dev->l2ad_log_entries > 0) {
-               /*
-                * If we are onlining a cache device (vdev_reopen) that was
-                * still present (l2arc_vdev_present()) and rebuild is enabled,
-                * we should evict all ARC buffers and pointers to log blocks
-                * and reclaim their space before restoring its contents to
-                * L2ARC.
-                */
-               if (reopen) {
-                       if (!l2arc_rebuild_enabled) {
-                               return;
-                       } else {
-                               l2arc_evict(dev, 0, B_TRUE);
-                               /* start a new log block */
-                               dev->l2ad_log_ent_idx = 0;
-                               dev->l2ad_log_blk_payload_asize = 0;
-                               dev->l2ad_log_blk_payload_start = 0;
-                       }
-               }
-               /*
-                * Just mark the device as pending for a rebuild. We won't
-                * be starting a rebuild in line here as it would block pool
-                * import. Instead spa_load_impl will hand that off to an
-                * async task which will call l2arc_spa_rebuild_start.
-                */
-               dev->l2ad_rebuild = B_TRUE;
-       } else if (spa_writeable(spa)) {
-               /*
-                * In this case TRIM the whole device if l2arc_trim_ahead > 0,
-                * otherwise create a new header. We zero out the memory holding
-                * the header to reset dh_start_lbps. If we TRIM the whole
-                * device the new header will be written by
-                * vdev_trim_l2arc_thread() at the end of the TRIM to update the
-                * trim_state in the header too. When reading the header, if
-                * trim_state is not VDEV_TRIM_COMPLETE and l2arc_trim_ahead > 0
-                * we opt to TRIM the whole device again.
-                */
-               if (l2arc_trim_ahead > 0) {
-                       dev->l2ad_trim_all = B_TRUE;
-               } else {
-                       bzero(l2dhdr, l2dhdr_asize);
-                       l2arc_dev_hdr_update(dev);
-               }
-       }
+       l2arc_rebuild_dev(dev, reopen);
 }
 
 /*
@@ -9654,8 +10089,6 @@ l2arc_init(void)
 {
        l2arc_thread_exit = 0;
        l2arc_ndev = 0;
-       l2arc_writes_sent = 0;
-       l2arc_writes_done = 0;
 
        mutex_init(&l2arc_feed_thr_lock, NULL, MUTEX_DEFAULT, NULL);
        cv_init(&l2arc_feed_thr_cv, NULL, CV_DEFAULT, NULL);
@@ -9744,7 +10177,7 @@ l2arc_spa_rebuild_start(spa_t *spa)
 /*
  * Main entry point for L2ARC rebuilding.
  */
-static void
+static __attribute__((noreturn)) void
 l2arc_dev_rebuild_thread(void *arg)
 {
        l2arc_dev_t *dev = arg;
@@ -9817,7 +10250,7 @@ l2arc_rebuild(l2arc_dev_t *dev)
                goto out;
 
        /* Prepare the rebuild process */
-       bcopy(l2dhdr->dh_start_lbps, lbps, sizeof (lbps));
+       memcpy(lbps, l2dhdr->dh_start_lbps, sizeof (lbps));
 
        /* Start the rebuild process */
        for (;;) {
@@ -9863,7 +10296,7 @@ l2arc_rebuild(l2arc_dev_t *dev)
                lb_ptr_buf = kmem_zalloc(sizeof (l2arc_lb_ptr_buf_t), KM_SLEEP);
                lb_ptr_buf->lb_ptr = kmem_zalloc(sizeof (l2arc_log_blkptr_t),
                    KM_SLEEP);
-               bcopy(&lbps[0], lb_ptr_buf->lb_ptr,
+               memcpy(lb_ptr_buf->lb_ptr, &lbps[0],
                    sizeof (l2arc_log_blkptr_t));
                mutex_enter(&dev->l2ad_mtx);
                list_insert_tail(&dev->l2ad_lbptr_list, lb_ptr_buf);
@@ -9901,7 +10334,7 @@ l2arc_rebuild(l2arc_dev_t *dev)
                    !dev->l2ad_first)
                        goto out;
 
-               cond_resched();
+               kpreempt(KPREEMPT_SYNC);
                for (;;) {
                        mutex_enter(&l2arc_rebuild_thr_lock);
                        if (dev->l2ad_rebuild_cancel) {
@@ -9961,7 +10394,7 @@ out:
                 */
                spa_history_log_internal(spa, "L2ARC rebuild", NULL,
                    "no valid log blocks");
-               bzero(l2dhdr, dev->l2ad_dev_hdr_asize);
+               memset(l2dhdr, 0, dev->l2ad_dev_hdr_asize);
                l2arc_dev_hdr_update(dev);
        } else if (err == ECANCELED) {
                /*
@@ -9969,7 +10402,7 @@ out:
                 * log as the pool may be in the process of being removed.
                 */
                zfs_dbgmsg("L2ARC rebuild aborted, restored %llu blocks",
-                   zfs_refcount_count(&dev->l2ad_lb_count));
+                   (u_longlong_t)zfs_refcount_count(&dev->l2ad_lb_count));
        } else if (err != 0) {
                spa_history_log_internal(spa, "L2ARC rebuild", NULL,
                    "aborted, restored %llu blocks",
@@ -10007,12 +10440,13 @@ l2arc_dev_hdr_read(l2arc_dev_t *dev)
            ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY |
            ZIO_FLAG_SPECULATIVE, B_FALSE));
 
-       abd_put(abd);
+       abd_free(abd);
 
        if (err != 0) {
                ARCSTAT_BUMP(arcstat_l2_rebuild_abort_dh_errors);
                zfs_dbgmsg("L2ARC IO error (%d) while reading device header, "
-                   "vdev guid: %llu", err, dev->l2ad_vdev->vdev_guid);
+                   "vdev guid: %llu", err,
+                   (u_longlong_t)dev->l2ad_vdev->vdev_guid);
                return (err);
        }
 
@@ -10109,8 +10543,9 @@ l2arc_log_blk_read(l2arc_dev_t *dev,
        if ((err = zio_wait(this_io)) != 0) {
                ARCSTAT_BUMP(arcstat_l2_rebuild_abort_io_errors);
                zfs_dbgmsg("L2ARC IO error (%d) while reading log block, "
-                   "offset: %llu, vdev guid: %llu", err, this_lbp->lbp_daddr,
-                   dev->l2ad_vdev->vdev_guid);
+                   "offset: %llu, vdev guid: %llu", err,
+                   (u_longlong_t)this_lbp->lbp_daddr,
+                   (u_longlong_t)dev->l2ad_vdev->vdev_guid);
                goto cleanup;
        }
 
@@ -10124,8 +10559,10 @@ l2arc_log_blk_read(l2arc_dev_t *dev,
                ARCSTAT_BUMP(arcstat_l2_rebuild_abort_cksum_lb_errors);
                zfs_dbgmsg("L2ARC log block cksum failed, offset: %llu, "
                    "vdev guid: %llu, l2ad_hand: %llu, l2ad_evict: %llu",
-                   this_lbp->lbp_daddr, dev->l2ad_vdev->vdev_guid,
-                   dev->l2ad_hand, dev->l2ad_evict);
+                   (u_longlong_t)this_lbp->lbp_daddr,
+                   (u_longlong_t)dev->l2ad_vdev->vdev_guid,
+                   (u_longlong_t)dev->l2ad_hand,
+                   (u_longlong_t)dev->l2ad_evict);
                err = SET_ERROR(ECKSUM);
                goto cleanup;
        }
@@ -10375,11 +10812,12 @@ l2arc_dev_hdr_update(l2arc_dev_t *dev)
            VDEV_LABEL_START_SIZE, l2dhdr_asize, abd, ZIO_CHECKSUM_LABEL, NULL,
            NULL, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_CANFAIL, B_FALSE));
 
-       abd_put(abd);
+       abd_free(abd);
 
        if (err != 0) {
                zfs_dbgmsg("L2ARC IO error (%d) while writing device header, "
-                   "vdev guid: %llu", err, dev->l2ad_vdev->vdev_guid);
+                   "vdev guid: %llu", err,
+                   (u_longlong_t)dev->l2ad_vdev->vdev_guid);
        }
 }
 
@@ -10438,7 +10876,6 @@ l2arc_log_blk_commit(l2arc_dev_t *dev, zio_t *pio, l2arc_write_callback_t *cb)
            dev->l2ad_log_blk_payload_asize;
        l2dhdr->dh_start_lbps[0].lbp_payload_start =
            dev->l2ad_log_blk_payload_start;
-       _NOTE(CONSTCOND)
        L2BLK_SET_LSIZE(
            (&l2dhdr->dh_start_lbps[0])->lbp_prop, sizeof (*lb));
        L2BLK_SET_PSIZE(
@@ -10448,13 +10885,13 @@ l2arc_log_blk_commit(l2arc_dev_t *dev, zio_t *pio, l2arc_write_callback_t *cb)
            ZIO_CHECKSUM_FLETCHER_4);
        if (asize < sizeof (*lb)) {
                /* compression succeeded */
-               bzero(tmpbuf + psize, asize - psize);
+               memset(tmpbuf + psize, 0, asize - psize);
                L2BLK_SET_COMPRESS(
                    (&l2dhdr->dh_start_lbps[0])->lbp_prop,
                    ZIO_COMPRESS_LZ4);
        } else {
                /* compression failed */
-               bcopy(lb, tmpbuf, sizeof (*lb));
+               memcpy(tmpbuf, lb, sizeof (*lb));
                L2BLK_SET_COMPRESS(
                    (&l2dhdr->dh_start_lbps[0])->lbp_prop,
                    ZIO_COMPRESS_OFF);
@@ -10464,7 +10901,7 @@ l2arc_log_blk_commit(l2arc_dev_t *dev, zio_t *pio, l2arc_write_callback_t *cb)
        fletcher_4_native(tmpbuf, asize, NULL,
            &l2dhdr->dh_start_lbps[0].lbp_cksum);
 
-       abd_put(abd_buf->abd);
+       abd_free(abd_buf->abd);
 
        /* perform the write itself */
        abd_buf->abd = abd_get_from_buf(tmpbuf, sizeof (*lb));
@@ -10480,7 +10917,7 @@ l2arc_log_blk_commit(l2arc_dev_t *dev, zio_t *pio, l2arc_write_callback_t *cb)
         * Include the committed log block's pointer  in the list of pointers
         * to log blocks present in the L2ARC device.
         */
-       bcopy(&l2dhdr->dh_start_lbps[0], lb_ptr_buf->lb_ptr,
+       memcpy(lb_ptr_buf->lb_ptr, &l2dhdr->dh_start_lbps[0],
            sizeof (l2arc_log_blkptr_t));
        mutex_enter(&dev->l2ad_mtx);
        list_insert_head(&dev->l2ad_lbptr_list, lb_ptr_buf);
@@ -10569,7 +11006,7 @@ l2arc_log_blk_insert(l2arc_dev_t *dev, const arc_buf_hdr_t *hdr)
        ASSERT(HDR_HAS_L2HDR(hdr));
 
        le = &lb->lb_entries[index];
-       bzero(le, sizeof (*le));
+       memset(le, 0, sizeof (*le));
        le->le_dva = hdr->b_dva;
        le->le_birth = hdr->b_birth;
        le->le_daddr = hdr->b_l2hdr.b_daddr;
@@ -10638,58 +11075,57 @@ EXPORT_SYMBOL(arc_getbuf_func);
 EXPORT_SYMBOL(arc_add_prune_callback);
 EXPORT_SYMBOL(arc_remove_prune_callback);
 
-/* BEGIN CSTYLED */
-ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, min, param_set_arc_long,
-       param_get_long, ZMOD_RW, "Min arc size");
+ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, min, param_set_arc_min,
+       param_get_ulong, ZMOD_RW, "Minimum ARC size in bytes");
 
-ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, max, param_set_arc_long,
-       param_get_long, ZMOD_RW, "Max arc size");
+ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, max, param_set_arc_max,
+       param_get_ulong, ZMOD_RW, "Maximum ARC size in bytes");
 
 ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, meta_limit, param_set_arc_long,
-       param_get_long, ZMOD_RW, "Metadata limit for arc size");
+       param_get_ulong, ZMOD_RW, "Metadata limit for ARC size in bytes");
 
 ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, meta_limit_percent,
-       param_set_arc_long, param_get_long, ZMOD_RW,
-       "Percent of arc size for arc meta limit");
+    param_set_arc_long, param_get_ulong, ZMOD_RW,
+       "Percent of ARC size for ARC meta limit");
 
 ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, meta_min, param_set_arc_long,
-       param_get_long, ZMOD_RW, "Min arc metadata");
+       param_get_ulong, ZMOD_RW, "Minimum ARC metadata size in bytes");
 
 ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, meta_prune, INT, ZMOD_RW,
        "Meta objects to scan for prune");
 
-ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, meta_adjust_restarts, INT, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, meta_adjust_restarts, UINT, ZMOD_RW,
        "Limit number of restarts in arc_evict_meta");
 
-ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, meta_strategy, INT, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, meta_strategy, UINT, ZMOD_RW,
        "Meta reclaim strategy");
 
 ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, grow_retry, param_set_arc_int,
-       param_get_int, ZMOD_RW, "Seconds before growing arc size");
+       param_get_uint, ZMOD_RW, "Seconds before growing ARC size");
 
 ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, p_dampener_disable, INT, ZMOD_RW,
        "Disable arc_p adapt dampener");
 
 ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, shrink_shift, param_set_arc_int,
-       param_get_int, ZMOD_RW, "log2(fraction of arc to reclaim)");
+       param_get_uint, ZMOD_RW, "log2(fraction of ARC to reclaim)");
 
 ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, pc_percent, UINT, ZMOD_RW,
-       "Percent of pagecache to reclaim arc to");
+       "Percent of pagecache to reclaim ARC to");
 
 ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, p_min_shift, param_set_arc_int,
-       param_get_int, ZMOD_RW, "arc_c shift to calc min/max arc_p");
+       param_get_uint, ZMOD_RW, "arc_c shift to calc min/max arc_p");
 
-ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, average_blocksize, INT, ZMOD_RD,
+ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, average_blocksize, UINT, ZMOD_RD,
        "Target average block size");
 
 ZFS_MODULE_PARAM(zfs, zfs_, compressed_arc_enabled, INT, ZMOD_RW,
-       "Disable compressed arc buffers");
+       "Disable compressed ARC buffers");
 
 ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, min_prefetch_ms, param_set_arc_int,
-       param_get_int, ZMOD_RW, "Min life of prefetch block in ms");
+       param_get_uint, ZMOD_RW, "Min life of prefetch block in ms");
 
 ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, min_prescient_prefetch_ms,
-       param_set_arc_int, param_get_int, ZMOD_RW,
+    param_set_arc_int, param_get_uint, ZMOD_RW,
        "Min life of prescient prefetched block in ms");
 
 ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, write_max, ULONG, ZMOD_RW,
@@ -10722,7 +11158,7 @@ ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, feed_again, INT, ZMOD_RW,
 ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, norw, INT, ZMOD_RW,
        "No reads during writes");
 
-ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, meta_percent, INT, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, meta_percent, UINT, ZMOD_RW,
        "Percent of ARC size allowed for L2ARC-only headers");
 
 ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, rebuild_enabled, INT, ZMOD_RW,
@@ -10734,25 +11170,30 @@ ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, rebuild_blocks_min_l2size, ULONG, ZMOD_RW,
 ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, mfuonly, INT, ZMOD_RW,
        "Cache only MFU data from ARC into L2ARC");
 
+ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, exclude_special, INT, ZMOD_RW,
+       "Exclude dbufs on special vdevs from being cached to L2ARC if set.");
+
 ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, lotsfree_percent, param_set_arc_int,
-       param_get_int, ZMOD_RW, "System free memory I/O throttle in bytes");
+       param_get_uint, ZMOD_RW, "System free memory I/O throttle in bytes");
 
 ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, sys_free, param_set_arc_long,
-       param_get_long, ZMOD_RW, "System free memory target size in bytes");
+       param_get_ulong, ZMOD_RW, "System free memory target size in bytes");
 
 ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, dnode_limit, param_set_arc_long,
-       param_get_long, ZMOD_RW, "Minimum bytes of dnodes in arc");
+       param_get_ulong, ZMOD_RW, "Minimum bytes of dnodes in ARC");
 
 ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, dnode_limit_percent,
-       param_set_arc_long, param_get_long, ZMOD_RW,
+    param_set_arc_long, param_get_ulong, ZMOD_RW,
        "Percent of ARC meta buffers for dnodes");
 
 ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, dnode_reduce_percent, ULONG, ZMOD_RW,
        "Percentage of excess dnodes to try to unpin");
 
-ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, eviction_pct, INT, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, eviction_pct, UINT, ZMOD_RW,
        "When full, ARC allocation waits for eviction of this % of alloc size");
 
-ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, evict_batch_limit, INT, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, evict_batch_limit, UINT, ZMOD_RW,
        "The number of headers to evict per sublist before moving to the next");
-/* END CSTYLED */
+
+ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, prune_task_threads, INT, ZMOD_RW,
+       "Number of arc_prune threads");