Extend import_progress kstat with a notes field
diff --git a/module/zfs/spa.c b/module/zfs/spa.c
index 825e0f1cccabdc0251fa81517e9c54e903abc875..2ca5e7bac1a4ea5478590368a56fbe02c9aa7b27 100644 (file)
@@ -6,7 +6,7 @@
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
@@ -33,6 +33,7 @@
  * Copyright 2017 Joyent, Inc.
  * Copyright (c) 2017, Intel Corporation.
  * Copyright (c) 2021, Colm Buckley <colm@tuatha.org>
+ * Copyright (c) 2023 Hewlett Packard Enterprise Development LP.
  */
 
 /*
@@ -52,6 +53,7 @@
 #include <sys/dmu_tx.h>
 #include <sys/zap.h>
 #include <sys/zil.h>
+#include <sys/brt.h>
 #include <sys/ddt.h>
 #include <sys/vdev_impl.h>
 #include <sys/vdev_removal.h>
@@ -61,6 +63,7 @@
 #include <sys/vdev_rebuild.h>
 #include <sys/vdev_trim.h>
 #include <sys/vdev_disk.h>
+#include <sys/vdev_raidz.h>
 #include <sys/vdev_draid.h>
 #include <sys/metaslab.h>
 #include <sys/metaslab_impl.h>
@@ -81,7 +84,6 @@
 #include <sys/arc.h>
 #include <sys/callb.h>
 #include <sys/systeminfo.h>
-#include <sys/spa_boot.h>
 #include <sys/zfs_ioctl.h>
 #include <sys/dsl_scan.h>
 #include <sys/zfeature.h>
 
 #include "zfs_prop.h"
 #include "zfs_comutil.h"
+#include <cityhash.h>
+
+/*
+ * spa_thread() existed on Illumos as a parent thread for the various worker
+ * threads that actually run the pool, as a way to both reference the entire
+ * pool work as a single object, and to share properties like scheduling
+ * options. It has not yet been adapted to Linux or FreeBSD. This define is
+ * used to mark related parts of the code to make things easier for the reader,
+ * and to compile this code out. It can be removed when someone implements it,
+ * moves it to some Illumos-specific place, or removes it entirely.
+ */
+#undef HAVE_SPA_THREAD
+
+/*
+ * The "System Duty Cycle" scheduling class is an Illumos feature to help
+ * prevent CPU-intensive kernel threads from affecting latency on interactive
+ * threads. It doesn't exist on Linux or FreeBSD, so the supporting code is
+ * gated behind a define. On Illumos SDC depends on spa_thread(), but
+ * spa_thread() also has other uses, so this is a separate define.
+ */
+#undef HAVE_SYSDC
 
 /*
  * The interval, in seconds, at which failed configuration cache file writes
@@ -107,16 +130,16 @@ int zfs_ccw_retry_interval = 300;
 
 typedef enum zti_modes {
        ZTI_MODE_FIXED,                 /* value is # of threads (min 1) */
-       ZTI_MODE_BATCH,                 /* cpu-intensive; value is ignored */
        ZTI_MODE_SCALE,                 /* Taskqs scale with CPUs. */
+       ZTI_MODE_SYNC,                  /* sync thread assigned */
        ZTI_MODE_NULL,                  /* don't create a taskq */
        ZTI_NMODES
 } zti_modes_t;
 
 #define        ZTI_P(n, q)     { ZTI_MODE_FIXED, (n), (q) }
 #define        ZTI_PCT(n)      { ZTI_MODE_ONLINE_PERCENT, (n), 1 }
-#define        ZTI_BATCH       { ZTI_MODE_BATCH, 0, 1 }
 #define        ZTI_SCALE       { ZTI_MODE_SCALE, 0, 1 }
+#define        ZTI_SYNC        { ZTI_MODE_SYNC, 0, 1 }
 #define        ZTI_NULL        { ZTI_MODE_NULL, 0, 0 }
 
 #define        ZTI_N(n)        ZTI_P(n, 1)
@@ -137,24 +160,24 @@ static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = {
  * initializing a pool, we use this table to create an appropriately sized
  * taskq. Some operations are low volume and therefore have a small, static
  * number of threads assigned to their taskqs using the ZTI_N(#) or ZTI_ONE
- * macros. Other operations process a large amount of data; the ZTI_BATCH
+ * macros. Other operations process a large amount of data; the ZTI_SCALE
  * macro causes us to create a taskq oriented for throughput. Some operations
  * are so high frequency and short-lived that the taskq itself can become a
  * point of lock contention. The ZTI_P(#, #) macro indicates that we need an
  * additional degree of parallelism specified by the number of threads per-
  * taskq and the number of taskqs; when dispatching an event in this case, the
- * particular taskq is chosen at random. ZTI_SCALE is similar to ZTI_BATCH,
- * but with number of taskqs also scaling with number of CPUs.
+ * particular taskq is chosen at random. ZTI_SCALE uses a number of taskqs
+ * that scales with the number of CPUs.
  *
  * The different taskq priorities are to handle the different contexts (issue
  * and interrupt) and then to reserve threads for ZIO_PRIORITY_NOW I/Os that
  * need to be handled with minimum delay.
  */
-const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = {
+static const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = {
        /* ISSUE        ISSUE_HIGH      INTR            INTR_HIGH */
        { ZTI_ONE,      ZTI_NULL,       ZTI_ONE,        ZTI_NULL }, /* NULL */
        { ZTI_N(8),     ZTI_NULL,       ZTI_SCALE,      ZTI_NULL }, /* READ */
-       { ZTI_BATCH,    ZTI_N(5),       ZTI_SCALE,      ZTI_N(5) }, /* WRITE */
+       { ZTI_SYNC,     ZTI_N(5),       ZTI_SCALE,      ZTI_N(5) }, /* WRITE */
        { ZTI_SCALE,    ZTI_NULL,       ZTI_ONE,        ZTI_NULL }, /* FREE */
        { ZTI_ONE,      ZTI_NULL,       ZTI_ONE,        ZTI_NULL }, /* CLAIM */
        { ZTI_ONE,      ZTI_NULL,       ZTI_ONE,        ZTI_NULL }, /* IOCTL */
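
The ZTI_P(n, q) arrangement described above pairs a per-taskq thread count
with a taskq count; choosing one of the q taskqs pseudo-randomly per dispatch
is what spreads the lock contention. A minimal standalone sketch of that
selection (illustrative only; the real logic lives in
spa_taskq_dispatch_select() later in this diff):

    /* Sketch: pick one of 'count' discrete taskqs for an event. */
    static taskq_t *
    zti_pick_taskq(taskq_t **tqs, uint_t count)
    {
            /* Low bits of the high-resolution clock act as a cheap RNG. */
            return (tqs[((uint64_t)gethrtime()) % count]);
    }
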
@@ -164,15 +187,28 @@ const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = {
 static void spa_sync_version(void *arg, dmu_tx_t *tx);
 static void spa_sync_props(void *arg, dmu_tx_t *tx);
 static boolean_t spa_has_active_shared_spare(spa_t *spa);
-static int spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport);
+static int spa_load_impl(spa_t *spa, spa_import_type_t type,
+    const char **ereport);
 static void spa_vdev_resilver_done(spa_t *spa);
 
-uint_t         zio_taskq_batch_pct = 80;       /* 1 thread per cpu in pset */
-uint_t         zio_taskq_batch_tpq;            /* threads per taskq */
-boolean_t      zio_taskq_sysdc = B_TRUE;       /* use SDC scheduling class */
-uint_t         zio_taskq_basedc = 80;          /* base duty cycle */
+/*
+ * Percentage of all CPUs that can be used by the metaslab preload taskq.
+ */
+static uint_t metaslab_preload_pct = 50;
 
-boolean_t      spa_create_process = B_TRUE;    /* no process ==> no sysdc */
+static uint_t  zio_taskq_batch_pct = 80;         /* 1 thread per cpu in pset */
+static uint_t  zio_taskq_batch_tpq;              /* threads per taskq */
+
+#ifdef HAVE_SYSDC
+static const boolean_t zio_taskq_sysdc = B_TRUE; /* use SDC scheduling class */
+static const uint_t    zio_taskq_basedc = 80;    /* base duty cycle */
+#endif
+
+#ifdef HAVE_SPA_THREAD
+static const boolean_t spa_create_process = B_TRUE; /* no process => no sysdc */
+#endif
+
+static uint_t  zio_taskq_wr_iss_ncpus = 0;
 
 /*
  * Report any spa_load_verify errors found, but do not fail spa_load.
@@ -180,6 +216,12 @@ boolean_t  spa_create_process = B_TRUE;    /* no process ==> no sysdc */
  */
 boolean_t      spa_load_verify_dryrun = B_FALSE;
 
+/*
+ * Allow reading spacemaps in readonly mode (spa_mode == SPA_MODE_READ).
+ * This is used by zdb to verify spacemaps.
+ */
+boolean_t      spa_mode_readable_spacemaps = B_FALSE;
+
 /*
  * This (illegal) pool name is used when temporarily importing a spa_t in order
  * to get the vdev stats associated with the imported devices.
@@ -189,7 +231,7 @@ boolean_t   spa_load_verify_dryrun = B_FALSE;
 /*
  * For debugging purposes: print out vdev tree during pool import.
  */
-int            spa_load_print_vdev_tree = B_FALSE;
+static int             spa_load_print_vdev_tree = B_FALSE;
 
 /*
  * A non-zero value for zfs_max_missing_tvds means that we allow importing
@@ -212,7 +254,7 @@ int         spa_load_print_vdev_tree = B_FALSE;
  * there are also risks of performing an inadvertent rewind as we might be
  * missing all the vdevs with the latest uberblocks.
  */
-unsigned long  zfs_max_missing_tvds = 0;
+uint64_t       zfs_max_missing_tvds = 0;
 
 /*
  * The parameters below are similar to zfs_max_missing_tvds but are only
@@ -238,28 +280,28 @@ uint64_t  zfs_max_missing_tvds_scan = 0;
 /*
  * Debugging aid that pauses spa_sync() towards the end.
  */
-boolean_t      zfs_pause_spa_sync = B_FALSE;
+static const boolean_t zfs_pause_spa_sync = B_FALSE;
 
 /*
  * Variables to indicate the livelist condense zthr func should wait at certain
  * points for the livelist to be removed - used to test condense/destroy races
  */
-int zfs_livelist_condense_zthr_pause = 0;
-int zfs_livelist_condense_sync_pause = 0;
+static int zfs_livelist_condense_zthr_pause = 0;
+static int zfs_livelist_condense_sync_pause = 0;
 
 /*
  * Variables to track whether or not condense cancellation has been
  * triggered in testing.
  */
-int zfs_livelist_condense_sync_cancel = 0;
-int zfs_livelist_condense_zthr_cancel = 0;
+static int zfs_livelist_condense_sync_cancel = 0;
+static int zfs_livelist_condense_zthr_cancel = 0;
 
 /*
  * Variable to track whether or not extra ALLOC blkptrs were added to a
  * livelist entry while it was being condensed (caused by the way we track
  * remapped blkptrs in dbuf_remap_impl)
  */
-int zfs_livelist_condense_new_alloc = 0;
+static int zfs_livelist_condense_new_alloc = 0;
 
 /*
  * ==========================================================================
@@ -271,20 +313,36 @@ int zfs_livelist_condense_new_alloc = 0;
  * Add a (source=src, propname=propval) list to an nvlist.
  */
 static void
-spa_prop_add_list(nvlist_t *nvl, zpool_prop_t prop, char *strval,
+spa_prop_add_list(nvlist_t *nvl, zpool_prop_t prop, const char *strval,
     uint64_t intval, zprop_source_t src)
 {
        const char *propname = zpool_prop_to_name(prop);
        nvlist_t *propval;
 
-       VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0);
-       VERIFY(nvlist_add_uint64(propval, ZPROP_SOURCE, src) == 0);
+       propval = fnvlist_alloc();
+       fnvlist_add_uint64(propval, ZPROP_SOURCE, src);
 
        if (strval != NULL)
-               VERIFY(nvlist_add_string(propval, ZPROP_VALUE, strval) == 0);
+               fnvlist_add_string(propval, ZPROP_VALUE, strval);
        else
-               VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, intval) == 0);
+               fnvlist_add_uint64(propval, ZPROP_VALUE, intval);
+
+       fnvlist_add_nvlist(nvl, propname, propval);
+       nvlist_free(propval);
+}
+
+/*
+ * Add a user property (source=src, propname=propval) to an nvlist.
+ */
+static void
+spa_prop_add_user(nvlist_t *nvl, const char *propname, char *strval,
+    zprop_source_t src)
+{
+       nvlist_t *propval;
 
+       VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+       VERIFY(nvlist_add_uint64(propval, ZPROP_SOURCE, src) == 0);
+       VERIFY(nvlist_add_string(propval, ZPROP_VALUE, strval) == 0);
        VERIFY(nvlist_add_nvlist(nvl, propname, propval) == 0);
        nvlist_free(propval);
 }
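
For reference, the nesting spa_prop_add_list() builds, reconstructed from the
fnvlist calls above (the property and values are illustrative):

    /*
     * spa_prop_add_list(nvl, ZPOOL_PROP_DEDUPRATIO, NULL, 300, src)
     * would yield:
     *
     *	nvl: {
     *		"dedupratio": {
     *			"source": src	(uint64)
     *			"value":  300	(uint64; a string if strval != NULL)
     *		}
     *	}
     */
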
@@ -335,6 +393,12 @@ spa_prop_get_config(spa_t *spa, nvlist_t **nvp)
 
                spa_prop_add_list(*nvp, ZPOOL_PROP_DEDUPRATIO, NULL,
                    ddt_get_pool_dedup_ratio(spa), src);
+               spa_prop_add_list(*nvp, ZPOOL_PROP_BCLONEUSED, NULL,
+                   brt_get_used(spa), src);
+               spa_prop_add_list(*nvp, ZPOOL_PROP_BCLONESAVED, NULL,
+                   brt_get_saved(spa), src);
+               spa_prop_add_list(*nvp, ZPOOL_PROP_BCLONERATIO, NULL,
+                   brt_get_ratio(spa), src);
 
                spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL,
                    rvd->vdev_state, src);
@@ -458,7 +522,8 @@ spa_prop_get(spa_t *spa, nvlist_t **nvp)
                zprop_source_t src = ZPROP_SRC_DEFAULT;
                zpool_prop_t prop;
 
-               if ((prop = zpool_name_to_prop(za.za_name)) == ZPOOL_PROP_INVAL)
+               if ((prop = zpool_name_to_prop(za.za_name)) ==
+                   ZPOOL_PROP_INVAL && !zfs_prop_user(za.za_name))
                        continue;
 
                switch (za.za_integer_length) {
@@ -501,7 +566,13 @@ spa_prop_get(spa_t *spa, nvlist_t **nvp)
                                kmem_free(strval, za.za_num_integers);
                                break;
                        }
-                       spa_prop_add_list(*nvp, prop, strval, 0, src);
+                       if (prop != ZPOOL_PROP_INVAL) {
+                               spa_prop_add_list(*nvp, prop, strval, 0, src);
+                       } else {
+                               src = ZPROP_SRC_LOCAL;
+                               spa_prop_add_user(*nvp, za.za_name, strval,
+                                   src);
+                       }
                        kmem_free(strval, za.za_num_integers);
                        break;
 
@@ -537,42 +608,53 @@ spa_prop_validate(spa_t *spa, nvlist_t *props)
        elem = NULL;
        while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
                uint64_t intval;
-               char *strval, *slash, *check, *fname;
+               const char *strval, *slash, *check, *fname;
                const char *propname = nvpair_name(elem);
                zpool_prop_t prop = zpool_name_to_prop(propname);
 
                switch (prop) {
                case ZPOOL_PROP_INVAL:
-                       if (!zpool_prop_feature(propname)) {
-                               error = SET_ERROR(EINVAL);
-                               break;
-                       }
-
                        /*
                         * Sanitize the input.
                         */
-                       if (nvpair_type(elem) != DATA_TYPE_UINT64) {
-                               error = SET_ERROR(EINVAL);
-                               break;
-                       }
+                       if (zfs_prop_user(propname)) {
+                               if (strlen(propname) >= ZAP_MAXNAMELEN) {
+                                       error = SET_ERROR(ENAMETOOLONG);
+                                       break;
+                               }
 
-                       if (nvpair_value_uint64(elem, &intval) != 0) {
-                               error = SET_ERROR(EINVAL);
-                               break;
-                       }
+                               if (strlen(fnvpair_value_string(elem)) >=
+                                   ZAP_MAXVALUELEN) {
+                                       error = SET_ERROR(E2BIG);
+                                       break;
+                               }
+                       } else if (zpool_prop_feature(propname)) {
+                               if (nvpair_type(elem) != DATA_TYPE_UINT64) {
+                                       error = SET_ERROR(EINVAL);
+                                       break;
+                               }
 
-                       if (intval != 0) {
-                               error = SET_ERROR(EINVAL);
-                               break;
-                       }
+                               if (nvpair_value_uint64(elem, &intval) != 0) {
+                                       error = SET_ERROR(EINVAL);
+                                       break;
+                               }
+
+                               if (intval != 0) {
+                                       error = SET_ERROR(EINVAL);
+                                       break;
+                               }
 
-                       fname = strchr(propname, '@') + 1;
-                       if (zfeature_lookup_name(fname, NULL) != 0) {
+                               fname = strchr(propname, '@') + 1;
+                               if (zfeature_lookup_name(fname, NULL) != 0) {
+                                       error = SET_ERROR(EINVAL);
+                                       break;
+                               }
+
+                               has_feature = B_TRUE;
+                       } else {
                                error = SET_ERROR(EINVAL);
                                break;
                        }
-
-                       has_feature = B_TRUE;
                        break;
 
                case ZPOOL_PROP_VERSION:
@@ -739,7 +821,7 @@ spa_prop_validate(spa_t *spa, nvlist_t *props)
 void
 spa_configfile_set(spa_t *spa, nvlist_t *nvp, boolean_t need_sync)
 {
-       char *cachefile;
+       const char *cachefile;
        spa_config_dirent_t *dp;
 
        if (nvlist_lookup_string(nvp, zpool_prop_to_name(ZPOOL_PROP_CACHEFILE),
@@ -779,8 +861,14 @@ spa_prop_set(spa_t *spa, nvlist_t *nvp)
                    prop == ZPOOL_PROP_READONLY)
                        continue;
 
+               if (prop == ZPOOL_PROP_INVAL &&
+                   zfs_prop_user(nvpair_name(elem))) {
+                       need_sync = B_TRUE;
+                       break;
+               }
+
                if (prop == ZPOOL_PROP_VERSION || prop == ZPOOL_PROP_INVAL) {
-                       uint64_t ver;
+                       uint64_t ver = 0;
 
                        if (prop == ZPOOL_PROP_VERSION) {
                                VERIFY(nvpair_value_uint64(elem, &ver) == 0);
@@ -834,7 +922,6 @@ spa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx)
        }
 }
 
-/*ARGSUSED*/
 static int
 spa_change_guid_check(void *arg, dmu_tx_t *tx)
 {
@@ -904,7 +991,16 @@ spa_change_guid(spa_t *spa)
            spa_change_guid_sync, &guid, 5, ZFS_SPACE_CHECK_RESERVED);
 
        if (error == 0) {
-               spa_write_cachefile(spa, B_FALSE, B_TRUE);
+               /*
+                * Clear the kobj flag from all the vdevs to allow
+                * vdev_cache_process_kobj_evt() to post events to all the
+                * vdevs since GUID is updated.
+                */
+               vdev_clear_kobj_evt(spa->spa_root_vdev);
+               for (int i = 0; i < spa->spa_l2cache.sav_count; i++)
+                       vdev_clear_kobj_evt(spa->spa_l2cache.sav_vdevs[i]);
+
+               spa_write_cachefile(spa, B_FALSE, B_TRUE, B_TRUE);
                spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_REGUID);
        }
 
@@ -942,8 +1038,8 @@ spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub)
 {
        ASSERT(MUTEX_HELD(&spa->spa_errlist_lock));
 
-       bcopy(&spa->spa_errlist_last, last, sizeof (avl_tree_t));
-       bcopy(&spa->spa_errlist_scrub, scrub, sizeof (avl_tree_t));
+       memcpy(last, &spa->spa_errlist_last, sizeof (avl_tree_t));
+       memcpy(scrub, &spa->spa_errlist_scrub, sizeof (avl_tree_t));
 
        avl_create(&spa->spa_errlist_scrub,
            spa_error_entry_compare, sizeof (spa_error_entry_t),
@@ -962,17 +1058,34 @@ spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
        uint_t count = ztip->zti_count;
        spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q];
        uint_t cpus, flags = TASKQ_DYNAMIC;
-       boolean_t batch = B_FALSE;
 
        switch (mode) {
        case ZTI_MODE_FIXED:
                ASSERT3U(value, >, 0);
                break;
 
-       case ZTI_MODE_BATCH:
-               batch = B_TRUE;
+       case ZTI_MODE_SYNC:
+
+               /*
+                * Create one wr_iss taskq for every 'zio_taskq_wr_iss_ncpus'
+                * CPUs, not to exceed the number of spa allocators.
+                */
+               if (zio_taskq_wr_iss_ncpus == 0) {
+                       count = MAX(boot_ncpus / spa->spa_alloc_count, 1);
+               } else {
+                       count = MAX(1,
+                           boot_ncpus / MAX(1, zio_taskq_wr_iss_ncpus));
+               }
+               count = MAX(count, (zio_taskq_batch_pct + 99) / 100);
+               count = MIN(count, spa->spa_alloc_count);
+
+               /*
+                * zio_taskq_batch_pct is unbounded and may exceed 100%, but no
+                * single taskq may have more threads than 100% of online cpus.
+                */
+               value = (zio_taskq_batch_pct + count / 2) / count;
+               value = MIN(value, 100);
                flags |= TASKQ_THREADS_CPU_PCT;
-               value = MIN(zio_taskq_batch_pct, 100);
                break;
 
        case ZTI_MODE_SCALE:
@@ -1019,7 +1132,7 @@ spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
 
        default:
                panic("unrecognized mode for %s_%s taskq (%u:%u) in "
-                   "spa_activate()",
+                   "spa_taskqs_init()",
                    zio_type_name[t], zio_taskq_types[q], mode, value);
                break;
        }
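
A worked example of the ZTI_MODE_SYNC sizing above, with assumed inputs
(boot_ncpus = 32, spa->spa_alloc_count = 4, and the defaults
zio_taskq_wr_iss_ncpus = 0, zio_taskq_batch_pct = 80):

    count = MAX(32 / 4, 1);          /* 8: one taskq per allocator share */
    count = MAX(8, (80 + 99) / 100); /* 8: at least ceil(batch_pct/100)  */
    count = MIN(8, 4);               /* 4: capped at spa_alloc_count     */
    value = (80 + 4 / 2) / 4;        /* 20: batch_pct split per taskq    */
    value = MIN(20, 100);            /* each taskq gets 20% of the CPUs  */

With TASKQ_THREADS_CPU_PCT set, that yields four write-issue taskqs of
roughly six threads each on this hypothetical 32-CPU machine (about 80% of
the CPUs in total, matching zio_taskq_batch_pct).
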
@@ -1039,13 +1152,13 @@ spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
                        (void) snprintf(name, sizeof (name), "%s_%s",
                            zio_type_name[t], zio_taskq_types[q]);
 
+#ifdef HAVE_SYSDC
                if (zio_taskq_sysdc && spa->spa_proc != &p0) {
-                       if (batch)
-                               flags |= TASKQ_DC_BATCH;
-
+                       (void) zio_taskq_basedc;
                        tq = taskq_create_sysdc(name, value, 50, INT_MAX,
                            spa->spa_proc, zio_taskq_basedc, flags);
                } else {
+#endif
                        pri_t pri = maxclsyspri;
                        /*
                         * The write issue taskq can be extremely CPU
@@ -1071,7 +1184,9 @@ spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
                        }
                        tq = taskq_create_proc(name, value, pri, 50,
                            INT_MAX, spa->spa_proc, flags);
+#ifdef HAVE_SYSDC
                }
+#endif
 
                tqs->stqs_taskq[i] = tq;
        }
@@ -1099,12 +1214,11 @@ spa_taskqs_fini(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
 /*
  * Dispatch a task to the appropriate taskq for the ZFS I/O type and priority.
  * Note that a type may have multiple discrete taskqs to avoid lock contention
- * on the taskq itself. In that case we choose which taskq at random by using
- * the low bits of gethrtime().
+ * on the taskq itself.
  */
-void
-spa_taskq_dispatch_ent(spa_t *spa, zio_type_t t, zio_taskq_type_t q,
-    task_func_t *func, void *arg, uint_t flags, taskq_ent_t *ent)
+static taskq_t *
+spa_taskq_dispatch_select(spa_t *spa, zio_type_t t, zio_taskq_type_t q,
+    zio_t *zio)
 {
        spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q];
        taskq_t *tq;
@@ -1112,12 +1226,27 @@ spa_taskq_dispatch_ent(spa_t *spa, zio_type_t t, zio_taskq_type_t q,
        ASSERT3P(tqs->stqs_taskq, !=, NULL);
        ASSERT3U(tqs->stqs_count, !=, 0);
 
+       if ((t == ZIO_TYPE_WRITE) && (q == ZIO_TASKQ_ISSUE) &&
+           (zio != NULL) && (zio->io_wr_iss_tq != NULL)) {
+               /* dispatch to assigned write issue taskq */
+               tq = zio->io_wr_iss_tq;
+               return (tq);
+       }
+
        if (tqs->stqs_count == 1) {
                tq = tqs->stqs_taskq[0];
        } else {
                tq = tqs->stqs_taskq[((uint64_t)gethrtime()) % tqs->stqs_count];
        }
+       return (tq);
+}
 
+void
+spa_taskq_dispatch_ent(spa_t *spa, zio_type_t t, zio_taskq_type_t q,
+    task_func_t *func, void *arg, uint_t flags, taskq_ent_t *ent,
+    zio_t *zio)
+{
+       taskq_t *tq = spa_taskq_dispatch_select(spa, t, q, zio);
        taskq_dispatch_ent(tq, func, arg, flags, ent);
 }
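
The added zio argument lets write-issue dispatches stick to a pre-assigned
taskq. A hypothetical caller, loosely modeled on the zio pipeline (the
wrapper name, callback, and io_tqent field are assumptions;
spa_taskq_dispatch_ent() and io_wr_iss_tq come from this diff):

    /* Hypothetical caller sketch; not an actual call site. */
    static void
    example_issue_write(spa_t *spa, zio_t *zio, task_func_t *fn)
    {
            /* Passing zio routes to zio->io_wr_iss_tq when it is set. */
            spa_taskq_dispatch_ent(spa, ZIO_TYPE_WRITE, ZIO_TASKQ_ISSUE,
                fn, zio, 0, &zio->io_tqent, zio);
    }
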
 
@@ -1128,20 +1257,8 @@ void
 spa_taskq_dispatch_sync(spa_t *spa, zio_type_t t, zio_taskq_type_t q,
     task_func_t *func, void *arg, uint_t flags)
 {
-       spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q];
-       taskq_t *tq;
-       taskqid_t id;
-
-       ASSERT3P(tqs->stqs_taskq, !=, NULL);
-       ASSERT3U(tqs->stqs_count, !=, 0);
-
-       if (tqs->stqs_count == 1) {
-               tq = tqs->stqs_taskq[0];
-       } else {
-               tq = tqs->stqs_taskq[((uint64_t)gethrtime()) % tqs->stqs_count];
-       }
-
-       id = taskq_dispatch(tq, func, arg, flags);
+       taskq_t *tq = spa_taskq_dispatch_select(spa, t, q, NULL);
+       taskqid_t id = taskq_dispatch(tq, func, arg, flags);
        if (id)
                taskq_wait_id(tq, id);
 }
@@ -1156,11 +1273,6 @@ spa_create_zio_taskqs(spa_t *spa)
        }
 }
 
-/*
- * Disabled until spa_thread() can be adapted for Linux.
- */
-#undef HAVE_SPA_THREAD
-
 #if defined(_KERNEL) && defined(HAVE_SPA_THREAD)
 static void
 spa_thread(void *arg)
@@ -1201,9 +1313,11 @@ spa_thread(void *arg)
                pool_unlock();
        }
 
+#ifdef HAVE_SYSDC
        if (zio_taskq_sysdc) {
                sysdc_thread_enter(curthread, 100, 0);
        }
+#endif
 
        spa->spa_proc = curproc;
        spa->spa_did = curthread->t_did;
@@ -1232,23 +1346,26 @@ spa_thread(void *arg)
 }
 #endif
 
+extern metaslab_ops_t *metaslab_allocator(spa_t *spa);
+
 /*
  * Activate an uninitialized pool.
  */
 static void
 spa_activate(spa_t *spa, spa_mode_t mode)
 {
+       metaslab_ops_t *msp = metaslab_allocator(spa);
        ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED);
 
        spa->spa_state = POOL_STATE_ACTIVE;
        spa->spa_mode = mode;
+       spa->spa_read_spacemaps = spa_mode_readable_spacemaps;
 
-       spa->spa_normal_class = metaslab_class_create(spa, zfs_metaslab_ops);
-       spa->spa_log_class = metaslab_class_create(spa, zfs_metaslab_ops);
-       spa->spa_embedded_log_class =
-           metaslab_class_create(spa, zfs_metaslab_ops);
-       spa->spa_special_class = metaslab_class_create(spa, zfs_metaslab_ops);
-       spa->spa_dedup_class = metaslab_class_create(spa, zfs_metaslab_ops);
+       spa->spa_normal_class = metaslab_class_create(spa, msp);
+       spa->spa_log_class = metaslab_class_create(spa, msp);
+       spa->spa_embedded_log_class = metaslab_class_create(spa, msp);
+       spa->spa_special_class = metaslab_class_create(spa, msp);
+       spa->spa_dedup_class = metaslab_class_create(spa, msp);
 
        /* Try to create a covering process */
        mutex_enter(&spa->spa_proc_lock);
@@ -1306,6 +1423,11 @@ spa_activate(spa_t *spa, spa_mode_t mode)
        avl_create(&spa->spa_errlist_last,
            spa_error_entry_compare, sizeof (spa_error_entry_t),
            offsetof(spa_error_entry_t, se_avl));
+       avl_create(&spa->spa_errlist_healed,
+           spa_error_entry_compare, sizeof (spa_error_entry_t),
+           offsetof(spa_error_entry_t, se_avl));
+
+       spa_activate_os(spa);
 
        spa_keystore_init(&spa->spa_keystore);
 
@@ -1327,6 +1449,13 @@ spa_activate(spa_t *spa, spa_mode_t mode)
        spa->spa_zvol_taskq = taskq_create("z_zvol", 1, defclsyspri,
            1, INT_MAX, 0);
 
+       /*
+        * The taskq to preload metaslabs; TASKQ_THREADS_CPU_PCT makes
+        * metaslab_preload_pct a percentage of the online CPUs.
+        */
+       spa->spa_metaslab_taskq = taskq_create("z_metaslab",
+           metaslab_preload_pct, maxclsyspri, 1, INT_MAX,
+           TASKQ_DYNAMIC | TASKQ_THREADS_CPU_PCT);
+
        /*
         * Taskq dedicated to prefetcher threads: this is used to prevent the
         * pool traverse code from monopolizing the global (and limited)
@@ -1362,6 +1491,11 @@ spa_deactivate(spa_t *spa)
                spa->spa_zvol_taskq = NULL;
        }
 
+       if (spa->spa_metaslab_taskq) {
+               taskq_destroy(spa->spa_metaslab_taskq);
+               spa->spa_metaslab_taskq = NULL;
+       }
+
        if (spa->spa_prefetch_taskq) {
                taskq_destroy(spa->spa_prefetch_taskq);
                spa->spa_prefetch_taskq = NULL;
@@ -1414,6 +1548,7 @@ spa_deactivate(spa_t *spa)
        spa_errlog_drain(spa);
        avl_destroy(&spa->spa_errlist_scrub);
        avl_destroy(&spa->spa_errlist_last);
+       avl_destroy(&spa->spa_errlist_healed);
 
        spa_keystore_fini(&spa->spa_keystore);
 
@@ -1443,6 +1578,9 @@ spa_deactivate(spa_t *spa)
                thread_join(spa->spa_did);
                spa->spa_did = 0;
        }
+
+       spa_deactivate_os(spa);
+
 }
 
 /*
@@ -1535,16 +1673,16 @@ spa_unload_log_sm_metadata(spa_t *spa)
 {
        void *cookie = NULL;
        spa_log_sm_t *sls;
+       log_summary_entry_t *e;
+
        while ((sls = avl_destroy_nodes(&spa->spa_sm_logs_by_txg,
            &cookie)) != NULL) {
                VERIFY0(sls->sls_mscount);
                kmem_free(sls, sizeof (spa_log_sm_t));
        }
 
-       for (log_summary_entry_t *e = list_head(&spa->spa_log_summary);
-           e != NULL; e = list_head(&spa->spa_log_summary)) {
+       while ((e = list_remove_head(&spa->spa_log_summary)) != NULL) {
                VERIFY0(e->lse_mscount);
-               list_remove(&spa->spa_log_summary, e);
                kmem_free(e, sizeof (log_summary_entry_t));
        }
 
@@ -1572,6 +1710,10 @@ spa_destroy_aux_threads(spa_t *spa)
                zthr_destroy(spa->spa_livelist_condense_zthr);
                spa->spa_livelist_condense_zthr = NULL;
        }
+       if (spa->spa_raidz_expand_zthr != NULL) {
+               zthr_destroy(spa->spa_raidz_expand_zthr);
+               spa->spa_raidz_expand_zthr = NULL;
+       }
 }
 
 /*
@@ -1589,25 +1731,33 @@ spa_unload(spa_t *spa)
        spa_wake_waiters(spa);
 
        /*
-        * If the log space map feature is enabled and the pool is getting
-        * exported (but not destroyed), we want to spend some time flushing
-        * as many metaslabs as we can in an attempt to destroy log space
-        * maps and save import time.
+        * If we have set the spa_final_txg, we have already performed the
+        * tasks below in spa_export_common(). We should not redo them here
+        * since we delay the final TXGs beyond what spa_final_txg is set at.
         */
-       if (spa_should_flush_logs_on_unload(spa))
-               spa_unload_log_sm_flush_all(spa);
+       if (spa->spa_final_txg == UINT64_MAX) {
+               /*
+                * If the log space map feature is enabled and the pool is
+                * getting exported (but not destroyed), we want to spend some
+                * time flushing as many metaslabs as we can in an attempt to
+                * destroy log space maps and save import time.
+                */
+               if (spa_should_flush_logs_on_unload(spa))
+                       spa_unload_log_sm_flush_all(spa);
 
-       /*
-        * Stop async tasks.
-        */
-       spa_async_suspend(spa);
+               /*
+                * Stop async tasks.
+                */
+               spa_async_suspend(spa);
 
-       if (spa->spa_root_vdev) {
-               vdev_t *root_vdev = spa->spa_root_vdev;
-               vdev_initialize_stop_all(root_vdev, VDEV_INITIALIZE_ACTIVE);
-               vdev_trim_stop_all(root_vdev, VDEV_TRIM_ACTIVE);
-               vdev_autotrim_stop_all(spa);
-               vdev_rebuild_stop_all(spa);
+               if (spa->spa_root_vdev) {
+                       vdev_t *root_vdev = spa->spa_root_vdev;
+                       vdev_initialize_stop_all(root_vdev,
+                           VDEV_INITIALIZE_ACTIVE);
+                       vdev_trim_stop_all(root_vdev, VDEV_TRIM_ACTIVE);
+                       vdev_autotrim_stop_all(spa);
+                       vdev_rebuild_stop_all(spa);
+               }
        }
 
        /*
@@ -1622,13 +1772,7 @@ spa_unload(spa_t *spa)
         * This ensures that there is no async metaslab prefetching
         * while we attempt to unload the spa.
         */
-       if (spa->spa_root_vdev != NULL) {
-               for (int c = 0; c < spa->spa_root_vdev->vdev_children; c++) {
-                       vdev_t *vc = spa->spa_root_vdev->vdev_child[c];
-                       if (vc->vdev_mg != NULL)
-                               taskq_wait(vc->vdev_mg->mg_taskq);
-               }
-       }
+       taskq_wait(spa->spa_metaslab_taskq);
 
        if (spa->spa_mmp.mmp_thread)
                mmp_thread_stop(spa);
@@ -1673,6 +1817,7 @@ spa_unload(spa_t *spa)
        }
 
        ddt_unload(spa);
+       brt_unload(spa);
        spa_unload_log_sm_metadata(spa);
 
        /*
@@ -1680,9 +1825,9 @@ spa_unload(spa_t *spa)
         */
        spa_l2cache_drop(spa);
 
-       for (int i = 0; i < spa->spa_spares.sav_count; i++)
-               vdev_free(spa->spa_spares.sav_vdevs[i]);
        if (spa->spa_spares.sav_vdevs) {
+               for (int i = 0; i < spa->spa_spares.sav_count; i++)
+                       vdev_free(spa->spa_spares.sav_vdevs[i]);
                kmem_free(spa->spa_spares.sav_vdevs,
                    spa->spa_spares.sav_count * sizeof (void *));
                spa->spa_spares.sav_vdevs = NULL;
@@ -1693,11 +1838,11 @@ spa_unload(spa_t *spa)
        }
        spa->spa_spares.sav_count = 0;
 
-       for (int i = 0; i < spa->spa_l2cache.sav_count; i++) {
-               vdev_clear_stats(spa->spa_l2cache.sav_vdevs[i]);
-               vdev_free(spa->spa_l2cache.sav_vdevs[i]);
-       }
        if (spa->spa_l2cache.sav_vdevs) {
+               for (int i = 0; i < spa->spa_l2cache.sav_count; i++) {
+                       vdev_clear_stats(spa->spa_l2cache.sav_vdevs[i]);
+                       vdev_free(spa->spa_l2cache.sav_vdevs[i]);
+               }
                kmem_free(spa->spa_l2cache.sav_vdevs,
                    spa->spa_l2cache.sav_count * sizeof (void *));
                spa->spa_l2cache.sav_vdevs = NULL;
@@ -1721,6 +1866,8 @@ spa_unload(spa_t *spa)
                spa->spa_compatibility = NULL;
        }
 
+       spa->spa_raidz_expand = NULL;
+
        spa_config_exit(spa, SCL_ALL, spa);
 }
 
@@ -1755,26 +1902,27 @@ spa_load_spares(spa_t *spa)
        /*
         * First, close and free any existing spare vdevs.
         */
-       for (i = 0; i < spa->spa_spares.sav_count; i++) {
-               vd = spa->spa_spares.sav_vdevs[i];
+       if (spa->spa_spares.sav_vdevs) {
+               for (i = 0; i < spa->spa_spares.sav_count; i++) {
+                       vd = spa->spa_spares.sav_vdevs[i];
 
-               /* Undo the call to spa_activate() below */
-               if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid,
-                   B_FALSE)) != NULL && tvd->vdev_isspare)
-                       spa_spare_remove(tvd);
-               vdev_close(vd);
-               vdev_free(vd);
-       }
+                       /* Undo the call to spa_activate() below */
+                       if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid,
+                           B_FALSE)) != NULL && tvd->vdev_isspare)
+                               spa_spare_remove(tvd);
+                       vdev_close(vd);
+                       vdev_free(vd);
+               }
 
-       if (spa->spa_spares.sav_vdevs)
                kmem_free(spa->spa_spares.sav_vdevs,
                    spa->spa_spares.sav_count * sizeof (void *));
+       }
 
        if (spa->spa_spares.sav_config == NULL)
                nspares = 0;
        else
-               VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config,
-                   ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);
+               VERIFY0(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config,
+                   ZPOOL_CONFIG_SPARES, &spares, &nspares));
 
        spa->spa_spares.sav_count = (int)nspares;
        spa->spa_spares.sav_vdevs = NULL;
@@ -1836,16 +1984,16 @@ spa_load_spares(spa_t *spa)
         * Recompute the stashed list of spares, with status information
         * this time.
         */
-       VERIFY(nvlist_remove(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES,
-           DATA_TYPE_NVLIST_ARRAY) == 0);
+       fnvlist_remove(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES);
 
        spares = kmem_alloc(spa->spa_spares.sav_count * sizeof (void *),
            KM_SLEEP);
        for (i = 0; i < spa->spa_spares.sav_count; i++)
                spares[i] = vdev_config_generate(spa,
                    spa->spa_spares.sav_vdevs[i], B_TRUE, VDEV_CONFIG_SPARE);
-       VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
-           ZPOOL_CONFIG_SPARES, spares, spa->spa_spares.sav_count) == 0);
+       fnvlist_add_nvlist_array(spa->spa_spares.sav_config,
+           ZPOOL_CONFIG_SPARES, (const nvlist_t * const *)spares,
+           spa->spa_spares.sav_count);
        for (i = 0; i < spa->spa_spares.sav_count; i++)
                nvlist_free(spares[i]);
        kmem_free(spares, spa->spa_spares.sav_count * sizeof (void *));
@@ -1895,16 +2043,15 @@ spa_load_l2cache(spa_t *spa)
                goto out;
        }
 
-       VERIFY(nvlist_lookup_nvlist_array(sav->sav_config,
-           ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0);
+       VERIFY0(nvlist_lookup_nvlist_array(sav->sav_config,
+           ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache));
        newvdevs = kmem_alloc(nl2cache * sizeof (void *), KM_SLEEP);
 
        /*
         * Process new nvlist of vdevs.
         */
        for (i = 0; i < nl2cache; i++) {
-               VERIFY(nvlist_lookup_uint64(l2cache[i], ZPOOL_CONFIG_GUID,
-                   &guid) == 0);
+               guid = fnvlist_lookup_uint64(l2cache[i], ZPOOL_CONFIG_GUID);
 
                newvdevs[i] = NULL;
                for (j = 0; j < oldnvdevs; j++) {
@@ -1965,8 +2112,7 @@ spa_load_l2cache(spa_t *spa)
         * Recompute the stashed list of l2cache devices, with status
         * information this time.
         */
-       VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE,
-           DATA_TYPE_NVLIST_ARRAY) == 0);
+       fnvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE);
 
        if (sav->sav_count > 0)
                l2cache = kmem_alloc(sav->sav_count * sizeof (void *),
@@ -1974,30 +2120,31 @@ spa_load_l2cache(spa_t *spa)
        for (i = 0; i < sav->sav_count; i++)
                l2cache[i] = vdev_config_generate(spa,
                    sav->sav_vdevs[i], B_TRUE, VDEV_CONFIG_L2CACHE);
-       VERIFY(nvlist_add_nvlist_array(sav->sav_config,
-           ZPOOL_CONFIG_L2CACHE, l2cache, sav->sav_count) == 0);
+       fnvlist_add_nvlist_array(sav->sav_config, ZPOOL_CONFIG_L2CACHE,
+           (const nvlist_t * const *)l2cache, sav->sav_count);
 
 out:
        /*
         * Purge vdevs that were dropped
         */
-       for (i = 0; i < oldnvdevs; i++) {
-               uint64_t pool;
+       if (oldvdevs) {
+               for (i = 0; i < oldnvdevs; i++) {
+                       uint64_t pool;
 
-               vd = oldvdevs[i];
-               if (vd != NULL) {
-                       ASSERT(vd->vdev_isl2cache);
+                       vd = oldvdevs[i];
+                       if (vd != NULL) {
+                               ASSERT(vd->vdev_isl2cache);
 
-                       if (spa_l2cache_exists(vd->vdev_guid, &pool) &&
-                           pool != 0ULL && l2arc_vdev_present(vd))
-                               l2arc_remove_vdev(vd);
-                       vdev_clear_stats(vd);
-                       vdev_free(vd);
+                               if (spa_l2cache_exists(vd->vdev_guid, &pool) &&
+                                   pool != 0ULL && l2arc_vdev_present(vd))
+                                       l2arc_remove_vdev(vd);
+                               vdev_clear_stats(vd);
+                               vdev_free(vd);
+                       }
                }
-       }
 
-       if (oldvdevs)
                kmem_free(oldvdevs, oldnvdevs * sizeof (void *));
+       }
 
        for (i = 0; i < sav->sav_count; i++)
                nvlist_free(l2cache[i]);
@@ -2085,7 +2232,7 @@ spa_check_for_missing_logs(spa_t *spa)
 
                child = kmem_alloc(rvd->vdev_children * sizeof (nvlist_t *),
                    KM_SLEEP);
-               VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+               nv = fnvlist_alloc();
 
                for (uint64_t c = 0; c < rvd->vdev_children; c++) {
                        vdev_t *tvd = rvd->vdev_child[c];
@@ -2103,8 +2250,8 @@ spa_check_for_missing_logs(spa_t *spa)
                }
 
                if (idx > 0) {
-                       fnvlist_add_nvlist_array(nv,
-                           ZPOOL_CONFIG_CHILDREN, child, idx);
+                       fnvlist_add_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
+                           (const nvlist_t * const *)child, idx);
                        fnvlist_add_nvlist(spa->spa_load_info,
                            ZPOOL_CONFIG_MISSING_DEVICES, nv);
 
@@ -2245,6 +2392,7 @@ spa_claim_notify(zio_t *zio)
 }
 
 typedef struct spa_load_error {
+       boolean_t       sle_verify_data;
        uint64_t        sle_meta_count;
        uint64_t        sle_data_count;
 } spa_load_error_t;
@@ -2277,18 +2425,19 @@ spa_load_verify_done(zio_t *zio)
  * Maximum number of inflight bytes is the log2 fraction of the arc size.
  * By default, we set it to 1/16th of the arc.
  */
-int spa_load_verify_shift = 4;
-int spa_load_verify_metadata = B_TRUE;
-int spa_load_verify_data = B_TRUE;
+static uint_t spa_load_verify_shift = 4;
+static int spa_load_verify_metadata = B_TRUE;
+static int spa_load_verify_data = B_TRUE;
 
-/*ARGSUSED*/
 static int
 spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
     const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
 {
-       if (zb->zb_level == ZB_DNODE_LEVEL || BP_IS_HOLE(bp) ||
-           BP_IS_EMBEDDED(bp) || BP_IS_REDACTED(bp))
-               return (0);
+       zio_t *rio = arg;
+       spa_load_error_t *sle = rio->io_private;
+
+       (void) zilog, (void) dnp;
+
        /*
         * Note: normally this routine will not be called if
         * spa_load_verify_metadata is not set.  However, it may be useful
@@ -2296,12 +2445,28 @@ spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
         */
        if (!spa_load_verify_metadata)
                return (0);
-       if (!BP_IS_METADATA(bp) && !spa_load_verify_data)
+
+       /*
+        * Sanity check the block pointer in order to detect obvious damage
+        * before using the contents in subsequent checks or in zio_read().
+        * When damaged consider it to be a metadata error since we cannot
+        * trust the BP_GET_TYPE and BP_GET_LEVEL values.
+        */
+       if (!zfs_blkptr_verify(spa, bp, BLK_CONFIG_NEEDED, BLK_VERIFY_LOG)) {
+               atomic_inc_64(&sle->sle_meta_count);
+               return (0);
+       }
+
+       if (zb->zb_level == ZB_DNODE_LEVEL || BP_IS_HOLE(bp) ||
+           BP_IS_EMBEDDED(bp) || BP_IS_REDACTED(bp))
+               return (0);
+
+       if (!BP_IS_METADATA(bp) &&
+           (!spa_load_verify_data || !sle->sle_verify_data))
                return (0);
 
        uint64_t maxinflight_bytes =
            arc_target_bytes() >> spa_load_verify_shift;
-       zio_t *rio = arg;
        size_t size = BP_GET_PSIZE(bp);
 
        mutex_enter(&spa->spa_scrub_lock);
@@ -2317,10 +2482,11 @@ spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
        return (0);
 }
 
-/* ARGSUSED */
 static int
 verify_dataset_name_len(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
 {
+       (void) dp, (void) arg;
+
        if (dsl_dataset_namelen(ds) >= ZFS_MAX_DATASET_NAME_LEN)
                return (SET_ERROR(ENAMETOOLONG));
 
@@ -2338,7 +2504,8 @@ spa_load_verify(spa_t *spa)
 
        zpool_get_load_policy(spa->spa_config, &policy);
 
-       if (policy.zlp_rewind & ZPOOL_NEVER_REWIND)
+       if (policy.zlp_rewind & ZPOOL_NEVER_REWIND ||
+           policy.zlp_maxmeta == UINT64_MAX)
                return (0);
 
        dsl_pool_config_enter(spa->spa_dsl_pool, FTAG);
@@ -2349,6 +2516,13 @@ spa_load_verify(spa_t *spa)
        if (error != 0)
                return (error);
 
+       /*
+        * Verify data only if we are rewinding or if an error limit was set.
+        * Otherwise nothing but dbgmsg cares about it, so don't waste the time.
+        */
+       sle.sle_verify_data = (policy.zlp_rewind & ZPOOL_REWIND_MASK) ||
+           (policy.zlp_maxdata < UINT64_MAX);
+
        rio = zio_root(spa, NULL, &sle,
            ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE);
 
@@ -2388,12 +2562,14 @@ spa_load_verify(spa_t *spa)
                spa->spa_load_txg_ts = spa->spa_uberblock.ub_timestamp;
 
                loss = spa->spa_last_ubsync_txg_ts - spa->spa_load_txg_ts;
-               VERIFY(nvlist_add_uint64(spa->spa_load_info,
-                   ZPOOL_CONFIG_LOAD_TIME, spa->spa_load_txg_ts) == 0);
-               VERIFY(nvlist_add_int64(spa->spa_load_info,
-                   ZPOOL_CONFIG_REWIND_TIME, loss) == 0);
-               VERIFY(nvlist_add_uint64(spa->spa_load_info,
-                   ZPOOL_CONFIG_LOAD_DATA_ERRORS, sle.sle_data_count) == 0);
+               fnvlist_add_uint64(spa->spa_load_info, ZPOOL_CONFIG_LOAD_TIME,
+                   spa->spa_load_txg_ts);
+               fnvlist_add_int64(spa->spa_load_info, ZPOOL_CONFIG_REWIND_TIME,
+                   loss);
+               fnvlist_add_uint64(spa->spa_load_info,
+                   ZPOOL_CONFIG_LOAD_META_ERRORS, sle.sle_meta_count);
+               fnvlist_add_uint64(spa->spa_load_info,
+                   ZPOOL_CONFIG_LOAD_DATA_ERRORS, sle.sle_data_count);
        } else {
                spa->spa_load_max_txg = spa->spa_uberblock.ub_txg;
        }
@@ -2450,10 +2626,10 @@ spa_livelist_delete_check(spa_t *spa)
        return (spa->spa_livelists_to_delete != 0);
 }
 
-/* ARGSUSED */
 static boolean_t
 spa_livelist_delete_cb_check(void *arg, zthr_t *z)
 {
+       (void) z;
        spa_t *spa = arg;
        return (spa_livelist_delete_check(spa));
 }
@@ -2545,7 +2721,6 @@ livelist_delete_sync(void *arg, dmu_tx_t *tx)
  * be freed. Then, call a synctask which performs the actual frees and updates
  * the pool-wide livelist data.
  */
-/* ARGSUSED */
 static void
 spa_livelist_delete_cb(void *arg, zthr_t *z)
 {
@@ -2610,7 +2785,8 @@ spa_start_livelist_destroy_thread(spa_t *spa)
        ASSERT3P(spa->spa_livelist_delete_zthr, ==, NULL);
        spa->spa_livelist_delete_zthr =
            zthr_create("z_livelist_destroy",
-           spa_livelist_delete_cb_check, spa_livelist_delete_cb, spa);
+           spa_livelist_delete_cb_check, spa_livelist_delete_cb, spa,
+           minclsyspri);
 }
 
 typedef struct livelist_new_arg {
@@ -2790,7 +2966,6 @@ spa_livelist_condense_cb(void *arg, zthr_t *t)
                zfs_livelist_condense_zthr_cancel++;
 }
 
-/* ARGSUSED */
 /*
  * Check that there is something to condense but that a condense is not
  * already in progress and that condensing has not been cancelled.
@@ -2798,6 +2973,7 @@ spa_livelist_condense_cb(void *arg, zthr_t *t)
 static boolean_t
 spa_livelist_condense_cb_check(void *arg, zthr_t *z)
 {
+       (void) z;
        spa_t *spa = arg;
        if ((spa->spa_to_condense.ds != NULL) &&
            (spa->spa_to_condense.syncing == B_FALSE) &&
@@ -2820,7 +2996,7 @@ spa_start_livelist_condensing_thread(spa_t *spa)
        spa->spa_livelist_condense_zthr =
            zthr_create("z_livelist_condense",
            spa_livelist_condense_cb_check,
-           spa_livelist_condense_cb, spa);
+           spa_livelist_condense_cb, spa, minclsyspri);
 }
 
 static void
@@ -2830,6 +3006,7 @@ spa_spawn_aux_threads(spa_t *spa)
 
        ASSERT(MUTEX_HELD(&spa_namespace_lock));
 
+       spa_start_raidz_expansion_thread(spa);
        spa_start_indirect_condensing_thread(spa);
        spa_start_livelist_destroy_thread(spa);
        spa_start_livelist_condensing_thread(spa);
@@ -2838,7 +3015,7 @@ spa_spawn_aux_threads(spa_t *spa)
        spa->spa_checkpoint_discard_zthr =
            zthr_create("z_checkpoint_discard",
            spa_checkpoint_discard_thread_check,
-           spa_checkpoint_discard_thread, spa);
+           spa_checkpoint_discard_thread, spa, minclsyspri);
 }
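
Each zthr_create() call above now passes a scheduling priority (minclsyspri)
as its final argument. The signature implied by these call sites would be
roughly the following (a hedged reconstruction from the calls, not copied
from zthr.h):

    zthr_t *zthr_create(const char *zthr_name, zthr_checkfunc_t *checkfunc,
        zthr_func_t *func, void *arg, pri_t pri);
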
 
 /*
@@ -2926,12 +3103,13 @@ spa_try_repair(spa_t *spa, nvlist_t *config)
 static int
 spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type)
 {
-       char *ereport = FM_EREPORT_ZFS_POOL;
+       const char *ereport = FM_EREPORT_ZFS_POOL;
        int error;
 
        spa->spa_load_state = state;
        (void) spa_import_progress_set_state(spa_guid(spa),
            spa_load_state(spa));
+       spa_import_progress_set_notes(spa, "spa_load()");
 
        gethrestime(&spa->spa_loaded_ts);
        error = spa_load_impl(spa, type, &ereport);
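
The notes set here feed the import_progress kstat that this commit extends.
The calls later in the diff pass a format string plus arguments, so the
helper is presumably printf-style (prototype is a hedged reconstruction, not
copied from a header):

    void spa_import_progress_set_notes(spa_t *spa, const char *fmt, ...);

On Linux the kstat is typically readable from userspace, e.g. under
/proc/spl/kstat/zfs/import_progress (path assumed from the usual SPL kstat
layout), letting an administrator watch a slow import move through the named
load phases below.
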
@@ -2973,6 +3151,12 @@ vdev_count_verify_zaps(vdev_t *vd)
        spa_t *spa = vd->vdev_spa;
        uint64_t total = 0;
 
+       if (spa_feature_is_active(vd->vdev_spa, SPA_FEATURE_AVZ_V2) &&
+           vd->vdev_root_zap != 0) {
+               total++;
+               ASSERT0(zap_lookup_int(spa->spa_meta_objset,
+                   spa->spa_all_vdev_zaps, vd->vdev_root_zap));
+       }
        if (vd->vdev_top_zap != 0) {
                total++;
                ASSERT0(zap_lookup_int(spa->spa_meta_objset,
@@ -2990,6 +3174,8 @@ vdev_count_verify_zaps(vdev_t *vd)
 
        return (total);
 }
+#else
+#define        vdev_count_verify_zaps(vd) ((void) sizeof (vd), 0)
 #endif
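
The non-DEBUG stub above relies on a comma expression: (void) sizeof (vd)
references the argument at compile time without evaluating it, silencing
unused-variable warnings while the macro still expands to 0. A standalone
illustration (hypothetical macro name):

    #define	TOUCH_AND_RETURN_ZERO(x)	((void) sizeof (x), 0)

    /* v becomes 0; the argument expression is never evaluated. */
    int v = TOUCH_AND_RETURN_ZERO(some_variable);
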
 
 /*
@@ -3152,7 +3338,7 @@ spa_activity_check(spa_t *spa, uberblock_t *ub, nvlist_t *config)
        uint64_t mmp_config = ub->ub_mmp_config;
        uint16_t mmp_seq = MMP_SEQ_VALID(ub) ? MMP_SEQ(ub) : 0;
        uint64_t import_delay;
-       hrtime_t import_expire;
+       hrtime_t import_expire, now;
        nvlist_t *mmp_label = NULL;
        vdev_t *rvd = spa->spa_root_vdev;
        kcondvar_t cv;
@@ -3190,7 +3376,17 @@ spa_activity_check(spa_t *spa, uberblock_t *ub, nvlist_t *config)
 
        import_expire = gethrtime() + import_delay;
 
-       while (gethrtime() < import_expire) {
+       spa_import_progress_set_notes(spa, "Checking MMP activity, waiting "
+           "%llu ms", (u_longlong_t)NSEC2MSEC(import_delay));
+
+       int iterations = 0;
+       while ((now = gethrtime()) < import_expire) {
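+               /* Refresh the kstat note only once per 30 wait iterations. */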
+               if (iterations++ % 30 == 0) {
+                       spa_import_progress_set_notes(spa, "Checking MMP "
+                           "activity, %llu ms remaining",
+                           (u_longlong_t)NSEC2MSEC(import_expire - now));
+               }
+
                (void) spa_import_progress_set_mmp_check(spa_guid(spa),
                    NSEC2SEC(import_expire - gethrtime()));
 
@@ -3241,7 +3437,7 @@ out:
         * ZPOOL_CONFIG_MMP_HOSTID   - hostid from the active pool
         */
        if (error == EREMOTEIO) {
-               char *hostname = "<unknown>";
+               const char *hostname = "<unknown>";
                uint64_t hostid = 0;
 
                if (mmp_label) {
@@ -3278,7 +3474,7 @@ static int
 spa_verify_host(spa_t *spa, nvlist_t *mos_config)
 {
        uint64_t hostid;
-       char *hostname;
+       const char *hostname;
        uint64_t myhostid = 0;
 
        if (!spa_is_root(spa) && nvlist_lookup_uint64(mos_config,
@@ -3313,8 +3509,8 @@ spa_ld_parse_config(spa_t *spa, spa_import_type_t type)
        int parse;
        vdev_t *rvd;
        uint64_t pool_guid;
-       char *comment;
-       char *compatibility;
+       const char *comment;
+       const char *compatibility;
 
        /*
         * Versioning wasn't explicitly added to the label until later, so if
@@ -3576,6 +3772,12 @@ spa_ld_select_uberblock(spa_t *spa, spa_import_type_t type)
        }
        spa_load_note(spa, "using uberblock with txg=%llu",
            (u_longlong_t)ub->ub_txg);
+       if (ub->ub_raidz_reflow_info != 0) {
+               spa_load_note(spa, "uberblock raidz_reflow_info: "
+                   "state=%u offset=%llu",
+                   (int)RRSS_GET_STATE(ub),
+                   (u_longlong_t)RRSS_GET_OFFSET(ub));
+       }
 
 
        /*
@@ -3646,7 +3848,7 @@ spa_ld_select_uberblock(spa_t *spa, spa_import_type_t type)
                 * from the label.
                 */
                nvlist_free(spa->spa_label_features);
-               VERIFY(nvlist_dup(features, &spa->spa_label_features, 0) == 0);
+               spa->spa_label_features = fnvlist_dup(features);
        }
 
        nvlist_free(label);
@@ -3659,21 +3861,20 @@ spa_ld_select_uberblock(spa_t *spa, spa_import_type_t type)
        if (ub->ub_version >= SPA_VERSION_FEATURES) {
                nvlist_t *unsup_feat;
 
-               VERIFY(nvlist_alloc(&unsup_feat, NV_UNIQUE_NAME, KM_SLEEP) ==
-                   0);
+               unsup_feat = fnvlist_alloc();
 
                for (nvpair_t *nvp = nvlist_next_nvpair(spa->spa_label_features,
                    NULL); nvp != NULL;
                    nvp = nvlist_next_nvpair(spa->spa_label_features, nvp)) {
                        if (!zfeature_is_supported(nvpair_name(nvp))) {
-                               VERIFY(nvlist_add_string(unsup_feat,
-                                   nvpair_name(nvp), "") == 0);
+                               fnvlist_add_string(unsup_feat,
+                                   nvpair_name(nvp), "");
                        }
                }
 
                if (!nvlist_empty(unsup_feat)) {
-                       VERIFY(nvlist_add_nvlist(spa->spa_load_info,
-                           ZPOOL_CONFIG_UNSUP_FEAT, unsup_feat) == 0);
+                       fnvlist_add_nvlist(spa->spa_load_info,
+                           ZPOOL_CONFIG_UNSUP_FEAT, unsup_feat);
                        nvlist_free(unsup_feat);
                        spa_load_failed(spa, "some features are unsupported");
                        return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT,
@@ -3801,6 +4002,24 @@ spa_ld_trusted_config(spa_t *spa, spa_import_type_t type,
        rvd = mrvd;
        spa_config_exit(spa, SCL_ALL, FTAG);
 
+       /*
+        * If 'zpool import' used a cached config, then the on-disk hostid and
+        * hostname may be different to the cached config in ways that should
+        * prevent import.  Userspace can't discover this without a scan, but
+        * we know, so we add these values to LOAD_INFO so the caller can know
+        * the difference.
+        *
+        * Note that we have to do this before the config is regenerated,
+        * because the new config will have the hostid and hostname for this
+        * host, in readiness for import.
+        */
+       if (nvlist_exists(mos_config, ZPOOL_CONFIG_HOSTID))
+               fnvlist_add_uint64(spa->spa_load_info, ZPOOL_CONFIG_HOSTID,
+                   fnvlist_lookup_uint64(mos_config, ZPOOL_CONFIG_HOSTID));
+       if (nvlist_exists(mos_config, ZPOOL_CONFIG_HOSTNAME))
+               fnvlist_add_string(spa->spa_load_info, ZPOOL_CONFIG_HOSTNAME,
+                   fnvlist_lookup_string(mos_config, ZPOOL_CONFIG_HOSTNAME));
+
        /*
         * We will use spa_config if we decide to reload the spa or if spa_load
         * fails and we rewind. We must thus regenerate the config using the
@@ -4159,6 +4378,7 @@ spa_ld_get_props(spa_t *spa)
                spa->spa_avz_action = AVZ_ACTION_INITIALIZE;
                ASSERT0(vdev_count_verify_zaps(spa->spa_root_vdev));
        } else if (error != 0) {
+               nvlist_free(mos_config);
                return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
        } else if (!nvlist_exists(mos_config, ZPOOL_CONFIG_HAS_PER_VDEV_ZAPS)) {
                /*
@@ -4183,7 +4403,7 @@ spa_ld_get_props(spa_t *spa)
                return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
 
        if (error == 0) {
-               uint64_t autoreplace;
+               uint64_t autoreplace = 0;
 
                spa_prop_find(spa, ZPOOL_PROP_BOOTFS, &spa->spa_bootfs);
                spa_prop_find(spa, ZPOOL_PROP_AUTOREPLACE, &autoreplace);
@@ -4319,7 +4539,7 @@ spa_ld_load_vdev_metadata(spa_t *spa)
 
        error = spa_ld_log_spacemaps(spa);
        if (error != 0) {
-               spa_load_failed(spa, "spa_ld_log_sm_data failed [error=%d]",
+               spa_load_failed(spa, "spa_ld_log_spacemaps failed [error=%d]",
                    error);
                return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, error));
        }
@@ -4350,7 +4570,22 @@ spa_ld_load_dedup_tables(spa_t *spa)
 }
 
 static int
-spa_ld_verify_logs(spa_t *spa, spa_import_type_t type, char **ereport)
+spa_ld_load_brt(spa_t *spa)
+{
+       int error = 0;
+       vdev_t *rvd = spa->spa_root_vdev;
+
+       error = brt_load(spa);
+       if (error != 0) {
+               spa_load_failed(spa, "brt_load failed [error=%d]", error);
+               return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
+       }
+
+       return (0);
+}
+
+static int
+spa_ld_verify_logs(spa_t *spa, spa_import_type_t type, const char **ereport)
 {
        vdev_t *rvd = spa->spa_root_vdev;
 
@@ -4717,7 +4952,7 @@ spa_ld_mos_with_trusted_config(spa_t *spa, spa_import_type_t type,
  * config stored in the MOS.
  */
 static int
-spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport)
+spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport)
 {
        int error = 0;
        boolean_t missing_feat_write = B_FALSE;
@@ -4771,6 +5006,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport)
        /*
         * Retrieve the checkpoint txg if the pool has a checkpoint.
         */
+       spa_import_progress_set_notes(spa, "Loading checkpoint txg");
        error = spa_ld_read_checkpoint_txg(spa);
        if (error != 0)
                return (error);
@@ -4783,6 +5019,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport)
         * initiated. Otherwise we could be reading from indirect vdevs before
         * we have loaded their mappings.
         */
+       spa_import_progress_set_notes(spa, "Loading indirect vdev metadata");
        error = spa_ld_open_indirect_vdev_metadata(spa);
        if (error != 0)
                return (error);
@@ -4791,6 +5028,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport)
         * Retrieve the full list of active features from the MOS and check if
         * they are all supported.
         */
+       spa_import_progress_set_notes(spa, "Checking feature flags");
        error = spa_ld_check_features(spa, &missing_feat_write);
        if (error != 0)
                return (error);
@@ -4799,6 +5037,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport)
         * Load several special directories from the MOS needed by the dsl_pool
         * layer.
         */
+       spa_import_progress_set_notes(spa, "Loading special MOS directories");
        error = spa_ld_load_special_directories(spa);
        if (error != 0)
                return (error);
@@ -4806,6 +5045,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport)
        /*
         * Retrieve pool properties from the MOS.
         */
+       spa_import_progress_set_notes(spa, "Loading properties");
        error = spa_ld_get_props(spa);
        if (error != 0)
                return (error);
@@ -4814,6 +5054,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport)
         * Retrieve the list of auxiliary devices - cache devices and spares -
         * and open them.
         */
+       spa_import_progress_set_notes(spa, "Loading AUX vdevs");
        error = spa_ld_open_aux_vdevs(spa, type);
        if (error != 0)
                return (error);
@@ -4822,18 +5063,26 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport)
         * Load the metadata for all vdevs. Also check if unopenable devices
         * should be autoreplaced.
         */
+       spa_import_progress_set_notes(spa, "Loading vdev metadata");
        error = spa_ld_load_vdev_metadata(spa);
        if (error != 0)
                return (error);
 
+       spa_import_progress_set_notes(spa, "Loading dedup tables");
        error = spa_ld_load_dedup_tables(spa);
        if (error != 0)
                return (error);
 
+       spa_import_progress_set_notes(spa, "Loading BRT");
+       error = spa_ld_load_brt(spa);
+       if (error != 0)
+               return (error);
+
        /*
         * Verify the logs now to make sure we don't have any unexpected errors
         * when we claim log blocks later.
         */
+       spa_import_progress_set_notes(spa, "Verifying Log Devices");
        error = spa_ld_verify_logs(spa, type, ereport);
        if (error != 0)
                return (error);
@@ -4855,6 +5104,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport)
         * state. When performing an extreme rewind, we verify the whole pool,
         * which can take a very long time.
         */
+       spa_import_progress_set_notes(spa, "Verifying pool data");
        error = spa_ld_verify_pool_data(spa);
        if (error != 0)
                return (error);
@@ -4864,6 +5114,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport)
         * we write anything to the pool because we'd need to update the space
         * accounting using the deflated sizes.
         */
+       spa_import_progress_set_notes(spa, "Calculating deflated space");
        spa_update_dspace(spa);
 
        /*
@@ -4871,12 +5122,20 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport)
         * pool. If we are importing the pool in read-write mode, a few
         * additional steps must be performed to finish the import.
         */
+       spa_import_progress_set_notes(spa, "Starting import");
        if (spa_writeable(spa) && (spa->spa_load_state == SPA_LOAD_RECOVER ||
            spa->spa_load_max_txg == UINT64_MAX)) {
                uint64_t config_cache_txg = spa->spa_config_txg;
 
                ASSERT(spa->spa_load_state != SPA_LOAD_TRYIMPORT);
 
+               /*
+                * Before we issue any zio writes, complete the raidz
+                * expansion scratch space copying, if necessary.
+                */
+               if (RRSS_GET_STATE(&spa->spa_uberblock) == RRSS_SCRATCH_VALID)
+                       vdev_raidz_reflow_copy_scratch(spa);
+
                /*
                 * In case of a checkpoint rewind, log the original txg
                 * of the checkpointed uberblock.
@@ -4887,6 +5146,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport)
                            (u_longlong_t)spa->spa_uberblock.ub_checkpoint_txg);
                }
 
+               spa_import_progress_set_notes(spa, "Claiming ZIL blocks");
                /*
                 * Traverse the ZIL and claim all blocks.
                 */
@@ -4906,6 +5166,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport)
                 * will have been set for us by ZIL traversal operations
                 * performed above.
                 */
+               spa_import_progress_set_notes(spa, "Syncing ZIL claims");
                txg_wait_synced(spa->spa_dsl_pool, spa->spa_claim_max_txg);
 
                /*
@@ -4913,6 +5174,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport)
                 * next sync, we would update the config stored in vdev labels
                 * and the cachefile (by default /etc/zfs/zpool.cache).
                 */
+               spa_import_progress_set_notes(spa, "Updating configs");
                spa_ld_check_for_config_update(spa, config_cache_txg,
                    update_config_cache);
 
@@ -4921,6 +5183,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport)
                 * Then check all DTLs to see if anything needs resilvering.
                 * The resilver will be deferred if a rebuild was started.
                 */
+               spa_import_progress_set_notes(spa, "Starting resilvers");
                if (vdev_rebuild_active(spa->spa_root_vdev)) {
                        vdev_rebuild_restart(spa);
                } else if (!dsl_scan_resilvering(spa->spa_dsl_pool) &&
@@ -4934,6 +5197,8 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport)
                 */
                spa_history_log_version(spa, "open", NULL);
 
+               spa_import_progress_set_notes(spa,
+                   "Restarting device removals");
                spa_restart_removal(spa);
                spa_spawn_aux_threads(spa);
 
@@ -4946,19 +5211,26 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport)
                 * auxiliary threads above (from which the livelist
                 * deletion zthr is part of).
                 */
+               spa_import_progress_set_notes(spa,
+                   "Cleaning up inconsistent objsets");
                (void) dmu_objset_find(spa_name(spa),
                    dsl_destroy_inconsistent, NULL, DS_FIND_CHILDREN);
 
                /*
                 * Clean up any stale temporary dataset userrefs.
                 */
+               spa_import_progress_set_notes(spa,
+                   "Cleaning up temporary userrefs");
                dsl_pool_clean_tmp_userrefs(spa->spa_dsl_pool);
 
                spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
+               spa_import_progress_set_notes(spa, "Restarting initialize");
                vdev_initialize_restart(spa->spa_root_vdev);
+               spa_import_progress_set_notes(spa, "Restarting TRIM");
                vdev_trim_restart(spa->spa_root_vdev);
                vdev_autotrim_restart(spa);
                spa_config_exit(spa, SCL_CONFIG, FTAG);
+               spa_import_progress_set_notes(spa, "Finished importing");
        }
 
        spa_import_progress_remove(spa_guid(spa));
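
Each spa_import_progress_set_notes() call above updates the notes field of
the import_progress kstat, so a long-running import can be watched from
userspace. A minimal sketch, assuming the Linux kstat path
/proc/spl/kstat/zfs/import_progress (the path and column layout are
platform details, not guaranteed by this code):

#include <stdio.h>

int
main(void)
{
        char line[1024];
        FILE *fp = fopen("/proc/spl/kstat/zfs/import_progress", "r");

        if (fp == NULL)
                return (1);
        /* Rows include the pool guid, load state, and the notes text. */
        while (fgets(line, sizeof (line), fp) != NULL)
                fputs(line, stdout);
        (void) fclose(fp);
        return (0);
}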
@@ -5108,8 +5380,8 @@ spa_load_best(spa_t *spa, spa_load_state_t state, uint64_t max_request,
  * ambiguous state.
  */
 static int
-spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t *nvpolicy,
-    nvlist_t **config)
+spa_open_common(const char *pool, spa_t **spapp, const void *tag,
+    nvlist_t *nvpolicy, nvlist_t **config)
 {
        spa_t *spa;
        spa_load_state_t state = SPA_LOAD_OPEN;
@@ -5166,7 +5438,7 @@ spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t *nvpolicy,
                         */
                        spa_unload(spa);
                        spa_deactivate(spa);
-                       spa_write_cachefile(spa, B_TRUE, B_TRUE);
+                       spa_write_cachefile(spa, B_TRUE, B_TRUE, B_FALSE);
                        spa_remove(spa);
                        if (locked)
                                mutex_exit(&spa_namespace_lock);
@@ -5180,11 +5452,10 @@ spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t *nvpolicy,
                         * attempted vdev_open().  Return this to the user.
                         */
                        if (config != NULL && spa->spa_config) {
-                               VERIFY(nvlist_dup(spa->spa_config, config,
-                                   KM_SLEEP) == 0);
-                               VERIFY(nvlist_add_nvlist(*config,
+                               *config = fnvlist_dup(spa->spa_config);
+                               fnvlist_add_nvlist(*config,
                                    ZPOOL_CONFIG_LOAD_INFO,
-                                   spa->spa_load_info) == 0);
+                                   spa->spa_load_info);
                        }
                        spa_unload(spa);
                        spa_deactivate(spa);
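
The conversions in this hunk, and in many of the hunks below, all follow
the same pattern: the fnvlist_* wrappers VERIFY success internally, so the
VERIFY(nvlist_*(...) == 0) scaffolding at each call site can be dropped. A
minimal sketch of the shape (the function name is illustrative):

static nvlist_t *
example_fnvlist_style(void)
{
        nvlist_t *nvl = fnvlist_alloc();        /* VERIFYs the allocation */

        /* Before: VERIFY(nvlist_add_uint64(nvl, "version", 1ULL) == 0); */
        fnvlist_add_uint64(nvl, "version", 1ULL);

        return (nvl);
}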
@@ -5205,9 +5476,9 @@ spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t *nvpolicy,
         * If we've recovered the pool, pass back any information we
         * gathered while doing the load.
         */
-       if (state == SPA_LOAD_RECOVER) {
-               VERIFY(nvlist_add_nvlist(*config, ZPOOL_CONFIG_LOAD_INFO,
-                   spa->spa_load_info) == 0);
+       if (state == SPA_LOAD_RECOVER && config != NULL) {
+               fnvlist_add_nvlist(*config, ZPOOL_CONFIG_LOAD_INFO,
+                   spa->spa_load_info);
        }
 
        if (locked) {
@@ -5226,14 +5497,14 @@ spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t *nvpolicy,
 }
 
 int
-spa_open_rewind(const char *name, spa_t **spapp, void *tag, nvlist_t *policy,
-    nvlist_t **config)
+spa_open_rewind(const char *name, spa_t **spapp, const void *tag,
+    nvlist_t *policy, nvlist_t **config)
 {
        return (spa_open_common(name, spapp, tag, policy, config));
 }
 
 int
-spa_open(const char *name, spa_t **spapp, void *tag)
+spa_open(const char *name, spa_t **spapp, const void *tag)
 {
        return (spa_open_common(name, spapp, tag, NULL, NULL));
 }
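
With the tag argument constified, callers can hand FTAG (or any other
const tag) to spa_open(). The usual pairing, sketched under the assumption
of an already-imported pool (the wrapper name is illustrative):

static int
with_pool(const char *name)
{
        spa_t *spa;
        int error = spa_open(name, &spa, FTAG);

        if (error != 0)
                return (error);
        /* ... use the pool while holding the reference ... */
        spa_close(spa, FTAG);
        return (0);
}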
@@ -5285,15 +5556,14 @@ spa_add_spares(spa_t *spa, nvlist_t *config)
        if (spa->spa_spares.sav_count == 0)
                return;
 
-       VERIFY(nvlist_lookup_nvlist(config,
-           ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
-       VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config,
-           ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);
+       nvroot = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE);
+       VERIFY0(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config,
+           ZPOOL_CONFIG_SPARES, &spares, &nspares));
        if (nspares != 0) {
-               VERIFY(nvlist_add_nvlist_array(nvroot,
-                   ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
-               VERIFY(nvlist_lookup_nvlist_array(nvroot,
-                   ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);
+               fnvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
+                   (const nvlist_t * const *)spares, nspares);
+               VERIFY0(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
+                   &spares, &nspares));
 
                /*
                 * Go through and find any spares which have since been
@@ -5301,15 +5571,17 @@ spa_add_spares(spa_t *spa, nvlist_t *config)
                 * their status appropriately.
                 */
                for (i = 0; i < nspares; i++) {
-                       VERIFY(nvlist_lookup_uint64(spares[i],
-                           ZPOOL_CONFIG_GUID, &guid) == 0);
+                       guid = fnvlist_lookup_uint64(spares[i],
+                           ZPOOL_CONFIG_GUID);
+                       VERIFY0(nvlist_lookup_uint64_array(spares[i],
+                           ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc));
                        if (spa_spare_exists(guid, &pool, NULL) &&
                            pool != 0ULL) {
-                               VERIFY(nvlist_lookup_uint64_array(
-                                   spares[i], ZPOOL_CONFIG_VDEV_STATS,
-                                   (uint64_t **)&vs, &vsc) == 0);
                                vs->vs_state = VDEV_STATE_CANT_OPEN;
                                vs->vs_aux = VDEV_AUX_SPARED;
+                       } else {
+                               vs->vs_state =
+                                   spa->spa_spares.sav_vdevs[i]->vdev_state;
                        }
                }
        }
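
A consumer of the config assembled here reads the spare state back out of
the per-spare vdev stats. A sketch of that walk, assuming the config shape
built above (error handling elided; the function name is illustrative):

static void
walk_spare_states(nvlist_t *config)
{
        nvlist_t *nvroot, **spares;
        uint_t nspares;

        nvroot = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE);
        if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
            &spares, &nspares) != 0)
                return;         /* pool has no spares */

        for (uint_t i = 0; i < nspares; i++) {
                vdev_stat_t *vs;
                uint_t vsc;

                VERIFY0(nvlist_lookup_uint64_array(spares[i],
                    ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc));
                /* VDEV_STATE_CANT_OPEN + VDEV_AUX_SPARED means "in use". */
        }
}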
@@ -5334,23 +5606,22 @@ spa_add_l2cache(spa_t *spa, nvlist_t *config)
        if (spa->spa_l2cache.sav_count == 0)
                return;
 
-       VERIFY(nvlist_lookup_nvlist(config,
-           ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
-       VERIFY(nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config,
-           ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0);
+       nvroot = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE);
+       VERIFY0(nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config,
+           ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache));
        if (nl2cache != 0) {
-               VERIFY(nvlist_add_nvlist_array(nvroot,
-                   ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0);
-               VERIFY(nvlist_lookup_nvlist_array(nvroot,
-                   ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0);
+               fnvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
+                   (const nvlist_t * const *)l2cache, nl2cache);
+               VERIFY0(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
+                   &l2cache, &nl2cache));
 
                /*
                 * Update level 2 cache device stats.
                 */
 
                for (i = 0; i < nl2cache; i++) {
-                       VERIFY(nvlist_lookup_uint64(l2cache[i],
-                           ZPOOL_CONFIG_GUID, &guid) == 0);
+                       guid = fnvlist_lookup_uint64(l2cache[i],
+                           ZPOOL_CONFIG_GUID);
 
                        vd = NULL;
                        for (j = 0; j < spa->spa_l2cache.sav_count; j++) {
@@ -5362,9 +5633,8 @@ spa_add_l2cache(spa_t *spa, nvlist_t *config)
                        }
                        ASSERT(vd != NULL);
 
-                       VERIFY(nvlist_lookup_uint64_array(l2cache[i],
-                           ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc)
-                           == 0);
+                       VERIFY0(nvlist_lookup_uint64_array(l2cache[i],
+                           ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc));
                        vdev_get_stats(vd, vs);
                        vdev_config_generate_stats(vd, l2cache[i]);
 
@@ -5479,20 +5749,20 @@ spa_get_stats(const char *name, nvlist_t **config,
 
                        loadtimes[0] = spa->spa_loaded_ts.tv_sec;
                        loadtimes[1] = spa->spa_loaded_ts.tv_nsec;
-                       VERIFY(nvlist_add_uint64_array(*config,
-                           ZPOOL_CONFIG_LOADED_TIME, loadtimes, 2) == 0);
+                       fnvlist_add_uint64_array(*config,
+                           ZPOOL_CONFIG_LOADED_TIME, loadtimes, 2);
 
-                       VERIFY(nvlist_add_uint64(*config,
+                       fnvlist_add_uint64(*config,
                            ZPOOL_CONFIG_ERRCOUNT,
-                           spa_get_errlog_size(spa)) == 0);
+                           spa_approx_errlog_size(spa));
 
                        if (spa_suspended(spa)) {
-                               VERIFY(nvlist_add_uint64(*config,
+                               fnvlist_add_uint64(*config,
                                    ZPOOL_CONFIG_SUSPENDED,
-                                   spa->spa_failmode) == 0);
-                               VERIFY(nvlist_add_uint64(*config,
+                                   spa->spa_failmode);
+                               fnvlist_add_uint64(*config,
                                    ZPOOL_CONFIG_SUSPENDED_REASON,
-                                   spa->spa_suspended) == 0);
+                                   spa->spa_suspended);
                        }
 
                        spa_add_spares(spa, *config);
@@ -5584,8 +5854,8 @@ spa_validate_aux_devs(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode,
 
                if ((error = vdev_open(vd)) == 0 &&
                    (error = vdev_label_init(vd, crtxg, label)) == 0) {
-                       VERIFY(nvlist_add_uint64(dev[i], ZPOOL_CONFIG_GUID,
-                           vd->vdev_guid) == 0);
+                       fnvlist_add_uint64(dev[i], ZPOOL_CONFIG_GUID,
+                           vd->vdev_guid);
                }
 
                vdev_free(vd);
@@ -5636,23 +5906,20 @@ spa_set_aux_vdevs(spa_aux_vdev_t *sav, nvlist_t **devs, int ndevs,
                 * Generate new dev list by concatenating with the
                 * current dev list.
                 */
-               VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, config,
-                   &olddevs, &oldndevs) == 0);
+               VERIFY0(nvlist_lookup_nvlist_array(sav->sav_config, config,
+                   &olddevs, &oldndevs));
 
                newdevs = kmem_alloc(sizeof (void *) *
                    (ndevs + oldndevs), KM_SLEEP);
                for (i = 0; i < oldndevs; i++)
-                       VERIFY(nvlist_dup(olddevs[i], &newdevs[i],
-                           KM_SLEEP) == 0);
+                       newdevs[i] = fnvlist_dup(olddevs[i]);
                for (i = 0; i < ndevs; i++)
-                       VERIFY(nvlist_dup(devs[i], &newdevs[i + oldndevs],
-                           KM_SLEEP) == 0);
+                       newdevs[i + oldndevs] = fnvlist_dup(devs[i]);
 
-               VERIFY(nvlist_remove(sav->sav_config, config,
-                   DATA_TYPE_NVLIST_ARRAY) == 0);
+               fnvlist_remove(sav->sav_config, config);
 
-               VERIFY(nvlist_add_nvlist_array(sav->sav_config,
-                   config, newdevs, ndevs + oldndevs) == 0);
+               fnvlist_add_nvlist_array(sav->sav_config, config,
+                   (const nvlist_t * const *)newdevs, ndevs + oldndevs);
                for (i = 0; i < oldndevs + ndevs; i++)
                        nvlist_free(newdevs[i]);
                kmem_free(newdevs, (oldndevs + ndevs) * sizeof (void *));
@@ -5660,10 +5927,9 @@ spa_set_aux_vdevs(spa_aux_vdev_t *sav, nvlist_t **devs, int ndevs,
                /*
                 * Generate a new dev list.
                 */
-               VERIFY(nvlist_alloc(&sav->sav_config, NV_UNIQUE_NAME,
-                   KM_SLEEP) == 0);
-               VERIFY(nvlist_add_nvlist_array(sav->sav_config, config,
-                   devs, ndevs) == 0);
+               sav->sav_config = fnvlist_alloc();
+               fnvlist_add_nvlist_array(sav->sav_config, config,
+                   (const nvlist_t * const *)devs, ndevs);
        }
 }
 
@@ -5713,7 +5979,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
     nvlist_t *zplprops, dsl_crypto_params_t *dcp)
 {
        spa_t *spa;
-       char *altroot = NULL;
+       const char *altroot = NULL;
        vdev_t *rvd;
        dsl_pool_t *dp;
        dmu_tx_t *tx;
@@ -5726,8 +5992,8 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
        boolean_t has_encryption;
        boolean_t has_allocclass;
        spa_feature_t feat;
-       char *feat_name;
-       char *poolname;
+       const char *feat_name;
+       const char *poolname;
        nvlist_t *nvl;
 
        if (props == NULL ||
@@ -5872,10 +6138,10 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
         */
        if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
            &spares, &nspares) == 0) {
-               VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, NV_UNIQUE_NAME,
-                   KM_SLEEP) == 0);
-               VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
-                   ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
+               spa->spa_spares.sav_config = fnvlist_alloc();
+               fnvlist_add_nvlist_array(spa->spa_spares.sav_config,
+                   ZPOOL_CONFIG_SPARES, (const nvlist_t * const *)spares,
+                   nspares);
                spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
                spa_load_spares(spa);
                spa_config_exit(spa, SCL_ALL, FTAG);
@@ -5887,10 +6153,11 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
         */
        if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
            &l2cache, &nl2cache) == 0) {
-               VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config,
-                   NV_UNIQUE_NAME, KM_SLEEP) == 0);
-               VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config,
-                   ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0);
+               VERIFY0(nvlist_alloc(&spa->spa_l2cache.sav_config,
+                   NV_UNIQUE_NAME, KM_SLEEP));
+               fnvlist_add_nvlist_array(spa->spa_l2cache.sav_config,
+                   ZPOOL_CONFIG_L2CACHE, (const nvlist_t * const *)l2cache,
+                   nl2cache);
                spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
                spa_load_l2cache(spa);
                spa_config_exit(spa, SCL_ALL, FTAG);
@@ -5905,6 +6172,10 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
         * Create DDTs (dedup tables).
         */
        ddt_create(spa);
+       /*
+        * Create BRT table and BRT table object.
+        */
+       brt_create(spa);
 
        spa_update_dspace(spa);
 
@@ -5997,7 +6268,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
 
        spa_spawn_aux_threads(spa);
 
-       spa_write_cachefile(spa, B_FALSE, B_TRUE);
+       spa_write_cachefile(spa, B_FALSE, B_TRUE, B_TRUE);
 
        /*
         * Don't count references from objsets that are already closed
@@ -6007,6 +6278,8 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
        spa->spa_minref = zfs_refcount_count(&spa->spa_refcount);
        spa->spa_load_state = SPA_LOAD_NONE;
 
+       spa_import_os(spa);
+
        mutex_exit(&spa_namespace_lock);
 
        return (0);
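
For reference, the nvroot that spa_create() consumes is an nvlist tree
rooted at a VDEV_TYPE_ROOT node. A hedged sketch of the smallest useful
shape, a single-disk pool; in practice libzfs builds this, and it is shown
here only to make the ZPOOL_CONFIG_* lookups above concrete:

static nvlist_t *
make_single_disk_nvroot(const char *path)
{
        nvlist_t *disk = fnvlist_alloc();
        nvlist_t *root = fnvlist_alloc();

        fnvlist_add_string(disk, ZPOOL_CONFIG_TYPE, VDEV_TYPE_DISK);
        fnvlist_add_string(disk, ZPOOL_CONFIG_PATH, path);

        fnvlist_add_string(root, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT);
        fnvlist_add_nvlist_array(root, ZPOOL_CONFIG_CHILDREN,
            (const nvlist_t * const *)&disk, 1);

        fnvlist_free(disk);     /* the array add took a copy */
        return (root);
}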
@@ -6019,7 +6292,7 @@ int
 spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags)
 {
        spa_t *spa;
-       char *altroot = NULL;
+       const char *altroot = NULL;
        spa_load_state_t state = SPA_LOAD_IMPORT;
        zpool_load_policy_t policy;
        spa_mode_t mode = spa_mode_global;
@@ -6058,7 +6331,7 @@ spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags)
                if (props != NULL)
                        spa_configfile_set(spa, props, B_FALSE);
 
-               spa_write_cachefile(spa, B_FALSE, B_TRUE);
+               spa_write_cachefile(spa, B_FALSE, B_TRUE, B_FALSE);
                spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_IMPORT);
                zfs_dbgmsg("spa_import: verbatim import of %s", pool);
                mutex_exit(&spa_namespace_lock);
@@ -6091,8 +6364,7 @@ spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags)
         * Propagate anything learned while loading the pool and pass it
         * back to caller (i.e. rewind info, missing devices, etc).
         */
-       VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_LOAD_INFO,
-           spa->spa_load_info) == 0);
+       fnvlist_add_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, spa->spa_load_info);
 
        spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
        /*
@@ -6110,8 +6382,7 @@ spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags)
                spa_load_l2cache(spa);
        }
 
-       VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
-           &nvroot) == 0);
+       nvroot = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE);
        spa_config_exit(spa, SCL_ALL, FTAG);
 
        if (props != NULL)
@@ -6135,13 +6406,13 @@ spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags)
        if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
            &spares, &nspares) == 0) {
                if (spa->spa_spares.sav_config)
-                       VERIFY(nvlist_remove(spa->spa_spares.sav_config,
-                           ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0);
+                       fnvlist_remove(spa->spa_spares.sav_config,
+                           ZPOOL_CONFIG_SPARES);
                else
-                       VERIFY(nvlist_alloc(&spa->spa_spares.sav_config,
-                           NV_UNIQUE_NAME, KM_SLEEP) == 0);
-               VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
-                   ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
+                       spa->spa_spares.sav_config = fnvlist_alloc();
+               fnvlist_add_nvlist_array(spa->spa_spares.sav_config,
+                   ZPOOL_CONFIG_SPARES, (const nvlist_t * const *)spares,
+                   nspares);
                spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
                spa_load_spares(spa);
                spa_config_exit(spa, SCL_ALL, FTAG);
@@ -6150,13 +6421,13 @@ spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags)
        if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
            &l2cache, &nl2cache) == 0) {
                if (spa->spa_l2cache.sav_config)
-                       VERIFY(nvlist_remove(spa->spa_l2cache.sav_config,
-                           ZPOOL_CONFIG_L2CACHE, DATA_TYPE_NVLIST_ARRAY) == 0);
+                       fnvlist_remove(spa->spa_l2cache.sav_config,
+                           ZPOOL_CONFIG_L2CACHE);
                else
-                       VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config,
-                           NV_UNIQUE_NAME, KM_SLEEP) == 0);
-               VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config,
-                   ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0);
+                       spa->spa_l2cache.sav_config = fnvlist_alloc();
+               fnvlist_add_nvlist_array(spa->spa_l2cache.sav_config,
+                   ZPOOL_CONFIG_L2CACHE, (const nvlist_t * const *)l2cache,
+                   nl2cache);
                spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
                spa_load_l2cache(spa);
                spa_config_exit(spa, SCL_ALL, FTAG);
@@ -6192,6 +6463,8 @@ spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags)
 
        zvol_create_minors_recursive(pool);
 
+       spa_import_os(spa);
+
        return (0);
 }
 
@@ -6199,7 +6472,7 @@ nvlist_t *
 spa_tryimport(nvlist_t *tryconfig)
 {
        nvlist_t *config = NULL;
-       char *poolname, *cachefile;
+       const char *poolname, *cachefile;
        spa_t *spa;
        uint64_t state;
        int error;
@@ -6239,6 +6512,16 @@ spa_tryimport(nvlist_t *tryconfig)
                spa->spa_config_source = SPA_CONFIG_SRC_SCAN;
        }
 
+       /*
+        * spa_import() relies on a pool config fetched by spa_tryimport()
+        * for spare/cache devices.  Import flags are not passed to
+        * spa_tryimport(), which can make it return early on a missing log
+        * device before it has retrieved the cache and spare devices.
+        * Passing ZFS_IMPORT_MISSING_LOG to spa_tryimport() makes it fetch
+        * the correct configuration regardless of the missing log device.
+        */
+       spa->spa_import_flags |= ZFS_IMPORT_MISSING_LOG;
+
        error = spa_load(spa, SPA_LOAD_TRYIMPORT, SPA_IMPORT_EXISTING);
 
        /*
@@ -6246,16 +6529,14 @@ spa_tryimport(nvlist_t *tryconfig)
         */
        if (spa->spa_root_vdev != NULL) {
                config = spa_config_generate(spa, NULL, -1ULL, B_TRUE);
-               VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME,
-                   poolname) == 0);
-               VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE,
-                   state) == 0);
-               VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TIMESTAMP,
-                   spa->spa_uberblock.ub_timestamp) == 0);
-               VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_LOAD_INFO,
-                   spa->spa_load_info) == 0);
-               VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_ERRATA,
-                   spa->spa_errata) == 0);
+               fnvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, poolname);
+               fnvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, state);
+               fnvlist_add_uint64(config, ZPOOL_CONFIG_TIMESTAMP,
+                   spa->spa_uberblock.ub_timestamp);
+               fnvlist_add_nvlist(config, ZPOOL_CONFIG_LOAD_INFO,
+                   spa->spa_load_info);
+               fnvlist_add_uint64(config, ZPOOL_CONFIG_ERRATA,
+                   spa->spa_errata);
 
                /*
                 * If the bootfs property exists on this pool then we
@@ -6284,8 +6565,8 @@ spa_tryimport(nvlist_t *tryconfig)
                                        (void) snprintf(dsname, MAXPATHLEN,
                                            "%s/%s", poolname, ++cp);
                                }
-                               VERIFY(nvlist_add_string(config,
-                                   ZPOOL_CONFIG_BOOTFS, dsname) == 0);
+                               fnvlist_add_string(config, ZPOOL_CONFIG_BOOTFS,
+                                   dsname);
                                kmem_free(dsname, MAXPATHLEN);
                        }
                        kmem_free(tmpname, MAXPATHLEN);
@@ -6380,6 +6661,7 @@ spa_export_common(const char *pool, int new_state, nvlist_t **oldconfig,
        }
 
        if (spa->spa_sync_on) {
+               vdev_t *rvd = spa->spa_root_vdev;
                /*
                 * A pool cannot be exported if it has an active shared spare.
                 * This is to prevent other pools stealing the active spare
@@ -6399,13 +6681,10 @@ spa_export_common(const char *pool, int new_state, nvlist_t **oldconfig,
                 * dirty data resulting from the initialization is
                 * committed to disk before we unload the pool.
                 */
-               if (spa->spa_root_vdev != NULL) {
-                       vdev_t *rvd = spa->spa_root_vdev;
-                       vdev_initialize_stop_all(rvd, VDEV_INITIALIZE_ACTIVE);
-                       vdev_trim_stop_all(rvd, VDEV_TRIM_ACTIVE);
-                       vdev_autotrim_stop_all(spa);
-                       vdev_rebuild_stop_all(spa);
-               }
+               vdev_initialize_stop_all(rvd, VDEV_INITIALIZE_ACTIVE);
+               vdev_trim_stop_all(rvd, VDEV_TRIM_ACTIVE);
+               vdev_autotrim_stop_all(spa);
+               vdev_rebuild_stop_all(spa);
 
                /*
                 * We want this to be reflected on every label,
@@ -6415,14 +6694,34 @@ spa_export_common(const char *pool, int new_state, nvlist_t **oldconfig,
                if (new_state != POOL_STATE_UNINITIALIZED && !hardforce) {
                        spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
                        spa->spa_state = new_state;
+                       vdev_config_dirty(rvd);
+                       spa_config_exit(spa, SCL_ALL, FTAG);
+               }
+
+               /*
+                * If the log space map feature is enabled and the pool is
+                * getting exported (but not destroyed), we want to spend some
+                * time flushing as many metaslabs as we can in an attempt to
+                * destroy log space maps and save import time. This has to be
+                * done before we set the spa_final_txg, otherwise
+                * spa_sync() -> spa_flush_metaslabs() may dirty the final TXGs.
+                * spa_should_flush_logs_on_unload() should be called after
+                * spa_state has been set to the new_state.
+                */
+               if (spa_should_flush_logs_on_unload(spa))
+                       spa_unload_log_sm_flush_all(spa);
+
+               if (new_state != POOL_STATE_UNINITIALIZED && !hardforce) {
+                       spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
                        spa->spa_final_txg = spa_last_synced_txg(spa) +
                            TXG_DEFER_SIZE + 1;
-                       vdev_config_dirty(spa->spa_root_vdev);
                        spa_config_exit(spa, SCL_ALL, FTAG);
                }
        }
 
 export_spa:
+       spa_export_os(spa);
+
        if (new_state == POOL_STATE_DESTROYED)
                spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_DESTROY);
        else if (new_state == POOL_STATE_EXPORTED)
@@ -6434,11 +6733,11 @@ export_spa:
        }
 
        if (oldconfig && spa->spa_config)
-               VERIFY(nvlist_dup(spa->spa_config, oldconfig, 0) == 0);
+               *oldconfig = fnvlist_dup(spa->spa_config);
 
        if (new_state != POOL_STATE_UNINITIALIZED) {
                if (!hardforce)
-                       spa_write_cachefile(spa, B_TRUE, B_TRUE);
+                       spa_write_cachefile(spa, B_TRUE, B_TRUE, B_FALSE);
                spa_remove(spa);
        } else {
                /*
@@ -6664,9 +6963,10 @@ spa_vdev_add(spa_t *spa, nvlist_t *nvroot)
 }
 
 /*
- * Attach a device to a mirror.  The arguments are the path to any device
- * in the mirror, and the nvroot for the new device.  If the path specifies
- * a device that is not mirrored, we automatically insert the mirror vdev.
+ * Attach a device to a vdev specified by its guid.  The vdev type can be
+ * a mirror, a raidz, or a leaf device that is also a top-level vdev
+ * (e.g. a single device).  When the vdev is a single device, a mirror
+ * vdev will be automatically inserted.
  *
  * If 'replacing' is specified, the new device is intended to replace the
  * existing device; in this case the two devices are made into their own
@@ -6689,7 +6989,7 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing,
        vdev_t *oldvd, *newvd, *newrootvd, *pvd, *tvd;
        vdev_ops_t *pvops;
        char *oldvdpath, *newvdpath;
-       int newvd_isspare;
+       int newvd_isspare = B_FALSE;
        int error;
 
        ASSERT(spa_writeable(spa));
@@ -6709,28 +7009,49 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing,
                if (!spa_feature_is_enabled(spa, SPA_FEATURE_DEVICE_REBUILD))
                        return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
 
-               if (dsl_scan_resilvering(spa_get_dsl(spa)))
+               if (dsl_scan_resilvering(spa_get_dsl(spa)) ||
+                   dsl_scan_resilver_scheduled(spa_get_dsl(spa))) {
                        return (spa_vdev_exit(spa, NULL, txg,
                            ZFS_ERR_RESILVER_IN_PROGRESS));
+               }
        } else {
                if (vdev_rebuild_active(rvd))
                        return (spa_vdev_exit(spa, NULL, txg,
                            ZFS_ERR_REBUILD_IN_PROGRESS));
        }
 
-       if (spa->spa_vdev_removal != NULL)
-               return (spa_vdev_exit(spa, NULL, txg, EBUSY));
+       if (spa->spa_vdev_removal != NULL) {
+               return (spa_vdev_exit(spa, NULL, txg,
+                   ZFS_ERR_DEVRM_IN_PROGRESS));
+       }
 
        if (oldvd == NULL)
                return (spa_vdev_exit(spa, NULL, txg, ENODEV));
 
-       if (!oldvd->vdev_ops->vdev_op_leaf)
+       boolean_t raidz = oldvd->vdev_ops == &vdev_raidz_ops;
+
+       if (raidz) {
+               if (!spa_feature_is_enabled(spa, SPA_FEATURE_RAIDZ_EXPANSION))
+                       return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
+
+               /*
+                * Can't expand a raidz while prior expand is in progress.
+                */
+               if (spa->spa_raidz_expand != NULL) {
+                       return (spa_vdev_exit(spa, NULL, txg,
+                           ZFS_ERR_RAIDZ_EXPAND_IN_PROGRESS));
+               }
+       } else if (!oldvd->vdev_ops->vdev_op_leaf) {
                return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
+       }
 
-       pvd = oldvd->vdev_parent;
+       if (raidz)
+               pvd = oldvd;
+       else
+               pvd = oldvd->vdev_parent;
 
-       if ((error = spa_config_parse(spa, &newrootvd, nvroot, NULL, 0,
-           VDEV_ALLOC_ATTACH)) != 0)
+       if (spa_config_parse(spa, &newrootvd, nvroot, NULL, 0,
+           VDEV_ALLOC_ATTACH) != 0)
                return (spa_vdev_exit(spa, NULL, txg, EINVAL));
 
        if (newrootvd->vdev_children != 1)
@@ -6745,10 +7066,12 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing,
                return (spa_vdev_exit(spa, newrootvd, txg, error));
 
        /*
-        * Spares can't replace logs
+        * Log, dedup, and special vdevs should not be replaced by spares.
         */
-       if (oldvd->vdev_top->vdev_islog && newvd->vdev_isspare)
+       if ((oldvd->vdev_top->vdev_alloc_bias != VDEV_BIAS_NONE ||
+           oldvd->vdev_top->vdev_islog) && newvd->vdev_isspare) {
                return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
+       }
 
        /*
         * A dRAID spare can only replace a child of its parent dRAID vdev.
@@ -6777,11 +7100,13 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing,
 
        if (!replacing) {
                /*
-                * For attach, the only allowable parent is a mirror or the root
-                * vdev.
+                * For attach, the only allowable parent is a mirror or
+                * the root vdev. A raidz vdev can be attached to, but
+                * you cannot attach to a raidz child.
                 */
                if (pvd->vdev_ops != &vdev_mirror_ops &&
-                   pvd->vdev_ops != &vdev_root_ops)
+                   pvd->vdev_ops != &vdev_root_ops &&
+                   !raidz)
                        return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
 
                pvops = &vdev_mirror_ops;
@@ -6820,7 +7145,8 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing,
        /*
         * Make sure the new device is big enough.
         */
-       if (newvd->vdev_asize < vdev_get_min_asize(oldvd))
+       vdev_t *min_vdev = raidz ? oldvd->vdev_child[0] : oldvd;
+       if (newvd->vdev_asize < vdev_get_min_asize(min_vdev))
                return (spa_vdev_exit(spa, newrootvd, txg, EOVERFLOW));
 
        /*
@@ -6830,32 +7156,75 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing,
        if (newvd->vdev_ashift > oldvd->vdev_top->vdev_ashift)
                return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
 
+       /*
+        * RAIDZ-expansion-specific checks.
+        */
+       if (raidz) {
+               if (vdev_raidz_attach_check(newvd) != 0)
+                       return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
+
+               /*
+                * Fail early if a child is not healthy or is being replaced.
+                */
+               for (int i = 0; i < oldvd->vdev_children; i++) {
+                       if (vdev_is_dead(oldvd->vdev_child[i]) ||
+                           !oldvd->vdev_child[i]->vdev_ops->vdev_op_leaf) {
+                               return (spa_vdev_exit(spa, newrootvd, txg,
+                                   ENXIO));
+                       }
+                       /* Also fail if the reserved boot area is in use */
+                       if (vdev_check_boot_reserve(spa, oldvd->vdev_child[i])
+                           != 0) {
+                               return (spa_vdev_exit(spa, newrootvd, txg,
+                                   EADDRINUSE));
+                       }
+               }
+       }
+
+       if (raidz) {
+               /*
+                * Note: oldvdpath is freed by spa_strfree(), but the string
+                * returned by kmem_asprintf() must be freed by kmem_strfree(),
+                * so we have to copy it into a spa_strdup-ed string first.
+                */
+               char *tmp = kmem_asprintf("raidz%u-%u",
+                   (uint_t)vdev_get_nparity(oldvd), (uint_t)oldvd->vdev_id);
+               oldvdpath = spa_strdup(tmp);
+               kmem_strfree(tmp);
+       } else {
+               oldvdpath = spa_strdup(oldvd->vdev_path);
+       }
+       newvdpath = spa_strdup(newvd->vdev_path);
+
        /*
         * If this is an in-place replacement, update oldvd's path and devid
         * to make it distinguishable from newvd, and unopenable from now on.
         */
-       if (strcmp(oldvd->vdev_path, newvd->vdev_path) == 0) {
+       if (strcmp(oldvdpath, newvdpath) == 0) {
                spa_strfree(oldvd->vdev_path);
-               oldvd->vdev_path = kmem_alloc(strlen(newvd->vdev_path) + 5,
+               oldvd->vdev_path = kmem_alloc(strlen(newvdpath) + 5,
                    KM_SLEEP);
-               (void) snprintf(oldvd->vdev_path, strlen(newvd->vdev_path) + 5,
-                   "%s/%s", newvd->vdev_path, "old");
+               (void) snprintf(oldvd->vdev_path, strlen(newvdpath) + 5,
+                   "%s/old", newvdpath);
                if (oldvd->vdev_devid != NULL) {
                        spa_strfree(oldvd->vdev_devid);
                        oldvd->vdev_devid = NULL;
                }
+               spa_strfree(oldvdpath);
+               oldvdpath = spa_strdup(oldvd->vdev_path);
        }
 
        /*
         * If the parent is not a mirror, or if we're replacing, insert the new
         * mirror/replacing/spare vdev above oldvd.
         */
-       if (pvd->vdev_ops != pvops)
+       if (!raidz && pvd->vdev_ops != pvops) {
                pvd = vdev_add_parent(oldvd, pvops);
+               ASSERT(pvd->vdev_ops == pvops);
+               ASSERT(oldvd->vdev_parent == pvd);
+       }
 
        ASSERT(pvd->vdev_top->vdev_parent == rvd);
-       ASSERT(pvd->vdev_ops == pvops);
-       ASSERT(oldvd->vdev_parent == pvd);
 
        /*
         * Extract the new device from its root and add it to pvd.
@@ -6883,41 +7252,66 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing,
         */
        dtl_max_txg = txg + TXG_CONCURRENT_STATES;
 
-       vdev_dtl_dirty(newvd, DTL_MISSING,
-           TXG_INITIAL, dtl_max_txg - TXG_INITIAL);
+       if (raidz) {
+               /*
+                * Wait for the youngest allocations and frees to sync,
+                * and then wait for the deferral of those frees to finish.
+                */
+               spa_vdev_config_exit(spa, NULL,
+                   txg + TXG_CONCURRENT_STATES + TXG_DEFER_SIZE, 0, FTAG);
 
-       if (newvd->vdev_isspare) {
-               spa_spare_activate(newvd);
-               spa_event_notify(spa, newvd, NULL, ESC_ZFS_VDEV_SPARE);
-       }
+               vdev_initialize_stop_all(tvd, VDEV_INITIALIZE_ACTIVE);
+               vdev_trim_stop_all(tvd, VDEV_TRIM_ACTIVE);
+               vdev_autotrim_stop_wait(tvd);
 
-       oldvdpath = spa_strdup(oldvd->vdev_path);
-       newvdpath = spa_strdup(newvd->vdev_path);
-       newvd_isspare = newvd->vdev_isspare;
+               dtl_max_txg = spa_vdev_config_enter(spa);
 
-       /*
-        * Mark newvd's DTL dirty in this txg.
-        */
-       vdev_dirty(tvd, VDD_DTL, newvd, txg);
+               tvd->vdev_rz_expanding = B_TRUE;
 
-       /*
-        * Schedule the resilver or rebuild to restart in the future. We do
-        * this to ensure that dmu_sync-ed blocks have been stitched into the
-        * respective datasets.
-        */
-       if (rebuild) {
-               newvd->vdev_rebuild_txg = txg;
+               vdev_dirty_leaves(tvd, VDD_DTL, dtl_max_txg);
+               vdev_config_dirty(tvd);
 
-               vdev_rebuild(tvd);
+               dmu_tx_t *tx = dmu_tx_create_assigned(spa->spa_dsl_pool,
+                   dtl_max_txg);
+               dsl_sync_task_nowait(spa->spa_dsl_pool, vdev_raidz_attach_sync,
+                   newvd, tx);
+               dmu_tx_commit(tx);
        } else {
-               newvd->vdev_resilver_txg = txg;
+               vdev_dtl_dirty(newvd, DTL_MISSING, TXG_INITIAL,
+                   dtl_max_txg - TXG_INITIAL);
+
+               if (newvd->vdev_isspare) {
+                       spa_spare_activate(newvd);
+                       spa_event_notify(spa, newvd, NULL, ESC_ZFS_VDEV_SPARE);
+               }
+
+               newvd_isspare = newvd->vdev_isspare;
+
+               /*
+                * Mark newvd's DTL dirty in this txg.
+                */
+               vdev_dirty(tvd, VDD_DTL, newvd, txg);
+
+               /*
+                * Schedule the resilver or rebuild to restart in the future.
+                * We do this to ensure that dmu_sync-ed blocks have been
+                * stitched into the respective datasets.
+                */
+               if (rebuild) {
+                       newvd->vdev_rebuild_txg = txg;
 
-               if (dsl_scan_resilvering(spa_get_dsl(spa)) &&
-                   spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER)) {
-                       vdev_defer_resilver(newvd);
+                       vdev_rebuild(tvd);
                } else {
-                       dsl_scan_restart_resilver(spa->spa_dsl_pool,
-                           dtl_max_txg);
+                       newvd->vdev_resilver_txg = txg;
+
+                       if (dsl_scan_resilvering(spa_get_dsl(spa)) &&
+                           spa_feature_is_enabled(spa,
+                           SPA_FEATURE_RESILVER_DEFER)) {
+                               vdev_defer_resilver(newvd);
+                       } else {
+                               dsl_scan_restart_resilver(spa->spa_dsl_pool,
+                                   dtl_max_txg);
+                       }
                }
        }
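
The raidz branch above hands vdev_raidz_attach_sync() to the sync thread
using the assigned-tx idiom: create a tx bound to a known txg, queue a
nowait sync task against it, and commit. A minimal sketch of that shape
(the callback and wrapper names are illustrative):

static void
example_sync_cb(void *arg, dmu_tx_t *tx)
{
        /* Runs exactly once, in syncing context, in the bound txg. */
        (void) arg, (void) tx;
}

static void
schedule_in_txg(spa_t *spa, uint64_t txg, void *arg)
{
        dmu_tx_t *tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg);

        dsl_sync_task_nowait(spa->spa_dsl_pool, example_sync_cb, arg, tx);
        dmu_tx_commit(tx);
}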
 
@@ -6947,7 +7341,7 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing,
  * Detach a device from a mirror or replacing vdev.
  *
  * If 'replace_done' is specified, only detach if the parent
- * is a replacing vdev.
+ * is a replacing or a spare vdev.
  */
 int
 spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)
@@ -7086,7 +7480,7 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)
         * it may be that the unwritability of the disk is the reason
         * it's being detached!
         */
-       error = vdev_label_init(vd, 0, VDEV_LABEL_REMOVE);
+       (void) vdev_label_init(vd, 0, VDEV_LABEL_REMOVE);
 
        /*
         * Remove vd from its parent and compact the parent's children.
@@ -7242,7 +7636,7 @@ spa_vdev_initialize_impl(spa_t *spa, uint64_t guid, uint64_t cmd_type,
         */
        if (cmd_type == POOL_INITIALIZE_START &&
            (vd->vdev_initialize_thread != NULL ||
-           vd->vdev_top->vdev_removing)) {
+           vd->vdev_top->vdev_removing || vd->vdev_top->vdev_rz_expanding)) {
                mutex_exit(&vd->vdev_initialize_lock);
                return (SET_ERROR(EBUSY));
        } else if (cmd_type == POOL_INITIALIZE_CANCEL &&
@@ -7254,6 +7648,10 @@ spa_vdev_initialize_impl(spa_t *spa, uint64_t guid, uint64_t cmd_type,
            vd->vdev_initialize_state != VDEV_INITIALIZE_ACTIVE) {
                mutex_exit(&vd->vdev_initialize_lock);
                return (SET_ERROR(ESRCH));
+       } else if (cmd_type == POOL_INITIALIZE_UNINIT &&
+           vd->vdev_initialize_thread != NULL) {
+               mutex_exit(&vd->vdev_initialize_lock);
+               return (SET_ERROR(EBUSY));
        }
 
        switch (cmd_type) {
@@ -7266,6 +7664,9 @@ spa_vdev_initialize_impl(spa_t *spa, uint64_t guid, uint64_t cmd_type,
        case POOL_INITIALIZE_SUSPEND:
                vdev_initialize_stop(vd, VDEV_INITIALIZE_SUSPENDED, vd_list);
                break;
+       case POOL_INITIALIZE_UNINIT:
+               vdev_uninitialize(vd);
+               break;
        default:
                panic("invalid cmd_type %llu", (unsigned long long)cmd_type);
        }
@@ -7357,7 +7758,8 @@ spa_vdev_trim_impl(spa_t *spa, uint64_t guid, uint64_t cmd_type,
         * which has completed but the thread is not exited.
         */
        if (cmd_type == POOL_TRIM_START &&
-           (vd->vdev_trim_thread != NULL || vd->vdev_top->vdev_removing)) {
+           (vd->vdev_trim_thread != NULL || vd->vdev_top->vdev_removing ||
+           vd->vdev_top->vdev_rz_expanding)) {
                mutex_exit(&vd->vdev_trim_lock);
                return (SET_ERROR(EBUSY));
        } else if (cmd_type == POOL_TRIM_CANCEL &&
@@ -7445,7 +7847,7 @@ spa_vdev_trim(spa_t *spa, nvlist_t *nv, uint64_t cmd_type, uint64_t rate,
  * Split a set of devices from their mirrors, and create a new pool from them.
  */
 int
-spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config,
+spa_vdev_split_mirror(spa_t *spa, const char *newname, nvlist_t *config,
     nvlist_t *props, boolean_t exp)
 {
        int error = 0;
@@ -7454,7 +7856,7 @@ spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config,
        uint_t c, children, lastlog;
        nvlist_t **child, *nvl, *tmp;
        dmu_tx_t *tx;
-       char *altroot = NULL;
+       const char *altroot = NULL;
        vdev_t *rvd, **vml = NULL;                      /* vdev modify list */
        boolean_t activate_slog;
 
@@ -7577,14 +7979,14 @@ spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config,
                }
 
                /* we need certain info from the top level */
-               VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_METASLAB_ARRAY,
-                   vml[c]->vdev_top->vdev_ms_array) == 0);
-               VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_METASLAB_SHIFT,
-                   vml[c]->vdev_top->vdev_ms_shift) == 0);
-               VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_ASIZE,
-                   vml[c]->vdev_top->vdev_asize) == 0);
-               VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_ASHIFT,
-                   vml[c]->vdev_top->vdev_ashift) == 0);
+               fnvlist_add_uint64(child[c], ZPOOL_CONFIG_METASLAB_ARRAY,
+                   vml[c]->vdev_top->vdev_ms_array);
+               fnvlist_add_uint64(child[c], ZPOOL_CONFIG_METASLAB_SHIFT,
+                   vml[c]->vdev_top->vdev_ms_shift);
+               fnvlist_add_uint64(child[c], ZPOOL_CONFIG_ASIZE,
+                   vml[c]->vdev_top->vdev_asize);
+               fnvlist_add_uint64(child[c], ZPOOL_CONFIG_ASHIFT,
+                   vml[c]->vdev_top->vdev_ashift);
 
                /* transfer per-vdev ZAPs */
                ASSERT3U(vml[c]->vdev_leaf_zap, !=, 0);
@@ -7614,28 +8016,24 @@ spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config,
         * Temporarily record the splitting vdevs in the spa config.  This
         * will disappear once the config is regenerated.
         */
-       VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
-       VERIFY(nvlist_add_uint64_array(nvl, ZPOOL_CONFIG_SPLIT_LIST,
-           glist, children) == 0);
+       nvl = fnvlist_alloc();
+       fnvlist_add_uint64_array(nvl, ZPOOL_CONFIG_SPLIT_LIST, glist, children);
        kmem_free(glist, children * sizeof (uint64_t));
 
        mutex_enter(&spa->spa_props_lock);
-       VERIFY(nvlist_add_nvlist(spa->spa_config, ZPOOL_CONFIG_SPLIT,
-           nvl) == 0);
+       fnvlist_add_nvlist(spa->spa_config, ZPOOL_CONFIG_SPLIT, nvl);
        mutex_exit(&spa->spa_props_lock);
        spa->spa_config_splitting = nvl;
        vdev_config_dirty(spa->spa_root_vdev);
 
        /* configure and create the new pool */
-       VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, newname) == 0);
-       VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE,
-           exp ? POOL_STATE_EXPORTED : POOL_STATE_ACTIVE) == 0);
-       VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VERSION,
-           spa_version(spa)) == 0);
-       VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG,
-           spa->spa_config_txg) == 0);
-       VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID,
-           spa_generate_guid(NULL)) == 0);
+       fnvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, newname);
+       fnvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE,
+           exp ? POOL_STATE_EXPORTED : POOL_STATE_ACTIVE);
+       fnvlist_add_uint64(config, ZPOOL_CONFIG_VERSION, spa_version(spa));
+       fnvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG, spa->spa_config_txg);
+       fnvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID,
+           spa_generate_guid(NULL));
        VERIFY0(nvlist_add_boolean(config, ZPOOL_CONFIG_HAS_PER_VDEV_ZAPS));
        (void) nvlist_lookup_string(props,
            zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
@@ -7697,10 +8095,9 @@ spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config,
 
        /* if that worked, generate a real config for the new pool */
        if (newspa->spa_root_vdev != NULL) {
-               VERIFY(nvlist_alloc(&newspa->spa_config_splitting,
-                   NV_UNIQUE_NAME, KM_SLEEP) == 0);
-               VERIFY(nvlist_add_uint64(newspa->spa_config_splitting,
-                   ZPOOL_CONFIG_SPLIT_GUID, spa_guid(spa)) == 0);
+               newspa->spa_config_splitting = fnvlist_alloc();
+               fnvlist_add_uint64(newspa->spa_config_splitting,
+                   ZPOOL_CONFIG_SPLIT_GUID, spa_guid(spa));
                spa_config_set(newspa, spa_config_generate(newspa, NULL, -1ULL,
                    B_TRUE));
        }
@@ -8004,6 +8401,7 @@ spa_scan_stop(spa_t *spa)
        ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0);
        if (dsl_scan_resilvering(spa->spa_dsl_pool))
                return (SET_ERROR(EBUSY));
+
        return (dsl_scan_cancel(spa->spa_dsl_pool));
 }
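
spa_scan() and spa_scan_stop() give the ioctl layer a start/cancel pair. A
sketch of the pairing for a plain scrub, assuming the pool is open and
writable (the wrapper name is illustrative):

static int
scrub_then_cancel(spa_t *spa)
{
        int error = spa_scan(spa, POOL_SCAN_SCRUB);

        if (error != 0)
                return (error);
        /* ... later; the stop fails with EBUSY while resilvering ... */
        return (spa_scan_stop(spa));
}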
 
@@ -8029,6 +8427,10 @@ spa_scan(spa_t *spa, pool_scan_func_t func)
                return (0);
        }
 
+       if (func == POOL_SCAN_ERRORSCRUB &&
+           !spa_feature_is_enabled(spa, SPA_FEATURE_HEAD_ERRLOG))
+               return (SET_ERROR(ENOTSUP));
+
        return (dsl_scan(spa->spa_dsl_pool, func));
 }
 
@@ -8095,7 +8497,7 @@ spa_async_autoexpand(spa_t *spa, vdev_t *vd)
        spa_event_notify(vd->vdev_spa, vd, NULL, ESC_ZFS_VDEV_AUTOEXPAND);
 }
 
-static void
+static __attribute__((noreturn)) void
 spa_async_thread(void *arg)
 {
        spa_t *spa = (spa_t *)arg;
@@ -8175,7 +8577,8 @@ spa_async_thread(void *arg)
         * If any devices are done replacing, detach them.
         */
        if (tasks & SPA_ASYNC_RESILVER_DONE ||
-           tasks & SPA_ASYNC_REBUILD_DONE) {
+           tasks & SPA_ASYNC_REBUILD_DONE ||
+           tasks & SPA_ASYNC_DETACH_SPARE) {
                spa_vdev_resilver_done(spa);
        }
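
The new SPA_ASYNC_DETACH_SPARE task reaches this handler the same way the
other flags do, via spa_async_request(). A minimal sketch of the producer
side (the wrapper name is illustrative):

static void
request_spare_detach(spa_t *spa)
{
        /*
         * Picked up by spa_async_thread() above and routed to
         * spa_vdev_resilver_done().
         */
        spa_async_request(spa, SPA_ASYNC_DETACH_SPARE);
}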
 
@@ -8259,6 +8662,10 @@ spa_async_suspend(spa_t *spa)
        if (condense_thread != NULL)
                zthr_cancel(condense_thread);
 
+       zthr_t *raidz_expand_thread = spa->spa_raidz_expand_zthr;
+       if (raidz_expand_thread != NULL)
+               zthr_cancel(raidz_expand_thread);
+
        zthr_t *discard_thread = spa->spa_checkpoint_discard_zthr;
        if (discard_thread != NULL)
                zthr_cancel(discard_thread);
@@ -8285,6 +8692,10 @@ spa_async_resume(spa_t *spa)
        if (condense_thread != NULL)
                zthr_resume(condense_thread);
 
+       zthr_t *raidz_expand_thread = spa->spa_raidz_expand_zthr;
+       if (raidz_expand_thread != NULL)
+               zthr_resume(raidz_expand_thread);
+
        zthr_t *discard_thread = spa->spa_checkpoint_discard_zthr;
        if (discard_thread != NULL)
                zthr_resume(discard_thread);
@@ -8451,7 +8862,7 @@ spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx)
 
        VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR,
            KM_SLEEP) == 0);
-       bzero(packed + nvsize, bufsize - nvsize);
+       memset(packed + nvsize, 0, bufsize - nvsize);
 
        dmu_write(spa->spa_meta_objset, obj, 0, bufsize, packed, tx);
 
@@ -8488,16 +8899,17 @@ spa_sync_aux_dev(spa_t *spa, spa_aux_vdev_t *sav, dmu_tx_t *tx,
                    &sav->sav_object, tx) == 0);
        }
 
-       VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+       nvroot = fnvlist_alloc();
        if (sav->sav_count == 0) {
-               VERIFY(nvlist_add_nvlist_array(nvroot, config, NULL, 0) == 0);
+               fnvlist_add_nvlist_array(nvroot, config,
+                   (const nvlist_t * const *)NULL, 0);
        } else {
                list = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP);
                for (i = 0; i < sav->sav_count; i++)
                        list[i] = vdev_config_generate(spa, sav->sav_vdevs[i],
                            B_FALSE, VDEV_CONFIG_L2CACHE);
-               VERIFY(nvlist_add_nvlist_array(nvroot, config, list,
-                   sav->sav_count) == 0);
+               fnvlist_add_nvlist_array(nvroot, config,
+                   (const nvlist_t * const *)list, sav->sav_count);
                for (i = 0; i < sav->sav_count; i++)
                        nvlist_free(list[i]);
                kmem_free(list, sav->sav_count * sizeof (void *));
@@ -8518,6 +8930,11 @@ spa_avz_build(vdev_t *vd, uint64_t avz, dmu_tx_t *tx)
 {
        spa_t *spa = vd->vdev_spa;
 
+       if (vd->vdev_root_zap != 0 &&
+           spa_feature_is_active(spa, SPA_FEATURE_AVZ_V2)) {
+               VERIFY0(zap_add_int(spa->spa_meta_objset, avz,
+                   vd->vdev_root_zap, tx));
+       }
        if (vd->vdev_top_zap != 0) {
                VERIFY0(zap_add_int(spa->spa_meta_objset, avz,
                    vd->vdev_top_zap, tx));
@@ -8678,27 +9095,14 @@ spa_sync_props(void *arg, dmu_tx_t *tx)
 
        while ((elem = nvlist_next_nvpair(nvp, elem))) {
                uint64_t intval;
-               char *strval, *fname;
+               const char *strval, *fname;
                zpool_prop_t prop;
                const char *propname;
+               const char *elemname = nvpair_name(elem);
                zprop_type_t proptype;
                spa_feature_t fid;
 
-               switch (prop = zpool_name_to_prop(nvpair_name(elem))) {
-               case ZPOOL_PROP_INVAL:
-                       /*
-                        * We checked this earlier in spa_prop_validate().
-                        */
-                       ASSERT(zpool_prop_feature(nvpair_name(elem)));
-
-                       fname = strchr(nvpair_name(elem), '@') + 1;
-                       VERIFY0(zfeature_lookup_name(fname, &fid));
-
-                       spa_feature_enable(spa, fid, tx);
-                       spa_history_log_internal(spa, "set", tx,
-                           "%s=enabled", nvpair_name(elem));
-                       break;
-
+               switch (prop = zpool_name_to_prop(elemname)) {
                case ZPOOL_PROP_VERSION:
                        intval = fnvpair_value_uint64(elem);
                        /*
@@ -8730,14 +9134,18 @@ spa_sync_props(void *arg, dmu_tx_t *tx)
                        spa->spa_comment = spa_strdup(strval);
                        /*
                         * We need to dirty the configuration on all the vdevs
-                        * so that their labels get updated.  It's unnecessary
-                        * to do this for pool creation since the vdev's
-                        * configuration has already been dirtied.
+                        * so that their labels get updated.  We also need to
+                        * update the cache file to keep it in sync with the
+                        * MOS version. It's unnecessary to do this for pool
+                        * creation since the vdev's configuration has already
+                        * been dirtied.
                         */
-                       if (tx->tx_txg != TXG_INITIAL)
+                       if (tx->tx_txg != TXG_INITIAL) {
                                vdev_config_dirty(spa->spa_root_vdev);
+                               spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);
+                       }
                        spa_history_log_internal(spa, "set", tx,
-                           "%s=%s", nvpair_name(elem), strval);
+                           "%s=%s", elemname, strval);
                        break;
                case ZPOOL_PROP_COMPATIBILITY:
                        strval = fnvpair_value_string(elem);
@@ -8747,12 +9155,29 @@ spa_sync_props(void *arg, dmu_tx_t *tx)
                        /*
                         * Dirty the configuration on vdevs as above.
                         */
-                       if (tx->tx_txg != TXG_INITIAL)
+                       if (tx->tx_txg != TXG_INITIAL) {
                                vdev_config_dirty(spa->spa_root_vdev);
+                               spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);
+                       }
+
                        spa_history_log_internal(spa, "set", tx,
                            "%s=%s", nvpair_name(elem), strval);
                        break;
 
+               case ZPOOL_PROP_INVAL:
+                       if (zpool_prop_feature(elemname)) {
+                               fname = strchr(elemname, '@') + 1;
+                               VERIFY0(zfeature_lookup_name(fname, &fid));
+
+                               spa_feature_enable(spa, fid, tx);
+                               spa_history_log_internal(spa, "set", tx,
+                                   "%s=enabled", elemname);
+                               break;
+                       } else if (!zfs_prop_user(elemname)) {
+                               ASSERT(zpool_prop_feature(elemname));
+                               break;
+                       }
+                       zfs_fallthrough;
                default:
                        /*
                         * Set pool property values in the poolprops mos object.
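Editor's note: with pool user properties now accepted, a name that maps to ZPOOL_PROP_INVAL is classified by the new branch: feature properties ("feature@...") are enabled immediately, user properties (names containing ':') fall through to the default case and are stored as strings, and anything else should already have been rejected by spa_prop_validate(). The zfs_fallthrough macro expands to the compiler's fallthrough attribute, replacing the old comment-style annotation. A sketch of the classification (property names are illustrative):

	if (zpool_prop_feature(name)) {
		/* e.g. "feature@async_destroy": enable the feature flag */
	} else if (zfs_prop_user(name)) {
		/* e.g. "com.example:owner": stored as a string in the MOS */
	} else {
		/* invalid; spa_prop_validate() rejects this earlier */
	}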
@@ -8765,8 +9190,13 @@ spa_sync_props(void *arg, dmu_tx_t *tx)
                        }
 
                        /* normalize the property name */
-                       propname = zpool_prop_to_name(prop);
-                       proptype = zpool_prop_get_type(prop);
+                       if (prop == ZPOOL_PROP_INVAL) {
+                               propname = elemname;
+                               proptype = PROP_TYPE_STRING;
+                       } else {
+                               propname = zpool_prop_to_name(prop);
+                               proptype = zpool_prop_get_type(prop);
+                       }
 
                        if (nvpair_type(elem) == DATA_TYPE_STRING) {
                                ASSERT(proptype == PROP_TYPE_STRING);
@@ -8775,7 +9205,7 @@ spa_sync_props(void *arg, dmu_tx_t *tx)
                                    spa->spa_pool_props_object, propname,
                                    1, strlen(strval) + 1, strval, tx));
                                spa_history_log_internal(spa, "set", tx,
-                                   "%s=%s", nvpair_name(elem), strval);
+                                   "%s=%s", elemname, strval);
                        } else if (nvpair_type(elem) == DATA_TYPE_UINT64) {
                                intval = fnvpair_value_uint64(elem);
 
@@ -8788,38 +9218,38 @@ spa_sync_props(void *arg, dmu_tx_t *tx)
                                    spa->spa_pool_props_object, propname,
                                    8, 1, &intval, tx));
                                spa_history_log_internal(spa, "set", tx,
-                                   "%s=%lld", nvpair_name(elem),
+                                   "%s=%lld", elemname,
                                    (longlong_t)intval);
-                       } else {
-                               ASSERT(0); /* not allowed */
-                       }
 
-                       switch (prop) {
-                       case ZPOOL_PROP_DELEGATION:
-                               spa->spa_delegation = intval;
-                               break;
-                       case ZPOOL_PROP_BOOTFS:
-                               spa->spa_bootfs = intval;
-                               break;
-                       case ZPOOL_PROP_FAILUREMODE:
-                               spa->spa_failmode = intval;
-                               break;
-                       case ZPOOL_PROP_AUTOTRIM:
-                               spa->spa_autotrim = intval;
-                               spa_async_request(spa,
-                                   SPA_ASYNC_AUTOTRIM_RESTART);
-                               break;
-                       case ZPOOL_PROP_AUTOEXPAND:
-                               spa->spa_autoexpand = intval;
-                               if (tx->tx_txg != TXG_INITIAL)
+                               switch (prop) {
+                               case ZPOOL_PROP_DELEGATION:
+                                       spa->spa_delegation = intval;
+                                       break;
+                               case ZPOOL_PROP_BOOTFS:
+                                       spa->spa_bootfs = intval;
+                                       break;
+                               case ZPOOL_PROP_FAILUREMODE:
+                                       spa->spa_failmode = intval;
+                                       break;
+                               case ZPOOL_PROP_AUTOTRIM:
+                                       spa->spa_autotrim = intval;
                                        spa_async_request(spa,
-                                           SPA_ASYNC_AUTOEXPAND);
-                               break;
-                       case ZPOOL_PROP_MULTIHOST:
-                               spa->spa_multihost = intval;
-                               break;
-                       default:
-                               break;
+                                           SPA_ASYNC_AUTOTRIM_RESTART);
+                                       break;
+                               case ZPOOL_PROP_AUTOEXPAND:
+                                       spa->spa_autoexpand = intval;
+                                       if (tx->tx_txg != TXG_INITIAL)
+                                               spa_async_request(spa,
+                                                   SPA_ASYNC_AUTOEXPAND);
+                                       break;
+                               case ZPOOL_PROP_MULTIHOST:
+                                       spa->spa_multihost = intval;
+                                       break;
+                               default:
+                                       break;
+                               }
+                       } else {
+                               ASSERT(0); /* not allowed */
                        }
                }
 
@@ -9057,8 +9487,10 @@ spa_sync_iterate_to_convergence(spa_t *spa, dmu_tx_t *tx)
                            &spa->spa_deferred_bpobj, tx);
                }
 
+               brt_sync(spa, txg);
                ddt_sync(spa, txg);
                dsl_scan_sync(dp, tx);
+               dsl_errorscrub_sync(dp, tx);
                svr_sync(spa, tx);
                spa_sync_upgrades(spa, tx);
 
@@ -9069,6 +9501,27 @@ spa_sync_iterate_to_convergence(spa_t *spa, dmu_tx_t *tx)
                    != NULL)
                        vdev_sync(vd, txg);
 
+               if (pass == 1) {
+                       /*
+                        * dsl_pool_sync() -> dp_sync_tasks may have dirtied
+                        * the config. If that happens, this txg should not
+                        * be a no-op. So we must sync the config to the MOS
+                        * before checking for no-op.
+                        *
+                        * Note that when the config is dirty, it will
+                        * be written to the MOS (i.e. the MOS will be
+                        * dirtied) every time we call spa_sync_config_object()
+                        * in this txg.  Therefore we can't call this after
+                        * dsl_pool_sync() every pass, because it would
+                        * prevent us from converging, since we'd dirty
+                        * the MOS every pass.
+                        *
+                        * Sync tasks can only be processed in pass 1, so
+                        * there's no need to do this in later passes.
+                        */
+                       spa_sync_config_object(spa, tx);
+               }
+
                /*
                 * Note: We need to check if the MOS is dirty because we could
                 * have marked the MOS dirty without updating the uberblock
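Editor's note: the pass == 1 hunk above encodes a subtle convergence argument: sync tasks run only in the first pass, and writing the config object itself dirties the MOS. A simplified model of the loop, with illustrative helper names rather than the real spa.c routines:

	int pass = 0;
	do {
		pass++;
		sync_dirty_data(txg);		/* pass 1 may run sync tasks */
		if (pass == 1)
			sync_config_to_mos(txg);	/* dirties the MOS once */
		/*
		 * If the config were synced on every pass, the MOS would be
		 * re-dirtied each iteration and this loop could never observe
		 * a clean MOS and terminate.
		 */
	} while (mos_is_dirty(txg));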
@@ -9181,6 +9634,13 @@ spa_sync(spa_t *spa, uint64_t txg)
        spa->spa_txg_zio[txg & TXG_MASK] = zio_root(spa, NULL, NULL,
            ZIO_FLAG_CANFAIL);
 
+       /*
+        * Now that there can be no more cloning in this transaction group,
+        * but we are still before issuing frees, we can process pending BRT
+        * updates.
+        */
+       brt_pending_apply(spa, txg);
+
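Editor's note: pending BRT (Block Reference Table) entries are recorded as blocks are cloned during the open txg; applying them here, after the last clone of the txg but before any frees are issued, settles the reference counts before a cloned block could be freed. A rough sketch of the per-txg lifecycle, assuming brt_pending_add() is the entry point used by block cloning:

	/* open context: block cloning records a pending reference */
	brt_pending_add(spa, bp, tx);

	/* syncing context, early in spa_sync() (above): convert pending
	 * entries into real BRT reference counts before frees are issued */
	brt_pending_apply(spa, txg);

	/* later, each sync pass writes out accumulated BRT changes */
	brt_sync(spa, txg);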
        /*
         * Lock out configuration changes.
         */
@@ -9190,9 +9650,9 @@ spa_sync(spa_t *spa, uint64_t txg)
        spa->spa_sync_pass = 0;
 
        for (int i = 0; i < spa->spa_alloc_count; i++) {
-               mutex_enter(&spa->spa_alloc_locks[i]);
-               VERIFY0(avl_numnodes(&spa->spa_alloc_trees[i]));
-               mutex_exit(&spa->spa_alloc_locks[i]);
+               mutex_enter(&spa->spa_allocs[i].spaa_lock);
+               VERIFY0(avl_numnodes(&spa->spa_allocs[i].spaa_tree));
+               mutex_exit(&spa->spa_allocs[i].spaa_lock);
        }
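Editor's note: the former parallel spa_alloc_locks[] / spa_alloc_trees[] arrays were merged into a single spa_allocs[] array so each allocator's lock sits beside the tree it protects. The per-allocator struct has roughly this shape (field names from the code above; layout details such as cacheline alignment are an assumption):

	typedef struct {
		kmutex_t	spaa_lock;	/* protects spaa_tree */
		avl_tree_t	spaa_tree;	/* throttled allocations */
	} spa_alloc_t;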
 
        /*
@@ -9200,7 +9660,13 @@ spa_sync(spa_t *spa, uint64_t txg)
         * into config changes that go out with this transaction group.
         */
        spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
-       while (list_head(&spa->spa_state_dirty_list) != NULL) {
+       while ((vd = list_head(&spa->spa_state_dirty_list)) != NULL) {
+               /* Avoid holding the write lock unless actually necessary */
+               if (vd->vdev_aux == NULL) {
+                       vdev_state_clean(vd);
+                       vdev_config_dirty(vd);
+                       continue;
+               }
                /*
                 * We need the write lock here because, for aux vdevs,
                 * calling vdev_config_dirty() modifies sav_config.
@@ -9302,9 +9768,9 @@ spa_sync(spa_t *spa, uint64_t txg)
        dsl_pool_sync_done(dp, txg);
 
        for (int i = 0; i < spa->spa_alloc_count; i++) {
-               mutex_enter(&spa->spa_alloc_locks[i]);
-               VERIFY0(avl_numnodes(&spa->spa_alloc_trees[i]));
-               mutex_exit(&spa->spa_alloc_locks[i]);
+               mutex_enter(&spa->spa_allocs[i].spaa_lock);
+               VERIFY0(avl_numnodes(&spa->spa_allocs[i].spaa_tree));
+               mutex_exit(&spa->spa_allocs[i].spaa_lock);
        }
 
        /*
@@ -9321,6 +9787,9 @@ spa_sync(spa_t *spa, uint64_t txg)
 
        spa_update_dspace(spa);
 
+       if (spa_get_autotrim(spa) == SPA_AUTOTRIM_ON)
+               vdev_autotrim_kick(spa);
+
        /*
         * It had better be the case that we didn't dirty anything
         * since vdev_config_sync().
@@ -9374,6 +9843,104 @@ spa_sync_allpools(void)
        mutex_exit(&spa_namespace_lock);
 }
 
+taskq_t *
+spa_sync_tq_create(spa_t *spa, const char *name)
+{
+       kthread_t **kthreads;
+
+       ASSERT(spa->spa_sync_tq == NULL);
+       ASSERT3S(spa->spa_alloc_count, <=, boot_ncpus);
+
+       /*
+        * - do not allow more allocators than cpus.
+        * - there may be more cpus than allocators.
+        * - do not allow more sync taskq threads than allocators or cpus.
+        */
+       int nthreads = spa->spa_alloc_count;
+       spa->spa_syncthreads = kmem_zalloc(sizeof (spa_syncthread_info_t) *
+           nthreads, KM_SLEEP);
+
+       spa->spa_sync_tq = taskq_create_synced(name, nthreads, minclsyspri,
+           nthreads, INT_MAX, TASKQ_PREPOPULATE, &kthreads);
+       VERIFY(spa->spa_sync_tq != NULL);
+       VERIFY(kthreads != NULL);
+
+       spa_taskqs_t *tqs =
+           &spa->spa_zio_taskq[ZIO_TYPE_WRITE][ZIO_TASKQ_ISSUE];
+
+       spa_syncthread_info_t *ti = spa->spa_syncthreads;
+       for (int i = 0, w = 0; i < nthreads; i++, w++, ti++) {
+               ti->sti_thread = kthreads[i];
+               if (w == tqs->stqs_count) {
+                       w = 0;
+               }
+               ti->sti_wr_iss_tq = tqs->stqs_taskq[w];
+       }
+
+       kmem_free(kthreads, sizeof (*kthreads) * nthreads);
+       return (spa->spa_sync_tq);
+}
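Editor's note: the manual wrap of w in the loop above is a plain round-robin mapping of sync threads onto the write-issue taskqs; it is equivalent to the modulo form (illustration only):

	ti->sti_wr_iss_tq = tqs->stqs_taskq[i % tqs->stqs_count];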
+
+void
+spa_sync_tq_destroy(spa_t *spa)
+{
+       ASSERT(spa->spa_sync_tq != NULL);
+
+       taskq_wait(spa->spa_sync_tq);
+       taskq_destroy(spa->spa_sync_tq);
+       kmem_free(spa->spa_syncthreads,
+           sizeof (spa_syncthread_info_t) * spa->spa_alloc_count);
+       spa->spa_sync_tq = NULL;
+}
+
+void
+spa_select_allocator(zio_t *zio)
+{
+       zbookmark_phys_t *bm = &zio->io_bookmark;
+       spa_t *spa = zio->io_spa;
+
+       ASSERT(zio->io_type == ZIO_TYPE_WRITE);
+
+       /*
+        * A gang block (for example) may have inherited its parent's
+        * allocator, in which case there is nothing further to do here.
+        */
+       if (ZIO_HAS_ALLOCATOR(zio))
+               return;
+
+       ASSERT(spa != NULL);
+       ASSERT(bm != NULL);
+
+       /*
+        * First try to use an allocator assigned to the syncthread, and set
+        * the corresponding write issue taskq for the allocator.
+        * Note, we must have an open pool to do this.
+        */
+       if (spa->spa_sync_tq != NULL) {
+               spa_syncthread_info_t *ti = spa->spa_syncthreads;
+               for (int i = 0; i < spa->spa_alloc_count; i++, ti++) {
+                       if (ti->sti_thread == curthread) {
+                               zio->io_allocator = i;
+                               zio->io_wr_iss_tq = ti->sti_wr_iss_tq;
+                               return;
+                       }
+               }
+       }
+
+       /*
+        * We want to try to use as many allocators as possible to help improve
+        * performance, but we also want logically adjacent IOs to be physically
+        * adjacent to improve sequential read performance. We chunk each object
+        * into 2^20 block regions, and then hash based on the objset, object,
+        * level, and region to accomplish both of these goals.
+        */
+       uint64_t hv = cityhash4(bm->zb_objset, bm->zb_object, bm->zb_level,
+           bm->zb_blkid >> 20);
+
+       zio->io_allocator = (uint_t)hv % spa->spa_alloc_count;
+       zio->io_wr_iss_tq = NULL;
+}
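Editor's note: for writes not issued from a sync thread, the hash balances the two goals named in the comment: spread load across allocators, but keep logically adjacent blocks together. Because zb_blkid is shifted down by 20 bits before hashing, every block in the same 2^20-block region produces identical hash inputs. A small illustration, where os and obj stand for arbitrary objset/object numbers:

	/* blkids 7 and 1000 are in region 0: identical inputs, same hash */
	uint64_t h0 = cityhash4(os, obj, 0, 7 >> 20);
	uint64_t h1 = cityhash4(os, obj, 0, 1000 >> 20);

	/* blkid 2^20 is in region 1 and may select another allocator */
	uint64_t h2 = cityhash4(os, obj, 0, (1ULL << 20) >> 20);

	ASSERT3U(h0, ==, h1);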
+
 /*
  * ==========================================================================
  * Miscellaneous routines
@@ -9463,12 +10030,12 @@ spa_upgrade(spa_t *spa, uint64_t version)
        txg_wait_synced(spa_get_dsl(spa), 0);
 }
 
-boolean_t
-spa_has_spare(spa_t *spa, uint64_t guid)
+static boolean_t
+spa_has_aux_vdev(spa_t *spa, uint64_t guid, spa_aux_vdev_t *sav)
 {
+       (void) spa;
        int i;
-       uint64_t spareguid;
-       spa_aux_vdev_t *sav = &spa->spa_spares;
+       uint64_t vdev_guid;
 
        for (i = 0; i < sav->sav_count; i++)
                if (sav->sav_vdevs[i]->vdev_guid == guid)
@@ -9476,13 +10043,25 @@ spa_has_spare(spa_t *spa, uint64_t guid)
 
        for (i = 0; i < sav->sav_npending; i++) {
                if (nvlist_lookup_uint64(sav->sav_pending[i], ZPOOL_CONFIG_GUID,
-                   &spareguid) == 0 && spareguid == guid)
+                   &vdev_guid) == 0 && vdev_guid == guid)
                        return (B_TRUE);
        }
 
        return (B_FALSE);
 }
 
+boolean_t
+spa_has_l2cache(spa_t *spa, uint64_t guid)
+{
+       return (spa_has_aux_vdev(spa, guid, &spa->spa_l2cache));
+}
+
+boolean_t
+spa_has_spare(spa_t *spa, uint64_t guid)
+{
+       return (spa_has_aux_vdev(spa, guid, &spa->spa_spares));
+}
+
 /*
  * Check if a pool has an active shared spare device.
  * Note: reference count of an active spare is 2, as a spare and as a replace
@@ -9700,9 +10279,10 @@ spa_activity_in_progress(spa_t *spa, zpool_wait_activity_t activity,
                    DSS_SCANNING);
                break;
        case ZPOOL_WAIT_RESILVER:
-               if ((*in_progress = vdev_rebuild_active(spa->spa_root_vdev)))
+               *in_progress = vdev_rebuild_active(spa->spa_root_vdev);
+               if (*in_progress)
                        break;
-               /* fall through */
+               zfs_fallthrough;
        case ZPOOL_WAIT_SCRUB:
        {
                boolean_t scanning, paused, is_scrub;
@@ -9715,6 +10295,12 @@ spa_activity_in_progress(spa_t *spa, zpool_wait_activity_t activity,
                    is_scrub == (activity == ZPOOL_WAIT_SCRUB));
                break;
        }
+       case ZPOOL_WAIT_RAIDZ_EXPAND:
+       {
+               vdev_raidz_expand_t *vre = spa->spa_raidz_expand;
+               *in_progress = (vre != NULL && vre->vre_state == DSS_SCANNING);
+               break;
+       }
        default:
                panic("unrecognized value for activity %d", activity);
        }
@@ -9818,6 +10404,8 @@ spa_event_create(spa_t *spa, vdev_t *vd, nvlist_t *hist_nvl, const char *name)
                ev = kmem_alloc(sizeof (sysevent_t), KM_SLEEP);
                ev->resource = resource;
        }
+#else
+       (void) spa, (void) vd, (void) hist_nvl, (void) name;
 #endif
        return (ev);
 }
@@ -9830,6 +10418,8 @@ spa_event_post(sysevent_t *ev)
                zfs_zevent_post(ev->resource, NULL, zfs_zevent_post_cb);
                kmem_free(ev, sizeof (*ev));
        }
+#else
+       (void) ev;
 #endif
 }
 
@@ -9901,10 +10491,14 @@ EXPORT_SYMBOL(spa_prop_clear_bootfs);
 /* asynchronous event notification */
 EXPORT_SYMBOL(spa_event_notify);
 
+ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, preload_pct, UINT, ZMOD_RW,
+       "Percentage of CPUs to run a metaslab preload taskq");
+
 /* BEGIN CSTYLED */
-ZFS_MODULE_PARAM(zfs_spa, spa_, load_verify_shift, INT, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_spa, spa_, load_verify_shift, UINT, ZMOD_RW,
        "log2 fraction of arc that can be used by inflight I/Os when "
        "verifying pool during import");
+/* END CSTYLED */
 
 ZFS_MODULE_PARAM(zfs_spa, spa_, load_verify_metadata, INT, ZMOD_RW,
        "Set to traverse metadata on pool import");
@@ -9921,23 +10515,32 @@ ZFS_MODULE_PARAM(zfs_zio, zio_, taskq_batch_pct, UINT, ZMOD_RD,
 ZFS_MODULE_PARAM(zfs_zio, zio_, taskq_batch_tpq, UINT, ZMOD_RD,
        "Number of threads per IO worker taskqueue");
 
-ZFS_MODULE_PARAM(zfs, zfs_, max_missing_tvds, ULONG, ZMOD_RW,
+/* BEGIN CSTYLED */
+ZFS_MODULE_PARAM(zfs, zfs_, max_missing_tvds, U64, ZMOD_RW,
        "Allow importing pool with up to this number of missing top-level "
        "vdevs (in read-only mode)");
+/* END CSTYLED */
 
-ZFS_MODULE_PARAM(zfs_livelist_condense, zfs_livelist_condense_, zthr_pause, INT, ZMOD_RW,
-       "Set the livelist condense zthr to pause");
+ZFS_MODULE_PARAM(zfs_livelist_condense, zfs_livelist_condense_, zthr_pause, INT,
+       ZMOD_RW, "Set the livelist condense zthr to pause");
 
-ZFS_MODULE_PARAM(zfs_livelist_condense, zfs_livelist_condense_, sync_pause, INT, ZMOD_RW,
-       "Set the livelist condense synctask to pause");
+ZFS_MODULE_PARAM(zfs_livelist_condense, zfs_livelist_condense_, sync_pause, INT,
+       ZMOD_RW, "Set the livelist condense synctask to pause");
 
-ZFS_MODULE_PARAM(zfs_livelist_condense, zfs_livelist_condense_, sync_cancel, INT, ZMOD_RW,
+/* BEGIN CSTYLED */
+ZFS_MODULE_PARAM(zfs_livelist_condense, zfs_livelist_condense_, sync_cancel,
+       INT, ZMOD_RW,
        "Whether livelist condensing was canceled in the synctask");
 
-ZFS_MODULE_PARAM(zfs_livelist_condense, zfs_livelist_condense_, zthr_cancel, INT, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_livelist_condense, zfs_livelist_condense_, zthr_cancel,
+       INT, ZMOD_RW,
        "Whether livelist condensing was canceled in the zthr function");
 
-ZFS_MODULE_PARAM(zfs_livelist_condense, zfs_livelist_condense_, new_alloc, INT, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_livelist_condense, zfs_livelist_condense_, new_alloc, INT,
+       ZMOD_RW,
        "Whether extra ALLOC blkptrs were added to a livelist entry while it "
        "was being condensed");
 /* END CSTYLED */
+
+ZFS_MODULE_PARAM(zfs_zio, zio_, taskq_wr_iss_ncpus, UINT, ZMOD_RW,
+       "Number of CPUs to run write issue taskqs");