Update vdev devid and physpath if changed between imports

[mirror_zfs.git] / module / zfs / vdev.c
diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c

index 7bc79a2259df40c83dc0367b933b03f98a97df84..d6286dc5920bd8294022f25ab5d29a16c000a5b5 100644 (file)
--- a/module/zfs/vdev.c
+++ b/module/zfs/vdev.c
@@ -6,7 +6,7 @@
   * You may not use this file except in compliance with the License.
   *
   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
   * See the License for the specific language governing permissions
   * and limitations under the License.
   *
@@ -29,7 +29,7 @@
   * Copyright (c) 2017, Intel Corporation.
   * Copyright (c) 2019, Datto Inc. All rights reserved.
   * Copyright (c) 2021, Klara Inc.
- * Copyright [2021] Hewlett Packard Enterprise Development LP
+ * Copyright (c) 2021, 2023 Hewlett Packard Enterprise Development LP.
   */
  
  #include <sys/zfs_context.h>
@@ -58,6 +58,7 @@
  #include <sys/abd.h>
  #include <sys/vdev_initialize.h>
  #include <sys/vdev_trim.h>
+#include <sys/vdev_raidz.h>
  #include <sys/zvol.h>
  #include <sys/zfs_ratelimit.h>
  #include "zfs_prop.h"
@@ -81,22 +82,22 @@
   * 1 << (spa_slop_shift + 1), on small pools the usable space may be reduced
   * (by more than 1<<spa_slop_shift) due to the embedded slog metaslab.
   */
-static int zfs_embedded_slog_min_ms = 64;
+static uint_t zfs_embedded_slog_min_ms = 64;
  
  /* default target for number of metaslabs per top-level vdev */
-static int zfs_vdev_default_ms_count = 200;
+static uint_t zfs_vdev_default_ms_count = 200;
  
  /* minimum number of metaslabs per top-level vdev */
-static int zfs_vdev_min_ms_count = 16;
+static uint_t zfs_vdev_min_ms_count = 16;
  
  /* practical upper limit of total metaslabs per top-level vdev */
-static int zfs_vdev_ms_count_limit = 1ULL << 17;
+static uint_t zfs_vdev_ms_count_limit = 1ULL << 17;
  
  /* lower limit for metaslab size (512M) */
-static int zfs_vdev_default_ms_shift = 29;
+static uint_t zfs_vdev_default_ms_shift = 29;
  
  /* upper limit for metaslab size (16G) */
-static const int zfs_vdev_max_ms_shift = 34;
+static uint_t zfs_vdev_max_ms_shift = 34;
  
  int vdev_validate_skip = B_FALSE;
  
@@ -136,8 +137,16 @@ int zfs_vdev_standard_sm_blksz = (1 << 17);
   */
  int zfs_nocacheflush = 0;
  
-uint64_t zfs_vdev_max_auto_ashift = ASHIFT_MAX;
-uint64_t zfs_vdev_min_auto_ashift = ASHIFT_MIN;
+/*
+ * Maximum and minimum ashift values that can be automatically set based on
+ * vdev's physical ashift (disk's physical sector size).  While ASHIFT_MAX
+ * is higher than the maximum value, it is intentionally limited here to not
+ * excessively impact pool space efficiency.  Higher ashift values may still
+ * be forced by vdev logical ashift or by user via ashift property, but won't
+ * be set automatically as a performance optimization.
+ */
+uint_t zfs_vdev_max_auto_ashift = 14;
+uint_t zfs_vdev_min_auto_ashift = ASHIFT_MIN;
  
  void
  vdev_dbgmsg(vdev_t *vd, const char *fmt, ...)
@@ -216,7 +225,7 @@ vdev_dbgmsg_print_tree(vdev_t *vd, int indent)
   * Virtual device management.
   */
  
-static const vdev_ops_t *const vdev_ops_table[] = {
+static vdev_ops_t *const vdev_ops_table[] = {
         &vdev_root_ops,
         &vdev_raidz_ops,
         &vdev_draid_ops,
@@ -238,7 +247,7 @@ static const vdev_ops_t *const vdev_ops_table[] = {
  static vdev_ops_t *
  vdev_getops(const char *type)
  {
-       const vdev_ops_t *ops, *const *opspp;
+       vdev_ops_t *ops, *const *opspp;
  
         for (opspp = vdev_ops_table; (ops = *opspp) != NULL; opspp++)
                 if (strcmp(ops->vdev_op_type, type) == 0)
@@ -297,13 +306,13 @@ vdev_derive_alloc_bias(const char *bias)
   * all children.  This is what's used by anything other than RAID-Z.
   */
  uint64_t
-vdev_default_asize(vdev_t *vd, uint64_t psize)
+vdev_default_asize(vdev_t *vd, uint64_t psize, uint64_t txg)
  {
         uint64_t asize = P2ROUNDUP(psize, 1ULL << vd->vdev_top->vdev_ashift);
         uint64_t csize;
  
         for (int c = 0; c < vd->vdev_children; c++) {
-               csize = vdev_psize_to_asize(vd->vdev_child[c], psize);
+               csize = vdev_psize_to_asize_txg(vd->vdev_child[c], psize, txg);
                 asize = MAX(asize, csize);
         }
  
@@ -381,6 +390,33 @@ vdev_get_nparity(vdev_t *vd)
         return (nparity);
  }
  
+/*
+ * Look up the numeric vdev property 'prop' in one of this vdev's ZAP
+ * objects in the MOS and store the result in *value.  The root ZAP is
+ * preferred, then the top-level ZAP, then the leaf ZAP; EINVAL is returned
+ * when the vdev has none of them.  When the lookup fails with ENOENT,
+ * *value is set to the property's default and ENOENT is still returned.
+ */
+static int
+vdev_prop_get_int(vdev_t *vd, vdev_prop_t prop, uint64_t *value)
+{
+       objset_t *mos = vd->vdev_spa->spa_meta_objset;
+       uint64_t zapobj = vd->vdev_root_zap;
+       int err;
+
+       /* Fall back through the remaining ZAPs until one is non-zero. */
+       if (zapobj == 0)
+               zapobj = vd->vdev_top_zap;
+       if (zapobj == 0)
+               zapobj = vd->vdev_leaf_zap;
+       if (zapobj == 0)
+               return (EINVAL);
+
+       err = zap_lookup(mos, zapobj, vdev_prop_to_name(prop),
+           sizeof (uint64_t), 1, value);
+       if (err == ENOENT)
+               *value = vdev_prop_default_numeric(prop);
+
+       return (err);
+}
+
  /*
   * Get the number of data disks for a top-level vdev.
   */
@@ -475,7 +511,7 @@ vdev_add_child(vdev_t *pvd, vdev_t *cvd)
  
         newchild = kmem_alloc(newsize, KM_SLEEP);
         if (pvd->vdev_child != NULL) {
-               bcopy(pvd->vdev_child, newchild, oldsize);
+               memcpy(newchild, pvd->vdev_child, oldsize);
                 kmem_free(pvd->vdev_child, oldsize);
         }
  
@@ -634,6 +670,14 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
         zfs_ratelimit_init(&vd->vdev_checksum_rl,
             &zfs_checksum_events_per_second, 1);
  
+       /*
+        * Default Thresholds for tuning ZED
+        */
+       vd->vdev_checksum_n = vdev_prop_default_numeric(VDEV_PROP_CHECKSUM_N);
+       vd->vdev_checksum_t = vdev_prop_default_numeric(VDEV_PROP_CHECKSUM_T);
+       vd->vdev_io_n = vdev_prop_default_numeric(VDEV_PROP_IO_N);
+       vd->vdev_io_t = vdev_prop_default_numeric(VDEV_PROP_IO_T);
+
         list_link_init(&vd->vdev_config_dirty_node);
         list_link_init(&vd->vdev_state_dirty_node);
         list_link_init(&vd->vdev_initialize_node);
@@ -655,6 +699,7 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
         mutex_init(&vd->vdev_trim_io_lock, NULL, MUTEX_DEFAULT, NULL);
         cv_init(&vd->vdev_trim_cv, NULL, CV_DEFAULT, NULL);
         cv_init(&vd->vdev_autotrim_cv, NULL, CV_DEFAULT, NULL);
+       cv_init(&vd->vdev_autotrim_kick_cv, NULL, CV_DEFAULT, NULL);
         cv_init(&vd->vdev_trim_io_cv, NULL, CV_DEFAULT, NULL);
  
         mutex_init(&vd->vdev_rebuild_lock, NULL, MUTEX_DEFAULT, NULL);
@@ -671,7 +716,6 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
             offsetof(struct vdev, vdev_dtl_node));
         vd->vdev_stat.vs_timestamp = gethrtime();
         vdev_queue_init(vd);
-       vdev_cache_init(vd);
  
         return (vd);
  }
@@ -686,11 +730,11 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
      int alloctype)
  {
         vdev_ops_t *ops;
-       char *type;
+       const char *type;
         uint64_t guid = 0, islog;
         vdev_t *vd;
         vdev_indirect_config_t *vic;
-       char *tmp = NULL;
+       const char *tmp = NULL;
         int rc;
         vdev_alloc_bias_t alloc_bias = VDEV_BIAS_NONE;
         boolean_t top_level = (parent && !parent->vdev_parent);
@@ -745,7 +789,7 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
                 return (SET_ERROR(ENOTSUP));
  
         if (top_level && alloctype == VDEV_ALLOC_ADD) {
-               char *bias;
+               const char *bias;
  
                 /*
                  * If creating a top-level vdev, check for allocation
@@ -791,8 +835,8 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
         if (top_level && alloc_bias != VDEV_BIAS_NONE)
                 vd->vdev_alloc_bias = alloc_bias;
  
-       if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &vd->vdev_path) == 0)
-               vd->vdev_path = spa_strdup(vd->vdev_path);
+       if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &tmp) == 0)
+               vd->vdev_path = spa_strdup(tmp);
  
         /*
          * ZPOOL_CONFIG_AUX_STATE = "external" means we previously forced a
@@ -806,18 +850,17 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
                 vd->vdev_label_aux = VDEV_AUX_EXTERNAL;
         }
  
-       if (nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &vd->vdev_devid) == 0)
-               vd->vdev_devid = spa_strdup(vd->vdev_devid);
-       if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PHYS_PATH,
-           &vd->vdev_physpath) == 0)
-               vd->vdev_physpath = spa_strdup(vd->vdev_physpath);
+       if (nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &tmp) == 0)
+               vd->vdev_devid = spa_strdup(tmp);
+       if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PHYS_PATH, &tmp) == 0)
+               vd->vdev_physpath = spa_strdup(tmp);
  
         if (nvlist_lookup_string(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH,
-           &vd->vdev_enc_sysfs_path) == 0)
-               vd->vdev_enc_sysfs_path = spa_strdup(vd->vdev_enc_sysfs_path);
+           &tmp) == 0)
+               vd->vdev_enc_sysfs_path = spa_strdup(tmp);
  
-       if (nvlist_lookup_string(nv, ZPOOL_CONFIG_FRU, &vd->vdev_fru) == 0)
-               vd->vdev_fru = spa_strdup(vd->vdev_fru);
+       if (nvlist_lookup_string(nv, ZPOOL_CONFIG_FRU, &tmp) == 0)
+               vd->vdev_fru = spa_strdup(tmp);
  
         /*
          * Set the whole_disk property.  If it's not specified, leave the value
@@ -847,9 +890,15 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
             &vd->vdev_not_present);
  
         /*
-        * Get the alignment requirement.
+        * Get the alignment requirement. Ignore pool ashift for vdev
+        * attach case.
          */
-       (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ASHIFT, &vd->vdev_ashift);
+       if (alloctype != VDEV_ALLOC_ATTACH) {
+               (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ASHIFT,
+                   &vd->vdev_ashift);
+       } else {
+               vd->vdev_attaching = B_TRUE;
+       }
  
         /*
          * Retrieve the vdev creation time.
@@ -857,6 +906,14 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
         (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_CREATE_TXG,
             &vd->vdev_crtxg);
  
+       if (vd->vdev_ops == &vdev_root_ops &&
+           (alloctype == VDEV_ALLOC_LOAD ||
+           alloctype == VDEV_ALLOC_SPLIT ||
+           alloctype == VDEV_ALLOC_ROOTPOOL)) {
+               (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_VDEV_ROOT_ZAP,
+                   &vd->vdev_root_zap);
+       }
+
         /*
          * If we're a top-level vdev, try to load the allocation parameters.
          */
@@ -874,6 +931,8 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
                     &vd->vdev_removing);
                 (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_VDEV_TOP_ZAP,
                     &vd->vdev_top_zap);
+               vd->vdev_rz_expanding = nvlist_exists(nv,
+                   ZPOOL_CONFIG_RAIDZ_EXPANDING);
         } else {
                 ASSERT0(vd->vdev_top_zap);
         }
@@ -948,7 +1007,7 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
                             &vd->vdev_removed);
  
                         if (vd->vdev_faulted || vd->vdev_degraded) {
-                               char *aux;
+                               const char *aux;
  
                                 vd->vdev_label_aux =
                                     VDEV_AUX_ERR_EXCEEDED;
@@ -1045,7 +1104,6 @@ vdev_free(vdev_t *vd)
          * Clean up vdev structure.
          */
         vdev_queue_fini(vd);
-       vdev_cache_fini(vd);
  
         if (vd->vdev_path)
                 spa_strfree(vd->vdev_path);
@@ -1108,6 +1166,7 @@ vdev_free(vdev_t *vd)
         mutex_destroy(&vd->vdev_trim_io_lock);
         cv_destroy(&vd->vdev_trim_cv);
         cv_destroy(&vd->vdev_autotrim_cv);
+       cv_destroy(&vd->vdev_autotrim_kick_cv);
         cv_destroy(&vd->vdev_trim_io_cv);
  
         mutex_destroy(&vd->vdev_rebuild_lock);
@@ -1136,7 +1195,6 @@ vdev_top_transfer(vdev_t *svd, vdev_t *tvd)
  
         ASSERT(tvd == tvd->vdev_top);
  
-       tvd->vdev_pending_fastwrite = svd->vdev_pending_fastwrite;
         tvd->vdev_ms_array = svd->vdev_ms_array;
         tvd->vdev_ms_shift = svd->vdev_ms_shift;
         tvd->vdev_ms_count = svd->vdev_ms_count;
@@ -1343,6 +1401,36 @@ vdev_remove_parent(vdev_t *cvd)
         vdev_free(mvd);
  }
  
+/*
+ * Return the greatest common divisor of a and b using Euclid's algorithm.
+ * When b is 0 the result is a.  Used to maintain spa_gcd_alloc.
+ */
+static uint64_t
+vdev_gcd(uint64_t a, uint64_t b)
+{
+       while (b != 0) {
+               uint64_t rem = a % b;
+               a = b;
+               b = rem;
+       }
+       return (a);
+}
+
+/*
+ * Fold one vdev's minimum allocation size into the pool-wide values:
+ * spa_min_alloc keeps the smallest size seen, and spa_gcd_alloc keeps the
+ * greatest common divisor of all sizes seen.  A spa_gcd_alloc of INT_MAX
+ * is treated as "not set yet" and is simply replaced by min_alloc.
+ */
+static void
+vdev_spa_set_alloc(spa_t *spa, uint64_t min_alloc)
+{
+       if (spa->spa_min_alloc > min_alloc)
+               spa->spa_min_alloc = min_alloc;
+
+       spa->spa_gcd_alloc = (spa->spa_gcd_alloc == INT_MAX) ?
+           min_alloc : vdev_gcd(min_alloc, spa->spa_gcd_alloc);
+}
+
  void
  vdev_metaslab_group_create(vdev_t *vd)
  {
@@ -1395,8 +1483,7 @@ vdev_metaslab_group_create(vdev_t *vd)
                                 spa->spa_min_ashift = vd->vdev_ashift;
  
                         uint64_t min_alloc = vdev_get_min_alloc(vd);
-                       if (min_alloc < spa->spa_min_alloc)
-                               spa->spa_min_alloc = min_alloc;
+                       vdev_spa_set_alloc(spa, min_alloc);
                 }
         }
  }
@@ -1426,7 +1513,7 @@ vdev_metaslab_init(vdev_t *vd, uint64_t txg)
         mspp = vmem_zalloc(newc * sizeof (*mspp), KM_SLEEP);
  
         if (expanding) {
-               bcopy(vd->vdev_ms, mspp, oldc * sizeof (*mspp));
+               memcpy(mspp, vd->vdev_ms, oldc * sizeof (*mspp));
                 vmem_free(vd->vdev_ms, oldc * sizeof (*mspp));
         }
  
@@ -1523,13 +1610,6 @@ vdev_metaslab_init(vdev_t *vd, uint64_t txg)
         if (txg == 0)
                 spa_config_exit(spa, SCL_ALLOC, FTAG);
  
-       /*
-        * Regardless whether this vdev was just added or it is being
-        * expanded, the metaslab count has changed. Recalculate the
-        * block limit.
-        */
-       spa_log_sm_set_blocklimit(spa);
-
         return (0);
  }
  
@@ -1577,7 +1657,6 @@ vdev_metaslab_fini(vdev_t *vd)
                 }
         }
         ASSERT0(vd->vdev_ms_count);
-       ASSERT3U(vd->vdev_pending_fastwrite, ==, 0);
  }
  
  typedef struct vdev_probe_stats {
@@ -1616,6 +1695,8 @@ vdev_probe_done(zio_t *zio)
  
                 vd->vdev_cant_read |= !vps->vps_readable;
                 vd->vdev_cant_write |= !vps->vps_writeable;
+               vdev_dbgmsg(vd, "probe done, cant_read=%u cant_write=%u",
+                   vd->vdev_cant_read, vd->vdev_cant_write);
  
                 if (vdev_readable(vd) &&
                     (vdev_writeable(vd) || !spa_writeable(spa))) {
@@ -1675,8 +1756,7 @@ vdev_probe(vdev_t *vd, zio_t *zio)
                 vps = kmem_zalloc(sizeof (*vps), KM_SLEEP);
  
                 vps->vps_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_PROBE |
-                   ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_AGGREGATE |
-                   ZIO_FLAG_TRYHARD;
+                   ZIO_FLAG_DONT_AGGREGATE | ZIO_FLAG_TRYHARD;
  
                 if (spa_config_held(spa, SCL_ZIO, RW_WRITER)) {
                         /*
@@ -1838,20 +1918,41 @@ vdev_open_children_subset(vdev_t *vd, vdev_open_children_func_t *open_func)
  }
  
  /*
- * Compute the raidz-deflation ratio.  Note, we hard-code
- * in 128k (1 << 17) because it is the "typical" blocksize.
- * Even though SPA_MAXBLOCKSIZE changed, this algorithm can not change,
- * otherwise it would inconsistently account for existing bp's.
+ * Compute the raidz-deflation ratio.  Note, we hard-code 128k (1 << 17)
+ * because it is the "typical" blocksize.  Even though SPA_MAXBLOCKSIZE
+ * changed, this algorithm can not change, otherwise it would inconsistently
+ * account for existing bp's.  We also hard-code txg 0 for the same reason
+ * since expanded RAIDZ vdevs can use a different asize for different birth
+ * txg's.
   */
  static void
  vdev_set_deflate_ratio(vdev_t *vd)
  {
+       /* Applies only to top-level, non-hole vdevs with ashift != 0. */
         if (vd == vd->vdev_top && !vd->vdev_ishole && vd->vdev_ashift != 0) {
                 vd->vdev_deflate_ratio = (1 << 17) /
-                   (vdev_psize_to_asize(vd, 1 << 17) >> SPA_MINBLOCKSHIFT);
+                   (vdev_psize_to_asize_txg(vd, 1 << 17, 0) >>
+                   SPA_MINBLOCKSHIFT);
         }
  }
  
+/*
+ * Pick the better of two candidate ashifts.  A candidate is "preferred"
+ * when it is strictly above the logical ashift (the absolute minimum) and
+ * no larger than zfs_vdev_max_auto_ashift.  If exactly one candidate is
+ * preferred it wins; otherwise the larger of the two is returned.
+ */
+uint64_t
+vdev_best_ashift(uint64_t logical, uint64_t a, uint64_t b)
+{
+       if (b <= logical || b > zfs_vdev_max_auto_ashift) {
+               /* b is not preferred: a wins outright if it is. */
+               if (a > logical && a <= zfs_vdev_max_auto_ashift)
+                       return (a);
+               return (MAX(a, b));
+       }
+
+       /* b is preferred: it only competes with a preferred a. */
+       if (a > logical && a <= zfs_vdev_max_auto_ashift)
+               return (MAX(a, b));
+       return (b);
+}
+
  /*
   * Maximize performance by inflating the configured ashift for top level
   * vdevs to be as close to the physical ashift as possible while maintaining
@@ -1863,7 +1964,8 @@ vdev_ashift_optimize(vdev_t *vd)
  {
         ASSERT(vd == vd->vdev_top);
  
-       if (vd->vdev_ashift < vd->vdev_physical_ashift) {
+       if (vd->vdev_ashift < vd->vdev_physical_ashift &&
+           vd->vdev_physical_ashift <= zfs_vdev_max_auto_ashift) {
                 vd->vdev_ashift = MIN(
                     MAX(zfs_vdev_max_auto_ashift, vd->vdev_ashift),
                     MAX(zfs_vdev_min_auto_ashift,
@@ -1928,6 +2030,14 @@ vdev_open(vdev_t *vd)
  
         error = vd->vdev_ops->vdev_op_open(vd, &osize, &max_osize,
             &logical_ashift, &physical_ashift);
+
+       /* Keep the device in removed state if unplugged */
+       if (error == ENOENT && vd->vdev_removed) {
+               vdev_set_state(vd, B_TRUE, VDEV_STATE_REMOVED,
+                   VDEV_AUX_NONE);
+               return (error);
+       }
+
         /*
          * Physical volume size should never be larger than its max size, unless
          * the disk has shrunk while we were reading it or the device is buggy
@@ -2075,9 +2185,9 @@ vdev_open(vdev_t *vd)
                                 return (SET_ERROR(EDOM));
                         }
  
-                       if (vd->vdev_top == vd) {
+                       if (vd->vdev_top == vd && vd->vdev_attaching == B_FALSE)
                                 vdev_ashift_optimize(vd);
-                       }
+                       vd->vdev_attaching = B_FALSE;
                 }
                 if (vd->vdev_ashift != 0 && (vd->vdev_ashift < ASHIFT_MIN ||
                     vd->vdev_ashift > ASHIFT_MAX)) {
@@ -2138,8 +2248,7 @@ vdev_open(vdev_t *vd)
         if (vd->vdev_top == vd && vd->vdev_ashift != 0 &&
             vd->vdev_islog == 0 && vd->vdev_aux == NULL) {
                 uint64_t min_alloc = vdev_get_min_alloc(vd);
-               if (min_alloc < spa->spa_min_alloc)
-                       spa->spa_min_alloc = min_alloc;
+               vdev_spa_set_alloc(spa, min_alloc);
         }
  
         /*
@@ -2384,22 +2493,36 @@ vdev_validate(vdev_t *vd)
  }
  
  static void
-vdev_copy_path_impl(vdev_t *svd, vdev_t *dvd)
-{
-       char *old, *new;
-       if (svd->vdev_path != NULL && dvd->vdev_path != NULL) {
-               if (strcmp(svd->vdev_path, dvd->vdev_path) != 0) {
-                       zfs_dbgmsg("vdev_copy_path: vdev %llu: path changed "
-                           "from '%s' to '%s'", (u_longlong_t)dvd->vdev_guid,
-                           dvd->vdev_path, svd->vdev_path);
-                       spa_strfree(dvd->vdev_path);
-                       dvd->vdev_path = spa_strdup(svd->vdev_path);
+vdev_update_path(const char *prefix, char *svd, char **dvd, uint64_t guid)
+{
+       if (svd != NULL && *dvd != NULL) {
+               if (strcmp(svd, *dvd) != 0) {
+                       zfs_dbgmsg("vdev_copy_path: vdev %llu: %s changed "
+                           "from '%s' to '%s'", (u_longlong_t)guid, prefix,
+                           *dvd, svd);
+                       spa_strfree(*dvd);
+                       *dvd = spa_strdup(svd);
                 }
-       } else if (svd->vdev_path != NULL) {
-               dvd->vdev_path = spa_strdup(svd->vdev_path);
+       } else if (svd != NULL) {
+               *dvd = spa_strdup(svd);
                 zfs_dbgmsg("vdev_copy_path: vdev %llu: path set to '%s'",
-                   (u_longlong_t)dvd->vdev_guid, dvd->vdev_path);
+                   (u_longlong_t)guid, *dvd);
         }
+}
+
+static void
+vdev_copy_path_impl(vdev_t *svd, vdev_t *dvd)
+{
+       char *old, *new;
+
+       vdev_update_path("vdev_path", svd->vdev_path, &dvd->vdev_path,
+           dvd->vdev_guid);
+
+       vdev_update_path("vdev_devid", svd->vdev_devid, &dvd->vdev_devid,
+           dvd->vdev_guid);
+
+       vdev_update_path("vdev_physpath", svd->vdev_physpath,
+           &dvd->vdev_physpath, dvd->vdev_guid);
  
         /*
          * Our enclosure sysfs path may have changed between imports
@@ -2540,8 +2663,6 @@ vdev_close(vdev_t *vd)
  
         vd->vdev_ops->vdev_op_close(vd);
  
-       vdev_cache_purge(vd);
-
         /*
          * We record the previous state before we close it, so that if we are
          * doing a reopen(), we don't generate FMA ereports if we notice that
@@ -2627,6 +2748,17 @@ vdev_reopen(vdev_t *vd)
                 (void) vdev_validate(vd);
         }
  
+       /*
+        * Recheck if resilver is still needed and cancel any
+        * scheduled resilver if resilver is unneeded.
+        */
+       if (!vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL) &&
+           spa->spa_async_tasks & SPA_ASYNC_RESILVER) {
+               mutex_enter(&spa->spa_async_lock);
+               spa->spa_async_tasks &= ~SPA_ASYNC_RESILVER;
+               mutex_exit(&spa->spa_async_lock);
+       }
+
         /*
          * Reassess parent vdev's health.
          */
@@ -3118,32 +3250,71 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
  
                 if (txg != 0)
                         vdev_dirty(vd->vdev_top, VDD_DTL, vd, txg);
-               return;
+       } else {
+               mutex_enter(&vd->vdev_dtl_lock);
+               for (int t = 0; t < DTL_TYPES; t++) {
+                       /* account for child's outage in parent's missing map */
+                       int s = (t == DTL_MISSING) ? DTL_OUTAGE: t;
+                       if (t == DTL_SCRUB) {
+                               /* leaf vdevs only */
+                               continue;
+                       }
+                       if (t == DTL_PARTIAL) {
+                               /* i.e. non-zero */
+                               minref = 1;
+                       } else if (vdev_get_nparity(vd) != 0) {
+                               /* RAIDZ, DRAID */
+                               minref = vdev_get_nparity(vd) + 1;
+                       } else {
+                               /* any kind of mirror */
+                               minref = vd->vdev_children;
+                       }
+                       space_reftree_create(&reftree);
+                       for (int c = 0; c < vd->vdev_children; c++) {
+                               vdev_t *cvd = vd->vdev_child[c];
+                               mutex_enter(&cvd->vdev_dtl_lock);
+                               space_reftree_add_map(&reftree,
+                                   cvd->vdev_dtl[s], 1);
+                               mutex_exit(&cvd->vdev_dtl_lock);
+                       }
+                       space_reftree_generate_map(&reftree,
+                           vd->vdev_dtl[t], minref);
+                       space_reftree_destroy(&reftree);
+               }
+               mutex_exit(&vd->vdev_dtl_lock);
         }
  
-       mutex_enter(&vd->vdev_dtl_lock);
-       for (int t = 0; t < DTL_TYPES; t++) {
-               /* account for child's outage in parent's missing map */
-               int s = (t == DTL_MISSING) ? DTL_OUTAGE: t;
-               if (t == DTL_SCRUB)
-                       continue;                       /* leaf vdevs only */
-               if (t == DTL_PARTIAL)
-                       minref = 1;                     /* i.e. non-zero */
-               else if (vdev_get_nparity(vd) != 0)
-                       minref = vdev_get_nparity(vd) + 1; /* RAID-Z, dRAID */
-               else
-                       minref = vd->vdev_children;     /* any kind of mirror */
-               space_reftree_create(&reftree);
-               for (int c = 0; c < vd->vdev_children; c++) {
-                       vdev_t *cvd = vd->vdev_child[c];
-                       mutex_enter(&cvd->vdev_dtl_lock);
-                       space_reftree_add_map(&reftree, cvd->vdev_dtl[s], 1);
-                       mutex_exit(&cvd->vdev_dtl_lock);
-               }
-               space_reftree_generate_map(&reftree, vd->vdev_dtl[t], minref);
-               space_reftree_destroy(&reftree);
+       if (vd->vdev_top->vdev_ops == &vdev_raidz_ops) {
+               raidz_dtl_reassessed(vd);
         }
-       mutex_exit(&vd->vdev_dtl_lock);
+}
+
+/*
+ * Walk vd and all of its descendants, posting a kobj event for every vdev
+ * whose ops provide vdev_op_kobj_evt_post and whose vdev_kobj_flag is not
+ * yet set.  The flag is set before the callback runs, so each vdev posts
+ * at most once until the flag is cleared again.
+ * NOTE(review): spares are presumably excluded by not being part of this
+ * tree; nothing here filters them explicitly -- confirm against callers.
+ */
+void
+vdev_post_kobj_evt(vdev_t *vd)
+{
+       if (vd->vdev_ops->vdev_op_kobj_evt_post != NULL &&
+           !vd->vdev_kobj_flag) {
+               vd->vdev_kobj_flag = B_TRUE;
+               vd->vdev_ops->vdev_op_kobj_evt_post(vd);
+       }
+
+       for (int i = 0; i < vd->vdev_children; i++) {
+               vdev_t *child = vd->vdev_child[i];
+
+               vdev_post_kobj_evt(child);
+       }
+}
+
+/*
+ * Walk vd and all of its descendants, resetting vdev_kobj_flag so that a
+ * later vdev_post_kobj_evt() will post events for them again.
+ * NOTE(review): spares are presumably excluded by not being part of this
+ * tree; nothing here filters them explicitly -- confirm against callers.
+ */
+void
+vdev_clear_kobj_evt(vdev_t *vd)
+{
+       vd->vdev_kobj_flag = B_FALSE;
+
+       for (int i = 0; i < vd->vdev_children; i++) {
+               vdev_t *child = vd->vdev_child[i];
+
+               vdev_clear_kobj_evt(child);
+       }
  }
  
  int
@@ -3257,6 +3428,12 @@ vdev_construct_zaps(vdev_t *vd, dmu_tx_t *tx)
                                 vdev_zap_allocation_data(vd, tx);
                 }
         }
+       if (vd->vdev_ops == &vdev_root_ops && vd->vdev_root_zap == 0 &&
+           spa_feature_is_enabled(vd->vdev_spa, SPA_FEATURE_AVZ_V2)) {
+               if (!spa_feature_is_active(vd->vdev_spa, SPA_FEATURE_AVZ_V2))
+                       spa_feature_incr(vd->vdev_spa, SPA_FEATURE_AVZ_V2, tx);
+               vd->vdev_root_zap = vdev_create_link_zap(vd, tx);
+       }
  
         for (uint64_t i = 0; i < vd->vdev_children; i++) {
                 vdev_construct_zaps(vd->vdev_child[i], tx);
@@ -3484,6 +3661,12 @@ vdev_load(vdev_t *vd)
  
         vdev_set_deflate_ratio(vd);
  
+       if (vd->vdev_ops == &vdev_raidz_ops) {
+               error = vdev_raidz_load(vd);
+               if (error != 0)
+                       return (error);
+       }
+
         /*
          * On spa_load path, grab the allocation bias from our zap
          */
@@ -3507,6 +3690,26 @@ vdev_load(vdev_t *vd)
                 }
         }
  
+       if (vd == vd->vdev_top && vd->vdev_top_zap != 0) {
+               spa_t *spa = vd->vdev_spa;
+               uint64_t failfast;
+
+               error = zap_lookup(spa->spa_meta_objset, vd->vdev_top_zap,
+                   vdev_prop_to_name(VDEV_PROP_FAILFAST), sizeof (failfast),
+                   1, &failfast);
+               if (error == 0) {
+                       vd->vdev_failfast = failfast & 1;
+               } else if (error == ENOENT) {
+                       vd->vdev_failfast = vdev_prop_default_numeric(
+                           VDEV_PROP_FAILFAST);
+               } else {
+                       vdev_dbgmsg(vd,
+                           "vdev_load: zap_lookup(top_zap=%llu) "
+                           "failed [error=%d]",
+                           (u_longlong_t)vd->vdev_top_zap, error);
+               }
+       }
+
         /*
          * Load any rebuild state from the top-level vdev zap.
          */
@@ -3521,6 +3724,39 @@ vdev_load(vdev_t *vd)
                 }
         }
  
+       if (vd->vdev_top_zap != 0 || vd->vdev_leaf_zap != 0) {
+               uint64_t zapobj;
+
+               if (vd->vdev_top_zap != 0)
+                       zapobj = vd->vdev_top_zap;
+               else
+                       zapobj = vd->vdev_leaf_zap;
+
+               error = vdev_prop_get_int(vd, VDEV_PROP_CHECKSUM_N,
+                   &vd->vdev_checksum_n);
+               if (error && error != ENOENT)
+                       vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) "
+                           "failed [error=%d]", (u_longlong_t)zapobj, error);
+
+               error = vdev_prop_get_int(vd, VDEV_PROP_CHECKSUM_T,
+                   &vd->vdev_checksum_t);
+               if (error && error != ENOENT)
+                       vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) "
+                           "failed [error=%d]", (u_longlong_t)zapobj, error);
+
+               error = vdev_prop_get_int(vd, VDEV_PROP_IO_N,
+                   &vd->vdev_io_n);
+               if (error && error != ENOENT)
+                       vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) "
+                           "failed [error=%d]", (u_longlong_t)zapobj, error);
+
+               error = vdev_prop_get_int(vd, VDEV_PROP_IO_T,
+                   &vd->vdev_io_t);
+               if (error && error != ENOENT)
+                       vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) "
+                           "failed [error=%d]", (u_longlong_t)zapobj, error);
+       }
+
         /*
          * If this is a top-level vdev, initialize its metaslabs.
          */
@@ -3808,10 +4044,22 @@ vdev_sync(vdev_t *vd, uint64_t txg)
         dmu_tx_commit(tx);
  }
  
+/*
+ * Ask the vdev's ops for the allocated size that corresponds to a block of
+ * the given psize (compressed block size) in the given TXG.  For expanded
+ * RAIDZ vdevs the size allocated for older BPs may be larger; see
+ * vdev_raidz_asize().
+ */
+uint64_t
+vdev_psize_to_asize_txg(vdev_t *vd, uint64_t psize, uint64_t txg)
+{
+       uint64_t asize = vd->vdev_ops->vdev_op_asize(vd, psize, txg);
+
+       return (asize);
+}
+
  uint64_t
  vdev_psize_to_asize(vdev_t *vd, uint64_t psize)
  {
+       /* Same as vdev_psize_to_asize_txg() with the txg fixed at 0. */
-       return (vd->vdev_ops->vdev_op_asize(vd, psize));
+       return (vdev_psize_to_asize_txg(vd, psize, 0));
  }
  
  /*
@@ -3927,6 +4175,36 @@ vdev_degrade(spa_t *spa, uint64_t guid, vdev_aux_t aux)
         return (spa_vdev_state_exit(spa, vd, 0));
  }
  
+/*
+ * Request asynchronous removal of the vdev identified by guid.
+ *
+ * Returns ENODEV if no vdev with that guid is found, 0 without doing
+ * anything if the vdev is already removed or is expanding, and EEXIST if
+ * a probe of a leaf vdev succeeds.  Otherwise vdev_remove_wanted is set,
+ * SPA_ASYNC_REMOVE is requested, and the result of spa_vdev_state_exit()
+ * is returned.
+ */
+int
+vdev_remove_wanted(spa_t *spa, uint64_t guid)
+{
+       vdev_t *vd;
+
+       spa_vdev_state_enter(spa, SCL_NONE);
+
+       vd = spa_lookup_by_guid(spa, guid, B_TRUE);
+       if (vd == NULL)
+               return (spa_vdev_state_exit(spa, NULL, SET_ERROR(ENODEV)));
+
+       /*
+        * Nothing to do for a vdev that is already removed, or one that
+        * is expanding (which can trigger repartition add/remove events).
+        */
+       if (vd->vdev_removed || vd->vdev_expanding)
+               return (spa_vdev_state_exit(spa, NULL, 0));
+
+       /*
+        * Confirm that a leaf vdev is really gone: a probe that succeeds
+        * means the device is still there, so don't do anything.
+        */
+       if (vd->vdev_ops->vdev_op_leaf) {
+               int probe_err = zio_wait(vdev_probe(vd, NULL));
+
+               if (probe_err == 0) {
+                       return (spa_vdev_state_exit(spa, NULL,
+                           SET_ERROR(EEXIST)));
+               }
+       }
+
+       vd->vdev_remove_wanted = B_TRUE;
+       spa_async_request(spa, SPA_ASYNC_REMOVE);
+
+       return (spa_vdev_state_exit(spa, vd, 0));
+}
+
+
  /*
   * Online the given vdev.
   *
@@ -3947,9 +4225,6 @@ vdev_online(spa_t *spa, uint64_t guid, uint64_t flags, vdev_state_t *newstate)
         if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL)
                 return (spa_vdev_state_exit(spa, NULL, SET_ERROR(ENODEV)));
  
-       if (!vd->vdev_ops->vdev_op_leaf)
-               return (spa_vdev_state_exit(spa, NULL, SET_ERROR(ENOTSUP)));
-
         wasoffline = (vd->vdev_offline || vd->vdev_tmpoffline);
         oldstate = vd->vdev_state;
  
@@ -3988,6 +4263,7 @@ vdev_online(spa_t *spa, uint64_t guid, uint64_t flags, vdev_state_t *newstate)
                 /* XXX - L2ARC 1.0 does not support expansion */
                 if (vd->vdev_aux)
                         return (spa_vdev_state_exit(spa, vd, ENOTSUP));
+               spa->spa_ccw_fail_time = 0;
                 spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);
         }
  
@@ -4017,9 +4293,19 @@ vdev_online(spa_t *spa, uint64_t guid, uint64_t flags, vdev_state_t *newstate)
  
         if (wasoffline ||
             (oldstate < VDEV_STATE_DEGRADED &&
-           vd->vdev_state >= VDEV_STATE_DEGRADED))
+           vd->vdev_state >= VDEV_STATE_DEGRADED)) {
                 spa_event_notify(spa, vd, NULL, ESC_ZFS_VDEV_ONLINE);
  
+               /*
+                * Asynchronously detach spare vdev if resilver or
+                * rebuild is not required
+                */
+               if (vd->vdev_unspare &&
+                   !dsl_scan_resilvering(spa->spa_dsl_pool) &&
+                   !dsl_scan_resilver_scheduled(spa->spa_dsl_pool) &&
+                   !vdev_rebuild_active(tvd))
+                       spa_async_request(spa, SPA_ASYNC_DETACH_SPARE);
+       }
         return (spa_vdev_state_exit(spa, vd, 0));
  }
  
@@ -4170,9 +4456,9 @@ vdev_clear(spa_t *spa, vdev_t *vd)
                 vdev_clear(spa, vd->vdev_child[c]);
  
         /*
-        * It makes no sense to "clear" an indirect vdev.
+        * It makes no sense to "clear" an indirect or removed vdev.
          */
-       if (!vdev_is_concrete(vd))
+       if (!vdev_is_concrete(vd) || vd->vdev_removed)
                 return;
  
         /*
@@ -4403,11 +4689,9 @@ vdev_get_stats_ex_impl(vdev_t *vd, vdev_stat_t *vs, vdev_stat_ex_t *vsx)
  
                 memcpy(vsx, &vd->vdev_stat_ex, sizeof (vd->vdev_stat_ex));
  
-               for (t = 0; t < ARRAY_SIZE(vd->vdev_queue.vq_class); t++) {
-                       vsx->vsx_active_queue[t] =
-                           vd->vdev_queue.vq_class[t].vqc_active;
-                       vsx->vsx_pend_queue[t] = avl_numnodes(
-                           &vd->vdev_queue.vq_class[t].vqc_queued_tree);
+               for (t = 0; t < ZIO_PRIORITY_NUM_QUEUEABLE; t++) {
+                       vsx->vsx_active_queue[t] = vd->vdev_queue.vq_cactive[t];
+                       vsx->vsx_pend_queue[t] = vdev_queue_class_length(vd, t);
                 }
         }
  }
@@ -4418,7 +4702,7 @@ vdev_get_stats_ex(vdev_t *vd, vdev_stat_t *vs, vdev_stat_ex_t *vsx)
         vdev_t *tvd = vd->vdev_top;
         mutex_enter(&vd->vdev_stat_lock);
         if (vs) {
-               bcopy(&vd->vdev_stat, vs, sizeof (*vs));
+               memcpy(vs, &vd->vdev_stat, sizeof (*vs));
                 vs->vs_timestamp = gethrtime() - vs->vs_timestamp;
                 vs->vs_state = vd->vdev_state;
                 vs->vs_rsize = vdev_get_min_asize(vd);
@@ -4470,7 +4754,10 @@ vdev_get_stats_ex(vdev_t *vd, vdev_stat_t *vs, vdev_stat_ex_t *vsx)
                 vs->vs_configured_ashift = vd->vdev_top != NULL
                     ? vd->vdev_top->vdev_ashift : vd->vdev_ashift;
                 vs->vs_logical_ashift = vd->vdev_logical_ashift;
-               vs->vs_physical_ashift = vd->vdev_physical_ashift;
+               if (vd->vdev_physical_ashift <= ASHIFT_MAX)
+                       vs->vs_physical_ashift = vd->vdev_physical_ashift;
+               else
+                       vs->vs_physical_ashift = 0;
  
                 /*
                  * Report fragmentation and rebuild progress for top-level,
@@ -4532,8 +4819,14 @@ vdev_stat_update(zio_t *zio, uint64_t psize)
         vdev_t *vd = zio->io_vd ? zio->io_vd : rvd;
         vdev_t *pvd;
         uint64_t txg = zio->io_txg;
+/* Suppress ASAN false positive */
+#ifdef __SANITIZE_ADDRESS__
         vdev_stat_t *vs = vd ? &vd->vdev_stat : NULL;
         vdev_stat_ex_t *vsx = vd ? &vd->vdev_stat_ex : NULL;
+#else
+       vdev_stat_t *vs = &vd->vdev_stat;
+       vdev_stat_ex_t *vsx = &vd->vdev_stat_ex;
+#endif
         zio_type_t type = zio->io_type;
         int flags = zio->io_flags;
  
@@ -5212,7 +5505,9 @@ vdev_expand(vdev_t *vd, uint64_t txg)
  
         vdev_set_deflate_ratio(vd);
  
-       if ((vd->vdev_asize >> vd->vdev_ms_shift) > vd->vdev_ms_count &&
+       if ((vd->vdev_spa->spa_raidz_expand == NULL ||
+           vd->vdev_spa->spa_raidz_expand->vre_vdev_id != vd->vdev_id) &&
+           (vd->vdev_asize >> vd->vdev_ms_shift) > vd->vdev_ms_count &&
             vdev_is_concrete(vd)) {
                 vdev_metaslab_group_create(vd);
                 VERIFY(vdev_metaslab_init(vd, txg) == 0);
@@ -5228,9 +5523,13 @@ vdev_split(vdev_t *vd)
  {
         vdev_t *cvd, *pvd = vd->vdev_parent;
  
+       VERIFY3U(pvd->vdev_children, >, 1);
+
         vdev_remove_child(pvd, vd);
         vdev_compact_children(pvd);
  
+       ASSERT3P(pvd->vdev_child, !=, NULL);
+
         cvd = pvd->vdev_child[0];
         if (pvd->vdev_children == 1) {
                 vdev_remove_parent(cvd);
@@ -5240,7 +5539,7 @@ vdev_split(vdev_t *vd)
  }
  
  void
-vdev_deadman(vdev_t *vd, char *tag)
+vdev_deadman(vdev_t *vd, const char *tag)
  {
         for (int c = 0; c < vd->vdev_children; c++) {
                 vdev_t *cvd = vd->vdev_child[c];
@@ -5252,20 +5551,20 @@ vdev_deadman(vdev_t *vd, char *tag)
                 vdev_queue_t *vq = &vd->vdev_queue;
  
                 mutex_enter(&vq->vq_lock);
-               if (avl_numnodes(&vq->vq_active_tree) > 0) {
+               if (vq->vq_active > 0) {
                         spa_t *spa = vd->vdev_spa;
                         zio_t *fio;
                         uint64_t delta;
  
-                       zfs_dbgmsg("slow vdev: %s has %lu active IOs",
-                           vd->vdev_path, avl_numnodes(&vq->vq_active_tree));
+                       zfs_dbgmsg("slow vdev: %s has %u active IOs",
+                           vd->vdev_path, vq->vq_active);
  
                         /*
                          * Look at the head of all the pending queues,
                          * if any I/O has been outstanding for longer than
                          * the spa_deadman_synctime invoke the deadman logic.
                          */
-                       fio = avl_first(&vq->vq_active_tree);
+                       fio = list_head(&vq->vq_active_list);
                         delta = gethrtime() - fio->io_timestamp;
                         if (delta > spa_deadman_synctime(spa))
                                 zio_deadman(fio, tag);
@@ -5446,7 +5745,7 @@ vdev_replace_in_progress(vdev_t *vdev)
   * Add a (source=src, propname=propval) list to an nvlist.
   */
  static void
-vdev_prop_add_list(nvlist_t *nvl, const char *propname, char *strval,
+vdev_prop_add_list(nvlist_t *nvl, const char *propname, const char *strval,
      uint64_t intval, zprop_source_t src)
  {
         nvlist_t *propval;
@@ -5472,6 +5771,7 @@ vdev_props_set_sync(void *arg, dmu_tx_t *tx)
         objset_t *mos = spa->spa_meta_objset;
         nvpair_t *elem = NULL;
         uint64_t vdev_guid;
+       uint64_t objid;
         nvlist_t *nvprops;
  
         vdev_guid = fnvlist_lookup_uint64(nvp, ZPOOL_VDEV_PROPS_SET_VDEV);
@@ -5482,28 +5782,30 @@ vdev_props_set_sync(void *arg, dmu_tx_t *tx)
         if (vd == NULL)
                 return;
  
+       /*
+        * Set vdev property values in the vdev props mos object.
+        */
+       if (vd->vdev_root_zap != 0) {
+               objid = vd->vdev_root_zap;
+       } else if (vd->vdev_top_zap != 0) {
+               objid = vd->vdev_top_zap;
+       } else if (vd->vdev_leaf_zap != 0) {
+               objid = vd->vdev_leaf_zap;
+       } else {
+               panic("unexpected vdev type");
+       }
+
         mutex_enter(&spa->spa_props_lock);
  
         while ((elem = nvlist_next_nvpair(nvprops, elem)) != NULL) {
-               uint64_t intval, objid = 0;
-               char *strval;
+               uint64_t intval;
+               const char *strval;
                 vdev_prop_t prop;
                 const char *propname = nvpair_name(elem);
                 zprop_type_t proptype;
  
-               /*
-                * Set vdev property values in the vdev props mos object.
-                */
-               if (vd->vdev_top_zap != 0) {
-                       objid = vd->vdev_top_zap;
-               } else if (vd->vdev_leaf_zap != 0) {
-                       objid = vd->vdev_leaf_zap;
-               } else {
-                       panic("vdev not top or leaf");
-               }
-
                 switch (prop = vdev_name_to_prop(propname)) {
-               case VDEV_PROP_USER:
+               case VDEV_PROP_USERPROP:
                         if (vdev_prop_user(propname)) {
                                 strval = fnvpair_value_string(elem);
                                 if (strlen(strval) == 0) {
@@ -5566,10 +5868,16 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
         nvpair_t *elem = NULL;
         uint64_t vdev_guid;
         nvlist_t *nvprops;
-       int error;
+       int error = 0;
  
         ASSERT(vd != NULL);
  
+       /* Check that vdev has a zap we can use */
+       if (vd->vdev_root_zap == 0 &&
+           vd->vdev_top_zap == 0 &&
+           vd->vdev_leaf_zap == 0)
+               return (SET_ERROR(EINVAL));
+
         if (nvlist_lookup_uint64(innvl, ZPOOL_VDEV_PROPS_SET_VDEV,
             &vdev_guid) != 0)
                 return (SET_ERROR(EINVAL));
@@ -5582,12 +5890,12 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
                 return (SET_ERROR(EINVAL));
  
         while ((elem = nvlist_next_nvpair(nvprops, elem)) != NULL) {
-               char *propname = nvpair_name(elem);
+               const char *propname = nvpair_name(elem);
                 vdev_prop_t prop = vdev_name_to_prop(propname);
                 uint64_t intval = 0;
-               char *strval = NULL;
+               const char *strval = NULL;
  
-               if (prop == VDEV_PROP_USER && !vdev_prop_user(propname)) {
+               if (prop == VDEV_PROP_USERPROP && !vdev_prop_user(propname)) {
                         error = EINVAL;
                         goto end;
                 }
@@ -5627,6 +5935,41 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
                         else
                                 error = spa_vdev_alloc(spa, vdev_guid);
                         break;
+               case VDEV_PROP_FAILFAST:
+                       if (nvpair_value_uint64(elem, &intval) != 0) {
+                               error = EINVAL;
+                               break;
+                       }
+                       vd->vdev_failfast = intval & 1;
+                       break;
+               case VDEV_PROP_CHECKSUM_N:
+                       if (nvpair_value_uint64(elem, &intval) != 0) {
+                               error = EINVAL;
+                               break;
+                       }
+                       vd->vdev_checksum_n = intval;
+                       break;
+               case VDEV_PROP_CHECKSUM_T:
+                       if (nvpair_value_uint64(elem, &intval) != 0) {
+                               error = EINVAL;
+                               break;
+                       }
+                       vd->vdev_checksum_t = intval;
+                       break;
+               case VDEV_PROP_IO_N:
+                       if (nvpair_value_uint64(elem, &intval) != 0) {
+                               error = EINVAL;
+                               break;
+                       }
+                       vd->vdev_io_n = intval;
+                       break;
+               case VDEV_PROP_IO_T:
+                       if (nvpair_value_uint64(elem, &intval) != 0) {
+                               error = EINVAL;
+                               break;
+                       }
+                       vd->vdev_io_t = intval;
+                       break;
                 default:
                         /* Most processing is done in vdev_props_set_sync */
                         break;
@@ -5667,7 +6010,9 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
  
         nvlist_lookup_nvlist(innvl, ZPOOL_VDEV_PROPS_GET_PROPS, &nvprops);
  
-       if (vd->vdev_top_zap != 0) {
+       if (vd->vdev_root_zap != 0) {
+               objid = vd->vdev_root_zap;
+       } else if (vd->vdev_top_zap != 0) {
                 objid = vd->vdev_top_zap;
         } else if (vd->vdev_leaf_zap != 0) {
                 objid = vd->vdev_leaf_zap;
@@ -5797,7 +6142,7 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
                                             KM_SLEEP);
                                 for (uint64_t i = 0; i < vd->vdev_children;
                                     i++) {
-                                       char *vname;
+                                       const char *vname;
  
                                         vname = vdev_name(vd->vdev_child[i],
                                             namebuf, sizeof (namebuf));
@@ -5914,37 +6259,77 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
                                 vdev_prop_add_list(outnvl, propname, NULL,
                                     vd->vdev_removing, ZPROP_SRC_NONE);
                                 continue;
+                       case VDEV_PROP_RAIDZ_EXPANDING:
+                               /* Only expose this for raidz */
+                               if (vd->vdev_ops == &vdev_raidz_ops) {
+                                       vdev_prop_add_list(outnvl, propname,
+                                           NULL, vd->vdev_rz_expanding,
+                                           ZPROP_SRC_NONE);
+                               }
+                               continue;
                         /* Numeric Properites */
                         case VDEV_PROP_ALLOCATING:
+                               /* Leaf vdevs cannot have this property */
+                               if (vd->vdev_mg == NULL &&
+                                   vd->vdev_top != NULL) {
+                                       src = ZPROP_SRC_NONE;
+                                       intval = ZPROP_BOOLEAN_NA;
+                               } else {
+                                       err = vdev_prop_get_int(vd, prop,
+                                           &intval);
+                                       if (err && err != ENOENT)
+                                               break;
+
+                                       if (intval ==
+                                           vdev_prop_default_numeric(prop))
+                                               src = ZPROP_SRC_DEFAULT;
+                                       else
+                                               src = ZPROP_SRC_LOCAL;
+                               }
+
+                               vdev_prop_add_list(outnvl, propname, NULL,
+                                   intval, src);
+                               break;
+                       case VDEV_PROP_FAILFAST:
                                 src = ZPROP_SRC_LOCAL;
                                 strval = NULL;
  
                                 err = zap_lookup(mos, objid, nvpair_name(elem),
                                     sizeof (uint64_t), 1, &intval);
                                 if (err == ENOENT) {
-                                       intval =
-                                           vdev_prop_default_numeric(prop);
+                                       intval = vdev_prop_default_numeric(
+                                           prop);
                                         err = 0;
-                               } else if (err)
+                               } else if (err) {
                                         break;
+                               }
                                 if (intval == vdev_prop_default_numeric(prop))
                                         src = ZPROP_SRC_DEFAULT;
  
-                               /* Leaf vdevs cannot have this property */
-                               if (vd->vdev_mg == NULL &&
-                                   vd->vdev_top != NULL) {
-                                       src = ZPROP_SRC_NONE;
-                                       intval = ZPROP_BOOLEAN_NA;
-                               }
-
                                 vdev_prop_add_list(outnvl, propname, strval,
                                     intval, src);
                                 break;
+                       case VDEV_PROP_CHECKSUM_N:
+                       case VDEV_PROP_CHECKSUM_T:
+                       case VDEV_PROP_IO_N:
+                       case VDEV_PROP_IO_T:
+                               err = vdev_prop_get_int(vd, prop, &intval);
+                               if (err && err != ENOENT)
+                                       break;
+
+                               if (intval == vdev_prop_default_numeric(prop))
+                                       src = ZPROP_SRC_DEFAULT;
+                               else
+                                       src = ZPROP_SRC_LOCAL;
+
+                               vdev_prop_add_list(outnvl, propname, NULL,
+                                   intval, src);
+                               break;
                         /* Text Properties */
                         case VDEV_PROP_COMMENT:
                                 /* Exists in the ZAP below */
                                 /* FALLTHRU */
-                       case VDEV_PROP_USER:
+                       case VDEV_PROP_USERPROP:
                                 /* User Properites */
                                 src = ZPROP_SRC_LOCAL;
  
@@ -5996,7 +6381,6 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
                         strval = NULL;
                         zprop_source_t src = ZPROP_SRC_DEFAULT;
                         propname = za.za_name;
-                       prop = vdev_name_to_prop(propname);
  
                         switch (za.za_integer_length) {
                         case 8:
@@ -6039,16 +6423,19 @@ EXPORT_SYMBOL(vdev_online);
  EXPORT_SYMBOL(vdev_offline);
  EXPORT_SYMBOL(vdev_clear);
  
-ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, default_ms_count, INT, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, default_ms_count, UINT, ZMOD_RW,
         "Target number of metaslabs per top-level vdev");
  
-ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, default_ms_shift, INT, ZMOD_RW,
-       "Default limit for metaslab size");
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, default_ms_shift, UINT, ZMOD_RW,
+       "Default lower limit for metaslab size");
+
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, max_ms_shift, UINT, ZMOD_RW,
+       "Default upper limit for metaslab size");
  
-ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, min_ms_count, INT, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, min_ms_count, UINT, ZMOD_RW,
         "Minimum number of metaslabs per top-level vdev");
  
-ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, ms_count_limit, INT, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, ms_count_limit, UINT, ZMOD_RW,
         "Practical upper limit of total metaslabs per top-level vdev");
  
  ZFS_MODULE_PARAM(zfs, zfs_, slow_io_events_per_second, UINT, ZMOD_RW,
@@ -6069,16 +6456,16 @@ ZFS_MODULE_PARAM(zfs_vdev, vdev_, validate_skip, INT, ZMOD_RW,
  ZFS_MODULE_PARAM(zfs, zfs_, nocacheflush, INT, ZMOD_RW,
         "Disable cache flushes");
  
-ZFS_MODULE_PARAM(zfs, zfs_, embedded_slog_min_ms, INT, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, embedded_slog_min_ms, UINT, ZMOD_RW,
         "Minimum number of metaslabs required to dedicate one for log blocks");
  
  /* BEGIN CSTYLED */
  ZFS_MODULE_PARAM_CALL(zfs_vdev, zfs_vdev_, min_auto_ashift,
-       param_set_min_auto_ashift, param_get_ulong, ZMOD_RW,
+       param_set_min_auto_ashift, param_get_uint, ZMOD_RW,
         "Minimum ashift used when creating new top-level vdevs");
  
  ZFS_MODULE_PARAM_CALL(zfs_vdev, zfs_vdev_, max_auto_ashift,
-       param_set_max_auto_ashift, param_get_ulong, ZMOD_RW,
+       param_set_max_auto_ashift, param_get_uint, ZMOD_RW,
         "Maximum ashift used when optimizing for logical -> physical sector "
         "size on new top-level vdevs");
  /* END CSTYLED */