]> git.proxmox.com Git - mirror_zfs.git/blobdiff - module/zfs/vdev.c
Update vdev devid and physpath if changed between imports
[mirror_zfs.git] / module / zfs / vdev.c
index 7bc79a2259df40c83dc0367b933b03f98a97df84..d6286dc5920bd8294022f25ab5d29a16c000a5b5 100644 (file)
@@ -6,7 +6,7 @@
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
@@ -29,7 +29,7 @@
  * Copyright (c) 2017, Intel Corporation.
  * Copyright (c) 2019, Datto Inc. All rights reserved.
  * Copyright (c) 2021, Klara Inc.
- * Copyright [2021] Hewlett Packard Enterprise Development LP
+ * Copyright (c) 2021, 2023 Hewlett Packard Enterprise Development LP.
  */
 
 #include <sys/zfs_context.h>
@@ -58,6 +58,7 @@
 #include <sys/abd.h>
 #include <sys/vdev_initialize.h>
 #include <sys/vdev_trim.h>
+#include <sys/vdev_raidz.h>
 #include <sys/zvol.h>
 #include <sys/zfs_ratelimit.h>
 #include "zfs_prop.h"
  * 1 << (spa_slop_shift + 1), on small pools the usable space may be reduced
  * (by more than 1<<spa_slop_shift) due to the embedded slog metaslab.
  */
-static int zfs_embedded_slog_min_ms = 64;
+static uint_t zfs_embedded_slog_min_ms = 64;
 
 /* default target for number of metaslabs per top-level vdev */
-static int zfs_vdev_default_ms_count = 200;
+static uint_t zfs_vdev_default_ms_count = 200;
 
 /* minimum number of metaslabs per top-level vdev */
-static int zfs_vdev_min_ms_count = 16;
+static uint_t zfs_vdev_min_ms_count = 16;
 
 /* practical upper limit of total metaslabs per top-level vdev */
-static int zfs_vdev_ms_count_limit = 1ULL << 17;
+static uint_t zfs_vdev_ms_count_limit = 1ULL << 17;
 
 /* lower limit for metaslab size (512M) */
-static int zfs_vdev_default_ms_shift = 29;
+static uint_t zfs_vdev_default_ms_shift = 29;
 
 /* upper limit for metaslab size (16G) */
-static const int zfs_vdev_max_ms_shift = 34;
+static uint_t zfs_vdev_max_ms_shift = 34;
 
 int vdev_validate_skip = B_FALSE;
 
@@ -136,8 +137,16 @@ int zfs_vdev_standard_sm_blksz = (1 << 17);
  */
 int zfs_nocacheflush = 0;
 
-uint64_t zfs_vdev_max_auto_ashift = ASHIFT_MAX;
-uint64_t zfs_vdev_min_auto_ashift = ASHIFT_MIN;
+/*
+ * Maximum and minimum ashift values that can be automatically set based on
+ * a vdev's physical ashift (disk's physical sector size).  ASHIFT_MAX is
+ * higher than this maximum, which is intentionally limited so as not to
+ * excessively impact pool space efficiency.  Higher ashift values may still
+ * be forced by vdev logical ashift or by user via ashift property, but won't
+ * be set automatically as a performance optimization.
+ */
+uint_t zfs_vdev_max_auto_ashift = 14;
+uint_t zfs_vdev_min_auto_ashift = ASHIFT_MIN;
 
 void
 vdev_dbgmsg(vdev_t *vd, const char *fmt, ...)
@@ -216,7 +225,7 @@ vdev_dbgmsg_print_tree(vdev_t *vd, int indent)
  * Virtual device management.
  */
 
-static const vdev_ops_t *const vdev_ops_table[] = {
+static vdev_ops_t *const vdev_ops_table[] = {
        &vdev_root_ops,
        &vdev_raidz_ops,
        &vdev_draid_ops,
@@ -238,7 +247,7 @@ static const vdev_ops_t *const vdev_ops_table[] = {
 static vdev_ops_t *
 vdev_getops(const char *type)
 {
-       const vdev_ops_t *ops, *const *opspp;
+       vdev_ops_t *ops, *const *opspp;
 
        for (opspp = vdev_ops_table; (ops = *opspp) != NULL; opspp++)
                if (strcmp(ops->vdev_op_type, type) == 0)
@@ -297,13 +306,13 @@ vdev_derive_alloc_bias(const char *bias)
  * all children.  This is what's used by anything other than RAID-Z.
  */
 uint64_t
-vdev_default_asize(vdev_t *vd, uint64_t psize)
+vdev_default_asize(vdev_t *vd, uint64_t psize, uint64_t txg)
 {
        uint64_t asize = P2ROUNDUP(psize, 1ULL << vd->vdev_top->vdev_ashift);
        uint64_t csize;
 
        for (int c = 0; c < vd->vdev_children; c++) {
-               csize = vdev_psize_to_asize(vd->vdev_child[c], psize);
+               csize = vdev_psize_to_asize_txg(vd->vdev_child[c], psize, txg);
                asize = MAX(asize, csize);
        }
 
@@ -381,6 +390,33 @@ vdev_get_nparity(vdev_t *vd)
        return (nparity);
 }
 
+static int
+vdev_prop_get_int(vdev_t *vd, vdev_prop_t prop, uint64_t *value)
+{
+       spa_t *spa = vd->vdev_spa;
+       objset_t *mos = spa->spa_meta_objset;
+       uint64_t objid;
+       int err;
+
+       if (vd->vdev_root_zap != 0) {
+               objid = vd->vdev_root_zap;
+       } else if (vd->vdev_top_zap != 0) {
+               objid = vd->vdev_top_zap;
+       } else if (vd->vdev_leaf_zap != 0) {
+               objid = vd->vdev_leaf_zap;
+       } else {
+               return (EINVAL);
+       }
+
+       err = zap_lookup(mos, objid, vdev_prop_to_name(prop),
+           sizeof (uint64_t), 1, value);
+
+       if (err == ENOENT)
+               *value = vdev_prop_default_numeric(prop);
+
+       return (err);
+}
+
 /*
  * Get the number of data disks for a top-level vdev.
  */
@@ -475,7 +511,7 @@ vdev_add_child(vdev_t *pvd, vdev_t *cvd)
 
        newchild = kmem_alloc(newsize, KM_SLEEP);
        if (pvd->vdev_child != NULL) {
-               bcopy(pvd->vdev_child, newchild, oldsize);
+               memcpy(newchild, pvd->vdev_child, oldsize);
                kmem_free(pvd->vdev_child, oldsize);
        }
 
@@ -634,6 +670,14 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
        zfs_ratelimit_init(&vd->vdev_checksum_rl,
            &zfs_checksum_events_per_second, 1);
 
+       /*
+        * Default thresholds for tuning ZED (vdev property defaults).
+        */
+       vd->vdev_checksum_n = vdev_prop_default_numeric(VDEV_PROP_CHECKSUM_N);
+       vd->vdev_checksum_t = vdev_prop_default_numeric(VDEV_PROP_CHECKSUM_T);
+       vd->vdev_io_n = vdev_prop_default_numeric(VDEV_PROP_IO_N);
+       vd->vdev_io_t = vdev_prop_default_numeric(VDEV_PROP_IO_T);
+
        list_link_init(&vd->vdev_config_dirty_node);
        list_link_init(&vd->vdev_state_dirty_node);
        list_link_init(&vd->vdev_initialize_node);
@@ -655,6 +699,7 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
        mutex_init(&vd->vdev_trim_io_lock, NULL, MUTEX_DEFAULT, NULL);
        cv_init(&vd->vdev_trim_cv, NULL, CV_DEFAULT, NULL);
        cv_init(&vd->vdev_autotrim_cv, NULL, CV_DEFAULT, NULL);
+       cv_init(&vd->vdev_autotrim_kick_cv, NULL, CV_DEFAULT, NULL);
        cv_init(&vd->vdev_trim_io_cv, NULL, CV_DEFAULT, NULL);
 
        mutex_init(&vd->vdev_rebuild_lock, NULL, MUTEX_DEFAULT, NULL);
@@ -671,7 +716,6 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
            offsetof(struct vdev, vdev_dtl_node));
        vd->vdev_stat.vs_timestamp = gethrtime();
        vdev_queue_init(vd);
-       vdev_cache_init(vd);
 
        return (vd);
 }
@@ -686,11 +730,11 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
     int alloctype)
 {
        vdev_ops_t *ops;
-       char *type;
+       const char *type;
        uint64_t guid = 0, islog;
        vdev_t *vd;
        vdev_indirect_config_t *vic;
-       char *tmp = NULL;
+       const char *tmp = NULL;
        int rc;
        vdev_alloc_bias_t alloc_bias = VDEV_BIAS_NONE;
        boolean_t top_level = (parent && !parent->vdev_parent);
@@ -745,7 +789,7 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
                return (SET_ERROR(ENOTSUP));
 
        if (top_level && alloctype == VDEV_ALLOC_ADD) {
-               char *bias;
+               const char *bias;
 
                /*
                 * If creating a top-level vdev, check for allocation
@@ -791,8 +835,8 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
        if (top_level && alloc_bias != VDEV_BIAS_NONE)
                vd->vdev_alloc_bias = alloc_bias;
 
-       if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &vd->vdev_path) == 0)
-               vd->vdev_path = spa_strdup(vd->vdev_path);
+       if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &tmp) == 0)
+               vd->vdev_path = spa_strdup(tmp);
 
        /*
         * ZPOOL_CONFIG_AUX_STATE = "external" means we previously forced a
@@ -806,18 +850,17 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
                vd->vdev_label_aux = VDEV_AUX_EXTERNAL;
        }
 
-       if (nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &vd->vdev_devid) == 0)
-               vd->vdev_devid = spa_strdup(vd->vdev_devid);
-       if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PHYS_PATH,
-           &vd->vdev_physpath) == 0)
-               vd->vdev_physpath = spa_strdup(vd->vdev_physpath);
+       if (nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &tmp) == 0)
+               vd->vdev_devid = spa_strdup(tmp);
+       if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PHYS_PATH, &tmp) == 0)
+               vd->vdev_physpath = spa_strdup(tmp);
 
        if (nvlist_lookup_string(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH,
-           &vd->vdev_enc_sysfs_path) == 0)
-               vd->vdev_enc_sysfs_path = spa_strdup(vd->vdev_enc_sysfs_path);
+           &tmp) == 0)
+               vd->vdev_enc_sysfs_path = spa_strdup(tmp);
 
-       if (nvlist_lookup_string(nv, ZPOOL_CONFIG_FRU, &vd->vdev_fru) == 0)
-               vd->vdev_fru = spa_strdup(vd->vdev_fru);
+       if (nvlist_lookup_string(nv, ZPOOL_CONFIG_FRU, &tmp) == 0)
+               vd->vdev_fru = spa_strdup(tmp);
 
        /*
         * Set the whole_disk property.  If it's not specified, leave the value
@@ -847,9 +890,15 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
            &vd->vdev_not_present);
 
        /*
-        * Get the alignment requirement.
+        * Get the alignment requirement. Ignore pool ashift for vdev
+        * attach case.
         */
-       (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ASHIFT, &vd->vdev_ashift);
+       if (alloctype != VDEV_ALLOC_ATTACH) {
+               (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ASHIFT,
+                   &vd->vdev_ashift);
+       } else {
+               vd->vdev_attaching = B_TRUE;
+       }
 
        /*
         * Retrieve the vdev creation time.
@@ -857,6 +906,14 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
        (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_CREATE_TXG,
            &vd->vdev_crtxg);
 
+       if (vd->vdev_ops == &vdev_root_ops &&
+           (alloctype == VDEV_ALLOC_LOAD ||
+           alloctype == VDEV_ALLOC_SPLIT ||
+           alloctype == VDEV_ALLOC_ROOTPOOL)) {
+               (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_VDEV_ROOT_ZAP,
+                   &vd->vdev_root_zap);
+       }
+
        /*
         * If we're a top-level vdev, try to load the allocation parameters.
         */
@@ -874,6 +931,8 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
                    &vd->vdev_removing);
                (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_VDEV_TOP_ZAP,
                    &vd->vdev_top_zap);
+               vd->vdev_rz_expanding = nvlist_exists(nv,
+                   ZPOOL_CONFIG_RAIDZ_EXPANDING);
        } else {
                ASSERT0(vd->vdev_top_zap);
        }
@@ -948,7 +1007,7 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
                            &vd->vdev_removed);
 
                        if (vd->vdev_faulted || vd->vdev_degraded) {
-                               char *aux;
+                               const char *aux;
 
                                vd->vdev_label_aux =
                                    VDEV_AUX_ERR_EXCEEDED;
@@ -1045,7 +1104,6 @@ vdev_free(vdev_t *vd)
         * Clean up vdev structure.
         */
        vdev_queue_fini(vd);
-       vdev_cache_fini(vd);
 
        if (vd->vdev_path)
                spa_strfree(vd->vdev_path);
@@ -1108,6 +1166,7 @@ vdev_free(vdev_t *vd)
        mutex_destroy(&vd->vdev_trim_io_lock);
        cv_destroy(&vd->vdev_trim_cv);
        cv_destroy(&vd->vdev_autotrim_cv);
+       cv_destroy(&vd->vdev_autotrim_kick_cv);
        cv_destroy(&vd->vdev_trim_io_cv);
 
        mutex_destroy(&vd->vdev_rebuild_lock);
@@ -1136,7 +1195,6 @@ vdev_top_transfer(vdev_t *svd, vdev_t *tvd)
 
        ASSERT(tvd == tvd->vdev_top);
 
-       tvd->vdev_pending_fastwrite = svd->vdev_pending_fastwrite;
        tvd->vdev_ms_array = svd->vdev_ms_array;
        tvd->vdev_ms_shift = svd->vdev_ms_shift;
        tvd->vdev_ms_count = svd->vdev_ms_count;
@@ -1343,6 +1401,36 @@ vdev_remove_parent(vdev_t *cvd)
        vdev_free(mvd);
 }
 
+/*
+ * Compute the greatest common divisor of a and b, used for spa_gcd_alloc.
+ */
+static uint64_t
+vdev_gcd(uint64_t a, uint64_t b)
+{
+       while (b != 0) {
+               uint64_t t = b;
+               b = a % b;
+               a = t;
+       }
+       return (a);
+}
+
+/*
+ * Set spa_min_alloc and spa_gcd_alloc.
+ */
+static void
+vdev_spa_set_alloc(spa_t *spa, uint64_t min_alloc)
+{
+       if (min_alloc < spa->spa_min_alloc)
+               spa->spa_min_alloc = min_alloc;
+       if (spa->spa_gcd_alloc == INT_MAX) {
+               spa->spa_gcd_alloc = min_alloc;
+       } else {
+               spa->spa_gcd_alloc = vdev_gcd(min_alloc,
+                   spa->spa_gcd_alloc);
+       }
+}
+
 void
 vdev_metaslab_group_create(vdev_t *vd)
 {
@@ -1395,8 +1483,7 @@ vdev_metaslab_group_create(vdev_t *vd)
                                spa->spa_min_ashift = vd->vdev_ashift;
 
                        uint64_t min_alloc = vdev_get_min_alloc(vd);
-                       if (min_alloc < spa->spa_min_alloc)
-                               spa->spa_min_alloc = min_alloc;
+                       vdev_spa_set_alloc(spa, min_alloc);
                }
        }
 }
@@ -1426,7 +1513,7 @@ vdev_metaslab_init(vdev_t *vd, uint64_t txg)
        mspp = vmem_zalloc(newc * sizeof (*mspp), KM_SLEEP);
 
        if (expanding) {
-               bcopy(vd->vdev_ms, mspp, oldc * sizeof (*mspp));
+               memcpy(mspp, vd->vdev_ms, oldc * sizeof (*mspp));
                vmem_free(vd->vdev_ms, oldc * sizeof (*mspp));
        }
 
@@ -1523,13 +1610,6 @@ vdev_metaslab_init(vdev_t *vd, uint64_t txg)
        if (txg == 0)
                spa_config_exit(spa, SCL_ALLOC, FTAG);
 
-       /*
-        * Regardless whether this vdev was just added or it is being
-        * expanded, the metaslab count has changed. Recalculate the
-        * block limit.
-        */
-       spa_log_sm_set_blocklimit(spa);
-
        return (0);
 }
 
@@ -1577,7 +1657,6 @@ vdev_metaslab_fini(vdev_t *vd)
                }
        }
        ASSERT0(vd->vdev_ms_count);
-       ASSERT3U(vd->vdev_pending_fastwrite, ==, 0);
 }
 
 typedef struct vdev_probe_stats {
@@ -1616,6 +1695,8 @@ vdev_probe_done(zio_t *zio)
 
                vd->vdev_cant_read |= !vps->vps_readable;
                vd->vdev_cant_write |= !vps->vps_writeable;
+               vdev_dbgmsg(vd, "probe done, cant_read=%u cant_write=%u",
+                   vd->vdev_cant_read, vd->vdev_cant_write);
 
                if (vdev_readable(vd) &&
                    (vdev_writeable(vd) || !spa_writeable(spa))) {
@@ -1675,8 +1756,7 @@ vdev_probe(vdev_t *vd, zio_t *zio)
                vps = kmem_zalloc(sizeof (*vps), KM_SLEEP);
 
                vps->vps_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_PROBE |
-                   ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_AGGREGATE |
-                   ZIO_FLAG_TRYHARD;
+                   ZIO_FLAG_DONT_AGGREGATE | ZIO_FLAG_TRYHARD;
 
                if (spa_config_held(spa, SCL_ZIO, RW_WRITER)) {
                        /*
@@ -1838,20 +1918,41 @@ vdev_open_children_subset(vdev_t *vd, vdev_open_children_func_t *open_func)
 }
 
 /*
- * Compute the raidz-deflation ratio.  Note, we hard-code
- * in 128k (1 << 17) because it is the "typical" blocksize.
- * Even though SPA_MAXBLOCKSIZE changed, this algorithm can not change,
- * otherwise it would inconsistently account for existing bp's.
+ * Compute the raidz-deflation ratio.  Note, we hard-code 128k (1 << 17)
+ * because it is the "typical" blocksize.  Even though SPA_MAXBLOCKSIZE
+ * changed, this algorithm can not change, otherwise it would inconsistently
+ * account for existing bp's.  We also hard-code txg 0 for the same reason
+ * since expanded RAIDZ vdevs can use a different asize for different birth
+ * txg's.
  */
 static void
 vdev_set_deflate_ratio(vdev_t *vd)
 {
        if (vd == vd->vdev_top && !vd->vdev_ishole && vd->vdev_ashift != 0) {
                vd->vdev_deflate_ratio = (1 << 17) /
-                   (vdev_psize_to_asize(vd, 1 << 17) >> SPA_MINBLOCKSHIFT);
+                   (vdev_psize_to_asize_txg(vd, 1 << 17, 0) >>
+                   SPA_MINBLOCKSHIFT);
        }
 }
 
+/*
+ * Choose the best of two ashifts, preferring one between logical ashift
+ * (absolute minimum) and administrator defined maximum, otherwise take
+ * the biggest of the two.
+ */
+uint64_t
+vdev_best_ashift(uint64_t logical, uint64_t a, uint64_t b)
+{
+       if (a > logical && a <= zfs_vdev_max_auto_ashift) {
+               if (b <= logical || b > zfs_vdev_max_auto_ashift)
+                       return (a);
+               else
+                       return (MAX(a, b));
+       } else if (b <= logical || b > zfs_vdev_max_auto_ashift)
+               return (MAX(a, b));
+       return (b);
+}
+
 /*
  * Maximize performance by inflating the configured ashift for top level
  * vdevs to be as close to the physical ashift as possible while maintaining
@@ -1863,7 +1964,8 @@ vdev_ashift_optimize(vdev_t *vd)
 {
        ASSERT(vd == vd->vdev_top);
 
-       if (vd->vdev_ashift < vd->vdev_physical_ashift) {
+       if (vd->vdev_ashift < vd->vdev_physical_ashift &&
+           vd->vdev_physical_ashift <= zfs_vdev_max_auto_ashift) {
                vd->vdev_ashift = MIN(
                    MAX(zfs_vdev_max_auto_ashift, vd->vdev_ashift),
                    MAX(zfs_vdev_min_auto_ashift,
@@ -1928,6 +2030,14 @@ vdev_open(vdev_t *vd)
 
        error = vd->vdev_ops->vdev_op_open(vd, &osize, &max_osize,
            &logical_ashift, &physical_ashift);
+
+       /* Keep the device in removed state if unplugged */
+       if (error == ENOENT && vd->vdev_removed) {
+               vdev_set_state(vd, B_TRUE, VDEV_STATE_REMOVED,
+                   VDEV_AUX_NONE);
+               return (error);
+       }
+
        /*
         * Physical volume size should never be larger than its max size, unless
         * the disk has shrunk while we were reading it or the device is buggy
@@ -2075,9 +2185,9 @@ vdev_open(vdev_t *vd)
                                return (SET_ERROR(EDOM));
                        }
 
-                       if (vd->vdev_top == vd) {
+                       if (vd->vdev_top == vd && vd->vdev_attaching == B_FALSE)
                                vdev_ashift_optimize(vd);
-                       }
+                       vd->vdev_attaching = B_FALSE;
                }
                if (vd->vdev_ashift != 0 && (vd->vdev_ashift < ASHIFT_MIN ||
                    vd->vdev_ashift > ASHIFT_MAX)) {
@@ -2138,8 +2248,7 @@ vdev_open(vdev_t *vd)
        if (vd->vdev_top == vd && vd->vdev_ashift != 0 &&
            vd->vdev_islog == 0 && vd->vdev_aux == NULL) {
                uint64_t min_alloc = vdev_get_min_alloc(vd);
-               if (min_alloc < spa->spa_min_alloc)
-                       spa->spa_min_alloc = min_alloc;
+               vdev_spa_set_alloc(spa, min_alloc);
        }
 
        /*
@@ -2384,22 +2493,36 @@ vdev_validate(vdev_t *vd)
 }
 
 static void
-vdev_copy_path_impl(vdev_t *svd, vdev_t *dvd)
-{
-       char *old, *new;
-       if (svd->vdev_path != NULL && dvd->vdev_path != NULL) {
-               if (strcmp(svd->vdev_path, dvd->vdev_path) != 0) {
-                       zfs_dbgmsg("vdev_copy_path: vdev %llu: path changed "
-                           "from '%s' to '%s'", (u_longlong_t)dvd->vdev_guid,
-                           dvd->vdev_path, svd->vdev_path);
-                       spa_strfree(dvd->vdev_path);
-                       dvd->vdev_path = spa_strdup(svd->vdev_path);
+vdev_update_path(const char *prefix, char *svd, char **dvd, uint64_t guid)
+{
+       if (svd != NULL && *dvd != NULL) {
+               if (strcmp(svd, *dvd) != 0) {
+                       zfs_dbgmsg("vdev_copy_path: vdev %llu: %s changed "
+                           "from '%s' to '%s'", (u_longlong_t)guid, prefix,
+                           *dvd, svd);
+                       spa_strfree(*dvd);
+                       *dvd = spa_strdup(svd);
                }
-       } else if (svd->vdev_path != NULL) {
-               dvd->vdev_path = spa_strdup(svd->vdev_path);
+       } else if (svd != NULL) {
+               *dvd = spa_strdup(svd);
                zfs_dbgmsg("vdev_copy_path: vdev %llu: path set to '%s'",
-                   (u_longlong_t)dvd->vdev_guid, dvd->vdev_path);
+                   (u_longlong_t)guid, *dvd);
        }
+}
+
+static void
+vdev_copy_path_impl(vdev_t *svd, vdev_t *dvd)
+{
+       char *old, *new;
+
+       vdev_update_path("vdev_path", svd->vdev_path, &dvd->vdev_path,
+           dvd->vdev_guid);
+
+       vdev_update_path("vdev_devid", svd->vdev_devid, &dvd->vdev_devid,
+           dvd->vdev_guid);
+
+       vdev_update_path("vdev_physpath", svd->vdev_physpath,
+           &dvd->vdev_physpath, dvd->vdev_guid);
 
        /*
         * Our enclosure sysfs path may have changed between imports
@@ -2540,8 +2663,6 @@ vdev_close(vdev_t *vd)
 
        vd->vdev_ops->vdev_op_close(vd);
 
-       vdev_cache_purge(vd);
-
        /*
         * We record the previous state before we close it, so that if we are
         * doing a reopen(), we don't generate FMA ereports if we notice that
@@ -2627,6 +2748,17 @@ vdev_reopen(vdev_t *vd)
                (void) vdev_validate(vd);
        }
 
+       /*
+        * Recheck if resilver is still needed and cancel any
+        * scheduled resilver if resilver is unneeded.
+        */
+       if (!vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL) &&
+           spa->spa_async_tasks & SPA_ASYNC_RESILVER) {
+               mutex_enter(&spa->spa_async_lock);
+               spa->spa_async_tasks &= ~SPA_ASYNC_RESILVER;
+               mutex_exit(&spa->spa_async_lock);
+       }
+
        /*
         * Reassess parent vdev's health.
         */
@@ -3118,32 +3250,71 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
 
                if (txg != 0)
                        vdev_dirty(vd->vdev_top, VDD_DTL, vd, txg);
-               return;
+       } else {
+               mutex_enter(&vd->vdev_dtl_lock);
+               for (int t = 0; t < DTL_TYPES; t++) {
+                       /* account for child's outage in parent's missing map */
+                       int s = (t == DTL_MISSING) ? DTL_OUTAGE: t;
+                       if (t == DTL_SCRUB) {
+                               /* leaf vdevs only */
+                               continue;
+                       }
+                       if (t == DTL_PARTIAL) {
+                               /* i.e. non-zero */
+                               minref = 1;
+                       } else if (vdev_get_nparity(vd) != 0) {
+                               /* RAIDZ, DRAID */
+                               minref = vdev_get_nparity(vd) + 1;
+                       } else {
+                               /* any kind of mirror */
+                               minref = vd->vdev_children;
+                       }
+                       space_reftree_create(&reftree);
+                       for (int c = 0; c < vd->vdev_children; c++) {
+                               vdev_t *cvd = vd->vdev_child[c];
+                               mutex_enter(&cvd->vdev_dtl_lock);
+                               space_reftree_add_map(&reftree,
+                                   cvd->vdev_dtl[s], 1);
+                               mutex_exit(&cvd->vdev_dtl_lock);
+                       }
+                       space_reftree_generate_map(&reftree,
+                           vd->vdev_dtl[t], minref);
+                       space_reftree_destroy(&reftree);
+               }
+               mutex_exit(&vd->vdev_dtl_lock);
        }
 
-       mutex_enter(&vd->vdev_dtl_lock);
-       for (int t = 0; t < DTL_TYPES; t++) {
-               /* account for child's outage in parent's missing map */
-               int s = (t == DTL_MISSING) ? DTL_OUTAGE: t;
-               if (t == DTL_SCRUB)
-                       continue;                       /* leaf vdevs only */
-               if (t == DTL_PARTIAL)
-                       minref = 1;                     /* i.e. non-zero */
-               else if (vdev_get_nparity(vd) != 0)
-                       minref = vdev_get_nparity(vd) + 1; /* RAID-Z, dRAID */
-               else
-                       minref = vd->vdev_children;     /* any kind of mirror */
-               space_reftree_create(&reftree);
-               for (int c = 0; c < vd->vdev_children; c++) {
-                       vdev_t *cvd = vd->vdev_child[c];
-                       mutex_enter(&cvd->vdev_dtl_lock);
-                       space_reftree_add_map(&reftree, cvd->vdev_dtl[s], 1);
-                       mutex_exit(&cvd->vdev_dtl_lock);
-               }
-               space_reftree_generate_map(&reftree, vd->vdev_dtl[t], minref);
-               space_reftree_destroy(&reftree);
+       if (vd->vdev_top->vdev_ops == &vdev_raidz_ops) {
+               raidz_dtl_reassessed(vd);
        }
-       mutex_exit(&vd->vdev_dtl_lock);
+}
+
+/*
+ * Iterate over all the vdevs except spares, and post kobj events.
+ */
+void
+vdev_post_kobj_evt(vdev_t *vd)
+{
+       if (vd->vdev_ops->vdev_op_kobj_evt_post &&
+           vd->vdev_kobj_flag == B_FALSE) {
+               vd->vdev_kobj_flag = B_TRUE;
+               vd->vdev_ops->vdev_op_kobj_evt_post(vd);
+       }
+
+       for (int c = 0; c < vd->vdev_children; c++)
+               vdev_post_kobj_evt(vd->vdev_child[c]);
+}
+
+/*
+ * Iterate over all the vdevs except spares, and clear the kobj event flag.
+ */
+void
+vdev_clear_kobj_evt(vdev_t *vd)
+{
+       vd->vdev_kobj_flag = B_FALSE;
+
+       for (int c = 0; c < vd->vdev_children; c++)
+               vdev_clear_kobj_evt(vd->vdev_child[c]);
 }
 
 int
@@ -3257,6 +3428,12 @@ vdev_construct_zaps(vdev_t *vd, dmu_tx_t *tx)
                                vdev_zap_allocation_data(vd, tx);
                }
        }
+       if (vd->vdev_ops == &vdev_root_ops && vd->vdev_root_zap == 0 &&
+           spa_feature_is_enabled(vd->vdev_spa, SPA_FEATURE_AVZ_V2)) {
+               if (!spa_feature_is_active(vd->vdev_spa, SPA_FEATURE_AVZ_V2))
+                       spa_feature_incr(vd->vdev_spa, SPA_FEATURE_AVZ_V2, tx);
+               vd->vdev_root_zap = vdev_create_link_zap(vd, tx);
+       }
 
        for (uint64_t i = 0; i < vd->vdev_children; i++) {
                vdev_construct_zaps(vd->vdev_child[i], tx);
@@ -3484,6 +3661,12 @@ vdev_load(vdev_t *vd)
 
        vdev_set_deflate_ratio(vd);
 
+       if (vd->vdev_ops == &vdev_raidz_ops) {
+               error = vdev_raidz_load(vd);
+               if (error != 0)
+                       return (error);
+       }
+
        /*
         * On spa_load path, grab the allocation bias from our zap
         */
@@ -3507,6 +3690,26 @@ vdev_load(vdev_t *vd)
                }
        }
 
+       if (vd == vd->vdev_top && vd->vdev_top_zap != 0) {
+               spa_t *spa = vd->vdev_spa;
+               uint64_t failfast;
+
+               error = zap_lookup(spa->spa_meta_objset, vd->vdev_top_zap,
+                   vdev_prop_to_name(VDEV_PROP_FAILFAST), sizeof (failfast),
+                   1, &failfast);
+               if (error == 0) {
+                       vd->vdev_failfast = failfast & 1;
+               } else if (error == ENOENT) {
+                       vd->vdev_failfast = vdev_prop_default_numeric(
+                           VDEV_PROP_FAILFAST);
+               } else {
+                       vdev_dbgmsg(vd,
+                           "vdev_load: zap_lookup(top_zap=%llu) "
+                           "failed [error=%d]",
+                           (u_longlong_t)vd->vdev_top_zap, error);
+               }
+       }
+
        /*
         * Load any rebuild state from the top-level vdev zap.
         */
@@ -3521,6 +3724,39 @@ vdev_load(vdev_t *vd)
                }
        }
 
+       if (vd->vdev_top_zap != 0 || vd->vdev_leaf_zap != 0) {
+               uint64_t zapobj;
+
+               if (vd->vdev_top_zap != 0)
+                       zapobj = vd->vdev_top_zap;
+               else
+                       zapobj = vd->vdev_leaf_zap;
+
+               error = vdev_prop_get_int(vd, VDEV_PROP_CHECKSUM_N,
+                   &vd->vdev_checksum_n);
+               if (error && error != ENOENT)
+                       vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) "
+                           "failed [error=%d]", (u_longlong_t)zapobj, error);
+
+               error = vdev_prop_get_int(vd, VDEV_PROP_CHECKSUM_T,
+                   &vd->vdev_checksum_t);
+               if (error && error != ENOENT)
+                       vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) "
+                           "failed [error=%d]", (u_longlong_t)zapobj, error);
+
+               error = vdev_prop_get_int(vd, VDEV_PROP_IO_N,
+                   &vd->vdev_io_n);
+               if (error && error != ENOENT)
+                       vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) "
+                           "failed [error=%d]", (u_longlong_t)zapobj, error);
+
+               error = vdev_prop_get_int(vd, VDEV_PROP_IO_T,
+                   &vd->vdev_io_t);
+               if (error && error != ENOENT)
+                       vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) "
+                           "failed [error=%d]", (u_longlong_t)zapobj, error);
+       }
+
        /*
         * If this is a top-level vdev, initialize its metaslabs.
         */
@@ -3808,10 +4044,22 @@ vdev_sync(vdev_t *vd, uint64_t txg)
        dmu_tx_commit(tx);
 }
 
+/*
+ * Return the amount of space that should be (or was) allocated for the given
+ * psize (compressed block size) in the given TXG. Note that for expanded
+ * RAIDZ vdevs, the size allocated for older BP's may be larger. See
+ * vdev_raidz_asize().
+ */
+uint64_t
+vdev_psize_to_asize_txg(vdev_t *vd, uint64_t psize, uint64_t txg)
+{
+       return (vd->vdev_ops->vdev_op_asize(vd, psize, txg));
+}
+
 uint64_t
 vdev_psize_to_asize(vdev_t *vd, uint64_t psize)
 {
-       return (vd->vdev_ops->vdev_op_asize(vd, psize));
+       return (vdev_psize_to_asize_txg(vd, psize, 0));
 }
 
 /*
@@ -3927,6 +4175,36 @@ vdev_degrade(spa_t *spa, uint64_t guid, vdev_aux_t aux)
        return (spa_vdev_state_exit(spa, vd, 0));
 }
 
+int
+vdev_remove_wanted(spa_t *spa, uint64_t guid)
+{
+       vdev_t *vd;
+
+       spa_vdev_state_enter(spa, SCL_NONE);
+
+       if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL)
+               return (spa_vdev_state_exit(spa, NULL, SET_ERROR(ENODEV)));
+
+       /*
+        * If the vdev is already removed, or expanding which can trigger
+        * repartition add/remove events, then don't do anything.
+        */
+       if (vd->vdev_removed || vd->vdev_expanding)
+               return (spa_vdev_state_exit(spa, NULL, 0));
+
+       /*
+        * Confirm the vdev has been removed, otherwise don't do anything.
+        */
+       if (vd->vdev_ops->vdev_op_leaf && !zio_wait(vdev_probe(vd, NULL)))
+               return (spa_vdev_state_exit(spa, NULL, SET_ERROR(EEXIST)));
+
+       vd->vdev_remove_wanted = B_TRUE;
+       spa_async_request(spa, SPA_ASYNC_REMOVE);
+
+       return (spa_vdev_state_exit(spa, vd, 0));
+}
+
+
 /*
  * Online the given vdev.
  *
@@ -3947,9 +4225,6 @@ vdev_online(spa_t *spa, uint64_t guid, uint64_t flags, vdev_state_t *newstate)
        if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL)
                return (spa_vdev_state_exit(spa, NULL, SET_ERROR(ENODEV)));
 
-       if (!vd->vdev_ops->vdev_op_leaf)
-               return (spa_vdev_state_exit(spa, NULL, SET_ERROR(ENOTSUP)));
-
        wasoffline = (vd->vdev_offline || vd->vdev_tmpoffline);
        oldstate = vd->vdev_state;
 
@@ -3988,6 +4263,7 @@ vdev_online(spa_t *spa, uint64_t guid, uint64_t flags, vdev_state_t *newstate)
                /* XXX - L2ARC 1.0 does not support expansion */
                if (vd->vdev_aux)
                        return (spa_vdev_state_exit(spa, vd, ENOTSUP));
+               spa->spa_ccw_fail_time = 0;
                spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);
        }
 
@@ -4017,9 +4293,19 @@ vdev_online(spa_t *spa, uint64_t guid, uint64_t flags, vdev_state_t *newstate)
 
        if (wasoffline ||
            (oldstate < VDEV_STATE_DEGRADED &&
-           vd->vdev_state >= VDEV_STATE_DEGRADED))
+           vd->vdev_state >= VDEV_STATE_DEGRADED)) {
                spa_event_notify(spa, vd, NULL, ESC_ZFS_VDEV_ONLINE);
 
+               /*
+                * Asynchronously detach spare vdev if resilver or
+                * rebuild is not required
+                */
+               if (vd->vdev_unspare &&
+                   !dsl_scan_resilvering(spa->spa_dsl_pool) &&
+                   !dsl_scan_resilver_scheduled(spa->spa_dsl_pool) &&
+                   !vdev_rebuild_active(tvd))
+                       spa_async_request(spa, SPA_ASYNC_DETACH_SPARE);
+       }
        return (spa_vdev_state_exit(spa, vd, 0));
 }
 
@@ -4170,9 +4456,9 @@ vdev_clear(spa_t *spa, vdev_t *vd)
                vdev_clear(spa, vd->vdev_child[c]);
 
        /*
-        * It makes no sense to "clear" an indirect vdev.
+        * It makes no sense to "clear" an indirect or removed vdev.
         */
-       if (!vdev_is_concrete(vd))
+       if (!vdev_is_concrete(vd) || vd->vdev_removed)
                return;
 
        /*
@@ -4403,11 +4689,9 @@ vdev_get_stats_ex_impl(vdev_t *vd, vdev_stat_t *vs, vdev_stat_ex_t *vsx)
 
                memcpy(vsx, &vd->vdev_stat_ex, sizeof (vd->vdev_stat_ex));
 
-               for (t = 0; t < ARRAY_SIZE(vd->vdev_queue.vq_class); t++) {
-                       vsx->vsx_active_queue[t] =
-                           vd->vdev_queue.vq_class[t].vqc_active;
-                       vsx->vsx_pend_queue[t] = avl_numnodes(
-                           &vd->vdev_queue.vq_class[t].vqc_queued_tree);
+               for (t = 0; t < ZIO_PRIORITY_NUM_QUEUEABLE; t++) {
+                       vsx->vsx_active_queue[t] = vd->vdev_queue.vq_cactive[t];
+                       vsx->vsx_pend_queue[t] = vdev_queue_class_length(vd, t);
                }
        }
 }
@@ -4418,7 +4702,7 @@ vdev_get_stats_ex(vdev_t *vd, vdev_stat_t *vs, vdev_stat_ex_t *vsx)
        vdev_t *tvd = vd->vdev_top;
        mutex_enter(&vd->vdev_stat_lock);
        if (vs) {
-               bcopy(&vd->vdev_stat, vs, sizeof (*vs));
+               memcpy(vs, &vd->vdev_stat, sizeof (*vs));
                vs->vs_timestamp = gethrtime() - vs->vs_timestamp;
                vs->vs_state = vd->vdev_state;
                vs->vs_rsize = vdev_get_min_asize(vd);
@@ -4470,7 +4754,10 @@ vdev_get_stats_ex(vdev_t *vd, vdev_stat_t *vs, vdev_stat_ex_t *vsx)
                vs->vs_configured_ashift = vd->vdev_top != NULL
                    ? vd->vdev_top->vdev_ashift : vd->vdev_ashift;
                vs->vs_logical_ashift = vd->vdev_logical_ashift;
-               vs->vs_physical_ashift = vd->vdev_physical_ashift;
+               if (vd->vdev_physical_ashift <= ASHIFT_MAX)
+                       vs->vs_physical_ashift = vd->vdev_physical_ashift;
+               else
+                       vs->vs_physical_ashift = 0;
 
                /*
                 * Report fragmentation and rebuild progress for top-level,
@@ -4532,8 +4819,14 @@ vdev_stat_update(zio_t *zio, uint64_t psize)
        vdev_t *vd = zio->io_vd ? zio->io_vd : rvd;
        vdev_t *pvd;
        uint64_t txg = zio->io_txg;
+/* Suppress ASAN false positive */
+#ifdef __SANITIZE_ADDRESS__
        vdev_stat_t *vs = vd ? &vd->vdev_stat : NULL;
        vdev_stat_ex_t *vsx = vd ? &vd->vdev_stat_ex : NULL;
+#else
+       vdev_stat_t *vs = &vd->vdev_stat;
+       vdev_stat_ex_t *vsx = &vd->vdev_stat_ex;
+#endif
        zio_type_t type = zio->io_type;
        int flags = zio->io_flags;
 
@@ -5212,7 +5505,9 @@ vdev_expand(vdev_t *vd, uint64_t txg)
 
        vdev_set_deflate_ratio(vd);
 
-       if ((vd->vdev_asize >> vd->vdev_ms_shift) > vd->vdev_ms_count &&
+       if ((vd->vdev_spa->spa_raidz_expand == NULL ||
+           vd->vdev_spa->spa_raidz_expand->vre_vdev_id != vd->vdev_id) &&
+           (vd->vdev_asize >> vd->vdev_ms_shift) > vd->vdev_ms_count &&
            vdev_is_concrete(vd)) {
                vdev_metaslab_group_create(vd);
                VERIFY(vdev_metaslab_init(vd, txg) == 0);
@@ -5228,9 +5523,13 @@ vdev_split(vdev_t *vd)
 {
        vdev_t *cvd, *pvd = vd->vdev_parent;
 
+       VERIFY3U(pvd->vdev_children, >, 1);
+
        vdev_remove_child(pvd, vd);
        vdev_compact_children(pvd);
 
+       ASSERT3P(pvd->vdev_child, !=, NULL);
+
        cvd = pvd->vdev_child[0];
        if (pvd->vdev_children == 1) {
                vdev_remove_parent(cvd);
@@ -5240,7 +5539,7 @@ vdev_split(vdev_t *vd)
 }
 
 void
-vdev_deadman(vdev_t *vd, char *tag)
+vdev_deadman(vdev_t *vd, const char *tag)
 {
        for (int c = 0; c < vd->vdev_children; c++) {
                vdev_t *cvd = vd->vdev_child[c];
@@ -5252,20 +5551,20 @@ vdev_deadman(vdev_t *vd, char *tag)
                vdev_queue_t *vq = &vd->vdev_queue;
 
                mutex_enter(&vq->vq_lock);
-               if (avl_numnodes(&vq->vq_active_tree) > 0) {
+               if (vq->vq_active > 0) {
                        spa_t *spa = vd->vdev_spa;
                        zio_t *fio;
                        uint64_t delta;
 
-                       zfs_dbgmsg("slow vdev: %s has %lu active IOs",
-                           vd->vdev_path, avl_numnodes(&vq->vq_active_tree));
+                       zfs_dbgmsg("slow vdev: %s has %u active IOs",
+                           vd->vdev_path, vq->vq_active);
 
                        /*
                         * Look at the head of all the pending queues,
                         * if any I/O has been outstanding for longer than
                         * the spa_deadman_synctime invoke the deadman logic.
                         */
-                       fio = avl_first(&vq->vq_active_tree);
+                       fio = list_head(&vq->vq_active_list);
                        delta = gethrtime() - fio->io_timestamp;
                        if (delta > spa_deadman_synctime(spa))
                                zio_deadman(fio, tag);
@@ -5446,7 +5745,7 @@ vdev_replace_in_progress(vdev_t *vdev)
  * Add a (source=src, propname=propval) list to an nvlist.
  */
 static void
-vdev_prop_add_list(nvlist_t *nvl, const char *propname, char *strval,
+vdev_prop_add_list(nvlist_t *nvl, const char *propname, const char *strval,
     uint64_t intval, zprop_source_t src)
 {
        nvlist_t *propval;
@@ -5472,6 +5771,7 @@ vdev_props_set_sync(void *arg, dmu_tx_t *tx)
        objset_t *mos = spa->spa_meta_objset;
        nvpair_t *elem = NULL;
        uint64_t vdev_guid;
+       uint64_t objid;
        nvlist_t *nvprops;
 
        vdev_guid = fnvlist_lookup_uint64(nvp, ZPOOL_VDEV_PROPS_SET_VDEV);
@@ -5482,28 +5782,30 @@ vdev_props_set_sync(void *arg, dmu_tx_t *tx)
        if (vd == NULL)
                return;
 
+       /*
+        * Set vdev property values in the vdev props mos object.
+        */
+       if (vd->vdev_root_zap != 0) {
+               objid = vd->vdev_root_zap;
+       } else if (vd->vdev_top_zap != 0) {
+               objid = vd->vdev_top_zap;
+       } else if (vd->vdev_leaf_zap != 0) {
+               objid = vd->vdev_leaf_zap;
+       } else {
+               panic("unexpected vdev type");
+       }
+
        mutex_enter(&spa->spa_props_lock);
 
        while ((elem = nvlist_next_nvpair(nvprops, elem)) != NULL) {
-               uint64_t intval, objid = 0;
-               char *strval;
+               uint64_t intval;
+               const char *strval;
                vdev_prop_t prop;
                const char *propname = nvpair_name(elem);
                zprop_type_t proptype;
 
-               /*
-                * Set vdev property values in the vdev props mos object.
-                */
-               if (vd->vdev_top_zap != 0) {
-                       objid = vd->vdev_top_zap;
-               } else if (vd->vdev_leaf_zap != 0) {
-                       objid = vd->vdev_leaf_zap;
-               } else {
-                       panic("vdev not top or leaf");
-               }
-
                switch (prop = vdev_name_to_prop(propname)) {
-               case VDEV_PROP_USER:
+               case VDEV_PROP_USERPROP:
                        if (vdev_prop_user(propname)) {
                                strval = fnvpair_value_string(elem);
                                if (strlen(strval) == 0) {
@@ -5566,10 +5868,16 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
        nvpair_t *elem = NULL;
        uint64_t vdev_guid;
        nvlist_t *nvprops;
-       int error;
+       int error = 0;
 
        ASSERT(vd != NULL);
 
+       /* Check that vdev has a zap we can use */
+       if (vd->vdev_root_zap == 0 &&
+           vd->vdev_top_zap == 0 &&
+           vd->vdev_leaf_zap == 0)
+               return (SET_ERROR(EINVAL));
+
        if (nvlist_lookup_uint64(innvl, ZPOOL_VDEV_PROPS_SET_VDEV,
            &vdev_guid) != 0)
                return (SET_ERROR(EINVAL));
@@ -5582,12 +5890,12 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
                return (SET_ERROR(EINVAL));
 
        while ((elem = nvlist_next_nvpair(nvprops, elem)) != NULL) {
-               char *propname = nvpair_name(elem);
+               const char *propname = nvpair_name(elem);
                vdev_prop_t prop = vdev_name_to_prop(propname);
                uint64_t intval = 0;
-               char *strval = NULL;
+               const char *strval = NULL;
 
-               if (prop == VDEV_PROP_USER && !vdev_prop_user(propname)) {
+               if (prop == VDEV_PROP_USERPROP && !vdev_prop_user(propname)) {
                        error = EINVAL;
                        goto end;
                }
@@ -5627,6 +5935,41 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
                        else
                                error = spa_vdev_alloc(spa, vdev_guid);
                        break;
+               case VDEV_PROP_FAILFAST:
+                       if (nvpair_value_uint64(elem, &intval) != 0) {
+                               error = EINVAL;
+                               break;
+                       }
+                       vd->vdev_failfast = intval & 1;
+                       break;
+               case VDEV_PROP_CHECKSUM_N:
+                       if (nvpair_value_uint64(elem, &intval) != 0) {
+                               error = EINVAL;
+                               break;
+                       }
+                       vd->vdev_checksum_n = intval;
+                       break;
+               case VDEV_PROP_CHECKSUM_T:
+                       if (nvpair_value_uint64(elem, &intval) != 0) {
+                               error = EINVAL;
+                               break;
+                       }
+                       vd->vdev_checksum_t = intval;
+                       break;
+               case VDEV_PROP_IO_N:
+                       if (nvpair_value_uint64(elem, &intval) != 0) {
+                               error = EINVAL;
+                               break;
+                       }
+                       vd->vdev_io_n = intval;
+                       break;
+               case VDEV_PROP_IO_T:
+                       if (nvpair_value_uint64(elem, &intval) != 0) {
+                               error = EINVAL;
+                               break;
+                       }
+                       vd->vdev_io_t = intval;
+                       break;
                default:
                        /* Most processing is done in vdev_props_set_sync */
                        break;
@@ -5667,7 +6010,9 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
 
        nvlist_lookup_nvlist(innvl, ZPOOL_VDEV_PROPS_GET_PROPS, &nvprops);
 
-       if (vd->vdev_top_zap != 0) {
+       if (vd->vdev_root_zap != 0) {
+               objid = vd->vdev_root_zap;
+       } else if (vd->vdev_top_zap != 0) {
                objid = vd->vdev_top_zap;
        } else if (vd->vdev_leaf_zap != 0) {
                objid = vd->vdev_leaf_zap;
@@ -5797,7 +6142,7 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
                                            KM_SLEEP);
                                for (uint64_t i = 0; i < vd->vdev_children;
                                    i++) {
-                                       char *vname;
+                                       const char *vname;
 
                                        vname = vdev_name(vd->vdev_child[i],
                                            namebuf, sizeof (namebuf));
@@ -5914,37 +6259,77 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
                                vdev_prop_add_list(outnvl, propname, NULL,
                                    vd->vdev_removing, ZPROP_SRC_NONE);
                                continue;
+                       case VDEV_PROP_RAIDZ_EXPANDING:
+                               /* Only expose this for raidz */
+                               if (vd->vdev_ops == &vdev_raidz_ops) {
+                                       vdev_prop_add_list(outnvl, propname,
+                                           NULL, vd->vdev_rz_expanding,
+                                           ZPROP_SRC_NONE);
+                               }
+                               continue;
                        /* Numeric Properties */
                        case VDEV_PROP_ALLOCATING:
+                               /* Leaf vdevs cannot have this property */
+                               if (vd->vdev_mg == NULL &&
+                                   vd->vdev_top != NULL) {
+                                       src = ZPROP_SRC_NONE;
+                                       intval = ZPROP_BOOLEAN_NA;
+                               } else {
+                                       err = vdev_prop_get_int(vd, prop,
+                                           &intval);
+                                       if (err && err != ENOENT)
+                                               break;
+
+                                       if (intval ==
+                                           vdev_prop_default_numeric(prop))
+                                               src = ZPROP_SRC_DEFAULT;
+                                       else
+                                               src = ZPROP_SRC_LOCAL;
+                               }
+
+                               vdev_prop_add_list(outnvl, propname, NULL,
+                                   intval, src);
+                               break;
+                       case VDEV_PROP_FAILFAST:
                                src = ZPROP_SRC_LOCAL;
                                strval = NULL;
 
                                err = zap_lookup(mos, objid, nvpair_name(elem),
                                    sizeof (uint64_t), 1, &intval);
                                if (err == ENOENT) {
-                                       intval =
-                                           vdev_prop_default_numeric(prop);
+                                       intval = vdev_prop_default_numeric(
+                                           prop);
                                        err = 0;
-                               } else if (err)
+                               } else if (err) {
                                        break;
+                               }
                                if (intval == vdev_prop_default_numeric(prop))
                                        src = ZPROP_SRC_DEFAULT;
 
-                               /* Leaf vdevs cannot have this property */
-                               if (vd->vdev_mg == NULL &&
-                                   vd->vdev_top != NULL) {
-                                       src = ZPROP_SRC_NONE;
-                                       intval = ZPROP_BOOLEAN_NA;
-                               }
-
                                vdev_prop_add_list(outnvl, propname, strval,
                                    intval, src);
                                break;
+                       case VDEV_PROP_CHECKSUM_N:
+                       case VDEV_PROP_CHECKSUM_T:
+                       case VDEV_PROP_IO_N:
+                       case VDEV_PROP_IO_T:
+                               err = vdev_prop_get_int(vd, prop, &intval);
+                               if (err && err != ENOENT)
+                                       break;
+
+                               if (intval == vdev_prop_default_numeric(prop))
+                                       src = ZPROP_SRC_DEFAULT;
+                               else
+                                       src = ZPROP_SRC_LOCAL;
+
+                               vdev_prop_add_list(outnvl, propname, NULL,
+                                   intval, src);
+                               break;
                        /* Text Properties */
                        case VDEV_PROP_COMMENT:
                                /* Exists in the ZAP below */
                                /* FALLTHRU */
-                       case VDEV_PROP_USER:
+                       case VDEV_PROP_USERPROP:
                                /* User Properties */
                                src = ZPROP_SRC_LOCAL;
 
@@ -5996,7 +6381,6 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
                        strval = NULL;
                        zprop_source_t src = ZPROP_SRC_DEFAULT;
                        propname = za.za_name;
-                       prop = vdev_name_to_prop(propname);
 
                        switch (za.za_integer_length) {
                        case 8:
@@ -6039,16 +6423,19 @@ EXPORT_SYMBOL(vdev_online);
 EXPORT_SYMBOL(vdev_offline);
 EXPORT_SYMBOL(vdev_clear);
 
-ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, default_ms_count, INT, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, default_ms_count, UINT, ZMOD_RW,
        "Target number of metaslabs per top-level vdev");
 
-ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, default_ms_shift, INT, ZMOD_RW,
-       "Default limit for metaslab size");
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, default_ms_shift, UINT, ZMOD_RW,
+       "Default lower limit for metaslab size");
+
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, max_ms_shift, UINT, ZMOD_RW,
+       "Default upper limit for metaslab size");
 
-ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, min_ms_count, INT, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, min_ms_count, UINT, ZMOD_RW,
        "Minimum number of metaslabs per top-level vdev");
 
-ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, ms_count_limit, INT, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, ms_count_limit, UINT, ZMOD_RW,
        "Practical upper limit of total metaslabs per top-level vdev");
 
 ZFS_MODULE_PARAM(zfs, zfs_, slow_io_events_per_second, UINT, ZMOD_RW,
@@ -6069,16 +6456,16 @@ ZFS_MODULE_PARAM(zfs_vdev, vdev_, validate_skip, INT, ZMOD_RW,
 ZFS_MODULE_PARAM(zfs, zfs_, nocacheflush, INT, ZMOD_RW,
        "Disable cache flushes");
 
-ZFS_MODULE_PARAM(zfs, zfs_, embedded_slog_min_ms, INT, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, embedded_slog_min_ms, UINT, ZMOD_RW,
        "Minimum number of metaslabs required to dedicate one for log blocks");
 
 /* BEGIN CSTYLED */
 ZFS_MODULE_PARAM_CALL(zfs_vdev, zfs_vdev_, min_auto_ashift,
-       param_set_min_auto_ashift, param_get_ulong, ZMOD_RW,
+       param_set_min_auto_ashift, param_get_uint, ZMOD_RW,
        "Minimum ashift used when creating new top-level vdevs");
 
 ZFS_MODULE_PARAM_CALL(zfs_vdev, zfs_vdev_, max_auto_ashift,
-       param_set_max_auto_ashift, param_get_ulong, ZMOD_RW,
+       param_set_max_auto_ashift, param_get_uint, ZMOD_RW,
        "Maximum ashift used when optimizing for logical -> physical sector "
        "size on new top-level vdevs");
 /* END CSTYLED */