Distributed Spare (dRAID) Feature
module/zfs/vdev.c
index e41e79ab8a188f61df56594d677852f38e3733ec..38f36e52fca66c80f092ea00251a47a70f887d7c 100644
@@ -40,6 +40,7 @@
 #include <sys/dsl_dir.h>
 #include <sys/vdev_impl.h>
 #include <sys/vdev_rebuild.h>
+#include <sys/vdev_draid.h>
 #include <sys/uberblock_impl.h>
 #include <sys/metaslab.h>
 #include <sys/metaslab_impl.h>
@@ -51,6 +52,7 @@
 #include <sys/arc.h>
 #include <sys/zil.h>
 #include <sys/dsl_scan.h>
+#include <sys/vdev_raidz.h>
 #include <sys/abd.h>
 #include <sys/vdev_initialize.h>
 #include <sys/vdev_trim.h>
@@ -193,6 +195,8 @@ vdev_dbgmsg_print_tree(vdev_t *vd, int indent)
 static vdev_ops_t *vdev_ops_table[] = {
        &vdev_root_ops,
        &vdev_raidz_ops,
+       &vdev_draid_ops,
+       &vdev_draid_spare_ops,
        &vdev_mirror_ops,
        &vdev_replacing_ops,
        &vdev_spare_ops,
@@ -221,10 +225,11 @@ vdev_getops(const char *type)
 
 /* ARGSUSED */
 void
-vdev_default_xlate(vdev_t *vd, const range_seg64_t *in, range_seg64_t *res)
+vdev_default_xlate(vdev_t *vd, const range_seg64_t *logical_rs,
+    range_seg64_t *physical_rs, range_seg64_t *remain_rs)
 {
-       res->rs_start = in->rs_start;
-       res->rs_end = in->rs_end;
+       physical_rs->rs_start = logical_rs->rs_start;
+       physical_rs->rs_end = logical_rs->rs_end;
 }
 
 /*
@@ -264,6 +269,12 @@ vdev_default_asize(vdev_t *vd, uint64_t psize)
        return (asize);
 }
 
+uint64_t
+vdev_default_min_asize(vdev_t *vd)
+{
+       return (vd->vdev_min_asize);
+}
+
 /*
  * Get the minimum allocatable size. We define the allocatable size as
  * the vdev's asize rounded to the nearest metaslab. This allows us to
@@ -289,15 +300,7 @@ vdev_get_min_asize(vdev_t *vd)
        if (vd == vd->vdev_top)
                return (P2ALIGN(vd->vdev_asize, 1ULL << vd->vdev_ms_shift));
 
-       /*
-        * The allocatable space for a raidz vdev is N * sizeof(smallest child),
-        * so each child must provide at least 1/Nth of its asize.
-        */
-       if (pvd->vdev_ops == &vdev_raidz_ops)
-               return ((pvd->vdev_min_asize + pvd->vdev_children - 1) /
-                   pvd->vdev_children);
-
-       return (pvd->vdev_min_asize);
+       return (pvd->vdev_ops->vdev_op_min_asize(pvd));
 }
 
 void
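The RAID-Z special case deleted above does not vanish; with this change the same per-child arithmetic is expected to move behind the new vdev_op_min_asize callback in the raidz/dRAID code. A minimal sketch of such a callback, reusing the ceiling division from the removed lines (the function name is illustrative, not necessarily what vdev_raidz.c defines):

static uint64_t
vdev_raidz_min_asize_sketch(vdev_t *vd)
{
        /*
         * Allocatable space is N * sizeof(smallest child), so each
         * child must provide at least 1/Nth of the parent's minimum
         * asize, rounded up.
         */
        return ((vd->vdev_min_asize + vd->vdev_children - 1) /
            vd->vdev_children);
}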
@@ -309,6 +312,48 @@ vdev_set_min_asize(vdev_t *vd)
                vdev_set_min_asize(vd->vdev_child[c]);
 }
 
+/*
+ * Get the minimal allocation size for the top-level vdev.
+ */
+uint64_t
+vdev_get_min_alloc(vdev_t *vd)
+{
+       uint64_t min_alloc = 1ULL << vd->vdev_ashift;
+
+       if (vd->vdev_ops->vdev_op_min_alloc != NULL)
+               min_alloc = vd->vdev_ops->vdev_op_min_alloc(vd);
+
+       return (min_alloc);
+}
+
+/*
+ * Get the parity level for a top-level vdev.
+ */
+uint64_t
+vdev_get_nparity(vdev_t *vd)
+{
+       uint64_t nparity = 0;
+
+       if (vd->vdev_ops->vdev_op_nparity != NULL)
+               nparity = vd->vdev_ops->vdev_op_nparity(vd);
+
+       return (nparity);
+}
+
+/*
+ * Get the number of data disks for a top-level vdev.
+ */
+uint64_t
+vdev_get_ndisks(vdev_t *vd)
+{
+       uint64_t ndisks = 1;
+
+       if (vd->vdev_ops->vdev_op_ndisks != NULL)
+               ndisks = vd->vdev_ops->vdev_op_ndisks(vd);
+
+       return (ndisks);
+}
+
 vdev_t *
 vdev_lookup_top(spa_t *spa, uint64_t vdev)
 {
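These three accessors are NULL-tolerant: a top-level vdev whose ops table does not provide vdev_op_min_alloc, vdev_op_nparity, or vdev_op_ndisks still reports sane defaults (one ashift-sized allocation, zero parity, one data disk). A hypothetical caller illustrating that, using only interfaces visible in this diff plus zfs_dbgmsg():

static void
vdev_log_layout_sketch(vdev_t *tvd)
{
        zfs_dbgmsg("vdev %llu: min_alloc=%llu nparity=%llu ndisks=%llu",
            (u_longlong_t)tvd->vdev_id,
            (u_longlong_t)vdev_get_min_alloc(tvd),
            (u_longlong_t)vdev_get_nparity(tvd),
            (u_longlong_t)vdev_get_ndisks(tvd));
}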
@@ -551,6 +596,7 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
        list_link_init(&vd->vdev_initialize_node);
        list_link_init(&vd->vdev_leaf_node);
        list_link_init(&vd->vdev_trim_node);
+
        mutex_init(&vd->vdev_dtl_lock, NULL, MUTEX_NOLOCKDEP, NULL);
        mutex_init(&vd->vdev_stat_lock, NULL, MUTEX_DEFAULT, NULL);
        mutex_init(&vd->vdev_probe_lock, NULL, MUTEX_DEFAULT, NULL);
@@ -569,9 +615,7 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
        cv_init(&vd->vdev_trim_io_cv, NULL, CV_DEFAULT, NULL);
 
        mutex_init(&vd->vdev_rebuild_lock, NULL, MUTEX_DEFAULT, NULL);
-       mutex_init(&vd->vdev_rebuild_io_lock, NULL, MUTEX_DEFAULT, NULL);
        cv_init(&vd->vdev_rebuild_cv, NULL, CV_DEFAULT, NULL);
-       cv_init(&vd->vdev_rebuild_io_cv, NULL, CV_DEFAULT, NULL);
 
        for (int t = 0; t < DTL_TYPES; t++) {
                vd->vdev_dtl[t] = range_tree_create(NULL, RANGE_SEG64, NULL, 0,
@@ -600,7 +644,7 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
 {
        vdev_ops_t *ops;
        char *type;
-       uint64_t guid = 0, islog, nparity;
+       uint64_t guid = 0, islog;
        vdev_t *vd;
        vdev_indirect_config_t *vic;
        char *tmp = NULL;
@@ -657,48 +701,13 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
        if (ops == &vdev_hole_ops && spa_version(spa) < SPA_VERSION_HOLES)
                return (SET_ERROR(ENOTSUP));
 
-       /*
-        * Set the nparity property for RAID-Z vdevs.
-        */
-       nparity = -1ULL;
-       if (ops == &vdev_raidz_ops) {
-               if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY,
-                   &nparity) == 0) {
-                       if (nparity == 0 || nparity > VDEV_RAIDZ_MAXPARITY)
-                               return (SET_ERROR(EINVAL));
-                       /*
-                        * Previous versions could only support 1 or 2 parity
-                        * device.
-                        */
-                       if (nparity > 1 &&
-                           spa_version(spa) < SPA_VERSION_RAIDZ2)
-                               return (SET_ERROR(ENOTSUP));
-                       if (nparity > 2 &&
-                           spa_version(spa) < SPA_VERSION_RAIDZ3)
-                               return (SET_ERROR(ENOTSUP));
-               } else {
-                       /*
-                        * We require the parity to be specified for SPAs that
-                        * support multiple parity levels.
-                        */
-                       if (spa_version(spa) >= SPA_VERSION_RAIDZ2)
-                               return (SET_ERROR(EINVAL));
-                       /*
-                        * Otherwise, we default to 1 parity device for RAID-Z.
-                        */
-                       nparity = 1;
-               }
-       } else {
-               nparity = 0;
-       }
-       ASSERT(nparity != -1ULL);
-
-       /*
-        * If creating a top-level vdev, check for allocation classes input
-        */
        if (top_level && alloctype == VDEV_ALLOC_ADD) {
                char *bias;
 
+               /*
+                * If creating a top-level vdev, check for allocation
+                * classes input.
+                */
                if (nvlist_lookup_string(nv, ZPOOL_CONFIG_ALLOCATION_BIAS,
                    &bias) == 0) {
                        alloc_bias = vdev_derive_alloc_bias(bias);
@@ -710,13 +719,32 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
                                return (SET_ERROR(ENOTSUP));
                        }
                }
+
+               /* spa_vdev_add() expects feature to be enabled */
+               if (ops == &vdev_draid_ops &&
+                   spa->spa_load_state != SPA_LOAD_CREATE &&
+                   !spa_feature_is_enabled(spa, SPA_FEATURE_DRAID)) {
+                       return (SET_ERROR(ENOTSUP));
+               }
        }
 
-       vd = vdev_alloc_common(spa, id, guid, ops);
-       vic = &vd->vdev_indirect_config;
+       /*
+        * Initialize the vdev specific data.  This is done before calling
+        * vdev_alloc_common() since it may fail and this simplifies the
+        * error reporting and cleanup code paths.
+        */
+       void *tsd = NULL;
+       if (ops->vdev_op_init != NULL) {
+               rc = ops->vdev_op_init(spa, nv, &tsd);
+               if (rc != 0) {
+                       return (rc);
+               }
+       }
 
+       vd = vdev_alloc_common(spa, id, guid, ops);
+       vd->vdev_tsd = tsd;
        vd->vdev_islog = islog;
-       vd->vdev_nparity = nparity;
+
        if (top_level && alloc_bias != VDEV_BIAS_NONE)
                vd->vdev_alloc_bias = alloc_bias;
 
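The new vdev_op_init/vdev_op_fini hooks let a vdev type validate its nvlist configuration and allocate private state before vdev_alloc_common() runs; the result is stored in vd->vdev_tsd and torn down from vdev_free(). A hypothetical pair matching the call sites in this hunk (the structure and nvpair handling are illustrative only; dRAID's real parsing lives in vdev_draid.c):

typedef struct vdev_example_config {
        uint64_t vec_nparity;
} vdev_example_config_t;

static int
vdev_example_init(spa_t *spa, nvlist_t *nv, void **tsd)
{
        vdev_example_config_t *vec;
        uint64_t nparity;

        (void) spa;

        if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY, &nparity) != 0)
                return (SET_ERROR(EINVAL));

        vec = kmem_zalloc(sizeof (*vec), KM_SLEEP);
        vec->vec_nparity = nparity;
        *tsd = vec;

        return (0);
}

static void
vdev_example_fini(vdev_t *vd)
{
        kmem_free(vd->vdev_tsd, sizeof (vdev_example_config_t));
        vd->vdev_tsd = NULL;
}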
@@ -756,6 +784,8 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
            &vd->vdev_wholedisk) != 0)
                vd->vdev_wholedisk = -1ULL;
 
+       vic = &vd->vdev_indirect_config;
+
        ASSERT0(vic->vic_mapping_object);
        (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_INDIRECT_OBJECT,
            &vic->vic_mapping_object);
@@ -937,6 +967,9 @@ vdev_free(vdev_t *vd)
        ASSERT(vd->vdev_child == NULL);
        ASSERT(vd->vdev_guid_sum == vd->vdev_guid);
 
+       if (vd->vdev_ops->vdev_op_fini != NULL)
+               vd->vdev_ops->vdev_op_fini(vd);
+
        /*
         * Discard allocation state.
         */
@@ -1028,9 +1061,7 @@ vdev_free(vdev_t *vd)
        cv_destroy(&vd->vdev_trim_io_cv);
 
        mutex_destroy(&vd->vdev_rebuild_lock);
-       mutex_destroy(&vd->vdev_rebuild_io_lock);
        cv_destroy(&vd->vdev_rebuild_cv);
-       cv_destroy(&vd->vdev_rebuild_io_cv);
 
        zfs_ratelimit_fini(&vd->vdev_delay_rl);
        zfs_ratelimit_fini(&vd->vdev_checksum_rl);
@@ -1161,7 +1192,8 @@ vdev_top_update(vdev_t *tvd, vdev_t *vd)
 }
 
 /*
- * Add a mirror/replacing vdev above an existing vdev.
+ * Add a mirror/replacing vdev above an existing vdev.  There is no need to
+ * call .vdev_op_init() since mirror/replacing vdevs do not have private state.
  */
 vdev_t *
 vdev_add_parent(vdev_t *cvd, vdev_ops_t *ops)
@@ -1296,6 +1328,10 @@ vdev_metaslab_group_create(vdev_t *vd)
                                spa->spa_max_ashift = vd->vdev_ashift;
                        if (vd->vdev_ashift < spa->spa_min_ashift)
                                spa->spa_min_ashift = vd->vdev_ashift;
+
+                       uint64_t min_alloc = vdev_get_min_alloc(vd);
+                       if (min_alloc < spa->spa_min_alloc)
+                               spa->spa_min_alloc = min_alloc;
                }
        }
 }
@@ -1622,39 +1658,67 @@ vdev_uses_zvols(vdev_t *vd)
        return (B_FALSE);
 }
 
-void
-vdev_open_children(vdev_t *vd)
+/*
+ * Returns B_TRUE if the passed child should be opened.
+ */
+static boolean_t
+vdev_default_open_children_func(vdev_t *vd)
+{
+       return (B_TRUE);
+}
+
+/*
+ * Open the requested child vdevs.  If any of the leaf vdevs are using
+ * a ZFS volume then do the opens in a single thread.  This avoids a
+ * deadlock when the current thread is holding the spa_namespace_lock.
+ */
+static void
+vdev_open_children_impl(vdev_t *vd, vdev_open_children_func_t *open_func)
 {
-       taskq_t *tq;
        int children = vd->vdev_children;
 
-       /*
-        * in order to handle pools on top of zvols, do the opens
-        * in a single thread so that the same thread holds the
-        * spa_namespace_lock
-        */
-       if (vdev_uses_zvols(vd)) {
-retry_sync:
-               for (int c = 0; c < children; c++)
-                       vd->vdev_child[c]->vdev_open_error =
-                           vdev_open(vd->vdev_child[c]);
-       } else {
-               tq = taskq_create("vdev_open", children, minclsyspri,
-                   children, children, TASKQ_PREPOPULATE);
-               if (tq == NULL)
-                       goto retry_sync;
+       taskq_t *tq = taskq_create("vdev_open", children, minclsyspri,
+           children, children, TASKQ_PREPOPULATE);
+       vd->vdev_nonrot = B_TRUE;
 
-               for (int c = 0; c < children; c++)
+       for (int c = 0; c < children; c++) {
+               vdev_t *cvd = vd->vdev_child[c];
+
+               if (open_func(cvd) == B_FALSE)
+                       continue;
+
+               if (tq == NULL || vdev_uses_zvols(vd)) {
+                       cvd->vdev_open_error = vdev_open(cvd);
+               } else {
                        VERIFY(taskq_dispatch(tq, vdev_open_child,
-                           vd->vdev_child[c], TQ_SLEEP) != TASKQID_INVALID);
+                           cvd, TQ_SLEEP) != TASKQID_INVALID);
+               }
 
+               vd->vdev_nonrot &= cvd->vdev_nonrot;
+       }
+
+       if (tq != NULL) {
+               taskq_wait(tq);
                taskq_destroy(tq);
        }
+}
 
-       vd->vdev_nonrot = B_TRUE;
+/*
+ * Open all child vdevs.
+ */
+void
+vdev_open_children(vdev_t *vd)
+{
+       vdev_open_children_impl(vd, vdev_default_open_children_func);
+}
 
-       for (int c = 0; c < children; c++)
-               vd->vdev_nonrot &= vd->vdev_child[c]->vdev_nonrot;
+/*
+ * Conditionally open a subset of child vdevs.
+ */
+void
+vdev_open_children_subset(vdev_t *vd, vdev_open_children_func_t *open_func)
+{
+       vdev_open_children_impl(vd, open_func);
 }
 
 /*
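vdev_open_children_subset() exists so a vdev implementation can open only some of its children; the predicate has the same shape as vdev_default_open_children_func() above. A plausible filter that skips distributed spare children (whether dRAID uses exactly this filter is an assumption):

static boolean_t
vdev_open_children_skip_dspares(vdev_t *vd)
{
        return (vd->vdev_ops != &vdev_draid_spare_ops);
}

A caller would pass it as vdev_open_children_subset(tvd, vdev_open_children_skip_dspares) from its open path.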
@@ -1952,6 +2016,16 @@ vdev_open(vdev_t *vd)
                return (error);
        }
 
+       /*
+        * Track the minimum allocation size.
+        */
+       if (vd->vdev_top == vd && vd->vdev_ashift != 0 &&
+           vd->vdev_islog == 0 && vd->vdev_aux == NULL) {
+               uint64_t min_alloc = vdev_get_min_alloc(vd);
+               if (min_alloc < spa->spa_min_alloc)
+                       spa->spa_min_alloc = min_alloc;
+       }
+
        /*
         * If this is a leaf vdev, assess whether a resilver is needed.
         * But don't do this if we are doing a reopen for a scrub, since
@@ -2278,7 +2352,9 @@ vdev_close(vdev_t *vd)
        vdev_t *pvd = vd->vdev_parent;
        spa_t *spa __maybe_unused = vd->vdev_spa;
 
-       ASSERT(spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL);
+       ASSERT(vd != NULL);
+       ASSERT(vd->vdev_open_thread == curthread ||
+           spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL);
 
        /*
         * If our parent is reopening, then we are as well, unless we are
@@ -2606,10 +2682,26 @@ vdev_dtl_empty(vdev_t *vd, vdev_dtl_type_t t)
 }
 
 /*
- * Returns B_TRUE if vdev determines offset needs to be resilvered.
+ * Check if the txg falls within the range which must be
+ * resilvered.  DVAs outside this range can always be skipped.
+ */
+boolean_t
+vdev_default_need_resilver(vdev_t *vd, const dva_t *dva, size_t psize,
+    uint64_t phys_birth)
+{
+       /* Set by sequential resilver. */
+       if (phys_birth == TXG_UNKNOWN)
+               return (B_TRUE);
+
+       return (vdev_dtl_contains(vd, DTL_PARTIAL, phys_birth, 1));
+}
+
+/*
+ * Returns B_TRUE if the vdev determines the DVA needs to be resilvered.
  */
 boolean_t
-vdev_dtl_need_resilver(vdev_t *vd, uint64_t offset, size_t psize)
+vdev_dtl_need_resilver(vdev_t *vd, const dva_t *dva, size_t psize,
+    uint64_t phys_birth)
 {
        ASSERT(vd != vd->vdev_spa->spa_root_vdev);
 
@@ -2617,7 +2709,8 @@ vdev_dtl_need_resilver(vdev_t *vd, uint64_t offset, size_t psize)
            vd->vdev_ops->vdev_op_leaf)
                return (B_TRUE);
 
-       return (vd->vdev_ops->vdev_op_need_resilver(vd, offset, psize));
+       return (vd->vdev_ops->vdev_op_need_resilver(vd, dva, psize,
+           phys_birth));
 }
 
 /*
@@ -2862,8 +2955,8 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
                        continue;                       /* leaf vdevs only */
                if (t == DTL_PARTIAL)
                        minref = 1;                     /* i.e. non-zero */
-               else if (vd->vdev_nparity != 0)
-                       minref = vd->vdev_nparity + 1;  /* RAID-Z */
+               else if (vdev_get_nparity(vd) != 0)
+                       minref = vdev_get_nparity(vd) + 1; /* RAID-Z, dRAID */
                else
                        minref = vd->vdev_children;     /* any kind of mirror */
                space_reftree_create(&reftree);
@@ -3727,6 +3820,9 @@ top:
        if (!vd->vdev_ops->vdev_op_leaf)
                return (spa_vdev_state_exit(spa, NULL, SET_ERROR(ENOTSUP)));
 
+       if (vd->vdev_ops == &vdev_draid_spare_ops)
+               return (spa_vdev_state_exit(spa, NULL, ENOTSUP));
+
        tvd = vd->vdev_top;
        mg = tvd->vdev_mg;
        generation = spa->spa_config_generation + 1;
@@ -3971,6 +4067,13 @@ vdev_accessible(vdev_t *vd, zio_t *zio)
 static void
 vdev_get_child_stat(vdev_t *cvd, vdev_stat_t *vs, vdev_stat_t *cvs)
 {
+       /*
+        * Exclude the dRAID spare when aggregating to avoid double counting
+        * the ops and bytes.  These IOs are counted by the physical leaves.
+        */
+       if (cvd->vdev_ops == &vdev_draid_spare_ops)
+               return;
+
        for (int t = 0; t < VS_ZIO_TYPES; t++) {
                vs->vs_ops[t] += cvs->vs_ops[t];
                vs->vs_bytes[t] += cvs->vs_bytes[t];
@@ -4063,7 +4166,6 @@ vdev_get_stats_ex_impl(vdev_t *vd, vdev_stat_t *vs, vdev_stat_ex_t *vsx)
                                vdev_get_child_stat(cvd, vs, cvs);
                        if (vsx)
                                vdev_get_child_stat_ex(cvd, vsx, cvsx);
-
                }
        } else {
                /*
@@ -4248,7 +4350,9 @@ vdev_stat_update(zio_t *zio, uint64_t psize)
 
                        /*
                         * Repair is the result of a rebuild issued by the
-                        * rebuild thread (vdev_rebuild_thread).
+                        * rebuild thread (vdev_rebuild_thread).  To avoid
+                        * double counting repaired bytes the virtual dRAID
+                        * spare vdev is excluded from the processed bytes.
                         */
                        if (zio->io_priority == ZIO_PRIORITY_REBUILD) {
                                vdev_t *tvd = vd->vdev_top;
@@ -4256,8 +4360,10 @@ vdev_stat_update(zio_t *zio, uint64_t psize)
                                vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys;
                                uint64_t *rebuilt = &vrp->vrp_bytes_rebuilt;
 
-                               if (vd->vdev_ops->vdev_op_leaf)
+                               if (vd->vdev_ops->vdev_op_leaf &&
+                                   vd->vdev_ops != &vdev_draid_spare_ops) {
                                        atomic_add_64(rebuilt, psize);
+                               }
                                vs->vs_rebuild_processed += psize;
                        }
 
@@ -4981,31 +5087,42 @@ vdev_clear_resilver_deferred(vdev_t *vd, dmu_tx_t *tx)
            vdev_resilver_needed(vd, NULL, NULL));
 }
 
+boolean_t
+vdev_xlate_is_empty(range_seg64_t *rs)
+{
+       return (rs->rs_start == rs->rs_end);
+}
+
 /*
- * Translate a logical range to the physical range for the specified vdev_t.
- * This function is initially called with a leaf vdev and will walk each
- * parent vdev until it reaches a top-level vdev. Once the top-level is
- * reached the physical range is initialized and the recursive function
- * begins to unwind. As it unwinds it calls the parent's vdev specific
- * translation function to do the real conversion.
+ * Translate a logical range to the first contiguous physical range for the
+ * specified vdev_t.  This function is initially called with a leaf vdev and
+ * will walk each parent vdev until it reaches a top-level vdev. Once the
+ * top-level is reached the physical range is initialized and the recursive
+ * function begins to unwind. As it unwinds it calls the parent's vdev
+ * specific translation function to do the real conversion.
  */
 void
 vdev_xlate(vdev_t *vd, const range_seg64_t *logical_rs,
-    range_seg64_t *physical_rs)
+    range_seg64_t *physical_rs, range_seg64_t *remain_rs)
 {
        /*
         * Walk up the vdev tree
         */
        if (vd != vd->vdev_top) {
-               vdev_xlate(vd->vdev_parent, logical_rs, physical_rs);
+               vdev_xlate(vd->vdev_parent, logical_rs, physical_rs,
+                   remain_rs);
        } else {
                /*
-                * We've reached the top-level vdev, initialize the
-                * physical range to the logical range and start to
-                * unwind.
+                * We've reached the top-level vdev, initialize the physical
+                * range to the logical range and set an empty remaining
+                * range then start to unwind.
                 */
                physical_rs->rs_start = logical_rs->rs_start;
                physical_rs->rs_end = logical_rs->rs_end;
+
+               remain_rs->rs_start = logical_rs->rs_start;
+               remain_rs->rs_end = logical_rs->rs_start;
+
                return;
        }
 
@@ -5015,16 +5132,40 @@ vdev_xlate(vdev_t *vd, const range_seg64_t *logical_rs,
 
        /*
         * As this recursive function unwinds, translate the logical
-        * range into its physical components by calling the
-        * vdev specific translate function.
+        * range into its physical and any remaining components by calling
+        * the vdev specific translate function.
         */
        range_seg64_t intermediate = { 0 };
-       pvd->vdev_ops->vdev_op_xlate(vd, physical_rs, &intermediate);
+       pvd->vdev_ops->vdev_op_xlate(vd, physical_rs, &intermediate, remain_rs);
 
        physical_rs->rs_start = intermediate.rs_start;
        physical_rs->rs_end = intermediate.rs_end;
 }
 
+void
+vdev_xlate_walk(vdev_t *vd, const range_seg64_t *logical_rs,
+    vdev_xlate_func_t *func, void *arg)
+{
+       range_seg64_t iter_rs = *logical_rs;
+       range_seg64_t physical_rs;
+       range_seg64_t remain_rs;
+
+       while (!vdev_xlate_is_empty(&iter_rs)) {
+
+               vdev_xlate(vd, &iter_rs, &physical_rs, &remain_rs);
+
+               /*
+                * With raidz and dRAID, it's possible that the logical range
+                * does not live on this leaf vdev.  Only call the provided
+                * function when the translated physical size is non-zero.
+                */
+               if (!vdev_xlate_is_empty(&physical_rs))
+                       func(arg, &physical_rs);
+
+               iter_rs = remain_rs;
+       }
+}
+
 /*
  * Look at the vdev tree and determine whether any devices are currently being
  * replaced.
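The new vdev_xlate_walk() helper repeats vdev_xlate() until the remaining range is empty, invoking the caller's function once per contiguous physical range. A sketch of a caller that sums the physical bytes backing a logical range on a leaf vdev (the callback signature is inferred from the func(arg, &physical_rs) call above):

static void
vdev_xlate_sum_cb(void *arg, range_seg64_t *physical_rs)
{
        uint64_t *sum = arg;

        *sum += physical_rs->rs_end - physical_rs->rs_start;
}

static uint64_t
vdev_xlate_physical_size(vdev_t *vd, const range_seg64_t *logical_rs)
{
        uint64_t sum = 0;

        vdev_xlate_walk(vd, logical_rs, vdev_xlate_sum_cb, &sum);

        return (sum);
}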