metaslab_init is the slowest part of importing a mature pool, and it
must be repeated hundreds of times for each top-level vdev. Its
speed is dominated by a few serialized disk accesses, which can lead to
import times of over an hour for pools with many top-level vdevs on
spinning disks.

Speed up the import by using a taskqueue to parallelize vdev_load across
all top-level vdevs.

This also requires adding mutex protection to
metaslab_class_t.mc_histogram. The mc_histogram fields were
unprotected when that code was first written in "Illumos 4976-4984 -
metaslab improvements" (OpenZFS
f3a7f6610f2df0217ba3b99099019417a954b673). The lock wasn't added until
3dfb57a35e8cbaa7c424611235d669f3c575ada1, though it's unclear exactly
which fields it was meant to protect. In any case, it wasn't until
vdev_load was parallelized that any code attempted concurrent access to
those fields; a sketch of the race follows the trailers below.

Sponsored by: Axcient
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Alan Somers <asomers@gmail.com>
Closes #11470
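
For context, the race in question is a plain lost update: two taskq
threads loading different top-level vdevs may concurrently
read-modify-write the same class-wide counter. A minimal sketch of the
now-protected pattern (mc_histogram, mc_lock, and the increment are from
the diff below; the enclosing loop and locals are elided):

	mutex_enter(&mc->mc_lock);
	/*
	 * Without mc_lock, two threads can interleave the load and
	 * store of this read-modify-write, silently dropping one
	 * increment.
	 */
	mc->mc_histogram[i + ashift] +=
	    msp->ms_sm->sm_phys->smp_histogram[i];
	mutex_exit(&mc->mc_lock);
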
boolean_t vdev_expanding; /* expand the vdev? */
boolean_t vdev_reopening; /* reopen in progress? */
boolean_t vdev_nonrot; /* true if solid state */
+ int vdev_load_error; /* error on last load */
int vdev_open_error; /* error on last open */
kthread_t *vdev_open_thread; /* thread opening children */
uint64_t vdev_crtxg; /* txg when top-level was added */
mc_hist = kmem_zalloc(sizeof (uint64_t) * RANGE_TREE_HISTOGRAM_SIZE,
KM_SLEEP);
+ mutex_enter(&mc->mc_lock);
	for (int c = 0; c < rvd->vdev_children; c++) {
		vdev_t *tvd = rvd->vdev_child[c];
		metaslab_group_t *mg = vdev_get_mg(tvd, mc);

		/* Skip holes and groups that are not in this class. */
		if (!vdev_is_concrete(tvd) || tvd->vdev_ms_shift == 0 ||
		    mg->mg_class != mc)
			continue;

		for (int i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i++)
			mc_hist[i] += mg->mg_histogram[i];
	}

	for (int i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i++)
		VERIFY3U(mc_hist[i], ==, mc->mc_histogram[i]);
+ mutex_exit(&mc->mc_lock);
kmem_free(mc_hist, sizeof (uint64_t) * RANGE_TREE_HISTOGRAM_SIZE);
}
return;
mutex_enter(&mg->mg_lock);
+ mutex_enter(&mc->mc_lock);
for (int i = 0; i < SPACE_MAP_HISTOGRAM_SIZE; i++) {
IMPLY(mg == mg->mg_vd->vdev_log_mg,
mc == spa_embedded_log_class(mg->mg_vd->vdev_spa));
		mg->mg_histogram[i + ashift] +=
		    msp->ms_sm->sm_phys->smp_histogram[i];
		mc->mc_histogram[i + ashift] +=
		    msp->ms_sm->sm_phys->smp_histogram[i];
}
+ mutex_exit(&mc->mc_lock);
mutex_exit(&mg->mg_lock);
}
return;
mutex_enter(&mg->mg_lock);
+ mutex_enter(&mc->mc_lock);
for (int i = 0; i < SPACE_MAP_HISTOGRAM_SIZE; i++) {
ASSERT3U(mg->mg_histogram[i + ashift], >=,
msp->ms_sm->sm_phys->smp_histogram[i]);
		mg->mg_histogram[i + ashift] -=
		    msp->ms_sm->sm_phys->smp_histogram[i];
		mc->mc_histogram[i + ashift] -=
		    msp->ms_sm->sm_phys->smp_histogram[i];
}
+ mutex_exit(&mc->mc_lock);
mutex_exit(&mg->mg_lock);
}
return (NULL);
}
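+/*
+ * Taskq callback: load a single top-level vdev.  A taskq function returns
+ * void, so the result is stashed in vd->vdev_load_error for the caller to
+ * collect after taskq_wait().
+ */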
+static void
+vdev_load_child(void *arg)
+{
+ vdev_t *vd = arg;
+
+ vd->vdev_load_error = vdev_load(vd);
+}
+
static void
vdev_open_child(void *arg)
{
int
vdev_load(vdev_t *vd)
{
+ int children = vd->vdev_children;
int error = 0;
+ taskq_t *tq = NULL;
+
+ /*
+ * It's only worthwhile to use the taskq for the root vdev, because the
+ * slow part is metaslab_init, and that only happens for top-level
+ * vdevs.
+ */
+ if (vd->vdev_ops == &vdev_root_ops && vd->vdev_children > 0) {
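+		/*
+		 * One worker per child so that every top-level vdev can be
+		 * loaded concurrently; TASKQ_PREPOPULATE preallocates the
+		 * task entries up front.
+		 */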
+ tq = taskq_create("vdev_load", children, minclsyspri,
+ children, children, TASKQ_PREPOPULATE);
+ }
/*
* Recursively load all children.
*/
for (int c = 0; c < vd->vdev_children; c++) {
- error = vdev_load(vd->vdev_child[c]);
- if (error != 0) {
- return (error);
+ vdev_t *cvd = vd->vdev_child[c];
+
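+		/*
+		 * As in vdev_open_children(), children that sit on top of
+		 * zvols are loaded in this thread so that the same thread
+		 * holds spa_namespace_lock.
+		 */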
+ if (tq == NULL || vdev_uses_zvols(cvd)) {
+ cvd->vdev_load_error = vdev_load(cvd);
+ } else {
+ VERIFY(taskq_dispatch(tq, vdev_load_child,
+ cvd, TQ_SLEEP) != TASKQID_INVALID);
}
}
+ if (tq != NULL) {
+ taskq_wait(tq);
+ taskq_destroy(tq);
+ }
+
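+	/*
+	 * Every child has now finished loading, either inline or via the
+	 * taskq; report the first error encountered, if any.
+	 */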
+ for (int c = 0; c < vd->vdev_children; c++) {
+ int error = vd->vdev_child[c]->vdev_load_error;
+
+ if (error != 0)
+ return (error);
+ }
+
vdev_set_deflate_ratio(vd);
/*