Reduce number of metaslab preload taskq threads.

author Alexander Motin <mav@FreeBSD.org>

Fri, 6 Oct 2023 16:04:00 +0000 (12:04 -0400)

committer GitHub <noreply@github.com>

Fri, 6 Oct 2023 16:04:00 +0000 (09:04 -0700)
author Alexander Motin <mav@FreeBSD.org>
Fri, 6 Oct 2023 16:04:00 +0000 (12:04 -0400)
committer GitHub <noreply@github.com>
Fri, 6 Oct 2023 16:04:00 +0000 (09:04 -0700)
diff --git a/include/sys/metaslab_impl.h b/include/sys/metaslab_impl.h

index d328068890ccf8fac5e585ccebabc520e6d0cab1..4f434291ddbfff3c150b8be99a6cb4f0e5c6e24f 100644 (file)
--- a/include/sys/metaslab_impl.h
+++ b/include/sys/metaslab_impl.h
@@ -250,7 +250,6 @@ struct metaslab_group {
         int64_t                 mg_activation_count;
         metaslab_class_t        *mg_class;
         vdev_t                  *mg_vd;
-       taskq_t                 *mg_taskq;
         metaslab_group_t        *mg_prev;
         metaslab_group_t        *mg_next;
  
diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h

index 1a04bedc3137e923e9ca3ea28d95a15095da87ef..094258d47a48d1054501f6a0d58a5da9a130ce36 100644 (file)
--- a/include/sys/spa_impl.h
+++ b/include/sys/spa_impl.h
@@ -424,7 +424,9 @@ struct spa {
  
         hrtime_t        spa_ccw_fail_time;      /* Conf cache write fail time */
         taskq_t         *spa_zvol_taskq;        /* Taskq for minor management */
+       taskq_t         *spa_metaslab_taskq;    /* Taskq for metaslab preload */
         taskq_t         *spa_prefetch_taskq;    /* Taskq for prefetch threads */
+       taskq_t         *spa_upgrade_taskq;     /* Taskq for upgrade jobs */
         uint64_t        spa_multihost;          /* multihost aware (mmp) */
         mmp_thread_t    spa_mmp;                /* multihost mmp thread */
         list_t          spa_leaf_list;          /* list of leaf vdevs */
@@ -448,8 +450,6 @@ struct spa {
          */
         spa_config_lock_t spa_config_lock[SCL_LOCKS]; /* config changes */
         zfs_refcount_t  spa_refcount;           /* number of opens */
-
-       taskq_t         *spa_upgrade_taskq;     /* taskq for upgrade jobs */
  };
  
  extern char *spa_config_path;
diff --git a/man/man4/zfs.4 b/man/man4/zfs.4

index 3843419731b8d55339bf4a7c9c11e784855dabf4..66e4f6a4b578b9e887fa3fdadaae44a1aa39bd13 100644 (file)
--- a/man/man4/zfs.4
+++ b/man/man4/zfs.4
@@ -402,6 +402,12 @@ Practical upper limit of total metaslabs per top-level vdev.
  .It Sy metaslab_preload_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
  Enable metaslab group preloading.
  .
+.It Sy metaslab_preload_limit Ns = Ns Sy 10 Pq uint
+Maximum number of metaslabs per group to preload.
+.
+.It Sy metaslab_preload_pct Ns = Ns Sy 50 Pq uint
+Percentage of CPUs that can be used by the metaslab preload taskq.
+.
  .It Sy metaslab_lba_weighting_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
  Give more weight to metaslabs with lower LBAs,
  assuming they have greater bandwidth,
diff --git a/module/os/freebsd/zfs/sysctl_os.c b/module/os/freebsd/zfs/sysctl_os.c

index ba9a95e4a66d21fdaea74e73bdb8bde8d8154cf0..312d76c3e0235c3fa888a9b6fcc11b7e7b5b6bf3 100644 (file)
--- a/module/os/freebsd/zfs/sysctl_os.c
+++ b/module/os/freebsd/zfs/sysctl_os.c
@@ -614,28 +614,6 @@ SYSCTL_UINT(_vfs_zfs_metaslab, OID_AUTO, df_free_pct,
         " space map to continue allocations in a first-fit fashion");
  /* END CSTYLED */
  
-/*
- * Percentage of all cpus that can be used by the metaslab taskq.
- */
-extern int metaslab_load_pct;
-
-/* BEGIN CSTYLED */
-SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, load_pct,
-       CTLFLAG_RWTUN, &metaslab_load_pct, 0,
-       "Percentage of cpus that can be used by the metaslab taskq");
-/* END CSTYLED */
-
-/*
- * Max number of metaslabs per group to preload.
- */
-extern uint_t metaslab_preload_limit;
-
-/* BEGIN CSTYLED */
-SYSCTL_UINT(_vfs_zfs_metaslab, OID_AUTO, preload_limit,
-       CTLFLAG_RWTUN, &metaslab_preload_limit, 0,
-       "Max number of metaslabs per group to preload");
-/* END CSTYLED */
-
  /* mmp.c */
  
  int
diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c

index 8635403d6ad4794bc3ee1a90311e3af885301611..e0d4a6a635082ba9da72ad40c509891f81d49d74 100644 (file)
--- a/module/zfs/metaslab.c
+++ b/module/zfs/metaslab.c
@@ -205,11 +205,6 @@ static const uint32_t metaslab_min_search_count = 100;
   */
  static int metaslab_df_use_largest_segment = B_FALSE;
  
-/*
- * Percentage of all cpus that can be used by the metaslab taskq.
- */
-int metaslab_load_pct = 50;
-
  /*
   * These tunables control how long a metaslab will remain loaded after the
   * last allocation from it.  A metaslab can't be unloaded until at least
@@ -854,9 +849,6 @@ metaslab_group_create(metaslab_class_t *mc, vdev_t *vd, int allocators)
                 zfs_refcount_create_tracked(&mga->mga_alloc_queue_depth);
         }
  
-       mg->mg_taskq = taskq_create("metaslab_group_taskq", metaslab_load_pct,
-           maxclsyspri, 10, INT_MAX, TASKQ_THREADS_CPU_PCT | TASKQ_DYNAMIC);
-
         return (mg);
  }
  
@@ -872,7 +864,6 @@ metaslab_group_destroy(metaslab_group_t *mg)
          */
         ASSERT(mg->mg_activation_count <= 0);
  
-       taskq_destroy(mg->mg_taskq);
         avl_destroy(&mg->mg_metaslab_tree);
         mutex_destroy(&mg->mg_lock);
         mutex_destroy(&mg->mg_ms_disabled_lock);
@@ -963,7 +954,7 @@ metaslab_group_passivate(metaslab_group_t *mg)
          * allocations from taking place and any changes to the vdev tree.
          */
         spa_config_exit(spa, locks & ~(SCL_ZIO - 1), spa);
-       taskq_wait_outstanding(mg->mg_taskq, 0);
+       taskq_wait_outstanding(spa->spa_metaslab_taskq, 0);
         spa_config_enter(spa, locks & ~(SCL_ZIO - 1), spa, RW_WRITER);
         metaslab_group_alloc_update(mg);
         for (int i = 0; i < mg->mg_allocators; i++) {
@@ -3571,10 +3562,8 @@ metaslab_group_preload(metaslab_group_t *mg)
         avl_tree_t *t = &mg->mg_metaslab_tree;
         int m = 0;
  
-       if (spa_shutting_down(spa) || !metaslab_preload_enabled) {
-               taskq_wait_outstanding(mg->mg_taskq, 0);
+       if (spa_shutting_down(spa) || !metaslab_preload_enabled)
                 return;
-       }
  
         mutex_enter(&mg->mg_lock);
  
@@ -3594,8 +3583,9 @@ metaslab_group_preload(metaslab_group_t *mg)
                         continue;
                 }
  
-               VERIFY(taskq_dispatch(mg->mg_taskq, metaslab_preload,
-                   msp, TQ_SLEEP) != TASKQID_INVALID);
+               VERIFY(taskq_dispatch(spa->spa_metaslab_taskq, metaslab_preload,
+                   msp, TQ_SLEEP | (m <= mg->mg_allocators ? TQ_FRONT : 0))
+                   != TASKQID_INVALID);
         }
         mutex_exit(&mg->mg_lock);
  }
@@ -6224,6 +6214,9 @@ ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, debug_unload, INT, ZMOD_RW,
  ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, preload_enabled, INT, ZMOD_RW,
         "Preload potential metaslabs during reassessment");
  
+ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, preload_limit, UINT, ZMOD_RW,
+       "Max number of metaslabs per group to preload");
+
  ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, unload_delay, UINT, ZMOD_RW,
         "Delay in txgs after metaslab was last used before unloading");
  
diff --git a/module/zfs/spa.c b/module/zfs/spa.c

index cda62f939c1e4a64ebb05ffc39b5022bfd9bc81d..413150fd220f27b947c768caff968825a39cfc07 100644 (file)
--- a/module/zfs/spa.c
+++ b/module/zfs/spa.c
@@ -169,6 +169,11 @@ static int spa_load_impl(spa_t *spa, spa_import_type_t type,
      const char **ereport);
  static void spa_vdev_resilver_done(spa_t *spa);
  
+/*
+ * Percentage of all CPUs that can be used by the metaslab preload taskq.
+ */
+static uint_t metaslab_preload_pct = 50;
+
  static uint_t  zio_taskq_batch_pct = 80;         /* 1 thread per cpu in pset */
  static uint_t  zio_taskq_batch_tpq;              /* threads per taskq */
  static const boolean_t zio_taskq_sysdc = B_TRUE; /* use SDC scheduling class */
@@ -1399,6 +1404,13 @@ spa_activate(spa_t *spa, spa_mode_t mode)
         spa->spa_zvol_taskq = taskq_create("z_zvol", 1, defclsyspri,
             1, INT_MAX, 0);
  
+       /*
+        * The taskq to preload metaslabs.
+        */
+       spa->spa_metaslab_taskq = taskq_create("z_metaslab",
+           metaslab_preload_pct, maxclsyspri, 1, INT_MAX,
+           TASKQ_DYNAMIC | TASKQ_THREADS_CPU_PCT);
+
         /*
          * Taskq dedicated to prefetcher threads: this is used to prevent the
          * pool traverse code from monopolizing the global (and limited)
@@ -1434,6 +1446,11 @@ spa_deactivate(spa_t *spa)
                 spa->spa_zvol_taskq = NULL;
         }
  
+       if (spa->spa_metaslab_taskq) {
+               taskq_destroy(spa->spa_metaslab_taskq);
+               spa->spa_metaslab_taskq = NULL;
+       }
+
         if (spa->spa_prefetch_taskq) {
                 taskq_destroy(spa->spa_prefetch_taskq);
                 spa->spa_prefetch_taskq = NULL;
@@ -1706,13 +1723,7 @@ spa_unload(spa_t *spa)
          * This ensures that there is no async metaslab prefetching
          * while we attempt to unload the spa.
          */
-       if (spa->spa_root_vdev != NULL) {
-               for (int c = 0; c < spa->spa_root_vdev->vdev_children; c++) {
-                       vdev_t *vc = spa->spa_root_vdev->vdev_child[c];
-                       if (vc->vdev_mg != NULL)
-                               taskq_wait(vc->vdev_mg->mg_taskq);
-               }
-       }
+       taskq_wait(spa->spa_metaslab_taskq);
  
         if (spa->spa_mmp.mmp_thread)
                 mmp_thread_stop(spa);
@@ -10134,6 +10145,9 @@ EXPORT_SYMBOL(spa_prop_clear_bootfs);
  /* asynchronous event notification */
  EXPORT_SYMBOL(spa_event_notify);
  
+ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, preload_pct, UINT, ZMOD_RW,
+       "Percentage of CPUs to run a metaslab preload taskq");
+
  /* BEGIN CSTYLED */
  ZFS_MODULE_PARAM(zfs_spa, spa_, load_verify_shift, UINT, ZMOD_RW,
         "log2 fraction of arc that can be used by inflight I/Os when "
author	Alexander Motin <mav@FreeBSD.org>
	Fri, 6 Oct 2023 16:04:00 +0000 (12:04 -0400)
committer	GitHub <noreply@github.com>
	Fri, 6 Oct 2023 16:04:00 +0000 (09:04 -0700)
include/sys/metaslab_impl.h		patch \| blob \| blame \| history
include/sys/spa_impl.h		patch \| blob \| blame \| history
man/man4/zfs.4		patch \| blob \| blame \| history
module/os/freebsd/zfs/sysctl_os.c		patch \| blob \| blame \| history
module/zfs/metaslab.c		patch \| blob \| blame \| history
module/zfs/spa.c		patch \| blob \| blame \| history