Fix short-lived txg caused by autotrim

author Kevin Jin <33590050+jxdking@users.noreply.github.com>

Tue, 28 Mar 2023 15:43:41 +0000 (11:43 -0400)

committer GitHub <noreply@github.com>

Tue, 28 Mar 2023 15:43:41 +0000 (08:43 -0700)
author Kevin Jin <33590050+jxdking@users.noreply.github.com>
Tue, 28 Mar 2023 15:43:41 +0000 (11:43 -0400)
committer GitHub <noreply@github.com>
Tue, 28 Mar 2023 15:43:41 +0000 (08:43 -0700)
diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h

index 73c0206efa2ea8d702079caae2c16cbed5a1f1fc..7cfffe3b4eeddacf8bb25ad7a015fa9e7cf3cb23 100644 (file)
--- a/include/sys/vdev_impl.h
+++ b/include/sys/vdev_impl.h
@@ -329,6 +329,7 @@ struct vdev {
         list_node_t     vdev_trim_node;
         kmutex_t        vdev_autotrim_lock;
         kcondvar_t      vdev_autotrim_cv;
+       kcondvar_t      vdev_autotrim_kick_cv;
         kthread_t       *vdev_autotrim_thread;
         /* Protects vdev_trim_thread and vdev_trim_state. */
         kmutex_t        vdev_trim_lock;
diff --git a/include/sys/vdev_trim.h b/include/sys/vdev_trim.h

index 2a217f0d43ce8fe4d8c9fd6c0285eec126346d6b..7a94d4af098f1be055ebe58c0b66d1222cf1c83a 100644 (file)
--- a/include/sys/vdev_trim.h
+++ b/include/sys/vdev_trim.h
@@ -41,6 +41,7 @@ extern void vdev_trim_stop_all(vdev_t *vd, vdev_trim_state_t tgt_state);
  extern void vdev_trim_stop_wait(spa_t *spa, list_t *vd_list);
  extern void vdev_trim_restart(vdev_t *vd);
  extern void vdev_autotrim(spa_t *spa);
+extern void vdev_autotrim_kick(spa_t *spa);
  extern void vdev_autotrim_stop_all(spa_t *spa);
  extern void vdev_autotrim_stop_wait(vdev_t *vd);
  extern void vdev_autotrim_restart(spa_t *spa);
diff --git a/module/zfs/spa.c b/module/zfs/spa.c

index 0573daa92f9d2f355a03e8adf13eb3c51620d2f6..dc202978c0f6629b62777985dc48f6a18ad491b1 100644 (file)
--- a/module/zfs/spa.c
+++ b/module/zfs/spa.c
@@ -9449,6 +9449,9 @@ spa_sync(spa_t *spa, uint64_t txg)
  
         spa_update_dspace(spa);
  
+       if (spa_get_autotrim(spa) == SPA_AUTOTRIM_ON)
+               vdev_autotrim_kick(spa);
+
         /*
          * It had better be the case that we didn't dirty anything
          * since vdev_config_sync().
diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c

index 1af5baeecd2db258c25c5b6dc4b9481c9311e5c5..ad932a7ba764342a688d442451571e92e15329bc 100644 (file)
--- a/module/zfs/vdev.c
+++ b/module/zfs/vdev.c
@@ -696,6 +696,7 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
         mutex_init(&vd->vdev_trim_io_lock, NULL, MUTEX_DEFAULT, NULL);
         cv_init(&vd->vdev_trim_cv, NULL, CV_DEFAULT, NULL);
         cv_init(&vd->vdev_autotrim_cv, NULL, CV_DEFAULT, NULL);
+       cv_init(&vd->vdev_autotrim_kick_cv, NULL, CV_DEFAULT, NULL);
         cv_init(&vd->vdev_trim_io_cv, NULL, CV_DEFAULT, NULL);
  
         mutex_init(&vd->vdev_rebuild_lock, NULL, MUTEX_DEFAULT, NULL);
@@ -1148,6 +1149,7 @@ vdev_free(vdev_t *vd)
         mutex_destroy(&vd->vdev_trim_io_lock);
         cv_destroy(&vd->vdev_trim_cv);
         cv_destroy(&vd->vdev_autotrim_cv);
+       cv_destroy(&vd->vdev_autotrim_kick_cv);
         cv_destroy(&vd->vdev_trim_io_cv);
  
         mutex_destroy(&vd->vdev_rebuild_lock);
diff --git a/module/zfs/vdev_trim.c b/module/zfs/vdev_trim.c

index 5b5076c8722c9a5d21a6fe3fbdfedf1799f224b5..0d71b94343420bdbabc35697b4a1dca089e7e143 100644 (file)
--- a/module/zfs/vdev_trim.c
+++ b/module/zfs/vdev_trim.c
@@ -182,6 +182,25 @@ vdev_autotrim_should_stop(vdev_t *tvd)
             spa_get_autotrim(tvd->vdev_spa) == SPA_AUTOTRIM_OFF);
  }
  
+/*
+ * Wait for given number of kicks, return true if the wait is aborted due to
+ * vdev_autotrim_exit_wanted.
+ */
+static boolean_t
+vdev_autotrim_wait_kick(vdev_t *vd, int num_of_kick)
+{
+       mutex_enter(&vd->vdev_autotrim_lock);
+       for (int i = 0; i < num_of_kick; i++) {
+               if (vd->vdev_autotrim_exit_wanted)
+                       break;
+               cv_wait(&vd->vdev_autotrim_kick_cv, &vd->vdev_autotrim_lock);
+       }
+       boolean_t exit_wanted = vd->vdev_autotrim_exit_wanted;
+       mutex_exit(&vd->vdev_autotrim_lock);
+
+       return (exit_wanted);
+}
+
  /*
   * The sync task for updating the on-disk state of a manual TRIM.  This
   * is scheduled by vdev_trim_change_state().
@@ -1190,7 +1209,6 @@ vdev_autotrim_thread(void *arg)
  
         while (!vdev_autotrim_should_stop(vd)) {
                 int txgs_per_trim = MAX(zfs_trim_txg_batch, 1);
-               boolean_t issued_trim = B_FALSE;
                 uint64_t extent_bytes_max = zfs_trim_extent_bytes_max;
                 uint64_t extent_bytes_min = zfs_trim_extent_bytes_min;
  
@@ -1224,6 +1242,8 @@ vdev_autotrim_thread(void *arg)
                     i += txgs_per_trim) {
                         metaslab_t *msp = vd->vdev_ms[i];
                         range_tree_t *trim_tree;
+                       boolean_t issued_trim = B_FALSE;
+                       boolean_t wait_aborted = B_FALSE;
  
                         spa_config_exit(spa, SCL_CONFIG, FTAG);
                         metaslab_disable(msp);
@@ -1374,7 +1394,18 @@ vdev_autotrim_thread(void *arg)
                         range_tree_vacate(trim_tree, NULL, NULL);
                         range_tree_destroy(trim_tree);
  
-                       metaslab_enable(msp, issued_trim, B_FALSE);
+                       /*
+                        * Wait for couples of kicks, to ensure the trim io is
+                        * synced. If the wait is aborted due to
+                        * vdev_autotrim_exit_wanted, we need to signal
+                        * metaslab_enable() to wait for sync.
+                        */
+                       if (issued_trim) {
+                               wait_aborted = vdev_autotrim_wait_kick(vd,
+                                   TXG_CONCURRENT_STATES + TXG_DEFER_SIZE);
+                       }
+
+                       metaslab_enable(msp, wait_aborted, B_FALSE);
                         spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
  
                         for (uint64_t c = 0; c < children; c++) {
@@ -1388,17 +1419,14 @@ vdev_autotrim_thread(void *arg)
                         }
  
                         kmem_free(tap, sizeof (trim_args_t) * children);
+
+                       if (vdev_autotrim_should_stop(vd))
+                               break;
                 }
  
                 spa_config_exit(spa, SCL_CONFIG, FTAG);
  
-               /*
-                * After completing the group of metaslabs wait for the next
-                * open txg.  This is done to make sure that a minimum of
-                * zfs_trim_txg_batch txgs will occur before these metaslabs
-                * are trimmed again.
-                */
-               txg_wait_open(spa_get_dsl(spa), 0, issued_trim);
+               vdev_autotrim_wait_kick(vd, 1);
  
                 shift++;
                 spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
@@ -1476,11 +1504,9 @@ vdev_autotrim_stop_wait(vdev_t *tvd)
         mutex_enter(&tvd->vdev_autotrim_lock);
         if (tvd->vdev_autotrim_thread != NULL) {
                 tvd->vdev_autotrim_exit_wanted = B_TRUE;
-
-               while (tvd->vdev_autotrim_thread != NULL) {
-                       cv_wait(&tvd->vdev_autotrim_cv,
-                           &tvd->vdev_autotrim_lock);
-               }
+               cv_broadcast(&tvd->vdev_autotrim_kick_cv);
+               cv_wait(&tvd->vdev_autotrim_cv,
+                   &tvd->vdev_autotrim_lock);
  
                 ASSERT3P(tvd->vdev_autotrim_thread, ==, NULL);
                 tvd->vdev_autotrim_exit_wanted = B_FALSE;
@@ -1488,6 +1514,24 @@ vdev_autotrim_stop_wait(vdev_t *tvd)
         mutex_exit(&tvd->vdev_autotrim_lock);
  }
  
+void
+vdev_autotrim_kick(spa_t *spa)
+{
+       ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER));
+
+       vdev_t *root_vd = spa->spa_root_vdev;
+       vdev_t *tvd;
+
+       for (uint64_t i = 0; i < root_vd->vdev_children; i++) {
+               tvd = root_vd->vdev_child[i];
+
+               mutex_enter(&tvd->vdev_autotrim_lock);
+               if (tvd->vdev_autotrim_thread != NULL)
+                       cv_broadcast(&tvd->vdev_autotrim_kick_cv);
+               mutex_exit(&tvd->vdev_autotrim_lock);
+       }
+}
+
  /*
   * Wait for all of the vdev_autotrim_thread associated with the pool to
   * be terminated (canceled or stopped).
author	Kevin Jin <33590050+jxdking@users.noreply.github.com>
	Tue, 28 Mar 2023 15:43:41 +0000 (11:43 -0400)
committer	GitHub <noreply@github.com>
	Tue, 28 Mar 2023 15:43:41 +0000 (08:43 -0700)
include/sys/vdev_impl.h		patch \| blob \| blame \| history
include/sys/vdev_trim.h		patch \| blob \| blame \| history
module/zfs/spa.c		patch \| blob \| blame \| history
module/zfs/vdev.c		patch \| blob \| blame \| history
module/zfs/vdev_trim.c		patch \| blob \| blame \| history