Remove bcopy(), bzero(), bcmp()

diff --git a/module/zfs/vdev_trim.c b/module/zfs/vdev_trim.c
index c7c429cbd5e31aa33137a07586cee55deec6bb9e..43027f1368982af1fffd236c00561f2c6494e88c 100644
--- a/module/zfs/vdev_trim.c
+++ b/module/zfs/vdev_trim.c
@@ -22,6 +22,7 @@
 /*
  * Copyright (c) 2016 by Delphix. All rights reserved.
  * Copyright (c) 2019 by Lawrence Livermore National Security, LLC.
+ * Copyright (c) 2021 Hewlett Packard Enterprise Development LP
  */
 
 #include <sys/spa.h>
 #include <sys/txg.h>
 #include <sys/vdev_impl.h>
 #include <sys/vdev_trim.h>
-#include <sys/refcount.h>
 #include <sys/metaslab_impl.h>
 #include <sys/dsl_synctask.h>
 #include <sys/zap.h>
 #include <sys/dmu_tx.h>
+#include <sys/arc_impl.h>
 
 /*
  * TRIM is a feature which is used to notify an SSD that some previously
 /*
  * Maximum size of TRIM I/O, ranges will be chunked into 128 MiB lengths.
  */
-unsigned int zfs_trim_extent_bytes_max = 128 * 1024 * 1024;
+static unsigned int zfs_trim_extent_bytes_max = 128 * 1024 * 1024;
 
 /*
  * Minimum size of TRIM I/O, extents smaller than 32 KiB will be skipped.
  */
-unsigned int zfs_trim_extent_bytes_min = 32 * 1024;
+static unsigned int zfs_trim_extent_bytes_min = 32 * 1024;
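
To make the two extent tunables concrete, here is a standalone arithmetic check of the chunking rule they impose (hypothetical 1 GiB range; the values are the defaults above):

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	uint64_t range = 1ULL << 30;			/* 1 GiB to TRIM */
	uint64_t bytes_max = 128 * 1024 * 1024;	/* zfs_trim_extent_bytes_max */
	uint64_t bytes_min = 32 * 1024;		/* zfs_trim_extent_bytes_min */

	/* A 1 GiB range is issued as eight 128 MiB TRIM I/Os. */
	printf("chunks: %llu\n",
	    (unsigned long long)((range + bytes_max - 1) / bytes_max));

	/* A 16 KiB extent falls below the minimum and is skipped. */
	printf("skipped: %s\n", 16 * 1024 < bytes_min ? "yes" : "no");
	return (0);
}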
 
 /*
  * Skip uninitialized metaslabs during the TRIM process.  This option is
@@ -117,7 +118,7 @@ unsigned int zfs_trim_metaslab_skip = 0;
  * concurrent TRIM I/Os issued to the device is controlled by the
  * zfs_vdev_trim_min_active and zfs_vdev_trim_max_active module options.
  */
-unsigned int zfs_trim_queue_limit = 10;
+static unsigned int zfs_trim_queue_limit = 10;
 
 /*
  * The minimum number of transaction groups between automatic trims of a
@@ -133,7 +134,7 @@ unsigned int zfs_trim_queue_limit = 10;
  * has the opposite effect.  The default value of 32 was determined through
  * testing to be a reasonable compromise.
  */
-unsigned int zfs_trim_txg_batch = 32;
+static unsigned int zfs_trim_txg_batch = 32;
 
 /*
  * The trim_args structure describes how a leaf vdev
@@ -311,13 +312,14 @@ vdev_trim_change_state(vdev_t *vd, vdev_trim_state_t new_state,
                        vd->vdev_trim_secure = secure;
        }
 
-       boolean_t resumed = !!(vd->vdev_trim_state == VDEV_TRIM_SUSPENDED);
+       vdev_trim_state_t old_state = vd->vdev_trim_state;
+       boolean_t resumed = (old_state == VDEV_TRIM_SUSPENDED);
        vd->vdev_trim_state = new_state;
 
        dmu_tx_t *tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
        VERIFY0(dmu_tx_assign(tx, TXG_WAIT));
        dsl_sync_task_nowait(spa_get_dsl(spa), vdev_trim_zap_update_sync,
-           guid, 2, ZFS_SPACE_CHECK_NONE, tx);
+           guid, tx);
 
        switch (new_state) {
        case VDEV_TRIM_ACTIVE:
@@ -332,9 +334,12 @@ vdev_trim_change_state(vdev_t *vd, vdev_trim_state_t new_state,
                    "vdev=%s suspended", vd->vdev_path);
                break;
        case VDEV_TRIM_CANCELED:
-               spa_event_notify(spa, vd, NULL, ESC_ZFS_TRIM_CANCEL);
-               spa_history_log_internal(spa, "trim", tx,
-                   "vdev=%s canceled", vd->vdev_path);
+               if (old_state == VDEV_TRIM_ACTIVE ||
+                   old_state == VDEV_TRIM_SUSPENDED) {
+                       spa_event_notify(spa, vd, NULL, ESC_ZFS_TRIM_CANCEL);
+                       spa_history_log_internal(spa, "trim", tx,
+                           "vdev=%s canceled", vd->vdev_path);
+               }
                break;
        case VDEV_TRIM_COMPLETE:
                spa_event_notify(spa, vd, NULL, ESC_ZFS_TRIM_FINISH);
@@ -346,6 +351,9 @@ vdev_trim_change_state(vdev_t *vd, vdev_trim_state_t new_state,
        }
 
        dmu_tx_commit(tx);
+
+       if (new_state != VDEV_TRIM_ACTIVE)
+               spa_notify_waiters(spa);
 }
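
The added spa_notify_waiters() call wakes threads blocked in `zpool wait -t trim` whenever a state change takes the device out of VDEV_TRIM_ACTIVE. A minimal sketch of what that hand-off amounts to, assuming the spa_activities_lock/spa_activities_cv pair from spa.c:

/*
 * Sketch only; the real routine lives in spa.c. Taking the lock
 * before broadcasting keeps the wakeup from slipping in between a
 * waiter's activity check and its cv_wait().
 */
static void
spa_notify_waiters_sketch(spa_t *spa)
{
	mutex_enter(&spa->spa_activities_lock);
	cv_broadcast(&spa->spa_activities_cv);
	mutex_exit(&spa->spa_activities_lock);
}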
 
 /*
@@ -419,6 +427,36 @@ vdev_autotrim_cb(zio_t *zio)
        spa_config_exit(vd->vdev_spa, SCL_STATE_ALL, vd);
 }
 
+/*
+ * The zio_done_func_t done callback for each TRIM issued via
+ * vdev_trim_simple(). It is responsible for updating the TRIM stats and
+ * limiting the number of in flight TRIM I/Os.  Simple TRIM I/Os are best
+ * effort and are never reissued on failure.
+ */
+static void
+vdev_trim_simple_cb(zio_t *zio)
+{
+       vdev_t *vd = zio->io_vd;
+
+       mutex_enter(&vd->vdev_trim_io_lock);
+
+       if (zio->io_error != 0) {
+               vd->vdev_stat.vs_trim_errors++;
+               spa_iostats_trim_add(vd->vdev_spa, TRIM_TYPE_SIMPLE,
+                   0, 0, 0, 0, 1, zio->io_orig_size);
+       } else {
+               spa_iostats_trim_add(vd->vdev_spa, TRIM_TYPE_SIMPLE,
+                   1, zio->io_orig_size, 0, 0, 0, 0);
+       }
+
+       ASSERT3U(vd->vdev_trim_inflight[TRIM_TYPE_SIMPLE], >, 0);
+       vd->vdev_trim_inflight[TRIM_TYPE_SIMPLE]--;
+       cv_broadcast(&vd->vdev_trim_io_cv);
+       mutex_exit(&vd->vdev_trim_io_lock);
+
+       spa_config_exit(vd->vdev_spa, SCL_STATE_ALL, vd);
+}
+
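
vdev_trim_simple_cb() is the release half of an in-flight throttle: the issue path in vdev_trim_range() (below) sleeps while the inflight counters sum to zfs_trim_queue_limit, and each completion decrements and broadcasts. A self-contained userland analogue of that pattern, illustrative only:

#include <pthread.h>

static pthread_mutex_t io_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t io_cv = PTHREAD_COND_INITIALIZER;
static unsigned int inflight;
static const unsigned int queue_limit = 10;	/* cf. zfs_trim_queue_limit */

static void
issue_one(void)		/* cf. the wait loop in vdev_trim_range() */
{
	pthread_mutex_lock(&io_lock);
	while (inflight >= queue_limit)
		pthread_cond_wait(&io_cv, &io_lock);
	inflight++;
	pthread_mutex_unlock(&io_lock);
	/* ... submit the TRIM I/O here ... */
}

static void
complete_one(void)	/* cf. vdev_trim_simple_cb() */
{
	pthread_mutex_lock(&io_lock);
	inflight--;
	pthread_cond_broadcast(&io_cv);
	pthread_mutex_unlock(&io_lock);
}

int
main(void)
{
	issue_one();
	complete_one();
	return (0);
}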
 /*
  * Returns the average trim rate in bytes/sec for the ta->trim_vdev.
  */
@@ -438,6 +475,7 @@ vdev_trim_range(trim_args_t *ta, uint64_t start, uint64_t size)
 {
        vdev_t *vd = ta->trim_vdev;
        spa_t *spa = vd->vdev_spa;
+       void *cb;
 
        mutex_enter(&vd->vdev_trim_io_lock);
 
@@ -448,7 +486,7 @@ vdev_trim_range(trim_args_t *ta, uint64_t start, uint64_t size)
        if (ta->trim_type == TRIM_TYPE_MANUAL) {
                while (vd->vdev_trim_rate != 0 && !vdev_trim_should_stop(vd) &&
                    vdev_trim_calculate_rate(ta) > vd->vdev_trim_rate) {
-                       cv_timedwait_sig(&vd->vdev_trim_io_cv,
+                       cv_timedwait_idle(&vd->vdev_trim_io_cv,
                            &vd->vdev_trim_io_lock, ddi_get_lbolt() +
                            MSEC_TO_TICK(10));
                }
@@ -456,8 +494,8 @@ vdev_trim_range(trim_args_t *ta, uint64_t start, uint64_t size)
        ta->trim_bytes_done += size;
 
        /* Limit in flight trimming I/Os */
-       while (vd->vdev_trim_inflight[0] + vd->vdev_trim_inflight[1] >=
-           zfs_trim_queue_limit) {
+       while (vd->vdev_trim_inflight[0] + vd->vdev_trim_inflight[1] +
+           vd->vdev_trim_inflight[2] >= zfs_trim_queue_limit) {
                cv_wait(&vd->vdev_trim_io_cv, &vd->vdev_trim_io_lock);
        }
        vd->vdev_trim_inflight[ta->trim_type]++;
@@ -477,8 +515,7 @@ vdev_trim_range(trim_args_t *ta, uint64_t start, uint64_t size)
 
                /* This is the first write of this txg. */
                dsl_sync_task_nowait(spa_get_dsl(spa),
-                   vdev_trim_zap_update_sync, guid, 2,
-                   ZFS_SPACE_CHECK_RESERVED, tx);
+                   vdev_trim_zap_update_sync, guid, tx);
        }
 
        /*
@@ -502,10 +539,17 @@ vdev_trim_range(trim_args_t *ta, uint64_t start, uint64_t size)
        if (ta->trim_type == TRIM_TYPE_MANUAL)
                vd->vdev_trim_offset[txg & TXG_MASK] = start + size;
 
+       if (ta->trim_type == TRIM_TYPE_MANUAL) {
+               cb = vdev_trim_cb;
+       } else if (ta->trim_type == TRIM_TYPE_AUTO) {
+               cb = vdev_autotrim_cb;
+       } else {
+               cb = vdev_trim_simple_cb;
+       }
+
        zio_nowait(zio_trim(spa->spa_txg_zio[txg & TXG_MASK], vd,
-           start, size, ta->trim_type == TRIM_TYPE_MANUAL ?
-           vdev_trim_cb : vdev_autotrim_cb, NULL,
-           ZIO_PRIORITY_TRIM, ZIO_FLAG_CANFAIL, ta->trim_flags));
+           start, size, cb, NULL, ZIO_PRIORITY_TRIM, ZIO_FLAG_CANFAIL,
+           ta->trim_flags));
-       /* vdev_trim_cb and vdev_autotrim_cb release SCL_STATE_ALL */
+       /* The TRIM done callbacks above release SCL_STATE_ALL */
 
        dmu_tx_commit(tx);
@@ -562,6 +606,32 @@ vdev_trim_ranges(trim_args_t *ta)
        return (0);
 }
 
+static void
+vdev_trim_xlate_last_rs_end(void *arg, range_seg64_t *physical_rs)
+{
+       uint64_t *last_rs_end = (uint64_t *)arg;
+
+       if (physical_rs->rs_end > *last_rs_end)
+               *last_rs_end = physical_rs->rs_end;
+}
+
+static void
+vdev_trim_xlate_progress(void *arg, range_seg64_t *physical_rs)
+{
+       vdev_t *vd = (vdev_t *)arg;
+
+       uint64_t size = physical_rs->rs_end - physical_rs->rs_start;
+       vd->vdev_trim_bytes_est += size;
+
+       if (vd->vdev_trim_last_offset >= physical_rs->rs_end) {
+               vd->vdev_trim_bytes_done += size;
+       } else if (vd->vdev_trim_last_offset > physical_rs->rs_start &&
+           vd->vdev_trim_last_offset <= physical_rs->rs_end) {
+               vd->vdev_trim_bytes_done +=
+                   vd->vdev_trim_last_offset - physical_rs->rs_start;
+       }
+}
+
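
The rule vdev_trim_xlate_progress() applies to each physical segment can be checked standalone with hypothetical offsets: segments entirely behind vdev_trim_last_offset count in full, a straddled segment counts up to the offset, and untouched segments add only to the estimate:

#include <stdio.h>
#include <stdint.h>

/* Bytes of [start, end) already trimmed, given the last trim offset. */
static uint64_t
bytes_done(uint64_t last, uint64_t start, uint64_t end)
{
	if (last >= end)
		return (end - start);	/* segment fully behind the cursor */
	if (last > start)
		return (last - start);	/* cursor is mid-segment */
	return (0);			/* segment not reached yet */
}

int
main(void)
{
	/* Last offset 150: [0,100) is fully done, [100,200) half done. */
	printf("%llu\n", (unsigned long long)bytes_done(150, 0, 100));
	printf("%llu\n", (unsigned long long)bytes_done(150, 100, 200));
	return (0);
}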
 /*
  * Calculates the completion percentage of a manual TRIM.
  */
@@ -579,27 +649,35 @@ vdev_trim_calculate_progress(vdev_t *vd)
                metaslab_t *msp = vd->vdev_top->vdev_ms[i];
                mutex_enter(&msp->ms_lock);
 
-               uint64_t ms_free = msp->ms_size -
-                   metaslab_allocated_space(msp);
-
-               if (vd->vdev_top->vdev_ops == &vdev_raidz_ops)
-                       ms_free /= vd->vdev_top->vdev_children;
+               uint64_t ms_free = (msp->ms_size -
+                   metaslab_allocated_space(msp)) /
+                   vdev_get_ndisks(vd->vdev_top);
 
                /*
                 * Convert the metaslab range to a physical range
                 * on our vdev. We use this to determine if we are
                 * in the middle of this metaslab range.
                 */
-               range_seg64_t logical_rs, physical_rs;
+               range_seg64_t logical_rs, physical_rs, remain_rs;
                logical_rs.rs_start = msp->ms_start;
                logical_rs.rs_end = msp->ms_start + msp->ms_size;
-               vdev_xlate(vd, &logical_rs, &physical_rs);
 
+               /* Metaslab space after this offset has not been trimmed. */
+               vdev_xlate(vd, &logical_rs, &physical_rs, &remain_rs);
                if (vd->vdev_trim_last_offset <= physical_rs.rs_start) {
                        vd->vdev_trim_bytes_est += ms_free;
                        mutex_exit(&msp->ms_lock);
                        continue;
-               } else if (vd->vdev_trim_last_offset > physical_rs.rs_end) {
+               }
+
+               /* Metaslab space before this offset has been trimmed */
+               uint64_t last_rs_end = physical_rs.rs_end;
+               if (!vdev_xlate_is_empty(&remain_rs)) {
+                       vdev_xlate_walk(vd, &remain_rs,
+                           vdev_trim_xlate_last_rs_end, &last_rs_end);
+               }
+
+               if (vd->vdev_trim_last_offset > last_rs_end) {
                        vd->vdev_trim_bytes_done += ms_free;
                        vd->vdev_trim_bytes_est += ms_free;
                        mutex_exit(&msp->ms_lock);
@@ -620,21 +698,9 @@ vdev_trim_calculate_progress(vdev_t *vd)
                    rs != NULL; rs = zfs_btree_next(bt, &idx, &idx)) {
                        logical_rs.rs_start = rs_get_start(rs, rt);
                        logical_rs.rs_end = rs_get_end(rs, rt);
-                       vdev_xlate(vd, &logical_rs, &physical_rs);
-
-                       uint64_t size = physical_rs.rs_end -
-                           physical_rs.rs_start;
-                       vd->vdev_trim_bytes_est += size;
-                       if (vd->vdev_trim_last_offset >= physical_rs.rs_end) {
-                               vd->vdev_trim_bytes_done += size;
-                       } else if (vd->vdev_trim_last_offset >
-                           physical_rs.rs_start &&
-                           vd->vdev_trim_last_offset <=
-                           physical_rs.rs_end) {
-                               vd->vdev_trim_bytes_done +=
-                                   vd->vdev_trim_last_offset -
-                                   physical_rs.rs_start;
-                       }
+
+                       vdev_xlate_walk(vd, &logical_rs,
+                           vdev_trim_xlate_progress, vd);
                }
                mutex_exit(&msp->ms_lock);
        }
@@ -702,8 +768,38 @@ vdev_trim_load(vdev_t *vd)
        return (err);
 }
 
+static void
+vdev_trim_xlate_range_add(void *arg, range_seg64_t *physical_rs)
+{
+       trim_args_t *ta = arg;
+       vdev_t *vd = ta->trim_vdev;
+
+       /*
+        * Only a manual trim will be traversing the vdev sequentially.
+        * For an auto trim all valid ranges should be added.
+        */
+       if (ta->trim_type == TRIM_TYPE_MANUAL) {
+
+               /* Only add segments that we have not visited yet */
+               if (physical_rs->rs_end <= vd->vdev_trim_last_offset)
+                       return;
+
+               /* Pick up where we left off mid-range. */
+               if (vd->vdev_trim_last_offset > physical_rs->rs_start) {
+                       ASSERT3U(physical_rs->rs_end, >,
+                           vd->vdev_trim_last_offset);
+                       physical_rs->rs_start = vd->vdev_trim_last_offset;
+               }
+       }
+
+       ASSERT3U(physical_rs->rs_end, >, physical_rs->rs_start);
+
+       range_tree_add(ta->trim_tree, physical_rs->rs_start,
+           physical_rs->rs_end - physical_rs->rs_start);
+}
+
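
vdev_trim_xlate_range_add() is invoked once per physical piece by vdev_xlate_walk(). Roughly how that walk proceeds, as a simplified sketch of the helper in vdev.c: translate the logical range, hand every non-empty physical piece to the callback, then loop on the remainder. With raidz or dRAID a piece may not land on this leaf at all, hence the emptiness check.

static void
vdev_xlate_walk_sketch(vdev_t *vd, const range_seg64_t *logical_rs,
    void (*func)(void *, range_seg64_t *), void *arg)
{
	range_seg64_t iter_rs = *logical_rs;
	range_seg64_t physical_rs, remain_rs;

	while (!vdev_xlate_is_empty(&iter_rs)) {
		vdev_xlate(vd, &iter_rs, &physical_rs, &remain_rs);

		/* Skip pieces that do not live on this leaf vdev. */
		if (!vdev_xlate_is_empty(&physical_rs))
			func(arg, &physical_rs);

		iter_rs = remain_rs;
	}
}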
 /*
- * Convert the logical range into a physical range and add it to the
+ * Convert the logical range into physical ranges and add them to the
  * range tree passed in the trim_args_t.
  */
 static void
@@ -711,7 +807,7 @@ vdev_trim_range_add(void *arg, uint64_t start, uint64_t size)
 {
        trim_args_t *ta = arg;
        vdev_t *vd = ta->trim_vdev;
-       range_seg64_t logical_rs, physical_rs;
+       range_seg64_t logical_rs;
        logical_rs.rs_start = start;
        logical_rs.rs_end = start + size;
 
@@ -728,44 +824,7 @@ vdev_trim_range_add(void *arg, uint64_t start, uint64_t size)
        }
 
        ASSERT(vd->vdev_ops->vdev_op_leaf);
-       vdev_xlate(vd, &logical_rs, &physical_rs);
-
-       IMPLY(vd->vdev_top == vd,
-           logical_rs.rs_start == physical_rs.rs_start);
-       IMPLY(vd->vdev_top == vd,
-           logical_rs.rs_end == physical_rs.rs_end);
-
-       /*
-        * Only a manual trim will be traversing the vdev sequentially.
-        * For an auto trim all valid ranges should be added.
-        */
-       if (ta->trim_type == TRIM_TYPE_MANUAL) {
-
-               /* Only add segments that we have not visited yet */
-               if (physical_rs.rs_end <= vd->vdev_trim_last_offset)
-                       return;
-
-               /* Pick up where we left off mid-range. */
-               if (vd->vdev_trim_last_offset > physical_rs.rs_start) {
-                       ASSERT3U(physical_rs.rs_end, >,
-                           vd->vdev_trim_last_offset);
-                       physical_rs.rs_start = vd->vdev_trim_last_offset;
-               }
-       }
-
-       ASSERT3U(physical_rs.rs_end, >=, physical_rs.rs_start);
-
-       /*
-        * With raidz, it's possible that the logical range does not live on
-        * this leaf vdev. We only add the physical range to this vdev's if it
-        * has a length greater than 0.
-        */
-       if (physical_rs.rs_end > physical_rs.rs_start) {
-               range_tree_add(ta->trim_tree, physical_rs.rs_start,
-                   physical_rs.rs_end - physical_rs.rs_start);
-       } else {
-               ASSERT3U(physical_rs.rs_end, ==, physical_rs.rs_start);
-       }
+       vdev_xlate_walk(vd, &logical_rs, vdev_trim_xlate_range_add, arg);
 }
 
 /*
@@ -775,7 +834,7 @@ vdev_trim_range_add(void *arg, uint64_t start, uint64_t size)
  * by its ms_allocatable.  While a metaslab is undergoing trimming it is
  * not eligible for new allocations.
  */
-static void
+static _Noreturn void
 vdev_trim_thread(void *arg)
 {
        vdev_t *vd = arg;
@@ -872,10 +931,16 @@ vdev_trim_thread(void *arg)
        range_tree_destroy(ta.trim_tree);
 
        mutex_enter(&vd->vdev_trim_lock);
-       if (!vd->vdev_trim_exit_wanted && vdev_writeable(vd)) {
-               vdev_trim_change_state(vd, VDEV_TRIM_COMPLETE,
-                   vd->vdev_trim_rate, vd->vdev_trim_partial,
-                   vd->vdev_trim_secure);
+       if (!vd->vdev_trim_exit_wanted) {
+               if (vdev_writeable(vd)) {
+                       vdev_trim_change_state(vd, VDEV_TRIM_COMPLETE,
+                           vd->vdev_trim_rate, vd->vdev_trim_partial,
+                           vd->vdev_trim_secure);
+               } else if (vd->vdev_faulted) {
+                       vdev_trim_change_state(vd, VDEV_TRIM_CANCELED,
+                           vd->vdev_trim_rate, vd->vdev_trim_partial,
+                           vd->vdev_trim_secure);
+               }
        }
        ASSERT(vd->vdev_trim_thread != NULL || vd->vdev_trim_inflight[0] == 0);
 
@@ -893,6 +958,8 @@ vdev_trim_thread(void *arg)
        vd->vdev_trim_thread = NULL;
        cv_broadcast(&vd->vdev_trim_cv);
        mutex_exit(&vd->vdev_trim_lock);
+
+       thread_exit();
 }
 
 /*
@@ -936,6 +1003,7 @@ vdev_trim_stop_wait_impl(vdev_t *vd)
 void
 vdev_trim_stop_wait(spa_t *spa, list_t *vd_list)
 {
+       (void) spa;
        vdev_t *vd;
 
        ASSERT(MUTEX_HELD(&spa_namespace_lock));
@@ -1011,6 +1079,7 @@ vdev_trim_stop_all(vdev_t *vd, vdev_trim_state_t tgt_state)
 {
        spa_t *spa = vd->vdev_spa;
        list_t vd_list;
+       vdev_t *vd_l2cache;
 
        ASSERT(MUTEX_HELD(&spa_namespace_lock));
 
@@ -1018,6 +1087,17 @@ vdev_trim_stop_all(vdev_t *vd, vdev_trim_state_t tgt_state)
            offsetof(vdev_t, vdev_trim_node));
 
        vdev_trim_stop_all_impl(vd, tgt_state, &vd_list);
+
+       /*
+        * Iterate over cache devices and request that any in-progress
+        * TRIM of the whole device stop, in case the pool is exported
+        * or the cache device is removed prematurely.
+        */
+       for (int i = 0; i < spa->spa_l2cache.sav_count; i++) {
+               vd_l2cache = spa->spa_l2cache.sav_vdevs[i];
+               vdev_trim_stop_all_impl(vd_l2cache, tgt_state, &vd_list);
+       }
+
        vdev_trim_stop_wait(spa, &vd_list);
 
        if (vd->vdev_spa->spa_sync_on) {
@@ -1051,7 +1131,7 @@ vdev_trim_restart(vdev_t *vd)
                    vd->vdev_leaf_zap, VDEV_LEAF_ZAP_TRIM_ACTION_TIME,
                    sizeof (timestamp), 1, &timestamp);
                ASSERT(err == 0 || err == ENOENT);
-               vd->vdev_trim_action_time = (time_t)timestamp;
+               vd->vdev_trim_action_time = timestamp;
 
                if (vd->vdev_trim_state == VDEV_TRIM_SUSPENDED ||
                    vd->vdev_offline) {
@@ -1095,7 +1175,7 @@ vdev_trim_range_verify(void *arg, uint64_t start, uint64_t size)
  * N.B. This behavior is different from a manual TRIM where a thread
  * is created for each leaf vdev, instead of each top-level vdev.
  */
-static void
+static _Noreturn void
 vdev_autotrim_thread(void *arg)
 {
        vdev_t *vd = arg;
@@ -1357,6 +1437,8 @@ vdev_autotrim_thread(void *arg)
        vd->vdev_autotrim_thread = NULL;
        cv_broadcast(&vd->vdev_autotrim_cv);
        mutex_exit(&vd->vdev_autotrim_lock);
+
+       thread_exit();
 }
 
 /*
@@ -1432,6 +1514,187 @@ vdev_autotrim_restart(spa_t *spa)
                vdev_autotrim(spa);
 }
 
+static _Noreturn void
+vdev_trim_l2arc_thread(void *arg)
+{
+       vdev_t          *vd = arg;
+       spa_t           *spa = vd->vdev_spa;
+       l2arc_dev_t     *dev = l2arc_vdev_get(vd);
+       trim_args_t     ta = {0};
+       range_seg64_t   physical_rs;
+
+       ASSERT(vdev_is_concrete(vd));
+       spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
+
+       vd->vdev_trim_last_offset = 0;
+       vd->vdev_trim_rate = 0;
+       vd->vdev_trim_partial = 0;
+       vd->vdev_trim_secure = 0;
+
+       ta.trim_vdev = vd;
+       ta.trim_tree = range_tree_create(NULL, RANGE_SEG64, NULL, 0, 0);
+       ta.trim_type = TRIM_TYPE_MANUAL;
+       ta.trim_extent_bytes_max = zfs_trim_extent_bytes_max;
+       ta.trim_extent_bytes_min = SPA_MINBLOCKSIZE;
+       ta.trim_flags = 0;
+
+       physical_rs.rs_start = vd->vdev_trim_bytes_done = 0;
+       physical_rs.rs_end = vd->vdev_trim_bytes_est =
+           vdev_get_min_asize(vd);
+
+       range_tree_add(ta.trim_tree, physical_rs.rs_start,
+           physical_rs.rs_end - physical_rs.rs_start);
+
+       mutex_enter(&vd->vdev_trim_lock);
+       vdev_trim_change_state(vd, VDEV_TRIM_ACTIVE, 0, 0, 0);
+       mutex_exit(&vd->vdev_trim_lock);
+
+       (void) vdev_trim_ranges(&ta);
+
+       spa_config_exit(spa, SCL_CONFIG, FTAG);
+       mutex_enter(&vd->vdev_trim_io_lock);
+       while (vd->vdev_trim_inflight[TRIM_TYPE_MANUAL] > 0) {
+               cv_wait(&vd->vdev_trim_io_cv, &vd->vdev_trim_io_lock);
+       }
+       mutex_exit(&vd->vdev_trim_io_lock);
+
+       range_tree_vacate(ta.trim_tree, NULL, NULL);
+       range_tree_destroy(ta.trim_tree);
+
+       mutex_enter(&vd->vdev_trim_lock);
+       if (!vd->vdev_trim_exit_wanted && vdev_writeable(vd)) {
+               vdev_trim_change_state(vd, VDEV_TRIM_COMPLETE,
+                   vd->vdev_trim_rate, vd->vdev_trim_partial,
+                   vd->vdev_trim_secure);
+       }
+       ASSERT(vd->vdev_trim_thread != NULL ||
+           vd->vdev_trim_inflight[TRIM_TYPE_MANUAL] == 0);
+
+       /*
+        * Drop the vdev_trim_lock while we sync out the txg since it's
+        * possible that a device might be trying to come online and
+        * must check to see if it needs to restart a trim. That thread
+        * will be holding the spa_config_lock which would prevent the
+        * txg_wait_synced from completing. Same strategy as in
+        * vdev_trim_thread().
+        */
+       mutex_exit(&vd->vdev_trim_lock);
+       txg_wait_synced(spa_get_dsl(vd->vdev_spa), 0);
+       mutex_enter(&vd->vdev_trim_lock);
+
+       /*
+        * Update the header of the cache device here, before
+        * broadcasting vdev_trim_cv which may lead to the removal
+        * of the device. The same applies for setting l2ad_trim_all to
+        * false.
+        */
+       spa_config_enter(vd->vdev_spa, SCL_L2ARC, vd,
+           RW_READER);
+       memset(dev->l2ad_dev_hdr, 0, dev->l2ad_dev_hdr_asize);
+       l2arc_dev_hdr_update(dev);
+       spa_config_exit(vd->vdev_spa, SCL_L2ARC, vd);
+
+       vd->vdev_trim_thread = NULL;
+       if (vd->vdev_trim_state == VDEV_TRIM_COMPLETE)
+               dev->l2ad_trim_all = B_FALSE;
+
+       cv_broadcast(&vd->vdev_trim_cv);
+       mutex_exit(&vd->vdev_trim_lock);
+
+       thread_exit();
+}
+
+/*
+ * Punches out TRIM threads for the L2ARC devices in a spa and assigns them
+ * to the vd->vdev_trim_thread variable. This facilitates the management of
+ * trimming the whole cache device using TRIM_TYPE_MANUAL when it is added
+ * to a pool, at pool creation, or when its header is invalid.
+ */
+void
+vdev_trim_l2arc(spa_t *spa)
+{
+       ASSERT(MUTEX_HELD(&spa_namespace_lock));
+
+       /*
+        * Locate the spa's l2arc devices and kick off TRIM threads.
+        */
+       for (int i = 0; i < spa->spa_l2cache.sav_count; i++) {
+               vdev_t *vd = spa->spa_l2cache.sav_vdevs[i];
+               l2arc_dev_t *dev = l2arc_vdev_get(vd);
+
+               if (dev == NULL || !dev->l2ad_trim_all) {
+                       /*
+                        * Don't attempt TRIM if the vdev is UNAVAIL or if the
+                        * cache device was not marked for whole device TRIM
+                        * (i.e. l2arc_trim_ahead = 0, or the L2ARC device header
+                        * is valid with trim_state = VDEV_TRIM_COMPLETE and
+                        * l2ad_log_entries > 0).
+                        */
+                       continue;
+               }
+
+               mutex_enter(&vd->vdev_trim_lock);
+               ASSERT(vd->vdev_ops->vdev_op_leaf);
+               ASSERT(vdev_is_concrete(vd));
+               ASSERT3P(vd->vdev_trim_thread, ==, NULL);
+               ASSERT(!vd->vdev_detached);
+               ASSERT(!vd->vdev_trim_exit_wanted);
+               ASSERT(!vd->vdev_top->vdev_removing);
+               vdev_trim_change_state(vd, VDEV_TRIM_ACTIVE, 0, 0, 0);
+               vd->vdev_trim_thread = thread_create(NULL, 0,
+                   vdev_trim_l2arc_thread, vd, 0, &p0, TS_RUN, maxclsyspri);
+               mutex_exit(&vd->vdev_trim_lock);
+       }
+}
+
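
vdev_trim_l2arc() asserts that spa_namespace_lock is held, so a call site looks roughly like the sketch below (hypothetical context; the in-tree callers are the pool load, create, and vdev-add paths, which already hold the lock):

/* Hypothetical: start whole-device TRIM of newly added cache vdevs. */
mutex_enter(&spa_namespace_lock);
vdev_trim_l2arc(spa);
mutex_exit(&spa_namespace_lock);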
+/*
+ * A wrapper which calls vdev_trim_ranges(). It is intended to be called
+ * on leaf vdevs.
+ */
+int
+vdev_trim_simple(vdev_t *vd, uint64_t start, uint64_t size)
+{
+       trim_args_t ta = {0};
+       range_seg64_t physical_rs;
+       int error;
+       physical_rs.rs_start = start;
+       physical_rs.rs_end = start + size;
+
+       ASSERT(vdev_is_concrete(vd));
+       ASSERT(vd->vdev_ops->vdev_op_leaf);
+       ASSERT(!vd->vdev_detached);
+       ASSERT(!vd->vdev_top->vdev_removing);
+
+       ta.trim_vdev = vd;
+       ta.trim_tree = range_tree_create(NULL, RANGE_SEG64, NULL, 0, 0);
+       ta.trim_type = TRIM_TYPE_SIMPLE;
+       ta.trim_extent_bytes_max = zfs_trim_extent_bytes_max;
+       ta.trim_extent_bytes_min = SPA_MINBLOCKSIZE;
+       ta.trim_flags = 0;
+
+       ASSERT3U(physical_rs.rs_end, >=, physical_rs.rs_start);
+
+       if (physical_rs.rs_end > physical_rs.rs_start) {
+               range_tree_add(ta.trim_tree, physical_rs.rs_start,
+                   physical_rs.rs_end - physical_rs.rs_start);
+       } else {
+               ASSERT3U(physical_rs.rs_end, ==, physical_rs.rs_start);
+       }
+
+       error = vdev_trim_ranges(&ta);
+
+       mutex_enter(&vd->vdev_trim_io_lock);
+       while (vd->vdev_trim_inflight[TRIM_TYPE_SIMPLE] > 0) {
+               cv_wait(&vd->vdev_trim_io_cv, &vd->vdev_trim_io_lock);
+       }
+       mutex_exit(&vd->vdev_trim_io_lock);
+
+       range_tree_vacate(ta.trim_tree, NULL, NULL);
+       range_tree_destroy(ta.trim_tree);
+
+       return (error);
+}
+
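
A hedged usage sketch of the new wrapper: the L2ARC can pre-trim the region just ahead of its write hand before reusing it. l2ad_vdev and l2ad_hand are real l2arc_dev_t fields; the span below is a made-up stand-in for the trim-ahead distance:

/* Illustrative only: discard the space about to be overwritten. */
uint64_t start = dev->l2ad_hand;
uint64_t size = 8 * 1024 * 1024;	/* hypothetical trim-ahead span */
int error = vdev_trim_simple(dev->l2ad_vdev, start, size);
if (error != 0)
	zfs_dbgmsg("L2ARC trim-ahead failed: %d", error);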
 EXPORT_SYMBOL(vdev_trim);
 EXPORT_SYMBOL(vdev_trim_stop);
 EXPORT_SYMBOL(vdev_trim_stop_all);
@@ -1441,20 +1704,20 @@ EXPORT_SYMBOL(vdev_autotrim);
 EXPORT_SYMBOL(vdev_autotrim_stop_all);
 EXPORT_SYMBOL(vdev_autotrim_stop_wait);
 EXPORT_SYMBOL(vdev_autotrim_restart);
+EXPORT_SYMBOL(vdev_trim_l2arc);
+EXPORT_SYMBOL(vdev_trim_simple);
 
-/* BEGIN CSTYLED */
 ZFS_MODULE_PARAM(zfs_trim, zfs_trim_, extent_bytes_max, UINT, ZMOD_RW,
-    "Max size of TRIM commands, larger will be split");
+       "Max size of TRIM commands, larger will be split");
 
 ZFS_MODULE_PARAM(zfs_trim, zfs_trim_, extent_bytes_min, UINT, ZMOD_RW,
-    "Min size of TRIM commands, smaller will be skipped");
+       "Min size of TRIM commands, smaller will be skipped");
 
 ZFS_MODULE_PARAM(zfs_trim, zfs_trim_, metaslab_skip, UINT, ZMOD_RW,
-    "Skip metaslabs which have never been initialized");
+       "Skip metaslabs which have never been initialized");
 
 ZFS_MODULE_PARAM(zfs_trim, zfs_trim_, txg_batch, UINT, ZMOD_RW,
-    "Min number of txgs to aggregate frees before issuing TRIM");
+       "Min number of txgs to aggregate frees before issuing TRIM");
 
 ZFS_MODULE_PARAM(zfs_trim, zfs_trim_, queue_limit, UINT, ZMOD_RW,
-    "Max queued TRIMs outstanding per leaf vdev");
-/* END CSTYLED */
+       "Max queued TRIMs outstanding per leaf vdev");