/*
* Copyright (c) 2016 by Delphix. All rights reserved.
* Copyright (c) 2019 by Lawrence Livermore National Security, LLC.
+ * Copyright (c) 2021 Hewlett Packard Enterprise Development LP
*/
#include <sys/spa.h>
#include <sys/txg.h>
#include <sys/vdev_impl.h>
#include <sys/vdev_trim.h>
-#include <sys/refcount.h>
#include <sys/metaslab_impl.h>
#include <sys/dsl_synctask.h>
#include <sys/zap.h>
#include <sys/dmu_tx.h>
+#include <sys/arc_impl.h>
/*
* TRIM is a feature which is used to notify an SSD that some previously
/*
* Maximum size of TRIM I/O, ranges will be chunked into 128MiB lengths.
*/
-unsigned int zfs_trim_extent_bytes_max = 128 * 1024 * 1024;
+static unsigned int zfs_trim_extent_bytes_max = 128 * 1024 * 1024;
/*
* Minimum size of TRIM I/O, extents smaller than 32KiB will be skipped.
*/
-unsigned int zfs_trim_extent_bytes_min = 32 * 1024;
+static unsigned int zfs_trim_extent_bytes_min = 32 * 1024;
/*
* Skip uninitialized metaslabs during the TRIM process. This option is
* concurrent TRIM I/Os issued to the device is controlled by the
* zfs_vdev_trim_min_active and zfs_vdev_trim_max_active module options.
*/
-unsigned int zfs_trim_queue_limit = 10;
+static unsigned int zfs_trim_queue_limit = 10;
/*
* The minimum number of transaction groups between automatic trims of a
* has the opposite effect. The default value of 32 was determined through
* testing to be a reasonable compromise.
*/
-unsigned int zfs_trim_txg_batch = 32;
+static unsigned int zfs_trim_txg_batch = 32;
/*
* The trim_args is a control structure which describes how a leaf vdev
vd->vdev_trim_secure = secure;
}
- boolean_t resumed = !!(vd->vdev_trim_state == VDEV_TRIM_SUSPENDED);
+ vdev_trim_state_t old_state = vd->vdev_trim_state;
+ boolean_t resumed = (old_state == VDEV_TRIM_SUSPENDED);
vd->vdev_trim_state = new_state;
dmu_tx_t *tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
VERIFY0(dmu_tx_assign(tx, TXG_WAIT));
dsl_sync_task_nowait(spa_get_dsl(spa), vdev_trim_zap_update_sync,
- guid, 2, ZFS_SPACE_CHECK_NONE, tx);
+ guid, tx);
switch (new_state) {
case VDEV_TRIM_ACTIVE:
"vdev=%s suspended", vd->vdev_path);
break;
case VDEV_TRIM_CANCELED:
- spa_event_notify(spa, vd, NULL, ESC_ZFS_TRIM_CANCEL);
- spa_history_log_internal(spa, "trim", tx,
- "vdev=%s canceled", vd->vdev_path);
+ if (old_state == VDEV_TRIM_ACTIVE ||
+ old_state == VDEV_TRIM_SUSPENDED) {
+ spa_event_notify(spa, vd, NULL, ESC_ZFS_TRIM_CANCEL);
+ spa_history_log_internal(spa, "trim", tx,
+ "vdev=%s canceled", vd->vdev_path);
+ }
break;
case VDEV_TRIM_COMPLETE:
spa_event_notify(spa, vd, NULL, ESC_ZFS_TRIM_FINISH);
}
dmu_tx_commit(tx);
+
+ if (new_state != VDEV_TRIM_ACTIVE)
+ spa_notify_waiters(spa);
}
/*
spa_config_exit(vd->vdev_spa, SCL_STATE_ALL, vd);
}
+/*
+ * The zio_done_func_t done callback for each TRIM issued via
+ * vdev_trim_simple(). It is responsible for updating the TRIM stats and
+ * limiting the number of in flight TRIM I/Os. Simple TRIM I/Os are best
+ * effort and are never reissued on failure.
+ */
+static void
+vdev_trim_simple_cb(zio_t *zio)
+{
+ vdev_t *vd = zio->io_vd;
+
+ mutex_enter(&vd->vdev_trim_io_lock);
+
+ if (zio->io_error != 0) {
+ vd->vdev_stat.vs_trim_errors++;
+ spa_iostats_trim_add(vd->vdev_spa, TRIM_TYPE_SIMPLE,
+ 0, 0, 0, 0, 1, zio->io_orig_size);
+ } else {
+ spa_iostats_trim_add(vd->vdev_spa, TRIM_TYPE_SIMPLE,
+ 1, zio->io_orig_size, 0, 0, 0, 0);
+ }
+
+ ASSERT3U(vd->vdev_trim_inflight[TRIM_TYPE_SIMPLE], >, 0);
+ vd->vdev_trim_inflight[TRIM_TYPE_SIMPLE]--;
+ cv_broadcast(&vd->vdev_trim_io_cv);
+ mutex_exit(&vd->vdev_trim_io_lock);
+
+ spa_config_exit(vd->vdev_spa, SCL_STATE_ALL, vd);
+}
/*
* Returns the average trim rate in bytes/sec for the ta->trim_vdev.
*/
{
vdev_t *vd = ta->trim_vdev;
spa_t *spa = vd->vdev_spa;
+ zio_done_func_t *cb;
mutex_enter(&vd->vdev_trim_io_lock);
if (ta->trim_type == TRIM_TYPE_MANUAL) {
while (vd->vdev_trim_rate != 0 && !vdev_trim_should_stop(vd) &&
vdev_trim_calculate_rate(ta) > vd->vdev_trim_rate) {
- cv_timedwait_sig(&vd->vdev_trim_io_cv,
+ cv_timedwait_idle(&vd->vdev_trim_io_cv,
&vd->vdev_trim_io_lock, ddi_get_lbolt() +
MSEC_TO_TICK(10));
}
ta->trim_bytes_done += size;
/* Limit in flight trimming I/Os */
- while (vd->vdev_trim_inflight[0] + vd->vdev_trim_inflight[1] >=
- zfs_trim_queue_limit) {
+ while (vd->vdev_trim_inflight[0] + vd->vdev_trim_inflight[1] +
+ vd->vdev_trim_inflight[2] >= zfs_trim_queue_limit) {
cv_wait(&vd->vdev_trim_io_cv, &vd->vdev_trim_io_lock);
}
vd->vdev_trim_inflight[ta->trim_type]++;
/* This is the first write of this txg. */
dsl_sync_task_nowait(spa_get_dsl(spa),
- vdev_trim_zap_update_sync, guid, 2,
- ZFS_SPACE_CHECK_RESERVED, tx);
+ vdev_trim_zap_update_sync, guid, tx);
}
/*
if (ta->trim_type == TRIM_TYPE_MANUAL)
vd->vdev_trim_offset[txg & TXG_MASK] = start + size;
+ if (ta->trim_type == TRIM_TYPE_MANUAL) {
+ cb = vdev_trim_cb;
+ } else if (ta->trim_type == TRIM_TYPE_AUTO) {
+ cb = vdev_autotrim_cb;
+ } else {
+ cb = vdev_trim_simple_cb;
+ }
+
zio_nowait(zio_trim(spa->spa_txg_zio[txg & TXG_MASK], vd,
- start, size, ta->trim_type == TRIM_TYPE_MANUAL ?
- vdev_trim_cb : vdev_autotrim_cb, NULL,
- ZIO_PRIORITY_TRIM, ZIO_FLAG_CANFAIL, ta->trim_flags));
+ start, size, cb, NULL, ZIO_PRIORITY_TRIM, ZIO_FLAG_CANFAIL,
+ ta->trim_flags));
- /* vdev_trim_cb and vdev_autotrim_cb release SCL_STATE_ALL */
+ /* Each of the done callbacks selected above releases SCL_STATE_ALL */
dmu_tx_commit(tx);
return (0);
}
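+/*
+ * A vdev_xlate_walk() callback which records the largest physical rs_end
+ * seen so far. Used to determine where the last physical range of a
+ * metaslab ends on this leaf vdev.
+ */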
+static void
+vdev_trim_xlate_last_rs_end(void *arg, range_seg64_t *physical_rs)
+{
+ uint64_t *last_rs_end = (uint64_t *)arg;
+
+ if (physical_rs->rs_end > *last_rs_end)
+ *last_rs_end = physical_rs->rs_end;
+}
+
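+/*
+ * A vdev_xlate_walk() callback which updates the manual TRIM progress
+ * counters: every physical range grows vdev_trim_bytes_est, and any
+ * portion at or below vdev_trim_last_offset also grows
+ * vdev_trim_bytes_done.
+ */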
+static void
+vdev_trim_xlate_progress(void *arg, range_seg64_t *physical_rs)
+{
+ vdev_t *vd = (vdev_t *)arg;
+
+ uint64_t size = physical_rs->rs_end - physical_rs->rs_start;
+ vd->vdev_trim_bytes_est += size;
+
+ if (vd->vdev_trim_last_offset >= physical_rs->rs_end) {
+ vd->vdev_trim_bytes_done += size;
+ } else if (vd->vdev_trim_last_offset > physical_rs->rs_start &&
+ vd->vdev_trim_last_offset <= physical_rs->rs_end) {
+ vd->vdev_trim_bytes_done +=
+ vd->vdev_trim_last_offset - physical_rs->rs_start;
+ }
+}
+
/*
* Calculates the completion percentage of a manual TRIM.
*/
metaslab_t *msp = vd->vdev_top->vdev_ms[i];
mutex_enter(&msp->ms_lock);
- uint64_t ms_free = msp->ms_size -
- metaslab_allocated_space(msp);
-
- if (vd->vdev_top->vdev_ops == &vdev_raidz_ops)
- ms_free /= vd->vdev_top->vdev_children;
+ uint64_t ms_free = (msp->ms_size -
+ metaslab_allocated_space(msp)) /
+ vdev_get_ndisks(vd->vdev_top);
/*
* Convert the metaslab range to a physical range
* on our vdev. We use this to determine if we are
* in the middle of this metaslab range.
*/
- range_seg64_t logical_rs, physical_rs;
+ range_seg64_t logical_rs, physical_rs, remain_rs;
logical_rs.rs_start = msp->ms_start;
logical_rs.rs_end = msp->ms_start + msp->ms_size;
- vdev_xlate(vd, &logical_rs, &physical_rs);
+ /* Metaslab space after this offset has not been trimmed. */
+ vdev_xlate(vd, &logical_rs, &physical_rs, &remain_rs);
if (vd->vdev_trim_last_offset <= physical_rs.rs_start) {
vd->vdev_trim_bytes_est += ms_free;
mutex_exit(&msp->ms_lock);
continue;
- } else if (vd->vdev_trim_last_offset > physical_rs.rs_end) {
+ }
+
+ /* Metaslab space before this offset has been trimmed. */
+ uint64_t last_rs_end = physical_rs.rs_end;
+ if (!vdev_xlate_is_empty(&remain_rs)) {
+ vdev_xlate_walk(vd, &remain_rs,
+ vdev_trim_xlate_last_rs_end, &last_rs_end);
+ }
+
+ if (vd->vdev_trim_last_offset > last_rs_end) {
vd->vdev_trim_bytes_done += ms_free;
vd->vdev_trim_bytes_est += ms_free;
mutex_exit(&msp->ms_lock);
rs != NULL; rs = zfs_btree_next(bt, &idx, &idx)) {
logical_rs.rs_start = rs_get_start(rs, rt);
logical_rs.rs_end = rs_get_end(rs, rt);
- vdev_xlate(vd, &logical_rs, &physical_rs);
-
- uint64_t size = physical_rs.rs_end -
- physical_rs.rs_start;
- vd->vdev_trim_bytes_est += size;
- if (vd->vdev_trim_last_offset >= physical_rs.rs_end) {
- vd->vdev_trim_bytes_done += size;
- } else if (vd->vdev_trim_last_offset >
- physical_rs.rs_start &&
- vd->vdev_trim_last_offset <=
- physical_rs.rs_end) {
- vd->vdev_trim_bytes_done +=
- vd->vdev_trim_last_offset -
- physical_rs.rs_start;
- }
+
+ vdev_xlate_walk(vd, &logical_rs,
+ vdev_trim_xlate_progress, vd);
}
mutex_exit(&msp->ms_lock);
}
return (err);
}
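+/*
+ * A vdev_xlate_walk() callback which adds a single physical range to
+ * ta->trim_tree. For manual TRIMs the range is first clipped against
+ * vdev_trim_last_offset so already visited space is not re-trimmed.
+ */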
+static void
+vdev_trim_xlate_range_add(void *arg, range_seg64_t *physical_rs)
+{
+ trim_args_t *ta = arg;
+ vdev_t *vd = ta->trim_vdev;
+
+ /*
+ * Only a manual trim will be traversing the vdev sequentially.
+ * For an auto trim all valid ranges should be added.
+ */
+ if (ta->trim_type == TRIM_TYPE_MANUAL) {
+
+ /* Only add segments that we have not visited yet */
+ if (physical_rs->rs_end <= vd->vdev_trim_last_offset)
+ return;
+
+ /* Pick up where we left off mid-range. */
+ if (vd->vdev_trim_last_offset > physical_rs->rs_start) {
+ ASSERT3U(physical_rs->rs_end, >,
+ vd->vdev_trim_last_offset);
+ physical_rs->rs_start = vd->vdev_trim_last_offset;
+ }
+ }
+
+ ASSERT3U(physical_rs->rs_end, >, physical_rs->rs_start);
+
+ range_tree_add(ta->trim_tree, physical_rs->rs_start,
+ physical_rs->rs_end - physical_rs->rs_start);
+}
+
/*
- * Convert the logical range into a physical range and add it to the
+ * Convert the logical range into physical ranges and add them to the
* range tree passed in the trim_args_t.
*/
static void
{
trim_args_t *ta = arg;
vdev_t *vd = ta->trim_vdev;
- range_seg64_t logical_rs, physical_rs;
+ range_seg64_t logical_rs;
logical_rs.rs_start = start;
logical_rs.rs_end = start + size;
}
ASSERT(vd->vdev_ops->vdev_op_leaf);
- vdev_xlate(vd, &logical_rs, &physical_rs);
-
- IMPLY(vd->vdev_top == vd,
- logical_rs.rs_start == physical_rs.rs_start);
- IMPLY(vd->vdev_top == vd,
- logical_rs.rs_end == physical_rs.rs_end);
-
- /*
- * Only a manual trim will be traversing the vdev sequentially.
- * For an auto trim all valid ranges should be added.
- */
- if (ta->trim_type == TRIM_TYPE_MANUAL) {
-
- /* Only add segments that we have not visited yet */
- if (physical_rs.rs_end <= vd->vdev_trim_last_offset)
- return;
-
- /* Pick up where we left off mid-range. */
- if (vd->vdev_trim_last_offset > physical_rs.rs_start) {
- ASSERT3U(physical_rs.rs_end, >,
- vd->vdev_trim_last_offset);
- physical_rs.rs_start = vd->vdev_trim_last_offset;
- }
- }
-
- ASSERT3U(physical_rs.rs_end, >=, physical_rs.rs_start);
-
- /*
- * With raidz, it's possible that the logical range does not live on
- * this leaf vdev. We only add the physical range to this vdev's if it
- * has a length greater than 0.
- */
- if (physical_rs.rs_end > physical_rs.rs_start) {
- range_tree_add(ta->trim_tree, physical_rs.rs_start,
- physical_rs.rs_end - physical_rs.rs_start);
- } else {
- ASSERT3U(physical_rs.rs_end, ==, physical_rs.rs_start);
- }
+ vdev_xlate_walk(vd, &logical_rs, vdev_trim_xlate_range_add, arg);
}
/*
* by its ms_allocatable. While a metaslab is undergoing trimming it is
* not eligible for new allocations.
*/
-static void
+static _Noreturn void
vdev_trim_thread(void *arg)
{
vdev_t *vd = arg;
range_tree_destroy(ta.trim_tree);
mutex_enter(&vd->vdev_trim_lock);
- if (!vd->vdev_trim_exit_wanted && vdev_writeable(vd)) {
- vdev_trim_change_state(vd, VDEV_TRIM_COMPLETE,
- vd->vdev_trim_rate, vd->vdev_trim_partial,
- vd->vdev_trim_secure);
+ if (!vd->vdev_trim_exit_wanted) {
+ if (vdev_writeable(vd)) {
+ vdev_trim_change_state(vd, VDEV_TRIM_COMPLETE,
+ vd->vdev_trim_rate, vd->vdev_trim_partial,
+ vd->vdev_trim_secure);
+ } else if (vd->vdev_faulted) {
+ vdev_trim_change_state(vd, VDEV_TRIM_CANCELED,
+ vd->vdev_trim_rate, vd->vdev_trim_partial,
+ vd->vdev_trim_secure);
+ }
}
ASSERT(vd->vdev_trim_thread != NULL || vd->vdev_trim_inflight[0] == 0);
vd->vdev_trim_thread = NULL;
cv_broadcast(&vd->vdev_trim_cv);
mutex_exit(&vd->vdev_trim_lock);
+
+ thread_exit();
}
/*
void
vdev_trim_stop_wait(spa_t *spa, list_t *vd_list)
{
+ (void) spa;
vdev_t *vd;
ASSERT(MUTEX_HELD(&spa_namespace_lock));
{
spa_t *spa = vd->vdev_spa;
list_t vd_list;
+ vdev_t *vd_l2cache;
ASSERT(MUTEX_HELD(&spa_namespace_lock));
offsetof(vdev_t, vdev_trim_node));
vdev_trim_stop_all_impl(vd, tgt_state, &vd_list);
+
+ /*
+ * Iterate over cache devices and request that any whole-device
+ * TRIM in progress stop, in case we export the pool or remove
+ * the cache device prematurely.
+ */
+ for (int i = 0; i < spa->spa_l2cache.sav_count; i++) {
+ vd_l2cache = spa->spa_l2cache.sav_vdevs[i];
+ vdev_trim_stop_all_impl(vd_l2cache, tgt_state, &vd_list);
+ }
+
vdev_trim_stop_wait(spa, &vd_list);
if (vd->vdev_spa->spa_sync_on) {
vd->vdev_leaf_zap, VDEV_LEAF_ZAP_TRIM_ACTION_TIME,
sizeof (timestamp), 1, ×tamp);
ASSERT(err == 0 || err == ENOENT);
- vd->vdev_trim_action_time = (time_t)timestamp;
+ vd->vdev_trim_action_time = timestamp;
if (vd->vdev_trim_state == VDEV_TRIM_SUSPENDED ||
vd->vdev_offline) {
* N.B. This behavior is different from a manual TRIM where a thread
* is created for each leaf vdev, instead of each top-level vdev.
*/
-static void
+static _Noreturn void
vdev_autotrim_thread(void *arg)
{
vdev_t *vd = arg;
vd->vdev_autotrim_thread = NULL;
cv_broadcast(&vd->vdev_autotrim_cv);
mutex_exit(&vd->vdev_autotrim_lock);
+
+ thread_exit();
}
/*
vdev_autotrim(spa);
}
+static _Noreturn void
+vdev_trim_l2arc_thread(void *arg)
+{
+ vdev_t *vd = arg;
+ spa_t *spa = vd->vdev_spa;
+ l2arc_dev_t *dev = l2arc_vdev_get(vd);
+ trim_args_t ta = {0};
+ range_seg64_t physical_rs;
+
+ ASSERT(vdev_is_concrete(vd));
+ spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
+
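+ /* Reset any state left over from a previous manual TRIM of this vdev. */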
+ vd->vdev_trim_last_offset = 0;
+ vd->vdev_trim_rate = 0;
+ vd->vdev_trim_partial = 0;
+ vd->vdev_trim_secure = 0;
+
+ ta.trim_vdev = vd;
+ ta.trim_tree = range_tree_create(NULL, RANGE_SEG64, NULL, 0, 0);
+ ta.trim_type = TRIM_TYPE_MANUAL;
+ ta.trim_extent_bytes_max = zfs_trim_extent_bytes_max;
+ ta.trim_extent_bytes_min = SPA_MINBLOCKSIZE;
+ ta.trim_flags = 0;
+
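+ /*
+ * The whole cache device, from offset 0 up to its minimum asize, is
+ * queued as a single range to be trimmed.
+ */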
+ physical_rs.rs_start = vd->vdev_trim_bytes_done = 0;
+ physical_rs.rs_end = vd->vdev_trim_bytes_est =
+ vdev_get_min_asize(vd);
+
+ range_tree_add(ta.trim_tree, physical_rs.rs_start,
+ physical_rs.rs_end - physical_rs.rs_start);
+
+ mutex_enter(&vd->vdev_trim_lock);
+ vdev_trim_change_state(vd, VDEV_TRIM_ACTIVE, 0, 0, 0);
+ mutex_exit(&vd->vdev_trim_lock);
+
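+ /*
+ * Issue the TRIM I/Os. The return value is intentionally ignored;
+ * TRIM is advisory, so a failure here leaves the device usable.
+ */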
+ (void) vdev_trim_ranges(&ta);
+
+ spa_config_exit(spa, SCL_CONFIG, FTAG);
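+ /* Wait for all in-flight manual TRIM I/Os to complete. */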
+ mutex_enter(&vd->vdev_trim_io_lock);
+ while (vd->vdev_trim_inflight[TRIM_TYPE_MANUAL] > 0) {
+ cv_wait(&vd->vdev_trim_io_cv, &vd->vdev_trim_io_lock);
+ }
+ mutex_exit(&vd->vdev_trim_io_lock);
+
+ range_tree_vacate(ta.trim_tree, NULL, NULL);
+ range_tree_destroy(ta.trim_tree);
+
+ mutex_enter(&vd->vdev_trim_lock);
+ if (!vd->vdev_trim_exit_wanted && vdev_writeable(vd)) {
+ vdev_trim_change_state(vd, VDEV_TRIM_COMPLETE,
+ vd->vdev_trim_rate, vd->vdev_trim_partial,
+ vd->vdev_trim_secure);
+ }
+ ASSERT(vd->vdev_trim_thread != NULL ||
+ vd->vdev_trim_inflight[TRIM_TYPE_MANUAL] == 0);
+
+ /*
+ * Drop the vdev_trim_lock while we sync out the txg since it's
+ * possible that a device might be trying to come online and
+ * must check to see if it needs to restart a trim. That thread
+ * will be holding the spa_config_lock which would prevent the
+ * txg_wait_synced from completing. Same strategy as in
+ * vdev_trim_thread().
+ */
+ mutex_exit(&vd->vdev_trim_lock);
+ txg_wait_synced(spa_get_dsl(vd->vdev_spa), 0);
+ mutex_enter(&vd->vdev_trim_lock);
+
+ /*
+ * Update the header of the cache device here, before
+ * broadcasting vdev_trim_cv which may lead to the removal
+ * of the device. The same applies for setting l2ad_trim_all to
+ * false.
+ */
+ spa_config_enter(vd->vdev_spa, SCL_L2ARC, vd, RW_READER);
+ memset(dev->l2ad_dev_hdr, 0, dev->l2ad_dev_hdr_asize);
+ l2arc_dev_hdr_update(dev);
+ spa_config_exit(vd->vdev_spa, SCL_L2ARC, vd);
+
+ vd->vdev_trim_thread = NULL;
+ if (vd->vdev_trim_state == VDEV_TRIM_COMPLETE)
+ dev->l2ad_trim_all = B_FALSE;
+
+ cv_broadcast(&vd->vdev_trim_cv);
+ mutex_exit(&vd->vdev_trim_lock);
+
+ thread_exit();
+}
+
+/*
+ * Punches out TRIM threads for the L2ARC devices in a spa and assigns them
+ * to the vd->vdev_trim_thread variable. This facilitates trimming the whole
+ * cache device using TRIM_TYPE_MANUAL upon addition to a pool, at pool
+ * creation, or when the header of the device is invalid.
+ */
+void
+vdev_trim_l2arc(spa_t *spa)
+{
+ ASSERT(MUTEX_HELD(&spa_namespace_lock));
+
+ /*
+ * Locate the spa's l2arc devices and kick off TRIM threads.
+ */
+ for (int i = 0; i < spa->spa_l2cache.sav_count; i++) {
+ vdev_t *vd = spa->spa_l2cache.sav_vdevs[i];
+ l2arc_dev_t *dev = l2arc_vdev_get(vd);
+
+ if (dev == NULL || !dev->l2ad_trim_all) {
+ /*
+ * Don't attempt TRIM if the vdev is UNAVAIL or if the
+ * cache device was not marked for whole device TRIM
+ * (i.e. l2arc_trim_ahead = 0, or the L2ARC device header
+ * is valid with trim_state = VDEV_TRIM_COMPLETE and
+ * l2ad_log_entries > 0).
+ */
+ continue;
+ }
+
+ mutex_enter(&vd->vdev_trim_lock);
+ ASSERT(vd->vdev_ops->vdev_op_leaf);
+ ASSERT(vdev_is_concrete(vd));
+ ASSERT3P(vd->vdev_trim_thread, ==, NULL);
+ ASSERT(!vd->vdev_detached);
+ ASSERT(!vd->vdev_trim_exit_wanted);
+ ASSERT(!vd->vdev_top->vdev_removing);
+ vdev_trim_change_state(vd, VDEV_TRIM_ACTIVE, 0, 0, 0);
+ vd->vdev_trim_thread = thread_create(NULL, 0,
+ vdev_trim_l2arc_thread, vd, 0, &p0, TS_RUN, maxclsyspri);
+ mutex_exit(&vd->vdev_trim_lock);
+ }
+}
+
+/*
+ * A wrapper which calls vdev_trim_ranges(). It is intended to be called
+ * on leaf vdevs.
+ */
+int
+vdev_trim_simple(vdev_t *vd, uint64_t start, uint64_t size)
+{
+ trim_args_t ta = {0};
+ range_seg64_t physical_rs;
+ int error;
+ physical_rs.rs_start = start;
+ physical_rs.rs_end = start + size;
+
+ ASSERT(vdev_is_concrete(vd));
+ ASSERT(vd->vdev_ops->vdev_op_leaf);
+ ASSERT(!vd->vdev_detached);
+ ASSERT(!vd->vdev_top->vdev_removing);
+
+ ta.trim_vdev = vd;
+ ta.trim_tree = range_tree_create(NULL, RANGE_SEG64, NULL, 0, 0);
+ ta.trim_type = TRIM_TYPE_SIMPLE;
+ ta.trim_extent_bytes_max = zfs_trim_extent_bytes_max;
+ ta.trim_extent_bytes_min = SPA_MINBLOCKSIZE;
+ ta.trim_flags = 0;
+
+ ASSERT3U(physical_rs.rs_end, >=, physical_rs.rs_start);
+
+ if (physical_rs.rs_end > physical_rs.rs_start) {
+ range_tree_add(ta.trim_tree, physical_rs.rs_start,
+ physical_rs.rs_end - physical_rs.rs_start);
+ } else {
+ ASSERT3U(physical_rs.rs_end, ==, physical_rs.rs_start);
+ }
+
+ error = vdev_trim_ranges(&ta);
+
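+ /* Wait for all in-flight simple TRIM I/Os to complete. */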
+ mutex_enter(&vd->vdev_trim_io_lock);
+ while (vd->vdev_trim_inflight[TRIM_TYPE_SIMPLE] > 0) {
+ cv_wait(&vd->vdev_trim_io_cv, &vd->vdev_trim_io_lock);
+ }
+ mutex_exit(&vd->vdev_trim_io_lock);
+
+ range_tree_vacate(ta.trim_tree, NULL, NULL);
+ range_tree_destroy(ta.trim_tree);
+
+ return (error);
+}
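+
+/*
+ * A minimal usage sketch (hypothetical caller, not part of this change):
+ * discard the first 1 MiB of a leaf vdev on a best-effort basis. Failed
+ * simple TRIM I/Os are counted in vs_trim_errors but never reissued.
+ *
+ *	int err = vdev_trim_simple(vd, 0, 1ULL << 20);
+ *	if (err != 0)
+ *		zfs_dbgmsg("simple TRIM of %s failed: %d",
+ *		    vd->vdev_path, err);
+ */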
+
EXPORT_SYMBOL(vdev_trim);
EXPORT_SYMBOL(vdev_trim_stop);
EXPORT_SYMBOL(vdev_trim_stop_all);
EXPORT_SYMBOL(vdev_autotrim_stop_all);
EXPORT_SYMBOL(vdev_autotrim_stop_wait);
EXPORT_SYMBOL(vdev_autotrim_restart);
+EXPORT_SYMBOL(vdev_trim_l2arc);
+EXPORT_SYMBOL(vdev_trim_simple);
-/* BEGIN CSTYLED */
ZFS_MODULE_PARAM(zfs_trim, zfs_trim_, extent_bytes_max, UINT, ZMOD_RW,
- "Max size of TRIM commands, larger will be split");
+ "Max size of TRIM commands, larger will be split");
ZFS_MODULE_PARAM(zfs_trim, zfs_trim_, extent_bytes_min, UINT, ZMOD_RW,
- "Min size of TRIM commands, smaller will be skipped");
+ "Min size of TRIM commands, smaller will be skipped");
ZFS_MODULE_PARAM(zfs_trim, zfs_trim_, metaslab_skip, UINT, ZMOD_RW,
- "Skip metaslabs which have never been initialized");
+ "Skip metaslabs which have never been initialized");
ZFS_MODULE_PARAM(zfs_trim, zfs_trim_, txg_batch, UINT, ZMOD_RW,
- "Min number of txgs to aggregate frees before issuing TRIM");
+ "Min number of txgs to aggregate frees before issuing TRIM");
ZFS_MODULE_PARAM(zfs_trim, zfs_trim_, queue_limit, UINT, ZMOD_RW,
- "Max queued TRIMs outstanding per leaf vdev");
-/* END CSTYLED */
+ "Max queued TRIMs outstanding per leaf vdev");