From c40a1124e1d1010b665909ad31d2904630018f6f Mon Sep 17 00:00:00 2001 From: Tom Caputi Date: Wed, 28 Nov 2018 23:47:09 -0500 Subject: [PATCH] Fix consistency of ztest_device_removal_active ztest currently uses the boolean flag ztest_device_removal_active to protect some tests that may not run successfully if they occur at the same time as ztest_device_removal(). Unfortunately, in the event that ztest is in the middle of a device removal when it decides to issue a SIGKILL, the device removal will be automatically restarted (without setting the flag) when the pool is re-imported on the next run. This patch corrects this by ensuring that any in-progress removals are completed before running further tests after the re-import. This patch also makes a few small changes to prevent race conditions involving the creation and destruction of spa->spa_vdev_removal, since this field is not protected by any locks. Some checks that may run concurrently with setting / unsetting this field have been updated to check spa->spa_removing_phys.sr_state instead. The most significant change here is that spa_removal_get_stats() no longer accounts for in-flight work done, since that could result in a NULL pointer dereference. 
Reviewed-by: Matthew Ahrens Reviewed-by: Serapheim Dimitropoulos Reviewed-by: Brian Behlendorf Signed-off-by: Tom Caputi Closes #8105 --- cmd/ztest/ztest.c | 22 +++++++++++++++++++++- module/zfs/spa_checkpoint.c | 2 +- module/zfs/vdev_removal.c | 9 +-------- 3 files changed, 23 insertions(+), 10 deletions(-) diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c index eab8940fb..111d45b9d 100644 --- a/cmd/ztest/ztest.c +++ b/cmd/ztest/ztest.c @@ -3573,7 +3573,7 @@ ztest_device_removal(ztest_ds_t *zd, uint64_t id) */ txg_wait_synced(spa_get_dsl(spa), 0); - while (spa->spa_vdev_removal != NULL) + while (spa->spa_removing_phys.sr_state == DSS_SCANNING) txg_wait_synced(spa_get_dsl(spa), 0); } else { mutex_exit(&ztest_vdev_lock); @@ -6887,6 +6887,26 @@ ztest_run(ztest_shared_t *zs) } zs->zs_enospc_count = 0; + /* + * If we were in the middle of ztest_device_removal() and were killed + * we need to ensure the removal and scrub complete before running + * any tests that check ztest_device_removal_active. The removal will + * be restarted automatically when the spa is opened, but we need to + * initiate the scrub manually if it is not already in progress. Note + * that we always run the scrub whenever an indirect vdev exists + * because we have no way of knowing for sure if ztest_device_removal() + * fully completed its scrub before the pool was reimported. 
+ */ + if (spa->spa_removing_phys.sr_state == DSS_SCANNING || + spa->spa_removing_phys.sr_prev_indirect_vdev != -1) { + while (spa->spa_removing_phys.sr_state == DSS_SCANNING) + txg_wait_synced(spa_get_dsl(spa), 0); + + (void) spa_scan(spa, POOL_SCAN_SCRUB); + while (dsl_scan_scrubbing(spa_get_dsl(spa))) + txg_wait_synced(spa_get_dsl(spa), 0); + } + run_threads = umem_zalloc(ztest_opts.zo_threads * sizeof (kthread_t *), UMEM_NOFAIL); diff --git a/module/zfs/spa_checkpoint.c b/module/zfs/spa_checkpoint.c index 6f7e9ab83..863ec46b1 100644 --- a/module/zfs/spa_checkpoint.c +++ b/module/zfs/spa_checkpoint.c @@ -462,7 +462,7 @@ spa_checkpoint_check(void *arg, dmu_tx_t *tx) if (!spa_top_vdevs_spacemap_addressable(spa)) return (SET_ERROR(ZFS_ERR_VDEV_TOO_BIG)); - if (spa->spa_vdev_removal != NULL) + if (spa->spa_removing_phys.sr_state == DSS_SCANNING) return (SET_ERROR(ZFS_ERR_DEVRM_IN_PROGRESS)); if (spa->spa_checkpoint_txg != 0) diff --git a/module/zfs/vdev_removal.c b/module/zfs/vdev_removal.c index e8d036c61..49b9ed3a1 100644 --- a/module/zfs/vdev_removal.c +++ b/module/zfs/vdev_removal.c @@ -672,7 +672,7 @@ spa_finish_removal(spa_t *spa, dsl_scan_state_t state, dmu_tx_t *tx) vdev_t *vd = vdev_lookup_top(spa, svr->svr_vdev_id); vdev_indirect_config_t *vic = &vd->vdev_indirect_config; - if (srp->sr_prev_indirect_vdev != UINT64_MAX) { + if (srp->sr_prev_indirect_vdev != -1) { vdev_t *pvd; pvd = vdev_lookup_top(spa, srp->sr_prev_indirect_vdev); @@ -2145,13 +2145,6 @@ spa_removal_get_stats(spa_t *spa, pool_removal_stat_t *prs) prs->prs_to_copy = spa->spa_removing_phys.sr_to_copy; prs->prs_copied = spa->spa_removing_phys.sr_copied; - if (spa->spa_vdev_removal != NULL) { - for (int i = 0; i < TXG_SIZE; i++) { - prs->prs_copied += - spa->spa_vdev_removal->svr_bytes_done[i]; - } - } - prs->prs_mapping_memory = 0; uint64_t indirect_vdev_id = spa->spa_removing_phys.sr_prev_indirect_vdev; -- 2.39.2