Remove races from scrub / resilver tests
author Tom Caputi <tcaputi@datto.com>
Wed, 28 Nov 2018 18:12:08 +0000 (13:12 -0500)
committer Brian Behlendorf <behlendorf1@llnl.gov>
Wed, 28 Nov 2018 18:12:08 +0000 (10:12 -0800)
Currently, several tests in the ZFS Test Suite that attempt to
test scrub and resilver behavior occasionally fail. A big reason
for this is that these tests use a combination of zinject and
zfs_scan_vdev_limit to slow these operations down enough for
their commands to run while a scan is still in progress. This
method works most of the time, but provides no guarantees and
leads to flaky behavior. This patch adds a new tunable,
zfs_scan_suspend_progress, that ensures that scans make no
progress, guaranteeing that tests can be run without racing.
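
For example, a test can freeze the scan for exactly the window in
which it needs to make assertions. A minimal sketch, assuming the
test suite's usual helpers (log_must, set_tunable32,
is_pool_scrubbing) are loaded:

    # Freeze scan progress so the scrub cannot finish underneath us.
    log_must set_tunable32 zfs_scan_suspend_progress 1
    log_must zpool scrub $TESTPOOL
    log_must is_pool_scrubbing $TESTPOOL true

    # ... commands that require an in-progress scrub ...

    # Resume progress and let the scrub complete.
    log_must set_tunable32 zfs_scan_suspend_progress 0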

This patch also replaces zfs_remove_max_bytes_pause with an
equivalent flag-style tunable, zfs_removal_suspend_progress. This
provides consistency between these two similar tunables and
ensures that the tunable will not misbehave on 32-bit systems,
where the old unsigned long tunable could not hold the 64-bit
sentinel value the tests relied on.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Giuseppe Di Natale <guss80@gmail.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #8111

14 files changed:
module/zfs/dsl_scan.c
module/zfs/vdev_removal.c
tests/zfs-tests/tests/functional/cli_root/zpool_import/cleanup.ksh
tests/zfs-tests/tests/functional/cli_root/zpool_import/import_cachefile_device_replaced.ksh
tests/zfs-tests/tests/functional/cli_root/zpool_import/import_rewind_device_replaced.ksh
tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import.cfg
tests/zfs-tests/tests/functional/cli_root/zpool_resilver/zpool_resilver.cfg
tests/zfs-tests/tests/functional/cli_root/zpool_resilver/zpool_resilver_restart.ksh
tests/zfs-tests/tests/functional/cli_root/zpool_scrub/cleanup.ksh
tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_002_pos.ksh
tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_003_pos.ksh
tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_004_pos.ksh
tests/zfs-tests/tests/functional/cli_root/zpool_split/zpool_split_resilver.ksh
tests/zfs-tests/tests/functional/removal/removal.kshlib

index 61d42deca6524eec7e8c1758bc2b917912eeb77a..d9d9900a2d03a6ab5aca4767a1322fdc6f3a9b61 100644 (file)
@@ -169,6 +169,7 @@ int zfs_obsolete_min_time_ms = 500; /* min millisecs to obsolete per txg */
 int zfs_free_min_time_ms = 1000; /* min millisecs to free per txg */
 int zfs_resilver_min_time_ms = 3000; /* min millisecs to resilver per txg */
 int zfs_scan_checkpoint_intval = 7200; /* in seconds */
+int zfs_scan_suspend_progress = 0; /* set to prevent scans from progressing */
 int zfs_no_scrub_io = B_FALSE; /* set to disable scrub i/o */
 int zfs_no_scrub_prefetch = B_FALSE; /* set to disable scrub prefetch */
 enum ddt_class zfs_scrub_ddt_class_max = DDT_CLASS_DUPLICATE;
@@ -3356,6 +3357,27 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
        if (spa->spa_syncing_txg < spa->spa_first_txg + SCAN_IMPORT_WAIT_TXGS)
                return;
 
+       /*
+        * zfs_scan_suspend_progress can be set to disable scan progress.
+        * We don't want to spin the txg_sync thread, so we add a delay
+        * here to simulate the time spent doing a scan. This is mostly
+        * useful for testing and debugging.
+        */
+       if (zfs_scan_suspend_progress) {
+               uint64_t scan_time_ns = gethrtime() - scn->scn_sync_start_time;
+               int mintime = (scn->scn_phys.scn_func == POOL_SCAN_RESILVER) ?
+                   zfs_resilver_min_time_ms : zfs_scrub_min_time_ms;
+
+               while (zfs_scan_suspend_progress &&
+                   !txg_sync_waiting(scn->scn_dp) &&
+                   !spa_shutting_down(scn->scn_dp->dp_spa) &&
+                   NSEC2MSEC(scan_time_ns) < mintime) {
+                       delay(hz);
+                       scan_time_ns = gethrtime() - scn->scn_sync_start_time;
+               }
+               return;
+       }
+
        /*
         * It is possible to switch from unsorted to sorted at any time,
         * but afterwards the scan will remain sorted unless reloaded from
@@ -4070,6 +4092,10 @@ MODULE_PARM_DESC(zfs_free_min_time_ms, "Min millisecs to free per txg");
 module_param(zfs_resilver_min_time_ms, int, 0644);
 MODULE_PARM_DESC(zfs_resilver_min_time_ms, "Min millisecs to resilver per txg");
 
+module_param(zfs_scan_suspend_progress, int, 0644);
+MODULE_PARM_DESC(zfs_scan_suspend_progress,
+       "Set to prevent scans from progressing");
+
 module_param(zfs_no_scrub_io, int, 0644);
 MODULE_PARM_DESC(zfs_no_scrub_io, "Set to disable scrub I/O");
 
index c259b5a1b45cb2a74d71bb19b36a65433f72b2a8..e8d036c6130fa64dce6810b2315dbd013ca6e0dc 100644 (file)
@@ -121,7 +121,7 @@ int vdev_removal_max_span = 32 * 1024;
  * This is used by the test suite so that it can ensure that certain
  * actions happen while in the middle of a removal.
  */
-unsigned long zfs_remove_max_bytes_pause = -1UL;
+int zfs_removal_suspend_progress = 0;
 
 #define        VDEV_REMOVAL_ZAP_OBJS   "lzap"
 
@@ -1449,14 +1449,14 @@ spa_vdev_remove_thread(void *arg)
 
                        /*
                         * This delay will pause the removal around the point
-                        * specified by zfs_remove_max_bytes_pause. We do this
+                        * specified by zfs_removal_suspend_progress. We do this
                         * solely from the test suite or during debugging.
                         */
                        uint64_t bytes_copied =
                            spa->spa_removing_phys.sr_copied;
                        for (int i = 0; i < TXG_SIZE; i++)
                                bytes_copied += svr->svr_bytes_done[i];
-                       while (zfs_remove_max_bytes_pause <= bytes_copied &&
+                       while (zfs_removal_suspend_progress &&
                            !svr->svr_thread_exit)
                                delay(hz);
 
@@ -2178,8 +2178,8 @@ MODULE_PARM_DESC(vdev_removal_max_span,
        "Largest span of free chunks a remap segment can span");
 
 /* BEGIN CSTYLED */
-module_param(zfs_remove_max_bytes_pause, ulong, 0644);
-MODULE_PARM_DESC(zfs_remove_max_bytes_pause,
+module_param(zfs_removal_suspend_progress, int, 0644);
+MODULE_PARM_DESC(zfs_removal_suspend_progress,
        "Pause device removal after this many bytes are copied "
        "(debug use only - causes removal to hang)");
 /* END CSTYLED */
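
The semantic change is visible in how the test suite drives the two
interfaces (values as used in the removal.kshlib hunk below): the old
tunable paused the removal once the copied byte count reached a
threshold, so tests toggled between 0 and a 64-bit all-ones sentinel,
while the new tunable is a plain on/off flag:

    # Old interface: pause once this many bytes have been copied.
    set_tunable64 zfs_remove_max_bytes_pause 0                     # pause immediately
    set_tunable64 zfs_remove_max_bytes_pause 18446744073709551615  # never pause

    # New interface: a simple int flag, well-defined on 32-bit systems.
    set_tunable32 zfs_removal_suspend_progress 1   # pause
    set_tunable32 zfs_removal_suspend_progress 0   # resume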
index fd67dc7694f194991a1e310ea1894923eb92a560..029fa66816b781e52c60115586814f52ed2a545a 100755 (executable)
@@ -34,6 +34,8 @@
 
 verify_runnable "global"
 
+log_must set_tunable32 zfs_scan_suspend_progress 0
+
 for pool in "$TESTPOOL" "$TESTPOOL1"; do
        datasetexists $pool/$TESTFS && \
                log_must zfs destroy -Rf $pool/$TESTFS
index f42c85b9837aafd87429a59d1ed4db0281f85cac..e7edb1a3b04bd05edd886243a65b0a9000fb46d1 100755 (executable)
 #         each sync.
 #      2. Add data to pool
 #      3. Re-import the pool so that data isn't cached
-#      4. Use zinject to slow down device I/O
+#      4. Use zfs_scan_suspend_progress to ensure resilvers don't progress
 #      5. Trigger the resilvering
 #      6. Use spa freeze to stop writing to the pool.
-#      7. Clear zinject events (needed to export the pool)
+#      7. Re-enable scan progress
 #      8. Export the pool
 #
 
@@ -59,8 +59,7 @@ function custom_cleanup
        [[ -n $ZFS_TXG_TIMEOUT ]] &&
            log_must set_zfs_txg_timeout $ZFS_TXG_TIMEOUT
 
-       zinject -c all
-       log_must set_tunable64 zfs_scan_vdev_limit $ZFS_SCAN_VDEV_LIMIT_DEFAULT
+       log_must set_tunable32 zfs_scan_suspend_progress 0
        cleanup
 }
 
@@ -88,24 +87,16 @@ function test_replacing_vdevs
        log_must zpool export $TESTPOOL1
        log_must cp $CPATHBKP $CPATH
        log_must zpool import -c $CPATH -o cachefile=$CPATH $TESTPOOL1
-       log_must set_tunable64 zfs_scan_vdev_limit $ZFS_SCAN_VDEV_LIMIT_SLOW
-       typeset device
-       for device in $zinjectdevices ; do
-               log_must zinject -d $device -D 50:1 $TESTPOOL1 > /dev/null
-       done
+       log_must set_tunable32 zfs_scan_suspend_progress 1
        log_must zpool replace $TESTPOOL1 $replacevdev $replaceby
 
        # Cachefile: pool in resilvering state
        log_must cp $CPATH $CPATHBKP2
 
-       # We must disable zinject in order to export the pool, so we freeze
-       # it first to prevent writing out subsequent resilvering progress.
-       log_must zpool freeze $TESTPOOL1
        # Confirm pool is still replacing
        log_must pool_is_replacing $TESTPOOL1
-       log_must zinject -c all > /dev/null
-       log_must set_tunable64 zfs_scan_vdev_limit $ZFS_SCAN_VDEV_LIMIT_DEFAULT
        log_must zpool export $TESTPOOL1
+       log_must set_tunable32 zfs_scan_suspend_progress 0
 
        ( $earlyremove ) && log_must rm $replacevdev
 
index 574c192755f5c588f01fd271c6abcf1d467bb9ef..bc2c611ae013595e56653c8b719a8b081a0b917c 100755 (executable)
@@ -63,7 +63,7 @@ function custom_cleanup
        [[ -n $ZFS_TXG_TIMEOUT ]] &&
            log_must set_zfs_txg_timeout $ZFS_TXG_TIMEOUT
        log_must rm -rf $BACKUP_DEVICE_DIR
-       zinject -c all
+       log_must set_tunable32 zfs_scan_suspend_progress 0
        cleanup
 }
 
@@ -98,22 +98,17 @@ function test_replace_vdev
        # This should not free original data.
        log_must overwrite_data $TESTPOOL1 ""
 
-       # Steps to insure resilvering happens very slowly.
        log_must zpool export $TESTPOOL1
        log_must zpool import -d $DEVICE_DIR $TESTPOOL1
-       typeset device
-       for device in $zinjectdevices ; do
-               log_must zinject -d $device -D 200:1 $TESTPOOL1 > /dev/null
-       done
+
+       # Ensure resilvering doesn't complete.
+       log_must set_tunable32 zfs_scan_suspend_progress 1
        log_must zpool replace $TESTPOOL1 $replacevdev $replaceby
 
-       # We must disable zinject in order to export the pool, so we freeze
-       # it first to prevent writing out subsequent resilvering progress.
-       log_must zpool freeze $TESTPOOL1
        # Confirm pool is still replacing
        log_must pool_is_replacing $TESTPOOL1
-       log_must zinject -c all > /dev/null
        log_must zpool export $TESTPOOL1
+       log_must set_tunable32 zfs_scan_suspend_progress 0
 
        ############################################################
        # Test 1: rewind while device is resilvering.
index ecdf0ee534668fbb4b59d1806f87891eb42145d5..79423abe25756ea1ca7035a94ba15848d4705db3 100644 (file)
@@ -137,6 +137,3 @@ export VDEV3=$DEVICE_DIR/${DEVICE_FILE}3
 export VDEV4=$DEVICE_DIR/${DEVICE_FILE}4
 
 export ALTER_ROOT=/alter_import-test
-
-export ZFS_SCAN_VDEV_LIMIT_SLOW=$((128*1024))
-export ZFS_SCAN_VDEV_LIMIT_DEFAULT=$((4*1024*1024))
index 7d92984d6a3b147d55da37ef647eeff38f1394f5..5c013c72322ff5ab02ad83165fb7ab445185aacb 100644 (file)
@@ -27,7 +27,4 @@ export DISK1=$(echo $DISKS | nawk '{print $1}')
 export DISK2=$(echo $DISKS | nawk '{print $2}')
 export DISK3=$(echo $DISKS | nawk '{print $3}')
 
-export ZFS_SCAN_VDEV_LIMIT_SLOW=$((128*1024))
-export ZFS_SCAN_VDEV_LIMIT_DEFAULT=$((4*1024*1024))
-
 export MAXTIMEOUT=80
index de9e5ecdf49b528072f0fad6ce96e6f2e1856c41..4f98ced960a9aa29d597167b93c4a6a701410357 100755 (executable)
 #         deferred
 #      4. Manually restart the resilver with all drives
 #
-# NOTES:
-#      Artificially limit the scrub speed by setting the zfs_scan_vdev_limit
-#      low and adding a 50ms zio delay in order to ensure that the resilver
-#      does not complete early.
-#
 
 verify_runnable "global"
 
 function cleanup
 {
-       log_must zinject -c all
-       log_must set_tunable64 zfs_scan_vdev_limit $ZFS_SCAN_VDEV_LIMIT_DEFAULT
+       log_must set_tunable32 zfs_scan_suspend_progress 0
        log_must rm -f $mntpnt/biggerfile1
        log_must rm -f $mntpnt/biggerfile2
 }
@@ -73,22 +67,19 @@ log_must sync
 log_must zpool detach $TESTPOOL $DISK3
 
 # 3. Reattach the drives, causing the second drive's resilver to be deferred
-log_must set_tunable64 zfs_scan_vdev_limit $ZFS_SCAN_VDEV_LIMIT_SLOW
+log_must set_tunable32 zfs_scan_suspend_progress 1
 
 log_must zpool attach $TESTPOOL $DISK1 $DISK2
-log_must zinject -d $DISK2 -D50:1 $TESTPOOL
 log_must is_pool_resilvering $TESTPOOL true
 
 log_must zpool attach $TESTPOOL $DISK1 $DISK3
-log_must zinject -d $DISK3 -D50:1 $TESTPOOL
 log_must is_pool_resilvering $TESTPOOL true
 
 # 4. Manually restart the resilver with all drives
 log_must zpool resilver $TESTPOOL
-log_must zinject -c all
-log_must set_tunable64 zfs_scan_vdev_limit $ZFS_SCAN_VDEV_LIMIT_DEFAULT
-log_must wait_for_resilver_end $TESTPOOL $MAXTIMEOUT
 log_must is_deferred_scan_started $TESTPOOL
+log_must set_tunable32 zfs_scan_suspend_progress 0
+log_must wait_for_resilver_end $TESTPOOL $MAXTIMEOUT
 log_must check_state $TESTPOOL "$DISK2" "online"
 log_must check_state $TESTPOOL "$DISK3" "online"
 
index e8bb8bceb6ab32a4a018a94c6daee460b9247527..b3cb58ceb6fdc474a9504aa924acca44e3f8eaa1 100755 (executable)
@@ -30,5 +30,5 @@
 
 verify_runnable "global"
 
-log_must set_tunable64 zfs_scan_vdev_limit $ZFS_SCAN_VDEV_LIMIT_DEFAULT
+log_must set_tunable32 zfs_scan_suspend_progress 0
 destroy_mirrors
index 712097bb1ca8d7f37176fb3996afac862e964d0d..71a204060b70b82451eda1e8ed38a64cdaa7fb91 100755 (executable)
 #      5. Resume the paused scrub and verify scrub is again being performed.
 #      6. Verify zpool scrub -s succeed when the system is scrubbing.
 #
-# NOTES:
-#      Artificially limit the scrub speed by setting the zfs_scan_vdev_limit
-#      low and adding a 50ms zio delay in order to ensure that the scrub does
-#      not complete early.
-#
 
 verify_runnable "global"
 
 function cleanup
 {
-       log_must zinject -c all
-       log_must set_tunable64 zfs_scan_vdev_limit $ZFS_SCAN_VDEV_LIMIT_DEFAULT
+       log_must set_tunable32 zfs_scan_suspend_progress 0
        log_must rm -f $mntpnt/biggerfile
 }
 
@@ -69,8 +63,7 @@ mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS)
 log_must file_write -b 1048576 -c 1024 -o create -d 0 -f $mntpnt/biggerfile
 log_must sync
 
-log_must zinject -d $DISK1 -D50:1 $TESTPOOL
-log_must set_tunable64 zfs_scan_vdev_limit $ZFS_SCAN_VDEV_LIMIT_SLOW
+log_must set_tunable32 zfs_scan_suspend_progress 1
 log_must zpool scrub $TESTPOOL
 log_must is_pool_scrubbing $TESTPOOL true
 log_must zpool scrub -p $TESTPOOL
index c52ad84bc513797e41dec97e566103978745a826..56225456b8a01bc92969eeb195606860f38b895c 100755 (executable)
 #      2. Kick off a scrub
#      3. Kick off a second scrub and verify it fails
 #
-# NOTES:
-#      Artificially limit the scrub speed by setting the zfs_scan_vdev_limit
-#      low in order to ensure that the scrub does not complete early.
-#
 
 verify_runnable "global"
 
 function cleanup
 {
-       log_must set_tunable64 zfs_scan_vdev_limit $ZFS_SCAN_VDEV_LIMIT_DEFAULT
+       log_must set_tunable32 zfs_scan_suspend_progress 0
 }
 
 log_onexit cleanup
 
 log_assert "Scrub command fails when there is already a scrub in progress"
 
-log_must set_tunable64 zfs_scan_vdev_limit $ZFS_SCAN_VDEV_LIMIT_SLOW
+log_must set_tunable32 zfs_scan_suspend_progress 1
 log_must zpool scrub $TESTPOOL
 log_must is_pool_scrubbing $TESTPOOL true
 log_mustnot zpool scrub $TESTPOOL
index 14563d64d149274f68b7cc1d668639fd0dcaa4ea..9b6274cd10e4b032d23252e5cc85bfefba549f5c 100755 (executable)
 #      4. Export/import the pool to ensure the cache is dropped
 #      5. Verify scrub failed until the resilver completed
 #
-# NOTES:
-#      Artificially limit the scrub speed by setting the zfs_scan_vdev_limit
-#      low in order to ensure that the scrub does not complete early.
-#
 
 function cleanup
 {
-       log_must set_tunable64 zfs_scan_vdev_limit $ZFS_SCAN_VDEV_LIMIT_DEFAULT
+       log_must set_tunable32 zfs_scan_suspend_progress 0
        rm -f $mntpnt/extra
 }
 
@@ -61,7 +57,9 @@ log_onexit cleanup
 log_assert "Resilver prevent scrub from starting until the resilver completes"
 
 mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS)
-log_must set_tunable64 zfs_scan_vdev_limit $ZFS_SCAN_VDEV_LIMIT_SLOW
+
+# Temporarily prevent scan progress so our test doesn't race
+log_must set_tunable32 zfs_scan_suspend_progress 1
 
 while ! is_pool_resilvering $TESTPOOL; do
        log_must zpool detach $TESTPOOL $DISK2
@@ -74,6 +72,7 @@ done
 log_must is_pool_resilvering $TESTPOOL
 log_mustnot zpool scrub $TESTPOOL
 
+log_must set_tunable32 zfs_scan_suspend_progress 0
 while ! is_pool_resilvered $TESTPOOL; do
        sleep 1
 done
index ffc841f7662e4e570f4244163f908ebb7dcb0a49..1a5c3198f09b051934c90278cee8a7f5a2d85963 100755 (executable)
@@ -41,7 +41,7 @@ verify_runnable "both"
 
 function cleanup
 {
-       log_must zinject -c all
+       log_must set_tunable32 zfs_scan_suspend_progress 0
        destroy_pool $TESTPOOL
        destroy_pool $TESTPOOL2
        rm -f $DEVICE1 $DEVICE2
@@ -68,10 +68,8 @@ function zpool_split #disk_to_be_offline/online
        log_must file_write -b 2097152 -c 1024 -o create -d 0 -f $mntpnt/biggerfile
        log_must sync
 
-       # slow-down resilvering, so it will not finish too early
-       log_must set_tunable64 zfs_scan_vdev_limit $ZFS_SCAN_VDEV_LIMIT_SLOW
-       log_must zinject -d $DEVICE1 -D 50:1 $TESTPOOL
-       log_must zinject -d $DEVICE2 -D 50:1 $TESTPOOL
+       # temporarily prevent resilvering progress, so it will not finish too early
+       log_must set_tunable32 zfs_scan_suspend_progress 1
 
        log_must zpool online $TESTPOOL $disk
 
@@ -86,7 +84,7 @@ function zpool_split #disk_to_be_offline/online
 
        log_mustnot zpool split $TESTPOOL $TESTPOOL2
 
-       log_must set_tunable64 zfs_scan_vdev_limit $ZFS_SCAN_VDEV_LIMIT_DEFAULT
+       log_must set_tunable32 zfs_scan_suspend_progress 0
 }
 
 log_assert "Verify 'zpool split' will fail if resilver in progress for a disk"
@@ -96,15 +94,12 @@ DEVSIZE='3g'
 DEVICE1="$TEST_BASE_DIR/device-1"
 DEVICE2="$TEST_BASE_DIR/device-2"
 
-ZFS_SCAN_VDEV_LIMIT_SLOW=$((128*1024))
-ZFS_SCAN_VDEV_LIMIT_DEFAULT=$(get_tunable zfs_scan_vdev_limit)
-
-log_note "Verify ZFS prevents main pool curruption during 'split'"
+log_note "Verify ZFS prevents main pool corruption during 'split'"
 zpool_split $DEVICE1
 
 cleanup
 
-log_note "Verify ZFS prevents new pool curruption during 'split'"
+log_note "Verify ZFS prevents new pool corruption during 'split'"
 zpool_split $DEVICE2
 
 log_pass "'zpool split' failed as expected"
index 7aa3835854b5a6fa338e4c7851f721bb321cb43f..c1ab044c7265181e7de8d1c9782414c321c055ed 100644 (file)
@@ -62,7 +62,7 @@ function attempt_during_removal # pool disk callback [args]
        typeset callback=$3
 
        shift 3
-       set_tunable64 zfs_remove_max_bytes_pause 0
+       set_tunable32 zfs_removal_suspend_progress 1
 
        log_must zpool remove $pool $disk
 
@@ -81,7 +81,7 @@ function attempt_during_removal # pool disk callback [args]
        #
        log_must is_pool_removing $pool
 
-       set_tunable64 zfs_remove_max_bytes_pause 18446744073709551615
+       set_tunable32 zfs_removal_suspend_progress 0
 
        log_must wait_for_removal $pool
        log_mustnot vdevs_in_pool $pool $disk
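
With the flag-based tunable, attempt_during_removal keeps its calling
convention (pool, disk, callback, optional args). A hypothetical
caller, where do_something_during_removal stands in for a real test
callback and the variable names are illustrative:

    # Pause the removal, run the callback while the removal is
    # guaranteed to be in flight, then resume and wait for it to finish.
    attempt_during_removal $TESTPOOL $REMOVEDISK do_something_during_removal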