git.proxmox.com Git - mirror_zfs.git/commitdiff
Fix txg_sync_thread hang in scan_exec_io()
author: Brian Behlendorf <behlendorf1@llnl.gov>
Wed, 31 Jan 2018 17:33:33 +0000 (09:33 -0800)
committer: GitHub <noreply@github.com>
Wed, 31 Jan 2018 17:33:33 +0000 (09:33 -0800)
When scn->scn_maxinflight_bytes has not been initialized, it's
possible to hang on the condition variable in scan_exec_io().
This issue was uncovered by ztest and is only possible when
deduplication is enabled through the following call path.

  txg_sync_thread()
    spa_sync()
      ddt_sync_table()
        ddt_sync_entry()
          dsl_scan_ddt_entry()
            dsl_scan_scrub_cb()
              dsl_scan_enqueue()
                scan_exec_io()
                  cv_wait()

Resolve the issue by always initializing scn_maxinflight_bytes
to a reasonable minimum value.  This value will be recalculated
in dsl_scan_sync() to pick up changes to zfs_scan_vdev_limit
and the addition/removal of vdevs.

Reviewed-by: Tom Caputi <tcaputi@datto.com>
Reviewed-by: George Melikov <mail@gmelikov.ru>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #7098

module/zfs/dsl_scan.c

index 4b4f1665c00eabf05417bb26b2402f5aba9b378f..fc0c24e1c31505ed1405bee2dffa4ddd66203178 100644 (file)
@@ -124,6 +124,7 @@ static boolean_t scan_ds_queue_contains(dsl_scan_t *scn, uint64_t dsobj,
 static void scan_ds_queue_insert(dsl_scan_t *scn, uint64_t dsobj, uint64_t txg);
 static void scan_ds_queue_remove(dsl_scan_t *scn, uint64_t dsobj);
 static void scan_ds_queue_sync(dsl_scan_t *scn, dmu_tx_t *tx);
+static uint64_t dsl_scan_count_leaves(vdev_t *vd);
 
 extern int zfs_vdev_async_write_active_min_dirty_percent;
 
@@ -378,6 +379,14 @@ dsl_scan_init(dsl_pool_t *dp, uint64_t txg)
        scn->scn_async_destroying = spa_feature_is_active(dp->dp_spa,
            SPA_FEATURE_ASYNC_DESTROY);
 
+       /*
+        * Calculate the max number of in-flight bytes for pool-wide
+        * scanning operations (minimum 1MB). Limits for the issuing
+        * phase are done per top-level vdev and are handled separately.
+        */
+       scn->scn_maxinflight_bytes = MAX(zfs_scan_vdev_limit *
+           dsl_scan_count_leaves(spa->spa_root_vdev), 1ULL << 20);
+
        bcopy(&scn->scn_phys, &scn->scn_phys_cached, sizeof (scn->scn_phys));
        avl_create(&scn->scn_queue, scan_ds_queue_compare, sizeof (scan_ds_t),
            offsetof(scan_ds_t, sds_node));
@@ -2290,7 +2299,7 @@ dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum checksum,
        zbookmark_phys_t zb = { 0 };
        int p;
 
-       if (scn->scn_phys.scn_state != DSS_SCANNING)
+       if (!dsl_scan_is_running(scn))
                return;
 
        for (p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
@@ -3207,7 +3216,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
                uint64_t nr_leaves = dsl_scan_count_leaves(spa->spa_root_vdev);
 
                /*
-                * Calculate the max number of in-flight bytes for pool-wide
+                * Recalculate the max number of in-flight bytes for pool-wide
                 * scanning operations (minimum 1MB). Limits for the issuing
                 * phase are done per top-level vdev and are handled separately.
                 */
@@ -3564,6 +3573,8 @@ scan_exec_io(dsl_pool_t *dp, const blkptr_t *bp, int zio_flags,
        size_t size = BP_GET_PSIZE(bp);
        abd_t *data = abd_alloc_for_io(size, B_FALSE);
 
+       ASSERT3U(scn->scn_maxinflight_bytes, >, 0);
+
        if (queue == NULL) {
                mutex_enter(&spa->spa_scrub_lock);
                while (spa->spa_scrub_inflight >= scn->scn_maxinflight_bytes)