git.proxmox.com Git - mirror_zfs.git/commitdiff
Fix hung z_zvol tasks during 'zfs receive'
author: LOLi <loli10K@users.noreply.github.com>
Fri, 30 Mar 2018 19:10:01 +0000 (21:10 +0200)
committer: Brian Behlendorf <behlendorf1@llnl.gov>
Fri, 30 Mar 2018 19:10:01 +0000 (12:10 -0700)
During a receive operation, zvol_create_minors_impl() can wait
needlessly for the prefetch thread because both share the same task
queue.  This results in hung tasks:

<3>INFO: task z_zvol:5541 blocked for more than 120 seconds.
<3>      Tainted: P           O  3.16.0-4-amd64
<3>"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.

The first z_zvol:5541 (zvol_task_cb) is waiting for the long-running
traverse_prefetch_thread:260:

root@linux:~# cat /proc/spl/taskq
taskq                       act  nthr  spwn  maxt   pri  mina
spl_system_taskq/0            1     2     0    64   100     1
active: [260]traverse_prefetch_thread [zfs](0xffff88003347ae40)
wait: 5541
spl_delay_taskq/0             0     1     0     4   100     1
delay: spa_deadman [zfs](0xffff880039924000)
z_zvol/1                      1     1     0     1   120     1
active: [5541]zvol_task_cb [zfs](0xffff88001fde6400)
pend: zvol_task_cb [zfs](0xffff88001fde6800)

This change adds a dedicated, per-pool, prefetch taskq to prevent the
traverse code from monopolizing the global (and limited) system_taskq by
inappropriately scheduling long-running tasks on it.

Reviewed-by: Albert Lee <trisk@forkgnu.org>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: loli10K <ezomori.nozomu@gmail.com>
Closes #6330
Closes #6890
Closes #7343

include/sys/spa_impl.h
module/zfs/dmu_traverse.c
module/zfs/spa.c

index af1d6aef0fe9abac99e3e702e38113d6ed70b903..77625d4b00727a14c3bc753c9fc3eb63c59529be 100644 (file)
@@ -280,6 +280,7 @@ struct spa {
        spa_keystore_t  spa_keystore;           /* loaded crypto keys */
        hrtime_t        spa_ccw_fail_time;      /* Conf cache write fail time */
        taskq_t         *spa_zvol_taskq;        /* Taskq for minor management */
+       taskq_t         *spa_prefetch_taskq;    /* Taskq for prefetch threads */
        uint64_t        spa_multihost;          /* multihost aware (mmp) */
        mmp_thread_t    spa_mmp;                /* multihost mmp thread */
 
index 5407e4817292473edb77b6811788595591a2f783..cffcd2d00ec876fa16e7c210956d2ea5983eeba0 100644 (file)
@@ -31,6 +31,7 @@
 #include <sys/dsl_pool.h>
 #include <sys/dnode.h>
 #include <sys/spa.h>
+#include <sys/spa_impl.h>
 #include <sys/zio.h>
 #include <sys/dmu_impl.h>
 #include <sys/sa.h>
@@ -661,7 +662,7 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,
        }
 
        if (!(flags & TRAVERSE_PREFETCH_DATA) ||
-           taskq_dispatch(system_taskq, traverse_prefetch_thread,
+           taskq_dispatch(spa->spa_prefetch_taskq, traverse_prefetch_thread,
            td, TQ_NOQUEUE) == TASKQID_INVALID)
                pd->pd_exited = B_TRUE;
 
index 1e9e7b0131b935ed2c3cdde7011468ebe15cf691..4b6196cc3610105e80d4671a730cbdbb6853934a 100644 (file)
@@ -1186,6 +1186,14 @@ spa_activate(spa_t *spa, int mode)
        spa->spa_zvol_taskq = taskq_create("z_zvol", 1, defclsyspri,
            1, INT_MAX, 0);
 
+       /*
+        * Taskq dedicated to prefetcher threads: this is used to prevent the
+        * pool traverse code from monopolizing the global (and limited)
+        * system_taskq by inappropriately scheduling long running tasks on it.
+        */
+       spa->spa_prefetch_taskq = taskq_create("z_prefetch", boot_ncpus,
+           defclsyspri, 1, INT_MAX, TASKQ_DYNAMIC);
+
        /*
         * The taskq to upgrade datasets in this pool. Currently used by
         * feature SPA_FEATURE_USEROBJ_ACCOUNTING/SPA_FEATURE_PROJECT_QUOTA.
@@ -1213,6 +1221,11 @@ spa_deactivate(spa_t *spa)
                spa->spa_zvol_taskq = NULL;
        }
 
+       if (spa->spa_prefetch_taskq) {
+               taskq_destroy(spa->spa_prefetch_taskq);
+               spa->spa_prefetch_taskq = NULL;
+       }
+
        if (spa->spa_upgrade_taskq) {
                taskq_destroy(spa->spa_upgrade_taskq);
                spa->spa_upgrade_taskq = NULL;