git.proxmox.com Git - mirror_zfs.git/commitdiff
Fix hung z_zvol tasks during 'zfs receive'
author LOLi <loli10K@users.noreply.github.com>
Fri, 30 Mar 2018 19:10:01 +0000 (21:10 +0200)
committer Tony Hutter <hutter2@llnl.gov>
Tue, 8 May 2018 00:19:57 +0000 (17:19 -0700)
During a receive operation, zvol_create_minors_impl() can wait
needlessly for the prefetch thread because both share the same task
queue.  This results in hung tasks:

<3>INFO: task z_zvol:5541 blocked for more than 120 seconds.
<3>      Tainted: P           O  3.16.0-4-amd64
<3>"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.

The first z_zvol:5541 (zvol_task_cb) is waiting for the long-running
traverse_prefetch_thread:260:

root@linux:~# cat /proc/spl/taskq
taskq                       act  nthr  spwn  maxt   pri  mina
spl_system_taskq/0            1     2     0    64   100     1
active: [260]traverse_prefetch_thread [zfs](0xffff88003347ae40)
wait: 5541
spl_delay_taskq/0             0     1     0     4   100     1
delay: spa_deadman [zfs](0xffff880039924000)
z_zvol/1                      1     1     0     1   120     1
active: [5541]zvol_task_cb [zfs](0xffff88001fde6400)
pend: zvol_task_cb [zfs](0xffff88001fde6800)

This change adds a dedicated, per-pool, prefetch taskq to prevent the
traverse code from monopolizing the global (and limited) system_taskq by
inappropriately scheduling long-running tasks on it.

Reviewed-by: Albert Lee <trisk@forkgnu.org>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: loli10K <ezomori.nozomu@gmail.com>
Closes #6330
Closes #6890
Closes #7343

include/sys/spa_impl.h
module/zfs/dmu_traverse.c
module/zfs/spa.c

index 73ad1c60c5c6c4bc8c1304a927f67a897f02f159..b1e78c1d592b187048c62f5200975cb0ede906b9 100644 (file)
@@ -275,6 +275,7 @@ struct spa {
        spa_stats_t     spa_stats;              /* assorted spa statistics */
        hrtime_t        spa_ccw_fail_time;      /* Conf cache write fail time */
        taskq_t         *spa_zvol_taskq;        /* Taskq for minor management */
+       taskq_t         *spa_prefetch_taskq;    /* Taskq for prefetch threads */
        uint64_t        spa_multihost;          /* multihost aware (mmp) */
        mmp_thread_t    spa_mmp;                /* multihost mmp thread */
 
index b494bef3583138527833dc0807439a8572c7cd89..f63903ef649dc82960c263b164484d0b170c9d28 100644 (file)
@@ -31,6 +31,7 @@
 #include <sys/dsl_pool.h>
 #include <sys/dnode.h>
 #include <sys/spa.h>
+#include <sys/spa_impl.h>
 #include <sys/zio.h>
 #include <sys/dmu_impl.h>
 #include <sys/sa.h>
@@ -623,7 +624,7 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,
        }
 
        if (!(flags & TRAVERSE_PREFETCH_DATA) ||
-           taskq_dispatch(system_taskq, traverse_prefetch_thread,
+           taskq_dispatch(spa->spa_prefetch_taskq, traverse_prefetch_thread,
            td, TQ_NOQUEUE) == TASKQID_INVALID)
                pd->pd_exited = B_TRUE;
 
index 561f4d04bfc9c0a17354880e419157afb06126ed..1add7ad246fd42833e73c6c8dd59f19a57f87edc 100644 (file)
@@ -1182,6 +1182,14 @@ spa_activate(spa_t *spa, int mode)
        spa->spa_zvol_taskq = taskq_create("z_zvol", 1, defclsyspri,
            1, INT_MAX, 0);
 
+       /*
+        * Taskq dedicated to prefetcher threads: this is used to prevent the
+        * pool traverse code from monopolizing the global (and limited)
+        * system_taskq by inappropriately scheduling long running tasks on it.
+        */
+       spa->spa_prefetch_taskq = taskq_create("z_prefetch", boot_ncpus,
+           defclsyspri, 1, INT_MAX, TASKQ_DYNAMIC);
+
        /*
         * The taskq to upgrade datasets in this pool. Currently used by
         * feature SPA_FEATURE_USEROBJ_ACCOUNTING.
@@ -1211,6 +1219,11 @@ spa_deactivate(spa_t *spa)
                spa->spa_zvol_taskq = NULL;
        }
 
+       if (spa->spa_prefetch_taskq) {
+               taskq_destroy(spa->spa_prefetch_taskq);
+               spa->spa_prefetch_taskq = NULL;
+       }
+
        if (spa->spa_upgrade_taskq) {
                taskq_destroy(spa->spa_upgrade_taskq);
                spa->spa_upgrade_taskq = NULL;