]> git.proxmox.com Git - mirror_zfs.git/commitdiff
Use taskq for dump_bytes()
author Brian Behlendorf <behlendorf1@llnl.gov>
Fri, 3 May 2013 21:17:21 +0000 (14:17 -0700)
committer Brian Behlendorf <behlendorf1@llnl.gov>
Mon, 6 May 2013 21:05:42 +0000 (14:05 -0700)
The vn_rdwr() function performs I/O by calling the vfs_write() or
vfs_read() functions.  These functions reside just below the system
call layer and the expectation is they have almost the entire 8k of
stack space to work with.  In fact, certain layered configurations
such as ext+lvm+md+multipath require the majority of this stack to
avoid stack overflows.

To avoid this possibility the vn_rdwr() call in dump_bytes() has been
moved to the ZIO_TYPE_FREE taskq.  This ensures that all I/O will be
performed with the majority of the stack space available.  This ends
up being very similar to issuing the I/O via sys_write()
or sys_read().

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #1399
Closes #1423

include/sys/spa_impl.h
include/sys/zfs_context.h
lib/libzpool/taskq.c
module/zfs/dmu_send.c
module/zfs/spa.c

index 1b12b4e3a8b26e9ca383d885bfa56caa6be59e0c..47dfe432eed2f38fa536cec8a34198f7add27012 100644 (file)
@@ -250,6 +250,9 @@ extern char *spa_config_path;
 
 extern void spa_taskq_dispatch_ent(spa_t *spa, zio_type_t t, zio_taskq_type_t q,
     task_func_t *func, void *arg, uint_t flags, taskq_ent_t *ent);
+extern void spa_taskq_dispatch_sync(spa_t *, zio_type_t t, zio_taskq_type_t q,
+    task_func_t *func, void *arg, uint_t flags);
+
 
 #ifdef __cplusplus
 }
index a23bfdcf82a29b385a0973240e930f802f6db3b4..0b24216b5974fe109114c1d51324552370ac3498 100644 (file)
@@ -409,6 +409,7 @@ extern int  taskq_empty_ent(taskq_ent_t *);
 extern void    taskq_init_ent(taskq_ent_t *);
 extern void    taskq_destroy(taskq_t *);
 extern void    taskq_wait(taskq_t *);
+extern void    taskq_wait_id(taskq_t *, taskqid_t);
 extern int     taskq_member(taskq_t *, kthread_t *);
 extern int     taskq_cancel_id(taskq_t *, taskqid_t);
 extern void    system_taskq_init(void);
index 64e214205e62c9f6a7ba9e52e8b40da2f400845e..96c0d5c2be989446a4c8d0e17d64672dc6a4b918 100644 (file)
@@ -211,6 +211,12 @@ taskq_wait(taskq_t *tq)
        mutex_exit(&tq->tq_lock);
 }
 
+void
+taskq_wait_id(taskq_t *tq, taskqid_t id)
+{
+       taskq_wait(tq);
+}
+
 static void
 taskq_thread(void *arg)
 {
index 921c3d76f470ea0c806052cee55c41c1ab2c3e36..b2c6bfe2b82ff553cbab880678fda56ee891ff17 100644 (file)
@@ -39,6 +39,7 @@
 #include <sys/dsl_prop.h>
 #include <sys/dsl_pool.h>
 #include <sys/dsl_synctask.h>
+#include <sys/spa_impl.h>
 #include <sys/zfs_ioctl.h>
 #include <sys/zap.h>
 #include <sys/zio_checksum.h>
@@ -53,21 +54,48 @@ int zfs_send_corrupt_data = B_FALSE;
 
 static char *dmu_recv_tag = "dmu_recv_tag";
 
-static int
-dump_bytes(dmu_sendarg_t *dsp, void *buf, int len)
+typedef struct dump_bytes_io {
+       dmu_sendarg_t   *dbi_dsp;
+       void            *dbi_buf;
+       int             dbi_len;
+} dump_bytes_io_t;
+
+static void
+dump_bytes_strategy(void *arg)
 {
+       dump_bytes_io_t *dbi = (dump_bytes_io_t *)arg;
+       dmu_sendarg_t *dsp = dbi->dbi_dsp;
        dsl_dataset_t *ds = dsp->dsa_os->os_dsl_dataset;
        ssize_t resid; /* have to get resid to get detailed errno */
-       ASSERT3U(len % 8, ==, 0);
+       ASSERT3U(dbi->dbi_len % 8, ==, 0);
 
-       fletcher_4_incremental_native(buf, len, &dsp->dsa_zc);
+       fletcher_4_incremental_native(dbi->dbi_buf, dbi->dbi_len, &dsp->dsa_zc);
        dsp->dsa_err = vn_rdwr(UIO_WRITE, dsp->dsa_vp,
-           (caddr_t)buf, len,
+           (caddr_t)dbi->dbi_buf, dbi->dbi_len,
            0, UIO_SYSSPACE, FAPPEND, RLIM64_INFINITY, CRED(), &resid);
 
        mutex_enter(&ds->ds_sendstream_lock);
-       *dsp->dsa_off += len;
+       *dsp->dsa_off += dbi->dbi_len;
        mutex_exit(&ds->ds_sendstream_lock);
+}
+
+static int
+dump_bytes(dmu_sendarg_t *dsp, void *buf, int len)
+{
+       dump_bytes_io_t dbi;
+
+       dbi.dbi_dsp = dsp;
+       dbi.dbi_buf = buf;
+       dbi.dbi_len = len;
+
+       /*
+        * The vn_rdwr() call is performed in a taskq to ensure that there is
+        * always enough stack space to write safely to the target filesystem.
+        * The ZIO_TYPE_FREE threads are used because there can be a lot of
+        * them and they are used in vdev_file.c for a similar purpose.
+        */
+       spa_taskq_dispatch_sync(dmu_objset_spa(dsp->dsa_os), ZIO_TYPE_FREE,
+           ZIO_TASKQ_ISSUE, dump_bytes_strategy, &dbi, TQ_SLEEP);
 
        return (dsp->dsa_err);
 }
index e022c325864b3d7767cd1de9e5d045536472351d..82ee445ab498764abde3ebb093b0426c2cfbb7d6 100644 (file)
@@ -926,6 +926,31 @@ spa_taskq_dispatch_ent(spa_t *spa, zio_type_t t, zio_taskq_type_t q,
        taskq_dispatch_ent(tq, func, arg, flags, ent);
 }
 
+/*
+ * Same as spa_taskq_dispatch_ent() but block on the task until completion.
+ */
+void
+spa_taskq_dispatch_sync(spa_t *spa, zio_type_t t, zio_taskq_type_t q,
+    task_func_t *func, void *arg, uint_t flags)
+{
+       spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q];
+       taskq_t *tq;
+       taskqid_t id;
+
+       ASSERT3P(tqs->stqs_taskq, !=, NULL);
+       ASSERT3U(tqs->stqs_count, !=, 0);
+
+       if (tqs->stqs_count == 1) {
+               tq = tqs->stqs_taskq[0];
+       } else {
+               tq = tqs->stqs_taskq[gethrtime() % tqs->stqs_count];
+       }
+
+       id = taskq_dispatch(tq, func, arg, flags);
+       if (id)
+               taskq_wait_id(tq, id);
+}
+
 static void
 spa_create_zio_taskqs(spa_t *spa)
 {