]> git.proxmox.com Git - mirror_qemu.git/commitdiff
Merge tag 'for-upstream' of https://repo.or.cz/qemu/kevin into staging
authorRichard Henderson <richard.henderson@linaro.org>
Mon, 22 May 2023 14:04:50 +0000 (07:04 -0700)
committerRichard Henderson <richard.henderson@linaro.org>
Mon, 22 May 2023 14:04:50 +0000 (07:04 -0700)
Block layer patches

- qcow2 spec: Rename "zlib" compression to "deflate"
- Honour graph read lock even in the main thread + prerequisite fixes
- aio-posix: do not nest poll handlers (fixes infinite recursion)
- Refactor QMP blockdev transactions
- graph-lock: Disable locking for now
- iotests/245: Check if 'compress' driver is available

# -----BEGIN PGP SIGNATURE-----
#
# iQJFBAABCAAvFiEE3D3rFZqa+V09dFb+fwmycsiPL9YFAmRnrxURHGt3b2xmQHJl
# ZGhhdC5jb20ACgkQfwmycsiPL9aHyw/9H0xpceVb0kcC5CStOWCcq4PJHzkl/8/m
# c6ABFe0fgEuN2FCiKiCKOt6+V7qaIAw0+YLgPr/LGIsbIBzdxF3Xgd2UyIH6o4dK
# bSaIAaes6ZLTcYGIYEVJtHuwNgvzhjyBlW5qqwTpN0YArKS411eHyQ3wlUkCEVwK
# ZNmDY/MC8jq8r1xfwpPi7CaH6k1I6HhDmyl1PdURW9hmoAKZQZMhEdA5reJrUwZ9
# EhfgbLIaK0kkLLsufJ9YIkd+b/P3mUbH30kekNMOiA0XlnhWm1Djol5pxlnNiflg
# CGh6CAyhJKdXzwV567cSF11NYCsFmiY+c/l0xRIGscujwvO4iD7wFT5xk2geUAKV
# yaox8JA7Le36g7lO2CRadlS24/Ekqnle6q09g2i8s2tZwB4fS286vaZz6QDPmf7W
# VSQp9vuDj6ZcVjMsuo2+LzF3yA2Vqvgd9s032iBAjRDSGLAoOdQZjBJrreypJ0Oi
# pVFwgK+9QNCZBsqVhwVOgElSoK/3Vbl1kqpi30Ikgc0epAn0suM1g2QQPJ2Zt/MJ
# xqMlTv+48OW3vq3ebr8GXqkhvG/u0ku6I1G6ZyCrjOce89osK8QUaovERyi1eOmo
# ouoZ8UJJa6VfEkkmdhq2vF6u/MP4PeZ8MW3pYQy6qEnSOPDKpLnR30Z/s/HZCZcm
# H4QIbfQnzic=
# =edNP
# -----END PGP SIGNATURE-----
# gpg: Signature made Fri 19 May 2023 10:17:09 AM PDT
# gpg:                using RSA key DC3DEB159A9AF95D3D7456FE7F09B272C88F2FD6
# gpg:                issuer "kwolf@redhat.com"
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>" [full]

* tag 'for-upstream' of https://repo.or.cz/qemu/kevin: (21 commits)
  iotests: Test commit with iothreads and ongoing I/O
  nbd/server: Fix drained_poll to wake coroutine in right AioContext
  graph-lock: Disable locking for now
  tested: add test for nested aio_poll() in poll handlers
  aio-posix: do not nest poll handlers
  iotests/245: Check if 'compress' driver is available
  graph-lock: Honour read locks even in the main thread
  blockjob: Adhere to rate limit even when reentered early
  test-bdrv-drain: Call bdrv_co_unref() in coroutine context
  test-bdrv-drain: Take graph lock more selectively
  qemu-img: Take graph lock more selectively
  qcow2: Unlock the graph in qcow2_do_open() where necessary
  block/export: Fix null pointer dereference in error path
  block: Call .bdrv_co_create(_opts) unlocked
  docs/interop/qcow2.txt: fix description about "zlib" clusters
  blockdev: qmp_transaction: drop extra generic layer
  blockdev: use state.bitmap in block-dirty-bitmap-add action
  blockdev: transaction: refactor handling transaction properties
  blockdev: qmp_transaction: refactor loop to classic for
  blockdev: transactions: rename some things
  ...

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
36 files changed:
block.c
block/commit.c
block/create.c
block/crypto.c
block/export/export.c
block/graph-lock.c
block/mirror.c
block/parallels.c
block/qcow.c
block/qcow2.c
block/qed.c
block/raw-format.c
block/stream.c
block/vdi.c
block/vhdx.c
block/vmdk.c
block/vpc.c
blockdev.c
blockjob.c
docs/interop/qcow2.txt
include/block/block-global-state.h
include/block/block_int-common.h
include/block/blockjob_int.h
include/io/channel.h
io/channel.c
nbd/server.c
qemu-img.c
tests/qemu-iotests/245
tests/qemu-iotests/245.out
tests/qemu-iotests/iotests.py
tests/qemu-iotests/tests/graph-changes-while-io
tests/qemu-iotests/tests/graph-changes-while-io.out
tests/unit/meson.build
tests/unit/test-bdrv-drain.c
tests/unit/test-nested-aio-poll.c [new file with mode: 0644]
util/aio-posix.c

diff --git a/block.c b/block.c
index f04a6ad4e8218c8bed918546cad274f299157747..a2f8d5a0c0de0c4a1475002adf84952e29587ff1 100644 (file)
--- a/block.c
+++ b/block.c
@@ -533,7 +533,6 @@ int coroutine_fn bdrv_co_create(BlockDriver *drv, const char *filename,
     int ret;
     GLOBAL_STATE_CODE();
     ERRP_GUARD();
-    assert_bdrv_graph_readable();
 
     if (!drv->bdrv_co_create_opts) {
         error_setg(errp, "Driver '%s' does not support image creation",
index 2b20fd0fd4d2c56a9adc3ada7b91a0a944894c54..aa45beb0f0f8e7601c7a9c7da3ca9a2130dcd8e1 100644 (file)
@@ -116,7 +116,6 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
 {
     CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);
     int64_t offset;
-    uint64_t delay_ns = 0;
     int ret = 0;
     int64_t n = 0; /* bytes */
     QEMU_AUTO_VFREE void *buf = NULL;
@@ -149,7 +148,7 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
         /* Note that even when no rate limit is applied we need to yield
          * with no pending I/O here so that bdrv_drain_all() returns.
          */
-        job_sleep_ns(&s->common.job, delay_ns);
+        block_job_ratelimit_sleep(&s->common);
         if (job_is_cancelled(&s->common.job)) {
             break;
         }
@@ -184,9 +183,7 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
         job_progress_update(&s->common.job, n);
 
         if (copy) {
-            delay_ns = block_job_ratelimit_get_delay(&s->common, n);
-        } else {
-            delay_ns = 0;
+            block_job_ratelimit_processed_bytes(&s->common, n);
         }
     }
 
index bf67b9947cb1697e3d761b858664cd16a8973eca..6b23a2167535e7fe7321b22c1a28686a6189492b 100644 (file)
@@ -43,7 +43,6 @@ static int coroutine_fn blockdev_create_run(Job *job, Error **errp)
     int ret;
 
     GLOBAL_STATE_CODE();
-    GRAPH_RDLOCK_GUARD();
 
     job_progress_set_remaining(&s->common, 1);
     ret = s->drv->bdrv_co_create(s->opts, errp);
index 30093cff9b34e941f89d1257a4b930333d472ee9..6ee8d46d3025b468e39d6d64d42e4b15f7d263b1 100644 (file)
@@ -99,12 +99,10 @@ struct BlockCryptoCreateData {
 };
 
 
-static int block_crypto_create_write_func(QCryptoBlock *block,
-                                          size_t offset,
-                                          const uint8_t *buf,
-                                          size_t buflen,
-                                          void *opaque,
-                                          Error **errp)
+static int coroutine_fn GRAPH_UNLOCKED
+block_crypto_create_write_func(QCryptoBlock *block, size_t offset,
+                               const uint8_t *buf, size_t buflen, void *opaque,
+                               Error **errp)
 {
     struct BlockCryptoCreateData *data = opaque;
     ssize_t ret;
@@ -117,10 +115,9 @@ static int block_crypto_create_write_func(QCryptoBlock *block,
     return 0;
 }
 
-static int block_crypto_create_init_func(QCryptoBlock *block,
-                                         size_t headerlen,
-                                         void *opaque,
-                                         Error **errp)
+static int coroutine_fn GRAPH_UNLOCKED
+block_crypto_create_init_func(QCryptoBlock *block, size_t headerlen,
+                              void *opaque, Error **errp)
 {
     struct BlockCryptoCreateData *data = opaque;
     Error *local_error = NULL;
@@ -314,7 +311,7 @@ static int block_crypto_open_generic(QCryptoBlockFormat format,
 }
 
 
-static int coroutine_fn
+static int coroutine_fn GRAPH_UNLOCKED
 block_crypto_co_create_generic(BlockDriverState *bs, int64_t size,
                                QCryptoBlockCreateOptions *opts,
                                PreallocMode prealloc, Error **errp)
@@ -627,7 +624,7 @@ static int block_crypto_open_luks(BlockDriverState *bs,
                                      bs, options, flags, errp);
 }
 
-static int coroutine_fn
+static int coroutine_fn GRAPH_UNLOCKED
 block_crypto_co_create_luks(BlockdevCreateOptions *create_options, Error **errp)
 {
     BlockdevCreateOptionsLUKS *luks_opts;
@@ -665,7 +662,7 @@ fail:
     return ret;
 }
 
-static int coroutine_fn GRAPH_RDLOCK
+static int coroutine_fn GRAPH_UNLOCKED
 block_crypto_co_create_opts_luks(BlockDriver *drv, const char *filename,
                                  QemuOpts *opts, Error **errp)
 {
@@ -727,7 +724,9 @@ fail:
      * beforehand, it has been truncated and corrupted in the process.
      */
     if (ret) {
+        bdrv_graph_co_rdlock();
         bdrv_co_delete_file_noerr(bs);
+        bdrv_graph_co_rdunlock();
     }
 
     bdrv_co_unref(bs);
index 62c7c22d45da1e2013f28e65e31a6ad5e048eb8f..a5c8f42f5332457b51a5342ce09cf1e8e491ed7f 100644 (file)
@@ -192,8 +192,10 @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp)
     return exp;
 
 fail:
-    blk_set_dev_ops(exp->blk, NULL, NULL);
-    blk_unref(blk);
+    if (blk) {
+        blk_set_dev_ops(blk, NULL, NULL);
+        blk_unref(blk);
+    }
     aio_context_release(ctx);
     if (exp) {
         g_free(exp->id);
index 377884c3a9f6c951fedcb76dfb61353fe62f9997..a92c6ae219ea41a827916a2bf44c964167b2d8d0 100644 (file)
@@ -30,8 +30,10 @@ BdrvGraphLock graph_lock;
 /* Protects the list of aiocontext and orphaned_reader_count */
 static QemuMutex aio_context_list_lock;
 
+#if 0
 /* Written and read with atomic operations. */
 static int has_writer;
+#endif
 
 /*
  * A reader coroutine could move from an AioContext to another.
@@ -88,6 +90,7 @@ void unregister_aiocontext(AioContext *ctx)
     g_free(ctx->bdrv_graph);
 }
 
+#if 0
 static uint32_t reader_count(void)
 {
     BdrvGraphRWlock *brdv_graph;
@@ -105,10 +108,17 @@ static uint32_t reader_count(void)
     assert((int32_t)rd >= 0);
     return rd;
 }
+#endif
 
 void bdrv_graph_wrlock(void)
 {
     GLOBAL_STATE_CODE();
+    /*
+     * TODO Some callers hold an AioContext lock when this is called, which
+     * causes deadlocks. Reenable once the AioContext locking is cleaned up (or
+     * AioContext locks are gone).
+     */
+#if 0
     assert(!qatomic_read(&has_writer));
 
     /* Make sure that constantly arriving new I/O doesn't cause starvation */
@@ -139,11 +149,13 @@ void bdrv_graph_wrlock(void)
     } while (reader_count() >= 1);
 
     bdrv_drain_all_end();
+#endif
 }
 
 void bdrv_graph_wrunlock(void)
 {
     GLOBAL_STATE_CODE();
+#if 0
     QEMU_LOCK_GUARD(&aio_context_list_lock);
     assert(qatomic_read(&has_writer));
 
@@ -155,18 +167,16 @@ void bdrv_graph_wrunlock(void)
 
     /* Wake up all coroutine that are waiting to read the graph */
     qemu_co_enter_all(&reader_queue, &aio_context_list_lock);
+#endif
 }
 
 void coroutine_fn bdrv_graph_co_rdlock(void)
 {
+    /* TODO Reenable when wrlock is reenabled */
+#if 0
     BdrvGraphRWlock *bdrv_graph;
     bdrv_graph = qemu_get_current_aio_context()->bdrv_graph;
 
-    /* Do not lock if in main thread */
-    if (qemu_in_main_thread()) {
-        return;
-    }
-
     for (;;) {
         qatomic_set(&bdrv_graph->reader_count,
                     bdrv_graph->reader_count + 1);
@@ -223,18 +233,15 @@ void coroutine_fn bdrv_graph_co_rdlock(void)
             qemu_co_queue_wait(&reader_queue, &aio_context_list_lock);
         }
     }
+#endif
 }
 
 void coroutine_fn bdrv_graph_co_rdunlock(void)
 {
+#if 0
     BdrvGraphRWlock *bdrv_graph;
     bdrv_graph = qemu_get_current_aio_context()->bdrv_graph;
 
-    /* Do not lock if in main thread */
-    if (qemu_in_main_thread()) {
-        return;
-    }
-
     qatomic_store_release(&bdrv_graph->reader_count,
                           bdrv_graph->reader_count - 1);
     /* make sure writer sees reader_count before we check has_writer */
@@ -249,6 +256,7 @@ void coroutine_fn bdrv_graph_co_rdunlock(void)
     if (qatomic_read(&has_writer)) {
         aio_wait_kick();
     }
+#endif
 }
 
 void bdrv_graph_rdlock_main_loop(void)
@@ -266,13 +274,19 @@ void bdrv_graph_rdunlock_main_loop(void)
 void assert_bdrv_graph_readable(void)
 {
     /* reader_count() is slow due to aio_context_list_lock lock contention */
+    /* TODO Reenable when wrlock is reenabled */
+#if 0
 #ifdef CONFIG_DEBUG_GRAPH_LOCK
     assert(qemu_in_main_thread() || reader_count());
 #endif
+#endif
 }
 
 void assert_bdrv_graph_writable(void)
 {
     assert(qemu_in_main_thread());
+    /* TODO Reenable when wrlock is reenabled */
+#if 0
     assert(qatomic_read(&has_writer));
+#endif
 }
index 717442ca4d09102527445420583d901529be4bed..b7d92d13786779235e5c55e4015258a0f4d32aa7 100644 (file)
@@ -471,12 +471,11 @@ static unsigned mirror_perform(MirrorBlockJob *s, int64_t offset,
     return bytes_handled;
 }
 
-static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
+static void coroutine_fn mirror_iteration(MirrorBlockJob *s)
 {
     BlockDriverState *source = s->mirror_top_bs->backing->bs;
     MirrorOp *pseudo_op;
     int64_t offset;
-    uint64_t delay_ns = 0, ret = 0;
     /* At least the first dirty chunk is mirrored in one iteration. */
     int nb_chunks = 1;
     bool write_zeroes_ok = bdrv_can_write_zeroes_with_unmap(blk_bs(s->target));
@@ -608,16 +607,13 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
         assert(io_bytes);
         offset += io_bytes;
         nb_chunks -= DIV_ROUND_UP(io_bytes, s->granularity);
-        delay_ns = block_job_ratelimit_get_delay(&s->common, io_bytes_acct);
+        block_job_ratelimit_processed_bytes(&s->common, io_bytes_acct);
     }
 
-    ret = delay_ns;
 fail:
     QTAILQ_REMOVE(&s->ops_in_flight, pseudo_op, next);
     qemu_co_queue_restart_all(&pseudo_op->waiting_requests);
     g_free(pseudo_op);
-
-    return ret;
 }
 
 static void mirror_free_init(MirrorBlockJob *s)
@@ -1011,7 +1007,6 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
     assert(!s->dbi);
     s->dbi = bdrv_dirty_iter_new(s->dirty_bitmap);
     for (;;) {
-        uint64_t delay_ns = 0;
         int64_t cnt, delta;
         bool should_complete;
 
@@ -1051,7 +1046,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
                 mirror_wait_for_free_in_flight_slot(s);
                 continue;
             } else if (cnt != 0) {
-                delay_ns = mirror_iteration(s);
+                mirror_iteration(s);
             }
         }
 
@@ -1114,12 +1109,14 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
         }
 
         if (job_is_ready(&s->common.job) && !should_complete) {
-            delay_ns = (s->in_flight == 0 &&
-                        cnt == 0 ? BLOCK_JOB_SLICE_TIME : 0);
+            if (s->in_flight == 0 && cnt == 0) {
+                trace_mirror_before_sleep(s, cnt, job_is_ready(&s->common.job),
+                                          BLOCK_JOB_SLICE_TIME);
+                job_sleep_ns(&s->common.job, BLOCK_JOB_SLICE_TIME);
+            }
+        } else {
+            block_job_ratelimit_sleep(&s->common);
         }
-        trace_mirror_before_sleep(s, cnt, job_is_ready(&s->common.job),
-                                  delay_ns);
-        job_sleep_ns(&s->common.job, delay_ns);
         s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
     }
 
index b49c35929ebe694ce3d1b613247cf1ad0ffcfaf5..d8a3f13e247b0a283abbc2fed40d49ddfa0379a0 100644 (file)
@@ -522,8 +522,8 @@ out:
 }
 
 
-static int coroutine_fn parallels_co_create(BlockdevCreateOptions* opts,
-                                            Error **errp)
+static int coroutine_fn GRAPH_UNLOCKED
+parallels_co_create(BlockdevCreateOptions* opts, Error **errp)
 {
     BlockdevCreateOptionsParallels *parallels_opts;
     BlockDriverState *bs;
@@ -622,7 +622,7 @@ exit:
     goto out;
 }
 
-static int coroutine_fn GRAPH_RDLOCK
+static int coroutine_fn GRAPH_UNLOCKED
 parallels_co_create_opts(BlockDriver *drv, const char *filename,
                          QemuOpts *opts, Error **errp)
 {
index a0c701f578adb1daa60ed0759eb1649818f4b8c9..3644bbf5cb0dcbd92a451ca9dc33ece0b7580b06 100644 (file)
@@ -800,8 +800,8 @@ static void qcow_close(BlockDriverState *bs)
     error_free(s->migration_blocker);
 }
 
-static int coroutine_fn qcow_co_create(BlockdevCreateOptions *opts,
-                                       Error **errp)
+static int coroutine_fn GRAPH_UNLOCKED
+qcow_co_create(BlockdevCreateOptions *opts, Error **errp)
 {
     BlockdevCreateOptionsQcow *qcow_opts;
     int header_size, backing_filename_len, l1_size, shift, i;
@@ -921,7 +921,7 @@ exit:
     return ret;
 }
 
-static int coroutine_fn GRAPH_RDLOCK
+static int coroutine_fn GRAPH_UNLOCKED
 qcow_co_create_opts(BlockDriver *drv, const char *filename,
                     QemuOpts *opts, Error **errp)
 {
index 5bde3b840198b70d373c1f8be7d68287f83021d0..b00b4e757523b57334796f9c5f767a9f0b828164 100644 (file)
@@ -118,8 +118,9 @@ static int qcow2_crypto_hdr_read_func(QCryptoBlock *block, size_t offset,
 }
 
 
-static int qcow2_crypto_hdr_init_func(QCryptoBlock *block, size_t headerlen,
-                                      void *opaque, Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_crypto_hdr_init_func(QCryptoBlock *block, size_t headerlen, void *opaque,
+                           Error **errp)
 {
     BlockDriverState *bs = opaque;
     BDRVQcow2State *s = bs->opaque;
@@ -144,9 +145,7 @@ static int qcow2_crypto_hdr_init_func(QCryptoBlock *block, size_t headerlen,
      */
     clusterlen = size_to_clusters(s, headerlen) * s->cluster_size;
     assert(qcow2_pre_write_overlap_check(bs, 0, ret, clusterlen, false) == 0);
-    ret = bdrv_pwrite_zeroes(bs->file,
-                             ret,
-                             clusterlen, 0);
+    ret = bdrv_co_pwrite_zeroes(bs->file, ret, clusterlen, 0);
     if (ret < 0) {
         error_setg_errno(errp, -ret, "Could not zero fill encryption header");
         return -1;
@@ -156,9 +155,11 @@ static int qcow2_crypto_hdr_init_func(QCryptoBlock *block, size_t headerlen,
 }
 
 
-static int qcow2_crypto_hdr_write_func(QCryptoBlock *block, size_t offset,
-                                       const uint8_t *buf, size_t buflen,
-                                       void *opaque, Error **errp)
+/* The graph lock must be held when called in coroutine context */
+static int coroutine_mixed_fn
+qcow2_crypto_hdr_write_func(QCryptoBlock *block, size_t offset,
+                            const uint8_t *buf, size_t buflen,
+                            void *opaque, Error **errp)
 {
     BlockDriverState *bs = opaque;
     BDRVQcow2State *s = bs->opaque;
@@ -1618,9 +1619,11 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags,
 
     if (open_data_file) {
         /* Open external data file */
+        bdrv_graph_co_rdunlock();
         s->data_file = bdrv_co_open_child(NULL, options, "data-file", bs,
                                           &child_of_bds, BDRV_CHILD_DATA,
                                           true, errp);
+        bdrv_graph_co_rdlock();
         if (*errp) {
             ret = -EINVAL;
             goto fail;
@@ -1628,10 +1631,12 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags,
 
         if (s->incompatible_features & QCOW2_INCOMPAT_DATA_FILE) {
             if (!s->data_file && s->image_data_file) {
+                bdrv_graph_co_rdunlock();
                 s->data_file = bdrv_co_open_child(s->image_data_file, options,
                                                   "data-file", bs,
                                                   &child_of_bds,
                                                   BDRV_CHILD_DATA, false, errp);
+                bdrv_graph_co_rdlock();
                 if (!s->data_file) {
                     ret = -EINVAL;
                     goto fail;
@@ -1856,7 +1861,9 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags,
  fail:
     g_free(s->image_data_file);
     if (open_data_file && has_data_file(bs)) {
+        bdrv_graph_co_rdunlock();
         bdrv_unref_child(bs, s->data_file);
+        bdrv_graph_co_rdlock();
         s->data_file = NULL;
     }
     g_free(s->unknown_header_fields);
@@ -3137,9 +3144,10 @@ static int qcow2_change_backing_file(BlockDriverState *bs,
     return qcow2_update_header(bs);
 }
 
-static int qcow2_set_up_encryption(BlockDriverState *bs,
-                                   QCryptoBlockCreateOptions *cryptoopts,
-                                   Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_set_up_encryption(BlockDriverState *bs,
+                        QCryptoBlockCreateOptions *cryptoopts,
+                        Error **errp)
 {
     BDRVQcow2State *s = bs->opaque;
     QCryptoBlock *crypto = NULL;
@@ -3426,7 +3434,7 @@ static uint64_t qcow2_opt_get_refcount_bits_del(QemuOpts *opts, int version,
     return refcount_bits;
 }
 
-static int coroutine_fn
+static int coroutine_fn GRAPH_UNLOCKED
 qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
 {
     BlockdevCreateOptionsQcow2 *qcow2_opts;
@@ -3724,8 +3732,10 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
         goto out;
     }
 
+    bdrv_graph_co_rdlock();
     ret = qcow2_alloc_clusters(blk_bs(blk), 3 * cluster_size);
     if (ret < 0) {
+        bdrv_graph_co_rdunlock();
         error_setg_errno(errp, -ret, "Could not allocate clusters for qcow2 "
                          "header and refcount table");
         goto out;
@@ -3743,6 +3753,8 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
 
     /* Create a full header (including things like feature table) */
     ret = qcow2_update_header(blk_bs(blk));
+    bdrv_graph_co_rdunlock();
+
     if (ret < 0) {
         error_setg_errno(errp, -ret, "Could not update qcow2 header");
         goto out;
@@ -3776,7 +3788,10 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
 
     /* Want encryption? There you go. */
     if (qcow2_opts->encrypt) {
+        bdrv_graph_co_rdlock();
         ret = qcow2_set_up_encryption(blk_bs(blk), qcow2_opts->encrypt, errp);
+        bdrv_graph_co_rdunlock();
+
         if (ret < 0) {
             goto out;
         }
@@ -3813,7 +3828,7 @@ out:
     return ret;
 }
 
-static int coroutine_fn GRAPH_RDLOCK
+static int coroutine_fn GRAPH_UNLOCKED
 qcow2_co_create_opts(BlockDriver *drv, const char *filename, QemuOpts *opts,
                      Error **errp)
 {
@@ -3933,8 +3948,10 @@ qcow2_co_create_opts(BlockDriver *drv, const char *filename, QemuOpts *opts,
     ret = qcow2_co_create(create_options, errp);
 finish:
     if (ret < 0) {
+        bdrv_graph_co_rdlock();
         bdrv_co_delete_file_noerr(bs);
         bdrv_co_delete_file_noerr(data_bs);
+        bdrv_graph_co_rdunlock();
     } else {
         ret = 0;
     }
index be9ff0fb34cdb42640005bdcc3866094ba28de99..9a0350b534439d9699f8ea3217d8235c987a5664 100644 (file)
@@ -630,8 +630,8 @@ static void bdrv_qed_close(BlockDriverState *bs)
     qemu_vfree(s->l1_table);
 }
 
-static int coroutine_fn bdrv_qed_co_create(BlockdevCreateOptions *opts,
-                                           Error **errp)
+static int coroutine_fn GRAPH_UNLOCKED
+bdrv_qed_co_create(BlockdevCreateOptions *opts, Error **errp)
 {
     BlockdevCreateOptionsQed *qed_opts;
     BlockBackend *blk = NULL;
@@ -751,7 +751,7 @@ out:
     return ret;
 }
 
-static int coroutine_fn GRAPH_RDLOCK
+static int coroutine_fn GRAPH_UNLOCKED
 bdrv_qed_co_create_opts(BlockDriver *drv, const char *filename,
                         QemuOpts *opts, Error **errp)
 {
index 3a3946213fc06898d5ab37c9169604dcb41515bf..918fe4fb7e603da4266019366e9de36a46173e2a 100644 (file)
@@ -457,7 +457,7 @@ static int raw_has_zero_init(BlockDriverState *bs)
     return bdrv_has_zero_init(bs->file->bs);
 }
 
-static int coroutine_fn GRAPH_RDLOCK
+static int coroutine_fn GRAPH_UNLOCKED
 raw_co_create_opts(BlockDriver *drv, const char *filename,
                    QemuOpts *opts, Error **errp)
 {
index 7f9e1ecdbb41b8d4ab6231bea498a48b52a7a8c6..e522bbdec547e2f1c7ae27e81e19b04dcdd7ea51 100644 (file)
@@ -131,7 +131,6 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
     BlockDriverState *unfiltered_bs = bdrv_skip_filters(s->target_bs);
     int64_t len;
     int64_t offset = 0;
-    uint64_t delay_ns = 0;
     int error = 0;
     int64_t n = 0; /* bytes */
 
@@ -155,7 +154,7 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
         /* Note that even when no rate limit is applied we need to yield
          * with no pending I/O here so that bdrv_drain_all() returns.
          */
-        job_sleep_ns(&s->common.job, delay_ns);
+        block_job_ratelimit_sleep(&s->common);
         if (job_is_cancelled(&s->common.job)) {
             break;
         }
@@ -204,9 +203,7 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
         /* Publish progress */
         job_progress_update(&s->common.job, n);
         if (copy) {
-            delay_ns = block_job_ratelimit_get_delay(&s->common, n);
-        } else {
-            delay_ns = 0;
+            block_job_ratelimit_processed_bytes(&s->common, n);
         }
     }
 
index 08331d2dd7b5879ab1aa8e7873fc9d0d294e1531..6c35309e04aab3c1dc53748b0a21b4d8d0eaa217 100644 (file)
@@ -734,8 +734,9 @@ nonallocating_write:
     return ret;
 }
 
-static int coroutine_fn vdi_co_do_create(BlockdevCreateOptions *create_options,
-                                         size_t block_size, Error **errp)
+static int coroutine_fn GRAPH_UNLOCKED
+vdi_co_do_create(BlockdevCreateOptions *create_options, size_t block_size,
+                 Error **errp)
 {
     BlockdevCreateOptionsVdi *vdi_opts;
     int ret = 0;
@@ -892,13 +893,13 @@ exit:
     return ret;
 }
 
-static int coroutine_fn vdi_co_create(BlockdevCreateOptions *create_options,
-                                      Error **errp)
+static int coroutine_fn GRAPH_UNLOCKED
+vdi_co_create(BlockdevCreateOptions *create_options, Error **errp)
 {
     return vdi_co_do_create(create_options, DEFAULT_CLUSTER_SIZE, errp);
 }
 
-static int coroutine_fn GRAPH_RDLOCK
+static int coroutine_fn GRAPH_UNLOCKED
 vdi_co_create_opts(BlockDriver *drv, const char *filename,
                    QemuOpts *opts, Error **errp)
 {
index b20b1edf11d6719b72bd5f09edc15585d1fb3503..89913cba870c522396cb6473084c7ef1b7274461 100644 (file)
@@ -1506,7 +1506,7 @@ exit:
  * There are 2 headers, and the highest sequence number will represent
  * the active header
  */
-static int coroutine_fn GRAPH_RDLOCK
+static int coroutine_fn GRAPH_UNLOCKED
 vhdx_create_new_headers(BlockBackend *blk, uint64_t image_size,
                         uint32_t log_size)
 {
@@ -1515,6 +1515,8 @@ vhdx_create_new_headers(BlockBackend *blk, uint64_t image_size,
     int ret = 0;
     VHDXHeader *hdr = NULL;
 
+    GRAPH_RDLOCK_GUARD();
+
     hdr = g_new0(VHDXHeader, 1);
 
     hdr->signature       = VHDX_HEADER_SIGNATURE;
@@ -1898,7 +1900,7 @@ exit:
  *    .---- ~ ----------- ~ ------------ ~ ---------------- ~ -----------.
  *   1MB
  */
-static int coroutine_fn GRAPH_RDLOCK
+static int coroutine_fn GRAPH_UNLOCKED
 vhdx_co_create(BlockdevCreateOptions *opts, Error **errp)
 {
     BlockdevCreateOptionsVhdx *vhdx_opts;
@@ -2060,7 +2062,7 @@ delete_and_exit:
     return ret;
 }
 
-static int coroutine_fn GRAPH_RDLOCK
+static int coroutine_fn GRAPH_UNLOCKED
 vhdx_co_create_opts(BlockDriver *drv, const char *filename,
                     QemuOpts *opts, Error **errp)
 {
index fddbd1c86cb4c6167a08e0dd3287b3240bdd396e..e3e86608ec984c108805bcbb3ec9254e4b5bf8fd 100644 (file)
@@ -2165,10 +2165,9 @@ vmdk_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
     return ret;
 }
 
-static int vmdk_init_extent(BlockBackend *blk,
-                            int64_t filesize, bool flat,
-                            bool compress, bool zeroed_grain,
-                            Error **errp)
+static int GRAPH_UNLOCKED
+vmdk_init_extent(BlockBackend *blk, int64_t filesize, bool flat, bool compress,
+                 bool zeroed_grain, Error **errp)
 {
     int ret, i;
     VMDK4Header header;
@@ -2277,7 +2276,7 @@ exit:
     return ret;
 }
 
-static int coroutine_fn GRAPH_RDLOCK
+static int coroutine_fn GRAPH_UNLOCKED
 vmdk_create_extent(const char *filename, int64_t filesize, bool flat,
                    bool compress, bool zeroed_grain, BlockBackend **pbb,
                    QemuOpts *opts, Error **errp)
@@ -2358,7 +2357,7 @@ static int filename_decompose(const char *filename, char *path, char *prefix,
  *           non-split format.
  * idx >= 1: get the n-th extent if in a split subformat
  */
-typedef BlockBackend * coroutine_fn /* GRAPH_RDLOCK */
+typedef BlockBackend * coroutine_fn GRAPH_UNLOCKED_PTR
     (*vmdk_create_extent_fn)(int64_t size, int idx, bool flat, bool split,
                              bool compress, bool zeroed_grain, void *opaque,
                              Error **errp);
@@ -2374,7 +2373,7 @@ static void vmdk_desc_add_extent(GString *desc,
     g_free(basename);
 }
 
-static int coroutine_fn GRAPH_RDLOCK
+static int coroutine_fn GRAPH_UNLOCKED
 vmdk_co_do_create(int64_t size,
                   BlockdevVmdkSubformat subformat,
                   BlockdevVmdkAdapterType adapter_type,
@@ -2605,7 +2604,7 @@ typedef struct {
     QemuOpts *opts;
 } VMDKCreateOptsData;
 
-static BlockBackend * coroutine_fn GRAPH_RDLOCK
+static BlockBackend * coroutine_fn GRAPH_UNLOCKED
 vmdk_co_create_opts_cb(int64_t size, int idx, bool flat, bool split,
                        bool compress, bool zeroed_grain, void *opaque,
                        Error **errp)
@@ -2647,7 +2646,7 @@ exit:
     return blk;
 }
 
-static int coroutine_fn GRAPH_RDLOCK
+static int coroutine_fn GRAPH_UNLOCKED
 vmdk_co_create_opts(BlockDriver *drv, const char *filename,
                     QemuOpts *opts, Error **errp)
 {
@@ -2756,11 +2755,9 @@ exit:
     return ret;
 }
 
-static BlockBackend * coroutine_fn vmdk_co_create_cb(int64_t size, int idx,
-                                                     bool flat, bool split,
-                                                     bool compress,
-                                                     bool zeroed_grain,
-                                                     void *opaque, Error **errp)
+static BlockBackend * coroutine_fn GRAPH_UNLOCKED
+vmdk_co_create_cb(int64_t size, int idx, bool flat, bool split, bool compress,
+                  bool zeroed_grain, void *opaque, Error **errp)
 {
     int ret;
     BlockDriverState *bs;
@@ -2809,7 +2806,7 @@ static BlockBackend * coroutine_fn vmdk_co_create_cb(int64_t size, int idx,
     return blk;
 }
 
-static int coroutine_fn GRAPH_RDLOCK
+static int coroutine_fn GRAPH_UNLOCKED
 vmdk_co_create(BlockdevCreateOptions *create_options, Error **errp)
 {
     BlockdevCreateOptionsVmdk *opts;
index 07ddda5b993166e4dd3c024a3383a20a7356c904..7ee7c7b4e03d861c702ce78cfad2bc8527a07bdd 100644 (file)
@@ -967,8 +967,8 @@ static int calculate_rounded_image_size(BlockdevCreateOptionsVpc *vpc_opts,
     return 0;
 }
 
-static int coroutine_fn vpc_co_create(BlockdevCreateOptions *opts,
-                                      Error **errp)
+static int coroutine_fn GRAPH_UNLOCKED
+vpc_co_create(BlockdevCreateOptions *opts, Error **errp)
 {
     BlockdevCreateOptionsVpc *vpc_opts;
     BlockBackend *blk = NULL;
@@ -1087,7 +1087,7 @@ out:
     return ret;
 }
 
-static int coroutine_fn GRAPH_RDLOCK
+static int coroutine_fn GRAPH_UNLOCKED
 vpc_co_create_opts(BlockDriver *drv, const char *filename,
                    QemuOpts *opts, Error **errp)
 {
index d141ca7a2d56df84332430babcebabb66c4df76f..5d56b79df4ad97e18b9c967b55ae551033dc1b01 100644 (file)
@@ -1188,79 +1188,22 @@ out_aio_context:
     return NULL;
 }
 
-/* New and old BlockDriverState structs for atomic group operations */
-
-typedef struct BlkActionState BlkActionState;
-
-/**
- * BlkActionOps:
- * Table of operations that define an Action.
- *
- * @instance_size: Size of state struct, in bytes.
- * @prepare: Prepare the work, must NOT be NULL.
- * @commit: Commit the changes, can be NULL.
- * @abort: Abort the changes on fail, can be NULL.
- * @clean: Clean up resources after all transaction actions have called
- *         commit() or abort(). Can be NULL.
- *
- * Only prepare() may fail. In a single transaction, only one of commit() or
- * abort() will be called. clean() will always be called if it is present.
- *
- * Always run under BQL.
- */
-typedef struct BlkActionOps {
-    size_t instance_size;
-    void (*prepare)(BlkActionState *common, Error **errp);
-    void (*commit)(BlkActionState *common);
-    void (*abort)(BlkActionState *common);
-    void (*clean)(BlkActionState *common);
-} BlkActionOps;
-
-/**
- * BlkActionState:
- * Describes one Action's state within a Transaction.
- *
- * @action: QAPI-defined enum identifying which Action to perform.
- * @ops: Table of ActionOps this Action can perform.
- * @block_job_txn: Transaction which this action belongs to.
- * @entry: List membership for all Actions in this Transaction.
- *
- * This structure must be arranged as first member in a subclassed type,
- * assuming that the compiler will also arrange it to the same offsets as the
- * base class.
- */
-struct BlkActionState {
-    TransactionAction *action;
-    const BlkActionOps *ops;
-    JobTxn *block_job_txn;
-    TransactionProperties *txn_props;
-    QTAILQ_ENTRY(BlkActionState) entry;
-};
-
 /* internal snapshot private data */
 typedef struct InternalSnapshotState {
-    BlkActionState common;
     BlockDriverState *bs;
     QEMUSnapshotInfo sn;
     bool created;
 } InternalSnapshotState;
 
+static void internal_snapshot_abort(void *opaque);
+static void internal_snapshot_clean(void *opaque);
+TransactionActionDrv internal_snapshot_drv = {
+    .abort = internal_snapshot_abort,
+    .clean = internal_snapshot_clean,
+};
 
-static int action_check_completion_mode(BlkActionState *s, Error **errp)
-{
-    if (s->txn_props->completion_mode != ACTION_COMPLETION_MODE_INDIVIDUAL) {
-        error_setg(errp,
-                   "Action '%s' does not support Transaction property "
-                   "completion-mode = %s",
-                   TransactionActionKind_str(s->action->type),
-                   ActionCompletionMode_str(s->txn_props->completion_mode));
-        return -1;
-    }
-    return 0;
-}
-
-static void internal_snapshot_prepare(BlkActionState *common,
-                                      Error **errp)
+static void internal_snapshot_action(BlockdevSnapshotInternal *internal,
+                                     Transaction *tran, Error **errp)
 {
     Error *local_err = NULL;
     const char *device;
@@ -1269,25 +1212,15 @@ static void internal_snapshot_prepare(BlkActionState *common,
     QEMUSnapshotInfo old_sn, *sn;
     bool ret;
     int64_t rt;
-    BlockdevSnapshotInternal *internal;
-    InternalSnapshotState *state;
+    InternalSnapshotState *state = g_new0(InternalSnapshotState, 1);
     AioContext *aio_context;
     int ret1;
 
-    g_assert(common->action->type ==
-             TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_INTERNAL_SYNC);
-    internal = common->action->u.blockdev_snapshot_internal_sync.data;
-    state = DO_UPCAST(InternalSnapshotState, common, common);
+    tran_add(tran, &internal_snapshot_drv, state);
 
-    /* 1. parse input */
     device = internal->device;
     name = internal->name;
 
-    /* 2. check for validation */
-    if (action_check_completion_mode(common, errp) < 0) {
-        return;
-    }
-
     bs = qmp_get_root_bs(device, errp);
     if (!bs) {
         return;
@@ -1363,10 +1296,9 @@ out:
     aio_context_release(aio_context);
 }
 
-static void internal_snapshot_abort(BlkActionState *common)
+static void internal_snapshot_abort(void *opaque)
 {
-    InternalSnapshotState *state =
-                             DO_UPCAST(InternalSnapshotState, common, common);
+    InternalSnapshotState *state = opaque;
     BlockDriverState *bs = state->bs;
     QEMUSnapshotInfo *sn = &state->sn;
     AioContext *aio_context;
@@ -1390,10 +1322,9 @@ static void internal_snapshot_abort(BlkActionState *common)
     aio_context_release(aio_context);
 }
 
-static void internal_snapshot_clean(BlkActionState *common)
+static void internal_snapshot_clean(void *opaque)
 {
-    InternalSnapshotState *state = DO_UPCAST(InternalSnapshotState,
-                                             common, common);
+    g_autofree InternalSnapshotState *state = opaque;
     AioContext *aio_context;
 
     if (!state->bs) {
@@ -1410,14 +1341,22 @@ static void internal_snapshot_clean(BlkActionState *common)
 
 /* external snapshot private data */
 typedef struct ExternalSnapshotState {
-    BlkActionState common;
     BlockDriverState *old_bs;
     BlockDriverState *new_bs;
     bool overlay_appended;
 } ExternalSnapshotState;
 
-static void external_snapshot_prepare(BlkActionState *common,
-                                      Error **errp)
+static void external_snapshot_commit(void *opaque);
+static void external_snapshot_abort(void *opaque);
+static void external_snapshot_clean(void *opaque);
+TransactionActionDrv external_snapshot_drv = {
+    .commit = external_snapshot_commit,
+    .abort = external_snapshot_abort,
+    .clean = external_snapshot_clean,
+};
+
+static void external_snapshot_action(TransactionAction *action,
+                                     Transaction *tran, Error **errp)
 {
     int ret;
     int flags = 0;
@@ -1430,12 +1369,12 @@ static void external_snapshot_prepare(BlkActionState *common,
     const char *snapshot_ref;
     /* File name of the new image (for 'blockdev-snapshot-sync') */
     const char *new_image_file;
-    ExternalSnapshotState *state =
-                             DO_UPCAST(ExternalSnapshotState, common, common);
-    TransactionAction *action = common->action;
+    ExternalSnapshotState *state = g_new0(ExternalSnapshotState, 1);
     AioContext *aio_context;
     uint64_t perm, shared;
 
+    tran_add(tran, &external_snapshot_drv, state);
+
     /* 'blockdev-snapshot' and 'blockdev-snapshot-sync' have similar
      * purpose but a different set of parameters */
     switch (action->type) {
@@ -1462,9 +1401,6 @@ static void external_snapshot_prepare(BlkActionState *common,
     }
 
     /* start processing */
-    if (action_check_completion_mode(common, errp) < 0) {
-        return;
-    }
 
     state->old_bs = bdrv_lookup_bs(device, node_name, errp);
     if (!state->old_bs) {
@@ -1588,10 +1524,9 @@ out:
     aio_context_release(aio_context);
 }
 
-static void external_snapshot_commit(BlkActionState *common)
+static void external_snapshot_commit(void *opaque)
 {
-    ExternalSnapshotState *state =
-                             DO_UPCAST(ExternalSnapshotState, common, common);
+    ExternalSnapshotState *state = opaque;
     AioContext *aio_context;
 
     aio_context = bdrv_get_aio_context(state->old_bs);
@@ -1607,10 +1542,9 @@ static void external_snapshot_commit(BlkActionState *common)
     aio_context_release(aio_context);
 }
 
-static void external_snapshot_abort(BlkActionState *common)
+static void external_snapshot_abort(void *opaque)
 {
-    ExternalSnapshotState *state =
-                             DO_UPCAST(ExternalSnapshotState, common, common);
+    ExternalSnapshotState *state = opaque;
     if (state->new_bs) {
         if (state->overlay_appended) {
             AioContext *aio_context;
@@ -1650,10 +1584,9 @@ static void external_snapshot_abort(BlkActionState *common)
     }
 }
 
-static void external_snapshot_clean(BlkActionState *common)
+static void external_snapshot_clean(void *opaque)
 {
-    ExternalSnapshotState *state =
-                             DO_UPCAST(ExternalSnapshotState, common, common);
+    g_autofree ExternalSnapshotState *state = opaque;
     AioContext *aio_context;
 
     if (!state->old_bs) {
@@ -1670,7 +1603,6 @@ static void external_snapshot_clean(BlkActionState *common)
 }
 
 typedef struct DriveBackupState {
-    BlkActionState common;
     BlockDriverState *bs;
     BlockJob *job;
 } DriveBackupState;
@@ -1681,10 +1613,20 @@ static BlockJob *do_backup_common(BackupCommon *backup,
                                   AioContext *aio_context,
                                   JobTxn *txn, Error **errp);
 
-static void drive_backup_prepare(BlkActionState *common, Error **errp)
+static void drive_backup_commit(void *opaque);
+static void drive_backup_abort(void *opaque);
+static void drive_backup_clean(void *opaque);
+TransactionActionDrv drive_backup_drv = {
+    .commit = drive_backup_commit,
+    .abort = drive_backup_abort,
+    .clean = drive_backup_clean,
+};
+
+static void drive_backup_action(DriveBackup *backup,
+                                JobTxn *block_job_txn,
+                                Transaction *tran, Error **errp)
 {
-    DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
-    DriveBackup *backup;
+    DriveBackupState *state = g_new0(DriveBackupState, 1);
     BlockDriverState *bs;
     BlockDriverState *target_bs;
     BlockDriverState *source = NULL;
@@ -1698,8 +1640,7 @@ static void drive_backup_prepare(BlkActionState *common, Error **errp)
     bool set_backing_hd = false;
     int ret;
 
-    assert(common->action->type == TRANSACTION_ACTION_KIND_DRIVE_BACKUP);
-    backup = common->action->u.drive_backup.data;
+    tran_add(tran, &drive_backup_drv, state);
 
     if (!backup->has_mode) {
         backup->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
@@ -1820,7 +1761,7 @@ static void drive_backup_prepare(BlkActionState *common, Error **errp)
 
     state->job = do_backup_common(qapi_DriveBackup_base(backup),
                                   bs, target_bs, aio_context,
-                                  common->block_job_txn, errp);
+                                  block_job_txn, errp);
 
 unref:
     bdrv_unref(target_bs);
@@ -1828,9 +1769,9 @@ out:
     aio_context_release(aio_context);
 }
 
-static void drive_backup_commit(BlkActionState *common)
+static void drive_backup_commit(void *opaque)
 {
-    DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
+    DriveBackupState *state = opaque;
     AioContext *aio_context;
 
     aio_context = bdrv_get_aio_context(state->bs);
@@ -1842,18 +1783,18 @@ static void drive_backup_commit(BlkActionState *common)
     aio_context_release(aio_context);
 }
 
-static void drive_backup_abort(BlkActionState *common)
+static void drive_backup_abort(void *opaque)
 {
-    DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
+    DriveBackupState *state = opaque;
 
     if (state->job) {
         job_cancel_sync(&state->job->job, true);
     }
 }
 
-static void drive_backup_clean(BlkActionState *common)
+static void drive_backup_clean(void *opaque)
 {
-    DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
+    g_autofree DriveBackupState *state = opaque;
     AioContext *aio_context;
 
     if (!state->bs) {
@@ -1869,23 +1810,31 @@ static void drive_backup_clean(BlkActionState *common)
 }
 
 typedef struct BlockdevBackupState {
-    BlkActionState common;
     BlockDriverState *bs;
     BlockJob *job;
 } BlockdevBackupState;
 
-static void blockdev_backup_prepare(BlkActionState *common, Error **errp)
+static void blockdev_backup_commit(void *opaque);
+static void blockdev_backup_abort(void *opaque);
+static void blockdev_backup_clean(void *opaque);
+TransactionActionDrv blockdev_backup_drv = {
+    .commit = blockdev_backup_commit,
+    .abort = blockdev_backup_abort,
+    .clean = blockdev_backup_clean,
+};
+
+static void blockdev_backup_action(BlockdevBackup *backup,
+                                   JobTxn *block_job_txn,
+                                   Transaction *tran, Error **errp)
 {
-    BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
-    BlockdevBackup *backup;
+    BlockdevBackupState *state = g_new0(BlockdevBackupState, 1);
     BlockDriverState *bs;
     BlockDriverState *target_bs;
     AioContext *aio_context;
     AioContext *old_context;
     int ret;
 
-    assert(common->action->type == TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP);
-    backup = common->action->u.blockdev_backup.data;
+    tran_add(tran, &blockdev_backup_drv, state);
 
     bs = bdrv_lookup_bs(backup->device, backup->device, errp);
     if (!bs) {
@@ -1917,14 +1866,14 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp)
 
     state->job = do_backup_common(qapi_BlockdevBackup_base(backup),
                                   bs, target_bs, aio_context,
-                                  common->block_job_txn, errp);
+                                  block_job_txn, errp);
 
     aio_context_release(aio_context);
 }
 
-static void blockdev_backup_commit(BlkActionState *common)
+static void blockdev_backup_commit(void *opaque)
 {
-    BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
+    BlockdevBackupState *state = opaque;
     AioContext *aio_context;
 
     aio_context = bdrv_get_aio_context(state->bs);
@@ -1936,18 +1885,18 @@ static void blockdev_backup_commit(BlkActionState *common)
     aio_context_release(aio_context);
 }
 
-static void blockdev_backup_abort(BlkActionState *common)
+static void blockdev_backup_abort(void *opaque)
 {
-    BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
+    BlockdevBackupState *state = opaque;
 
     if (state->job) {
         job_cancel_sync(&state->job->job, true);
     }
 }
 
-static void blockdev_backup_clean(BlkActionState *common)
+static void blockdev_backup_clean(void *opaque)
 {
-    BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
+    g_autofree BlockdevBackupState *state = opaque;
     AioContext *aio_context;
 
     if (!state->bs) {
@@ -1963,27 +1912,26 @@ static void blockdev_backup_clean(BlkActionState *common)
 }
 
 typedef struct BlockDirtyBitmapState {
-    BlkActionState common;
     BdrvDirtyBitmap *bitmap;
     BlockDriverState *bs;
     HBitmap *backup;
-    bool prepared;
     bool was_enabled;
 } BlockDirtyBitmapState;
 
-static void block_dirty_bitmap_add_prepare(BlkActionState *common,
-                                           Error **errp)
+static void block_dirty_bitmap_add_abort(void *opaque);
+TransactionActionDrv block_dirty_bitmap_add_drv = {
+    .abort = block_dirty_bitmap_add_abort,
+    .clean = g_free,
+};
+
+static void block_dirty_bitmap_add_action(BlockDirtyBitmapAdd *action,
+                                          Transaction *tran, Error **errp)
 {
     Error *local_err = NULL;
-    BlockDirtyBitmapAdd *action;
-    BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
-                                             common, common);
+    BlockDirtyBitmapState *state = g_new0(BlockDirtyBitmapState, 1);
 
-    if (action_check_completion_mode(common, errp) < 0) {
-        return;
-    }
+    tran_add(tran, &block_dirty_bitmap_add_drv, state);
 
-    action = common->action->u.block_dirty_bitmap_add.data;
     /* AIO context taken and released within qmp_block_dirty_bitmap_add */
     qmp_block_dirty_bitmap_add(action->node, action->name,
                                action->has_granularity, action->granularity,
@@ -1992,39 +1940,37 @@ static void block_dirty_bitmap_add_prepare(BlkActionState *common,
                                &local_err);
 
     if (!local_err) {
-        state->prepared = true;
+        state->bitmap = block_dirty_bitmap_lookup(action->node, action->name,
+                                                  NULL, &error_abort);
     } else {
         error_propagate(errp, local_err);
     }
 }
 
-static void block_dirty_bitmap_add_abort(BlkActionState *common)
+static void block_dirty_bitmap_add_abort(void *opaque)
 {
-    BlockDirtyBitmapAdd *action;
-    BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
-                                             common, common);
+    BlockDirtyBitmapState *state = opaque;
 
-    action = common->action->u.block_dirty_bitmap_add.data;
-    /* Should not be able to fail: IF the bitmap was added via .prepare(),
-     * then the node reference and bitmap name must have been valid.
-     */
-    if (state->prepared) {
-        qmp_block_dirty_bitmap_remove(action->node, action->name, &error_abort);
+    if (state->bitmap) {
+        bdrv_release_dirty_bitmap(state->bitmap);
     }
 }
 
-static void block_dirty_bitmap_clear_prepare(BlkActionState *common,
-                                             Error **errp)
+static void block_dirty_bitmap_restore(void *opaque);
+static void block_dirty_bitmap_free_backup(void *opaque);
+TransactionActionDrv block_dirty_bitmap_clear_drv = {
+    .abort = block_dirty_bitmap_restore,
+    .commit = block_dirty_bitmap_free_backup,
+    .clean = g_free,
+};
+
+static void block_dirty_bitmap_clear_action(BlockDirtyBitmap *action,
+                                            Transaction *tran, Error **errp)
 {
-    BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
-                                             common, common);
-    BlockDirtyBitmap *action;
+    BlockDirtyBitmapState *state = g_new0(BlockDirtyBitmapState, 1);
 
-    if (action_check_completion_mode(common, errp) < 0) {
-        return;
-    }
+    tran_add(tran, &block_dirty_bitmap_clear_drv, state);
 
-    action = common->action->u.block_dirty_bitmap_clear.data;
     state->bitmap = block_dirty_bitmap_lookup(action->node,
                                               action->name,
                                               &state->bs,
@@ -2040,36 +1986,35 @@ static void block_dirty_bitmap_clear_prepare(BlkActionState *common,
     bdrv_clear_dirty_bitmap(state->bitmap, &state->backup);
 }
 
-static void block_dirty_bitmap_restore(BlkActionState *common)
+static void block_dirty_bitmap_restore(void *opaque)
 {
-    BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
-                                             common, common);
+    BlockDirtyBitmapState *state = opaque;
 
     if (state->backup) {
         bdrv_restore_dirty_bitmap(state->bitmap, state->backup);
     }
 }
 
-static void block_dirty_bitmap_free_backup(BlkActionState *common)
+static void block_dirty_bitmap_free_backup(void *opaque)
 {
-    BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
-                                             common, common);
+    BlockDirtyBitmapState *state = opaque;
 
     hbitmap_free(state->backup);
 }
 
-static void block_dirty_bitmap_enable_prepare(BlkActionState *common,
-                                              Error **errp)
+static void block_dirty_bitmap_enable_abort(void *opaque);
+TransactionActionDrv block_dirty_bitmap_enable_drv = {
+    .abort = block_dirty_bitmap_enable_abort,
+    .clean = g_free,
+};
+
+static void block_dirty_bitmap_enable_action(BlockDirtyBitmap *action,
+                                             Transaction *tran, Error **errp)
 {
-    BlockDirtyBitmap *action;
-    BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
-                                             common, common);
+    BlockDirtyBitmapState *state = g_new0(BlockDirtyBitmapState, 1);
 
-    if (action_check_completion_mode(common, errp) < 0) {
-        return;
-    }
+    tran_add(tran, &block_dirty_bitmap_enable_drv, state);
 
-    action = common->action->u.block_dirty_bitmap_enable.data;
     state->bitmap = block_dirty_bitmap_lookup(action->node,
                                               action->name,
                                               NULL,
@@ -2086,28 +2031,28 @@ static void block_dirty_bitmap_enable_prepare(BlkActionState *common,
     bdrv_enable_dirty_bitmap(state->bitmap);
 }
 
-static void block_dirty_bitmap_enable_abort(BlkActionState *common)
+static void block_dirty_bitmap_enable_abort(void *opaque)
 {
-    BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
-                                             common, common);
+    BlockDirtyBitmapState *state = opaque;
 
     if (!state->was_enabled) {
         bdrv_disable_dirty_bitmap(state->bitmap);
     }
 }
 
-static void block_dirty_bitmap_disable_prepare(BlkActionState *common,
-                                               Error **errp)
+static void block_dirty_bitmap_disable_abort(void *opaque);
+TransactionActionDrv block_dirty_bitmap_disable_drv = {
+    .abort = block_dirty_bitmap_disable_abort,
+    .clean = g_free,
+};
+
+static void block_dirty_bitmap_disable_action(BlockDirtyBitmap *action,
+                                              Transaction *tran, Error **errp)
 {
-    BlockDirtyBitmap *action;
-    BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
-                                             common, common);
+    BlockDirtyBitmapState *state = g_new0(BlockDirtyBitmapState, 1);
 
-    if (action_check_completion_mode(common, errp) < 0) {
-        return;
-    }
+    tran_add(tran, &block_dirty_bitmap_disable_drv, state);
 
-    action = common->action->u.block_dirty_bitmap_disable.data;
     state->bitmap = block_dirty_bitmap_lookup(action->node,
                                               action->name,
                                               NULL,
@@ -2124,46 +2069,48 @@ static void block_dirty_bitmap_disable_prepare(BlkActionState *common,
     bdrv_disable_dirty_bitmap(state->bitmap);
 }
 
-static void block_dirty_bitmap_disable_abort(BlkActionState *common)
+static void block_dirty_bitmap_disable_abort(void *opaque)
 {
-    BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
-                                             common, common);
+    BlockDirtyBitmapState *state = opaque;
 
     if (state->was_enabled) {
         bdrv_enable_dirty_bitmap(state->bitmap);
     }
 }
 
-static void block_dirty_bitmap_merge_prepare(BlkActionState *common,
-                                             Error **errp)
-{
-    BlockDirtyBitmapMerge *action;
-    BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
-                                             common, common);
+TransactionActionDrv block_dirty_bitmap_merge_drv = {
+    .commit = block_dirty_bitmap_free_backup,
+    .abort = block_dirty_bitmap_restore,
+    .clean = g_free,
+};
 
-    if (action_check_completion_mode(common, errp) < 0) {
-        return;
-    }
+static void block_dirty_bitmap_merge_action(BlockDirtyBitmapMerge *action,
+                                            Transaction *tran, Error **errp)
+{
+    BlockDirtyBitmapState *state = g_new0(BlockDirtyBitmapState, 1);
 
-    action = common->action->u.block_dirty_bitmap_merge.data;
+    tran_add(tran, &block_dirty_bitmap_merge_drv, state);
 
     state->bitmap = block_dirty_bitmap_merge(action->node, action->target,
                                              action->bitmaps, &state->backup,
                                              errp);
 }
 
-static void block_dirty_bitmap_remove_prepare(BlkActionState *common,
-                                              Error **errp)
+static void block_dirty_bitmap_remove_commit(void *opaque);
+static void block_dirty_bitmap_remove_abort(void *opaque);
+TransactionActionDrv block_dirty_bitmap_remove_drv = {
+    .commit = block_dirty_bitmap_remove_commit,
+    .abort = block_dirty_bitmap_remove_abort,
+    .clean = g_free,
+};
+
+static void block_dirty_bitmap_remove_action(BlockDirtyBitmap *action,
+                                             Transaction *tran, Error **errp)
 {
-    BlockDirtyBitmap *action;
-    BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
-                                             common, common);
+    BlockDirtyBitmapState *state = g_new0(BlockDirtyBitmapState, 1);
 
-    if (action_check_completion_mode(common, errp) < 0) {
-        return;
-    }
+    tran_add(tran, &block_dirty_bitmap_remove_drv, state);
 
-    action = common->action->u.block_dirty_bitmap_remove.data;
 
     state->bitmap = block_dirty_bitmap_remove(action->node, action->name,
                                               false, &state->bs, errp);
@@ -2173,10 +2120,9 @@ static void block_dirty_bitmap_remove_prepare(BlkActionState *common,
     }
 }
 
-static void block_dirty_bitmap_remove_abort(BlkActionState *common)
+static void block_dirty_bitmap_remove_abort(void *opaque)
 {
-    BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
-                                             common, common);
+    BlockDirtyBitmapState *state = opaque;
 
     if (state->bitmap) {
         bdrv_dirty_bitmap_skip_store(state->bitmap, false);
@@ -2184,210 +2130,156 @@ static void block_dirty_bitmap_remove_abort(BlkActionState *common)
     }
 }
 
-static void block_dirty_bitmap_remove_commit(BlkActionState *common)
+static void block_dirty_bitmap_remove_commit(void *opaque)
 {
-    BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
-                                             common, common);
+    BlockDirtyBitmapState *state = opaque;
 
     bdrv_dirty_bitmap_set_busy(state->bitmap, false);
     bdrv_release_dirty_bitmap(state->bitmap);
 }
 
-static void abort_prepare(BlkActionState *common, Error **errp)
+static void abort_commit(void *opaque);
+TransactionActionDrv abort_drv = {
+    .commit = abort_commit,
+};
+
+static void abort_action(Transaction *tran, Error **errp)
 {
+    tran_add(tran, &abort_drv, NULL);
     error_setg(errp, "Transaction aborted using Abort action");
 }
 
-static void abort_commit(BlkActionState *common)
+static void abort_commit(void *opaque)
 {
     g_assert_not_reached(); /* this action never succeeds */
 }
 
-static const BlkActionOps actions[] = {
-    [TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT] = {
-        .instance_size = sizeof(ExternalSnapshotState),
-        .prepare  = external_snapshot_prepare,
-        .commit   = external_snapshot_commit,
-        .abort = external_snapshot_abort,
-        .clean = external_snapshot_clean,
-    },
-    [TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC] = {
-        .instance_size = sizeof(ExternalSnapshotState),
-        .prepare  = external_snapshot_prepare,
-        .commit   = external_snapshot_commit,
-        .abort = external_snapshot_abort,
-        .clean = external_snapshot_clean,
-    },
-    [TRANSACTION_ACTION_KIND_DRIVE_BACKUP] = {
-        .instance_size = sizeof(DriveBackupState),
-        .prepare = drive_backup_prepare,
-        .commit = drive_backup_commit,
-        .abort = drive_backup_abort,
-        .clean = drive_backup_clean,
-    },
-    [TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP] = {
-        .instance_size = sizeof(BlockdevBackupState),
-        .prepare = blockdev_backup_prepare,
-        .commit = blockdev_backup_commit,
-        .abort = blockdev_backup_abort,
-        .clean = blockdev_backup_clean,
-    },
-    [TRANSACTION_ACTION_KIND_ABORT] = {
-        .instance_size = sizeof(BlkActionState),
-        .prepare = abort_prepare,
-        .commit = abort_commit,
-    },
-    [TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_INTERNAL_SYNC] = {
-        .instance_size = sizeof(InternalSnapshotState),
-        .prepare  = internal_snapshot_prepare,
-        .abort = internal_snapshot_abort,
-        .clean = internal_snapshot_clean,
-    },
-    [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_ADD] = {
-        .instance_size = sizeof(BlockDirtyBitmapState),
-        .prepare = block_dirty_bitmap_add_prepare,
-        .abort = block_dirty_bitmap_add_abort,
-    },
-    [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_CLEAR] = {
-        .instance_size = sizeof(BlockDirtyBitmapState),
-        .prepare = block_dirty_bitmap_clear_prepare,
-        .commit = block_dirty_bitmap_free_backup,
-        .abort = block_dirty_bitmap_restore,
-    },
-    [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_ENABLE] = {
-        .instance_size = sizeof(BlockDirtyBitmapState),
-        .prepare = block_dirty_bitmap_enable_prepare,
-        .abort = block_dirty_bitmap_enable_abort,
-    },
-    [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_DISABLE] = {
-        .instance_size = sizeof(BlockDirtyBitmapState),
-        .prepare = block_dirty_bitmap_disable_prepare,
-        .abort = block_dirty_bitmap_disable_abort,
-    },
-    [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_MERGE] = {
-        .instance_size = sizeof(BlockDirtyBitmapState),
-        .prepare = block_dirty_bitmap_merge_prepare,
-        .commit = block_dirty_bitmap_free_backup,
-        .abort = block_dirty_bitmap_restore,
-    },
-    [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_REMOVE] = {
-        .instance_size = sizeof(BlockDirtyBitmapState),
-        .prepare = block_dirty_bitmap_remove_prepare,
-        .commit = block_dirty_bitmap_remove_commit,
-        .abort = block_dirty_bitmap_remove_abort,
-    },
-    /* Where are transactions for MIRROR, COMMIT and STREAM?
+static void transaction_action(TransactionAction *act, JobTxn *block_job_txn,
+                               Transaction *tran, Error **errp)
+{
+    switch (act->type) {
+    case TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT:
+    case TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC:
+        external_snapshot_action(act, tran, errp);
+        return;
+    case TRANSACTION_ACTION_KIND_DRIVE_BACKUP:
+        drive_backup_action(act->u.drive_backup.data,
+                            block_job_txn, tran, errp);
+        return;
+    case TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP:
+        blockdev_backup_action(act->u.blockdev_backup.data,
+                               block_job_txn, tran, errp);
+        return;
+    case TRANSACTION_ACTION_KIND_ABORT:
+        abort_action(tran, errp);
+        return;
+    case TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_INTERNAL_SYNC:
+        internal_snapshot_action(act->u.blockdev_snapshot_internal_sync.data,
+                                 tran, errp);
+        return;
+    case TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_ADD:
+        block_dirty_bitmap_add_action(act->u.block_dirty_bitmap_add.data,
+                                      tran, errp);
+        return;
+    case TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_CLEAR:
+        block_dirty_bitmap_clear_action(act->u.block_dirty_bitmap_clear.data,
+                                        tran, errp);
+        return;
+    case TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_ENABLE:
+        block_dirty_bitmap_enable_action(act->u.block_dirty_bitmap_enable.data,
+                                         tran, errp);
+        return;
+    case TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_DISABLE:
+        block_dirty_bitmap_disable_action(
+                act->u.block_dirty_bitmap_disable.data, tran, errp);
+        return;
+    case TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_MERGE:
+        block_dirty_bitmap_merge_action(act->u.block_dirty_bitmap_merge.data,
+                                        tran, errp);
+        return;
+    case TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_REMOVE:
+        block_dirty_bitmap_remove_action(act->u.block_dirty_bitmap_remove.data,
+                                         tran, errp);
+        return;
+    /*
+     * Where are transactions for MIRROR, COMMIT and STREAM?
      * Although these blockjobs use transaction callbacks like the backup job,
      * these jobs do not necessarily adhere to transaction semantics.
      * These jobs may not fully undo all of their actions on abort, nor do they
      * necessarily work in transactions with more than one job in them.
      */
-};
-
-/**
- * Allocate a TransactionProperties structure if necessary, and fill
- * that structure with desired defaults if they are unset.
- */
-static TransactionProperties *get_transaction_properties(
-    TransactionProperties *props)
-{
-    if (!props) {
-        props = g_new0(TransactionProperties, 1);
-    }
-
-    if (!props->has_completion_mode) {
-        props->has_completion_mode = true;
-        props->completion_mode = ACTION_COMPLETION_MODE_INDIVIDUAL;
-    }
-
-    return props;
+    case TRANSACTION_ACTION_KIND__MAX:
+    default:
+        g_assert_not_reached();
+    };
 }
 
+
 /*
  * 'Atomic' group operations.  The operations are performed as a set, and if
  * any fail then we roll back all operations in the group.
  *
  * Always run under BQL.
  */
-void qmp_transaction(TransactionActionList *dev_list,
-                     struct TransactionProperties *props,
+void qmp_transaction(TransactionActionList *actions,
+                     struct TransactionProperties *properties,
                      Error **errp)
 {
-    TransactionActionList *dev_entry = dev_list;
-    bool has_props = !!props;
+    TransactionActionList *act;
     JobTxn *block_job_txn = NULL;
-    BlkActionState *state, *next;
     Error *local_err = NULL;
+    Transaction *tran;
+    ActionCompletionMode comp_mode =
+        properties ? properties->completion_mode :
+        ACTION_COMPLETION_MODE_INDIVIDUAL;
 
     GLOBAL_STATE_CODE();
 
-    QTAILQ_HEAD(, BlkActionState) snap_bdrv_states;
-    QTAILQ_INIT(&snap_bdrv_states);
-
     /* Does this transaction get canceled as a group on failure?
      * If not, we don't really need to make a JobTxn.
      */
-    props = get_transaction_properties(props);
-    if (props->completion_mode != ACTION_COMPLETION_MODE_INDIVIDUAL) {
+    if (comp_mode != ACTION_COMPLETION_MODE_INDIVIDUAL) {
+        for (act = actions; act; act = act->next) {
+            TransactionActionKind type = act->value->type;
+
+            if (type != TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP &&
+                type != TRANSACTION_ACTION_KIND_DRIVE_BACKUP)
+            {
+                error_setg(errp,
+                           "Action '%s' does not support transaction property "
+                           "completion-mode = %s",
+                           TransactionActionKind_str(type),
+                           ActionCompletionMode_str(comp_mode));
+                return;
+            }
+        }
+
         block_job_txn = job_txn_new();
     }
 
     /* drain all i/o before any operations */
     bdrv_drain_all();
 
-    /* We don't do anything in this loop that commits us to the operations */
-    while (NULL != dev_entry) {
-        TransactionAction *dev_info = NULL;
-        const BlkActionOps *ops;
-
-        dev_info = dev_entry->value;
-        dev_entry = dev_entry->next;
+    tran = tran_new();
 
-        assert(dev_info->type < ARRAY_SIZE(actions));
-
-        ops = &actions[dev_info->type];
-        assert(ops->instance_size > 0);
-
-        state = g_malloc0(ops->instance_size);
-        state->ops = ops;
-        state->action = dev_info;
-        state->block_job_txn = block_job_txn;
-        state->txn_props = props;
-        QTAILQ_INSERT_TAIL(&snap_bdrv_states, state, entry);
-
-        state->ops->prepare(state, &local_err);
+    /* We don't do anything in this loop that commits us to the operations */
+    for (act = actions; act; act = act->next) {
+        transaction_action(act->value, block_job_txn, tran, &local_err);
         if (local_err) {
             error_propagate(errp, local_err);
             goto delete_and_fail;
         }
     }
 
-    QTAILQ_FOREACH(state, &snap_bdrv_states, entry) {
-        if (state->ops->commit) {
-            state->ops->commit(state);
-        }
-    }
+    tran_commit(tran);
 
     /* success */
     goto exit;
 
 delete_and_fail:
     /* failure, and it is all-or-none; roll back all operations */
-    QTAILQ_FOREACH_REVERSE(state, &snap_bdrv_states, entry) {
-        if (state->ops->abort) {
-            state->ops->abort(state);
-        }
-    }
+    tran_abort(tran);
 exit:
-    QTAILQ_FOREACH_SAFE(state, &snap_bdrv_states, entry, next) {
-        if (state->ops->clean) {
-            state->ops->clean(state);
-        }
-        g_free(state);
-    }
-    if (!has_props) {
-        qapi_free_TransactionProperties(props);
-    }
     job_txn_unref(block_job_txn);
 }
 
index 659c3cb9de651ff8eb579cf5d93086886e55ff2b..913da3cbf7b48b0247ee22f84a06aaf7727129c7 100644 (file)
@@ -319,10 +319,28 @@ static bool block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
     return block_job_set_speed_locked(job, speed, errp);
 }
 
-int64_t block_job_ratelimit_get_delay(BlockJob *job, uint64_t n)
+void block_job_ratelimit_processed_bytes(BlockJob *job, uint64_t n)
 {
     IO_CODE();
-    return ratelimit_calculate_delay(&job->limit, n);
+    ratelimit_calculate_delay(&job->limit, n);
+}
+
+void block_job_ratelimit_sleep(BlockJob *job)
+{
+    uint64_t delay_ns;
+
+    /*
+     * Sleep at least once. If the job is reentered early, keep waiting until
+     * we've waited for the full time that is necessary to keep the job at the
+     * right speed.
+     *
+     * Make sure to recalculate the delay after each (possibly interrupted)
+     * sleep because the speed can change while the job has yielded.
+     */
+    do {
+        delay_ns = ratelimit_calculate_delay(&job->limit, 0);
+        job_sleep_ns(&job->job, delay_ns);
+    } while (delay_ns && !job_is_cancelled(&job->job));
 }
 
 BlockJobInfo *block_job_query_locked(BlockJob *job, Error **errp)
index f7dc304ff699a1fb85a333139b49a7575ddd5205..e7f036c286b977d4b7310a8bf0aa549a41839ff1 100644 (file)
@@ -214,14 +214,18 @@ version 2.
                     type.
 
                     If the incompatible bit "Compression type" is set: the field
-                    must be present and non-zero (which means non-zlib
+                    must be present and non-zero (which means non-deflate
                     compression type). Otherwise, this field must not be present
-                    or must be zero (which means zlib).
+                    or must be zero (which means deflate).
 
                     Available compression type values:
-                        0: zlib <https://www.zlib.net/>
+                        0: deflate <https://www.ietf.org/rfc/rfc1951.txt>
                         1: zstd <http://github.com/facebook/zstd>
 
+                    The deflate compression type is called "zlib"
+                    <https://www.zlib.net/> in QEMU. However, clusters with the
+                    deflate compression type do not have zlib headers.
+
 
 === Header padding ===
 
index 2d93423d355782ec3366e17c33ca478eac152de2..f347199bff1c51f8a1e57f6d523d5cebef873b4b 100644 (file)
@@ -58,14 +58,14 @@ BlockDriver *bdrv_find_protocol(const char *filename,
                                 Error **errp);
 BlockDriver *bdrv_find_format(const char *format_name);
 
-int coroutine_fn GRAPH_RDLOCK
+int coroutine_fn GRAPH_UNLOCKED
 bdrv_co_create(BlockDriver *drv, const char *filename, QemuOpts *opts,
                Error **errp);
 
-int co_wrapper_bdrv_rdlock bdrv_create(BlockDriver *drv, const char *filename,
-                                       QemuOpts *opts, Error **errp);
+int co_wrapper bdrv_create(BlockDriver *drv, const char *filename,
+                           QemuOpts *opts, Error **errp);
 
-int coroutine_fn GRAPH_RDLOCK
+int coroutine_fn GRAPH_UNLOCKED
 bdrv_co_create_file(const char *filename, QemuOpts *opts, Error **errp);
 
 BlockDriverState *bdrv_new(void);
index dbec0e3bb485c913246e0787df335efc16deaa9b..6492a1e538d84e672b0d3e9c555a232636c3768b 100644 (file)
@@ -250,10 +250,10 @@ struct BlockDriver {
         BlockDriverState *bs, QDict *options, int flags, Error **errp);
     void (*bdrv_close)(BlockDriverState *bs);
 
-    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_create)(
+    int coroutine_fn GRAPH_UNLOCKED_PTR (*bdrv_co_create)(
         BlockdevCreateOptions *opts, Error **errp);
 
-    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_create_opts)(
+    int coroutine_fn GRAPH_UNLOCKED_PTR (*bdrv_co_create_opts)(
         BlockDriver *drv, const char *filename, QemuOpts *opts, Error **errp);
 
     int (*bdrv_amend_options)(BlockDriverState *bs,
index f00844628561dccc1e199443d2527fd1f8a05deb..104824040ccb3b37feba034e06a760d4045fdd65 100644 (file)
@@ -126,12 +126,18 @@ void block_job_user_resume(Job *job);
  */
 
 /**
- * block_job_ratelimit_get_delay:
+ * block_job_ratelimit_processed_bytes:
  *
- * Calculate and return delay for the next request in ns. See the documentation
- * of ratelimit_calculate_delay() for details.
+ * To be called after some work has been done. Adjusts the delay for the next
+ * request. See the documentation of ratelimit_calculate_delay() for details.
  */
-int64_t block_job_ratelimit_get_delay(BlockJob *job, uint64_t n);
+void block_job_ratelimit_processed_bytes(BlockJob *job, uint64_t n);
+
+/**
+ * Put the job to sleep (assuming that it wasn't canceled) to throttle it to the
+ * right speed according to its rate limiting.
+ */
+void block_job_ratelimit_sleep(BlockJob *job);
 
 /**
  * block_job_error_action:
index 446a566e5e1b21ff79a623b550a9b290026cdc85..229bf36910f5567cd25d79bba66a3837c0bfff39 100644 (file)
@@ -757,6 +757,16 @@ void qio_channel_detach_aio_context(QIOChannel *ioc);
 void coroutine_fn qio_channel_yield(QIOChannel *ioc,
                                     GIOCondition condition);
 
+/**
+ * qio_channel_wake_read:
+ * @ioc: the channel object
+ *
+ * If qio_channel_yield() is currently waiting for the channel to become
+ * readable, interrupt it and reenter immediately. This function is safe to call
+ * from any thread.
+ */
+void qio_channel_wake_read(QIOChannel *ioc);
+
 /**
  * qio_channel_wait:
  * @ioc: the channel object
index 375a130a398d34cbc453c1d0c3b2feb9d55146dd..72f0066af55a430d3912a2b04ee6124cdcffc7d5 100644 (file)
@@ -19,6 +19,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "block/aio-wait.h"
 #include "io/channel.h"
 #include "qapi/error.h"
 #include "qemu/main-loop.h"
@@ -514,7 +515,11 @@ int qio_channel_flush(QIOChannel *ioc,
 static void qio_channel_restart_read(void *opaque)
 {
     QIOChannel *ioc = opaque;
-    Coroutine *co = ioc->read_coroutine;
+    Coroutine *co = qatomic_xchg(&ioc->read_coroutine, NULL);
+
+    if (!co) {
+        return;
+    }
 
     /* Assert that aio_co_wake() reenters the coroutine directly */
     assert(qemu_get_current_aio_context() ==
@@ -525,7 +530,11 @@ static void qio_channel_restart_read(void *opaque)
 static void qio_channel_restart_write(void *opaque)
 {
     QIOChannel *ioc = opaque;
-    Coroutine *co = ioc->write_coroutine;
+    Coroutine *co = qatomic_xchg(&ioc->write_coroutine, NULL);
+
+    if (!co) {
+        return;
+    }
 
     /* Assert that aio_co_wake() reenters the coroutine directly */
     assert(qemu_get_current_aio_context() ==
@@ -568,7 +577,11 @@ void qio_channel_detach_aio_context(QIOChannel *ioc)
 void coroutine_fn qio_channel_yield(QIOChannel *ioc,
                                     GIOCondition condition)
 {
+    AioContext *ioc_ctx = ioc->ctx ?: qemu_get_aio_context();
+
     assert(qemu_in_coroutine());
+    assert(in_aio_context_home_thread(ioc_ctx));
+
     if (condition == G_IO_IN) {
         assert(!ioc->read_coroutine);
         ioc->read_coroutine = qemu_coroutine_self();
@@ -580,18 +593,26 @@ void coroutine_fn qio_channel_yield(QIOChannel *ioc,
     }
     qio_channel_set_aio_fd_handlers(ioc);
     qemu_coroutine_yield();
+    assert(in_aio_context_home_thread(ioc_ctx));
 
     /* Allow interrupting the operation by reentering the coroutine other than
      * through the aio_fd_handlers. */
-    if (condition == G_IO_IN && ioc->read_coroutine) {
-        ioc->read_coroutine = NULL;
+    if (condition == G_IO_IN) {
+        assert(ioc->read_coroutine == NULL);
         qio_channel_set_aio_fd_handlers(ioc);
-    } else if (condition == G_IO_OUT && ioc->write_coroutine) {
-        ioc->write_coroutine = NULL;
+    } else if (condition == G_IO_OUT) {
+        assert(ioc->write_coroutine == NULL);
         qio_channel_set_aio_fd_handlers(ioc);
     }
 }
 
+void qio_channel_wake_read(QIOChannel *ioc)
+{
+    Coroutine *co = qatomic_xchg(&ioc->read_coroutine, NULL);
+    if (co) {
+        aio_co_wake(co);
+    }
+}
 
 static gboolean qio_channel_wait_complete(QIOChannel *ioc,
                                           GIOCondition condition,
index e239c2890fafbc5ab5add83710e9b9dd07832d2f..2664d43bffd46cc65d993a1e06c2d788bd3bbd02 100644 (file)
@@ -1599,8 +1599,7 @@ static bool nbd_drained_poll(void *opaque)
              * enter it here so we don't depend on the client to wake it up.
              */
             if (client->recv_coroutine != NULL && client->read_yielding) {
-                qemu_aio_coroutine_enter(exp->common.ctx,
-                                         client->recv_coroutine);
+                qio_channel_wake_read(client->ioc);
             }
 
             return true;
index 9f9f0a762945778706bd482ffb4c9dfe4946d8d4..27f48051b0cdc5f3a2e4d30f8728e5c49cdc401f 100644 (file)
@@ -2938,8 +2938,6 @@ static BlockGraphInfoList *collect_image_info_list(bool image_opts,
         }
         bs = blk_bs(blk);
 
-        GRAPH_RDLOCK_GUARD_MAINLOOP();
-
         /*
          * Note that the returned BlockGraphInfo object will not have
          * information about this image's backing node, because we have opened
@@ -2947,7 +2945,10 @@ static BlockGraphInfoList *collect_image_info_list(bool image_opts,
          * duplicate the backing chain information that we obtain by walking
          * the chain manually here.
          */
+        bdrv_graph_rdlock_main_loop();
         bdrv_query_block_graph_info(bs, &info, &err);
+        bdrv_graph_rdunlock_main_loop();
+
         if (err) {
             error_report_err(err);
             blk_unref(blk);
index edaf29094b55993ca244c2a2b0ffe4c652c3ab62..92b28c79beb90a2f70e733b405a5e3f40b1c24cf 100755 (executable)
@@ -611,6 +611,7 @@ class TestBlockdevReopen(iotests.QMPTestCase):
         self.reopen(hd0_opts, {'file': 'hd0-file'})
 
     # Insert (and remove) a compress filter
+    @iotests.skip_if_unsupported(['compress'])
     def test_insert_compress_filter(self):
         # Add an image to the VM: hd (raw) -> hd0 (qcow2) -> hd0-file (file)
         opts = {'driver': 'raw', 'node-name': 'hd', 'file': hd_opts(0)}
@@ -650,9 +651,9 @@ class TestBlockdevReopen(iotests.QMPTestCase):
 
         # Check the first byte of the first three L2 entries and verify that
         # the second one is compressed (0x40) while the others are not (0x80)
-        iotests.qemu_io_log('-f', 'raw', '-c', 'read -P 0x80 0x40000 1',
-                                         '-c', 'read -P 0x40 0x40008 1',
-                                         '-c', 'read -P 0x80 0x40010 1', hd_path[0])
+        iotests.qemu_io('-f', 'raw', '-c', 'read -P 0x80 0x40000 1',
+                                     '-c', 'read -P 0x40 0x40008 1',
+                                     '-c', 'read -P 0x80 0x40010 1', hd_path[0])
 
     # Swap the disk images of two active block devices
     def test_swap_files(self):
index a4e04a3266b5bf1c6fa042ffc33846c6e8c74426..0970ced62a887aa69a23619ace1f94105010a39a 100644 (file)
 {"return": {}}
 {"data": {"id": "stream0", "type": "stream"}, "event": "BLOCK_JOB_PENDING", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"device": "stream0", "len": 3145728, "offset": 3145728, "speed": 0, "type": "stream"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
-....read 1/1 bytes at offset 262144
-1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-read 1/1 bytes at offset 262152
-1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-read 1/1 bytes at offset 262160
-1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-
-................
+....................
 ----------------------------------------------------------------------
 Ran 26 tests
 
index 3e82c634cfef9ccaeb624d5e804897e6cee0bf9d..7073579a7df88b0ddefa4bd05c3958245f12a4a1 100644 (file)
@@ -462,6 +462,10 @@ class QemuStorageDaemon:
         assert self._qmp is not None
         return self._qmp.cmd(cmd, args)
 
+    def get_qmp(self) -> QEMUMonitorProtocol:
+        assert self._qmp is not None
+        return self._qmp
+
     def stop(self, kill_signal=15):
         self._p.send_signal(kill_signal)
         self._p.wait()
index 7664f336897ce758014ce7f25cdb405b27266964..750e7d4d3872df6e2517d552e3e3d48b0a8135ec 100755 (executable)
 import os
 from threading import Thread
 import iotests
-from iotests import imgfmt, qemu_img, qemu_img_create, QMPTestCase, \
-        QemuStorageDaemon
+from iotests import imgfmt, qemu_img, qemu_img_create, qemu_io, \
+        QMPTestCase, QemuStorageDaemon
 
 
 top = os.path.join(iotests.test_dir, 'top.img')
 nbd_sock = os.path.join(iotests.sock_dir, 'nbd.sock')
 
 
-def do_qemu_img_bench() -> None:
+def do_qemu_img_bench(count: int = 2000000) -> None:
     """
     Do some I/O requests on `nbd_sock`.
     """
-    qemu_img('bench', '-f', 'raw', '-c', '2000000',
+    qemu_img('bench', '-f', 'raw', '-c', str(count),
              f'nbd+unix:///node0?socket={nbd_sock}')
 
 
@@ -84,6 +84,54 @@ class TestGraphChangesWhileIO(QMPTestCase):
 
         bench_thr.join()
 
+    def test_commit_while_io(self) -> None:
+        # Run qemu-img bench in the background
+        bench_thr = Thread(target=do_qemu_img_bench, args=(200000, ))
+        bench_thr.start()
+
+        qemu_io('-c', 'write 0 64k', top)
+        qemu_io('-c', 'write 128k 64k', top)
+
+        result = self.qsd.qmp('blockdev-add', {
+            'driver': imgfmt,
+            'node-name': 'overlay',
+            'backing': None,
+            'file': {
+                'driver': 'file',
+                'filename': top
+            }
+        })
+        self.assert_qmp(result, 'return', {})
+
+        result = self.qsd.qmp('blockdev-snapshot', {
+            'node': 'node0',
+            'overlay': 'overlay',
+        })
+        self.assert_qmp(result, 'return', {})
+
+        # While qemu-img bench is running, repeatedly commit overlay to node0
+        while bench_thr.is_alive():
+            result = self.qsd.qmp('block-commit', {
+                'job-id': 'job0',
+                'device': 'overlay',
+            })
+            self.assert_qmp(result, 'return', {})
+
+            result = self.qsd.qmp('block-job-cancel', {
+                'device': 'job0',
+            })
+            self.assert_qmp(result, 'return', {})
+
+            cancelled = False
+            while not cancelled:
+                for event in self.qsd.get_qmp().get_events(wait=10.0):
+                    if event['event'] != 'JOB_STATUS_CHANGE':
+                        continue
+                    if event['data']['status'] == 'null':
+                        cancelled = True
+
+        bench_thr.join()
+
 if __name__ == '__main__':
     # Format must support raw backing files
     iotests.main(supported_fmts=['qcow', 'qcow2', 'qed'],
index ae1213e6f86322c94cf822305444a7316c3b9cab..fbc63e62f885f55a8d6c777b49b180d529ba6e8e 100644 (file)
@@ -1,5 +1,5 @@
-.
+..
 ----------------------------------------------------------------------
-Ran 1 tests
+Ran 2 tests
 
 OK
index 48ae66011b1716a72d3c21c8358c270d91556474..3a6314269bb4b93f21f2b7d2d76dc16d1c968d03 100644 (file)
@@ -114,7 +114,10 @@ if have_block
     tests += {'test-crypto-xts': [crypto, io]}
   endif
   if 'CONFIG_POSIX' in config_host
-    tests += {'test-image-locking': [testblock]}
+    tests += {
+      'test-image-locking': [testblock],
+      'test-nested-aio-poll': [testblock],
+    }
   endif
   if config_host_data.get('CONFIG_REPLICATION')
     tests += {'test-replication': [testblock]}
index 9a4c5e59d6ee06c22d9ac022a196f8e7586a9d68..08bb0f99845c825992754406406a66769a402138 100644 (file)
@@ -1004,8 +1004,6 @@ static void coroutine_fn test_co_delete_by_drain(void *opaque)
     void *buffer = g_malloc(65536);
     QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buffer, 65536);
 
-    GRAPH_RDLOCK_GUARD();
-
     /* Pretend some internal write operation from parent to child.
      * Important: We have to read from the child, not from the parent!
      * Draining works by first propagating it all up the tree to the
@@ -1014,12 +1012,14 @@ static void coroutine_fn test_co_delete_by_drain(void *opaque)
      * everything will be drained before we go back down the tree, but
      * we do not want that.  We want to be in the middle of draining
      * when this following requests returns. */
+    bdrv_graph_co_rdlock();
     bdrv_co_preadv(tts->wait_child, 0, 65536, &qiov, 0);
+    bdrv_graph_co_rdunlock();
 
     g_assert_cmpint(bs->refcnt, ==, 1);
 
     if (!dbdd->detach_instead_of_delete) {
-        blk_unref(blk);
+        blk_co_unref(blk);
     } else {
         BdrvChild *c, *next_c;
         QLIST_FOREACH_SAFE(c, &bs->children, next, next_c) {
diff --git a/tests/unit/test-nested-aio-poll.c b/tests/unit/test-nested-aio-poll.c
new file mode 100644 (file)
index 0000000..9bbe18b
--- /dev/null
@@ -0,0 +1,130 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Test that poll handlers are not re-entrant in nested aio_poll()
+ *
+ * Copyright Red Hat
+ *
+ * Poll handlers are usually level-triggered. That means they continue firing
+ * until the condition is reset (e.g. a virtqueue becomes empty). If a poll
+ * handler calls nested aio_poll() before the condition is reset, then infinite
+ * recursion occurs.
+ *
+ * aio_poll() is supposed to prevent this by disabling poll handlers in nested
+ * aio_poll() calls. This test case checks that this is indeed what happens.
+ */
+#include "qemu/osdep.h"
+#include "block/aio.h"
+#include "qapi/error.h"
+
+typedef struct {
+    AioContext *ctx;
+
+    /* This is the EventNotifier that drives the test */
+    EventNotifier poll_notifier;
+
+    /* This EventNotifier is only used to wake aio_poll() */
+    EventNotifier dummy_notifier;
+
+    bool nested;
+} TestData;
+
+static void io_read(EventNotifier *notifier)
+{
+    fprintf(stderr, "%s %p\n", __func__, notifier);
+    event_notifier_test_and_clear(notifier);
+}
+
+static bool io_poll_true(void *opaque)
+{
+    fprintf(stderr, "%s %p\n", __func__, opaque);
+    return true;
+}
+
+static bool io_poll_false(void *opaque)
+{
+    fprintf(stderr, "%s %p\n", __func__, opaque);
+    return false;
+}
+
+static void io_poll_ready(EventNotifier *notifier)
+{
+    TestData *td = container_of(notifier, TestData, poll_notifier);
+
+    fprintf(stderr, "> %s\n", __func__);
+
+    g_assert(!td->nested);
+    td->nested = true;
+
+    /* Wake the following nested aio_poll() call */
+    event_notifier_set(&td->dummy_notifier);
+
+    /* This nested event loop must not call io_poll()/io_poll_ready() */
+    g_assert(aio_poll(td->ctx, true));
+
+    td->nested = false;
+
+    fprintf(stderr, "< %s\n", __func__);
+}
+
+/* dummy_notifier never triggers */
+static void io_poll_never_ready(EventNotifier *notifier)
+{
+    g_assert_not_reached();
+}
+
+static void test(void)
+{
+    TestData td = {
+        .ctx = aio_context_new(&error_abort),
+    };
+
+    qemu_set_current_aio_context(td.ctx);
+
+    /* Enable polling */
+    aio_context_set_poll_params(td.ctx, 1000000, 2, 2, &error_abort);
+
+    /*
+     * The GSource is unused but this has the side-effect of changing the fdmon
+     * that AioContext uses.
+     */
+    aio_get_g_source(td.ctx);
+
+    /* Make the event notifier active (set) right away */
+    event_notifier_init(&td.poll_notifier, 1);
+    aio_set_event_notifier(td.ctx, &td.poll_notifier, false,
+                           io_read, io_poll_true, io_poll_ready);
+
+    /* This event notifier will be used later */
+    event_notifier_init(&td.dummy_notifier, 0);
+    aio_set_event_notifier(td.ctx, &td.dummy_notifier, false,
+                           io_read, io_poll_false, io_poll_never_ready);
+
+    /* Consume aio_notify() */
+    g_assert(!aio_poll(td.ctx, false));
+
+    /*
+     * Run the io_read() handler. This has the side-effect of activating
+     * polling in future aio_poll() calls.
+     */
+    g_assert(aio_poll(td.ctx, true));
+
+    /* The second time around the io_poll()/io_poll_ready() handler runs */
+    g_assert(aio_poll(td.ctx, true));
+
+    /* Run io_poll()/io_poll_ready() one more time to show it keeps working */
+    g_assert(aio_poll(td.ctx, true));
+
+    aio_set_event_notifier(td.ctx, &td.dummy_notifier, false,
+                           NULL, NULL, NULL);
+    aio_set_event_notifier(td.ctx, &td.poll_notifier, false, NULL, NULL, NULL);
+    event_notifier_cleanup(&td.dummy_notifier);
+    event_notifier_cleanup(&td.poll_notifier);
+    aio_context_unref(td.ctx);
+}
+
+int main(int argc, char **argv)
+{
+    g_test_init(&argc, &argv, NULL);
+    g_test_add_func("/nested-aio-poll", test);
+    return g_test_run();
+}
index a8be940f760dc2e005bbded9d340f0014de074db..34bc2a64d834e858489920fc3b42eadcd8a8a68f 100644 (file)
@@ -353,8 +353,19 @@ static bool aio_dispatch_handler(AioContext *ctx, AioHandler *node)
         poll_ready && revents == 0 &&
         aio_node_check(ctx, node->is_external) &&
         node->io_poll_ready) {
+        /*
+         * Remove temporarily to avoid infinite loops when ->io_poll_ready()
+         * calls aio_poll() before clearing the condition that made the poll
+         * handler become ready.
+         */
+        QLIST_SAFE_REMOVE(node, node_poll);
+
         node->io_poll_ready(node->opaque);
 
+        if (!QLIST_IS_INSERTED(node, node_poll)) {
+            QLIST_INSERT_HEAD(&ctx->poll_aio_handlers, node, node_poll);
+        }
+
         /*
          * Return early since revents was zero. aio_notify() does not count as
          * progress.