]> git.proxmox.com Git - pve-qemu.git/commitdiff
add patch to disable graph locking
authorFiona Ebner <f.ebner@proxmox.com>
Tue, 17 Oct 2023 12:10:10 +0000 (14:10 +0200)
committerThomas Lamprecht <t.lamprecht@proxmox.com>
Tue, 24 Oct 2023 13:01:23 +0000 (15:01 +0200)
There are still some issues with graph locking, e.g. deadlocks during
backup canceling [0] and initial attempts to fix it didn't work [1].
Because the AioContext locks still exist, it should still be safe to
disable graph locking.

[0]: https://lists.nongnu.org/archive/html/qemu-devel/2023-09/msg00729.html
[1]: https://lists.nongnu.org/archive/html/qemu-devel/2023-09/msg06905.html

Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
debian/patches/extra/0006-Revert-Revert-graph-lock-Disable-locking-for-now.patch [new file with mode: 0644]
debian/patches/series

diff --git a/debian/patches/extra/0006-Revert-Revert-graph-lock-Disable-locking-for-now.patch b/debian/patches/extra/0006-Revert-Revert-graph-lock-Disable-locking-for-now.patch
new file mode 100644 (file)
index 0000000..f0648d2
--- /dev/null
@@ -0,0 +1,140 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Fiona Ebner <f.ebner@proxmox.com>
+Date: Thu, 28 Sep 2023 10:07:03 +0200
+Subject: [PATCH] Revert "Revert "graph-lock: Disable locking for now""
+
+This reverts commit 3cce22defb4b0e47cf135444e30cc673cff5ebad.
+
+There are still some issues with graph locking, e.g. deadlocks during
+backup canceling [0]. Because the AioContext locks still exist, it
+should be safe to disable locking again.
+
+From the original 80fc5d2600 ("graph-lock: Disable locking for now"):
+
+> We don't currently rely on graph locking yet. It is supposed to replace
+> the AioContext lock eventually to enable multiqueue support, but as long
+> as we still have the AioContext lock, it is sufficient without the graph
+> lock. Once the AioContext lock goes away, the deadlock doesn't exist any
+> more either and this commit can be reverted. (Of course, it can also be
+> reverted while the AioContext lock still exists if the callers have been
+> fixed.)
+
+[0]: https://lists.nongnu.org/archive/html/qemu-devel/2023-09/msg00729.html
+
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ block/graph-lock.c | 24 ++++++++++++++++++++++++
+ 1 file changed, 24 insertions(+)
+
+diff --git a/block/graph-lock.c b/block/graph-lock.c
+index 5e66f01ae8..5c2873262a 100644
+--- a/block/graph-lock.c
++++ b/block/graph-lock.c
+@@ -30,8 +30,10 @@ BdrvGraphLock graph_lock;
+ /* Protects the list of aiocontext and orphaned_reader_count */
+ static QemuMutex aio_context_list_lock;
++#if 0
+ /* Written and read with atomic operations. */
+ static int has_writer;
++#endif
+ /*
+  * A reader coroutine could move from an AioContext to another.
+@@ -88,6 +90,7 @@ void unregister_aiocontext(AioContext *ctx)
+     g_free(ctx->bdrv_graph);
+ }
++#if 0
+ static uint32_t reader_count(void)
+ {
+     BdrvGraphRWlock *brdv_graph;
+@@ -105,12 +108,19 @@ static uint32_t reader_count(void)
+     assert((int32_t)rd >= 0);
+     return rd;
+ }
++#endif
+ void bdrv_graph_wrlock(BlockDriverState *bs)
+ {
++#if 0
+     AioContext *ctx = NULL;
+     GLOBAL_STATE_CODE();
++    /*
++     * TODO Some callers hold an AioContext lock when this is called, which
++     * causes deadlocks. Reenable once the AioContext locking is cleaned up (or
++     * AioContext locks are gone).
++     */
+     assert(!qatomic_read(&has_writer));
+     /*
+@@ -158,11 +168,13 @@ void bdrv_graph_wrlock(BlockDriverState *bs)
+     if (ctx) {
+         aio_context_acquire(bdrv_get_aio_context(bs));
+     }
++#endif
+ }
+ void bdrv_graph_wrunlock(void)
+ {
+     GLOBAL_STATE_CODE();
++#if 0
+     QEMU_LOCK_GUARD(&aio_context_list_lock);
+     assert(qatomic_read(&has_writer));
+@@ -174,10 +186,13 @@ void bdrv_graph_wrunlock(void)
+     /* Wake up all coroutine that are waiting to read the graph */
+     qemu_co_enter_all(&reader_queue, &aio_context_list_lock);
++#endif
+ }
+ void coroutine_fn bdrv_graph_co_rdlock(void)
+ {
++    /* TODO Reenable when wrlock is reenabled */
++#if 0
+     BdrvGraphRWlock *bdrv_graph;
+     bdrv_graph = qemu_get_current_aio_context()->bdrv_graph;
+@@ -237,10 +252,12 @@ void coroutine_fn bdrv_graph_co_rdlock(void)
+             qemu_co_queue_wait(&reader_queue, &aio_context_list_lock);
+         }
+     }
++#endif
+ }
+ void coroutine_fn bdrv_graph_co_rdunlock(void)
+ {
++#if 0
+     BdrvGraphRWlock *bdrv_graph;
+     bdrv_graph = qemu_get_current_aio_context()->bdrv_graph;
+@@ -258,6 +275,7 @@ void coroutine_fn bdrv_graph_co_rdunlock(void)
+     if (qatomic_read(&has_writer)) {
+         aio_wait_kick();
+     }
++#endif
+ }
+ void bdrv_graph_rdlock_main_loop(void)
+@@ -275,13 +293,19 @@ void bdrv_graph_rdunlock_main_loop(void)
+ void assert_bdrv_graph_readable(void)
+ {
+     /* reader_count() is slow due to aio_context_list_lock lock contention */
++    /* TODO Reenable when wrlock is reenabled */
++#if 0
+ #ifdef CONFIG_DEBUG_GRAPH_LOCK
+     assert(qemu_in_main_thread() || reader_count());
+ #endif
++#endif
+ }
+ void assert_bdrv_graph_writable(void)
+ {
+     assert(qemu_in_main_thread());
++    /* TODO Reenable when wrlock is reenabled */
++#if 0
+     assert(qatomic_read(&has_writer));
++#endif
+ }
index 01d4d3cd8da53f19eb9e0ac754e073847852cfa2..6d681daa040e19ca050ac9860b40c27fbe2e09f5 100644 (file)
@@ -3,6 +3,7 @@ extra/0002-scsi-megasas-Internal-cdbs-have-16-byte-length.patch
 extra/0003-ide-avoid-potential-deadlock-when-draining-during-tr.patch
 extra/0004-migration-block-dirty-bitmap-fix-loading-bitmap-when.patch
 extra/0005-hw-ide-reset-cancel-async-DMA-operation-before-reset.patch
+extra/0006-Revert-Revert-graph-lock-Disable-locking-for-now.patch
 bitmap-mirror/0001-drive-mirror-add-support-for-sync-bitmap-mode-never.patch
 bitmap-mirror/0002-drive-mirror-add-support-for-conditional-and-always-.patch
 bitmap-mirror/0003-mirror-add-check-for-bitmap-mode-without-bitmap.patch