]> git.proxmox.com Git - pve-qemu.git/blob - debian/patches/pve/0044-migration-for-snapshots-hold-the-BQL-during-setup-ca.patch
add patch fixing resume for snapshot and hibernate with drive with iothread and a...
[pve-qemu.git] / debian / patches / pve / 0044-migration-for-snapshots-hold-the-BQL-during-setup-ca.patch
1 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
2 From: Fiona Ebner <f.ebner@proxmox.com>
3 Date: Fri, 5 May 2023 13:39:53 +0200
4 Subject: [PATCH] migration: for snapshots, hold the BQL during setup callbacks
5
6 In spirit, this is a partial revert of commit 9b09503752 ("migration:
7 run setup callbacks out of big lock"), but only for the snapshot case.
8
9 For snapshots, the bdrv_writev_vmstate() function is used during setup
10 (in QIOChannelBlock backing the QEMUFile), but not holding the BQL
11 while calling it could lead to an assertion failure. To understand
12 how, first note the following:
13
14 1. Generated coroutine wrappers for block layer functions spawn the
15 coroutine and use AIO_WAIT_WHILE()/aio_poll() to wait for it.
16 2. If the host OS switches threads at an inconvenient time, it can
17 happen that a bottom half scheduled for the main thread's AioContext
18 is executed as part of a vCPU thread's aio_poll().
19
20 An example leading to the assertion failure is as follows:
21
22 main thread:
23 1. A snapshot-save QMP command gets issued.
24 2. snapshot_save_job_bh() is scheduled.
25
26 vCPU thread:
27 3. aio_poll() for the main thread's AioContext is called (e.g. when
28 the guest writes to a pflash device, as part of blk_pwrite which is a
29 generated coroutine wrapper).
30 4. snapshot_save_job_bh() is executed as part of aio_poll().
31 3. qemu_savevm_state() is called.
32 4. qemu_mutex_unlock_iothread() is called. Now
33 qemu_get_current_aio_context() returns 0x0.
34 5. bdrv_writev_vmstate() is executed during the usual savevm setup.
35 But this function is a generated coroutine wrapper, so it uses
36 AIO_WAIT_WHILE. There, the assertion
37 assert(qemu_get_current_aio_context() == qemu_get_aio_context());
38 will fail.
39
40 To fix it, ensure that the BQL is held during setup. To avoid changing
41 the behavior for migration too, introduce conditionals for the setup
42 callbacks that need the BQL and only take the lock if it's not already
43 held.
44
45 Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
46 ---
47 include/migration/register.h | 2 +-
48 migration/block-dirty-bitmap.c | 15 ++++++++++++---
49 migration/block.c | 15 ++++++++++++---
50 migration/ram.c | 16 +++++++++++++---
51 migration/savevm.c | 2 --
52 5 files changed, 38 insertions(+), 12 deletions(-)
53
54 diff --git a/include/migration/register.h b/include/migration/register.h
55 index a8dfd8fefd..fa9b0b0f10 100644
56 --- a/include/migration/register.h
57 +++ b/include/migration/register.h
58 @@ -43,9 +43,9 @@ typedef struct SaveVMHandlers {
59 * by other locks.
60 */
61 int (*save_live_iterate)(QEMUFile *f, void *opaque);
62 + int (*save_setup)(QEMUFile *f, void *opaque);
63
64 /* This runs outside the iothread lock! */
65 - int (*save_setup)(QEMUFile *f, void *opaque);
66 /* Note for save_live_pending:
67 * must_precopy:
68 * - must be migrated in precopy or in stopped state
69 diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c
70 index 509f3df0a6..42dc4a8d61 100644
71 --- a/migration/block-dirty-bitmap.c
72 +++ b/migration/block-dirty-bitmap.c
73 @@ -1220,10 +1220,17 @@ static int dirty_bitmap_save_setup(QEMUFile *f, void *opaque)
74 {
75 DBMSaveState *s = &((DBMState *)opaque)->save;
76 SaveBitmapState *dbms = NULL;
77 + bool release_lock = false;
78
79 - qemu_mutex_lock_iothread();
80 + /* For snapshots, the BQL is held during setup. */
81 + if (!qemu_mutex_iothread_locked()) {
82 + qemu_mutex_lock_iothread();
83 + release_lock = true;
84 + }
85 if (init_dirty_bitmap_migration(s) < 0) {
86 - qemu_mutex_unlock_iothread();
87 + if (release_lock) {
88 + qemu_mutex_unlock_iothread();
89 + }
90 return -1;
91 }
92
93 @@ -1231,7 +1238,9 @@ static int dirty_bitmap_save_setup(QEMUFile *f, void *opaque)
94 send_bitmap_start(f, s, dbms);
95 }
96 qemu_put_bitmap_flags(f, DIRTY_BITMAP_MIG_FLAG_EOS);
97 - qemu_mutex_unlock_iothread();
98 + if (release_lock) {
99 + qemu_mutex_unlock_iothread();
100 + }
101 return 0;
102 }
103
104 diff --git a/migration/block.c b/migration/block.c
105 index b2497bbd32..c9d55be642 100644
106 --- a/migration/block.c
107 +++ b/migration/block.c
108 @@ -716,21 +716,30 @@ static void block_migration_cleanup(void *opaque)
109 static int block_save_setup(QEMUFile *f, void *opaque)
110 {
111 int ret;
112 + bool release_lock = false;
113
114 trace_migration_block_save("setup", block_mig_state.submitted,
115 block_mig_state.transferred);
116
117 - qemu_mutex_lock_iothread();
118 + /* For snapshots, the BQL is held during setup. */
119 + if (!qemu_mutex_iothread_locked()) {
120 + qemu_mutex_lock_iothread();
121 + release_lock = true;
122 + }
123 ret = init_blk_migration(f);
124 if (ret < 0) {
125 - qemu_mutex_unlock_iothread();
126 + if (release_lock) {
127 + qemu_mutex_unlock_iothread();
128 + }
129 return ret;
130 }
131
132 /* start track dirty blocks */
133 ret = set_dirty_tracking();
134
135 - qemu_mutex_unlock_iothread();
136 + if (release_lock) {
137 + qemu_mutex_unlock_iothread();
138 + }
139
140 if (ret) {
141 return ret;
142 diff --git a/migration/ram.c b/migration/ram.c
143 index 79d881f735..0ecbbc3202 100644
144 --- a/migration/ram.c
145 +++ b/migration/ram.c
146 @@ -3117,8 +3117,16 @@ static void migration_bitmap_clear_discarded_pages(RAMState *rs)
147
148 static void ram_init_bitmaps(RAMState *rs)
149 {
150 - /* For memory_global_dirty_log_start below. */
151 - qemu_mutex_lock_iothread();
152 + bool release_lock = false;
153 +
154 + /*
155 + * For memory_global_dirty_log_start below.
156 + * For snapshots, the BQL is held during setup.
157 + */
158 + if (!qemu_mutex_iothread_locked()) {
159 + qemu_mutex_lock_iothread();
160 + release_lock = true;
161 + }
162 qemu_mutex_lock_ramlist();
163
164 WITH_RCU_READ_LOCK_GUARD() {
165 @@ -3130,7 +3138,9 @@ static void ram_init_bitmaps(RAMState *rs)
166 }
167 }
168 qemu_mutex_unlock_ramlist();
169 - qemu_mutex_unlock_iothread();
170 + if (release_lock) {
171 + qemu_mutex_unlock_iothread();
172 + }
173
174 /*
175 * After an eventual first bitmap sync, fixup the initial bitmap
176 diff --git a/migration/savevm.c b/migration/savevm.c
177 index aa54a67fda..fc6a82a555 100644
178 --- a/migration/savevm.c
179 +++ b/migration/savevm.c
180 @@ -1621,10 +1621,8 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
181 memset(&compression_counters, 0, sizeof(compression_counters));
182 ms->to_dst_file = f;
183
184 - qemu_mutex_unlock_iothread();
185 qemu_savevm_state_header(f);
186 qemu_savevm_state_setup(f);
187 - qemu_mutex_lock_iothread();
188
189 while (qemu_file_get_error(f) == 0) {
190 if (qemu_savevm_state_iterate(f, false) > 0) {