]>
Commit | Line | Data |
---|---|---|
bf251437 FE |
1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 |
2 | From: Fiona Ebner <f.ebner@proxmox.com> | |
3 | Date: Fri, 5 May 2023 13:39:53 +0200 | |
4 | Subject: [PATCH] migration: for snapshots, hold the BQL during setup callbacks | |
5 | ||
6 | In spirit, this is a partial revert of commit 9b09503752 ("migration: | |
7 | run setup callbacks out of big lock"), but only for the snapshot case. | |
8 | ||
9 | For snapshots, the bdrv_writev_vmstate() function is used during setup | |
10 | (in QIOChannelBlock backing the QEMUFile), but not holding the BQL | |
11 | while calling it could lead to an assertion failure. To understand | |
12 | how, first note the following: | |
13 | ||
14 | 1. Generated coroutine wrappers for block layer functions spawn the | |
15 | coroutine and use AIO_WAIT_WHILE()/aio_poll() to wait for it. | |
16 | 2. If the host OS switches threads at an inconvenient time, it can | |
17 | happen that a bottom half scheduled for the main thread's AioContext | |
18 | is executed as part of a vCPU thread's aio_poll(). | |
19 | ||
20 | An example leading to the assertion failure is as follows: | |
21 | ||
22 | main thread: | |
23 | 1. A snapshot-save QMP command gets issued. | |
24 | 2. snapshot_save_job_bh() is scheduled. | |
25 | ||
26 | vCPU thread: | |
27 | 3. aio_poll() for the main thread's AioContext is called (e.g. when | |
28 | the guest writes to a pflash device, as part of blk_pwrite which is a | |
29 | generated coroutine wrapper). | |
30 | 4. snapshot_save_job_bh() is executed as part of aio_poll(). | |
31 | 5. qemu_savevm_state() is called. | |
32 | 6. qemu_mutex_unlock_iothread() is called. Now | |
33 | qemu_get_current_aio_context() returns 0x0. | |
34 | 7. bdrv_writev_vmstate() is executed during the usual savevm setup. | |
35 | But this function is a generated coroutine wrapper, so it uses | |
36 | AIO_WAIT_WHILE. There, the assertion | |
37 | assert(qemu_get_current_aio_context() == qemu_get_aio_context()); | |
38 | will fail. | |
39 | ||
40 | To fix it, ensure that the BQL is held during setup. To avoid changing | |
41 | the behavior for migration too, introduce conditionals for the setup | |
42 | callbacks that need the BQL and only take the lock if it's not already | |
43 | held. | |
44 | ||
45 | Signed-off-by: Fiona Ebner <f.ebner@proxmox.com> | |
46 | --- | |
47 | include/migration/register.h | 2 +- | |
48 | migration/block-dirty-bitmap.c | 15 ++++++++++++--- | |
49 | migration/block.c | 15 ++++++++++++--- | |
50 | migration/ram.c | 16 +++++++++++++--- | |
51 | migration/savevm.c | 2 -- | |
52 | 5 files changed, 38 insertions(+), 12 deletions(-) | |
53 | ||
54 | diff --git a/include/migration/register.h b/include/migration/register.h | |
55 | index a8dfd8fefd..fa9b0b0f10 100644 | |
56 | --- a/include/migration/register.h | |
57 | +++ b/include/migration/register.h | |
58 | @@ -43,9 +43,9 @@ typedef struct SaveVMHandlers { | |
59 | * by other locks. | |
60 | */ | |
61 | int (*save_live_iterate)(QEMUFile *f, void *opaque); | |
62 | + int (*save_setup)(QEMUFile *f, void *opaque); | |
63 | ||
64 | /* This runs outside the iothread lock! */ | |
65 | - int (*save_setup)(QEMUFile *f, void *opaque); | |
66 | /* Note for save_live_pending: | |
67 | * must_precopy: | |
68 | * - must be migrated in precopy or in stopped state | |
69 | diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c | |
70 | index a6440929fa..69fab3275c 100644 | |
71 | --- a/migration/block-dirty-bitmap.c | |
72 | +++ b/migration/block-dirty-bitmap.c | |
73 | @@ -1214,10 +1214,17 @@ static int dirty_bitmap_save_setup(QEMUFile *f, void *opaque) | |
74 | { | |
75 | DBMSaveState *s = &((DBMState *)opaque)->save; | |
76 | SaveBitmapState *dbms = NULL; | |
77 | + bool release_lock = false; | |
78 | ||
79 | - qemu_mutex_lock_iothread(); | |
80 | + /* For snapshots, the BQL is held during setup. */ | |
81 | + if (!qemu_mutex_iothread_locked()) { | |
82 | + qemu_mutex_lock_iothread(); | |
83 | + release_lock = true; | |
84 | + } | |
85 | if (init_dirty_bitmap_migration(s) < 0) { | |
86 | - qemu_mutex_unlock_iothread(); | |
87 | + if (release_lock) { | |
88 | + qemu_mutex_unlock_iothread(); | |
89 | + } | |
90 | return -1; | |
91 | } | |
92 | ||
93 | @@ -1225,7 +1232,9 @@ static int dirty_bitmap_save_setup(QEMUFile *f, void *opaque) | |
94 | send_bitmap_start(f, s, dbms); | |
95 | } | |
96 | qemu_put_bitmap_flags(f, DIRTY_BITMAP_MIG_FLAG_EOS); | |
97 | - qemu_mutex_unlock_iothread(); | |
98 | + if (release_lock) { | |
99 | + qemu_mutex_unlock_iothread(); | |
100 | + } | |
101 | return 0; | |
102 | } | |
103 | ||
104 | diff --git a/migration/block.c b/migration/block.c | |
105 | index b2497bbd32..c9d55be642 100644 | |
106 | --- a/migration/block.c | |
107 | +++ b/migration/block.c | |
108 | @@ -716,21 +716,30 @@ static void block_migration_cleanup(void *opaque) | |
109 | static int block_save_setup(QEMUFile *f, void *opaque) | |
110 | { | |
111 | int ret; | |
112 | + bool release_lock = false; | |
113 | ||
114 | trace_migration_block_save("setup", block_mig_state.submitted, | |
115 | block_mig_state.transferred); | |
116 | ||
117 | - qemu_mutex_lock_iothread(); | |
118 | + /* For snapshots, the BQL is held during setup. */ | |
119 | + if (!qemu_mutex_iothread_locked()) { | |
120 | + qemu_mutex_lock_iothread(); | |
121 | + release_lock = true; | |
122 | + } | |
123 | ret = init_blk_migration(f); | |
124 | if (ret < 0) { | |
125 | - qemu_mutex_unlock_iothread(); | |
126 | + if (release_lock) { | |
127 | + qemu_mutex_unlock_iothread(); | |
128 | + } | |
129 | return ret; | |
130 | } | |
131 | ||
132 | /* start track dirty blocks */ | |
133 | ret = set_dirty_tracking(); | |
134 | ||
135 | - qemu_mutex_unlock_iothread(); | |
136 | + if (release_lock) { | |
137 | + qemu_mutex_unlock_iothread(); | |
138 | + } | |
139 | ||
140 | if (ret) { | |
141 | return ret; | |
142 | diff --git a/migration/ram.c b/migration/ram.c | |
143 | index 79d881f735..0ecbbc3202 100644 | |
144 | --- a/migration/ram.c | |
145 | +++ b/migration/ram.c | |
146 | @@ -3117,8 +3117,16 @@ static void migration_bitmap_clear_discarded_pages(RAMState *rs) | |
147 | ||
148 | static void ram_init_bitmaps(RAMState *rs) | |
149 | { | |
150 | - /* For memory_global_dirty_log_start below. */ | |
151 | - qemu_mutex_lock_iothread(); | |
152 | + bool release_lock = false; | |
153 | + | |
154 | + /* | |
155 | + * For memory_global_dirty_log_start below. | |
156 | + * For snapshots, the BQL is held during setup. | |
157 | + */ | |
158 | + if (!qemu_mutex_iothread_locked()) { | |
159 | + qemu_mutex_lock_iothread(); | |
160 | + release_lock = true; | |
161 | + } | |
162 | qemu_mutex_lock_ramlist(); | |
163 | ||
164 | WITH_RCU_READ_LOCK_GUARD() { | |
165 | @@ -3130,7 +3138,9 @@ static void ram_init_bitmaps(RAMState *rs) | |
166 | } | |
167 | } | |
168 | qemu_mutex_unlock_ramlist(); | |
169 | - qemu_mutex_unlock_iothread(); | |
170 | + if (release_lock) { | |
171 | + qemu_mutex_unlock_iothread(); | |
172 | + } | |
173 | ||
174 | /* | |
175 | * After an eventual first bitmap sync, fixup the initial bitmap | |
176 | diff --git a/migration/savevm.c b/migration/savevm.c | |
177 | index aa54a67fda..fc6a82a555 100644 | |
178 | --- a/migration/savevm.c | |
179 | +++ b/migration/savevm.c | |
180 | @@ -1621,10 +1621,8 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp) | |
181 | memset(&compression_counters, 0, sizeof(compression_counters)); | |
182 | ms->to_dst_file = f; | |
183 | ||
184 | - qemu_mutex_unlock_iothread(); | |
185 | qemu_savevm_state_header(f); | |
186 | qemu_savevm_state_setup(f); | |
187 | - qemu_mutex_lock_iothread(); | |
188 | ||
189 | while (qemu_file_get_error(f) == 0) { | |
190 | if (qemu_savevm_state_iterate(f, false) > 0) { |