]>
Commit | Line | Data |
---|---|---|
23102ed6 | 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 |
c25a2220 WB |
2 | From: Wolfgang Bumiller <w.bumiller@proxmox.com> |
3 | Date: Tue, 5 Dec 2017 12:12:15 +0100 | |
23102ed6 | 4 | Subject: [PATCH] backup: fix race in backup-stop command |
c25a2220 WB |
5 | |
6 | Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com> | |
7 | --- | |
8 | blockdev.c | 90 +++++++++++++++++++++++++++++++++----------------------------- | |
9 | blockjob.c | 8 +++--- | |
10 | 2 files changed, 52 insertions(+), 46 deletions(-) | |
11 | ||
12 | diff --git a/blockdev.c b/blockdev.c | |
13 | index 19a82e8774..d3490936d8 100644 | |
14 | --- a/blockdev.c | |
15 | +++ b/blockdev.c | |
16 | @@ -2934,31 +2934,6 @@ out: | |
17 | aio_context_release(aio_context); | |
18 | } | |
19 | ||
20 | -void block_job_event_cancelled(BlockJob *job); | |
21 | -void block_job_event_completed(BlockJob *job, const char *msg); | |
22 | -static void block_job_cb(void *opaque, int ret) | |
23 | -{ | |
24 | - /* Note that this function may be executed from another AioContext besides | |
25 | - * the QEMU main loop. If you need to access anything that assumes the | |
26 | - * QEMU global mutex, use a BH or introduce a mutex. | |
27 | - */ | |
28 | - | |
29 | - BlockDriverState *bs = opaque; | |
30 | - const char *msg = NULL; | |
31 | - | |
32 | - assert(bs->job); | |
33 | - | |
34 | - if (ret < 0) { | |
35 | - msg = strerror(-ret); | |
36 | - } | |
37 | - | |
38 | - if (block_job_is_cancelled(bs->job)) { | |
39 | - block_job_event_cancelled(bs->job); | |
40 | - } else { | |
41 | - block_job_event_completed(bs->job, msg); | |
42 | - } | |
43 | -} | |
44 | - | |
45 | /* PVE backup related function */ | |
46 | ||
47 | static struct PVEBackupState { | |
48 | @@ -2975,6 +2950,8 @@ static struct PVEBackupState { | |
49 | size_t total; | |
50 | size_t transferred; | |
51 | size_t zero_bytes; | |
52 | + QemuMutex backup_mutex; | |
53 | + bool backup_mutex_initialized; | |
54 | } backup_state; | |
55 | ||
56 | typedef struct PVEBackupDevInfo { | |
57 | @@ -3055,6 +3032,13 @@ static int pvebackup_dump_cb(void *opaque, BlockBackend *target, | |
58 | ||
59 | static void pvebackup_cleanup(void) | |
60 | { | |
61 | + qemu_mutex_lock(&backup_state.backup_mutex); | |
62 | + // Avoid race between block jobs and backup-cancel command: | |
63 | + if (!backup_state.vmaw) { | |
64 | + qemu_mutex_unlock(&backup_state.backup_mutex); | |
65 | + return; | |
66 | + } | |
67 | + | |
68 | backup_state.end_time = time(NULL); | |
69 | ||
70 | if (backup_state.vmaw) { | |
71 | @@ -3064,16 +3048,9 @@ static void pvebackup_cleanup(void) | |
72 | backup_state.vmaw = NULL; | |
73 | } | |
74 | ||
75 | - if (backup_state.di_list) { | |
76 | - GList *l = backup_state.di_list; | |
77 | - while (l) { | |
78 | - PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data; | |
79 | - l = g_list_next(l); | |
80 | - g_free(di); | |
81 | - } | |
82 | - g_list_free(backup_state.di_list); | |
83 | - backup_state.di_list = NULL; | |
84 | - } | |
85 | + g_list_free(backup_state.di_list); | |
86 | + backup_state.di_list = NULL; | |
87 | + qemu_mutex_unlock(&backup_state.backup_mutex); | |
88 | } | |
89 | ||
90 | static void coroutine_fn backup_close_vma_stream(void *opaque) | |
91 | @@ -3085,6 +3062,8 @@ static void coroutine_fn backup_close_vma_stream(void *opaque) | |
92 | ||
93 | static void pvebackup_complete_cb(void *opaque, int ret) | |
94 | { | |
95 | + // This always runs in the main loop | |
96 | + | |
97 | PVEBackupDevInfo *di = opaque; | |
98 | ||
99 | di->completed = true; | |
100 | @@ -3094,8 +3073,6 @@ static void pvebackup_complete_cb(void *opaque, int ret) | |
101 | ret, strerror(-ret)); | |
102 | } | |
103 | ||
104 | - BlockDriverState *bs = di->bs; | |
105 | - | |
106 | di->bs = NULL; | |
107 | di->target = NULL; | |
108 | ||
109 | @@ -3104,7 +3081,11 @@ static void pvebackup_complete_cb(void *opaque, int ret) | |
110 | qemu_coroutine_enter(co); | |
111 | } | |
112 | ||
113 | - block_job_cb(bs, ret); | |
114 | + // remove self from job queue | |
115 | + qemu_mutex_lock(&backup_state.backup_mutex); | |
116 | + backup_state.di_list = g_list_remove(backup_state.di_list, di); | |
117 | + g_free(di); | |
118 | + qemu_mutex_unlock(&backup_state.backup_mutex); | |
119 | ||
120 | if (!backup_state.cancel) { | |
121 | pvebackup_run_next_job(); | |
122 | @@ -3114,6 +3095,12 @@ static void pvebackup_complete_cb(void *opaque, int ret) | |
123 | static void pvebackup_cancel(void *opaque) | |
124 | { | |
125 | backup_state.cancel = true; | |
126 | + qemu_mutex_lock(&backup_state.backup_mutex); | |
127 | + // Avoid race between block jobs and backup-cancel command: | |
128 | + if (!backup_state.vmaw) { | |
129 | + qemu_mutex_unlock(&backup_state.backup_mutex); | |
130 | + return; | |
131 | + } | |
132 | ||
133 | if (!backup_state.error) { | |
134 | error_setg(&backup_state.error, "backup cancelled"); | |
135 | @@ -3131,13 +3118,17 @@ static void pvebackup_cancel(void *opaque) | |
136 | if (!di->completed && di->bs) { | |
137 | BlockJob *job = di->bs->job; | |
138 | if (job) { | |
139 | + AioContext *aio_context = blk_get_aio_context(job->blk); | |
140 | + aio_context_acquire(aio_context); | |
141 | if (!di->completed) { | |
142 | - block_job_cancel_sync(job); | |
143 | + block_job_cancel(job); | |
144 | } | |
145 | + aio_context_release(aio_context); | |
146 | } | |
147 | } | |
148 | } | |
149 | ||
150 | + qemu_mutex_unlock(&backup_state.backup_mutex); | |
151 | pvebackup_cleanup(); | |
152 | } | |
153 | ||
154 | @@ -3193,24 +3184,31 @@ static int config_to_vma(const char *file, BackupFormat format, | |
155 | bool block_job_should_pause(BlockJob *job); | |
156 | static void pvebackup_run_next_job(void) | |
157 | { | |
158 | + qemu_mutex_lock(&backup_state.backup_mutex); | |
159 | + | |
160 | GList *l = backup_state.di_list; | |
161 | while (l) { | |
162 | PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data; | |
163 | l = g_list_next(l); | |
164 | if (!di->completed && di->bs && di->bs->job) { | |
165 | BlockJob *job = di->bs->job; | |
166 | + AioContext *aio_context = blk_get_aio_context(job->blk); | |
167 | + aio_context_acquire(aio_context); | |
168 | + qemu_mutex_unlock(&backup_state.backup_mutex); | |
169 | if (block_job_should_pause(job)) { | |
170 | - bool cancel = backup_state.error || backup_state.cancel; | |
171 | - if (cancel) { | |
172 | - block_job_cancel(job); | |
173 | + if (backup_state.error || backup_state.cancel) { | |
174 | + block_job_cancel_sync(job); | |
175 | } else { | |
176 | block_job_resume(job); | |
177 | } | |
178 | } | |
179 | + aio_context_release(aio_context); | |
180 | return; | |
181 | } | |
182 | } | |
183 | + qemu_mutex_unlock(&backup_state.backup_mutex); | |
184 | ||
185 | + // no more jobs, run the cleanup | |
186 | pvebackup_cleanup(); | |
187 | } | |
188 | ||
189 | @@ -3233,6 +3231,11 @@ UuidInfo *qmp_backup(const char *backup_file, bool has_format, | |
190 | UuidInfo *uuid_info; | |
191 | BlockJob *job; | |
192 | ||
193 | + if (!backup_state.backup_mutex_initialized) { | |
194 | + qemu_mutex_init(&backup_state.backup_mutex); | |
195 | + backup_state.backup_mutex_initialized = true; | |
196 | + } | |
197 | + | |
198 | if (backup_state.di_list) { | |
199 | error_set(errp, ERROR_CLASS_GENERIC_ERROR, | |
200 | "previous backup not finished"); | |
201 | @@ -3405,6 +3408,7 @@ UuidInfo *qmp_backup(const char *backup_file, bool has_format, | |
202 | uuid_copy(backup_state.uuid, uuid); | |
203 | uuid_unparse_lower(uuid, backup_state.uuid_str); | |
204 | ||
205 | + qemu_mutex_lock(&backup_state.backup_mutex); | |
206 | backup_state.di_list = di_list; | |
207 | ||
208 | backup_state.total = total; | |
209 | @@ -3428,6 +3432,8 @@ UuidInfo *qmp_backup(const char *backup_file, bool has_format, | |
210 | block_job_start(job); | |
211 | } | |
212 | ||
213 | + qemu_mutex_unlock(&backup_state.backup_mutex); | |
214 | + | |
215 | if (!backup_state.error) { | |
216 | pvebackup_run_next_job(); // run one job | |
217 | } | |
218 | diff --git a/blockjob.c b/blockjob.c | |
219 | index cb3741f6dd..199d5852db 100644 | |
220 | --- a/blockjob.c | |
221 | +++ b/blockjob.c | |
222 | @@ -37,8 +37,8 @@ | |
223 | #include "qemu/timer.h" | |
224 | #include "qapi-event.h" | |
225 | ||
226 | -void block_job_event_cancelled(BlockJob *job); | |
227 | -void block_job_event_completed(BlockJob *job, const char *msg); | |
228 | +static void block_job_event_cancelled(BlockJob *job); | |
229 | +static void block_job_event_completed(BlockJob *job, const char *msg); | |
230 | ||
231 | /* Transactional group of block jobs */ | |
232 | struct BlockJobTxn { | |
233 | @@ -688,7 +688,7 @@ static void block_job_iostatus_set_err(BlockJob *job, int error) | |
234 | } | |
235 | } | |
236 | ||
237 | -void block_job_event_cancelled(BlockJob *job) | |
238 | +static void block_job_event_cancelled(BlockJob *job) | |
239 | { | |
240 | if (block_job_is_internal(job)) { | |
241 | return; | |
242 | @@ -702,7 +702,7 @@ void block_job_event_cancelled(BlockJob *job) | |
243 | &error_abort); | |
244 | } | |
245 | ||
246 | -void block_job_event_completed(BlockJob *job, const char *msg) | |
247 | +static void block_job_event_completed(BlockJob *job, const char *msg) | |
248 | { | |
249 | if (block_job_is_internal(job)) { | |
250 | return; | |
251 | -- | |
252 | 2.11.0 | |
253 |