1 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
2 From: Wolfgang Bumiller <w.bumiller@proxmox.com>
3 Date: Tue, 5 Dec 2017 12:12:15 +0100
4 Subject: [PATCH] backup: fix race in backup-stop command
6 Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
8 blockdev.c | 90 +++++++++++++++++++++++++++++++++-----------------------------
10 2 files changed, 52 insertions(+), 46 deletions(-)
12 diff --git a/blockdev.c b/blockdev.c
13 index 19a82e8774..d3490936d8 100644
16 @@ -2934,31 +2934,6 @@ out:
17 aio_context_release(aio_context);
20 -void block_job_event_cancelled(BlockJob *job);
21 -void block_job_event_completed(BlockJob *job, const char *msg);
22 -static void block_job_cb(void *opaque, int ret)
24 - /* Note that this function may be executed from another AioContext besides
25 - * the QEMU main loop. If you need to access anything that assumes the
26 - * QEMU global mutex, use a BH or introduce a mutex.
29 - BlockDriverState *bs = opaque;
30 - const char *msg = NULL;
35 - msg = strerror(-ret);
38 - if (block_job_is_cancelled(bs->job)) {
39 - block_job_event_cancelled(bs->job);
41 - block_job_event_completed(bs->job, msg);
45 /* PVE backup related function */
47 static struct PVEBackupState {
48 @@ -2975,6 +2950,8 @@ static struct PVEBackupState {
52 + QemuMutex backup_mutex;
53 + bool backup_mutex_initialized;
56 typedef struct PVEBackupDevInfo {
57 @@ -3055,6 +3032,13 @@ static int pvebackup_dump_cb(void *opaque, BlockBackend *target,
59 static void pvebackup_cleanup(void)
61 + qemu_mutex_lock(&backup_state.backup_mutex);
62 + // Avoid race between block jobs and backup-cancel command:
63 + if (!backup_state.vmaw) {
64 + qemu_mutex_unlock(&backup_state.backup_mutex);
68 backup_state.end_time = time(NULL);
70 if (backup_state.vmaw) {
71 @@ -3064,16 +3048,9 @@ static void pvebackup_cleanup(void)
72 backup_state.vmaw = NULL;
75 - if (backup_state.di_list) {
76 - GList *l = backup_state.di_list;
78 - PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
82 - g_list_free(backup_state.di_list);
83 - backup_state.di_list = NULL;
85 + g_list_free(backup_state.di_list);
86 + backup_state.di_list = NULL;
87 + qemu_mutex_unlock(&backup_state.backup_mutex);
90 static void coroutine_fn backup_close_vma_stream(void *opaque)
91 @@ -3085,6 +3062,8 @@ static void coroutine_fn backup_close_vma_stream(void *opaque)
93 static void pvebackup_complete_cb(void *opaque, int ret)
95 + // This always runs in the main loop
97 PVEBackupDevInfo *di = opaque;
100 @@ -3094,8 +3073,6 @@ static void pvebackup_complete_cb(void *opaque, int ret)
101 ret, strerror(-ret));
104 - BlockDriverState *bs = di->bs;
109 @@ -3104,7 +3081,11 @@ static void pvebackup_complete_cb(void *opaque, int ret)
110 qemu_coroutine_enter(co);
113 - block_job_cb(bs, ret);
114 + // remove self from job queue
115 + qemu_mutex_lock(&backup_state.backup_mutex);
116 + backup_state.di_list = g_list_remove(backup_state.di_list, di);
118 + qemu_mutex_unlock(&backup_state.backup_mutex);
120 if (!backup_state.cancel) {
121 pvebackup_run_next_job();
122 @@ -3114,6 +3095,12 @@ static void pvebackup_complete_cb(void *opaque, int ret)
123 static void pvebackup_cancel(void *opaque)
125 backup_state.cancel = true;
126 + qemu_mutex_lock(&backup_state.backup_mutex);
127 + // Avoid race between block jobs and backup-cancel command:
128 + if (!backup_state.vmaw) {
129 + qemu_mutex_unlock(&backup_state.backup_mutex);
133 if (!backup_state.error) {
134 error_setg(&backup_state.error, "backup cancelled");
135 @@ -3131,13 +3118,17 @@ static void pvebackup_cancel(void *opaque)
136 if (!di->completed && di->bs) {
137 BlockJob *job = di->bs->job;
139 + AioContext *aio_context = blk_get_aio_context(job->blk);
140 + aio_context_acquire(aio_context);
141 if (!di->completed) {
142 - block_job_cancel_sync(job);
143 + block_job_cancel(job);
145 + aio_context_release(aio_context);
150 + qemu_mutex_unlock(&backup_state.backup_mutex);
154 @@ -3193,24 +3184,31 @@ static int config_to_vma(const char *file, BackupFormat format,
155 bool block_job_should_pause(BlockJob *job);
156 static void pvebackup_run_next_job(void)
158 + qemu_mutex_lock(&backup_state.backup_mutex);
160 GList *l = backup_state.di_list;
162 PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
164 if (!di->completed && di->bs && di->bs->job) {
165 BlockJob *job = di->bs->job;
166 + AioContext *aio_context = blk_get_aio_context(job->blk);
167 + aio_context_acquire(aio_context);
168 + qemu_mutex_unlock(&backup_state.backup_mutex);
169 if (block_job_should_pause(job)) {
170 - bool cancel = backup_state.error || backup_state.cancel;
172 - block_job_cancel(job);
173 + if (backup_state.error || backup_state.cancel) {
174 + block_job_cancel_sync(job);
176 block_job_resume(job);
179 + aio_context_release(aio_context);
183 + qemu_mutex_unlock(&backup_state.backup_mutex);
185 + // no more jobs, run the cleanup
189 @@ -3233,6 +3231,11 @@ UuidInfo *qmp_backup(const char *backup_file, bool has_format,
193 + if (!backup_state.backup_mutex_initialized) {
194 + qemu_mutex_init(&backup_state.backup_mutex);
195 + backup_state.backup_mutex_initialized = true;
198 if (backup_state.di_list) {
199 error_set(errp, ERROR_CLASS_GENERIC_ERROR,
200 "previous backup not finished");
201 @@ -3405,6 +3408,7 @@ UuidInfo *qmp_backup(const char *backup_file, bool has_format,
202 uuid_copy(backup_state.uuid, uuid);
203 uuid_unparse_lower(uuid, backup_state.uuid_str);
205 + qemu_mutex_lock(&backup_state.backup_mutex);
206 backup_state.di_list = di_list;
208 backup_state.total = total;
209 @@ -3428,6 +3432,8 @@ UuidInfo *qmp_backup(const char *backup_file, bool has_format,
210 block_job_start(job);
213 + qemu_mutex_unlock(&backup_state.backup_mutex);
215 if (!backup_state.error) {
216 pvebackup_run_next_job(); // run one job
218 diff --git a/blockjob.c b/blockjob.c
219 index cb3741f6dd..199d5852db 100644
223 #include "qemu/timer.h"
224 #include "qapi-event.h"
226 -void block_job_event_cancelled(BlockJob *job);
227 -void block_job_event_completed(BlockJob *job, const char *msg);
228 +static void block_job_event_cancelled(BlockJob *job);
229 +static void block_job_event_completed(BlockJob *job, const char *msg);
231 /* Transactional group of block jobs */
233 @@ -688,7 +688,7 @@ static void block_job_iostatus_set_err(BlockJob *job, int error)
237 -void block_job_event_cancelled(BlockJob *job)
238 +static void block_job_event_cancelled(BlockJob *job)
240 if (block_job_is_internal(job)) {
242 @@ -702,7 +702,7 @@ void block_job_event_cancelled(BlockJob *job)
246 -void block_job_event_completed(BlockJob *job, const char *msg)
247 +static void block_job_event_completed(BlockJob *job, const char *msg)
249 if (block_job_is_internal(job)) {