debian/patches/pve/0053-PVE-fix-and-clean-up-error-handling-for-create_backu.patch
1 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
2 From: Stefan Reiter <s.reiter@proxmox.com>
3 Date: Thu, 22 Oct 2020 17:01:07 +0200
4 Subject: [PATCH] PVE: fix and clean up error handling for create_backup_jobs
5
6 Get rid of the weird bool return and use the standard "errp" format
7 used everywhere else instead. With this, if backup_job_create fails, the
8 error message is actually returned over QMP and can be shown to the user.
9
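For illustration, the "errp" convention in isolation looks roughly like
the following sketch (create_jobs_example and try_create_one_job are
made-up names, not code from this patch):

    #include "qapi/error.h"

    /* placeholder for an operation that can fail with an Error */
    static bool try_create_one_job(Error **errp);

    static void create_jobs_example(Error **errp)
    {
        Error *local_err = NULL;

        if (!try_create_one_job(&local_err)) {
            /* hand a useful message back to the caller (and thus over QMP) */
            error_setg(errp, "backup_job_create failed: %s",
                       local_err ? error_get_pretty(local_err) : "null");
            error_free(local_err);
        }
    }
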
10 To facilitate correct cleanup on such an error, we call
11 create_backup_jobs as a bottom half directly from pvebackup_co_prepare.
12 This additionally allows us to actually hold the backup_mutex during
13 operation.
14
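Stripped of the backup specifics, that coroutine/bottom-half handshake
looks roughly like this (my_bh_work and my_co_fn are made-up names;
CoCtxData is the small { co, ctx, data } helper struct from this patch
series):

    #include "qemu/coroutine.h"
    #include "block/aio.h"

    /* runs outside coroutine context, scheduled as a one-shot bottom half */
    static void my_bh_work(void *opaque)
    {
        CoCtxData *data = opaque;

        /* ... work that must not run in a coroutine, e.g. backup_job_create ... */

        /* wake the coroutine that scheduled us */
        aio_co_enter(data->ctx, data->co);
    }

    static void coroutine_fn my_co_fn(void)
    {
        CoCtxData waker = {
            .co = qemu_coroutine_self(),
            .ctx = qemu_get_current_aio_context(),
        };

        aio_bh_schedule_oneshot(waker.ctx, my_bh_work, &waker);
        /* resumed by my_bh_work() via aio_co_enter(); a CoMutex such as
         * backup_mutex may stay locked across this yield */
        qemu_coroutine_yield();
    }
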
15 Also add a job_cancel_sync before job_unref, since a job must be in
16 STATUS_NULL before it can be deleted by unref; previously this could
17 trigger an assertion failure.
18
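In isolation, that cleanup rule looks like this (drop_unstarted_job is a
made-up name; signatures as in QEMU 5.2, where job_cancel_sync still
takes a single argument):

    #include "block/aio.h"
    #include "block/blockjob.h"
    #include "qemu/job.h"

    /* free a job that was created but never started */
    static void drop_unstarted_job(BlockJob *job)
    {
        AioContext *ctx = job->job.aio_context;

        aio_context_acquire(ctx);
        job_cancel_sync(&job->job); /* drives the job into STATUS_NULL */
        job_unref(&job->job);       /* only legal once the job reached STATUS_NULL */
        aio_context_release(ctx);
    }
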
19 Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
20 ---
21 pve-backup.c | 79 +++++++++++++++++++++++++++++++++++-----------------
22 1 file changed, 54 insertions(+), 25 deletions(-)
23
24 diff --git a/pve-backup.c b/pve-backup.c
25 index f3b8ce1f3a..ded90cb822 100644
26 --- a/pve-backup.c
27 +++ b/pve-backup.c
28 @@ -50,6 +50,7 @@ static struct PVEBackupState {
29 size_t zero_bytes;
30 GList *bitmap_list;
31 bool finishing;
32 + bool starting;
33 } stat;
34 int64_t speed;
35 VmaWriter *vmaw;
36 @@ -277,6 +278,16 @@ static void coroutine_fn pvebackup_co_complete_stream(void *opaque)
37 PVEBackupDevInfo *di = opaque;
38 int ret = di->completed_ret;
39
40 + qemu_mutex_lock(&backup_state.stat.lock);
41 + bool starting = backup_state.stat.starting;
42 + qemu_mutex_unlock(&backup_state.stat.lock);
43 + if (starting) {
44 + /* in 'starting' state, no tasks have been run yet, meaning we can (and
45 + * must) skip all cleanup, as we don't know what has and hasn't been
46 + * initialized yet. */
47 + return;
48 + }
49 +
50 qemu_co_mutex_lock(&backup_state.backup_mutex);
51
52 if (ret < 0) {
53 @@ -441,15 +452,15 @@ static int coroutine_fn pvebackup_co_add_config(
54 /*
55 * backup_job_create can *not* be run from a coroutine (and requires an
56 * acquired AioContext), so this can't either.
57 - * This does imply that this function cannot run with backup_mutex acquired.
58 - * That is ok because it is only ever called between setting up the backup_state
59 - * struct and starting the jobs, and from within a QMP call. This means that no
60 - * other QMP call can interrupt, and no background job is running yet.
61 + * The caller is responsible that backup_mutex is held nonetheless.
62 */
63 -static bool create_backup_jobs(void) {
64 +static void create_backup_jobs_bh(void *opaque) {
65
66 assert(!qemu_in_coroutine());
67
68 + CoCtxData *data = (CoCtxData*)opaque;
69 + Error **errp = (Error**)data->data;
70 +
71 Error *local_err = NULL;
72
73 /* create job transaction to synchronize bitmap commit and cancel all
74 @@ -483,24 +494,19 @@ static bool create_backup_jobs(void) {
75
76 aio_context_release(aio_context);
77
78 - if (!job || local_err != NULL) {
79 - Error *create_job_err = NULL;
80 - error_setg(&create_job_err, "backup_job_create failed: %s",
81 - local_err ? error_get_pretty(local_err) : "null");
82 + di->job = job;
83
84 - pvebackup_propagate_error(create_job_err);
85 + if (!job || local_err) {
86 + error_setg(errp, "backup_job_create failed: %s",
87 + local_err ? error_get_pretty(local_err) : "null");
88 break;
89 }
90
91 - di->job = job;
92 -
93 bdrv_unref(di->target);
94 di->target = NULL;
95 }
96
97 - bool errors = pvebackup_error_or_canceled();
98 -
99 - if (errors) {
100 + if (*errp) {
101 l = backup_state.di_list;
102 while (l) {
103 PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
104 @@ -512,12 +518,17 @@ static bool create_backup_jobs(void) {
105 }
106
107 if (di->job) {
108 + AioContext *ctx = di->job->job.aio_context;
109 + aio_context_acquire(ctx);
110 + job_cancel_sync(&di->job->job);
111 job_unref(&di->job->job);
112 + aio_context_release(ctx);
113 }
114 }
115 }
116
117 - return errors;
118 + /* return */
119 + aio_co_enter(data->ctx, data->co);
120 }
121
122 typedef struct QmpBackupTask {
123 @@ -883,6 +894,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
124 backup_state.stat.transferred = 0;
125 backup_state.stat.zero_bytes = 0;
126 backup_state.stat.finishing = false;
127 + backup_state.stat.starting = true;
128
129 qemu_mutex_unlock(&backup_state.stat.lock);
130
131 @@ -898,7 +910,32 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
132
133 task->result = uuid_info;
134
135 + /* Run create_backup_jobs_bh outside of coroutine (in BH) but keep
136 + * backup_mutex locked. This is fine, a CoMutex can be held across yield
137 + * points, and we'll release it as soon as the BH reschedules us.
138 + */
139 + CoCtxData waker = {
140 + .co = qemu_coroutine_self(),
141 + .ctx = qemu_get_current_aio_context(),
142 + .data = &local_err,
143 + };
144 + aio_bh_schedule_oneshot(waker.ctx, create_backup_jobs_bh, &waker);
145 + qemu_coroutine_yield();
146 +
147 + if (local_err) {
148 + error_propagate(task->errp, local_err);
149 + goto err;
150 + }
151 +
152 qemu_co_mutex_unlock(&backup_state.backup_mutex);
153 +
154 + qemu_mutex_lock(&backup_state.stat.lock);
155 + backup_state.stat.starting = false;
156 + qemu_mutex_unlock(&backup_state.stat.lock);
157 +
158 + /* start the first job in the transaction */
159 + job_txn_start_seq(backup_state.txn);
160 +
161 return;
162
163 err_mutex:
164 @@ -921,6 +958,7 @@ err:
165 g_free(di);
166 }
167 g_list_free(di_list);
168 + backup_state.di_list = NULL;
169
170 if (devs) {
171 g_strfreev(devs);
172 @@ -998,15 +1036,6 @@ UuidInfo *qmp_backup(
173
174 block_on_coroutine_fn(pvebackup_co_prepare, &task);
175
176 - if (*errp == NULL) {
177 - bool errors = create_backup_jobs();
178 -
179 - if (!errors) {
180 - // start the first job in the transaction
181 - job_txn_start_seq(backup_state.txn);
182 - }
183 - }
184 -
185 return task.result;
186 }
187