pve-qemu.git: debian/patches/pve/0052-PVE-Backup-Use-more-coroutines-and-don-t-block-on-fi.patch

From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Mon, 28 Sep 2020 13:40:51 +0200
Subject: [PATCH] PVE-Backup: Use more coroutines and don't block on finishing

proxmox_backup_co_finish is already async, but previously we would wait
for the coroutine using block_on_coroutine_fn(). Avoid this by
scheduling pvebackup_co_complete_stream (and thus pvebackup_co_cleanup)
as a real coroutine when calling from pvebackup_complete_cb. This is ok,
since complete_stream uses the backup_mutex internally to synchronize,
and other streams can happily continue writing in the meantime anyway.
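
The pattern boils down to spawning a detached coroutine from the completion
callback instead of blocking inside it; a minimal sketch (illustrative names,
not the exact handlers from this patch):

    /* sketch only - the real pvebackup_* handlers below carry more state */
    static void coroutine_fn complete_stream_co(void *opaque)
    {
        /* may yield on close_image/finish without stalling the caller */
    }

    static void complete_cb(void *opaque, int ret)
    {
        /* old: block_on_coroutine_fn(complete_stream_co, opaque); */
        Coroutine *co = qemu_coroutine_create(complete_stream_co, opaque);
        aio_co_enter(qemu_get_aio_context(), co); /* schedule and return */
    }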

To accommodate this, backup_mutex is converted to a CoMutex. This means
converting every user to a coroutine. This is not just useful here, but
will come in handy once this series[0] is merged, and QMP calls can be
yield-able coroutines too. Then we can also finally get rid of
block_on_coroutine_fn.
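
For reference, the mechanical part of that conversion looks roughly like this
(sketch; the CoMutex is initialised once with qemu_co_mutex_init()):

    /* QemuMutex/qemu_mutex_lock blocks the whole thread on contention;
     * CoMutex/qemu_co_mutex_lock may only be used from coroutine context
     * and yields the coroutine instead of blocking. */
    static CoMutex example_mutex;

    static void coroutine_fn with_example_mutex(void)
    {
        qemu_co_mutex_lock(&example_mutex);   /* may yield, never blocks */
        /* ... critical section ... */
        qemu_co_mutex_unlock(&example_mutex);
    }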

Cases of aio_context_acquire/release from within what is now a coroutine
are changed to aio_co_reschedule_self, which works since a running
coroutine always holds the aio lock for the context it is running in.
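
A sketch of that transformation, assuming aio_co_reschedule_self() as
declared in include/block/aio.h:

    /* before: aio_context_acquire(target_ctx); ... aio_context_release(target_ctx);
     * after, from within a coroutine: hop into the target context and back */
    static void coroutine_fn do_work_in(AioContext *target_ctx)
    {
        AioContext *old_ctx = qemu_get_current_aio_context();

        aio_co_reschedule_self(target_ctx);
        /* ... touch state owned by target_ctx ... */
        aio_co_reschedule_self(old_ctx);
    }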

job_cancel_sync is called from a BH since it can't be run from a
coroutine (uses AIO_WAIT_WHILE internally).

Same thing for create_backup_jobs, which is converted to a BH too.
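
The deferral pattern is the same in both cases and is sketched below;
CoCtxData is the small {ctx, co, data} helper this patch already relies
on, and the function names here are placeholders:

    static void blocking_work_bh(void *opaque)
    {
        CoCtxData *data = opaque;
        /* main-loop/BH context: safe to call e.g. job_cancel_sync(),
         * which uses AIO_WAIT_WHILE internally */
        aio_co_enter(data->ctx, data->co);  /* wake the waiting coroutine */
    }

    static void coroutine_fn defer_to_bh(void)
    {
        CoCtxData data = {
            .ctx = qemu_get_current_aio_context(),
            .co = qemu_coroutine_self(),
        };
        aio_bh_schedule_oneshot(data.ctx, blocking_work_bh, &data);
        qemu_coroutine_yield();             /* resumed from the BH */
    }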

To communicate the finishing state, a new property is introduced to
query-backup: 'finishing'. A new state is explicitly not used, since
that would break compatibility with older qemu-server versions.
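
On the consumer side, a check could look like the following hypothetical
helper (not part of this patch; field names follow the QAPI change below):

    static bool backup_is_finalizing(void)
    {
        Error *err = NULL;
        BackupStatus *info = qmp_query_backup(&err);
        /* finishing == true: all streams written, waiting on the target */
        bool finalizing = !err && info->finishing;
        qapi_free_BackupStatus(info);
        error_free(err);
        return finalizing;
    }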

[0] https://lists.gnu.org/archive/html/qemu-devel/2020-09/msg03515.html

Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
---
 pve-backup.c         | 148 ++++++++++++++++++++++++++-----------------
 qapi/block-core.json |   5 +-
 2 files changed, 95 insertions(+), 58 deletions(-)

diff --git a/pve-backup.c b/pve-backup.c
index 9562e9c98d..0466145bec 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -33,7 +33,9 @@ const char *PBS_BITMAP_NAME = "pbs-incremental-dirty-bitmap";
 
 static struct PVEBackupState {
     struct {
-        // Everithing accessed from qmp_backup_query command is protected using lock
+        // Everything accessed from qmp_backup_query command is protected using
+        // this lock. Do NOT hold this lock for long times, as it is sometimes
+        // acquired from coroutines, and thus any wait time may block the guest.
         QemuMutex lock;
         Error *error;
         time_t start_time;
@@ -47,20 +49,21 @@ static struct PVEBackupState {
         size_t reused;
         size_t zero_bytes;
         GList *bitmap_list;
+        bool finishing;
     } stat;
     int64_t speed;
     VmaWriter *vmaw;
     ProxmoxBackupHandle *pbs;
     GList *di_list;
     JobTxn *txn;
-    QemuMutex backup_mutex;
+    CoMutex backup_mutex;
     CoMutex dump_callback_mutex;
 } backup_state;
 
 static void pvebackup_init(void)
 {
     qemu_mutex_init(&backup_state.stat.lock);
-    qemu_mutex_init(&backup_state.backup_mutex);
+    qemu_co_mutex_init(&backup_state.backup_mutex);
     qemu_co_mutex_init(&backup_state.dump_callback_mutex);
 }
 
@@ -72,6 +75,7 @@ typedef struct PVEBackupDevInfo {
     size_t size;
     uint64_t block_size;
     uint8_t dev_id;
+    int completed_ret; // INT_MAX if not completed
     char targetfile[PATH_MAX];
     BdrvDirtyBitmap *bitmap;
     BlockDriverState *target;
@@ -227,12 +231,12 @@ pvebackup_co_dump_vma_cb(
 }
 
 // assumes the caller holds backup_mutex
-static void coroutine_fn pvebackup_co_cleanup(void *unused)
+static void coroutine_fn pvebackup_co_cleanup(void)
 {
     assert(qemu_in_coroutine());
 
     qemu_mutex_lock(&backup_state.stat.lock);
-    backup_state.stat.end_time = time(NULL);
+    backup_state.stat.finishing = true;
     qemu_mutex_unlock(&backup_state.stat.lock);
 
     if (backup_state.vmaw) {
@@ -261,12 +265,29 @@ static void coroutine_fn pvebackup_co_cleanup(void *unused)
 
     g_list_free(backup_state.di_list);
     backup_state.di_list = NULL;
+
+    qemu_mutex_lock(&backup_state.stat.lock);
+    backup_state.stat.end_time = time(NULL);
+    backup_state.stat.finishing = false;
+    qemu_mutex_unlock(&backup_state.stat.lock);
 }
 
-// assumes the caller holds backup_mutex
-static void coroutine_fn pvebackup_complete_stream(void *opaque)
+static void coroutine_fn pvebackup_co_complete_stream(void *opaque)
 {
     PVEBackupDevInfo *di = opaque;
+    int ret = di->completed_ret;
+
+    qemu_co_mutex_lock(&backup_state.backup_mutex);
+
+    if (ret < 0) {
+        Error *local_err = NULL;
+        error_setg(&local_err, "job failed with err %d - %s", ret, strerror(-ret));
+        pvebackup_propagate_error(local_err);
+    }
+
+    di->bs = NULL;
+
+    assert(di->target == NULL);
 
     bool error_or_canceled = pvebackup_error_or_canceled();
 
@@ -281,27 +302,6 @@ static void coroutine_fn pvebackup_complete_stream(void *opaque)
             pvebackup_propagate_error(local_err);
         }
     }
-}
-
-static void pvebackup_complete_cb(void *opaque, int ret)
-{
-    assert(!qemu_in_coroutine());
-
-    PVEBackupDevInfo *di = opaque;
-
-    qemu_mutex_lock(&backup_state.backup_mutex);
-
-    if (ret < 0) {
-        Error *local_err = NULL;
-        error_setg(&local_err, "job failed with err %d - %s", ret, strerror(-ret));
-        pvebackup_propagate_error(local_err);
-    }
-
-    di->bs = NULL;
-
-    assert(di->target == NULL);
-
-    block_on_coroutine_fn(pvebackup_complete_stream, di);
 
     // remove self from job list
     backup_state.di_list = g_list_remove(backup_state.di_list, di);
@@ -310,21 +310,49 @@ static void pvebackup_complete_cb(void *opaque, int ret)
 
     /* call cleanup if we're the last job */
     if (!g_list_first(backup_state.di_list)) {
-        block_on_coroutine_fn(pvebackup_co_cleanup, NULL);
+        pvebackup_co_cleanup();
     }
 
-    qemu_mutex_unlock(&backup_state.backup_mutex);
+    qemu_co_mutex_unlock(&backup_state.backup_mutex);
 }
 
-static void pvebackup_cancel(void)
+static void pvebackup_complete_cb(void *opaque, int ret)
 {
-    assert(!qemu_in_coroutine());
+    PVEBackupDevInfo *di = opaque;
+    di->completed_ret = ret;
+
+    /*
+     * Schedule stream cleanup in async coroutine. close_image and finish might
+     * take a while, so we can't block on them here. This way it also doesn't
+     * matter if we're already running in a coroutine or not.
+     * Note: di is a pointer to an entry in the global backup_state struct, so
+     * it stays valid.
+     */
+    Coroutine *co = qemu_coroutine_create(pvebackup_co_complete_stream, di);
+    aio_co_enter(qemu_get_aio_context(), co);
+}
 
+/*
+ * job_cancel(_sync) does not like to be called from coroutines, so defer to
+ * main loop processing via a bottom half.
+ */
+static void job_cancel_bh(void *opaque) {
+    CoCtxData *data = (CoCtxData*)opaque;
+    Job *job = (Job*)data->data;
+    AioContext *job_ctx = job->aio_context;
+    aio_context_acquire(job_ctx);
+    job_cancel_sync(job);
+    aio_context_release(job_ctx);
+    aio_co_enter(data->ctx, data->co);
+}
+
+static void coroutine_fn pvebackup_co_cancel(void *opaque)
+{
     Error *cancel_err = NULL;
     error_setg(&cancel_err, "backup canceled");
     pvebackup_propagate_error(cancel_err);
 
-    qemu_mutex_lock(&backup_state.backup_mutex);
+    qemu_co_mutex_lock(&backup_state.backup_mutex);
 
     if (backup_state.vmaw) {
         /* make sure vma writer does not block anymore */
@@ -342,27 +370,22 @@ static void pvebackup_cancel(void)
         ((PVEBackupDevInfo *)bdi->data)->job :
         NULL;
 
-    /* ref the job before releasing the mutex, just to be safe */
     if (cancel_job) {
-        job_ref(&cancel_job->job);
+        CoCtxData data = {
+            .ctx = qemu_get_current_aio_context(),
+            .co = qemu_coroutine_self(),
+            .data = &cancel_job->job,
+        };
+        aio_bh_schedule_oneshot(data.ctx, job_cancel_bh, &data);
+        qemu_coroutine_yield();
     }
 
-    /* job_cancel_sync may enter the job, so we need to release the
-     * backup_mutex to avoid deadlock */
-    qemu_mutex_unlock(&backup_state.backup_mutex);
-
-    if (cancel_job) {
-        AioContext *aio_context = cancel_job->job.aio_context;
-        aio_context_acquire(aio_context);
-        job_cancel_sync(&cancel_job->job);
-        job_unref(&cancel_job->job);
-        aio_context_release(aio_context);
-    }
+    qemu_co_mutex_unlock(&backup_state.backup_mutex);
 }
 
 void qmp_backup_cancel(Error **errp)
 {
-    pvebackup_cancel();
+    block_on_coroutine_fn(pvebackup_co_cancel, NULL);
 }
 
 // assumes the caller holds backup_mutex
@@ -415,6 +438,14 @@ static int coroutine_fn pvebackup_co_add_config(
         goto out;
     }
 
+/*
+ * backup_job_create can *not* be run from a coroutine (and requires an
+ * acquired AioContext), so this can't either.
+ * This does imply that this function cannot run with backup_mutex acquired.
+ * That is ok because it is only ever called between setting up the backup_state
+ * struct and starting the jobs, and from within a QMP call. This means that no
+ * other QMP call can interrupt, and no background job is running yet.
+ */
 static bool create_backup_jobs(void) {
 
     assert(!qemu_in_coroutine());
@@ -523,11 +554,12 @@ typedef struct QmpBackupTask {
     UuidInfo *result;
 } QmpBackupTask;
 
-// assumes the caller holds backup_mutex
 static void coroutine_fn pvebackup_co_prepare(void *opaque)
 {
     assert(qemu_in_coroutine());
 
+    qemu_co_mutex_lock(&backup_state.backup_mutex);
+
     QmpBackupTask *task = opaque;
 
     task->result = NULL; // just to be sure
@@ -548,8 +580,9 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
     const char *firewall_name = "qemu-server.fw";
 
     if (backup_state.di_list) {
-        error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
+        error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
                   "previous backup not finished");
+        qemu_co_mutex_unlock(&backup_state.backup_mutex);
         return;
     }
 
@@ -616,6 +649,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
         }
         di->size = size;
         total += size;
+
+        di->completed_ret = INT_MAX;
     }
 
     uuid_generate(uuid);
@@ -847,6 +882,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
     backup_state.stat.dirty = total - backup_state.stat.reused;
     backup_state.stat.transferred = 0;
     backup_state.stat.zero_bytes = 0;
+    backup_state.stat.finishing = false;
 
     qemu_mutex_unlock(&backup_state.stat.lock);
 
@@ -861,6 +897,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
     uuid_info->UUID = uuid_str;
 
     task->result = uuid_info;
+
+    qemu_co_mutex_unlock(&backup_state.backup_mutex);
     return;
 
 err_mutex:
@@ -903,6 +941,8 @@ err:
     }
 
     task->result = NULL;
+
+    qemu_co_mutex_unlock(&backup_state.backup_mutex);
     return;
 }
 
@@ -956,22 +996,15 @@ UuidInfo *qmp_backup(
         .errp = errp,
     };
 
-    qemu_mutex_lock(&backup_state.backup_mutex);
-
     block_on_coroutine_fn(pvebackup_co_prepare, &task);
 
     if (*errp == NULL) {
         bool errors = create_backup_jobs();
-        qemu_mutex_unlock(&backup_state.backup_mutex);
 
         if (!errors) {
-            /* start the first job in the transaction
-             * note: this might directly enter the job, so we need to do this
-             * after unlocking the backup_mutex */
+            // start the first job in the transaction
            job_txn_start_seq(backup_state.txn);
         }
-    } else {
-        qemu_mutex_unlock(&backup_state.backup_mutex);
     }
 
     return task.result;
@@ -1025,6 +1058,7 @@ BackupStatus *qmp_query_backup(Error **errp)
     info->transferred = backup_state.stat.transferred;
     info->has_reused = true;
     info->reused = backup_state.stat.reused;
+    info->finishing = backup_state.stat.finishing;
 
     qemu_mutex_unlock(&backup_state.stat.lock);
 
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 5fc42e87f3..b31ad8d989 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -784,12 +784,15 @@
 #
 # @uuid: uuid for this backup job
 #
+# @finishing: if status='active' and finishing=true, then the backup process is
+#             waiting for the target to finish.
+#
 ##
 { 'struct': 'BackupStatus',
   'data': {'*status': 'str', '*errmsg': 'str', '*total': 'int', '*dirty': 'int',
            '*transferred': 'int', '*zero-bytes': 'int', '*reused': 'int',
            '*start-time': 'int', '*end-time': 'int',
-           '*backup-file': 'str', '*uuid': 'str' } }
+           '*backup-file': 'str', '*uuid': 'str', 'finishing': 'bool' } }
 
 ##
 # @BackupFormat: