]> git.proxmox.com Git - pve-qemu.git/blame - debian/patches/pve/0039-PVE-Backup-Don-t-block-on-finishing-and-cleanup-crea.patch
update submodule and patches to 7.1.0
[pve-qemu.git] / debian / patches / pve / 0039-PVE-Backup-Don-t-block-on-finishing-and-cleanup-crea.patch
CommitLineData
d333327a
SR
1From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
2From: Stefan Reiter <s.reiter@proxmox.com>
3Date: Mon, 28 Sep 2020 13:40:51 +0200
0c893fd8
SR
4Subject: [PATCH] PVE-Backup: Don't block on finishing and cleanup
5 create_backup_jobs
d333327a
SR
6
7proxmox_backup_co_finish is already async, but previously we would wait
8for the coroutine using block_on_coroutine_fn(). Avoid this by
9scheduling pvebackup_co_complete_stream (and thus pvebackup_co_cleanup)
10as a real coroutine when calling from pvebackup_complete_cb. This is ok,
11since complete_stream uses the backup_mutex internally to synchronize,
12and other streams can happily continue writing in the meantime anyway.
13
14To accomodate, backup_mutex is converted to a CoMutex. This means
15converting every user to a coroutine. This is not just useful here, but
16will come in handy once this series[0] is merged, and QMP calls can be
17yield-able coroutines too. Then we can also finally get rid of
18block_on_coroutine_fn.
19
20Cases of aio_context_acquire/release from within what is now a coroutine
21are changed to aio_co_reschedule_self, which works since a running
22coroutine always holds the aio lock for the context it is running in.
23
72ae34ec
SR
24job_cancel_sync is called from a BH since it can't be run from a
25coroutine (uses AIO_WAIT_WHILE internally).
d333327a 26
72ae34ec 27Same thing for create_backup_jobs, which is converted to a BH too.
d333327a
SR
28
29To communicate the finishing state, a new property is introduced to
30query-backup: 'finishing'. A new state is explicitly not used, since
31that would break compatibility with older qemu-server versions.
32
0c893fd8
SR
33Also fix create_backup_jobs:
34
35No more weird bool returns, just the standard "errp" format used
36everywhere else too. With this, if backup_job_create fails, the error
37message is actually returned over QMP and can be shown to the user.
38
39To facilitate correct cleanup on such an error, we call
40create_backup_jobs as a bottom half directly from pvebackup_co_prepare.
41This additionally allows us to actually hold the backup_mutex during
42operation.
43
44Also add a job_cancel_sync before job_unref, since a job must be in
45STATUS_NULL to be deleted by unref, which could trigger an assert
46before.
47
d333327a 48[0] https://lists.gnu.org/archive/html/qemu-devel/2020-09/msg03515.html
72ae34ec
SR
49
50Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
ddbf7a87 51Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
4567474e
FE
52[add new force parameter to job_cancel_sync calls]
53Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
d333327a 54---
0c893fd8 55 pve-backup.c | 217 ++++++++++++++++++++++++++++---------------
72ae34ec 56 qapi/block-core.json | 5 +-
0c893fd8 57 2 files changed, 144 insertions(+), 78 deletions(-)
d333327a 58
d333327a 59diff --git a/pve-backup.c b/pve-backup.c
4567474e 60index 63c686463f..6f05796fad 100644
d333327a
SR
61--- a/pve-backup.c
62+++ b/pve-backup.c
63@@ -33,7 +33,9 @@ const char *PBS_BITMAP_NAME = "pbs-incremental-dirty-bitmap";
64
65 static struct PVEBackupState {
66 struct {
67- // Everithing accessed from qmp_backup_query command is protected using lock
68+ // Everything accessed from qmp_backup_query command is protected using
69+ // this lock. Do NOT hold this lock for long times, as it is sometimes
70+ // acquired from coroutines, and thus any wait time may block the guest.
71 QemuMutex lock;
72 Error *error;
73 time_t start_time;
0c893fd8 74@@ -47,20 +49,22 @@ static struct PVEBackupState {
d333327a
SR
75 size_t reused;
76 size_t zero_bytes;
77 GList *bitmap_list;
78+ bool finishing;
0c893fd8 79+ bool starting;
d333327a
SR
80 } stat;
81 int64_t speed;
82 VmaWriter *vmaw;
83 ProxmoxBackupHandle *pbs;
84 GList *di_list;
85 JobTxn *txn;
86- QemuMutex backup_mutex;
87+ CoMutex backup_mutex;
88 CoMutex dump_callback_mutex;
89 } backup_state;
90
91 static void pvebackup_init(void)
92 {
93 qemu_mutex_init(&backup_state.stat.lock);
94- qemu_mutex_init(&backup_state.backup_mutex);
95+ qemu_co_mutex_init(&backup_state.backup_mutex);
96 qemu_co_mutex_init(&backup_state.dump_callback_mutex);
97 }
98
0c893fd8 99@@ -72,6 +76,7 @@ typedef struct PVEBackupDevInfo {
d333327a
SR
100 size_t size;
101 uint64_t block_size;
102 uint8_t dev_id;
103+ int completed_ret; // INT_MAX if not completed
104 char targetfile[PATH_MAX];
105 BdrvDirtyBitmap *bitmap;
106 BlockDriverState *target;
0c893fd8 107@@ -227,12 +232,12 @@ pvebackup_co_dump_vma_cb(
d333327a
SR
108 }
109
110 // assumes the caller holds backup_mutex
111-static void coroutine_fn pvebackup_co_cleanup(void *unused)
112+static void coroutine_fn pvebackup_co_cleanup(void)
113 {
114 assert(qemu_in_coroutine());
115
116 qemu_mutex_lock(&backup_state.stat.lock);
117- backup_state.stat.end_time = time(NULL);
118+ backup_state.stat.finishing = true;
119 qemu_mutex_unlock(&backup_state.stat.lock);
120
121 if (backup_state.vmaw) {
0c893fd8 122@@ -261,35 +266,29 @@ static void coroutine_fn pvebackup_co_cleanup(void *unused)
d333327a
SR
123
124 g_list_free(backup_state.di_list);
125 backup_state.di_list = NULL;
126+
127+ qemu_mutex_lock(&backup_state.stat.lock);
128+ backup_state.stat.end_time = time(NULL);
129+ backup_state.stat.finishing = false;
130+ qemu_mutex_unlock(&backup_state.stat.lock);
131 }
132
133-// assumes the caller holds backup_mutex
134-static void coroutine_fn pvebackup_complete_stream(void *opaque)
135+static void coroutine_fn pvebackup_co_complete_stream(void *opaque)
136 {
137 PVEBackupDevInfo *di = opaque;
138+ int ret = di->completed_ret;
d333327a 139
0c893fd8
SR
140- bool error_or_canceled = pvebackup_error_or_canceled();
141-
142- if (backup_state.vmaw) {
143- vma_writer_close_stream(backup_state.vmaw, di->dev_id);
144+ qemu_mutex_lock(&backup_state.stat.lock);
145+ bool starting = backup_state.stat.starting;
146+ qemu_mutex_unlock(&backup_state.stat.lock);
147+ if (starting) {
148+ /* in 'starting' state, no tasks have been run yet, meaning we can (and
149+ * must) skip all cleanup, as we don't know what has and hasn't been
150+ * initialized yet. */
151+ return;
d333327a 152 }
0c893fd8
SR
153
154- if (backup_state.pbs && !error_or_canceled) {
155- Error *local_err = NULL;
156- proxmox_backup_co_close_image(backup_state.pbs, di->dev_id, &local_err);
157- if (local_err != NULL) {
158- pvebackup_propagate_error(local_err);
159- }
160- }
d333327a
SR
161-}
162-
163-static void pvebackup_complete_cb(void *opaque, int ret)
164-{
165- assert(!qemu_in_coroutine());
166-
167- PVEBackupDevInfo *di = opaque;
168-
169- qemu_mutex_lock(&backup_state.backup_mutex);
0c893fd8
SR
170+ qemu_co_mutex_lock(&backup_state.backup_mutex);
171
172 if (ret < 0) {
173 Error *local_err = NULL;
174@@ -301,7 +300,19 @@ static void pvebackup_complete_cb(void *opaque, int ret)
175
176 assert(di->target == NULL);
177
d333327a 178- block_on_coroutine_fn(pvebackup_complete_stream, di);
0c893fd8
SR
179+ bool error_or_canceled = pvebackup_error_or_canceled();
180+
181+ if (backup_state.vmaw) {
182+ vma_writer_close_stream(backup_state.vmaw, di->dev_id);
183+ }
184+
185+ if (backup_state.pbs && !error_or_canceled) {
186+ Error *local_err = NULL;
187+ proxmox_backup_co_close_image(backup_state.pbs, di->dev_id, &local_err);
188+ if (local_err != NULL) {
189+ pvebackup_propagate_error(local_err);
190+ }
191+ }
d333327a
SR
192
193 // remove self from job list
194 backup_state.di_list = g_list_remove(backup_state.di_list, di);
0c893fd8 195@@ -310,21 +321,49 @@ static void pvebackup_complete_cb(void *opaque, int ret)
d333327a
SR
196
197 /* call cleanup if we're the last job */
198 if (!g_list_first(backup_state.di_list)) {
199- block_on_coroutine_fn(pvebackup_co_cleanup, NULL);
200+ pvebackup_co_cleanup();
201 }
202
203- qemu_mutex_unlock(&backup_state.backup_mutex);
204+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
205 }
206
207-static void pvebackup_cancel(void)
208+static void pvebackup_complete_cb(void *opaque, int ret)
209 {
72ae34ec 210- assert(!qemu_in_coroutine());
d333327a
SR
211+ PVEBackupDevInfo *di = opaque;
212+ di->completed_ret = ret;
4fd0fa7f 213+
d333327a
SR
214+ /*
215+ * Schedule stream cleanup in async coroutine. close_image and finish might
72ae34ec
SR
216+ * take a while, so we can't block on them here. This way it also doesn't
217+ * matter if we're already running in a coroutine or not.
d333327a
SR
218+ * Note: di is a pointer to an entry in the global backup_state struct, so
219+ * it stays valid.
220+ */
221+ Coroutine *co = qemu_coroutine_create(pvebackup_co_complete_stream, di);
72ae34ec
SR
222+ aio_co_enter(qemu_get_aio_context(), co);
223+}
0c893fd8 224+
72ae34ec
SR
225+/*
226+ * job_cancel(_sync) does not like to be called from coroutines, so defer to
227+ * main loop processing via a bottom half.
228+ */
229+static void job_cancel_bh(void *opaque) {
230+ CoCtxData *data = (CoCtxData*)opaque;
231+ Job *job = (Job*)data->data;
232+ AioContext *job_ctx = job->aio_context;
233+ aio_context_acquire(job_ctx);
4567474e 234+ job_cancel_sync(job, true);
72ae34ec
SR
235+ aio_context_release(job_ctx);
236+ aio_co_enter(data->ctx, data->co);
d333327a 237+}
4fd0fa7f 238
d333327a
SR
239+static void coroutine_fn pvebackup_co_cancel(void *opaque)
240+{
241 Error *cancel_err = NULL;
242 error_setg(&cancel_err, "backup canceled");
243 pvebackup_propagate_error(cancel_err);
244
245- qemu_mutex_lock(&backup_state.backup_mutex);
246+ qemu_co_mutex_lock(&backup_state.backup_mutex);
247
248 if (backup_state.vmaw) {
249 /* make sure vma writer does not block anymore */
0c893fd8 250@@ -342,27 +381,22 @@ static void pvebackup_cancel(void)
d333327a
SR
251 ((PVEBackupDevInfo *)bdi->data)->job :
252 NULL;
253
254- /* ref the job before releasing the mutex, just to be safe */
255 if (cancel_job) {
256- job_ref(&cancel_job->job);
72ae34ec
SR
257+ CoCtxData data = {
258+ .ctx = qemu_get_current_aio_context(),
259+ .co = qemu_coroutine_self(),
260+ .data = &cancel_job->job,
261+ };
262+ aio_bh_schedule_oneshot(data.ctx, job_cancel_bh, &data);
263+ qemu_coroutine_yield();
d333327a
SR
264 }
265
266- /* job_cancel_sync may enter the job, so we need to release the
267- * backup_mutex to avoid deadlock */
268- qemu_mutex_unlock(&backup_state.backup_mutex);
269-
270- if (cancel_job) {
271- AioContext *aio_context = cancel_job->job.aio_context;
272- aio_context_acquire(aio_context);
4567474e 273- job_cancel_sync(&cancel_job->job, true);
d333327a
SR
274- job_unref(&cancel_job->job);
275- aio_context_release(aio_context);
276- }
277+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
278 }
279
280 void qmp_backup_cancel(Error **errp)
281 {
282- pvebackup_cancel();
283+ block_on_coroutine_fn(pvebackup_co_cancel, NULL);
284 }
285
286 // assumes the caller holds backup_mutex
0c893fd8 287@@ -415,10 +449,18 @@ static int coroutine_fn pvebackup_co_add_config(
d333327a
SR
288 goto out;
289 }
290
0c893fd8 291-static bool create_backup_jobs(void) {
d333327a
SR
292+/*
293+ * backup_job_create can *not* be run from a coroutine (and requires an
294+ * acquired AioContext), so this can't either.
0c893fd8 295+ * The caller is responsible that backup_mutex is held nonetheless.
d333327a 296+ */
0c893fd8 297+static void create_backup_jobs_bh(void *opaque) {
d333327a
SR
298
299 assert(!qemu_in_coroutine());
0c893fd8
SR
300
301+ CoCtxData *data = (CoCtxData*)opaque;
302+ Error **errp = (Error**)data->data;
303+
304 Error *local_err = NULL;
305
306 /* create job transaction to synchronize bitmap commit and cancel all
8dca018b 307@@ -454,24 +496,19 @@ static bool create_backup_jobs(void) {
0c893fd8
SR
308
309 aio_context_release(aio_context);
310
311- if (!job || local_err != NULL) {
312- Error *create_job_err = NULL;
313- error_setg(&create_job_err, "backup_job_create failed: %s",
4fd0fa7f 314- local_err ? error_get_pretty(local_err) : "null");
0c893fd8 315+ di->job = job;
4fd0fa7f
TL
316
317- pvebackup_propagate_error(create_job_err);
0c893fd8
SR
318+ if (!job || local_err) {
319+ error_setg(errp, "backup_job_create failed: %s",
4fd0fa7f 320+ local_err ? error_get_pretty(local_err) : "null");
0c893fd8
SR
321 break;
322 }
323
324- di->job = job;
325-
326 bdrv_unref(di->target);
327 di->target = NULL;
328 }
329
330- bool errors = pvebackup_error_or_canceled();
331-
332- if (errors) {
333+ if (*errp) {
334 l = backup_state.di_list;
335 while (l) {
336 PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
8dca018b 337@@ -483,12 +520,17 @@ static bool create_backup_jobs(void) {
0c893fd8
SR
338 }
339
340 if (di->job) {
341+ AioContext *ctx = di->job->job.aio_context;
342+ aio_context_acquire(ctx);
4567474e 343+ job_cancel_sync(&di->job->job, true);
0c893fd8
SR
344 job_unref(&di->job->job);
345+ aio_context_release(ctx);
346 }
347 }
348 }
349
350- return errors;
351+ /* return */
352+ aio_co_enter(data->ctx, data->co);
353 }
354
355 typedef struct QmpBackupTask {
8dca018b 356@@ -525,11 +567,12 @@ typedef struct QmpBackupTask {
d333327a
SR
357 UuidInfo *result;
358 } QmpBackupTask;
359
360-// assumes the caller holds backup_mutex
361 static void coroutine_fn pvebackup_co_prepare(void *opaque)
362 {
363 assert(qemu_in_coroutine());
364
365+ qemu_co_mutex_lock(&backup_state.backup_mutex);
366+
367 QmpBackupTask *task = opaque;
368
369 task->result = NULL; // just to be sure
8dca018b 370@@ -550,8 +593,9 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
72ae34ec
SR
371 const char *firewall_name = "qemu-server.fw";
372
373 if (backup_state.di_list) {
374- error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
375+ error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
376 "previous backup not finished");
377+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
378 return;
379 }
380
8dca018b 381@@ -618,6 +662,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
d333327a
SR
382 }
383 di->size = size;
384 total += size;
385+
386+ di->completed_ret = INT_MAX;
387 }
388
389 uuid_generate(uuid);
8dca018b 390@@ -849,6 +895,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
d333327a
SR
391 backup_state.stat.dirty = total - backup_state.stat.reused;
392 backup_state.stat.transferred = 0;
393 backup_state.stat.zero_bytes = 0;
394+ backup_state.stat.finishing = false;
0c893fd8 395+ backup_state.stat.starting = true;
d333327a
SR
396
397 qemu_mutex_unlock(&backup_state.stat.lock);
398
8dca018b 399@@ -863,6 +911,33 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
d333327a
SR
400 uuid_info->UUID = uuid_str;
401
402 task->result = uuid_info;
403+
0c893fd8
SR
404+ /* Run create_backup_jobs_bh outside of coroutine (in BH) but keep
405+ * backup_mutex locked. This is fine, a CoMutex can be held across yield
406+ * points, and we'll release it as soon as the BH reschedules us.
407+ */
408+ CoCtxData waker = {
409+ .co = qemu_coroutine_self(),
410+ .ctx = qemu_get_current_aio_context(),
411+ .data = &local_err,
412+ };
413+ aio_bh_schedule_oneshot(waker.ctx, create_backup_jobs_bh, &waker);
414+ qemu_coroutine_yield();
415+
416+ if (local_err) {
417+ error_propagate(task->errp, local_err);
418+ goto err;
419+ }
420+
d333327a 421+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
0c893fd8
SR
422+
423+ qemu_mutex_lock(&backup_state.stat.lock);
424+ backup_state.stat.starting = false;
425+ qemu_mutex_unlock(&backup_state.stat.lock);
426+
427+ /* start the first job in the transaction */
428+ job_txn_start_seq(backup_state.txn);
429+
d333327a
SR
430 return;
431
432 err_mutex:
8dca018b 433@@ -885,6 +960,7 @@ err:
0c893fd8
SR
434 g_free(di);
435 }
436 g_list_free(di_list);
437+ backup_state.di_list = NULL;
438
439 if (devs) {
440 g_strfreev(devs);
8dca018b 441@@ -905,6 +981,8 @@ err:
d333327a
SR
442 }
443
444 task->result = NULL;
445+
446+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
447 return;
448 }
449
8dca018b 450@@ -958,24 +1036,8 @@ UuidInfo *qmp_backup(
d333327a
SR
451 .errp = errp,
452 };
453
454- qemu_mutex_lock(&backup_state.backup_mutex);
455-
456 block_on_coroutine_fn(pvebackup_co_prepare, &task);
457
0c893fd8
SR
458- if (*errp == NULL) {
459- bool errors = create_backup_jobs();
d333327a 460- qemu_mutex_unlock(&backup_state.backup_mutex);
0c893fd8
SR
461-
462- if (!errors) {
d333327a
SR
463- /* start the first job in the transaction
464- * note: this might directly enter the job, so we need to do this
465- * after unlocking the backup_mutex */
0c893fd8
SR
466- job_txn_start_seq(backup_state.txn);
467- }
d333327a
SR
468- } else {
469- qemu_mutex_unlock(&backup_state.backup_mutex);
0c893fd8
SR
470- }
471-
d333327a 472 return task.result;
0c893fd8
SR
473 }
474
8dca018b 475@@ -1027,6 +1089,7 @@ BackupStatus *qmp_query_backup(Error **errp)
d333327a
SR
476 info->transferred = backup_state.stat.transferred;
477 info->has_reused = true;
478 info->reused = backup_state.stat.reused;
479+ info->finishing = backup_state.stat.finishing;
480
481 qemu_mutex_unlock(&backup_state.stat.lock);
482
483diff --git a/qapi/block-core.json b/qapi/block-core.json
5b15e2ec 484index 16e184dd28..cb17d00fe0 100644
d333327a
SR
485--- a/qapi/block-core.json
486+++ b/qapi/block-core.json
5b15e2ec 487@@ -770,12 +770,15 @@
d333327a
SR
488 #
489 # @uuid: uuid for this backup job
490 #
491+# @finishing: if status='active' and finishing=true, then the backup process is
492+# waiting for the target to finish.
493+#
494 ##
495 { 'struct': 'BackupStatus',
496 'data': {'*status': 'str', '*errmsg': 'str', '*total': 'int', '*dirty': 'int',
497 '*transferred': 'int', '*zero-bytes': 'int', '*reused': 'int',
498 '*start-time': 'int', '*end-time': 'int',
499- '*backup-file': 'str', '*uuid': 'str' } }
500+ '*backup-file': 'str', '*uuid': 'str', 'finishing': 'bool' } }
501
502 ##
503 # @BackupFormat: