1 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
2 From: Dietmar Maurer <dietmar@proxmox.com>
3 Date: Mon, 6 Apr 2020 12:17:00 +0200
4 Subject: [PATCH] PVE-Backup: avoid coroutines to fix AIO freeze, cleanups
6 We observed various AIO pool loop freezes, so we decided to avoid
7 coroutines and restrict ourselves using similar code as upstream
8 (see blockdev.c: do_backup_common).
10 * avoid coroutine for job related code (causes hangs with iothreads)
11 - We then acquire/release all mutexes outside coroutines now, so we can now
12 correctly use a normal mutex.
14 * split pvebackup_co_dump_cb into:
15 - pvebackup_co_dump_pbs_cb and
16 - pvebackup_co_dump_vma_cb
18 * new helper functions
19 - pvebackup_propagate_error
20 - pvebackup_error_or_canceled
21 - pvebackup_add_transfered_bytes
23 * avoid cancel flag (not needed)
25 * simplify backup_cancel logic
27 There is progress on upstream to support running qmp commands inside coroutines, see:
29 https://lists.gnu.org/archive/html/qemu-devel/2020-02/msg04852.html
31 We should consider using that when it is available in upstream qemu.
33 Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
35 pve-backup.c | 638 ++++++++++++++++++++++++++-------------------------
36 1 file changed, 320 insertions(+), 318 deletions(-)
38 diff --git a/pve-backup.c b/pve-backup.c
39 index 9ae89fb679..bb917ee972 100644
44 /* PVE backup state and related function */
47 + * Note: A resume from a qemu_coroutine_yield can happen in a different thread,
48 + * so you may not use normal mutexes within coroutines:
51 + * qemu_rec_mutex_lock(lock)
53 + * qemu_coroutine_yield() // wait for something
54 + * // we are now inside a different thread
55 + * qemu_rec_mutex_unlock(lock) // Crash - wrong thread!!
56 + * ---end-bad-example--
58 + * ==> Always use CoMutex inside coroutines.
59 + * ==> Never acquire/release AioContext within coroutines (because that uses QemuRecMutex)
63 static struct PVEBackupState {
65 - // Everithing accessed from qmp command, protected using rwlock
67 + // Everything accessed from qmp_backup_query command is protected using lock
72 @@ -25,19 +41,20 @@ static struct PVEBackupState {
80 ProxmoxBackupHandle *pbs;
82 - CoMutex backup_mutex;
83 + QemuMutex backup_mutex;
84 + CoMutex dump_callback_mutex;
87 static void pvebackup_init(void)
89 - qemu_co_rwlock_init(&backup_state.stat.rwlock);
90 - qemu_co_mutex_init(&backup_state.backup_mutex);
91 + qemu_mutex_init(&backup_state.stat.lock);
92 + qemu_mutex_init(&backup_state.backup_mutex);
93 + qemu_co_mutex_init(&backup_state.dump_callback_mutex);
96 // initialize PVEBackupState at startup
97 @@ -52,10 +69,54 @@ typedef struct PVEBackupDevInfo {
98 BlockDriverState *target;
101 -static void pvebackup_co_run_next_job(void);
102 +static void pvebackup_run_next_job(void);
105 +lookup_active_block_job(PVEBackupDevInfo *di)
107 + if (!di->completed && di->bs) {
108 + for (BlockJob *job = block_job_next(NULL); job; job = block_job_next(job)) {
109 + if (job->job.driver->job_type != JOB_TYPE_BACKUP) {
113 + BackupBlockJob *bjob = container_of(job, BackupBlockJob, common);
114 + if (bjob && bjob->source_bs == di->bs) {
122 +static void pvebackup_propagate_error(Error *err)
124 + qemu_mutex_lock(&backup_state.stat.lock);
125 + error_propagate(&backup_state.stat.error, err);
126 + qemu_mutex_unlock(&backup_state.stat.lock);
129 +static bool pvebackup_error_or_canceled(void)
131 + qemu_mutex_lock(&backup_state.stat.lock);
132 + bool error_or_canceled = !!backup_state.stat.error;
133 + qemu_mutex_unlock(&backup_state.stat.lock);
135 + return error_or_canceled;
138 +static void pvebackup_add_transfered_bytes(size_t transferred, size_t zero_bytes)
140 + qemu_mutex_lock(&backup_state.stat.lock);
141 + backup_state.stat.zero_bytes += zero_bytes;
142 + backup_state.stat.transferred += transferred;
143 + qemu_mutex_unlock(&backup_state.stat.lock);
146 +// This may get called from multiple coroutines in multiple io-threads
147 +// Note1: this may get called after job_cancel()
148 static int coroutine_fn
149 -pvebackup_co_dump_cb(
150 +pvebackup_co_dump_pbs_cb(
154 @@ -67,137 +128,127 @@ pvebackup_co_dump_cb(
155 const unsigned char *buf = pbuf;
156 PVEBackupDevInfo *di = opaque;
158 - qemu_co_rwlock_rdlock(&backup_state.stat.rwlock);
159 - bool cancel = backup_state.stat.cancel;
160 - qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
161 + assert(backup_state.pbs);
163 + Error *local_err = NULL;
166 + qemu_co_mutex_lock(&backup_state.dump_callback_mutex);
169 - return size; // return success
170 + // avoid deadlock if job is cancelled
171 + if (pvebackup_error_or_canceled()) {
172 + qemu_co_mutex_unlock(&backup_state.dump_callback_mutex);
176 - qemu_co_mutex_lock(&backup_state.backup_mutex);
177 + pbs_res = proxmox_backup_co_write_data(backup_state.pbs, di->dev_id, buf, start, size, &local_err);
178 + qemu_co_mutex_unlock(&backup_state.dump_callback_mutex);
182 + pvebackup_propagate_error(local_err);
185 + pvebackup_add_transfered_bytes(size, !buf ? size : 0);
188 - if (backup_state.vmaw) {
189 - size_t zero_bytes = 0;
190 - uint64_t remaining = size;
192 - uint64_t cluster_num = start / VMA_CLUSTER_SIZE;
193 - if ((cluster_num * VMA_CLUSTER_SIZE) != start) {
194 - qemu_co_rwlock_rdlock(&backup_state.stat.rwlock);
195 - if (!backup_state.stat.error) {
196 - qemu_co_rwlock_upgrade(&backup_state.stat.rwlock);
197 - error_setg(&backup_state.stat.error,
198 - "got unaligned write inside backup dump "
199 - "callback (sector %ld)", start);
201 - qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
202 - qemu_co_mutex_unlock(&backup_state.backup_mutex);
203 - return -1; // not aligned to cluster size
208 - while (remaining > 0) {
209 - ret = vma_writer_write(backup_state.vmaw, di->dev_id, cluster_num,
213 - buf += VMA_CLUSTER_SIZE;
216 - qemu_co_rwlock_rdlock(&backup_state.stat.rwlock);
217 - if (!backup_state.stat.error) {
218 - qemu_co_rwlock_upgrade(&backup_state.stat.rwlock);
219 - vma_writer_error_propagate(backup_state.vmaw, &backup_state.stat.error);
221 - qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
222 +// This may get called from multiple coroutines in multiple io-threads
223 +static int coroutine_fn
224 +pvebackup_co_dump_vma_cb(
230 + assert(qemu_in_coroutine());
232 - qemu_co_mutex_unlock(&backup_state.backup_mutex);
235 - qemu_co_rwlock_wrlock(&backup_state.stat.rwlock);
236 - backup_state.stat.zero_bytes += zero_bytes;
237 - if (remaining >= VMA_CLUSTER_SIZE) {
238 - backup_state.stat.transferred += VMA_CLUSTER_SIZE;
239 - remaining -= VMA_CLUSTER_SIZE;
241 - backup_state.stat.transferred += remaining;
244 - qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
247 - } else if (backup_state.pbs) {
248 - Error *local_err = NULL;
250 + const uint64_t size = bytes;
251 + const unsigned char *buf = pbuf;
252 + PVEBackupDevInfo *di = opaque;
254 - pbs_res = proxmox_backup_co_write_data(backup_state.pbs, di->dev_id, buf, start, size, &local_err);
257 - qemu_co_rwlock_wrlock(&backup_state.stat.rwlock);
258 + assert(backup_state.vmaw);
261 - error_propagate(&backup_state.stat.error, local_err);
262 - qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
263 - qemu_co_mutex_unlock(&backup_state.backup_mutex);
267 - backup_state.stat.zero_bytes += size;
269 - backup_state.stat.transferred += size;
270 + uint64_t remaining = size;
272 + uint64_t cluster_num = start / VMA_CLUSTER_SIZE;
273 + if ((cluster_num * VMA_CLUSTER_SIZE) != start) {
274 + Error *local_err = NULL;
275 + error_setg(&local_err,
276 + "got unaligned write inside backup dump "
277 + "callback (sector %ld)", start);
278 + pvebackup_propagate_error(local_err);
279 + return -1; // not aligned to cluster size
282 + while (remaining > 0) {
283 + qemu_co_mutex_lock(&backup_state.dump_callback_mutex);
284 + // avoid deadlock if job is cancelled
285 + if (pvebackup_error_or_canceled()) {
286 + qemu_co_mutex_unlock(&backup_state.dump_callback_mutex);
290 - qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
291 + size_t zero_bytes = 0;
292 + ret = vma_writer_write(backup_state.vmaw, di->dev_id, cluster_num, buf, &zero_bytes);
293 + qemu_co_mutex_unlock(&backup_state.dump_callback_mutex);
296 - qemu_co_rwlock_wrlock(&backup_state.stat.rwlock);
298 - backup_state.stat.zero_bytes += size;
301 + buf += VMA_CLUSTER_SIZE;
304 + Error *local_err = NULL;
305 + vma_writer_error_propagate(backup_state.vmaw, &local_err);
306 + pvebackup_propagate_error(local_err);
309 + if (remaining >= VMA_CLUSTER_SIZE) {
310 + assert(ret == VMA_CLUSTER_SIZE);
311 + pvebackup_add_transfered_bytes(VMA_CLUSTER_SIZE, zero_bytes);
312 + remaining -= VMA_CLUSTER_SIZE;
314 + assert(ret == remaining);
315 + pvebackup_add_transfered_bytes(remaining, zero_bytes);
319 - backup_state.stat.transferred += size;
320 - qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
323 - qemu_co_mutex_unlock(&backup_state.backup_mutex);
328 -static void coroutine_fn pvebackup_co_cleanup(void)
329 +// assumes the caller holds backup_mutex
330 +static void coroutine_fn pvebackup_co_cleanup(void *unused)
332 assert(qemu_in_coroutine());
334 - qemu_co_mutex_lock(&backup_state.backup_mutex);
336 - qemu_co_rwlock_wrlock(&backup_state.stat.rwlock);
337 + qemu_mutex_lock(&backup_state.stat.lock);
338 backup_state.stat.end_time = time(NULL);
339 - qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
340 + qemu_mutex_unlock(&backup_state.stat.lock);
342 if (backup_state.vmaw) {
343 Error *local_err = NULL;
344 vma_writer_close(backup_state.vmaw, &local_err);
346 if (local_err != NULL) {
347 - qemu_co_rwlock_wrlock(&backup_state.stat.rwlock);
348 - error_propagate(&backup_state.stat.error, local_err);
349 - qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
351 + pvebackup_propagate_error(local_err);
354 backup_state.vmaw = NULL;
357 if (backup_state.pbs) {
358 - qemu_co_rwlock_rdlock(&backup_state.stat.rwlock);
359 - bool error_or_canceled = backup_state.stat.error || backup_state.stat.cancel;
360 - if (!error_or_canceled) {
361 + if (!pvebackup_error_or_canceled()) {
362 Error *local_err = NULL;
363 proxmox_backup_co_finish(backup_state.pbs, &local_err);
364 if (local_err != NULL) {
365 - qemu_co_rwlock_upgrade(&backup_state.stat.rwlock);
366 - error_propagate(&backup_state.stat.error, local_err);
368 + pvebackup_propagate_error(local_err);
371 - qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
373 proxmox_backup_disconnect(backup_state.pbs);
374 backup_state.pbs = NULL;
375 @@ -205,43 +256,14 @@ static void coroutine_fn pvebackup_co_cleanup(void)
377 g_list_free(backup_state.di_list);
378 backup_state.di_list = NULL;
379 - qemu_co_mutex_unlock(&backup_state.backup_mutex);
382 -typedef struct PVEBackupCompeteCallbackData {
383 - PVEBackupDevInfo *di;
385 -} PVEBackupCompeteCallbackData;
387 -static void coroutine_fn pvebackup_co_complete_cb(void *opaque)
388 +// assumes the caller holds backup_mutex
389 +static void coroutine_fn pvebackup_complete_stream(void *opaque)
391 - assert(qemu_in_coroutine());
393 - PVEBackupCompeteCallbackData *cb_data = opaque;
395 - qemu_co_mutex_lock(&backup_state.backup_mutex);
397 - PVEBackupDevInfo *di = cb_data->di;
398 - int ret = cb_data->result;
400 - di->completed = true;
402 - qemu_co_rwlock_rdlock(&backup_state.stat.rwlock);
403 - bool error_or_canceled = backup_state.stat.error || backup_state.stat.cancel;
405 - if (ret < 0 && !backup_state.stat.error) {
406 - qemu_co_rwlock_upgrade(&backup_state.stat.rwlock);
407 - error_setg(&backup_state.stat.error, "job failed with err %d - %s",
408 - ret, strerror(-ret));
410 - qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
413 + PVEBackupDevInfo *di = opaque;
416 - bdrv_unref(di->target);
419 + bool error_or_canceled = pvebackup_error_or_canceled();
421 if (backup_state.vmaw) {
422 vma_writer_close_stream(backup_state.vmaw, di->dev_id);
423 @@ -251,110 +273,101 @@ static void coroutine_fn pvebackup_co_complete_cb(void *opaque)
424 Error *local_err = NULL;
425 proxmox_backup_co_close_image(backup_state.pbs, di->dev_id, &local_err);
426 if (local_err != NULL) {
427 - qemu_co_rwlock_wrlock(&backup_state.stat.rwlock);
428 - error_propagate(&backup_state.stat.error, local_err);
429 - qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
430 + pvebackup_propagate_error(local_err);
435 - // remove self from job queue
436 - backup_state.di_list = g_list_remove(backup_state.di_list, di);
438 +static void pvebackup_complete_cb(void *opaque, int ret)
440 + assert(!qemu_in_coroutine());
442 - int pending_jobs = g_list_length(backup_state.di_list);
443 + PVEBackupDevInfo *di = opaque;
445 - qemu_co_mutex_unlock(&backup_state.backup_mutex);
446 + qemu_mutex_lock(&backup_state.backup_mutex);
448 - if (pending_jobs > 0) {
449 - pvebackup_co_run_next_job();
451 - pvebackup_co_cleanup();
452 + di->completed = true;
455 + Error *local_err = NULL;
456 + error_setg(&local_err, "job failed with err %d - %s", ret, strerror(-ret));
457 + pvebackup_propagate_error(local_err);
461 -static void pvebackup_complete_cb(void *opaque, int ret)
463 - // This can be called from the main loop, or from a coroutine
464 - PVEBackupCompeteCallbackData cb_data = {
470 - if (qemu_in_coroutine()) {
471 - pvebackup_co_complete_cb(&cb_data);
473 - block_on_coroutine_fn(pvebackup_co_complete_cb, &cb_data);
476 + assert(di->target == NULL);
478 -static void coroutine_fn pvebackup_co_cancel(void *opaque)
480 - assert(qemu_in_coroutine());
481 + block_on_coroutine_fn(pvebackup_complete_stream, di);
483 - qemu_co_rwlock_wrlock(&backup_state.stat.rwlock);
484 - backup_state.stat.cancel = true;
485 - qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
486 + // remove self from job queue
487 + backup_state.di_list = g_list_remove(backup_state.di_list, di);
489 - qemu_co_mutex_lock(&backup_state.backup_mutex);
492 - // Avoid race between block jobs and backup-cancel command:
493 - if (!(backup_state.vmaw || backup_state.pbs)) {
494 - qemu_co_mutex_unlock(&backup_state.backup_mutex);
497 + qemu_mutex_unlock(&backup_state.backup_mutex);
499 - qemu_co_rwlock_rdlock(&backup_state.stat.rwlock);
500 - if (!backup_state.stat.error) {
501 - qemu_co_rwlock_upgrade(&backup_state.stat.rwlock);
502 - error_setg(&backup_state.stat.error, "backup cancelled");
504 - qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
505 + pvebackup_run_next_job();
508 +static void pvebackup_cancel(void)
510 + assert(!qemu_in_coroutine());
512 + Error *cancel_err = NULL;
513 + error_setg(&cancel_err, "backup canceled");
514 + pvebackup_propagate_error(cancel_err);
516 + qemu_mutex_lock(&backup_state.backup_mutex);
518 if (backup_state.vmaw) {
519 /* make sure vma writer does not block anymore */
520 - vma_writer_set_error(backup_state.vmaw, "backup cancelled");
521 + vma_writer_set_error(backup_state.vmaw, "backup canceled");
524 if (backup_state.pbs) {
525 - proxmox_backup_abort(backup_state.pbs, "backup cancelled");
526 + proxmox_backup_abort(backup_state.pbs, "backup canceled");
529 - bool running_jobs = 0;
530 - GList *l = backup_state.di_list;
532 - PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
533 - l = g_list_next(l);
534 - if (!di->completed && di->bs) {
535 - for (BlockJob *job = block_job_next(NULL); job; job = block_job_next(job)) {
536 - if (job->job.driver->job_type != JOB_TYPE_BACKUP) {
539 + qemu_mutex_unlock(&backup_state.backup_mutex);
541 - BackupBlockJob *bjob = container_of(job, BackupBlockJob, common);
542 - if (bjob && bjob->source_bs == di->bs) {
543 - AioContext *aio_context = job->job.aio_context;
544 - aio_context_acquire(aio_context);
547 - if (!di->completed) {
549 - job_cancel(&job->job, false);
551 - aio_context_release(aio_context);
553 + BlockJob *next_job = NULL;
555 + qemu_mutex_lock(&backup_state.backup_mutex);
557 + GList *l = backup_state.di_list;
559 + PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
560 + l = g_list_next(l);
562 + BlockJob *job = lookup_active_block_job(di);
570 - qemu_co_mutex_unlock(&backup_state.backup_mutex);
571 + qemu_mutex_unlock(&backup_state.backup_mutex);
573 - if (running_jobs == 0) pvebackup_co_cleanup(); // else job will call completion handler
575 + AioContext *aio_context = next_job->job.aio_context;
576 + aio_context_acquire(aio_context);
577 + job_cancel_sync(&next_job->job);
578 + aio_context_release(aio_context);
585 void qmp_backup_cancel(Error **errp)
587 - block_on_coroutine_fn(pvebackup_co_cancel, NULL);
588 + pvebackup_cancel();
591 +// assumes the caller holds backup_mutex
592 static int coroutine_fn pvebackup_co_add_config(
595 @@ -406,46 +419,97 @@ static int coroutine_fn pvebackup_co_add_config(
597 bool job_should_pause(Job *job);
599 -static void coroutine_fn pvebackup_co_run_next_job(void)
600 +static void pvebackup_run_next_job(void)
602 - assert(qemu_in_coroutine());
603 + assert(!qemu_in_coroutine());
605 - qemu_co_mutex_lock(&backup_state.backup_mutex);
606 + qemu_mutex_lock(&backup_state.backup_mutex);
608 GList *l = backup_state.di_list;
610 PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
612 - if (!di->completed && di->bs) {
613 - for (BlockJob *job = block_job_next(NULL); job; job = block_job_next(job)) {
614 - if (job->job.driver->job_type != JOB_TYPE_BACKUP) {
618 - BackupBlockJob *bjob = container_of(job, BackupBlockJob, common);
619 - if (bjob && bjob->source_bs == di->bs) {
620 - AioContext *aio_context = job->job.aio_context;
621 - qemu_co_mutex_unlock(&backup_state.backup_mutex);
622 - aio_context_acquire(aio_context);
624 - if (job_should_pause(&job->job)) {
625 - qemu_co_rwlock_rdlock(&backup_state.stat.rwlock);
626 - bool error_or_canceled = backup_state.stat.error || backup_state.stat.cancel;
627 - qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
629 - if (error_or_canceled) {
630 - job_cancel(&job->job, false);
632 - job_resume(&job->job);
635 - aio_context_release(aio_context);
637 + BlockJob *job = lookup_active_block_job(di);
640 + qemu_mutex_unlock(&backup_state.backup_mutex);
642 + AioContext *aio_context = job->job.aio_context;
643 + aio_context_acquire(aio_context);
645 + if (job_should_pause(&job->job)) {
646 + bool error_or_canceled = pvebackup_error_or_canceled();
647 + if (error_or_canceled) {
648 + job_cancel_sync(&job->job);
650 + job_resume(&job->job);
653 + aio_context_release(aio_context);
658 + block_on_coroutine_fn(pvebackup_co_cleanup, NULL); // no more jobs, run cleanup
660 + qemu_mutex_unlock(&backup_state.backup_mutex);
663 +static bool create_backup_jobs(void) {
665 + assert(!qemu_in_coroutine());
667 + Error *local_err = NULL;
669 + /* create and start all jobs (paused state) */
670 + GList *l = backup_state.di_list;
672 + PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
673 + l = g_list_next(l);
675 + assert(di->target != NULL);
677 + AioContext *aio_context = bdrv_get_aio_context(di->bs);
678 + aio_context_acquire(aio_context);
680 + BlockJob *job = backup_job_create(
681 + NULL, di->bs, di->target, backup_state.speed, MIRROR_SYNC_MODE_FULL, NULL,
682 + BITMAP_SYNC_MODE_NEVER, false, NULL, BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
683 + JOB_DEFAULT, pvebackup_complete_cb, di, 1, NULL, &local_err);
685 + aio_context_release(aio_context);
687 + if (!job || local_err != NULL) {
688 + Error *create_job_err = NULL;
689 + error_setg(&create_job_err, "backup_job_create failed: %s",
690 + local_err ? error_get_pretty(local_err) : "null");
692 + pvebackup_propagate_error(create_job_err);
695 + job_start(&job->job);
697 + bdrv_unref(di->target);
701 + bool errors = pvebackup_error_or_canceled();
704 + l = backup_state.di_list;
706 + PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
707 + l = g_list_next(l);
710 + bdrv_unref(di->target);
715 - qemu_co_mutex_unlock(&backup_state.backup_mutex);
720 typedef struct QmpBackupTask {
721 @@ -476,7 +540,8 @@ typedef struct QmpBackupTask {
725 -static void coroutine_fn pvebackup_co_start(void *opaque)
726 +// assumes the caller holds backup_mutex
727 +static void coroutine_fn pvebackup_co_prepare(void *opaque)
729 assert(qemu_in_coroutine());
731 @@ -495,16 +560,12 @@ static void coroutine_fn pvebackup_co_start(void *opaque)
732 GList *di_list = NULL;
737 const char *config_name = "qemu-server.conf";
738 const char *firewall_name = "qemu-server.fw";
740 - qemu_co_mutex_lock(&backup_state.backup_mutex);
742 if (backup_state.di_list) {
743 - qemu_co_mutex_unlock(&backup_state.backup_mutex);
744 - error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
745 + error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
746 "previous backup not finished");
749 @@ -631,7 +692,7 @@ static void coroutine_fn pvebackup_co_start(void *opaque)
753 - if (!(di->target = bdrv_backup_dump_create(dump_cb_block_size, di->size, pvebackup_co_dump_cb, di, task->errp))) {
754 + if (!(di->target = bdrv_backup_dump_create(dump_cb_block_size, di->size, pvebackup_co_dump_pbs_cb, di, task->errp))) {
758 @@ -652,7 +713,7 @@ static void coroutine_fn pvebackup_co_start(void *opaque)
759 PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
762 - if (!(di->target = bdrv_backup_dump_create(VMA_CLUSTER_SIZE, di->size, pvebackup_co_dump_cb, di, task->errp))) {
763 + if (!(di->target = bdrv_backup_dump_create(VMA_CLUSTER_SIZE, di->size, pvebackup_co_dump_vma_cb, di, task->errp))) {
767 @@ -717,9 +778,7 @@ static void coroutine_fn pvebackup_co_start(void *opaque)
769 /* initialize global backup_state now */
771 - qemu_co_rwlock_wrlock(&backup_state.stat.rwlock);
773 - backup_state.stat.cancel = false;
774 + qemu_mutex_lock(&backup_state.stat.lock);
776 if (backup_state.stat.error) {
777 error_free(backup_state.stat.error);
778 @@ -742,7 +801,7 @@ static void coroutine_fn pvebackup_co_start(void *opaque)
779 backup_state.stat.transferred = 0;
780 backup_state.stat.zero_bytes = 0;
782 - qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
783 + qemu_mutex_unlock(&backup_state.stat.lock);
785 backup_state.speed = (task->has_speed && task->speed > 0) ? task->speed : 0;
787 @@ -751,48 +810,6 @@ static void coroutine_fn pvebackup_co_start(void *opaque)
789 backup_state.di_list = di_list;
791 - /* start all jobs (paused state) */
794 - PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
795 - l = g_list_next(l);
797 - // make sure target runs in same aoi_context as source
798 - AioContext *aio_context = bdrv_get_aio_context(di->bs);
799 - aio_context_acquire(aio_context);
800 - GSList *ignore = NULL;
801 - bdrv_set_aio_context_ignore(di->target, aio_context, &ignore);
802 - g_slist_free(ignore);
803 - aio_context_release(aio_context);
805 - job = backup_job_create(NULL, di->bs, di->target, backup_state.speed, MIRROR_SYNC_MODE_FULL, NULL,
806 - BITMAP_SYNC_MODE_NEVER, false, NULL, BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
807 - JOB_DEFAULT, pvebackup_complete_cb, di, 1, NULL, &local_err);
808 - if (!job || local_err != NULL) {
809 - qemu_co_rwlock_wrlock(&backup_state.stat.rwlock);
810 - error_setg(&backup_state.stat.error, "backup_job_create failed");
811 - qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
814 - job_start(&job->job);
816 - bdrv_unref(di->target);
821 - qemu_co_mutex_unlock(&backup_state.backup_mutex);
823 - qemu_co_rwlock_rdlock(&backup_state.stat.rwlock);
824 - bool no_errors = !backup_state.stat.error;
825 - qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
828 - pvebackup_co_run_next_job(); // run one job
830 - pvebackup_co_cancel(NULL);
833 uuid_info = g_malloc0(sizeof(*uuid_info));
834 uuid_info->UUID = uuid_str;
836 @@ -835,8 +852,6 @@ err:
840 - qemu_co_mutex_unlock(&backup_state.backup_mutex);
845 @@ -880,32 +895,31 @@ UuidInfo *qmp_backup(
849 - block_on_coroutine_fn(pvebackup_co_start, &task);
850 + qemu_mutex_lock(&backup_state.backup_mutex);
852 - return task.result;
854 + block_on_coroutine_fn(pvebackup_co_prepare, &task);
856 + if (*errp == NULL) {
857 + create_backup_jobs();
858 + qemu_mutex_unlock(&backup_state.backup_mutex);
859 + pvebackup_run_next_job();
861 + qemu_mutex_unlock(&backup_state.backup_mutex);
864 -typedef struct QmpQueryBackupTask {
866 - BackupStatus *result;
867 -} QmpQueryBackupTask;
868 + return task.result;
871 -static void coroutine_fn pvebackup_co_query(void *opaque)
872 +BackupStatus *qmp_query_backup(Error **errp)
874 - assert(qemu_in_coroutine());
876 - QmpQueryBackupTask *task = opaque;
878 BackupStatus *info = g_malloc0(sizeof(*info));
880 - qemu_co_rwlock_rdlock(&backup_state.stat.rwlock);
881 + qemu_mutex_lock(&backup_state.stat.lock);
883 if (!backup_state.stat.start_time) {
884 /* not started, return {} */
885 - task->result = info;
886 - qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
888 + qemu_mutex_unlock(&backup_state.stat.lock);
892 info->has_status = true;
893 @@ -941,19 +955,7 @@ static void coroutine_fn pvebackup_co_query(void *opaque)
894 info->has_transferred = true;
895 info->transferred = backup_state.stat.transferred;
897 - task->result = info;
898 + qemu_mutex_unlock(&backup_state.stat.lock);
900 - qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
903 -BackupStatus *qmp_query_backup(Error **errp)
905 - QmpQueryBackupTask task = {
910 - block_on_coroutine_fn(pvebackup_co_query, &task);
912 - return task.result;