+++ /dev/null
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Stefan Reiter <s.reiter@proxmox.com>
-Date: Mon, 28 Sep 2020 13:40:51 +0200
-Subject: [PATCH] PVE-Backup: Don't block on finishing and cleanup
- create_backup_jobs
-
-proxmox_backup_co_finish is already async, but previously we would wait
-for the coroutine using block_on_coroutine_fn(). Avoid this by
-scheduling pvebackup_co_complete_stream (and thus pvebackup_co_cleanup)
-as a real coroutine when calling from pvebackup_complete_cb. This is safe,
-since complete_stream uses the backup_mutex internally to synchronize,
-and other streams can happily continue writing in the meantime anyway.
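-
-For illustration, the callback now merely records the return value and
-schedules the coroutine version of the completion handler (this mirrors
-the pvebackup_complete_cb hunk below):
-
-    static void pvebackup_complete_cb(void *opaque, int ret)
-    {
-        PVEBackupDevInfo *di = opaque;
-        di->completed_ret = ret;
-        /* di points into the global backup_state, so it stays valid
-         * until pvebackup_co_complete_stream has consumed it */
-        Coroutine *co = qemu_coroutine_create(pvebackup_co_complete_stream, di);
-        aio_co_enter(qemu_get_aio_context(), co);
-    }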
-
-To accommodate this, backup_mutex is converted to a CoMutex. This means
-converting every user to a coroutine. This is not just useful here, but
-will come in handy once this series[0] is merged, and QMP calls can be
-yield-able coroutines too. Then we can also finally get rid of
-block_on_coroutine_fn.
-
-Cases of aio_context_acquire/release from within what is now a coroutine
-are changed to aio_co_reschedule_self, which works since a running
-coroutine always holds the aio lock for the context it is running in.
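-
-In other words, instead of acquiring the target context around a call,
-the coroutine now moves itself there and back. As a sketch (not an
-actual hunk from this patch; bs and do_work() are placeholders):
-
-    AioContext *old_ctx = qemu_get_current_aio_context();
-    aio_co_reschedule_self(bdrv_get_aio_context(bs));
-    /* now running in, and thus holding the lock of, the node's context */
-    do_work(bs);
-    aio_co_reschedule_self(old_ctx);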
-
-job_cancel_sync is called from a BH since it can't be run from a
-coroutine (uses AIO_WAIT_WHILE internally).
-
-Same thing for create_backup_jobs, which is converted to a BH too.
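-
-In both cases the coroutine schedules the BH in its own context, yields,
-and is re-entered by the BH once the synchronous part is done. For the
-cancel path this looks roughly like (see the job_cancel_bh hunk below):
-
-    CoCtxData data = {
-        .ctx = qemu_get_current_aio_context(),
-        .co = qemu_coroutine_self(),
-        .data = &cancel_job->job,
-    };
-    aio_bh_schedule_oneshot(data.ctx, job_cancel_bh, &data);
-    /* job_cancel_bh wakes us again via aio_co_enter(data.ctx, data.co) */
-    qemu_coroutine_yield();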
-
-To communicate the finishing state, a new property is introduced to
-query-backup: 'finishing'. A new state is explicitly not used, since
-that would break compatibility with older qemu-server versions.
-
-Also fix create_backup_jobs:
-
-No more weird bool returns, just the standard "errp" format used
-everywhere else too. With this, if backup_job_create fails, the error
-message is actually returned over QMP and can be shown to the user.
-
-To facilitate correct cleanup on such an error, we call
-create_backup_jobs as a bottom half directly from pvebackup_co_prepare.
-This additionally allows us to actually hold the backup_mutex during
-operation.
-
-Also add a job_cancel_sync before job_unref, since a job must be in
-STATUS_NULL to be deleted by unref; previously, this could trigger an
-assert.
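-
-The cleanup path for a partially created job transaction thus becomes
-(as in the create_backup_jobs_bh hunk below):
-
-    if (di->job) {
-        AioContext *ctx = di->job->job.aio_context;
-        aio_context_acquire(ctx);
-        job_cancel_sync(&di->job->job, true); /* forces job to STATUS_NULL */
-        job_unref(&di->job->job);
-        aio_context_release(ctx);
-    }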
-
-[0] https://lists.gnu.org/archive/html/qemu-devel/2020-09/msg03515.html
-
-Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
-[add new force parameter to job_cancel_sync calls]
-Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
----
- pve-backup.c | 217 ++++++++++++++++++++++++++++---------------
- qapi/block-core.json | 5 +-
- 2 files changed, 144 insertions(+), 78 deletions(-)
-
-diff --git a/pve-backup.c b/pve-backup.c
-index 63c686463f..6f05796fad 100644
---- a/pve-backup.c
-+++ b/pve-backup.c
-@@ -33,7 +33,9 @@ const char *PBS_BITMAP_NAME = "pbs-incremental-dirty-bitmap";
-
- static struct PVEBackupState {
- struct {
-- // Everithing accessed from qmp_backup_query command is protected using lock
-+ // Everything accessed from qmp_backup_query command is protected using
-+ // this lock. Do NOT hold this lock for long times, as it is sometimes
-+ // acquired from coroutines, and thus any wait time may block the guest.
- QemuMutex lock;
- Error *error;
- time_t start_time;
-@@ -47,20 +49,22 @@ static struct PVEBackupState {
- size_t reused;
- size_t zero_bytes;
- GList *bitmap_list;
-+ bool finishing;
-+ bool starting;
- } stat;
- int64_t speed;
- VmaWriter *vmaw;
- ProxmoxBackupHandle *pbs;
- GList *di_list;
- JobTxn *txn;
-- QemuMutex backup_mutex;
-+ CoMutex backup_mutex;
- CoMutex dump_callback_mutex;
- } backup_state;
-
- static void pvebackup_init(void)
- {
- qemu_mutex_init(&backup_state.stat.lock);
-- qemu_mutex_init(&backup_state.backup_mutex);
-+ qemu_co_mutex_init(&backup_state.backup_mutex);
- qemu_co_mutex_init(&backup_state.dump_callback_mutex);
- }
-
-@@ -72,6 +76,7 @@ typedef struct PVEBackupDevInfo {
- size_t size;
- uint64_t block_size;
- uint8_t dev_id;
-+ int completed_ret; // INT_MAX if not completed
- char targetfile[PATH_MAX];
- BdrvDirtyBitmap *bitmap;
- BlockDriverState *target;
-@@ -227,12 +232,12 @@ pvebackup_co_dump_vma_cb(
- }
-
- // assumes the caller holds backup_mutex
--static void coroutine_fn pvebackup_co_cleanup(void *unused)
-+static void coroutine_fn pvebackup_co_cleanup(void)
- {
- assert(qemu_in_coroutine());
-
- qemu_mutex_lock(&backup_state.stat.lock);
-- backup_state.stat.end_time = time(NULL);
-+ backup_state.stat.finishing = true;
- qemu_mutex_unlock(&backup_state.stat.lock);
-
- if (backup_state.vmaw) {
-@@ -261,35 +266,29 @@ static void coroutine_fn pvebackup_co_cleanup(void *unused)
-
- g_list_free(backup_state.di_list);
- backup_state.di_list = NULL;
-+
-+ qemu_mutex_lock(&backup_state.stat.lock);
-+ backup_state.stat.end_time = time(NULL);
-+ backup_state.stat.finishing = false;
-+ qemu_mutex_unlock(&backup_state.stat.lock);
- }
-
--// assumes the caller holds backup_mutex
--static void coroutine_fn pvebackup_complete_stream(void *opaque)
-+static void coroutine_fn pvebackup_co_complete_stream(void *opaque)
- {
- PVEBackupDevInfo *di = opaque;
-+ int ret = di->completed_ret;
-
-- bool error_or_canceled = pvebackup_error_or_canceled();
--
-- if (backup_state.vmaw) {
-- vma_writer_close_stream(backup_state.vmaw, di->dev_id);
-+ qemu_mutex_lock(&backup_state.stat.lock);
-+ bool starting = backup_state.stat.starting;
-+ qemu_mutex_unlock(&backup_state.stat.lock);
-+ if (starting) {
-+ /* in 'starting' state, no tasks have been run yet, meaning we can (and
-+ * must) skip all cleanup, as we don't know what has and hasn't been
-+ * initialized yet. */
-+ return;
- }
-
-- if (backup_state.pbs && !error_or_canceled) {
-- Error *local_err = NULL;
-- proxmox_backup_co_close_image(backup_state.pbs, di->dev_id, &local_err);
-- if (local_err != NULL) {
-- pvebackup_propagate_error(local_err);
-- }
-- }
--}
--
--static void pvebackup_complete_cb(void *opaque, int ret)
--{
-- assert(!qemu_in_coroutine());
--
-- PVEBackupDevInfo *di = opaque;
--
-- qemu_mutex_lock(&backup_state.backup_mutex);
-+ qemu_co_mutex_lock(&backup_state.backup_mutex);
-
- if (ret < 0) {
- Error *local_err = NULL;
-@@ -301,7 +300,19 @@ static void pvebackup_complete_cb(void *opaque, int ret)
-
- assert(di->target == NULL);
-
-- block_on_coroutine_fn(pvebackup_complete_stream, di);
-+ bool error_or_canceled = pvebackup_error_or_canceled();
-+
-+ if (backup_state.vmaw) {
-+ vma_writer_close_stream(backup_state.vmaw, di->dev_id);
-+ }
-+
-+ if (backup_state.pbs && !error_or_canceled) {
-+ Error *local_err = NULL;
-+ proxmox_backup_co_close_image(backup_state.pbs, di->dev_id, &local_err);
-+ if (local_err != NULL) {
-+ pvebackup_propagate_error(local_err);
-+ }
-+ }
-
- // remove self from job list
- backup_state.di_list = g_list_remove(backup_state.di_list, di);
-@@ -310,21 +321,49 @@ static void pvebackup_complete_cb(void *opaque, int ret)
-
- /* call cleanup if we're the last job */
- if (!g_list_first(backup_state.di_list)) {
-- block_on_coroutine_fn(pvebackup_co_cleanup, NULL);
-+ pvebackup_co_cleanup();
- }
-
-- qemu_mutex_unlock(&backup_state.backup_mutex);
-+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
- }
-
--static void pvebackup_cancel(void)
-+static void pvebackup_complete_cb(void *opaque, int ret)
- {
-- assert(!qemu_in_coroutine());
-+ PVEBackupDevInfo *di = opaque;
-+ di->completed_ret = ret;
-+
-+ /*
-+ * Schedule stream cleanup in async coroutine. close_image and finish might
-+ * take a while, so we can't block on them here. This way it also doesn't
-+ * matter if we're already running in a coroutine or not.
-+ * Note: di is a pointer to an entry in the global backup_state struct, so
-+ * it stays valid.
-+ */
-+ Coroutine *co = qemu_coroutine_create(pvebackup_co_complete_stream, di);
-+ aio_co_enter(qemu_get_aio_context(), co);
-+}
-+
-+/*
-+ * job_cancel(_sync) does not like to be called from coroutines, so defer to
-+ * main loop processing via a bottom half.
-+ */
-+static void job_cancel_bh(void *opaque) {
-+ CoCtxData *data = (CoCtxData*)opaque;
-+ Job *job = (Job*)data->data;
-+ AioContext *job_ctx = job->aio_context;
-+ aio_context_acquire(job_ctx);
-+ job_cancel_sync(job, true);
-+ aio_context_release(job_ctx);
-+ aio_co_enter(data->ctx, data->co);
-+}
-
-+static void coroutine_fn pvebackup_co_cancel(void *opaque)
-+{
- Error *cancel_err = NULL;
- error_setg(&cancel_err, "backup canceled");
- pvebackup_propagate_error(cancel_err);
-
-- qemu_mutex_lock(&backup_state.backup_mutex);
-+ qemu_co_mutex_lock(&backup_state.backup_mutex);
-
- if (backup_state.vmaw) {
- /* make sure vma writer does not block anymore */
-@@ -342,27 +381,22 @@ static void pvebackup_cancel(void)
- ((PVEBackupDevInfo *)bdi->data)->job :
- NULL;
-
-- /* ref the job before releasing the mutex, just to be safe */
- if (cancel_job) {
-- job_ref(&cancel_job->job);
-+ CoCtxData data = {
-+ .ctx = qemu_get_current_aio_context(),
-+ .co = qemu_coroutine_self(),
-+ .data = &cancel_job->job,
-+ };
-+ aio_bh_schedule_oneshot(data.ctx, job_cancel_bh, &data);
-+ qemu_coroutine_yield();
- }
-
-- /* job_cancel_sync may enter the job, so we need to release the
-- * backup_mutex to avoid deadlock */
-- qemu_mutex_unlock(&backup_state.backup_mutex);
--
-- if (cancel_job) {
-- AioContext *aio_context = cancel_job->job.aio_context;
-- aio_context_acquire(aio_context);
-- job_cancel_sync(&cancel_job->job, true);
-- job_unref(&cancel_job->job);
-- aio_context_release(aio_context);
-- }
-+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
- }
-
- void qmp_backup_cancel(Error **errp)
- {
-- pvebackup_cancel();
-+ block_on_coroutine_fn(pvebackup_co_cancel, NULL);
- }
-
- // assumes the caller holds backup_mutex
-@@ -415,10 +449,18 @@ static int coroutine_fn pvebackup_co_add_config(
- goto out;
- }
-
--static bool create_backup_jobs(void) {
-+/*
-+ * backup_job_create can *not* be run from a coroutine (and requires an
-+ * acquired AioContext), so this can't either.
-+ * The caller is responsible that backup_mutex is held nonetheless.
-+ */
-+static void create_backup_jobs_bh(void *opaque) {
-
- assert(!qemu_in_coroutine());
-
-+ CoCtxData *data = (CoCtxData*)opaque;
-+ Error **errp = (Error**)data->data;
-+
- Error *local_err = NULL;
-
- /* create job transaction to synchronize bitmap commit and cancel all
-@@ -454,24 +496,19 @@ static bool create_backup_jobs(void) {
-
- aio_context_release(aio_context);
-
-- if (!job || local_err != NULL) {
-- Error *create_job_err = NULL;
-- error_setg(&create_job_err, "backup_job_create failed: %s",
-- local_err ? error_get_pretty(local_err) : "null");
-+ di->job = job;
-
-- pvebackup_propagate_error(create_job_err);
-+ if (!job || local_err) {
-+ error_setg(errp, "backup_job_create failed: %s",
-+ local_err ? error_get_pretty(local_err) : "null");
- break;
- }
-
-- di->job = job;
--
- bdrv_unref(di->target);
- di->target = NULL;
- }
-
-- bool errors = pvebackup_error_or_canceled();
--
-- if (errors) {
-+ if (*errp) {
- l = backup_state.di_list;
- while (l) {
- PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
-@@ -483,12 +520,17 @@ static bool create_backup_jobs(void) {
- }
-
- if (di->job) {
-+ AioContext *ctx = di->job->job.aio_context;
-+ aio_context_acquire(ctx);
-+ job_cancel_sync(&di->job->job, true);
- job_unref(&di->job->job);
-+ aio_context_release(ctx);
- }
- }
- }
-
-- return errors;
-+ /* return */
-+ aio_co_enter(data->ctx, data->co);
- }
-
- typedef struct QmpBackupTask {
-@@ -525,11 +567,12 @@ typedef struct QmpBackupTask {
- UuidInfo *result;
- } QmpBackupTask;
-
--// assumes the caller holds backup_mutex
- static void coroutine_fn pvebackup_co_prepare(void *opaque)
- {
- assert(qemu_in_coroutine());
-
-+ qemu_co_mutex_lock(&backup_state.backup_mutex);
-+
- QmpBackupTask *task = opaque;
-
- task->result = NULL; // just to be sure
-@@ -550,8 +593,9 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
- const char *firewall_name = "qemu-server.fw";
-
- if (backup_state.di_list) {
-- error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
-+ error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
- "previous backup not finished");
-+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
- return;
- }
-
-@@ -618,6 +662,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
- }
- di->size = size;
- total += size;
-+
-+ di->completed_ret = INT_MAX;
- }
-
- uuid_generate(uuid);
-@@ -849,6 +895,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
- backup_state.stat.dirty = total - backup_state.stat.reused;
- backup_state.stat.transferred = 0;
- backup_state.stat.zero_bytes = 0;
-+ backup_state.stat.finishing = false;
-+ backup_state.stat.starting = true;
-
- qemu_mutex_unlock(&backup_state.stat.lock);
-
-@@ -863,6 +911,33 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
- uuid_info->UUID = uuid_str;
-
- task->result = uuid_info;
-+
-+ /* Run create_backup_jobs_bh outside of coroutine (in BH) but keep
-+ * backup_mutex locked. This is fine, a CoMutex can be held across yield
-+ * points, and we'll release it as soon as the BH reschedules us.
-+ */
-+ CoCtxData waker = {
-+ .co = qemu_coroutine_self(),
-+ .ctx = qemu_get_current_aio_context(),
-+ .data = &local_err,
-+ };
-+ aio_bh_schedule_oneshot(waker.ctx, create_backup_jobs_bh, &waker);
-+ qemu_coroutine_yield();
-+
-+ if (local_err) {
-+ error_propagate(task->errp, local_err);
-+ goto err;
-+ }
-+
-+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
-+
-+ qemu_mutex_lock(&backup_state.stat.lock);
-+ backup_state.stat.starting = false;
-+ qemu_mutex_unlock(&backup_state.stat.lock);
-+
-+ /* start the first job in the transaction */
-+ job_txn_start_seq(backup_state.txn);
-+
- return;
-
- err_mutex:
-@@ -885,6 +960,7 @@ err:
- g_free(di);
- }
- g_list_free(di_list);
-+ backup_state.di_list = NULL;
-
- if (devs) {
- g_strfreev(devs);
-@@ -905,6 +981,8 @@ err:
- }
-
- task->result = NULL;
-+
-+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
- return;
- }
-
-@@ -958,24 +1036,8 @@ UuidInfo *qmp_backup(
- .errp = errp,
- };
-
-- qemu_mutex_lock(&backup_state.backup_mutex);
--
- block_on_coroutine_fn(pvebackup_co_prepare, &task);
-
-- if (*errp == NULL) {
-- bool errors = create_backup_jobs();
-- qemu_mutex_unlock(&backup_state.backup_mutex);
--
-- if (!errors) {
-- /* start the first job in the transaction
-- * note: this might directly enter the job, so we need to do this
-- * after unlocking the backup_mutex */
-- job_txn_start_seq(backup_state.txn);
-- }
-- } else {
-- qemu_mutex_unlock(&backup_state.backup_mutex);
-- }
--
- return task.result;
- }
-
-@@ -1027,6 +1089,7 @@ BackupStatus *qmp_query_backup(Error **errp)
- info->transferred = backup_state.stat.transferred;
- info->has_reused = true;
- info->reused = backup_state.stat.reused;
-+ info->finishing = backup_state.stat.finishing;
-
- qemu_mutex_unlock(&backup_state.stat.lock);
-
-diff --git a/qapi/block-core.json b/qapi/block-core.json
-index 69571d86eb..e6c3687bea 100644
---- a/qapi/block-core.json
-+++ b/qapi/block-core.json
-@@ -774,12 +774,15 @@
- #
- # @uuid: uuid for this backup job
- #
-+# @finishing: if status='active' and finishing=true, then the backup process is
-+# waiting for the target to finish.
-+#
- ##
- { 'struct': 'BackupStatus',
- 'data': {'*status': 'str', '*errmsg': 'str', '*total': 'int', '*dirty': 'int',
- '*transferred': 'int', '*zero-bytes': 'int', '*reused': 'int',
- '*start-time': 'int', '*end-time': 'int',
-- '*backup-file': 'str', '*uuid': 'str' } }
-+ '*backup-file': 'str', '*uuid': 'str', 'finishing': 'bool' } }
-
- ##
- # @BackupFormat: