--- /dev/null
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Dietmar Maurer <dietmar@proxmox.com>
+Date: Mon, 6 Apr 2020 12:16:46 +0200
+Subject: [PATCH] PVE: add savevm-async for background state snapshots
+
+Put qemu_savevm_state_{header,setup} into the main loop and the rest
+of the iteration into a coroutine. The former need to lock the
+iothread (and we can't unlock it in the coroutine), and the latter
+can't deal with being in a separate thread, so a coroutine it must
+be.
+
+Truncate output file at 1024 boundary.
+
+Do not block the VM and save the state on aborting a snapshot, as the
+snapshot will be invalid anyway.
+
+Also, when aborting, wait for the target file to be closed, otherwise a
+client might run into race-conditions when trying to remove the file
+still opened by QEMU.
+
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
+Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
+[improve aborting]
+Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
+[FE: further improve aborting
+ adapt to removal of QEMUFileOps]
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ hmp-commands-info.hx | 13 +
+ hmp-commands.hx | 33 +++
+ include/migration/snapshot.h | 2 +
+ include/monitor/hmp.h | 5 +
+ migration/meson.build | 1 +
+ migration/savevm-async.c | 531 +++++++++++++++++++++++++++++++++++
+ monitor/hmp-cmds.c | 57 ++++
+ qapi/migration.json | 34 +++
+ qapi/misc.json | 32 +++
+ qemu-options.hx | 12 +
+ softmmu/vl.c | 10 +
+ 11 files changed, 730 insertions(+)
+ create mode 100644 migration/savevm-async.c
+
+diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
+index 188d9ece3b..97b88eaaad 100644
+--- a/hmp-commands-info.hx
++++ b/hmp-commands-info.hx
+@@ -538,6 +538,19 @@ SRST
+ Show current migration parameters.
+ ERST
+
++ {
++ .name = "savevm",
++ .args_type = "",
++ .params = "",
++ .help = "show savevm status",
++ .cmd = hmp_info_savevm,
++ },
++
++SRST
++ ``info savevm``
++ Show savevm status.
++ERST
++
+ {
+ .name = "balloon",
+ .args_type = "",
+diff --git a/hmp-commands.hx b/hmp-commands.hx
+index 182e639d14..bbcc73e942 100644
+--- a/hmp-commands.hx
++++ b/hmp-commands.hx
+@@ -1800,3 +1800,36 @@ ERST
+ "\n\t\t\t\t\t limit on a specified virtual cpu",
+ .cmd = hmp_cancel_vcpu_dirty_limit,
+ },
++
++ {
++ .name = "savevm-start",
++ .args_type = "statefile:s?",
++ .params = "[statefile]",
++ .help = "Prepare for snapshot and halt VM. Save VM state to statefile.",
++ .cmd = hmp_savevm_start,
++ },
++
++ {
++ .name = "snapshot-drive",
++ .args_type = "device:s,name:s",
++ .params = "device name",
++ .help = "Create internal snapshot.",
++ .cmd = hmp_snapshot_drive,
++ },
++
++ {
++ .name = "delete-drive-snapshot",
++ .args_type = "device:s,name:s",
++ .params = "device name",
++ .help = "Delete internal snapshot.",
++ .cmd = hmp_delete_drive_snapshot,
++ },
++
++ {
++ .name = "savevm-end",
++ .args_type = "",
++ .params = "",
++ .help = "Resume VM after snaphot.",
++ .cmd = hmp_savevm_end,
++ .coroutine = true,
++ },
+diff --git a/include/migration/snapshot.h b/include/migration/snapshot.h
+index e72083b117..c846d37806 100644
+--- a/include/migration/snapshot.h
++++ b/include/migration/snapshot.h
+@@ -61,4 +61,6 @@ bool delete_snapshot(const char *name,
+ bool has_devices, strList *devices,
+ Error **errp);
+
++int load_snapshot_from_blockdev(const char *filename, Error **errp);
++
+ #endif
+diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h
+index a618eb1e4e..55067beff1 100644
+--- a/include/monitor/hmp.h
++++ b/include/monitor/hmp.h
+@@ -26,6 +26,7 @@ void hmp_info_status(Monitor *mon, const QDict *qdict);
+ void hmp_info_uuid(Monitor *mon, const QDict *qdict);
+ void hmp_info_chardev(Monitor *mon, const QDict *qdict);
+ void hmp_info_mice(Monitor *mon, const QDict *qdict);
++void hmp_info_savevm(Monitor *mon, const QDict *qdict);
+ void hmp_info_migrate(Monitor *mon, const QDict *qdict);
+ void hmp_info_migrate_capabilities(Monitor *mon, const QDict *qdict);
+ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict);
+@@ -80,6 +81,10 @@ void hmp_netdev_add(Monitor *mon, const QDict *qdict);
+ void hmp_netdev_del(Monitor *mon, const QDict *qdict);
+ void hmp_getfd(Monitor *mon, const QDict *qdict);
+ void hmp_closefd(Monitor *mon, const QDict *qdict);
++void hmp_savevm_start(Monitor *mon, const QDict *qdict);
++void hmp_snapshot_drive(Monitor *mon, const QDict *qdict);
++void hmp_delete_drive_snapshot(Monitor *mon, const QDict *qdict);
++void hmp_savevm_end(Monitor *mon, const QDict *qdict);
+ void hmp_sendkey(Monitor *mon, const QDict *qdict);
+ void hmp_screendump(Monitor *mon, const QDict *qdict);
+ void hmp_chardev_add(Monitor *mon, const QDict *qdict);
+diff --git a/migration/meson.build b/migration/meson.build
+index 8cac83c06c..0842d00cd2 100644
+--- a/migration/meson.build
++++ b/migration/meson.build
+@@ -24,6 +24,7 @@ softmmu_ss.add(files(
+ 'multifd-zlib.c',
+ 'postcopy-ram.c',
+ 'savevm.c',
++ 'savevm-async.c',
+ 'socket.c',
+ 'tls.c',
+ ), gnutls)
+diff --git a/migration/savevm-async.c b/migration/savevm-async.c
+new file mode 100644
+index 0000000000..b3692739a0
+--- /dev/null
++++ b/migration/savevm-async.c
+@@ -0,0 +1,531 @@
++#include "qemu/osdep.h"
++#include "migration/channel-savevm-async.h"
++#include "migration/migration.h"
++#include "migration/savevm.h"
++#include "migration/snapshot.h"
++#include "migration/global_state.h"
++#include "migration/ram.h"
++#include "migration/qemu-file.h"
++#include "sysemu/sysemu.h"
++#include "sysemu/runstate.h"
++#include "block/block.h"
++#include "sysemu/block-backend.h"
++#include "qapi/error.h"
++#include "qapi/qmp/qerror.h"
++#include "qapi/qmp/qdict.h"
++#include "qapi/qapi-commands-migration.h"
++#include "qapi/qapi-commands-misc.h"
++#include "qapi/qapi-commands-block.h"
++#include "qemu/cutils.h"
++#include "qemu/timer.h"
++#include "qemu/main-loop.h"
++#include "qemu/rcu.h"
++
++/* #define DEBUG_SAVEVM_STATE */
++
++#ifdef DEBUG_SAVEVM_STATE
++#define DPRINTF(fmt, ...) \
++ do { printf("savevm-async: " fmt, ## __VA_ARGS__); } while (0)
++#else
++#define DPRINTF(fmt, ...) \
++ do { } while (0)
++#endif
++
++enum {
++ SAVE_STATE_DONE,
++ SAVE_STATE_ERROR,
++ SAVE_STATE_ACTIVE,
++ SAVE_STATE_COMPLETED,
++ SAVE_STATE_CANCELLED
++};
++
++
++static struct SnapshotState {
++ BlockBackend *target;
++ size_t bs_pos;
++ int state;
++ Error *error;
++ Error *blocker;
++ int saved_vm_running;
++ QEMUFile *file;
++ int64_t total_time;
++ QEMUBH *finalize_bh;
++ Coroutine *co;
++ QemuCoSleep target_close_wait;
++} snap_state;
++
++static bool savevm_aborted(void)
++{
++ return snap_state.state == SAVE_STATE_CANCELLED ||
++ snap_state.state == SAVE_STATE_ERROR;
++}
++
++SaveVMInfo *qmp_query_savevm(Error **errp)
++{
++ SaveVMInfo *info = g_malloc0(sizeof(*info));
++ struct SnapshotState *s = &snap_state;
++
++ if (s->state != SAVE_STATE_DONE) {
++ info->has_bytes = true;
++ info->bytes = s->bs_pos;
++ switch (s->state) {
++ case SAVE_STATE_ERROR:
++ info->has_status = true;
++ info->status = g_strdup("failed");
++ info->has_total_time = true;
++ info->total_time = s->total_time;
++ if (s->error) {
++ info->has_error = true;
++ info->error = g_strdup(error_get_pretty(s->error));
++ }
++ break;
++ case SAVE_STATE_ACTIVE:
++ info->has_status = true;
++ info->status = g_strdup("active");
++ info->has_total_time = true;
++ info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME)
++ - s->total_time;
++ break;
++ case SAVE_STATE_COMPLETED:
++ info->has_status = true;
++ info->status = g_strdup("completed");
++ info->has_total_time = true;
++ info->total_time = s->total_time;
++ break;
++ }
++ }
++
++ return info;
++}
++
++static int save_snapshot_cleanup(void)
++{
++ int ret = 0;
++
++ DPRINTF("save_snapshot_cleanup\n");
++
++ snap_state.total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) -
++ snap_state.total_time;
++
++ if (snap_state.file) {
++ ret = qemu_fclose(snap_state.file);
++ snap_state.file = NULL;
++ }
++
++ if (snap_state.target) {
++ if (!savevm_aborted()) {
++ /* try to truncate, but ignore errors (will fail on block devices).
++ * note1: bdrv_read() need whole blocks, so we need to round up
++ * note2: PVE requires 1024 (BDRV_SECTOR_SIZE*2) alignment
++ */
++ size_t size = QEMU_ALIGN_UP(snap_state.bs_pos, BDRV_SECTOR_SIZE*2);
++ blk_truncate(snap_state.target, size, false, PREALLOC_MODE_OFF, 0, NULL);
++ }
++ blk_op_unblock_all(snap_state.target, snap_state.blocker);
++ error_free(snap_state.blocker);
++ snap_state.blocker = NULL;
++ blk_unref(snap_state.target);
++ snap_state.target = NULL;
++
++ qemu_co_sleep_wake(&snap_state.target_close_wait);
++ }
++
++ return ret;
++}
++
++static void save_snapshot_error(const char *fmt, ...)
++{
++ va_list ap;
++ char *msg;
++
++ va_start(ap, fmt);
++ msg = g_strdup_vprintf(fmt, ap);
++ va_end(ap);
++
++ DPRINTF("save_snapshot_error: %s\n", msg);
++
++ if (!snap_state.error) {
++ error_set(&snap_state.error, ERROR_CLASS_GENERIC_ERROR, "%s", msg);
++ }
++
++ g_free (msg);
++
++ snap_state.state = SAVE_STATE_ERROR;
++}
++
++static void process_savevm_finalize(void *opaque)
++{
++ int ret;
++ AioContext *iohandler_ctx = iohandler_get_aio_context();
++ MigrationState *ms = migrate_get_current();
++
++ bool aborted = savevm_aborted();
++
++#ifdef DEBUG_SAVEVM_STATE
++ int64_t start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
++#endif
++
++ qemu_bh_delete(snap_state.finalize_bh);
++ snap_state.finalize_bh = NULL;
++ snap_state.co = NULL;
++
++ /* We need to own the target bdrv's context for the following functions,
++ * so move it back. It can stay in the main context and live out its live
++ * there, since we're done with it after this method ends anyway.
++ */
++ aio_context_acquire(iohandler_ctx);
++ blk_set_aio_context(snap_state.target, qemu_get_aio_context(), NULL);
++ aio_context_release(iohandler_ctx);
++
++ ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
++ if (ret < 0) {
++ save_snapshot_error("vm_stop_force_state error %d", ret);
++ }
++
++ if (!aborted) {
++ /* skip state saving if we aborted, snapshot will be invalid anyway */
++ (void)qemu_savevm_state_complete_precopy(snap_state.file, false, false);
++ ret = qemu_file_get_error(snap_state.file);
++ if (ret < 0) {
++ save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
++ }
++ }
++
++ DPRINTF("state saving complete\n");
++ DPRINTF("timing: process_savevm_finalize (state saving) took %ld ms\n",
++ qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time);
++
++ /* clear migration state */
++ migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP,
++ ret || aborted ? MIGRATION_STATUS_FAILED : MIGRATION_STATUS_COMPLETED);
++ ms->to_dst_file = NULL;
++
++ qemu_savevm_state_cleanup();
++
++ ret = save_snapshot_cleanup();
++ if (ret < 0) {
++ save_snapshot_error("save_snapshot_cleanup error %d", ret);
++ } else if (snap_state.state == SAVE_STATE_ACTIVE) {
++ snap_state.state = SAVE_STATE_COMPLETED;
++ } else if (aborted) {
++ /*
++ * If there was an error, there's no need to set a new one here.
++ * If the snapshot was canceled, leave setting the state to
++ * qmp_savevm_end(), which is waked by save_snapshot_cleanup().
++ */
++ } else {
++ save_snapshot_error("process_savevm_cleanup: invalid state: %d",
++ snap_state.state);
++ }
++ if (snap_state.saved_vm_running) {
++ vm_start();
++ snap_state.saved_vm_running = false;
++ }
++
++ DPRINTF("timing: process_savevm_finalize (full) took %ld ms\n",
++ qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time);
++}
++
++static void coroutine_fn process_savevm_co(void *opaque)
++{
++ int ret;
++ int64_t maxlen;
++ BdrvNextIterator it;
++ BlockDriverState *bs = NULL;
++
++#ifdef DEBUG_SAVEVM_STATE
++ int64_t start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
++#endif
++
++ ret = qemu_file_get_error(snap_state.file);
++ if (ret < 0) {
++ save_snapshot_error("qemu_savevm_state_setup failed");
++ return;
++ }
++
++ while (snap_state.state == SAVE_STATE_ACTIVE) {
++ uint64_t pending_size, pend_precopy, pend_compatible, pend_postcopy;
++
++ /* pending is expected to be called without iothread lock */
++ qemu_mutex_unlock_iothread();
++ qemu_savevm_state_pending(snap_state.file, 0, &pend_precopy, &pend_compatible, &pend_postcopy);
++ qemu_mutex_lock_iothread();
++
++ pending_size = pend_precopy + pend_compatible + pend_postcopy;
++
++ maxlen = blk_getlength(snap_state.target) - 30*1024*1024;
++
++ if (pending_size > 400000 && snap_state.bs_pos + pending_size < maxlen) {
++ ret = qemu_savevm_state_iterate(snap_state.file, false);
++ if (ret < 0) {
++ save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
++ break;
++ }
++ DPRINTF("savevm iterate pending size %lu ret %d\n", pending_size, ret);
++ } else {
++ qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
++ ret = global_state_store();
++ if (ret) {
++ save_snapshot_error("global_state_store error %d", ret);
++ break;
++ }
++
++ DPRINTF("savevm iterate complete\n");
++ break;
++ }
++ }
++
++ DPRINTF("timing: process_savevm_co took %ld ms\n",
++ qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time);
++
++#ifdef DEBUG_SAVEVM_STATE
++ int64_t start_time_flush = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
++#endif
++ /* If a drive runs in an IOThread we can flush it async, and only
++ * need to sync-flush whatever IO happens between now and
++ * vm_stop_force_state. bdrv_next can only be called from main AioContext,
++ * so move there now and after every flush.
++ */
++ aio_co_reschedule_self(qemu_get_aio_context());
++ for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
++ /* target has BDRV_O_NO_FLUSH, no sense calling bdrv_flush on it */
++ if (bs == blk_bs(snap_state.target)) {
++ continue;
++ }
++
++ AioContext *bs_ctx = bdrv_get_aio_context(bs);
++ if (bs_ctx != qemu_get_aio_context()) {
++ DPRINTF("savevm: async flushing drive %s\n", bs->filename);
++ aio_co_reschedule_self(bs_ctx);
++ bdrv_flush(bs);
++ aio_co_reschedule_self(qemu_get_aio_context());
++ }
++ }
++
++ DPRINTF("timing: async flushing took %ld ms\n",
++ qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time_flush);
++
++ qemu_bh_schedule(snap_state.finalize_bh);
++}
++
++void qmp_savevm_start(bool has_statefile, const char *statefile, Error **errp)
++{
++ Error *local_err = NULL;
++ MigrationState *ms = migrate_get_current();
++ AioContext *iohandler_ctx = iohandler_get_aio_context();
++
++ int bdrv_oflags = BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH;
++
++ if (snap_state.state != SAVE_STATE_DONE) {
++ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
++ "VM snapshot already started\n");
++ return;
++ }
++
++ if (migration_is_running(ms->state)) {
++ error_set(errp, ERROR_CLASS_GENERIC_ERROR, QERR_MIGRATION_ACTIVE);
++ return;
++ }
++
++ if (migrate_use_block()) {
++ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
++ "Block migration and snapshots are incompatible");
++ return;
++ }
++
++ /* initialize snapshot info */
++ snap_state.saved_vm_running = runstate_is_running();
++ snap_state.bs_pos = 0;
++ snap_state.total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
++ snap_state.blocker = NULL;
++ snap_state.target_close_wait.to_wake = NULL;
++
++ if (snap_state.error) {
++ error_free(snap_state.error);
++ snap_state.error = NULL;
++ }
++
++ if (!has_statefile) {
++ vm_stop(RUN_STATE_SAVE_VM);
++ snap_state.state = SAVE_STATE_COMPLETED;
++ return;
++ }
++
++ if (qemu_savevm_state_blocked(errp)) {
++ return;
++ }
++
++ /* Open the image */
++ QDict *options = NULL;
++ options = qdict_new();
++ qdict_put_str(options, "driver", "raw");
++ snap_state.target = blk_new_open(statefile, NULL, options, bdrv_oflags, &local_err);
++ if (!snap_state.target) {
++ error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
++ goto restart;
++ }
++
++ QIOChannel *ioc = QIO_CHANNEL(qio_channel_savevm_async_new(snap_state.target,
++ &snap_state.bs_pos));
++ snap_state.file = qemu_file_new_output(ioc);
++
++ if (!snap_state.file) {
++ error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
++ goto restart;
++ }
++
++ /*
++ * qemu_savevm_* paths use migration code and expect a migration state.
++ * State is cleared in process_savevm_co, but has to be initialized
++ * here (blocking main thread, from QMP) to avoid race conditions.
++ */
++ migrate_init(ms);
++ memset(&ram_counters, 0, sizeof(ram_counters));
++ ms->to_dst_file = snap_state.file;
++
++ error_setg(&snap_state.blocker, "block device is in use by savevm");
++ blk_op_block_all(snap_state.target, snap_state.blocker);
++
++ snap_state.state = SAVE_STATE_ACTIVE;
++ snap_state.finalize_bh = qemu_bh_new(process_savevm_finalize, &snap_state);
++ snap_state.co = qemu_coroutine_create(&process_savevm_co, NULL);
++ qemu_mutex_unlock_iothread();
++ qemu_savevm_state_header(snap_state.file);
++ qemu_savevm_state_setup(snap_state.file);
++ qemu_mutex_lock_iothread();
++
++ /* Async processing from here on out happens in iohandler context, so let
++ * the target bdrv have its home there.
++ */
++ blk_set_aio_context(snap_state.target, iohandler_ctx, &local_err);
++
++ aio_co_schedule(iohandler_ctx, snap_state.co);
++
++ return;
++
++restart:
++
++ save_snapshot_error("setup failed");
++
++ if (snap_state.saved_vm_running) {
++ vm_start();
++ snap_state.saved_vm_running = false;
++ }
++}
++
++void coroutine_fn qmp_savevm_end(Error **errp)
++{
++ int64_t timeout;
++
++ if (snap_state.state == SAVE_STATE_DONE) {
++ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
++ "VM snapshot not started\n");
++ return;
++ }
++
++ if (snap_state.state == SAVE_STATE_ACTIVE) {
++ snap_state.state = SAVE_STATE_CANCELLED;
++ goto wait_for_close;
++ }
++
++ if (snap_state.saved_vm_running) {
++ vm_start();
++ snap_state.saved_vm_running = false;
++ }
++
++ snap_state.state = SAVE_STATE_DONE;
++
++wait_for_close:
++ if (!snap_state.target) {
++ DPRINTF("savevm-end: no target file open\n");
++ return;
++ }
++
++ /* wait until cleanup is done before returning, this ensures that after this
++ * call exits the statefile will be closed and can be removed immediately */
++ DPRINTF("savevm-end: waiting for cleanup\n");
++ timeout = 30L * 1000 * 1000 * 1000;
++ qemu_co_sleep_ns_wakeable(&snap_state.target_close_wait,
++ QEMU_CLOCK_REALTIME, timeout);
++ if (snap_state.target) {
++ save_snapshot_error("timeout waiting for target file close in "
++ "qmp_savevm_end");
++ /* we cannot assume the snapshot finished in this case, so leave the
++ * state alone - caller has to figure something out */
++ return;
++ }
++
++ // File closed and no other error, so ensure next snapshot can be started.
++ if (snap_state.state != SAVE_STATE_ERROR) {
++ snap_state.state = SAVE_STATE_DONE;
++ }
++
++ DPRINTF("savevm-end: cleanup done\n");
++}
++
++// FIXME: Deprecated
++void qmp_snapshot_drive(const char *device, const char *name, Error **errp)
++{
++ // Compatibility to older qemu-server.
++ qmp_blockdev_snapshot_internal_sync(device, name, errp);
++}
++
++// FIXME: Deprecated
++void qmp_delete_drive_snapshot(const char *device, const char *name,
++ Error **errp)
++{
++ // Compatibility to older qemu-server.
++ (void)qmp_blockdev_snapshot_delete_internal_sync(device, false, NULL,
++ true, name, errp);
++}
++
++int load_snapshot_from_blockdev(const char *filename, Error **errp)
++{
++ BlockBackend *be;
++ Error *local_err = NULL;
++ Error *blocker = NULL;
++
++ QEMUFile *f;
++ size_t bs_pos = 0;
++ int ret = -EINVAL;
++
++ be = blk_new_open(filename, NULL, NULL, 0, &local_err);
++
++ if (!be) {
++ error_setg(errp, "Could not open VM state file");
++ goto the_end;
++ }
++
++ error_setg(&blocker, "block device is in use by load state");
++ blk_op_block_all(be, blocker);
++
++ /* restore the VM state */
++ f = qemu_file_new_input(QIO_CHANNEL(qio_channel_savevm_async_new(be, &bs_pos)));
++ if (!f) {
++ error_setg(errp, "Could not open VM state file");
++ goto the_end;
++ }
++
++ qemu_system_reset(SHUTDOWN_CAUSE_NONE);
++ ret = qemu_loadvm_state(f);
++
++ /* dirty bitmap migration has a special case we need to trigger manually */
++ dirty_bitmap_mig_before_vm_start();
++
++ qemu_fclose(f);
++ migration_incoming_state_destroy();
++ if (ret < 0) {
++ error_setg_errno(errp, -ret, "Error while loading VM state");
++ goto the_end;
++ }
++
++ ret = 0;
++
++ the_end:
++ if (be) {
++ blk_op_unblock_all(be, blocker);
++ error_free(blocker);
++ blk_unref(be);
++ }
++ return ret;
++}
+diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
+index 15572befb1..1507180990 100644
+--- a/monitor/hmp-cmds.c
++++ b/monitor/hmp-cmds.c
+@@ -1925,6 +1925,63 @@ void hmp_info_memory_devices(Monitor *mon, const QDict *qdict)
+ hmp_handle_error(mon, err);
+ }
+
++void hmp_savevm_start(Monitor *mon, const QDict *qdict)
++{
++ Error *errp = NULL;
++ const char *statefile = qdict_get_try_str(qdict, "statefile");
++
++ qmp_savevm_start(statefile != NULL, statefile, &errp);
++ hmp_handle_error(mon, errp);
++}
++
++void hmp_snapshot_drive(Monitor *mon, const QDict *qdict)
++{
++ Error *errp = NULL;
++ const char *name = qdict_get_str(qdict, "name");
++ const char *device = qdict_get_str(qdict, "device");
++
++ qmp_snapshot_drive(device, name, &errp);
++ hmp_handle_error(mon, errp);
++}
++
++void hmp_delete_drive_snapshot(Monitor *mon, const QDict *qdict)
++{
++ Error *errp = NULL;
++ const char *name = qdict_get_str(qdict, "name");
++ const char *device = qdict_get_str(qdict, "device");
++
++ qmp_delete_drive_snapshot(device, name, &errp);
++ hmp_handle_error(mon, errp);
++}
++
++void coroutine_fn hmp_savevm_end(Monitor *mon, const QDict *qdict)
++{
++ Error *errp = NULL;
++
++ qmp_savevm_end(&errp);
++ hmp_handle_error(mon, errp);
++}
++
++void hmp_info_savevm(Monitor *mon, const QDict *qdict)
++{
++ SaveVMInfo *info;
++ info = qmp_query_savevm(NULL);
++
++ if (info->has_status) {
++ monitor_printf(mon, "savevm status: %s\n", info->status);
++ monitor_printf(mon, "total time: %" PRIu64 " milliseconds\n",
++ info->total_time);
++ } else {
++ monitor_printf(mon, "savevm status: not running\n");
++ }
++ if (info->has_bytes) {
++ monitor_printf(mon, "Bytes saved: %"PRIu64"\n", info->bytes);
++ }
++ if (info->has_error) {
++ monitor_printf(mon, "Error: %s\n", info->error);
++ }
++}
++
+ void hmp_info_iothreads(Monitor *mon, const QDict *qdict)
+ {
+ IOThreadInfoList *info_list = qmp_query_iothreads(NULL);
+diff --git a/qapi/migration.json b/qapi/migration.json
+index 81185d4311..3129f71fa8 100644
+--- a/qapi/migration.json
++++ b/qapi/migration.json
+@@ -261,6 +261,40 @@
+ '*compression': 'CompressionStats',
+ '*socket-address': ['SocketAddress'] } }
+
++##
++# @SaveVMInfo:
++#
++# Information about current migration process.
++#
++# @status: string describing the current savevm status.
++# This can be 'active', 'completed', 'failed'.
++# If this field is not returned, no savevm process
++# has been initiated
++#
++# @error: string containing error message is status is failed.
++#
++# @total-time: total amount of milliseconds since savevm started.
++# If savevm has ended, it returns the total save time
++#
++# @bytes: total amount of data transfered
++#
++# Since: 1.3
++##
++{ 'struct': 'SaveVMInfo',
++ 'data': {'*status': 'str', '*error': 'str',
++ '*total-time': 'int', '*bytes': 'int'} }
++
++##
++# @query-savevm:
++#
++# Returns information about current savevm process.
++#
++# Returns: @SaveVMInfo
++#
++# Since: 1.3
++##
++{ 'command': 'query-savevm', 'returns': 'SaveVMInfo' }
++
+ ##
+ # @query-migrate:
+ #
+diff --git a/qapi/misc.json b/qapi/misc.json
+index 27ef5a2b20..b3ce75dcae 100644
+--- a/qapi/misc.json
++++ b/qapi/misc.json
+@@ -435,6 +435,38 @@
+ ##
+ { 'command': 'query-fdsets', 'returns': ['FdsetInfo'] }
+
++##
++# @savevm-start:
++#
++# Prepare for snapshot and halt VM. Save VM state to statefile.
++#
++##
++{ 'command': 'savevm-start', 'data': { '*statefile': 'str' } }
++
++##
++# @snapshot-drive:
++#
++# Create an internal drive snapshot.
++#
++##
++{ 'command': 'snapshot-drive', 'data': { 'device': 'str', 'name': 'str' } }
++
++##
++# @delete-drive-snapshot:
++#
++# Delete a drive snapshot.
++#
++##
++{ 'command': 'delete-drive-snapshot', 'data': { 'device': 'str', 'name': 'str' } }
++
++##
++# @savevm-end:
++#
++# Resume VM after a snapshot.
++#
++##
++{ 'command': 'savevm-end', 'coroutine': true }
++
+ ##
+ # @CommandLineParameterType:
+ #
+diff --git a/qemu-options.hx b/qemu-options.hx
+index 31c04f7eea..c2ca6e91b5 100644
+--- a/qemu-options.hx
++++ b/qemu-options.hx
+@@ -4341,6 +4341,18 @@ SRST
+ Start right away with a saved state (``loadvm`` in monitor)
+ ERST
+
++DEF("loadstate", HAS_ARG, QEMU_OPTION_loadstate, \
++ "-loadstate file\n" \
++ " start right away with a saved state\n",
++ QEMU_ARCH_ALL)
++SRST
++``-loadstate file``
++ Start right away with a saved state. This option does not rollback
++ disk state like @code{loadvm}, so user must make sure that disk
++ have correct state. @var{file} can be any valid device URL. See the section
++ for "Device URL Syntax" for more information.
++ERST
++
+ #ifndef _WIN32
+ DEF("daemonize", 0, QEMU_OPTION_daemonize, \
+ "-daemonize daemonize QEMU after initializing\n", QEMU_ARCH_ALL)
+diff --git a/softmmu/vl.c b/softmmu/vl.c
+index 706bd7cff7..b8637c4262 100644
+--- a/softmmu/vl.c
++++ b/softmmu/vl.c
+@@ -165,6 +165,7 @@ static const char *accelerators;
+ static bool have_custom_ram_size;
+ static const char *ram_memdev_id;
+ static QDict *machine_opts_dict;
++static const char *loadstate;
+ static QTAILQ_HEAD(, ObjectOption) object_opts = QTAILQ_HEAD_INITIALIZER(object_opts);
+ static QTAILQ_HEAD(, DeviceOption) device_opts = QTAILQ_HEAD_INITIALIZER(device_opts);
+ static int display_remote;
+@@ -2584,6 +2585,12 @@ void qmp_x_exit_preconfig(Error **errp)
+
+ if (loadvm) {
+ load_snapshot(loadvm, NULL, false, NULL, &error_fatal);
++ } else if (loadstate) {
++ Error *local_err = NULL;
++ if (load_snapshot_from_blockdev(loadstate, &local_err) < 0) {
++ error_report_err(local_err);
++ autostart = 0;
++ }
+ }
+ if (replay_mode != REPLAY_MODE_NONE) {
+ replay_vmstate_init();
+@@ -3133,6 +3140,9 @@ void qemu_init(int argc, char **argv, char **envp)
+ case QEMU_OPTION_loadvm:
+ loadvm = optarg;
+ break;
++ case QEMU_OPTION_loadstate:
++ loadstate = optarg;
++ break;
+ case QEMU_OPTION_full_screen:
+ dpy.has_full_screen = true;
+ dpy.full_screen = true;