]> git.proxmox.com Git - pve-qemu.git/blame - debian/patches/pve/0017-PVE-add-savevm-async-for-background-state-snapshots.patch
update submodule and patches to 7.1.0
[pve-qemu.git] / debian / patches / pve / 0017-PVE-add-savevm-async-for-background-state-snapshots.patch
CommitLineData
23102ed6 1From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
6402d961 2From: Dietmar Maurer <dietmar@proxmox.com>
83faa3fe 3Date: Mon, 6 Apr 2020 12:16:46 +0200
817b7667 4Subject: [PATCH] PVE: add savevm-async for background state snapshots
95259824 5
d7f4e01a
TL
6Put qemu_savevm_state_{header,setup} into the main loop and the rest
7of the iteration into a coroutine. The former need to lock the
8iothread (and we can't unlock it in the coroutine), and the latter
9can't deal with being in a separate thread, so a coroutine it must
10be.
11
817b7667
SR
12Truncate output file at 1024 boundary.
13
14Do not block the VM and save the state on aborting a snapshot, as the
15snapshot will be invalid anyway.
16
17Also, when aborting, wait for the target file to be closed, otherwise a
18client might run into race-conditions when trying to remove the file
19still opened by QEMU.
20
b855dce7 21Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
6402d961 22Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
d7f4e01a 23Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
817b7667
SR
24[improve aborting]
25Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
5b15e2ec
FE
26[FE: further improve aborting
27 adapt to removal of QEMUFileOps]
563c5928 28Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
95259824 29---
b855dce7 30 hmp-commands-info.hx | 13 +
5b15e2ec 31 hmp-commands.hx | 33 +++
8dca018b 32 include/migration/snapshot.h | 2 +
be901f66 33 include/monitor/hmp.h | 5 +
817b7667 34 migration/meson.build | 1 +
5b15e2ec 35 migration/savevm-async.c | 531 +++++++++++++++++++++++++++++++++++
d7f4e01a 36 monitor/hmp-cmds.c | 57 ++++
5b15e2ec
FE
37 qapi/migration.json | 34 +++
38 qapi/misc.json | 32 +++
83faa3fe 39 qemu-options.hx | 12 +
83faa3fe 40 softmmu/vl.c | 10 +
5b15e2ec 41 11 files changed, 730 insertions(+)
817b7667 42 create mode 100644 migration/savevm-async.c
95259824 43
95259824 44diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
5b15e2ec 45index 188d9ece3b..97b88eaaad 100644
95259824
WB
46--- a/hmp-commands-info.hx
47+++ b/hmp-commands-info.hx
dc9827a6 48@@ -538,6 +538,19 @@ SRST
8dca018b 49 Show current migration parameters.
83faa3fe
TL
50 ERST
51
b855dce7 52+ {
95259824
WB
53+ .name = "savevm",
54+ .args_type = "",
55+ .params = "",
56+ .help = "show savevm status",
a544966d 57+ .cmd = hmp_info_savevm,
95259824
WB
58+ },
59+
83faa3fe
TL
60+SRST
61+ ``info savevm``
62+ Show savevm status.
63+ERST
64+
b855dce7 65 {
83faa3fe
TL
66 .name = "balloon",
67 .args_type = "",
95259824 68diff --git a/hmp-commands.hx b/hmp-commands.hx
5b15e2ec 69index 182e639d14..bbcc73e942 100644
95259824
WB
70--- a/hmp-commands.hx
71+++ b/hmp-commands.hx
5b15e2ec
FE
72@@ -1800,3 +1800,36 @@ ERST
73 "\n\t\t\t\t\t limit on a specified virtual cpu",
74 .cmd = hmp_cancel_vcpu_dirty_limit,
83faa3fe 75 },
95259824
WB
76+
77+ {
78+ .name = "savevm-start",
79+ .args_type = "statefile:s?",
80+ .params = "[statefile]",
81+ .help = "Prepare for snapshot and halt VM. Save VM state to statefile.",
a544966d 82+ .cmd = hmp_savevm_start,
95259824
WB
83+ },
84+
85+ {
86+ .name = "snapshot-drive",
87+ .args_type = "device:s,name:s",
88+ .params = "device name",
89+ .help = "Create internal snapshot.",
a544966d 90+ .cmd = hmp_snapshot_drive,
95259824
WB
91+ },
92+
93+ {
94+ .name = "delete-drive-snapshot",
95+ .args_type = "device:s,name:s",
96+ .params = "device name",
97+ .help = "Delete internal snapshot.",
a544966d 98+ .cmd = hmp_delete_drive_snapshot,
95259824
WB
99+ },
100+
101+ {
102+ .name = "savevm-end",
103+ .args_type = "",
104+ .params = "",
105+ .help = "Resume VM after snaphot.",
817b7667
SR
106+ .cmd = hmp_savevm_end,
107+ .coroutine = true,
95259824 108+ },
be901f66 109diff --git a/include/migration/snapshot.h b/include/migration/snapshot.h
8dca018b 110index e72083b117..c846d37806 100644
be901f66
SR
111--- a/include/migration/snapshot.h
112+++ b/include/migration/snapshot.h
8dca018b
SR
113@@ -61,4 +61,6 @@ bool delete_snapshot(const char *name,
114 bool has_devices, strList *devices,
115 Error **errp);
be901f66 116
be901f66 117+int load_snapshot_from_blockdev(const char *filename, Error **errp);
8dca018b 118+
be901f66
SR
119 #endif
120diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h
5b15e2ec 121index a618eb1e4e..55067beff1 100644
be901f66
SR
122--- a/include/monitor/hmp.h
123+++ b/include/monitor/hmp.h
4567474e 124@@ -26,6 +26,7 @@ void hmp_info_status(Monitor *mon, const QDict *qdict);
be901f66
SR
125 void hmp_info_uuid(Monitor *mon, const QDict *qdict);
126 void hmp_info_chardev(Monitor *mon, const QDict *qdict);
127 void hmp_info_mice(Monitor *mon, const QDict *qdict);
128+void hmp_info_savevm(Monitor *mon, const QDict *qdict);
129 void hmp_info_migrate(Monitor *mon, const QDict *qdict);
130 void hmp_info_migrate_capabilities(Monitor *mon, const QDict *qdict);
131 void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict);
4567474e 132@@ -80,6 +81,10 @@ void hmp_netdev_add(Monitor *mon, const QDict *qdict);
be901f66
SR
133 void hmp_netdev_del(Monitor *mon, const QDict *qdict);
134 void hmp_getfd(Monitor *mon, const QDict *qdict);
135 void hmp_closefd(Monitor *mon, const QDict *qdict);
136+void hmp_savevm_start(Monitor *mon, const QDict *qdict);
137+void hmp_snapshot_drive(Monitor *mon, const QDict *qdict);
138+void hmp_delete_drive_snapshot(Monitor *mon, const QDict *qdict);
139+void hmp_savevm_end(Monitor *mon, const QDict *qdict);
140 void hmp_sendkey(Monitor *mon, const QDict *qdict);
141 void hmp_screendump(Monitor *mon, const QDict *qdict);
83faa3fe 142 void hmp_chardev_add(Monitor *mon, const QDict *qdict);
817b7667 143diff --git a/migration/meson.build b/migration/meson.build
5b15e2ec 144index 8cac83c06c..0842d00cd2 100644
817b7667
SR
145--- a/migration/meson.build
146+++ b/migration/meson.build
5b15e2ec 147@@ -24,6 +24,7 @@ softmmu_ss.add(files(
817b7667
SR
148 'multifd-zlib.c',
149 'postcopy-ram.c',
150 'savevm.c',
151+ 'savevm-async.c',
152 'socket.c',
153 'tls.c',
8dca018b 154 ), gnutls)
817b7667 155diff --git a/migration/savevm-async.c b/migration/savevm-async.c
95259824 156new file mode 100644
5b15e2ec 157index 0000000000..b3692739a0
95259824 158--- /dev/null
817b7667 159+++ b/migration/savevm-async.c
5b15e2ec 160@@ -0,0 +1,531 @@
95259824 161+#include "qemu/osdep.h"
5b15e2ec 162+#include "migration/channel-savevm-async.h"
6838f038
WB
163+#include "migration/migration.h"
164+#include "migration/savevm.h"
165+#include "migration/snapshot.h"
166+#include "migration/global_state.h"
167+#include "migration/ram.h"
168+#include "migration/qemu-file.h"
95259824 169+#include "sysemu/sysemu.h"
6402d961 170+#include "sysemu/runstate.h"
95259824 171+#include "block/block.h"
95259824 172+#include "sysemu/block-backend.h"
53e83913
WB
173+#include "qapi/error.h"
174+#include "qapi/qmp/qerror.h"
175+#include "qapi/qmp/qdict.h"
176+#include "qapi/qapi-commands-migration.h"
177+#include "qapi/qapi-commands-misc.h"
0775f12b 178+#include "qapi/qapi-commands-block.h"
95259824 179+#include "qemu/cutils.h"
817b7667 180+#include "qemu/timer.h"
6402d961
TL
181+#include "qemu/main-loop.h"
182+#include "qemu/rcu.h"
95259824
WB
183+
184+/* #define DEBUG_SAVEVM_STATE */
185+
186+#ifdef DEBUG_SAVEVM_STATE
187+#define DPRINTF(fmt, ...) \
188+ do { printf("savevm-async: " fmt, ## __VA_ARGS__); } while (0)
189+#else
190+#define DPRINTF(fmt, ...) \
191+ do { } while (0)
192+#endif
193+
194+enum {
195+ SAVE_STATE_DONE,
196+ SAVE_STATE_ERROR,
197+ SAVE_STATE_ACTIVE,
198+ SAVE_STATE_COMPLETED,
199+ SAVE_STATE_CANCELLED
200+};
201+
202+
203+static struct SnapshotState {
67af0fa4 204+ BlockBackend *target;
95259824
WB
205+ size_t bs_pos;
206+ int state;
207+ Error *error;
208+ Error *blocker;
209+ int saved_vm_running;
210+ QEMUFile *file;
211+ int64_t total_time;
d7f4e01a
TL
212+ QEMUBH *finalize_bh;
213+ Coroutine *co;
563c5928 214+ QemuCoSleep target_close_wait;
95259824
WB
215+} snap_state;
216+
817b7667
SR
217+static bool savevm_aborted(void)
218+{
219+ return snap_state.state == SAVE_STATE_CANCELLED ||
220+ snap_state.state == SAVE_STATE_ERROR;
221+}
222+
95259824
WB
223+SaveVMInfo *qmp_query_savevm(Error **errp)
224+{
225+ SaveVMInfo *info = g_malloc0(sizeof(*info));
226+ struct SnapshotState *s = &snap_state;
227+
228+ if (s->state != SAVE_STATE_DONE) {
229+ info->has_bytes = true;
230+ info->bytes = s->bs_pos;
231+ switch (s->state) {
232+ case SAVE_STATE_ERROR:
233+ info->has_status = true;
234+ info->status = g_strdup("failed");
235+ info->has_total_time = true;
236+ info->total_time = s->total_time;
237+ if (s->error) {
238+ info->has_error = true;
239+ info->error = g_strdup(error_get_pretty(s->error));
240+ }
241+ break;
242+ case SAVE_STATE_ACTIVE:
243+ info->has_status = true;
244+ info->status = g_strdup("active");
245+ info->has_total_time = true;
246+ info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME)
247+ - s->total_time;
248+ break;
249+ case SAVE_STATE_COMPLETED:
250+ info->has_status = true;
251+ info->status = g_strdup("completed");
252+ info->has_total_time = true;
253+ info->total_time = s->total_time;
254+ break;
255+ }
256+ }
257+
258+ return info;
259+}
260+
261+static int save_snapshot_cleanup(void)
262+{
263+ int ret = 0;
264+
265+ DPRINTF("save_snapshot_cleanup\n");
266+
267+ snap_state.total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) -
268+ snap_state.total_time;
269+
270+ if (snap_state.file) {
271+ ret = qemu_fclose(snap_state.file);
5b15e2ec 272+ snap_state.file = NULL;
95259824
WB
273+ }
274+
67af0fa4 275+ if (snap_state.target) {
817b7667
SR
276+ if (!savevm_aborted()) {
277+ /* try to truncate, but ignore errors (will fail on block devices).
278+ * note1: bdrv_read() needs whole blocks, so we need to round up
279+ * note2: PVE requires 1024 (BDRV_SECTOR_SIZE*2) alignment
280+ */
281+ size_t size = QEMU_ALIGN_UP(snap_state.bs_pos, BDRV_SECTOR_SIZE*2);
282+ blk_truncate(snap_state.target, size, false, PREALLOC_MODE_OFF, 0, NULL);
283+ }
67af0fa4 284+ blk_op_unblock_all(snap_state.target, snap_state.blocker);
95259824
WB
285+ error_free(snap_state.blocker);
286+ snap_state.blocker = NULL;
67af0fa4
WB
287+ blk_unref(snap_state.target);
288+ snap_state.target = NULL;
817b7667 289+
563c5928 290+ qemu_co_sleep_wake(&snap_state.target_close_wait);
95259824
WB
291+ }
292+
293+ return ret;
294+}
295+
296+static void save_snapshot_error(const char *fmt, ...)
297+{
298+ va_list ap;
299+ char *msg;
300+
301+ va_start(ap, fmt);
302+ msg = g_strdup_vprintf(fmt, ap);
303+ va_end(ap);
304+
305+ DPRINTF("save_snapshot_error: %s\n", msg);
306+
307+ if (!snap_state.error) {
308+ error_set(&snap_state.error, ERROR_CLASS_GENERIC_ERROR, "%s", msg);
309+ }
310+
311+ g_free (msg);
312+
313+ snap_state.state = SAVE_STATE_ERROR;
95259824
WB
314+}
315+
d7f4e01a 316+static void process_savevm_finalize(void *opaque)
0775f12b
WB
317+{
318+ int ret;
d7f4e01a
TL
319+ AioContext *iohandler_ctx = iohandler_get_aio_context();
320+ MigrationState *ms = migrate_get_current();
321+
817b7667
SR
322+ bool aborted = savevm_aborted();
323+
d7f4e01a
TL
324+#ifdef DEBUG_SAVEVM_STATE
325+ int64_t start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
326+#endif
327+
328+ qemu_bh_delete(snap_state.finalize_bh);
329+ snap_state.finalize_bh = NULL;
330+ snap_state.co = NULL;
331+
332+ /* We need to own the target bdrv's context for the following functions,
333+ * so move it back. It can stay in the main context and live out its life
334+ * there, since we're done with it after this method ends anyway.
335+ */
336+ aio_context_acquire(iohandler_ctx);
337+ blk_set_aio_context(snap_state.target, qemu_get_aio_context(), NULL);
338+ aio_context_release(iohandler_ctx);
339+
340+ ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
341+ if (ret < 0) {
342+ save_snapshot_error("vm_stop_force_state error %d", ret);
343+ }
344+
817b7667
SR
345+ if (!aborted) {
346+ /* skip state saving if we aborted, snapshot will be invalid anyway */
347+ (void)qemu_savevm_state_complete_precopy(snap_state.file, false, false);
348+ ret = qemu_file_get_error(snap_state.file);
349+ if (ret < 0) {
350+ save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
351+ }
d7f4e01a
TL
352+ }
353+
354+ DPRINTF("state saving complete\n");
355+ DPRINTF("timing: process_savevm_finalize (state saving) took %ld ms\n",
356+ qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time);
357+
358+ /* clear migration state */
359+ migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP,
817b7667 360+ ret || aborted ? MIGRATION_STATUS_FAILED : MIGRATION_STATUS_COMPLETED);
d7f4e01a
TL
361+ ms->to_dst_file = NULL;
362+
363+ qemu_savevm_state_cleanup();
364+
0775f12b
WB
365+ ret = save_snapshot_cleanup();
366+ if (ret < 0) {
367+ save_snapshot_error("save_snapshot_cleanup error %d", ret);
368+ } else if (snap_state.state == SAVE_STATE_ACTIVE) {
369+ snap_state.state = SAVE_STATE_COMPLETED;
817b7667 370+ } else if (aborted) {
1976ca46
FE
371+ /*
372+ * If there was an error, there's no need to set a new one here.
373+ * If the snapshot was canceled, leave setting the state to
374+ * qmp_savevm_end(), which is woken by save_snapshot_cleanup().
375+ */
0775f12b
WB
376+ } else {
377+ save_snapshot_error("process_savevm_cleanup: invalid state: %d",
378+ snap_state.state);
95259824 379+ }
0775f12b
WB
380+ if (snap_state.saved_vm_running) {
381+ vm_start();
382+ snap_state.saved_vm_running = false;
95259824 383+ }
d7f4e01a
TL
384+
385+ DPRINTF("timing: process_savevm_finalize (full) took %ld ms\n",
386+ qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time);
95259824
WB
387+}
388+
d7f4e01a 389+static void coroutine_fn process_savevm_co(void *opaque)
95259824
WB
390+{
391+ int ret;
392+ int64_t maxlen;
d7f4e01a
TL
393+ BdrvNextIterator it;
394+ BlockDriverState *bs = NULL;
95259824 395+
d7f4e01a
TL
396+#ifdef DEBUG_SAVEVM_STATE
397+ int64_t start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
398+#endif
95259824 399+
6838f038 400+ ret = qemu_file_get_error(snap_state.file);
95259824 401+ if (ret < 0) {
6838f038 402+ save_snapshot_error("qemu_savevm_state_setup failed");
d7f4e01a 403+ return;
95259824
WB
404+ }
405+
406+ while (snap_state.state == SAVE_STATE_ACTIVE) {
0775f12b 407+ uint64_t pending_size, pend_precopy, pend_compatible, pend_postcopy;
95259824 408+
e9b36665
SR
409+ /* pending is expected to be called without iothread lock */
410+ qemu_mutex_unlock_iothread();
0775f12b 411+ qemu_savevm_state_pending(snap_state.file, 0, &pend_precopy, &pend_compatible, &pend_postcopy);
e9b36665
SR
412+ qemu_mutex_lock_iothread();
413+
0775f12b 414+ pending_size = pend_precopy + pend_compatible + pend_postcopy;
95259824 415+
0775f12b
WB
416+ maxlen = blk_getlength(snap_state.target) - 30*1024*1024;
417+
418+ if (pending_size > 400000 && snap_state.bs_pos + pending_size < maxlen) {
0775f12b
WB
419+ ret = qemu_savevm_state_iterate(snap_state.file, false);
420+ if (ret < 0) {
421+ save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
422+ break;
423+ }
d7f4e01a 424+ DPRINTF("savevm iterate pending size %lu ret %d\n", pending_size, ret);
95259824 425+ } else {
b855dce7 426+ qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
0775f12b
WB
427+ ret = global_state_store();
428+ if (ret) {
429+ save_snapshot_error("global_state_store error %d", ret);
95259824 430+ break;
0775f12b 431+ }
d7f4e01a
TL
432+
433+ DPRINTF("savevm iterate complete\n");
95259824
WB
434+ break;
435+ }
95259824
WB
436+ }
437+
d7f4e01a
TL
438+ DPRINTF("timing: process_savevm_co took %ld ms\n",
439+ qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time);
440+
441+#ifdef DEBUG_SAVEVM_STATE
442+ int64_t start_time_flush = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
443+#endif
444+ /* If a drive runs in an IOThread we can flush it async, and only
445+ * need to sync-flush whatever IO happens between now and
446+ * vm_stop_force_state. bdrv_next can only be called from main AioContext,
447+ * so move there now and after every flush.
448+ */
449+ aio_co_reschedule_self(qemu_get_aio_context());
450+ for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
451+ /* target has BDRV_O_NO_FLUSH, no sense calling bdrv_flush on it */
452+ if (bs == blk_bs(snap_state.target)) {
453+ continue;
454+ }
455+
456+ AioContext *bs_ctx = bdrv_get_aio_context(bs);
457+ if (bs_ctx != qemu_get_aio_context()) {
458+ DPRINTF("savevm: async flushing drive %s\n", bs->filename);
459+ aio_co_reschedule_self(bs_ctx);
460+ bdrv_flush(bs);
461+ aio_co_reschedule_self(qemu_get_aio_context());
462+ }
463+ }
464+
465+ DPRINTF("timing: async flushing took %ld ms\n",
466+ qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time_flush);
95259824 467+
d7f4e01a 468+ qemu_bh_schedule(snap_state.finalize_bh);
95259824
WB
469+}
470+
95259824
WB
471+void qmp_savevm_start(bool has_statefile, const char *statefile, Error **errp)
472+{
95259824 473+ Error *local_err = NULL;
d7f4e01a
TL
474+ MigrationState *ms = migrate_get_current();
475+ AioContext *iohandler_ctx = iohandler_get_aio_context();
95259824 476+
67af0fa4 477+ int bdrv_oflags = BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH;
95259824
WB
478+
479+ if (snap_state.state != SAVE_STATE_DONE) {
480+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
481+ "VM snapshot already started\n");
482+ return;
483+ }
484+
d7f4e01a
TL
485+ if (migration_is_running(ms->state)) {
486+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, QERR_MIGRATION_ACTIVE);
487+ return;
488+ }
489+
490+ if (migrate_use_block()) {
491+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
492+ "Block migration and snapshots are incompatible");
493+ return;
494+ }
495+
95259824
WB
496+ /* initialize snapshot info */
497+ snap_state.saved_vm_running = runstate_is_running();
498+ snap_state.bs_pos = 0;
499+ snap_state.total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
500+ snap_state.blocker = NULL;
563c5928 501+ snap_state.target_close_wait.to_wake = NULL;
95259824
WB
502+
503+ if (snap_state.error) {
504+ error_free(snap_state.error);
505+ snap_state.error = NULL;
506+ }
507+
508+ if (!has_statefile) {
509+ vm_stop(RUN_STATE_SAVE_VM);
510+ snap_state.state = SAVE_STATE_COMPLETED;
511+ return;
512+ }
513+
514+ if (qemu_savevm_state_blocked(errp)) {
515+ return;
516+ }
517+
518+ /* Open the image */
95259824
WB
519+ QDict *options = NULL;
520+ options = qdict_new();
53e83913 521+ qdict_put_str(options, "driver", "raw");
67af0fa4
WB
522+ snap_state.target = blk_new_open(statefile, NULL, options, bdrv_oflags, &local_err);
523+ if (!snap_state.target) {
95259824
WB
524+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
525+ goto restart;
526+ }
527+
5b15e2ec
FE
528+ QIOChannel *ioc = QIO_CHANNEL(qio_channel_savevm_async_new(snap_state.target,
529+ &snap_state.bs_pos));
530+ snap_state.file = qemu_file_new_output(ioc);
95259824
WB
531+
532+ if (!snap_state.file) {
533+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
534+ goto restart;
535+ }
536+
d7f4e01a
TL
537+ /*
538+ * qemu_savevm_* paths use migration code and expect a migration state.
539+ * State is cleared in process_savevm_finalize, but has to be initialized
540+ * here (blocking main thread, from QMP) to avoid race conditions.
541+ */
542+ migrate_init(ms);
543+ memset(&ram_counters, 0, sizeof(ram_counters));
544+ ms->to_dst_file = snap_state.file;
95259824
WB
545+
546+ error_setg(&snap_state.blocker, "block device is in use by savevm");
67af0fa4 547+ blk_op_block_all(snap_state.target, snap_state.blocker);
95259824 548+
0775f12b 549+ snap_state.state = SAVE_STATE_ACTIVE;
d7f4e01a
TL
550+ snap_state.finalize_bh = qemu_bh_new(process_savevm_finalize, &snap_state);
551+ snap_state.co = qemu_coroutine_create(&process_savevm_co, NULL);
552+ qemu_mutex_unlock_iothread();
553+ qemu_savevm_state_header(snap_state.file);
554+ qemu_savevm_state_setup(snap_state.file);
555+ qemu_mutex_lock_iothread();
556+
557+ /* Async processing from here on out happens in iohandler context, so let
558+ * the target bdrv have its home there.
559+ */
560+ blk_set_aio_context(snap_state.target, iohandler_ctx, &local_err);
561+
562+ aio_co_schedule(iohandler_ctx, snap_state.co);
95259824
WB
563+
564+ return;
565+
566+restart:
567+
568+ save_snapshot_error("setup failed");
569+
570+ if (snap_state.saved_vm_running) {
571+ vm_start();
817b7667 572+ snap_state.saved_vm_running = false;
95259824
WB
573+ }
574+}
575+
817b7667 576+void coroutine_fn qmp_savevm_end(Error **errp)
95259824 577+{
817b7667
SR
578+ int64_t timeout;
579+
95259824
WB
580+ if (snap_state.state == SAVE_STATE_DONE) {
581+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
582+ "VM snapshot not started\n");
583+ return;
584+ }
585+
586+ if (snap_state.state == SAVE_STATE_ACTIVE) {
587+ snap_state.state = SAVE_STATE_CANCELLED;
817b7667 588+ goto wait_for_close;
95259824
WB
589+ }
590+
591+ if (snap_state.saved_vm_running) {
592+ vm_start();
817b7667 593+ snap_state.saved_vm_running = false;
95259824
WB
594+ }
595+
596+ snap_state.state = SAVE_STATE_DONE;
817b7667
SR
597+
598+wait_for_close:
599+ if (!snap_state.target) {
600+ DPRINTF("savevm-end: no target file open\n");
601+ return;
602+ }
603+
604+ /* wait until cleanup is done before returning, this ensures that after this
605+ * call exits the statefile will be closed and can be removed immediately */
606+ DPRINTF("savevm-end: waiting for cleanup\n");
607+ timeout = 30L * 1000 * 1000 * 1000;
563c5928 608+ qemu_co_sleep_ns_wakeable(&snap_state.target_close_wait,
f376b2b9 609+ QEMU_CLOCK_REALTIME, timeout);
817b7667
SR
610+ if (snap_state.target) {
611+ save_snapshot_error("timeout waiting for target file close in "
612+ "qmp_savevm_end");
613+ /* we cannot assume the snapshot finished in this case, so leave the
614+ * state alone - caller has to figure something out */
615+ return;
616+ }
617+
1976ca46
FE
618+ // File closed and no other error, so ensure next snapshot can be started.
619+ if (snap_state.state != SAVE_STATE_ERROR) {
620+ snap_state.state = SAVE_STATE_DONE;
621+ }
622+
817b7667 623+ DPRINTF("savevm-end: cleanup done\n");
95259824
WB
624+}
625+
0775f12b 626+// FIXME: Deprecated
95259824
WB
627+void qmp_snapshot_drive(const char *device, const char *name, Error **errp)
628+{
0775f12b
WB
629+ // Compatibility to older qemu-server.
630+ qmp_blockdev_snapshot_internal_sync(device, name, errp);
95259824
WB
631+}
632+
0775f12b 633+// FIXME: Deprecated
95259824
WB
634+void qmp_delete_drive_snapshot(const char *device, const char *name,
635+ Error **errp)
636+{
0775f12b
WB
637+ // Compatibility to older qemu-server.
638+ (void)qmp_blockdev_snapshot_delete_internal_sync(device, false, NULL,
639+ true, name, errp);
95259824
WB
640+}
641+
6838f038 642+int load_snapshot_from_blockdev(const char *filename, Error **errp)
95259824 643+{
67af0fa4 644+ BlockBackend *be;
95259824
WB
645+ Error *local_err = NULL;
646+ Error *blocker = NULL;
647+
648+ QEMUFile *f;
5b15e2ec 649+ size_t bs_pos = 0;
67af0fa4 650+ int ret = -EINVAL;
95259824 651+
67af0fa4 652+ be = blk_new_open(filename, NULL, NULL, 0, &local_err);
95259824 653+
67af0fa4 654+ if (!be) {
6838f038 655+ error_setg(errp, "Could not open VM state file");
95259824
WB
656+ goto the_end;
657+ }
658+
67af0fa4
WB
659+ error_setg(&blocker, "block device is in use by load state");
660+ blk_op_block_all(be, blocker);
661+
95259824 662+ /* restore the VM state */
5b15e2ec 663+ f = qemu_file_new_input(QIO_CHANNEL(qio_channel_savevm_async_new(be, &bs_pos)));
95259824 664+ if (!f) {
6838f038 665+ error_setg(errp, "Could not open VM state file");
95259824
WB
666+ goto the_end;
667+ }
668+
6838f038 669+ qemu_system_reset(SHUTDOWN_CAUSE_NONE);
95259824
WB
670+ ret = qemu_loadvm_state(f);
671+
e9b36665
SR
672+ /* dirty bitmap migration has a special case we need to trigger manually */
673+ dirty_bitmap_mig_before_vm_start();
674+
95259824
WB
675+ qemu_fclose(f);
676+ migration_incoming_state_destroy();
677+ if (ret < 0) {
6838f038 678+ error_setg_errno(errp, -ret, "Error while loading VM state");
95259824
WB
679+ goto the_end;
680+ }
681+
682+ ret = 0;
683+
684+ the_end:
67af0fa4
WB
685+ if (be) {
686+ blk_op_unblock_all(be, blocker);
95259824 687+ error_free(blocker);
67af0fa4 688+ blk_unref(be);
95259824
WB
689+ }
690+ return ret;
691+}
817b7667 692diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
5b15e2ec 693index 15572befb1..1507180990 100644
817b7667
SR
694--- a/monitor/hmp-cmds.c
695+++ b/monitor/hmp-cmds.c
5b15e2ec 696@@ -1925,6 +1925,63 @@ void hmp_info_memory_devices(Monitor *mon, const QDict *qdict)
817b7667
SR
697 hmp_handle_error(mon, err);
698 }
699
700+void hmp_savevm_start(Monitor *mon, const QDict *qdict)
701+{
702+ Error *errp = NULL;
703+ const char *statefile = qdict_get_try_str(qdict, "statefile");
704+
705+ qmp_savevm_start(statefile != NULL, statefile, &errp);
706+ hmp_handle_error(mon, errp);
707+}
708+
709+void hmp_snapshot_drive(Monitor *mon, const QDict *qdict)
710+{
711+ Error *errp = NULL;
712+ const char *name = qdict_get_str(qdict, "name");
713+ const char *device = qdict_get_str(qdict, "device");
714+
715+ qmp_snapshot_drive(device, name, &errp);
716+ hmp_handle_error(mon, errp);
717+}
718+
719+void hmp_delete_drive_snapshot(Monitor *mon, const QDict *qdict)
720+{
721+ Error *errp = NULL;
722+ const char *name = qdict_get_str(qdict, "name");
723+ const char *device = qdict_get_str(qdict, "device");
724+
725+ qmp_delete_drive_snapshot(device, name, &errp);
726+ hmp_handle_error(mon, errp);
727+}
728+
729+void coroutine_fn hmp_savevm_end(Monitor *mon, const QDict *qdict)
730+{
731+ Error *errp = NULL;
732+
733+ qmp_savevm_end(&errp);
734+ hmp_handle_error(mon, errp);
735+}
736+
737+void hmp_info_savevm(Monitor *mon, const QDict *qdict)
738+{
739+ SaveVMInfo *info;
740+ info = qmp_query_savevm(NULL);
741+
742+ if (info->has_status) {
743+ monitor_printf(mon, "savevm status: %s\n", info->status);
744+ monitor_printf(mon, "total time: %" PRIu64 " milliseconds\n",
745+ info->total_time);
746+ } else {
747+ monitor_printf(mon, "savevm status: not running\n");
748+ }
749+ if (info->has_bytes) {
750+ monitor_printf(mon, "Bytes saved: %"PRIu64"\n", info->bytes);
751+ }
752+ if (info->has_error) {
753+ monitor_printf(mon, "Error: %s\n", info->error);
754+ }
755+}
756+
757 void hmp_info_iothreads(Monitor *mon, const QDict *qdict)
758 {
759 IOThreadInfoList *info_list = qmp_query_iothreads(NULL);
760diff --git a/qapi/migration.json b/qapi/migration.json
5b15e2ec 761index 81185d4311..3129f71fa8 100644
817b7667
SR
762--- a/qapi/migration.json
763+++ b/qapi/migration.json
5b15e2ec 764@@ -261,6 +261,40 @@
817b7667
SR
765 '*compression': 'CompressionStats',
766 '*socket-address': ['SocketAddress'] } }
767
768+##
769+# @SaveVMInfo:
770+#
771+# Information about the current savevm process.
772+#
773+# @status: string describing the current savevm status.
774+# This can be 'active', 'completed', 'failed'.
775+# If this field is not returned, no savevm process
776+# has been initiated
777+#
778+# @error: string containing error message if status is failed.
779+#
780+# @total-time: total amount of milliseconds since savevm started.
781+# If savevm has ended, it returns the total save time
782+#
783+# @bytes: total amount of data transferred
784+#
785+# Since: 1.3
786+##
787+{ 'struct': 'SaveVMInfo',
788+ 'data': {'*status': 'str', '*error': 'str',
789+ '*total-time': 'int', '*bytes': 'int'} }
790+
791+##
792+# @query-savevm:
793+#
794+# Returns information about current savevm process.
795+#
796+# Returns: @SaveVMInfo
797+#
798+# Since: 1.3
799+##
800+{ 'command': 'query-savevm', 'returns': 'SaveVMInfo' }
801+
802 ##
803 # @query-migrate:
804 #
805diff --git a/qapi/misc.json b/qapi/misc.json
5b15e2ec 806index 27ef5a2b20..b3ce75dcae 100644
817b7667
SR
807--- a/qapi/misc.json
808+++ b/qapi/misc.json
4567474e 809@@ -435,6 +435,38 @@
817b7667
SR
810 ##
811 { 'command': 'query-fdsets', 'returns': ['FdsetInfo'] }
812
813+##
814+# @savevm-start:
815+#
816+# Prepare for snapshot and halt VM. Save VM state to statefile.
817+#
818+##
819+{ 'command': 'savevm-start', 'data': { '*statefile': 'str' } }
820+
821+##
822+# @snapshot-drive:
823+#
824+# Create an internal drive snapshot.
825+#
826+##
827+{ 'command': 'snapshot-drive', 'data': { 'device': 'str', 'name': 'str' } }
828+
829+##
830+# @delete-drive-snapshot:
831+#
832+# Delete a drive snapshot.
833+#
834+##
835+{ 'command': 'delete-drive-snapshot', 'data': { 'device': 'str', 'name': 'str' } }
836+
837+##
838+# @savevm-end:
839+#
840+# Resume VM after a snapshot.
841+#
842+##
843+{ 'command': 'savevm-end', 'coroutine': true }
844+
845 ##
846 # @CommandLineParameterType:
847 #
848diff --git a/qemu-options.hx b/qemu-options.hx
5b15e2ec 849index 31c04f7eea..c2ca6e91b5 100644
817b7667
SR
850--- a/qemu-options.hx
851+++ b/qemu-options.hx
5b15e2ec 852@@ -4341,6 +4341,18 @@ SRST
817b7667
SR
853 Start right away with a saved state (``loadvm`` in monitor)
854 ERST
855
856+DEF("loadstate", HAS_ARG, QEMU_OPTION_loadstate, \
857+ "-loadstate file\n" \
858+ " start right away with a saved state\n",
859+ QEMU_ARCH_ALL)
860+SRST
861+``-loadstate file``
862+ Start right away with a saved state. This option does not roll back
863+ disk state like ``loadvm``, so the user must make sure that the disks
864+ have the correct state. *file* can be any valid device URL. See the section
865+ "Device URL Syntax" for more information.
866+ERST
867+
868 #ifndef _WIN32
869 DEF("daemonize", 0, QEMU_OPTION_daemonize, \
870 "-daemonize daemonize QEMU after initializing\n", QEMU_ARCH_ALL)
83faa3fe 871diff --git a/softmmu/vl.c b/softmmu/vl.c
5b15e2ec 872index 706bd7cff7..b8637c4262 100644
83faa3fe
TL
873--- a/softmmu/vl.c
874+++ b/softmmu/vl.c
5b15e2ec
FE
875@@ -165,6 +165,7 @@ static const char *accelerators;
876 static bool have_custom_ram_size;
877 static const char *ram_memdev_id;
f376b2b9 878 static QDict *machine_opts_dict;
8dca018b
SR
879+static const char *loadstate;
880 static QTAILQ_HEAD(, ObjectOption) object_opts = QTAILQ_HEAD_INITIALIZER(object_opts);
4567474e 881 static QTAILQ_HEAD(, DeviceOption) device_opts = QTAILQ_HEAD_INITIALIZER(device_opts);
5b15e2ec
FE
882 static int display_remote;
883@@ -2584,6 +2585,12 @@ void qmp_x_exit_preconfig(Error **errp)
4567474e
FE
884
885 if (loadvm) {
886 load_snapshot(loadvm, NULL, false, NULL, &error_fatal);
95259824 887+ } else if (loadstate) {
6838f038
WB
888+ Error *local_err = NULL;
889+ if (load_snapshot_from_blockdev(loadstate, &local_err) < 0) {
890+ error_report_err(local_err);
95259824
WB
891+ autostart = 0;
892+ }
893 }
b855dce7
TL
894 if (replay_mode != REPLAY_MODE_NONE) {
895 replay_vmstate_init();
5b15e2ec 896@@ -3133,6 +3140,9 @@ void qemu_init(int argc, char **argv, char **envp)
8dca018b
SR
897 case QEMU_OPTION_loadvm:
898 loadvm = optarg;
899 break;
900+ case QEMU_OPTION_loadstate:
901+ loadstate = optarg;
902+ break;
903 case QEMU_OPTION_full_screen:
904 dpy.has_full_screen = true;
905 dpy.full_screen = true;