]> git.proxmox.com Git - pve-qemu.git/blame - debian/patches/pve/0017-PVE-add-savevm-async-for-background-state-snapshots.patch
bump version to 5.2.0-3
[pve-qemu.git] / debian / patches / pve / 0017-PVE-add-savevm-async-for-background-state-snapshots.patch
CommitLineData
23102ed6 1From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
6402d961 2From: Dietmar Maurer <dietmar@proxmox.com>
83faa3fe 3Date: Mon, 6 Apr 2020 12:16:46 +0200
817b7667 4Subject: [PATCH] PVE: add savevm-async for background state snapshots
95259824 5
d7f4e01a
TL
6Put qemu_savevm_state_{header,setup} into the main loop and the rest
7of the iteration into a coroutine. The former need to lock the
8iothread (and we can't unlock it in the coroutine), and the latter
9can't deal with being in a separate thread, so a coroutine it must
10be.
11
817b7667
SR
12Truncate output file at 1024 boundary.
13
14Do not block the VM and save the state on aborting a snapshot, as the
15snapshot will be invalid anyway.
16
17Also, when aborting, wait for the target file to be closed, otherwise a
18client might run into race-conditions when trying to remove the file
19still opened by QEMU.
20
b855dce7 21Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
6402d961 22Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
d7f4e01a 23Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
817b7667
SR
24[improve aborting]
25Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
95259824 26---
b855dce7 27 hmp-commands-info.hx | 13 +
817b7667 28 hmp-commands.hx | 33 ++
6838f038 29 include/migration/snapshot.h | 1 +
be901f66 30 include/monitor/hmp.h | 5 +
817b7667
SR
31 migration/meson.build | 1 +
32 migration/savevm-async.c | 591 +++++++++++++++++++++++++++++++++++
d7f4e01a 33 monitor/hmp-cmds.c | 57 ++++
817b7667
SR
34 qapi/migration.json | 34 ++
35 qapi/misc.json | 32 ++
83faa3fe 36 qemu-options.hx | 12 +
83faa3fe 37 softmmu/vl.c | 10 +
817b7667
SR
38 11 files changed, 789 insertions(+)
39 create mode 100644 migration/savevm-async.c
95259824 40
95259824 41diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
817b7667 42index 117ba25f91..b3b797ca28 100644
95259824
WB
43--- a/hmp-commands-info.hx
44+++ b/hmp-commands-info.hx
60ae3775 45@@ -580,6 +580,19 @@ SRST
83faa3fe
TL
46 Show current migration xbzrle cache size.
47 ERST
48
b855dce7 49+ {
95259824
WB
50+ .name = "savevm",
51+ .args_type = "",
52+ .params = "",
53+ .help = "show savevm status",
a544966d 54+ .cmd = hmp_info_savevm,
95259824
WB
55+ },
56+
83faa3fe
TL
57+SRST
58+ ``info savevm``
59+ Show savevm status.
60+ERST
61+
b855dce7 62 {
83faa3fe
TL
63 .name = "balloon",
64 .args_type = "",
95259824 65diff --git a/hmp-commands.hx b/hmp-commands.hx
817b7667 66index ff2d7aa8f3..d294c234a5 100644
95259824
WB
67--- a/hmp-commands.hx
68+++ b/hmp-commands.hx
817b7667 69@@ -1866,3 +1866,36 @@ ERST
83faa3fe
TL
70 .flags = "p",
71 },
72
95259824
WB
73+
74+ {
75+ .name = "savevm-start",
76+ .args_type = "statefile:s?",
77+ .params = "[statefile]",
78+ .help = "Prepare for snapshot and halt VM. Save VM state to statefile.",
a544966d 79+ .cmd = hmp_savevm_start,
95259824
WB
80+ },
81+
82+ {
83+ .name = "snapshot-drive",
84+ .args_type = "device:s,name:s",
85+ .params = "device name",
86+ .help = "Create internal snapshot.",
a544966d 87+ .cmd = hmp_snapshot_drive,
95259824
WB
88+ },
89+
90+ {
91+ .name = "delete-drive-snapshot",
92+ .args_type = "device:s,name:s",
93+ .params = "device name",
94+ .help = "Delete internal snapshot.",
a544966d 95+ .cmd = hmp_delete_drive_snapshot,
95259824
WB
96+ },
97+
98+ {
99+ .name = "savevm-end",
100+ .args_type = "",
101+ .params = "",
102+        .help       = "Resume VM after snapshot.",
817b7667
SR
103+ .cmd = hmp_savevm_end,
104+ .coroutine = true,
95259824 105+ },
be901f66
SR
106diff --git a/include/migration/snapshot.h b/include/migration/snapshot.h
107index c85b6ec75b..4411b7121d 100644
108--- a/include/migration/snapshot.h
109+++ b/include/migration/snapshot.h
110@@ -17,5 +17,6 @@
111
112 int save_snapshot(const char *name, Error **errp);
113 int load_snapshot(const char *name, Error **errp);
114+int load_snapshot_from_blockdev(const char *filename, Error **errp);
115
116 #endif
117diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h
817b7667 118index ed2913fd18..4e06f89e8e 100644
be901f66
SR
119--- a/include/monitor/hmp.h
120+++ b/include/monitor/hmp.h
121@@ -25,6 +25,7 @@ void hmp_info_status(Monitor *mon, const QDict *qdict);
122 void hmp_info_uuid(Monitor *mon, const QDict *qdict);
123 void hmp_info_chardev(Monitor *mon, const QDict *qdict);
124 void hmp_info_mice(Monitor *mon, const QDict *qdict);
125+void hmp_info_savevm(Monitor *mon, const QDict *qdict);
126 void hmp_info_migrate(Monitor *mon, const QDict *qdict);
127 void hmp_info_migrate_capabilities(Monitor *mon, const QDict *qdict);
128 void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict);
83faa3fe 129@@ -83,6 +84,10 @@ void hmp_netdev_add(Monitor *mon, const QDict *qdict);
be901f66
SR
130 void hmp_netdev_del(Monitor *mon, const QDict *qdict);
131 void hmp_getfd(Monitor *mon, const QDict *qdict);
132 void hmp_closefd(Monitor *mon, const QDict *qdict);
133+void hmp_savevm_start(Monitor *mon, const QDict *qdict);
134+void hmp_snapshot_drive(Monitor *mon, const QDict *qdict);
135+void hmp_delete_drive_snapshot(Monitor *mon, const QDict *qdict);
136+void hmp_savevm_end(Monitor *mon, const QDict *qdict);
137 void hmp_sendkey(Monitor *mon, const QDict *qdict);
138 void hmp_screendump(Monitor *mon, const QDict *qdict);
83faa3fe 139 void hmp_chardev_add(Monitor *mon, const QDict *qdict);
817b7667
SR
140diff --git a/migration/meson.build b/migration/meson.build
141index 980e37865c..e62b79b60f 100644
142--- a/migration/meson.build
143+++ b/migration/meson.build
144@@ -23,6 +23,7 @@ softmmu_ss.add(files(
145 'multifd-zlib.c',
146 'postcopy-ram.c',
147 'savevm.c',
148+ 'savevm-async.c',
149 'socket.c',
150 'tls.c',
151 ))
152diff --git a/migration/savevm-async.c b/migration/savevm-async.c
95259824 153new file mode 100644
817b7667 154index 0000000000..4e345c1a7d
95259824 155--- /dev/null
817b7667
SR
156+++ b/migration/savevm-async.c
157@@ -0,0 +1,591 @@
95259824 158+#include "qemu/osdep.h"
6838f038
WB
159+#include "migration/migration.h"
160+#include "migration/savevm.h"
161+#include "migration/snapshot.h"
162+#include "migration/global_state.h"
163+#include "migration/ram.h"
164+#include "migration/qemu-file.h"
95259824 165+#include "sysemu/sysemu.h"
6402d961 166+#include "sysemu/runstate.h"
95259824 167+#include "block/block.h"
95259824 168+#include "sysemu/block-backend.h"
53e83913
WB
169+#include "qapi/error.h"
170+#include "qapi/qmp/qerror.h"
171+#include "qapi/qmp/qdict.h"
172+#include "qapi/qapi-commands-migration.h"
173+#include "qapi/qapi-commands-misc.h"
0775f12b 174+#include "qapi/qapi-commands-block.h"
95259824 175+#include "qemu/cutils.h"
817b7667 176+#include "qemu/timer.h"
6402d961
TL
177+#include "qemu/main-loop.h"
178+#include "qemu/rcu.h"
95259824
WB
179+
180+/* #define DEBUG_SAVEVM_STATE */
181+
0775f12b
WB
182+/* used while emulated sync operation in progress */
183+#define NOT_DONE -EINPROGRESS
67af0fa4 184+
95259824
WB
185+#ifdef DEBUG_SAVEVM_STATE
186+#define DPRINTF(fmt, ...) \
187+ do { printf("savevm-async: " fmt, ## __VA_ARGS__); } while (0)
188+#else
189+#define DPRINTF(fmt, ...) \
190+ do { } while (0)
191+#endif
192+
193+enum {
194+ SAVE_STATE_DONE,
195+ SAVE_STATE_ERROR,
196+ SAVE_STATE_ACTIVE,
197+ SAVE_STATE_COMPLETED,
198+ SAVE_STATE_CANCELLED
199+};
200+
201+
202+static struct SnapshotState {
67af0fa4 203+ BlockBackend *target;
95259824
WB
204+ size_t bs_pos;
205+ int state;
206+ Error *error;
207+ Error *blocker;
208+ int saved_vm_running;
209+ QEMUFile *file;
210+ int64_t total_time;
d7f4e01a
TL
211+ QEMUBH *finalize_bh;
212+ Coroutine *co;
817b7667 213+ QemuCoSleepState *target_close_wait;
95259824
WB
214+} snap_state;
215+
817b7667
SR
216+static bool savevm_aborted(void)
217+{
218+ return snap_state.state == SAVE_STATE_CANCELLED ||
219+ snap_state.state == SAVE_STATE_ERROR;
220+}
221+
95259824
WB
222+SaveVMInfo *qmp_query_savevm(Error **errp)
223+{
224+ SaveVMInfo *info = g_malloc0(sizeof(*info));
225+ struct SnapshotState *s = &snap_state;
226+
227+ if (s->state != SAVE_STATE_DONE) {
228+ info->has_bytes = true;
229+ info->bytes = s->bs_pos;
230+ switch (s->state) {
231+ case SAVE_STATE_ERROR:
232+ info->has_status = true;
233+ info->status = g_strdup("failed");
234+ info->has_total_time = true;
235+ info->total_time = s->total_time;
236+ if (s->error) {
237+ info->has_error = true;
238+ info->error = g_strdup(error_get_pretty(s->error));
239+ }
240+ break;
241+ case SAVE_STATE_ACTIVE:
242+ info->has_status = true;
243+ info->status = g_strdup("active");
244+ info->has_total_time = true;
245+ info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME)
246+ - s->total_time;
247+ break;
248+ case SAVE_STATE_COMPLETED:
249+ info->has_status = true;
250+ info->status = g_strdup("completed");
251+ info->has_total_time = true;
252+ info->total_time = s->total_time;
253+ break;
254+ }
255+ }
256+
257+ return info;
258+}
259+
260+static int save_snapshot_cleanup(void)
261+{
262+ int ret = 0;
263+
264+ DPRINTF("save_snapshot_cleanup\n");
265+
266+ snap_state.total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) -
267+ snap_state.total_time;
268+
269+ if (snap_state.file) {
270+ ret = qemu_fclose(snap_state.file);
271+ }
272+
67af0fa4 273+ if (snap_state.target) {
817b7667
SR
274+ if (!savevm_aborted()) {
275+ /* try to truncate, but ignore errors (will fail on block devices).
276+            /* try to truncate, but ignore errors (will fail on block devices).
277+             * note1: bdrv_read() needs whole blocks, so we need to round up
277+ * note2: PVE requires 1024 (BDRV_SECTOR_SIZE*2) alignment
278+ */
279+ size_t size = QEMU_ALIGN_UP(snap_state.bs_pos, BDRV_SECTOR_SIZE*2);
280+ blk_truncate(snap_state.target, size, false, PREALLOC_MODE_OFF, 0, NULL);
281+ }
67af0fa4 282+ blk_op_unblock_all(snap_state.target, snap_state.blocker);
95259824
WB
283+ error_free(snap_state.blocker);
284+ snap_state.blocker = NULL;
67af0fa4
WB
285+ blk_unref(snap_state.target);
286+ snap_state.target = NULL;
817b7667
SR
287+
288+ if (snap_state.target_close_wait) {
289+ qemu_co_sleep_wake(snap_state.target_close_wait);
290+ }
95259824
WB
291+ }
292+
293+ return ret;
294+}
295+
296+static void save_snapshot_error(const char *fmt, ...)
297+{
298+ va_list ap;
299+ char *msg;
300+
301+ va_start(ap, fmt);
302+ msg = g_strdup_vprintf(fmt, ap);
303+ va_end(ap);
304+
305+ DPRINTF("save_snapshot_error: %s\n", msg);
306+
307+ if (!snap_state.error) {
308+ error_set(&snap_state.error, ERROR_CLASS_GENERIC_ERROR, "%s", msg);
309+ }
310+
311+ g_free (msg);
312+
313+ snap_state.state = SAVE_STATE_ERROR;
95259824
WB
314+}
315+
6402d961 316+static int block_state_close(void *opaque, Error **errp)
95259824
WB
317+{
318+ snap_state.file = NULL;
67af0fa4 319+ return blk_flush(snap_state.target);
95259824
WB
320+}
321+
0775f12b
WB
322+typedef struct BlkRwCo {
323+ int64_t offset;
324+ QEMUIOVector *qiov;
325+ ssize_t ret;
326+} BlkRwCo;
327+
328+static void coroutine_fn block_state_write_entry(void *opaque) {
329+ BlkRwCo *rwco = opaque;
330+ rwco->ret = blk_co_pwritev(snap_state.target, rwco->offset, rwco->qiov->size,
331+ rwco->qiov, 0);
d7f4e01a 332+ aio_wait_kick();
0775f12b
WB
333+}
334+
67af0fa4 335+static ssize_t block_state_writev_buffer(void *opaque, struct iovec *iov,
6402d961 336+ int iovcnt, int64_t pos, Error **errp)
95259824 337+{
67af0fa4 338+ QEMUIOVector qiov;
0775f12b
WB
339+ BlkRwCo rwco;
340+
341+ assert(pos == snap_state.bs_pos);
342+ rwco = (BlkRwCo) {
343+ .offset = pos,
344+ .qiov = &qiov,
345+ .ret = NOT_DONE,
346+ };
95259824 347+
67af0fa4 348+ qemu_iovec_init_external(&qiov, iov, iovcnt);
0775f12b
WB
349+
350+ if (qemu_in_coroutine()) {
351+ block_state_write_entry(&rwco);
352+ } else {
353+ Coroutine *co = qemu_coroutine_create(&block_state_write_entry, &rwco);
354+ bdrv_coroutine_enter(blk_bs(snap_state.target), co);
355+ BDRV_POLL_WHILE(blk_bs(snap_state.target), rwco.ret == NOT_DONE);
95259824 356+ }
0775f12b
WB
357+ if (rwco.ret < 0) {
358+ return rwco.ret;
359+ }
360+
67af0fa4
WB
361+ snap_state.bs_pos += qiov.size;
362+ return qiov.size;
95259824
WB
363+}
364+
0775f12b
WB
365+static const QEMUFileOps block_file_ops = {
366+ .writev_buffer = block_state_writev_buffer,
367+ .close = block_state_close,
368+};
369+
d7f4e01a 370+static void process_savevm_finalize(void *opaque)
0775f12b
WB
371+{
372+ int ret;
d7f4e01a
TL
373+ AioContext *iohandler_ctx = iohandler_get_aio_context();
374+ MigrationState *ms = migrate_get_current();
375+
817b7667
SR
376+ bool aborted = savevm_aborted();
377+
d7f4e01a
TL
378+#ifdef DEBUG_SAVEVM_STATE
379+ int64_t start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
380+#endif
381+
382+ qemu_bh_delete(snap_state.finalize_bh);
383+ snap_state.finalize_bh = NULL;
384+ snap_state.co = NULL;
385+
386+ /* We need to own the target bdrv's context for the following functions,
387+     * so move it back. It can stay in the main context and live out its life
388+ * there, since we're done with it after this method ends anyway.
389+ */
390+ aio_context_acquire(iohandler_ctx);
391+ blk_set_aio_context(snap_state.target, qemu_get_aio_context(), NULL);
392+ aio_context_release(iohandler_ctx);
393+
394+ ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
395+ if (ret < 0) {
396+ save_snapshot_error("vm_stop_force_state error %d", ret);
397+ }
398+
817b7667
SR
399+ if (!aborted) {
400+ /* skip state saving if we aborted, snapshot will be invalid anyway */
401+ (void)qemu_savevm_state_complete_precopy(snap_state.file, false, false);
402+ ret = qemu_file_get_error(snap_state.file);
403+ if (ret < 0) {
404+ save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
405+ }
d7f4e01a
TL
406+ }
407+
408+ DPRINTF("state saving complete\n");
409+ DPRINTF("timing: process_savevm_finalize (state saving) took %ld ms\n",
410+ qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time);
411+
412+ /* clear migration state */
413+ migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP,
817b7667 414+ ret || aborted ? MIGRATION_STATUS_FAILED : MIGRATION_STATUS_COMPLETED);
d7f4e01a
TL
415+ ms->to_dst_file = NULL;
416+
417+ qemu_savevm_state_cleanup();
418+
0775f12b
WB
419+ ret = save_snapshot_cleanup();
420+ if (ret < 0) {
421+ save_snapshot_error("save_snapshot_cleanup error %d", ret);
422+ } else if (snap_state.state == SAVE_STATE_ACTIVE) {
423+ snap_state.state = SAVE_STATE_COMPLETED;
817b7667
SR
424+ } else if (aborted) {
425+ save_snapshot_error("process_savevm_cleanup: found aborted state: %d",
426+ snap_state.state);
0775f12b
WB
427+ } else {
428+ save_snapshot_error("process_savevm_cleanup: invalid state: %d",
429+ snap_state.state);
95259824 430+ }
0775f12b
WB
431+ if (snap_state.saved_vm_running) {
432+ vm_start();
433+ snap_state.saved_vm_running = false;
95259824 434+ }
d7f4e01a
TL
435+
436+ DPRINTF("timing: process_savevm_finalize (full) took %ld ms\n",
437+ qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time);
95259824
WB
438+}
439+
d7f4e01a 440+static void coroutine_fn process_savevm_co(void *opaque)
95259824
WB
441+{
442+ int ret;
443+ int64_t maxlen;
d7f4e01a
TL
444+ BdrvNextIterator it;
445+ BlockDriverState *bs = NULL;
95259824 446+
d7f4e01a
TL
447+#ifdef DEBUG_SAVEVM_STATE
448+ int64_t start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
449+#endif
95259824 450+
6838f038 451+ ret = qemu_file_get_error(snap_state.file);
95259824 452+ if (ret < 0) {
6838f038 453+ save_snapshot_error("qemu_savevm_state_setup failed");
d7f4e01a 454+ return;
95259824
WB
455+ }
456+
457+ while (snap_state.state == SAVE_STATE_ACTIVE) {
0775f12b 458+ uint64_t pending_size, pend_precopy, pend_compatible, pend_postcopy;
95259824 459+
0775f12b
WB
460+ qemu_savevm_state_pending(snap_state.file, 0, &pend_precopy, &pend_compatible, &pend_postcopy);
461+ pending_size = pend_precopy + pend_compatible + pend_postcopy;
95259824 462+
0775f12b
WB
463+ maxlen = blk_getlength(snap_state.target) - 30*1024*1024;
464+
465+ if (pending_size > 400000 && snap_state.bs_pos + pending_size < maxlen) {
0775f12b
WB
466+ ret = qemu_savevm_state_iterate(snap_state.file, false);
467+ if (ret < 0) {
468+ save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
469+ break;
470+ }
d7f4e01a 471+ DPRINTF("savevm iterate pending size %lu ret %d\n", pending_size, ret);
95259824 472+ } else {
b855dce7 473+ qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
0775f12b
WB
474+ ret = global_state_store();
475+ if (ret) {
476+ save_snapshot_error("global_state_store error %d", ret);
95259824 477+ break;
0775f12b 478+ }
d7f4e01a
TL
479+
480+ DPRINTF("savevm iterate complete\n");
95259824
WB
481+ break;
482+ }
95259824
WB
483+ }
484+
d7f4e01a
TL
485+ DPRINTF("timing: process_savevm_co took %ld ms\n",
486+ qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time);
487+
488+#ifdef DEBUG_SAVEVM_STATE
489+ int64_t start_time_flush = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
490+#endif
491+ /* If a drive runs in an IOThread we can flush it async, and only
492+ * need to sync-flush whatever IO happens between now and
493+ * vm_stop_force_state. bdrv_next can only be called from main AioContext,
494+ * so move there now and after every flush.
495+ */
496+ aio_co_reschedule_self(qemu_get_aio_context());
497+ for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
498+ /* target has BDRV_O_NO_FLUSH, no sense calling bdrv_flush on it */
499+ if (bs == blk_bs(snap_state.target)) {
500+ continue;
501+ }
502+
503+ AioContext *bs_ctx = bdrv_get_aio_context(bs);
504+ if (bs_ctx != qemu_get_aio_context()) {
505+ DPRINTF("savevm: async flushing drive %s\n", bs->filename);
506+ aio_co_reschedule_self(bs_ctx);
507+ bdrv_flush(bs);
508+ aio_co_reschedule_self(qemu_get_aio_context());
509+ }
510+ }
511+
512+ DPRINTF("timing: async flushing took %ld ms\n",
513+ qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time_flush);
95259824 514+
d7f4e01a 515+ qemu_bh_schedule(snap_state.finalize_bh);
95259824
WB
516+}
517+
95259824
WB
518+void qmp_savevm_start(bool has_statefile, const char *statefile, Error **errp)
519+{
95259824 520+ Error *local_err = NULL;
d7f4e01a
TL
521+ MigrationState *ms = migrate_get_current();
522+ AioContext *iohandler_ctx = iohandler_get_aio_context();
95259824 523+
67af0fa4 524+ int bdrv_oflags = BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH;
95259824
WB
525+
526+ if (snap_state.state != SAVE_STATE_DONE) {
527+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
528+ "VM snapshot already started\n");
529+ return;
530+ }
531+
d7f4e01a
TL
532+ if (migration_is_running(ms->state)) {
533+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, QERR_MIGRATION_ACTIVE);
534+ return;
535+ }
536+
537+ if (migrate_use_block()) {
538+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
539+ "Block migration and snapshots are incompatible");
540+ return;
541+ }
542+
95259824
WB
543+ /* initialize snapshot info */
544+ snap_state.saved_vm_running = runstate_is_running();
545+ snap_state.bs_pos = 0;
546+ snap_state.total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
547+ snap_state.blocker = NULL;
548+
549+ if (snap_state.error) {
550+ error_free(snap_state.error);
551+ snap_state.error = NULL;
552+ }
553+
554+ if (!has_statefile) {
555+ vm_stop(RUN_STATE_SAVE_VM);
556+ snap_state.state = SAVE_STATE_COMPLETED;
557+ return;
558+ }
559+
560+ if (qemu_savevm_state_blocked(errp)) {
561+ return;
562+ }
563+
564+ /* Open the image */
95259824
WB
565+ QDict *options = NULL;
566+ options = qdict_new();
53e83913 567+ qdict_put_str(options, "driver", "raw");
67af0fa4
WB
568+ snap_state.target = blk_new_open(statefile, NULL, options, bdrv_oflags, &local_err);
569+ if (!snap_state.target) {
95259824
WB
570+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
571+ goto restart;
572+ }
573+
574+ snap_state.file = qemu_fopen_ops(&snap_state, &block_file_ops);
575+
576+ if (!snap_state.file) {
577+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
578+ goto restart;
579+ }
580+
d7f4e01a
TL
581+ /*
582+ * qemu_savevm_* paths use migration code and expect a migration state.
583+ * State is cleared in process_savevm_co, but has to be initialized
584+ * here (blocking main thread, from QMP) to avoid race conditions.
585+ */
586+ migrate_init(ms);
587+ memset(&ram_counters, 0, sizeof(ram_counters));
588+ ms->to_dst_file = snap_state.file;
95259824
WB
589+
590+ error_setg(&snap_state.blocker, "block device is in use by savevm");
67af0fa4 591+ blk_op_block_all(snap_state.target, snap_state.blocker);
95259824 592+
0775f12b 593+ snap_state.state = SAVE_STATE_ACTIVE;
d7f4e01a
TL
594+ snap_state.finalize_bh = qemu_bh_new(process_savevm_finalize, &snap_state);
595+ snap_state.co = qemu_coroutine_create(&process_savevm_co, NULL);
596+ qemu_mutex_unlock_iothread();
597+ qemu_savevm_state_header(snap_state.file);
598+ qemu_savevm_state_setup(snap_state.file);
599+ qemu_mutex_lock_iothread();
600+
601+ /* Async processing from here on out happens in iohandler context, so let
602+ * the target bdrv have its home there.
603+ */
604+ blk_set_aio_context(snap_state.target, iohandler_ctx, &local_err);
605+
606+ aio_co_schedule(iohandler_ctx, snap_state.co);
95259824
WB
607+
608+ return;
609+
610+restart:
611+
612+ save_snapshot_error("setup failed");
613+
614+ if (snap_state.saved_vm_running) {
615+ vm_start();
817b7667 616+ snap_state.saved_vm_running = false;
95259824
WB
617+ }
618+}
619+
817b7667 620+void coroutine_fn qmp_savevm_end(Error **errp)
95259824 621+{
817b7667
SR
622+ int64_t timeout;
623+
95259824
WB
624+ if (snap_state.state == SAVE_STATE_DONE) {
625+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
626+ "VM snapshot not started\n");
627+ return;
628+ }
629+
630+ if (snap_state.state == SAVE_STATE_ACTIVE) {
631+ snap_state.state = SAVE_STATE_CANCELLED;
817b7667 632+ goto wait_for_close;
95259824
WB
633+ }
634+
635+ if (snap_state.saved_vm_running) {
636+ vm_start();
817b7667 637+ snap_state.saved_vm_running = false;
95259824
WB
638+ }
639+
640+ snap_state.state = SAVE_STATE_DONE;
817b7667
SR
641+
642+wait_for_close:
643+ if (!snap_state.target) {
644+ DPRINTF("savevm-end: no target file open\n");
645+ return;
646+ }
647+
648+ /* wait until cleanup is done before returning, this ensures that after this
649+ * call exits the statefile will be closed and can be removed immediately */
650+ DPRINTF("savevm-end: waiting for cleanup\n");
651+ timeout = 30L * 1000 * 1000 * 1000;
652+ qemu_co_sleep_ns_wakeable(QEMU_CLOCK_REALTIME, timeout,
653+ &snap_state.target_close_wait);
654+ snap_state.target_close_wait = NULL;
655+ if (snap_state.target) {
656+ save_snapshot_error("timeout waiting for target file close in "
657+ "qmp_savevm_end");
658+ /* we cannot assume the snapshot finished in this case, so leave the
659+ * state alone - caller has to figure something out */
660+ return;
661+ }
662+
663+ DPRINTF("savevm-end: cleanup done\n");
95259824
WB
664+}
665+
0775f12b 666+// FIXME: Deprecated
95259824
WB
667+void qmp_snapshot_drive(const char *device, const char *name, Error **errp)
668+{
0775f12b
WB
669+ // Compatibility to older qemu-server.
670+ qmp_blockdev_snapshot_internal_sync(device, name, errp);
95259824
WB
671+}
672+
0775f12b 673+// FIXME: Deprecated
95259824
WB
674+void qmp_delete_drive_snapshot(const char *device, const char *name,
675+ Error **errp)
676+{
0775f12b
WB
677+ // Compatibility to older qemu-server.
678+ (void)qmp_blockdev_snapshot_delete_internal_sync(device, false, NULL,
679+ true, name, errp);
95259824
WB
680+}
681+
67af0fa4 682+static ssize_t loadstate_get_buffer(void *opaque, uint8_t *buf, int64_t pos,
6402d961 683+ size_t size, Error **errp)
95259824 684+{
67af0fa4
WB
685+ BlockBackend *be = opaque;
686+ int64_t maxlen = blk_getlength(be);
95259824
WB
687+ if (pos > maxlen) {
688+ return -EIO;
689+ }
690+ if ((pos + size) > maxlen) {
691+ size = maxlen - pos - 1;
692+ }
693+ if (size == 0) {
694+ return 0;
695+ }
67af0fa4 696+ return blk_pread(be, pos, buf, size);
95259824
WB
697+}
698+
699+static const QEMUFileOps loadstate_file_ops = {
700+ .get_buffer = loadstate_get_buffer,
701+};
702+
6838f038 703+int load_snapshot_from_blockdev(const char *filename, Error **errp)
95259824 704+{
67af0fa4 705+ BlockBackend *be;
95259824
WB
706+ Error *local_err = NULL;
707+ Error *blocker = NULL;
708+
709+ QEMUFile *f;
67af0fa4 710+ int ret = -EINVAL;
95259824 711+
67af0fa4 712+ be = blk_new_open(filename, NULL, NULL, 0, &local_err);
95259824 713+
67af0fa4 714+ if (!be) {
6838f038 715+ error_setg(errp, "Could not open VM state file");
95259824
WB
716+ goto the_end;
717+ }
718+
67af0fa4
WB
719+ error_setg(&blocker, "block device is in use by load state");
720+ blk_op_block_all(be, blocker);
721+
95259824 722+ /* restore the VM state */
67af0fa4 723+ f = qemu_fopen_ops(be, &loadstate_file_ops);
95259824 724+ if (!f) {
6838f038 725+ error_setg(errp, "Could not open VM state file");
95259824
WB
726+ goto the_end;
727+ }
728+
6838f038 729+ qemu_system_reset(SHUTDOWN_CAUSE_NONE);
95259824
WB
730+ ret = qemu_loadvm_state(f);
731+
732+ qemu_fclose(f);
733+ migration_incoming_state_destroy();
734+ if (ret < 0) {
6838f038 735+ error_setg_errno(errp, -ret, "Error while loading VM state");
95259824
WB
736+ goto the_end;
737+ }
738+
739+ ret = 0;
740+
741+ the_end:
67af0fa4
WB
742+ if (be) {
743+ blk_op_unblock_all(be, blocker);
95259824 744+ error_free(blocker);
67af0fa4 745+ blk_unref(be);
95259824
WB
746+ }
747+ return ret;
748+}
817b7667
SR
749diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
750index 705f08a8f1..77ab152aab 100644
751--- a/monitor/hmp-cmds.c
752+++ b/monitor/hmp-cmds.c
753@@ -1949,6 +1949,63 @@ void hmp_info_memory_devices(Monitor *mon, const QDict *qdict)
754 hmp_handle_error(mon, err);
755 }
756
757+void hmp_savevm_start(Monitor *mon, const QDict *qdict)
758+{
759+ Error *errp = NULL;
760+ const char *statefile = qdict_get_try_str(qdict, "statefile");
761+
762+ qmp_savevm_start(statefile != NULL, statefile, &errp);
763+ hmp_handle_error(mon, errp);
764+}
765+
766+void hmp_snapshot_drive(Monitor *mon, const QDict *qdict)
767+{
768+ Error *errp = NULL;
769+ const char *name = qdict_get_str(qdict, "name");
770+ const char *device = qdict_get_str(qdict, "device");
771+
772+ qmp_snapshot_drive(device, name, &errp);
773+ hmp_handle_error(mon, errp);
774+}
775+
776+void hmp_delete_drive_snapshot(Monitor *mon, const QDict *qdict)
777+{
778+ Error *errp = NULL;
779+ const char *name = qdict_get_str(qdict, "name");
780+ const char *device = qdict_get_str(qdict, "device");
781+
782+ qmp_delete_drive_snapshot(device, name, &errp);
783+ hmp_handle_error(mon, errp);
784+}
785+
786+void coroutine_fn hmp_savevm_end(Monitor *mon, const QDict *qdict)
787+{
788+ Error *errp = NULL;
789+
790+ qmp_savevm_end(&errp);
791+ hmp_handle_error(mon, errp);
792+}
793+
794+void hmp_info_savevm(Monitor *mon, const QDict *qdict)
795+{
796+ SaveVMInfo *info;
797+ info = qmp_query_savevm(NULL);
798+
799+ if (info->has_status) {
800+ monitor_printf(mon, "savevm status: %s\n", info->status);
801+ monitor_printf(mon, "total time: %" PRIu64 " milliseconds\n",
802+ info->total_time);
803+ } else {
804+ monitor_printf(mon, "savevm status: not running\n");
805+ }
806+ if (info->has_bytes) {
807+ monitor_printf(mon, "Bytes saved: %"PRIu64"\n", info->bytes);
808+ }
809+ if (info->has_error) {
810+ monitor_printf(mon, "Error: %s\n", info->error);
811+ }
812+}
813+
814 void hmp_info_iothreads(Monitor *mon, const QDict *qdict)
815 {
816 IOThreadInfoList *info_list = qmp_query_iothreads(NULL);
817diff --git a/qapi/migration.json b/qapi/migration.json
818index 3c75820527..cb3627884c 100644
819--- a/qapi/migration.json
820+++ b/qapi/migration.json
821@@ -242,6 +242,40 @@
822 '*compression': 'CompressionStats',
823 '*socket-address': ['SocketAddress'] } }
824
825+##
826+# @SaveVMInfo:
827+#
828+# Information about the current savevm process.
829+#
830+# @status: string describing the current savevm status.
831+# This can be 'active', 'completed', 'failed'.
832+# If this field is not returned, no savevm process
833+# has been initiated
834+#
835+# @error: string containing error message if status is failed.
836+#
837+# @total-time: total amount of milliseconds since savevm started.
838+# If savevm has ended, it returns the total save time
839+#
840+# @bytes: total amount of data transferred
841+#
842+# Since: 1.3
843+##
844+{ 'struct': 'SaveVMInfo',
845+ 'data': {'*status': 'str', '*error': 'str',
846+ '*total-time': 'int', '*bytes': 'int'} }
847+
848+##
849+# @query-savevm:
850+#
851+# Returns information about current savevm process.
852+#
853+# Returns: @SaveVMInfo
854+#
855+# Since: 1.3
856+##
857+{ 'command': 'query-savevm', 'returns': 'SaveVMInfo' }
858+
859 ##
860 # @query-migrate:
861 #
862diff --git a/qapi/misc.json b/qapi/misc.json
863index 40df513856..4f5333d960 100644
864--- a/qapi/misc.json
865+++ b/qapi/misc.json
866@@ -476,6 +476,38 @@
867 ##
868 { 'command': 'query-fdsets', 'returns': ['FdsetInfo'] }
869
870+##
871+# @savevm-start:
872+#
873+# Prepare for snapshot and halt VM. Save VM state to statefile.
874+#
875+##
876+{ 'command': 'savevm-start', 'data': { '*statefile': 'str' } }
877+
878+##
879+# @snapshot-drive:
880+#
881+# Create an internal drive snapshot.
882+#
883+##
884+{ 'command': 'snapshot-drive', 'data': { 'device': 'str', 'name': 'str' } }
885+
886+##
887+# @delete-drive-snapshot:
888+#
889+# Delete a drive snapshot.
890+#
891+##
892+{ 'command': 'delete-drive-snapshot', 'data': { 'device': 'str', 'name': 'str' } }
893+
894+##
895+# @savevm-end:
896+#
897+# Resume VM after a snapshot.
898+#
899+##
900+{ 'command': 'savevm-end', 'coroutine': true }
901+
902 ##
903 # @CommandLineParameterType:
904 #
905diff --git a/qemu-options.hx b/qemu-options.hx
906index 104632ea34..c1352312c2 100644
907--- a/qemu-options.hx
908+++ b/qemu-options.hx
909@@ -3903,6 +3903,18 @@ SRST
910 Start right away with a saved state (``loadvm`` in monitor)
911 ERST
912
913+DEF("loadstate", HAS_ARG, QEMU_OPTION_loadstate, \
914+ "-loadstate file\n" \
915+ " start right away with a saved state\n",
916+ QEMU_ARCH_ALL)
917+SRST
918+``-loadstate file``
919+ Start right away with a saved state. This option does not rollback
920+    disk state like @code{loadvm}, so the user must make sure that disks
921+    have the correct state. @var{file} can be any valid device URL. See the section
922+ for "Device URL Syntax" for more information.
923+ERST
924+
925 #ifndef _WIN32
926 DEF("daemonize", 0, QEMU_OPTION_daemonize, \
927 "-daemonize daemonize QEMU after initializing\n", QEMU_ARCH_ALL)
83faa3fe 928diff --git a/softmmu/vl.c b/softmmu/vl.c
817b7667 929index e6e0ad5a92..03152c816c 100644
83faa3fe
TL
930--- a/softmmu/vl.c
931+++ b/softmmu/vl.c
817b7667 932@@ -2878,6 +2878,7 @@ void qemu_init(int argc, char **argv, char **envp)
95259824
WB
933 int optind;
934 const char *optarg;
935 const char *loadvm = NULL;
936+ const char *loadstate = NULL;
937 MachineClass *machine_class;
be901f66 938 const char *cpu_option;
95259824 939 const char *vga_model = NULL;
817b7667 940@@ -3439,6 +3440,9 @@ void qemu_init(int argc, char **argv, char **envp)
95259824
WB
941 case QEMU_OPTION_loadvm:
942 loadvm = optarg;
943 break;
944+ case QEMU_OPTION_loadstate:
945+ loadstate = optarg;
946+ break;
947 case QEMU_OPTION_full_screen:
53e83913
WB
948 dpy.has_full_screen = true;
949 dpy.full_screen = true;
817b7667 950@@ -4478,6 +4482,12 @@ void qemu_init(int argc, char **argv, char **envp)
95259824 951 autostart = 0;
b855dce7 952 exit(1);
95259824
WB
953 }
954+ } else if (loadstate) {
6838f038
WB
955+ Error *local_err = NULL;
956+ if (load_snapshot_from_blockdev(loadstate, &local_err) < 0) {
957+ error_report_err(local_err);
95259824
WB
958+ autostart = 0;
959+ }
960 }
b855dce7
TL
961 if (replay_mode != REPLAY_MODE_NONE) {
962 replay_vmstate_init();