]> git.proxmox.com Git - pve-qemu.git/blame - debian/patches/pve/0017-PVE-add-savevm-async-for-background-state-snapshots.patch
savevm-async: keep more free space when entering final stage
[pve-qemu.git] / debian / patches / pve / 0017-PVE-add-savevm-async-for-background-state-snapshots.patch
CommitLineData
23102ed6 1From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
6402d961 2From: Dietmar Maurer <dietmar@proxmox.com>
83faa3fe 3Date: Mon, 6 Apr 2020 12:16:46 +0200
817b7667 4Subject: [PATCH] PVE: add savevm-async for background state snapshots
95259824 5
d7f4e01a
TL
6Put qemu_savevm_state_{header,setup} into the main loop and the rest
7of the iteration into a coroutine. The former need to lock the
8iothread (and we can't unlock it in the coroutine), and the latter
9can't deal with being in a separate thread, so a coroutine it must
10be.
11
817b7667
SR
12Truncate output file at 1024 boundary.
13
14Do not block the VM and save the state on aborting a snapshot, as the
15snapshot will be invalid anyway.
16
17Also, when aborting, wait for the target file to be closed, otherwise a
18client might run into race-conditions when trying to remove the file
19still opened by QEMU.
20
b855dce7 21Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
6402d961 22Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
d7f4e01a 23Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
817b7667
SR
24[improve aborting]
25Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
5b15e2ec 26[FE: further improve aborting
8051a24b
FE
27 adapt to removal of QEMUFileOps
28 improve condition for entering final stage]
563c5928 29Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
95259824 30---
b855dce7 31 hmp-commands-info.hx | 13 +
5b15e2ec 32 hmp-commands.hx | 33 +++
8dca018b 33 include/migration/snapshot.h | 2 +
be901f66 34 include/monitor/hmp.h | 5 +
817b7667 35 migration/meson.build | 1 +
eee064d9 36 migration/savevm-async.c | 538 +++++++++++++++++++++++++++++++++++
d7f4e01a 37 monitor/hmp-cmds.c | 57 ++++
5b15e2ec
FE
38 qapi/migration.json | 34 +++
39 qapi/misc.json | 32 +++
83faa3fe 40 qemu-options.hx | 12 +
83faa3fe 41 softmmu/vl.c | 10 +
eee064d9 42 11 files changed, 737 insertions(+)
817b7667 43 create mode 100644 migration/savevm-async.c
95259824 44
95259824 45diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
d03e1b3c 46index 754b1e8408..489c524e9e 100644
95259824
WB
47--- a/hmp-commands-info.hx
48+++ b/hmp-commands-info.hx
d03e1b3c 49@@ -540,6 +540,19 @@ SRST
8dca018b 50 Show current migration parameters.
83faa3fe
TL
51 ERST
52
b855dce7 53+ {
95259824
WB
54+ .name = "savevm",
55+ .args_type = "",
56+ .params = "",
57+ .help = "show savevm status",
a544966d 58+ .cmd = hmp_info_savevm,
95259824
WB
59+ },
60+
83faa3fe
TL
61+SRST
62+ ``info savevm``
63+ Show savevm status.
64+ERST
65+
b855dce7 66 {
83faa3fe
TL
67 .name = "balloon",
68 .args_type = "",
95259824 69diff --git a/hmp-commands.hx b/hmp-commands.hx
d03e1b3c 70index 673e39a697..039be0033d 100644
95259824
WB
71--- a/hmp-commands.hx
72+++ b/hmp-commands.hx
d03e1b3c
FE
73@@ -1815,3 +1815,36 @@ SRST
74 Dump the FDT in dtb format to *filename*.
75 ERST
76 #endif
95259824
WB
77+
78+ {
79+ .name = "savevm-start",
80+ .args_type = "statefile:s?",
81+ .params = "[statefile]",
82+ .help = "Prepare for snapshot and halt VM. Save VM state to statefile.",
a544966d 83+ .cmd = hmp_savevm_start,
95259824
WB
84+ },
85+
86+ {
87+ .name = "snapshot-drive",
88+ .args_type = "device:s,name:s",
89+ .params = "device name",
90+ .help = "Create internal snapshot.",
a544966d 91+ .cmd = hmp_snapshot_drive,
95259824
WB
92+ },
93+
94+ {
95+ .name = "delete-drive-snapshot",
96+ .args_type = "device:s,name:s",
97+ .params = "device name",
98+ .help = "Delete internal snapshot.",
a544966d 99+ .cmd = hmp_delete_drive_snapshot,
95259824
WB
100+ },
101+
102+ {
103+ .name = "savevm-end",
104+ .args_type = "",
105+ .params = "",
106+ .help = "Resume VM after snapshot.",
817b7667
SR
107+ .cmd = hmp_savevm_end,
108+ .coroutine = true,
95259824 109+ },
be901f66 110diff --git a/include/migration/snapshot.h b/include/migration/snapshot.h
8dca018b 111index e72083b117..c846d37806 100644
be901f66
SR
112--- a/include/migration/snapshot.h
113+++ b/include/migration/snapshot.h
8dca018b
SR
114@@ -61,4 +61,6 @@ bool delete_snapshot(const char *name,
115 bool has_devices, strList *devices,
116 Error **errp);
be901f66 117
be901f66 118+int load_snapshot_from_blockdev(const char *filename, Error **errp);
8dca018b 119+
be901f66
SR
120 #endif
121diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h
d03e1b3c 122index dfbc0c9a2f..440f86aba8 100644
be901f66
SR
123--- a/include/monitor/hmp.h
124+++ b/include/monitor/hmp.h
d03e1b3c 125@@ -27,6 +27,7 @@ void hmp_info_status(Monitor *mon, const QDict *qdict);
be901f66
SR
126 void hmp_info_uuid(Monitor *mon, const QDict *qdict);
127 void hmp_info_chardev(Monitor *mon, const QDict *qdict);
128 void hmp_info_mice(Monitor *mon, const QDict *qdict);
129+void hmp_info_savevm(Monitor *mon, const QDict *qdict);
130 void hmp_info_migrate(Monitor *mon, const QDict *qdict);
131 void hmp_info_migrate_capabilities(Monitor *mon, const QDict *qdict);
132 void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict);
d03e1b3c 133@@ -81,6 +82,10 @@ void hmp_netdev_add(Monitor *mon, const QDict *qdict);
be901f66
SR
134 void hmp_netdev_del(Monitor *mon, const QDict *qdict);
135 void hmp_getfd(Monitor *mon, const QDict *qdict);
136 void hmp_closefd(Monitor *mon, const QDict *qdict);
137+void hmp_savevm_start(Monitor *mon, const QDict *qdict);
138+void hmp_snapshot_drive(Monitor *mon, const QDict *qdict);
139+void hmp_delete_drive_snapshot(Monitor *mon, const QDict *qdict);
140+void hmp_savevm_end(Monitor *mon, const QDict *qdict);
141 void hmp_sendkey(Monitor *mon, const QDict *qdict);
d03e1b3c 142 void coroutine_fn hmp_screendump(Monitor *mon, const QDict *qdict);
83faa3fe 143 void hmp_chardev_add(Monitor *mon, const QDict *qdict);
817b7667 144diff --git a/migration/meson.build b/migration/meson.build
5b15e2ec 145index 8cac83c06c..0842d00cd2 100644
817b7667
SR
146--- a/migration/meson.build
147+++ b/migration/meson.build
5b15e2ec 148@@ -24,6 +24,7 @@ softmmu_ss.add(files(
817b7667
SR
149 'multifd-zlib.c',
150 'postcopy-ram.c',
151 'savevm.c',
152+ 'savevm-async.c',
153 'socket.c',
154 'tls.c',
8dca018b 155 ), gnutls)
817b7667 156diff --git a/migration/savevm-async.c b/migration/savevm-async.c
95259824 157new file mode 100644
eee064d9 158index 0000000000..dc30558713
95259824 159--- /dev/null
817b7667 160+++ b/migration/savevm-async.c
eee064d9 161@@ -0,0 +1,538 @@
95259824 162+#include "qemu/osdep.h"
5b15e2ec 163+#include "migration/channel-savevm-async.h"
6838f038
WB
164+#include "migration/migration.h"
165+#include "migration/savevm.h"
166+#include "migration/snapshot.h"
167+#include "migration/global_state.h"
168+#include "migration/ram.h"
169+#include "migration/qemu-file.h"
95259824 170+#include "sysemu/sysemu.h"
6402d961 171+#include "sysemu/runstate.h"
95259824 172+#include "block/block.h"
95259824 173+#include "sysemu/block-backend.h"
53e83913
WB
174+#include "qapi/error.h"
175+#include "qapi/qmp/qerror.h"
176+#include "qapi/qmp/qdict.h"
177+#include "qapi/qapi-commands-migration.h"
178+#include "qapi/qapi-commands-misc.h"
0775f12b 179+#include "qapi/qapi-commands-block.h"
95259824 180+#include "qemu/cutils.h"
817b7667 181+#include "qemu/timer.h"
6402d961
TL
182+#include "qemu/main-loop.h"
183+#include "qemu/rcu.h"
95259824
WB
184+
185+/* #define DEBUG_SAVEVM_STATE */
186+
187+#ifdef DEBUG_SAVEVM_STATE
188+#define DPRINTF(fmt, ...) \
189+ do { printf("savevm-async: " fmt, ## __VA_ARGS__); } while (0)
190+#else
191+#define DPRINTF(fmt, ...) \
192+ do { } while (0)
193+#endif
194+
195+enum {
196+ SAVE_STATE_DONE,
197+ SAVE_STATE_ERROR,
198+ SAVE_STATE_ACTIVE,
199+ SAVE_STATE_COMPLETED,
200+ SAVE_STATE_CANCELLED
201+};
202+
203+
204+static struct SnapshotState {
67af0fa4 205+ BlockBackend *target;
95259824
WB
206+ size_t bs_pos;
207+ int state;
208+ Error *error;
209+ Error *blocker;
210+ int saved_vm_running;
211+ QEMUFile *file;
212+ int64_t total_time;
d7f4e01a
TL
213+ QEMUBH *finalize_bh;
214+ Coroutine *co;
563c5928 215+ QemuCoSleep target_close_wait;
95259824
WB
216+} snap_state;
217+
817b7667
SR
218+static bool savevm_aborted(void)
219+{
220+ return snap_state.state == SAVE_STATE_CANCELLED ||
221+ snap_state.state == SAVE_STATE_ERROR;
222+}
223+
95259824
WB
224+SaveVMInfo *qmp_query_savevm(Error **errp)
225+{
226+ SaveVMInfo *info = g_malloc0(sizeof(*info));
227+ struct SnapshotState *s = &snap_state;
228+
229+ if (s->state != SAVE_STATE_DONE) {
230+ info->has_bytes = true;
231+ info->bytes = s->bs_pos;
232+ switch (s->state) {
233+ case SAVE_STATE_ERROR:
234+ info->has_status = true;
235+ info->status = g_strdup("failed");
236+ info->has_total_time = true;
237+ info->total_time = s->total_time;
238+ if (s->error) {
239+ info->has_error = true;
240+ info->error = g_strdup(error_get_pretty(s->error));
241+ }
242+ break;
243+ case SAVE_STATE_ACTIVE:
244+ info->has_status = true;
245+ info->status = g_strdup("active");
246+ info->has_total_time = true;
247+ info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME)
248+ - s->total_time;
249+ break;
250+ case SAVE_STATE_COMPLETED:
251+ info->has_status = true;
252+ info->status = g_strdup("completed");
253+ info->has_total_time = true;
254+ info->total_time = s->total_time;
255+ break;
256+ }
257+ }
258+
259+ return info;
260+}
261+
262+static int save_snapshot_cleanup(void)
263+{
264+ int ret = 0;
265+
266+ DPRINTF("save_snapshot_cleanup\n");
267+
268+ snap_state.total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) -
269+ snap_state.total_time;
270+
271+ if (snap_state.file) {
272+ ret = qemu_fclose(snap_state.file);
5b15e2ec 273+ snap_state.file = NULL;
95259824
WB
274+ }
275+
67af0fa4 276+ if (snap_state.target) {
817b7667
SR
277+ if (!savevm_aborted()) {
278+ /* try to truncate, but ignore errors (will fail on block devices).
279+ * note1: bdrv_read() needs whole blocks, so we need to round up
280+ * note2: PVE requires 1024 (BDRV_SECTOR_SIZE*2) alignment
281+ */
282+ size_t size = QEMU_ALIGN_UP(snap_state.bs_pos, BDRV_SECTOR_SIZE*2);
283+ blk_truncate(snap_state.target, size, false, PREALLOC_MODE_OFF, 0, NULL);
284+ }
67af0fa4 285+ blk_op_unblock_all(snap_state.target, snap_state.blocker);
95259824
WB
286+ error_free(snap_state.blocker);
287+ snap_state.blocker = NULL;
67af0fa4
WB
288+ blk_unref(snap_state.target);
289+ snap_state.target = NULL;
817b7667 290+
563c5928 291+ qemu_co_sleep_wake(&snap_state.target_close_wait);
95259824
WB
292+ }
293+
294+ return ret;
295+}
296+
297+static void save_snapshot_error(const char *fmt, ...)
298+{
299+ va_list ap;
300+ char *msg;
301+
302+ va_start(ap, fmt);
303+ msg = g_strdup_vprintf(fmt, ap);
304+ va_end(ap);
305+
306+ DPRINTF("save_snapshot_error: %s\n", msg);
307+
308+ if (!snap_state.error) {
309+ error_set(&snap_state.error, ERROR_CLASS_GENERIC_ERROR, "%s", msg);
310+ }
311+
312+ g_free (msg);
313+
314+ snap_state.state = SAVE_STATE_ERROR;
95259824
WB
315+}
316+
d7f4e01a 317+static void process_savevm_finalize(void *opaque)
0775f12b
WB
318+{
319+ int ret;
d7f4e01a
TL
320+ AioContext *iohandler_ctx = iohandler_get_aio_context();
321+ MigrationState *ms = migrate_get_current();
322+
817b7667
SR
323+ bool aborted = savevm_aborted();
324+
d7f4e01a
TL
325+#ifdef DEBUG_SAVEVM_STATE
326+ int64_t start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
327+#endif
328+
329+ qemu_bh_delete(snap_state.finalize_bh);
330+ snap_state.finalize_bh = NULL;
331+ snap_state.co = NULL;
332+
333+ /* We need to own the target bdrv's context for the following functions,
334+ * so move it back. It can stay in the main context and live out its life
335+ * there, since we're done with it after this method ends anyway.
336+ */
337+ aio_context_acquire(iohandler_ctx);
338+ blk_set_aio_context(snap_state.target, qemu_get_aio_context(), NULL);
339+ aio_context_release(iohandler_ctx);
340+
341+ ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
342+ if (ret < 0) {
343+ save_snapshot_error("vm_stop_force_state error %d", ret);
344+ }
345+
817b7667
SR
346+ if (!aborted) {
347+ /* skip state saving if we aborted, snapshot will be invalid anyway */
348+ (void)qemu_savevm_state_complete_precopy(snap_state.file, false, false);
349+ ret = qemu_file_get_error(snap_state.file);
350+ if (ret < 0) {
a0208150 351+ save_snapshot_error("qemu_savevm_state_complete_precopy error %d", ret);
817b7667 352+ }
d7f4e01a
TL
353+ }
354+
355+ DPRINTF("state saving complete\n");
356+ DPRINTF("timing: process_savevm_finalize (state saving) took %ld ms\n",
357+ qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time);
358+
359+ /* clear migration state */
360+ migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP,
817b7667 361+ ret || aborted ? MIGRATION_STATUS_FAILED : MIGRATION_STATUS_COMPLETED);
d7f4e01a
TL
362+ ms->to_dst_file = NULL;
363+
364+ qemu_savevm_state_cleanup();
365+
0775f12b
WB
366+ ret = save_snapshot_cleanup();
367+ if (ret < 0) {
368+ save_snapshot_error("save_snapshot_cleanup error %d", ret);
369+ } else if (snap_state.state == SAVE_STATE_ACTIVE) {
370+ snap_state.state = SAVE_STATE_COMPLETED;
817b7667 371+ } else if (aborted) {
1976ca46
FE
372+ /*
373+ * If there was an error, there's no need to set a new one here.
374+ * If the snapshot was canceled, leave setting the state to
375+ * qmp_savevm_end(), which is woken by save_snapshot_cleanup().
376+ */
0775f12b
WB
377+ } else {
378+ save_snapshot_error("process_savevm_cleanup: invalid state: %d",
379+ snap_state.state);
95259824 380+ }
0775f12b
WB
381+ if (snap_state.saved_vm_running) {
382+ vm_start();
383+ snap_state.saved_vm_running = false;
95259824 384+ }
d7f4e01a
TL
385+
386+ DPRINTF("timing: process_savevm_finalize (full) took %ld ms\n",
387+ qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time);
95259824
WB
388+}
389+
d7f4e01a 390+static void coroutine_fn process_savevm_co(void *opaque)
95259824
WB
391+{
392+ int ret;
393+ int64_t maxlen;
d7f4e01a
TL
394+ BdrvNextIterator it;
395+ BlockDriverState *bs = NULL;
95259824 396+
d7f4e01a
TL
397+#ifdef DEBUG_SAVEVM_STATE
398+ int64_t start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
399+#endif
95259824 400+
6838f038 401+ ret = qemu_file_get_error(snap_state.file);
95259824 402+ if (ret < 0) {
6838f038 403+ save_snapshot_error("qemu_savevm_state_setup failed");
d7f4e01a 404+ return;
95259824
WB
405+ }
406+
407+ while (snap_state.state == SAVE_STATE_ACTIVE) {
0775f12b 408+ uint64_t pending_size, pend_precopy, pend_compatible, pend_postcopy;
95259824 409+
e9b36665
SR
410+ /* pending is expected to be called without iothread lock */
411+ qemu_mutex_unlock_iothread();
0775f12b 412+ qemu_savevm_state_pending(snap_state.file, 0, &pend_precopy, &pend_compatible, &pend_postcopy);
e9b36665
SR
413+ qemu_mutex_lock_iothread();
414+
0775f12b 415+ pending_size = pend_precopy + pend_compatible + pend_postcopy;
95259824 416+
eee064d9
FE
417+ /*
418+ * A guest reaching this cutoff is dirtying lots of RAM. It should be
419+ * large enough so that the guest can't dirty this much between the
420+ * check and the guest actually being stopped, but it should be small
421+ * enough to avoid long downtimes for non-hibernation snapshots.
422+ */
423+ maxlen = blk_getlength(snap_state.target) - 100*1024*1024;
0775f12b 424+
8051a24b
FE
425+ /* Note that there is no progress for pend_postcopy when iterating */
426+ if (pending_size - pend_postcopy > 400000 && snap_state.bs_pos + pending_size < maxlen) {
0775f12b
WB
427+ ret = qemu_savevm_state_iterate(snap_state.file, false);
428+ if (ret < 0) {
429+ save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
430+ break;
431+ }
d7f4e01a 432+ DPRINTF("savevm iterate pending size %lu ret %d\n", pending_size, ret);
95259824 433+ } else {
b855dce7 434+ qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
0775f12b
WB
435+ ret = global_state_store();
436+ if (ret) {
437+ save_snapshot_error("global_state_store error %d", ret);
95259824 438+ break;
0775f12b 439+ }
d7f4e01a
TL
440+
441+ DPRINTF("savevm iterate complete\n");
95259824
WB
442+ break;
443+ }
95259824
WB
444+ }
445+
d7f4e01a
TL
446+ DPRINTF("timing: process_savevm_co took %ld ms\n",
447+ qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time);
448+
449+#ifdef DEBUG_SAVEVM_STATE
450+ int64_t start_time_flush = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
451+#endif
452+ /* If a drive runs in an IOThread we can flush it async, and only
453+ * need to sync-flush whatever IO happens between now and
454+ * vm_stop_force_state. bdrv_next can only be called from main AioContext,
455+ * so move there now and after every flush.
456+ */
457+ aio_co_reschedule_self(qemu_get_aio_context());
458+ for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
459+ /* target has BDRV_O_NO_FLUSH, no sense calling bdrv_flush on it */
460+ if (bs == blk_bs(snap_state.target)) {
461+ continue;
462+ }
463+
464+ AioContext *bs_ctx = bdrv_get_aio_context(bs);
465+ if (bs_ctx != qemu_get_aio_context()) {
466+ DPRINTF("savevm: async flushing drive %s\n", bs->filename);
467+ aio_co_reschedule_self(bs_ctx);
468+ bdrv_flush(bs);
469+ aio_co_reschedule_self(qemu_get_aio_context());
470+ }
471+ }
472+
473+ DPRINTF("timing: async flushing took %ld ms\n",
474+ qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time_flush);
95259824 475+
d7f4e01a 476+ qemu_bh_schedule(snap_state.finalize_bh);
95259824
WB
477+}
478+
95259824
WB
479+void qmp_savevm_start(bool has_statefile, const char *statefile, Error **errp)
480+{
95259824 481+ Error *local_err = NULL;
d7f4e01a
TL
482+ MigrationState *ms = migrate_get_current();
483+ AioContext *iohandler_ctx = iohandler_get_aio_context();
95259824 484+
67af0fa4 485+ int bdrv_oflags = BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH;
95259824
WB
486+
487+ if (snap_state.state != SAVE_STATE_DONE) {
488+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
489+ "VM snapshot already started\n");
490+ return;
491+ }
492+
d7f4e01a
TL
493+ if (migration_is_running(ms->state)) {
494+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, QERR_MIGRATION_ACTIVE);
495+ return;
496+ }
497+
498+ if (migrate_use_block()) {
499+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
500+ "Block migration and snapshots are incompatible");
501+ return;
502+ }
503+
95259824
WB
504+ /* initialize snapshot info */
505+ snap_state.saved_vm_running = runstate_is_running();
506+ snap_state.bs_pos = 0;
507+ snap_state.total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
508+ snap_state.blocker = NULL;
a262e964 509+ snap_state.target_close_wait = (QemuCoSleep){ .to_wake = NULL };
95259824
WB
510+
511+ if (snap_state.error) {
512+ error_free(snap_state.error);
513+ snap_state.error = NULL;
514+ }
515+
516+ if (!has_statefile) {
517+ vm_stop(RUN_STATE_SAVE_VM);
518+ snap_state.state = SAVE_STATE_COMPLETED;
519+ return;
520+ }
521+
522+ if (qemu_savevm_state_blocked(errp)) {
523+ return;
524+ }
525+
526+ /* Open the image */
95259824
WB
527+ QDict *options = NULL;
528+ options = qdict_new();
53e83913 529+ qdict_put_str(options, "driver", "raw");
67af0fa4
WB
530+ snap_state.target = blk_new_open(statefile, NULL, options, bdrv_oflags, &local_err);
531+ if (!snap_state.target) {
95259824
WB
532+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
533+ goto restart;
534+ }
535+
5b15e2ec
FE
536+ QIOChannel *ioc = QIO_CHANNEL(qio_channel_savevm_async_new(snap_state.target,
537+ &snap_state.bs_pos));
538+ snap_state.file = qemu_file_new_output(ioc);
95259824
WB
539+
540+ if (!snap_state.file) {
541+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
542+ goto restart;
543+ }
544+
d7f4e01a
TL
545+ /*
546+ * qemu_savevm_* paths use migration code and expect a migration state.
547+ * State is cleared in process_savevm_finalize, but has to be initialized
548+ * here (blocking main thread, from QMP) to avoid race conditions.
549+ */
550+ migrate_init(ms);
551+ memset(&ram_counters, 0, sizeof(ram_counters));
552+ ms->to_dst_file = snap_state.file;
95259824
WB
553+
554+ error_setg(&snap_state.blocker, "block device is in use by savevm");
67af0fa4 555+ blk_op_block_all(snap_state.target, snap_state.blocker);
95259824 556+
0775f12b 557+ snap_state.state = SAVE_STATE_ACTIVE;
d7f4e01a
TL
558+ snap_state.finalize_bh = qemu_bh_new(process_savevm_finalize, &snap_state);
559+ snap_state.co = qemu_coroutine_create(&process_savevm_co, NULL);
560+ qemu_mutex_unlock_iothread();
561+ qemu_savevm_state_header(snap_state.file);
562+ qemu_savevm_state_setup(snap_state.file);
563+ qemu_mutex_lock_iothread();
564+
565+ /* Async processing from here on out happens in iohandler context, so let
566+ * the target bdrv have its home there.
567+ */
568+ blk_set_aio_context(snap_state.target, iohandler_ctx, &local_err);
569+
570+ aio_co_schedule(iohandler_ctx, snap_state.co);
95259824
WB
571+
572+ return;
573+
574+restart:
575+
576+ save_snapshot_error("setup failed");
577+
578+ if (snap_state.saved_vm_running) {
579+ vm_start();
817b7667 580+ snap_state.saved_vm_running = false;
95259824
WB
581+ }
582+}
583+
817b7667 584+void coroutine_fn qmp_savevm_end(Error **errp)
95259824 585+{
817b7667
SR
586+ int64_t timeout;
587+
95259824
WB
588+ if (snap_state.state == SAVE_STATE_DONE) {
589+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
590+ "VM snapshot not started\n");
591+ return;
592+ }
593+
594+ if (snap_state.state == SAVE_STATE_ACTIVE) {
595+ snap_state.state = SAVE_STATE_CANCELLED;
817b7667 596+ goto wait_for_close;
95259824
WB
597+ }
598+
599+ if (snap_state.saved_vm_running) {
600+ vm_start();
817b7667 601+ snap_state.saved_vm_running = false;
95259824
WB
602+ }
603+
604+ snap_state.state = SAVE_STATE_DONE;
817b7667
SR
605+
606+wait_for_close:
607+ if (!snap_state.target) {
608+ DPRINTF("savevm-end: no target file open\n");
609+ return;
610+ }
611+
612+ /* wait until cleanup is done before returning, this ensures that after this
613+ * call exits the statefile will be closed and can be removed immediately */
614+ DPRINTF("savevm-end: waiting for cleanup\n");
615+ timeout = 30L * 1000 * 1000 * 1000;
563c5928 616+ qemu_co_sleep_ns_wakeable(&snap_state.target_close_wait,
f376b2b9 617+ QEMU_CLOCK_REALTIME, timeout);
817b7667
SR
618+ if (snap_state.target) {
619+ save_snapshot_error("timeout waiting for target file close in "
620+ "qmp_savevm_end");
621+ /* we cannot assume the snapshot finished in this case, so leave the
622+ * state alone - caller has to figure something out */
623+ return;
624+ }
625+
1976ca46
FE
626+ // File closed and no other error, so ensure next snapshot can be started.
627+ if (snap_state.state != SAVE_STATE_ERROR) {
628+ snap_state.state = SAVE_STATE_DONE;
629+ }
630+
817b7667 631+ DPRINTF("savevm-end: cleanup done\n");
95259824
WB
632+}
633+
0775f12b 634+// FIXME: Deprecated
95259824
WB
635+void qmp_snapshot_drive(const char *device, const char *name, Error **errp)
636+{
0775f12b
WB
637+ // Compatibility to older qemu-server.
638+ qmp_blockdev_snapshot_internal_sync(device, name, errp);
95259824
WB
639+}
640+
0775f12b 641+// FIXME: Deprecated
95259824
WB
642+void qmp_delete_drive_snapshot(const char *device, const char *name,
643+ Error **errp)
644+{
0775f12b
WB
645+ // Compatibility to older qemu-server.
646+ (void)qmp_blockdev_snapshot_delete_internal_sync(device, false, NULL,
647+ true, name, errp);
95259824
WB
648+}
649+
6838f038 650+int load_snapshot_from_blockdev(const char *filename, Error **errp)
95259824 651+{
67af0fa4 652+ BlockBackend *be;
95259824
WB
653+ Error *local_err = NULL;
654+ Error *blocker = NULL;
655+
656+ QEMUFile *f;
5b15e2ec 657+ size_t bs_pos = 0;
67af0fa4 658+ int ret = -EINVAL;
95259824 659+
67af0fa4 660+ be = blk_new_open(filename, NULL, NULL, 0, &local_err);
95259824 661+
67af0fa4 662+ if (!be) {
6838f038 663+ error_setg(errp, "Could not open VM state file");
95259824
WB
664+ goto the_end;
665+ }
666+
67af0fa4
WB
667+ error_setg(&blocker, "block device is in use by load state");
668+ blk_op_block_all(be, blocker);
669+
95259824 670+ /* restore the VM state */
5b15e2ec 671+ f = qemu_file_new_input(QIO_CHANNEL(qio_channel_savevm_async_new(be, &bs_pos)));
95259824 672+ if (!f) {
6838f038 673+ error_setg(errp, "Could not open VM state file");
95259824
WB
674+ goto the_end;
675+ }
676+
6838f038 677+ qemu_system_reset(SHUTDOWN_CAUSE_NONE);
95259824
WB
678+ ret = qemu_loadvm_state(f);
679+
e9b36665
SR
680+ /* dirty bitmap migration has a special case we need to trigger manually */
681+ dirty_bitmap_mig_before_vm_start();
682+
95259824
WB
683+ qemu_fclose(f);
684+ migration_incoming_state_destroy();
685+ if (ret < 0) {
6838f038 686+ error_setg_errno(errp, -ret, "Error while loading VM state");
95259824
WB
687+ goto the_end;
688+ }
689+
690+ ret = 0;
691+
692+ the_end:
67af0fa4
WB
693+ if (be) {
694+ blk_op_unblock_all(be, blocker);
95259824 695+ error_free(blocker);
67af0fa4 696+ blk_unref(be);
95259824
WB
697+ }
698+ return ret;
699+}
817b7667 700diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
d03e1b3c 701index 480b798963..cfebfd1db5 100644
817b7667
SR
702--- a/monitor/hmp-cmds.c
703+++ b/monitor/hmp-cmds.c
d03e1b3c 704@@ -1906,6 +1906,63 @@ void hmp_info_memory_devices(Monitor *mon, const QDict *qdict)
817b7667
SR
705 hmp_handle_error(mon, err);
706 }
707
708+void hmp_savevm_start(Monitor *mon, const QDict *qdict)
709+{
710+ Error *errp = NULL;
711+ const char *statefile = qdict_get_try_str(qdict, "statefile");
712+
713+ qmp_savevm_start(statefile != NULL, statefile, &errp);
714+ hmp_handle_error(mon, errp);
715+}
716+
717+void hmp_snapshot_drive(Monitor *mon, const QDict *qdict)
718+{
719+ Error *errp = NULL;
720+ const char *name = qdict_get_str(qdict, "name");
721+ const char *device = qdict_get_str(qdict, "device");
722+
723+ qmp_snapshot_drive(device, name, &errp);
724+ hmp_handle_error(mon, errp);
725+}
726+
727+void hmp_delete_drive_snapshot(Monitor *mon, const QDict *qdict)
728+{
729+ Error *errp = NULL;
730+ const char *name = qdict_get_str(qdict, "name");
731+ const char *device = qdict_get_str(qdict, "device");
732+
733+ qmp_delete_drive_snapshot(device, name, &errp);
734+ hmp_handle_error(mon, errp);
735+}
736+
737+void coroutine_fn hmp_savevm_end(Monitor *mon, const QDict *qdict)
738+{
739+ Error *errp = NULL;
740+
741+ qmp_savevm_end(&errp);
742+ hmp_handle_error(mon, errp);
743+}
744+
745+void hmp_info_savevm(Monitor *mon, const QDict *qdict)
746+{
747+ SaveVMInfo *info;
748+ info = qmp_query_savevm(NULL);
749+
750+ if (info->has_status) {
751+ monitor_printf(mon, "savevm status: %s\n", info->status);
752+ monitor_printf(mon, "total time: %" PRIu64 " milliseconds\n",
753+ info->total_time);
754+ } else {
755+ monitor_printf(mon, "savevm status: not running\n");
756+ }
757+ if (info->has_bytes) {
758+ monitor_printf(mon, "Bytes saved: %"PRIu64"\n", info->bytes);
759+ }
760+ if (info->has_error) {
761+ monitor_printf(mon, "Error: %s\n", info->error);
762+ }
763+}
764+
765 void hmp_info_iothreads(Monitor *mon, const QDict *qdict)
766 {
767 IOThreadInfoList *info_list = qmp_query_iothreads(NULL);
768diff --git a/qapi/migration.json b/qapi/migration.json
d03e1b3c 769index 88ecf86ac8..4435866379 100644
817b7667
SR
770--- a/qapi/migration.json
771+++ b/qapi/migration.json
5b15e2ec 772@@ -261,6 +261,40 @@
817b7667
SR
773 '*compression': 'CompressionStats',
774 '*socket-address': ['SocketAddress'] } }
775
776+##
777+# @SaveVMInfo:
778+#
779+# Information about the current savevm process.
780+#
781+# @status: string describing the current savevm status.
782+# This can be 'active', 'completed', 'failed'.
783+# If this field is not returned, no savevm process
784+# has been initiated
785+#
786+# @error: string containing the error message if status is 'failed'.
787+#
788+# @total-time: total amount of milliseconds since savevm started.
789+# If savevm has ended, it returns the total save time
790+#
791+# @bytes: total amount of data transferred
792+#
793+# Since: 1.3
794+##
795+{ 'struct': 'SaveVMInfo',
796+ 'data': {'*status': 'str', '*error': 'str',
797+ '*total-time': 'int', '*bytes': 'int'} }
798+
799+##
800+# @query-savevm:
801+#
802+# Returns information about current savevm process.
803+#
804+# Returns: @SaveVMInfo
805+#
806+# Since: 1.3
807+##
808+{ 'command': 'query-savevm', 'returns': 'SaveVMInfo' }
809+
810 ##
811 # @query-migrate:
812 #
813diff --git a/qapi/misc.json b/qapi/misc.json
5b15e2ec 814index 27ef5a2b20..b3ce75dcae 100644
817b7667
SR
815--- a/qapi/misc.json
816+++ b/qapi/misc.json
4567474e 817@@ -435,6 +435,38 @@
817b7667
SR
818 ##
819 { 'command': 'query-fdsets', 'returns': ['FdsetInfo'] }
820
821+##
822+# @savevm-start:
823+#
824+# Prepare for snapshot and halt VM. Save VM state to statefile.
825+#
826+##
827+{ 'command': 'savevm-start', 'data': { '*statefile': 'str' } }
828+
829+##
830+# @snapshot-drive:
831+#
832+# Create an internal drive snapshot.
833+#
834+##
835+{ 'command': 'snapshot-drive', 'data': { 'device': 'str', 'name': 'str' } }
836+
837+##
838+# @delete-drive-snapshot:
839+#
840+# Delete a drive snapshot.
841+#
842+##
843+{ 'command': 'delete-drive-snapshot', 'data': { 'device': 'str', 'name': 'str' } }
844+
845+##
846+# @savevm-end:
847+#
848+# Resume VM after a snapshot.
849+#
850+##
851+{ 'command': 'savevm-end', 'coroutine': true }
852+
853 ##
854 # @CommandLineParameterType:
855 #
856diff --git a/qemu-options.hx b/qemu-options.hx
d03e1b3c 857index 7f99d15b23..54efb127c4 100644
817b7667
SR
858--- a/qemu-options.hx
859+++ b/qemu-options.hx
d03e1b3c 860@@ -4391,6 +4391,18 @@ SRST
817b7667
SR
861 Start right away with a saved state (``loadvm`` in monitor)
862 ERST
863
864+DEF("loadstate", HAS_ARG, QEMU_OPTION_loadstate, \
865+ "-loadstate file\n" \
866+ " start right away with a saved state\n",
867+ QEMU_ARCH_ALL)
868+SRST
869+``-loadstate file``
870+ Start right away with a saved state. This option does not roll back
871+ disk state like ``loadvm``, so the user must make sure that the disks
872+ have the correct state. *file* can be any valid device URL. See the section
873+ on "Device URL Syntax" for more information.
874+ERST
875+
876 #ifndef _WIN32
877 DEF("daemonize", 0, QEMU_OPTION_daemonize, \
878 "-daemonize daemonize QEMU after initializing\n", QEMU_ARCH_ALL)
83faa3fe 879diff --git a/softmmu/vl.c b/softmmu/vl.c
d03e1b3c 880index 5f7f6ca981..21f067d115 100644
83faa3fe
TL
881--- a/softmmu/vl.c
882+++ b/softmmu/vl.c
d03e1b3c 883@@ -164,6 +164,7 @@ static const char *accelerators;
5b15e2ec
FE
884 static bool have_custom_ram_size;
885 static const char *ram_memdev_id;
f376b2b9 886 static QDict *machine_opts_dict;
8dca018b
SR
887+static const char *loadstate;
888 static QTAILQ_HEAD(, ObjectOption) object_opts = QTAILQ_HEAD_INITIALIZER(object_opts);
4567474e 889 static QTAILQ_HEAD(, DeviceOption) device_opts = QTAILQ_HEAD_INITIALIZER(device_opts);
5b15e2ec 890 static int display_remote;
d03e1b3c 891@@ -2607,6 +2608,12 @@ void qmp_x_exit_preconfig(Error **errp)
4567474e
FE
892
893 if (loadvm) {
894 load_snapshot(loadvm, NULL, false, NULL, &error_fatal);
95259824 895+ } else if (loadstate) {
6838f038
WB
896+ Error *local_err = NULL;
897+ if (load_snapshot_from_blockdev(loadstate, &local_err) < 0) {
898+ error_report_err(local_err);
95259824
WB
899+ autostart = 0;
900+ }
901 }
b855dce7
TL
902 if (replay_mode != REPLAY_MODE_NONE) {
903 replay_vmstate_init();
d03e1b3c 904@@ -3151,6 +3158,9 @@ void qemu_init(int argc, char **argv)
8dca018b
SR
905 case QEMU_OPTION_loadvm:
906 loadvm = optarg;
907 break;
908+ case QEMU_OPTION_loadstate:
909+ loadstate = optarg;
910+ break;
911 case QEMU_OPTION_full_screen:
912 dpy.has_full_screen = true;
913 dpy.full_screen = true;