]> git.proxmox.com Git - pve-qemu.git/blame - debian/patches/pve/0017-PVE-add-savevm-async-for-background-state-snapshots.patch
backup: improve error when copy-before-write fails for fleecing
[pve-qemu.git] / debian / patches / pve / 0017-PVE-add-savevm-async-for-background-state-snapshots.patch
CommitLineData
23102ed6 1From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
6402d961 2From: Dietmar Maurer <dietmar@proxmox.com>
83faa3fe 3Date: Mon, 6 Apr 2020 12:16:46 +0200
817b7667 4Subject: [PATCH] PVE: add savevm-async for background state snapshots
95259824 5
d7f4e01a
TL
6Put qemu_savevm_state_{header,setup} into the main loop and the rest
7of the iteration into a coroutine. The former need to lock the
8iothread (and we can't unlock it in the coroutine), and the latter
9can't deal with being in a separate thread, so a coroutine it must
10be.
11
817b7667
SR
12Truncate output file at 1024 boundary.
13
14Do not block the VM and save the state on aborting a snapshot, as the
15snapshot will be invalid anyway.
16
17Also, when aborting, wait for the target file to be closed, otherwise a
18client might run into race-conditions when trying to remove the file
19still opened by QEMU.
20
b855dce7 21Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
6402d961 22Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
d7f4e01a 23Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
db5d2a4b
FE
24[SR: improve aborting
25 register yank before migration_incoming_state_destroy]
817b7667 26Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
5b15e2ec 27[FE: further improve aborting
8051a24b 28 adapt to removal of QEMUFileOps
bf251437 29 improve condition for entering final stage
f1eed34a 30 adapt to QAPI and other changes for 8.2]
563c5928 31Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
95259824 32---
b855dce7 33 hmp-commands-info.hx | 13 +
99f9ce2c 34 hmp-commands.hx | 17 ++
8dca018b 35 include/migration/snapshot.h | 2 +
99f9ce2c 36 include/monitor/hmp.h | 3 +
817b7667 37 migration/meson.build | 1 +
4fbd50e2 38 migration/savevm-async.c | 531 +++++++++++++++++++++++++++++++++++
99f9ce2c 39 monitor/hmp-cmds.c | 38 +++
5b15e2ec 40 qapi/migration.json | 34 +++
4fbd50e2 41 qapi/misc.json | 18 ++
83faa3fe 42 qemu-options.hx | 12 +
f1eed34a 43 system/vl.c | 10 +
4fbd50e2 44 11 files changed, 679 insertions(+)
817b7667 45 create mode 100644 migration/savevm-async.c
95259824 46
95259824 47diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
4fbd50e2 48index ad1b1306e3..d5ab880492 100644
95259824
WB
49--- a/hmp-commands-info.hx
50+++ b/hmp-commands-info.hx
10e10933 51@@ -525,6 +525,19 @@ SRST
8dca018b 52 Show current migration parameters.
83faa3fe
TL
53 ERST
54
b855dce7 55+ {
95259824
WB
56+ .name = "savevm",
57+ .args_type = "",
58+ .params = "",
59+ .help = "show savevm status",
a544966d 60+ .cmd = hmp_info_savevm,
95259824
WB
61+ },
62+
83faa3fe
TL
63+SRST
64+ ``info savevm``
65+ Show savevm status.
66+ERST
67+
b855dce7 68 {
83faa3fe
TL
69 .name = "balloon",
70 .args_type = "",
95259824 71diff --git a/hmp-commands.hx b/hmp-commands.hx
4fbd50e2 72index 2e2a3bcf98..7506de251c 100644
95259824
WB
73--- a/hmp-commands.hx
74+++ b/hmp-commands.hx
4fbd50e2 75@@ -1862,3 +1862,20 @@ SRST
bf251437 76 List event channels in the guest
d03e1b3c
FE
77 ERST
78 #endif
95259824
WB
79+
80+ {
81+ .name = "savevm-start",
82+ .args_type = "statefile:s?",
83+ .params = "[statefile]",
84+ .help = "Prepare for snapshot and halt VM. Save VM state to statefile.",
a544966d 85+ .cmd = hmp_savevm_start,
95259824
WB
86+ },
87+
88+ {
95259824
WB
89+ .name = "savevm-end",
90+ .args_type = "",
91+ .params = "",
92+ .help = "Resume VM after snaphot.",
817b7667
SR
93+ .cmd = hmp_savevm_end,
94+ .coroutine = true,
95259824 95+ },
be901f66 96diff --git a/include/migration/snapshot.h b/include/migration/snapshot.h
4fbd50e2 97index 9e4dcaaa75..2581730d74 100644
be901f66
SR
98--- a/include/migration/snapshot.h
99+++ b/include/migration/snapshot.h
4fbd50e2
FE
100@@ -68,4 +68,6 @@ bool delete_snapshot(const char *name,
101 */
102 void load_snapshot_resume(RunState state);
be901f66 103
be901f66 104+int load_snapshot_from_blockdev(const char *filename, Error **errp);
8dca018b 105+
be901f66
SR
106 #endif
107diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h
10e10933 108index 13f9a2dedb..7a7def7530 100644
be901f66
SR
109--- a/include/monitor/hmp.h
110+++ b/include/monitor/hmp.h
bf251437 111@@ -28,6 +28,7 @@ void hmp_info_status(Monitor *mon, const QDict *qdict);
be901f66
SR
112 void hmp_info_uuid(Monitor *mon, const QDict *qdict);
113 void hmp_info_chardev(Monitor *mon, const QDict *qdict);
114 void hmp_info_mice(Monitor *mon, const QDict *qdict);
115+void hmp_info_savevm(Monitor *mon, const QDict *qdict);
116 void hmp_info_migrate(Monitor *mon, const QDict *qdict);
117 void hmp_info_migrate_capabilities(Monitor *mon, const QDict *qdict);
118 void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict);
99f9ce2c 119@@ -94,6 +95,8 @@ void hmp_closefd(Monitor *mon, const QDict *qdict);
bf251437
FE
120 void hmp_mouse_move(Monitor *mon, const QDict *qdict);
121 void hmp_mouse_button(Monitor *mon, const QDict *qdict);
122 void hmp_mouse_set(Monitor *mon, const QDict *qdict);
be901f66 123+void hmp_savevm_start(Monitor *mon, const QDict *qdict);
be901f66
SR
124+void hmp_savevm_end(Monitor *mon, const QDict *qdict);
125 void hmp_sendkey(Monitor *mon, const QDict *qdict);
d03e1b3c 126 void coroutine_fn hmp_screendump(Monitor *mon, const QDict *qdict);
83faa3fe 127 void hmp_chardev_add(Monitor *mon, const QDict *qdict);
817b7667 128diff --git a/migration/meson.build b/migration/meson.build
4fbd50e2 129index 95d1cf2250..800f12a60d 100644
817b7667
SR
130--- a/migration/meson.build
131+++ b/migration/meson.build
4fbd50e2 132@@ -28,6 +28,7 @@ system_ss.add(files(
10e10933 133 'options.c',
817b7667
SR
134 'postcopy-ram.c',
135 'savevm.c',
136+ 'savevm-async.c',
137 'socket.c',
138 'tls.c',
bf251437 139 'threadinfo.c',
817b7667 140diff --git a/migration/savevm-async.c b/migration/savevm-async.c
95259824 141new file mode 100644
4fbd50e2 142index 0000000000..779e4e2a78
95259824 143--- /dev/null
817b7667 144+++ b/migration/savevm-async.c
4fbd50e2 145@@ -0,0 +1,531 @@
95259824 146+#include "qemu/osdep.h"
5b15e2ec 147+#include "migration/channel-savevm-async.h"
6838f038 148+#include "migration/migration.h"
10e10933
FE
149+#include "migration/migration-stats.h"
150+#include "migration/options.h"
6838f038
WB
151+#include "migration/savevm.h"
152+#include "migration/snapshot.h"
153+#include "migration/global_state.h"
154+#include "migration/ram.h"
155+#include "migration/qemu-file.h"
95259824 156+#include "sysemu/sysemu.h"
6402d961 157+#include "sysemu/runstate.h"
95259824 158+#include "block/block.h"
95259824 159+#include "sysemu/block-backend.h"
53e83913
WB
160+#include "qapi/error.h"
161+#include "qapi/qmp/qerror.h"
162+#include "qapi/qmp/qdict.h"
163+#include "qapi/qapi-commands-migration.h"
164+#include "qapi/qapi-commands-misc.h"
0775f12b 165+#include "qapi/qapi-commands-block.h"
95259824 166+#include "qemu/cutils.h"
817b7667 167+#include "qemu/timer.h"
6402d961
TL
168+#include "qemu/main-loop.h"
169+#include "qemu/rcu.h"
db5d2a4b 170+#include "qemu/yank.h"
95259824
WB
171+
172+/* #define DEBUG_SAVEVM_STATE */
173+
174+#ifdef DEBUG_SAVEVM_STATE
175+#define DPRINTF(fmt, ...) \
176+ do { printf("savevm-async: " fmt, ## __VA_ARGS__); } while (0)
177+#else
178+#define DPRINTF(fmt, ...) \
179+ do { } while (0)
180+#endif
181+
182+enum {
183+ SAVE_STATE_DONE,
184+ SAVE_STATE_ERROR,
185+ SAVE_STATE_ACTIVE,
186+ SAVE_STATE_COMPLETED,
187+ SAVE_STATE_CANCELLED
188+};
189+
190+
191+static struct SnapshotState {
67af0fa4 192+ BlockBackend *target;
95259824
WB
193+ size_t bs_pos;
194+ int state;
195+ Error *error;
196+ Error *blocker;
197+ int saved_vm_running;
198+ QEMUFile *file;
199+ int64_t total_time;
d7f4e01a
TL
200+ QEMUBH *finalize_bh;
201+ Coroutine *co;
563c5928 202+ QemuCoSleep target_close_wait;
95259824
WB
203+} snap_state;
204+
817b7667
SR
205+static bool savevm_aborted(void)
206+{
207+ return snap_state.state == SAVE_STATE_CANCELLED ||
208+ snap_state.state == SAVE_STATE_ERROR;
209+}
210+
95259824
WB
211+SaveVMInfo *qmp_query_savevm(Error **errp)
212+{
213+ SaveVMInfo *info = g_malloc0(sizeof(*info));
214+ struct SnapshotState *s = &snap_state;
215+
216+ if (s->state != SAVE_STATE_DONE) {
217+ info->has_bytes = true;
218+ info->bytes = s->bs_pos;
219+ switch (s->state) {
220+ case SAVE_STATE_ERROR:
95259824
WB
221+ info->status = g_strdup("failed");
222+ info->has_total_time = true;
223+ info->total_time = s->total_time;
224+ if (s->error) {
95259824
WB
225+ info->error = g_strdup(error_get_pretty(s->error));
226+ }
227+ break;
228+ case SAVE_STATE_ACTIVE:
95259824
WB
229+ info->status = g_strdup("active");
230+ info->has_total_time = true;
231+ info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME)
232+ - s->total_time;
233+ break;
234+ case SAVE_STATE_COMPLETED:
95259824
WB
235+ info->status = g_strdup("completed");
236+ info->has_total_time = true;
237+ info->total_time = s->total_time;
238+ break;
239+ }
240+ }
241+
242+ return info;
243+}
244+
245+static int save_snapshot_cleanup(void)
246+{
247+ int ret = 0;
248+
249+ DPRINTF("save_snapshot_cleanup\n");
250+
251+ snap_state.total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) -
252+ snap_state.total_time;
253+
254+ if (snap_state.file) {
255+ ret = qemu_fclose(snap_state.file);
5b15e2ec 256+ snap_state.file = NULL;
95259824
WB
257+ }
258+
67af0fa4 259+ if (snap_state.target) {
817b7667
SR
260+ if (!savevm_aborted()) {
261+ /* try to truncate, but ignore errors (will fail on block devices).
262+ * note1: bdrv_read() need whole blocks, so we need to round up
263+ * note2: PVE requires 1024 (BDRV_SECTOR_SIZE*2) alignment
264+ */
265+ size_t size = QEMU_ALIGN_UP(snap_state.bs_pos, BDRV_SECTOR_SIZE*2);
266+ blk_truncate(snap_state.target, size, false, PREALLOC_MODE_OFF, 0, NULL);
267+ }
67af0fa4 268+ blk_op_unblock_all(snap_state.target, snap_state.blocker);
95259824
WB
269+ error_free(snap_state.blocker);
270+ snap_state.blocker = NULL;
67af0fa4
WB
271+ blk_unref(snap_state.target);
272+ snap_state.target = NULL;
817b7667 273+
563c5928 274+ qemu_co_sleep_wake(&snap_state.target_close_wait);
95259824
WB
275+ }
276+
277+ return ret;
278+}
279+
b8b4ce04 280+static void G_GNUC_PRINTF(1, 2) save_snapshot_error(const char *fmt, ...)
95259824
WB
281+{
282+ va_list ap;
283+ char *msg;
284+
285+ va_start(ap, fmt);
286+ msg = g_strdup_vprintf(fmt, ap);
287+ va_end(ap);
288+
289+ DPRINTF("save_snapshot_error: %s\n", msg);
290+
291+ if (!snap_state.error) {
292+ error_set(&snap_state.error, ERROR_CLASS_GENERIC_ERROR, "%s", msg);
293+ }
294+
295+ g_free (msg);
296+
297+ snap_state.state = SAVE_STATE_ERROR;
95259824
WB
298+}
299+
d7f4e01a 300+static void process_savevm_finalize(void *opaque)
0775f12b
WB
301+{
302+ int ret;
d7f4e01a
TL
303+ MigrationState *ms = migrate_get_current();
304+
817b7667
SR
305+ bool aborted = savevm_aborted();
306+
d7f4e01a
TL
307+#ifdef DEBUG_SAVEVM_STATE
308+ int64_t start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
309+#endif
310+
311+ qemu_bh_delete(snap_state.finalize_bh);
312+ snap_state.finalize_bh = NULL;
313+ snap_state.co = NULL;
314+
315+ /* We need to own the target bdrv's context for the following functions,
316+ * so move it back. It can stay in the main context and live out its life
317+ * there, since we're done with it after this method ends anyway.
318+ */
d7f4e01a 319+ blk_set_aio_context(snap_state.target, qemu_get_aio_context(), NULL);
d7f4e01a
TL
320+
321+ ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
322+ if (ret < 0) {
323+ save_snapshot_error("vm_stop_force_state error %d", ret);
324+ }
325+
817b7667
SR
326+ if (!aborted) {
327+ /* skip state saving if we aborted, snapshot will be invalid anyway */
328+ (void)qemu_savevm_state_complete_precopy(snap_state.file, false, false);
329+ ret = qemu_file_get_error(snap_state.file);
330+ if (ret < 0) {
a0208150 331+ save_snapshot_error("qemu_savevm_state_complete_precopy error %d", ret);
817b7667 332+ }
d7f4e01a
TL
333+ }
334+
335+ DPRINTF("state saving complete\n");
336+ DPRINTF("timing: process_savevm_finalize (state saving) took %ld ms\n",
337+ qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time);
338+
339+ /* clear migration state */
340+ migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP,
817b7667 341+ ret || aborted ? MIGRATION_STATUS_FAILED : MIGRATION_STATUS_COMPLETED);
d7f4e01a
TL
342+ ms->to_dst_file = NULL;
343+
344+ qemu_savevm_state_cleanup();
345+
0775f12b
WB
346+ ret = save_snapshot_cleanup();
347+ if (ret < 0) {
348+ save_snapshot_error("save_snapshot_cleanup error %d", ret);
349+ } else if (snap_state.state == SAVE_STATE_ACTIVE) {
350+ snap_state.state = SAVE_STATE_COMPLETED;
817b7667 351+ } else if (aborted) {
1976ca46
FE
352+ /*
353+ * If there was an error, there's no need to set a new one here.
354+ * If the snapshot was canceled, leave setting the state to
355+ * qmp_savevm_end(), which is waked by save_snapshot_cleanup().
356+ */
0775f12b
WB
357+ } else {
358+ save_snapshot_error("process_savevm_cleanup: invalid state: %d",
359+ snap_state.state);
95259824 360+ }
0775f12b
WB
361+ if (snap_state.saved_vm_running) {
362+ vm_start();
363+ snap_state.saved_vm_running = false;
95259824 364+ }
d7f4e01a
TL
365+
366+ DPRINTF("timing: process_savevm_finalize (full) took %ld ms\n",
367+ qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time);
95259824
WB
368+}
369+
d7f4e01a 370+static void coroutine_fn process_savevm_co(void *opaque)
95259824
WB
371+{
372+ int ret;
373+ int64_t maxlen;
d7f4e01a
TL
374+ BdrvNextIterator it;
375+ BlockDriverState *bs = NULL;
95259824 376+
d7f4e01a
TL
377+#ifdef DEBUG_SAVEVM_STATE
378+ int64_t start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
379+#endif
95259824 380+
6838f038 381+ ret = qemu_file_get_error(snap_state.file);
95259824 382+ if (ret < 0) {
6838f038 383+ save_snapshot_error("qemu_savevm_state_setup failed");
d7f4e01a 384+ return;
95259824
WB
385+ }
386+
387+ while (snap_state.state == SAVE_STATE_ACTIVE) {
bf251437 388+ uint64_t pending_size, pend_precopy, pend_postcopy;
db5d2a4b 389+ uint64_t threshold = 400 * 1000;
95259824 390+
db5d2a4b
FE
391+ /*
392+ * pending_{estimate,exact} are expected to be called without iothread
393+ * lock. Similar to what is done in migration.c, call the exact variant
394+ * only once pend_precopy in the estimate is below the threshold.
395+ */
4fbd50e2 396+ bql_unlock();
db5d2a4b
FE
397+ qemu_savevm_state_pending_estimate(&pend_precopy, &pend_postcopy);
398+ if (pend_precopy <= threshold) {
399+ qemu_savevm_state_pending_exact(&pend_precopy, &pend_postcopy);
400+ }
4fbd50e2 401+ bql_lock();
bf251437 402+ pending_size = pend_precopy + pend_postcopy;
95259824 403+
eee064d9
FE
404+ /*
405+ * A guest reaching this cutoff is dirtying lots of RAM. It should be
406+ * large enough so that the guest can't dirty this much between the
407+ * check and the guest actually being stopped, but it should be small
408+ * enough to avoid long downtimes for non-hibernation snapshots.
409+ */
410+ maxlen = blk_getlength(snap_state.target) - 100*1024*1024;
0775f12b 411+
8051a24b 412+ /* Note that there is no progress for pend_postcopy when iterating */
db5d2a4b 413+ if (pend_precopy > threshold && snap_state.bs_pos + pending_size < maxlen) {
0775f12b
WB
414+ ret = qemu_savevm_state_iterate(snap_state.file, false);
415+ if (ret < 0) {
416+ save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
417+ break;
418+ }
d7f4e01a 419+ DPRINTF("savevm iterate pending size %lu ret %d\n", pending_size, ret);
95259824 420+ } else {
b855dce7 421+ qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
10e10933 422+ global_state_store();
d7f4e01a
TL
423+
424+ DPRINTF("savevm iterate complete\n");
95259824
WB
425+ break;
426+ }
95259824
WB
427+ }
428+
d7f4e01a
TL
429+ DPRINTF("timing: process_savevm_co took %ld ms\n",
430+ qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time);
431+
432+#ifdef DEBUG_SAVEVM_STATE
433+ int64_t start_time_flush = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
434+#endif
435+ /* If a drive runs in an IOThread we can flush it async, and only
436+ * need to sync-flush whatever IO happens between now and
437+ * vm_stop_force_state. bdrv_next can only be called from main AioContext,
438+ * so move there now and after every flush.
439+ */
440+ aio_co_reschedule_self(qemu_get_aio_context());
f1eed34a
FE
441+ bdrv_graph_co_rdlock();
442+ bs = bdrv_first(&it);
443+ bdrv_graph_co_rdunlock();
444+ while (bs) {
d7f4e01a 445+ /* target has BDRV_O_NO_FLUSH, no sense calling bdrv_flush on it */
f1eed34a
FE
446+ if (bs != blk_bs(snap_state.target)) {
447+ AioContext *bs_ctx = bdrv_get_aio_context(bs);
448+ if (bs_ctx != qemu_get_aio_context()) {
449+ DPRINTF("savevm: async flushing drive %s\n", bs->filename);
450+ aio_co_reschedule_self(bs_ctx);
451+ bdrv_graph_co_rdlock();
452+ bdrv_flush(bs);
453+ bdrv_graph_co_rdunlock();
454+ aio_co_reschedule_self(qemu_get_aio_context());
455+ }
d7f4e01a 456+ }
f1eed34a
FE
457+ bdrv_graph_co_rdlock();
458+ bs = bdrv_next(&it);
459+ bdrv_graph_co_rdunlock();
d7f4e01a
TL
460+ }
461+
462+ DPRINTF("timing: async flushing took %ld ms\n",
463+ qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time_flush);
95259824 464+
d7f4e01a 465+ qemu_bh_schedule(snap_state.finalize_bh);
95259824
WB
466+}
467+
bf251437 468+void qmp_savevm_start(const char *statefile, Error **errp)
95259824 469+{
95259824 470+ Error *local_err = NULL;
d7f4e01a
TL
471+ MigrationState *ms = migrate_get_current();
472+ AioContext *iohandler_ctx = iohandler_get_aio_context();
95259824 473+
67af0fa4 474+ int bdrv_oflags = BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH;
95259824
WB
475+
476+ if (snap_state.state != SAVE_STATE_DONE) {
477+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
478+ "VM snapshot already started\n");
479+ return;
480+ }
481+
4fbd50e2 482+ if (migration_is_running()) {
d7f4e01a
TL
483+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, QERR_MIGRATION_ACTIVE);
484+ return;
485+ }
486+
10e10933 487+ if (migrate_block()) {
d7f4e01a
TL
488+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
489+ "Block migration and snapshots are incompatible");
490+ return;
491+ }
492+
95259824
WB
493+ /* initialize snapshot info */
494+ snap_state.saved_vm_running = runstate_is_running();
495+ snap_state.bs_pos = 0;
496+ snap_state.total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
497+ snap_state.blocker = NULL;
a262e964 498+ snap_state.target_close_wait = (QemuCoSleep){ .to_wake = NULL };
95259824
WB
499+
500+ if (snap_state.error) {
501+ error_free(snap_state.error);
502+ snap_state.error = NULL;
503+ }
504+
bf251437 505+ if (!statefile) {
95259824
WB
506+ vm_stop(RUN_STATE_SAVE_VM);
507+ snap_state.state = SAVE_STATE_COMPLETED;
508+ return;
509+ }
510+
511+ if (qemu_savevm_state_blocked(errp)) {
512+ return;
513+ }
514+
515+ /* Open the image */
95259824
WB
516+ QDict *options = NULL;
517+ options = qdict_new();
53e83913 518+ qdict_put_str(options, "driver", "raw");
67af0fa4
WB
519+ snap_state.target = blk_new_open(statefile, NULL, options, bdrv_oflags, &local_err);
520+ if (!snap_state.target) {
95259824
WB
521+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
522+ goto restart;
523+ }
524+
5b15e2ec
FE
525+ QIOChannel *ioc = QIO_CHANNEL(qio_channel_savevm_async_new(snap_state.target,
526+ &snap_state.bs_pos));
527+ snap_state.file = qemu_file_new_output(ioc);
95259824
WB
528+
529+ if (!snap_state.file) {
530+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
531+ goto restart;
532+ }
533+
d7f4e01a
TL
534+ /*
535+ * qemu_savevm_* paths use migration code and expect a migration state.
536+ * State is cleared in process_savevm_finalize, but has to be initialized
537+ * here (blocking main thread, from QMP) to avoid race conditions.
538+ */
f1eed34a
FE
539+ if (migrate_init(ms, errp)) {
540+ return;
541+ }
10e10933 542+ memset(&mig_stats, 0, sizeof(mig_stats));
d7f4e01a 543+ ms->to_dst_file = snap_state.file;
95259824
WB
544+
545+ error_setg(&snap_state.blocker, "block device is in use by savevm");
67af0fa4 546+ blk_op_block_all(snap_state.target, snap_state.blocker);
95259824 547+
0775f12b 548+ snap_state.state = SAVE_STATE_ACTIVE;
d7f4e01a
TL
549+ snap_state.finalize_bh = qemu_bh_new(process_savevm_finalize, &snap_state);
550+ snap_state.co = qemu_coroutine_create(&process_savevm_co, NULL);
d7f4e01a
TL
551+ qemu_savevm_state_header(snap_state.file);
552+ qemu_savevm_state_setup(snap_state.file);
d7f4e01a
TL
553+
554+ /* Async processing from here on out happens in iohandler context, so let
555+ * the target bdrv have its home there.
556+ */
557+ blk_set_aio_context(snap_state.target, iohandler_ctx, &local_err);
558+
559+ aio_co_schedule(iohandler_ctx, snap_state.co);
95259824
WB
560+
561+ return;
562+
563+restart:
564+
565+ save_snapshot_error("setup failed");
566+
567+ if (snap_state.saved_vm_running) {
568+ vm_start();
817b7667 569+ snap_state.saved_vm_running = false;
95259824
WB
570+ }
571+}
572+
817b7667 573+void coroutine_fn qmp_savevm_end(Error **errp)
95259824 574+{
817b7667
SR
575+ int64_t timeout;
576+
95259824
WB
577+ if (snap_state.state == SAVE_STATE_DONE) {
578+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
579+ "VM snapshot not started\n");
580+ return;
581+ }
582+
583+ if (snap_state.state == SAVE_STATE_ACTIVE) {
584+ snap_state.state = SAVE_STATE_CANCELLED;
817b7667 585+ goto wait_for_close;
95259824
WB
586+ }
587+
588+ if (snap_state.saved_vm_running) {
589+ vm_start();
817b7667 590+ snap_state.saved_vm_running = false;
95259824
WB
591+ }
592+
593+ snap_state.state = SAVE_STATE_DONE;
817b7667
SR
594+
595+wait_for_close:
596+ if (!snap_state.target) {
597+ DPRINTF("savevm-end: no target file open\n");
598+ return;
599+ }
600+
601+ /* wait until cleanup is done before returning, this ensures that after this
602+ * call exits the statefile will be closed and can be removed immediately */
603+ DPRINTF("savevm-end: waiting for cleanup\n");
604+ timeout = 30L * 1000 * 1000 * 1000;
563c5928 605+ qemu_co_sleep_ns_wakeable(&snap_state.target_close_wait,
f376b2b9 606+ QEMU_CLOCK_REALTIME, timeout);
817b7667
SR
607+ if (snap_state.target) {
608+ save_snapshot_error("timeout waiting for target file close in "
609+ "qmp_savevm_end");
610+ /* we cannot assume the snapshot finished in this case, so leave the
611+ * state alone - caller has to figure something out */
612+ return;
613+ }
614+
1976ca46
FE
615+ // File closed and no other error, so ensure next snapshot can be started.
616+ if (snap_state.state != SAVE_STATE_ERROR) {
617+ snap_state.state = SAVE_STATE_DONE;
618+ }
619+
817b7667 620+ DPRINTF("savevm-end: cleanup done\n");
95259824
WB
621+}
622+
6838f038 623+int load_snapshot_from_blockdev(const char *filename, Error **errp)
95259824 624+{
67af0fa4 625+ BlockBackend *be;
95259824
WB
626+ Error *local_err = NULL;
627+ Error *blocker = NULL;
628+
629+ QEMUFile *f;
5b15e2ec 630+ size_t bs_pos = 0;
67af0fa4 631+ int ret = -EINVAL;
95259824 632+
67af0fa4 633+ be = blk_new_open(filename, NULL, NULL, 0, &local_err);
95259824 634+
67af0fa4 635+ if (!be) {
6838f038 636+ error_setg(errp, "Could not open VM state file");
95259824
WB
637+ goto the_end;
638+ }
639+
67af0fa4
WB
640+ error_setg(&blocker, "block device is in use by load state");
641+ blk_op_block_all(be, blocker);
642+
95259824 643+ /* restore the VM state */
5b15e2ec 644+ f = qemu_file_new_input(QIO_CHANNEL(qio_channel_savevm_async_new(be, &bs_pos)));
95259824 645+ if (!f) {
6838f038 646+ error_setg(errp, "Could not open VM state file");
95259824
WB
647+ goto the_end;
648+ }
649+
6838f038 650+ qemu_system_reset(SHUTDOWN_CAUSE_NONE);
95259824
WB
651+ ret = qemu_loadvm_state(f);
652+
e9b36665
SR
653+ /* dirty bitmap migration has a special case we need to trigger manually */
654+ dirty_bitmap_mig_before_vm_start();
655+
95259824 656+ qemu_fclose(f);
db5d2a4b
FE
657+
658+ /* state_destroy assumes a real migration which would have added a yank */
659+ yank_register_instance(MIGRATION_YANK_INSTANCE, &error_abort);
660+
95259824
WB
661+ migration_incoming_state_destroy();
662+ if (ret < 0) {
6838f038 663+ error_setg_errno(errp, -ret, "Error while loading VM state");
95259824
WB
664+ goto the_end;
665+ }
666+
667+ ret = 0;
668+
669+ the_end:
67af0fa4
WB
670+ if (be) {
671+ blk_op_unblock_all(be, blocker);
95259824 672+ error_free(blocker);
67af0fa4 673+ blk_unref(be);
95259824
WB
674+ }
675+ return ret;
676+}
817b7667 677diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
f1eed34a 678index 871898ac46..ef4634e5c1 100644
817b7667
SR
679--- a/monitor/hmp-cmds.c
680+++ b/monitor/hmp-cmds.c
bf251437
FE
681@@ -22,6 +22,7 @@
682 #include "monitor/monitor-internal.h"
683 #include "qapi/error.h"
684 #include "qapi/qapi-commands-control.h"
685+#include "qapi/qapi-commands-migration.h"
686 #include "qapi/qapi-commands-misc.h"
687 #include "qapi/qmp/qdict.h"
f1eed34a 688 #include "qemu/cutils.h"
99f9ce2c 689@@ -443,3 +444,40 @@ void hmp_info_mtree(Monitor *mon, const QDict *qdict)
817b7667 690
bf251437
FE
691 mtree_info(flatview, dispatch_tree, owner, disabled);
692 }
693+
817b7667
SR
694+void hmp_savevm_start(Monitor *mon, const QDict *qdict)
695+{
696+ Error *errp = NULL;
697+ const char *statefile = qdict_get_try_str(qdict, "statefile");
698+
bf251437 699+ qmp_savevm_start(statefile, &errp);
817b7667
SR
700+ hmp_handle_error(mon, errp);
701+}
702+
817b7667
SR
703+void coroutine_fn hmp_savevm_end(Monitor *mon, const QDict *qdict)
704+{
705+ Error *errp = NULL;
706+
707+ qmp_savevm_end(&errp);
708+ hmp_handle_error(mon, errp);
709+}
710+
711+void hmp_info_savevm(Monitor *mon, const QDict *qdict)
712+{
713+ SaveVMInfo *info;
714+ info = qmp_query_savevm(NULL);
715+
bf251437 716+ if (info->status) {
817b7667
SR
717+ monitor_printf(mon, "savevm status: %s\n", info->status);
718+ monitor_printf(mon, "total time: %" PRIu64 " milliseconds\n",
719+ info->total_time);
720+ } else {
721+ monitor_printf(mon, "savevm status: not running\n");
722+ }
723+ if (info->has_bytes) {
724+ monitor_printf(mon, "Bytes saved: %"PRIu64"\n", info->bytes);
725+ }
bf251437 726+ if (info->error) {
817b7667
SR
727+ monitor_printf(mon, "Error: %s\n", info->error);
728+ }
729+}
817b7667 730diff --git a/qapi/migration.json b/qapi/migration.json
4fbd50e2 731index 8c65b90328..ed20d066cd 100644
817b7667
SR
732--- a/qapi/migration.json
733+++ b/qapi/migration.json
4fbd50e2 734@@ -297,6 +297,40 @@
10e10933
FE
735 '*dirty-limit-throttle-time-per-round': 'uint64',
736 '*dirty-limit-ring-full-time': 'uint64'} }
817b7667
SR
737
738+##
739+# @SaveVMInfo:
740+#
741+# Information about the current savevm process.
742+#
743+# @status: string describing the current savevm status.
744+# This can be 'active', 'completed', 'failed'.
745+# If this field is not returned, no savevm process
746+# has been initiated
747+#
748+# @error: string containing error message if status is failed.
749+#
750+# @total-time: total amount of milliseconds since savevm started.
751+# If savevm has ended, it returns the total save time
752+#
753+# @bytes: total amount of data transferred
754+#
755+# Since: 1.3
756+##
757+{ 'struct': 'SaveVMInfo',
758+ 'data': {'*status': 'str', '*error': 'str',
759+ '*total-time': 'int', '*bytes': 'int'} }
760+
761+##
762+# @query-savevm:
763+#
764+# Returns information about current savevm process.
765+#
766+# Returns: @SaveVMInfo
767+#
768+# Since: 1.3
769+##
770+{ 'command': 'query-savevm', 'returns': 'SaveVMInfo' }
771+
772 ##
773 # @query-migrate:
774 #
775diff --git a/qapi/misc.json b/qapi/misc.json
4fbd50e2 776index ec30e5c570..7147199a12 100644
817b7667
SR
777--- a/qapi/misc.json
778+++ b/qapi/misc.json
4fbd50e2 779@@ -454,6 +454,24 @@
817b7667
SR
780 ##
781 { 'command': 'query-fdsets', 'returns': ['FdsetInfo'] }
782
783+##
784+# @savevm-start:
785+#
786+# Prepare for snapshot and halt VM. Save VM state to statefile.
787+#
4fbd50e2
FE
788+# @statefile: target file that state should be written to.
789+#
817b7667
SR
790+##
791+{ 'command': 'savevm-start', 'data': { '*statefile': 'str' } }
792+
793+##
817b7667
SR
794+# @savevm-end:
795+#
796+# Resume VM after a snapshot.
797+#
798+##
799+{ 'command': 'savevm-end', 'coroutine': true }
800+
801 ##
802 # @CommandLineParameterType:
803 #
804diff --git a/qemu-options.hx b/qemu-options.hx
4fbd50e2 805index 8ce85d4559..511ab9415e 100644
817b7667
SR
806--- a/qemu-options.hx
807+++ b/qemu-options.hx
4fbd50e2 808@@ -4610,6 +4610,18 @@ SRST
817b7667
SR
809 Start right away with a saved state (``loadvm`` in monitor)
810 ERST
811
812+DEF("loadstate", HAS_ARG, QEMU_OPTION_loadstate, \
813+ "-loadstate file\n" \
814+ " start right away with a saved state\n",
815+ QEMU_ARCH_ALL)
816+SRST
817+``-loadstate file``
818+ Start right away with a saved state. This option does not roll back
819+ disk state like ``loadvm``, so the user must make sure that the disks
820+ have the correct state. *file* can be any valid device URL. See the section
821+ for "Device URL Syntax" for more information.
822+ERST
823+
824 #ifndef _WIN32
825 DEF("daemonize", 0, QEMU_OPTION_daemonize, \
826 "-daemonize daemonize QEMU after initializing\n", QEMU_ARCH_ALL)
f1eed34a 827diff --git a/system/vl.c b/system/vl.c
4fbd50e2 828index c644222982..2738ab7c91 100644
f1eed34a
FE
829--- a/system/vl.c
830+++ b/system/vl.c
831@@ -163,6 +163,7 @@ static const char *accelerators;
5b15e2ec
FE
832 static bool have_custom_ram_size;
833 static const char *ram_memdev_id;
f376b2b9 834 static QDict *machine_opts_dict;
8dca018b
SR
835+static const char *loadstate;
836 static QTAILQ_HEAD(, ObjectOption) object_opts = QTAILQ_HEAD_INITIALIZER(object_opts);
4567474e 837 static QTAILQ_HEAD(, DeviceOption) device_opts = QTAILQ_HEAD_INITIALIZER(device_opts);
5b15e2ec 838 static int display_remote;
4fbd50e2
FE
839@@ -2712,6 +2713,12 @@ void qmp_x_exit_preconfig(Error **errp)
840 RunState state = autostart ? RUN_STATE_RUNNING : runstate_get();
4567474e 841 load_snapshot(loadvm, NULL, false, NULL, &error_fatal);
4fbd50e2 842 load_snapshot_resume(state);
95259824 843+ } else if (loadstate) {
6838f038
WB
844+ Error *local_err = NULL;
845+ if (load_snapshot_from_blockdev(loadstate, &local_err) < 0) {
846+ error_report_err(local_err);
95259824
WB
847+ autostart = 0;
848+ }
849 }
b855dce7
TL
850 if (replay_mode != REPLAY_MODE_NONE) {
851 replay_vmstate_init();
4fbd50e2 852@@ -3259,6 +3266,9 @@ void qemu_init(int argc, char **argv)
8dca018b
SR
853 case QEMU_OPTION_loadvm:
854 loadvm = optarg;
855 break;
856+ case QEMU_OPTION_loadstate:
857+ loadstate = optarg;
858+ break;
859 case QEMU_OPTION_full_screen:
860 dpy.has_full_screen = true;
861 dpy.full_screen = true;