From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Dietmar Maurer <dietmar@proxmox.com>
Date: Mon, 6 Apr 2020 12:16:46 +0200
Subject: [PATCH] PVE: add savevm-async for background state snapshots

Put qemu_savevm_state_{header,setup} into the main loop and the rest
of the iteration into a coroutine. The former need to lock the
iothread (and we can't unlock it in the coroutine), and the latter
can't deal with being in a separate thread, so a coroutine it must
be.

Truncate output file at 1024 boundary.

Do not block the VM and save the state on aborting a snapshot, as the
snapshot will be invalid anyway.

Also, when aborting, wait for the target file to be closed, otherwise a
client might run into race conditions when trying to remove the file
still opened by QEMU.

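For illustration, the QMP flow with the new commands looks roughly like
this (the state file path is only a placeholder; query-savevm returns the
SaveVMInfo fields added below):

    { "execute": "savevm-start", "arguments": { "statefile": "/path/to/vmstate.raw" } }
    { "execute": "query-savevm" }
    { "execute": "savevm-end" }

A state file written this way is intended to be loaded again at startup
via the new -loadstate option, which, unlike loadvm, does not roll back
disk state.
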
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
[SR: improve aborting
     register yank before migration_incoming_state_destroy]
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
[FE: further improve aborting
     adapt to removal of QEMUFileOps
     improve condition for entering final stage
     adapt to QAPI and other changes for 8.0]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
 hmp-commands-info.hx         |  13 +
 hmp-commands.hx              |  17 ++
 include/migration/snapshot.h |   2 +
 include/monitor/hmp.h        |   3 +
 migration/meson.build        |   1 +
 migration/savevm-async.c     | 533 +++++++++++++++++++++++++++++++++++
 monitor/hmp-cmds.c           |  38 +++
 qapi/migration.json          |  34 +++
 qapi/misc.json               |  16 ++
 qemu-options.hx              |  12 +
 softmmu/vl.c                 |  10 +
 11 files changed, 679 insertions(+)
 create mode 100644 migration/savevm-async.c

diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
index 47d63d26db..a166bff3d5 100644
--- a/hmp-commands-info.hx
+++ b/hmp-commands-info.hx
@@ -540,6 +540,19 @@ SRST
     Show current migration parameters.
 ERST
 
+    {
+        .name       = "savevm",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show savevm status",
+        .cmd        = hmp_info_savevm,
+    },
+
+SRST
+  ``info savevm``
+    Show savevm status.
+ERST
+
     {
         .name       = "balloon",
         .args_type  = "",
diff --git a/hmp-commands.hx b/hmp-commands.hx
index bb85ee1d26..d9f9f42d11 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1846,3 +1846,20 @@ SRST
   List event channels in the guest
 ERST
 #endif
+
+    {
+        .name       = "savevm-start",
+        .args_type  = "statefile:s?",
+        .params     = "[statefile]",
+        .help       = "Prepare for snapshot and halt VM. Save VM state to statefile.",
+        .cmd        = hmp_savevm_start,
+    },
+
+    {
+        .name       = "savevm-end",
+        .args_type  = "",
+        .params     = "",
+        .help       = "Resume VM after snapshot.",
+        .cmd        = hmp_savevm_end,
+        .coroutine  = true,
+    },
diff --git a/include/migration/snapshot.h b/include/migration/snapshot.h
index e72083b117..c846d37806 100644
--- a/include/migration/snapshot.h
+++ b/include/migration/snapshot.h
@@ -61,4 +61,6 @@ bool delete_snapshot(const char *name,
                      bool has_devices, strList *devices,
                      Error **errp);
 
+int load_snapshot_from_blockdev(const char *filename, Error **errp);
+
 #endif
diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h
index fdb69b7f9c..fdf6b45fb8 100644
--- a/include/monitor/hmp.h
+++ b/include/monitor/hmp.h
@@ -28,6 +28,7 @@ void hmp_info_status(Monitor *mon, const QDict *qdict);
 void hmp_info_uuid(Monitor *mon, const QDict *qdict);
 void hmp_info_chardev(Monitor *mon, const QDict *qdict);
 void hmp_info_mice(Monitor *mon, const QDict *qdict);
+void hmp_info_savevm(Monitor *mon, const QDict *qdict);
 void hmp_info_migrate(Monitor *mon, const QDict *qdict);
 void hmp_info_migrate_capabilities(Monitor *mon, const QDict *qdict);
 void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict);
@@ -94,6 +95,8 @@ void hmp_closefd(Monitor *mon, const QDict *qdict);
 void hmp_mouse_move(Monitor *mon, const QDict *qdict);
 void hmp_mouse_button(Monitor *mon, const QDict *qdict);
 void hmp_mouse_set(Monitor *mon, const QDict *qdict);
+void hmp_savevm_start(Monitor *mon, const QDict *qdict);
+void hmp_savevm_end(Monitor *mon, const QDict *qdict);
 void hmp_sendkey(Monitor *mon, const QDict *qdict);
 void coroutine_fn hmp_screendump(Monitor *mon, const QDict *qdict);
 void hmp_chardev_add(Monitor *mon, const QDict *qdict);
diff --git a/migration/meson.build b/migration/meson.build
index 8a142fc7a9..a7824b5266 100644
--- a/migration/meson.build
+++ b/migration/meson.build
@@ -25,6 +25,7 @@ softmmu_ss.add(files(
   'multifd-zlib.c',
   'postcopy-ram.c',
   'savevm.c',
+  'savevm-async.c',
   'socket.c',
   'tls.c',
   'threadinfo.c',
diff --git a/migration/savevm-async.c b/migration/savevm-async.c
new file mode 100644
index 0000000000..ac1fac6378
--- /dev/null
+++ b/migration/savevm-async.c
@@ -0,0 +1,533 @@
+#include "qemu/osdep.h"
+#include "migration/channel-savevm-async.h"
+#include "migration/migration.h"
+#include "migration/savevm.h"
+#include "migration/snapshot.h"
+#include "migration/global_state.h"
+#include "migration/ram.h"
+#include "migration/qemu-file.h"
+#include "sysemu/sysemu.h"
+#include "sysemu/runstate.h"
+#include "block/block.h"
+#include "sysemu/block-backend.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qerror.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qapi-commands-migration.h"
+#include "qapi/qapi-commands-misc.h"
+#include "qapi/qapi-commands-block.h"
+#include "qemu/cutils.h"
+#include "qemu/timer.h"
+#include "qemu/main-loop.h"
+#include "qemu/rcu.h"
+#include "qemu/yank.h"
+
+/* #define DEBUG_SAVEVM_STATE */
+
+#ifdef DEBUG_SAVEVM_STATE
+#define DPRINTF(fmt, ...) \
+    do { printf("savevm-async: " fmt, ## __VA_ARGS__); } while (0)
+#else
+#define DPRINTF(fmt, ...) \
+    do { } while (0)
+#endif
+
+enum {
+    SAVE_STATE_DONE,
+    SAVE_STATE_ERROR,
+    SAVE_STATE_ACTIVE,
+    SAVE_STATE_COMPLETED,
+    SAVE_STATE_CANCELLED
+};
+
+
+static struct SnapshotState {
+    BlockBackend *target;
+    size_t bs_pos;
+    int state;
+    Error *error;
+    Error *blocker;
+    int saved_vm_running;
+    QEMUFile *file;
+    int64_t total_time;
+    QEMUBH *finalize_bh;
+    Coroutine *co;
+    QemuCoSleep target_close_wait;
+} snap_state;
+
+static bool savevm_aborted(void)
+{
+    return snap_state.state == SAVE_STATE_CANCELLED ||
+        snap_state.state == SAVE_STATE_ERROR;
+}
+
+SaveVMInfo *qmp_query_savevm(Error **errp)
+{
+    SaveVMInfo *info = g_malloc0(sizeof(*info));
+    struct SnapshotState *s = &snap_state;
+
+    if (s->state != SAVE_STATE_DONE) {
+        info->has_bytes = true;
+        info->bytes = s->bs_pos;
+        switch (s->state) {
+        case SAVE_STATE_ERROR:
+            info->status = g_strdup("failed");
+            info->has_total_time = true;
+            info->total_time = s->total_time;
+            if (s->error) {
+                info->error = g_strdup(error_get_pretty(s->error));
+            }
+            break;
+        case SAVE_STATE_ACTIVE:
+            info->status = g_strdup("active");
+            info->has_total_time = true;
+            info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME)
+                - s->total_time;
+            break;
+        case SAVE_STATE_COMPLETED:
+            info->status = g_strdup("completed");
+            info->has_total_time = true;
+            info->total_time = s->total_time;
+            break;
+        }
+    }
+
+    return info;
+}
+
+static int save_snapshot_cleanup(void)
+{
+    int ret = 0;
+
+    DPRINTF("save_snapshot_cleanup\n");
+
+    snap_state.total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) -
+        snap_state.total_time;
+
+    if (snap_state.file) {
+        ret = qemu_fclose(snap_state.file);
+        snap_state.file = NULL;
+    }
+
+    if (snap_state.target) {
+        if (!savevm_aborted()) {
+            /* try to truncate, but ignore errors (will fail on block devices).
+             * note1: bdrv_read() needs whole blocks, so we need to round up
+             * note2: PVE requires 1024 (BDRV_SECTOR_SIZE*2) alignment
+             */
+            size_t size = QEMU_ALIGN_UP(snap_state.bs_pos, BDRV_SECTOR_SIZE*2);
+            blk_truncate(snap_state.target, size, false, PREALLOC_MODE_OFF, 0, NULL);
+        }
+        blk_op_unblock_all(snap_state.target, snap_state.blocker);
+        error_free(snap_state.blocker);
+        snap_state.blocker = NULL;
+        blk_unref(snap_state.target);
+        snap_state.target = NULL;
+
+        qemu_co_sleep_wake(&snap_state.target_close_wait);
+    }
+
+    return ret;
+}
+
+static void save_snapshot_error(const char *fmt, ...)
+{
+    va_list ap;
+    char *msg;
+
+    va_start(ap, fmt);
+    msg = g_strdup_vprintf(fmt, ap);
+    va_end(ap);
+
+    DPRINTF("save_snapshot_error: %s\n", msg);
+
+    if (!snap_state.error) {
+        error_set(&snap_state.error, ERROR_CLASS_GENERIC_ERROR, "%s", msg);
+    }
+
+    g_free (msg);
+
+    snap_state.state = SAVE_STATE_ERROR;
+}
+
+static void process_savevm_finalize(void *opaque)
+{
+    int ret;
+    AioContext *iohandler_ctx = iohandler_get_aio_context();
+    MigrationState *ms = migrate_get_current();
+
+    bool aborted = savevm_aborted();
+
+#ifdef DEBUG_SAVEVM_STATE
+    int64_t start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+#endif
+
+    qemu_bh_delete(snap_state.finalize_bh);
+    snap_state.finalize_bh = NULL;
+    snap_state.co = NULL;
+
+    /* We need to own the target bdrv's context for the following functions,
+     * so move it back. It can stay in the main context and live out its life
+     * there, since we're done with it after this method ends anyway.
+     */
+    aio_context_acquire(iohandler_ctx);
+    blk_set_aio_context(snap_state.target, qemu_get_aio_context(), NULL);
+    aio_context_release(iohandler_ctx);
+
+    ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
+    if (ret < 0) {
+        save_snapshot_error("vm_stop_force_state error %d", ret);
+    }
+
+    if (!aborted) {
+        /* skip state saving if we aborted, snapshot will be invalid anyway */
+        (void)qemu_savevm_state_complete_precopy(snap_state.file, false, false);
+        ret = qemu_file_get_error(snap_state.file);
+        if (ret < 0) {
+            save_snapshot_error("qemu_savevm_state_complete_precopy error %d", ret);
+        }
+    }
+
+    DPRINTF("state saving complete\n");
+    DPRINTF("timing: process_savevm_finalize (state saving) took %ld ms\n",
+            qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time);
+
+    /* clear migration state */
+    migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP,
+                      ret || aborted ? MIGRATION_STATUS_FAILED : MIGRATION_STATUS_COMPLETED);
+    ms->to_dst_file = NULL;
+
+    qemu_savevm_state_cleanup();
+
+    ret = save_snapshot_cleanup();
+    if (ret < 0) {
+        save_snapshot_error("save_snapshot_cleanup error %d", ret);
+    } else if (snap_state.state == SAVE_STATE_ACTIVE) {
+        snap_state.state = SAVE_STATE_COMPLETED;
+    } else if (aborted) {
+        /*
+         * If there was an error, there's no need to set a new one here.
+         * If the snapshot was canceled, leave setting the state to
+         * qmp_savevm_end(), which is woken by save_snapshot_cleanup().
+         */
+    } else {
+        save_snapshot_error("process_savevm_cleanup: invalid state: %d",
+                            snap_state.state);
+    }
+    if (snap_state.saved_vm_running) {
+        vm_start();
+        snap_state.saved_vm_running = false;
+    }
+
+    DPRINTF("timing: process_savevm_finalize (full) took %ld ms\n",
+            qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time);
+}
+
+static void coroutine_fn process_savevm_co(void *opaque)
+{
+    int ret;
+    int64_t maxlen;
+    BdrvNextIterator it;
+    BlockDriverState *bs = NULL;
+
+#ifdef DEBUG_SAVEVM_STATE
+    int64_t start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+#endif
+
+    ret = qemu_file_get_error(snap_state.file);
+    if (ret < 0) {
+        save_snapshot_error("qemu_savevm_state_setup failed");
+        return;
+    }
+
+    while (snap_state.state == SAVE_STATE_ACTIVE) {
+        uint64_t pending_size, pend_precopy, pend_postcopy;
+        uint64_t threshold = 400 * 1000;
+
+        /*
+         * pending_{estimate,exact} are expected to be called without iothread
+         * lock. Similar to what is done in migration.c, call the exact variant
+         * only once pend_precopy in the estimate is below the threshold.
+         */
+        qemu_mutex_unlock_iothread();
+        qemu_savevm_state_pending_estimate(&pend_precopy, &pend_postcopy);
+        if (pend_precopy <= threshold) {
+            qemu_savevm_state_pending_exact(&pend_precopy, &pend_postcopy);
+        }
+        qemu_mutex_lock_iothread();
+        pending_size = pend_precopy + pend_postcopy;
+
+        /*
+         * A guest reaching this cutoff is dirtying lots of RAM. It should be
+         * large enough so that the guest can't dirty this much between the
+         * check and the guest actually being stopped, but it should be small
+         * enough to avoid long downtimes for non-hibernation snapshots.
+         */
+        maxlen = blk_getlength(snap_state.target) - 100*1024*1024;
+
+        /* Note that there is no progress for pend_postcopy when iterating */
+        if (pend_precopy > threshold && snap_state.bs_pos + pending_size < maxlen) {
+            ret = qemu_savevm_state_iterate(snap_state.file, false);
+            if (ret < 0) {
+                save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
+                break;
+            }
+            DPRINTF("savevm iterate pending size %lu ret %d\n", pending_size, ret);
+        } else {
+            qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
+            ret = global_state_store();
+            if (ret) {
+                save_snapshot_error("global_state_store error %d", ret);
+                break;
+            }
+
+            DPRINTF("savevm iterate complete\n");
+            break;
+        }
+    }
+
+    DPRINTF("timing: process_savevm_co took %ld ms\n",
+            qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time);
+
+#ifdef DEBUG_SAVEVM_STATE
+    int64_t start_time_flush = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+#endif
+    /* If a drive runs in an IOThread we can flush it async, and only
+     * need to sync-flush whatever IO happens between now and
+     * vm_stop_force_state. bdrv_next can only be called from main AioContext,
+     * so move there now and after every flush.
+     */
+    aio_co_reschedule_self(qemu_get_aio_context());
+    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
+        /* target has BDRV_O_NO_FLUSH, no sense calling bdrv_flush on it */
+        if (bs == blk_bs(snap_state.target)) {
+            continue;
+        }
+
+        AioContext *bs_ctx = bdrv_get_aio_context(bs);
+        if (bs_ctx != qemu_get_aio_context()) {
+            DPRINTF("savevm: async flushing drive %s\n", bs->filename);
+            aio_co_reschedule_self(bs_ctx);
+            bdrv_graph_co_rdlock();
+            bdrv_flush(bs);
+            bdrv_graph_co_rdunlock();
+            aio_co_reschedule_self(qemu_get_aio_context());
+        }
+    }
+
+    DPRINTF("timing: async flushing took %ld ms\n",
+            qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time_flush);
+
+    qemu_bh_schedule(snap_state.finalize_bh);
+}
+
+void qmp_savevm_start(const char *statefile, Error **errp)
+{
+    Error *local_err = NULL;
+    MigrationState *ms = migrate_get_current();
+    AioContext *iohandler_ctx = iohandler_get_aio_context();
+
+    int bdrv_oflags = BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH;
+
+    if (snap_state.state != SAVE_STATE_DONE) {
+        error_set(errp, ERROR_CLASS_GENERIC_ERROR,
+                  "VM snapshot already started\n");
+        return;
+    }
+
+    if (migration_is_running(ms->state)) {
+        error_set(errp, ERROR_CLASS_GENERIC_ERROR, QERR_MIGRATION_ACTIVE);
+        return;
+    }
+
+    if (migrate_use_block()) {
+        error_set(errp, ERROR_CLASS_GENERIC_ERROR,
+                  "Block migration and snapshots are incompatible");
+        return;
+    }
+
+    /* initialize snapshot info */
+    snap_state.saved_vm_running = runstate_is_running();
+    snap_state.bs_pos = 0;
+    snap_state.total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+    snap_state.blocker = NULL;
+    snap_state.target_close_wait = (QemuCoSleep){ .to_wake = NULL };
+
+    if (snap_state.error) {
+        error_free(snap_state.error);
+        snap_state.error = NULL;
+    }
+
+    if (!statefile) {
+        vm_stop(RUN_STATE_SAVE_VM);
+        snap_state.state = SAVE_STATE_COMPLETED;
+        return;
+    }
+
+    if (qemu_savevm_state_blocked(errp)) {
+        return;
+    }
+
+    /* Open the image */
+    QDict *options = NULL;
+    options = qdict_new();
+    qdict_put_str(options, "driver", "raw");
+    snap_state.target = blk_new_open(statefile, NULL, options, bdrv_oflags, &local_err);
+    if (!snap_state.target) {
+        error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
+        goto restart;
+    }
+
+    QIOChannel *ioc = QIO_CHANNEL(qio_channel_savevm_async_new(snap_state.target,
+                                                               &snap_state.bs_pos));
+    snap_state.file = qemu_file_new_output(ioc);
+
+    if (!snap_state.file) {
+        error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
+        goto restart;
+    }
+
+    /*
+     * qemu_savevm_* paths use migration code and expect a migration state.
+     * State is cleared in process_savevm_co, but has to be initialized
+     * here (blocking main thread, from QMP) to avoid race conditions.
+     */
+    migrate_init(ms);
+    memset(&ram_counters, 0, sizeof(ram_counters));
+    memset(&compression_counters, 0, sizeof(compression_counters));
+    ms->to_dst_file = snap_state.file;
+
+    error_setg(&snap_state.blocker, "block device is in use by savevm");
+    blk_op_block_all(snap_state.target, snap_state.blocker);
+
+    snap_state.state = SAVE_STATE_ACTIVE;
+    snap_state.finalize_bh = qemu_bh_new(process_savevm_finalize, &snap_state);
+    snap_state.co = qemu_coroutine_create(&process_savevm_co, NULL);
+    qemu_mutex_unlock_iothread();
+    qemu_savevm_state_header(snap_state.file);
+    qemu_savevm_state_setup(snap_state.file);
+    qemu_mutex_lock_iothread();
+
+    /* Async processing from here on out happens in iohandler context, so let
+     * the target bdrv have its home there.
+     */
+    blk_set_aio_context(snap_state.target, iohandler_ctx, &local_err);
+
+    aio_co_schedule(iohandler_ctx, snap_state.co);
+
+    return;
+
+restart:
+
+    save_snapshot_error("setup failed");
+
+    if (snap_state.saved_vm_running) {
+        vm_start();
+        snap_state.saved_vm_running = false;
+    }
+}
+
+void coroutine_fn qmp_savevm_end(Error **errp)
+{
+    int64_t timeout;
+
+    if (snap_state.state == SAVE_STATE_DONE) {
+        error_set(errp, ERROR_CLASS_GENERIC_ERROR,
+                  "VM snapshot not started\n");
+        return;
+    }
+
+    if (snap_state.state == SAVE_STATE_ACTIVE) {
+        snap_state.state = SAVE_STATE_CANCELLED;
+        goto wait_for_close;
+    }
+
+    if (snap_state.saved_vm_running) {
+        vm_start();
+        snap_state.saved_vm_running = false;
+    }
+
+    snap_state.state = SAVE_STATE_DONE;
+
+wait_for_close:
+    if (!snap_state.target) {
+        DPRINTF("savevm-end: no target file open\n");
+        return;
+    }
+
+    /* wait until cleanup is done before returning, this ensures that after this
+     * call exits the statefile will be closed and can be removed immediately */
+    DPRINTF("savevm-end: waiting for cleanup\n");
+    timeout = 30L * 1000 * 1000 * 1000;
+    qemu_co_sleep_ns_wakeable(&snap_state.target_close_wait,
+                              QEMU_CLOCK_REALTIME, timeout);
+    if (snap_state.target) {
+        save_snapshot_error("timeout waiting for target file close in "
+                            "qmp_savevm_end");
+        /* we cannot assume the snapshot finished in this case, so leave the
+         * state alone - caller has to figure something out */
+        return;
+    }
+
+    // File closed and no other error, so ensure next snapshot can be started.
+    if (snap_state.state != SAVE_STATE_ERROR) {
+        snap_state.state = SAVE_STATE_DONE;
+    }
+
+    DPRINTF("savevm-end: cleanup done\n");
+}
+
+int load_snapshot_from_blockdev(const char *filename, Error **errp)
+{
+    BlockBackend *be;
+    Error *local_err = NULL;
+    Error *blocker = NULL;
+
+    QEMUFile *f;
+    size_t bs_pos = 0;
+    int ret = -EINVAL;
+
+    be = blk_new_open(filename, NULL, NULL, 0, &local_err);
+
+    if (!be) {
+        error_setg(errp, "Could not open VM state file");
+        goto the_end;
+    }
+
+    error_setg(&blocker, "block device is in use by load state");
+    blk_op_block_all(be, blocker);
+
+    /* restore the VM state */
+    f = qemu_file_new_input(QIO_CHANNEL(qio_channel_savevm_async_new(be, &bs_pos)));
+    if (!f) {
+        error_setg(errp, "Could not open VM state file");
+        goto the_end;
+    }
+
+    qemu_system_reset(SHUTDOWN_CAUSE_NONE);
+    ret = qemu_loadvm_state(f);
+
+    /* dirty bitmap migration has a special case we need to trigger manually */
+    dirty_bitmap_mig_before_vm_start();
+
+    qemu_fclose(f);
+
+    /* state_destroy assumes a real migration which would have added a yank */
+    yank_register_instance(MIGRATION_YANK_INSTANCE, &error_abort);
+
+    migration_incoming_state_destroy();
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "Error while loading VM state");
+        goto the_end;
+    }
+
+    ret = 0;
+
+ the_end:
+    if (be) {
+        blk_op_unblock_all(be, blocker);
+        error_free(blocker);
+        blk_unref(be);
+    }
+    return ret;
+}
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index 6c559b48c8..91be698308 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -22,6 +22,7 @@
 #include "monitor/monitor-internal.h"
 #include "qapi/error.h"
 #include "qapi/qapi-commands-control.h"
+#include "qapi/qapi-commands-migration.h"
 #include "qapi/qapi-commands-misc.h"
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qerror.h"
@@ -443,3 +444,40 @@ void hmp_info_mtree(Monitor *mon, const QDict *qdict)
 
     mtree_info(flatview, dispatch_tree, owner, disabled);
 }
+
+void hmp_savevm_start(Monitor *mon, const QDict *qdict)
+{
+    Error *errp = NULL;
+    const char *statefile = qdict_get_try_str(qdict, "statefile");
+
+    qmp_savevm_start(statefile, &errp);
+    hmp_handle_error(mon, errp);
+}
+
+void coroutine_fn hmp_savevm_end(Monitor *mon, const QDict *qdict)
+{
+    Error *errp = NULL;
+
+    qmp_savevm_end(&errp);
+    hmp_handle_error(mon, errp);
+}
+
+void hmp_info_savevm(Monitor *mon, const QDict *qdict)
+{
+    SaveVMInfo *info;
+    info = qmp_query_savevm(NULL);
+
+    if (info->status) {
+        monitor_printf(mon, "savevm status: %s\n", info->status);
+        monitor_printf(mon, "total time: %" PRIu64 " milliseconds\n",
+                       info->total_time);
+    } else {
+        monitor_printf(mon, "savevm status: not running\n");
+    }
+    if (info->has_bytes) {
+        monitor_printf(mon, "Bytes saved: %"PRIu64"\n", info->bytes);
+    }
+    if (info->error) {
+        monitor_printf(mon, "Error: %s\n", info->error);
+    }
+}
diff --git a/qapi/migration.json b/qapi/migration.json
index c84fa10e86..1702b92553 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -261,6 +261,40 @@
            '*compression': 'CompressionStats',
            '*socket-address': ['SocketAddress'] } }
 
+##
+# @SaveVMInfo:
+#
+# Information about current migration process.
+#
+# @status: string describing the current savevm status.
+#          This can be 'active', 'completed', 'failed'.
+#          If this field is not returned, no savevm process
+#          has been initiated
+#
+# @error: string containing error message if status is failed.
+#
+# @total-time: total amount of milliseconds since savevm started.
+#              If savevm has ended, it returns the total save time
+#
+# @bytes: total amount of data transferred
+#
+# Since: 1.3
+##
+{ 'struct': 'SaveVMInfo',
+  'data': {'*status': 'str', '*error': 'str',
+           '*total-time': 'int', '*bytes': 'int'} }
+
+##
+# @query-savevm:
+#
+# Returns information about current savevm process.
+#
+# Returns: @SaveVMInfo
+#
+# Since: 1.3
+##
+{ 'command': 'query-savevm', 'returns': 'SaveVMInfo' }
+
 ##
 # @query-migrate:
 #
diff --git a/qapi/misc.json b/qapi/misc.json
index 6ddd16ea28..e5681ae8a2 100644
--- a/qapi/misc.json
+++ b/qapi/misc.json
@@ -469,6 +469,22 @@
 ##
 { 'command': 'query-fdsets', 'returns': ['FdsetInfo'] }
 
+##
+# @savevm-start:
+#
+# Prepare for snapshot and halt VM. Save VM state to statefile.
+#
+##
+{ 'command': 'savevm-start', 'data': { '*statefile': 'str' } }
+
+##
+# @savevm-end:
+#
+# Resume VM after a snapshot.
+#
+##
+{ 'command': 'savevm-end', 'coroutine': true }
+
 ##
 # @CommandLineParameterType:
 #
diff --git a/qemu-options.hx b/qemu-options.hx
index 59bdf67a2c..fc6cb23dd9 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -4378,6 +4378,18 @@ SRST
     Start right away with a saved state (``loadvm`` in monitor)
 ERST
 
+DEF("loadstate", HAS_ARG, QEMU_OPTION_loadstate, \
+    "-loadstate file\n" \
+    "                start right away with a saved state\n",
+    QEMU_ARCH_ALL)
+SRST
+``-loadstate file``
+    Start right away with a saved state. This option does not roll back
+    disk state like @code{loadvm}, so the user must make sure that the
+    disks are in the correct state. @var{file} can be any valid device URL.
+    See the section for "Device URL Syntax" for more information.
+ERST
+
 #ifndef _WIN32
 DEF("daemonize", 0, QEMU_OPTION_daemonize, \
     "-daemonize      daemonize QEMU after initializing\n", QEMU_ARCH_ALL)
diff --git a/softmmu/vl.c b/softmmu/vl.c
index ea20b23e4c..0eabc71b68 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -164,6 +164,7 @@ static const char *accelerators;
 static bool have_custom_ram_size;
 static const char *ram_memdev_id;
 static QDict *machine_opts_dict;
+static const char *loadstate;
 static QTAILQ_HEAD(, ObjectOption) object_opts = QTAILQ_HEAD_INITIALIZER(object_opts);
 static QTAILQ_HEAD(, DeviceOption) device_opts = QTAILQ_HEAD_INITIALIZER(device_opts);
 static int display_remote;
@@ -2612,6 +2613,12 @@ void qmp_x_exit_preconfig(Error **errp)
 
     if (loadvm) {
         load_snapshot(loadvm, NULL, false, NULL, &error_fatal);
+    } else if (loadstate) {
+        Error *local_err = NULL;
+        if (load_snapshot_from_blockdev(loadstate, &local_err) < 0) {
+            error_report_err(local_err);
+            autostart = 0;
+        }
     }
     if (replay_mode != REPLAY_MODE_NONE) {
         replay_vmstate_init();
@@ -3159,6 +3166,9 @@ void qemu_init(int argc, char **argv)
         case QEMU_OPTION_loadvm:
             loadvm = optarg;
             break;
+        case QEMU_OPTION_loadstate:
+            loadstate = optarg;
+            break;
         case QEMU_OPTION_full_screen:
             dpy.has_full_screen = true;
             dpy.full_screen = true;