]> git.proxmox.com Git - pve-qemu.git/blame - debian/patches/pve/0018-PVE-add-savevm-async-for-background-state-snapshots.patch
bump version to 8.0.2-1
[pve-qemu.git] / debian / patches / pve / 0018-PVE-add-savevm-async-for-background-state-snapshots.patch
CommitLineData
23102ed6 1From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
6402d961 2From: Dietmar Maurer <dietmar@proxmox.com>
83faa3fe 3Date: Mon, 6 Apr 2020 12:16:46 +0200
817b7667 4Subject: [PATCH] PVE: add savevm-async for background state snapshots
95259824 5
d7f4e01a
TL
6Put qemu_savevm_state_{header,setup} into the main loop and the rest
7of the iteration into a coroutine. The former need to lock the
8iothread (and we can't unlock it in the coroutine), and the latter
9can't deal with being in a separate thread, so a coroutine it must
10be.
11
817b7667
SR
12Truncate output file at 1024 boundary.
13
14Do not block the VM and save the state on aborting a snapshot, as the
15snapshot will be invalid anyway.
16
17Also, when aborting, wait for the target file to be closed, otherwise a
18client might run into race-conditions when trying to remove the file
19still opened by QEMU.
20
b855dce7 21Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
6402d961 22Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
d7f4e01a 23Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
db5d2a4b
FE
24[SR: improve aborting
25 register yank before migration_incoming_state_destroy]
817b7667 26Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
5b15e2ec 27[FE: further improve aborting
8051a24b 28 adapt to removal of QEMUFileOps
bf251437
FE
29 improve condition for entering final stage
30 adapt to QAPI and other changes for 8.0]
563c5928 31Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
95259824 32---
b855dce7 33 hmp-commands-info.hx | 13 +
5b15e2ec 34 hmp-commands.hx | 33 +++
8dca018b 35 include/migration/snapshot.h | 2 +
be901f66 36 include/monitor/hmp.h | 5 +
817b7667 37 migration/meson.build | 1 +
db5d2a4b 38 migration/savevm-async.c | 548 +++++++++++++++++++++++++++++++++++
bf251437 39 monitor/hmp-cmds.c | 58 ++++
5b15e2ec 40 qapi/migration.json | 34 +++
db5d2a4b 41 qapi/misc.json | 32 ++
83faa3fe 42 qemu-options.hx | 12 +
83faa3fe 43 softmmu/vl.c | 10 +
db5d2a4b 44 11 files changed, 748 insertions(+)
817b7667 45 create mode 100644 migration/savevm-async.c
95259824 46
95259824 47diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
bf251437 48index 47d63d26db..a166bff3d5 100644
95259824
WB
49--- a/hmp-commands-info.hx
50+++ b/hmp-commands-info.hx
d03e1b3c 51@@ -540,6 +540,19 @@ SRST
8dca018b 52 Show current migration parameters.
83faa3fe
TL
53 ERST
54
b855dce7 55+ {
95259824
WB
56+ .name = "savevm",
57+ .args_type = "",
58+ .params = "",
59+ .help = "show savevm status",
a544966d 60+ .cmd = hmp_info_savevm,
95259824
WB
61+ },
62+
83faa3fe
TL
63+SRST
64+ ``info savevm``
65+ Show savevm status.
66+ERST
67+
b855dce7 68 {
83faa3fe
TL
69 .name = "balloon",
70 .args_type = "",
95259824 71diff --git a/hmp-commands.hx b/hmp-commands.hx
bf251437 72index bb85ee1d26..b66d7fc4ab 100644
95259824
WB
73--- a/hmp-commands.hx
74+++ b/hmp-commands.hx
bf251437
FE
75@@ -1846,3 +1846,36 @@ SRST
76 List event channels in the guest
d03e1b3c
FE
77 ERST
78 #endif
95259824
WB
79+
80+ {
81+ .name = "savevm-start",
82+ .args_type = "statefile:s?",
83+ .params = "[statefile]",
84+ .help = "Prepare for snapshot and halt VM. Save VM state to statefile.",
a544966d 85+ .cmd = hmp_savevm_start,
95259824
WB
86+ },
87+
88+ {
89+ .name = "snapshot-drive",
90+ .args_type = "device:s,name:s",
91+ .params = "device name",
92+ .help = "Create internal snapshot.",
a544966d 93+ .cmd = hmp_snapshot_drive,
95259824
WB
94+ },
95+
96+ {
97+ .name = "delete-drive-snapshot",
98+ .args_type = "device:s,name:s",
99+ .params = "device name",
100+ .help = "Delete internal snapshot.",
a544966d 101+ .cmd = hmp_delete_drive_snapshot,
95259824
WB
102+ },
103+
104+ {
105+ .name = "savevm-end",
106+ .args_type = "",
107+ .params = "",
108+ .help = "Resume VM after snaphot.",
817b7667
SR
109+ .cmd = hmp_savevm_end,
110+ .coroutine = true,
95259824 111+ },
be901f66 112diff --git a/include/migration/snapshot.h b/include/migration/snapshot.h
8dca018b 113index e72083b117..c846d37806 100644
be901f66
SR
114--- a/include/migration/snapshot.h
115+++ b/include/migration/snapshot.h
8dca018b
SR
116@@ -61,4 +61,6 @@ bool delete_snapshot(const char *name,
117 bool has_devices, strList *devices,
118 Error **errp);
be901f66 119
be901f66 120+int load_snapshot_from_blockdev(const char *filename, Error **errp);
8dca018b 121+
be901f66
SR
122 #endif
123diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h
bf251437 124index fdb69b7f9c..c012bad741 100644
be901f66
SR
125--- a/include/monitor/hmp.h
126+++ b/include/monitor/hmp.h
bf251437 127@@ -28,6 +28,7 @@ void hmp_info_status(Monitor *mon, const QDict *qdict);
be901f66
SR
128 void hmp_info_uuid(Monitor *mon, const QDict *qdict);
129 void hmp_info_chardev(Monitor *mon, const QDict *qdict);
130 void hmp_info_mice(Monitor *mon, const QDict *qdict);
131+void hmp_info_savevm(Monitor *mon, const QDict *qdict);
132 void hmp_info_migrate(Monitor *mon, const QDict *qdict);
133 void hmp_info_migrate_capabilities(Monitor *mon, const QDict *qdict);
134 void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict);
bf251437
FE
135@@ -94,6 +95,10 @@ void hmp_closefd(Monitor *mon, const QDict *qdict);
136 void hmp_mouse_move(Monitor *mon, const QDict *qdict);
137 void hmp_mouse_button(Monitor *mon, const QDict *qdict);
138 void hmp_mouse_set(Monitor *mon, const QDict *qdict);
be901f66
SR
139+void hmp_savevm_start(Monitor *mon, const QDict *qdict);
140+void hmp_snapshot_drive(Monitor *mon, const QDict *qdict);
141+void hmp_delete_drive_snapshot(Monitor *mon, const QDict *qdict);
142+void hmp_savevm_end(Monitor *mon, const QDict *qdict);
143 void hmp_sendkey(Monitor *mon, const QDict *qdict);
d03e1b3c 144 void coroutine_fn hmp_screendump(Monitor *mon, const QDict *qdict);
83faa3fe 145 void hmp_chardev_add(Monitor *mon, const QDict *qdict);
817b7667 146diff --git a/migration/meson.build b/migration/meson.build
bf251437 147index 8a142fc7a9..a7824b5266 100644
817b7667
SR
148--- a/migration/meson.build
149+++ b/migration/meson.build
bf251437 150@@ -25,6 +25,7 @@ softmmu_ss.add(files(
817b7667
SR
151 'multifd-zlib.c',
152 'postcopy-ram.c',
153 'savevm.c',
154+ 'savevm-async.c',
155 'socket.c',
156 'tls.c',
bf251437 157 'threadinfo.c',
817b7667 158diff --git a/migration/savevm-async.c b/migration/savevm-async.c
95259824 159new file mode 100644
db5d2a4b 160index 0000000000..c5db9e9c1e
95259824 161--- /dev/null
817b7667 162+++ b/migration/savevm-async.c
db5d2a4b 163@@ -0,0 +1,548 @@
95259824 164+#include "qemu/osdep.h"
5b15e2ec 165+#include "migration/channel-savevm-async.h"
6838f038
WB
166+#include "migration/migration.h"
167+#include "migration/savevm.h"
168+#include "migration/snapshot.h"
169+#include "migration/global_state.h"
170+#include "migration/ram.h"
171+#include "migration/qemu-file.h"
95259824 172+#include "sysemu/sysemu.h"
6402d961 173+#include "sysemu/runstate.h"
95259824 174+#include "block/block.h"
95259824 175+#include "sysemu/block-backend.h"
53e83913
WB
176+#include "qapi/error.h"
177+#include "qapi/qmp/qerror.h"
178+#include "qapi/qmp/qdict.h"
179+#include "qapi/qapi-commands-migration.h"
180+#include "qapi/qapi-commands-misc.h"
0775f12b 181+#include "qapi/qapi-commands-block.h"
95259824 182+#include "qemu/cutils.h"
817b7667 183+#include "qemu/timer.h"
6402d961
TL
184+#include "qemu/main-loop.h"
185+#include "qemu/rcu.h"
db5d2a4b 186+#include "qemu/yank.h"
95259824
WB
187+
188+/* #define DEBUG_SAVEVM_STATE */
189+
190+#ifdef DEBUG_SAVEVM_STATE
191+#define DPRINTF(fmt, ...) \
192+ do { printf("savevm-async: " fmt, ## __VA_ARGS__); } while (0)
193+#else
194+#define DPRINTF(fmt, ...) \
195+ do { } while (0)
196+#endif
197+
198+enum {
199+ SAVE_STATE_DONE,
200+ SAVE_STATE_ERROR,
201+ SAVE_STATE_ACTIVE,
202+ SAVE_STATE_COMPLETED,
203+ SAVE_STATE_CANCELLED
204+};
205+
206+
207+static struct SnapshotState {
67af0fa4 208+ BlockBackend *target;
95259824
WB
209+ size_t bs_pos;
210+ int state;
211+ Error *error;
212+ Error *blocker;
213+ int saved_vm_running;
214+ QEMUFile *file;
215+ int64_t total_time;
d7f4e01a
TL
216+ QEMUBH *finalize_bh;
217+ Coroutine *co;
563c5928 218+ QemuCoSleep target_close_wait;
95259824
WB
219+} snap_state;
220+
817b7667
SR
221+static bool savevm_aborted(void)
222+{
223+ return snap_state.state == SAVE_STATE_CANCELLED ||
224+ snap_state.state == SAVE_STATE_ERROR;
225+}
226+
95259824
WB
227+SaveVMInfo *qmp_query_savevm(Error **errp)
228+{
229+ SaveVMInfo *info = g_malloc0(sizeof(*info));
230+ struct SnapshotState *s = &snap_state;
231+
232+ if (s->state != SAVE_STATE_DONE) {
233+ info->has_bytes = true;
234+ info->bytes = s->bs_pos;
235+ switch (s->state) {
236+ case SAVE_STATE_ERROR:
95259824
WB
237+ info->status = g_strdup("failed");
238+ info->has_total_time = true;
239+ info->total_time = s->total_time;
240+ if (s->error) {
95259824
WB
241+ info->error = g_strdup(error_get_pretty(s->error));
242+ }
243+ break;
244+ case SAVE_STATE_ACTIVE:
95259824
WB
245+ info->status = g_strdup("active");
246+ info->has_total_time = true;
247+ info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME)
248+ - s->total_time;
249+ break;
250+ case SAVE_STATE_COMPLETED:
95259824
WB
251+ info->status = g_strdup("completed");
252+ info->has_total_time = true;
253+ info->total_time = s->total_time;
254+ break;
255+ }
256+ }
257+
258+ return info;
259+}
260+
261+static int save_snapshot_cleanup(void)
262+{
263+ int ret = 0;
264+
265+ DPRINTF("save_snapshot_cleanup\n");
266+
267+ snap_state.total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) -
268+ snap_state.total_time;
269+
270+ if (snap_state.file) {
271+ ret = qemu_fclose(snap_state.file);
5b15e2ec 272+ snap_state.file = NULL;
95259824
WB
273+ }
274+
67af0fa4 275+ if (snap_state.target) {
817b7667
SR
276+ if (!savevm_aborted()) {
277+ /* try to truncate, but ignore errors (will fail on block devices).
278+ * note1: bdrv_read() need whole blocks, so we need to round up
279+ * note2: PVE requires 1024 (BDRV_SECTOR_SIZE*2) alignment
280+ */
281+ size_t size = QEMU_ALIGN_UP(snap_state.bs_pos, BDRV_SECTOR_SIZE*2);
282+ blk_truncate(snap_state.target, size, false, PREALLOC_MODE_OFF, 0, NULL);
283+ }
67af0fa4 284+ blk_op_unblock_all(snap_state.target, snap_state.blocker);
95259824
WB
285+ error_free(snap_state.blocker);
286+ snap_state.blocker = NULL;
67af0fa4
WB
287+ blk_unref(snap_state.target);
288+ snap_state.target = NULL;
817b7667 289+
563c5928 290+ qemu_co_sleep_wake(&snap_state.target_close_wait);
95259824
WB
291+ }
292+
293+ return ret;
294+}
295+
296+static void save_snapshot_error(const char *fmt, ...)
297+{
298+ va_list ap;
299+ char *msg;
300+
301+ va_start(ap, fmt);
302+ msg = g_strdup_vprintf(fmt, ap);
303+ va_end(ap);
304+
305+ DPRINTF("save_snapshot_error: %s\n", msg);
306+
307+ if (!snap_state.error) {
308+ error_set(&snap_state.error, ERROR_CLASS_GENERIC_ERROR, "%s", msg);
309+ }
310+
311+ g_free (msg);
312+
313+ snap_state.state = SAVE_STATE_ERROR;
95259824
WB
314+}
315+
d7f4e01a 316+static void process_savevm_finalize(void *opaque)
0775f12b
WB
317+{
318+ int ret;
d7f4e01a
TL
319+ AioContext *iohandler_ctx = iohandler_get_aio_context();
320+ MigrationState *ms = migrate_get_current();
321+
817b7667
SR
322+ bool aborted = savevm_aborted();
323+
d7f4e01a
TL
324+#ifdef DEBUG_SAVEVM_STATE
325+ int64_t start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
326+#endif
327+
328+ qemu_bh_delete(snap_state.finalize_bh);
329+ snap_state.finalize_bh = NULL;
330+ snap_state.co = NULL;
331+
332+ /* We need to own the target bdrv's context for the following functions,
333+ * so move it back. It can stay in the main context and live out its life
334+ * there, since we're done with it after this method ends anyway.
335+ */
336+ aio_context_acquire(iohandler_ctx);
337+ blk_set_aio_context(snap_state.target, qemu_get_aio_context(), NULL);
338+ aio_context_release(iohandler_ctx);
339+
340+ ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
341+ if (ret < 0) {
342+ save_snapshot_error("vm_stop_force_state error %d", ret);
343+ }
344+
817b7667
SR
345+ if (!aborted) {
346+ /* skip state saving if we aborted, snapshot will be invalid anyway */
347+ (void)qemu_savevm_state_complete_precopy(snap_state.file, false, false);
348+ ret = qemu_file_get_error(snap_state.file);
349+ if (ret < 0) {
a0208150 350+ save_snapshot_error("qemu_savevm_state_complete_precopy error %d", ret);
817b7667 351+ }
d7f4e01a
TL
352+ }
353+
354+ DPRINTF("state saving complete\n");
355+ DPRINTF("timing: process_savevm_finalize (state saving) took %ld ms\n",
356+ qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time);
357+
358+ /* clear migration state */
359+ migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP,
817b7667 360+ ret || aborted ? MIGRATION_STATUS_FAILED : MIGRATION_STATUS_COMPLETED);
d7f4e01a
TL
361+ ms->to_dst_file = NULL;
362+
363+ qemu_savevm_state_cleanup();
364+
0775f12b
WB
365+ ret = save_snapshot_cleanup();
366+ if (ret < 0) {
367+ save_snapshot_error("save_snapshot_cleanup error %d", ret);
368+ } else if (snap_state.state == SAVE_STATE_ACTIVE) {
369+ snap_state.state = SAVE_STATE_COMPLETED;
817b7667 370+ } else if (aborted) {
1976ca46
FE
371+ /*
372+ * If there was an error, there's no need to set a new one here.
373+ * If the snapshot was canceled, leave setting the state to
374+ * qmp_savevm_end(), which is woken by save_snapshot_cleanup().
375+ */
0775f12b
WB
376+ } else {
377+ save_snapshot_error("process_savevm_cleanup: invalid state: %d",
378+ snap_state.state);
95259824 379+ }
0775f12b
WB
380+ if (snap_state.saved_vm_running) {
381+ vm_start();
382+ snap_state.saved_vm_running = false;
95259824 383+ }
d7f4e01a
TL
384+
385+ DPRINTF("timing: process_savevm_finalize (full) took %ld ms\n",
386+ qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time);
95259824
WB
387+}
388+
d7f4e01a 389+static void coroutine_fn process_savevm_co(void *opaque)
95259824
WB
390+{
391+ int ret;
392+ int64_t maxlen;
d7f4e01a
TL
393+ BdrvNextIterator it;
394+ BlockDriverState *bs = NULL;
95259824 395+
d7f4e01a
TL
396+#ifdef DEBUG_SAVEVM_STATE
397+ int64_t start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
398+#endif
95259824 399+
6838f038 400+ ret = qemu_file_get_error(snap_state.file);
95259824 401+ if (ret < 0) {
6838f038 402+ save_snapshot_error("qemu_savevm_state_setup failed");
d7f4e01a 403+ return;
95259824
WB
404+ }
405+
406+ while (snap_state.state == SAVE_STATE_ACTIVE) {
bf251437 407+ uint64_t pending_size, pend_precopy, pend_postcopy;
db5d2a4b 408+ uint64_t threshold = 400 * 1000;
95259824 409+
db5d2a4b
FE
410+ /*
411+ * pending_{estimate,exact} are expected to be called without iothread
412+ * lock. Similar to what is done in migration.c, call the exact variant
413+ * only once pend_precopy in the estimate is below the threshold.
414+ */
e9b36665 415+ qemu_mutex_unlock_iothread();
db5d2a4b
FE
416+ qemu_savevm_state_pending_estimate(&pend_precopy, &pend_postcopy);
417+ if (pend_precopy <= threshold) {
418+ qemu_savevm_state_pending_exact(&pend_precopy, &pend_postcopy);
419+ }
e9b36665 420+ qemu_mutex_lock_iothread();
bf251437 421+ pending_size = pend_precopy + pend_postcopy;
95259824 422+
eee064d9
FE
423+ /*
424+ * A guest reaching this cutoff is dirtying lots of RAM. It should be
425+ * large enough so that the guest can't dirty this much between the
426+ * check and the guest actually being stopped, but it should be small
427+ * enough to avoid long downtimes for non-hibernation snapshots.
428+ */
429+ maxlen = blk_getlength(snap_state.target) - 100*1024*1024;
0775f12b 430+
8051a24b 431+ /* Note that there is no progress for pend_postcopy when iterating */
db5d2a4b 432+ if (pend_precopy > threshold && snap_state.bs_pos + pending_size < maxlen) {
0775f12b
WB
433+ ret = qemu_savevm_state_iterate(snap_state.file, false);
434+ if (ret < 0) {
435+ save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
436+ break;
437+ }
d7f4e01a 438+ DPRINTF("savevm iterate pending size %lu ret %d\n", pending_size, ret);
95259824 439+ } else {
b855dce7 440+ qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
0775f12b
WB
441+ ret = global_state_store();
442+ if (ret) {
443+ save_snapshot_error("global_state_store error %d", ret);
95259824 444+ break;
0775f12b 445+ }
d7f4e01a
TL
446+
447+ DPRINTF("savevm iterate complete\n");
95259824
WB
448+ break;
449+ }
95259824
WB
450+ }
451+
d7f4e01a
TL
452+ DPRINTF("timing: process_savevm_co took %ld ms\n",
453+ qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time);
454+
455+#ifdef DEBUG_SAVEVM_STATE
456+ int64_t start_time_flush = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
457+#endif
458+ /* If a drive runs in an IOThread we can flush it async, and only
459+ * need to sync-flush whatever IO happens between now and
460+ * vm_stop_force_state. bdrv_next can only be called from main AioContext,
461+ * so move there now and after every flush.
462+ */
463+ aio_co_reschedule_self(qemu_get_aio_context());
464+ for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
465+ /* target has BDRV_O_NO_FLUSH, no sense calling bdrv_flush on it */
466+ if (bs == blk_bs(snap_state.target)) {
467+ continue;
468+ }
469+
470+ AioContext *bs_ctx = bdrv_get_aio_context(bs);
471+ if (bs_ctx != qemu_get_aio_context()) {
472+ DPRINTF("savevm: async flushing drive %s\n", bs->filename);
473+ aio_co_reschedule_self(bs_ctx);
bf251437 474+ bdrv_graph_co_rdlock();
d7f4e01a 475+ bdrv_flush(bs);
bf251437 476+ bdrv_graph_co_rdunlock();
d7f4e01a
TL
477+ aio_co_reschedule_self(qemu_get_aio_context());
478+ }
479+ }
480+
481+ DPRINTF("timing: async flushing took %ld ms\n",
482+ qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time_flush);
95259824 483+
d7f4e01a 484+ qemu_bh_schedule(snap_state.finalize_bh);
95259824
WB
485+}
486+
bf251437 487+void qmp_savevm_start(const char *statefile, Error **errp)
95259824 488+{
95259824 489+ Error *local_err = NULL;
d7f4e01a
TL
490+ MigrationState *ms = migrate_get_current();
491+ AioContext *iohandler_ctx = iohandler_get_aio_context();
95259824 492+
67af0fa4 493+ int bdrv_oflags = BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH;
95259824
WB
494+
495+ if (snap_state.state != SAVE_STATE_DONE) {
496+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
497+ "VM snapshot already started\n");
498+ return;
499+ }
500+
d7f4e01a
TL
501+ if (migration_is_running(ms->state)) {
502+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, QERR_MIGRATION_ACTIVE);
503+ return;
504+ }
505+
506+ if (migrate_use_block()) {
507+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
508+ "Block migration and snapshots are incompatible");
509+ return;
510+ }
511+
95259824
WB
512+ /* initialize snapshot info */
513+ snap_state.saved_vm_running = runstate_is_running();
514+ snap_state.bs_pos = 0;
515+ snap_state.total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
516+ snap_state.blocker = NULL;
a262e964 517+ snap_state.target_close_wait = (QemuCoSleep){ .to_wake = NULL };
95259824
WB
518+
519+ if (snap_state.error) {
520+ error_free(snap_state.error);
521+ snap_state.error = NULL;
522+ }
523+
bf251437 524+ if (!statefile) {
95259824
WB
525+ vm_stop(RUN_STATE_SAVE_VM);
526+ snap_state.state = SAVE_STATE_COMPLETED;
527+ return;
528+ }
529+
530+ if (qemu_savevm_state_blocked(errp)) {
531+ return;
532+ }
533+
534+ /* Open the image */
95259824
WB
535+ QDict *options = NULL;
536+ options = qdict_new();
53e83913 537+ qdict_put_str(options, "driver", "raw");
67af0fa4
WB
538+ snap_state.target = blk_new_open(statefile, NULL, options, bdrv_oflags, &local_err);
539+ if (!snap_state.target) {
95259824
WB
540+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
541+ goto restart;
542+ }
543+
5b15e2ec
FE
544+ QIOChannel *ioc = QIO_CHANNEL(qio_channel_savevm_async_new(snap_state.target,
545+ &snap_state.bs_pos));
546+ snap_state.file = qemu_file_new_output(ioc);
95259824
WB
547+
548+ if (!snap_state.file) {
549+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
550+ goto restart;
551+ }
552+
d7f4e01a
TL
553+ /*
554+ * qemu_savevm_* paths use migration code and expect a migration state.
555+ * State is cleared in process_savevm_co, but has to be initialized
556+ * here (blocking main thread, from QMP) to avoid race conditions.
557+ */
558+ migrate_init(ms);
559+ memset(&ram_counters, 0, sizeof(ram_counters));
db5d2a4b 560+ memset(&compression_counters, 0, sizeof(compression_counters));
d7f4e01a 561+ ms->to_dst_file = snap_state.file;
95259824
WB
562+
563+ error_setg(&snap_state.blocker, "block device is in use by savevm");
67af0fa4 564+ blk_op_block_all(snap_state.target, snap_state.blocker);
95259824 565+
0775f12b 566+ snap_state.state = SAVE_STATE_ACTIVE;
d7f4e01a
TL
567+ snap_state.finalize_bh = qemu_bh_new(process_savevm_finalize, &snap_state);
568+ snap_state.co = qemu_coroutine_create(&process_savevm_co, NULL);
569+ qemu_mutex_unlock_iothread();
570+ qemu_savevm_state_header(snap_state.file);
571+ qemu_savevm_state_setup(snap_state.file);
572+ qemu_mutex_lock_iothread();
573+
574+ /* Async processing from here on out happens in iohandler context, so let
575+ * the target bdrv have its home there.
576+ */
577+ blk_set_aio_context(snap_state.target, iohandler_ctx, &local_err);
578+
579+ aio_co_schedule(iohandler_ctx, snap_state.co);
95259824
WB
580+
581+ return;
582+
583+restart:
584+
585+ save_snapshot_error("setup failed");
586+
587+ if (snap_state.saved_vm_running) {
588+ vm_start();
817b7667 589+ snap_state.saved_vm_running = false;
95259824
WB
590+ }
591+}
592+
817b7667 593+void coroutine_fn qmp_savevm_end(Error **errp)
95259824 594+{
817b7667
SR
595+ int64_t timeout;
596+
95259824
WB
597+ if (snap_state.state == SAVE_STATE_DONE) {
598+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
599+ "VM snapshot not started\n");
600+ return;
601+ }
602+
603+ if (snap_state.state == SAVE_STATE_ACTIVE) {
604+ snap_state.state = SAVE_STATE_CANCELLED;
817b7667 605+ goto wait_for_close;
95259824
WB
606+ }
607+
608+ if (snap_state.saved_vm_running) {
609+ vm_start();
817b7667 610+ snap_state.saved_vm_running = false;
95259824
WB
611+ }
612+
613+ snap_state.state = SAVE_STATE_DONE;
817b7667
SR
614+
615+wait_for_close:
616+ if (!snap_state.target) {
617+ DPRINTF("savevm-end: no target file open\n");
618+ return;
619+ }
620+
621+ /* wait until cleanup is done before returning, this ensures that after this
622+ * call exits the statefile will be closed and can be removed immediately */
623+ DPRINTF("savevm-end: waiting for cleanup\n");
624+ timeout = 30L * 1000 * 1000 * 1000;
563c5928 625+ qemu_co_sleep_ns_wakeable(&snap_state.target_close_wait,
f376b2b9 626+ QEMU_CLOCK_REALTIME, timeout);
817b7667
SR
627+ if (snap_state.target) {
628+ save_snapshot_error("timeout waiting for target file close in "
629+ "qmp_savevm_end");
630+ /* we cannot assume the snapshot finished in this case, so leave the
631+ * state alone - caller has to figure something out */
632+ return;
633+ }
634+
1976ca46
FE
635+ // File closed and no other error, so ensure next snapshot can be started.
636+ if (snap_state.state != SAVE_STATE_ERROR) {
637+ snap_state.state = SAVE_STATE_DONE;
638+ }
639+
817b7667 640+ DPRINTF("savevm-end: cleanup done\n");
95259824
WB
641+}
642+
0775f12b 643+// FIXME: Deprecated
95259824
WB
644+void qmp_snapshot_drive(const char *device, const char *name, Error **errp)
645+{
0775f12b
WB
646+ // Compatibility to older qemu-server.
647+ qmp_blockdev_snapshot_internal_sync(device, name, errp);
95259824
WB
648+}
649+
0775f12b 650+// FIXME: Deprecated
95259824
WB
651+void qmp_delete_drive_snapshot(const char *device, const char *name,
652+ Error **errp)
653+{
0775f12b 654+ // Compatibility to older qemu-server.
bf251437 655+ (void)qmp_blockdev_snapshot_delete_internal_sync(device, NULL, name, errp);
95259824
WB
656+}
657+
6838f038 658+int load_snapshot_from_blockdev(const char *filename, Error **errp)
95259824 659+{
67af0fa4 660+ BlockBackend *be;
95259824
WB
661+ Error *local_err = NULL;
662+ Error *blocker = NULL;
663+
664+ QEMUFile *f;
5b15e2ec 665+ size_t bs_pos = 0;
67af0fa4 666+ int ret = -EINVAL;
95259824 667+
67af0fa4 668+ be = blk_new_open(filename, NULL, NULL, 0, &local_err);
95259824 669+
67af0fa4 670+ if (!be) {
6838f038 671+ error_setg(errp, "Could not open VM state file");
95259824
WB
672+ goto the_end;
673+ }
674+
67af0fa4
WB
675+ error_setg(&blocker, "block device is in use by load state");
676+ blk_op_block_all(be, blocker);
677+
95259824 678+ /* restore the VM state */
5b15e2ec 679+ f = qemu_file_new_input(QIO_CHANNEL(qio_channel_savevm_async_new(be, &bs_pos)));
95259824 680+ if (!f) {
6838f038 681+ error_setg(errp, "Could not open VM state file");
95259824
WB
682+ goto the_end;
683+ }
684+
6838f038 685+ qemu_system_reset(SHUTDOWN_CAUSE_NONE);
95259824
WB
686+ ret = qemu_loadvm_state(f);
687+
e9b36665
SR
688+ /* dirty bitmap migration has a special case we need to trigger manually */
689+ dirty_bitmap_mig_before_vm_start();
690+
95259824 691+ qemu_fclose(f);
db5d2a4b
FE
692+
693+ /* state_destroy assumes a real migration which would have added a yank */
694+ yank_register_instance(MIGRATION_YANK_INSTANCE, &error_abort);
695+
95259824
WB
696+ migration_incoming_state_destroy();
697+ if (ret < 0) {
6838f038 698+ error_setg_errno(errp, -ret, "Error while loading VM state");
95259824
WB
699+ goto the_end;
700+ }
701+
702+ ret = 0;
703+
704+ the_end:
67af0fa4
WB
705+ if (be) {
706+ blk_op_unblock_all(be, blocker);
95259824 707+ error_free(blocker);
67af0fa4 708+ blk_unref(be);
95259824
WB
709+ }
710+ return ret;
711+}
817b7667 712diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
bf251437 713index 6c559b48c8..435f9334f9 100644
817b7667
SR
714--- a/monitor/hmp-cmds.c
715+++ b/monitor/hmp-cmds.c
bf251437
FE
716@@ -22,6 +22,7 @@
717 #include "monitor/monitor-internal.h"
718 #include "qapi/error.h"
719 #include "qapi/qapi-commands-control.h"
720+#include "qapi/qapi-commands-migration.h"
721 #include "qapi/qapi-commands-misc.h"
722 #include "qapi/qmp/qdict.h"
723 #include "qapi/qmp/qerror.h"
724@@ -443,3 +444,60 @@ void hmp_info_mtree(Monitor *mon, const QDict *qdict)
817b7667 725
bf251437
FE
726 mtree_info(flatview, dispatch_tree, owner, disabled);
727 }
728+
817b7667
SR
729+void hmp_savevm_start(Monitor *mon, const QDict *qdict)
730+{
731+ Error *errp = NULL;
732+ const char *statefile = qdict_get_try_str(qdict, "statefile");
733+
bf251437 734+ qmp_savevm_start(statefile, &errp);
817b7667
SR
735+ hmp_handle_error(mon, errp);
736+}
737+
738+void hmp_snapshot_drive(Monitor *mon, const QDict *qdict)
739+{
740+ Error *errp = NULL;
741+ const char *name = qdict_get_str(qdict, "name");
742+ const char *device = qdict_get_str(qdict, "device");
743+
744+ qmp_snapshot_drive(device, name, &errp);
745+ hmp_handle_error(mon, errp);
746+}
747+
748+void hmp_delete_drive_snapshot(Monitor *mon, const QDict *qdict)
749+{
750+ Error *errp = NULL;
751+ const char *name = qdict_get_str(qdict, "name");
752+ const char *device = qdict_get_str(qdict, "device");
753+
754+ qmp_delete_drive_snapshot(device, name, &errp);
755+ hmp_handle_error(mon, errp);
756+}
757+
758+void coroutine_fn hmp_savevm_end(Monitor *mon, const QDict *qdict)
759+{
760+ Error *errp = NULL;
761+
762+ qmp_savevm_end(&errp);
763+ hmp_handle_error(mon, errp);
764+}
765+
766+void hmp_info_savevm(Monitor *mon, const QDict *qdict)
767+{
768+ SaveVMInfo *info;
769+ info = qmp_query_savevm(NULL);
770+
bf251437 771+ if (info->status) {
817b7667
SR
772+ monitor_printf(mon, "savevm status: %s\n", info->status);
773+ monitor_printf(mon, "total time: %" PRIu64 " milliseconds\n",
774+ info->total_time);
775+ } else {
776+ monitor_printf(mon, "savevm status: not running\n");
777+ }
778+ if (info->has_bytes) {
779+ monitor_printf(mon, "Bytes saved: %"PRIu64"\n", info->bytes);
780+ }
bf251437 781+ if (info->error) {
817b7667
SR
782+ monitor_printf(mon, "Error: %s\n", info->error);
783+ }
784+}
817b7667 785diff --git a/qapi/migration.json b/qapi/migration.json
bf251437 786index c84fa10e86..1702b92553 100644
817b7667
SR
787--- a/qapi/migration.json
788+++ b/qapi/migration.json
5b15e2ec 789@@ -261,6 +261,40 @@
817b7667
SR
790 '*compression': 'CompressionStats',
791 '*socket-address': ['SocketAddress'] } }
792
793+##
794+# @SaveVMInfo:
795+#
796+# Information about the current savevm process.
797+#
798+# @status: string describing the current savevm status.
799+# This can be 'active', 'completed', 'failed'.
800+# If this field is not returned, no savevm process
801+# has been initiated
802+#
803+# @error: string containing error message if status is failed.
804+#
805+# @total-time: total amount of milliseconds since savevm started.
806+# If savevm has ended, it returns the total save time
807+#
808+# @bytes: total amount of data transferred
809+#
810+# Since: 1.3
811+##
812+{ 'struct': 'SaveVMInfo',
813+ 'data': {'*status': 'str', '*error': 'str',
814+ '*total-time': 'int', '*bytes': 'int'} }
815+
816+##
817+# @query-savevm:
818+#
819+# Returns information about current savevm process.
820+#
821+# Returns: @SaveVMInfo
822+#
823+# Since: 1.3
824+##
825+{ 'command': 'query-savevm', 'returns': 'SaveVMInfo' }
826+
827 ##
828 # @query-migrate:
829 #
830diff --git a/qapi/misc.json b/qapi/misc.json
bf251437 831index 6ddd16ea28..098c9bbe93 100644
817b7667
SR
832--- a/qapi/misc.json
833+++ b/qapi/misc.json
bf251437 834@@ -469,6 +469,38 @@
817b7667
SR
835 ##
836 { 'command': 'query-fdsets', 'returns': ['FdsetInfo'] }
837
838+##
839+# @savevm-start:
840+#
841+# Prepare for snapshot and halt VM. Save VM state to statefile.
842+#
843+##
844+{ 'command': 'savevm-start', 'data': { '*statefile': 'str' } }
845+
846+##
847+# @snapshot-drive:
848+#
849+# Create an internal drive snapshot.
850+#
851+##
852+{ 'command': 'snapshot-drive', 'data': { 'device': 'str', 'name': 'str' } }
853+
854+##
855+# @delete-drive-snapshot:
856+#
857+# Delete a drive snapshot.
858+#
859+##
860+{ 'command': 'delete-drive-snapshot', 'data': { 'device': 'str', 'name': 'str' } }
861+
862+##
863+# @savevm-end:
864+#
865+# Resume VM after a snapshot.
866+#
867+##
868+{ 'command': 'savevm-end', 'coroutine': true }
869+
870 ##
871 # @CommandLineParameterType:
872 #
873diff --git a/qemu-options.hx b/qemu-options.hx
bf251437 874index 59bdf67a2c..fc6cb23dd9 100644
817b7667
SR
875--- a/qemu-options.hx
876+++ b/qemu-options.hx
bf251437 877@@ -4378,6 +4378,18 @@ SRST
817b7667
SR
878 Start right away with a saved state (``loadvm`` in monitor)
879 ERST
880
881+DEF("loadstate", HAS_ARG, QEMU_OPTION_loadstate, \
882+ "-loadstate file\n" \
883+ " start right away with a saved state\n",
884+ QEMU_ARCH_ALL)
885+SRST
886+``-loadstate file``
887+ Start right away with a saved state. This option does not roll back
888+ disk state like ``loadvm``, so the user must make sure that the disks
889+ have the correct state. ``file`` can be any valid device URL. See the section
890+ for "Device URL Syntax" for more information.
891+ERST
892+
893 #ifndef _WIN32
894 DEF("daemonize", 0, QEMU_OPTION_daemonize, \
895 "-daemonize daemonize QEMU after initializing\n", QEMU_ARCH_ALL)
83faa3fe 896diff --git a/softmmu/vl.c b/softmmu/vl.c
bf251437 897index ea20b23e4c..0eabc71b68 100644
83faa3fe
TL
898--- a/softmmu/vl.c
899+++ b/softmmu/vl.c
d03e1b3c 900@@ -164,6 +164,7 @@ static const char *accelerators;
5b15e2ec
FE
901 static bool have_custom_ram_size;
902 static const char *ram_memdev_id;
f376b2b9 903 static QDict *machine_opts_dict;
8dca018b
SR
904+static const char *loadstate;
905 static QTAILQ_HEAD(, ObjectOption) object_opts = QTAILQ_HEAD_INITIALIZER(object_opts);
4567474e 906 static QTAILQ_HEAD(, DeviceOption) device_opts = QTAILQ_HEAD_INITIALIZER(device_opts);
5b15e2ec 907 static int display_remote;
bf251437 908@@ -2612,6 +2613,12 @@ void qmp_x_exit_preconfig(Error **errp)
4567474e
FE
909
910 if (loadvm) {
911 load_snapshot(loadvm, NULL, false, NULL, &error_fatal);
95259824 912+ } else if (loadstate) {
6838f038
WB
913+ Error *local_err = NULL;
914+ if (load_snapshot_from_blockdev(loadstate, &local_err) < 0) {
915+ error_report_err(local_err);
95259824
WB
916+ autostart = 0;
917+ }
918 }
b855dce7
TL
919 if (replay_mode != REPLAY_MODE_NONE) {
920 replay_vmstate_init();
bf251437 921@@ -3159,6 +3166,9 @@ void qemu_init(int argc, char **argv)
8dca018b
SR
922 case QEMU_OPTION_loadvm:
923 loadvm = optarg;
924 break;
925+ case QEMU_OPTION_loadstate:
926+ loadstate = optarg;
927+ break;
928 case QEMU_OPTION_full_screen:
929 dpy.has_full_screen = true;
930 dpy.full_screen = true;