]> git.proxmox.com Git - pve-qemu.git/blame - debian/patches/pve/0016-PVE-add-savevm-async-for-background-state-snapshots.patch
savevm-async: set SAVE_STATE_DONE when closing state file was successful
[pve-qemu.git] / debian / patches / pve / 0016-PVE-add-savevm-async-for-background-state-snapshots.patch
CommitLineData
23102ed6 1From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
6402d961 2From: Dietmar Maurer <dietmar@proxmox.com>
83faa3fe 3Date: Mon, 6 Apr 2020 12:16:46 +0200
817b7667 4Subject: [PATCH] PVE: add savevm-async for background state snapshots
95259824 5
d7f4e01a
TL
6Put qemu_savevm_state_{header,setup} into the main loop and the rest
7of the iteration into a coroutine. The former need to lock the
8iothread (and we can't unlock it in the coroutine), and the latter
9can't deal with being in a separate thread, so a coroutine it must
10be.
11
817b7667
SR
12Truncate output file at 1024 boundary.
13
14Do not block the VM and save the state on aborting a snapshot, as the
15snapshot will be invalid anyway.
16
17Also, when aborting, wait for the target file to be closed, otherwise a
18client might run into race-conditions when trying to remove the file
19still opened by QEMU.
20
b855dce7 21Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
6402d961 22Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
d7f4e01a 23Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
817b7667
SR
24[improve aborting]
25Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
563c5928
FE
26[FE: further improve aborting]
27Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
95259824 28---
b855dce7 29 hmp-commands-info.hx | 13 +
817b7667 30 hmp-commands.hx | 33 ++
8dca018b 31 include/migration/snapshot.h | 2 +
be901f66 32 include/monitor/hmp.h | 5 +
817b7667 33 migration/meson.build | 1 +
1976ca46 34 migration/savevm-async.c | 604 +++++++++++++++++++++++++++++++++++
d7f4e01a 35 monitor/hmp-cmds.c | 57 ++++
817b7667
SR
36 qapi/migration.json | 34 ++
37 qapi/misc.json | 32 ++
83faa3fe 38 qemu-options.hx | 12 +
83faa3fe 39 softmmu/vl.c | 10 +
1976ca46 40 11 files changed, 803 insertions(+)
817b7667 41 create mode 100644 migration/savevm-async.c
95259824 42
95259824 43diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
dc9827a6 44index adfa085a9b..925e680e5a 100644
95259824
WB
45--- a/hmp-commands-info.hx
46+++ b/hmp-commands-info.hx
dc9827a6 47@@ -538,6 +538,19 @@ SRST
8dca018b 48 Show current migration parameters.
83faa3fe
TL
49 ERST
50
b855dce7 51+ {
95259824
WB
52+ .name = "savevm",
53+ .args_type = "",
54+ .params = "",
55+ .help = "show savevm status",
a544966d 56+ .cmd = hmp_info_savevm,
95259824
WB
57+ },
58+
83faa3fe
TL
59+SRST
60+ ``info savevm``
61+ Show savevm status.
62+ERST
63+
b855dce7 64 {
83faa3fe
TL
65 .name = "balloon",
66 .args_type = "",
95259824 67diff --git a/hmp-commands.hx b/hmp-commands.hx
dc9827a6 68index 8476277aa9..7f0ac498c4 100644
95259824
WB
69--- a/hmp-commands.hx
70+++ b/hmp-commands.hx
4567474e
FE
71@@ -1746,3 +1746,36 @@ ERST
72 "\n\t\t\t -b to specify dirty bitmap as method of calculation)",
f376b2b9 73 .cmd = hmp_calc_dirty_rate,
83faa3fe 74 },
95259824
WB
75+
76+ {
77+ .name = "savevm-start",
78+ .args_type = "statefile:s?",
79+ .params = "[statefile]",
80+ .help = "Prepare for snapshot and halt VM. Save VM state to statefile.",
a544966d 81+ .cmd = hmp_savevm_start,
95259824
WB
82+ },
83+
84+ {
85+ .name = "snapshot-drive",
86+ .args_type = "device:s,name:s",
87+ .params = "device name",
88+ .help = "Create internal snapshot.",
a544966d 89+ .cmd = hmp_snapshot_drive,
95259824
WB
90+ },
91+
92+ {
93+ .name = "delete-drive-snapshot",
94+ .args_type = "device:s,name:s",
95+ .params = "device name",
96+ .help = "Delete internal snapshot.",
a544966d 97+ .cmd = hmp_delete_drive_snapshot,
95259824
WB
98+ },
99+
100+ {
101+ .name = "savevm-end",
102+ .args_type = "",
103+ .params = "",
104+ .help = "Resume VM after snaphot.",
817b7667
SR
105+ .cmd = hmp_savevm_end,
106+ .coroutine = true,
95259824 107+ },
be901f66 108diff --git a/include/migration/snapshot.h b/include/migration/snapshot.h
8dca018b 109index e72083b117..c846d37806 100644
be901f66
SR
110--- a/include/migration/snapshot.h
111+++ b/include/migration/snapshot.h
8dca018b
SR
112@@ -61,4 +61,6 @@ bool delete_snapshot(const char *name,
113 bool has_devices, strList *devices,
114 Error **errp);
be901f66 115
be901f66 116+int load_snapshot_from_blockdev(const char *filename, Error **errp);
8dca018b 117+
be901f66
SR
118 #endif
119diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h
4567474e 120index 96d014826a..3a39ba41b5 100644
be901f66
SR
121--- a/include/monitor/hmp.h
122+++ b/include/monitor/hmp.h
4567474e 123@@ -26,6 +26,7 @@ void hmp_info_status(Monitor *mon, const QDict *qdict);
be901f66
SR
124 void hmp_info_uuid(Monitor *mon, const QDict *qdict);
125 void hmp_info_chardev(Monitor *mon, const QDict *qdict);
126 void hmp_info_mice(Monitor *mon, const QDict *qdict);
127+void hmp_info_savevm(Monitor *mon, const QDict *qdict);
128 void hmp_info_migrate(Monitor *mon, const QDict *qdict);
129 void hmp_info_migrate_capabilities(Monitor *mon, const QDict *qdict);
130 void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict);
4567474e 131@@ -80,6 +81,10 @@ void hmp_netdev_add(Monitor *mon, const QDict *qdict);
be901f66
SR
132 void hmp_netdev_del(Monitor *mon, const QDict *qdict);
133 void hmp_getfd(Monitor *mon, const QDict *qdict);
134 void hmp_closefd(Monitor *mon, const QDict *qdict);
135+void hmp_savevm_start(Monitor *mon, const QDict *qdict);
136+void hmp_snapshot_drive(Monitor *mon, const QDict *qdict);
137+void hmp_delete_drive_snapshot(Monitor *mon, const QDict *qdict);
138+void hmp_savevm_end(Monitor *mon, const QDict *qdict);
139 void hmp_sendkey(Monitor *mon, const QDict *qdict);
140 void hmp_screendump(Monitor *mon, const QDict *qdict);
83faa3fe 141 void hmp_chardev_add(Monitor *mon, const QDict *qdict);
817b7667 142diff --git a/migration/meson.build b/migration/meson.build
dc9827a6 143index 8b5ca5c047..1e2aec8486 100644
817b7667
SR
144--- a/migration/meson.build
145+++ b/migration/meson.build
146@@ -23,6 +23,7 @@ softmmu_ss.add(files(
147 'multifd-zlib.c',
148 'postcopy-ram.c',
149 'savevm.c',
150+ 'savevm-async.c',
151 'socket.c',
152 'tls.c',
8dca018b 153 ), gnutls)
817b7667 154diff --git a/migration/savevm-async.c b/migration/savevm-async.c
95259824 155new file mode 100644
1976ca46 156index 0000000000..b9a43c56bc
95259824 157--- /dev/null
817b7667 158+++ b/migration/savevm-async.c
1976ca46 159@@ -0,0 +1,604 @@
95259824 160+#include "qemu/osdep.h"
6838f038
WB
161+#include "migration/migration.h"
162+#include "migration/savevm.h"
163+#include "migration/snapshot.h"
164+#include "migration/global_state.h"
165+#include "migration/ram.h"
166+#include "migration/qemu-file.h"
95259824 167+#include "sysemu/sysemu.h"
6402d961 168+#include "sysemu/runstate.h"
95259824 169+#include "block/block.h"
95259824 170+#include "sysemu/block-backend.h"
53e83913
WB
171+#include "qapi/error.h"
172+#include "qapi/qmp/qerror.h"
173+#include "qapi/qmp/qdict.h"
174+#include "qapi/qapi-commands-migration.h"
175+#include "qapi/qapi-commands-misc.h"
0775f12b 176+#include "qapi/qapi-commands-block.h"
95259824 177+#include "qemu/cutils.h"
817b7667 178+#include "qemu/timer.h"
6402d961
TL
179+#include "qemu/main-loop.h"
180+#include "qemu/rcu.h"
95259824
WB
181+
182+/* #define DEBUG_SAVEVM_STATE */
183+
0775f12b
WB
184+/* used while an emulated sync operation is in progress */
185+#define NOT_DONE -EINPROGRESS
67af0fa4 186+
95259824
WB
187+#ifdef DEBUG_SAVEVM_STATE
188+#define DPRINTF(fmt, ...) \
189+ do { printf("savevm-async: " fmt, ## __VA_ARGS__); } while (0)
190+#else
191+#define DPRINTF(fmt, ...) \
192+ do { } while (0)
193+#endif
194+
195+enum {
196+ SAVE_STATE_DONE,
197+ SAVE_STATE_ERROR,
198+ SAVE_STATE_ACTIVE,
199+ SAVE_STATE_COMPLETED,
200+ SAVE_STATE_CANCELLED
201+};
202+
203+
204+static struct SnapshotState {
67af0fa4 205+ BlockBackend *target;
95259824
WB
206+ size_t bs_pos;
207+ int state;
208+ Error *error;
209+ Error *blocker;
210+ int saved_vm_running;
211+ QEMUFile *file;
212+ int64_t total_time;
d7f4e01a
TL
213+ QEMUBH *finalize_bh;
214+ Coroutine *co;
563c5928 215+ QemuCoSleep target_close_wait;
95259824
WB
216+} snap_state;
217+
817b7667
SR
218+static bool savevm_aborted(void)
219+{
220+ return snap_state.state == SAVE_STATE_CANCELLED ||
221+ snap_state.state == SAVE_STATE_ERROR;
222+}
223+
95259824
WB
224+SaveVMInfo *qmp_query_savevm(Error **errp)
225+{
226+ SaveVMInfo *info = g_malloc0(sizeof(*info));
227+ struct SnapshotState *s = &snap_state;
228+
229+ if (s->state != SAVE_STATE_DONE) {
230+ info->has_bytes = true;
231+ info->bytes = s->bs_pos;
232+ switch (s->state) {
233+ case SAVE_STATE_ERROR:
234+ info->has_status = true;
235+ info->status = g_strdup("failed");
236+ info->has_total_time = true;
237+ info->total_time = s->total_time;
238+ if (s->error) {
239+ info->has_error = true;
240+ info->error = g_strdup(error_get_pretty(s->error));
241+ }
242+ break;
243+ case SAVE_STATE_ACTIVE:
244+ info->has_status = true;
245+ info->status = g_strdup("active");
246+ info->has_total_time = true;
247+ info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME)
248+ - s->total_time;
249+ break;
250+ case SAVE_STATE_COMPLETED:
251+ info->has_status = true;
252+ info->status = g_strdup("completed");
253+ info->has_total_time = true;
254+ info->total_time = s->total_time;
255+ break;
256+ }
257+ }
258+
259+ return info;
260+}
261+
262+static int save_snapshot_cleanup(void)
263+{
264+ int ret = 0;
265+
266+ DPRINTF("save_snapshot_cleanup\n");
267+
268+ snap_state.total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) -
269+ snap_state.total_time;
270+
271+ if (snap_state.file) {
272+ ret = qemu_fclose(snap_state.file);
273+ }
274+
67af0fa4 275+ if (snap_state.target) {
817b7667
SR
276+ if (!savevm_aborted()) {
277+ /* try to truncate, but ignore errors (will fail on block devices).
278+ * note1: bdrv_read() needs whole blocks, so we need to round up
279+ * note2: PVE requires 1024 (BDRV_SECTOR_SIZE*2) alignment
280+ */
281+ size_t size = QEMU_ALIGN_UP(snap_state.bs_pos, BDRV_SECTOR_SIZE*2);
282+ blk_truncate(snap_state.target, size, false, PREALLOC_MODE_OFF, 0, NULL);
283+ }
67af0fa4 284+ blk_op_unblock_all(snap_state.target, snap_state.blocker);
95259824
WB
285+ error_free(snap_state.blocker);
286+ snap_state.blocker = NULL;
67af0fa4
WB
287+ blk_unref(snap_state.target);
288+ snap_state.target = NULL;
817b7667 289+
563c5928 290+ qemu_co_sleep_wake(&snap_state.target_close_wait);
95259824
WB
291+ }
292+
293+ return ret;
294+}
295+
296+static void save_snapshot_error(const char *fmt, ...)
297+{
298+ va_list ap;
299+ char *msg;
300+
301+ va_start(ap, fmt);
302+ msg = g_strdup_vprintf(fmt, ap);
303+ va_end(ap);
304+
305+ DPRINTF("save_snapshot_error: %s\n", msg);
306+
307+ if (!snap_state.error) {
308+ error_set(&snap_state.error, ERROR_CLASS_GENERIC_ERROR, "%s", msg);
309+ }
310+
311+ g_free (msg);
312+
313+ snap_state.state = SAVE_STATE_ERROR;
95259824
WB
314+}
315+
6402d961 316+static int block_state_close(void *opaque, Error **errp)
95259824
WB
317+{
318+ snap_state.file = NULL;
67af0fa4 319+ return blk_flush(snap_state.target);
95259824
WB
320+}
321+
0775f12b
WB
322+typedef struct BlkRwCo {
323+ int64_t offset;
324+ QEMUIOVector *qiov;
325+ ssize_t ret;
326+} BlkRwCo;
327+
328+static void coroutine_fn block_state_write_entry(void *opaque) {
329+ BlkRwCo *rwco = opaque;
330+ rwco->ret = blk_co_pwritev(snap_state.target, rwco->offset, rwco->qiov->size,
331+ rwco->qiov, 0);
d7f4e01a 332+ aio_wait_kick();
0775f12b
WB
333+}
334+
67af0fa4 335+static ssize_t block_state_writev_buffer(void *opaque, struct iovec *iov,
6402d961 336+ int iovcnt, int64_t pos, Error **errp)
95259824 337+{
67af0fa4 338+ QEMUIOVector qiov;
0775f12b
WB
339+ BlkRwCo rwco;
340+
341+ assert(pos == snap_state.bs_pos);
342+ rwco = (BlkRwCo) {
343+ .offset = pos,
344+ .qiov = &qiov,
345+ .ret = NOT_DONE,
346+ };
95259824 347+
67af0fa4 348+ qemu_iovec_init_external(&qiov, iov, iovcnt);
0775f12b
WB
349+
350+ if (qemu_in_coroutine()) {
351+ block_state_write_entry(&rwco);
352+ } else {
353+ Coroutine *co = qemu_coroutine_create(&block_state_write_entry, &rwco);
354+ bdrv_coroutine_enter(blk_bs(snap_state.target), co);
355+ BDRV_POLL_WHILE(blk_bs(snap_state.target), rwco.ret == NOT_DONE);
95259824 356+ }
0775f12b
WB
357+ if (rwco.ret < 0) {
358+ return rwco.ret;
359+ }
360+
67af0fa4
WB
361+ snap_state.bs_pos += qiov.size;
362+ return qiov.size;
95259824
WB
363+}
364+
0775f12b
WB
365+static const QEMUFileOps block_file_ops = {
366+ .writev_buffer = block_state_writev_buffer,
367+ .close = block_state_close,
368+};
369+
d7f4e01a 370+static void process_savevm_finalize(void *opaque)
0775f12b
WB
371+{
372+ int ret;
d7f4e01a
TL
373+ AioContext *iohandler_ctx = iohandler_get_aio_context();
374+ MigrationState *ms = migrate_get_current();
375+
817b7667
SR
376+ bool aborted = savevm_aborted();
377+
d7f4e01a
TL
378+#ifdef DEBUG_SAVEVM_STATE
379+ int64_t start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
380+#endif
381+
382+ qemu_bh_delete(snap_state.finalize_bh);
383+ snap_state.finalize_bh = NULL;
384+ snap_state.co = NULL;
385+
386+ /* We need to own the target bdrv's context for the following functions,
387+ * so move it back. It can stay in the main context and live out its life
388+ * there, since we're done with it after this method ends anyway.
389+ */
390+ aio_context_acquire(iohandler_ctx);
391+ blk_set_aio_context(snap_state.target, qemu_get_aio_context(), NULL);
392+ aio_context_release(iohandler_ctx);
393+
394+ ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
395+ if (ret < 0) {
396+ save_snapshot_error("vm_stop_force_state error %d", ret);
397+ }
398+
817b7667
SR
399+ if (!aborted) {
400+ /* skip state saving if we aborted, snapshot will be invalid anyway */
401+ (void)qemu_savevm_state_complete_precopy(snap_state.file, false, false);
402+ ret = qemu_file_get_error(snap_state.file);
403+ if (ret < 0) {
404+ save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
405+ }
d7f4e01a
TL
406+ }
407+
408+ DPRINTF("state saving complete\n");
409+ DPRINTF("timing: process_savevm_finalize (state saving) took %ld ms\n",
410+ qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time);
411+
412+ /* clear migration state */
413+ migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP,
817b7667 414+ ret || aborted ? MIGRATION_STATUS_FAILED : MIGRATION_STATUS_COMPLETED);
d7f4e01a
TL
415+ ms->to_dst_file = NULL;
416+
417+ qemu_savevm_state_cleanup();
418+
0775f12b
WB
419+ ret = save_snapshot_cleanup();
420+ if (ret < 0) {
421+ save_snapshot_error("save_snapshot_cleanup error %d", ret);
422+ } else if (snap_state.state == SAVE_STATE_ACTIVE) {
423+ snap_state.state = SAVE_STATE_COMPLETED;
817b7667 424+ } else if (aborted) {
1976ca46
FE
425+ /*
426+ * If there was an error, there's no need to set a new one here.
427+ * If the snapshot was canceled, leave setting the state to
428+ * qmp_savevm_end(), which is waked by save_snapshot_cleanup().
429+ */
0775f12b
WB
430+ } else {
431+ save_snapshot_error("process_savevm_cleanup: invalid state: %d",
432+ snap_state.state);
95259824 433+ }
0775f12b
WB
434+ if (snap_state.saved_vm_running) {
435+ vm_start();
436+ snap_state.saved_vm_running = false;
95259824 437+ }
d7f4e01a
TL
438+
439+ DPRINTF("timing: process_savevm_finalize (full) took %ld ms\n",
440+ qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time);
95259824
WB
441+}
442+
d7f4e01a 443+static void coroutine_fn process_savevm_co(void *opaque)
95259824
WB
444+{
445+ int ret;
446+ int64_t maxlen;
d7f4e01a
TL
447+ BdrvNextIterator it;
448+ BlockDriverState *bs = NULL;
95259824 449+
d7f4e01a
TL
450+#ifdef DEBUG_SAVEVM_STATE
451+ int64_t start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
452+#endif
95259824 453+
6838f038 454+ ret = qemu_file_get_error(snap_state.file);
95259824 455+ if (ret < 0) {
6838f038 456+ save_snapshot_error("qemu_savevm_state_setup failed");
d7f4e01a 457+ return;
95259824
WB
458+ }
459+
460+ while (snap_state.state == SAVE_STATE_ACTIVE) {
0775f12b 461+ uint64_t pending_size, pend_precopy, pend_compatible, pend_postcopy;
95259824 462+
e9b36665
SR
463+ /* pending is expected to be called without iothread lock */
464+ qemu_mutex_unlock_iothread();
0775f12b 465+ qemu_savevm_state_pending(snap_state.file, 0, &pend_precopy, &pend_compatible, &pend_postcopy);
e9b36665
SR
466+ qemu_mutex_lock_iothread();
467+
0775f12b 468+ pending_size = pend_precopy + pend_compatible + pend_postcopy;
95259824 469+
0775f12b
WB
470+ maxlen = blk_getlength(snap_state.target) - 30*1024*1024;
471+
472+ if (pending_size > 400000 && snap_state.bs_pos + pending_size < maxlen) {
0775f12b
WB
473+ ret = qemu_savevm_state_iterate(snap_state.file, false);
474+ if (ret < 0) {
475+ save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
476+ break;
477+ }
d7f4e01a 478+ DPRINTF("savevm iterate pending size %lu ret %d\n", pending_size, ret);
95259824 479+ } else {
b855dce7 480+ qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
0775f12b
WB
481+ ret = global_state_store();
482+ if (ret) {
483+ save_snapshot_error("global_state_store error %d", ret);
95259824 484+ break;
0775f12b 485+ }
d7f4e01a
TL
486+
487+ DPRINTF("savevm iterate complete\n");
95259824
WB
488+ break;
489+ }
95259824
WB
490+ }
491+
d7f4e01a
TL
492+ DPRINTF("timing: process_savevm_co took %ld ms\n",
493+ qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time);
494+
495+#ifdef DEBUG_SAVEVM_STATE
496+ int64_t start_time_flush = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
497+#endif
498+ /* If a drive runs in an IOThread we can flush it async, and only
499+ * need to sync-flush whatever IO happens between now and
500+ * vm_stop_force_state. bdrv_next can only be called from main AioContext,
501+ * so move there now and after every flush.
502+ */
503+ aio_co_reschedule_self(qemu_get_aio_context());
504+ for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
505+ /* target has BDRV_O_NO_FLUSH, no sense calling bdrv_flush on it */
506+ if (bs == blk_bs(snap_state.target)) {
507+ continue;
508+ }
509+
510+ AioContext *bs_ctx = bdrv_get_aio_context(bs);
511+ if (bs_ctx != qemu_get_aio_context()) {
512+ DPRINTF("savevm: async flushing drive %s\n", bs->filename);
513+ aio_co_reschedule_self(bs_ctx);
514+ bdrv_flush(bs);
515+ aio_co_reschedule_self(qemu_get_aio_context());
516+ }
517+ }
518+
519+ DPRINTF("timing: async flushing took %ld ms\n",
520+ qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time_flush);
95259824 521+
d7f4e01a 522+ qemu_bh_schedule(snap_state.finalize_bh);
95259824
WB
523+}
524+
95259824
WB
525+void qmp_savevm_start(bool has_statefile, const char *statefile, Error **errp)
526+{
95259824 527+ Error *local_err = NULL;
d7f4e01a
TL
528+ MigrationState *ms = migrate_get_current();
529+ AioContext *iohandler_ctx = iohandler_get_aio_context();
95259824 530+
67af0fa4 531+ int bdrv_oflags = BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH;
95259824
WB
532+
533+ if (snap_state.state != SAVE_STATE_DONE) {
534+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
535+ "VM snapshot already started\n");
536+ return;
537+ }
538+
d7f4e01a
TL
539+ if (migration_is_running(ms->state)) {
540+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, QERR_MIGRATION_ACTIVE);
541+ return;
542+ }
543+
544+ if (migrate_use_block()) {
545+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
546+ "Block migration and snapshots are incompatible");
547+ return;
548+ }
549+
95259824
WB
550+ /* initialize snapshot info */
551+ snap_state.saved_vm_running = runstate_is_running();
552+ snap_state.bs_pos = 0;
553+ snap_state.total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
554+ snap_state.blocker = NULL;
563c5928 555+ snap_state.target_close_wait.to_wake = NULL;
95259824
WB
556+
557+ if (snap_state.error) {
558+ error_free(snap_state.error);
559+ snap_state.error = NULL;
560+ }
561+
562+ if (!has_statefile) {
563+ vm_stop(RUN_STATE_SAVE_VM);
564+ snap_state.state = SAVE_STATE_COMPLETED;
565+ return;
566+ }
567+
568+ if (qemu_savevm_state_blocked(errp)) {
569+ return;
570+ }
571+
572+ /* Open the image */
95259824
WB
573+ QDict *options = NULL;
574+ options = qdict_new();
53e83913 575+ qdict_put_str(options, "driver", "raw");
67af0fa4
WB
576+ snap_state.target = blk_new_open(statefile, NULL, options, bdrv_oflags, &local_err);
577+ if (!snap_state.target) {
95259824
WB
578+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
579+ goto restart;
580+ }
581+
582+ snap_state.file = qemu_fopen_ops(&snap_state, &block_file_ops);
583+
584+ if (!snap_state.file) {
585+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
586+ goto restart;
587+ }
588+
d7f4e01a
TL
589+ /*
590+ * qemu_savevm_* paths use migration code and expect a migration state.
591+ * State is cleared in process_savevm_co, but has to be initialized
592+ * here (blocking main thread, from QMP) to avoid race conditions.
593+ */
594+ migrate_init(ms);
595+ memset(&ram_counters, 0, sizeof(ram_counters));
596+ ms->to_dst_file = snap_state.file;
95259824
WB
597+
598+ error_setg(&snap_state.blocker, "block device is in use by savevm");
67af0fa4 599+ blk_op_block_all(snap_state.target, snap_state.blocker);
95259824 600+
0775f12b 601+ snap_state.state = SAVE_STATE_ACTIVE;
d7f4e01a
TL
602+ snap_state.finalize_bh = qemu_bh_new(process_savevm_finalize, &snap_state);
603+ snap_state.co = qemu_coroutine_create(&process_savevm_co, NULL);
604+ qemu_mutex_unlock_iothread();
605+ qemu_savevm_state_header(snap_state.file);
606+ qemu_savevm_state_setup(snap_state.file);
607+ qemu_mutex_lock_iothread();
608+
609+ /* Async processing from here on out happens in iohandler context, so let
610+ * the target bdrv have its home there.
611+ */
612+ blk_set_aio_context(snap_state.target, iohandler_ctx, &local_err);
613+
614+ aio_co_schedule(iohandler_ctx, snap_state.co);
95259824
WB
615+
616+ return;
617+
618+restart:
619+
620+ save_snapshot_error("setup failed");
621+
622+ if (snap_state.saved_vm_running) {
623+ vm_start();
817b7667 624+ snap_state.saved_vm_running = false;
95259824
WB
625+ }
626+}
627+
817b7667 628+void coroutine_fn qmp_savevm_end(Error **errp)
95259824 629+{
817b7667
SR
630+ int64_t timeout;
631+
95259824
WB
632+ if (snap_state.state == SAVE_STATE_DONE) {
633+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
634+ "VM snapshot not started\n");
635+ return;
636+ }
637+
638+ if (snap_state.state == SAVE_STATE_ACTIVE) {
639+ snap_state.state = SAVE_STATE_CANCELLED;
817b7667 640+ goto wait_for_close;
95259824
WB
641+ }
642+
643+ if (snap_state.saved_vm_running) {
644+ vm_start();
817b7667 645+ snap_state.saved_vm_running = false;
95259824
WB
646+ }
647+
648+ snap_state.state = SAVE_STATE_DONE;
817b7667
SR
649+
650+wait_for_close:
651+ if (!snap_state.target) {
652+ DPRINTF("savevm-end: no target file open\n");
653+ return;
654+ }
655+
656+ /* wait until cleanup is done before returning, this ensures that after this
657+ * call exits the statefile will be closed and can be removed immediately */
658+ DPRINTF("savevm-end: waiting for cleanup\n");
659+ timeout = 30L * 1000 * 1000 * 1000;
563c5928 660+ qemu_co_sleep_ns_wakeable(&snap_state.target_close_wait,
f376b2b9 661+ QEMU_CLOCK_REALTIME, timeout);
817b7667
SR
662+ if (snap_state.target) {
663+ save_snapshot_error("timeout waiting for target file close in "
664+ "qmp_savevm_end");
665+ /* we cannot assume the snapshot finished in this case, so leave the
666+ * state alone - caller has to figure something out */
667+ return;
668+ }
669+
1976ca46
FE
670+ // File closed and no other error, so ensure next snapshot can be started.
671+ if (snap_state.state != SAVE_STATE_ERROR) {
672+ snap_state.state = SAVE_STATE_DONE;
673+ }
674+
817b7667 675+ DPRINTF("savevm-end: cleanup done\n");
95259824
WB
676+}
677+
0775f12b 678+// FIXME: Deprecated
95259824
WB
679+void qmp_snapshot_drive(const char *device, const char *name, Error **errp)
680+{
0775f12b
WB
681+ // Compatibility to older qemu-server.
682+ qmp_blockdev_snapshot_internal_sync(device, name, errp);
95259824
WB
683+}
684+
0775f12b 685+// FIXME: Deprecated
95259824
WB
686+void qmp_delete_drive_snapshot(const char *device, const char *name,
687+ Error **errp)
688+{
0775f12b
WB
689+ // Compatibility to older qemu-server.
690+ (void)qmp_blockdev_snapshot_delete_internal_sync(device, false, NULL,
691+ true, name, errp);
95259824
WB
692+}
693+
67af0fa4 694+static ssize_t loadstate_get_buffer(void *opaque, uint8_t *buf, int64_t pos,
6402d961 695+ size_t size, Error **errp)
95259824 696+{
67af0fa4
WB
697+ BlockBackend *be = opaque;
698+ int64_t maxlen = blk_getlength(be);
95259824
WB
699+ if (pos > maxlen) {
700+ return -EIO;
701+ }
702+ if ((pos + size) > maxlen) {
703+ size = maxlen - pos - 1;
704+ }
705+ if (size == 0) {
706+ return 0;
707+ }
67af0fa4 708+ return blk_pread(be, pos, buf, size);
95259824
WB
709+}
710+
711+static const QEMUFileOps loadstate_file_ops = {
712+ .get_buffer = loadstate_get_buffer,
713+};
714+
6838f038 715+int load_snapshot_from_blockdev(const char *filename, Error **errp)
95259824 716+{
67af0fa4 717+ BlockBackend *be;
95259824
WB
718+ Error *local_err = NULL;
719+ Error *blocker = NULL;
720+
721+ QEMUFile *f;
67af0fa4 722+ int ret = -EINVAL;
95259824 723+
67af0fa4 724+ be = blk_new_open(filename, NULL, NULL, 0, &local_err);
95259824 725+
67af0fa4 726+ if (!be) {
6838f038 727+ error_setg(errp, "Could not open VM state file");
95259824
WB
728+ goto the_end;
729+ }
730+
67af0fa4
WB
731+ error_setg(&blocker, "block device is in use by load state");
732+ blk_op_block_all(be, blocker);
733+
95259824 734+ /* restore the VM state */
67af0fa4 735+ f = qemu_fopen_ops(be, &loadstate_file_ops);
95259824 736+ if (!f) {
6838f038 737+ error_setg(errp, "Could not open VM state file");
95259824
WB
738+ goto the_end;
739+ }
740+
6838f038 741+ qemu_system_reset(SHUTDOWN_CAUSE_NONE);
95259824
WB
742+ ret = qemu_loadvm_state(f);
743+
e9b36665
SR
744+ /* dirty bitmap migration has a special case we need to trigger manually */
745+ dirty_bitmap_mig_before_vm_start();
746+
95259824
WB
747+ qemu_fclose(f);
748+ migration_incoming_state_destroy();
749+ if (ret < 0) {
6838f038 750+ error_setg_errno(errp, -ret, "Error while loading VM state");
95259824
WB
751+ goto the_end;
752+ }
753+
754+ ret = 0;
755+
756+ the_end:
67af0fa4
WB
757+ if (be) {
758+ blk_op_unblock_all(be, blocker);
95259824 759+ error_free(blocker);
67af0fa4 760+ blk_unref(be);
95259824
WB
761+ }
762+ return ret;
763+}
817b7667 764diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
dc9827a6 765index 5482dd0569..c64b959738 100644
817b7667
SR
766--- a/monitor/hmp-cmds.c
767+++ b/monitor/hmp-cmds.c
dc9827a6 768@@ -1906,6 +1906,63 @@ void hmp_info_memory_devices(Monitor *mon, const QDict *qdict)
817b7667
SR
769 hmp_handle_error(mon, err);
770 }
771
772+void hmp_savevm_start(Monitor *mon, const QDict *qdict)
773+{
774+ Error *errp = NULL;
775+ const char *statefile = qdict_get_try_str(qdict, "statefile");
776+
777+ qmp_savevm_start(statefile != NULL, statefile, &errp);
778+ hmp_handle_error(mon, errp);
779+}
780+
781+void hmp_snapshot_drive(Monitor *mon, const QDict *qdict)
782+{
783+ Error *errp = NULL;
784+ const char *name = qdict_get_str(qdict, "name");
785+ const char *device = qdict_get_str(qdict, "device");
786+
787+ qmp_snapshot_drive(device, name, &errp);
788+ hmp_handle_error(mon, errp);
789+}
790+
791+void hmp_delete_drive_snapshot(Monitor *mon, const QDict *qdict)
792+{
793+ Error *errp = NULL;
794+ const char *name = qdict_get_str(qdict, "name");
795+ const char *device = qdict_get_str(qdict, "device");
796+
797+ qmp_delete_drive_snapshot(device, name, &errp);
798+ hmp_handle_error(mon, errp);
799+}
800+
801+void coroutine_fn hmp_savevm_end(Monitor *mon, const QDict *qdict)
802+{
803+ Error *errp = NULL;
804+
805+ qmp_savevm_end(&errp);
806+ hmp_handle_error(mon, errp);
807+}
808+
809+void hmp_info_savevm(Monitor *mon, const QDict *qdict)
810+{
811+ SaveVMInfo *info;
812+ info = qmp_query_savevm(NULL);
813+
814+ if (info->has_status) {
815+ monitor_printf(mon, "savevm status: %s\n", info->status);
816+ monitor_printf(mon, "total time: %" PRIu64 " milliseconds\n",
817+ info->total_time);
818+ } else {
819+ monitor_printf(mon, "savevm status: not running\n");
820+ }
821+ if (info->has_bytes) {
822+ monitor_printf(mon, "Bytes saved: %"PRIu64"\n", info->bytes);
823+ }
824+ if (info->has_error) {
825+ monitor_printf(mon, "Error: %s\n", info->error);
826+ }
827+}
828+
829 void hmp_info_iothreads(Monitor *mon, const QDict *qdict)
830 {
831 IOThreadInfoList *info_list = qmp_query_iothreads(NULL);
832diff --git a/qapi/migration.json b/qapi/migration.json
dc9827a6 833index 27d7b28158..31d6cc902e 100644
817b7667
SR
834--- a/qapi/migration.json
835+++ b/qapi/migration.json
dc9827a6 836@@ -258,6 +258,40 @@
817b7667
SR
837 '*compression': 'CompressionStats',
838 '*socket-address': ['SocketAddress'] } }
839
840+##
841+# @SaveVMInfo:
842+#
843+# Information about current savevm process.
844+#
845+# @status: string describing the current savevm status.
846+# This can be 'active', 'completed', 'failed'.
847+# If this field is not returned, no savevm process
848+# has been initiated
849+#
850+# @error: string containing error message if status is failed.
851+#
852+# @total-time: total amount of milliseconds since savevm started.
853+# If savevm has ended, it returns the total save time
854+#
855+# @bytes: total amount of data transferred
856+#
857+# Since: 1.3
858+##
859+{ 'struct': 'SaveVMInfo',
860+ 'data': {'*status': 'str', '*error': 'str',
861+ '*total-time': 'int', '*bytes': 'int'} }
862+
863+##
864+# @query-savevm:
865+#
866+# Returns information about current savevm process.
867+#
868+# Returns: @SaveVMInfo
869+#
870+# Since: 1.3
871+##
872+{ 'command': 'query-savevm', 'returns': 'SaveVMInfo' }
873+
874 ##
875 # @query-migrate:
876 #
877diff --git a/qapi/misc.json b/qapi/misc.json
dc9827a6 878index b83cc39029..1e5dd7db29 100644
817b7667
SR
879--- a/qapi/misc.json
880+++ b/qapi/misc.json
4567474e 881@@ -435,6 +435,38 @@
817b7667
SR
882 ##
883 { 'command': 'query-fdsets', 'returns': ['FdsetInfo'] }
884
885+##
886+# @savevm-start:
887+#
888+# Prepare for snapshot and halt VM. Save VM state to statefile.
889+#
890+##
891+{ 'command': 'savevm-start', 'data': { '*statefile': 'str' } }
892+
893+##
894+# @snapshot-drive:
895+#
896+# Create an internal drive snapshot.
897+#
898+##
899+{ 'command': 'snapshot-drive', 'data': { 'device': 'str', 'name': 'str' } }
900+
901+##
902+# @delete-drive-snapshot:
903+#
904+# Delete a drive snapshot.
905+#
906+##
907+{ 'command': 'delete-drive-snapshot', 'data': { 'device': 'str', 'name': 'str' } }
908+
909+##
910+# @savevm-end:
911+#
912+# Resume VM after a snapshot.
913+#
914+##
915+{ 'command': 'savevm-end', 'coroutine': true }
916+
917 ##
918 # @CommandLineParameterType:
919 #
920diff --git a/qemu-options.hx b/qemu-options.hx
dc9827a6 921index 34e9b32a5c..aeade4ef80 100644
817b7667
SR
922--- a/qemu-options.hx
923+++ b/qemu-options.hx
dc9827a6 924@@ -4254,6 +4254,18 @@ SRST
817b7667
SR
925 Start right away with a saved state (``loadvm`` in monitor)
926 ERST
927
928+DEF("loadstate", HAS_ARG, QEMU_OPTION_loadstate, \
929+ "-loadstate file\n" \
930+ " start right away with a saved state\n",
931+ QEMU_ARCH_ALL)
932+SRST
933+``-loadstate file``
934+ Start right away with a saved state. This option does not roll back
935+ disk state like @code{loadvm}, so the user must make sure that the disks
936+ have the correct state. @var{file} can be any valid device URL. See the section
937+ for "Device URL Syntax" for more information.
938+ERST
939+
940 #ifndef _WIN32
941 DEF("daemonize", 0, QEMU_OPTION_daemonize, \
942 "-daemonize daemonize QEMU after initializing\n", QEMU_ARCH_ALL)
83faa3fe 943diff --git a/softmmu/vl.c b/softmmu/vl.c
dc9827a6 944index 6f646531a0..a3f2a3818c 100644
83faa3fe
TL
945--- a/softmmu/vl.c
946+++ b/softmmu/vl.c
dc9827a6 947@@ -157,6 +157,7 @@ static const char *incoming;
8dca018b 948 static const char *loadvm;
f376b2b9
SR
949 static const char *accelerators;
950 static QDict *machine_opts_dict;
8dca018b
SR
951+static const char *loadstate;
952 static QTAILQ_HEAD(, ObjectOption) object_opts = QTAILQ_HEAD_INITIALIZER(object_opts);
4567474e 953 static QTAILQ_HEAD(, DeviceOption) device_opts = QTAILQ_HEAD_INITIALIZER(device_opts);
8dca018b 954 static ram_addr_t maxram_size;
dc9827a6 955@@ -2749,6 +2750,12 @@ void qmp_x_exit_preconfig(Error **errp)
4567474e
FE
956
957 if (loadvm) {
958 load_snapshot(loadvm, NULL, false, NULL, &error_fatal);
95259824 959+ } else if (loadstate) {
6838f038
WB
960+ Error *local_err = NULL;
961+ if (load_snapshot_from_blockdev(loadstate, &local_err) < 0) {
962+ error_report_err(local_err);
95259824
WB
963+ autostart = 0;
964+ }
965 }
b855dce7
TL
966 if (replay_mode != REPLAY_MODE_NONE) {
967 replay_vmstate_init();
dc9827a6 968@@ -3289,6 +3296,9 @@ void qemu_init(int argc, char **argv, char **envp)
8dca018b
SR
969 case QEMU_OPTION_loadvm:
970 loadvm = optarg;
971 break;
972+ case QEMU_OPTION_loadstate:
973+ loadstate = optarg;
974+ break;
975 case QEMU_OPTION_full_screen:
976 dpy.has_full_screen = true;
977 dpy.full_screen = true;