1 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
2 From: Wolfgang Bumiller <w.bumiller@proxmox.com>
3 Date: Wed, 9 Dec 2015 16:04:32 +0100
4 Subject: [PATCH] internal snapshot async
8 hmp-commands-info.hx | 13 ++
9 hmp-commands.hx | 32 +++
12 include/migration/snapshot.h | 1 +
13 qapi-schema.json | 32 +++
14 qapi/migration.json | 34 +++
15 qemu-options.hx | 13 ++
16 savevm-async.c | 524 +++++++++++++++++++++++++++++++++++++++++++
18 11 files changed, 722 insertions(+), 1 deletion(-)
19 create mode 100644 savevm-async.c
21 diff --git a/Makefile.objs b/Makefile.objs
22 index 285c6f3c15..686247b556 100644
25 @@ -41,6 +41,7 @@ io-obj-y = io/
26 ifeq ($(CONFIG_SOFTMMU),y)
27 common-obj-y = blockdev.o blockdev-nbd.o block/
28 common-obj-y += bootdevice.o iothread.o
29 +common-obj-y += savevm-async.o
31 common-obj-y += qdev-monitor.o device-hotplug.o
32 common-obj-$(CONFIG_WIN32) += os-win32.o
33 @@ -49,7 +50,6 @@ common-obj-$(CONFIG_POSIX) += os-posix.o
34 common-obj-$(CONFIG_LINUX) += fsdev/
36 common-obj-y += migration/
38 common-obj-y += audio/
41 diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
42 index 54c3e5eac6..3bf69a193c 100644
43 --- a/hmp-commands-info.hx
44 +++ b/hmp-commands-info.hx
45 @@ -566,6 +566,19 @@ Show current migration xbzrle cache size.
52 + .help = "show savevm status",
53 + .cmd = hmp_info_savevm,
65 diff --git a/hmp-commands.hx b/hmp-commands.hx
66 index 4afd57cf5f..b35bc6ab6c 100644
69 @@ -1873,3 +1873,35 @@ ETEXI
75 + .name = "savevm-start",
76 + .args_type = "statefile:s?",
77 + .params = "[statefile]",
78 + .help = "Prepare for snapshot and halt VM. Save VM state to statefile.",
79 + .cmd = hmp_savevm_start,
83 + .name = "snapshot-drive",
84 + .args_type = "device:s,name:s",
85 + .params = "device name",
86 + .help = "Create internal snapshot.",
87 + .cmd = hmp_snapshot_drive,
91 + .name = "delete-drive-snapshot",
92 + .args_type = "device:s,name:s",
93 + .params = "device name",
94 + .help = "Delete internal snapshot.",
95 + .cmd = hmp_delete_drive_snapshot,
99 + .name = "savevm-end",
102 + .help = "Resume VM after snaphot.",
103 + .cmd = hmp_savevm_end,
105 diff --git a/hmp.c b/hmp.c
106 index 4e1d571003..b9ade681f0 100644
109 @@ -2486,6 +2486,63 @@ void hmp_info_memory_devices(Monitor *mon, const QDict *qdict)
110 hmp_handle_error(mon, &err);
113 +void hmp_savevm_start(Monitor *mon, const QDict *qdict)
115 + Error *errp = NULL;
116 + const char *statefile = qdict_get_try_str(qdict, "statefile");
118 + qmp_savevm_start(statefile != NULL, statefile, &errp);
119 + hmp_handle_error(mon, &errp);
122 +void hmp_snapshot_drive(Monitor *mon, const QDict *qdict)
124 + Error *errp = NULL;
125 + const char *name = qdict_get_str(qdict, "name");
126 + const char *device = qdict_get_str(qdict, "device");
128 + qmp_snapshot_drive(device, name, &errp);
129 + hmp_handle_error(mon, &errp);
132 +void hmp_delete_drive_snapshot(Monitor *mon, const QDict *qdict)
134 + Error *errp = NULL;
135 + const char *name = qdict_get_str(qdict, "name");
136 + const char *device = qdict_get_str(qdict, "device");
138 + qmp_delete_drive_snapshot(device, name, &errp);
139 + hmp_handle_error(mon, &errp);
142 +void hmp_savevm_end(Monitor *mon, const QDict *qdict)
144 + Error *errp = NULL;
146 + qmp_savevm_end(&errp);
147 + hmp_handle_error(mon, &errp);
150 +void hmp_info_savevm(Monitor *mon, const QDict *qdict)
153 + info = qmp_query_savevm(NULL);
155 + if (info->has_status) {
156 + monitor_printf(mon, "savevm status: %s\n", info->status);
157 + monitor_printf(mon, "total time: %" PRIu64 " milliseconds\n",
160 + monitor_printf(mon, "savevm status: not running\n");
162 + if (info->has_bytes) {
163 + monitor_printf(mon, "Bytes saved: %"PRIu64"\n", info->bytes);
165 + if (info->has_error) {
166 + monitor_printf(mon, "Error: %s\n", info->error);
170 void hmp_info_iothreads(Monitor *mon, const QDict *qdict)
172 IOThreadInfoList *info_list = qmp_query_iothreads(NULL);
173 diff --git a/hmp.h b/hmp.h
174 index a6f56b1f29..45ada581b6 100644
177 @@ -26,6 +26,7 @@ void hmp_info_status(Monitor *mon, const QDict *qdict);
178 void hmp_info_uuid(Monitor *mon, const QDict *qdict);
179 void hmp_info_chardev(Monitor *mon, const QDict *qdict);
180 void hmp_info_mice(Monitor *mon, const QDict *qdict);
181 +void hmp_info_savevm(Monitor *mon, const QDict *qdict);
182 void hmp_info_migrate(Monitor *mon, const QDict *qdict);
183 void hmp_info_migrate_capabilities(Monitor *mon, const QDict *qdict);
184 void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict);
185 @@ -97,6 +98,10 @@ void hmp_netdev_add(Monitor *mon, const QDict *qdict);
186 void hmp_netdev_del(Monitor *mon, const QDict *qdict);
187 void hmp_getfd(Monitor *mon, const QDict *qdict);
188 void hmp_closefd(Monitor *mon, const QDict *qdict);
189 +void hmp_savevm_start(Monitor *mon, const QDict *qdict);
190 +void hmp_snapshot_drive(Monitor *mon, const QDict *qdict);
191 +void hmp_delete_drive_snapshot(Monitor *mon, const QDict *qdict);
192 +void hmp_savevm_end(Monitor *mon, const QDict *qdict);
193 void hmp_sendkey(Monitor *mon, const QDict *qdict);
194 void hmp_screendump(Monitor *mon, const QDict *qdict);
195 void hmp_nbd_server_start(Monitor *mon, const QDict *qdict);
196 diff --git a/include/migration/snapshot.h b/include/migration/snapshot.h
197 index c85b6ec75b..4411b7121d 100644
198 --- a/include/migration/snapshot.h
199 +++ b/include/migration/snapshot.h
202 int save_snapshot(const char *name, Error **errp);
203 int load_snapshot(const char *name, Error **errp);
204 +int load_snapshot_from_blockdev(const char *filename, Error **errp);
207 diff --git a/qapi-schema.json b/qapi-schema.json
208 index 8f436ba1f3..348b527681 100644
209 --- a/qapi-schema.json
210 +++ b/qapi-schema.json
211 @@ -2439,6 +2439,38 @@
212 { 'command': 'query-target', 'returns': 'TargetInfo' }
217 +# Prepare for snapshot and halt VM. Save VM state to statefile.
220 +{ 'command': 'savevm-start', 'data': { '*statefile': 'str' } }
225 +# Create an internal drive snapshot.
228 +{ 'command': 'snapshot-drive', 'data': { 'device': 'str', 'name': 'str' } }
231 +# @delete-drive-snapshot:
233 +# Delete a drive snapshot.
236 +{ 'command': 'delete-drive-snapshot', 'data': { 'device': 'str', 'name': 'str' } }
241 +# Resume VM after a snapshot.
244 +{ 'command': 'savevm-end' }
249 # Specify an ACPI table on the command line to load.
250 diff --git a/qapi/migration.json b/qapi/migration.json
251 index 03f57c9616..9ae55b81a2 100644
252 --- a/qapi/migration.json
253 +++ b/qapi/migration.json
255 '*error-desc': 'str'} }
260 +# Information about current savevm process.
262 +# @status: string describing the current savevm status.
263 +# This can be 'active', 'completed', 'failed'.
264 +# If this field is not returned, no savevm process
265 +# has been initiated
267 +# @error: string containing error message if status is failed.
269 +# @total-time: total amount of milliseconds since savevm started.
270 +# If savevm has ended, it returns the total save time
272 +# @bytes: total amount of data transferred
276 +{ 'struct': 'SaveVMInfo',
277 + 'data': {'*status': 'str', '*error': 'str',
278 + '*total-time': 'int', '*bytes': 'int'} }
283 +# Returns information about current savevm process.
285 +# Returns: @SaveVMInfo
289 +{ 'command': 'query-savevm', 'returns': 'SaveVMInfo' }
294 # Returns information about current migration process. If migration
295 diff --git a/qemu-options.hx b/qemu-options.hx
296 index 57f2c6a75f..7c054af8f9 100644
297 --- a/qemu-options.hx
298 +++ b/qemu-options.hx
299 @@ -3698,6 +3698,19 @@ STEXI
300 Start right away with a saved state (@code{loadvm} in monitor)
303 +DEF("loadstate", HAS_ARG, QEMU_OPTION_loadstate, \
304 + "-loadstate file\n" \
305 + " start right away with a saved state\n",
308 +@item -loadstate @var{file}
310 +Start right away with a saved state. This option does not roll back
311 +disk state like @code{loadvm}, so the user must make sure that the disks
312 +have the correct state. @var{file} can be any valid device URL. See the section
313 +for "Device URL Syntax" for more information.
317 DEF("daemonize", 0, QEMU_OPTION_daemonize, \
318 "-daemonize daemonize QEMU after initializing\n", QEMU_ARCH_ALL)
319 diff --git a/savevm-async.c b/savevm-async.c
321 index 0000000000..897134ab5a
325 +#include "qemu/osdep.h"
326 +#include "migration/migration.h"
327 +#include "migration/savevm.h"
328 +#include "migration/snapshot.h"
329 +#include "migration/global_state.h"
330 +#include "migration/ram.h"
331 +#include "migration/qemu-file.h"
332 +#include "qapi/qmp/qerror.h"
333 +#include "sysemu/sysemu.h"
334 +#include "qmp-commands.h"
335 +#include "block/block.h"
336 +#include "sysemu/block-backend.h"
337 +#include "qapi/qmp/qstring.h"
338 +#include "qemu/cutils.h"
340 +/* #define DEBUG_SAVEVM_STATE */
342 +#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
344 +#ifdef DEBUG_SAVEVM_STATE
345 +#define DPRINTF(fmt, ...) \
346 + do { printf("savevm-async: " fmt, ## __VA_ARGS__); } while (0)
348 +#define DPRINTF(fmt, ...) \
356 + SAVE_STATE_COMPLETED,
357 + SAVE_STATE_CANCELLED
361 +static struct SnapshotState {
362 + BlockBackend *target;
367 + int saved_vm_running;
369 + int64_t total_time;
372 +SaveVMInfo *qmp_query_savevm(Error **errp)
374 + SaveVMInfo *info = g_malloc0(sizeof(*info));
375 + struct SnapshotState *s = &snap_state;
377 + if (s->state != SAVE_STATE_DONE) {
378 + info->has_bytes = true;
379 + info->bytes = s->bs_pos;
380 + switch (s->state) {
381 + case SAVE_STATE_ERROR:
382 + info->has_status = true;
383 + info->status = g_strdup("failed");
384 + info->has_total_time = true;
385 + info->total_time = s->total_time;
387 + info->has_error = true;
388 + info->error = g_strdup(error_get_pretty(s->error));
391 + case SAVE_STATE_ACTIVE:
392 + info->has_status = true;
393 + info->status = g_strdup("active");
394 + info->has_total_time = true;
395 + info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME)
398 + case SAVE_STATE_COMPLETED:
399 + info->has_status = true;
400 + info->status = g_strdup("completed");
401 + info->has_total_time = true;
402 + info->total_time = s->total_time;
410 +static int save_snapshot_cleanup(void)
414 + DPRINTF("save_snapshot_cleanup\n");
416 + snap_state.total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) -
417 + snap_state.total_time;
419 + if (snap_state.file) {
420 + ret = qemu_fclose(snap_state.file);
423 + if (snap_state.target) {
424 + /* try to truncate, but ignore errors (will fail on block devices).
425 + * note: bdrv_read() needs whole blocks, so we round up
427 + size_t size = (snap_state.bs_pos + BDRV_SECTOR_SIZE) & BDRV_SECTOR_MASK;
428 + blk_truncate(snap_state.target, size, PREALLOC_MODE_OFF, NULL);
429 + blk_op_unblock_all(snap_state.target, snap_state.blocker);
430 + error_free(snap_state.blocker);
431 + snap_state.blocker = NULL;
432 + blk_unref(snap_state.target);
433 + snap_state.target = NULL;
439 +static void save_snapshot_error(const char *fmt, ...)
445 + msg = g_strdup_vprintf(fmt, ap);
448 + DPRINTF("save_snapshot_error: %s\n", msg);
450 + if (!snap_state.error) {
451 + error_set(&snap_state.error, ERROR_CLASS_GENERIC_ERROR, "%s", msg);
456 + snap_state.state = SAVE_STATE_ERROR;
458 + save_snapshot_cleanup();
461 +static void save_snapshot_completed(void)
463 + DPRINTF("save_snapshot_completed\n");
465 + if (save_snapshot_cleanup() < 0) {
466 + snap_state.state = SAVE_STATE_ERROR;
468 + snap_state.state = SAVE_STATE_COMPLETED;
472 +static int block_state_close(void *opaque)
474 + snap_state.file = NULL;
475 + return blk_flush(snap_state.target);
478 +static ssize_t block_state_writev_buffer(void *opaque, struct iovec *iov,
479 + int iovcnt, int64_t pos)
484 + qemu_iovec_init_external(&qiov, iov, iovcnt);
485 + ret = blk_co_pwritev(snap_state.target, pos, qiov.size, &qiov, 0);
489 + snap_state.bs_pos += qiov.size;
493 +static int store_and_stop(void) {
494 + if (global_state_store()) {
495 + save_snapshot_error("Error saving global state");
498 + if (runstate_is_running()) {
499 + vm_stop(RUN_STATE_SAVE_VM);
504 +static void process_savevm_co(void *opaque)
509 + snap_state.state = SAVE_STATE_ACTIVE;
511 + qemu_mutex_unlock_iothread();
512 + qemu_savevm_state_header(snap_state.file);
513 + qemu_savevm_state_setup(snap_state.file);
514 + ret = qemu_file_get_error(snap_state.file);
515 + qemu_mutex_lock_iothread();
518 + save_snapshot_error("qemu_savevm_state_setup failed");
522 + while (snap_state.state == SAVE_STATE_ACTIVE) {
523 + uint64_t pending_size, pend_post, pend_nonpost;
525 + qemu_savevm_state_pending(snap_state.file, 0, &pend_nonpost, &pend_post);
526 + pending_size = pend_post + pend_nonpost;
528 + if (pending_size) {
529 + ret = qemu_savevm_state_iterate(snap_state.file, false);
531 + save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
534 + DPRINTF("savevm inerate pending size %lu ret %d\n", pending_size, ret);
536 + DPRINTF("done iterating\n");
537 + if (store_and_stop())
539 + DPRINTF("savevm inerate finished\n");
540 + /* upstream made the return value here inconsistent
541 + * (-1 instead of 'ret' in one case and 0 after flush which can
542 + * still set a file error...)
544 + (void)qemu_savevm_state_complete_precopy(snap_state.file, false, false);
545 + ret = qemu_file_get_error(snap_state.file);
547 + save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
550 + DPRINTF("save complete\n");
551 + save_snapshot_completed();
555 + /* stop the VM if we get to the end of available space,
556 + * or if pending_size is just a few MB
558 + maxlen = blk_getlength(snap_state.target) - 30*1024*1024;
559 + if ((pending_size < 100000) ||
560 + ((snap_state.bs_pos + pending_size) >= maxlen)) {
561 + if (store_and_stop())
566 + if(snap_state.state == SAVE_STATE_CANCELLED) {
567 + save_snapshot_completed();
568 + Error *errp = NULL;
569 + qmp_savevm_end(&errp);
574 +static const QEMUFileOps block_file_ops = {
575 + .writev_buffer = block_state_writev_buffer,
576 + .close = block_state_close,
580 +void qmp_savevm_start(bool has_statefile, const char *statefile, Error **errp)
582 + Error *local_err = NULL;
584 + int bdrv_oflags = BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH;
586 + if (snap_state.state != SAVE_STATE_DONE) {
587 + error_set(errp, ERROR_CLASS_GENERIC_ERROR,
588 + "VM snapshot already started\n");
592 + /* initialize snapshot info */
593 + snap_state.saved_vm_running = runstate_is_running();
594 + snap_state.bs_pos = 0;
595 + snap_state.total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
596 + snap_state.blocker = NULL;
598 + if (snap_state.error) {
599 + error_free(snap_state.error);
600 + snap_state.error = NULL;
603 + if (!has_statefile) {
604 + vm_stop(RUN_STATE_SAVE_VM);
605 + snap_state.state = SAVE_STATE_COMPLETED;
609 + if (qemu_savevm_state_blocked(errp)) {
613 + /* Open the image */
614 + QDict *options = NULL;
615 + options = qdict_new();
616 + qdict_put(options, "driver", qstring_from_str("raw"));
617 + snap_state.target = blk_new_open(statefile, NULL, options, bdrv_oflags, &local_err);
618 + if (!snap_state.target) {
619 + error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
623 + snap_state.file = qemu_fopen_ops(&snap_state, &block_file_ops);
625 + if (!snap_state.file) {
626 + error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
631 + error_setg(&snap_state.blocker, "block device is in use by savevm");
632 + blk_op_block_all(snap_state.target, snap_state.blocker);
634 + Coroutine *co = qemu_coroutine_create(process_savevm_co, NULL);
635 + qemu_coroutine_enter(co);
641 + save_snapshot_error("setup failed");
643 + if (snap_state.saved_vm_running) {
648 +void qmp_savevm_end(Error **errp)
650 + if (snap_state.state == SAVE_STATE_DONE) {
651 + error_set(errp, ERROR_CLASS_GENERIC_ERROR,
652 + "VM snapshot not started\n");
656 + if (snap_state.state == SAVE_STATE_ACTIVE) {
657 + snap_state.state = SAVE_STATE_CANCELLED;
661 + if (snap_state.saved_vm_running) {
665 + snap_state.state = SAVE_STATE_DONE;
668 +void qmp_snapshot_drive(const char *device, const char *name, Error **errp)
671 + BlockDriverState *bs;
672 + QEMUSnapshotInfo sn1, *sn = &sn1;
680 + if (snap_state.state != SAVE_STATE_COMPLETED) {
681 + error_set(errp, ERROR_CLASS_GENERIC_ERROR,
682 + "VM snapshot not ready/started\n");
686 + blk = blk_by_name(device);
688 + error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
689 + "Device '%s' not found", device);
694 + if (!bdrv_is_inserted(bs)) {
695 + error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
699 + if (bdrv_is_read_only(bs)) {
700 + error_setg(errp, "Node '%s' is read only", device);
704 + if (!bdrv_can_snapshot(bs)) {
705 + error_setg(errp, QERR_UNSUPPORTED);
709 + if (bdrv_snapshot_find(bs, sn, name) >= 0) {
710 + error_set(errp, ERROR_CLASS_GENERIC_ERROR,
711 + "snapshot '%s' already exists", name);
716 + memset(sn, 0, sizeof(*sn));
720 + sn->date_sec = tb.time;
721 + sn->date_nsec = tb.millitm * 1000000;
723 + gettimeofday(&tv, NULL);
724 + sn->date_sec = tv.tv_sec;
725 + sn->date_nsec = tv.tv_usec * 1000;
727 + sn->vm_clock_nsec = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
729 + pstrcpy(sn->name, sizeof(sn->name), name);
731 + sn->vm_state_size = 0; /* do not save state */
733 + ret = bdrv_snapshot_create(bs, sn);
735 + error_set(errp, ERROR_CLASS_GENERIC_ERROR,
736 + "Error while creating snapshot on '%s'\n", device);
741 +void qmp_delete_drive_snapshot(const char *device, const char *name,
745 + BlockDriverState *bs;
746 + QEMUSnapshotInfo sn1, *sn = &sn1;
747 + Error *local_err = NULL;
751 + blk = blk_by_name(device);
753 + error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
754 + "Device '%s' not found", device);
759 + if (bdrv_is_read_only(bs)) {
760 + error_setg(errp, "Node '%s' is read only", device);
764 + if (!bdrv_can_snapshot(bs)) {
765 + error_setg(errp, QERR_UNSUPPORTED);
769 + if (bdrv_snapshot_find(bs, sn, name) < 0) {
770 + /* return success if snapshot does not exist */
774 + ret = bdrv_snapshot_delete(bs, NULL, name, &local_err);
776 + error_set(errp, ERROR_CLASS_GENERIC_ERROR,
777 + "Error while deleting snapshot on '%s'\n", device);
782 +static ssize_t loadstate_get_buffer(void *opaque, uint8_t *buf, int64_t pos,
785 + BlockBackend *be = opaque;
786 + int64_t maxlen = blk_getlength(be);
787 + if (pos > maxlen) {
790 + if ((pos + size) > maxlen) {
791 + size = maxlen - pos - 1;
796 + return blk_pread(be, pos, buf, size);
799 +static const QEMUFileOps loadstate_file_ops = {
800 + .get_buffer = loadstate_get_buffer,
803 +int load_snapshot_from_blockdev(const char *filename, Error **errp)
806 + Error *local_err = NULL;
807 + Error *blocker = NULL;
812 + be = blk_new_open(filename, NULL, NULL, 0, &local_err);
815 + error_setg(errp, "Could not open VM state file");
819 + error_setg(&blocker, "block device is in use by load state");
820 + blk_op_block_all(be, blocker);
822 + /* restore the VM state */
823 + f = qemu_fopen_ops(be, &loadstate_file_ops);
825 + error_setg(errp, "Could not open VM state file");
829 + qemu_system_reset(SHUTDOWN_CAUSE_NONE);
830 + ret = qemu_loadvm_state(f);
833 + migration_incoming_state_destroy();
835 + error_setg_errno(errp, -ret, "Error while loading VM state");
843 + blk_op_unblock_all(be, blocker);
844 + error_free(blocker);
849 diff --git a/vl.c b/vl.c
850 index 2e0fe15978..1bfbe95b22 100644
853 @@ -3109,6 +3109,7 @@ int main(int argc, char **argv, char **envp)
856 const char *loadvm = NULL;
857 + const char *loadstate = NULL;
858 MachineClass *machine_class;
859 const char *cpu_model;
860 const char *vga_model = NULL;
861 @@ -3785,6 +3786,9 @@ int main(int argc, char **argv, char **envp)
862 case QEMU_OPTION_loadvm:
865 + case QEMU_OPTION_loadstate:
866 + loadstate = optarg;
868 case QEMU_OPTION_full_screen:
871 @@ -4891,6 +4895,12 @@ int main(int argc, char **argv, char **envp)
872 error_report_err(local_err);
875 + } else if (loadstate) {
876 + Error *local_err = NULL;
877 + if (load_snapshot_from_blockdev(loadstate, &local_err) < 0) {
878 + error_report_err(local_err);
883 qdev_prop_check_globals();