1 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
2 From: Wolfgang Bumiller <w.bumiller@proxmox.com>
3 Date: Wed, 9 Dec 2015 16:04:32 +0100
4 Subject: [PATCH] PVE: internal snapshot async
8 hmp-commands-info.hx | 13 ++
9 hmp-commands.hx | 32 +++
12 include/migration/snapshot.h | 1 +
13 qapi/migration.json | 34 ++++
14 qapi/misc.json | 32 +++
15 qemu-options.hx | 13 ++
16 savevm-async.c | 460 +++++++++++++++++++++++++++++++++++++++++++
18 11 files changed, 658 insertions(+)
19 create mode 100644 savevm-async.c
21 diff --git a/Makefile.objs b/Makefile.objs
22 index 7a9828da28..a836ee87d7 100644
25 @@ -98,6 +98,7 @@ ifeq ($(CONFIG_SOFTMMU),y)
26 common-obj-y = blockdev.o blockdev-nbd.o block/
27 common-obj-y += bootdevice.o iothread.o
28 common-obj-y += job-qmp.o
29 +common-obj-y += savevm-async.o
31 common-obj-y += qdev-monitor.o device-hotplug.o
32 common-obj-$(CONFIG_WIN32) += os-win32.o
33 diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
34 index 70639f656a..42c148fdc9 100644
35 --- a/hmp-commands-info.hx
36 +++ b/hmp-commands-info.hx
37 @@ -575,6 +575,19 @@ Show current migration xbzrle cache size.
44 + .help = "show savevm status",
45 + .cmd = hmp_info_savevm,
57 diff --git a/hmp-commands.hx b/hmp-commands.hx
58 index 91dfe51c37..a6f0720442 100644
61 @@ -1886,3 +1886,35 @@ ETEXI
67 + .name = "savevm-start",
68 + .args_type = "statefile:s?",
69 + .params = "[statefile]",
70 + .help = "Prepare for snapshot and halt VM. Save VM state to statefile.",
71 + .cmd = hmp_savevm_start,
75 + .name = "snapshot-drive",
76 + .args_type = "device:s,name:s",
77 + .params = "device name",
78 + .help = "Create internal snapshot.",
79 + .cmd = hmp_snapshot_drive,
83 + .name = "delete-drive-snapshot",
84 + .args_type = "device:s,name:s",
85 + .params = "device name",
86 + .help = "Delete internal snapshot.",
87 + .cmd = hmp_delete_drive_snapshot,
91 + .name = "savevm-end",
94 + .help = "Resume VM after snaphot.",
95 + .cmd = hmp_savevm_end,
97 diff --git a/hmp.c b/hmp.c
98 index 4d60782f56..7c975f3ead 100644
101 @@ -2558,6 +2558,63 @@ void hmp_info_memory_devices(Monitor *mon, const QDict *qdict)
102 hmp_handle_error(mon, &err);
105 +void hmp_savevm_start(Monitor *mon, const QDict *qdict)
107 + Error *errp = NULL;
108 + const char *statefile = qdict_get_try_str(qdict, "statefile");
110 + qmp_savevm_start(statefile != NULL, statefile, &errp);
111 + hmp_handle_error(mon, &errp);
114 +void hmp_snapshot_drive(Monitor *mon, const QDict *qdict)
116 + Error *errp = NULL;
117 + const char *name = qdict_get_str(qdict, "name");
118 + const char *device = qdict_get_str(qdict, "device");
120 + qmp_snapshot_drive(device, name, &errp);
121 + hmp_handle_error(mon, &errp);
124 +void hmp_delete_drive_snapshot(Monitor *mon, const QDict *qdict)
126 + Error *errp = NULL;
127 + const char *name = qdict_get_str(qdict, "name");
128 + const char *device = qdict_get_str(qdict, "device");
130 + qmp_delete_drive_snapshot(device, name, &errp);
131 + hmp_handle_error(mon, &errp);
134 +void hmp_savevm_end(Monitor *mon, const QDict *qdict)
136 + Error *errp = NULL;
138 + qmp_savevm_end(&errp);
139 + hmp_handle_error(mon, &errp);
142 +void hmp_info_savevm(Monitor *mon, const QDict *qdict)
145 + info = qmp_query_savevm(NULL);
147 + if (info->has_status) {
148 + monitor_printf(mon, "savevm status: %s\n", info->status);
149 + monitor_printf(mon, "total time: %" PRIu64 " milliseconds\n",
152 + monitor_printf(mon, "savevm status: not running\n");
154 + if (info->has_bytes) {
155 + monitor_printf(mon, "Bytes saved: %"PRIu64"\n", info->bytes);
157 + if (info->has_error) {
158 + monitor_printf(mon, "Error: %s\n", info->error);
162 void hmp_info_iothreads(Monitor *mon, const QDict *qdict)
164 IOThreadInfoList *info_list = qmp_query_iothreads(NULL);
165 diff --git a/hmp.h b/hmp.h
166 index 33354f1bdd..98bb7a44db 100644
169 @@ -24,6 +24,7 @@ void hmp_info_status(Monitor *mon, const QDict *qdict);
170 void hmp_info_uuid(Monitor *mon, const QDict *qdict);
171 void hmp_info_chardev(Monitor *mon, const QDict *qdict);
172 void hmp_info_mice(Monitor *mon, const QDict *qdict);
173 +void hmp_info_savevm(Monitor *mon, const QDict *qdict);
174 void hmp_info_migrate(Monitor *mon, const QDict *qdict);
175 void hmp_info_migrate_capabilities(Monitor *mon, const QDict *qdict);
176 void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict);
177 @@ -98,6 +99,10 @@ void hmp_netdev_add(Monitor *mon, const QDict *qdict);
178 void hmp_netdev_del(Monitor *mon, const QDict *qdict);
179 void hmp_getfd(Monitor *mon, const QDict *qdict);
180 void hmp_closefd(Monitor *mon, const QDict *qdict);
181 +void hmp_savevm_start(Monitor *mon, const QDict *qdict);
182 +void hmp_snapshot_drive(Monitor *mon, const QDict *qdict);
183 +void hmp_delete_drive_snapshot(Monitor *mon, const QDict *qdict);
184 +void hmp_savevm_end(Monitor *mon, const QDict *qdict);
185 void hmp_sendkey(Monitor *mon, const QDict *qdict);
186 void hmp_screendump(Monitor *mon, const QDict *qdict);
187 void hmp_nbd_server_start(Monitor *mon, const QDict *qdict);
188 diff --git a/include/migration/snapshot.h b/include/migration/snapshot.h
189 index c85b6ec75b..4411b7121d 100644
190 --- a/include/migration/snapshot.h
191 +++ b/include/migration/snapshot.h
194 int save_snapshot(const char *name, Error **errp);
195 int load_snapshot(const char *name, Error **errp);
196 +int load_snapshot_from_blockdev(const char *filename, Error **errp);
199 diff --git a/qapi/migration.json b/qapi/migration.json
200 index 186e8a7303..8d2626f6ad 100644
201 --- a/qapi/migration.json
202 +++ b/qapi/migration.json
204 '*postcopy-vcpu-blocktime': ['uint32']} }
209 +# Information about current savevm process.
211 +# @status: string describing the current savevm status.
212 +# This can be 'active', 'completed', 'failed'.
213 +# If this field is not returned, no savevm process
214 +# has been initiated
216 +# @error: string containing error message if status is failed.
218 +# @total-time: total amount of milliseconds since savevm started.
219 +# If savevm has ended, it returns the total save time
221 +# @bytes: total amount of data transferred
225 +{ 'struct': 'SaveVMInfo',
226 + 'data': {'*status': 'str', '*error': 'str',
227 + '*total-time': 'int', '*bytes': 'int'} }
232 +# Returns information about current savevm process.
234 +# Returns: @SaveVMInfo
238 +{ 'command': 'query-savevm', 'returns': 'SaveVMInfo' }
243 # Returns information about current migration process. If migration
244 diff --git a/qapi/misc.json b/qapi/misc.json
245 index 4e8ebf9adc..b6ad5f028d 100644
248 @@ -2525,6 +2525,38 @@
249 { 'command': 'query-target', 'returns': 'TargetInfo' }
254 +# Prepare for snapshot and halt VM. Save VM state to statefile.
257 +{ 'command': 'savevm-start', 'data': { '*statefile': 'str' } }
262 +# Create an internal drive snapshot.
265 +{ 'command': 'snapshot-drive', 'data': { 'device': 'str', 'name': 'str' } }
268 +# @delete-drive-snapshot:
270 +# Delete a drive snapshot.
273 +{ 'command': 'delete-drive-snapshot', 'data': { 'device': 'str', 'name': 'str' } }
278 +# Resume VM after a snapshot.
281 +{ 'command': 'savevm-end' }
286 # Specify an ACPI table on the command line to load.
287 diff --git a/qemu-options.hx b/qemu-options.hx
288 index b1bf0f485f..31329e26e2 100644
289 --- a/qemu-options.hx
290 +++ b/qemu-options.hx
291 @@ -3520,6 +3520,19 @@ STEXI
292 Start right away with a saved state (@code{loadvm} in monitor)
295 +DEF("loadstate", HAS_ARG, QEMU_OPTION_loadstate, \
296 + "-loadstate file\n" \
297 + " start right away with a saved state\n",
300 +@item -loadstate @var{file}
302 +Start right away with a saved state. This option does not roll back
303 +disk state like @code{loadvm}, so the user must make sure that the disks
304 +have the correct state. @var{file} can be any valid device URL. See the section
305 +for "Device URL Syntax" for more information.
309 DEF("daemonize", 0, QEMU_OPTION_daemonize, \
310 "-daemonize daemonize QEMU after initializing\n", QEMU_ARCH_ALL)
311 diff --git a/savevm-async.c b/savevm-async.c
313 index 0000000000..73b7fe75ed
317 +#include "qemu/osdep.h"
318 +#include "migration/migration.h"
319 +#include "migration/savevm.h"
320 +#include "migration/snapshot.h"
321 +#include "migration/global_state.h"
322 +#include "migration/ram.h"
323 +#include "migration/qemu-file.h"
324 +#include "sysemu/sysemu.h"
325 +#include "block/block.h"
326 +#include "sysemu/block-backend.h"
327 +#include "qapi/error.h"
328 +#include "qapi/qmp/qerror.h"
329 +#include "qapi/qmp/qdict.h"
330 +#include "qapi/qapi-commands-migration.h"
331 +#include "qapi/qapi-commands-misc.h"
332 +#include "qapi/qapi-commands-block.h"
333 +#include "qemu/cutils.h"
335 +/* #define DEBUG_SAVEVM_STATE */
337 +/* used while emulated sync operation in progress */
338 +#define NOT_DONE -EINPROGRESS
340 +#ifdef DEBUG_SAVEVM_STATE
341 +#define DPRINTF(fmt, ...) \
342 + do { printf("savevm-async: " fmt, ## __VA_ARGS__); } while (0)
344 +#define DPRINTF(fmt, ...) \
352 + SAVE_STATE_COMPLETED,
353 + SAVE_STATE_CANCELLED
357 +static struct SnapshotState {
358 + BlockBackend *target;
363 + int saved_vm_running;
365 + int64_t total_time;
366 + QEMUBH *cleanup_bh;
370 +SaveVMInfo *qmp_query_savevm(Error **errp)
372 + SaveVMInfo *info = g_malloc0(sizeof(*info));
373 + struct SnapshotState *s = &snap_state;
375 + if (s->state != SAVE_STATE_DONE) {
376 + info->has_bytes = true;
377 + info->bytes = s->bs_pos;
378 + switch (s->state) {
379 + case SAVE_STATE_ERROR:
380 + info->has_status = true;
381 + info->status = g_strdup("failed");
382 + info->has_total_time = true;
383 + info->total_time = s->total_time;
385 + info->has_error = true;
386 + info->error = g_strdup(error_get_pretty(s->error));
389 + case SAVE_STATE_ACTIVE:
390 + info->has_status = true;
391 + info->status = g_strdup("active");
392 + info->has_total_time = true;
393 + info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME)
396 + case SAVE_STATE_COMPLETED:
397 + info->has_status = true;
398 + info->status = g_strdup("completed");
399 + info->has_total_time = true;
400 + info->total_time = s->total_time;
408 +static int save_snapshot_cleanup(void)
412 + DPRINTF("save_snapshot_cleanup\n");
414 + snap_state.total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) -
415 + snap_state.total_time;
417 + if (snap_state.file) {
418 + ret = qemu_fclose(snap_state.file);
421 + if (snap_state.target) {
422 + /* try to truncate, but ignore errors (will fail on block devices).
423 + * note: bdrv_read() needs whole blocks, so we round up
425 + size_t size = (snap_state.bs_pos + BDRV_SECTOR_SIZE) & BDRV_SECTOR_MASK;
426 + blk_truncate(snap_state.target, size, PREALLOC_MODE_OFF, NULL);
427 + blk_op_unblock_all(snap_state.target, snap_state.blocker);
428 + error_free(snap_state.blocker);
429 + snap_state.blocker = NULL;
430 + blk_unref(snap_state.target);
431 + snap_state.target = NULL;
437 +static void save_snapshot_error(const char *fmt, ...)
443 + msg = g_strdup_vprintf(fmt, ap);
446 + DPRINTF("save_snapshot_error: %s\n", msg);
448 + if (!snap_state.error) {
449 + error_set(&snap_state.error, ERROR_CLASS_GENERIC_ERROR, "%s", msg);
454 + snap_state.state = SAVE_STATE_ERROR;
457 +static int block_state_close(void *opaque)
459 + snap_state.file = NULL;
460 + return blk_flush(snap_state.target);
463 +typedef struct BlkRwCo {
465 + QEMUIOVector *qiov;
469 +static void coroutine_fn block_state_write_entry(void *opaque) {
470 + BlkRwCo *rwco = opaque;
471 + rwco->ret = blk_co_pwritev(snap_state.target, rwco->offset, rwco->qiov->size,
475 +static ssize_t block_state_writev_buffer(void *opaque, struct iovec *iov,
476 + int iovcnt, int64_t pos)
481 + assert(pos == snap_state.bs_pos);
488 + qemu_iovec_init_external(&qiov, iov, iovcnt);
490 + if (qemu_in_coroutine()) {
491 + block_state_write_entry(&rwco);
493 + Coroutine *co = qemu_coroutine_create(&block_state_write_entry, &rwco);
494 + bdrv_coroutine_enter(blk_bs(snap_state.target), co);
495 + BDRV_POLL_WHILE(blk_bs(snap_state.target), rwco.ret == NOT_DONE);
497 + if (rwco.ret < 0) {
501 + snap_state.bs_pos += qiov.size;
505 +static const QEMUFileOps block_file_ops = {
506 + .writev_buffer = block_state_writev_buffer,
507 + .close = block_state_close,
510 +static void process_savevm_cleanup(void *opaque)
513 + qemu_bh_delete(snap_state.cleanup_bh);
514 + snap_state.cleanup_bh = NULL;
515 + qemu_mutex_unlock_iothread();
516 + qemu_thread_join(&snap_state.thread);
517 + qemu_mutex_lock_iothread();
518 + ret = save_snapshot_cleanup();
520 + save_snapshot_error("save_snapshot_cleanup error %d", ret);
521 + } else if (snap_state.state == SAVE_STATE_ACTIVE) {
522 + snap_state.state = SAVE_STATE_COMPLETED;
524 + save_snapshot_error("process_savevm_cleanup: invalid state: %d",
527 + if (snap_state.saved_vm_running) {
529 + snap_state.saved_vm_running = false;
533 +static void *process_savevm_thread(void *opaque)
538 + rcu_register_thread();
540 + qemu_savevm_state_header(snap_state.file);
541 + qemu_savevm_state_setup(snap_state.file);
542 + ret = qemu_file_get_error(snap_state.file);
545 + save_snapshot_error("qemu_savevm_state_setup failed");
546 + rcu_unregister_thread();
550 + while (snap_state.state == SAVE_STATE_ACTIVE) {
551 + uint64_t pending_size, pend_precopy, pend_compatible, pend_postcopy;
553 + qemu_savevm_state_pending(snap_state.file, 0, &pend_precopy, &pend_compatible, &pend_postcopy);
554 + pending_size = pend_precopy + pend_compatible + pend_postcopy;
556 + maxlen = blk_getlength(snap_state.target) - 30*1024*1024;
558 + if (pending_size > 400000 && snap_state.bs_pos + pending_size < maxlen) {
559 + qemu_mutex_lock_iothread();
560 + ret = qemu_savevm_state_iterate(snap_state.file, false);
562 + save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
565 + qemu_mutex_unlock_iothread();
566 + DPRINTF("savevm inerate pending size %lu ret %d\n", pending_size, ret);
568 + qemu_mutex_lock_iothread();
569 + qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
570 + ret = global_state_store();
572 + save_snapshot_error("global_state_store error %d", ret);
575 + ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
577 + save_snapshot_error("vm_stop_force_state error %d", ret);
580 + DPRINTF("savevm inerate finished\n");
581 + /* upstream made the return value here inconsistent
582 + * (-1 instead of 'ret' in one case and 0 after flush which can
583 + * still set a file error...)
585 + (void)qemu_savevm_state_complete_precopy(snap_state.file, false, false);
586 + ret = qemu_file_get_error(snap_state.file);
588 + save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
591 + qemu_savevm_state_cleanup();
592 + DPRINTF("save complete\n");
597 + qemu_bh_schedule(snap_state.cleanup_bh);
598 + qemu_mutex_unlock_iothread();
600 + rcu_unregister_thread();
604 +void qmp_savevm_start(bool has_statefile, const char *statefile, Error **errp)
606 + Error *local_err = NULL;
608 + int bdrv_oflags = BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH;
610 + if (snap_state.state != SAVE_STATE_DONE) {
611 + error_set(errp, ERROR_CLASS_GENERIC_ERROR,
612 + "VM snapshot already started\n");
616 + /* initialize snapshot info */
617 + snap_state.saved_vm_running = runstate_is_running();
618 + snap_state.bs_pos = 0;
619 + snap_state.total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
620 + snap_state.blocker = NULL;
622 + if (snap_state.error) {
623 + error_free(snap_state.error);
624 + snap_state.error = NULL;
627 + if (!has_statefile) {
628 + vm_stop(RUN_STATE_SAVE_VM);
629 + snap_state.state = SAVE_STATE_COMPLETED;
633 + if (qemu_savevm_state_blocked(errp)) {
637 + /* Open the image */
638 + QDict *options = NULL;
639 + options = qdict_new();
640 + qdict_put_str(options, "driver", "raw");
641 + snap_state.target = blk_new_open(statefile, NULL, options, bdrv_oflags, &local_err);
642 + if (!snap_state.target) {
643 + error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
647 + snap_state.file = qemu_fopen_ops(&snap_state, &block_file_ops);
649 + if (!snap_state.file) {
650 + error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
655 + error_setg(&snap_state.blocker, "block device is in use by savevm");
656 + blk_op_block_all(snap_state.target, snap_state.blocker);
658 + snap_state.state = SAVE_STATE_ACTIVE;
659 + snap_state.cleanup_bh = qemu_bh_new(process_savevm_cleanup, &snap_state);
660 + qemu_thread_create(&snap_state.thread, "savevm-async", process_savevm_thread,
661 + NULL, QEMU_THREAD_JOINABLE);
667 + save_snapshot_error("setup failed");
669 + if (snap_state.saved_vm_running) {
674 +void qmp_savevm_end(Error **errp)
676 + if (snap_state.state == SAVE_STATE_DONE) {
677 + error_set(errp, ERROR_CLASS_GENERIC_ERROR,
678 + "VM snapshot not started\n");
682 + if (snap_state.state == SAVE_STATE_ACTIVE) {
683 + snap_state.state = SAVE_STATE_CANCELLED;
687 + if (snap_state.saved_vm_running) {
691 + snap_state.state = SAVE_STATE_DONE;
694 +// FIXME: Deprecated
695 +void qmp_snapshot_drive(const char *device, const char *name, Error **errp)
697 + // Compatibility to older qemu-server.
698 + qmp_blockdev_snapshot_internal_sync(device, name, errp);
701 +// FIXME: Deprecated
702 +void qmp_delete_drive_snapshot(const char *device, const char *name,
705 + // Compatibility to older qemu-server.
706 + (void)qmp_blockdev_snapshot_delete_internal_sync(device, false, NULL,
710 +static ssize_t loadstate_get_buffer(void *opaque, uint8_t *buf, int64_t pos,
713 + BlockBackend *be = opaque;
714 + int64_t maxlen = blk_getlength(be);
715 + if (pos > maxlen) {
718 + if ((pos + size) > maxlen) {
719 + size = maxlen - pos - 1;
724 + return blk_pread(be, pos, buf, size);
727 +static const QEMUFileOps loadstate_file_ops = {
728 + .get_buffer = loadstate_get_buffer,
731 +int load_snapshot_from_blockdev(const char *filename, Error **errp)
734 + Error *local_err = NULL;
735 + Error *blocker = NULL;
740 + be = blk_new_open(filename, NULL, NULL, 0, &local_err);
743 + error_setg(errp, "Could not open VM state file");
747 + error_setg(&blocker, "block device is in use by load state");
748 + blk_op_block_all(be, blocker);
750 + /* restore the VM state */
751 + f = qemu_fopen_ops(be, &loadstate_file_ops);
753 + error_setg(errp, "Could not open VM state file");
757 + qemu_system_reset(SHUTDOWN_CAUSE_NONE);
758 + ret = qemu_loadvm_state(f);
761 + migration_incoming_state_destroy();
763 + error_setg_errno(errp, -ret, "Error while loading VM state");
771 + blk_op_unblock_all(be, blocker);
772 + error_free(blocker);
777 diff --git a/vl.c b/vl.c
778 index 9c3a41bfe2..63107d82a3 100644
781 @@ -2927,6 +2927,7 @@ int main(int argc, char **argv, char **envp)
784 const char *loadvm = NULL;
785 + const char *loadstate = NULL;
786 MachineClass *machine_class;
787 const char *cpu_model;
788 const char *vga_model = NULL;
789 @@ -3529,6 +3530,9 @@ int main(int argc, char **argv, char **envp)
790 case QEMU_OPTION_loadvm:
793 + case QEMU_OPTION_loadstate:
794 + loadstate = optarg;
796 case QEMU_OPTION_full_screen:
797 dpy.has_full_screen = true;
798 dpy.full_screen = true;
799 @@ -4624,6 +4628,12 @@ int main(int argc, char **argv, char **envp)
800 error_report_err(local_err);
803 + } else if (loadstate) {
804 + Error *local_err = NULL;
805 + if (load_snapshot_from_blockdev(loadstate, &local_err) < 0) {
806 + error_report_err(local_err);
811 qdev_prop_check_globals();