1 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
2 From: Wolfgang Bumiller <w.bumiller@proxmox.com>
3 Date: Wed, 9 Dec 2015 16:04:32 +0100
4 Subject: [PATCH] PVE: internal snapshot async
6 Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
9 hmp-commands-info.hx | 13 +
10 hmp-commands.hx | 32 +++
13 include/migration/snapshot.h | 1 +
14 qapi/migration.json | 34 +++
15 qapi/misc.json | 32 +++
16 qemu-options.hx | 13 +
17 savevm-async.c | 460 +++++++++++++++++++++++++++++++++++
19 11 files changed, 658 insertions(+)
20 create mode 100644 savevm-async.c
22 diff --git a/Makefile.objs b/Makefile.objs
23 index cf065de5ed..559486973a 100644
26 @@ -46,6 +46,7 @@ ifeq ($(CONFIG_SOFTMMU),y)
27 common-obj-y = blockdev.o blockdev-nbd.o block/
28 common-obj-y += bootdevice.o iothread.o
29 common-obj-y += job-qmp.o
30 +common-obj-y += savevm-async.o
32 common-obj-y += qdev-monitor.o device-hotplug.o
33 common-obj-$(CONFIG_WIN32) += os-win32.o
34 diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
35 index c59444c461..444bd8e43d 100644
36 --- a/hmp-commands-info.hx
37 +++ b/hmp-commands-info.hx
38 @@ -608,6 +608,19 @@ STEXI
39 @item info migrate_cache_size
40 @findex info migrate_cache_size
41 Show current migration xbzrle cache size.
48 + .help = "show savevm status",
49 + .cmd = hmp_info_savevm,
58 diff --git a/hmp-commands.hx b/hmp-commands.hx
59 index 9b4035965c..284e97973a 100644
62 @@ -1926,3 +1926,35 @@ ETEXI
68 + .name = "savevm-start",
69 + .args_type = "statefile:s?",
70 + .params = "[statefile]",
71 + .help = "Prepare for snapshot and halt VM. Save VM state to statefile.",
72 + .cmd = hmp_savevm_start,
76 + .name = "snapshot-drive",
77 + .args_type = "device:s,name:s",
78 + .params = "device name",
79 + .help = "Create internal snapshot.",
80 + .cmd = hmp_snapshot_drive,
84 + .name = "delete-drive-snapshot",
85 + .args_type = "device:s,name:s",
86 + .params = "device name",
87 + .help = "Delete internal snapshot.",
88 + .cmd = hmp_delete_drive_snapshot,
92 + .name = "savevm-end",
95 + .help = "Resume VM after snaphot.",
96 + .cmd = hmp_savevm_end,
98 diff --git a/hmp.c b/hmp.c
99 index 25fe18cbcf..16243bba50 100644
102 @@ -2722,6 +2722,63 @@ void hmp_info_memory_devices(Monitor *mon, const QDict *qdict)
103 hmp_handle_error(mon, &err);
106 +void hmp_savevm_start(Monitor *mon, const QDict *qdict)
108 + Error *errp = NULL;
109 + const char *statefile = qdict_get_try_str(qdict, "statefile");
111 + qmp_savevm_start(statefile != NULL, statefile, &errp);
112 + hmp_handle_error(mon, &errp);
115 +void hmp_snapshot_drive(Monitor *mon, const QDict *qdict)
117 + Error *errp = NULL;
118 + const char *name = qdict_get_str(qdict, "name");
119 + const char *device = qdict_get_str(qdict, "device");
121 + qmp_snapshot_drive(device, name, &errp);
122 + hmp_handle_error(mon, &errp);
125 +void hmp_delete_drive_snapshot(Monitor *mon, const QDict *qdict)
127 + Error *errp = NULL;
128 + const char *name = qdict_get_str(qdict, "name");
129 + const char *device = qdict_get_str(qdict, "device");
131 + qmp_delete_drive_snapshot(device, name, &errp);
132 + hmp_handle_error(mon, &errp);
135 +void hmp_savevm_end(Monitor *mon, const QDict *qdict)
137 + Error *errp = NULL;
139 + qmp_savevm_end(&errp);
140 + hmp_handle_error(mon, &errp);
143 +void hmp_info_savevm(Monitor *mon, const QDict *qdict)
146 + info = qmp_query_savevm(NULL);
148 + if (info->has_status) {
149 + monitor_printf(mon, "savevm status: %s\n", info->status);
150 + monitor_printf(mon, "total time: %" PRIu64 " milliseconds\n",
153 + monitor_printf(mon, "savevm status: not running\n");
155 + if (info->has_bytes) {
156 + monitor_printf(mon, "Bytes saved: %"PRIu64"\n", info->bytes);
158 + if (info->has_error) {
159 + monitor_printf(mon, "Error: %s\n", info->error);
163 void hmp_info_iothreads(Monitor *mon, const QDict *qdict)
165 IOThreadInfoList *info_list = qmp_query_iothreads(NULL);
166 diff --git a/hmp.h b/hmp.h
167 index 43617f2646..bcb90c478f 100644
170 @@ -24,6 +24,7 @@ void hmp_info_status(Monitor *mon, const QDict *qdict);
171 void hmp_info_uuid(Monitor *mon, const QDict *qdict);
172 void hmp_info_chardev(Monitor *mon, const QDict *qdict);
173 void hmp_info_mice(Monitor *mon, const QDict *qdict);
174 +void hmp_info_savevm(Monitor *mon, const QDict *qdict);
175 void hmp_info_migrate(Monitor *mon, const QDict *qdict);
176 void hmp_info_migrate_capabilities(Monitor *mon, const QDict *qdict);
177 void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict);
178 @@ -101,6 +102,10 @@ void hmp_netdev_add(Monitor *mon, const QDict *qdict);
179 void hmp_netdev_del(Monitor *mon, const QDict *qdict);
180 void hmp_getfd(Monitor *mon, const QDict *qdict);
181 void hmp_closefd(Monitor *mon, const QDict *qdict);
182 +void hmp_savevm_start(Monitor *mon, const QDict *qdict);
183 +void hmp_snapshot_drive(Monitor *mon, const QDict *qdict);
184 +void hmp_delete_drive_snapshot(Monitor *mon, const QDict *qdict);
185 +void hmp_savevm_end(Monitor *mon, const QDict *qdict);
186 void hmp_sendkey(Monitor *mon, const QDict *qdict);
187 void hmp_screendump(Monitor *mon, const QDict *qdict);
188 void hmp_nbd_server_start(Monitor *mon, const QDict *qdict);
189 diff --git a/include/migration/snapshot.h b/include/migration/snapshot.h
190 index c85b6ec75b..4411b7121d 100644
191 --- a/include/migration/snapshot.h
192 +++ b/include/migration/snapshot.h
195 int save_snapshot(const char *name, Error **errp);
196 int load_snapshot(const char *name, Error **errp);
197 +int load_snapshot_from_blockdev(const char *filename, Error **errp);
200 diff --git a/qapi/migration.json b/qapi/migration.json
201 index 9cfbaf8c6c..e206355d56 100644
202 --- a/qapi/migration.json
203 +++ b/qapi/migration.json
205 '*compression': 'CompressionStats',
206 '*socket-address': ['SocketAddress'] } }
211 +# Information about the current savevm process.
213 +# @status: string describing the current savevm status.
214 +# This can be 'active', 'completed', 'failed'.
215 +# If this field is not returned, no savevm process
216 +# has been initiated
218 +# @error: string containing error message if status is failed.
220 +# @total-time: total amount of milliseconds since savevm started.
221 +# If savevm has ended, it returns the total save time
223 +# @bytes: total amount of data transferred
227 +{ 'struct': 'SaveVMInfo',
228 + 'data': {'*status': 'str', '*error': 'str',
229 + '*total-time': 'int', '*bytes': 'int'} }
234 +# Returns information about current savevm process.
236 +# Returns: @SaveVMInfo
240 +{ 'command': 'query-savevm', 'returns': 'SaveVMInfo' }
245 diff --git a/qapi/misc.json b/qapi/misc.json
246 index 4a0e26720e..49dfda0b28 100644
249 @@ -2358,6 +2358,38 @@
251 { 'command': 'query-target', 'returns': 'TargetInfo' }
256 +# Prepare for snapshot and halt VM. Save VM state to statefile.
259 +{ 'command': 'savevm-start', 'data': { '*statefile': 'str' } }
264 +# Create an internal drive snapshot.
267 +{ 'command': 'snapshot-drive', 'data': { 'device': 'str', 'name': 'str' } }
270 +# @delete-drive-snapshot:
272 +# Delete a drive snapshot.
275 +{ 'command': 'delete-drive-snapshot', 'data': { 'device': 'str', 'name': 'str' } }
280 +# Resume VM after a snapshot.
283 +{ 'command': 'savevm-end' }
288 diff --git a/qemu-options.hx b/qemu-options.hx
289 index 08749a3391..880c63dab2 100644
290 --- a/qemu-options.hx
291 +++ b/qemu-options.hx
292 @@ -3680,6 +3680,19 @@ STEXI
293 Start right away with a saved state (@code{loadvm} in monitor)
296 +DEF("loadstate", HAS_ARG, QEMU_OPTION_loadstate, \
297 + "-loadstate file\n" \
298 + " start right away with a saved state\n",
301 +@item -loadstate @var{file}
303 +Start right away with a saved state. This option does not roll back
304 +disk state like @code{loadvm}, so the user must make sure that the disks
305 +are in the correct state. @var{file} can be any valid device URL. See the section
306 +for "Device URL Syntax" for more information.
310 DEF("daemonize", 0, QEMU_OPTION_daemonize, \
311 "-daemonize daemonize QEMU after initializing\n", QEMU_ARCH_ALL)
312 diff --git a/savevm-async.c b/savevm-async.c
314 index 0000000000..2149010bb8
318 +#include "qemu/osdep.h"
319 +#include "migration/migration.h"
320 +#include "migration/savevm.h"
321 +#include "migration/snapshot.h"
322 +#include "migration/global_state.h"
323 +#include "migration/ram.h"
324 +#include "migration/qemu-file.h"
325 +#include "sysemu/sysemu.h"
326 +#include "block/block.h"
327 +#include "sysemu/block-backend.h"
328 +#include "qapi/error.h"
329 +#include "qapi/qmp/qerror.h"
330 +#include "qapi/qmp/qdict.h"
331 +#include "qapi/qapi-commands-migration.h"
332 +#include "qapi/qapi-commands-misc.h"
333 +#include "qapi/qapi-commands-block.h"
334 +#include "qemu/cutils.h"
336 +/* #define DEBUG_SAVEVM_STATE */
338 +/* used while emulated sync operation in progress */
339 +#define NOT_DONE -EINPROGRESS
341 +#ifdef DEBUG_SAVEVM_STATE
342 +#define DPRINTF(fmt, ...) \
343 + do { printf("savevm-async: " fmt, ## __VA_ARGS__); } while (0)
345 +#define DPRINTF(fmt, ...) \
353 + SAVE_STATE_COMPLETED,
354 + SAVE_STATE_CANCELLED
358 +static struct SnapshotState {
359 + BlockBackend *target;
364 + int saved_vm_running;
366 + int64_t total_time;
367 + QEMUBH *cleanup_bh;
371 +SaveVMInfo *qmp_query_savevm(Error **errp)
373 + SaveVMInfo *info = g_malloc0(sizeof(*info));
374 + struct SnapshotState *s = &snap_state;
376 + if (s->state != SAVE_STATE_DONE) {
377 + info->has_bytes = true;
378 + info->bytes = s->bs_pos;
379 + switch (s->state) {
380 + case SAVE_STATE_ERROR:
381 + info->has_status = true;
382 + info->status = g_strdup("failed");
383 + info->has_total_time = true;
384 + info->total_time = s->total_time;
386 + info->has_error = true;
387 + info->error = g_strdup(error_get_pretty(s->error));
390 + case SAVE_STATE_ACTIVE:
391 + info->has_status = true;
392 + info->status = g_strdup("active");
393 + info->has_total_time = true;
394 + info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME)
397 + case SAVE_STATE_COMPLETED:
398 + info->has_status = true;
399 + info->status = g_strdup("completed");
400 + info->has_total_time = true;
401 + info->total_time = s->total_time;
409 +static int save_snapshot_cleanup(void)
413 + DPRINTF("save_snapshot_cleanup\n");
415 + snap_state.total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) -
416 + snap_state.total_time;
418 + if (snap_state.file) {
419 + ret = qemu_fclose(snap_state.file);
422 + if (snap_state.target) {
423 + /* try to truncate, but ignore errors (will fail on block devices).
424 + * note: bdrv_read() needs whole blocks, so we round up
426 + size_t size = (snap_state.bs_pos + BDRV_SECTOR_SIZE) & BDRV_SECTOR_MASK;
427 + blk_truncate(snap_state.target, size, PREALLOC_MODE_OFF, NULL);
428 + blk_op_unblock_all(snap_state.target, snap_state.blocker);
429 + error_free(snap_state.blocker);
430 + snap_state.blocker = NULL;
431 + blk_unref(snap_state.target);
432 + snap_state.target = NULL;
438 +static void save_snapshot_error(const char *fmt, ...)
444 + msg = g_strdup_vprintf(fmt, ap);
447 + DPRINTF("save_snapshot_error: %s\n", msg);
449 + if (!snap_state.error) {
450 + error_set(&snap_state.error, ERROR_CLASS_GENERIC_ERROR, "%s", msg);
455 + snap_state.state = SAVE_STATE_ERROR;
458 +static int block_state_close(void *opaque)
460 + snap_state.file = NULL;
461 + return blk_flush(snap_state.target);
464 +typedef struct BlkRwCo {
466 + QEMUIOVector *qiov;
470 +static void coroutine_fn block_state_write_entry(void *opaque) {
471 + BlkRwCo *rwco = opaque;
472 + rwco->ret = blk_co_pwritev(snap_state.target, rwco->offset, rwco->qiov->size,
476 +static ssize_t block_state_writev_buffer(void *opaque, struct iovec *iov,
477 + int iovcnt, int64_t pos)
482 + assert(pos == snap_state.bs_pos);
489 + qemu_iovec_init_external(&qiov, iov, iovcnt);
491 + if (qemu_in_coroutine()) {
492 + block_state_write_entry(&rwco);
494 + Coroutine *co = qemu_coroutine_create(&block_state_write_entry, &rwco);
495 + bdrv_coroutine_enter(blk_bs(snap_state.target), co);
496 + BDRV_POLL_WHILE(blk_bs(snap_state.target), rwco.ret == NOT_DONE);
498 + if (rwco.ret < 0) {
502 + snap_state.bs_pos += qiov.size;
506 +static const QEMUFileOps block_file_ops = {
507 + .writev_buffer = block_state_writev_buffer,
508 + .close = block_state_close,
511 +static void process_savevm_cleanup(void *opaque)
514 + qemu_bh_delete(snap_state.cleanup_bh);
515 + snap_state.cleanup_bh = NULL;
516 + qemu_mutex_unlock_iothread();
517 + qemu_thread_join(&snap_state.thread);
518 + qemu_mutex_lock_iothread();
519 + ret = save_snapshot_cleanup();
521 + save_snapshot_error("save_snapshot_cleanup error %d", ret);
522 + } else if (snap_state.state == SAVE_STATE_ACTIVE) {
523 + snap_state.state = SAVE_STATE_COMPLETED;
525 + save_snapshot_error("process_savevm_cleanup: invalid state: %d",
528 + if (snap_state.saved_vm_running) {
530 + snap_state.saved_vm_running = false;
534 +static void *process_savevm_thread(void *opaque)
539 + rcu_register_thread();
541 + qemu_savevm_state_header(snap_state.file);
542 + qemu_savevm_state_setup(snap_state.file);
543 + ret = qemu_file_get_error(snap_state.file);
546 + save_snapshot_error("qemu_savevm_state_setup failed");
547 + rcu_unregister_thread();
551 + while (snap_state.state == SAVE_STATE_ACTIVE) {
552 + uint64_t pending_size, pend_precopy, pend_compatible, pend_postcopy;
554 + qemu_savevm_state_pending(snap_state.file, 0, &pend_precopy, &pend_compatible, &pend_postcopy);
555 + pending_size = pend_precopy + pend_compatible + pend_postcopy;
557 + maxlen = blk_getlength(snap_state.target) - 30*1024*1024;
559 + if (pending_size > 400000 && snap_state.bs_pos + pending_size < maxlen) {
560 + qemu_mutex_lock_iothread();
561 + ret = qemu_savevm_state_iterate(snap_state.file, false);
563 + save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
566 + qemu_mutex_unlock_iothread();
567 + DPRINTF("savevm inerate pending size %lu ret %d\n", pending_size, ret);
569 + qemu_mutex_lock_iothread();
570 + qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
571 + ret = global_state_store();
573 + save_snapshot_error("global_state_store error %d", ret);
576 + ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
578 + save_snapshot_error("vm_stop_force_state error %d", ret);
581 + DPRINTF("savevm inerate finished\n");
582 + /* upstream made the return value here inconsistent
583 + * (-1 instead of 'ret' in one case and 0 after flush which can
584 + * still set a file error...)
586 + (void)qemu_savevm_state_complete_precopy(snap_state.file, false, false);
587 + ret = qemu_file_get_error(snap_state.file);
589 + save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
592 + qemu_savevm_state_cleanup();
593 + DPRINTF("save complete\n");
598 + qemu_bh_schedule(snap_state.cleanup_bh);
599 + qemu_mutex_unlock_iothread();
601 + rcu_unregister_thread();
605 +void qmp_savevm_start(bool has_statefile, const char *statefile, Error **errp)
607 + Error *local_err = NULL;
609 + int bdrv_oflags = BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH;
611 + if (snap_state.state != SAVE_STATE_DONE) {
612 + error_set(errp, ERROR_CLASS_GENERIC_ERROR,
613 + "VM snapshot already started\n");
617 + /* initialize snapshot info */
618 + snap_state.saved_vm_running = runstate_is_running();
619 + snap_state.bs_pos = 0;
620 + snap_state.total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
621 + snap_state.blocker = NULL;
623 + if (snap_state.error) {
624 + error_free(snap_state.error);
625 + snap_state.error = NULL;
628 + if (!has_statefile) {
629 + vm_stop(RUN_STATE_SAVE_VM);
630 + snap_state.state = SAVE_STATE_COMPLETED;
634 + if (qemu_savevm_state_blocked(errp)) {
638 + /* Open the image */
639 + QDict *options = NULL;
640 + options = qdict_new();
641 + qdict_put_str(options, "driver", "raw");
642 + snap_state.target = blk_new_open(statefile, NULL, options, bdrv_oflags, &local_err);
643 + if (!snap_state.target) {
644 + error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
648 + snap_state.file = qemu_fopen_ops(&snap_state, &block_file_ops);
650 + if (!snap_state.file) {
651 + error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
656 + error_setg(&snap_state.blocker, "block device is in use by savevm");
657 + blk_op_block_all(snap_state.target, snap_state.blocker);
659 + snap_state.state = SAVE_STATE_ACTIVE;
660 + snap_state.cleanup_bh = qemu_bh_new(process_savevm_cleanup, &snap_state);
661 + qemu_thread_create(&snap_state.thread, "savevm-async", process_savevm_thread,
662 + NULL, QEMU_THREAD_JOINABLE);
668 + save_snapshot_error("setup failed");
670 + if (snap_state.saved_vm_running) {
675 +void qmp_savevm_end(Error **errp)
677 + if (snap_state.state == SAVE_STATE_DONE) {
678 + error_set(errp, ERROR_CLASS_GENERIC_ERROR,
679 + "VM snapshot not started\n");
683 + if (snap_state.state == SAVE_STATE_ACTIVE) {
684 + snap_state.state = SAVE_STATE_CANCELLED;
688 + if (snap_state.saved_vm_running) {
692 + snap_state.state = SAVE_STATE_DONE;
695 +// FIXME: Deprecated
696 +void qmp_snapshot_drive(const char *device, const char *name, Error **errp)
698 + // Compatibility to older qemu-server.
699 + qmp_blockdev_snapshot_internal_sync(device, name, errp);
702 +// FIXME: Deprecated
703 +void qmp_delete_drive_snapshot(const char *device, const char *name,
706 + // Compatibility to older qemu-server.
707 + (void)qmp_blockdev_snapshot_delete_internal_sync(device, false, NULL,
711 +static ssize_t loadstate_get_buffer(void *opaque, uint8_t *buf, int64_t pos,
714 + BlockBackend *be = opaque;
715 + int64_t maxlen = blk_getlength(be);
716 + if (pos > maxlen) {
719 + if ((pos + size) > maxlen) {
720 + size = maxlen - pos - 1;
725 + return blk_pread(be, pos, buf, size);
728 +static const QEMUFileOps loadstate_file_ops = {
729 + .get_buffer = loadstate_get_buffer,
732 +int load_snapshot_from_blockdev(const char *filename, Error **errp)
735 + Error *local_err = NULL;
736 + Error *blocker = NULL;
741 + be = blk_new_open(filename, NULL, NULL, 0, &local_err);
744 + error_setg(errp, "Could not open VM state file");
748 + error_setg(&blocker, "block device is in use by load state");
749 + blk_op_block_all(be, blocker);
751 + /* restore the VM state */
752 + f = qemu_fopen_ops(be, &loadstate_file_ops);
754 + error_setg(errp, "Could not open VM state file");
758 + qemu_system_reset(SHUTDOWN_CAUSE_NONE);
759 + ret = qemu_loadvm_state(f);
762 + migration_incoming_state_destroy();
764 + error_setg_errno(errp, -ret, "Error while loading VM state");
772 + blk_op_unblock_all(be, blocker);
773 + error_free(blocker);
778 diff --git a/vl.c b/vl.c
779 index 98efed1a79..68ca370e19 100644
782 @@ -3005,6 +3005,7 @@ int main(int argc, char **argv, char **envp)
785 const char *loadvm = NULL;
786 + const char *loadstate = NULL;
787 MachineClass *machine_class;
788 const char *cpu_model;
789 const char *vga_model = NULL;
790 @@ -3572,6 +3573,9 @@ int main(int argc, char **argv, char **envp)
791 case QEMU_OPTION_loadvm:
794 + case QEMU_OPTION_loadstate:
795 + loadstate = optarg;
797 case QEMU_OPTION_full_screen:
798 dpy.has_full_screen = true;
799 dpy.full_screen = true;
800 @@ -4579,6 +4583,12 @@ int main(int argc, char **argv, char **envp)
804 + } else if (loadstate) {
805 + Error *local_err = NULL;
806 + if (load_snapshot_from_blockdev(loadstate, &local_err) < 0) {
807 + error_report_err(local_err);
811 if (replay_mode != REPLAY_MODE_NONE) {
812 replay_vmstate_init();