]> git.proxmox.com Git - pve-qemu.git/blob - debian/patches/pve/0017-PVE-add-savevm-async-for-background-state-snapshots.patch
91d47107ab21afc9cf873bb0d3d15ccd48c822c9
[pve-qemu.git] / debian / patches / pve / 0017-PVE-add-savevm-async-for-background-state-snapshots.patch
1 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
2 From: Dietmar Maurer <dietmar@proxmox.com>
3 Date: Mon, 6 Apr 2020 12:16:46 +0200
4 Subject: [PATCH] PVE: add savevm-async for background state snapshots
5
6 Put qemu_savevm_state_{header,setup} into the main loop and the rest
7 of the iteration into a coroutine. The former need to lock the
8 iothread (and we can't unlock it in the coroutine), and the latter
9 can't deal with being in a separate thread, so a coroutine it must
10 be.
11
12 Truncate output file at 1024 boundary.
13
14 Do not block the VM and save the state on aborting a snapshot, as the
15 snapshot will be invalid anyway.
16
17 Also, when aborting, wait for the target file to be closed, otherwise a
18 client might run into race-conditions when trying to remove the file
19 still opened by QEMU.
20
21 Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
22 Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
23 Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
24 [improve aborting]
25 Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
26 [FE: further improve aborting
27 adapt to removal of QEMUFileOps
28 improve condition for entering final stage]
29 Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
30 ---
31 hmp-commands-info.hx | 13 +
32 hmp-commands.hx | 33 +++
33 include/migration/snapshot.h | 2 +
34 include/monitor/hmp.h | 5 +
35 migration/meson.build | 1 +
36 migration/savevm-async.c | 532 +++++++++++++++++++++++++++++++++++
37 monitor/hmp-cmds.c | 57 ++++
38 qapi/migration.json | 34 +++
39 qapi/misc.json | 32 +++
40 qemu-options.hx | 12 +
41 softmmu/vl.c | 10 +
42 11 files changed, 731 insertions(+)
43 create mode 100644 migration/savevm-async.c
44
45 diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
46 index 754b1e8408..489c524e9e 100644
47 --- a/hmp-commands-info.hx
48 +++ b/hmp-commands-info.hx
49 @@ -540,6 +540,19 @@ SRST
50 Show current migration parameters.
51 ERST
52
53 + {
54 + .name = "savevm",
55 + .args_type = "",
56 + .params = "",
57 + .help = "show savevm status",
58 + .cmd = hmp_info_savevm,
59 + },
60 +
61 +SRST
62 + ``info savevm``
63 + Show savevm status.
64 +ERST
65 +
66 {
67 .name = "balloon",
68 .args_type = "",
69 diff --git a/hmp-commands.hx b/hmp-commands.hx
70 index 673e39a697..039be0033d 100644
71 --- a/hmp-commands.hx
72 +++ b/hmp-commands.hx
73 @@ -1815,3 +1815,36 @@ SRST
74 Dump the FDT in dtb format to *filename*.
75 ERST
76 #endif
77 +
78 + {
79 + .name = "savevm-start",
80 + .args_type = "statefile:s?",
81 + .params = "[statefile]",
82 + .help = "Prepare for snapshot and halt VM. Save VM state to statefile.",
83 + .cmd = hmp_savevm_start,
84 + },
85 +
86 + {
87 + .name = "snapshot-drive",
88 + .args_type = "device:s,name:s",
89 + .params = "device name",
90 + .help = "Create internal snapshot.",
91 + .cmd = hmp_snapshot_drive,
92 + },
93 +
94 + {
95 + .name = "delete-drive-snapshot",
96 + .args_type = "device:s,name:s",
97 + .params = "device name",
98 + .help = "Delete internal snapshot.",
99 + .cmd = hmp_delete_drive_snapshot,
100 + },
101 +
102 + {
103 + .name = "savevm-end",
104 + .args_type = "",
105 + .params = "",
106 +        .help       = "Resume VM after snapshot.",
107 + .cmd = hmp_savevm_end,
108 + .coroutine = true,
109 + },
110 diff --git a/include/migration/snapshot.h b/include/migration/snapshot.h
111 index e72083b117..c846d37806 100644
112 --- a/include/migration/snapshot.h
113 +++ b/include/migration/snapshot.h
114 @@ -61,4 +61,6 @@ bool delete_snapshot(const char *name,
115 bool has_devices, strList *devices,
116 Error **errp);
117
118 +int load_snapshot_from_blockdev(const char *filename, Error **errp);
119 +
120 #endif
121 diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h
122 index dfbc0c9a2f..440f86aba8 100644
123 --- a/include/monitor/hmp.h
124 +++ b/include/monitor/hmp.h
125 @@ -27,6 +27,7 @@ void hmp_info_status(Monitor *mon, const QDict *qdict);
126 void hmp_info_uuid(Monitor *mon, const QDict *qdict);
127 void hmp_info_chardev(Monitor *mon, const QDict *qdict);
128 void hmp_info_mice(Monitor *mon, const QDict *qdict);
129 +void hmp_info_savevm(Monitor *mon, const QDict *qdict);
130 void hmp_info_migrate(Monitor *mon, const QDict *qdict);
131 void hmp_info_migrate_capabilities(Monitor *mon, const QDict *qdict);
132 void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict);
133 @@ -81,6 +82,10 @@ void hmp_netdev_add(Monitor *mon, const QDict *qdict);
134 void hmp_netdev_del(Monitor *mon, const QDict *qdict);
135 void hmp_getfd(Monitor *mon, const QDict *qdict);
136 void hmp_closefd(Monitor *mon, const QDict *qdict);
137 +void hmp_savevm_start(Monitor *mon, const QDict *qdict);
138 +void hmp_snapshot_drive(Monitor *mon, const QDict *qdict);
139 +void hmp_delete_drive_snapshot(Monitor *mon, const QDict *qdict);
140 +void hmp_savevm_end(Monitor *mon, const QDict *qdict);
141 void hmp_sendkey(Monitor *mon, const QDict *qdict);
142 void coroutine_fn hmp_screendump(Monitor *mon, const QDict *qdict);
143 void hmp_chardev_add(Monitor *mon, const QDict *qdict);
144 diff --git a/migration/meson.build b/migration/meson.build
145 index 8cac83c06c..0842d00cd2 100644
146 --- a/migration/meson.build
147 +++ b/migration/meson.build
148 @@ -24,6 +24,7 @@ softmmu_ss.add(files(
149 'multifd-zlib.c',
150 'postcopy-ram.c',
151 'savevm.c',
152 + 'savevm-async.c',
153 'socket.c',
154 'tls.c',
155 ), gnutls)
156 diff --git a/migration/savevm-async.c b/migration/savevm-async.c
157 new file mode 100644
158 index 0000000000..4a4e91a26d
159 --- /dev/null
160 +++ b/migration/savevm-async.c
161 @@ -0,0 +1,532 @@
162 +#include "qemu/osdep.h"
163 +#include "migration/channel-savevm-async.h"
164 +#include "migration/migration.h"
165 +#include "migration/savevm.h"
166 +#include "migration/snapshot.h"
167 +#include "migration/global_state.h"
168 +#include "migration/ram.h"
169 +#include "migration/qemu-file.h"
170 +#include "sysemu/sysemu.h"
171 +#include "sysemu/runstate.h"
172 +#include "block/block.h"
173 +#include "sysemu/block-backend.h"
174 +#include "qapi/error.h"
175 +#include "qapi/qmp/qerror.h"
176 +#include "qapi/qmp/qdict.h"
177 +#include "qapi/qapi-commands-migration.h"
178 +#include "qapi/qapi-commands-misc.h"
179 +#include "qapi/qapi-commands-block.h"
180 +#include "qemu/cutils.h"
181 +#include "qemu/timer.h"
182 +#include "qemu/main-loop.h"
183 +#include "qemu/rcu.h"
184 +
185 +/* #define DEBUG_SAVEVM_STATE */
186 +
187 +#ifdef DEBUG_SAVEVM_STATE
188 +#define DPRINTF(fmt, ...) \
189 + do { printf("savevm-async: " fmt, ## __VA_ARGS__); } while (0)
190 +#else
191 +#define DPRINTF(fmt, ...) \
192 + do { } while (0)
193 +#endif
194 +
195 +enum {
196 + SAVE_STATE_DONE,
197 + SAVE_STATE_ERROR,
198 + SAVE_STATE_ACTIVE,
199 + SAVE_STATE_COMPLETED,
200 + SAVE_STATE_CANCELLED
201 +};
202 +
203 +
204 +static struct SnapshotState {
205 + BlockBackend *target;
206 + size_t bs_pos;
207 + int state;
208 + Error *error;
209 + Error *blocker;
210 + int saved_vm_running;
211 + QEMUFile *file;
212 + int64_t total_time;
213 + QEMUBH *finalize_bh;
214 + Coroutine *co;
215 + QemuCoSleep target_close_wait;
216 +} snap_state;
217 +
218 +static bool savevm_aborted(void)
219 +{
220 + return snap_state.state == SAVE_STATE_CANCELLED ||
221 + snap_state.state == SAVE_STATE_ERROR;
222 +}
223 +
224 +SaveVMInfo *qmp_query_savevm(Error **errp)
225 +{
226 + SaveVMInfo *info = g_malloc0(sizeof(*info));
227 + struct SnapshotState *s = &snap_state;
228 +
229 + if (s->state != SAVE_STATE_DONE) {
230 + info->has_bytes = true;
231 + info->bytes = s->bs_pos;
232 + switch (s->state) {
233 + case SAVE_STATE_ERROR:
234 + info->has_status = true;
235 + info->status = g_strdup("failed");
236 + info->has_total_time = true;
237 + info->total_time = s->total_time;
238 + if (s->error) {
239 + info->has_error = true;
240 + info->error = g_strdup(error_get_pretty(s->error));
241 + }
242 + break;
243 + case SAVE_STATE_ACTIVE:
244 + info->has_status = true;
245 + info->status = g_strdup("active");
246 + info->has_total_time = true;
247 + info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME)
248 + - s->total_time;
249 + break;
250 + case SAVE_STATE_COMPLETED:
251 + info->has_status = true;
252 + info->status = g_strdup("completed");
253 + info->has_total_time = true;
254 + info->total_time = s->total_time;
255 + break;
256 + }
257 + }
258 +
259 + return info;
260 +}
261 +
262 +static int save_snapshot_cleanup(void)
263 +{
264 + int ret = 0;
265 +
266 + DPRINTF("save_snapshot_cleanup\n");
267 +
268 + snap_state.total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) -
269 + snap_state.total_time;
270 +
271 + if (snap_state.file) {
272 + ret = qemu_fclose(snap_state.file);
273 + snap_state.file = NULL;
274 + }
275 +
276 + if (snap_state.target) {
277 + if (!savevm_aborted()) {
278 + /* try to truncate, but ignore errors (will fail on block devices).
279 +             * note1: bdrv_read() needs whole blocks, so we need to round up
280 + * note2: PVE requires 1024 (BDRV_SECTOR_SIZE*2) alignment
281 + */
282 + size_t size = QEMU_ALIGN_UP(snap_state.bs_pos, BDRV_SECTOR_SIZE*2);
283 + blk_truncate(snap_state.target, size, false, PREALLOC_MODE_OFF, 0, NULL);
284 + }
285 + blk_op_unblock_all(snap_state.target, snap_state.blocker);
286 + error_free(snap_state.blocker);
287 + snap_state.blocker = NULL;
288 + blk_unref(snap_state.target);
289 + snap_state.target = NULL;
290 +
291 + qemu_co_sleep_wake(&snap_state.target_close_wait);
292 + }
293 +
294 + return ret;
295 +}
296 +
297 +static void save_snapshot_error(const char *fmt, ...)
298 +{
299 + va_list ap;
300 + char *msg;
301 +
302 + va_start(ap, fmt);
303 + msg = g_strdup_vprintf(fmt, ap);
304 + va_end(ap);
305 +
306 + DPRINTF("save_snapshot_error: %s\n", msg);
307 +
308 + if (!snap_state.error) {
309 + error_set(&snap_state.error, ERROR_CLASS_GENERIC_ERROR, "%s", msg);
310 + }
311 +
312 + g_free (msg);
313 +
314 + snap_state.state = SAVE_STATE_ERROR;
315 +}
316 +
317 +static void process_savevm_finalize(void *opaque)
318 +{
319 + int ret;
320 + AioContext *iohandler_ctx = iohandler_get_aio_context();
321 + MigrationState *ms = migrate_get_current();
322 +
323 + bool aborted = savevm_aborted();
324 +
325 +#ifdef DEBUG_SAVEVM_STATE
326 + int64_t start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
327 +#endif
328 +
329 + qemu_bh_delete(snap_state.finalize_bh);
330 + snap_state.finalize_bh = NULL;
331 + snap_state.co = NULL;
332 +
333 + /* We need to own the target bdrv's context for the following functions,
334 + * so move it back. It can stay in the main context and live out its live
335 + * there, since we're done with it after this method ends anyway.
336 + */
337 + aio_context_acquire(iohandler_ctx);
338 + blk_set_aio_context(snap_state.target, qemu_get_aio_context(), NULL);
339 + aio_context_release(iohandler_ctx);
340 +
341 + ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
342 + if (ret < 0) {
343 + save_snapshot_error("vm_stop_force_state error %d", ret);
344 + }
345 +
346 + if (!aborted) {
347 + /* skip state saving if we aborted, snapshot will be invalid anyway */
348 + (void)qemu_savevm_state_complete_precopy(snap_state.file, false, false);
349 + ret = qemu_file_get_error(snap_state.file);
350 + if (ret < 0) {
351 + save_snapshot_error("qemu_savevm_state_complete_precopy error %d", ret);
352 + }
353 + }
354 +
355 + DPRINTF("state saving complete\n");
356 + DPRINTF("timing: process_savevm_finalize (state saving) took %ld ms\n",
357 + qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time);
358 +
359 + /* clear migration state */
360 + migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP,
361 + ret || aborted ? MIGRATION_STATUS_FAILED : MIGRATION_STATUS_COMPLETED);
362 + ms->to_dst_file = NULL;
363 +
364 + qemu_savevm_state_cleanup();
365 +
366 + ret = save_snapshot_cleanup();
367 + if (ret < 0) {
368 + save_snapshot_error("save_snapshot_cleanup error %d", ret);
369 + } else if (snap_state.state == SAVE_STATE_ACTIVE) {
370 + snap_state.state = SAVE_STATE_COMPLETED;
371 + } else if (aborted) {
372 + /*
373 + * If there was an error, there's no need to set a new one here.
374 + * If the snapshot was canceled, leave setting the state to
375 +         * qmp_savevm_end(), which is woken by save_snapshot_cleanup().
376 + */
377 + } else {
378 + save_snapshot_error("process_savevm_cleanup: invalid state: %d",
379 + snap_state.state);
380 + }
381 + if (snap_state.saved_vm_running) {
382 + vm_start();
383 + snap_state.saved_vm_running = false;
384 + }
385 +
386 + DPRINTF("timing: process_savevm_finalize (full) took %ld ms\n",
387 + qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time);
388 +}
389 +
390 +static void coroutine_fn process_savevm_co(void *opaque)
391 +{
392 + int ret;
393 + int64_t maxlen;
394 + BdrvNextIterator it;
395 + BlockDriverState *bs = NULL;
396 +
397 +#ifdef DEBUG_SAVEVM_STATE
398 + int64_t start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
399 +#endif
400 +
401 + ret = qemu_file_get_error(snap_state.file);
402 + if (ret < 0) {
403 + save_snapshot_error("qemu_savevm_state_setup failed");
404 + return;
405 + }
406 +
407 + while (snap_state.state == SAVE_STATE_ACTIVE) {
408 + uint64_t pending_size, pend_precopy, pend_compatible, pend_postcopy;
409 +
410 + /* pending is expected to be called without iothread lock */
411 + qemu_mutex_unlock_iothread();
412 + qemu_savevm_state_pending(snap_state.file, 0, &pend_precopy, &pend_compatible, &pend_postcopy);
413 + qemu_mutex_lock_iothread();
414 +
415 + pending_size = pend_precopy + pend_compatible + pend_postcopy;
416 +
417 + maxlen = blk_getlength(snap_state.target) - 30*1024*1024;
418 +
419 + /* Note that there is no progress for pend_postcopy when iterating */
420 + if (pending_size - pend_postcopy > 400000 && snap_state.bs_pos + pending_size < maxlen) {
421 + ret = qemu_savevm_state_iterate(snap_state.file, false);
422 + if (ret < 0) {
423 + save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
424 + break;
425 + }
426 + DPRINTF("savevm iterate pending size %lu ret %d\n", pending_size, ret);
427 + } else {
428 + qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
429 + ret = global_state_store();
430 + if (ret) {
431 + save_snapshot_error("global_state_store error %d", ret);
432 + break;
433 + }
434 +
435 + DPRINTF("savevm iterate complete\n");
436 + break;
437 + }
438 + }
439 +
440 + DPRINTF("timing: process_savevm_co took %ld ms\n",
441 + qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time);
442 +
443 +#ifdef DEBUG_SAVEVM_STATE
444 + int64_t start_time_flush = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
445 +#endif
446 + /* If a drive runs in an IOThread we can flush it async, and only
447 + * need to sync-flush whatever IO happens between now and
448 + * vm_stop_force_state. bdrv_next can only be called from main AioContext,
449 + * so move there now and after every flush.
450 + */
451 + aio_co_reschedule_self(qemu_get_aio_context());
452 + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
453 + /* target has BDRV_O_NO_FLUSH, no sense calling bdrv_flush on it */
454 + if (bs == blk_bs(snap_state.target)) {
455 + continue;
456 + }
457 +
458 + AioContext *bs_ctx = bdrv_get_aio_context(bs);
459 + if (bs_ctx != qemu_get_aio_context()) {
460 + DPRINTF("savevm: async flushing drive %s\n", bs->filename);
461 + aio_co_reschedule_self(bs_ctx);
462 + bdrv_flush(bs);
463 + aio_co_reschedule_self(qemu_get_aio_context());
464 + }
465 + }
466 +
467 + DPRINTF("timing: async flushing took %ld ms\n",
468 + qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time_flush);
469 +
470 + qemu_bh_schedule(snap_state.finalize_bh);
471 +}
472 +
473 +void qmp_savevm_start(bool has_statefile, const char *statefile, Error **errp)
474 +{
475 + Error *local_err = NULL;
476 + MigrationState *ms = migrate_get_current();
477 + AioContext *iohandler_ctx = iohandler_get_aio_context();
478 +
479 + int bdrv_oflags = BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH;
480 +
481 + if (snap_state.state != SAVE_STATE_DONE) {
482 + error_set(errp, ERROR_CLASS_GENERIC_ERROR,
483 + "VM snapshot already started\n");
484 + return;
485 + }
486 +
487 + if (migration_is_running(ms->state)) {
488 + error_set(errp, ERROR_CLASS_GENERIC_ERROR, QERR_MIGRATION_ACTIVE);
489 + return;
490 + }
491 +
492 + if (migrate_use_block()) {
493 + error_set(errp, ERROR_CLASS_GENERIC_ERROR,
494 + "Block migration and snapshots are incompatible");
495 + return;
496 + }
497 +
498 + /* initialize snapshot info */
499 + snap_state.saved_vm_running = runstate_is_running();
500 + snap_state.bs_pos = 0;
501 + snap_state.total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
502 + snap_state.blocker = NULL;
503 + snap_state.target_close_wait = (QemuCoSleep){ .to_wake = NULL };
504 +
505 + if (snap_state.error) {
506 + error_free(snap_state.error);
507 + snap_state.error = NULL;
508 + }
509 +
510 + if (!has_statefile) {
511 + vm_stop(RUN_STATE_SAVE_VM);
512 + snap_state.state = SAVE_STATE_COMPLETED;
513 + return;
514 + }
515 +
516 + if (qemu_savevm_state_blocked(errp)) {
517 + return;
518 + }
519 +
520 + /* Open the image */
521 + QDict *options = NULL;
522 + options = qdict_new();
523 + qdict_put_str(options, "driver", "raw");
524 + snap_state.target = blk_new_open(statefile, NULL, options, bdrv_oflags, &local_err);
525 + if (!snap_state.target) {
526 + error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
527 + goto restart;
528 + }
529 +
530 + QIOChannel *ioc = QIO_CHANNEL(qio_channel_savevm_async_new(snap_state.target,
531 + &snap_state.bs_pos));
532 + snap_state.file = qemu_file_new_output(ioc);
533 +
534 + if (!snap_state.file) {
535 + error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
536 + goto restart;
537 + }
538 +
539 + /*
540 + * qemu_savevm_* paths use migration code and expect a migration state.
541 + * State is cleared in process_savevm_co, but has to be initialized
542 + * here (blocking main thread, from QMP) to avoid race conditions.
543 + */
544 + migrate_init(ms);
545 + memset(&ram_counters, 0, sizeof(ram_counters));
546 + ms->to_dst_file = snap_state.file;
547 +
548 + error_setg(&snap_state.blocker, "block device is in use by savevm");
549 + blk_op_block_all(snap_state.target, snap_state.blocker);
550 +
551 + snap_state.state = SAVE_STATE_ACTIVE;
552 + snap_state.finalize_bh = qemu_bh_new(process_savevm_finalize, &snap_state);
553 + snap_state.co = qemu_coroutine_create(&process_savevm_co, NULL);
554 + qemu_mutex_unlock_iothread();
555 + qemu_savevm_state_header(snap_state.file);
556 + qemu_savevm_state_setup(snap_state.file);
557 + qemu_mutex_lock_iothread();
558 +
559 + /* Async processing from here on out happens in iohandler context, so let
560 + * the target bdrv have its home there.
561 + */
562 + blk_set_aio_context(snap_state.target, iohandler_ctx, &local_err);
563 +
564 + aio_co_schedule(iohandler_ctx, snap_state.co);
565 +
566 + return;
567 +
568 +restart:
569 +
570 + save_snapshot_error("setup failed");
571 +
572 + if (snap_state.saved_vm_running) {
573 + vm_start();
574 + snap_state.saved_vm_running = false;
575 + }
576 +}
577 +
578 +void coroutine_fn qmp_savevm_end(Error **errp)
579 +{
580 + int64_t timeout;
581 +
582 + if (snap_state.state == SAVE_STATE_DONE) {
583 + error_set(errp, ERROR_CLASS_GENERIC_ERROR,
584 + "VM snapshot not started\n");
585 + return;
586 + }
587 +
588 + if (snap_state.state == SAVE_STATE_ACTIVE) {
589 + snap_state.state = SAVE_STATE_CANCELLED;
590 + goto wait_for_close;
591 + }
592 +
593 + if (snap_state.saved_vm_running) {
594 + vm_start();
595 + snap_state.saved_vm_running = false;
596 + }
597 +
598 + snap_state.state = SAVE_STATE_DONE;
599 +
600 +wait_for_close:
601 + if (!snap_state.target) {
602 + DPRINTF("savevm-end: no target file open\n");
603 + return;
604 + }
605 +
606 + /* wait until cleanup is done before returning, this ensures that after this
607 + * call exits the statefile will be closed and can be removed immediately */
608 + DPRINTF("savevm-end: waiting for cleanup\n");
609 + timeout = 30L * 1000 * 1000 * 1000;
610 + qemu_co_sleep_ns_wakeable(&snap_state.target_close_wait,
611 + QEMU_CLOCK_REALTIME, timeout);
612 + if (snap_state.target) {
613 + save_snapshot_error("timeout waiting for target file close in "
614 + "qmp_savevm_end");
615 + /* we cannot assume the snapshot finished in this case, so leave the
616 + * state alone - caller has to figure something out */
617 + return;
618 + }
619 +
620 + // File closed and no other error, so ensure next snapshot can be started.
621 + if (snap_state.state != SAVE_STATE_ERROR) {
622 + snap_state.state = SAVE_STATE_DONE;
623 + }
624 +
625 + DPRINTF("savevm-end: cleanup done\n");
626 +}
627 +
628 +// FIXME: Deprecated
629 +void qmp_snapshot_drive(const char *device, const char *name, Error **errp)
630 +{
631 + // Compatibility to older qemu-server.
632 + qmp_blockdev_snapshot_internal_sync(device, name, errp);
633 +}
634 +
635 +// FIXME: Deprecated
636 +void qmp_delete_drive_snapshot(const char *device, const char *name,
637 + Error **errp)
638 +{
639 + // Compatibility to older qemu-server.
640 + (void)qmp_blockdev_snapshot_delete_internal_sync(device, false, NULL,
641 + true, name, errp);
642 +}
643 +
644 +int load_snapshot_from_blockdev(const char *filename, Error **errp)
645 +{
646 + BlockBackend *be;
647 + Error *local_err = NULL;
648 + Error *blocker = NULL;
649 +
650 + QEMUFile *f;
651 + size_t bs_pos = 0;
652 + int ret = -EINVAL;
653 +
654 + be = blk_new_open(filename, NULL, NULL, 0, &local_err);
655 +
656 + if (!be) {
657 + error_setg(errp, "Could not open VM state file");
658 + goto the_end;
659 + }
660 +
661 + error_setg(&blocker, "block device is in use by load state");
662 + blk_op_block_all(be, blocker);
663 +
664 + /* restore the VM state */
665 + f = qemu_file_new_input(QIO_CHANNEL(qio_channel_savevm_async_new(be, &bs_pos)));
666 + if (!f) {
667 + error_setg(errp, "Could not open VM state file");
668 + goto the_end;
669 + }
670 +
671 + qemu_system_reset(SHUTDOWN_CAUSE_NONE);
672 + ret = qemu_loadvm_state(f);
673 +
674 + /* dirty bitmap migration has a special case we need to trigger manually */
675 + dirty_bitmap_mig_before_vm_start();
676 +
677 + qemu_fclose(f);
678 + migration_incoming_state_destroy();
679 + if (ret < 0) {
680 + error_setg_errno(errp, -ret, "Error while loading VM state");
681 + goto the_end;
682 + }
683 +
684 + ret = 0;
685 +
686 + the_end:
687 + if (be) {
688 + blk_op_unblock_all(be, blocker);
689 + error_free(blocker);
690 + blk_unref(be);
691 + }
692 + return ret;
693 +}
694 diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
695 index 480b798963..cfebfd1db5 100644
696 --- a/monitor/hmp-cmds.c
697 +++ b/monitor/hmp-cmds.c
698 @@ -1906,6 +1906,63 @@ void hmp_info_memory_devices(Monitor *mon, const QDict *qdict)
699 hmp_handle_error(mon, err);
700 }
701
702 +void hmp_savevm_start(Monitor *mon, const QDict *qdict)
703 +{
704 + Error *errp = NULL;
705 + const char *statefile = qdict_get_try_str(qdict, "statefile");
706 +
707 + qmp_savevm_start(statefile != NULL, statefile, &errp);
708 + hmp_handle_error(mon, errp);
709 +}
710 +
711 +void hmp_snapshot_drive(Monitor *mon, const QDict *qdict)
712 +{
713 + Error *errp = NULL;
714 + const char *name = qdict_get_str(qdict, "name");
715 + const char *device = qdict_get_str(qdict, "device");
716 +
717 + qmp_snapshot_drive(device, name, &errp);
718 + hmp_handle_error(mon, errp);
719 +}
720 +
721 +void hmp_delete_drive_snapshot(Monitor *mon, const QDict *qdict)
722 +{
723 + Error *errp = NULL;
724 + const char *name = qdict_get_str(qdict, "name");
725 + const char *device = qdict_get_str(qdict, "device");
726 +
727 + qmp_delete_drive_snapshot(device, name, &errp);
728 + hmp_handle_error(mon, errp);
729 +}
730 +
731 +void coroutine_fn hmp_savevm_end(Monitor *mon, const QDict *qdict)
732 +{
733 + Error *errp = NULL;
734 +
735 + qmp_savevm_end(&errp);
736 + hmp_handle_error(mon, errp);
737 +}
738 +
739 +void hmp_info_savevm(Monitor *mon, const QDict *qdict)
740 +{
741 + SaveVMInfo *info;
742 + info = qmp_query_savevm(NULL);
743 +
744 + if (info->has_status) {
745 + monitor_printf(mon, "savevm status: %s\n", info->status);
746 + monitor_printf(mon, "total time: %" PRIu64 " milliseconds\n",
747 + info->total_time);
748 + } else {
749 + monitor_printf(mon, "savevm status: not running\n");
750 + }
751 + if (info->has_bytes) {
752 + monitor_printf(mon, "Bytes saved: %"PRIu64"\n", info->bytes);
753 + }
754 + if (info->has_error) {
755 + monitor_printf(mon, "Error: %s\n", info->error);
756 + }
757 +}
758 +
759 void hmp_info_iothreads(Monitor *mon, const QDict *qdict)
760 {
761 IOThreadInfoList *info_list = qmp_query_iothreads(NULL);
762 diff --git a/qapi/migration.json b/qapi/migration.json
763 index 88ecf86ac8..4435866379 100644
764 --- a/qapi/migration.json
765 +++ b/qapi/migration.json
766 @@ -261,6 +261,40 @@
767 '*compression': 'CompressionStats',
768 '*socket-address': ['SocketAddress'] } }
769
770 +##
771 +# @SaveVMInfo:
772 +#
773 +# Information about the current savevm process.
774 +#
775 +# @status: string describing the current savevm status.
776 +# This can be 'active', 'completed', 'failed'.
777 +# If this field is not returned, no savevm process
778 +# has been initiated
779 +#
780 +# @error: string containing the error message if status is failed.
781 +#
782 +# @total-time: total amount of milliseconds since savevm started.
783 +# If savevm has ended, it returns the total save time
784 +#
785 +# @bytes: total amount of data transferred
786 +#
787 +# Since: 1.3
788 +##
789 +{ 'struct': 'SaveVMInfo',
790 + 'data': {'*status': 'str', '*error': 'str',
791 + '*total-time': 'int', '*bytes': 'int'} }
792 +
793 +##
794 +# @query-savevm:
795 +#
796 +# Returns information about current savevm process.
797 +#
798 +# Returns: @SaveVMInfo
799 +#
800 +# Since: 1.3
801 +##
802 +{ 'command': 'query-savevm', 'returns': 'SaveVMInfo' }
803 +
804 ##
805 # @query-migrate:
806 #
807 diff --git a/qapi/misc.json b/qapi/misc.json
808 index 27ef5a2b20..b3ce75dcae 100644
809 --- a/qapi/misc.json
810 +++ b/qapi/misc.json
811 @@ -435,6 +435,38 @@
812 ##
813 { 'command': 'query-fdsets', 'returns': ['FdsetInfo'] }
814
815 +##
816 +# @savevm-start:
817 +#
818 +# Prepare for snapshot and halt VM. Save VM state to statefile.
819 +#
820 +##
821 +{ 'command': 'savevm-start', 'data': { '*statefile': 'str' } }
822 +
823 +##
824 +# @snapshot-drive:
825 +#
826 +# Create an internal drive snapshot.
827 +#
828 +##
829 +{ 'command': 'snapshot-drive', 'data': { 'device': 'str', 'name': 'str' } }
830 +
831 +##
832 +# @delete-drive-snapshot:
833 +#
834 +# Delete a drive snapshot.
835 +#
836 +##
837 +{ 'command': 'delete-drive-snapshot', 'data': { 'device': 'str', 'name': 'str' } }
838 +
839 +##
840 +# @savevm-end:
841 +#
842 +# Resume VM after a snapshot.
843 +#
844 +##
845 +{ 'command': 'savevm-end', 'coroutine': true }
846 +
847 ##
848 # @CommandLineParameterType:
849 #
850 diff --git a/qemu-options.hx b/qemu-options.hx
851 index 7f99d15b23..54efb127c4 100644
852 --- a/qemu-options.hx
853 +++ b/qemu-options.hx
854 @@ -4391,6 +4391,18 @@ SRST
855 Start right away with a saved state (``loadvm`` in monitor)
856 ERST
857
858 +DEF("loadstate", HAS_ARG, QEMU_OPTION_loadstate, \
859 + "-loadstate file\n" \
860 + " start right away with a saved state\n",
861 + QEMU_ARCH_ALL)
862 +SRST
863 +``-loadstate file``
864 +    Start right away with a saved state. This option does not roll back
865 +    disk state like @code{loadvm}, so the user must make sure that the disks
866 +    have the correct state. @var{file} can be any valid device URL. See the section
867 + for "Device URL Syntax" for more information.
868 +ERST
869 +
870 #ifndef _WIN32
871 DEF("daemonize", 0, QEMU_OPTION_daemonize, \
872 "-daemonize daemonize QEMU after initializing\n", QEMU_ARCH_ALL)
873 diff --git a/softmmu/vl.c b/softmmu/vl.c
874 index 5f7f6ca981..21f067d115 100644
875 --- a/softmmu/vl.c
876 +++ b/softmmu/vl.c
877 @@ -164,6 +164,7 @@ static const char *accelerators;
878 static bool have_custom_ram_size;
879 static const char *ram_memdev_id;
880 static QDict *machine_opts_dict;
881 +static const char *loadstate;
882 static QTAILQ_HEAD(, ObjectOption) object_opts = QTAILQ_HEAD_INITIALIZER(object_opts);
883 static QTAILQ_HEAD(, DeviceOption) device_opts = QTAILQ_HEAD_INITIALIZER(device_opts);
884 static int display_remote;
885 @@ -2607,6 +2608,12 @@ void qmp_x_exit_preconfig(Error **errp)
886
887 if (loadvm) {
888 load_snapshot(loadvm, NULL, false, NULL, &error_fatal);
889 + } else if (loadstate) {
890 + Error *local_err = NULL;
891 + if (load_snapshot_from_blockdev(loadstate, &local_err) < 0) {
892 + error_report_err(local_err);
893 + autostart = 0;
894 + }
895 }
896 if (replay_mode != REPLAY_MODE_NONE) {
897 replay_vmstate_init();
898 @@ -3151,6 +3158,9 @@ void qemu_init(int argc, char **argv)
899 case QEMU_OPTION_loadvm:
900 loadvm = optarg;
901 break;
902 + case QEMU_OPTION_loadstate:
903 + loadstate = optarg;
904 + break;
905 case QEMU_OPTION_full_screen:
906 dpy.has_full_screen = true;
907 dpy.full_screen = true;