From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Dietmar Maurer <dietmar@proxmox.com>
Date: Mon, 6 Apr 2020 12:16:46 +0200
Subject: [PATCH] PVE: add savevm-async for background state snapshots

Put qemu_savevm_state_{header,setup} into the main loop and the rest
of the iteration into a coroutine. The former need to lock the
iothread (and we can't unlock it in the coroutine), and the latter
can't deal with being in a separate thread, so a coroutine it must
be.

Truncate the output file at a 1024-byte boundary.

When aborting a snapshot, do not block the VM and do not save the
state, as the snapshot will be invalid anyway.

Also, when aborting, wait for the target file to be closed; otherwise a
client might run into race conditions when trying to remove a file
still opened by QEMU.
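
Rough usage sketch (the state-file path below is illustrative only,
not mandated by this patch):

    # QMP
    { "execute": "savevm-start", "arguments": { "statefile": "/tmp/vmstate.raw" } }
    { "execute": "query-savevm" }
    { "execute": "savevm-end" }

    # restore on next startup (any valid device URL works for the path):
    # qemu-system-x86_64 ... -loadstate /tmp/vmstate.raw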

Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
[SR: improve aborting
     register yank before migration_incoming_state_destroy]
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
[FE: further improve aborting
     adapt to removal of QEMUFileOps
     improve condition for entering final stage
     adapt to QAPI and other changes for 8.0]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
 hmp-commands-info.hx         |  13 +
 hmp-commands.hx              |  17 ++
 include/migration/snapshot.h |   2 +
 include/monitor/hmp.h        |   3 +
 migration/meson.build        |   1 +
 migration/savevm-async.c     | 531 +++++++++++++++++++++++++++++++++++
 monitor/hmp-cmds.c           |  38 +++
 qapi/migration.json          |  34 +++
 qapi/misc.json               |  16 ++
 qemu-options.hx              |  12 +
 softmmu/vl.c                 |  10 +
 11 files changed, 677 insertions(+)
 create mode 100644 migration/savevm-async.c

diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
index f5b37eb74a..10fdd822e0 100644
--- a/hmp-commands-info.hx
+++ b/hmp-commands-info.hx
@@ -525,6 +525,19 @@ SRST
     Show current migration parameters.
 ERST
 
+    {
+        .name       = "savevm",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show savevm status",
+        .cmd        = hmp_info_savevm,
+    },
+
+SRST
+  ``info savevm``
+    Show savevm status.
+ERST
+
     {
         .name       = "balloon",
         .args_type  = "",
diff --git a/hmp-commands.hx b/hmp-commands.hx
index 2cbd0f77a0..e352f86872 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1865,3 +1865,20 @@ SRST
     List event channels in the guest
 ERST
 #endif
+
+    {
+        .name       = "savevm-start",
+        .args_type  = "statefile:s?",
+        .params     = "[statefile]",
+        .help       = "Prepare for snapshot and halt VM. Save VM state to statefile.",
+        .cmd        = hmp_savevm_start,
+    },
+
+    {
+        .name       = "savevm-end",
+        .args_type  = "",
+        .params     = "",
+        .help       = "Resume VM after snapshot.",
+        .cmd        = hmp_savevm_end,
+        .coroutine  = true,
+    },
diff --git a/include/migration/snapshot.h b/include/migration/snapshot.h
index e72083b117..c846d37806 100644
--- a/include/migration/snapshot.h
+++ b/include/migration/snapshot.h
@@ -61,4 +61,6 @@ bool delete_snapshot(const char *name,
                      bool has_devices, strList *devices,
                      Error **errp);
 
+int load_snapshot_from_blockdev(const char *filename, Error **errp);
+
 #endif
diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h
index 13f9a2dedb..7a7def7530 100644
--- a/include/monitor/hmp.h
+++ b/include/monitor/hmp.h
@@ -28,6 +28,7 @@ void hmp_info_status(Monitor *mon, const QDict *qdict);
 void hmp_info_uuid(Monitor *mon, const QDict *qdict);
 void hmp_info_chardev(Monitor *mon, const QDict *qdict);
 void hmp_info_mice(Monitor *mon, const QDict *qdict);
+void hmp_info_savevm(Monitor *mon, const QDict *qdict);
 void hmp_info_migrate(Monitor *mon, const QDict *qdict);
 void hmp_info_migrate_capabilities(Monitor *mon, const QDict *qdict);
 void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict);
@@ -94,6 +95,8 @@ void hmp_closefd(Monitor *mon, const QDict *qdict);
 void hmp_mouse_move(Monitor *mon, const QDict *qdict);
 void hmp_mouse_button(Monitor *mon, const QDict *qdict);
 void hmp_mouse_set(Monitor *mon, const QDict *qdict);
+void hmp_savevm_start(Monitor *mon, const QDict *qdict);
+void hmp_savevm_end(Monitor *mon, const QDict *qdict);
 void hmp_sendkey(Monitor *mon, const QDict *qdict);
 void coroutine_fn hmp_screendump(Monitor *mon, const QDict *qdict);
 void hmp_chardev_add(Monitor *mon, const QDict *qdict);
diff --git a/migration/meson.build b/migration/meson.build
index 37ddcb5d60..07f6057acc 100644
--- a/migration/meson.build
+++ b/migration/meson.build
@@ -26,6 +26,7 @@ system_ss.add(files(
   'options.c',
   'postcopy-ram.c',
   'savevm.c',
+  'savevm-async.c',
   'socket.c',
   'tls.c',
   'threadinfo.c',
diff --git a/migration/savevm-async.c b/migration/savevm-async.c
new file mode 100644
index 0000000000..e9fc18fb10
--- /dev/null
+++ b/migration/savevm-async.c
@@ -0,0 +1,531 @@
+#include "qemu/osdep.h"
+#include "migration/channel-savevm-async.h"
+#include "migration/migration.h"
+#include "migration/migration-stats.h"
+#include "migration/options.h"
+#include "migration/savevm.h"
+#include "migration/snapshot.h"
+#include "migration/global_state.h"
+#include "migration/ram.h"
+#include "migration/qemu-file.h"
+#include "sysemu/sysemu.h"
+#include "sysemu/runstate.h"
+#include "block/block.h"
+#include "sysemu/block-backend.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qerror.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qapi-commands-migration.h"
+#include "qapi/qapi-commands-misc.h"
+#include "qapi/qapi-commands-block.h"
+#include "qemu/cutils.h"
+#include "qemu/timer.h"
+#include "qemu/main-loop.h"
+#include "qemu/rcu.h"
+#include "qemu/yank.h"
+
+/* #define DEBUG_SAVEVM_STATE */
+
+#ifdef DEBUG_SAVEVM_STATE
+#define DPRINTF(fmt, ...) \
+    do { printf("savevm-async: " fmt, ## __VA_ARGS__); } while (0)
+#else
+#define DPRINTF(fmt, ...) \
+    do { } while (0)
+#endif
+
+enum {
+    SAVE_STATE_DONE,
+    SAVE_STATE_ERROR,
+    SAVE_STATE_ACTIVE,
+    SAVE_STATE_COMPLETED,
+    SAVE_STATE_CANCELLED
+};
+
+
+static struct SnapshotState {
+    BlockBackend *target;
+    size_t bs_pos;
+    int state;
+    Error *error;
+    Error *blocker;
+    int saved_vm_running;
+    QEMUFile *file;
+    int64_t total_time;
+    QEMUBH *finalize_bh;
+    Coroutine *co;
+    QemuCoSleep target_close_wait;
+} snap_state;
+
+static bool savevm_aborted(void)
+{
+    return snap_state.state == SAVE_STATE_CANCELLED ||
+        snap_state.state == SAVE_STATE_ERROR;
+}
+
+SaveVMInfo *qmp_query_savevm(Error **errp)
+{
+    SaveVMInfo *info = g_malloc0(sizeof(*info));
+    struct SnapshotState *s = &snap_state;
+
+    if (s->state != SAVE_STATE_DONE) {
+        info->has_bytes = true;
+        info->bytes = s->bs_pos;
+        switch (s->state) {
+        case SAVE_STATE_ERROR:
+            info->status = g_strdup("failed");
+            info->has_total_time = true;
+            info->total_time = s->total_time;
+            if (s->error) {
+                info->error = g_strdup(error_get_pretty(s->error));
+            }
+            break;
+        case SAVE_STATE_ACTIVE:
+            info->status = g_strdup("active");
+            info->has_total_time = true;
+            info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME)
+                - s->total_time;
+            break;
+        case SAVE_STATE_COMPLETED:
+            info->status = g_strdup("completed");
+            info->has_total_time = true;
+            info->total_time = s->total_time;
+            break;
+        }
+    }
+
+    return info;
+}
+
+static int save_snapshot_cleanup(void)
+{
+    int ret = 0;
+
+    DPRINTF("save_snapshot_cleanup\n");
+
+    snap_state.total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) -
+        snap_state.total_time;
+
+    if (snap_state.file) {
+        ret = qemu_fclose(snap_state.file);
+        snap_state.file = NULL;
+    }
+
+    if (snap_state.target) {
+        if (!savevm_aborted()) {
+            /* try to truncate, but ignore errors (will fail on block devices).
+             * note1: bdrv_read() needs whole blocks, so we need to round up
+             * note2: PVE requires 1024 (BDRV_SECTOR_SIZE*2) alignment
+             */
+            size_t size = QEMU_ALIGN_UP(snap_state.bs_pos, BDRV_SECTOR_SIZE*2);
+            blk_truncate(snap_state.target, size, false, PREALLOC_MODE_OFF, 0, NULL);
+        }
+        blk_op_unblock_all(snap_state.target, snap_state.blocker);
+        error_free(snap_state.blocker);
+        snap_state.blocker = NULL;
+        blk_unref(snap_state.target);
+        snap_state.target = NULL;
+
+        qemu_co_sleep_wake(&snap_state.target_close_wait);
+    }
+
+    return ret;
+}
+
+static void G_GNUC_PRINTF(1, 2) save_snapshot_error(const char *fmt, ...)
+{
+    va_list ap;
+    char *msg;
+
+    va_start(ap, fmt);
+    msg = g_strdup_vprintf(fmt, ap);
+    va_end(ap);
+
+    DPRINTF("save_snapshot_error: %s\n", msg);
+
+    if (!snap_state.error) {
+        error_set(&snap_state.error, ERROR_CLASS_GENERIC_ERROR, "%s", msg);
+    }
+
+    g_free(msg);
+
+    snap_state.state = SAVE_STATE_ERROR;
+}
+
+static void process_savevm_finalize(void *opaque)
+{
+    int ret;
+    AioContext *iohandler_ctx = iohandler_get_aio_context();
+    MigrationState *ms = migrate_get_current();
+
+    bool aborted = savevm_aborted();
+
+#ifdef DEBUG_SAVEVM_STATE
+    int64_t start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+#endif
+
+    qemu_bh_delete(snap_state.finalize_bh);
+    snap_state.finalize_bh = NULL;
+    snap_state.co = NULL;
+
+    /* We need to own the target bdrv's context for the following functions,
+     * so move it back. It can stay in the main context and live out its life
+     * there, since we're done with it after this method ends anyway.
+     */
+    aio_context_acquire(iohandler_ctx);
+    blk_set_aio_context(snap_state.target, qemu_get_aio_context(), NULL);
+    aio_context_release(iohandler_ctx);
+
+    ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
+    if (ret < 0) {
+        save_snapshot_error("vm_stop_force_state error %d", ret);
+    }
+
+    if (!aborted) {
+        /* skip state saving if we aborted, snapshot will be invalid anyway */
+        (void)qemu_savevm_state_complete_precopy(snap_state.file, false, false);
+        ret = qemu_file_get_error(snap_state.file);
+        if (ret < 0) {
+            save_snapshot_error("qemu_savevm_state_complete_precopy error %d", ret);
+        }
+    }
+
+    DPRINTF("state saving complete\n");
+    DPRINTF("timing: process_savevm_finalize (state saving) took %ld ms\n",
+            qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time);
+
+    /* clear migration state */
+    migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP,
+                      ret || aborted ? MIGRATION_STATUS_FAILED : MIGRATION_STATUS_COMPLETED);
+    ms->to_dst_file = NULL;
+
+    qemu_savevm_state_cleanup();
+
+    ret = save_snapshot_cleanup();
+    if (ret < 0) {
+        save_snapshot_error("save_snapshot_cleanup error %d", ret);
+    } else if (snap_state.state == SAVE_STATE_ACTIVE) {
+        snap_state.state = SAVE_STATE_COMPLETED;
+    } else if (aborted) {
+        /*
+         * If there was an error, there's no need to set a new one here.
+         * If the snapshot was canceled, leave setting the state to
+         * qmp_savevm_end(), which is woken by save_snapshot_cleanup().
+         */
+    } else {
+        save_snapshot_error("process_savevm_finalize: invalid state: %d",
+                            snap_state.state);
+    }
+    if (snap_state.saved_vm_running) {
+        vm_start();
+        snap_state.saved_vm_running = false;
+    }
+
+    DPRINTF("timing: process_savevm_finalize (full) took %ld ms\n",
+            qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time);
+}
+
+static void coroutine_fn process_savevm_co(void *opaque)
+{
+    int ret;
+    int64_t maxlen;
+    BdrvNextIterator it;
+    BlockDriverState *bs = NULL;
+
+#ifdef DEBUG_SAVEVM_STATE
+    int64_t start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+#endif
+
+    ret = qemu_file_get_error(snap_state.file);
+    if (ret < 0) {
+        save_snapshot_error("qemu_savevm_state_setup failed");
+        return;
+    }
+
+    while (snap_state.state == SAVE_STATE_ACTIVE) {
+        uint64_t pending_size, pend_precopy, pend_postcopy;
+        uint64_t threshold = 400 * 1000;
+
+        /*
+         * pending_{estimate,exact} are expected to be called without iothread
+         * lock. Similar to what is done in migration.c, call the exact variant
+         * only once pend_precopy in the estimate is below the threshold.
+         */
+        qemu_mutex_unlock_iothread();
+        qemu_savevm_state_pending_estimate(&pend_precopy, &pend_postcopy);
+        if (pend_precopy <= threshold) {
+            qemu_savevm_state_pending_exact(&pend_precopy, &pend_postcopy);
+        }
+        qemu_mutex_lock_iothread();
+        pending_size = pend_precopy + pend_postcopy;
+
+        /*
+         * A guest reaching this cutoff is dirtying lots of RAM. It should be
+         * large enough so that the guest can't dirty this much between the
+         * check and the guest actually being stopped, but it should be small
+         * enough to avoid long downtimes for non-hibernation snapshots.
+         */
+        maxlen = blk_getlength(snap_state.target) - 100*1024*1024;
+
+        /* Note that there is no progress for pend_postcopy when iterating */
+        if (pend_precopy > threshold && snap_state.bs_pos + pending_size < maxlen) {
+            ret = qemu_savevm_state_iterate(snap_state.file, false);
+            if (ret < 0) {
+                save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
+                break;
+            }
+            DPRINTF("savevm iterate pending size %lu ret %d\n", pending_size, ret);
+        } else {
+            qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
+            global_state_store();
+
+            DPRINTF("savevm iterate complete\n");
+            break;
+        }
+    }
+
+    DPRINTF("timing: process_savevm_co took %ld ms\n",
+            qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time);
+
+#ifdef DEBUG_SAVEVM_STATE
+    int64_t start_time_flush = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+#endif
+    /* If a drive runs in an IOThread we can flush it async, and only
+     * need to sync-flush whatever IO happens between now and
+     * vm_stop_force_state. bdrv_next can only be called from main AioContext,
+     * so move there now and after every flush.
+     */
+    aio_co_reschedule_self(qemu_get_aio_context());
+    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
+        /* target has BDRV_O_NO_FLUSH, no sense calling bdrv_flush on it */
+        if (bs == blk_bs(snap_state.target)) {
+            continue;
+        }
+
+        AioContext *bs_ctx = bdrv_get_aio_context(bs);
+        if (bs_ctx != qemu_get_aio_context()) {
+            DPRINTF("savevm: async flushing drive %s\n", bs->filename);
+            aio_co_reschedule_self(bs_ctx);
+            bdrv_graph_co_rdlock();
+            bdrv_flush(bs);
+            bdrv_graph_co_rdunlock();
+            aio_co_reschedule_self(qemu_get_aio_context());
+        }
+    }
+
+    DPRINTF("timing: async flushing took %ld ms\n",
+            qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time_flush);
+
+    qemu_bh_schedule(snap_state.finalize_bh);
+}
+
+void qmp_savevm_start(const char *statefile, Error **errp)
+{
+    Error *local_err = NULL;
+    MigrationState *ms = migrate_get_current();
+    AioContext *iohandler_ctx = iohandler_get_aio_context();
+
+    int bdrv_oflags = BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH;
+
+    if (snap_state.state != SAVE_STATE_DONE) {
+        error_set(errp, ERROR_CLASS_GENERIC_ERROR,
+                  "VM snapshot already started");
+        return;
+    }
+
+    if (migration_is_running(ms->state)) {
+        error_set(errp, ERROR_CLASS_GENERIC_ERROR, QERR_MIGRATION_ACTIVE);
+        return;
+    }
+
+    if (migrate_block()) {
+        error_set(errp, ERROR_CLASS_GENERIC_ERROR,
+                  "Block migration and snapshots are incompatible");
+        return;
+    }
+
+    /* initialize snapshot info */
+    snap_state.saved_vm_running = runstate_is_running();
+    snap_state.bs_pos = 0;
+    snap_state.total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+    snap_state.blocker = NULL;
+    snap_state.target_close_wait = (QemuCoSleep){ .to_wake = NULL };
+
+    if (snap_state.error) {
+        error_free(snap_state.error);
+        snap_state.error = NULL;
+    }
+
+    if (!statefile) {
+        vm_stop(RUN_STATE_SAVE_VM);
+        snap_state.state = SAVE_STATE_COMPLETED;
+        return;
+    }
+
+    if (qemu_savevm_state_blocked(errp)) {
+        return;
+    }
+
+    /* Open the image */
+    QDict *options = NULL;
+    options = qdict_new();
+    qdict_put_str(options, "driver", "raw");
+    snap_state.target = blk_new_open(statefile, NULL, options, bdrv_oflags, &local_err);
+    if (!snap_state.target) {
+        error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
+        goto restart;
+    }
+
+    QIOChannel *ioc = QIO_CHANNEL(qio_channel_savevm_async_new(snap_state.target,
+                                                               &snap_state.bs_pos));
+    snap_state.file = qemu_file_new_output(ioc);
+
+    if (!snap_state.file) {
+        error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
+        goto restart;
+    }
+
+    /*
+     * qemu_savevm_* paths use migration code and expect a migration state.
+     * State is cleared in process_savevm_co, but has to be initialized
+     * here (blocking main thread, from QMP) to avoid race conditions.
+     */
+    migrate_init(ms);
+    memset(&mig_stats, 0, sizeof(mig_stats));
+    memset(&compression_counters, 0, sizeof(compression_counters));
+    ms->to_dst_file = snap_state.file;
+
+    error_setg(&snap_state.blocker, "block device is in use by savevm");
+    blk_op_block_all(snap_state.target, snap_state.blocker);
+
+    snap_state.state = SAVE_STATE_ACTIVE;
+    snap_state.finalize_bh = qemu_bh_new(process_savevm_finalize, &snap_state);
+    snap_state.co = qemu_coroutine_create(&process_savevm_co, NULL);
+    qemu_mutex_unlock_iothread();
+    qemu_savevm_state_header(snap_state.file);
+    qemu_savevm_state_setup(snap_state.file);
+    qemu_mutex_lock_iothread();
+
+    /* Async processing from here on out happens in iohandler context, so let
+     * the target bdrv have its home there.
+     */
+    blk_set_aio_context(snap_state.target, iohandler_ctx, &local_err);
+
+    aio_co_schedule(iohandler_ctx, snap_state.co);
+
+    return;
+
+restart:
+
+    save_snapshot_error("setup failed");
+
+    if (snap_state.saved_vm_running) {
+        vm_start();
+        snap_state.saved_vm_running = false;
+    }
+}
+
+void coroutine_fn qmp_savevm_end(Error **errp)
+{
+    int64_t timeout;
+
+    if (snap_state.state == SAVE_STATE_DONE) {
+        error_set(errp, ERROR_CLASS_GENERIC_ERROR,
+                  "VM snapshot not started");
+        return;
+    }
+
+    if (snap_state.state == SAVE_STATE_ACTIVE) {
+        snap_state.state = SAVE_STATE_CANCELLED;
+        goto wait_for_close;
+    }
+
+    if (snap_state.saved_vm_running) {
+        vm_start();
+        snap_state.saved_vm_running = false;
+    }
+
+    snap_state.state = SAVE_STATE_DONE;
+
+wait_for_close:
+    if (!snap_state.target) {
+        DPRINTF("savevm-end: no target file open\n");
+        return;
+    }
+
+    /* wait until cleanup is done before returning; this ensures that after
+     * this call exits the statefile will be closed and can be removed immediately */
+    DPRINTF("savevm-end: waiting for cleanup\n");
+    timeout = 30LL * 1000 * 1000 * 1000;
+    qemu_co_sleep_ns_wakeable(&snap_state.target_close_wait,
+                              QEMU_CLOCK_REALTIME, timeout);
+    if (snap_state.target) {
+        save_snapshot_error("timeout waiting for target file close in "
+                            "qmp_savevm_end");
+        /* we cannot assume the snapshot finished in this case, so leave the
+         * state alone - caller has to figure something out */
+        return;
+    }
+
+    // File closed and no other error, so ensure next snapshot can be started.
+    if (snap_state.state != SAVE_STATE_ERROR) {
+        snap_state.state = SAVE_STATE_DONE;
+    }
+
+    DPRINTF("savevm-end: cleanup done\n");
+}
+
+int load_snapshot_from_blockdev(const char *filename, Error **errp)
+{
+    BlockBackend *be;
+    Error *local_err = NULL;
+    Error *blocker = NULL;
+
+    QEMUFile *f;
+    size_t bs_pos = 0;
+    int ret = -EINVAL;
+
+    be = blk_new_open(filename, NULL, NULL, 0, &local_err);
+
+    if (!be) {
+        error_setg(errp, "Could not open VM state file");
+        goto the_end;
+    }
+
+    error_setg(&blocker, "block device is in use by load state");
+    blk_op_block_all(be, blocker);
+
+    /* restore the VM state */
+    f = qemu_file_new_input(QIO_CHANNEL(qio_channel_savevm_async_new(be, &bs_pos)));
+    if (!f) {
+        error_setg(errp, "Could not open VM state file");
+        goto the_end;
+    }
+
+    qemu_system_reset(SHUTDOWN_CAUSE_NONE);
+    ret = qemu_loadvm_state(f);
+
+    /* dirty bitmap migration has a special case we need to trigger manually */
+    dirty_bitmap_mig_before_vm_start();
+
+    qemu_fclose(f);
+
+    /* state_destroy assumes a real migration which would have added a yank */
+    yank_register_instance(MIGRATION_YANK_INSTANCE, &error_abort);
+
+    migration_incoming_state_destroy();
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "Error while loading VM state");
+        goto the_end;
+    }
+
+    ret = 0;
+
+ the_end:
+    if (be) {
+        blk_op_unblock_all(be, blocker);
+        error_free(blocker);
+        blk_unref(be);
+    }
+    return ret;
+}
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index 6c559b48c8..91be698308 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -22,6 +22,7 @@
 #include "monitor/monitor-internal.h"
 #include "qapi/error.h"
 #include "qapi/qapi-commands-control.h"
+#include "qapi/qapi-commands-migration.h"
 #include "qapi/qapi-commands-misc.h"
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qerror.h"
@@ -443,3 +444,40 @@ void hmp_info_mtree(Monitor *mon, const QDict *qdict)
 
     mtree_info(flatview, dispatch_tree, owner, disabled);
 }
+
+void hmp_savevm_start(Monitor *mon, const QDict *qdict)
+{
+    Error *errp = NULL;
+    const char *statefile = qdict_get_try_str(qdict, "statefile");
+
+    qmp_savevm_start(statefile, &errp);
+    hmp_handle_error(mon, errp);
+}
+
+void coroutine_fn hmp_savevm_end(Monitor *mon, const QDict *qdict)
+{
+    Error *errp = NULL;
+
+    qmp_savevm_end(&errp);
+    hmp_handle_error(mon, errp);
+}
+
+void hmp_info_savevm(Monitor *mon, const QDict *qdict)
+{
+    SaveVMInfo *info;
+    info = qmp_query_savevm(NULL);
+
+    if (info->status) {
+        monitor_printf(mon, "savevm status: %s\n", info->status);
+        monitor_printf(mon, "total time: %" PRIu64 " milliseconds\n",
+                       info->total_time);
+    } else {
+        monitor_printf(mon, "savevm status: not running\n");
+    }
+    if (info->has_bytes) {
+        monitor_printf(mon, "Bytes saved: %"PRIu64"\n", info->bytes);
+    }
+    if (info->error) {
+        monitor_printf(mon, "Error: %s\n", info->error);
+    }
+}
diff --git a/qapi/migration.json b/qapi/migration.json
index 8843e74b59..aca0ca1ac1 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -291,6 +291,40 @@
            '*dirty-limit-throttle-time-per-round': 'uint64',
            '*dirty-limit-ring-full-time': 'uint64'} }
 
+##
+# @SaveVMInfo:
+#
+# Information about the current savevm process.
+#
+# @status: string describing the current savevm status.
+#     This can be 'active', 'completed', 'failed'.
+#     If this field is not returned, no savevm process
+#     has been initiated.
+#
+# @error: string containing error message if status is failed.
+#
+# @total-time: total amount of milliseconds since savevm started.
+#     If savevm has ended, it returns the total save time.
+#
+# @bytes: total amount of data transferred
+#
+# Since: 1.3
+##
+{ 'struct': 'SaveVMInfo',
+  'data': {'*status': 'str', '*error': 'str',
+           '*total-time': 'int', '*bytes': 'int'} }
+
+##
+# @query-savevm:
+#
+# Returns information about current savevm process.
+#
+# Returns: @SaveVMInfo
+#
+# Since: 1.3
+##
+{ 'command': 'query-savevm', 'returns': 'SaveVMInfo' }
+
 ##
 # @query-migrate:
 #
diff --git a/qapi/misc.json b/qapi/misc.json
index cda2effa81..94a58bb0bf 100644
--- a/qapi/misc.json
+++ b/qapi/misc.json
@@ -456,6 +456,22 @@
 ##
 { 'command': 'query-fdsets', 'returns': ['FdsetInfo'] }
 
+##
+# @savevm-start:
+#
+# Prepare for snapshot and halt VM. Save VM state to statefile.
+#
+##
+{ 'command': 'savevm-start', 'data': { '*statefile': 'str' } }
+
+##
+# @savevm-end:
+#
+# Resume VM after a snapshot.
+#
+##
+{ 'command': 'savevm-end', 'coroutine': true }
+
 ##
 # @CommandLineParameterType:
 #
diff --git a/qemu-options.hx b/qemu-options.hx
index b56f6b2fb2..c8c78c92d4 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -4479,6 +4479,18 @@ SRST
     Start right away with a saved state (``loadvm`` in monitor)
 ERST
 
+DEF("loadstate", HAS_ARG, QEMU_OPTION_loadstate, \
+    "-loadstate file\n" \
+    "                start right away with a saved state\n",
+    QEMU_ARCH_ALL)
+SRST
+``-loadstate file``
+    Start right away with a saved state. Unlike ``loadvm``, this option
+    does not roll back the disk state, so the user must make sure the
+    disks are in the correct state. ``file`` can be any valid device URL.
+    See the "Device URL Syntax" section for more information.
+ERST
+
 #ifndef _WIN32
 DEF("daemonize", 0, QEMU_OPTION_daemonize, \
     "-daemonize      daemonize QEMU after initializing\n", QEMU_ARCH_ALL)
diff --git a/softmmu/vl.c b/softmmu/vl.c
index b0b96f67fa..f3251de3e7 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -164,6 +164,7 @@ static const char *accelerators;
 static bool have_custom_ram_size;
 static const char *ram_memdev_id;
 static QDict *machine_opts_dict;
+static const char *loadstate;
 static QTAILQ_HEAD(, ObjectOption) object_opts = QTAILQ_HEAD_INITIALIZER(object_opts);
 static QTAILQ_HEAD(, DeviceOption) device_opts = QTAILQ_HEAD_INITIALIZER(device_opts);
 static int display_remote;
@@ -2643,6 +2644,12 @@ void qmp_x_exit_preconfig(Error **errp)
 
     if (loadvm) {
         load_snapshot(loadvm, NULL, false, NULL, &error_fatal);
+    } else if (loadstate) {
+        Error *local_err = NULL;
+        if (load_snapshot_from_blockdev(loadstate, &local_err) < 0) {
+            error_report_err(local_err);
+            autostart = 0;
+        }
     }
     if (replay_mode != REPLAY_MODE_NONE) {
         replay_vmstate_init();
@@ -3190,6 +3197,9 @@ void qemu_init(int argc, char **argv)
         case QEMU_OPTION_loadvm:
             loadvm = optarg;
             break;
+        case QEMU_OPTION_loadstate:
+            loadstate = optarg;
+            break;
         case QEMU_OPTION_full_screen:
             dpy.has_full_screen = true;
             dpy.full_screen = true;