From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Dietmar Maurer <dietmar@proxmox.com>
Date: Mon, 6 Apr 2020 12:16:46 +0200
Subject: [PATCH] PVE: add savevm-async for background state snapshots

Put qemu_savevm_state_{header,setup} into the main loop and the rest
of the iteration into a coroutine. The former need to lock the
iothread (and we can't unlock it in the coroutine), and the latter
can't deal with being in a separate thread, so a coroutine it must
be.
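
As a rough sketch, the resulting control flow looks as follows
(simplified from the code added below; locking and error handling are
elided, and "threshold" stands in for the hard-coded cutoff used in
process_savevm_co):

    /* QMP handler: runs in the main loop and does the blocking setup */
    void qmp_savevm_start(...)
    {
        qemu_savevm_state_header(snap_state.file);
        qemu_savevm_state_setup(snap_state.file);
        /* continue asynchronously in the iohandler AioContext */
        aio_co_schedule(iohandler_ctx, snap_state.co);
    }

    /* coroutine: iterates until the remaining state is small enough */
    static void coroutine_fn process_savevm_co(void *opaque)
    {
        while (pending_size > threshold) {
            qemu_savevm_state_iterate(snap_state.file, false);
        }
        /* stop the VM and write the remaining state in a bottom half */
        qemu_bh_schedule(snap_state.finalize_bh);
    }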

Truncate the output file at a 1024-byte boundary.

When a snapshot is aborted, do not block the VM to save its state, as
the snapshot will be invalid anyway.

Also, when aborting, wait for the target file to be closed; otherwise a
client might run into race conditions when trying to remove a file that
is still open in QEMU.

Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
[improve aborting]
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
---
 hmp-commands-info.hx         |  13 +
 hmp-commands.hx              |  33 ++
 include/migration/snapshot.h |   2 +
 include/monitor/hmp.h        |   5 +
 migration/meson.build        |   1 +
 migration/savevm-async.c     | 598 +++++++++++++++++++++++++++++++++++
 monitor/hmp-cmds.c           |  57 ++++
 qapi/migration.json          |  34 ++
 qapi/misc.json               |  32 ++
 qemu-options.hx              |  12 +
 softmmu/vl.c                 |  10 +
 11 files changed, 797 insertions(+)
 create mode 100644 migration/savevm-async.c

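For illustration, a session using the new HMP commands could look like
this (the drive name, statefile path and output values are made up):

    (qemu) savevm-start /ssd/vmstate.raw
    (qemu) snapshot-drive drive-scsi0 snap1
    (qemu) info savevm
    savevm status: completed
    total time: 1893 milliseconds
    Bytes saved: 268435456
    (qemu) savevm-end
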
diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
index 27206ac049..e6dd3be07a 100644
--- a/hmp-commands-info.hx
+++ b/hmp-commands-info.hx
@@ -551,6 +551,19 @@ SRST
     Show current migration parameters.
 ERST
 
+    {
+        .name       = "savevm",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show savevm status",
+        .cmd        = hmp_info_savevm,
+    },
+
+SRST
+  ``info savevm``
+    Show savevm status.
+ERST
+
     {
         .name       = "balloon",
         .args_type  = "",
diff --git a/hmp-commands.hx b/hmp-commands.hx
index d78e4cfc47..42203dbe92 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1744,3 +1744,36 @@ ERST
         .help       = "start a round of guest dirty rate measurement",
         .cmd        = hmp_calc_dirty_rate,
     },
+
+    {
+        .name       = "savevm-start",
+        .args_type  = "statefile:s?",
+        .params     = "[statefile]",
+        .help       = "Prepare for snapshot and halt VM. Save VM state to statefile.",
+        .cmd        = hmp_savevm_start,
+    },
+
+    {
+        .name       = "snapshot-drive",
+        .args_type  = "device:s,name:s",
+        .params     = "device name",
+        .help       = "Create internal snapshot.",
+        .cmd        = hmp_snapshot_drive,
+    },
+
+    {
+        .name       = "delete-drive-snapshot",
+        .args_type  = "device:s,name:s",
+        .params     = "device name",
+        .help       = "Delete internal snapshot.",
+        .cmd        = hmp_delete_drive_snapshot,
+    },
+
+    {
+        .name       = "savevm-end",
+        .args_type  = "",
+        .params     = "",
+        .help       = "Resume VM after snapshot.",
+        .cmd        = hmp_savevm_end,
+        .coroutine  = true,
+    },
diff --git a/include/migration/snapshot.h b/include/migration/snapshot.h
index e72083b117..c846d37806 100644
--- a/include/migration/snapshot.h
+++ b/include/migration/snapshot.h
@@ -61,4 +61,6 @@ bool delete_snapshot(const char *name,
                      bool has_devices, strList *devices,
                      Error **errp);
 
+int load_snapshot_from_blockdev(const char *filename, Error **errp);
+
 #endif
diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h
index 3baa1058e2..1247d7362a 100644
--- a/include/monitor/hmp.h
+++ b/include/monitor/hmp.h
@@ -25,6 +25,7 @@ void hmp_info_status(Monitor *mon, const QDict *qdict);
 void hmp_info_uuid(Monitor *mon, const QDict *qdict);
 void hmp_info_chardev(Monitor *mon, const QDict *qdict);
 void hmp_info_mice(Monitor *mon, const QDict *qdict);
+void hmp_info_savevm(Monitor *mon, const QDict *qdict);
 void hmp_info_migrate(Monitor *mon, const QDict *qdict);
 void hmp_info_migrate_capabilities(Monitor *mon, const QDict *qdict);
 void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict);
@@ -79,6 +80,10 @@ void hmp_netdev_add(Monitor *mon, const QDict *qdict);
 void hmp_netdev_del(Monitor *mon, const QDict *qdict);
 void hmp_getfd(Monitor *mon, const QDict *qdict);
 void hmp_closefd(Monitor *mon, const QDict *qdict);
+void hmp_savevm_start(Monitor *mon, const QDict *qdict);
+void hmp_snapshot_drive(Monitor *mon, const QDict *qdict);
+void hmp_delete_drive_snapshot(Monitor *mon, const QDict *qdict);
+void hmp_savevm_end(Monitor *mon, const QDict *qdict);
 void hmp_sendkey(Monitor *mon, const QDict *qdict);
 void hmp_screendump(Monitor *mon, const QDict *qdict);
 void hmp_chardev_add(Monitor *mon, const QDict *qdict);
diff --git a/migration/meson.build b/migration/meson.build
index f8714dcb15..ea9aedeefc 100644
--- a/migration/meson.build
+++ b/migration/meson.build
@@ -23,6 +23,7 @@ softmmu_ss.add(files(
   'multifd-zlib.c',
   'postcopy-ram.c',
   'savevm.c',
+  'savevm-async.c',
   'socket.c',
   'tls.c',
 ), gnutls)
diff --git a/migration/savevm-async.c b/migration/savevm-async.c
new file mode 100644
index 0000000000..79a0cda906
--- /dev/null
+++ b/migration/savevm-async.c
@@ -0,0 +1,598 @@
+#include "qemu/osdep.h"
+#include "migration/migration.h"
+#include "migration/savevm.h"
+#include "migration/snapshot.h"
+#include "migration/global_state.h"
+#include "migration/ram.h"
+#include "migration/qemu-file.h"
+#include "sysemu/sysemu.h"
+#include "sysemu/runstate.h"
+#include "block/block.h"
+#include "sysemu/block-backend.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qerror.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qapi-commands-migration.h"
+#include "qapi/qapi-commands-misc.h"
+#include "qapi/qapi-commands-block.h"
+#include "qemu/cutils.h"
+#include "qemu/timer.h"
+#include "qemu/main-loop.h"
+#include "qemu/rcu.h"
+
+/* #define DEBUG_SAVEVM_STATE */
+
+/* used while an emulated sync operation is in progress */
+#define NOT_DONE -EINPROGRESS
+
+#ifdef DEBUG_SAVEVM_STATE
+#define DPRINTF(fmt, ...) \
+    do { printf("savevm-async: " fmt, ## __VA_ARGS__); } while (0)
+#else
+#define DPRINTF(fmt, ...) \
+    do { } while (0)
+#endif
+
+enum {
+    SAVE_STATE_DONE,
+    SAVE_STATE_ERROR,
+    SAVE_STATE_ACTIVE,
+    SAVE_STATE_COMPLETED,
+    SAVE_STATE_CANCELLED
+};
+
+
+static struct SnapshotState {
+    BlockBackend *target;
+    size_t bs_pos;
+    int state;
+    Error *error;
+    Error *blocker;
+    int saved_vm_running;
+    QEMUFile *file;
+    int64_t total_time;
+    QEMUBH *finalize_bh;
+    Coroutine *co;
+    QemuCoSleep *target_close_wait;
+} snap_state;
+
+static bool savevm_aborted(void)
+{
+    return snap_state.state == SAVE_STATE_CANCELLED ||
+        snap_state.state == SAVE_STATE_ERROR;
+}
+
+SaveVMInfo *qmp_query_savevm(Error **errp)
+{
+    SaveVMInfo *info = g_malloc0(sizeof(*info));
+    struct SnapshotState *s = &snap_state;
+
+    if (s->state != SAVE_STATE_DONE) {
+        info->has_bytes = true;
+        info->bytes = s->bs_pos;
+        switch (s->state) {
+        case SAVE_STATE_ERROR:
+            info->has_status = true;
+            info->status = g_strdup("failed");
+            info->has_total_time = true;
+            info->total_time = s->total_time;
+            if (s->error) {
+                info->has_error = true;
+                info->error = g_strdup(error_get_pretty(s->error));
+            }
+            break;
+        case SAVE_STATE_ACTIVE:
+            info->has_status = true;
+            info->status = g_strdup("active");
+            info->has_total_time = true;
+            info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME)
+                - s->total_time;
+            break;
+        case SAVE_STATE_COMPLETED:
+            info->has_status = true;
+            info->status = g_strdup("completed");
+            info->has_total_time = true;
+            info->total_time = s->total_time;
+            break;
+        }
+    }
+
+    return info;
+}
+
+static int save_snapshot_cleanup(void)
+{
+    int ret = 0;
+
+    DPRINTF("save_snapshot_cleanup\n");
+
+    snap_state.total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) -
+        snap_state.total_time;
+
+    if (snap_state.file) {
+        ret = qemu_fclose(snap_state.file);
+    }
+
+    if (snap_state.target) {
+        if (!savevm_aborted()) {
+            /* try to truncate, but ignore errors (will fail on block devices).
+             * note1: bdrv_read() needs whole blocks, so we need to round up
+             * note2: PVE requires 1024 (BDRV_SECTOR_SIZE*2) alignment
+             */
+            size_t size = QEMU_ALIGN_UP(snap_state.bs_pos, BDRV_SECTOR_SIZE * 2);
+            blk_truncate(snap_state.target, size, false, PREALLOC_MODE_OFF, 0, NULL);
+        }
+        blk_op_unblock_all(snap_state.target, snap_state.blocker);
+        error_free(snap_state.blocker);
+        snap_state.blocker = NULL;
+        blk_unref(snap_state.target);
+        snap_state.target = NULL;
+
+        if (snap_state.target_close_wait) {
+            qemu_co_sleep_wake(snap_state.target_close_wait);
+        }
+    }
+
+    return ret;
+}
+
+static void save_snapshot_error(const char *fmt, ...)
+{
+    va_list ap;
+    char *msg;
+
+    va_start(ap, fmt);
+    msg = g_strdup_vprintf(fmt, ap);
+    va_end(ap);
+
+    DPRINTF("save_snapshot_error: %s\n", msg);
+
+    if (!snap_state.error) {
+        error_set(&snap_state.error, ERROR_CLASS_GENERIC_ERROR, "%s", msg);
+    }
+
+    g_free(msg);
+
+    snap_state.state = SAVE_STATE_ERROR;
+}
+
+static int block_state_close(void *opaque, Error **errp)
+{
+    snap_state.file = NULL;
+    return blk_flush(snap_state.target);
+}
+
+typedef struct BlkRwCo {
+    int64_t offset;
+    QEMUIOVector *qiov;
+    ssize_t ret;
+} BlkRwCo;
+
+static void coroutine_fn block_state_write_entry(void *opaque) {
+    BlkRwCo *rwco = opaque;
+    rwco->ret = blk_co_pwritev(snap_state.target, rwco->offset, rwco->qiov->size,
+                               rwco->qiov, 0);
+    aio_wait_kick();
+}
+
+static ssize_t block_state_writev_buffer(void *opaque, struct iovec *iov,
+                                         int iovcnt, int64_t pos, Error **errp)
+{
+    QEMUIOVector qiov;
+    BlkRwCo rwco;
+
+    assert(pos == snap_state.bs_pos);
+    rwco = (BlkRwCo) {
+        .offset = pos,
+        .qiov = &qiov,
+        .ret = NOT_DONE,
+    };
+
+    qemu_iovec_init_external(&qiov, iov, iovcnt);
+
+    if (qemu_in_coroutine()) {
+        block_state_write_entry(&rwco);
+    } else {
+        Coroutine *co = qemu_coroutine_create(&block_state_write_entry, &rwco);
+        bdrv_coroutine_enter(blk_bs(snap_state.target), co);
+        BDRV_POLL_WHILE(blk_bs(snap_state.target), rwco.ret == NOT_DONE);
+    }
+    if (rwco.ret < 0) {
+        return rwco.ret;
+    }
+
+    snap_state.bs_pos += qiov.size;
+    return qiov.size;
+}
+
+static const QEMUFileOps block_file_ops = {
+    .writev_buffer = block_state_writev_buffer,
+    .close = block_state_close,
+};
+
+static void process_savevm_finalize(void *opaque)
+{
+    int ret;
+    AioContext *iohandler_ctx = iohandler_get_aio_context();
+    MigrationState *ms = migrate_get_current();
+
+    bool aborted = savevm_aborted();
+
+#ifdef DEBUG_SAVEVM_STATE
+    int64_t start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+#endif
+
+    qemu_bh_delete(snap_state.finalize_bh);
+    snap_state.finalize_bh = NULL;
+    snap_state.co = NULL;
+
+    /* We need to own the target bdrv's context for the following functions,
+     * so move it back. It can stay in the main context and live out its life
+     * there, since we're done with it after this function ends anyway.
+     */
+    aio_context_acquire(iohandler_ctx);
+    blk_set_aio_context(snap_state.target, qemu_get_aio_context(), NULL);
+    aio_context_release(iohandler_ctx);
+
+    ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
+    if (ret < 0) {
+        save_snapshot_error("vm_stop_force_state error %d", ret);
+    }
+
+    if (!aborted) {
+        /* skip state saving if we aborted, snapshot will be invalid anyway */
+        (void)qemu_savevm_state_complete_precopy(snap_state.file, false, false);
+        ret = qemu_file_get_error(snap_state.file);
+        if (ret < 0) {
+            save_snapshot_error("qemu_savevm_state_complete_precopy error %d", ret);
+        }
+    }
+
+    DPRINTF("state saving complete\n");
+    DPRINTF("timing: process_savevm_finalize (state saving) took %ld ms\n",
+            qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time);
+
+    /* clear migration state */
+    migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP,
+                      ret || aborted ? MIGRATION_STATUS_FAILED : MIGRATION_STATUS_COMPLETED);
+    ms->to_dst_file = NULL;
+
+    qemu_savevm_state_cleanup();
+
+    ret = save_snapshot_cleanup();
+    if (ret < 0) {
+        save_snapshot_error("save_snapshot_cleanup error %d", ret);
+    } else if (snap_state.state == SAVE_STATE_ACTIVE) {
+        snap_state.state = SAVE_STATE_COMPLETED;
+    } else if (aborted) {
+        save_snapshot_error("process_savevm_finalize: found aborted state: %d",
+                            snap_state.state);
+    } else {
+        save_snapshot_error("process_savevm_finalize: invalid state: %d",
+                            snap_state.state);
+    }
+    if (snap_state.saved_vm_running) {
+        vm_start();
+        snap_state.saved_vm_running = false;
+    }
+
+    DPRINTF("timing: process_savevm_finalize (full) took %ld ms\n",
+            qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time);
+}
+
+static void coroutine_fn process_savevm_co(void *opaque)
+{
+    int ret;
+    int64_t maxlen;
+    BdrvNextIterator it;
+    BlockDriverState *bs = NULL;
+
+#ifdef DEBUG_SAVEVM_STATE
+    int64_t start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+#endif
+
+    ret = qemu_file_get_error(snap_state.file);
+    if (ret < 0) {
+        save_snapshot_error("qemu_savevm_state_setup failed");
+        return;
+    }
+
+    while (snap_state.state == SAVE_STATE_ACTIVE) {
+        uint64_t pending_size, pend_precopy, pend_compatible, pend_postcopy;
+
+        /* pending is expected to be called without iothread lock */
+        qemu_mutex_unlock_iothread();
+        qemu_savevm_state_pending(snap_state.file, 0, &pend_precopy, &pend_compatible, &pend_postcopy);
+        qemu_mutex_lock_iothread();
+
+        pending_size = pend_precopy + pend_compatible + pend_postcopy;
+
+        maxlen = blk_getlength(snap_state.target) - 30 * 1024 * 1024;
+
+        if (pending_size > 400000 && snap_state.bs_pos + pending_size < maxlen) {
+            ret = qemu_savevm_state_iterate(snap_state.file, false);
+            if (ret < 0) {
+                save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
+                break;
+            }
+            DPRINTF("savevm iterate pending size %lu ret %d\n", pending_size, ret);
+        } else {
+            qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
+            ret = global_state_store();
+            if (ret) {
+                save_snapshot_error("global_state_store error %d", ret);
+                break;
+            }
+
+            DPRINTF("savevm iterate complete\n");
+            break;
+        }
+    }
+
+    DPRINTF("timing: process_savevm_co took %ld ms\n",
+            qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time);
+
+#ifdef DEBUG_SAVEVM_STATE
+    int64_t start_time_flush = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+#endif
+    /* If a drive runs in an IOThread we can flush it async, and only
+     * need to sync-flush whatever IO happens between now and
+     * vm_stop_force_state. bdrv_next can only be called from main AioContext,
+     * so move there now and after every flush.
+     */
+    aio_co_reschedule_self(qemu_get_aio_context());
+    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
+        /* target has BDRV_O_NO_FLUSH, no sense calling bdrv_flush on it */
+        if (bs == blk_bs(snap_state.target)) {
+            continue;
+        }
+
+        AioContext *bs_ctx = bdrv_get_aio_context(bs);
+        if (bs_ctx != qemu_get_aio_context()) {
+            DPRINTF("savevm: async flushing drive %s\n", bs->filename);
+            aio_co_reschedule_self(bs_ctx);
+            bdrv_flush(bs);
+            aio_co_reschedule_self(qemu_get_aio_context());
+        }
+    }
+
+    DPRINTF("timing: async flushing took %ld ms\n",
+            qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time_flush);
+
+    qemu_bh_schedule(snap_state.finalize_bh);
+}
+
+void qmp_savevm_start(bool has_statefile, const char *statefile, Error **errp)
+{
+    Error *local_err = NULL;
+    MigrationState *ms = migrate_get_current();
+    AioContext *iohandler_ctx = iohandler_get_aio_context();
+
+    int bdrv_oflags = BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH;
+
+    if (snap_state.state != SAVE_STATE_DONE) {
+        error_set(errp, ERROR_CLASS_GENERIC_ERROR,
+                  "VM snapshot already started");
+        return;
+    }
+
+    if (migration_is_running(ms->state)) {
+        error_set(errp, ERROR_CLASS_GENERIC_ERROR, QERR_MIGRATION_ACTIVE);
+        return;
+    }
+
+    if (migrate_use_block()) {
+        error_set(errp, ERROR_CLASS_GENERIC_ERROR,
+                  "Block migration and snapshots are incompatible");
+        return;
+    }
+
+    /* initialize snapshot info */
+    snap_state.saved_vm_running = runstate_is_running();
+    snap_state.bs_pos = 0;
+    snap_state.total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+    snap_state.blocker = NULL;
+
+    if (snap_state.error) {
+        error_free(snap_state.error);
+        snap_state.error = NULL;
+    }
+
+    if (!has_statefile) {
+        vm_stop(RUN_STATE_SAVE_VM);
+        snap_state.state = SAVE_STATE_COMPLETED;
+        return;
+    }
+
+    if (qemu_savevm_state_blocked(errp)) {
+        return;
+    }
+
+    /* Open the image */
+    QDict *options = NULL;
+    options = qdict_new();
+    qdict_put_str(options, "driver", "raw");
+    snap_state.target = blk_new_open(statefile, NULL, options, bdrv_oflags, &local_err);
+    if (!snap_state.target) {
+        error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
+        goto restart;
+    }
+
+    snap_state.file = qemu_fopen_ops(&snap_state, &block_file_ops);
+
+    if (!snap_state.file) {
+        error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
+        goto restart;
+    }
+
+    /*
+     * qemu_savevm_* paths use migration code and expect a migration state.
+     * State is cleared in process_savevm_co, but has to be initialized
+     * here (blocking the main thread, from QMP) to avoid race conditions.
+     */
+    migrate_init(ms);
+    memset(&ram_counters, 0, sizeof(ram_counters));
+    ms->to_dst_file = snap_state.file;
+
+    error_setg(&snap_state.blocker, "block device is in use by savevm");
+    blk_op_block_all(snap_state.target, snap_state.blocker);
+
+    snap_state.state = SAVE_STATE_ACTIVE;
+    snap_state.finalize_bh = qemu_bh_new(process_savevm_finalize, &snap_state);
+    snap_state.co = qemu_coroutine_create(&process_savevm_co, NULL);
+    qemu_mutex_unlock_iothread();
+    qemu_savevm_state_header(snap_state.file);
+    qemu_savevm_state_setup(snap_state.file);
+    qemu_mutex_lock_iothread();
+
+    /* Async processing from here on out happens in iohandler context, so let
+     * the target bdrv have its home there.
+     */
+    blk_set_aio_context(snap_state.target, iohandler_ctx, &local_err);
+
+    aio_co_schedule(iohandler_ctx, snap_state.co);
+
+    return;
+
+restart:
+
+    save_snapshot_error("setup failed");
+
+    if (snap_state.saved_vm_running) {
+        vm_start();
+        snap_state.saved_vm_running = false;
+    }
+}
+
+void coroutine_fn qmp_savevm_end(Error **errp)
+{
+    int64_t timeout;
+
+    if (snap_state.state == SAVE_STATE_DONE) {
+        error_set(errp, ERROR_CLASS_GENERIC_ERROR,
+                  "VM snapshot not started");
+        return;
+    }
+
+    if (snap_state.state == SAVE_STATE_ACTIVE) {
+        snap_state.state = SAVE_STATE_CANCELLED;
+        goto wait_for_close;
+    }
+
+    if (snap_state.saved_vm_running) {
+        vm_start();
+        snap_state.saved_vm_running = false;
+    }
+
+    snap_state.state = SAVE_STATE_DONE;
+
+wait_for_close:
+    if (!snap_state.target) {
+        DPRINTF("savevm-end: no target file open\n");
+        return;
+    }
+
+    /* wait until cleanup is done before returning, so that after this
+     * call exits the statefile is closed and can be removed immediately */
+    DPRINTF("savevm-end: waiting for cleanup\n");
+    timeout = 30L * 1000 * 1000 * 1000;
+    qemu_co_sleep_ns_wakeable(snap_state.target_close_wait,
+                              QEMU_CLOCK_REALTIME, timeout);
+    snap_state.target_close_wait = NULL;
+    if (snap_state.target) {
+        save_snapshot_error("timeout waiting for target file close in "
+                            "qmp_savevm_end");
+        /* we cannot assume the snapshot finished in this case, so leave the
+         * state alone - the caller has to figure something out */
+        return;
+    }
+
+    DPRINTF("savevm-end: cleanup done\n");
+}
+
+// FIXME: Deprecated
+void qmp_snapshot_drive(const char *device, const char *name, Error **errp)
+{
+    // Compatibility with older qemu-server.
+    qmp_blockdev_snapshot_internal_sync(device, name, errp);
+}
+
+// FIXME: Deprecated
+void qmp_delete_drive_snapshot(const char *device, const char *name,
+                               Error **errp)
+{
+    // Compatibility with older qemu-server.
+    (void)qmp_blockdev_snapshot_delete_internal_sync(device, false, NULL,
+                                                     true, name, errp);
+}
+
+static ssize_t loadstate_get_buffer(void *opaque, uint8_t *buf, int64_t pos,
+                                    size_t size, Error **errp)
+{
+    BlockBackend *be = opaque;
+    int64_t maxlen = blk_getlength(be);
+    if (pos > maxlen) {
+        return -EIO;
+    }
+    if ((pos + size) > maxlen) {
+        size = maxlen - pos - 1;
+    }
+    if (size == 0) {
+        return 0;
+    }
+    return blk_pread(be, pos, buf, size);
+}
+
+static const QEMUFileOps loadstate_file_ops = {
+    .get_buffer = loadstate_get_buffer,
+};
+
+int load_snapshot_from_blockdev(const char *filename, Error **errp)
+{
+    BlockBackend *be;
+    Error *local_err = NULL;
+    Error *blocker = NULL;
+
+    QEMUFile *f;
+    int ret = -EINVAL;
+
+    be = blk_new_open(filename, NULL, NULL, 0, &local_err);
+
+    if (!be) {
+        error_setg(errp, "Could not open VM state file");
+        goto the_end;
+    }
+
+    error_setg(&blocker, "block device is in use by load state");
+    blk_op_block_all(be, blocker);
+
+    /* restore the VM state */
+    f = qemu_fopen_ops(be, &loadstate_file_ops);
+    if (!f) {
+        error_setg(errp, "Could not open VM state file");
+        goto the_end;
+    }
+
+    qemu_system_reset(SHUTDOWN_CAUSE_NONE);
+    ret = qemu_loadvm_state(f);
+
+    /* dirty bitmap migration has a special case we need to trigger manually */
+    dirty_bitmap_mig_before_vm_start();
+
+    qemu_fclose(f);
+    migration_incoming_state_destroy();
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "Error while loading VM state");
+        goto the_end;
+    }
+
+    ret = 0;
+
+ the_end:
+    if (be) {
+        blk_op_unblock_all(be, blocker);
+        error_free(blocker);
+        blk_unref(be);
+    }
+    return ret;
+}
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index c8b97909e7..64a84cf4ee 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -1961,6 +1961,63 @@ void hmp_info_memory_devices(Monitor *mon, const QDict *qdict)
     hmp_handle_error(mon, err);
 }
 
+void hmp_savevm_start(Monitor *mon, const QDict *qdict)
+{
+    Error *errp = NULL;
+    const char *statefile = qdict_get_try_str(qdict, "statefile");
+
+    qmp_savevm_start(statefile != NULL, statefile, &errp);
+    hmp_handle_error(mon, errp);
+}
+
+void hmp_snapshot_drive(Monitor *mon, const QDict *qdict)
+{
+    Error *errp = NULL;
+    const char *name = qdict_get_str(qdict, "name");
+    const char *device = qdict_get_str(qdict, "device");
+
+    qmp_snapshot_drive(device, name, &errp);
+    hmp_handle_error(mon, errp);
+}
+
+void hmp_delete_drive_snapshot(Monitor *mon, const QDict *qdict)
+{
+    Error *errp = NULL;
+    const char *name = qdict_get_str(qdict, "name");
+    const char *device = qdict_get_str(qdict, "device");
+
+    qmp_delete_drive_snapshot(device, name, &errp);
+    hmp_handle_error(mon, errp);
+}
+
+void coroutine_fn hmp_savevm_end(Monitor *mon, const QDict *qdict)
+{
+    Error *errp = NULL;
+
+    qmp_savevm_end(&errp);
+    hmp_handle_error(mon, errp);
+}
+
+void hmp_info_savevm(Monitor *mon, const QDict *qdict)
+{
+    SaveVMInfo *info;
+    info = qmp_query_savevm(NULL);
+
+    if (info->has_status) {
+        monitor_printf(mon, "savevm status: %s\n", info->status);
+        monitor_printf(mon, "total time: %" PRIu64 " milliseconds\n",
+                       info->total_time);
+    } else {
+        monitor_printf(mon, "savevm status: not running\n");
+    }
+    if (info->has_bytes) {
+        monitor_printf(mon, "Bytes saved: %"PRIu64"\n", info->bytes);
+    }
+    if (info->has_error) {
+        monitor_printf(mon, "Error: %s\n", info->error);
+    }
+}
+
 void hmp_info_iothreads(Monitor *mon, const QDict *qdict)
 {
     IOThreadInfoList *info_list = qmp_query_iothreads(NULL);
diff --git a/qapi/migration.json b/qapi/migration.json
index 1124a2dda8..3d72b3e3f3 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -247,6 +247,40 @@
           '*compression': 'CompressionStats',
           '*socket-address': ['SocketAddress'] } }
 
+##
+# @SaveVMInfo:
+#
+# Information about the current savevm process.
+#
+# @status: string describing the current savevm status.
+#          This can be 'active', 'completed', or 'failed'.
+#          If this field is not returned, no savevm process
+#          has been initiated.
+#
+# @error: string containing an error message if status is 'failed'.
+#
+# @total-time: total amount of milliseconds since savevm started.
+#              If savevm has ended, it returns the total save time.
+#
+# @bytes: total amount of data transferred.
+#
+# Since: 1.3
+##
+{ 'struct': 'SaveVMInfo',
+  'data': {'*status': 'str', '*error': 'str',
+           '*total-time': 'int', '*bytes': 'int'} }
+
+##
+# @query-savevm:
+#
+# Returns information about the current savevm process.
+#
+# Returns: @SaveVMInfo
+#
+# Since: 1.3
+##
+{ 'command': 'query-savevm', 'returns': 'SaveVMInfo' }
+
 ##
 # @query-migrate:
 #
diff --git a/qapi/misc.json b/qapi/misc.json
index 5c2ca3b556..9bc14e1032 100644
--- a/qapi/misc.json
+++ b/qapi/misc.json
@@ -431,6 +431,38 @@
 ##
 { 'command': 'query-fdsets', 'returns': ['FdsetInfo'] }
 
+##
+# @savevm-start:
+#
+# Prepare for snapshot and halt VM. Save VM state to statefile.
+#
+##
+{ 'command': 'savevm-start', 'data': { '*statefile': 'str' } }
+
+##
+# @snapshot-drive:
+#
+# Create an internal drive snapshot.
+#
+##
+{ 'command': 'snapshot-drive', 'data': { 'device': 'str', 'name': 'str' } }
+
+##
+# @delete-drive-snapshot:
+#
+# Delete an internal drive snapshot.
+#
+##
+{ 'command': 'delete-drive-snapshot', 'data': { 'device': 'str', 'name': 'str' } }
+
+##
+# @savevm-end:
+#
+# Resume VM after a snapshot.
+#
+##
+{ 'command': 'savevm-end', 'coroutine': true }
+
 ##
 # @CommandLineParameterType:
 #
diff --git a/qemu-options.hx b/qemu-options.hx
index 83aa59a920..002ba697e9 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -4131,6 +4131,18 @@ SRST
     Start right away with a saved state (``loadvm`` in monitor)
 ERST
 
+DEF("loadstate", HAS_ARG, QEMU_OPTION_loadstate, \
+    "-loadstate file\n" \
+    "                start right away with a saved state\n",
+    QEMU_ARCH_ALL)
+SRST
+``-loadstate file``
+    Start right away with a saved state. Unlike ``loadvm``, this option
+    does not roll back the disk state, so the user must make sure the
+    disks are in the correct state. ``file`` can be any valid device URL.
+    See the "Device URL Syntax" section for more information.
+ERST
+
 #ifndef _WIN32
 DEF("daemonize", 0, QEMU_OPTION_daemonize, \
     "-daemonize daemonize QEMU after initializing\n", QEMU_ARCH_ALL)
diff --git a/softmmu/vl.c b/softmmu/vl.c
index 5ca11e7469..220c67cd32 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -150,6 +150,7 @@ static const char *incoming;
 static const char *loadvm;
 static const char *accelerators;
 static QDict *machine_opts_dict;
+static const char *loadstate;
 static QTAILQ_HEAD(, ObjectOption) object_opts = QTAILQ_HEAD_INITIALIZER(object_opts);
 static ram_addr_t maxram_size;
 static uint64_t ram_slots;
@@ -2700,6 +2701,12 @@ void qmp_x_exit_preconfig(Error **errp)
             autostart = 0;
             exit(1);
         }
+    } else if (loadstate) {
+        Error *local_err = NULL;
+        if (load_snapshot_from_blockdev(loadstate, &local_err) < 0) {
+            error_report_err(local_err);
+            autostart = 0;
+        }
     }
     if (replay_mode != REPLAY_MODE_NONE) {
         replay_vmstate_init();
@@ -3238,6 +3245,9 @@ void qemu_init(int argc, char **argv, char **envp)
         case QEMU_OPTION_loadvm:
            loadvm = optarg;
            break;
+        case QEMU_OPTION_loadstate:
+            loadstate = optarg;
+            break;
         case QEMU_OPTION_full_screen:
             dpy.has_full_screen = true;
             dpy.full_screen = true;