]>
Commit | Line | Data |
---|---|---|
23102ed6 | 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 |
6402d961 | 2 | From: Dietmar Maurer <dietmar@proxmox.com> |
83faa3fe | 3 | Date: Mon, 6 Apr 2020 12:16:46 +0200 |
817b7667 | 4 | Subject: [PATCH] PVE: add savevm-async for background state snapshots |
95259824 | 5 | |
d7f4e01a TL |
6 | Put qemu_savevm_state_{header,setup} into the main loop and the rest |
7 | of the iteration into a coroutine. The former need to lock the | |
8 | iothread (and we can't unlock it in the coroutine), and the latter | |
9 | can't deal with being in a separate thread, so a coroutine it must | |
10 | be. | |
11 | ||
817b7667 SR |
12 | Truncate output file at 1024 boundary. |
13 | ||
14 | Do not block the VM and save the state on aborting a snapshot, as the | |
15 | snapshot will be invalid anyway. | |
16 | ||
17 | Also, when aborting, wait for the target file to be closed, otherwise a | |
18 | client might run into race-conditions when trying to remove the file | |
19 | still opened by QEMU. | |
20 | ||
b855dce7 | 21 | Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com> |
6402d961 | 22 | Signed-off-by: Dietmar Maurer <dietmar@proxmox.com> |
d7f4e01a | 23 | Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com> |
db5d2a4b FE |
24 | [SR: improve aborting |
25 | register yank before migration_incoming_state_destroy] | |
817b7667 | 26 | Signed-off-by: Stefan Reiter <s.reiter@proxmox.com> |
5b15e2ec | 27 | [FE: further improve aborting |
8051a24b | 28 | adapt to removal of QEMUFileOps |
bf251437 | 29 | improve condition for entering final stage |
f1eed34a | 30 | adapt to QAPI and other changes for 8.2] |
563c5928 | 31 | Signed-off-by: Fiona Ebner <f.ebner@proxmox.com> |
95259824 | 32 | --- |
b855dce7 | 33 | hmp-commands-info.hx | 13 + |
99f9ce2c | 34 | hmp-commands.hx | 17 ++ |
8dca018b | 35 | include/migration/snapshot.h | 2 + |
99f9ce2c | 36 | include/monitor/hmp.h | 3 + |
817b7667 | 37 | migration/meson.build | 1 + |
4fbd50e2 | 38 | migration/savevm-async.c | 531 +++++++++++++++++++++++++++++++++++ |
99f9ce2c | 39 | monitor/hmp-cmds.c | 38 +++ |
5b15e2ec | 40 | qapi/migration.json | 34 +++ |
4fbd50e2 | 41 | qapi/misc.json | 18 ++ |
83faa3fe | 42 | qemu-options.hx | 12 + |
f1eed34a | 43 | system/vl.c | 10 + |
4fbd50e2 | 44 | 11 files changed, 679 insertions(+) |
817b7667 | 45 | create mode 100644 migration/savevm-async.c |
95259824 | 46 | |
95259824 | 47 | diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx |
4fbd50e2 | 48 | index ad1b1306e3..d5ab880492 100644 |
95259824 WB |
49 | --- a/hmp-commands-info.hx |
50 | +++ b/hmp-commands-info.hx | |
10e10933 | 51 | @@ -525,6 +525,19 @@ SRST |
8dca018b | 52 | Show current migration parameters. |
83faa3fe TL |
53 | ERST |
54 | ||
b855dce7 | 55 | + { |
95259824 WB |
56 | + .name = "savevm", |
57 | + .args_type = "", | |
58 | + .params = "", | |
59 | + .help = "show savevm status", | |
a544966d | 60 | + .cmd = hmp_info_savevm, |
95259824 WB |
61 | + }, |
62 | + | |
83faa3fe TL |
63 | +SRST |
64 | + ``info savevm`` | |
65 | + Show savevm status. | |
66 | +ERST | |
67 | + | |
b855dce7 | 68 | { |
83faa3fe TL |
69 | .name = "balloon", |
70 | .args_type = "", | |
95259824 | 71 | diff --git a/hmp-commands.hx b/hmp-commands.hx |
4fbd50e2 | 72 | index 2e2a3bcf98..7506de251c 100644 |
95259824 WB |
73 | --- a/hmp-commands.hx |
74 | +++ b/hmp-commands.hx | |
4fbd50e2 | 75 | @@ -1862,3 +1862,20 @@ SRST |
bf251437 | 76 | List event channels in the guest |
d03e1b3c FE |
77 | ERST |
78 | #endif | |
95259824 WB |
79 | + |
80 | + { | |
81 | + .name = "savevm-start", | |
82 | + .args_type = "statefile:s?", | |
83 | + .params = "[statefile]", | |
84 | + .help = "Prepare for snapshot and halt VM. Save VM state to statefile.", | |
a544966d | 85 | + .cmd = hmp_savevm_start, |
95259824 WB |
86 | + }, |
87 | + | |
88 | + { | |
95259824 WB |
89 | + .name = "savevm-end", |
90 | + .args_type = "", | |
91 | + .params = "", | |
92 | + .help = "Resume VM after snaphot.", | |
817b7667 SR |
93 | + .cmd = hmp_savevm_end, |
94 | + .coroutine = true, | |
95259824 | 95 | + }, |
be901f66 | 96 | diff --git a/include/migration/snapshot.h b/include/migration/snapshot.h |
4fbd50e2 | 97 | index 9e4dcaaa75..2581730d74 100644 |
be901f66 SR |
98 | --- a/include/migration/snapshot.h |
99 | +++ b/include/migration/snapshot.h | |
4fbd50e2 FE |
100 | @@ -68,4 +68,6 @@ bool delete_snapshot(const char *name, |
101 | */ | |
102 | void load_snapshot_resume(RunState state); | |
be901f66 | 103 | |
be901f66 | 104 | +int load_snapshot_from_blockdev(const char *filename, Error **errp); |
8dca018b | 105 | + |
be901f66 SR |
106 | #endif |
107 | diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h | |
10e10933 | 108 | index 13f9a2dedb..7a7def7530 100644 |
be901f66 SR |
109 | --- a/include/monitor/hmp.h |
110 | +++ b/include/monitor/hmp.h | |
bf251437 | 111 | @@ -28,6 +28,7 @@ void hmp_info_status(Monitor *mon, const QDict *qdict); |
be901f66 SR |
112 | void hmp_info_uuid(Monitor *mon, const QDict *qdict); |
113 | void hmp_info_chardev(Monitor *mon, const QDict *qdict); | |
114 | void hmp_info_mice(Monitor *mon, const QDict *qdict); | |
115 | +void hmp_info_savevm(Monitor *mon, const QDict *qdict); | |
116 | void hmp_info_migrate(Monitor *mon, const QDict *qdict); | |
117 | void hmp_info_migrate_capabilities(Monitor *mon, const QDict *qdict); | |
118 | void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict); | |
99f9ce2c | 119 | @@ -94,6 +95,8 @@ void hmp_closefd(Monitor *mon, const QDict *qdict); |
bf251437 FE |
120 | void hmp_mouse_move(Monitor *mon, const QDict *qdict); |
121 | void hmp_mouse_button(Monitor *mon, const QDict *qdict); | |
122 | void hmp_mouse_set(Monitor *mon, const QDict *qdict); | |
be901f66 | 123 | +void hmp_savevm_start(Monitor *mon, const QDict *qdict); |
be901f66 SR |
124 | +void hmp_savevm_end(Monitor *mon, const QDict *qdict); |
125 | void hmp_sendkey(Monitor *mon, const QDict *qdict); | |
d03e1b3c | 126 | void coroutine_fn hmp_screendump(Monitor *mon, const QDict *qdict); |
83faa3fe | 127 | void hmp_chardev_add(Monitor *mon, const QDict *qdict); |
817b7667 | 128 | diff --git a/migration/meson.build b/migration/meson.build |
4fbd50e2 | 129 | index 95d1cf2250..800f12a60d 100644 |
817b7667 SR |
130 | --- a/migration/meson.build |
131 | +++ b/migration/meson.build | |
4fbd50e2 | 132 | @@ -28,6 +28,7 @@ system_ss.add(files( |
10e10933 | 133 | 'options.c', |
817b7667 SR |
134 | 'postcopy-ram.c', |
135 | 'savevm.c', | |
136 | + 'savevm-async.c', | |
137 | 'socket.c', | |
138 | 'tls.c', | |
bf251437 | 139 | 'threadinfo.c', |
817b7667 | 140 | diff --git a/migration/savevm-async.c b/migration/savevm-async.c |
95259824 | 141 | new file mode 100644 |
4fbd50e2 | 142 | index 0000000000..779e4e2a78 |
95259824 | 143 | --- /dev/null |
817b7667 | 144 | +++ b/migration/savevm-async.c |
4fbd50e2 | 145 | @@ -0,0 +1,531 @@ |
95259824 | 146 | +#include "qemu/osdep.h" |
5b15e2ec | 147 | +#include "migration/channel-savevm-async.h" |
6838f038 | 148 | +#include "migration/migration.h" |
10e10933 FE |
149 | +#include "migration/migration-stats.h" |
150 | +#include "migration/options.h" | |
6838f038 WB |
151 | +#include "migration/savevm.h" |
152 | +#include "migration/snapshot.h" | |
153 | +#include "migration/global_state.h" | |
154 | +#include "migration/ram.h" | |
155 | +#include "migration/qemu-file.h" | |
95259824 | 156 | +#include "sysemu/sysemu.h" |
6402d961 | 157 | +#include "sysemu/runstate.h" |
95259824 | 158 | +#include "block/block.h" |
95259824 | 159 | +#include "sysemu/block-backend.h" |
53e83913 WB |
160 | +#include "qapi/error.h" |
161 | +#include "qapi/qmp/qerror.h" | |
162 | +#include "qapi/qmp/qdict.h" | |
163 | +#include "qapi/qapi-commands-migration.h" | |
164 | +#include "qapi/qapi-commands-misc.h" | |
0775f12b | 165 | +#include "qapi/qapi-commands-block.h" |
95259824 | 166 | +#include "qemu/cutils.h" |
817b7667 | 167 | +#include "qemu/timer.h" |
6402d961 TL |
168 | +#include "qemu/main-loop.h" |
169 | +#include "qemu/rcu.h" | |
db5d2a4b | 170 | +#include "qemu/yank.h" |
95259824 WB |
171 | + |
172 | +/* #define DEBUG_SAVEVM_STATE */ | |
173 | + | |
174 | +#ifdef DEBUG_SAVEVM_STATE | |
175 | +#define DPRINTF(fmt, ...) \ | |
176 | + do { printf("savevm-async: " fmt, ## __VA_ARGS__); } while (0) | |
177 | +#else | |
178 | +#define DPRINTF(fmt, ...) \ | |
179 | + do { } while (0) | |
180 | +#endif | |
181 | + | |
182 | +enum { | |
183 | + SAVE_STATE_DONE, | |
184 | + SAVE_STATE_ERROR, | |
185 | + SAVE_STATE_ACTIVE, | |
186 | + SAVE_STATE_COMPLETED, | |
187 | + SAVE_STATE_CANCELLED | |
188 | +}; | |
189 | + | |
190 | + | |
191 | +static struct SnapshotState { | |
67af0fa4 | 192 | + BlockBackend *target; |
95259824 WB |
193 | + size_t bs_pos; |
194 | + int state; | |
195 | + Error *error; | |
196 | + Error *blocker; | |
197 | + int saved_vm_running; | |
198 | + QEMUFile *file; | |
199 | + int64_t total_time; | |
d7f4e01a TL |
200 | + QEMUBH *finalize_bh; |
201 | + Coroutine *co; | |
563c5928 | 202 | + QemuCoSleep target_close_wait; |
95259824 WB |
203 | +} snap_state; |
204 | + | |
817b7667 SR |
205 | +static bool savevm_aborted(void) |
206 | +{ | |
207 | + return snap_state.state == SAVE_STATE_CANCELLED || | |
208 | + snap_state.state == SAVE_STATE_ERROR; | |
209 | +} | |
210 | + | |
95259824 WB |
211 | +SaveVMInfo *qmp_query_savevm(Error **errp) |
212 | +{ | |
213 | + SaveVMInfo *info = g_malloc0(sizeof(*info)); | |
214 | + struct SnapshotState *s = &snap_state; | |
215 | + | |
216 | + if (s->state != SAVE_STATE_DONE) { | |
217 | + info->has_bytes = true; | |
218 | + info->bytes = s->bs_pos; | |
219 | + switch (s->state) { | |
220 | + case SAVE_STATE_ERROR: | |
95259824 WB |
221 | + info->status = g_strdup("failed"); |
222 | + info->has_total_time = true; | |
223 | + info->total_time = s->total_time; | |
224 | + if (s->error) { | |
95259824 WB |
225 | + info->error = g_strdup(error_get_pretty(s->error)); |
226 | + } | |
227 | + break; | |
228 | + case SAVE_STATE_ACTIVE: | |
95259824 WB |
229 | + info->status = g_strdup("active"); |
230 | + info->has_total_time = true; | |
231 | + info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) | |
232 | + - s->total_time; | |
233 | + break; | |
234 | + case SAVE_STATE_COMPLETED: | |
95259824 WB |
235 | + info->status = g_strdup("completed"); |
236 | + info->has_total_time = true; | |
237 | + info->total_time = s->total_time; | |
238 | + break; | |
239 | + } | |
240 | + } | |
241 | + | |
242 | + return info; | |
243 | +} | |
244 | + | |
245 | +static int save_snapshot_cleanup(void) | |
246 | +{ | |
247 | + int ret = 0; | |
248 | + | |
249 | + DPRINTF("save_snapshot_cleanup\n"); | |
250 | + | |
251 | + snap_state.total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - | |
252 | + snap_state.total_time; | |
253 | + | |
254 | + if (snap_state.file) { | |
255 | + ret = qemu_fclose(snap_state.file); | |
5b15e2ec | 256 | + snap_state.file = NULL; |
95259824 WB |
257 | + } |
258 | + | |
67af0fa4 | 259 | + if (snap_state.target) { |
817b7667 SR |
260 | + if (!savevm_aborted()) { |
261 | + /* try to truncate, but ignore errors (will fail on block devices). | |
262 | + * note1: bdrv_read() needs whole blocks, so we need to round up | |
263 | + * note2: PVE requires 1024 (BDRV_SECTOR_SIZE*2) alignment | |
264 | + */ | |
265 | + size_t size = QEMU_ALIGN_UP(snap_state.bs_pos, BDRV_SECTOR_SIZE*2); | |
266 | + blk_truncate(snap_state.target, size, false, PREALLOC_MODE_OFF, 0, NULL); | |
267 | + } | |
67af0fa4 | 268 | + blk_op_unblock_all(snap_state.target, snap_state.blocker); |
95259824 WB |
269 | + error_free(snap_state.blocker); |
270 | + snap_state.blocker = NULL; | |
67af0fa4 WB |
271 | + blk_unref(snap_state.target); |
272 | + snap_state.target = NULL; | |
817b7667 | 273 | + |
563c5928 | 274 | + qemu_co_sleep_wake(&snap_state.target_close_wait); |
95259824 WB |
275 | + } |
276 | + | |
277 | + return ret; | |
278 | +} | |
279 | + | |
b8b4ce04 | 280 | +static void G_GNUC_PRINTF(1, 2) save_snapshot_error(const char *fmt, ...) |
95259824 WB |
281 | +{ |
282 | + va_list ap; | |
283 | + char *msg; | |
284 | + | |
285 | + va_start(ap, fmt); | |
286 | + msg = g_strdup_vprintf(fmt, ap); | |
287 | + va_end(ap); | |
288 | + | |
289 | + DPRINTF("save_snapshot_error: %s\n", msg); | |
290 | + | |
291 | + if (!snap_state.error) { | |
292 | + error_set(&snap_state.error, ERROR_CLASS_GENERIC_ERROR, "%s", msg); | |
293 | + } | |
294 | + | |
295 | + g_free (msg); | |
296 | + | |
297 | + snap_state.state = SAVE_STATE_ERROR; | |
95259824 WB |
298 | +} |
299 | + | |
d7f4e01a | 300 | +static void process_savevm_finalize(void *opaque) |
0775f12b WB |
301 | +{ |
302 | + int ret; | |
d7f4e01a TL |
303 | + MigrationState *ms = migrate_get_current(); |
304 | + | |
817b7667 SR |
305 | + bool aborted = savevm_aborted(); |
306 | + | |
d7f4e01a TL |
307 | +#ifdef DEBUG_SAVEVM_STATE |
308 | + int64_t start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); | |
309 | +#endif | |
310 | + | |
311 | + qemu_bh_delete(snap_state.finalize_bh); | |
312 | + snap_state.finalize_bh = NULL; | |
313 | + snap_state.co = NULL; | |
314 | + | |
315 | + /* We need to own the target bdrv's context for the following functions, | |
316 | + * so move it back. It can stay in the main context and live out its live | |
317 | + * there, since we're done with it after this method ends anyway. | |
318 | + */ | |
d7f4e01a | 319 | + blk_set_aio_context(snap_state.target, qemu_get_aio_context(), NULL); |
d7f4e01a TL |
320 | + |
321 | + ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); | |
322 | + if (ret < 0) { | |
323 | + save_snapshot_error("vm_stop_force_state error %d", ret); | |
324 | + } | |
325 | + | |
817b7667 SR |
326 | + if (!aborted) { |
327 | + /* skip state saving if we aborted, snapshot will be invalid anyway */ | |
328 | + (void)qemu_savevm_state_complete_precopy(snap_state.file, false, false); | |
329 | + ret = qemu_file_get_error(snap_state.file); | |
330 | + if (ret < 0) { | |
a0208150 | 331 | + save_snapshot_error("qemu_savevm_state_complete_precopy error %d", ret); |
817b7667 | 332 | + } |
d7f4e01a TL |
333 | + } |
334 | + | |
335 | + DPRINTF("state saving complete\n"); | |
336 | + DPRINTF("timing: process_savevm_finalize (state saving) took %ld ms\n", | |
337 | + qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time); | |
338 | + | |
339 | + /* clear migration state */ | |
340 | + migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP, | |
817b7667 | 341 | + ret || aborted ? MIGRATION_STATUS_FAILED : MIGRATION_STATUS_COMPLETED); |
d7f4e01a TL |
342 | + ms->to_dst_file = NULL; |
343 | + | |
344 | + qemu_savevm_state_cleanup(); | |
345 | + | |
0775f12b WB |
346 | + ret = save_snapshot_cleanup(); |
347 | + if (ret < 0) { | |
348 | + save_snapshot_error("save_snapshot_cleanup error %d", ret); | |
349 | + } else if (snap_state.state == SAVE_STATE_ACTIVE) { | |
350 | + snap_state.state = SAVE_STATE_COMPLETED; | |
817b7667 | 351 | + } else if (aborted) { |
1976ca46 FE |
352 | + /* |
353 | + * If there was an error, there's no need to set a new one here. | |
354 | + * If the snapshot was canceled, leave setting the state to | |
355 | + * qmp_savevm_end(), which is waked by save_snapshot_cleanup(). | |
356 | + */ | |
0775f12b WB |
357 | + } else { |
358 | + save_snapshot_error("process_savevm_cleanup: invalid state: %d", | |
359 | + snap_state.state); | |
95259824 | 360 | + } |
0775f12b WB |
361 | + if (snap_state.saved_vm_running) { |
362 | + vm_start(); | |
363 | + snap_state.saved_vm_running = false; | |
95259824 | 364 | + } |
d7f4e01a TL |
365 | + |
366 | + DPRINTF("timing: process_savevm_finalize (full) took %ld ms\n", | |
367 | + qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time); | |
95259824 WB |
368 | +} |
369 | + | |
d7f4e01a | 370 | +static void coroutine_fn process_savevm_co(void *opaque) |
95259824 WB |
371 | +{ |
372 | + int ret; | |
373 | + int64_t maxlen; | |
d7f4e01a TL |
374 | + BdrvNextIterator it; |
375 | + BlockDriverState *bs = NULL; | |
95259824 | 376 | + |
d7f4e01a TL |
377 | +#ifdef DEBUG_SAVEVM_STATE |
378 | + int64_t start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); | |
379 | +#endif | |
95259824 | 380 | + |
6838f038 | 381 | + ret = qemu_file_get_error(snap_state.file); |
95259824 | 382 | + if (ret < 0) { |
6838f038 | 383 | + save_snapshot_error("qemu_savevm_state_setup failed"); |
d7f4e01a | 384 | + return; |
95259824 WB |
385 | + } |
386 | + | |
387 | + while (snap_state.state == SAVE_STATE_ACTIVE) { | |
bf251437 | 388 | + uint64_t pending_size, pend_precopy, pend_postcopy; |
db5d2a4b | 389 | + uint64_t threshold = 400 * 1000; |
95259824 | 390 | + |
db5d2a4b FE |
391 | + /* |
392 | + * pending_{estimate,exact} are expected to be called without iothread | |
393 | + * lock. Similar to what is done in migration.c, call the exact variant | |
394 | + * only once pend_precopy in the estimate is below the threshold. | |
395 | + */ | |
4fbd50e2 | 396 | + bql_unlock(); |
db5d2a4b FE |
397 | + qemu_savevm_state_pending_estimate(&pend_precopy, &pend_postcopy); |
398 | + if (pend_precopy <= threshold) { | |
399 | + qemu_savevm_state_pending_exact(&pend_precopy, &pend_postcopy); | |
400 | + } | |
4fbd50e2 | 401 | + bql_lock(); |
bf251437 | 402 | + pending_size = pend_precopy + pend_postcopy; |
95259824 | 403 | + |
eee064d9 FE |
404 | + /* |
405 | + * A guest reaching this cutoff is dirtying lots of RAM. It should be | |
406 | + * large enough so that the guest can't dirty this much between the | |
407 | + * check and the guest actually being stopped, but it should be small | |
408 | + * enough to avoid long downtimes for non-hibernation snapshots. | |
409 | + */ | |
410 | + maxlen = blk_getlength(snap_state.target) - 100*1024*1024; | |
0775f12b | 411 | + |
8051a24b | 412 | + /* Note that there is no progress for pend_postcopy when iterating */ |
db5d2a4b | 413 | + if (pend_precopy > threshold && snap_state.bs_pos + pending_size < maxlen) { |
0775f12b WB |
414 | + ret = qemu_savevm_state_iterate(snap_state.file, false); |
415 | + if (ret < 0) { | |
416 | + save_snapshot_error("qemu_savevm_state_iterate error %d", ret); | |
417 | + break; | |
418 | + } | |
d7f4e01a | 419 | + DPRINTF("savevm iterate pending size %lu ret %d\n", pending_size, ret); |
95259824 | 420 | + } else { |
b855dce7 | 421 | + qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL); |
10e10933 | 422 | + global_state_store(); |
d7f4e01a TL |
423 | + |
424 | + DPRINTF("savevm iterate complete\n"); | |
95259824 WB |
425 | + break; |
426 | + } | |
95259824 WB |
427 | + } |
428 | + | |
d7f4e01a TL |
429 | + DPRINTF("timing: process_savevm_co took %ld ms\n", |
430 | + qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time); | |
431 | + | |
432 | +#ifdef DEBUG_SAVEVM_STATE | |
433 | + int64_t start_time_flush = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); | |
434 | +#endif | |
435 | + /* If a drive runs in an IOThread we can flush it async, and only | |
436 | + * need to sync-flush whatever IO happens between now and | |
437 | + * vm_stop_force_state. bdrv_next can only be called from main AioContext, | |
438 | + * so move there now and after every flush. | |
439 | + */ | |
440 | + aio_co_reschedule_self(qemu_get_aio_context()); | |
f1eed34a FE |
441 | + bdrv_graph_co_rdlock(); |
442 | + bs = bdrv_first(&it); | |
443 | + bdrv_graph_co_rdunlock(); | |
444 | + while (bs) { | |
d7f4e01a | 445 | + /* target has BDRV_O_NO_FLUSH, no sense calling bdrv_flush on it */ |
f1eed34a FE |
446 | + if (bs != blk_bs(snap_state.target)) { |
447 | + AioContext *bs_ctx = bdrv_get_aio_context(bs); | |
448 | + if (bs_ctx != qemu_get_aio_context()) { | |
449 | + DPRINTF("savevm: async flushing drive %s\n", bs->filename); | |
450 | + aio_co_reschedule_self(bs_ctx); | |
451 | + bdrv_graph_co_rdlock(); | |
452 | + bdrv_flush(bs); | |
453 | + bdrv_graph_co_rdunlock(); | |
454 | + aio_co_reschedule_self(qemu_get_aio_context()); | |
455 | + } | |
d7f4e01a | 456 | + } |
f1eed34a FE |
457 | + bdrv_graph_co_rdlock(); |
458 | + bs = bdrv_next(&it); | |
459 | + bdrv_graph_co_rdunlock(); | |
d7f4e01a TL |
460 | + } |
461 | + | |
462 | + DPRINTF("timing: async flushing took %ld ms\n", | |
463 | + qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time_flush); | |
95259824 | 464 | + |
d7f4e01a | 465 | + qemu_bh_schedule(snap_state.finalize_bh); |
95259824 WB |
466 | +} |
467 | + | |
bf251437 | 468 | +void qmp_savevm_start(const char *statefile, Error **errp) |
95259824 | 469 | +{ |
95259824 | 470 | + Error *local_err = NULL; |
d7f4e01a TL |
471 | + MigrationState *ms = migrate_get_current(); |
472 | + AioContext *iohandler_ctx = iohandler_get_aio_context(); | |
95259824 | 473 | + |
67af0fa4 | 474 | + int bdrv_oflags = BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH; |
95259824 WB |
475 | + |
476 | + if (snap_state.state != SAVE_STATE_DONE) { | |
477 | + error_set(errp, ERROR_CLASS_GENERIC_ERROR, | |
478 | + "VM snapshot already started\n"); | |
479 | + return; | |
480 | + } | |
481 | + | |
4fbd50e2 | 482 | + if (migration_is_running()) { |
d7f4e01a TL |
483 | + error_set(errp, ERROR_CLASS_GENERIC_ERROR, QERR_MIGRATION_ACTIVE); |
484 | + return; | |
485 | + } | |
486 | + | |
10e10933 | 487 | + if (migrate_block()) { |
d7f4e01a TL |
488 | + error_set(errp, ERROR_CLASS_GENERIC_ERROR, |
489 | + "Block migration and snapshots are incompatible"); | |
490 | + return; | |
491 | + } | |
492 | + | |
95259824 WB |
493 | + /* initialize snapshot info */ |
494 | + snap_state.saved_vm_running = runstate_is_running(); | |
495 | + snap_state.bs_pos = 0; | |
496 | + snap_state.total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); | |
497 | + snap_state.blocker = NULL; | |
a262e964 | 498 | + snap_state.target_close_wait = (QemuCoSleep){ .to_wake = NULL }; |
95259824 WB |
499 | + |
500 | + if (snap_state.error) { | |
501 | + error_free(snap_state.error); | |
502 | + snap_state.error = NULL; | |
503 | + } | |
504 | + | |
bf251437 | 505 | + if (!statefile) { |
95259824 WB |
506 | + vm_stop(RUN_STATE_SAVE_VM); |
507 | + snap_state.state = SAVE_STATE_COMPLETED; | |
508 | + return; | |
509 | + } | |
510 | + | |
511 | + if (qemu_savevm_state_blocked(errp)) { | |
512 | + return; | |
513 | + } | |
514 | + | |
515 | + /* Open the image */ | |
95259824 WB |
516 | + QDict *options = NULL; |
517 | + options = qdict_new(); | |
53e83913 | 518 | + qdict_put_str(options, "driver", "raw"); |
67af0fa4 WB |
519 | + snap_state.target = blk_new_open(statefile, NULL, options, bdrv_oflags, &local_err); |
520 | + if (!snap_state.target) { | |
95259824 WB |
521 | + error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile); |
522 | + goto restart; | |
523 | + } | |
524 | + | |
5b15e2ec FE |
525 | + QIOChannel *ioc = QIO_CHANNEL(qio_channel_savevm_async_new(snap_state.target, |
526 | + &snap_state.bs_pos)); | |
527 | + snap_state.file = qemu_file_new_output(ioc); | |
95259824 WB |
528 | + |
529 | + if (!snap_state.file) { | |
530 | + error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile); | |
531 | + goto restart; | |
532 | + } | |
533 | + | |
d7f4e01a TL |
534 | + /* |
535 | + * qemu_savevm_* paths use migration code and expect a migration state. | |
536 | + * State is cleared in process_savevm_finalize, but has to be initialized | |
537 | + * here (blocking main thread, from QMP) to avoid race conditions. | |
538 | + */ | |
f1eed34a FE |
539 | + if (migrate_init(ms, errp)) { |
540 | + return; | |
541 | + } | |
10e10933 | 542 | + memset(&mig_stats, 0, sizeof(mig_stats)); |
d7f4e01a | 543 | + ms->to_dst_file = snap_state.file; |
95259824 WB |
544 | + |
545 | + error_setg(&snap_state.blocker, "block device is in use by savevm"); | |
67af0fa4 | 546 | + blk_op_block_all(snap_state.target, snap_state.blocker); |
95259824 | 547 | + |
0775f12b | 548 | + snap_state.state = SAVE_STATE_ACTIVE; |
d7f4e01a TL |
549 | + snap_state.finalize_bh = qemu_bh_new(process_savevm_finalize, &snap_state); |
550 | + snap_state.co = qemu_coroutine_create(&process_savevm_co, NULL); | |
d7f4e01a TL |
551 | + qemu_savevm_state_header(snap_state.file); |
552 | + qemu_savevm_state_setup(snap_state.file); | |
d7f4e01a TL |
553 | + |
554 | + /* Async processing from here on out happens in iohandler context, so let | |
555 | + * the target bdrv have its home there. | |
556 | + */ | |
557 | + blk_set_aio_context(snap_state.target, iohandler_ctx, &local_err); | |
558 | + | |
559 | + aio_co_schedule(iohandler_ctx, snap_state.co); | |
95259824 WB |
560 | + |
561 | + return; | |
562 | + | |
563 | +restart: | |
564 | + | |
565 | + save_snapshot_error("setup failed"); | |
566 | + | |
567 | + if (snap_state.saved_vm_running) { | |
568 | + vm_start(); | |
817b7667 | 569 | + snap_state.saved_vm_running = false; |
95259824 WB |
570 | + } |
571 | +} | |
572 | + | |
817b7667 | 573 | +void coroutine_fn qmp_savevm_end(Error **errp) |
95259824 | 574 | +{ |
817b7667 SR |
575 | + int64_t timeout; |
576 | + | |
95259824 WB |
577 | + if (snap_state.state == SAVE_STATE_DONE) { |
578 | + error_set(errp, ERROR_CLASS_GENERIC_ERROR, | |
579 | + "VM snapshot not started\n"); | |
580 | + return; | |
581 | + } | |
582 | + | |
583 | + if (snap_state.state == SAVE_STATE_ACTIVE) { | |
584 | + snap_state.state = SAVE_STATE_CANCELLED; | |
817b7667 | 585 | + goto wait_for_close; |
95259824 WB |
586 | + } |
587 | + | |
588 | + if (snap_state.saved_vm_running) { | |
589 | + vm_start(); | |
817b7667 | 590 | + snap_state.saved_vm_running = false; |
95259824 WB |
591 | + } |
592 | + | |
593 | + snap_state.state = SAVE_STATE_DONE; | |
817b7667 SR |
594 | + |
595 | +wait_for_close: | |
596 | + if (!snap_state.target) { | |
597 | + DPRINTF("savevm-end: no target file open\n"); | |
598 | + return; | |
599 | + } | |
600 | + | |
601 | + /* wait until cleanup is done before returning, this ensures that after this | |
602 | + * call exits the statefile will be closed and can be removed immediately */ | |
603 | + DPRINTF("savevm-end: waiting for cleanup\n"); | |
604 | + timeout = 30L * 1000 * 1000 * 1000; | |
563c5928 | 605 | + qemu_co_sleep_ns_wakeable(&snap_state.target_close_wait, |
f376b2b9 | 606 | + QEMU_CLOCK_REALTIME, timeout); |
817b7667 SR |
607 | + if (snap_state.target) { |
608 | + save_snapshot_error("timeout waiting for target file close in " | |
609 | + "qmp_savevm_end"); | |
610 | + /* we cannot assume the snapshot finished in this case, so leave the | |
611 | + * state alone - caller has to figure something out */ | |
612 | + return; | |
613 | + } | |
614 | + | |
1976ca46 FE |
615 | + // File closed and no other error, so ensure next snapshot can be started. |
616 | + if (snap_state.state != SAVE_STATE_ERROR) { | |
617 | + snap_state.state = SAVE_STATE_DONE; | |
618 | + } | |
619 | + | |
817b7667 | 620 | + DPRINTF("savevm-end: cleanup done\n"); |
95259824 WB |
621 | +} |
622 | + | |
6838f038 | 623 | +int load_snapshot_from_blockdev(const char *filename, Error **errp) |
95259824 | 624 | +{ |
67af0fa4 | 625 | + BlockBackend *be; |
95259824 WB |
626 | + Error *local_err = NULL; |
627 | + Error *blocker = NULL; | |
628 | + | |
629 | + QEMUFile *f; | |
5b15e2ec | 630 | + size_t bs_pos = 0; |
67af0fa4 | 631 | + int ret = -EINVAL; |
95259824 | 632 | + |
67af0fa4 | 633 | + be = blk_new_open(filename, NULL, NULL, 0, &local_err); |
95259824 | 634 | + |
67af0fa4 | 635 | + if (!be) { |
6838f038 | 636 | + error_setg(errp, "Could not open VM state file"); |
95259824 WB |
637 | + goto the_end; |
638 | + } | |
639 | + | |
67af0fa4 WB |
640 | + error_setg(&blocker, "block device is in use by load state"); |
641 | + blk_op_block_all(be, blocker); | |
642 | + | |
95259824 | 643 | + /* restore the VM state */ |
5b15e2ec | 644 | + f = qemu_file_new_input(QIO_CHANNEL(qio_channel_savevm_async_new(be, &bs_pos))); |
95259824 | 645 | + if (!f) { |
6838f038 | 646 | + error_setg(errp, "Could not open VM state file"); |
95259824 WB |
647 | + goto the_end; |
648 | + } | |
649 | + | |
6838f038 | 650 | + qemu_system_reset(SHUTDOWN_CAUSE_NONE); |
95259824 WB |
651 | + ret = qemu_loadvm_state(f); |
652 | + | |
e9b36665 SR |
653 | + /* dirty bitmap migration has a special case we need to trigger manually */ |
654 | + dirty_bitmap_mig_before_vm_start(); | |
655 | + | |
95259824 | 656 | + qemu_fclose(f); |
db5d2a4b FE |
657 | + |
658 | + /* state_destroy assumes a real migration which would have added a yank */ | |
659 | + yank_register_instance(MIGRATION_YANK_INSTANCE, &error_abort); | |
660 | + | |
95259824 WB |
661 | + migration_incoming_state_destroy(); |
662 | + if (ret < 0) { | |
6838f038 | 663 | + error_setg_errno(errp, -ret, "Error while loading VM state"); |
95259824 WB |
664 | + goto the_end; |
665 | + } | |
666 | + | |
667 | + ret = 0; | |
668 | + | |
669 | + the_end: | |
67af0fa4 WB |
670 | + if (be) { |
671 | + blk_op_unblock_all(be, blocker); | |
95259824 | 672 | + error_free(blocker); |
67af0fa4 | 673 | + blk_unref(be); |
95259824 WB |
674 | + } |
675 | + return ret; | |
676 | +} | |
817b7667 | 677 | diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c |
f1eed34a | 678 | index 871898ac46..ef4634e5c1 100644 |
817b7667 SR |
679 | --- a/monitor/hmp-cmds.c |
680 | +++ b/monitor/hmp-cmds.c | |
bf251437 FE |
681 | @@ -22,6 +22,7 @@ |
682 | #include "monitor/monitor-internal.h" | |
683 | #include "qapi/error.h" | |
684 | #include "qapi/qapi-commands-control.h" | |
685 | +#include "qapi/qapi-commands-migration.h" | |
686 | #include "qapi/qapi-commands-misc.h" | |
687 | #include "qapi/qmp/qdict.h" | |
f1eed34a | 688 | #include "qemu/cutils.h" |
99f9ce2c | 689 | @@ -443,3 +444,40 @@ void hmp_info_mtree(Monitor *mon, const QDict *qdict) |
817b7667 | 690 | |
bf251437 FE |
691 | mtree_info(flatview, dispatch_tree, owner, disabled); |
692 | } | |
693 | + | |
817b7667 SR |
694 | +void hmp_savevm_start(Monitor *mon, const QDict *qdict) |
695 | +{ | |
696 | + Error *errp = NULL; | |
697 | + const char *statefile = qdict_get_try_str(qdict, "statefile"); | |
698 | + | |
bf251437 | 699 | + qmp_savevm_start(statefile, &errp); |
817b7667 SR |
700 | + hmp_handle_error(mon, errp); |
701 | +} | |
702 | + | |
817b7667 SR |
703 | +void coroutine_fn hmp_savevm_end(Monitor *mon, const QDict *qdict) |
704 | +{ | |
705 | + Error *errp = NULL; | |
706 | + | |
707 | + qmp_savevm_end(&errp); | |
708 | + hmp_handle_error(mon, errp); | |
709 | +} | |
710 | + | |
711 | +void hmp_info_savevm(Monitor *mon, const QDict *qdict) | |
712 | +{ | |
713 | + SaveVMInfo *info; | |
714 | + info = qmp_query_savevm(NULL); | |
715 | + | |
bf251437 | 716 | + if (info->status) { |
817b7667 SR |
717 | + monitor_printf(mon, "savevm status: %s\n", info->status); |
718 | + monitor_printf(mon, "total time: %" PRIu64 " milliseconds\n", | |
719 | + info->total_time); | |
720 | + } else { | |
721 | + monitor_printf(mon, "savevm status: not running\n"); | |
722 | + } | |
723 | + if (info->has_bytes) { | |
724 | + monitor_printf(mon, "Bytes saved: %"PRIu64"\n", info->bytes); | |
725 | + } | |
bf251437 | 726 | + if (info->error) { |
817b7667 SR |
727 | + monitor_printf(mon, "Error: %s\n", info->error); |
728 | + } | |
729 | +} | |
817b7667 | 730 | diff --git a/qapi/migration.json b/qapi/migration.json |
4fbd50e2 | 731 | index 8c65b90328..ed20d066cd 100644 |
817b7667 SR |
732 | --- a/qapi/migration.json |
733 | +++ b/qapi/migration.json | |
4fbd50e2 | 734 | @@ -297,6 +297,40 @@ |
10e10933 FE |
735 | '*dirty-limit-throttle-time-per-round': 'uint64', |
736 | '*dirty-limit-ring-full-time': 'uint64'} } | |
817b7667 SR |
737 | |
738 | +## | |
739 | +# @SaveVMInfo: | |
740 | +# | |
741 | +# Information about the current savevm process. | |
742 | +# | |
743 | +# @status: string describing the current savevm status. | |
744 | +# This can be 'active', 'completed', 'failed'. | |
745 | +# If this field is not returned, no savevm process | |
746 | +# has been initiated | |
747 | +# | |
748 | +# @error: string containing error message if status is failed. | |
749 | +# | |
750 | +# @total-time: total amount of milliseconds since savevm started. | |
751 | +# If savevm has ended, it returns the total save time | |
752 | +# | |
753 | +# @bytes: total amount of data transferred |
754 | +# | |
755 | +# Since: 1.3 | |
756 | +## | |
757 | +{ 'struct': 'SaveVMInfo', | |
758 | + 'data': {'*status': 'str', '*error': 'str', | |
759 | + '*total-time': 'int', '*bytes': 'int'} } | |
760 | + | |
761 | +## | |
762 | +# @query-savevm: | |
763 | +# | |
764 | +# Returns information about current savevm process. | |
765 | +# | |
766 | +# Returns: @SaveVMInfo | |
767 | +# | |
768 | +# Since: 1.3 | |
769 | +## | |
770 | +{ 'command': 'query-savevm', 'returns': 'SaveVMInfo' } | |
771 | + | |
772 | ## | |
773 | # @query-migrate: | |
774 | # | |
775 | diff --git a/qapi/misc.json b/qapi/misc.json | |
4fbd50e2 | 776 | index ec30e5c570..7147199a12 100644 |
817b7667 SR |
777 | --- a/qapi/misc.json |
778 | +++ b/qapi/misc.json | |
4fbd50e2 | 779 | @@ -454,6 +454,24 @@ |
817b7667 SR |
780 | ## |
781 | { 'command': 'query-fdsets', 'returns': ['FdsetInfo'] } | |
782 | ||
783 | +## | |
784 | +# @savevm-start: | |
785 | +# | |
786 | +# Prepare for snapshot and halt VM. Save VM state to statefile. | |
787 | +# | |
4fbd50e2 FE |
788 | +# @statefile: target file that state should be written to. |
789 | +# | |
817b7667 SR |
790 | +## |
791 | +{ 'command': 'savevm-start', 'data': { '*statefile': 'str' } } | |
792 | + | |
793 | +## | |
817b7667 SR |
794 | +# @savevm-end: |
795 | +# | |
796 | +# Resume VM after a snapshot. | |
797 | +# | |
798 | +## | |
799 | +{ 'command': 'savevm-end', 'coroutine': true } | |
800 | + | |
801 | ## | |
802 | # @CommandLineParameterType: | |
803 | # | |
804 | diff --git a/qemu-options.hx b/qemu-options.hx | |
4fbd50e2 | 805 | index 8ce85d4559..511ab9415e 100644 |
817b7667 SR |
806 | --- a/qemu-options.hx |
807 | +++ b/qemu-options.hx | |
4fbd50e2 | 808 | @@ -4610,6 +4610,18 @@ SRST |
817b7667 SR |
809 | Start right away with a saved state (``loadvm`` in monitor) |
810 | ERST | |
811 | ||
812 | +DEF("loadstate", HAS_ARG, QEMU_OPTION_loadstate, \ | |
813 | + "-loadstate file\n" \ | |
814 | + " start right away with a saved state\n", | |
815 | + QEMU_ARCH_ALL) | |
816 | +SRST | |
817 | +``-loadstate file`` | |
818 | + Start right away with a saved state. This option does not roll back |
819 | + disk state like ``loadvm``, so the user must make sure that the disks |
820 | + have the correct state. ``file`` can be any valid device URL. See the |
821 | + section "Device URL Syntax" for more information. |
822 | +ERST | |
823 | + | |
824 | #ifndef _WIN32 | |
825 | DEF("daemonize", 0, QEMU_OPTION_daemonize, \ | |
826 | "-daemonize daemonize QEMU after initializing\n", QEMU_ARCH_ALL) | |
f1eed34a | 827 | diff --git a/system/vl.c b/system/vl.c |
4fbd50e2 | 828 | index c644222982..2738ab7c91 100644 |
f1eed34a FE |
829 | --- a/system/vl.c |
830 | +++ b/system/vl.c | |
831 | @@ -163,6 +163,7 @@ static const char *accelerators; | |
5b15e2ec FE |
832 | static bool have_custom_ram_size; |
833 | static const char *ram_memdev_id; | |
f376b2b9 | 834 | static QDict *machine_opts_dict; |
8dca018b SR |
835 | +static const char *loadstate; |
836 | static QTAILQ_HEAD(, ObjectOption) object_opts = QTAILQ_HEAD_INITIALIZER(object_opts); | |
4567474e | 837 | static QTAILQ_HEAD(, DeviceOption) device_opts = QTAILQ_HEAD_INITIALIZER(device_opts); |
5b15e2ec | 838 | static int display_remote; |
4fbd50e2 FE |
839 | @@ -2712,6 +2713,12 @@ void qmp_x_exit_preconfig(Error **errp) |
840 | RunState state = autostart ? RUN_STATE_RUNNING : runstate_get(); | |
4567474e | 841 | load_snapshot(loadvm, NULL, false, NULL, &error_fatal); |
4fbd50e2 | 842 | load_snapshot_resume(state); |
95259824 | 843 | + } else if (loadstate) { |
6838f038 WB |
844 | + Error *local_err = NULL; |
845 | + if (load_snapshot_from_blockdev(loadstate, &local_err) < 0) { | |
846 | + error_report_err(local_err); | |
95259824 WB |
847 | + autostart = 0; |
848 | + } | |
849 | } | |
b855dce7 TL |
850 | if (replay_mode != REPLAY_MODE_NONE) { |
851 | replay_vmstate_init(); | |
4fbd50e2 | 852 | @@ -3259,6 +3266,9 @@ void qemu_init(int argc, char **argv) |
8dca018b SR |
853 | case QEMU_OPTION_loadvm: |
854 | loadvm = optarg; | |
855 | break; | |
856 | + case QEMU_OPTION_loadstate: | |
857 | + loadstate = optarg; | |
858 | + break; | |
859 | case QEMU_OPTION_full_screen: | |
860 | dpy.has_full_screen = true; | |
861 | dpy.full_screen = true; |