]> git.proxmox.com Git - mirror_qemu.git/blame - migration/migration.c
postcopy: ram_enable_notify to switch on userfault
[mirror_qemu.git] / migration / migration.c
CommitLineData
5bb7910a
AL
1/*
2 * QEMU live migration
3 *
4 * Copyright IBM, Corp. 2008
5 *
6 * Authors:
7 * Anthony Liguori <aliguori@us.ibm.com>
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2. See
10 * the COPYING file in the top-level directory.
11 *
6b620ca3
PB
12 * Contributions after 2012-01-13 are licensed under the terms of the
13 * GNU GPL, version 2 or (at your option) any later version.
5bb7910a
AL
14 */
15
16#include "qemu-common.h"
d49b6836 17#include "qemu/error-report.h"
6a1751b7 18#include "qemu/main-loop.h"
caf71f86 19#include "migration/migration.h"
0d82d0e8 20#include "migration/qemu-file.h"
9c17d615 21#include "sysemu/sysemu.h"
737e150e 22#include "block/block.h"
cc7a8ea7 23#include "qapi/qmp/qerror.h"
1de7afc9 24#include "qemu/sockets.h"
ab28bd23 25#include "qemu/rcu.h"
caf71f86 26#include "migration/block.h"
e0b266f0 27#include "migration/postcopy-ram.h"
766bd176 28#include "qemu/thread.h"
791e7c82 29#include "qmp-commands.h"
c09e5bb1 30#include "trace.h"
df4b1024 31#include "qapi/util.h"
598cd2bd 32#include "qapi-event.h"
070afca2 33#include "qom/cpu.h"
065e2813 34
/* Migration transfer speed throttling: initial value of
 * MigrationState.bandwidth_limit (see migrate_get_current()). */
#define MAX_THROTTLE  (32 << 20)

/* Amount of time to allocate to each "chunk" of bandwidth-throttled
 * data. */
#define BUFFER_DELAY     100
#define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY)

/* Default compression thread count */
#define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8
/* Default decompression thread count; decompression is usually at
 * least 4 times as fast as compression. */
#define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2
/* 0: no compression, 1: best speed, ... 9: best compression ratio */
#define DEFAULT_MIGRATE_COMPRESS_LEVEL 1
/* Default auto-converge CPU throttle migration parameters */
#define DEFAULT_MIGRATE_X_CPU_THROTTLE_INITIAL 20
#define DEFAULT_MIGRATE_X_CPU_THROTTLE_INCREMENT 10

/* Migration XBZRLE default cache size */
#define DEFAULT_MIGRATE_CACHE_SIZE (64 * 1024 * 1024)
55
99a0db9b
GH
56static NotifierList migration_state_notifiers =
57 NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);
58
adde220a
DDAG
59static bool deferred_incoming;
60
093e3c42
DDAG
61/*
62 * Current state of incoming postcopy; note this is not part of
63 * MigrationIncomingState since it's state is used during cleanup
64 * at the end as MIS is being freed.
65 */
66static PostcopyState incoming_postcopy_state;
67
17549e84
JQ
68/* When we add fault tolerance, we could have several
69 migrations at once. For now we don't need to add
70 dynamic creation of migration */
71
bca7856a 72/* For outgoing */
859bc756 73MigrationState *migrate_get_current(void)
17549e84
JQ
74{
75 static MigrationState current_migration = {
31194731 76 .state = MIGRATION_STATUS_NONE,
d0ae46c1 77 .bandwidth_limit = MAX_THROTTLE,
17ad9b35 78 .xbzrle_cache_size = DEFAULT_MIGRATE_CACHE_SIZE,
7e114f8c 79 .mbps = -1,
43c60a81
LL
80 .parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL] =
81 DEFAULT_MIGRATE_COMPRESS_LEVEL,
82 .parameters[MIGRATION_PARAMETER_COMPRESS_THREADS] =
83 DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT,
84 .parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS] =
85 DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT,
1626fee3
JH
86 .parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL] =
87 DEFAULT_MIGRATE_X_CPU_THROTTLE_INITIAL,
88 .parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT] =
89 DEFAULT_MIGRATE_X_CPU_THROTTLE_INCREMENT,
17549e84
JQ
90 };
91
92 return &current_migration;
93}
94
bca7856a
DDAG
95/* For incoming */
96static MigrationIncomingState *mis_current;
97
98MigrationIncomingState *migration_incoming_get_current(void)
99{
100 return mis_current;
101}
102
103MigrationIncomingState *migration_incoming_state_new(QEMUFile* f)
104{
97f3ad35 105 mis_current = g_new0(MigrationIncomingState, 1);
42e2aa56 106 mis_current->from_src_file = f;
1a8f46f8 107 QLIST_INIT(&mis_current->loadvm_handlers);
6decec93 108 qemu_mutex_init(&mis_current->rp_mutex);
7b89bf27 109 qemu_event_init(&mis_current->main_thread_load_event, false);
bca7856a
DDAG
110
111 return mis_current;
112}
113
114void migration_incoming_state_destroy(void)
115{
7b89bf27 116 qemu_event_destroy(&mis_current->main_thread_load_event);
1a8f46f8 117 loadvm_free_handlers(mis_current);
bca7856a
DDAG
118 g_free(mis_current);
119 mis_current = NULL;
120}
121
df4b1024
JQ
122
123typedef struct {
13d16814 124 bool optional;
df4b1024
JQ
125 uint32_t size;
126 uint8_t runstate[100];
172c4356
JQ
127 RunState state;
128 bool received;
df4b1024
JQ
129} GlobalState;
130
131static GlobalState global_state;
132
560d027b 133int global_state_store(void)
df4b1024
JQ
134{
135 if (!runstate_store((char *)global_state.runstate,
136 sizeof(global_state.runstate))) {
137 error_report("runstate name too big: %s", global_state.runstate);
138 trace_migrate_state_too_big();
139 return -EINVAL;
140 }
141 return 0;
142}
143
c69adea4
AP
144void global_state_store_running(void)
145{
146 const char *state = RunState_lookup[RUN_STATE_RUNNING];
147 strncpy((char *)global_state.runstate,
148 state, sizeof(global_state.runstate));
149}
150
172c4356 151static bool global_state_received(void)
df4b1024 152{
172c4356
JQ
153 return global_state.received;
154}
155
156static RunState global_state_get_runstate(void)
157{
158 return global_state.state;
df4b1024
JQ
159}
160
13d16814
JQ
161void global_state_set_optional(void)
162{
163 global_state.optional = true;
164}
165
166static bool global_state_needed(void *opaque)
167{
168 GlobalState *s = opaque;
169 char *runstate = (char *)s->runstate;
170
171 /* If it is not optional, it is mandatory */
172
173 if (s->optional == false) {
174 return true;
175 }
176
177 /* If state is running or paused, it is not needed */
178
179 if (strcmp(runstate, "running") == 0 ||
180 strcmp(runstate, "paused") == 0) {
181 return false;
182 }
183
184 /* for any other state it is needed */
185 return true;
186}
187
df4b1024
JQ
188static int global_state_post_load(void *opaque, int version_id)
189{
190 GlobalState *s = opaque;
172c4356
JQ
191 Error *local_err = NULL;
192 int r;
df4b1024
JQ
193 char *runstate = (char *)s->runstate;
194
172c4356 195 s->received = true;
df4b1024
JQ
196 trace_migrate_global_state_post_load(runstate);
197
172c4356 198 r = qapi_enum_parse(RunState_lookup, runstate, RUN_STATE_MAX,
df4b1024
JQ
199 -1, &local_err);
200
172c4356
JQ
201 if (r == -1) {
202 if (local_err) {
203 error_report_err(local_err);
df4b1024 204 }
172c4356 205 return -EINVAL;
df4b1024 206 }
172c4356 207 s->state = r;
df4b1024 208
172c4356 209 return 0;
df4b1024
JQ
210}
211
212static void global_state_pre_save(void *opaque)
213{
214 GlobalState *s = opaque;
215
216 trace_migrate_global_state_pre_save((char *)s->runstate);
217 s->size = strlen((char *)s->runstate) + 1;
218}
219
220static const VMStateDescription vmstate_globalstate = {
221 .name = "globalstate",
222 .version_id = 1,
223 .minimum_version_id = 1,
224 .post_load = global_state_post_load,
225 .pre_save = global_state_pre_save,
13d16814 226 .needed = global_state_needed,
df4b1024
JQ
227 .fields = (VMStateField[]) {
228 VMSTATE_UINT32(size, GlobalState),
229 VMSTATE_BUFFER(runstate, GlobalState),
230 VMSTATE_END_OF_LIST()
231 },
232};
233
234void register_global_state(void)
235{
236 /* We would use it independently that we receive it */
237 strcpy((char *)&global_state.runstate, "");
172c4356 238 global_state.received = false;
df4b1024
JQ
239 vmstate_register(NULL, 0, &vmstate_globalstate, &global_state);
240}
241
b05dc723
JQ
242static void migrate_generate_event(int new_state)
243{
244 if (migrate_use_events()) {
245 qapi_event_send_migration(new_state, &error_abort);
b05dc723
JQ
246 }
247}
248
adde220a
DDAG
249/*
250 * Called on -incoming with a defer: uri.
251 * The migration can be started later after any parameters have been
252 * changed.
253 */
254static void deferred_incoming_migration(Error **errp)
255{
256 if (deferred_incoming) {
257 error_setg(errp, "Incoming migration already deferred");
258 }
259 deferred_incoming = true;
260}
261
43eaae28 262void qemu_start_incoming_migration(const char *uri, Error **errp)
5bb7910a 263{
34c9dd8e
AL
264 const char *p;
265
7cf1fe6d 266 qapi_event_send_migration(MIGRATION_STATUS_SETUP, &error_abort);
adde220a
DDAG
267 if (!strcmp(uri, "defer")) {
268 deferred_incoming_migration(errp);
269 } else if (strstart(uri, "tcp:", &p)) {
43eaae28 270 tcp_start_incoming_migration(p, errp);
2da776db 271#ifdef CONFIG_RDMA
adde220a 272 } else if (strstart(uri, "rdma:", &p)) {
2da776db
MH
273 rdma_start_incoming_migration(p, errp);
274#endif
065e2813 275#if !defined(WIN32)
adde220a 276 } else if (strstart(uri, "exec:", &p)) {
43eaae28 277 exec_start_incoming_migration(p, errp);
adde220a 278 } else if (strstart(uri, "unix:", &p)) {
43eaae28 279 unix_start_incoming_migration(p, errp);
adde220a 280 } else if (strstart(uri, "fd:", &p)) {
43eaae28 281 fd_start_incoming_migration(p, errp);
065e2813 282#endif
adde220a 283 } else {
312fd5f2 284 error_setg(errp, "unknown migration protocol: %s", uri);
8ca5e801 285 }
5bb7910a
AL
286}
287
82a4da79 288static void process_incoming_migration_co(void *opaque)
511c0231 289{
82a4da79 290 QEMUFile *f = opaque;
5a8a30db 291 Error *local_err = NULL;
1c12e1f5
PB
292 int ret;
293
bca7856a 294 migration_incoming_state_new(f);
093e3c42 295 postcopy_state_set(POSTCOPY_INCOMING_NONE);
7cf1fe6d 296 migrate_generate_event(MIGRATION_STATUS_ACTIVE);
1c12e1f5 297 ret = qemu_loadvm_state(f);
bca7856a 298
1c12e1f5 299 qemu_fclose(f);
905f26f2 300 free_xbzrle_decoded_buf();
bca7856a
DDAG
301 migration_incoming_state_destroy();
302
1c12e1f5 303 if (ret < 0) {
7cf1fe6d 304 migrate_generate_event(MIGRATION_STATUS_FAILED);
db80face 305 error_report("load of migration failed: %s", strerror(-ret));
3fcb38c2 306 migrate_decompress_threads_join();
4aead692 307 exit(EXIT_FAILURE);
511c0231 308 }
511c0231 309
0f15423c 310 /* Make sure all file formats flush their mutable metadata */
5a8a30db
KW
311 bdrv_invalidate_cache_all(&local_err);
312 if (local_err) {
ed1f3e00 313 migrate_generate_event(MIGRATION_STATUS_FAILED);
97baf9d9 314 error_report_err(local_err);
3fcb38c2 315 migrate_decompress_threads_join();
5a8a30db
KW
316 exit(EXIT_FAILURE);
317 }
0f15423c 318
92e37622
AS
319 /*
320 * This must happen after all error conditions are dealt with and
321 * we're sure the VM is going to be running on this host.
322 */
323 qemu_announce_self();
324
172c4356
JQ
325 /* If global state section was not received or we are in running
326 state, we need to obey autostart. Any other state is set with
327 runstate_set. */
df4b1024 328
172c4356
JQ
329 if (!global_state_received() ||
330 global_state_get_runstate() == RUN_STATE_RUNNING) {
df4b1024
JQ
331 if (autostart) {
332 vm_start();
333 } else {
334 runstate_set(RUN_STATE_PAUSED);
335 }
172c4356
JQ
336 } else {
337 runstate_set(global_state_get_runstate());
f5bbfba1 338 }
3fcb38c2 339 migrate_decompress_threads_join();
ed1f3e00
DDAG
340 /*
341 * This must happen after any state changes since as soon as an external
342 * observer sees this event they might start to prod at the VM assuming
343 * it's ready to use.
344 */
345 migrate_generate_event(MIGRATION_STATUS_COMPLETED);
511c0231
JQ
346}
347
82a4da79
PB
348void process_incoming_migration(QEMUFile *f)
349{
350 Coroutine *co = qemu_coroutine_create(process_incoming_migration_co);
351 int fd = qemu_get_fd(f);
352
353 assert(fd != -1);
3fcb38c2 354 migrate_decompress_threads_create();
f9e8cacc 355 qemu_set_nonblock(fd);
82a4da79
PB
356 qemu_coroutine_enter(co, f);
357}
358
6decec93
DDAG
359/*
360 * Send a message on the return channel back to the source
361 * of the migration.
362 */
363void migrate_send_rp_message(MigrationIncomingState *mis,
364 enum mig_rp_message_type message_type,
365 uint16_t len, void *data)
366{
367 trace_migrate_send_rp_message((int)message_type, len);
368 qemu_mutex_lock(&mis->rp_mutex);
369 qemu_put_be16(mis->to_src_file, (unsigned int)message_type);
370 qemu_put_be16(mis->to_src_file, len);
371 qemu_put_buffer(mis->to_src_file, data, len);
372 qemu_fflush(mis->to_src_file);
373 qemu_mutex_unlock(&mis->rp_mutex);
374}
375
376/*
377 * Send a 'SHUT' message on the return channel with the given value
378 * to indicate that we've finished with the RP. Non-0 value indicates
379 * error.
380 */
381void migrate_send_rp_shut(MigrationIncomingState *mis,
382 uint32_t value)
383{
384 uint32_t buf;
385
386 buf = cpu_to_be32(value);
387 migrate_send_rp_message(mis, MIG_RP_MSG_SHUT, sizeof(buf), &buf);
388}
389
390/*
391 * Send a 'PONG' message on the return channel with the given value
392 * (normally in response to a 'PING')
393 */
394void migrate_send_rp_pong(MigrationIncomingState *mis,
395 uint32_t value)
396{
397 uint32_t buf;
398
399 buf = cpu_to_be32(value);
400 migrate_send_rp_message(mis, MIG_RP_MSG_PONG, sizeof(buf), &buf);
401}
402
a0a3fd60
GC
403/* amount of nanoseconds we are willing to wait for migration to be down.
404 * the choice of nanoseconds is because it is the maximum resolution that
405 * get_clock() can achieve. It is an internal measure. All user-visible
406 * units must be in seconds */
f7cd55a0 407static uint64_t max_downtime = 300000000;
a0a3fd60
GC
408
409uint64_t migrate_max_downtime(void)
410{
411 return max_downtime;
412}
413
bbf6da32
OW
414MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp)
415{
416 MigrationCapabilityStatusList *head = NULL;
417 MigrationCapabilityStatusList *caps;
418 MigrationState *s = migrate_get_current();
419 int i;
420
387eedeb 421 caps = NULL; /* silence compiler warning */
bbf6da32
OW
422 for (i = 0; i < MIGRATION_CAPABILITY_MAX; i++) {
423 if (head == NULL) {
424 head = g_malloc0(sizeof(*caps));
425 caps = head;
426 } else {
427 caps->next = g_malloc0(sizeof(*caps));
428 caps = caps->next;
429 }
430 caps->value =
431 g_malloc(sizeof(*caps->value));
432 caps->value->capability = i;
433 caps->value->state = s->enabled_capabilities[i];
434 }
435
436 return head;
437}
438
85de8323
LL
439MigrationParameters *qmp_query_migrate_parameters(Error **errp)
440{
441 MigrationParameters *params;
442 MigrationState *s = migrate_get_current();
443
444 params = g_malloc0(sizeof(*params));
445 params->compress_level = s->parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL];
446 params->compress_threads =
447 s->parameters[MIGRATION_PARAMETER_COMPRESS_THREADS];
448 params->decompress_threads =
449 s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS];
1626fee3
JH
450 params->x_cpu_throttle_initial =
451 s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL];
452 params->x_cpu_throttle_increment =
453 s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT];
85de8323
LL
454
455 return params;
456}
457
f6844b99
DDAG
458/*
459 * Return true if we're already in the middle of a migration
460 * (i.e. any of the active or setup states)
461 */
462static bool migration_is_setup_or_active(int state)
463{
464 switch (state) {
465 case MIGRATION_STATUS_ACTIVE:
9ec055ae 466 case MIGRATION_STATUS_POSTCOPY_ACTIVE:
f6844b99
DDAG
467 case MIGRATION_STATUS_SETUP:
468 return true;
469
470 default:
471 return false;
472
473 }
474}
475
f36d55af
OW
476static void get_xbzrle_cache_stats(MigrationInfo *info)
477{
478 if (migrate_use_xbzrle()) {
479 info->has_xbzrle_cache = true;
480 info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache));
481 info->xbzrle_cache->cache_size = migrate_xbzrle_cache_size();
482 info->xbzrle_cache->bytes = xbzrle_mig_bytes_transferred();
483 info->xbzrle_cache->pages = xbzrle_mig_pages_transferred();
484 info->xbzrle_cache->cache_miss = xbzrle_mig_pages_cache_miss();
8bc39233 485 info->xbzrle_cache->cache_miss_rate = xbzrle_mig_cache_miss_rate();
f36d55af
OW
486 info->xbzrle_cache->overflow = xbzrle_mig_pages_overflow();
487 }
488}
489
791e7c82 490MigrationInfo *qmp_query_migrate(Error **errp)
5bb7910a 491{
791e7c82 492 MigrationInfo *info = g_malloc0(sizeof(*info));
17549e84
JQ
493 MigrationState *s = migrate_get_current();
494
495 switch (s->state) {
31194731 496 case MIGRATION_STATUS_NONE:
17549e84
JQ
497 /* no migration has happened ever */
498 break;
31194731 499 case MIGRATION_STATUS_SETUP:
29ae8a41 500 info->has_status = true;
ed4fbd10 501 info->has_total_time = false;
29ae8a41 502 break;
31194731
HZ
503 case MIGRATION_STATUS_ACTIVE:
504 case MIGRATION_STATUS_CANCELLING:
791e7c82 505 info->has_status = true;
7aa939af 506 info->has_total_time = true;
bc72ad67 507 info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME)
7aa939af 508 - s->total_time;
2c52ddf1
JQ
509 info->has_expected_downtime = true;
510 info->expected_downtime = s->expected_downtime;
ed4fbd10
MH
511 info->has_setup_time = true;
512 info->setup_time = s->setup_time;
17549e84 513
791e7c82
LC
514 info->has_ram = true;
515 info->ram = g_malloc0(sizeof(*info->ram));
516 info->ram->transferred = ram_bytes_transferred();
517 info->ram->remaining = ram_bytes_remaining();
518 info->ram->total = ram_bytes_total();
004d4c10 519 info->ram->duplicate = dup_mig_pages_transferred();
f1c72795 520 info->ram->skipped = skipped_mig_pages_transferred();
004d4c10
OW
521 info->ram->normal = norm_mig_pages_transferred();
522 info->ram->normal_bytes = norm_mig_bytes_transferred();
8d017193 523 info->ram->dirty_pages_rate = s->dirty_pages_rate;
7e114f8c 524 info->ram->mbps = s->mbps;
58570ed8 525 info->ram->dirty_sync_count = s->dirty_sync_count;
8d017193 526
17549e84 527 if (blk_mig_active()) {
791e7c82
LC
528 info->has_disk = true;
529 info->disk = g_malloc0(sizeof(*info->disk));
530 info->disk->transferred = blk_mig_bytes_transferred();
531 info->disk->remaining = blk_mig_bytes_remaining();
532 info->disk->total = blk_mig_bytes_total();
ff8d81d8 533 }
f36d55af 534
4782893e
JH
535 if (cpu_throttle_active()) {
536 info->has_x_cpu_throttle_percentage = true;
537 info->x_cpu_throttle_percentage = cpu_throttle_get_percentage();
538 }
539
9ec055ae
DDAG
540 get_xbzrle_cache_stats(info);
541 break;
542 case MIGRATION_STATUS_POSTCOPY_ACTIVE:
543 /* Mostly the same as active; TODO add some postcopy stats */
544 info->has_status = true;
545 info->has_total_time = true;
546 info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME)
547 - s->total_time;
548 info->has_expected_downtime = true;
549 info->expected_downtime = s->expected_downtime;
550 info->has_setup_time = true;
551 info->setup_time = s->setup_time;
552
553 info->has_ram = true;
554 info->ram = g_malloc0(sizeof(*info->ram));
555 info->ram->transferred = ram_bytes_transferred();
556 info->ram->remaining = ram_bytes_remaining();
557 info->ram->total = ram_bytes_total();
558 info->ram->duplicate = dup_mig_pages_transferred();
559 info->ram->skipped = skipped_mig_pages_transferred();
560 info->ram->normal = norm_mig_pages_transferred();
561 info->ram->normal_bytes = norm_mig_bytes_transferred();
562 info->ram->dirty_pages_rate = s->dirty_pages_rate;
563 info->ram->mbps = s->mbps;
564
565 if (blk_mig_active()) {
566 info->has_disk = true;
567 info->disk = g_malloc0(sizeof(*info->disk));
568 info->disk->transferred = blk_mig_bytes_transferred();
569 info->disk->remaining = blk_mig_bytes_remaining();
570 info->disk->total = blk_mig_bytes_total();
571 }
572
f36d55af 573 get_xbzrle_cache_stats(info);
17549e84 574 break;
31194731 575 case MIGRATION_STATUS_COMPLETED:
f36d55af
OW
576 get_xbzrle_cache_stats(info);
577
791e7c82 578 info->has_status = true;
00c14997 579 info->has_total_time = true;
7aa939af 580 info->total_time = s->total_time;
9c5a9fcf
JQ
581 info->has_downtime = true;
582 info->downtime = s->downtime;
ed4fbd10
MH
583 info->has_setup_time = true;
584 info->setup_time = s->setup_time;
d5f8a570
JQ
585
586 info->has_ram = true;
587 info->ram = g_malloc0(sizeof(*info->ram));
588 info->ram->transferred = ram_bytes_transferred();
589 info->ram->remaining = 0;
590 info->ram->total = ram_bytes_total();
004d4c10 591 info->ram->duplicate = dup_mig_pages_transferred();
f1c72795 592 info->ram->skipped = skipped_mig_pages_transferred();
004d4c10
OW
593 info->ram->normal = norm_mig_pages_transferred();
594 info->ram->normal_bytes = norm_mig_bytes_transferred();
7e114f8c 595 info->ram->mbps = s->mbps;
58570ed8 596 info->ram->dirty_sync_count = s->dirty_sync_count;
17549e84 597 break;
31194731 598 case MIGRATION_STATUS_FAILED:
791e7c82 599 info->has_status = true;
17549e84 600 break;
31194731 601 case MIGRATION_STATUS_CANCELLED:
791e7c82 602 info->has_status = true;
17549e84 603 break;
5bb7910a 604 }
cde63fbe 605 info->status = s->state;
791e7c82
LC
606
607 return info;
5bb7910a
AL
608}
609
00458433
OW
610void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
611 Error **errp)
612{
613 MigrationState *s = migrate_get_current();
614 MigrationCapabilityStatusList *cap;
615
f6844b99 616 if (migration_is_setup_or_active(s->state)) {
c6bd8c70 617 error_setg(errp, QERR_MIGRATION_ACTIVE);
00458433
OW
618 return;
619 }
620
621 for (cap = params; cap; cap = cap->next) {
622 s->enabled_capabilities[cap->value->capability] = cap->value->state;
623 }
53dd370c
DDAG
624
625 if (migrate_postcopy_ram()) {
626 if (migrate_use_compression()) {
627 /* The decompression threads asynchronously write into RAM
628 * rather than use the atomic copies needed to avoid
629 * userfaulting. It should be possible to fix the decompression
630 * threads for compatibility in future.
631 */
632 error_report("Postcopy is not currently compatible with "
633 "compression");
634 s->enabled_capabilities[MIGRATION_CAPABILITY_X_POSTCOPY_RAM] =
635 false;
636 }
637 }
00458433
OW
638}
639
85de8323
LL
640void qmp_migrate_set_parameters(bool has_compress_level,
641 int64_t compress_level,
642 bool has_compress_threads,
643 int64_t compress_threads,
644 bool has_decompress_threads,
1626fee3
JH
645 int64_t decompress_threads,
646 bool has_x_cpu_throttle_initial,
647 int64_t x_cpu_throttle_initial,
648 bool has_x_cpu_throttle_increment,
649 int64_t x_cpu_throttle_increment, Error **errp)
85de8323
LL
650{
651 MigrationState *s = migrate_get_current();
652
653 if (has_compress_level && (compress_level < 0 || compress_level > 9)) {
c6bd8c70
MA
654 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level",
655 "is invalid, it should be in the range of 0 to 9");
85de8323
LL
656 return;
657 }
658 if (has_compress_threads &&
659 (compress_threads < 1 || compress_threads > 255)) {
c6bd8c70
MA
660 error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
661 "compress_threads",
662 "is invalid, it should be in the range of 1 to 255");
85de8323
LL
663 return;
664 }
665 if (has_decompress_threads &&
666 (decompress_threads < 1 || decompress_threads > 255)) {
c6bd8c70
MA
667 error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
668 "decompress_threads",
669 "is invalid, it should be in the range of 1 to 255");
85de8323
LL
670 return;
671 }
1626fee3
JH
672 if (has_x_cpu_throttle_initial &&
673 (x_cpu_throttle_initial < 1 || x_cpu_throttle_initial > 99)) {
674 error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
675 "x_cpu_throttle_initial",
676 "an integer in the range of 1 to 99");
677 }
678 if (has_x_cpu_throttle_increment &&
679 (x_cpu_throttle_increment < 1 || x_cpu_throttle_increment > 99)) {
680 error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
681 "x_cpu_throttle_increment",
682 "an integer in the range of 1 to 99");
683 }
85de8323
LL
684
685 if (has_compress_level) {
686 s->parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL] = compress_level;
687 }
688 if (has_compress_threads) {
689 s->parameters[MIGRATION_PARAMETER_COMPRESS_THREADS] = compress_threads;
690 }
691 if (has_decompress_threads) {
692 s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS] =
693 decompress_threads;
694 }
1626fee3
JH
695 if (has_x_cpu_throttle_initial) {
696 s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL] =
697 x_cpu_throttle_initial;
698 }
699
700 if (has_x_cpu_throttle_increment) {
701 s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT] =
702 x_cpu_throttle_increment;
703 }
85de8323
LL
704}
705
4886a1bc
DDAG
706void qmp_migrate_start_postcopy(Error **errp)
707{
708 MigrationState *s = migrate_get_current();
709
710 if (!migrate_postcopy_ram()) {
711 error_setg(errp, "Enable postcopy with migration_set_capability before"
712 " the start of migration");
713 return;
714 }
715
716 if (s->state == MIGRATION_STATUS_NONE) {
717 error_setg(errp, "Postcopy must be started after migration has been"
718 " started");
719 return;
720 }
721 /*
722 * we don't error if migration has finished since that would be racy
723 * with issuing this command.
724 */
725 atomic_set(&s->start_postcopy, true);
726}
727
065e2813
AL
728/* shared migration helpers */
729
51cf4c1a
Z
730static void migrate_set_state(MigrationState *s, int old_state, int new_state)
731{
a5c17b5f 732 if (atomic_cmpxchg(&s->state, old_state, new_state) == old_state) {
4ba4bc5e 733 trace_migrate_set_state(new_state);
b05dc723 734 migrate_generate_event(new_state);
51cf4c1a
Z
735 }
736}
737
bb1fadc4 738static void migrate_fd_cleanup(void *opaque)
065e2813 739{
bb1fadc4
PB
740 MigrationState *s = opaque;
741
742 qemu_bh_delete(s->cleanup_bh);
743 s->cleanup_bh = NULL;
744
065e2813 745 if (s->file) {
9013dca5 746 trace_migrate_fd_cleanup();
404a7c05
PB
747 qemu_mutex_unlock_iothread();
748 qemu_thread_join(&s->thread);
749 qemu_mutex_lock_iothread();
750
8706d2d5 751 migrate_compress_threads_join();
6f190a06
PB
752 qemu_fclose(s->file);
753 s->file = NULL;
065e2813
AL
754 }
755
9ec055ae
DDAG
756 assert((s->state != MIGRATION_STATUS_ACTIVE) &&
757 (s->state != MIGRATION_STATUS_POSTCOPY_ACTIVE));
7a2c1721 758
94f5a437
LL
759 if (s->state == MIGRATION_STATUS_CANCELLING) {
760 migrate_set_state(s, MIGRATION_STATUS_CANCELLING,
761 MIGRATION_STATUS_CANCELLED);
7a2c1721 762 }
a3fa1d78
PB
763
764 notifier_list_notify(&migration_state_notifiers, s);
065e2813
AL
765}
766
8b6b99b3 767void migrate_fd_error(MigrationState *s)
065e2813 768{
9013dca5 769 trace_migrate_fd_error();
bb1fadc4 770 assert(s->file == NULL);
7844337d 771 migrate_set_state(s, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_FAILED);
bb1fadc4 772 notifier_list_notify(&migration_state_notifiers, s);
458cf28e
JQ
773}
774
0edda1c4 775static void migrate_fd_cancel(MigrationState *s)
065e2813 776{
6f2b811a 777 int old_state ;
a26ba26e 778 QEMUFile *f = migrate_get_current()->file;
9013dca5 779 trace_migrate_fd_cancel();
065e2813 780
70b20477
DDAG
781 if (s->rp_state.from_dst_file) {
782 /* shutdown the rp socket, so causing the rp thread to shutdown */
783 qemu_file_shutdown(s->rp_state.from_dst_file);
784 }
785
6f2b811a
Z
786 do {
787 old_state = s->state;
f6844b99 788 if (!migration_is_setup_or_active(old_state)) {
6f2b811a
Z
789 break;
790 }
31194731
HZ
791 migrate_set_state(s, old_state, MIGRATION_STATUS_CANCELLING);
792 } while (s->state != MIGRATION_STATUS_CANCELLING);
a26ba26e
DDAG
793
794 /*
795 * If we're unlucky the migration code might be stuck somewhere in a
796 * send/write while the network has failed and is waiting to timeout;
797 * if we've got shutdown(2) available then we can force it to quit.
798 * The outgoing qemu file gets closed in migrate_fd_cleanup that is
799 * called in a bh, so there is no race against this cancel.
800 */
31194731 801 if (s->state == MIGRATION_STATUS_CANCELLING && f) {
a26ba26e
DDAG
802 qemu_file_shutdown(f);
803 }
065e2813
AL
804}
805
99a0db9b
GH
806void add_migration_state_change_notifier(Notifier *notify)
807{
808 notifier_list_add(&migration_state_notifiers, notify);
809}
810
811void remove_migration_state_change_notifier(Notifier *notify)
812{
31552529 813 notifier_remove(notify);
99a0db9b
GH
814}
815
02edd2e7 816bool migration_in_setup(MigrationState *s)
afe2df69 817{
31194731 818 return s->state == MIGRATION_STATUS_SETUP;
afe2df69
GH
819}
820
7073693b 821bool migration_has_finished(MigrationState *s)
99a0db9b 822{
31194731 823 return s->state == MIGRATION_STATUS_COMPLETED;
99a0db9b 824}
0edda1c4 825
afe2df69
GH
826bool migration_has_failed(MigrationState *s)
827{
31194731
HZ
828 return (s->state == MIGRATION_STATUS_CANCELLED ||
829 s->state == MIGRATION_STATUS_FAILED);
afe2df69
GH
830}
831
9ec055ae
DDAG
832bool migration_in_postcopy(MigrationState *s)
833{
834 return (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
835}
836
aefeb18b 837MigrationState *migrate_init(const MigrationParams *params)
0edda1c4 838{
17549e84 839 MigrationState *s = migrate_get_current();
d0ae46c1 840 int64_t bandwidth_limit = s->bandwidth_limit;
bbf6da32 841 bool enabled_capabilities[MIGRATION_CAPABILITY_MAX];
17ad9b35 842 int64_t xbzrle_cache_size = s->xbzrle_cache_size;
43c60a81
LL
843 int compress_level = s->parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL];
844 int compress_thread_count =
845 s->parameters[MIGRATION_PARAMETER_COMPRESS_THREADS];
846 int decompress_thread_count =
847 s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS];
1626fee3
JH
848 int x_cpu_throttle_initial =
849 s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL];
850 int x_cpu_throttle_increment =
851 s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT];
bbf6da32
OW
852
853 memcpy(enabled_capabilities, s->enabled_capabilities,
854 sizeof(enabled_capabilities));
0edda1c4 855
17549e84 856 memset(s, 0, sizeof(*s));
6607ae23 857 s->params = *params;
bbf6da32
OW
858 memcpy(s->enabled_capabilities, enabled_capabilities,
859 sizeof(enabled_capabilities));
17ad9b35 860 s->xbzrle_cache_size = xbzrle_cache_size;
1299c631 861
43c60a81
LL
862 s->parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL] = compress_level;
863 s->parameters[MIGRATION_PARAMETER_COMPRESS_THREADS] =
864 compress_thread_count;
865 s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS] =
866 decompress_thread_count;
1626fee3
JH
867 s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL] =
868 x_cpu_throttle_initial;
869 s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT] =
870 x_cpu_throttle_increment;
0edda1c4 871 s->bandwidth_limit = bandwidth_limit;
7844337d 872 migrate_set_state(s, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP);
0edda1c4 873
bc72ad67 874 s->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
0edda1c4
JQ
875 return s;
876}
cab30143 877
fa2756b7
AL
878static GSList *migration_blockers;
879
880void migrate_add_blocker(Error *reason)
881{
882 migration_blockers = g_slist_prepend(migration_blockers, reason);
883}
884
885void migrate_del_blocker(Error *reason)
886{
887 migration_blockers = g_slist_remove(migration_blockers, reason);
888}
889
bf1ae1f4
DDAG
890void qmp_migrate_incoming(const char *uri, Error **errp)
891{
892 Error *local_err = NULL;
4debb5f5 893 static bool once = true;
bf1ae1f4
DDAG
894
895 if (!deferred_incoming) {
4debb5f5 896 error_setg(errp, "For use with '-incoming defer'");
bf1ae1f4
DDAG
897 return;
898 }
4debb5f5
DDAG
899 if (!once) {
900 error_setg(errp, "The incoming migration has already been started");
901 }
bf1ae1f4
DDAG
902
903 qemu_start_incoming_migration(uri, &local_err);
904
905 if (local_err) {
906 error_propagate(errp, local_err);
907 return;
908 }
909
4debb5f5 910 once = false;
bf1ae1f4
DDAG
911}
912
e1c37d0e
LC
913void qmp_migrate(const char *uri, bool has_blk, bool blk,
914 bool has_inc, bool inc, bool has_detach, bool detach,
915 Error **errp)
cab30143 916{
be7059cd 917 Error *local_err = NULL;
17549e84 918 MigrationState *s = migrate_get_current();
6607ae23 919 MigrationParams params;
cab30143 920 const char *p;
cab30143 921
8c0426ae
PP
922 params.blk = has_blk && blk;
923 params.shared = has_inc && inc;
6607ae23 924
f6844b99 925 if (migration_is_setup_or_active(s->state) ||
31194731 926 s->state == MIGRATION_STATUS_CANCELLING) {
c6bd8c70 927 error_setg(errp, QERR_MIGRATION_ACTIVE);
e1c37d0e 928 return;
cab30143 929 }
ca99993a
DDAG
930 if (runstate_check(RUN_STATE_INMIGRATE)) {
931 error_setg(errp, "Guest is waiting for an incoming migration");
932 return;
933 }
934
e1c37d0e
LC
935 if (qemu_savevm_state_blocked(errp)) {
936 return;
cab30143
JQ
937 }
938
fa2756b7 939 if (migration_blockers) {
e1c37d0e
LC
940 *errp = error_copy(migration_blockers->data);
941 return;
fa2756b7
AL
942 }
943
656a2334
JQ
944 /* We are starting a new migration, so we want to start in a clean
945 state. This change is only needed if previous migration
946 failed/was cancelled. We don't use migrate_set_state() because
947 we are setting the initial state, not changing it. */
948 s->state = MIGRATION_STATUS_NONE;
949
6607ae23 950 s = migrate_init(&params);
cab30143
JQ
951
952 if (strstart(uri, "tcp:", &p)) {
f37afb5a 953 tcp_start_outgoing_migration(s, p, &local_err);
2da776db 954#ifdef CONFIG_RDMA
41310c68 955 } else if (strstart(uri, "rdma:", &p)) {
2da776db
MH
956 rdma_start_outgoing_migration(s, p, &local_err);
957#endif
cab30143
JQ
958#if !defined(WIN32)
959 } else if (strstart(uri, "exec:", &p)) {
f37afb5a 960 exec_start_outgoing_migration(s, p, &local_err);
cab30143 961 } else if (strstart(uri, "unix:", &p)) {
f37afb5a 962 unix_start_outgoing_migration(s, p, &local_err);
cab30143 963 } else if (strstart(uri, "fd:", &p)) {
f37afb5a 964 fd_start_outgoing_migration(s, p, &local_err);
cab30143 965#endif
99a0db9b 966 } else {
c6bd8c70
MA
967 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "uri",
968 "a valid migration protocol");
7844337d 969 migrate_set_state(s, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_FAILED);
e1c37d0e 970 return;
cab30143
JQ
971 }
972
f37afb5a 973 if (local_err) {
342ab8d1 974 migrate_fd_error(s);
f37afb5a 975 error_propagate(errp, local_err);
e1c37d0e 976 return;
1299c631 977 }
cab30143
JQ
978}
979
6cdedb07 980void qmp_migrate_cancel(Error **errp)
cab30143 981{
17549e84 982 migrate_fd_cancel(migrate_get_current());
cab30143
JQ
983}
984
9e1ba4cc
OW
985void qmp_migrate_set_cache_size(int64_t value, Error **errp)
986{
987 MigrationState *s = migrate_get_current();
c91e681a 988 int64_t new_size;
9e1ba4cc
OW
989
990 /* Check for truncation */
991 if (value != (size_t)value) {
c6bd8c70
MA
992 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
993 "exceeding address space");
9e1ba4cc
OW
994 return;
995 }
996
a5615b14
OW
997 /* Cache should not be larger than guest ram size */
998 if (value > ram_bytes_total()) {
c6bd8c70
MA
999 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
1000 "exceeds guest ram size ");
a5615b14
OW
1001 return;
1002 }
1003
c91e681a
OW
1004 new_size = xbzrle_cache_resize(value);
1005 if (new_size < 0) {
c6bd8c70
MA
1006 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
1007 "is smaller than page size");
c91e681a
OW
1008 return;
1009 }
1010
1011 s->xbzrle_cache_size = new_size;
9e1ba4cc
OW
1012}
1013
1014int64_t qmp_query_migrate_cache_size(Error **errp)
1015{
1016 return migrate_xbzrle_cache_size();
1017}
1018
3dc85383 1019void qmp_migrate_set_speed(int64_t value, Error **errp)
cab30143 1020{
cab30143
JQ
1021 MigrationState *s;
1022
3dc85383
LC
1023 if (value < 0) {
1024 value = 0;
99a0db9b 1025 }
442773ce
PB
1026 if (value > SIZE_MAX) {
1027 value = SIZE_MAX;
1028 }
cab30143 1029
17549e84 1030 s = migrate_get_current();
3dc85383 1031 s->bandwidth_limit = value;
442773ce
PB
1032 if (s->file) {
1033 qemu_file_set_rate_limit(s->file, s->bandwidth_limit / XFER_LIMIT_RATIO);
1034 }
cab30143
JQ
1035}
1036
4f0a993b 1037void qmp_migrate_set_downtime(double value, Error **errp)
cab30143 1038{
4f0a993b
LC
1039 value *= 1e9;
1040 value = MAX(0, MIN(UINT64_MAX, value));
1041 max_downtime = (uint64_t)value;
99a0db9b 1042}
17ad9b35 1043
53dd370c
DDAG
1044bool migrate_postcopy_ram(void)
1045{
1046 MigrationState *s;
1047
1048 s = migrate_get_current();
1049
1050 return s->enabled_capabilities[MIGRATION_CAPABILITY_X_POSTCOPY_RAM];
1051}
1052
bde1e2ec
CV
1053bool migrate_auto_converge(void)
1054{
1055 MigrationState *s;
1056
1057 s = migrate_get_current();
1058
1059 return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE];
1060}
1061
323004a3
PL
1062bool migrate_zero_blocks(void)
1063{
1064 MigrationState *s;
1065
1066 s = migrate_get_current();
1067
1068 return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS];
1069}
1070
8706d2d5
LL
1071bool migrate_use_compression(void)
1072{
dde4e694
LL
1073 MigrationState *s;
1074
1075 s = migrate_get_current();
1076
1077 return s->enabled_capabilities[MIGRATION_CAPABILITY_COMPRESS];
8706d2d5
LL
1078}
1079
1080int migrate_compress_level(void)
1081{
1082 MigrationState *s;
1083
1084 s = migrate_get_current();
1085
43c60a81 1086 return s->parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL];
8706d2d5
LL
1087}
1088
1089int migrate_compress_threads(void)
1090{
1091 MigrationState *s;
1092
1093 s = migrate_get_current();
1094
43c60a81 1095 return s->parameters[MIGRATION_PARAMETER_COMPRESS_THREADS];
8706d2d5
LL
1096}
1097
3fcb38c2
LL
1098int migrate_decompress_threads(void)
1099{
1100 MigrationState *s;
1101
1102 s = migrate_get_current();
1103
43c60a81 1104 return s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS];
3fcb38c2
LL
1105}
1106
b05dc723
JQ
1107bool migrate_use_events(void)
1108{
1109 MigrationState *s;
1110
1111 s = migrate_get_current();
1112
1113 return s->enabled_capabilities[MIGRATION_CAPABILITY_EVENTS];
1114}
1115
17ad9b35
OW
1116int migrate_use_xbzrle(void)
1117{
1118 MigrationState *s;
1119
1120 s = migrate_get_current();
1121
1122 return s->enabled_capabilities[MIGRATION_CAPABILITY_XBZRLE];
1123}
1124
1125int64_t migrate_xbzrle_cache_size(void)
1126{
1127 MigrationState *s;
1128
1129 s = migrate_get_current();
1130
1131 return s->xbzrle_cache_size;
1132}
0d82d0e8 1133
70b20477
DDAG
1134/* migration thread support */
1135/*
1136 * Something bad happened to the RP stream, mark an error
1137 * The caller shall print or trace something to indicate why
1138 */
1139static void mark_source_rp_bad(MigrationState *s)
1140{
1141 s->rp_state.error = true;
1142}
1143
1144static struct rp_cmd_args {
1145 ssize_t len; /* -1 = variable */
1146 const char *name;
1147} rp_cmd_args[] = {
1148 [MIG_RP_MSG_INVALID] = { .len = -1, .name = "INVALID" },
1149 [MIG_RP_MSG_SHUT] = { .len = 4, .name = "SHUT" },
1150 [MIG_RP_MSG_PONG] = { .len = 4, .name = "PONG" },
1151 [MIG_RP_MSG_MAX] = { .len = -1, .name = "MAX" },
1152};
1153
1154/*
1155 * Handles messages sent on the return path towards the source VM
1156 *
1157 */
1158static void *source_return_path_thread(void *opaque)
1159{
1160 MigrationState *ms = opaque;
1161 QEMUFile *rp = ms->rp_state.from_dst_file;
1162 uint16_t header_len, header_type;
1163 const int max_len = 512;
1164 uint8_t buf[max_len];
1165 uint32_t tmp32, sibling_error;
1166 int res;
1167
1168 trace_source_return_path_thread_entry();
1169 while (!ms->rp_state.error && !qemu_file_get_error(rp) &&
1170 migration_is_setup_or_active(ms->state)) {
1171 trace_source_return_path_thread_loop_top();
1172 header_type = qemu_get_be16(rp);
1173 header_len = qemu_get_be16(rp);
1174
1175 if (header_type >= MIG_RP_MSG_MAX ||
1176 header_type == MIG_RP_MSG_INVALID) {
1177 error_report("RP: Received invalid message 0x%04x length 0x%04x",
1178 header_type, header_len);
1179 mark_source_rp_bad(ms);
1180 goto out;
1181 }
1182
1183 if ((rp_cmd_args[header_type].len != -1 &&
1184 header_len != rp_cmd_args[header_type].len) ||
1185 header_len > max_len) {
1186 error_report("RP: Received '%s' message (0x%04x) with"
1187 "incorrect length %d expecting %zu",
1188 rp_cmd_args[header_type].name, header_type, header_len,
1189 (size_t)rp_cmd_args[header_type].len);
1190 mark_source_rp_bad(ms);
1191 goto out;
1192 }
1193
1194 /* We know we've got a valid header by this point */
1195 res = qemu_get_buffer(rp, buf, header_len);
1196 if (res != header_len) {
1197 error_report("RP: Failed reading data for message 0x%04x"
1198 " read %d expected %d",
1199 header_type, res, header_len);
1200 mark_source_rp_bad(ms);
1201 goto out;
1202 }
1203
1204 /* OK, we have the message and the data */
1205 switch (header_type) {
1206 case MIG_RP_MSG_SHUT:
1207 sibling_error = be32_to_cpup((uint32_t *)buf);
1208 trace_source_return_path_thread_shut(sibling_error);
1209 if (sibling_error) {
1210 error_report("RP: Sibling indicated error %d", sibling_error);
1211 mark_source_rp_bad(ms);
1212 }
1213 /*
1214 * We'll let the main thread deal with closing the RP
1215 * we could do a shutdown(2) on it, but we're the only user
1216 * anyway, so there's nothing gained.
1217 */
1218 goto out;
1219
1220 case MIG_RP_MSG_PONG:
1221 tmp32 = be32_to_cpup((uint32_t *)buf);
1222 trace_source_return_path_thread_pong(tmp32);
1223 break;
1224
1225 default:
1226 break;
1227 }
1228 }
1229 if (rp && qemu_file_get_error(rp)) {
1230 trace_source_return_path_thread_bad_end();
1231 mark_source_rp_bad(ms);
1232 }
1233
1234 trace_source_return_path_thread_end();
1235out:
1236 ms->rp_state.from_dst_file = NULL;
1237 qemu_fclose(rp);
1238 return NULL;
1239}
1240
1241__attribute__ (( unused )) /* Until later in patch series */
1242static int open_return_path_on_source(MigrationState *ms)
1243{
1244
1245 ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->file);
1246 if (!ms->rp_state.from_dst_file) {
1247 return -1;
1248 }
1249
1250 trace_open_return_path_on_source();
1251 qemu_thread_create(&ms->rp_state.rp_thread, "return path",
1252 source_return_path_thread, ms, QEMU_THREAD_JOINABLE);
1253
1254 trace_open_return_path_on_source_continue();
1255
1256 return 0;
1257}
1258
1259__attribute__ (( unused )) /* Until later in patch series */
1260/* Returns 0 if the RP was ok, otherwise there was an error on the RP */
1261static int await_return_path_close_on_source(MigrationState *ms)
1262{
1263 /*
1264 * If this is a normal exit then the destination will send a SHUT and the
1265 * rp_thread will exit, however if there's an error we need to cause
1266 * it to exit.
1267 */
1268 if (qemu_file_get_error(ms->file) && ms->rp_state.from_dst_file) {
1269 /*
1270 * shutdown(2), if we have it, will cause it to unblock if it's stuck
1271 * waiting for the destination.
1272 */
1273 qemu_file_shutdown(ms->rp_state.from_dst_file);
1274 mark_source_rp_bad(ms);
1275 }
1276 trace_await_return_path_close_on_source_joining();
1277 qemu_thread_join(&ms->rp_state.rp_thread);
1278 trace_await_return_path_close_on_source_close();
1279 return ms->rp_state.error;
1280}
1281
09f6c85e
DDAG
1282/**
1283 * migration_completion: Used by migration_thread when there's not much left.
1284 * The caller 'breaks' the loop when this returns.
1285 *
1286 * @s: Current migration state
36f48567 1287 * @current_active_state: The migration state we expect to be in
09f6c85e
DDAG
1288 * @*old_vm_running: Pointer to old_vm_running flag
1289 * @*start_time: Pointer to time to update
1290 */
36f48567
DDAG
1291static void migration_completion(MigrationState *s, int current_active_state,
1292 bool *old_vm_running,
09f6c85e
DDAG
1293 int64_t *start_time)
1294{
1295 int ret;
1296
1297 qemu_mutex_lock_iothread();
1298 *start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
1299 qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
1300 *old_vm_running = runstate_is_running();
1301
1302 ret = global_state_store();
1303 if (!ret) {
1304 ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
1305 if (ret >= 0) {
1306 qemu_file_set_rate_limit(s->file, INT64_MAX);
a3e06c3d 1307 qemu_savevm_state_complete_precopy(s->file);
09f6c85e
DDAG
1308 }
1309 }
1310 qemu_mutex_unlock_iothread();
1311
1312 if (ret < 0) {
1313 goto fail;
1314 }
1315
1316 if (qemu_file_get_error(s->file)) {
1317 trace_migration_completion_file_err();
1318 goto fail;
1319 }
1320
36f48567 1321 migrate_set_state(s, current_active_state, MIGRATION_STATUS_COMPLETED);
09f6c85e
DDAG
1322 return;
1323
1324fail:
36f48567 1325 migrate_set_state(s, current_active_state, MIGRATION_STATUS_FAILED);
09f6c85e
DDAG
1326}
1327
70b20477
DDAG
1328/*
1329 * Master migration thread on the source VM.
1330 * It drives the migration and pumps the data down the outgoing channel.
1331 */
5f496a1b 1332static void *migration_thread(void *opaque)
0d82d0e8 1333{
9848a404 1334 MigrationState *s = opaque;
bc72ad67
AB
1335 int64_t initial_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
1336 int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
be7172e2 1337 int64_t initial_bytes = 0;
0d82d0e8 1338 int64_t max_size = 0;
a3fa1d78 1339 int64_t start_time = initial_time;
94f5a437 1340 int64_t end_time;
a3fa1d78 1341 bool old_vm_running = false;
76f5933a 1342
ab28bd23
PB
1343 rcu_register_thread();
1344
f796baa1 1345 qemu_savevm_state_header(s->file);
dba433c0 1346 qemu_savevm_state_begin(s->file, &s->params);
0d82d0e8 1347
bc72ad67 1348 s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
31194731 1349 migrate_set_state(s, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_ACTIVE);
29ae8a41 1350
9ec055ae
DDAG
1351 trace_migration_thread_setup_complete();
1352
1353 while (s->state == MIGRATION_STATUS_ACTIVE ||
1354 s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
a3e879cd 1355 int64_t current_time;
c369f40d 1356 uint64_t pending_size;
0d82d0e8 1357
a0ff044b 1358 if (!qemu_file_rate_limit(s->file)) {
c31b098f
DDAG
1359 uint64_t pend_post, pend_nonpost;
1360
1361 qemu_savevm_state_pending(s->file, max_size, &pend_nonpost,
1362 &pend_post);
1363 pending_size = pend_nonpost + pend_post;
1364 trace_migrate_pending(pending_size, max_size,
1365 pend_post, pend_nonpost);
b22ff1fb 1366 if (pending_size && pending_size >= max_size) {
dba433c0 1367 qemu_savevm_state_iterate(s->file);
c369f40d 1368 } else {
09f6c85e 1369 trace_migration_thread_low_pending(pending_size);
36f48567
DDAG
1370 migration_completion(s, MIGRATION_STATUS_ACTIVE,
1371 &old_vm_running, &start_time);
09f6c85e 1372 break;
c369f40d
JQ
1373 }
1374 }
f4410a5d 1375
fd45ee2c 1376 if (qemu_file_get_error(s->file)) {
31194731
HZ
1377 migrate_set_state(s, MIGRATION_STATUS_ACTIVE,
1378 MIGRATION_STATUS_FAILED);
fd45ee2c
PB
1379 break;
1380 }
bc72ad67 1381 current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
0d82d0e8 1382 if (current_time >= initial_time + BUFFER_DELAY) {
be7172e2 1383 uint64_t transferred_bytes = qemu_ftell(s->file) - initial_bytes;
77417f10 1384 uint64_t time_spent = current_time - initial_time;
0d82d0e8
JQ
1385 double bandwidth = transferred_bytes / time_spent;
1386 max_size = bandwidth * migrate_max_downtime() / 1000000;
1387
7e114f8c
MH
1388 s->mbps = time_spent ? (((double) transferred_bytes * 8.0) /
1389 ((double) time_spent / 1000.0)) / 1000.0 / 1000.0 : -1;
1390
9013dca5
AK
1391 trace_migrate_transferred(transferred_bytes, time_spent,
1392 bandwidth, max_size);
90f8ae72
JQ
1393 /* if we haven't sent anything, we don't want to recalculate
1394 10000 is a small enough number for our purposes */
1395 if (s->dirty_bytes_rate && transferred_bytes > 10000) {
1396 s->expected_downtime = s->dirty_bytes_rate / bandwidth;
1397 }
0d82d0e8 1398
1964a397 1399 qemu_file_reset_rate_limit(s->file);
0d82d0e8 1400 initial_time = current_time;
be7172e2 1401 initial_bytes = qemu_ftell(s->file);
0d82d0e8 1402 }
a0ff044b 1403 if (qemu_file_rate_limit(s->file)) {
0d82d0e8
JQ
1404 /* usleep expects microseconds */
1405 g_usleep((initial_time + BUFFER_DELAY - current_time)*1000);
1406 }
a3fa1d78
PB
1407 }
1408
070afca2
JH
1409 /* If we enabled cpu throttling for auto-converge, turn it off. */
1410 cpu_throttle_stop();
94f5a437 1411 end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
070afca2 1412
f4410a5d 1413 qemu_mutex_lock_iothread();
ea7415fa 1414 qemu_savevm_state_cleanup();
31194731 1415 if (s->state == MIGRATION_STATUS_COMPLETED) {
d6ed7312 1416 uint64_t transferred_bytes = qemu_ftell(s->file);
a3fa1d78
PB
1417 s->total_time = end_time - s->total_time;
1418 s->downtime = end_time - start_time;
d6ed7312
PL
1419 if (s->total_time) {
1420 s->mbps = (((double) transferred_bytes * 8.0) /
1421 ((double) s->total_time)) / 1000;
1422 }
a3fa1d78
PB
1423 runstate_set(RUN_STATE_POSTMIGRATE);
1424 } else {
1425 if (old_vm_running) {
a3fa1d78 1426 vm_start();
dba433c0 1427 }
0d82d0e8 1428 }
bb1fadc4 1429 qemu_bh_schedule(s->cleanup_bh);
dba433c0 1430 qemu_mutex_unlock_iothread();
f4410a5d 1431
ab28bd23 1432 rcu_unregister_thread();
0d82d0e8
JQ
1433 return NULL;
1434}
1435
9848a404 1436void migrate_fd_connect(MigrationState *s)
0d82d0e8 1437{
cc283e3b
JQ
1438 /* This is a best 1st approximation. ns to ms */
1439 s->expected_downtime = max_downtime/1000000;
bb1fadc4 1440 s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup, s);
0d82d0e8 1441
442773ce
PB
1442 qemu_file_set_rate_limit(s->file,
1443 s->bandwidth_limit / XFER_LIMIT_RATIO);
1444
9287ac27
SH
1445 /* Notify before starting migration thread */
1446 notifier_list_notify(&migration_state_notifiers, s);
1447
8706d2d5 1448 migrate_compress_threads_create();
4900116e 1449 qemu_thread_create(&s->thread, "migration", migration_thread, s,
bb1fadc4 1450 QEMU_THREAD_JOINABLE);
0d82d0e8 1451}
093e3c42
DDAG
1452
1453PostcopyState postcopy_state_get(void)
1454{
1455 return atomic_mb_read(&incoming_postcopy_state);
1456}
1457
1458/* Set the state and return the old state */
1459PostcopyState postcopy_state_set(PostcopyState new_state)
1460{
1461 return atomic_xchg(&incoming_postcopy_state, new_state);
1462}
1463