/*
 * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO)
 * (a.k.a. Fault Tolerance or Continuous Replication)
 *
 * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
 * Copyright (c) 2016 FUJITSU LIMITED
 * Copyright (c) 2016 Intel Corporation
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later.  See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "qemu/timer.h"
#include "sysemu/sysemu.h"
#include "migration/colo.h"
#include "io/channel-buffer.h"
#include "trace.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "migration/failover.h"

/* Initial capacity (4 MiB) of the channel buffers that stage VM state. */
#define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024)

/*
 * Report whether COLO is supported.
 *
 * NOTE(review): the function body was absent from the damaged listing
 * this definition was restored from; an unconditional "true" is assumed
 * here - confirm against the tree.
 */
bool colo_supported(void)
{
    return true;
}

30 bool migration_in_colo_state(void)
32 MigrationState
*s
= migrate_get_current();
34 return (s
->state
== MIGRATION_STATUS_COLO
);
37 bool migration_incoming_in_colo_state(void)
39 MigrationIncomingState
*mis
= migration_incoming_get_current();
41 return mis
&& (mis
->state
== MIGRATION_STATUS_COLO
);
44 static bool colo_runstate_is_stopped(void)
46 return runstate_check(RUN_STATE_COLO
) || !runstate_is_running();
49 static void secondary_vm_do_failover(void)
52 MigrationIncomingState
*mis
= migration_incoming_get_current();
54 migrate_set_state(&mis
->state
, MIGRATION_STATUS_COLO
,
55 MIGRATION_STATUS_COMPLETED
);
58 error_report("\"-S\" qemu option will be ignored in secondary side");
59 /* recover runstate to normal migration finish state */
63 * Make sure COLO incoming thread not block in recv or send,
64 * If mis->from_src_file and mis->to_src_file use the same fd,
65 * The second shutdown() will return -1, we ignore this value,
68 if (mis
->from_src_file
) {
69 qemu_file_shutdown(mis
->from_src_file
);
71 if (mis
->to_src_file
) {
72 qemu_file_shutdown(mis
->to_src_file
);
75 old_state
= failover_set_state(FAILOVER_STATUS_ACTIVE
,
76 FAILOVER_STATUS_COMPLETED
);
77 if (old_state
!= FAILOVER_STATUS_ACTIVE
) {
78 error_report("Incorrect state (%s) while doing failover for "
79 "secondary VM", FailoverStatus_lookup
[old_state
]);
82 /* Notify COLO incoming thread that failover work is finished */
83 qemu_sem_post(&mis
->colo_incoming_sem
);
84 /* For Secondary VM, jump to incoming co */
85 if (mis
->migration_incoming_co
) {
86 qemu_coroutine_enter(mis
->migration_incoming_co
);
90 static void primary_vm_do_failover(void)
92 MigrationState
*s
= migrate_get_current();
95 migrate_set_state(&s
->state
, MIGRATION_STATUS_COLO
,
96 MIGRATION_STATUS_COMPLETED
);
99 * Wake up COLO thread which may blocked in recv() or send(),
100 * The s->rp_state.from_dst_file and s->to_dst_file may use the
101 * same fd, but we still shutdown the fd for twice, it is harmless.
103 if (s
->to_dst_file
) {
104 qemu_file_shutdown(s
->to_dst_file
);
106 if (s
->rp_state
.from_dst_file
) {
107 qemu_file_shutdown(s
->rp_state
.from_dst_file
);
110 old_state
= failover_set_state(FAILOVER_STATUS_ACTIVE
,
111 FAILOVER_STATUS_COMPLETED
);
112 if (old_state
!= FAILOVER_STATUS_ACTIVE
) {
113 error_report("Incorrect state (%s) while doing failover for Primary VM",
114 FailoverStatus_lookup
[old_state
]);
117 /* Notify COLO thread that failover work is finished */
118 qemu_sem_post(&s
->colo_exit_sem
);
121 void colo_do_failover(MigrationState
*s
)
123 /* Make sure VM stopped while failover happened. */
124 if (!colo_runstate_is_stopped()) {
125 vm_stop_force_state(RUN_STATE_COLO
);
128 if (get_colo_mode() == COLO_MODE_PRIMARY
) {
129 primary_vm_do_failover();
131 secondary_vm_do_failover();
135 static void colo_send_message(QEMUFile
*f
, COLOMessage msg
,
140 if (msg
>= COLO_MESSAGE__MAX
) {
141 error_setg(errp
, "%s: Invalid message", __func__
);
144 qemu_put_be32(f
, msg
);
147 ret
= qemu_file_get_error(f
);
149 error_setg_errno(errp
, -ret
, "Can't send COLO message");
151 trace_colo_send_message(COLOMessage_lookup
[msg
]);
154 static void colo_send_message_value(QEMUFile
*f
, COLOMessage msg
,
155 uint64_t value
, Error
**errp
)
157 Error
*local_err
= NULL
;
160 colo_send_message(f
, msg
, &local_err
);
162 error_propagate(errp
, local_err
);
165 qemu_put_be64(f
, value
);
168 ret
= qemu_file_get_error(f
);
170 error_setg_errno(errp
, -ret
, "Failed to send value for message:%s",
171 COLOMessage_lookup
[msg
]);
175 static COLOMessage
colo_receive_message(QEMUFile
*f
, Error
**errp
)
180 msg
= qemu_get_be32(f
);
181 ret
= qemu_file_get_error(f
);
183 error_setg_errno(errp
, -ret
, "Can't receive COLO message");
186 if (msg
>= COLO_MESSAGE__MAX
) {
187 error_setg(errp
, "%s: Invalid message", __func__
);
190 trace_colo_receive_message(COLOMessage_lookup
[msg
]);
194 static void colo_receive_check_message(QEMUFile
*f
, COLOMessage expect_msg
,
198 Error
*local_err
= NULL
;
200 msg
= colo_receive_message(f
, &local_err
);
202 error_propagate(errp
, local_err
);
205 if (msg
!= expect_msg
) {
206 error_setg(errp
, "Unexpected COLO message %d, expected %d",
211 static uint64_t colo_receive_message_value(QEMUFile
*f
, uint32_t expect_msg
,
214 Error
*local_err
= NULL
;
218 colo_receive_check_message(f
, expect_msg
, &local_err
);
220 error_propagate(errp
, local_err
);
224 value
= qemu_get_be64(f
);
225 ret
= qemu_file_get_error(f
);
227 error_setg_errno(errp
, -ret
, "Failed to get value for COLO message: %s",
228 COLOMessage_lookup
[expect_msg
]);
233 static int colo_do_checkpoint_transaction(MigrationState
*s
,
234 QIOChannelBuffer
*bioc
,
237 Error
*local_err
= NULL
;
240 colo_send_message(s
->to_dst_file
, COLO_MESSAGE_CHECKPOINT_REQUEST
,
246 colo_receive_check_message(s
->rp_state
.from_dst_file
,
247 COLO_MESSAGE_CHECKPOINT_REPLY
, &local_err
);
251 /* Reset channel-buffer directly */
252 qio_channel_io_seek(QIO_CHANNEL(bioc
), 0, 0, NULL
);
255 qemu_mutex_lock_iothread();
256 if (failover_get_state() != FAILOVER_STATUS_NONE
) {
257 qemu_mutex_unlock_iothread();
260 vm_stop_force_state(RUN_STATE_COLO
);
261 qemu_mutex_unlock_iothread();
262 trace_colo_vm_state_change("run", "stop");
264 * Failover request bh could be called after vm_stop_force_state(),
265 * So we need check failover_request_is_active() again.
267 if (failover_get_state() != FAILOVER_STATUS_NONE
) {
271 /* Disable block migration */
273 s
->params
.shared
= 0;
274 qemu_savevm_state_header(fb
);
275 qemu_savevm_state_begin(fb
, &s
->params
);
276 qemu_mutex_lock_iothread();
277 qemu_savevm_state_complete_precopy(fb
, false);
278 qemu_mutex_unlock_iothread();
282 colo_send_message(s
->to_dst_file
, COLO_MESSAGE_VMSTATE_SEND
, &local_err
);
287 * We need the size of the VMstate data in Secondary side,
288 * With which we can decide how much data should be read.
290 colo_send_message_value(s
->to_dst_file
, COLO_MESSAGE_VMSTATE_SIZE
,
291 bioc
->usage
, &local_err
);
296 qemu_put_buffer(s
->to_dst_file
, bioc
->data
, bioc
->usage
);
297 qemu_fflush(s
->to_dst_file
);
298 ret
= qemu_file_get_error(s
->to_dst_file
);
303 colo_receive_check_message(s
->rp_state
.from_dst_file
,
304 COLO_MESSAGE_VMSTATE_RECEIVED
, &local_err
);
309 colo_receive_check_message(s
->rp_state
.from_dst_file
,
310 COLO_MESSAGE_VMSTATE_LOADED
, &local_err
);
317 qemu_mutex_lock_iothread();
319 qemu_mutex_unlock_iothread();
320 trace_colo_vm_state_change("stop", "run");
324 error_report_err(local_err
);
329 static void colo_process_checkpoint(MigrationState
*s
)
331 QIOChannelBuffer
*bioc
;
333 int64_t current_time
= qemu_clock_get_ms(QEMU_CLOCK_HOST
);
334 Error
*local_err
= NULL
;
337 failover_init_state();
339 s
->rp_state
.from_dst_file
= qemu_file_get_return_path(s
->to_dst_file
);
340 if (!s
->rp_state
.from_dst_file
) {
341 error_report("Open QEMUFile from_dst_file failed");
346 * Wait for Secondary finish loading VM states and enter COLO
349 colo_receive_check_message(s
->rp_state
.from_dst_file
,
350 COLO_MESSAGE_CHECKPOINT_READY
, &local_err
);
354 bioc
= qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE
);
355 fb
= qemu_fopen_channel_output(QIO_CHANNEL(bioc
));
356 object_unref(OBJECT(bioc
));
358 qemu_mutex_lock_iothread();
360 qemu_mutex_unlock_iothread();
361 trace_colo_vm_state_change("stop", "run");
363 timer_mod(s
->colo_delay_timer
,
364 current_time
+ s
->parameters
.x_checkpoint_delay
);
366 while (s
->state
== MIGRATION_STATUS_COLO
) {
367 if (failover_get_state() != FAILOVER_STATUS_NONE
) {
368 error_report("failover request");
372 qemu_sem_wait(&s
->colo_checkpoint_sem
);
374 ret
= colo_do_checkpoint_transaction(s
, bioc
, fb
);
381 /* Throw the unreported error message after exited from loop */
383 error_report_err(local_err
);
390 timer_del(s
->colo_delay_timer
);
392 /* Hope this not to be too long to wait here */
393 qemu_sem_wait(&s
->colo_exit_sem
);
394 qemu_sem_destroy(&s
->colo_exit_sem
);
396 * Must be called after failover BH is completed,
397 * Or the failover BH may shutdown the wrong fd that
398 * re-used by other threads after we release here.
400 if (s
->rp_state
.from_dst_file
) {
401 qemu_fclose(s
->rp_state
.from_dst_file
);
405 void colo_checkpoint_notify(void *opaque
)
407 MigrationState
*s
= opaque
;
408 int64_t next_notify_time
;
410 qemu_sem_post(&s
->colo_checkpoint_sem
);
411 s
->colo_checkpoint_time
= qemu_clock_get_ms(QEMU_CLOCK_HOST
);
412 next_notify_time
= s
->colo_checkpoint_time
+
413 s
->parameters
.x_checkpoint_delay
;
414 timer_mod(s
->colo_delay_timer
, next_notify_time
);
417 void migrate_start_colo_process(MigrationState
*s
)
419 qemu_mutex_unlock_iothread();
420 qemu_sem_init(&s
->colo_checkpoint_sem
, 0);
421 s
->colo_delay_timer
= timer_new_ms(QEMU_CLOCK_HOST
,
422 colo_checkpoint_notify
, s
);
424 qemu_sem_init(&s
->colo_exit_sem
, 0);
425 migrate_set_state(&s
->state
, MIGRATION_STATUS_ACTIVE
,
426 MIGRATION_STATUS_COLO
);
427 colo_process_checkpoint(s
);
428 qemu_mutex_lock_iothread();
431 static void colo_wait_handle_message(QEMUFile
*f
, int *checkpoint_request
,
435 Error
*local_err
= NULL
;
437 msg
= colo_receive_message(f
, &local_err
);
439 error_propagate(errp
, local_err
);
444 case COLO_MESSAGE_CHECKPOINT_REQUEST
:
445 *checkpoint_request
= 1;
448 *checkpoint_request
= 0;
449 error_setg(errp
, "Got unknown COLO message: %d", msg
);
454 void *colo_process_incoming_thread(void *opaque
)
456 MigrationIncomingState
*mis
= opaque
;
458 QIOChannelBuffer
*bioc
= NULL
; /* Cache incoming device state */
461 Error
*local_err
= NULL
;
463 qemu_sem_init(&mis
->colo_incoming_sem
, 0);
465 migrate_set_state(&mis
->state
, MIGRATION_STATUS_ACTIVE
,
466 MIGRATION_STATUS_COLO
);
468 failover_init_state();
470 mis
->to_src_file
= qemu_file_get_return_path(mis
->from_src_file
);
471 if (!mis
->to_src_file
) {
472 error_report("COLO incoming thread: Open QEMUFile to_src_file failed");
476 * Note: the communication between Primary side and Secondary side
477 * should be sequential, we set the fd to unblocked in migration incoming
478 * coroutine, and here we are in the COLO incoming thread, so it is ok to
479 * set the fd back to blocked.
481 qemu_file_set_blocking(mis
->from_src_file
, true);
483 bioc
= qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE
);
484 fb
= qemu_fopen_channel_input(QIO_CHANNEL(bioc
));
485 object_unref(OBJECT(bioc
));
487 colo_send_message(mis
->to_src_file
, COLO_MESSAGE_CHECKPOINT_READY
,
493 while (mis
->state
== MIGRATION_STATUS_COLO
) {
496 colo_wait_handle_message(mis
->from_src_file
, &request
, &local_err
);
501 if (failover_get_state() != FAILOVER_STATUS_NONE
) {
502 error_report("failover request");
506 /* FIXME: This is unnecessary for periodic checkpoint mode */
507 colo_send_message(mis
->to_src_file
, COLO_MESSAGE_CHECKPOINT_REPLY
,
513 colo_receive_check_message(mis
->from_src_file
,
514 COLO_MESSAGE_VMSTATE_SEND
, &local_err
);
519 value
= colo_receive_message_value(mis
->from_src_file
,
520 COLO_MESSAGE_VMSTATE_SIZE
, &local_err
);
526 * Read VM device state data into channel buffer,
527 * It's better to re-use the memory allocated.
528 * Here we need to handle the channel buffer directly.
530 if (value
> bioc
->capacity
) {
531 bioc
->capacity
= value
;
532 bioc
->data
= g_realloc(bioc
->data
, bioc
->capacity
);
534 total_size
= qemu_get_buffer(mis
->from_src_file
, bioc
->data
, value
);
535 if (total_size
!= value
) {
536 error_report("Got %" PRIu64
" VMState data, less than expected"
537 " %" PRIu64
, total_size
, value
);
540 bioc
->usage
= total_size
;
541 qio_channel_io_seek(QIO_CHANNEL(bioc
), 0, 0, NULL
);
543 colo_send_message(mis
->to_src_file
, COLO_MESSAGE_VMSTATE_RECEIVED
,
549 qemu_mutex_lock_iothread();
550 qemu_system_reset(VMRESET_SILENT
);
551 if (qemu_loadvm_state(fb
) < 0) {
552 error_report("COLO: loadvm failed");
553 qemu_mutex_unlock_iothread();
556 qemu_mutex_unlock_iothread();
558 colo_send_message(mis
->to_src_file
, COLO_MESSAGE_VMSTATE_LOADED
,
566 /* Throw the unreported error message after exited from loop */
568 error_report_err(local_err
);
575 /* Hope this not to be too long to loop here */
576 qemu_sem_wait(&mis
->colo_incoming_sem
);
577 qemu_sem_destroy(&mis
->colo_incoming_sem
);
578 /* Must be called after failover BH is completed */
579 if (mis
->to_src_file
) {
580 qemu_fclose(mis
->to_src_file
);
582 migration_incoming_exit_colo();