]> git.proxmox.com Git - mirror_qemu.git/blame - migration/colo.c
COLO: Add a new RunState RUN_STATE_COLO
[mirror_qemu.git] / migration / colo.c
CommitLineData
35a6ed4f
HZ
1/*
2 * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO)
3 * (a.k.a. Fault Tolerance or Continuous Replication)
4 *
5 * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
6 * Copyright (c) 2016 FUJITSU LIMITED
7 * Copyright (c) 2016 Intel Corporation
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2 or
10 * later. See the COPYING file in the top-level directory.
11 */
12
13#include "qemu/osdep.h"
0b827d5e 14#include "sysemu/sysemu.h"
35a6ed4f 15#include "migration/colo.h"
0b827d5e 16#include "trace.h"
56ba83d2 17#include "qemu/error-report.h"
4f97558e 18#include "qapi/error.h"
35a6ed4f
HZ
19
/*
 * Report whether COLO is available in this build.
 *
 * Returns: always false in this version of the file.
 */
bool colo_supported(void)
{
    return false;
}
0b827d5e
HZ
24
25bool migration_in_colo_state(void)
26{
27 MigrationState *s = migrate_get_current();
28
29 return (s->state == MIGRATION_STATUS_COLO);
30}
31
25d0c16f
HZ
32bool migration_incoming_in_colo_state(void)
33{
34 MigrationIncomingState *mis = migration_incoming_get_current();
35
36 return mis && (mis->state == MIGRATION_STATUS_COLO);
37}
38
4f97558e
HZ
39static void colo_send_message(QEMUFile *f, COLOMessage msg,
40 Error **errp)
41{
42 int ret;
43
44 if (msg >= COLO_MESSAGE__MAX) {
45 error_setg(errp, "%s: Invalid message", __func__);
46 return;
47 }
48 qemu_put_be32(f, msg);
49 qemu_fflush(f);
50
51 ret = qemu_file_get_error(f);
52 if (ret < 0) {
53 error_setg_errno(errp, -ret, "Can't send COLO message");
54 }
55 trace_colo_send_message(COLOMessage_lookup[msg]);
56}
57
58static COLOMessage colo_receive_message(QEMUFile *f, Error **errp)
59{
60 COLOMessage msg;
61 int ret;
62
63 msg = qemu_get_be32(f);
64 ret = qemu_file_get_error(f);
65 if (ret < 0) {
66 error_setg_errno(errp, -ret, "Can't receive COLO message");
67 return msg;
68 }
69 if (msg >= COLO_MESSAGE__MAX) {
70 error_setg(errp, "%s: Invalid message", __func__);
71 return msg;
72 }
73 trace_colo_receive_message(COLOMessage_lookup[msg]);
74 return msg;
75}
76
77static void colo_receive_check_message(QEMUFile *f, COLOMessage expect_msg,
78 Error **errp)
79{
80 COLOMessage msg;
81 Error *local_err = NULL;
82
83 msg = colo_receive_message(f, &local_err);
84 if (local_err) {
85 error_propagate(errp, local_err);
86 return;
87 }
88 if (msg != expect_msg) {
89 error_setg(errp, "Unexpected COLO message %d, expected %d",
90 msg, expect_msg);
91 }
92}
93
94static int colo_do_checkpoint_transaction(MigrationState *s)
95{
96 Error *local_err = NULL;
97
98 colo_send_message(s->to_dst_file, COLO_MESSAGE_CHECKPOINT_REQUEST,
99 &local_err);
100 if (local_err) {
101 goto out;
102 }
103
104 colo_receive_check_message(s->rp_state.from_dst_file,
105 COLO_MESSAGE_CHECKPOINT_REPLY, &local_err);
106 if (local_err) {
107 goto out;
108 }
109
110 /* TODO: suspend and save vm state to colo buffer */
111
112 colo_send_message(s->to_dst_file, COLO_MESSAGE_VMSTATE_SEND, &local_err);
113 if (local_err) {
114 goto out;
115 }
116
117 /* TODO: send vmstate to Secondary */
118
119 colo_receive_check_message(s->rp_state.from_dst_file,
120 COLO_MESSAGE_VMSTATE_RECEIVED, &local_err);
121 if (local_err) {
122 goto out;
123 }
124
125 colo_receive_check_message(s->rp_state.from_dst_file,
126 COLO_MESSAGE_VMSTATE_LOADED, &local_err);
127 if (local_err) {
128 goto out;
129 }
130
131 /* TODO: resume Primary */
132
133 return 0;
134out:
135 if (local_err) {
136 error_report_err(local_err);
137 }
138 return -EINVAL;
139}
140
0b827d5e
HZ
141static void colo_process_checkpoint(MigrationState *s)
142{
4f97558e
HZ
143 Error *local_err = NULL;
144 int ret;
145
56ba83d2
HZ
146 s->rp_state.from_dst_file = qemu_file_get_return_path(s->to_dst_file);
147 if (!s->rp_state.from_dst_file) {
148 error_report("Open QEMUFile from_dst_file failed");
149 goto out;
150 }
151
4f97558e
HZ
152 /*
153 * Wait for Secondary finish loading VM states and enter COLO
154 * restore.
155 */
156 colo_receive_check_message(s->rp_state.from_dst_file,
157 COLO_MESSAGE_CHECKPOINT_READY, &local_err);
158 if (local_err) {
159 goto out;
160 }
161
0b827d5e
HZ
162 qemu_mutex_lock_iothread();
163 vm_start();
164 qemu_mutex_unlock_iothread();
165 trace_colo_vm_state_change("stop", "run");
166
4f97558e
HZ
167 while (s->state == MIGRATION_STATUS_COLO) {
168 ret = colo_do_checkpoint_transaction(s);
169 if (ret < 0) {
170 goto out;
171 }
172 }
0b827d5e 173
56ba83d2 174out:
4f97558e
HZ
175 /* Throw the unreported error message after exited from loop */
176 if (local_err) {
177 error_report_err(local_err);
178 }
179
56ba83d2
HZ
180 if (s->rp_state.from_dst_file) {
181 qemu_fclose(s->rp_state.from_dst_file);
182 }
0b827d5e
HZ
183}
184
185void migrate_start_colo_process(MigrationState *s)
186{
187 qemu_mutex_unlock_iothread();
188 migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
189 MIGRATION_STATUS_COLO);
190 colo_process_checkpoint(s);
191 qemu_mutex_lock_iothread();
192}
25d0c16f 193
4f97558e
HZ
194static void colo_wait_handle_message(QEMUFile *f, int *checkpoint_request,
195 Error **errp)
196{
197 COLOMessage msg;
198 Error *local_err = NULL;
199
200 msg = colo_receive_message(f, &local_err);
201 if (local_err) {
202 error_propagate(errp, local_err);
203 return;
204 }
205
206 switch (msg) {
207 case COLO_MESSAGE_CHECKPOINT_REQUEST:
208 *checkpoint_request = 1;
209 break;
210 default:
211 *checkpoint_request = 0;
212 error_setg(errp, "Got unknown COLO message: %d", msg);
213 break;
214 }
215}
216
25d0c16f
HZ
217void *colo_process_incoming_thread(void *opaque)
218{
219 MigrationIncomingState *mis = opaque;
4f97558e 220 Error *local_err = NULL;
25d0c16f
HZ
221
222 migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
223 MIGRATION_STATUS_COLO);
224
56ba83d2
HZ
225 mis->to_src_file = qemu_file_get_return_path(mis->from_src_file);
226 if (!mis->to_src_file) {
227 error_report("COLO incoming thread: Open QEMUFile to_src_file failed");
228 goto out;
229 }
230 /*
231 * Note: the communication between Primary side and Secondary side
232 * should be sequential, we set the fd to unblocked in migration incoming
233 * coroutine, and here we are in the COLO incoming thread, so it is ok to
234 * set the fd back to blocked.
235 */
236 qemu_file_set_blocking(mis->from_src_file, true);
237
4f97558e
HZ
238 colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_READY,
239 &local_err);
240 if (local_err) {
241 goto out;
242 }
243
244 while (mis->state == MIGRATION_STATUS_COLO) {
245 int request;
246
247 colo_wait_handle_message(mis->from_src_file, &request, &local_err);
248 if (local_err) {
249 goto out;
250 }
251 assert(request);
252 /* FIXME: This is unnecessary for periodic checkpoint mode */
253 colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_REPLY,
254 &local_err);
255 if (local_err) {
256 goto out;
257 }
258
259 colo_receive_check_message(mis->from_src_file,
260 COLO_MESSAGE_VMSTATE_SEND, &local_err);
261 if (local_err) {
262 goto out;
263 }
264
265 /* TODO: read migration data into colo buffer */
266
267 colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_RECEIVED,
268 &local_err);
269 if (local_err) {
270 goto out;
271 }
272
273 /* TODO: load vm state */
274
275 colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_LOADED,
276 &local_err);
277 if (local_err) {
278 goto out;
279 }
280 }
25d0c16f 281
56ba83d2 282out:
4f97558e
HZ
283 /* Throw the unreported error message after exited from loop */
284 if (local_err) {
285 error_report_err(local_err);
286 }
287
56ba83d2
HZ
288 if (mis->to_src_file) {
289 qemu_fclose(mis->to_src_file);
290 }
25d0c16f
HZ
291 migration_incoming_exit_colo();
292
293 return NULL;
294}