]> git.proxmox.com Git - mirror_qemu.git/blob - migration/colo.c
COLO: Introduce checkpointing protocol
[mirror_qemu.git] / migration / colo.c
1 /*
2 * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO)
3 * (a.k.a. Fault Tolerance or Continuous Replication)
4 *
5 * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
6 * Copyright (c) 2016 FUJITSU LIMITED
7 * Copyright (c) 2016 Intel Corporation
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2 or
10 * later. See the COPYING file in the top-level directory.
11 */
12
13 #include "qemu/osdep.h"
14 #include "sysemu/sysemu.h"
15 #include "migration/colo.h"
16 #include "trace.h"
17 #include "qemu/error-report.h"
18 #include "qapi/error.h"
19
/*
 * Report whether COLO is available in this build.
 *
 * Returns: always false at this stage of the implementation.
 */
bool colo_supported(void)
{
    const bool supported = false;

    return supported;
}
24
25 bool migration_in_colo_state(void)
26 {
27 MigrationState *s = migrate_get_current();
28
29 return (s->state == MIGRATION_STATUS_COLO);
30 }
31
32 bool migration_incoming_in_colo_state(void)
33 {
34 MigrationIncomingState *mis = migration_incoming_get_current();
35
36 return mis && (mis->state == MIGRATION_STATUS_COLO);
37 }
38
39 static void colo_send_message(QEMUFile *f, COLOMessage msg,
40 Error **errp)
41 {
42 int ret;
43
44 if (msg >= COLO_MESSAGE__MAX) {
45 error_setg(errp, "%s: Invalid message", __func__);
46 return;
47 }
48 qemu_put_be32(f, msg);
49 qemu_fflush(f);
50
51 ret = qemu_file_get_error(f);
52 if (ret < 0) {
53 error_setg_errno(errp, -ret, "Can't send COLO message");
54 }
55 trace_colo_send_message(COLOMessage_lookup[msg]);
56 }
57
58 static COLOMessage colo_receive_message(QEMUFile *f, Error **errp)
59 {
60 COLOMessage msg;
61 int ret;
62
63 msg = qemu_get_be32(f);
64 ret = qemu_file_get_error(f);
65 if (ret < 0) {
66 error_setg_errno(errp, -ret, "Can't receive COLO message");
67 return msg;
68 }
69 if (msg >= COLO_MESSAGE__MAX) {
70 error_setg(errp, "%s: Invalid message", __func__);
71 return msg;
72 }
73 trace_colo_receive_message(COLOMessage_lookup[msg]);
74 return msg;
75 }
76
77 static void colo_receive_check_message(QEMUFile *f, COLOMessage expect_msg,
78 Error **errp)
79 {
80 COLOMessage msg;
81 Error *local_err = NULL;
82
83 msg = colo_receive_message(f, &local_err);
84 if (local_err) {
85 error_propagate(errp, local_err);
86 return;
87 }
88 if (msg != expect_msg) {
89 error_setg(errp, "Unexpected COLO message %d, expected %d",
90 msg, expect_msg);
91 }
92 }
93
94 static int colo_do_checkpoint_transaction(MigrationState *s)
95 {
96 Error *local_err = NULL;
97
98 colo_send_message(s->to_dst_file, COLO_MESSAGE_CHECKPOINT_REQUEST,
99 &local_err);
100 if (local_err) {
101 goto out;
102 }
103
104 colo_receive_check_message(s->rp_state.from_dst_file,
105 COLO_MESSAGE_CHECKPOINT_REPLY, &local_err);
106 if (local_err) {
107 goto out;
108 }
109
110 /* TODO: suspend and save vm state to colo buffer */
111
112 colo_send_message(s->to_dst_file, COLO_MESSAGE_VMSTATE_SEND, &local_err);
113 if (local_err) {
114 goto out;
115 }
116
117 /* TODO: send vmstate to Secondary */
118
119 colo_receive_check_message(s->rp_state.from_dst_file,
120 COLO_MESSAGE_VMSTATE_RECEIVED, &local_err);
121 if (local_err) {
122 goto out;
123 }
124
125 colo_receive_check_message(s->rp_state.from_dst_file,
126 COLO_MESSAGE_VMSTATE_LOADED, &local_err);
127 if (local_err) {
128 goto out;
129 }
130
131 /* TODO: resume Primary */
132
133 return 0;
134 out:
135 if (local_err) {
136 error_report_err(local_err);
137 }
138 return -EINVAL;
139 }
140
141 static void colo_process_checkpoint(MigrationState *s)
142 {
143 Error *local_err = NULL;
144 int ret;
145
146 s->rp_state.from_dst_file = qemu_file_get_return_path(s->to_dst_file);
147 if (!s->rp_state.from_dst_file) {
148 error_report("Open QEMUFile from_dst_file failed");
149 goto out;
150 }
151
152 /*
153 * Wait for Secondary finish loading VM states and enter COLO
154 * restore.
155 */
156 colo_receive_check_message(s->rp_state.from_dst_file,
157 COLO_MESSAGE_CHECKPOINT_READY, &local_err);
158 if (local_err) {
159 goto out;
160 }
161
162 qemu_mutex_lock_iothread();
163 vm_start();
164 qemu_mutex_unlock_iothread();
165 trace_colo_vm_state_change("stop", "run");
166
167 while (s->state == MIGRATION_STATUS_COLO) {
168 ret = colo_do_checkpoint_transaction(s);
169 if (ret < 0) {
170 goto out;
171 }
172 }
173
174 out:
175 /* Throw the unreported error message after exited from loop */
176 if (local_err) {
177 error_report_err(local_err);
178 }
179
180 if (s->rp_state.from_dst_file) {
181 qemu_fclose(s->rp_state.from_dst_file);
182 }
183 }
184
185 void migrate_start_colo_process(MigrationState *s)
186 {
187 qemu_mutex_unlock_iothread();
188 migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
189 MIGRATION_STATUS_COLO);
190 colo_process_checkpoint(s);
191 qemu_mutex_lock_iothread();
192 }
193
194 static void colo_wait_handle_message(QEMUFile *f, int *checkpoint_request,
195 Error **errp)
196 {
197 COLOMessage msg;
198 Error *local_err = NULL;
199
200 msg = colo_receive_message(f, &local_err);
201 if (local_err) {
202 error_propagate(errp, local_err);
203 return;
204 }
205
206 switch (msg) {
207 case COLO_MESSAGE_CHECKPOINT_REQUEST:
208 *checkpoint_request = 1;
209 break;
210 default:
211 *checkpoint_request = 0;
212 error_setg(errp, "Got unknown COLO message: %d", msg);
213 break;
214 }
215 }
216
217 void *colo_process_incoming_thread(void *opaque)
218 {
219 MigrationIncomingState *mis = opaque;
220 Error *local_err = NULL;
221
222 migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
223 MIGRATION_STATUS_COLO);
224
225 mis->to_src_file = qemu_file_get_return_path(mis->from_src_file);
226 if (!mis->to_src_file) {
227 error_report("COLO incoming thread: Open QEMUFile to_src_file failed");
228 goto out;
229 }
230 /*
231 * Note: the communication between Primary side and Secondary side
232 * should be sequential, we set the fd to unblocked in migration incoming
233 * coroutine, and here we are in the COLO incoming thread, so it is ok to
234 * set the fd back to blocked.
235 */
236 qemu_file_set_blocking(mis->from_src_file, true);
237
238 colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_READY,
239 &local_err);
240 if (local_err) {
241 goto out;
242 }
243
244 while (mis->state == MIGRATION_STATUS_COLO) {
245 int request;
246
247 colo_wait_handle_message(mis->from_src_file, &request, &local_err);
248 if (local_err) {
249 goto out;
250 }
251 assert(request);
252 /* FIXME: This is unnecessary for periodic checkpoint mode */
253 colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_REPLY,
254 &local_err);
255 if (local_err) {
256 goto out;
257 }
258
259 colo_receive_check_message(mis->from_src_file,
260 COLO_MESSAGE_VMSTATE_SEND, &local_err);
261 if (local_err) {
262 goto out;
263 }
264
265 /* TODO: read migration data into colo buffer */
266
267 colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_RECEIVED,
268 &local_err);
269 if (local_err) {
270 goto out;
271 }
272
273 /* TODO: load vm state */
274
275 colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_LOADED,
276 &local_err);
277 if (local_err) {
278 goto out;
279 }
280 }
281
282 out:
283 /* Throw the unreported error message after exited from loop */
284 if (local_err) {
285 error_report_err(local_err);
286 }
287
288 if (mis->to_src_file) {
289 qemu_fclose(mis->to_src_file);
290 }
291 migration_incoming_exit_colo();
292
293 return NULL;
294 }