]>
Commit | Line | Data |
---|---|---|
35a6ed4f HZ |
1 | /* |
2 | * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO) | |
3 | * (a.k.a. Fault Tolerance or Continuous Replication) | |
4 | * | |
5 | * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. | |
6 | * Copyright (c) 2016 FUJITSU LIMITED | |
7 | * Copyright (c) 2016 Intel Corporation | |
8 | * | |
9 | * This work is licensed under the terms of the GNU GPL, version 2 or | |
10 | * later. See the COPYING file in the top-level directory. | |
11 | */ | |
12 | ||
13 | #include "qemu/osdep.h" | |
0b827d5e | 14 | #include "sysemu/sysemu.h" |
35a6ed4f | 15 | #include "migration/colo.h" |
0b827d5e | 16 | #include "trace.h" |
56ba83d2 | 17 | #include "qemu/error-report.h" |
4f97558e | 18 | #include "qapi/error.h" |
35a6ed4f HZ |
19 | |
/*
 * Report whether COLO can be used.
 *
 * Returns: always false in this implementation, i.e. COLO is reported
 * as not supported.
 */
bool colo_supported(void)
{
    return false;
}
0b827d5e HZ |
24 | |
25 | bool migration_in_colo_state(void) | |
26 | { | |
27 | MigrationState *s = migrate_get_current(); | |
28 | ||
29 | return (s->state == MIGRATION_STATUS_COLO); | |
30 | } | |
31 | ||
25d0c16f HZ |
32 | bool migration_incoming_in_colo_state(void) |
33 | { | |
34 | MigrationIncomingState *mis = migration_incoming_get_current(); | |
35 | ||
36 | return mis && (mis->state == MIGRATION_STATUS_COLO); | |
37 | } | |
38 | ||
4f97558e HZ |
39 | static void colo_send_message(QEMUFile *f, COLOMessage msg, |
40 | Error **errp) | |
41 | { | |
42 | int ret; | |
43 | ||
44 | if (msg >= COLO_MESSAGE__MAX) { | |
45 | error_setg(errp, "%s: Invalid message", __func__); | |
46 | return; | |
47 | } | |
48 | qemu_put_be32(f, msg); | |
49 | qemu_fflush(f); | |
50 | ||
51 | ret = qemu_file_get_error(f); | |
52 | if (ret < 0) { | |
53 | error_setg_errno(errp, -ret, "Can't send COLO message"); | |
54 | } | |
55 | trace_colo_send_message(COLOMessage_lookup[msg]); | |
56 | } | |
57 | ||
58 | static COLOMessage colo_receive_message(QEMUFile *f, Error **errp) | |
59 | { | |
60 | COLOMessage msg; | |
61 | int ret; | |
62 | ||
63 | msg = qemu_get_be32(f); | |
64 | ret = qemu_file_get_error(f); | |
65 | if (ret < 0) { | |
66 | error_setg_errno(errp, -ret, "Can't receive COLO message"); | |
67 | return msg; | |
68 | } | |
69 | if (msg >= COLO_MESSAGE__MAX) { | |
70 | error_setg(errp, "%s: Invalid message", __func__); | |
71 | return msg; | |
72 | } | |
73 | trace_colo_receive_message(COLOMessage_lookup[msg]); | |
74 | return msg; | |
75 | } | |
76 | ||
77 | static void colo_receive_check_message(QEMUFile *f, COLOMessage expect_msg, | |
78 | Error **errp) | |
79 | { | |
80 | COLOMessage msg; | |
81 | Error *local_err = NULL; | |
82 | ||
83 | msg = colo_receive_message(f, &local_err); | |
84 | if (local_err) { | |
85 | error_propagate(errp, local_err); | |
86 | return; | |
87 | } | |
88 | if (msg != expect_msg) { | |
89 | error_setg(errp, "Unexpected COLO message %d, expected %d", | |
90 | msg, expect_msg); | |
91 | } | |
92 | } | |
93 | ||
94 | static int colo_do_checkpoint_transaction(MigrationState *s) | |
95 | { | |
96 | Error *local_err = NULL; | |
97 | ||
98 | colo_send_message(s->to_dst_file, COLO_MESSAGE_CHECKPOINT_REQUEST, | |
99 | &local_err); | |
100 | if (local_err) { | |
101 | goto out; | |
102 | } | |
103 | ||
104 | colo_receive_check_message(s->rp_state.from_dst_file, | |
105 | COLO_MESSAGE_CHECKPOINT_REPLY, &local_err); | |
106 | if (local_err) { | |
107 | goto out; | |
108 | } | |
109 | ||
110 | /* TODO: suspend and save vm state to colo buffer */ | |
111 | ||
112 | colo_send_message(s->to_dst_file, COLO_MESSAGE_VMSTATE_SEND, &local_err); | |
113 | if (local_err) { | |
114 | goto out; | |
115 | } | |
116 | ||
117 | /* TODO: send vmstate to Secondary */ | |
118 | ||
119 | colo_receive_check_message(s->rp_state.from_dst_file, | |
120 | COLO_MESSAGE_VMSTATE_RECEIVED, &local_err); | |
121 | if (local_err) { | |
122 | goto out; | |
123 | } | |
124 | ||
125 | colo_receive_check_message(s->rp_state.from_dst_file, | |
126 | COLO_MESSAGE_VMSTATE_LOADED, &local_err); | |
127 | if (local_err) { | |
128 | goto out; | |
129 | } | |
130 | ||
131 | /* TODO: resume Primary */ | |
132 | ||
133 | return 0; | |
134 | out: | |
135 | if (local_err) { | |
136 | error_report_err(local_err); | |
137 | } | |
138 | return -EINVAL; | |
139 | } | |
140 | ||
0b827d5e HZ |
141 | static void colo_process_checkpoint(MigrationState *s) |
142 | { | |
4f97558e HZ |
143 | Error *local_err = NULL; |
144 | int ret; | |
145 | ||
56ba83d2 HZ |
146 | s->rp_state.from_dst_file = qemu_file_get_return_path(s->to_dst_file); |
147 | if (!s->rp_state.from_dst_file) { | |
148 | error_report("Open QEMUFile from_dst_file failed"); | |
149 | goto out; | |
150 | } | |
151 | ||
4f97558e HZ |
152 | /* |
153 | * Wait for Secondary finish loading VM states and enter COLO | |
154 | * restore. | |
155 | */ | |
156 | colo_receive_check_message(s->rp_state.from_dst_file, | |
157 | COLO_MESSAGE_CHECKPOINT_READY, &local_err); | |
158 | if (local_err) { | |
159 | goto out; | |
160 | } | |
161 | ||
0b827d5e HZ |
162 | qemu_mutex_lock_iothread(); |
163 | vm_start(); | |
164 | qemu_mutex_unlock_iothread(); | |
165 | trace_colo_vm_state_change("stop", "run"); | |
166 | ||
4f97558e HZ |
167 | while (s->state == MIGRATION_STATUS_COLO) { |
168 | ret = colo_do_checkpoint_transaction(s); | |
169 | if (ret < 0) { | |
170 | goto out; | |
171 | } | |
172 | } | |
0b827d5e | 173 | |
56ba83d2 | 174 | out: |
4f97558e HZ |
175 | /* Throw the unreported error message after exited from loop */ |
176 | if (local_err) { | |
177 | error_report_err(local_err); | |
178 | } | |
179 | ||
56ba83d2 HZ |
180 | if (s->rp_state.from_dst_file) { |
181 | qemu_fclose(s->rp_state.from_dst_file); | |
182 | } | |
0b827d5e HZ |
183 | } |
184 | ||
185 | void migrate_start_colo_process(MigrationState *s) | |
186 | { | |
187 | qemu_mutex_unlock_iothread(); | |
188 | migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE, | |
189 | MIGRATION_STATUS_COLO); | |
190 | colo_process_checkpoint(s); | |
191 | qemu_mutex_lock_iothread(); | |
192 | } | |
25d0c16f | 193 | |
4f97558e HZ |
194 | static void colo_wait_handle_message(QEMUFile *f, int *checkpoint_request, |
195 | Error **errp) | |
196 | { | |
197 | COLOMessage msg; | |
198 | Error *local_err = NULL; | |
199 | ||
200 | msg = colo_receive_message(f, &local_err); | |
201 | if (local_err) { | |
202 | error_propagate(errp, local_err); | |
203 | return; | |
204 | } | |
205 | ||
206 | switch (msg) { | |
207 | case COLO_MESSAGE_CHECKPOINT_REQUEST: | |
208 | *checkpoint_request = 1; | |
209 | break; | |
210 | default: | |
211 | *checkpoint_request = 0; | |
212 | error_setg(errp, "Got unknown COLO message: %d", msg); | |
213 | break; | |
214 | } | |
215 | } | |
216 | ||
25d0c16f HZ |
217 | void *colo_process_incoming_thread(void *opaque) |
218 | { | |
219 | MigrationIncomingState *mis = opaque; | |
4f97558e | 220 | Error *local_err = NULL; |
25d0c16f HZ |
221 | |
222 | migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, | |
223 | MIGRATION_STATUS_COLO); | |
224 | ||
56ba83d2 HZ |
225 | mis->to_src_file = qemu_file_get_return_path(mis->from_src_file); |
226 | if (!mis->to_src_file) { | |
227 | error_report("COLO incoming thread: Open QEMUFile to_src_file failed"); | |
228 | goto out; | |
229 | } | |
230 | /* | |
231 | * Note: the communication between Primary side and Secondary side | |
232 | * should be sequential, we set the fd to unblocked in migration incoming | |
233 | * coroutine, and here we are in the COLO incoming thread, so it is ok to | |
234 | * set the fd back to blocked. | |
235 | */ | |
236 | qemu_file_set_blocking(mis->from_src_file, true); | |
237 | ||
4f97558e HZ |
238 | colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_READY, |
239 | &local_err); | |
240 | if (local_err) { | |
241 | goto out; | |
242 | } | |
243 | ||
244 | while (mis->state == MIGRATION_STATUS_COLO) { | |
245 | int request; | |
246 | ||
247 | colo_wait_handle_message(mis->from_src_file, &request, &local_err); | |
248 | if (local_err) { | |
249 | goto out; | |
250 | } | |
251 | assert(request); | |
252 | /* FIXME: This is unnecessary for periodic checkpoint mode */ | |
253 | colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_REPLY, | |
254 | &local_err); | |
255 | if (local_err) { | |
256 | goto out; | |
257 | } | |
258 | ||
259 | colo_receive_check_message(mis->from_src_file, | |
260 | COLO_MESSAGE_VMSTATE_SEND, &local_err); | |
261 | if (local_err) { | |
262 | goto out; | |
263 | } | |
264 | ||
265 | /* TODO: read migration data into colo buffer */ | |
266 | ||
267 | colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_RECEIVED, | |
268 | &local_err); | |
269 | if (local_err) { | |
270 | goto out; | |
271 | } | |
272 | ||
273 | /* TODO: load vm state */ | |
274 | ||
275 | colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_LOADED, | |
276 | &local_err); | |
277 | if (local_err) { | |
278 | goto out; | |
279 | } | |
280 | } | |
25d0c16f | 281 | |
56ba83d2 | 282 | out: |
4f97558e HZ |
283 | /* Throw the unreported error message after exited from loop */ |
284 | if (local_err) { | |
285 | error_report_err(local_err); | |
286 | } | |
287 | ||
56ba83d2 HZ |
288 | if (mis->to_src_file) { |
289 | qemu_fclose(mis->to_src_file); | |
290 | } | |
25d0c16f HZ |
291 | migration_incoming_exit_colo(); |
292 | ||
293 | return NULL; | |
294 | } |