]>
Commit | Line | Data |
---|---|---|
a9e271ec KW |
1 | /* |
2 | * Migration support for VFIO devices | |
3 | * | |
4 | * Copyright NVIDIA, Inc. 2020 | |
5 | * | |
6 | * This work is licensed under the terms of the GNU GPL, version 2. See | |
7 | * the COPYING file in the top-level directory. | |
8 | */ | |
9 | ||
10 | #include "qemu/osdep.h" | |
7c2f5f75 KW |
11 | #include "qemu/main-loop.h" |
12 | #include "qemu/cutils.h" | |
a9e271ec | 13 | #include <linux/vfio.h> |
e663f516 | 14 | #include <sys/ioctl.h> |
a9e271ec | 15 | |
02a7e71b | 16 | #include "sysemu/runstate.h" |
a9e271ec | 17 | #include "hw/vfio/vfio-common.h" |
a9e271ec | 18 | #include "migration/migration.h" |
7c2f5f75 | 19 | #include "migration/vmstate.h" |
a9e271ec KW |
20 | #include "migration/qemu-file.h" |
21 | #include "migration/register.h" | |
22 | #include "migration/blocker.h" | |
23 | #include "migration/misc.h" | |
24 | #include "qapi/error.h" | |
25 | #include "exec/ramlist.h" | |
26 | #include "exec/ram_addr.h" | |
27 | #include "pci.h" | |
28 | #include "trace.h" | |
02a7e71b KW |
29 | #include "hw/hw.h" |
30 | ||
7c2f5f75 KW |
/*
 * Unique 64-bit delimiters that frame VFIO device sections in the migration
 * stream. Every value is laid out as:
 *   0xffffffff => upper 32 bits, all 1s
 *   0xef10     => magic ID, represents emulated (virtual) function IO
 *   0x0000     => 16 bits reserved for flags
 *
 * _DEV_CONFIG_STATE, _DEV_SETUP_STATE and _DEV_DATA_STATE each open the
 * matching kind of state information; _END_OF_STATE closes it.
 */
#define VFIO_MIG_FLAG_END_OF_STATE      (0xffffffffef100001ULL)
#define VFIO_MIG_FLAG_DEV_CONFIG_STATE  (0xffffffffef100002ULL)
#define VFIO_MIG_FLAG_DEV_SETUP_STATE   (0xffffffffef100003ULL)
#define VFIO_MIG_FLAG_DEV_DATA_STATE    (0xffffffffef100004ULL)
46 | ||
3710586c KW |
/*
 * Running total of device data bytes accounted by vfio_save_buffer().
 * Reset to 0 by vfio_migration_state_notifier() when migration is being
 * cancelled, was cancelled or failed; read via vfio_mig_bytes_transferred().
 */
static int64_t bytes_transferred;
48 | ||
02a7e71b KW |
49 | static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count, |
50 | off_t off, bool iswrite) | |
51 | { | |
52 | int ret; | |
53 | ||
54 | ret = iswrite ? pwrite(vbasedev->fd, val, count, off) : | |
55 | pread(vbasedev->fd, val, count, off); | |
56 | if (ret < count) { | |
57 | error_report("vfio_mig_%s %d byte %s: failed at offset 0x%" | |
58 | HWADDR_PRIx", err: %s", iswrite ? "write" : "read", count, | |
59 | vbasedev->name, off, strerror(errno)); | |
60 | return (ret < 0) ? ret : -EINVAL; | |
61 | } | |
62 | return 0; | |
63 | } | |
64 | ||
65 | static int vfio_mig_rw(VFIODevice *vbasedev, __u8 *buf, size_t count, | |
66 | off_t off, bool iswrite) | |
67 | { | |
68 | int ret, done = 0; | |
69 | __u8 *tbuf = buf; | |
70 | ||
71 | while (count) { | |
72 | int bytes = 0; | |
73 | ||
74 | if (count >= 8 && !(off % 8)) { | |
75 | bytes = 8; | |
76 | } else if (count >= 4 && !(off % 4)) { | |
77 | bytes = 4; | |
78 | } else if (count >= 2 && !(off % 2)) { | |
79 | bytes = 2; | |
80 | } else { | |
81 | bytes = 1; | |
82 | } | |
83 | ||
84 | ret = vfio_mig_access(vbasedev, tbuf, bytes, off, iswrite); | |
85 | if (ret) { | |
86 | return ret; | |
87 | } | |
88 | ||
89 | count -= bytes; | |
90 | done += bytes; | |
91 | off += bytes; | |
92 | tbuf += bytes; | |
93 | } | |
94 | return done; | |
95 | } | |
96 | ||
/*
 * Convenience wrappers around vfio_mig_rw(): transfer @c bytes between the
 * object pointed to by @v and offset @o of device @f. Arguments are
 * parenthesized so that expression arguments expand safely.
 */
#define vfio_mig_read(f, v, c, o)                                   \
    vfio_mig_rw((f), (__u8 *)(v), (c), (o), false)
#define vfio_mig_write(f, v, c, o)                                  \
    vfio_mig_rw((f), (__u8 *)(v), (c), (o), true)

/* Byte offset of member @f within struct vfio_device_migration_info */
#define VFIO_MIG_STRUCT_OFFSET(f)                                   \
    offsetof(struct vfio_device_migration_info, f)
102 | /* | |
103 | * Change the device_state register for device @vbasedev. Bits set in @mask | |
104 | * are preserved, bits set in @value are set, and bits not set in either @mask | |
105 | * or @value are cleared in device_state. If the register cannot be accessed, | |
106 | * the resulting state would be invalid, or the device enters an error state, | |
107 | * an error is returned. | |
108 | */ | |
109 | ||
110 | static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask, | |
111 | uint32_t value) | |
112 | { | |
113 | VFIOMigration *migration = vbasedev->migration; | |
114 | VFIORegion *region = &migration->region; | |
115 | off_t dev_state_off = region->fd_offset + | |
116 | VFIO_MIG_STRUCT_OFFSET(device_state); | |
117 | uint32_t device_state; | |
118 | int ret; | |
119 | ||
120 | ret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state), | |
121 | dev_state_off); | |
122 | if (ret < 0) { | |
123 | return ret; | |
124 | } | |
125 | ||
126 | device_state = (device_state & mask) | value; | |
127 | ||
128 | if (!VFIO_DEVICE_STATE_VALID(device_state)) { | |
129 | return -EINVAL; | |
130 | } | |
131 | ||
132 | ret = vfio_mig_write(vbasedev, &device_state, sizeof(device_state), | |
133 | dev_state_off); | |
134 | if (ret < 0) { | |
135 | int rret; | |
136 | ||
137 | rret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state), | |
138 | dev_state_off); | |
139 | ||
140 | if ((rret < 0) || (VFIO_DEVICE_STATE_IS_ERROR(device_state))) { | |
141 | hw_error("%s: Device in error state 0x%x", vbasedev->name, | |
142 | device_state); | |
143 | return rret ? rret : -EIO; | |
144 | } | |
145 | return ret; | |
146 | } | |
147 | ||
148 | migration->device_state = device_state; | |
149 | trace_vfio_migration_set_state(vbasedev->name, device_state); | |
150 | return 0; | |
151 | } | |
152 | ||
1bc3c535 KW |
153 | static void *get_data_section_size(VFIORegion *region, uint64_t data_offset, |
154 | uint64_t data_size, uint64_t *size) | |
155 | { | |
156 | void *ptr = NULL; | |
157 | uint64_t limit = 0; | |
158 | int i; | |
159 | ||
160 | if (!region->mmaps) { | |
161 | if (size) { | |
162 | *size = MIN(data_size, region->size - data_offset); | |
163 | } | |
164 | return ptr; | |
165 | } | |
166 | ||
167 | for (i = 0; i < region->nr_mmaps; i++) { | |
168 | VFIOMmap *map = region->mmaps + i; | |
169 | ||
170 | if ((data_offset >= map->offset) && | |
171 | (data_offset < map->offset + map->size)) { | |
172 | ||
173 | /* check if data_offset is within sparse mmap areas */ | |
174 | ptr = map->mmap + data_offset - map->offset; | |
175 | if (size) { | |
176 | *size = MIN(data_size, map->offset + map->size - data_offset); | |
177 | } | |
178 | break; | |
179 | } else if ((data_offset < map->offset) && | |
180 | (!limit || limit > map->offset)) { | |
181 | /* | |
182 | * data_offset is not within sparse mmap areas, find size of | |
183 | * non-mapped area. Check through all list since region->mmaps list | |
184 | * is not sorted. | |
185 | */ | |
186 | limit = map->offset; | |
187 | } | |
188 | } | |
189 | ||
190 | if (!ptr && size) { | |
191 | *size = limit ? MIN(data_size, limit - data_offset) : data_size; | |
192 | } | |
193 | return ptr; | |
194 | } | |
195 | ||
196 | static int vfio_save_buffer(QEMUFile *f, VFIODevice *vbasedev, uint64_t *size) | |
197 | { | |
198 | VFIOMigration *migration = vbasedev->migration; | |
199 | VFIORegion *region = &migration->region; | |
200 | uint64_t data_offset = 0, data_size = 0, sz; | |
201 | int ret; | |
202 | ||
203 | ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset), | |
204 | region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset)); | |
205 | if (ret < 0) { | |
206 | return ret; | |
207 | } | |
208 | ||
209 | ret = vfio_mig_read(vbasedev, &data_size, sizeof(data_size), | |
210 | region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size)); | |
211 | if (ret < 0) { | |
212 | return ret; | |
213 | } | |
214 | ||
215 | trace_vfio_save_buffer(vbasedev->name, data_offset, data_size, | |
216 | migration->pending_bytes); | |
217 | ||
218 | qemu_put_be64(f, data_size); | |
219 | sz = data_size; | |
220 | ||
221 | while (sz) { | |
222 | void *buf; | |
223 | uint64_t sec_size; | |
224 | bool buf_allocated = false; | |
225 | ||
226 | buf = get_data_section_size(region, data_offset, sz, &sec_size); | |
227 | ||
228 | if (!buf) { | |
229 | buf = g_try_malloc(sec_size); | |
230 | if (!buf) { | |
231 | error_report("%s: Error allocating buffer ", __func__); | |
232 | return -ENOMEM; | |
233 | } | |
234 | buf_allocated = true; | |
235 | ||
236 | ret = vfio_mig_read(vbasedev, buf, sec_size, | |
237 | region->fd_offset + data_offset); | |
238 | if (ret < 0) { | |
239 | g_free(buf); | |
240 | return ret; | |
241 | } | |
242 | } | |
243 | ||
244 | qemu_put_buffer(f, buf, sec_size); | |
245 | ||
246 | if (buf_allocated) { | |
247 | g_free(buf); | |
248 | } | |
249 | sz -= sec_size; | |
250 | data_offset += sec_size; | |
251 | } | |
252 | ||
253 | ret = qemu_file_get_error(f); | |
254 | ||
255 | if (!ret && size) { | |
256 | *size = data_size; | |
257 | } | |
258 | ||
3710586c | 259 | bytes_transferred += data_size; |
1bc3c535 KW |
260 | return ret; |
261 | } | |
262 | ||
3336d217 KW |
263 | static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev, |
264 | uint64_t data_size) | |
265 | { | |
266 | VFIORegion *region = &vbasedev->migration->region; | |
267 | uint64_t data_offset = 0, size, report_size; | |
268 | int ret; | |
269 | ||
270 | do { | |
271 | ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset), | |
272 | region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset)); | |
273 | if (ret < 0) { | |
274 | return ret; | |
275 | } | |
276 | ||
277 | if (data_offset + data_size > region->size) { | |
278 | /* | |
279 | * If data_size is greater than the data section of migration region | |
280 | * then iterate the write buffer operation. This case can occur if | |
281 | * size of migration region at destination is smaller than size of | |
282 | * migration region at source. | |
283 | */ | |
284 | report_size = size = region->size - data_offset; | |
285 | data_size -= size; | |
286 | } else { | |
287 | report_size = size = data_size; | |
288 | data_size = 0; | |
289 | } | |
290 | ||
291 | trace_vfio_load_state_device_data(vbasedev->name, data_offset, size); | |
292 | ||
293 | while (size) { | |
294 | void *buf; | |
295 | uint64_t sec_size; | |
296 | bool buf_alloc = false; | |
297 | ||
298 | buf = get_data_section_size(region, data_offset, size, &sec_size); | |
299 | ||
300 | if (!buf) { | |
301 | buf = g_try_malloc(sec_size); | |
302 | if (!buf) { | |
303 | error_report("%s: Error allocating buffer ", __func__); | |
304 | return -ENOMEM; | |
305 | } | |
306 | buf_alloc = true; | |
307 | } | |
308 | ||
309 | qemu_get_buffer(f, buf, sec_size); | |
310 | ||
311 | if (buf_alloc) { | |
312 | ret = vfio_mig_write(vbasedev, buf, sec_size, | |
313 | region->fd_offset + data_offset); | |
314 | g_free(buf); | |
315 | ||
316 | if (ret < 0) { | |
317 | return ret; | |
318 | } | |
319 | } | |
320 | size -= sec_size; | |
321 | data_offset += sec_size; | |
322 | } | |
323 | ||
324 | ret = vfio_mig_write(vbasedev, &report_size, sizeof(report_size), | |
325 | region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size)); | |
326 | if (ret < 0) { | |
327 | return ret; | |
328 | } | |
329 | } while (data_size); | |
330 | ||
331 | return 0; | |
332 | } | |
333 | ||
1bc3c535 KW |
334 | static int vfio_update_pending(VFIODevice *vbasedev) |
335 | { | |
336 | VFIOMigration *migration = vbasedev->migration; | |
337 | VFIORegion *region = &migration->region; | |
338 | uint64_t pending_bytes = 0; | |
339 | int ret; | |
340 | ||
341 | ret = vfio_mig_read(vbasedev, &pending_bytes, sizeof(pending_bytes), | |
342 | region->fd_offset + VFIO_MIG_STRUCT_OFFSET(pending_bytes)); | |
343 | if (ret < 0) { | |
344 | migration->pending_bytes = 0; | |
345 | return ret; | |
346 | } | |
347 | ||
348 | migration->pending_bytes = pending_bytes; | |
349 | trace_vfio_update_pending(vbasedev->name, pending_bytes); | |
350 | return 0; | |
351 | } | |
352 | ||
353 | static int vfio_save_device_config_state(QEMUFile *f, void *opaque) | |
354 | { | |
355 | VFIODevice *vbasedev = opaque; | |
356 | ||
357 | qemu_put_be64(f, VFIO_MIG_FLAG_DEV_CONFIG_STATE); | |
358 | ||
359 | if (vbasedev->ops && vbasedev->ops->vfio_save_config) { | |
360 | vbasedev->ops->vfio_save_config(vbasedev, f); | |
361 | } | |
362 | ||
363 | qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); | |
364 | ||
365 | trace_vfio_save_device_config_state(vbasedev->name); | |
366 | ||
367 | return qemu_file_get_error(f); | |
368 | } | |
369 | ||
3336d217 KW |
370 | static int vfio_load_device_config_state(QEMUFile *f, void *opaque) |
371 | { | |
372 | VFIODevice *vbasedev = opaque; | |
373 | uint64_t data; | |
374 | ||
375 | if (vbasedev->ops && vbasedev->ops->vfio_load_config) { | |
376 | int ret; | |
377 | ||
378 | ret = vbasedev->ops->vfio_load_config(vbasedev, f); | |
379 | if (ret) { | |
380 | error_report("%s: Failed to load device config space", | |
381 | vbasedev->name); | |
382 | return ret; | |
383 | } | |
384 | } | |
385 | ||
386 | data = qemu_get_be64(f); | |
387 | if (data != VFIO_MIG_FLAG_END_OF_STATE) { | |
388 | error_report("%s: Failed loading device config space, " | |
389 | "end flag incorrect 0x%"PRIx64, vbasedev->name, data); | |
390 | return -EINVAL; | |
391 | } | |
392 | ||
393 | trace_vfio_load_device_config_state(vbasedev->name); | |
394 | return qemu_file_get_error(f); | |
395 | } | |
396 | ||
7c2f5f75 KW |
397 | static void vfio_migration_cleanup(VFIODevice *vbasedev) |
398 | { | |
399 | VFIOMigration *migration = vbasedev->migration; | |
400 | ||
401 | if (migration->region.mmaps) { | |
402 | vfio_region_unmap(&migration->region); | |
403 | } | |
404 | } | |
405 | ||
406 | /* ---------------------------------------------------------------------- */ | |
407 | ||
408 | static int vfio_save_setup(QEMUFile *f, void *opaque) | |
409 | { | |
410 | VFIODevice *vbasedev = opaque; | |
411 | VFIOMigration *migration = vbasedev->migration; | |
412 | int ret; | |
413 | ||
414 | trace_vfio_save_setup(vbasedev->name); | |
415 | ||
416 | qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE); | |
417 | ||
418 | if (migration->region.mmaps) { | |
419 | /* | |
420 | * Calling vfio_region_mmap() from migration thread. Memory API called | |
421 | * from this function require locking the iothread when called from | |
422 | * outside the main loop thread. | |
423 | */ | |
424 | qemu_mutex_lock_iothread(); | |
425 | ret = vfio_region_mmap(&migration->region); | |
426 | qemu_mutex_unlock_iothread(); | |
427 | if (ret) { | |
428 | error_report("%s: Failed to mmap VFIO migration region: %s", | |
429 | vbasedev->name, strerror(-ret)); | |
430 | error_report("%s: Falling back to slow path", vbasedev->name); | |
431 | } | |
432 | } | |
433 | ||
434 | ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_MASK, | |
e4082063 | 435 | VFIO_DEVICE_STATE_V1_SAVING); |
7c2f5f75 KW |
436 | if (ret) { |
437 | error_report("%s: Failed to set state SAVING", vbasedev->name); | |
438 | return ret; | |
439 | } | |
440 | ||
441 | qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); | |
442 | ||
443 | ret = qemu_file_get_error(f); | |
444 | if (ret) { | |
445 | return ret; | |
446 | } | |
447 | ||
448 | return 0; | |
449 | } | |
450 | ||
451 | static void vfio_save_cleanup(void *opaque) | |
452 | { | |
453 | VFIODevice *vbasedev = opaque; | |
454 | ||
455 | vfio_migration_cleanup(vbasedev); | |
456 | trace_vfio_save_cleanup(vbasedev->name); | |
457 | } | |
458 | ||
1bc3c535 KW |
459 | static void vfio_save_pending(QEMUFile *f, void *opaque, |
460 | uint64_t threshold_size, | |
461 | uint64_t *res_precopy_only, | |
462 | uint64_t *res_compatible, | |
463 | uint64_t *res_postcopy_only) | |
464 | { | |
465 | VFIODevice *vbasedev = opaque; | |
466 | VFIOMigration *migration = vbasedev->migration; | |
467 | int ret; | |
468 | ||
469 | ret = vfio_update_pending(vbasedev); | |
470 | if (ret) { | |
471 | return; | |
472 | } | |
473 | ||
474 | *res_precopy_only += migration->pending_bytes; | |
475 | ||
476 | trace_vfio_save_pending(vbasedev->name, *res_precopy_only, | |
477 | *res_postcopy_only, *res_compatible); | |
478 | } | |
479 | ||
480 | static int vfio_save_iterate(QEMUFile *f, void *opaque) | |
481 | { | |
482 | VFIODevice *vbasedev = opaque; | |
483 | VFIOMigration *migration = vbasedev->migration; | |
484 | uint64_t data_size; | |
485 | int ret; | |
486 | ||
487 | qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE); | |
488 | ||
489 | if (migration->pending_bytes == 0) { | |
490 | ret = vfio_update_pending(vbasedev); | |
491 | if (ret) { | |
492 | return ret; | |
493 | } | |
494 | ||
495 | if (migration->pending_bytes == 0) { | |
496 | qemu_put_be64(f, 0); | |
497 | qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); | |
498 | /* indicates data finished, goto complete phase */ | |
499 | return 1; | |
500 | } | |
501 | } | |
502 | ||
503 | ret = vfio_save_buffer(f, vbasedev, &data_size); | |
504 | if (ret) { | |
505 | error_report("%s: vfio_save_buffer failed %s", vbasedev->name, | |
506 | strerror(errno)); | |
507 | return ret; | |
508 | } | |
509 | ||
510 | qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); | |
511 | ||
512 | ret = qemu_file_get_error(f); | |
513 | if (ret) { | |
514 | return ret; | |
515 | } | |
516 | ||
517 | /* | |
518 | * Reset pending_bytes as .save_live_pending is not called during savevm or | |
519 | * snapshot case, in such case vfio_update_pending() at the start of this | |
520 | * function updates pending_bytes. | |
521 | */ | |
522 | migration->pending_bytes = 0; | |
523 | trace_vfio_save_iterate(vbasedev->name, data_size); | |
524 | return 0; | |
525 | } | |
526 | ||
527 | static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) | |
528 | { | |
529 | VFIODevice *vbasedev = opaque; | |
530 | VFIOMigration *migration = vbasedev->migration; | |
531 | uint64_t data_size; | |
532 | int ret; | |
533 | ||
e4082063 AW |
534 | ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_V1_RUNNING, |
535 | VFIO_DEVICE_STATE_V1_SAVING); | |
1bc3c535 KW |
536 | if (ret) { |
537 | error_report("%s: Failed to set state STOP and SAVING", | |
538 | vbasedev->name); | |
539 | return ret; | |
540 | } | |
541 | ||
1bc3c535 KW |
542 | ret = vfio_update_pending(vbasedev); |
543 | if (ret) { | |
544 | return ret; | |
545 | } | |
546 | ||
547 | while (migration->pending_bytes > 0) { | |
548 | qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE); | |
549 | ret = vfio_save_buffer(f, vbasedev, &data_size); | |
550 | if (ret < 0) { | |
551 | error_report("%s: Failed to save buffer", vbasedev->name); | |
552 | return ret; | |
553 | } | |
554 | ||
555 | if (data_size == 0) { | |
556 | break; | |
557 | } | |
558 | ||
559 | ret = vfio_update_pending(vbasedev); | |
560 | if (ret) { | |
561 | return ret; | |
562 | } | |
563 | } | |
564 | ||
565 | qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); | |
566 | ||
567 | ret = qemu_file_get_error(f); | |
568 | if (ret) { | |
569 | return ret; | |
570 | } | |
571 | ||
e4082063 | 572 | ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_V1_SAVING, 0); |
1bc3c535 KW |
573 | if (ret) { |
574 | error_report("%s: Failed to set state STOPPED", vbasedev->name); | |
575 | return ret; | |
576 | } | |
577 | ||
578 | trace_vfio_save_complete_precopy(vbasedev->name); | |
579 | return ret; | |
580 | } | |
581 | ||
d329f503 SL |
582 | static void vfio_save_state(QEMUFile *f, void *opaque) |
583 | { | |
584 | VFIODevice *vbasedev = opaque; | |
585 | int ret; | |
586 | ||
587 | ret = vfio_save_device_config_state(f, opaque); | |
588 | if (ret) { | |
589 | error_report("%s: Failed to save device config space", | |
590 | vbasedev->name); | |
591 | qemu_file_set_error(f, ret); | |
592 | } | |
593 | } | |
594 | ||
3336d217 KW |
595 | static int vfio_load_setup(QEMUFile *f, void *opaque) |
596 | { | |
597 | VFIODevice *vbasedev = opaque; | |
598 | VFIOMigration *migration = vbasedev->migration; | |
599 | int ret = 0; | |
600 | ||
601 | if (migration->region.mmaps) { | |
602 | ret = vfio_region_mmap(&migration->region); | |
603 | if (ret) { | |
604 | error_report("%s: Failed to mmap VFIO migration region %d: %s", | |
605 | vbasedev->name, migration->region.nr, | |
606 | strerror(-ret)); | |
607 | error_report("%s: Falling back to slow path", vbasedev->name); | |
608 | } | |
609 | } | |
610 | ||
611 | ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_MASK, | |
e4082063 | 612 | VFIO_DEVICE_STATE_V1_RESUMING); |
3336d217 KW |
613 | if (ret) { |
614 | error_report("%s: Failed to set state RESUMING", vbasedev->name); | |
615 | if (migration->region.mmaps) { | |
616 | vfio_region_unmap(&migration->region); | |
617 | } | |
618 | } | |
619 | return ret; | |
620 | } | |
621 | ||
622 | static int vfio_load_cleanup(void *opaque) | |
623 | { | |
624 | VFIODevice *vbasedev = opaque; | |
625 | ||
626 | vfio_migration_cleanup(vbasedev); | |
627 | trace_vfio_load_cleanup(vbasedev->name); | |
628 | return 0; | |
629 | } | |
630 | ||
631 | static int vfio_load_state(QEMUFile *f, void *opaque, int version_id) | |
632 | { | |
633 | VFIODevice *vbasedev = opaque; | |
634 | int ret = 0; | |
635 | uint64_t data; | |
636 | ||
637 | data = qemu_get_be64(f); | |
638 | while (data != VFIO_MIG_FLAG_END_OF_STATE) { | |
639 | ||
640 | trace_vfio_load_state(vbasedev->name, data); | |
641 | ||
642 | switch (data) { | |
643 | case VFIO_MIG_FLAG_DEV_CONFIG_STATE: | |
644 | { | |
d329f503 | 645 | return vfio_load_device_config_state(f, opaque); |
3336d217 KW |
646 | } |
647 | case VFIO_MIG_FLAG_DEV_SETUP_STATE: | |
648 | { | |
649 | data = qemu_get_be64(f); | |
650 | if (data == VFIO_MIG_FLAG_END_OF_STATE) { | |
651 | return ret; | |
652 | } else { | |
653 | error_report("%s: SETUP STATE: EOS not found 0x%"PRIx64, | |
654 | vbasedev->name, data); | |
655 | return -EINVAL; | |
656 | } | |
657 | break; | |
658 | } | |
659 | case VFIO_MIG_FLAG_DEV_DATA_STATE: | |
660 | { | |
661 | uint64_t data_size = qemu_get_be64(f); | |
662 | ||
663 | if (data_size) { | |
664 | ret = vfio_load_buffer(f, vbasedev, data_size); | |
665 | if (ret < 0) { | |
666 | return ret; | |
667 | } | |
668 | } | |
669 | break; | |
670 | } | |
671 | default: | |
672 | error_report("%s: Unknown tag 0x%"PRIx64, vbasedev->name, data); | |
673 | return -EINVAL; | |
674 | } | |
675 | ||
676 | data = qemu_get_be64(f); | |
677 | ret = qemu_file_get_error(f); | |
678 | if (ret) { | |
679 | return ret; | |
680 | } | |
681 | } | |
682 | return ret; | |
683 | } | |
684 | ||
7c2f5f75 KW |
685 | static SaveVMHandlers savevm_vfio_handlers = { |
686 | .save_setup = vfio_save_setup, | |
687 | .save_cleanup = vfio_save_cleanup, | |
1bc3c535 KW |
688 | .save_live_pending = vfio_save_pending, |
689 | .save_live_iterate = vfio_save_iterate, | |
690 | .save_live_complete_precopy = vfio_save_complete_precopy, | |
d329f503 | 691 | .save_state = vfio_save_state, |
3336d217 KW |
692 | .load_setup = vfio_load_setup, |
693 | .load_cleanup = vfio_load_cleanup, | |
694 | .load_state = vfio_load_state, | |
7c2f5f75 KW |
695 | }; |
696 | ||
697 | /* ---------------------------------------------------------------------- */ | |
698 | ||
538f0497 | 699 | static void vfio_vmstate_change(void *opaque, bool running, RunState state) |
02a7e71b KW |
700 | { |
701 | VFIODevice *vbasedev = opaque; | |
702 | VFIOMigration *migration = vbasedev->migration; | |
703 | uint32_t value, mask; | |
704 | int ret; | |
705 | ||
706 | if (vbasedev->migration->vm_running == running) { | |
707 | return; | |
708 | } | |
709 | ||
710 | if (running) { | |
711 | /* | |
712 | * Here device state can have one of _SAVING, _RESUMING or _STOP bit. | |
713 | * Transition from _SAVING to _RUNNING can happen if there is migration | |
714 | * failure, in that case clear _SAVING bit. | |
715 | * Transition from _RESUMING to _RUNNING occurs during resuming | |
716 | * phase, in that case clear _RESUMING bit. | |
717 | * In both the above cases, set _RUNNING bit. | |
718 | */ | |
719 | mask = ~VFIO_DEVICE_STATE_MASK; | |
e4082063 | 720 | value = VFIO_DEVICE_STATE_V1_RUNNING; |
02a7e71b KW |
721 | } else { |
722 | /* | |
723 | * Here device state could be either _RUNNING or _SAVING|_RUNNING. Reset | |
724 | * _RUNNING bit | |
725 | */ | |
e4082063 | 726 | mask = ~VFIO_DEVICE_STATE_V1_RUNNING; |
d742d064 KW |
727 | |
728 | /* | |
729 | * When VM state transition to stop for savevm command, device should | |
730 | * start saving data. | |
731 | */ | |
732 | if (state == RUN_STATE_SAVE_VM) { | |
e4082063 | 733 | value = VFIO_DEVICE_STATE_V1_SAVING; |
d742d064 KW |
734 | } else { |
735 | value = 0; | |
736 | } | |
02a7e71b KW |
737 | } |
738 | ||
739 | ret = vfio_migration_set_state(vbasedev, mask, value); | |
740 | if (ret) { | |
741 | /* | |
742 | * Migration should be aborted in this case, but vm_state_notify() | |
743 | * currently does not support reporting failures. | |
744 | */ | |
745 | error_report("%s: Failed to set device state 0x%x", vbasedev->name, | |
746 | (migration->device_state & mask) | value); | |
747 | qemu_file_set_error(migrate_get_current()->to_dst_file, ret); | |
748 | } | |
749 | vbasedev->migration->vm_running = running; | |
750 | trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state), | |
751 | (migration->device_state & mask) | value); | |
752 | } | |
a9e271ec | 753 | |
050c588c KW |
754 | static void vfio_migration_state_notifier(Notifier *notifier, void *data) |
755 | { | |
756 | MigrationState *s = data; | |
757 | VFIOMigration *migration = container_of(notifier, VFIOMigration, | |
758 | migration_state); | |
759 | VFIODevice *vbasedev = migration->vbasedev; | |
760 | int ret; | |
761 | ||
762 | trace_vfio_migration_state_notifier(vbasedev->name, | |
763 | MigrationStatus_str(s->state)); | |
764 | ||
765 | switch (s->state) { | |
766 | case MIGRATION_STATUS_CANCELLING: | |
767 | case MIGRATION_STATUS_CANCELLED: | |
768 | case MIGRATION_STATUS_FAILED: | |
3710586c | 769 | bytes_transferred = 0; |
050c588c | 770 | ret = vfio_migration_set_state(vbasedev, |
e4082063 AW |
771 | ~(VFIO_DEVICE_STATE_V1_SAVING | |
772 | VFIO_DEVICE_STATE_V1_RESUMING), | |
773 | VFIO_DEVICE_STATE_V1_RUNNING); | |
050c588c KW |
774 | if (ret) { |
775 | error_report("%s: Failed to set state RUNNING", vbasedev->name); | |
776 | } | |
777 | } | |
778 | } | |
779 | ||
a9e271ec KW |
780 | static void vfio_migration_exit(VFIODevice *vbasedev) |
781 | { | |
782 | VFIOMigration *migration = vbasedev->migration; | |
783 | ||
784 | vfio_region_exit(&migration->region); | |
785 | vfio_region_finalize(&migration->region); | |
786 | g_free(vbasedev->migration); | |
787 | vbasedev->migration = NULL; | |
788 | } | |
789 | ||
790 | static int vfio_migration_init(VFIODevice *vbasedev, | |
791 | struct vfio_region_info *info) | |
792 | { | |
793 | int ret; | |
794 | Object *obj; | |
02a7e71b | 795 | VFIOMigration *migration; |
7c2f5f75 KW |
796 | char id[256] = ""; |
797 | g_autofree char *path = NULL, *oid = NULL; | |
a9e271ec KW |
798 | |
799 | if (!vbasedev->ops->vfio_get_object) { | |
800 | return -EINVAL; | |
801 | } | |
802 | ||
803 | obj = vbasedev->ops->vfio_get_object(vbasedev); | |
804 | if (!obj) { | |
805 | return -EINVAL; | |
806 | } | |
807 | ||
808 | vbasedev->migration = g_new0(VFIOMigration, 1); | |
809 | ||
810 | ret = vfio_region_setup(obj, vbasedev, &vbasedev->migration->region, | |
811 | info->index, "migration"); | |
812 | if (ret) { | |
813 | error_report("%s: Failed to setup VFIO migration region %d: %s", | |
814 | vbasedev->name, info->index, strerror(-ret)); | |
815 | goto err; | |
816 | } | |
817 | ||
818 | if (!vbasedev->migration->region.size) { | |
819 | error_report("%s: Invalid zero-sized VFIO migration region %d", | |
820 | vbasedev->name, info->index); | |
821 | ret = -EINVAL; | |
822 | goto err; | |
823 | } | |
02a7e71b KW |
824 | |
825 | migration = vbasedev->migration; | |
050c588c | 826 | migration->vbasedev = vbasedev; |
7c2f5f75 KW |
827 | |
828 | oid = vmstate_if_get_id(VMSTATE_IF(DEVICE(obj))); | |
829 | if (oid) { | |
830 | path = g_strdup_printf("%s/vfio", oid); | |
831 | } else { | |
832 | path = g_strdup("vfio"); | |
833 | } | |
834 | strpadcpy(id, sizeof(id), path, '\0'); | |
835 | ||
836 | register_savevm_live(id, VMSTATE_INSTANCE_ID_ANY, 1, &savevm_vfio_handlers, | |
837 | vbasedev); | |
838 | ||
8ce1ff99 SL |
839 | migration->vm_state = qdev_add_vm_change_state_handler(vbasedev->dev, |
840 | vfio_vmstate_change, | |
02a7e71b | 841 | vbasedev); |
050c588c KW |
842 | migration->migration_state.notify = vfio_migration_state_notifier; |
843 | add_migration_state_change_notifier(&migration->migration_state); | |
a9e271ec KW |
844 | return 0; |
845 | ||
846 | err: | |
847 | vfio_migration_exit(vbasedev); | |
848 | return ret; | |
849 | } | |
850 | ||
851 | /* ---------------------------------------------------------------------- */ | |
852 | ||
3710586c KW |
853 | int64_t vfio_mig_bytes_transferred(void) |
854 | { | |
855 | return bytes_transferred; | |
856 | } | |
857 | ||
a9e271ec KW |
858 | int vfio_migration_probe(VFIODevice *vbasedev, Error **errp) |
859 | { | |
87ea529c | 860 | VFIOContainer *container = vbasedev->group->container; |
a9e271ec | 861 | struct vfio_region_info *info = NULL; |
87ea529c KW |
862 | int ret = -ENOTSUP; |
863 | ||
cf254988 | 864 | if (!vbasedev->enable_migration || !container->dirty_pages_supported) { |
87ea529c KW |
865 | goto add_blocker; |
866 | } | |
a9e271ec | 867 | |
e4082063 AW |
868 | ret = vfio_get_dev_region_info(vbasedev, |
869 | VFIO_REGION_TYPE_MIGRATION_DEPRECATED, | |
870 | VFIO_REGION_SUBTYPE_MIGRATION_DEPRECATED, | |
871 | &info); | |
a9e271ec KW |
872 | if (ret) { |
873 | goto add_blocker; | |
874 | } | |
875 | ||
876 | ret = vfio_migration_init(vbasedev, info); | |
877 | if (ret) { | |
878 | goto add_blocker; | |
879 | } | |
880 | ||
a9e271ec | 881 | trace_vfio_migration_probe(vbasedev->name, info->index); |
e408aeef | 882 | g_free(info); |
a9e271ec KW |
883 | return 0; |
884 | ||
885 | add_blocker: | |
886 | error_setg(&vbasedev->migration_blocker, | |
887 | "VFIO device doesn't support migration"); | |
888 | g_free(info); | |
889 | ||
eb24a23e MA |
890 | ret = migrate_add_blocker(vbasedev->migration_blocker, errp); |
891 | if (ret < 0) { | |
a9e271ec KW |
892 | error_free(vbasedev->migration_blocker); |
893 | vbasedev->migration_blocker = NULL; | |
894 | } | |
895 | return ret; | |
896 | } | |
897 | ||
898 | void vfio_migration_finalize(VFIODevice *vbasedev) | |
899 | { | |
900 | if (vbasedev->migration) { | |
02a7e71b KW |
901 | VFIOMigration *migration = vbasedev->migration; |
902 | ||
050c588c | 903 | remove_migration_state_change_notifier(&migration->migration_state); |
02a7e71b | 904 | qemu_del_vm_change_state_handler(migration->vm_state); |
22fca190 | 905 | unregister_savevm(VMSTATE_IF(vbasedev->dev), "vfio", vbasedev); |
a9e271ec KW |
906 | vfio_migration_exit(vbasedev); |
907 | } | |
908 | ||
909 | if (vbasedev->migration_blocker) { | |
910 | migrate_del_blocker(vbasedev->migration_blocker); | |
911 | error_free(vbasedev->migration_blocker); | |
912 | vbasedev->migration_blocker = NULL; | |
913 | } | |
914 | } |