]>
Commit | Line | Data |
---|---|---|
8f9a9259 JR |
1 | /** |
2 | * QEMU vfio-user-server server object | |
3 | * | |
4 | * Copyright © 2022 Oracle and/or its affiliates. | |
5 | * | |
6 | * This work is licensed under the terms of the GNU GPL-v2, version 2 or later. | |
7 | * | |
8 | * See the COPYING file in the top-level directory. | |
9 | * | |
10 | */ | |
11 | ||
12 | /** | |
13 | * Usage: add options: | |
14 | * -machine x-remote,vfio-user=on,auto-shutdown=on | |
15 | * -device <PCI-device>,id=<pci-dev-id> | |
16 | * -object x-vfio-user-server,id=<id>,type=unix,path=<socket-path>, | |
17 | * device=<pci-dev-id> | |
18 | * | |
19 | * Note that x-vfio-user-server object must be used with x-remote machine only. | |
20 | * This server supports only PCI devices for now. | |
21 | * | |
22 | * type - SocketAddress type - presently "unix" alone is supported. Required | |
23 | * option | |
24 | * | |
25 | * path - named unix socket, it will be created by the server. It is | |
26 | * a required option | |
27 | * | |
28 | * device - id of a device on the server, a required option. PCI devices | |
29 | * alone are supported presently. | |
9fb3fba1 JR |
30 | * |
31 | * notes - x-vfio-user-server could block IO and monitor during the | |
32 | * initialization phase. | |
8f9a9259 JR |
33 | */ |
34 | ||
35 | #include "qemu/osdep.h" | |
36 | ||
37 | #include "qom/object.h" | |
38 | #include "qom/object_interfaces.h" | |
39 | #include "qemu/error-report.h" | |
40 | #include "trace.h" | |
41 | #include "sysemu/runstate.h" | |
42 | #include "hw/boards.h" | |
43 | #include "hw/remote/machine.h" | |
44 | #include "qapi/error.h" | |
45 | #include "qapi/qapi-visit-sockets.h" | |
9fb3fba1 | 46 | #include "qapi/qapi-events-misc.h" |
87f7249f | 47 | #include "qemu/notify.h" |
9fb3fba1 | 48 | #include "qemu/thread.h" |
90072f29 | 49 | #include "qemu/main-loop.h" |
87f7249f JR |
50 | #include "sysemu/sysemu.h" |
51 | #include "libvfio-user.h" | |
a6e8d6d9 JR |
52 | #include "hw/qdev-core.h" |
53 | #include "hw/pci/pci.h" | |
9fb3fba1 | 54 | #include "qemu/timer.h" |
3123f93d | 55 | #include "exec/memory.h" |
08cf3dc6 JR |
56 | #include "hw/pci/msi.h" |
57 | #include "hw/pci/msix.h" | |
58 | #include "hw/remote/vfio-user-obj.h" | |
8f9a9259 JR |
59 | |
60 | #define TYPE_VFU_OBJECT "x-vfio-user-server" | |
61 | OBJECT_DECLARE_TYPE(VfuObject, VfuObjectClass, VFU_OBJECT) | |
62 | ||
/**
 * VFU_OBJECT_ERROR - reports an error message. If auto_shutdown
 * is set, it aborts the machine on error. Otherwise, it logs an
 * error message without aborting.
 *
 * The expansion is wrapped in do { } while (0) so the macro behaves as
 * exactly one statement (for instance as the unbraced body of an
 * if/else). The @o argument is accepted but not used by the expansion.
 */
#define VFU_OBJECT_ERROR(o, fmt, ...)                                     \
    do {                                                                  \
        if (vfu_object_auto_shutdown()) {                                 \
            error_setg(&error_abort, (fmt), ## __VA_ARGS__);              \
        } else {                                                          \
            error_report((fmt), ## __VA_ARGS__);                          \
        }                                                                 \
    } while (0)
76 | ||
77 | struct VfuObjectClass { | |
78 | ObjectClass parent_class; | |
79 | ||
80 | unsigned int nr_devs; | |
81 | }; | |
82 | ||
83 | struct VfuObject { | |
84 | /* private */ | |
85 | Object parent; | |
86 | ||
87 | SocketAddress *socket; | |
88 | ||
89 | char *device; | |
90 | ||
91 | Error *err; | |
87f7249f JR |
92 | |
93 | Notifier machine_done; | |
94 | ||
95 | vfu_ctx_t *vfu_ctx; | |
a6e8d6d9 JR |
96 | |
97 | PCIDevice *pci_dev; | |
98 | ||
99 | Error *unplug_blocker; | |
9fb3fba1 JR |
100 | |
101 | int vfu_poll_fd; | |
08cf3dc6 JR |
102 | |
103 | MSITriggerFunc *default_msi_trigger; | |
104 | MSIPrepareMessageFunc *default_msi_prepare_message; | |
105 | MSIxPrepareMessageFunc *default_msix_prepare_message; | |
8f9a9259 JR |
106 | }; |
107 | ||
87f7249f JR |
108 | static void vfu_object_init_ctx(VfuObject *o, Error **errp); |
109 | ||
8f9a9259 JR |
110 | static bool vfu_object_auto_shutdown(void) |
111 | { | |
112 | bool auto_shutdown = true; | |
113 | Error *local_err = NULL; | |
114 | ||
115 | if (!current_machine) { | |
116 | return auto_shutdown; | |
117 | } | |
118 | ||
119 | auto_shutdown = object_property_get_bool(OBJECT(current_machine), | |
120 | "auto-shutdown", | |
121 | &local_err); | |
122 | ||
123 | /* | |
124 | * local_err would be set if no such property exists - safe to ignore. | |
125 | * Unlikely scenario as auto-shutdown is always defined for | |
126 | * TYPE_REMOTE_MACHINE, and TYPE_VFU_OBJECT only works with | |
127 | * TYPE_REMOTE_MACHINE | |
128 | */ | |
129 | if (local_err) { | |
130 | auto_shutdown = true; | |
131 | error_free(local_err); | |
132 | } | |
133 | ||
134 | return auto_shutdown; | |
135 | } | |
136 | ||
137 | static void vfu_object_set_socket(Object *obj, Visitor *v, const char *name, | |
138 | void *opaque, Error **errp) | |
139 | { | |
140 | VfuObject *o = VFU_OBJECT(obj); | |
141 | ||
87f7249f JR |
142 | if (o->vfu_ctx) { |
143 | error_setg(errp, "vfu: Unable to set socket property - server busy"); | |
144 | return; | |
145 | } | |
146 | ||
8f9a9259 JR |
147 | qapi_free_SocketAddress(o->socket); |
148 | ||
149 | o->socket = NULL; | |
150 | ||
151 | visit_type_SocketAddress(v, name, &o->socket, errp); | |
152 | ||
153 | if (o->socket->type != SOCKET_ADDRESS_TYPE_UNIX) { | |
154 | error_setg(errp, "vfu: Unsupported socket type - %s", | |
155 | SocketAddressType_str(o->socket->type)); | |
156 | qapi_free_SocketAddress(o->socket); | |
157 | o->socket = NULL; | |
158 | return; | |
159 | } | |
160 | ||
161 | trace_vfu_prop("socket", o->socket->u.q_unix.path); | |
87f7249f JR |
162 | |
163 | vfu_object_init_ctx(o, errp); | |
8f9a9259 JR |
164 | } |
165 | ||
166 | static void vfu_object_set_device(Object *obj, const char *str, Error **errp) | |
167 | { | |
168 | VfuObject *o = VFU_OBJECT(obj); | |
169 | ||
87f7249f JR |
170 | if (o->vfu_ctx) { |
171 | error_setg(errp, "vfu: Unable to set device property - server busy"); | |
172 | return; | |
173 | } | |
174 | ||
8f9a9259 JR |
175 | g_free(o->device); |
176 | ||
177 | o->device = g_strdup(str); | |
178 | ||
179 | trace_vfu_prop("device", str); | |
87f7249f JR |
180 | |
181 | vfu_object_init_ctx(o, errp); | |
182 | } | |
183 | ||
9fb3fba1 JR |
184 | static void vfu_object_ctx_run(void *opaque) |
185 | { | |
186 | VfuObject *o = opaque; | |
187 | const char *vfu_id; | |
188 | char *vfu_path, *pci_dev_path; | |
189 | int ret = -1; | |
190 | ||
191 | while (ret != 0) { | |
192 | ret = vfu_run_ctx(o->vfu_ctx); | |
193 | if (ret < 0) { | |
194 | if (errno == EINTR) { | |
195 | continue; | |
196 | } else if (errno == ENOTCONN) { | |
197 | vfu_id = object_get_canonical_path_component(OBJECT(o)); | |
198 | vfu_path = object_get_canonical_path(OBJECT(o)); | |
199 | g_assert(o->pci_dev); | |
200 | pci_dev_path = object_get_canonical_path(OBJECT(o->pci_dev)); | |
201 | /* o->device is a required property and is non-NULL here */ | |
202 | g_assert(o->device); | |
203 | qapi_event_send_vfu_client_hangup(vfu_id, vfu_path, | |
204 | o->device, pci_dev_path); | |
205 | qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL); | |
206 | o->vfu_poll_fd = -1; | |
207 | object_unparent(OBJECT(o)); | |
208 | g_free(vfu_path); | |
209 | g_free(pci_dev_path); | |
210 | break; | |
211 | } else { | |
212 | VFU_OBJECT_ERROR(o, "vfu: Failed to run device %s - %s", | |
213 | o->device, strerror(errno)); | |
214 | break; | |
215 | } | |
216 | } | |
217 | } | |
218 | } | |
219 | ||
220 | static void vfu_object_attach_ctx(void *opaque) | |
221 | { | |
222 | VfuObject *o = opaque; | |
223 | GPollFD pfds[1]; | |
224 | int ret; | |
225 | ||
226 | qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL); | |
227 | ||
228 | pfds[0].fd = o->vfu_poll_fd; | |
229 | pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR; | |
230 | ||
231 | retry_attach: | |
232 | ret = vfu_attach_ctx(o->vfu_ctx); | |
233 | if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) { | |
234 | /** | |
235 | * vfu_object_attach_ctx can block QEMU's main loop | |
236 | * during attach - the monitor and other IO | |
237 | * could be unresponsive during this time. | |
238 | */ | |
239 | (void)qemu_poll_ns(pfds, 1, 500 * (int64_t)SCALE_MS); | |
240 | goto retry_attach; | |
241 | } else if (ret < 0) { | |
242 | VFU_OBJECT_ERROR(o, "vfu: Failed to attach device %s to context - %s", | |
243 | o->device, strerror(errno)); | |
244 | return; | |
245 | } | |
246 | ||
247 | o->vfu_poll_fd = vfu_get_poll_fd(o->vfu_ctx); | |
248 | if (o->vfu_poll_fd < 0) { | |
249 | VFU_OBJECT_ERROR(o, "vfu: Failed to get poll fd %s", o->device); | |
250 | return; | |
251 | } | |
252 | ||
253 | qemu_set_fd_handler(o->vfu_poll_fd, vfu_object_ctx_run, NULL, o); | |
254 | } | |
255 | ||
90072f29 JR |
256 | static ssize_t vfu_object_cfg_access(vfu_ctx_t *vfu_ctx, char * const buf, |
257 | size_t count, loff_t offset, | |
258 | const bool is_write) | |
259 | { | |
260 | VfuObject *o = vfu_get_private(vfu_ctx); | |
261 | uint32_t pci_access_width = sizeof(uint32_t); | |
262 | size_t bytes = count; | |
263 | uint32_t val = 0; | |
264 | char *ptr = buf; | |
265 | int len; | |
266 | ||
267 | /* | |
268 | * Writes to the BAR registers would trigger an update to the | |
269 | * global Memory and IO AddressSpaces. But the remote device | |
270 | * never uses the global AddressSpaces, therefore overlapping | |
271 | * memory regions are not a problem | |
272 | */ | |
273 | while (bytes > 0) { | |
274 | len = (bytes > pci_access_width) ? pci_access_width : bytes; | |
275 | if (is_write) { | |
276 | memcpy(&val, ptr, len); | |
277 | pci_host_config_write_common(o->pci_dev, offset, | |
278 | pci_config_size(o->pci_dev), | |
279 | val, len); | |
280 | trace_vfu_cfg_write(offset, val); | |
281 | } else { | |
282 | val = pci_host_config_read_common(o->pci_dev, offset, | |
283 | pci_config_size(o->pci_dev), len); | |
284 | memcpy(ptr, &val, len); | |
285 | trace_vfu_cfg_read(offset, val); | |
286 | } | |
287 | offset += len; | |
288 | ptr += len; | |
289 | bytes -= len; | |
290 | } | |
291 | ||
292 | return count; | |
293 | } | |
294 | ||
15ccf9be JR |
295 | static void dma_register(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info) |
296 | { | |
297 | VfuObject *o = vfu_get_private(vfu_ctx); | |
298 | AddressSpace *dma_as = NULL; | |
299 | MemoryRegion *subregion = NULL; | |
300 | g_autofree char *name = NULL; | |
301 | struct iovec *iov = &info->iova; | |
302 | ||
303 | if (!info->vaddr) { | |
304 | return; | |
305 | } | |
306 | ||
307 | name = g_strdup_printf("mem-%s-%"PRIx64"", o->device, | |
308 | (uint64_t)info->vaddr); | |
309 | ||
310 | subregion = g_new0(MemoryRegion, 1); | |
311 | ||
312 | memory_region_init_ram_ptr(subregion, NULL, name, | |
313 | iov->iov_len, info->vaddr); | |
314 | ||
315 | dma_as = pci_device_iommu_address_space(o->pci_dev); | |
316 | ||
317 | memory_region_add_subregion(dma_as->root, (hwaddr)iov->iov_base, subregion); | |
318 | ||
319 | trace_vfu_dma_register((uint64_t)iov->iov_base, iov->iov_len); | |
320 | } | |
321 | ||
322 | static void dma_unregister(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info) | |
323 | { | |
324 | VfuObject *o = vfu_get_private(vfu_ctx); | |
325 | AddressSpace *dma_as = NULL; | |
326 | MemoryRegion *mr = NULL; | |
327 | ram_addr_t offset; | |
328 | ||
329 | mr = memory_region_from_host(info->vaddr, &offset); | |
330 | if (!mr) { | |
331 | return; | |
332 | } | |
333 | ||
334 | dma_as = pci_device_iommu_address_space(o->pci_dev); | |
335 | ||
336 | memory_region_del_subregion(dma_as->root, mr); | |
337 | ||
338 | object_unparent((OBJECT(mr))); | |
339 | ||
340 | trace_vfu_dma_unregister((uint64_t)info->iova.iov_base); | |
341 | } | |
342 | ||
3123f93d JR |
343 | static int vfu_object_mr_rw(MemoryRegion *mr, uint8_t *buf, hwaddr offset, |
344 | hwaddr size, const bool is_write) | |
345 | { | |
346 | uint8_t *ptr = buf; | |
347 | bool release_lock = false; | |
348 | uint8_t *ram_ptr = NULL; | |
349 | MemTxResult result; | |
350 | int access_size; | |
351 | uint64_t val; | |
352 | ||
353 | if (memory_access_is_direct(mr, is_write)) { | |
354 | /** | |
355 | * Some devices expose a PCI expansion ROM, which could be buffer | |
356 | * based as compared to other regions which are primarily based on | |
357 | * MemoryRegionOps. memory_region_find() would already check | |
358 | * for buffer overflow, we don't need to repeat it here. | |
359 | */ | |
360 | ram_ptr = memory_region_get_ram_ptr(mr); | |
361 | ||
362 | if (is_write) { | |
363 | memcpy((ram_ptr + offset), buf, size); | |
364 | } else { | |
365 | memcpy(buf, (ram_ptr + offset), size); | |
366 | } | |
367 | ||
368 | return 0; | |
369 | } | |
370 | ||
371 | while (size) { | |
372 | /** | |
373 | * The read/write logic used below is similar to the ones in | |
374 | * flatview_read/write_continue() | |
375 | */ | |
376 | release_lock = prepare_mmio_access(mr); | |
377 | ||
378 | access_size = memory_access_size(mr, size, offset); | |
379 | ||
380 | if (is_write) { | |
381 | val = ldn_he_p(ptr, access_size); | |
382 | ||
383 | result = memory_region_dispatch_write(mr, offset, val, | |
384 | size_memop(access_size), | |
385 | MEMTXATTRS_UNSPECIFIED); | |
386 | } else { | |
387 | result = memory_region_dispatch_read(mr, offset, &val, | |
388 | size_memop(access_size), | |
389 | MEMTXATTRS_UNSPECIFIED); | |
390 | ||
391 | stn_he_p(ptr, access_size, val); | |
392 | } | |
393 | ||
394 | if (release_lock) { | |
395 | qemu_mutex_unlock_iothread(); | |
396 | release_lock = false; | |
397 | } | |
398 | ||
399 | if (result != MEMTX_OK) { | |
400 | return -1; | |
401 | } | |
402 | ||
403 | size -= access_size; | |
404 | ptr += access_size; | |
405 | offset += access_size; | |
406 | } | |
407 | ||
408 | return 0; | |
409 | } | |
410 | ||
411 | static size_t vfu_object_bar_rw(PCIDevice *pci_dev, int pci_bar, | |
412 | hwaddr bar_offset, char * const buf, | |
413 | hwaddr len, const bool is_write) | |
414 | { | |
415 | MemoryRegionSection section = { 0 }; | |
416 | uint8_t *ptr = (uint8_t *)buf; | |
417 | MemoryRegion *section_mr = NULL; | |
418 | uint64_t section_size; | |
419 | hwaddr section_offset; | |
420 | hwaddr size = 0; | |
421 | ||
422 | while (len) { | |
423 | section = memory_region_find(pci_dev->io_regions[pci_bar].memory, | |
424 | bar_offset, len); | |
425 | ||
426 | if (!section.mr) { | |
427 | warn_report("vfu: invalid address 0x%"PRIx64"", bar_offset); | |
428 | return size; | |
429 | } | |
430 | ||
431 | section_mr = section.mr; | |
432 | section_offset = section.offset_within_region; | |
433 | section_size = int128_get64(section.size); | |
434 | ||
435 | if (is_write && section_mr->readonly) { | |
436 | warn_report("vfu: attempting to write to readonly region in " | |
437 | "bar %d - [0x%"PRIx64" - 0x%"PRIx64"]", | |
438 | pci_bar, bar_offset, | |
439 | (bar_offset + section_size)); | |
440 | memory_region_unref(section_mr); | |
441 | return size; | |
442 | } | |
443 | ||
444 | if (vfu_object_mr_rw(section_mr, ptr, section_offset, | |
445 | section_size, is_write)) { | |
446 | warn_report("vfu: failed to %s " | |
447 | "[0x%"PRIx64" - 0x%"PRIx64"] in bar %d", | |
448 | is_write ? "write to" : "read from", bar_offset, | |
449 | (bar_offset + section_size), pci_bar); | |
450 | memory_region_unref(section_mr); | |
451 | return size; | |
452 | } | |
453 | ||
454 | size += section_size; | |
455 | bar_offset += section_size; | |
456 | ptr += section_size; | |
457 | len -= section_size; | |
458 | ||
459 | memory_region_unref(section_mr); | |
460 | } | |
461 | ||
462 | return size; | |
463 | } | |
464 | ||
/**
 * VFU_OBJECT_BAR_HANDLER - generates the region access callback for one
 * PCI BAR.
 *
 * VFU_OBJECT_BAR_HANDLER(2), for example, defines
 * vfu_object_bar2_handler(), which forwards the access to
 * vfu_object_bar_rw() for BAR 2 of the served device.
 */
#define VFU_OBJECT_BAR_HANDLER(BAR_NO)                                     \
    static ssize_t vfu_object_bar##BAR_NO##_handler(vfu_ctx_t *vfu_ctx,    \
                                        char * const buf, size_t count,    \
                                        loff_t offset, const bool is_write)\
    {                                                                      \
        VfuObject *o = vfu_get_private(vfu_ctx);                           \
                                                                           \
        return vfu_object_bar_rw(o->pci_dev, BAR_NO, offset,               \
                                 buf, count, is_write);                    \
    }
482 | ||
483 | VFU_OBJECT_BAR_HANDLER(0) | |
484 | VFU_OBJECT_BAR_HANDLER(1) | |
485 | VFU_OBJECT_BAR_HANDLER(2) | |
486 | VFU_OBJECT_BAR_HANDLER(3) | |
487 | VFU_OBJECT_BAR_HANDLER(4) | |
488 | VFU_OBJECT_BAR_HANDLER(5) | |
489 | VFU_OBJECT_BAR_HANDLER(6) | |
490 | ||
491 | static vfu_region_access_cb_t *vfu_object_bar_handlers[PCI_NUM_REGIONS] = { | |
492 | &vfu_object_bar0_handler, | |
493 | &vfu_object_bar1_handler, | |
494 | &vfu_object_bar2_handler, | |
495 | &vfu_object_bar3_handler, | |
496 | &vfu_object_bar4_handler, | |
497 | &vfu_object_bar5_handler, | |
498 | &vfu_object_bar6_handler, | |
499 | }; | |
500 | ||
501 | /** | |
502 | * vfu_object_register_bars - Identify active BAR regions of pdev and setup | |
503 | * callbacks to handle read/write accesses | |
504 | */ | |
505 | static void vfu_object_register_bars(vfu_ctx_t *vfu_ctx, PCIDevice *pdev) | |
506 | { | |
507 | int flags = VFU_REGION_FLAG_RW; | |
508 | int i; | |
509 | ||
510 | for (i = 0; i < PCI_NUM_REGIONS; i++) { | |
511 | if (!pdev->io_regions[i].size) { | |
512 | continue; | |
513 | } | |
514 | ||
515 | if ((i == VFU_PCI_DEV_ROM_REGION_IDX) || | |
516 | pdev->io_regions[i].memory->readonly) { | |
517 | flags &= ~VFU_REGION_FLAG_WRITE; | |
518 | } | |
519 | ||
520 | vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX + i, | |
521 | (size_t)pdev->io_regions[i].size, | |
522 | vfu_object_bar_handlers[i], | |
523 | flags, NULL, 0, -1, 0); | |
524 | ||
525 | trace_vfu_bar_register(i, pdev->io_regions[i].addr, | |
526 | pdev->io_regions[i].size); | |
527 | } | |
528 | } | |
529 | ||
08cf3dc6 JR |
530 | static int vfu_object_map_irq(PCIDevice *pci_dev, int intx) |
531 | { | |
532 | int pci_bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(pci_dev)), | |
533 | pci_dev->devfn); | |
534 | ||
535 | return pci_bdf; | |
536 | } | |
537 | ||
538 | static void vfu_object_set_irq(void *opaque, int pirq, int level) | |
539 | { | |
540 | PCIBus *pci_bus = opaque; | |
541 | PCIDevice *pci_dev = NULL; | |
542 | vfu_ctx_t *vfu_ctx = NULL; | |
543 | int pci_bus_num, devfn; | |
544 | ||
545 | if (level) { | |
546 | pci_bus_num = PCI_BUS_NUM(pirq); | |
547 | devfn = PCI_BDF_TO_DEVFN(pirq); | |
548 | ||
549 | /* | |
550 | * pci_find_device() performs at O(1) if the device is attached | |
551 | * to the root PCI bus. Whereas, if the device is attached to a | |
552 | * secondary PCI bus (such as when a root port is involved), | |
553 | * finding the parent PCI bus could take O(n) | |
554 | */ | |
555 | pci_dev = pci_find_device(pci_bus, pci_bus_num, devfn); | |
556 | ||
557 | vfu_ctx = pci_dev->irq_opaque; | |
558 | ||
559 | g_assert(vfu_ctx); | |
560 | ||
561 | vfu_irq_trigger(vfu_ctx, 0); | |
562 | } | |
563 | } | |
564 | ||
565 | static MSIMessage vfu_object_msi_prepare_msg(PCIDevice *pci_dev, | |
566 | unsigned int vector) | |
567 | { | |
568 | MSIMessage msg; | |
569 | ||
570 | msg.address = 0; | |
571 | msg.data = vector; | |
572 | ||
573 | return msg; | |
574 | } | |
575 | ||
576 | static void vfu_object_msi_trigger(PCIDevice *pci_dev, MSIMessage msg) | |
577 | { | |
578 | vfu_ctx_t *vfu_ctx = pci_dev->irq_opaque; | |
579 | ||
580 | vfu_irq_trigger(vfu_ctx, msg.data); | |
581 | } | |
582 | ||
583 | static void vfu_object_setup_msi_cbs(VfuObject *o) | |
584 | { | |
585 | o->default_msi_trigger = o->pci_dev->msi_trigger; | |
586 | o->default_msi_prepare_message = o->pci_dev->msi_prepare_message; | |
587 | o->default_msix_prepare_message = o->pci_dev->msix_prepare_message; | |
588 | ||
589 | o->pci_dev->msi_trigger = vfu_object_msi_trigger; | |
590 | o->pci_dev->msi_prepare_message = vfu_object_msi_prepare_msg; | |
591 | o->pci_dev->msix_prepare_message = vfu_object_msi_prepare_msg; | |
592 | } | |
593 | ||
594 | static void vfu_object_restore_msi_cbs(VfuObject *o) | |
595 | { | |
596 | o->pci_dev->msi_trigger = o->default_msi_trigger; | |
597 | o->pci_dev->msi_prepare_message = o->default_msi_prepare_message; | |
598 | o->pci_dev->msix_prepare_message = o->default_msix_prepare_message; | |
599 | } | |
600 | ||
601 | static void vfu_msix_irq_state(vfu_ctx_t *vfu_ctx, uint32_t start, | |
602 | uint32_t count, bool mask) | |
603 | { | |
604 | VfuObject *o = vfu_get_private(vfu_ctx); | |
08cf3dc6 JR |
605 | uint32_t vector; |
606 | ||
607 | for (vector = start; vector < count; vector++) { | |
15377f6e | 608 | msix_set_mask(o->pci_dev, vector, mask); |
08cf3dc6 JR |
609 | } |
610 | } | |
611 | ||
612 | static void vfu_msi_irq_state(vfu_ctx_t *vfu_ctx, uint32_t start, | |
613 | uint32_t count, bool mask) | |
614 | { | |
615 | VfuObject *o = vfu_get_private(vfu_ctx); | |
616 | Error *err = NULL; | |
617 | uint32_t vector; | |
618 | ||
619 | for (vector = start; vector < count; vector++) { | |
620 | msi_set_mask(o->pci_dev, vector, mask, &err); | |
621 | if (err) { | |
622 | VFU_OBJECT_ERROR(o, "vfu: %s: %s", o->device, | |
623 | error_get_pretty(err)); | |
624 | error_free(err); | |
625 | err = NULL; | |
626 | } | |
627 | } | |
628 | } | |
629 | ||
630 | static int vfu_object_setup_irqs(VfuObject *o, PCIDevice *pci_dev) | |
631 | { | |
632 | vfu_ctx_t *vfu_ctx = o->vfu_ctx; | |
633 | int ret; | |
634 | ||
635 | ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_INTX_IRQ, 1); | |
636 | if (ret < 0) { | |
637 | return ret; | |
638 | } | |
639 | ||
640 | if (msix_nr_vectors_allocated(pci_dev)) { | |
641 | ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSIX_IRQ, | |
642 | msix_nr_vectors_allocated(pci_dev)); | |
643 | vfu_setup_irq_state_callback(vfu_ctx, VFU_DEV_MSIX_IRQ, | |
644 | &vfu_msix_irq_state); | |
645 | } else if (msi_nr_vectors_allocated(pci_dev)) { | |
646 | ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSI_IRQ, | |
647 | msi_nr_vectors_allocated(pci_dev)); | |
648 | vfu_setup_irq_state_callback(vfu_ctx, VFU_DEV_MSI_IRQ, | |
649 | &vfu_msi_irq_state); | |
650 | } | |
651 | ||
652 | if (ret < 0) { | |
653 | return ret; | |
654 | } | |
655 | ||
656 | vfu_object_setup_msi_cbs(o); | |
657 | ||
658 | pci_dev->irq_opaque = vfu_ctx; | |
659 | ||
660 | return 0; | |
661 | } | |
662 | ||
663 | void vfu_object_set_bus_irq(PCIBus *pci_bus) | |
664 | { | |
665 | int bus_num = pci_bus_num(pci_bus); | |
666 | int max_bdf = PCI_BUILD_BDF(bus_num, PCI_DEVFN_MAX - 1); | |
667 | ||
f021f4e9 BB |
668 | pci_bus_irqs(pci_bus, vfu_object_set_irq, pci_bus, max_bdf); |
669 | pci_bus_map_irqs(pci_bus, vfu_object_map_irq); | |
08cf3dc6 JR |
670 | } |
671 | ||
78e27dfa JR |
672 | static int vfu_object_device_reset(vfu_ctx_t *vfu_ctx, vfu_reset_type_t type) |
673 | { | |
674 | VfuObject *o = vfu_get_private(vfu_ctx); | |
675 | ||
676 | /* vfu_object_ctx_run() handles lost connection */ | |
677 | if (type == VFU_RESET_LOST_CONN) { | |
678 | return 0; | |
679 | } | |
680 | ||
dfa6ba6b | 681 | device_cold_reset(DEVICE(o->pci_dev)); |
78e27dfa JR |
682 | |
683 | return 0; | |
684 | } | |
685 | ||
87f7249f JR |
686 | /* |
687 | * TYPE_VFU_OBJECT depends on the availability of the 'socket' and 'device' | |
688 | * properties. It also depends on devices instantiated in QEMU. These | |
689 | * dependencies are not available during the instance_init phase of this | |
690 | * object's life-cycle. As such, the server is initialized after the | |
691 | * machine is setup. machine_init_done_notifier notifies TYPE_VFU_OBJECT | |
692 | * when the machine is setup, and the dependencies are available. | |
693 | */ | |
694 | static void vfu_object_machine_done(Notifier *notifier, void *data) | |
695 | { | |
696 | VfuObject *o = container_of(notifier, VfuObject, machine_done); | |
697 | Error *err = NULL; | |
698 | ||
699 | vfu_object_init_ctx(o, &err); | |
700 | ||
701 | if (err) { | |
702 | error_propagate(&error_abort, err); | |
703 | } | |
704 | } | |
705 | ||
9fb3fba1 JR |
706 | /** |
707 | * vfu_object_init_ctx: Create and initialize libvfio-user context. Add | |
708 | * an unplug blocker for the associated PCI device. Setup a FD handler | |
709 | * to process incoming messages in the context's socket. | |
710 | * | |
711 | * The socket and device properties are mandatory, and this function | |
712 | * will not create the context without them - the setters for these | |
713 | * properties should call this function when the property is set. The | |
714 | * machine should also be ready when this function is invoked - it is | |
715 | * because QEMU objects are initialized before devices, and the | |
716 | * associated PCI device wouldn't be available at the object | |
717 | * initialization time. Until these conditions are satisfied, this | |
718 | * function would return early without performing any task. | |
719 | */ | |
87f7249f JR |
720 | static void vfu_object_init_ctx(VfuObject *o, Error **errp) |
721 | { | |
a6e8d6d9 JR |
722 | DeviceState *dev = NULL; |
723 | vfu_pci_type_t pci_type = VFU_PCI_TYPE_CONVENTIONAL; | |
724 | int ret; | |
87f7249f JR |
725 | |
726 | if (o->vfu_ctx || !o->socket || !o->device || | |
727 | !phase_check(PHASE_MACHINE_READY)) { | |
728 | return; | |
729 | } | |
730 | ||
731 | if (o->err) { | |
732 | error_propagate(errp, o->err); | |
733 | o->err = NULL; | |
734 | return; | |
735 | } | |
736 | ||
9fb3fba1 JR |
737 | o->vfu_ctx = vfu_create_ctx(VFU_TRANS_SOCK, o->socket->u.q_unix.path, |
738 | LIBVFIO_USER_FLAG_ATTACH_NB, | |
87f7249f JR |
739 | o, VFU_DEV_TYPE_PCI); |
740 | if (o->vfu_ctx == NULL) { | |
741 | error_setg(errp, "vfu: Failed to create context - %s", strerror(errno)); | |
742 | return; | |
743 | } | |
a6e8d6d9 JR |
744 | |
745 | dev = qdev_find_recursive(sysbus_get_default(), o->device); | |
746 | if (dev == NULL) { | |
747 | error_setg(errp, "vfu: Device %s not found", o->device); | |
748 | goto fail; | |
749 | } | |
750 | ||
751 | if (!object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) { | |
752 | error_setg(errp, "vfu: %s not a PCI device", o->device); | |
753 | goto fail; | |
754 | } | |
755 | ||
756 | o->pci_dev = PCI_DEVICE(dev); | |
757 | ||
758 | object_ref(OBJECT(o->pci_dev)); | |
759 | ||
760 | if (pci_is_express(o->pci_dev)) { | |
761 | pci_type = VFU_PCI_TYPE_EXPRESS; | |
762 | } | |
763 | ||
764 | ret = vfu_pci_init(o->vfu_ctx, pci_type, PCI_HEADER_TYPE_NORMAL, 0); | |
765 | if (ret < 0) { | |
766 | error_setg(errp, | |
767 | "vfu: Failed to attach PCI device %s to context - %s", | |
768 | o->device, strerror(errno)); | |
769 | goto fail; | |
770 | } | |
771 | ||
772 | error_setg(&o->unplug_blocker, | |
773 | "vfu: %s for %s must be deleted before unplugging", | |
774 | TYPE_VFU_OBJECT, o->device); | |
775 | qdev_add_unplug_blocker(DEVICE(o->pci_dev), o->unplug_blocker); | |
776 | ||
90072f29 JR |
777 | ret = vfu_setup_region(o->vfu_ctx, VFU_PCI_DEV_CFG_REGION_IDX, |
778 | pci_config_size(o->pci_dev), &vfu_object_cfg_access, | |
779 | VFU_REGION_FLAG_RW | VFU_REGION_FLAG_ALWAYS_CB, | |
780 | NULL, 0, -1, 0); | |
781 | if (ret < 0) { | |
782 | error_setg(errp, | |
783 | "vfu: Failed to setup config space handlers for %s- %s", | |
784 | o->device, strerror(errno)); | |
785 | goto fail; | |
786 | } | |
787 | ||
15ccf9be JR |
788 | ret = vfu_setup_device_dma(o->vfu_ctx, &dma_register, &dma_unregister); |
789 | if (ret < 0) { | |
790 | error_setg(errp, "vfu: Failed to setup DMA handlers for %s", | |
791 | o->device); | |
792 | goto fail; | |
793 | } | |
794 | ||
3123f93d JR |
795 | vfu_object_register_bars(o->vfu_ctx, o->pci_dev); |
796 | ||
08cf3dc6 JR |
797 | ret = vfu_object_setup_irqs(o, o->pci_dev); |
798 | if (ret < 0) { | |
799 | error_setg(errp, "vfu: Failed to setup interrupts for %s", | |
800 | o->device); | |
801 | goto fail; | |
802 | } | |
803 | ||
78e27dfa JR |
804 | ret = vfu_setup_device_reset_cb(o->vfu_ctx, &vfu_object_device_reset); |
805 | if (ret < 0) { | |
806 | error_setg(errp, "vfu: Failed to setup reset callback"); | |
807 | goto fail; | |
808 | } | |
809 | ||
9fb3fba1 JR |
810 | ret = vfu_realize_ctx(o->vfu_ctx); |
811 | if (ret < 0) { | |
812 | error_setg(errp, "vfu: Failed to realize device %s- %s", | |
813 | o->device, strerror(errno)); | |
814 | goto fail; | |
815 | } | |
816 | ||
817 | o->vfu_poll_fd = vfu_get_poll_fd(o->vfu_ctx); | |
818 | if (o->vfu_poll_fd < 0) { | |
819 | error_setg(errp, "vfu: Failed to get poll fd %s", o->device); | |
820 | goto fail; | |
821 | } | |
822 | ||
823 | qemu_set_fd_handler(o->vfu_poll_fd, vfu_object_attach_ctx, NULL, o); | |
824 | ||
a6e8d6d9 JR |
825 | return; |
826 | ||
827 | fail: | |
828 | vfu_destroy_ctx(o->vfu_ctx); | |
829 | if (o->unplug_blocker && o->pci_dev) { | |
830 | qdev_del_unplug_blocker(DEVICE(o->pci_dev), o->unplug_blocker); | |
831 | error_free(o->unplug_blocker); | |
832 | o->unplug_blocker = NULL; | |
833 | } | |
834 | if (o->pci_dev) { | |
08cf3dc6 JR |
835 | vfu_object_restore_msi_cbs(o); |
836 | o->pci_dev->irq_opaque = NULL; | |
a6e8d6d9 JR |
837 | object_unref(OBJECT(o->pci_dev)); |
838 | o->pci_dev = NULL; | |
839 | } | |
840 | o->vfu_ctx = NULL; | |
8f9a9259 JR |
841 | } |
842 | ||
843 | static void vfu_object_init(Object *obj) | |
844 | { | |
845 | VfuObjectClass *k = VFU_OBJECT_GET_CLASS(obj); | |
846 | VfuObject *o = VFU_OBJECT(obj); | |
847 | ||
848 | k->nr_devs++; | |
849 | ||
850 | if (!object_dynamic_cast(OBJECT(current_machine), TYPE_REMOTE_MACHINE)) { | |
851 | error_setg(&o->err, "vfu: %s only compatible with %s machine", | |
852 | TYPE_VFU_OBJECT, TYPE_REMOTE_MACHINE); | |
853 | return; | |
854 | } | |
87f7249f JR |
855 | |
856 | if (!phase_check(PHASE_MACHINE_READY)) { | |
857 | o->machine_done.notify = vfu_object_machine_done; | |
858 | qemu_add_machine_init_done_notifier(&o->machine_done); | |
859 | } | |
860 | ||
9fb3fba1 | 861 | o->vfu_poll_fd = -1; |
8f9a9259 JR |
862 | } |
863 | ||
864 | static void vfu_object_finalize(Object *obj) | |
865 | { | |
866 | VfuObjectClass *k = VFU_OBJECT_GET_CLASS(obj); | |
867 | VfuObject *o = VFU_OBJECT(obj); | |
868 | ||
869 | k->nr_devs--; | |
870 | ||
871 | qapi_free_SocketAddress(o->socket); | |
872 | ||
873 | o->socket = NULL; | |
874 | ||
9fb3fba1 JR |
875 | if (o->vfu_poll_fd != -1) { |
876 | qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL); | |
877 | o->vfu_poll_fd = -1; | |
878 | } | |
879 | ||
87f7249f JR |
880 | if (o->vfu_ctx) { |
881 | vfu_destroy_ctx(o->vfu_ctx); | |
882 | o->vfu_ctx = NULL; | |
883 | } | |
884 | ||
8f9a9259 JR |
885 | g_free(o->device); |
886 | ||
887 | o->device = NULL; | |
888 | ||
a6e8d6d9 JR |
889 | if (o->unplug_blocker && o->pci_dev) { |
890 | qdev_del_unplug_blocker(DEVICE(o->pci_dev), o->unplug_blocker); | |
891 | error_free(o->unplug_blocker); | |
892 | o->unplug_blocker = NULL; | |
893 | } | |
894 | ||
895 | if (o->pci_dev) { | |
08cf3dc6 JR |
896 | vfu_object_restore_msi_cbs(o); |
897 | o->pci_dev->irq_opaque = NULL; | |
a6e8d6d9 JR |
898 | object_unref(OBJECT(o->pci_dev)); |
899 | o->pci_dev = NULL; | |
900 | } | |
901 | ||
8f9a9259 JR |
902 | if (!k->nr_devs && vfu_object_auto_shutdown()) { |
903 | qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); | |
904 | } | |
87f7249f JR |
905 | |
906 | if (o->machine_done.notify) { | |
907 | qemu_remove_machine_init_done_notifier(&o->machine_done); | |
908 | o->machine_done.notify = NULL; | |
909 | } | |
8f9a9259 JR |
910 | } |
911 | ||
912 | static void vfu_object_class_init(ObjectClass *klass, void *data) | |
913 | { | |
914 | VfuObjectClass *k = VFU_OBJECT_CLASS(klass); | |
915 | ||
916 | k->nr_devs = 0; | |
917 | ||
918 | object_class_property_add(klass, "socket", "SocketAddress", NULL, | |
919 | vfu_object_set_socket, NULL, NULL); | |
920 | object_class_property_set_description(klass, "socket", | |
921 | "SocketAddress " | |
922 | "(ex: type=unix,path=/tmp/sock). " | |
923 | "Only UNIX is presently supported"); | |
924 | object_class_property_add_str(klass, "device", NULL, | |
925 | vfu_object_set_device); | |
926 | object_class_property_set_description(klass, "device", | |
927 | "device ID - only PCI devices " | |
928 | "are presently supported"); | |
929 | } | |
930 | ||
931 | static const TypeInfo vfu_object_info = { | |
932 | .name = TYPE_VFU_OBJECT, | |
933 | .parent = TYPE_OBJECT, | |
934 | .instance_size = sizeof(VfuObject), | |
935 | .instance_init = vfu_object_init, | |
936 | .instance_finalize = vfu_object_finalize, | |
937 | .class_size = sizeof(VfuObjectClass), | |
938 | .class_init = vfu_object_class_init, | |
939 | .interfaces = (InterfaceInfo[]) { | |
940 | { TYPE_USER_CREATABLE }, | |
941 | { } | |
942 | } | |
943 | }; | |
944 | ||
945 | static void vfu_register_types(void) | |
946 | { | |
947 | type_register_static(&vfu_object_info); | |
948 | } | |
949 | ||
950 | type_init(vfu_register_types); |