/*
 * vfio based device assignment support - platform devices
 *
 * Copyright Linaro Limited, 2014
 *
 * Authors:
 *  Kim Phillips <kim.phillips@linaro.org>
 *  Eric Auger <eric.auger@linaro.org>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 *
 * Based on vfio based PCI device assignment support:
 *  Copyright Red Hat, Inc. 2012
 */
#include <errno.h>
#include <libgen.h>
#include <limits.h>
#include <linux/vfio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <unistd.h>

#include "hw/vfio/vfio-platform.h"
#include "qemu/error-report.h"
#include "qemu/range.h"
#include "sysemu/sysemu.h"
#include "exec/memory.h"
#include "qemu/queue.h"
#include "hw/sysbus.h"

#include "hw/platform-bus.h"
31 * Functions used whatever the injection method
35 * vfio_init_intp - allocate, initialize the IRQ struct pointer
36 * and add it into the list of IRQs
37 * @vbasedev: the VFIO device handle
38 * @info: irq info struct retrieved from VFIO driver
40 static VFIOINTp
*vfio_init_intp(VFIODevice
*vbasedev
,
41 struct vfio_irq_info info
)
44 VFIOPlatformDevice
*vdev
=
45 container_of(vbasedev
, VFIOPlatformDevice
, vbasedev
);
46 SysBusDevice
*sbdev
= SYS_BUS_DEVICE(vdev
);
49 intp
= g_malloc0(sizeof(*intp
));
51 intp
->pin
= info
.index
;
52 intp
->flags
= info
.flags
;
53 intp
->state
= VFIO_IRQ_INACTIVE
;
55 sysbus_init_irq(sbdev
, &intp
->qemuirq
);
57 /* Get an eventfd for trigger */
58 ret
= event_notifier_init(&intp
->interrupt
, 0);
61 error_report("vfio: Error: trigger event_notifier_init failed ");
65 QLIST_INSERT_HEAD(&vdev
->intp_list
, intp
, next
);
70 * vfio_set_trigger_eventfd - set VFIO eventfd handling
72 * @intp: IRQ struct handle
73 * @handler: handler to be called on eventfd signaling
75 * Setup VFIO signaling and attach an optional user-side handler
78 static int vfio_set_trigger_eventfd(VFIOINTp
*intp
,
79 eventfd_user_side_handler_t handler
)
81 VFIODevice
*vbasedev
= &intp
->vdev
->vbasedev
;
82 struct vfio_irq_set
*irq_set
;
86 argsz
= sizeof(*irq_set
) + sizeof(*pfd
);
87 irq_set
= g_malloc0(argsz
);
88 irq_set
->argsz
= argsz
;
89 irq_set
->flags
= VFIO_IRQ_SET_DATA_EVENTFD
| VFIO_IRQ_SET_ACTION_TRIGGER
;
90 irq_set
->index
= intp
->pin
;
93 pfd
= (int32_t *)&irq_set
->data
;
94 *pfd
= event_notifier_get_fd(&intp
->interrupt
);
95 qemu_set_fd_handler(*pfd
, (IOHandler
*)handler
, NULL
, intp
);
96 ret
= ioctl(vbasedev
->fd
, VFIO_DEVICE_SET_IRQS
, irq_set
);
99 error_report("vfio: Failed to set trigger eventfd: %m");
100 qemu_set_fd_handler(*pfd
, NULL
, NULL
, NULL
);
106 * Functions only used when eventfds are handled on user-side
111 * vfio_mmap_set_enabled - enable/disable the fast path mode
112 * @vdev: the VFIO platform device
113 * @enabled: the target mmap state
115 * enabled = true ~ fast path = MMIO region is mmaped (no KVM TRAP);
116 * enabled = false ~ slow path = MMIO region is trapped and region callbacks
117 * are called; slow path enables to trap the device IRQ status register reset
120 static void vfio_mmap_set_enabled(VFIOPlatformDevice
*vdev
, bool enabled
)
124 trace_vfio_platform_mmap_set_enabled(enabled
);
126 for (i
= 0; i
< vdev
->vbasedev
.num_regions
; i
++) {
127 VFIORegion
*region
= vdev
->regions
[i
];
129 memory_region_set_enabled(®ion
->mmap_mem
, enabled
);
134 * vfio_intp_mmap_enable - timer function, restores the fast path
135 * if there is no more active IRQ
136 * @opaque: actually points to the VFIO platform device
138 * Called on mmap timer timout, this function checks whether the
139 * IRQ is still active and if not, restores the fast path.
140 * by construction a single eventfd is handled at a time.
141 * if the IRQ is still active, the timer is re-programmed.
143 static void vfio_intp_mmap_enable(void *opaque
)
146 VFIOPlatformDevice
*vdev
= (VFIOPlatformDevice
*)opaque
;
148 qemu_mutex_lock(&vdev
->intp_mutex
);
149 QLIST_FOREACH(tmp
, &vdev
->intp_list
, next
) {
150 if (tmp
->state
== VFIO_IRQ_ACTIVE
) {
151 trace_vfio_platform_intp_mmap_enable(tmp
->pin
);
152 /* re-program the timer to check active status later */
153 timer_mod(vdev
->mmap_timer
,
154 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL
) +
156 qemu_mutex_unlock(&vdev
->intp_mutex
);
160 vfio_mmap_set_enabled(vdev
, true);
161 qemu_mutex_unlock(&vdev
->intp_mutex
);
165 * vfio_intp_inject_pending_lockheld - Injects a pending IRQ
166 * @opaque: opaque pointer, in practice the VFIOINTp handle
168 * The function is called on a previous IRQ completion, from
169 * vfio_platform_eoi, while the intp_mutex is locked.
170 * Also in such situation, the slow path already is set and
171 * the mmap timer was already programmed.
173 static void vfio_intp_inject_pending_lockheld(VFIOINTp
*intp
)
175 trace_vfio_platform_intp_inject_pending_lockheld(intp
->pin
,
176 event_notifier_get_fd(&intp
->interrupt
));
178 intp
->state
= VFIO_IRQ_ACTIVE
;
180 /* trigger the virtual IRQ */
181 qemu_set_irq(intp
->qemuirq
, 1);
185 * vfio_intp_interrupt - The user-side eventfd handler
186 * @opaque: opaque pointer which in practice is the VFIOINTp handle
188 * the function is entered in event handler context:
189 * the vIRQ is injected into the guest if there is no other active
192 static void vfio_intp_interrupt(VFIOINTp
*intp
)
196 VFIOPlatformDevice
*vdev
= intp
->vdev
;
197 bool delay_handling
= false;
199 qemu_mutex_lock(&vdev
->intp_mutex
);
200 if (intp
->state
== VFIO_IRQ_INACTIVE
) {
201 QLIST_FOREACH(tmp
, &vdev
->intp_list
, next
) {
202 if (tmp
->state
== VFIO_IRQ_ACTIVE
||
203 tmp
->state
== VFIO_IRQ_PENDING
) {
204 delay_handling
= true;
209 if (delay_handling
) {
211 * the new IRQ gets a pending status and is pushed in
214 intp
->state
= VFIO_IRQ_PENDING
;
215 trace_vfio_intp_interrupt_set_pending(intp
->pin
);
216 QSIMPLEQ_INSERT_TAIL(&vdev
->pending_intp_queue
,
218 ret
= event_notifier_test_and_clear(&intp
->interrupt
);
219 qemu_mutex_unlock(&vdev
->intp_mutex
);
223 trace_vfio_platform_intp_interrupt(intp
->pin
,
224 event_notifier_get_fd(&intp
->interrupt
));
226 ret
= event_notifier_test_and_clear(&intp
->interrupt
);
228 error_report("Error when clearing fd=%d (ret = %d)\n",
229 event_notifier_get_fd(&intp
->interrupt
), ret
);
232 intp
->state
= VFIO_IRQ_ACTIVE
;
235 vfio_mmap_set_enabled(vdev
, false);
237 /* trigger the virtual IRQ */
238 qemu_set_irq(intp
->qemuirq
, 1);
241 * Schedule the mmap timer which will restore fastpath when no IRQ
244 if (vdev
->mmap_timeout
) {
245 timer_mod(vdev
->mmap_timer
,
246 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL
) +
249 qemu_mutex_unlock(&vdev
->intp_mutex
);
253 * vfio_platform_eoi - IRQ completion routine
254 * @vbasedev: the VFIO device handle
256 * De-asserts the active virtual IRQ and unmasks the physical IRQ
257 * (effective for level sensitive IRQ auto-masked by the VFIO driver).
258 * Then it handles next pending IRQ if any.
259 * eoi function is called on the first access to any MMIO region
260 * after an IRQ was triggered, trapped since slow path was set.
261 * It is assumed this access corresponds to the IRQ status
262 * register reset. With such a mechanism, a single IRQ can be
263 * handled at a time since there is no way to know which IRQ
264 * was completed by the guest (we would need additional details
265 * about the IRQ status register mask).
267 static void vfio_platform_eoi(VFIODevice
*vbasedev
)
270 VFIOPlatformDevice
*vdev
=
271 container_of(vbasedev
, VFIOPlatformDevice
, vbasedev
);
273 qemu_mutex_lock(&vdev
->intp_mutex
);
274 QLIST_FOREACH(intp
, &vdev
->intp_list
, next
) {
275 if (intp
->state
== VFIO_IRQ_ACTIVE
) {
276 trace_vfio_platform_eoi(intp
->pin
,
277 event_notifier_get_fd(&intp
->interrupt
));
278 intp
->state
= VFIO_IRQ_INACTIVE
;
280 /* deassert the virtual IRQ */
281 qemu_set_irq(intp
->qemuirq
, 0);
283 if (intp
->flags
& VFIO_IRQ_INFO_AUTOMASKED
) {
284 /* unmasks the physical level-sensitive IRQ */
285 vfio_unmask_single_irqindex(vbasedev
, intp
->pin
);
288 /* a single IRQ can be active at a time */
292 /* in case there are pending IRQs, handle the first one */
293 if (!QSIMPLEQ_EMPTY(&vdev
->pending_intp_queue
)) {
294 intp
= QSIMPLEQ_FIRST(&vdev
->pending_intp_queue
);
295 vfio_intp_inject_pending_lockheld(intp
);
296 QSIMPLEQ_REMOVE_HEAD(&vdev
->pending_intp_queue
, pqnext
);
298 qemu_mutex_unlock(&vdev
->intp_mutex
);
302 * vfio_start_eventfd_injection - starts the virtual IRQ injection using
303 * user-side handled eventfds
304 * @intp: the IRQ struct pointer
307 static int vfio_start_eventfd_injection(VFIOINTp
*intp
)
311 ret
= vfio_set_trigger_eventfd(intp
, vfio_intp_interrupt
);
313 error_report("vfio: Error: Failed to pass IRQ fd to the driver: %m");
320 static void vfio_platform_compute_needs_reset(VFIODevice
*vbasedev
)
322 vbasedev
->needs_reset
= true;
325 /* not implemented yet */
326 static int vfio_platform_hot_reset_multi(VFIODevice
*vbasedev
)
332 * vfio_populate_device - Allocate and populate MMIO region
333 * and IRQ structs according to driver returned information
334 * @vbasedev: the VFIO device handle
337 static int vfio_populate_device(VFIODevice
*vbasedev
)
339 VFIOINTp
*intp
, *tmp
;
341 VFIOPlatformDevice
*vdev
=
342 container_of(vbasedev
, VFIOPlatformDevice
, vbasedev
);
344 if (!(vbasedev
->flags
& VFIO_DEVICE_FLAGS_PLATFORM
)) {
345 error_report("vfio: Um, this isn't a platform device");
349 vdev
->regions
= g_new0(VFIORegion
*, vbasedev
->num_regions
);
351 for (i
= 0; i
< vbasedev
->num_regions
; i
++) {
352 struct vfio_region_info reg_info
= { .argsz
= sizeof(reg_info
) };
355 vdev
->regions
[i
] = g_malloc0(sizeof(VFIORegion
));
356 ptr
= vdev
->regions
[i
];
358 ret
= ioctl(vbasedev
->fd
, VFIO_DEVICE_GET_REGION_INFO
, ®_info
);
360 error_report("vfio: Error getting region %d info: %m", i
);
363 ptr
->flags
= reg_info
.flags
;
364 ptr
->size
= reg_info
.size
;
365 ptr
->fd_offset
= reg_info
.offset
;
367 ptr
->vbasedev
= vbasedev
;
369 trace_vfio_platform_populate_regions(ptr
->nr
,
370 (unsigned long)ptr
->flags
,
371 (unsigned long)ptr
->size
,
373 (unsigned long)ptr
->fd_offset
);
376 vdev
->mmap_timer
= timer_new_ms(QEMU_CLOCK_VIRTUAL
,
377 vfio_intp_mmap_enable
, vdev
);
379 QSIMPLEQ_INIT(&vdev
->pending_intp_queue
);
381 for (i
= 0; i
< vbasedev
->num_irqs
; i
++) {
382 struct vfio_irq_info irq
= { .argsz
= sizeof(irq
) };
385 ret
= ioctl(vbasedev
->fd
, VFIO_DEVICE_GET_IRQ_INFO
, &irq
);
387 error_printf("vfio: error getting device %s irq info",
391 trace_vfio_platform_populate_interrupts(irq
.index
,
394 intp
= vfio_init_intp(vbasedev
, irq
);
396 error_report("vfio: Error installing IRQ %d up", i
);
403 timer_del(vdev
->mmap_timer
);
404 QLIST_FOREACH_SAFE(intp
, &vdev
->intp_list
, next
, tmp
) {
405 QLIST_REMOVE(intp
, next
);
409 for (i
= 0; i
< vbasedev
->num_regions
; i
++) {
410 g_free(vdev
->regions
[i
]);
412 g_free(vdev
->regions
);
416 /* specialized functions for VFIO Platform devices */
417 static VFIODeviceOps vfio_platform_ops
= {
418 .vfio_compute_needs_reset
= vfio_platform_compute_needs_reset
,
419 .vfio_hot_reset_multi
= vfio_platform_hot_reset_multi
,
420 .vfio_eoi
= vfio_platform_eoi
,
424 * vfio_base_device_init - perform preliminary VFIO setup
425 * @vbasedev: the VFIO device handle
427 * Implement the VFIO command sequence that allows to discover
428 * assigned device resources: group extraction, device
429 * fd retrieval, resource query.
430 * Precondition: the device name must be initialized
432 static int vfio_base_device_init(VFIODevice
*vbasedev
)
435 VFIODevice
*vbasedev_iter
;
436 char path
[PATH_MAX
], iommu_group_path
[PATH_MAX
], *group_name
;
442 /* name must be set prior to the call */
443 if (!vbasedev
->name
|| strchr(vbasedev
->name
, '/')) {
447 /* Check that the host device exists */
448 g_snprintf(path
, sizeof(path
), "/sys/bus/platform/devices/%s/",
451 if (stat(path
, &st
) < 0) {
452 error_report("vfio: error: no such host device: %s", path
);
456 g_strlcat(path
, "iommu_group", sizeof(path
));
457 len
= readlink(path
, iommu_group_path
, sizeof(iommu_group_path
));
458 if (len
< 0 || len
>= sizeof(iommu_group_path
)) {
459 error_report("vfio: error no iommu_group for device");
460 return len
< 0 ? -errno
: -ENAMETOOLONG
;
463 iommu_group_path
[len
] = 0;
464 group_name
= basename(iommu_group_path
);
466 if (sscanf(group_name
, "%d", &groupid
) != 1) {
467 error_report("vfio: error reading %s: %m", path
);
471 trace_vfio_platform_base_device_init(vbasedev
->name
, groupid
);
473 group
= vfio_get_group(groupid
, &address_space_memory
);
475 error_report("vfio: failed to get group %d", groupid
);
479 g_snprintf(path
, sizeof(path
), "%s", vbasedev
->name
);
481 QLIST_FOREACH(vbasedev_iter
, &group
->device_list
, next
) {
482 if (strcmp(vbasedev_iter
->name
, vbasedev
->name
) == 0) {
483 error_report("vfio: error: device %s is already attached", path
);
484 vfio_put_group(group
);
488 ret
= vfio_get_device(group
, path
, vbasedev
);
490 error_report("vfio: failed to get device %s", path
);
491 vfio_put_group(group
);
495 ret
= vfio_populate_device(vbasedev
);
497 error_report("vfio: failed to populate device %s", path
);
498 vfio_put_group(group
);
505 * vfio_map_region - initialize the 2 memory regions for a given
507 * @vdev: the VFIO platform device handle
508 * @nr: the index of the region
510 * Init the top memory region and the mmapped memory region beneath
511 * VFIOPlatformDevice is used since VFIODevice is not a QOM Object
512 * and could not be passed to memory region functions
514 static void vfio_map_region(VFIOPlatformDevice
*vdev
, int nr
)
516 VFIORegion
*region
= vdev
->regions
[nr
];
517 uint64_t size
= region
->size
;
524 g_snprintf(name
, sizeof(name
), "VFIO %s region %d",
525 vdev
->vbasedev
.name
, nr
);
527 /* A "slow" read/write mapping underlies all regions */
528 memory_region_init_io(®ion
->mem
, OBJECT(vdev
), &vfio_region_ops
,
531 g_strlcat(name
, " mmap", sizeof(name
));
533 if (vfio_mmap_region(OBJECT(vdev
), region
, ®ion
->mem
,
534 ®ion
->mmap_mem
, ®ion
->mmap
, size
, 0, name
)) {
535 error_report("%s unsupported. Performance may be slow", name
);
540 * vfio_platform_realize - the device realize function
541 * @dev: device state pointer
544 * initialize the device, its memory regions and IRQ structures
545 * IRQ are started separately
547 static void vfio_platform_realize(DeviceState
*dev
, Error
**errp
)
549 VFIOPlatformDevice
*vdev
= VFIO_PLATFORM_DEVICE(dev
);
550 SysBusDevice
*sbdev
= SYS_BUS_DEVICE(dev
);
551 VFIODevice
*vbasedev
= &vdev
->vbasedev
;
555 vbasedev
->type
= VFIO_DEVICE_TYPE_PLATFORM
;
556 vbasedev
->ops
= &vfio_platform_ops
;
558 trace_vfio_platform_realize(vbasedev
->name
, vdev
->compat
);
560 ret
= vfio_base_device_init(vbasedev
);
562 error_setg(errp
, "vfio: vfio_base_device_init failed for %s",
567 for (i
= 0; i
< vbasedev
->num_regions
; i
++) {
568 vfio_map_region(vdev
, i
);
569 sysbus_init_mmio(sbdev
, &vdev
->regions
[i
]->mem
);
572 QLIST_FOREACH(intp
, &vdev
->intp_list
, next
) {
573 vfio_start_eventfd_injection(intp
);
577 static const VMStateDescription vfio_platform_vmstate
= {
578 .name
= TYPE_VFIO_PLATFORM
,
582 static Property vfio_platform_dev_properties
[] = {
583 DEFINE_PROP_STRING("host", VFIOPlatformDevice
, vbasedev
.name
),
584 DEFINE_PROP_BOOL("x-mmap", VFIOPlatformDevice
, vbasedev
.allow_mmap
, true),
585 DEFINE_PROP_UINT32("mmap-timeout-ms", VFIOPlatformDevice
,
587 DEFINE_PROP_END_OF_LIST(),
590 static void vfio_platform_class_init(ObjectClass
*klass
, void *data
)
592 DeviceClass
*dc
= DEVICE_CLASS(klass
);
594 dc
->realize
= vfio_platform_realize
;
595 dc
->props
= vfio_platform_dev_properties
;
596 dc
->vmsd
= &vfio_platform_vmstate
;
597 dc
->desc
= "VFIO-based platform device assignment";
598 set_bit(DEVICE_CATEGORY_MISC
, dc
->categories
);
601 static const TypeInfo vfio_platform_dev_info
= {
602 .name
= TYPE_VFIO_PLATFORM
,
603 .parent
= TYPE_SYS_BUS_DEVICE
,
604 .instance_size
= sizeof(VFIOPlatformDevice
),
605 .class_init
= vfio_platform_class_init
,
606 .class_size
= sizeof(VFIOPlatformDeviceClass
),
610 static void register_vfio_platform_dev_type(void)
612 type_register_static(&vfio_platform_dev_info
);
615 type_init(register_vfio_platform_dev_type
)