#include "exec/memory.h"
#include "hw/hw.h"
#include "qemu/error-report.h"
+#include "qemu/main-loop.h"
#include "qemu/range.h"
#include "sysemu/balloon.h"
#include "sysemu/kvm.h"
+#include "sysemu/reset.h"
#include "trace.h"
#include "qapi/error.h"
-struct vfio_group_head vfio_group_list =
+VFIOGroupList vfio_group_list =
QLIST_HEAD_INITIALIZER(vfio_group_list);
-struct vfio_as_head vfio_address_spaces =
+static QLIST_HEAD(, VFIOAddressSpace) vfio_address_spaces =
QLIST_HEAD_INITIALIZER(vfio_address_spaces);
#ifdef CONFIG_KVM
ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set);
}
+/*
+ * Translate a VFIO_IRQ_SET_ACTION_* value into a short label suitable for
+ * error messages.  Anything other than MASK, UNMASK or TRIGGER is reported
+ * as "UNKNOWN ACTION".
+ */
+static inline const char *action_to_str(int action)
+{
+    const char *label = "UNKNOWN ACTION";
+
+    if (action == VFIO_IRQ_SET_ACTION_MASK) {
+        label = "MASK";
+    } else if (action == VFIO_IRQ_SET_ACTION_UNMASK) {
+        label = "UNMASK";
+    } else if (action == VFIO_IRQ_SET_ACTION_TRIGGER) {
+        label = "TRIGGER";
+    }
+
+    return label;
+}
+
+/*
+ * Return a human-readable name for a PCI interrupt index, or NULL when the
+ * device is not of type VFIO_DEVICE_TYPE_PCI or the index is not one of the
+ * INTX/MSI/MSIX/ERR/REQ indexes handled below.
+ */
+static const char *index_to_str(VFIODevice *vbasedev, int index)
+{
+    const char *label = NULL;
+
+    if (vbasedev->type == VFIO_DEVICE_TYPE_PCI) {
+        switch (index) {
+        case VFIO_PCI_INTX_IRQ_INDEX:
+            label = "INTX";
+            break;
+        case VFIO_PCI_MSI_IRQ_INDEX:
+            label = "MSI";
+            break;
+        case VFIO_PCI_MSIX_IRQ_INDEX:
+            label = "MSIX";
+            break;
+        case VFIO_PCI_ERR_IRQ_INDEX:
+            label = "ERR";
+            break;
+        case VFIO_PCI_REQ_IRQ_INDEX:
+            label = "REQ";
+            break;
+        default:
+            break;
+        }
+    }
+
+    return label;
+}
+
+/*
+ * vfio_set_irq_signaling - bind (or unbind) an eventfd to one device interrupt
+ *
+ * Issues VFIO_DEVICE_SET_IRQS on @vbasedev->fd for exactly one interrupt:
+ * @index / @subindex select it, @action is OR-ed with
+ * VFIO_IRQ_SET_DATA_EVENTFD to form the request flags, and @fd is passed as
+ * the single 32-bit data element.
+ *
+ * Returns 0 on success.  On failure returns the negated errno of the ioctl
+ * and fills @errp with a message naming the interrupt and the action; the
+ * message says "tear down" when @fd is negative and "set up" otherwise.
+ */
+int vfio_set_irq_signaling(VFIODevice *vbasedev, int index, int subindex,
+                           int action, int fd, Error **errp)
+{
+    int32_t *eventfd_slot;
+    int req_size = sizeof(struct vfio_irq_set) + sizeof(*eventfd_slot);
+    struct vfio_irq_set *req = g_malloc0(req_size);
+    const char *irq_name;
+    int rc;
+
+    req->argsz = req_size;
+    req->flags = VFIO_IRQ_SET_DATA_EVENTFD | action;
+    req->index = index;
+    req->start = subindex;
+    req->count = 1;
+
+    /* The eventfd travels in the trailing data area of the request */
+    eventfd_slot = (int32_t *)&req->data;
+    *eventfd_slot = fd;
+
+    /* Capture errno before g_free() gets a chance to disturb it */
+    rc = ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, req) ? -errno : 0;
+    g_free(req);
+
+    if (rc == 0) {
+        return 0;
+    }
+
+    error_setg_errno(errp, -rc, "VFIO_DEVICE_SET_IRQS failure");
+
+    /*
+     * error_prepend() stacks prefixes in front of the message, so the
+     * interrupt identifier goes first and the leading sentence last.
+     */
+    irq_name = index_to_str(vbasedev, index);
+    if (irq_name != NULL) {
+        error_prepend(errp, "%s-%d: ", irq_name, subindex);
+    } else {
+        error_prepend(errp, "index %d-%d: ", index, subindex);
+    }
+    error_prepend(errp,
+                  "Failed to %s %s eventfd signaling for interrupt ",
+                  fd < 0 ? "tear down" : "set up", action_to_str(action));
+
+    return rc;
+}
+
/*
* IO Port/MMIO - Beware of the endians, VFIO is always little endian
*/
.size = size,
};
- if (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
+ while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
+ /*
+ * The type1 backend has an off-by-one bug in the kernel (71a7d3d78e3c
+ * v4.15) where an overflow in its wrap-around check prevents us from
+ * unmapping the last page of the address space. Test for the error
+ * condition and re-try the unmap excluding the last page. The
+ * expectation is that we've never mapped the last page anyway and this
+ * unmap request comes via vIOMMU support which also makes it unlikely
+ * that this page is used. This bug was introduced well after type1 v2
+ * support was introduced, so we shouldn't need to test for v1. A fix
+ * is queued for kernel v5.0 so this workaround can be removed once
+ * affected kernels are sufficiently deprecated.
+ */
+ if (errno == EINVAL && unmap.size && !(unmap.iova + unmap.size) &&
+ container->iommu_type == VFIO_TYPE1v2_IOMMU) {
+ trace_vfio_dma_unmap_overflow_workaround();
+ unmap.size -= 1ULL << ctz64(container->pgsizes);
+ continue;
+ }
error_report("VFIO_UNMAP_DMA: %d", -errno);
return -errno;
}
int ret;
VFIOHostDMAWindow *hostwin;
bool hostwin_found;
+ Error *err = NULL;
if (vfio_listener_skipped_section(section)) {
trace_vfio_listener_region_add_skip(
hostwin->max_iova - hostwin->min_iova + 1,
section->offset_within_address_space,
int128_get64(section->size))) {
- ret = -1;
+ /* Keep the trailing space: adjacent literals are concatenated verbatim */
+ error_setg(&err,
+ "region [0x%"PRIx64",0x%"PRIx64"] overlaps with existing "
+ "host DMA window [0x%"PRIx64",0x%"PRIx64"]",
+ section->offset_within_address_space,
+ section->offset_within_address_space +
+ int128_get64(section->size) - 1,
+ hostwin->min_iova, hostwin->max_iova);
goto fail;
}
}
ret = vfio_spapr_create_window(container, section, &pgsize);
if (ret) {
+ error_setg_errno(&err, -ret, "Failed to create SPAPR window");
goto fail;
}
}
if (!hostwin_found) {
- error_report("vfio: IOMMU container %p can't map guest IOVA region"
- " 0x%"HWADDR_PRIx"..0x%"HWADDR_PRIx,
- container, iova, end);
- ret = -EFAULT;
+ error_setg(&err, "Container %p can't map guest IOVA region"
+ " 0x%"HWADDR_PRIx"..0x%"HWADDR_PRIx, container, iova, end);
goto fail;
}
section->offset_within_region,
int128_get64(llend),
iommu_idx);
- QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);
- memory_region_register_iommu_notifier(section->mr, &giommu->n);
+ ret = memory_region_register_iommu_notifier(section->mr, &giommu->n,
+ &err);
+ if (ret) {
+ g_free(giommu);
+ goto fail;
+ }
+ QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);
memory_region_iommu_replay(giommu->iommu, &giommu->n);
return;
ret = vfio_dma_map(container, iova, int128_get64(llsize),
vaddr, section->readonly);
if (ret) {
- error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
- "0x%"HWADDR_PRIx", %p) = %d (%m)",
- container, iova, int128_get64(llsize), vaddr, ret);
+ error_setg(&err, "vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
+ "0x%"HWADDR_PRIx", %p) = %d (%m)",
+ container, iova, int128_get64(llsize), vaddr, ret);
if (memory_region_is_ram_device(section->mr)) {
/* Allow unexpected mappings not to be fatal for RAM devices */
+ error_report_err(err);
return;
}
goto fail;
*/
if (!container->initialized) {
if (!container->error) {
- container->error = ret;
+ error_propagate_prepend(&container->error, err,
+ "Region %s: ",
+ memory_region_name(section->mr));
+ } else {
+ error_free(err);
}
} else {
+ error_report_err(err);
hw_error("vfio: DMA mapping failed, unable to continue");
}
}
}
}
-static struct vfio_info_cap_header *
+struct vfio_info_cap_header *
vfio_get_region_info_cap(struct vfio_region_info *info, uint16_t id)
{
struct vfio_info_cap_header *hdr;
}
}
+/*
+ * vfio_get_iommu_type - pick the IOMMU model to use for a container
+ *
+ * Candidates are probed with VFIO_CHECK_EXTENSION on @container->fd in
+ * preference order (Type1 v2, Type1, sPAPR TCE v2, sPAPR TCE); the first one
+ * for which the ioctl returns non-zero is returned.  When none does, @errp
+ * is set and -EINVAL is returned.
+ */
+static int vfio_get_iommu_type(VFIOContainer *container,
+                               Error **errp)
+{
+    static const int candidates[] = {
+        VFIO_TYPE1v2_IOMMU,
+        VFIO_TYPE1_IOMMU,
+        VFIO_SPAPR_TCE_v2_IOMMU,
+        VFIO_SPAPR_TCE_IOMMU,
+    };
+    size_t idx;
+
+    for (idx = 0; idx < ARRAY_SIZE(candidates); idx++) {
+        int supported = ioctl(container->fd, VFIO_CHECK_EXTENSION,
+                              candidates[idx]);
+
+        if (supported) {
+            return candidates[idx];
+        }
+    }
+
+    error_setg(errp, "No available IOMMU models");
+    return -EINVAL;
+}
+
+/*
+ * vfio_init_container - attach a group to a container and set its IOMMU type
+ *
+ * Selects an IOMMU model with vfio_get_iommu_type(), binds @group_fd to
+ * @container->fd via VFIO_GROUP_SET_CONTAINER, then enables the model with
+ * VFIO_SET_IOMMU.  On success the chosen model is stored in
+ * @container->iommu_type and 0 is returned.  On failure @errp is set and a
+ * negative value is returned (the negated errno for ioctl failures).
+ */
+static int vfio_init_container(VFIOContainer *container, int group_fd,
+                               Error **errp)
+{
+    int iommu_type, ret;
+
+    iommu_type = vfio_get_iommu_type(container, errp);
+    if (iommu_type < 0) {
+        return iommu_type;
+    }
+
+    if (ioctl(group_fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) {
+        /*
+         * Latch errno right away so the value reported and the value
+         * returned cannot diverge if a later call modifies errno.
+         */
+        ret = -errno;
+        error_setg_errno(errp, -ret, "Failed to set group container");
+        return ret;
+    }
+
+    while (ioctl(container->fd, VFIO_SET_IOMMU, iommu_type)) {
+        ret = -errno;
+
+        if (iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) {
+            /*
+             * On sPAPR, although the IOMMU subdriver always advertises both
+             * v1 and v2, the running platform may not support v2, and there
+             * is no way to tell until an IOMMU group gets added to the
+             * container.  So if v2 fails, try v1 as a fallback.
+             */
+            iommu_type = VFIO_SPAPR_TCE_IOMMU;
+            continue;
+        }
+        error_setg_errno(errp, -ret, "Failed to set iommu for container");
+        return ret;
+    }
+
+    container->iommu_type = iommu_type;
+    return 0;
+}
+
static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
Error **errp)
{
container = g_malloc0(sizeof(*container));
container->space = space;
container->fd = fd;
+ container->error = NULL;
QLIST_INIT(&container->giommu_list);
QLIST_INIT(&container->hostwin_list);
- if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU) ||
- ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1v2_IOMMU)) {
- bool v2 = !!ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1v2_IOMMU);
- struct vfio_iommu_type1_info info;
- ret = ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &fd);
- if (ret) {
- error_setg_errno(errp, errno, "failed to set group container");
- ret = -errno;
- goto free_container_exit;
- }
+ ret = vfio_init_container(container, group->fd, errp);
+ if (ret) {
+ goto free_container_exit;
+ }
- container->iommu_type = v2 ? VFIO_TYPE1v2_IOMMU : VFIO_TYPE1_IOMMU;
- ret = ioctl(fd, VFIO_SET_IOMMU, container->iommu_type);
- if (ret) {
- error_setg_errno(errp, errno, "failed to set iommu for container");
- ret = -errno;
- goto free_container_exit;
- }
+ switch (container->iommu_type) {
+ case VFIO_TYPE1v2_IOMMU:
+ case VFIO_TYPE1_IOMMU:
+ {
+ struct vfio_iommu_type1_info info;
/*
* FIXME: This assumes that a Type1 IOMMU can map any 64-bit
}
vfio_host_win_add(container, 0, (hwaddr)-1, info.iova_pgsizes);
container->pgsizes = info.iova_pgsizes;
- } else if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_SPAPR_TCE_IOMMU) ||
- ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_SPAPR_TCE_v2_IOMMU)) {
+ break;
+ }
+ case VFIO_SPAPR_TCE_v2_IOMMU:
+ case VFIO_SPAPR_TCE_IOMMU:
+ {
struct vfio_iommu_spapr_tce_info info;
- bool v2 = !!ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_SPAPR_TCE_v2_IOMMU);
-
- ret = ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &fd);
- if (ret) {
- error_setg_errno(errp, errno, "failed to set group container");
- ret = -errno;
- goto free_container_exit;
- }
- container->iommu_type =
- v2 ? VFIO_SPAPR_TCE_v2_IOMMU : VFIO_SPAPR_TCE_IOMMU;
- ret = ioctl(fd, VFIO_SET_IOMMU, container->iommu_type);
- if (ret) {
- container->iommu_type = VFIO_SPAPR_TCE_IOMMU;
- v2 = false;
- ret = ioctl(fd, VFIO_SET_IOMMU, container->iommu_type);
- }
- if (ret) {
- error_setg_errno(errp, errno, "failed to set iommu for container");
- ret = -errno;
- goto free_container_exit;
- }
+ bool v2 = container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU;
/*
* The host kernel code implementing VFIO_IOMMU_DISABLE is called
&address_space_memory);
if (container->error) {
memory_listener_unregister(&container->prereg_listener);
- ret = container->error;
- error_setg(errp,
- "RAM memory listener initialization failed for container");
+ ret = -1;
+ error_propagate_prepend(errp, container->error,
+ "RAM memory listener initialization failed: ");
goto free_container_exit;
}
}
info.dma32_window_size - 1,
0x1000);
}
- } else {
- error_setg(errp, "No available IOMMU models");
- ret = -EINVAL;
- goto free_container_exit;
+ }
}
vfio_kvm_device_add_group(group);
memory_listener_register(&container->listener, container->space->as);
if (container->error) {
- ret = container->error;
- error_setg_errno(errp, -ret,
- "memory listener initialization failed for container");
+ ret = -1;
+ error_propagate_prepend(errp, container->error,
+ "memory listener initialization failed: ");
goto listener_release_exit;
}