static int vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp)
{
uint8_t pin = vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1);
- int ret, argsz;
+ int ret, argsz, retval = 0;
struct vfio_irq_set *irq_set;
int32_t *pfd;
Error *err = NULL;
qemu_set_fd_handler(*pfd, vfio_intx_interrupt, NULL, vdev);
ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set);
- g_free(irq_set);
if (ret) {
error_setg_errno(errp, -ret, "failed to setup INTx fd");
qemu_set_fd_handler(*pfd, NULL, NULL, vdev);
event_notifier_cleanup(&vdev->intx.interrupt);
- return -errno;
+ retval = -errno;
+ goto cleanup;
}
vfio_intx_enable_kvm(vdev, &err);
trace_vfio_intx_enable(vdev->vbasedev.name);
- return 0;
+cleanup:
+ g_free(irq_set);
+
+ return retval;
}
static void vfio_intx_disable(VFIOPCIDevice *vdev)
static int vfio_msix_setup(VFIOPCIDevice *vdev, int pos, Error **errp)
{
int ret;
+ Error *err = NULL;
vdev->msix->pending = g_malloc0(BITS_TO_LONGS(vdev->msix->entries) *
sizeof(unsigned long));
vdev->bars[vdev->msix->table_bar].region.mem,
vdev->msix->table_bar, vdev->msix->table_offset,
vdev->bars[vdev->msix->pba_bar].region.mem,
- vdev->msix->pba_bar, vdev->msix->pba_offset, pos);
+ vdev->msix->pba_bar, vdev->msix->pba_offset, pos,
+ &err);
if (ret < 0) {
if (ret == -ENOTSUP) {
+ error_report_err(err);
return 0;
}
- error_setg(errp, "msix_init failed");
+
+ error_propagate(errp, err);
return ret;
}
PCI_EXP_LNKCAP_MLW | PCI_EXP_LNKCAP_SLS);
}
- pos = pci_add_capability(&vdev->pdev, PCI_CAP_ID_EXP, pos, size);
- if (pos >= 0) {
- vdev->pdev.exp.exp_cap = pos;
+ /*
+ * Intel 82599 SR-IOV VFs report an invalid PCIe capability version 0
+ * (Niantic errate #35) causing Windows to error with a Code 10 for the
+ * device on Q35. Fixup any such devices to report version 1. If we
+ * were to remove the capability entirely the guest would lose extended
+ * config space.
+ */
+ if ((flags & PCI_EXP_FLAGS_VERS) == 0) {
+ vfio_add_emulated_word(vdev, pos + PCI_CAP_FLAGS,
+ 1, PCI_EXP_FLAGS_VERS);
+ }
+
+ pos = pci_add_capability(&vdev->pdev, PCI_CAP_ID_EXP, pos, size,
+ errp);
+ if (pos < 0) {
+ return pos;
}
+ vdev->pdev.exp.exp_cap = pos;
+
return pos;
}
if (next) {
ret = vfio_add_std_cap(vdev, next, errp);
if (ret) {
- goto out;
+ return ret;
}
} else {
/* Begin the rebuild, use QEMU emulated list bits */
pdev->config[PCI_CAPABILITY_LIST] = 0;
vdev->emulated_config_bits[PCI_CAPABILITY_LIST] = 0xff;
vdev->emulated_config_bits[PCI_STATUS] |= PCI_STATUS_CAP_LIST;
+
+ ret = vfio_add_virt_caps(vdev, errp);
+ if (ret) {
+ return ret;
+ }
}
+ /* Scale down size, esp in case virt caps were added above */
+ size = MIN(size, vfio_std_cap_max_size(pdev, pos));
+
/* Use emulated next pointer to allow dropping caps */
pci_set_byte(vdev->emulated_config_bits + pos + PCI_CAP_LIST_NEXT, 0xff);
case PCI_CAP_ID_PM:
vfio_check_pm_reset(vdev, pos);
vdev->pm_cap = pos;
- ret = pci_add_capability2(pdev, cap_id, pos, size, errp);
+ ret = pci_add_capability(pdev, cap_id, pos, size, errp);
break;
case PCI_CAP_ID_AF:
vfio_check_af_flr(vdev, pos);
- ret = pci_add_capability2(pdev, cap_id, pos, size, errp);
+ ret = pci_add_capability(pdev, cap_id, pos, size, errp);
break;
default:
- ret = pci_add_capability2(pdev, cap_id, pos, size, errp);
+ ret = pci_add_capability(pdev, cap_id, pos, size, errp);
break;
}
-out:
+
if (ret < 0) {
error_prepend(errp,
"failed to add PCI capability 0x%x[0x%x]@0x%x: ",
/*
* Extended capabilities are chained with each pointing to the next, so we
* can drop anything other than the head of the chain simply by modifying
- * the previous next pointer. For the head of the chain, we can modify the
- * capability ID to something that cannot match a valid capability. ID
- * 0 is reserved for this since absence of capabilities is indicated by
- * 0 for the ID, version, AND next pointer. However, pcie_add_capability()
- * uses ID 0 as reserved for list management and will incorrectly match and
- * assert if we attempt to pre-load the head of the chain with with this
- * ID. Use ID 0xFFFF temporarily since it is also seems to be reserved in
- * part for identifying absence of capabilities in a root complex register
- * block. If the ID still exists after adding capabilities, switch back to
- * zero. We'll mark this entire first dword as emulated for this purpose.
+ * the previous next pointer. Seed the head of the chain here such that
+ * we can simply skip any capabilities we want to drop below, regardless
+ * of their position in the chain. If this stub capability still exists
+ * after we add the capabilities we want to expose, update the capability
+ * ID to zero. Note that we cannot seed with the capability header being
+ * zero as this conflicts with definition of an absent capability chain
+ * and prevents capabilities beyond the head of the list from being added.
+ * By replacing the dummy capability ID with zero after walking the device
+ * chain, we also transparently mark extended capabilities as absent if
+ * no capabilities were added. Note that the PCIe spec defines an absence
+ * of extended capabilities to be determined by a value of zero for the
+ * capability ID, version, AND next pointer. A non-zero next pointer
+ * should be sufficient to indicate additional capabilities are present,
+ * which will occur if we call pcie_add_capability() below. The entire
+ * first dword is emulated to support this.
+ *
+ * NB. The kernel side does similar masking, so be prepared that our
+ * view of the device may also contain a capability ID zero in the head
+ * of the chain. Skip it for the same reason that we cannot seed the
+ * chain with a zero capability.
*/
pci_set_long(pdev->config + PCI_CONFIG_SPACE_SIZE,
PCI_EXT_CAP(0xFFFF, 0, 0));
PCI_EXT_CAP_NEXT_MASK);
switch (cap_id) {
+ case 0: /* kernel masked capability */
case PCI_EXT_CAP_ID_SRIOV: /* Read-only VF BARs confuse OVMF */
case PCI_EXT_CAP_ID_ARI: /* XXX Needs next function virtualization */
trace_vfio_add_ext_cap_dropped(vdev->vbasedev.name, cap_id, next);
/* Prep dependent devices for reset and clear our marker. */
QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
- if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
+ if (!vbasedev_iter->dev->realized ||
+ vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
continue;
}
tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
}
QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
- if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
+ if (!vbasedev_iter->dev->realized ||
+ vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
continue;
}
tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
static void vfio_req_notifier_handler(void *opaque)
{
VFIOPCIDevice *vdev = opaque;
+ Error *err = NULL;
if (!event_notifier_test_and_clear(&vdev->req_notifier)) {
return;
}
- qdev_unplug(&vdev->pdev.qdev, NULL);
+ qdev_unplug(&vdev->pdev.qdev, &err);
+ if (err) {
+ error_reportf_err(err, WARN_PREFIX, vdev->vbasedev.name);
+ }
}
static void vfio_register_req_notifier(VFIOPCIDevice *vdev)
if (!(~vdev->host.domain || ~vdev->host.bus ||
~vdev->host.slot || ~vdev->host.function)) {
error_setg(errp, "No provided host device");
- error_append_hint(errp, "Use -vfio-pci,host=DDDD:BB:DD.F "
- "or -vfio-pci,sysfsdev=PATH_TO_DEVICE\n");
+ error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F "
+ "or -device vfio-pci,sysfsdev=PATH_TO_DEVICE\n");
return;
}
vdev->vbasedev.sysfsdev =
vdev->vbasedev.name = g_strdup(basename(vdev->vbasedev.sysfsdev));
vdev->vbasedev.ops = &vfio_pci_ops;
vdev->vbasedev.type = VFIO_DEVICE_TYPE_PCI;
+ vdev->vbasedev.dev = &vdev->pdev.qdev;
tmp = g_strdup_printf("%s/iommu_group", vdev->vbasedev.sysfsdev);
len = readlink(tmp, group_path, sizeof(group_path));