*/
#include "qemu/osdep.h"
+#include <sys/ioctl.h>
#include <linux/vfio.h>
-#include "cpu.h"
#include "hw/ppc/spapr.h"
#include "hw/pci-host/spapr.h"
#include "hw/pci/msix.h"
-#include "hw/vfio/vfio.h"
+#include "hw/pci/pci_device.h"
+#include "hw/vfio/vfio-common.h"
#include "qemu/error-report.h"
+/*
+ * Interfaces for IBM EEH (Enhanced Error Handling)
+ */
+/*
+ * Tell whether EEH operations may be issued on @container.
+ *
+ * Returns true only when the container's group list holds exactly one
+ * group; an empty list or a list with two or more groups yields false.
+ */
+static bool vfio_eeh_container_ok(VFIOContainer *container)
+{
+    bool single_group;
+
+    /*
+     * As of 2016-03-04 (linux-4.5) the host kernel EEH/VFIO
+     * implementation is broken when a container holds multiple groups.
+     * The hardware works in units of Partitionable Endpoints
+     * (== IOMMU groups), and the EEH operations naively iterate across
+     * all groups in the container without any logic to keep the groups'
+     * state synchronized.  For certain operations (ENABLE) that might
+     * be ok, until an error occurs, but for others (GET_STATE) it's
+     * clearly broken.
+     */
+
+    /*
+     * XXX Once fixed kernels exist, test for them here
+     */
+
+    /* Non-empty list whose first entry has no successor => one group. */
+    single_group = !QLIST_EMPTY(&container->group_list) &&
+                   !QLIST_NEXT(QLIST_FIRST(&container->group_list),
+                               container_next);
+
+    return single_group;
+}
+
+/*
+ * Issue the EEH PE operation @op on @container through the
+ * VFIO_EEH_PE_OP ioctl on the container's file descriptor.
+ *
+ * Returns the ioctl's (non-negative) result on success, -EPERM when
+ * vfio_eeh_container_ok() rejects the container, or the negated errno
+ * of the failed ioctl.
+ */
+static int vfio_eeh_container_op(VFIOContainer *container, uint32_t op)
+{
+    struct vfio_eeh_pe_op pe_op = {
+        .argsz = sizeof(pe_op),
+        .op = op,
+    };
+    int ret;
+
+    if (!vfio_eeh_container_ok(container)) {
+        error_report("vfio/eeh: EEH_PE_OP 0x%x: "
+                     "kernel requires a container with exactly one group", op);
+        return -EPERM;
+    }
+
+    ret = ioctl(container->fd, VFIO_EEH_PE_OP, &pe_op);
+    if (ret < 0) {
+        /*
+         * Capture errno right away: nothing guarantees that the
+         * reporting call below leaves it untouched, and both the
+         * message and the return value must describe the ioctl failure.
+         */
+        int saved_errno = errno;
+
+        error_report("vfio/eeh: EEH_PE_OP 0x%x failed: %s", op,
+                     strerror(saved_errno));
+        return -saved_errno;
+    }
+
+    return ret;
+}
+
+/*
+ * Look up the one VFIO container attached to address space @as.
+ *
+ * Returns that container, or NULL when the address space has no
+ * containers or more than one.  The address-space reference taken for
+ * the lookup is dropped again before returning.
+ */
+static VFIOContainer *vfio_eeh_as_container(AddressSpace *as)
+{
+    VFIOAddressSpace *space = vfio_get_address_space(as);
+    /* NULL when there are no containers to act on */
+    VFIOContainer *only = QLIST_FIRST(&space->containers);
+
+    if (only && QLIST_NEXT(only, next)) {
+        /*
+         * We don't yet have logic to synchronize EEH state across
+         * multiple containers
+         */
+        only = NULL;
+    }
+
+    vfio_put_address_space(space);
+    return only;
+}
+
+/*
+ * Tell whether EEH operations can be used on address space @as: it must
+ * resolve to a single container, and that container must pass
+ * vfio_eeh_container_ok().
+ */
+static bool vfio_eeh_as_ok(AddressSpace *as)
+{
+    VFIOContainer *container = vfio_eeh_as_container(as);
+
+    if (!container) {
+        return false;
+    }
+
+    return vfio_eeh_container_ok(container);
+}
+
+/*
+ * Run EEH PE operation @op on the single container behind address
+ * space @as.
+ *
+ * Returns -ENODEV when vfio_eeh_as_container() finds no usable
+ * container, otherwise whatever vfio_eeh_container_op() returns.
+ */
+static int vfio_eeh_as_op(AddressSpace *as, uint32_t op)
+{
+    VFIOContainer *container = vfio_eeh_as_container(as);
+
+    return container ? vfio_eeh_container_op(container, op) : -ENODEV;
+}
+
bool spapr_phb_eeh_available(SpaprPhbState *sphb)
{
return vfio_eeh_as_ok(&sphb->iommu_as);
spapr_phb_vfio_eeh_reenable(SPAPR_PCI_HOST_BRIDGE(qdev));
}
+/*
+ * Per-device callback: set the bool that @opaque points to when @pdev
+ * is a "vfio-pci" object.  The flag is never cleared here, and @bus is
+ * not used.
+ */
+static void spapr_eeh_pci_find_device(PCIBus *bus, PCIDevice *pdev,
+                                      void *opaque)
+{
+    bool *seen_vfio = opaque;
+
+    if (!object_dynamic_cast(OBJECT(pdev), "vfio-pci")) {
+        return;
+    }
+
+    *seen_vfio = true;
+}
+
int spapr_phb_vfio_eeh_set_option(SpaprPhbState *sphb,
unsigned int addr, int option)
{
break;
case RTAS_EEH_ENABLE: {
PCIHostState *phb;
- PCIDevice *pdev;
+ bool found = false;
/*
- * The EEH functionality is enabled on basis of PCI device,
- * instead of PE. We need check the validity of the PCI
- * device address.
+ * The EEH functionality is enabled per sphb level instead of
+ * per PCI device. We have already identified this specific sphb
+ * based on buid passed as argument to ibm,set-eeh-option rtas
+ * call. Now we just need to check the validity of the PCI
+ * pass-through devices (vfio-pci) under this sphb bus.
+ * We have already validated that all the devices under this sphb
+ * are from same iommu group (within same PE) before coming here.
+ *
+ * Prior to linux commit 98ba956f6a389 ("powerpc/pseries/eeh:
+ * Rework device EEH PE determination") kernel would call
+ * eeh-set-option for each device in the PE using the device's
+ * config_address as the argument rather than the PE address.
+ * Hence checking whether the supplied config_addr matches a
+ * device on this PHB would cause issues with kernel versions
+ * v5.9 and older. If we return an error from
+ * eeh-set-option when the argument isn't a valid PE address
+ * then older kernels (v5.9 and older) will interpret that as
+ * EEH not being supported.
*/
phb = PCI_HOST_BRIDGE(sphb);
- pdev = pci_find_device(phb->bus,
- (addr >> 16) & 0xFF, (addr >> 8) & 0xFF);
- if (!pdev || !object_dynamic_cast(OBJECT(pdev), "vfio-pci")) {
+ pci_for_each_device(phb->bus, (addr >> 16) & 0xFF,
+ spapr_eeh_pci_find_device, &found);
+
+ if (!found) {
return RTAS_OUT_PARAM_ERROR;
}
/*
 * Per-bus callback: hands spapr_phb_vfio_eeh_clear_dev_msix to the PCI
 * per-device iterator for @bus, with NULL as that callback's opaque
 * argument.  @opaque is not used here.
 */
static void spapr_phb_vfio_eeh_clear_bus_msix(PCIBus *bus, void *opaque)
{
- pci_for_each_device(bus, pci_bus_num(bus),
- spapr_phb_vfio_eeh_clear_dev_msix, NULL);
+ pci_for_each_device_under_bus(bus, spapr_phb_vfio_eeh_clear_dev_msix,
+ NULL);
}
static void spapr_phb_vfio_eeh_pre_reset(SpaprPhbState *sphb)