Merge tag 'vfio-v4.4-rc1' of git://github.com/awilliam/linux-vfio

author Linus Torvalds <torvalds@linux-foundation.org>

Sat, 14 Nov 2015 01:05:32 +0000 (17:05 -0800)

committer Linus Torvalds <torvalds@linux-foundation.org>

Sat, 14 Nov 2015 01:05:32 +0000 (17:05 -0800)
author Linus Torvalds <torvalds@linux-foundation.org>
Sat, 14 Nov 2015 01:05:32 +0000 (17:05 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 14 Nov 2015 01:05:32 +0000 (17:05 -0800)
diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig

index 850d86ca685b273344271eda7c95ad4f82b28244..da6e2ce77495b21ec127209664567b7e2f8f467e 100644 (file)
--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -31,6 +31,21 @@ menuconfig VFIO
  
           If you don't know what to do here, say N.
  
+menuconfig VFIO_NOIOMMU
+       bool "VFIO No-IOMMU support"
+       depends on VFIO
+       help
+         VFIO is built on the ability to isolate devices using the IOMMU.
+         Only with an IOMMU can userspace access to DMA capable devices be
+         considered secure.  VFIO No-IOMMU mode enables IOMMU groups for
+         devices without IOMMU backing for the purpose of re-using the VFIO
+         infrastructure in a non-secure mode.  Use of this mode will result
+         in an unsupportable kernel and will therefore taint the kernel.
+         Device assignment to virtual machines is also not possible with
+         this mode since there is no IOMMU to provide DMA translation.
+
+         If you don't know what to do here, say N.
+
  source "drivers/vfio/pci/Kconfig"
  source "drivers/vfio/platform/Kconfig"
  source "virt/lib/Kconfig"
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c

index 964ad572aaeede1fc7d2522a9fdb69b7d66f3faa..32b88bd2c82c7e3a3f4cd8cae11a253a171c76fa 100644 (file)
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -940,13 +940,13 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
         if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL)
                 return -EINVAL;
  
-       group = iommu_group_get(&pdev->dev);
+       group = vfio_iommu_group_get(&pdev->dev);
         if (!group)
                 return -EINVAL;
  
         vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
         if (!vdev) {
-               iommu_group_put(group);
+               vfio_iommu_group_put(group, &pdev->dev);
                 return -ENOMEM;
         }
  
@@ -957,7 +957,7 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
  
         ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev);
         if (ret) {
-               iommu_group_put(group);
+               vfio_iommu_group_put(group, &pdev->dev);
                 kfree(vdev);
                 return ret;
         }
@@ -993,7 +993,7 @@ static void vfio_pci_remove(struct pci_dev *pdev)
         if (!vdev)
                 return;
  
-       iommu_group_put(pdev->dev.iommu_group);
+       vfio_iommu_group_put(pdev->dev.iommu_group, &pdev->dev);
         kfree(vdev);
  
         if (vfio_pci_is_vga(pdev)) {
diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c

index ff75ca31a199d1a2c468b71bf0d9b4566f1894b7..fe2b470d7ec6b33c8fd75ffbfccb1168a6975ca2 100644 (file)
--- a/drivers/vfio/pci/vfio_pci_config.c
+++ b/drivers/vfio/pci/vfio_pci_config.c
@@ -46,7 +46,7 @@
   *   0: Removed from the user visible capability list
   *   FF: Variable length
   */
-static u8 pci_cap_length[] = {
+static const u8 pci_cap_length[PCI_CAP_ID_MAX + 1] = {
         [PCI_CAP_ID_BASIC]      = PCI_STD_HEADER_SIZEOF, /* pci config header */
         [PCI_CAP_ID_PM]         = PCI_PM_SIZEOF,
         [PCI_CAP_ID_AGP]        = PCI_AGP_SIZEOF,
@@ -74,7 +74,7 @@ static u8 pci_cap_length[] = {
   *   0: Removed or masked from the user visible capabilty list
   *   FF: Variable length
   */
-static u16 pci_ext_cap_length[] = {
+static const u16 pci_ext_cap_length[PCI_EXT_CAP_ID_MAX + 1] = {
         [PCI_EXT_CAP_ID_ERR]    =       PCI_ERR_ROOT_COMMAND,
         [PCI_EXT_CAP_ID_VC]     =       0xFF,
         [PCI_EXT_CAP_ID_DSN]    =       PCI_EXT_CAP_DSN_SIZEOF,
@@ -671,6 +671,73 @@ static int __init init_pci_cap_pm_perm(struct perm_bits *perm)
         return 0;
  }
  
+/*
+ * Config-space write handler for the Vital Product Data (VPD) capability.
+ *
+ * The write is first applied to the emulated config space through
+ * vfio_default_config_write().  When it covers the upper byte of
+ * PCI_VPD_ADDR (the byte that holds PCI_VPD_ADDR_F), the requested 4-byte
+ * VPD transfer is performed with pci_write_vpd() or pci_read_vpd() and the
+ * flag is toggled in the emulated address register to report completion.
+ *
+ * NOTE(review): pos is presumably the config-space position of the access
+ * and offset its position inside the capability, so that pos - offset is
+ * the capability base -- confirm against the callers.
+ *
+ * Returns whatever vfio_default_config_write() returned; a failed VPD
+ * transfer is not reported, the completion flag is simply left untouched.
+ */
+static int vfio_vpd_config_write(struct vfio_pci_device *vdev, int pos,
+                                int count, struct perm_bits *perm,
+                                int offset, __le32 val)
+{
+       struct pci_dev *pdev = vdev->pdev;
+       __le16 *paddr = (__le16 *)(vdev->vconfig + pos - offset + PCI_VPD_ADDR);
+       __le32 *pdata = (__le32 *)(vdev->vconfig + pos - offset + PCI_VPD_DATA);
+       u16 addr;
+       u32 data;
+
+       /*
+        * Write through to emulation.  If the write includes the upper byte
+        * of PCI_VPD_ADDR, then the PCI_VPD_ADDR_F bit is written and we
+        * have work to do.
+        */
+       count = vfio_default_config_write(vdev, pos, count, perm, offset, val);
+       if (count < 0 || offset > PCI_VPD_ADDR + 1 ||
+           offset + count <= PCI_VPD_ADDR + 1)
+               return count;
+
+       addr = le16_to_cpu(*paddr);
+
+       if (addr & PCI_VPD_ADDR_F) {
+               /* Flag set: push the emulated data register out to VPD. */
+               data = le32_to_cpu(*pdata);
+               if (pci_write_vpd(pdev, addr & ~PCI_VPD_ADDR_F, 4, &data) != 4)
+                       return count;
+       } else {
+               /* Flag clear: fetch 4 bytes of VPD into the emulated data. */
+               if (pci_read_vpd(pdev, addr, 4, &data) != 4)
+                       return count;
+               *pdata = cpu_to_le32(data);
+       }
+
+       /*
+        * Toggle PCI_VPD_ADDR_F in the emulated PCI_VPD_ADDR register to
+        * signal completion.  If an error occurs above, we assume that not
+        * toggling this bit will induce a driver timeout.
+        */
+       addr ^= PCI_VPD_ADDR_F;
+       *paddr = cpu_to_le16(addr);
+
+       return count;
+}
+
+/*
+ * Set up the permission table for the Vital Product Data capability.
+ * Returns 0 on success, -ENOMEM when alloc_perm_bits() reports failure.
+ */
+static int __init init_pci_cap_vpd_perm(struct perm_bits *perm)
+{
+       const int vpd_len = pci_cap_length[PCI_CAP_ID_VPD];
+
+       if (alloc_perm_bits(perm, vpd_len))
+               return -ENOMEM;
+
+       /* VPD writes go through the dedicated handler. */
+       perm->writefn = vfio_vpd_config_write;
+
+       /*
+        * The next-capability pointer is always virtualized, which lets us
+        * drop capabilities from the chain when needed.
+        */
+       p_setb(perm, PCI_CAP_LIST_NEXT, (u8)ALL_VIRT, NO_WRITE);
+
+       /*
+        * Address and data registers are both virtualized and writable so
+        * that accesses can be serviced by the pci_vpd_read/write functions.
+        */
+       p_setw(perm, PCI_VPD_ADDR, (u16)ALL_VIRT, (u16)ALL_WRITE);
+       p_setd(perm, PCI_VPD_DATA, ALL_VIRT, ALL_WRITE);
+
+       return 0;
+}
+
  /* Permissions for PCI-X capability */
  static int __init init_pci_cap_pcix_perm(struct perm_bits *perm)
  {
@@ -790,6 +857,7 @@ void vfio_pci_uninit_perm_bits(void)
         free_perm_bits(&cap_perms[PCI_CAP_ID_BASIC]);
  
         free_perm_bits(&cap_perms[PCI_CAP_ID_PM]);
+       free_perm_bits(&cap_perms[PCI_CAP_ID_VPD]);
         free_perm_bits(&cap_perms[PCI_CAP_ID_PCIX]);
         free_perm_bits(&cap_perms[PCI_CAP_ID_EXP]);
         free_perm_bits(&cap_perms[PCI_CAP_ID_AF]);
@@ -807,7 +875,7 @@ int __init vfio_pci_init_perm_bits(void)
  
         /* Capabilities */
         ret |= init_pci_cap_pm_perm(&cap_perms[PCI_CAP_ID_PM]);
-       cap_perms[PCI_CAP_ID_VPD].writefn = vfio_raw_config_write;
+       ret |= init_pci_cap_vpd_perm(&cap_perms[PCI_CAP_ID_VPD]);
         ret |= init_pci_cap_pcix_perm(&cap_perms[PCI_CAP_ID_PCIX]);
         cap_perms[PCI_CAP_ID_VNDR].writefn = vfio_raw_config_write;
         ret |= init_pci_cap_exp_perm(&cap_perms[PCI_CAP_ID_EXP]);
diff --git a/drivers/vfio/platform/Makefile b/drivers/vfio/platform/Makefile

index 9ce8afe2845041cbd064113bba85309678e37d7a..41a6224f5e6b6ab1b962605ac4e9aede6cac66f7 100644 (file)
--- a/drivers/vfio/platform/Makefile
+++ b/drivers/vfio/platform/Makefile
@@ -1,10 +1,12 @@
-
-vfio-platform-y := vfio_platform.o vfio_platform_common.o vfio_platform_irq.o
+vfio-platform-base-y := vfio_platform_common.o vfio_platform_irq.o
+vfio-platform-y := vfio_platform.o
  
  obj-$(CONFIG_VFIO_PLATFORM) += vfio-platform.o
+obj-$(CONFIG_VFIO_PLATFORM) += vfio-platform-base.o
  obj-$(CONFIG_VFIO_PLATFORM) += reset/
  
  vfio-amba-y := vfio_amba.o
  
  obj-$(CONFIG_VFIO_AMBA) += vfio-amba.o
+obj-$(CONFIG_VFIO_AMBA) += vfio-platform-base.o
  obj-$(CONFIG_VFIO_AMBA) += reset/
diff --git a/drivers/vfio/platform/reset/Kconfig b/drivers/vfio/platform/reset/Kconfig

index 746b96b0003be87f06af9037bafcf93c303c50aa..70cccc582bee6ec86047e38b473dc1be8acc10ff 100644 (file)
--- a/drivers/vfio/platform/reset/Kconfig
+++ b/drivers/vfio/platform/reset/Kconfig
@@ -5,3 +5,11 @@ config VFIO_PLATFORM_CALXEDAXGMAC_RESET
           Enables the VFIO platform driver to handle reset for Calxeda xgmac
  
           If you don't know what to do here, say N.
+
+config VFIO_PLATFORM_AMDXGBE_RESET
+       tristate "VFIO support for AMD XGBE reset"
+       depends on VFIO_PLATFORM
+       help
+         Enables the VFIO platform driver to handle reset for AMD XGBE
+
+         If you don't know what to do here, say N.
diff --git a/drivers/vfio/platform/reset/Makefile b/drivers/vfio/platform/reset/Makefile

index 2a486af9f8fa5da0a78f3ee000f8740553b2958a..93f4e232697bd62a9761fae12ad479e29eee88be 100644 (file)
--- a/drivers/vfio/platform/reset/Makefile
+++ b/drivers/vfio/platform/reset/Makefile
@@ -1,5 +1,7 @@
  vfio-platform-calxedaxgmac-y := vfio_platform_calxedaxgmac.o
+vfio-platform-amdxgbe-y := vfio_platform_amdxgbe.o
  
  ccflags-y += -Idrivers/vfio/platform
  
  obj-$(CONFIG_VFIO_PLATFORM_CALXEDAXGMAC_RESET) += vfio-platform-calxedaxgmac.o
+obj-$(CONFIG_VFIO_PLATFORM_AMDXGBE_RESET) += vfio-platform-amdxgbe.o
diff --git a/drivers/vfio/platform/reset/vfio_platform_amdxgbe.c b/drivers/vfio/platform/reset/vfio_platform_amdxgbe.c

new file mode 100644 (file)

index 0000000..da5356f
--- /dev/null
+++ b/drivers/vfio/platform/reset/vfio_platform_amdxgbe.c
@@ -0,0 +1,127 @@
+/*
+ * VFIO platform driver specialized for AMD xgbe reset
+ * reset code is inherited from AMD xgbe native driver
+ *
+ * Copyright (c) 2015 Linaro Ltd.
+ *              www.linaro.org
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <uapi/linux/mdio.h>
+#include <linux/delay.h>
+
+#include "vfio_platform_private.h"
+
+#define DMA_MR                 0x3000
+#define MAC_VR                 0x0110
+#define DMA_ISR                        0x3008
+#define MAC_ISR                        0x00b0
+#define PCS_MMD_SELECT         0xff
+#define MDIO_AN_INT            0x8002
+#define MDIO_AN_INTMASK                0x8001
+
+/*
+ * Read a register through the indirect MMD access window.
+ *
+ * The 32-bit MMD address is (mmd << 16) | (reg & 0xffff).  Its upper bits
+ * (address >> 8) are written to the select register located at
+ * PCS_MMD_SELECT << 2, then the value is read from the slot indexed by the
+ * low 8 address bits, (address & 0xff) << 2.
+ *
+ * @ioaddr: mapped base of the register block (MMIO, hence __iomem)
+ * @mmd:    MMD device number
+ * @reg:    register number within the MMD; only the low 16 bits are used
+ *
+ * Returns the 32-bit value read from the window.
+ */
+static unsigned int xmdio_read(void __iomem *ioaddr, unsigned int mmd,
+                              unsigned int reg)
+{
+       unsigned int mmd_address = (mmd << 16) | (reg & 0xffff);
+
+       iowrite32(mmd_address >> 8, ioaddr + (PCS_MMD_SELECT << 2));
+       return ioread32(ioaddr + ((mmd_address & 0xff) << 2));
+}
+
+/*
+ * Write a register through the indirect MMD access window.
+ *
+ * The 32-bit MMD address is (mmd << 16) | (reg & 0xffff).  Its upper bits
+ * (address >> 8) are written to the select register located at
+ * PCS_MMD_SELECT << 2, then @value is written to the slot indexed by the
+ * low 8 address bits, (address & 0xff) << 2.
+ *
+ * @ioaddr: mapped base of the register block (MMIO, hence __iomem)
+ * @mmd:    MMD device number
+ * @reg:    register number within the MMD; only the low 16 bits are used
+ * @value:  value to store
+ */
+static void xmdio_write(void __iomem *ioaddr, unsigned int mmd,
+                       unsigned int reg, unsigned int value)
+{
+       unsigned int mmd_address = (mmd << 16) | (reg & 0xffff);
+
+       iowrite32(mmd_address >> 8, ioaddr + (PCS_MMD_SELECT << 2));
+       iowrite32(value, ioaddr + ((mmd_address & 0xff) << 2));
+}
+
+/*
+ * Reset an AMD XGBE device handed to VFIO.
+ *
+ * regions[0] is used as the XGMAC register block and regions[1] as the XPCS
+ * register block; each one is mapped with ioremap_nocache() on first use.
+ * The sequence is: PHY reset through MDIO, auto-negotiation disabled with
+ * its interrupt masked and cleared, then a MAC software reset via DMA_MR.
+ *
+ * Returns -ENOMEM if a region cannot be mapped, 0 otherwise.  A reset that
+ * times out is only reported with a warning; the function still returns 0.
+ */
+int vfio_platform_amdxgbe_reset(struct vfio_platform_device *vdev)
+{
+       struct vfio_platform_region *xgmac_regs = &vdev->regions[0];
+       struct vfio_platform_region *xpcs_regs = &vdev->regions[1];
+       u32 dma_mr_value, pcs_value, value;
+       unsigned int count;
+
+       if (!xgmac_regs->ioaddr) {
+               xgmac_regs->ioaddr =
+                       ioremap_nocache(xgmac_regs->addr, xgmac_regs->size);
+               if (!xgmac_regs->ioaddr)
+                       return -ENOMEM;
+       }
+       if (!xpcs_regs->ioaddr) {
+               xpcs_regs->ioaddr =
+                       ioremap_nocache(xpcs_regs->addr, xpcs_regs->size);
+               if (!xpcs_regs->ioaddr)
+                       return -ENOMEM;
+       }
+
+       /* reset the PHY through MDIO */
+       pcs_value = xmdio_read(xpcs_regs->ioaddr, MDIO_MMD_PCS, MDIO_CTRL1);
+       pcs_value |= MDIO_CTRL1_RESET;
+       xmdio_write(xpcs_regs->ioaddr, MDIO_MMD_PCS, MDIO_CTRL1, pcs_value);
+
+       /* poll every 20ms, at most 50 times, for the reset bit to clear */
+       count = 50;
+       do {
+               msleep(20);
+               pcs_value = xmdio_read(xpcs_regs->ioaddr, MDIO_MMD_PCS,
+                                       MDIO_CTRL1);
+       } while ((pcs_value & MDIO_CTRL1_RESET) && --count);
+
+       if (pcs_value & MDIO_CTRL1_RESET)
+               pr_warn("%s XGBE PHY reset timeout\n", __func__);
+
+       /* disable auto-negotiation */
+       value = xmdio_read(xpcs_regs->ioaddr, MDIO_MMD_AN, MDIO_CTRL1);
+       value &= ~MDIO_AN_CTRL1_ENABLE;
+       xmdio_write(xpcs_regs->ioaddr, MDIO_MMD_AN, MDIO_CTRL1, value);
+
+       /* disable AN IRQ */
+       xmdio_write(xpcs_regs->ioaddr, MDIO_MMD_AN, MDIO_AN_INTMASK, 0);
+
+       /* clear AN IRQ */
+       xmdio_write(xpcs_regs->ioaddr, MDIO_MMD_AN, MDIO_AN_INT, 0);
+
+       /* MAC software reset */
+       dma_mr_value = ioread32(xgmac_regs->ioaddr + DMA_MR);
+       dma_mr_value |= 0x1;
+       iowrite32(dma_mr_value, xgmac_regs->ioaddr + DMA_MR);
+
+       usleep_range(10, 15);
+
+       /*
+        * Pre-decrement on purpose: count reaches 0 only when every poll
+        * still saw the reset bit set.  A post-decrement would wrap the
+        * unsigned counter on timeout (hiding the failure) and would leave
+        * it at 0 when the reset completes on the very last poll (raising
+        * a bogus warning).
+        */
+       count = 2000;
+       while (--count && (ioread32(xgmac_regs->ioaddr + DMA_MR) & 1))
+               usleep_range(500, 600);
+
+       if (!count)
+               pr_warn("%s MAC SW reset failed\n", __func__);
+
+       return 0;
+}
+
+module_vfio_reset_handler("amd,xgbe-seattle-v1a", vfio_platform_amdxgbe_reset);
+
+MODULE_VERSION("0.1");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Eric Auger <eric.auger@linaro.org>");
+MODULE_DESCRIPTION("Reset support for AMD xgbe vfio platform device");
diff --git a/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c b/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c

index 619dc7d22082407771dc257a3a1a61d5a70e77eb..e3d3d948e6618c9732db62740031f2d8a42c5d0c 100644 (file)
--- a/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c
+++ b/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c
@@ -30,8 +30,6 @@
  #define DRIVER_AUTHOR   "Eric Auger <eric.auger@linaro.org>"
  #define DRIVER_DESC     "Reset support for Calxeda xgmac vfio platform device"
  
-#define CALXEDAXGMAC_COMPAT "calxeda,hb-xgmac"
-
  /* XGMAC Register definitions */
  #define XGMAC_CONTROL           0x00000000      /* MAC Configuration */
  
@@ -61,24 +59,25 @@ static inline void xgmac_mac_disable(void __iomem *ioaddr)
  
  int vfio_platform_calxedaxgmac_reset(struct vfio_platform_device *vdev)
  {
-       struct vfio_platform_region reg = vdev->regions[0];
+       struct vfio_platform_region *reg = &vdev->regions[0];
  
-       if (!reg.ioaddr) {
-               reg.ioaddr =
-                       ioremap_nocache(reg.addr, reg.size);
-               if (!reg.ioaddr)
+       if (!reg->ioaddr) {
+               reg->ioaddr =
+                       ioremap_nocache(reg->addr, reg->size);
+               if (!reg->ioaddr)
                         return -ENOMEM;
         }
  
         /* disable IRQ */
-       writel(0, reg.ioaddr + XGMAC_DMA_INTR_ENA);
+       writel(0, reg->ioaddr + XGMAC_DMA_INTR_ENA);
  
         /* Disable the MAC core */
-       xgmac_mac_disable(reg.ioaddr);
+       xgmac_mac_disable(reg->ioaddr);
  
         return 0;
  }
-EXPORT_SYMBOL_GPL(vfio_platform_calxedaxgmac_reset);
+
+module_vfio_reset_handler("calxeda,hb-xgmac", vfio_platform_calxedaxgmac_reset);
  
  MODULE_VERSION(DRIVER_VERSION);
  MODULE_LICENSE("GPL v2");
diff --git a/drivers/vfio/platform/vfio_amba.c b/drivers/vfio/platform/vfio_amba.c

index ff0331f72526b30e2d84a46f7895b97e5478b690..a66479bd0edf968c8209b384264052f79a6ec272 100644 (file)
--- a/drivers/vfio/platform/vfio_amba.c
+++ b/drivers/vfio/platform/vfio_amba.c
@@ -67,6 +67,7 @@ static int vfio_amba_probe(struct amba_device *adev, const struct amba_id *id)
         vdev->flags = VFIO_DEVICE_FLAGS_AMBA;
         vdev->get_resource = get_amba_resource;
         vdev->get_irq = get_amba_irq;
+       vdev->parent_module = THIS_MODULE;
  
         ret = vfio_platform_probe_common(vdev, &adev->dev);
         if (ret) {
diff --git a/drivers/vfio/platform/vfio_platform.c b/drivers/vfio/platform/vfio_platform.c

index cef645c8399679056b130ad19c3dbcbe2e480549..f1625dcfbb23d49d6b0586977a203462df0e75a5 100644 (file)
--- a/drivers/vfio/platform/vfio_platform.c
+++ b/drivers/vfio/platform/vfio_platform.c
@@ -65,6 +65,7 @@ static int vfio_platform_probe(struct platform_device *pdev)
         vdev->flags = VFIO_DEVICE_FLAGS_PLATFORM;
         vdev->get_resource = get_platform_resource;
         vdev->get_irq = get_platform_irq;
+       vdev->parent_module = THIS_MODULE;
  
         ret = vfio_platform_probe_common(vdev, &pdev->dev);
         if (ret)
diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c

index e43efb5e92bf5bf6115d2df38391979d898b465d..a1c50d63079200ce90741ccd55e60bc703c73eb2 100644 (file)
--- a/drivers/vfio/platform/vfio_platform_common.c
+++ b/drivers/vfio/platform/vfio_platform_common.c
@@ -23,44 +23,50 @@
  
  #include "vfio_platform_private.h"
  
-static DEFINE_MUTEX(driver_lock);
+#define DRIVER_VERSION  "0.10"
+#define DRIVER_AUTHOR   "Antonios Motakis <a.motakis@virtualopensystems.com>"
+#define DRIVER_DESC     "VFIO platform base module"
  
-static const struct vfio_platform_reset_combo reset_lookup_table[] = {
-       {
-               .compat = "calxeda,hb-xgmac",
-               .reset_function_name = "vfio_platform_calxedaxgmac_reset",
-               .module_name = "vfio-platform-calxedaxgmac",
-       },
-};
+static LIST_HEAD(reset_list);
+static DEFINE_MUTEX(driver_lock);
  
-static void vfio_platform_get_reset(struct vfio_platform_device *vdev,
-                                   struct device *dev)
+static vfio_platform_reset_fn_t vfio_platform_lookup_reset(const char *compat,
+                                       struct module **module)
  {
-       const char *compat;
-       int (*reset)(struct vfio_platform_device *);
-       int ret, i;
-
-       ret = device_property_read_string(dev, "compatible", &compat);
-       if (ret)
-               return;
-
-       for (i = 0 ; i < ARRAY_SIZE(reset_lookup_table); i++) {
-               if (!strcmp(reset_lookup_table[i].compat, compat)) {
-                       request_module(reset_lookup_table[i].module_name);
-                       reset = __symbol_get(
-                               reset_lookup_table[i].reset_function_name);
-                       if (reset) {
-                               vdev->reset = reset;
-                               return;
-                       }
+       struct vfio_platform_reset_node *iter;
+       vfio_platform_reset_fn_t reset_fn = NULL;
+
+       mutex_lock(&driver_lock);
+       list_for_each_entry(iter, &reset_list, link) {
+               if (!strcmp(iter->compat, compat) &&
+                       try_module_get(iter->owner)) {
+                       *module = iter->owner;
+                       reset_fn = iter->reset;
+                       break;
                 }
         }
+       mutex_unlock(&driver_lock);
+       return reset_fn;
+}
+
+/*
+ * Look up the reset handler registered for vdev->compat and store it in
+ * vdev->reset, with the providing module in vdev->reset_module.  If no
+ * handler is registered yet, request the module aliased
+ * "vfio-reset:<compat>" and look again.  vdev->reset stays NULL when no
+ * handler can be found.
+ */
+static void vfio_platform_get_reset(struct vfio_platform_device *vdev)
+{
+       vdev->reset = vfio_platform_lookup_reset(vdev->compat,
+                                               &vdev->reset_module);
+       if (vdev->reset)
+               return;
+
+       /*
+        * Let request_module() format the name: the compatible string is
+        * then never interpreted as a format string, and no on-stack buffer
+        * is needed.
+        */
+       request_module("vfio-reset:%s", vdev->compat);
+       vdev->reset = vfio_platform_lookup_reset(vdev->compat,
+                                                &vdev->reset_module);
+}
  
  static void vfio_platform_put_reset(struct vfio_platform_device *vdev)
  {
         if (vdev->reset)
-               symbol_put_addr(vdev->reset);
+               module_put(vdev->reset_module);
  }
  
  static int vfio_platform_regions_init(struct vfio_platform_device *vdev)
@@ -138,15 +144,19 @@ static void vfio_platform_release(void *device_data)
         mutex_lock(&driver_lock);
  
         if (!(--vdev->refcnt)) {
-               if (vdev->reset)
+               if (vdev->reset) {
+                       dev_info(vdev->device, "reset\n");
                         vdev->reset(vdev);
+               } else {
+                       dev_warn(vdev->device, "no reset function found!\n");
+               }
                 vfio_platform_regions_cleanup(vdev);
                 vfio_platform_irq_cleanup(vdev);
         }
  
         mutex_unlock(&driver_lock);
  
-       module_put(THIS_MODULE);
+       module_put(vdev->parent_module);
  }
  
  static int vfio_platform_open(void *device_data)
@@ -154,7 +164,7 @@ static int vfio_platform_open(void *device_data)
         struct vfio_platform_device *vdev = device_data;
         int ret;
  
-       if (!try_module_get(THIS_MODULE))
+       if (!try_module_get(vdev->parent_module))
                 return -ENODEV;
  
         mutex_lock(&driver_lock);
@@ -168,8 +178,12 @@ static int vfio_platform_open(void *device_data)
                 if (ret)
                         goto err_irq;
  
-               if (vdev->reset)
+               if (vdev->reset) {
+                       dev_info(vdev->device, "reset\n");
                         vdev->reset(vdev);
+               } else {
+                       dev_warn(vdev->device, "no reset function found!\n");
+               }
         }
  
         vdev->refcnt++;
@@ -307,17 +321,17 @@ static long vfio_platform_ioctl(void *device_data,
         return -ENOTTY;
  }
  
-static ssize_t vfio_platform_read_mmio(struct vfio_platform_region reg,
+static ssize_t vfio_platform_read_mmio(struct vfio_platform_region *reg,
                                        char __user *buf, size_t count,
                                        loff_t off)
  {
         unsigned int done = 0;
  
-       if (!reg.ioaddr) {
-               reg.ioaddr =
-                       ioremap_nocache(reg.addr, reg.size);
+       if (!reg->ioaddr) {
+               reg->ioaddr =
+                       ioremap_nocache(reg->addr, reg->size);
  
-               if (!reg.ioaddr)
+               if (!reg->ioaddr)
                         return -ENOMEM;
         }
  
@@ -327,7 +341,7 @@ static ssize_t vfio_platform_read_mmio(struct vfio_platform_region reg,
                 if (count >= 4 && !(off % 4)) {
                         u32 val;
  
-                       val = ioread32(reg.ioaddr + off);
+                       val = ioread32(reg->ioaddr + off);
                         if (copy_to_user(buf, &val, 4))
                                 goto err;
  
@@ -335,7 +349,7 @@ static ssize_t vfio_platform_read_mmio(struct vfio_platform_region reg,
                 } else if (count >= 2 && !(off % 2)) {
                         u16 val;
  
-                       val = ioread16(reg.ioaddr + off);
+                       val = ioread16(reg->ioaddr + off);
                         if (copy_to_user(buf, &val, 2))
                                 goto err;
  
@@ -343,7 +357,7 @@ static ssize_t vfio_platform_read_mmio(struct vfio_platform_region reg,
                 } else {
                         u8 val;
  
-                       val = ioread8(reg.ioaddr + off);
+                       val = ioread8(reg->ioaddr + off);
                         if (copy_to_user(buf, &val, 1))
                                 goto err;
  
@@ -376,7 +390,7 @@ static ssize_t vfio_platform_read(void *device_data, char __user *buf,
                 return -EINVAL;
  
         if (vdev->regions[index].type & VFIO_PLATFORM_REGION_TYPE_MMIO)
-               return vfio_platform_read_mmio(vdev->regions[index],
+               return vfio_platform_read_mmio(&vdev->regions[index],
                                                         buf, count, off);
         else if (vdev->regions[index].type & VFIO_PLATFORM_REGION_TYPE_PIO)
                 return -EINVAL; /* not implemented */
@@ -384,17 +398,17 @@ static ssize_t vfio_platform_read(void *device_data, char __user *buf,
         return -EINVAL;
  }
  
-static ssize_t vfio_platform_write_mmio(struct vfio_platform_region reg,
+static ssize_t vfio_platform_write_mmio(struct vfio_platform_region *reg,
                                         const char __user *buf, size_t count,
                                         loff_t off)
  {
         unsigned int done = 0;
  
-       if (!reg.ioaddr) {
-               reg.ioaddr =
-                       ioremap_nocache(reg.addr, reg.size);
+       if (!reg->ioaddr) {
+               reg->ioaddr =
+                       ioremap_nocache(reg->addr, reg->size);
  
-               if (!reg.ioaddr)
+               if (!reg->ioaddr)
                         return -ENOMEM;
         }
  
@@ -406,7 +420,7 @@ static ssize_t vfio_platform_write_mmio(struct vfio_platform_region reg,
  
                         if (copy_from_user(&val, buf, 4))
                                 goto err;
-                       iowrite32(val, reg.ioaddr + off);
+                       iowrite32(val, reg->ioaddr + off);
  
                         filled = 4;
                 } else if (count >= 2 && !(off % 2)) {
@@ -414,7 +428,7 @@ static ssize_t vfio_platform_write_mmio(struct vfio_platform_region reg,
  
                         if (copy_from_user(&val, buf, 2))
                                 goto err;
-                       iowrite16(val, reg.ioaddr + off);
+                       iowrite16(val, reg->ioaddr + off);
  
                         filled = 2;
                 } else {
@@ -422,7 +436,7 @@ static ssize_t vfio_platform_write_mmio(struct vfio_platform_region reg,
  
                         if (copy_from_user(&val, buf, 1))
                                 goto err;
-                       iowrite8(val, reg.ioaddr + off);
+                       iowrite8(val, reg->ioaddr + off);
  
                         filled = 1;
                 }
@@ -452,7 +466,7 @@ static ssize_t vfio_platform_write(void *device_data, const char __user *buf,
                 return -EINVAL;
  
         if (vdev->regions[index].type & VFIO_PLATFORM_REGION_TYPE_MMIO)
-               return vfio_platform_write_mmio(vdev->regions[index],
+               return vfio_platform_write_mmio(&vdev->regions[index],
                                                         buf, count, off);
         else if (vdev->regions[index].type & VFIO_PLATFORM_REGION_TYPE_PIO)
                 return -EINVAL; /* not implemented */
@@ -539,6 +553,14 @@ int vfio_platform_probe_common(struct vfio_platform_device *vdev,
         if (!vdev)
                 return -EINVAL;
  
+       ret = device_property_read_string(dev, "compatible", &vdev->compat);
+       if (ret) {
+               pr_err("VFIO: cannot retrieve compat for %s\n", vdev->name);
+               return -EINVAL;
+       }
+
+       vdev->device = dev;
+
         group = iommu_group_get(dev);
         if (!group) {
                 pr_err("VFIO: No IOMMU group for device %s\n", vdev->name);
@@ -551,7 +573,7 @@ int vfio_platform_probe_common(struct vfio_platform_device *vdev,
                 return ret;
         }
  
-       vfio_platform_get_reset(vdev, dev);
+       vfio_platform_get_reset(vdev);
  
         mutex_init(&vdev->igate);
  
@@ -573,3 +595,34 @@ struct vfio_platform_device *vfio_platform_remove_common(struct device *dev)
         return vdev;
  }
  EXPORT_SYMBOL_GPL(vfio_platform_remove_common);
+
+/*
+ * Add @node to the list of known reset handlers.  The list is modified
+ * with driver_lock held.  The node is linked in place, not copied, so it
+ * has to stay valid for as long as it is on the list.
+ */
+void __vfio_platform_register_reset(struct vfio_platform_reset_node *node)
+{
+       mutex_lock(&driver_lock);
+       list_add(&node->link, &reset_list);
+       mutex_unlock(&driver_lock);
+}
+EXPORT_SYMBOL_GPL(__vfio_platform_register_reset);
+
+/*
+ * Remove from the list of known reset handlers the first node whose
+ * compatible string equals @compat and whose reset function is @fn.
+ * Nothing happens when no node matches.
+ */
+void vfio_platform_unregister_reset(const char *compat,
+                                   vfio_platform_reset_fn_t fn)
+{
+       struct vfio_platform_reset_node *node;
+
+       mutex_lock(&driver_lock);
+       list_for_each_entry(node, &reset_list, link) {
+               if (node->reset != fn || strcmp(node->compat, compat))
+                       continue;
+
+               /* The walk stops right here, so plain iteration is safe. */
+               list_del(&node->link);
+               break;
+       }
+       mutex_unlock(&driver_lock);
+}
+EXPORT_SYMBOL_GPL(vfio_platform_unregister_reset);
+
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
diff --git a/drivers/vfio/platform/vfio_platform_irq.c b/drivers/vfio/platform/vfio_platform_irq.c

index 88bba57b30a8b28eefded76cd3ec3f9cd8912001..46d4750f43a8dccd574ea21f4fe53ecef4ec764c 100644 (file)
--- a/drivers/vfio/platform/vfio_platform_irq.c
+++ b/drivers/vfio/platform/vfio_platform_irq.c
@@ -185,6 +185,7 @@ static int vfio_set_trigger(struct vfio_platform_device *vdev, int index,
         int ret;
  
         if (irq->trigger) {
+               irq_clear_status_flags(irq->hwirq, IRQ_NOAUTOEN);
                 free_irq(irq->hwirq, irq);
                 kfree(irq->name);
                 eventfd_ctx_put(irq->trigger);
diff --git a/drivers/vfio/platform/vfio_platform_private.h b/drivers/vfio/platform/vfio_platform_private.h

index 1c9b3d59543c1f32e60737be19bb46824bbebda3..42816dd280cb98ac4e27fabced74c2d9aa1df3d7 100644 (file)
--- a/drivers/vfio/platform/vfio_platform_private.h
+++ b/drivers/vfio/platform/vfio_platform_private.h
@@ -56,6 +56,10 @@ struct vfio_platform_device {
         u32                             num_irqs;
         int                             refcnt;
         struct mutex                    igate;
+       struct module                   *parent_module;
+       const char                      *compat;
+       struct module                   *reset_module;
+       struct device                   *device;
  
         /*
          * These fields should be filled by the bus specific binder
@@ -70,10 +74,13 @@ struct vfio_platform_device {
         int     (*reset)(struct vfio_platform_device *vdev);
  };
  
-struct vfio_platform_reset_combo {
-       const char *compat;
-       const char *reset_function_name;
-       const char *module_name;
+/* Signature of a device-specific reset handler. */
+typedef int (*vfio_platform_reset_fn_t)(struct vfio_platform_device *vdev);
+
+/* One registered reset handler, kept on the common code's handler list. */
+struct vfio_platform_reset_node {
+       struct list_head link;          /* linkage in the handler list */
+       char *compat;                   /* compatible string it serves */
+       struct module *owner;           /* module providing the handler */
+       vfio_platform_reset_fn_t reset; /* the reset handler itself */
+};
  
  extern int vfio_platform_probe_common(struct vfio_platform_device *vdev,
@@ -89,4 +96,29 @@ extern int vfio_platform_set_irqs_ioctl(struct vfio_platform_device *vdev,
                                         unsigned start, unsigned count,
                                         void *data);
  
+extern void __vfio_platform_register_reset(struct vfio_platform_reset_node *n);
+extern void vfio_platform_unregister_reset(const char *compat,
+                                          vfio_platform_reset_fn_t fn);
+#define vfio_platform_register_reset(__compat, __reset)                \
+static struct vfio_platform_reset_node __reset ## _node = {    \
+       .owner = THIS_MODULE,                                   \
+       .compat = __compat,                                     \
+       .reset = __reset,                                       \
+};                                                             \
+__vfio_platform_register_reset(&__reset ## _node)
+
+#define module_vfio_reset_handler(compat, reset)               \
+MODULE_ALIAS("vfio-reset:" compat);                            \
+static int __init reset ## _module_init(void)                  \
+{                                                              \
+       vfio_platform_register_reset(compat, reset);            \
+       return 0;                                               \
+};                                                             \
+static void __exit reset ## _module_exit(void)                 \
+{                                                              \
+       vfio_platform_unregister_reset(compat, reset);          \
+};                                                             \
+module_init(reset ## _module_init);                            \
+module_exit(reset ## _module_exit)
+
  #endif /* VFIO_PLATFORM_PRIVATE_H */
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c

index 563c510f285c47d2a7362a4da8729fdc38e1dee0..de632da2e22f60d4eee042631821c1a4a9b7e558 100644 (file)
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -25,6 +25,7 @@
  #include <linux/miscdevice.h>
  #include <linux/module.h>
  #include <linux/mutex.h>
+#include <linux/pci.h>
  #include <linux/rwsem.h>
  #include <linux/sched.h>
  #include <linux/slab.h>
@@ -61,6 +62,7 @@ struct vfio_container {
         struct rw_semaphore             group_lock;
         struct vfio_iommu_driver        *iommu_driver;
         void                            *iommu_data;
+       bool                            noiommu;
  };
  
  struct vfio_unbound_dev {
@@ -83,6 +85,7 @@ struct vfio_group {
         struct list_head                unbound_list;
         struct mutex                    unbound_lock;
         atomic_t                        opened;
+       bool                            noiommu;
  };
  
  struct vfio_device {
@@ -94,6 +97,147 @@ struct vfio_device {
         void                            *device_data;
  };
  
+#ifdef CONFIG_VFIO_NOIOMMU
+static bool noiommu __read_mostly;	/* backs the enable_unsafe_noiommu_support module parameter */
+module_param_named(enable_unsafe_noiommu_support,
+                  noiommu, bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(enable_unsafe_noiommu_support, "Enable UNSAFE, no-IOMMU mode.  This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel.  If you do not know what this is for, step away. (default: false)");
+#endif
+
+/*
+ * vfio_iommu_group_{get,put} are only intended for VFIO bus driver probe
+ * and remove functions, any use cases other than acquiring the first
+ * reference for the purpose of calling vfio_add_group_dev() or removing
+ * that symmetric reference after vfio_del_group_dev() should use the raw
+ * iommu_group_{get,put} functions.  In particular, vfio_iommu_group_put()
+ * removes the device from the dummy group and cannot be nested.
+ */
+struct iommu_group *vfio_iommu_group_get(struct device *dev)
+{
+       struct iommu_group *group;
+       int __maybe_unused ret;
+
+       group = iommu_group_get(dev);
+
+#ifdef CONFIG_VFIO_NOIOMMU
+       /*
+        * With noiommu enabled, an IOMMU group will be created for a device
+        * that doesn't already have one and doesn't have an iommu_ops on its
+        * bus.  We use iommu_present() again in the main code to detect these
+        * fake groups.
+        */
+       if (group || !noiommu || iommu_present(dev->bus))
+               return group;
+
+       group = iommu_group_alloc();
+       if (IS_ERR(group))
+               return NULL;
+
+       iommu_group_set_name(group, "vfio-noiommu");
+       ret = iommu_group_add_device(group, dev);
+       iommu_group_put(group);
+       if (ret)
+               return NULL;
+
+       /*
+        * Where to taint?  At this point we've added an IOMMU group for a
+        * device that is not backed by iommu_ops, therefore any iommu_
+        * callback using iommu_ops can legitimately Oops.  So, while we may
+        * be about to give a DMA capable device to a user without IOMMU
+        * protection, which is clearly taint-worthy, let's go ahead and do
+        * it here.
+        */
+       add_taint(TAINT_USER, LOCKDEP_STILL_OK);
+       dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n");
+#endif
+
+       return group;
+}
+EXPORT_SYMBOL_GPL(vfio_iommu_group_get);
+
+void vfio_iommu_group_put(struct iommu_group *group, struct device *dev)
+{
+#ifdef CONFIG_VFIO_NOIOMMU
+       if (!iommu_present(dev->bus))
+               iommu_group_remove_device(dev);
+#endif
+
+       iommu_group_put(group);
+}
+EXPORT_SYMBOL_GPL(vfio_iommu_group_put);
+
+#ifdef CONFIG_VFIO_NOIOMMU
+static void *vfio_noiommu_open(unsigned long arg)
+{
+       if (arg != VFIO_NOIOMMU_IOMMU)
+               return ERR_PTR(-EINVAL);
+       if (!capable(CAP_SYS_RAWIO))
+               return ERR_PTR(-EPERM);
+
+       return NULL;
+}
+
+static void vfio_noiommu_release(void *iommu_data)
+{
+}
+
+static long vfio_noiommu_ioctl(void *iommu_data,
+                              unsigned int cmd, unsigned long arg)
+{
+       if (cmd == VFIO_CHECK_EXTENSION)
+               return arg == VFIO_NOIOMMU_IOMMU ? 1 : 0;
+
+       return -ENOTTY;
+}
+
+static int vfio_iommu_present(struct device *dev, void *unused)
+{
+       return iommu_present(dev->bus) ? 1 : 0;
+}
+
+static int vfio_noiommu_attach_group(void *iommu_data,
+                                    struct iommu_group *iommu_group)
+{
+       return iommu_group_for_each_dev(iommu_group, NULL,
+                                       vfio_iommu_present) ? -EINVAL : 0;
+}
+
+static void vfio_noiommu_detach_group(void *iommu_data,
+                                     struct iommu_group *iommu_group)
+{
+}
+
+static struct vfio_iommu_driver_ops vfio_noiommu_ops = {
+       .name = "vfio-noiommu",
+       .owner = THIS_MODULE,
+       .open = vfio_noiommu_open,
+       .release = vfio_noiommu_release,
+       .ioctl = vfio_noiommu_ioctl,
+       .attach_group = vfio_noiommu_attach_group,
+       .detach_group = vfio_noiommu_detach_group,
+};
+
+static struct vfio_iommu_driver vfio_noiommu_driver = {
+       .ops = &vfio_noiommu_ops,
+};
+
+/*
+ * Wrap IOMMU drivers, the noiommu driver is the one and only driver for
+ * noiommu groups (and thus containers) and not available for normal groups.
+ */
+#define vfio_for_each_iommu_driver(con, pos)                           \
+       for (pos = con->noiommu ? &vfio_noiommu_driver :                \
+            list_first_entry(&vfio.iommu_drivers_list,                 \
+                             struct vfio_iommu_driver, vfio_next);     \
+            (con->noiommu ? pos != NULL :                              \
+                       &pos->vfio_next != &vfio.iommu_drivers_list);   \
+             pos = con->noiommu ? NULL : list_next_entry(pos, vfio_next))
+#else
+#define vfio_for_each_iommu_driver(con, pos)                           \
+       list_for_each_entry(pos, &vfio.iommu_drivers_list, vfio_next)
+#endif
+
+
  /**
   * IOMMU driver registration
   */
@@ -198,7 +342,8 @@ static void vfio_group_unlock_and_free(struct vfio_group *group)
  /**
   * Group objects - create, release, get, put, search
   */
-static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group)
+static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
+                                           bool noiommu)
  {
         struct vfio_group *group, *tmp;
         struct device *dev;
@@ -216,6 +361,7 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group)
         atomic_set(&group->container_users, 0);
         atomic_set(&group->opened, 0);
         group->iommu_group = iommu_group;
+       group->noiommu = noiommu;
  
         group->nb.notifier_call = vfio_iommu_group_notifier;
  
@@ -251,7 +397,8 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group)
  
         dev = device_create(vfio.class, NULL,
                             MKDEV(MAJOR(vfio.group_devt), minor),
-                           group, "%d", iommu_group_id(iommu_group));
+                           group, "%s%d", noiommu ? "noiommu-" : "",
+                           iommu_group_id(iommu_group));
         if (IS_ERR(dev)) {
                 vfio_free_group_minor(minor);
                 vfio_group_unlock_and_free(group);
@@ -438,16 +585,33 @@ static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
  }
  
  /*
- * Whitelist some drivers that we know are safe (no dma) or just sit on
- * a device.  It's not always practical to leave a device within a group
- * driverless as it could get re-bound to something unsafe.
+ * Some drivers, like pci-stub, are only used to prevent other drivers from
+ * claiming a device and are therefore perfectly legitimate for a user owned
+ * group.  The pci-stub driver has no dependencies on DMA or the IOVA mapping
+ * of the device, but it does prevent the user from having direct access to
+ * the device, which is useful in some circumstances.
+ *
+ * We also assume that we can include PCI interconnect devices, i.e. bridges.
+ * IOMMU grouping on PCI necessitates that if we lack isolation on a bridge
+ * then all of the downstream devices will be part of the same IOMMU group as
+ * the bridge.  Thus, if placing the bridge into the user owned IOVA space
+ * breaks anything, it only does so for user owned devices downstream.  Note
+ * that error notification via MSI can be affected for platforms that handle
+ * MSI within the same IOVA space as DMA.
   */
-static const char * const vfio_driver_whitelist[] = { "pci-stub", "pcieport" };
+static const char * const vfio_driver_whitelist[] = { "pci-stub" };
  
-static bool vfio_whitelisted_driver(struct device_driver *drv)
+static bool vfio_dev_whitelisted(struct device *dev, struct device_driver *drv)
  {
         int i;
  
+       if (dev_is_pci(dev)) {
+               struct pci_dev *pdev = to_pci_dev(dev);
+
+               if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL)
+                       return true;
+       }
+
         for (i = 0; i < ARRAY_SIZE(vfio_driver_whitelist); i++) {
                 if (!strcmp(drv->name, vfio_driver_whitelist[i]))
                         return true;
@@ -462,6 +626,7 @@ static bool vfio_whitelisted_driver(struct device_driver *drv)
   *  - driver-less
   *  - bound to a vfio driver
   *  - bound to a whitelisted driver
+ *  - a PCI interconnect device
   *
   * We use two methods to determine whether a device is bound to a vfio
   * driver.  The first is to test whether the device exists in the vfio
@@ -486,7 +651,7 @@ static int vfio_dev_viable(struct device *dev, void *data)
         }
         mutex_unlock(&group->unbound_lock);
  
-       if (!ret || !drv || vfio_whitelisted_driver(drv))
+       if (!ret || !drv || vfio_dev_whitelisted(dev, drv))
                 return 0;
  
         device = vfio_group_get_device(group, dev);
@@ -621,7 +786,8 @@ int vfio_add_group_dev(struct device *dev,
  
         group = vfio_group_get_from_iommu(iommu_group);
         if (!group) {
-               group = vfio_create_group(iommu_group);
+               group = vfio_create_group(iommu_group,
+                                         !iommu_present(dev->bus));
                 if (IS_ERR(group)) {
                         iommu_group_put(iommu_group);
                         return PTR_ERR(group);
@@ -692,11 +858,12 @@ EXPORT_SYMBOL_GPL(vfio_device_get_from_dev);
  static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
                                                      char *buf)
  {
-       struct vfio_device *device;
+       struct vfio_device *it, *device = NULL;
  
         mutex_lock(&group->device_lock);
-       list_for_each_entry(device, &group->device_list, group_next) {
-               if (!strcmp(dev_name(device->dev), buf)) {
+       list_for_each_entry(it, &group->device_list, group_next) {
+               if (!strcmp(dev_name(it->dev), buf)) {
+                       device = it;
                         vfio_device_get(device);
                         break;
                 }
@@ -832,8 +999,7 @@ static long vfio_ioctl_check_extension(struct vfio_container *container,
                  */
                 if (!driver) {
                         mutex_lock(&vfio.iommu_drivers_lock);
-                       list_for_each_entry(driver, &vfio.iommu_drivers_list,
-                                           vfio_next) {
+                       vfio_for_each_iommu_driver(container, driver) {
                                 if (!try_module_get(driver->ops->owner))
                                         continue;
  
@@ -902,7 +1068,7 @@ static long vfio_ioctl_set_iommu(struct vfio_container *container,
         }
  
         mutex_lock(&vfio.iommu_drivers_lock);
-       list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
+       vfio_for_each_iommu_driver(container, driver) {
                 void *data;
  
                 if (!try_module_get(driver->ops->owner))
@@ -1167,6 +1333,9 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd)
         if (atomic_read(&group->container_users))
                 return -EINVAL;
  
+       if (group->noiommu && !capable(CAP_SYS_RAWIO))
+               return -EPERM;
+
         f = fdget(container_fd);
         if (!f.file)
                 return -EBADF;
@@ -1182,6 +1351,13 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd)
  
         down_write(&container->group_lock);
  
+       /* Real groups and fake groups cannot mix */
+       if (!list_empty(&container->group_list) &&
+           container->noiommu != group->noiommu) {
+               ret = -EPERM;
+               goto unlock_out;
+       }
+
         driver = container->iommu_driver;
         if (driver) {
                 ret = driver->ops->attach_group(container->iommu_data,
@@ -1191,6 +1367,7 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd)
         }
  
         group->container = container;
+       container->noiommu = group->noiommu;
         list_add(&group->container_next, &container->group_list);
  
         /* Get a reference on the container and mark a user within the group */
@@ -1221,6 +1398,9 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
             !group->container->iommu_driver || !vfio_group_viable(group))
                 return -EINVAL;
  
+       if (group->noiommu && !capable(CAP_SYS_RAWIO))
+               return -EPERM;
+
         device = vfio_device_get_from_name(group, buf);
         if (!device)
                 return -ENODEV;
@@ -1263,6 +1443,10 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
  
         fd_install(ret, filep);
  
+       if (group->noiommu)
+               dev_warn(device->dev, "vfio-noiommu device opened by user "
+                        "(%s:%d)\n", current->comm, task_pid_nr(current));
+
         return ret;
  }
  
@@ -1351,6 +1535,11 @@ static int vfio_group_fops_open(struct inode *inode, struct file *filep)
         if (!group)
                 return -ENODEV;
  
+       if (group->noiommu && !capable(CAP_SYS_RAWIO)) {
+               vfio_group_put(group);
+               return -EPERM;
+       }
+
         /* Do we need multiple instances of the group open?  Seems not. */
         opened = atomic_cmpxchg(&group->opened, 0, 1);
         if (opened) {
@@ -1513,6 +1702,11 @@ struct vfio_group *vfio_group_get_external_user(struct file *filep)
         if (!atomic_inc_not_zero(&group->container_users))
                 return ERR_PTR(-EINVAL);
  
+       if (group->noiommu) {
+               atomic_dec(&group->container_users);
+               return ERR_PTR(-EPERM);
+       }
+
         if (!group->container->iommu_driver ||
                         !vfio_group_viable(group)) {
                 atomic_dec(&group->container_users);
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c

index 57d8c37a002b0e0b8d49891536b8ddfab64afc6b..59d47cb638d5d18aac6700157ecc9be918772f91 100644 (file)
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -403,13 +403,26 @@ static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *dma)
  static unsigned long vfio_pgsize_bitmap(struct vfio_iommu *iommu)
  {
         struct vfio_domain *domain;
-       unsigned long bitmap = PAGE_MASK;
+       unsigned long bitmap = ULONG_MAX;
  
         mutex_lock(&iommu->lock);
         list_for_each_entry(domain, &iommu->domain_list, next)
                 bitmap &= domain->domain->ops->pgsize_bitmap;
         mutex_unlock(&iommu->lock);
  
+       /*
+        * In case the IOMMU supports page sizes smaller than PAGE_SIZE
+        * we pretend PAGE_SIZE is supported and hide sub-PAGE_SIZE sizes.
+        * That way the user will be able to map/unmap buffers whose size/
+        * start address is aligned with PAGE_SIZE. Pinning code uses that
+        * granularity while iommu driver can use the sub-PAGE_SIZE size
+        * to map the buffer.
+        */
+       if (bitmap & ~PAGE_MASK) {
+               bitmap &= PAGE_MASK;
+               bitmap |= PAGE_SIZE;
+       }
+
         return bitmap;
  }
  
diff --git a/include/linux/vfio.h b/include/linux/vfio.h

index ddb44097538245f17e882efff635143ada49e089..610a86a892b8896363ece505cdbe686096505cfc 100644 (file)
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -44,6 +44,9 @@ struct vfio_device_ops {
         void    (*request)(void *device_data, unsigned int count);
  };
  
+extern struct iommu_group *vfio_iommu_group_get(struct device *dev);
+extern void vfio_iommu_group_put(struct iommu_group *group, struct device *dev);
+
  extern int vfio_add_group_dev(struct device *dev,
                               const struct vfio_device_ops *ops,
                               void *device_data);
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h

index 9fd7b5d8df2fa357f434a899cccfa7810f826107..751b69f858c80411c910f21db7fb50c8902e94cf 100644 (file)
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -38,6 +38,13 @@
  
  #define VFIO_SPAPR_TCE_v2_IOMMU                7
  
+/*
+ * The No-IOMMU IOMMU offers no translation or isolation for devices and
+ * supports no ioctls outside of VFIO_CHECK_EXTENSION.  Use of VFIO's No-IOMMU
+ * code will taint the host kernel and should be used with extreme caution.
+ */
+#define VFIO_NOIOMMU_IOMMU             8
+
  /*
   * The IOCTL interface is designed for extensibility by embedding the
   * structure length (argsz) and flags into structures passed between
author	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 14 Nov 2015 01:05:32 +0000 (17:05 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 14 Nov 2015 01:05:32 +0000 (17:05 -0800)
drivers/vfio/Kconfig		patch \| blob \| blame \| history
drivers/vfio/pci/vfio_pci.c		patch \| blob \| blame \| history
drivers/vfio/pci/vfio_pci_config.c		patch \| blob \| blame \| history
drivers/vfio/platform/Makefile		patch \| blob \| blame \| history
drivers/vfio/platform/reset/Kconfig		patch \| blob \| blame \| history
drivers/vfio/platform/reset/Makefile		patch \| blob \| blame \| history
drivers/vfio/platform/reset/vfio_platform_amdxgbe.c	[new file with mode: 0644]	patch \| blob
drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c		patch \| blob \| blame \| history
drivers/vfio/platform/vfio_amba.c		patch \| blob \| blame \| history
drivers/vfio/platform/vfio_platform.c		patch \| blob \| blame \| history
drivers/vfio/platform/vfio_platform_common.c		patch \| blob \| blame \| history
drivers/vfio/platform/vfio_platform_irq.c		patch \| blob \| blame \| history
drivers/vfio/platform/vfio_platform_private.h		patch \| blob \| blame \| history
drivers/vfio/vfio.c		patch \| blob \| blame \| history
drivers/vfio/vfio_iommu_type1.c		patch \| blob \| blame \| history
include/linux/vfio.h		patch \| blob \| blame \| history
include/uapi/linux/vfio.h		patch \| blob \| blame \| history