Merge tag 'iommu-updates-v4.13' of git://git.kernel.org/pub/scm/linux/kernel/git...
author    Linus Torvalds <torvalds@linux-foundation.org>
          Wed, 12 Jul 2017 17:00:04 +0000 (10:00 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
          Wed, 12 Jul 2017 17:00:04 +0000 (10:00 -0700)
Pull IOMMU updates from Joerg Roedel:
 "This update comes with:

   - Support for lockless operation in the ARM io-pgtable code.

     This is an important step to solve the scalability problems in the
     common dma-iommu code for ARM

   - Some errata workarounds for ARM SMMU implementations

   - Rewrite of the deferred IO/TLB flush code in the AMD IOMMU driver.

     The code suffered from very high flush rates; with the new
     implementation the flush rate is down to ~1% of what it was before

   - Support for amd_iommu=off when booting with kexec.

     The problem here was that the IOMMU driver bailed out early without
     disabling the IOMMU hardware if it had been enabled by the old kernel

   - The Rockchip IOMMU driver is now available on ARM64

   - Align the return value of the iommu_ops->device_group call-backs to
     not miss error values

   - Preempt-disable optimizations in the Intel VT-d and common IOVA
     code to help Linux-RT

   - Various other small cleanups and fixes"
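
A minimal stand-alone C sketch of the deferred-flush scheme behind the
~1% flush-rate figure above (see the drivers/iommu/amd_iommu.c hunks
below for the real thing): freed IOVAs are parked in a ring buffer
tagged with the flush counter that was current when they were queued,
and are only released once a later IOTLB flush has completed. The names,
the ring size and the missing per-queue locking are simplified
assumptions, not the driver code.

#include <stdint.h>
#include <stdio.h>

#define RING_SIZE 8                     /* the driver uses 256 entries per CPU */

struct entry {
	unsigned long iova_pfn;
	unsigned long pages;
	uint64_t counter;               /* flush_start_cnt at queue time */
};

static struct entry ring[RING_SIZE];
static unsigned int head, tail;
static uint64_t flush_start_cnt, flush_finish_cnt;

static void flush_tlb(void)
{
	flush_start_cnt++;              /* entries queued from now on stay dirty */
	/* ... issue the IOTLB flush and wait for completion here ... */
	flush_finish_cnt++;             /* everything queued earlier is now clean */
}

static void free_flushed(void)
{
	uint64_t done = flush_finish_cnt;

	/* Only entries queued before the last completed flush may be freed */
	while (head != tail && ring[head].counter < done) {
		printf("free iova_pfn=0x%lx pages=%lu\n",
		       ring[head].iova_pfn, ring[head].pages);
		head = (head + 1) % RING_SIZE;
	}
}

static void queue_add(unsigned long iova_pfn, unsigned long pages)
{
	free_flushed();                         /* cheap cleanup first */

	if (((tail + 1) % RING_SIZE) == head) { /* ring full: flush synchronously */
		flush_tlb();
		free_flushed();
	}

	ring[tail] = (struct entry){ iova_pfn, pages, flush_start_cnt };
	tail = (tail + 1) % RING_SIZE;
}

int main(void)
{
	for (unsigned long i = 0; i < 20; i++)
		queue_add(0x1000 + i, 1);

	flush_tlb();                    /* the driver's 10ms timer plays this role */
	free_flushed();
	return 0;
}

The key property is that an entry tagged with counter N is never freed
before flush_finish_cnt has moved past N, i.e. before a flush that
started after the entry was queued has completed; that is what keeps
the scheme race-free without flushing on every unmap.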

* tag 'iommu-updates-v4.13' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (60 commits)
  iommu/vt-d: Constify intel_dma_ops
  iommu: Warn once when device_group callback returns NULL
  iommu/omap: Return ERR_PTR in device_group call-back
  iommu: Return ERR_PTR() values from device_group call-backs
  iommu/s390: Use iommu_group_get_for_dev() in s390_iommu_add_device()
  iommu/vt-d: Don't disable preemption while accessing deferred_flush()
  iommu/iova: Don't disable preempt around this_cpu_ptr()
  iommu/arm-smmu-v3: Add workaround for Cavium ThunderX2 erratum #126
  iommu/arm-smmu-v3: Enable ACPI based HiSilicon CMD_PREFETCH quirk(erratum 161010701)
  iommu/arm-smmu-v3: Add workaround for Cavium ThunderX2 erratum #74
  ACPI/IORT: Fixup SMMUv3 resource size for Cavium ThunderX2 SMMUv3 model
  iommu/arm-smmu-v3, acpi: Add temporary Cavium SMMU-V3 IORT model number definitions
  iommu/io-pgtable-arm: Use dma_wmb() instead of wmb() when publishing table
  iommu/io-pgtable: depend on !GENERIC_ATOMIC64 when using COMPILE_TEST with LPAE
  iommu/arm-smmu-v3: Remove io-pgtable spinlock
  iommu/arm-smmu: Remove io-pgtable spinlock
  iommu/io-pgtable-arm-v7s: Support lockless operation
  iommu/io-pgtable-arm: Support lockless operation
  iommu/io-pgtable: Introduce explicit coherency
  iommu/io-pgtable-arm-v7s: Refactor split_blk_unmap
  ...

22 files changed:
Documentation/arm64/silicon-errata.txt
Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
drivers/acpi/arm64/iort.c
drivers/iommu/Kconfig
drivers/iommu/amd_iommu.c
drivers/iommu/amd_iommu_init.c
drivers/iommu/amd_iommu_types.h
drivers/iommu/arm-smmu-v3.c
drivers/iommu/arm-smmu.c
drivers/iommu/dma-iommu.c
drivers/iommu/intel-iommu.c
drivers/iommu/intel-svm.c
drivers/iommu/intel_irq_remapping.c
drivers/iommu/io-pgtable-arm-v7s.c
drivers/iommu/io-pgtable-arm.c
drivers/iommu/io-pgtable.h
drivers/iommu/iommu.c
drivers/iommu/iova.c
drivers/iommu/ipmmu-vmsa.c
drivers/iommu/omap-iommu.c
drivers/iommu/s390-iommu.c
include/linux/intel-svm.h

index f5f93dca54b72594ee34dddcac52ce7fb7b385e4..66e8ce14d23d03ab654c0c14699bd7845ad91aa9 100644 (file)
@@ -61,12 +61,15 @@ stable kernels.
 | Cavium         | ThunderX ITS    | #23144          | CAVIUM_ERRATUM_23144        |
 | Cavium         | ThunderX GICv3  | #23154          | CAVIUM_ERRATUM_23154        |
 | Cavium         | ThunderX Core   | #27456          | CAVIUM_ERRATUM_27456        |
-| Cavium         | ThunderX SMMUv2 | #27704          | N/A                         |
 | Cavium         | ThunderX Core   | #30115          | CAVIUM_ERRATUM_30115        |
+| Cavium         | ThunderX SMMUv2 | #27704          | N/A                         |
+| Cavium         | ThunderX2 SMMUv3| #74             | N/A                         |
+| Cavium         | ThunderX2 SMMUv3| #126            | N/A                         |
 |                |                 |                 |                             |
 | Freescale/NXP  | LS2080A/LS1043A | A-008585        | FSL_ERRATUM_A008585         |
 |                |                 |                 |                             |
 | Hisilicon      | Hip0{5,6,7}     | #161010101      | HISILICON_ERRATUM_161010101 |
+| Hisilicon      | Hip0{6,7}       | #161010701      | N/A                         |
 |                |                 |                 |                             |
 | Qualcomm Tech. | Falkor v1       | E1003           | QCOM_FALKOR_ERRATUM_1003    |
 | Qualcomm Tech. | Falkor v1       | E1009           | QCOM_FALKOR_ERRATUM_1009    |
index be57550e14e487a797c62b485870d9a728d5c731..c9abbf3e4f68238faaa287dc9fb2222af0e4c236 100644 (file)
@@ -26,6 +26,12 @@ the PCIe specification.
                       * "priq"      - PRI Queue not empty
                       * "cmdq-sync" - CMD_SYNC complete
                       * "gerror"    - Global Error activated
+                      * "combined"  - The combined interrupt is optional,
+                                     and should only be provided if the
+                                     hardware supports just a single,
+                                     combined interrupt line.
+                                     If provided, then the combined interrupt
+                                     will be used in preference to any others.
 
 - #iommu-cells      : See the generic IOMMU binding described in
                         devicetree/bindings/pci/pci-iommu.txt
@@ -49,6 +55,12 @@ the PCIe specification.
 - hisilicon,broken-prefetch-cmd
                     : Avoid sending CMD_PREFETCH_* commands to the SMMU.
 
+- cavium,cn9900-broken-page1-regspace
+                    : Replaces all page 1 offsets used for EVTQ_PROD/CONS,
+                     PRIQ_PROD/CONS register access with page 0 offsets.
+                     Set for Cavium ThunderX2 silicon that doesn't support
+                     SMMU page1 register space.
+
 ** Example
 
         smmu@2b400000 {
index d048f72c23f84719558e2cda0c9a7d17062d6347..a3215ee671c1e8890407f2e21f1e776b9d1f54d2 100644 (file)
 #define IORT_IOMMU_TYPE                ((1 << ACPI_IORT_NODE_SMMU) |   \
                                (1 << ACPI_IORT_NODE_SMMU_V3))
 
+/* Until ACPICA headers cover IORT rev. C */
+#ifndef ACPI_IORT_SMMU_V3_CAVIUM_CN99XX
+#define ACPI_IORT_SMMU_V3_CAVIUM_CN99XX                0x2
+#endif
+
 struct iort_its_msi_chip {
        struct list_head        list;
        struct fwnode_handle    *fw_node;
@@ -819,6 +824,36 @@ static int __init arm_smmu_v3_count_resources(struct acpi_iort_node *node)
        return num_res;
 }
 
+static bool arm_smmu_v3_is_combined_irq(struct acpi_iort_smmu_v3 *smmu)
+{
+       /*
+        * Cavium ThunderX2 implementation doesn't support unique irq
+        * lines. Use a single irq line for all the SMMUv3 interrupts.
+        */
+       if (smmu->model != ACPI_IORT_SMMU_V3_CAVIUM_CN99XX)
+               return false;
+
+       /*
+        * ThunderX2 doesn't support MSIs from the SMMU, so we're checking
+        * SPI numbers here.
+        */
+       return smmu->event_gsiv == smmu->pri_gsiv &&
+              smmu->event_gsiv == smmu->gerr_gsiv &&
+              smmu->event_gsiv == smmu->sync_gsiv;
+}
+
+static unsigned long arm_smmu_v3_resource_size(struct acpi_iort_smmu_v3 *smmu)
+{
+       /*
+        * Override the size, for Cavium ThunderX2 implementation
+        * which doesn't support the page 1 SMMU register space.
+        */
+       if (smmu->model == ACPI_IORT_SMMU_V3_CAVIUM_CN99XX)
+               return SZ_64K;
+
+       return SZ_128K;
+}
+
 static void __init arm_smmu_v3_init_resources(struct resource *res,
                                              struct acpi_iort_node *node)
 {
@@ -829,30 +864,38 @@ static void __init arm_smmu_v3_init_resources(struct resource *res,
        smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
 
        res[num_res].start = smmu->base_address;
-       res[num_res].end = smmu->base_address + SZ_128K - 1;
+       res[num_res].end = smmu->base_address +
+                               arm_smmu_v3_resource_size(smmu) - 1;
        res[num_res].flags = IORESOURCE_MEM;
 
        num_res++;
+       if (arm_smmu_v3_is_combined_irq(smmu)) {
+               if (smmu->event_gsiv)
+                       acpi_iort_register_irq(smmu->event_gsiv, "combined",
+                                              ACPI_EDGE_SENSITIVE,
+                                              &res[num_res++]);
+       } else {
 
-       if (smmu->event_gsiv)
-               acpi_iort_register_irq(smmu->event_gsiv, "eventq",
-                                      ACPI_EDGE_SENSITIVE,
-                                      &res[num_res++]);
-
-       if (smmu->pri_gsiv)
-               acpi_iort_register_irq(smmu->pri_gsiv, "priq",
-                                      ACPI_EDGE_SENSITIVE,
-                                      &res[num_res++]);
-
-       if (smmu->gerr_gsiv)
-               acpi_iort_register_irq(smmu->gerr_gsiv, "gerror",
-                                      ACPI_EDGE_SENSITIVE,
-                                      &res[num_res++]);
-
-       if (smmu->sync_gsiv)
-               acpi_iort_register_irq(smmu->sync_gsiv, "cmdq-sync",
-                                      ACPI_EDGE_SENSITIVE,
-                                      &res[num_res++]);
+               if (smmu->event_gsiv)
+                       acpi_iort_register_irq(smmu->event_gsiv, "eventq",
+                                              ACPI_EDGE_SENSITIVE,
+                                              &res[num_res++]);
+
+               if (smmu->pri_gsiv)
+                       acpi_iort_register_irq(smmu->pri_gsiv, "priq",
+                                              ACPI_EDGE_SENSITIVE,
+                                              &res[num_res++]);
+
+               if (smmu->gerr_gsiv)
+                       acpi_iort_register_irq(smmu->gerr_gsiv, "gerror",
+                                              ACPI_EDGE_SENSITIVE,
+                                              &res[num_res++]);
+
+               if (smmu->sync_gsiv)
+                       acpi_iort_register_irq(smmu->sync_gsiv, "cmdq-sync",
+                                              ACPI_EDGE_SENSITIVE,
+                                              &res[num_res++]);
+       }
 }
 
 static bool __init arm_smmu_v3_is_coherent(struct acpi_iort_node *node)
index 6ee3a25ae731880147e5b5986b8f2e188e2bf687..f73ff28f77e2351f5f627033fd43a9bdc3c9f7b9 100644 (file)
@@ -23,7 +23,7 @@ config IOMMU_IO_PGTABLE
 config IOMMU_IO_PGTABLE_LPAE
        bool "ARMv7/v8 Long Descriptor Format"
        select IOMMU_IO_PGTABLE
-       depends on HAS_DMA && (ARM || ARM64 || COMPILE_TEST)
+       depends on HAS_DMA && (ARM || ARM64 || (COMPILE_TEST && !GENERIC_ATOMIC64))
        help
          Enable support for the ARM long descriptor pagetable format.
          This allocator supports 4K/2M/1G, 16K/32M and 64K/512M page
@@ -219,7 +219,7 @@ config OMAP_IOMMU_DEBUG
 
 config ROCKCHIP_IOMMU
        bool "Rockchip IOMMU Support"
-       depends on ARM
+       depends on ARM || ARM64
        depends on ARCH_ROCKCHIP || COMPILE_TEST
        select IOMMU_API
        select ARM_DMA_USE_IOMMU
@@ -274,7 +274,7 @@ config EXYNOS_IOMMU_DEBUG
 
 config IPMMU_VMSA
        bool "Renesas VMSA-compatible IPMMU"
-       depends on ARM_LPAE
+       depends on ARM || IOMMU_DMA
        depends on ARCH_RENESAS || COMPILE_TEST
        select IOMMU_API
        select IOMMU_IO_PGTABLE_LPAE
index f16d0f26ee24551260ebb9dbf59f81a82245f297..688e77576e5a50b3f2f137eec72cd721d6bf96fa 100644 (file)
@@ -91,25 +91,6 @@ LIST_HEAD(ioapic_map);
 LIST_HEAD(hpet_map);
 LIST_HEAD(acpihid_map);
 
-#define FLUSH_QUEUE_SIZE 256
-
-struct flush_queue_entry {
-       unsigned long iova_pfn;
-       unsigned long pages;
-       struct dma_ops_domain *dma_dom;
-};
-
-struct flush_queue {
-       spinlock_t lock;
-       unsigned next;
-       struct flush_queue_entry *entries;
-};
-
-static DEFINE_PER_CPU(struct flush_queue, flush_queue);
-
-static atomic_t queue_timer_on;
-static struct timer_list queue_timer;
-
 /*
  * Domain for untranslated devices - only allocated
  * if iommu=pt passed on kernel cmd line.
@@ -140,6 +121,8 @@ struct iommu_dev_data {
                                             PPR completions */
        u32 errata;                       /* Bitmap for errata to apply */
        bool use_vapic;                   /* Enable device to use vapic mode */
+
+       struct ratelimit_state rs;        /* Ratelimit IOPF messages */
 };
 
 /*
@@ -155,6 +138,20 @@ static void update_domain(struct protection_domain *domain);
 static int protection_domain_init(struct protection_domain *domain);
 static void detach_device(struct device *dev);
 
+#define FLUSH_QUEUE_SIZE 256
+
+struct flush_queue_entry {
+       unsigned long iova_pfn;
+       unsigned long pages;
+       u64 counter; /* Flush counter when this entry was added to the queue */
+};
+
+struct flush_queue {
+       struct flush_queue_entry *entries;
+       unsigned head, tail;
+       spinlock_t lock;
+};
+
 /*
  * Data container for a dma_ops specific protection domain
  */
@@ -164,6 +161,36 @@ struct dma_ops_domain {
 
        /* IOVA RB-Tree */
        struct iova_domain iovad;
+
+       struct flush_queue __percpu *flush_queue;
+
+       /*
+        * We need two counters here to be race-free wrt. IOTLB flushing and
+        * adding entries to the flush queue.
+        *
+        * The flush_start_cnt is incremented _before_ the IOTLB flush starts.
+        * New entries added to the flush ring-buffer get their 'counter' value
+        * from here. This way we can make sure that entries added to the queue
+        * (or other per-cpu queues of the same domain) while the TLB is about
+        * to be flushed are not considered to be flushed already.
+        */
+       atomic64_t flush_start_cnt;
+
+       /*
+        * The flush_finish_cnt is incremented when an IOTLB flush is complete.
+        * This value is always smaller than flush_start_cnt. The queue_add
+        * function frees all IOVAs that have a counter value smaller than
+        * flush_finish_cnt. This makes sure that we only free IOVAs that are
+        * flushed out of the IOTLB of the domain.
+        */
+       atomic64_t flush_finish_cnt;
+
+       /*
+        * Timer to make sure we don't keep IOVAs around unflushed
+        * for too long
+        */
+       struct timer_list flush_timer;
+       atomic_t flush_timer_on;
 };
 
 static struct iova_domain reserved_iova_ranges;
@@ -255,6 +282,8 @@ static struct iommu_dev_data *alloc_dev_data(u16 devid)
        list_add_tail(&dev_data->dev_data_list, &dev_data_list);
        spin_unlock_irqrestore(&dev_data_list_lock, flags);
 
+       ratelimit_default_init(&dev_data->rs);
+
        return dev_data;
 }
 
@@ -553,6 +582,29 @@ static void dump_command(unsigned long phys_addr)
                pr_err("AMD-Vi: CMD[%d]: %08x\n", i, cmd->data[i]);
 }
 
+static void amd_iommu_report_page_fault(u16 devid, u16 domain_id,
+                                       u64 address, int flags)
+{
+       struct iommu_dev_data *dev_data = NULL;
+       struct pci_dev *pdev;
+
+       pdev = pci_get_bus_and_slot(PCI_BUS_NUM(devid), devid & 0xff);
+       if (pdev)
+               dev_data = get_dev_data(&pdev->dev);
+
+       if (dev_data && __ratelimit(&dev_data->rs)) {
+               dev_err(&pdev->dev, "AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x%04x address=0x%016llx flags=0x%04x]\n",
+                       domain_id, address, flags);
+       } else if (printk_ratelimit()) {
+               pr_err("AMD-Vi: Event logged [IO_PAGE_FAULT device=%02x:%02x.%x domain=0x%04x address=0x%016llx flags=0x%04x]\n",
+                       PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
+                       domain_id, address, flags);
+       }
+
+       if (pdev)
+               pci_dev_put(pdev);
+}
+
 static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
 {
        int type, devid, domid, flags;
@@ -577,7 +629,12 @@ retry:
                goto retry;
        }
 
-       printk(KERN_ERR "AMD-Vi: Event logged [");
+       if (type == EVENT_TYPE_IO_FAULT) {
+               amd_iommu_report_page_fault(devid, domid, address, flags);
+               return;
+       } else {
+               printk(KERN_ERR "AMD-Vi: Event logged [");
+       }
 
        switch (type) {
        case EVENT_TYPE_ILL_DEV:
@@ -587,12 +644,6 @@ retry:
                       address, flags);
                dump_dte_entry(devid);
                break;
-       case EVENT_TYPE_IO_FAULT:
-               printk("IO_PAGE_FAULT device=%02x:%02x.%x "
-                      "domain=0x%04x address=0x%016llx flags=0x%04x]\n",
-                      PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
-                      domid, address, flags);
-               break;
        case EVENT_TYPE_DEV_TAB_ERR:
                printk("DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
                       "address=0x%016llx flags=0x%04x]\n",
@@ -850,19 +901,20 @@ static int wait_on_sem(volatile u64 *sem)
 }
 
 static void copy_cmd_to_buffer(struct amd_iommu *iommu,
-                              struct iommu_cmd *cmd,
-                              u32 tail)
+                              struct iommu_cmd *cmd)
 {
        u8 *target;
 
-       target = iommu->cmd_buf + tail;
-       tail   = (tail + sizeof(*cmd)) % CMD_BUFFER_SIZE;
+       target = iommu->cmd_buf + iommu->cmd_buf_tail;
+
+       iommu->cmd_buf_tail += sizeof(*cmd);
+       iommu->cmd_buf_tail %= CMD_BUFFER_SIZE;
 
        /* Copy command to buffer */
        memcpy(target, cmd, sizeof(*cmd));
 
        /* Tell the IOMMU about it */
-       writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
+       writel(iommu->cmd_buf_tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
 }
 
 static void build_completion_wait(struct iommu_cmd *cmd, u64 address)
@@ -1020,33 +1072,34 @@ static int __iommu_queue_command_sync(struct amd_iommu *iommu,
                                      struct iommu_cmd *cmd,
                                      bool sync)
 {
-       u32 left, tail, head, next_tail;
+       unsigned int count = 0;
+       u32 left, next_tail;
 
+       next_tail = (iommu->cmd_buf_tail + sizeof(*cmd)) % CMD_BUFFER_SIZE;
 again:
-
-       head      = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
-       tail      = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
-       next_tail = (tail + sizeof(*cmd)) % CMD_BUFFER_SIZE;
-       left      = (head - next_tail) % CMD_BUFFER_SIZE;
+       left      = (iommu->cmd_buf_head - next_tail) % CMD_BUFFER_SIZE;
 
        if (left <= 0x20) {
-               struct iommu_cmd sync_cmd;
-               int ret;
-
-               iommu->cmd_sem = 0;
+               /* Skip udelay() the first time around */
+               if (count++) {
+                       if (count == LOOP_TIMEOUT) {
+                               pr_err("AMD-Vi: Command buffer timeout\n");
+                               return -EIO;
+                       }
 
-               build_completion_wait(&sync_cmd, (u64)&iommu->cmd_sem);
-               copy_cmd_to_buffer(iommu, &sync_cmd, tail);
+                       udelay(1);
+               }
 
-               if ((ret = wait_on_sem(&iommu->cmd_sem)) != 0)
-                       return ret;
+               /* Update head and recheck remaining space */
+               iommu->cmd_buf_head = readl(iommu->mmio_base +
+                                           MMIO_CMD_HEAD_OFFSET);
 
                goto again;
        }
 
-       copy_cmd_to_buffer(iommu, cmd, tail);
+       copy_cmd_to_buffer(iommu, cmd);
 
-       /* We need to sync now to make sure all commands are processed */
+       /* Do we need to make sure all commands are processed? */
        iommu->need_sync = sync;
 
        return 0;
@@ -1735,6 +1788,180 @@ static void free_gcr3_table(struct protection_domain *domain)
        free_page((unsigned long)domain->gcr3_tbl);
 }
 
+static void dma_ops_domain_free_flush_queue(struct dma_ops_domain *dom)
+{
+       int cpu;
+
+       for_each_possible_cpu(cpu) {
+               struct flush_queue *queue;
+
+               queue = per_cpu_ptr(dom->flush_queue, cpu);
+               kfree(queue->entries);
+       }
+
+       free_percpu(dom->flush_queue);
+
+       dom->flush_queue = NULL;
+}
+
+static int dma_ops_domain_alloc_flush_queue(struct dma_ops_domain *dom)
+{
+       int cpu;
+
+       atomic64_set(&dom->flush_start_cnt,  0);
+       atomic64_set(&dom->flush_finish_cnt, 0);
+
+       dom->flush_queue = alloc_percpu(struct flush_queue);
+       if (!dom->flush_queue)
+               return -ENOMEM;
+
+       /* First make sure everything is cleared */
+       for_each_possible_cpu(cpu) {
+               struct flush_queue *queue;
+
+               queue = per_cpu_ptr(dom->flush_queue, cpu);
+               queue->head    = 0;
+               queue->tail    = 0;
+               queue->entries = NULL;
+       }
+
+       /* Now start doing the allocation */
+       for_each_possible_cpu(cpu) {
+               struct flush_queue *queue;
+
+               queue = per_cpu_ptr(dom->flush_queue, cpu);
+               queue->entries = kzalloc(FLUSH_QUEUE_SIZE * sizeof(*queue->entries),
+                                        GFP_KERNEL);
+               if (!queue->entries) {
+                       dma_ops_domain_free_flush_queue(dom);
+                       return -ENOMEM;
+               }
+
+               spin_lock_init(&queue->lock);
+       }
+
+       return 0;
+}
+
+static void dma_ops_domain_flush_tlb(struct dma_ops_domain *dom)
+{
+       atomic64_inc(&dom->flush_start_cnt);
+       domain_flush_tlb(&dom->domain);
+       domain_flush_complete(&dom->domain);
+       atomic64_inc(&dom->flush_finish_cnt);
+}
+
+static inline bool queue_ring_full(struct flush_queue *queue)
+{
+       assert_spin_locked(&queue->lock);
+
+       return (((queue->tail + 1) % FLUSH_QUEUE_SIZE) == queue->head);
+}
+
+#define queue_ring_for_each(i, q) \
+       for (i = (q)->head; i != (q)->tail; i = (i + 1) % FLUSH_QUEUE_SIZE)
+
+static inline unsigned queue_ring_add(struct flush_queue *queue)
+{
+       unsigned idx = queue->tail;
+
+       assert_spin_locked(&queue->lock);
+       queue->tail = (idx + 1) % FLUSH_QUEUE_SIZE;
+
+       return idx;
+}
+
+static inline void queue_ring_remove_head(struct flush_queue *queue)
+{
+       assert_spin_locked(&queue->lock);
+       queue->head = (queue->head + 1) % FLUSH_QUEUE_SIZE;
+}
+
+static void queue_ring_free_flushed(struct dma_ops_domain *dom,
+                                   struct flush_queue *queue)
+{
+       u64 counter = atomic64_read(&dom->flush_finish_cnt);
+       int idx;
+
+       queue_ring_for_each(idx, queue) {
+               /*
+                * This assumes that counter values in the ring-buffer are
+                * monotonically increasing.
+                */
+               if (queue->entries[idx].counter >= counter)
+                       break;
+
+               free_iova_fast(&dom->iovad,
+                              queue->entries[idx].iova_pfn,
+                              queue->entries[idx].pages);
+
+               queue_ring_remove_head(queue);
+       }
+}
+
+static void queue_add(struct dma_ops_domain *dom,
+                     unsigned long address, unsigned long pages)
+{
+       struct flush_queue *queue;
+       unsigned long flags;
+       int idx;
+
+       pages     = __roundup_pow_of_two(pages);
+       address >>= PAGE_SHIFT;
+
+       queue = get_cpu_ptr(dom->flush_queue);
+       spin_lock_irqsave(&queue->lock, flags);
+
+       /*
+        * First remove the entries from the ring-buffer that are already
+        * flushed to make the below queue_ring_full() check less likely
+        */
+       queue_ring_free_flushed(dom, queue);
+
+       /*
+        * When ring-queue is full, flush the entries from the IOTLB so
+        * that we can free all entries with queue_ring_free_flushed()
+        * below.
+        */
+       if (queue_ring_full(queue)) {
+               dma_ops_domain_flush_tlb(dom);
+               queue_ring_free_flushed(dom, queue);
+       }
+
+       idx = queue_ring_add(queue);
+
+       queue->entries[idx].iova_pfn = address;
+       queue->entries[idx].pages    = pages;
+       queue->entries[idx].counter  = atomic64_read(&dom->flush_start_cnt);
+
+       spin_unlock_irqrestore(&queue->lock, flags);
+
+       if (atomic_cmpxchg(&dom->flush_timer_on, 0, 1) == 0)
+               mod_timer(&dom->flush_timer, jiffies + msecs_to_jiffies(10));
+
+       put_cpu_ptr(dom->flush_queue);
+}
+
+static void queue_flush_timeout(unsigned long data)
+{
+       struct dma_ops_domain *dom = (struct dma_ops_domain *)data;
+       int cpu;
+
+       atomic_set(&dom->flush_timer_on, 0);
+
+       dma_ops_domain_flush_tlb(dom);
+
+       for_each_possible_cpu(cpu) {
+               struct flush_queue *queue;
+               unsigned long flags;
+
+               queue = per_cpu_ptr(dom->flush_queue, cpu);
+               spin_lock_irqsave(&queue->lock, flags);
+               queue_ring_free_flushed(dom, queue);
+               spin_unlock_irqrestore(&queue->lock, flags);
+       }
+}
+
 /*
  * Free a domain, only used if something went wrong in the
  * allocation path and we need to free an already allocated page table
@@ -1746,6 +1973,11 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
 
        del_domain_from_list(&dom->domain);
 
+       if (timer_pending(&dom->flush_timer))
+               del_timer(&dom->flush_timer);
+
+       dma_ops_domain_free_flush_queue(dom);
+
        put_iova_domain(&dom->iovad);
 
        free_pagetable(&dom->domain);
@@ -1784,6 +2016,14 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void)
        /* Initialize reserved ranges */
        copy_reserved_iova(&reserved_iova_ranges, &dma_dom->iovad);
 
+       if (dma_ops_domain_alloc_flush_queue(dma_dom))
+               goto free_dma_dom;
+
+       setup_timer(&dma_dom->flush_timer, queue_flush_timeout,
+                   (unsigned long)dma_dom);
+
+       atomic_set(&dma_dom->flush_timer_on, 0);
+
        add_domain_to_list(&dma_dom->domain);
 
        return dma_dom;
@@ -1846,7 +2086,8 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats)
                flags    |= tmp;
        }
 
-       flags &= ~(0xffffUL);
+
+       flags &= ~(DTE_FLAG_SA | 0xffffULL);
        flags |= domain->id;
 
        amd_iommu_dev_table[devid].data[1]  = flags;
@@ -2227,92 +2468,6 @@ static struct iommu_group *amd_iommu_device_group(struct device *dev)
  *
  *****************************************************************************/
 
-static void __queue_flush(struct flush_queue *queue)
-{
-       struct protection_domain *domain;
-       unsigned long flags;
-       int idx;
-
-       /* First flush TLB of all known domains */
-       spin_lock_irqsave(&amd_iommu_pd_lock, flags);
-       list_for_each_entry(domain, &amd_iommu_pd_list, list)
-               domain_flush_tlb(domain);
-       spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
-
-       /* Wait until flushes have completed */
-       domain_flush_complete(NULL);
-
-       for (idx = 0; idx < queue->next; ++idx) {
-               struct flush_queue_entry *entry;
-
-               entry = queue->entries + idx;
-
-               free_iova_fast(&entry->dma_dom->iovad,
-                               entry->iova_pfn,
-                               entry->pages);
-
-               /* Not really necessary, just to make sure we catch any bugs */
-               entry->dma_dom = NULL;
-       }
-
-       queue->next = 0;
-}
-
-static void queue_flush_all(void)
-{
-       int cpu;
-
-       for_each_possible_cpu(cpu) {
-               struct flush_queue *queue;
-               unsigned long flags;
-
-               queue = per_cpu_ptr(&flush_queue, cpu);
-               spin_lock_irqsave(&queue->lock, flags);
-               if (queue->next > 0)
-                       __queue_flush(queue);
-               spin_unlock_irqrestore(&queue->lock, flags);
-       }
-}
-
-static void queue_flush_timeout(unsigned long unsused)
-{
-       atomic_set(&queue_timer_on, 0);
-       queue_flush_all();
-}
-
-static void queue_add(struct dma_ops_domain *dma_dom,
-                     unsigned long address, unsigned long pages)
-{
-       struct flush_queue_entry *entry;
-       struct flush_queue *queue;
-       unsigned long flags;
-       int idx;
-
-       pages     = __roundup_pow_of_two(pages);
-       address >>= PAGE_SHIFT;
-
-       queue = get_cpu_ptr(&flush_queue);
-       spin_lock_irqsave(&queue->lock, flags);
-
-       if (queue->next == FLUSH_QUEUE_SIZE)
-               __queue_flush(queue);
-
-       idx   = queue->next++;
-       entry = queue->entries + idx;
-
-       entry->iova_pfn = address;
-       entry->pages    = pages;
-       entry->dma_dom  = dma_dom;
-
-       spin_unlock_irqrestore(&queue->lock, flags);
-
-       if (atomic_cmpxchg(&queue_timer_on, 0, 1) == 0)
-               mod_timer(&queue_timer, jiffies + msecs_to_jiffies(10));
-
-       put_cpu_ptr(&flush_queue);
-}
-
-
 /*
  * In the dma_ops path we only have the struct device. This function
  * finds the corresponding IOMMU, the protection domain and the
@@ -2807,7 +2962,7 @@ static int init_reserved_iova_ranges(void)
 
 int __init amd_iommu_init_api(void)
 {
-       int ret, cpu, err = 0;
+       int ret, err = 0;
 
        ret = iova_cache_get();
        if (ret)
@@ -2817,18 +2972,6 @@ int __init amd_iommu_init_api(void)
        if (ret)
                return ret;
 
-       for_each_possible_cpu(cpu) {
-               struct flush_queue *queue = per_cpu_ptr(&flush_queue, cpu);
-
-               queue->entries = kzalloc(FLUSH_QUEUE_SIZE *
-                                        sizeof(*queue->entries),
-                                        GFP_KERNEL);
-               if (!queue->entries)
-                       goto out_put_iova;
-
-               spin_lock_init(&queue->lock);
-       }
-
        err = bus_set_iommu(&pci_bus_type, &amd_iommu_ops);
        if (err)
                return err;
@@ -2840,23 +2983,12 @@ int __init amd_iommu_init_api(void)
        err = bus_set_iommu(&platform_bus_type, &amd_iommu_ops);
        if (err)
                return err;
-       return 0;
-
-out_put_iova:
-       for_each_possible_cpu(cpu) {
-               struct flush_queue *queue = per_cpu_ptr(&flush_queue, cpu);
-
-               kfree(queue->entries);
-       }
 
-       return -ENOMEM;
+       return 0;
 }
 
 int __init amd_iommu_init_dma_ops(void)
 {
-       setup_timer(&queue_timer, queue_flush_timeout, 0);
-       atomic_set(&queue_timer_on, 0);
-
        swiotlb        = iommu_pass_through ? 1 : 0;
        iommu_detected = 1;
 
@@ -3012,12 +3144,6 @@ static void amd_iommu_domain_free(struct iommu_domain *dom)
 
        switch (dom->type) {
        case IOMMU_DOMAIN_DMA:
-               /*
-                * First make sure the domain is no longer referenced from the
-                * flush queue
-                */
-               queue_flush_all();
-
                /* Now release the domain */
                dma_dom = to_dma_ops_domain(domain);
                dma_ops_domain_free(dma_dom);
@@ -4281,7 +4407,7 @@ static void irq_remapping_deactivate(struct irq_domain *domain,
                                            irte_info->index);
 }
 
-static struct irq_domain_ops amd_ir_domain_ops = {
+static const struct irq_domain_ops amd_ir_domain_ops = {
        .alloc = irq_remapping_alloc,
        .free = irq_remapping_free,
        .activate = irq_remapping_activate,
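
A note on the command-queue rework in the amd_iommu.c hunks above:
instead of re-reading the hardware head pointer and queueing a
COMPLETION_WAIT for every command, __iommu_queue_command_sync() now
works against cached cmd_buf_head/cmd_buf_tail values and only
re-reads the head register (with a short udelay() backoff) when fewer
than 0x20 bytes appear to be left. The wraparound arithmetic is the
subtle part; the sketch below uses assumed buffer and command sizes
purely for illustration.

#include <stdio.h>

#define CMD_BUFFER_SIZE 8192U   /* assumed size; must be a power of two */
#define CMD_SIZE        16U     /* assumed per-command size */

static unsigned int space_left(unsigned int head, unsigned int tail)
{
	unsigned int next_tail = (tail + CMD_SIZE) % CMD_BUFFER_SIZE;

	/*
	 * Unsigned subtraction wraps modulo 2^32, so taking the result
	 * modulo a power-of-two buffer size also handles the case where
	 * the tail has wrapped past the head.
	 */
	return (head - next_tail) % CMD_BUFFER_SIZE;
}

int main(void)
{
	printf("%u\n", space_left(4096, 0));    /* plenty of room: 4080 */
	printf("%u\n", space_left(64, 32));     /* nearly full: 16 */
	printf("%u\n", space_left(0, 8176));    /* wrapped around, full: 0 */
	return 0;
}

In the driver, a result of 0x20 or less triggers a re-read of
MMIO_CMD_HEAD_OFFSET and a retry, bounded by LOOP_TIMEOUT.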
index 5a11328f4d9854457f5e078ab0b3dd29aff04b6e..5cc597b383c7208d69e83d8ec0646e936817d1b4 100644 (file)
@@ -29,6 +29,7 @@
 #include <linux/export.h>
 #include <linux/iommu.h>
 #include <linux/kmemleak.h>
+#include <linux/crash_dump.h>
 #include <asm/pci-direct.h>
 #include <asm/iommu.h>
 #include <asm/gart.h>
@@ -236,6 +237,7 @@ enum iommu_init_state {
        IOMMU_INITIALIZED,
        IOMMU_NOT_FOUND,
        IOMMU_INIT_ERROR,
+       IOMMU_CMDLINE_DISABLED,
 };
 
 /* Early ioapic and hpet maps from kernel command line */
@@ -588,6 +590,8 @@ void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu)
 
        writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
        writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
+       iommu->cmd_buf_head = 0;
+       iommu->cmd_buf_tail = 0;
 
        iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
 }
@@ -1898,6 +1902,14 @@ static void init_device_table_dma(void)
        for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
                set_dev_entry_bit(devid, DEV_ENTRY_VALID);
                set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION);
+               /*
+                * In kdump kernels, in-flight DMA from the old kernel might
+                * cause IO_PAGE_FAULTs. There are no reports that a kdump
+                * actually failed because of that, so just disable fault
+                * reporting in the hardware to get rid of the messages
+                */
+               if (is_kdump_kernel())
+                       set_dev_entry_bit(devid, DEV_ENTRY_NO_PAGE_FAULT);
        }
 }
 
@@ -2097,23 +2109,27 @@ static struct syscore_ops amd_iommu_syscore_ops = {
        .resume = amd_iommu_resume,
 };
 
-static void __init free_on_init_error(void)
+static void __init free_iommu_resources(void)
 {
        kmemleak_free(irq_lookup_table);
        free_pages((unsigned long)irq_lookup_table,
                   get_order(rlookup_table_size));
+       irq_lookup_table = NULL;
 
        kmem_cache_destroy(amd_iommu_irq_cache);
        amd_iommu_irq_cache = NULL;
 
        free_pages((unsigned long)amd_iommu_rlookup_table,
                   get_order(rlookup_table_size));
+       amd_iommu_rlookup_table = NULL;
 
        free_pages((unsigned long)amd_iommu_alias_table,
                   get_order(alias_table_size));
+       amd_iommu_alias_table = NULL;
 
        free_pages((unsigned long)amd_iommu_dev_table,
                   get_order(dev_table_size));
+       amd_iommu_dev_table = NULL;
 
        free_iommu_all();
 
@@ -2183,6 +2199,7 @@ static void __init free_dma_resources(void)
 {
        free_pages((unsigned long)amd_iommu_pd_alloc_bitmap,
                   get_order(MAX_DOMAIN_ID/8));
+       amd_iommu_pd_alloc_bitmap = NULL;
 
        free_unity_maps();
 }
@@ -2307,6 +2324,9 @@ static int __init early_amd_iommu_init(void)
        if (ret)
                goto out;
 
+       /* Disable any previously enabled IOMMUs */
+       disable_iommus();
+
        if (amd_iommu_irq_remap)
                amd_iommu_irq_remap = check_ioapic_information();
 
@@ -2410,6 +2430,13 @@ static int __init state_next(void)
        case IOMMU_IVRS_DETECTED:
                ret = early_amd_iommu_init();
                init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED;
+               if (init_state == IOMMU_ACPI_FINISHED && amd_iommu_disabled) {
+                       pr_info("AMD-Vi: AMD IOMMU disabled on kernel command-line\n");
+                       free_dma_resources();
+                       free_iommu_resources();
+                       init_state = IOMMU_CMDLINE_DISABLED;
+                       ret = -EINVAL;
+               }
                break;
        case IOMMU_ACPI_FINISHED:
                early_enable_iommus();
@@ -2438,6 +2465,7 @@ static int __init state_next(void)
                break;
        case IOMMU_NOT_FOUND:
        case IOMMU_INIT_ERROR:
+       case IOMMU_CMDLINE_DISABLED:
                /* Error states => do nothing */
                ret = -EINVAL;
                break;
@@ -2451,13 +2479,14 @@ static int __init state_next(void)
 
 static int __init iommu_go_to_state(enum iommu_init_state state)
 {
-       int ret = 0;
+       int ret = -EINVAL;
 
        while (init_state != state) {
-               ret = state_next();
-               if (init_state == IOMMU_NOT_FOUND ||
-                   init_state == IOMMU_INIT_ERROR)
+               if (init_state == IOMMU_NOT_FOUND         ||
+                   init_state == IOMMU_INIT_ERROR        ||
+                   init_state == IOMMU_CMDLINE_DISABLED)
                        break;
+               ret = state_next();
        }
 
        return ret;
@@ -2522,7 +2551,7 @@ static int __init amd_iommu_init(void)
                free_dma_resources();
                if (!irq_remapping_enabled) {
                        disable_iommus();
-                       free_on_init_error();
+                       free_iommu_resources();
                } else {
                        struct amd_iommu *iommu;
 
@@ -2549,9 +2578,6 @@ int __init amd_iommu_detect(void)
        if (no_iommu || (iommu_detected && !gart_iommu_aperture))
                return -ENODEV;
 
-       if (amd_iommu_disabled)
-               return -ENODEV;
-
        ret = iommu_go_to_state(IOMMU_IVRS_DETECTED);
        if (ret)
                return ret;
index 4de8f4160bb81592bea7dd0aaa6e06aebcd777cf..294a409e283b7ae4b52c59350756983711bba9d1 100644 (file)
 #define IOMMU_PTE_IW (1ULL << 62)
 
 #define DTE_FLAG_IOTLB (1ULL << 32)
+#define DTE_FLAG_SA    (1ULL << 34)
 #define DTE_FLAG_GV    (1ULL << 55)
 #define DTE_FLAG_MASK  (0x3ffULL << 32)
 #define DTE_GLX_SHIFT  (56)
@@ -516,6 +517,8 @@ struct amd_iommu {
 
        /* command buffer virtual address */
        u8 *cmd_buf;
+       u32 cmd_buf_head;
+       u32 cmd_buf_tail;
 
        /* event buffer virtual address */
        u8 *evt_buf;
index 380969aa60d5a6070765d4eaae3532c1b3324a8c..568c400eeaed8d3528cc80b7dd597420e842024b 100644 (file)
 
 /* High-level queue structures */
 #define ARM_SMMU_POLL_TIMEOUT_US       100
+#define ARM_SMMU_CMDQ_DRAIN_TIMEOUT_US 1000000 /* 1s! */
 
 #define MSI_IOVA_BASE                  0x8000000
 #define MSI_IOVA_LENGTH                        0x100000
 
+/* Until ACPICA headers cover IORT rev. C */
+#ifndef ACPI_IORT_SMMU_HISILICON_HI161X
+#define ACPI_IORT_SMMU_HISILICON_HI161X                0x1
+#endif
+
+#ifndef ACPI_IORT_SMMU_V3_CAVIUM_CN99XX
+#define ACPI_IORT_SMMU_V3_CAVIUM_CN99XX                0x2
+#endif
+
 static bool disable_bypass;
 module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO);
 MODULE_PARM_DESC(disable_bypass,
@@ -597,6 +607,7 @@ struct arm_smmu_device {
        u32                             features;
 
 #define ARM_SMMU_OPT_SKIP_PREFETCH     (1 << 0)
+#define ARM_SMMU_OPT_PAGE0_REGS_ONLY   (1 << 1)
        u32                             options;
 
        struct arm_smmu_cmdq            cmdq;
@@ -604,6 +615,7 @@ struct arm_smmu_device {
        struct arm_smmu_priq            priq;
 
        int                             gerr_irq;
+       int                             combined_irq;
 
        unsigned long                   ias; /* IPA */
        unsigned long                   oas; /* PA */
@@ -645,7 +657,6 @@ struct arm_smmu_domain {
        struct mutex                    init_mutex; /* Protects smmu pointer */
 
        struct io_pgtable_ops           *pgtbl_ops;
-       spinlock_t                      pgtbl_lock;
 
        enum arm_smmu_domain_stage      stage;
        union {
@@ -663,9 +674,20 @@ struct arm_smmu_option_prop {
 
 static struct arm_smmu_option_prop arm_smmu_options[] = {
        { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
+       { ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
        { 0, NULL},
 };
 
+static inline void __iomem *arm_smmu_page1_fixup(unsigned long offset,
+                                                struct arm_smmu_device *smmu)
+{
+       if ((offset > SZ_64K) &&
+           (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY))
+               offset -= SZ_64K;
+
+       return smmu->base + offset;
+}
+
 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
 {
        return container_of(dom, struct arm_smmu_domain, domain);
@@ -737,7 +759,13 @@ static void queue_inc_prod(struct arm_smmu_queue *q)
  */
 static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe)
 {
-       ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
+       ktime_t timeout;
+       unsigned int delay = 1;
+
+       /* Wait longer if it's a queue drain */
+       timeout = ktime_add_us(ktime_get(), drain ?
+                                           ARM_SMMU_CMDQ_DRAIN_TIMEOUT_US :
+                                           ARM_SMMU_POLL_TIMEOUT_US);
 
        while (queue_sync_cons(q), (drain ? !queue_empty(q) : queue_full(q))) {
                if (ktime_compare(ktime_get(), timeout) > 0)
@@ -747,7 +775,8 @@ static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe)
                        wfe();
                } else {
                        cpu_relax();
-                       udelay(1);
+                       udelay(delay);
+                       delay *= 2;
                }
        }
 
@@ -1302,6 +1331,24 @@ static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
        return IRQ_HANDLED;
 }
 
+static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
+{
+       struct arm_smmu_device *smmu = dev;
+
+       arm_smmu_evtq_thread(irq, dev);
+       if (smmu->features & ARM_SMMU_FEAT_PRI)
+               arm_smmu_priq_thread(irq, dev);
+
+       return IRQ_HANDLED;
+}
+
+static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
+{
+       arm_smmu_gerror_handler(irq, dev);
+       arm_smmu_cmdq_sync_handler(irq, dev);
+       return IRQ_WAKE_THREAD;
+}
+
 /* IO_PGTABLE API */
 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
 {
@@ -1406,7 +1453,6 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
        }
 
        mutex_init(&smmu_domain->init_mutex);
-       spin_lock_init(&smmu_domain->pgtbl_lock);
        return &smmu_domain->domain;
 }
 
@@ -1555,6 +1601,9 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain)
                .iommu_dev      = smmu->dev,
        };
 
+       if (smmu->features & ARM_SMMU_FEAT_COHERENCY)
+               pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
+
        pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
        if (!pgtbl_ops)
                return -ENOMEM;
@@ -1675,44 +1724,29 @@ out_unlock:
 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
                        phys_addr_t paddr, size_t size, int prot)
 {
-       int ret;
-       unsigned long flags;
-       struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
-       struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
+       struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
 
        if (!ops)
                return -ENODEV;
 
-       spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
-       ret = ops->map(ops, iova, paddr, size, prot);
-       spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
-       return ret;
+       return ops->map(ops, iova, paddr, size, prot);
 }
 
 static size_t
 arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
 {
-       size_t ret;
-       unsigned long flags;
-       struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
-       struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
+       struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
 
        if (!ops)
                return 0;
 
-       spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
-       ret = ops->unmap(ops, iova, size);
-       spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
-       return ret;
+       return ops->unmap(ops, iova, size);
 }
 
 static phys_addr_t
 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
 {
-       phys_addr_t ret;
-       unsigned long flags;
-       struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
-       struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
+       struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
 
        if (domain->type == IOMMU_DOMAIN_IDENTITY)
                return iova;
@@ -1720,11 +1754,7 @@ arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
        if (!ops)
                return 0;
 
-       spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
-       ret = ops->iova_to_phys(ops, iova);
-       spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
-
-       return ret;
+       return ops->iova_to_phys(ops, iova);
 }
 
 static struct platform_driver arm_smmu_driver;
@@ -1961,8 +1991,8 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
                return -ENOMEM;
        }
 
-       q->prod_reg     = smmu->base + prod_off;
-       q->cons_reg     = smmu->base + cons_off;
+       q->prod_reg     = arm_smmu_page1_fixup(prod_off, smmu);
+       q->cons_reg     = arm_smmu_page1_fixup(cons_off, smmu);
        q->ent_dwords   = dwords;
 
        q->q_base  = Q_BASE_RWA;
@@ -2218,18 +2248,9 @@ static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
        devm_add_action(dev, arm_smmu_free_msis, dev);
 }
 
-static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
+static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
 {
-       int ret, irq;
-       u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
-
-       /* Disable IRQs first */
-       ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
-                                     ARM_SMMU_IRQ_CTRLACK);
-       if (ret) {
-               dev_err(smmu->dev, "failed to disable irqs\n");
-               return ret;
-       }
+       int irq, ret;
 
        arm_smmu_setup_msis(smmu);
 
@@ -2272,10 +2293,41 @@ static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
                        if (ret < 0)
                                dev_warn(smmu->dev,
                                         "failed to enable priq irq\n");
-                       else
-                               irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
                }
        }
+}
+
+static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
+{
+       int ret, irq;
+       u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
+
+       /* Disable IRQs first */
+       ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
+                                     ARM_SMMU_IRQ_CTRLACK);
+       if (ret) {
+               dev_err(smmu->dev, "failed to disable irqs\n");
+               return ret;
+       }
+
+       irq = smmu->combined_irq;
+       if (irq) {
+               /*
+                * Cavium ThunderX2 implementation doesn't support unique irq
+                * lines. Use a single irq line for all the SMMUv3 interrupts.
+                */
+               ret = devm_request_threaded_irq(smmu->dev, irq,
+                                       arm_smmu_combined_irq_handler,
+                                       arm_smmu_combined_irq_thread,
+                                       IRQF_ONESHOT,
+                                       "arm-smmu-v3-combined-irq", smmu);
+               if (ret < 0)
+                       dev_warn(smmu->dev, "failed to enable combined irq\n");
+       } else
+               arm_smmu_setup_unique_irqs(smmu);
+
+       if (smmu->features & ARM_SMMU_FEAT_PRI)
+               irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
 
        /* Enable interrupt generation on the SMMU */
        ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
@@ -2363,8 +2415,10 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
 
        /* Event queue */
        writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
-       writel_relaxed(smmu->evtq.q.prod, smmu->base + ARM_SMMU_EVTQ_PROD);
-       writel_relaxed(smmu->evtq.q.cons, smmu->base + ARM_SMMU_EVTQ_CONS);
+       writel_relaxed(smmu->evtq.q.prod,
+                      arm_smmu_page1_fixup(ARM_SMMU_EVTQ_PROD, smmu));
+       writel_relaxed(smmu->evtq.q.cons,
+                      arm_smmu_page1_fixup(ARM_SMMU_EVTQ_CONS, smmu));
 
        enables |= CR0_EVTQEN;
        ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
@@ -2379,9 +2433,9 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
                writeq_relaxed(smmu->priq.q.q_base,
                               smmu->base + ARM_SMMU_PRIQ_BASE);
                writel_relaxed(smmu->priq.q.prod,
-                              smmu->base + ARM_SMMU_PRIQ_PROD);
+                              arm_smmu_page1_fixup(ARM_SMMU_PRIQ_PROD, smmu));
                writel_relaxed(smmu->priq.q.cons,
-                              smmu->base + ARM_SMMU_PRIQ_CONS);
+                              arm_smmu_page1_fixup(ARM_SMMU_PRIQ_CONS, smmu));
 
                enables |= CR0_PRIQEN;
                ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
@@ -2605,6 +2659,20 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
 }
 
 #ifdef CONFIG_ACPI
+static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
+{
+       switch (model) {
+       case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
+               smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
+               break;
+       case ACPI_IORT_SMMU_HISILICON_HI161X:
+               smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
+               break;
+       }
+
+       dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
+}
+
 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
                                      struct arm_smmu_device *smmu)
 {
@@ -2617,6 +2685,8 @@ static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
        /* Retrieve SMMUv3 specific data */
        iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
 
+       acpi_smmu_get_options(iort_smmu->model, smmu);
+
        if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
                smmu->features |= ARM_SMMU_FEAT_COHERENCY;
 
@@ -2652,6 +2722,14 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev,
        return ret;
 }
 
+static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
+{
+       if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
+               return SZ_64K;
+       else
+               return SZ_128K;
+}
+
 static int arm_smmu_device_probe(struct platform_device *pdev)
 {
        int irq, ret;
@@ -2668,9 +2746,20 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
        }
        smmu->dev = dev;
 
+       if (dev->of_node) {
+               ret = arm_smmu_device_dt_probe(pdev, smmu);
+       } else {
+               ret = arm_smmu_device_acpi_probe(pdev, smmu);
+               if (ret == -ENODEV)
+                       return ret;
+       }
+
+       /* Set bypass mode according to firmware probing result */
+       bypass = !!ret;
+
        /* Base address */
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (resource_size(res) + 1 < SZ_128K) {
+       if (resource_size(res) + 1 < arm_smmu_resource_size(smmu)) {
                dev_err(dev, "MMIO region too small (%pr)\n", res);
                return -EINVAL;
        }
@@ -2681,33 +2770,27 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
                return PTR_ERR(smmu->base);
 
        /* Interrupt lines */
-       irq = platform_get_irq_byname(pdev, "eventq");
-       if (irq > 0)
-               smmu->evtq.q.irq = irq;
 
-       irq = platform_get_irq_byname(pdev, "priq");
+       irq = platform_get_irq_byname(pdev, "combined");
        if (irq > 0)
-               smmu->priq.q.irq = irq;
+               smmu->combined_irq = irq;
+       else {
+               irq = platform_get_irq_byname(pdev, "eventq");
+               if (irq > 0)
+                       smmu->evtq.q.irq = irq;
 
-       irq = platform_get_irq_byname(pdev, "cmdq-sync");
-       if (irq > 0)
-               smmu->cmdq.q.irq = irq;
+               irq = platform_get_irq_byname(pdev, "priq");
+               if (irq > 0)
+                       smmu->priq.q.irq = irq;
 
-       irq = platform_get_irq_byname(pdev, "gerror");
-       if (irq > 0)
-               smmu->gerr_irq = irq;
+               irq = platform_get_irq_byname(pdev, "cmdq-sync");
+               if (irq > 0)
+                       smmu->cmdq.q.irq = irq;
 
-       if (dev->of_node) {
-               ret = arm_smmu_device_dt_probe(pdev, smmu);
-       } else {
-               ret = arm_smmu_device_acpi_probe(pdev, smmu);
-               if (ret == -ENODEV)
-                       return ret;
+               irq = platform_get_irq_byname(pdev, "gerror");
+               if (irq > 0)
+                       smmu->gerr_irq = irq;
        }
-
-       /* Set bypass mode according to firmware probing result */
-       bypass = !!ret;
-
        /* Probe the h/w */
        ret = arm_smmu_device_hw_probe(smmu);
        if (ret)
@@ -2736,6 +2819,10 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
        iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
 
        ret = iommu_device_register(&smmu->iommu);
+       if (ret) {
+               dev_err(dev, "Failed to register iommu\n");
+               return ret;
+       }
 
 #ifdef CONFIG_PCI
        if (pci_bus_type.iommu_ops != &arm_smmu_ops) {
@@ -2768,7 +2855,7 @@ static int arm_smmu_device_remove(struct platform_device *pdev)
        return 0;
 }
 
-static struct of_device_id arm_smmu_of_match[] = {
+static const struct of_device_id arm_smmu_of_match[] = {
        { .compatible = "arm,smmu-v3", },
        { },
 };
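
The ARM_SMMU_OPT_PAGE0_REGS_ONLY handling added above folds page-1
register offsets back into page 0 for Cavium ThunderX2 (erratum #74),
which is also why the IORT code earlier in this series reports a 64K
instead of a 128K MMIO resource for that model. Below is a minimal
sketch of the offset fixup; the example register offset is an
assumption for illustration, not taken from the driver's register map.

#include <stdio.h>

#define SZ_64K                  0x10000UL
#define OPT_PAGE0_REGS_ONLY     (1U << 1)

/* Hypothetical producer-index register that normally lives in page 1 */
#define EXAMPLE_PAGE1_REG       (SZ_64K + 0xa8)

static unsigned long page1_fixup(unsigned long offset, unsigned int options)
{
	/* Fold a page-1 offset back into page 0 when the quirk is set */
	if (offset > SZ_64K && (options & OPT_PAGE0_REGS_ONLY))
		offset -= SZ_64K;

	return offset;
}

int main(void)
{
	printf("0x%lx\n", page1_fixup(EXAMPLE_PAGE1_REG, 0));                   /* 0x100a8 */
	printf("0x%lx\n", page1_fixup(EXAMPLE_PAGE1_REG, OPT_PAGE0_REGS_ONLY)); /* 0xa8 */
	return 0;
}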
index 7ec30b08b3bdc285872e0139997a300497450f98..bc89b4d6c043dacee88463ba22edc8883f60385e 100644 (file)
@@ -312,6 +312,14 @@ enum arm_smmu_implementation {
        CAVIUM_SMMUV2,
 };
 
+/* Until ACPICA headers cover IORT rev. C */
+#ifndef ACPI_IORT_SMMU_CORELINK_MMU401
+#define ACPI_IORT_SMMU_CORELINK_MMU401 0x4
+#endif
+#ifndef ACPI_IORT_SMMU_CAVIUM_THUNDERX
+#define ACPI_IORT_SMMU_CAVIUM_THUNDERX 0x5
+#endif
+
 struct arm_smmu_s2cr {
        struct iommu_group              *group;
        int                             count;
@@ -425,10 +433,10 @@ enum arm_smmu_domain_stage {
 struct arm_smmu_domain {
        struct arm_smmu_device          *smmu;
        struct io_pgtable_ops           *pgtbl_ops;
-       spinlock_t                      pgtbl_lock;
        struct arm_smmu_cfg             cfg;
        enum arm_smmu_domain_stage      stage;
        struct mutex                    init_mutex; /* Protects smmu pointer */
+       spinlock_t                      cb_lock; /* Serialises ATS1* ops */
        struct iommu_domain             domain;
 };
 
@@ -1010,6 +1018,9 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
                .iommu_dev      = smmu->dev,
        };
 
+       if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
+               pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
+
        smmu_domain->smmu = smmu;
        pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
        if (!pgtbl_ops) {
@@ -1102,7 +1113,7 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
        }
 
        mutex_init(&smmu_domain->init_mutex);
-       spin_lock_init(&smmu_domain->pgtbl_lock);
+       spin_lock_init(&smmu_domain->cb_lock);
 
        return &smmu_domain->domain;
 }
@@ -1380,35 +1391,23 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
                        phys_addr_t paddr, size_t size, int prot)
 {
-       int ret;
-       unsigned long flags;
-       struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
-       struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops;
+       struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
 
        if (!ops)
                return -ENODEV;
 
-       spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
-       ret = ops->map(ops, iova, paddr, size, prot);
-       spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
-       return ret;
+       return ops->map(ops, iova, paddr, size, prot);
 }
 
 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
                             size_t size)
 {
-       size_t ret;
-       unsigned long flags;
-       struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
-       struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops;
+       struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
 
        if (!ops)
                return 0;
 
-       spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
-       ret = ops->unmap(ops, iova, size);
-       spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
-       return ret;
+       return ops->unmap(ops, iova, size);
 }
 
 static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
@@ -1422,10 +1421,11 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
        void __iomem *cb_base;
        u32 tmp;
        u64 phys;
-       unsigned long va;
+       unsigned long va, flags;
 
        cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
 
+       spin_lock_irqsave(&smmu_domain->cb_lock, flags);
        /* ATS1 registers can only be written atomically */
        va = iova & ~0xfffUL;
        if (smmu->version == ARM_SMMU_V2)
@@ -1435,6 +1435,7 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
 
        if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
                                      !(tmp & ATSR_ACTIVE), 5, 50)) {
+               spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
                dev_err(dev,
                        "iova to phys timed out on %pad. Falling back to software table walk.\n",
                        &iova);
@@ -1442,6 +1443,7 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
        }
 
        phys = readq_relaxed(cb_base + ARM_SMMU_CB_PAR);
+       spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
        if (phys & CB_PAR_F) {
                dev_err(dev, "translation fault!\n");
                dev_err(dev, "PAR = 0x%llx\n", phys);
@@ -1454,10 +1456,8 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
 static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
                                        dma_addr_t iova)
 {
-       phys_addr_t ret;
-       unsigned long flags;
        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
-       struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops;
+       struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
 
        if (domain->type == IOMMU_DOMAIN_IDENTITY)
                return iova;
@@ -1465,17 +1465,11 @@ static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
        if (!ops)
                return 0;
 
-       spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
        if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
-                       smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
-               ret = arm_smmu_iova_to_phys_hard(domain, iova);
-       } else {
-               ret = ops->iova_to_phys(ops, iova);
-       }
-
-       spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
+                       smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
+               return arm_smmu_iova_to_phys_hard(domain, iova);
 
-       return ret;
+       return ops->iova_to_phys(ops, iova);
 }
 
 static bool arm_smmu_capable(enum iommu_cap cap)
@@ -2073,6 +2067,10 @@ static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
                smmu->version = ARM_SMMU_V1;
                smmu->model = GENERIC_SMMU;
                break;
+       case ACPI_IORT_SMMU_CORELINK_MMU401:
+               smmu->version = ARM_SMMU_V1_64K;
+               smmu->model = GENERIC_SMMU;
+               break;
        case ACPI_IORT_SMMU_V2:
                smmu->version = ARM_SMMU_V2;
                smmu->model = GENERIC_SMMU;
@@ -2081,6 +2079,10 @@ static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
                smmu->version = ARM_SMMU_V2;
                smmu->model = ARM_MMU500;
                break;
+       case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
+               smmu->version = ARM_SMMU_V2;
+               smmu->model = CAVIUM_SMMUV2;
+               break;
        default:
                ret = -ENODEV;
        }
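
The arm-smmu hunks above drop the per-domain page-table lock entirely (map/unmap now call straight into the lockless io-pgtable code) and keep only a narrow cb_lock around the ATS1 register write/poll/read sequence, which must not be interleaved between CPUs. Below is a standalone userspace analogue of that narrowed critical section, using a pthread mutex and a made-up register block (fake_cb, ats_translate and friends are illustrative names, not the driver's); note that the timeout fallback path drops the lock before returning, just as the new error path in the hunk does.

#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in for the context bank's ATS registers; not the real MMIO layout. */
struct fake_cb {
        volatile uint64_t ats1;   /* lookup address register          */
        volatile uint32_t atsr;   /* "active" bit, cleared when done  */
        volatile uint64_t par;    /* result register                  */
        pthread_mutex_t lock;     /* serialises this sequence only    */
};

static bool poll_done(struct fake_cb *cb, int max_tries)
{
        while (max_tries--)
                if (!(cb->atsr & 1))
                        return true;
        return false;
}

/* The whole write/poll/read sequence holds the lock; every exit path,
 * including the timeout fallback, drops it before returning. */
static uint64_t ats_translate(struct fake_cb *cb, uint64_t iova)
{
        uint64_t phys;

        pthread_mutex_lock(&cb->lock);
        cb->ats1 = iova & ~0xfffULL;
        cb->atsr = 0;                   /* fake hardware: completes instantly */

        if (!poll_done(cb, 50)) {
                pthread_mutex_unlock(&cb->lock);
                return 0;               /* caller falls back to a table walk */
        }

        phys = cb->par = cb->ats1;      /* identity "translation" for the demo */
        pthread_mutex_unlock(&cb->lock);

        return phys | (iova & 0xfffULL);
}

int main(void)
{
        struct fake_cb cb = { .lock = PTHREAD_MUTEX_INITIALIZER };

        printf("phys = 0x%llx\n",
               (unsigned long long)ats_translate(&cb, 0x40001234));
        return 0;
}
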
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 9403336f1fa64f0290f44d1eed9b30402fba7814..9d1cebe7f6cbb14517718f5fd6d03de7ee503956 100644 (file)
@@ -316,7 +316,7 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
                 * If we have devices with different DMA masks, move the free
                 * area cache limit down for the benefit of the smaller one.
                 */
-               iovad->dma_32bit_pfn = min(end_pfn, iovad->dma_32bit_pfn);
+               iovad->dma_32bit_pfn = min(end_pfn + 1, iovad->dma_32bit_pfn);
 
                return 0;
        }
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 1e95475883cd179f618423391616604ad47718a7..687f18f65cea58d2a5f22725a2c36c78621dd3cd 100644 (file)
@@ -481,7 +481,7 @@ struct deferred_flush_data {
        struct deferred_flush_table *tables;
 };
 
-DEFINE_PER_CPU(struct deferred_flush_data, deferred_flush);
+static DEFINE_PER_CPU(struct deferred_flush_data, deferred_flush);
 
 /* bitmap for indexing intel_iommus */
 static int g_num_of_iommus;
@@ -2390,7 +2390,7 @@ static struct dmar_domain *find_domain(struct device *dev)
 
        /* No lock here, assumes no domain exit in normal case */
        info = dev->archdata.iommu;
-       if (info)
+       if (likely(info))
                return info->domain;
        return NULL;
 }
@@ -3478,7 +3478,7 @@ static unsigned long intel_alloc_iova(struct device *dev,
        return iova_pfn;
 }
 
-static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
+static struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
 {
        struct dmar_domain *domain, *tmp;
        struct dmar_rmrr_unit *rmrr;
@@ -3525,18 +3525,6 @@ out:
        return domain;
 }
 
-static inline struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
-{
-       struct device_domain_info *info;
-
-       /* No lock here, assumes no domain exit in normal case */
-       info = dev->archdata.iommu;
-       if (likely(info))
-               return info->domain;
-
-       return __get_valid_domain_for_dev(dev);
-}
-
 /* Check if the dev needs to go through non-identity map and unmap process.*/
 static int iommu_no_mapping(struct device *dev)
 {
@@ -3725,10 +3713,8 @@ static void add_unmap(struct dmar_domain *dom, unsigned long iova_pfn,
        struct intel_iommu *iommu;
        struct deferred_flush_entry *entry;
        struct deferred_flush_data *flush_data;
-       unsigned int cpuid;
 
-       cpuid = get_cpu();
-       flush_data = per_cpu_ptr(&deferred_flush, cpuid);
+       flush_data = raw_cpu_ptr(&deferred_flush);
 
        /* Flush all CPUs' entries to avoid deferring too much.  If
         * this becomes a bottleneck, can just flush us, and rely on
@@ -3761,8 +3747,6 @@ static void add_unmap(struct dmar_domain *dom, unsigned long iova_pfn,
        }
        flush_data->size++;
        spin_unlock_irqrestore(&flush_data->lock, flags);
-
-       put_cpu();
 }
 
 static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
@@ -3973,7 +3957,7 @@ static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
        return !dma_addr;
 }
 
-struct dma_map_ops intel_dma_ops = {
+const struct dma_map_ops intel_dma_ops = {
        .alloc = intel_alloc_coherent,
        .free = intel_free_coherent,
        .map_sg = intel_map_sg,
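
The intel-iommu hunks above switch the deferred-flush path from get_cpu()/put_cpu() to raw_cpu_ptr(): correctness comes from each per-CPU batch's own spinlock, so there is no need to disable preemption just to pick a batch, and a migration after the lookup merely lands the entry in another CPU's queue. A rough userspace sketch of that idea, assuming a fixed number of queues and using sched_getcpu() plus pthread mutexes in place of per-CPU data and spinlocks (flush_queue, add_unmap and the sizes are invented for the example):

#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>
#include <stdio.h>

#define NR_QUEUES 4
#define QUEUE_LEN 16

/* One deferred-flush batch per "CPU", each protected by its own lock. */
struct flush_queue {
        pthread_mutex_t lock;
        unsigned long entries[QUEUE_LEN];
        int size;
};

static struct flush_queue queues[NR_QUEUES];

static void flush_all(void)
{
        for (int i = 0; i < NR_QUEUES; i++) {
                pthread_mutex_lock(&queues[i].lock);
                queues[i].size = 0;     /* pretend we issued the IOTLB flush */
                pthread_mutex_unlock(&queues[i].lock);
        }
}

/* Like raw_cpu_ptr(): pick the current CPU's queue without pinning ourselves
 * to that CPU; the per-queue lock provides correctness, so being migrated
 * right after the lookup is harmless. */
static void add_unmap(unsigned long iova_pfn)
{
        int cpu = sched_getcpu();
        struct flush_queue *q = &queues[cpu >= 0 ? cpu % NR_QUEUES : 0];

        pthread_mutex_lock(&q->lock);
        while (q->size == QUEUE_LEN) {
                pthread_mutex_unlock(&q->lock);
                flush_all();
                pthread_mutex_lock(&q->lock);
        }
        q->entries[q->size++] = iova_pfn;
        pthread_mutex_unlock(&q->lock);
}

int main(void)
{
        for (int i = 0; i < NR_QUEUES; i++)
                pthread_mutex_init(&queues[i].lock, NULL);

        for (unsigned long pfn = 0; pfn < 100; pfn++)
                add_unmap(pfn);

        printf("done\n");
        return 0;
}
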
diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index 23c427602c55ba10a546737edabb1bfb24497304..f167c0d84ebfb7f5937eb798c9cdbd8bd9abe6c6 100644 (file)
@@ -489,6 +489,36 @@ int intel_svm_unbind_mm(struct device *dev, int pasid)
 }
 EXPORT_SYMBOL_GPL(intel_svm_unbind_mm);
 
+int intel_svm_is_pasid_valid(struct device *dev, int pasid)
+{
+       struct intel_iommu *iommu;
+       struct intel_svm *svm;
+       int ret = -EINVAL;
+
+       mutex_lock(&pasid_mutex);
+       iommu = intel_svm_device_to_iommu(dev);
+       if (!iommu || !iommu->pasid_table)
+               goto out;
+
+       svm = idr_find(&iommu->pasid_idr, pasid);
+       if (!svm)
+               goto out;
+
+       /* init_mm is used in this case */
+       if (!svm->mm)
+               ret = 1;
+       else if (atomic_read(&svm->mm->mm_users) > 0)
+               ret = 1;
+       else
+               ret = 0;
+
+ out:
+       mutex_unlock(&pasid_mutex);
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(intel_svm_is_pasid_valid);
+
 /* Page request queue descriptor */
 struct page_req_dsc {
        u64 srr:1;
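
The intel_svm_is_pasid_valid() helper added above reports whether a PASID still refers to a live address space: a failed lookup is an error, a bound PASID whose mm has gone defunct is 0, anything else is 1 (the real code also treats a kernel PASID with no mm as valid). A much-simplified standalone sketch of that lookup-under-a-mutex shape, with a plain array standing in for the IDR and a counter for mm_users (all names here are invented):

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

#define MAX_PASID 8

struct fake_svm {
        int in_use;
        int mm_users;   /* 0 means the address space has already gone away */
};

static struct fake_svm pasid_table[MAX_PASID];
static pthread_mutex_t pasid_mutex = PTHREAD_MUTEX_INITIALIZER;

/* 1: PASID maps to a live address space, 0: mm is defunct,
 * -EINVAL: the PASID was never bound at all. */
static int is_pasid_valid(int pasid)
{
        int ret = -EINVAL;

        pthread_mutex_lock(&pasid_mutex);
        if (pasid >= 0 && pasid < MAX_PASID && pasid_table[pasid].in_use)
                ret = pasid_table[pasid].mm_users > 0 ? 1 : 0;
        pthread_mutex_unlock(&pasid_mutex);

        return ret;
}

int main(void)
{
        pasid_table[2] = (struct fake_svm){ .in_use = 1, .mm_users = 3 };
        pasid_table[5] = (struct fake_svm){ .in_use = 1, .mm_users = 0 };

        printf("%d %d %d\n", is_pasid_valid(2), is_pasid_valid(5),
               is_pasid_valid(7));      /* 1 0 -EINVAL */
        return 0;
}
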
diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index 8fc641ea2e415fdf94c3e0ab4cd949d192f51c64..a5b89f6bcdbf08af9e4bfb75d1baf1e063735708 100644 (file)
@@ -76,7 +76,7 @@ static struct hpet_scope ir_hpet[MAX_HPET_TBS];
  * the dmar_global_lock.
  */
 static DEFINE_RAW_SPINLOCK(irq_2_ir_lock);
-static struct irq_domain_ops intel_ir_domain_ops;
+static const struct irq_domain_ops intel_ir_domain_ops;
 
 static void iommu_disable_irq_remapping(struct intel_iommu *iommu);
 static int __init parse_ioapics_under_ir(void);
@@ -1407,7 +1407,7 @@ static void intel_irq_remapping_deactivate(struct irq_domain *domain,
        modify_irte(&data->irq_2_iommu, &entry);
 }
 
-static struct irq_domain_ops intel_ir_domain_ops = {
+static const struct irq_domain_ops intel_ir_domain_ops = {
        .alloc = intel_irq_remapping_alloc,
        .free = intel_irq_remapping_free,
        .activate = intel_irq_remapping_activate,
diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index 8d6ca28c3e1f14a6c364aae89640ee6535f8f0d6..af330f513653d2849682b2d4536dd86dcb6a43d7 100644 (file)
@@ -32,6 +32,7 @@
 
 #define pr_fmt(fmt)    "arm-v7s io-pgtable: " fmt
 
+#include <linux/atomic.h>
 #include <linux/dma-mapping.h>
 #include <linux/gfp.h>
 #include <linux/iommu.h>
@@ -39,6 +40,7 @@
 #include <linux/kmemleak.h>
 #include <linux/sizes.h>
 #include <linux/slab.h>
+#include <linux/spinlock.h>
 #include <linux/types.h>
 
 #include <asm/barrier.h>
@@ -92,7 +94,8 @@
 #define ARM_V7S_PTE_TYPE_CONT_PAGE     0x1
 
 #define ARM_V7S_PTE_IS_VALID(pte)      (((pte) & 0x3) != 0)
-#define ARM_V7S_PTE_IS_TABLE(pte, lvl) (lvl == 1 && ((pte) & ARM_V7S_PTE_TYPE_TABLE))
+#define ARM_V7S_PTE_IS_TABLE(pte, lvl) \
+       ((lvl) == 1 && (((pte) & 0x3) == ARM_V7S_PTE_TYPE_TABLE))
 
 /* Page table bits */
 #define ARM_V7S_ATTR_XN(lvl)           BIT(4 * (2 - (lvl)))
@@ -167,6 +170,7 @@ struct arm_v7s_io_pgtable {
 
        arm_v7s_iopte           *pgd;
        struct kmem_cache       *l2_tables;
+       spinlock_t              split_lock;
 };
 
 static dma_addr_t __arm_v7s_dma_addr(void *pages)
@@ -186,7 +190,8 @@ static arm_v7s_iopte *iopte_deref(arm_v7s_iopte pte, int lvl)
 static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp,
                                   struct arm_v7s_io_pgtable *data)
 {
-       struct device *dev = data->iop.cfg.iommu_dev;
+       struct io_pgtable_cfg *cfg = &data->iop.cfg;
+       struct device *dev = cfg->iommu_dev;
        dma_addr_t dma;
        size_t size = ARM_V7S_TABLE_SIZE(lvl);
        void *table = NULL;
@@ -195,7 +200,7 @@ static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp,
                table = (void *)__get_dma_pages(__GFP_ZERO, get_order(size));
        else if (lvl == 2)
                table = kmem_cache_zalloc(data->l2_tables, gfp | GFP_DMA);
-       if (table && !selftest_running) {
+       if (table && !(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) {
                dma = dma_map_single(dev, table, size, DMA_TO_DEVICE);
                if (dma_mapping_error(dev, dma))
                        goto out_free;
@@ -224,10 +229,11 @@ out_free:
 static void __arm_v7s_free_table(void *table, int lvl,
                                 struct arm_v7s_io_pgtable *data)
 {
-       struct device *dev = data->iop.cfg.iommu_dev;
+       struct io_pgtable_cfg *cfg = &data->iop.cfg;
+       struct device *dev = cfg->iommu_dev;
        size_t size = ARM_V7S_TABLE_SIZE(lvl);
 
-       if (!selftest_running)
+       if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA))
                dma_unmap_single(dev, __arm_v7s_dma_addr(table), size,
                                 DMA_TO_DEVICE);
        if (lvl == 1)
@@ -239,7 +245,7 @@ static void __arm_v7s_free_table(void *table, int lvl,
 static void __arm_v7s_pte_sync(arm_v7s_iopte *ptep, int num_entries,
                               struct io_pgtable_cfg *cfg)
 {
-       if (selftest_running)
+       if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA))
                return;
 
        dma_sync_single_for_device(cfg->iommu_dev, __arm_v7s_dma_addr(ptep),
@@ -280,6 +286,13 @@ static arm_v7s_iopte arm_v7s_prot_to_pte(int prot, int lvl,
        else if (prot & IOMMU_CACHE)
                pte |= ARM_V7S_ATTR_B | ARM_V7S_ATTR_C;
 
+       pte |= ARM_V7S_PTE_TYPE_PAGE;
+       if (lvl == 1 && (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS))
+               pte |= ARM_V7S_ATTR_NS_SECTION;
+
+       if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_4GB)
+               pte |= ARM_V7S_ATTR_MTK_4GB;
+
        return pte;
 }
 
@@ -352,7 +365,7 @@ static int arm_v7s_init_pte(struct arm_v7s_io_pgtable *data,
                            int lvl, int num_entries, arm_v7s_iopte *ptep)
 {
        struct io_pgtable_cfg *cfg = &data->iop.cfg;
-       arm_v7s_iopte pte = arm_v7s_prot_to_pte(prot, lvl, cfg);
+       arm_v7s_iopte pte;
        int i;
 
        for (i = 0; i < num_entries; i++)
@@ -374,13 +387,7 @@ static int arm_v7s_init_pte(struct arm_v7s_io_pgtable *data,
                        return -EEXIST;
                }
 
-       pte |= ARM_V7S_PTE_TYPE_PAGE;
-       if (lvl == 1 && (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS))
-               pte |= ARM_V7S_ATTR_NS_SECTION;
-
-       if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_4GB)
-               pte |= ARM_V7S_ATTR_MTK_4GB;
-
+       pte = arm_v7s_prot_to_pte(prot, lvl, cfg);
        if (num_entries > 1)
                pte = arm_v7s_pte_to_cont(pte, lvl);
 
@@ -390,6 +397,30 @@ static int arm_v7s_init_pte(struct arm_v7s_io_pgtable *data,
        return 0;
 }
 
+static arm_v7s_iopte arm_v7s_install_table(arm_v7s_iopte *table,
+                                          arm_v7s_iopte *ptep,
+                                          arm_v7s_iopte curr,
+                                          struct io_pgtable_cfg *cfg)
+{
+       arm_v7s_iopte old, new;
+
+       new = virt_to_phys(table) | ARM_V7S_PTE_TYPE_TABLE;
+       if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
+               new |= ARM_V7S_ATTR_NS_TABLE;
+
+       /*
+        * Ensure the table itself is visible before its PTE can be.
+        * Whilst we could get away with cmpxchg64_release below, this
+        * doesn't have any ordering semantics when !CONFIG_SMP.
+        */
+       dma_wmb();
+
+       old = cmpxchg_relaxed(ptep, curr, new);
+       __arm_v7s_pte_sync(ptep, 1, cfg);
+
+       return old;
+}
+
 static int __arm_v7s_map(struct arm_v7s_io_pgtable *data, unsigned long iova,
                         phys_addr_t paddr, size_t size, int prot,
                         int lvl, arm_v7s_iopte *ptep)
@@ -411,20 +442,23 @@ static int __arm_v7s_map(struct arm_v7s_io_pgtable *data, unsigned long iova,
                return -EINVAL;
 
        /* Grab a pointer to the next level */
-       pte = *ptep;
+       pte = READ_ONCE(*ptep);
        if (!pte) {
                cptep = __arm_v7s_alloc_table(lvl + 1, GFP_ATOMIC, data);
                if (!cptep)
                        return -ENOMEM;
 
-               pte = virt_to_phys(cptep) | ARM_V7S_PTE_TYPE_TABLE;
-               if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
-                       pte |= ARM_V7S_ATTR_NS_TABLE;
+               pte = arm_v7s_install_table(cptep, ptep, 0, cfg);
+               if (pte)
+                       __arm_v7s_free_table(cptep, lvl + 1, data);
+       } else {
+               /* We've no easy way of knowing if it's synced yet, so... */
+               __arm_v7s_pte_sync(ptep, 1, cfg);
+       }
 
-               __arm_v7s_set_pte(ptep, pte, 1, cfg);
-       } else if (ARM_V7S_PTE_IS_TABLE(pte, lvl)) {
+       if (ARM_V7S_PTE_IS_TABLE(pte, lvl)) {
                cptep = iopte_deref(pte, lvl);
-       } else {
+       } else if (pte) {
                /* We require an unmap first */
                WARN_ON(!selftest_running);
                return -EEXIST;
@@ -477,66 +511,73 @@ static void arm_v7s_free_pgtable(struct io_pgtable *iop)
        kfree(data);
 }
 
-static void arm_v7s_split_cont(struct arm_v7s_io_pgtable *data,
-                              unsigned long iova, int idx, int lvl,
-                              arm_v7s_iopte *ptep)
+static arm_v7s_iopte arm_v7s_split_cont(struct arm_v7s_io_pgtable *data,
+                                       unsigned long iova, int idx, int lvl,
+                                       arm_v7s_iopte *ptep)
 {
        struct io_pgtable *iop = &data->iop;
        arm_v7s_iopte pte;
        size_t size = ARM_V7S_BLOCK_SIZE(lvl);
        int i;
 
+       /* Check that we didn't lose a race to get the lock */
+       pte = *ptep;
+       if (!arm_v7s_pte_is_cont(pte, lvl))
+               return pte;
+
        ptep -= idx & (ARM_V7S_CONT_PAGES - 1);
-       pte = arm_v7s_cont_to_pte(*ptep, lvl);
-       for (i = 0; i < ARM_V7S_CONT_PAGES; i++) {
-               ptep[i] = pte;
-               pte += size;
-       }
+       pte = arm_v7s_cont_to_pte(pte, lvl);
+       for (i = 0; i < ARM_V7S_CONT_PAGES; i++)
+               ptep[i] = pte + i * size;
 
        __arm_v7s_pte_sync(ptep, ARM_V7S_CONT_PAGES, &iop->cfg);
 
        size *= ARM_V7S_CONT_PAGES;
        io_pgtable_tlb_add_flush(iop, iova, size, size, true);
        io_pgtable_tlb_sync(iop);
+       return pte;
 }
 
 static int arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data,
                                   unsigned long iova, size_t size,
-                                  arm_v7s_iopte *ptep)
+                                  arm_v7s_iopte blk_pte, arm_v7s_iopte *ptep)
 {
-       unsigned long blk_start, blk_end, blk_size;
-       phys_addr_t blk_paddr;
-       arm_v7s_iopte table = 0;
-       int prot = arm_v7s_pte_to_prot(*ptep, 1);
+       struct io_pgtable_cfg *cfg = &data->iop.cfg;
+       arm_v7s_iopte pte, *tablep;
+       int i, unmap_idx, num_entries, num_ptes;
 
-       blk_size = ARM_V7S_BLOCK_SIZE(1);
-       blk_start = iova & ARM_V7S_LVL_MASK(1);
-       blk_end = blk_start + ARM_V7S_BLOCK_SIZE(1);
-       blk_paddr = *ptep & ARM_V7S_LVL_MASK(1);
+       tablep = __arm_v7s_alloc_table(2, GFP_ATOMIC, data);
+       if (!tablep)
+               return 0; /* Bytes unmapped */
 
-       for (; blk_start < blk_end; blk_start += size, blk_paddr += size) {
-               arm_v7s_iopte *tablep;
+       num_ptes = ARM_V7S_PTES_PER_LVL(2);
+       num_entries = size >> ARM_V7S_LVL_SHIFT(2);
+       unmap_idx = ARM_V7S_LVL_IDX(iova, 2);
 
+       pte = arm_v7s_prot_to_pte(arm_v7s_pte_to_prot(blk_pte, 1), 2, cfg);
+       if (num_entries > 1)
+               pte = arm_v7s_pte_to_cont(pte, 2);
+
+       for (i = 0; i < num_ptes; i += num_entries, pte += size) {
                /* Unmap! */
-               if (blk_start == iova)
+               if (i == unmap_idx)
                        continue;
 
-               /* __arm_v7s_map expects a pointer to the start of the table */
-               tablep = &table - ARM_V7S_LVL_IDX(blk_start, 1);
-               if (__arm_v7s_map(data, blk_start, blk_paddr, size, prot, 1,
-                                 tablep) < 0) {
-                       if (table) {
-                               /* Free the table we allocated */
-                               tablep = iopte_deref(table, 1);
-                               __arm_v7s_free_table(tablep, 2, data);
-                       }
-                       return 0; /* Bytes unmapped */
-               }
+               __arm_v7s_set_pte(&tablep[i], pte, num_entries, cfg);
        }
 
-       __arm_v7s_set_pte(ptep, table, 1, &data->iop.cfg);
-       iova &= ~(blk_size - 1);
-       io_pgtable_tlb_add_flush(&data->iop, iova, blk_size, blk_size, true);
+       pte = arm_v7s_install_table(tablep, ptep, blk_pte, cfg);
+       if (pte != blk_pte) {
+               __arm_v7s_free_table(tablep, 2, data);
+
+               if (!ARM_V7S_PTE_IS_TABLE(pte, 1))
+                       return 0;
+
+               tablep = iopte_deref(pte, 1);
+               return __arm_v7s_unmap(data, iova, size, 2, tablep);
+       }
+
+       io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true);
        return size;
 }
 
@@ -555,17 +596,28 @@ static int __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
        idx = ARM_V7S_LVL_IDX(iova, lvl);
        ptep += idx;
        do {
-               if (WARN_ON(!ARM_V7S_PTE_IS_VALID(ptep[i])))
+               pte[i] = READ_ONCE(ptep[i]);
+               if (WARN_ON(!ARM_V7S_PTE_IS_VALID(pte[i])))
                        return 0;
-               pte[i] = ptep[i];
        } while (++i < num_entries);
 
        /*
         * If we've hit a contiguous 'large page' entry at this level, it
         * needs splitting first, unless we're unmapping the whole lot.
+        *
+        * For splitting, we can't rewrite 16 PTEs atomically, and since we
+        * can't necessarily assume TEX remap we don't have a software bit to
+        * mark live entries being split. In practice (i.e. DMA API code), we
+        * will never be splitting large pages anyway, so just wrap this edge
+        * case in a lock for the sake of correctness and be done with it.
         */
-       if (num_entries <= 1 && arm_v7s_pte_is_cont(pte[0], lvl))
-               arm_v7s_split_cont(data, iova, idx, lvl, ptep);
+       if (num_entries <= 1 && arm_v7s_pte_is_cont(pte[0], lvl)) {
+               unsigned long flags;
+
+               spin_lock_irqsave(&data->split_lock, flags);
+               pte[0] = arm_v7s_split_cont(data, iova, idx, lvl, ptep);
+               spin_unlock_irqrestore(&data->split_lock, flags);
+       }
 
        /* If the size matches this level, we're in the right place */
        if (num_entries) {
@@ -593,7 +645,7 @@ static int __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
                 * Insert a table at the next level to map the old region,
                 * minus the part we want to unmap
                 */
-               return arm_v7s_split_blk_unmap(data, iova, size, ptep);
+               return arm_v7s_split_blk_unmap(data, iova, size, pte[0], ptep);
        }
 
        /* Keep on walkin' */
@@ -623,7 +675,8 @@ static phys_addr_t arm_v7s_iova_to_phys(struct io_pgtable_ops *ops,
        u32 mask;
 
        do {
-               pte = ptep[ARM_V7S_LVL_IDX(iova, ++lvl)];
+               ptep += ARM_V7S_LVL_IDX(iova, ++lvl);
+               pte = READ_ONCE(*ptep);
                ptep = iopte_deref(pte, lvl);
        } while (ARM_V7S_PTE_IS_TABLE(pte, lvl));
 
@@ -651,7 +704,8 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
        if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |
                            IO_PGTABLE_QUIRK_NO_PERMS |
                            IO_PGTABLE_QUIRK_TLBI_ON_MAP |
-                           IO_PGTABLE_QUIRK_ARM_MTK_4GB))
+                           IO_PGTABLE_QUIRK_ARM_MTK_4GB |
+                           IO_PGTABLE_QUIRK_NO_DMA))
                return NULL;
 
        /* If ARM_MTK_4GB is enabled, the NO_PERMS is also expected. */
@@ -663,6 +717,7 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
        if (!data)
                return NULL;
 
+       spin_lock_init(&data->split_lock);
        data->l2_tables = kmem_cache_create("io-pgtable_armv7s_l2",
                                            ARM_V7S_TABLE_SIZE(2),
                                            ARM_V7S_TABLE_SIZE(2),
@@ -749,7 +804,7 @@ static void dummy_tlb_sync(void *cookie)
        WARN_ON(cookie != cfg_cookie);
 }
 
-static struct iommu_gather_ops dummy_tlb_ops = {
+static const struct iommu_gather_ops dummy_tlb_ops = {
        .tlb_flush_all  = dummy_tlb_flush_all,
        .tlb_add_flush  = dummy_tlb_add_flush,
        .tlb_sync       = dummy_tlb_sync,
@@ -768,7 +823,7 @@ static int __init arm_v7s_do_selftests(void)
                .tlb = &dummy_tlb_ops,
                .oas = 32,
                .ias = 32,
-               .quirks = IO_PGTABLE_QUIRK_ARM_NS,
+               .quirks = IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_DMA,
                .pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M,
        };
        unsigned int iova, size, iova_start;
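
The heart of the lockless v7s map path above is arm_v7s_install_table(): zero-fill a new table, make it observable first (dma_wmb() for a non-coherent walker), then publish it with a single cmpxchg on the parent PTE; whoever loses the race frees their table and walks the winner's. A minimal userspace analogue using C11 atomics, where release ordering plays the role of the barrier and a tagged uintptr_t stands in for the PTE encoding (the 0x3 tag and the names are invented for the example):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define ENTRIES 4

/* The parent "PTE" either holds 0 or a tagged pointer to a child table. */
static _Atomic(uintptr_t) parent_pte;

static uintptr_t install_table(uintptr_t *table)
{
        uintptr_t expected = 0;
        uintptr_t new = (uintptr_t)table | 0x3; /* pretend 0x3 marks "table" */

        /*
         * Release ordering publishes the zero-filled table before the
         * pointer to it can be observed -- the same job dma_wmb() does
         * for a non-coherent walker in the kernel code above.
         */
        if (atomic_compare_exchange_strong_explicit(&parent_pte, &expected, new,
                                                    memory_order_release,
                                                    memory_order_relaxed))
                return 0;               /* we won: our table is now live */

        return expected;                /* we lost: caller frees its table */
}

int main(void)
{
        uintptr_t *table = calloc(ENTRIES, sizeof(*table));
        uintptr_t old;

        if (!table)
                return 1;

        old = install_table(table);
        if (old) {
                free(table);            /* somebody beat us to it */
                table = (uintptr_t *)(old & ~(uintptr_t)0x3);
        }
        printf("live table at %p\n", (void *)table);
        return 0;
}

The same pattern appears again in the LPAE version below, with an extra software bit tracking whether the published entry has already been cache-cleaned.
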
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 6e5df5e0a3bdc574a766fbfe8069a4b46f4f0836..b182039862c50debf8c55df4aad825cee468eed6 100644 (file)
@@ -20,6 +20,7 @@
 
 #define pr_fmt(fmt)    "arm-lpae io-pgtable: " fmt
 
+#include <linux/atomic.h>
 #include <linux/iommu.h>
 #include <linux/kernel.h>
 #include <linux/sizes.h>
 #define ARM_LPAE_PTE_ATTR_HI_MASK      (((arm_lpae_iopte)6) << 52)
 #define ARM_LPAE_PTE_ATTR_MASK         (ARM_LPAE_PTE_ATTR_LO_MASK |    \
                                         ARM_LPAE_PTE_ATTR_HI_MASK)
+/* Software bit for solving coherency races */
+#define ARM_LPAE_PTE_SW_SYNC           (((arm_lpae_iopte)1) << 55)
 
 /* Stage-1 PTE */
 #define ARM_LPAE_PTE_AP_UNPRIV         (((arm_lpae_iopte)1) << 6)
@@ -217,7 +220,7 @@ static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp,
        if (!pages)
                return NULL;
 
-       if (!selftest_running) {
+       if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) {
                dma = dma_map_single(dev, pages, size, DMA_TO_DEVICE);
                if (dma_mapping_error(dev, dma))
                        goto out_free;
@@ -243,40 +246,64 @@ out_free:
 static void __arm_lpae_free_pages(void *pages, size_t size,
                                  struct io_pgtable_cfg *cfg)
 {
-       if (!selftest_running)
+       if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA))
                dma_unmap_single(cfg->iommu_dev, __arm_lpae_dma_addr(pages),
                                 size, DMA_TO_DEVICE);
        free_pages_exact(pages, size);
 }
 
+static void __arm_lpae_sync_pte(arm_lpae_iopte *ptep,
+                               struct io_pgtable_cfg *cfg)
+{
+       dma_sync_single_for_device(cfg->iommu_dev, __arm_lpae_dma_addr(ptep),
+                                  sizeof(*ptep), DMA_TO_DEVICE);
+}
+
 static void __arm_lpae_set_pte(arm_lpae_iopte *ptep, arm_lpae_iopte pte,
                               struct io_pgtable_cfg *cfg)
 {
        *ptep = pte;
 
-       if (!selftest_running)
-               dma_sync_single_for_device(cfg->iommu_dev,
-                                          __arm_lpae_dma_addr(ptep),
-                                          sizeof(pte), DMA_TO_DEVICE);
+       if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA))
+               __arm_lpae_sync_pte(ptep, cfg);
 }
 
 static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
                            unsigned long iova, size_t size, int lvl,
                            arm_lpae_iopte *ptep);
 
+static void __arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
+                               phys_addr_t paddr, arm_lpae_iopte prot,
+                               int lvl, arm_lpae_iopte *ptep)
+{
+       arm_lpae_iopte pte = prot;
+
+       if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_NS)
+               pte |= ARM_LPAE_PTE_NS;
+
+       if (lvl == ARM_LPAE_MAX_LEVELS - 1)
+               pte |= ARM_LPAE_PTE_TYPE_PAGE;
+       else
+               pte |= ARM_LPAE_PTE_TYPE_BLOCK;
+
+       pte |= ARM_LPAE_PTE_AF | ARM_LPAE_PTE_SH_IS;
+       pte |= pfn_to_iopte(paddr >> data->pg_shift, data);
+
+       __arm_lpae_set_pte(ptep, pte, &data->iop.cfg);
+}
+
 static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
                             unsigned long iova, phys_addr_t paddr,
                             arm_lpae_iopte prot, int lvl,
                             arm_lpae_iopte *ptep)
 {
-       arm_lpae_iopte pte = prot;
-       struct io_pgtable_cfg *cfg = &data->iop.cfg;
+       arm_lpae_iopte pte = *ptep;
 
-       if (iopte_leaf(*ptep, lvl)) {
+       if (iopte_leaf(pte, lvl)) {
                /* We require an unmap first */
                WARN_ON(!selftest_running);
                return -EEXIST;
-       } else if (iopte_type(*ptep, lvl) == ARM_LPAE_PTE_TYPE_TABLE) {
+       } else if (iopte_type(pte, lvl) == ARM_LPAE_PTE_TYPE_TABLE) {
                /*
                 * We need to unmap and free the old table before
                 * overwriting it with a block entry.
@@ -289,19 +316,40 @@ static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
                        return -EINVAL;
        }
 
+       __arm_lpae_init_pte(data, paddr, prot, lvl, ptep);
+       return 0;
+}
+
+static arm_lpae_iopte arm_lpae_install_table(arm_lpae_iopte *table,
+                                            arm_lpae_iopte *ptep,
+                                            arm_lpae_iopte curr,
+                                            struct io_pgtable_cfg *cfg)
+{
+       arm_lpae_iopte old, new;
+
+       new = __pa(table) | ARM_LPAE_PTE_TYPE_TABLE;
        if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
-               pte |= ARM_LPAE_PTE_NS;
+               new |= ARM_LPAE_PTE_NSTABLE;
 
-       if (lvl == ARM_LPAE_MAX_LEVELS - 1)
-               pte |= ARM_LPAE_PTE_TYPE_PAGE;
-       else
-               pte |= ARM_LPAE_PTE_TYPE_BLOCK;
+       /*
+        * Ensure the table itself is visible before its PTE can be.
+        * Whilst we could get away with cmpxchg64_release below, this
+        * doesn't have any ordering semantics when !CONFIG_SMP.
+        */
+       dma_wmb();
 
-       pte |= ARM_LPAE_PTE_AF | ARM_LPAE_PTE_SH_IS;
-       pte |= pfn_to_iopte(paddr >> data->pg_shift, data);
+       old = cmpxchg64_relaxed(ptep, curr, new);
 
-       __arm_lpae_set_pte(ptep, pte, cfg);
-       return 0;
+       if ((cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA) ||
+           (old & ARM_LPAE_PTE_SW_SYNC))
+               return old;
+
+       /* Even if it's not ours, there's no point waiting; just kick it */
+       __arm_lpae_sync_pte(ptep, cfg);
+       if (old == curr)
+               WRITE_ONCE(*ptep, new | ARM_LPAE_PTE_SW_SYNC);
+
+       return old;
 }
 
 static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
@@ -310,6 +358,7 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
 {
        arm_lpae_iopte *cptep, pte;
        size_t block_size = ARM_LPAE_BLOCK_SIZE(lvl, data);
+       size_t tblsz = ARM_LPAE_GRANULE(data);
        struct io_pgtable_cfg *cfg = &data->iop.cfg;
 
        /* Find our entry at the current level */
@@ -324,20 +373,23 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
                return -EINVAL;
 
        /* Grab a pointer to the next level */
-       pte = *ptep;
+       pte = READ_ONCE(*ptep);
        if (!pte) {
-               cptep = __arm_lpae_alloc_pages(ARM_LPAE_GRANULE(data),
-                                              GFP_ATOMIC, cfg);
+               cptep = __arm_lpae_alloc_pages(tblsz, GFP_ATOMIC, cfg);
                if (!cptep)
                        return -ENOMEM;
 
-               pte = __pa(cptep) | ARM_LPAE_PTE_TYPE_TABLE;
-               if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
-                       pte |= ARM_LPAE_PTE_NSTABLE;
-               __arm_lpae_set_pte(ptep, pte, cfg);
-       } else if (!iopte_leaf(pte, lvl)) {
+               pte = arm_lpae_install_table(cptep, ptep, 0, cfg);
+               if (pte)
+                       __arm_lpae_free_pages(cptep, tblsz, cfg);
+       } else if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA) &&
+                  !(pte & ARM_LPAE_PTE_SW_SYNC)) {
+               __arm_lpae_sync_pte(ptep, cfg);
+       }
+
+       if (pte && !iopte_leaf(pte, lvl)) {
                cptep = iopte_deref(pte, data);
-       } else {
+       } else if (pte) {
                /* We require an unmap first */
                WARN_ON(!selftest_running);
                return -EEXIST;
@@ -452,40 +504,55 @@ static void arm_lpae_free_pgtable(struct io_pgtable *iop)
 
 static int arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
                                    unsigned long iova, size_t size,
-                                   arm_lpae_iopte prot, int lvl,
-                                   arm_lpae_iopte *ptep, size_t blk_size)
+                                   arm_lpae_iopte blk_pte, int lvl,
+                                   arm_lpae_iopte *ptep)
 {
-       unsigned long blk_start, blk_end;
+       struct io_pgtable_cfg *cfg = &data->iop.cfg;
+       arm_lpae_iopte pte, *tablep;
        phys_addr_t blk_paddr;
-       arm_lpae_iopte table = 0;
+       size_t tablesz = ARM_LPAE_GRANULE(data);
+       size_t split_sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
+       int i, unmap_idx = -1;
+
+       if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
+               return 0;
 
-       blk_start = iova & ~(blk_size - 1);
-       blk_end = blk_start + blk_size;
-       blk_paddr = iopte_to_pfn(*ptep, data) << data->pg_shift;
+       tablep = __arm_lpae_alloc_pages(tablesz, GFP_ATOMIC, cfg);
+       if (!tablep)
+               return 0; /* Bytes unmapped */
 
-       for (; blk_start < blk_end; blk_start += size, blk_paddr += size) {
-               arm_lpae_iopte *tablep;
+       if (size == split_sz)
+               unmap_idx = ARM_LPAE_LVL_IDX(iova, lvl, data);
 
+       blk_paddr = iopte_to_pfn(blk_pte, data) << data->pg_shift;
+       pte = iopte_prot(blk_pte);
+
+       for (i = 0; i < tablesz / sizeof(pte); i++, blk_paddr += split_sz) {
                /* Unmap! */
-               if (blk_start == iova)
+               if (i == unmap_idx)
                        continue;
 
-               /* __arm_lpae_map expects a pointer to the start of the table */
-               tablep = &table - ARM_LPAE_LVL_IDX(blk_start, lvl, data);
-               if (__arm_lpae_map(data, blk_start, blk_paddr, size, prot, lvl,
-                                  tablep) < 0) {
-                       if (table) {
-                               /* Free the table we allocated */
-                               tablep = iopte_deref(table, data);
-                               __arm_lpae_free_pgtable(data, lvl + 1, tablep);
-                       }
-                       return 0; /* Bytes unmapped */
-               }
+               __arm_lpae_init_pte(data, blk_paddr, pte, lvl, &tablep[i]);
        }
 
-       __arm_lpae_set_pte(ptep, table, &data->iop.cfg);
-       iova &= ~(blk_size - 1);
-       io_pgtable_tlb_add_flush(&data->iop, iova, blk_size, blk_size, true);
+       pte = arm_lpae_install_table(tablep, ptep, blk_pte, cfg);
+       if (pte != blk_pte) {
+               __arm_lpae_free_pages(tablep, tablesz, cfg);
+               /*
+                * We may race against someone unmapping another part of this
+                * block, but anything else is invalid. We can't misinterpret
+                * a page entry here since we're never at the last level.
+                */
+               if (iopte_type(pte, lvl - 1) != ARM_LPAE_PTE_TYPE_TABLE)
+                       return 0;
+
+               tablep = iopte_deref(pte, data);
+       }
+
+       if (unmap_idx < 0)
+               return __arm_lpae_unmap(data, iova, size, lvl, tablep);
+
+       io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true);
        return size;
 }
 
@@ -495,19 +562,18 @@ static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
 {
        arm_lpae_iopte pte;
        struct io_pgtable *iop = &data->iop;
-       size_t blk_size = ARM_LPAE_BLOCK_SIZE(lvl, data);
 
        /* Something went horribly wrong and we ran out of page table */
        if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
                return 0;
 
        ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
-       pte = *ptep;
+       pte = READ_ONCE(*ptep);
        if (WARN_ON(!pte))
                return 0;
 
        /* If the size matches this level, we're in the right place */
-       if (size == blk_size) {
+       if (size == ARM_LPAE_BLOCK_SIZE(lvl, data)) {
                __arm_lpae_set_pte(ptep, 0, &iop->cfg);
 
                if (!iopte_leaf(pte, lvl)) {
@@ -527,9 +593,8 @@ static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
                 * Insert a table at the next level to map the old region,
                 * minus the part we want to unmap
                 */
-               return arm_lpae_split_blk_unmap(data, iova, size,
-                                               iopte_prot(pte), lvl, ptep,
-                                               blk_size);
+               return arm_lpae_split_blk_unmap(data, iova, size, pte,
+                                               lvl + 1, ptep);
        }
 
        /* Keep on walkin' */
@@ -565,7 +630,8 @@ static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
                        return 0;
 
                /* Grab the IOPTE we're interested in */
-               pte = *(ptep + ARM_LPAE_LVL_IDX(iova, lvl, data));
+               ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
+               pte = READ_ONCE(*ptep);
 
                /* Valid entry? */
                if (!pte)
@@ -673,7 +739,7 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
        u64 reg;
        struct arm_lpae_io_pgtable *data;
 
-       if (cfg->quirks & ~IO_PGTABLE_QUIRK_ARM_NS)
+       if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_DMA))
                return NULL;
 
        data = arm_lpae_alloc_pgtable(cfg);
@@ -762,7 +828,7 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
        struct arm_lpae_io_pgtable *data;
 
        /* The NS quirk doesn't apply at stage 2 */
-       if (cfg->quirks)
+       if (cfg->quirks & ~IO_PGTABLE_QUIRK_NO_DMA)
                return NULL;
 
        data = arm_lpae_alloc_pgtable(cfg);
@@ -1066,6 +1132,7 @@ static int __init arm_lpae_do_selftests(void)
        struct io_pgtable_cfg cfg = {
                .tlb = &dummy_tlb_ops,
                .oas = 48,
+               .quirks = IO_PGTABLE_QUIRK_NO_DMA,
        };
 
        for (i = 0; i < ARRAY_SIZE(pgsize); ++i) {
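
The LPAE variant adds one refinement over the v7s code: a software-only ARM_LPAE_PTE_SW_SYNC bit recording that the published table pointer has already been made visible to a non-coherent walker, so later racers can skip the relatively expensive sync. A small C11 sketch of that flag, in which the PTE encoding, the integer "table addresses" and the sync_pte() counter are stand-ins, and the IO_PGTABLE_QUIRK_NO_DMA shortcut is omitted for brevity:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define PTE_TABLE  0x1u
#define PTE_SYNCED 0x2u         /* software-only bit, ignored by "hardware" */

static _Atomic(uintptr_t) pte;
static int sync_calls;

/* Stand-in for the expensive dma_sync_single_for_device() call. */
static void sync_pte(void)
{
        sync_calls++;
}

static uintptr_t install(uintptr_t table)
{
        uintptr_t expected = 0;
        uintptr_t new = table | PTE_TABLE;

        atomic_compare_exchange_strong_explicit(&pte, &expected, new,
                                                memory_order_release,
                                                memory_order_relaxed);
        if (expected & PTE_SYNCED)
                return expected;        /* someone already made it visible */

        /* Winner or not, there's no point waiting; just kick the sync. */
        sync_pte();
        if (expected == 0)              /* we won: record that it is synced */
                atomic_store_explicit(&pte, new | PTE_SYNCED,
                                      memory_order_relaxed);
        return expected;
}

int main(void)
{
        install(0x1000);        /* winner: syncs and sets PTE_SYNCED     */
        install(0x2000);        /* loser after the store: skips the sync */
        printf("sync calls: %d, pte = 0x%lx\n", sync_calls,
               (unsigned long)atomic_load(&pte));
        return 0;
}
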
diff --git a/drivers/iommu/io-pgtable.h b/drivers/iommu/io-pgtable.h
index 969d82cc92ca62b71bf564fb251a20d6f1cf5eeb..524263a7ae6f54c290d9fc017633e0ac074054df 100644 (file)
@@ -65,11 +65,17 @@ struct io_pgtable_cfg {
         *      PTEs, for Mediatek IOMMUs which treat it as a 33rd address bit
         *      when the SoC is in "4GB mode" and they can only access the high
         *      remap of DRAM (0x1_00000000 to 0x1_ffffffff).
+        *
+        * IO_PGTABLE_QUIRK_NO_DMA: Guarantees that the tables will only ever
+        *      be accessed by a fully cache-coherent IOMMU or CPU (e.g. for a
+        *      software-emulated IOMMU), such that pagetable updates need not
+        *      be treated as explicit DMA data.
         */
        #define IO_PGTABLE_QUIRK_ARM_NS         BIT(0)
        #define IO_PGTABLE_QUIRK_NO_PERMS       BIT(1)
        #define IO_PGTABLE_QUIRK_TLBI_ON_MAP    BIT(2)
        #define IO_PGTABLE_QUIRK_ARM_MTK_4GB    BIT(3)
+       #define IO_PGTABLE_QUIRK_NO_DMA         BIT(4)
        unsigned long                   quirks;
        unsigned long                   pgsize_bitmap;
        unsigned int                    ias;
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index cf7ca7e70777d6498f751fac98bc29fef9ba8ce6..3f6ea160afed3bdf0e06a220b89e53c04b7ba84f 100644 (file)
@@ -915,13 +915,7 @@ static int get_pci_alias_or_group(struct pci_dev *pdev, u16 alias, void *opaque)
  */
 struct iommu_group *generic_device_group(struct device *dev)
 {
-       struct iommu_group *group;
-
-       group = iommu_group_alloc();
-       if (IS_ERR(group))
-               return NULL;
-
-       return group;
+       return iommu_group_alloc();
 }
 
 /*
@@ -988,11 +982,7 @@ struct iommu_group *pci_device_group(struct device *dev)
                return group;
 
        /* No shared group found, allocate new */
-       group = iommu_group_alloc();
-       if (IS_ERR(group))
-               return NULL;
-
-       return group;
+       return iommu_group_alloc();
 }
 
 /**
@@ -1020,6 +1010,9 @@ struct iommu_group *iommu_group_get_for_dev(struct device *dev)
        if (ops && ops->device_group)
                group = ops->device_group(dev);
 
+       if (WARN_ON_ONCE(group == NULL))
+               return ERR_PTR(-EINVAL);
+
        if (IS_ERR(group))
                return group;
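
The iommu.c hunks above tighten the device_group contract: callbacks return ERR_PTR() values rather than NULL, and the core now warns once and converts a stray NULL into -EINVAL instead of silently losing the reason for the failure. For reference, a self-contained userspace copy of the ERR_PTR()/IS_ERR()/PTR_ERR() convention and a callback written against it (struct group and device_group() here are invented, not the kernel API):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_ERRNO 4095

/* Minimal userspace copies of the kernel's ERR_PTR helpers. */
static inline void *ERR_PTR(long error)     { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
        return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

struct group { int id; };

/* A device_group-style callback: on failure it reports *why* via
 * ERR_PTR() instead of collapsing every failure into NULL. */
static struct group *device_group(int want_failure)
{
        struct group *g;

        if (want_failure)
                return ERR_PTR(-ENODEV);

        g = malloc(sizeof(*g));
        return g ? g : ERR_PTR(-ENOMEM);
}

int main(void)
{
        struct group *g = device_group(1);

        if (IS_ERR(g))
                printf("callback failed: %ld\n", PTR_ERR(g)); /* -ENODEV */
        else
                free(g);
        return 0;
}
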
 
diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index 5c88ba70e4e0fe92b282ebf1e8a1d0b2857677d4..246f14c83944c8c2bcff3bb25eafd8d13a262266 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/slab.h>
 #include <linux/smp.h>
 #include <linux/bitops.h>
+#include <linux/cpu.h>
 
 static bool iova_rcache_insert(struct iova_domain *iovad,
                               unsigned long pfn,
@@ -48,7 +49,7 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule,
        iovad->cached32_node = NULL;
        iovad->granule = granule;
        iovad->start_pfn = start_pfn;
-       iovad->dma_32bit_pfn = pfn_32bit;
+       iovad->dma_32bit_pfn = pfn_32bit + 1;
        init_iova_rcaches(iovad);
 }
 EXPORT_SYMBOL_GPL(init_iova_domain);
@@ -63,7 +64,7 @@ __get_cached_rbnode(struct iova_domain *iovad, unsigned long *limit_pfn)
                struct rb_node *prev_node = rb_prev(iovad->cached32_node);
                struct iova *curr_iova =
                        rb_entry(iovad->cached32_node, struct iova, node);
-               *limit_pfn = curr_iova->pfn_lo - 1;
+               *limit_pfn = curr_iova->pfn_lo;
                return prev_node;
        }
 }
@@ -135,7 +136,7 @@ iova_insert_rbtree(struct rb_root *root, struct iova *iova,
 static unsigned int
 iova_get_pad_size(unsigned int size, unsigned int limit_pfn)
 {
-       return (limit_pfn + 1 - size) & (__roundup_pow_of_two(size) - 1);
+       return (limit_pfn - size) & (__roundup_pow_of_two(size) - 1);
 }
 
 static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
@@ -155,18 +156,15 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
        while (curr) {
                struct iova *curr_iova = rb_entry(curr, struct iova, node);
 
-               if (limit_pfn < curr_iova->pfn_lo)
+               if (limit_pfn <= curr_iova->pfn_lo) {
                        goto move_left;
-               else if (limit_pfn < curr_iova->pfn_hi)
-                       goto adjust_limit_pfn;
-               else {
+               } else if (limit_pfn > curr_iova->pfn_hi) {
                        if (size_aligned)
                                pad_size = iova_get_pad_size(size, limit_pfn);
-                       if ((curr_iova->pfn_hi + size + pad_size) <= limit_pfn)
+                       if ((curr_iova->pfn_hi + size + pad_size) < limit_pfn)
                                break;  /* found a free slot */
                }
-adjust_limit_pfn:
-               limit_pfn = curr_iova->pfn_lo ? (curr_iova->pfn_lo - 1) : 0;
+               limit_pfn = curr_iova->pfn_lo;
 move_left:
                prev = curr;
                curr = rb_prev(curr);
@@ -182,7 +180,7 @@ move_left:
        }
 
        /* pfn_lo will point to size aligned address if size_aligned is set */
-       new->pfn_lo = limit_pfn - (size + pad_size) + 1;
+       new->pfn_lo = limit_pfn - (size + pad_size);
        new->pfn_hi = new->pfn_lo + size - 1;
 
        /* If we have 'prev', it's a valid place to start the insertion. */
@@ -269,7 +267,7 @@ alloc_iova(struct iova_domain *iovad, unsigned long size,
        if (!new_iova)
                return NULL;
 
-       ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn,
+       ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn + 1,
                        new_iova, size_aligned);
 
        if (ret) {
@@ -398,10 +396,8 @@ retry:
 
                /* Try replenishing IOVAs by flushing rcache. */
                flushed_rcache = true;
-               preempt_disable();
                for_each_online_cpu(cpu)
                        free_cpu_cached_iovas(cpu, iovad);
-               preempt_enable();
                goto retry;
        }
 
@@ -729,7 +725,7 @@ static bool __iova_rcache_insert(struct iova_domain *iovad,
        bool can_insert = false;
        unsigned long flags;
 
-       cpu_rcache = get_cpu_ptr(rcache->cpu_rcaches);
+       cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
        spin_lock_irqsave(&cpu_rcache->lock, flags);
 
        if (!iova_magazine_full(cpu_rcache->loaded)) {
@@ -759,7 +755,6 @@ static bool __iova_rcache_insert(struct iova_domain *iovad,
                iova_magazine_push(cpu_rcache->loaded, iova_pfn);
 
        spin_unlock_irqrestore(&cpu_rcache->lock, flags);
-       put_cpu_ptr(rcache->cpu_rcaches);
 
        if (mag_to_free) {
                iova_magazine_free_pfns(mag_to_free, iovad);
@@ -793,7 +788,7 @@ static unsigned long __iova_rcache_get(struct iova_rcache *rcache,
        bool has_pfn = false;
        unsigned long flags;
 
-       cpu_rcache = get_cpu_ptr(rcache->cpu_rcaches);
+       cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
        spin_lock_irqsave(&cpu_rcache->lock, flags);
 
        if (!iova_magazine_empty(cpu_rcache->loaded)) {
@@ -815,7 +810,6 @@ static unsigned long __iova_rcache_get(struct iova_rcache *rcache,
                iova_pfn = iova_magazine_pop(cpu_rcache->loaded, limit_pfn);
 
        spin_unlock_irqrestore(&cpu_rcache->lock, flags);
-       put_cpu_ptr(rcache->cpu_rcaches);
 
        return iova_pfn;
 }
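
The iova.c changes above (together with the dma-iommu.c one-liner earlier) move limit_pfn and dma_32bit_pfn to an exclusive upper bound, which removes the sprinkling of "+ 1"/"- 1" corrections around the size-aligned, top-down allocation. The arithmetic that remains is small enough to check by hand; a standalone sketch, assuming the same power-of-two alignment rule as __alloc_and_insert_iova_range() (the function names below are only echoes of the kernel ones):

#include <stdio.h>

/* Round up to the next power of two (for size alignment). */
static unsigned long roundup_pow_of_two(unsigned long n)
{
        unsigned long p = 1;

        while (p < n)
                p <<= 1;
        return p;
}

/*
 * Place a size-aligned region of 'size' pfns as high as possible below an
 * *exclusive* limit, mirroring the new convention: the old code passed an
 * inclusive limit and compensated with scattered "+ 1" / "- 1" terms.
 */
static void place(unsigned long size, unsigned long limit_pfn)
{
        unsigned long pad = (limit_pfn - size) & (roundup_pow_of_two(size) - 1);
        unsigned long pfn_lo = limit_pfn - size - pad;
        unsigned long pfn_hi = pfn_lo + size - 1;

        printf("size %lu below %lu (exclusive): [%lu, %lu], pad %lu\n",
               size, limit_pfn, pfn_lo, pfn_hi, pad);
}

int main(void)
{
        place(8, 0x100000);     /* 32-bit space: dma_32bit_pfn is now pfn + 1   */
        place(6, 0x100000);     /* non-power-of-two size gets alignment padding */
        return 0;
}
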
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index b7e14ee863f92446997a66fc4b7532f1b8d93355..2a38aa15be17d1810382f5d1dd6c0ee982fd084b 100644 (file)
@@ -8,7 +8,9 @@
  * the Free Software Foundation; version 2 of the License.
  */
 
+#include <linux/bitmap.h>
 #include <linux/delay.h>
+#include <linux/dma-iommu.h>
 #include <linux/dma-mapping.h>
 #include <linux/err.h>
 #include <linux/export.h>
 #include <linux/sizes.h>
 #include <linux/slab.h>
 
+#if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA)
 #include <asm/dma-iommu.h>
 #include <asm/pgalloc.h>
+#endif
 
 #include "io-pgtable.h"
 
+#define IPMMU_CTX_MAX 1
+
 struct ipmmu_vmsa_device {
        struct device *dev;
        void __iomem *base;
        struct list_head list;
 
        unsigned int num_utlbs;
+       spinlock_t lock;                        /* Protects ctx and domains[] */
+       DECLARE_BITMAP(ctx, IPMMU_CTX_MAX);
+       struct ipmmu_vmsa_domain *domains[IPMMU_CTX_MAX];
 
        struct dma_iommu_mapping *mapping;
 };
@@ -47,10 +56,12 @@ struct ipmmu_vmsa_domain {
        spinlock_t lock;                        /* Protects mappings */
 };
 
-struct ipmmu_vmsa_archdata {
+struct ipmmu_vmsa_iommu_priv {
        struct ipmmu_vmsa_device *mmu;
        unsigned int *utlbs;
        unsigned int num_utlbs;
+       struct device *dev;
+       struct list_head list;
 };
 
 static DEFINE_SPINLOCK(ipmmu_devices_lock);
@@ -61,6 +72,24 @@ static struct ipmmu_vmsa_domain *to_vmsa_domain(struct iommu_domain *dom)
        return container_of(dom, struct ipmmu_vmsa_domain, io_domain);
 }
 
+
+static struct ipmmu_vmsa_iommu_priv *to_priv(struct device *dev)
+{
+#if defined(CONFIG_ARM)
+       return dev->archdata.iommu;
+#else
+       return dev->iommu_fwspec->iommu_priv;
+#endif
+}
+static void set_priv(struct device *dev, struct ipmmu_vmsa_iommu_priv *p)
+{
+#if defined(CONFIG_ARM)
+       dev->archdata.iommu = p;
+#else
+       dev->iommu_fwspec->iommu_priv = p;
+#endif
+}
+
 #define TLB_LOOP_TIMEOUT               100     /* 100us */
 
 /* -----------------------------------------------------------------------------
@@ -293,9 +322,29 @@ static struct iommu_gather_ops ipmmu_gather_ops = {
  * Domain/Context Management
  */
 
+static int ipmmu_domain_allocate_context(struct ipmmu_vmsa_device *mmu,
+                                        struct ipmmu_vmsa_domain *domain)
+{
+       unsigned long flags;
+       int ret;
+
+       spin_lock_irqsave(&mmu->lock, flags);
+
+       ret = find_first_zero_bit(mmu->ctx, IPMMU_CTX_MAX);
+       if (ret != IPMMU_CTX_MAX) {
+               mmu->domains[ret] = domain;
+               set_bit(ret, mmu->ctx);
+       }
+
+       spin_unlock_irqrestore(&mmu->lock, flags);
+
+       return ret;
+}
+
 static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain)
 {
        u64 ttbr;
+       int ret;
 
        /*
         * Allocate the page table operations.
@@ -309,7 +358,7 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain)
         * non-secure mode.
         */
        domain->cfg.quirks = IO_PGTABLE_QUIRK_ARM_NS;
-       domain->cfg.pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K,
+       domain->cfg.pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K;
        domain->cfg.ias = 32;
        domain->cfg.oas = 40;
        domain->cfg.tlb = &ipmmu_gather_ops;
@@ -327,10 +376,15 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain)
                return -EINVAL;
 
        /*
-        * TODO: When adding support for multiple contexts, find an unused
-        * context.
+        * Find an unused context.
         */
-       domain->context_id = 0;
+       ret = ipmmu_domain_allocate_context(domain->mmu, domain);
+       if (ret == IPMMU_CTX_MAX) {
+               free_io_pgtable_ops(domain->iop);
+               return -EBUSY;
+       }
+
+       domain->context_id = ret;
 
        /* TTBR0 */
        ttbr = domain->cfg.arm_lpae_s1_cfg.ttbr[0];
@@ -372,6 +426,19 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain)
        return 0;
 }
 
+static void ipmmu_domain_free_context(struct ipmmu_vmsa_device *mmu,
+                                     unsigned int context_id)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&mmu->lock, flags);
+
+       clear_bit(context_id, mmu->ctx);
+       mmu->domains[context_id] = NULL;
+
+       spin_unlock_irqrestore(&mmu->lock, flags);
+}
+
 static void ipmmu_domain_destroy_context(struct ipmmu_vmsa_domain *domain)
 {
        /*
@@ -382,6 +449,7 @@ static void ipmmu_domain_destroy_context(struct ipmmu_vmsa_domain *domain)
         */
        ipmmu_ctx_write(domain, IMCTR, IMCTR_FLUSH);
        ipmmu_tlb_sync(domain);
+       ipmmu_domain_free_context(domain->mmu, domain->context_id);
 }
 
 /* -----------------------------------------------------------------------------
@@ -439,29 +507,35 @@ static irqreturn_t ipmmu_domain_irq(struct ipmmu_vmsa_domain *domain)
 static irqreturn_t ipmmu_irq(int irq, void *dev)
 {
        struct ipmmu_vmsa_device *mmu = dev;
-       struct iommu_domain *io_domain;
-       struct ipmmu_vmsa_domain *domain;
+       irqreturn_t status = IRQ_NONE;
+       unsigned int i;
+       unsigned long flags;
 
-       if (!mmu->mapping)
-               return IRQ_NONE;
+       spin_lock_irqsave(&mmu->lock, flags);
+
+       /*
+        * Check interrupts for all active contexts.
+        */
+       for (i = 0; i < IPMMU_CTX_MAX; i++) {
+               if (!mmu->domains[i])
+                       continue;
+               if (ipmmu_domain_irq(mmu->domains[i]) == IRQ_HANDLED)
+                       status = IRQ_HANDLED;
+       }
 
-       io_domain = mmu->mapping->domain;
-       domain = to_vmsa_domain(io_domain);
+       spin_unlock_irqrestore(&mmu->lock, flags);
 
-       return ipmmu_domain_irq(domain);
+       return status;
 }
 
 /* -----------------------------------------------------------------------------
  * IOMMU Operations
  */
 
-static struct iommu_domain *ipmmu_domain_alloc(unsigned type)
+static struct iommu_domain *__ipmmu_domain_alloc(unsigned type)
 {
        struct ipmmu_vmsa_domain *domain;
 
-       if (type != IOMMU_DOMAIN_UNMANAGED)
-               return NULL;
-
        domain = kzalloc(sizeof(*domain), GFP_KERNEL);
        if (!domain)
                return NULL;
@@ -487,8 +561,8 @@ static void ipmmu_domain_free(struct iommu_domain *io_domain)
 static int ipmmu_attach_device(struct iommu_domain *io_domain,
                               struct device *dev)
 {
-       struct ipmmu_vmsa_archdata *archdata = dev->archdata.iommu;
-       struct ipmmu_vmsa_device *mmu = archdata->mmu;
+       struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev);
+       struct ipmmu_vmsa_device *mmu = priv->mmu;
        struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain);
        unsigned long flags;
        unsigned int i;
@@ -513,15 +587,16 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain,
                dev_err(dev, "Can't attach IPMMU %s to domain on IPMMU %s\n",
                        dev_name(mmu->dev), dev_name(domain->mmu->dev));
                ret = -EINVAL;
-       }
+       } else
+               dev_info(dev, "Reusing IPMMU context %u\n", domain->context_id);
 
        spin_unlock_irqrestore(&domain->lock, flags);
 
        if (ret < 0)
                return ret;
 
-       for (i = 0; i < archdata->num_utlbs; ++i)
-               ipmmu_utlb_enable(domain, archdata->utlbs[i]);
+       for (i = 0; i < priv->num_utlbs; ++i)
+               ipmmu_utlb_enable(domain, priv->utlbs[i]);
 
        return 0;
 }
@@ -529,12 +604,12 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain,
 static void ipmmu_detach_device(struct iommu_domain *io_domain,
                                struct device *dev)
 {
-       struct ipmmu_vmsa_archdata *archdata = dev->archdata.iommu;
+       struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev);
        struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain);
        unsigned int i;
 
-       for (i = 0; i < archdata->num_utlbs; ++i)
-               ipmmu_utlb_disable(domain, archdata->utlbs[i]);
+       for (i = 0; i < priv->num_utlbs; ++i)
+               ipmmu_utlb_disable(domain, priv->utlbs[i]);
 
        /*
         * TODO: Optimize by disabling the context when no device is attached.
@@ -595,22 +670,15 @@ static int ipmmu_find_utlbs(struct ipmmu_vmsa_device *mmu, struct device *dev,
        return 0;
 }
 
-static int ipmmu_add_device(struct device *dev)
+static int ipmmu_init_platform_device(struct device *dev)
 {
-       struct ipmmu_vmsa_archdata *archdata;
+       struct ipmmu_vmsa_iommu_priv *priv;
        struct ipmmu_vmsa_device *mmu;
-       struct iommu_group *group = NULL;
        unsigned int *utlbs;
        unsigned int i;
        int num_utlbs;
        int ret = -ENODEV;
 
-       if (dev->archdata.iommu) {
-               dev_warn(dev, "IOMMU driver already assigned to device %s\n",
-                        dev_name(dev));
-               return -EINVAL;
-       }
-
        /* Find the master corresponding to the device. */
 
        num_utlbs = of_count_phandle_with_args(dev->of_node, "iommus",
@@ -647,6 +715,46 @@ static int ipmmu_add_device(struct device *dev)
                }
        }
 
+       priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+       if (!priv) {
+               ret = -ENOMEM;
+               goto error;
+       }
+
+       priv->mmu = mmu;
+       priv->utlbs = utlbs;
+       priv->num_utlbs = num_utlbs;
+       priv->dev = dev;
+       set_priv(dev, priv);
+       return 0;
+
+error:
+       kfree(utlbs);
+       return ret;
+}
+
+#if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA)
+
+static struct iommu_domain *ipmmu_domain_alloc(unsigned type)
+{
+       if (type != IOMMU_DOMAIN_UNMANAGED)
+               return NULL;
+
+       return __ipmmu_domain_alloc(type);
+}
+
+static int ipmmu_add_device(struct device *dev)
+{
+       struct ipmmu_vmsa_device *mmu = NULL;
+       struct iommu_group *group;
+       int ret;
+
+       if (to_priv(dev)) {
+               dev_warn(dev, "IOMMU driver already assigned to device %s\n",
+                        dev_name(dev));
+               return -EINVAL;
+       }
+
        /* Create a device group and add the device to it. */
        group = iommu_group_alloc();
        if (IS_ERR(group)) {
@@ -664,16 +772,9 @@ static int ipmmu_add_device(struct device *dev)
                goto error;
        }
 
-       archdata = kzalloc(sizeof(*archdata), GFP_KERNEL);
-       if (!archdata) {
-               ret = -ENOMEM;
+       ret = ipmmu_init_platform_device(dev);
+       if (ret < 0)
                goto error;
-       }
-
-       archdata->mmu = mmu;
-       archdata->utlbs = utlbs;
-       archdata->num_utlbs = num_utlbs;
-       dev->archdata.iommu = archdata;
 
        /*
         * Create the ARM mapping, used by the ARM DMA mapping core to allocate
@@ -684,6 +785,7 @@ static int ipmmu_add_device(struct device *dev)
         * - Make the mapping size configurable ? We currently use a 2GB mapping
         *   at a 1GB offset to ensure that NULL VAs will fault.
         */
+       mmu = to_priv(dev)->mmu;
        if (!mmu->mapping) {
                struct dma_iommu_mapping *mapping;
 
@@ -708,30 +810,30 @@ static int ipmmu_add_device(struct device *dev)
        return 0;
 
 error:
-       arm_iommu_release_mapping(mmu->mapping);
-
-       kfree(dev->archdata.iommu);
-       kfree(utlbs);
-
-       dev->archdata.iommu = NULL;
+       if (mmu)
+               arm_iommu_release_mapping(mmu->mapping);
 
        if (!IS_ERR_OR_NULL(group))
                iommu_group_remove_device(dev);
 
+       kfree(to_priv(dev)->utlbs);
+       kfree(to_priv(dev));
+       set_priv(dev, NULL);
+
        return ret;
 }
 
 static void ipmmu_remove_device(struct device *dev)
 {
-       struct ipmmu_vmsa_archdata *archdata = dev->archdata.iommu;
+       struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev);
 
        arm_iommu_detach_device(dev);
        iommu_group_remove_device(dev);
 
-       kfree(archdata->utlbs);
-       kfree(archdata);
+       kfree(priv->utlbs);
+       kfree(priv);
 
-       dev->archdata.iommu = NULL;
+       set_priv(dev, NULL);
 }
 
 static const struct iommu_ops ipmmu_ops = {
@@ -748,6 +850,144 @@ static const struct iommu_ops ipmmu_ops = {
        .pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K,
 };
 
+#endif /* CONFIG_ARM && !CONFIG_IOMMU_DMA */
+
+#ifdef CONFIG_IOMMU_DMA
+
+static DEFINE_SPINLOCK(ipmmu_slave_devices_lock);
+static LIST_HEAD(ipmmu_slave_devices);
+
+static struct iommu_domain *ipmmu_domain_alloc_dma(unsigned type)
+{
+       struct iommu_domain *io_domain = NULL;
+
+       switch (type) {
+       case IOMMU_DOMAIN_UNMANAGED:
+               io_domain = __ipmmu_domain_alloc(type);
+               break;
+
+       case IOMMU_DOMAIN_DMA:
+               io_domain = __ipmmu_domain_alloc(type);
+               if (io_domain)
+                       iommu_get_dma_cookie(io_domain);
+               break;
+       }
+
+       return io_domain;
+}
+
+static void ipmmu_domain_free_dma(struct iommu_domain *io_domain)
+{
+       switch (io_domain->type) {
+       case IOMMU_DOMAIN_DMA:
+               iommu_put_dma_cookie(io_domain);
+               /* fall-through */
+       default:
+               ipmmu_domain_free(io_domain);
+               break;
+       }
+}
+
+static int ipmmu_add_device_dma(struct device *dev)
+{
+       struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+       struct iommu_group *group;
+
+       /*
+        * Only let through devices that have been verified in xlate().
+        * We may get called with dev->iommu_fwspec set to NULL.
+        */
+       if (!fwspec || !fwspec->iommu_priv)
+               return -ENODEV;
+
+       group = iommu_group_get_for_dev(dev);
+       if (IS_ERR(group))
+               return PTR_ERR(group);
+
+       spin_lock(&ipmmu_slave_devices_lock);
+       list_add(&to_priv(dev)->list, &ipmmu_slave_devices);
+       spin_unlock(&ipmmu_slave_devices_lock);
+       return 0;
+}
+
+static void ipmmu_remove_device_dma(struct device *dev)
+{
+       struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev);
+
+       spin_lock(&ipmmu_slave_devices_lock);
+       list_del(&priv->list);
+       spin_unlock(&ipmmu_slave_devices_lock);
+
+       iommu_group_remove_device(dev);
+}
+
+static struct device *ipmmu_find_sibling_device(struct device *dev)
+{
+       struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev);
+       struct ipmmu_vmsa_iommu_priv *sibling_priv = NULL;
+       bool found = false;
+
+       spin_lock(&ipmmu_slave_devices_lock);
+
+       list_for_each_entry(sibling_priv, &ipmmu_slave_devices, list) {
+               if (priv == sibling_priv)
+                       continue;
+               if (sibling_priv->mmu == priv->mmu) {
+                       found = true;
+                       break;
+               }
+       }
+
+       spin_unlock(&ipmmu_slave_devices_lock);
+
+       return found ? sibling_priv->dev : NULL;
+}
+
+static struct iommu_group *ipmmu_find_group_dma(struct device *dev)
+{
+       struct iommu_group *group;
+       struct device *sibling;
+
+       sibling = ipmmu_find_sibling_device(dev);
+       if (sibling)
+               group = iommu_group_get(sibling);
+       if (!sibling || IS_ERR(group))
+               group = generic_device_group(dev);
+
+       return group;
+}
+
+static int ipmmu_of_xlate_dma(struct device *dev,
+                             struct of_phandle_args *spec)
+{
+       /* If the IPMMU device is disabled in DT, return an error to make
+        * sure the of_iommu code does not install ops even though the
+        * IOMMU device is disabled.
+        */
+       if (!of_device_is_available(spec->np))
+               return -ENODEV;
+
+       return ipmmu_init_platform_device(dev);
+}
+
+static const struct iommu_ops ipmmu_ops = {
+       .domain_alloc = ipmmu_domain_alloc_dma,
+       .domain_free = ipmmu_domain_free_dma,
+       .attach_dev = ipmmu_attach_device,
+       .detach_dev = ipmmu_detach_device,
+       .map = ipmmu_map,
+       .unmap = ipmmu_unmap,
+       .map_sg = default_iommu_map_sg,
+       .iova_to_phys = ipmmu_iova_to_phys,
+       .add_device = ipmmu_add_device_dma,
+       .remove_device = ipmmu_remove_device_dma,
+       .device_group = ipmmu_find_group_dma,
+       .pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K,
+       .of_xlate = ipmmu_of_xlate_dma,
+};
+
+#endif /* CONFIG_IOMMU_DMA */
+
 /* -----------------------------------------------------------------------------
  * Probe/remove and init
  */
@@ -768,11 +1008,6 @@ static int ipmmu_probe(struct platform_device *pdev)
        int irq;
        int ret;
 
-       if (!IS_ENABLED(CONFIG_OF) && !pdev->dev.platform_data) {
-               dev_err(&pdev->dev, "missing platform data\n");
-               return -EINVAL;
-       }
-
        mmu = devm_kzalloc(&pdev->dev, sizeof(*mmu), GFP_KERNEL);
        if (!mmu) {
                dev_err(&pdev->dev, "cannot allocate device data\n");
@@ -781,6 +1016,8 @@ static int ipmmu_probe(struct platform_device *pdev)
 
        mmu->dev = &pdev->dev;
        mmu->num_utlbs = 32;
+       spin_lock_init(&mmu->lock);
+       bitmap_zero(mmu->ctx, IPMMU_CTX_MAX);
 
        /* Map I/O memory and request IRQ. */
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -840,7 +1077,9 @@ static int ipmmu_remove(struct platform_device *pdev)
        list_del(&mmu->list);
        spin_unlock(&ipmmu_devices_lock);
 
+#if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA)
        arm_iommu_release_mapping(mmu->mapping);
+#endif
 
        ipmmu_device_reset(mmu);
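The in-code comment above about a 2GB mapping at a 1GB offset refers to the legacy 32-bit ARM path; the mapping calls themselves fall outside the hunks shown here. The sketch below is only an illustration of that path, not text from the patch: the helper name is made up, while arm_iommu_create_mapping(), arm_iommu_attach_device() and arm_iommu_release_mapping() are the standard ARM DMA-IOMMU helpers.

/*
 * Illustrative sketch (not from the patch): create a 2GB IOVA window
 * starting at 1GB so that IOVA 0 (a NULL VA) always faults, then route
 * the master's DMA API calls through it. Error handling is simplified.
 */
#include <linux/err.h>
#include <linux/platform_device.h>
#include <linux/sizes.h>
#include <asm/dma-iommu.h>

static int ipmmu_example_arm_mapping(struct ipmmu_vmsa_device *mmu,
				     struct device *dev)
{
	int ret;

	if (!mmu->mapping) {
		struct dma_iommu_mapping *mapping;

		mapping = arm_iommu_create_mapping(&platform_bus_type,
						   SZ_1G, SZ_2G);
		if (IS_ERR(mapping))
			return PTR_ERR(mapping);

		mmu->mapping = mapping;
	}

	ret = arm_iommu_attach_device(dev, mmu->mapping);
	if (ret < 0)
		arm_iommu_release_mapping(mmu->mapping);

	return ret;
}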
 
index 95dfca36ccb993e90c21aadb8454272a1838b7b7..641e035cf86661b5da9239f8a60e28c252fa2f7e 100644 (file)
@@ -1309,7 +1309,7 @@ static void omap_iommu_remove_device(struct device *dev)
 static struct iommu_group *omap_iommu_device_group(struct device *dev)
 {
        struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
-       struct iommu_group *group = NULL;
+       struct iommu_group *group = ERR_PTR(-EINVAL);
 
        if (arch_data->iommu_dev)
                group = arch_data->iommu_dev->group;
index 179e636a4d916a35e564ed516e7c47af82a1bd83..8788640756a7361ccb656269efd2b8bbb9ac68f7 100644 (file)
@@ -165,20 +165,14 @@ static void s390_iommu_detach_device(struct iommu_domain *domain,
 
 static int s390_iommu_add_device(struct device *dev)
 {
-       struct iommu_group *group;
-       int rc;
+       struct iommu_group *group = iommu_group_get_for_dev(dev);
 
-       group = iommu_group_get(dev);
-       if (!group) {
-               group = iommu_group_alloc();
-               if (IS_ERR(group))
-                       return PTR_ERR(group);
-       }
+       if (IS_ERR(group))
+               return PTR_ERR(group);
 
-       rc = iommu_group_add_device(group, dev);
        iommu_group_put(group);
 
-       return rc;
+       return 0;
 }
 
 static void s390_iommu_remove_device(struct device *dev)
@@ -344,6 +338,7 @@ static struct iommu_ops s390_iommu_ops = {
        .iova_to_phys = s390_iommu_iova_to_phys,
        .add_device = s390_iommu_add_device,
        .remove_device = s390_iommu_remove_device,
+       .device_group = generic_device_group,
        .pgsize_bitmap = S390_IOMMU_PGSIZES,
 };
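The omap and s390 hunks follow the series-wide rule that device_group call-backs return ERR_PTR() values instead of NULL, and that drivers group devices via iommu_group_get_for_dev(). The caller below is a hypothetical sketch, not the actual iommu core, showing why ERR_PTR() is preferable to a bare NULL:

/*
 * Hypothetical caller sketch (not the iommu core): with ERR_PTR() the
 * reason for the failure is preserved, whereas a bare NULL return from
 * the device_group call-back would lose it.
 */
#include <linux/err.h>
#include <linux/iommu.h>

static struct iommu_group *example_get_group(struct device *dev,
					     const struct iommu_ops *ops)
{
	struct iommu_group *group = ops->device_group(dev);

	if (IS_ERR(group))
		return group;			/* propagates -EINVAL, -ENODEV, ... */
	if (!group)
		return ERR_PTR(-EINVAL);	/* legacy NULL return, now warned about */

	return group;
}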
 
index 3c25794042f937b4d3a184a669b98f9acc4b29a5..99bc5b3ae26e1622f07558c52510c5e351e3cad6 100644 (file)
@@ -102,6 +102,21 @@ extern int intel_svm_bind_mm(struct device *dev, int *pasid, int flags,
  */
 extern int intel_svm_unbind_mm(struct device *dev, int pasid);
 
+/**
+ * intel_svm_is_pasid_valid() - check if pasid is valid
+ * @dev:       Device for which PASID was allocated
+ * @pasid:     PASID value to be checked
+ *
+ * This function checks whether the specified PASID is still valid. A
+ * valid PASID means that the backing mm still has at least one user.
+ * For kernel callers init_mm is always valid; for any other mm, it is
+ * valid as long as mm->mm_users is non-zero.
+ *
+ * Returns -EINVAL if the PASID is invalid, 0 if the mm behind the PASID
+ * has no remaining users, and 1 if the PASID is valid.
+ */
+extern int intel_svm_is_pasid_valid(struct device *dev, int pasid);
+
 #else /* CONFIG_INTEL_IOMMU_SVM */
 
 static inline int intel_svm_bind_mm(struct device *dev, int *pasid,
@@ -114,6 +129,11 @@ static inline int intel_svm_unbind_mm(struct device *dev, int pasid)
 {
        BUG();
 }
+
+static inline int intel_svm_is_pasid_valid(struct device *dev, int pasid)
+{
+       return -EINVAL;
+}
 #endif /* CONFIG_INTEL_IOMMU_SVM */
 
 #define intel_svm_available(dev) (!intel_svm_bind_mm((dev), NULL, 0, NULL))
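To round off the intel-svm.h changes, here is a hedged usage sketch of the new helper; the client function and its flow are hypothetical, and only the intel_svm_*() calls come from the header above:

/*
 * Hypothetical SVM client (not from the patch): bind the current task's
 * mm, confirm the PASID is still backed by a live mm, then unbind.
 */
#include <linux/device.h>
#include <linux/intel-svm.h>

static int example_svm_bind_and_check(struct device *dev)
{
	int pasid, ret;

	if (!intel_svm_available(dev))
		return -ENODEV;

	/* Bind the current task's mm; flags and ops are left empty here. */
	ret = intel_svm_bind_mm(dev, &pasid, 0, NULL);
	if (ret)
		return ret;

	/* New helper: returns 1 while the PASID's mm still has users. */
	if (intel_svm_is_pasid_valid(dev, pasid) != 1) {
		intel_svm_unbind_mm(dev, pasid);
		return -ENODEV;
	}

	/* ... hand the PASID to the device here ... */

	return intel_svm_unbind_mm(dev, pasid);
}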