Merge tag 'iommu-v4.15-rc1' of git://github.com/awilliam/linux-vfio
author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 15 Nov 2017 00:43:27 +0000 (16:43 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 15 Nov 2017 00:43:27 +0000 (16:43 -0800)
Pull IOMMU updates from Alex Williamson:
 "As Joerg mentioned[1], he's out on paternity leave through the end of
  the year and I'm filling in for him in the interim:

   - Enforce MSI multiple IRQ alignment in AMD IOMMU

   - VT-d PASID error handling fixes

   - Add r8a7795 IPMMU support

   - Manage runtime PM links on exynos at {add,remove}_device callbacks

   - Fix Mediatek driver name to avoid conflict

   - Add terminate support to qcom fault handler

   - 64-bit IOVA optimizations

   - Simplify IOVA domain destruction, better use of rcache, and skip
     anchor nodes on copy

   - Convert to IOMMU TLB sync API in io-pgtable-arm{-v7s}

   - Drop command queue lock when waiting for CMD_SYNC completion on ARM
     SMMU implementations supporting MSI to cacheable memory

   - ipmmu-vmsa cleanup inspired by missed IOTLB sync callbacks

   - Fix sleeping lock with preemption disabled for RT

   - Dual MMU support for TI DRA7xx DSPs

   - Optional flush option on IOVA allocation avoiding overhead when
     caller can try other options

  [1] https://lkml.org/lkml/2017/10/22/72"
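
The "optional flush option" above refers to the reworked IOVA allocator interface that most of the hunks below adopt: init_iova_domain() loses its upper 32-bit PFN limit argument, and alloc_iova_fast() gains a flush_rcache flag so a first-choice attempt can fail cheaply. A minimal sketch of a caller, modelled on the dma-iommu.c and amd_iommu.c changes in this diff (the example_* names are placeholders, not code from the merge):

#include <linux/dma-mapping.h>
#include <linux/iova.h>

static struct iova_domain example_iovad;

static void example_domain_setup(unsigned long order)
{
	/*
	 * No upper PFN limit any more; the domain now tracks 32-bit and
	 * full-range cached allocation points internally.
	 */
	init_iova_domain(&example_iovad, 1UL << order, 1 /* start PFN */);
}

static unsigned long example_alloc_pfn(unsigned long pages, u64 dma_mask,
				       unsigned long shift)
{
	unsigned long pfn = 0;

	/*
	 * Prefer a 32-bit (SAC) address, but don't pay for an rcache
	 * flush if that range happens to be exhausted...
	 */
	if (dma_mask > DMA_BIT_MASK(32))
		pfn = alloc_iova_fast(&example_iovad, pages,
				      DMA_BIT_MASK(32) >> shift, false);

	/* ...only the final attempt against the full mask may flush. */
	if (!pfn)
		pfn = alloc_iova_fast(&example_iovad, pages,
				      dma_mask >> shift, true);

	return pfn;
}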

* tag 'iommu-v4.15-rc1' of git://github.com/awilliam/linux-vfio: (54 commits)
  iommu/iova: Use raw_cpu_ptr() instead of get_cpu_ptr() for ->fq
  iommu/mediatek: Fix driver name
  iommu/ipmmu-vmsa: Hook up r8a7795 DT matching code
  iommu/ipmmu-vmsa: Allow two bit SL0
  iommu/ipmmu-vmsa: Make IMBUSCTR setup optional
  iommu/ipmmu-vmsa: Write IMCTR twice
  iommu/ipmmu-vmsa: IPMMU device is 40-bit bus master
  iommu/ipmmu-vmsa: Make use of IOMMU_OF_DECLARE()
  iommu/ipmmu-vmsa: Enable multi context support
  iommu/ipmmu-vmsa: Add optional root device feature
  iommu/ipmmu-vmsa: Introduce features, break out alias
  iommu/ipmmu-vmsa: Unify ipmmu_ops
  iommu/ipmmu-vmsa: Clean up struct ipmmu_vmsa_iommu_priv
  iommu/ipmmu-vmsa: Simplify group allocation
  iommu/ipmmu-vmsa: Unify domain alloc/free
  iommu/ipmmu-vmsa: Fix return value check in ipmmu_find_group_dma()
  iommu/vt-d: Clear pasid table entry when memory unbound
  iommu/vt-d: Clear Page Request Overflow fault bit
  iommu/vt-d: Missing checks for pasid tables if allocation fails
  iommu/amd: Limit the IOVA page range to the specified addresses
  ...
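
For the MSI alignment change called out in the pull text, the core of the new alloc_irq_index() scan in amd_iommu.c below is a free-run search that restarts at power-of-two-aligned offsets. A simplified, self-contained restatement follows; the find_aligned_free_range() helper and its bool array stand in for the driver's irq_remap_table and irte_ops->is_allocated() and are illustrative only:

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/log2.h>

static int find_aligned_free_range(const bool *allocated, int table_size,
				   int min_index, int count, bool align)
{
	int alignment = align ? roundup_pow_of_two(count) : 1;
	int index, c;

	for (index = ALIGN(min_index, alignment), c = 0; index < table_size;) {
		if (!allocated[index]) {
			c++;
		} else {
			/* Run broken: restart at the next aligned slot. */
			c = 0;
			index = ALIGN(index + 1, alignment);
			continue;
		}

		if (c == count)
			return index - count + 1; /* first entry of the run */

		index++;
	}

	return -ENOSPC;
}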

23 files changed:
drivers/gpu/drm/tegra/drm.c
drivers/gpu/host1x/dev.c
drivers/iommu/amd_iommu.c
drivers/iommu/arm-smmu-v3.c
drivers/iommu/arm-smmu.c
drivers/iommu/dma-iommu.c
drivers/iommu/dmar.c
drivers/iommu/exynos-iommu.c
drivers/iommu/intel-iommu.c
drivers/iommu/intel-svm.c
drivers/iommu/io-pgtable-arm-v7s.c
drivers/iommu/io-pgtable-arm.c
drivers/iommu/iova.c
drivers/iommu/ipmmu-vmsa.c
drivers/iommu/mtk_iommu.c
drivers/iommu/mtk_iommu_v1.c
drivers/iommu/omap-iommu.c
drivers/iommu/omap-iommu.h
drivers/iommu/qcom_iommu.c
drivers/misc/mic/scif/scif_rma.c
include/linux/dmar.h
include/linux/intel-iommu.h
include/linux/iova.h
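
A recurring pattern in the ARM SMMU hunks below is the conversion to the IOMMU core's explicit TLB sync API: unmap() no longer syncs implicitly, and the driver instead wires its sync primitive into the iotlb_sync and flush_iotlb_all callbacks. In outline (the example_* names are placeholders for this sketch, not driver code):

#include <linux/iommu.h>

static void example_iotlb_sync(struct iommu_domain *domain)
{
	/* Issue the hardware sync here, e.g. a CMD_SYNC on SMMUv3. */
}

static struct iommu_ops example_iommu_ops = {
	/* ... map, unmap and the other callbacks ... */
	.flush_iotlb_all	= example_iotlb_sync,
	.iotlb_sync		= example_iotlb_sync,
};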

index 597d563d636a189fa4bdb790ca842ec9a3740134..b822e484b7e55a0b3207cb4829d928cc30b78978 100644 (file)
@@ -155,8 +155,7 @@ static int tegra_drm_load(struct drm_device *drm, unsigned long flags)
 
                order = __ffs(tegra->domain->pgsize_bitmap);
                init_iova_domain(&tegra->carveout.domain, 1UL << order,
-                                carveout_start >> order,
-                                carveout_end >> order);
+                                carveout_start >> order);
 
                tegra->carveout.shift = iova_shift(&tegra->carveout.domain);
                tegra->carveout.limit = carveout_end >> tegra->carveout.shift;
index 7f22c5c37660831ec5f2078f4b0717e78947d861..5267c62e88962bd31addc6b0493c535facce9558 100644 (file)
@@ -198,8 +198,7 @@ static int host1x_probe(struct platform_device *pdev)
 
                order = __ffs(host->domain->pgsize_bitmap);
                init_iova_domain(&host->iova, 1UL << order,
-                                geometry->aperture_start >> order,
-                                geometry->aperture_end >> order);
+                                geometry->aperture_start >> order);
                host->iova_end = geometry->aperture_end;
        }
 
index 9c848e36f20904b0397bbb643906fbc3474c70c2..7d5eb004091d1d64f6956825e9dd14bf975a8b68 100644 (file)
@@ -63,7 +63,6 @@
 /* IO virtual address start page frame number */
 #define IOVA_START_PFN         (1)
 #define IOVA_PFN(addr)         ((addr) >> PAGE_SHIFT)
-#define DMA_32BIT_PFN          IOVA_PFN(DMA_BIT_MASK(32))
 
 /* Reserved IOVA ranges */
 #define MSI_RANGE_START                (0xfee00000)
@@ -1547,10 +1546,11 @@ static unsigned long dma_ops_alloc_iova(struct device *dev,
 
        if (dma_mask > DMA_BIT_MASK(32))
                pfn = alloc_iova_fast(&dma_dom->iovad, pages,
-                                     IOVA_PFN(DMA_BIT_MASK(32)));
+                                     IOVA_PFN(DMA_BIT_MASK(32)), false);
 
        if (!pfn)
-               pfn = alloc_iova_fast(&dma_dom->iovad, pages, IOVA_PFN(dma_mask));
+               pfn = alloc_iova_fast(&dma_dom->iovad, pages,
+                                     IOVA_PFN(dma_mask), true);
 
        return (pfn << PAGE_SHIFT);
 }
@@ -1788,8 +1788,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void)
        if (!dma_dom->domain.pt_root)
                goto free_dma_dom;
 
-       init_iova_domain(&dma_dom->iovad, PAGE_SIZE,
-                        IOVA_START_PFN, DMA_32BIT_PFN);
+       init_iova_domain(&dma_dom->iovad, PAGE_SIZE, IOVA_START_PFN);
 
        if (init_iova_flush_queue(&dma_dom->iovad, iova_domain_flush_tlb, NULL))
                goto free_dma_dom;
@@ -2383,11 +2382,9 @@ static void __unmap_single(struct dma_ops_domain *dma_dom,
                           size_t size,
                           int dir)
 {
-       dma_addr_t flush_addr;
        dma_addr_t i, start;
        unsigned int pages;
 
-       flush_addr = dma_addr;
        pages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
        dma_addr &= PAGE_MASK;
        start = dma_addr;
@@ -2696,8 +2693,7 @@ static int init_reserved_iova_ranges(void)
        struct pci_dev *pdev = NULL;
        struct iova *val;
 
-       init_iova_domain(&reserved_iova_ranges, PAGE_SIZE,
-                        IOVA_START_PFN, DMA_32BIT_PFN);
+       init_iova_domain(&reserved_iova_ranges, PAGE_SIZE, IOVA_START_PFN);
 
        lockdep_set_class(&reserved_iova_ranges.iova_rbtree_lock,
                          &reserved_rbtree_key);
@@ -3155,7 +3151,7 @@ static void amd_iommu_apply_resv_region(struct device *dev,
        unsigned long start, end;
 
        start = IOVA_PFN(region->start);
-       end   = IOVA_PFN(region->start + region->length);
+       end   = IOVA_PFN(region->start + region->length - 1);
 
        WARN_ON_ONCE(reserve_iova(&dma_dom->iovad, start, end) == NULL);
 }
@@ -3663,11 +3659,11 @@ out_unlock:
        return table;
 }
 
-static int alloc_irq_index(u16 devid, int count)
+static int alloc_irq_index(u16 devid, int count, bool align)
 {
        struct irq_remap_table *table;
+       int index, c, alignment = 1;
        unsigned long flags;
-       int index, c;
        struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
 
        if (!iommu)
@@ -3677,16 +3673,21 @@ static int alloc_irq_index(u16 devid, int count)
        if (!table)
                return -ENODEV;
 
+       if (align)
+               alignment = roundup_pow_of_two(count);
+
        spin_lock_irqsave(&table->lock, flags);
 
        /* Scan table for free entries */
-       for (c = 0, index = table->min_index;
-            index < MAX_IRQS_PER_TABLE;
-            ++index) {
-               if (!iommu->irte_ops->is_allocated(table, index))
+       for (index = ALIGN(table->min_index, alignment), c = 0;
+            index < MAX_IRQS_PER_TABLE;) {
+               if (!iommu->irte_ops->is_allocated(table, index)) {
                        c += 1;
-               else
-                       c = 0;
+               } else {
+                       c     = 0;
+                       index = ALIGN(index + 1, alignment);
+                       continue;
+               }
 
                if (c == count) {
                        for (; c != 0; --c)
@@ -3695,6 +3696,8 @@ static int alloc_irq_index(u16 devid, int count)
                        index -= count - 1;
                        goto out;
                }
+
+               index++;
        }
 
        index = -ENOSPC;
@@ -4099,7 +4102,9 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq,
                else
                        ret = -ENOMEM;
        } else {
-               index = alloc_irq_index(devid, nr_irqs);
+               bool align = (info->type == X86_IRQ_ALLOC_TYPE_MSI);
+
+               index = alloc_irq_index(devid, nr_irqs, align);
        }
        if (index < 0) {
                pr_warn("Failed to allocate IRTE\n");
index e67ba6c40faff07aaf0468f898b7ff6f99d94301..f122071688fd530b3fedd5e21ddbaf0ccd4bc203 100644 (file)
 #define ARM64_TCR_TBI0_MASK            0x1UL
 
 #define CTXDESC_CD_0_AA64              (1UL << 41)
+#define CTXDESC_CD_0_S                 (1UL << 44)
 #define CTXDESC_CD_0_R                 (1UL << 45)
 #define CTXDESC_CD_0_A                 (1UL << 46)
 #define CTXDESC_CD_0_ASET_SHIFT                47
 
 #define CMDQ_SYNC_0_CS_SHIFT           12
 #define CMDQ_SYNC_0_CS_NONE            (0UL << CMDQ_SYNC_0_CS_SHIFT)
+#define CMDQ_SYNC_0_CS_IRQ             (1UL << CMDQ_SYNC_0_CS_SHIFT)
 #define CMDQ_SYNC_0_CS_SEV             (2UL << CMDQ_SYNC_0_CS_SHIFT)
+#define CMDQ_SYNC_0_MSH_SHIFT          22
+#define CMDQ_SYNC_0_MSH_ISH            (3UL << CMDQ_SYNC_0_MSH_SHIFT)
+#define CMDQ_SYNC_0_MSIATTR_SHIFT      24
+#define CMDQ_SYNC_0_MSIATTR_OIWB       (0xfUL << CMDQ_SYNC_0_MSIATTR_SHIFT)
+#define CMDQ_SYNC_0_MSIDATA_SHIFT      32
+#define CMDQ_SYNC_0_MSIDATA_MASK       0xffffffffUL
+#define CMDQ_SYNC_1_MSIADDR_SHIFT      0
+#define CMDQ_SYNC_1_MSIADDR_MASK       0xffffffffffffcUL
 
 /* Event queue */
 #define EVTQ_ENT_DWORDS                        4
 
 /* High-level queue structures */
 #define ARM_SMMU_POLL_TIMEOUT_US       100
-#define ARM_SMMU_CMDQ_DRAIN_TIMEOUT_US 1000000 /* 1s! */
+#define ARM_SMMU_CMDQ_SYNC_TIMEOUT_US  1000000 /* 1s! */
+#define ARM_SMMU_CMDQ_SYNC_SPIN_COUNT  10
 
 #define MSI_IOVA_BASE                  0x8000000
 #define MSI_IOVA_LENGTH                        0x100000
 
-/* Until ACPICA headers cover IORT rev. C */
-#ifndef ACPI_IORT_SMMU_HISILICON_HI161X
-#define ACPI_IORT_SMMU_HISILICON_HI161X                0x1
-#endif
-
-#ifndef ACPI_IORT_SMMU_V3_CAVIUM_CN99XX
-#define ACPI_IORT_SMMU_V3_CAVIUM_CN99XX                0x2
-#endif
-
 static bool disable_bypass;
 module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO);
 MODULE_PARM_DESC(disable_bypass,
@@ -504,6 +506,10 @@ struct arm_smmu_cmdq_ent {
                } pri;
 
                #define CMDQ_OP_CMD_SYNC        0x46
+               struct {
+                       u32                     msidata;
+                       u64                     msiaddr;
+               } sync;
        };
 };
 
@@ -604,6 +610,7 @@ struct arm_smmu_device {
 #define ARM_SMMU_FEAT_TRANS_S2         (1 << 10)
 #define ARM_SMMU_FEAT_STALLS           (1 << 11)
 #define ARM_SMMU_FEAT_HYP              (1 << 12)
+#define ARM_SMMU_FEAT_STALL_FORCE      (1 << 13)
        u32                             features;
 
 #define ARM_SMMU_OPT_SKIP_PREFETCH     (1 << 0)
@@ -616,6 +623,7 @@ struct arm_smmu_device {
 
        int                             gerr_irq;
        int                             combined_irq;
+       atomic_t                        sync_nr;
 
        unsigned long                   ias; /* IPA */
        unsigned long                   oas; /* PA */
@@ -634,6 +642,8 @@ struct arm_smmu_device {
 
        struct arm_smmu_strtab_cfg      strtab_cfg;
 
+       u32                             sync_count;
+
        /* IOMMU core code handle */
        struct iommu_device             iommu;
 };
@@ -757,26 +767,29 @@ static void queue_inc_prod(struct arm_smmu_queue *q)
  * Wait for the SMMU to consume items. If drain is true, wait until the queue
  * is empty. Otherwise, wait until there is at least one free slot.
  */
-static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe)
+static int queue_poll_cons(struct arm_smmu_queue *q, bool sync, bool wfe)
 {
        ktime_t timeout;
-       unsigned int delay = 1;
+       unsigned int delay = 1, spin_cnt = 0;
 
-       /* Wait longer if it's queue drain */
-       timeout = ktime_add_us(ktime_get(), drain ?
-                                           ARM_SMMU_CMDQ_DRAIN_TIMEOUT_US :
+       /* Wait longer if it's a CMD_SYNC */
+       timeout = ktime_add_us(ktime_get(), sync ?
+                                           ARM_SMMU_CMDQ_SYNC_TIMEOUT_US :
                                            ARM_SMMU_POLL_TIMEOUT_US);
 
-       while (queue_sync_cons(q), (drain ? !queue_empty(q) : queue_full(q))) {
+       while (queue_sync_cons(q), (sync ? !queue_empty(q) : queue_full(q))) {
                if (ktime_compare(ktime_get(), timeout) > 0)
                        return -ETIMEDOUT;
 
                if (wfe) {
                        wfe();
-               } else {
+               } else if (++spin_cnt < ARM_SMMU_CMDQ_SYNC_SPIN_COUNT) {
                        cpu_relax();
+                       continue;
+               } else {
                        udelay(delay);
                        delay *= 2;
+                       spin_cnt = 0;
                }
        }
 
@@ -878,7 +891,13 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
                }
                break;
        case CMDQ_OP_CMD_SYNC:
-               cmd[0] |= CMDQ_SYNC_0_CS_SEV;
+               if (ent->sync.msiaddr)
+                       cmd[0] |= CMDQ_SYNC_0_CS_IRQ;
+               else
+                       cmd[0] |= CMDQ_SYNC_0_CS_SEV;
+               cmd[0] |= CMDQ_SYNC_0_MSH_ISH | CMDQ_SYNC_0_MSIATTR_OIWB;
+               cmd[0] |= (u64)ent->sync.msidata << CMDQ_SYNC_0_MSIDATA_SHIFT;
+               cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
                break;
        default:
                return -ENOENT;
@@ -936,13 +955,22 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
        queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
 }
 
+static void arm_smmu_cmdq_insert_cmd(struct arm_smmu_device *smmu, u64 *cmd)
+{
+       struct arm_smmu_queue *q = &smmu->cmdq.q;
+       bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
+
+       while (queue_insert_raw(q, cmd) == -ENOSPC) {
+               if (queue_poll_cons(q, false, wfe))
+                       dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
+       }
+}
+
 static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
                                    struct arm_smmu_cmdq_ent *ent)
 {
        u64 cmd[CMDQ_ENT_DWORDS];
        unsigned long flags;
-       bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
-       struct arm_smmu_queue *q = &smmu->cmdq.q;
 
        if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
                dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
@@ -951,14 +979,76 @@ static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
        }
 
        spin_lock_irqsave(&smmu->cmdq.lock, flags);
-       while (queue_insert_raw(q, cmd) == -ENOSPC) {
-               if (queue_poll_cons(q, false, wfe))
-                       dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
-       }
+       arm_smmu_cmdq_insert_cmd(smmu, cmd);
+       spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
+}
 
-       if (ent->opcode == CMDQ_OP_CMD_SYNC && queue_poll_cons(q, true, wfe))
-               dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n");
+/*
+ * The difference between val and sync_idx is bounded by the maximum size of
+ * a queue at 2^20 entries, so 32 bits is plenty for wrap-safe arithmetic.
+ */
+static int __arm_smmu_sync_poll_msi(struct arm_smmu_device *smmu, u32 sync_idx)
+{
+       ktime_t timeout;
+       u32 val;
+
+       timeout = ktime_add_us(ktime_get(), ARM_SMMU_CMDQ_SYNC_TIMEOUT_US);
+       val = smp_cond_load_acquire(&smmu->sync_count,
+                                   (int)(VAL - sync_idx) >= 0 ||
+                                   !ktime_before(ktime_get(), timeout));
+
+       return (int)(val - sync_idx) < 0 ? -ETIMEDOUT : 0;
+}
+
+static int __arm_smmu_cmdq_issue_sync_msi(struct arm_smmu_device *smmu)
+{
+       u64 cmd[CMDQ_ENT_DWORDS];
+       unsigned long flags;
+       struct arm_smmu_cmdq_ent ent = {
+               .opcode = CMDQ_OP_CMD_SYNC,
+               .sync   = {
+                       .msidata = atomic_inc_return_relaxed(&smmu->sync_nr),
+                       .msiaddr = virt_to_phys(&smmu->sync_count),
+               },
+       };
+
+       arm_smmu_cmdq_build_cmd(cmd, &ent);
+
+       spin_lock_irqsave(&smmu->cmdq.lock, flags);
+       arm_smmu_cmdq_insert_cmd(smmu, cmd);
+       spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
+
+       return __arm_smmu_sync_poll_msi(smmu, ent.sync.msidata);
+}
+
+static int __arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
+{
+       u64 cmd[CMDQ_ENT_DWORDS];
+       unsigned long flags;
+       bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
+       struct arm_smmu_cmdq_ent ent = { .opcode = CMDQ_OP_CMD_SYNC };
+       int ret;
+
+       arm_smmu_cmdq_build_cmd(cmd, &ent);
+
+       spin_lock_irqsave(&smmu->cmdq.lock, flags);
+       arm_smmu_cmdq_insert_cmd(smmu, cmd);
+       ret = queue_poll_cons(&smmu->cmdq.q, true, wfe);
        spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
+
+       return ret;
+}
+
+static void arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
+{
+       int ret;
+       bool msi = (smmu->features & ARM_SMMU_FEAT_MSI) &&
+                  (smmu->features & ARM_SMMU_FEAT_COHERENCY);
+
+       ret = msi ? __arm_smmu_cmdq_issue_sync_msi(smmu)
+                 : __arm_smmu_cmdq_issue_sync(smmu);
+       if (ret)
+               dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n");
 }
 
 /* Context descriptor manipulation functions */
@@ -996,6 +1086,11 @@ static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu,
              CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET_PRIVATE |
              CTXDESC_CD_0_AA64 | (u64)cfg->cd.asid << CTXDESC_CD_0_ASID_SHIFT |
              CTXDESC_CD_0_V;
+
+       /* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
+       if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
+               val |= CTXDESC_CD_0_S;
+
        cfg->cdptr[0] = cpu_to_le64(val);
 
        val = cfg->cd.ttbr & CTXDESC_CD_1_TTB0_MASK << CTXDESC_CD_1_TTB0_SHIFT;
@@ -1029,8 +1124,7 @@ static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
        };
 
        arm_smmu_cmdq_issue_cmd(smmu, &cmd);
-       cmd.opcode = CMDQ_OP_CMD_SYNC;
-       arm_smmu_cmdq_issue_cmd(smmu, &cmd);
+       arm_smmu_cmdq_issue_sync(smmu);
 }
 
 static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
@@ -1094,7 +1188,11 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
                dst[1] = cpu_to_le64(STRTAB_STE_1_SHCFG_INCOMING
                         << STRTAB_STE_1_SHCFG_SHIFT);
                dst[2] = 0; /* Nuke the VMID */
-               if (ste_live)
+               /*
+                * The SMMU can perform negative caching, so we must sync
+                * the STE regardless of whether the old value was live.
+                */
+               if (smmu)
                        arm_smmu_sync_ste_for_sid(smmu, sid);
                return;
        }
@@ -1112,7 +1210,8 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
 #endif
                         STRTAB_STE_1_STRW_NSEL1 << STRTAB_STE_1_STRW_SHIFT);
 
-               if (smmu->features & ARM_SMMU_FEAT_STALLS)
+               if (smmu->features & ARM_SMMU_FEAT_STALLS &&
+                  !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
                        dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
 
                val |= (ste->s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK
@@ -1275,12 +1374,6 @@ static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
        return IRQ_HANDLED;
 }
 
-static irqreturn_t arm_smmu_cmdq_sync_handler(int irq, void *dev)
-{
-       /* We don't actually use CMD_SYNC interrupts for anything */
-       return IRQ_HANDLED;
-}
-
 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
 
 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
@@ -1313,10 +1406,8 @@ static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
        if (active & GERROR_MSI_EVTQ_ABT_ERR)
                dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
 
-       if (active & GERROR_MSI_CMDQ_ABT_ERR) {
+       if (active & GERROR_MSI_CMDQ_ABT_ERR)
                dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
-               arm_smmu_cmdq_sync_handler(irq, smmu->dev);
-       }
 
        if (active & GERROR_PRIQ_ABT_ERR)
                dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
@@ -1345,17 +1436,13 @@ static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
 {
        arm_smmu_gerror_handler(irq, dev);
-       arm_smmu_cmdq_sync_handler(irq, dev);
        return IRQ_WAKE_THREAD;
 }
 
 /* IO_PGTABLE API */
 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
 {
-       struct arm_smmu_cmdq_ent cmd;
-
-       cmd.opcode = CMDQ_OP_CMD_SYNC;
-       arm_smmu_cmdq_issue_cmd(smmu, &cmd);
+       arm_smmu_cmdq_issue_sync(smmu);
 }
 
 static void arm_smmu_tlb_sync(void *cookie)
@@ -1743,6 +1830,14 @@ arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
        return ops->unmap(ops, iova, size);
 }
 
+static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
+{
+       struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
+
+       if (smmu)
+               __arm_smmu_tlb_sync(smmu);
+}
+
 static phys_addr_t
 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
 {
@@ -1963,6 +2058,8 @@ static struct iommu_ops arm_smmu_ops = {
        .map                    = arm_smmu_map,
        .unmap                  = arm_smmu_unmap,
        .map_sg                 = default_iommu_map_sg,
+       .flush_iotlb_all        = arm_smmu_iotlb_sync,
+       .iotlb_sync             = arm_smmu_iotlb_sync,
        .iova_to_phys           = arm_smmu_iova_to_phys,
        .add_device             = arm_smmu_add_device,
        .remove_device          = arm_smmu_remove_device,
@@ -2147,6 +2244,7 @@ static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
 {
        int ret;
 
+       atomic_set(&smmu->sync_nr, 0);
        ret = arm_smmu_init_queues(smmu);
        if (ret)
                return ret;
@@ -2265,15 +2363,6 @@ static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
                        dev_warn(smmu->dev, "failed to enable evtq irq\n");
        }
 
-       irq = smmu->cmdq.q.irq;
-       if (irq) {
-               ret = devm_request_irq(smmu->dev, irq,
-                                      arm_smmu_cmdq_sync_handler, 0,
-                                      "arm-smmu-v3-cmdq-sync", smmu);
-               if (ret < 0)
-                       dev_warn(smmu->dev, "failed to enable cmdq-sync irq\n");
-       }
-
        irq = smmu->gerr_irq;
        if (irq) {
                ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
@@ -2399,8 +2488,7 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
        /* Invalidate any cached configuration */
        cmd.opcode = CMDQ_OP_CFGI_ALL;
        arm_smmu_cmdq_issue_cmd(smmu, &cmd);
-       cmd.opcode = CMDQ_OP_CMD_SYNC;
-       arm_smmu_cmdq_issue_cmd(smmu, &cmd);
+       arm_smmu_cmdq_issue_sync(smmu);
 
        /* Invalidate any stale TLB entries */
        if (smmu->features & ARM_SMMU_FEAT_HYP) {
@@ -2410,8 +2498,7 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
 
        cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
        arm_smmu_cmdq_issue_cmd(smmu, &cmd);
-       cmd.opcode = CMDQ_OP_CMD_SYNC;
-       arm_smmu_cmdq_issue_cmd(smmu, &cmd);
+       arm_smmu_cmdq_issue_sync(smmu);
 
        /* Event queue */
        writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
@@ -2532,13 +2619,14 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
         * register, but warn on mismatch.
         */
        if (!!(reg & IDR0_COHACC) != coherent)
-               dev_warn(smmu->dev, "IDR0.COHACC overridden by dma-coherent property (%s)\n",
+               dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
                         coherent ? "true" : "false");
 
        switch (reg & IDR0_STALL_MODEL_MASK << IDR0_STALL_MODEL_SHIFT) {
-       case IDR0_STALL_MODEL_STALL:
-               /* Fallthrough */
        case IDR0_STALL_MODEL_FORCE:
+               smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
+               /* Fallthrough */
+       case IDR0_STALL_MODEL_STALL:
                smmu->features |= ARM_SMMU_FEAT_STALLS;
        }
 
@@ -2665,7 +2753,7 @@ static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
        case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
                smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
                break;
-       case ACPI_IORT_SMMU_HISILICON_HI161X:
+       case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
                smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
                break;
        }
@@ -2783,10 +2871,6 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
                if (irq > 0)
                        smmu->priq.q.irq = irq;
 
-               irq = platform_get_irq_byname(pdev, "cmdq-sync");
-               if (irq > 0)
-                       smmu->cmdq.q.irq = irq;
-
                irq = platform_get_irq_byname(pdev, "gerror");
                if (irq > 0)
                        smmu->gerr_irq = irq;
index 3bdb799d3b4b1f5ee1de1e2505d3d0024273c658..78d4c6b8f1bad57477f598a45430a7e519e7af39 100644 (file)
@@ -59,6 +59,7 @@
 #define ARM_MMU500_ACTLR_CPRE          (1 << 1)
 
 #define ARM_MMU500_ACR_CACHE_LOCK      (1 << 26)
+#define ARM_MMU500_ACR_S2CRB_TLBEN     (1 << 10)
 #define ARM_MMU500_ACR_SMTNMB_TLBEN    (1 << 8)
 
 #define TLB_LOOP_TIMEOUT               1000000 /* 1s! */
@@ -119,14 +120,6 @@ enum arm_smmu_implementation {
        CAVIUM_SMMUV2,
 };
 
-/* Until ACPICA headers cover IORT rev. C */
-#ifndef ACPI_IORT_SMMU_CORELINK_MMU401
-#define ACPI_IORT_SMMU_CORELINK_MMU401 0x4
-#endif
-#ifndef ACPI_IORT_SMMU_CAVIUM_THUNDERX
-#define ACPI_IORT_SMMU_CAVIUM_THUNDERX 0x5
-#endif
-
 struct arm_smmu_s2cr {
        struct iommu_group              *group;
        int                             count;
@@ -250,6 +243,7 @@ enum arm_smmu_domain_stage {
 struct arm_smmu_domain {
        struct arm_smmu_device          *smmu;
        struct io_pgtable_ops           *pgtbl_ops;
+       const struct iommu_gather_ops   *tlb_ops;
        struct arm_smmu_cfg             cfg;
        enum arm_smmu_domain_stage      stage;
        struct mutex                    init_mutex; /* Protects smmu pointer */
@@ -735,7 +729,6 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
        enum io_pgtable_fmt fmt;
        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
        struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
-       const struct iommu_gather_ops *tlb_ops;
 
        mutex_lock(&smmu_domain->init_mutex);
        if (smmu_domain->smmu)
@@ -813,7 +806,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
                        ias = min(ias, 32UL);
                        oas = min(oas, 32UL);
                }
-               tlb_ops = &arm_smmu_s1_tlb_ops;
+               smmu_domain->tlb_ops = &arm_smmu_s1_tlb_ops;
                break;
        case ARM_SMMU_DOMAIN_NESTED:
                /*
@@ -833,9 +826,9 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
                        oas = min(oas, 40UL);
                }
                if (smmu->version == ARM_SMMU_V2)
-                       tlb_ops = &arm_smmu_s2_tlb_ops_v2;
+                       smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v2;
                else
-                       tlb_ops = &arm_smmu_s2_tlb_ops_v1;
+                       smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v1;
                break;
        default:
                ret = -EINVAL;
@@ -863,7 +856,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
                .pgsize_bitmap  = smmu->pgsize_bitmap,
                .ias            = ias,
                .oas            = oas,
-               .tlb            = tlb_ops,
+               .tlb            = smmu_domain->tlb_ops,
                .iommu_dev      = smmu->dev,
        };
 
@@ -1259,6 +1252,14 @@ static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
        return ops->unmap(ops, iova, size);
 }
 
+static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
+{
+       struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+
+       if (smmu_domain->tlb_ops)
+               smmu_domain->tlb_ops->tlb_sync(smmu_domain);
+}
+
 static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
                                              dma_addr_t iova)
 {
@@ -1562,6 +1563,8 @@ static struct iommu_ops arm_smmu_ops = {
        .map                    = arm_smmu_map,
        .unmap                  = arm_smmu_unmap,
        .map_sg                 = default_iommu_map_sg,
+       .flush_iotlb_all        = arm_smmu_iotlb_sync,
+       .iotlb_sync             = arm_smmu_iotlb_sync,
        .iova_to_phys           = arm_smmu_iova_to_phys,
        .add_device             = arm_smmu_add_device,
        .remove_device          = arm_smmu_remove_device,
@@ -1606,7 +1609,7 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
                 * Allow unmatched Stream IDs to allocate bypass
                 * TLB entries for reduced latency.
                 */
-               reg |= ARM_MMU500_ACR_SMTNMB_TLBEN;
+               reg |= ARM_MMU500_ACR_SMTNMB_TLBEN | ARM_MMU500_ACR_S2CRB_TLBEN;
                writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR);
        }
 
index 9d1cebe7f6cbb14517718f5fd6d03de7ee503956..25914d36c5ace5e1336251c2e1ec55d6ad517b94 100644 (file)
@@ -292,18 +292,7 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
                /* ...then finally give it a kicking to make sure it fits */
                base_pfn = max_t(unsigned long, base_pfn,
                                domain->geometry.aperture_start >> order);
-               end_pfn = min_t(unsigned long, end_pfn,
-                               domain->geometry.aperture_end >> order);
        }
-       /*
-        * PCI devices may have larger DMA masks, but still prefer allocating
-        * within a 32-bit mask to avoid DAC addressing. Such limitations don't
-        * apply to the typical platform device, so for those we may as well
-        * leave the cache limit at the top of their range to save an rb_last()
-        * traversal on every allocation.
-        */
-       if (dev && dev_is_pci(dev))
-               end_pfn &= DMA_BIT_MASK(32) >> order;
 
        /* start_pfn is always nonzero for an already-initialised domain */
        if (iovad->start_pfn) {
@@ -312,16 +301,11 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
                        pr_warn("Incompatible range for DMA domain\n");
                        return -EFAULT;
                }
-               /*
-                * If we have devices with different DMA masks, move the free
-                * area cache limit down for the benefit of the smaller one.
-                */
-               iovad->dma_32bit_pfn = min(end_pfn + 1, iovad->dma_32bit_pfn);
 
                return 0;
        }
 
-       init_iova_domain(iovad, 1UL << order, base_pfn, end_pfn);
+       init_iova_domain(iovad, 1UL << order, base_pfn);
        if (!dev)
                return 0;
 
@@ -386,10 +370,12 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
 
        /* Try to get PCI devices a SAC address */
        if (dma_limit > DMA_BIT_MASK(32) && dev_is_pci(dev))
-               iova = alloc_iova_fast(iovad, iova_len, DMA_BIT_MASK(32) >> shift);
+               iova = alloc_iova_fast(iovad, iova_len,
+                                      DMA_BIT_MASK(32) >> shift, false);
 
        if (!iova)
-               iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift);
+               iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift,
+                                      true);
 
        return (dma_addr_t)iova << shift;
 }
index 57c920c1372d09f927a7dcdeadc25375bf4164a8..9a7ffd13c7f07d037814b3eb7f1bba75a13641f2 100644 (file)
@@ -801,13 +801,16 @@ int __init dmar_dev_scope_init(void)
                                dmar_free_pci_notify_info(info);
                        }
                }
-
-               bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb);
        }
 
        return dmar_dev_scope_status;
 }
 
+void dmar_register_bus_notifier(void)
+{
+       bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb);
+}
+
 
 int __init dmar_table_init(void)
 {
@@ -1676,7 +1679,8 @@ irqreturn_t dmar_fault(int irq, void *dev_id)
                raw_spin_lock_irqsave(&iommu->register_lock, flag);
        }
 
-       writel(DMA_FSTS_PFO | DMA_FSTS_PPF, iommu->reg + DMAR_FSTS_REG);
+       writel(DMA_FSTS_PFO | DMA_FSTS_PPF | DMA_FSTS_PRO,
+              iommu->reg + DMAR_FSTS_REG);
 
 unlock_exit:
        raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
index 25c2c75f5332efe3e98d7e1c290db71d515364f5..79c45650f8de6b2a20b0d1a79ca9dff5a17dec4c 100644 (file)
@@ -263,6 +263,7 @@ struct exynos_iommu_domain {
 struct sysmmu_drvdata {
        struct device *sysmmu;          /* SYSMMU controller device */
        struct device *master;          /* master device (owner) */
+       struct device_link *link;       /* runtime PM link to master */
        void __iomem *sfrbase;          /* our registers */
        struct clk *clk;                /* SYSMMU's clock */
        struct clk *aclk;               /* SYSMMU's aclk clock */
@@ -1250,6 +1251,8 @@ static struct iommu_group *get_device_iommu_group(struct device *dev)
 
 static int exynos_iommu_add_device(struct device *dev)
 {
+       struct exynos_iommu_owner *owner = dev->archdata.iommu;
+       struct sysmmu_drvdata *data;
        struct iommu_group *group;
 
        if (!has_sysmmu(dev))
@@ -1260,6 +1263,15 @@ static int exynos_iommu_add_device(struct device *dev)
        if (IS_ERR(group))
                return PTR_ERR(group);
 
+       list_for_each_entry(data, &owner->controllers, owner_node) {
+               /*
+                * SYSMMU will be runtime activated via device link
+                * (dependency) to its master device, so there are no
+                * direct calls to pm_runtime_get/put in this driver.
+                */
+               data->link = device_link_add(dev, data->sysmmu,
+                                            DL_FLAG_PM_RUNTIME);
+       }
        iommu_group_put(group);
 
        return 0;
@@ -1268,6 +1280,7 @@ static int exynos_iommu_add_device(struct device *dev)
 static void exynos_iommu_remove_device(struct device *dev)
 {
        struct exynos_iommu_owner *owner = dev->archdata.iommu;
+       struct sysmmu_drvdata *data;
 
        if (!has_sysmmu(dev))
                return;
@@ -1283,6 +1296,9 @@ static void exynos_iommu_remove_device(struct device *dev)
                }
        }
        iommu_group_remove_device(dev);
+
+       list_for_each_entry(data, &owner->controllers, owner_node)
+               device_link_del(data->link);
 }
 
 static int exynos_iommu_of_xlate(struct device *dev,
@@ -1316,13 +1332,6 @@ static int exynos_iommu_of_xlate(struct device *dev,
        list_add_tail(&data->owner_node, &owner->controllers);
        data->master = dev;
 
-       /*
-        * SYSMMU will be runtime activated via device link (dependency) to its
-        * master device, so there are no direct calls to pm_runtime_get/put
-        * in this driver.
-        */
-       device_link_add(dev, data->sysmmu, DL_FLAG_PM_RUNTIME);
-
        return 0;
 }
 
index 6784a05dd6b2d5fc779016190f427e5aaada4ebe..a0babdbf71460dda5dc156bfe7b7b9313e0da679 100644 (file)
@@ -82,8 +82,6 @@
 #define IOVA_START_PFN         (1)
 
 #define IOVA_PFN(addr)         ((addr) >> PAGE_SHIFT)
-#define DMA_32BIT_PFN          IOVA_PFN(DMA_BIT_MASK(32))
-#define DMA_64BIT_PFN          IOVA_PFN(DMA_BIT_MASK(64))
 
 /* page table handling */
 #define LEVEL_STRIDE           (9)
@@ -1878,8 +1876,7 @@ static int dmar_init_reserved_ranges(void)
        struct iova *iova;
        int i;
 
-       init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN,
-                       DMA_32BIT_PFN);
+       init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN);
 
        lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
                &reserved_rbtree_key);
@@ -1938,8 +1935,7 @@ static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
        unsigned long sagaw;
        int err;
 
-       init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
-                       DMA_32BIT_PFN);
+       init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
 
        err = init_iova_flush_queue(&domain->iovad,
                                    iommu_flush_iova, iova_entry_free);
@@ -2058,7 +2054,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
        if (context_copied(context)) {
                u16 did_old = context_domain_id(context);
 
-               if (did_old >= 0 && did_old < cap_ndoms(iommu->cap)) {
+               if (did_old < cap_ndoms(iommu->cap)) {
                        iommu->flush.flush_context(iommu, did_old,
                                                   (((u16)bus) << 8) | devfn,
                                                   DMA_CCMD_MASK_NOBIT,
@@ -3473,11 +3469,12 @@ static unsigned long intel_alloc_iova(struct device *dev,
                 * from higher range
                 */
                iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
-                                          IOVA_PFN(DMA_BIT_MASK(32)));
+                                          IOVA_PFN(DMA_BIT_MASK(32)), false);
                if (iova_pfn)
                        return iova_pfn;
        }
-       iova_pfn = alloc_iova_fast(&domain->iovad, nrpages, IOVA_PFN(dma_mask));
+       iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
+                                  IOVA_PFN(dma_mask), true);
        if (unlikely(!iova_pfn)) {
                pr_err("Allocating %ld-page iova for %s failed",
                       nrpages, dev_name(dev));
@@ -4752,6 +4749,16 @@ int __init intel_iommu_init(void)
                goto out_free_dmar;
        }
 
+       up_write(&dmar_global_lock);
+
+       /*
+        * The bus notifier takes the dmar_global_lock, so lockdep will
+        * complain later when we register it under the lock.
+        */
+       dmar_register_bus_notifier();
+
+       down_write(&dmar_global_lock);
+
        if (no_iommu || dmar_disabled) {
                /*
                 * We exit the function here to ensure IOMMU's remapping and
@@ -4897,8 +4904,7 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
 {
        int adjust_width;
 
-       init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
-                       DMA_32BIT_PFN);
+       init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
        domain_reserve_special_ranges(domain);
 
        /* calculate AGAW */
index f6697e55c2d44f96baa52827af4e90be0548b71f..ed1cf7c5a43ba33cc04ff32d1c88efcbb49bdbde 100644 (file)
@@ -292,7 +292,7 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
        int pasid_max;
        int ret;
 
-       if (WARN_ON(!iommu))
+       if (WARN_ON(!iommu || !iommu->pasid_table))
                return -EINVAL;
 
        if (dev_is_pci(dev)) {
@@ -458,6 +458,8 @@ int intel_svm_unbind_mm(struct device *dev, int pasid)
                                kfree_rcu(sdev, rcu);
 
                                if (list_empty(&svm->devs)) {
+                                       svm->iommu->pasid_table[svm->pasid].val = 0;
+                                       wmb();
 
                                        idr_remove(&svm->iommu->pasid_idr, svm->pasid);
                                        if (svm->mm)
index 6961fc393f0b25828f5981fad35ea744c7768b41..2ca08dc9331ca1db30c5bd12435c6c36b809d3f5 100644 (file)
@@ -660,16 +660,11 @@ static int arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova,
                         size_t size)
 {
        struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops);
-       size_t unmapped;
 
        if (WARN_ON(upper_32_bits(iova)))
                return 0;
 
-       unmapped = __arm_v7s_unmap(data, iova, size, 1, data->pgd);
-       if (unmapped)
-               io_pgtable_tlb_sync(&data->iop);
-
-       return unmapped;
+       return __arm_v7s_unmap(data, iova, size, 1, data->pgd);
 }
 
 static phys_addr_t arm_v7s_iova_to_phys(struct io_pgtable_ops *ops,
index e8018a308868e33a4ea722c0b9686118078dc0db..51e5c43caed18c4c5066b808aa318cd953797afe 100644 (file)
@@ -609,7 +609,6 @@ static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
 static int arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova,
                          size_t size)
 {
-       size_t unmapped;
        struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
        arm_lpae_iopte *ptep = data->pgd;
        int lvl = ARM_LPAE_START_LVL(data);
@@ -617,11 +616,7 @@ static int arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova,
        if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias)))
                return 0;
 
-       unmapped = __arm_lpae_unmap(data, iova, size, lvl, ptep);
-       if (unmapped)
-               io_pgtable_tlb_sync(&data->iop);
-
-       return unmapped;
+       return __arm_lpae_unmap(data, iova, size, lvl, ptep);
 }
 
 static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
index 33edfa794ae9f5514c5113fa27597a12373f2a5c..466aaa8ba841c3253226543adfecc030b4e5d6f6 100644 (file)
@@ -24,6 +24,9 @@
 #include <linux/bitops.h>
 #include <linux/cpu.h>
 
+/* The anchor node sits above the top of the usable address space */
+#define IOVA_ANCHOR    ~0UL
+
 static bool iova_rcache_insert(struct iova_domain *iovad,
                               unsigned long pfn,
                               unsigned long size);
@@ -37,7 +40,7 @@ static void fq_flush_timeout(unsigned long data);
 
 void
 init_iova_domain(struct iova_domain *iovad, unsigned long granule,
-       unsigned long start_pfn, unsigned long pfn_32bit)
+       unsigned long start_pfn)
 {
        /*
         * IOVA granularity will normally be equal to the smallest
@@ -48,12 +51,16 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule,
 
        spin_lock_init(&iovad->iova_rbtree_lock);
        iovad->rbroot = RB_ROOT;
-       iovad->cached32_node = NULL;
+       iovad->cached_node = &iovad->anchor.node;
+       iovad->cached32_node = &iovad->anchor.node;
        iovad->granule = granule;
        iovad->start_pfn = start_pfn;
-       iovad->dma_32bit_pfn = pfn_32bit + 1;
+       iovad->dma_32bit_pfn = 1UL << (32 - iova_shift(iovad));
        iovad->flush_cb = NULL;
        iovad->fq = NULL;
+       iovad->anchor.pfn_lo = iovad->anchor.pfn_hi = IOVA_ANCHOR;
+       rb_link_node(&iovad->anchor.node, NULL, &iovad->rbroot.rb_node);
+       rb_insert_color(&iovad->anchor.node, &iovad->rbroot);
        init_iova_rcaches(iovad);
 }
 EXPORT_SYMBOL_GPL(init_iova_domain);
@@ -108,50 +115,36 @@ int init_iova_flush_queue(struct iova_domain *iovad,
 EXPORT_SYMBOL_GPL(init_iova_flush_queue);
 
 static struct rb_node *
-__get_cached_rbnode(struct iova_domain *iovad, unsigned long *limit_pfn)
+__get_cached_rbnode(struct iova_domain *iovad, unsigned long limit_pfn)
 {
-       if ((*limit_pfn > iovad->dma_32bit_pfn) ||
-               (iovad->cached32_node == NULL))
-               return rb_last(&iovad->rbroot);
-       else {
-               struct rb_node *prev_node = rb_prev(iovad->cached32_node);
-               struct iova *curr_iova =
-                       rb_entry(iovad->cached32_node, struct iova, node);
-               *limit_pfn = curr_iova->pfn_lo;
-               return prev_node;
-       }
+       if (limit_pfn <= iovad->dma_32bit_pfn)
+               return iovad->cached32_node;
+
+       return iovad->cached_node;
 }
 
 static void
-__cached_rbnode_insert_update(struct iova_domain *iovad,
-       unsigned long limit_pfn, struct iova *new)
+__cached_rbnode_insert_update(struct iova_domain *iovad, struct iova *new)
 {
-       if (limit_pfn != iovad->dma_32bit_pfn)
-               return;
-       iovad->cached32_node = &new->node;
+       if (new->pfn_hi < iovad->dma_32bit_pfn)
+               iovad->cached32_node = &new->node;
+       else
+               iovad->cached_node = &new->node;
 }
 
 static void
 __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
 {
        struct iova *cached_iova;
-       struct rb_node *curr;
 
-       if (!iovad->cached32_node)
-               return;
-       curr = iovad->cached32_node;
-       cached_iova = rb_entry(curr, struct iova, node);
-
-       if (free->pfn_lo >= cached_iova->pfn_lo) {
-               struct rb_node *node = rb_next(&free->node);
-               struct iova *iova = rb_entry(node, struct iova, node);
+       cached_iova = rb_entry(iovad->cached32_node, struct iova, node);
+       if (free->pfn_hi < iovad->dma_32bit_pfn &&
+           free->pfn_lo >= cached_iova->pfn_lo)
+               iovad->cached32_node = rb_next(&free->node);
 
-               /* only cache if it's below 32bit pfn */
-               if (node && iova->pfn_lo < iovad->dma_32bit_pfn)
-                       iovad->cached32_node = node;
-               else
-                       iovad->cached32_node = NULL;
-       }
+       cached_iova = rb_entry(iovad->cached_node, struct iova, node);
+       if (free->pfn_lo >= cached_iova->pfn_lo)
+               iovad->cached_node = rb_next(&free->node);
 }
 
 /* Insert the iova into domain rbtree by holding writer lock */
@@ -182,63 +175,43 @@ iova_insert_rbtree(struct rb_root *root, struct iova *iova,
        rb_insert_color(&iova->node, root);
 }
 
-/*
- * Computes the padding size required, to make the start address
- * naturally aligned on the power-of-two order of its size
- */
-static unsigned int
-iova_get_pad_size(unsigned int size, unsigned int limit_pfn)
-{
-       return (limit_pfn - size) & (__roundup_pow_of_two(size) - 1);
-}
-
 static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
                unsigned long size, unsigned long limit_pfn,
                        struct iova *new, bool size_aligned)
 {
-       struct rb_node *prev, *curr = NULL;
+       struct rb_node *curr, *prev;
+       struct iova *curr_iova;
        unsigned long flags;
-       unsigned long saved_pfn;
-       unsigned int pad_size = 0;
+       unsigned long new_pfn;
+       unsigned long align_mask = ~0UL;
+
+       if (size_aligned)
+               align_mask <<= fls_long(size - 1);
 
        /* Walk the tree backwards */
        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
-       saved_pfn = limit_pfn;
-       curr = __get_cached_rbnode(iovad, &limit_pfn);
-       prev = curr;
-       while (curr) {
-               struct iova *curr_iova = rb_entry(curr, struct iova, node);
-
-               if (limit_pfn <= curr_iova->pfn_lo) {
-                       goto move_left;
-               } else if (limit_pfn > curr_iova->pfn_hi) {
-                       if (size_aligned)
-                               pad_size = iova_get_pad_size(size, limit_pfn);
-                       if ((curr_iova->pfn_hi + size + pad_size) < limit_pfn)
-                               break;  /* found a free slot */
-               }
-               limit_pfn = curr_iova->pfn_lo;
-move_left:
+       curr = __get_cached_rbnode(iovad, limit_pfn);
+       curr_iova = rb_entry(curr, struct iova, node);
+       do {
+               limit_pfn = min(limit_pfn, curr_iova->pfn_lo);
+               new_pfn = (limit_pfn - size) & align_mask;
                prev = curr;
                curr = rb_prev(curr);
-       }
+               curr_iova = rb_entry(curr, struct iova, node);
+       } while (curr && new_pfn <= curr_iova->pfn_hi);
 
-       if (!curr) {
-               if (size_aligned)
-                       pad_size = iova_get_pad_size(size, limit_pfn);
-               if ((iovad->start_pfn + size + pad_size) > limit_pfn) {
-                       spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
-                       return -ENOMEM;
-               }
+       if (limit_pfn < size || new_pfn < iovad->start_pfn) {
+               spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
+               return -ENOMEM;
        }
 
        /* pfn_lo will point to size aligned address if size_aligned is set */
-       new->pfn_lo = limit_pfn - (size + pad_size);
+       new->pfn_lo = new_pfn;
        new->pfn_hi = new->pfn_lo + size - 1;
 
        /* If we have 'prev', it's a valid place to start the insertion. */
        iova_insert_rbtree(&iovad->rbroot, new, prev);
-       __cached_rbnode_insert_update(iovad, saved_pfn, new);
+       __cached_rbnode_insert_update(iovad, new);
 
        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
 
@@ -258,7 +231,8 @@ EXPORT_SYMBOL(alloc_iova_mem);
 
 void free_iova_mem(struct iova *iova)
 {
-       kmem_cache_free(iova_cache, iova);
+       if (iova->pfn_lo != IOVA_ANCHOR)
+               kmem_cache_free(iova_cache, iova);
 }
 EXPORT_SYMBOL(free_iova_mem);
 
@@ -342,15 +316,12 @@ private_find_iova(struct iova_domain *iovad, unsigned long pfn)
        while (node) {
                struct iova *iova = rb_entry(node, struct iova, node);
 
-               /* If pfn falls within iova's range, return iova */
-               if ((pfn >= iova->pfn_lo) && (pfn <= iova->pfn_hi)) {
-                       return iova;
-               }
-
                if (pfn < iova->pfn_lo)
                        node = node->rb_left;
-               else if (pfn > iova->pfn_lo)
+               else if (pfn > iova->pfn_hi)
                        node = node->rb_right;
+               else
+                       return iova;    /* pfn falls within iova's range */
        }
 
        return NULL;
@@ -424,18 +395,19 @@ EXPORT_SYMBOL_GPL(free_iova);
  * @iovad: - iova domain in question
  * @size: - size of page frames to allocate
  * @limit_pfn: - max limit address
+ * @flush_rcache: - set to flush rcache on regular allocation failure
  * This function tries to satisfy an iova allocation from the rcache,
- * and falls back to regular allocation on failure.
+ * and falls back to regular allocation on failure. If regular allocation
+ * fails too and the flush_rcache flag is set then the rcache will be flushed.
 */
 unsigned long
 alloc_iova_fast(struct iova_domain *iovad, unsigned long size,
-               unsigned long limit_pfn)
+               unsigned long limit_pfn, bool flush_rcache)
 {
-       bool flushed_rcache = false;
        unsigned long iova_pfn;
        struct iova *new_iova;
 
-       iova_pfn = iova_rcache_get(iovad, size, limit_pfn);
+       iova_pfn = iova_rcache_get(iovad, size, limit_pfn + 1);
        if (iova_pfn)
                return iova_pfn;
 
@@ -444,11 +416,11 @@ retry:
        if (!new_iova) {
                unsigned int cpu;
 
-               if (flushed_rcache)
+               if (!flush_rcache)
                        return 0;
 
                /* Try replenishing IOVAs by flushing rcache. */
-               flushed_rcache = true;
+               flush_rcache = false;
                for_each_online_cpu(cpu)
                        free_cpu_cached_iovas(cpu, iovad);
                goto retry;
@@ -570,7 +542,7 @@ void queue_iova(struct iova_domain *iovad,
                unsigned long pfn, unsigned long pages,
                unsigned long data)
 {
-       struct iova_fq *fq = get_cpu_ptr(iovad->fq);
+       struct iova_fq *fq = raw_cpu_ptr(iovad->fq);
        unsigned long flags;
        unsigned idx;
 
@@ -600,8 +572,6 @@ void queue_iova(struct iova_domain *iovad,
        if (atomic_cmpxchg(&iovad->fq_timer_on, 0, 1) == 0)
                mod_timer(&iovad->fq_timer,
                          jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT));
-
-       put_cpu_ptr(iovad->fq);
 }
 EXPORT_SYMBOL_GPL(queue_iova);
 
@@ -612,21 +582,12 @@ EXPORT_SYMBOL_GPL(queue_iova);
  */
 void put_iova_domain(struct iova_domain *iovad)
 {
-       struct rb_node *node;
-       unsigned long flags;
+       struct iova *iova, *tmp;
 
        free_iova_flush_queue(iovad);
        free_iova_rcaches(iovad);
-       spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
-       node = rb_first(&iovad->rbroot);
-       while (node) {
-               struct iova *iova = rb_entry(node, struct iova, node);
-
-               rb_erase(node, &iovad->rbroot);
+       rbtree_postorder_for_each_entry_safe(iova, tmp, &iovad->rbroot, node)
                free_iova_mem(iova);
-               node = rb_first(&iovad->rbroot);
-       }
-       spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
 }
 EXPORT_SYMBOL_GPL(put_iova_domain);
 
@@ -695,6 +656,10 @@ reserve_iova(struct iova_domain *iovad,
        struct iova *iova;
        unsigned int overlap = 0;
 
+       /* Don't allow nonsensical pfns */
+       if (WARN_ON((pfn_hi | pfn_lo) > (ULLONG_MAX >> iova_shift(iovad))))
+               return NULL;
+
        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
        for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) {
                if (__is_range_overlap(node, pfn_lo, pfn_hi)) {
@@ -738,6 +703,9 @@ copy_reserved_iova(struct iova_domain *from, struct iova_domain *to)
                struct iova *iova = rb_entry(node, struct iova, node);
                struct iova *new_iova;
 
+               if (iova->pfn_lo == IOVA_ANCHOR)
+                       continue;
+
                new_iova = reserve_iova(to, iova->pfn_lo, iova->pfn_hi);
                if (!new_iova)
                        printk(KERN_ERR "Reserve iova range %lx@%lx failed\n",
@@ -855,12 +823,21 @@ static bool iova_magazine_empty(struct iova_magazine *mag)
 static unsigned long iova_magazine_pop(struct iova_magazine *mag,
                                       unsigned long limit_pfn)
 {
+       int i;
+       unsigned long pfn;
+
        BUG_ON(iova_magazine_empty(mag));
 
-       if (mag->pfns[mag->size - 1] >= limit_pfn)
-               return 0;
+       /* Only fall back to the rbtree if we have no suitable pfns at all */
+       for (i = mag->size - 1; mag->pfns[i] > limit_pfn; i--)
+               if (i == 0)
+                       return 0;
 
-       return mag->pfns[--mag->size];
+       /* Swap it to pop it */
+       pfn = mag->pfns[i];
+       mag->pfns[i] = mag->pfns[--mag->size];
+
+       return pfn;
 }
 
 static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn)
@@ -1011,27 +988,7 @@ static unsigned long iova_rcache_get(struct iova_domain *iovad,
        if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
                return 0;
 
-       return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn);
-}
-
-/*
- * Free a cpu's rcache.
- */
-static void free_cpu_iova_rcache(unsigned int cpu, struct iova_domain *iovad,
-                                struct iova_rcache *rcache)
-{
-       struct iova_cpu_rcache *cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
-       unsigned long flags;
-
-       spin_lock_irqsave(&cpu_rcache->lock, flags);
-
-       iova_magazine_free_pfns(cpu_rcache->loaded, iovad);
-       iova_magazine_free(cpu_rcache->loaded);
-
-       iova_magazine_free_pfns(cpu_rcache->prev, iovad);
-       iova_magazine_free(cpu_rcache->prev);
-
-       spin_unlock_irqrestore(&cpu_rcache->lock, flags);
+       return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn - size);
 }
 
 /*
@@ -1040,21 +997,20 @@ static void free_cpu_iova_rcache(unsigned int cpu, struct iova_domain *iovad,
 static void free_iova_rcaches(struct iova_domain *iovad)
 {
        struct iova_rcache *rcache;
-       unsigned long flags;
+       struct iova_cpu_rcache *cpu_rcache;
        unsigned int cpu;
        int i, j;
 
        for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
                rcache = &iovad->rcaches[i];
-               for_each_possible_cpu(cpu)
-                       free_cpu_iova_rcache(cpu, iovad, rcache);
-               spin_lock_irqsave(&rcache->lock, flags);
+               for_each_possible_cpu(cpu) {
+                       cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
+                       iova_magazine_free(cpu_rcache->loaded);
+                       iova_magazine_free(cpu_rcache->prev);
+               }
                free_percpu(rcache->cpu_rcaches);
-               for (j = 0; j < rcache->depot_size; ++j) {
-                       iova_magazine_free_pfns(rcache->depot[j], iovad);
+               for (j = 0; j < rcache->depot_size; ++j)
                        iova_magazine_free(rcache->depot[j]);
-               }
-               spin_unlock_irqrestore(&rcache->lock, flags);
        }
 }
 
index 195d6e93ac7185aa53ee075521faf0de56ec53bf..8dce3a9de9d86e37b66dce131952aa19e0aadb2a 100644 (file)
 #include <linux/iommu.h>
 #include <linux/module.h>
 #include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_iommu.h>
 #include <linux/of_platform.h>
 #include <linux/platform_device.h>
 #include <linux/sizes.h>
 #include <linux/slab.h>
+#include <linux/sys_soc.h>
 
 #if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA)
 #include <asm/dma-iommu.h>
 #include <asm/pgalloc.h>
+#else
+#define arm_iommu_create_mapping(...)  NULL
+#define arm_iommu_attach_device(...)   -ENODEV
+#define arm_iommu_release_mapping(...) do {} while (0)
+#define arm_iommu_detach_device(...)   do {} while (0)
 #endif
 
 #include "io-pgtable.h"
 
-#define IPMMU_CTX_MAX 1
+#define IPMMU_CTX_MAX 8
+
+struct ipmmu_features {
+       bool use_ns_alias_offset;
+       bool has_cache_leaf_nodes;
+       unsigned int number_of_contexts;
+       bool setup_imbuscr;
+       bool twobit_imttbcr_sl0;
+};
 
 struct ipmmu_vmsa_device {
        struct device *dev;
        void __iomem *base;
        struct iommu_device iommu;
-
+       struct ipmmu_vmsa_device *root;
+       const struct ipmmu_features *features;
        unsigned int num_utlbs;
+       unsigned int num_ctx;
        spinlock_t lock;                        /* Protects ctx and domains[] */
        DECLARE_BITMAP(ctx, IPMMU_CTX_MAX);
        struct ipmmu_vmsa_domain *domains[IPMMU_CTX_MAX];
 
+       struct iommu_group *group;
        struct dma_iommu_mapping *mapping;
 };
 
@@ -57,18 +76,12 @@ struct ipmmu_vmsa_domain {
        spinlock_t lock;                        /* Protects mappings */
 };
 
-struct ipmmu_vmsa_iommu_priv {
-       struct ipmmu_vmsa_device *mmu;
-       struct device *dev;
-       struct list_head list;
-};
-
 static struct ipmmu_vmsa_domain *to_vmsa_domain(struct iommu_domain *dom)
 {
        return container_of(dom, struct ipmmu_vmsa_domain, io_domain);
 }
 
-static struct ipmmu_vmsa_iommu_priv *to_priv(struct device *dev)
+static struct ipmmu_vmsa_device *to_ipmmu(struct device *dev)
 {
        return dev->iommu_fwspec ? dev->iommu_fwspec->iommu_priv : NULL;
 }
@@ -133,6 +146,10 @@ static struct ipmmu_vmsa_iommu_priv *to_priv(struct device *dev)
 #define IMTTBCR_TSZ0_MASK              (7 << 0)
 #define IMTTBCR_TSZ0_SHIFT             0
 
+#define IMTTBCR_SL0_TWOBIT_LVL_3       (0 << 6)
+#define IMTTBCR_SL0_TWOBIT_LVL_2       (1 << 6)
+#define IMTTBCR_SL0_TWOBIT_LVL_1       (2 << 6)
+
 #define IMBUSCR                                0x000c
 #define IMBUSCR_DVM                    (1 << 2)
 #define IMBUSCR_BUSSEL_SYS             (0 << 0)
@@ -193,6 +210,36 @@ static struct ipmmu_vmsa_iommu_priv *to_priv(struct device *dev)
 #define IMUASID_ASID0_MASK             (0xff << 0)
 #define IMUASID_ASID0_SHIFT            0
 
+/* -----------------------------------------------------------------------------
+ * Root device handling
+ */
+
+static struct platform_driver ipmmu_driver;
+
+static bool ipmmu_is_root(struct ipmmu_vmsa_device *mmu)
+{
+       return mmu->root == mmu;
+}
+
+static int __ipmmu_check_device(struct device *dev, void *data)
+{
+       struct ipmmu_vmsa_device *mmu = dev_get_drvdata(dev);
+       struct ipmmu_vmsa_device **rootp = data;
+
+       if (ipmmu_is_root(mmu))
+               *rootp = mmu;
+
+       return 0;
+}
+
+static struct ipmmu_vmsa_device *ipmmu_find_root(void)
+{
+       struct ipmmu_vmsa_device *root = NULL;
+
+       return driver_for_each_device(&ipmmu_driver.driver, NULL, &root,
+                                     __ipmmu_check_device) == 0 ? root : NULL;
+}
+
 /* -----------------------------------------------------------------------------
  * Read/Write Access
  */
@@ -208,15 +255,29 @@ static void ipmmu_write(struct ipmmu_vmsa_device *mmu, unsigned int offset,
        iowrite32(data, mmu->base + offset);
 }
 
-static u32 ipmmu_ctx_read(struct ipmmu_vmsa_domain *domain, unsigned int reg)
+static u32 ipmmu_ctx_read_root(struct ipmmu_vmsa_domain *domain,
+                              unsigned int reg)
 {
-       return ipmmu_read(domain->mmu, domain->context_id * IM_CTX_SIZE + reg);
+       return ipmmu_read(domain->mmu->root,
+                         domain->context_id * IM_CTX_SIZE + reg);
 }
 
-static void ipmmu_ctx_write(struct ipmmu_vmsa_domain *domain, unsigned int reg,
-                           u32 data)
+static void ipmmu_ctx_write_root(struct ipmmu_vmsa_domain *domain,
+                                unsigned int reg, u32 data)
 {
-       ipmmu_write(domain->mmu, domain->context_id * IM_CTX_SIZE + reg, data);
+       ipmmu_write(domain->mmu->root,
+                   domain->context_id * IM_CTX_SIZE + reg, data);
+}
+
+static void ipmmu_ctx_write_all(struct ipmmu_vmsa_domain *domain,
+                               unsigned int reg, u32 data)
+{
+       if (domain->mmu != domain->mmu->root)
+               ipmmu_write(domain->mmu,
+                           domain->context_id * IM_CTX_SIZE + reg, data);
+
+       ipmmu_write(domain->mmu->root,
+                   domain->context_id * IM_CTX_SIZE + reg, data);
 }
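The helpers above route context register accesses through mmu->root, and ipmmu_ctx_write_all() additionally mirrors the write to the leaf instance when the two differ. A hedged userspace model of that root/leaf split (all types and names are made up for illustration):

#include <stdio.h>

#define CTX_REGS 4

/* Toy model of an IPMMU instance: a leaf points at its root, a root at itself. */
struct toy_mmu {
        const char *name;
        struct toy_mmu *root;
        unsigned int regs[CTX_REGS];
};

/* Writes that only matter for translation go to the root instance. */
static void ctx_write_root(struct toy_mmu *mmu, unsigned int reg, unsigned int val)
{
        mmu->root->regs[reg] = val;
}

/* Control writes are mirrored to the leaf when it is not its own root. */
static void ctx_write_all(struct toy_mmu *mmu, unsigned int reg, unsigned int val)
{
        if (mmu != mmu->root)
                mmu->regs[reg] = val;
        mmu->root->regs[reg] = val;
}

int main(void)
{
        struct toy_mmu root = { .name = "root" };
        struct toy_mmu leaf = { .name = "leaf", .root = &root };

        root.root = &root;
        ctx_write_root(&leaf, 0, 0xA);
        ctx_write_all(&leaf, 1, 0xB);
        printf("root: %#x %#x  leaf: %#x %#x\n",
               root.regs[0], root.regs[1], leaf.regs[0], leaf.regs[1]);
        return 0;
}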
 
 /* -----------------------------------------------------------------------------
@@ -228,7 +289,7 @@ static void ipmmu_tlb_sync(struct ipmmu_vmsa_domain *domain)
 {
        unsigned int count = 0;
 
-       while (ipmmu_ctx_read(domain, IMCTR) & IMCTR_FLUSH) {
+       while (ipmmu_ctx_read_root(domain, IMCTR) & IMCTR_FLUSH) {
                cpu_relax();
                if (++count == TLB_LOOP_TIMEOUT) {
                        dev_err_ratelimited(domain->mmu->dev,
@@ -243,9 +304,9 @@ static void ipmmu_tlb_invalidate(struct ipmmu_vmsa_domain *domain)
 {
        u32 reg;
 
-       reg = ipmmu_ctx_read(domain, IMCTR);
+       reg = ipmmu_ctx_read_root(domain, IMCTR);
        reg |= IMCTR_FLUSH;
-       ipmmu_ctx_write(domain, IMCTR, reg);
+       ipmmu_ctx_write_all(domain, IMCTR, reg);
 
        ipmmu_tlb_sync(domain);
 }
@@ -313,11 +374,12 @@ static int ipmmu_domain_allocate_context(struct ipmmu_vmsa_device *mmu,
 
        spin_lock_irqsave(&mmu->lock, flags);
 
-       ret = find_first_zero_bit(mmu->ctx, IPMMU_CTX_MAX);
-       if (ret != IPMMU_CTX_MAX) {
+       ret = find_first_zero_bit(mmu->ctx, mmu->num_ctx);
+       if (ret != mmu->num_ctx) {
                mmu->domains[ret] = domain;
                set_bit(ret, mmu->ctx);
-       }
+       } else
+               ret = -EBUSY;
 
        spin_unlock_irqrestore(&mmu->lock, flags);
 
@@ -340,6 +402,7 @@ static void ipmmu_domain_free_context(struct ipmmu_vmsa_device *mmu,
 static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain)
 {
        u64 ttbr;
+       u32 tmp;
        int ret;
 
        /*
@@ -364,51 +427,59 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain)
         * TODO: Add support for coherent walk through CCI with DVM and remove
         * cache handling. For now, delegate it to the io-pgtable code.
         */
-       domain->cfg.iommu_dev = domain->mmu->dev;
+       domain->cfg.iommu_dev = domain->mmu->root->dev;
 
        /*
         * Find an unused context.
         */
-       ret = ipmmu_domain_allocate_context(domain->mmu, domain);
-       if (ret == IPMMU_CTX_MAX)
-               return -EBUSY;
+       ret = ipmmu_domain_allocate_context(domain->mmu->root, domain);
+       if (ret < 0)
+               return ret;
 
        domain->context_id = ret;
 
        domain->iop = alloc_io_pgtable_ops(ARM_32_LPAE_S1, &domain->cfg,
                                           domain);
        if (!domain->iop) {
-               ipmmu_domain_free_context(domain->mmu, domain->context_id);
+               ipmmu_domain_free_context(domain->mmu->root,
+                                         domain->context_id);
                return -EINVAL;
        }
 
        /* TTBR0 */
        ttbr = domain->cfg.arm_lpae_s1_cfg.ttbr[0];
-       ipmmu_ctx_write(domain, IMTTLBR0, ttbr);
-       ipmmu_ctx_write(domain, IMTTUBR0, ttbr >> 32);
+       ipmmu_ctx_write_root(domain, IMTTLBR0, ttbr);
+       ipmmu_ctx_write_root(domain, IMTTUBR0, ttbr >> 32);
 
        /*
         * TTBCR
         * We use long descriptors with inner-shareable WBWA tables and allocate
         * the whole 32-bit VA space to TTBR0.
         */
-       ipmmu_ctx_write(domain, IMTTBCR, IMTTBCR_EAE |
-                       IMTTBCR_SH0_INNER_SHAREABLE | IMTTBCR_ORGN0_WB_WA |
-                       IMTTBCR_IRGN0_WB_WA | IMTTBCR_SL0_LVL_1);
+       if (domain->mmu->features->twobit_imttbcr_sl0)
+               tmp = IMTTBCR_SL0_TWOBIT_LVL_1;
+       else
+               tmp = IMTTBCR_SL0_LVL_1;
+
+       ipmmu_ctx_write_root(domain, IMTTBCR, IMTTBCR_EAE |
+                            IMTTBCR_SH0_INNER_SHAREABLE | IMTTBCR_ORGN0_WB_WA |
+                            IMTTBCR_IRGN0_WB_WA | tmp);
 
        /* MAIR0 */
-       ipmmu_ctx_write(domain, IMMAIR0, domain->cfg.arm_lpae_s1_cfg.mair[0]);
+       ipmmu_ctx_write_root(domain, IMMAIR0,
+                            domain->cfg.arm_lpae_s1_cfg.mair[0]);
 
        /* IMBUSCR */
-       ipmmu_ctx_write(domain, IMBUSCR,
-                       ipmmu_ctx_read(domain, IMBUSCR) &
-                       ~(IMBUSCR_DVM | IMBUSCR_BUSSEL_MASK));
+       if (domain->mmu->features->setup_imbuscr)
+               ipmmu_ctx_write_root(domain, IMBUSCR,
+                                    ipmmu_ctx_read_root(domain, IMBUSCR) &
+                                    ~(IMBUSCR_DVM | IMBUSCR_BUSSEL_MASK));
 
        /*
         * IMSTR
         * Clear all interrupt flags.
         */
-       ipmmu_ctx_write(domain, IMSTR, ipmmu_ctx_read(domain, IMSTR));
+       ipmmu_ctx_write_root(domain, IMSTR, ipmmu_ctx_read_root(domain, IMSTR));
 
        /*
         * IMCTR
@@ -417,7 +488,8 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain)
         * software management as we have no use for it. Flush the TLB as
         * required when modifying the context registers.
         */
-       ipmmu_ctx_write(domain, IMCTR, IMCTR_INTEN | IMCTR_FLUSH | IMCTR_MMUEN);
+       ipmmu_ctx_write_all(domain, IMCTR,
+                           IMCTR_INTEN | IMCTR_FLUSH | IMCTR_MMUEN);
 
        return 0;
 }
@@ -430,9 +502,9 @@ static void ipmmu_domain_destroy_context(struct ipmmu_vmsa_domain *domain)
         *
         * TODO: Is TLB flush really needed ?
         */
-       ipmmu_ctx_write(domain, IMCTR, IMCTR_FLUSH);
+       ipmmu_ctx_write_all(domain, IMCTR, IMCTR_FLUSH);
        ipmmu_tlb_sync(domain);
-       ipmmu_domain_free_context(domain->mmu, domain->context_id);
+       ipmmu_domain_free_context(domain->mmu->root, domain->context_id);
 }
 
 /* -----------------------------------------------------------------------------
@@ -446,11 +518,11 @@ static irqreturn_t ipmmu_domain_irq(struct ipmmu_vmsa_domain *domain)
        u32 status;
        u32 iova;
 
-       status = ipmmu_ctx_read(domain, IMSTR);
+       status = ipmmu_ctx_read_root(domain, IMSTR);
        if (!(status & err_mask))
                return IRQ_NONE;
 
-       iova = ipmmu_ctx_read(domain, IMEAR);
+       iova = ipmmu_ctx_read_root(domain, IMEAR);
 
        /*
         * Clear the error status flags. Unlike traditional interrupt flag
@@ -458,7 +530,7 @@ static irqreturn_t ipmmu_domain_irq(struct ipmmu_vmsa_domain *domain)
         * seems to require 0. The error address register must be read before,
         * otherwise its value will be 0.
         */
-       ipmmu_ctx_write(domain, IMSTR, 0);
+       ipmmu_ctx_write_root(domain, IMSTR, 0);
 
        /* Log fatal errors. */
        if (status & IMSTR_MHIT)
@@ -499,7 +571,7 @@ static irqreturn_t ipmmu_irq(int irq, void *dev)
        /*
         * Check interrupts for all active contexts.
         */
-       for (i = 0; i < IPMMU_CTX_MAX; i++) {
+       for (i = 0; i < mmu->num_ctx; i++) {
                if (!mmu->domains[i])
                        continue;
                if (ipmmu_domain_irq(mmu->domains[i]) == IRQ_HANDLED)
@@ -528,6 +600,27 @@ static struct iommu_domain *__ipmmu_domain_alloc(unsigned type)
        return &domain->io_domain;
 }
 
+static struct iommu_domain *ipmmu_domain_alloc(unsigned type)
+{
+       struct iommu_domain *io_domain = NULL;
+
+       switch (type) {
+       case IOMMU_DOMAIN_UNMANAGED:
+               io_domain = __ipmmu_domain_alloc(type);
+               break;
+
+       case IOMMU_DOMAIN_DMA:
+               io_domain = __ipmmu_domain_alloc(type);
+               if (io_domain && iommu_get_dma_cookie(io_domain)) {
+                       kfree(io_domain);
+                       io_domain = NULL;
+               }
+               break;
+       }
+
+       return io_domain;
+}
+
 static void ipmmu_domain_free(struct iommu_domain *io_domain)
 {
        struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain);
@@ -536,6 +629,7 @@ static void ipmmu_domain_free(struct iommu_domain *io_domain)
         * Free the domain resources. We assume that all devices have already
         * been detached.
         */
+       iommu_put_dma_cookie(io_domain);
        ipmmu_domain_destroy_context(domain);
        free_io_pgtable_ops(domain->iop);
        kfree(domain);
@@ -544,15 +638,14 @@ static void ipmmu_domain_free(struct iommu_domain *io_domain)
 static int ipmmu_attach_device(struct iommu_domain *io_domain,
                               struct device *dev)
 {
-       struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev);
        struct iommu_fwspec *fwspec = dev->iommu_fwspec;
-       struct ipmmu_vmsa_device *mmu = priv->mmu;
+       struct ipmmu_vmsa_device *mmu = to_ipmmu(dev);
        struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain);
        unsigned long flags;
        unsigned int i;
        int ret = 0;
 
-       if (!priv || !priv->mmu) {
+       if (!mmu) {
                dev_err(dev, "Cannot attach to IPMMU\n");
                return -ENXIO;
        }
@@ -563,6 +656,13 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain,
                /* The domain hasn't been used yet, initialize it. */
                domain->mmu = mmu;
                ret = ipmmu_domain_init_context(domain);
+               if (ret < 0) {
+                       dev_err(dev, "Unable to initialize IPMMU context\n");
+                       domain->mmu = NULL;
+               } else {
+                       dev_info(dev, "Using IPMMU context %u\n",
+                                domain->context_id);
+               }
        } else if (domain->mmu != mmu) {
                /*
                 * Something is wrong, we can't attach two devices using
@@ -619,6 +719,14 @@ static size_t ipmmu_unmap(struct iommu_domain *io_domain, unsigned long iova,
        return domain->iop->unmap(domain->iop, iova, size);
 }
 
+static void ipmmu_iotlb_sync(struct iommu_domain *io_domain)
+{
+       struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain);
+
+       if (domain->mmu)
+               ipmmu_tlb_flush_all(domain);
+}
+
 static phys_addr_t ipmmu_iova_to_phys(struct iommu_domain *io_domain,
                                      dma_addr_t iova)
 {
@@ -633,62 +741,53 @@ static int ipmmu_init_platform_device(struct device *dev,
                                      struct of_phandle_args *args)
 {
        struct platform_device *ipmmu_pdev;
-       struct ipmmu_vmsa_iommu_priv *priv;
 
        ipmmu_pdev = of_find_device_by_node(args->np);
        if (!ipmmu_pdev)
                return -ENODEV;
 
-       priv = kzalloc(sizeof(*priv), GFP_KERNEL);
-       if (!priv)
-               return -ENOMEM;
-
-       priv->mmu = platform_get_drvdata(ipmmu_pdev);
-       priv->dev = dev;
-       dev->iommu_fwspec->iommu_priv = priv;
+       dev->iommu_fwspec->iommu_priv = platform_get_drvdata(ipmmu_pdev);
        return 0;
 }
 
+static bool ipmmu_slave_whitelist(struct device *dev)
+{
+       /* By default, do not allow use of IPMMU */
+       return false;
+}
+
+static const struct soc_device_attribute soc_r8a7795[] = {
+       { .soc_id = "r8a7795", },
+       { /* sentinel */ }
+};
+
 static int ipmmu_of_xlate(struct device *dev,
                          struct of_phandle_args *spec)
 {
+       /* For R-Car Gen3 use a white list to opt-in slave devices */
+       if (soc_device_match(soc_r8a7795) && !ipmmu_slave_whitelist(dev))
+               return -ENODEV;
+
        iommu_fwspec_add_ids(dev, spec->args, 1);
 
        /* Initialize once - xlate() will call multiple times */
-       if (to_priv(dev))
+       if (to_ipmmu(dev))
                return 0;
 
        return ipmmu_init_platform_device(dev, spec);
 }
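With the whitelist callback returning false unconditionally, every slave on an r8a7795 (R-Car Gen3) SoC is refused in ipmmu_of_xlate() and keeps running untranslated until it is explicitly opted in. A small sketch of that allow-list gate (the list contents and helper names are hypothetical):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Hypothetical allow list of device names permitted to use the IOMMU. */
static const char *const allowed_slaves[] = {
        /* intentionally empty: nothing opts in by default */
        NULL
};

static bool slave_allowed(const char *dev_name)
{
        const char *const *p;

        for (p = allowed_slaves; *p; p++)
                if (!strcmp(*p, dev_name))
                        return true;
        return false;
}

/* Gate translation on both the SoC generation and the per-device opt-in. */
static bool accept_slave(bool gen3_soc, const char *dev_name)
{
        if (gen3_soc && !slave_allowed(dev_name))
                return false;
        return true;
}

int main(void)
{
        printf("gen2 device: %d\n", accept_slave(false, "e6700000.dma"));
        printf("gen3 device: %d\n", accept_slave(true, "e6700000.dma"));
        return 0;
}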
 
-#if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA)
-
-static struct iommu_domain *ipmmu_domain_alloc(unsigned type)
+static int ipmmu_init_arm_mapping(struct device *dev)
 {
-       if (type != IOMMU_DOMAIN_UNMANAGED)
-               return NULL;
-
-       return __ipmmu_domain_alloc(type);
-}
-
-static int ipmmu_add_device(struct device *dev)
-{
-       struct ipmmu_vmsa_device *mmu = NULL;
+       struct ipmmu_vmsa_device *mmu = to_ipmmu(dev);
        struct iommu_group *group;
        int ret;
 
-       /*
-        * Only let through devices that have been verified in xlate()
-        */
-       if (!to_priv(dev))
-               return -ENODEV;
-
        /* Create a device group and add the device to it. */
        group = iommu_group_alloc();
        if (IS_ERR(group)) {
                dev_err(dev, "Failed to allocate IOMMU group\n");
-               ret = PTR_ERR(group);
-               goto error;
+               return PTR_ERR(group);
        }
 
        ret = iommu_group_add_device(group, dev);
@@ -696,8 +795,7 @@ static int ipmmu_add_device(struct device *dev)
 
        if (ret < 0) {
                dev_err(dev, "Failed to add device to IPMMU group\n");
-               group = NULL;
-               goto error;
+               return ret;
        }
 
        /*
@@ -709,7 +807,6 @@ static int ipmmu_add_device(struct device *dev)
         * - Make the mapping size configurable ? We currently use a 2GB mapping
         *   at a 1GB offset to ensure that NULL VAs will fault.
         */
-       mmu = to_priv(dev)->mmu;
        if (!mmu->mapping) {
                struct dma_iommu_mapping *mapping;
 
@@ -734,159 +831,73 @@ static int ipmmu_add_device(struct device *dev)
        return 0;
 
 error:
-       if (mmu)
+       iommu_group_remove_device(dev);
+       if (mmu->mapping)
                arm_iommu_release_mapping(mmu->mapping);
 
-       if (!IS_ERR_OR_NULL(group))
-               iommu_group_remove_device(dev);
-
        return ret;
 }
 
-static void ipmmu_remove_device(struct device *dev)
-{
-       arm_iommu_detach_device(dev);
-       iommu_group_remove_device(dev);
-}
-
-static const struct iommu_ops ipmmu_ops = {
-       .domain_alloc = ipmmu_domain_alloc,
-       .domain_free = ipmmu_domain_free,
-       .attach_dev = ipmmu_attach_device,
-       .detach_dev = ipmmu_detach_device,
-       .map = ipmmu_map,
-       .unmap = ipmmu_unmap,
-       .map_sg = default_iommu_map_sg,
-       .iova_to_phys = ipmmu_iova_to_phys,
-       .add_device = ipmmu_add_device,
-       .remove_device = ipmmu_remove_device,
-       .pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K,
-       .of_xlate = ipmmu_of_xlate,
-};
-
-#endif /* !CONFIG_ARM && CONFIG_IOMMU_DMA */
-
-#ifdef CONFIG_IOMMU_DMA
-
-static DEFINE_SPINLOCK(ipmmu_slave_devices_lock);
-static LIST_HEAD(ipmmu_slave_devices);
-
-static struct iommu_domain *ipmmu_domain_alloc_dma(unsigned type)
-{
-       struct iommu_domain *io_domain = NULL;
-
-       switch (type) {
-       case IOMMU_DOMAIN_UNMANAGED:
-               io_domain = __ipmmu_domain_alloc(type);
-               break;
-
-       case IOMMU_DOMAIN_DMA:
-               io_domain = __ipmmu_domain_alloc(type);
-               if (io_domain)
-                       iommu_get_dma_cookie(io_domain);
-               break;
-       }
-
-       return io_domain;
-}
-
-static void ipmmu_domain_free_dma(struct iommu_domain *io_domain)
-{
-       switch (io_domain->type) {
-       case IOMMU_DOMAIN_DMA:
-               iommu_put_dma_cookie(io_domain);
-               /* fall-through */
-       default:
-               ipmmu_domain_free(io_domain);
-               break;
-       }
-}
-
-static int ipmmu_add_device_dma(struct device *dev)
+static int ipmmu_add_device(struct device *dev)
 {
        struct iommu_group *group;
 
        /*
         * Only let through devices that have been verified in xlate()
         */
-       if (!to_priv(dev))
+       if (!to_ipmmu(dev))
                return -ENODEV;
 
+       if (IS_ENABLED(CONFIG_ARM) && !IS_ENABLED(CONFIG_IOMMU_DMA))
+               return ipmmu_init_arm_mapping(dev);
+
        group = iommu_group_get_for_dev(dev);
        if (IS_ERR(group))
                return PTR_ERR(group);
 
-       spin_lock(&ipmmu_slave_devices_lock);
-       list_add(&to_priv(dev)->list, &ipmmu_slave_devices);
-       spin_unlock(&ipmmu_slave_devices_lock);
+       iommu_group_put(group);
        return 0;
 }
 
-static void ipmmu_remove_device_dma(struct device *dev)
+static void ipmmu_remove_device(struct device *dev)
 {
-       struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev);
-
-       spin_lock(&ipmmu_slave_devices_lock);
-       list_del(&priv->list);
-       spin_unlock(&ipmmu_slave_devices_lock);
-
+       arm_iommu_detach_device(dev);
        iommu_group_remove_device(dev);
 }
 
-static struct device *ipmmu_find_sibling_device(struct device *dev)
-{
-       struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev);
-       struct ipmmu_vmsa_iommu_priv *sibling_priv = NULL;
-       bool found = false;
-
-       spin_lock(&ipmmu_slave_devices_lock);
-
-       list_for_each_entry(sibling_priv, &ipmmu_slave_devices, list) {
-               if (priv == sibling_priv)
-                       continue;
-               if (sibling_priv->mmu == priv->mmu) {
-                       found = true;
-                       break;
-               }
-       }
-
-       spin_unlock(&ipmmu_slave_devices_lock);
-
-       return found ? sibling_priv->dev : NULL;
-}
-
-static struct iommu_group *ipmmu_find_group_dma(struct device *dev)
+static struct iommu_group *ipmmu_find_group(struct device *dev)
 {
+       struct ipmmu_vmsa_device *mmu = to_ipmmu(dev);
        struct iommu_group *group;
-       struct device *sibling;
 
-       sibling = ipmmu_find_sibling_device(dev);
-       if (sibling)
-               group = iommu_group_get(sibling);
-       if (!sibling || IS_ERR(group))
-               group = generic_device_group(dev);
+       if (mmu->group)
+               return iommu_group_ref_get(mmu->group);
+
+       group = iommu_group_alloc();
+       if (!IS_ERR(group))
+               mmu->group = group;
 
        return group;
 }
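ipmmu_find_group() now keeps one iommu_group per IPMMU instance: the first caller allocates it, later callers take another reference, so every device behind the same IPMMU lands in a single group. A userspace sketch of that lazily cached, reference-counted object (the types are toy stand-ins, not the IOMMU core API):

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical group object shared by every device behind one IOMMU. */
struct toy_group {
        int refcount;
};

static struct toy_group *group_ref(struct toy_group *g)
{
        g->refcount++;
        return g;
}

/*
 * Return the cached group if one exists (taking a new reference),
 * otherwise allocate it once and cache it on the owning instance.
 */
static struct toy_group *find_group(struct toy_group **cache)
{
        if (*cache)
                return group_ref(*cache);

        *cache = calloc(1, sizeof(**cache));
        if (*cache)
                (*cache)->refcount = 1;
        return *cache;
}

int main(void)
{
        struct toy_group *cache = NULL;
        struct toy_group *a = find_group(&cache);
        struct toy_group *b = find_group(&cache);

        printf("same group: %d, refcount: %d\n",
               a == b, cache ? cache->refcount : 0);
        free(cache);
        return 0;
}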
 
 static const struct iommu_ops ipmmu_ops = {
-       .domain_alloc = ipmmu_domain_alloc_dma,
-       .domain_free = ipmmu_domain_free_dma,
+       .domain_alloc = ipmmu_domain_alloc,
+       .domain_free = ipmmu_domain_free,
        .attach_dev = ipmmu_attach_device,
        .detach_dev = ipmmu_detach_device,
        .map = ipmmu_map,
        .unmap = ipmmu_unmap,
+       .flush_iotlb_all = ipmmu_iotlb_sync,
+       .iotlb_sync = ipmmu_iotlb_sync,
        .map_sg = default_iommu_map_sg,
        .iova_to_phys = ipmmu_iova_to_phys,
-       .add_device = ipmmu_add_device_dma,
-       .remove_device = ipmmu_remove_device_dma,
-       .device_group = ipmmu_find_group_dma,
+       .add_device = ipmmu_add_device,
+       .remove_device = ipmmu_remove_device,
+       .device_group = ipmmu_find_group,
        .pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K,
        .of_xlate = ipmmu_of_xlate,
 };
 
-#endif /* CONFIG_IOMMU_DMA */
-
 /* -----------------------------------------------------------------------------
  * Probe/remove and init
  */
@@ -896,10 +907,40 @@ static void ipmmu_device_reset(struct ipmmu_vmsa_device *mmu)
        unsigned int i;
 
        /* Disable all contexts. */
-       for (i = 0; i < 4; ++i)
+       for (i = 0; i < mmu->num_ctx; ++i)
                ipmmu_write(mmu, i * IM_CTX_SIZE + IMCTR, 0);
 }
 
+static const struct ipmmu_features ipmmu_features_default = {
+       .use_ns_alias_offset = true,
+       .has_cache_leaf_nodes = false,
+       .number_of_contexts = 1, /* software only tested with one context */
+       .setup_imbuscr = true,
+       .twobit_imttbcr_sl0 = false,
+};
+
+static const struct ipmmu_features ipmmu_features_r8a7795 = {
+       .use_ns_alias_offset = false,
+       .has_cache_leaf_nodes = true,
+       .number_of_contexts = 8,
+       .setup_imbuscr = false,
+       .twobit_imttbcr_sl0 = true,
+};
+
+static const struct of_device_id ipmmu_of_ids[] = {
+       {
+               .compatible = "renesas,ipmmu-vmsa",
+               .data = &ipmmu_features_default,
+       }, {
+               .compatible = "renesas,ipmmu-r8a7795",
+               .data = &ipmmu_features_r8a7795,
+       }, {
+               /* Terminator */
+       },
+};
+
+MODULE_DEVICE_TABLE(of, ipmmu_of_ids);
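The match table above attaches a feature structure to each compatible string; ipmmu_probe() later retrieves it through of_device_get_match_data(). A userspace sketch of the same compatible-to-features lookup (the table mirrors the two entries above, the helper names are invented):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>

struct features {
        unsigned int num_contexts;
        bool twobit_sl0;
};

static const struct features features_default = { .num_contexts = 1 };
static const struct features features_r8a7795 = { .num_contexts = 8, .twobit_sl0 = true };

/* Maps a compatible string to its per-variant feature set, like an of_device_id table. */
static const struct {
        const char *compatible;
        const struct features *data;
} match_table[] = {
        { "renesas,ipmmu-vmsa",    &features_default },
        { "renesas,ipmmu-r8a7795", &features_r8a7795 },
        { NULL, NULL } /* terminator */
};

static const struct features *get_match_data(const char *compatible)
{
        size_t i;

        for (i = 0; match_table[i].compatible; i++)
                if (!strcmp(match_table[i].compatible, compatible))
                        return match_table[i].data;
        return NULL;
}

int main(void)
{
        const struct features *f = get_match_data("renesas,ipmmu-r8a7795");

        if (f)
                printf("contexts: %u, two-bit SL0: %d\n",
                       f->num_contexts, f->twobit_sl0);
        return 0;
}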
+
 static int ipmmu_probe(struct platform_device *pdev)
 {
        struct ipmmu_vmsa_device *mmu;
@@ -917,6 +958,8 @@ static int ipmmu_probe(struct platform_device *pdev)
        mmu->num_utlbs = 32;
        spin_lock_init(&mmu->lock);
        bitmap_zero(mmu->ctx, IPMMU_CTX_MAX);
+       mmu->features = of_device_get_match_data(&pdev->dev);
+       dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40));
 
        /* Map I/O memory and request IRQ. */
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -936,34 +979,71 @@ static int ipmmu_probe(struct platform_device *pdev)
         * Offset the registers base unconditionally to point to the non-secure
         * alias space for now.
         */
-       mmu->base += IM_NS_ALIAS_OFFSET;
+       if (mmu->features->use_ns_alias_offset)
+               mmu->base += IM_NS_ALIAS_OFFSET;
+
+       mmu->num_ctx = min_t(unsigned int, IPMMU_CTX_MAX,
+                            mmu->features->number_of_contexts);
 
        irq = platform_get_irq(pdev, 0);
-       if (irq < 0) {
-               dev_err(&pdev->dev, "no IRQ found\n");
-               return irq;
-       }
 
-       ret = devm_request_irq(&pdev->dev, irq, ipmmu_irq, 0,
-                              dev_name(&pdev->dev), mmu);
-       if (ret < 0) {
-               dev_err(&pdev->dev, "failed to request IRQ %d\n", irq);
-               return ret;
-       }
+       /*
+        * Determine if this IPMMU instance is a root device by checking for
+        * the lack of has_cache_leaf_nodes flag or renesas,ipmmu-main property.
+        */
+       if (!mmu->features->has_cache_leaf_nodes ||
+           !of_find_property(pdev->dev.of_node, "renesas,ipmmu-main", NULL))
+               mmu->root = mmu;
+       else
+               mmu->root = ipmmu_find_root();
 
-       ipmmu_device_reset(mmu);
+       /*
+        * Wait until the root device has been registered for sure.
+        */
+       if (!mmu->root)
+               return -EPROBE_DEFER;
+
+       /* Root devices have mandatory IRQs */
+       if (ipmmu_is_root(mmu)) {
+               if (irq < 0) {
+                       dev_err(&pdev->dev, "no IRQ found\n");
+                       return irq;
+               }
 
-       ret = iommu_device_sysfs_add(&mmu->iommu, &pdev->dev, NULL,
-                                    dev_name(&pdev->dev));
-       if (ret)
-               return ret;
+               ret = devm_request_irq(&pdev->dev, irq, ipmmu_irq, 0,
+                                      dev_name(&pdev->dev), mmu);
+               if (ret < 0) {
+                       dev_err(&pdev->dev, "failed to request IRQ %d\n", irq);
+                       return ret;
+               }
 
-       iommu_device_set_ops(&mmu->iommu, &ipmmu_ops);
-       iommu_device_set_fwnode(&mmu->iommu, &pdev->dev.of_node->fwnode);
+               ipmmu_device_reset(mmu);
+       }
 
-       ret = iommu_device_register(&mmu->iommu);
-       if (ret)
-               return ret;
+       /*
+        * Register the IPMMU to the IOMMU subsystem in the following cases:
+        * - R-Car Gen2 IPMMU (all devices registered)
+        * - R-Car Gen3 IPMMU (leaf devices only - skip root IPMMU-MM device)
+        */
+       if (!mmu->features->has_cache_leaf_nodes || !ipmmu_is_root(mmu)) {
+               ret = iommu_device_sysfs_add(&mmu->iommu, &pdev->dev, NULL,
+                                            dev_name(&pdev->dev));
+               if (ret)
+                       return ret;
+
+               iommu_device_set_ops(&mmu->iommu, &ipmmu_ops);
+               iommu_device_set_fwnode(&mmu->iommu,
+                                       &pdev->dev.of_node->fwnode);
+
+               ret = iommu_device_register(&mmu->iommu);
+               if (ret)
+                       return ret;
+
+#if defined(CONFIG_IOMMU_DMA)
+               if (!iommu_present(&platform_bus_type))
+                       bus_set_iommu(&platform_bus_type, &ipmmu_ops);
+#endif
+       }
 
        /*
         * We can't create the ARM mapping here as it requires the bus to have
@@ -983,20 +1063,13 @@ static int ipmmu_remove(struct platform_device *pdev)
        iommu_device_sysfs_remove(&mmu->iommu);
        iommu_device_unregister(&mmu->iommu);
 
-#if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA)
        arm_iommu_release_mapping(mmu->mapping);
-#endif
 
        ipmmu_device_reset(mmu);
 
        return 0;
 }
 
-static const struct of_device_id ipmmu_of_ids[] = {
-       { .compatible = "renesas,ipmmu-vmsa", },
-       { }
-};
-
 static struct platform_driver ipmmu_driver = {
        .driver = {
                .name = "ipmmu-vmsa",
@@ -1008,15 +1081,22 @@ static struct platform_driver ipmmu_driver = {
 
 static int __init ipmmu_init(void)
 {
+       static bool setup_done;
        int ret;
 
+       if (setup_done)
+               return 0;
+
        ret = platform_driver_register(&ipmmu_driver);
        if (ret < 0)
                return ret;
 
+#if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA)
        if (!iommu_present(&platform_bus_type))
                bus_set_iommu(&platform_bus_type, &ipmmu_ops);
+#endif
 
+       setup_done = true;
        return 0;
 }
 
@@ -1028,6 +1108,19 @@ static void __exit ipmmu_exit(void)
 subsys_initcall(ipmmu_init);
 module_exit(ipmmu_exit);
 
+#ifdef CONFIG_IOMMU_DMA
+static int __init ipmmu_vmsa_iommu_of_setup(struct device_node *np)
+{
+       ipmmu_init();
+       return 0;
+}
+
+IOMMU_OF_DECLARE(ipmmu_vmsa_iommu_of, "renesas,ipmmu-vmsa",
+                ipmmu_vmsa_iommu_of_setup);
+IOMMU_OF_DECLARE(ipmmu_r8a7795_iommu_of, "renesas,ipmmu-r8a7795",
+                ipmmu_vmsa_iommu_of_setup);
+#endif
+
 MODULE_DESCRIPTION("IOMMU API for Renesas VMSA-compatible IPMMU");
 MODULE_AUTHOR("Laurent Pinchart <laurent.pinchart@ideasonboard.com>");
 MODULE_LICENSE("GPL v2");
index 16d33ac19db0f77837c30f44de044a3a46b9c558..f227d73e7bf6e0f28f66752100798c8cc9318184 100644 (file)
@@ -392,6 +392,11 @@ static size_t mtk_iommu_unmap(struct iommu_domain *domain,
        return unmapsz;
 }
 
+static void mtk_iommu_iotlb_sync(struct iommu_domain *domain)
+{
+       mtk_iommu_tlb_sync(mtk_iommu_get_m4u_data());
+}
+
 static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain,
                                          dma_addr_t iova)
 {
@@ -491,6 +496,8 @@ static struct iommu_ops mtk_iommu_ops = {
        .map            = mtk_iommu_map,
        .unmap          = mtk_iommu_unmap,
        .map_sg         = default_iommu_map_sg,
+       .flush_iotlb_all = mtk_iommu_iotlb_sync,
+       .iotlb_sync     = mtk_iommu_iotlb_sync,
        .iova_to_phys   = mtk_iommu_iova_to_phys,
        .add_device     = mtk_iommu_add_device,
        .remove_device  = mtk_iommu_remove_device,
index bc1efbfb9ddf0f121a213f9919f7e3ea0fa957ca..542930cd183d07ba1af592f2a99292a6e6c572f9 100644 (file)
@@ -708,7 +708,7 @@ static struct platform_driver mtk_iommu_driver = {
        .probe  = mtk_iommu_probe,
        .remove = mtk_iommu_remove,
        .driver = {
-               .name = "mtk-iommu",
+               .name = "mtk-iommu-v1",
                .of_match_table = mtk_iommu_of_ids,
                .pm = &mtk_iommu_pm_ops,
        }
index bd67e1b2c64eadf25f5b10d120cf1214b120b54f..e135ab830ebfef6809e33434c76a53ed19c7b7d7 100644 (file)
@@ -2,6 +2,7 @@
  * omap iommu: tlb and pagetable primitives
  *
  * Copyright (C) 2008-2010 Nokia Corporation
+ * Copyright (C) 2013-2017 Texas Instruments Incorporated - http://www.ti.com/
  *
  * Written by Hiroshi DOYU <Hiroshi.DOYU@nokia.com>,
  *             Paul Mundt and Toshihiro Kobayashi
@@ -71,13 +72,23 @@ static struct omap_iommu_domain *to_omap_domain(struct iommu_domain *dom)
  **/
 void omap_iommu_save_ctx(struct device *dev)
 {
-       struct omap_iommu *obj = dev_to_omap_iommu(dev);
-       u32 *p = obj->ctx;
+       struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
+       struct omap_iommu *obj;
+       u32 *p;
        int i;
 
-       for (i = 0; i < (MMU_REG_SIZE / sizeof(u32)); i++) {
-               p[i] = iommu_read_reg(obj, i * sizeof(u32));
-               dev_dbg(obj->dev, "%s\t[%02d] %08x\n", __func__, i, p[i]);
+       if (!arch_data)
+               return;
+
+       while (arch_data->iommu_dev) {
+               obj = arch_data->iommu_dev;
+               p = obj->ctx;
+               for (i = 0; i < (MMU_REG_SIZE / sizeof(u32)); i++) {
+                       p[i] = iommu_read_reg(obj, i * sizeof(u32));
+                       dev_dbg(obj->dev, "%s\t[%02d] %08x\n", __func__, i,
+                               p[i]);
+               }
+               arch_data++;
        }
 }
 EXPORT_SYMBOL_GPL(omap_iommu_save_ctx);
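omap_iommu_save_ctx() (and the restore path below) now iterates a sentinel-terminated array of per-IOMMU descriptors instead of assuming a single instance; the walk stops at the first entry whose iommu_dev pointer is NULL. A minimal sketch of that pattern with toy types:

#include <stddef.h>
#include <stdio.h>

struct toy_iommu {
        const char *name;
};

/* One entry per attached IOMMU; a NULL iommu_dev terminates the array. */
struct arch_data {
        struct toy_iommu *iommu_dev;
};

static void for_each_attached(struct arch_data *ad)
{
        while (ad->iommu_dev) {
                printf("saving context of %s\n", ad->iommu_dev->name);
                ad++;
        }
}

int main(void)
{
        struct toy_iommu mmu0 = { "mmu0" }, mmu1 = { "mmu1" };
        struct arch_data devs[] = { { &mmu0 }, { &mmu1 }, { NULL } };

        for_each_attached(devs);
        return 0;
}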
@@ -88,13 +99,23 @@ EXPORT_SYMBOL_GPL(omap_iommu_save_ctx);
  **/
 void omap_iommu_restore_ctx(struct device *dev)
 {
-       struct omap_iommu *obj = dev_to_omap_iommu(dev);
-       u32 *p = obj->ctx;
+       struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
+       struct omap_iommu *obj;
+       u32 *p;
        int i;
 
-       for (i = 0; i < (MMU_REG_SIZE / sizeof(u32)); i++) {
-               iommu_write_reg(obj, p[i], i * sizeof(u32));
-               dev_dbg(obj->dev, "%s\t[%02d] %08x\n", __func__, i, p[i]);
+       if (!arch_data)
+               return;
+
+       while (arch_data->iommu_dev) {
+               obj = arch_data->iommu_dev;
+               p = obj->ctx;
+               for (i = 0; i < (MMU_REG_SIZE / sizeof(u32)); i++) {
+                       iommu_write_reg(obj, p[i], i * sizeof(u32));
+                       dev_dbg(obj->dev, "%s\t[%02d] %08x\n", __func__, i,
+                               p[i]);
+               }
+               arch_data++;
        }
 }
 EXPORT_SYMBOL_GPL(omap_iommu_restore_ctx);
@@ -805,7 +826,7 @@ static irqreturn_t iommu_fault_handler(int irq, void *data)
        struct iommu_domain *domain = obj->domain;
        struct omap_iommu_domain *omap_domain = to_omap_domain(domain);
 
-       if (!omap_domain->iommu_dev)
+       if (!omap_domain->dev)
                return IRQ_NONE;
 
        errs = iommu_report_fault(obj, &da);
@@ -893,6 +914,24 @@ static void omap_iommu_detach(struct omap_iommu *obj)
        dev_dbg(obj->dev, "%s: %s\n", __func__, obj->name);
 }
 
+static bool omap_iommu_can_register(struct platform_device *pdev)
+{
+       struct device_node *np = pdev->dev.of_node;
+
+       if (!of_device_is_compatible(np, "ti,dra7-dsp-iommu"))
+               return true;
+
+       /*
+        * restrict IOMMU core registration only for processor-port MDMA MMUs
+        * on DRA7 DSPs
+        */
+       if ((!strcmp(dev_name(&pdev->dev), "40d01000.mmu")) ||
+           (!strcmp(dev_name(&pdev->dev), "41501000.mmu")))
+               return true;
+
+       return false;
+}
+
 static int omap_iommu_dra7_get_dsp_system_cfg(struct platform_device *pdev,
                                              struct omap_iommu *obj)
 {
@@ -984,19 +1023,22 @@ static int omap_iommu_probe(struct platform_device *pdev)
                return err;
        platform_set_drvdata(pdev, obj);
 
-       obj->group = iommu_group_alloc();
-       if (IS_ERR(obj->group))
-               return PTR_ERR(obj->group);
+       if (omap_iommu_can_register(pdev)) {
+               obj->group = iommu_group_alloc();
+               if (IS_ERR(obj->group))
+                       return PTR_ERR(obj->group);
 
-       err = iommu_device_sysfs_add(&obj->iommu, obj->dev, NULL, obj->name);
-       if (err)
-               goto out_group;
+               err = iommu_device_sysfs_add(&obj->iommu, obj->dev, NULL,
+                                            obj->name);
+               if (err)
+                       goto out_group;
 
-       iommu_device_set_ops(&obj->iommu, &omap_iommu_ops);
+               iommu_device_set_ops(&obj->iommu, &omap_iommu_ops);
 
-       err = iommu_device_register(&obj->iommu);
-       if (err)
-               goto out_sysfs;
+               err = iommu_device_register(&obj->iommu);
+               if (err)
+                       goto out_sysfs;
+       }
 
        pm_runtime_irq_safe(obj->dev);
        pm_runtime_enable(obj->dev);
@@ -1018,11 +1060,13 @@ static int omap_iommu_remove(struct platform_device *pdev)
 {
        struct omap_iommu *obj = platform_get_drvdata(pdev);
 
-       iommu_group_put(obj->group);
-       obj->group = NULL;
+       if (obj->group) {
+               iommu_group_put(obj->group);
+               obj->group = NULL;
 
-       iommu_device_sysfs_remove(&obj->iommu);
-       iommu_device_unregister(&obj->iommu);
+               iommu_device_sysfs_remove(&obj->iommu);
+               iommu_device_unregister(&obj->iommu);
+       }
 
        omap_iommu_debugfs_remove(obj);
 
@@ -1068,11 +1112,13 @@ static int omap_iommu_map(struct iommu_domain *domain, unsigned long da,
                          phys_addr_t pa, size_t bytes, int prot)
 {
        struct omap_iommu_domain *omap_domain = to_omap_domain(domain);
-       struct omap_iommu *oiommu = omap_domain->iommu_dev;
-       struct device *dev = oiommu->dev;
+       struct device *dev = omap_domain->dev;
+       struct omap_iommu_device *iommu;
+       struct omap_iommu *oiommu;
        struct iotlb_entry e;
        int omap_pgsz;
-       u32 ret;
+       u32 ret = -EINVAL;
+       int i;
 
        omap_pgsz = bytes_to_iopgsz(bytes);
        if (omap_pgsz < 0) {
@@ -1084,9 +1130,24 @@ static int omap_iommu_map(struct iommu_domain *domain, unsigned long da,
 
        iotlb_init_entry(&e, da, pa, omap_pgsz);
 
-       ret = omap_iopgtable_store_entry(oiommu, &e);
-       if (ret)
-               dev_err(dev, "omap_iopgtable_store_entry failed: %d\n", ret);
+       iommu = omap_domain->iommus;
+       for (i = 0; i < omap_domain->num_iommus; i++, iommu++) {
+               oiommu = iommu->iommu_dev;
+               ret = omap_iopgtable_store_entry(oiommu, &e);
+               if (ret) {
+                       dev_err(dev, "omap_iopgtable_store_entry failed: %d\n",
+                               ret);
+                       break;
+               }
+       }
+
+       if (ret) {
+               while (i--) {
+                       iommu--;
+                       oiommu = iommu->iommu_dev;
+                       iopgtable_clear_entry(oiommu, da);
+               }
+       }
 
        return ret;
 }
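omap_iommu_map() mirrors each page table entry into every IOMMU of the domain and, if one of them fails, walks back over the instances already programmed and clears the entry again, so the domain is never left half-mapped. A standalone sketch of that roll-back-on-failure pattern (all names and the failure injection are illustrative):

#include <stdbool.h>
#include <stdio.h>

#define NUM_UNITS 3

static bool program_entry(int unit, unsigned long da)
{
        /* Pretend the last unit rejects the entry to exercise the rollback. */
        printf("program unit %d, da %#lx\n", unit, da);
        return unit != NUM_UNITS - 1;
}

static void clear_entry(int unit, unsigned long da)
{
        printf("clear   unit %d, da %#lx\n", unit, da);
}

/* Mirror the entry into all units, or leave none programmed on failure. */
static int map_all(unsigned long da)
{
        int i;

        for (i = 0; i < NUM_UNITS; i++)
                if (!program_entry(i, da))
                        break;

        if (i == NUM_UNITS)
                return 0;

        while (i--)             /* undo the units already programmed */
                clear_entry(i, da);
        return -1;
}

int main(void)
{
        return map_all(0x1000) ? 1 : 0;
}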
@@ -1095,12 +1156,90 @@ static size_t omap_iommu_unmap(struct iommu_domain *domain, unsigned long da,
                               size_t size)
 {
        struct omap_iommu_domain *omap_domain = to_omap_domain(domain);
-       struct omap_iommu *oiommu = omap_domain->iommu_dev;
-       struct device *dev = oiommu->dev;
+       struct device *dev = omap_domain->dev;
+       struct omap_iommu_device *iommu;
+       struct omap_iommu *oiommu;
+       bool error = false;
+       size_t bytes = 0;
+       int i;
 
        dev_dbg(dev, "unmapping da 0x%lx size %u\n", da, size);
 
-       return iopgtable_clear_entry(oiommu, da);
+       iommu = omap_domain->iommus;
+       for (i = 0; i < omap_domain->num_iommus; i++, iommu++) {
+               oiommu = iommu->iommu_dev;
+               bytes = iopgtable_clear_entry(oiommu, da);
+               if (!bytes)
+                       error = true;
+       }
+
+       /*
+        * simplify return - we are only checking if any of the iommus
+        * reported an error, but not if all of them are unmapping the
+        * same number of entries. This should not occur due to the
+        * mirror programming.
+        */
+       return error ? 0 : bytes;
+}
+
+static int omap_iommu_count(struct device *dev)
+{
+       struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
+       int count = 0;
+
+       while (arch_data->iommu_dev) {
+               count++;
+               arch_data++;
+       }
+
+       return count;
+}
+
+/* caller should call cleanup if this function fails */
+static int omap_iommu_attach_init(struct device *dev,
+                                 struct omap_iommu_domain *odomain)
+{
+       struct omap_iommu_device *iommu;
+       int i;
+
+       odomain->num_iommus = omap_iommu_count(dev);
+       if (!odomain->num_iommus)
+               return -EINVAL;
+
+       odomain->iommus = kcalloc(odomain->num_iommus, sizeof(*iommu),
+                                 GFP_ATOMIC);
+       if (!odomain->iommus)
+               return -ENOMEM;
+
+       iommu = odomain->iommus;
+       for (i = 0; i < odomain->num_iommus; i++, iommu++) {
+               iommu->pgtable = kzalloc(IOPGD_TABLE_SIZE, GFP_ATOMIC);
+               if (!iommu->pgtable)
+                       return -ENOMEM;
+
+               /*
+                * should never fail, but please keep this around to ensure
+                * we keep the hardware happy
+                */
+               if (WARN_ON(!IS_ALIGNED((long)iommu->pgtable,
+                                       IOPGD_TABLE_SIZE)))
+                       return -EINVAL;
+       }
+
+       return 0;
+}
+
+static void omap_iommu_detach_fini(struct omap_iommu_domain *odomain)
+{
+       int i;
+       struct omap_iommu_device *iommu = odomain->iommus;
+
+       for (i = 0; iommu && i < odomain->num_iommus; i++, iommu++)
+               kfree(iommu->pgtable);
+
+       kfree(odomain->iommus);
+       odomain->num_iommus = 0;
+       odomain->iommus = NULL;
 }
 
 static int
@@ -1108,8 +1247,10 @@ omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev)
 {
        struct omap_iommu_domain *omap_domain = to_omap_domain(domain);
        struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
+       struct omap_iommu_device *iommu;
        struct omap_iommu *oiommu;
        int ret = 0;
+       int i;
 
        if (!arch_data || !arch_data->iommu_dev) {
                dev_err(dev, "device doesn't have an associated iommu\n");
@@ -1118,26 +1259,49 @@ omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev)
 
        spin_lock(&omap_domain->lock);
 
-       /* only a single device is supported per domain for now */
-       if (omap_domain->iommu_dev) {
+       /* only a single client device can be attached to a domain */
+       if (omap_domain->dev) {
                dev_err(dev, "iommu domain is already attached\n");
                ret = -EBUSY;
                goto out;
        }
 
-       oiommu = arch_data->iommu_dev;
-
-       /* get a handle to and enable the omap iommu */
-       ret = omap_iommu_attach(oiommu, omap_domain->pgtable);
+       ret = omap_iommu_attach_init(dev, omap_domain);
        if (ret) {
-               dev_err(dev, "can't get omap iommu: %d\n", ret);
-               goto out;
+               dev_err(dev, "failed to allocate required iommu data %d\n",
+                       ret);
+               goto init_fail;
+       }
+
+       iommu = omap_domain->iommus;
+       for (i = 0; i < omap_domain->num_iommus; i++, iommu++, arch_data++) {
+               /* configure and enable the omap iommu */
+               oiommu = arch_data->iommu_dev;
+               ret = omap_iommu_attach(oiommu, iommu->pgtable);
+               if (ret) {
+                       dev_err(dev, "can't get omap iommu: %d\n", ret);
+                       goto attach_fail;
+               }
+
+               oiommu->domain = domain;
+               iommu->iommu_dev = oiommu;
        }
 
-       omap_domain->iommu_dev = oiommu;
        omap_domain->dev = dev;
-       oiommu->domain = domain;
 
+       goto out;
+
+attach_fail:
+       while (i--) {
+               iommu--;
+               arch_data--;
+               oiommu = iommu->iommu_dev;
+               omap_iommu_detach(oiommu);
+               iommu->iommu_dev = NULL;
+               oiommu->domain = NULL;
+       }
+init_fail:
+       omap_iommu_detach_fini(omap_domain);
 out:
        spin_unlock(&omap_domain->lock);
        return ret;
@@ -1146,21 +1310,40 @@ out:
 static void _omap_iommu_detach_dev(struct omap_iommu_domain *omap_domain,
                                   struct device *dev)
 {
-       struct omap_iommu *oiommu = dev_to_omap_iommu(dev);
+       struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
+       struct omap_iommu_device *iommu = omap_domain->iommus;
+       struct omap_iommu *oiommu;
+       int i;
+
+       if (!omap_domain->dev) {
+               dev_err(dev, "domain has no attached device\n");
+               return;
+       }
 
        /* only a single device is supported per domain for now */
-       if (omap_domain->iommu_dev != oiommu) {
-               dev_err(dev, "invalid iommu device\n");
+       if (omap_domain->dev != dev) {
+               dev_err(dev, "invalid attached device\n");
                return;
        }
 
-       iopgtable_clear_entry_all(oiommu);
+       /*
+        * cleanup in the reverse order of attachment - this addresses
+        * any h/w dependencies between multiple instances, if any
+        */
+       iommu += (omap_domain->num_iommus - 1);
+       arch_data += (omap_domain->num_iommus - 1);
+       for (i = 0; i < omap_domain->num_iommus; i++, iommu--, arch_data--) {
+               oiommu = iommu->iommu_dev;
+               iopgtable_clear_entry_all(oiommu);
+
+               omap_iommu_detach(oiommu);
+               iommu->iommu_dev = NULL;
+               oiommu->domain = NULL;
+       }
 
-       omap_iommu_detach(oiommu);
+       omap_iommu_detach_fini(omap_domain);
 
-       omap_domain->iommu_dev = NULL;
        omap_domain->dev = NULL;
-       oiommu->domain = NULL;
 }
 
 static void omap_iommu_detach_dev(struct iommu_domain *domain,
@@ -1182,18 +1365,7 @@ static struct iommu_domain *omap_iommu_domain_alloc(unsigned type)
 
        omap_domain = kzalloc(sizeof(*omap_domain), GFP_KERNEL);
        if (!omap_domain)
-               goto out;
-
-       omap_domain->pgtable = kzalloc(IOPGD_TABLE_SIZE, GFP_KERNEL);
-       if (!omap_domain->pgtable)
-               goto fail_nomem;
-
-       /*
-        * should never fail, but please keep this around to ensure
-        * we keep the hardware happy
-        */
-       if (WARN_ON(!IS_ALIGNED((long)omap_domain->pgtable, IOPGD_TABLE_SIZE)))
-               goto fail_align;
+               return NULL;
 
        spin_lock_init(&omap_domain->lock);
 
@@ -1202,13 +1374,6 @@ static struct iommu_domain *omap_iommu_domain_alloc(unsigned type)
        omap_domain->domain.geometry.force_aperture = true;
 
        return &omap_domain->domain;
-
-fail_align:
-       kfree(omap_domain->pgtable);
-fail_nomem:
-       kfree(omap_domain);
-out:
-       return NULL;
 }
 
 static void omap_iommu_domain_free(struct iommu_domain *domain)
@@ -1219,10 +1384,9 @@ static void omap_iommu_domain_free(struct iommu_domain *domain)
         * An iommu device is still attached
         * (currently, only one device can be attached) ?
         */
-       if (omap_domain->iommu_dev)
+       if (omap_domain->dev)
                _omap_iommu_detach_dev(omap_domain, omap_domain->dev);
 
-       kfree(omap_domain->pgtable);
        kfree(omap_domain);
 }
 
@@ -1230,11 +1394,16 @@ static phys_addr_t omap_iommu_iova_to_phys(struct iommu_domain *domain,
                                           dma_addr_t da)
 {
        struct omap_iommu_domain *omap_domain = to_omap_domain(domain);
-       struct omap_iommu *oiommu = omap_domain->iommu_dev;
+       struct omap_iommu_device *iommu = omap_domain->iommus;
+       struct omap_iommu *oiommu = iommu->iommu_dev;
        struct device *dev = oiommu->dev;
        u32 *pgd, *pte;
        phys_addr_t ret = 0;
 
+       /*
+        * all the iommus within the domain will have identical programming,
+        * so perform the lookup using just the first iommu
+        */
        iopgtable_lookup_entry(oiommu, da, &pgd, &pte);
 
        if (pte) {
@@ -1260,11 +1429,12 @@ static phys_addr_t omap_iommu_iova_to_phys(struct iommu_domain *domain,
 
 static int omap_iommu_add_device(struct device *dev)
 {
-       struct omap_iommu_arch_data *arch_data;
+       struct omap_iommu_arch_data *arch_data, *tmp;
        struct omap_iommu *oiommu;
        struct iommu_group *group;
        struct device_node *np;
        struct platform_device *pdev;
+       int num_iommus, i;
        int ret;
 
        /*
@@ -1276,36 +1446,57 @@ static int omap_iommu_add_device(struct device *dev)
        if (!dev->of_node)
                return 0;
 
-       np = of_parse_phandle(dev->of_node, "iommus", 0);
-       if (!np)
+       /*
+        * retrieve the count of IOMMU nodes using phandle size as element size
+        * since #iommu-cells = 0 for OMAP
+        */
+       num_iommus = of_property_count_elems_of_size(dev->of_node, "iommus",
+                                                    sizeof(phandle));
+       if (num_iommus < 0)
                return 0;
 
-       pdev = of_find_device_by_node(np);
-       if (WARN_ON(!pdev)) {
-               of_node_put(np);
-               return -EINVAL;
-       }
+       arch_data = kzalloc((num_iommus + 1) * sizeof(*arch_data), GFP_KERNEL);
+       if (!arch_data)
+               return -ENOMEM;
 
-       oiommu = platform_get_drvdata(pdev);
-       if (!oiommu) {
-               of_node_put(np);
-               return -EINVAL;
-       }
+       for (i = 0, tmp = arch_data; i < num_iommus; i++, tmp++) {
+               np = of_parse_phandle(dev->of_node, "iommus", i);
+               if (!np) {
+                       kfree(arch_data);
+                       return -EINVAL;
+               }
+
+               pdev = of_find_device_by_node(np);
+               if (WARN_ON(!pdev)) {
+                       of_node_put(np);
+                       kfree(arch_data);
+                       return -EINVAL;
+               }
+
+               oiommu = platform_get_drvdata(pdev);
+               if (!oiommu) {
+                       of_node_put(np);
+                       kfree(arch_data);
+                       return -EINVAL;
+               }
+
+               tmp->iommu_dev = oiommu;
 
-       arch_data = kzalloc(sizeof(*arch_data), GFP_KERNEL);
-       if (!arch_data) {
                of_node_put(np);
-               return -ENOMEM;
        }
 
+       /*
+        * use the first IOMMU alone for the sysfs device linking.
+        * TODO: Evaluate if a single iommu_group needs to be
+        * maintained for both IOMMUs
+        */
+       oiommu = arch_data->iommu_dev;
        ret = iommu_device_link(&oiommu->iommu, dev);
        if (ret) {
                kfree(arch_data);
-               of_node_put(np);
                return ret;
        }
 
-       arch_data->iommu_dev = oiommu;
        dev->archdata.iommu = arch_data;
 
        /*
@@ -1321,8 +1512,6 @@ static int omap_iommu_add_device(struct device *dev)
        }
        iommu_group_put(group);
 
-       of_node_put(np);
-
        return 0;
 }
 
index a675af29a6ec7ec4fc0f7f2424484205c6cc945b..1703159ef5af390454323b5af427a7ca49f0137c 100644 (file)
@@ -28,18 +28,27 @@ struct iotlb_entry {
        u32 endian, elsz, mixed;
 };
 
+/**
+ * struct omap_iommu_device - omap iommu device data
+ * @pgtable:   page table used by an omap iommu attached to a domain
+ * @iommu_dev: pointer to store an omap iommu instance attached to a domain
+ */
+struct omap_iommu_device {
+       u32 *pgtable;
+       struct omap_iommu *iommu_dev;
+};
+
 /**
  * struct omap_iommu_domain - omap iommu domain
- * @pgtable:   the page table
- * @iommu_dev: an omap iommu device attached to this domain. only a single
- *             iommu device can be attached for now.
+ * @num_iommus: number of iommus in this domain
+ * @iommus:    omap iommu device data for all iommus in this domain
  * @dev:       Device using this domain.
  * @lock:      domain lock, should be taken when attaching/detaching
  * @domain:    generic domain handle used by iommu core code
  */
 struct omap_iommu_domain {
-       u32 *pgtable;
-       struct omap_iommu *iommu_dev;
+       u32 num_iommus;
+       struct omap_iommu_device *iommus;
        struct device *dev;
        spinlock_t lock;
        struct iommu_domain domain;
@@ -97,17 +106,6 @@ struct iotlb_lock {
        short vict;
 };
 
-/**
- * dev_to_omap_iommu() - retrieves an omap iommu object from a user device
- * @dev: iommu client device
- */
-static inline struct omap_iommu *dev_to_omap_iommu(struct device *dev)
-{
-       struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
-
-       return arch_data->iommu_dev;
-}
-
 /*
  * MMU Register offsets
  */
index c8a587d034b0d7e75d276af1b8c7cdc9e02e5aad..e07f02d00c688f0b604c71f6531d83dc43a94ee2 100644 (file)
@@ -66,6 +66,7 @@ struct qcom_iommu_ctx {
        void __iomem            *base;
        bool                     secure_init;
        u8                       asid;      /* asid and ctx bank # are 1:1 */
+       struct iommu_domain     *domain;
 };
 
 struct qcom_iommu_domain {
@@ -194,12 +195,15 @@ static irqreturn_t qcom_iommu_fault(int irq, void *dev)
        fsynr = iommu_readl(ctx, ARM_SMMU_CB_FSYNR0);
        iova = iommu_readq(ctx, ARM_SMMU_CB_FAR);
 
-       dev_err_ratelimited(ctx->dev,
-                           "Unhandled context fault: fsr=0x%x, "
-                           "iova=0x%016llx, fsynr=0x%x, cb=%d\n",
-                           fsr, iova, fsynr, ctx->asid);
+       if (!report_iommu_fault(ctx->domain, ctx->dev, iova, 0)) {
+               dev_err_ratelimited(ctx->dev,
+                                   "Unhandled context fault: fsr=0x%x, "
+                                   "iova=0x%016llx, fsynr=0x%x, cb=%d\n",
+                                   fsr, iova, fsynr, ctx->asid);
+       }
 
        iommu_writel(ctx, ARM_SMMU_CB_FSR, fsr);
+       iommu_writel(ctx, ARM_SMMU_CB_RESUME, RESUME_TERMINATE);
 
        return IRQ_HANDLED;
 }
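The fault handler now offers the fault to any handler registered on the domain through report_iommu_fault() before emitting the rate-limited log, and the added RESUME write terminates the stalled transaction afterwards. A generic sketch of a report-then-log fault path (the callback type is hypothetical and the return-value convention is illustrative, not the driver's exact check):

#include <stdio.h>

/* Hypothetical per-domain fault callback; returns 0 when the fault is handled. */
typedef int (*fault_handler_t)(unsigned long iova, void *cookie);

struct toy_domain {
        fault_handler_t handler;
        void *cookie;
};

static void handle_fault(struct toy_domain *dom, unsigned long iova)
{
        /* Give a registered handler first shot at the fault. */
        if (!dom->handler || dom->handler(iova, dom->cookie) != 0)
                fprintf(stderr, "unhandled fault at iova %#lx\n", iova);

        /* Either way, the stalled transaction would be resumed/terminated here. */
}

static int count_faults(unsigned long iova, void *cookie)
{
        ++*(int *)cookie;
        return 0; /* handled */
}

int main(void)
{
        int faults = 0;
        struct toy_domain dom = { .handler = count_faults, .cookie = &faults };

        handle_fault(&dom, 0xdead000);
        printf("handled %d fault(s)\n", faults);
        return 0;
}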
@@ -274,12 +278,14 @@ static int qcom_iommu_init_domain(struct iommu_domain *domain,
 
                /* SCTLR */
                reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE |
-                       SCTLR_M | SCTLR_S1_ASIDPNE;
+                       SCTLR_M | SCTLR_S1_ASIDPNE | SCTLR_CFCFG;
 
                if (IS_ENABLED(CONFIG_BIG_ENDIAN))
                        reg |= SCTLR_E;
 
                iommu_writel(ctx, ARM_SMMU_CB_SCTLR, reg);
+
+               ctx->domain = domain;
        }
 
        mutex_unlock(&qcom_domain->init_mutex);
@@ -395,6 +401,8 @@ static void qcom_iommu_detach_dev(struct iommu_domain *domain, struct device *de
 
                /* Disable the context bank: */
                iommu_writel(ctx, ARM_SMMU_CB_SCTLR, 0);
+
+               ctx->domain = NULL;
        }
        pm_runtime_put_sync(qcom_iommu->dev);
 
@@ -443,6 +451,19 @@ static size_t qcom_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
        return ret;
 }
 
+static void qcom_iommu_iotlb_sync(struct iommu_domain *domain)
+{
+       struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain);
+       struct io_pgtable *pgtable = container_of(qcom_domain->pgtbl_ops,
+                                                 struct io_pgtable, ops);
+       if (!qcom_domain->pgtbl_ops)
+               return;
+
+       pm_runtime_get_sync(qcom_domain->iommu->dev);
+       qcom_iommu_tlb_sync(pgtable->cookie);
+       pm_runtime_put_sync(qcom_domain->iommu->dev);
+}
+
 static phys_addr_t qcom_iommu_iova_to_phys(struct iommu_domain *domain,
                                           dma_addr_t iova)
 {
@@ -570,6 +591,8 @@ static const struct iommu_ops qcom_iommu_ops = {
        .map            = qcom_iommu_map,
        .unmap          = qcom_iommu_unmap,
        .map_sg         = default_iommu_map_sg,
+       .flush_iotlb_all = qcom_iommu_iotlb_sync,
+       .iotlb_sync     = qcom_iommu_iotlb_sync,
        .iova_to_phys   = qcom_iommu_iova_to_phys,
        .add_device     = qcom_iommu_add_device,
        .remove_device  = qcom_iommu_remove_device,
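
For illustration only (not part of this diff): with report_iommu_fault() wired into the context fault handler above, a consumer of the domain can install its own handler through the generic IOMMU API; returning 0 from that handler suppresses the driver's ratelimited print, and the faulting transaction is terminated via the RESUME_TERMINATE write either way. A minimal sketch with hypothetical names:

#include <linux/device.h>
#include <linux/iommu.h>

static int demo_fault_handler(struct iommu_domain *domain, struct device *dev,
			      unsigned long iova, int flags, void *token)
{
	dev_warn(dev, "context fault at iova %#lx (flags %#x)\n", iova, flags);
	return 0;	/* handled: the driver skips its own error message */
}

	/* after allocating the domain for this device: */
	iommu_set_fault_handler(domain, demo_fault_handler, NULL);
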
index 329727e00e97034ce07b1b5074f193575ba444a0..c824329f7012adfc765e4b1872185f5b148fd445 100644 (file)
@@ -39,8 +39,7 @@ void scif_rma_ep_init(struct scif_endpt *ep)
        struct scif_endpt_rma_info *rma = &ep->rma_info;
 
        mutex_init(&rma->rma_lock);
-       init_iova_domain(&rma->iovad, PAGE_SIZE, SCIF_IOVA_START_PFN,
-                        SCIF_DMA_64BIT_PFN);
+       init_iova_domain(&rma->iovad, PAGE_SIZE, SCIF_IOVA_START_PFN);
        spin_lock_init(&rma->tc_lock);
        mutex_init(&rma->mmn_lock);
        INIT_LIST_HEAD(&rma->reg_list);
index e8ffba1052d3ac63f540ae2d645fa61f11cf91b5..e2433bc50210bb3a2b36aaa293d632b201649d72 100644 (file)
@@ -112,6 +112,7 @@ static inline bool dmar_rcu_check(void)
 
 extern int dmar_table_init(void);
 extern int dmar_dev_scope_init(void);
+extern void dmar_register_bus_notifier(void);
 extern int dmar_parse_dev_scope(void *start, void *end, int *cnt,
                                struct dmar_dev_scope **devices, u16 segment);
 extern void *dmar_alloc_dev_scope(void *start, void *end, int *cnt);
index 485a5b48f0380460fa0b46243e34aa6c83796c07..f3274d9f46a252a9abc332dc0621ec3fc02ef4aa 100644 (file)
 #define DMA_FSTS_IQE (1 << 4)
 #define DMA_FSTS_ICE (1 << 5)
 #define DMA_FSTS_ITE (1 << 6)
+#define DMA_FSTS_PRO (1 << 7)
 #define dma_fsts_fault_record_index(s) (((s) >> 8) & 0xff)
 
 /* FRCD_REG, 32 bits access */
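
For illustration only (not part of this diff): DMA_FSTS_PRO is the Page Request Overflow bit of the VT-d fault status register and, like the neighbouring FSTS fault bits, is cleared by writing it back. A hedged sketch of how a fault path might clear it, assuming a struct intel_iommu with its register block mapped at iommu->reg:

	if (readl(iommu->reg + DMAR_FSTS_REG) & DMA_FSTS_PRO)
		writel(DMA_FSTS_PRO, iommu->reg + DMAR_FSTS_REG);
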
index d179b9bf7814767462dbbf592ca75cb78fa0d9fc..928442dda565f147b501dca93601731a92581b2d 100644 (file)
@@ -70,10 +70,12 @@ struct iova_fq {
 struct iova_domain {
        spinlock_t      iova_rbtree_lock; /* Lock to protect update of rbtree */
        struct rb_root  rbroot;         /* iova domain rbtree root */
-       struct rb_node  *cached32_node; /* Save last alloced node */
+       struct rb_node  *cached_node;   /* Save last alloced node */
+       struct rb_node  *cached32_node; /* Save last 32-bit alloced node */
        unsigned long   granule;        /* pfn granularity for this domain */
        unsigned long   start_pfn;      /* Lower limit for this domain */
        unsigned long   dma_32bit_pfn;
+       struct iova     anchor;         /* rbtree lookup anchor */
        struct iova_rcache rcaches[IOVA_RANGE_CACHE_MAX_SIZE];  /* IOVA range caches */
 
        iova_flush_cb   flush_cb;       /* Call-Back function to flush IOMMU
@@ -148,12 +150,12 @@ void queue_iova(struct iova_domain *iovad,
                unsigned long pfn, unsigned long pages,
                unsigned long data);
 unsigned long alloc_iova_fast(struct iova_domain *iovad, unsigned long size,
-                             unsigned long limit_pfn);
+                             unsigned long limit_pfn, bool flush_rcache);
 struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo,
        unsigned long pfn_hi);
 void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to);
 void init_iova_domain(struct iova_domain *iovad, unsigned long granule,
-       unsigned long start_pfn, unsigned long pfn_32bit);
+       unsigned long start_pfn);
 int init_iova_flush_queue(struct iova_domain *iovad,
                          iova_flush_cb flush_cb, iova_entry_dtor entry_dtor);
 struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn);
@@ -210,7 +212,8 @@ static inline void queue_iova(struct iova_domain *iovad,
 
 static inline unsigned long alloc_iova_fast(struct iova_domain *iovad,
                                            unsigned long size,
-                                           unsigned long limit_pfn)
+                                           unsigned long limit_pfn,
+                                           bool flush_rcache)
 {
        return 0;
 }
@@ -229,8 +232,7 @@ static inline void copy_reserved_iova(struct iova_domain *from,
 
 static inline void init_iova_domain(struct iova_domain *iovad,
                                    unsigned long granule,
-                                   unsigned long start_pfn,
-                                   unsigned long pfn_32bit)
+                                   unsigned long start_pfn)
 {
 }
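
For illustration only (not part of this diff): after this rework a caller sets up the IOVA domain with just a granule and a start PFN, and decides per allocation whether failing fast is acceptable before paying for an rcache flush. A minimal sketch with hypothetical names:

#include <linux/iova.h>

static void demo_iovad_init(struct iova_domain *iovad, unsigned long start_pfn)
{
	/* the old pfn_32bit argument is gone */
	init_iova_domain(iovad, PAGE_SIZE, start_pfn);
}

static unsigned long demo_alloc(struct iova_domain *iovad, unsigned long size,
				unsigned long limit_pfn)
{
	unsigned long pfn;

	/* cheap attempt first: do not flush the per-CPU rcaches on failure */
	pfn = alloc_iova_fast(iovad, size, limit_pfn, false);
	if (!pfn)
		/* no other fallback left: flush rcaches and retry */
		pfn = alloc_iova_fast(iovad, size, limit_pfn, true);
	return pfn;
}
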