iommu/arm-smmu: Ensure that page-table updates are visible before TLBI
1 /*
2 * IOMMU API for ARM architected SMMU implementations.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
16 *
17 * Copyright (C) 2013 ARM Limited
18 *
19 * Author: Will Deacon <will.deacon@arm.com>
20 *
21 * This driver currently supports:
22 * - SMMUv1 and v2 implementations
23 * - Stream-matching and stream-indexing
24 * - v7/v8 long-descriptor format
25 * - Non-secure access to the SMMU
26 * - Context fault reporting
27 * - Extended Stream ID (16 bit)
28 */
29
30 #define pr_fmt(fmt) "arm-smmu: " fmt
31
32 #include <linux/acpi.h>
33 #include <linux/acpi_iort.h>
34 #include <linux/atomic.h>
35 #include <linux/delay.h>
36 #include <linux/dma-iommu.h>
37 #include <linux/dma-mapping.h>
38 #include <linux/err.h>
39 #include <linux/interrupt.h>
40 #include <linux/io.h>
41 #include <linux/io-64-nonatomic-hi-lo.h>
42 #include <linux/iommu.h>
43 #include <linux/iopoll.h>
44 #include <linux/module.h>
45 #include <linux/of.h>
46 #include <linux/of_address.h>
47 #include <linux/of_device.h>
48 #include <linux/of_iommu.h>
49 #include <linux/pci.h>
50 #include <linux/platform_device.h>
51 #include <linux/slab.h>
52 #include <linux/spinlock.h>
53
54 #include <linux/amba/bus.h>
55
56 #include "io-pgtable.h"
57 #include "arm-smmu-regs.h"
58
59 #define ARM_MMU500_ACTLR_CPRE (1 << 1)
60
61 #define ARM_MMU500_ACR_CACHE_LOCK (1 << 26)
62 #define ARM_MMU500_ACR_S2CRB_TLBEN (1 << 10)
63 #define ARM_MMU500_ACR_SMTNMB_TLBEN (1 << 8)
64
65 #define TLB_LOOP_TIMEOUT 1000000 /* 1s! */
66 #define TLB_SPIN_COUNT 10
67
68 /* Maximum number of context banks per SMMU */
69 #define ARM_SMMU_MAX_CBS 128
70
71 /* SMMU global address space */
72 #define ARM_SMMU_GR0(smmu) ((smmu)->base)
73 #define ARM_SMMU_GR1(smmu) ((smmu)->base + (1 << (smmu)->pgshift))
74
75 /*
76 * SMMU global address space with conditional offset to access secure
77 * aliases of non-secure registers (e.g. nsCR0: 0x400, nsGFSR: 0x448,
78 * nsGFSYNR0: 0x450)
79 */
80 #define ARM_SMMU_GR0_NS(smmu) \
81 ((smmu)->base + \
82 ((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS) \
83 ? 0x400 : 0))
84
85 /*
86 * Some 64-bit registers only make sense to write atomically, but in such
87 * cases all the data relevant to AArch32 formats lies within the lower word,
88 * therefore this actually makes more sense than it might first appear.
89 */
90 #ifdef CONFIG_64BIT
91 #define smmu_write_atomic_lq writeq_relaxed
92 #else
93 #define smmu_write_atomic_lq writel_relaxed
94 #endif
95
96 /* Translation context bank */
97 #define ARM_SMMU_CB(smmu, n) ((smmu)->cb_base + ((n) << (smmu)->pgshift))
98
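/*
 * Fixed IOVA window advertised to the IOMMU core as a software-managed
 * MSI region (see arm_smmu_get_resv_regions()).
 */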
99 #define MSI_IOVA_BASE 0x8000000
100 #define MSI_IOVA_LENGTH 0x100000
101
102 static int force_stage;
103 module_param(force_stage, int, S_IRUGO);
104 MODULE_PARM_DESC(force_stage,
105 "Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
106 static bool disable_bypass;
107 module_param(disable_bypass, bool, S_IRUGO);
108 MODULE_PARM_DESC(disable_bypass,
109 "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
110
111 enum arm_smmu_arch_version {
112 ARM_SMMU_V1,
113 ARM_SMMU_V1_64K,
114 ARM_SMMU_V2,
115 };
116
117 enum arm_smmu_implementation {
118 GENERIC_SMMU,
119 ARM_MMU500,
120 CAVIUM_SMMUV2,
121 };
122
123 struct arm_smmu_s2cr {
124 struct iommu_group *group;
125 int count;
126 enum arm_smmu_s2cr_type type;
127 enum arm_smmu_s2cr_privcfg privcfg;
128 u8 cbndx;
129 };
130
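/*
 * Reset value for an S2CR: unclaimed streams either bypass translation
 * or fault, depending on the disable_bypass module parameter.
 */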
131 #define s2cr_init_val (struct arm_smmu_s2cr){ \
132 .type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS, \
133 }
134
135 struct arm_smmu_smr {
136 u16 mask;
137 u16 id;
138 bool valid;
139 };
140
141 struct arm_smmu_cb {
142 u64 ttbr[2];
143 u32 tcr[2];
144 u32 mair[2];
145 struct arm_smmu_cfg *cfg;
146 };
147
148 struct arm_smmu_master_cfg {
149 struct arm_smmu_device *smmu;
150 s16 smendx[];
151 };
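/*
 * Per-device stream-map bookkeeping lives in the iommu_fwspec:
 * fwspec_smendx() returns the stream map entry index allocated for the
 * i-th stream ID (or INVALID_SMENDX if i is out of range), and
 * for_each_cfg_sme() walks all of a master's stream IDs together with
 * their SME indices.
 */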
152 #define INVALID_SMENDX -1
153 #define __fwspec_cfg(fw) ((struct arm_smmu_master_cfg *)fw->iommu_priv)
154 #define fwspec_smmu(fw) (__fwspec_cfg(fw)->smmu)
155 #define fwspec_smendx(fw, i) \
156 (i >= fw->num_ids ? INVALID_SMENDX : __fwspec_cfg(fw)->smendx[i])
157 #define for_each_cfg_sme(fw, i, idx) \
158 for (i = 0; idx = fwspec_smendx(fw, i), i < fw->num_ids; ++i)
159
160 struct arm_smmu_device {
161 struct device *dev;
162
163 void __iomem *base;
164 void __iomem *cb_base;
165 unsigned long pgshift;
166
167 #define ARM_SMMU_FEAT_COHERENT_WALK (1 << 0)
168 #define ARM_SMMU_FEAT_STREAM_MATCH (1 << 1)
169 #define ARM_SMMU_FEAT_TRANS_S1 (1 << 2)
170 #define ARM_SMMU_FEAT_TRANS_S2 (1 << 3)
171 #define ARM_SMMU_FEAT_TRANS_NESTED (1 << 4)
172 #define ARM_SMMU_FEAT_TRANS_OPS (1 << 5)
173 #define ARM_SMMU_FEAT_VMID16 (1 << 6)
174 #define ARM_SMMU_FEAT_FMT_AARCH64_4K (1 << 7)
175 #define ARM_SMMU_FEAT_FMT_AARCH64_16K (1 << 8)
176 #define ARM_SMMU_FEAT_FMT_AARCH64_64K (1 << 9)
177 #define ARM_SMMU_FEAT_FMT_AARCH32_L (1 << 10)
178 #define ARM_SMMU_FEAT_FMT_AARCH32_S (1 << 11)
179 #define ARM_SMMU_FEAT_EXIDS (1 << 12)
180 u32 features;
181
182 #define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
183 u32 options;
184 enum arm_smmu_arch_version version;
185 enum arm_smmu_implementation model;
186
187 u32 num_context_banks;
188 u32 num_s2_context_banks;
189 DECLARE_BITMAP(context_map, ARM_SMMU_MAX_CBS);
190 struct arm_smmu_cb *cbs;
191 atomic_t irptndx;
192
193 u32 num_mapping_groups;
194 u16 streamid_mask;
195 u16 smr_mask_mask;
196 struct arm_smmu_smr *smrs;
197 struct arm_smmu_s2cr *s2crs;
198 struct mutex stream_map_mutex;
199
200 unsigned long va_size;
201 unsigned long ipa_size;
202 unsigned long pa_size;
203 unsigned long pgsize_bitmap;
204
205 u32 num_global_irqs;
206 u32 num_context_irqs;
207 unsigned int *irqs;
208
209 u32 cavium_id_base; /* Specific to Cavium */
210
211 spinlock_t global_sync_lock;
212
213 /* IOMMU core code handle */
214 struct iommu_device iommu;
215 };
216
217 enum arm_smmu_context_fmt {
218 ARM_SMMU_CTX_FMT_NONE,
219 ARM_SMMU_CTX_FMT_AARCH64,
220 ARM_SMMU_CTX_FMT_AARCH32_L,
221 ARM_SMMU_CTX_FMT_AARCH32_S,
222 };
223
224 struct arm_smmu_cfg {
225 u8 cbndx;
226 u8 irptndx;
227 union {
228 u16 asid;
229 u16 vmid;
230 };
231 u32 cbar;
232 enum arm_smmu_context_fmt fmt;
233 };
234 #define INVALID_IRPTNDX 0xff
235
236 enum arm_smmu_domain_stage {
237 ARM_SMMU_DOMAIN_S1 = 0,
238 ARM_SMMU_DOMAIN_S2,
239 ARM_SMMU_DOMAIN_NESTED,
240 ARM_SMMU_DOMAIN_BYPASS,
241 };
242
243 struct arm_smmu_domain {
244 struct arm_smmu_device *smmu;
245 struct io_pgtable_ops *pgtbl_ops;
246 const struct iommu_gather_ops *tlb_ops;
247 struct arm_smmu_cfg cfg;
248 enum arm_smmu_domain_stage stage;
249 bool non_strict;
250 struct mutex init_mutex; /* Protects smmu pointer */
251 spinlock_t cb_lock; /* Serialises ATS1* ops and TLB syncs */
252 struct iommu_domain domain;
253 };
254
255 struct arm_smmu_option_prop {
256 u32 opt;
257 const char *prop;
258 };
259
260 static atomic_t cavium_smmu_context_count = ATOMIC_INIT(0);
261
262 static bool using_legacy_binding, using_generic_binding;
263
264 static struct arm_smmu_option_prop arm_smmu_options[] = {
265 { ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" },
266 { 0, NULL},
267 };
268
269 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
270 {
271 return container_of(dom, struct arm_smmu_domain, domain);
272 }
273
274 static void parse_driver_options(struct arm_smmu_device *smmu)
275 {
276 int i = 0;
277
278 do {
279 if (of_property_read_bool(smmu->dev->of_node,
280 arm_smmu_options[i].prop)) {
281 smmu->options |= arm_smmu_options[i].opt;
282 dev_notice(smmu->dev, "option %s\n",
283 arm_smmu_options[i].prop);
284 }
285 } while (arm_smmu_options[++i].opt);
286 }
287
288 static struct device_node *dev_get_dev_node(struct device *dev)
289 {
290 if (dev_is_pci(dev)) {
291 struct pci_bus *bus = to_pci_dev(dev)->bus;
292
293 while (!pci_is_root_bus(bus))
294 bus = bus->parent;
295 return of_node_get(bus->bridge->parent->of_node);
296 }
297
298 return of_node_get(dev->of_node);
299 }
300
301 static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
302 {
303 *((__be32 *)data) = cpu_to_be32(alias);
304 return 0; /* Continue walking */
305 }
306
307 static int __find_legacy_master_phandle(struct device *dev, void *data)
308 {
309 struct of_phandle_iterator *it = *(void **)data;
310 struct device_node *np = it->node;
311 int err;
312
313 of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
314 "#stream-id-cells", 0)
315 if (it->node == np) {
316 *(void **)data = dev;
317 return 1;
318 }
319 it->node = np;
320 return err == -ENOENT ? 0 : err;
321 }
322
323 static struct platform_driver arm_smmu_driver;
324 static struct iommu_ops arm_smmu_ops;
325
326 static int arm_smmu_register_legacy_master(struct device *dev,
327 struct arm_smmu_device **smmu)
328 {
329 struct device *smmu_dev;
330 struct device_node *np;
331 struct of_phandle_iterator it;
332 void *data = &it;
333 u32 *sids;
334 __be32 pci_sid;
335 int err;
336
337 np = dev_get_dev_node(dev);
338 if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
339 of_node_put(np);
340 return -ENODEV;
341 }
342
343 it.node = np;
344 err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
345 __find_legacy_master_phandle);
346 smmu_dev = data;
347 of_node_put(np);
348 if (err == 0)
349 return -ENODEV;
350 if (err < 0)
351 return err;
352
353 if (dev_is_pci(dev)) {
354 /* "mmu-masters" assumes Stream ID == Requester ID */
355 pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
356 &pci_sid);
357 it.cur = &pci_sid;
358 it.cur_count = 1;
359 }
360
361 err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
362 &arm_smmu_ops);
363 if (err)
364 return err;
365
366 sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
367 if (!sids)
368 return -ENOMEM;
369
370 *smmu = dev_get_drvdata(smmu_dev);
371 of_phandle_iterator_args(&it, sids, it.cur_count);
372 err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
373 kfree(sids);
374 return err;
375 }
376
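/*
 * Allocate a free index from the bitmap within [start, end). The search
 * itself isn't atomic, so retry if another CPU claims the bit first.
 */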
377 static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
378 {
379 int idx;
380
381 do {
382 idx = find_next_zero_bit(map, end, start);
383 if (idx == end)
384 return -ENOSPC;
385 } while (test_and_set_bit(idx, map));
386
387 return idx;
388 }
389
390 static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
391 {
392 clear_bit(idx, map);
393 }
394
395 /* Wait for any pending TLB invalidations to complete */
396 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu,
397 void __iomem *sync, void __iomem *status)
398 {
399 unsigned int spin_cnt, delay;
400
401 writel_relaxed(0, sync);
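/*
 * Spin a few times between status reads, then back off with
 * exponentially increasing udelay()s, giving up once the delay
 * reaches TLB_LOOP_TIMEOUT.
 */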
402 for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
403 for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
404 if (!(readl_relaxed(status) & sTLBGSTATUS_GSACTIVE))
405 return;
406 cpu_relax();
407 }
408 udelay(delay);
409 }
410 dev_err_ratelimited(smmu->dev,
411 "TLB sync timed out -- SMMU may be deadlocked\n");
412 }
413
414 static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
415 {
416 void __iomem *base = ARM_SMMU_GR0(smmu);
417 unsigned long flags;
418
419 spin_lock_irqsave(&smmu->global_sync_lock, flags);
420 __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_GR0_sTLBGSYNC,
421 base + ARM_SMMU_GR0_sTLBGSTATUS);
422 spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
423 }
424
425 static void arm_smmu_tlb_sync_context(void *cookie)
426 {
427 struct arm_smmu_domain *smmu_domain = cookie;
428 struct arm_smmu_device *smmu = smmu_domain->smmu;
429 void __iomem *base = ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx);
430 unsigned long flags;
431
432 spin_lock_irqsave(&smmu_domain->cb_lock, flags);
433 __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_CB_TLBSYNC,
434 base + ARM_SMMU_CB_TLBSTATUS);
435 spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
436 }
437
438 static void arm_smmu_tlb_sync_vmid(void *cookie)
439 {
440 struct arm_smmu_domain *smmu_domain = cookie;
441
442 arm_smmu_tlb_sync_global(smmu_domain->smmu);
443 }
444
445 static void arm_smmu_tlb_inv_context_s1(void *cookie)
446 {
447 struct arm_smmu_domain *smmu_domain = cookie;
448 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
449 void __iomem *base = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
450
451 /*
452 * NOTE: this is not a relaxed write; it needs to guarantee that PTEs
453 * cleared by the current CPU are visible to the SMMU before the TLBI.
454 */
455 writel(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID);
456 arm_smmu_tlb_sync_context(cookie);
457 }
458
459 static void arm_smmu_tlb_inv_context_s2(void *cookie)
460 {
461 struct arm_smmu_domain *smmu_domain = cookie;
462 struct arm_smmu_device *smmu = smmu_domain->smmu;
463 void __iomem *base = ARM_SMMU_GR0(smmu);
464
465 /* NOTE: see above */
466 writel(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
467 arm_smmu_tlb_sync_global(smmu);
468 }
469
470 static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
471 size_t granule, bool leaf, void *cookie)
472 {
473 struct arm_smmu_domain *smmu_domain = cookie;
474 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
475 bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
476 void __iomem *reg = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
477
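/*
 * Ensure that the page-table updates made by the CPU are visible to the
 * SMMU walker before the TLBI below. With a coherent table walk the
 * io-pgtable code runs with IO_PGTABLE_QUIRK_NO_DMA, so nothing else
 * provides this ordering.
 */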
478 if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
479 wmb();
480
481 if (stage1) {
482 reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;
483
484 if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
485 iova &= ~0xfffUL; /* mask off the page-offset bits */
486 iova |= cfg->asid;
487 do {
488 writel_relaxed(iova, reg);
489 iova += granule;
490 } while (size -= granule);
491 } else {
492 iova >>= 12;
493 iova |= (u64)cfg->asid << 48;
494 do {
495 writeq_relaxed(iova, reg);
496 iova += granule >> 12;
497 } while (size -= granule);
498 }
499 } else {
500 reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L :
501 ARM_SMMU_CB_S2_TLBIIPAS2;
502 iova >>= 12;
503 do {
504 smmu_write_atomic_lq(iova, reg);
505 iova += granule >> 12;
506 } while (size -= granule);
507 }
508 }
509
510 /*
511 * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
512 * almost negligible, but the benefit of getting the first one in as far ahead
513 * of the sync as possible is significant, hence we don't just make this a
514 * no-op and set .tlb_sync to arm_smmu_tlb_inv_context_s2() as you might think.
515 */
516 static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size,
517 size_t granule, bool leaf, void *cookie)
518 {
519 struct arm_smmu_domain *smmu_domain = cookie;
520 void __iomem *base = ARM_SMMU_GR0(smmu_domain->smmu);
521
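/* As in arm_smmu_tlb_inv_range_nosync(): order the PTE updates before the TLBI. */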
522 if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
523 wmb();
524
525 writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
526 }
527
528 static const struct iommu_gather_ops arm_smmu_s1_tlb_ops = {
529 .tlb_flush_all = arm_smmu_tlb_inv_context_s1,
530 .tlb_add_flush = arm_smmu_tlb_inv_range_nosync,
531 .tlb_sync = arm_smmu_tlb_sync_context,
532 };
533
534 static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v2 = {
535 .tlb_flush_all = arm_smmu_tlb_inv_context_s2,
536 .tlb_add_flush = arm_smmu_tlb_inv_range_nosync,
537 .tlb_sync = arm_smmu_tlb_sync_context,
538 };
539
540 static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v1 = {
541 .tlb_flush_all = arm_smmu_tlb_inv_context_s2,
542 .tlb_add_flush = arm_smmu_tlb_inv_vmid_nosync,
543 .tlb_sync = arm_smmu_tlb_sync_vmid,
544 };
545
546 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
547 {
548 u32 fsr, fsynr;
549 unsigned long iova;
550 struct iommu_domain *domain = dev;
551 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
552 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
553 struct arm_smmu_device *smmu = smmu_domain->smmu;
554 void __iomem *cb_base;
555
556 cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
557 fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);
558
559 if (!(fsr & FSR_FAULT))
560 return IRQ_NONE;
561
562 fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
563 iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);
564
565 dev_err_ratelimited(smmu->dev,
566 "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cb=%d\n",
567 fsr, iova, fsynr, cfg->cbndx);
568
569 writel(fsr, cb_base + ARM_SMMU_CB_FSR);
570 return IRQ_HANDLED;
571 }
572
573 static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
574 {
575 u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
576 struct arm_smmu_device *smmu = dev;
577 void __iomem *gr0_base = ARM_SMMU_GR0_NS(smmu);
578
579 gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR);
580 gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0);
581 gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1);
582 gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2);
583
584 if (!gfsr)
585 return IRQ_NONE;
586
587 dev_err_ratelimited(smmu->dev,
588 "Unexpected global fault, this could be serious\n");
589 dev_err_ratelimited(smmu->dev,
590 "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
591 gfsr, gfsynr0, gfsynr1, gfsynr2);
592
593 writel(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR);
594 return IRQ_HANDLED;
595 }
596
597 static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
598 struct io_pgtable_cfg *pgtbl_cfg)
599 {
600 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
601 struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
602 bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
603
604 cb->cfg = cfg;
605
606 /* TTBCR */
607 if (stage1) {
608 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
609 cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
610 } else {
611 cb->tcr[0] = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
612 cb->tcr[1] = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
613 cb->tcr[1] |= TTBCR2_SEP_UPSTREAM;
614 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
615 cb->tcr[1] |= TTBCR2_AS;
616 }
617 } else {
618 cb->tcr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
619 }
620
621 /* TTBRs */
622 if (stage1) {
623 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
624 cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr[0];
625 cb->ttbr[1] = pgtbl_cfg->arm_v7s_cfg.ttbr[1];
626 } else {
627 cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
628 cb->ttbr[0] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
629 cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
630 cb->ttbr[1] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
631 }
632 } else {
633 cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
634 }
635
636 /* MAIRs (stage-1 only) */
637 if (stage1) {
638 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
639 cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
640 cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
641 } else {
642 cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
643 cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair[1];
644 }
645 }
646 }
647
648 static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
649 {
650 u32 reg;
651 bool stage1;
652 struct arm_smmu_cb *cb = &smmu->cbs[idx];
653 struct arm_smmu_cfg *cfg = cb->cfg;
654 void __iomem *cb_base, *gr1_base;
655
656 cb_base = ARM_SMMU_CB(smmu, idx);
657
658 /* Unassigned context banks only need disabling */
659 if (!cfg) {
660 writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);
661 return;
662 }
663
664 gr1_base = ARM_SMMU_GR1(smmu);
665 stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
666
667 /* CBA2R */
668 if (smmu->version > ARM_SMMU_V1) {
669 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
670 reg = CBA2R_RW64_64BIT;
671 else
672 reg = CBA2R_RW64_32BIT;
673 /* 16-bit VMIDs live in CBA2R */
674 if (smmu->features & ARM_SMMU_FEAT_VMID16)
675 reg |= cfg->vmid << CBA2R_VMID_SHIFT;
676
677 writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBA2R(idx));
678 }
679
680 /* CBAR */
681 reg = cfg->cbar;
682 if (smmu->version < ARM_SMMU_V2)
683 reg |= cfg->irptndx << CBAR_IRPTNDX_SHIFT;
684
685 /*
686 * Use the weakest shareability/memory types, so they are
687 * overridden by the ttbcr/pte.
688 */
689 if (stage1) {
690 reg |= (CBAR_S1_BPSHCFG_NSH << CBAR_S1_BPSHCFG_SHIFT) |
691 (CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT);
692 } else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
693 /* 8-bit VMIDs live in CBAR */
694 reg |= cfg->vmid << CBAR_VMID_SHIFT;
695 }
696 writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(idx));
697
698 /*
699 * TTBCR
700 * We must write this before the TTBRs, since it determines the
701 * access behaviour of some fields (in particular, ASID[15:8]).
702 */
703 if (stage1 && smmu->version > ARM_SMMU_V1)
704 writel_relaxed(cb->tcr[1], cb_base + ARM_SMMU_CB_TTBCR2);
705 writel_relaxed(cb->tcr[0], cb_base + ARM_SMMU_CB_TTBCR);
706
707 /* TTBRs */
708 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
709 writel_relaxed(cfg->asid, cb_base + ARM_SMMU_CB_CONTEXTIDR);
710 writel_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
711 writel_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
712 } else {
713 writeq_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
714 if (stage1)
715 writeq_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
716 }
717
718 /* MAIRs (stage-1 only) */
719 if (stage1) {
720 writel_relaxed(cb->mair[0], cb_base + ARM_SMMU_CB_S1_MAIR0);
721 writel_relaxed(cb->mair[1], cb_base + ARM_SMMU_CB_S1_MAIR1);
722 }
723
724 /* SCTLR */
725 reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | SCTLR_M;
726 if (stage1)
727 reg |= SCTLR_S1_ASIDPNE;
728 if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
729 reg |= SCTLR_E;
730
731 writel_relaxed(reg, cb_base + ARM_SMMU_CB_SCTLR);
732 }
733
734 static int arm_smmu_init_domain_context(struct iommu_domain *domain,
735 struct arm_smmu_device *smmu)
736 {
737 int irq, start, ret = 0;
738 unsigned long ias, oas;
739 struct io_pgtable_ops *pgtbl_ops;
740 struct io_pgtable_cfg pgtbl_cfg;
741 enum io_pgtable_fmt fmt;
742 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
743 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
744
745 mutex_lock(&smmu_domain->init_mutex);
746 if (smmu_domain->smmu)
747 goto out_unlock;
748
749 if (domain->type == IOMMU_DOMAIN_IDENTITY) {
750 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
751 smmu_domain->smmu = smmu;
752 goto out_unlock;
753 }
754
755 /*
756 * Mapping the requested stage onto what we support is surprisingly
757 * complicated, mainly because the spec allows S1+S2 SMMUs without
758 * support for nested translation. That means we end up with the
759 * following table:
760 *
761 * Requested Supported Actual
762 * S1 N S1
763 * S1 S1+S2 S1
764 * S1 S2 S2
765 * S1 S1 S1
766 * N N N
767 * N S1+S2 S2
768 * N S2 S2
769 * N S1 S1
770 *
771 * Note that you can't actually request stage-2 mappings.
772 */
773 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
774 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
775 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
776 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
777
778 /*
779 * Choosing a suitable context format is even more fiddly. Until we
780 * grow some way for the caller to express a preference, and/or move
781 * the decision into the io-pgtable code where it arguably belongs,
782 * just aim for the closest thing to the rest of the system, and hope
783 * that the hardware isn't esoteric enough that we can't assume AArch64
784 * support to be a superset of AArch32 support...
785 */
786 if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
787 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
788 if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
789 !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
790 (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
791 (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
792 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
793 if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
794 (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
795 ARM_SMMU_FEAT_FMT_AARCH64_16K |
796 ARM_SMMU_FEAT_FMT_AARCH64_4K)))
797 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;
798
799 if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
800 ret = -EINVAL;
801 goto out_unlock;
802 }
803
804 switch (smmu_domain->stage) {
805 case ARM_SMMU_DOMAIN_S1:
806 cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
807 start = smmu->num_s2_context_banks;
808 ias = smmu->va_size;
809 oas = smmu->ipa_size;
810 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
811 fmt = ARM_64_LPAE_S1;
812 } else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
813 fmt = ARM_32_LPAE_S1;
814 ias = min(ias, 32UL);
815 oas = min(oas, 40UL);
816 } else {
817 fmt = ARM_V7S;
818 ias = min(ias, 32UL);
819 oas = min(oas, 32UL);
820 }
821 smmu_domain->tlb_ops = &arm_smmu_s1_tlb_ops;
822 break;
823 case ARM_SMMU_DOMAIN_NESTED:
824 /*
825 * We will likely want to change this if/when KVM gets
826 * involved.
827 */
828 case ARM_SMMU_DOMAIN_S2:
829 cfg->cbar = CBAR_TYPE_S2_TRANS;
830 start = 0;
831 ias = smmu->ipa_size;
832 oas = smmu->pa_size;
833 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
834 fmt = ARM_64_LPAE_S2;
835 } else {
836 fmt = ARM_32_LPAE_S2;
837 ias = min(ias, 40UL);
838 oas = min(oas, 40UL);
839 }
840 if (smmu->version == ARM_SMMU_V2)
841 smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v2;
842 else
843 smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v1;
844 break;
845 default:
846 ret = -EINVAL;
847 goto out_unlock;
848 }
849 ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
850 smmu->num_context_banks);
851 if (ret < 0)
852 goto out_unlock;
853
854 cfg->cbndx = ret;
855 if (smmu->version < ARM_SMMU_V2) {
856 cfg->irptndx = atomic_inc_return(&smmu->irptndx);
857 cfg->irptndx %= smmu->num_context_irqs;
858 } else {
859 cfg->irptndx = cfg->cbndx;
860 }
861
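/*
 * ASIDs and VMIDs are derived from the context bank index, offset by
 * cavium_id_base so that they remain unique across SMMUs (see the
 * Cavium erratum 27704 handling in arm_smmu_device_cfg_probe()).
 */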
862 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
863 cfg->vmid = cfg->cbndx + 1 + smmu->cavium_id_base;
864 else
865 cfg->asid = cfg->cbndx + smmu->cavium_id_base;
866
867 pgtbl_cfg = (struct io_pgtable_cfg) {
868 .pgsize_bitmap = smmu->pgsize_bitmap,
869 .ias = ias,
870 .oas = oas,
871 .tlb = smmu_domain->tlb_ops,
872 .iommu_dev = smmu->dev,
873 };
874
875 if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
876 pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
877
878 if (smmu_domain->non_strict)
879 pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
880
881 smmu_domain->smmu = smmu;
882 pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
883 if (!pgtbl_ops) {
884 ret = -ENOMEM;
885 goto out_clear_smmu;
886 }
887
888 /* Update the domain's page sizes to reflect the page table format */
889 domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
890 domain->geometry.aperture_end = (1UL << ias) - 1;
891 domain->geometry.force_aperture = true;
892
893 /* Initialise the context bank with our page table cfg */
894 arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
895 arm_smmu_write_context_bank(smmu, cfg->cbndx);
896
897 /*
898 * Request context fault interrupt. Do this last to avoid the
899 * handler seeing a half-initialised domain state.
900 */
901 irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
902 ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault,
903 IRQF_SHARED, "arm-smmu-context-fault", domain);
904 if (ret < 0) {
905 dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
906 cfg->irptndx, irq);
907 cfg->irptndx = INVALID_IRPTNDX;
908 }
909
910 mutex_unlock(&smmu_domain->init_mutex);
911
912 /* Publish page table ops for map/unmap */
913 smmu_domain->pgtbl_ops = pgtbl_ops;
914 return 0;
915
916 out_clear_smmu:
917 smmu_domain->smmu = NULL;
918 out_unlock:
919 mutex_unlock(&smmu_domain->init_mutex);
920 return ret;
921 }
922
923 static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
924 {
925 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
926 struct arm_smmu_device *smmu = smmu_domain->smmu;
927 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
928 int irq;
929
930 if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
931 return;
932
933 /*
934 * Disable the context bank and free the page tables before
935 * releasing the context bank back to the allocator.
936 */
937 smmu->cbs[cfg->cbndx].cfg = NULL;
938 arm_smmu_write_context_bank(smmu, cfg->cbndx);
939
940 if (cfg->irptndx != INVALID_IRPTNDX) {
941 irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
942 devm_free_irq(smmu->dev, irq, domain);
943 }
944
945 free_io_pgtable_ops(smmu_domain->pgtbl_ops);
946 __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
947 }
948
949 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
950 {
951 struct arm_smmu_domain *smmu_domain;
952
953 if (type != IOMMU_DOMAIN_UNMANAGED &&
954 type != IOMMU_DOMAIN_DMA &&
955 type != IOMMU_DOMAIN_IDENTITY)
956 return NULL;
957 /*
958 * Allocate the domain and initialise some of its data structures.
959 * We can't really do anything meaningful until we've added a
960 * master.
961 */
962 smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
963 if (!smmu_domain)
964 return NULL;
965
966 if (type == IOMMU_DOMAIN_DMA && (using_legacy_binding ||
967 iommu_get_dma_cookie(&smmu_domain->domain))) {
968 kfree(smmu_domain);
969 return NULL;
970 }
971
972 mutex_init(&smmu_domain->init_mutex);
973 spin_lock_init(&smmu_domain->cb_lock);
974
975 return &smmu_domain->domain;
976 }
977
978 static void arm_smmu_domain_free(struct iommu_domain *domain)
979 {
980 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
981
982 /*
983 * Free the domain resources. We assume that all devices have
984 * already been detached.
985 */
986 iommu_put_dma_cookie(domain);
987 arm_smmu_destroy_domain_context(domain);
988 kfree(smmu_domain);
989 }
990
991 static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
992 {
993 struct arm_smmu_smr *smr = smmu->smrs + idx;
994 u32 reg = smr->id << SMR_ID_SHIFT | smr->mask << SMR_MASK_SHIFT;
995
996 if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
997 reg |= SMR_VALID;
998 writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_SMR(idx));
999 }
1000
1001 static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
1002 {
1003 struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
1004 u32 reg = (s2cr->type & S2CR_TYPE_MASK) << S2CR_TYPE_SHIFT |
1005 (s2cr->cbndx & S2CR_CBNDX_MASK) << S2CR_CBNDX_SHIFT |
1006 (s2cr->privcfg & S2CR_PRIVCFG_MASK) << S2CR_PRIVCFG_SHIFT;
1007
1008 if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
1009 smmu->smrs[idx].valid)
1010 reg |= S2CR_EXIDVALID;
1011 writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_S2CR(idx));
1012 }
1013
1014 static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
1015 {
1016 arm_smmu_write_s2cr(smmu, idx);
1017 if (smmu->smrs)
1018 arm_smmu_write_smr(smmu, idx);
1019 }
1020
1021 /*
1022 * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
1023 * should be called after sCR0 is written.
1024 */
1025 static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
1026 {
1027 void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1028 u32 smr;
1029
1030 if (!smmu->smrs)
1031 return;
1032
1033 /*
1034 * SMR.ID bits may not be preserved if the corresponding MASK
1035 * bits are set, so check each one separately. We can reject
1036 * masters later if they try to claim IDs outside these masks.
1037 */
1038 smr = smmu->streamid_mask << SMR_ID_SHIFT;
1039 writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
1040 smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
1041 smmu->streamid_mask = smr >> SMR_ID_SHIFT;
1042
1043 smr = smmu->streamid_mask << SMR_MASK_SHIFT;
1044 writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
1045 smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
1046 smmu->smr_mask_mask = smr >> SMR_MASK_SHIFT;
1047 }
1048
1049 static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
1050 {
1051 struct arm_smmu_smr *smrs = smmu->smrs;
1052 int i, free_idx = -ENOSPC;
1053
1054 /* Stream indexing is blissfully easy */
1055 if (!smrs)
1056 return id;
1057
1058 /* Validating SMRs is... less so */
1059 for (i = 0; i < smmu->num_mapping_groups; ++i) {
1060 if (!smrs[i].valid) {
1061 /*
1062 * Note the first free entry we come across, which
1063 * we'll claim in the end if nothing else matches.
1064 */
1065 if (free_idx < 0)
1066 free_idx = i;
1067 continue;
1068 }
1069 /*
1070 * If the new entry is _entirely_ matched by an existing entry,
1071 * then reuse that, with the guarantee that there also cannot
1072 * be any subsequent conflicting entries. In normal use we'd
1073 * expect simply identical entries for this case, but there's
1074 * no harm in accommodating the generalisation.
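* For example, an existing entry {id 0x400, mask 0xff} (covering
* streams 0x400-0x4ff) entirely matches a new {id 0x420, mask 0x0f},
* so that existing index is simply reused.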
1075 */
1076 if ((mask & smrs[i].mask) == mask &&
1077 !((id ^ smrs[i].id) & ~smrs[i].mask))
1078 return i;
1079 /*
1080 * If the new entry has any other overlap with an existing one,
1081 * though, then there always exists at least one stream ID
1082 * which would cause a conflict, and we can't allow that risk.
1083 */
1084 if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
1085 return -EINVAL;
1086 }
1087
1088 return free_idx;
1089 }
1090
1091 static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
1092 {
1093 if (--smmu->s2crs[idx].count)
1094 return false;
1095
1096 smmu->s2crs[idx] = s2cr_init_val;
1097 if (smmu->smrs)
1098 smmu->smrs[idx].valid = false;
1099
1100 return true;
1101 }
1102
1103 static int arm_smmu_master_alloc_smes(struct device *dev)
1104 {
1105 struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1106 struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1107 struct arm_smmu_device *smmu = cfg->smmu;
1108 struct arm_smmu_smr *smrs = smmu->smrs;
1109 struct iommu_group *group;
1110 int i, idx, ret;
1111
1112 mutex_lock(&smmu->stream_map_mutex);
1113 /* Figure out a viable stream map entry allocation */
1114 for_each_cfg_sme(fwspec, i, idx) {
1115 u16 sid = fwspec->ids[i];
1116 u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;
1117
1118 if (idx != INVALID_SMENDX) {
1119 ret = -EEXIST;
1120 goto out_err;
1121 }
1122
1123 ret = arm_smmu_find_sme(smmu, sid, mask);
1124 if (ret < 0)
1125 goto out_err;
1126
1127 idx = ret;
1128 if (smrs && smmu->s2crs[idx].count == 0) {
1129 smrs[idx].id = sid;
1130 smrs[idx].mask = mask;
1131 smrs[idx].valid = true;
1132 }
1133 smmu->s2crs[idx].count++;
1134 cfg->smendx[i] = (s16)idx;
1135 }
1136
1137 group = iommu_group_get_for_dev(dev);
1138 if (!group)
1139 group = ERR_PTR(-ENOMEM);
1140 if (IS_ERR(group)) {
1141 ret = PTR_ERR(group);
1142 goto out_err;
1143 }
1144 iommu_group_put(group);
1145
1146 /* It worked! Now, poke the actual hardware */
1147 for_each_cfg_sme(fwspec, i, idx) {
1148 arm_smmu_write_sme(smmu, idx);
1149 smmu->s2crs[idx].group = group;
1150 }
1151
1152 mutex_unlock(&smmu->stream_map_mutex);
1153 return 0;
1154
1155 out_err:
1156 while (i--) {
1157 arm_smmu_free_sme(smmu, cfg->smendx[i]);
1158 cfg->smendx[i] = INVALID_SMENDX;
1159 }
1160 mutex_unlock(&smmu->stream_map_mutex);
1161 return ret;
1162 }
1163
1164 static void arm_smmu_master_free_smes(struct iommu_fwspec *fwspec)
1165 {
1166 struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1167 struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1168 int i, idx;
1169
1170 mutex_lock(&smmu->stream_map_mutex);
1171 for_each_cfg_sme(fwspec, i, idx) {
1172 if (arm_smmu_free_sme(smmu, idx))
1173 arm_smmu_write_sme(smmu, idx);
1174 cfg->smendx[i] = INVALID_SMENDX;
1175 }
1176 mutex_unlock(&smmu->stream_map_mutex);
1177 }
1178
1179 static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
1180 struct iommu_fwspec *fwspec)
1181 {
1182 struct arm_smmu_device *smmu = smmu_domain->smmu;
1183 struct arm_smmu_s2cr *s2cr = smmu->s2crs;
1184 u8 cbndx = smmu_domain->cfg.cbndx;
1185 enum arm_smmu_s2cr_type type;
1186 int i, idx;
1187
1188 if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
1189 type = S2CR_TYPE_BYPASS;
1190 else
1191 type = S2CR_TYPE_TRANS;
1192
1193 for_each_cfg_sme(fwspec, i, idx) {
1194 if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
1195 continue;
1196
1197 s2cr[idx].type = type;
1198 s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
1199 s2cr[idx].cbndx = cbndx;
1200 arm_smmu_write_s2cr(smmu, idx);
1201 }
1202 return 0;
1203 }
1204
1205 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1206 {
1207 int ret;
1208 struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1209 struct arm_smmu_device *smmu;
1210 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1211
1212 if (!fwspec || fwspec->ops != &arm_smmu_ops) {
1213 dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
1214 return -ENXIO;
1215 }
1216
1217 /*
1218 * FIXME: The arch/arm DMA API code tries to attach devices to its own
1219 * domains between of_xlate() and add_device() - we have no way to cope
1220 * with that, so until ARM gets converted to rely on groups and default
1221 * domains, just say no (but more politely than by dereferencing NULL).
1222 * This should be at least a WARN_ON once that's sorted.
1223 */
1224 if (!fwspec->iommu_priv)
1225 return -ENODEV;
1226
1227 smmu = fwspec_smmu(fwspec);
1228 /* Ensure that the domain is finalised */
1229 ret = arm_smmu_init_domain_context(domain, smmu);
1230 if (ret < 0)
1231 return ret;
1232
1233 /*
1234 * Sanity check the domain. We don't support domains across
1235 * different SMMUs.
1236 */
1237 if (smmu_domain->smmu != smmu) {
1238 dev_err(dev,
1239 "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
1240 dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
1241 return -EINVAL;
1242 }
1243
1244 /* Looks ok, so add the device to the domain */
1245 return arm_smmu_domain_add_master(smmu_domain, fwspec);
1246 }
1247
1248 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1249 phys_addr_t paddr, size_t size, int prot)
1250 {
1251 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1252
1253 if (!ops)
1254 return -ENODEV;
1255
1256 return ops->map(ops, iova, paddr, size, prot);
1257 }
1258
1259 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
1260 size_t size)
1261 {
1262 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1263
1264 if (!ops)
1265 return 0;
1266
1267 return ops->unmap(ops, iova, size);
1268 }
1269
1270 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
1271 {
1272 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1273
1274 if (smmu_domain->tlb_ops)
1275 smmu_domain->tlb_ops->tlb_flush_all(smmu_domain);
1276 }
1277
1278 static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
1279 {
1280 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1281
1282 if (smmu_domain->tlb_ops)
1283 smmu_domain->tlb_ops->tlb_sync(smmu_domain);
1284 }
1285
1286 static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
1287 dma_addr_t iova)
1288 {
1289 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1290 struct arm_smmu_device *smmu = smmu_domain->smmu;
1291 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
1292 struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1293 struct device *dev = smmu->dev;
1294 void __iomem *cb_base;
1295 u32 tmp;
1296 u64 phys;
1297 unsigned long va, flags;
1298
1299 cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
1300
1301 spin_lock_irqsave(&smmu_domain->cb_lock, flags);
1302 /* ATS1 registers can only be written atomically */
1303 va = iova & ~0xfffUL;
1304 if (smmu->version == ARM_SMMU_V2)
1305 smmu_write_atomic_lq(va, cb_base + ARM_SMMU_CB_ATS1PR);
1306 else /* Register is only 32-bit in v1 */
1307 writel_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);
1308
1309 if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
1310 !(tmp & ATSR_ACTIVE), 5, 50)) {
1311 spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1312 dev_err(dev,
1313 "iova to phys timed out on %pad. Falling back to software table walk.\n",
1314 &iova);
1315 return ops->iova_to_phys(ops, iova);
1316 }
1317
1318 phys = readq_relaxed(cb_base + ARM_SMMU_CB_PAR);
1319 spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1320 if (phys & CB_PAR_F) {
1321 dev_err(dev, "translation fault!\n");
1322 dev_err(dev, "PAR = 0x%llx\n", phys);
1323 return 0;
1324 }
1325
1326 return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
1327 }
1328
1329 static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
1330 dma_addr_t iova)
1331 {
1332 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1333 struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1334
1335 if (domain->type == IOMMU_DOMAIN_IDENTITY)
1336 return iova;
1337
1338 if (!ops)
1339 return 0;
1340
1341 if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
1342 smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1343 return arm_smmu_iova_to_phys_hard(domain, iova);
1344
1345 return ops->iova_to_phys(ops, iova);
1346 }
1347
1348 static bool arm_smmu_capable(enum iommu_cap cap)
1349 {
1350 switch (cap) {
1351 case IOMMU_CAP_CACHE_COHERENCY:
1352 /*
1353 * Return true here as the SMMU can always send out coherent
1354 * requests.
1355 */
1356 return true;
1357 case IOMMU_CAP_NOEXEC:
1358 return true;
1359 default:
1360 return false;
1361 }
1362 }
1363
1364 static int arm_smmu_match_node(struct device *dev, void *data)
1365 {
1366 return dev->fwnode == data;
1367 }
1368
1369 static
1370 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1371 {
1372 struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
1373 fwnode, arm_smmu_match_node);
1374 put_device(dev);
1375 return dev ? dev_get_drvdata(dev) : NULL;
1376 }
1377
1378 static int arm_smmu_add_device(struct device *dev)
1379 {
1380 struct arm_smmu_device *smmu;
1381 struct arm_smmu_master_cfg *cfg;
1382 struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1383 int i, ret;
1384
1385 if (using_legacy_binding) {
1386 ret = arm_smmu_register_legacy_master(dev, &smmu);
1387
1388 /*
1389 * If dev->iommu_fwspec is initially NULL, arm_smmu_register_legacy_master()
1390 * will allocate/initialise a new one. Thus we need to update fwspec for
1391 * later use.
1392 */
1393 fwspec = dev->iommu_fwspec;
1394 if (ret)
1395 goto out_free;
1396 } else if (fwspec && fwspec->ops == &arm_smmu_ops) {
1397 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
1398 } else {
1399 return -ENODEV;
1400 }
1401
1402 ret = -EINVAL;
1403 for (i = 0; i < fwspec->num_ids; i++) {
1404 u16 sid = fwspec->ids[i];
1405 u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;
1406
1407 if (sid & ~smmu->streamid_mask) {
1408 dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
1409 sid, smmu->streamid_mask);
1410 goto out_free;
1411 }
1412 if (mask & ~smmu->smr_mask_mask) {
1413 dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
1414 mask, smmu->smr_mask_mask);
1415 goto out_free;
1416 }
1417 }
1418
1419 ret = -ENOMEM;
1420 cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
1421 GFP_KERNEL);
1422 if (!cfg)
1423 goto out_free;
1424
1425 cfg->smmu = smmu;
1426 fwspec->iommu_priv = cfg;
1427 while (i--)
1428 cfg->smendx[i] = INVALID_SMENDX;
1429
1430 ret = arm_smmu_master_alloc_smes(dev);
1431 if (ret)
1432 goto out_cfg_free;
1433
1434 iommu_device_link(&smmu->iommu, dev);
1435
1436 return 0;
1437
1438 out_cfg_free:
1439 kfree(cfg);
1440 out_free:
1441 iommu_fwspec_free(dev);
1442 return ret;
1443 }
1444
1445 static void arm_smmu_remove_device(struct device *dev)
1446 {
1447 struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1448 struct arm_smmu_master_cfg *cfg;
1449 struct arm_smmu_device *smmu;
1450
1451
1452 if (!fwspec || fwspec->ops != &arm_smmu_ops)
1453 return;
1454
1455 cfg = fwspec->iommu_priv;
1456 smmu = cfg->smmu;
1457
1458 iommu_device_unlink(&smmu->iommu, dev);
1459 arm_smmu_master_free_smes(fwspec);
1460 iommu_group_remove_device(dev);
1461 kfree(fwspec->iommu_priv);
1462 iommu_fwspec_free(dev);
1463 }
1464
1465 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1466 {
1467 struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1468 struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1469 struct iommu_group *group = NULL;
1470 int i, idx;
1471
1472 for_each_cfg_sme(fwspec, i, idx) {
1473 if (group && smmu->s2crs[idx].group &&
1474 group != smmu->s2crs[idx].group)
1475 return ERR_PTR(-EINVAL);
1476
1477 group = smmu->s2crs[idx].group;
1478 }
1479
1480 if (group)
1481 return iommu_group_ref_get(group);
1482
1483 if (dev_is_pci(dev))
1484 group = pci_device_group(dev);
1485 else
1486 group = generic_device_group(dev);
1487
1488 return group;
1489 }
1490
1491 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1492 enum iommu_attr attr, void *data)
1493 {
1494 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1495
1496 switch (domain->type) {
1497 case IOMMU_DOMAIN_UNMANAGED:
1498 switch (attr) {
1499 case DOMAIN_ATTR_NESTING:
1500 *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1501 return 0;
1502 default:
1503 return -ENODEV;
1504 }
1505 break;
1506 case IOMMU_DOMAIN_DMA:
1507 switch (attr) {
1508 case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
1509 *(int *)data = smmu_domain->non_strict;
1510 return 0;
1511 default:
1512 return -ENODEV;
1513 }
1514 break;
1515 default:
1516 return -EINVAL;
1517 }
1518 }
1519
1520 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1521 enum iommu_attr attr, void *data)
1522 {
1523 int ret = 0;
1524 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1525
1526 mutex_lock(&smmu_domain->init_mutex);
1527
1528 switch (domain->type) {
1529 case IOMMU_DOMAIN_UNMANAGED:
1530 switch (attr) {
1531 case DOMAIN_ATTR_NESTING:
1532 if (smmu_domain->smmu) {
1533 ret = -EPERM;
1534 goto out_unlock;
1535 }
1536
1537 if (*(int *)data)
1538 smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1539 else
1540 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1541 break;
1542 default:
1543 ret = -ENODEV;
1544 }
1545 break;
1546 case IOMMU_DOMAIN_DMA:
1547 switch (attr) {
1548 case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
1549 smmu_domain->non_strict = *(int *)data;
1550 break;
1551 default:
1552 ret = -ENODEV;
1553 }
1554 break;
1555 default:
1556 ret = -EINVAL;
1557 }
1558 out_unlock:
1559 mutex_unlock(&smmu_domain->init_mutex);
1560 return ret;
1561 }
1562
1563 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
1564 {
1565 u32 mask, fwid = 0;
1566
1567 if (args->args_count > 0)
1568 fwid |= (u16)args->args[0];
1569
1570 if (args->args_count > 1)
1571 fwid |= (u16)args->args[1] << SMR_MASK_SHIFT;
1572 else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
1573 fwid |= (u16)mask << SMR_MASK_SHIFT;
1574
1575 return iommu_fwspec_add_ids(dev, &fwid, 1);
1576 }
1577
1578 static void arm_smmu_get_resv_regions(struct device *dev,
1579 struct list_head *head)
1580 {
1581 struct iommu_resv_region *region;
1582 int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
1583
1584 region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
1585 prot, IOMMU_RESV_SW_MSI);
1586 if (!region)
1587 return;
1588
1589 list_add_tail(&region->list, head);
1590
1591 iommu_dma_get_resv_regions(dev, head);
1592 }
1593
1594 static void arm_smmu_put_resv_regions(struct device *dev,
1595 struct list_head *head)
1596 {
1597 struct iommu_resv_region *entry, *next;
1598
1599 list_for_each_entry_safe(entry, next, head, list)
1600 kfree(entry);
1601 }
1602
1603 static struct iommu_ops arm_smmu_ops = {
1604 .capable = arm_smmu_capable,
1605 .domain_alloc = arm_smmu_domain_alloc,
1606 .domain_free = arm_smmu_domain_free,
1607 .attach_dev = arm_smmu_attach_dev,
1608 .map = arm_smmu_map,
1609 .unmap = arm_smmu_unmap,
1610 .map_sg = default_iommu_map_sg,
1611 .flush_iotlb_all = arm_smmu_flush_iotlb_all,
1612 .iotlb_sync = arm_smmu_iotlb_sync,
1613 .iova_to_phys = arm_smmu_iova_to_phys,
1614 .add_device = arm_smmu_add_device,
1615 .remove_device = arm_smmu_remove_device,
1616 .device_group = arm_smmu_device_group,
1617 .domain_get_attr = arm_smmu_domain_get_attr,
1618 .domain_set_attr = arm_smmu_domain_set_attr,
1619 .of_xlate = arm_smmu_of_xlate,
1620 .get_resv_regions = arm_smmu_get_resv_regions,
1621 .put_resv_regions = arm_smmu_put_resv_regions,
1622 .pgsize_bitmap = -1UL, /* Restricted during device attach */
1623 };
1624
1625 static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
1626 {
1627 void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1628 int i;
1629 u32 reg, major;
1630
1631 /* clear global FSR */
1632 reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
1633 writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
1634
1635 /*
1636 * Reset stream mapping groups: Initial values mark all SMRn as
1637 * invalid and all S2CRn as bypass unless overridden.
1638 */
1639 for (i = 0; i < smmu->num_mapping_groups; ++i)
1640 arm_smmu_write_sme(smmu, i);
1641
1642 if (smmu->model == ARM_MMU500) {
1643 /*
1644 * Before clearing ARM_MMU500_ACTLR_CPRE, the CACHE_LOCK
1645 * bit of ACR must be cleared first. Note that CACHE_LOCK
1646 * is only present in MMU-500r2 onwards.
1647 */
1648 reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID7);
1649 major = (reg >> ID7_MAJOR_SHIFT) & ID7_MAJOR_MASK;
1650 reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_sACR);
1651 if (major >= 2)
1652 reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
1653 /*
1654 * Allow unmatched Stream IDs to allocate bypass
1655 * TLB entries for reduced latency.
1656 */
1657 reg |= ARM_MMU500_ACR_SMTNMB_TLBEN | ARM_MMU500_ACR_S2CRB_TLBEN;
1658 writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR);
1659 }
1660
1661 /* Make sure all context banks are disabled and clear CB_FSR */
1662 for (i = 0; i < smmu->num_context_banks; ++i) {
1663 void __iomem *cb_base = ARM_SMMU_CB(smmu, i);
1664
1665 arm_smmu_write_context_bank(smmu, i);
1666 writel_relaxed(FSR_FAULT, cb_base + ARM_SMMU_CB_FSR);
1667 /*
1668 * Disable MMU-500's not-particularly-beneficial next-page
1669 * prefetcher for the sake of errata #841119 and #826419.
1670 */
1671 if (smmu->model == ARM_MMU500) {
1672 reg = readl_relaxed(cb_base + ARM_SMMU_CB_ACTLR);
1673 reg &= ~ARM_MMU500_ACTLR_CPRE;
1674 writel_relaxed(reg, cb_base + ARM_SMMU_CB_ACTLR);
1675 }
1676 }
1677
1678 /* Invalidate the TLB, just in case */
1679 writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLH);
1680 writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH);
1681
1682 reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1683
1684 /* Enable fault reporting */
1685 reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE);
1686
1687 /* Disable TLB broadcasting. */
1688 reg |= (sCR0_VMIDPNE | sCR0_PTM);
1689
1690 /* Enable client access, handling unmatched streams as appropriate */
1691 reg &= ~sCR0_CLIENTPD;
1692 if (disable_bypass)
1693 reg |= sCR0_USFCFG;
1694 else
1695 reg &= ~sCR0_USFCFG;
1696
1697 /* Disable forced broadcasting */
1698 reg &= ~sCR0_FB;
1699
1700 /* Don't upgrade barriers */
1701 reg &= ~(sCR0_BSU_MASK << sCR0_BSU_SHIFT);
1702
1703 if (smmu->features & ARM_SMMU_FEAT_VMID16)
1704 reg |= sCR0_VMID16EN;
1705
1706 if (smmu->features & ARM_SMMU_FEAT_EXIDS)
1707 reg |= sCR0_EXIDENABLE;
1708
1709 /* Push the button */
1710 arm_smmu_tlb_sync_global(smmu);
1711 writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1712 }
1713
1714 static int arm_smmu_id_size_to_bits(int size)
1715 {
1716 switch (size) {
1717 case 0:
1718 return 32;
1719 case 1:
1720 return 36;
1721 case 2:
1722 return 40;
1723 case 3:
1724 return 42;
1725 case 4:
1726 return 44;
1727 case 5:
1728 default:
1729 return 48;
1730 }
1731 }
1732
1733 static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
1734 {
1735 unsigned long size;
1736 void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1737 u32 id;
1738 bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
1739 int i;
1740
1741 dev_notice(smmu->dev, "probing hardware configuration...\n");
1742 dev_notice(smmu->dev, "SMMUv%d with:\n",
1743 smmu->version == ARM_SMMU_V2 ? 2 : 1);
1744
1745 /* ID0 */
1746 id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID0);
1747
1748 /* Restrict available stages based on module parameter */
1749 if (force_stage == 1)
1750 id &= ~(ID0_S2TS | ID0_NTS);
1751 else if (force_stage == 2)
1752 id &= ~(ID0_S1TS | ID0_NTS);
1753
1754 if (id & ID0_S1TS) {
1755 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
1756 dev_notice(smmu->dev, "\tstage 1 translation\n");
1757 }
1758
1759 if (id & ID0_S2TS) {
1760 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
1761 dev_notice(smmu->dev, "\tstage 2 translation\n");
1762 }
1763
1764 if (id & ID0_NTS) {
1765 smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
1766 dev_notice(smmu->dev, "\tnested translation\n");
1767 }
1768
1769 if (!(smmu->features &
1770 (ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
1771 dev_err(smmu->dev, "\tno translation support!\n");
1772 return -ENODEV;
1773 }
1774
1775 if ((id & ID0_S1TS) &&
1776 ((smmu->version < ARM_SMMU_V2) || !(id & ID0_ATOSNS))) {
1777 smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
1778 dev_notice(smmu->dev, "\taddress translation ops\n");
1779 }
1780
1781 /*
1782 * In order for DMA API calls to work properly, we must defer to what
1783 * the FW says about coherency, regardless of what the hardware claims.
1784 * Fortunately, this also opens up a workaround for systems where the
1785 * ID register value has ended up configured incorrectly.
1786 */
1787 cttw_reg = !!(id & ID0_CTTW);
1788 if (cttw_fw || cttw_reg)
1789 dev_notice(smmu->dev, "\t%scoherent table walk\n",
1790 cttw_fw ? "" : "non-");
1791 if (cttw_fw != cttw_reg)
1792 dev_notice(smmu->dev,
1793 "\t(IDR0.CTTW overridden by FW configuration)\n");
1794
1795 /* Max. number of entries we have for stream matching/indexing */
1796 if (smmu->version == ARM_SMMU_V2 && id & ID0_EXIDS) {
1797 smmu->features |= ARM_SMMU_FEAT_EXIDS;
1798 size = 1 << 16;
1799 } else {
1800 size = 1 << ((id >> ID0_NUMSIDB_SHIFT) & ID0_NUMSIDB_MASK);
1801 }
1802 smmu->streamid_mask = size - 1;
1803 if (id & ID0_SMS) {
1804 smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
1805 size = (id >> ID0_NUMSMRG_SHIFT) & ID0_NUMSMRG_MASK;
1806 if (size == 0) {
1807 dev_err(smmu->dev,
1808 "stream-matching supported, but no SMRs present!\n");
1809 return -ENODEV;
1810 }
1811
1812 /* Zero-initialised to mark as invalid */
1813 smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
1814 GFP_KERNEL);
1815 if (!smmu->smrs)
1816 return -ENOMEM;
1817
1818 dev_notice(smmu->dev,
1819 "\tstream matching with %lu register groups", size);
1820 }
1821 /* s2cr->type == 0 means translation, so initialise explicitly */
1822 smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
1823 GFP_KERNEL);
1824 if (!smmu->s2crs)
1825 return -ENOMEM;
1826 for (i = 0; i < size; i++)
1827 smmu->s2crs[i] = s2cr_init_val;
1828
1829 smmu->num_mapping_groups = size;
1830 mutex_init(&smmu->stream_map_mutex);
1831 spin_lock_init(&smmu->global_sync_lock);
1832
1833 if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) {
1834 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
1835 if (!(id & ID0_PTFS_NO_AARCH32S))
1836 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
1837 }
1838
1839 /* ID1 */
1840 id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID1);
1841 smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12;
1842
1843 /* Check for size mismatch of SMMU address space from mapped region */
1844 size = 1 << (((id >> ID1_NUMPAGENDXB_SHIFT) & ID1_NUMPAGENDXB_MASK) + 1);
1845 size <<= smmu->pgshift;
1846 if (smmu->cb_base != gr0_base + size)
1847 dev_warn(smmu->dev,
1848 "SMMU address space size (0x%lx) differs from mapped region size (0x%tx)!\n",
1849 size * 2, (smmu->cb_base - gr0_base) * 2);
1850
1851 smmu->num_s2_context_banks = (id >> ID1_NUMS2CB_SHIFT) & ID1_NUMS2CB_MASK;
1852 smmu->num_context_banks = (id >> ID1_NUMCB_SHIFT) & ID1_NUMCB_MASK;
1853 if (smmu->num_s2_context_banks > smmu->num_context_banks) {
1854 dev_err(smmu->dev, "impossible number of S2 context banks!\n");
1855 return -ENODEV;
1856 }
1857 dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
1858 smmu->num_context_banks, smmu->num_s2_context_banks);
1859 /*
1860 * Cavium CN88xx erratum #27704.
1861 * Ensure ASID and VMID allocation is unique across all SMMUs in
1862 * the system.
1863 */
1864 if (smmu->model == CAVIUM_SMMUV2) {
1865 smmu->cavium_id_base =
1866 atomic_add_return(smmu->num_context_banks,
1867 &cavium_smmu_context_count);
1868 smmu->cavium_id_base -= smmu->num_context_banks;
1869 dev_notice(smmu->dev, "\tenabling workaround for Cavium erratum 27704\n");
1870 }
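	/*
	 * cavium_id_base is later added to the context bank index when
	 * forming ASIDs and VMIDs, so each affected SMMU instance allocates
	 * from its own disjoint range.
	 */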
1871 smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
1872 sizeof(*smmu->cbs), GFP_KERNEL);
1873 if (!smmu->cbs)
1874 return -ENOMEM;
1875
1876 /* ID2 */
1877 id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID2);
1878 size = arm_smmu_id_size_to_bits((id >> ID2_IAS_SHIFT) & ID2_IAS_MASK);
1879 smmu->ipa_size = size;
1880
1881 /* The output mask is also applied for bypass */
1882 size = arm_smmu_id_size_to_bits((id >> ID2_OAS_SHIFT) & ID2_OAS_MASK);
1883 smmu->pa_size = size;
1884
1885 if (id & ID2_VMID16)
1886 smmu->features |= ARM_SMMU_FEAT_VMID16;
1887
1888 /*
1889 * What the page table walker can address actually depends on which
1890 * descriptor format is in use, but since a) we don't know that yet,
1891 * and b) it can vary per context bank, this will have to do...
1892 */
1893 if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
1894 dev_warn(smmu->dev,
1895 "failed to set DMA mask for table walker\n");
1896
1897 if (smmu->version < ARM_SMMU_V2) {
1898 smmu->va_size = smmu->ipa_size;
1899 if (smmu->version == ARM_SMMU_V1_64K)
1900 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1901 } else {
1902 size = (id >> ID2_UBS_SHIFT) & ID2_UBS_MASK;
1903 smmu->va_size = arm_smmu_id_size_to_bits(size);
1904 if (id & ID2_PTFS_4K)
1905 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
1906 if (id & ID2_PTFS_16K)
1907 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
1908 if (id & ID2_PTFS_64K)
1909 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1910 }
1911
1912 /* Now we've corralled the various formats, what'll it do? */
1913 if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
1914 smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
1915 if (smmu->features &
1916 (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
1917 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
1918 if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
1919 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
1920 if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
1921 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
1922
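	/*
	 * arm_smmu_ops is shared by every SMMU instance, so its bitmap
	 * accumulates the union of the page sizes each instance supports.
	 */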
1923 if (arm_smmu_ops.pgsize_bitmap == -1UL)
1924 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
1925 else
1926 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
1927 dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
1928 smmu->pgsize_bitmap);
1929
1930
1931 if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
1932 dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
1933 smmu->va_size, smmu->ipa_size);
1934
1935 if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
1936 dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
1937 smmu->ipa_size, smmu->pa_size);
1938
1939 return 0;
1940 }
1941
1942 struct arm_smmu_match_data {
1943 enum arm_smmu_arch_version version;
1944 enum arm_smmu_implementation model;
1945 };
1946
1947 #define ARM_SMMU_MATCH_DATA(name, ver, imp) \
1948 static struct arm_smmu_match_data name = { .version = ver, .model = imp }
1949
1950 ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
1951 ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
1952 ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
1953 ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
1954 ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
1955
1956 static const struct of_device_id arm_smmu_of_match[] = {
1957 { .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
1958 { .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
1959 { .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
1960 { .compatible = "arm,mmu-401", .data = &arm_mmu401 },
1961 { .compatible = "arm,mmu-500", .data = &arm_mmu500 },
1962 { .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
1963 { },
1964 };
1965 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
1966
1967 #ifdef CONFIG_ACPI
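/*
 * With ACPI there is no compatible string to match, so the IORT model
 * field is translated into the same version/implementation pair that the
 * DT match data would otherwise supply.
 */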
1968 static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
1969 {
1970 int ret = 0;
1971
1972 switch (model) {
1973 case ACPI_IORT_SMMU_V1:
1974 case ACPI_IORT_SMMU_CORELINK_MMU400:
1975 smmu->version = ARM_SMMU_V1;
1976 smmu->model = GENERIC_SMMU;
1977 break;
1978 case ACPI_IORT_SMMU_CORELINK_MMU401:
1979 smmu->version = ARM_SMMU_V1_64K;
1980 smmu->model = GENERIC_SMMU;
1981 break;
1982 case ACPI_IORT_SMMU_V2:
1983 smmu->version = ARM_SMMU_V2;
1984 smmu->model = GENERIC_SMMU;
1985 break;
1986 case ACPI_IORT_SMMU_CORELINK_MMU500:
1987 smmu->version = ARM_SMMU_V2;
1988 smmu->model = ARM_MMU500;
1989 break;
1990 case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
1991 smmu->version = ARM_SMMU_V2;
1992 smmu->model = CAVIUM_SMMUV2;
1993 break;
1994 default:
1995 ret = -ENODEV;
1996 }
1997
1998 return ret;
1999 }
2000
2001 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
2002 struct arm_smmu_device *smmu)
2003 {
2004 struct device *dev = smmu->dev;
2005 struct acpi_iort_node *node =
2006 *(struct acpi_iort_node **)dev_get_platdata(dev);
2007 struct acpi_iort_smmu *iort_smmu;
2008 int ret;
2009
2010 /* Retrieve SMMU1/2 specific data */
2011 iort_smmu = (struct acpi_iort_smmu *)node->node_data;
2012
2013 ret = acpi_smmu_get_data(iort_smmu->model, smmu);
2014 if (ret < 0)
2015 return ret;
2016
2017 /* Ignore the configuration access interrupt */
2018 smmu->num_global_irqs = 1;
2019
2020 if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK)
2021 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
2022
2023 return 0;
2024 }
2025 #else
2026 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
2027 struct arm_smmu_device *smmu)
2028 {
2029 return -ENODEV;
2030 }
2031 #endif
2032
2033 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
2034 struct arm_smmu_device *smmu)
2035 {
2036 const struct arm_smmu_match_data *data;
2037 struct device *dev = &pdev->dev;
2038 bool legacy_binding;
2039
2040 if (of_property_read_u32(dev->of_node, "#global-interrupts",
2041 &smmu->num_global_irqs)) {
2042 dev_err(dev, "missing #global-interrupts property\n");
2043 return -ENODEV;
2044 }
2045
2046 data = of_device_get_match_data(dev);
2047 smmu->version = data->version;
2048 smmu->model = data->model;
2049
2050 parse_driver_options(smmu);
2051
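	/*
	 * The legacy "mmu-masters" binding and the generic "iommus" /
	 * "#iommu-cells" binding cannot be mixed: whichever style the first
	 * SMMU probes with wins system-wide, and a later SMMU described the
	 * other way is refused below. Purely illustrative sketch of the
	 * generic style (not taken from any real DT):
	 *
	 *	smmu: iommu@... {
	 *		compatible = "arm,mmu-500";
	 *		#iommu-cells = <1>;
	 *		...
	 *	};
	 *
	 *	master {
	 *		iommus = <&smmu 0x100>;
	 *	};
	 */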
2052 legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
2053 if (legacy_binding && !using_generic_binding) {
2054 if (!using_legacy_binding)
2055 pr_notice("deprecated \"mmu-masters\" DT property in use; DMA API support unavailable\n");
2056 using_legacy_binding = true;
2057 } else if (!legacy_binding && !using_legacy_binding) {
2058 using_generic_binding = true;
2059 } else {
2060 dev_err(dev, "not probing due to mismatched DT properties\n");
2061 return -ENODEV;
2062 }
2063
2064 if (of_dma_is_coherent(dev->of_node))
2065 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
2066
2067 return 0;
2068 }
2069
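/*
 * Installing arm_smmu_ops on a bus makes the IOMMU core call our
 * add_device() for every device on that bus; the iommu_present() checks
 * just avoid re-registering the ops when several SMMUs probe.
 */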
2070 static void arm_smmu_bus_init(void)
2071 {
2072 /* Oh, for a proper bus abstraction */
2073 if (!iommu_present(&platform_bus_type))
2074 bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
2075 #ifdef CONFIG_ARM_AMBA
2076 if (!iommu_present(&amba_bustype))
2077 bus_set_iommu(&amba_bustype, &arm_smmu_ops);
2078 #endif
2079 #ifdef CONFIG_PCI
2080 if (!iommu_present(&pci_bus_type)) {
2081 pci_request_acs();
2082 bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
2083 }
2084 #endif
2085 }
2086
2087 static int arm_smmu_device_probe(struct platform_device *pdev)
2088 {
2089 struct resource *res;
2090 resource_size_t ioaddr;
2091 struct arm_smmu_device *smmu;
2092 struct device *dev = &pdev->dev;
2093 int num_irqs, i, err;
2094
2095 smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2096 if (!smmu) {
2097 dev_err(dev, "failed to allocate arm_smmu_device\n");
2098 return -ENOMEM;
2099 }
2100 smmu->dev = dev;
2101
2102 if (dev->of_node)
2103 err = arm_smmu_device_dt_probe(pdev, smmu);
2104 else
2105 err = arm_smmu_device_acpi_probe(pdev, smmu);
2106
2107 if (err)
2108 return err;
2109
2110 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2111 	smmu->base = devm_ioremap_resource(dev, res);
2112 	if (IS_ERR(smmu->base))
2113 		return PTR_ERR(smmu->base);
2114 	ioaddr = res->start;
2115 	smmu->cb_base = smmu->base + resource_size(res) / 2;
2116
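	/*
	 * Per the bindings, the first num_global_irqs interrupts are the
	 * global fault/config interrupts; everything listed after that is
	 * treated as a context bank interrupt.
	 */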
2117 num_irqs = 0;
2118 while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
2119 num_irqs++;
2120 if (num_irqs > smmu->num_global_irqs)
2121 smmu->num_context_irqs++;
2122 }
2123
2124 if (!smmu->num_context_irqs) {
2125 dev_err(dev, "found %d interrupts but expected at least %d\n",
2126 num_irqs, smmu->num_global_irqs + 1);
2127 return -ENODEV;
2128 }
2129
2130 	smmu->irqs = devm_kcalloc(dev, num_irqs, sizeof(*smmu->irqs),
2131 				  GFP_KERNEL);
2132 if (!smmu->irqs) {
2133 dev_err(dev, "failed to allocate %d irqs\n", num_irqs);
2134 return -ENOMEM;
2135 }
2136
2137 for (i = 0; i < num_irqs; ++i) {
2138 int irq = platform_get_irq(pdev, i);
2139
2140 if (irq < 0) {
2141 dev_err(dev, "failed to get irq index %d\n", i);
2142 return -ENODEV;
2143 }
2144 smmu->irqs[i] = irq;
2145 }
2146
2147 err = arm_smmu_device_cfg_probe(smmu);
2148 if (err)
2149 return err;
2150
2151 if (smmu->version == ARM_SMMU_V2) {
2152 if (smmu->num_context_banks > smmu->num_context_irqs) {
2153 dev_err(dev,
2154 "found only %d context irq(s) but %d required\n",
2155 smmu->num_context_irqs, smmu->num_context_banks);
2156 return -ENODEV;
2157 }
2158
2159 /* Ignore superfluous interrupts */
2160 smmu->num_context_irqs = smmu->num_context_banks;
2161 }
2162
2163 for (i = 0; i < smmu->num_global_irqs; ++i) {
2164 err = devm_request_irq(smmu->dev, smmu->irqs[i],
2165 arm_smmu_global_fault,
2166 IRQF_SHARED,
2167 "arm-smmu global fault",
2168 smmu);
2169 if (err) {
2170 dev_err(dev, "failed to request global IRQ %d (%u)\n",
2171 i, smmu->irqs[i]);
2172 return err;
2173 }
2174 }
2175
2176 err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
2177 "smmu.%pa", &ioaddr);
2178 if (err) {
2179 dev_err(dev, "Failed to register iommu in sysfs\n");
2180 return err;
2181 }
2182
2183 iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
2184 iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
2185
2186 err = iommu_device_register(&smmu->iommu);
2187 if (err) {
2188 dev_err(dev, "Failed to register iommu\n");
2189 return err;
2190 }
2191
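	/*
	 * arm_smmu_device_reset() programs the global registers and every
	 * context bank from our cached state; arm_smmu_test_smr_masks()
	 * writes a probe value to an SMR and reads it back to discover how
	 * many StreamID and mask bits are actually implemented.
	 */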
2192 platform_set_drvdata(pdev, smmu);
2193 arm_smmu_device_reset(smmu);
2194 arm_smmu_test_smr_masks(smmu);
2195
2196 /*
2197 * For ACPI and generic DT bindings, an SMMU will be probed before
2198 * any device which might need it, so we want the bus ops in place
2199 * ready to handle default domain setup as soon as any SMMU exists.
2200 */
2201 if (!using_legacy_binding)
2202 arm_smmu_bus_init();
2203
2204 return 0;
2205 }
2206
2207 /*
2208 * With the legacy DT binding in play, though, we have no guarantees about
2209 * probe order, but then we're also not doing default domains, so we can
2210 * delay setting bus ops until we're sure every possible SMMU is ready,
2211 * and that way ensure that no add_device() calls get missed.
2212 */
2213 static int arm_smmu_legacy_bus_init(void)
2214 {
2215 if (using_legacy_binding)
2216 arm_smmu_bus_init();
2217 return 0;
2218 }
2219 device_initcall_sync(arm_smmu_legacy_bus_init);
2220
2221 static int arm_smmu_device_remove(struct platform_device *pdev)
2222 {
2223 struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2224
2225 if (!smmu)
2226 return -ENODEV;
2227
2228 if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
2229 dev_err(&pdev->dev, "removing device with active domains!\n");
2230
2231 /* Turn the thing off */
2232 writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
2233 return 0;
2234 }
2235
2236 static void arm_smmu_device_shutdown(struct platform_device *pdev)
2237 {
2238 arm_smmu_device_remove(pdev);
2239 }
2240
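/*
 * No state needs saving on suspend: everything the hardware requires is
 * already cached in the arm_smmu_device, so resume simply re-runs the
 * reset sequence (hence the NULL suspend hook in the pm_ops below).
 */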
2241 static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
2242 {
2243 struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2244
2245 arm_smmu_device_reset(smmu);
2246 return 0;
2247 }
2248
2249 static SIMPLE_DEV_PM_OPS(arm_smmu_pm_ops, NULL, arm_smmu_pm_resume);
2250
2251 static struct platform_driver arm_smmu_driver = {
2252 .driver = {
2253 .name = "arm-smmu",
2254 .of_match_table = of_match_ptr(arm_smmu_of_match),
2255 .pm = &arm_smmu_pm_ops,
2256 },
2257 .probe = arm_smmu_device_probe,
2258 .remove = arm_smmu_device_remove,
2259 .shutdown = arm_smmu_device_shutdown,
2260 };
2261 module_platform_driver(arm_smmu_driver);
2262
2263 IOMMU_OF_DECLARE(arm_smmuv1, "arm,smmu-v1", NULL);
2264 IOMMU_OF_DECLARE(arm_smmuv2, "arm,smmu-v2", NULL);
2265 IOMMU_OF_DECLARE(arm_mmu400, "arm,mmu-400", NULL);
2266 IOMMU_OF_DECLARE(arm_mmu401, "arm,mmu-401", NULL);
2267 IOMMU_OF_DECLARE(arm_mmu500, "arm,mmu-500", NULL);
2268 IOMMU_OF_DECLARE(cavium_smmuv2, "cavium,smmu-v2", NULL);
2269
2270 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
2271 MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
2272 MODULE_LICENSE("GPL v2");