drivers/iommu/arm-smmu-v3.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * IOMMU API for ARM architected SMMUv3 implementations.
4 *
5 * Copyright (C) 2015 ARM Limited
6 *
7 * Author: Will Deacon <will.deacon@arm.com>
8 *
9 * This driver is powered by bad coffee and bombay mix.
10 */
11
12 #include <linux/acpi.h>
13 #include <linux/acpi_iort.h>
14 #include <linux/bitfield.h>
15 #include <linux/bitops.h>
16 #include <linux/crash_dump.h>
17 #include <linux/delay.h>
18 #include <linux/dma-iommu.h>
19 #include <linux/err.h>
20 #include <linux/interrupt.h>
21 #include <linux/iommu.h>
22 #include <linux/iopoll.h>
23 #include <linux/init.h>
24 #include <linux/moduleparam.h>
25 #include <linux/msi.h>
26 #include <linux/of.h>
27 #include <linux/of_address.h>
28 #include <linux/of_iommu.h>
29 #include <linux/of_platform.h>
30 #include <linux/pci.h>
31 #include <linux/platform_device.h>
32
33 #include <linux/amba/bus.h>
34
35 #include "io-pgtable.h"
36
37 /* MMIO registers */
38 #define ARM_SMMU_IDR0 0x0
39 #define IDR0_ST_LVL GENMASK(28, 27)
40 #define IDR0_ST_LVL_2LVL 1
41 #define IDR0_STALL_MODEL GENMASK(25, 24)
42 #define IDR0_STALL_MODEL_STALL 0
43 #define IDR0_STALL_MODEL_FORCE 2
44 #define IDR0_TTENDIAN GENMASK(22, 21)
45 #define IDR0_TTENDIAN_MIXED 0
46 #define IDR0_TTENDIAN_LE 2
47 #define IDR0_TTENDIAN_BE 3
48 #define IDR0_CD2L (1 << 19)
49 #define IDR0_VMID16 (1 << 18)
50 #define IDR0_PRI (1 << 16)
51 #define IDR0_SEV (1 << 14)
52 #define IDR0_MSI (1 << 13)
53 #define IDR0_ASID16 (1 << 12)
54 #define IDR0_ATS (1 << 10)
55 #define IDR0_HYP (1 << 9)
56 #define IDR0_COHACC (1 << 4)
57 #define IDR0_TTF GENMASK(3, 2)
58 #define IDR0_TTF_AARCH64 2
59 #define IDR0_TTF_AARCH32_64 3
60 #define IDR0_S1P (1 << 1)
61 #define IDR0_S2P (1 << 0)
62
63 #define ARM_SMMU_IDR1 0x4
64 #define IDR1_TABLES_PRESET (1 << 30)
65 #define IDR1_QUEUES_PRESET (1 << 29)
66 #define IDR1_REL (1 << 28)
67 #define IDR1_CMDQS GENMASK(25, 21)
68 #define IDR1_EVTQS GENMASK(20, 16)
69 #define IDR1_PRIQS GENMASK(15, 11)
70 #define IDR1_SSIDSIZE GENMASK(10, 6)
71 #define IDR1_SIDSIZE GENMASK(5, 0)
72
73 #define ARM_SMMU_IDR5 0x14
74 #define IDR5_STALL_MAX GENMASK(31, 16)
75 #define IDR5_GRAN64K (1 << 6)
76 #define IDR5_GRAN16K (1 << 5)
77 #define IDR5_GRAN4K (1 << 4)
78 #define IDR5_OAS GENMASK(2, 0)
79 #define IDR5_OAS_32_BIT 0
80 #define IDR5_OAS_36_BIT 1
81 #define IDR5_OAS_40_BIT 2
82 #define IDR5_OAS_42_BIT 3
83 #define IDR5_OAS_44_BIT 4
84 #define IDR5_OAS_48_BIT 5
85 #define IDR5_OAS_52_BIT 6
86 #define IDR5_VAX GENMASK(11, 10)
87 #define IDR5_VAX_52_BIT 1
88
89 #define ARM_SMMU_CR0 0x20
90 #define CR0_CMDQEN (1 << 3)
91 #define CR0_EVTQEN (1 << 2)
92 #define CR0_PRIQEN (1 << 1)
93 #define CR0_SMMUEN (1 << 0)
94
95 #define ARM_SMMU_CR0ACK 0x24
96
97 #define ARM_SMMU_CR1 0x28
98 #define CR1_TABLE_SH GENMASK(11, 10)
99 #define CR1_TABLE_OC GENMASK(9, 8)
100 #define CR1_TABLE_IC GENMASK(7, 6)
101 #define CR1_QUEUE_SH GENMASK(5, 4)
102 #define CR1_QUEUE_OC GENMASK(3, 2)
103 #define CR1_QUEUE_IC GENMASK(1, 0)
104 /* CR1 cacheability fields don't quite follow the usual TCR-style encoding */
105 #define CR1_CACHE_NC 0
106 #define CR1_CACHE_WB 1
107 #define CR1_CACHE_WT 2
108
109 #define ARM_SMMU_CR2 0x2c
110 #define CR2_PTM (1 << 2)
111 #define CR2_RECINVSID (1 << 1)
112 #define CR2_E2H (1 << 0)
113
114 #define ARM_SMMU_GBPA 0x44
115 #define GBPA_UPDATE (1 << 31)
116 #define GBPA_ABORT (1 << 20)
117
118 #define ARM_SMMU_IRQ_CTRL 0x50
119 #define IRQ_CTRL_EVTQ_IRQEN (1 << 2)
120 #define IRQ_CTRL_PRIQ_IRQEN (1 << 1)
121 #define IRQ_CTRL_GERROR_IRQEN (1 << 0)
122
123 #define ARM_SMMU_IRQ_CTRLACK 0x54
124
125 #define ARM_SMMU_GERROR 0x60
126 #define GERROR_SFM_ERR (1 << 8)
127 #define GERROR_MSI_GERROR_ABT_ERR (1 << 7)
128 #define GERROR_MSI_PRIQ_ABT_ERR (1 << 6)
129 #define GERROR_MSI_EVTQ_ABT_ERR (1 << 5)
130 #define GERROR_MSI_CMDQ_ABT_ERR (1 << 4)
131 #define GERROR_PRIQ_ABT_ERR (1 << 3)
132 #define GERROR_EVTQ_ABT_ERR (1 << 2)
133 #define GERROR_CMDQ_ERR (1 << 0)
134 #define GERROR_ERR_MASK 0xfd
135
136 #define ARM_SMMU_GERRORN 0x64
137
138 #define ARM_SMMU_GERROR_IRQ_CFG0 0x68
139 #define ARM_SMMU_GERROR_IRQ_CFG1 0x70
140 #define ARM_SMMU_GERROR_IRQ_CFG2 0x74
141
142 #define ARM_SMMU_STRTAB_BASE 0x80
143 #define STRTAB_BASE_RA (1UL << 62)
144 #define STRTAB_BASE_ADDR_MASK GENMASK_ULL(51, 6)
145
146 #define ARM_SMMU_STRTAB_BASE_CFG 0x88
147 #define STRTAB_BASE_CFG_FMT GENMASK(17, 16)
148 #define STRTAB_BASE_CFG_FMT_LINEAR 0
149 #define STRTAB_BASE_CFG_FMT_2LVL 1
150 #define STRTAB_BASE_CFG_SPLIT GENMASK(10, 6)
151 #define STRTAB_BASE_CFG_LOG2SIZE GENMASK(5, 0)
152
153 #define ARM_SMMU_CMDQ_BASE 0x90
154 #define ARM_SMMU_CMDQ_PROD 0x98
155 #define ARM_SMMU_CMDQ_CONS 0x9c
156
157 #define ARM_SMMU_EVTQ_BASE 0xa0
158 #define ARM_SMMU_EVTQ_PROD 0x100a8
159 #define ARM_SMMU_EVTQ_CONS 0x100ac
160 #define ARM_SMMU_EVTQ_IRQ_CFG0 0xb0
161 #define ARM_SMMU_EVTQ_IRQ_CFG1 0xb8
162 #define ARM_SMMU_EVTQ_IRQ_CFG2 0xbc
163
164 #define ARM_SMMU_PRIQ_BASE 0xc0
165 #define ARM_SMMU_PRIQ_PROD 0x100c8
166 #define ARM_SMMU_PRIQ_CONS 0x100cc
167 #define ARM_SMMU_PRIQ_IRQ_CFG0 0xd0
168 #define ARM_SMMU_PRIQ_IRQ_CFG1 0xd8
169 #define ARM_SMMU_PRIQ_IRQ_CFG2 0xdc
170
171 /* Common MSI config fields */
172 #define MSI_CFG0_ADDR_MASK GENMASK_ULL(51, 2)
173 #define MSI_CFG2_SH GENMASK(5, 4)
174 #define MSI_CFG2_MEMATTR GENMASK(3, 0)
175
176 /* Common memory attribute values */
177 #define ARM_SMMU_SH_NSH 0
178 #define ARM_SMMU_SH_OSH 2
179 #define ARM_SMMU_SH_ISH 3
180 #define ARM_SMMU_MEMATTR_DEVICE_nGnRE 0x1
181 #define ARM_SMMU_MEMATTR_OIWB 0xf
182
183 #define Q_IDX(q, p) ((p) & ((1 << (q)->max_n_shift) - 1))
184 #define Q_WRP(q, p) ((p) & (1 << (q)->max_n_shift))
185 #define Q_OVERFLOW_FLAG (1 << 31)
186 #define Q_OVF(q, p) ((p) & Q_OVERFLOW_FLAG)
187 #define Q_ENT(q, p) ((q)->base + \
188 Q_IDX(q, p) * (q)->ent_dwords)
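/*
 * Worked example (illustrative, not from the original source): prod/cons
 * values pack an index, a wrap bit and (for prod) an overflow flag. With
 * max_n_shift == 4 (a 16-entry queue), prod == 0x13 decodes to Q_IDX == 3
 * with Q_WRP set. The queue is full when the indices match but the wrap
 * bits differ, and empty when both match (see queue_full()/queue_empty()
 * below).
 */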
189
190 #define Q_BASE_RWA (1UL << 62)
191 #define Q_BASE_ADDR_MASK GENMASK_ULL(51, 5)
192 #define Q_BASE_LOG2SIZE GENMASK(4, 0)
193
194 /*
195 * Stream table.
196 *
197 * Linear: Enough to cover 1 << IDR1.SIDSIZE entries
198 * 2lvl: 128k L1 entries,
199 * 256 lazy entries per table (each table covers a PCI bus)
200 */
201 #define STRTAB_L1_SZ_SHIFT 20
202 #define STRTAB_SPLIT 8
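/*
 * Worked example (illustrative): with STRTAB_L1_SZ_SHIFT == 20 and 8-byte
 * L1 descriptors, the L1 table holds up to 1 << (20 - 3) == 128K entries.
 * STRTAB_SPLIT == 8 means each L2 table holds 1 << 8 == 256 STEs of
 * 64 bytes each (16KB per L2 table), with SID[7:0] indexing the L2 table
 * and the remaining upper SID bits indexing the L1 table.
 */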
203
204 #define STRTAB_L1_DESC_DWORDS 1
205 #define STRTAB_L1_DESC_SPAN GENMASK_ULL(4, 0)
206 #define STRTAB_L1_DESC_L2PTR_MASK GENMASK_ULL(51, 6)
207
208 #define STRTAB_STE_DWORDS 8
209 #define STRTAB_STE_0_V (1UL << 0)
210 #define STRTAB_STE_0_CFG GENMASK_ULL(3, 1)
211 #define STRTAB_STE_0_CFG_ABORT 0
212 #define STRTAB_STE_0_CFG_BYPASS 4
213 #define STRTAB_STE_0_CFG_S1_TRANS 5
214 #define STRTAB_STE_0_CFG_S2_TRANS 6
215
216 #define STRTAB_STE_0_S1FMT GENMASK_ULL(5, 4)
217 #define STRTAB_STE_0_S1FMT_LINEAR 0
218 #define STRTAB_STE_0_S1CTXPTR_MASK GENMASK_ULL(51, 6)
219 #define STRTAB_STE_0_S1CDMAX GENMASK_ULL(63, 59)
220
221 #define STRTAB_STE_1_S1C_CACHE_NC 0UL
222 #define STRTAB_STE_1_S1C_CACHE_WBRA 1UL
223 #define STRTAB_STE_1_S1C_CACHE_WT 2UL
224 #define STRTAB_STE_1_S1C_CACHE_WB 3UL
225 #define STRTAB_STE_1_S1CIR GENMASK_ULL(3, 2)
226 #define STRTAB_STE_1_S1COR GENMASK_ULL(5, 4)
227 #define STRTAB_STE_1_S1CSH GENMASK_ULL(7, 6)
228
229 #define STRTAB_STE_1_S1STALLD (1UL << 27)
230
231 #define STRTAB_STE_1_EATS GENMASK_ULL(29, 28)
232 #define STRTAB_STE_1_EATS_ABT 0UL
233 #define STRTAB_STE_1_EATS_TRANS 1UL
234 #define STRTAB_STE_1_EATS_S1CHK 2UL
235
236 #define STRTAB_STE_1_STRW GENMASK_ULL(31, 30)
237 #define STRTAB_STE_1_STRW_NSEL1 0UL
238 #define STRTAB_STE_1_STRW_EL2 2UL
239
240 #define STRTAB_STE_1_SHCFG GENMASK_ULL(45, 44)
241 #define STRTAB_STE_1_SHCFG_INCOMING 1UL
242
243 #define STRTAB_STE_2_S2VMID GENMASK_ULL(15, 0)
244 #define STRTAB_STE_2_VTCR GENMASK_ULL(50, 32)
245 #define STRTAB_STE_2_S2AA64 (1UL << 51)
246 #define STRTAB_STE_2_S2ENDI (1UL << 52)
247 #define STRTAB_STE_2_S2PTW (1UL << 54)
248 #define STRTAB_STE_2_S2R (1UL << 58)
249
250 #define STRTAB_STE_3_S2TTB_MASK GENMASK_ULL(51, 4)
251
252 /* Context descriptor (stage-1 only) */
253 #define CTXDESC_CD_DWORDS 8
254 #define CTXDESC_CD_0_TCR_T0SZ GENMASK_ULL(5, 0)
255 #define ARM64_TCR_T0SZ GENMASK_ULL(5, 0)
256 #define CTXDESC_CD_0_TCR_TG0 GENMASK_ULL(7, 6)
257 #define ARM64_TCR_TG0 GENMASK_ULL(15, 14)
258 #define CTXDESC_CD_0_TCR_IRGN0 GENMASK_ULL(9, 8)
259 #define ARM64_TCR_IRGN0 GENMASK_ULL(9, 8)
260 #define CTXDESC_CD_0_TCR_ORGN0 GENMASK_ULL(11, 10)
261 #define ARM64_TCR_ORGN0 GENMASK_ULL(11, 10)
262 #define CTXDESC_CD_0_TCR_SH0 GENMASK_ULL(13, 12)
263 #define ARM64_TCR_SH0 GENMASK_ULL(13, 12)
264 #define CTXDESC_CD_0_TCR_EPD0 (1ULL << 14)
265 #define ARM64_TCR_EPD0 (1ULL << 7)
266 #define CTXDESC_CD_0_TCR_EPD1 (1ULL << 30)
267 #define ARM64_TCR_EPD1 (1ULL << 23)
268
269 #define CTXDESC_CD_0_ENDI (1UL << 15)
270 #define CTXDESC_CD_0_V (1UL << 31)
271
272 #define CTXDESC_CD_0_TCR_IPS GENMASK_ULL(34, 32)
273 #define ARM64_TCR_IPS GENMASK_ULL(34, 32)
274 #define CTXDESC_CD_0_TCR_TBI0 (1ULL << 38)
275 #define ARM64_TCR_TBI0 (1ULL << 37)
276
277 #define CTXDESC_CD_0_AA64 (1UL << 41)
278 #define CTXDESC_CD_0_S (1UL << 44)
279 #define CTXDESC_CD_0_R (1UL << 45)
280 #define CTXDESC_CD_0_A (1UL << 46)
281 #define CTXDESC_CD_0_ASET (1UL << 47)
282 #define CTXDESC_CD_0_ASID GENMASK_ULL(63, 48)
283
284 #define CTXDESC_CD_1_TTB0_MASK GENMASK_ULL(51, 4)
285
286 /* Convert between AArch64 (CPU) TCR format and SMMU CD format */
287 #define ARM_SMMU_TCR2CD(tcr, fld) FIELD_PREP(CTXDESC_CD_0_TCR_##fld, \
288 FIELD_GET(ARM64_TCR_##fld, tcr))
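/*
 * For example, ARM_SMMU_TCR2CD(tcr, TG0) extracts TG0 from TCR bits
 * [15:14] (ARM64_TCR_TG0) and re-packs it into CD bits [7:6]
 * (CTXDESC_CD_0_TCR_TG0); fields that already line up, such as IRGN0,
 * simply copy across.
 */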
289
290 /* Command queue */
291 #define CMDQ_ENT_DWORDS 2
292 #define CMDQ_MAX_SZ_SHIFT 8
293
294 #define CMDQ_CONS_ERR GENMASK(30, 24)
295 #define CMDQ_ERR_CERROR_NONE_IDX 0
296 #define CMDQ_ERR_CERROR_ILL_IDX 1
297 #define CMDQ_ERR_CERROR_ABT_IDX 2
298
299 #define CMDQ_0_OP GENMASK_ULL(7, 0)
300 #define CMDQ_0_SSV (1UL << 11)
301
302 #define CMDQ_PREFETCH_0_SID GENMASK_ULL(63, 32)
303 #define CMDQ_PREFETCH_1_SIZE GENMASK_ULL(4, 0)
304 #define CMDQ_PREFETCH_1_ADDR_MASK GENMASK_ULL(63, 12)
305
306 #define CMDQ_CFGI_0_SID GENMASK_ULL(63, 32)
307 #define CMDQ_CFGI_1_LEAF (1UL << 0)
308 #define CMDQ_CFGI_1_RANGE GENMASK_ULL(4, 0)
309
310 #define CMDQ_TLBI_0_VMID GENMASK_ULL(47, 32)
311 #define CMDQ_TLBI_0_ASID GENMASK_ULL(63, 48)
312 #define CMDQ_TLBI_1_LEAF (1UL << 0)
313 #define CMDQ_TLBI_1_VA_MASK GENMASK_ULL(63, 12)
314 #define CMDQ_TLBI_1_IPA_MASK GENMASK_ULL(51, 12)
315
316 #define CMDQ_PRI_0_SSID GENMASK_ULL(31, 12)
317 #define CMDQ_PRI_0_SID GENMASK_ULL(63, 32)
318 #define CMDQ_PRI_1_GRPID GENMASK_ULL(8, 0)
319 #define CMDQ_PRI_1_RESP GENMASK_ULL(13, 12)
320
321 #define CMDQ_SYNC_0_CS GENMASK_ULL(13, 12)
322 #define CMDQ_SYNC_0_CS_NONE 0
323 #define CMDQ_SYNC_0_CS_IRQ 1
324 #define CMDQ_SYNC_0_CS_SEV 2
325 #define CMDQ_SYNC_0_MSH GENMASK_ULL(23, 22)
326 #define CMDQ_SYNC_0_MSIATTR GENMASK_ULL(27, 24)
327 #define CMDQ_SYNC_0_MSIDATA GENMASK_ULL(63, 32)
328 #define CMDQ_SYNC_1_MSIADDR_MASK GENMASK_ULL(51, 2)
329
330 /* Event queue */
331 #define EVTQ_ENT_DWORDS 4
332 #define EVTQ_MAX_SZ_SHIFT 7
333
334 #define EVTQ_0_ID GENMASK_ULL(7, 0)
335
336 /* PRI queue */
337 #define PRIQ_ENT_DWORDS 2
338 #define PRIQ_MAX_SZ_SHIFT 8
339
340 #define PRIQ_0_SID GENMASK_ULL(31, 0)
341 #define PRIQ_0_SSID GENMASK_ULL(51, 32)
342 #define PRIQ_0_PERM_PRIV (1UL << 58)
343 #define PRIQ_0_PERM_EXEC (1UL << 59)
344 #define PRIQ_0_PERM_READ (1UL << 60)
345 #define PRIQ_0_PERM_WRITE (1UL << 61)
346 #define PRIQ_0_PRG_LAST (1UL << 62)
347 #define PRIQ_0_SSID_V (1UL << 63)
348
349 #define PRIQ_1_PRG_IDX GENMASK_ULL(8, 0)
350 #define PRIQ_1_ADDR_MASK GENMASK_ULL(63, 12)
351
352 /* High-level queue structures */
353 #define ARM_SMMU_POLL_TIMEOUT_US 100
354 #define ARM_SMMU_CMDQ_SYNC_TIMEOUT_US 1000000 /* 1s! */
355 #define ARM_SMMU_CMDQ_SYNC_SPIN_COUNT 10
356
357 #define MSI_IOVA_BASE 0x8000000
358 #define MSI_IOVA_LENGTH 0x100000
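/*
 * These define a 1MB software MSI window at IOVA 0x08000000-0x080fffff,
 * which arm_smmu_get_resv_regions() below reports as IOMMU_RESV_SW_MSI.
 */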
359
360 /*
361 * not really modular, but the easiest way to keep compat with existing
362 * bootargs behaviour is to continue using module_param_named here.
363 */
364 static bool disable_bypass = 1;
365 module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO);
366 MODULE_PARM_DESC(disable_bypass,
367 "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
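/*
 * Usage note (illustrative): since this driver is normally built in, the
 * parameter is typically set on the kernel command line, e.g.
 * "arm-smmu-v3.disable_bypass=0" to let unattached devices bypass the SMMU
 * (dashes and underscores are interchangeable in parameter names).
 */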
368
369 enum pri_resp {
370 PRI_RESP_DENY = 0,
371 PRI_RESP_FAIL = 1,
372 PRI_RESP_SUCC = 2,
373 };
374
375 enum arm_smmu_msi_index {
376 EVTQ_MSI_INDEX,
377 GERROR_MSI_INDEX,
378 PRIQ_MSI_INDEX,
379 ARM_SMMU_MAX_MSIS,
380 };
381
382 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
383 [EVTQ_MSI_INDEX] = {
384 ARM_SMMU_EVTQ_IRQ_CFG0,
385 ARM_SMMU_EVTQ_IRQ_CFG1,
386 ARM_SMMU_EVTQ_IRQ_CFG2,
387 },
388 [GERROR_MSI_INDEX] = {
389 ARM_SMMU_GERROR_IRQ_CFG0,
390 ARM_SMMU_GERROR_IRQ_CFG1,
391 ARM_SMMU_GERROR_IRQ_CFG2,
392 },
393 [PRIQ_MSI_INDEX] = {
394 ARM_SMMU_PRIQ_IRQ_CFG0,
395 ARM_SMMU_PRIQ_IRQ_CFG1,
396 ARM_SMMU_PRIQ_IRQ_CFG2,
397 },
398 };
399
400 struct arm_smmu_cmdq_ent {
401 /* Common fields */
402 u8 opcode;
403 bool substream_valid;
404
405 /* Command-specific fields */
406 union {
407 #define CMDQ_OP_PREFETCH_CFG 0x1
408 struct {
409 u32 sid;
410 u8 size;
411 u64 addr;
412 } prefetch;
413
414 #define CMDQ_OP_CFGI_STE 0x3
415 #define CMDQ_OP_CFGI_ALL 0x4
416 struct {
417 u32 sid;
418 union {
419 bool leaf;
420 u8 span;
421 };
422 } cfgi;
423
424 #define CMDQ_OP_TLBI_NH_ASID 0x11
425 #define CMDQ_OP_TLBI_NH_VA 0x12
426 #define CMDQ_OP_TLBI_EL2_ALL 0x20
427 #define CMDQ_OP_TLBI_S12_VMALL 0x28
428 #define CMDQ_OP_TLBI_S2_IPA 0x2a
429 #define CMDQ_OP_TLBI_NSNH_ALL 0x30
430 struct {
431 u16 asid;
432 u16 vmid;
433 bool leaf;
434 u64 addr;
435 } tlbi;
436
437 #define CMDQ_OP_PRI_RESP 0x41
438 struct {
439 u32 sid;
440 u32 ssid;
441 u16 grpid;
442 enum pri_resp resp;
443 } pri;
444
445 #define CMDQ_OP_CMD_SYNC 0x46
446 struct {
447 u32 msidata;
448 u64 msiaddr;
449 } sync;
450 };
451 };
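/*
 * Illustrative example of how callers fill this in (mirroring
 * arm_smmu_tlb_inv_range_nosync() below) - a stage-1 leaf TLB invalidation
 * for a single page:
 *
 *	struct arm_smmu_cmdq_ent ent = {
 *		.opcode	= CMDQ_OP_TLBI_NH_VA,
 *		.tlbi	= { .asid = asid, .addr = iova, .leaf = true },
 *	};
 */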
452
453 struct arm_smmu_queue {
454 int irq; /* Wired interrupt */
455
456 __le64 *base;
457 dma_addr_t base_dma;
458 u64 q_base;
459
460 size_t ent_dwords;
461 u32 max_n_shift;
462 u32 prod;
463 u32 cons;
464
465 u32 __iomem *prod_reg;
466 u32 __iomem *cons_reg;
467 };
468
469 struct arm_smmu_cmdq {
470 struct arm_smmu_queue q;
471 spinlock_t lock;
472 };
473
474 struct arm_smmu_evtq {
475 struct arm_smmu_queue q;
476 u32 max_stalls;
477 };
478
479 struct arm_smmu_priq {
480 struct arm_smmu_queue q;
481 };
482
483 /* High-level stream table and context descriptor structures */
484 struct arm_smmu_strtab_l1_desc {
485 u8 span;
486
487 __le64 *l2ptr;
488 dma_addr_t l2ptr_dma;
489 };
490
491 struct arm_smmu_s1_cfg {
492 __le64 *cdptr;
493 dma_addr_t cdptr_dma;
494
495 struct arm_smmu_ctx_desc {
496 u16 asid;
497 u64 ttbr;
498 u64 tcr;
499 u64 mair;
500 } cd;
501 };
502
503 struct arm_smmu_s2_cfg {
504 u16 vmid;
505 u64 vttbr;
506 u64 vtcr;
507 };
508
509 struct arm_smmu_strtab_ent {
510 /*
511 * An STE is "assigned" if the master emitting the corresponding SID
512 * is attached to a domain. The behaviour of an unassigned STE is
513 * determined by the disable_bypass parameter, whereas an assigned
514 * STE behaves according to s1_cfg/s2_cfg, which themselves are
515 * configured according to the domain type.
516 */
517 bool assigned;
518 struct arm_smmu_s1_cfg *s1_cfg;
519 struct arm_smmu_s2_cfg *s2_cfg;
520 };
521
522 struct arm_smmu_strtab_cfg {
523 __le64 *strtab;
524 dma_addr_t strtab_dma;
525 struct arm_smmu_strtab_l1_desc *l1_desc;
526 unsigned int num_l1_ents;
527
528 u64 strtab_base;
529 u32 strtab_base_cfg;
530 };
531
532 /* An SMMUv3 instance */
533 struct arm_smmu_device {
534 struct device *dev;
535 void __iomem *base;
536
537 #define ARM_SMMU_FEAT_2_LVL_STRTAB (1 << 0)
538 #define ARM_SMMU_FEAT_2_LVL_CDTAB (1 << 1)
539 #define ARM_SMMU_FEAT_TT_LE (1 << 2)
540 #define ARM_SMMU_FEAT_TT_BE (1 << 3)
541 #define ARM_SMMU_FEAT_PRI (1 << 4)
542 #define ARM_SMMU_FEAT_ATS (1 << 5)
543 #define ARM_SMMU_FEAT_SEV (1 << 6)
544 #define ARM_SMMU_FEAT_MSI (1 << 7)
545 #define ARM_SMMU_FEAT_COHERENCY (1 << 8)
546 #define ARM_SMMU_FEAT_TRANS_S1 (1 << 9)
547 #define ARM_SMMU_FEAT_TRANS_S2 (1 << 10)
548 #define ARM_SMMU_FEAT_STALLS (1 << 11)
549 #define ARM_SMMU_FEAT_HYP (1 << 12)
550 #define ARM_SMMU_FEAT_STALL_FORCE (1 << 13)
551 #define ARM_SMMU_FEAT_VAX (1 << 14)
552 u32 features;
553
554 #define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0)
555 #define ARM_SMMU_OPT_PAGE0_REGS_ONLY (1 << 1)
556 u32 options;
557
558 struct arm_smmu_cmdq cmdq;
559 struct arm_smmu_evtq evtq;
560 struct arm_smmu_priq priq;
561
562 int gerr_irq;
563 int combined_irq;
564 u32 sync_nr;
565 u8 prev_cmd_opcode;
566
567 unsigned long ias; /* IPA */
568 unsigned long oas; /* PA */
569 unsigned long pgsize_bitmap;
570
571 #define ARM_SMMU_MAX_ASIDS (1 << 16)
572 unsigned int asid_bits;
573 DECLARE_BITMAP(asid_map, ARM_SMMU_MAX_ASIDS);
574
575 #define ARM_SMMU_MAX_VMIDS (1 << 16)
576 unsigned int vmid_bits;
577 DECLARE_BITMAP(vmid_map, ARM_SMMU_MAX_VMIDS);
578
579 unsigned int ssid_bits;
580 unsigned int sid_bits;
581
582 struct arm_smmu_strtab_cfg strtab_cfg;
583
584 /* Hi16xx adds an extra 32 bits of goodness to its MSI payload */
585 union {
586 u32 sync_count;
587 u64 padding;
588 };
589
590 /* IOMMU core code handle */
591 struct iommu_device iommu;
592 };
593
594 /* SMMU private data for each master */
595 struct arm_smmu_master_data {
596 struct arm_smmu_device *smmu;
597 struct arm_smmu_strtab_ent ste;
598 };
599
600 /* SMMU private data for an IOMMU domain */
601 enum arm_smmu_domain_stage {
602 ARM_SMMU_DOMAIN_S1 = 0,
603 ARM_SMMU_DOMAIN_S2,
604 ARM_SMMU_DOMAIN_NESTED,
605 ARM_SMMU_DOMAIN_BYPASS,
606 };
607
608 struct arm_smmu_domain {
609 struct arm_smmu_device *smmu;
610 struct mutex init_mutex; /* Protects smmu pointer */
611
612 struct io_pgtable_ops *pgtbl_ops;
613 bool non_strict;
614
615 enum arm_smmu_domain_stage stage;
616 union {
617 struct arm_smmu_s1_cfg s1_cfg;
618 struct arm_smmu_s2_cfg s2_cfg;
619 };
620
621 struct iommu_domain domain;
622 };
623
624 struct arm_smmu_option_prop {
625 u32 opt;
626 const char *prop;
627 };
628
629 static struct arm_smmu_option_prop arm_smmu_options[] = {
630 { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
631 { ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
632 { 0, NULL},
633 };
634
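/*
 * Register offsets above 64K live in page 1 of the register space (e.g.
 * ARM_SMMU_EVTQ_PROD == 0x100a8). On parts with the
 * ARM_SMMU_OPT_PAGE0_REGS_ONLY workaround, those registers are accessed at
 * the equivalent page-0 offset instead (0x100a8 -> 0xa8), which is what
 * this fixup implements.
 */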
635 static inline void __iomem *arm_smmu_page1_fixup(unsigned long offset,
636 struct arm_smmu_device *smmu)
637 {
638 if ((offset > SZ_64K) &&
639 (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY))
640 offset -= SZ_64K;
641
642 return smmu->base + offset;
643 }
644
645 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
646 {
647 return container_of(dom, struct arm_smmu_domain, domain);
648 }
649
650 static void parse_driver_options(struct arm_smmu_device *smmu)
651 {
652 int i = 0;
653
654 do {
655 if (of_property_read_bool(smmu->dev->of_node,
656 arm_smmu_options[i].prop)) {
657 smmu->options |= arm_smmu_options[i].opt;
658 dev_notice(smmu->dev, "option %s\n",
659 arm_smmu_options[i].prop);
660 }
661 } while (arm_smmu_options[++i].opt);
662 }
663
664 /* Low-level queue manipulation functions */
665 static bool queue_full(struct arm_smmu_queue *q)
666 {
667 return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
668 Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
669 }
670
671 static bool queue_empty(struct arm_smmu_queue *q)
672 {
673 return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
674 Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
675 }
676
677 static void queue_sync_cons(struct arm_smmu_queue *q)
678 {
679 q->cons = readl_relaxed(q->cons_reg);
680 }
681
682 static void queue_inc_cons(struct arm_smmu_queue *q)
683 {
684 u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
685
686 q->cons = Q_OVF(q, q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
687
688 /*
689 * Ensure that all CPU accesses (reads and writes) to the queue
690 * are complete before we update the cons pointer.
691 */
692 mb();
693 writel_relaxed(q->cons, q->cons_reg);
694 }
695
696 static int queue_sync_prod(struct arm_smmu_queue *q)
697 {
698 int ret = 0;
699 u32 prod = readl_relaxed(q->prod_reg);
700
701 if (Q_OVF(q, prod) != Q_OVF(q, q->prod))
702 ret = -EOVERFLOW;
703
704 q->prod = prod;
705 return ret;
706 }
707
708 static void queue_inc_prod(struct arm_smmu_queue *q)
709 {
710 u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + 1;
711
712 q->prod = Q_OVF(q, q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
713 writel(q->prod, q->prod_reg);
714 }
715
716 /*
717 * Wait for the SMMU to consume items. If sync is true, wait until the queue
718 * is empty. Otherwise, wait until there is at least one free slot.
719 */
720 static int queue_poll_cons(struct arm_smmu_queue *q, bool sync, bool wfe)
721 {
722 ktime_t timeout;
723 unsigned int delay = 1, spin_cnt = 0;
724
725 /* Wait longer if it's a CMD_SYNC */
726 timeout = ktime_add_us(ktime_get(), sync ?
727 ARM_SMMU_CMDQ_SYNC_TIMEOUT_US :
728 ARM_SMMU_POLL_TIMEOUT_US);
729
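/*
 * Spin on the cons pointer (using wfe() when the SMMU can generate wake-up
 * events), falling back to exponentially increasing udelay()s after
 * ARM_SMMU_CMDQ_SYNC_SPIN_COUNT spins without progress.
 */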
730 while (queue_sync_cons(q), (sync ? !queue_empty(q) : queue_full(q))) {
731 if (ktime_compare(ktime_get(), timeout) > 0)
732 return -ETIMEDOUT;
733
734 if (wfe) {
735 wfe();
736 } else if (++spin_cnt < ARM_SMMU_CMDQ_SYNC_SPIN_COUNT) {
737 cpu_relax();
738 continue;
739 } else {
740 udelay(delay);
741 delay *= 2;
742 spin_cnt = 0;
743 }
744 }
745
746 return 0;
747 }
748
749 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
750 {
751 int i;
752
753 for (i = 0; i < n_dwords; ++i)
754 *dst++ = cpu_to_le64(*src++);
755 }
756
757 static int queue_insert_raw(struct arm_smmu_queue *q, u64 *ent)
758 {
759 if (queue_full(q))
760 return -ENOSPC;
761
762 queue_write(Q_ENT(q, q->prod), ent, q->ent_dwords);
763 queue_inc_prod(q);
764 return 0;
765 }
766
767 static void queue_read(__le64 *dst, u64 *src, size_t n_dwords)
768 {
769 int i;
770
771 for (i = 0; i < n_dwords; ++i)
772 *dst++ = le64_to_cpu(*src++);
773 }
774
775 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
776 {
777 if (queue_empty(q))
778 return -EAGAIN;
779
780 queue_read(ent, Q_ENT(q, q->cons), q->ent_dwords);
781 queue_inc_cons(q);
782 return 0;
783 }
784
785 /* High-level queue accessors */
786 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
787 {
788 memset(cmd, 0, CMDQ_ENT_DWORDS << 3);
789 cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
790
791 switch (ent->opcode) {
792 case CMDQ_OP_TLBI_EL2_ALL:
793 case CMDQ_OP_TLBI_NSNH_ALL:
794 break;
795 case CMDQ_OP_PREFETCH_CFG:
796 cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
797 cmd[1] |= FIELD_PREP(CMDQ_PREFETCH_1_SIZE, ent->prefetch.size);
798 cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK;
799 break;
800 case CMDQ_OP_CFGI_STE:
801 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
802 cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
803 break;
804 case CMDQ_OP_CFGI_ALL:
805 /* Cover the entire SID range */
806 cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
807 break;
808 case CMDQ_OP_TLBI_NH_VA:
809 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
810 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
811 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
812 break;
813 case CMDQ_OP_TLBI_S2_IPA:
814 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
815 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
816 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
817 break;
818 case CMDQ_OP_TLBI_NH_ASID:
819 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
820 /* Fallthrough */
821 case CMDQ_OP_TLBI_S12_VMALL:
822 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
823 break;
824 case CMDQ_OP_PRI_RESP:
825 cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
826 cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
827 cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
828 cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
829 switch (ent->pri.resp) {
830 case PRI_RESP_DENY:
831 case PRI_RESP_FAIL:
832 case PRI_RESP_SUCC:
833 break;
834 default:
835 return -EINVAL;
836 }
837 cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
838 break;
839 case CMDQ_OP_CMD_SYNC:
840 if (ent->sync.msiaddr)
841 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
842 else
843 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
844 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
845 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
846 /*
847 * Commands are written little-endian, but we want the SMMU to
848 * receive MSIData, and thus write it back to memory, in CPU
849 * byte order, so big-endian needs an extra byteswap here.
850 */
851 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIDATA,
852 cpu_to_le32(ent->sync.msidata));
853 cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
854 break;
855 default:
856 return -ENOENT;
857 }
858
859 return 0;
860 }
861
862 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
863 {
864 static const char *cerror_str[] = {
865 [CMDQ_ERR_CERROR_NONE_IDX] = "No error",
866 [CMDQ_ERR_CERROR_ILL_IDX] = "Illegal command",
867 [CMDQ_ERR_CERROR_ABT_IDX] = "Abort on command fetch",
868 };
869
870 int i;
871 u64 cmd[CMDQ_ENT_DWORDS];
872 struct arm_smmu_queue *q = &smmu->cmdq.q;
873 u32 cons = readl_relaxed(q->cons_reg);
874 u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
875 struct arm_smmu_cmdq_ent cmd_sync = {
876 .opcode = CMDQ_OP_CMD_SYNC,
877 };
878
879 dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
880 idx < ARRAY_SIZE(cerror_str) ? cerror_str[idx] : "Unknown");
881
882 switch (idx) {
883 case CMDQ_ERR_CERROR_ABT_IDX:
884 dev_err(smmu->dev, "retrying command fetch\n");
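/* Fall through */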
885 case CMDQ_ERR_CERROR_NONE_IDX:
886 return;
887 case CMDQ_ERR_CERROR_ILL_IDX:
888 /* Fallthrough */
889 default:
890 break;
891 }
892
893 /*
894 * We may have concurrent producers, so we need to be careful
895 * not to touch any of the shadow cmdq state.
896 */
897 queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
898 dev_err(smmu->dev, "skipping command in error state:\n");
899 for (i = 0; i < ARRAY_SIZE(cmd); ++i)
900 dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
901
902 /* Convert the erroneous command into a CMD_SYNC */
903 if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
904 dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
905 return;
906 }
907
908 queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
909 }
910
911 static void arm_smmu_cmdq_insert_cmd(struct arm_smmu_device *smmu, u64 *cmd)
912 {
913 struct arm_smmu_queue *q = &smmu->cmdq.q;
914 bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
915
916 smmu->prev_cmd_opcode = FIELD_GET(CMDQ_0_OP, cmd[0]);
917
918 while (queue_insert_raw(q, cmd) == -ENOSPC) {
919 if (queue_poll_cons(q, false, wfe))
920 dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
921 }
922 }
923
924 static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
925 struct arm_smmu_cmdq_ent *ent)
926 {
927 u64 cmd[CMDQ_ENT_DWORDS];
928 unsigned long flags;
929
930 if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
931 dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
932 ent->opcode);
933 return;
934 }
935
936 spin_lock_irqsave(&smmu->cmdq.lock, flags);
937 arm_smmu_cmdq_insert_cmd(smmu, cmd);
938 spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
939 }
940
941 /*
942 * The difference between val and sync_idx is bounded by the maximum size of
943 * a queue at 2^20 entries, so 32 bits is plenty for wrap-safe arithmetic.
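 * For example (illustrative), if sync_idx == 0xffffffff and the SMMU has
 * already written back 0x00000001, then (int)(val - sync_idx) == 2 >= 0,
 * so the wait completes correctly across the 32-bit wrap.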
944 */
945 static int __arm_smmu_sync_poll_msi(struct arm_smmu_device *smmu, u32 sync_idx)
946 {
947 ktime_t timeout;
948 u32 val;
949
950 timeout = ktime_add_us(ktime_get(), ARM_SMMU_CMDQ_SYNC_TIMEOUT_US);
951 val = smp_cond_load_acquire(&smmu->sync_count,
952 (int)(VAL - sync_idx) >= 0 ||
953 !ktime_before(ktime_get(), timeout));
954
955 return (int)(val - sync_idx) < 0 ? -ETIMEDOUT : 0;
956 }
957
958 static int __arm_smmu_cmdq_issue_sync_msi(struct arm_smmu_device *smmu)
959 {
960 u64 cmd[CMDQ_ENT_DWORDS];
961 unsigned long flags;
962 struct arm_smmu_cmdq_ent ent = {
963 .opcode = CMDQ_OP_CMD_SYNC,
964 .sync = {
965 .msiaddr = virt_to_phys(&smmu->sync_count),
966 },
967 };
968
969 spin_lock_irqsave(&smmu->cmdq.lock, flags);
970
971 /* Piggy-back on the previous command if it's a SYNC */
972 if (smmu->prev_cmd_opcode == CMDQ_OP_CMD_SYNC) {
973 ent.sync.msidata = smmu->sync_nr;
974 } else {
975 ent.sync.msidata = ++smmu->sync_nr;
976 arm_smmu_cmdq_build_cmd(cmd, &ent);
977 arm_smmu_cmdq_insert_cmd(smmu, cmd);
978 }
979
980 spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
981
982 return __arm_smmu_sync_poll_msi(smmu, ent.sync.msidata);
983 }
984
985 static int __arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
986 {
987 u64 cmd[CMDQ_ENT_DWORDS];
988 unsigned long flags;
989 bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
990 struct arm_smmu_cmdq_ent ent = { .opcode = CMDQ_OP_CMD_SYNC };
991 int ret;
992
993 arm_smmu_cmdq_build_cmd(cmd, &ent);
994
995 spin_lock_irqsave(&smmu->cmdq.lock, flags);
996 arm_smmu_cmdq_insert_cmd(smmu, cmd);
997 ret = queue_poll_cons(&smmu->cmdq.q, true, wfe);
998 spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
999
1000 return ret;
1001 }
1002
1003 static void arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
1004 {
1005 int ret;
1006 bool msi = (smmu->features & ARM_SMMU_FEAT_MSI) &&
1007 (smmu->features & ARM_SMMU_FEAT_COHERENCY);
1008
1009 ret = msi ? __arm_smmu_cmdq_issue_sync_msi(smmu)
1010 : __arm_smmu_cmdq_issue_sync(smmu);
1011 if (ret)
1012 dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n");
1013 }
1014
1015 /* Context descriptor manipulation functions */
1016 static u64 arm_smmu_cpu_tcr_to_cd(u64 tcr)
1017 {
1018 u64 val = 0;
1019
1020 /* Repack the TCR. Just care about TTBR0 for now */
1021 val |= ARM_SMMU_TCR2CD(tcr, T0SZ);
1022 val |= ARM_SMMU_TCR2CD(tcr, TG0);
1023 val |= ARM_SMMU_TCR2CD(tcr, IRGN0);
1024 val |= ARM_SMMU_TCR2CD(tcr, ORGN0);
1025 val |= ARM_SMMU_TCR2CD(tcr, SH0);
1026 val |= ARM_SMMU_TCR2CD(tcr, EPD0);
1027 val |= ARM_SMMU_TCR2CD(tcr, EPD1);
1028 val |= ARM_SMMU_TCR2CD(tcr, IPS);
1029 val |= ARM_SMMU_TCR2CD(tcr, TBI0);
1030
1031 return val;
1032 }
1033
1034 static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu,
1035 struct arm_smmu_s1_cfg *cfg)
1036 {
1037 u64 val;
1038
1039 /*
1040 * We don't need to issue any invalidation here, as we'll invalidate
1041 * the STE when installing the new entry anyway.
1042 */
1043 val = arm_smmu_cpu_tcr_to_cd(cfg->cd.tcr) |
1044 #ifdef __BIG_ENDIAN
1045 CTXDESC_CD_0_ENDI |
1046 #endif
1047 CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET |
1048 CTXDESC_CD_0_AA64 | FIELD_PREP(CTXDESC_CD_0_ASID, cfg->cd.asid) |
1049 CTXDESC_CD_0_V;
1050
1051 /* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
1052 if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
1053 val |= CTXDESC_CD_0_S;
1054
1055 cfg->cdptr[0] = cpu_to_le64(val);
1056
1057 val = cfg->cd.ttbr & CTXDESC_CD_1_TTB0_MASK;
1058 cfg->cdptr[1] = cpu_to_le64(val);
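/* Dword 2 (TTB1) stays zero: arm_smmu_cpu_tcr_to_cd() only handles TTBR0 */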
1059
1060 cfg->cdptr[3] = cpu_to_le64(cfg->cd.mair);
1061 }
1062
1063 /* Stream table manipulation functions */
1064 static void
1065 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1066 {
1067 u64 val = 0;
1068
1069 val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1070 val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1071
1072 *dst = cpu_to_le64(val);
1073 }
1074
1075 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1076 {
1077 struct arm_smmu_cmdq_ent cmd = {
1078 .opcode = CMDQ_OP_CFGI_STE,
1079 .cfgi = {
1080 .sid = sid,
1081 .leaf = true,
1082 },
1083 };
1084
1085 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1086 arm_smmu_cmdq_issue_sync(smmu);
1087 }
1088
1089 static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
1090 __le64 *dst, struct arm_smmu_strtab_ent *ste)
1091 {
1092 /*
1093 * This is hideously complicated, but we only really care about
1094 * three cases at the moment:
1095 *
1096 * 1. Invalid (all zero) -> bypass/fault (init)
1097 * 2. Bypass/fault -> translation/bypass (attach)
1098 * 3. Translation/bypass -> bypass/fault (detach)
1099 *
1100 * Given that we can't update the STE atomically and the SMMU
1101 * doesn't read the thing in a defined order, that leaves us
1102 * with the following maintenance requirements:
1103 *
1104 * 1. Update Config, return (init time STEs aren't live)
1105 * 2. Write everything apart from dword 0, sync, write dword 0, sync
1106 * 3. Update Config, sync
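 *
 * For case 2, that is exactly what the tail of this function does: the
 * config dwords (1-3) are written first and made visible with
 * CFGI_STE + CMD_SYNC, and only then is dword 0 written to set V/Config,
 * followed by another sync.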
1107 */
1108 u64 val = le64_to_cpu(dst[0]);
1109 bool ste_live = false;
1110 struct arm_smmu_cmdq_ent prefetch_cmd = {
1111 .opcode = CMDQ_OP_PREFETCH_CFG,
1112 .prefetch = {
1113 .sid = sid,
1114 },
1115 };
1116
1117 if (val & STRTAB_STE_0_V) {
1118 switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1119 case STRTAB_STE_0_CFG_BYPASS:
1120 break;
1121 case STRTAB_STE_0_CFG_S1_TRANS:
1122 case STRTAB_STE_0_CFG_S2_TRANS:
1123 ste_live = true;
1124 break;
1125 case STRTAB_STE_0_CFG_ABORT:
1126 if (disable_bypass)
1127 break;
1128 default:
1129 BUG(); /* STE corruption */
1130 }
1131 }
1132
1133 /* Nuke the existing STE_0 value, as we're going to rewrite it */
1134 val = STRTAB_STE_0_V;
1135
1136 /* Bypass/fault */
1137 if (!ste->assigned || !(ste->s1_cfg || ste->s2_cfg)) {
1138 if (!ste->assigned && disable_bypass)
1139 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1140 else
1141 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1142
1143 dst[0] = cpu_to_le64(val);
1144 dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1145 STRTAB_STE_1_SHCFG_INCOMING));
1146 dst[2] = 0; /* Nuke the VMID */
1147 /*
1148 * The SMMU can perform negative caching, so we must sync
1149 * the STE regardless of whether the old value was live.
1150 */
1151 if (smmu)
1152 arm_smmu_sync_ste_for_sid(smmu, sid);
1153 return;
1154 }
1155
1156 if (ste->s1_cfg) {
1157 BUG_ON(ste_live);
1158 dst[1] = cpu_to_le64(
1159 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1160 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1161 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1162 #ifdef CONFIG_PCI_ATS
1163 FIELD_PREP(STRTAB_STE_1_EATS, STRTAB_STE_1_EATS_TRANS) |
1164 #endif
1165 FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));
1166
1167 if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1168 !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
1169 dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1170
1171 val |= (ste->s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1172 FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS);
1173 }
1174
1175 if (ste->s2_cfg) {
1176 BUG_ON(ste_live);
1177 dst[2] = cpu_to_le64(
1178 FIELD_PREP(STRTAB_STE_2_S2VMID, ste->s2_cfg->vmid) |
1179 FIELD_PREP(STRTAB_STE_2_VTCR, ste->s2_cfg->vtcr) |
1180 #ifdef __BIG_ENDIAN
1181 STRTAB_STE_2_S2ENDI |
1182 #endif
1183 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1184 STRTAB_STE_2_S2R);
1185
1186 dst[3] = cpu_to_le64(ste->s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1187
1188 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1189 }
1190
1191 arm_smmu_sync_ste_for_sid(smmu, sid);
1192 dst[0] = cpu_to_le64(val);
1193 arm_smmu_sync_ste_for_sid(smmu, sid);
1194
1195 /* It's likely that we'll want to use the new STE soon */
1196 if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1197 arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1198 }
1199
1200 static void arm_smmu_init_bypass_stes(u64 *strtab, unsigned int nent)
1201 {
1202 unsigned int i;
1203 struct arm_smmu_strtab_ent ste = { .assigned = false };
1204
1205 for (i = 0; i < nent; ++i) {
1206 arm_smmu_write_strtab_ent(NULL, -1, strtab, &ste);
1207 strtab += STRTAB_STE_DWORDS;
1208 }
1209 }
1210
1211 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1212 {
1213 size_t size;
1214 void *strtab;
1215 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1216 struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1217
1218 if (desc->l2ptr)
1219 return 0;
1220
1221 size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1222 strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1223
1224 desc->span = STRTAB_SPLIT + 1;
1225 desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1226 GFP_KERNEL | __GFP_ZERO);
1227 if (!desc->l2ptr) {
1228 dev_err(smmu->dev,
1229 "failed to allocate l2 stream table for SID %u\n",
1230 sid);
1231 return -ENOMEM;
1232 }
1233
1234 arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1235 arm_smmu_write_strtab_l1_desc(strtab, desc);
1236 return 0;
1237 }
1238
1239 /* IRQ and event handlers */
1240 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1241 {
1242 int i;
1243 struct arm_smmu_device *smmu = dev;
1244 struct arm_smmu_queue *q = &smmu->evtq.q;
1245 u64 evt[EVTQ_ENT_DWORDS];
1246
1247 do {
1248 while (!queue_remove_raw(q, evt)) {
1249 u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1250
1251 dev_info(smmu->dev, "event 0x%02x received:\n", id);
1252 for (i = 0; i < ARRAY_SIZE(evt); ++i)
1253 dev_info(smmu->dev, "\t0x%016llx\n",
1254 (unsigned long long)evt[i]);
1255
1256 }
1257
1258 /*
1259 * Not much we can do on overflow, so scream and pretend we're
1260 * trying harder.
1261 */
1262 if (queue_sync_prod(q) == -EOVERFLOW)
1263 dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1264 } while (!queue_empty(q));
1265
1266 /* Sync our overflow flag, as we believe we're up to speed */
1267 q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
1268 return IRQ_HANDLED;
1269 }
1270
1271 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1272 {
1273 u32 sid, ssid;
1274 u16 grpid;
1275 bool ssv, last;
1276
1277 sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1278 ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1279 ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
1280 last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1281 grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1282
1283 dev_info(smmu->dev, "unexpected PRI request received:\n");
1284 dev_info(smmu->dev,
1285 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1286 sid, ssid, grpid, last ? "L" : "",
1287 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1288 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1289 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1290 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1291 evt[1] & PRIQ_1_ADDR_MASK);
1292
1293 if (last) {
1294 struct arm_smmu_cmdq_ent cmd = {
1295 .opcode = CMDQ_OP_PRI_RESP,
1296 .substream_valid = ssv,
1297 .pri = {
1298 .sid = sid,
1299 .ssid = ssid,
1300 .grpid = grpid,
1301 .resp = PRI_RESP_DENY,
1302 },
1303 };
1304
1305 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1306 }
1307 }
1308
1309 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1310 {
1311 struct arm_smmu_device *smmu = dev;
1312 struct arm_smmu_queue *q = &smmu->priq.q;
1313 u64 evt[PRIQ_ENT_DWORDS];
1314
1315 do {
1316 while (!queue_remove_raw(q, evt))
1317 arm_smmu_handle_ppr(smmu, evt);
1318
1319 if (queue_sync_prod(q) == -EOVERFLOW)
1320 dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1321 } while (!queue_empty(q));
1322
1323 /* Sync our overflow flag, as we believe we're up to speed */
1324 q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
1325 writel(q->cons, q->cons_reg);
1326 return IRQ_HANDLED;
1327 }
1328
1329 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1330
1331 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1332 {
1333 u32 gerror, gerrorn, active;
1334 struct arm_smmu_device *smmu = dev;
1335
1336 gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1337 gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1338
1339 active = gerror ^ gerrorn;
1340 if (!(active & GERROR_ERR_MASK))
1341 return IRQ_NONE; /* No errors pending */
1342
1343 dev_warn(smmu->dev,
1344 "unexpected global error reported (0x%08x), this could be serious\n",
1345 active);
1346
1347 if (active & GERROR_SFM_ERR) {
1348 dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1349 arm_smmu_device_disable(smmu);
1350 }
1351
1352 if (active & GERROR_MSI_GERROR_ABT_ERR)
1353 dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1354
1355 if (active & GERROR_MSI_PRIQ_ABT_ERR)
1356 dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1357
1358 if (active & GERROR_MSI_EVTQ_ABT_ERR)
1359 dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1360
1361 if (active & GERROR_MSI_CMDQ_ABT_ERR)
1362 dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1363
1364 if (active & GERROR_PRIQ_ABT_ERR)
1365 dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1366
1367 if (active & GERROR_EVTQ_ABT_ERR)
1368 dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1369
1370 if (active & GERROR_CMDQ_ERR)
1371 arm_smmu_cmdq_skip_err(smmu);
1372
1373 writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1374 return IRQ_HANDLED;
1375 }
1376
1377 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1378 {
1379 struct arm_smmu_device *smmu = dev;
1380
1381 arm_smmu_evtq_thread(irq, dev);
1382 if (smmu->features & ARM_SMMU_FEAT_PRI)
1383 arm_smmu_priq_thread(irq, dev);
1384
1385 return IRQ_HANDLED;
1386 }
1387
1388 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1389 {
1390 arm_smmu_gerror_handler(irq, dev);
1391 return IRQ_WAKE_THREAD;
1392 }
1393
1394 /* IO_PGTABLE API */
1395 static void arm_smmu_tlb_sync(void *cookie)
1396 {
1397 struct arm_smmu_domain *smmu_domain = cookie;
1398
1399 arm_smmu_cmdq_issue_sync(smmu_domain->smmu);
1400 }
1401
1402 static void arm_smmu_tlb_inv_context(void *cookie)
1403 {
1404 struct arm_smmu_domain *smmu_domain = cookie;
1405 struct arm_smmu_device *smmu = smmu_domain->smmu;
1406 struct arm_smmu_cmdq_ent cmd;
1407
1408 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1409 cmd.opcode = CMDQ_OP_TLBI_NH_ASID;
1410 cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid;
1411 cmd.tlbi.vmid = 0;
1412 } else {
1413 cmd.opcode = CMDQ_OP_TLBI_S12_VMALL;
1414 cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
1415 }
1416
1417 /*
1418 * NOTE: when io-pgtable is in non-strict mode, we may get here with
1419 * PTEs previously cleared by unmaps on the current CPU not yet visible
1420 * to the SMMU. We are relying on the DSB implicit in queue_inc_prod()
1421 * to guarantee those are observed before the TLBI. Do be careful, 007.
1422 */
1423 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1424 arm_smmu_cmdq_issue_sync(smmu);
1425 }
1426
1427 static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
1428 size_t granule, bool leaf, void *cookie)
1429 {
1430 struct arm_smmu_domain *smmu_domain = cookie;
1431 struct arm_smmu_device *smmu = smmu_domain->smmu;
1432 struct arm_smmu_cmdq_ent cmd = {
1433 .tlbi = {
1434 .leaf = leaf,
1435 .addr = iova,
1436 },
1437 };
1438
1439 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1440 cmd.opcode = CMDQ_OP_TLBI_NH_VA;
1441 cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid;
1442 } else {
1443 cmd.opcode = CMDQ_OP_TLBI_S2_IPA;
1444 cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
1445 }
1446
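/*
 * Worked example (illustrative): invalidating 2MB of 4KB mappings queues
 * 512 TLBI commands here; the CMD_SYNC that completes them is issued later
 * via arm_smmu_tlb_sync().
 */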
1447 do {
1448 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1449 cmd.tlbi.addr += granule;
1450 } while (size -= granule);
1451 }
1452
1453 static const struct iommu_gather_ops arm_smmu_gather_ops = {
1454 .tlb_flush_all = arm_smmu_tlb_inv_context,
1455 .tlb_add_flush = arm_smmu_tlb_inv_range_nosync,
1456 .tlb_sync = arm_smmu_tlb_sync,
1457 };
1458
1459 /* IOMMU API */
1460 static bool arm_smmu_capable(enum iommu_cap cap)
1461 {
1462 switch (cap) {
1463 case IOMMU_CAP_CACHE_COHERENCY:
1464 return true;
1465 case IOMMU_CAP_NOEXEC:
1466 return true;
1467 default:
1468 return false;
1469 }
1470 }
1471
1472 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
1473 {
1474 struct arm_smmu_domain *smmu_domain;
1475
1476 if (type != IOMMU_DOMAIN_UNMANAGED &&
1477 type != IOMMU_DOMAIN_DMA &&
1478 type != IOMMU_DOMAIN_IDENTITY)
1479 return NULL;
1480
1481 /*
1482 * Allocate the domain and initialise some of its data structures.
1483 * We can't really do anything meaningful until we've added a
1484 * master.
1485 */
1486 smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
1487 if (!smmu_domain)
1488 return NULL;
1489
1490 if (type == IOMMU_DOMAIN_DMA &&
1491 iommu_get_dma_cookie(&smmu_domain->domain)) {
1492 kfree(smmu_domain);
1493 return NULL;
1494 }
1495
1496 mutex_init(&smmu_domain->init_mutex);
1497 return &smmu_domain->domain;
1498 }
1499
1500 static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
1501 {
1502 int idx, size = 1 << span;
1503
1504 do {
1505 idx = find_first_zero_bit(map, size);
1506 if (idx == size)
1507 return -ENOSPC;
1508 } while (test_and_set_bit(idx, map));
1509
1510 return idx;
1511 }
1512
1513 static void arm_smmu_bitmap_free(unsigned long *map, int idx)
1514 {
1515 clear_bit(idx, map);
1516 }
1517
1518 static void arm_smmu_domain_free(struct iommu_domain *domain)
1519 {
1520 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1521 struct arm_smmu_device *smmu = smmu_domain->smmu;
1522
1523 iommu_put_dma_cookie(domain);
1524 free_io_pgtable_ops(smmu_domain->pgtbl_ops);
1525
1526 /* Free the CD and ASID, if we allocated them */
1527 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1528 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1529
1530 if (cfg->cdptr) {
1531 dmam_free_coherent(smmu_domain->smmu->dev,
1532 CTXDESC_CD_DWORDS << 3,
1533 cfg->cdptr,
1534 cfg->cdptr_dma);
1535
1536 arm_smmu_bitmap_free(smmu->asid_map, cfg->cd.asid);
1537 }
1538 } else {
1539 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1540 if (cfg->vmid)
1541 arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
1542 }
1543
1544 kfree(smmu_domain);
1545 }
1546
1547 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
1548 struct io_pgtable_cfg *pgtbl_cfg)
1549 {
1550 int ret;
1551 int asid;
1552 struct arm_smmu_device *smmu = smmu_domain->smmu;
1553 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1554
1555 asid = arm_smmu_bitmap_alloc(smmu->asid_map, smmu->asid_bits);
1556 if (asid < 0)
1557 return asid;
1558
1559 cfg->cdptr = dmam_alloc_coherent(smmu->dev, CTXDESC_CD_DWORDS << 3,
1560 &cfg->cdptr_dma,
1561 GFP_KERNEL | __GFP_ZERO);
1562 if (!cfg->cdptr) {
1563 dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1564 ret = -ENOMEM;
1565 goto out_free_asid;
1566 }
1567
1568 cfg->cd.asid = (u16)asid;
1569 cfg->cd.ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
1570 cfg->cd.tcr = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
1571 cfg->cd.mair = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
1572 return 0;
1573
1574 out_free_asid:
1575 arm_smmu_bitmap_free(smmu->asid_map, asid);
1576 return ret;
1577 }
1578
1579 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
1580 struct io_pgtable_cfg *pgtbl_cfg)
1581 {
1582 int vmid;
1583 struct arm_smmu_device *smmu = smmu_domain->smmu;
1584 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1585
1586 vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
1587 if (vmid < 0)
1588 return vmid;
1589
1590 cfg->vmid = (u16)vmid;
1591 cfg->vttbr = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
1592 cfg->vtcr = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
1593 return 0;
1594 }
1595
1596 static int arm_smmu_domain_finalise(struct iommu_domain *domain)
1597 {
1598 int ret;
1599 unsigned long ias, oas;
1600 enum io_pgtable_fmt fmt;
1601 struct io_pgtable_cfg pgtbl_cfg;
1602 struct io_pgtable_ops *pgtbl_ops;
1603 int (*finalise_stage_fn)(struct arm_smmu_domain *,
1604 struct io_pgtable_cfg *);
1605 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1606 struct arm_smmu_device *smmu = smmu_domain->smmu;
1607
1608 if (domain->type == IOMMU_DOMAIN_IDENTITY) {
1609 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
1610 return 0;
1611 }
1612
1613 /* Restrict the stage to what we can actually support */
1614 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
1615 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
1616 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
1617 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1618
1619 switch (smmu_domain->stage) {
1620 case ARM_SMMU_DOMAIN_S1:
1621 ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
1622 ias = min_t(unsigned long, ias, VA_BITS);
1623 oas = smmu->ias;
1624 fmt = ARM_64_LPAE_S1;
1625 finalise_stage_fn = arm_smmu_domain_finalise_s1;
1626 break;
1627 case ARM_SMMU_DOMAIN_NESTED:
1628 case ARM_SMMU_DOMAIN_S2:
1629 ias = smmu->ias;
1630 oas = smmu->oas;
1631 fmt = ARM_64_LPAE_S2;
1632 finalise_stage_fn = arm_smmu_domain_finalise_s2;
1633 break;
1634 default:
1635 return -EINVAL;
1636 }
1637
1638 pgtbl_cfg = (struct io_pgtable_cfg) {
1639 .pgsize_bitmap = smmu->pgsize_bitmap,
1640 .ias = ias,
1641 .oas = oas,
1642 .tlb = &arm_smmu_gather_ops,
1643 .iommu_dev = smmu->dev,
1644 };
1645
1646 if (smmu->features & ARM_SMMU_FEAT_COHERENCY)
1647 pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
1648
1649 if (smmu_domain->non_strict)
1650 pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
1651
1652 pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
1653 if (!pgtbl_ops)
1654 return -ENOMEM;
1655
1656 domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
1657 domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
1658 domain->geometry.force_aperture = true;
1659
1660 ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg);
1661 if (ret < 0) {
1662 free_io_pgtable_ops(pgtbl_ops);
1663 return ret;
1664 }
1665
1666 smmu_domain->pgtbl_ops = pgtbl_ops;
1667 return 0;
1668 }
1669
1670 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
1671 {
1672 __le64 *step;
1673 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1674
1675 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
1676 struct arm_smmu_strtab_l1_desc *l1_desc;
1677 int idx;
1678
1679 /* Two-level walk */
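/*
 * e.g. with STRTAB_SPLIT == 8, SID 0x1234 selects L1 descriptor 0x12
 * and STE 0x34 within that L2 table.
 */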
1680 idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
1681 l1_desc = &cfg->l1_desc[idx];
1682 idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
1683 step = &l1_desc->l2ptr[idx];
1684 } else {
1685 /* Simple linear lookup */
1686 step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
1687 }
1688
1689 return step;
1690 }
1691
1692 static void arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec)
1693 {
1694 int i, j;
1695 struct arm_smmu_master_data *master = fwspec->iommu_priv;
1696 struct arm_smmu_device *smmu = master->smmu;
1697
1698 for (i = 0; i < fwspec->num_ids; ++i) {
1699 u32 sid = fwspec->ids[i];
1700 __le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
1701
1702 /* Bridged PCI devices may end up with duplicated IDs */
1703 for (j = 0; j < i; j++)
1704 if (fwspec->ids[j] == sid)
1705 break;
1706 if (j < i)
1707 continue;
1708
1709 arm_smmu_write_strtab_ent(smmu, sid, step, &master->ste);
1710 }
1711 }
1712
1713 static void arm_smmu_detach_dev(struct device *dev)
1714 {
1715 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1716 struct arm_smmu_master_data *master = fwspec->iommu_priv;
1717
1718 master->ste.assigned = false;
1719 arm_smmu_install_ste_for_dev(fwspec);
1720 }
1721
1722 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1723 {
1724 int ret = 0;
1725 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1726 struct arm_smmu_device *smmu;
1727 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1728 struct arm_smmu_master_data *master;
1729 struct arm_smmu_strtab_ent *ste;
1730
1731 if (!fwspec)
1732 return -ENOENT;
1733
1734 master = fwspec->iommu_priv;
1735 smmu = master->smmu;
1736 ste = &master->ste;
1737
1738 /* Already attached to a different domain? */
1739 if (ste->assigned)
1740 arm_smmu_detach_dev(dev);
1741
1742 mutex_lock(&smmu_domain->init_mutex);
1743
1744 if (!smmu_domain->smmu) {
1745 smmu_domain->smmu = smmu;
1746 ret = arm_smmu_domain_finalise(domain);
1747 if (ret) {
1748 smmu_domain->smmu = NULL;
1749 goto out_unlock;
1750 }
1751 } else if (smmu_domain->smmu != smmu) {
1752 dev_err(dev,
1753 "cannot attach to SMMU %s (upstream of %s)\n",
1754 dev_name(smmu_domain->smmu->dev),
1755 dev_name(smmu->dev));
1756 ret = -ENXIO;
1757 goto out_unlock;
1758 }
1759
1760 ste->assigned = true;
1761
1762 if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS) {
1763 ste->s1_cfg = NULL;
1764 ste->s2_cfg = NULL;
1765 } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1766 ste->s1_cfg = &smmu_domain->s1_cfg;
1767 ste->s2_cfg = NULL;
1768 arm_smmu_write_ctx_desc(smmu, ste->s1_cfg);
1769 } else {
1770 ste->s1_cfg = NULL;
1771 ste->s2_cfg = &smmu_domain->s2_cfg;
1772 }
1773
1774 arm_smmu_install_ste_for_dev(fwspec);
1775 out_unlock:
1776 mutex_unlock(&smmu_domain->init_mutex);
1777 return ret;
1778 }
1779
1780 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1781 phys_addr_t paddr, size_t size, int prot)
1782 {
1783 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1784
1785 if (!ops)
1786 return -ENODEV;
1787
1788 return ops->map(ops, iova, paddr, size, prot);
1789 }
1790
1791 static size_t
1792 arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
1793 {
1794 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1795
1796 if (!ops)
1797 return 0;
1798
1799 return ops->unmap(ops, iova, size);
1800 }
1801
1802 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
1803 {
1804 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1805
1806 if (smmu_domain->smmu)
1807 arm_smmu_tlb_inv_context(smmu_domain);
1808 }
1809
1810 static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
1811 {
1812 struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1813
1814 if (smmu)
1815 arm_smmu_cmdq_issue_sync(smmu);
1816 }
1817
1818 static phys_addr_t
1819 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
1820 {
1821 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1822
1823 if (domain->type == IOMMU_DOMAIN_IDENTITY)
1824 return iova;
1825
1826 if (!ops)
1827 return 0;
1828
1829 return ops->iova_to_phys(ops, iova);
1830 }
1831
1832 static struct platform_driver arm_smmu_driver;
1833
1834 static int arm_smmu_match_node(struct device *dev, void *data)
1835 {
1836 return dev->fwnode == data;
1837 }
1838
1839 static
1840 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1841 {
1842 struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
1843 fwnode, arm_smmu_match_node);
1844 put_device(dev);
1845 return dev ? dev_get_drvdata(dev) : NULL;
1846 }
1847
1848 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
1849 {
1850 unsigned long limit = smmu->strtab_cfg.num_l1_ents;
1851
1852 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
1853 limit *= 1UL << STRTAB_SPLIT;
1854
1855 return sid < limit;
1856 }
1857
1858 static struct iommu_ops arm_smmu_ops;
1859
1860 static int arm_smmu_add_device(struct device *dev)
1861 {
1862 int i, ret;
1863 struct arm_smmu_device *smmu;
1864 struct arm_smmu_master_data *master;
1865 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1866 struct iommu_group *group;
1867
1868 if (!fwspec || fwspec->ops != &arm_smmu_ops)
1869 return -ENODEV;
1870 /*
1871 * We _can_ actually withstand dodgy bus code re-calling add_device()
1872 * without an intervening remove_device()/of_xlate() sequence, but
1873 * we're not going to do so quietly...
1874 */
1875 if (WARN_ON_ONCE(fwspec->iommu_priv)) {
1876 master = fwspec->iommu_priv;
1877 smmu = master->smmu;
1878 } else {
1879 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
1880 if (!smmu)
1881 return -ENODEV;
1882 master = kzalloc(sizeof(*master), GFP_KERNEL);
1883 if (!master)
1884 return -ENOMEM;
1885
1886 master->smmu = smmu;
1887 fwspec->iommu_priv = master;
1888 }
1889
1890 /* Check the SIDs are in range of the SMMU and our stream table */
1891 for (i = 0; i < fwspec->num_ids; i++) {
1892 u32 sid = fwspec->ids[i];
1893
1894 if (!arm_smmu_sid_in_range(smmu, sid))
1895 return -ERANGE;
1896
1897 /* Ensure l2 strtab is initialised */
1898 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
1899 ret = arm_smmu_init_l2_strtab(smmu, sid);
1900 if (ret)
1901 return ret;
1902 }
1903 }
1904
1905 group = iommu_group_get_for_dev(dev);
1906 if (!IS_ERR(group)) {
1907 iommu_group_put(group);
1908 iommu_device_link(&smmu->iommu, dev);
1909 }
1910
1911 return PTR_ERR_OR_ZERO(group);
1912 }
1913
1914 static void arm_smmu_remove_device(struct device *dev)
1915 {
1916 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1917 struct arm_smmu_master_data *master;
1918 struct arm_smmu_device *smmu;
1919
1920 if (!fwspec || fwspec->ops != &arm_smmu_ops)
1921 return;
1922
1923 master = fwspec->iommu_priv;
1924 smmu = master->smmu;
1925 if (master && master->ste.assigned)
1926 arm_smmu_detach_dev(dev);
1927 iommu_group_remove_device(dev);
1928 iommu_device_unlink(&smmu->iommu, dev);
1929 kfree(master);
1930 iommu_fwspec_free(dev);
1931 }
1932
1933 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1934 {
1935 struct iommu_group *group;
1936
1937 /*
1938 * We don't support devices sharing stream IDs other than PCI RID
1939 * aliases, since the necessary ID-to-device lookup becomes rather
1940 * impractical given a potential sparse 32-bit stream ID space.
1941 */
1942 if (dev_is_pci(dev))
1943 group = pci_device_group(dev);
1944 else
1945 group = generic_device_group(dev);
1946
1947 return group;
1948 }
1949
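/*
 * DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE selects non-strict (lazy) invalidation
 * for DMA domains: unmapped IOVAs are only recycled after a deferred
 * flush, trading a short window of stale TLB entries for far fewer
 * CMD_SYNC operations.
 */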
1950 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1951 enum iommu_attr attr, void *data)
1952 {
1953 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1954
1955 switch (domain->type) {
1956 case IOMMU_DOMAIN_UNMANAGED:
1957 switch (attr) {
1958 case DOMAIN_ATTR_NESTING:
1959 *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1960 return 0;
1961 default:
1962 return -ENODEV;
1963 }
1964 break;
1965 case IOMMU_DOMAIN_DMA:
1966 switch (attr) {
1967 case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
1968 *(int *)data = smmu_domain->non_strict;
1969 return 0;
1970 default:
1971 return -ENODEV;
1972 }
1973 break;
1974 default:
1975 return -EINVAL;
1976 }
1977 }
1978
1979 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1980 enum iommu_attr attr, void *data)
1981 {
1982 int ret = 0;
1983 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1984
1985 mutex_lock(&smmu_domain->init_mutex);
1986
1987 switch (domain->type) {
1988 case IOMMU_DOMAIN_UNMANAGED:
1989 switch (attr) {
1990 case DOMAIN_ATTR_NESTING:
1991 if (smmu_domain->smmu) {
1992 ret = -EPERM;
1993 goto out_unlock;
1994 }
1995
1996 if (*(int *)data)
1997 smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1998 else
1999 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2000 break;
2001 default:
2002 ret = -ENODEV;
2003 }
2004 break;
2005 case IOMMU_DOMAIN_DMA:
2006 switch (attr) {
2007 case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
2008 smmu_domain->non_strict = *(int *)data;
2009 break;
2010 default:
2011 ret = -ENODEV;
2012 }
2013 break;
2014 default:
2015 ret = -EINVAL;
2016 }
2017
2018 out_unlock:
2019 mutex_unlock(&smmu_domain->init_mutex);
2020 return ret;
2021 }
2022
2023 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
2024 {
2025 return iommu_fwspec_add_ids(dev, args->args, 1);
2026 }
2027
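/*
 * Reserve a software-managed MSI window (IOMMU_RESV_SW_MSI) so that the
 * DMA layer has a block of IOVA space in which to map MSI doorbells for
 * devices translated by this SMMU, in addition to any firmware-described
 * reserved regions returned by iommu_dma_get_resv_regions().
 */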
2028 static void arm_smmu_get_resv_regions(struct device *dev,
2029 struct list_head *head)
2030 {
2031 struct iommu_resv_region *region;
2032 int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
2033
2034 region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
2035 prot, IOMMU_RESV_SW_MSI);
2036 if (!region)
2037 return;
2038
2039 list_add_tail(&region->list, head);
2040
2041 iommu_dma_get_resv_regions(dev, head);
2042 }
2043
2044 static void arm_smmu_put_resv_regions(struct device *dev,
2045 struct list_head *head)
2046 {
2047 struct iommu_resv_region *entry, *next;
2048
2049 list_for_each_entry_safe(entry, next, head, list)
2050 kfree(entry);
2051 }
2052
2053 static struct iommu_ops arm_smmu_ops = {
2054 .capable = arm_smmu_capable,
2055 .domain_alloc = arm_smmu_domain_alloc,
2056 .domain_free = arm_smmu_domain_free,
2057 .attach_dev = arm_smmu_attach_dev,
2058 .map = arm_smmu_map,
2059 .unmap = arm_smmu_unmap,
2060 .flush_iotlb_all = arm_smmu_flush_iotlb_all,
2061 .iotlb_sync = arm_smmu_iotlb_sync,
2062 .iova_to_phys = arm_smmu_iova_to_phys,
2063 .add_device = arm_smmu_add_device,
2064 .remove_device = arm_smmu_remove_device,
2065 .device_group = arm_smmu_device_group,
2066 .domain_get_attr = arm_smmu_domain_get_attr,
2067 .domain_set_attr = arm_smmu_domain_set_attr,
2068 .of_xlate = arm_smmu_of_xlate,
2069 .get_resv_regions = arm_smmu_get_resv_regions,
2070 .put_resv_regions = arm_smmu_put_resv_regions,
2071 .pgsize_bitmap = -1UL, /* Restricted during device attach */
2072 };
2073
2074 /* Probing and initialisation functions */
2075 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
2076 struct arm_smmu_queue *q,
2077 unsigned long prod_off,
2078 unsigned long cons_off,
2079 size_t dwords)
2080 {
2081 size_t qsz = ((1 << q->max_n_shift) * dwords) << 3;
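/*
 * e.g. assuming CMDQ_ENT_DWORDS == 2 and max_n_shift capped at 8, the
 * command queue allocation is (1 << 8) * 2 * 8 == 4096 bytes.
 */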
2082
2083 q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma, GFP_KERNEL);
2084 if (!q->base) {
2085 dev_err(smmu->dev, "failed to allocate queue (0x%zx bytes)\n",
2086 qsz);
2087 return -ENOMEM;
2088 }
2089
2090 q->prod_reg = arm_smmu_page1_fixup(prod_off, smmu);
2091 q->cons_reg = arm_smmu_page1_fixup(cons_off, smmu);
2092 q->ent_dwords = dwords;
2093
2094 q->q_base = Q_BASE_RWA;
2095 q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
2096 q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->max_n_shift);
2097
2098 q->prod = q->cons = 0;
2099 return 0;
2100 }
2101
2102 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2103 {
2104 int ret;
2105
2106 /* cmdq */
2107 spin_lock_init(&smmu->cmdq.lock);
2108 ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
2109 ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS);
2110 if (ret)
2111 return ret;
2112
2113 /* evtq */
2114 ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
2115 ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS);
2116 if (ret)
2117 return ret;
2118
2119 /* priq */
2120 if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2121 return 0;
2122
2123 return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
2124 ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS);
2125 }
2126
2127 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2128 {
2129 unsigned int i;
2130 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2131 size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
2132 void *strtab = smmu->strtab_cfg.strtab;
2133
2134 cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
2135 if (!cfg->l1_desc) {
2136 dev_err(smmu->dev, "failed to allocate l1 stream table desc\n");
2137 return -ENOMEM;
2138 }
2139
2140 for (i = 0; i < cfg->num_l1_ents; ++i) {
2141 arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2142 strtab += STRTAB_L1_DESC_DWORDS << 3;
2143 }
2144
2145 return 0;
2146 }
2147
2148 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
2149 {
2150 void *strtab;
2151 u64 reg;
2152 u32 size, l1size;
2153 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2154
2155 /* Calculate the L1 size, capped to the SIDSIZE. */
2156 size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
2157 size = min(size, smmu->sid_bits - STRTAB_SPLIT);
2158 cfg->num_l1_ents = 1 << size;
2159
2160 size += STRTAB_SPLIT;
2161 if (size < smmu->sid_bits)
2162 dev_warn(smmu->dev,
2163 "2-level strtab only covers %u/%u bits of SID\n",
2164 size, smmu->sid_bits);
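/*
 * Sizing example (assuming STRTAB_L1_SZ_SHIFT == 20, STRTAB_L1_DESC_DWORDS
 * == 1 and STRTAB_SPLIT == 8): the L1 table is capped at 1 << 17
 * descriptors (1MB), covering 17 + 8 == 25 bits of SID, hence the warning
 * above for implementations reporting a larger SIDSIZE.
 */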
2165
2166 l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
2167 strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
2168 GFP_KERNEL | __GFP_ZERO);
2169 if (!strtab) {
2170 dev_err(smmu->dev,
2171 "failed to allocate l1 stream table (%u bytes)\n",
2172 l1size);
2173 return -ENOMEM;
2174 }
2175 cfg->strtab = strtab;
2176
2177 /* Configure strtab_base_cfg for 2 levels */
2178 reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
2179 reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
2180 reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
2181 cfg->strtab_base_cfg = reg;
2182
2183 return arm_smmu_init_l1_strtab(smmu);
2184 }
2185
2186 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
2187 {
2188 void *strtab;
2189 u64 reg;
2190 u32 size;
2191 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2192
2193 size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
2194 strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
2195 GFP_KERNEL | __GFP_ZERO);
2196 if (!strtab) {
2197 dev_err(smmu->dev,
2198 "failed to allocate linear stream table (%u bytes)\n",
2199 size);
2200 return -ENOMEM;
2201 }
2202 cfg->strtab = strtab;
2203 cfg->num_l1_ents = 1 << smmu->sid_bits;
2204
2205 /* Configure strtab_base_cfg for a linear table covering all SIDs */
2206 reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
2207 reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
2208 cfg->strtab_base_cfg = reg;
2209
2210 arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
2211 return 0;
2212 }
2213
2214 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
2215 {
2216 u64 reg;
2217 int ret;
2218
2219 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2220 ret = arm_smmu_init_strtab_2lvl(smmu);
2221 else
2222 ret = arm_smmu_init_strtab_linear(smmu);
2223
2224 if (ret)
2225 return ret;
2226
2227 /* Set the strtab base address */
2228 reg = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
2229 reg |= STRTAB_BASE_RA;
2230 smmu->strtab_cfg.strtab_base = reg;
2231
2232 /* Allocate the first VMID for stage-2 bypass STEs */
2233 set_bit(0, smmu->vmid_map);
2234 return 0;
2235 }
2236
2237 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
2238 {
2239 int ret;
2240
2241 ret = arm_smmu_init_queues(smmu);
2242 if (ret)
2243 return ret;
2244
2245 return arm_smmu_init_strtab(smmu);
2246 }
2247
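/*
 * Update/acknowledge register pairs (e.g. CR0/CR0ACK, IRQ_CTRL/IRQ_CTRLACK):
 * the SMMU reflects the new value in the ACK register once the update has
 * taken effect, so write the value and poll the ACK register until it
 * matches (or ARM_SMMU_POLL_TIMEOUT_US expires).
 */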
2248 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
2249 unsigned int reg_off, unsigned int ack_off)
2250 {
2251 u32 reg;
2252
2253 writel_relaxed(val, smmu->base + reg_off);
2254 return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
2255 1, ARM_SMMU_POLL_TIMEOUT_US);
2256 }
2257
2258 /* GBPA is "special": writes must go through the GBPA_UPDATE handshake */
2259 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
2260 {
2261 int ret;
2262 u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
2263
2264 ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2265 1, ARM_SMMU_POLL_TIMEOUT_US);
2266 if (ret)
2267 return ret;
2268
2269 reg &= ~clr;
2270 reg |= set;
2271 writel_relaxed(reg | GBPA_UPDATE, gbpa);
2272 ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2273 1, ARM_SMMU_POLL_TIMEOUT_US);
2274
2275 if (ret)
2276 dev_err(smmu->dev, "GBPA not responding to update\n");
2277 return ret;
2278 }
2279
2280 static void arm_smmu_free_msis(void *data)
2281 {
2282 struct device *dev = data;
2283 platform_msi_domain_free_irqs(dev);
2284 }
2285
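/*
 * arm_smmu_msi_cfg[] maps each MSI index to its doorbell-address, payload
 * and memory-attribute register offsets (CFG0/1/2), so a composed message
 * is written out to those three registers in turn below.
 */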
2286 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
2287 {
2288 phys_addr_t doorbell;
2289 struct device *dev = msi_desc_to_dev(desc);
2290 struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2291 phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
2292
2293 doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
2294 doorbell &= MSI_CFG0_ADDR_MASK;
2295
2296 writeq_relaxed(doorbell, smmu->base + cfg[0]);
2297 writel_relaxed(msg->data, smmu->base + cfg[1]);
2298 writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
2299 }
2300
2301 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
2302 {
2303 struct msi_desc *desc;
2304 int ret, nvec = ARM_SMMU_MAX_MSIS;
2305 struct device *dev = smmu->dev;
2306
2307 /* Clear the MSI address regs */
2308 writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
2309 writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
2310
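/*
 * Without PRI there is no PRIQ MSI to allocate; PRIQ_MSI_INDEX is assumed
 * to be the highest index, so simply trimming nvec drops it.
 */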
2311 if (smmu->features & ARM_SMMU_FEAT_PRI)
2312 writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
2313 else
2314 nvec--;
2315
2316 if (!(smmu->features & ARM_SMMU_FEAT_MSI))
2317 return;
2318
2319 if (!dev->msi_domain) {
2320 dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
2321 return;
2322 }
2323
2324 /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
2325 ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
2326 if (ret) {
2327 dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
2328 return;
2329 }
2330
2331 for_each_msi_entry(desc, dev) {
2332 switch (desc->platform.msi_index) {
2333 case EVTQ_MSI_INDEX:
2334 smmu->evtq.q.irq = desc->irq;
2335 break;
2336 case GERROR_MSI_INDEX:
2337 smmu->gerr_irq = desc->irq;
2338 break;
2339 case PRIQ_MSI_INDEX:
2340 smmu->priq.q.irq = desc->irq;
2341 break;
2342 default: /* Unknown */
2343 continue;
2344 }
2345 }
2346
2347 /* Add callback to free MSIs on teardown */
2348 devm_add_action(dev, arm_smmu_free_msis, dev);
2349 }
2350
2351 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
2352 {
2353 int irq, ret;
2354
2355 arm_smmu_setup_msis(smmu);
2356
2357 /* Request interrupt lines */
2358 irq = smmu->evtq.q.irq;
2359 if (irq) {
2360 ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
2361 arm_smmu_evtq_thread,
2362 IRQF_ONESHOT,
2363 "arm-smmu-v3-evtq", smmu);
2364 if (ret < 0)
2365 dev_warn(smmu->dev, "failed to enable evtq irq\n");
2366 } else {
2367 dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
2368 }
2369
2370 irq = smmu->gerr_irq;
2371 if (irq) {
2372 ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
2373 0, "arm-smmu-v3-gerror", smmu);
2374 if (ret < 0)
2375 dev_warn(smmu->dev, "failed to enable gerror irq\n");
2376 } else {
2377 dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
2378 }
2379
2380 if (smmu->features & ARM_SMMU_FEAT_PRI) {
2381 irq = smmu->priq.q.irq;
2382 if (irq) {
2383 ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
2384 arm_smmu_priq_thread,
2385 IRQF_ONESHOT,
2386 "arm-smmu-v3-priq",
2387 smmu);
2388 if (ret < 0)
2389 dev_warn(smmu->dev,
2390 "failed to enable priq irq\n");
2391 } else {
2392 dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
2393 }
2394 }
2395 }
2396
2397 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
2398 {
2399 int ret, irq;
2400 u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
2401
2402 /* Disable IRQs first */
2403 ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
2404 ARM_SMMU_IRQ_CTRLACK);
2405 if (ret) {
2406 dev_err(smmu->dev, "failed to disable irqs\n");
2407 return ret;
2408 }
2409
2410 irq = smmu->combined_irq;
2411 if (irq) {
2412 /*
2413 * Cavium ThunderX2 implementation doesn't support unique irq
2414 * lines. Use a single irq line for all the SMMUv3 interrupts.
2415 */
2416 ret = devm_request_threaded_irq(smmu->dev, irq,
2417 arm_smmu_combined_irq_handler,
2418 arm_smmu_combined_irq_thread,
2419 IRQF_ONESHOT,
2420 "arm-smmu-v3-combined-irq", smmu);
2421 if (ret < 0)
2422 dev_warn(smmu->dev, "failed to enable combined irq\n");
2423 } else
2424 arm_smmu_setup_unique_irqs(smmu);
2425
2426 if (smmu->features & ARM_SMMU_FEAT_PRI)
2427 irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
2428
2429 /* Enable interrupt generation on the SMMU */
2430 ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
2431 ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
2432 if (ret)
2433 dev_warn(smmu->dev, "failed to enable irqs\n");
2434
2435 return 0;
2436 }
2437
2438 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
2439 {
2440 int ret;
2441
2442 ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
2443 if (ret)
2444 dev_err(smmu->dev, "failed to clear cr0\n");
2445
2446 return ret;
2447 }
2448
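/*
 * Bring-up order: with the SMMU disabled, program CR1/CR2 and the stream
 * table registers, enable the command queue first (so configuration and
 * TLB invalidation commands can be issued), then the event and PRI queues,
 * then interrupts, and finally set SMMUEN - or, for firmware-requested
 * bypass, leave translation disabled with GBPA passing transactions
 * through untranslated.
 */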
2449 static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
2450 {
2451 int ret;
2452 u32 reg, enables;
2453 struct arm_smmu_cmdq_ent cmd;
2454
2455 /* Clear CR0 and sync (disables SMMU and queue processing) */
2456 reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
2457 if (reg & CR0_SMMUEN) {
2458 if (is_kdump_kernel()) {
2459 arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
2460 arm_smmu_device_disable(smmu);
2461 return -EBUSY;
2462 }
2463
2464 dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
2465 }
2466
2467 ret = arm_smmu_device_disable(smmu);
2468 if (ret)
2469 return ret;
2470
2471 /* CR1 (table and queue memory attributes) */
2472 reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
2473 FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
2474 FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
2475 FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
2476 FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
2477 FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
2478 writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
2479
2480 /* CR2 (private TLB maintenance, record invalid SIDs, E2H regime) */
2481 reg = CR2_PTM | CR2_RECINVSID | CR2_E2H;
2482 writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
2483
2484 /* Stream table */
2485 writeq_relaxed(smmu->strtab_cfg.strtab_base,
2486 smmu->base + ARM_SMMU_STRTAB_BASE);
2487 writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
2488 smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
2489
2490 /* Command queue */
2491 writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
2492 writel_relaxed(smmu->cmdq.q.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
2493 writel_relaxed(smmu->cmdq.q.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
2494
2495 enables = CR0_CMDQEN;
2496 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2497 ARM_SMMU_CR0ACK);
2498 if (ret) {
2499 dev_err(smmu->dev, "failed to enable command queue\n");
2500 return ret;
2501 }
2502
2503 /* Invalidate any cached configuration */
2504 cmd.opcode = CMDQ_OP_CFGI_ALL;
2505 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2506 arm_smmu_cmdq_issue_sync(smmu);
2507
2508 /* Invalidate any stale TLB entries */
2509 if (smmu->features & ARM_SMMU_FEAT_HYP) {
2510 cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
2511 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2512 }
2513
2514 cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
2515 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2516 arm_smmu_cmdq_issue_sync(smmu);
2517
2518 /* Event queue */
2519 writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
2520 writel_relaxed(smmu->evtq.q.prod,
2521 arm_smmu_page1_fixup(ARM_SMMU_EVTQ_PROD, smmu));
2522 writel_relaxed(smmu->evtq.q.cons,
2523 arm_smmu_page1_fixup(ARM_SMMU_EVTQ_CONS, smmu));
2524
2525 enables |= CR0_EVTQEN;
2526 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2527 ARM_SMMU_CR0ACK);
2528 if (ret) {
2529 dev_err(smmu->dev, "failed to enable event queue\n");
2530 return ret;
2531 }
2532
2533 /* PRI queue */
2534 if (smmu->features & ARM_SMMU_FEAT_PRI) {
2535 writeq_relaxed(smmu->priq.q.q_base,
2536 smmu->base + ARM_SMMU_PRIQ_BASE);
2537 writel_relaxed(smmu->priq.q.prod,
2538 arm_smmu_page1_fixup(ARM_SMMU_PRIQ_PROD, smmu));
2539 writel_relaxed(smmu->priq.q.cons,
2540 arm_smmu_page1_fixup(ARM_SMMU_PRIQ_CONS, smmu));
2541
2542 enables |= CR0_PRIQEN;
2543 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2544 ARM_SMMU_CR0ACK);
2545 if (ret) {
2546 dev_err(smmu->dev, "failed to enable PRI queue\n");
2547 return ret;
2548 }
2549 }
2550
2551 ret = arm_smmu_setup_irqs(smmu);
2552 if (ret) {
2553 dev_err(smmu->dev, "failed to setup irqs\n");
2554 return ret;
2555 }
2556
2557
2558 /* Enable the SMMU interface, or ensure bypass */
2559 if (!bypass || disable_bypass) {
2560 enables |= CR0_SMMUEN;
2561 } else {
2562 ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
2563 if (ret)
2564 return ret;
2565 }
2566 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2567 ARM_SMMU_CR0ACK);
2568 if (ret) {
2569 dev_err(smmu->dev, "failed to enable SMMU interface\n");
2570 return ret;
2571 }
2572
2573 return 0;
2574 }
2575
2576 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
2577 {
2578 u32 reg;
2579 bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
2580
2581 /* IDR0 */
2582 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
2583
2584 /* 2-level structures */
2585 if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
2586 smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
2587
2588 if (reg & IDR0_CD2L)
2589 smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
2590
2591 /*
2592 * Translation table endianness.
2593 * We currently require the same endianness as the CPU, but this
2594 * could be changed later by adding a new IO_PGTABLE_QUIRK.
2595 */
2596 switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
2597 case IDR0_TTENDIAN_MIXED:
2598 smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
2599 break;
2600 #ifdef __BIG_ENDIAN
2601 case IDR0_TTENDIAN_BE:
2602 smmu->features |= ARM_SMMU_FEAT_TT_BE;
2603 break;
2604 #else
2605 case IDR0_TTENDIAN_LE:
2606 smmu->features |= ARM_SMMU_FEAT_TT_LE;
2607 break;
2608 #endif
2609 default:
2610 dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
2611 return -ENXIO;
2612 }
2613
2614 /* Boolean feature flags */
2615 if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
2616 smmu->features |= ARM_SMMU_FEAT_PRI;
2617
2618 if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
2619 smmu->features |= ARM_SMMU_FEAT_ATS;
2620
2621 if (reg & IDR0_SEV)
2622 smmu->features |= ARM_SMMU_FEAT_SEV;
2623
2624 if (reg & IDR0_MSI)
2625 smmu->features |= ARM_SMMU_FEAT_MSI;
2626
2627 if (reg & IDR0_HYP)
2628 smmu->features |= ARM_SMMU_FEAT_HYP;
2629
2630 /*
2631 * The coherency feature as set by FW is used in preference to the ID
2632 * register, but warn on mismatch.
2633 */
2634 if (!!(reg & IDR0_COHACC) != coherent)
2635 dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
2636 coherent ? "true" : "false");
2637
2638 switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
2639 case IDR0_STALL_MODEL_FORCE:
2640 smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
2641 /* Fallthrough */
2642 case IDR0_STALL_MODEL_STALL:
2643 smmu->features |= ARM_SMMU_FEAT_STALLS;
2644 }
2645
2646 if (reg & IDR0_S1P)
2647 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
2648
2649 if (reg & IDR0_S2P)
2650 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
2651
2652 if (!(reg & (IDR0_S1P | IDR0_S2P))) {
2653 dev_err(smmu->dev, "no translation support!\n");
2654 return -ENXIO;
2655 }
2656
2657 /* We only support the AArch64 table format at present */
2658 switch (FIELD_GET(IDR0_TTF, reg)) {
2659 case IDR0_TTF_AARCH32_64:
2660 smmu->ias = 40;
2661 /* Fallthrough */
2662 case IDR0_TTF_AARCH64:
2663 break;
2664 default:
2665 dev_err(smmu->dev, "AArch64 table format not supported!\n");
2666 return -ENXIO;
2667 }
2668
2669 /* ASID/VMID sizes */
2670 smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
2671 smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
2672
2673 /* IDR1 */
2674 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
2675 if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
2676 dev_err(smmu->dev, "embedded implementation not supported\n");
2677 return -ENXIO;
2678 }
2679
2680 /* Queue sizes, capped at 4k */
2681 smmu->cmdq.q.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
2682 FIELD_GET(IDR1_CMDQS, reg));
2683 if (!smmu->cmdq.q.max_n_shift) {
2684 /* Odd alignment restrictions on the base, so ignore for now */
2685 dev_err(smmu->dev, "unit-length command queue not supported\n");
2686 return -ENXIO;
2687 }
2688
2689 smmu->evtq.q.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
2690 FIELD_GET(IDR1_EVTQS, reg));
2691 smmu->priq.q.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
2692 FIELD_GET(IDR1_PRIQS, reg));
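/*
 * e.g. assuming CMDQ_MAX_SZ_SHIFT == 8 and EVTQ/PRIQ_MAX_SZ_SHIFT == 7,
 * the queues are capped at 256, 128 and 128 entries respectively.
 */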
2693
2694 /* SID/SSID sizes */
2695 smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
2696 smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
2697
2698 /*
2699 * If the SMMU supports fewer bits than would fill a single L2 stream
2700 * table, use a linear table instead.
2701 */
2702 if (smmu->sid_bits <= STRTAB_SPLIT)
2703 smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
2704
2705 /* IDR5 */
2706 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
2707
2708 /* Maximum number of outstanding stalls */
2709 smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
2710
2711 /* Page sizes */
2712 if (reg & IDR5_GRAN64K)
2713 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
2714 if (reg & IDR5_GRAN16K)
2715 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
2716 if (reg & IDR5_GRAN4K)
2717 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
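/*
 * Each granule also implies its block sizes: 512M blocks for 64K pages,
 * 32M for 16K, and 2M/1G for 4K, matching the AArch64 io-pgtable format.
 */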
2718
2719 /* Input address size */
2720 if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
2721 smmu->features |= ARM_SMMU_FEAT_VAX;
2722
2723 /* Output address size */
2724 switch (FIELD_GET(IDR5_OAS, reg)) {
2725 case IDR5_OAS_32_BIT:
2726 smmu->oas = 32;
2727 break;
2728 case IDR5_OAS_36_BIT:
2729 smmu->oas = 36;
2730 break;
2731 case IDR5_OAS_40_BIT:
2732 smmu->oas = 40;
2733 break;
2734 case IDR5_OAS_42_BIT:
2735 smmu->oas = 42;
2736 break;
2737 case IDR5_OAS_44_BIT:
2738 smmu->oas = 44;
2739 break;
2740 case IDR5_OAS_52_BIT:
2741 smmu->oas = 52;
2742 smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
2743 break;
2744 default:
2745 dev_info(smmu->dev,
2746 "unknown output address size. Truncating to 48-bit\n");
2747 /* Fallthrough */
2748 case IDR5_OAS_48_BIT:
2749 smmu->oas = 48;
2750 }
2751
2752 if (arm_smmu_ops.pgsize_bitmap == -1UL)
2753 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
2754 else
2755 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
2756
2757 /* Set the DMA mask for our table walker */
2758 if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
2759 dev_warn(smmu->dev,
2760 "failed to set DMA mask for table walker\n");
2761
2762 smmu->ias = max(smmu->ias, smmu->oas);
2763
2764 dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
2765 smmu->ias, smmu->oas, smmu->features);
2766 return 0;
2767 }
2768
2769 #ifdef CONFIG_ACPI
2770 static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
2771 {
2772 switch (model) {
2773 case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
2774 smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
2775 break;
2776 case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
2777 smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
2778 break;
2779 }
2780
2781 dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
2782 }
2783
2784 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
2785 struct arm_smmu_device *smmu)
2786 {
2787 struct acpi_iort_smmu_v3 *iort_smmu;
2788 struct device *dev = smmu->dev;
2789 struct acpi_iort_node *node;
2790
2791 node = *(struct acpi_iort_node **)dev_get_platdata(dev);
2792
2793 /* Retrieve SMMUv3 specific data */
2794 iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
2795
2796 acpi_smmu_get_options(iort_smmu->model, smmu);
2797
2798 if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
2799 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
2800
2801 return 0;
2802 }
2803 #else
2804 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
2805 struct arm_smmu_device *smmu)
2806 {
2807 return -ENODEV;
2808 }
2809 #endif
2810
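/*
 * Example binding fragment (illustrative only):
 *
 *	smmu: iommu@2b400000 {
 *		compatible = "arm,smmu-v3";
 *		reg = <0x0 0x2b400000 0x0 0x20000>;
 *		#iommu-cells = <1>;
 *	};
 *
 * Masters then reference it with a single StreamID cell, e.g.
 * "iommus = <&smmu 0x100>;", which arm_smmu_of_xlate() registers as one
 * fwspec ID.
 */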
2811 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
2812 struct arm_smmu_device *smmu)
2813 {
2814 struct device *dev = &pdev->dev;
2815 u32 cells;
2816 int ret = -EINVAL;
2817
2818 if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
2819 dev_err(dev, "missing #iommu-cells property\n");
2820 else if (cells != 1)
2821 dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
2822 else
2823 ret = 0;
2824
2825 parse_driver_options(smmu);
2826
2827 if (of_dma_is_coherent(dev->of_node))
2828 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
2829
2830 return ret;
2831 }
2832
2833 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
2834 {
2835 if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
2836 return SZ_64K;
2837 else
2838 return SZ_128K;
2839 }
2840
2841 static int arm_smmu_device_probe(struct platform_device *pdev)
2842 {
2843 int irq, ret;
2844 struct resource *res;
2845 resource_size_t ioaddr;
2846 struct arm_smmu_device *smmu;
2847 struct device *dev = &pdev->dev;
2848 bool bypass;
2849
2850 smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2851 if (!smmu) {
2852 dev_err(dev, "failed to allocate arm_smmu_device\n");
2853 return -ENOMEM;
2854 }
2855 smmu->dev = dev;
2856
2857 if (dev->of_node) {
2858 ret = arm_smmu_device_dt_probe(pdev, smmu);
2859 } else {
2860 ret = arm_smmu_device_acpi_probe(pdev, smmu);
2861 if (ret == -ENODEV)
2862 return ret;
2863 }
2864
2865 /* Set bypass mode according to firmware probing result */
2866 bypass = !!ret;
2867
2868 /* Base address */
2869 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2870 if (resource_size(res) + 1 < arm_smmu_resource_size(smmu)) {
2871 dev_err(dev, "MMIO region too small (%pr)\n", res);
2872 return -EINVAL;
2873 }
2874 ioaddr = res->start;
2875
2876 smmu->base = devm_ioremap_resource(dev, res);
2877 if (IS_ERR(smmu->base))
2878 return PTR_ERR(smmu->base);
2879
2880 /* Interrupt lines */
2881
2882 irq = platform_get_irq_byname(pdev, "combined");
2883 if (irq > 0)
2884 smmu->combined_irq = irq;
2885 else {
2886 irq = platform_get_irq_byname(pdev, "eventq");
2887 if (irq > 0)
2888 smmu->evtq.q.irq = irq;
2889
2890 irq = platform_get_irq_byname(pdev, "priq");
2891 if (irq > 0)
2892 smmu->priq.q.irq = irq;
2893
2894 irq = platform_get_irq_byname(pdev, "gerror");
2895 if (irq > 0)
2896 smmu->gerr_irq = irq;
2897 }
2898 /* Probe the h/w */
2899 ret = arm_smmu_device_hw_probe(smmu);
2900 if (ret)
2901 return ret;
2902
2903 /* Initialise in-memory data structures */
2904 ret = arm_smmu_init_structures(smmu);
2905 if (ret)
2906 return ret;
2907
2908 /* Record our private device structure */
2909 platform_set_drvdata(pdev, smmu);
2910
2911 /* Reset the device */
2912 ret = arm_smmu_device_reset(smmu, bypass);
2913 if (ret)
2914 return ret;
2915
2916 /* And we're up. Go go go! */
2917 ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
2918 "smmu3.%pa", &ioaddr);
2919 if (ret)
2920 return ret;
2921
2922 iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
2923 iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
2924
2925 ret = iommu_device_register(&smmu->iommu);
2926 if (ret) {
2927 dev_err(dev, "Failed to register iommu\n");
2928 return ret;
2929 }
2930
2931 #ifdef CONFIG_PCI
2932 if (pci_bus_type.iommu_ops != &arm_smmu_ops) {
2933 pci_request_acs();
2934 ret = bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
2935 if (ret)
2936 return ret;
2937 }
2938 #endif
2939 #ifdef CONFIG_ARM_AMBA
2940 if (amba_bustype.iommu_ops != &arm_smmu_ops) {
2941 ret = bus_set_iommu(&amba_bustype, &arm_smmu_ops);
2942 if (ret)
2943 return ret;
2944 }
2945 #endif
2946 if (platform_bus_type.iommu_ops != &arm_smmu_ops) {
2947 ret = bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
2948 if (ret)
2949 return ret;
2950 }
2951 return 0;
2952 }
2953
2954 static void arm_smmu_device_shutdown(struct platform_device *pdev)
2955 {
2956 struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2957
2958 arm_smmu_device_disable(smmu);
2959 }
2960
2961 static const struct of_device_id arm_smmu_of_match[] = {
2962 { .compatible = "arm,smmu-v3", },
2963 { },
2964 };
2965
2966 static struct platform_driver arm_smmu_driver = {
2967 .driver = {
2968 .name = "arm-smmu-v3",
2969 .of_match_table = of_match_ptr(arm_smmu_of_match),
2970 .suppress_bind_attrs = true,
2971 },
2972 .probe = arm_smmu_device_probe,
2973 .shutdown = arm_smmu_device_shutdown,
2974 };
2975 builtin_platform_driver(arm_smmu_driver);