]>
Commit | Line | Data |
---|---|---|
1a59d1b8 | 1 | // SPDX-License-Identifier: GPL-2.0-or-later |
1da177e4 | 2 | /* |
1da177e4 LT |
3 | * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation |
4 | * | |
bc97ce95 | 5 | * Rewrite, cleanup: |
1da177e4 | 6 | * |
91f14480 | 7 | * Copyright (C) 2004 Olof Johansson <olof@lixom.net>, IBM Corporation |
bc97ce95 | 8 | * Copyright (C) 2006 Olof Johansson <olof@lixom.net> |
1da177e4 LT |
9 | * |
10 | * Dynamic DMA mapping support, pSeries-specific parts, both SMP and LPAR. | |
1da177e4 LT |
11 | */ |
12 | ||
1da177e4 LT |
13 | #include <linux/init.h> |
14 | #include <linux/types.h> | |
15 | #include <linux/slab.h> | |
16 | #include <linux/mm.h> | |
beacc6da | 17 | #include <linux/memblock.h> |
1da177e4 LT |
18 | #include <linux/spinlock.h> |
19 | #include <linux/string.h> | |
20 | #include <linux/pci.h> | |
21 | #include <linux/dma-mapping.h> | |
62a8bd6c | 22 | #include <linux/crash_dump.h> |
4e8b0cf4 | 23 | #include <linux/memory.h> |
1cf3d8b3 | 24 | #include <linux/of.h> |
ac9a5889 | 25 | #include <linux/iommu.h> |
0eaf4def | 26 | #include <linux/rculist.h> |
1da177e4 LT |
27 | #include <asm/io.h> |
28 | #include <asm/prom.h> | |
29 | #include <asm/rtas.h> | |
1da177e4 LT |
30 | #include <asm/iommu.h> |
31 | #include <asm/pci-bridge.h> | |
32 | #include <asm/machdep.h> | |
1ababe11 | 33 | #include <asm/firmware.h> |
c707ffcf | 34 | #include <asm/tce.h> |
d387899f | 35 | #include <asm/ppc-pci.h> |
2249ca9d | 36 | #include <asm/udbg.h> |
4e8b0cf4 | 37 | #include <asm/mmzone.h> |
212bebb4 | 38 | #include <asm/plpar_wrappers.h> |
a1218720 | 39 | |
38ae9ec4 | 40 | #include "pseries.h" |
1da177e4 | 41 | |
/*
 * Positions of the RTAS call tokens inside the "ibm,ddw-applicable"
 * property, which is read as an array of DDW_APPLICABLE_SIZE u32 cells.
 */
enum {
	DDW_QUERY_PE_DMA_WIN = 0,
	DDW_CREATE_PE_DMA_WIN = 1,
	DDW_REMOVE_PE_DMA_WIN = 2,

	DDW_APPLICABLE_SIZE	/* number of entries listed above */
};
49 | ||
/*
 * Indices for DDW extension data.
 * NOTE(review): presumably these index a firmware "DDW extensions" array;
 * the consumer is not in the visible part of the file - confirm.
 */
enum {
	DDW_EXT_SIZE = 0,
	DDW_EXT_RESET_DMA_WIN = 1,
	DDW_EXT_QUERY_OUT_SIZE = 2,
};
55 | ||
4ff8677a | 56 | static struct iommu_table *iommu_pseries_alloc_table(int node) |
b348aa65 | 57 | { |
4dd9eab3 | 58 | struct iommu_table *tbl; |
b348aa65 | 59 | |
b348aa65 AK |
60 | tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, node); |
61 | if (!tbl) | |
4ff8677a | 62 | return NULL; |
b348aa65 | 63 | |
0eaf4def | 64 | INIT_LIST_HEAD_RCU(&tbl->it_group_list); |
e5afdf9d | 65 | kref_init(&tbl->it_kref); |
4ff8677a LB |
66 | return tbl; |
67 | } | |
0eaf4def | 68 | |
4ff8677a LB |
69 | static struct iommu_table_group *iommu_pseries_alloc_group(int node) |
70 | { | |
71 | struct iommu_table_group *table_group; | |
72 | ||
73 | table_group = kzalloc_node(sizeof(*table_group), GFP_KERNEL, node); | |
74 | if (!table_group) | |
75 | return NULL; | |
b348aa65 | 76 | |
4ff8677a LB |
77 | table_group->tables[0] = iommu_pseries_alloc_table(node); |
78 | if (table_group->tables[0]) | |
79 | return table_group; | |
b348aa65 | 80 | |
4dd9eab3 | 81 | kfree(table_group); |
b348aa65 AK |
82 | return NULL; |
83 | } | |
84 | ||
85 | static void iommu_pseries_free_group(struct iommu_table_group *table_group, | |
ac9a5889 AK |
86 | const char *node_name) |
87 | { | |
b348aa65 AK |
88 | struct iommu_table *tbl; |
89 | ||
90 | if (!table_group) | |
91 | return; | |
92 | ||
0eaf4def | 93 | tbl = table_group->tables[0]; |
ac9a5889 | 94 | #ifdef CONFIG_IOMMU_API |
b348aa65 AK |
95 | if (table_group->group) { |
96 | iommu_group_put(table_group->group); | |
97 | BUG_ON(table_group->group); | |
ac9a5889 AK |
98 | } |
99 | #endif | |
e5afdf9d | 100 | iommu_tce_table_put(tbl); |
b348aa65 AK |
101 | |
102 | kfree(table_group); | |
ac9a5889 AK |
103 | } |
104 | ||
6490c490 | 105 | static int tce_build_pSeries(struct iommu_table *tbl, long index, |
bc97ce95 | 106 | long npages, unsigned long uaddr, |
4f3dd8a0 | 107 | enum dma_data_direction direction, |
00085f1e | 108 | unsigned long attrs) |
1da177e4 | 109 | { |
bc97ce95 | 110 | u64 proto_tce; |
c05f57fd | 111 | __be64 *tcep; |
bc97ce95 | 112 | u64 rpn; |
0c634baf LB |
113 | const unsigned long tceshift = tbl->it_page_shift; |
114 | const unsigned long pagesize = IOMMU_PAGE_SIZE(tbl); | |
1da177e4 | 115 | |
bc97ce95 | 116 | proto_tce = TCE_PCI_READ; // Read allowed |
1da177e4 LT |
117 | |
118 | if (direction != DMA_TO_DEVICE) | |
bc97ce95 | 119 | proto_tce |= TCE_PCI_WRITE; |
1da177e4 | 120 | |
c05f57fd | 121 | tcep = ((__be64 *)tbl->it_base) + index; |
1da177e4 LT |
122 | |
123 | while (npages--) { | |
95f72d1e | 124 | /* can't move this out since we might cross MEMBLOCK boundary */ |
0c634baf LB |
125 | rpn = __pa(uaddr) >> tceshift; |
126 | *tcep = cpu_to_be64(proto_tce | rpn << tceshift); | |
1da177e4 | 127 | |
0c634baf | 128 | uaddr += pagesize; |
bc97ce95 | 129 | tcep++; |
1da177e4 | 130 | } |
6490c490 | 131 | return 0; |
1da177e4 LT |
132 | } |
133 | ||
134 | ||
135 | static void tce_free_pSeries(struct iommu_table *tbl, long index, long npages) | |
136 | { | |
c05f57fd | 137 | __be64 *tcep; |
1da177e4 | 138 | |
c05f57fd | 139 | tcep = ((__be64 *)tbl->it_base) + index; |
bc97ce95 OJ |
140 | |
141 | while (npages--) | |
142 | *(tcep++) = 0; | |
1da177e4 LT |
143 | } |
144 | ||
5f50867b HM |
145 | static unsigned long tce_get_pseries(struct iommu_table *tbl, long index) |
146 | { | |
df015604 | 147 | __be64 *tcep; |
5f50867b | 148 | |
df015604 | 149 | tcep = ((__be64 *)tbl->it_base) + index; |
5f50867b | 150 | |
df015604 | 151 | return be64_to_cpu(*tcep); |
5f50867b | 152 | } |
1da177e4 | 153 | |
/* Forward declarations: the build paths below use these for rollback. */
static void tce_free_pSeriesLP(unsigned long liobn, long tcenum, long tceshift,
			       long npages);
static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
				    long npages);
156 | ||
7559d3d2 | 157 | static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift, |
1da177e4 | 158 | long npages, unsigned long uaddr, |
4f3dd8a0 | 159 | enum dma_data_direction direction, |
00085f1e | 160 | unsigned long attrs) |
1da177e4 | 161 | { |
6490c490 | 162 | u64 rc = 0; |
bc97ce95 OJ |
163 | u64 proto_tce, tce; |
164 | u64 rpn; | |
6490c490 RJ |
165 | int ret = 0; |
166 | long tcenum_start = tcenum, npages_start = npages; | |
1da177e4 | 167 | |
7559d3d2 | 168 | rpn = __pa(uaddr) >> tceshift; |
bc97ce95 | 169 | proto_tce = TCE_PCI_READ; |
1da177e4 | 170 | if (direction != DMA_TO_DEVICE) |
bc97ce95 | 171 | proto_tce |= TCE_PCI_WRITE; |
1da177e4 LT |
172 | |
173 | while (npages--) { | |
0c634baf | 174 | tce = proto_tce | rpn << tceshift; |
7559d3d2 | 175 | rc = plpar_tce_put((u64)liobn, (u64)tcenum << tceshift, tce); |
bc97ce95 | 176 | |
6490c490 RJ |
177 | if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) { |
178 | ret = (int)rc; | |
0c634baf | 179 | tce_free_pSeriesLP(liobn, tcenum_start, tceshift, |
6490c490 RJ |
180 | (npages_start - (npages + 1))); |
181 | break; | |
182 | } | |
183 | ||
1da177e4 | 184 | if (rc && printk_ratelimit()) { |
fe333321 | 185 | printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc); |
7559d3d2 | 186 | printk("\tindex = 0x%llx\n", (u64)liobn); |
fe333321 IM |
187 | printk("\ttcenum = 0x%llx\n", (u64)tcenum); |
188 | printk("\ttce val = 0x%llx\n", tce ); | |
4ff52b4d | 189 | dump_stack(); |
1da177e4 | 190 | } |
bc97ce95 | 191 | |
1da177e4 | 192 | tcenum++; |
bc97ce95 | 193 | rpn++; |
1da177e4 | 194 | } |
6490c490 | 195 | return ret; |
1da177e4 LT |
196 | } |
197 | ||
df015604 | 198 | static DEFINE_PER_CPU(__be64 *, tce_page); |
1da177e4 | 199 | |
6490c490 | 200 | static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, |
1da177e4 | 201 | long npages, unsigned long uaddr, |
4f3dd8a0 | 202 | enum dma_data_direction direction, |
00085f1e | 203 | unsigned long attrs) |
1da177e4 | 204 | { |
6490c490 | 205 | u64 rc = 0; |
bc97ce95 | 206 | u64 proto_tce; |
df015604 | 207 | __be64 *tcep; |
bc97ce95 | 208 | u64 rpn; |
1da177e4 | 209 | long l, limit; |
6490c490 RJ |
210 | long tcenum_start = tcenum, npages_start = npages; |
211 | int ret = 0; | |
c1703e85 | 212 | unsigned long flags; |
0c634baf | 213 | const unsigned long tceshift = tbl->it_page_shift; |
1da177e4 | 214 | |
17a0364c | 215 | if ((npages == 1) || !firmware_has_feature(FW_FEATURE_PUT_TCE_IND)) { |
7559d3d2 | 216 | return tce_build_pSeriesLP(tbl->it_index, tcenum, |
0c634baf | 217 | tceshift, npages, uaddr, |
6490c490 | 218 | direction, attrs); |
541b2755 | 219 | } |
1da177e4 | 220 | |
c1703e85 AB |
221 | local_irq_save(flags); /* to protect tcep and the page behind it */ |
222 | ||
69111bac | 223 | tcep = __this_cpu_read(tce_page); |
1da177e4 LT |
224 | |
225 | /* This is safe to do since interrupts are off when we're called | |
226 | * from iommu_alloc{,_sg}() | |
227 | */ | |
228 | if (!tcep) { | |
df015604 | 229 | tcep = (__be64 *)__get_free_page(GFP_ATOMIC); |
1da177e4 | 230 | /* If allocation fails, fall back to the loop implementation */ |
541b2755 | 231 | if (!tcep) { |
c1703e85 | 232 | local_irq_restore(flags); |
7559d3d2 | 233 | return tce_build_pSeriesLP(tbl->it_index, tcenum, |
0c634baf | 234 | tceshift, |
7559d3d2 | 235 | npages, uaddr, direction, attrs); |
541b2755 | 236 | } |
69111bac | 237 | __this_cpu_write(tce_page, tcep); |
1da177e4 LT |
238 | } |
239 | ||
0c634baf | 240 | rpn = __pa(uaddr) >> tceshift; |
bc97ce95 | 241 | proto_tce = TCE_PCI_READ; |
1da177e4 | 242 | if (direction != DMA_TO_DEVICE) |
bc97ce95 | 243 | proto_tce |= TCE_PCI_WRITE; |
1da177e4 LT |
244 | |
245 | /* We can map max one pageful of TCEs at a time */ | |
246 | do { | |
247 | /* | |
248 | * Set up the page with TCE data, looping through and setting | |
249 | * the values. | |
250 | */ | |
bc97ce95 | 251 | limit = min_t(long, npages, 4096/TCE_ENTRY_SIZE); |
1da177e4 LT |
252 | |
253 | for (l = 0; l < limit; l++) { | |
0c634baf | 254 | tcep[l] = cpu_to_be64(proto_tce | rpn << tceshift); |
bc97ce95 | 255 | rpn++; |
1da177e4 LT |
256 | } |
257 | ||
258 | rc = plpar_tce_put_indirect((u64)tbl->it_index, | |
0c634baf | 259 | (u64)tcenum << tceshift, |
474e3d56 | 260 | (u64)__pa(tcep), |
1da177e4 LT |
261 | limit); |
262 | ||
263 | npages -= limit; | |
264 | tcenum += limit; | |
265 | } while (npages > 0 && !rc); | |
266 | ||
c1703e85 AB |
267 | local_irq_restore(flags); |
268 | ||
6490c490 RJ |
269 | if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) { |
270 | ret = (int)rc; | |
271 | tce_freemulti_pSeriesLP(tbl, tcenum_start, | |
272 | (npages_start - (npages + limit))); | |
273 | return ret; | |
274 | } | |
275 | ||
1da177e4 | 276 | if (rc && printk_ratelimit()) { |
fe333321 IM |
277 | printk("tce_buildmulti_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc); |
278 | printk("\tindex = 0x%llx\n", (u64)tbl->it_index); | |
279 | printk("\tnpages = 0x%llx\n", (u64)npages); | |
280 | printk("\ttce[0] val = 0x%llx\n", tcep[0]); | |
4ff52b4d | 281 | dump_stack(); |
1da177e4 | 282 | } |
6490c490 | 283 | return ret; |
1da177e4 LT |
284 | } |
285 | ||
0c634baf LB |
286 | static void tce_free_pSeriesLP(unsigned long liobn, long tcenum, long tceshift, |
287 | long npages) | |
1da177e4 LT |
288 | { |
289 | u64 rc; | |
1da177e4 | 290 | |
1da177e4 | 291 | while (npages--) { |
0c634baf | 292 | rc = plpar_tce_put((u64)liobn, (u64)tcenum << tceshift, 0); |
1da177e4 LT |
293 | |
294 | if (rc && printk_ratelimit()) { | |
fe333321 | 295 | printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc); |
7559d3d2 | 296 | printk("\tindex = 0x%llx\n", (u64)liobn); |
fe333321 | 297 | printk("\ttcenum = 0x%llx\n", (u64)tcenum); |
4ff52b4d | 298 | dump_stack(); |
1da177e4 LT |
299 | } |
300 | ||
301 | tcenum++; | |
302 | } | |
303 | } | |
304 | ||
305 | ||
306 | static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages) | |
307 | { | |
308 | u64 rc; | |
1da177e4 | 309 | |
17a0364c | 310 | if (!firmware_has_feature(FW_FEATURE_STUFF_TCE)) |
0c634baf LB |
311 | return tce_free_pSeriesLP(tbl->it_index, tcenum, |
312 | tbl->it_page_shift, npages); | |
da004c36 | 313 | |
0c634baf LB |
314 | rc = plpar_tce_stuff((u64)tbl->it_index, |
315 | (u64)tcenum << tbl->it_page_shift, 0, npages); | |
1da177e4 LT |
316 | |
317 | if (rc && printk_ratelimit()) { | |
318 | printk("tce_freemulti_pSeriesLP: plpar_tce_stuff failed\n"); | |
fe333321 IM |
319 | printk("\trc = %lld\n", rc); |
320 | printk("\tindex = 0x%llx\n", (u64)tbl->it_index); | |
321 | printk("\tnpages = 0x%llx\n", (u64)npages); | |
4ff52b4d | 322 | dump_stack(); |
1da177e4 LT |
323 | } |
324 | } | |
325 | ||
5f50867b HM |
326 | static unsigned long tce_get_pSeriesLP(struct iommu_table *tbl, long tcenum) |
327 | { | |
328 | u64 rc; | |
329 | unsigned long tce_ret; | |
330 | ||
0c634baf LB |
331 | rc = plpar_tce_get((u64)tbl->it_index, |
332 | (u64)tcenum << tbl->it_page_shift, &tce_ret); | |
5f50867b HM |
333 | |
334 | if (rc && printk_ratelimit()) { | |
fe333321 IM |
335 | printk("tce_get_pSeriesLP: plpar_tce_get failed. rc=%lld\n", rc); |
336 | printk("\tindex = 0x%llx\n", (u64)tbl->it_index); | |
337 | printk("\ttcenum = 0x%llx\n", (u64)tcenum); | |
4ff52b4d | 338 | dump_stack(); |
5f50867b HM |
339 | } |
340 | ||
341 | return tce_ret; | |
342 | } | |
343 | ||
25985edc | 344 | /* this is compatible with cells for the device tree property */ |
4e8b0cf4 NA |
345 | struct dynamic_dma_window_prop { |
346 | __be32 liobn; /* tce table number */ | |
347 | __be64 dma_base; /* address hi,lo */ | |
348 | __be32 tce_shift; /* ilog2(tce_page_size) */ | |
349 | __be32 window_shift; /* ilog2(tce_window_size) */ | |
350 | }; | |
351 | ||
57dbbe59 | 352 | struct dma_win { |
4e8b0cf4 NA |
353 | struct device_node *device; |
354 | const struct dynamic_dma_window_prop *prop; | |
355 | struct list_head list; | |
356 | }; | |
357 | ||
358 | /* Dynamic DMA Window support */ | |
359 | struct ddw_query_response { | |
9410e018 | 360 | u32 windows_available; |
80f02512 | 361 | u64 largest_available_block; |
9410e018 AK |
362 | u32 page_size; |
363 | u32 migration_capable; | |
4e8b0cf4 NA |
364 | }; |
365 | ||
366 | struct ddw_create_response { | |
9410e018 AK |
367 | u32 liobn; |
368 | u32 addr_hi; | |
369 | u32 addr_lo; | |
4e8b0cf4 NA |
370 | }; |
371 | ||
/* List of known DMA windows (struct dma_win entries). */
static LIST_HEAD(dma_win_list);
/* prevents races between memory on/offline and window creation */
static DEFINE_SPINLOCK(dma_win_list_lock);
/* protects initializing window twice for same device */
static DEFINE_MUTEX(dma_win_init_mutex);

/* Device tree property names under which window information is stored. */
#define DIRECT64_PROPNAME "linux,direct64-ddr-window-info"
#define DMA64_PROPNAME "linux,dma64-ddr-window-info"
4e8b0cf4 NA |
379 | |
380 | static int tce_clearrange_multi_pSeriesLP(unsigned long start_pfn, | |
381 | unsigned long num_pfn, const void *arg) | |
382 | { | |
383 | const struct dynamic_dma_window_prop *maprange = arg; | |
384 | int rc; | |
385 | u64 tce_size, num_tce, dma_offset, next; | |
386 | u32 tce_shift; | |
387 | long limit; | |
388 | ||
389 | tce_shift = be32_to_cpu(maprange->tce_shift); | |
390 | tce_size = 1ULL << tce_shift; | |
391 | next = start_pfn << PAGE_SHIFT; | |
392 | num_tce = num_pfn << PAGE_SHIFT; | |
393 | ||
394 | /* round back to the beginning of the tce page size */ | |
395 | num_tce += next & (tce_size - 1); | |
396 | next &= ~(tce_size - 1); | |
397 | ||
398 | /* covert to number of tces */ | |
399 | num_tce |= tce_size - 1; | |
400 | num_tce >>= tce_shift; | |
401 | ||
402 | do { | |
403 | /* | |
404 | * Set up the page with TCE data, looping through and setting | |
405 | * the values. | |
406 | */ | |
407 | limit = min_t(long, num_tce, 512); | |
408 | dma_offset = next + be64_to_cpu(maprange->dma_base); | |
409 | ||
410 | rc = plpar_tce_stuff((u64)be32_to_cpu(maprange->liobn), | |
411 | dma_offset, | |
412 | 0, limit); | |
22b38298 | 413 | next += limit * tce_size; |
4e8b0cf4 NA |
414 | num_tce -= limit; |
415 | } while (num_tce > 0 && !rc); | |
416 | ||
417 | return rc; | |
418 | } | |
419 | ||
420 | static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, | |
421 | unsigned long num_pfn, const void *arg) | |
422 | { | |
423 | const struct dynamic_dma_window_prop *maprange = arg; | |
df015604 AB |
424 | u64 tce_size, num_tce, dma_offset, next, proto_tce, liobn; |
425 | __be64 *tcep; | |
4e8b0cf4 NA |
426 | u32 tce_shift; |
427 | u64 rc = 0; | |
428 | long l, limit; | |
429 | ||
17a0364c | 430 | if (!firmware_has_feature(FW_FEATURE_PUT_TCE_IND)) { |
7559d3d2 AK |
431 | unsigned long tceshift = be32_to_cpu(maprange->tce_shift); |
432 | unsigned long dmastart = (start_pfn << PAGE_SHIFT) + | |
433 | be64_to_cpu(maprange->dma_base); | |
434 | unsigned long tcenum = dmastart >> tceshift; | |
435 | unsigned long npages = num_pfn << PAGE_SHIFT >> tceshift; | |
436 | void *uaddr = __va(start_pfn << PAGE_SHIFT); | |
437 | ||
438 | return tce_build_pSeriesLP(be32_to_cpu(maprange->liobn), | |
439 | tcenum, tceshift, npages, (unsigned long) uaddr, | |
440 | DMA_BIDIRECTIONAL, 0); | |
441 | } | |
442 | ||
4e8b0cf4 | 443 | local_irq_disable(); /* to protect tcep and the page behind it */ |
69111bac | 444 | tcep = __this_cpu_read(tce_page); |
4e8b0cf4 NA |
445 | |
446 | if (!tcep) { | |
df015604 | 447 | tcep = (__be64 *)__get_free_page(GFP_ATOMIC); |
4e8b0cf4 NA |
448 | if (!tcep) { |
449 | local_irq_enable(); | |
450 | return -ENOMEM; | |
451 | } | |
69111bac | 452 | __this_cpu_write(tce_page, tcep); |
4e8b0cf4 NA |
453 | } |
454 | ||
455 | proto_tce = TCE_PCI_READ | TCE_PCI_WRITE; | |
456 | ||
457 | liobn = (u64)be32_to_cpu(maprange->liobn); | |
458 | tce_shift = be32_to_cpu(maprange->tce_shift); | |
459 | tce_size = 1ULL << tce_shift; | |
460 | next = start_pfn << PAGE_SHIFT; | |
461 | num_tce = num_pfn << PAGE_SHIFT; | |
462 | ||
463 | /* round back to the beginning of the tce page size */ | |
464 | num_tce += next & (tce_size - 1); | |
465 | next &= ~(tce_size - 1); | |
466 | ||
467 | /* covert to number of tces */ | |
468 | num_tce |= tce_size - 1; | |
469 | num_tce >>= tce_shift; | |
470 | ||
471 | /* We can map max one pageful of TCEs at a time */ | |
472 | do { | |
473 | /* | |
474 | * Set up the page with TCE data, looping through and setting | |
475 | * the values. | |
476 | */ | |
477 | limit = min_t(long, num_tce, 4096/TCE_ENTRY_SIZE); | |
478 | dma_offset = next + be64_to_cpu(maprange->dma_base); | |
479 | ||
480 | for (l = 0; l < limit; l++) { | |
df015604 | 481 | tcep[l] = cpu_to_be64(proto_tce | next); |
4e8b0cf4 NA |
482 | next += tce_size; |
483 | } | |
484 | ||
485 | rc = plpar_tce_put_indirect(liobn, | |
486 | dma_offset, | |
474e3d56 | 487 | (u64)__pa(tcep), |
4e8b0cf4 NA |
488 | limit); |
489 | ||
490 | num_tce -= limit; | |
491 | } while (num_tce > 0 && !rc); | |
492 | ||
493 | /* error cleanup: caller will clear whole range */ | |
494 | ||
495 | local_irq_enable(); | |
496 | return rc; | |
497 | } | |
498 | ||
/*
 * Adapter with a non-const @arg parameter that simply forwards to
 * tce_setrange_multi_pSeriesLP().
 * NOTE(review): presumably exists to match a memory-range walker callback
 * signature; the caller is not in the visible part of the file.
 */
static int tce_setrange_multi_pSeriesLP_walk(unsigned long start_pfn,
		unsigned long num_pfn, void *arg)
{
	const void *maprange = arg;

	return tce_setrange_multi_pSeriesLP(start_pfn, num_pfn, maprange);
}
504 | ||
fc8cba8f LB |
505 | static void iommu_table_setparms_common(struct iommu_table *tbl, unsigned long busno, |
506 | unsigned long liobn, unsigned long win_addr, | |
507 | unsigned long window_size, unsigned long page_shift, | |
508 | void *base, struct iommu_table_ops *table_ops) | |
509 | { | |
510 | tbl->it_busno = busno; | |
511 | tbl->it_index = liobn; | |
512 | tbl->it_offset = win_addr >> page_shift; | |
513 | tbl->it_size = window_size >> page_shift; | |
514 | tbl->it_page_shift = page_shift; | |
515 | tbl->it_base = (unsigned long)base; | |
516 | tbl->it_blocksize = 16; | |
517 | tbl->it_type = TCE_PCI; | |
518 | tbl->it_ops = table_ops; | |
519 | } | |
520 | ||
521 | struct iommu_table_ops iommu_table_pseries_ops; | |
522 | ||
1da177e4 LT |
523 | static void iommu_table_setparms(struct pci_controller *phb, |
524 | struct device_node *dn, | |
bc97ce95 | 525 | struct iommu_table *tbl) |
1da177e4 LT |
526 | { |
527 | struct device_node *node; | |
b7d6bf4f | 528 | const unsigned long *basep; |
9938c474 | 529 | const u32 *sizep; |
1da177e4 | 530 | |
fc8cba8f LB |
531 | /* Test if we are going over 2GB of DMA space */ |
532 | if (phb->dma_window_base_cur + phb->dma_window_size > SZ_2G) { | |
533 | udbg_printf("PCI_DMA: Unexpected number of IOAs under this PHB.\n"); | |
534 | panic("PCI_DMA: Unexpected number of IOAs under this PHB.\n"); | |
535 | } | |
1da177e4 | 536 | |
fc8cba8f | 537 | node = phb->dn; |
e2eb6392 SR |
538 | basep = of_get_property(node, "linux,tce-base", NULL); |
539 | sizep = of_get_property(node, "linux,tce-size", NULL); | |
1da177e4 | 540 | if (basep == NULL || sizep == NULL) { |
b7c670d6 RH |
541 | printk(KERN_ERR "PCI_DMA: iommu_table_setparms: %pOF has " |
542 | "missing tce entries !\n", dn); | |
1da177e4 LT |
543 | return; |
544 | } | |
545 | ||
fc8cba8f LB |
546 | iommu_table_setparms_common(tbl, phb->bus->number, 0, phb->dma_window_base_cur, |
547 | phb->dma_window_size, IOMMU_PAGE_SHIFT_4K, | |
548 | __va(*basep), &iommu_table_pseries_ops); | |
5f50867b | 549 | |
62a8bd6c | 550 | if (!is_kdump_kernel()) |
54622f10 | 551 | memset((void *)tbl->it_base, 0, *sizep); |
1da177e4 | 552 | |
1da177e4 | 553 | phb->dma_window_base_cur += phb->dma_window_size; |
1da177e4 LT |
554 | } |
555 | ||
fc8cba8f LB |
556 | struct iommu_table_ops iommu_table_lpar_multi_ops; |
557 | ||
1da177e4 LT |
558 | /* |
559 | * iommu_table_setparms_lpar | |
560 | * | |
561 | * Function: On pSeries LPAR systems, return TCE table info, given a pci bus. | |
1da177e4 LT |
562 | */ |
563 | static void iommu_table_setparms_lpar(struct pci_controller *phb, | |
564 | struct device_node *dn, | |
565 | struct iommu_table *tbl, | |
b6e1f6ad | 566 | struct iommu_table_group *table_group, |
2083f681 | 567 | const __be32 *dma_window) |
1da177e4 | 568 | { |
fc8cba8f | 569 | unsigned long offset, size, liobn; |
4c76e0bc | 570 | |
fc8cba8f LB |
571 | of_parse_dma_window(dn, dma_window, &liobn, &offset, &size); |
572 | ||
573 | iommu_table_setparms_common(tbl, phb->bus->number, liobn, offset, size, IOMMU_PAGE_SHIFT_4K, NULL, | |
574 | &iommu_table_lpar_multi_ops); | |
1da177e4 | 575 | |
b6e1f6ad AK |
576 | |
577 | table_group->tce32_start = offset; | |
578 | table_group->tce32_size = size; | |
1da177e4 LT |
579 | } |
580 | ||
da004c36 AK |
581 | struct iommu_table_ops iommu_table_pseries_ops = { |
582 | .set = tce_build_pSeries, | |
583 | .clear = tce_free_pSeries, | |
584 | .get = tce_get_pseries | |
585 | }; | |
586 | ||
12d04eef | 587 | static void pci_dma_bus_setup_pSeries(struct pci_bus *bus) |
1da177e4 | 588 | { |
3c2822cc | 589 | struct device_node *dn; |
1da177e4 | 590 | struct iommu_table *tbl; |
3c2822cc OJ |
591 | struct device_node *isa_dn, *isa_dn_orig; |
592 | struct device_node *tmp; | |
593 | struct pci_dn *pci; | |
594 | int children; | |
1da177e4 | 595 | |
3c2822cc | 596 | dn = pci_bus_to_OF_node(bus); |
12d04eef | 597 | |
b7c670d6 | 598 | pr_debug("pci_dma_bus_setup_pSeries: setting up bus %pOF\n", dn); |
3c2822cc OJ |
599 | |
600 | if (bus->self) { | |
601 | /* This is not a root bus, any setup will be done for the | |
602 | * device-side of the bridge in iommu_dev_setup_pSeries(). | |
603 | */ | |
604 | return; | |
605 | } | |
12d04eef | 606 | pci = PCI_DN(dn); |
3c2822cc OJ |
607 | |
608 | /* Check if the ISA bus on the system is under | |
609 | * this PHB. | |
1da177e4 | 610 | */ |
3c2822cc | 611 | isa_dn = isa_dn_orig = of_find_node_by_type(NULL, "isa"); |
1da177e4 | 612 | |
3c2822cc OJ |
613 | while (isa_dn && isa_dn != dn) |
614 | isa_dn = isa_dn->parent; | |
615 | ||
498b6514 | 616 | of_node_put(isa_dn_orig); |
1da177e4 | 617 | |
d3c58fb1 | 618 | /* Count number of direct PCI children of the PHB. */ |
3c2822cc | 619 | for (children = 0, tmp = dn->child; tmp; tmp = tmp->sibling) |
d3c58fb1 | 620 | children++; |
1da177e4 | 621 | |
f7ebf352 | 622 | pr_debug("Children: %d\n", children); |
1da177e4 | 623 | |
3c2822cc OJ |
624 | /* Calculate amount of DMA window per slot. Each window must be |
625 | * a power of two (due to pci_alloc_consistent requirements). | |
626 | * | |
627 | * Keep 256MB aside for PHBs with ISA. | |
628 | */ | |
1da177e4 | 629 | |
3c2822cc OJ |
630 | if (!isa_dn) { |
631 | /* No ISA/IDE - just set window size and return */ | |
632 | pci->phb->dma_window_size = 0x80000000ul; /* To be divided */ | |
633 | ||
634 | while (pci->phb->dma_window_size * children > 0x80000000ul) | |
635 | pci->phb->dma_window_size >>= 1; | |
41febbc8 | 636 | pr_debug("No ISA/IDE, window size is 0x%llx\n", |
f7ebf352 | 637 | pci->phb->dma_window_size); |
3c2822cc OJ |
638 | pci->phb->dma_window_base_cur = 0; |
639 | ||
640 | return; | |
1da177e4 | 641 | } |
3c2822cc OJ |
642 | |
643 | /* If we have ISA, then we probably have an IDE | |
644 | * controller too. Allocate a 128MB table but | |
645 | * skip the first 128MB to avoid stepping on ISA | |
646 | * space. | |
647 | */ | |
648 | pci->phb->dma_window_size = 0x8000000ul; | |
649 | pci->phb->dma_window_base_cur = 0x8000000ul; | |
650 | ||
b348aa65 AK |
651 | pci->table_group = iommu_pseries_alloc_group(pci->phb->node); |
652 | tbl = pci->table_group->tables[0]; | |
3c2822cc OJ |
653 | |
654 | iommu_table_setparms(pci->phb, dn, tbl); | |
fc8cba8f | 655 | |
4be518d8 AK |
656 | if (!iommu_init_table(tbl, pci->phb->node, 0, 0)) |
657 | panic("Failed to initialize iommu table"); | |
3c2822cc OJ |
658 | |
659 | /* Divide the rest (1.75GB) among the children */ | |
660 | pci->phb->dma_window_size = 0x80000000ul; | |
661 | while (pci->phb->dma_window_size * children > 0x70000000ul) | |
662 | pci->phb->dma_window_size >>= 1; | |
663 | ||
41febbc8 | 664 | pr_debug("ISA/IDE, window size is 0x%llx\n", pci->phb->dma_window_size); |
1da177e4 LT |
665 | } |
666 | ||
#ifdef CONFIG_IOMMU_API
/*
 * Replace the TCE at @index with *@tce combined with the permission bits
 * for *@direction, and hand the previous entry back to the caller.
 *
 * The get/put pair is performed under the table's large_pool lock with
 * interrupts disabled.  On success *@direction is set to the direction
 * of the old TCE and *@tce to the old TCE with its permission bits
 * stripped; on failure both are left untouched.  @realmode is unused.
 *
 * Returns 0 on success or the failing hypervisor call's return code.
 */
static int tce_exchange_pseries(struct iommu_table *tbl, long index, unsigned
				long *tce, enum dma_data_direction *direction,
				bool realmode)
{
	const unsigned long ioba = (unsigned long) index << tbl->it_page_shift;
	const u64 perm = iommu_direction_to_tce_perm(*direction);
	const unsigned long newtce = *tce | perm;
	unsigned long oldtce = 0;
	unsigned long flags;
	long rc;

	spin_lock_irqsave(&tbl->large_pool.lock, flags);

	rc = plpar_tce_get((u64)tbl->it_index, ioba, &oldtce);
	if (rc == 0)
		rc = plpar_tce_put((u64)tbl->it_index, ioba, newtce);

	if (rc == 0) {
		*direction = iommu_tce_direction(oldtce);
		*tce = oldtce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
	}

	spin_unlock_irqrestore(&tbl->large_pool.lock, flags);

	return rc;
}
#endif
694 | ||
da004c36 AK |
695 | struct iommu_table_ops iommu_table_lpar_multi_ops = { |
696 | .set = tce_buildmulti_pSeriesLP, | |
b6e1f6ad | 697 | #ifdef CONFIG_IOMMU_API |
021b7868 | 698 | .xchg_no_kill = tce_exchange_pseries, |
b6e1f6ad | 699 | #endif |
da004c36 AK |
700 | .clear = tce_freemulti_pSeriesLP, |
701 | .get = tce_get_pSeriesLP | |
702 | }; | |
1da177e4 | 703 | |
12d04eef | 704 | static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) |
1da177e4 LT |
705 | { |
706 | struct iommu_table *tbl; | |
707 | struct device_node *dn, *pdn; | |
1635317f | 708 | struct pci_dn *ppci; |
2083f681 | 709 | const __be32 *dma_window = NULL; |
1da177e4 | 710 | |
1da177e4 LT |
711 | dn = pci_bus_to_OF_node(bus); |
712 | ||
b7c670d6 RH |
713 | pr_debug("pci_dma_bus_setup_pSeriesLP: setting up bus %pOF\n", |
714 | dn); | |
12d04eef | 715 | |
57dbbe59 LB |
716 | /* |
717 | * Find nearest ibm,dma-window (default DMA window), walking up the | |
718 | * device tree | |
719 | */ | |
1da177e4 | 720 | for (pdn = dn; pdn != NULL; pdn = pdn->parent) { |
e2eb6392 | 721 | dma_window = of_get_property(pdn, "ibm,dma-window", NULL); |
1da177e4 LT |
722 | if (dma_window != NULL) |
723 | break; | |
724 | } | |
725 | ||
726 | if (dma_window == NULL) { | |
f7ebf352 | 727 | pr_debug(" no ibm,dma-window property !\n"); |
1da177e4 LT |
728 | return; |
729 | } | |
730 | ||
e07102db | 731 | ppci = PCI_DN(pdn); |
12d04eef | 732 | |
b7c670d6 RH |
733 | pr_debug(" parent is %pOF, iommu_table: 0x%p\n", |
734 | pdn, ppci->table_group); | |
12d04eef | 735 | |
b348aa65 AK |
736 | if (!ppci->table_group) { |
737 | ppci->table_group = iommu_pseries_alloc_group(ppci->phb->node); | |
738 | tbl = ppci->table_group->tables[0]; | |
b6e1f6ad AK |
739 | iommu_table_setparms_lpar(ppci->phb, pdn, tbl, |
740 | ppci->table_group, dma_window); | |
fc8cba8f | 741 | |
4be518d8 AK |
742 | if (!iommu_init_table(tbl, ppci->phb->node, 0, 0)) |
743 | panic("Failed to initialize iommu table"); | |
b348aa65 AK |
744 | iommu_register_group(ppci->table_group, |
745 | pci_domain_nr(bus), 0); | |
746 | pr_debug(" created table: %p\n", ppci->table_group); | |
1da177e4 | 747 | } |
1da177e4 LT |
748 | } |
749 | ||
750 | ||
12d04eef | 751 | static void pci_dma_dev_setup_pSeries(struct pci_dev *dev) |
1da177e4 | 752 | { |
12d04eef | 753 | struct device_node *dn; |
3c2822cc | 754 | struct iommu_table *tbl; |
1da177e4 | 755 | |
f7ebf352 | 756 | pr_debug("pci_dma_dev_setup_pSeries: %s\n", pci_name(dev)); |
1da177e4 | 757 | |
58f9b0b0 | 758 | dn = dev->dev.of_node; |
1da177e4 | 759 | |
3c2822cc OJ |
760 | /* If we're the direct child of a root bus, then we need to allocate |
761 | * an iommu table ourselves. The bus setup code should have setup | |
762 | * the window sizes already. | |
763 | */ | |
764 | if (!dev->bus->self) { | |
12d04eef BH |
765 | struct pci_controller *phb = PCI_DN(dn)->phb; |
766 | ||
f7ebf352 | 767 | pr_debug(" --> first child, no bridge. Allocating iommu table.\n"); |
b348aa65 AK |
768 | PCI_DN(dn)->table_group = iommu_pseries_alloc_group(phb->node); |
769 | tbl = PCI_DN(dn)->table_group->tables[0]; | |
12d04eef | 770 | iommu_table_setparms(phb, dn, tbl); |
fc8cba8f | 771 | |
4be518d8 AK |
772 | if (!iommu_init_table(tbl, phb->node, 0, 0)) |
773 | panic("Failed to initialize iommu table"); | |
774 | ||
4617082e | 775 | set_iommu_table_base(&dev->dev, tbl); |
3c2822cc OJ |
776 | return; |
777 | } | |
778 | ||
779 | /* If this device is further down the bus tree, search upwards until | |
780 | * an already allocated iommu table is found and use that. | |
781 | */ | |
782 | ||
b348aa65 | 783 | while (dn && PCI_DN(dn) && PCI_DN(dn)->table_group == NULL) |
1da177e4 LT |
784 | dn = dn->parent; |
785 | ||
c409c631 | 786 | if (dn && PCI_DN(dn)) |
b348aa65 AK |
787 | set_iommu_table_base(&dev->dev, |
788 | PCI_DN(dn)->table_group->tables[0]); | |
c409c631 | 789 | else |
12d04eef BH |
790 | printk(KERN_WARNING "iommu: Device %s has no iommu table\n", |
791 | pci_name(dev)); | |
1da177e4 LT |
792 | } |
793 | ||
4e8b0cf4 NA |
794 | static int __read_mostly disable_ddw; |
795 | ||
796 | static int __init disable_ddw_setup(char *str) | |
797 | { | |
798 | disable_ddw = 1; | |
799 | printk(KERN_INFO "ppc iommu: disabling ddw.\n"); | |
800 | ||
801 | return 0; | |
802 | } | |
803 | ||
804 | early_param("disable_ddw", disable_ddw_setup); | |
805 | ||
7ed2ed2d | 806 | static void clean_dma_window(struct device_node *np, struct dynamic_dma_window_prop *dwp) |
4e8b0cf4 | 807 | { |
74d0b399 | 808 | int ret; |
9410e018 | 809 | |
4e8b0cf4 NA |
810 | ret = tce_clearrange_multi_pSeriesLP(0, |
811 | 1ULL << (be32_to_cpu(dwp->window_shift) - PAGE_SHIFT), dwp); | |
812 | if (ret) | |
f2c2cbcc JP |
813 | pr_warn("%pOF failed to clear tces in window.\n", |
814 | np); | |
4e8b0cf4 | 815 | else |
b7c670d6 RH |
816 | pr_debug("%pOF successfully cleared tces in window.\n", |
817 | np); | |
7ed2ed2d LB |
818 | } |
819 | ||
820 | /* | |
821 | * Call only if DMA window is clean. | |
822 | */ | |
823 | static void __remove_dma_window(struct device_node *np, u32 *ddw_avail, u64 liobn) | |
824 | { | |
825 | int ret; | |
4e8b0cf4 | 826 | |
cac3e629 | 827 | ret = rtas_call(ddw_avail[DDW_REMOVE_PE_DMA_WIN], 1, 1, NULL, liobn); |
ae69e1ed | 828 | if (ret) |
7ed2ed2d | 829 | pr_warn("%pOF: failed to remove DMA window: rtas returned " |
ae69e1ed | 830 | "%d to ibm,remove-pe-dma-window(%x) %llx\n", |
cac3e629 | 831 | np, ret, ddw_avail[DDW_REMOVE_PE_DMA_WIN], liobn); |
ae69e1ed | 832 | else |
7ed2ed2d | 833 | pr_debug("%pOF: successfully removed DMA window: rtas returned " |
ae69e1ed | 834 | "%d to ibm,remove-pe-dma-window(%x) %llx\n", |
cac3e629 | 835 | np, ret, ddw_avail[DDW_REMOVE_PE_DMA_WIN], liobn); |
74d0b399 LB |
836 | } |
837 | ||
7ed2ed2d LB |
838 | static void remove_dma_window(struct device_node *np, u32 *ddw_avail, |
839 | struct property *win) | |
840 | { | |
841 | struct dynamic_dma_window_prop *dwp; | |
842 | u64 liobn; | |
843 | ||
844 | dwp = win->value; | |
845 | liobn = (u64)be32_to_cpu(dwp->liobn); | |
846 | ||
847 | clean_dma_window(np, dwp); | |
848 | __remove_dma_window(np, ddw_avail, liobn); | |
849 | } | |
850 | ||
a5fd9512 | 851 | static int remove_ddw(struct device_node *np, bool remove_prop, const char *win_name) |
74d0b399 LB |
852 | { |
853 | struct property *win; | |
854 | u32 ddw_avail[DDW_APPLICABLE_SIZE]; | |
855 | int ret = 0; | |
856 | ||
a5fd9512 LB |
857 | win = of_find_property(np, win_name, NULL); |
858 | if (!win) | |
859 | return -EINVAL; | |
860 | ||
74d0b399 LB |
861 | ret = of_property_read_u32_array(np, "ibm,ddw-applicable", |
862 | &ddw_avail[0], DDW_APPLICABLE_SIZE); | |
863 | if (ret) | |
a5fd9512 | 864 | return 0; |
74d0b399 | 865 | |
74d0b399 LB |
866 | |
867 | if (win->length >= sizeof(struct dynamic_dma_window_prop)) | |
868 | remove_dma_window(np, ddw_avail, win); | |
869 | ||
870 | if (!remove_prop) | |
a5fd9512 | 871 | return 0; |
4e8b0cf4 | 872 | |
74d0b399 | 873 | ret = of_remove_property(np, win); |
2573f684 | 874 | if (ret) |
57dbbe59 | 875 | pr_warn("%pOF: failed to remove DMA window property: %d\n", |
b7c670d6 | 876 | np, ret); |
a5fd9512 | 877 | return 0; |
2573f684 | 878 | } |
4e8b0cf4 | 879 | |
2ca73c54 | 880 | static bool find_existing_ddw(struct device_node *pdn, u64 *dma_addr, int *window_shift) |
4e8b0cf4 | 881 | { |
57dbbe59 LB |
882 | struct dma_win *window; |
883 | const struct dynamic_dma_window_prop *dma64; | |
2ca73c54 | 884 | bool found = false; |
4e8b0cf4 | 885 | |
57dbbe59 | 886 | spin_lock(&dma_win_list_lock); |
4e8b0cf4 | 887 | /* check if we already created a window and dupe that config if so */ |
57dbbe59 | 888 | list_for_each_entry(window, &dma_win_list, list) { |
4e8b0cf4 | 889 | if (window->device == pdn) { |
57dbbe59 LB |
890 | dma64 = window->prop; |
891 | *dma_addr = be64_to_cpu(dma64->dma_base); | |
892 | *window_shift = be32_to_cpu(dma64->window_shift); | |
2ca73c54 | 893 | found = true; |
4e8b0cf4 NA |
894 | break; |
895 | } | |
896 | } | |
57dbbe59 | 897 | spin_unlock(&dma_win_list_lock); |
4e8b0cf4 | 898 | |
2ca73c54 | 899 | return found; |
4e8b0cf4 NA |
900 | } |
901 | ||
57dbbe59 LB |
902 | static struct dma_win *ddw_list_new_entry(struct device_node *pdn, |
903 | const struct dynamic_dma_window_prop *dma64) | |
92a23219 | 904 | { |
57dbbe59 | 905 | struct dma_win *window; |
92a23219 LB |
906 | |
907 | window = kzalloc(sizeof(*window), GFP_KERNEL); | |
908 | if (!window) | |
909 | return NULL; | |
910 | ||
911 | window->device = pdn; | |
912 | window->prop = dma64; | |
913 | ||
914 | return window; | |
915 | } | |
916 | ||
8599395d | 917 | static void find_existing_ddw_windows_named(const char *name) |
4e8b0cf4 | 918 | { |
97e7dc52 | 919 | int len; |
c8566780 | 920 | struct device_node *pdn; |
57dbbe59 | 921 | struct dma_win *window; |
8599395d | 922 | const struct dynamic_dma_window_prop *dma64; |
c8566780 | 923 | |
8599395d LB |
924 | for_each_node_with_property(pdn, name) { |
925 | dma64 = of_get_property(pdn, name, &len); | |
926 | if (!dma64 || len < sizeof(*dma64)) { | |
927 | remove_ddw(pdn, true, name); | |
97e7dc52 NA |
928 | continue; |
929 | } | |
c8566780 | 930 | |
8599395d | 931 | window = ddw_list_new_entry(pdn, dma64); |
92a23219 LB |
932 | if (!window) |
933 | break; | |
934 | ||
57dbbe59 LB |
935 | spin_lock(&dma_win_list_lock); |
936 | list_add(&window->list, &dma_win_list); | |
937 | spin_unlock(&dma_win_list_lock); | |
4e8b0cf4 | 938 | } |
8599395d LB |
939 | } |
940 | ||
941 | static int find_existing_ddw_windows(void) | |
942 | { | |
943 | if (!firmware_has_feature(FW_FEATURE_LPAR)) | |
944 | return 0; | |
945 | ||
946 | find_existing_ddw_windows_named(DIRECT64_PROPNAME); | |
381ceda8 | 947 | find_existing_ddw_windows_named(DMA64_PROPNAME); |
4e8b0cf4 | 948 | |
c8566780 | 949 | return 0; |
4e8b0cf4 | 950 | } |
c8566780 | 951 | machine_arch_initcall(pseries, find_existing_ddw_windows); |
4e8b0cf4 | 952 | |
80f02512 LB |
953 | /** |
954 | * ddw_read_ext - Get the value of an DDW extension | |
955 | * @np: device node from which the extension value is to be read. | |
956 | * @extnum: index number of the extension. | |
957 | * @value: pointer to return value, modified when extension is available. | |
958 | * | |
959 | * Checks if "ibm,ddw-extensions" exists for this node, and get the value | |
960 | * on index 'extnum'. | |
961 | * It can be used only to check if a property exists, passing value == NULL. | |
962 | * | |
963 | * Returns: | |
964 | * 0 if extension successfully read | |
965 | * -EINVAL if the "ibm,ddw-extensions" does not exist, | |
966 | * -ENODATA if "ibm,ddw-extensions" does not have a value, and | |
967 | * -EOVERFLOW if "ibm,ddw-extensions" does not contain this extension. | |
968 | */ | |
969 | static inline int ddw_read_ext(const struct device_node *np, int extnum, | |
970 | u32 *value) | |
971 | { | |
972 | static const char propname[] = "ibm,ddw-extensions"; | |
973 | u32 count; | |
974 | int ret; | |
975 | ||
976 | ret = of_property_read_u32_index(np, propname, DDW_EXT_SIZE, &count); | |
977 | if (ret) | |
978 | return ret; | |
979 | ||
980 | if (count < extnum) | |
981 | return -EOVERFLOW; | |
982 | ||
983 | if (!value) | |
984 | value = &count; | |
985 | ||
986 | return of_property_read_u32_index(np, propname, extnum, value); | |
987 | } | |
988 | ||
b73a635f | 989 | static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail, |
80f02512 LB |
990 | struct ddw_query_response *query, |
991 | struct device_node *parent) | |
4e8b0cf4 | 992 | { |
8445a87f GP |
993 | struct device_node *dn; |
994 | struct pci_dn *pdn; | |
80f02512 | 995 | u32 cfg_addr, ext_query, query_out[5]; |
4e8b0cf4 | 996 | u64 buid; |
80f02512 LB |
997 | int ret, out_sz; |
998 | ||
999 | /* | |
1000 | * From LoPAR level 2.8, "ibm,ddw-extensions" index 3 can rule how many | |
1001 | * output parameters ibm,query-pe-dma-windows will have, ranging from | |
1002 | * 5 to 6. | |
1003 | */ | |
1004 | ret = ddw_read_ext(parent, DDW_EXT_QUERY_OUT_SIZE, &ext_query); | |
1005 | if (!ret && ext_query == 1) | |
1006 | out_sz = 6; | |
1007 | else | |
1008 | out_sz = 5; | |
4e8b0cf4 NA |
1009 | |
1010 | /* | |
1011 | * Get the config address and phb buid of the PE window. | |
1012 | * Rely on eeh to retrieve this for us. | |
1013 | * Retrieve them from the pci device, not the node with the | |
1014 | * dma-window property | |
1015 | */ | |
8445a87f GP |
1016 | dn = pci_device_to_OF_node(dev); |
1017 | pdn = PCI_DN(dn); | |
1018 | buid = pdn->phb->buid; | |
8a934efe | 1019 | cfg_addr = ((pdn->busno << 16) | (pdn->devfn << 8)); |
39baadbf | 1020 | |
80f02512 | 1021 | ret = rtas_call(ddw_avail[DDW_QUERY_PE_DMA_WIN], 3, out_sz, query_out, |
cac3e629 | 1022 | cfg_addr, BUID_HI(buid), BUID_LO(buid)); |
80f02512 LB |
1023 | dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x returned %d\n", |
1024 | ddw_avail[DDW_QUERY_PE_DMA_WIN], cfg_addr, BUID_HI(buid), | |
1025 | BUID_LO(buid), ret); | |
1026 | ||
1027 | switch (out_sz) { | |
1028 | case 5: | |
1029 | query->windows_available = query_out[0]; | |
1030 | query->largest_available_block = query_out[1]; | |
1031 | query->page_size = query_out[2]; | |
1032 | query->migration_capable = query_out[3]; | |
1033 | break; | |
1034 | case 6: | |
1035 | query->windows_available = query_out[0]; | |
1036 | query->largest_available_block = ((u64)query_out[1] << 32) | | |
1037 | query_out[2]; | |
1038 | query->page_size = query_out[3]; | |
1039 | query->migration_capable = query_out[4]; | |
1040 | break; | |
1041 | } | |
1042 | ||
4e8b0cf4 NA |
1043 | return ret; |
1044 | } | |
1045 | ||
b73a635f | 1046 | static int create_ddw(struct pci_dev *dev, const u32 *ddw_avail, |
4e8b0cf4 NA |
1047 | struct ddw_create_response *create, int page_shift, |
1048 | int window_shift) | |
1049 | { | |
8445a87f GP |
1050 | struct device_node *dn; |
1051 | struct pci_dn *pdn; | |
4e8b0cf4 NA |
1052 | u32 cfg_addr; |
1053 | u64 buid; | |
1054 | int ret; | |
1055 | ||
1056 | /* | |
1057 | * Get the config address and phb buid of the PE window. | |
1058 | * Rely on eeh to retrieve this for us. | |
1059 | * Retrieve them from the pci device, not the node with the | |
1060 | * dma-window property | |
1061 | */ | |
8445a87f GP |
1062 | dn = pci_device_to_OF_node(dev); |
1063 | pdn = PCI_DN(dn); | |
1064 | buid = pdn->phb->buid; | |
8a934efe | 1065 | cfg_addr = ((pdn->busno << 16) | (pdn->devfn << 8)); |
4e8b0cf4 NA |
1066 | |
1067 | do { | |
1068 | /* extra outputs are LIOBN and dma-addr (hi, lo) */ | |
cac3e629 LB |
1069 | ret = rtas_call(ddw_avail[DDW_CREATE_PE_DMA_WIN], 5, 4, |
1070 | (u32 *)create, cfg_addr, BUID_HI(buid), | |
1071 | BUID_LO(buid), page_shift, window_shift); | |
4e8b0cf4 NA |
1072 | } while (rtas_busy_delay(ret)); |
1073 | dev_info(&dev->dev, | |
1074 | "ibm,create-pe-dma-window(%x) %x %x %x %x %x returned %d " | |
cac3e629 LB |
1075 | "(liobn = 0x%x starting addr = %x %x)\n", |
1076 | ddw_avail[DDW_CREATE_PE_DMA_WIN], cfg_addr, BUID_HI(buid), | |
1077 | BUID_LO(buid), page_shift, window_shift, ret, create->liobn, | |
1078 | create->addr_hi, create->addr_lo); | |
4e8b0cf4 NA |
1079 | |
1080 | return ret; | |
1081 | } | |
1082 | ||
61435690 NA |
1083 | struct failed_ddw_pdn { |
1084 | struct device_node *pdn; | |
1085 | struct list_head list; | |
1086 | }; | |
1087 | ||
1088 | static LIST_HEAD(failed_ddw_pdn_list); | |
1089 | ||
68c0449e AK |
1090 | static phys_addr_t ddw_memory_hotplug_max(void) |
1091 | { | |
1092 | phys_addr_t max_addr = memory_hotplug_max(); | |
1093 | struct device_node *memory; | |
1094 | ||
54fc3c68 AK |
1095 | /* |
1096 | * The "ibm,pmemory" can appear anywhere in the address space. | |
1097 | * Assuming it is still backed by page structs, set the upper limit | |
1098 | * for the huge DMA window as MAX_PHYSMEM_BITS. | |
1099 | */ | |
1100 | if (of_find_node_by_type(NULL, "ibm,pmemory")) | |
1101 | return (sizeof(phys_addr_t) * 8 <= MAX_PHYSMEM_BITS) ? | |
1102 | (phys_addr_t) -1 : (1ULL << MAX_PHYSMEM_BITS); | |
1103 | ||
68c0449e AK |
1104 | for_each_node_by_type(memory, "memory") { |
1105 | unsigned long start, size; | |
c05f57fd | 1106 | int n_mem_addr_cells, n_mem_size_cells, len; |
68c0449e AK |
1107 | const __be32 *memcell_buf; |
1108 | ||
1109 | memcell_buf = of_get_property(memory, "reg", &len); | |
1110 | if (!memcell_buf || len <= 0) | |
1111 | continue; | |
1112 | ||
1113 | n_mem_addr_cells = of_n_addr_cells(memory); | |
1114 | n_mem_size_cells = of_n_size_cells(memory); | |
1115 | ||
68c0449e AK |
1116 | start = of_read_number(memcell_buf, n_mem_addr_cells); |
1117 | memcell_buf += n_mem_addr_cells; | |
1118 | size = of_read_number(memcell_buf, n_mem_size_cells); | |
1119 | memcell_buf += n_mem_size_cells; | |
1120 | ||
1121 | max_addr = max_t(phys_addr_t, max_addr, start + size); | |
1122 | } | |
1123 | ||
1124 | return max_addr; | |
1125 | } | |
1126 | ||
8c0d5159 LB |
1127 | /* |
1128 | * Platforms supporting the DDW option starting with LoPAR level 2.7 implement | |
1129 | * ibm,ddw-extensions, which carries the rtas token for | |
1130 | * ibm,reset-pe-dma-windows. | |
1131 | * That rtas-call can be used to restore the default DMA window for the device. | |
1132 | */ | |
1133 | static void reset_dma_window(struct pci_dev *dev, struct device_node *par_dn) | |
1134 | { | |
1135 | int ret; | |
1136 | u32 cfg_addr, reset_dma_win; | |
1137 | u64 buid; | |
1138 | struct device_node *dn; | |
1139 | struct pci_dn *pdn; | |
1140 | ||
1141 | ret = ddw_read_ext(par_dn, DDW_EXT_RESET_DMA_WIN, &reset_dma_win); | |
1142 | if (ret) | |
1143 | return; | |
1144 | ||
1145 | dn = pci_device_to_OF_node(dev); | |
1146 | pdn = PCI_DN(dn); | |
1147 | buid = pdn->phb->buid; | |
1148 | cfg_addr = (pdn->busno << 16) | (pdn->devfn << 8); | |
1149 | ||
1150 | ret = rtas_call(reset_dma_win, 3, 1, NULL, cfg_addr, BUID_HI(buid), | |
1151 | BUID_LO(buid)); | |
1152 | if (ret) | |
1153 | dev_info(&dev->dev, | |
1154 | "ibm,reset-pe-dma-windows(%x) %x %x %x returned %d ", | |
1155 | reset_dma_win, cfg_addr, BUID_HI(buid), BUID_LO(buid), | |
1156 | ret); | |
1157 | } | |
1158 | ||
47272411 LB |
1159 | /* Return largest page shift based on "IO Page Sizes" output of ibm,query-pe-dma-window. */ |
1160 | static int iommu_get_page_shift(u32 query_page_size) | |
1161 | { | |
1162 | /* Supported IO page-sizes according to LoPAR */ | |
1163 | const int shift[] = { | |
1164 | __builtin_ctzll(SZ_4K), __builtin_ctzll(SZ_64K), __builtin_ctzll(SZ_16M), | |
1165 | __builtin_ctzll(SZ_32M), __builtin_ctzll(SZ_64M), __builtin_ctzll(SZ_128M), | |
1166 | __builtin_ctzll(SZ_256M), __builtin_ctzll(SZ_16G) | |
1167 | }; | |
1168 | ||
1169 | int i = ARRAY_SIZE(shift) - 1; | |
1170 | ||
1171 | /* | |
1172 | * On LoPAR, ibm,query-pe-dma-window outputs "IO Page Sizes" using a bit field: | |
1173 | * - bit 31 means 4k pages are supported, | |
1174 | * - bit 30 means 64k pages are supported, and so on. | |
1175 | * Larger pagesizes map more memory with the same amount of TCEs, so start probing them. | |
1176 | */ | |
1177 | for (; i >= 0 ; i--) { | |
1178 | if (query_page_size & (1 << i)) | |
1179 | return shift[i]; | |
1180 | } | |
1181 | ||
1182 | /* No valid page size found. */ | |
1183 | return 0; | |
1184 | } | |
1185 | ||
7ed2ed2d LB |
1186 | static struct property *ddw_property_create(const char *propname, u32 liobn, u64 dma_addr, |
1187 | u32 page_shift, u32 window_shift) | |
1188 | { | |
1189 | struct dynamic_dma_window_prop *ddwprop; | |
1190 | struct property *win64; | |
1191 | ||
1192 | win64 = kzalloc(sizeof(*win64), GFP_KERNEL); | |
1193 | if (!win64) | |
1194 | return NULL; | |
1195 | ||
1196 | win64->name = kstrdup(propname, GFP_KERNEL); | |
1197 | ddwprop = kzalloc(sizeof(*ddwprop), GFP_KERNEL); | |
1198 | win64->value = ddwprop; | |
1199 | win64->length = sizeof(*ddwprop); | |
1200 | if (!win64->name || !win64->value) { | |
1201 | kfree(win64->name); | |
1202 | kfree(win64->value); | |
1203 | kfree(win64); | |
1204 | return NULL; | |
1205 | } | |
1206 | ||
1207 | ddwprop->liobn = cpu_to_be32(liobn); | |
1208 | ddwprop->dma_base = cpu_to_be64(dma_addr); | |
1209 | ddwprop->tce_shift = cpu_to_be32(page_shift); | |
1210 | ddwprop->window_shift = cpu_to_be32(window_shift); | |
1211 | ||
1212 | return win64; | |
1213 | } | |
1214 | ||
4e8b0cf4 NA |
1215 | /* |
1216 | * If the PE supports dynamic dma windows, and there is space for a table | |
1217 | * that can map all pages in a linear offset, then setup such a table, | |
1218 | * and record the dma-offset in the struct device. | |
1219 | * | |
1220 | * dev: the pci device we are checking | |
1221 | * pdn: the parent pe node with the ibm,dma_window property | |
1222 | * Future: also check if we can remap the base window for our base page size | |
1223 | * | |
2ca73c54 | 1224 | * returns true if can map all pages (direct mapping), false otherwise.. |
4e8b0cf4 | 1225 | */ |
2ca73c54 | 1226 | static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) |
4e8b0cf4 | 1227 | { |
bf6e2d56 AK |
1228 | int len = 0, ret; |
1229 | int max_ram_len = order_base_2(ddw_memory_hotplug_max()); | |
4e8b0cf4 NA |
1230 | struct ddw_query_response query; |
1231 | struct ddw_create_response create; | |
1232 | int page_shift; | |
7ed2ed2d | 1233 | u64 win_addr; |
381ceda8 | 1234 | const char *win_name; |
4e8b0cf4 | 1235 | struct device_node *dn; |
cac3e629 | 1236 | u32 ddw_avail[DDW_APPLICABLE_SIZE]; |
57dbbe59 | 1237 | struct dma_win *window; |
76730334 | 1238 | struct property *win64; |
2ca73c54 | 1239 | bool ddw_enabled = false; |
61435690 | 1240 | struct failed_ddw_pdn *fpdn; |
381ceda8 | 1241 | bool default_win_removed = false, direct_mapping = false; |
bf6e2d56 | 1242 | bool pmem_present; |
381ceda8 LB |
1243 | struct pci_dn *pci = PCI_DN(pdn); |
1244 | struct iommu_table *tbl = pci->table_group->tables[0]; | |
bf6e2d56 AK |
1245 | |
1246 | dn = of_find_node_by_type(NULL, "ibm,pmemory"); | |
1247 | pmem_present = dn != NULL; | |
1248 | of_node_put(dn); | |
4e8b0cf4 | 1249 | |
57dbbe59 | 1250 | mutex_lock(&dma_win_init_mutex); |
4e8b0cf4 | 1251 | |
2ca73c54 | 1252 | if (find_existing_ddw(pdn, &dev->dev.archdata.dma_offset, &len)) { |
381ceda8 | 1253 | direct_mapping = (len >= max_ram_len); |
2ca73c54 | 1254 | ddw_enabled = true; |
4e8b0cf4 | 1255 | goto out_unlock; |
2ca73c54 | 1256 | } |
4e8b0cf4 | 1257 | |
61435690 NA |
1258 | /* |
1259 | * If we already went through this for a previous function of | |
1260 | * the same device and failed, we don't want to muck with the | |
1261 | * DMA window again, as it will race with in-flight operations | |
1262 | * and can lead to EEHs. The above mutex protects access to the | |
1263 | * list. | |
1264 | */ | |
1265 | list_for_each_entry(fpdn, &failed_ddw_pdn_list, list) { | |
b7c670d6 | 1266 | if (fpdn->pdn == pdn) |
61435690 NA |
1267 | goto out_unlock; |
1268 | } | |
1269 | ||
4e8b0cf4 NA |
1270 | /* |
1271 | * the ibm,ddw-applicable property holds the tokens for: | |
1272 | * ibm,query-pe-dma-window | |
1273 | * ibm,create-pe-dma-window | |
1274 | * ibm,remove-pe-dma-window | |
1275 | * for the given node in that order. | |
1276 | * the property is actually in the parent, not the PE | |
1277 | */ | |
9410e018 | 1278 | ret = of_property_read_u32_array(pdn, "ibm,ddw-applicable", |
cac3e629 | 1279 | &ddw_avail[0], DDW_APPLICABLE_SIZE); |
9410e018 | 1280 | if (ret) |
ae69e1ed | 1281 | goto out_failed; |
25ebc45b | 1282 | |
ae69e1ed | 1283 | /* |
4e8b0cf4 NA |
1284 | * Query if there is a second window of size to map the |
1285 | * whole partition. Query returns number of windows, largest | |
1286 | * block assigned to PE (partition endpoint), and two bitmasks | |
1287 | * of page sizes: supported and supported for migrate-dma. | |
1288 | */ | |
1289 | dn = pci_device_to_OF_node(dev); | |
80f02512 | 1290 | ret = query_ddw(dev, ddw_avail, &query, pdn); |
4e8b0cf4 | 1291 | if (ret != 0) |
ae69e1ed | 1292 | goto out_failed; |
4e8b0cf4 | 1293 | |
8c0d5159 LB |
1294 | /* |
1295 | * If there is no window available, remove the default DMA window, | |
1296 | * if it's present. This will make all the resources available to the | |
1297 | * new DDW window. | |
1298 | * If anything fails after this, we need to restore it, so also check | |
1299 | * for extensions presence. | |
1300 | */ | |
4e8b0cf4 | 1301 | if (query.windows_available == 0) { |
8c0d5159 LB |
1302 | struct property *default_win; |
1303 | int reset_win_ext; | |
1304 | ||
92fe01b7 AK |
1305 | /* DDW + IOMMU on single window may fail if there is any allocation */ |
1306 | if (iommu_table_in_use(tbl)) { | |
1307 | dev_warn(&dev->dev, "current IOMMU table in use, can't be replaced.\n"); | |
1308 | goto out_failed; | |
1309 | } | |
1310 | ||
8c0d5159 LB |
1311 | default_win = of_find_property(pdn, "ibm,dma-window", NULL); |
1312 | if (!default_win) | |
1313 | goto out_failed; | |
1314 | ||
1315 | reset_win_ext = ddw_read_ext(pdn, DDW_EXT_RESET_DMA_WIN, NULL); | |
1316 | if (reset_win_ext) | |
1317 | goto out_failed; | |
1318 | ||
1319 | remove_dma_window(pdn, ddw_avail, default_win); | |
1320 | default_win_removed = true; | |
1321 | ||
1322 | /* Query again, to check if the window is available */ | |
1323 | ret = query_ddw(dev, ddw_avail, &query, pdn); | |
1324 | if (ret != 0) | |
1325 | goto out_failed; | |
1326 | ||
1327 | if (query.windows_available == 0) { | |
1328 | /* no windows are available for this device. */ | |
1329 | dev_dbg(&dev->dev, "no free dynamic windows"); | |
1330 | goto out_failed; | |
1331 | } | |
4e8b0cf4 | 1332 | } |
47272411 LB |
1333 | |
1334 | page_shift = iommu_get_page_shift(query.page_size); | |
1335 | if (!page_shift) { | |
57dbbe59 LB |
1336 | dev_dbg(&dev->dev, "no supported page size in mask %x", |
1337 | query.page_size); | |
ae69e1ed | 1338 | goto out_failed; |
4e8b0cf4 | 1339 | } |
381ceda8 LB |
1340 | |
1341 | ||
bf6e2d56 AK |
1342 | /* |
1343 | * The "ibm,pmemory" can appear anywhere in the address space. | |
1344 | * Assuming it is still backed by page structs, try MAX_PHYSMEM_BITS | |
1345 | * for the upper limit and fallback to max RAM otherwise but this | |
1346 | * disables device::dma_ops_bypass. | |
1347 | */ | |
1348 | len = max_ram_len; | |
1349 | if (pmem_present) { | |
1350 | if (query.largest_available_block >= | |
1351 | (1ULL << (MAX_PHYSMEM_BITS - page_shift))) | |
a9d2f9bb | 1352 | len = MAX_PHYSMEM_BITS; |
bf6e2d56 AK |
1353 | else |
1354 | dev_info(&dev->dev, "Skipping ibm,pmemory"); | |
1355 | } | |
1356 | ||
381ceda8 | 1357 | /* check if the available block * number of ptes will map everything */ |
bf6e2d56 AK |
1358 | if (query.largest_available_block < (1ULL << (len - page_shift))) { |
1359 | dev_dbg(&dev->dev, | |
1360 | "can't map partition max 0x%llx with %llu %llu-sized pages\n", | |
1361 | 1ULL << len, | |
1362 | query.largest_available_block, | |
1363 | 1ULL << page_shift); | |
381ceda8 | 1364 | |
381ceda8 LB |
1365 | len = order_base_2(query.largest_available_block << page_shift); |
1366 | win_name = DMA64_PROPNAME; | |
1367 | } else { | |
1368 | direct_mapping = true; | |
1369 | win_name = DIRECT64_PROPNAME; | |
4e8b0cf4 | 1370 | } |
4e8b0cf4 | 1371 | |
b73a635f | 1372 | ret = create_ddw(dev, ddw_avail, &create, page_shift, len); |
4e8b0cf4 | 1373 | if (ret != 0) |
7ed2ed2d | 1374 | goto out_failed; |
4e8b0cf4 | 1375 | |
b7c670d6 RH |
1376 | dev_dbg(&dev->dev, "created tce table LIOBN 0x%x for %pOF\n", |
1377 | create.liobn, dn); | |
4e8b0cf4 | 1378 | |
7ed2ed2d | 1379 | win_addr = ((u64)create.addr_hi << 32) | create.addr_lo; |
381ceda8 LB |
1380 | win64 = ddw_property_create(win_name, create.liobn, win_addr, page_shift, len); |
1381 | ||
7ed2ed2d LB |
1382 | if (!win64) { |
1383 | dev_info(&dev->dev, | |
1384 | "couldn't allocate property, property name, or value\n"); | |
1385 | goto out_remove_win; | |
1386 | } | |
1387 | ||
1388 | ret = of_add_property(pdn, win64); | |
1389 | if (ret) { | |
57dbbe59 | 1390 | dev_err(&dev->dev, "unable to add DMA window property for %pOF: %d", |
7ed2ed2d LB |
1391 | pdn, ret); |
1392 | goto out_free_prop; | |
1393 | } | |
1394 | ||
1395 | window = ddw_list_new_entry(pdn, win64->value); | |
4e8b0cf4 | 1396 | if (!window) |
7ed2ed2d | 1397 | goto out_del_prop; |
4e8b0cf4 | 1398 | |
381ceda8 LB |
1399 | if (direct_mapping) { |
1400 | /* DDW maps the whole partition, so enable direct DMA mapping */ | |
1401 | ret = walk_system_ram_range(0, memblock_end_of_DRAM() >> PAGE_SHIFT, | |
1402 | win64->value, tce_setrange_multi_pSeriesLP_walk); | |
1403 | if (ret) { | |
57dbbe59 | 1404 | dev_info(&dev->dev, "failed to map DMA window for %pOF: %d\n", |
381ceda8 | 1405 | dn, ret); |
4e8b0cf4 | 1406 | |
7ed2ed2d LB |
1407 | /* Make sure to clean DDW if any TCE was set*/ |
1408 | clean_dma_window(pdn, win64->value); | |
381ceda8 LB |
1409 | goto out_del_list; |
1410 | } | |
1411 | } else { | |
1412 | struct iommu_table *newtbl; | |
1413 | int i; | |
d853adc7 | 1414 | unsigned long start = 0, end = 0; |
381ceda8 LB |
1415 | |
1416 | for (i = 0; i < ARRAY_SIZE(pci->phb->mem_resources); i++) { | |
1417 | const unsigned long mask = IORESOURCE_MEM_64 | IORESOURCE_MEM; | |
1418 | ||
1419 | /* Look for MMIO32 */ | |
d853adc7 AK |
1420 | if ((pci->phb->mem_resources[i].flags & mask) == IORESOURCE_MEM) { |
1421 | start = pci->phb->mem_resources[i].start; | |
1422 | end = pci->phb->mem_resources[i].end; | |
381ceda8 | 1423 | break; |
d853adc7 | 1424 | } |
381ceda8 LB |
1425 | } |
1426 | ||
381ceda8 LB |
1427 | /* New table for using DDW instead of the default DMA window */ |
1428 | newtbl = iommu_pseries_alloc_table(pci->phb->node); | |
1429 | if (!newtbl) { | |
1430 | dev_dbg(&dev->dev, "couldn't create new IOMMU table\n"); | |
1431 | goto out_del_list; | |
1432 | } | |
1433 | ||
1434 | iommu_table_setparms_common(newtbl, pci->phb->bus->number, create.liobn, win_addr, | |
1435 | 1UL << len, page_shift, NULL, &iommu_table_lpar_multi_ops); | |
d853adc7 | 1436 | iommu_init_table(newtbl, pci->phb->node, start, end); |
381ceda8 LB |
1437 | |
1438 | pci->table_group->tables[1] = newtbl; | |
1439 | ||
1440 | /* Keep default DMA window stuct if removed */ | |
1441 | if (default_win_removed) { | |
1442 | tbl->it_size = 0; | |
41ee7232 AK |
1443 | vfree(tbl->it_map); |
1444 | tbl->it_map = NULL; | |
381ceda8 LB |
1445 | } |
1446 | ||
1447 | set_iommu_table_base(&dev->dev, newtbl); | |
4e8b0cf4 NA |
1448 | } |
1449 | ||
57dbbe59 LB |
1450 | spin_lock(&dma_win_list_lock); |
1451 | list_add(&window->list, &dma_win_list); | |
1452 | spin_unlock(&dma_win_list_lock); | |
4e8b0cf4 | 1453 | |
7ed2ed2d | 1454 | dev->dev.archdata.dma_offset = win_addr; |
2ca73c54 | 1455 | ddw_enabled = true; |
4e8b0cf4 NA |
1456 | goto out_unlock; |
1457 | ||
7ed2ed2d | 1458 | out_del_list: |
7a19081f JL |
1459 | kfree(window); |
1460 | ||
7ed2ed2d LB |
1461 | out_del_prop: |
1462 | of_remove_property(pdn, win64); | |
4e8b0cf4 NA |
1463 | |
1464 | out_free_prop: | |
1465 | kfree(win64->name); | |
1466 | kfree(win64->value); | |
1467 | kfree(win64); | |
1468 | ||
7ed2ed2d LB |
1469 | out_remove_win: |
1470 | /* DDW is clean, so it's ok to call this directly. */ | |
1471 | __remove_dma_window(pdn, ddw_avail, create.liobn); | |
1472 | ||
ae69e1ed | 1473 | out_failed: |
8c0d5159 LB |
1474 | if (default_win_removed) |
1475 | reset_dma_window(dev, pdn); | |
25ebc45b | 1476 | |
61435690 NA |
1477 | fpdn = kzalloc(sizeof(*fpdn), GFP_KERNEL); |
1478 | if (!fpdn) | |
1479 | goto out_unlock; | |
1480 | fpdn->pdn = pdn; | |
1481 | list_add(&fpdn->list, &failed_ddw_pdn_list); | |
1482 | ||
4e8b0cf4 | 1483 | out_unlock: |
57dbbe59 | 1484 | mutex_unlock(&dma_win_init_mutex); |
bf6e2d56 AK |
1485 | |
1486 | /* | |
1487 | * If we have persistent memory and the window size is only as big | |
1488 | * as RAM, then we failed to create a window to cover persistent | |
1489 | * memory and need to set the DMA limit. | |
1490 | */ | |
381ceda8 | 1491 | if (pmem_present && ddw_enabled && direct_mapping && len == max_ram_len) |
2ca73c54 | 1492 | dev->dev.bus_dma_limit = dev->dev.archdata.dma_offset + (1ULL << len); |
bf6e2d56 | 1493 | |
381ceda8 | 1494 | return ddw_enabled && direct_mapping; |
4e8b0cf4 NA |
1495 | } |
1496 | ||
12d04eef | 1497 | static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) |
1da177e4 LT |
1498 | { |
1499 | struct device_node *pdn, *dn; | |
1500 | struct iommu_table *tbl; | |
2083f681 | 1501 | const __be32 *dma_window = NULL; |
1635317f | 1502 | struct pci_dn *pci; |
1da177e4 | 1503 | |
f7ebf352 | 1504 | pr_debug("pci_dma_dev_setup_pSeriesLP: %s\n", pci_name(dev)); |
12d04eef | 1505 | |
1da177e4 | 1506 | /* dev setup for LPAR is a little tricky, since the device tree might |
25985edc | 1507 | * contain the dma-window properties per-device and not necessarily |
1da177e4 LT |
1508 | * for the bus. So we need to search upwards in the tree until we |
1509 | * either hit a dma-window property, OR find a parent with a table | |
1510 | * already allocated. | |
1511 | */ | |
1512 | dn = pci_device_to_OF_node(dev); | |
b7c670d6 | 1513 | pr_debug(" node is %pOF\n", dn); |
5d2efba6 | 1514 | |
b348aa65 | 1515 | for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group; |
1635317f | 1516 | pdn = pdn->parent) { |
e2eb6392 | 1517 | dma_window = of_get_property(pdn, "ibm,dma-window", NULL); |
1da177e4 LT |
1518 | if (dma_window) |
1519 | break; | |
1520 | } | |
1521 | ||
650f7b3b LV |
1522 | if (!pdn || !PCI_DN(pdn)) { |
1523 | printk(KERN_WARNING "pci_dma_dev_setup_pSeriesLP: " | |
b7c670d6 RH |
1524 | "no DMA window found for pci dev=%s dn=%pOF\n", |
1525 | pci_name(dev), dn); | |
650f7b3b LV |
1526 | return; |
1527 | } | |
b7c670d6 | 1528 | pr_debug(" parent is %pOF\n", pdn); |
12d04eef | 1529 | |
e07102db | 1530 | pci = PCI_DN(pdn); |
b348aa65 AK |
1531 | if (!pci->table_group) { |
1532 | pci->table_group = iommu_pseries_alloc_group(pci->phb->node); | |
1533 | tbl = pci->table_group->tables[0]; | |
b6e1f6ad AK |
1534 | iommu_table_setparms_lpar(pci->phb, pdn, tbl, |
1535 | pci->table_group, dma_window); | |
fc8cba8f | 1536 | |
201ed7f3 | 1537 | iommu_init_table(tbl, pci->phb->node, 0, 0); |
b348aa65 AK |
1538 | iommu_register_group(pci->table_group, |
1539 | pci_domain_nr(pci->phb->bus), 0); | |
1540 | pr_debug(" created table: %p\n", pci->table_group); | |
de113217 | 1541 | } else { |
b348aa65 | 1542 | pr_debug(" found DMA window, table: %p\n", pci->table_group); |
1da177e4 LT |
1543 | } |
1544 | ||
b348aa65 | 1545 | set_iommu_table_base(&dev->dev, pci->table_group->tables[0]); |
c4e9d3c1 | 1546 | iommu_add_device(pci->table_group, &dev->dev); |
1da177e4 | 1547 | } |
4e8b0cf4 | 1548 | |
9ae2fdde | 1549 | static bool iommu_bypass_supported_pSeriesLP(struct pci_dev *pdev, u64 dma_mask) |
4e8b0cf4 | 1550 | { |
9ae2fdde | 1551 | struct device_node *dn = pci_device_to_OF_node(pdev), *pdn; |
2083f681 | 1552 | const __be32 *dma_window = NULL; |
4e8b0cf4 | 1553 | |
4e8b0cf4 | 1554 | /* only attempt to use a new window if 64-bit DMA is requested */ |
9ae2fdde CH |
1555 | if (dma_mask < DMA_BIT_MASK(64)) |
1556 | return false; | |
4e8b0cf4 | 1557 | |
9ae2fdde | 1558 | dev_dbg(&pdev->dev, "node is %pOF\n", dn); |
4e8b0cf4 | 1559 | |
9ae2fdde CH |
1560 | /* |
1561 | * the device tree might contain the dma-window properties | |
1562 | * per-device and not necessarily for the bus. So we need to | |
1563 | * search upwards in the tree until we either hit a dma-window | |
1564 | * property, OR find a parent with a table already allocated. | |
1565 | */ | |
1566 | for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group; | |
1567 | pdn = pdn->parent) { | |
1568 | dma_window = of_get_property(pdn, "ibm,dma-window", NULL); | |
1569 | if (dma_window) | |
1570 | break; | |
4e8b0cf4 NA |
1571 | } |
1572 | ||
2ca73c54 LB |
1573 | if (pdn && PCI_DN(pdn)) |
1574 | return enable_ddw(pdev, pdn); | |
6a5c7be5 | 1575 | |
9ae2fdde | 1576 | return false; |
6a5c7be5 MM |
1577 | } |
1578 | ||
4e8b0cf4 NA |
1579 | static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action, |
1580 | void *data) | |
1581 | { | |
57dbbe59 | 1582 | struct dma_win *window; |
4e8b0cf4 NA |
1583 | struct memory_notify *arg = data; |
1584 | int ret = 0; | |
1585 | ||
1586 | switch (action) { | |
1587 | case MEM_GOING_ONLINE: | |
57dbbe59 LB |
1588 | spin_lock(&dma_win_list_lock); |
1589 | list_for_each_entry(window, &dma_win_list, list) { | |
4e8b0cf4 NA |
1590 | ret |= tce_setrange_multi_pSeriesLP(arg->start_pfn, |
1591 | arg->nr_pages, window->prop); | |
1592 | /* XXX log error */ | |
1593 | } | |
57dbbe59 | 1594 | spin_unlock(&dma_win_list_lock); |
4e8b0cf4 NA |
1595 | break; |
1596 | case MEM_CANCEL_ONLINE: | |
1597 | case MEM_OFFLINE: | |
57dbbe59 LB |
1598 | spin_lock(&dma_win_list_lock); |
1599 | list_for_each_entry(window, &dma_win_list, list) { | |
4e8b0cf4 NA |
1600 | ret |= tce_clearrange_multi_pSeriesLP(arg->start_pfn, |
1601 | arg->nr_pages, window->prop); | |
1602 | /* XXX log error */ | |
1603 | } | |
57dbbe59 | 1604 | spin_unlock(&dma_win_list_lock); |
4e8b0cf4 NA |
1605 | break; |
1606 | default: | |
1607 | break; | |
1608 | } | |
1609 | if (ret && action != MEM_CANCEL_ONLINE) | |
1610 | return NOTIFY_BAD; | |
1611 | ||
1612 | return NOTIFY_OK; | |
1613 | } | |
1614 | ||
1615 | static struct notifier_block iommu_mem_nb = { | |
1616 | .notifier_call = iommu_mem_notifier, | |
1617 | }; | |
1618 | ||
f5242e5a | 1619 | static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *data) |
bed59275 SR |
1620 | { |
1621 | int err = NOTIFY_OK; | |
f5242e5a GL |
1622 | struct of_reconfig_data *rd = data; |
1623 | struct device_node *np = rd->dn; | |
bed59275 | 1624 | struct pci_dn *pci = PCI_DN(np); |
57dbbe59 | 1625 | struct dma_win *window; |
bed59275 SR |
1626 | |
1627 | switch (action) { | |
1cf3d8b3 | 1628 | case OF_RECONFIG_DETACH_NODE: |
5efbabe0 GS |
1629 | /* |
1630 | * Removing the property will invoke the reconfig | |
1631 | * notifier again, which causes dead-lock on the | |
1632 | * read-write semaphore of the notifier chain. So | |
1633 | * we have to remove the property when releasing | |
1634 | * the device node. | |
1635 | */ | |
381ceda8 LB |
1636 | if (remove_ddw(np, false, DIRECT64_PROPNAME)) |
1637 | remove_ddw(np, false, DMA64_PROPNAME); | |
1638 | ||
b348aa65 AK |
1639 | if (pci && pci->table_group) |
1640 | iommu_pseries_free_group(pci->table_group, | |
ac9a5889 | 1641 | np->full_name); |
4e8b0cf4 | 1642 | |
57dbbe59 LB |
1643 | spin_lock(&dma_win_list_lock); |
1644 | list_for_each_entry(window, &dma_win_list, list) { | |
4e8b0cf4 NA |
1645 | if (window->device == np) { |
1646 | list_del(&window->list); | |
1647 | kfree(window); | |
1648 | break; | |
1649 | } | |
1650 | } | |
57dbbe59 | 1651 | spin_unlock(&dma_win_list_lock); |
bed59275 SR |
1652 | break; |
1653 | default: | |
1654 | err = NOTIFY_DONE; | |
1655 | break; | |
1656 | } | |
1657 | return err; | |
1658 | } | |
1659 | ||
1660 | static struct notifier_block iommu_reconfig_nb = { | |
1661 | .notifier_call = iommu_reconfig_notifier, | |
1662 | }; | |
1da177e4 | 1663 | |
1da177e4 LT |
1664 | /* These are called very early. */ |
1665 | void iommu_init_early_pSeries(void) | |
1666 | { | |
a8daac8a | 1667 | if (of_chosen && of_get_property(of_chosen, "linux,iommu-off", NULL)) |
1da177e4 | 1668 | return; |
1da177e4 | 1669 | |
57cfb814 | 1670 | if (firmware_has_feature(FW_FEATURE_LPAR)) { |
38ae9ec4 DA |
1671 | pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeriesLP; |
1672 | pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeriesLP; | |
9ae2fdde CH |
1673 | if (!disable_ddw) |
1674 | pseries_pci_controller_ops.iommu_bypass_supported = | |
1675 | iommu_bypass_supported_pSeriesLP; | |
1da177e4 | 1676 | } else { |
38ae9ec4 DA |
1677 | pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeries; |
1678 | pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeries; | |
1da177e4 LT |
1679 | } |
1680 | ||
1681 | ||
1cf3d8b3 | 1682 | of_reconfig_notifier_register(&iommu_reconfig_nb); |
4e8b0cf4 | 1683 | register_memory_notifier(&iommu_mem_nb); |
1da177e4 | 1684 | |
d862b441 | 1685 | set_pci_dma_ops(&dma_iommu_ops); |
1da177e4 LT |
1686 | } |
1687 | ||
4e89a2d8 WS |
1688 | static int __init disable_multitce(char *str) |
1689 | { | |
1690 | if (strcmp(str, "off") == 0 && | |
1691 | firmware_has_feature(FW_FEATURE_LPAR) && | |
17a0364c AK |
1692 | (firmware_has_feature(FW_FEATURE_PUT_TCE_IND) || |
1693 | firmware_has_feature(FW_FEATURE_STUFF_TCE))) { | |
4e89a2d8 | 1694 | printk(KERN_INFO "Disabling MULTITCE firmware feature\n"); |
17a0364c AK |
1695 | powerpc_firmware_features &= |
1696 | ~(FW_FEATURE_PUT_TCE_IND | FW_FEATURE_STUFF_TCE); | |
4e89a2d8 WS |
1697 | } |
1698 | return 1; | |
1699 | } | |
1700 | ||
1701 | __setup("multitce=", disable_multitce); | |
4ad04e59 | 1702 | |
c4e9d3c1 AK |
1703 | static int tce_iommu_bus_notifier(struct notifier_block *nb, |
1704 | unsigned long action, void *data) | |
1705 | { | |
1706 | struct device *dev = data; | |
1707 | ||
1708 | switch (action) { | |
1709 | case BUS_NOTIFY_DEL_DEVICE: | |
1710 | iommu_del_device(dev); | |
1711 | return 0; | |
1712 | default: | |
1713 | return 0; | |
1714 | } | |
1715 | } | |
1716 | ||
1717 | static struct notifier_block tce_iommu_bus_nb = { | |
1718 | .notifier_call = tce_iommu_bus_notifier, | |
1719 | }; | |
1720 | ||
1721 | static int __init tce_iommu_bus_notifier_init(void) | |
1722 | { | |
1723 | bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb); | |
1724 | return 0; | |
1725 | } | |
4ad04e59 | 1726 | machine_subsys_initcall_sync(pseries, tce_iommu_bus_notifier_init); |