]>
Commit | Line | Data |
---|---|---|
1a59d1b8 | 1 | // SPDX-License-Identifier: GPL-2.0-or-later |
1da177e4 | 2 | /* |
1da177e4 LT |
3 | * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation |
4 | * | |
bc97ce95 | 5 | * Rewrite, cleanup: |
1da177e4 | 6 | * |
91f14480 | 7 | * Copyright (C) 2004 Olof Johansson <olof@lixom.net>, IBM Corporation |
bc97ce95 | 8 | * Copyright (C) 2006 Olof Johansson <olof@lixom.net> |
1da177e4 LT |
9 | * |
10 | * Dynamic DMA mapping support, pSeries-specific parts, both SMP and LPAR. | |
1da177e4 LT |
11 | */ |
12 | ||
1da177e4 LT |
13 | #include <linux/init.h> |
14 | #include <linux/types.h> | |
15 | #include <linux/slab.h> | |
16 | #include <linux/mm.h> | |
beacc6da | 17 | #include <linux/memblock.h> |
1da177e4 LT |
18 | #include <linux/spinlock.h> |
19 | #include <linux/string.h> | |
20 | #include <linux/pci.h> | |
21 | #include <linux/dma-mapping.h> | |
62a8bd6c | 22 | #include <linux/crash_dump.h> |
4e8b0cf4 | 23 | #include <linux/memory.h> |
1cf3d8b3 | 24 | #include <linux/of.h> |
ac9a5889 | 25 | #include <linux/iommu.h> |
0eaf4def | 26 | #include <linux/rculist.h> |
1da177e4 LT |
27 | #include <asm/io.h> |
28 | #include <asm/prom.h> | |
29 | #include <asm/rtas.h> | |
1da177e4 LT |
30 | #include <asm/iommu.h> |
31 | #include <asm/pci-bridge.h> | |
32 | #include <asm/machdep.h> | |
1ababe11 | 33 | #include <asm/firmware.h> |
c707ffcf | 34 | #include <asm/tce.h> |
d387899f | 35 | #include <asm/ppc-pci.h> |
2249ca9d | 36 | #include <asm/udbg.h> |
4e8b0cf4 | 37 | #include <asm/mmzone.h> |
212bebb4 | 38 | #include <asm/plpar_wrappers.h> |
a1218720 | 39 | |
38ae9ec4 | 40 | #include "pseries.h" |
1da177e4 | 41 | |
b348aa65 AK |
42 | static struct iommu_table_group *iommu_pseries_alloc_group(int node) |
43 | { | |
4dd9eab3 ME |
44 | struct iommu_table_group *table_group; |
45 | struct iommu_table *tbl; | |
b348aa65 AK |
46 | |
47 | table_group = kzalloc_node(sizeof(struct iommu_table_group), GFP_KERNEL, | |
48 | node); | |
49 | if (!table_group) | |
4dd9eab3 | 50 | return NULL; |
b348aa65 AK |
51 | |
52 | tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, node); | |
53 | if (!tbl) | |
4dd9eab3 | 54 | goto free_group; |
b348aa65 | 55 | |
0eaf4def | 56 | INIT_LIST_HEAD_RCU(&tbl->it_group_list); |
e5afdf9d | 57 | kref_init(&tbl->it_kref); |
0eaf4def | 58 | |
b348aa65 AK |
59 | table_group->tables[0] = tbl; |
60 | ||
61 | return table_group; | |
62 | ||
4dd9eab3 ME |
63 | free_group: |
64 | kfree(table_group); | |
b348aa65 AK |
65 | return NULL; |
66 | } | |
67 | ||
68 | static void iommu_pseries_free_group(struct iommu_table_group *table_group, | |
ac9a5889 AK |
69 | const char *node_name) |
70 | { | |
b348aa65 AK |
71 | struct iommu_table *tbl; |
72 | ||
73 | if (!table_group) | |
74 | return; | |
75 | ||
0eaf4def | 76 | tbl = table_group->tables[0]; |
ac9a5889 | 77 | #ifdef CONFIG_IOMMU_API |
b348aa65 AK |
78 | if (table_group->group) { |
79 | iommu_group_put(table_group->group); | |
80 | BUG_ON(table_group->group); | |
ac9a5889 AK |
81 | } |
82 | #endif | |
e5afdf9d | 83 | iommu_tce_table_put(tbl); |
b348aa65 AK |
84 | |
85 | kfree(table_group); | |
ac9a5889 AK |
86 | } |
87 | ||
6490c490 | 88 | static int tce_build_pSeries(struct iommu_table *tbl, long index, |
bc97ce95 | 89 | long npages, unsigned long uaddr, |
4f3dd8a0 | 90 | enum dma_data_direction direction, |
00085f1e | 91 | unsigned long attrs) |
1da177e4 | 92 | { |
bc97ce95 | 93 | u64 proto_tce; |
c05f57fd | 94 | __be64 *tcep; |
bc97ce95 | 95 | u64 rpn; |
1da177e4 | 96 | |
bc97ce95 | 97 | proto_tce = TCE_PCI_READ; // Read allowed |
1da177e4 LT |
98 | |
99 | if (direction != DMA_TO_DEVICE) | |
bc97ce95 | 100 | proto_tce |= TCE_PCI_WRITE; |
1da177e4 | 101 | |
c05f57fd | 102 | tcep = ((__be64 *)tbl->it_base) + index; |
1da177e4 LT |
103 | |
104 | while (npages--) { | |
95f72d1e | 105 | /* can't move this out since we might cross MEMBLOCK boundary */ |
474e3d56 | 106 | rpn = __pa(uaddr) >> TCE_SHIFT; |
df015604 | 107 | *tcep = cpu_to_be64(proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT); |
1da177e4 | 108 | |
d0035c62 | 109 | uaddr += TCE_PAGE_SIZE; |
bc97ce95 | 110 | tcep++; |
1da177e4 | 111 | } |
6490c490 | 112 | return 0; |
1da177e4 LT |
113 | } |
114 | ||
115 | ||
116 | static void tce_free_pSeries(struct iommu_table *tbl, long index, long npages) | |
117 | { | |
c05f57fd | 118 | __be64 *tcep; |
1da177e4 | 119 | |
c05f57fd | 120 | tcep = ((__be64 *)tbl->it_base) + index; |
bc97ce95 OJ |
121 | |
122 | while (npages--) | |
123 | *(tcep++) = 0; | |
1da177e4 LT |
124 | } |
125 | ||
5f50867b HM |
126 | static unsigned long tce_get_pseries(struct iommu_table *tbl, long index) |
127 | { | |
df015604 | 128 | __be64 *tcep; |
5f50867b | 129 | |
df015604 | 130 | tcep = ((__be64 *)tbl->it_base) + index; |
5f50867b | 131 | |
df015604 | 132 | return be64_to_cpu(*tcep); |
5f50867b | 133 | } |
1da177e4 | 134 | |
6490c490 RJ |
135 | static void tce_free_pSeriesLP(struct iommu_table*, long, long); |
136 | static void tce_freemulti_pSeriesLP(struct iommu_table*, long, long); | |
137 | ||
138 | static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum, | |
1da177e4 | 139 | long npages, unsigned long uaddr, |
4f3dd8a0 | 140 | enum dma_data_direction direction, |
00085f1e | 141 | unsigned long attrs) |
1da177e4 | 142 | { |
6490c490 | 143 | u64 rc = 0; |
bc97ce95 OJ |
144 | u64 proto_tce, tce; |
145 | u64 rpn; | |
6490c490 RJ |
146 | int ret = 0; |
147 | long tcenum_start = tcenum, npages_start = npages; | |
1da177e4 | 148 | |
474e3d56 | 149 | rpn = __pa(uaddr) >> TCE_SHIFT; |
bc97ce95 | 150 | proto_tce = TCE_PCI_READ; |
1da177e4 | 151 | if (direction != DMA_TO_DEVICE) |
bc97ce95 | 152 | proto_tce |= TCE_PCI_WRITE; |
1da177e4 LT |
153 | |
154 | while (npages--) { | |
bc97ce95 OJ |
155 | tce = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT; |
156 | rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, tce); | |
157 | ||
6490c490 RJ |
158 | if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) { |
159 | ret = (int)rc; | |
160 | tce_free_pSeriesLP(tbl, tcenum_start, | |
161 | (npages_start - (npages + 1))); | |
162 | break; | |
163 | } | |
164 | ||
1da177e4 | 165 | if (rc && printk_ratelimit()) { |
fe333321 IM |
166 | printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc); |
167 | printk("\tindex = 0x%llx\n", (u64)tbl->it_index); | |
168 | printk("\ttcenum = 0x%llx\n", (u64)tcenum); | |
169 | printk("\ttce val = 0x%llx\n", tce ); | |
4ff52b4d | 170 | dump_stack(); |
1da177e4 | 171 | } |
bc97ce95 | 172 | |
1da177e4 | 173 | tcenum++; |
bc97ce95 | 174 | rpn++; |
1da177e4 | 175 | } |
6490c490 | 176 | return ret; |
1da177e4 LT |
177 | } |
178 | ||
df015604 | 179 | static DEFINE_PER_CPU(__be64 *, tce_page); |
1da177e4 | 180 | |
6490c490 | 181 | static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, |
1da177e4 | 182 | long npages, unsigned long uaddr, |
4f3dd8a0 | 183 | enum dma_data_direction direction, |
00085f1e | 184 | unsigned long attrs) |
1da177e4 | 185 | { |
6490c490 | 186 | u64 rc = 0; |
bc97ce95 | 187 | u64 proto_tce; |
df015604 | 188 | __be64 *tcep; |
bc97ce95 | 189 | u64 rpn; |
1da177e4 | 190 | long l, limit; |
6490c490 RJ |
191 | long tcenum_start = tcenum, npages_start = npages; |
192 | int ret = 0; | |
c1703e85 | 193 | unsigned long flags; |
1da177e4 | 194 | |
da004c36 | 195 | if ((npages == 1) || !firmware_has_feature(FW_FEATURE_MULTITCE)) { |
6490c490 RJ |
196 | return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, |
197 | direction, attrs); | |
541b2755 | 198 | } |
1da177e4 | 199 | |
c1703e85 AB |
200 | local_irq_save(flags); /* to protect tcep and the page behind it */ |
201 | ||
69111bac | 202 | tcep = __this_cpu_read(tce_page); |
1da177e4 LT |
203 | |
204 | /* This is safe to do since interrupts are off when we're called | |
205 | * from iommu_alloc{,_sg}() | |
206 | */ | |
207 | if (!tcep) { | |
df015604 | 208 | tcep = (__be64 *)__get_free_page(GFP_ATOMIC); |
1da177e4 | 209 | /* If allocation fails, fall back to the loop implementation */ |
541b2755 | 210 | if (!tcep) { |
c1703e85 | 211 | local_irq_restore(flags); |
6490c490 | 212 | return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, |
4f3dd8a0 | 213 | direction, attrs); |
541b2755 | 214 | } |
69111bac | 215 | __this_cpu_write(tce_page, tcep); |
1da177e4 LT |
216 | } |
217 | ||
474e3d56 | 218 | rpn = __pa(uaddr) >> TCE_SHIFT; |
bc97ce95 | 219 | proto_tce = TCE_PCI_READ; |
1da177e4 | 220 | if (direction != DMA_TO_DEVICE) |
bc97ce95 | 221 | proto_tce |= TCE_PCI_WRITE; |
1da177e4 LT |
222 | |
223 | /* We can map max one pageful of TCEs at a time */ | |
224 | do { | |
225 | /* | |
226 | * Set up the page with TCE data, looping through and setting | |
227 | * the values. | |
228 | */ | |
bc97ce95 | 229 | limit = min_t(long, npages, 4096/TCE_ENTRY_SIZE); |
1da177e4 LT |
230 | |
231 | for (l = 0; l < limit; l++) { | |
df015604 | 232 | tcep[l] = cpu_to_be64(proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT); |
bc97ce95 | 233 | rpn++; |
1da177e4 LT |
234 | } |
235 | ||
236 | rc = plpar_tce_put_indirect((u64)tbl->it_index, | |
237 | (u64)tcenum << 12, | |
474e3d56 | 238 | (u64)__pa(tcep), |
1da177e4 LT |
239 | limit); |
240 | ||
241 | npages -= limit; | |
242 | tcenum += limit; | |
243 | } while (npages > 0 && !rc); | |
244 | ||
c1703e85 AB |
245 | local_irq_restore(flags); |
246 | ||
6490c490 RJ |
247 | if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) { |
248 | ret = (int)rc; | |
249 | tce_freemulti_pSeriesLP(tbl, tcenum_start, | |
250 | (npages_start - (npages + limit))); | |
251 | return ret; | |
252 | } | |
253 | ||
1da177e4 | 254 | if (rc && printk_ratelimit()) { |
fe333321 IM |
255 | printk("tce_buildmulti_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc); |
256 | printk("\tindex = 0x%llx\n", (u64)tbl->it_index); | |
257 | printk("\tnpages = 0x%llx\n", (u64)npages); | |
258 | printk("\ttce[0] val = 0x%llx\n", tcep[0]); | |
4ff52b4d | 259 | dump_stack(); |
1da177e4 | 260 | } |
6490c490 | 261 | return ret; |
1da177e4 LT |
262 | } |
263 | ||
264 | static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages) | |
265 | { | |
266 | u64 rc; | |
1da177e4 | 267 | |
1da177e4 | 268 | while (npages--) { |
bc97ce95 | 269 | rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, 0); |
1da177e4 LT |
270 | |
271 | if (rc && printk_ratelimit()) { | |
fe333321 IM |
272 | printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc); |
273 | printk("\tindex = 0x%llx\n", (u64)tbl->it_index); | |
274 | printk("\ttcenum = 0x%llx\n", (u64)tcenum); | |
4ff52b4d | 275 | dump_stack(); |
1da177e4 LT |
276 | } |
277 | ||
278 | tcenum++; | |
279 | } | |
280 | } | |
281 | ||
282 | ||
283 | static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages) | |
284 | { | |
285 | u64 rc; | |
1da177e4 | 286 | |
da004c36 AK |
287 | if (!firmware_has_feature(FW_FEATURE_MULTITCE)) |
288 | return tce_free_pSeriesLP(tbl, tcenum, npages); | |
289 | ||
bc97ce95 | 290 | rc = plpar_tce_stuff((u64)tbl->it_index, (u64)tcenum << 12, 0, npages); |
1da177e4 LT |
291 | |
292 | if (rc && printk_ratelimit()) { | |
293 | printk("tce_freemulti_pSeriesLP: plpar_tce_stuff failed\n"); | |
fe333321 IM |
294 | printk("\trc = %lld\n", rc); |
295 | printk("\tindex = 0x%llx\n", (u64)tbl->it_index); | |
296 | printk("\tnpages = 0x%llx\n", (u64)npages); | |
4ff52b4d | 297 | dump_stack(); |
1da177e4 LT |
298 | } |
299 | } | |
300 | ||
5f50867b HM |
301 | static unsigned long tce_get_pSeriesLP(struct iommu_table *tbl, long tcenum) |
302 | { | |
303 | u64 rc; | |
304 | unsigned long tce_ret; | |
305 | ||
5f50867b HM |
306 | rc = plpar_tce_get((u64)tbl->it_index, (u64)tcenum << 12, &tce_ret); |
307 | ||
308 | if (rc && printk_ratelimit()) { | |
fe333321 IM |
309 | printk("tce_get_pSeriesLP: plpar_tce_get failed. rc=%lld\n", rc); |
310 | printk("\tindex = 0x%llx\n", (u64)tbl->it_index); | |
311 | printk("\ttcenum = 0x%llx\n", (u64)tcenum); | |
4ff52b4d | 312 | dump_stack(); |
5f50867b HM |
313 | } |
314 | ||
315 | return tce_ret; | |
316 | } | |
317 | ||
25985edc | 318 | /* this is compatible with cells for the device tree property */ |
4e8b0cf4 NA |
319 | struct dynamic_dma_window_prop { |
320 | __be32 liobn; /* tce table number */ | |
321 | __be64 dma_base; /* address hi,lo */ | |
322 | __be32 tce_shift; /* ilog2(tce_page_size) */ | |
323 | __be32 window_shift; /* ilog2(tce_window_size) */ | |
324 | }; | |
325 | ||
326 | struct direct_window { | |
327 | struct device_node *device; | |
328 | const struct dynamic_dma_window_prop *prop; | |
329 | struct list_head list; | |
330 | }; | |
331 | ||
332 | /* Dynamic DMA Window support */ | |
333 | struct ddw_query_response { | |
9410e018 AK |
334 | u32 windows_available; |
335 | u32 largest_available_block; | |
336 | u32 page_size; | |
337 | u32 migration_capable; | |
4e8b0cf4 NA |
338 | }; |
339 | ||
340 | struct ddw_create_response { | |
9410e018 AK |
341 | u32 liobn; |
342 | u32 addr_hi; | |
343 | u32 addr_lo; | |
4e8b0cf4 NA |
344 | }; |
345 | ||
346 | static LIST_HEAD(direct_window_list); | |
347 | /* prevents races between memory on/offline and window creation */ | |
348 | static DEFINE_SPINLOCK(direct_window_list_lock); | |
349 | /* protects initializing window twice for same device */ | |
350 | static DEFINE_MUTEX(direct_window_init_mutex); | |
351 | #define DIRECT64_PROPNAME "linux,direct64-ddr-window-info" | |
352 | ||
353 | static int tce_clearrange_multi_pSeriesLP(unsigned long start_pfn, | |
354 | unsigned long num_pfn, const void *arg) | |
355 | { | |
356 | const struct dynamic_dma_window_prop *maprange = arg; | |
357 | int rc; | |
358 | u64 tce_size, num_tce, dma_offset, next; | |
359 | u32 tce_shift; | |
360 | long limit; | |
361 | ||
362 | tce_shift = be32_to_cpu(maprange->tce_shift); | |
363 | tce_size = 1ULL << tce_shift; | |
364 | next = start_pfn << PAGE_SHIFT; | |
365 | num_tce = num_pfn << PAGE_SHIFT; | |
366 | ||
367 | /* round back to the beginning of the tce page size */ | |
368 | num_tce += next & (tce_size - 1); | |
369 | next &= ~(tce_size - 1); | |
370 | ||
371 | /* covert to number of tces */ | |
372 | num_tce |= tce_size - 1; | |
373 | num_tce >>= tce_shift; | |
374 | ||
375 | do { | |
376 | /* | |
377 | * Set up the page with TCE data, looping through and setting | |
378 | * the values. | |
379 | */ | |
380 | limit = min_t(long, num_tce, 512); | |
381 | dma_offset = next + be64_to_cpu(maprange->dma_base); | |
382 | ||
383 | rc = plpar_tce_stuff((u64)be32_to_cpu(maprange->liobn), | |
384 | dma_offset, | |
385 | 0, limit); | |
22b38298 | 386 | next += limit * tce_size; |
4e8b0cf4 NA |
387 | num_tce -= limit; |
388 | } while (num_tce > 0 && !rc); | |
389 | ||
390 | return rc; | |
391 | } | |
392 | ||
393 | static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, | |
394 | unsigned long num_pfn, const void *arg) | |
395 | { | |
396 | const struct dynamic_dma_window_prop *maprange = arg; | |
df015604 AB |
397 | u64 tce_size, num_tce, dma_offset, next, proto_tce, liobn; |
398 | __be64 *tcep; | |
4e8b0cf4 NA |
399 | u32 tce_shift; |
400 | u64 rc = 0; | |
401 | long l, limit; | |
402 | ||
403 | local_irq_disable(); /* to protect tcep and the page behind it */ | |
69111bac | 404 | tcep = __this_cpu_read(tce_page); |
4e8b0cf4 NA |
405 | |
406 | if (!tcep) { | |
df015604 | 407 | tcep = (__be64 *)__get_free_page(GFP_ATOMIC); |
4e8b0cf4 NA |
408 | if (!tcep) { |
409 | local_irq_enable(); | |
410 | return -ENOMEM; | |
411 | } | |
69111bac | 412 | __this_cpu_write(tce_page, tcep); |
4e8b0cf4 NA |
413 | } |
414 | ||
415 | proto_tce = TCE_PCI_READ | TCE_PCI_WRITE; | |
416 | ||
417 | liobn = (u64)be32_to_cpu(maprange->liobn); | |
418 | tce_shift = be32_to_cpu(maprange->tce_shift); | |
419 | tce_size = 1ULL << tce_shift; | |
420 | next = start_pfn << PAGE_SHIFT; | |
421 | num_tce = num_pfn << PAGE_SHIFT; | |
422 | ||
423 | /* round back to the beginning of the tce page size */ | |
424 | num_tce += next & (tce_size - 1); | |
425 | next &= ~(tce_size - 1); | |
426 | ||
427 | /* covert to number of tces */ | |
428 | num_tce |= tce_size - 1; | |
429 | num_tce >>= tce_shift; | |
430 | ||
431 | /* We can map max one pageful of TCEs at a time */ | |
432 | do { | |
433 | /* | |
434 | * Set up the page with TCE data, looping through and setting | |
435 | * the values. | |
436 | */ | |
437 | limit = min_t(long, num_tce, 4096/TCE_ENTRY_SIZE); | |
438 | dma_offset = next + be64_to_cpu(maprange->dma_base); | |
439 | ||
440 | for (l = 0; l < limit; l++) { | |
df015604 | 441 | tcep[l] = cpu_to_be64(proto_tce | next); |
4e8b0cf4 NA |
442 | next += tce_size; |
443 | } | |
444 | ||
445 | rc = plpar_tce_put_indirect(liobn, | |
446 | dma_offset, | |
474e3d56 | 447 | (u64)__pa(tcep), |
4e8b0cf4 NA |
448 | limit); |
449 | ||
450 | num_tce -= limit; | |
451 | } while (num_tce > 0 && !rc); | |
452 | ||
453 | /* error cleanup: caller will clear whole range */ | |
454 | ||
455 | local_irq_enable(); | |
456 | return rc; | |
457 | } | |
458 | ||
/*
 * Adapter with a non-const @arg that forwards unchanged to
 * tce_setrange_multi_pSeriesLP().
 */
static int tce_setrange_multi_pSeriesLP_walk(unsigned long start_pfn,
		unsigned long num_pfn, void *arg)
{
	const void *maprange = arg;

	return tce_setrange_multi_pSeriesLP(start_pfn, num_pfn, maprange);
}
464 | ||
1da177e4 LT |
465 | static void iommu_table_setparms(struct pci_controller *phb, |
466 | struct device_node *dn, | |
bc97ce95 | 467 | struct iommu_table *tbl) |
1da177e4 LT |
468 | { |
469 | struct device_node *node; | |
b7d6bf4f | 470 | const unsigned long *basep; |
9938c474 | 471 | const u32 *sizep; |
1da177e4 | 472 | |
44ef3390 | 473 | node = phb->dn; |
1da177e4 | 474 | |
e2eb6392 SR |
475 | basep = of_get_property(node, "linux,tce-base", NULL); |
476 | sizep = of_get_property(node, "linux,tce-size", NULL); | |
1da177e4 | 477 | if (basep == NULL || sizep == NULL) { |
b7c670d6 RH |
478 | printk(KERN_ERR "PCI_DMA: iommu_table_setparms: %pOF has " |
479 | "missing tce entries !\n", dn); | |
1da177e4 LT |
480 | return; |
481 | } | |
482 | ||
483 | tbl->it_base = (unsigned long)__va(*basep); | |
5f50867b | 484 | |
62a8bd6c | 485 | if (!is_kdump_kernel()) |
54622f10 | 486 | memset((void *)tbl->it_base, 0, *sizep); |
1da177e4 LT |
487 | |
488 | tbl->it_busno = phb->bus->number; | |
3a553170 | 489 | tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K; |
bc97ce95 | 490 | |
1da177e4 | 491 | /* Units of tce entries */ |
3a553170 | 492 | tbl->it_offset = phb->dma_window_base_cur >> tbl->it_page_shift; |
bc97ce95 | 493 | |
1da177e4 | 494 | /* Test if we are going over 2GB of DMA space */ |
3c2822cc OJ |
495 | if (phb->dma_window_base_cur + phb->dma_window_size > 0x80000000ul) { |
496 | udbg_printf("PCI_DMA: Unexpected number of IOAs under this PHB.\n"); | |
bc97ce95 | 497 | panic("PCI_DMA: Unexpected number of IOAs under this PHB.\n"); |
3c2822cc | 498 | } |
bc97ce95 | 499 | |
1da177e4 LT |
500 | phb->dma_window_base_cur += phb->dma_window_size; |
501 | ||
502 | /* Set the tce table size - measured in entries */ | |
3a553170 | 503 | tbl->it_size = phb->dma_window_size >> tbl->it_page_shift; |
1da177e4 LT |
504 | |
505 | tbl->it_index = 0; | |
506 | tbl->it_blocksize = 16; | |
507 | tbl->it_type = TCE_PCI; | |
508 | } | |
509 | ||
510 | /* | |
511 | * iommu_table_setparms_lpar | |
512 | * | |
513 | * Function: On pSeries LPAR systems, return TCE table info, given a pci bus. | |
1da177e4 LT |
514 | */ |
515 | static void iommu_table_setparms_lpar(struct pci_controller *phb, | |
516 | struct device_node *dn, | |
517 | struct iommu_table *tbl, | |
b6e1f6ad | 518 | struct iommu_table_group *table_group, |
2083f681 | 519 | const __be32 *dma_window) |
1da177e4 | 520 | { |
4c76e0bc JK |
521 | unsigned long offset, size; |
522 | ||
4c76e0bc | 523 | of_parse_dma_window(dn, dma_window, &tbl->it_index, &offset, &size); |
1da177e4 | 524 | |
b8c49def | 525 | tbl->it_busno = phb->bus->number; |
3a553170 | 526 | tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K; |
1da177e4 | 527 | tbl->it_base = 0; |
1da177e4 LT |
528 | tbl->it_blocksize = 16; |
529 | tbl->it_type = TCE_PCI; | |
3a553170 AP |
530 | tbl->it_offset = offset >> tbl->it_page_shift; |
531 | tbl->it_size = size >> tbl->it_page_shift; | |
b6e1f6ad AK |
532 | |
533 | table_group->tce32_start = offset; | |
534 | table_group->tce32_size = size; | |
1da177e4 LT |
535 | } |
536 | ||
da004c36 AK |
537 | struct iommu_table_ops iommu_table_pseries_ops = { |
538 | .set = tce_build_pSeries, | |
539 | .clear = tce_free_pSeries, | |
540 | .get = tce_get_pseries | |
541 | }; | |
542 | ||
12d04eef | 543 | static void pci_dma_bus_setup_pSeries(struct pci_bus *bus) |
1da177e4 | 544 | { |
3c2822cc | 545 | struct device_node *dn; |
1da177e4 | 546 | struct iommu_table *tbl; |
3c2822cc OJ |
547 | struct device_node *isa_dn, *isa_dn_orig; |
548 | struct device_node *tmp; | |
549 | struct pci_dn *pci; | |
550 | int children; | |
1da177e4 | 551 | |
3c2822cc | 552 | dn = pci_bus_to_OF_node(bus); |
12d04eef | 553 | |
b7c670d6 | 554 | pr_debug("pci_dma_bus_setup_pSeries: setting up bus %pOF\n", dn); |
3c2822cc OJ |
555 | |
556 | if (bus->self) { | |
557 | /* This is not a root bus, any setup will be done for the | |
558 | * device-side of the bridge in iommu_dev_setup_pSeries(). | |
559 | */ | |
560 | return; | |
561 | } | |
12d04eef | 562 | pci = PCI_DN(dn); |
3c2822cc OJ |
563 | |
564 | /* Check if the ISA bus on the system is under | |
565 | * this PHB. | |
1da177e4 | 566 | */ |
3c2822cc | 567 | isa_dn = isa_dn_orig = of_find_node_by_type(NULL, "isa"); |
1da177e4 | 568 | |
3c2822cc OJ |
569 | while (isa_dn && isa_dn != dn) |
570 | isa_dn = isa_dn->parent; | |
571 | ||
498b6514 | 572 | of_node_put(isa_dn_orig); |
1da177e4 | 573 | |
d3c58fb1 | 574 | /* Count number of direct PCI children of the PHB. */ |
3c2822cc | 575 | for (children = 0, tmp = dn->child; tmp; tmp = tmp->sibling) |
d3c58fb1 | 576 | children++; |
1da177e4 | 577 | |
f7ebf352 | 578 | pr_debug("Children: %d\n", children); |
1da177e4 | 579 | |
3c2822cc OJ |
580 | /* Calculate amount of DMA window per slot. Each window must be |
581 | * a power of two (due to pci_alloc_consistent requirements). | |
582 | * | |
583 | * Keep 256MB aside for PHBs with ISA. | |
584 | */ | |
1da177e4 | 585 | |
3c2822cc OJ |
586 | if (!isa_dn) { |
587 | /* No ISA/IDE - just set window size and return */ | |
588 | pci->phb->dma_window_size = 0x80000000ul; /* To be divided */ | |
589 | ||
590 | while (pci->phb->dma_window_size * children > 0x80000000ul) | |
591 | pci->phb->dma_window_size >>= 1; | |
41febbc8 | 592 | pr_debug("No ISA/IDE, window size is 0x%llx\n", |
f7ebf352 | 593 | pci->phb->dma_window_size); |
3c2822cc OJ |
594 | pci->phb->dma_window_base_cur = 0; |
595 | ||
596 | return; | |
1da177e4 | 597 | } |
3c2822cc OJ |
598 | |
599 | /* If we have ISA, then we probably have an IDE | |
600 | * controller too. Allocate a 128MB table but | |
601 | * skip the first 128MB to avoid stepping on ISA | |
602 | * space. | |
603 | */ | |
604 | pci->phb->dma_window_size = 0x8000000ul; | |
605 | pci->phb->dma_window_base_cur = 0x8000000ul; | |
606 | ||
b348aa65 AK |
607 | pci->table_group = iommu_pseries_alloc_group(pci->phb->node); |
608 | tbl = pci->table_group->tables[0]; | |
3c2822cc OJ |
609 | |
610 | iommu_table_setparms(pci->phb, dn, tbl); | |
da004c36 | 611 | tbl->it_ops = &iommu_table_pseries_ops; |
b348aa65 | 612 | iommu_init_table(tbl, pci->phb->node); |
3c2822cc OJ |
613 | |
614 | /* Divide the rest (1.75GB) among the children */ | |
615 | pci->phb->dma_window_size = 0x80000000ul; | |
616 | while (pci->phb->dma_window_size * children > 0x70000000ul) | |
617 | pci->phb->dma_window_size >>= 1; | |
618 | ||
41febbc8 | 619 | pr_debug("ISA/IDE, window size is 0x%llx\n", pci->phb->dma_window_size); |
1da177e4 LT |
620 | } |
621 | ||
b6e1f6ad AK |
#ifdef CONFIG_IOMMU_API
/*
 * Swap the TCE at entry @index: read the current value with
 * plpar_tce_get(), then store *@tce combined with the permission bits
 * for *@direction using plpar_tce_put().
 *
 * When both hcalls succeed, *@direction and *@tce are overwritten with
 * the direction and the address part of the previous TCE.  The
 * get/put pair runs under tbl->large_pool.lock with interrupts
 * disabled.  Returns the status of the last hcall issued.
 */
static int tce_exchange_pseries(struct iommu_table *tbl, long index, unsigned
				long *tce, enum dma_data_direction *direction)
{
	const unsigned long ioba = (unsigned long) index << tbl->it_page_shift;
	const u64 perms = iommu_direction_to_tce_perm(*direction);
	const unsigned long new_tce = *tce | perms;
	unsigned long old_tce = 0;
	unsigned long irq_flags;
	long rc;

	spin_lock_irqsave(&tbl->large_pool.lock, irq_flags);

	rc = plpar_tce_get((u64)tbl->it_index, ioba, &old_tce);
	if (!rc) {
		rc = plpar_tce_put((u64)tbl->it_index, ioba, new_tce);
		if (!rc) {
			/* Report what was mapped before the exchange. */
			*direction = iommu_tce_direction(old_tce);
			*tce = old_tce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
		}
	}

	spin_unlock_irqrestore(&tbl->large_pool.lock, irq_flags);

	return rc;
}
#endif
648 | ||
da004c36 AK |
649 | struct iommu_table_ops iommu_table_lpar_multi_ops = { |
650 | .set = tce_buildmulti_pSeriesLP, | |
b6e1f6ad AK |
651 | #ifdef CONFIG_IOMMU_API |
652 | .exchange = tce_exchange_pseries, | |
653 | #endif | |
da004c36 AK |
654 | .clear = tce_freemulti_pSeriesLP, |
655 | .get = tce_get_pSeriesLP | |
656 | }; | |
1da177e4 | 657 | |
12d04eef | 658 | static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) |
1da177e4 LT |
659 | { |
660 | struct iommu_table *tbl; | |
661 | struct device_node *dn, *pdn; | |
1635317f | 662 | struct pci_dn *ppci; |
2083f681 | 663 | const __be32 *dma_window = NULL; |
1da177e4 | 664 | |
1da177e4 LT |
665 | dn = pci_bus_to_OF_node(bus); |
666 | ||
b7c670d6 RH |
667 | pr_debug("pci_dma_bus_setup_pSeriesLP: setting up bus %pOF\n", |
668 | dn); | |
12d04eef | 669 | |
1da177e4 LT |
670 | /* Find nearest ibm,dma-window, walking up the device tree */ |
671 | for (pdn = dn; pdn != NULL; pdn = pdn->parent) { | |
e2eb6392 | 672 | dma_window = of_get_property(pdn, "ibm,dma-window", NULL); |
1da177e4 LT |
673 | if (dma_window != NULL) |
674 | break; | |
675 | } | |
676 | ||
677 | if (dma_window == NULL) { | |
f7ebf352 | 678 | pr_debug(" no ibm,dma-window property !\n"); |
1da177e4 LT |
679 | return; |
680 | } | |
681 | ||
e07102db | 682 | ppci = PCI_DN(pdn); |
12d04eef | 683 | |
b7c670d6 RH |
684 | pr_debug(" parent is %pOF, iommu_table: 0x%p\n", |
685 | pdn, ppci->table_group); | |
12d04eef | 686 | |
b348aa65 AK |
687 | if (!ppci->table_group) { |
688 | ppci->table_group = iommu_pseries_alloc_group(ppci->phb->node); | |
689 | tbl = ppci->table_group->tables[0]; | |
b6e1f6ad AK |
690 | iommu_table_setparms_lpar(ppci->phb, pdn, tbl, |
691 | ppci->table_group, dma_window); | |
da004c36 | 692 | tbl->it_ops = &iommu_table_lpar_multi_ops; |
b348aa65 AK |
693 | iommu_init_table(tbl, ppci->phb->node); |
694 | iommu_register_group(ppci->table_group, | |
695 | pci_domain_nr(bus), 0); | |
696 | pr_debug(" created table: %p\n", ppci->table_group); | |
1da177e4 | 697 | } |
1da177e4 LT |
698 | } |
699 | ||
700 | ||
12d04eef | 701 | static void pci_dma_dev_setup_pSeries(struct pci_dev *dev) |
1da177e4 | 702 | { |
12d04eef | 703 | struct device_node *dn; |
3c2822cc | 704 | struct iommu_table *tbl; |
1da177e4 | 705 | |
f7ebf352 | 706 | pr_debug("pci_dma_dev_setup_pSeries: %s\n", pci_name(dev)); |
1da177e4 | 707 | |
58f9b0b0 | 708 | dn = dev->dev.of_node; |
1da177e4 | 709 | |
3c2822cc OJ |
710 | /* If we're the direct child of a root bus, then we need to allocate |
711 | * an iommu table ourselves. The bus setup code should have setup | |
712 | * the window sizes already. | |
713 | */ | |
714 | if (!dev->bus->self) { | |
12d04eef BH |
715 | struct pci_controller *phb = PCI_DN(dn)->phb; |
716 | ||
f7ebf352 | 717 | pr_debug(" --> first child, no bridge. Allocating iommu table.\n"); |
b348aa65 AK |
718 | PCI_DN(dn)->table_group = iommu_pseries_alloc_group(phb->node); |
719 | tbl = PCI_DN(dn)->table_group->tables[0]; | |
12d04eef | 720 | iommu_table_setparms(phb, dn, tbl); |
da004c36 | 721 | tbl->it_ops = &iommu_table_pseries_ops; |
b348aa65 | 722 | iommu_init_table(tbl, phb->node); |
4617082e | 723 | set_iommu_table_base(&dev->dev, tbl); |
3c2822cc OJ |
724 | return; |
725 | } | |
726 | ||
727 | /* If this device is further down the bus tree, search upwards until | |
728 | * an already allocated iommu table is found and use that. | |
729 | */ | |
730 | ||
b348aa65 | 731 | while (dn && PCI_DN(dn) && PCI_DN(dn)->table_group == NULL) |
1da177e4 LT |
732 | dn = dn->parent; |
733 | ||
c409c631 | 734 | if (dn && PCI_DN(dn)) |
b348aa65 AK |
735 | set_iommu_table_base(&dev->dev, |
736 | PCI_DN(dn)->table_group->tables[0]); | |
c409c631 | 737 | else |
12d04eef BH |
738 | printk(KERN_WARNING "iommu: Device %s has no iommu table\n", |
739 | pci_name(dev)); | |
1da177e4 LT |
740 | } |
741 | ||
4e8b0cf4 NA |
742 | static int __read_mostly disable_ddw; |
743 | ||
744 | static int __init disable_ddw_setup(char *str) | |
745 | { | |
746 | disable_ddw = 1; | |
747 | printk(KERN_INFO "ppc iommu: disabling ddw.\n"); | |
748 | ||
749 | return 0; | |
750 | } | |
751 | ||
752 | early_param("disable_ddw", disable_ddw_setup); | |
753 | ||
5efbabe0 | 754 | static void remove_ddw(struct device_node *np, bool remove_prop) |
4e8b0cf4 NA |
755 | { |
756 | struct dynamic_dma_window_prop *dwp; | |
757 | struct property *win64; | |
9410e018 | 758 | u32 ddw_avail[3]; |
4e8b0cf4 | 759 | u64 liobn; |
9410e018 AK |
760 | int ret = 0; |
761 | ||
762 | ret = of_property_read_u32_array(np, "ibm,ddw-applicable", | |
763 | &ddw_avail[0], 3); | |
4e8b0cf4 | 764 | |
4e8b0cf4 | 765 | win64 = of_find_property(np, DIRECT64_PROPNAME, NULL); |
2573f684 | 766 | if (!win64) |
4e8b0cf4 NA |
767 | return; |
768 | ||
9410e018 | 769 | if (ret || win64->length < sizeof(*dwp)) |
2573f684 MM |
770 | goto delprop; |
771 | ||
4e8b0cf4 NA |
772 | dwp = win64->value; |
773 | liobn = (u64)be32_to_cpu(dwp->liobn); | |
774 | ||
775 | /* clear the whole window, note the arg is in kernel pages */ | |
776 | ret = tce_clearrange_multi_pSeriesLP(0, | |
777 | 1ULL << (be32_to_cpu(dwp->window_shift) - PAGE_SHIFT), dwp); | |
778 | if (ret) | |
f2c2cbcc JP |
779 | pr_warn("%pOF failed to clear tces in window.\n", |
780 | np); | |
4e8b0cf4 | 781 | else |
b7c670d6 RH |
782 | pr_debug("%pOF successfully cleared tces in window.\n", |
783 | np); | |
4e8b0cf4 | 784 | |
ae69e1ed NA |
785 | ret = rtas_call(ddw_avail[2], 1, 1, NULL, liobn); |
786 | if (ret) | |
f2c2cbcc | 787 | pr_warn("%pOF: failed to remove direct window: rtas returned " |
ae69e1ed | 788 | "%d to ibm,remove-pe-dma-window(%x) %llx\n", |
b7c670d6 | 789 | np, ret, ddw_avail[2], liobn); |
ae69e1ed | 790 | else |
b7c670d6 | 791 | pr_debug("%pOF: successfully removed direct window: rtas returned " |
ae69e1ed | 792 | "%d to ibm,remove-pe-dma-window(%x) %llx\n", |
b7c670d6 | 793 | np, ret, ddw_avail[2], liobn); |
4e8b0cf4 | 794 | |
2573f684 | 795 | delprop: |
5efbabe0 GS |
796 | if (remove_prop) |
797 | ret = of_remove_property(np, win64); | |
2573f684 | 798 | if (ret) |
f2c2cbcc | 799 | pr_warn("%pOF: failed to remove direct window property: %d\n", |
b7c670d6 | 800 | np, ret); |
2573f684 | 801 | } |
4e8b0cf4 | 802 | |
b73a635f | 803 | static u64 find_existing_ddw(struct device_node *pdn) |
4e8b0cf4 | 804 | { |
4e8b0cf4 NA |
805 | struct direct_window *window; |
806 | const struct dynamic_dma_window_prop *direct64; | |
807 | u64 dma_addr = 0; | |
808 | ||
4e8b0cf4 NA |
809 | spin_lock(&direct_window_list_lock); |
810 | /* check if we already created a window and dupe that config if so */ | |
811 | list_for_each_entry(window, &direct_window_list, list) { | |
812 | if (window->device == pdn) { | |
813 | direct64 = window->prop; | |
df015604 | 814 | dma_addr = be64_to_cpu(direct64->dma_base); |
4e8b0cf4 NA |
815 | break; |
816 | } | |
817 | } | |
818 | spin_unlock(&direct_window_list_lock); | |
819 | ||
820 | return dma_addr; | |
821 | } | |
822 | ||
c8566780 | 823 | static int find_existing_ddw_windows(void) |
4e8b0cf4 | 824 | { |
97e7dc52 | 825 | int len; |
c8566780 | 826 | struct device_node *pdn; |
97e7dc52 | 827 | struct direct_window *window; |
4e8b0cf4 | 828 | const struct dynamic_dma_window_prop *direct64; |
4e8b0cf4 | 829 | |
c8566780 MM |
830 | if (!firmware_has_feature(FW_FEATURE_LPAR)) |
831 | return 0; | |
832 | ||
833 | for_each_node_with_property(pdn, DIRECT64_PROPNAME) { | |
97e7dc52 | 834 | direct64 = of_get_property(pdn, DIRECT64_PROPNAME, &len); |
c8566780 MM |
835 | if (!direct64) |
836 | continue; | |
837 | ||
97e7dc52 NA |
838 | window = kzalloc(sizeof(*window), GFP_KERNEL); |
839 | if (!window || len < sizeof(struct dynamic_dma_window_prop)) { | |
840 | kfree(window); | |
5efbabe0 | 841 | remove_ddw(pdn, true); |
97e7dc52 NA |
842 | continue; |
843 | } | |
c8566780 | 844 | |
97e7dc52 NA |
845 | window->device = pdn; |
846 | window->prop = direct64; | |
847 | spin_lock(&direct_window_list_lock); | |
848 | list_add(&window->list, &direct_window_list); | |
849 | spin_unlock(&direct_window_list_lock); | |
4e8b0cf4 NA |
850 | } |
851 | ||
c8566780 | 852 | return 0; |
4e8b0cf4 | 853 | } |
c8566780 | 854 | machine_arch_initcall(pseries, find_existing_ddw_windows); |
4e8b0cf4 | 855 | |
b73a635f | 856 | static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail, |
4e8b0cf4 NA |
857 | struct ddw_query_response *query) |
858 | { | |
8445a87f GP |
859 | struct device_node *dn; |
860 | struct pci_dn *pdn; | |
4e8b0cf4 NA |
861 | u32 cfg_addr; |
862 | u64 buid; | |
863 | int ret; | |
864 | ||
865 | /* | |
866 | * Get the config address and phb buid of the PE window. | |
867 | * Rely on eeh to retrieve this for us. | |
868 | * Retrieve them from the pci device, not the node with the | |
869 | * dma-window property | |
870 | */ | |
8445a87f GP |
871 | dn = pci_device_to_OF_node(dev); |
872 | pdn = PCI_DN(dn); | |
873 | buid = pdn->phb->buid; | |
8a934efe | 874 | cfg_addr = ((pdn->busno << 16) | (pdn->devfn << 8)); |
39baadbf | 875 | |
b73a635f | 876 | ret = rtas_call(ddw_avail[0], 3, 5, (u32 *)query, |
4e8b0cf4 NA |
877 | cfg_addr, BUID_HI(buid), BUID_LO(buid)); |
878 | dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x" | |
b73a635f | 879 | " returned %d\n", ddw_avail[0], cfg_addr, BUID_HI(buid), |
4e8b0cf4 NA |
880 | BUID_LO(buid), ret); |
881 | return ret; | |
882 | } | |
883 | ||
b73a635f | 884 | static int create_ddw(struct pci_dev *dev, const u32 *ddw_avail, |
4e8b0cf4 NA |
885 | struct ddw_create_response *create, int page_shift, |
886 | int window_shift) | |
887 | { | |
8445a87f GP |
888 | struct device_node *dn; |
889 | struct pci_dn *pdn; | |
4e8b0cf4 NA |
890 | u32 cfg_addr; |
891 | u64 buid; | |
892 | int ret; | |
893 | ||
894 | /* | |
895 | * Get the config address and phb buid of the PE window. | |
896 | * Rely on eeh to retrieve this for us. | |
897 | * Retrieve them from the pci device, not the node with the | |
898 | * dma-window property | |
899 | */ | |
8445a87f GP |
900 | dn = pci_device_to_OF_node(dev); |
901 | pdn = PCI_DN(dn); | |
902 | buid = pdn->phb->buid; | |
8a934efe | 903 | cfg_addr = ((pdn->busno << 16) | (pdn->devfn << 8)); |
4e8b0cf4 NA |
904 | |
905 | do { | |
906 | /* extra outputs are LIOBN and dma-addr (hi, lo) */ | |
9410e018 AK |
907 | ret = rtas_call(ddw_avail[1], 5, 4, (u32 *)create, |
908 | cfg_addr, BUID_HI(buid), BUID_LO(buid), | |
909 | page_shift, window_shift); | |
4e8b0cf4 NA |
910 | } while (rtas_busy_delay(ret)); |
911 | dev_info(&dev->dev, | |
912 | "ibm,create-pe-dma-window(%x) %x %x %x %x %x returned %d " | |
b73a635f | 913 | "(liobn = 0x%x starting addr = %x %x)\n", ddw_avail[1], |
4e8b0cf4 NA |
914 | cfg_addr, BUID_HI(buid), BUID_LO(buid), page_shift, |
915 | window_shift, ret, create->liobn, create->addr_hi, create->addr_lo); | |
916 | ||
917 | return ret; | |
918 | } | |
919 | ||
61435690 NA |
920 | struct failed_ddw_pdn { |
921 | struct device_node *pdn; | |
922 | struct list_head list; | |
923 | }; | |
924 | ||
925 | static LIST_HEAD(failed_ddw_pdn_list); | |
926 | ||
68c0449e AK |
927 | static phys_addr_t ddw_memory_hotplug_max(void) |
928 | { | |
929 | phys_addr_t max_addr = memory_hotplug_max(); | |
930 | struct device_node *memory; | |
931 | ||
932 | for_each_node_by_type(memory, "memory") { | |
933 | unsigned long start, size; | |
c05f57fd | 934 | int n_mem_addr_cells, n_mem_size_cells, len; |
68c0449e AK |
935 | const __be32 *memcell_buf; |
936 | ||
937 | memcell_buf = of_get_property(memory, "reg", &len); | |
938 | if (!memcell_buf || len <= 0) | |
939 | continue; | |
940 | ||
941 | n_mem_addr_cells = of_n_addr_cells(memory); | |
942 | n_mem_size_cells = of_n_size_cells(memory); | |
943 | ||
68c0449e AK |
944 | start = of_read_number(memcell_buf, n_mem_addr_cells); |
945 | memcell_buf += n_mem_addr_cells; | |
946 | size = of_read_number(memcell_buf, n_mem_size_cells); | |
947 | memcell_buf += n_mem_size_cells; | |
948 | ||
949 | max_addr = max_t(phys_addr_t, max_addr, start + size); | |
950 | } | |
951 | ||
952 | return max_addr; | |
953 | } | |
954 | ||
4e8b0cf4 NA |
955 | /* |
956 | * If the PE supports dynamic dma windows, and there is space for a table | |
957 | * that can map all pages in a linear offset, then setup such a table, | |
958 | * and record the dma-offset in the struct device. | |
959 | * | |
960 | * dev: the pci device we are checking | |
961 | * pdn: the parent pe node with the ibm,dma_window property | |
962 | * Future: also check if we can remap the base window for our base page size | |
963 | * | |
9ae2fdde | 964 | * returns the dma offset for use by the direct mapped DMA code. |
4e8b0cf4 NA |
965 | */ |
966 | static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) | |
967 | { | |
968 | int len, ret; | |
969 | struct ddw_query_response query; | |
970 | struct ddw_create_response create; | |
971 | int page_shift; | |
972 | u64 dma_addr, max_addr; | |
973 | struct device_node *dn; | |
9410e018 | 974 | u32 ddw_avail[3]; |
4e8b0cf4 | 975 | struct direct_window *window; |
76730334 | 976 | struct property *win64; |
4e8b0cf4 | 977 | struct dynamic_dma_window_prop *ddwprop; |
61435690 | 978 | struct failed_ddw_pdn *fpdn; |
4e8b0cf4 NA |
979 | |
980 | mutex_lock(&direct_window_init_mutex); | |
981 | ||
b73a635f | 982 | dma_addr = find_existing_ddw(pdn); |
4e8b0cf4 NA |
983 | if (dma_addr != 0) |
984 | goto out_unlock; | |
985 | ||
61435690 NA |
986 | /* |
987 | * If we already went through this for a previous function of | |
988 | * the same device and failed, we don't want to muck with the | |
989 | * DMA window again, as it will race with in-flight operations | |
990 | * and can lead to EEHs. The above mutex protects access to the | |
991 | * list. | |
992 | */ | |
993 | list_for_each_entry(fpdn, &failed_ddw_pdn_list, list) { | |
b7c670d6 | 994 | if (fpdn->pdn == pdn) |
61435690 NA |
995 | goto out_unlock; |
996 | } | |
997 | ||
4e8b0cf4 NA |
998 | /* |
999 | * the ibm,ddw-applicable property holds the tokens for: | |
1000 | * ibm,query-pe-dma-window | |
1001 | * ibm,create-pe-dma-window | |
1002 | * ibm,remove-pe-dma-window | |
1003 | * for the given node in that order. | |
1004 | * the property is actually in the parent, not the PE | |
1005 | */ | |
9410e018 AK |
1006 | ret = of_property_read_u32_array(pdn, "ibm,ddw-applicable", |
1007 | &ddw_avail[0], 3); | |
1008 | if (ret) | |
ae69e1ed | 1009 | goto out_failed; |
25ebc45b | 1010 | |
ae69e1ed | 1011 | /* |
4e8b0cf4 NA |
1012 | * Query if there is a second window of size to map the |
1013 | * whole partition. Query returns number of windows, largest | |
1014 | * block assigned to PE (partition endpoint), and two bitmasks | |
1015 | * of page sizes: supported and supported for migrate-dma. | |
1016 | */ | |
1017 | dn = pci_device_to_OF_node(dev); | |
b73a635f | 1018 | ret = query_ddw(dev, ddw_avail, &query); |
4e8b0cf4 | 1019 | if (ret != 0) |
ae69e1ed | 1020 | goto out_failed; |
4e8b0cf4 NA |
1021 | |
1022 | if (query.windows_available == 0) { | |
1023 | /* | |
1024 | * no additional windows are available for this device. | |
1025 | * We might be able to reallocate the existing window, | |
1026 | * trading in for a larger page size. | |
1027 | */ | |
1028 | dev_dbg(&dev->dev, "no free dynamic windows"); | |
ae69e1ed | 1029 | goto out_failed; |
4e8b0cf4 | 1030 | } |
9410e018 | 1031 | if (query.page_size & 4) { |
4e8b0cf4 | 1032 | page_shift = 24; /* 16MB */ |
9410e018 | 1033 | } else if (query.page_size & 2) { |
4e8b0cf4 | 1034 | page_shift = 16; /* 64kB */ |
9410e018 | 1035 | } else if (query.page_size & 1) { |
4e8b0cf4 NA |
1036 | page_shift = 12; /* 4kB */ |
1037 | } else { | |
1038 | dev_dbg(&dev->dev, "no supported direct page size in mask %x", | |
1039 | query.page_size); | |
ae69e1ed | 1040 | goto out_failed; |
4e8b0cf4 NA |
1041 | } |
1042 | /* verify the window * number of ptes will map the partition */ | |
1043 | /* check largest block * page size > max memory hotplug addr */ | |
68c0449e | 1044 | max_addr = ddw_memory_hotplug_max(); |
9410e018 | 1045 | if (query.largest_available_block < (max_addr >> page_shift)) { |
8ab102d6 | 1046 | dev_dbg(&dev->dev, "can't map partition max 0x%llx with %u " |
4e8b0cf4 NA |
1047 | "%llu-sized pages\n", max_addr, query.largest_available_block, |
1048 | 1ULL << page_shift); | |
ae69e1ed | 1049 | goto out_failed; |
4e8b0cf4 NA |
1050 | } |
1051 | len = order_base_2(max_addr); | |
1052 | win64 = kzalloc(sizeof(struct property), GFP_KERNEL); | |
1053 | if (!win64) { | |
1054 | dev_info(&dev->dev, | |
1055 | "couldn't allocate property for 64bit dma window\n"); | |
ae69e1ed | 1056 | goto out_failed; |
4e8b0cf4 NA |
1057 | } |
1058 | win64->name = kstrdup(DIRECT64_PROPNAME, GFP_KERNEL); | |
1059 | win64->value = ddwprop = kmalloc(sizeof(*ddwprop), GFP_KERNEL); | |
76730334 | 1060 | win64->length = sizeof(*ddwprop); |
4e8b0cf4 NA |
1061 | if (!win64->name || !win64->value) { |
1062 | dev_info(&dev->dev, | |
1063 | "couldn't allocate property name and value\n"); | |
1064 | goto out_free_prop; | |
1065 | } | |
1066 | ||
b73a635f | 1067 | ret = create_ddw(dev, ddw_avail, &create, page_shift, len); |
4e8b0cf4 NA |
1068 | if (ret != 0) |
1069 | goto out_free_prop; | |
1070 | ||
9410e018 AK |
1071 | ddwprop->liobn = cpu_to_be32(create.liobn); |
1072 | ddwprop->dma_base = cpu_to_be64(((u64)create.addr_hi << 32) | | |
1073 | create.addr_lo); | |
4e8b0cf4 NA |
1074 | ddwprop->tce_shift = cpu_to_be32(page_shift); |
1075 | ddwprop->window_shift = cpu_to_be32(len); | |
1076 | ||
b7c670d6 RH |
1077 | dev_dbg(&dev->dev, "created tce table LIOBN 0x%x for %pOF\n", |
1078 | create.liobn, dn); | |
4e8b0cf4 NA |
1079 | |
1080 | window = kzalloc(sizeof(*window), GFP_KERNEL); | |
1081 | if (!window) | |
1082 | goto out_clear_window; | |
1083 | ||
1084 | ret = walk_system_ram_range(0, memblock_end_of_DRAM() >> PAGE_SHIFT, | |
1085 | win64->value, tce_setrange_multi_pSeriesLP_walk); | |
1086 | if (ret) { | |
b7c670d6 RH |
1087 | dev_info(&dev->dev, "failed to map direct window for %pOF: %d\n", |
1088 | dn, ret); | |
7a19081f | 1089 | goto out_free_window; |
4e8b0cf4 NA |
1090 | } |
1091 | ||
79d1c712 | 1092 | ret = of_add_property(pdn, win64); |
4e8b0cf4 | 1093 | if (ret) { |
b7c670d6 RH |
1094 | dev_err(&dev->dev, "unable to add dma window property for %pOF: %d", |
1095 | pdn, ret); | |
7a19081f | 1096 | goto out_free_window; |
4e8b0cf4 NA |
1097 | } |
1098 | ||
1099 | window->device = pdn; | |
1100 | window->prop = ddwprop; | |
1101 | spin_lock(&direct_window_list_lock); | |
1102 | list_add(&window->list, &direct_window_list); | |
1103 | spin_unlock(&direct_window_list_lock); | |
1104 | ||
9410e018 | 1105 | dma_addr = be64_to_cpu(ddwprop->dma_base); |
4e8b0cf4 NA |
1106 | goto out_unlock; |
1107 | ||
7a19081f JL |
1108 | out_free_window: |
1109 | kfree(window); | |
1110 | ||
4e8b0cf4 | 1111 | out_clear_window: |
5efbabe0 | 1112 | remove_ddw(pdn, true); |
4e8b0cf4 NA |
1113 | |
1114 | out_free_prop: | |
1115 | kfree(win64->name); | |
1116 | kfree(win64->value); | |
1117 | kfree(win64); | |
1118 | ||
ae69e1ed | 1119 | out_failed: |
25ebc45b | 1120 | |
61435690 NA |
1121 | fpdn = kzalloc(sizeof(*fpdn), GFP_KERNEL); |
1122 | if (!fpdn) | |
1123 | goto out_unlock; | |
1124 | fpdn->pdn = pdn; | |
1125 | list_add(&fpdn->list, &failed_ddw_pdn_list); | |
1126 | ||
4e8b0cf4 NA |
1127 | out_unlock: |
1128 | mutex_unlock(&direct_window_init_mutex); | |
1129 | return dma_addr; | |
1130 | } | |
1131 | ||
12d04eef | 1132 | static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) |
1da177e4 LT |
1133 | { |
1134 | struct device_node *pdn, *dn; | |
1135 | struct iommu_table *tbl; | |
2083f681 | 1136 | const __be32 *dma_window = NULL; |
1635317f | 1137 | struct pci_dn *pci; |
1da177e4 | 1138 | |
f7ebf352 | 1139 | pr_debug("pci_dma_dev_setup_pSeriesLP: %s\n", pci_name(dev)); |
12d04eef | 1140 | |
1da177e4 | 1141 | /* dev setup for LPAR is a little tricky, since the device tree might |
25985edc | 1142 | * contain the dma-window properties per-device and not necessarily |
1da177e4 LT |
1143 | * for the bus. So we need to search upwards in the tree until we |
1144 | * either hit a dma-window property, OR find a parent with a table | |
1145 | * already allocated. | |
1146 | */ | |
1147 | dn = pci_device_to_OF_node(dev); | |
b7c670d6 | 1148 | pr_debug(" node is %pOF\n", dn); |
5d2efba6 | 1149 | |
b348aa65 | 1150 | for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group; |
1635317f | 1151 | pdn = pdn->parent) { |
e2eb6392 | 1152 | dma_window = of_get_property(pdn, "ibm,dma-window", NULL); |
1da177e4 LT |
1153 | if (dma_window) |
1154 | break; | |
1155 | } | |
1156 | ||
650f7b3b LV |
1157 | if (!pdn || !PCI_DN(pdn)) { |
1158 | printk(KERN_WARNING "pci_dma_dev_setup_pSeriesLP: " | |
b7c670d6 RH |
1159 | "no DMA window found for pci dev=%s dn=%pOF\n", |
1160 | pci_name(dev), dn); | |
650f7b3b LV |
1161 | return; |
1162 | } | |
b7c670d6 | 1163 | pr_debug(" parent is %pOF\n", pdn); |
12d04eef | 1164 | |
e07102db | 1165 | pci = PCI_DN(pdn); |
b348aa65 AK |
1166 | if (!pci->table_group) { |
1167 | pci->table_group = iommu_pseries_alloc_group(pci->phb->node); | |
1168 | tbl = pci->table_group->tables[0]; | |
b6e1f6ad AK |
1169 | iommu_table_setparms_lpar(pci->phb, pdn, tbl, |
1170 | pci->table_group, dma_window); | |
da004c36 | 1171 | tbl->it_ops = &iommu_table_lpar_multi_ops; |
b348aa65 AK |
1172 | iommu_init_table(tbl, pci->phb->node); |
1173 | iommu_register_group(pci->table_group, | |
1174 | pci_domain_nr(pci->phb->bus), 0); | |
1175 | pr_debug(" created table: %p\n", pci->table_group); | |
de113217 | 1176 | } else { |
b348aa65 | 1177 | pr_debug(" found DMA window, table: %p\n", pci->table_group); |
1da177e4 LT |
1178 | } |
1179 | ||
b348aa65 | 1180 | set_iommu_table_base(&dev->dev, pci->table_group->tables[0]); |
c4e9d3c1 | 1181 | iommu_add_device(pci->table_group, &dev->dev); |
1da177e4 | 1182 | } |
4e8b0cf4 | 1183 | |
9ae2fdde | 1184 | static bool iommu_bypass_supported_pSeriesLP(struct pci_dev *pdev, u64 dma_mask) |
4e8b0cf4 | 1185 | { |
9ae2fdde | 1186 | struct device_node *dn = pci_device_to_OF_node(pdev), *pdn; |
2083f681 | 1187 | const __be32 *dma_window = NULL; |
4e8b0cf4 | 1188 | |
4e8b0cf4 | 1189 | /* only attempt to use a new window if 64-bit DMA is requested */ |
9ae2fdde CH |
1190 | if (dma_mask < DMA_BIT_MASK(64)) |
1191 | return false; | |
4e8b0cf4 | 1192 | |
9ae2fdde | 1193 | dev_dbg(&pdev->dev, "node is %pOF\n", dn); |
4e8b0cf4 | 1194 | |
9ae2fdde CH |
1195 | /* |
1196 | * the device tree might contain the dma-window properties | |
1197 | * per-device and not necessarily for the bus. So we need to | |
1198 | * search upwards in the tree until we either hit a dma-window | |
1199 | * property, OR find a parent with a table already allocated. | |
1200 | */ | |
1201 | for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group; | |
1202 | pdn = pdn->parent) { | |
1203 | dma_window = of_get_property(pdn, "ibm,dma-window", NULL); | |
1204 | if (dma_window) | |
1205 | break; | |
4e8b0cf4 NA |
1206 | } |
1207 | ||
9ae2fdde | 1208 | if (pdn && PCI_DN(pdn)) { |
0617fc0c CH |
1209 | pdev->dev.archdata.dma_offset = enable_ddw(pdev, pdn); |
1210 | if (pdev->dev.archdata.dma_offset) | |
9ae2fdde | 1211 | return true; |
6a5c7be5 MM |
1212 | } |
1213 | ||
9ae2fdde | 1214 | return false; |
6a5c7be5 MM |
1215 | } |
1216 | ||
4e8b0cf4 NA |
1217 | static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action, |
1218 | void *data) | |
1219 | { | |
1220 | struct direct_window *window; | |
1221 | struct memory_notify *arg = data; | |
1222 | int ret = 0; | |
1223 | ||
1224 | switch (action) { | |
1225 | case MEM_GOING_ONLINE: | |
1226 | spin_lock(&direct_window_list_lock); | |
1227 | list_for_each_entry(window, &direct_window_list, list) { | |
1228 | ret |= tce_setrange_multi_pSeriesLP(arg->start_pfn, | |
1229 | arg->nr_pages, window->prop); | |
1230 | /* XXX log error */ | |
1231 | } | |
1232 | spin_unlock(&direct_window_list_lock); | |
1233 | break; | |
1234 | case MEM_CANCEL_ONLINE: | |
1235 | case MEM_OFFLINE: | |
1236 | spin_lock(&direct_window_list_lock); | |
1237 | list_for_each_entry(window, &direct_window_list, list) { | |
1238 | ret |= tce_clearrange_multi_pSeriesLP(arg->start_pfn, | |
1239 | arg->nr_pages, window->prop); | |
1240 | /* XXX log error */ | |
1241 | } | |
1242 | spin_unlock(&direct_window_list_lock); | |
1243 | break; | |
1244 | default: | |
1245 | break; | |
1246 | } | |
1247 | if (ret && action != MEM_CANCEL_ONLINE) | |
1248 | return NOTIFY_BAD; | |
1249 | ||
1250 | return NOTIFY_OK; | |
1251 | } | |
1252 | ||
1253 | static struct notifier_block iommu_mem_nb = { | |
1254 | .notifier_call = iommu_mem_notifier, | |
1255 | }; | |
1256 | ||
f5242e5a | 1257 | static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *data) |
bed59275 SR |
1258 | { |
1259 | int err = NOTIFY_OK; | |
f5242e5a GL |
1260 | struct of_reconfig_data *rd = data; |
1261 | struct device_node *np = rd->dn; | |
bed59275 | 1262 | struct pci_dn *pci = PCI_DN(np); |
4e8b0cf4 | 1263 | struct direct_window *window; |
bed59275 SR |
1264 | |
1265 | switch (action) { | |
1cf3d8b3 | 1266 | case OF_RECONFIG_DETACH_NODE: |
5efbabe0 GS |
1267 | /* |
1268 | * Removing the property will invoke the reconfig | |
1269 | * notifier again, which causes dead-lock on the | |
1270 | * read-write semaphore of the notifier chain. So | |
1271 | * we have to remove the property when releasing | |
1272 | * the device node. | |
1273 | */ | |
1274 | remove_ddw(np, false); | |
b348aa65 AK |
1275 | if (pci && pci->table_group) |
1276 | iommu_pseries_free_group(pci->table_group, | |
ac9a5889 | 1277 | np->full_name); |
4e8b0cf4 NA |
1278 | |
1279 | spin_lock(&direct_window_list_lock); | |
1280 | list_for_each_entry(window, &direct_window_list, list) { | |
1281 | if (window->device == np) { | |
1282 | list_del(&window->list); | |
1283 | kfree(window); | |
1284 | break; | |
1285 | } | |
1286 | } | |
1287 | spin_unlock(&direct_window_list_lock); | |
bed59275 SR |
1288 | break; |
1289 | default: | |
1290 | err = NOTIFY_DONE; | |
1291 | break; | |
1292 | } | |
1293 | return err; | |
1294 | } | |
1295 | ||
1296 | static struct notifier_block iommu_reconfig_nb = { | |
1297 | .notifier_call = iommu_reconfig_notifier, | |
1298 | }; | |
1da177e4 | 1299 | |
1da177e4 LT |
1300 | /* These are called very early. */ |
1301 | void iommu_init_early_pSeries(void) | |
1302 | { | |
a8daac8a | 1303 | if (of_chosen && of_get_property(of_chosen, "linux,iommu-off", NULL)) |
1da177e4 | 1304 | return; |
1da177e4 | 1305 | |
57cfb814 | 1306 | if (firmware_has_feature(FW_FEATURE_LPAR)) { |
38ae9ec4 DA |
1307 | pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeriesLP; |
1308 | pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeriesLP; | |
9ae2fdde CH |
1309 | if (!disable_ddw) |
1310 | pseries_pci_controller_ops.iommu_bypass_supported = | |
1311 | iommu_bypass_supported_pSeriesLP; | |
1da177e4 | 1312 | } else { |
38ae9ec4 DA |
1313 | pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeries; |
1314 | pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeries; | |
1da177e4 LT |
1315 | } |
1316 | ||
1317 | ||
1cf3d8b3 | 1318 | of_reconfig_notifier_register(&iommu_reconfig_nb); |
4e8b0cf4 | 1319 | register_memory_notifier(&iommu_mem_nb); |
1da177e4 | 1320 | |
98747770 | 1321 | set_pci_dma_ops(&dma_iommu_ops); |
1da177e4 LT |
1322 | } |
1323 | ||
4e89a2d8 WS |
1324 | static int __init disable_multitce(char *str) |
1325 | { | |
1326 | if (strcmp(str, "off") == 0 && | |
1327 | firmware_has_feature(FW_FEATURE_LPAR) && | |
1328 | firmware_has_feature(FW_FEATURE_MULTITCE)) { | |
1329 | printk(KERN_INFO "Disabling MULTITCE firmware feature\n"); | |
4e89a2d8 WS |
1330 | powerpc_firmware_features &= ~FW_FEATURE_MULTITCE; |
1331 | } | |
1332 | return 1; | |
1333 | } | |
1334 | ||
1335 | __setup("multitce=", disable_multitce); | |
4ad04e59 | 1336 | |
c4e9d3c1 AK |
1337 | static int tce_iommu_bus_notifier(struct notifier_block *nb, |
1338 | unsigned long action, void *data) | |
1339 | { | |
1340 | struct device *dev = data; | |
1341 | ||
1342 | switch (action) { | |
1343 | case BUS_NOTIFY_DEL_DEVICE: | |
1344 | iommu_del_device(dev); | |
1345 | return 0; | |
1346 | default: | |
1347 | return 0; | |
1348 | } | |
1349 | } | |
1350 | ||
1351 | static struct notifier_block tce_iommu_bus_nb = { | |
1352 | .notifier_call = tce_iommu_bus_notifier, | |
1353 | }; | |
1354 | ||
1355 | static int __init tce_iommu_bus_notifier_init(void) | |
1356 | { | |
1357 | bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb); | |
1358 | return 0; | |
1359 | } | |
4ad04e59 | 1360 | machine_subsys_initcall_sync(pseries, tce_iommu_bus_notifier_init); |