/*
 * IOMMU implementation for Broadband Processor Architecture
 * We just establish a linear mapping at boot by setting all the
 * IOPT cache entries in the CPU.
 * The mapping functions should be identical to pci_direct_iommu,
 * except for the handling of the high order bit that is required
 * by the Spider bridge. These should be split into a separate
 * file at the point where we get a different bridge chip.
 *
 * Copyright (C) 2005 IBM Deutschland Entwicklung GmbH,
 *		      Arnd Bergmann <arndb@de.ibm.com>
 *
 * Based on linear mapping
 * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/delay.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/mm.h>
#include <linux/dma-mapping.h>

#include <asm/sections.h>
#include <asm/iommu.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>
#include <asm/pmac_feature.h>
#include <asm/abs_addr.h>
#include <asm/system.h>

#include "bpa_iommu.h"
static inline unsigned long
get_iopt_entry(unsigned long real_address, unsigned long ioid,
	       unsigned long prot)
{
	return (prot & IOPT_PROT_MASK)
	     | (IOPT_COHERENT)
	     | (IOPT_ORDER_VC)
	     | (real_address & IOPT_RPN_MASK)
	     | (ioid & IOPT_IOID_MASK);
}
typedef struct {
	unsigned long val;
} ioste;

static inline ioste
mk_ioste(unsigned long val)
{
	ioste ioste = { .val = val, };
	return ioste;
}
static inline ioste
get_iost_entry(unsigned long iopt_base, unsigned long io_address,
	       unsigned page_size)
{
	unsigned long ps;
	unsigned long iostep;
	unsigned long nnpt;
	unsigned long shift;

	switch (page_size) {
	case 0x1000000:
		ps = IOST_PS_16M;
		nnpt = 0;  /* one page per segment */
		shift = 5; /* segment has 16 iopt entries */
		break;

	case 0x100000:
		ps = IOST_PS_1M;
		nnpt = 0;  /* one page per segment */
		shift = 1; /* segment has 256 iopt entries */
		break;

	case 0x10000:
		ps = IOST_PS_64K;
		nnpt = 0x07; /* 8 pages per io page table */
		shift = 0;   /* all entries are used */
		break;

	case 0x1000:
		ps = IOST_PS_4K;
		nnpt = 0x7f; /* 128 pages per io page table */
		shift = 0;   /* all entries are used */
		break;

	default: /* not a known compile time constant */
		{
			/* BUILD_BUG_ON() is not usable here */
			extern void __get_iost_entry_bad_page_size(void);
			__get_iost_entry_bad_page_size();
		}
		break;
	}

	iostep = iopt_base +
			/* need 8 bytes per iopte */
			(((io_address / page_size * 8)
			/* align io page tables on 4k page boundaries */
				 << shift)
			/* nnpt+1 pages go into each iopt */
				 & ~(nnpt << 12));

	nnpt++;	/* this seems to work, but the documentation is not clear
		   about whether we put nnpt or nnpt-1 into the ioste bits.
		   In theory, this can't work for 4k pages. */
	return mk_ioste(IOST_VALID_MASK
			| (iostep & IOST_PT_BASE_MASK)
			| ((nnpt << 5) & IOST_NNPT_MASK)
			| (ps & IOST_PS_MASK));
}
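/* A worked example of the iostep arithmetic above, for the 16MB page
 * size this file actually uses: ps = IOST_PS_16M, nnpt = 0 and
 * shift = 5, so io_address 0x30000000 is I/O page number 0x30 and
 * iostep = iopt_base + ((0x30 * 8) << 5) = iopt_base + 0x3000.
 * Each 16MB page thus advances the table address by 256 bytes, and a
 * full 256MB segment (16 pages) occupies exactly one 4k page. */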
/* compute the address of an io pte */
static inline unsigned long
get_ioptep(ioste iost_entry, unsigned long io_address)
{
	unsigned long iopt_base;
	unsigned long page_size;
	unsigned long page_number;
	unsigned long iopt_offset;

	iopt_base = iost_entry.val & IOST_PT_BASE_MASK;
	page_size = iost_entry.val & IOST_PS_MASK;

	/* decode page size to compute page number */
	page_number = (io_address & 0x0fffffff) >> (10 + 2 * page_size);
	/* page number is an offset into the io page table */
	iopt_offset = (page_number << 3) & 0x7fff8ul;
	return iopt_base + iopt_offset;
}
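/* Note on the decode above: the IOST_PS field appears to encode the
 * page size as log2(bytes) = 10 + 2 * ps, so ps = 1 gives 4k pages
 * and ps = 7 gives the 16MB pages used here (10 + 2 * 7 = 24, and
 * 1 << 24 == 0x1000000). This is inferred from the shift, not taken
 * from documentation. */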
/* compute the tag field of the iopt cache entry */
static inline unsigned long
get_ioc_tag(ioste iost_entry, unsigned long io_address)
{
	unsigned long iopte = get_ioptep(iost_entry, io_address);

	return IOPT_VALID_MASK
	     | ((iopte & 0x00000000000000ff8ul) >> 3)
	     | ((iopte & 0x0000003fffffc0000ul) >> 9);
}
/* compute the hashed 6 bit index for the 4-way associative pte cache */
static inline unsigned long
get_ioc_hash(ioste iost_entry, unsigned long io_address)
{
	unsigned long iopte = get_ioptep(iost_entry, io_address);

	return ((iopte & 0x000000000000001f8ul) >> 3)
	     ^ ((iopte & 0x00000000000020000ul) >> 17)
	     ^ ((iopte & 0x00000000000010000ul) >> 15)
	     ^ ((iopte & 0x00000000000008000ul) >> 13)
	     ^ ((iopte & 0x00000000000004000ul) >> 11)
	     ^ ((iopte & 0x00000000000002000ul) >> 9)
	     ^ ((iopte & 0x00000000000001000ul) >> 7);
}
/* same as above, but pretend that we have a simpler 1-way associative
   pte cache with an 8 bit index */
static inline unsigned long
get_ioc_hash_1way(ioste iost_entry, unsigned long io_address)
{
	unsigned long iopte = get_ioptep(iost_entry, io_address);

	return ((iopte & 0x000000000000001f8ul) >> 3)
	     ^ ((iopte & 0x00000000000020000ul) >> 17)
	     ^ ((iopte & 0x00000000000010000ul) >> 15)
	     ^ ((iopte & 0x00000000000008000ul) >> 13)
	     ^ ((iopte & 0x00000000000004000ul) >> 11)
	     ^ ((iopte & 0x00000000000002000ul) >> 9)
	     ^ ((iopte & 0x00000000000001000ul) >> 7)
	     ^ ((iopte & 0x0000000000000c000ul) >> 8);
}
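/* The only difference from get_ioc_hash() is the final term, which
 * folds bits 14-15 of the iopte address into bits 6-7 of the result,
 * widening the 6 bit set index into the 8 bit index of the pretended
 * 1-way cache. */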
static inline ioste
get_iost_cache(void __iomem *base, unsigned long index)
{
	unsigned long __iomem *p = (base + IOC_ST_CACHE_DIR);
	return mk_ioste(in_be64(&p[index]));
}
static inline void
set_iost_cache(void __iomem *base, unsigned long index, ioste ste)
{
	unsigned long __iomem *p = (base + IOC_ST_CACHE_DIR);

	pr_debug("ioste %02lx was %016lx, store %016lx", index,
			get_iost_cache(base, index).val, ste.val);
	out_be64(&p[index], ste.val);
	pr_debug(" now %016lx\n", get_iost_cache(base, index).val);
}
static inline unsigned long
get_iopt_cache(void __iomem *base, unsigned long index, unsigned long *tag)
{
	unsigned long __iomem *tags = (void *)(base + IOC_PT_CACHE_DIR);
	unsigned long __iomem *p = (void *)(base + IOC_PT_CACHE_REG);

	*tag = tags[index];
	rmb();
	return *p;
}
static inline void
set_iopt_cache(void __iomem *base, unsigned long index,
	       unsigned long tag, unsigned long val)
{
	unsigned long __iomem *tags = base + IOC_PT_CACHE_DIR;
	unsigned long __iomem *p = base + IOC_PT_CACHE_REG;
	unsigned long oldtag;

	pr_debug("iopt %02lx was v%016lx/t%016lx, store v%016lx/t%016lx\n",
		index, get_iopt_cache(base, index, &oldtag), oldtag, val, tag);

	out_be64(p, val);
	out_be64(&tags[index], tag);
}
static inline void
set_iost_origin(void __iomem *base)
{
	unsigned long __iomem *p = base + IOC_ST_ORIGIN;
	unsigned long origin = IOSTO_ENABLE | IOSTO_SW;

	pr_debug("iost_origin %016lx, now %016lx\n", in_be64(p), origin);
	out_be64(p, origin);
}
static inline void
set_iocmd_config(void __iomem *base)
{
	unsigned long __iomem *p = base + 0xc00;
	unsigned long conf;

	conf = in_be64(p);
	pr_debug("iost_conf %016lx, now %016lx\n", conf, conf | IOCMD_CONF_TE);
	out_be64(p, conf | IOCMD_CONF_TE);
}
/* FIXME: get these from the device tree */
#define ioc_base	0x20000511000ull
#define ioc_mmio_base	0x20000510000ull
#define ioid		0x48a
#define iopt_phys_offset (- 0x20000000) /* We have a 512MB offset from the SB */
#define io_page_size	0x1000000
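/* A possible shape for the FIXME above, sketched only: look the
 * addresses up in the device tree instead of hardcoding them. The
 * "ioc" device_type and the layout of its "reg" property are
 * assumptions, not taken from real firmware, so this stays disabled. */
#if 0
static unsigned long __init find_ioc_base(void)
{
	struct device_node *np;
	unsigned long *reg;
	unsigned long result = 0;

	np = of_find_node_by_type(NULL, "ioc");	/* hypothetical node type */
	if (np) {
		reg = (unsigned long *)get_property(np, "reg", NULL);
		if (reg)
			result = *reg;
		of_node_put(np);
	}
	return result;
}
#endif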
static unsigned long map_iopt_entry(unsigned long address)
{
	switch (address >> 20) {
	case 0x600:
		address = 0x24020000000ull; /* spider i/o */
		break;
	default:
		address += iopt_phys_offset;
		break;
	}

	return get_iopt_entry(address, ioid, IOPT_PROT_RW);
}
static void iommu_bus_setup_null(struct pci_bus *b) { }
static void iommu_dev_setup_null(struct pci_dev *d) { }
/* initialize the iommu to support a simple linear mapping
 * for each DMA window used by any device. For now, we
 * happen to know that there is only one DMA window in use,
 * starting at iopt_phys_offset. */
static void bpa_map_iommu(void)
{
	unsigned long address;
	void __iomem *base;
	ioste ioste;
	unsigned long index;

	base = __ioremap(ioc_base, 0x1000, _PAGE_NO_CACHE);
	pr_debug("%lx mapped to %p\n", ioc_base, base);
	set_iocmd_config(base);
	iounmap(base);

	base = __ioremap(ioc_mmio_base, 0x1000, _PAGE_NO_CACHE);
	pr_debug("%lx mapped to %p\n", ioc_mmio_base, base);

	set_iost_origin(base);

	for (address = 0; address < 0x100000000ul; address += io_page_size) {
		ioste = get_iost_entry(0x10000000000ul, address, io_page_size);
		if ((address & 0xfffffff) == 0) /* segment start */
			set_iost_cache(base, address >> 28, ioste);
		index = get_ioc_hash_1way(ioste, address);
		pr_debug("addr %08lx, index %02lx, ioste %016lx\n",
					address, index, ioste.val);
		set_iopt_cache(base,
			get_ioc_hash_1way(ioste, address),
			get_ioc_tag(ioste, address),
			map_iopt_entry(address));
	}
}
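/* Note that the loop above only primes the IOC's segment and pte
 * caches; no io page table is ever built in memory at the iopt_base
 * passed to get_iost_entry(). This apparently relies on the whole
 * 4GB window fitting into the cached entries. */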
static void *bpa_alloc_coherent(struct device *hwdev, size_t size,
			   dma_addr_t *dma_handle, gfp_t flag)
{
	void *ret;

	ret = (void *)__get_free_pages(flag, get_order(size));
	if (ret != NULL) {
		memset(ret, 0, size);
		*dma_handle = virt_to_abs(ret) | BPA_DMA_VALID;
	}
	return ret;
}
static void bpa_free_coherent(struct device *hwdev, size_t size,
			 void *vaddr, dma_addr_t dma_handle)
{
	free_pages((unsigned long)vaddr, get_order(size));
}
static dma_addr_t bpa_map_single(struct device *hwdev, void *ptr,
		size_t size, enum dma_data_direction direction)
{
	return virt_to_abs(ptr) | BPA_DMA_VALID;
}
static void bpa_unmap_single(struct device *hwdev, dma_addr_t dma_addr,
		size_t size, enum dma_data_direction direction)
{
}
static int bpa_map_sg(struct device *hwdev, struct scatterlist *sg,
		int nents, enum dma_data_direction direction)
{
	int i;

	for (i = 0; i < nents; i++, sg++) {
		sg->dma_address = (page_to_phys(sg->page) + sg->offset)
					| BPA_DMA_VALID;
		sg->dma_length = sg->length;
	}

	return nents;
}
static void bpa_unmap_sg(struct device *hwdev, struct scatterlist *sg,
		int nents, enum dma_data_direction direction)
{
}
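/* the linear window set up in bpa_map_iommu() only spans the first
 * 4GB of bus addresses, so only devices whose DMA mask stays below
 * 4GB can be supported */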
static int bpa_dma_supported(struct device *dev, u64 mask)
{
	return mask < 0x100000000ull;
}
void bpa_init_iommu(void)
{
	bpa_map_iommu();

	/* Direct I/O, IOMMU off */
	ppc_md.iommu_dev_setup = iommu_dev_setup_null;
	ppc_md.iommu_bus_setup = iommu_bus_setup_null;

	pci_dma_ops.alloc_coherent = bpa_alloc_coherent;
	pci_dma_ops.free_coherent = bpa_free_coherent;
	pci_dma_ops.map_single = bpa_map_single;
	pci_dma_ops.unmap_single = bpa_unmap_single;
	pci_dma_ops.map_sg = bpa_map_sg;
	pci_dma_ops.unmap_sg = bpa_unmap_sg;
	pci_dma_ops.dma_supported = bpa_dma_supported;
}