/*
 * arch/ppc64/kernel/iommu.c
 * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
 *
 * Rewrite, cleanup, new allocation schemes, virtual merging:
 * Copyright (C) 2004 Olof Johansson, IBM Corporation
 * and Ben. Herrenschmidt, IBM Corporation
 *
 * Dynamic DMA mapping support, bus-independent parts.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */


#include <linux/config.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/dma-mapping.h>
#include <linux/bitops.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/iommu.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>

#define DBG(...)

#ifdef CONFIG_IOMMU_VMERGE
static int novmerge = 0;
#else
static int novmerge = 1;
#endif

static int __init setup_iommu(char *str)
{
	if (!strcmp(str, "novmerge"))
		novmerge = 1;
	else if (!strcmp(str, "vmerge"))
		novmerge = 0;
	return 1;
}

__setup("iommu=", setup_iommu);
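
/*
 * Usage note (illustrative, not from the original source): the __setup()
 * hook above means virtual merging can be toggled from the kernel command
 * line at boot, e.g.
 *
 *	iommu=novmerge		one IOMMU segment per scatterlist entry
 *	iommu=vmerge		allow contiguous entries to be merged
 *
 * overriding whatever default CONFIG_IOMMU_VMERGE selected.
 */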
61 | ||
62 | static unsigned long iommu_range_alloc(struct iommu_table *tbl, | |
63 | unsigned long npages, | |
64 | unsigned long *handle, | |
65 | unsigned int align_order) | |
66 | { | |
67 | unsigned long n, end, i, start; | |
68 | unsigned long limit; | |
69 | int largealloc = npages > 15; | |
70 | int pass = 0; | |
71 | unsigned long align_mask; | |
72 | ||
73 | align_mask = 0xffffffffffffffffl >> (64 - align_order); | |
74 | ||
75 | /* This allocator was derived from x86_64's bit string search */ | |
76 | ||
77 | /* Sanity check */ | |
	if (unlikely(npages == 0)) {
		if (printk_ratelimit())
			WARN_ON(1);
		return DMA_ERROR_CODE;
	}

	if (handle && *handle)
		start = *handle;
	else
		start = largealloc ? tbl->it_largehint : tbl->it_hint;

	/* Use only half of the table for small allocs (15 pages or less) */
	limit = largealloc ? tbl->it_size : tbl->it_halfpoint;

	if (largealloc && start < tbl->it_halfpoint)
		start = tbl->it_halfpoint;

	/* The case below can happen if we have a small segment appended
	 * to a large, or when the previous alloc was at the very end of
	 * the available space. If so, go back to the initial start.
	 */
	if (start >= limit)
		start = largealloc ? tbl->it_largehint : tbl->it_hint;

 again:

	n = find_next_zero_bit(tbl->it_map, limit, start);

	/* Align allocation */
	n = (n + align_mask) & ~align_mask;

	end = n + npages;

	if (unlikely(end >= limit)) {
		if (likely(pass < 2)) {
			/* First failure, just rescan the half of the table.
			 * Second failure, rescan the other half of the table.
			 */
			start = (largealloc ^ pass) ? tbl->it_halfpoint : 0;
			limit = pass ? tbl->it_size : limit;
			pass++;
			goto again;
		} else {
			/* Third failure, give up */
			return DMA_ERROR_CODE;
		}
	}

	for (i = n; i < end; i++)
		if (test_bit(i, tbl->it_map)) {
			start = i+1;
			goto again;
		}

	for (i = n; i < end; i++)
		__set_bit(i, tbl->it_map);

	/* Bump the hint to a new block for small allocs. */
	if (largealloc) {
		/* Don't bump to new block to avoid fragmentation */
		tbl->it_largehint = end;
	} else {
		/* Overflow will be taken care of at the next allocation */
		tbl->it_hint = (end + tbl->it_blocksize - 1) &
		                ~(tbl->it_blocksize - 1);
	}

	/* Update handle for SG allocations */
	if (handle)
		*handle = end;

	return n;
}
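
/*
 * Worked example for the allocator above (illustrative, not part of the
 * original file): with align_order = 2,
 *
 *	align_mask = 0xffffffffffffffff >> (64 - 2) = 0x3
 *
 * so if find_next_zero_bit() returns n = 0x1001, the rounding step
 * n = (n + align_mask) & ~align_mask yields 0x1004, the next entry aligned
 * to a 2^2 = 4 page boundary.  The common case, align_order = 0, is meant
 * to impose no extra alignment at all.
 */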
151 | ||
152 | static dma_addr_t iommu_alloc(struct iommu_table *tbl, void *page, | |
153 | unsigned int npages, enum dma_data_direction direction, | |
154 | unsigned int align_order) | |
155 | { | |
156 | unsigned long entry, flags; | |
157 | dma_addr_t ret = DMA_ERROR_CODE; | |
158 | ||
159 | spin_lock_irqsave(&(tbl->it_lock), flags); | |
160 | ||
161 | entry = iommu_range_alloc(tbl, npages, NULL, align_order); | |
162 | ||
163 | if (unlikely(entry == DMA_ERROR_CODE)) { | |
164 | spin_unlock_irqrestore(&(tbl->it_lock), flags); | |
165 | return DMA_ERROR_CODE; | |
166 | } | |
167 | ||
168 | entry += tbl->it_offset; /* Offset into real TCE table */ | |
169 | ret = entry << PAGE_SHIFT; /* Set the return dma address */ | |
170 | ||
171 | /* Put the TCEs in the HW table */ | |
172 | ppc_md.tce_build(tbl, entry, npages, (unsigned long)page & PAGE_MASK, | |
173 | direction); | |
174 | ||
175 | ||
176 | /* Flush/invalidate TLB caches if necessary */ | |
177 | if (ppc_md.tce_flush) | |
178 | ppc_md.tce_flush(tbl); | |
179 | ||
180 | spin_unlock_irqrestore(&(tbl->it_lock), flags); | |
181 | ||
182 | /* Make sure updates are seen by hardware */ | |
183 | mb(); | |
184 | ||
185 | return ret; | |
186 | } | |
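
/*
 * Illustrative note (not part of the original file): the bus address handed
 * back by iommu_alloc() is simply the TCE index scaled by the page size.
 * Assuming 4 KB pages (PAGE_SHIFT = 12), an allocation whose bitmap index
 * is 0x20 in a table with it_offset = 0x10 becomes
 *
 *	entry = 0x20 + 0x10 = 0x30
 *	ret   = 0x30 << 12  = 0x30000
 *
 * and __iommu_free() later recovers the entry with the inverse shift.
 */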
187 | ||
188 | static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, | |
189 | unsigned int npages) | |
190 | { | |
191 | unsigned long entry, free_entry; | |
192 | unsigned long i; | |
193 | ||
194 | entry = dma_addr >> PAGE_SHIFT; | |
195 | free_entry = entry - tbl->it_offset; | |
196 | ||
197 | if (((free_entry + npages) > tbl->it_size) || | |
198 | (entry < tbl->it_offset)) { | |
199 | if (printk_ratelimit()) { | |
200 | printk(KERN_INFO "iommu_free: invalid entry\n"); | |
201 | printk(KERN_INFO "\tentry = 0x%lx\n", entry); | |
202 | printk(KERN_INFO "\tdma_addr = 0x%lx\n", (u64)dma_addr); | |
203 | printk(KERN_INFO "\tTable = 0x%lx\n", (u64)tbl); | |
204 | printk(KERN_INFO "\tbus# = 0x%lx\n", (u64)tbl->it_busno); | |
205 | printk(KERN_INFO "\tsize = 0x%lx\n", (u64)tbl->it_size); | |
206 | printk(KERN_INFO "\tstartOff = 0x%lx\n", (u64)tbl->it_offset); | |
207 | printk(KERN_INFO "\tindex = 0x%lx\n", (u64)tbl->it_index); | |
208 | WARN_ON(1); | |
209 | } | |
210 | return; | |
211 | } | |
212 | ||
213 | ppc_md.tce_free(tbl, entry, npages); | |
214 | ||
215 | for (i = 0; i < npages; i++) | |
216 | __clear_bit(free_entry+i, tbl->it_map); | |
217 | } | |
218 | ||
219 | static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, | |
220 | unsigned int npages) | |
221 | { | |
222 | unsigned long flags; | |
223 | ||
224 | spin_lock_irqsave(&(tbl->it_lock), flags); | |
225 | ||
226 | __iommu_free(tbl, dma_addr, npages); | |
227 | ||
228 | /* Make sure TLB cache is flushed if the HW needs it. We do | |
229 | * not do an mb() here on purpose, it is not needed on any of | |
230 | * the current platforms. | |
231 | */ | |
232 | if (ppc_md.tce_flush) | |
233 | ppc_md.tce_flush(tbl); | |
234 | ||
235 | spin_unlock_irqrestore(&(tbl->it_lock), flags); | |
236 | } | |
237 | ||
238 | int iommu_map_sg(struct device *dev, struct iommu_table *tbl, | |
239 | struct scatterlist *sglist, int nelems, | |
240 | enum dma_data_direction direction) | |
241 | { | |
242 | dma_addr_t dma_next = 0, dma_addr; | |
243 | unsigned long flags; | |
244 | struct scatterlist *s, *outs, *segstart; | |
ac9af7cb | 245 | int outcount, incount; |
1da177e4 LT |
246 | unsigned long handle; |
247 | ||
248 | BUG_ON(direction == DMA_NONE); | |
249 | ||
250 | if ((nelems == 0) || !tbl) | |
251 | return 0; | |
252 | ||
253 | outs = s = segstart = &sglist[0]; | |
254 | outcount = 1; | |
ac9af7cb | 255 | incount = nelems; |
1da177e4 LT |
256 | handle = 0; |
257 | ||
258 | /* Init first segment length for backout at failure */ | |
259 | outs->dma_length = 0; | |
260 | ||
261 | DBG("mapping %d elements:\n", nelems); | |
262 | ||
263 | spin_lock_irqsave(&(tbl->it_lock), flags); | |
264 | ||
265 | for (s = outs; nelems; nelems--, s++) { | |
266 | unsigned long vaddr, npages, entry, slen; | |
267 | ||
268 | slen = s->length; | |
269 | /* Sanity check */ | |
270 | if (slen == 0) { | |
271 | dma_next = 0; | |
272 | continue; | |
273 | } | |
274 | /* Allocate iommu entries for that segment */ | |
275 | vaddr = (unsigned long)page_address(s->page) + s->offset; | |
276 | npages = PAGE_ALIGN(vaddr + slen) - (vaddr & PAGE_MASK); | |
277 | npages >>= PAGE_SHIFT; | |
278 | entry = iommu_range_alloc(tbl, npages, &handle, 0); | |
279 | ||
280 | DBG(" - vaddr: %lx, size: %lx\n", vaddr, slen); | |
281 | ||
282 | /* Handle failure */ | |
283 | if (unlikely(entry == DMA_ERROR_CODE)) { | |
284 | if (printk_ratelimit()) | |
285 | printk(KERN_INFO "iommu_alloc failed, tbl %p vaddr %lx" | |
286 | " npages %lx\n", tbl, vaddr, npages); | |
287 | goto failure; | |
288 | } | |
289 | ||
290 | /* Convert entry to a dma_addr_t */ | |
291 | entry += tbl->it_offset; | |
292 | dma_addr = entry << PAGE_SHIFT; | |
293 | dma_addr |= s->offset; | |
294 | ||
295 | DBG(" - %lx pages, entry: %lx, dma_addr: %lx\n", | |
296 | npages, entry, dma_addr); | |
297 | ||
298 | /* Insert into HW table */ | |
299 | ppc_md.tce_build(tbl, entry, npages, vaddr & PAGE_MASK, direction); | |
300 | ||
301 | /* If we are in an open segment, try merging */ | |
302 | if (segstart != s) { | |
303 | DBG(" - trying merge...\n"); | |
304 | /* We cannot merge if: | |
305 | * - allocated dma_addr isn't contiguous to previous allocation | |
306 | */ | |
307 | if (novmerge || (dma_addr != dma_next)) { | |
308 | /* Can't merge: create a new segment */ | |
309 | segstart = s; | |
310 | outcount++; outs++; | |
311 | DBG(" can't merge, new segment.\n"); | |
312 | } else { | |
313 | outs->dma_length += s->length; | |
314 | DBG(" merged, new len: %lx\n", outs->dma_length); | |
315 | } | |
316 | } | |
317 | ||
318 | if (segstart == s) { | |
319 | /* This is a new segment, fill entries */ | |
320 | DBG(" - filling new segment.\n"); | |
321 | outs->dma_address = dma_addr; | |
322 | outs->dma_length = slen; | |
323 | } | |
324 | ||
325 | /* Calculate next page pointer for contiguous check */ | |
326 | dma_next = dma_addr + slen; | |
327 | ||
328 | DBG(" - dma next is: %lx\n", dma_next); | |
329 | } | |
330 | ||
331 | /* Flush/invalidate TLB caches if necessary */ | |
332 | if (ppc_md.tce_flush) | |
333 | ppc_md.tce_flush(tbl); | |
334 | ||
335 | spin_unlock_irqrestore(&(tbl->it_lock), flags); | |
336 | ||
337 | /* Make sure updates are seen by hardware */ | |
338 | mb(); | |
339 | ||
340 | DBG("mapped %d elements:\n", outcount); | |
341 | ||
ac9af7cb | 342 | /* For the sake of iommu_unmap_sg, we clear out the length in the |
1da177e4 LT |
343 | * next entry of the sglist if we didn't fill the list completely |
344 | */ | |
ac9af7cb | 345 | if (outcount < incount) { |
1da177e4 LT |
346 | outs++; |
347 | outs->dma_address = DMA_ERROR_CODE; | |
348 | outs->dma_length = 0; | |
349 | } | |
350 | return outcount; | |
351 | ||
352 | failure: | |
353 | for (s = &sglist[0]; s <= outs; s++) { | |
354 | if (s->dma_length != 0) { | |
355 | unsigned long vaddr, npages; | |
356 | ||
357 | vaddr = s->dma_address & PAGE_MASK; | |
358 | npages = (PAGE_ALIGN(s->dma_address + s->dma_length) - vaddr) | |
359 | >> PAGE_SHIFT; | |
360 | __iommu_free(tbl, vaddr, npages); | |
361 | } | |
362 | } | |
363 | spin_unlock_irqrestore(&(tbl->it_lock), flags); | |
364 | return 0; | |
365 | } | |
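
/*
 * Illustrative example (not part of the original file): with virtual
 * merging enabled, two scatterlist entries whose mappings happen to land
 * in consecutive IOMMU pages come back as a single DMA segment.  Say the
 * first entry maps to bus address 0x30000 with length 0x1000 and the
 * second is allocated the very next TCE, giving bus address 0x31000: the
 * merge branch extends outs->dma_length to 0x2000 and outcount stays at 1,
 * so the caller walks fewer segments via sg_dma_address()/sg_dma_len().
 * Booting with iommu=novmerge (or CONFIG_IOMMU_VMERGE=n) forces the
 * "can't merge" branch and keeps one DMA segment per input entry.
 */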
366 | ||
367 | ||
368 | void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist, | |
369 | int nelems, enum dma_data_direction direction) | |
370 | { | |
371 | unsigned long flags; | |
372 | ||
373 | BUG_ON(direction == DMA_NONE); | |
374 | ||
375 | if (!tbl) | |
376 | return; | |
377 | ||
378 | spin_lock_irqsave(&(tbl->it_lock), flags); | |
379 | ||
380 | while (nelems--) { | |
381 | unsigned int npages; | |
382 | dma_addr_t dma_handle = sglist->dma_address; | |
383 | ||
384 | if (sglist->dma_length == 0) | |
385 | break; | |
386 | npages = (PAGE_ALIGN(dma_handle + sglist->dma_length) | |
387 | - (dma_handle & PAGE_MASK)) >> PAGE_SHIFT; | |
388 | __iommu_free(tbl, dma_handle, npages); | |
389 | sglist++; | |
390 | } | |
391 | ||
392 | /* Flush/invalidate TLBs if necessary. As for iommu_free(), we | |
393 | * do not do an mb() here, the affected platforms do not need it | |
394 | * when freeing. | |
395 | */ | |
396 | if (ppc_md.tce_flush) | |
397 | ppc_md.tce_flush(tbl); | |
398 | ||
399 | spin_unlock_irqrestore(&(tbl->it_lock), flags); | |
400 | } | |
401 | ||
/*
 * Build an iommu_table structure.  This contains a bitmap which
 * is used to manage allocation of the TCE space.
 */
struct iommu_table *iommu_init_table(struct iommu_table *tbl)
{
	unsigned long sz;
	static int welcomed = 0;

	/* Set aside 1/4 of the table for large allocations. */
	tbl->it_halfpoint = tbl->it_size * 3 / 4;

	/* number of bytes needed for the bitmap */
	sz = (tbl->it_size + 7) >> 3;

	tbl->it_map = (unsigned long *)__get_free_pages(GFP_ATOMIC, get_order(sz));
	if (!tbl->it_map)
		panic("iommu_init_table: Can't allocate %ld bytes\n", sz);

	memset(tbl->it_map, 0, sz);

	tbl->it_hint = 0;
	tbl->it_largehint = tbl->it_halfpoint;
	spin_lock_init(&tbl->it_lock);

	/* Clear the hardware table in case firmware left allocations in it */
	ppc_md.tce_free(tbl, tbl->it_offset, tbl->it_size);

	if (!welcomed) {
		printk(KERN_INFO "IOMMU table initialized, virtual merging %s\n",
		       novmerge ? "disabled" : "enabled");
		welcomed = 1;
	}

	return tbl;
}
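
/*
 * Sizing example for iommu_init_table() (illustrative, not part of the
 * original file): a table covering 256 MB of DMA space with 4 KB pages has
 * it_size = 0x10000 TCE entries, so the allocation bitmap needs
 *
 *	sz = (0x10000 + 7) >> 3 = 0x2000 bytes (8 KB)
 *
 * which __get_free_pages() satisfies with get_order(0x2000) = 1, i.e. two
 * contiguous 4 KB pages.  The top quarter of the entries (above
 * it_halfpoint) is only used by allocations larger than 15 pages.
 */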
438 | ||
439 | void iommu_free_table(struct device_node *dn) | |
440 | { | |
1635317f PM |
441 | struct pci_dn *pdn = dn->data; |
442 | struct iommu_table *tbl = pdn->iommu_table; | |
1da177e4 LT |
443 | unsigned long bitmap_sz, i; |
444 | unsigned int order; | |
445 | ||
446 | if (!tbl || !tbl->it_map) { | |
447 | printk(KERN_ERR "%s: expected TCE map for %s\n", __FUNCTION__, | |
448 | dn->full_name); | |
449 | return; | |
450 | } | |
451 | ||
452 | /* verify that table contains no entries */ | |
453 | /* it_size is in entries, and we're examining 64 at a time */ | |
454 | for (i = 0; i < (tbl->it_size/64); i++) { | |
455 | if (tbl->it_map[i] != 0) { | |
456 | printk(KERN_WARNING "%s: Unexpected TCEs for %s\n", | |
457 | __FUNCTION__, dn->full_name); | |
458 | break; | |
459 | } | |
460 | } | |
461 | ||
462 | /* calculate bitmap size in bytes */ | |
463 | bitmap_sz = (tbl->it_size + 7) / 8; | |
464 | ||
465 | /* free bitmap */ | |
466 | order = get_order(bitmap_sz); | |
467 | free_pages((unsigned long) tbl->it_map, order); | |
468 | ||
469 | /* free table */ | |
470 | kfree(tbl); | |
471 | } | |
472 | ||
473 | /* Creates TCEs for a user provided buffer. The user buffer must be | |
474 | * contiguous real kernel storage (not vmalloc). The address of the buffer | |
475 | * passed here is the kernel (virtual) address of the buffer. The buffer | |
476 | * need not be page aligned, the dma_addr_t returned will point to the same | |
477 | * byte within the page as vaddr. | |
478 | */ | |
479 | dma_addr_t iommu_map_single(struct iommu_table *tbl, void *vaddr, | |
480 | size_t size, enum dma_data_direction direction) | |
481 | { | |
482 | dma_addr_t dma_handle = DMA_ERROR_CODE; | |
483 | unsigned long uaddr; | |
484 | unsigned int npages; | |
485 | ||
486 | BUG_ON(direction == DMA_NONE); | |
487 | ||
488 | uaddr = (unsigned long)vaddr; | |
489 | npages = PAGE_ALIGN(uaddr + size) - (uaddr & PAGE_MASK); | |
490 | npages >>= PAGE_SHIFT; | |
491 | ||
492 | if (tbl) { | |
493 | dma_handle = iommu_alloc(tbl, vaddr, npages, direction, 0); | |
494 | if (dma_handle == DMA_ERROR_CODE) { | |
495 | if (printk_ratelimit()) { | |
496 | printk(KERN_INFO "iommu_alloc failed, " | |
497 | "tbl %p vaddr %p npages %d\n", | |
498 | tbl, vaddr, npages); | |
499 | } | |
500 | } else | |
501 | dma_handle |= (uaddr & ~PAGE_MASK); | |
502 | } | |
503 | ||
504 | return dma_handle; | |
505 | } | |
506 | ||
507 | void iommu_unmap_single(struct iommu_table *tbl, dma_addr_t dma_handle, | |
508 | size_t size, enum dma_data_direction direction) | |
509 | { | |
510 | BUG_ON(direction == DMA_NONE); | |
511 | ||
512 | if (tbl) | |
513 | iommu_free(tbl, dma_handle, (PAGE_ALIGN(dma_handle + size) - | |
514 | (dma_handle & PAGE_MASK)) >> PAGE_SHIFT); | |
515 | } | |
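
/*
 * Usage sketch (illustrative, not part of the original file): these entry
 * points are normally reached through the bus-specific DMA API glue rather
 * than called directly.  Assuming a driver already holds the iommu_table
 * for its device, the round trip looks roughly like:
 *
 *	dma_addr_t dma;
 *
 *	dma = iommu_map_single(tbl, buf, len, DMA_TO_DEVICE);
 *	if (dma == DMA_ERROR_CODE)
 *		return -ENOMEM;		// table full, back off
 *	// ... device performs DMA to/from 'dma' ...
 *	iommu_unmap_single(tbl, dma, len, DMA_TO_DEVICE);
 *
 * 'buf' must be ordinary lowmem kernel memory (not vmalloc), as the header
 * comment above iommu_map_single() requires.
 */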
516 | ||
517 | /* Allocates a contiguous real buffer and creates mappings over it. | |
518 | * Returns the virtual address of the buffer and sets dma_handle | |
519 | * to the dma address (mapping) of the first page. | |
520 | */ | |
521 | void *iommu_alloc_coherent(struct iommu_table *tbl, size_t size, | |
dd0fc66f | 522 | dma_addr_t *dma_handle, gfp_t flag) |
1da177e4 LT |
523 | { |
524 | void *ret = NULL; | |
525 | dma_addr_t mapping; | |
526 | unsigned int npages, order; | |
527 | ||
528 | size = PAGE_ALIGN(size); | |
529 | npages = size >> PAGE_SHIFT; | |
530 | order = get_order(size); | |
531 | ||
532 | /* | |
533 | * Client asked for way too much space. This is checked later | |
534 | * anyway. It is easier to debug here for the drivers than in | |
535 | * the tce tables. | |
536 | */ | |
537 | if (order >= IOMAP_MAX_ORDER) { | |
		printk("iommu_alloc_coherent size too large: 0x%lx\n", size);
		return NULL;
	}

	if (!tbl)
		return NULL;

	/* Alloc enough pages (and possibly more) */
	ret = (void *)__get_free_pages(flag, order);
	if (!ret)
		return NULL;
	memset(ret, 0, size);

	/* Set up tces to cover the allocated range */
	mapping = iommu_alloc(tbl, ret, npages, DMA_BIDIRECTIONAL, order);
	if (mapping == DMA_ERROR_CODE) {
		free_pages((unsigned long)ret, order);
		ret = NULL;
	} else
		*dma_handle = mapping;
	return ret;
}
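
/*
 * Sizing note for iommu_alloc_coherent() (illustrative, not part of the
 * original file): the buffer is rounded up to whole pages and then to a
 * power-of-two page count because __get_free_pages() works in orders.
 * Assuming 4 KB pages, a request for 6000 bytes becomes
 *
 *	size   = PAGE_ALIGN(6000) = 8192
 *	npages = 2
 *	order  = get_order(8192)  = 1
 *
 * and passing 'order' as the alignment to iommu_alloc() keeps the bus
 * address naturally aligned to the allocation size.
 */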
560 | ||
561 | void iommu_free_coherent(struct iommu_table *tbl, size_t size, | |
562 | void *vaddr, dma_addr_t dma_handle) | |
563 | { | |
564 | unsigned int npages; | |
565 | ||
566 | if (tbl) { | |
567 | size = PAGE_ALIGN(size); | |
568 | npages = size >> PAGE_SHIFT; | |
569 | iommu_free(tbl, dma_handle, npages); | |
570 | free_pages((unsigned long)vaddr, get_order(size)); | |
571 | } | |
572 | } |