/*
 * drivers/pci/intel-iommu.c — Intel VT-d (DMAR) IOMMU driver.
 */
1 /*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
17 * Copyright (C) 2006-2008 Intel Corporation
18 * Author: Ashok Raj <ashok.raj@intel.com>
19 * Author: Shaohua Li <shaohua.li@intel.com>
20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21 */
22
23 #include <linux/init.h>
24 #include <linux/bitmap.h>
25 #include <linux/debugfs.h>
26 #include <linux/slab.h>
27 #include <linux/irq.h>
28 #include <linux/interrupt.h>
29 #include <linux/sysdev.h>
30 #include <linux/spinlock.h>
31 #include <linux/pci.h>
32 #include <linux/dmar.h>
33 #include <linux/dma-mapping.h>
34 #include <linux/mempool.h>
35 #include <linux/timer.h>
36 #include <linux/iova.h>
37 #include <linux/intel-iommu.h>
38 #include <asm/proto.h> /* force_iommu in this header in x86-64*/
39 #include <asm/cacheflush.h>
40 #include <asm/iommu.h>
41 #include "pci.h"
42
/* Device-class tests used when deciding how to assign domains. */
#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)

/* IOAPIC MMIO window — must never be handed out as DMA addresses. */
#define IOAPIC_RANGE_START	(0xfee00000)
#define IOAPIC_RANGE_END	(0xfeefffff)
#define IOVA_START_ADDR		(0x1000)

/* Default guest address width for a new domain, in bits. */
#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48

/* Highest address representable with a given guest address width. */
#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
53
54
static void flush_unmaps_timeout(unsigned long data);

/* Timer driving the deferred (batched) IOTLB flush of unmapped IOVAs. */
DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);

#define HIGH_WATER_MARK 250
/* Per-IOMMU table of IOVAs waiting for a batched flush and free. */
struct deferred_flush_tables {
	int next;	/* next free slot in the arrays below */
	struct iova *iova[HIGH_WATER_MARK];
	struct dmar_domain *domain[HIGH_WATER_MARK];
};

static struct deferred_flush_tables *deferred_flush;

/* bitmap for indexing intel_iommus */
static int g_num_of_iommus;

/* Protects the deferred-unmap list below. */
static DEFINE_SPINLOCK(async_umap_flush_lock);
static LIST_HEAD(unmaps_to_do);

static int timer_on;		/* nonzero while unmap_timer is armed */
static long list_size;

static void domain_remove_dev_info(struct dmar_domain *domain);

/* Boot-parameter state; see intel_iommu_setup(). */
int dmar_disabled;
static int __initdata dmar_map_gfx = 1;	/* map GFX devices unless igfx_off */
static int dmar_forcedac;		/* force DAC (64-bit) DMA addressing */
static int intel_iommu_strict;		/* flush IOTLB synchronously on unmap */

#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
static DEFINE_SPINLOCK(device_domain_lock);
static LIST_HEAD(device_domain_list);
87
88 static int __init intel_iommu_setup(char *str)
89 {
90 if (!str)
91 return -EINVAL;
92 while (*str) {
93 if (!strncmp(str, "off", 3)) {
94 dmar_disabled = 1;
95 printk(KERN_INFO"Intel-IOMMU: disabled\n");
96 } else if (!strncmp(str, "igfx_off", 8)) {
97 dmar_map_gfx = 0;
98 printk(KERN_INFO
99 "Intel-IOMMU: disable GFX device mapping\n");
100 } else if (!strncmp(str, "forcedac", 8)) {
101 printk(KERN_INFO
102 "Intel-IOMMU: Forcing DAC for PCI devices\n");
103 dmar_forcedac = 1;
104 } else if (!strncmp(str, "strict", 6)) {
105 printk(KERN_INFO
106 "Intel-IOMMU: disable batched IOTLB flush\n");
107 intel_iommu_strict = 1;
108 }
109
110 str += strcspn(str, ",");
111 while (*str == ',')
112 str++;
113 }
114 return 0;
115 }
116 __setup("intel_iommu=", intel_iommu_setup);
117
/* Slab caches backing the alloc_*_mem()/free_*_mem() helpers below. */
static struct kmem_cache *iommu_domain_cache;
static struct kmem_cache *iommu_devinfo_cache;
static struct kmem_cache *iommu_iova_cache;
121
122 static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
123 {
124 unsigned int flags;
125 void *vaddr;
126
127 /* trying to avoid low memory issues */
128 flags = current->flags & PF_MEMALLOC;
129 current->flags |= PF_MEMALLOC;
130 vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
131 current->flags &= (~PF_MEMALLOC | flags);
132 return vaddr;
133 }
134
135
136 static inline void *alloc_pgtable_page(void)
137 {
138 unsigned int flags;
139 void *vaddr;
140
141 /* trying to avoid low memory issues */
142 flags = current->flags & PF_MEMALLOC;
143 current->flags |= PF_MEMALLOC;
144 vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
145 current->flags &= (~PF_MEMALLOC | flags);
146 return vaddr;
147 }
148
/* Free a page obtained from alloc_pgtable_page(). */
static inline void free_pgtable_page(void *vaddr)
{
	free_page((unsigned long)vaddr);
}

/* Allocate a struct dmar_domain from its slab cache. */
static inline void *alloc_domain_mem(void)
{
	return iommu_kmem_cache_alloc(iommu_domain_cache);
}

/* Release a struct dmar_domain back to its slab cache. */
static void free_domain_mem(void *vaddr)
{
	kmem_cache_free(iommu_domain_cache, vaddr);
}

/* Allocate a struct device_domain_info from its slab cache. */
static inline void * alloc_devinfo_mem(void)
{
	return iommu_kmem_cache_alloc(iommu_devinfo_cache);
}

/* Release a struct device_domain_info back to its slab cache. */
static inline void free_devinfo_mem(void *vaddr)
{
	kmem_cache_free(iommu_devinfo_cache, vaddr);
}

/* Non-static: iova allocator hooks used by the shared iova code. */
struct iova *alloc_iova_mem(void)
{
	return iommu_kmem_cache_alloc(iommu_iova_cache);
}

void free_iova_mem(struct iova *iova)
{
	kmem_cache_free(iommu_iova_cache, iova);
}
183
/* Gets context entry for a given bus and devfn.  Allocates the bus's
 * context table on first use; returns NULL only on allocation failure. */
static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
		u8 bus, u8 devfn)
{
	struct root_entry *root;
	struct context_entry *context;
	unsigned long phy_addr;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	root = &iommu->root_entry[bus];
	context = get_context_addr_from_root(root);
	if (!context) {
		/* first device on this bus: allocate its context table */
		context = (struct context_entry *)alloc_pgtable_page();
		if (!context) {
			spin_unlock_irqrestore(&iommu->lock, flags);
			return NULL;
		}
		/* flush the zeroed table before the root entry points at it,
		 * then flush the updated root entry itself */
		__iommu_flush_cache(iommu, (void *)context, PAGE_SIZE_4K);
		phy_addr = virt_to_phys((void *)context);
		set_root_value(root, phy_addr);
		set_root_present(root);
		__iommu_flush_cache(iommu, root, sizeof(*root));
	}
	spin_unlock_irqrestore(&iommu->lock, flags);
	return &context[devfn];
}
211
212 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
213 {
214 struct root_entry *root;
215 struct context_entry *context;
216 int ret;
217 unsigned long flags;
218
219 spin_lock_irqsave(&iommu->lock, flags);
220 root = &iommu->root_entry[bus];
221 context = get_context_addr_from_root(root);
222 if (!context) {
223 ret = 0;
224 goto out;
225 }
226 ret = context_present(context[devfn]);
227 out:
228 spin_unlock_irqrestore(&iommu->lock, flags);
229 return ret;
230 }
231
232 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
233 {
234 struct root_entry *root;
235 struct context_entry *context;
236 unsigned long flags;
237
238 spin_lock_irqsave(&iommu->lock, flags);
239 root = &iommu->root_entry[bus];
240 context = get_context_addr_from_root(root);
241 if (context) {
242 context_clear_entry(context[devfn]);
243 __iommu_flush_cache(iommu, &context[devfn], \
244 sizeof(*context));
245 }
246 spin_unlock_irqrestore(&iommu->lock, flags);
247 }
248
249 static void free_context_table(struct intel_iommu *iommu)
250 {
251 struct root_entry *root;
252 int i;
253 unsigned long flags;
254 struct context_entry *context;
255
256 spin_lock_irqsave(&iommu->lock, flags);
257 if (!iommu->root_entry) {
258 goto out;
259 }
260 for (i = 0; i < ROOT_ENTRY_NR; i++) {
261 root = &iommu->root_entry[i];
262 context = get_context_addr_from_root(root);
263 if (context)
264 free_pgtable_page(context);
265 }
266 free_pgtable_page(iommu->root_entry);
267 iommu->root_entry = NULL;
268 out:
269 spin_unlock_irqrestore(&iommu->lock, flags);
270 }
271
/* page table handling */
#define LEVEL_STRIDE		(9)
#define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)

/* Each table level resolves LEVEL_STRIDE (9) address bits; an adjusted
 * guest address width (agaw) of N corresponds to N+2 levels and an
 * address width of 30 + 9*N bits. */
static inline int agaw_to_level(int agaw)
{
	return 2 + agaw;
}

static inline int agaw_to_width(int agaw)
{
	return LEVEL_STRIDE * agaw + 30;
}

static inline int width_to_agaw(int width)
{
	return (width - 30) / LEVEL_STRIDE;
}
291
292 static inline unsigned int level_to_offset_bits(int level)
293 {
294 return (12 + (level - 1) * LEVEL_STRIDE);
295 }
296
297 static inline int address_level_offset(u64 addr, int level)
298 {
299 return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
300 }
301
302 static inline u64 level_mask(int level)
303 {
304 return ((u64)-1 << level_to_offset_bits(level));
305 }
306
307 static inline u64 level_size(int level)
308 {
309 return ((u64)1 << level_to_offset_bits(level));
310 }
311
312 static inline u64 align_to_level(u64 addr, int level)
313 {
314 return ((addr + level_size(level) - 1) & level_mask(level));
315 }
316
/* Walk — and build where missing — the page table down to the level-1
 * PTE covering @addr.  Returns the PTE pointer, or NULL if an
 * intermediate table page could not be allocated.  Intermediate entries
 * get both read and write set; the leaf entry controls real access. */
static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
{
	int addr_width = agaw_to_width(domain->agaw);
	struct dma_pte *parent, *pte = NULL;
	int level = agaw_to_level(domain->agaw);
	int offset;
	unsigned long flags;

	BUG_ON(!domain->pgd);

	/* clip @addr to the domain's address width */
	addr &= (((u64)1) << addr_width) - 1;
	parent = domain->pgd;

	spin_lock_irqsave(&domain->mapping_lock, flags);
	while (level > 0) {
		void *tmp_page;

		offset = address_level_offset(addr, level);
		pte = &parent[offset];
		if (level == 1)
			break;

		if (!dma_pte_present(*pte)) {
			/* missing intermediate table: allocate and link it */
			tmp_page = alloc_pgtable_page();

			if (!tmp_page) {
				spin_unlock_irqrestore(&domain->mapping_lock,
					flags);
				return NULL;
			}
			/* flush the new table before pointing at it */
			__iommu_flush_cache(domain->iommu, tmp_page,
					PAGE_SIZE_4K);
			dma_set_pte_addr(*pte, virt_to_phys(tmp_page));
			/*
			 * high level table always sets r/w, last level page
			 * table control read/write
			 */
			dma_set_pte_readable(*pte);
			dma_set_pte_writable(*pte);
			__iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
		}
		parent = phys_to_virt(dma_pte_addr(*pte));
		level--;
	}

	spin_unlock_irqrestore(&domain->mapping_lock, flags);
	return pte;
}
365
/* return address's pte at specific level, or NULL if @addr has no
 * present entry at (or above) that level.  Unlike addr_to_dma_pte()
 * this never allocates and takes no lock. */
static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
		int level)
{
	struct dma_pte *parent, *pte = NULL;
	/* total counts down from the top level toward the target @level */
	int total = agaw_to_level(domain->agaw);
	int offset;

	parent = domain->pgd;
	while (level <= total) {
		offset = address_level_offset(addr, total);
		pte = &parent[offset];
		if (level == total)
			return pte;

		/* nothing mapped below this entry — give up */
		if (!dma_pte_present(*pte))
			break;
		parent = phys_to_virt(dma_pte_addr(*pte));
		total--;
	}
	return NULL;
}
388
389 /* clear one page's page table */
390 static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
391 {
392 struct dma_pte *pte = NULL;
393
394 /* get last level pte */
395 pte = dma_addr_level_pte(domain, addr, 1);
396
397 if (pte) {
398 dma_clear_pte(*pte);
399 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
400 }
401 }
402
403 /* clear last level pte, a tlb flush should be followed */
404 static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
405 {
406 int addr_width = agaw_to_width(domain->agaw);
407
408 start &= (((u64)1) << addr_width) - 1;
409 end &= (((u64)1) << addr_width) - 1;
410 /* in case it's partial page */
411 start = PAGE_ALIGN_4K(start);
412 end &= PAGE_MASK_4K;
413
414 /* we don't need lock here, nobody else touches the iova range */
415 while (start < end) {
416 dma_pte_clear_one(domain, start);
417 start += PAGE_SIZE_4K;
418 }
419 }
420
/* free page table pages. last level pte should already be cleared */
static void dma_pte_free_pagetable(struct dmar_domain *domain,
	u64 start, u64 end)
{
	int addr_width = agaw_to_width(domain->agaw);
	struct dma_pte *pte;
	int total = agaw_to_level(domain->agaw);
	int level;
	u64 tmp;

	/* clip the range to the domain's address width */
	start &= (((u64)1) << addr_width) - 1;
	end &= (((u64)1) << addr_width) - 1;

	/* we don't need lock here, nobody else touches the iova range */
	level = 2;
	while (level <= total) {
		/* only free table pages wholly contained in [start, end) */
		tmp = align_to_level(start, level);
		if (tmp >= end || (tmp + level_size(level) > end))
			return;

		while (tmp < end) {
			pte = dma_addr_level_pte(domain, tmp, level);
			if (pte) {
				/* free the table this entry points at,
				 * then clear and flush the entry itself */
				free_pgtable_page(
					phys_to_virt(dma_pte_addr(*pte)));
				dma_clear_pte(*pte);
				__iommu_flush_cache(domain->iommu,
						pte, sizeof(*pte));
			}
			tmp += level_size(level);
		}
		level++;
	}
	/* free pgd */
	if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
		free_pgtable_page(domain->pgd);
		domain->pgd = NULL;
	}
}
460
461 /* iommu handling */
462 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
463 {
464 struct root_entry *root;
465 unsigned long flags;
466
467 root = (struct root_entry *)alloc_pgtable_page();
468 if (!root)
469 return -ENOMEM;
470
471 __iommu_flush_cache(iommu, root, PAGE_SIZE_4K);
472
473 spin_lock_irqsave(&iommu->lock, flags);
474 iommu->root_entry = root;
475 spin_unlock_irqrestore(&iommu->lock, flags);
476
477 return 0;
478 }
479
/* Program the root table address into the hardware and latch it with
 * the Set Root Table Pointer command, spinning until the status
 * register acknowledges completion. */
static void iommu_set_root_entry(struct intel_iommu *iommu)
{
	void *addr;
	u32 cmd, sts;
	unsigned long flag;

	addr = iommu->root_entry;

	spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));

	cmd = iommu->gcmd | DMA_GCMD_SRTP;
	writel(cmd, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		readl, (sts & DMA_GSTS_RTPS), sts);

	spin_unlock_irqrestore(&iommu->register_lock, flag);
}
500
/* Flush the IOMMU's internal write buffer; a no-op unless the hardware
 * advertises that it needs it (cap_rwbf).  Spins until the write-buffer
 * flush status bit clears. */
static void iommu_flush_write_buffer(struct intel_iommu *iommu)
{
	u32 val;
	unsigned long flag;

	if (!cap_rwbf(iommu->cap))
		return;
	val = iommu->gcmd | DMA_GCMD_WBF;

	spin_lock_irqsave(&iommu->register_lock, flag);
	writel(val, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		readl, (!(val & DMA_GSTS_WBFS)), val);

	spin_unlock_irqrestore(&iommu->register_lock, flag);
}
519
/* return value determine if we need a write buffer flush.
 * Invalidate the context cache with the requested granularity (@type):
 * global, domain-selective (@did) or device-selective (@source_id /
 * @function_mask).  Returns 1 if the flush was skipped (hardware does
 * not cache non-present entries), 0 if an invalidation was issued. */
static int __iommu_flush_context(struct intel_iommu *iommu,
	u16 did, u16 source_id, u8 function_mask, u64 type,
	int non_present_entry_flush)
{
	u64 val = 0;
	unsigned long flag;

	/*
	 * In the non-present entry flush case, if hardware doesn't cache
	 * non-present entry we do nothing and if hardware cache non-present
	 * entry, we flush entries of domain 0 (the domain id is used to cache
	 * any non-present entries)
	 */
	if (non_present_entry_flush) {
		if (!cap_caching_mode(iommu->cap))
			return 1;
		else
			did = 0;
	}

	/* build the invalidation descriptor for the requested granularity */
	switch (type) {
	case DMA_CCMD_GLOBAL_INVL:
		val = DMA_CCMD_GLOBAL_INVL;
		break;
	case DMA_CCMD_DOMAIN_INVL:
		val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
		break;
	case DMA_CCMD_DEVICE_INVL:
		val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
			| DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
		break;
	default:
		BUG();
	}
	val |= DMA_CCMD_ICC;

	spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);

	/* Make sure hardware complete it: ICC clears when done */
	IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
		dmar_readq, (!(val & DMA_CCMD_ICC)), val);

	spin_unlock_irqrestore(&iommu->register_lock, flag);

	/* flush context entry will implicitly flush write buffer */
	return 0;
}
569
570 static int inline iommu_flush_context_global(struct intel_iommu *iommu,
571 int non_present_entry_flush)
572 {
573 return __iommu_flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
574 non_present_entry_flush);
575 }
576
577 static int inline iommu_flush_context_domain(struct intel_iommu *iommu, u16 did,
578 int non_present_entry_flush)
579 {
580 return __iommu_flush_context(iommu, did, 0, 0, DMA_CCMD_DOMAIN_INVL,
581 non_present_entry_flush);
582 }
583
584 static int inline iommu_flush_context_device(struct intel_iommu *iommu,
585 u16 did, u16 source_id, u8 function_mask, int non_present_entry_flush)
586 {
587 return __iommu_flush_context(iommu, did, source_id, function_mask,
588 DMA_CCMD_DEVICE_INVL, non_present_entry_flush);
589 }
590
/* return value determine if we need a write buffer flush.
 * Invalidate the IOTLB with the requested granularity (@type): global,
 * domain-selective (@did) or page-selective (@addr/@size_order).
 * Returns 1 if the flush was skipped (hardware does not cache
 * non-present entries), 0 if an invalidation was issued. */
static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
	u64 addr, unsigned int size_order, u64 type,
	int non_present_entry_flush)
{
	int tlb_offset = ecap_iotlb_offset(iommu->ecap);
	u64 val = 0, val_iva = 0;
	unsigned long flag;

	/*
	 * In the non-present entry flush case, if hardware doesn't cache
	 * non-present entry we do nothing and if hardware cache non-present
	 * entry, we flush entries of domain 0 (the domain id is used to cache
	 * any non-present entries)
	 */
	if (non_present_entry_flush) {
		if (!cap_caching_mode(iommu->cap))
			return 1;
		else
			did = 0;
	}

	switch (type) {
	case DMA_TLB_GLOBAL_FLUSH:
		/* global flush doesn't need set IVA_REG */
		val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
		break;
	case DMA_TLB_DSI_FLUSH:
		val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		break;
	case DMA_TLB_PSI_FLUSH:
		val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		/* Note: always flush non-leaf currently */
		val_iva = size_order | addr;
		break;
	default:
		BUG();
	}
	/* Note: set drain read/write */
#if 0
	/*
	 * This is probably to be super secure.. Looks like we can
	 * ignore it without any impact.
	 */
	if (cap_read_drain(iommu->cap))
		val |= DMA_TLB_READ_DRAIN;
#endif
	if (cap_write_drain(iommu->cap))
		val |= DMA_TLB_WRITE_DRAIN;

	spin_lock_irqsave(&iommu->register_lock, flag);
	/* Note: Only uses first TLB reg currently */
	if (val_iva)
		dmar_writeq(iommu->reg + tlb_offset, val_iva);
	dmar_writeq(iommu->reg + tlb_offset + 8, val);

	/* Make sure hardware complete it: IVT clears when done */
	IOMMU_WAIT_OP(iommu, tlb_offset + 8,
		dmar_readq, (!(val & DMA_TLB_IVT)), val);

	spin_unlock_irqrestore(&iommu->register_lock, flag);

	/* check IOTLB invalidation granularity */
	if (DMA_TLB_IAIG(val) == 0)
		printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
	if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
		pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
			DMA_TLB_IIRG(type), DMA_TLB_IAIG(val));
	/* flush iotlb entry will implicitly flush write buffer */
	return 0;
}
662
663 static int inline iommu_flush_iotlb_global(struct intel_iommu *iommu,
664 int non_present_entry_flush)
665 {
666 return __iommu_flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
667 non_present_entry_flush);
668 }
669
670 static int inline iommu_flush_iotlb_dsi(struct intel_iommu *iommu, u16 did,
671 int non_present_entry_flush)
672 {
673 return __iommu_flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH,
674 non_present_entry_flush);
675 }
676
/* Page-selective IOTLB invalidation for @pages pages starting at @addr,
 * falling back to a domain-selective flush when PSI is unsupported or
 * the rounded-up range exceeds the hardware's maximum address mask. */
static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
	u64 addr, unsigned int pages, int non_present_entry_flush)
{
	unsigned int mask;

	BUG_ON(addr & (~PAGE_MASK_4K));
	BUG_ON(pages == 0);

	/* Fallback to domain selective flush if no PSI support */
	if (!cap_pgsel_inv(iommu->cap))
		return iommu_flush_iotlb_dsi(iommu, did,
			non_present_entry_flush);

	/*
	 * PSI requires page size to be 2 ^ x, and the base address is naturally
	 * aligned to the size
	 */
	mask = ilog2(__roundup_pow_of_two(pages));
	/* Fallback to domain selective flush if size is too big */
	if (mask > cap_max_amask_val(iommu->cap))
		return iommu_flush_iotlb_dsi(iommu, did,
			non_present_entry_flush);

	return __iommu_flush_iotlb(iommu, did, addr, mask,
		DMA_TLB_PSI_FLUSH, non_present_entry_flush);
}
703
/* Disable the hardware's protected memory regions: clear the enable bit
 * in PMEN and wait for the protected-region status bit to clear. */
static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
{
	u32 pmen;
	unsigned long flags;

	spin_lock_irqsave(&iommu->register_lock, flags);
	pmen = readl(iommu->reg + DMAR_PMEN_REG);
	pmen &= ~DMA_PMEN_EPM;
	writel(pmen, iommu->reg + DMAR_PMEN_REG);

	/* wait for the protected region status bit to clear */
	IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
		readl, !(pmen & DMA_PMEN_PRS), pmen);

	spin_unlock_irqrestore(&iommu->register_lock, flags);
}
720
/* Enable DMA translation on @iommu and record DMA_GCMD_TE in the cached
 * gcmd only after the hardware has acknowledged it. */
static int iommu_enable_translation(struct intel_iommu *iommu)
{
	u32 sts;
	unsigned long flags;

	spin_lock_irqsave(&iommu->register_lock, flags);
	writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		readl, (sts & DMA_GSTS_TES), sts);

	iommu->gcmd |= DMA_GCMD_TE;
	spin_unlock_irqrestore(&iommu->register_lock, flags);
	return 0;
}
737
/* Disable DMA translation on @iommu, waiting for the translation-enable
 * status bit to clear before returning. */
static int iommu_disable_translation(struct intel_iommu *iommu)
{
	u32 sts;
	unsigned long flag;

	spin_lock_irqsave(&iommu->register_lock, flag);
	iommu->gcmd &= ~DMA_GCMD_TE;
	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		readl, (!(sts & DMA_GSTS_TES)), sts);

	spin_unlock_irqrestore(&iommu->register_lock, flag);
	return 0;
}
754
/* iommu interrupt handling. Most stuff are MSI-like. */

/* Human-readable strings indexed by the fault reason code reported in
 * the fault recording registers; codes past the table end are reported
 * as "Unknown" by dmar_get_fault_reason(). */
static const char *fault_reason_strings[] =
{
	"Software",
	"Present bit in root entry is clear",
	"Present bit in context entry is clear",
	"Invalid context entry",
	"Access beyond MGAW",
	"PTE Write access is not set",
	"PTE Read access is not set",
	"Next page table ptr is invalid",
	"Root table address invalid",
	"Context table ptr is invalid",
	"non-zero reserved fields in RTP",
	"non-zero reserved fields in CTP",
	"non-zero reserved fields in PTE",
};
#define MAX_FAULT_REASON_IDX	(ARRAY_SIZE(fault_reason_strings) - 1)
774
775 const char *dmar_get_fault_reason(u8 fault_reason)
776 {
777 if (fault_reason > MAX_FAULT_REASON_IDX)
778 return "Unknown";
779 else
780 return fault_reason_strings[fault_reason];
781 }
782
/* Unmask the DMAR fault-event interrupt by clearing the fault event
 * control register. */
void dmar_msi_unmask(unsigned int irq)
{
	struct intel_iommu *iommu = get_irq_data(irq);
	unsigned long flag;

	/* unmask it */
	spin_lock_irqsave(&iommu->register_lock, flag);
	writel(0, iommu->reg + DMAR_FECTL_REG);
	/* Read a reg to force flush the post write */
	readl(iommu->reg + DMAR_FECTL_REG);
	spin_unlock_irqrestore(&iommu->register_lock, flag);
}
795
/* Mask the DMAR fault-event interrupt by setting DMA_FECTL_IM in the
 * fault event control register. */
void dmar_msi_mask(unsigned int irq)
{
	unsigned long flag;
	struct intel_iommu *iommu = get_irq_data(irq);

	/* mask it */
	spin_lock_irqsave(&iommu->register_lock, flag);
	writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
	/* Read a reg to force flush the post write */
	readl(iommu->reg + DMAR_FECTL_REG);
	spin_unlock_irqrestore(&iommu->register_lock, flag);
}
808
/* Program the fault-event MSI message (data + address) into the
 * IOMMU's fault event registers. */
void dmar_msi_write(int irq, struct msi_msg *msg)
{
	struct intel_iommu *iommu = get_irq_data(irq);
	unsigned long flag;

	spin_lock_irqsave(&iommu->register_lock, flag);
	writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
	writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
	writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
	spin_unlock_irqrestore(&iommu->register_lock, flag);
}
820
/* Read back the fault-event MSI message currently programmed into the
 * IOMMU's fault event registers. */
void dmar_msi_read(int irq, struct msi_msg *msg)
{
	struct intel_iommu *iommu = get_irq_data(irq);
	unsigned long flag;

	spin_lock_irqsave(&iommu->register_lock, flag);
	msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
	msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
	msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
	spin_unlock_irqrestore(&iommu->register_lock, flag);
}
832
/* Log one decoded DMA fault: requester id (bus/slot/func), faulting
 * address and the decoded reason string.  Nonzero @type is logged as a
 * read fault, zero as a write fault. */
static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
		u8 fault_reason, u16 source_id, u64 addr)
{
	const char *reason;

	reason = dmar_get_fault_reason(fault_reason);

	printk(KERN_ERR
		"DMAR:[%s] Request device [%02x:%02x.%d] "
		"fault addr %llx \n"
		"DMAR:[fault reason %02d] %s\n",
		(type ? "DMA Read" : "DMA Write"),
		(source_id >> 8), PCI_SLOT(source_id & 0xFF),
		PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
	return 0;
}
849
850 #define PRIMARY_FAULT_REG_LEN (16)
851 static irqreturn_t iommu_page_fault(int irq, void *dev_id)
852 {
853 struct intel_iommu *iommu = dev_id;
854 int reg, fault_index;
855 u32 fault_status;
856 unsigned long flag;
857
858 spin_lock_irqsave(&iommu->register_lock, flag);
859 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
860
861 /* TBD: ignore advanced fault log currently */
862 if (!(fault_status & DMA_FSTS_PPF))
863 goto clear_overflow;
864
865 fault_index = dma_fsts_fault_record_index(fault_status);
866 reg = cap_fault_reg_offset(iommu->cap);
867 while (1) {
868 u8 fault_reason;
869 u16 source_id;
870 u64 guest_addr;
871 int type;
872 u32 data;
873
874 /* highest 32 bits */
875 data = readl(iommu->reg + reg +
876 fault_index * PRIMARY_FAULT_REG_LEN + 12);
877 if (!(data & DMA_FRCD_F))
878 break;
879
880 fault_reason = dma_frcd_fault_reason(data);
881 type = dma_frcd_type(data);
882
883 data = readl(iommu->reg + reg +
884 fault_index * PRIMARY_FAULT_REG_LEN + 8);
885 source_id = dma_frcd_source_id(data);
886
887 guest_addr = dmar_readq(iommu->reg + reg +
888 fault_index * PRIMARY_FAULT_REG_LEN);
889 guest_addr = dma_frcd_page_addr(guest_addr);
890 /* clear the fault */
891 writel(DMA_FRCD_F, iommu->reg + reg +
892 fault_index * PRIMARY_FAULT_REG_LEN + 12);
893
894 spin_unlock_irqrestore(&iommu->register_lock, flag);
895
896 iommu_page_fault_do_one(iommu, type, fault_reason,
897 source_id, guest_addr);
898
899 fault_index++;
900 if (fault_index > cap_num_fault_regs(iommu->cap))
901 fault_index = 0;
902 spin_lock_irqsave(&iommu->register_lock, flag);
903 }
904 clear_overflow:
905 /* clear primary fault overflow */
906 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
907 if (fault_status & DMA_FSTS_PFO)
908 writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);
909
910 spin_unlock_irqrestore(&iommu->register_lock, flag);
911 return IRQ_HANDLED;
912 }
913
914 int dmar_set_interrupt(struct intel_iommu *iommu)
915 {
916 int irq, ret;
917
918 irq = create_irq();
919 if (!irq) {
920 printk(KERN_ERR "IOMMU: no free vectors\n");
921 return -EINVAL;
922 }
923
924 set_irq_data(irq, iommu);
925 iommu->irq = irq;
926
927 ret = arch_setup_dmar_msi(irq);
928 if (ret) {
929 set_irq_data(irq, NULL);
930 iommu->irq = 0;
931 destroy_irq(irq);
932 return 0;
933 }
934
935 /* Force fault register is cleared */
936 iommu_page_fault(irq, iommu);
937
938 ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
939 if (ret)
940 printk(KERN_ERR "IOMMU: can't request irq\n");
941 return ret;
942 }
943
944 static int iommu_init_domains(struct intel_iommu *iommu)
945 {
946 unsigned long ndomains;
947 unsigned long nlongs;
948
949 ndomains = cap_ndoms(iommu->cap);
950 pr_debug("Number of Domains supportd <%ld>\n", ndomains);
951 nlongs = BITS_TO_LONGS(ndomains);
952
953 /* TBD: there might be 64K domains,
954 * consider other allocation for future chip
955 */
956 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
957 if (!iommu->domain_ids) {
958 printk(KERN_ERR "Allocating domain id array failed\n");
959 return -ENOMEM;
960 }
961 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
962 GFP_KERNEL);
963 if (!iommu->domains) {
964 printk(KERN_ERR "Allocating domain array failed\n");
965 kfree(iommu->domain_ids);
966 return -ENOMEM;
967 }
968
969 spin_lock_init(&iommu->lock);
970
971 /*
972 * if Caching mode is set, then invalid translations are tagged
973 * with domainid 0. Hence we need to pre-allocate it.
974 */
975 if (cap_caching_mode(iommu->cap))
976 set_bit(0, iommu->domain_ids);
977 return 0;
978 }
979
980
981 static void domain_exit(struct dmar_domain *domain);
982
/* Tear down @iommu: destroy every domain still allocated in its id
 * bitmap, disable translation, release its interrupt, free the domain
 * bookkeeping arrays and the context tables. */
void free_dmar_iommu(struct intel_iommu *iommu)
{
	struct dmar_domain *domain;
	int i;

	/* walk every set bit (allocated domain id) and destroy it */
	i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
	for (; i < cap_ndoms(iommu->cap); ) {
		domain = iommu->domains[i];
		clear_bit(i, iommu->domain_ids);
		domain_exit(domain);
		i = find_next_bit(iommu->domain_ids,
			cap_ndoms(iommu->cap), i+1);
	}

	if (iommu->gcmd & DMA_GCMD_TE)
		iommu_disable_translation(iommu);

	if (iommu->irq) {
		set_irq_data(iommu->irq, NULL);
		/* This will mask the irq */
		free_irq(iommu->irq, iommu);
		destroy_irq(iommu->irq);
	}

	kfree(iommu->domains);
	kfree(iommu->domain_ids);

	/* free context mapping */
	free_context_table(iommu);
}
1013
/* Allocate a dmar_domain on @iommu and claim a free domain id for it
 * under the iommu lock.  Returns NULL if the slab allocation fails or
 * every domain id is already in use. */
static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
{
	unsigned long num;
	unsigned long ndomains;
	struct dmar_domain *domain;
	unsigned long flags;

	domain = alloc_domain_mem();
	if (!domain)
		return NULL;

	ndomains = cap_ndoms(iommu->cap);

	spin_lock_irqsave(&iommu->lock, flags);
	num = find_first_zero_bit(iommu->domain_ids, ndomains);
	if (num >= ndomains) {
		spin_unlock_irqrestore(&iommu->lock, flags);
		free_domain_mem(domain);
		printk(KERN_ERR "IOMMU: no free domain ids\n");
		return NULL;
	}

	/* claim the id and publish the domain */
	set_bit(num, iommu->domain_ids);
	domain->id = num;
	domain->iommu = iommu;
	iommu->domains[num] = domain;
	spin_unlock_irqrestore(&iommu->lock, flags);

	return domain;
}
1044
1045 static void iommu_free_domain(struct dmar_domain *domain)
1046 {
1047 unsigned long flags;
1048
1049 spin_lock_irqsave(&domain->iommu->lock, flags);
1050 clear_bit(domain->id, domain->iommu->domain_ids);
1051 spin_unlock_irqrestore(&domain->iommu->lock, flags);
1052 }
1053
static struct iova_domain reserved_iova_list;
static struct lock_class_key reserved_alloc_key;
static struct lock_class_key reserved_rbtree_key;

/* Build the global list of iova ranges that must never be handed out
 * for DMA: the IOAPIC MMIO window and every PCI device's MMIO
 * resources (avoiding peer-to-peer DMA into device BARs). */
static void dmar_init_reserved_ranges(void)
{
	struct pci_dev *pdev = NULL;
	struct iova *iova;
	int i;
	u64 addr, size;

	init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);

	/* give the reserved list its own lockdep classes */
	lockdep_set_class(&reserved_iova_list.iova_alloc_lock,
		&reserved_alloc_key);
	lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
		&reserved_rbtree_key);

	/* IOAPIC ranges shouldn't be accessed by DMA */
	iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
		IOVA_PFN(IOAPIC_RANGE_END));
	if (!iova)
		printk(KERN_ERR "Reserve IOAPIC range failed\n");

	/* Reserve all PCI MMIO to avoid peer-to-peer access */
	for_each_pci_dev(pdev) {
		struct resource *r;

		for (i = 0; i < PCI_NUM_RESOURCES; i++) {
			r = &pdev->resource[i];
			if (!r->flags || !(r->flags & IORESOURCE_MEM))
				continue;
			addr = r->start;
			addr &= PAGE_MASK_4K;	/* align start down to a page */
			size = r->end - addr;	/* NOTE(review): r->end is
						 * inclusive so this is one byte
						 * short; PAGE_ALIGN_4K below
						 * hides it unless the region
						 * ends exactly on a page
						 * boundary — confirm */
			size = PAGE_ALIGN_4K(size);
			iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
				IOVA_PFN(size + addr) - 1);
			if (!iova)
				printk(KERN_ERR "Reserve iova failed\n");
		}
	}

}
1098
/* Copy the globally-reserved iova ranges into @domain's allocator so it
 * never hands them out. */
static void domain_reserve_special_ranges(struct dmar_domain *domain)
{
	copy_reserved_iova(&reserved_iova_list, &domain->iovad);
}
1103
/*
 * Round a guest address width up so that (width - 12) is a multiple of
 * 9 — i.e. the 4K page offset plus a whole number of 9-bit page-table
 * levels — capping the result at 64 bits.
 */
static inline int guestwidth_to_adjustwidth(int gaw)
{
	int rem = (gaw - 12) % 9;
	int agaw = (rem == 0) ? gaw : gaw + 9 - rem;

	return (agaw > 64) ? 64 : agaw;
}
1117
/* Set up a freshly-allocated domain: iova allocator, reserved ranges,
 * guest address width (clamped to the hardware's MGAW) and the
 * top-level page table.  Returns 0, -ENODEV when no supported AGAW
 * fits, or -ENOMEM. */
static int domain_init(struct dmar_domain *domain, int guest_width)
{
	struct intel_iommu *iommu;
	int adjust_width, agaw;
	unsigned long sagaw;

	init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
	spin_lock_init(&domain->mapping_lock);

	domain_reserve_special_ranges(domain);

	/* calculate AGAW */
	iommu = domain->iommu;
	if (guest_width > cap_mgaw(iommu->cap))
		guest_width = cap_mgaw(iommu->cap);
	domain->gaw = guest_width;
	adjust_width = guestwidth_to_adjustwidth(guest_width);
	agaw = width_to_agaw(adjust_width);
	sagaw = cap_sagaw(iommu->cap);
	if (!test_bit(agaw, &sagaw)) {
		/* hardware doesn't support it, choose a bigger one */
		pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
		agaw = find_next_bit(&sagaw, 5, agaw);	/* 5 possible values */
		if (agaw >= 5)
			return -ENODEV;
	}
	domain->agaw = agaw;
	INIT_LIST_HEAD(&domain->devices);

	/* always allocate the top pgd */
	domain->pgd = (struct dma_pte *)alloc_pgtable_page();
	if (!domain->pgd)
		return -ENOMEM;
	__iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE_4K);
	return 0;
}
1154
1155 static void domain_exit(struct dmar_domain *domain)
1156 {
1157 u64 end;
1158
1159 /* Domain 0 is reserved, so dont process it */
1160 if (!domain)
1161 return;
1162
1163 domain_remove_dev_info(domain);
1164 /* destroy iovas */
1165 put_iova_domain(&domain->iovad);
1166 end = DOMAIN_MAX_ADDR(domain->gaw);
1167 end = end & (~PAGE_MASK_4K);
1168
1169 /* clear ptes */
1170 dma_pte_clear_range(domain, 0, end);
1171
1172 /* free page tables */
1173 dma_pte_free_pagetable(domain, 0, end);
1174
1175 iommu_free_domain(domain);
1176 free_domain_mem(domain);
1177 }
1178
/*
 * Install the context-table entry that routes requests from
 * (bus, devfn) into @domain's page tables, then invalidate the
 * hardware caches for that entry.  Returns 0 on success or if an
 * entry is already present; -ENOMEM if the context-table page cannot
 * be allocated.
 */
static int domain_context_mapping_one(struct dmar_domain *domain,
		u8 bus, u8 devfn)
{
	struct context_entry *context;
	struct intel_iommu *iommu = domain->iommu;
	unsigned long flags;

	pr_debug("Set context mapping for %02x:%02x.%d\n",
		bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
	BUG_ON(!domain->pgd);
	context = device_to_context_entry(iommu, bus, devfn);
	if (!context)
		return -ENOMEM;
	spin_lock_irqsave(&iommu->lock, flags);
	/* someone else installed the entry first -- nothing to do */
	if (context_present(*context)) {
		spin_unlock_irqrestore(&iommu->lock, flags);
		return 0;
	}

	context_set_domain_id(*context, domain->id);
	context_set_address_width(*context, domain->agaw);
	context_set_address_root(*context, virt_to_phys(domain->pgd));
	context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
	context_set_fault_enable(*context);
	/* present bit last, only after the rest of the entry is valid */
	context_set_present(*context);
	__iommu_flush_cache(iommu, context, sizeof(*context));

	/* it's a non-present to present mapping */
	if (iommu_flush_context_device(iommu, domain->id,
			(((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, 1))
		iommu_flush_write_buffer(iommu);
	else
		iommu_flush_iotlb_dsi(iommu, 0, 0);
	spin_unlock_irqrestore(&iommu->lock, flags);
	return 0;
}
1215
/*
 * Map @pdev's own context entry plus one for every bridge between it
 * and its topmost PCIe-to-PCI bridge: DMA from behind such a bridge
 * arrives with the bridge's (or its secondary bus's) source-id, so
 * those ids must resolve to the same domain.
 */
static int
domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
{
	int ret;
	struct pci_dev *tmp, *parent;

	ret = domain_context_mapping_one(domain, pdev->bus->number,
		pdev->devfn);
	if (ret)
		return ret;

	/* dependent device mapping */
	tmp = pci_find_upstream_pcie_bridge(pdev);
	if (!tmp)
		return 0;
	/* Secondary interface's bus number and devfn 0 */
	parent = pdev->bus->self;
	while (parent != tmp) {
		ret = domain_context_mapping_one(domain, parent->bus->number,
			parent->devfn);
		if (ret)
			return ret;
		parent = parent->bus->self;
	}
	if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
		return domain_context_mapping_one(domain,
			tmp->subordinate->number, 0);
	else /* this is a legacy PCI bridge */
		return domain_context_mapping_one(domain,
			tmp->bus->number, tmp->devfn);
}
1247
1248 static int domain_context_mapped(struct dmar_domain *domain,
1249 struct pci_dev *pdev)
1250 {
1251 int ret;
1252 struct pci_dev *tmp, *parent;
1253
1254 ret = device_context_mapped(domain->iommu,
1255 pdev->bus->number, pdev->devfn);
1256 if (!ret)
1257 return ret;
1258 /* dependent device mapping */
1259 tmp = pci_find_upstream_pcie_bridge(pdev);
1260 if (!tmp)
1261 return ret;
1262 /* Secondary interface's bus number and devfn 0 */
1263 parent = pdev->bus->self;
1264 while (parent != tmp) {
1265 ret = device_context_mapped(domain->iommu, parent->bus->number,
1266 parent->devfn);
1267 if (!ret)
1268 return ret;
1269 parent = parent->bus->self;
1270 }
1271 if (tmp->is_pcie)
1272 return device_context_mapped(domain->iommu,
1273 tmp->subordinate->number, 0);
1274 else
1275 return device_context_mapped(domain->iommu,
1276 tmp->bus->number, tmp->devfn);
1277 }
1278
/*
 * Map the physical range starting at @hpa (@size bytes, rounded up to
 * whole pages) at IO virtual address @iova in @domain's page tables,
 * one 4K page at a time.  @prot must contain at least one of
 * DMA_PTE_READ / DMA_PTE_WRITE.  The caller owns the IOVA range
 * exclusively, so the PTE writes are done without locking.
 */
static int
domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
			u64 hpa, size_t size, int prot)
{
	u64 start_pfn, end_pfn;
	struct dma_pte *pte;
	int index;

	if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
		return -EINVAL;
	iova &= PAGE_MASK_4K;
	start_pfn = ((u64)hpa) >> PAGE_SHIFT_4K;
	end_pfn = (PAGE_ALIGN_4K(((u64)hpa) + size)) >> PAGE_SHIFT_4K;
	index = 0;
	while (start_pfn < end_pfn) {
		pte = addr_to_dma_pte(domain, iova + PAGE_SIZE_4K * index);
		if (!pte)
			return -ENOMEM;
		/* We don't need lock here, nobody else
		 * touches the iova range
		 */
		/* mapping over a live PTE would be a caller bug */
		BUG_ON(dma_pte_addr(*pte));
		dma_set_pte_addr(*pte, start_pfn << PAGE_SHIFT_4K);
		dma_set_pte_prot(*pte, prot);
		/* make the PTE visible to non-coherent hardware */
		__iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
		start_pfn++;
		index++;
	}
	return 0;
}
1309
/* Remove (bus, devfn)'s context entry, then invalidate the IOMMU's
 * context and IOTLB caches globally so stale translations die. */
static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
{
	clear_context_table(domain->iommu, bus, devfn);
	iommu_flush_context_global(domain->iommu, 0);
	iommu_flush_iotlb_global(domain->iommu, 0);
}
1316
/*
 * Detach every device from @domain and free its device_domain_info.
 * The lock is dropped around detach_domain_for_dev() because that
 * path performs hardware cache flushes; the entry has already been
 * unlinked from both lists, and the loop re-reads the list head each
 * time the lock is reacquired.
 */
static void domain_remove_dev_info(struct dmar_domain *domain)
{
	struct device_domain_info *info;
	unsigned long flags;

	spin_lock_irqsave(&device_domain_lock, flags);
	while (!list_empty(&domain->devices)) {
		info = list_entry(domain->devices.next,
			struct device_domain_info, link);
		list_del(&info->link);
		list_del(&info->global);
		/* info->dev is NULL for bridge placeholder entries */
		if (info->dev)
			info->dev->dev.archdata.iommu = NULL;
		spin_unlock_irqrestore(&device_domain_lock, flags);

		detach_domain_for_dev(info->domain, info->bus, info->devfn);
		free_devinfo_mem(info);

		spin_lock_irqsave(&device_domain_lock, flags);
	}
	spin_unlock_irqrestore(&device_domain_lock, flags);
}
1339
/*
 * find_domain
 * Note: struct pci_dev->dev.archdata.iommu stores the per-device
 * device_domain_info; return its domain, or NULL if the device has
 * not been attached to one yet.
 */
static struct dmar_domain *
find_domain(struct pci_dev *pdev)
{
	struct device_domain_info *info;

	/* No lock here, assumes no domain exit in normal case */
	info = pdev->dev.archdata.iommu;
	if (info)
		return info->domain;
	return NULL;
}
1355
/*
 * Find or create the domain for @pdev.  Devices behind a PCIe-to-PCI
 * bridge must share one domain (the IOMMU sees them under the
 * bridge's source-id), keyed by the bridge's (bus, devfn) in the
 * global device_domain_list.  Lookups are deliberately racy and
 * re-checked under device_domain_lock; whoever loses a race frees
 * the domain it built and adopts the winner's.  Returns NULL if no
 * DMAR unit covers the device or allocation fails.
 * The returned domain is initialized.
 */
static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
{
	struct dmar_domain *domain, *found = NULL;
	struct intel_iommu *iommu;
	struct dmar_drhd_unit *drhd;
	struct device_domain_info *info, *tmp;
	struct pci_dev *dev_tmp;
	unsigned long flags;
	int bus = 0, devfn = 0;

	domain = find_domain(pdev);
	if (domain)
		return domain;

	dev_tmp = pci_find_upstream_pcie_bridge(pdev);
	if (dev_tmp) {
		if (dev_tmp->is_pcie) {
			bus = dev_tmp->subordinate->number;
			devfn = 0;
		} else {
			bus = dev_tmp->bus->number;
			devfn = dev_tmp->devfn;
		}
		spin_lock_irqsave(&device_domain_lock, flags);
		list_for_each_entry(info, &device_domain_list, global) {
			if (info->bus == bus && info->devfn == devfn) {
				found = info->domain;
				break;
			}
		}
		spin_unlock_irqrestore(&device_domain_lock, flags);
		/* pcie-pci bridge already has a domain, uses it */
		if (found) {
			domain = found;
			goto found_domain;
		}
	}

	/* Allocate new domain for the device */
	drhd = dmar_find_matched_drhd_unit(pdev);
	if (!drhd) {
		printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
			pci_name(pdev));
		return NULL;
	}
	iommu = drhd->iommu;

	domain = iommu_alloc_domain(iommu);
	if (!domain)
		goto error;

	if (domain_init(domain, gaw)) {
		domain_exit(domain);
		goto error;
	}

	/* register pcie-to-pci device */
	if (dev_tmp) {
		info = alloc_devinfo_mem();
		if (!info) {
			domain_exit(domain);
			goto error;
		}
		info->bus = bus;
		info->devfn = devfn;
		info->dev = NULL;	/* placeholder: keyed by bridge id only */
		info->domain = domain;
		/* This domain is shared by devices under p2p bridge */
		domain->flags |= DOMAIN_FLAG_MULTIPLE_DEVICES;

		/* pcie-to-pci bridge already has a domain, uses it */
		found = NULL;
		spin_lock_irqsave(&device_domain_lock, flags);
		list_for_each_entry(tmp, &device_domain_list, global) {
			if (tmp->bus == bus && tmp->devfn == devfn) {
				found = tmp->domain;
				break;
			}
		}
		if (found) {
			/* lost the race: drop ours, use the existing one */
			free_devinfo_mem(info);
			domain_exit(domain);
			domain = found;
		} else {
			list_add(&info->link, &domain->devices);
			list_add(&info->global, &device_domain_list);
		}
		spin_unlock_irqrestore(&device_domain_lock, flags);
	}

found_domain:
	info = alloc_devinfo_mem();
	if (!info)
		goto error;
	info->bus = pdev->bus->number;
	info->devfn = pdev->devfn;
	info->dev = pdev;
	info->domain = domain;
	spin_lock_irqsave(&device_domain_lock, flags);
	/* somebody is fast */
	found = find_domain(pdev);
	if (found != NULL) {
		spin_unlock_irqrestore(&device_domain_lock, flags);
		if (found != domain) {
			domain_exit(domain);
			domain = found;
		}
		free_devinfo_mem(info);
		return domain;
	}
	list_add(&info->link, &domain->devices);
	list_add(&info->global, &device_domain_list);
	pdev->dev.archdata.iommu = info;
	spin_unlock_irqrestore(&device_domain_lock, flags);
	return domain;
error:
	/* recheck it here, maybe others set it */
	return find_domain(pdev);
}
1476
/*
 * Establish a 1:1 (identity) mapping of [start, end) for @pdev, used
 * for RMRR regions and legacy-device workarounds: reserve the IOVA
 * range, clear any pre-existing PTEs, map it read/write, and install
 * the context entry.  On any failure the whole domain is torn down.
 */
static int iommu_prepare_identity_map(struct pci_dev *pdev, u64 start, u64 end)
{
	struct dmar_domain *domain;
	unsigned long size;
	u64 base;
	int ret;

	printk(KERN_INFO
		"IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
		pci_name(pdev), start, end);
	/* page table init */
	domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
	if (!domain)
		return -ENOMEM;

	/* The address might not be aligned */
	base = start & PAGE_MASK_4K;
	size = end - base;
	size = PAGE_ALIGN_4K(size);
	if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
			IOVA_PFN(base + size) - 1)) {
		printk(KERN_ERR "IOMMU: reserve iova failed\n");
		ret = -ENOMEM;
		goto error;
	}

	pr_debug("Mapping reserved region %lx@%llx for %s\n",
		size, base, pci_name(pdev));
	/*
	 * RMRR range might have overlap with physical memory range,
	 * clear it first
	 */
	dma_pte_clear_range(domain, base, base + size);

	ret = domain_page_mapping(domain, base, base, size,
		DMA_PTE_READ|DMA_PTE_WRITE);
	if (ret)
		goto error;

	/* context entry init */
	ret = domain_context_mapping(domain, pdev);
	if (!ret)
		return 0;
error:
	domain_exit(domain);
	return ret;

}
1525
/* Identity-map an RMRR (Reserved Memory Region Reporting) range for
 * @pdev, unless the device bypasses translation entirely.  RMRR's
 * end_address is inclusive, hence the +1. */
static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
	struct pci_dev *pdev)
{
	if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
		return 0;
	return iommu_prepare_identity_map(pdev, rmrr->base_address,
		rmrr->end_address + 1);
}
1534
#ifdef CONFIG_DMAR_GFX_WA
/* Carries the target device and the running result through
 * work_with_active_regions()'s callback. */
struct iommu_prepare_data {
	struct pci_dev *pdev;	/* graphics device being identity-mapped */
	int ret;		/* first error seen, or 0 */
};
1540
/* work_with_active_regions() callback: identity-map one region of
 * physical memory for data->pdev; a nonzero return stops the walk. */
static int __init iommu_prepare_work_fn(unsigned long start_pfn,
		unsigned long end_pfn, void *datax)
{
	struct iommu_prepare_data *data;

	data = (struct iommu_prepare_data *)datax;

	data->ret = iommu_prepare_identity_map(data->pdev,
			start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
	return data->ret;

}
1553
/* Identity-map every active physical-memory region of every online
 * node for @pdev; stops at the first error. */
static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev)
{
	int nid;
	struct iommu_prepare_data data;

	data.pdev = pdev;
	data.ret = 0;

	for_each_online_node(nid) {
		work_with_active_regions(nid, iommu_prepare_work_fn, &data);
		if (data.ret)
			return data.ret;
	}
	return data.ret;
}
1569
/*
 * Workaround for graphics devices that DMA to arbitrary system
 * memory: give each one a full 1:1 mapping of RAM so translation is
 * transparent to it.
 */
static void __init iommu_prepare_gfx_mapping(void)
{
	struct pci_dev *pdev = NULL;
	int ret;

	for_each_pci_dev(pdev) {
		if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
				!IS_GFX_DEVICE(pdev))
			continue;
		printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
			pci_name(pdev));
		ret = iommu_prepare_with_active_regions(pdev);
		if (ret)
			printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
	}
}
#endif
1587
#ifdef CONFIG_DMAR_FLOPPY_WA
/*
 * Legacy floppy DMA goes through the LPC (ISA) bridge; identity-map
 * the first 16MB for that bridge so ISA-style DMA keeps working once
 * translation is enabled.
 */
static inline void iommu_prepare_isa(void)
{
	struct pci_dev *pdev;
	int ret;

	/*
	 * NOTE(review): pci_get_class() returns a referenced device
	 * and no pci_dev_put() follows; the reference is held for the
	 * life of the mapping -- confirm that is intentional.
	 */
	pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
	if (!pdev)
		return;

	printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
	ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);

	if (ret)
		/* message previously said "0-64M"; the map is 0-16M */
		printk(KERN_ERR "IOMMU: Failed to create 0-16M identity map, "
			"floppy might not work\n");

}
#else
static inline void iommu_prepare_isa(void)
{
	return;
}
#endif /* !CONFIG_DMAR_FLOPPY_WA */
1612
/*
 * Bring up DMA remapping on every DMAR unit: allocate per-IOMMU
 * domain bookkeeping and root tables, install RMRR / graphics / ISA
 * identity mappings, then program the root entries, invalidate the
 * hardware caches and enable translation.  On error every
 * non-ignored IOMMU is freed.  Returns 0 on success.
 */
int __init init_dmars(void)
{
	struct dmar_drhd_unit *drhd;
	struct dmar_rmrr_unit *rmrr;
	struct pci_dev *pdev;
	struct intel_iommu *iommu;
	int i, ret, unit = 0;

	/*
	 * for each drhd
	 *    allocate root
	 *    initialize and program root entry to not present
	 * endfor
	 */
	for_each_drhd_unit(drhd) {
		g_num_of_iommus++;
		/*
		 * lock not needed as this is only incremented in the single
		 * threaded kernel __init code path all other access are read
		 * only
		 */
	}

	/* one deferred-flush table per IOMMU, used by the unmap path */
	deferred_flush = kzalloc(g_num_of_iommus *
		sizeof(struct deferred_flush_tables), GFP_KERNEL);
	if (!deferred_flush) {
		ret = -ENOMEM;
		goto error;
	}

	for_each_drhd_unit(drhd) {
		if (drhd->ignored)
			continue;

		iommu = drhd->iommu;

		ret = iommu_init_domains(iommu);
		if (ret)
			goto error;

		/*
		 * TBD:
		 * we could share the same root & context tables
		 * amoung all IOMMU's. Need to Split it later.
		 */
		ret = iommu_alloc_root_entry(iommu);
		if (ret) {
			printk(KERN_ERR "IOMMU: allocate root entry failed\n");
			goto error;
		}
	}

	/*
	 * For each rmrr
	 *   for each dev attached to rmrr
	 *   do
	 *     locate drhd for dev, alloc domain for dev
	 *     allocate free domain
	 *     allocate page table entries for rmrr
	 *     if context not allocated for bus
	 *           allocate and init context
	 *           set present in root table for this bus
	 *     init context with domain, translation etc
	 *    endfor
	 * endfor
	 */
	for_each_rmrr_units(rmrr) {
		for (i = 0; i < rmrr->devices_cnt; i++) {
			pdev = rmrr->devices[i];
			/* some BIOS lists non-exist devices in DMAR table */
			if (!pdev)
				continue;
			/* RMRR failures are logged but not fatal */
			ret = iommu_prepare_rmrr_dev(rmrr, pdev);
			if (ret)
				printk(KERN_ERR
				 "IOMMU: mapping reserved region failed\n");
		}
	}

	iommu_prepare_gfx_mapping();

	iommu_prepare_isa();

	/*
	 * for each drhd
	 *   enable fault log
	 *   global invalidate context cache
	 *   global invalidate iotlb
	 *   enable translation
	 */
	for_each_drhd_unit(drhd) {
		if (drhd->ignored)
			continue;
		iommu = drhd->iommu;
		sprintf (iommu->name, "dmar%d", unit++);

		iommu_flush_write_buffer(iommu);

		/* fault reporting interrupt */
		ret = dmar_set_interrupt(iommu);
		if (ret)
			goto error;

		iommu_set_root_entry(iommu);

		iommu_flush_context_global(iommu, 0);
		iommu_flush_iotlb_global(iommu, 0);

		iommu_disable_protect_mem_regions(iommu);

		ret = iommu_enable_translation(iommu);
		if (ret)
			goto error;
	}

	return 0;
error:
	for_each_drhd_unit(drhd) {
		if (drhd->ignored)
			continue;
		iommu = drhd->iommu;
		free_iommu(iommu);
	}
	return ret;
}
1737
1738 static inline u64 aligned_size(u64 host_addr, size_t size)
1739 {
1740 u64 addr;
1741 addr = (host_addr & (~PAGE_MASK_4K)) + size;
1742 return PAGE_ALIGN_4K(addr);
1743 }
1744
/*
 * Allocate an IOVA range of @size bytes (treated in whole 4K pages;
 * callers pre-round with aligned_size()) from @domain, no higher than
 * @end, further clamped to the domain's maximum address.  Returns the
 * iova node or NULL.
 */
struct iova *
iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
{
	struct iova *piova;

	/* Make sure it's in range */
	end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
	if (!size || (IOVA_START_ADDR + size > end))
		return NULL;

	piova = alloc_iova(&domain->iovad,
			size >> PAGE_SHIFT_4K, IOVA_PFN(end), 1);
	return piova;
}
1759
1760 static struct iova *
1761 __intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
1762 size_t size)
1763 {
1764 struct pci_dev *pdev = to_pci_dev(dev);
1765 struct iova *iova = NULL;
1766
1767 if ((pdev->dma_mask <= DMA_32BIT_MASK) || (dmar_forcedac)) {
1768 iova = iommu_alloc_iova(domain, size, pdev->dma_mask);
1769 } else {
1770 /*
1771 * First try to allocate an io virtual address in
1772 * DMA_32BIT_MASK and if that fails then try allocating
1773 * from higher range
1774 */
1775 iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK);
1776 if (!iova)
1777 iova = iommu_alloc_iova(domain, size, pdev->dma_mask);
1778 }
1779
1780 if (!iova) {
1781 printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev));
1782 return NULL;
1783 }
1784
1785 return iova;
1786 }
1787
1788 static struct dmar_domain *
1789 get_valid_domain_for_dev(struct pci_dev *pdev)
1790 {
1791 struct dmar_domain *domain;
1792 int ret;
1793
1794 domain = get_domain_for_dev(pdev,
1795 DEFAULT_DOMAIN_ADDRESS_WIDTH);
1796 if (!domain) {
1797 printk(KERN_ERR
1798 "Allocating domain for %s failed", pci_name(pdev));
1799 return NULL;
1800 }
1801
1802 /* make sure context mapping is ok */
1803 if (unlikely(!domain_context_mapped(domain, pdev))) {
1804 ret = domain_context_mapping(domain, pdev);
1805 if (ret) {
1806 printk(KERN_ERR
1807 "Domain context map for %s failed",
1808 pci_name(pdev));
1809 return NULL;
1810 }
1811 }
1812
1813 return domain;
1814 }
1815
/*
 * dma_mapping_ops.map_single: map @size bytes at physical @paddr for
 * DMA by @hwdev and return the bus (IO virtual) address, or 0 on
 * failure.  Devices flagged DUMMY_DEVICE_DOMAIN_INFO bypass
 * translation and get the physical address back unchanged.
 */
static dma_addr_t
intel_map_single(struct device *hwdev, phys_addr_t paddr, size_t size, int dir)
{
	struct pci_dev *pdev = to_pci_dev(hwdev);
	struct dmar_domain *domain;
	unsigned long start_paddr;
	struct iova *iova;
	int prot = 0;
	int ret;

	BUG_ON(dir == DMA_NONE);
	if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
		return paddr;

	domain = get_valid_domain_for_dev(pdev);
	if (!domain)
		return 0;

	size = aligned_size((u64)paddr, size);

	iova = __intel_alloc_iova(hwdev, domain, size);
	if (!iova)
		goto error;

	start_paddr = iova->pfn_lo << PAGE_SHIFT_4K;

	/*
	 * Check if DMAR supports zero-length reads on write only
	 * mappings..
	 */
	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
			!cap_zlr(domain->iommu->cap))
		prot |= DMA_PTE_READ;
	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
		prot |= DMA_PTE_WRITE;
	/*
	 * paddr - (paddr + size) might be partial page, we should map the whole
	 * page.  Note: if two part of one page are separately mapped, we
	 * might have two guest_addr mapping to the same host paddr, but this
	 * is not a big problem
	 */
	ret = domain_page_mapping(domain, start_paddr,
		((u64)paddr) & PAGE_MASK_4K, size, prot);
	if (ret)
		goto error;

	pr_debug("Device %s request: %lx@%llx mapping: %lx@%llx, dir %d\n",
		pci_name(pdev), size, (u64)paddr,
		size, (u64)start_paddr, dir);

	/* it's a non-present to present mapping */
	ret = iommu_flush_iotlb_psi(domain->iommu, domain->id,
			start_paddr, size >> PAGE_SHIFT_4K, 1);
	if (ret)
		iommu_flush_write_buffer(domain->iommu);

	/* add the sub-page offset back onto the page-aligned iova */
	return (start_paddr + ((u64)paddr & (~PAGE_MASK_4K)));

error:
	/* iova is NULL only when allocation itself failed */
	if (iova)
		__free_iova(&domain->iovad, iova);
	printk(KERN_ERR"Device %s request: %lx@%llx dir %d --- failed\n",
		pci_name(pdev), size, (u64)paddr, dir);
	return 0;
}
1881
/*
 * Drain all deferred-unmap tables: one global IOTLB flush per IOMMU
 * with pending entries, then free the queued IOVAs.  Caller must hold
 * async_umap_flush_lock (see flush_unmaps_timeout()/add_unmap()).
 */
static void flush_unmaps(void)
{
	int i, j;

	timer_on = 0;

	/* just flush them all */
	for (i = 0; i < g_num_of_iommus; i++) {
		if (deferred_flush[i].next) {
			/* all entries in one table share one IOMMU */
			struct intel_iommu *iommu =
				deferred_flush[i].domain[0]->iommu;

			iommu_flush_iotlb_global(iommu, 0);
			for (j = 0; j < deferred_flush[i].next; j++) {
				__free_iova(&deferred_flush[i].domain[j]->iovad,
						deferred_flush[i].iova[j]);
			}
			deferred_flush[i].next = 0;
		}
	}

	list_size = 0;
}
1905
/* unmap_timer callback: drain the deferred-unmap queue under the
 * async flush lock. */
static void flush_unmaps_timeout(unsigned long data)
{
	unsigned long flags;

	spin_lock_irqsave(&async_umap_flush_lock, flags);
	flush_unmaps();
	spin_unlock_irqrestore(&async_umap_flush_lock, flags);
}
1914
/*
 * Queue an IOVA for deferred freeing instead of flushing the IOTLB
 * synchronously on every unmap.  The queue is drained either when it
 * reaches HIGH_WATER_MARK or when the 10ms unmap_timer fires.
 */
static void add_unmap(struct dmar_domain *dom, struct iova *iova)
{
	unsigned long flags;
	int next, iommu_id;

	spin_lock_irqsave(&async_umap_flush_lock, flags);
	if (list_size == HIGH_WATER_MARK)
		flush_unmaps();

	/* entries are bucketed per IOMMU */
	iommu_id = dom->iommu->seq_id;

	next = deferred_flush[iommu_id].next;
	deferred_flush[iommu_id].domain[next] = dom;
	deferred_flush[iommu_id].iova[next] = iova;
	deferred_flush[iommu_id].next++;

	if (!timer_on) {
		mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
		timer_on = 1;
	}
	list_size++;
	spin_unlock_irqrestore(&async_umap_flush_lock, flags);
}
1938
/*
 * dma_mapping_ops.unmap_single: tear down the mapping created by
 * intel_map_single().  In strict mode the IOTLB is flushed and the
 * IOVA freed immediately; otherwise the IOVA free is batched via
 * add_unmap() to amortize the flush cost.
 */
static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr,
	size_t size, int dir)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct dmar_domain *domain;
	unsigned long start_addr;
	struct iova *iova;

	/* untranslated devices were never mapped -- nothing to undo */
	if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
		return;
	domain = find_domain(pdev);
	BUG_ON(!domain);

	iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
	if (!iova)
		return;

	start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
	size = aligned_size((u64)dev_addr, size);

	pr_debug("Device %s unmapping: %lx@%llx\n",
		pci_name(pdev), size, (u64)start_addr);

	/* clear the whole page */
	dma_pte_clear_range(domain, start_addr, start_addr + size);
	/* free page tables */
	dma_pte_free_pagetable(domain, start_addr, start_addr + size);
	if (intel_iommu_strict) {
		if (iommu_flush_iotlb_psi(domain->iommu,
			domain->id, start_addr, size >> PAGE_SHIFT_4K, 0))
			iommu_flush_write_buffer(domain->iommu);
		/* free iova */
		__free_iova(&domain->iovad, iova);
	} else {
		add_unmap(domain, iova);
		/*
		 * queue up the release of the unmap to save the 1/6th of the
		 * cpu used up by the iotlb flush operation...
		 */
	}
}
1980
1981 static void * intel_alloc_coherent(struct device *hwdev, size_t size,
1982 dma_addr_t *dma_handle, gfp_t flags)
1983 {
1984 void *vaddr;
1985 int order;
1986
1987 size = PAGE_ALIGN_4K(size);
1988 order = get_order(size);
1989 flags &= ~(GFP_DMA | GFP_DMA32);
1990
1991 vaddr = (void *)__get_free_pages(flags, order);
1992 if (!vaddr)
1993 return NULL;
1994 memset(vaddr, 0, size);
1995
1996 *dma_handle = intel_map_single(hwdev, virt_to_bus(vaddr), size, DMA_BIDIRECTIONAL);
1997 if (*dma_handle)
1998 return vaddr;
1999 free_pages((unsigned long)vaddr, order);
2000 return NULL;
2001 }
2002
2003 static void intel_free_coherent(struct device *hwdev, size_t size,
2004 void *vaddr, dma_addr_t dma_handle)
2005 {
2006 int order;
2007
2008 size = PAGE_ALIGN_4K(size);
2009 order = get_order(size);
2010
2011 intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
2012 free_pages((unsigned long)vaddr, order);
2013 }
2014
#define SG_ENT_VIRT_ADDRESS(sg)	(sg_virt((sg)))
/*
 * dma_mapping_ops.unmap_sg: tear down a scatterlist mapping made by
 * intel_map_sg().  The list occupies one contiguous IOVA range, whose
 * start is recovered from the first segment's dma_address and whose
 * total length is recomputed from the segment lengths.
 */
static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
	int nelems, int dir)
{
	int i;
	struct pci_dev *pdev = to_pci_dev(hwdev);
	struct dmar_domain *domain;
	unsigned long start_addr;
	struct iova *iova;
	size_t size = 0;
	void *addr;
	struct scatterlist *sg;

	if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
		return;

	/* NOTE(review): unlike intel_unmap_single() there is no
	 * BUG_ON(!domain) here -- confirm a NULL domain is impossible
	 * on this path. */
	domain = find_domain(pdev);

	iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
	if (!iova)
		return;
	for_each_sg(sglist, sg, nelems, i) {
		addr = SG_ENT_VIRT_ADDRESS(sg);
		size += aligned_size((u64)addr, sg->length);
	}

	start_addr = iova->pfn_lo << PAGE_SHIFT_4K;

	/* clear the whole page */
	dma_pte_clear_range(domain, start_addr, start_addr + size);
	/* free page tables */
	dma_pte_free_pagetable(domain, start_addr, start_addr + size);

	if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
			size >> PAGE_SHIFT_4K, 0))
		iommu_flush_write_buffer(domain->iommu);

	/* free iova */
	__free_iova(&domain->iovad, iova);
}
2055
2056 static int intel_nontranslate_map_sg(struct device *hddev,
2057 struct scatterlist *sglist, int nelems, int dir)
2058 {
2059 int i;
2060 struct scatterlist *sg;
2061
2062 for_each_sg(sglist, sg, nelems, i) {
2063 BUG_ON(!sg_page(sg));
2064 sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
2065 sg->dma_length = sg->length;
2066 }
2067 return nelems;
2068 }
2069
/*
 * dma_mapping_ops.map_sg: map a scatterlist into one contiguous IOVA
 * range.  The total (page-aligned) length is computed first, a single
 * IOVA range of that size is allocated, then each segment is mapped
 * consecutively into it.  Returns the number of segments mapped, or 0
 * on failure (with everything already mapped rolled back).
 */
static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist,
	int nelems, int dir)
{
	void *addr;
	int i;
	struct pci_dev *pdev = to_pci_dev(hwdev);
	struct dmar_domain *domain;
	size_t size = 0;
	int prot = 0;
	size_t offset = 0;
	struct iova *iova = NULL;
	int ret;
	struct scatterlist *sg;
	unsigned long start_addr;

	BUG_ON(dir == DMA_NONE);
	if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
		return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);

	domain = get_valid_domain_for_dev(pdev);
	if (!domain)
		return 0;

	/* first pass: total size of the IOVA range needed */
	for_each_sg(sglist, sg, nelems, i) {
		addr = SG_ENT_VIRT_ADDRESS(sg);
		addr = (void *)virt_to_phys(addr);
		size += aligned_size((u64)addr, sg->length);
	}

	iova = __intel_alloc_iova(hwdev, domain, size);
	if (!iova) {
		sglist->dma_length = 0;
		return 0;
	}

	/*
	 * Check if DMAR supports zero-length reads on write only
	 * mappings..
	 */
	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
			!cap_zlr(domain->iommu->cap))
		prot |= DMA_PTE_READ;
	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
		prot |= DMA_PTE_WRITE;

	start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
	offset = 0;
	/* second pass: map each segment at start_addr + offset */
	for_each_sg(sglist, sg, nelems, i) {
		addr = SG_ENT_VIRT_ADDRESS(sg);
		addr = (void *)virt_to_phys(addr);
		size = aligned_size((u64)addr, sg->length);
		ret = domain_page_mapping(domain, start_addr + offset,
			((u64)addr) & PAGE_MASK_4K,
			size, prot);
		if (ret) {
			/* roll back everything mapped so far */
			/* clear the page */
			dma_pte_clear_range(domain, start_addr,
				  start_addr + offset);
			/* free page tables */
			dma_pte_free_pagetable(domain, start_addr,
				  start_addr + offset);
			/* free iova */
			__free_iova(&domain->iovad, iova);
			return 0;
		}
		sg->dma_address = start_addr + offset +
				((u64)addr & (~PAGE_MASK_4K));
		sg->dma_length = sg->length;
		offset += size;
	}

	/* it's a non-present to present mapping */
	if (iommu_flush_iotlb_psi(domain->iommu, domain->id,
			start_addr, offset >> PAGE_SHIFT_4K, 1))
		iommu_flush_write_buffer(domain->iommu);
	return nelems;
}
2147
/* DMA operations installed as the arch dma_ops by intel_iommu_init() */
static struct dma_mapping_ops intel_dma_ops = {
	.alloc_coherent = intel_alloc_coherent,
	.free_coherent = intel_free_coherent,
	.map_single = intel_map_single,
	.unmap_single = intel_unmap_single,
	.map_sg = intel_map_sg,
	.unmap_sg = intel_unmap_sg,
};
2156
2157 static inline int iommu_domain_cache_init(void)
2158 {
2159 int ret = 0;
2160
2161 iommu_domain_cache = kmem_cache_create("iommu_domain",
2162 sizeof(struct dmar_domain),
2163 0,
2164 SLAB_HWCACHE_ALIGN,
2165
2166 NULL);
2167 if (!iommu_domain_cache) {
2168 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
2169 ret = -ENOMEM;
2170 }
2171
2172 return ret;
2173 }
2174
2175 static inline int iommu_devinfo_cache_init(void)
2176 {
2177 int ret = 0;
2178
2179 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
2180 sizeof(struct device_domain_info),
2181 0,
2182 SLAB_HWCACHE_ALIGN,
2183
2184 NULL);
2185 if (!iommu_devinfo_cache) {
2186 printk(KERN_ERR "Couldn't create devinfo cache\n");
2187 ret = -ENOMEM;
2188 }
2189
2190 return ret;
2191 }
2192
2193 static inline int iommu_iova_cache_init(void)
2194 {
2195 int ret = 0;
2196
2197 iommu_iova_cache = kmem_cache_create("iommu_iova",
2198 sizeof(struct iova),
2199 0,
2200 SLAB_HWCACHE_ALIGN,
2201
2202 NULL);
2203 if (!iommu_iova_cache) {
2204 printk(KERN_ERR "Couldn't create iova cache\n");
2205 ret = -ENOMEM;
2206 }
2207
2208 return ret;
2209 }
2210
2211 static int __init iommu_init_mempool(void)
2212 {
2213 int ret;
2214 ret = iommu_iova_cache_init();
2215 if (ret)
2216 return ret;
2217
2218 ret = iommu_domain_cache_init();
2219 if (ret)
2220 goto domain_error;
2221
2222 ret = iommu_devinfo_cache_init();
2223 if (!ret)
2224 return ret;
2225
2226 kmem_cache_destroy(iommu_domain_cache);
2227 domain_error:
2228 kmem_cache_destroy(iommu_iova_cache);
2229
2230 return -ENOMEM;
2231 }
2232
/* Destroy the slab caches created by iommu_init_mempool(). */
static void __init iommu_exit_mempool(void)
{
	kmem_cache_destroy(iommu_devinfo_cache);
	kmem_cache_destroy(iommu_domain_cache);
	kmem_cache_destroy(iommu_iova_cache);

}
2240
/*
 * Decide which DMAR units to skip: units whose device list is empty,
 * and (when dmar_map_gfx is off) units that cover only graphics
 * devices.  Devices behind a bypassed gfx-only unit are tagged
 * DUMMY_DEVICE_DOMAIN_INFO so the map/unmap paths leave them alone.
 */
static void __init init_no_remapping_devices(void)
{
	struct dmar_drhd_unit *drhd;

	for_each_drhd_unit(drhd) {
		if (!drhd->include_all) {
			int i;
			for (i = 0; i < drhd->devices_cnt; i++)
				if (drhd->devices[i] != NULL)
					break;
			/* ignore DMAR unit if no pci devices exist */
			if (i == drhd->devices_cnt)
				drhd->ignored = 1;
		}
	}

	/* gfx mapping requested: keep all remaining units active */
	if (dmar_map_gfx)
		return;

	for_each_drhd_unit(drhd) {
		int i;
		if (drhd->ignored || drhd->include_all)
			continue;

		for (i = 0; i < drhd->devices_cnt; i++)
			if (drhd->devices[i] &&
				!IS_GFX_DEVICE(drhd->devices[i]))
				break;

		/* at least one non-gfx device: unit stays active */
		if (i < drhd->devices_cnt)
			continue;

		/* bypass IOMMU if it is just for gfx devices */
		drhd->ignored = 1;
		for (i = 0; i < drhd->devices_cnt; i++) {
			if (!drhd->devices[i])
				continue;
			drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
		}
	}
}
2282
/*
 * Driver entry point: parse the DMAR table, set up memory pools and
 * reserved ranges, bring up every DMAR unit, and install intel_dma_ops
 * as the platform DMA operations.  Returns 0 on success or -ENODEV
 * when DMA remapping is unavailable or disabled.
 */
int __init intel_iommu_init(void)
{
	int ret = 0;

	if (dmar_table_init())
		return -ENODEV;

	if (dmar_dev_scope_init())
		return -ENODEV;

	/*
	 * Check the need for DMA-remapping initialization now.
	 * Above initialization will also be used by Interrupt-remapping.
	 */
	if (no_iommu || swiotlb || dmar_disabled)
		return -ENODEV;

	iommu_init_mempool();
	dmar_init_reserved_ranges();

	init_no_remapping_devices();

	ret = init_dmars();
	if (ret) {
		printk(KERN_ERR "IOMMU: dmar init failed\n");
		put_iova_domain(&reserved_iova_list);
		iommu_exit_mempool();
		return ret;
	}
	printk(KERN_INFO
	"PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");

	/* timer that drains the deferred-unmap queue */
	init_timer(&unmap_timer);
	force_iommu = 1;
	dma_ops = &intel_dma_ops;
	return 0;
}
2320
/*
 * Tear down a domain obtained from intel_iommu_domain_alloc(): clear
 * all PTEs, free the page-table pages, release the domain id on its
 * IOMMU and free the domain structure itself.  NULL is tolerated.
 */
void intel_iommu_domain_exit(struct dmar_domain *domain)
{
	u64 end;

	/* Domain 0 is reserved, so dont process it */
	if (!domain)
		return;

	end = DOMAIN_MAX_ADDR(domain->gaw);
	/*
	 * NOTE(review): this keeps only the bits BELOW the 4K page
	 * boundary of the max address; aligning down would normally be
	 * "end & PAGE_MASK_4K".  Looks suspicious — verify against the
	 * PAGE_MASK_4K definition in intel-iommu.h before relying on
	 * the full range being cleared.
	 */
	end = end & (~PAGE_MASK_4K);

	/* clear ptes */
	dma_pte_clear_range(domain, 0, end);

	/* free page tables */
	dma_pte_free_pagetable(domain, 0, end);

	iommu_free_domain(domain);
	free_domain_mem(domain);
}
EXPORT_SYMBOL_GPL(intel_iommu_domain_exit);
2342
2343 struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev)
2344 {
2345 struct dmar_drhd_unit *drhd;
2346 struct dmar_domain *domain;
2347 struct intel_iommu *iommu;
2348
2349 drhd = dmar_find_matched_drhd_unit(pdev);
2350 if (!drhd) {
2351 printk(KERN_ERR "intel_iommu_domain_alloc: drhd == NULL\n");
2352 return NULL;
2353 }
2354
2355 iommu = drhd->iommu;
2356 if (!iommu) {
2357 printk(KERN_ERR
2358 "intel_iommu_domain_alloc: iommu == NULL\n");
2359 return NULL;
2360 }
2361 domain = iommu_alloc_domain(iommu);
2362 if (!domain) {
2363 printk(KERN_ERR
2364 "intel_iommu_domain_alloc: domain == NULL\n");
2365 return NULL;
2366 }
2367 if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2368 printk(KERN_ERR
2369 "intel_iommu_domain_alloc: domain_init() failed\n");
2370 intel_iommu_domain_exit(domain);
2371 return NULL;
2372 }
2373 return domain;
2374 }
2375 EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc);
2376
/*
 * Exported wrapper around domain_context_mapping(): set up the context
 * entry mapping @pdev (and any parent bridges) to @domain.  Returns
 * domain_context_mapping()'s result unchanged.
 */
int intel_iommu_context_mapping(
	struct dmar_domain *domain, struct pci_dev *pdev)
{
	return domain_context_mapping(domain, pdev);
}
EXPORT_SYMBOL_GPL(intel_iommu_context_mapping);
2385
2386 int intel_iommu_page_mapping(
2387 struct dmar_domain *domain, dma_addr_t iova,
2388 u64 hpa, size_t size, int prot)
2389 {
2390 int rc;
2391 rc = domain_page_mapping(domain, iova, hpa, size, prot);
2392 return rc;
2393 }
2394 EXPORT_SYMBOL_GPL(intel_iommu_page_mapping);
2395
/*
 * Exported wrapper around detach_domain_for_dev(): remove the context
 * mapping of the device at @bus/@devfn from @domain.
 */
void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
{
	detach_domain_for_dev(domain, bus, devfn);
}
EXPORT_SYMBOL_GPL(intel_iommu_detach_dev);
2401
/*
 * Exported wrapper around find_domain(): look up the dmar_domain
 * already associated with @pdev, if any.
 */
struct dmar_domain *
intel_iommu_find_domain(struct pci_dev *pdev)
{
	return find_domain(pdev);
}
EXPORT_SYMBOL_GPL(intel_iommu_find_domain);
2408
/*
 * Report whether VT-d hardware was found: returns the number of
 * initialized IOMMUs (non-zero == present).
 */
int intel_iommu_found(void)
{
	return g_num_of_iommus;
}
EXPORT_SYMBOL_GPL(intel_iommu_found);
2414
2415 u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova)
2416 {
2417 struct dma_pte *pte;
2418 u64 pfn;
2419
2420 pfn = 0;
2421 pte = addr_to_dma_pte(domain, iova);
2422
2423 if (pte)
2424 pfn = dma_pte_addr(*pte);
2425
2426 return pfn >> PAGE_SHIFT_4K;
2427 }
2428 EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn);