/*
 * Copyright (c) 2006, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Copyright (C) 2006-2008 Intel Corporation
 * Author: Ashok Raj <ashok.raj@intel.com>
 * Author: Shaohua Li <shaohua.li@intel.com>
 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 * Author: Fenghua Yu <fenghua.yu@intel.com>
 */
#include <linux/init.h>
#include <linux/bitmap.h>
#include <linux/debugfs.h>
#include <linux/slab.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/dma-mapping.h>
#include <linux/mempool.h>
#include <linux/timer.h>
#include <linux/iova.h>
#include <linux/intel-iommu.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>

#define ROOT_SIZE		VTD_PAGE_SIZE
#define CONTEXT_SIZE		VTD_PAGE_SIZE

#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)

#define IOAPIC_RANGE_START	(0xfee00000)
#define IOAPIC_RANGE_END	(0xfeefffff)
#define IOVA_START_ADDR		(0x1000)

#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48

#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)

#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
#define DMA_32BIT_PFN		IOVA_PFN(DMA_32BIT_MASK)
#define DMA_64BIT_PFN		IOVA_PFN(DMA_64BIT_MASK)
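
/*
 * Worked example (illustrative, not from the original source): with the
 * default 48-bit guest address width, DOMAIN_MAX_ADDR(48) is
 * ((u64)1 << 48) - 1 = 0x0000ffffffffffff, and with 4KiB pages IOVA_PFN()
 * simply shifts an address right by 12 to obtain its page frame number.
 */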
/*
 * 0: Present
 * 1-11: Reserved
 * 12-63: Context Ptr (12 - (haw-1))
 * 64-127: Reserved
 */
struct root_entry {
	u64	val;
	u64	rsvd1;
};
#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))

static inline bool root_present(struct root_entry *root)
{
	return (root->val & 1);
}

static inline void set_root_present(struct root_entry *root)
{
	root->val |= 1;
}

static inline void set_root_value(struct root_entry *root, unsigned long value)
{
	root->val |= value & VTD_PAGE_MASK;
}

static inline struct context_entry *
get_context_addr_from_root(struct root_entry *root)
{
	return (struct context_entry *)
		(root_present(root) ? phys_to_virt(
		root->val & VTD_PAGE_MASK) :
		NULL);
}
/*
 * low 64 bits:
 * 0: present
 * 1: fault processing disable
 * 2-3: translation type
 * 12-63: address space root
 * high 64 bits:
 * 0-2: address width
 * 8-23: domain id
 */
struct context_entry {
	u64 lo;
	u64 hi;
};

static inline bool context_present(struct context_entry *context)
{
	return (context->lo & 1);
}

static inline void context_set_present(struct context_entry *context)
{
	context->lo |= 1;
}

static inline void context_set_fault_enable(struct context_entry *context)
{
	context->lo &= (((u64)-1) << 2) | 1;
}

#define CONTEXT_TT_MULTI_LEVEL 0

static inline void context_set_translation_type(struct context_entry *context,
						unsigned long value)
{
	context->lo &= (((u64)-1) << 4) | 3;
	context->lo |= (value & 3) << 2;
}

static inline void context_set_address_root(struct context_entry *context,
					    unsigned long value)
{
	context->lo |= value & VTD_PAGE_MASK;
}

static inline void context_set_address_width(struct context_entry *context,
					     unsigned long value)
{
	context->hi |= value & 7;
}

static inline void context_set_domain_id(struct context_entry *context,
					 unsigned long value)
{
	context->hi |= (value & ((1 << 16) - 1)) << 8;
}

static inline void context_clear_entry(struct context_entry *context)
{
	context->lo = 0;
	context->hi = 0;
}
/*
 * 0: readable
 * 1: writable
 * 12-63: Host physical address
 */
struct dma_pte {
	u64 val;
};

static inline void dma_clear_pte(struct dma_pte *pte)
{
	pte->val = 0;
}

static inline void dma_set_pte_readable(struct dma_pte *pte)
{
	pte->val |= DMA_PTE_READ;
}

static inline void dma_set_pte_writable(struct dma_pte *pte)
{
	pte->val |= DMA_PTE_WRITE;
}

static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
{
	pte->val = (pte->val & ~3) | (prot & 3);
}

static inline u64 dma_pte_addr(struct dma_pte *pte)
{
	return (pte->val & VTD_PAGE_MASK);
}

static inline void dma_set_pte_addr(struct dma_pte *pte, u64 addr)
{
	pte->val |= (addr & VTD_PAGE_MASK);
}

static inline bool dma_pte_present(struct dma_pte *pte)
{
	return (pte->val & 3) != 0;
}
/* devices under the same p2p bridge are owned in one domain */
#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
struct dmar_domain {
	int	id;			/* domain id */
	struct intel_iommu *iommu;	/* back pointer to owning iommu */

	struct list_head devices;	/* all devices' list */
	struct iova_domain iovad;	/* iova's that belong to this domain */

	struct dma_pte	*pgd;		/* virtual address */
	spinlock_t	mapping_lock;	/* page table lock */
	int		gaw;		/* max guest address width */

	/* adjusted guest address width, 0 is level 2 30-bit */
	int		agaw;

	int		flags;		/* flags to find out type of domain */
};
/* PCI domain-device relationship */
struct device_domain_info {
	struct list_head link;	/* link to domain siblings */
	struct list_head global; /* link to global list */
	u8 bus;			/* PCI bus number */
	u8 devfn;		/* PCI devfn number */
	struct pci_dev *dev;	/* it's NULL for PCIE-to-PCI bridge */
	struct dmar_domain *domain; /* pointer to domain */
};
static void flush_unmaps_timeout(unsigned long data);

DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);

#define HIGH_WATER_MARK 250
struct deferred_flush_tables {
	int next;
	struct iova *iova[HIGH_WATER_MARK];
	struct dmar_domain *domain[HIGH_WATER_MARK];
};

static struct deferred_flush_tables *deferred_flush;

/* bitmap for indexing intel_iommus */
static int g_num_of_iommus;

static DEFINE_SPINLOCK(async_umap_flush_lock);
static LIST_HEAD(unmaps_to_do);

static long list_size;

static void domain_remove_dev_info(struct dmar_domain *domain);

static int __initdata dmar_map_gfx = 1;
static int dmar_forcedac;
static int intel_iommu_strict;

#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
static DEFINE_SPINLOCK(device_domain_lock);
static LIST_HEAD(device_domain_list);
266 static int __init
intel_iommu_setup(char *str
)
271 if (!strncmp(str
, "off", 3)) {
273 printk(KERN_INFO
"Intel-IOMMU: disabled\n");
274 } else if (!strncmp(str
, "igfx_off", 8)) {
277 "Intel-IOMMU: disable GFX device mapping\n");
278 } else if (!strncmp(str
, "forcedac", 8)) {
280 "Intel-IOMMU: Forcing DAC for PCI devices\n");
282 } else if (!strncmp(str
, "strict", 6)) {
284 "Intel-IOMMU: disable batched IOTLB flush\n");
285 intel_iommu_strict
= 1;
288 str
+= strcspn(str
, ",");
294 __setup("intel_iommu=", intel_iommu_setup
);
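
/*
 * Boot-option examples handled by intel_iommu_setup() above (illustrative):
 * "intel_iommu=off" disables DMA remapping entirely, while
 * "intel_iommu=igfx_off,strict" keeps the IOMMU enabled but skips the
 * graphics device and forces a synchronous IOTLB flush on every unmap;
 * options are comma-separated, which is what the strcspn() step skips over.
 */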
296 static struct kmem_cache
*iommu_domain_cache
;
297 static struct kmem_cache
*iommu_devinfo_cache
;
298 static struct kmem_cache
*iommu_iova_cache
;
300 static inline void *iommu_kmem_cache_alloc(struct kmem_cache
*cachep
)
305 /* trying to avoid low memory issues */
306 flags
= current
->flags
& PF_MEMALLOC
;
307 current
->flags
|= PF_MEMALLOC
;
308 vaddr
= kmem_cache_alloc(cachep
, GFP_ATOMIC
);
309 current
->flags
&= (~PF_MEMALLOC
| flags
);
314 static inline void *alloc_pgtable_page(void)
319 /* trying to avoid low memory issues */
320 flags
= current
->flags
& PF_MEMALLOC
;
321 current
->flags
|= PF_MEMALLOC
;
322 vaddr
= (void *)get_zeroed_page(GFP_ATOMIC
);
323 current
->flags
&= (~PF_MEMALLOC
| flags
);
327 static inline void free_pgtable_page(void *vaddr
)
329 free_page((unsigned long)vaddr
);
332 static inline void *alloc_domain_mem(void)
334 return iommu_kmem_cache_alloc(iommu_domain_cache
);
337 static void free_domain_mem(void *vaddr
)
339 kmem_cache_free(iommu_domain_cache
, vaddr
);
342 static inline void * alloc_devinfo_mem(void)
344 return iommu_kmem_cache_alloc(iommu_devinfo_cache
);
347 static inline void free_devinfo_mem(void *vaddr
)
349 kmem_cache_free(iommu_devinfo_cache
, vaddr
);
352 struct iova
*alloc_iova_mem(void)
354 return iommu_kmem_cache_alloc(iommu_iova_cache
);
357 void free_iova_mem(struct iova
*iova
)
359 kmem_cache_free(iommu_iova_cache
, iova
);
362 /* Gets context entry for a given bus and devfn */
363 static struct context_entry
* device_to_context_entry(struct intel_iommu
*iommu
,
366 struct root_entry
*root
;
367 struct context_entry
*context
;
368 unsigned long phy_addr
;
371 spin_lock_irqsave(&iommu
->lock
, flags
);
372 root
= &iommu
->root_entry
[bus
];
373 context
= get_context_addr_from_root(root
);
375 context
= (struct context_entry
*)alloc_pgtable_page();
377 spin_unlock_irqrestore(&iommu
->lock
, flags
);
380 __iommu_flush_cache(iommu
, (void *)context
, CONTEXT_SIZE
);
381 phy_addr
= virt_to_phys((void *)context
);
382 set_root_value(root
, phy_addr
);
383 set_root_present(root
);
384 __iommu_flush_cache(iommu
, root
, sizeof(*root
));
386 spin_unlock_irqrestore(&iommu
->lock
, flags
);
387 return &context
[devfn
];
390 static int device_context_mapped(struct intel_iommu
*iommu
, u8 bus
, u8 devfn
)
392 struct root_entry
*root
;
393 struct context_entry
*context
;
397 spin_lock_irqsave(&iommu
->lock
, flags
);
398 root
= &iommu
->root_entry
[bus
];
399 context
= get_context_addr_from_root(root
);
404 ret
= context_present(&context
[devfn
]);
406 spin_unlock_irqrestore(&iommu
->lock
, flags
);
410 static void clear_context_table(struct intel_iommu
*iommu
, u8 bus
, u8 devfn
)
412 struct root_entry
*root
;
413 struct context_entry
*context
;
416 spin_lock_irqsave(&iommu
->lock
, flags
);
417 root
= &iommu
->root_entry
[bus
];
418 context
= get_context_addr_from_root(root
);
420 context_clear_entry(&context
[devfn
]);
421 __iommu_flush_cache(iommu
, &context
[devfn
], \
424 spin_unlock_irqrestore(&iommu
->lock
, flags
);
427 static void free_context_table(struct intel_iommu
*iommu
)
429 struct root_entry
*root
;
432 struct context_entry
*context
;
434 spin_lock_irqsave(&iommu
->lock
, flags
);
435 if (!iommu
->root_entry
) {
438 for (i
= 0; i
< ROOT_ENTRY_NR
; i
++) {
439 root
= &iommu
->root_entry
[i
];
440 context
= get_context_addr_from_root(root
);
442 free_pgtable_page(context
);
444 free_pgtable_page(iommu
->root_entry
);
445 iommu
->root_entry
= NULL
;
447 spin_unlock_irqrestore(&iommu
->lock
, flags
);
/* page table handling */
#define LEVEL_STRIDE		(9)
#define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)

static inline int agaw_to_level(int agaw)
{
	return agaw + 2;
}

static inline int agaw_to_width(int agaw)
{
	return 30 + agaw * LEVEL_STRIDE;
}

static inline int width_to_agaw(int width)
{
	return (width - 30) / LEVEL_STRIDE;
}

static inline unsigned int level_to_offset_bits(int level)
{
	return (12 + (level - 1) * LEVEL_STRIDE);
}

static inline int address_level_offset(u64 addr, int level)
{
	return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
}

static inline u64 level_mask(int level)
{
	return ((u64)-1 << level_to_offset_bits(level));
}

static inline u64 level_size(int level)
{
	return ((u64)1 << level_to_offset_bits(level));
}

static inline u64 align_to_level(u64 addr, int level)
{
	return ((addr + level_size(level) - 1) & level_mask(level));
}
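
/*
 * How the AGAW helpers above fit together (worked example): a domain with
 * agaw == 2 uses a 4-level page table covering
 * agaw_to_width(2) == 30 + 2 * 9 == 48 bits of address space; each level
 * consumes LEVEL_STRIDE == 9 bits, and level_to_offset_bits(4) == 39, so
 * the top-level index is taken from bits 39-47 of the address.
 */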
495 static struct dma_pte
* addr_to_dma_pte(struct dmar_domain
*domain
, u64 addr
)
497 int addr_width
= agaw_to_width(domain
->agaw
);
498 struct dma_pte
*parent
, *pte
= NULL
;
499 int level
= agaw_to_level(domain
->agaw
);
503 BUG_ON(!domain
->pgd
);
505 addr
&= (((u64
)1) << addr_width
) - 1;
506 parent
= domain
->pgd
;
508 spin_lock_irqsave(&domain
->mapping_lock
, flags
);
512 offset
= address_level_offset(addr
, level
);
513 pte
= &parent
[offset
];
517 if (!dma_pte_present(pte
)) {
518 tmp_page
= alloc_pgtable_page();
521 spin_unlock_irqrestore(&domain
->mapping_lock
,
525 __iommu_flush_cache(domain
->iommu
, tmp_page
,
527 dma_set_pte_addr(pte
, virt_to_phys(tmp_page
));
529 * high level table always sets r/w, last level page
530 * table control read/write
532 dma_set_pte_readable(pte
);
533 dma_set_pte_writable(pte
);
534 __iommu_flush_cache(domain
->iommu
, pte
, sizeof(*pte
));
536 parent
= phys_to_virt(dma_pte_addr(pte
));
540 spin_unlock_irqrestore(&domain
->mapping_lock
, flags
);
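
/*
 * In short, addr_to_dma_pte() walks the page table top-down: at each level
 * address_level_offset() extracts the next 9-bit index, a missing
 * intermediate table is allocated with alloc_pgtable_page() and hooked in
 * via dma_set_pte_addr(), and the walk ends at the leaf PTE covering the
 * 4KiB page that contains the requested address.
 */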
544 /* return address's pte at specific level */
545 static struct dma_pte
*dma_addr_level_pte(struct dmar_domain
*domain
, u64 addr
,
548 struct dma_pte
*parent
, *pte
= NULL
;
549 int total
= agaw_to_level(domain
->agaw
);
552 parent
= domain
->pgd
;
553 while (level
<= total
) {
554 offset
= address_level_offset(addr
, total
);
555 pte
= &parent
[offset
];
559 if (!dma_pte_present(pte
))
561 parent
= phys_to_virt(dma_pte_addr(pte
));
567 /* clear one page's page table */
568 static void dma_pte_clear_one(struct dmar_domain
*domain
, u64 addr
)
570 struct dma_pte
*pte
= NULL
;
572 /* get last level pte */
573 pte
= dma_addr_level_pte(domain
, addr
, 1);
577 __iommu_flush_cache(domain
->iommu
, pte
, sizeof(*pte
));
581 /* clear last level pte, a tlb flush should be followed */
582 static void dma_pte_clear_range(struct dmar_domain
*domain
, u64 start
, u64 end
)
584 int addr_width
= agaw_to_width(domain
->agaw
);
586 start
&= (((u64
)1) << addr_width
) - 1;
587 end
&= (((u64
)1) << addr_width
) - 1;
588 /* in case it's partial page */
589 start
= PAGE_ALIGN(start
);
592 /* we don't need lock here, nobody else touches the iova range */
593 while (start
< end
) {
594 dma_pte_clear_one(domain
, start
);
595 start
+= VTD_PAGE_SIZE
;
599 /* free page table pages. last level pte should already be cleared */
600 static void dma_pte_free_pagetable(struct dmar_domain
*domain
,
603 int addr_width
= agaw_to_width(domain
->agaw
);
605 int total
= agaw_to_level(domain
->agaw
);
609 start
&= (((u64
)1) << addr_width
) - 1;
610 end
&= (((u64
)1) << addr_width
) - 1;
612 /* we don't need lock here, nobody else touches the iova range */
614 while (level
<= total
) {
615 tmp
= align_to_level(start
, level
);
616 if (tmp
>= end
|| (tmp
+ level_size(level
) > end
))
620 pte
= dma_addr_level_pte(domain
, tmp
, level
);
623 phys_to_virt(dma_pte_addr(pte
)));
625 __iommu_flush_cache(domain
->iommu
,
628 tmp
+= level_size(level
);
633 if (start
== 0 && end
>= ((((u64
)1) << addr_width
) - 1)) {
634 free_pgtable_page(domain
->pgd
);
640 static int iommu_alloc_root_entry(struct intel_iommu
*iommu
)
642 struct root_entry
*root
;
645 root
= (struct root_entry
*)alloc_pgtable_page();
649 __iommu_flush_cache(iommu
, root
, ROOT_SIZE
);
651 spin_lock_irqsave(&iommu
->lock
, flags
);
652 iommu
->root_entry
= root
;
653 spin_unlock_irqrestore(&iommu
->lock
, flags
);
658 static void iommu_set_root_entry(struct intel_iommu
*iommu
)
664 addr
= iommu
->root_entry
;
666 spin_lock_irqsave(&iommu
->register_lock
, flag
);
667 dmar_writeq(iommu
->reg
+ DMAR_RTADDR_REG
, virt_to_phys(addr
));
669 cmd
= iommu
->gcmd
| DMA_GCMD_SRTP
;
670 writel(cmd
, iommu
->reg
+ DMAR_GCMD_REG
);
672 /* Make sure hardware complete it */
673 IOMMU_WAIT_OP(iommu
, DMAR_GSTS_REG
,
674 readl
, (sts
& DMA_GSTS_RTPS
), sts
);
676 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
679 static void iommu_flush_write_buffer(struct intel_iommu
*iommu
)
684 if (!cap_rwbf(iommu
->cap
))
686 val
= iommu
->gcmd
| DMA_GCMD_WBF
;
688 spin_lock_irqsave(&iommu
->register_lock
, flag
);
689 writel(val
, iommu
->reg
+ DMAR_GCMD_REG
);
691 /* Make sure hardware complete it */
692 IOMMU_WAIT_OP(iommu
, DMAR_GSTS_REG
,
693 readl
, (!(val
& DMA_GSTS_WBFS
)), val
);
695 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
/* return value determines if we need a write buffer flush */
699 static int __iommu_flush_context(struct intel_iommu
*iommu
,
700 u16 did
, u16 source_id
, u8 function_mask
, u64 type
,
701 int non_present_entry_flush
)
707 * In the non-present entry flush case, if hardware doesn't cache
708 * non-present entry we do nothing and if hardware cache non-present
709 * entry, we flush entries of domain 0 (the domain id is used to cache
710 * any non-present entries)
712 if (non_present_entry_flush
) {
713 if (!cap_caching_mode(iommu
->cap
))
720 case DMA_CCMD_GLOBAL_INVL
:
721 val
= DMA_CCMD_GLOBAL_INVL
;
723 case DMA_CCMD_DOMAIN_INVL
:
724 val
= DMA_CCMD_DOMAIN_INVL
|DMA_CCMD_DID(did
);
726 case DMA_CCMD_DEVICE_INVL
:
727 val
= DMA_CCMD_DEVICE_INVL
|DMA_CCMD_DID(did
)
728 | DMA_CCMD_SID(source_id
) | DMA_CCMD_FM(function_mask
);
735 spin_lock_irqsave(&iommu
->register_lock
, flag
);
736 dmar_writeq(iommu
->reg
+ DMAR_CCMD_REG
, val
);
738 /* Make sure hardware complete it */
739 IOMMU_WAIT_OP(iommu
, DMAR_CCMD_REG
,
740 dmar_readq
, (!(val
& DMA_CCMD_ICC
)), val
);
742 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
744 /* flush context entry will implicitly flush write buffer */
/* return value determines if we need a write buffer flush */
749 static int __iommu_flush_iotlb(struct intel_iommu
*iommu
, u16 did
,
750 u64 addr
, unsigned int size_order
, u64 type
,
751 int non_present_entry_flush
)
753 int tlb_offset
= ecap_iotlb_offset(iommu
->ecap
);
754 u64 val
= 0, val_iva
= 0;
758 * In the non-present entry flush case, if hardware doesn't cache
759 * non-present entry we do nothing and if hardware cache non-present
760 * entry, we flush entries of domain 0 (the domain id is used to cache
761 * any non-present entries)
763 if (non_present_entry_flush
) {
764 if (!cap_caching_mode(iommu
->cap
))
771 case DMA_TLB_GLOBAL_FLUSH
:
772 /* global flush doesn't need set IVA_REG */
773 val
= DMA_TLB_GLOBAL_FLUSH
|DMA_TLB_IVT
;
775 case DMA_TLB_DSI_FLUSH
:
776 val
= DMA_TLB_DSI_FLUSH
|DMA_TLB_IVT
|DMA_TLB_DID(did
);
778 case DMA_TLB_PSI_FLUSH
:
779 val
= DMA_TLB_PSI_FLUSH
|DMA_TLB_IVT
|DMA_TLB_DID(did
);
780 /* Note: always flush non-leaf currently */
781 val_iva
= size_order
| addr
;
786 /* Note: set drain read/write */
	 * This is probably just to be extra safe; it looks like we can
	 * ignore it without any impact.
792 if (cap_read_drain(iommu
->cap
))
793 val
|= DMA_TLB_READ_DRAIN
;
795 if (cap_write_drain(iommu
->cap
))
796 val
|= DMA_TLB_WRITE_DRAIN
;
798 spin_lock_irqsave(&iommu
->register_lock
, flag
);
799 /* Note: Only uses first TLB reg currently */
801 dmar_writeq(iommu
->reg
+ tlb_offset
, val_iva
);
802 dmar_writeq(iommu
->reg
+ tlb_offset
+ 8, val
);
804 /* Make sure hardware complete it */
805 IOMMU_WAIT_OP(iommu
, tlb_offset
+ 8,
806 dmar_readq
, (!(val
& DMA_TLB_IVT
)), val
);
808 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
810 /* check IOTLB invalidation granularity */
811 if (DMA_TLB_IAIG(val
) == 0)
812 printk(KERN_ERR
"IOMMU: flush IOTLB failed\n");
813 if (DMA_TLB_IAIG(val
) != DMA_TLB_IIRG(type
))
814 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
815 (unsigned long long)DMA_TLB_IIRG(type
),
816 (unsigned long long)DMA_TLB_IAIG(val
));
817 /* flush iotlb entry will implicitly flush write buffer */
821 static int iommu_flush_iotlb_psi(struct intel_iommu
*iommu
, u16 did
,
822 u64 addr
, unsigned int pages
, int non_present_entry_flush
)
826 BUG_ON(addr
& (~VTD_PAGE_MASK
));
829 /* Fallback to domain selective flush if no PSI support */
830 if (!cap_pgsel_inv(iommu
->cap
))
831 return iommu
->flush
.flush_iotlb(iommu
, did
, 0, 0,
833 non_present_entry_flush
);
836 * PSI requires page size to be 2 ^ x, and the base address is naturally
837 * aligned to the size
839 mask
= ilog2(__roundup_pow_of_two(pages
));
840 /* Fallback to domain selective flush if size is too big */
841 if (mask
> cap_max_amask_val(iommu
->cap
))
842 return iommu
->flush
.flush_iotlb(iommu
, did
, 0, 0,
843 DMA_TLB_DSI_FLUSH
, non_present_entry_flush
);
845 return iommu
->flush
.flush_iotlb(iommu
, did
, addr
, mask
,
847 non_present_entry_flush
);
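
/*
 * PSI mask example (illustrative): flushing 5 pages gives
 * mask = ilog2(__roundup_pow_of_two(5)) = 3, so the hardware invalidates a
 * naturally aligned 8-page (32KiB) region that covers the request; if mask
 * exceeds cap_max_amask_val() we fall back to a domain-selective flush.
 */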
850 static void iommu_disable_protect_mem_regions(struct intel_iommu
*iommu
)
855 spin_lock_irqsave(&iommu
->register_lock
, flags
);
856 pmen
= readl(iommu
->reg
+ DMAR_PMEN_REG
);
857 pmen
&= ~DMA_PMEN_EPM
;
858 writel(pmen
, iommu
->reg
+ DMAR_PMEN_REG
);
860 /* wait for the protected region status bit to clear */
861 IOMMU_WAIT_OP(iommu
, DMAR_PMEN_REG
,
862 readl
, !(pmen
& DMA_PMEN_PRS
), pmen
);
864 spin_unlock_irqrestore(&iommu
->register_lock
, flags
);
867 static int iommu_enable_translation(struct intel_iommu
*iommu
)
872 spin_lock_irqsave(&iommu
->register_lock
, flags
);
873 writel(iommu
->gcmd
|DMA_GCMD_TE
, iommu
->reg
+ DMAR_GCMD_REG
);
875 /* Make sure hardware complete it */
876 IOMMU_WAIT_OP(iommu
, DMAR_GSTS_REG
,
877 readl
, (sts
& DMA_GSTS_TES
), sts
);
879 iommu
->gcmd
|= DMA_GCMD_TE
;
880 spin_unlock_irqrestore(&iommu
->register_lock
, flags
);
884 static int iommu_disable_translation(struct intel_iommu
*iommu
)
889 spin_lock_irqsave(&iommu
->register_lock
, flag
);
890 iommu
->gcmd
&= ~DMA_GCMD_TE
;
891 writel(iommu
->gcmd
, iommu
->reg
+ DMAR_GCMD_REG
);
893 /* Make sure hardware complete it */
894 IOMMU_WAIT_OP(iommu
, DMAR_GSTS_REG
,
895 readl
, (!(sts
& DMA_GSTS_TES
)), sts
);
897 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
/* iommu interrupt handling. Most of it is MSI-like. */
903 static const char *fault_reason_strings
[] =
906 "Present bit in root entry is clear",
907 "Present bit in context entry is clear",
908 "Invalid context entry",
909 "Access beyond MGAW",
910 "PTE Write access is not set",
911 "PTE Read access is not set",
912 "Next page table ptr is invalid",
913 "Root table address invalid",
914 "Context table ptr is invalid",
915 "non-zero reserved fields in RTP",
916 "non-zero reserved fields in CTP",
917 "non-zero reserved fields in PTE",
919 #define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1)
921 const char *dmar_get_fault_reason(u8 fault_reason
)
923 if (fault_reason
> MAX_FAULT_REASON_IDX
)
926 return fault_reason_strings
[fault_reason
];
929 void dmar_msi_unmask(unsigned int irq
)
931 struct intel_iommu
*iommu
= get_irq_data(irq
);
935 spin_lock_irqsave(&iommu
->register_lock
, flag
);
936 writel(0, iommu
->reg
+ DMAR_FECTL_REG
);
937 /* Read a reg to force flush the post write */
938 readl(iommu
->reg
+ DMAR_FECTL_REG
);
939 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
942 void dmar_msi_mask(unsigned int irq
)
945 struct intel_iommu
*iommu
= get_irq_data(irq
);
948 spin_lock_irqsave(&iommu
->register_lock
, flag
);
949 writel(DMA_FECTL_IM
, iommu
->reg
+ DMAR_FECTL_REG
);
950 /* Read a reg to force flush the post write */
951 readl(iommu
->reg
+ DMAR_FECTL_REG
);
952 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
955 void dmar_msi_write(int irq
, struct msi_msg
*msg
)
957 struct intel_iommu
*iommu
= get_irq_data(irq
);
960 spin_lock_irqsave(&iommu
->register_lock
, flag
);
961 writel(msg
->data
, iommu
->reg
+ DMAR_FEDATA_REG
);
962 writel(msg
->address_lo
, iommu
->reg
+ DMAR_FEADDR_REG
);
963 writel(msg
->address_hi
, iommu
->reg
+ DMAR_FEUADDR_REG
);
964 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
967 void dmar_msi_read(int irq
, struct msi_msg
*msg
)
969 struct intel_iommu
*iommu
= get_irq_data(irq
);
972 spin_lock_irqsave(&iommu
->register_lock
, flag
);
973 msg
->data
= readl(iommu
->reg
+ DMAR_FEDATA_REG
);
974 msg
->address_lo
= readl(iommu
->reg
+ DMAR_FEADDR_REG
);
975 msg
->address_hi
= readl(iommu
->reg
+ DMAR_FEUADDR_REG
);
976 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
979 static int iommu_page_fault_do_one(struct intel_iommu
*iommu
, int type
,
980 u8 fault_reason
, u16 source_id
, unsigned long long addr
)
984 reason
= dmar_get_fault_reason(fault_reason
);
987 "DMAR:[%s] Request device [%02x:%02x.%d] "
989 "DMAR:[fault reason %02d] %s\n",
990 (type
? "DMA Read" : "DMA Write"),
991 (source_id
>> 8), PCI_SLOT(source_id
& 0xFF),
992 PCI_FUNC(source_id
& 0xFF), addr
, fault_reason
, reason
);
996 #define PRIMARY_FAULT_REG_LEN (16)
997 static irqreturn_t
iommu_page_fault(int irq
, void *dev_id
)
999 struct intel_iommu
*iommu
= dev_id
;
1000 int reg
, fault_index
;
1004 spin_lock_irqsave(&iommu
->register_lock
, flag
);
1005 fault_status
= readl(iommu
->reg
+ DMAR_FSTS_REG
);
1007 /* TBD: ignore advanced fault log currently */
1008 if (!(fault_status
& DMA_FSTS_PPF
))
1009 goto clear_overflow
;
1011 fault_index
= dma_fsts_fault_record_index(fault_status
);
1012 reg
= cap_fault_reg_offset(iommu
->cap
);
1020 /* highest 32 bits */
1021 data
= readl(iommu
->reg
+ reg
+
1022 fault_index
* PRIMARY_FAULT_REG_LEN
+ 12);
1023 if (!(data
& DMA_FRCD_F
))
1026 fault_reason
= dma_frcd_fault_reason(data
);
1027 type
= dma_frcd_type(data
);
1029 data
= readl(iommu
->reg
+ reg
+
1030 fault_index
* PRIMARY_FAULT_REG_LEN
+ 8);
1031 source_id
= dma_frcd_source_id(data
);
1033 guest_addr
= dmar_readq(iommu
->reg
+ reg
+
1034 fault_index
* PRIMARY_FAULT_REG_LEN
);
1035 guest_addr
= dma_frcd_page_addr(guest_addr
);
1036 /* clear the fault */
1037 writel(DMA_FRCD_F
, iommu
->reg
+ reg
+
1038 fault_index
* PRIMARY_FAULT_REG_LEN
+ 12);
1040 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
1042 iommu_page_fault_do_one(iommu
, type
, fault_reason
,
1043 source_id
, guest_addr
);
1046 if (fault_index
> cap_num_fault_regs(iommu
->cap
))
1048 spin_lock_irqsave(&iommu
->register_lock
, flag
);
1051 /* clear primary fault overflow */
1052 fault_status
= readl(iommu
->reg
+ DMAR_FSTS_REG
);
1053 if (fault_status
& DMA_FSTS_PFO
)
1054 writel(DMA_FSTS_PFO
, iommu
->reg
+ DMAR_FSTS_REG
);
1056 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
1060 int dmar_set_interrupt(struct intel_iommu
*iommu
)
1066 printk(KERN_ERR
"IOMMU: no free vectors\n");
1070 set_irq_data(irq
, iommu
);
1073 ret
= arch_setup_dmar_msi(irq
);
1075 set_irq_data(irq
, NULL
);
1081 /* Force fault register is cleared */
1082 iommu_page_fault(irq
, iommu
);
1084 ret
= request_irq(irq
, iommu_page_fault
, 0, iommu
->name
, iommu
);
1086 printk(KERN_ERR
"IOMMU: can't request irq\n");
1090 static int iommu_init_domains(struct intel_iommu
*iommu
)
1092 unsigned long ndomains
;
1093 unsigned long nlongs
;
1095 ndomains
= cap_ndoms(iommu
->cap
);
1096 pr_debug("Number of Domains supportd <%ld>\n", ndomains
);
1097 nlongs
= BITS_TO_LONGS(ndomains
);
1099 /* TBD: there might be 64K domains,
1100 * consider other allocation for future chip
1102 iommu
->domain_ids
= kcalloc(nlongs
, sizeof(unsigned long), GFP_KERNEL
);
1103 if (!iommu
->domain_ids
) {
1104 printk(KERN_ERR
"Allocating domain id array failed\n");
1107 iommu
->domains
= kcalloc(ndomains
, sizeof(struct dmar_domain
*),
1109 if (!iommu
->domains
) {
1110 printk(KERN_ERR
"Allocating domain array failed\n");
1111 kfree(iommu
->domain_ids
);
1115 spin_lock_init(&iommu
->lock
);
1118 * if Caching mode is set, then invalid translations are tagged
1119 * with domainid 0. Hence we need to pre-allocate it.
1121 if (cap_caching_mode(iommu
->cap
))
1122 set_bit(0, iommu
->domain_ids
);
1127 static void domain_exit(struct dmar_domain
*domain
);
1129 void free_dmar_iommu(struct intel_iommu
*iommu
)
1131 struct dmar_domain
*domain
;
1134 i
= find_first_bit(iommu
->domain_ids
, cap_ndoms(iommu
->cap
));
1135 for (; i
< cap_ndoms(iommu
->cap
); ) {
1136 domain
= iommu
->domains
[i
];
1137 clear_bit(i
, iommu
->domain_ids
);
1138 domain_exit(domain
);
1139 i
= find_next_bit(iommu
->domain_ids
,
1140 cap_ndoms(iommu
->cap
), i
+1);
1143 if (iommu
->gcmd
& DMA_GCMD_TE
)
1144 iommu_disable_translation(iommu
);
1147 set_irq_data(iommu
->irq
, NULL
);
1148 /* This will mask the irq */
1149 free_irq(iommu
->irq
, iommu
);
1150 destroy_irq(iommu
->irq
);
1153 kfree(iommu
->domains
);
1154 kfree(iommu
->domain_ids
);
1156 /* free context mapping */
1157 free_context_table(iommu
);
1160 static struct dmar_domain
* iommu_alloc_domain(struct intel_iommu
*iommu
)
1163 unsigned long ndomains
;
1164 struct dmar_domain
*domain
;
1165 unsigned long flags
;
1167 domain
= alloc_domain_mem();
1171 ndomains
= cap_ndoms(iommu
->cap
);
1173 spin_lock_irqsave(&iommu
->lock
, flags
);
1174 num
= find_first_zero_bit(iommu
->domain_ids
, ndomains
);
1175 if (num
>= ndomains
) {
1176 spin_unlock_irqrestore(&iommu
->lock
, flags
);
1177 free_domain_mem(domain
);
1178 printk(KERN_ERR
"IOMMU: no free domain ids\n");
1182 set_bit(num
, iommu
->domain_ids
);
1184 domain
->iommu
= iommu
;
1186 iommu
->domains
[num
] = domain
;
1187 spin_unlock_irqrestore(&iommu
->lock
, flags
);
1192 static void iommu_free_domain(struct dmar_domain
*domain
)
1194 unsigned long flags
;
1196 spin_lock_irqsave(&domain
->iommu
->lock
, flags
);
1197 clear_bit(domain
->id
, domain
->iommu
->domain_ids
);
1198 spin_unlock_irqrestore(&domain
->iommu
->lock
, flags
);
1201 static struct iova_domain reserved_iova_list
;
1202 static struct lock_class_key reserved_alloc_key
;
1203 static struct lock_class_key reserved_rbtree_key
;
1205 static void dmar_init_reserved_ranges(void)
1207 struct pci_dev
*pdev
= NULL
;
1212 init_iova_domain(&reserved_iova_list
, DMA_32BIT_PFN
);
1214 lockdep_set_class(&reserved_iova_list
.iova_alloc_lock
,
1215 &reserved_alloc_key
);
1216 lockdep_set_class(&reserved_iova_list
.iova_rbtree_lock
,
1217 &reserved_rbtree_key
);
1219 /* IOAPIC ranges shouldn't be accessed by DMA */
1220 iova
= reserve_iova(&reserved_iova_list
, IOVA_PFN(IOAPIC_RANGE_START
),
1221 IOVA_PFN(IOAPIC_RANGE_END
));
1223 printk(KERN_ERR
"Reserve IOAPIC range failed\n");
1225 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1226 for_each_pci_dev(pdev
) {
1229 for (i
= 0; i
< PCI_NUM_RESOURCES
; i
++) {
1230 r
= &pdev
->resource
[i
];
1231 if (!r
->flags
|| !(r
->flags
& IORESOURCE_MEM
))
1235 size
= r
->end
- addr
;
1236 size
= PAGE_ALIGN(size
);
1237 iova
= reserve_iova(&reserved_iova_list
, IOVA_PFN(addr
),
1238 IOVA_PFN(size
+ addr
) - 1);
1240 printk(KERN_ERR
"Reserve iova failed\n");
1246 static void domain_reserve_special_ranges(struct dmar_domain
*domain
)
1248 copy_reserved_iova(&reserved_iova_list
, &domain
->iovad
);
1251 static inline int guestwidth_to_adjustwidth(int gaw
)
1254 int r
= (gaw
- 12) % 9;
1265 static int domain_init(struct dmar_domain
*domain
, int guest_width
)
1267 struct intel_iommu
*iommu
;
1268 int adjust_width
, agaw
;
1269 unsigned long sagaw
;
1271 init_iova_domain(&domain
->iovad
, DMA_32BIT_PFN
);
1272 spin_lock_init(&domain
->mapping_lock
);
1274 domain_reserve_special_ranges(domain
);
1276 /* calculate AGAW */
1277 iommu
= domain
->iommu
;
1278 if (guest_width
> cap_mgaw(iommu
->cap
))
1279 guest_width
= cap_mgaw(iommu
->cap
);
1280 domain
->gaw
= guest_width
;
1281 adjust_width
= guestwidth_to_adjustwidth(guest_width
);
1282 agaw
= width_to_agaw(adjust_width
);
1283 sagaw
= cap_sagaw(iommu
->cap
);
1284 if (!test_bit(agaw
, &sagaw
)) {
1285 /* hardware doesn't support it, choose a bigger one */
1286 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw
);
1287 agaw
= find_next_bit(&sagaw
, 5, agaw
);
1291 domain
->agaw
= agaw
;
1292 INIT_LIST_HEAD(&domain
->devices
);
1294 /* always allocate the top pgd */
1295 domain
->pgd
= (struct dma_pte
*)alloc_pgtable_page();
1298 __iommu_flush_cache(iommu
, domain
->pgd
, PAGE_SIZE
);
1302 static void domain_exit(struct dmar_domain
*domain
)
1306 /* Domain 0 is reserved, so dont process it */
1310 domain_remove_dev_info(domain
);
1312 put_iova_domain(&domain
->iovad
);
1313 end
= DOMAIN_MAX_ADDR(domain
->gaw
);
1314 end
= end
& (~PAGE_MASK
);
1317 dma_pte_clear_range(domain
, 0, end
);
1319 /* free page tables */
1320 dma_pte_free_pagetable(domain
, 0, end
);
1322 iommu_free_domain(domain
);
1323 free_domain_mem(domain
);
1326 static int domain_context_mapping_one(struct dmar_domain
*domain
,
1329 struct context_entry
*context
;
1330 struct intel_iommu
*iommu
= domain
->iommu
;
1331 unsigned long flags
;
1333 pr_debug("Set context mapping for %02x:%02x.%d\n",
1334 bus
, PCI_SLOT(devfn
), PCI_FUNC(devfn
));
1335 BUG_ON(!domain
->pgd
);
1336 context
= device_to_context_entry(iommu
, bus
, devfn
);
1339 spin_lock_irqsave(&iommu
->lock
, flags
);
1340 if (context_present(context
)) {
1341 spin_unlock_irqrestore(&iommu
->lock
, flags
);
1345 context_set_domain_id(context
, domain
->id
);
1346 context_set_address_width(context
, domain
->agaw
);
1347 context_set_address_root(context
, virt_to_phys(domain
->pgd
));
1348 context_set_translation_type(context
, CONTEXT_TT_MULTI_LEVEL
);
1349 context_set_fault_enable(context
);
1350 context_set_present(context
);
1351 __iommu_flush_cache(iommu
, context
, sizeof(*context
));
1353 /* it's a non-present to present mapping */
1354 if (iommu
->flush
.flush_context(iommu
, domain
->id
,
1355 (((u16
)bus
) << 8) | devfn
, DMA_CCMD_MASK_NOBIT
,
1356 DMA_CCMD_DEVICE_INVL
, 1))
1357 iommu_flush_write_buffer(iommu
);
1359 iommu
->flush
.flush_iotlb(iommu
, 0, 0, 0, DMA_TLB_DSI_FLUSH
, 0);
1361 spin_unlock_irqrestore(&iommu
->lock
, flags
);
1366 domain_context_mapping(struct dmar_domain
*domain
, struct pci_dev
*pdev
)
1369 struct pci_dev
*tmp
, *parent
;
1371 ret
= domain_context_mapping_one(domain
, pdev
->bus
->number
,
1376 /* dependent device mapping */
1377 tmp
= pci_find_upstream_pcie_bridge(pdev
);
1380 /* Secondary interface's bus number and devfn 0 */
1381 parent
= pdev
->bus
->self
;
1382 while (parent
!= tmp
) {
1383 ret
= domain_context_mapping_one(domain
, parent
->bus
->number
,
1387 parent
= parent
->bus
->self
;
1389 if (tmp
->is_pcie
) /* this is a PCIE-to-PCI bridge */
1390 return domain_context_mapping_one(domain
,
1391 tmp
->subordinate
->number
, 0);
1392 else /* this is a legacy PCI bridge */
1393 return domain_context_mapping_one(domain
,
1394 tmp
->bus
->number
, tmp
->devfn
);
1397 static int domain_context_mapped(struct dmar_domain
*domain
,
1398 struct pci_dev
*pdev
)
1401 struct pci_dev
*tmp
, *parent
;
1403 ret
= device_context_mapped(domain
->iommu
,
1404 pdev
->bus
->number
, pdev
->devfn
);
1407 /* dependent device mapping */
1408 tmp
= pci_find_upstream_pcie_bridge(pdev
);
1411 /* Secondary interface's bus number and devfn 0 */
1412 parent
= pdev
->bus
->self
;
1413 while (parent
!= tmp
) {
1414 ret
= device_context_mapped(domain
->iommu
, parent
->bus
->number
,
1418 parent
= parent
->bus
->self
;
1421 return device_context_mapped(domain
->iommu
,
1422 tmp
->subordinate
->number
, 0);
1424 return device_context_mapped(domain
->iommu
,
1425 tmp
->bus
->number
, tmp
->devfn
);
1429 domain_page_mapping(struct dmar_domain
*domain
, dma_addr_t iova
,
1430 u64 hpa
, size_t size
, int prot
)
1432 u64 start_pfn
, end_pfn
;
1433 struct dma_pte
*pte
;
1435 int addr_width
= agaw_to_width(domain
->agaw
);
1437 hpa
&= (((u64
)1) << addr_width
) - 1;
1439 if ((prot
& (DMA_PTE_READ
|DMA_PTE_WRITE
)) == 0)
1442 start_pfn
= ((u64
)hpa
) >> VTD_PAGE_SHIFT
;
1443 end_pfn
= (VTD_PAGE_ALIGN(((u64
)hpa
) + size
)) >> VTD_PAGE_SHIFT
;
1445 while (start_pfn
< end_pfn
) {
1446 pte
= addr_to_dma_pte(domain
, iova
+ VTD_PAGE_SIZE
* index
);
1449 /* We don't need lock here, nobody else
1450 * touches the iova range
1452 BUG_ON(dma_pte_addr(pte
));
1453 dma_set_pte_addr(pte
, start_pfn
<< VTD_PAGE_SHIFT
);
1454 dma_set_pte_prot(pte
, prot
);
1455 __iommu_flush_cache(domain
->iommu
, pte
, sizeof(*pte
));
1462 static void detach_domain_for_dev(struct dmar_domain
*domain
, u8 bus
, u8 devfn
)
1464 clear_context_table(domain
->iommu
, bus
, devfn
);
1465 domain
->iommu
->flush
.flush_context(domain
->iommu
, 0, 0, 0,
1466 DMA_CCMD_GLOBAL_INVL
, 0);
1467 domain
->iommu
->flush
.flush_iotlb(domain
->iommu
, 0, 0, 0,
1468 DMA_TLB_GLOBAL_FLUSH
, 0);
1471 static void domain_remove_dev_info(struct dmar_domain
*domain
)
1473 struct device_domain_info
*info
;
1474 unsigned long flags
;
1476 spin_lock_irqsave(&device_domain_lock
, flags
);
1477 while (!list_empty(&domain
->devices
)) {
1478 info
= list_entry(domain
->devices
.next
,
1479 struct device_domain_info
, link
);
1480 list_del(&info
->link
);
1481 list_del(&info
->global
);
1483 info
->dev
->dev
.archdata
.iommu
= NULL
;
1484 spin_unlock_irqrestore(&device_domain_lock
, flags
);
1486 detach_domain_for_dev(info
->domain
, info
->bus
, info
->devfn
);
1487 free_devinfo_mem(info
);
1489 spin_lock_irqsave(&device_domain_lock
, flags
);
1491 spin_unlock_irqrestore(&device_domain_lock
, flags
);
 * Note: we use struct pci_dev->dev.archdata.iommu to store the domain info
1498 static struct dmar_domain
*
1499 find_domain(struct pci_dev
*pdev
)
1501 struct device_domain_info
*info
;
1503 /* No lock here, assumes no domain exit in normal case */
1504 info
= pdev
->dev
.archdata
.iommu
;
1506 return info
->domain
;
1510 /* domain is initialized */
1511 static struct dmar_domain
*get_domain_for_dev(struct pci_dev
*pdev
, int gaw
)
1513 struct dmar_domain
*domain
, *found
= NULL
;
1514 struct intel_iommu
*iommu
;
1515 struct dmar_drhd_unit
*drhd
;
1516 struct device_domain_info
*info
, *tmp
;
1517 struct pci_dev
*dev_tmp
;
1518 unsigned long flags
;
1519 int bus
= 0, devfn
= 0;
1521 domain
= find_domain(pdev
);
1525 dev_tmp
= pci_find_upstream_pcie_bridge(pdev
);
1527 if (dev_tmp
->is_pcie
) {
1528 bus
= dev_tmp
->subordinate
->number
;
1531 bus
= dev_tmp
->bus
->number
;
1532 devfn
= dev_tmp
->devfn
;
1534 spin_lock_irqsave(&device_domain_lock
, flags
);
1535 list_for_each_entry(info
, &device_domain_list
, global
) {
1536 if (info
->bus
== bus
&& info
->devfn
== devfn
) {
1537 found
= info
->domain
;
1541 spin_unlock_irqrestore(&device_domain_lock
, flags
);
1542 /* pcie-pci bridge already has a domain, uses it */
1549 /* Allocate new domain for the device */
1550 drhd
= dmar_find_matched_drhd_unit(pdev
);
1552 printk(KERN_ERR
"IOMMU: can't find DMAR for device %s\n",
1556 iommu
= drhd
->iommu
;
1558 domain
= iommu_alloc_domain(iommu
);
1562 if (domain_init(domain
, gaw
)) {
1563 domain_exit(domain
);
1567 /* register pcie-to-pci device */
1569 info
= alloc_devinfo_mem();
1571 domain_exit(domain
);
1575 info
->devfn
= devfn
;
1577 info
->domain
= domain
;
1578 /* This domain is shared by devices under p2p bridge */
1579 domain
->flags
|= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES
;
1581 /* pcie-to-pci bridge already has a domain, uses it */
1583 spin_lock_irqsave(&device_domain_lock
, flags
);
1584 list_for_each_entry(tmp
, &device_domain_list
, global
) {
1585 if (tmp
->bus
== bus
&& tmp
->devfn
== devfn
) {
1586 found
= tmp
->domain
;
1591 free_devinfo_mem(info
);
1592 domain_exit(domain
);
1595 list_add(&info
->link
, &domain
->devices
);
1596 list_add(&info
->global
, &device_domain_list
);
1598 spin_unlock_irqrestore(&device_domain_lock
, flags
);
1602 info
= alloc_devinfo_mem();
1605 info
->bus
= pdev
->bus
->number
;
1606 info
->devfn
= pdev
->devfn
;
1608 info
->domain
= domain
;
1609 spin_lock_irqsave(&device_domain_lock
, flags
);
1610 /* somebody is fast */
1611 found
= find_domain(pdev
);
1612 if (found
!= NULL
) {
1613 spin_unlock_irqrestore(&device_domain_lock
, flags
);
1614 if (found
!= domain
) {
1615 domain_exit(domain
);
1618 free_devinfo_mem(info
);
1621 list_add(&info
->link
, &domain
->devices
);
1622 list_add(&info
->global
, &device_domain_list
);
1623 pdev
->dev
.archdata
.iommu
= info
;
1624 spin_unlock_irqrestore(&device_domain_lock
, flags
);
1627 /* recheck it here, maybe others set it */
1628 return find_domain(pdev
);
1631 static int iommu_prepare_identity_map(struct pci_dev
*pdev
,
1632 unsigned long long start
,
1633 unsigned long long end
)
1635 struct dmar_domain
*domain
;
1637 unsigned long long base
;
1641 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1642 pci_name(pdev
), start
, end
);
1643 /* page table init */
1644 domain
= get_domain_for_dev(pdev
, DEFAULT_DOMAIN_ADDRESS_WIDTH
);
1648 /* The address might not be aligned */
1649 base
= start
& PAGE_MASK
;
1651 size
= PAGE_ALIGN(size
);
1652 if (!reserve_iova(&domain
->iovad
, IOVA_PFN(base
),
1653 IOVA_PFN(base
+ size
) - 1)) {
1654 printk(KERN_ERR
"IOMMU: reserve iova failed\n");
1659 pr_debug("Mapping reserved region %lx@%llx for %s\n",
1660 size
, base
, pci_name(pdev
));
1662 * RMRR range might have overlap with physical memory range,
1665 dma_pte_clear_range(domain
, base
, base
+ size
);
1667 ret
= domain_page_mapping(domain
, base
, base
, size
,
1668 DMA_PTE_READ
|DMA_PTE_WRITE
);
1672 /* context entry init */
1673 ret
= domain_context_mapping(domain
, pdev
);
1677 domain_exit(domain
);
1682 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit
*rmrr
,
1683 struct pci_dev
*pdev
)
1685 if (pdev
->dev
.archdata
.iommu
== DUMMY_DEVICE_DOMAIN_INFO
)
1687 return iommu_prepare_identity_map(pdev
, rmrr
->base_address
,
1688 rmrr
->end_address
+ 1);
1691 #ifdef CONFIG_DMAR_GFX_WA
1692 struct iommu_prepare_data
{
1693 struct pci_dev
*pdev
;
1697 static int __init
iommu_prepare_work_fn(unsigned long start_pfn
,
1698 unsigned long end_pfn
, void *datax
)
1700 struct iommu_prepare_data
*data
;
1702 data
= (struct iommu_prepare_data
*)datax
;
1704 data
->ret
= iommu_prepare_identity_map(data
->pdev
,
1705 start_pfn
<<PAGE_SHIFT
, end_pfn
<<PAGE_SHIFT
);
1710 static int __init
iommu_prepare_with_active_regions(struct pci_dev
*pdev
)
1713 struct iommu_prepare_data data
;
1718 for_each_online_node(nid
) {
1719 work_with_active_regions(nid
, iommu_prepare_work_fn
, &data
);
1726 static void __init
iommu_prepare_gfx_mapping(void)
1728 struct pci_dev
*pdev
= NULL
;
1731 for_each_pci_dev(pdev
) {
1732 if (pdev
->dev
.archdata
.iommu
== DUMMY_DEVICE_DOMAIN_INFO
||
1733 !IS_GFX_DEVICE(pdev
))
1735 printk(KERN_INFO
"IOMMU: gfx device %s 1-1 mapping\n",
1737 ret
= iommu_prepare_with_active_regions(pdev
);
1739 printk(KERN_ERR
"IOMMU: mapping reserved region failed\n");
1742 #else /* !CONFIG_DMAR_GFX_WA */
1743 static inline void iommu_prepare_gfx_mapping(void)
1749 #ifdef CONFIG_DMAR_FLOPPY_WA
1750 static inline void iommu_prepare_isa(void)
1752 struct pci_dev
*pdev
;
1755 pdev
= pci_get_class(PCI_CLASS_BRIDGE_ISA
<< 8, NULL
);
1759 printk(KERN_INFO
"IOMMU: Prepare 0-16M unity mapping for LPC\n");
1760 ret
= iommu_prepare_identity_map(pdev
, 0, 16*1024*1024);
1763 printk("IOMMU: Failed to create 0-64M identity map, "
1764 "floppy might not work\n");
1768 static inline void iommu_prepare_isa(void)
#endif /* !CONFIG_DMAR_FLOPPY_WA */
1774 static int __init
init_dmars(void)
1776 struct dmar_drhd_unit
*drhd
;
1777 struct dmar_rmrr_unit
*rmrr
;
1778 struct pci_dev
*pdev
;
1779 struct intel_iommu
*iommu
;
1780 int i
, ret
, unit
= 0;
1785 * initialize and program root entry to not present
1788 for_each_drhd_unit(drhd
) {
1791 * lock not needed as this is only incremented in the single
1792 * threaded kernel __init code path all other access are read
1797 deferred_flush
= kzalloc(g_num_of_iommus
*
1798 sizeof(struct deferred_flush_tables
), GFP_KERNEL
);
1799 if (!deferred_flush
) {
1804 for_each_drhd_unit(drhd
) {
1808 iommu
= drhd
->iommu
;
1810 ret
= iommu_init_domains(iommu
);
		 * we could share the same root & context tables
		 * among all IOMMUs. Need to split it later.
1819 ret
= iommu_alloc_root_entry(iommu
);
1821 printk(KERN_ERR
"IOMMU: allocate root entry failed\n");
1826 for_each_drhd_unit(drhd
) {
1830 iommu
= drhd
->iommu
;
1831 if (dmar_enable_qi(iommu
)) {
1833 * Queued Invalidate not enabled, use Register Based
1836 iommu
->flush
.flush_context
= __iommu_flush_context
;
1837 iommu
->flush
.flush_iotlb
= __iommu_flush_iotlb
;
1838 printk(KERN_INFO
"IOMMU 0x%Lx: using Register based "
1840 (unsigned long long)drhd
->reg_base_addr
);
1842 iommu
->flush
.flush_context
= qi_flush_context
;
1843 iommu
->flush
.flush_iotlb
= qi_flush_iotlb
;
1844 printk(KERN_INFO
"IOMMU 0x%Lx: using Queued "
1846 (unsigned long long)drhd
->reg_base_addr
);
1852 * for each dev attached to rmrr
1854 * locate drhd for dev, alloc domain for dev
1855 * allocate free domain
1856 * allocate page table entries for rmrr
1857 * if context not allocated for bus
1858 * allocate and init context
1859 * set present in root table for this bus
1860 * init context with domain, translation etc
1864 for_each_rmrr_units(rmrr
) {
1865 for (i
= 0; i
< rmrr
->devices_cnt
; i
++) {
1866 pdev
= rmrr
->devices
[i
];
1867 /* some BIOS lists non-exist devices in DMAR table */
1870 ret
= iommu_prepare_rmrr_dev(rmrr
, pdev
);
1873 "IOMMU: mapping reserved region failed\n");
1877 iommu_prepare_gfx_mapping();
1879 iommu_prepare_isa();
1884 * global invalidate context cache
1885 * global invalidate iotlb
1886 * enable translation
1888 for_each_drhd_unit(drhd
) {
1891 iommu
= drhd
->iommu
;
1892 sprintf (iommu
->name
, "dmar%d", unit
++);
1894 iommu_flush_write_buffer(iommu
);
1896 ret
= dmar_set_interrupt(iommu
);
1900 iommu_set_root_entry(iommu
);
1902 iommu
->flush
.flush_context(iommu
, 0, 0, 0, DMA_CCMD_GLOBAL_INVL
,
1904 iommu
->flush
.flush_iotlb(iommu
, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH
,
1906 iommu_disable_protect_mem_regions(iommu
);
1908 ret
= iommu_enable_translation(iommu
);
1915 for_each_drhd_unit(drhd
) {
1918 iommu
= drhd
->iommu
;
static inline u64 aligned_size(u64 host_addr, size_t size)
{
	u64 addr;
	addr = (host_addr & (~PAGE_MASK)) + size;
	return PAGE_ALIGN(addr);
}
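
/*
 * aligned_size() example (illustrative): for host_addr = 0x1234 and
 * size = 0x100 it computes 0x234 + 0x100 = 0x334 and rounds up to 0x1000,
 * so the mapping covers the whole page even though the buffer starts and
 * ends mid-page.
 */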
1932 iommu_alloc_iova(struct dmar_domain
*domain
, size_t size
, u64 end
)
1936 /* Make sure it's in range */
1937 end
= min_t(u64
, DOMAIN_MAX_ADDR(domain
->gaw
), end
);
1938 if (!size
|| (IOVA_START_ADDR
+ size
> end
))
1941 piova
= alloc_iova(&domain
->iovad
,
1942 size
>> PAGE_SHIFT
, IOVA_PFN(end
), 1);
1946 static struct iova
*
1947 __intel_alloc_iova(struct device
*dev
, struct dmar_domain
*domain
,
1948 size_t size
, u64 dma_mask
)
1950 struct pci_dev
*pdev
= to_pci_dev(dev
);
1951 struct iova
*iova
= NULL
;
1953 if (dma_mask
<= DMA_32BIT_MASK
|| dmar_forcedac
)
1954 iova
= iommu_alloc_iova(domain
, size
, dma_mask
);
1957 * First try to allocate an io virtual address in
1958 * DMA_32BIT_MASK and if that fails then try allocating
1961 iova
= iommu_alloc_iova(domain
, size
, DMA_32BIT_MASK
);
1963 iova
= iommu_alloc_iova(domain
, size
, dma_mask
);
1967 printk(KERN_ERR
"Allocating iova for %s failed", pci_name(pdev
));
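
/*
 * IOVA allocation policy in short: devices limited to 32-bit DMA (or boots
 * with intel_iommu=forcedac) allocate directly against their own mask;
 * 64-bit capable devices first try below DMA_32BIT_MASK and only fall back
 * to the full mask when the low range is exhausted, which avoids DAC
 * addressing for most mappings.
 */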
1974 static struct dmar_domain
*
1975 get_valid_domain_for_dev(struct pci_dev
*pdev
)
1977 struct dmar_domain
*domain
;
1980 domain
= get_domain_for_dev(pdev
,
1981 DEFAULT_DOMAIN_ADDRESS_WIDTH
);
1984 "Allocating domain for %s failed", pci_name(pdev
));
1988 /* make sure context mapping is ok */
1989 if (unlikely(!domain_context_mapped(domain
, pdev
))) {
1990 ret
= domain_context_mapping(domain
, pdev
);
1993 "Domain context map for %s failed",
2002 static dma_addr_t
__intel_map_single(struct device
*hwdev
, phys_addr_t paddr
,
2003 size_t size
, int dir
, u64 dma_mask
)
2005 struct pci_dev
*pdev
= to_pci_dev(hwdev
);
2006 struct dmar_domain
*domain
;
2007 phys_addr_t start_paddr
;
2012 BUG_ON(dir
== DMA_NONE
);
2013 if (pdev
->dev
.archdata
.iommu
== DUMMY_DEVICE_DOMAIN_INFO
)
2016 domain
= get_valid_domain_for_dev(pdev
);
2020 size
= aligned_size((u64
)paddr
, size
);
2022 iova
= __intel_alloc_iova(hwdev
, domain
, size
, pdev
->dma_mask
);
2026 start_paddr
= (phys_addr_t
)iova
->pfn_lo
<< PAGE_SHIFT
;
2029 * Check if DMAR supports zero-length reads on write only
2032 if (dir
== DMA_TO_DEVICE
|| dir
== DMA_BIDIRECTIONAL
|| \
2033 !cap_zlr(domain
->iommu
->cap
))
2034 prot
|= DMA_PTE_READ
;
2035 if (dir
== DMA_FROM_DEVICE
|| dir
== DMA_BIDIRECTIONAL
)
2036 prot
|= DMA_PTE_WRITE
;
2038 * paddr - (paddr + size) might be partial page, we should map the whole
	 * page. Note: if two parts of one page are separately mapped, we
2040 * might have two guest_addr mapping to the same host paddr, but this
2041 * is not a big problem
2043 ret
= domain_page_mapping(domain
, start_paddr
,
2044 ((u64
)paddr
) & PAGE_MASK
, size
, prot
);
2048 /* it's a non-present to present mapping */
2049 ret
= iommu_flush_iotlb_psi(domain
->iommu
, domain
->id
,
2050 start_paddr
, size
>> VTD_PAGE_SHIFT
, 1);
2052 iommu_flush_write_buffer(domain
->iommu
);
2054 return start_paddr
+ ((u64
)paddr
& (~PAGE_MASK
));
2058 __free_iova(&domain
->iovad
, iova
);
2059 printk(KERN_ERR
"Device %s request: %lx@%llx dir %d --- failed\n",
2060 pci_name(pdev
), size
, (unsigned long long)paddr
, dir
);
2064 dma_addr_t
intel_map_single(struct device
*hwdev
, phys_addr_t paddr
,
2065 size_t size
, int dir
)
2067 return __intel_map_single(hwdev
, paddr
, size
, dir
,
2068 to_pci_dev(hwdev
)->dma_mask
);
2071 static void flush_unmaps(void)
2077 /* just flush them all */
2078 for (i
= 0; i
< g_num_of_iommus
; i
++) {
2079 if (deferred_flush
[i
].next
) {
2080 struct intel_iommu
*iommu
=
2081 deferred_flush
[i
].domain
[0]->iommu
;
2083 iommu
->flush
.flush_iotlb(iommu
, 0, 0, 0,
2084 DMA_TLB_GLOBAL_FLUSH
, 0);
2085 for (j
= 0; j
< deferred_flush
[i
].next
; j
++) {
2086 __free_iova(&deferred_flush
[i
].domain
[j
]->iovad
,
2087 deferred_flush
[i
].iova
[j
]);
2089 deferred_flush
[i
].next
= 0;
2096 static void flush_unmaps_timeout(unsigned long data
)
2098 unsigned long flags
;
2100 spin_lock_irqsave(&async_umap_flush_lock
, flags
);
2102 spin_unlock_irqrestore(&async_umap_flush_lock
, flags
);
2105 static void add_unmap(struct dmar_domain
*dom
, struct iova
*iova
)
2107 unsigned long flags
;
2110 spin_lock_irqsave(&async_umap_flush_lock
, flags
);
2111 if (list_size
== HIGH_WATER_MARK
)
2114 iommu_id
= dom
->iommu
->seq_id
;
2116 next
= deferred_flush
[iommu_id
].next
;
2117 deferred_flush
[iommu_id
].domain
[next
] = dom
;
2118 deferred_flush
[iommu_id
].iova
[next
] = iova
;
2119 deferred_flush
[iommu_id
].next
++;
2122 mod_timer(&unmap_timer
, jiffies
+ msecs_to_jiffies(10));
2126 spin_unlock_irqrestore(&async_umap_flush_lock
, flags
);
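
/*
 * Deferred-unmap batching in short: add_unmap() queues the IOVA in the
 * owning IOMMU's deferred_flush[] slot and arms unmap_timer (10ms); the
 * timer, or hitting HIGH_WATER_MARK, drains the queue via flush_unmaps(),
 * which issues one global IOTLB flush per IOMMU and only then frees the
 * queued IOVAs, trading a short window of stale TLB entries for far fewer
 * flush operations.
 */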
2129 void intel_unmap_single(struct device
*dev
, dma_addr_t dev_addr
, size_t size
,
2132 struct pci_dev
*pdev
= to_pci_dev(dev
);
2133 struct dmar_domain
*domain
;
2134 unsigned long start_addr
;
2137 if (pdev
->dev
.archdata
.iommu
== DUMMY_DEVICE_DOMAIN_INFO
)
2139 domain
= find_domain(pdev
);
2142 iova
= find_iova(&domain
->iovad
, IOVA_PFN(dev_addr
));
2146 start_addr
= iova
->pfn_lo
<< PAGE_SHIFT
;
2147 size
= aligned_size((u64
)dev_addr
, size
);
2149 pr_debug("Device %s unmapping: %lx@%llx\n",
2150 pci_name(pdev
), size
, (unsigned long long)start_addr
);
2152 /* clear the whole page */
2153 dma_pte_clear_range(domain
, start_addr
, start_addr
+ size
);
2154 /* free page tables */
2155 dma_pte_free_pagetable(domain
, start_addr
, start_addr
+ size
);
2156 if (intel_iommu_strict
) {
2157 if (iommu_flush_iotlb_psi(domain
->iommu
,
2158 domain
->id
, start_addr
, size
>> VTD_PAGE_SHIFT
, 0))
2159 iommu_flush_write_buffer(domain
->iommu
);
2161 __free_iova(&domain
->iovad
, iova
);
2163 add_unmap(domain
, iova
);
2165 * queue up the release of the unmap to save the 1/6th of the
2166 * cpu used up by the iotlb flush operation...
2171 void *intel_alloc_coherent(struct device
*hwdev
, size_t size
,
2172 dma_addr_t
*dma_handle
, gfp_t flags
)
2177 size
= PAGE_ALIGN(size
);
2178 order
= get_order(size
);
2179 flags
&= ~(GFP_DMA
| GFP_DMA32
);
2181 vaddr
= (void *)__get_free_pages(flags
, order
);
2184 memset(vaddr
, 0, size
);
2186 *dma_handle
= __intel_map_single(hwdev
, virt_to_bus(vaddr
), size
,
2188 hwdev
->coherent_dma_mask
);
2191 free_pages((unsigned long)vaddr
, order
);
2195 void intel_free_coherent(struct device
*hwdev
, size_t size
, void *vaddr
,
2196 dma_addr_t dma_handle
)
2200 size
= PAGE_ALIGN(size
);
2201 order
= get_order(size
);
2203 intel_unmap_single(hwdev
, dma_handle
, size
, DMA_BIDIRECTIONAL
);
2204 free_pages((unsigned long)vaddr
, order
);
2207 #define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
2209 void intel_unmap_sg(struct device
*hwdev
, struct scatterlist
*sglist
,
2210 int nelems
, int dir
)
2213 struct pci_dev
*pdev
= to_pci_dev(hwdev
);
2214 struct dmar_domain
*domain
;
2215 unsigned long start_addr
;
2219 struct scatterlist
*sg
;
2221 if (pdev
->dev
.archdata
.iommu
== DUMMY_DEVICE_DOMAIN_INFO
)
2224 domain
= find_domain(pdev
);
2226 iova
= find_iova(&domain
->iovad
, IOVA_PFN(sglist
[0].dma_address
));
2229 for_each_sg(sglist
, sg
, nelems
, i
) {
2230 addr
= SG_ENT_VIRT_ADDRESS(sg
);
2231 size
+= aligned_size((u64
)addr
, sg
->length
);
2234 start_addr
= iova
->pfn_lo
<< PAGE_SHIFT
;
2236 /* clear the whole page */
2237 dma_pte_clear_range(domain
, start_addr
, start_addr
+ size
);
2238 /* free page tables */
2239 dma_pte_free_pagetable(domain
, start_addr
, start_addr
+ size
);
2241 if (iommu_flush_iotlb_psi(domain
->iommu
, domain
->id
, start_addr
,
2242 size
>> VTD_PAGE_SHIFT
, 0))
2243 iommu_flush_write_buffer(domain
->iommu
);
2246 __free_iova(&domain
->iovad
, iova
);
2249 static int intel_nontranslate_map_sg(struct device
*hddev
,
2250 struct scatterlist
*sglist
, int nelems
, int dir
)
2253 struct scatterlist
*sg
;
2255 for_each_sg(sglist
, sg
, nelems
, i
) {
2256 BUG_ON(!sg_page(sg
));
2257 sg
->dma_address
= virt_to_bus(SG_ENT_VIRT_ADDRESS(sg
));
2258 sg
->dma_length
= sg
->length
;
2263 int intel_map_sg(struct device
*hwdev
, struct scatterlist
*sglist
, int nelems
,
2268 struct pci_dev
*pdev
= to_pci_dev(hwdev
);
2269 struct dmar_domain
*domain
;
2273 struct iova
*iova
= NULL
;
2275 struct scatterlist
*sg
;
2276 unsigned long start_addr
;
2278 BUG_ON(dir
== DMA_NONE
);
2279 if (pdev
->dev
.archdata
.iommu
== DUMMY_DEVICE_DOMAIN_INFO
)
2280 return intel_nontranslate_map_sg(hwdev
, sglist
, nelems
, dir
);
2282 domain
= get_valid_domain_for_dev(pdev
);
2286 for_each_sg(sglist
, sg
, nelems
, i
) {
2287 addr
= SG_ENT_VIRT_ADDRESS(sg
);
2288 addr
= (void *)virt_to_phys(addr
);
2289 size
+= aligned_size((u64
)addr
, sg
->length
);
2292 iova
= __intel_alloc_iova(hwdev
, domain
, size
, pdev
->dma_mask
);
2294 sglist
->dma_length
= 0;
2299 * Check if DMAR supports zero-length reads on write only
2302 if (dir
== DMA_TO_DEVICE
|| dir
== DMA_BIDIRECTIONAL
|| \
2303 !cap_zlr(domain
->iommu
->cap
))
2304 prot
|= DMA_PTE_READ
;
2305 if (dir
== DMA_FROM_DEVICE
|| dir
== DMA_BIDIRECTIONAL
)
2306 prot
|= DMA_PTE_WRITE
;
2308 start_addr
= iova
->pfn_lo
<< PAGE_SHIFT
;
2310 for_each_sg(sglist
, sg
, nelems
, i
) {
2311 addr
= SG_ENT_VIRT_ADDRESS(sg
);
2312 addr
= (void *)virt_to_phys(addr
);
2313 size
= aligned_size((u64
)addr
, sg
->length
);
2314 ret
= domain_page_mapping(domain
, start_addr
+ offset
,
2315 ((u64
)addr
) & PAGE_MASK
,
2318 /* clear the page */
2319 dma_pte_clear_range(domain
, start_addr
,
2320 start_addr
+ offset
);
2321 /* free page tables */
2322 dma_pte_free_pagetable(domain
, start_addr
,
2323 start_addr
+ offset
);
2325 __free_iova(&domain
->iovad
, iova
);
2328 sg
->dma_address
= start_addr
+ offset
+
2329 ((u64
)addr
& (~PAGE_MASK
));
2330 sg
->dma_length
= sg
->length
;
2334 /* it's a non-present to present mapping */
2335 if (iommu_flush_iotlb_psi(domain
->iommu
, domain
->id
,
2336 start_addr
, offset
>> VTD_PAGE_SHIFT
, 1))
2337 iommu_flush_write_buffer(domain
->iommu
);
2341 static struct dma_mapping_ops intel_dma_ops
= {
2342 .alloc_coherent
= intel_alloc_coherent
,
2343 .free_coherent
= intel_free_coherent
,
2344 .map_single
= intel_map_single
,
2345 .unmap_single
= intel_unmap_single
,
2346 .map_sg
= intel_map_sg
,
2347 .unmap_sg
= intel_unmap_sg
,
2350 static inline int iommu_domain_cache_init(void)
2354 iommu_domain_cache
= kmem_cache_create("iommu_domain",
2355 sizeof(struct dmar_domain
),
2360 if (!iommu_domain_cache
) {
2361 printk(KERN_ERR
"Couldn't create iommu_domain cache\n");
2368 static inline int iommu_devinfo_cache_init(void)
2372 iommu_devinfo_cache
= kmem_cache_create("iommu_devinfo",
2373 sizeof(struct device_domain_info
),
2377 if (!iommu_devinfo_cache
) {
2378 printk(KERN_ERR
"Couldn't create devinfo cache\n");
2385 static inline int iommu_iova_cache_init(void)
2389 iommu_iova_cache
= kmem_cache_create("iommu_iova",
2390 sizeof(struct iova
),
2394 if (!iommu_iova_cache
) {
2395 printk(KERN_ERR
"Couldn't create iova cache\n");
2402 static int __init
iommu_init_mempool(void)
2405 ret
= iommu_iova_cache_init();
2409 ret
= iommu_domain_cache_init();
2413 ret
= iommu_devinfo_cache_init();
2417 kmem_cache_destroy(iommu_domain_cache
);
2419 kmem_cache_destroy(iommu_iova_cache
);
2424 static void __init
iommu_exit_mempool(void)
2426 kmem_cache_destroy(iommu_devinfo_cache
);
2427 kmem_cache_destroy(iommu_domain_cache
);
2428 kmem_cache_destroy(iommu_iova_cache
);
2432 static void __init
init_no_remapping_devices(void)
2434 struct dmar_drhd_unit
*drhd
;
2436 for_each_drhd_unit(drhd
) {
2437 if (!drhd
->include_all
) {
2439 for (i
= 0; i
< drhd
->devices_cnt
; i
++)
2440 if (drhd
->devices
[i
] != NULL
)
2442 /* ignore DMAR unit if no pci devices exist */
2443 if (i
== drhd
->devices_cnt
)
2451 for_each_drhd_unit(drhd
) {
2453 if (drhd
->ignored
|| drhd
->include_all
)
2456 for (i
= 0; i
< drhd
->devices_cnt
; i
++)
2457 if (drhd
->devices
[i
] &&
2458 !IS_GFX_DEVICE(drhd
->devices
[i
]))
2461 if (i
< drhd
->devices_cnt
)
2464 /* bypass IOMMU if it is just for gfx devices */
2466 for (i
= 0; i
< drhd
->devices_cnt
; i
++) {
2467 if (!drhd
->devices
[i
])
2469 drhd
->devices
[i
]->dev
.archdata
.iommu
= DUMMY_DEVICE_DOMAIN_INFO
;
2474 int __init
intel_iommu_init(void)
2478 if (dmar_table_init())
2481 if (dmar_dev_scope_init())
2485 * Check the need for DMA-remapping initialization now.
2486 * Above initialization will also be used by Interrupt-remapping.
2488 if (no_iommu
|| swiotlb
|| dmar_disabled
)
2491 iommu_init_mempool();
2492 dmar_init_reserved_ranges();
2494 init_no_remapping_devices();
2498 printk(KERN_ERR
"IOMMU: dmar init failed\n");
2499 put_iova_domain(&reserved_iova_list
);
2500 iommu_exit_mempool();
2504 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
2506 init_timer(&unmap_timer
);
2508 dma_ops
= &intel_dma_ops
;
2512 void intel_iommu_domain_exit(struct dmar_domain
*domain
)
	/* Domain 0 is reserved, so don't process it */
2520 end
= DOMAIN_MAX_ADDR(domain
->gaw
);
2521 end
= end
& (~VTD_PAGE_MASK
);
2524 dma_pte_clear_range(domain
, 0, end
);
2526 /* free page tables */
2527 dma_pte_free_pagetable(domain
, 0, end
);
2529 iommu_free_domain(domain
);
2530 free_domain_mem(domain
);
2532 EXPORT_SYMBOL_GPL(intel_iommu_domain_exit
);
2534 struct dmar_domain
*intel_iommu_domain_alloc(struct pci_dev
*pdev
)
2536 struct dmar_drhd_unit
*drhd
;
2537 struct dmar_domain
*domain
;
2538 struct intel_iommu
*iommu
;
2540 drhd
= dmar_find_matched_drhd_unit(pdev
);
2542 printk(KERN_ERR
"intel_iommu_domain_alloc: drhd == NULL\n");
2546 iommu
= drhd
->iommu
;
2549 "intel_iommu_domain_alloc: iommu == NULL\n");
2552 domain
= iommu_alloc_domain(iommu
);
2555 "intel_iommu_domain_alloc: domain == NULL\n");
2558 if (domain_init(domain
, DEFAULT_DOMAIN_ADDRESS_WIDTH
)) {
2560 "intel_iommu_domain_alloc: domain_init() failed\n");
2561 intel_iommu_domain_exit(domain
);
2566 EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc
);
2568 int intel_iommu_context_mapping(
2569 struct dmar_domain
*domain
, struct pci_dev
*pdev
)
2572 rc
= domain_context_mapping(domain
, pdev
);
2575 EXPORT_SYMBOL_GPL(intel_iommu_context_mapping
);
2577 int intel_iommu_page_mapping(
2578 struct dmar_domain
*domain
, dma_addr_t iova
,
2579 u64 hpa
, size_t size
, int prot
)
2582 rc
= domain_page_mapping(domain
, iova
, hpa
, size
, prot
);
2585 EXPORT_SYMBOL_GPL(intel_iommu_page_mapping
);
2587 void intel_iommu_detach_dev(struct dmar_domain
*domain
, u8 bus
, u8 devfn
)
2589 detach_domain_for_dev(domain
, bus
, devfn
);
2591 EXPORT_SYMBOL_GPL(intel_iommu_detach_dev
);
2593 struct dmar_domain
*
2594 intel_iommu_find_domain(struct pci_dev
*pdev
)
2596 return find_domain(pdev
);
2598 EXPORT_SYMBOL_GPL(intel_iommu_find_domain
);
2600 int intel_iommu_found(void)
2602 return g_num_of_iommus
;
2604 EXPORT_SYMBOL_GPL(intel_iommu_found
);
2606 u64
intel_iommu_iova_to_pfn(struct dmar_domain
*domain
, u64 iova
)
2608 struct dma_pte
*pte
;
2612 pte
= addr_to_dma_pte(domain
, iova
);
2615 pfn
= dma_pte_addr(pte
);
2617 return pfn
>> VTD_PAGE_SHIFT
;
2619 EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn
);