/*
 * Copyright (c) 2006, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Copyright (C) 2006-2008 Intel Corporation
 * Author: Ashok Raj <ashok.raj@intel.com>
 * Author: Shaohua Li <shaohua.li@intel.com>
 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 * Author: Fenghua Yu <fenghua.yu@intel.com>
 */
#include <linux/init.h>
#include <linux/bitmap.h>
#include <linux/debugfs.h>
#include <linux/slab.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/dma-mapping.h>
#include <linux/mempool.h>
#include <linux/timer.h>
#include <linux/iova.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>
#define ROOT_SIZE		VTD_PAGE_SIZE
#define CONTEXT_SIZE		VTD_PAGE_SIZE

#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)

#define IOAPIC_RANGE_START	(0xfee00000)
#define IOAPIC_RANGE_END	(0xfeefffff)
#define IOVA_START_ADDR		(0x1000)

#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48

#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)

#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
#define DMA_32BIT_PFN		IOVA_PFN(DMA_32BIT_MASK)
#define DMA_64BIT_PFN		IOVA_PFN(DMA_64BIT_MASK)
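/*
 * Worked example for the PFN macros above (a sketch, assuming 4KiB pages,
 * i.e. PAGE_SHIFT == 12): IOVA_PFN(0xfee00000) == 0xfee00, and
 * IOVA_PFN(DMA_32BIT_MASK) == 0xfffff, the last page frame reachable with
 * a 32-bit DMA mask, which is the limit DMA_32BIT_PFN imposes on the IOVA
 * allocator for devices that cannot address 64 bits.
 */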
/* global iommu list, set NULL for ignored DMAR units */
static struct intel_iommu **g_iommus;
/*
 * 0: Present
 * 1-11: Reserved
 * 12-63: Context Ptr (12 - (haw-1))
 * 64-127: Reserved
 */
struct root_entry {
	u64	val;
	u64	rsvd1;
};
#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
static inline bool root_present(struct root_entry *root)
{
	return (root->val & 1);
}
static inline void set_root_present(struct root_entry *root)
{
	root->val |= 1;
}
static inline void set_root_value(struct root_entry *root, unsigned long value)
{
	root->val |= value & VTD_PAGE_MASK;
}

static inline struct context_entry *
get_context_addr_from_root(struct root_entry *root)
{
	return (struct context_entry *)
		(root_present(root)?phys_to_virt(
		root->val & VTD_PAGE_MASK) :
		NULL);
}
/*
 * low 64 bits:
 * 0: present
 * 1: fault processing disable
 * 2-3: translation type
 * 12-63: address space root
 * high 64 bits:
 * 0-2: address width
 * 8-23: domain id
 */
struct context_entry {
	u64 lo;
	u64 hi;
};

static inline bool context_present(struct context_entry *context)
{
	return (context->lo & 1);
}

static inline void context_set_present(struct context_entry *context)
{
	context->lo |= 1;
}

static inline void context_set_fault_enable(struct context_entry *context)
{
	context->lo &= (((u64)-1) << 2) | 1;
}

#define CONTEXT_TT_MULTI_LEVEL 0

static inline void context_set_translation_type(struct context_entry *context,
						unsigned long value)
{
	context->lo &= (((u64)-1) << 4) | 3;
	context->lo |= (value & 3) << 2;
}

static inline void context_set_address_root(struct context_entry *context,
					    unsigned long value)
{
	context->lo |= value & VTD_PAGE_MASK;
}

static inline void context_set_address_width(struct context_entry *context,
					     unsigned long value)
{
	context->hi |= value & 7;
}

static inline void context_set_domain_id(struct context_entry *context,
					 unsigned long value)
{
	context->hi |= (value & ((1 << 16) - 1)) << 8;
}

static inline void context_clear_entry(struct context_entry *context)
{
	context->lo = 0;
	context->hi = 0;
}
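/*
 * Illustrative sketch of how the helpers above compose one context entry
 * (values chosen only for illustration): for domain id 5 and a page table
 * rooted at physical address 0x12345000,
 *
 *	context_set_domain_id(c, 5);		// hi |= 5 << 8
 *	context_set_address_width(c, iommu->agaw);	// hi bits 0-2
 *	context_set_address_root(c, 0x12345000ULL);	// lo |= root & VTD_PAGE_MASK
 *	context_set_translation_type(c, CONTEXT_TT_MULTI_LEVEL);
 *	context_set_fault_enable(c);		// clear bit 1
 *	context_set_present(c);			// set bit 0
 *
 * which mirrors the order used later in domain_context_mapping_one().
 */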
/*
 * 0: readable
 * 1: writable
 * 2-6: reserved
 * 7: super page
 * 8-11: available
 * 12-63: Host physical address
 */
struct dma_pte {
	u64 val;
};

static inline void dma_clear_pte(struct dma_pte *pte)
{
	pte->val = 0;
}

static inline void dma_set_pte_readable(struct dma_pte *pte)
{
	pte->val |= DMA_PTE_READ;
}

static inline void dma_set_pte_writable(struct dma_pte *pte)
{
	pte->val |= DMA_PTE_WRITE;
}

static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
{
	pte->val = (pte->val & ~3) | (prot & 3);
}

static inline u64 dma_pte_addr(struct dma_pte *pte)
{
	return (pte->val & VTD_PAGE_MASK);
}

static inline void dma_set_pte_addr(struct dma_pte *pte, u64 addr)
{
	pte->val |= (addr & VTD_PAGE_MASK);
}

static inline bool dma_pte_present(struct dma_pte *pte)
{
	return (pte->val & 3) != 0;
}
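/*
 * Sketch of how a last-level PTE is composed from the helpers above, e.g.
 * to map a host page at 0xabcd0000 read/write (illustrative address):
 *
 *	dma_set_pte_addr(pte, 0xabcd0000ULL);	// val |= addr & VTD_PAGE_MASK
 *	dma_set_pte_prot(pte, DMA_PTE_READ | DMA_PTE_WRITE);
 *
 * dma_pte_present() then reports true because the R/W bits (0-1) are set;
 * this is the same sequence domain_page_mapping() uses further down.
 */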
/* devices under the same p2p bridge are owned in one domain */
#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)

/* domain represents a virtual machine; more than one device
 * across iommus may be owned by one domain, e.g. a kvm guest.
 */
#define DOMAIN_FLAG_VIRTUAL_MACHINE	(1 << 1)

struct dmar_domain {
	int	id;			/* domain id */
	unsigned long iommu_bmp;	/* bitmap of iommus this domain uses */

	struct list_head devices;	/* all devices' list */
	struct iova_domain iovad;	/* iova's that belong to this domain */

	struct dma_pte	*pgd;		/* virtual address */
	spinlock_t	mapping_lock;	/* page table lock */
	int		gaw;		/* max guest address width */

	/* adjusted guest address width, 0 is level 2 30-bit */
	int		agaw;

	int		flags;		/* flags to find out type of domain */

	int		iommu_coherency;/* indicate coherency of iommu access */
	int		iommu_count;	/* reference count of iommu */
	spinlock_t	iommu_lock;	/* protect iommu set in domain */
	u64		max_addr;	/* maximum mapped address */
};
/* PCI domain-device relationship */
struct device_domain_info {
	struct list_head link;	/* link to domain siblings */
	struct list_head global; /* link to global list */
	u8 bus;			/* PCI bus number */
	u8 devfn;		/* PCI devfn number */
	struct pci_dev *dev;	/* it's NULL for PCIE-to-PCI bridge */
	struct dmar_domain *domain; /* pointer to domain */
};
static void flush_unmaps_timeout(unsigned long data);

DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);

#define HIGH_WATER_MARK 250
struct deferred_flush_tables {
	int next;
	struct iova *iova[HIGH_WATER_MARK];
	struct dmar_domain *domain[HIGH_WATER_MARK];
};

static struct deferred_flush_tables *deferred_flush;

/* bitmap for indexing intel_iommus */
static int g_num_of_iommus;

static DEFINE_SPINLOCK(async_umap_flush_lock);
static LIST_HEAD(unmaps_to_do);

static long list_size;
static void domain_remove_dev_info(struct dmar_domain *domain);

static int __initdata dmar_map_gfx = 1;
static int dmar_forcedac;
static int intel_iommu_strict;

#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
static DEFINE_SPINLOCK(device_domain_lock);
static LIST_HEAD(device_domain_list);
static int __init intel_iommu_setup(char *str)
{
	if (!str)
		return -EINVAL;
	while (*str) {
		if (!strncmp(str, "off", 3)) {
			dmar_disabled = 1;
			printk(KERN_INFO "Intel-IOMMU: disabled\n");
		} else if (!strncmp(str, "igfx_off", 8)) {
			dmar_map_gfx = 0;
			printk(KERN_INFO
				"Intel-IOMMU: disable GFX device mapping\n");
		} else if (!strncmp(str, "forcedac", 8)) {
			printk(KERN_INFO
				"Intel-IOMMU: Forcing DAC for PCI devices\n");
			dmar_forcedac = 1;
		} else if (!strncmp(str, "strict", 6)) {
			printk(KERN_INFO
				"Intel-IOMMU: disable batched IOTLB flush\n");
			intel_iommu_strict = 1;
		}

		str += strcspn(str, ",");
		while (*str == ',')
			str++;
	}
	return 0;
}
__setup("intel_iommu=", intel_iommu_setup);
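/*
 * Example kernel command lines accepted by the parser above; options may
 * be combined with commas, since the loop advances over "," separators:
 *
 *	intel_iommu=off
 *	intel_iommu=igfx_off
 *	intel_iommu=forcedac,strict
 */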
static struct kmem_cache *iommu_domain_cache;
static struct kmem_cache *iommu_devinfo_cache;
static struct kmem_cache *iommu_iova_cache;

static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
{
	unsigned int flags;
	void *vaddr;

	/* trying to avoid low memory issues */
	flags = current->flags & PF_MEMALLOC;
	current->flags |= PF_MEMALLOC;
	vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
	current->flags &= (~PF_MEMALLOC | flags);
	return vaddr;
}

static inline void *alloc_pgtable_page(void)
{
	unsigned int flags;
	void *vaddr;

	/* trying to avoid low memory issues */
	flags = current->flags & PF_MEMALLOC;
	current->flags |= PF_MEMALLOC;
	vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
	current->flags &= (~PF_MEMALLOC | flags);
	return vaddr;
}

static inline void free_pgtable_page(void *vaddr)
{
	free_page((unsigned long)vaddr);
}

static inline void *alloc_domain_mem(void)
{
	return iommu_kmem_cache_alloc(iommu_domain_cache);
}

static void free_domain_mem(void *vaddr)
{
	kmem_cache_free(iommu_domain_cache, vaddr);
}

static inline void *alloc_devinfo_mem(void)
{
	return iommu_kmem_cache_alloc(iommu_devinfo_cache);
}

static inline void free_devinfo_mem(void *vaddr)
{
	kmem_cache_free(iommu_devinfo_cache, vaddr);
}

struct iova *alloc_iova_mem(void)
{
	return iommu_kmem_cache_alloc(iommu_iova_cache);
}

void free_iova_mem(struct iova *iova)
{
	kmem_cache_free(iommu_iova_cache, iova);
}
static inline int width_to_agaw(int width);

/* calculate agaw for each iommu.
 * "SAGAW" may be different across iommus; use a default agaw, and
 * fall back to a smaller supported agaw for iommus that don't support
 * the default agaw.
 */
int iommu_calculate_agaw(struct intel_iommu *iommu)
{
	unsigned long sagaw;
	int agaw;

	sagaw = cap_sagaw(iommu->cap);
	for (agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
	     agaw >= 0; agaw--) {
		if (test_bit(agaw, &sagaw))
			break;
	}

	return agaw;
}
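/*
 * Worked example: DEFAULT_DOMAIN_ADDRESS_WIDTH is 48, so the search starts
 * at agaw = width_to_agaw(48) = (48 - 30) / 9 = 2, i.e. a 4-level table.
 * If the unit's SAGAW capability does not advertise that value, the loop
 * falls back to a smaller supported agaw, e.g. 1 for a 39-bit, 3-level
 * table.
 */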
/* in native case, each domain is related to only one iommu */
static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
{
	int iommu_id;

	BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);

	iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
	if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
		return NULL;

	return g_iommus[iommu_id];
}

/* "Coherency" capability may be different across iommus */
static void domain_update_iommu_coherency(struct dmar_domain *domain)
{
	int i;

	domain->iommu_coherency = 1;

	i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
	for (; i < g_num_of_iommus; ) {
		if (!ecap_coherent(g_iommus[i]->ecap)) {
			domain->iommu_coherency = 0;
			break;
		}
		i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
	}
}
static struct intel_iommu *device_to_iommu(u8 bus, u8 devfn)
{
	struct dmar_drhd_unit *drhd = NULL;
	int i;

	for_each_drhd_unit(drhd) {
		if (drhd->ignored)
			continue;

		for (i = 0; i < drhd->devices_cnt; i++)
			if (drhd->devices[i]->bus->number == bus &&
			    drhd->devices[i]->devfn == devfn)
				return drhd->iommu;

		if (drhd->include_all)
			return drhd->iommu;
	}

	return NULL;
}

static void domain_flush_cache(struct dmar_domain *domain,
			       void *addr, int size)
{
	if (!domain->iommu_coherency)
		clflush_cache_range(addr, size);
}
/* Gets context entry for a given bus and devfn */
static struct context_entry *device_to_context_entry(struct intel_iommu *iommu,
		u8 bus, u8 devfn)
{
	struct root_entry *root;
	struct context_entry *context;
	unsigned long phy_addr;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	root = &iommu->root_entry[bus];
	context = get_context_addr_from_root(root);
	if (!context) {
		context = (struct context_entry *)alloc_pgtable_page();
		if (!context) {
			spin_unlock_irqrestore(&iommu->lock, flags);
			return NULL;
		}
		__iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
		phy_addr = virt_to_phys((void *)context);
		set_root_value(root, phy_addr);
		set_root_present(root);
		__iommu_flush_cache(iommu, root, sizeof(*root));
	}
	spin_unlock_irqrestore(&iommu->lock, flags);
	return &context[devfn];
}
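/*
 * Lookup structure illustrated: iommu->root_entry is indexed by PCI bus
 * number, and each present root entry points at a page-sized context
 * table indexed by devfn, so the (bus, devfn) pair selects exactly one
 * context entry, which is what the function above returns (allocating
 * the per-bus context table on first use).
 */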
static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
	struct root_entry *root;
	struct context_entry *context;
	int ret;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	root = &iommu->root_entry[bus];
	context = get_context_addr_from_root(root);
	if (!context) {
		ret = 0;
		goto out;
	}
	ret = context_present(&context[devfn]);
out:
	spin_unlock_irqrestore(&iommu->lock, flags);
	return ret;
}
static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
	struct root_entry *root;
	struct context_entry *context;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	root = &iommu->root_entry[bus];
	context = get_context_addr_from_root(root);
	if (context) {
		context_clear_entry(&context[devfn]);
		__iommu_flush_cache(iommu, &context[devfn], \
			sizeof(*context));
	}
	spin_unlock_irqrestore(&iommu->lock, flags);
}
static void free_context_table(struct intel_iommu *iommu)
{
	struct root_entry *root;
	int i;
	unsigned long flags;
	struct context_entry *context;

	spin_lock_irqsave(&iommu->lock, flags);
	if (!iommu->root_entry)
		goto out;

	for (i = 0; i < ROOT_ENTRY_NR; i++) {
		root = &iommu->root_entry[i];
		context = get_context_addr_from_root(root);
		if (context)
			free_pgtable_page(context);
	}
	free_pgtable_page(iommu->root_entry);
	iommu->root_entry = NULL;
out:
	spin_unlock_irqrestore(&iommu->lock, flags);
}
/* page table handling */
#define LEVEL_STRIDE		(9)
#define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)

static inline int agaw_to_level(int agaw)
{
	return agaw + 2;
}

static inline int agaw_to_width(int agaw)
{
	return 30 + agaw * LEVEL_STRIDE;
}

static inline int width_to_agaw(int width)
{
	return (width - 30) / LEVEL_STRIDE;
}

static inline unsigned int level_to_offset_bits(int level)
{
	return (12 + (level - 1) * LEVEL_STRIDE);
}

static inline int address_level_offset(u64 addr, int level)
{
	return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
}

static inline u64 level_mask(int level)
{
	return ((u64)-1 << level_to_offset_bits(level));
}

static inline u64 level_size(int level)
{
	return ((u64)1 << level_to_offset_bits(level));
}

static inline u64 align_to_level(u64 addr, int level)
{
	return ((addr + level_size(level) - 1) & level_mask(level));
}
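/*
 * Worked example for the level helpers above (assuming 4KiB pages and the
 * 9-bit LEVEL_STRIDE): level_to_offset_bits(1) is 12, so
 * address_level_offset(addr, 1) picks bits 12-20 of the address -- the
 * index into the last-level table -- and level_size(1) is 2^12.  For
 * level 2 the offset bits start at 21 and level_size(2) is 2^21, which is
 * why align_to_level(addr, 2) rounds addr up to a 2MiB boundary.
 */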
590 static struct dma_pte
* addr_to_dma_pte(struct dmar_domain
*domain
, u64 addr
)
592 int addr_width
= agaw_to_width(domain
->agaw
);
593 struct dma_pte
*parent
, *pte
= NULL
;
594 int level
= agaw_to_level(domain
->agaw
);
598 BUG_ON(!domain
->pgd
);
600 addr
&= (((u64
)1) << addr_width
) - 1;
601 parent
= domain
->pgd
;
603 spin_lock_irqsave(&domain
->mapping_lock
, flags
);
607 offset
= address_level_offset(addr
, level
);
608 pte
= &parent
[offset
];
612 if (!dma_pte_present(pte
)) {
613 tmp_page
= alloc_pgtable_page();
616 spin_unlock_irqrestore(&domain
->mapping_lock
,
620 domain_flush_cache(domain
, tmp_page
, PAGE_SIZE
);
621 dma_set_pte_addr(pte
, virt_to_phys(tmp_page
));
623 * high level table always sets r/w, last level page
624 * table control read/write
626 dma_set_pte_readable(pte
);
627 dma_set_pte_writable(pte
);
628 domain_flush_cache(domain
, pte
, sizeof(*pte
));
630 parent
= phys_to_virt(dma_pte_addr(pte
));
634 spin_unlock_irqrestore(&domain
->mapping_lock
, flags
);
638 /* return address's pte at specific level */
639 static struct dma_pte
*dma_addr_level_pte(struct dmar_domain
*domain
, u64 addr
,
642 struct dma_pte
*parent
, *pte
= NULL
;
643 int total
= agaw_to_level(domain
->agaw
);
646 parent
= domain
->pgd
;
647 while (level
<= total
) {
648 offset
= address_level_offset(addr
, total
);
649 pte
= &parent
[offset
];
653 if (!dma_pte_present(pte
))
655 parent
= phys_to_virt(dma_pte_addr(pte
));
661 /* clear one page's page table */
662 static void dma_pte_clear_one(struct dmar_domain
*domain
, u64 addr
)
664 struct dma_pte
*pte
= NULL
;
666 /* get last level pte */
667 pte
= dma_addr_level_pte(domain
, addr
, 1);
671 domain_flush_cache(domain
, pte
, sizeof(*pte
));
675 /* clear last level pte, a tlb flush should be followed */
676 static void dma_pte_clear_range(struct dmar_domain
*domain
, u64 start
, u64 end
)
678 int addr_width
= agaw_to_width(domain
->agaw
);
680 start
&= (((u64
)1) << addr_width
) - 1;
681 end
&= (((u64
)1) << addr_width
) - 1;
682 /* in case it's partial page */
683 start
= PAGE_ALIGN(start
);
686 /* we don't need lock here, nobody else touches the iova range */
687 while (start
< end
) {
688 dma_pte_clear_one(domain
, start
);
689 start
+= VTD_PAGE_SIZE
;
693 /* free page table pages. last level pte should already be cleared */
694 static void dma_pte_free_pagetable(struct dmar_domain
*domain
,
697 int addr_width
= agaw_to_width(domain
->agaw
);
699 int total
= agaw_to_level(domain
->agaw
);
703 start
&= (((u64
)1) << addr_width
) - 1;
704 end
&= (((u64
)1) << addr_width
) - 1;
706 /* we don't need lock here, nobody else touches the iova range */
708 while (level
<= total
) {
709 tmp
= align_to_level(start
, level
);
710 if (tmp
>= end
|| (tmp
+ level_size(level
) > end
))
714 pte
= dma_addr_level_pte(domain
, tmp
, level
);
717 phys_to_virt(dma_pte_addr(pte
)));
719 domain_flush_cache(domain
, pte
, sizeof(*pte
));
721 tmp
+= level_size(level
);
726 if (start
== 0 && end
>= ((((u64
)1) << addr_width
) - 1)) {
727 free_pgtable_page(domain
->pgd
);
733 static int iommu_alloc_root_entry(struct intel_iommu
*iommu
)
735 struct root_entry
*root
;
738 root
= (struct root_entry
*)alloc_pgtable_page();
742 __iommu_flush_cache(iommu
, root
, ROOT_SIZE
);
744 spin_lock_irqsave(&iommu
->lock
, flags
);
745 iommu
->root_entry
= root
;
746 spin_unlock_irqrestore(&iommu
->lock
, flags
);
751 static void iommu_set_root_entry(struct intel_iommu
*iommu
)
757 addr
= iommu
->root_entry
;
759 spin_lock_irqsave(&iommu
->register_lock
, flag
);
760 dmar_writeq(iommu
->reg
+ DMAR_RTADDR_REG
, virt_to_phys(addr
));
762 cmd
= iommu
->gcmd
| DMA_GCMD_SRTP
;
763 writel(cmd
, iommu
->reg
+ DMAR_GCMD_REG
);
765 /* Make sure hardware complete it */
766 IOMMU_WAIT_OP(iommu
, DMAR_GSTS_REG
,
767 readl
, (sts
& DMA_GSTS_RTPS
), sts
);
769 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
772 static void iommu_flush_write_buffer(struct intel_iommu
*iommu
)
777 if (!cap_rwbf(iommu
->cap
))
779 val
= iommu
->gcmd
| DMA_GCMD_WBF
;
781 spin_lock_irqsave(&iommu
->register_lock
, flag
);
782 writel(val
, iommu
->reg
+ DMAR_GCMD_REG
);
784 /* Make sure hardware complete it */
785 IOMMU_WAIT_OP(iommu
, DMAR_GSTS_REG
,
786 readl
, (!(val
& DMA_GSTS_WBFS
)), val
);
788 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
791 /* return value determine if we need a write buffer flush */
792 static int __iommu_flush_context(struct intel_iommu
*iommu
,
793 u16 did
, u16 source_id
, u8 function_mask
, u64 type
,
794 int non_present_entry_flush
)
800 * In the non-present entry flush case, if hardware doesn't cache
801 * non-present entry we do nothing and if hardware cache non-present
802 * entry, we flush entries of domain 0 (the domain id is used to cache
803 * any non-present entries)
805 if (non_present_entry_flush
) {
806 if (!cap_caching_mode(iommu
->cap
))
813 case DMA_CCMD_GLOBAL_INVL
:
814 val
= DMA_CCMD_GLOBAL_INVL
;
816 case DMA_CCMD_DOMAIN_INVL
:
817 val
= DMA_CCMD_DOMAIN_INVL
|DMA_CCMD_DID(did
);
819 case DMA_CCMD_DEVICE_INVL
:
820 val
= DMA_CCMD_DEVICE_INVL
|DMA_CCMD_DID(did
)
821 | DMA_CCMD_SID(source_id
) | DMA_CCMD_FM(function_mask
);
828 spin_lock_irqsave(&iommu
->register_lock
, flag
);
829 dmar_writeq(iommu
->reg
+ DMAR_CCMD_REG
, val
);
831 /* Make sure hardware complete it */
832 IOMMU_WAIT_OP(iommu
, DMAR_CCMD_REG
,
833 dmar_readq
, (!(val
& DMA_CCMD_ICC
)), val
);
835 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
837 /* flush context entry will implicitly flush write buffer */
841 /* return value determine if we need a write buffer flush */
842 static int __iommu_flush_iotlb(struct intel_iommu
*iommu
, u16 did
,
843 u64 addr
, unsigned int size_order
, u64 type
,
844 int non_present_entry_flush
)
846 int tlb_offset
= ecap_iotlb_offset(iommu
->ecap
);
847 u64 val
= 0, val_iva
= 0;
851 * In the non-present entry flush case, if hardware doesn't cache
852 * non-present entry we do nothing and if hardware cache non-present
853 * entry, we flush entries of domain 0 (the domain id is used to cache
854 * any non-present entries)
856 if (non_present_entry_flush
) {
857 if (!cap_caching_mode(iommu
->cap
))
864 case DMA_TLB_GLOBAL_FLUSH
:
865 /* global flush doesn't need set IVA_REG */
866 val
= DMA_TLB_GLOBAL_FLUSH
|DMA_TLB_IVT
;
868 case DMA_TLB_DSI_FLUSH
:
869 val
= DMA_TLB_DSI_FLUSH
|DMA_TLB_IVT
|DMA_TLB_DID(did
);
871 case DMA_TLB_PSI_FLUSH
:
872 val
= DMA_TLB_PSI_FLUSH
|DMA_TLB_IVT
|DMA_TLB_DID(did
);
873 /* Note: always flush non-leaf currently */
874 val_iva
= size_order
| addr
;
879 /* Note: set drain read/write */
882 * This is probably to be super secure.. Looks like we can
883 * ignore it without any impact.
885 if (cap_read_drain(iommu
->cap
))
886 val
|= DMA_TLB_READ_DRAIN
;
888 if (cap_write_drain(iommu
->cap
))
889 val
|= DMA_TLB_WRITE_DRAIN
;
891 spin_lock_irqsave(&iommu
->register_lock
, flag
);
892 /* Note: Only uses first TLB reg currently */
894 dmar_writeq(iommu
->reg
+ tlb_offset
, val_iva
);
895 dmar_writeq(iommu
->reg
+ tlb_offset
+ 8, val
);
897 /* Make sure hardware complete it */
898 IOMMU_WAIT_OP(iommu
, tlb_offset
+ 8,
899 dmar_readq
, (!(val
& DMA_TLB_IVT
)), val
);
901 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
903 /* check IOTLB invalidation granularity */
904 if (DMA_TLB_IAIG(val
) == 0)
905 printk(KERN_ERR
"IOMMU: flush IOTLB failed\n");
906 if (DMA_TLB_IAIG(val
) != DMA_TLB_IIRG(type
))
907 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
908 (unsigned long long)DMA_TLB_IIRG(type
),
909 (unsigned long long)DMA_TLB_IAIG(val
));
910 /* flush iotlb entry will implicitly flush write buffer */
914 static int iommu_flush_iotlb_psi(struct intel_iommu
*iommu
, u16 did
,
915 u64 addr
, unsigned int pages
, int non_present_entry_flush
)
919 BUG_ON(addr
& (~VTD_PAGE_MASK
));
922 /* Fallback to domain selective flush if no PSI support */
923 if (!cap_pgsel_inv(iommu
->cap
))
924 return iommu
->flush
.flush_iotlb(iommu
, did
, 0, 0,
926 non_present_entry_flush
);
929 * PSI requires page size to be 2 ^ x, and the base address is naturally
930 * aligned to the size
932 mask
= ilog2(__roundup_pow_of_two(pages
));
933 /* Fallback to domain selective flush if size is too big */
934 if (mask
> cap_max_amask_val(iommu
->cap
))
935 return iommu
->flush
.flush_iotlb(iommu
, did
, 0, 0,
936 DMA_TLB_DSI_FLUSH
, non_present_entry_flush
);
938 return iommu
->flush
.flush_iotlb(iommu
, did
, addr
, mask
,
940 non_present_entry_flush
);
943 static void iommu_disable_protect_mem_regions(struct intel_iommu
*iommu
)
948 spin_lock_irqsave(&iommu
->register_lock
, flags
);
949 pmen
= readl(iommu
->reg
+ DMAR_PMEN_REG
);
950 pmen
&= ~DMA_PMEN_EPM
;
951 writel(pmen
, iommu
->reg
+ DMAR_PMEN_REG
);
953 /* wait for the protected region status bit to clear */
954 IOMMU_WAIT_OP(iommu
, DMAR_PMEN_REG
,
955 readl
, !(pmen
& DMA_PMEN_PRS
), pmen
);
957 spin_unlock_irqrestore(&iommu
->register_lock
, flags
);
960 static int iommu_enable_translation(struct intel_iommu
*iommu
)
965 spin_lock_irqsave(&iommu
->register_lock
, flags
);
966 writel(iommu
->gcmd
|DMA_GCMD_TE
, iommu
->reg
+ DMAR_GCMD_REG
);
968 /* Make sure hardware complete it */
969 IOMMU_WAIT_OP(iommu
, DMAR_GSTS_REG
,
970 readl
, (sts
& DMA_GSTS_TES
), sts
);
972 iommu
->gcmd
|= DMA_GCMD_TE
;
973 spin_unlock_irqrestore(&iommu
->register_lock
, flags
);
977 static int iommu_disable_translation(struct intel_iommu
*iommu
)
982 spin_lock_irqsave(&iommu
->register_lock
, flag
);
983 iommu
->gcmd
&= ~DMA_GCMD_TE
;
984 writel(iommu
->gcmd
, iommu
->reg
+ DMAR_GCMD_REG
);
986 /* Make sure hardware complete it */
987 IOMMU_WAIT_OP(iommu
, DMAR_GSTS_REG
,
988 readl
, (!(sts
& DMA_GSTS_TES
)), sts
);
990 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
994 /* iommu interrupt handling. Most stuff are MSI-like. */
996 static const char *fault_reason_strings
[] =
999 "Present bit in root entry is clear",
1000 "Present bit in context entry is clear",
1001 "Invalid context entry",
1002 "Access beyond MGAW",
1003 "PTE Write access is not set",
1004 "PTE Read access is not set",
1005 "Next page table ptr is invalid",
1006 "Root table address invalid",
1007 "Context table ptr is invalid",
1008 "non-zero reserved fields in RTP",
1009 "non-zero reserved fields in CTP",
1010 "non-zero reserved fields in PTE",
1012 #define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1)
1014 const char *dmar_get_fault_reason(u8 fault_reason
)
1016 if (fault_reason
> MAX_FAULT_REASON_IDX
)
1019 return fault_reason_strings
[fault_reason
];
1022 void dmar_msi_unmask(unsigned int irq
)
1024 struct intel_iommu
*iommu
= get_irq_data(irq
);
1028 spin_lock_irqsave(&iommu
->register_lock
, flag
);
1029 writel(0, iommu
->reg
+ DMAR_FECTL_REG
);
1030 /* Read a reg to force flush the post write */
1031 readl(iommu
->reg
+ DMAR_FECTL_REG
);
1032 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
1035 void dmar_msi_mask(unsigned int irq
)
1038 struct intel_iommu
*iommu
= get_irq_data(irq
);
1041 spin_lock_irqsave(&iommu
->register_lock
, flag
);
1042 writel(DMA_FECTL_IM
, iommu
->reg
+ DMAR_FECTL_REG
);
1043 /* Read a reg to force flush the post write */
1044 readl(iommu
->reg
+ DMAR_FECTL_REG
);
1045 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
1048 void dmar_msi_write(int irq
, struct msi_msg
*msg
)
1050 struct intel_iommu
*iommu
= get_irq_data(irq
);
1053 spin_lock_irqsave(&iommu
->register_lock
, flag
);
1054 writel(msg
->data
, iommu
->reg
+ DMAR_FEDATA_REG
);
1055 writel(msg
->address_lo
, iommu
->reg
+ DMAR_FEADDR_REG
);
1056 writel(msg
->address_hi
, iommu
->reg
+ DMAR_FEUADDR_REG
);
1057 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
1060 void dmar_msi_read(int irq
, struct msi_msg
*msg
)
1062 struct intel_iommu
*iommu
= get_irq_data(irq
);
1065 spin_lock_irqsave(&iommu
->register_lock
, flag
);
1066 msg
->data
= readl(iommu
->reg
+ DMAR_FEDATA_REG
);
1067 msg
->address_lo
= readl(iommu
->reg
+ DMAR_FEADDR_REG
);
1068 msg
->address_hi
= readl(iommu
->reg
+ DMAR_FEUADDR_REG
);
1069 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
1072 static int iommu_page_fault_do_one(struct intel_iommu
*iommu
, int type
,
1073 u8 fault_reason
, u16 source_id
, unsigned long long addr
)
1077 reason
= dmar_get_fault_reason(fault_reason
);
1080 "DMAR:[%s] Request device [%02x:%02x.%d] "
1081 "fault addr %llx \n"
1082 "DMAR:[fault reason %02d] %s\n",
1083 (type
? "DMA Read" : "DMA Write"),
1084 (source_id
>> 8), PCI_SLOT(source_id
& 0xFF),
1085 PCI_FUNC(source_id
& 0xFF), addr
, fault_reason
, reason
);
1089 #define PRIMARY_FAULT_REG_LEN (16)
1090 static irqreturn_t
iommu_page_fault(int irq
, void *dev_id
)
1092 struct intel_iommu
*iommu
= dev_id
;
1093 int reg
, fault_index
;
1097 spin_lock_irqsave(&iommu
->register_lock
, flag
);
1098 fault_status
= readl(iommu
->reg
+ DMAR_FSTS_REG
);
1100 /* TBD: ignore advanced fault log currently */
1101 if (!(fault_status
& DMA_FSTS_PPF
))
1102 goto clear_overflow
;
1104 fault_index
= dma_fsts_fault_record_index(fault_status
);
1105 reg
= cap_fault_reg_offset(iommu
->cap
);
1113 /* highest 32 bits */
1114 data
= readl(iommu
->reg
+ reg
+
1115 fault_index
* PRIMARY_FAULT_REG_LEN
+ 12);
1116 if (!(data
& DMA_FRCD_F
))
1119 fault_reason
= dma_frcd_fault_reason(data
);
1120 type
= dma_frcd_type(data
);
1122 data
= readl(iommu
->reg
+ reg
+
1123 fault_index
* PRIMARY_FAULT_REG_LEN
+ 8);
1124 source_id
= dma_frcd_source_id(data
);
1126 guest_addr
= dmar_readq(iommu
->reg
+ reg
+
1127 fault_index
* PRIMARY_FAULT_REG_LEN
);
1128 guest_addr
= dma_frcd_page_addr(guest_addr
);
1129 /* clear the fault */
1130 writel(DMA_FRCD_F
, iommu
->reg
+ reg
+
1131 fault_index
* PRIMARY_FAULT_REG_LEN
+ 12);
1133 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
1135 iommu_page_fault_do_one(iommu
, type
, fault_reason
,
1136 source_id
, guest_addr
);
1139 if (fault_index
> cap_num_fault_regs(iommu
->cap
))
1141 spin_lock_irqsave(&iommu
->register_lock
, flag
);
1144 /* clear primary fault overflow */
1145 fault_status
= readl(iommu
->reg
+ DMAR_FSTS_REG
);
1146 if (fault_status
& DMA_FSTS_PFO
)
1147 writel(DMA_FSTS_PFO
, iommu
->reg
+ DMAR_FSTS_REG
);
1149 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
1153 int dmar_set_interrupt(struct intel_iommu
*iommu
)
1159 printk(KERN_ERR
"IOMMU: no free vectors\n");
1163 set_irq_data(irq
, iommu
);
1166 ret
= arch_setup_dmar_msi(irq
);
1168 set_irq_data(irq
, NULL
);
1174 /* Force fault register is cleared */
1175 iommu_page_fault(irq
, iommu
);
1177 ret
= request_irq(irq
, iommu_page_fault
, 0, iommu
->name
, iommu
);
1179 printk(KERN_ERR
"IOMMU: can't request irq\n");
1183 static int iommu_init_domains(struct intel_iommu
*iommu
)
1185 unsigned long ndomains
;
1186 unsigned long nlongs
;
1188 ndomains
= cap_ndoms(iommu
->cap
);
1189 pr_debug("Number of Domains supportd <%ld>\n", ndomains
);
1190 nlongs
= BITS_TO_LONGS(ndomains
);
1192 /* TBD: there might be 64K domains,
1193 * consider other allocation for future chip
1195 iommu
->domain_ids
= kcalloc(nlongs
, sizeof(unsigned long), GFP_KERNEL
);
1196 if (!iommu
->domain_ids
) {
1197 printk(KERN_ERR
"Allocating domain id array failed\n");
1200 iommu
->domains
= kcalloc(ndomains
, sizeof(struct dmar_domain
*),
1202 if (!iommu
->domains
) {
1203 printk(KERN_ERR
"Allocating domain array failed\n");
1204 kfree(iommu
->domain_ids
);
1208 spin_lock_init(&iommu
->lock
);
1211 * if Caching mode is set, then invalid translations are tagged
1212 * with domainid 0. Hence we need to pre-allocate it.
1214 if (cap_caching_mode(iommu
->cap
))
1215 set_bit(0, iommu
->domain_ids
);
1220 static void domain_exit(struct dmar_domain
*domain
);
1221 static void vm_domain_exit(struct dmar_domain
*domain
);
1223 void free_dmar_iommu(struct intel_iommu
*iommu
)
1225 struct dmar_domain
*domain
;
1227 unsigned long flags
;
1229 i
= find_first_bit(iommu
->domain_ids
, cap_ndoms(iommu
->cap
));
1230 for (; i
< cap_ndoms(iommu
->cap
); ) {
1231 domain
= iommu
->domains
[i
];
1232 clear_bit(i
, iommu
->domain_ids
);
1234 spin_lock_irqsave(&domain
->iommu_lock
, flags
);
1235 if (--domain
->iommu_count
== 0) {
1236 if (domain
->flags
& DOMAIN_FLAG_VIRTUAL_MACHINE
)
1237 vm_domain_exit(domain
);
1239 domain_exit(domain
);
1241 spin_unlock_irqrestore(&domain
->iommu_lock
, flags
);
1243 i
= find_next_bit(iommu
->domain_ids
,
1244 cap_ndoms(iommu
->cap
), i
+1);
1247 if (iommu
->gcmd
& DMA_GCMD_TE
)
1248 iommu_disable_translation(iommu
);
1251 set_irq_data(iommu
->irq
, NULL
);
1252 /* This will mask the irq */
1253 free_irq(iommu
->irq
, iommu
);
1254 destroy_irq(iommu
->irq
);
1257 kfree(iommu
->domains
);
1258 kfree(iommu
->domain_ids
);
1260 g_iommus
[iommu
->seq_id
] = NULL
;
1262 /* if all iommus are freed, free g_iommus */
1263 for (i
= 0; i
< g_num_of_iommus
; i
++) {
1268 if (i
== g_num_of_iommus
)
1271 /* free context mapping */
1272 free_context_table(iommu
);
1275 static struct dmar_domain
* iommu_alloc_domain(struct intel_iommu
*iommu
)
1278 unsigned long ndomains
;
1279 struct dmar_domain
*domain
;
1280 unsigned long flags
;
1282 domain
= alloc_domain_mem();
1286 ndomains
= cap_ndoms(iommu
->cap
);
1288 spin_lock_irqsave(&iommu
->lock
, flags
);
1289 num
= find_first_zero_bit(iommu
->domain_ids
, ndomains
);
1290 if (num
>= ndomains
) {
1291 spin_unlock_irqrestore(&iommu
->lock
, flags
);
1292 free_domain_mem(domain
);
1293 printk(KERN_ERR
"IOMMU: no free domain ids\n");
1297 set_bit(num
, iommu
->domain_ids
);
1299 memset(&domain
->iommu_bmp
, 0, sizeof(unsigned long));
1300 set_bit(iommu
->seq_id
, &domain
->iommu_bmp
);
1302 iommu
->domains
[num
] = domain
;
1303 spin_unlock_irqrestore(&iommu
->lock
, flags
);
1308 static void iommu_free_domain(struct dmar_domain
*domain
)
1310 unsigned long flags
;
1311 struct intel_iommu
*iommu
;
1313 iommu
= domain_get_iommu(domain
);
1315 spin_lock_irqsave(&iommu
->lock
, flags
);
1316 clear_bit(domain
->id
, iommu
->domain_ids
);
1317 spin_unlock_irqrestore(&iommu
->lock
, flags
);
1320 static struct iova_domain reserved_iova_list
;
1321 static struct lock_class_key reserved_alloc_key
;
1322 static struct lock_class_key reserved_rbtree_key
;
1324 static void dmar_init_reserved_ranges(void)
1326 struct pci_dev
*pdev
= NULL
;
1331 init_iova_domain(&reserved_iova_list
, DMA_32BIT_PFN
);
1333 lockdep_set_class(&reserved_iova_list
.iova_alloc_lock
,
1334 &reserved_alloc_key
);
1335 lockdep_set_class(&reserved_iova_list
.iova_rbtree_lock
,
1336 &reserved_rbtree_key
);
1338 /* IOAPIC ranges shouldn't be accessed by DMA */
1339 iova
= reserve_iova(&reserved_iova_list
, IOVA_PFN(IOAPIC_RANGE_START
),
1340 IOVA_PFN(IOAPIC_RANGE_END
));
1342 printk(KERN_ERR
"Reserve IOAPIC range failed\n");
1344 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1345 for_each_pci_dev(pdev
) {
1348 for (i
= 0; i
< PCI_NUM_RESOURCES
; i
++) {
1349 r
= &pdev
->resource
[i
];
1350 if (!r
->flags
|| !(r
->flags
& IORESOURCE_MEM
))
1354 size
= r
->end
- addr
;
1355 size
= PAGE_ALIGN(size
);
1356 iova
= reserve_iova(&reserved_iova_list
, IOVA_PFN(addr
),
1357 IOVA_PFN(size
+ addr
) - 1);
1359 printk(KERN_ERR
"Reserve iova failed\n");
1365 static void domain_reserve_special_ranges(struct dmar_domain
*domain
)
1367 copy_reserved_iova(&reserved_iova_list
, &domain
->iovad
);
1370 static inline int guestwidth_to_adjustwidth(int gaw
)
1373 int r
= (gaw
- 12) % 9;
1384 static int domain_init(struct dmar_domain
*domain
, int guest_width
)
1386 struct intel_iommu
*iommu
;
1387 int adjust_width
, agaw
;
1388 unsigned long sagaw
;
1390 init_iova_domain(&domain
->iovad
, DMA_32BIT_PFN
);
1391 spin_lock_init(&domain
->mapping_lock
);
1392 spin_lock_init(&domain
->iommu_lock
);
1394 domain_reserve_special_ranges(domain
);
1396 /* calculate AGAW */
1397 iommu
= domain_get_iommu(domain
);
1398 if (guest_width
> cap_mgaw(iommu
->cap
))
1399 guest_width
= cap_mgaw(iommu
->cap
);
1400 domain
->gaw
= guest_width
;
1401 adjust_width
= guestwidth_to_adjustwidth(guest_width
);
1402 agaw
= width_to_agaw(adjust_width
);
1403 sagaw
= cap_sagaw(iommu
->cap
);
1404 if (!test_bit(agaw
, &sagaw
)) {
1405 /* hardware doesn't support it, choose a bigger one */
1406 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw
);
1407 agaw
= find_next_bit(&sagaw
, 5, agaw
);
1411 domain
->agaw
= agaw
;
1412 INIT_LIST_HEAD(&domain
->devices
);
1414 if (ecap_coherent(iommu
->ecap
))
1415 domain
->iommu_coherency
= 1;
1417 domain
->iommu_coherency
= 0;
1419 domain
->iommu_count
= 1;
1421 /* always allocate the top pgd */
1422 domain
->pgd
= (struct dma_pte
*)alloc_pgtable_page();
1425 __iommu_flush_cache(iommu
, domain
->pgd
, PAGE_SIZE
);
1429 static void domain_exit(struct dmar_domain
*domain
)
1433 /* Domain 0 is reserved, so dont process it */
1437 domain_remove_dev_info(domain
);
1439 put_iova_domain(&domain
->iovad
);
1440 end
= DOMAIN_MAX_ADDR(domain
->gaw
);
1441 end
= end
& (~PAGE_MASK
);
1444 dma_pte_clear_range(domain
, 0, end
);
1446 /* free page tables */
1447 dma_pte_free_pagetable(domain
, 0, end
);
1449 iommu_free_domain(domain
);
1450 free_domain_mem(domain
);
1453 static int domain_context_mapping_one(struct dmar_domain
*domain
,
1456 struct context_entry
*context
;
1457 unsigned long flags
;
1458 struct intel_iommu
*iommu
;
1459 struct dma_pte
*pgd
;
1461 unsigned long ndomains
;
1465 pr_debug("Set context mapping for %02x:%02x.%d\n",
1466 bus
, PCI_SLOT(devfn
), PCI_FUNC(devfn
));
1467 BUG_ON(!domain
->pgd
);
1469 iommu
= device_to_iommu(bus
, devfn
);
1473 context
= device_to_context_entry(iommu
, bus
, devfn
);
1476 spin_lock_irqsave(&iommu
->lock
, flags
);
1477 if (context_present(context
)) {
1478 spin_unlock_irqrestore(&iommu
->lock
, flags
);
1485 if (domain
->flags
& DOMAIN_FLAG_VIRTUAL_MACHINE
) {
1488 /* find an available domain id for this device in iommu */
1489 ndomains
= cap_ndoms(iommu
->cap
);
1490 num
= find_first_bit(iommu
->domain_ids
, ndomains
);
1491 for (; num
< ndomains
; ) {
1492 if (iommu
->domains
[num
] == domain
) {
1497 num
= find_next_bit(iommu
->domain_ids
,
1498 cap_ndoms(iommu
->cap
), num
+1);
1502 num
= find_first_zero_bit(iommu
->domain_ids
, ndomains
);
1503 if (num
>= ndomains
) {
1504 spin_unlock_irqrestore(&iommu
->lock
, flags
);
1505 printk(KERN_ERR
"IOMMU: no free domain ids\n");
1509 set_bit(num
, iommu
->domain_ids
);
1510 iommu
->domains
[num
] = domain
;
1514 /* Skip top levels of page tables for
1515 * iommu which has less agaw than default.
1517 for (agaw
= domain
->agaw
; agaw
!= iommu
->agaw
; agaw
--) {
1518 pgd
= phys_to_virt(dma_pte_addr(pgd
));
1519 if (!dma_pte_present(pgd
)) {
1520 spin_unlock_irqrestore(&iommu
->lock
, flags
);
1526 context_set_domain_id(context
, id
);
1527 context_set_address_width(context
, iommu
->agaw
);
1528 context_set_address_root(context
, virt_to_phys(pgd
));
1529 context_set_translation_type(context
, CONTEXT_TT_MULTI_LEVEL
);
1530 context_set_fault_enable(context
);
1531 context_set_present(context
);
1532 domain_flush_cache(domain
, context
, sizeof(*context
));
1534 /* it's a non-present to present mapping */
1535 if (iommu
->flush
.flush_context(iommu
, domain
->id
,
1536 (((u16
)bus
) << 8) | devfn
, DMA_CCMD_MASK_NOBIT
,
1537 DMA_CCMD_DEVICE_INVL
, 1))
1538 iommu_flush_write_buffer(iommu
);
1540 iommu
->flush
.flush_iotlb(iommu
, 0, 0, 0, DMA_TLB_DSI_FLUSH
, 0);
1542 spin_unlock_irqrestore(&iommu
->lock
, flags
);
1544 spin_lock_irqsave(&domain
->iommu_lock
, flags
);
1545 if (!test_and_set_bit(iommu
->seq_id
, &domain
->iommu_bmp
)) {
1546 domain
->iommu_count
++;
1547 domain_update_iommu_coherency(domain
);
1549 spin_unlock_irqrestore(&domain
->iommu_lock
, flags
);
1554 domain_context_mapping(struct dmar_domain
*domain
, struct pci_dev
*pdev
)
1557 struct pci_dev
*tmp
, *parent
;
1559 ret
= domain_context_mapping_one(domain
, pdev
->bus
->number
,
1564 /* dependent device mapping */
1565 tmp
= pci_find_upstream_pcie_bridge(pdev
);
1568 /* Secondary interface's bus number and devfn 0 */
1569 parent
= pdev
->bus
->self
;
1570 while (parent
!= tmp
) {
1571 ret
= domain_context_mapping_one(domain
, parent
->bus
->number
,
1575 parent
= parent
->bus
->self
;
1577 if (tmp
->is_pcie
) /* this is a PCIE-to-PCI bridge */
1578 return domain_context_mapping_one(domain
,
1579 tmp
->subordinate
->number
, 0);
1580 else /* this is a legacy PCI bridge */
1581 return domain_context_mapping_one(domain
,
1582 tmp
->bus
->number
, tmp
->devfn
);
1585 static int domain_context_mapped(struct pci_dev
*pdev
)
1588 struct pci_dev
*tmp
, *parent
;
1589 struct intel_iommu
*iommu
;
1591 iommu
= device_to_iommu(pdev
->bus
->number
, pdev
->devfn
);
1595 ret
= device_context_mapped(iommu
,
1596 pdev
->bus
->number
, pdev
->devfn
);
1599 /* dependent device mapping */
1600 tmp
= pci_find_upstream_pcie_bridge(pdev
);
1603 /* Secondary interface's bus number and devfn 0 */
1604 parent
= pdev
->bus
->self
;
1605 while (parent
!= tmp
) {
1606 ret
= device_context_mapped(iommu
, parent
->bus
->number
,
1610 parent
= parent
->bus
->self
;
1613 return device_context_mapped(iommu
,
1614 tmp
->subordinate
->number
, 0);
1616 return device_context_mapped(iommu
,
1617 tmp
->bus
->number
, tmp
->devfn
);
1621 domain_page_mapping(struct dmar_domain
*domain
, dma_addr_t iova
,
1622 u64 hpa
, size_t size
, int prot
)
1624 u64 start_pfn
, end_pfn
;
1625 struct dma_pte
*pte
;
1627 int addr_width
= agaw_to_width(domain
->agaw
);
1629 hpa
&= (((u64
)1) << addr_width
) - 1;
1631 if ((prot
& (DMA_PTE_READ
|DMA_PTE_WRITE
)) == 0)
1634 start_pfn
= ((u64
)hpa
) >> VTD_PAGE_SHIFT
;
1635 end_pfn
= (VTD_PAGE_ALIGN(((u64
)hpa
) + size
)) >> VTD_PAGE_SHIFT
;
1637 while (start_pfn
< end_pfn
) {
1638 pte
= addr_to_dma_pte(domain
, iova
+ VTD_PAGE_SIZE
* index
);
1641 /* We don't need lock here, nobody else
1642 * touches the iova range
1644 BUG_ON(dma_pte_addr(pte
));
1645 dma_set_pte_addr(pte
, start_pfn
<< VTD_PAGE_SHIFT
);
1646 dma_set_pte_prot(pte
, prot
);
1647 domain_flush_cache(domain
, pte
, sizeof(*pte
));
1654 static void iommu_detach_dev(struct intel_iommu
*iommu
, u8 bus
, u8 devfn
)
1659 clear_context_table(iommu
, bus
, devfn
);
1660 iommu
->flush
.flush_context(iommu
, 0, 0, 0,
1661 DMA_CCMD_GLOBAL_INVL
, 0);
1662 iommu
->flush
.flush_iotlb(iommu
, 0, 0, 0,
1663 DMA_TLB_GLOBAL_FLUSH
, 0);
1666 static void domain_remove_dev_info(struct dmar_domain
*domain
)
1668 struct device_domain_info
*info
;
1669 unsigned long flags
;
1670 struct intel_iommu
*iommu
;
1672 spin_lock_irqsave(&device_domain_lock
, flags
);
1673 while (!list_empty(&domain
->devices
)) {
1674 info
= list_entry(domain
->devices
.next
,
1675 struct device_domain_info
, link
);
1676 list_del(&info
->link
);
1677 list_del(&info
->global
);
1679 info
->dev
->dev
.archdata
.iommu
= NULL
;
1680 spin_unlock_irqrestore(&device_domain_lock
, flags
);
1682 iommu
= device_to_iommu(info
->bus
, info
->devfn
);
1683 iommu_detach_dev(iommu
, info
->bus
, info
->devfn
);
1684 free_devinfo_mem(info
);
1686 spin_lock_irqsave(&device_domain_lock
, flags
);
1688 spin_unlock_irqrestore(&device_domain_lock
, flags
);
1693 * Note: we use struct pci_dev->dev.archdata.iommu stores the info
1695 static struct dmar_domain
*
1696 find_domain(struct pci_dev
*pdev
)
1698 struct device_domain_info
*info
;
1700 /* No lock here, assumes no domain exit in normal case */
1701 info
= pdev
->dev
.archdata
.iommu
;
1703 return info
->domain
;
1707 /* domain is initialized */
1708 static struct dmar_domain
*get_domain_for_dev(struct pci_dev
*pdev
, int gaw
)
1710 struct dmar_domain
*domain
, *found
= NULL
;
1711 struct intel_iommu
*iommu
;
1712 struct dmar_drhd_unit
*drhd
;
1713 struct device_domain_info
*info
, *tmp
;
1714 struct pci_dev
*dev_tmp
;
1715 unsigned long flags
;
1716 int bus
= 0, devfn
= 0;
1718 domain
= find_domain(pdev
);
1722 dev_tmp
= pci_find_upstream_pcie_bridge(pdev
);
1724 if (dev_tmp
->is_pcie
) {
1725 bus
= dev_tmp
->subordinate
->number
;
1728 bus
= dev_tmp
->bus
->number
;
1729 devfn
= dev_tmp
->devfn
;
1731 spin_lock_irqsave(&device_domain_lock
, flags
);
1732 list_for_each_entry(info
, &device_domain_list
, global
) {
1733 if (info
->bus
== bus
&& info
->devfn
== devfn
) {
1734 found
= info
->domain
;
1738 spin_unlock_irqrestore(&device_domain_lock
, flags
);
1739 /* pcie-pci bridge already has a domain, uses it */
1746 /* Allocate new domain for the device */
1747 drhd
= dmar_find_matched_drhd_unit(pdev
);
1749 printk(KERN_ERR
"IOMMU: can't find DMAR for device %s\n",
1753 iommu
= drhd
->iommu
;
1755 domain
= iommu_alloc_domain(iommu
);
1759 if (domain_init(domain
, gaw
)) {
1760 domain_exit(domain
);
1764 /* register pcie-to-pci device */
1766 info
= alloc_devinfo_mem();
1768 domain_exit(domain
);
1772 info
->devfn
= devfn
;
1774 info
->domain
= domain
;
1775 /* This domain is shared by devices under p2p bridge */
1776 domain
->flags
|= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES
;
1778 /* pcie-to-pci bridge already has a domain, uses it */
1780 spin_lock_irqsave(&device_domain_lock
, flags
);
1781 list_for_each_entry(tmp
, &device_domain_list
, global
) {
1782 if (tmp
->bus
== bus
&& tmp
->devfn
== devfn
) {
1783 found
= tmp
->domain
;
1788 free_devinfo_mem(info
);
1789 domain_exit(domain
);
1792 list_add(&info
->link
, &domain
->devices
);
1793 list_add(&info
->global
, &device_domain_list
);
1795 spin_unlock_irqrestore(&device_domain_lock
, flags
);
1799 info
= alloc_devinfo_mem();
1802 info
->bus
= pdev
->bus
->number
;
1803 info
->devfn
= pdev
->devfn
;
1805 info
->domain
= domain
;
1806 spin_lock_irqsave(&device_domain_lock
, flags
);
1807 /* somebody is fast */
1808 found
= find_domain(pdev
);
1809 if (found
!= NULL
) {
1810 spin_unlock_irqrestore(&device_domain_lock
, flags
);
1811 if (found
!= domain
) {
1812 domain_exit(domain
);
1815 free_devinfo_mem(info
);
1818 list_add(&info
->link
, &domain
->devices
);
1819 list_add(&info
->global
, &device_domain_list
);
1820 pdev
->dev
.archdata
.iommu
= info
;
1821 spin_unlock_irqrestore(&device_domain_lock
, flags
);
1824 /* recheck it here, maybe others set it */
1825 return find_domain(pdev
);
1828 static int iommu_prepare_identity_map(struct pci_dev
*pdev
,
1829 unsigned long long start
,
1830 unsigned long long end
)
1832 struct dmar_domain
*domain
;
1834 unsigned long long base
;
1838 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1839 pci_name(pdev
), start
, end
);
1840 /* page table init */
1841 domain
= get_domain_for_dev(pdev
, DEFAULT_DOMAIN_ADDRESS_WIDTH
);
1845 /* The address might not be aligned */
1846 base
= start
& PAGE_MASK
;
1848 size
= PAGE_ALIGN(size
);
1849 if (!reserve_iova(&domain
->iovad
, IOVA_PFN(base
),
1850 IOVA_PFN(base
+ size
) - 1)) {
1851 printk(KERN_ERR
"IOMMU: reserve iova failed\n");
1856 pr_debug("Mapping reserved region %lx@%llx for %s\n",
1857 size
, base
, pci_name(pdev
));
1859 * RMRR range might have overlap with physical memory range,
1862 dma_pte_clear_range(domain
, base
, base
+ size
);
1864 ret
= domain_page_mapping(domain
, base
, base
, size
,
1865 DMA_PTE_READ
|DMA_PTE_WRITE
);
1869 /* context entry init */
1870 ret
= domain_context_mapping(domain
, pdev
);
1874 domain_exit(domain
);
1879 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit
*rmrr
,
1880 struct pci_dev
*pdev
)
1882 if (pdev
->dev
.archdata
.iommu
== DUMMY_DEVICE_DOMAIN_INFO
)
1884 return iommu_prepare_identity_map(pdev
, rmrr
->base_address
,
1885 rmrr
->end_address
+ 1);
1888 #ifdef CONFIG_DMAR_GFX_WA
1889 struct iommu_prepare_data
{
1890 struct pci_dev
*pdev
;
1894 static int __init
iommu_prepare_work_fn(unsigned long start_pfn
,
1895 unsigned long end_pfn
, void *datax
)
1897 struct iommu_prepare_data
*data
;
1899 data
= (struct iommu_prepare_data
*)datax
;
1901 data
->ret
= iommu_prepare_identity_map(data
->pdev
,
1902 start_pfn
<<PAGE_SHIFT
, end_pfn
<<PAGE_SHIFT
);
1907 static int __init
iommu_prepare_with_active_regions(struct pci_dev
*pdev
)
1910 struct iommu_prepare_data data
;
1915 for_each_online_node(nid
) {
1916 work_with_active_regions(nid
, iommu_prepare_work_fn
, &data
);
1923 static void __init
iommu_prepare_gfx_mapping(void)
1925 struct pci_dev
*pdev
= NULL
;
1928 for_each_pci_dev(pdev
) {
1929 if (pdev
->dev
.archdata
.iommu
== DUMMY_DEVICE_DOMAIN_INFO
||
1930 !IS_GFX_DEVICE(pdev
))
1932 printk(KERN_INFO
"IOMMU: gfx device %s 1-1 mapping\n",
1934 ret
= iommu_prepare_with_active_regions(pdev
);
1936 printk(KERN_ERR
"IOMMU: mapping reserved region failed\n");
1939 #else /* !CONFIG_DMAR_GFX_WA */
1940 static inline void iommu_prepare_gfx_mapping(void)
1946 #ifdef CONFIG_DMAR_FLOPPY_WA
1947 static inline void iommu_prepare_isa(void)
1949 struct pci_dev
*pdev
;
1952 pdev
= pci_get_class(PCI_CLASS_BRIDGE_ISA
<< 8, NULL
);
1956 printk(KERN_INFO
"IOMMU: Prepare 0-16M unity mapping for LPC\n");
1957 ret
= iommu_prepare_identity_map(pdev
, 0, 16*1024*1024);
1960 printk("IOMMU: Failed to create 0-64M identity map, "
1961 "floppy might not work\n");
1965 static inline void iommu_prepare_isa(void)
1969 #endif /* !CONFIG_DMAR_FLPY_WA */
1971 static int __init
init_dmars(void)
1973 struct dmar_drhd_unit
*drhd
;
1974 struct dmar_rmrr_unit
*rmrr
;
1975 struct pci_dev
*pdev
;
1976 struct intel_iommu
*iommu
;
1977 int i
, ret
, unit
= 0;
1982 * initialize and program root entry to not present
1985 for_each_drhd_unit(drhd
) {
1988 * lock not needed as this is only incremented in the single
1989 * threaded kernel __init code path all other access are read
1994 g_iommus
= kcalloc(g_num_of_iommus
, sizeof(struct intel_iommu
*),
1997 printk(KERN_ERR
"Allocating global iommu array failed\n");
2002 deferred_flush
= kzalloc(g_num_of_iommus
*
2003 sizeof(struct deferred_flush_tables
), GFP_KERNEL
);
2004 if (!deferred_flush
) {
2010 for_each_drhd_unit(drhd
) {
2014 iommu
= drhd
->iommu
;
2015 g_iommus
[iommu
->seq_id
] = iommu
;
2017 ret
= iommu_init_domains(iommu
);
2023 * we could share the same root & context tables
2024 * amoung all IOMMU's. Need to Split it later.
2026 ret
= iommu_alloc_root_entry(iommu
);
2028 printk(KERN_ERR
"IOMMU: allocate root entry failed\n");
2033 for_each_drhd_unit(drhd
) {
2037 iommu
= drhd
->iommu
;
2038 if (dmar_enable_qi(iommu
)) {
2040 * Queued Invalidate not enabled, use Register Based
2043 iommu
->flush
.flush_context
= __iommu_flush_context
;
2044 iommu
->flush
.flush_iotlb
= __iommu_flush_iotlb
;
2045 printk(KERN_INFO
"IOMMU 0x%Lx: using Register based "
2047 (unsigned long long)drhd
->reg_base_addr
);
2049 iommu
->flush
.flush_context
= qi_flush_context
;
2050 iommu
->flush
.flush_iotlb
= qi_flush_iotlb
;
2051 printk(KERN_INFO
"IOMMU 0x%Lx: using Queued "
2053 (unsigned long long)drhd
->reg_base_addr
);
2059 * for each dev attached to rmrr
2061 * locate drhd for dev, alloc domain for dev
2062 * allocate free domain
2063 * allocate page table entries for rmrr
2064 * if context not allocated for bus
2065 * allocate and init context
2066 * set present in root table for this bus
2067 * init context with domain, translation etc
2071 for_each_rmrr_units(rmrr
) {
2072 for (i
= 0; i
< rmrr
->devices_cnt
; i
++) {
2073 pdev
= rmrr
->devices
[i
];
2074 /* some BIOS lists non-exist devices in DMAR table */
2077 ret
= iommu_prepare_rmrr_dev(rmrr
, pdev
);
2080 "IOMMU: mapping reserved region failed\n");
2084 iommu_prepare_gfx_mapping();
2086 iommu_prepare_isa();
2091 * global invalidate context cache
2092 * global invalidate iotlb
2093 * enable translation
2095 for_each_drhd_unit(drhd
) {
2098 iommu
= drhd
->iommu
;
2099 sprintf (iommu
->name
, "dmar%d", unit
++);
2101 iommu_flush_write_buffer(iommu
);
2103 ret
= dmar_set_interrupt(iommu
);
2107 iommu_set_root_entry(iommu
);
2109 iommu
->flush
.flush_context(iommu
, 0, 0, 0, DMA_CCMD_GLOBAL_INVL
,
2111 iommu
->flush
.flush_iotlb(iommu
, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH
,
2113 iommu_disable_protect_mem_regions(iommu
);
2115 ret
= iommu_enable_translation(iommu
);
2122 for_each_drhd_unit(drhd
) {
2125 iommu
= drhd
->iommu
;
2132 static inline u64
aligned_size(u64 host_addr
, size_t size
)
2135 addr
= (host_addr
& (~PAGE_MASK
)) + size
;
2136 return PAGE_ALIGN(addr
);
2140 iommu_alloc_iova(struct dmar_domain
*domain
, size_t size
, u64 end
)
2144 /* Make sure it's in range */
2145 end
= min_t(u64
, DOMAIN_MAX_ADDR(domain
->gaw
), end
);
2146 if (!size
|| (IOVA_START_ADDR
+ size
> end
))
2149 piova
= alloc_iova(&domain
->iovad
,
2150 size
>> PAGE_SHIFT
, IOVA_PFN(end
), 1);
2154 static struct iova
*
2155 __intel_alloc_iova(struct device
*dev
, struct dmar_domain
*domain
,
2156 size_t size
, u64 dma_mask
)
2158 struct pci_dev
*pdev
= to_pci_dev(dev
);
2159 struct iova
*iova
= NULL
;
2161 if (dma_mask
<= DMA_32BIT_MASK
|| dmar_forcedac
)
2162 iova
= iommu_alloc_iova(domain
, size
, dma_mask
);
2165 * First try to allocate an io virtual address in
2166 * DMA_32BIT_MASK and if that fails then try allocating
2169 iova
= iommu_alloc_iova(domain
, size
, DMA_32BIT_MASK
);
2171 iova
= iommu_alloc_iova(domain
, size
, dma_mask
);
2175 printk(KERN_ERR
"Allocating iova for %s failed", pci_name(pdev
));
2182 static struct dmar_domain
*
2183 get_valid_domain_for_dev(struct pci_dev
*pdev
)
2185 struct dmar_domain
*domain
;
2188 domain
= get_domain_for_dev(pdev
,
2189 DEFAULT_DOMAIN_ADDRESS_WIDTH
);
2192 "Allocating domain for %s failed", pci_name(pdev
));
2196 /* make sure context mapping is ok */
2197 if (unlikely(!domain_context_mapped(pdev
))) {
2198 ret
= domain_context_mapping(domain
, pdev
);
2201 "Domain context map for %s failed",
2210 static dma_addr_t
__intel_map_single(struct device
*hwdev
, phys_addr_t paddr
,
2211 size_t size
, int dir
, u64 dma_mask
)
2213 struct pci_dev
*pdev
= to_pci_dev(hwdev
);
2214 struct dmar_domain
*domain
;
2215 phys_addr_t start_paddr
;
2219 struct intel_iommu
*iommu
;
2221 BUG_ON(dir
== DMA_NONE
);
2222 if (pdev
->dev
.archdata
.iommu
== DUMMY_DEVICE_DOMAIN_INFO
)
2225 domain
= get_valid_domain_for_dev(pdev
);
2229 iommu
= domain_get_iommu(domain
);
2230 size
= aligned_size((u64
)paddr
, size
);
2232 iova
= __intel_alloc_iova(hwdev
, domain
, size
, pdev
->dma_mask
);
2236 start_paddr
= (phys_addr_t
)iova
->pfn_lo
<< PAGE_SHIFT
;
2239 * Check if DMAR supports zero-length reads on write only
2242 if (dir
== DMA_TO_DEVICE
|| dir
== DMA_BIDIRECTIONAL
|| \
2243 !cap_zlr(iommu
->cap
))
2244 prot
|= DMA_PTE_READ
;
2245 if (dir
== DMA_FROM_DEVICE
|| dir
== DMA_BIDIRECTIONAL
)
2246 prot
|= DMA_PTE_WRITE
;
2248 * paddr - (paddr + size) might be partial page, we should map the whole
2249 * page. Note: if two part of one page are separately mapped, we
2250 * might have two guest_addr mapping to the same host paddr, but this
2251 * is not a big problem
2253 ret
= domain_page_mapping(domain
, start_paddr
,
2254 ((u64
)paddr
) & PAGE_MASK
, size
, prot
);
2258 /* it's a non-present to present mapping */
2259 ret
= iommu_flush_iotlb_psi(iommu
, domain
->id
,
2260 start_paddr
, size
>> VTD_PAGE_SHIFT
, 1);
2262 iommu_flush_write_buffer(iommu
);
2264 return start_paddr
+ ((u64
)paddr
& (~PAGE_MASK
));
2268 __free_iova(&domain
->iovad
, iova
);
2269 printk(KERN_ERR
"Device %s request: %lx@%llx dir %d --- failed\n",
2270 pci_name(pdev
), size
, (unsigned long long)paddr
, dir
);
2274 dma_addr_t
intel_map_single(struct device
*hwdev
, phys_addr_t paddr
,
2275 size_t size
, int dir
)
2277 return __intel_map_single(hwdev
, paddr
, size
, dir
,
2278 to_pci_dev(hwdev
)->dma_mask
);
2281 static void flush_unmaps(void)
2287 /* just flush them all */
2288 for (i
= 0; i
< g_num_of_iommus
; i
++) {
2289 struct intel_iommu
*iommu
= g_iommus
[i
];
2293 if (deferred_flush
[i
].next
) {
2294 iommu
->flush
.flush_iotlb(iommu
, 0, 0, 0,
2295 DMA_TLB_GLOBAL_FLUSH
, 0);
2296 for (j
= 0; j
< deferred_flush
[i
].next
; j
++) {
2297 __free_iova(&deferred_flush
[i
].domain
[j
]->iovad
,
2298 deferred_flush
[i
].iova
[j
]);
2300 deferred_flush
[i
].next
= 0;
2307 static void flush_unmaps_timeout(unsigned long data
)
2309 unsigned long flags
;
2311 spin_lock_irqsave(&async_umap_flush_lock
, flags
);
2313 spin_unlock_irqrestore(&async_umap_flush_lock
, flags
);
2316 static void add_unmap(struct dmar_domain
*dom
, struct iova
*iova
)
2318 unsigned long flags
;
2320 struct intel_iommu
*iommu
;
2322 spin_lock_irqsave(&async_umap_flush_lock
, flags
);
2323 if (list_size
== HIGH_WATER_MARK
)
2326 iommu
= domain_get_iommu(dom
);
2327 iommu_id
= iommu
->seq_id
;
2329 next
= deferred_flush
[iommu_id
].next
;
2330 deferred_flush
[iommu_id
].domain
[next
] = dom
;
2331 deferred_flush
[iommu_id
].iova
[next
] = iova
;
2332 deferred_flush
[iommu_id
].next
++;
2335 mod_timer(&unmap_timer
, jiffies
+ msecs_to_jiffies(10));
2339 spin_unlock_irqrestore(&async_umap_flush_lock
, flags
);
2342 void intel_unmap_single(struct device
*dev
, dma_addr_t dev_addr
, size_t size
,
2345 struct pci_dev
*pdev
= to_pci_dev(dev
);
2346 struct dmar_domain
*domain
;
2347 unsigned long start_addr
;
2349 struct intel_iommu
*iommu
;
2351 if (pdev
->dev
.archdata
.iommu
== DUMMY_DEVICE_DOMAIN_INFO
)
2353 domain
= find_domain(pdev
);
2356 iommu
= domain_get_iommu(domain
);
2358 iova
= find_iova(&domain
->iovad
, IOVA_PFN(dev_addr
));
2362 start_addr
= iova
->pfn_lo
<< PAGE_SHIFT
;
2363 size
= aligned_size((u64
)dev_addr
, size
);
2365 pr_debug("Device %s unmapping: %lx@%llx\n",
2366 pci_name(pdev
), size
, (unsigned long long)start_addr
);
2368 /* clear the whole page */
2369 dma_pte_clear_range(domain
, start_addr
, start_addr
+ size
);
2370 /* free page tables */
2371 dma_pte_free_pagetable(domain
, start_addr
, start_addr
+ size
);
2372 if (intel_iommu_strict
) {
2373 if (iommu_flush_iotlb_psi(iommu
,
2374 domain
->id
, start_addr
, size
>> VTD_PAGE_SHIFT
, 0))
2375 iommu_flush_write_buffer(iommu
);
2377 __free_iova(&domain
->iovad
, iova
);
2379 add_unmap(domain
, iova
);
2381 * queue up the release of the unmap to save the 1/6th of the
2382 * cpu used up by the iotlb flush operation...
2387 void *intel_alloc_coherent(struct device
*hwdev
, size_t size
,
2388 dma_addr_t
*dma_handle
, gfp_t flags
)
2393 size
= PAGE_ALIGN(size
);
2394 order
= get_order(size
);
2395 flags
&= ~(GFP_DMA
| GFP_DMA32
);
2397 vaddr
= (void *)__get_free_pages(flags
, order
);
2400 memset(vaddr
, 0, size
);
2402 *dma_handle
= __intel_map_single(hwdev
, virt_to_bus(vaddr
), size
,
2404 hwdev
->coherent_dma_mask
);
2407 free_pages((unsigned long)vaddr
, order
);
2411 void intel_free_coherent(struct device
*hwdev
, size_t size
, void *vaddr
,
2412 dma_addr_t dma_handle
)
2416 size
= PAGE_ALIGN(size
);
2417 order
= get_order(size
);
2419 intel_unmap_single(hwdev
, dma_handle
, size
, DMA_BIDIRECTIONAL
);
2420 free_pages((unsigned long)vaddr
, order
);
2423 #define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
		    int nelems, int dir)
{
	int i;
	struct pci_dev *pdev = to_pci_dev(hwdev);
	struct dmar_domain *domain;
	unsigned long start_addr;
	struct iova *iova;
	size_t size = 0;
	void *addr;
	struct scatterlist *sg;
	struct intel_iommu *iommu;

	if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
		return;

	domain = find_domain(pdev);
	BUG_ON(!domain);

	iommu = domain_get_iommu(domain);

	iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
	if (!iova)
		return;
	for_each_sg(sglist, sg, nelems, i) {
		addr = SG_ENT_VIRT_ADDRESS(sg);
		size += aligned_size((u64)addr, sg->length);
	}

	start_addr = iova->pfn_lo << PAGE_SHIFT;

	/* clear the whole page */
	dma_pte_clear_range(domain, start_addr, start_addr + size);
	/* free page tables */
	dma_pte_free_pagetable(domain, start_addr, start_addr + size);

	if (iommu_flush_iotlb_psi(iommu, domain->id, start_addr,
			size >> VTD_PAGE_SHIFT, 0))
		iommu_flush_write_buffer(iommu);

	/* free iova */
	__free_iova(&domain->iovad, iova);
}
static int intel_nontranslate_map_sg(struct device *hddev,
	struct scatterlist *sglist, int nelems, int dir)
{
	int i;
	struct scatterlist *sg;

	for_each_sg(sglist, sg, nelems, i) {
		BUG_ON(!sg_page(sg));
		sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
		sg->dma_length = sg->length;
	}
	return nelems;
}
int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
		 int dir)
{
	void *addr;
	int i;
	struct pci_dev *pdev = to_pci_dev(hwdev);
	struct dmar_domain *domain;
	size_t size = 0;
	int prot = 0;
	size_t offset = 0;
	struct iova *iova = NULL;
	int ret;
	struct scatterlist *sg;
	unsigned long start_addr;
	struct intel_iommu *iommu;

	BUG_ON(dir == DMA_NONE);
	if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
		return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);

	domain = get_valid_domain_for_dev(pdev);
	if (!domain)
		return 0;

	iommu = domain_get_iommu(domain);

	for_each_sg(sglist, sg, nelems, i) {
		addr = SG_ENT_VIRT_ADDRESS(sg);
		addr = (void *)virt_to_phys(addr);
		size += aligned_size((u64)addr, sg->length);
	}

	iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
	if (!iova) {
		sglist->dma_length = 0;
		return 0;
	}

	/*
	 * Check if DMAR supports zero-length reads on write only
	 * mappings..
	 */
	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
			!cap_zlr(iommu->cap))
		prot |= DMA_PTE_READ;
	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
		prot |= DMA_PTE_WRITE;

	start_addr = iova->pfn_lo << PAGE_SHIFT;
	offset = 0;
	for_each_sg(sglist, sg, nelems, i) {
		addr = SG_ENT_VIRT_ADDRESS(sg);
		addr = (void *)virt_to_phys(addr);
		size = aligned_size((u64)addr, sg->length);
		ret = domain_page_mapping(domain, start_addr + offset,
			((u64)addr) & PAGE_MASK,
			size, prot);
		if (ret) {
			/* clear the page */
			dma_pte_clear_range(domain, start_addr,
					    start_addr + offset);
			/* free page tables */
			dma_pte_free_pagetable(domain, start_addr,
					       start_addr + offset);
			/* free iova */
			__free_iova(&domain->iovad, iova);
			return 0;
		}
		sg->dma_address = start_addr + offset +
				((u64)addr & (~PAGE_MASK));
		sg->dma_length = sg->length;
		offset += size;
	}

	/* it's a non-present to present mapping */
	if (iommu_flush_iotlb_psi(iommu, domain->id,
			start_addr, offset >> VTD_PAGE_SHIFT, 1))
		iommu_flush_write_buffer(iommu);
	return nelems;
}
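/*
 * Illustrative sketch, not part of this driver: scatter-gather mapping as
 * seen from a driver. dma_map_sg() lands in intel_map_sg() above, which
 * reserves one IOVA range covering the whole list and rewrites each
 * entry's dma_address/dma_length. NR_BUFS, bufs[] and BUF_LEN are
 * example-only assumptions.
 *
 *	struct scatterlist sgl[NR_BUFS];
 *	int i, count;
 *
 *	sg_init_table(sgl, NR_BUFS);
 *	for (i = 0; i < NR_BUFS; i++)
 *		sg_set_buf(&sgl[i], bufs[i], BUF_LEN);
 *
 *	count = dma_map_sg(&pdev->dev, sgl, NR_BUFS, DMA_TO_DEVICE);
 *	if (!count)
 *		return -ENOMEM;
 *	... hand sg_dma_address()/sg_dma_len() of each entry to the device ...
 *	dma_unmap_sg(&pdev->dev, sgl, NR_BUFS, DMA_TO_DEVICE);
 */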
static struct dma_mapping_ops intel_dma_ops = {
	.alloc_coherent = intel_alloc_coherent,
	.free_coherent = intel_free_coherent,
	.map_single = intel_map_single,
	.unmap_single = intel_unmap_single,
	.map_sg = intel_map_sg,
	.unmap_sg = intel_unmap_sg,
};
static inline int iommu_domain_cache_init(void)
{
	int ret = 0;

	iommu_domain_cache = kmem_cache_create("iommu_domain",
					       sizeof(struct dmar_domain), 0,
					       SLAB_HWCACHE_ALIGN, NULL);
	if (!iommu_domain_cache) {
		printk(KERN_ERR "Couldn't create iommu_domain cache\n");
		ret = -ENOMEM;
	}

	return ret;
}

static inline int iommu_devinfo_cache_init(void)
{
	int ret = 0;

	iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
						sizeof(struct device_domain_info),
						0, SLAB_HWCACHE_ALIGN, NULL);
	if (!iommu_devinfo_cache) {
		printk(KERN_ERR "Couldn't create devinfo cache\n");
		ret = -ENOMEM;
	}

	return ret;
}

static inline int iommu_iova_cache_init(void)
{
	int ret = 0;

	iommu_iova_cache = kmem_cache_create("iommu_iova",
					     sizeof(struct iova), 0,
					     SLAB_HWCACHE_ALIGN, NULL);
	if (!iommu_iova_cache) {
		printk(KERN_ERR "Couldn't create iova cache\n");
		ret = -ENOMEM;
	}

	return ret;
}
static int __init iommu_init_mempool(void)
{
	int ret;

	ret = iommu_iova_cache_init();
	if (ret)
		return ret;

	ret = iommu_domain_cache_init();
	if (ret)
		goto domain_error;

	ret = iommu_devinfo_cache_init();
	if (!ret)
		return ret;

	kmem_cache_destroy(iommu_domain_cache);
domain_error:
	kmem_cache_destroy(iommu_iova_cache);

	return -ENOMEM;
}
static void __init iommu_exit_mempool(void)
{
	kmem_cache_destroy(iommu_devinfo_cache);
	kmem_cache_destroy(iommu_domain_cache);
	kmem_cache_destroy(iommu_iova_cache);
}
static void __init init_no_remapping_devices(void)
{
	struct dmar_drhd_unit *drhd;

	for_each_drhd_unit(drhd) {
		if (!drhd->include_all) {
			int i;
			for (i = 0; i < drhd->devices_cnt; i++)
				if (drhd->devices[i] != NULL)
					break;
			/* ignore DMAR unit if no pci devices exist */
			if (i == drhd->devices_cnt)
				drhd->ignored = 1;
		}
	}

	if (dmar_map_gfx)
		return;

	for_each_drhd_unit(drhd) {
		int i;
		if (drhd->ignored || drhd->include_all)
			continue;

		for (i = 0; i < drhd->devices_cnt; i++)
			if (drhd->devices[i] &&
			    !IS_GFX_DEVICE(drhd->devices[i]))
				break;

		if (i < drhd->devices_cnt)
			continue;

		/* bypass IOMMU if it is just for gfx devices */
		drhd->ignored = 1;
		for (i = 0; i < drhd->devices_cnt; i++) {
			if (!drhd->devices[i])
				continue;
			drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
		}
	}
}
int __init intel_iommu_init(void)
{
	int ret = 0;

	if (dmar_table_init())
		return -ENODEV;

	if (dmar_dev_scope_init())
		return -ENODEV;

	/*
	 * Check the need for DMA-remapping initialization now.
	 * Above initialization will also be used by Interrupt-remapping.
	 */
	if (no_iommu || swiotlb || dmar_disabled)
		return -ENODEV;

	iommu_init_mempool();
	dmar_init_reserved_ranges();

	init_no_remapping_devices();

	ret = init_dmars();
	if (ret) {
		printk(KERN_ERR "IOMMU: dmar init failed\n");
		put_iova_domain(&reserved_iova_list);
		iommu_exit_mempool();
		return ret;
	}
	printk(KERN_INFO
	"PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");

	init_timer(&unmap_timer);
	dma_ops = &intel_dma_ops;

	register_iommu(&intel_iommu_ops);

	return 0;
}
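/*
 * Illustrative note (assumptions beyond this hunk): intel_iommu_init() is
 * invoked from the arch PCI/DMA setup code after the DMAR ACPI table has
 * been parsed, and its behaviour can be tuned from the kernel command
 * line, for example:
 *
 *	intel_iommu=off		disable DMA remapping entirely
 *	intel_iommu=igfx_off	leave graphics devices untranslated
 *	intel_iommu=strict	flush the IOTLB synchronously on unmap
 */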
static int vm_domain_add_dev_info(struct dmar_domain *domain,
				  struct pci_dev *pdev)
{
	struct device_domain_info *info;
	unsigned long flags;

	info = alloc_devinfo_mem();
	if (!info)
		return -ENOMEM;

	info->bus = pdev->bus->number;
	info->devfn = pdev->devfn;
	info->dev = pdev;
	info->domain = domain;

	spin_lock_irqsave(&device_domain_lock, flags);
	list_add(&info->link, &domain->devices);
	list_add(&info->global, &device_domain_list);
	pdev->dev.archdata.iommu = info;
	spin_unlock_irqrestore(&device_domain_lock, flags);

	return 0;
}
static void vm_domain_remove_one_dev_info(struct dmar_domain *domain,
					  struct pci_dev *pdev)
{
	struct device_domain_info *info;
	struct intel_iommu *iommu;
	unsigned long flags;
	int found = 0;
	struct list_head *entry, *tmp;

	iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
	if (!iommu)
		return;

	spin_lock_irqsave(&device_domain_lock, flags);
	list_for_each_safe(entry, tmp, &domain->devices) {
		info = list_entry(entry, struct device_domain_info, link);
		if (info->bus == pdev->bus->number &&
		    info->devfn == pdev->devfn) {
			list_del(&info->link);
			list_del(&info->global);
			if (info->dev)
				info->dev->dev.archdata.iommu = NULL;
			spin_unlock_irqrestore(&device_domain_lock, flags);

			iommu_detach_dev(iommu, info->bus, info->devfn);
			free_devinfo_mem(info);

			spin_lock_irqsave(&device_domain_lock, flags);

			if (found)
				break;
			else
				continue;
		}

		/* if there is no other devices under the same iommu
		 * owned by this domain, clear this iommu in iommu_bmp
		 * update iommu count and coherency
		 */
		if (device_to_iommu(info->bus, info->devfn) == iommu)
			found = 1;
	}

	if (found == 0) {
		unsigned long tmp_flags;
		spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
		clear_bit(iommu->seq_id, &domain->iommu_bmp);
		domain->iommu_count--;
		domain_update_iommu_coherency(domain);
		spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
	}

	spin_unlock_irqrestore(&device_domain_lock, flags);
}
static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
{
	struct device_domain_info *info;
	struct intel_iommu *iommu;
	unsigned long flags1, flags2;

	spin_lock_irqsave(&device_domain_lock, flags1);
	while (!list_empty(&domain->devices)) {
		info = list_entry(domain->devices.next,
			struct device_domain_info, link);
		list_del(&info->link);
		list_del(&info->global);
		if (info->dev)
			info->dev->dev.archdata.iommu = NULL;

		spin_unlock_irqrestore(&device_domain_lock, flags1);

		iommu = device_to_iommu(info->bus, info->devfn);
		iommu_detach_dev(iommu, info->bus, info->devfn);

		/* clear this iommu in iommu_bmp, update iommu count
		 * and coherency
		 */
		spin_lock_irqsave(&domain->iommu_lock, flags2);
		if (test_and_clear_bit(iommu->seq_id,
				       &domain->iommu_bmp)) {
			domain->iommu_count--;
			domain_update_iommu_coherency(domain);
		}
		spin_unlock_irqrestore(&domain->iommu_lock, flags2);

		free_devinfo_mem(info);
		spin_lock_irqsave(&device_domain_lock, flags1);
	}
	spin_unlock_irqrestore(&device_domain_lock, flags1);
}
/* domain id for virtual machine, it won't be set in context */
static unsigned long vm_domid;
static int vm_domain_min_agaw(struct dmar_domain *domain)
{
	int i;
	int min_agaw = domain->agaw;

	i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
	for (; i < g_num_of_iommus; ) {
		if (min_agaw > g_iommus[i]->agaw)
			min_agaw = g_iommus[i]->agaw;

		i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
	}

	return min_agaw;
}
static struct dmar_domain *iommu_alloc_vm_domain(void)
{
	struct dmar_domain *domain;

	domain = alloc_domain_mem();
	if (!domain)
		return NULL;

	domain->id = vm_domid++;
	memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
	domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;

	return domain;
}
static int vm_domain_init(struct dmar_domain *domain, int guest_width)
{
	int adjust_width;

	init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
	spin_lock_init(&domain->mapping_lock);
	spin_lock_init(&domain->iommu_lock);

	domain_reserve_special_ranges(domain);

	/* calculate AGAW */
	domain->gaw = guest_width;
	adjust_width = guestwidth_to_adjustwidth(guest_width);
	domain->agaw = width_to_agaw(adjust_width);

	INIT_LIST_HEAD(&domain->devices);

	domain->iommu_count = 0;
	domain->iommu_coherency = 0;
	domain->max_addr = 0;

	/* always allocate the top pgd */
	domain->pgd = (struct dma_pte *)alloc_pgtable_page();
	if (!domain->pgd)
		return -ENOMEM;
	domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
	return 0;
}
static void iommu_free_vm_domain(struct dmar_domain *domain)
{
	unsigned long flags;
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	unsigned long i;
	unsigned long ndomains;

	for_each_drhd_unit(drhd) {
		if (drhd->ignored)
			continue;
		iommu = drhd->iommu;

		ndomains = cap_ndoms(iommu->cap);
		i = find_first_bit(iommu->domain_ids, ndomains);
		for (; i < ndomains; ) {
			if (iommu->domains[i] == domain) {
				spin_lock_irqsave(&iommu->lock, flags);
				clear_bit(i, iommu->domain_ids);
				iommu->domains[i] = NULL;
				spin_unlock_irqrestore(&iommu->lock, flags);
				break;
			}
			i = find_next_bit(iommu->domain_ids, ndomains, i+1);
		}
	}
}
static void vm_domain_exit(struct dmar_domain *domain)
{
	u64 end;

	/* Domain 0 is reserved, so dont process it */
	if (!domain)
		return;

	vm_domain_remove_all_dev_info(domain);
	/* destroy iovas */
	put_iova_domain(&domain->iovad);
	end = DOMAIN_MAX_ADDR(domain->gaw);
	end = end & (~VTD_PAGE_MASK);

	/* clear ptes */
	dma_pte_clear_range(domain, 0, end);

	/* free page tables */
	dma_pte_free_pagetable(domain, 0, end);

	iommu_free_vm_domain(domain);
	free_domain_mem(domain);
}
static int intel_iommu_domain_init(struct iommu_domain *domain)
{
	struct dmar_domain *dmar_domain;

	dmar_domain = iommu_alloc_vm_domain();
	if (!dmar_domain) {
		printk(KERN_ERR
			"intel_iommu_domain_init: dmar_domain == NULL\n");
		return -ENOMEM;
	}
	if (vm_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
		printk(KERN_ERR
			"intel_iommu_domain_init() failed\n");
		vm_domain_exit(dmar_domain);
		return -ENOMEM;
	}
	domain->priv = dmar_domain;

	return 0;
}
static void intel_iommu_domain_destroy(struct iommu_domain *domain)
{
	struct dmar_domain *dmar_domain = domain->priv;

	domain->priv = NULL;
	vm_domain_exit(dmar_domain);
}
static int intel_iommu_attach_device(struct iommu_domain *domain,
				     struct device *dev)
{
	struct dmar_domain *dmar_domain = domain->priv;
	struct pci_dev *pdev = to_pci_dev(dev);
	struct intel_iommu *iommu;
	int addr_width;
	u64 end;
	int ret;

	/* normally pdev is not mapped */
	if (unlikely(domain_context_mapped(pdev))) {
		struct dmar_domain *old_domain;

		old_domain = find_domain(pdev);
		if (old_domain) {
			if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
				vm_domain_remove_one_dev_info(old_domain, pdev);
			else
				domain_remove_dev_info(old_domain);
		}
	}

	iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
	if (!iommu)
		return -ENODEV;

	/* check if this iommu agaw is sufficient for max mapped address */
	addr_width = agaw_to_width(iommu->agaw);
	end = DOMAIN_MAX_ADDR(addr_width);
	end = end & VTD_PAGE_MASK;
	if (end < dmar_domain->max_addr) {
		printk(KERN_ERR "%s: iommu agaw (%d) is not "
		       "sufficient for the mapped address (%llx)\n",
		       __func__, iommu->agaw, dmar_domain->max_addr);
		return -EFAULT;
	}

	ret = domain_context_mapping(dmar_domain, pdev);
	if (ret)
		return ret;

	ret = vm_domain_add_dev_info(dmar_domain, pdev);
	return ret;
}
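/*
 * Worked example for the width check above: an IOMMU whose agaw maps to a
 * 48-bit address width gives DOMAIN_MAX_ADDR(48) = (1ULL << 48) - 1 =
 * 0xffffffffffff; masked with VTD_PAGE_MASK this leaves 0xfffffffff000,
 * so a domain whose max_addr already exceeds that cannot be attached to
 * this unit and -EFAULT is returned.
 */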
static void intel_iommu_detach_device(struct iommu_domain *domain,
				      struct device *dev)
{
	struct dmar_domain *dmar_domain = domain->priv;
	struct pci_dev *pdev = to_pci_dev(dev);

	vm_domain_remove_one_dev_info(dmar_domain, pdev);
}
static int intel_iommu_map_range(struct iommu_domain *domain,
				 unsigned long iova, phys_addr_t hpa,
				 size_t size, int iommu_prot)
{
	struct dmar_domain *dmar_domain = domain->priv;
	u64 max_addr;
	int addr_width;
	int prot = 0;
	int ret;

	if (iommu_prot & IOMMU_READ)
		prot |= DMA_PTE_READ;
	if (iommu_prot & IOMMU_WRITE)
		prot |= DMA_PTE_WRITE;

	max_addr = (iova & VTD_PAGE_MASK) + VTD_PAGE_ALIGN(size);
	if (dmar_domain->max_addr < max_addr) {
		int min_agaw;
		u64 end;

		/* check if minimum agaw is sufficient for mapped address */
		min_agaw = vm_domain_min_agaw(dmar_domain);
		addr_width = agaw_to_width(min_agaw);
		end = DOMAIN_MAX_ADDR(addr_width);
		end = end & VTD_PAGE_MASK;
		if (end < max_addr) {
			printk(KERN_ERR "%s: iommu agaw (%d) is not "
			       "sufficient for the mapped address (%llx)\n",
			       __func__, min_agaw, max_addr);
			return -EFAULT;
		}
		dmar_domain->max_addr = max_addr;
	}

	ret = domain_page_mapping(dmar_domain, iova, hpa, size, prot);
	return ret;
}
static void intel_iommu_unmap_range(struct iommu_domain *domain,
				    unsigned long iova, size_t size)
{
	struct dmar_domain *dmar_domain = domain->priv;
	dma_addr_t base;

	/* The address might not be aligned */
	base = iova & VTD_PAGE_MASK;
	size = VTD_PAGE_ALIGN(size);
	dma_pte_clear_range(dmar_domain, base, base + size);

	if (dmar_domain->max_addr == base + size)
		dmar_domain->max_addr = base;
}
int intel_iommu_found(void)
{
	return g_num_of_iommus;
}
EXPORT_SYMBOL_GPL(intel_iommu_found);
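/*
 * Illustrative sketch, not part of this driver: how a consumer of the
 * generic IOMMU API (for example device-assignment code) exercises the
 * callbacks above once they are registered as intel_iommu_ops. The
 * wrapper names follow linux/iommu.h of this kernel generation; the
 * device, iova, paddr and size values are example-only assumptions.
 *
 *	struct iommu_domain *dom;
 *	phys_addr_t phys;
 *
 *	if (!iommu_found())
 *		return -ENODEV;
 *	dom = iommu_domain_alloc();
 *	if (!dom)
 *		return -ENOMEM;
 *	if (iommu_attach_device(dom, &pdev->dev))
 *		goto out_free;
 *	if (iommu_map_range(dom, iova, paddr, size, IOMMU_READ | IOMMU_WRITE))
 *		goto out_detach;
 *	phys = iommu_iova_to_phys(dom, iova);
 *	...
 *	iommu_unmap_range(dom, iova, size);
 *	iommu_detach_device(dom, &pdev->dev);
 *	iommu_domain_free(dom);
 */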
static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
					    unsigned long iova)
{
	struct dmar_domain *dmar_domain = domain->priv;
	struct dma_pte *pte;
	u64 phys = 0;

	pte = addr_to_dma_pte(dmar_domain, iova);
	if (pte)
		phys = dma_pte_addr(pte);