drivers/pci/intel-iommu.c (at commit "intel-iommu: move DMA_32/64BIT_PFN into intel-iommu.c")
1 /*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
17 * Copyright (C) 2006-2008 Intel Corporation
18 * Author: Ashok Raj <ashok.raj@intel.com>
19 * Author: Shaohua Li <shaohua.li@intel.com>
20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21 * Author: Fenghua Yu <fenghua.yu@intel.com>
22 */
23
24 #include <linux/init.h>
25 #include <linux/bitmap.h>
26 #include <linux/debugfs.h>
27 #include <linux/slab.h>
28 #include <linux/irq.h>
29 #include <linux/interrupt.h>
30 #include <linux/spinlock.h>
31 #include <linux/pci.h>
32 #include <linux/dmar.h>
33 #include <linux/dma-mapping.h>
34 #include <linux/mempool.h>
35 #include <linux/timer.h>
36 #include <linux/iova.h>
37 #include <linux/intel-iommu.h>
38 #include <asm/cacheflush.h>
39 #include <asm/iommu.h>
40 #include "pci.h"
41
42 #define ROOT_SIZE VTD_PAGE_SIZE
43 #define CONTEXT_SIZE VTD_PAGE_SIZE
44
45 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
46 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
47
48 #define IOAPIC_RANGE_START (0xfee00000)
49 #define IOAPIC_RANGE_END (0xfeefffff)
50 #define IOVA_START_ADDR (0x1000)
51
52 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
53
54 #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
55
56 #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
57 #define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK)
58 #define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK)
59
60 static void flush_unmaps_timeout(unsigned long data);
61
62 DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
63
64 #define HIGH_WATER_MARK 250
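/*
 * Deferred-flush bookkeeping: unmapped IOVAs are queued here per IOMMU and
 * released in batches by flush_unmaps() after a single global IOTLB flush,
 * instead of flushing on every unmap.
 */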
65 struct deferred_flush_tables {
66 int next;
67 struct iova *iova[HIGH_WATER_MARK];
68 struct dmar_domain *domain[HIGH_WATER_MARK];
69 };
70
71 static struct deferred_flush_tables *deferred_flush;
72
73 /* number of IOMMUs in the system; used to size per-IOMMU tables */
74 static int g_num_of_iommus;
75
76 static DEFINE_SPINLOCK(async_umap_flush_lock);
77 static LIST_HEAD(unmaps_to_do);
78
79 static int timer_on;
80 static long list_size;
81
82 static void domain_remove_dev_info(struct dmar_domain *domain);
83
84 int dmar_disabled;
85 static int __initdata dmar_map_gfx = 1;
86 static int dmar_forcedac;
87 static int intel_iommu_strict;
88
89 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
90 static DEFINE_SPINLOCK(device_domain_lock);
91 static LIST_HEAD(device_domain_list);
92
93 static int __init intel_iommu_setup(char *str)
94 {
95 if (!str)
96 return -EINVAL;
97 while (*str) {
98 if (!strncmp(str, "off", 3)) {
99 dmar_disabled = 1;
100 printk(KERN_INFO"Intel-IOMMU: disabled\n");
101 } else if (!strncmp(str, "igfx_off", 8)) {
102 dmar_map_gfx = 0;
103 printk(KERN_INFO
104 "Intel-IOMMU: disable GFX device mapping\n");
105 } else if (!strncmp(str, "forcedac", 8)) {
106 printk(KERN_INFO
107 "Intel-IOMMU: Forcing DAC for PCI devices\n");
108 dmar_forcedac = 1;
109 } else if (!strncmp(str, "strict", 6)) {
110 printk(KERN_INFO
111 "Intel-IOMMU: disable batched IOTLB flush\n");
112 intel_iommu_strict = 1;
113 }
114
115 str += strcspn(str, ",");
116 while (*str == ',')
117 str++;
118 }
119 return 0;
120 }
121 __setup("intel_iommu=", intel_iommu_setup);
122
123 static struct kmem_cache *iommu_domain_cache;
124 static struct kmem_cache *iommu_devinfo_cache;
125 static struct kmem_cache *iommu_iova_cache;
126
127 static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
128 {
129 unsigned int flags;
130 void *vaddr;
131
132 /* trying to avoid low memory issues */
133 flags = current->flags & PF_MEMALLOC;
134 current->flags |= PF_MEMALLOC;
135 vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
136 current->flags &= (~PF_MEMALLOC | flags);
137 return vaddr;
138 }
139
140
141 static inline void *alloc_pgtable_page(void)
142 {
143 unsigned int flags;
144 void *vaddr;
145
146 /* trying to avoid low memory issues */
147 flags = current->flags & PF_MEMALLOC;
148 current->flags |= PF_MEMALLOC;
149 vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
150 current->flags &= (~PF_MEMALLOC | flags);
151 return vaddr;
152 }
153
154 static inline void free_pgtable_page(void *vaddr)
155 {
156 free_page((unsigned long)vaddr);
157 }
158
159 static inline void *alloc_domain_mem(void)
160 {
161 return iommu_kmem_cache_alloc(iommu_domain_cache);
162 }
163
164 static void free_domain_mem(void *vaddr)
165 {
166 kmem_cache_free(iommu_domain_cache, vaddr);
167 }
168
169 static inline void * alloc_devinfo_mem(void)
170 {
171 return iommu_kmem_cache_alloc(iommu_devinfo_cache);
172 }
173
174 static inline void free_devinfo_mem(void *vaddr)
175 {
176 kmem_cache_free(iommu_devinfo_cache, vaddr);
177 }
178
179 struct iova *alloc_iova_mem(void)
180 {
181 return iommu_kmem_cache_alloc(iommu_iova_cache);
182 }
183
184 void free_iova_mem(struct iova *iova)
185 {
186 kmem_cache_free(iommu_iova_cache, iova);
187 }
188
189 /* Gets context entry for a given bus and devfn */
190 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
191 u8 bus, u8 devfn)
192 {
193 struct root_entry *root;
194 struct context_entry *context;
195 unsigned long phy_addr;
196 unsigned long flags;
197
198 spin_lock_irqsave(&iommu->lock, flags);
199 root = &iommu->root_entry[bus];
200 context = get_context_addr_from_root(root);
201 if (!context) {
202 context = (struct context_entry *)alloc_pgtable_page();
203 if (!context) {
204 spin_unlock_irqrestore(&iommu->lock, flags);
205 return NULL;
206 }
207 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
208 phy_addr = virt_to_phys((void *)context);
209 set_root_value(root, phy_addr);
210 set_root_present(root);
211 __iommu_flush_cache(iommu, root, sizeof(*root));
212 }
213 spin_unlock_irqrestore(&iommu->lock, flags);
214 return &context[devfn];
215 }
216
217 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
218 {
219 struct root_entry *root;
220 struct context_entry *context;
221 int ret;
222 unsigned long flags;
223
224 spin_lock_irqsave(&iommu->lock, flags);
225 root = &iommu->root_entry[bus];
226 context = get_context_addr_from_root(root);
227 if (!context) {
228 ret = 0;
229 goto out;
230 }
231 ret = context_present(context[devfn]);
232 out:
233 spin_unlock_irqrestore(&iommu->lock, flags);
234 return ret;
235 }
236
237 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
238 {
239 struct root_entry *root;
240 struct context_entry *context;
241 unsigned long flags;
242
243 spin_lock_irqsave(&iommu->lock, flags);
244 root = &iommu->root_entry[bus];
245 context = get_context_addr_from_root(root);
246 if (context) {
247 context_clear_entry(context[devfn]);
248 __iommu_flush_cache(iommu, &context[devfn], \
249 sizeof(*context));
250 }
251 spin_unlock_irqrestore(&iommu->lock, flags);
252 }
253
254 static void free_context_table(struct intel_iommu *iommu)
255 {
256 struct root_entry *root;
257 int i;
258 unsigned long flags;
259 struct context_entry *context;
260
261 spin_lock_irqsave(&iommu->lock, flags);
262 if (!iommu->root_entry) {
263 goto out;
264 }
265 for (i = 0; i < ROOT_ENTRY_NR; i++) {
266 root = &iommu->root_entry[i];
267 context = get_context_addr_from_root(root);
268 if (context)
269 free_pgtable_page(context);
270 }
271 free_pgtable_page(iommu->root_entry);
272 iommu->root_entry = NULL;
273 out:
274 spin_unlock_irqrestore(&iommu->lock, flags);
275 }
276
277 /* page table handling */
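/*
 * Each page-table level decodes LEVEL_STRIDE (9) bits of the address on top
 * of the 12-bit page offset, so an adjusted guest address width (agaw) of N
 * means N + 2 levels covering 30 + 9 * N address bits (see helpers below).
 */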
278 #define LEVEL_STRIDE (9)
279 #define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
280
281 static inline int agaw_to_level(int agaw)
282 {
283 return agaw + 2;
284 }
285
286 static inline int agaw_to_width(int agaw)
287 {
288 return 30 + agaw * LEVEL_STRIDE;
289
290 }
291
292 static inline int width_to_agaw(int width)
293 {
294 return (width - 30) / LEVEL_STRIDE;
295 }
296
297 static inline unsigned int level_to_offset_bits(int level)
298 {
299 return (12 + (level - 1) * LEVEL_STRIDE);
300 }
301
302 static inline int address_level_offset(u64 addr, int level)
303 {
304 return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
305 }
306
307 static inline u64 level_mask(int level)
308 {
309 return ((u64)-1 << level_to_offset_bits(level));
310 }
311
312 static inline u64 level_size(int level)
313 {
314 return ((u64)1 << level_to_offset_bits(level));
315 }
316
317 static inline u64 align_to_level(u64 addr, int level)
318 {
319 return ((addr + level_size(level) - 1) & level_mask(level));
320 }
321
322 static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
323 {
324 int addr_width = agaw_to_width(domain->agaw);
325 struct dma_pte *parent, *pte = NULL;
326 int level = agaw_to_level(domain->agaw);
327 int offset;
328 unsigned long flags;
329
330 BUG_ON(!domain->pgd);
331
332 addr &= (((u64)1) << addr_width) - 1;
333 parent = domain->pgd;
334
335 spin_lock_irqsave(&domain->mapping_lock, flags);
336 while (level > 0) {
337 void *tmp_page;
338
339 offset = address_level_offset(addr, level);
340 pte = &parent[offset];
341 if (level == 1)
342 break;
343
344 if (!dma_pte_present(*pte)) {
345 tmp_page = alloc_pgtable_page();
346
347 if (!tmp_page) {
348 spin_unlock_irqrestore(&domain->mapping_lock,
349 flags);
350 return NULL;
351 }
352 __iommu_flush_cache(domain->iommu, tmp_page,
353 PAGE_SIZE);
354 dma_set_pte_addr(*pte, virt_to_phys(tmp_page));
355 /*
356 * higher-level tables always set r/w; the last-level page
357 * table controls read/write access
358 */
359 dma_set_pte_readable(*pte);
360 dma_set_pte_writable(*pte);
361 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
362 }
363 parent = phys_to_virt(dma_pte_addr(*pte));
364 level--;
365 }
366
367 spin_unlock_irqrestore(&domain->mapping_lock, flags);
368 return pte;
369 }
370
371 /* return address's pte at specific level */
372 static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
373 int level)
374 {
375 struct dma_pte *parent, *pte = NULL;
376 int total = agaw_to_level(domain->agaw);
377 int offset;
378
379 parent = domain->pgd;
380 while (level <= total) {
381 offset = address_level_offset(addr, total);
382 pte = &parent[offset];
383 if (level == total)
384 return pte;
385
386 if (!dma_pte_present(*pte))
387 break;
388 parent = phys_to_virt(dma_pte_addr(*pte));
389 total--;
390 }
391 return NULL;
392 }
393
394 /* clear one page's page table */
395 static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
396 {
397 struct dma_pte *pte = NULL;
398
399 /* get last level pte */
400 pte = dma_addr_level_pte(domain, addr, 1);
401
402 if (pte) {
403 dma_clear_pte(*pte);
404 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
405 }
406 }
407
408 /* clear last level pte; a tlb flush should follow */
409 static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
410 {
411 int addr_width = agaw_to_width(domain->agaw);
412
413 start &= (((u64)1) << addr_width) - 1;
414 end &= (((u64)1) << addr_width) - 1;
415 /* in case it's a partial page */
416 start = PAGE_ALIGN(start);
417 end &= PAGE_MASK;
418
419 /* we don't need lock here, nobody else touches the iova range */
420 while (start < end) {
421 dma_pte_clear_one(domain, start);
422 start += VTD_PAGE_SIZE;
423 }
424 }
425
426 /* free page table pages. last level pte should already be cleared */
427 static void dma_pte_free_pagetable(struct dmar_domain *domain,
428 u64 start, u64 end)
429 {
430 int addr_width = agaw_to_width(domain->agaw);
431 struct dma_pte *pte;
432 int total = agaw_to_level(domain->agaw);
433 int level;
434 u64 tmp;
435
436 start &= (((u64)1) << addr_width) - 1;
437 end &= (((u64)1) << addr_width) - 1;
438
439 /* we don't need lock here, nobody else touches the iova range */
440 level = 2;
441 while (level <= total) {
442 tmp = align_to_level(start, level);
443 if (tmp >= end || (tmp + level_size(level) > end))
444 return;
445
446 while (tmp < end) {
447 pte = dma_addr_level_pte(domain, tmp, level);
448 if (pte) {
449 free_pgtable_page(
450 phys_to_virt(dma_pte_addr(*pte)));
451 dma_clear_pte(*pte);
452 __iommu_flush_cache(domain->iommu,
453 pte, sizeof(*pte));
454 }
455 tmp += level_size(level);
456 }
457 level++;
458 }
459 /* free pgd */
460 if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
461 free_pgtable_page(domain->pgd);
462 domain->pgd = NULL;
463 }
464 }
465
466 /* iommu handling */
467 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
468 {
469 struct root_entry *root;
470 unsigned long flags;
471
472 root = (struct root_entry *)alloc_pgtable_page();
473 if (!root)
474 return -ENOMEM;
475
476 __iommu_flush_cache(iommu, root, ROOT_SIZE);
477
478 spin_lock_irqsave(&iommu->lock, flags);
479 iommu->root_entry = root;
480 spin_unlock_irqrestore(&iommu->lock, flags);
481
482 return 0;
483 }
484
485 static void iommu_set_root_entry(struct intel_iommu *iommu)
486 {
487 void *addr;
488 u32 cmd, sts;
489 unsigned long flag;
490
491 addr = iommu->root_entry;
492
493 spin_lock_irqsave(&iommu->register_lock, flag);
494 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
495
496 cmd = iommu->gcmd | DMA_GCMD_SRTP;
497 writel(cmd, iommu->reg + DMAR_GCMD_REG);
498
499 /* Make sure hardware completes it */
500 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
501 readl, (sts & DMA_GSTS_RTPS), sts);
502
503 spin_unlock_irqrestore(&iommu->register_lock, flag);
504 }
505
506 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
507 {
508 u32 val;
509 unsigned long flag;
510
511 if (!cap_rwbf(iommu->cap))
512 return;
513 val = iommu->gcmd | DMA_GCMD_WBF;
514
515 spin_lock_irqsave(&iommu->register_lock, flag);
516 writel(val, iommu->reg + DMAR_GCMD_REG);
517
518 /* Make sure hardware completes it */
519 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
520 readl, (!(val & DMA_GSTS_WBFS)), val);
521
522 spin_unlock_irqrestore(&iommu->register_lock, flag);
523 }
524
525 /* return value indicates whether we need a write buffer flush */
526 static int __iommu_flush_context(struct intel_iommu *iommu,
527 u16 did, u16 source_id, u8 function_mask, u64 type,
528 int non_present_entry_flush)
529 {
530 u64 val = 0;
531 unsigned long flag;
532
533 /*
534 * In the non-present entry flush case: if the hardware doesn't cache
535 * non-present entries we do nothing; if it does, we flush the entries
536 * of domain 0 (the domain id used to cache any non-present
537 * entries)
538 */
539 if (non_present_entry_flush) {
540 if (!cap_caching_mode(iommu->cap))
541 return 1;
542 else
543 did = 0;
544 }
545
546 switch (type) {
547 case DMA_CCMD_GLOBAL_INVL:
548 val = DMA_CCMD_GLOBAL_INVL;
549 break;
550 case DMA_CCMD_DOMAIN_INVL:
551 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
552 break;
553 case DMA_CCMD_DEVICE_INVL:
554 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
555 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
556 break;
557 default:
558 BUG();
559 }
560 val |= DMA_CCMD_ICC;
561
562 spin_lock_irqsave(&iommu->register_lock, flag);
563 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
564
565 /* Make sure hardware completes it */
566 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
567 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
568
569 spin_unlock_irqrestore(&iommu->register_lock, flag);
570
571 /* flushing a context entry implicitly flushes the write buffer */
572 return 0;
573 }
574
575 /* return value indicates whether we need a write buffer flush */
576 static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
577 u64 addr, unsigned int size_order, u64 type,
578 int non_present_entry_flush)
579 {
580 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
581 u64 val = 0, val_iva = 0;
582 unsigned long flag;
583
584 /*
585 * In the non-present entry flush case: if the hardware doesn't cache
586 * non-present entries we do nothing; if it does, we flush the entries
587 * of domain 0 (the domain id used to cache any non-present
588 * entries)
589 */
590 if (non_present_entry_flush) {
591 if (!cap_caching_mode(iommu->cap))
592 return 1;
593 else
594 did = 0;
595 }
596
597 switch (type) {
598 case DMA_TLB_GLOBAL_FLUSH:
599 /* a global flush doesn't need to set IVA_REG */
600 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
601 break;
602 case DMA_TLB_DSI_FLUSH:
603 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
604 break;
605 case DMA_TLB_PSI_FLUSH:
606 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
607 /* Note: always flush non-leaf currently */
608 val_iva = size_order | addr;
609 break;
610 default:
611 BUG();
612 }
613 /* Note: set drain read/write */
614 #if 0
615 /*
616 * This is probably only needed to be extra safe; it looks like
617 * we can ignore it without any impact.
618 */
619 if (cap_read_drain(iommu->cap))
620 val |= DMA_TLB_READ_DRAIN;
621 #endif
622 if (cap_write_drain(iommu->cap))
623 val |= DMA_TLB_WRITE_DRAIN;
624
625 spin_lock_irqsave(&iommu->register_lock, flag);
626 /* Note: Only uses first TLB reg currently */
627 if (val_iva)
628 dmar_writeq(iommu->reg + tlb_offset, val_iva);
629 dmar_writeq(iommu->reg + tlb_offset + 8, val);
630
631 /* Make sure hardware completes it */
632 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
633 dmar_readq, (!(val & DMA_TLB_IVT)), val);
634
635 spin_unlock_irqrestore(&iommu->register_lock, flag);
636
637 /* check IOTLB invalidation granularity */
638 if (DMA_TLB_IAIG(val) == 0)
639 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
640 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
641 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
642 (unsigned long long)DMA_TLB_IIRG(type),
643 (unsigned long long)DMA_TLB_IAIG(val));
644 /* flushing an iotlb entry implicitly flushes the write buffer */
645 return 0;
646 }
647
648 static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
649 u64 addr, unsigned int pages, int non_present_entry_flush)
650 {
651 unsigned int mask;
652
653 BUG_ON(addr & (~VTD_PAGE_MASK));
654 BUG_ON(pages == 0);
655
656 /* Fallback to domain selective flush if no PSI support */
657 if (!cap_pgsel_inv(iommu->cap))
658 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
659 DMA_TLB_DSI_FLUSH,
660 non_present_entry_flush);
661
662 /*
663 * PSI requires page size to be 2 ^ x, and the base address is naturally
664 * aligned to the size
665 */
666 mask = ilog2(__roundup_pow_of_two(pages));
667 /* Fallback to domain selective flush if size is too big */
668 if (mask > cap_max_amask_val(iommu->cap))
669 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
670 DMA_TLB_DSI_FLUSH, non_present_entry_flush);
671
672 return iommu->flush.flush_iotlb(iommu, did, addr, mask,
673 DMA_TLB_PSI_FLUSH,
674 non_present_entry_flush);
675 }
676
677 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
678 {
679 u32 pmen;
680 unsigned long flags;
681
682 spin_lock_irqsave(&iommu->register_lock, flags);
683 pmen = readl(iommu->reg + DMAR_PMEN_REG);
684 pmen &= ~DMA_PMEN_EPM;
685 writel(pmen, iommu->reg + DMAR_PMEN_REG);
686
687 /* wait for the protected region status bit to clear */
688 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
689 readl, !(pmen & DMA_PMEN_PRS), pmen);
690
691 spin_unlock_irqrestore(&iommu->register_lock, flags);
692 }
693
694 static int iommu_enable_translation(struct intel_iommu *iommu)
695 {
696 u32 sts;
697 unsigned long flags;
698
699 spin_lock_irqsave(&iommu->register_lock, flags);
700 writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);
701
702 /* Make sure hardware completes it */
703 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
704 readl, (sts & DMA_GSTS_TES), sts);
705
706 iommu->gcmd |= DMA_GCMD_TE;
707 spin_unlock_irqrestore(&iommu->register_lock, flags);
708 return 0;
709 }
710
711 static int iommu_disable_translation(struct intel_iommu *iommu)
712 {
713 u32 sts;
714 unsigned long flag;
715
716 spin_lock_irqsave(&iommu->register_lock, flag);
717 iommu->gcmd &= ~DMA_GCMD_TE;
718 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
719
720 /* Make sure hardware completes it */
721 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
722 readl, (!(sts & DMA_GSTS_TES)), sts);
723
724 spin_unlock_irqrestore(&iommu->register_lock, flag);
725 return 0;
726 }
727
728 /* iommu interrupt handling. Most of it is MSI-like. */
729
730 static const char *fault_reason_strings[] =
731 {
732 "Software",
733 "Present bit in root entry is clear",
734 "Present bit in context entry is clear",
735 "Invalid context entry",
736 "Access beyond MGAW",
737 "PTE Write access is not set",
738 "PTE Read access is not set",
739 "Next page table ptr is invalid",
740 "Root table address invalid",
741 "Context table ptr is invalid",
742 "non-zero reserved fields in RTP",
743 "non-zero reserved fields in CTP",
744 "non-zero reserved fields in PTE",
745 };
746 #define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1)
747
748 const char *dmar_get_fault_reason(u8 fault_reason)
749 {
750 if (fault_reason > MAX_FAULT_REASON_IDX)
751 return "Unknown";
752 else
753 return fault_reason_strings[fault_reason];
754 }
755
756 void dmar_msi_unmask(unsigned int irq)
757 {
758 struct intel_iommu *iommu = get_irq_data(irq);
759 unsigned long flag;
760
761 /* unmask it */
762 spin_lock_irqsave(&iommu->register_lock, flag);
763 writel(0, iommu->reg + DMAR_FECTL_REG);
764 /* Read a reg to force-flush the posted write */
765 readl(iommu->reg + DMAR_FECTL_REG);
766 spin_unlock_irqrestore(&iommu->register_lock, flag);
767 }
768
769 void dmar_msi_mask(unsigned int irq)
770 {
771 unsigned long flag;
772 struct intel_iommu *iommu = get_irq_data(irq);
773
774 /* mask it */
775 spin_lock_irqsave(&iommu->register_lock, flag);
776 writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
777 /* Read a reg to force-flush the posted write */
778 readl(iommu->reg + DMAR_FECTL_REG);
779 spin_unlock_irqrestore(&iommu->register_lock, flag);
780 }
781
782 void dmar_msi_write(int irq, struct msi_msg *msg)
783 {
784 struct intel_iommu *iommu = get_irq_data(irq);
785 unsigned long flag;
786
787 spin_lock_irqsave(&iommu->register_lock, flag);
788 writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
789 writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
790 writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
791 spin_unlock_irqrestore(&iommu->register_lock, flag);
792 }
793
794 void dmar_msi_read(int irq, struct msi_msg *msg)
795 {
796 struct intel_iommu *iommu = get_irq_data(irq);
797 unsigned long flag;
798
799 spin_lock_irqsave(&iommu->register_lock, flag);
800 msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
801 msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
802 msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
803 spin_unlock_irqrestore(&iommu->register_lock, flag);
804 }
805
806 static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
807 u8 fault_reason, u16 source_id, unsigned long long addr)
808 {
809 const char *reason;
810
811 reason = dmar_get_fault_reason(fault_reason);
812
813 printk(KERN_ERR
814 "DMAR:[%s] Request device [%02x:%02x.%d] "
815 "fault addr %llx \n"
816 "DMAR:[fault reason %02d] %s\n",
817 (type ? "DMA Read" : "DMA Write"),
818 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
819 PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
820 return 0;
821 }
822
823 #define PRIMARY_FAULT_REG_LEN (16)
824 static irqreturn_t iommu_page_fault(int irq, void *dev_id)
825 {
826 struct intel_iommu *iommu = dev_id;
827 int reg, fault_index;
828 u32 fault_status;
829 unsigned long flag;
830
831 spin_lock_irqsave(&iommu->register_lock, flag);
832 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
833
834 /* TBD: ignore advanced fault log currently */
835 if (!(fault_status & DMA_FSTS_PPF))
836 goto clear_overflow;
837
838 fault_index = dma_fsts_fault_record_index(fault_status);
839 reg = cap_fault_reg_offset(iommu->cap);
840 while (1) {
841 u8 fault_reason;
842 u16 source_id;
843 u64 guest_addr;
844 int type;
845 u32 data;
846
847 /* highest 32 bits */
848 data = readl(iommu->reg + reg +
849 fault_index * PRIMARY_FAULT_REG_LEN + 12);
850 if (!(data & DMA_FRCD_F))
851 break;
852
853 fault_reason = dma_frcd_fault_reason(data);
854 type = dma_frcd_type(data);
855
856 data = readl(iommu->reg + reg +
857 fault_index * PRIMARY_FAULT_REG_LEN + 8);
858 source_id = dma_frcd_source_id(data);
859
860 guest_addr = dmar_readq(iommu->reg + reg +
861 fault_index * PRIMARY_FAULT_REG_LEN);
862 guest_addr = dma_frcd_page_addr(guest_addr);
863 /* clear the fault */
864 writel(DMA_FRCD_F, iommu->reg + reg +
865 fault_index * PRIMARY_FAULT_REG_LEN + 12);
866
867 spin_unlock_irqrestore(&iommu->register_lock, flag);
868
869 iommu_page_fault_do_one(iommu, type, fault_reason,
870 source_id, guest_addr);
871
872 fault_index++;
873 if (fault_index > cap_num_fault_regs(iommu->cap))
874 fault_index = 0;
875 spin_lock_irqsave(&iommu->register_lock, flag);
876 }
877 clear_overflow:
878 /* clear primary fault overflow */
879 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
880 if (fault_status & DMA_FSTS_PFO)
881 writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);
882
883 spin_unlock_irqrestore(&iommu->register_lock, flag);
884 return IRQ_HANDLED;
885 }
886
887 int dmar_set_interrupt(struct intel_iommu *iommu)
888 {
889 int irq, ret;
890
891 irq = create_irq();
892 if (!irq) {
893 printk(KERN_ERR "IOMMU: no free vectors\n");
894 return -EINVAL;
895 }
896
897 set_irq_data(irq, iommu);
898 iommu->irq = irq;
899
900 ret = arch_setup_dmar_msi(irq);
901 if (ret) {
902 set_irq_data(irq, NULL);
903 iommu->irq = 0;
904 destroy_irq(irq);
905 return 0;
906 }
907
908 /* Make sure any pending faults are cleared before requesting the irq */
909 iommu_page_fault(irq, iommu);
910
911 ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
912 if (ret)
913 printk(KERN_ERR "IOMMU: can't request irq\n");
914 return ret;
915 }
916
917 static int iommu_init_domains(struct intel_iommu *iommu)
918 {
919 unsigned long ndomains;
920 unsigned long nlongs;
921
922 ndomains = cap_ndoms(iommu->cap);
923 pr_debug("Number of Domains supported <%ld>\n", ndomains);
924 nlongs = BITS_TO_LONGS(ndomains);
925
926 /* TBD: there might be 64K domains,
927 * consider other allocation for future chip
928 */
929 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
930 if (!iommu->domain_ids) {
931 printk(KERN_ERR "Allocating domain id array failed\n");
932 return -ENOMEM;
933 }
934 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
935 GFP_KERNEL);
936 if (!iommu->domains) {
937 printk(KERN_ERR "Allocating domain array failed\n");
938 kfree(iommu->domain_ids);
939 return -ENOMEM;
940 }
941
942 spin_lock_init(&iommu->lock);
943
944 /*
945 * if Caching mode is set, then invalid translations are tagged
946 * with domainid 0. Hence we need to pre-allocate it.
947 */
948 if (cap_caching_mode(iommu->cap))
949 set_bit(0, iommu->domain_ids);
950 return 0;
951 }
952
953
954 static void domain_exit(struct dmar_domain *domain);
955
956 void free_dmar_iommu(struct intel_iommu *iommu)
957 {
958 struct dmar_domain *domain;
959 int i;
960
961 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
962 for (; i < cap_ndoms(iommu->cap); ) {
963 domain = iommu->domains[i];
964 clear_bit(i, iommu->domain_ids);
965 domain_exit(domain);
966 i = find_next_bit(iommu->domain_ids,
967 cap_ndoms(iommu->cap), i+1);
968 }
969
970 if (iommu->gcmd & DMA_GCMD_TE)
971 iommu_disable_translation(iommu);
972
973 if (iommu->irq) {
974 set_irq_data(iommu->irq, NULL);
975 /* This will mask the irq */
976 free_irq(iommu->irq, iommu);
977 destroy_irq(iommu->irq);
978 }
979
980 kfree(iommu->domains);
981 kfree(iommu->domain_ids);
982
983 /* free context mapping */
984 free_context_table(iommu);
985 }
986
987 static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
988 {
989 unsigned long num;
990 unsigned long ndomains;
991 struct dmar_domain *domain;
992 unsigned long flags;
993
994 domain = alloc_domain_mem();
995 if (!domain)
996 return NULL;
997
998 ndomains = cap_ndoms(iommu->cap);
999
1000 spin_lock_irqsave(&iommu->lock, flags);
1001 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1002 if (num >= ndomains) {
1003 spin_unlock_irqrestore(&iommu->lock, flags);
1004 free_domain_mem(domain);
1005 printk(KERN_ERR "IOMMU: no free domain ids\n");
1006 return NULL;
1007 }
1008
1009 set_bit(num, iommu->domain_ids);
1010 domain->id = num;
1011 domain->iommu = iommu;
1012 iommu->domains[num] = domain;
1013 spin_unlock_irqrestore(&iommu->lock, flags);
1014
1015 return domain;
1016 }
1017
1018 static void iommu_free_domain(struct dmar_domain *domain)
1019 {
1020 unsigned long flags;
1021
1022 spin_lock_irqsave(&domain->iommu->lock, flags);
1023 clear_bit(domain->id, domain->iommu->domain_ids);
1024 spin_unlock_irqrestore(&domain->iommu->lock, flags);
1025 }
1026
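/*
 * IOVA ranges that must never be handed out to devices (the IOAPIC window
 * and all PCI MMIO regions); dmar_init_reserved_ranges() fills this in and
 * domain_reserve_special_ranges() copies it into every new domain.
 */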
1027 static struct iova_domain reserved_iova_list;
1028 static struct lock_class_key reserved_alloc_key;
1029 static struct lock_class_key reserved_rbtree_key;
1030
1031 static void dmar_init_reserved_ranges(void)
1032 {
1033 struct pci_dev *pdev = NULL;
1034 struct iova *iova;
1035 int i;
1036 u64 addr, size;
1037
1038 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1039
1040 lockdep_set_class(&reserved_iova_list.iova_alloc_lock,
1041 &reserved_alloc_key);
1042 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1043 &reserved_rbtree_key);
1044
1045 /* IOAPIC ranges shouldn't be accessed by DMA */
1046 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1047 IOVA_PFN(IOAPIC_RANGE_END));
1048 if (!iova)
1049 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1050
1051 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1052 for_each_pci_dev(pdev) {
1053 struct resource *r;
1054
1055 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1056 r = &pdev->resource[i];
1057 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1058 continue;
1059 addr = r->start;
1060 addr &= PAGE_MASK;
1061 size = r->end - addr;
1062 size = PAGE_ALIGN(size);
1063 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
1064 IOVA_PFN(size + addr) - 1);
1065 if (!iova)
1066 printk(KERN_ERR "Reserve iova failed\n");
1067 }
1068 }
1069
1070 }
1071
1072 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1073 {
1074 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1075 }
1076
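/*
 * Round the guest address width up to the nearest width the page-table
 * layout can express: 12 bits of page offset plus a whole number of
 * 9-bit levels, capped at 64 bits.
 */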
1077 static inline int guestwidth_to_adjustwidth(int gaw)
1078 {
1079 int agaw;
1080 int r = (gaw - 12) % 9;
1081
1082 if (r == 0)
1083 agaw = gaw;
1084 else
1085 agaw = gaw + 9 - r;
1086 if (agaw > 64)
1087 agaw = 64;
1088 return agaw;
1089 }
1090
1091 static int domain_init(struct dmar_domain *domain, int guest_width)
1092 {
1093 struct intel_iommu *iommu;
1094 int adjust_width, agaw;
1095 unsigned long sagaw;
1096
1097 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1098 spin_lock_init(&domain->mapping_lock);
1099
1100 domain_reserve_special_ranges(domain);
1101
1102 /* calculate AGAW */
1103 iommu = domain->iommu;
1104 if (guest_width > cap_mgaw(iommu->cap))
1105 guest_width = cap_mgaw(iommu->cap);
1106 domain->gaw = guest_width;
1107 adjust_width = guestwidth_to_adjustwidth(guest_width);
1108 agaw = width_to_agaw(adjust_width);
1109 sagaw = cap_sagaw(iommu->cap);
1110 if (!test_bit(agaw, &sagaw)) {
1111 /* hardware doesn't support it, choose a bigger one */
1112 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1113 agaw = find_next_bit(&sagaw, 5, agaw);
1114 if (agaw >= 5)
1115 return -ENODEV;
1116 }
1117 domain->agaw = agaw;
1118 INIT_LIST_HEAD(&domain->devices);
1119
1120 /* always allocate the top pgd */
1121 domain->pgd = (struct dma_pte *)alloc_pgtable_page();
1122 if (!domain->pgd)
1123 return -ENOMEM;
1124 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1125 return 0;
1126 }
1127
1128 static void domain_exit(struct dmar_domain *domain)
1129 {
1130 u64 end;
1131
1132 /* Domain 0 is reserved, so don't process it */
1133 if (!domain)
1134 return;
1135
1136 domain_remove_dev_info(domain);
1137 /* destroy iovas */
1138 put_iova_domain(&domain->iovad);
1139 end = DOMAIN_MAX_ADDR(domain->gaw);
1140 end = end & (~PAGE_MASK);
1141
1142 /* clear ptes */
1143 dma_pte_clear_range(domain, 0, end);
1144
1145 /* free page tables */
1146 dma_pte_free_pagetable(domain, 0, end);
1147
1148 iommu_free_domain(domain);
1149 free_domain_mem(domain);
1150 }
1151
1152 static int domain_context_mapping_one(struct dmar_domain *domain,
1153 u8 bus, u8 devfn)
1154 {
1155 struct context_entry *context;
1156 struct intel_iommu *iommu = domain->iommu;
1157 unsigned long flags;
1158
1159 pr_debug("Set context mapping for %02x:%02x.%d\n",
1160 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1161 BUG_ON(!domain->pgd);
1162 context = device_to_context_entry(iommu, bus, devfn);
1163 if (!context)
1164 return -ENOMEM;
1165 spin_lock_irqsave(&iommu->lock, flags);
1166 if (context_present(*context)) {
1167 spin_unlock_irqrestore(&iommu->lock, flags);
1168 return 0;
1169 }
1170
1171 context_set_domain_id(*context, domain->id);
1172 context_set_address_width(*context, domain->agaw);
1173 context_set_address_root(*context, virt_to_phys(domain->pgd));
1174 context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
1175 context_set_fault_enable(*context);
1176 context_set_present(*context);
1177 __iommu_flush_cache(iommu, context, sizeof(*context));
1178
1179 /* it's a non-present to present mapping */
1180 if (iommu->flush.flush_context(iommu, domain->id,
1181 (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT,
1182 DMA_CCMD_DEVICE_INVL, 1))
1183 iommu_flush_write_buffer(iommu);
1184 else
1185 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0);
1186
1187 spin_unlock_irqrestore(&iommu->lock, flags);
1188 return 0;
1189 }
1190
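/*
 * Set up context entries for the device itself and for every bridge between
 * it and the upstream PCIe-to-PCI bridge (if any), since DMA from devices
 * behind a conventional PCI bridge may carry the bridge's source-id.
 */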
1191 static int
1192 domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
1193 {
1194 int ret;
1195 struct pci_dev *tmp, *parent;
1196
1197 ret = domain_context_mapping_one(domain, pdev->bus->number,
1198 pdev->devfn);
1199 if (ret)
1200 return ret;
1201
1202 /* dependent device mapping */
1203 tmp = pci_find_upstream_pcie_bridge(pdev);
1204 if (!tmp)
1205 return 0;
1206 /* Secondary interface's bus number and devfn 0 */
1207 parent = pdev->bus->self;
1208 while (parent != tmp) {
1209 ret = domain_context_mapping_one(domain, parent->bus->number,
1210 parent->devfn);
1211 if (ret)
1212 return ret;
1213 parent = parent->bus->self;
1214 }
1215 if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
1216 return domain_context_mapping_one(domain,
1217 tmp->subordinate->number, 0);
1218 else /* this is a legacy PCI bridge */
1219 return domain_context_mapping_one(domain,
1220 tmp->bus->number, tmp->devfn);
1221 }
1222
1223 static int domain_context_mapped(struct dmar_domain *domain,
1224 struct pci_dev *pdev)
1225 {
1226 int ret;
1227 struct pci_dev *tmp, *parent;
1228
1229 ret = device_context_mapped(domain->iommu,
1230 pdev->bus->number, pdev->devfn);
1231 if (!ret)
1232 return ret;
1233 /* dependent device mapping */
1234 tmp = pci_find_upstream_pcie_bridge(pdev);
1235 if (!tmp)
1236 return ret;
1237 /* Secondary interface's bus number and devfn 0 */
1238 parent = pdev->bus->self;
1239 while (parent != tmp) {
1240 ret = device_context_mapped(domain->iommu, parent->bus->number,
1241 parent->devfn);
1242 if (!ret)
1243 return ret;
1244 parent = parent->bus->self;
1245 }
1246 if (tmp->is_pcie)
1247 return device_context_mapped(domain->iommu,
1248 tmp->subordinate->number, 0);
1249 else
1250 return device_context_mapped(domain->iommu,
1251 tmp->bus->number, tmp->devfn);
1252 }
1253
1254 static int
1255 domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
1256 u64 hpa, size_t size, int prot)
1257 {
1258 u64 start_pfn, end_pfn;
1259 struct dma_pte *pte;
1260 int index;
1261 int addr_width = agaw_to_width(domain->agaw);
1262
1263 hpa &= (((u64)1) << addr_width) - 1;
1264
1265 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1266 return -EINVAL;
1267 iova &= PAGE_MASK;
1268 start_pfn = ((u64)hpa) >> VTD_PAGE_SHIFT;
1269 end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT;
1270 index = 0;
1271 while (start_pfn < end_pfn) {
1272 pte = addr_to_dma_pte(domain, iova + VTD_PAGE_SIZE * index);
1273 if (!pte)
1274 return -ENOMEM;
1275 /* We don't need lock here, nobody else
1276 * touches the iova range
1277 */
1278 BUG_ON(dma_pte_addr(*pte));
1279 dma_set_pte_addr(*pte, start_pfn << VTD_PAGE_SHIFT);
1280 dma_set_pte_prot(*pte, prot);
1281 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
1282 start_pfn++;
1283 index++;
1284 }
1285 return 0;
1286 }
1287
1288 static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
1289 {
1290 clear_context_table(domain->iommu, bus, devfn);
1291 domain->iommu->flush.flush_context(domain->iommu, 0, 0, 0,
1292 DMA_CCMD_GLOBAL_INVL, 0);
1293 domain->iommu->flush.flush_iotlb(domain->iommu, 0, 0, 0,
1294 DMA_TLB_GLOBAL_FLUSH, 0);
1295 }
1296
1297 static void domain_remove_dev_info(struct dmar_domain *domain)
1298 {
1299 struct device_domain_info *info;
1300 unsigned long flags;
1301
1302 spin_lock_irqsave(&device_domain_lock, flags);
1303 while (!list_empty(&domain->devices)) {
1304 info = list_entry(domain->devices.next,
1305 struct device_domain_info, link);
1306 list_del(&info->link);
1307 list_del(&info->global);
1308 if (info->dev)
1309 info->dev->dev.archdata.iommu = NULL;
1310 spin_unlock_irqrestore(&device_domain_lock, flags);
1311
1312 detach_domain_for_dev(info->domain, info->bus, info->devfn);
1313 free_devinfo_mem(info);
1314
1315 spin_lock_irqsave(&device_domain_lock, flags);
1316 }
1317 spin_unlock_irqrestore(&device_domain_lock, flags);
1318 }
1319
1320 /*
1321 * find_domain
1322 * Note: struct pci_dev->dev.archdata.iommu stores the domain info
1323 */
1324 static struct dmar_domain *
1325 find_domain(struct pci_dev *pdev)
1326 {
1327 struct device_domain_info *info;
1328
1329 /* No lock here, assumes no domain exit in normal case */
1330 info = pdev->dev.archdata.iommu;
1331 if (info)
1332 return info->domain;
1333 return NULL;
1334 }
1335
1336 /* domain is initialized */
1337 static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1338 {
1339 struct dmar_domain *domain, *found = NULL;
1340 struct intel_iommu *iommu;
1341 struct dmar_drhd_unit *drhd;
1342 struct device_domain_info *info, *tmp;
1343 struct pci_dev *dev_tmp;
1344 unsigned long flags;
1345 int bus = 0, devfn = 0;
1346
1347 domain = find_domain(pdev);
1348 if (domain)
1349 return domain;
1350
1351 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1352 if (dev_tmp) {
1353 if (dev_tmp->is_pcie) {
1354 bus = dev_tmp->subordinate->number;
1355 devfn = 0;
1356 } else {
1357 bus = dev_tmp->bus->number;
1358 devfn = dev_tmp->devfn;
1359 }
1360 spin_lock_irqsave(&device_domain_lock, flags);
1361 list_for_each_entry(info, &device_domain_list, global) {
1362 if (info->bus == bus && info->devfn == devfn) {
1363 found = info->domain;
1364 break;
1365 }
1366 }
1367 spin_unlock_irqrestore(&device_domain_lock, flags);
1368 /* the pcie-pci bridge already has a domain, use it */
1369 if (found) {
1370 domain = found;
1371 goto found_domain;
1372 }
1373 }
1374
1375 /* Allocate new domain for the device */
1376 drhd = dmar_find_matched_drhd_unit(pdev);
1377 if (!drhd) {
1378 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1379 pci_name(pdev));
1380 return NULL;
1381 }
1382 iommu = drhd->iommu;
1383
1384 domain = iommu_alloc_domain(iommu);
1385 if (!domain)
1386 goto error;
1387
1388 if (domain_init(domain, gaw)) {
1389 domain_exit(domain);
1390 goto error;
1391 }
1392
1393 /* register pcie-to-pci device */
1394 if (dev_tmp) {
1395 info = alloc_devinfo_mem();
1396 if (!info) {
1397 domain_exit(domain);
1398 goto error;
1399 }
1400 info->bus = bus;
1401 info->devfn = devfn;
1402 info->dev = NULL;
1403 info->domain = domain;
1404 /* This domain is shared by devices under p2p bridge */
1405 domain->flags |= DOMAIN_FLAG_MULTIPLE_DEVICES;
1406
1407 /* the pcie-to-pci bridge already has a domain, use it */
1408 found = NULL;
1409 spin_lock_irqsave(&device_domain_lock, flags);
1410 list_for_each_entry(tmp, &device_domain_list, global) {
1411 if (tmp->bus == bus && tmp->devfn == devfn) {
1412 found = tmp->domain;
1413 break;
1414 }
1415 }
1416 if (found) {
1417 free_devinfo_mem(info);
1418 domain_exit(domain);
1419 domain = found;
1420 } else {
1421 list_add(&info->link, &domain->devices);
1422 list_add(&info->global, &device_domain_list);
1423 }
1424 spin_unlock_irqrestore(&device_domain_lock, flags);
1425 }
1426
1427 found_domain:
1428 info = alloc_devinfo_mem();
1429 if (!info)
1430 goto error;
1431 info->bus = pdev->bus->number;
1432 info->devfn = pdev->devfn;
1433 info->dev = pdev;
1434 info->domain = domain;
1435 spin_lock_irqsave(&device_domain_lock, flags);
1436 /* somebody else may have set it already */
1437 found = find_domain(pdev);
1438 if (found != NULL) {
1439 spin_unlock_irqrestore(&device_domain_lock, flags);
1440 if (found != domain) {
1441 domain_exit(domain);
1442 domain = found;
1443 }
1444 free_devinfo_mem(info);
1445 return domain;
1446 }
1447 list_add(&info->link, &domain->devices);
1448 list_add(&info->global, &device_domain_list);
1449 pdev->dev.archdata.iommu = info;
1450 spin_unlock_irqrestore(&device_domain_lock, flags);
1451 return domain;
1452 error:
1453 /* recheck it here, maybe others set it */
1454 return find_domain(pdev);
1455 }
1456
1457 static int iommu_prepare_identity_map(struct pci_dev *pdev,
1458 unsigned long long start,
1459 unsigned long long end)
1460 {
1461 struct dmar_domain *domain;
1462 unsigned long size;
1463 unsigned long long base;
1464 int ret;
1465
1466 printk(KERN_INFO
1467 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1468 pci_name(pdev), start, end);
1469 /* page table init */
1470 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1471 if (!domain)
1472 return -ENOMEM;
1473
1474 /* The address might not be aligned */
1475 base = start & PAGE_MASK;
1476 size = end - base;
1477 size = PAGE_ALIGN(size);
1478 if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1479 IOVA_PFN(base + size) - 1)) {
1480 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1481 ret = -ENOMEM;
1482 goto error;
1483 }
1484
1485 pr_debug("Mapping reserved region %lx@%llx for %s\n",
1486 size, base, pci_name(pdev));
1487 /*
1488 * RMRR range might have overlap with physical memory range,
1489 * clear it first
1490 */
1491 dma_pte_clear_range(domain, base, base + size);
1492
1493 ret = domain_page_mapping(domain, base, base, size,
1494 DMA_PTE_READ|DMA_PTE_WRITE);
1495 if (ret)
1496 goto error;
1497
1498 /* context entry init */
1499 ret = domain_context_mapping(domain, pdev);
1500 if (!ret)
1501 return 0;
1502 error:
1503 domain_exit(domain);
1504 return ret;
1505
1506 }
1507
1508 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1509 struct pci_dev *pdev)
1510 {
1511 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1512 return 0;
1513 return iommu_prepare_identity_map(pdev, rmrr->base_address,
1514 rmrr->end_address + 1);
1515 }
1516
1517 #ifdef CONFIG_DMAR_GFX_WA
1518 struct iommu_prepare_data {
1519 struct pci_dev *pdev;
1520 int ret;
1521 };
1522
1523 static int __init iommu_prepare_work_fn(unsigned long start_pfn,
1524 unsigned long end_pfn, void *datax)
1525 {
1526 struct iommu_prepare_data *data;
1527
1528 data = (struct iommu_prepare_data *)datax;
1529
1530 data->ret = iommu_prepare_identity_map(data->pdev,
1531 start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
1532 return data->ret;
1533
1534 }
1535
1536 static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev)
1537 {
1538 int nid;
1539 struct iommu_prepare_data data;
1540
1541 data.pdev = pdev;
1542 data.ret = 0;
1543
1544 for_each_online_node(nid) {
1545 work_with_active_regions(nid, iommu_prepare_work_fn, &data);
1546 if (data.ret)
1547 return data.ret;
1548 }
1549 return data.ret;
1550 }
1551
1552 static void __init iommu_prepare_gfx_mapping(void)
1553 {
1554 struct pci_dev *pdev = NULL;
1555 int ret;
1556
1557 for_each_pci_dev(pdev) {
1558 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
1559 !IS_GFX_DEVICE(pdev))
1560 continue;
1561 printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
1562 pci_name(pdev));
1563 ret = iommu_prepare_with_active_regions(pdev);
1564 if (ret)
1565 printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
1566 }
1567 }
1568 #endif
1569
1570 #ifdef CONFIG_DMAR_FLOPPY_WA
1571 static inline void iommu_prepare_isa(void)
1572 {
1573 struct pci_dev *pdev;
1574 int ret;
1575
1576 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
1577 if (!pdev)
1578 return;
1579
1580 printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
1581 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
1582
1583 if (ret)
1584 printk(KERN_ERR "IOMMU: Failed to create 0-16M identity map, "
1585 "floppy might not work\n");
1586
1587 }
1588 #else
1589 static inline void iommu_prepare_isa(void)
1590 {
1591 return;
1592 }
1593 #endif /* !CONFIG_DMAR_FLOPPY_WA */
1594
1595 static int __init init_dmars(void)
1596 {
1597 struct dmar_drhd_unit *drhd;
1598 struct dmar_rmrr_unit *rmrr;
1599 struct pci_dev *pdev;
1600 struct intel_iommu *iommu;
1601 int i, ret, unit = 0;
1602
1603 /*
1604 * for each drhd
1605 * allocate root
1606 * initialize and program root entry to not present
1607 * endfor
1608 */
1609 for_each_drhd_unit(drhd) {
1610 g_num_of_iommus++;
1611 /*
1612 * lock not needed: this is only incremented in the single-
1613 * threaded kernel __init code path; all other accesses are
1614 * read-only
1615 */
1616 }
1617
1618 deferred_flush = kzalloc(g_num_of_iommus *
1619 sizeof(struct deferred_flush_tables), GFP_KERNEL);
1620 if (!deferred_flush) {
1621 ret = -ENOMEM;
1622 goto error;
1623 }
1624
1625 for_each_drhd_unit(drhd) {
1626 if (drhd->ignored)
1627 continue;
1628
1629 iommu = drhd->iommu;
1630
1631 ret = iommu_init_domains(iommu);
1632 if (ret)
1633 goto error;
1634
1635 /*
1636 * TBD:
1637 * we could share the same root & context tables
1638 * among all IOMMUs. Need to split it later.
1639 */
1640 ret = iommu_alloc_root_entry(iommu);
1641 if (ret) {
1642 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
1643 goto error;
1644 }
1645 }
1646
1647 for_each_drhd_unit(drhd) {
1648 if (drhd->ignored)
1649 continue;
1650
1651 iommu = drhd->iommu;
1652 if (dmar_enable_qi(iommu)) {
1653 /*
1654 * Queued Invalidate not enabled, use Register Based
1655 * Invalidate
1656 */
1657 iommu->flush.flush_context = __iommu_flush_context;
1658 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
1659 printk(KERN_INFO "IOMMU 0x%Lx: using Register based "
1660 "invalidation\n",
1661 (unsigned long long)drhd->reg_base_addr);
1662 } else {
1663 iommu->flush.flush_context = qi_flush_context;
1664 iommu->flush.flush_iotlb = qi_flush_iotlb;
1665 printk(KERN_INFO "IOMMU 0x%Lx: using Queued "
1666 "invalidation\n",
1667 (unsigned long long)drhd->reg_base_addr);
1668 }
1669 }
1670
1671 /*
1672 * For each rmrr
1673 * for each dev attached to rmrr
1674 * do
1675 * locate drhd for dev, alloc domain for dev
1676 * allocate free domain
1677 * allocate page table entries for rmrr
1678 * if context not allocated for bus
1679 * allocate and init context
1680 * set present in root table for this bus
1681 * init context with domain, translation etc
1682 * endfor
1683 * endfor
1684 */
1685 for_each_rmrr_units(rmrr) {
1686 for (i = 0; i < rmrr->devices_cnt; i++) {
1687 pdev = rmrr->devices[i];
1688 /* some BIOSes list non-existent devices in the DMAR table */
1689 if (!pdev)
1690 continue;
1691 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
1692 if (ret)
1693 printk(KERN_ERR
1694 "IOMMU: mapping reserved region failed\n");
1695 }
1696 }
1697
1698 iommu_prepare_gfx_mapping();
1699
1700 iommu_prepare_isa();
1701
1702 /*
1703 * for each drhd
1704 * enable fault log
1705 * global invalidate context cache
1706 * global invalidate iotlb
1707 * enable translation
1708 */
1709 for_each_drhd_unit(drhd) {
1710 if (drhd->ignored)
1711 continue;
1712 iommu = drhd->iommu;
1713 sprintf (iommu->name, "dmar%d", unit++);
1714
1715 iommu_flush_write_buffer(iommu);
1716
1717 ret = dmar_set_interrupt(iommu);
1718 if (ret)
1719 goto error;
1720
1721 iommu_set_root_entry(iommu);
1722
1723 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
1724 0);
1725 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
1726 0);
1727 iommu_disable_protect_mem_regions(iommu);
1728
1729 ret = iommu_enable_translation(iommu);
1730 if (ret)
1731 goto error;
1732 }
1733
1734 return 0;
1735 error:
1736 for_each_drhd_unit(drhd) {
1737 if (drhd->ignored)
1738 continue;
1739 iommu = drhd->iommu;
1740 free_iommu(iommu);
1741 }
1742 return ret;
1743 }
1744
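/* Number of bytes of whole pages needed to cover [host_addr, host_addr + size). */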
1745 static inline u64 aligned_size(u64 host_addr, size_t size)
1746 {
1747 u64 addr;
1748 addr = (host_addr & (~PAGE_MASK)) + size;
1749 return PAGE_ALIGN(addr);
1750 }
1751
1752 struct iova *
1753 iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
1754 {
1755 struct iova *piova;
1756
1757 /* Make sure it's in range */
1758 end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
1759 if (!size || (IOVA_START_ADDR + size > end))
1760 return NULL;
1761
1762 piova = alloc_iova(&domain->iovad,
1763 size >> PAGE_SHIFT, IOVA_PFN(end), 1);
1764 return piova;
1765 }
1766
1767 static struct iova *
1768 __intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
1769 size_t size, u64 dma_mask)
1770 {
1771 struct pci_dev *pdev = to_pci_dev(dev);
1772 struct iova *iova = NULL;
1773
1774 if (dma_mask <= DMA_32BIT_MASK || dmar_forcedac)
1775 iova = iommu_alloc_iova(domain, size, dma_mask);
1776 else {
1777 /*
1778 * First try to allocate an io virtual address in
1779 * DMA_32BIT_MASK and if that fails then try allocating
1780 * from higher range
1781 */
1782 iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK);
1783 if (!iova)
1784 iova = iommu_alloc_iova(domain, size, dma_mask);
1785 }
1786
1787 if (!iova) {
1788 printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev));
1789 return NULL;
1790 }
1791
1792 return iova;
1793 }
1794
1795 static struct dmar_domain *
1796 get_valid_domain_for_dev(struct pci_dev *pdev)
1797 {
1798 struct dmar_domain *domain;
1799 int ret;
1800
1801 domain = get_domain_for_dev(pdev,
1802 DEFAULT_DOMAIN_ADDRESS_WIDTH);
1803 if (!domain) {
1804 printk(KERN_ERR
1805 "Allocating domain for %s failed", pci_name(pdev));
1806 return NULL;
1807 }
1808
1809 /* make sure context mapping is ok */
1810 if (unlikely(!domain_context_mapped(domain, pdev))) {
1811 ret = domain_context_mapping(domain, pdev);
1812 if (ret) {
1813 printk(KERN_ERR
1814 "Domain context map for %s failed",
1815 pci_name(pdev));
1816 return NULL;
1817 }
1818 }
1819
1820 return domain;
1821 }
1822
1823 static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
1824 size_t size, int dir, u64 dma_mask)
1825 {
1826 struct pci_dev *pdev = to_pci_dev(hwdev);
1827 struct dmar_domain *domain;
1828 phys_addr_t start_paddr;
1829 struct iova *iova;
1830 int prot = 0;
1831 int ret;
1832
1833 BUG_ON(dir == DMA_NONE);
1834 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1835 return paddr;
1836
1837 domain = get_valid_domain_for_dev(pdev);
1838 if (!domain)
1839 return 0;
1840
1841 size = aligned_size((u64)paddr, size);
1842
1843 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
1844 if (!iova)
1845 goto error;
1846
1847 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
1848
1849 /*
1850 * Check if DMAR supports zero-length reads on write-only
1851 * mappings.
1852 */
1853 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
1854 !cap_zlr(domain->iommu->cap))
1855 prot |= DMA_PTE_READ;
1856 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
1857 prot |= DMA_PTE_WRITE;
1858 /*
1859 * paddr through paddr + size might cover only part of a page, so map
1860 * the whole page. Note: if two parts of one page are mapped separately,
1861 * two guest addresses may map to the same host paddr, but this
1862 * is not a big problem
1863 */
1864 ret = domain_page_mapping(domain, start_paddr,
1865 ((u64)paddr) & PAGE_MASK, size, prot);
1866 if (ret)
1867 goto error;
1868
1869 /* it's a non-present to present mapping */
1870 ret = iommu_flush_iotlb_psi(domain->iommu, domain->id,
1871 start_paddr, size >> VTD_PAGE_SHIFT, 1);
1872 if (ret)
1873 iommu_flush_write_buffer(domain->iommu);
1874
1875 return start_paddr + ((u64)paddr & (~PAGE_MASK));
1876
1877 error:
1878 if (iova)
1879 __free_iova(&domain->iovad, iova);
1880 printk(KERN_ERR"Device %s request: %lx@%llx dir %d --- failed\n",
1881 pci_name(pdev), size, (unsigned long long)paddr, dir);
1882 return 0;
1883 }
1884
1885 dma_addr_t intel_map_single(struct device *hwdev, phys_addr_t paddr,
1886 size_t size, int dir)
1887 {
1888 return __intel_map_single(hwdev, paddr, size, dir,
1889 to_pci_dev(hwdev)->dma_mask);
1890 }
1891
1892 static void flush_unmaps(void)
1893 {
1894 int i, j;
1895
1896 timer_on = 0;
1897
1898 /* just flush them all */
1899 for (i = 0; i < g_num_of_iommus; i++) {
1900 if (deferred_flush[i].next) {
1901 struct intel_iommu *iommu =
1902 deferred_flush[i].domain[0]->iommu;
1903
1904 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1905 DMA_TLB_GLOBAL_FLUSH, 0);
1906 for (j = 0; j < deferred_flush[i].next; j++) {
1907 __free_iova(&deferred_flush[i].domain[j]->iovad,
1908 deferred_flush[i].iova[j]);
1909 }
1910 deferred_flush[i].next = 0;
1911 }
1912 }
1913
1914 list_size = 0;
1915 }
1916
1917 static void flush_unmaps_timeout(unsigned long data)
1918 {
1919 unsigned long flags;
1920
1921 spin_lock_irqsave(&async_umap_flush_lock, flags);
1922 flush_unmaps();
1923 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
1924 }
1925
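/*
 * Queue an IOVA for deferred freeing. The 10ms unmap_timer (or hitting
 * HIGH_WATER_MARK queued entries) triggers flush_unmaps(), which does one
 * global IOTLB flush per IOMMU and then frees the queued IOVAs.
 */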
1926 static void add_unmap(struct dmar_domain *dom, struct iova *iova)
1927 {
1928 unsigned long flags;
1929 int next, iommu_id;
1930
1931 spin_lock_irqsave(&async_umap_flush_lock, flags);
1932 if (list_size == HIGH_WATER_MARK)
1933 flush_unmaps();
1934
1935 iommu_id = dom->iommu->seq_id;
1936
1937 next = deferred_flush[iommu_id].next;
1938 deferred_flush[iommu_id].domain[next] = dom;
1939 deferred_flush[iommu_id].iova[next] = iova;
1940 deferred_flush[iommu_id].next++;
1941
1942 if (!timer_on) {
1943 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
1944 timer_on = 1;
1945 }
1946 list_size++;
1947 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
1948 }
1949
1950 void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
1951 int dir)
1952 {
1953 struct pci_dev *pdev = to_pci_dev(dev);
1954 struct dmar_domain *domain;
1955 unsigned long start_addr;
1956 struct iova *iova;
1957
1958 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1959 return;
1960 domain = find_domain(pdev);
1961 BUG_ON(!domain);
1962
1963 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
1964 if (!iova)
1965 return;
1966
1967 start_addr = iova->pfn_lo << PAGE_SHIFT;
1968 size = aligned_size((u64)dev_addr, size);
1969
1970 pr_debug("Device %s unmapping: %lx@%llx\n",
1971 pci_name(pdev), size, (unsigned long long)start_addr);
1972
1973 /* clear the whole page */
1974 dma_pte_clear_range(domain, start_addr, start_addr + size);
1975 /* free page tables */
1976 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
1977 if (intel_iommu_strict) {
1978 if (iommu_flush_iotlb_psi(domain->iommu,
1979 domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0))
1980 iommu_flush_write_buffer(domain->iommu);
1981 /* free iova */
1982 __free_iova(&domain->iovad, iova);
1983 } else {
1984 add_unmap(domain, iova);
1985 /*
1986 * queue up the release of the unmap to save the 1/6th of the
1987 * cpu used up by the iotlb flush operation...
1988 */
1989 }
1990 }
1991
1992 void *intel_alloc_coherent(struct device *hwdev, size_t size,
1993 dma_addr_t *dma_handle, gfp_t flags)
1994 {
1995 void *vaddr;
1996 int order;
1997
1998 size = PAGE_ALIGN(size);
1999 order = get_order(size);
2000 flags &= ~(GFP_DMA | GFP_DMA32);
2001
2002 vaddr = (void *)__get_free_pages(flags, order);
2003 if (!vaddr)
2004 return NULL;
2005 memset(vaddr, 0, size);
2006
2007 *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2008 DMA_BIDIRECTIONAL,
2009 hwdev->coherent_dma_mask);
2010 if (*dma_handle)
2011 return vaddr;
2012 free_pages((unsigned long)vaddr, order);
2013 return NULL;
2014 }
2015
2016 void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
2017 dma_addr_t dma_handle)
2018 {
2019 int order;
2020
2021 size = PAGE_ALIGN(size);
2022 order = get_order(size);
2023
2024 intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
2025 free_pages((unsigned long)vaddr, order);
2026 }
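/*
 * Illustrative sketch only (hypothetical driver code): dma_alloc_coherent()
 * and dma_free_coherent() end up in the two helpers above once intel_dma_ops
 * is installed.  "pdev" and RING_BYTES are invented for the example.
 *
 *	dma_addr_t ring_dma;
 *	void *ring = dma_alloc_coherent(&pdev->dev, RING_BYTES,
 *					&ring_dma, GFP_KERNEL);
 *	if (!ring)
 *		return -ENOMEM;
 *	...
 *	dma_free_coherent(&pdev->dev, RING_BYTES, ring, ring_dma);
 */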
2027
2028 #define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
2029
2030 void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2031 int nelems, int dir)
2032 {
2033 int i;
2034 struct pci_dev *pdev = to_pci_dev(hwdev);
2035 struct dmar_domain *domain;
2036 unsigned long start_addr;
2037 struct iova *iova;
2038 size_t size = 0;
2039 void *addr;
2040 struct scatterlist *sg;
2041
2042 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2043 return;
2044
2045 domain = find_domain(pdev);
2046
2047 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
2048 if (!iova)
2049 return;
2050 for_each_sg(sglist, sg, nelems, i) {
2051 addr = SG_ENT_VIRT_ADDRESS(sg);
2052 size += aligned_size((u64)addr, sg->length);
2053 }
2054
2055 start_addr = iova->pfn_lo << PAGE_SHIFT;
2056
 2057 /* clear the PTEs covering the whole mapped range */
2058 dma_pte_clear_range(domain, start_addr, start_addr + size);
2059 /* free page tables */
2060 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2061
2062 if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
2063 size >> VTD_PAGE_SHIFT, 0))
2064 iommu_flush_write_buffer(domain->iommu);
2065
2066 /* free iova */
2067 __free_iova(&domain->iovad, iova);
2068 }
2069
 2070 static int intel_nontranslate_map_sg(struct device *hwdev,
2071 struct scatterlist *sglist, int nelems, int dir)
2072 {
2073 int i;
2074 struct scatterlist *sg;
2075
2076 for_each_sg(sglist, sg, nelems, i) {
2077 BUG_ON(!sg_page(sg));
2078 sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
2079 sg->dma_length = sg->length;
2080 }
2081 return nelems;
2082 }
2083
2084 int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
2085 int dir)
2086 {
2087 void *addr;
2088 int i;
2089 struct pci_dev *pdev = to_pci_dev(hwdev);
2090 struct dmar_domain *domain;
2091 size_t size = 0;
2092 int prot = 0;
2093 size_t offset = 0;
2094 struct iova *iova = NULL;
2095 int ret;
2096 struct scatterlist *sg;
2097 unsigned long start_addr;
2098
2099 BUG_ON(dir == DMA_NONE);
2100 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2101 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
2102
2103 domain = get_valid_domain_for_dev(pdev);
2104 if (!domain)
2105 return 0;
2106
2107 for_each_sg(sglist, sg, nelems, i) {
2108 addr = SG_ENT_VIRT_ADDRESS(sg);
2109 addr = (void *)virt_to_phys(addr);
2110 size += aligned_size((u64)addr, sg->length);
2111 }
2112
2113 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
2114 if (!iova) {
2115 sglist->dma_length = 0;
2116 return 0;
2117 }
2118
2119 /*
 2120 * Check whether the DMAR unit supports zero-length reads on
 2121 * write-only mappings.
2122 */
 2123 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
2124 !cap_zlr(domain->iommu->cap))
2125 prot |= DMA_PTE_READ;
2126 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2127 prot |= DMA_PTE_WRITE;
2128
2129 start_addr = iova->pfn_lo << PAGE_SHIFT;
2130 offset = 0;
2131 for_each_sg(sglist, sg, nelems, i) {
2132 addr = SG_ENT_VIRT_ADDRESS(sg);
2133 addr = (void *)virt_to_phys(addr);
2134 size = aligned_size((u64)addr, sg->length);
2135 ret = domain_page_mapping(domain, start_addr + offset,
2136 ((u64)addr) & PAGE_MASK,
2137 size, prot);
2138 if (ret) {
 2139 /* clear any PTEs already mapped */
2140 dma_pte_clear_range(domain, start_addr,
2141 start_addr + offset);
2142 /* free page tables */
2143 dma_pte_free_pagetable(domain, start_addr,
2144 start_addr + offset);
2145 /* free iova */
2146 __free_iova(&domain->iovad, iova);
2147 return 0;
2148 }
2149 sg->dma_address = start_addr + offset +
2150 ((u64)addr & (~PAGE_MASK));
2151 sg->dma_length = sg->length;
2152 offset += size;
2153 }
2154
2155 /* it's a non-present to present mapping */
2156 if (iommu_flush_iotlb_psi(domain->iommu, domain->id,
2157 start_addr, offset >> VTD_PAGE_SHIFT, 1))
2158 iommu_flush_write_buffer(domain->iommu);
2159 return nelems;
2160 }
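/*
 * Illustrative sketch only (hypothetical driver code; "pdev", the buffers and
 * program_hw_desc() are invented): a scatterlist mapped through the generic
 * DMA API is handled by intel_map_sg()/intel_unmap_sg() above.
 *
 *	struct scatterlist sgl[2], *sg;
 *	int i, mapped;
 *
 *	sg_init_table(sgl, 2);
 *	sg_set_buf(&sgl[0], buf_a, len_a);
 *	sg_set_buf(&sgl[1], buf_b, len_b);
 *	mapped = dma_map_sg(&pdev->dev, sgl, 2, DMA_FROM_DEVICE);
 *	if (!mapped)
 *		return -EIO;
 *	for_each_sg(sgl, sg, mapped, i)
 *		program_hw_desc(sg_dma_address(sg), sg_dma_len(sg));
 *	...
 *	dma_unmap_sg(&pdev->dev, sgl, 2, DMA_FROM_DEVICE);
 */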
2161
2162 static struct dma_mapping_ops intel_dma_ops = {
2163 .alloc_coherent = intel_alloc_coherent,
2164 .free_coherent = intel_free_coherent,
2165 .map_single = intel_map_single,
2166 .unmap_single = intel_unmap_single,
2167 .map_sg = intel_map_sg,
2168 .unmap_sg = intel_unmap_sg,
2169 };
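/*
 * intel_iommu_init() below points the global dma_ops at this table, so the
 * generic dma_map_single()/dma_map_sg()/dma_alloc_coherent() wrappers
 * dispatch into the intel_* routines above for every device.
 */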
2170
2171 static inline int iommu_domain_cache_init(void)
2172 {
2173 int ret = 0;
2174
2175 iommu_domain_cache = kmem_cache_create("iommu_domain",
2176 sizeof(struct dmar_domain),
2177 0,
2178 SLAB_HWCACHE_ALIGN,
2179
2180 NULL);
2181 if (!iommu_domain_cache) {
2182 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
2183 ret = -ENOMEM;
2184 }
2185
2186 return ret;
2187 }
2188
2189 static inline int iommu_devinfo_cache_init(void)
2190 {
2191 int ret = 0;
2192
2193 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
2194 sizeof(struct device_domain_info),
2195 0,
2196 SLAB_HWCACHE_ALIGN,
2197 NULL);
2198 if (!iommu_devinfo_cache) {
2199 printk(KERN_ERR "Couldn't create devinfo cache\n");
2200 ret = -ENOMEM;
2201 }
2202
2203 return ret;
2204 }
2205
2206 static inline int iommu_iova_cache_init(void)
2207 {
2208 int ret = 0;
2209
2210 iommu_iova_cache = kmem_cache_create("iommu_iova",
2211 sizeof(struct iova),
2212 0,
2213 SLAB_HWCACHE_ALIGN,
2214 NULL);
2215 if (!iommu_iova_cache) {
2216 printk(KERN_ERR "Couldn't create iova cache\n");
2217 ret = -ENOMEM;
2218 }
2219
2220 return ret;
2221 }
2222
2223 static int __init iommu_init_mempool(void)
2224 {
2225 int ret;
2226 ret = iommu_iova_cache_init();
2227 if (ret)
2228 return ret;
2229
2230 ret = iommu_domain_cache_init();
2231 if (ret)
2232 goto domain_error;
2233
2234 ret = iommu_devinfo_cache_init();
2235 if (!ret)
2236 return ret;
2237
2238 kmem_cache_destroy(iommu_domain_cache);
2239 domain_error:
2240 kmem_cache_destroy(iommu_iova_cache);
2241
2242 return -ENOMEM;
2243 }
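/*
 * The caches above are created in the order iova, domain, devinfo; the error
 * path unwinds whatever was created in reverse order, and
 * iommu_exit_mempool() below destroys all three.
 */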
2244
2245 static void __init iommu_exit_mempool(void)
2246 {
2247 kmem_cache_destroy(iommu_devinfo_cache);
2248 kmem_cache_destroy(iommu_domain_cache);
2249 kmem_cache_destroy(iommu_iova_cache);
2250
2251 }
2252
2253 static void __init init_no_remapping_devices(void)
2254 {
2255 struct dmar_drhd_unit *drhd;
2256
2257 for_each_drhd_unit(drhd) {
2258 if (!drhd->include_all) {
2259 int i;
2260 for (i = 0; i < drhd->devices_cnt; i++)
2261 if (drhd->devices[i] != NULL)
2262 break;
2263 /* ignore DMAR unit if no pci devices exist */
2264 if (i == drhd->devices_cnt)
2265 drhd->ignored = 1;
2266 }
2267 }
2268
2269 if (dmar_map_gfx)
2270 return;
2271
2272 for_each_drhd_unit(drhd) {
2273 int i;
2274 if (drhd->ignored || drhd->include_all)
2275 continue;
2276
2277 for (i = 0; i < drhd->devices_cnt; i++)
2278 if (drhd->devices[i] &&
2279 !IS_GFX_DEVICE(drhd->devices[i]))
2280 break;
2281
2282 if (i < drhd->devices_cnt)
2283 continue;
2284
2285 /* bypass IOMMU if it is just for gfx devices */
2286 drhd->ignored = 1;
2287 for (i = 0; i < drhd->devices_cnt; i++) {
2288 if (!drhd->devices[i])
2289 continue;
2290 drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
2291 }
2292 }
2293 }
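/*
 * Devices tagged with DUMMY_DEVICE_DOMAIN_INFO here are treated as
 * untranslated later on: intel_map_sg() falls back to
 * intel_nontranslate_map_sg() and the unmap paths return early for them.
 */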
2294
2295 int __init intel_iommu_init(void)
2296 {
2297 int ret = 0;
2298
2299 if (dmar_table_init())
2300 return -ENODEV;
2301
2302 if (dmar_dev_scope_init())
2303 return -ENODEV;
2304
2305 /*
 2306 * Check whether DMA-remapping initialization is needed now; the table
 2307 * and device-scope initialization above is also used by interrupt remapping.
2308 */
2309 if (no_iommu || swiotlb || dmar_disabled)
2310 return -ENODEV;
2311
2312 iommu_init_mempool();
2313 dmar_init_reserved_ranges();
2314
2315 init_no_remapping_devices();
2316
2317 ret = init_dmars();
2318 if (ret) {
2319 printk(KERN_ERR "IOMMU: dmar init failed\n");
2320 put_iova_domain(&reserved_iova_list);
2321 iommu_exit_mempool();
2322 return ret;
2323 }
2324 printk(KERN_INFO
2325 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
2326
2327 init_timer(&unmap_timer);
2328 force_iommu = 1;
2329 dma_ops = &intel_dma_ops;
2330 return 0;
2331 }
2332
2333 void intel_iommu_domain_exit(struct dmar_domain *domain)
2334 {
2335 u64 end;
2336
 2337 /* Domain 0 is reserved, so don't process it */
2338 if (!domain)
2339 return;
2340
2341 end = DOMAIN_MAX_ADDR(domain->gaw);
 2342 end = end & VTD_PAGE_MASK; /* align the end of the range down to a page boundary */
2343
2344 /* clear ptes */
2345 dma_pte_clear_range(domain, 0, end);
2346
2347 /* free page tables */
2348 dma_pte_free_pagetable(domain, 0, end);
2349
2350 iommu_free_domain(domain);
2351 free_domain_mem(domain);
2352 }
2353 EXPORT_SYMBOL_GPL(intel_iommu_domain_exit);
2354
2355 struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev)
2356 {
2357 struct dmar_drhd_unit *drhd;
2358 struct dmar_domain *domain;
2359 struct intel_iommu *iommu;
2360
2361 drhd = dmar_find_matched_drhd_unit(pdev);
2362 if (!drhd) {
2363 printk(KERN_ERR "intel_iommu_domain_alloc: drhd == NULL\n");
2364 return NULL;
2365 }
2366
2367 iommu = drhd->iommu;
2368 if (!iommu) {
2369 printk(KERN_ERR
2370 "intel_iommu_domain_alloc: iommu == NULL\n");
2371 return NULL;
2372 }
2373 domain = iommu_alloc_domain(iommu);
2374 if (!domain) {
2375 printk(KERN_ERR
2376 "intel_iommu_domain_alloc: domain == NULL\n");
2377 return NULL;
2378 }
2379 if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2380 printk(KERN_ERR
2381 "intel_iommu_domain_alloc: domain_init() failed\n");
2382 intel_iommu_domain_exit(domain);
2383 return NULL;
2384 }
2385 return domain;
2386 }
2387 EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc);
2388
2389 int intel_iommu_context_mapping(
2390 struct dmar_domain *domain, struct pci_dev *pdev)
2391 {
2392 int rc;
2393 rc = domain_context_mapping(domain, pdev);
2394 return rc;
2395 }
2396 EXPORT_SYMBOL_GPL(intel_iommu_context_mapping);
2397
2398 int intel_iommu_page_mapping(
2399 struct dmar_domain *domain, dma_addr_t iova,
2400 u64 hpa, size_t size, int prot)
2401 {
2402 int rc;
2403 rc = domain_page_mapping(domain, iova, hpa, size, prot);
2404 return rc;
2405 }
2406 EXPORT_SYMBOL_GPL(intel_iommu_page_mapping);
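/*
 * Illustrative sketch only of the exported domain API (hypothetical caller,
 * loosely modelled on device-assignment code; "assigned_pdev", "guest_iova",
 * "host_pa" and the error handling are invented):
 *
 *	struct dmar_domain *dom = intel_iommu_domain_alloc(assigned_pdev);
 *	if (!dom)
 *		return -ENODEV;
 *	if (intel_iommu_context_mapping(dom, assigned_pdev))
 *		goto fail;
 *	if (intel_iommu_page_mapping(dom, guest_iova, host_pa, PAGE_SIZE,
 *				     DMA_PTE_READ | DMA_PTE_WRITE))
 *		goto fail;
 *	...
 * fail:
 *	intel_iommu_domain_exit(dom);
 */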
2407
2408 void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
2409 {
2410 detach_domain_for_dev(domain, bus, devfn);
2411 }
2412 EXPORT_SYMBOL_GPL(intel_iommu_detach_dev);
2413
2414 struct dmar_domain *
2415 intel_iommu_find_domain(struct pci_dev *pdev)
2416 {
2417 return find_domain(pdev);
2418 }
2419 EXPORT_SYMBOL_GPL(intel_iommu_find_domain);
2420
2421 int intel_iommu_found(void)
2422 {
2423 return g_num_of_iommus;
2424 }
2425 EXPORT_SYMBOL_GPL(intel_iommu_found);
2426
2427 u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova)
2428 {
2429 struct dma_pte *pte;
2430 u64 pfn;
2431
2432 pfn = 0;
2433 pte = addr_to_dma_pte(domain, iova);
2434
2435 if (pte)
2436 pfn = dma_pte_addr(*pte);
2437
2438 return pfn >> VTD_PAGE_SHIFT;
2439 }
2440 EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn);
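/*
 * Illustrative sketch only (hypothetical caller): intel_iommu_found() lets
 * other code check for active VT-d units before using the API above, and
 * intel_iommu_iova_to_pfn() translates an IOVA back to a host page frame.
 *
 *	if (!intel_iommu_found())
 *		return -ENODEV;
 *	pfn = intel_iommu_iova_to_pfn(dom, guest_iova);
 */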