1/*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
 17 * Copyright (C) 2006-2008 Intel Corporation
 18 * Author: Ashok Raj <ashok.raj@intel.com>
 19 * Author: Shaohua Li <shaohua.li@intel.com>
 20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 21 * Author: Fenghua Yu <fenghua.yu@intel.com>
22 */
23
 24#include <linux/init.h>
 25#include <linux/bitmap.h>
 26#include <linux/debugfs.h>
 27#include <linux/slab.h>
 28#include <linux/irq.h>
 29#include <linux/interrupt.h>
 30#include <linux/spinlock.h>
 31#include <linux/pci.h>
 32#include <linux/dmar.h>
 33#include <linux/dma-mapping.h>
 34#include <linux/mempool.h>
 35#include <linux/timer.h>
 36#include <linux/iova.h>
 37#include <linux/intel-iommu.h>
 38#include <asm/cacheflush.h>
 39#include <asm/iommu.h>
 40#include "pci.h"
41
 42#define ROOT_SIZE VTD_PAGE_SIZE
 43#define CONTEXT_SIZE VTD_PAGE_SIZE
 44
 45#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
 46#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
 47
 48#define IOAPIC_RANGE_START (0xfee00000)
 49#define IOAPIC_RANGE_END (0xfeefffff)
 50#define IOVA_START_ADDR (0x1000)
 51
 52#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
 53
 54#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
 55
 56#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
 57#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK)
 58#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK)
 59
 60/* global iommu list, set NULL for ignored DMAR units */
 61static struct intel_iommu **g_iommus;
 62
63/*
64 * 0: Present
65 * 1-11: Reserved
66 * 12-63: Context Ptr (12 - (haw-1))
67 * 64-127: Reserved
68 */
69struct root_entry {
70 u64 val;
71 u64 rsvd1;
72};
73#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
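/*
 * Worked example (illustrative, assuming VTD_PAGE_SIZE is the usual 4KiB):
 * each root_entry is two u64s = 16 bytes, so ROOT_ENTRY_NR = 4096 / 16 = 256,
 * i.e. one root entry per possible PCI bus number.  Bit 0 of 'val' is the
 * present bit and bits 12-63 hold the physical address of that bus's
 * context table, as the accessors below encode.
 */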
74static inline bool root_present(struct root_entry *root)
75{
76 return (root->val & 1);
77}
78static inline void set_root_present(struct root_entry *root)
79{
80 root->val |= 1;
81}
82static inline void set_root_value(struct root_entry *root, unsigned long value)
83{
84 root->val |= value & VTD_PAGE_MASK;
85}
86
87static inline struct context_entry *
88get_context_addr_from_root(struct root_entry *root)
89{
90 return (struct context_entry *)
91 (root_present(root)?phys_to_virt(
92 root->val & VTD_PAGE_MASK) :
93 NULL);
94}
95
96/*
97 * low 64 bits:
98 * 0: present
99 * 1: fault processing disable
100 * 2-3: translation type
101 * 12-63: address space root
102 * high 64 bits:
103 * 0-2: address width
104 * 3-6: aval
105 * 8-23: domain id
106 */
107struct context_entry {
108 u64 lo;
109 u64 hi;
110};
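/*
 * Illustrative packing example (derived from the setters below, not extra
 * driver logic): after context_set_domain_id(c, 5) and
 * context_set_address_width(c, 2), c->hi == (5 << 8) | 2 == 0x502;
 * context_set_address_root() then stores the page-aligned physical address
 * of the top-level page table in bits 12-63 of c->lo.
 */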
111
112static inline bool context_present(struct context_entry *context)
113{
114 return (context->lo & 1);
115}
116static inline void context_set_present(struct context_entry *context)
117{
118 context->lo |= 1;
119}
120
121static inline void context_set_fault_enable(struct context_entry *context)
122{
123 context->lo &= (((u64)-1) << 2) | 1;
124}
125
 126#define CONTEXT_TT_MULTI_LEVEL 0
127
128static inline void context_set_translation_type(struct context_entry *context,
129 unsigned long value)
130{
131 context->lo &= (((u64)-1) << 4) | 3;
132 context->lo |= (value & 3) << 2;
133}
134
135static inline void context_set_address_root(struct context_entry *context,
136 unsigned long value)
137{
138 context->lo |= value & VTD_PAGE_MASK;
139}
140
141static inline void context_set_address_width(struct context_entry *context,
142 unsigned long value)
143{
144 context->hi |= value & 7;
145}
146
147static inline void context_set_domain_id(struct context_entry *context,
148 unsigned long value)
149{
150 context->hi |= (value & ((1 << 16) - 1)) << 8;
151}
152
153static inline void context_clear_entry(struct context_entry *context)
154{
155 context->lo = 0;
156 context->hi = 0;
157}
 158
159/*
160 * 0: readable
161 * 1: writable
162 * 2-6: reserved
163 * 7: super page
164 * 8-11: available
 165 * 12-63: Host physical address
166 */
167struct dma_pte {
168 u64 val;
169};
 170
171static inline void dma_clear_pte(struct dma_pte *pte)
172{
173 pte->val = 0;
174}
175
176static inline void dma_set_pte_readable(struct dma_pte *pte)
177{
178 pte->val |= DMA_PTE_READ;
179}
180
181static inline void dma_set_pte_writable(struct dma_pte *pte)
182{
183 pte->val |= DMA_PTE_WRITE;
184}
185
186static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
187{
188 pte->val = (pte->val & ~3) | (prot & 3);
189}
190
191static inline u64 dma_pte_addr(struct dma_pte *pte)
192{
193 return (pte->val & VTD_PAGE_MASK);
194}
195
196static inline void dma_set_pte_addr(struct dma_pte *pte, u64 addr)
197{
198 pte->val |= (addr & VTD_PAGE_MASK);
199}
200
201static inline bool dma_pte_present(struct dma_pte *pte)
202{
203 return (pte->val & 3) != 0;
204}
 205
 206/* devices under the same p2p bridge are owned by one domain */
 207#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
 208
 209/* domain represents a virtual machine; more than one device
 210 * across iommus may be owned by one domain, e.g. a kvm guest.
211 */
212#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1)
213
214struct dmar_domain {
215 int id; /* domain id */
 216 unsigned long iommu_bmp; /* bitmap of iommus this domain uses*/
 217
218 struct list_head devices; /* all devices' list */
219 struct iova_domain iovad; /* iova's that belong to this domain */
220
221 struct dma_pte *pgd; /* virtual address */
222 spinlock_t mapping_lock; /* page table lock */
223 int gaw; /* max guest address width */
224
225 /* adjusted guest address width, 0 is level 2 30-bit */
226 int agaw;
227
 228 int flags; /* flags to find out type of domain */
 229
 230 int iommu_coherency;/* indicate coherency of iommu access */
 231 int iommu_count; /* reference count of iommu */
 232 spinlock_t iommu_lock; /* protect iommu set in domain */
233};
234
235/* PCI domain-device relationship */
236struct device_domain_info {
237 struct list_head link; /* link to domain siblings */
238 struct list_head global; /* link to global list */
 239 u8 bus; /* PCI bus number */
240 u8 devfn; /* PCI devfn number */
241 struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
242 struct dmar_domain *domain; /* pointer to domain */
243};
244
5e0d2a6f 245static void flush_unmaps_timeout(unsigned long data);
246
247DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
248
80b20dd8 249#define HIGH_WATER_MARK 250
250struct deferred_flush_tables {
251 int next;
252 struct iova *iova[HIGH_WATER_MARK];
253 struct dmar_domain *domain[HIGH_WATER_MARK];
254};
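/*
 * Sketch of the deferred-unmap bookkeeping declared here (descriptive
 * comment only): unless intel_iommu=strict is used, freed IOVAs are queued
 * in these per-iommu tables, up to HIGH_WATER_MARK (250) entries each, and
 * are only released after an IOTLB flush (the drain path is not shown in
 * this excerpt), driven either by a table filling up or by unmap_timer
 * firing flush_unmaps_timeout().
 */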
255
256static struct deferred_flush_tables *deferred_flush;
257
5e0d2a6f 258/* bitmap for indexing intel_iommus */
5e0d2a6f 259static int g_num_of_iommus;
260
261static DEFINE_SPINLOCK(async_umap_flush_lock);
262static LIST_HEAD(unmaps_to_do);
263
264static int timer_on;
265static long list_size;
5e0d2a6f 266
267static void domain_remove_dev_info(struct dmar_domain *domain);
268
2ae21010 269int dmar_disabled;
ba395927 270static int __initdata dmar_map_gfx = 1;
7d3b03ce 271static int dmar_forcedac;
5e0d2a6f 272static int intel_iommu_strict;
273
274#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
275static DEFINE_SPINLOCK(device_domain_lock);
276static LIST_HEAD(device_domain_list);
277
278static int __init intel_iommu_setup(char *str)
279{
280 if (!str)
281 return -EINVAL;
282 while (*str) {
283 if (!strncmp(str, "off", 3)) {
284 dmar_disabled = 1;
285 printk(KERN_INFO"Intel-IOMMU: disabled\n");
286 } else if (!strncmp(str, "igfx_off", 8)) {
287 dmar_map_gfx = 0;
288 printk(KERN_INFO
289 "Intel-IOMMU: disable GFX device mapping\n");
7d3b03ce 290 } else if (!strncmp(str, "forcedac", 8)) {
5e0d2a6f 291 printk(KERN_INFO
292 "Intel-IOMMU: Forcing DAC for PCI devices\n");
293 dmar_forcedac = 1;
5e0d2a6f 294 } else if (!strncmp(str, "strict", 6)) {
295 printk(KERN_INFO
296 "Intel-IOMMU: disable batched IOTLB flush\n");
297 intel_iommu_strict = 1;
298 }
299
300 str += strcspn(str, ",");
301 while (*str == ',')
302 str++;
303 }
304 return 0;
305}
306__setup("intel_iommu=", intel_iommu_setup);
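/*
 * Example usage (kernel boot command line), based only on the options
 * parsed above; options may be combined with commas:
 *
 *	intel_iommu=off
 *	intel_iommu=igfx_off,strict
 *	intel_iommu=forcedac
 */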
307
308static struct kmem_cache *iommu_domain_cache;
309static struct kmem_cache *iommu_devinfo_cache;
310static struct kmem_cache *iommu_iova_cache;
311
312static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
313{
314 unsigned int flags;
315 void *vaddr;
316
317 /* trying to avoid low memory issues */
318 flags = current->flags & PF_MEMALLOC;
319 current->flags |= PF_MEMALLOC;
320 vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
321 current->flags &= (~PF_MEMALLOC | flags);
322 return vaddr;
323}
324
325
326static inline void *alloc_pgtable_page(void)
327{
328 unsigned int flags;
329 void *vaddr;
330
331 /* trying to avoid low memory issues */
332 flags = current->flags & PF_MEMALLOC;
333 current->flags |= PF_MEMALLOC;
334 vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
335 current->flags &= (~PF_MEMALLOC | flags);
336 return vaddr;
337}
338
339static inline void free_pgtable_page(void *vaddr)
340{
341 free_page((unsigned long)vaddr);
342}
343
344static inline void *alloc_domain_mem(void)
345{
eb3fa7cb 346 return iommu_kmem_cache_alloc(iommu_domain_cache);
347}
348
38717946 349static void free_domain_mem(void *vaddr)
350{
351 kmem_cache_free(iommu_domain_cache, vaddr);
352}
353
354static inline void * alloc_devinfo_mem(void)
355{
eb3fa7cb 356 return iommu_kmem_cache_alloc(iommu_devinfo_cache);
357}
358
359static inline void free_devinfo_mem(void *vaddr)
360{
361 kmem_cache_free(iommu_devinfo_cache, vaddr);
362}
363
364struct iova *alloc_iova_mem(void)
365{
eb3fa7cb 366 return iommu_kmem_cache_alloc(iommu_iova_cache);
367}
368
369void free_iova_mem(struct iova *iova)
370{
371 kmem_cache_free(iommu_iova_cache, iova);
372}
373
374
375static inline int width_to_agaw(int width);
376
377/* calculate agaw for each iommu.
378 * "SAGAW" may be different across iommus, use a default agaw, and
379 * get a supported less agaw for iommus that don't support the default agaw.
380 */
381int iommu_calculate_agaw(struct intel_iommu *iommu)
382{
383 unsigned long sagaw;
384 int agaw = -1;
385
386 sagaw = cap_sagaw(iommu->cap);
387 for (agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
388 agaw >= 0; agaw--) {
389 if (test_bit(agaw, &sagaw))
390 break;
391 }
392
393 return agaw;
394}
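/*
 * Worked example (illustrative): with DEFAULT_DOMAIN_ADDRESS_WIDTH == 48,
 * width_to_agaw(48) = (48 - 30) / 9 = 2 (4-level paging).  If the iommu's
 * SAGAW field only advertises bit 1 (39-bit, 3-level), the loop above
 * returns agaw 1; if no bit at or below the default is set it returns -1.
 */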
395
396/* in native case, each domain is related to only one iommu */
397static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
398{
399 int iommu_id;
400
401 BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
402
403 iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
404 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
405 return NULL;
406
407 return g_iommus[iommu_id];
408}
409
410/* "Coherency" capability may be different across iommus */
411static void domain_update_iommu_coherency(struct dmar_domain *domain)
412{
413 int i;
414
415 domain->iommu_coherency = 1;
416
417 i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
418 for (; i < g_num_of_iommus; ) {
419 if (!ecap_coherent(g_iommus[i]->ecap)) {
420 domain->iommu_coherency = 0;
421 break;
422 }
423 i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
424 }
425}
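/*
 * Illustrative note: a virtual-machine domain may span several iommus, so
 * iommu_coherency is only left at 1 if every iommu whose bit is set in
 * domain->iommu_bmp advertises the coherency capability; a single
 * non-coherent iommu marks the whole domain as non-coherent.
 */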
426
427static struct intel_iommu *device_to_iommu(u8 bus, u8 devfn)
428{
429 struct dmar_drhd_unit *drhd = NULL;
430 int i;
431
432 for_each_drhd_unit(drhd) {
433 if (drhd->ignored)
434 continue;
435
436 for (i = 0; i < drhd->devices_cnt; i++)
437 if (drhd->devices[i]->bus->number == bus &&
438 drhd->devices[i]->devfn == devfn)
439 return drhd->iommu;
440
441 if (drhd->include_all)
442 return drhd->iommu;
443 }
444
445 return NULL;
446}
447
448/* Gets context entry for a given bus and devfn */
449static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
450 u8 bus, u8 devfn)
451{
452 struct root_entry *root;
453 struct context_entry *context;
454 unsigned long phy_addr;
455 unsigned long flags;
456
457 spin_lock_irqsave(&iommu->lock, flags);
458 root = &iommu->root_entry[bus];
459 context = get_context_addr_from_root(root);
460 if (!context) {
461 context = (struct context_entry *)alloc_pgtable_page();
462 if (!context) {
463 spin_unlock_irqrestore(&iommu->lock, flags);
464 return NULL;
465 }
 466 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
467 phy_addr = virt_to_phys((void *)context);
468 set_root_value(root, phy_addr);
469 set_root_present(root);
470 __iommu_flush_cache(iommu, root, sizeof(*root));
471 }
472 spin_unlock_irqrestore(&iommu->lock, flags);
473 return &context[devfn];
474}
475
476static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
477{
478 struct root_entry *root;
479 struct context_entry *context;
480 int ret;
481 unsigned long flags;
482
483 spin_lock_irqsave(&iommu->lock, flags);
484 root = &iommu->root_entry[bus];
485 context = get_context_addr_from_root(root);
486 if (!context) {
487 ret = 0;
488 goto out;
489 }
 490 ret = context_present(&context[devfn]);
491out:
492 spin_unlock_irqrestore(&iommu->lock, flags);
493 return ret;
494}
495
496static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
497{
498 struct root_entry *root;
499 struct context_entry *context;
500 unsigned long flags;
501
502 spin_lock_irqsave(&iommu->lock, flags);
503 root = &iommu->root_entry[bus];
504 context = get_context_addr_from_root(root);
505 if (context) {
 506 context_clear_entry(&context[devfn]);
507 __iommu_flush_cache(iommu, &context[devfn], \
508 sizeof(*context));
509 }
510 spin_unlock_irqrestore(&iommu->lock, flags);
511}
512
513static void free_context_table(struct intel_iommu *iommu)
514{
515 struct root_entry *root;
516 int i;
517 unsigned long flags;
518 struct context_entry *context;
519
520 spin_lock_irqsave(&iommu->lock, flags);
521 if (!iommu->root_entry) {
522 goto out;
523 }
524 for (i = 0; i < ROOT_ENTRY_NR; i++) {
525 root = &iommu->root_entry[i];
526 context = get_context_addr_from_root(root);
527 if (context)
528 free_pgtable_page(context);
529 }
530 free_pgtable_page(iommu->root_entry);
531 iommu->root_entry = NULL;
532out:
533 spin_unlock_irqrestore(&iommu->lock, flags);
534}
535
536/* page table handling */
537#define LEVEL_STRIDE (9)
538#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
539
540static inline int agaw_to_level(int agaw)
541{
542 return agaw + 2;
543}
544
545static inline int agaw_to_width(int agaw)
546{
547 return 30 + agaw * LEVEL_STRIDE;
548
549}
550
551static inline int width_to_agaw(int width)
552{
553 return (width - 30) / LEVEL_STRIDE;
554}
555
556static inline unsigned int level_to_offset_bits(int level)
557{
558 return (12 + (level - 1) * LEVEL_STRIDE);
559}
560
561static inline int address_level_offset(u64 addr, int level)
562{
563 return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
564}
565
566static inline u64 level_mask(int level)
567{
568 return ((u64)-1 << level_to_offset_bits(level));
569}
570
571static inline u64 level_size(int level)
572{
573 return ((u64)1 << level_to_offset_bits(level));
574}
575
576static inline u64 align_to_level(u64 addr, int level)
577{
578 return ((addr + level_size(level) - 1) & level_mask(level));
579}
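/*
 * Illustrative numbers for the helpers above (assuming 4KiB pages): with
 * agaw 2, agaw_to_level() = 4 and agaw_to_width() = 48.
 * level_to_offset_bits() gives 12/21/30/39 for levels 1-4, so
 * address_level_offset() extracts a 9-bit table index at each level, and
 * level_size() is 4KiB at level 1, 2MiB at level 2, 1GiB at level 3 and
 * 512GiB at level 4.
 */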
580
581static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
582{
583 int addr_width = agaw_to_width(domain->agaw);
584 struct dma_pte *parent, *pte = NULL;
585 int level = agaw_to_level(domain->agaw);
586 int offset;
587 unsigned long flags;
8c11e798 588 struct intel_iommu *iommu = domain_get_iommu(domain);
589
590 BUG_ON(!domain->pgd);
591
592 addr &= (((u64)1) << addr_width) - 1;
593 parent = domain->pgd;
594
595 spin_lock_irqsave(&domain->mapping_lock, flags);
596 while (level > 0) {
597 void *tmp_page;
598
599 offset = address_level_offset(addr, level);
600 pte = &parent[offset];
601 if (level == 1)
602 break;
603
19c239ce 604 if (!dma_pte_present(pte)) {
605 tmp_page = alloc_pgtable_page();
606
607 if (!tmp_page) {
608 spin_unlock_irqrestore(&domain->mapping_lock,
609 flags);
610 return NULL;
611 }
8c11e798 612 __iommu_flush_cache(iommu, tmp_page,
5b6985ce 613 PAGE_SIZE);
19c239ce 614 dma_set_pte_addr(pte, virt_to_phys(tmp_page));
615 /*
 616 * higher level tables always set r/w; the last level page
 617 * table controls read/write
618 */
619 dma_set_pte_readable(pte);
620 dma_set_pte_writable(pte);
8c11e798 621 __iommu_flush_cache(iommu, pte, sizeof(*pte));
ba395927 622 }
19c239ce 623 parent = phys_to_virt(dma_pte_addr(pte));
624 level--;
625 }
626
627 spin_unlock_irqrestore(&domain->mapping_lock, flags);
628 return pte;
629}
630
631/* return address's pte at specific level */
632static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
633 int level)
634{
635 struct dma_pte *parent, *pte = NULL;
636 int total = agaw_to_level(domain->agaw);
637 int offset;
638
639 parent = domain->pgd;
640 while (level <= total) {
641 offset = address_level_offset(addr, total);
642 pte = &parent[offset];
643 if (level == total)
644 return pte;
645
19c239ce 646 if (!dma_pte_present(pte))
ba395927 647 break;
19c239ce 648 parent = phys_to_virt(dma_pte_addr(pte));
649 total--;
650 }
651 return NULL;
652}
653
654/* clear one page's page table */
655static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
656{
657 struct dma_pte *pte = NULL;
8c11e798 658 struct intel_iommu *iommu = domain_get_iommu(domain);
659
660 /* get last level pte */
661 pte = dma_addr_level_pte(domain, addr, 1);
662
663 if (pte) {
19c239ce 664 dma_clear_pte(pte);
8c11e798 665 __iommu_flush_cache(iommu, pte, sizeof(*pte));
666 }
667}
668
 669/* clear last level pte, a tlb flush should follow */
670static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
671{
672 int addr_width = agaw_to_width(domain->agaw);
673
674 start &= (((u64)1) << addr_width) - 1;
675 end &= (((u64)1) << addr_width) - 1;
676 /* in case it's partial page */
677 start = PAGE_ALIGN(start);
678 end &= PAGE_MASK;
679
680 /* we don't need lock here, nobody else touches the iova range */
681 while (start < end) {
682 dma_pte_clear_one(domain, start);
5b6985ce 683 start += VTD_PAGE_SIZE;
684 }
685}
686
687/* free page table pages. last level pte should already be cleared */
688static void dma_pte_free_pagetable(struct dmar_domain *domain,
689 u64 start, u64 end)
690{
691 int addr_width = agaw_to_width(domain->agaw);
692 struct dma_pte *pte;
693 int total = agaw_to_level(domain->agaw);
694 int level;
695 u64 tmp;
8c11e798 696 struct intel_iommu *iommu = domain_get_iommu(domain);
697
698 start &= (((u64)1) << addr_width) - 1;
699 end &= (((u64)1) << addr_width) - 1;
700
701 /* we don't need lock here, nobody else touches the iova range */
702 level = 2;
703 while (level <= total) {
704 tmp = align_to_level(start, level);
705 if (tmp >= end || (tmp + level_size(level) > end))
706 return;
707
708 while (tmp < end) {
709 pte = dma_addr_level_pte(domain, tmp, level);
710 if (pte) {
711 free_pgtable_page(
712 phys_to_virt(dma_pte_addr(pte)));
713 dma_clear_pte(pte);
8c11e798 714 __iommu_flush_cache(iommu,
715 pte, sizeof(*pte));
716 }
717 tmp += level_size(level);
718 }
719 level++;
720 }
721 /* free pgd */
722 if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
723 free_pgtable_page(domain->pgd);
724 domain->pgd = NULL;
725 }
726}
727
728/* iommu handling */
729static int iommu_alloc_root_entry(struct intel_iommu *iommu)
730{
731 struct root_entry *root;
732 unsigned long flags;
733
734 root = (struct root_entry *)alloc_pgtable_page();
735 if (!root)
736 return -ENOMEM;
737
5b6985ce 738 __iommu_flush_cache(iommu, root, ROOT_SIZE);
739
740 spin_lock_irqsave(&iommu->lock, flags);
741 iommu->root_entry = root;
742 spin_unlock_irqrestore(&iommu->lock, flags);
743
744 return 0;
745}
746
747static void iommu_set_root_entry(struct intel_iommu *iommu)
748{
749 void *addr;
750 u32 cmd, sts;
751 unsigned long flag;
752
753 addr = iommu->root_entry;
754
755 spin_lock_irqsave(&iommu->register_lock, flag);
756 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
757
758 cmd = iommu->gcmd | DMA_GCMD_SRTP;
759 writel(cmd, iommu->reg + DMAR_GCMD_REG);
760
 761 /* Make sure hardware completes it */
762 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
763 readl, (sts & DMA_GSTS_RTPS), sts);
764
765 spin_unlock_irqrestore(&iommu->register_lock, flag);
766}
767
768static void iommu_flush_write_buffer(struct intel_iommu *iommu)
769{
770 u32 val;
771 unsigned long flag;
772
773 if (!cap_rwbf(iommu->cap))
774 return;
775 val = iommu->gcmd | DMA_GCMD_WBF;
776
777 spin_lock_irqsave(&iommu->register_lock, flag);
778 writel(val, iommu->reg + DMAR_GCMD_REG);
779
 780 /* Make sure hardware completes it */
781 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
782 readl, (!(val & DMA_GSTS_WBFS)), val);
783
784 spin_unlock_irqrestore(&iommu->register_lock, flag);
785}
786
 787/* return value determines whether we need a write buffer flush */
788static int __iommu_flush_context(struct intel_iommu *iommu,
789 u16 did, u16 source_id, u8 function_mask, u64 type,
790 int non_present_entry_flush)
791{
792 u64 val = 0;
793 unsigned long flag;
794
795 /*
 796 * In the non-present entry flush case, if hardware doesn't cache
 797 * non-present entries we do nothing; if hardware does cache non-present
 798 * entries, we flush entries of domain 0 (the domain id used to cache
 799 * any non-present entries)
800 */
801 if (non_present_entry_flush) {
802 if (!cap_caching_mode(iommu->cap))
803 return 1;
804 else
805 did = 0;
806 }
807
808 switch (type) {
809 case DMA_CCMD_GLOBAL_INVL:
810 val = DMA_CCMD_GLOBAL_INVL;
811 break;
812 case DMA_CCMD_DOMAIN_INVL:
813 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
814 break;
815 case DMA_CCMD_DEVICE_INVL:
816 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
817 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
818 break;
819 default:
820 BUG();
821 }
822 val |= DMA_CCMD_ICC;
823
824 spin_lock_irqsave(&iommu->register_lock, flag);
825 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
826
 827 /* Make sure hardware completes it */
828 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
829 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
830
831 spin_unlock_irqrestore(&iommu->register_lock, flag);
832
4d235ba6 833 /* flush context entry will implicitly flush write buffer */
834 return 0;
835}
836
 837/* return value determines whether we need a write buffer flush */
838static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
839 u64 addr, unsigned int size_order, u64 type,
840 int non_present_entry_flush)
841{
842 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
843 u64 val = 0, val_iva = 0;
844 unsigned long flag;
845
846 /*
 847 * In the non-present entry flush case, if hardware doesn't cache
 848 * non-present entries we do nothing; if hardware does cache non-present
 849 * entries, we flush entries of domain 0 (the domain id used to cache
 850 * any non-present entries)
851 */
852 if (non_present_entry_flush) {
853 if (!cap_caching_mode(iommu->cap))
854 return 1;
855 else
856 did = 0;
857 }
858
859 switch (type) {
860 case DMA_TLB_GLOBAL_FLUSH:
861 /* global flush doesn't need set IVA_REG */
862 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
863 break;
864 case DMA_TLB_DSI_FLUSH:
865 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
866 break;
867 case DMA_TLB_PSI_FLUSH:
868 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
869 /* Note: always flush non-leaf currently */
870 val_iva = size_order | addr;
871 break;
872 default:
873 BUG();
874 }
875 /* Note: set drain read/write */
876#if 0
877 /*
 878 * This is probably only here to be extra safe; it looks like we can
 879 * ignore it without any impact.
880 */
881 if (cap_read_drain(iommu->cap))
882 val |= DMA_TLB_READ_DRAIN;
883#endif
884 if (cap_write_drain(iommu->cap))
885 val |= DMA_TLB_WRITE_DRAIN;
886
887 spin_lock_irqsave(&iommu->register_lock, flag);
888 /* Note: Only uses first TLB reg currently */
889 if (val_iva)
890 dmar_writeq(iommu->reg + tlb_offset, val_iva);
891 dmar_writeq(iommu->reg + tlb_offset + 8, val);
892
 893 /* Make sure hardware completes it */
894 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
895 dmar_readq, (!(val & DMA_TLB_IVT)), val);
896
897 spin_unlock_irqrestore(&iommu->register_lock, flag);
898
899 /* check IOTLB invalidation granularity */
900 if (DMA_TLB_IAIG(val) == 0)
901 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
902 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
903 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
904 (unsigned long long)DMA_TLB_IIRG(type),
905 (unsigned long long)DMA_TLB_IAIG(val));
4d235ba6 906 /* flush iotlb entry will implicitly flush write buffer */
907 return 0;
908}
909
910static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
911 u64 addr, unsigned int pages, int non_present_entry_flush)
912{
f76aec76 913 unsigned int mask;
ba395927 914
5b6985ce 915 BUG_ON(addr & (~VTD_PAGE_MASK));
916 BUG_ON(pages == 0);
917
918 /* Fallback to domain selective flush if no PSI support */
919 if (!cap_pgsel_inv(iommu->cap))
920 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
921 DMA_TLB_DSI_FLUSH,
922 non_present_entry_flush);
923
924 /*
925 * PSI requires page size to be 2 ^ x, and the base address is naturally
926 * aligned to the size
927 */
f76aec76 928 mask = ilog2(__roundup_pow_of_two(pages));
ba395927 929 /* Fallback to domain selective flush if size is too big */
f76aec76 930 if (mask > cap_max_amask_val(iommu->cap))
931 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
932 DMA_TLB_DSI_FLUSH, non_present_entry_flush);
ba395927 933
934 return iommu->flush.flush_iotlb(iommu, did, addr, mask,
935 DMA_TLB_PSI_FLUSH,
936 non_present_entry_flush);
937}
938
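/*
 * Worked example for the PSI path above (illustrative): for pages == 5,
 * mask = ilog2(__roundup_pow_of_two(5)) = 3, so the request covers 8 pages
 * (32KiB with 4KiB pages) and 'addr' must be aligned to that size.  If mask
 * exceeds cap_max_amask_val(), the code falls back to a domain selective
 * (DSI) flush instead.
 */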
f8bab735 939static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
940{
941 u32 pmen;
942 unsigned long flags;
943
944 spin_lock_irqsave(&iommu->register_lock, flags);
945 pmen = readl(iommu->reg + DMAR_PMEN_REG);
946 pmen &= ~DMA_PMEN_EPM;
947 writel(pmen, iommu->reg + DMAR_PMEN_REG);
948
949 /* wait for the protected region status bit to clear */
950 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
951 readl, !(pmen & DMA_PMEN_PRS), pmen);
952
953 spin_unlock_irqrestore(&iommu->register_lock, flags);
954}
955
956static int iommu_enable_translation(struct intel_iommu *iommu)
957{
958 u32 sts;
959 unsigned long flags;
960
961 spin_lock_irqsave(&iommu->register_lock, flags);
962 writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);
963
 964 /* Make sure hardware completes it */
965 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
966 readl, (sts & DMA_GSTS_TES), sts);
967
968 iommu->gcmd |= DMA_GCMD_TE;
969 spin_unlock_irqrestore(&iommu->register_lock, flags);
970 return 0;
971}
972
973static int iommu_disable_translation(struct intel_iommu *iommu)
974{
975 u32 sts;
976 unsigned long flag;
977
978 spin_lock_irqsave(&iommu->register_lock, flag);
979 iommu->gcmd &= ~DMA_GCMD_TE;
980 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
981
 982 /* Make sure hardware completes it */
983 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
984 readl, (!(sts & DMA_GSTS_TES)), sts);
985
986 spin_unlock_irqrestore(&iommu->register_lock, flag);
987 return 0;
988}
989
 990/* iommu interrupt handling. Most of it is MSI-like. */
991
d94afc6c 992static const char *fault_reason_strings[] =
993{
994 "Software",
995 "Present bit in root entry is clear",
996 "Present bit in context entry is clear",
997 "Invalid context entry",
998 "Access beyond MGAW",
999 "PTE Write access is not set",
1000 "PTE Read access is not set",
1001 "Next page table ptr is invalid",
1002 "Root table address invalid",
1003 "Context table ptr is invalid",
1004 "non-zero reserved fields in RTP",
1005 "non-zero reserved fields in CTP",
1006 "non-zero reserved fields in PTE",
3460a6d9 1007};
f8bab735 1008#define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1)
3460a6d9 1009
d94afc6c 1010const char *dmar_get_fault_reason(u8 fault_reason)
3460a6d9 1011{
d94afc6c 1012 if (fault_reason > MAX_FAULT_REASON_IDX)
1013 return "Unknown";
1014 else
1015 return fault_reason_strings[fault_reason];
1016}
1017
1018void dmar_msi_unmask(unsigned int irq)
1019{
1020 struct intel_iommu *iommu = get_irq_data(irq);
1021 unsigned long flag;
1022
1023 /* unmask it */
1024 spin_lock_irqsave(&iommu->register_lock, flag);
1025 writel(0, iommu->reg + DMAR_FECTL_REG);
1026 /* Read a reg to force flush the post write */
1027 readl(iommu->reg + DMAR_FECTL_REG);
1028 spin_unlock_irqrestore(&iommu->register_lock, flag);
1029}
1030
1031void dmar_msi_mask(unsigned int irq)
1032{
1033 unsigned long flag;
1034 struct intel_iommu *iommu = get_irq_data(irq);
1035
1036 /* mask it */
1037 spin_lock_irqsave(&iommu->register_lock, flag);
1038 writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
1039 /* Read a reg to force flush the post write */
1040 readl(iommu->reg + DMAR_FECTL_REG);
1041 spin_unlock_irqrestore(&iommu->register_lock, flag);
1042}
1043
1044void dmar_msi_write(int irq, struct msi_msg *msg)
1045{
1046 struct intel_iommu *iommu = get_irq_data(irq);
1047 unsigned long flag;
1048
1049 spin_lock_irqsave(&iommu->register_lock, flag);
1050 writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
1051 writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
1052 writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
1053 spin_unlock_irqrestore(&iommu->register_lock, flag);
1054}
1055
1056void dmar_msi_read(int irq, struct msi_msg *msg)
1057{
1058 struct intel_iommu *iommu = get_irq_data(irq);
1059 unsigned long flag;
1060
1061 spin_lock_irqsave(&iommu->register_lock, flag);
1062 msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
1063 msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
1064 msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
1065 spin_unlock_irqrestore(&iommu->register_lock, flag);
1066}
1067
1068static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
5b6985ce 1069 u8 fault_reason, u16 source_id, unsigned long long addr)
3460a6d9 1070{
d94afc6c 1071 const char *reason;
1072
1073 reason = dmar_get_fault_reason(fault_reason);
1074
1075 printk(KERN_ERR
1076 "DMAR:[%s] Request device [%02x:%02x.%d] "
1077 "fault addr %llx \n"
1078 "DMAR:[fault reason %02d] %s\n",
1079 (type ? "DMA Read" : "DMA Write"),
1080 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1081 PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
1082 return 0;
1083}
1084
1085#define PRIMARY_FAULT_REG_LEN (16)
1086static irqreturn_t iommu_page_fault(int irq, void *dev_id)
1087{
1088 struct intel_iommu *iommu = dev_id;
1089 int reg, fault_index;
1090 u32 fault_status;
1091 unsigned long flag;
1092
1093 spin_lock_irqsave(&iommu->register_lock, flag);
1094 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1095
1096 /* TBD: ignore advanced fault log currently */
1097 if (!(fault_status & DMA_FSTS_PPF))
1098 goto clear_overflow;
1099
1100 fault_index = dma_fsts_fault_record_index(fault_status);
1101 reg = cap_fault_reg_offset(iommu->cap);
1102 while (1) {
1103 u8 fault_reason;
1104 u16 source_id;
1105 u64 guest_addr;
1106 int type;
1107 u32 data;
1108
1109 /* highest 32 bits */
1110 data = readl(iommu->reg + reg +
1111 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1112 if (!(data & DMA_FRCD_F))
1113 break;
1114
1115 fault_reason = dma_frcd_fault_reason(data);
1116 type = dma_frcd_type(data);
1117
1118 data = readl(iommu->reg + reg +
1119 fault_index * PRIMARY_FAULT_REG_LEN + 8);
1120 source_id = dma_frcd_source_id(data);
1121
1122 guest_addr = dmar_readq(iommu->reg + reg +
1123 fault_index * PRIMARY_FAULT_REG_LEN);
1124 guest_addr = dma_frcd_page_addr(guest_addr);
1125 /* clear the fault */
1126 writel(DMA_FRCD_F, iommu->reg + reg +
1127 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1128
1129 spin_unlock_irqrestore(&iommu->register_lock, flag);
1130
1131 iommu_page_fault_do_one(iommu, type, fault_reason,
1132 source_id, guest_addr);
1133
1134 fault_index++;
1135 if (fault_index > cap_num_fault_regs(iommu->cap))
1136 fault_index = 0;
1137 spin_lock_irqsave(&iommu->register_lock, flag);
1138 }
1139clear_overflow:
1140 /* clear primary fault overflow */
1141 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1142 if (fault_status & DMA_FSTS_PFO)
1143 writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);
1144
1145 spin_unlock_irqrestore(&iommu->register_lock, flag);
1146 return IRQ_HANDLED;
1147}
1148
1149int dmar_set_interrupt(struct intel_iommu *iommu)
1150{
1151 int irq, ret;
1152
1153 irq = create_irq();
1154 if (!irq) {
1155 printk(KERN_ERR "IOMMU: no free vectors\n");
1156 return -EINVAL;
1157 }
1158
1159 set_irq_data(irq, iommu);
1160 iommu->irq = irq;
1161
1162 ret = arch_setup_dmar_msi(irq);
1163 if (ret) {
1164 set_irq_data(irq, NULL);
1165 iommu->irq = 0;
1166 destroy_irq(irq);
1167 return 0;
1168 }
1169
 1170 /* Force any pending fault records to be cleared */
1171 iommu_page_fault(irq, iommu);
1172
1173 ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
1174 if (ret)
1175 printk(KERN_ERR "IOMMU: can't request irq\n");
1176 return ret;
1177}
1178
1179static int iommu_init_domains(struct intel_iommu *iommu)
1180{
1181 unsigned long ndomains;
1182 unsigned long nlongs;
1183
1184 ndomains = cap_ndoms(iommu->cap);
 1185 pr_debug("Number of Domains supported <%ld>\n", ndomains);
1186 nlongs = BITS_TO_LONGS(ndomains);
1187
1188 /* TBD: there might be 64K domains,
1189 * consider other allocation for future chip
1190 */
1191 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1192 if (!iommu->domain_ids) {
1193 printk(KERN_ERR "Allocating domain id array failed\n");
1194 return -ENOMEM;
1195 }
1196 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1197 GFP_KERNEL);
1198 if (!iommu->domains) {
1199 printk(KERN_ERR "Allocating domain array failed\n");
1200 kfree(iommu->domain_ids);
1201 return -ENOMEM;
1202 }
1203
e61d98d8
SS
1204 spin_lock_init(&iommu->lock);
1205
ba395927
KA
1206 /*
1207 * if Caching mode is set, then invalid translations are tagged
1208 * with domainid 0. Hence we need to pre-allocate it.
1209 */
1210 if (cap_caching_mode(iommu->cap))
1211 set_bit(0, iommu->domain_ids);
1212 return 0;
1213}
ba395927 1214
ba395927
KA
1215
1216static void domain_exit(struct dmar_domain *domain);
e61d98d8
SS
1217
1218void free_dmar_iommu(struct intel_iommu *iommu)
ba395927
KA
1219{
1220 struct dmar_domain *domain;
1221 int i;
c7151a8d 1222 unsigned long flags;
ba395927 1223
ba395927
KA
1224 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
1225 for (; i < cap_ndoms(iommu->cap); ) {
1226 domain = iommu->domains[i];
1227 clear_bit(i, iommu->domain_ids);
c7151a8d
WH
1228
1229 spin_lock_irqsave(&domain->iommu_lock, flags);
1230 if (--domain->iommu_count == 0)
1231 domain_exit(domain);
1232 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1233
ba395927
KA
1234 i = find_next_bit(iommu->domain_ids,
1235 cap_ndoms(iommu->cap), i+1);
1236 }
1237
1238 if (iommu->gcmd & DMA_GCMD_TE)
1239 iommu_disable_translation(iommu);
1240
1241 if (iommu->irq) {
1242 set_irq_data(iommu->irq, NULL);
1243 /* This will mask the irq */
1244 free_irq(iommu->irq, iommu);
1245 destroy_irq(iommu->irq);
1246 }
1247
1248 kfree(iommu->domains);
1249 kfree(iommu->domain_ids);
1250
d9630fe9
WH
1251 g_iommus[iommu->seq_id] = NULL;
1252
1253 /* if all iommus are freed, free g_iommus */
1254 for (i = 0; i < g_num_of_iommus; i++) {
1255 if (g_iommus[i])
1256 break;
1257 }
1258
1259 if (i == g_num_of_iommus)
1260 kfree(g_iommus);
1261
ba395927
KA
1262 /* free context mapping */
1263 free_context_table(iommu);
ba395927
KA
1264}
1265
1266static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
1267{
1268 unsigned long num;
1269 unsigned long ndomains;
1270 struct dmar_domain *domain;
1271 unsigned long flags;
1272
1273 domain = alloc_domain_mem();
1274 if (!domain)
1275 return NULL;
1276
1277 ndomains = cap_ndoms(iommu->cap);
1278
1279 spin_lock_irqsave(&iommu->lock, flags);
1280 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1281 if (num >= ndomains) {
1282 spin_unlock_irqrestore(&iommu->lock, flags);
1283 free_domain_mem(domain);
1284 printk(KERN_ERR "IOMMU: no free domain ids\n");
1285 return NULL;
1286 }
1287
1288 set_bit(num, iommu->domain_ids);
1289 domain->id = num;
8c11e798
WH
1290 memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
1291 set_bit(iommu->seq_id, &domain->iommu_bmp);
d71a2f33 1292 domain->flags = 0;
ba395927
KA
1293 iommu->domains[num] = domain;
1294 spin_unlock_irqrestore(&iommu->lock, flags);
1295
1296 return domain;
1297}
1298
1299static void iommu_free_domain(struct dmar_domain *domain)
1300{
1301 unsigned long flags;
8c11e798
WH
1302 struct intel_iommu *iommu;
1303
1304 iommu = domain_get_iommu(domain);
ba395927 1305
8c11e798
WH
1306 spin_lock_irqsave(&iommu->lock, flags);
1307 clear_bit(domain->id, iommu->domain_ids);
1308 spin_unlock_irqrestore(&iommu->lock, flags);
ba395927
KA
1309}
1310
1311static struct iova_domain reserved_iova_list;
8a443df4
MG
1312static struct lock_class_key reserved_alloc_key;
1313static struct lock_class_key reserved_rbtree_key;
ba395927
KA
1314
1315static void dmar_init_reserved_ranges(void)
1316{
1317 struct pci_dev *pdev = NULL;
1318 struct iova *iova;
1319 int i;
1320 u64 addr, size;
1321
f661197e 1322 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
ba395927 1323
8a443df4
MG
1324 lockdep_set_class(&reserved_iova_list.iova_alloc_lock,
1325 &reserved_alloc_key);
1326 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1327 &reserved_rbtree_key);
1328
ba395927
KA
1329 /* IOAPIC ranges shouldn't be accessed by DMA */
1330 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1331 IOVA_PFN(IOAPIC_RANGE_END));
1332 if (!iova)
1333 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1334
1335 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1336 for_each_pci_dev(pdev) {
1337 struct resource *r;
1338
1339 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1340 r = &pdev->resource[i];
1341 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1342 continue;
1343 addr = r->start;
5b6985ce 1344 addr &= PAGE_MASK;
ba395927 1345 size = r->end - addr;
5b6985ce 1346 size = PAGE_ALIGN(size);
ba395927
KA
1347 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
1348 IOVA_PFN(size + addr) - 1);
1349 if (!iova)
1350 printk(KERN_ERR "Reserve iova failed\n");
1351 }
1352 }
1353
1354}
1355
1356static void domain_reserve_special_ranges(struct dmar_domain *domain)
1357{
1358 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1359}
1360
1361static inline int guestwidth_to_adjustwidth(int gaw)
1362{
1363 int agaw;
1364 int r = (gaw - 12) % 9;
1365
1366 if (r == 0)
1367 agaw = gaw;
1368 else
1369 agaw = gaw + 9 - r;
1370 if (agaw > 64)
1371 agaw = 64;
1372 return agaw;
1373}
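/*
 * Worked examples (illustrative): a guest width of 48 already fits the
 * 12 + 9*n page-table layout, so guestwidth_to_adjustwidth(48) = 48;
 * a width of 36 rounds up to 39, and anything above 64 is capped at 64.
 */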
1374
1375static int domain_init(struct dmar_domain *domain, int guest_width)
1376{
1377 struct intel_iommu *iommu;
1378 int adjust_width, agaw;
1379 unsigned long sagaw;
1380
f661197e 1381 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
ba395927 1382 spin_lock_init(&domain->mapping_lock);
c7151a8d 1383 spin_lock_init(&domain->iommu_lock);
ba395927
KA
1384
1385 domain_reserve_special_ranges(domain);
1386
1387 /* calculate AGAW */
8c11e798 1388 iommu = domain_get_iommu(domain);
ba395927
KA
1389 if (guest_width > cap_mgaw(iommu->cap))
1390 guest_width = cap_mgaw(iommu->cap);
1391 domain->gaw = guest_width;
1392 adjust_width = guestwidth_to_adjustwidth(guest_width);
1393 agaw = width_to_agaw(adjust_width);
1394 sagaw = cap_sagaw(iommu->cap);
1395 if (!test_bit(agaw, &sagaw)) {
1396 /* hardware doesn't support it, choose a bigger one */
1397 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1398 agaw = find_next_bit(&sagaw, 5, agaw);
1399 if (agaw >= 5)
1400 return -ENODEV;
1401 }
1402 domain->agaw = agaw;
1403 INIT_LIST_HEAD(&domain->devices);
1404
8e604097
WH
1405 if (ecap_coherent(iommu->ecap))
1406 domain->iommu_coherency = 1;
1407 else
1408 domain->iommu_coherency = 0;
1409
c7151a8d
WH
1410 domain->iommu_count = 1;
1411
ba395927
KA
1412 /* always allocate the top pgd */
1413 domain->pgd = (struct dma_pte *)alloc_pgtable_page();
1414 if (!domain->pgd)
1415 return -ENOMEM;
5b6985ce 1416 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
ba395927
KA
1417 return 0;
1418}
1419
1420static void domain_exit(struct dmar_domain *domain)
1421{
1422 u64 end;
1423
 1424 /* Domain 0 is reserved, so don't process it */
1425 if (!domain)
1426 return;
1427
1428 domain_remove_dev_info(domain);
1429 /* destroy iovas */
1430 put_iova_domain(&domain->iovad);
1431 end = DOMAIN_MAX_ADDR(domain->gaw);
5b6985ce 1432 end = end & (~PAGE_MASK);
ba395927
KA
1433
1434 /* clear ptes */
1435 dma_pte_clear_range(domain, 0, end);
1436
1437 /* free page tables */
1438 dma_pte_free_pagetable(domain, 0, end);
1439
1440 iommu_free_domain(domain);
1441 free_domain_mem(domain);
1442}
1443
1444static int domain_context_mapping_one(struct dmar_domain *domain,
1445 u8 bus, u8 devfn)
1446{
1447 struct context_entry *context;
8c11e798 1448 struct intel_iommu *iommu = domain_get_iommu(domain);
ba395927
KA
1449 unsigned long flags;
1450
1451 pr_debug("Set context mapping for %02x:%02x.%d\n",
1452 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1453 BUG_ON(!domain->pgd);
1454 context = device_to_context_entry(iommu, bus, devfn);
1455 if (!context)
1456 return -ENOMEM;
1457 spin_lock_irqsave(&iommu->lock, flags);
c07e7d21 1458 if (context_present(context)) {
ba395927
KA
1459 spin_unlock_irqrestore(&iommu->lock, flags);
1460 return 0;
1461 }
1462
c07e7d21
MM
1463 context_set_domain_id(context, domain->id);
1464 context_set_address_width(context, domain->agaw);
1465 context_set_address_root(context, virt_to_phys(domain->pgd));
1466 context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL);
1467 context_set_fault_enable(context);
1468 context_set_present(context);
ba395927
KA
1469 __iommu_flush_cache(iommu, context, sizeof(*context));
1470
1471 /* it's a non-present to present mapping */
a77b67d4
YS
1472 if (iommu->flush.flush_context(iommu, domain->id,
1473 (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT,
1474 DMA_CCMD_DEVICE_INVL, 1))
ba395927
KA
1475 iommu_flush_write_buffer(iommu);
1476 else
a77b67d4
YS
1477 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0);
1478
ba395927 1479 spin_unlock_irqrestore(&iommu->lock, flags);
c7151a8d
WH
1480
1481 spin_lock_irqsave(&domain->iommu_lock, flags);
1482 if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) {
1483 domain->iommu_count++;
1484 domain_update_iommu_coherency(domain);
1485 }
1486 spin_unlock_irqrestore(&domain->iommu_lock, flags);
ba395927
KA
1487 return 0;
1488}
1489
1490static int
1491domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
1492{
1493 int ret;
1494 struct pci_dev *tmp, *parent;
1495
1496 ret = domain_context_mapping_one(domain, pdev->bus->number,
1497 pdev->devfn);
1498 if (ret)
1499 return ret;
1500
1501 /* dependent device mapping */
1502 tmp = pci_find_upstream_pcie_bridge(pdev);
1503 if (!tmp)
1504 return 0;
1505 /* Secondary interface's bus number and devfn 0 */
1506 parent = pdev->bus->self;
1507 while (parent != tmp) {
1508 ret = domain_context_mapping_one(domain, parent->bus->number,
1509 parent->devfn);
1510 if (ret)
1511 return ret;
1512 parent = parent->bus->self;
1513 }
1514 if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
1515 return domain_context_mapping_one(domain,
1516 tmp->subordinate->number, 0);
1517 else /* this is a legacy PCI bridge */
1518 return domain_context_mapping_one(domain,
1519 tmp->bus->number, tmp->devfn);
1520}
1521
1522static int domain_context_mapped(struct dmar_domain *domain,
1523 struct pci_dev *pdev)
1524{
1525 int ret;
1526 struct pci_dev *tmp, *parent;
8c11e798 1527 struct intel_iommu *iommu = domain_get_iommu(domain);
ba395927 1528
8c11e798 1529 ret = device_context_mapped(iommu,
ba395927
KA
1530 pdev->bus->number, pdev->devfn);
1531 if (!ret)
1532 return ret;
1533 /* dependent device mapping */
1534 tmp = pci_find_upstream_pcie_bridge(pdev);
1535 if (!tmp)
1536 return ret;
1537 /* Secondary interface's bus number and devfn 0 */
1538 parent = pdev->bus->self;
1539 while (parent != tmp) {
8c11e798 1540 ret = device_context_mapped(iommu, parent->bus->number,
ba395927
KA
1541 parent->devfn);
1542 if (!ret)
1543 return ret;
1544 parent = parent->bus->self;
1545 }
1546 if (tmp->is_pcie)
8c11e798 1547 return device_context_mapped(iommu,
ba395927
KA
1548 tmp->subordinate->number, 0);
1549 else
8c11e798 1550 return device_context_mapped(iommu,
ba395927
KA
1551 tmp->bus->number, tmp->devfn);
1552}
1553
1554static int
1555domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
1556 u64 hpa, size_t size, int prot)
1557{
1558 u64 start_pfn, end_pfn;
1559 struct dma_pte *pte;
1560 int index;
5b6985ce 1561 int addr_width = agaw_to_width(domain->agaw);
8c11e798 1562 struct intel_iommu *iommu = domain_get_iommu(domain);
5b6985ce
FY
1563
1564 hpa &= (((u64)1) << addr_width) - 1;
ba395927
KA
1565
1566 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1567 return -EINVAL;
5b6985ce
FY
1568 iova &= PAGE_MASK;
1569 start_pfn = ((u64)hpa) >> VTD_PAGE_SHIFT;
1570 end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT;
ba395927
KA
1571 index = 0;
1572 while (start_pfn < end_pfn) {
5b6985ce 1573 pte = addr_to_dma_pte(domain, iova + VTD_PAGE_SIZE * index);
ba395927
KA
1574 if (!pte)
1575 return -ENOMEM;
1576 /* We don't need lock here, nobody else
1577 * touches the iova range
1578 */
19c239ce
MM
1579 BUG_ON(dma_pte_addr(pte));
1580 dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT);
1581 dma_set_pte_prot(pte, prot);
8c11e798 1582 __iommu_flush_cache(iommu, pte, sizeof(*pte));
ba395927
KA
1583 start_pfn++;
1584 index++;
1585 }
1586 return 0;
1587}
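/*
 * Illustrative example (not extra driver logic): mapping size 0x2000 at
 * iova 0x200000 backed by hpa 0x10000000 walks two iterations of the loop
 * above, installing PTEs for iova 0x200000 and 0x201000 that point at host
 * pfns 0x10000 and 0x10001; prot must include DMA_PTE_READ and/or
 * DMA_PTE_WRITE or the call fails with -EINVAL.
 */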
1588
c7151a8d 1589static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
ba395927 1590{
c7151a8d
WH
1591 if (!iommu)
1592 return;
8c11e798
WH
1593
1594 clear_context_table(iommu, bus, devfn);
1595 iommu->flush.flush_context(iommu, 0, 0, 0,
a77b67d4 1596 DMA_CCMD_GLOBAL_INVL, 0);
8c11e798 1597 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
a77b67d4 1598 DMA_TLB_GLOBAL_FLUSH, 0);
ba395927
KA
1599}
1600
1601static void domain_remove_dev_info(struct dmar_domain *domain)
1602{
1603 struct device_domain_info *info;
1604 unsigned long flags;
c7151a8d 1605 struct intel_iommu *iommu;
ba395927
KA
1606
1607 spin_lock_irqsave(&device_domain_lock, flags);
1608 while (!list_empty(&domain->devices)) {
1609 info = list_entry(domain->devices.next,
1610 struct device_domain_info, link);
1611 list_del(&info->link);
1612 list_del(&info->global);
1613 if (info->dev)
358dd8ac 1614 info->dev->dev.archdata.iommu = NULL;
ba395927
KA
1615 spin_unlock_irqrestore(&device_domain_lock, flags);
1616
c7151a8d
WH
1617 iommu = device_to_iommu(info->bus, info->devfn);
1618 iommu_detach_dev(iommu, info->bus, info->devfn);
ba395927
KA
1619 free_devinfo_mem(info);
1620
1621 spin_lock_irqsave(&device_domain_lock, flags);
1622 }
1623 spin_unlock_irqrestore(&device_domain_lock, flags);
1624}
1625
1626/*
1627 * find_domain
 1628 * Note: struct pci_dev->dev.archdata.iommu stores this info
ba395927 1629 */
38717946 1630static struct dmar_domain *
ba395927
KA
1631find_domain(struct pci_dev *pdev)
1632{
1633 struct device_domain_info *info;
1634
1635 /* No lock here, assumes no domain exit in normal case */
358dd8ac 1636 info = pdev->dev.archdata.iommu;
ba395927
KA
1637 if (info)
1638 return info->domain;
1639 return NULL;
1640}
1641
ba395927
KA
1642/* domain is initialized */
1643static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1644{
1645 struct dmar_domain *domain, *found = NULL;
1646 struct intel_iommu *iommu;
1647 struct dmar_drhd_unit *drhd;
1648 struct device_domain_info *info, *tmp;
1649 struct pci_dev *dev_tmp;
1650 unsigned long flags;
1651 int bus = 0, devfn = 0;
1652
1653 domain = find_domain(pdev);
1654 if (domain)
1655 return domain;
1656
1657 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1658 if (dev_tmp) {
1659 if (dev_tmp->is_pcie) {
1660 bus = dev_tmp->subordinate->number;
1661 devfn = 0;
1662 } else {
1663 bus = dev_tmp->bus->number;
1664 devfn = dev_tmp->devfn;
1665 }
1666 spin_lock_irqsave(&device_domain_lock, flags);
1667 list_for_each_entry(info, &device_domain_list, global) {
1668 if (info->bus == bus && info->devfn == devfn) {
1669 found = info->domain;
1670 break;
1671 }
1672 }
1673 spin_unlock_irqrestore(&device_domain_lock, flags);
 1674 /* pcie-pci bridge already has a domain, use it */
1675 if (found) {
1676 domain = found;
1677 goto found_domain;
1678 }
1679 }
1680
1681 /* Allocate new domain for the device */
1682 drhd = dmar_find_matched_drhd_unit(pdev);
1683 if (!drhd) {
1684 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1685 pci_name(pdev));
1686 return NULL;
1687 }
1688 iommu = drhd->iommu;
1689
1690 domain = iommu_alloc_domain(iommu);
1691 if (!domain)
1692 goto error;
1693
1694 if (domain_init(domain, gaw)) {
1695 domain_exit(domain);
1696 goto error;
1697 }
1698
1699 /* register pcie-to-pci device */
1700 if (dev_tmp) {
1701 info = alloc_devinfo_mem();
1702 if (!info) {
1703 domain_exit(domain);
1704 goto error;
1705 }
1706 info->bus = bus;
1707 info->devfn = devfn;
1708 info->dev = NULL;
1709 info->domain = domain;
1710 /* This domain is shared by devices under p2p bridge */
3b5410e7 1711 domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
ba395927
KA
1712
 1713 /* pcie-to-pci bridge already has a domain, use it */
1714 found = NULL;
1715 spin_lock_irqsave(&device_domain_lock, flags);
1716 list_for_each_entry(tmp, &device_domain_list, global) {
1717 if (tmp->bus == bus && tmp->devfn == devfn) {
1718 found = tmp->domain;
1719 break;
1720 }
1721 }
1722 if (found) {
1723 free_devinfo_mem(info);
1724 domain_exit(domain);
1725 domain = found;
1726 } else {
1727 list_add(&info->link, &domain->devices);
1728 list_add(&info->global, &device_domain_list);
1729 }
1730 spin_unlock_irqrestore(&device_domain_lock, flags);
1731 }
1732
1733found_domain:
1734 info = alloc_devinfo_mem();
1735 if (!info)
1736 goto error;
1737 info->bus = pdev->bus->number;
1738 info->devfn = pdev->devfn;
1739 info->dev = pdev;
1740 info->domain = domain;
1741 spin_lock_irqsave(&device_domain_lock, flags);
1742 /* somebody is fast */
1743 found = find_domain(pdev);
1744 if (found != NULL) {
1745 spin_unlock_irqrestore(&device_domain_lock, flags);
1746 if (found != domain) {
1747 domain_exit(domain);
1748 domain = found;
1749 }
1750 free_devinfo_mem(info);
1751 return domain;
1752 }
1753 list_add(&info->link, &domain->devices);
1754 list_add(&info->global, &device_domain_list);
358dd8ac 1755 pdev->dev.archdata.iommu = info;
ba395927
KA
1756 spin_unlock_irqrestore(&device_domain_lock, flags);
1757 return domain;
1758error:
1759 /* recheck it here, maybe others set it */
1760 return find_domain(pdev);
1761}
1762
5b6985ce
FY
1763static int iommu_prepare_identity_map(struct pci_dev *pdev,
1764 unsigned long long start,
1765 unsigned long long end)
ba395927
KA
1766{
1767 struct dmar_domain *domain;
1768 unsigned long size;
5b6985ce 1769 unsigned long long base;
ba395927
KA
1770 int ret;
1771
1772 printk(KERN_INFO
1773 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1774 pci_name(pdev), start, end);
1775 /* page table init */
1776 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1777 if (!domain)
1778 return -ENOMEM;
1779
1780 /* The address might not be aligned */
5b6985ce 1781 base = start & PAGE_MASK;
ba395927 1782 size = end - base;
5b6985ce 1783 size = PAGE_ALIGN(size);
ba395927
KA
1784 if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1785 IOVA_PFN(base + size) - 1)) {
1786 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1787 ret = -ENOMEM;
1788 goto error;
1789 }
1790
1791 pr_debug("Mapping reserved region %lx@%llx for %s\n",
1792 size, base, pci_name(pdev));
1793 /*
1794 * RMRR range might have overlap with physical memory range,
1795 * clear it first
1796 */
1797 dma_pte_clear_range(domain, base, base + size);
1798
1799 ret = domain_page_mapping(domain, base, base, size,
1800 DMA_PTE_READ|DMA_PTE_WRITE);
1801 if (ret)
1802 goto error;
1803
1804 /* context entry init */
1805 ret = domain_context_mapping(domain, pdev);
1806 if (!ret)
1807 return 0;
1808error:
1809 domain_exit(domain);
1810 return ret;
1811
1812}
1813
1814static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1815 struct pci_dev *pdev)
1816{
358dd8ac 1817 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927
KA
1818 return 0;
1819 return iommu_prepare_identity_map(pdev, rmrr->base_address,
1820 rmrr->end_address + 1);
1821}
1822
e820482c 1823#ifdef CONFIG_DMAR_GFX_WA
d52d53b8
YL
1824struct iommu_prepare_data {
1825 struct pci_dev *pdev;
1826 int ret;
1827};
1828
1829static int __init iommu_prepare_work_fn(unsigned long start_pfn,
1830 unsigned long end_pfn, void *datax)
1831{
1832 struct iommu_prepare_data *data;
1833
1834 data = (struct iommu_prepare_data *)datax;
1835
1836 data->ret = iommu_prepare_identity_map(data->pdev,
1837 start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
1838 return data->ret;
1839
1840}
1841
1842static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev)
1843{
1844 int nid;
1845 struct iommu_prepare_data data;
1846
1847 data.pdev = pdev;
1848 data.ret = 0;
1849
1850 for_each_online_node(nid) {
1851 work_with_active_regions(nid, iommu_prepare_work_fn, &data);
1852 if (data.ret)
1853 return data.ret;
1854 }
1855 return data.ret;
1856}
1857
e820482c
KA
1858static void __init iommu_prepare_gfx_mapping(void)
1859{
1860 struct pci_dev *pdev = NULL;
e820482c
KA
1861 int ret;
1862
1863 for_each_pci_dev(pdev) {
358dd8ac 1864 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
e820482c
KA
1865 !IS_GFX_DEVICE(pdev))
1866 continue;
1867 printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
1868 pci_name(pdev));
d52d53b8
YL
1869 ret = iommu_prepare_with_active_regions(pdev);
1870 if (ret)
1871 printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
e820482c
KA
1872 }
1873}
2abd7e16
MM
1874#else /* !CONFIG_DMAR_GFX_WA */
1875static inline void iommu_prepare_gfx_mapping(void)
1876{
1877 return;
1878}
e820482c
KA
1879#endif
1880
49a0429e
KA
1881#ifdef CONFIG_DMAR_FLOPPY_WA
1882static inline void iommu_prepare_isa(void)
1883{
1884 struct pci_dev *pdev;
1885 int ret;
1886
1887 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
1888 if (!pdev)
1889 return;
1890
1891 printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
1892 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
1893
1894 if (ret)
 1895 printk("IOMMU: Failed to create 0-16M identity map, "
1896 "floppy might not work\n");
1897
1898}
1899#else
1900static inline void iommu_prepare_isa(void)
1901{
1902 return;
1903}
 1904#endif /* !CONFIG_DMAR_FLOPPY_WA */
1905
519a0549 1906static int __init init_dmars(void)
ba395927
KA
1907{
1908 struct dmar_drhd_unit *drhd;
1909 struct dmar_rmrr_unit *rmrr;
1910 struct pci_dev *pdev;
1911 struct intel_iommu *iommu;
80b20dd8 1912 int i, ret, unit = 0;
ba395927
KA
1913
1914 /*
1915 * for each drhd
1916 * allocate root
1917 * initialize and program root entry to not present
1918 * endfor
1919 */
1920 for_each_drhd_unit(drhd) {
5e0d2a6f 1921 g_num_of_iommus++;
1922 /*
 1923		 * lock not needed as this is only incremented in the
 1924		 * single-threaded kernel __init code path; all other
 1925		 * accesses are read-only
1926 */
1927 }
1928
d9630fe9
WH
1929 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
1930 GFP_KERNEL);
1931 if (!g_iommus) {
1932 printk(KERN_ERR "Allocating global iommu array failed\n");
1933 ret = -ENOMEM;
1934 goto error;
1935 }
1936
80b20dd8 1937 deferred_flush = kzalloc(g_num_of_iommus *
1938 sizeof(struct deferred_flush_tables), GFP_KERNEL);
1939 if (!deferred_flush) {
d9630fe9 1940 kfree(g_iommus);
5e0d2a6f 1941 ret = -ENOMEM;
1942 goto error;
1943 }
1944
5e0d2a6f 1945 for_each_drhd_unit(drhd) {
1946 if (drhd->ignored)
1947 continue;
1886e8a9
SS
1948
1949 iommu = drhd->iommu;
d9630fe9 1950 g_iommus[iommu->seq_id] = iommu;
ba395927 1951
e61d98d8
SS
1952 ret = iommu_init_domains(iommu);
1953 if (ret)
1954 goto error;
1955
ba395927
KA
1956 /*
1957 * TBD:
1958 * we could share the same root & context tables
 1959		 * among all IOMMUs. Need to split it later.
1960 */
1961 ret = iommu_alloc_root_entry(iommu);
1962 if (ret) {
1963 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
1964 goto error;
1965 }
1966 }
1967
a77b67d4
YS
1968 for_each_drhd_unit(drhd) {
1969 if (drhd->ignored)
1970 continue;
1971
1972 iommu = drhd->iommu;
1973 if (dmar_enable_qi(iommu)) {
1974 /*
1975 * Queued Invalidate not enabled, use Register Based
1976 * Invalidate
1977 */
1978 iommu->flush.flush_context = __iommu_flush_context;
1979 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
1980 printk(KERN_INFO "IOMMU 0x%Lx: using Register based "
b4e0f9eb
FT
1981 "invalidation\n",
1982 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
1983 } else {
1984 iommu->flush.flush_context = qi_flush_context;
1985 iommu->flush.flush_iotlb = qi_flush_iotlb;
1986 printk(KERN_INFO "IOMMU 0x%Lx: using Queued "
b4e0f9eb
FT
1987 "invalidation\n",
1988 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
1989 }
1990 }
1991
ba395927
KA
1992 /*
1993 * For each rmrr
1994 * for each dev attached to rmrr
1995 * do
1996 * locate drhd for dev, alloc domain for dev
1997 * allocate free domain
1998 * allocate page table entries for rmrr
1999 * if context not allocated for bus
2000 * allocate and init context
2001 * set present in root table for this bus
2002 * init context with domain, translation etc
2003 * endfor
2004 * endfor
2005 */
2006 for_each_rmrr_units(rmrr) {
ba395927
KA
2007 for (i = 0; i < rmrr->devices_cnt; i++) {
2008 pdev = rmrr->devices[i];
 2009			/* some BIOSes list non-existent devices in the DMAR table */
2010 if (!pdev)
2011 continue;
2012 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
2013 if (ret)
2014 printk(KERN_ERR
2015 "IOMMU: mapping reserved region failed\n");
2016 }
2017 }
2018
e820482c
KA
2019 iommu_prepare_gfx_mapping();
2020
49a0429e
KA
2021 iommu_prepare_isa();
2022
ba395927
KA
2023 /*
2024 * for each drhd
2025 * enable fault log
2026 * global invalidate context cache
2027 * global invalidate iotlb
2028 * enable translation
2029 */
2030 for_each_drhd_unit(drhd) {
2031 if (drhd->ignored)
2032 continue;
2033 iommu = drhd->iommu;
 2034		sprintf(iommu->name, "dmar%d", unit++);
2035
2036 iommu_flush_write_buffer(iommu);
2037
3460a6d9
KA
2038 ret = dmar_set_interrupt(iommu);
2039 if (ret)
2040 goto error;
2041
ba395927
KA
2042 iommu_set_root_entry(iommu);
2043
a77b67d4
YS
2044 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
2045 0);
2046 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
2047 0);
f8bab735 2048 iommu_disable_protect_mem_regions(iommu);
2049
ba395927
KA
2050 ret = iommu_enable_translation(iommu);
2051 if (ret)
2052 goto error;
2053 }
2054
2055 return 0;
2056error:
2057 for_each_drhd_unit(drhd) {
2058 if (drhd->ignored)
2059 continue;
2060 iommu = drhd->iommu;
2061 free_iommu(iommu);
2062 }
d9630fe9 2063 kfree(g_iommus);
ba395927
KA
2064 return ret;
2065}
2066
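/*
 * aligned_size() returns how many bytes must be mapped so that the whole
 * range [host_addr, host_addr + size) is covered by full pages: the in-page
 * offset is added to size and the result is rounded up to PAGE_SIZE.
 * For example, with 4KiB pages, host_addr = 0x1001 and size = 0x100 gives
 * PAGE_ALIGN(0x101) = 0x1000, i.e. one page.
 */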
2067static inline u64 aligned_size(u64 host_addr, size_t size)
2068{
2069 u64 addr;
5b6985ce
FY
2070 addr = (host_addr & (~PAGE_MASK)) + size;
2071 return PAGE_ALIGN(addr);
ba395927
KA
2072}
2073
2074struct iova *
f76aec76 2075iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
ba395927 2076{
ba395927
KA
2077 struct iova *piova;
2078
2079 /* Make sure it's in range */
ba395927 2080 end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
f76aec76 2081 if (!size || (IOVA_START_ADDR + size > end))
ba395927
KA
2082 return NULL;
2083
2084 piova = alloc_iova(&domain->iovad,
5b6985ce 2085 size >> PAGE_SHIFT, IOVA_PFN(end), 1);
ba395927
KA
2086 return piova;
2087}
2088
f76aec76
KA
2089static struct iova *
2090__intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
bb9e6d65 2091 size_t size, u64 dma_mask)
ba395927 2092{
ba395927 2093 struct pci_dev *pdev = to_pci_dev(dev);
ba395927 2094 struct iova *iova = NULL;
ba395927 2095
bb9e6d65
FT
2096 if (dma_mask <= DMA_32BIT_MASK || dmar_forcedac)
2097 iova = iommu_alloc_iova(domain, size, dma_mask);
2098 else {
ba395927
KA
2099 /*
 2100		 * First try to allocate an I/O virtual address below
 2101		 * DMA_32BIT_MASK and, if that fails, try allocating
3609801e 2102		 * from the higher range
ba395927 2103 */
f76aec76 2104 iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK);
ba395927 2105 if (!iova)
bb9e6d65 2106 iova = iommu_alloc_iova(domain, size, dma_mask);
ba395927
KA
2107 }
2108
2109 if (!iova) {
 2110		printk(KERN_ERR "Allocating iova for %s failed\n", pci_name(pdev));
f76aec76
KA
2111 return NULL;
2112 }
2113
2114 return iova;
2115}
2116
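/*
 * Look up (or lazily create) the DMA-remapping domain for a device and make
 * sure its context entry is programmed. Returns NULL on failure, in which
 * case the callers fail the DMA mapping.
 */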
2117static struct dmar_domain *
2118get_valid_domain_for_dev(struct pci_dev *pdev)
2119{
2120 struct dmar_domain *domain;
2121 int ret;
2122
2123 domain = get_domain_for_dev(pdev,
2124 DEFAULT_DOMAIN_ADDRESS_WIDTH);
2125 if (!domain) {
2126 printk(KERN_ERR
2127 "Allocating domain for %s failed", pci_name(pdev));
4fe05bbc 2128 return NULL;
ba395927
KA
2129 }
2130
2131 /* make sure context mapping is ok */
2132 if (unlikely(!domain_context_mapped(domain, pdev))) {
2133 ret = domain_context_mapping(domain, pdev);
f76aec76
KA
2134 if (ret) {
2135 printk(KERN_ERR
2136 "Domain context map for %s failed",
2137 pci_name(pdev));
4fe05bbc 2138 return NULL;
f76aec76 2139 }
ba395927
KA
2140 }
2141
f76aec76
KA
2142 return domain;
2143}
2144
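/*
 * Core single-buffer mapping path: pass-through devices get the physical
 * address back unchanged; otherwise an IOVA range large enough for the
 * page-aligned buffer is allocated, mapped with read/write permissions
 * derived from the DMA direction, and the IOTLB/write buffer is flushed for
 * the new mapping. Returns 0 on failure.
 */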
bb9e6d65
FT
2145static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2146 size_t size, int dir, u64 dma_mask)
f76aec76
KA
2147{
2148 struct pci_dev *pdev = to_pci_dev(hwdev);
f76aec76 2149 struct dmar_domain *domain;
5b6985ce 2150 phys_addr_t start_paddr;
f76aec76
KA
2151 struct iova *iova;
2152 int prot = 0;
6865f0d1 2153 int ret;
8c11e798 2154 struct intel_iommu *iommu;
f76aec76
KA
2155
2156 BUG_ON(dir == DMA_NONE);
358dd8ac 2157 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
6865f0d1 2158 return paddr;
f76aec76
KA
2159
2160 domain = get_valid_domain_for_dev(pdev);
2161 if (!domain)
2162 return 0;
2163
8c11e798 2164 iommu = domain_get_iommu(domain);
6865f0d1 2165 size = aligned_size((u64)paddr, size);
f76aec76 2166
bb9e6d65 2167 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
f76aec76
KA
2168 if (!iova)
2169 goto error;
2170
5b6985ce 2171 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
f76aec76 2172
ba395927
KA
2173 /*
 2174	 * Check if DMAR supports zero-length reads on write-only
 2175	 * mappings.
2176 */
2177 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 2178 !cap_zlr(iommu->cap))
ba395927
KA
2179 prot |= DMA_PTE_READ;
2180 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2181 prot |= DMA_PTE_WRITE;
2182 /*
6865f0d1 2183	 * paddr to (paddr + size) might span a partial page; we should map the whole
ba395927 2184	 * page. Note: if two parts of one page are mapped separately, we
6865f0d1 2185	 * might have two guest_addr mappings to the same host paddr, but this
ba395927
KA
2186 * is not a big problem
2187 */
6865f0d1 2188 ret = domain_page_mapping(domain, start_paddr,
5b6985ce 2189 ((u64)paddr) & PAGE_MASK, size, prot);
ba395927
KA
2190 if (ret)
2191 goto error;
2192
f76aec76 2193 /* it's a non-present to present mapping */
8c11e798 2194 ret = iommu_flush_iotlb_psi(iommu, domain->id,
5b6985ce 2195 start_paddr, size >> VTD_PAGE_SHIFT, 1);
f76aec76 2196 if (ret)
8c11e798 2197 iommu_flush_write_buffer(iommu);
f76aec76 2198
5b6985ce 2199 return start_paddr + ((u64)paddr & (~PAGE_MASK));
ba395927 2200
ba395927 2201error:
f76aec76
KA
2202 if (iova)
2203 __free_iova(&domain->iovad, iova);
ba395927 2204	printk(KERN_ERR "Device %s request: %lx@%llx dir %d --- failed\n",
5b6985ce 2205 pci_name(pdev), size, (unsigned long long)paddr, dir);
ba395927
KA
2206 return 0;
2207}
2208
bb9e6d65
FT
2209dma_addr_t intel_map_single(struct device *hwdev, phys_addr_t paddr,
2210 size_t size, int dir)
2211{
2212 return __intel_map_single(hwdev, paddr, size, dir,
2213 to_pci_dev(hwdev)->dma_mask);
2214}
2215
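/*
 * Deferred-unmap machinery: instead of flushing the IOTLB on every
 * intel_unmap_single() call, freed IOVAs are queued per IOMMU in
 * deferred_flush[] and released under one global IOTLB flush, either when
 * HIGH_WATER_MARK entries have accumulated or when the 10ms unmap_timer
 * fires (see add_unmap() and flush_unmaps_timeout() below).
 */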
5e0d2a6f 2216static void flush_unmaps(void)
2217{
80b20dd8 2218 int i, j;
5e0d2a6f 2219
5e0d2a6f 2220 timer_on = 0;
2221
2222 /* just flush them all */
2223 for (i = 0; i < g_num_of_iommus; i++) {
a2bb8459
WH
2224 struct intel_iommu *iommu = g_iommus[i];
2225 if (!iommu)
2226 continue;
c42d9f32 2227
a2bb8459 2228 if (deferred_flush[i].next) {
a77b67d4
YS
2229 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2230 DMA_TLB_GLOBAL_FLUSH, 0);
80b20dd8 2231 for (j = 0; j < deferred_flush[i].next; j++) {
2232 __free_iova(&deferred_flush[i].domain[j]->iovad,
2233 deferred_flush[i].iova[j]);
2234 }
2235 deferred_flush[i].next = 0;
2236 }
5e0d2a6f 2237 }
2238
5e0d2a6f 2239 list_size = 0;
5e0d2a6f 2240}
2241
2242static void flush_unmaps_timeout(unsigned long data)
2243{
80b20dd8 2244 unsigned long flags;
2245
2246 spin_lock_irqsave(&async_umap_flush_lock, flags);
5e0d2a6f 2247 flush_unmaps();
80b20dd8 2248 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
5e0d2a6f 2249}
2250
2251static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2252{
2253 unsigned long flags;
80b20dd8 2254 int next, iommu_id;
8c11e798 2255 struct intel_iommu *iommu;
5e0d2a6f 2256
2257 spin_lock_irqsave(&async_umap_flush_lock, flags);
80b20dd8 2258 if (list_size == HIGH_WATER_MARK)
2259 flush_unmaps();
2260
8c11e798
WH
2261 iommu = domain_get_iommu(dom);
2262 iommu_id = iommu->seq_id;
c42d9f32 2263
80b20dd8 2264 next = deferred_flush[iommu_id].next;
2265 deferred_flush[iommu_id].domain[next] = dom;
2266 deferred_flush[iommu_id].iova[next] = iova;
2267 deferred_flush[iommu_id].next++;
5e0d2a6f 2268
2269 if (!timer_on) {
2270 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2271 timer_on = 1;
2272 }
2273 list_size++;
2274 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2275}
2276
5b6985ce
FY
2277void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
2278 int dir)
ba395927 2279{
ba395927 2280 struct pci_dev *pdev = to_pci_dev(dev);
f76aec76
KA
2281 struct dmar_domain *domain;
2282 unsigned long start_addr;
ba395927 2283 struct iova *iova;
8c11e798 2284 struct intel_iommu *iommu;
ba395927 2285
358dd8ac 2286 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
f76aec76 2287 return;
ba395927
KA
2288 domain = find_domain(pdev);
2289 BUG_ON(!domain);
2290
8c11e798
WH
2291 iommu = domain_get_iommu(domain);
2292
ba395927 2293 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
f76aec76 2294 if (!iova)
ba395927 2295 return;
ba395927 2296
5b6985ce 2297 start_addr = iova->pfn_lo << PAGE_SHIFT;
f76aec76 2298 size = aligned_size((u64)dev_addr, size);
ba395927 2299
f76aec76 2300 pr_debug("Device %s unmapping: %lx@%llx\n",
5b6985ce 2301 pci_name(pdev), size, (unsigned long long)start_addr);
ba395927 2302
f76aec76
KA
2303 /* clear the whole page */
2304 dma_pte_clear_range(domain, start_addr, start_addr + size);
2305 /* free page tables */
2306 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
5e0d2a6f 2307 if (intel_iommu_strict) {
8c11e798 2308 if (iommu_flush_iotlb_psi(iommu,
5b6985ce 2309 domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0))
8c11e798 2310 iommu_flush_write_buffer(iommu);
5e0d2a6f 2311 /* free iova */
2312 __free_iova(&domain->iovad, iova);
2313 } else {
2314 add_unmap(domain, iova);
2315 /*
 2316		 * queue up the release of the unmap to save the roughly 1/6th of
 2317		 * the CPU time otherwise used up by the iotlb flush operation...
2318 */
5e0d2a6f 2319 }
ba395927
KA
2320}
2321
5b6985ce
FY
2322void *intel_alloc_coherent(struct device *hwdev, size_t size,
2323 dma_addr_t *dma_handle, gfp_t flags)
ba395927
KA
2324{
2325 void *vaddr;
2326 int order;
2327
5b6985ce 2328 size = PAGE_ALIGN(size);
ba395927
KA
2329 order = get_order(size);
2330 flags &= ~(GFP_DMA | GFP_DMA32);
2331
2332 vaddr = (void *)__get_free_pages(flags, order);
2333 if (!vaddr)
2334 return NULL;
2335 memset(vaddr, 0, size);
2336
bb9e6d65
FT
2337 *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2338 DMA_BIDIRECTIONAL,
2339 hwdev->coherent_dma_mask);
ba395927
KA
2340 if (*dma_handle)
2341 return vaddr;
2342 free_pages((unsigned long)vaddr, order);
2343 return NULL;
2344}
2345
5b6985ce
FY
2346void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
2347 dma_addr_t dma_handle)
ba395927
KA
2348{
2349 int order;
2350
5b6985ce 2351 size = PAGE_ALIGN(size);
ba395927
KA
2352 order = get_order(size);
2353
2354 intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
2355 free_pages((unsigned long)vaddr, order);
2356}
2357
12d4d40e 2358#define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
5b6985ce
FY
2359
2360void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2361 int nelems, int dir)
ba395927
KA
2362{
2363 int i;
2364 struct pci_dev *pdev = to_pci_dev(hwdev);
2365 struct dmar_domain *domain;
f76aec76
KA
2366 unsigned long start_addr;
2367 struct iova *iova;
2368 size_t size = 0;
2369 void *addr;
c03ab37c 2370 struct scatterlist *sg;
8c11e798 2371 struct intel_iommu *iommu;
ba395927 2372
358dd8ac 2373 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927
KA
2374 return;
2375
2376 domain = find_domain(pdev);
8c11e798
WH
2377 BUG_ON(!domain);
2378
2379 iommu = domain_get_iommu(domain);
ba395927 2380
c03ab37c 2381 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
f76aec76
KA
2382 if (!iova)
2383 return;
c03ab37c 2384 for_each_sg(sglist, sg, nelems, i) {
f76aec76
KA
2385 addr = SG_ENT_VIRT_ADDRESS(sg);
2386 size += aligned_size((u64)addr, sg->length);
2387 }
2388
5b6985ce 2389 start_addr = iova->pfn_lo << PAGE_SHIFT;
f76aec76
KA
2390
2391 /* clear the whole page */
2392 dma_pte_clear_range(domain, start_addr, start_addr + size);
2393 /* free page tables */
2394 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2395
8c11e798 2396 if (iommu_flush_iotlb_psi(iommu, domain->id, start_addr,
5b6985ce 2397 size >> VTD_PAGE_SHIFT, 0))
8c11e798 2398 iommu_flush_write_buffer(iommu);
f76aec76
KA
2399
2400 /* free iova */
2401 __free_iova(&domain->iovad, iova);
ba395927
KA
2402}
2403
ba395927 2404static int intel_nontranslate_map_sg(struct device *hddev,
c03ab37c 2405 struct scatterlist *sglist, int nelems, int dir)
ba395927
KA
2406{
2407 int i;
c03ab37c 2408 struct scatterlist *sg;
ba395927 2409
c03ab37c 2410 for_each_sg(sglist, sg, nelems, i) {
12d4d40e 2411 BUG_ON(!sg_page(sg));
c03ab37c
FT
2412 sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
2413 sg->dma_length = sg->length;
ba395927
KA
2414 }
2415 return nelems;
2416}
2417
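/*
 * Scatterlist mapping: the aligned sizes of all elements are summed so a
 * single contiguous IOVA range can be allocated, then each element is mapped
 * at its running offset within that range. On any mapping failure the
 * partially built range is torn down and 0 is returned; pass-through devices
 * go through intel_nontranslate_map_sg() instead.
 */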
5b6985ce
FY
2418int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
2419 int dir)
ba395927
KA
2420{
2421 void *addr;
2422 int i;
ba395927
KA
2423 struct pci_dev *pdev = to_pci_dev(hwdev);
2424 struct dmar_domain *domain;
f76aec76
KA
2425 size_t size = 0;
2426 int prot = 0;
2427 size_t offset = 0;
2428 struct iova *iova = NULL;
2429 int ret;
c03ab37c 2430 struct scatterlist *sg;
f76aec76 2431 unsigned long start_addr;
8c11e798 2432 struct intel_iommu *iommu;
ba395927
KA
2433
2434 BUG_ON(dir == DMA_NONE);
358dd8ac 2435 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
c03ab37c 2436 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
ba395927 2437
f76aec76
KA
2438 domain = get_valid_domain_for_dev(pdev);
2439 if (!domain)
2440 return 0;
2441
8c11e798
WH
2442 iommu = domain_get_iommu(domain);
2443
c03ab37c 2444 for_each_sg(sglist, sg, nelems, i) {
ba395927 2445 addr = SG_ENT_VIRT_ADDRESS(sg);
f76aec76
KA
2446 addr = (void *)virt_to_phys(addr);
2447 size += aligned_size((u64)addr, sg->length);
2448 }
2449
bb9e6d65 2450 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
f76aec76 2451 if (!iova) {
c03ab37c 2452 sglist->dma_length = 0;
f76aec76
KA
2453 return 0;
2454 }
2455
2456 /*
 2457	 * Check if DMAR supports zero-length reads on write-only
 2458	 * mappings.
2459 */
2460 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 2461 !cap_zlr(iommu->cap))
f76aec76
KA
2462 prot |= DMA_PTE_READ;
2463 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2464 prot |= DMA_PTE_WRITE;
2465
5b6985ce 2466 start_addr = iova->pfn_lo << PAGE_SHIFT;
f76aec76 2467 offset = 0;
c03ab37c 2468 for_each_sg(sglist, sg, nelems, i) {
f76aec76
KA
2469 addr = SG_ENT_VIRT_ADDRESS(sg);
2470 addr = (void *)virt_to_phys(addr);
2471 size = aligned_size((u64)addr, sg->length);
2472 ret = domain_page_mapping(domain, start_addr + offset,
5b6985ce 2473 ((u64)addr) & PAGE_MASK,
f76aec76
KA
2474 size, prot);
2475 if (ret) {
2476 /* clear the page */
2477 dma_pte_clear_range(domain, start_addr,
2478 start_addr + offset);
2479 /* free page tables */
2480 dma_pte_free_pagetable(domain, start_addr,
2481 start_addr + offset);
2482 /* free iova */
2483 __free_iova(&domain->iovad, iova);
ba395927
KA
2484 return 0;
2485 }
f76aec76 2486 sg->dma_address = start_addr + offset +
5b6985ce 2487 ((u64)addr & (~PAGE_MASK));
ba395927 2488 sg->dma_length = sg->length;
f76aec76 2489 offset += size;
ba395927
KA
2490 }
2491
ba395927 2492 /* it's a non-present to present mapping */
8c11e798 2493 if (iommu_flush_iotlb_psi(iommu, domain->id,
5b6985ce 2494 start_addr, offset >> VTD_PAGE_SHIFT, 1))
8c11e798 2495 iommu_flush_write_buffer(iommu);
ba395927
KA
2496 return nelems;
2497}
2498
2499static struct dma_mapping_ops intel_dma_ops = {
2500 .alloc_coherent = intel_alloc_coherent,
2501 .free_coherent = intel_free_coherent,
2502 .map_single = intel_map_single,
2503 .unmap_single = intel_unmap_single,
2504 .map_sg = intel_map_sg,
2505 .unmap_sg = intel_unmap_sg,
2506};
2507
2508static inline int iommu_domain_cache_init(void)
2509{
2510 int ret = 0;
2511
2512 iommu_domain_cache = kmem_cache_create("iommu_domain",
2513 sizeof(struct dmar_domain),
2514 0,
2515 SLAB_HWCACHE_ALIGN,
2516
2517 NULL);
2518 if (!iommu_domain_cache) {
2519 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
2520 ret = -ENOMEM;
2521 }
2522
2523 return ret;
2524}
2525
2526static inline int iommu_devinfo_cache_init(void)
2527{
2528 int ret = 0;
2529
2530 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
2531 sizeof(struct device_domain_info),
2532 0,
2533 SLAB_HWCACHE_ALIGN,
ba395927
KA
2534 NULL);
2535 if (!iommu_devinfo_cache) {
2536 printk(KERN_ERR "Couldn't create devinfo cache\n");
2537 ret = -ENOMEM;
2538 }
2539
2540 return ret;
2541}
2542
2543static inline int iommu_iova_cache_init(void)
2544{
2545 int ret = 0;
2546
2547 iommu_iova_cache = kmem_cache_create("iommu_iova",
2548 sizeof(struct iova),
2549 0,
2550 SLAB_HWCACHE_ALIGN,
ba395927
KA
2551 NULL);
2552 if (!iommu_iova_cache) {
2553 printk(KERN_ERR "Couldn't create iova cache\n");
2554 ret = -ENOMEM;
2555 }
2556
2557 return ret;
2558}
2559
2560static int __init iommu_init_mempool(void)
2561{
2562 int ret;
2563 ret = iommu_iova_cache_init();
2564 if (ret)
2565 return ret;
2566
2567 ret = iommu_domain_cache_init();
2568 if (ret)
2569 goto domain_error;
2570
2571 ret = iommu_devinfo_cache_init();
2572 if (!ret)
2573 return ret;
2574
2575 kmem_cache_destroy(iommu_domain_cache);
2576domain_error:
2577 kmem_cache_destroy(iommu_iova_cache);
2578
2579 return -ENOMEM;
2580}
2581
2582static void __init iommu_exit_mempool(void)
2583{
2584 kmem_cache_destroy(iommu_devinfo_cache);
2585 kmem_cache_destroy(iommu_domain_cache);
2586 kmem_cache_destroy(iommu_iova_cache);
2587
2588}
2589
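/*
 * Mark DRHD units that can be ignored: units whose device list contains no
 * PCI devices, and (unless dmar_map_gfx is set) units that cover only
 * graphics devices. Devices under an ignored unit get
 * DUMMY_DEVICE_DOMAIN_INFO so the DMA ops treat them as pass-through.
 */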
ba395927
KA
2590static void __init init_no_remapping_devices(void)
2591{
2592 struct dmar_drhd_unit *drhd;
2593
2594 for_each_drhd_unit(drhd) {
2595 if (!drhd->include_all) {
2596 int i;
2597 for (i = 0; i < drhd->devices_cnt; i++)
2598 if (drhd->devices[i] != NULL)
2599 break;
2600 /* ignore DMAR unit if no pci devices exist */
2601 if (i == drhd->devices_cnt)
2602 drhd->ignored = 1;
2603 }
2604 }
2605
2606 if (dmar_map_gfx)
2607 return;
2608
2609 for_each_drhd_unit(drhd) {
2610 int i;
2611 if (drhd->ignored || drhd->include_all)
2612 continue;
2613
2614 for (i = 0; i < drhd->devices_cnt; i++)
2615 if (drhd->devices[i] &&
2616 !IS_GFX_DEVICE(drhd->devices[i]))
2617 break;
2618
2619 if (i < drhd->devices_cnt)
2620 continue;
2621
2622 /* bypass IOMMU if it is just for gfx devices */
2623 drhd->ignored = 1;
2624 for (i = 0; i < drhd->devices_cnt; i++) {
2625 if (!drhd->devices[i])
2626 continue;
358dd8ac 2627 drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
ba395927
KA
2628 }
2629 }
2630}
2631
2632int __init intel_iommu_init(void)
2633{
2634 int ret = 0;
2635
ba395927
KA
2636 if (dmar_table_init())
2637 return -ENODEV;
2638
1886e8a9
SS
2639 if (dmar_dev_scope_init())
2640 return -ENODEV;
2641
2ae21010
SS
2642 /*
2643 * Check the need for DMA-remapping initialization now.
2644 * Above initialization will also be used by Interrupt-remapping.
2645 */
2646 if (no_iommu || swiotlb || dmar_disabled)
2647 return -ENODEV;
2648
ba395927
KA
2649 iommu_init_mempool();
2650 dmar_init_reserved_ranges();
2651
2652 init_no_remapping_devices();
2653
2654 ret = init_dmars();
2655 if (ret) {
2656 printk(KERN_ERR "IOMMU: dmar init failed\n");
2657 put_iova_domain(&reserved_iova_list);
2658 iommu_exit_mempool();
2659 return ret;
2660 }
2661 printk(KERN_INFO
2662 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
2663
5e0d2a6f 2664 init_timer(&unmap_timer);
ba395927
KA
2665 force_iommu = 1;
2666 dma_ops = &intel_dma_ops;
2667 return 0;
2668}
e820482c 2669
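/*
 * Device tracking for virtual machine domains: add or remove the
 * device_domain_info that links a PCI device to a VM domain, and keep the
 * domain's iommu_bmp, iommu_count and coherency flag consistent when the
 * last device behind an IOMMU is removed.
 */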
c7151a8d
WH
2670static int vm_domain_add_dev_info(struct dmar_domain *domain,
2671 struct pci_dev *pdev)
2672{
2673 struct device_domain_info *info;
2674 unsigned long flags;
2675
2676 info = alloc_devinfo_mem();
2677 if (!info)
2678 return -ENOMEM;
2679
2680 info->bus = pdev->bus->number;
2681 info->devfn = pdev->devfn;
2682 info->dev = pdev;
2683 info->domain = domain;
2684
2685 spin_lock_irqsave(&device_domain_lock, flags);
2686 list_add(&info->link, &domain->devices);
2687 list_add(&info->global, &device_domain_list);
2688 pdev->dev.archdata.iommu = info;
2689 spin_unlock_irqrestore(&device_domain_lock, flags);
2690
2691 return 0;
2692}
2693
2694static void vm_domain_remove_one_dev_info(struct dmar_domain *domain,
2695 struct pci_dev *pdev)
2696{
2697 struct device_domain_info *info;
2698 struct intel_iommu *iommu;
2699 unsigned long flags;
2700 int found = 0;
2701 struct list_head *entry, *tmp;
2702
2703 iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
2704 if (!iommu)
2705 return;
2706
2707 spin_lock_irqsave(&device_domain_lock, flags);
2708 list_for_each_safe(entry, tmp, &domain->devices) {
2709 info = list_entry(entry, struct device_domain_info, link);
2710 if (info->bus == pdev->bus->number &&
2711 info->devfn == pdev->devfn) {
2712 list_del(&info->link);
2713 list_del(&info->global);
2714 if (info->dev)
2715 info->dev->dev.archdata.iommu = NULL;
2716 spin_unlock_irqrestore(&device_domain_lock, flags);
2717
2718 iommu_detach_dev(iommu, info->bus, info->devfn);
2719 free_devinfo_mem(info);
2720
2721 spin_lock_irqsave(&device_domain_lock, flags);
2722
2723 if (found)
2724 break;
2725 else
2726 continue;
2727 }
2728
 2729		/* if there are no other devices under the same iommu
 2730		 * owned by this domain, clear this iommu in iommu_bmp and
 2731		 * update the iommu count and coherency
2732 */
2733 if (device_to_iommu(info->bus, info->devfn) == iommu)
2734 found = 1;
2735 }
2736
2737 if (found == 0) {
2738 unsigned long tmp_flags;
2739 spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
2740 clear_bit(iommu->seq_id, &domain->iommu_bmp);
2741 domain->iommu_count--;
2742 domain_update_iommu_coherency(domain);
2743 spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
2744 }
2745
2746 spin_unlock_irqrestore(&device_domain_lock, flags);
2747}
2748
2749static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
2750{
2751 struct device_domain_info *info;
2752 struct intel_iommu *iommu;
2753 unsigned long flags1, flags2;
2754
2755 spin_lock_irqsave(&device_domain_lock, flags1);
2756 while (!list_empty(&domain->devices)) {
2757 info = list_entry(domain->devices.next,
2758 struct device_domain_info, link);
2759 list_del(&info->link);
2760 list_del(&info->global);
2761 if (info->dev)
2762 info->dev->dev.archdata.iommu = NULL;
2763
2764 spin_unlock_irqrestore(&device_domain_lock, flags1);
2765
2766 iommu = device_to_iommu(info->bus, info->devfn);
2767 iommu_detach_dev(iommu, info->bus, info->devfn);
2768
2769 /* clear this iommu in iommu_bmp, update iommu count
2770 * and coherency
2771 */
2772 spin_lock_irqsave(&domain->iommu_lock, flags2);
2773 if (test_and_clear_bit(iommu->seq_id,
2774 &domain->iommu_bmp)) {
2775 domain->iommu_count--;
2776 domain_update_iommu_coherency(domain);
2777 }
2778 spin_unlock_irqrestore(&domain->iommu_lock, flags2);
2779
2780 free_devinfo_mem(info);
2781 spin_lock_irqsave(&device_domain_lock, flags1);
2782 }
2783 spin_unlock_irqrestore(&device_domain_lock, flags1);
2784}
2785
38717946
KA
2786void intel_iommu_domain_exit(struct dmar_domain *domain)
2787{
2788 u64 end;
2789
 2790	/* Domain 0 is reserved, so don't process it */
2791 if (!domain)
2792 return;
2793
2794 end = DOMAIN_MAX_ADDR(domain->gaw);
5b6985ce 2795 end = end & (~VTD_PAGE_MASK);
38717946
KA
2796
2797 /* clear ptes */
2798 dma_pte_clear_range(domain, 0, end);
2799
2800 /* free page tables */
2801 dma_pte_free_pagetable(domain, 0, end);
2802
2803 iommu_free_domain(domain);
2804 free_domain_mem(domain);
2805}
2806EXPORT_SYMBOL_GPL(intel_iommu_domain_exit);
2807
2808struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev)
2809{
2810 struct dmar_drhd_unit *drhd;
2811 struct dmar_domain *domain;
2812 struct intel_iommu *iommu;
2813
2814 drhd = dmar_find_matched_drhd_unit(pdev);
2815 if (!drhd) {
2816 printk(KERN_ERR "intel_iommu_domain_alloc: drhd == NULL\n");
2817 return NULL;
2818 }
2819
2820 iommu = drhd->iommu;
2821 if (!iommu) {
2822 printk(KERN_ERR
2823 "intel_iommu_domain_alloc: iommu == NULL\n");
2824 return NULL;
2825 }
2826 domain = iommu_alloc_domain(iommu);
2827 if (!domain) {
2828 printk(KERN_ERR
2829 "intel_iommu_domain_alloc: domain == NULL\n");
2830 return NULL;
2831 }
2832 if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2833 printk(KERN_ERR
2834 "intel_iommu_domain_alloc: domain_init() failed\n");
2835 intel_iommu_domain_exit(domain);
2836 return NULL;
2837 }
2838 return domain;
2839}
2840EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc);
2841
2842int intel_iommu_context_mapping(
2843 struct dmar_domain *domain, struct pci_dev *pdev)
2844{
2845 int rc;
2846 rc = domain_context_mapping(domain, pdev);
2847 return rc;
2848}
2849EXPORT_SYMBOL_GPL(intel_iommu_context_mapping);
2850
2851int intel_iommu_page_mapping(
2852 struct dmar_domain *domain, dma_addr_t iova,
2853 u64 hpa, size_t size, int prot)
2854{
2855 int rc;
2856 rc = domain_page_mapping(domain, iova, hpa, size, prot);
2857 return rc;
2858}
2859EXPORT_SYMBOL_GPL(intel_iommu_page_mapping);
2860
2861void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
2862{
c7151a8d
WH
2863 struct intel_iommu *iommu;
2864
2865 iommu = device_to_iommu(bus, devfn);
2866 iommu_detach_dev(iommu, bus, devfn);
38717946
KA
2867}
2868EXPORT_SYMBOL_GPL(intel_iommu_detach_dev);
2869
2870struct dmar_domain *
2871intel_iommu_find_domain(struct pci_dev *pdev)
2872{
2873 return find_domain(pdev);
2874}
2875EXPORT_SYMBOL_GPL(intel_iommu_find_domain);
2876
2877int intel_iommu_found(void)
2878{
2879 return g_num_of_iommus;
2880}
2881EXPORT_SYMBOL_GPL(intel_iommu_found);
2882
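/*
 * Translate an IOVA within the given domain to a host page frame number by
 * walking the domain's page tables; returns 0 if no PTE is present for that
 * address.
 */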
2883u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova)
2884{
2885 struct dma_pte *pte;
2886 u64 pfn;
2887
2888 pfn = 0;
2889 pte = addr_to_dma_pte(domain, iova);
2890
2891 if (pte)
19c239ce 2892 pfn = dma_pte_addr(pte);
38717946 2893
5b6985ce 2894 return pfn >> VTD_PAGE_SHIFT;
38717946
KA
2895}
2896EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn);