drivers/pci/intel-iommu.c
1/*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
17 * Copyright (C) 2006-2008 Intel Corporation
18 * Author: Ashok Raj <ashok.raj@intel.com>
19 * Author: Shaohua Li <shaohua.li@intel.com>
20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21 * Author: Fenghua Yu <fenghua.yu@intel.com>
22 */
23
24#include <linux/init.h>
25#include <linux/bitmap.h>
26#include <linux/debugfs.h>
27#include <linux/slab.h>
28#include <linux/irq.h>
29#include <linux/interrupt.h>
30#include <linux/spinlock.h>
31#include <linux/pci.h>
32#include <linux/dmar.h>
33#include <linux/dma-mapping.h>
34#include <linux/mempool.h>
35#include <linux/timer.h>
36#include <linux/iova.h>
37#include <linux/iommu.h>
38#include <linux/intel-iommu.h>
39#include <asm/cacheflush.h>
40#include <asm/iommu.h>
41#include "pci.h"
42
43#define ROOT_SIZE VTD_PAGE_SIZE
44#define CONTEXT_SIZE VTD_PAGE_SIZE
45
46#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
47#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
48
49#define IOAPIC_RANGE_START (0xfee00000)
50#define IOAPIC_RANGE_END (0xfeefffff)
51#define IOVA_START_ADDR (0x1000)
52
53#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
54
55#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
56
57#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
58#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK)
59#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK)
60
61/* global iommu list, set NULL for ignored DMAR units */
62static struct intel_iommu **g_iommus;
63
64/*
65 * 0: Present
66 * 1-11: Reserved
67 * 12-63: Context Ptr (12 - (haw-1))
68 * 64-127: Reserved
69 */
70struct root_entry {
71 u64 val;
72 u64 rsvd1;
73};
74#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
75static inline bool root_present(struct root_entry *root)
76{
77 return (root->val & 1);
78}
79static inline void set_root_present(struct root_entry *root)
80{
81 root->val |= 1;
82}
83static inline void set_root_value(struct root_entry *root, unsigned long value)
84{
85 root->val |= value & VTD_PAGE_MASK;
86}
87
88static inline struct context_entry *
89get_context_addr_from_root(struct root_entry *root)
90{
91 return (struct context_entry *)
92 (root_present(root)?phys_to_virt(
93 root->val & VTD_PAGE_MASK) :
94 NULL);
95}
96
97/*
98 * low 64 bits:
99 * 0: present
100 * 1: fault processing disable
101 * 2-3: translation type
102 * 12-63: address space root
103 * high 64 bits:
104 * 0-2: address width
105 * 3-6: aval
106 * 8-23: domain id
107 */
108struct context_entry {
109 u64 lo;
110 u64 hi;
111};
112
113static inline bool context_present(struct context_entry *context)
114{
115 return (context->lo & 1);
116}
117static inline void context_set_present(struct context_entry *context)
118{
119 context->lo |= 1;
120}
121
122static inline void context_set_fault_enable(struct context_entry *context)
123{
124 context->lo &= (((u64)-1) << 2) | 1;
125}
126
127#define CONTEXT_TT_MULTI_LEVEL 0
128
129static inline void context_set_translation_type(struct context_entry *context,
130 unsigned long value)
131{
132 context->lo &= (((u64)-1) << 4) | 3;
133 context->lo |= (value & 3) << 2;
134}
135
136static inline void context_set_address_root(struct context_entry *context,
137 unsigned long value)
138{
139 context->lo |= value & VTD_PAGE_MASK;
140}
141
142static inline void context_set_address_width(struct context_entry *context,
143 unsigned long value)
144{
145 context->hi |= value & 7;
146}
147
148static inline void context_set_domain_id(struct context_entry *context,
149 unsigned long value)
150{
151 context->hi |= (value & ((1 << 16) - 1)) << 8;
152}
153
154static inline void context_clear_entry(struct context_entry *context)
155{
156 context->lo = 0;
157 context->hi = 0;
158}
159
160/*
161 * 0: readable
162 * 1: writable
163 * 2-6: reserved
164 * 7: super page
165 * 8-11: available
166 * 12-63: Host physical address
167 */
168struct dma_pte {
169 u64 val;
170};
171
172static inline void dma_clear_pte(struct dma_pte *pte)
173{
174 pte->val = 0;
175}
176
177static inline void dma_set_pte_readable(struct dma_pte *pte)
178{
179 pte->val |= DMA_PTE_READ;
180}
181
182static inline void dma_set_pte_writable(struct dma_pte *pte)
183{
184 pte->val |= DMA_PTE_WRITE;
185}
186
187static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
188{
189 pte->val = (pte->val & ~3) | (prot & 3);
190}
191
192static inline u64 dma_pte_addr(struct dma_pte *pte)
193{
194 return (pte->val & VTD_PAGE_MASK);
195}
196
197static inline void dma_set_pte_addr(struct dma_pte *pte, u64 addr)
198{
199 pte->val |= (addr & VTD_PAGE_MASK);
200}
201
202static inline bool dma_pte_present(struct dma_pte *pte)
203{
204 return (pte->val & 3) != 0;
205}
206
207/* devices under the same p2p bridge are owned in one domain */
208#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
209
210/* domain represents a virtual machine; more than one device
211 * across iommus may be owned by one domain, e.g. a kvm guest.
212 */
213#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1)
214
215struct dmar_domain {
216 int id; /* domain id */
217 unsigned long iommu_bmp; /* bitmap of iommus this domain uses */
218
219 struct list_head devices; /* all devices' list */
220 struct iova_domain iovad; /* iova's that belong to this domain */
221
222 struct dma_pte *pgd; /* virtual address */
223 spinlock_t mapping_lock; /* page table lock */
224 int gaw; /* max guest address width */
225
226 /* adjusted guest address width, 0 is level 2 30-bit */
227 int agaw;
228
229 int flags; /* flags to find out type of domain */
230
231 int iommu_coherency; /* indicates coherency of iommu access */
232 int iommu_count; /* reference count of iommu */
233 spinlock_t iommu_lock; /* protect iommu set in domain */
234 u64 max_addr; /* maximum mapped address */
235};
236
237/* PCI domain-device relationship */
238struct device_domain_info {
239 struct list_head link; /* link to domain siblings */
240 struct list_head global; /* link to global list */
241 u8 bus; /* PCI bus number */
242 u8 devfn; /* PCI devfn number */
243 struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
244 struct dmar_domain *domain; /* pointer to domain */
245};
246
247static void flush_unmaps_timeout(unsigned long data);
248
249DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
250
251#define HIGH_WATER_MARK 250
252struct deferred_flush_tables {
253 int next;
254 struct iova *iova[HIGH_WATER_MARK];
255 struct dmar_domain *domain[HIGH_WATER_MARK];
256};
257
258static struct deferred_flush_tables *deferred_flush;
259
260/* bitmap for indexing intel_iommus */
261static int g_num_of_iommus;
262
263static DEFINE_SPINLOCK(async_umap_flush_lock);
264static LIST_HEAD(unmaps_to_do);
265
266static int timer_on;
267static long list_size;
268
269static void domain_remove_dev_info(struct dmar_domain *domain);
270
271int dmar_disabled;
272static int __initdata dmar_map_gfx = 1;
273static int dmar_forcedac;
274static int intel_iommu_strict;
275
276#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
277static DEFINE_SPINLOCK(device_domain_lock);
278static LIST_HEAD(device_domain_list);
279
280static int __init intel_iommu_setup(char *str)
281{
282 if (!str)
283 return -EINVAL;
284 while (*str) {
285 if (!strncmp(str, "off", 3)) {
286 dmar_disabled = 1;
287 printk(KERN_INFO"Intel-IOMMU: disabled\n");
288 } else if (!strncmp(str, "igfx_off", 8)) {
289 dmar_map_gfx = 0;
290 printk(KERN_INFO
291 "Intel-IOMMU: disable GFX device mapping\n");
292 } else if (!strncmp(str, "forcedac", 8)) {
293 printk(KERN_INFO
294 "Intel-IOMMU: Forcing DAC for PCI devices\n");
295 dmar_forcedac = 1;
296 } else if (!strncmp(str, "strict", 6)) {
297 printk(KERN_INFO
298 "Intel-IOMMU: disable batched IOTLB flush\n");
299 intel_iommu_strict = 1;
300 }
301
302 str += strcspn(str, ",");
303 while (*str == ',')
304 str++;
305 }
306 return 0;
307}
308__setup("intel_iommu=", intel_iommu_setup);
309
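/*
 * Editorial note: the parser above walks a comma-separated option string, so
 * booting with e.g. "intel_iommu=igfx_off,strict" disables the graphics
 * identity-map workaround and forces strict (unbatched) IOTLB flushing, while
 * "intel_iommu=off" disables the IOMMU code entirely.
 */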
310static struct kmem_cache *iommu_domain_cache;
311static struct kmem_cache *iommu_devinfo_cache;
312static struct kmem_cache *iommu_iova_cache;
313
314static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
315{
316 unsigned int flags;
317 void *vaddr;
318
319 /* trying to avoid low memory issues */
320 flags = current->flags & PF_MEMALLOC;
321 current->flags |= PF_MEMALLOC;
322 vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
323 current->flags &= (~PF_MEMALLOC | flags);
324 return vaddr;
325}
326
327
328static inline void *alloc_pgtable_page(void)
329{
330 unsigned int flags;
331 void *vaddr;
332
333 /* trying to avoid low memory issues */
334 flags = current->flags & PF_MEMALLOC;
335 current->flags |= PF_MEMALLOC;
336 vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
337 current->flags &= (~PF_MEMALLOC | flags);
338 return vaddr;
339}
340
341static inline void free_pgtable_page(void *vaddr)
342{
343 free_page((unsigned long)vaddr);
344}
345
346static inline void *alloc_domain_mem(void)
347{
348 return iommu_kmem_cache_alloc(iommu_domain_cache);
349}
350
351static void free_domain_mem(void *vaddr)
352{
353 kmem_cache_free(iommu_domain_cache, vaddr);
354}
355
356static inline void * alloc_devinfo_mem(void)
357{
358 return iommu_kmem_cache_alloc(iommu_devinfo_cache);
359}
360
361static inline void free_devinfo_mem(void *vaddr)
362{
363 kmem_cache_free(iommu_devinfo_cache, vaddr);
364}
365
366struct iova *alloc_iova_mem(void)
367{
368 return iommu_kmem_cache_alloc(iommu_iova_cache);
369}
370
371void free_iova_mem(struct iova *iova)
372{
373 kmem_cache_free(iommu_iova_cache, iova);
374}
375
376
377static inline int width_to_agaw(int width);
378
379/* calculate agaw for each iommu.
380 * "SAGAW" may be different across iommus, use a default agaw, and
381 * fall back to a smaller supported agaw for iommus that don't support the default.
382 */
383int iommu_calculate_agaw(struct intel_iommu *iommu)
384{
385 unsigned long sagaw;
386 int agaw = -1;
387
388 sagaw = cap_sagaw(iommu->cap);
389 for (agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
390 agaw >= 0; agaw--) {
391 if (test_bit(agaw, &sagaw))
392 break;
393 }
394
395 return agaw;
396}
397
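/*
 * Worked example (editorial note): cap_sagaw() yields a 5-bit mask of the
 * AGAWs this IOMMU supports.  With DEFAULT_DOMAIN_ADDRESS_WIDTH == 48 the
 * loop starts at width_to_agaw(48) == 2; an IOMMU reporting 39- and 48-bit
 * widths (bits 1 and 2 set) returns agaw 2, while one reporting only 39-bit
 * support falls back to agaw 1.
 */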
398/* in native case, each domain is related to only one iommu */
399static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
400{
401 int iommu_id;
402
403 BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
404
405 iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
406 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
407 return NULL;
408
409 return g_iommus[iommu_id];
410}
411
412/* "Coherency" capability may be different across iommus */
413static void domain_update_iommu_coherency(struct dmar_domain *domain)
414{
415 int i;
416
417 domain->iommu_coherency = 1;
418
419 i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
420 for (; i < g_num_of_iommus; ) {
421 if (!ecap_coherent(g_iommus[i]->ecap)) {
422 domain->iommu_coherency = 0;
423 break;
424 }
425 i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
426 }
427}
428
429static struct intel_iommu *device_to_iommu(u8 bus, u8 devfn)
430{
431 struct dmar_drhd_unit *drhd = NULL;
432 int i;
433
434 for_each_drhd_unit(drhd) {
435 if (drhd->ignored)
436 continue;
437
438 for (i = 0; i < drhd->devices_cnt; i++)
439 if (drhd->devices[i]->bus->number == bus &&
440 drhd->devices[i]->devfn == devfn)
441 return drhd->iommu;
442
443 if (drhd->include_all)
444 return drhd->iommu;
445 }
446
447 return NULL;
448}
449
450static void domain_flush_cache(struct dmar_domain *domain,
451 void *addr, int size)
452{
453 if (!domain->iommu_coherency)
454 clflush_cache_range(addr, size);
455}
456
457/* Gets context entry for a given bus and devfn */
458static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
459 u8 bus, u8 devfn)
460{
461 struct root_entry *root;
462 struct context_entry *context;
463 unsigned long phy_addr;
464 unsigned long flags;
465
466 spin_lock_irqsave(&iommu->lock, flags);
467 root = &iommu->root_entry[bus];
468 context = get_context_addr_from_root(root);
469 if (!context) {
470 context = (struct context_entry *)alloc_pgtable_page();
471 if (!context) {
472 spin_unlock_irqrestore(&iommu->lock, flags);
473 return NULL;
474 }
475 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
476 phy_addr = virt_to_phys((void *)context);
477 set_root_value(root, phy_addr);
478 set_root_present(root);
479 __iommu_flush_cache(iommu, root, sizeof(*root));
480 }
481 spin_unlock_irqrestore(&iommu->lock, flags);
482 return &context[devfn];
483}
484
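/*
 * Editorial note: the lookup above is a two-level radix walk keyed by PCI
 * source-id: the 8-bit bus number indexes the 256-entry root table and the
 * 8-bit devfn indexes the per-bus context table, which is allocated lazily
 * the first time any device on that bus is mapped.
 */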
485static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
486{
487 struct root_entry *root;
488 struct context_entry *context;
489 int ret;
490 unsigned long flags;
491
492 spin_lock_irqsave(&iommu->lock, flags);
493 root = &iommu->root_entry[bus];
494 context = get_context_addr_from_root(root);
495 if (!context) {
496 ret = 0;
497 goto out;
498 }
499 ret = context_present(&context[devfn]);
500out:
501 spin_unlock_irqrestore(&iommu->lock, flags);
502 return ret;
503}
504
505static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
506{
507 struct root_entry *root;
508 struct context_entry *context;
509 unsigned long flags;
510
511 spin_lock_irqsave(&iommu->lock, flags);
512 root = &iommu->root_entry[bus];
513 context = get_context_addr_from_root(root);
514 if (context) {
515 context_clear_entry(&context[devfn]);
516 __iommu_flush_cache(iommu, &context[devfn], \
517 sizeof(*context));
518 }
519 spin_unlock_irqrestore(&iommu->lock, flags);
520}
521
522static void free_context_table(struct intel_iommu *iommu)
523{
524 struct root_entry *root;
525 int i;
526 unsigned long flags;
527 struct context_entry *context;
528
529 spin_lock_irqsave(&iommu->lock, flags);
530 if (!iommu->root_entry) {
531 goto out;
532 }
533 for (i = 0; i < ROOT_ENTRY_NR; i++) {
534 root = &iommu->root_entry[i];
535 context = get_context_addr_from_root(root);
536 if (context)
537 free_pgtable_page(context);
538 }
539 free_pgtable_page(iommu->root_entry);
540 iommu->root_entry = NULL;
541out:
542 spin_unlock_irqrestore(&iommu->lock, flags);
543}
544
545/* page table handling */
546#define LEVEL_STRIDE (9)
547#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
548
549static inline int agaw_to_level(int agaw)
550{
551 return agaw + 2;
552}
553
554static inline int agaw_to_width(int agaw)
555{
556 return 30 + agaw * LEVEL_STRIDE;
557
558}
559
560static inline int width_to_agaw(int width)
561{
562 return (width - 30) / LEVEL_STRIDE;
563}
564
565static inline unsigned int level_to_offset_bits(int level)
566{
567 return (12 + (level - 1) * LEVEL_STRIDE);
568}
569
570static inline int address_level_offset(u64 addr, int level)
571{
572 return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
573}
574
575static inline u64 level_mask(int level)
576{
577 return ((u64)-1 << level_to_offset_bits(level));
578}
579
580static inline u64 level_size(int level)
581{
582 return ((u64)1 << level_to_offset_bits(level));
583}
584
585static inline u64 align_to_level(u64 addr, int level)
586{
587 return ((addr + level_size(level) - 1) & level_mask(level));
588}
589
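/*
 * Worked example of the AGAW/level arithmetic above (editorial note): with
 * the default 48-bit domain width,
 *
 *	width_to_agaw(48)       = (48 - 30) / 9 = 2
 *	agaw_to_level(2)        = 4	(a four-level page table)
 *	level_to_offset_bits(n) = 12, 21, 30, 39 for n = 1..4
 *
 * so an address is decoded as four 9-bit indexes above the 4KiB page offset,
 * which is exactly the walk addr_to_dma_pte() performs below.
 */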
590static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
591{
592 int addr_width = agaw_to_width(domain->agaw);
593 struct dma_pte *parent, *pte = NULL;
594 int level = agaw_to_level(domain->agaw);
595 int offset;
596 unsigned long flags;
597
598 BUG_ON(!domain->pgd);
599
600 addr &= (((u64)1) << addr_width) - 1;
601 parent = domain->pgd;
602
603 spin_lock_irqsave(&domain->mapping_lock, flags);
604 while (level > 0) {
605 void *tmp_page;
606
607 offset = address_level_offset(addr, level);
608 pte = &parent[offset];
609 if (level == 1)
610 break;
611
19c239ce 612 if (!dma_pte_present(pte)) {
613 tmp_page = alloc_pgtable_page();
614
615 if (!tmp_page) {
616 spin_unlock_irqrestore(&domain->mapping_lock,
617 flags);
618 return NULL;
619 }
5331fe6f 620 domain_flush_cache(domain, tmp_page, PAGE_SIZE);
19c239ce 621 dma_set_pte_addr(pte, virt_to_phys(tmp_page));
622 /*
623 * high level table always sets r/w, last level page
624 * table control read/write
625 */
626 dma_set_pte_readable(pte);
627 dma_set_pte_writable(pte);
5331fe6f 628 domain_flush_cache(domain, pte, sizeof(*pte));
ba395927 629 }
19c239ce 630 parent = phys_to_virt(dma_pte_addr(pte));
631 level--;
632 }
633
634 spin_unlock_irqrestore(&domain->mapping_lock, flags);
635 return pte;
636}
637
638/* return address's pte at specific level */
639static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
640 int level)
641{
642 struct dma_pte *parent, *pte = NULL;
643 int total = agaw_to_level(domain->agaw);
644 int offset;
645
646 parent = domain->pgd;
647 while (level <= total) {
648 offset = address_level_offset(addr, total);
649 pte = &parent[offset];
650 if (level == total)
651 return pte;
652
19c239ce 653 if (!dma_pte_present(pte))
ba395927 654 break;
19c239ce 655 parent = phys_to_virt(dma_pte_addr(pte));
656 total--;
657 }
658 return NULL;
659}
660
661/* clear one page's page table */
662static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
663{
664 struct dma_pte *pte = NULL;
665
666 /* get last level pte */
667 pte = dma_addr_level_pte(domain, addr, 1);
668
669 if (pte) {
19c239ce 670 dma_clear_pte(pte);
5331fe6f 671 domain_flush_cache(domain, pte, sizeof(*pte));
672 }
673}
674
675/* clear last level pte, a tlb flush should be followed */
676static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
677{
678 int addr_width = agaw_to_width(domain->agaw);
679
680 start &= (((u64)1) << addr_width) - 1;
681 end &= (((u64)1) << addr_width) - 1;
682 /* in case it's partial page */
683 start = PAGE_ALIGN(start);
684 end &= PAGE_MASK;
685
686 /* we don't need lock here, nobody else touches the iova range */
687 while (start < end) {
688 dma_pte_clear_one(domain, start);
5b6985ce 689 start += VTD_PAGE_SIZE;
690 }
691}
692
693/* free page table pages. last level pte should already be cleared */
694static void dma_pte_free_pagetable(struct dmar_domain *domain,
695 u64 start, u64 end)
696{
697 int addr_width = agaw_to_width(domain->agaw);
698 struct dma_pte *pte;
699 int total = agaw_to_level(domain->agaw);
700 int level;
701 u64 tmp;
702
703 start &= (((u64)1) << addr_width) - 1;
704 end &= (((u64)1) << addr_width) - 1;
705
706 /* we don't need lock here, nobody else touches the iova range */
707 level = 2;
708 while (level <= total) {
709 tmp = align_to_level(start, level);
710 if (tmp >= end || (tmp + level_size(level) > end))
711 return;
712
713 while (tmp < end) {
714 pte = dma_addr_level_pte(domain, tmp, level);
715 if (pte) {
716 free_pgtable_page(
717 phys_to_virt(dma_pte_addr(pte)));
718 dma_clear_pte(pte);
5331fe6f 719 domain_flush_cache(domain, pte, sizeof(*pte));
720 }
721 tmp += level_size(level);
722 }
723 level++;
724 }
725 /* free pgd */
726 if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
727 free_pgtable_page(domain->pgd);
728 domain->pgd = NULL;
729 }
730}
731
732/* iommu handling */
733static int iommu_alloc_root_entry(struct intel_iommu *iommu)
734{
735 struct root_entry *root;
736 unsigned long flags;
737
738 root = (struct root_entry *)alloc_pgtable_page();
739 if (!root)
740 return -ENOMEM;
741
5b6985ce 742 __iommu_flush_cache(iommu, root, ROOT_SIZE);
743
744 spin_lock_irqsave(&iommu->lock, flags);
745 iommu->root_entry = root;
746 spin_unlock_irqrestore(&iommu->lock, flags);
747
748 return 0;
749}
750
751static void iommu_set_root_entry(struct intel_iommu *iommu)
752{
753 void *addr;
754 u32 cmd, sts;
755 unsigned long flag;
756
757 addr = iommu->root_entry;
758
759 spin_lock_irqsave(&iommu->register_lock, flag);
760 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
761
762 cmd = iommu->gcmd | DMA_GCMD_SRTP;
763 writel(cmd, iommu->reg + DMAR_GCMD_REG);
764
765 /* Make sure hardware complete it */
766 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
767 readl, (sts & DMA_GSTS_RTPS), sts);
768
769 spin_unlock_irqrestore(&iommu->register_lock, flag);
770}
771
772static void iommu_flush_write_buffer(struct intel_iommu *iommu)
773{
774 u32 val;
775 unsigned long flag;
776
777 if (!cap_rwbf(iommu->cap))
778 return;
779 val = iommu->gcmd | DMA_GCMD_WBF;
780
781 spin_lock_irqsave(&iommu->register_lock, flag);
782 writel(val, iommu->reg + DMAR_GCMD_REG);
783
784 /* Make sure hardware complete it */
785 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
786 readl, (!(val & DMA_GSTS_WBFS)), val);
787
788 spin_unlock_irqrestore(&iommu->register_lock, flag);
789}
790
791/* return value determines whether we need a write buffer flush */
792static int __iommu_flush_context(struct intel_iommu *iommu,
793 u16 did, u16 source_id, u8 function_mask, u64 type,
794 int non_present_entry_flush)
795{
796 u64 val = 0;
797 unsigned long flag;
798
799 /*
800 * In the non-present entry flush case, if hardware doesn't cache
801 * non-present entry we do nothing and if hardware cache non-present
802 * entry, we flush entries of domain 0 (the domain id is used to cache
803 * any non-present entries)
804 */
805 if (non_present_entry_flush) {
806 if (!cap_caching_mode(iommu->cap))
807 return 1;
808 else
809 did = 0;
810 }
811
812 switch (type) {
813 case DMA_CCMD_GLOBAL_INVL:
814 val = DMA_CCMD_GLOBAL_INVL;
815 break;
816 case DMA_CCMD_DOMAIN_INVL:
817 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
818 break;
819 case DMA_CCMD_DEVICE_INVL:
820 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
821 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
822 break;
823 default:
824 BUG();
825 }
826 val |= DMA_CCMD_ICC;
827
828 spin_lock_irqsave(&iommu->register_lock, flag);
829 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
830
831 /* Make sure hardware complete it */
832 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
833 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
834
835 spin_unlock_irqrestore(&iommu->register_lock, flag);
836
4d235ba6 837 /* flush context entry will implicitly flush write buffer */
838 return 0;
839}
840
841/* return value determines whether we need a write buffer flush */
842static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
843 u64 addr, unsigned int size_order, u64 type,
844 int non_present_entry_flush)
845{
846 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
847 u64 val = 0, val_iva = 0;
848 unsigned long flag;
849
850 /*
851 * In the non-present entry flush case, if hardware doesn't cache
852 * non-present entry we do nothing and if hardware cache non-present
853 * entry, we flush entries of domain 0 (the domain id is used to cache
854 * any non-present entries)
855 */
856 if (non_present_entry_flush) {
857 if (!cap_caching_mode(iommu->cap))
858 return 1;
859 else
860 did = 0;
861 }
862
863 switch (type) {
864 case DMA_TLB_GLOBAL_FLUSH:
865 /* global flush doesn't need set IVA_REG */
866 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
867 break;
868 case DMA_TLB_DSI_FLUSH:
869 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
870 break;
871 case DMA_TLB_PSI_FLUSH:
872 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
873 /* Note: always flush non-leaf currently */
874 val_iva = size_order | addr;
875 break;
876 default:
877 BUG();
878 }
879 /* Note: set drain read/write */
880#if 0
881 /*
882 * This is probably to be super secure.. Looks like we can
883 * ignore it without any impact.
884 */
885 if (cap_read_drain(iommu->cap))
886 val |= DMA_TLB_READ_DRAIN;
887#endif
888 if (cap_write_drain(iommu->cap))
889 val |= DMA_TLB_WRITE_DRAIN;
890
891 spin_lock_irqsave(&iommu->register_lock, flag);
892 /* Note: Only uses first TLB reg currently */
893 if (val_iva)
894 dmar_writeq(iommu->reg + tlb_offset, val_iva);
895 dmar_writeq(iommu->reg + tlb_offset + 8, val);
896
897 /* Make sure hardware complete it */
898 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
899 dmar_readq, (!(val & DMA_TLB_IVT)), val);
900
901 spin_unlock_irqrestore(&iommu->register_lock, flag);
902
903 /* check IOTLB invalidation granularity */
904 if (DMA_TLB_IAIG(val) == 0)
905 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
906 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
907 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
908 (unsigned long long)DMA_TLB_IIRG(type),
909 (unsigned long long)DMA_TLB_IAIG(val));
4d235ba6 910 /* flush iotlb entry will implicitly flush write buffer */
911 return 0;
912}
913
914static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
915 u64 addr, unsigned int pages, int non_present_entry_flush)
916{
f76aec76 917 unsigned int mask;
ba395927 918
5b6985ce 919 BUG_ON(addr & (~VTD_PAGE_MASK));
920 BUG_ON(pages == 0);
921
922 /* Fallback to domain selective flush if no PSI support */
923 if (!cap_pgsel_inv(iommu->cap))
924 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
925 DMA_TLB_DSI_FLUSH,
926 non_present_entry_flush);
927
928 /*
929 * PSI requires page size to be 2 ^ x, and the base address is naturally
930 * aligned to the size
931 */
f76aec76 932 mask = ilog2(__roundup_pow_of_two(pages));
ba395927 933 /* Fallback to domain selective flush if size is too big */
f76aec76 934 if (mask > cap_max_amask_val(iommu->cap))
935 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
936 DMA_TLB_DSI_FLUSH, non_present_entry_flush);
ba395927 937
938 return iommu->flush.flush_iotlb(iommu, did, addr, mask,
939 DMA_TLB_PSI_FLUSH,
940 non_present_entry_flush);
941}
942
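/*
 * Worked example for the PSI mask above (editorial note): a request covering
 * 9 pages is rounded up to the next power of two, 16, so
 * mask = ilog2(16) = 4 and the hardware invalidates a naturally aligned
 * 2^4-page (64KiB) region; if the mask exceeds cap_max_amask_val(), the code
 * falls back to a domain-selective flush instead.
 */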
f8bab735 943static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
944{
945 u32 pmen;
946 unsigned long flags;
947
948 spin_lock_irqsave(&iommu->register_lock, flags);
949 pmen = readl(iommu->reg + DMAR_PMEN_REG);
950 pmen &= ~DMA_PMEN_EPM;
951 writel(pmen, iommu->reg + DMAR_PMEN_REG);
952
953 /* wait for the protected region status bit to clear */
954 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
955 readl, !(pmen & DMA_PMEN_PRS), pmen);
956
957 spin_unlock_irqrestore(&iommu->register_lock, flags);
958}
959
960static int iommu_enable_translation(struct intel_iommu *iommu)
961{
962 u32 sts;
963 unsigned long flags;
964
965 spin_lock_irqsave(&iommu->register_lock, flags);
966 writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);
967
968 /* Make sure hardware complete it */
969 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
970 readl, (sts & DMA_GSTS_TES), sts);
971
972 iommu->gcmd |= DMA_GCMD_TE;
973 spin_unlock_irqrestore(&iommu->register_lock, flags);
974 return 0;
975}
976
977static int iommu_disable_translation(struct intel_iommu *iommu)
978{
979 u32 sts;
980 unsigned long flag;
981
982 spin_lock_irqsave(&iommu->register_lock, flag);
983 iommu->gcmd &= ~DMA_GCMD_TE;
984 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
985
986 /* Make sure hardware complete it */
987 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
988 readl, (!(sts & DMA_GSTS_TES)), sts);
989
990 spin_unlock_irqrestore(&iommu->register_lock, flag);
991 return 0;
992}
993
994/* iommu interrupt handling. Most of it is MSI-like. */
995
d94afc6c 996static const char *fault_reason_strings[] =
997{
998 "Software",
999 "Present bit in root entry is clear",
1000 "Present bit in context entry is clear",
1001 "Invalid context entry",
1002 "Access beyond MGAW",
1003 "PTE Write access is not set",
1004 "PTE Read access is not set",
1005 "Next page table ptr is invalid",
1006 "Root table address invalid",
1007 "Context table ptr is invalid",
1008 "non-zero reserved fields in RTP",
1009 "non-zero reserved fields in CTP",
1010 "non-zero reserved fields in PTE",
3460a6d9 1011};
f8bab735 1012#define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1)
3460a6d9 1013
d94afc6c 1014const char *dmar_get_fault_reason(u8 fault_reason)
3460a6d9 1015{
d94afc6c 1016 if (fault_reason > MAX_FAULT_REASON_IDX)
1017 return "Unknown";
1018 else
1019 return fault_reason_strings[fault_reason];
1020}
1021
1022void dmar_msi_unmask(unsigned int irq)
1023{
1024 struct intel_iommu *iommu = get_irq_data(irq);
1025 unsigned long flag;
1026
1027 /* unmask it */
1028 spin_lock_irqsave(&iommu->register_lock, flag);
1029 writel(0, iommu->reg + DMAR_FECTL_REG);
1030 /* Read a reg to force flush the post write */
1031 readl(iommu->reg + DMAR_FECTL_REG);
1032 spin_unlock_irqrestore(&iommu->register_lock, flag);
1033}
1034
1035void dmar_msi_mask(unsigned int irq)
1036{
1037 unsigned long flag;
1038 struct intel_iommu *iommu = get_irq_data(irq);
1039
1040 /* mask it */
1041 spin_lock_irqsave(&iommu->register_lock, flag);
1042 writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
1043 /* Read a reg to force flush the post write */
1044 readl(iommu->reg + DMAR_FECTL_REG);
1045 spin_unlock_irqrestore(&iommu->register_lock, flag);
1046}
1047
1048void dmar_msi_write(int irq, struct msi_msg *msg)
1049{
1050 struct intel_iommu *iommu = get_irq_data(irq);
1051 unsigned long flag;
1052
1053 spin_lock_irqsave(&iommu->register_lock, flag);
1054 writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
1055 writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
1056 writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
1057 spin_unlock_irqrestore(&iommu->register_lock, flag);
1058}
1059
1060void dmar_msi_read(int irq, struct msi_msg *msg)
1061{
1062 struct intel_iommu *iommu = get_irq_data(irq);
1063 unsigned long flag;
1064
1065 spin_lock_irqsave(&iommu->register_lock, flag);
1066 msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
1067 msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
1068 msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
1069 spin_unlock_irqrestore(&iommu->register_lock, flag);
1070}
1071
1072static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
5b6985ce 1073 u8 fault_reason, u16 source_id, unsigned long long addr)
3460a6d9 1074{
d94afc6c 1075 const char *reason;
1076
1077 reason = dmar_get_fault_reason(fault_reason);
1078
1079 printk(KERN_ERR
1080 "DMAR:[%s] Request device [%02x:%02x.%d] "
1081 "fault addr %llx \n"
1082 "DMAR:[fault reason %02d] %s\n",
1083 (type ? "DMA Read" : "DMA Write"),
1084 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1085 PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
1086 return 0;
1087}
1088
1089#define PRIMARY_FAULT_REG_LEN (16)
1090static irqreturn_t iommu_page_fault(int irq, void *dev_id)
1091{
1092 struct intel_iommu *iommu = dev_id;
1093 int reg, fault_index;
1094 u32 fault_status;
1095 unsigned long flag;
1096
1097 spin_lock_irqsave(&iommu->register_lock, flag);
1098 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1099
1100 /* TBD: ignore advanced fault log currently */
1101 if (!(fault_status & DMA_FSTS_PPF))
1102 goto clear_overflow;
1103
1104 fault_index = dma_fsts_fault_record_index(fault_status);
1105 reg = cap_fault_reg_offset(iommu->cap);
1106 while (1) {
1107 u8 fault_reason;
1108 u16 source_id;
1109 u64 guest_addr;
1110 int type;
1111 u32 data;
1112
1113 /* highest 32 bits */
1114 data = readl(iommu->reg + reg +
1115 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1116 if (!(data & DMA_FRCD_F))
1117 break;
1118
1119 fault_reason = dma_frcd_fault_reason(data);
1120 type = dma_frcd_type(data);
1121
1122 data = readl(iommu->reg + reg +
1123 fault_index * PRIMARY_FAULT_REG_LEN + 8);
1124 source_id = dma_frcd_source_id(data);
1125
1126 guest_addr = dmar_readq(iommu->reg + reg +
1127 fault_index * PRIMARY_FAULT_REG_LEN);
1128 guest_addr = dma_frcd_page_addr(guest_addr);
1129 /* clear the fault */
1130 writel(DMA_FRCD_F, iommu->reg + reg +
1131 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1132
1133 spin_unlock_irqrestore(&iommu->register_lock, flag);
1134
1135 iommu_page_fault_do_one(iommu, type, fault_reason,
1136 source_id, guest_addr);
1137
1138 fault_index++;
1139 if (fault_index > cap_num_fault_regs(iommu->cap))
1140 fault_index = 0;
1141 spin_lock_irqsave(&iommu->register_lock, flag);
1142 }
1143clear_overflow:
1144 /* clear primary fault overflow */
1145 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1146 if (fault_status & DMA_FSTS_PFO)
1147 writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);
1148
1149 spin_unlock_irqrestore(&iommu->register_lock, flag);
1150 return IRQ_HANDLED;
1151}
1152
1153int dmar_set_interrupt(struct intel_iommu *iommu)
1154{
1155 int irq, ret;
1156
1157 irq = create_irq();
1158 if (!irq) {
1159 printk(KERN_ERR "IOMMU: no free vectors\n");
1160 return -EINVAL;
1161 }
1162
1163 set_irq_data(irq, iommu);
1164 iommu->irq = irq;
1165
1166 ret = arch_setup_dmar_msi(irq);
1167 if (ret) {
1168 set_irq_data(irq, NULL);
1169 iommu->irq = 0;
1170 destroy_irq(irq);
1171 return 0;
1172 }
1173
1174 /* Force fault register is cleared */
1175 iommu_page_fault(irq, iommu);
1176
1177 ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
1178 if (ret)
1179 printk(KERN_ERR "IOMMU: can't request irq\n");
1180 return ret;
1181}
1182
1183static int iommu_init_domains(struct intel_iommu *iommu)
1184{
1185 unsigned long ndomains;
1186 unsigned long nlongs;
1187
1188 ndomains = cap_ndoms(iommu->cap);
1189 pr_debug("Number of Domains supported <%ld>\n", ndomains);
1190 nlongs = BITS_TO_LONGS(ndomains);
1191
1192 /* TBD: there might be 64K domains,
1193 * consider other allocation for future chip
1194 */
1195 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1196 if (!iommu->domain_ids) {
1197 printk(KERN_ERR "Allocating domain id array failed\n");
1198 return -ENOMEM;
1199 }
1200 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1201 GFP_KERNEL);
1202 if (!iommu->domains) {
1203 printk(KERN_ERR "Allocating domain array failed\n");
1204 kfree(iommu->domain_ids);
1205 return -ENOMEM;
1206 }
1207
1208 spin_lock_init(&iommu->lock);
1209
1210 /*
1211 * if Caching mode is set, then invalid translations are tagged
1212 * with domainid 0. Hence we need to pre-allocate it.
1213 */
1214 if (cap_caching_mode(iommu->cap))
1215 set_bit(0, iommu->domain_ids);
1216 return 0;
1217}
1218
1219
1220static void domain_exit(struct dmar_domain *domain);
5e98c4b1 1221static void vm_domain_exit(struct dmar_domain *domain);
1222
1223void free_dmar_iommu(struct intel_iommu *iommu)
1224{
1225 struct dmar_domain *domain;
1226 int i;
c7151a8d 1227 unsigned long flags;
ba395927 1228
1229 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
1230 for (; i < cap_ndoms(iommu->cap); ) {
1231 domain = iommu->domains[i];
1232 clear_bit(i, iommu->domain_ids);
1233
1234 spin_lock_irqsave(&domain->iommu_lock, flags);
1235 if (--domain->iommu_count == 0) {
1236 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
1237 vm_domain_exit(domain);
1238 else
1239 domain_exit(domain);
1240 }
1241 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1242
1243 i = find_next_bit(iommu->domain_ids,
1244 cap_ndoms(iommu->cap), i+1);
1245 }
1246
1247 if (iommu->gcmd & DMA_GCMD_TE)
1248 iommu_disable_translation(iommu);
1249
1250 if (iommu->irq) {
1251 set_irq_data(iommu->irq, NULL);
1252 /* This will mask the irq */
1253 free_irq(iommu->irq, iommu);
1254 destroy_irq(iommu->irq);
1255 }
1256
1257 kfree(iommu->domains);
1258 kfree(iommu->domain_ids);
1259
1260 g_iommus[iommu->seq_id] = NULL;
1261
1262 /* if all iommus are freed, free g_iommus */
1263 for (i = 0; i < g_num_of_iommus; i++) {
1264 if (g_iommus[i])
1265 break;
1266 }
1267
1268 if (i == g_num_of_iommus)
1269 kfree(g_iommus);
1270
1271 /* free context mapping */
1272 free_context_table(iommu);
1273}
1274
1275static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
1276{
1277 unsigned long num;
1278 unsigned long ndomains;
1279 struct dmar_domain *domain;
1280 unsigned long flags;
1281
1282 domain = alloc_domain_mem();
1283 if (!domain)
1284 return NULL;
1285
1286 ndomains = cap_ndoms(iommu->cap);
1287
1288 spin_lock_irqsave(&iommu->lock, flags);
1289 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1290 if (num >= ndomains) {
1291 spin_unlock_irqrestore(&iommu->lock, flags);
1292 free_domain_mem(domain);
1293 printk(KERN_ERR "IOMMU: no free domain ids\n");
1294 return NULL;
1295 }
1296
1297 set_bit(num, iommu->domain_ids);
1298 domain->id = num;
1299 memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
1300 set_bit(iommu->seq_id, &domain->iommu_bmp);
d71a2f33 1301 domain->flags = 0;
ba395927
KA
1302 iommu->domains[num] = domain;
1303 spin_unlock_irqrestore(&iommu->lock, flags);
1304
1305 return domain;
1306}
1307
1308static void iommu_free_domain(struct dmar_domain *domain)
1309{
1310 unsigned long flags;
1311 struct intel_iommu *iommu;
1312
1313 iommu = domain_get_iommu(domain);
ba395927 1314
1315 spin_lock_irqsave(&iommu->lock, flags);
1316 clear_bit(domain->id, iommu->domain_ids);
1317 spin_unlock_irqrestore(&iommu->lock, flags);
1318}
1319
1320static struct iova_domain reserved_iova_list;
1321static struct lock_class_key reserved_alloc_key;
1322static struct lock_class_key reserved_rbtree_key;
1323
1324static void dmar_init_reserved_ranges(void)
1325{
1326 struct pci_dev *pdev = NULL;
1327 struct iova *iova;
1328 int i;
1329 u64 addr, size;
1330
f661197e 1331 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
ba395927 1332
1333 lockdep_set_class(&reserved_iova_list.iova_alloc_lock,
1334 &reserved_alloc_key);
1335 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1336 &reserved_rbtree_key);
1337
1338 /* IOAPIC ranges shouldn't be accessed by DMA */
1339 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1340 IOVA_PFN(IOAPIC_RANGE_END));
1341 if (!iova)
1342 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1343
1344 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1345 for_each_pci_dev(pdev) {
1346 struct resource *r;
1347
1348 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1349 r = &pdev->resource[i];
1350 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1351 continue;
1352 addr = r->start;
5b6985ce 1353 addr &= PAGE_MASK;
ba395927 1354 size = r->end - addr;
5b6985ce 1355 size = PAGE_ALIGN(size);
1356 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
1357 IOVA_PFN(size + addr) - 1);
1358 if (!iova)
1359 printk(KERN_ERR "Reserve iova failed\n");
1360 }
1361 }
1362
1363}
1364
1365static void domain_reserve_special_ranges(struct dmar_domain *domain)
1366{
1367 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1368}
1369
1370static inline int guestwidth_to_adjustwidth(int gaw)
1371{
1372 int agaw;
1373 int r = (gaw - 12) % 9;
1374
1375 if (r == 0)
1376 agaw = gaw;
1377 else
1378 agaw = gaw + 9 - r;
1379 if (agaw > 64)
1380 agaw = 64;
1381 return agaw;
1382}
1383
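/*
 * Worked example for guestwidth_to_adjustwidth() (editorial note): the
 * adjusted width is the guest width rounded up so that (width - 12) is a
 * whole number of 9-bit page-table levels, e.g. 36 -> 39, 39 -> 39,
 * 48 -> 48, and 60 -> 66, which is then clamped to 64.
 */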
1384static int domain_init(struct dmar_domain *domain, int guest_width)
1385{
1386 struct intel_iommu *iommu;
1387 int adjust_width, agaw;
1388 unsigned long sagaw;
1389
f661197e 1390 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
ba395927 1391 spin_lock_init(&domain->mapping_lock);
c7151a8d 1392 spin_lock_init(&domain->iommu_lock);
1393
1394 domain_reserve_special_ranges(domain);
1395
1396 /* calculate AGAW */
8c11e798 1397 iommu = domain_get_iommu(domain);
1398 if (guest_width > cap_mgaw(iommu->cap))
1399 guest_width = cap_mgaw(iommu->cap);
1400 domain->gaw = guest_width;
1401 adjust_width = guestwidth_to_adjustwidth(guest_width);
1402 agaw = width_to_agaw(adjust_width);
1403 sagaw = cap_sagaw(iommu->cap);
1404 if (!test_bit(agaw, &sagaw)) {
1405 /* hardware doesn't support it, choose a bigger one */
1406 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1407 agaw = find_next_bit(&sagaw, 5, agaw);
1408 if (agaw >= 5)
1409 return -ENODEV;
1410 }
1411 domain->agaw = agaw;
1412 INIT_LIST_HEAD(&domain->devices);
1413
1414 if (ecap_coherent(iommu->ecap))
1415 domain->iommu_coherency = 1;
1416 else
1417 domain->iommu_coherency = 0;
1418
1419 domain->iommu_count = 1;
1420
1421 /* always allocate the top pgd */
1422 domain->pgd = (struct dma_pte *)alloc_pgtable_page();
1423 if (!domain->pgd)
1424 return -ENOMEM;
5b6985ce 1425 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1426 return 0;
1427}
1428
1429static void domain_exit(struct dmar_domain *domain)
1430{
1431 u64 end;
1432
1433 /* Domain 0 is reserved, so don't process it */
1434 if (!domain)
1435 return;
1436
1437 domain_remove_dev_info(domain);
1438 /* destroy iovas */
1439 put_iova_domain(&domain->iovad);
1440 end = DOMAIN_MAX_ADDR(domain->gaw);
5b6985ce 1441 end = end & (~PAGE_MASK);
1442
1443 /* clear ptes */
1444 dma_pte_clear_range(domain, 0, end);
1445
1446 /* free page tables */
1447 dma_pte_free_pagetable(domain, 0, end);
1448
1449 iommu_free_domain(domain);
1450 free_domain_mem(domain);
1451}
1452
1453static int domain_context_mapping_one(struct dmar_domain *domain,
1454 u8 bus, u8 devfn)
1455{
1456 struct context_entry *context;
ba395927 1457 unsigned long flags;
5331fe6f 1458 struct intel_iommu *iommu;
1459 struct dma_pte *pgd;
1460 unsigned long num;
1461 unsigned long ndomains;
1462 int id;
1463 int agaw;
1464
1465 pr_debug("Set context mapping for %02x:%02x.%d\n",
1466 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1467 BUG_ON(!domain->pgd);
1468
1469 iommu = device_to_iommu(bus, devfn);
1470 if (!iommu)
1471 return -ENODEV;
1472
1473 context = device_to_context_entry(iommu, bus, devfn);
1474 if (!context)
1475 return -ENOMEM;
1476 spin_lock_irqsave(&iommu->lock, flags);
c07e7d21 1477 if (context_present(context)) {
1478 spin_unlock_irqrestore(&iommu->lock, flags);
1479 return 0;
1480 }
1481
1482 id = domain->id;
1483 pgd = domain->pgd;
1484
1485 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) {
1486 int found = 0;
1487
1488 /* find an available domain id for this device in iommu */
1489 ndomains = cap_ndoms(iommu->cap);
1490 num = find_first_bit(iommu->domain_ids, ndomains);
1491 for (; num < ndomains; ) {
1492 if (iommu->domains[num] == domain) {
1493 id = num;
1494 found = 1;
1495 break;
1496 }
1497 num = find_next_bit(iommu->domain_ids,
1498 cap_ndoms(iommu->cap), num+1);
1499 }
1500
1501 if (found == 0) {
1502 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1503 if (num >= ndomains) {
1504 spin_unlock_irqrestore(&iommu->lock, flags);
1505 printk(KERN_ERR "IOMMU: no free domain ids\n");
1506 return -EFAULT;
1507 }
1508
1509 set_bit(num, iommu->domain_ids);
1510 iommu->domains[num] = domain;
1511 id = num;
1512 }
1513
1514 /* Skip top levels of page tables for
1515 * iommu which has less agaw than default.
1516 */
1517 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1518 pgd = phys_to_virt(dma_pte_addr(pgd));
1519 if (!dma_pte_present(pgd)) {
1520 spin_unlock_irqrestore(&iommu->lock, flags);
1521 return -ENOMEM;
1522 }
1523 }
1524 }
1525
1526 context_set_domain_id(context, id);
1527 context_set_address_width(context, iommu->agaw);
1528 context_set_address_root(context, virt_to_phys(pgd));
1529 context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL);
1530 context_set_fault_enable(context);
1531 context_set_present(context);
5331fe6f 1532 domain_flush_cache(domain, context, sizeof(*context));
1533
1534 /* it's a non-present to present mapping */
1535 if (iommu->flush.flush_context(iommu, domain->id,
1536 (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT,
1537 DMA_CCMD_DEVICE_INVL, 1))
1538 iommu_flush_write_buffer(iommu);
1539 else
1540 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0);
1541
ba395927 1542 spin_unlock_irqrestore(&iommu->lock, flags);
1543
1544 spin_lock_irqsave(&domain->iommu_lock, flags);
1545 if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) {
1546 domain->iommu_count++;
1547 domain_update_iommu_coherency(domain);
1548 }
1549 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1550 return 0;
1551}
1552
1553static int
1554domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
1555{
1556 int ret;
1557 struct pci_dev *tmp, *parent;
1558
1559 ret = domain_context_mapping_one(domain, pdev->bus->number,
1560 pdev->devfn);
1561 if (ret)
1562 return ret;
1563
1564 /* dependent device mapping */
1565 tmp = pci_find_upstream_pcie_bridge(pdev);
1566 if (!tmp)
1567 return 0;
1568 /* Secondary interface's bus number and devfn 0 */
1569 parent = pdev->bus->self;
1570 while (parent != tmp) {
1571 ret = domain_context_mapping_one(domain, parent->bus->number,
1572 parent->devfn);
1573 if (ret)
1574 return ret;
1575 parent = parent->bus->self;
1576 }
1577 if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
1578 return domain_context_mapping_one(domain,
1579 tmp->subordinate->number, 0);
1580 else /* this is a legacy PCI bridge */
1581 return domain_context_mapping_one(domain,
1582 tmp->bus->number, tmp->devfn);
1583}
1584
5331fe6f 1585static int domain_context_mapped(struct pci_dev *pdev)
1586{
1587 int ret;
1588 struct pci_dev *tmp, *parent;
1589 struct intel_iommu *iommu;
1590
1591 iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
1592 if (!iommu)
1593 return -ENODEV;
ba395927 1594
8c11e798 1595 ret = device_context_mapped(iommu,
1596 pdev->bus->number, pdev->devfn);
1597 if (!ret)
1598 return ret;
1599 /* dependent device mapping */
1600 tmp = pci_find_upstream_pcie_bridge(pdev);
1601 if (!tmp)
1602 return ret;
1603 /* Secondary interface's bus number and devfn 0 */
1604 parent = pdev->bus->self;
1605 while (parent != tmp) {
8c11e798 1606 ret = device_context_mapped(iommu, parent->bus->number,
1607 parent->devfn);
1608 if (!ret)
1609 return ret;
1610 parent = parent->bus->self;
1611 }
1612 if (tmp->is_pcie)
8c11e798 1613 return device_context_mapped(iommu,
1614 tmp->subordinate->number, 0);
1615 else
8c11e798 1616 return device_context_mapped(iommu,
1617 tmp->bus->number, tmp->devfn);
1618}
1619
1620static int
1621domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
1622 u64 hpa, size_t size, int prot)
1623{
1624 u64 start_pfn, end_pfn;
1625 struct dma_pte *pte;
1626 int index;
1627 int addr_width = agaw_to_width(domain->agaw);
1628
1629 hpa &= (((u64)1) << addr_width) - 1;
1630
1631 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1632 return -EINVAL;
1633 iova &= PAGE_MASK;
1634 start_pfn = ((u64)hpa) >> VTD_PAGE_SHIFT;
1635 end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT;
1636 index = 0;
1637 while (start_pfn < end_pfn) {
5b6985ce 1638 pte = addr_to_dma_pte(domain, iova + VTD_PAGE_SIZE * index);
1639 if (!pte)
1640 return -ENOMEM;
1641 /* We don't need lock here, nobody else
1642 * touches the iova range
1643 */
1644 BUG_ON(dma_pte_addr(pte));
1645 dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT);
1646 dma_set_pte_prot(pte, prot);
5331fe6f 1647 domain_flush_cache(domain, pte, sizeof(*pte));
1648 start_pfn++;
1649 index++;
1650 }
1651 return 0;
1652}
1653
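/*
 * Worked example for domain_page_mapping() above (editorial note): mapping
 * hpa 0x10000500 with size 0x2000 gives start_pfn 0x10000 and end_pfn
 * 0x10003 (the end is rounded up to a page boundary), so three last-level
 * PTEs are written, one per 4KiB page, at consecutive IOVA pages.
 */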
c7151a8d 1654static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
ba395927 1655{
1656 if (!iommu)
1657 return;
1658
1659 clear_context_table(iommu, bus, devfn);
1660 iommu->flush.flush_context(iommu, 0, 0, 0,
a77b67d4 1661 DMA_CCMD_GLOBAL_INVL, 0);
8c11e798 1662 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
a77b67d4 1663 DMA_TLB_GLOBAL_FLUSH, 0);
1664}
1665
1666static void domain_remove_dev_info(struct dmar_domain *domain)
1667{
1668 struct device_domain_info *info;
1669 unsigned long flags;
c7151a8d 1670 struct intel_iommu *iommu;
1671
1672 spin_lock_irqsave(&device_domain_lock, flags);
1673 while (!list_empty(&domain->devices)) {
1674 info = list_entry(domain->devices.next,
1675 struct device_domain_info, link);
1676 list_del(&info->link);
1677 list_del(&info->global);
1678 if (info->dev)
358dd8ac 1679 info->dev->dev.archdata.iommu = NULL;
1680 spin_unlock_irqrestore(&device_domain_lock, flags);
1681
1682 iommu = device_to_iommu(info->bus, info->devfn);
1683 iommu_detach_dev(iommu, info->bus, info->devfn);
1684 free_devinfo_mem(info);
1685
1686 spin_lock_irqsave(&device_domain_lock, flags);
1687 }
1688 spin_unlock_irqrestore(&device_domain_lock, flags);
1689}
1690
1691/*
1692 * find_domain
1693 * Note: we use struct pci_dev->dev.archdata.iommu to store the info
ba395927 1694 */
38717946 1695static struct dmar_domain *
1696find_domain(struct pci_dev *pdev)
1697{
1698 struct device_domain_info *info;
1699
1700 /* No lock here, assumes no domain exit in normal case */
358dd8ac 1701 info = pdev->dev.archdata.iommu;
1702 if (info)
1703 return info->domain;
1704 return NULL;
1705}
1706
1707/* domain is initialized */
1708static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1709{
1710 struct dmar_domain *domain, *found = NULL;
1711 struct intel_iommu *iommu;
1712 struct dmar_drhd_unit *drhd;
1713 struct device_domain_info *info, *tmp;
1714 struct pci_dev *dev_tmp;
1715 unsigned long flags;
1716 int bus = 0, devfn = 0;
1717
1718 domain = find_domain(pdev);
1719 if (domain)
1720 return domain;
1721
1722 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1723 if (dev_tmp) {
1724 if (dev_tmp->is_pcie) {
1725 bus = dev_tmp->subordinate->number;
1726 devfn = 0;
1727 } else {
1728 bus = dev_tmp->bus->number;
1729 devfn = dev_tmp->devfn;
1730 }
1731 spin_lock_irqsave(&device_domain_lock, flags);
1732 list_for_each_entry(info, &device_domain_list, global) {
1733 if (info->bus == bus && info->devfn == devfn) {
1734 found = info->domain;
1735 break;
1736 }
1737 }
1738 spin_unlock_irqrestore(&device_domain_lock, flags);
1739 /* pcie-pci bridge already has a domain, uses it */
1740 if (found) {
1741 domain = found;
1742 goto found_domain;
1743 }
1744 }
1745
1746 /* Allocate new domain for the device */
1747 drhd = dmar_find_matched_drhd_unit(pdev);
1748 if (!drhd) {
1749 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1750 pci_name(pdev));
1751 return NULL;
1752 }
1753 iommu = drhd->iommu;
1754
1755 domain = iommu_alloc_domain(iommu);
1756 if (!domain)
1757 goto error;
1758
1759 if (domain_init(domain, gaw)) {
1760 domain_exit(domain);
1761 goto error;
1762 }
1763
1764 /* register pcie-to-pci device */
1765 if (dev_tmp) {
1766 info = alloc_devinfo_mem();
1767 if (!info) {
1768 domain_exit(domain);
1769 goto error;
1770 }
1771 info->bus = bus;
1772 info->devfn = devfn;
1773 info->dev = NULL;
1774 info->domain = domain;
1775 /* This domain is shared by devices under p2p bridge */
3b5410e7 1776 domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
1777
1778 /* pcie-to-pci bridge already has a domain, uses it */
1779 found = NULL;
1780 spin_lock_irqsave(&device_domain_lock, flags);
1781 list_for_each_entry(tmp, &device_domain_list, global) {
1782 if (tmp->bus == bus && tmp->devfn == devfn) {
1783 found = tmp->domain;
1784 break;
1785 }
1786 }
1787 if (found) {
1788 free_devinfo_mem(info);
1789 domain_exit(domain);
1790 domain = found;
1791 } else {
1792 list_add(&info->link, &domain->devices);
1793 list_add(&info->global, &device_domain_list);
1794 }
1795 spin_unlock_irqrestore(&device_domain_lock, flags);
1796 }
1797
1798found_domain:
1799 info = alloc_devinfo_mem();
1800 if (!info)
1801 goto error;
1802 info->bus = pdev->bus->number;
1803 info->devfn = pdev->devfn;
1804 info->dev = pdev;
1805 info->domain = domain;
1806 spin_lock_irqsave(&device_domain_lock, flags);
1807 /* somebody is fast */
1808 found = find_domain(pdev);
1809 if (found != NULL) {
1810 spin_unlock_irqrestore(&device_domain_lock, flags);
1811 if (found != domain) {
1812 domain_exit(domain);
1813 domain = found;
1814 }
1815 free_devinfo_mem(info);
1816 return domain;
1817 }
1818 list_add(&info->link, &domain->devices);
1819 list_add(&info->global, &device_domain_list);
358dd8ac 1820 pdev->dev.archdata.iommu = info;
1821 spin_unlock_irqrestore(&device_domain_lock, flags);
1822 return domain;
1823error:
1824 /* recheck it here, maybe others set it */
1825 return find_domain(pdev);
1826}
1827
1828static int iommu_prepare_identity_map(struct pci_dev *pdev,
1829 unsigned long long start,
1830 unsigned long long end)
1831{
1832 struct dmar_domain *domain;
1833 unsigned long size;
5b6985ce 1834 unsigned long long base;
1835 int ret;
1836
1837 printk(KERN_INFO
1838 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1839 pci_name(pdev), start, end);
1840 /* page table init */
1841 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1842 if (!domain)
1843 return -ENOMEM;
1844
1845 /* The address might not be aligned */
5b6985ce 1846 base = start & PAGE_MASK;
ba395927 1847 size = end - base;
5b6985ce 1848 size = PAGE_ALIGN(size);
1849 if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1850 IOVA_PFN(base + size) - 1)) {
1851 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1852 ret = -ENOMEM;
1853 goto error;
1854 }
1855
1856 pr_debug("Mapping reserved region %lx@%llx for %s\n",
1857 size, base, pci_name(pdev));
1858 /*
1859 * RMRR range might have overlap with physical memory range,
1860 * clear it first
1861 */
1862 dma_pte_clear_range(domain, base, base + size);
1863
1864 ret = domain_page_mapping(domain, base, base, size,
1865 DMA_PTE_READ|DMA_PTE_WRITE);
1866 if (ret)
1867 goto error;
1868
1869 /* context entry init */
1870 ret = domain_context_mapping(domain, pdev);
1871 if (!ret)
1872 return 0;
1873error:
1874 domain_exit(domain);
1875 return ret;
1876
1877}
1878
1879static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1880 struct pci_dev *pdev)
1881{
358dd8ac 1882 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1883 return 0;
1884 return iommu_prepare_identity_map(pdev, rmrr->base_address,
1885 rmrr->end_address + 1);
1886}
1887
e820482c 1888#ifdef CONFIG_DMAR_GFX_WA
1889struct iommu_prepare_data {
1890 struct pci_dev *pdev;
1891 int ret;
1892};
1893
1894static int __init iommu_prepare_work_fn(unsigned long start_pfn,
1895 unsigned long end_pfn, void *datax)
1896{
1897 struct iommu_prepare_data *data;
1898
1899 data = (struct iommu_prepare_data *)datax;
1900
1901 data->ret = iommu_prepare_identity_map(data->pdev,
1902 start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
1903 return data->ret;
1904
1905}
1906
1907static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev)
1908{
1909 int nid;
1910 struct iommu_prepare_data data;
1911
1912 data.pdev = pdev;
1913 data.ret = 0;
1914
1915 for_each_online_node(nid) {
1916 work_with_active_regions(nid, iommu_prepare_work_fn, &data);
1917 if (data.ret)
1918 return data.ret;
1919 }
1920 return data.ret;
1921}
1922
1923static void __init iommu_prepare_gfx_mapping(void)
1924{
1925 struct pci_dev *pdev = NULL;
1926 int ret;
1927
1928 for_each_pci_dev(pdev) {
358dd8ac 1929 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
e820482c
KA
1930 !IS_GFX_DEVICE(pdev))
1931 continue;
1932 printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
1933 pci_name(pdev));
d52d53b8
YL
1934 ret = iommu_prepare_with_active_regions(pdev);
1935 if (ret)
1936 printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
e820482c
KA
1937 }
1938}
2abd7e16
MM
1939#else /* !CONFIG_DMAR_GFX_WA */
1940static inline void iommu_prepare_gfx_mapping(void)
1941{
1942 return;
1943}
e820482c
KA
1944#endif
1945
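/*
 * Floppy work-around: identity-map the first 16MB of memory for the first
 * ISA bridge found, so that legacy ISA DMA (e.g. the floppy controller)
 * keeps working once translation is enabled.
 */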
49a0429e
KA
1946#ifdef CONFIG_DMAR_FLOPPY_WA
1947static inline void iommu_prepare_isa(void)
1948{
1949 struct pci_dev *pdev;
1950 int ret;
1951
1952 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
1953 if (!pdev)
1954 return;
1955
1956 printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
1957 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
1958
1959 if (ret)
1960 printk("IOMMU: Failed to create 0-64M identity map, "
1961 "floppy might not work\n");
1962
1963}
1964#else
1965static inline void iommu_prepare_isa(void)
1966{
1967 return;
1968}
1969#endif /* !CONFIG_DMAR_FLOPPY_WA */
1970
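/*
 * init_dmars() brings up DMA remapping: count the DRHD units, allocate
 * the global g_iommus[] and deferred_flush[] arrays, initialize the
 * domain and root tables of every active IOMMU, pick queued vs.
 * register-based invalidation, build the RMRR, graphics and ISA identity
 * maps, and finally enable fault reporting and translation on each unit.
 */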
519a0549 1971static int __init init_dmars(void)
ba395927
KA
1972{
1973 struct dmar_drhd_unit *drhd;
1974 struct dmar_rmrr_unit *rmrr;
1975 struct pci_dev *pdev;
1976 struct intel_iommu *iommu;
80b20dd8 1977 int i, ret, unit = 0;
ba395927
KA
1978
1979 /*
1980 * for each drhd
1981 * allocate root
1982 * initialize and program root entry to not present
1983 * endfor
1984 */
1985 for_each_drhd_unit(drhd) {
5e0d2a6f 1986 g_num_of_iommus++;
1987 /*
1988		 * lock not needed as this is only incremented in the
1989		 * single-threaded kernel __init code path; all other
1990		 * accesses are read-only
1991 */
1992 }
1993
d9630fe9
WH
1994 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
1995 GFP_KERNEL);
1996 if (!g_iommus) {
1997 printk(KERN_ERR "Allocating global iommu array failed\n");
1998 ret = -ENOMEM;
1999 goto error;
2000 }
2001
80b20dd8 2002 deferred_flush = kzalloc(g_num_of_iommus *
2003 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2004 if (!deferred_flush) {
d9630fe9 2005 kfree(g_iommus);
5e0d2a6f 2006 ret = -ENOMEM;
2007 goto error;
2008 }
2009
5e0d2a6f 2010 for_each_drhd_unit(drhd) {
2011 if (drhd->ignored)
2012 continue;
1886e8a9
SS
2013
2014 iommu = drhd->iommu;
d9630fe9 2015 g_iommus[iommu->seq_id] = iommu;
ba395927 2016
e61d98d8
SS
2017 ret = iommu_init_domains(iommu);
2018 if (ret)
2019 goto error;
2020
ba395927
KA
2021 /*
2022 * TBD:
2023 * we could share the same root & context tables
2024		 * among all IOMMUs; this needs to be split out later.
2025 */
2026 ret = iommu_alloc_root_entry(iommu);
2027 if (ret) {
2028 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
2029 goto error;
2030 }
2031 }
2032
a77b67d4
YS
2033 for_each_drhd_unit(drhd) {
2034 if (drhd->ignored)
2035 continue;
2036
2037 iommu = drhd->iommu;
2038 if (dmar_enable_qi(iommu)) {
2039 /*
2040 * Queued Invalidate not enabled, use Register Based
2041 * Invalidate
2042 */
2043 iommu->flush.flush_context = __iommu_flush_context;
2044 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2045 printk(KERN_INFO "IOMMU 0x%Lx: using Register based "
b4e0f9eb
FT
2046 "invalidation\n",
2047 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
2048 } else {
2049 iommu->flush.flush_context = qi_flush_context;
2050 iommu->flush.flush_iotlb = qi_flush_iotlb;
2051 printk(KERN_INFO "IOMMU 0x%Lx: using Queued "
b4e0f9eb
FT
2052 "invalidation\n",
2053 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
2054 }
2055 }
2056
ba395927
KA
2057 /*
2058 * For each rmrr
2059 * for each dev attached to rmrr
2060 * do
2061 * locate drhd for dev, alloc domain for dev
2062 * allocate free domain
2063 * allocate page table entries for rmrr
2064 * if context not allocated for bus
2065 * allocate and init context
2066 * set present in root table for this bus
2067 * init context with domain, translation etc
2068 * endfor
2069 * endfor
2070 */
2071 for_each_rmrr_units(rmrr) {
ba395927
KA
2072 for (i = 0; i < rmrr->devices_cnt; i++) {
2073 pdev = rmrr->devices[i];
2074			/* some BIOSes list non-existent devices in the DMAR table */
2075 if (!pdev)
2076 continue;
2077 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
2078 if (ret)
2079 printk(KERN_ERR
2080 "IOMMU: mapping reserved region failed\n");
2081 }
2082 }
2083
e820482c
KA
2084 iommu_prepare_gfx_mapping();
2085
49a0429e
KA
2086 iommu_prepare_isa();
2087
ba395927
KA
2088 /*
2089 * for each drhd
2090 * enable fault log
2091 * global invalidate context cache
2092 * global invalidate iotlb
2093 * enable translation
2094 */
2095 for_each_drhd_unit(drhd) {
2096 if (drhd->ignored)
2097 continue;
2098 iommu = drhd->iommu;
2099		sprintf(iommu->name, "dmar%d", unit++);
2100
2101 iommu_flush_write_buffer(iommu);
2102
3460a6d9
KA
2103 ret = dmar_set_interrupt(iommu);
2104 if (ret)
2105 goto error;
2106
ba395927
KA
2107 iommu_set_root_entry(iommu);
2108
a77b67d4
YS
2109 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
2110 0);
2111 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
2112 0);
f8bab735 2113 iommu_disable_protect_mem_regions(iommu);
2114
ba395927
KA
2115 ret = iommu_enable_translation(iommu);
2116 if (ret)
2117 goto error;
2118 }
2119
2120 return 0;
2121error:
2122 for_each_drhd_unit(drhd) {
2123 if (drhd->ignored)
2124 continue;
2125 iommu = drhd->iommu;
2126 free_iommu(iommu);
2127 }
d9630fe9 2128 kfree(g_iommus);
ba395927
KA
2129 return ret;
2130}
2131
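/*
 * aligned_size() returns the page-aligned length of the region starting
 * at host_addr and spanning @size bytes, i.e. the page offset of
 * host_addr plus @size, rounded up to a page boundary.
 */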
2132static inline u64 aligned_size(u64 host_addr, size_t size)
2133{
2134 u64 addr;
5b6985ce
FY
2135 addr = (host_addr & (~PAGE_MASK)) + size;
2136 return PAGE_ALIGN(addr);
ba395927
KA
2137}
2138
2139struct iova *
f76aec76 2140iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
ba395927 2141{
ba395927
KA
2142 struct iova *piova;
2143
2144 /* Make sure it's in range */
ba395927 2145 end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
f76aec76 2146 if (!size || (IOVA_START_ADDR + size > end))
ba395927
KA
2147 return NULL;
2148
2149 piova = alloc_iova(&domain->iovad,
5b6985ce 2150 size >> PAGE_SHIFT, IOVA_PFN(end), 1);
ba395927
KA
2151 return piova;
2152}
2153
f76aec76
KA
2154static struct iova *
2155__intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
bb9e6d65 2156 size_t size, u64 dma_mask)
ba395927 2157{
ba395927 2158 struct pci_dev *pdev = to_pci_dev(dev);
ba395927 2159 struct iova *iova = NULL;
ba395927 2160
bb9e6d65
FT
2161 if (dma_mask <= DMA_32BIT_MASK || dmar_forcedac)
2162 iova = iommu_alloc_iova(domain, size, dma_mask);
2163 else {
ba395927
KA
2164 /*
2165		 * First try to allocate an I/O virtual address below
2166		 * DMA_32BIT_MASK, and if that fails fall back to allocating
3609801e 2167		 * from the full range allowed by the device's DMA mask
ba395927 2168 */
f76aec76 2169 iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK);
ba395927 2170 if (!iova)
bb9e6d65 2171 iova = iommu_alloc_iova(domain, size, dma_mask);
ba395927
KA
2172 }
2173
2174 if (!iova) {
2175 printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev));
f76aec76
KA
2176 return NULL;
2177 }
2178
2179 return iova;
2180}
2181
2182static struct dmar_domain *
2183get_valid_domain_for_dev(struct pci_dev *pdev)
2184{
2185 struct dmar_domain *domain;
2186 int ret;
2187
2188 domain = get_domain_for_dev(pdev,
2189 DEFAULT_DOMAIN_ADDRESS_WIDTH);
2190 if (!domain) {
2191 printk(KERN_ERR
2192 "Allocating domain for %s failed", pci_name(pdev));
4fe05bbc 2193 return NULL;
ba395927
KA
2194 }
2195
2196 /* make sure context mapping is ok */
5331fe6f 2197 if (unlikely(!domain_context_mapped(pdev))) {
ba395927 2198 ret = domain_context_mapping(domain, pdev);
f76aec76
KA
2199 if (ret) {
2200 printk(KERN_ERR
2201 "Domain context map for %s failed",
2202 pci_name(pdev));
4fe05bbc 2203 return NULL;
f76aec76 2204 }
ba395927
KA
2205 }
2206
f76aec76
KA
2207 return domain;
2208}
2209
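/*
 * Core map_single path: bypassed devices simply get their physical
 * address back; otherwise allocate an IOVA under the given DMA mask,
 * derive the protection bits from the DMA direction (honouring the
 * zero-length-read capability), install the page mappings and flush the
 * IOTLB for the new (non-present -> present) entries.
 */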
bb9e6d65
FT
2210static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2211 size_t size, int dir, u64 dma_mask)
f76aec76
KA
2212{
2213 struct pci_dev *pdev = to_pci_dev(hwdev);
f76aec76 2214 struct dmar_domain *domain;
5b6985ce 2215 phys_addr_t start_paddr;
f76aec76
KA
2216 struct iova *iova;
2217 int prot = 0;
6865f0d1 2218 int ret;
8c11e798 2219 struct intel_iommu *iommu;
f76aec76
KA
2220
2221 BUG_ON(dir == DMA_NONE);
358dd8ac 2222 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
6865f0d1 2223 return paddr;
f76aec76
KA
2224
2225 domain = get_valid_domain_for_dev(pdev);
2226 if (!domain)
2227 return 0;
2228
8c11e798 2229 iommu = domain_get_iommu(domain);
6865f0d1 2230 size = aligned_size((u64)paddr, size);
f76aec76 2231
bb9e6d65 2232 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
f76aec76
KA
2233 if (!iova)
2234 goto error;
2235
5b6985ce 2236 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
f76aec76 2237
ba395927
KA
2238 /*
2239 * Check if DMAR supports zero-length reads on write only
2240 * mappings..
2241 */
2242 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 2243 !cap_zlr(iommu->cap))
ba395927
KA
2244 prot |= DMA_PTE_READ;
2245 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2246 prot |= DMA_PTE_WRITE;
2247 /*
6865f0d1 2248	 * paddr..(paddr + size) might cover only part of a page; we should map the
ba395927 2249	 * whole page. Note: if two parts of one page are mapped separately, we
6865f0d1 2250	 * might have two guest addresses mapping to the same host paddr, but this
ba395927
KA
2251 * is not a big problem
2252 */
6865f0d1 2253 ret = domain_page_mapping(domain, start_paddr,
5b6985ce 2254 ((u64)paddr) & PAGE_MASK, size, prot);
ba395927
KA
2255 if (ret)
2256 goto error;
2257
f76aec76 2258 /* it's a non-present to present mapping */
8c11e798 2259 ret = iommu_flush_iotlb_psi(iommu, domain->id,
5b6985ce 2260 start_paddr, size >> VTD_PAGE_SHIFT, 1);
f76aec76 2261 if (ret)
8c11e798 2262 iommu_flush_write_buffer(iommu);
f76aec76 2263
5b6985ce 2264 return start_paddr + ((u64)paddr & (~PAGE_MASK));
ba395927 2265
ba395927 2266error:
f76aec76
KA
2267 if (iova)
2268 __free_iova(&domain->iovad, iova);
ba395927 2269 printk(KERN_ERR"Device %s request: %lx@%llx dir %d --- failed\n",
5b6985ce 2270 pci_name(pdev), size, (unsigned long long)paddr, dir);
ba395927
KA
2271 return 0;
2272}
2273
bb9e6d65
FT
2274dma_addr_t intel_map_single(struct device *hwdev, phys_addr_t paddr,
2275 size_t size, int dir)
2276{
2277 return __intel_map_single(hwdev, paddr, size, dir,
2278 to_pci_dev(hwdev)->dma_mask);
2279}
2280
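/*
 * Deferred IOTLB flushing: instead of flushing after every unmap, freed
 * IOVAs are queued per IOMMU in deferred_flush[]. flush_unmaps() issues
 * one global IOTLB flush per IOMMU with pending entries and then releases
 * the queued IOVAs; it runs from a timer (10ms after the first deferred
 * unmap) or once the queue reaches HIGH_WATER_MARK.
 */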
5e0d2a6f 2281static void flush_unmaps(void)
2282{
80b20dd8 2283 int i, j;
5e0d2a6f 2284
5e0d2a6f 2285 timer_on = 0;
2286
2287 /* just flush them all */
2288 for (i = 0; i < g_num_of_iommus; i++) {
a2bb8459
WH
2289 struct intel_iommu *iommu = g_iommus[i];
2290 if (!iommu)
2291 continue;
c42d9f32 2292
a2bb8459 2293 if (deferred_flush[i].next) {
a77b67d4
YS
2294 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2295 DMA_TLB_GLOBAL_FLUSH, 0);
80b20dd8 2296 for (j = 0; j < deferred_flush[i].next; j++) {
2297 __free_iova(&deferred_flush[i].domain[j]->iovad,
2298 deferred_flush[i].iova[j]);
2299 }
2300 deferred_flush[i].next = 0;
2301 }
5e0d2a6f 2302 }
2303
5e0d2a6f 2304 list_size = 0;
5e0d2a6f 2305}
2306
2307static void flush_unmaps_timeout(unsigned long data)
2308{
80b20dd8 2309 unsigned long flags;
2310
2311 spin_lock_irqsave(&async_umap_flush_lock, flags);
5e0d2a6f 2312 flush_unmaps();
80b20dd8 2313 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
5e0d2a6f 2314}
2315
2316static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2317{
2318 unsigned long flags;
80b20dd8 2319 int next, iommu_id;
8c11e798 2320 struct intel_iommu *iommu;
5e0d2a6f 2321
2322 spin_lock_irqsave(&async_umap_flush_lock, flags);
80b20dd8 2323 if (list_size == HIGH_WATER_MARK)
2324 flush_unmaps();
2325
8c11e798
WH
2326 iommu = domain_get_iommu(dom);
2327 iommu_id = iommu->seq_id;
c42d9f32 2328
80b20dd8 2329 next = deferred_flush[iommu_id].next;
2330 deferred_flush[iommu_id].domain[next] = dom;
2331 deferred_flush[iommu_id].iova[next] = iova;
2332 deferred_flush[iommu_id].next++;
5e0d2a6f 2333
2334 if (!timer_on) {
2335 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2336 timer_on = 1;
2337 }
2338 list_size++;
2339 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2340}
2341
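/*
 * Unmap a single mapping: look up the IOVA covering dev_addr, clear the
 * PTEs and free the intermediate page tables, then either flush the
 * IOTLB synchronously and free the IOVA (intel_iommu_strict) or defer
 * both via add_unmap().
 */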
5b6985ce
FY
2342void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
2343 int dir)
ba395927 2344{
ba395927 2345 struct pci_dev *pdev = to_pci_dev(dev);
f76aec76
KA
2346 struct dmar_domain *domain;
2347 unsigned long start_addr;
ba395927 2348 struct iova *iova;
8c11e798 2349 struct intel_iommu *iommu;
ba395927 2350
358dd8ac 2351 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
f76aec76 2352 return;
ba395927
KA
2353 domain = find_domain(pdev);
2354 BUG_ON(!domain);
2355
8c11e798
WH
2356 iommu = domain_get_iommu(domain);
2357
ba395927 2358 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
f76aec76 2359 if (!iova)
ba395927 2360 return;
ba395927 2361
5b6985ce 2362 start_addr = iova->pfn_lo << PAGE_SHIFT;
f76aec76 2363 size = aligned_size((u64)dev_addr, size);
ba395927 2364
f76aec76 2365 pr_debug("Device %s unmapping: %lx@%llx\n",
5b6985ce 2366 pci_name(pdev), size, (unsigned long long)start_addr);
ba395927 2367
f76aec76
KA
2368 /* clear the whole page */
2369 dma_pte_clear_range(domain, start_addr, start_addr + size);
2370 /* free page tables */
2371 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
5e0d2a6f 2372 if (intel_iommu_strict) {
8c11e798 2373 if (iommu_flush_iotlb_psi(iommu,
5b6985ce 2374 domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0))
8c11e798 2375 iommu_flush_write_buffer(iommu);
5e0d2a6f 2376 /* free iova */
2377 __free_iova(&domain->iovad, iova);
2378 } else {
2379 add_unmap(domain, iova);
2380 /*
2381		 * queue up the release of the unmap to save the roughly 1/6th of
2382		 * the CPU time otherwise used up by the iotlb flush operation...
2383 */
5e0d2a6f 2384 }
ba395927
KA
2385}
2386
5b6985ce
FY
2387void *intel_alloc_coherent(struct device *hwdev, size_t size,
2388 dma_addr_t *dma_handle, gfp_t flags)
ba395927
KA
2389{
2390 void *vaddr;
2391 int order;
2392
5b6985ce 2393 size = PAGE_ALIGN(size);
ba395927
KA
2394 order = get_order(size);
2395 flags &= ~(GFP_DMA | GFP_DMA32);
2396
2397 vaddr = (void *)__get_free_pages(flags, order);
2398 if (!vaddr)
2399 return NULL;
2400 memset(vaddr, 0, size);
2401
bb9e6d65
FT
2402 *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2403 DMA_BIDIRECTIONAL,
2404 hwdev->coherent_dma_mask);
ba395927
KA
2405 if (*dma_handle)
2406 return vaddr;
2407 free_pages((unsigned long)vaddr, order);
2408 return NULL;
2409}
2410
5b6985ce
FY
2411void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
2412 dma_addr_t dma_handle)
ba395927
KA
2413{
2414 int order;
2415
5b6985ce 2416 size = PAGE_ALIGN(size);
ba395927
KA
2417 order = get_order(size);
2418
2419 intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
2420 free_pages((unsigned long)vaddr, order);
2421}
2422
12d4d40e 2423#define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
5b6985ce
FY
2424
2425void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2426 int nelems, int dir)
ba395927
KA
2427{
2428 int i;
2429 struct pci_dev *pdev = to_pci_dev(hwdev);
2430 struct dmar_domain *domain;
f76aec76
KA
2431 unsigned long start_addr;
2432 struct iova *iova;
2433 size_t size = 0;
2434 void *addr;
c03ab37c 2435 struct scatterlist *sg;
8c11e798 2436 struct intel_iommu *iommu;
ba395927 2437
358dd8ac 2438 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927
KA
2439 return;
2440
2441 domain = find_domain(pdev);
8c11e798
WH
2442 BUG_ON(!domain);
2443
2444 iommu = domain_get_iommu(domain);
ba395927 2445
c03ab37c 2446 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
f76aec76
KA
2447 if (!iova)
2448 return;
c03ab37c 2449 for_each_sg(sglist, sg, nelems, i) {
f76aec76
KA
2450 addr = SG_ENT_VIRT_ADDRESS(sg);
2451 size += aligned_size((u64)addr, sg->length);
2452 }
2453
5b6985ce 2454 start_addr = iova->pfn_lo << PAGE_SHIFT;
f76aec76
KA
2455
2456 /* clear the whole page */
2457 dma_pte_clear_range(domain, start_addr, start_addr + size);
2458 /* free page tables */
2459 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2460
8c11e798 2461 if (iommu_flush_iotlb_psi(iommu, domain->id, start_addr,
5b6985ce 2462 size >> VTD_PAGE_SHIFT, 0))
8c11e798 2463 iommu_flush_write_buffer(iommu);
f76aec76
KA
2464
2465 /* free iova */
2466 __free_iova(&domain->iovad, iova);
ba395927
KA
2467}
2468
ba395927 2469static int intel_nontranslate_map_sg(struct device *hddev,
c03ab37c 2470 struct scatterlist *sglist, int nelems, int dir)
ba395927
KA
2471{
2472 int i;
c03ab37c 2473 struct scatterlist *sg;
ba395927 2474
c03ab37c 2475 for_each_sg(sglist, sg, nelems, i) {
12d4d40e 2476 BUG_ON(!sg_page(sg));
c03ab37c
FT
2477 sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
2478 sg->dma_length = sg->length;
ba395927
KA
2479 }
2480 return nelems;
2481}
2482
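/*
 * Map a scatterlist: bypassed devices just get their physical addresses;
 * otherwise the total page-aligned length of all entries is computed,
 * one IOVA range of that size is allocated, and each entry is mapped
 * contiguously into it. Any mapping failure tears the range down again
 * and returns 0.
 */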
5b6985ce
FY
2483int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
2484 int dir)
ba395927
KA
2485{
2486 void *addr;
2487 int i;
ba395927
KA
2488 struct pci_dev *pdev = to_pci_dev(hwdev);
2489 struct dmar_domain *domain;
f76aec76
KA
2490 size_t size = 0;
2491 int prot = 0;
2492 size_t offset = 0;
2493 struct iova *iova = NULL;
2494 int ret;
c03ab37c 2495 struct scatterlist *sg;
f76aec76 2496 unsigned long start_addr;
8c11e798 2497 struct intel_iommu *iommu;
ba395927
KA
2498
2499 BUG_ON(dir == DMA_NONE);
358dd8ac 2500 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
c03ab37c 2501 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
ba395927 2502
f76aec76
KA
2503 domain = get_valid_domain_for_dev(pdev);
2504 if (!domain)
2505 return 0;
2506
8c11e798
WH
2507 iommu = domain_get_iommu(domain);
2508
c03ab37c 2509 for_each_sg(sglist, sg, nelems, i) {
ba395927 2510 addr = SG_ENT_VIRT_ADDRESS(sg);
f76aec76
KA
2511 addr = (void *)virt_to_phys(addr);
2512 size += aligned_size((u64)addr, sg->length);
2513 }
2514
bb9e6d65 2515 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
f76aec76 2516 if (!iova) {
c03ab37c 2517 sglist->dma_length = 0;
f76aec76
KA
2518 return 0;
2519 }
2520
2521 /*
2522 * Check if DMAR supports zero-length reads on write only
2523 * mappings..
2524 */
2525 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 2526 !cap_zlr(iommu->cap))
f76aec76
KA
2527 prot |= DMA_PTE_READ;
2528 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2529 prot |= DMA_PTE_WRITE;
2530
5b6985ce 2531 start_addr = iova->pfn_lo << PAGE_SHIFT;
f76aec76 2532 offset = 0;
c03ab37c 2533 for_each_sg(sglist, sg, nelems, i) {
f76aec76
KA
2534 addr = SG_ENT_VIRT_ADDRESS(sg);
2535 addr = (void *)virt_to_phys(addr);
2536 size = aligned_size((u64)addr, sg->length);
2537 ret = domain_page_mapping(domain, start_addr + offset,
5b6985ce 2538 ((u64)addr) & PAGE_MASK,
f76aec76
KA
2539 size, prot);
2540 if (ret) {
2541 /* clear the page */
2542 dma_pte_clear_range(domain, start_addr,
2543 start_addr + offset);
2544 /* free page tables */
2545 dma_pte_free_pagetable(domain, start_addr,
2546 start_addr + offset);
2547 /* free iova */
2548 __free_iova(&domain->iovad, iova);
ba395927
KA
2549 return 0;
2550 }
f76aec76 2551 sg->dma_address = start_addr + offset +
5b6985ce 2552 ((u64)addr & (~PAGE_MASK));
ba395927 2553 sg->dma_length = sg->length;
f76aec76 2554 offset += size;
ba395927
KA
2555 }
2556
ba395927 2557 /* it's a non-present to present mapping */
8c11e798 2558 if (iommu_flush_iotlb_psi(iommu, domain->id,
5b6985ce 2559 start_addr, offset >> VTD_PAGE_SHIFT, 1))
8c11e798 2560 iommu_flush_write_buffer(iommu);
ba395927
KA
2561 return nelems;
2562}
2563
2564static struct dma_mapping_ops intel_dma_ops = {
2565 .alloc_coherent = intel_alloc_coherent,
2566 .free_coherent = intel_free_coherent,
2567 .map_single = intel_map_single,
2568 .unmap_single = intel_unmap_single,
2569 .map_sg = intel_map_sg,
2570 .unmap_sg = intel_unmap_sg,
2571};
2572
2573static inline int iommu_domain_cache_init(void)
2574{
2575 int ret = 0;
2576
2577 iommu_domain_cache = kmem_cache_create("iommu_domain",
2578 sizeof(struct dmar_domain),
2579 0,
2580 SLAB_HWCACHE_ALIGN,
2581
2582 NULL);
2583 if (!iommu_domain_cache) {
2584 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
2585 ret = -ENOMEM;
2586 }
2587
2588 return ret;
2589}
2590
2591static inline int iommu_devinfo_cache_init(void)
2592{
2593 int ret = 0;
2594
2595 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
2596 sizeof(struct device_domain_info),
2597 0,
2598 SLAB_HWCACHE_ALIGN,
ba395927
KA
2599 NULL);
2600 if (!iommu_devinfo_cache) {
2601 printk(KERN_ERR "Couldn't create devinfo cache\n");
2602 ret = -ENOMEM;
2603 }
2604
2605 return ret;
2606}
2607
2608static inline int iommu_iova_cache_init(void)
2609{
2610 int ret = 0;
2611
2612 iommu_iova_cache = kmem_cache_create("iommu_iova",
2613 sizeof(struct iova),
2614 0,
2615 SLAB_HWCACHE_ALIGN,
ba395927
KA
2616 NULL);
2617 if (!iommu_iova_cache) {
2618 printk(KERN_ERR "Couldn't create iova cache\n");
2619 ret = -ENOMEM;
2620 }
2621
2622 return ret;
2623}
2624
2625static int __init iommu_init_mempool(void)
2626{
2627 int ret;
2628 ret = iommu_iova_cache_init();
2629 if (ret)
2630 return ret;
2631
2632 ret = iommu_domain_cache_init();
2633 if (ret)
2634 goto domain_error;
2635
2636 ret = iommu_devinfo_cache_init();
2637 if (!ret)
2638 return ret;
2639
2640 kmem_cache_destroy(iommu_domain_cache);
2641domain_error:
2642 kmem_cache_destroy(iommu_iova_cache);
2643
2644 return -ENOMEM;
2645}
2646
2647static void __init iommu_exit_mempool(void)
2648{
2649 kmem_cache_destroy(iommu_devinfo_cache);
2650 kmem_cache_destroy(iommu_domain_cache);
2651 kmem_cache_destroy(iommu_iova_cache);
2652
2653}
2654
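/*
 * Mark DRHD units that can be skipped: a unit with no PCI devices behind
 * it is ignored, and when dmar_map_gfx is clear a unit that only covers
 * graphics devices is ignored as well, with its devices flagged
 * DUMMY_DEVICE_DOMAIN_INFO so the DMA ops pass them through untranslated.
 */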
ba395927
KA
2655static void __init init_no_remapping_devices(void)
2656{
2657 struct dmar_drhd_unit *drhd;
2658
2659 for_each_drhd_unit(drhd) {
2660 if (!drhd->include_all) {
2661 int i;
2662 for (i = 0; i < drhd->devices_cnt; i++)
2663 if (drhd->devices[i] != NULL)
2664 break;
2665 /* ignore DMAR unit if no pci devices exist */
2666 if (i == drhd->devices_cnt)
2667 drhd->ignored = 1;
2668 }
2669 }
2670
2671 if (dmar_map_gfx)
2672 return;
2673
2674 for_each_drhd_unit(drhd) {
2675 int i;
2676 if (drhd->ignored || drhd->include_all)
2677 continue;
2678
2679 for (i = 0; i < drhd->devices_cnt; i++)
2680 if (drhd->devices[i] &&
2681 !IS_GFX_DEVICE(drhd->devices[i]))
2682 break;
2683
2684 if (i < drhd->devices_cnt)
2685 continue;
2686
2687 /* bypass IOMMU if it is just for gfx devices */
2688 drhd->ignored = 1;
2689 for (i = 0; i < drhd->devices_cnt; i++) {
2690 if (!drhd->devices[i])
2691 continue;
358dd8ac 2692 drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
ba395927
KA
2693 }
2694 }
2695}
2696
2697int __init intel_iommu_init(void)
2698{
2699 int ret = 0;
2700
ba395927
KA
2701 if (dmar_table_init())
2702 return -ENODEV;
2703
1886e8a9
SS
2704 if (dmar_dev_scope_init())
2705 return -ENODEV;
2706
2ae21010
SS
2707 /*
2708 * Check the need for DMA-remapping initialization now.
2709	 * The initialization above is also used by interrupt remapping.
2710 */
2711 if (no_iommu || swiotlb || dmar_disabled)
2712 return -ENODEV;
2713
ba395927
KA
2714 iommu_init_mempool();
2715 dmar_init_reserved_ranges();
2716
2717 init_no_remapping_devices();
2718
2719 ret = init_dmars();
2720 if (ret) {
2721 printk(KERN_ERR "IOMMU: dmar init failed\n");
2722 put_iova_domain(&reserved_iova_list);
2723 iommu_exit_mempool();
2724 return ret;
2725 }
2726 printk(KERN_INFO
2727 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
2728
5e0d2a6f 2729 init_timer(&unmap_timer);
ba395927
KA
2730 force_iommu = 1;
2731 dma_ops = &intel_dma_ops;
2732 return 0;
2733}
e820482c 2734
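/*
 * The vm_domain_* helpers below manage "virtual machine" domains used by
 * the exported attach/detach API: they keep the device_domain_info lists
 * in sync, detach context entries on removal, and maintain the domain's
 * iommu_bmp, iommu_count and coherency state as devices come and go.
 */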
c7151a8d
WH
2735static int vm_domain_add_dev_info(struct dmar_domain *domain,
2736 struct pci_dev *pdev)
2737{
2738 struct device_domain_info *info;
2739 unsigned long flags;
2740
2741 info = alloc_devinfo_mem();
2742 if (!info)
2743 return -ENOMEM;
2744
2745 info->bus = pdev->bus->number;
2746 info->devfn = pdev->devfn;
2747 info->dev = pdev;
2748 info->domain = domain;
2749
2750 spin_lock_irqsave(&device_domain_lock, flags);
2751 list_add(&info->link, &domain->devices);
2752 list_add(&info->global, &device_domain_list);
2753 pdev->dev.archdata.iommu = info;
2754 spin_unlock_irqrestore(&device_domain_lock, flags);
2755
2756 return 0;
2757}
2758
2759static void vm_domain_remove_one_dev_info(struct dmar_domain *domain,
2760 struct pci_dev *pdev)
2761{
2762 struct device_domain_info *info;
2763 struct intel_iommu *iommu;
2764 unsigned long flags;
2765 int found = 0;
2766 struct list_head *entry, *tmp;
2767
2768 iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
2769 if (!iommu)
2770 return;
2771
2772 spin_lock_irqsave(&device_domain_lock, flags);
2773 list_for_each_safe(entry, tmp, &domain->devices) {
2774 info = list_entry(entry, struct device_domain_info, link);
2775 if (info->bus == pdev->bus->number &&
2776 info->devfn == pdev->devfn) {
2777 list_del(&info->link);
2778 list_del(&info->global);
2779 if (info->dev)
2780 info->dev->dev.archdata.iommu = NULL;
2781 spin_unlock_irqrestore(&device_domain_lock, flags);
2782
2783 iommu_detach_dev(iommu, info->bus, info->devfn);
2784 free_devinfo_mem(info);
2785
2786 spin_lock_irqsave(&device_domain_lock, flags);
2787
2788 if (found)
2789 break;
2790 else
2791 continue;
2792 }
2793
2794		/* if there are no other devices under the same iommu
2795		 * owned by this domain, clear this iommu in iommu_bmp,
2796		 * and update the iommu count and coherency
2797 */
2798 if (device_to_iommu(info->bus, info->devfn) == iommu)
2799 found = 1;
2800 }
2801
2802 if (found == 0) {
2803 unsigned long tmp_flags;
2804 spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
2805 clear_bit(iommu->seq_id, &domain->iommu_bmp);
2806 domain->iommu_count--;
2807 domain_update_iommu_coherency(domain);
2808 spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
2809 }
2810
2811 spin_unlock_irqrestore(&device_domain_lock, flags);
2812}
2813
2814static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
2815{
2816 struct device_domain_info *info;
2817 struct intel_iommu *iommu;
2818 unsigned long flags1, flags2;
2819
2820 spin_lock_irqsave(&device_domain_lock, flags1);
2821 while (!list_empty(&domain->devices)) {
2822 info = list_entry(domain->devices.next,
2823 struct device_domain_info, link);
2824 list_del(&info->link);
2825 list_del(&info->global);
2826 if (info->dev)
2827 info->dev->dev.archdata.iommu = NULL;
2828
2829 spin_unlock_irqrestore(&device_domain_lock, flags1);
2830
2831 iommu = device_to_iommu(info->bus, info->devfn);
2832 iommu_detach_dev(iommu, info->bus, info->devfn);
2833
2834 /* clear this iommu in iommu_bmp, update iommu count
2835 * and coherency
2836 */
2837 spin_lock_irqsave(&domain->iommu_lock, flags2);
2838 if (test_and_clear_bit(iommu->seq_id,
2839 &domain->iommu_bmp)) {
2840 domain->iommu_count--;
2841 domain_update_iommu_coherency(domain);
2842 }
2843 spin_unlock_irqrestore(&domain->iommu_lock, flags2);
2844
2845 free_devinfo_mem(info);
2846 spin_lock_irqsave(&device_domain_lock, flags1);
2847 }
2848 spin_unlock_irqrestore(&device_domain_lock, flags1);
2849}
2850
5e98c4b1
WH
2851/* domain id for a virtual machine domain; it is never written into a context entry */
2852static unsigned long vm_domid;
2853
fe40f1e0
WH
2854static int vm_domain_min_agaw(struct dmar_domain *domain)
2855{
2856 int i;
2857 int min_agaw = domain->agaw;
2858
2859 i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
2860 for (; i < g_num_of_iommus; ) {
2861 if (min_agaw > g_iommus[i]->agaw)
2862 min_agaw = g_iommus[i]->agaw;
2863
2864 i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
2865 }
2866
2867 return min_agaw;
2868}
2869
5e98c4b1
WH
2870static struct dmar_domain *iommu_alloc_vm_domain(void)
2871{
2872 struct dmar_domain *domain;
2873
2874 domain = alloc_domain_mem();
2875 if (!domain)
2876 return NULL;
2877
2878 domain->id = vm_domid++;
2879 memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
2880 domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
2881
2882 return domain;
2883}
2884
2885static int vm_domain_init(struct dmar_domain *domain, int guest_width)
2886{
2887 int adjust_width;
2888
2889 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
2890 spin_lock_init(&domain->mapping_lock);
2891 spin_lock_init(&domain->iommu_lock);
2892
2893 domain_reserve_special_ranges(domain);
2894
2895 /* calculate AGAW */
2896 domain->gaw = guest_width;
2897 adjust_width = guestwidth_to_adjustwidth(guest_width);
2898 domain->agaw = width_to_agaw(adjust_width);
2899
2900 INIT_LIST_HEAD(&domain->devices);
2901
2902 domain->iommu_count = 0;
2903 domain->iommu_coherency = 0;
fe40f1e0 2904 domain->max_addr = 0;
5e98c4b1
WH
2905
2906 /* always allocate the top pgd */
2907 domain->pgd = (struct dma_pte *)alloc_pgtable_page();
2908 if (!domain->pgd)
2909 return -ENOMEM;
2910 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
2911 return 0;
2912}
2913
2914static void iommu_free_vm_domain(struct dmar_domain *domain)
2915{
2916 unsigned long flags;
2917 struct dmar_drhd_unit *drhd;
2918 struct intel_iommu *iommu;
2919 unsigned long i;
2920 unsigned long ndomains;
2921
2922 for_each_drhd_unit(drhd) {
2923 if (drhd->ignored)
2924 continue;
2925 iommu = drhd->iommu;
2926
2927 ndomains = cap_ndoms(iommu->cap);
2928 i = find_first_bit(iommu->domain_ids, ndomains);
2929 for (; i < ndomains; ) {
2930 if (iommu->domains[i] == domain) {
2931 spin_lock_irqsave(&iommu->lock, flags);
2932 clear_bit(i, iommu->domain_ids);
2933 iommu->domains[i] = NULL;
2934 spin_unlock_irqrestore(&iommu->lock, flags);
2935 break;
2936 }
2937 i = find_next_bit(iommu->domain_ids, ndomains, i+1);
2938 }
2939 }
2940}
2941
2942static void vm_domain_exit(struct dmar_domain *domain)
2943{
2944 u64 end;
2945
2946	/* Domain 0 is reserved, so don't process it */
2947 if (!domain)
2948 return;
2949
2950 vm_domain_remove_all_dev_info(domain);
2951 /* destroy iovas */
2952 put_iova_domain(&domain->iovad);
2953 end = DOMAIN_MAX_ADDR(domain->gaw);
2954 end = end & (~VTD_PAGE_MASK);
2955
2956 /* clear ptes */
2957 dma_pte_clear_range(domain, 0, end);
2958
2959 /* free page tables */
2960 dma_pte_free_pagetable(domain, 0, end);
2961
2962 iommu_free_vm_domain(domain);
2963 free_domain_mem(domain);
2964}
2965
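/*
 * Generic IOMMU-API glue: domain init/destroy allocate and tear down a
 * VM domain hung off iommu_domain->priv; the exported attach, detach,
 * map, unmap and iova_to_phys helpers below operate on struct
 * dmar_domain directly.
 */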
5d450806 2966static int intel_iommu_domain_init(struct iommu_domain *domain)
38717946 2967{
5d450806 2968 struct dmar_domain *dmar_domain;
38717946 2969
5d450806
JR
2970 dmar_domain = iommu_alloc_vm_domain();
2971 if (!dmar_domain) {
38717946 2972 printk(KERN_ERR
5d450806
JR
2973 "intel_iommu_domain_init: dmar_domain == NULL\n");
2974 return -ENOMEM;
38717946 2975 }
5d450806 2976 if (vm_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
38717946 2977 printk(KERN_ERR
5d450806
JR
2978 "intel_iommu_domain_init() failed\n");
2979 vm_domain_exit(dmar_domain);
2980 return -ENOMEM;
38717946 2981 }
5d450806 2982 domain->priv = dmar_domain;
faa3d6f5 2983
5d450806 2984 return 0;
38717946 2985}
38717946 2986
5d450806 2987static void intel_iommu_domain_destroy(struct iommu_domain *domain)
38717946 2988{
5d450806
JR
2989 struct dmar_domain *dmar_domain = domain->priv;
2990
2991 domain->priv = NULL;
2992 vm_domain_exit(dmar_domain);
38717946 2993}
38717946 2994
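/*
 * Attach @pdev to @domain: tear down any existing context mapping for
 * the device, check that the IOMMU's address width (agaw) still covers
 * the domain's current max_addr, then context-map the device and record
 * it in the domain's device list.
 */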
faa3d6f5
WH
2995int intel_iommu_attach_device(struct dmar_domain *domain,
2996 struct pci_dev *pdev)
38717946 2997{
fe40f1e0
WH
2998 struct intel_iommu *iommu;
2999 int addr_width;
3000 u64 end;
faa3d6f5
WH
3001 int ret;
3002
3003 /* normally pdev is not mapped */
3004 if (unlikely(domain_context_mapped(pdev))) {
3005 struct dmar_domain *old_domain;
3006
3007 old_domain = find_domain(pdev);
3008 if (old_domain) {
3009 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
3010 vm_domain_remove_one_dev_info(old_domain, pdev);
3011 else
3012 domain_remove_dev_info(old_domain);
3013 }
3014 }
3015
fe40f1e0
WH
3016 iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
3017 if (!iommu)
3018 return -ENODEV;
3019
3020 /* check if this iommu agaw is sufficient for max mapped address */
3021 addr_width = agaw_to_width(iommu->agaw);
3022 end = DOMAIN_MAX_ADDR(addr_width);
3023 end = end & VTD_PAGE_MASK;
3024 if (end < domain->max_addr) {
3025 printk(KERN_ERR "%s: iommu agaw (%d) is not "
3026 "sufficient for the mapped address (%llx)\n",
3027 __func__, iommu->agaw, domain->max_addr);
3028 return -EFAULT;
3029 }
3030
faa3d6f5
WH
3031 ret = domain_context_mapping(domain, pdev);
3032 if (ret)
3033 return ret;
3034
3035 ret = vm_domain_add_dev_info(domain, pdev);
3036 return ret;
38717946 3037}
faa3d6f5 3038EXPORT_SYMBOL_GPL(intel_iommu_attach_device);
38717946 3039
faa3d6f5
WH
3040void intel_iommu_detach_device(struct dmar_domain *domain,
3041 struct pci_dev *pdev)
38717946 3042{
faa3d6f5
WH
3043 vm_domain_remove_one_dev_info(domain, pdev);
3044}
3045EXPORT_SYMBOL_GPL(intel_iommu_detach_device);
c7151a8d 3046
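/*
 * Map @size bytes at IOVA @iova to host physical address @hpa. If the
 * mapping would grow the domain past max_addr, first verify that the
 * smallest agaw among the attached IOMMUs can still address the new top,
 * then install the page-table entries.
 */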
faa3d6f5
WH
3047int intel_iommu_map_address(struct dmar_domain *domain, dma_addr_t iova,
3048 u64 hpa, size_t size, int prot)
3049{
fe40f1e0
WH
3050 u64 max_addr;
3051 int addr_width;
faa3d6f5 3052 int ret;
fe40f1e0
WH
3053
3054 max_addr = (iova & VTD_PAGE_MASK) + VTD_PAGE_ALIGN(size);
3055 if (domain->max_addr < max_addr) {
3056 int min_agaw;
3057 u64 end;
3058
3059 /* check if minimum agaw is sufficient for mapped address */
3060 min_agaw = vm_domain_min_agaw(domain);
3061 addr_width = agaw_to_width(min_agaw);
3062 end = DOMAIN_MAX_ADDR(addr_width);
3063 end = end & VTD_PAGE_MASK;
3064 if (end < max_addr) {
3065 printk(KERN_ERR "%s: iommu agaw (%d) is not "
3066 "sufficient for the mapped address (%llx)\n",
3067 __func__, min_agaw, max_addr);
3068 return -EFAULT;
3069 }
3070 domain->max_addr = max_addr;
3071 }
3072
faa3d6f5
WH
3073 ret = domain_page_mapping(domain, iova, hpa, size, prot);
3074 return ret;
38717946 3075}
faa3d6f5 3076EXPORT_SYMBOL_GPL(intel_iommu_map_address);
38717946 3077
faa3d6f5
WH
3078void intel_iommu_unmap_address(struct dmar_domain *domain,
3079 dma_addr_t iova, size_t size)
38717946 3080{
faa3d6f5
WH
3081 dma_addr_t base;
3082
3083 /* The address might not be aligned */
3084 base = iova & VTD_PAGE_MASK;
3085 size = VTD_PAGE_ALIGN(size);
3086 dma_pte_clear_range(domain, base, base + size);
fe40f1e0
WH
3087
3088 if (domain->max_addr == base + size)
3089 domain->max_addr = base;
38717946 3090}
faa3d6f5 3091EXPORT_SYMBOL_GPL(intel_iommu_unmap_address);
38717946
KA
3092
3093int intel_iommu_found(void)
3094{
3095 return g_num_of_iommus;
3096}
3097EXPORT_SYMBOL_GPL(intel_iommu_found);
3098
faa3d6f5 3099u64 intel_iommu_iova_to_phys(struct dmar_domain *domain, u64 iova)
38717946
KA
3100{
3101 struct dma_pte *pte;
faa3d6f5 3102 u64 phys = 0;
38717946 3103
38717946 3104 pte = addr_to_dma_pte(domain, iova);
38717946 3105 if (pte)
faa3d6f5 3106 phys = dma_pte_addr(pte);
38717946 3107
faa3d6f5 3108 return phys;
38717946 3109}
faa3d6f5 3110EXPORT_SYMBOL_GPL(intel_iommu_iova_to_phys);
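/*
 * Illustrative usage of the exported helpers (a hedged sketch, not code
 * from this file), assuming a caller that already owns a suitable
 * struct dmar_domain and a struct pci_dev:
 *
 *	if (intel_iommu_found()) {
 *		ret = intel_iommu_attach_device(domain, pdev);
 *		if (!ret)
 *			ret = intel_iommu_map_address(domain, iova, hpa,
 *						      size, DMA_PTE_READ |
 *						      DMA_PTE_WRITE);
 *	}
 */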