drivers/pci/intel-iommu.c (mirror_ubuntu-zesty-kernel.git, blame at "intel-iommu: Flush unmaps at domain_exit")
1/*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
17 * Copyright (C) 2006-2008 Intel Corporation
18 * Author: Ashok Raj <ashok.raj@intel.com>
19 * Author: Shaohua Li <shaohua.li@intel.com>
20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21 * Author: Fenghua Yu <fenghua.yu@intel.com>
22 */
23
24#include <linux/init.h>
25#include <linux/bitmap.h>
26#include <linux/debugfs.h>
27#include <linux/slab.h>
28#include <linux/irq.h>
29#include <linux/interrupt.h>
30#include <linux/spinlock.h>
31#include <linux/pci.h>
32#include <linux/dmar.h>
33#include <linux/dma-mapping.h>
34#include <linux/mempool.h>
35#include <linux/timer.h>
36#include <linux/iova.h>
37#include <linux/iommu.h>
38#include <linux/intel-iommu.h>
39#include <linux/syscore_ops.h>
40#include <linux/tboot.h>
41#include <linux/dmi.h>
42#include <asm/cacheflush.h>
43#include <asm/iommu.h>
44#include "pci.h"
45
46#define ROOT_SIZE VTD_PAGE_SIZE
47#define CONTEXT_SIZE VTD_PAGE_SIZE
48
49#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
50#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
e0fc7e0b 51#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
52
53#define IOAPIC_RANGE_START (0xfee00000)
54#define IOAPIC_RANGE_END (0xfeefffff)
55#define IOVA_START_ADDR (0x1000)
56
57#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
58
59#define MAX_AGAW_WIDTH 64
60
61#define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
62#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
63
64/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
65 to match. That way, we can use 'unsigned long' for PFNs with impunity. */
66#define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
67 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
68#define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
69
70#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
71#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32))
72#define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64))
73
74/* page table handling */
75#define LEVEL_STRIDE (9)
76#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
77
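/* An agaw (adjusted guest address width) of N selects a page table of
 * N + 2 levels covering 30 + 9*N bits of DMA address; each level decodes
 * LEVEL_STRIDE (9) bits of the pfn. The helpers below convert between
 * agaw, address width and page-table level. */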
78static inline int agaw_to_level(int agaw)
79{
80 return agaw + 2;
81}
82
83static inline int agaw_to_width(int agaw)
84{
85 return 30 + agaw * LEVEL_STRIDE;
86}
87
88static inline int width_to_agaw(int width)
89{
90 return (width - 30) / LEVEL_STRIDE;
91}
92
93static inline unsigned int level_to_offset_bits(int level)
94{
95 return (level - 1) * LEVEL_STRIDE;
96}
97
98static inline int pfn_level_offset(unsigned long pfn, int level)
99{
100 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
101}
102
103static inline unsigned long level_mask(int level)
104{
105 return -1UL << level_to_offset_bits(level);
106}
107
108static inline unsigned long level_size(int level)
109{
110 return 1UL << level_to_offset_bits(level);
111}
112
113static inline unsigned long align_to_level(unsigned long pfn, int level)
114{
115 return (pfn + level_size(level) - 1) & level_mask(level);
116}
117
118/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
119 are never going to work. */
120static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
121{
122 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
123}
124
125static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
126{
127 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
128}
129static inline unsigned long page_to_dma_pfn(struct page *pg)
130{
131 return mm_to_dma_pfn(page_to_pfn(pg));
132}
133static inline unsigned long virt_to_dma_pfn(void *p)
134{
135 return page_to_dma_pfn(virt_to_page(p));
136}
137
138/* global iommu list, set NULL for ignored DMAR units */
139static struct intel_iommu **g_iommus;
140
141static void __init check_tylersburg_isoch(void);
142static int rwbf_quirk;
143
144/*
145 * set to 1 to panic kernel if can't successfully enable VT-d
146 * (used when kernel is launched w/ TXT)
147 */
148static int force_on = 0;
149
150/*
151 * 0: Present
152 * 1-11: Reserved
153 * 12-63: Context Ptr (12 - (haw-1))
154 * 64-127: Reserved
155 */
156struct root_entry {
157 u64 val;
158 u64 rsvd1;
159};
160#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
161static inline bool root_present(struct root_entry *root)
162{
163 return (root->val & 1);
164}
165static inline void set_root_present(struct root_entry *root)
166{
167 root->val |= 1;
168}
169static inline void set_root_value(struct root_entry *root, unsigned long value)
170{
171 root->val |= value & VTD_PAGE_MASK;
172}
173
174static inline struct context_entry *
175get_context_addr_from_root(struct root_entry *root)
176{
177 return (struct context_entry *)
178 (root_present(root)?phys_to_virt(
179 root->val & VTD_PAGE_MASK) :
180 NULL);
181}
182
183/*
184 * low 64 bits:
185 * 0: present
186 * 1: fault processing disable
187 * 2-3: translation type
188 * 12-63: address space root
189 * high 64 bits:
190 * 0-2: address width
191 * 3-6: aval
192 * 8-23: domain id
193 */
194struct context_entry {
195 u64 lo;
196 u64 hi;
197};
198
199static inline bool context_present(struct context_entry *context)
200{
201 return (context->lo & 1);
202}
203static inline void context_set_present(struct context_entry *context)
204{
205 context->lo |= 1;
206}
207
208static inline void context_set_fault_enable(struct context_entry *context)
209{
210 context->lo &= (((u64)-1) << 2) | 1;
211}
212
213static inline void context_set_translation_type(struct context_entry *context,
214 unsigned long value)
215{
216 context->lo &= (((u64)-1) << 4) | 3;
217 context->lo |= (value & 3) << 2;
218}
219
220static inline void context_set_address_root(struct context_entry *context,
221 unsigned long value)
222{
223 context->lo |= value & VTD_PAGE_MASK;
224}
225
226static inline void context_set_address_width(struct context_entry *context,
227 unsigned long value)
228{
229 context->hi |= value & 7;
230}
231
232static inline void context_set_domain_id(struct context_entry *context,
233 unsigned long value)
234{
235 context->hi |= (value & ((1 << 16) - 1)) << 8;
236}
237
238static inline void context_clear_entry(struct context_entry *context)
239{
240 context->lo = 0;
241 context->hi = 0;
242}
243
244/*
245 * 0: readable
246 * 1: writable
247 * 2-6: reserved
248 * 7: super page
249 * 8-10: available
250 * 11: snoop behavior
251 * 12-63: Host physical address
252 */
253struct dma_pte {
254 u64 val;
255};
256
257static inline void dma_clear_pte(struct dma_pte *pte)
258{
259 pte->val = 0;
260}
261
262static inline void dma_set_pte_readable(struct dma_pte *pte)
263{
264 pte->val |= DMA_PTE_READ;
265}
266
267static inline void dma_set_pte_writable(struct dma_pte *pte)
268{
269 pte->val |= DMA_PTE_WRITE;
270}
271
272static inline void dma_set_pte_snp(struct dma_pte *pte)
273{
274 pte->val |= DMA_PTE_SNP;
275}
276
277static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
278{
279 pte->val = (pte->val & ~3) | (prot & 3);
280}
281
282static inline u64 dma_pte_addr(struct dma_pte *pte)
283{
284#ifdef CONFIG_64BIT
285 return pte->val & VTD_PAGE_MASK;
286#else
287 /* Must have a full atomic 64-bit read */
288 return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
289#endif
290}
291
292static inline void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn)
293{
294 pte->val |= (uint64_t)pfn << VTD_PAGE_SHIFT;
295}
296
297static inline bool dma_pte_present(struct dma_pte *pte)
298{
299 return (pte->val & 3) != 0;
300}
301
302static inline int first_pte_in_page(struct dma_pte *pte)
303{
304 return !((unsigned long)pte & ~VTD_PAGE_MASK);
305}
306
307/*
308 * This domain is a static identity mapping domain.
309 * 1. This domain creates a static 1:1 mapping to all usable memory.
310 * 2. It maps to each iommu if successful.
311 * 3. Each iommu maps to this domain if successful.
312 */
313static struct dmar_domain *si_domain;
314static int hw_pass_through = 1;
315
316/* devices under the same p2p bridge are owned in one domain */
317#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
318
319/* domain represents a virtual machine; more than one device
320 * across iommus may be owned in one domain, e.g. a kvm guest.
321 */
322#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1)
323
324/* si_domain contains multiple devices */
325#define DOMAIN_FLAG_STATIC_IDENTITY (1 << 2)
326
327struct dmar_domain {
328 int id; /* domain id */
329 int nid; /* node id */
330 unsigned long iommu_bmp; /* bitmap of iommus this domain uses */
331
332 struct list_head devices; /* all devices' list */
333 struct iova_domain iovad; /* iova's that belong to this domain */
334
335 struct dma_pte *pgd; /* virtual address */
336 int gaw; /* max guest address width */
337
338 /* adjusted guest address width, 0 is level 2 30-bit */
339 int agaw;
340
341 int flags; /* flags to find out type of domain */
342
343 int iommu_coherency;/* indicate coherency of iommu access */
344 int iommu_snooping; /* indicate snooping control feature */
345 int iommu_count; /* reference count of iommu */
346 spinlock_t iommu_lock; /* protect iommu set in domain */
347 u64 max_addr; /* maximum mapped address */
348};
349
350/* PCI domain-device relationship */
351struct device_domain_info {
352 struct list_head link; /* link to domain siblings */
353 struct list_head global; /* link to global list */
354 int segment; /* PCI domain */
355 u8 bus; /* PCI bus number */
356 u8 devfn; /* PCI devfn number */
357 struct pci_dev *dev; /* it's NULL for PCIe-to-PCI bridge */
358 struct intel_iommu *iommu; /* IOMMU used by this device */
359 struct dmar_domain *domain; /* pointer to domain */
360};
361
362static void flush_unmaps_timeout(unsigned long data);
363
364DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
365
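/* Lazy-unmap bookkeeping: freed IOVAs are queued in deferred_flush_tables
 * and released in batches by flush_unmaps_timeout(), trading IOTLB flush
 * frequency for a short window in which stale mappings stay live;
 * booting with intel_iommu=strict disables this batching. */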
366#define HIGH_WATER_MARK 250
367struct deferred_flush_tables {
368 int next;
369 struct iova *iova[HIGH_WATER_MARK];
370 struct dmar_domain *domain[HIGH_WATER_MARK];
371};
372
373static struct deferred_flush_tables *deferred_flush;
374
375/* bitmap for indexing intel_iommus */
376static int g_num_of_iommus;
377
378static DEFINE_SPINLOCK(async_umap_flush_lock);
379static LIST_HEAD(unmaps_to_do);
380
381static int timer_on;
382static long list_size;
383
384static void domain_remove_dev_info(struct dmar_domain *domain);
385
386#ifdef CONFIG_DMAR_DEFAULT_ON
387int dmar_disabled = 0;
388#else
389int dmar_disabled = 1;
390#endif /*CONFIG_DMAR_DEFAULT_ON*/
391
392static int dmar_map_gfx = 1;
393static int dmar_forcedac;
394static int intel_iommu_strict;
395
396#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
397static DEFINE_SPINLOCK(device_domain_lock);
398static LIST_HEAD(device_domain_list);
399
400static struct iommu_ops intel_iommu_ops;
401
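/* Parse the intel_iommu= boot parameter; options may be combined with
 * commas, e.g. "intel_iommu=on,strict". */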
402static int __init intel_iommu_setup(char *str)
403{
404 if (!str)
405 return -EINVAL;
406 while (*str) {
407 if (!strncmp(str, "on", 2)) {
408 dmar_disabled = 0;
409 printk(KERN_INFO "Intel-IOMMU: enabled\n");
410 } else if (!strncmp(str, "off", 3)) {
411 dmar_disabled = 1;
412 printk(KERN_INFO "Intel-IOMMU: disabled\n");
413 } else if (!strncmp(str, "igfx_off", 8)) {
414 dmar_map_gfx = 0;
415 printk(KERN_INFO
416 "Intel-IOMMU: disable GFX device mapping\n");
417 } else if (!strncmp(str, "forcedac", 8)) {
418 printk(KERN_INFO
419 "Intel-IOMMU: Forcing DAC for PCI devices\n");
420 dmar_forcedac = 1;
421 } else if (!strncmp(str, "strict", 6)) {
422 printk(KERN_INFO
423 "Intel-IOMMU: disable batched IOTLB flush\n");
424 intel_iommu_strict = 1;
425 }
426
427 str += strcspn(str, ",");
428 while (*str == ',')
429 str++;
430 }
431 return 0;
432}
433__setup("intel_iommu=", intel_iommu_setup);
434
435static struct kmem_cache *iommu_domain_cache;
436static struct kmem_cache *iommu_devinfo_cache;
437static struct kmem_cache *iommu_iova_cache;
438
439static inline void *alloc_pgtable_page(int node)
440{
441 struct page *page;
442 void *vaddr = NULL;
443
444 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
445 if (page)
446 vaddr = page_address(page);
447 return vaddr;
448}
449
450static inline void free_pgtable_page(void *vaddr)
451{
452 free_page((unsigned long)vaddr);
453}
454
455static inline void *alloc_domain_mem(void)
456{
457 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
458}
459
460static void free_domain_mem(void *vaddr)
461{
462 kmem_cache_free(iommu_domain_cache, vaddr);
463}
464
465static inline void * alloc_devinfo_mem(void)
466{
467 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
468}
469
470static inline void free_devinfo_mem(void *vaddr)
471{
472 kmem_cache_free(iommu_devinfo_cache, vaddr);
473}
474
475struct iova *alloc_iova_mem(void)
476{
477 return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
478}
479
480void free_iova_mem(struct iova *iova)
481{
482 kmem_cache_free(iommu_iova_cache, iova);
483}
484
485
486static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
487{
488 unsigned long sagaw;
489 int agaw = -1;
490
491 sagaw = cap_sagaw(iommu->cap);
492 for (agaw = width_to_agaw(max_gaw);
493 agaw >= 0; agaw--) {
494 if (test_bit(agaw, &sagaw))
495 break;
496 }
497
498 return agaw;
499}
500
501/*
502 * Calculate max SAGAW for each iommu.
503 */
504int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
505{
506 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
507}
508
509/*
510 * calculate agaw for each iommu.
511 * "SAGAW" may be different across iommus; use a default agaw and
512 * fall back to a smaller supported agaw for iommus that don't support the default.
513 */
514int iommu_calculate_agaw(struct intel_iommu *iommu)
515{
516 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
517}
518
519/* This function only returns the single iommu in a domain */
520static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
521{
522 int iommu_id;
523
524 /* si_domain and vm domain should not get here. */
525 BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
526 BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY);
527
528 iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
529 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
530 return NULL;
531
532 return g_iommus[iommu_id];
533}
534
535static void domain_update_iommu_coherency(struct dmar_domain *domain)
536{
537 int i;
538
539 domain->iommu_coherency = 1;
540
541 for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
542 if (!ecap_coherent(g_iommus[i]->ecap)) {
543 domain->iommu_coherency = 0;
544 break;
545 }
546 }
547}
548
549static void domain_update_iommu_snooping(struct dmar_domain *domain)
550{
551 int i;
552
553 domain->iommu_snooping = 1;
554
555 for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
556 if (!ecap_sc_support(g_iommus[i]->ecap)) {
557 domain->iommu_snooping = 0;
558 break;
559 }
560 }
561}
562
563/* Some capabilities may be different across iommus */
564static void domain_update_iommu_cap(struct dmar_domain *domain)
565{
566 domain_update_iommu_coherency(domain);
567 domain_update_iommu_snooping(domain);
568}
569
570static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
571{
572 struct dmar_drhd_unit *drhd = NULL;
573 int i;
574
575 for_each_drhd_unit(drhd) {
576 if (drhd->ignored)
577 continue;
578 if (segment != drhd->segment)
579 continue;
580
581 for (i = 0; i < drhd->devices_cnt; i++) {
582 if (drhd->devices[i] &&
583 drhd->devices[i]->bus->number == bus &&
584 drhd->devices[i]->devfn == devfn)
585 return drhd->iommu;
586 if (drhd->devices[i] &&
587 drhd->devices[i]->subordinate &&
588 drhd->devices[i]->subordinate->number <= bus &&
589 drhd->devices[i]->subordinate->subordinate >= bus)
590 return drhd->iommu;
591 }
592
593 if (drhd->include_all)
594 return drhd->iommu;
595 }
596
597 return NULL;
598}
599
600static void domain_flush_cache(struct dmar_domain *domain,
601 void *addr, int size)
602{
603 if (!domain->iommu_coherency)
604 clflush_cache_range(addr, size);
605}
606
607/* Gets context entry for a given bus and devfn */
608static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
609 u8 bus, u8 devfn)
610{
611 struct root_entry *root;
612 struct context_entry *context;
613 unsigned long phy_addr;
614 unsigned long flags;
615
616 spin_lock_irqsave(&iommu->lock, flags);
617 root = &iommu->root_entry[bus];
618 context = get_context_addr_from_root(root);
619 if (!context) {
620 context = (struct context_entry *)
621 alloc_pgtable_page(iommu->node);
622 if (!context) {
623 spin_unlock_irqrestore(&iommu->lock, flags);
624 return NULL;
625 }
626 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
627 phy_addr = virt_to_phys((void *)context);
628 set_root_value(root, phy_addr);
629 set_root_present(root);
630 __iommu_flush_cache(iommu, root, sizeof(*root));
631 }
632 spin_unlock_irqrestore(&iommu->lock, flags);
633 return &context[devfn];
634}
635
636static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
637{
638 struct root_entry *root;
639 struct context_entry *context;
640 int ret;
641 unsigned long flags;
642
643 spin_lock_irqsave(&iommu->lock, flags);
644 root = &iommu->root_entry[bus];
645 context = get_context_addr_from_root(root);
646 if (!context) {
647 ret = 0;
648 goto out;
649 }
650 ret = context_present(&context[devfn]);
651out:
652 spin_unlock_irqrestore(&iommu->lock, flags);
653 return ret;
654}
655
656static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
657{
658 struct root_entry *root;
659 struct context_entry *context;
660 unsigned long flags;
661
662 spin_lock_irqsave(&iommu->lock, flags);
663 root = &iommu->root_entry[bus];
664 context = get_context_addr_from_root(root);
665 if (context) {
666 context_clear_entry(&context[devfn]);
667 __iommu_flush_cache(iommu, &context[devfn], \
668 sizeof(*context));
669 }
670 spin_unlock_irqrestore(&iommu->lock, flags);
671}
672
673static void free_context_table(struct intel_iommu *iommu)
674{
675 struct root_entry *root;
676 int i;
677 unsigned long flags;
678 struct context_entry *context;
679
680 spin_lock_irqsave(&iommu->lock, flags);
681 if (!iommu->root_entry) {
682 goto out;
683 }
684 for (i = 0; i < ROOT_ENTRY_NR; i++) {
685 root = &iommu->root_entry[i];
686 context = get_context_addr_from_root(root);
687 if (context)
688 free_pgtable_page(context);
689 }
690 free_pgtable_page(iommu->root_entry);
691 iommu->root_entry = NULL;
692out:
693 spin_unlock_irqrestore(&iommu->lock, flags);
694}
695
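/* Walk the page table down to the leaf (level 1) entry for @pfn,
 * allocating intermediate page-table pages as needed; concurrent
 * walkers are reconciled with cmpxchg64 so only one allocation wins. */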
696static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
697 unsigned long pfn)
698{
699 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
700 struct dma_pte *parent, *pte = NULL;
701 int level = agaw_to_level(domain->agaw);
702 int offset;
703
704 BUG_ON(!domain->pgd);
705 BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
706 parent = domain->pgd;
707
708 while (level > 0) {
709 void *tmp_page;
710
711 offset = pfn_level_offset(pfn, level);
712 pte = &parent[offset];
713 if (level == 1)
714 break;
715
716 if (!dma_pte_present(pte)) {
717 uint64_t pteval;
718
719 tmp_page = alloc_pgtable_page(domain->nid);
720
721 if (!tmp_page)
722 return NULL;
723
724 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
725 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
726 if (cmpxchg64(&pte->val, 0ULL, pteval)) {
727 /* Someone else set it while we were thinking; use theirs. */
728 free_pgtable_page(tmp_page);
729 } else {
730 dma_pte_addr(pte);
731 domain_flush_cache(domain, pte, sizeof(*pte));
732 }
733 }
734 parent = phys_to_virt(dma_pte_addr(pte));
735 level--;
736 }
737
738 return pte;
739}
740
741/* return address's pte at specific level */
742static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
743 unsigned long pfn,
744 int level)
745{
746 struct dma_pte *parent, *pte = NULL;
747 int total = agaw_to_level(domain->agaw);
748 int offset;
749
750 parent = domain->pgd;
751 while (level <= total) {
752 offset = pfn_level_offset(pfn, total);
753 pte = &parent[offset];
754 if (level == total)
755 return pte;
756
757 if (!dma_pte_present(pte))
758 break;
759 parent = phys_to_virt(dma_pte_addr(pte));
760 total--;
761 }
762 return NULL;
763}
764
765/* clear last level pte; should be followed by a tlb flush */
766static void dma_pte_clear_range(struct dmar_domain *domain,
767 unsigned long start_pfn,
768 unsigned long last_pfn)
769{
770 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
771 struct dma_pte *first_pte, *pte;
772
773 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
774 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
775 BUG_ON(start_pfn > last_pfn);
776
777 /* we don't need lock here; nobody else touches the iova range */
778 do {
779 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1);
780 if (!pte) {
781 start_pfn = align_to_level(start_pfn + 1, 2);
782 continue;
783 }
784 do {
785 dma_clear_pte(pte);
786 start_pfn++;
787 pte++;
788 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
789
790 domain_flush_cache(domain, first_pte,
791 (void *)pte - (void *)first_pte);
792
793 } while (start_pfn && start_pfn <= last_pfn);
794}
795
796/* free page table pages. last level pte should already be cleared */
797static void dma_pte_free_pagetable(struct dmar_domain *domain,
798 unsigned long start_pfn,
799 unsigned long last_pfn)
800{
801 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
802 struct dma_pte *first_pte, *pte;
803 int total = agaw_to_level(domain->agaw);
804 int level;
805 unsigned long tmp;
806
807 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
808 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
809 BUG_ON(start_pfn > last_pfn);
810
811 /* We don't need lock here; nobody else touches the iova range */
812 level = 2;
813 while (level <= total) {
814 tmp = align_to_level(start_pfn, level);
815
816 /* If we can't even clear one PTE at this level, we're done */
817 if (tmp + level_size(level) - 1 > last_pfn)
818 return;
819
820 do {
821 first_pte = pte = dma_pfn_level_pte(domain, tmp, level);
822 if (!pte) {
823 tmp = align_to_level(tmp + 1, level + 1);
824 continue;
825 }
826 do {
827 if (dma_pte_present(pte)) {
828 free_pgtable_page(phys_to_virt(dma_pte_addr(pte)));
829 dma_clear_pte(pte);
830 }
831 pte++;
832 tmp += level_size(level);
833 } while (!first_pte_in_page(pte) &&
834 tmp + level_size(level) - 1 <= last_pfn);
835
836 domain_flush_cache(domain, first_pte,
837 (void *)pte - (void *)first_pte);
838
839 } while (tmp && tmp + level_size(level) - 1 <= last_pfn);
840 level++;
841 }
842 /* free pgd */
843 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
844 free_pgtable_page(domain->pgd);
845 domain->pgd = NULL;
846 }
847}
848
849/* iommu handling */
850static int iommu_alloc_root_entry(struct intel_iommu *iommu)
851{
852 struct root_entry *root;
853 unsigned long flags;
854
4c923d47 855 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
ba395927
KA
856 if (!root)
857 return -ENOMEM;
858
5b6985ce 859 __iommu_flush_cache(iommu, root, ROOT_SIZE);
ba395927
KA
860
861 spin_lock_irqsave(&iommu->lock, flags);
862 iommu->root_entry = root;
863 spin_unlock_irqrestore(&iommu->lock, flags);
864
865 return 0;
866}
867
868static void iommu_set_root_entry(struct intel_iommu *iommu)
869{
870 void *addr;
c416daa9 871 u32 sts;
ba395927
KA
872 unsigned long flag;
873
874 addr = iommu->root_entry;
875
876 spin_lock_irqsave(&iommu->register_lock, flag);
877 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
878
c416daa9 879 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
880
881 /* Make sure hardware complete it */
882 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 883 readl, (sts & DMA_GSTS_RTPS), sts);
ba395927
KA
884
885 spin_unlock_irqrestore(&iommu->register_lock, flag);
886}
887
888static void iommu_flush_write_buffer(struct intel_iommu *iommu)
889{
890 u32 val;
891 unsigned long flag;
892
893 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
894 return;
895
896 spin_lock_irqsave(&iommu->register_lock, flag);
462b60f6 897 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
898
899 /* Make sure hardware complete it */
900 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 901 readl, (!(val & DMA_GSTS_WBFS)), val);
ba395927
KA
902
903 spin_unlock_irqrestore(&iommu->register_lock, flag);
904}
905
906/* return value determines whether we need a write buffer flush */
907static void __iommu_flush_context(struct intel_iommu *iommu,
908 u16 did, u16 source_id, u8 function_mask,
909 u64 type)
910{
911 u64 val = 0;
912 unsigned long flag;
913
ba395927
KA
914 switch (type) {
915 case DMA_CCMD_GLOBAL_INVL:
916 val = DMA_CCMD_GLOBAL_INVL;
917 break;
918 case DMA_CCMD_DOMAIN_INVL:
919 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
920 break;
921 case DMA_CCMD_DEVICE_INVL:
922 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
923 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
924 break;
925 default:
926 BUG();
927 }
928 val |= DMA_CCMD_ICC;
929
930 spin_lock_irqsave(&iommu->register_lock, flag);
931 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
932
933 /* Make sure hardware complete it */
934 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
935 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
936
937 spin_unlock_irqrestore(&iommu->register_lock, flag);
938}
939
940/* return value determines whether we need a write buffer flush */
941static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
942 u64 addr, unsigned int size_order, u64 type)
943{
944 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
945 u64 val = 0, val_iva = 0;
946 unsigned long flag;
947
948 switch (type) {
949 case DMA_TLB_GLOBAL_FLUSH:
950 /* global flush doesn't need set IVA_REG */
951 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
952 break;
953 case DMA_TLB_DSI_FLUSH:
954 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
955 break;
956 case DMA_TLB_PSI_FLUSH:
957 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
958 /* Note: always flush non-leaf currently */
959 val_iva = size_order | addr;
960 break;
961 default:
962 BUG();
963 }
964 /* Note: set drain read/write */
965#if 0
966 /*
967 * This is probably to be super secure.. Looks like we can
968 * ignore it without any impact.
969 */
970 if (cap_read_drain(iommu->cap))
971 val |= DMA_TLB_READ_DRAIN;
972#endif
973 if (cap_write_drain(iommu->cap))
974 val |= DMA_TLB_WRITE_DRAIN;
975
976 spin_lock_irqsave(&iommu->register_lock, flag);
977 /* Note: Only uses first TLB reg currently */
978 if (val_iva)
979 dmar_writeq(iommu->reg + tlb_offset, val_iva);
980 dmar_writeq(iommu->reg + tlb_offset + 8, val);
981
982 /* Make sure hardware complete it */
983 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
984 dmar_readq, (!(val & DMA_TLB_IVT)), val);
985
986 spin_unlock_irqrestore(&iommu->register_lock, flag);
987
988 /* check IOTLB invalidation granularity */
989 if (DMA_TLB_IAIG(val) == 0)
990 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
991 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
992 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
993 (unsigned long long)DMA_TLB_IIRG(type),
994 (unsigned long long)DMA_TLB_IAIG(val));
995}
996
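/* Return the device_domain_info for (segment, bus, devfn) if both the
 * IOMMU (queued invalidation, device-IOTLB support) and the device
 * (ATS capability, matching ATSR) can do device-IOTLB invalidation,
 * otherwise NULL. */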
997static struct device_domain_info *iommu_support_dev_iotlb(
998 struct dmar_domain *domain, int segment, u8 bus, u8 devfn)
999{
1000 int found = 0;
1001 unsigned long flags;
1002 struct device_domain_info *info;
1003 struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn);
1004
1005 if (!ecap_dev_iotlb_support(iommu->ecap))
1006 return NULL;
1007
1008 if (!iommu->qi)
1009 return NULL;
1010
1011 spin_lock_irqsave(&device_domain_lock, flags);
1012 list_for_each_entry(info, &domain->devices, link)
1013 if (info->bus == bus && info->devfn == devfn) {
1014 found = 1;
1015 break;
1016 }
1017 spin_unlock_irqrestore(&device_domain_lock, flags);
1018
1019 if (!found || !info->dev)
1020 return NULL;
1021
1022 if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS))
1023 return NULL;
1024
1025 if (!dmar_find_matched_atsr_unit(info->dev))
1026 return NULL;
1027
1028 info->iommu = iommu;
1029
1030 return info;
1031}
1032
1033static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1034{
1035 if (!info)
1036 return;
1037
1038 pci_enable_ats(info->dev, VTD_PAGE_SHIFT);
1039}
1040
1041static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1042{
1043 if (!info->dev || !pci_ats_enabled(info->dev))
1044 return;
1045
1046 pci_disable_ats(info->dev);
1047}
1048
1049static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1050 u64 addr, unsigned mask)
1051{
1052 u16 sid, qdep;
1053 unsigned long flags;
1054 struct device_domain_info *info;
1055
1056 spin_lock_irqsave(&device_domain_lock, flags);
1057 list_for_each_entry(info, &domain->devices, link) {
1058 if (!info->dev || !pci_ats_enabled(info->dev))
1059 continue;
1060
1061 sid = info->bus << 8 | info->devfn;
1062 qdep = pci_ats_queue_depth(info->dev);
1063 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1064 }
1065 spin_unlock_irqrestore(&device_domain_lock, flags);
1066}
1067
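/* Page-selective IOTLB invalidation for @pages pages starting at @pfn,
 * falling back to a domain-selective flush when PSI is unsupported or
 * the request exceeds the hardware's maximum address mask. */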
1068static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
1069 unsigned long pfn, unsigned int pages, int map)
1070{
1071 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1072 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1073
1074 BUG_ON(pages == 0);
1075
1076 /*
1077 * Fallback to domain selective flush if no PSI support or the size is
1078 * too big.
1079 * PSI requires page size to be 2 ^ x, and the base address is naturally
1080 * aligned to the size
1081 */
1082 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1083 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1084 DMA_TLB_DSI_FLUSH);
1085 else
1086 iommu->flush.flush_iotlb(iommu, did, addr, mask,
1087 DMA_TLB_PSI_FLUSH);
1088
1089 /*
1090 * In caching mode, changes of pages from non-present to present require
1091 * flush. However, device IOTLB doesn't need to be flushed in this case.
1092 */
1093 if (!cap_caching_mode(iommu->cap) || !map)
1094 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
1095}
1096
f8bab735 1097static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1098{
1099 u32 pmen;
1100 unsigned long flags;
1101
1102 spin_lock_irqsave(&iommu->register_lock, flags);
1103 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1104 pmen &= ~DMA_PMEN_EPM;
1105 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1106
1107 /* wait for the protected region status bit to clear */
1108 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1109 readl, !(pmen & DMA_PMEN_PRS), pmen);
1110
1111 spin_unlock_irqrestore(&iommu->register_lock, flags);
1112}
1113
1114static int iommu_enable_translation(struct intel_iommu *iommu)
1115{
1116 u32 sts;
1117 unsigned long flags;
1118
1119 spin_lock_irqsave(&iommu->register_lock, flags);
1120 iommu->gcmd |= DMA_GCMD_TE;
1121 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1122
1123 /* Make sure hardware complete it */
1124 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1125 readl, (sts & DMA_GSTS_TES), sts);
ba395927 1126
ba395927
KA
1127 spin_unlock_irqrestore(&iommu->register_lock, flags);
1128 return 0;
1129}
1130
1131static int iommu_disable_translation(struct intel_iommu *iommu)
1132{
1133 u32 sts;
1134 unsigned long flag;
1135
1136 spin_lock_irqsave(&iommu->register_lock, flag);
1137 iommu->gcmd &= ~DMA_GCMD_TE;
1138 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1139
1140 /* Make sure hardware complete it */
1141 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1142 readl, (!(sts & DMA_GSTS_TES)), sts);
ba395927
KA
1143
1144 spin_unlock_irqrestore(&iommu->register_lock, flag);
1145 return 0;
1146}
1147
1148
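/* Allocate the per-IOMMU domain-id bitmap and domain pointer array,
 * sized by cap_ndoms(); domain id 0 is pre-allocated when caching mode
 * is set, since hardware tags invalid translations with it. */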
1149static int iommu_init_domains(struct intel_iommu *iommu)
1150{
1151 unsigned long ndomains;
1152 unsigned long nlongs;
1153
1154 ndomains = cap_ndoms(iommu->cap);
1155 pr_debug("IOMMU %d: Number of Domains supported <%ld>\n", iommu->seq_id,
1156 ndomains);
1157 nlongs = BITS_TO_LONGS(ndomains);
1158
1159 spin_lock_init(&iommu->lock);
1160
1161 /* TBD: there might be 64K domains,
1162 * consider other allocation for future chip
1163 */
1164 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1165 if (!iommu->domain_ids) {
1166 printk(KERN_ERR "Allocating domain id array failed\n");
1167 return -ENOMEM;
1168 }
1169 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1170 GFP_KERNEL);
1171 if (!iommu->domains) {
1172 printk(KERN_ERR "Allocating domain array failed\n");
1173 return -ENOMEM;
1174 }
1175
1176 /*
1177 * if Caching mode is set, then invalid translations are tagged
1178 * with domainid 0. Hence we need to pre-allocate it.
1179 */
1180 if (cap_caching_mode(iommu->cap))
1181 set_bit(0, iommu->domain_ids);
1182 return 0;
1183}
1184
1185
1186static void domain_exit(struct dmar_domain *domain);
1187static void vm_domain_exit(struct dmar_domain *domain);
1188
1189void free_dmar_iommu(struct intel_iommu *iommu)
1190{
1191 struct dmar_domain *domain;
1192 int i;
1193 unsigned long flags;
1194
1195 if ((iommu->domains) && (iommu->domain_ids)) {
1196 for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
1197 domain = iommu->domains[i];
1198 clear_bit(i, iommu->domain_ids);
1199
1200 spin_lock_irqsave(&domain->iommu_lock, flags);
1201 if (--domain->iommu_count == 0) {
1202 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
1203 vm_domain_exit(domain);
1204 else
1205 domain_exit(domain);
1206 }
1207 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1208 }
1209 }
1210
1211 if (iommu->gcmd & DMA_GCMD_TE)
1212 iommu_disable_translation(iommu);
1213
1214 if (iommu->irq) {
1215 irq_set_handler_data(iommu->irq, NULL);
1216 /* This will mask the irq */
1217 free_irq(iommu->irq, iommu);
1218 destroy_irq(iommu->irq);
1219 }
1220
1221 kfree(iommu->domains);
1222 kfree(iommu->domain_ids);
1223
1224 g_iommus[iommu->seq_id] = NULL;
1225
1226 /* if all iommus are freed, free g_iommus */
1227 for (i = 0; i < g_num_of_iommus; i++) {
1228 if (g_iommus[i])
1229 break;
1230 }
1231
1232 if (i == g_num_of_iommus)
1233 kfree(g_iommus);
1234
1235 /* free context mapping */
1236 free_context_table(iommu);
1237}
1238
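/* Allocate an empty dmar_domain; a domain id and an owning iommu are
 * assigned separately by iommu_attach_domain(). */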
1239static struct dmar_domain *alloc_domain(void)
1240{
1241 struct dmar_domain *domain;
1242
1243 domain = alloc_domain_mem();
1244 if (!domain)
1245 return NULL;
1246
1247 domain->nid = -1;
1248 memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
1249 domain->flags = 0;
1250
1251 return domain;
1252}
1253
1254static int iommu_attach_domain(struct dmar_domain *domain,
1255 struct intel_iommu *iommu)
1256{
1257 int num;
1258 unsigned long ndomains;
1259 unsigned long flags;
1260
ba395927
KA
1261 ndomains = cap_ndoms(iommu->cap);
1262
1263 spin_lock_irqsave(&iommu->lock, flags);
2c2e2c38 1264
ba395927
KA
1265 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1266 if (num >= ndomains) {
1267 spin_unlock_irqrestore(&iommu->lock, flags);
1268 printk(KERN_ERR "IOMMU: no free domain ids\n");
1269 return -ENOMEM;
ba395927
KA
1270 }
1271
1272 domain->id = num;
1273 set_bit(num, iommu->domain_ids);
1274 set_bit(iommu->seq_id, &domain->iommu_bmp);
1275 iommu->domains[num] = domain;
1276 spin_unlock_irqrestore(&iommu->lock, flags);
1277
1278 return 0;
1279}
1280
2c2e2c38
FY
1281static void iommu_detach_domain(struct dmar_domain *domain,
1282 struct intel_iommu *iommu)
1283{
1284 unsigned long flags;
2c2e2c38
FY
1285 int num, ndomains;
1286 int found = 0;
1287
1288 spin_lock_irqsave(&iommu->lock, flags);
1289 ndomains = cap_ndoms(iommu->cap);
1290 for_each_set_bit(num, iommu->domain_ids, ndomains) {
1291 if (iommu->domains[num] == domain) {
1292 found = 1;
1293 break;
1294 }
1295 }
1296
1297 if (found) {
1298 clear_bit(num, iommu->domain_ids);
1299 clear_bit(iommu->seq_id, &domain->iommu_bmp);
1300 iommu->domains[num] = NULL;
1301 }
1302 spin_unlock_irqrestore(&iommu->lock, flags);
1303}
1304
1305static struct iova_domain reserved_iova_list;
1306static struct lock_class_key reserved_rbtree_key;
1307
1308static int dmar_init_reserved_ranges(void)
1309{
1310 struct pci_dev *pdev = NULL;
1311 struct iova *iova;
1312 int i;
1313
1314 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1315
1316 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1317 &reserved_rbtree_key);
1318
1319 /* IOAPIC ranges shouldn't be accessed by DMA */
1320 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1321 IOVA_PFN(IOAPIC_RANGE_END));
1322 if (!iova) {
1323 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1324 return -ENODEV;
1325 }
1326
1327 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1328 for_each_pci_dev(pdev) {
1329 struct resource *r;
1330
1331 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1332 r = &pdev->resource[i];
1333 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1334 continue;
1335 iova = reserve_iova(&reserved_iova_list,
1336 IOVA_PFN(r->start),
1337 IOVA_PFN(r->end));
1338 if (!iova) {
1339 printk(KERN_ERR "Reserve iova failed\n");
1340 return -ENODEV;
1341 }
1342 }
1343 }
1344 return 0;
1345}
1346
1347static void domain_reserve_special_ranges(struct dmar_domain *domain)
1348{
1349 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1350}
1351
1352static inline int guestwidth_to_adjustwidth(int gaw)
1353{
1354 int agaw;
1355 int r = (gaw - 12) % 9;
1356
1357 if (r == 0)
1358 agaw = gaw;
1359 else
1360 agaw = gaw + 9 - r;
1361 if (agaw > 64)
1362 agaw = 64;
1363 return agaw;
1364}
1365
1366static int domain_init(struct dmar_domain *domain, int guest_width)
1367{
1368 struct intel_iommu *iommu;
1369 int adjust_width, agaw;
1370 unsigned long sagaw;
1371
f661197e 1372 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
c7151a8d 1373 spin_lock_init(&domain->iommu_lock);
ba395927
KA
1374
1375 domain_reserve_special_ranges(domain);
1376
1377 /* calculate AGAW */
8c11e798 1378 iommu = domain_get_iommu(domain);
ba395927
KA
1379 if (guest_width > cap_mgaw(iommu->cap))
1380 guest_width = cap_mgaw(iommu->cap);
1381 domain->gaw = guest_width;
1382 adjust_width = guestwidth_to_adjustwidth(guest_width);
1383 agaw = width_to_agaw(adjust_width);
1384 sagaw = cap_sagaw(iommu->cap);
1385 if (!test_bit(agaw, &sagaw)) {
1386 /* hardware doesn't support it, choose a bigger one */
1387 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1388 agaw = find_next_bit(&sagaw, 5, agaw);
1389 if (agaw >= 5)
1390 return -ENODEV;
1391 }
1392 domain->agaw = agaw;
1393 INIT_LIST_HEAD(&domain->devices);
1394
8e604097
WH
1395 if (ecap_coherent(iommu->ecap))
1396 domain->iommu_coherency = 1;
1397 else
1398 domain->iommu_coherency = 0;
1399
58c610bd
SY
1400 if (ecap_sc_support(iommu->ecap))
1401 domain->iommu_snooping = 1;
1402 else
1403 domain->iommu_snooping = 0;
1404
c7151a8d 1405 domain->iommu_count = 1;
4c923d47 1406 domain->nid = iommu->node;
c7151a8d 1407
ba395927 1408 /* always allocate the top pgd */
4c923d47 1409 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
ba395927
KA
1410 if (!domain->pgd)
1411 return -ENOMEM;
5b6985ce 1412 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
ba395927
KA
1413 return 0;
1414}
1415
1416static void domain_exit(struct dmar_domain *domain)
1417{
2c2e2c38
FY
1418 struct dmar_drhd_unit *drhd;
1419 struct intel_iommu *iommu;
ba395927
KA
1420
1421 /* Domain 0 is reserved, so don't process it */
1422 if (!domain)
1423 return;
1424
7b668357
AW
1425 /* Flush any lazy unmaps that may reference this domain */
1426 if (!intel_iommu_strict)
1427 flush_unmaps_timeout(0);
1428
ba395927
KA
1429 domain_remove_dev_info(domain);
1430 /* destroy iovas */
1431 put_iova_domain(&domain->iovad);
ba395927
KA
1432
1433 /* clear ptes */
595badf5 1434 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
ba395927
KA
1435
1436 /* free page tables */
d794dc9b 1437 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
ba395927 1438
2c2e2c38
FY
1439 for_each_active_iommu(iommu, drhd)
1440 if (test_bit(iommu->seq_id, &domain->iommu_bmp))
1441 iommu_detach_domain(domain, iommu);
1442
ba395927
KA
1443 free_domain_mem(domain);
1444}
1445
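/* Program the context entry for (@segment, @bus, @devfn) to reference
 * @domain's page tables (or pass-through), then flush the context cache
 * and IOTLB as required by caching mode. */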
1446static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
1447 u8 bus, u8 devfn, int translation)
ba395927
KA
1448{
1449 struct context_entry *context;
ba395927 1450 unsigned long flags;
5331fe6f 1451 struct intel_iommu *iommu;
ea6606b0
WH
1452 struct dma_pte *pgd;
1453 unsigned long num;
1454 unsigned long ndomains;
1455 int id;
1456 int agaw;
93a23a72 1457 struct device_domain_info *info = NULL;
ba395927
KA
1458
1459 pr_debug("Set context mapping for %02x:%02x.%d\n",
1460 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
4ed0d3e6 1461
ba395927 1462 BUG_ON(!domain->pgd);
4ed0d3e6
FY
1463 BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1464 translation != CONTEXT_TT_MULTI_LEVEL);
5331fe6f 1465
276dbf99 1466 iommu = device_to_iommu(segment, bus, devfn);
5331fe6f
WH
1467 if (!iommu)
1468 return -ENODEV;
1469
ba395927
KA
1470 context = device_to_context_entry(iommu, bus, devfn);
1471 if (!context)
1472 return -ENOMEM;
1473 spin_lock_irqsave(&iommu->lock, flags);
c07e7d21 1474 if (context_present(context)) {
ba395927
KA
1475 spin_unlock_irqrestore(&iommu->lock, flags);
1476 return 0;
1477 }
1478
ea6606b0
WH
1479 id = domain->id;
1480 pgd = domain->pgd;
1481
2c2e2c38
FY
1482 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
1483 domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) {
ea6606b0
WH
1484 int found = 0;
1485
1486 /* find an available domain id for this device in iommu */
1487 ndomains = cap_ndoms(iommu->cap);
a45946ab 1488 for_each_set_bit(num, iommu->domain_ids, ndomains) {
ea6606b0
WH
1489 if (iommu->domains[num] == domain) {
1490 id = num;
1491 found = 1;
1492 break;
1493 }
ea6606b0
WH
1494 }
1495
1496 if (found == 0) {
1497 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1498 if (num >= ndomains) {
1499 spin_unlock_irqrestore(&iommu->lock, flags);
1500 printk(KERN_ERR "IOMMU: no free domain ids\n");
1501 return -EFAULT;
1502 }
1503
1504 set_bit(num, iommu->domain_ids);
1505 iommu->domains[num] = domain;
1506 id = num;
1507 }
1508
1509 /* Skip top levels of page tables for
1510 * iommu which has less agaw than default.
1672af11 1511 * Unnecessary for PT mode.
ea6606b0 1512 */
1672af11
CW
1513 if (translation != CONTEXT_TT_PASS_THROUGH) {
1514 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1515 pgd = phys_to_virt(dma_pte_addr(pgd));
1516 if (!dma_pte_present(pgd)) {
1517 spin_unlock_irqrestore(&iommu->lock, flags);
1518 return -ENOMEM;
1519 }
ea6606b0
WH
1520 }
1521 }
1522 }
1523
1524 context_set_domain_id(context, id);
4ed0d3e6 1525
93a23a72
YZ
1526 if (translation != CONTEXT_TT_PASS_THROUGH) {
1527 info = iommu_support_dev_iotlb(domain, segment, bus, devfn);
1528 translation = info ? CONTEXT_TT_DEV_IOTLB :
1529 CONTEXT_TT_MULTI_LEVEL;
1530 }
4ed0d3e6
FY
1531 /*
1532 * In pass through mode, AW must be programmed to indicate the largest
1533 * AGAW value supported by hardware. And ASR is ignored by hardware.
1534 */
93a23a72 1535 if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
4ed0d3e6 1536 context_set_address_width(context, iommu->msagaw);
93a23a72
YZ
1537 else {
1538 context_set_address_root(context, virt_to_phys(pgd));
1539 context_set_address_width(context, iommu->agaw);
1540 }
4ed0d3e6
FY
1541
1542 context_set_translation_type(context, translation);
c07e7d21
MM
1543 context_set_fault_enable(context);
1544 context_set_present(context);
5331fe6f 1545 domain_flush_cache(domain, context, sizeof(*context));
ba395927 1546
4c25a2c1
DW
1547 /*
1548 * It's a non-present to present mapping. If hardware doesn't cache
1549 * non-present entry we only need to flush the write-buffer. If the
1550 * _does_ cache non-present entries, then it does so in the special
1551 * domain #0, which we have to flush:
1552 */
1553 if (cap_caching_mode(iommu->cap)) {
1554 iommu->flush.flush_context(iommu, 0,
1555 (((u16)bus) << 8) | devfn,
1556 DMA_CCMD_MASK_NOBIT,
1557 DMA_CCMD_DEVICE_INVL);
82653633 1558 iommu->flush.flush_iotlb(iommu, domain->id, 0, 0, DMA_TLB_DSI_FLUSH);
4c25a2c1 1559 } else {
ba395927 1560 iommu_flush_write_buffer(iommu);
4c25a2c1 1561 }
93a23a72 1562 iommu_enable_dev_iotlb(info);
ba395927 1563 spin_unlock_irqrestore(&iommu->lock, flags);
c7151a8d
WH
1564
1565 spin_lock_irqsave(&domain->iommu_lock, flags);
1566 if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) {
1567 domain->iommu_count++;
4c923d47
SS
1568 if (domain->iommu_count == 1)
1569 domain->nid = iommu->node;
58c610bd 1570 domain_update_iommu_cap(domain);
c7151a8d
WH
1571 }
1572 spin_unlock_irqrestore(&domain->iommu_lock, flags);
ba395927
KA
1573 return 0;
1574}
1575
1576static int
4ed0d3e6
FY
1577domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
1578 int translation)
ba395927
KA
1579{
1580 int ret;
1581 struct pci_dev *tmp, *parent;
1582
276dbf99 1583 ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus),
4ed0d3e6
FY
1584 pdev->bus->number, pdev->devfn,
1585 translation);
ba395927
KA
1586 if (ret)
1587 return ret;
1588
1589 /* dependent device mapping */
1590 tmp = pci_find_upstream_pcie_bridge(pdev);
1591 if (!tmp)
1592 return 0;
1593 /* Secondary interface's bus number and devfn 0 */
1594 parent = pdev->bus->self;
1595 while (parent != tmp) {
276dbf99
DW
1596 ret = domain_context_mapping_one(domain,
1597 pci_domain_nr(parent->bus),
1598 parent->bus->number,
4ed0d3e6 1599 parent->devfn, translation);
ba395927
KA
1600 if (ret)
1601 return ret;
1602 parent = parent->bus->self;
1603 }
45e829ea 1604 if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
ba395927 1605 return domain_context_mapping_one(domain,
276dbf99 1606 pci_domain_nr(tmp->subordinate),
4ed0d3e6
FY
1607 tmp->subordinate->number, 0,
1608 translation);
ba395927
KA
1609 else /* this is a legacy PCI bridge */
1610 return domain_context_mapping_one(domain,
276dbf99
DW
1611 pci_domain_nr(tmp->bus),
1612 tmp->bus->number,
4ed0d3e6
FY
1613 tmp->devfn,
1614 translation);
ba395927
KA
1615}
1616
5331fe6f 1617static int domain_context_mapped(struct pci_dev *pdev)
ba395927
KA
1618{
1619 int ret;
1620 struct pci_dev *tmp, *parent;
5331fe6f
WH
1621 struct intel_iommu *iommu;
1622
276dbf99
DW
1623 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
1624 pdev->devfn);
5331fe6f
WH
1625 if (!iommu)
1626 return -ENODEV;
ba395927 1627
276dbf99 1628 ret = device_context_mapped(iommu, pdev->bus->number, pdev->devfn);
ba395927
KA
1629 if (!ret)
1630 return ret;
1631 /* dependent device mapping */
1632 tmp = pci_find_upstream_pcie_bridge(pdev);
1633 if (!tmp)
1634 return ret;
1635 /* Secondary interface's bus number and devfn 0 */
1636 parent = pdev->bus->self;
1637 while (parent != tmp) {
8c11e798 1638 ret = device_context_mapped(iommu, parent->bus->number,
276dbf99 1639 parent->devfn);
ba395927
KA
1640 if (!ret)
1641 return ret;
1642 parent = parent->bus->self;
1643 }
5f4d91a1 1644 if (pci_is_pcie(tmp))
276dbf99
DW
1645 return device_context_mapped(iommu, tmp->subordinate->number,
1646 0);
ba395927 1647 else
276dbf99
DW
1648 return device_context_mapped(iommu, tmp->bus->number,
1649 tmp->devfn);
ba395927
KA
1650}
1651
f532959b
FY
1652/* Returns a number of VTD pages, but aligned to MM page size */
1653static inline unsigned long aligned_nrpages(unsigned long host_addr,
1654 size_t size)
1655{
1656 host_addr &= ~PAGE_MASK;
1657 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1658}
1659
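/* Core mapping loop: fill leaf PTEs for @nr_pages starting at @iov_pfn,
 * taking target pages either from @sg or from the contiguous range at
 * @phys_pfn, and flush each page-table page once it has been filled. */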
1660static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1661 struct scatterlist *sg, unsigned long phys_pfn,
1662 unsigned long nr_pages, int prot)
e1605495
DW
1663{
1664 struct dma_pte *first_pte = NULL, *pte = NULL;
9051aa02 1665 phys_addr_t uninitialized_var(pteval);
e1605495 1666 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
9051aa02 1667 unsigned long sg_res;
e1605495
DW
1668
1669 BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
1670
1671 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1672 return -EINVAL;
1673
1674 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1675
9051aa02
DW
1676 if (sg)
1677 sg_res = 0;
1678 else {
1679 sg_res = nr_pages + 1;
1680 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1681 }
1682
e1605495 1683 while (nr_pages--) {
c85994e4
DW
1684 uint64_t tmp;
1685
e1605495 1686 if (!sg_res) {
f532959b 1687 sg_res = aligned_nrpages(sg->offset, sg->length);
e1605495
DW
1688 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
1689 sg->dma_length = sg->length;
1690 pteval = page_to_phys(sg_page(sg)) | prot;
1691 }
1692 if (!pte) {
1693 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn);
1694 if (!pte)
1695 return -ENOMEM;
1696 }
1697 /* We don't need lock here, nobody else
1698 * touches the iova range
1699 */
7766a3fb 1700 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
c85994e4 1701 if (tmp) {
1bf20f0d 1702 static int dumps = 5;
c85994e4
DW
1703 printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
1704 iov_pfn, tmp, (unsigned long long)pteval);
1bf20f0d
DW
1705 if (dumps) {
1706 dumps--;
1707 debug_dma_dump_mappings(NULL);
1708 }
1709 WARN_ON(1);
1710 }
e1605495 1711 pte++;
75e6bf96 1712 if (!nr_pages || first_pte_in_page(pte)) {
e1605495
DW
1713 domain_flush_cache(domain, first_pte,
1714 (void *)pte - (void *)first_pte);
1715 pte = NULL;
1716 }
1717 iov_pfn++;
1718 pteval += VTD_PAGE_SIZE;
1719 sg_res--;
1720 if (!sg_res)
1721 sg = sg_next(sg);
1722 }
1723 return 0;
1724}
1725
9051aa02
DW
1726static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1727 struct scatterlist *sg, unsigned long nr_pages,
1728 int prot)
ba395927 1729{
9051aa02
DW
1730 return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
1731}
6f6a00e4 1732
9051aa02
DW
1733static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1734 unsigned long phys_pfn, unsigned long nr_pages,
1735 int prot)
1736{
1737 return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
ba395927
KA
1738}
1739
c7151a8d 1740static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
ba395927 1741{
c7151a8d
WH
1742 if (!iommu)
1743 return;
8c11e798
WH
1744
1745 clear_context_table(iommu, bus, devfn);
1746 iommu->flush.flush_context(iommu, 0, 0, 0,
4c25a2c1 1747 DMA_CCMD_GLOBAL_INVL);
1f0ef2aa 1748 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
ba395927
KA
1749}
1750
1751static void domain_remove_dev_info(struct dmar_domain *domain)
1752{
1753 struct device_domain_info *info;
1754 unsigned long flags;
c7151a8d 1755 struct intel_iommu *iommu;
ba395927
KA
1756
1757 spin_lock_irqsave(&device_domain_lock, flags);
1758 while (!list_empty(&domain->devices)) {
1759 info = list_entry(domain->devices.next,
1760 struct device_domain_info, link);
1761 list_del(&info->link);
1762 list_del(&info->global);
1763 if (info->dev)
358dd8ac 1764 info->dev->dev.archdata.iommu = NULL;
ba395927
KA
1765 spin_unlock_irqrestore(&device_domain_lock, flags);
1766
93a23a72 1767 iommu_disable_dev_iotlb(info);
276dbf99 1768 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
c7151a8d 1769 iommu_detach_dev(iommu, info->bus, info->devfn);
ba395927
KA
1770 free_devinfo_mem(info);
1771
1772 spin_lock_irqsave(&device_domain_lock, flags);
1773 }
1774 spin_unlock_irqrestore(&device_domain_lock, flags);
1775}
1776
1777/*
1778 * find_domain
1779 * Note: we use struct pci_dev->dev.archdata.iommu to store the info
1780 */
38717946 1781static struct dmar_domain *
ba395927
KA
1782find_domain(struct pci_dev *pdev)
1783{
1784 struct device_domain_info *info;
1785
1786 /* No lock here, assumes no domain exit in normal case */
358dd8ac 1787 info = pdev->dev.archdata.iommu;
ba395927
KA
1788 if (info)
1789 return info->domain;
1790 return NULL;
1791}
1792
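/* Find the dmar_domain already associated with @pdev or create a new
 * one, sharing a single domain among all devices behind the same
 * PCIe-to-PCI bridge. */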
1793/* domain is initialized */
1794static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1795{
1796 struct dmar_domain *domain, *found = NULL;
1797 struct intel_iommu *iommu;
1798 struct dmar_drhd_unit *drhd;
1799 struct device_domain_info *info, *tmp;
1800 struct pci_dev *dev_tmp;
1801 unsigned long flags;
1802 int bus = 0, devfn = 0;
276dbf99 1803 int segment;
2c2e2c38 1804 int ret;
ba395927
KA
1805
1806 domain = find_domain(pdev);
1807 if (domain)
1808 return domain;
1809
276dbf99
DW
1810 segment = pci_domain_nr(pdev->bus);
1811
ba395927
KA
1812 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1813 if (dev_tmp) {
5f4d91a1 1814 if (pci_is_pcie(dev_tmp)) {
ba395927
KA
1815 bus = dev_tmp->subordinate->number;
1816 devfn = 0;
1817 } else {
1818 bus = dev_tmp->bus->number;
1819 devfn = dev_tmp->devfn;
1820 }
1821 spin_lock_irqsave(&device_domain_lock, flags);
1822 list_for_each_entry(info, &device_domain_list, global) {
276dbf99
DW
1823 if (info->segment == segment &&
1824 info->bus == bus && info->devfn == devfn) {
ba395927
KA
1825 found = info->domain;
1826 break;
1827 }
1828 }
1829 spin_unlock_irqrestore(&device_domain_lock, flags);
1830 /* pcie-pci bridge already has a domain, use it */
1831 if (found) {
1832 domain = found;
1833 goto found_domain;
1834 }
1835 }
1836
2c2e2c38
FY
1837 domain = alloc_domain();
1838 if (!domain)
1839 goto error;
1840
ba395927
KA
1841 /* Allocate new domain for the device */
1842 drhd = dmar_find_matched_drhd_unit(pdev);
1843 if (!drhd) {
1844 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1845 pci_name(pdev));
1846 return NULL;
1847 }
1848 iommu = drhd->iommu;
1849
2c2e2c38
FY
1850 ret = iommu_attach_domain(domain, iommu);
1851 if (ret) {
2fe9723d 1852 free_domain_mem(domain);
ba395927 1853 goto error;
2c2e2c38 1854 }
ba395927
KA
1855
1856 if (domain_init(domain, gaw)) {
1857 domain_exit(domain);
1858 goto error;
1859 }
1860
1861 /* register pcie-to-pci device */
1862 if (dev_tmp) {
1863 info = alloc_devinfo_mem();
1864 if (!info) {
1865 domain_exit(domain);
1866 goto error;
1867 }
276dbf99 1868 info->segment = segment;
ba395927
KA
1869 info->bus = bus;
1870 info->devfn = devfn;
1871 info->dev = NULL;
1872 info->domain = domain;
1873 /* This domain is shared by devices under p2p bridge */
3b5410e7 1874 domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
ba395927
KA
1875
1876 /* pcie-to-pci bridge already has a domain, use it */
1877 found = NULL;
1878 spin_lock_irqsave(&device_domain_lock, flags);
1879 list_for_each_entry(tmp, &device_domain_list, global) {
276dbf99
DW
1880 if (tmp->segment == segment &&
1881 tmp->bus == bus && tmp->devfn == devfn) {
ba395927
KA
1882 found = tmp->domain;
1883 break;
1884 }
1885 }
1886 if (found) {
00dfff77 1887 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927
KA
1888 free_devinfo_mem(info);
1889 domain_exit(domain);
1890 domain = found;
1891 } else {
1892 list_add(&info->link, &domain->devices);
1893 list_add(&info->global, &device_domain_list);
00dfff77 1894 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927 1895 }
ba395927
KA
1896 }
1897
1898found_domain:
1899 info = alloc_devinfo_mem();
1900 if (!info)
1901 goto error;
276dbf99 1902 info->segment = segment;
ba395927
KA
1903 info->bus = pdev->bus->number;
1904 info->devfn = pdev->devfn;
1905 info->dev = pdev;
1906 info->domain = domain;
1907 spin_lock_irqsave(&device_domain_lock, flags);
1908 /* somebody else may have set it up already */
1909 found = find_domain(pdev);
1910 if (found != NULL) {
1911 spin_unlock_irqrestore(&device_domain_lock, flags);
1912 if (found != domain) {
1913 domain_exit(domain);
1914 domain = found;
1915 }
1916 free_devinfo_mem(info);
1917 return domain;
1918 }
1919 list_add(&info->link, &domain->devices);
1920 list_add(&info->global, &device_domain_list);
358dd8ac 1921 pdev->dev.archdata.iommu = info;
ba395927
KA
1922 spin_unlock_irqrestore(&device_domain_lock, flags);
1923 return domain;
1924error:
1925 /* recheck it here, maybe others set it */
1926 return find_domain(pdev);
1927}
1928
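/*
 * Editor's sketch of the race handling at the end of
 * get_domain_for_dev() above ("somebody else may have set it up
 * already"): build the candidate object outside the lock, re-check
 * under the lock, and discard our copy if another CPU installed one
 * first.  The lock, pointer and callback below are generic stand-ins,
 * not symbols from this file.
 */
static DEFINE_SPINLOCK(example_install_lock);
static void *example_installed;

static void *example_install(void *candidate, void (*discard)(void *))
{
	void *cur;

	spin_lock(&example_install_lock);
	if (!example_installed)
		example_installed = candidate;	/* we won the race */
	cur = example_installed;
	spin_unlock(&example_install_lock);

	if (cur != candidate)
		discard(candidate);		/* lost the race: free our copy */
	return cur;
}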
2c2e2c38 1929static int iommu_identity_mapping;
e0fc7e0b
DW
1930#define IDENTMAP_ALL 1
1931#define IDENTMAP_GFX 2
1932#define IDENTMAP_AZALIA 4
2c2e2c38 1933
b213203e
DW
1934static int iommu_domain_identity_map(struct dmar_domain *domain,
1935 unsigned long long start,
1936 unsigned long long end)
ba395927 1937{
c5395d5c
DW
1938 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
1939 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
1940
1941 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
1942 dma_to_mm_pfn(last_vpfn))) {
ba395927 1943 printk(KERN_ERR "IOMMU: reserve iova failed\n");
b213203e 1944 return -ENOMEM;
ba395927
KA
1945 }
1946
c5395d5c
DW
1947 pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
1948 start, end, domain->id);
ba395927
KA
1949 /*
1950 * The RMRR range might overlap with the physical memory range;
1951 * clear it first
1952 */
c5395d5c 1953 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
ba395927 1954
c5395d5c
DW
1955 return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
1956 last_vpfn - first_vpfn + 1,
61df7443 1957 DMA_PTE_READ|DMA_PTE_WRITE);
b213203e
DW
1958}
1959
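/*
 * Editor's worked example for the arithmetic above, with made-up
 * numbers: an RMRR covering bytes 0xed000..0xeffff on 4KiB VT-d pages
 * gives first_vpfn 0xed and last_vpfn 0xef, i.e. three pages reserved
 * in the iova allocator and mapped 1:1 by domain_pfn_mapping().
 */
static void example_rmrr_vpfn_math(void)
{
	unsigned long long start = 0xed000ULL, end = 0xeffffULL;
	unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;	/* 0xed */
	unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;	/* 0xef */
	unsigned long nr_pages = last_vpfn - first_vpfn + 1;	/* 3 */

	(void)nr_pages;
}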
1960static int iommu_prepare_identity_map(struct pci_dev *pdev,
1961 unsigned long long start,
1962 unsigned long long end)
1963{
1964 struct dmar_domain *domain;
1965 int ret;
1966
c7ab48d2 1967 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
b213203e
DW
1968 if (!domain)
1969 return -ENOMEM;
1970
19943b0e
DW
1971 /* For _hardware_ passthrough, don't bother. But for software
1972 passthrough, we do it anyway -- it may indicate a memory
1973 range which is reserved in E820 and so didn't get set
1974 up to start with in si_domain */
1975 if (domain == si_domain && hw_pass_through) {
1976 printk(KERN_INFO "Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
1977 pci_name(pdev), start, end);
1978 return 0;
1979 }
1980
1981 printk(KERN_INFO
1982 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1983 pci_name(pdev), start, end);
2ff729f5 1984
5595b528
DW
1985 if (end < start) {
1986 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
1987 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
1988 dmi_get_system_info(DMI_BIOS_VENDOR),
1989 dmi_get_system_info(DMI_BIOS_VERSION),
1990 dmi_get_system_info(DMI_PRODUCT_VERSION));
1991 ret = -EIO;
1992 goto error;
1993 }
1994
2ff729f5
DW
1995 if (end >> agaw_to_width(domain->agaw)) {
1996 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
1997 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
1998 agaw_to_width(domain->agaw),
1999 dmi_get_system_info(DMI_BIOS_VENDOR),
2000 dmi_get_system_info(DMI_BIOS_VERSION),
2001 dmi_get_system_info(DMI_PRODUCT_VERSION));
2002 ret = -EIO;
2003 goto error;
2004 }
19943b0e 2005
b213203e 2006 ret = iommu_domain_identity_map(domain, start, end);
ba395927
KA
2007 if (ret)
2008 goto error;
2009
2010 /* context entry init */
4ed0d3e6 2011 ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
b213203e
DW
2012 if (ret)
2013 goto error;
2014
2015 return 0;
2016
2017 error:
ba395927
KA
2018 domain_exit(domain);
2019 return ret;
ba395927
KA
2020}
2021
2022static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2023 struct pci_dev *pdev)
2024{
358dd8ac 2025 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927
KA
2026 return 0;
2027 return iommu_prepare_identity_map(pdev, rmrr->base_address,
2028 rmrr->end_address + 1);
2029}
2030
49a0429e
KA
2031#ifdef CONFIG_DMAR_FLOPPY_WA
2032static inline void iommu_prepare_isa(void)
2033{
2034 struct pci_dev *pdev;
2035 int ret;
2036
2037 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2038 if (!pdev)
2039 return;
2040
c7ab48d2 2041 printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
49a0429e
KA
2042 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
2043
2044 if (ret)
c7ab48d2
DW
2045 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
2046 "floppy might not work\n");
49a0429e
KA
2047
2048}
2049#else
2050static inline void iommu_prepare_isa(void)
2051{
2052 return;
2053}
2054#endif /* !CONFIG_DMAR_FLOPPY_WA */
2055
2c2e2c38 2056static int md_domain_init(struct dmar_domain *domain, int guest_width);
c7ab48d2
DW
2057
2058static int __init si_domain_work_fn(unsigned long start_pfn,
2059 unsigned long end_pfn, void *datax)
2060{
2061 int *ret = datax;
2062
2063 *ret = iommu_domain_identity_map(si_domain,
2064 (uint64_t)start_pfn << PAGE_SHIFT,
2065 (uint64_t)end_pfn << PAGE_SHIFT);
2066 return *ret;
2067
2068}
2069
071e1374 2070static int __init si_domain_init(int hw)
2c2e2c38
FY
2071{
2072 struct dmar_drhd_unit *drhd;
2073 struct intel_iommu *iommu;
c7ab48d2 2074 int nid, ret = 0;
2c2e2c38
FY
2075
2076 si_domain = alloc_domain();
2077 if (!si_domain)
2078 return -EFAULT;
2079
c7ab48d2 2080 pr_debug("Identity mapping domain is domain %d\n", si_domain->id);
2c2e2c38
FY
2081
2082 for_each_active_iommu(iommu, drhd) {
2083 ret = iommu_attach_domain(si_domain, iommu);
2084 if (ret) {
2085 domain_exit(si_domain);
2086 return -EFAULT;
2087 }
2088 }
2089
2090 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2091 domain_exit(si_domain);
2092 return -EFAULT;
2093 }
2094
2095 si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
2096
19943b0e
DW
2097 if (hw)
2098 return 0;
2099
c7ab48d2
DW
2100 for_each_online_node(nid) {
2101 work_with_active_regions(nid, si_domain_work_fn, &ret);
2102 if (ret)
2103 return ret;
2104 }
2105
2c2e2c38
FY
2106 return 0;
2107}
2108
2109static void domain_remove_one_dev_info(struct dmar_domain *domain,
2110 struct pci_dev *pdev);
2111static int identity_mapping(struct pci_dev *pdev)
2112{
2113 struct device_domain_info *info;
2114
2115 if (likely(!iommu_identity_mapping))
2116 return 0;
2117
2118
2119 list_for_each_entry(info, &si_domain->devices, link)
2120 if (info->dev == pdev)
2121 return 1;
2122 return 0;
2123}
2124
2125static int domain_add_dev_info(struct dmar_domain *domain,
5fe60f4e
DW
2126 struct pci_dev *pdev,
2127 int translation)
2c2e2c38
FY
2128{
2129 struct device_domain_info *info;
2130 unsigned long flags;
5fe60f4e 2131 int ret;
2c2e2c38
FY
2132
2133 info = alloc_devinfo_mem();
2134 if (!info)
2135 return -ENOMEM;
2136
5fe60f4e
DW
2137 ret = domain_context_mapping(domain, pdev, translation);
2138 if (ret) {
2139 free_devinfo_mem(info);
2140 return ret;
2141 }
2142
2c2e2c38
FY
2143 info->segment = pci_domain_nr(pdev->bus);
2144 info->bus = pdev->bus->number;
2145 info->devfn = pdev->devfn;
2146 info->dev = pdev;
2147 info->domain = domain;
2148
2149 spin_lock_irqsave(&device_domain_lock, flags);
2150 list_add(&info->link, &domain->devices);
2151 list_add(&info->global, &device_domain_list);
2152 pdev->dev.archdata.iommu = info;
2153 spin_unlock_irqrestore(&device_domain_lock, flags);
2154
2155 return 0;
2156}
2157
6941af28
DW
2158static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
2159{
e0fc7e0b
DW
2160 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2161 return 1;
2162
2163 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2164 return 1;
2165
2166 if (!(iommu_identity_mapping & IDENTMAP_ALL))
2167 return 0;
6941af28 2168
3dfc813d
DW
2169 /*
2170 * We want to start off with all devices in the 1:1 domain, and
2171 * take them out later if we find they can't access all of memory.
2172 *
2173 * However, we can't do this for PCI devices behind bridges,
2174 * because all PCI devices behind the same bridge will end up
2175 * with the same source-id on their transactions.
2176 *
2177 * Practically speaking, we can't change things around for these
2178 * devices at run-time, because we can't be sure there'll be no
2179 * DMA transactions in flight for any of their siblings.
2180 *
2181 * So PCI devices (unless they're on the root bus) as well as
2182 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2183 * the 1:1 domain, just in _case_ one of their siblings turns out
2184 * not to be able to map all of memory.
2185 */
5f4d91a1 2186 if (!pci_is_pcie(pdev)) {
3dfc813d
DW
2187 if (!pci_is_root_bus(pdev->bus))
2188 return 0;
2189 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2190 return 0;
2191 } else if (pdev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
2192 return 0;
2193
2194 /*
2195 * At boot time, we don't yet know if devices will be 64-bit capable.
2196 * Assume that they will -- if they turn out not to be, then we can
2197 * take them out of the 1:1 domain later.
2198 */
6941af28
DW
2199 if (!startup)
2200 return pdev->dma_mask > DMA_BIT_MASK(32);
2201
2202 return 1;
2203}
2204
071e1374 2205static int __init iommu_prepare_static_identity_mapping(int hw)
2c2e2c38 2206{
2c2e2c38
FY
2207 struct pci_dev *pdev = NULL;
2208 int ret;
2209
19943b0e 2210 ret = si_domain_init(hw);
2c2e2c38
FY
2211 if (ret)
2212 return -EFAULT;
2213
2c2e2c38 2214 for_each_pci_dev(pdev) {
6941af28 2215 if (iommu_should_identity_map(pdev, 1)) {
19943b0e
DW
2216 printk(KERN_INFO "IOMMU: %s identity mapping for device %s\n",
2217 hw ? "hardware" : "software", pci_name(pdev));
62edf5dc 2218
5fe60f4e 2219 ret = domain_add_dev_info(si_domain, pdev,
19943b0e 2220 hw ? CONTEXT_TT_PASS_THROUGH :
62edf5dc
DW
2221 CONTEXT_TT_MULTI_LEVEL);
2222 if (ret)
2223 return ret;
62edf5dc 2224 }
2c2e2c38
FY
2225 }
2226
2227 return 0;
2228}
2229
b779260b 2230static int __init init_dmars(void)
ba395927
KA
2231{
2232 struct dmar_drhd_unit *drhd;
2233 struct dmar_rmrr_unit *rmrr;
2234 struct pci_dev *pdev;
2235 struct intel_iommu *iommu;
9d783ba0 2236 int i, ret;
2c2e2c38 2237
ba395927
KA
2238 /*
2239 * for each drhd
2240 * allocate root
2241 * initialize and program root entry to not present
2242 * endfor
2243 */
2244 for_each_drhd_unit(drhd) {
5e0d2a6f 2245 g_num_of_iommus++;
2246 /*
2247 * lock not needed as this is only incremented in the single
2248 * threaded kernel __init code path; all other accesses are read
2249 * only
2250 */
2251 }
2252
d9630fe9
WH
2253 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2254 GFP_KERNEL);
2255 if (!g_iommus) {
2256 printk(KERN_ERR "Allocating global iommu array failed\n");
2257 ret = -ENOMEM;
2258 goto error;
2259 }
2260
80b20dd8 2261 deferred_flush = kzalloc(g_num_of_iommus *
2262 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2263 if (!deferred_flush) {
5e0d2a6f 2264 ret = -ENOMEM;
2265 goto error;
2266 }
2267
5e0d2a6f 2268 for_each_drhd_unit(drhd) {
2269 if (drhd->ignored)
2270 continue;
1886e8a9
SS
2271
2272 iommu = drhd->iommu;
d9630fe9 2273 g_iommus[iommu->seq_id] = iommu;
ba395927 2274
e61d98d8
SS
2275 ret = iommu_init_domains(iommu);
2276 if (ret)
2277 goto error;
2278
ba395927
KA
2279 /*
2280 * TBD:
2281 * we could share the same root & context tables
25985edc 2282 * among all IOMMUs. Need to split it later.
ba395927
KA
2283 */
2284 ret = iommu_alloc_root_entry(iommu);
2285 if (ret) {
2286 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
2287 goto error;
2288 }
4ed0d3e6 2289 if (!ecap_pass_through(iommu->ecap))
19943b0e 2290 hw_pass_through = 0;
ba395927
KA
2291 }
2292
1531a6a6
SS
2293 /*
2294 * Start from the sane iommu hardware state.
2295 */
a77b67d4
YS
2296 for_each_drhd_unit(drhd) {
2297 if (drhd->ignored)
2298 continue;
2299
2300 iommu = drhd->iommu;
1531a6a6
SS
2301
2302 /*
2303 * If the queued invalidation is already initialized by us
2304 * (for example, while enabling interrupt-remapping) then
2305 * we got the things already rolling from a sane state.
2306 */
2307 if (iommu->qi)
2308 continue;
2309
2310 /*
2311 * Clear any previous faults.
2312 */
2313 dmar_fault(-1, iommu);
2314 /*
2315 * Disable queued invalidation if supported and already enabled
2316 * before OS handover.
2317 */
2318 dmar_disable_qi(iommu);
2319 }
2320
2321 for_each_drhd_unit(drhd) {
2322 if (drhd->ignored)
2323 continue;
2324
2325 iommu = drhd->iommu;
2326
a77b67d4
YS
2327 if (dmar_enable_qi(iommu)) {
2328 /*
2329 * Queued Invalidate not enabled, use Register Based
2330 * Invalidate
2331 */
2332 iommu->flush.flush_context = __iommu_flush_context;
2333 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
680a7524 2334 printk(KERN_INFO "IOMMU %d 0x%Lx: using Register based "
b4e0f9eb 2335 "invalidation\n",
680a7524 2336 iommu->seq_id,
b4e0f9eb 2337 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
2338 } else {
2339 iommu->flush.flush_context = qi_flush_context;
2340 iommu->flush.flush_iotlb = qi_flush_iotlb;
680a7524 2341 printk(KERN_INFO "IOMMU %d 0x%Lx: using Queued "
b4e0f9eb 2342 "invalidation\n",
680a7524 2343 iommu->seq_id,
b4e0f9eb 2344 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
2345 }
2346 }
2347
19943b0e 2348 if (iommu_pass_through)
e0fc7e0b
DW
2349 iommu_identity_mapping |= IDENTMAP_ALL;
2350
19943b0e 2351#ifdef CONFIG_DMAR_BROKEN_GFX_WA
e0fc7e0b 2352 iommu_identity_mapping |= IDENTMAP_GFX;
19943b0e 2353#endif
e0fc7e0b
DW
2354
2355 check_tylersburg_isoch();
2356
ba395927 2357 /*
19943b0e
DW
2358 * If pass through is not set or not enabled, set up context entries for
2359 * identity mappings for rmrr, gfx, and isa, and possibly fall back to a static
2360 * identity mapping if iommu_identity_mapping is set.
ba395927 2361 */
19943b0e
DW
2362 if (iommu_identity_mapping) {
2363 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
4ed0d3e6 2364 if (ret) {
19943b0e
DW
2365 printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
2366 goto error;
ba395927
KA
2367 }
2368 }
ba395927 2369 /*
19943b0e
DW
2370 * For each rmrr
2371 * for each dev attached to rmrr
2372 * do
2373 * locate drhd for dev, alloc domain for dev
2374 * allocate free domain
2375 * allocate page table entries for rmrr
2376 * if context not allocated for bus
2377 * allocate and init context
2378 * set present in root table for this bus
2379 * init context with domain, translation etc
2380 * endfor
2381 * endfor
ba395927 2382 */
19943b0e
DW
2383 printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2384 for_each_rmrr_units(rmrr) {
2385 for (i = 0; i < rmrr->devices_cnt; i++) {
2386 pdev = rmrr->devices[i];
2387 /*
2388 * some BIOSes list non-existent devices in the
2389 * DMAR table.
2390 */
2391 if (!pdev)
2392 continue;
2393 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
2394 if (ret)
2395 printk(KERN_ERR
2396 "IOMMU: mapping reserved region failed\n");
ba395927 2397 }
4ed0d3e6 2398 }
49a0429e 2399
19943b0e
DW
2400 iommu_prepare_isa();
2401
ba395927
KA
2402 /*
2403 * for each drhd
2404 * enable fault log
2405 * global invalidate context cache
2406 * global invalidate iotlb
2407 * enable translation
2408 */
2409 for_each_drhd_unit(drhd) {
51a63e67
JC
2410 if (drhd->ignored) {
2411 /*
2412 * we always have to disable PMRs or DMA may fail on
2413 * this device
2414 */
2415 if (force_on)
2416 iommu_disable_protect_mem_regions(drhd->iommu);
ba395927 2417 continue;
51a63e67 2418 }
ba395927 2419 iommu = drhd->iommu;
ba395927
KA
2420
2421 iommu_flush_write_buffer(iommu);
2422
3460a6d9
KA
2423 ret = dmar_set_interrupt(iommu);
2424 if (ret)
2425 goto error;
2426
ba395927
KA
2427 iommu_set_root_entry(iommu);
2428
4c25a2c1 2429 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
1f0ef2aa 2430 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
f8bab735 2431
ba395927
KA
2432 ret = iommu_enable_translation(iommu);
2433 if (ret)
2434 goto error;
b94996c9
DW
2435
2436 iommu_disable_protect_mem_regions(iommu);
ba395927
KA
2437 }
2438
2439 return 0;
2440error:
2441 for_each_drhd_unit(drhd) {
2442 if (drhd->ignored)
2443 continue;
2444 iommu = drhd->iommu;
2445 free_iommu(iommu);
2446 }
d9630fe9 2447 kfree(g_iommus);
ba395927
KA
2448 return ret;
2449}
2450
5a5e02a6 2451/* This takes a number of _MM_ pages, not VTD pages */
875764de
DW
2452static struct iova *intel_alloc_iova(struct device *dev,
2453 struct dmar_domain *domain,
2454 unsigned long nrpages, uint64_t dma_mask)
ba395927 2455{
ba395927 2456 struct pci_dev *pdev = to_pci_dev(dev);
ba395927 2457 struct iova *iova = NULL;
ba395927 2458
875764de
DW
2459 /* Restrict dma_mask to the width that the iommu can handle */
2460 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2461
2462 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
ba395927
KA
2463 /*
2464 * First try to allocate an io virtual address in
284901a9 2465 * DMA_BIT_MASK(32) and if that fails then try allocating
3609801e 2466 * from the higher range
ba395927 2467 */
875764de
DW
2468 iova = alloc_iova(&domain->iovad, nrpages,
2469 IOVA_PFN(DMA_BIT_MASK(32)), 1);
2470 if (iova)
2471 return iova;
2472 }
2473 iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2474 if (unlikely(!iova)) {
2475 printk(KERN_ERR "Allocating %ld-page iova for %s failed\n",
2476 nrpages, pci_name(pdev));
f76aec76
KA
2477 return NULL;
2478 }
2479
2480 return iova;
2481}
2482
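/*
 * Editor's sketch of the clamping performed in intel_alloc_iova()
 * above: the usable ceiling for an allocation is the smaller of what
 * the device's dma_mask advertises and what the domain's guest address
 * width can express, so a 64-bit capable device in a 48-bit domain is
 * still capped at DOMAIN_MAX_ADDR(48).  Names come from this file.
 */
static u64 example_effective_dma_ceiling(struct dmar_domain *domain,
					 u64 device_dma_mask)
{
	return min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), device_dma_mask);
}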
147202aa 2483static struct dmar_domain *__get_valid_domain_for_dev(struct pci_dev *pdev)
f76aec76
KA
2484{
2485 struct dmar_domain *domain;
2486 int ret;
2487
2488 domain = get_domain_for_dev(pdev,
2489 DEFAULT_DOMAIN_ADDRESS_WIDTH);
2490 if (!domain) {
2491 printk(KERN_ERR
2492 "Allocating domain for %s failed\n", pci_name(pdev));
4fe05bbc 2493 return NULL;
ba395927
KA
2494 }
2495
2496 /* make sure context mapping is ok */
5331fe6f 2497 if (unlikely(!domain_context_mapped(pdev))) {
4ed0d3e6
FY
2498 ret = domain_context_mapping(domain, pdev,
2499 CONTEXT_TT_MULTI_LEVEL);
f76aec76
KA
2500 if (ret) {
2501 printk(KERN_ERR
2502 "Domain context map for %s failed\n",
2503 pci_name(pdev));
4fe05bbc 2504 return NULL;
f76aec76 2505 }
ba395927
KA
2506 }
2507
f76aec76
KA
2508 return domain;
2509}
2510
147202aa
DW
2511static inline struct dmar_domain *get_valid_domain_for_dev(struct pci_dev *dev)
2512{
2513 struct device_domain_info *info;
2514
2515 /* No lock here, assumes no domain exit in normal case */
2516 info = dev->dev.archdata.iommu;
2517 if (likely(info))
2518 return info->domain;
2519
2520 return __get_valid_domain_for_dev(dev);
2521}
2522
2c2e2c38
FY
2523static int iommu_dummy(struct pci_dev *pdev)
2524{
2525 return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2526}
2527
2528/* Check if the pdev needs to go through the non-identity map and unmap process. */
73676832 2529static int iommu_no_mapping(struct device *dev)
2c2e2c38 2530{
73676832 2531 struct pci_dev *pdev;
2c2e2c38
FY
2532 int found;
2533
73676832
DW
2534 if (unlikely(dev->bus != &pci_bus_type))
2535 return 1;
2536
2537 pdev = to_pci_dev(dev);
1e4c64c4
DW
2538 if (iommu_dummy(pdev))
2539 return 1;
2540
2c2e2c38 2541 if (!iommu_identity_mapping)
1e4c64c4 2542 return 0;
2c2e2c38
FY
2543
2544 found = identity_mapping(pdev);
2545 if (found) {
6941af28 2546 if (iommu_should_identity_map(pdev, 0))
2c2e2c38
FY
2547 return 1;
2548 else {
2549 /*
2550 * The 32 bit DMA device is removed from si_domain and falls back
2551 * to non-identity mapping.
2552 */
2553 domain_remove_one_dev_info(si_domain, pdev);
2554 printk(KERN_INFO "32bit %s uses non-identity mapping\n",
2555 pci_name(pdev));
2556 return 0;
2557 }
2558 } else {
2559 /*
2560 * In case a 64 bit DMA device is detached from a vm, the device
2561 * is put into si_domain for identity mapping.
2562 */
6941af28 2563 if (iommu_should_identity_map(pdev, 0)) {
2c2e2c38 2564 int ret;
5fe60f4e
DW
2565 ret = domain_add_dev_info(si_domain, pdev,
2566 hw_pass_through ?
2567 CONTEXT_TT_PASS_THROUGH :
2568 CONTEXT_TT_MULTI_LEVEL);
2c2e2c38
FY
2569 if (!ret) {
2570 printk(KERN_INFO "64bit %s uses identity mapping\n",
2571 pci_name(pdev));
2572 return 1;
2573 }
2574 }
2575 }
2576
1e4c64c4 2577 return 0;
2c2e2c38
FY
2578}
2579
bb9e6d65
FT
2580static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2581 size_t size, int dir, u64 dma_mask)
f76aec76
KA
2582{
2583 struct pci_dev *pdev = to_pci_dev(hwdev);
f76aec76 2584 struct dmar_domain *domain;
5b6985ce 2585 phys_addr_t start_paddr;
f76aec76
KA
2586 struct iova *iova;
2587 int prot = 0;
6865f0d1 2588 int ret;
8c11e798 2589 struct intel_iommu *iommu;
33041ec0 2590 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
f76aec76
KA
2591
2592 BUG_ON(dir == DMA_NONE);
2c2e2c38 2593
73676832 2594 if (iommu_no_mapping(hwdev))
6865f0d1 2595 return paddr;
f76aec76
KA
2596
2597 domain = get_valid_domain_for_dev(pdev);
2598 if (!domain)
2599 return 0;
2600
8c11e798 2601 iommu = domain_get_iommu(domain);
88cb6a74 2602 size = aligned_nrpages(paddr, size);
f76aec76 2603
5a5e02a6
DW
2604 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
2605 pdev->dma_mask);
f76aec76
KA
2606 if (!iova)
2607 goto error;
2608
ba395927
KA
2609 /*
2610 * Check if DMAR supports zero-length reads on write only
2611 * mappings..
2612 */
2613 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 2614 !cap_zlr(iommu->cap))
ba395927
KA
2615 prot |= DMA_PTE_READ;
2616 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2617 prot |= DMA_PTE_WRITE;
2618 /*
6865f0d1 2619 * paddr to (paddr + size) might span partial pages, so we should map the whole
ba395927 2620 * page. Note: if two parts of one page are separately mapped, we
6865f0d1 2621 * might have two guest_addrs mapping to the same host paddr, but this
ba395927
KA
2622 * is not a big problem
2623 */
0ab36de2 2624 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
33041ec0 2625 mm_to_dma_pfn(paddr_pfn), size, prot);
ba395927
KA
2626 if (ret)
2627 goto error;
2628
1f0ef2aa
DW
2629 /* it's a non-present to present mapping. Only flush if caching mode */
2630 if (cap_caching_mode(iommu->cap))
82653633 2631 iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 1);
1f0ef2aa 2632 else
8c11e798 2633 iommu_flush_write_buffer(iommu);
f76aec76 2634
03d6a246
DW
2635 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2636 start_paddr += paddr & ~PAGE_MASK;
2637 return start_paddr;
ba395927 2638
ba395927 2639error:
f76aec76
KA
2640 if (iova)
2641 __free_iova(&domain->iovad, iova);
4cf2e75d 2642 printk(KERN_ERR "Device %s request: %zx@%llx dir %d --- failed\n",
5b6985ce 2643 pci_name(pdev), size, (unsigned long long)paddr, dir);
ba395927
KA
2644 return 0;
2645}
2646
ffbbef5c
FT
2647static dma_addr_t intel_map_page(struct device *dev, struct page *page,
2648 unsigned long offset, size_t size,
2649 enum dma_data_direction dir,
2650 struct dma_attrs *attrs)
bb9e6d65 2651{
ffbbef5c
FT
2652 return __intel_map_single(dev, page_to_phys(page) + offset, size,
2653 dir, to_pci_dev(dev)->dma_mask);
bb9e6d65
FT
2654}
2655
5e0d2a6f 2656static void flush_unmaps(void)
2657{
80b20dd8 2658 int i, j;
5e0d2a6f 2659
5e0d2a6f 2660 timer_on = 0;
2661
2662 /* just flush them all */
2663 for (i = 0; i < g_num_of_iommus; i++) {
a2bb8459
WH
2664 struct intel_iommu *iommu = g_iommus[i];
2665 if (!iommu)
2666 continue;
c42d9f32 2667
9dd2fe89
YZ
2668 if (!deferred_flush[i].next)
2669 continue;
2670
78d5f0f5
NA
2671 /* In caching mode, global flushes make emulation expensive */
2672 if (!cap_caching_mode(iommu->cap))
2673 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
93a23a72 2674 DMA_TLB_GLOBAL_FLUSH);
9dd2fe89 2675 for (j = 0; j < deferred_flush[i].next; j++) {
93a23a72
YZ
2676 unsigned long mask;
2677 struct iova *iova = deferred_flush[i].iova[j];
78d5f0f5
NA
2678 struct dmar_domain *domain = deferred_flush[i].domain[j];
2679
2680 /* On real hardware multiple invalidations are expensive */
2681 if (cap_caching_mode(iommu->cap))
2682 iommu_flush_iotlb_psi(iommu, domain->id,
2683 iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1, 0);
2684 else {
2685 mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1));
2686 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
2687 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
2688 }
93a23a72 2689 __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
80b20dd8 2690 }
9dd2fe89 2691 deferred_flush[i].next = 0;
5e0d2a6f 2692 }
2693
5e0d2a6f 2694 list_size = 0;
5e0d2a6f 2695}
2696
2697static void flush_unmaps_timeout(unsigned long data)
2698{
80b20dd8 2699 unsigned long flags;
2700
2701 spin_lock_irqsave(&async_umap_flush_lock, flags);
5e0d2a6f 2702 flush_unmaps();
80b20dd8 2703 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
5e0d2a6f 2704}
2705
2706static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2707{
2708 unsigned long flags;
80b20dd8 2709 int next, iommu_id;
8c11e798 2710 struct intel_iommu *iommu;
5e0d2a6f 2711
2712 spin_lock_irqsave(&async_umap_flush_lock, flags);
80b20dd8 2713 if (list_size == HIGH_WATER_MARK)
2714 flush_unmaps();
2715
8c11e798
WH
2716 iommu = domain_get_iommu(dom);
2717 iommu_id = iommu->seq_id;
c42d9f32 2718
80b20dd8 2719 next = deferred_flush[iommu_id].next;
2720 deferred_flush[iommu_id].domain[next] = dom;
2721 deferred_flush[iommu_id].iova[next] = iova;
2722 deferred_flush[iommu_id].next++;
5e0d2a6f 2723
2724 if (!timer_on) {
2725 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2726 timer_on = 1;
2727 }
2728 list_size++;
2729 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2730}
2731
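/*
 * Editor's sketch of the batching policy behind add_unmap() and
 * flush_unmaps_timeout() above, reduced to its essentials: IOTLB
 * flushes are deferred, but bounded both by a high-water mark on the
 * number of queued unmaps and by a 10ms timer.  The names below are
 * generic stand-ins; the real code also tracks per-IOMMU tables and
 * only re-arms the timer when it is not already running.
 */
static void example_queue_deferred_unmap(unsigned int *pending,
					 unsigned int high_water_mark,
					 void (*flush_now)(void),
					 void (*arm_timer_10ms)(void))
{
	if (*pending == high_water_mark) {
		flush_now();		/* queue full: flush synchronously */
		*pending = 0;		/* queue drained */
	}

	(*pending)++;
	arm_timer_10ms();		/* lazy path: the timer drains the queue */
}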
ffbbef5c
FT
2732static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
2733 size_t size, enum dma_data_direction dir,
2734 struct dma_attrs *attrs)
ba395927 2735{
ba395927 2736 struct pci_dev *pdev = to_pci_dev(dev);
f76aec76 2737 struct dmar_domain *domain;
d794dc9b 2738 unsigned long start_pfn, last_pfn;
ba395927 2739 struct iova *iova;
8c11e798 2740 struct intel_iommu *iommu;
ba395927 2741
73676832 2742 if (iommu_no_mapping(dev))
f76aec76 2743 return;
2c2e2c38 2744
ba395927
KA
2745 domain = find_domain(pdev);
2746 BUG_ON(!domain);
2747
8c11e798
WH
2748 iommu = domain_get_iommu(domain);
2749
ba395927 2750 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
85b98276
DW
2751 if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
2752 (unsigned long long)dev_addr))
ba395927 2753 return;
ba395927 2754
d794dc9b
DW
2755 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2756 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
ba395927 2757
d794dc9b
DW
2758 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
2759 pci_name(pdev), start_pfn, last_pfn);
ba395927 2760
f76aec76 2761 /* clear the whole page */
d794dc9b
DW
2762 dma_pte_clear_range(domain, start_pfn, last_pfn);
2763
f76aec76 2764 /* free page tables */
d794dc9b
DW
2765 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2766
5e0d2a6f 2767 if (intel_iommu_strict) {
03d6a246 2768 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
82653633 2769 last_pfn - start_pfn + 1, 0);
5e0d2a6f 2770 /* free iova */
2771 __free_iova(&domain->iovad, iova);
2772 } else {
2773 add_unmap(domain, iova);
2774 /*
2775 * queue up the release of the unmap to save the roughly 1/6th of the
2776 * cpu time otherwise used up by the iotlb flush operation...
2777 */
5e0d2a6f 2778 }
ba395927
KA
2779}
2780
d7ab5c46
FT
2781static void *intel_alloc_coherent(struct device *hwdev, size_t size,
2782 dma_addr_t *dma_handle, gfp_t flags)
ba395927
KA
2783{
2784 void *vaddr;
2785 int order;
2786
5b6985ce 2787 size = PAGE_ALIGN(size);
ba395927 2788 order = get_order(size);
e8bb910d
AW
2789
2790 if (!iommu_no_mapping(hwdev))
2791 flags &= ~(GFP_DMA | GFP_DMA32);
2792 else if (hwdev->coherent_dma_mask < dma_get_required_mask(hwdev)) {
2793 if (hwdev->coherent_dma_mask < DMA_BIT_MASK(32))
2794 flags |= GFP_DMA;
2795 else
2796 flags |= GFP_DMA32;
2797 }
ba395927
KA
2798
2799 vaddr = (void *)__get_free_pages(flags, order);
2800 if (!vaddr)
2801 return NULL;
2802 memset(vaddr, 0, size);
2803
bb9e6d65
FT
2804 *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2805 DMA_BIDIRECTIONAL,
2806 hwdev->coherent_dma_mask);
ba395927
KA
2807 if (*dma_handle)
2808 return vaddr;
2809 free_pages((unsigned long)vaddr, order);
2810 return NULL;
2811}
2812
d7ab5c46
FT
2813static void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
2814 dma_addr_t dma_handle)
ba395927
KA
2815{
2816 int order;
2817
5b6985ce 2818 size = PAGE_ALIGN(size);
ba395927
KA
2819 order = get_order(size);
2820
0db9b7ae 2821 intel_unmap_page(hwdev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
ba395927
KA
2822 free_pages((unsigned long)vaddr, order);
2823}
2824
d7ab5c46
FT
2825static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2826 int nelems, enum dma_data_direction dir,
2827 struct dma_attrs *attrs)
ba395927 2828{
ba395927
KA
2829 struct pci_dev *pdev = to_pci_dev(hwdev);
2830 struct dmar_domain *domain;
d794dc9b 2831 unsigned long start_pfn, last_pfn;
f76aec76 2832 struct iova *iova;
8c11e798 2833 struct intel_iommu *iommu;
ba395927 2834
73676832 2835 if (iommu_no_mapping(hwdev))
ba395927
KA
2836 return;
2837
2838 domain = find_domain(pdev);
8c11e798
WH
2839 BUG_ON(!domain);
2840
2841 iommu = domain_get_iommu(domain);
ba395927 2842
c03ab37c 2843 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
85b98276
DW
2844 if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
2845 (unsigned long long)sglist[0].dma_address))
f76aec76 2846 return;
f76aec76 2847
d794dc9b
DW
2848 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2849 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
f76aec76
KA
2850
2851 /* clear the whole page */
d794dc9b
DW
2852 dma_pte_clear_range(domain, start_pfn, last_pfn);
2853
f76aec76 2854 /* free page tables */
d794dc9b 2855 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
f76aec76 2856
acea0018
DW
2857 if (intel_iommu_strict) {
2858 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
82653633 2859 last_pfn - start_pfn + 1, 0);
acea0018
DW
2860 /* free iova */
2861 __free_iova(&domain->iovad, iova);
2862 } else {
2863 add_unmap(domain, iova);
2864 /*
2865 * queue up the release of the unmap to save the roughly 1/6th of the
2866 * cpu time otherwise used up by the iotlb flush operation...
2867 */
2868 }
ba395927
KA
2869}
2870
ba395927 2871static int intel_nontranslate_map_sg(struct device *hddev,
c03ab37c 2872 struct scatterlist *sglist, int nelems, int dir)
ba395927
KA
2873{
2874 int i;
c03ab37c 2875 struct scatterlist *sg;
ba395927 2876
c03ab37c 2877 for_each_sg(sglist, sg, nelems, i) {
12d4d40e 2878 BUG_ON(!sg_page(sg));
4cf2e75d 2879 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
c03ab37c 2880 sg->dma_length = sg->length;
ba395927
KA
2881 }
2882 return nelems;
2883}
2884
d7ab5c46
FT
2885static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
2886 enum dma_data_direction dir, struct dma_attrs *attrs)
ba395927 2887{
ba395927 2888 int i;
ba395927
KA
2889 struct pci_dev *pdev = to_pci_dev(hwdev);
2890 struct dmar_domain *domain;
f76aec76
KA
2891 size_t size = 0;
2892 int prot = 0;
f76aec76
KA
2893 struct iova *iova = NULL;
2894 int ret;
c03ab37c 2895 struct scatterlist *sg;
b536d24d 2896 unsigned long start_vpfn;
8c11e798 2897 struct intel_iommu *iommu;
ba395927
KA
2898
2899 BUG_ON(dir == DMA_NONE);
73676832 2900 if (iommu_no_mapping(hwdev))
c03ab37c 2901 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
ba395927 2902
f76aec76
KA
2903 domain = get_valid_domain_for_dev(pdev);
2904 if (!domain)
2905 return 0;
2906
8c11e798
WH
2907 iommu = domain_get_iommu(domain);
2908
b536d24d 2909 for_each_sg(sglist, sg, nelems, i)
88cb6a74 2910 size += aligned_nrpages(sg->offset, sg->length);
f76aec76 2911
5a5e02a6
DW
2912 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
2913 pdev->dma_mask);
f76aec76 2914 if (!iova) {
c03ab37c 2915 sglist->dma_length = 0;
f76aec76
KA
2916 return 0;
2917 }
2918
2919 /*
2920 * Check if DMAR supports zero-length reads on write only
2921 * mappings..
2922 */
2923 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 2924 !cap_zlr(iommu->cap))
f76aec76
KA
2925 prot |= DMA_PTE_READ;
2926 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2927 prot |= DMA_PTE_WRITE;
2928
b536d24d 2929 start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
e1605495 2930
f532959b 2931 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
e1605495
DW
2932 if (unlikely(ret)) {
2933 /* clear the page */
2934 dma_pte_clear_range(domain, start_vpfn,
2935 start_vpfn + size - 1);
2936 /* free page tables */
2937 dma_pte_free_pagetable(domain, start_vpfn,
2938 start_vpfn + size - 1);
2939 /* free iova */
2940 __free_iova(&domain->iovad, iova);
2941 return 0;
ba395927
KA
2942 }
2943
1f0ef2aa
DW
2944 /* it's a non-present to present mapping. Only flush if caching mode */
2945 if (cap_caching_mode(iommu->cap))
82653633 2946 iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 1);
1f0ef2aa 2947 else
8c11e798 2948 iommu_flush_write_buffer(iommu);
1f0ef2aa 2949
ba395927
KA
2950 return nelems;
2951}
2952
dfb805e8
FT
2953static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
2954{
2955 return !dma_addr;
2956}
2957
160c1d8e 2958struct dma_map_ops intel_dma_ops = {
ba395927
KA
2959 .alloc_coherent = intel_alloc_coherent,
2960 .free_coherent = intel_free_coherent,
ba395927
KA
2961 .map_sg = intel_map_sg,
2962 .unmap_sg = intel_unmap_sg,
ffbbef5c
FT
2963 .map_page = intel_map_page,
2964 .unmap_page = intel_unmap_page,
dfb805e8 2965 .mapping_error = intel_mapping_error,
ba395927
KA
2966};
2967
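/*
 * Editor's sketch, not part of the driver: once intel_dma_ops is
 * installed as the global dma_ops, an ordinary PCI driver reaches the
 * functions above through the generic DMA API: dma_map_page() lands
 * in intel_map_page(), dma_unmap_page() in intel_unmap_page(), and so
 * on.  The device usage below is hypothetical.
 */
static int example_driver_dma(struct pci_dev *pdev, struct page *page)
{
	dma_addr_t handle;

	handle = dma_map_page(&pdev->dev, page, 0, PAGE_SIZE, DMA_TO_DEVICE);
	if (dma_mapping_error(&pdev->dev, handle))
		return -EIO;

	/* ... program the device with 'handle' and wait for the DMA ... */

	dma_unmap_page(&pdev->dev, handle, PAGE_SIZE, DMA_TO_DEVICE);
	return 0;
}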
2968static inline int iommu_domain_cache_init(void)
2969{
2970 int ret = 0;
2971
2972 iommu_domain_cache = kmem_cache_create("iommu_domain",
2973 sizeof(struct dmar_domain),
2974 0,
2975 SLAB_HWCACHE_ALIGN,
2976
2977 NULL);
2978 if (!iommu_domain_cache) {
2979 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
2980 ret = -ENOMEM;
2981 }
2982
2983 return ret;
2984}
2985
2986static inline int iommu_devinfo_cache_init(void)
2987{
2988 int ret = 0;
2989
2990 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
2991 sizeof(struct device_domain_info),
2992 0,
2993 SLAB_HWCACHE_ALIGN,
ba395927
KA
2994 NULL);
2995 if (!iommu_devinfo_cache) {
2996 printk(KERN_ERR "Couldn't create devinfo cache\n");
2997 ret = -ENOMEM;
2998 }
2999
3000 return ret;
3001}
3002
3003static inline int iommu_iova_cache_init(void)
3004{
3005 int ret = 0;
3006
3007 iommu_iova_cache = kmem_cache_create("iommu_iova",
3008 sizeof(struct iova),
3009 0,
3010 SLAB_HWCACHE_ALIGN,
ba395927
KA
3011 NULL);
3012 if (!iommu_iova_cache) {
3013 printk(KERN_ERR "Couldn't create iova cache\n");
3014 ret = -ENOMEM;
3015 }
3016
3017 return ret;
3018}
3019
3020static int __init iommu_init_mempool(void)
3021{
3022 int ret;
3023 ret = iommu_iova_cache_init();
3024 if (ret)
3025 return ret;
3026
3027 ret = iommu_domain_cache_init();
3028 if (ret)
3029 goto domain_error;
3030
3031 ret = iommu_devinfo_cache_init();
3032 if (!ret)
3033 return ret;
3034
3035 kmem_cache_destroy(iommu_domain_cache);
3036domain_error:
3037 kmem_cache_destroy(iommu_iova_cache);
3038
3039 return -ENOMEM;
3040}
3041
3042static void __init iommu_exit_mempool(void)
3043{
3044 kmem_cache_destroy(iommu_devinfo_cache);
3045 kmem_cache_destroy(iommu_domain_cache);
3046 kmem_cache_destroy(iommu_iova_cache);
3047
3048}
3049
556ab45f
DW
3050static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3051{
3052 struct dmar_drhd_unit *drhd;
3053 u32 vtbar;
3054 int rc;
3055
3056 /* We know that this device on this chipset has its own IOMMU.
3057 * If we find it under a different IOMMU, then the BIOS is lying
3058 * to us. Hope that the IOMMU for this device is actually
3059 * disabled, and it needs no translation...
3060 */
3061 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3062 if (rc) {
3063 /* "can't" happen */
3064 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3065 return;
3066 }
3067 vtbar &= 0xffff0000;
3068
3069 /* we know that this iommu should be at offset 0xa000 from vtbar */
3070 drhd = dmar_find_matched_drhd_unit(pdev);
3071 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3072 TAINT_FIRMWARE_WORKAROUND,
3073 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3074 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3075}
3076DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3077
ba395927
KA
3078static void __init init_no_remapping_devices(void)
3079{
3080 struct dmar_drhd_unit *drhd;
3081
3082 for_each_drhd_unit(drhd) {
3083 if (!drhd->include_all) {
3084 int i;
3085 for (i = 0; i < drhd->devices_cnt; i++)
3086 if (drhd->devices[i] != NULL)
3087 break;
3088 /* ignore DMAR unit if no pci devices exist */
3089 if (i == drhd->devices_cnt)
3090 drhd->ignored = 1;
3091 }
3092 }
3093
3094 if (dmar_map_gfx)
3095 return;
3096
3097 for_each_drhd_unit(drhd) {
3098 int i;
3099 if (drhd->ignored || drhd->include_all)
3100 continue;
3101
3102 for (i = 0; i < drhd->devices_cnt; i++)
3103 if (drhd->devices[i] &&
3104 !IS_GFX_DEVICE(drhd->devices[i]))
3105 break;
3106
3107 if (i < drhd->devices_cnt)
3108 continue;
3109
3110 /* bypass IOMMU if it is just for gfx devices */
3111 drhd->ignored = 1;
3112 for (i = 0; i < drhd->devices_cnt; i++) {
3113 if (!drhd->devices[i])
3114 continue;
358dd8ac 3115 drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
ba395927
KA
3116 }
3117 }
3118}
3119
f59c7b69
FY
3120#ifdef CONFIG_SUSPEND
3121static int init_iommu_hw(void)
3122{
3123 struct dmar_drhd_unit *drhd;
3124 struct intel_iommu *iommu = NULL;
3125
3126 for_each_active_iommu(iommu, drhd)
3127 if (iommu->qi)
3128 dmar_reenable_qi(iommu);
3129
b779260b
JC
3130 for_each_iommu(iommu, drhd) {
3131 if (drhd->ignored) {
3132 /*
3133 * we always have to disable PMRs or DMA may fail on
3134 * this device
3135 */
3136 if (force_on)
3137 iommu_disable_protect_mem_regions(iommu);
3138 continue;
3139 }
3140
f59c7b69
FY
3141 iommu_flush_write_buffer(iommu);
3142
3143 iommu_set_root_entry(iommu);
3144
3145 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3146 DMA_CCMD_GLOBAL_INVL);
f59c7b69 3147 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 3148 DMA_TLB_GLOBAL_FLUSH);
b779260b
JC
3149 if (iommu_enable_translation(iommu))
3150 return 1;
b94996c9 3151 iommu_disable_protect_mem_regions(iommu);
f59c7b69
FY
3152 }
3153
3154 return 0;
3155}
3156
3157static void iommu_flush_all(void)
3158{
3159 struct dmar_drhd_unit *drhd;
3160 struct intel_iommu *iommu;
3161
3162 for_each_active_iommu(iommu, drhd) {
3163 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3164 DMA_CCMD_GLOBAL_INVL);
f59c7b69 3165 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 3166 DMA_TLB_GLOBAL_FLUSH);
f59c7b69
FY
3167 }
3168}
3169
134fac3f 3170static int iommu_suspend(void)
f59c7b69
FY
3171{
3172 struct dmar_drhd_unit *drhd;
3173 struct intel_iommu *iommu = NULL;
3174 unsigned long flag;
3175
3176 for_each_active_iommu(iommu, drhd) {
3177 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3178 GFP_ATOMIC);
3179 if (!iommu->iommu_state)
3180 goto nomem;
3181 }
3182
3183 iommu_flush_all();
3184
3185 for_each_active_iommu(iommu, drhd) {
3186 iommu_disable_translation(iommu);
3187
3188 spin_lock_irqsave(&iommu->register_lock, flag);
3189
3190 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3191 readl(iommu->reg + DMAR_FECTL_REG);
3192 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3193 readl(iommu->reg + DMAR_FEDATA_REG);
3194 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3195 readl(iommu->reg + DMAR_FEADDR_REG);
3196 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3197 readl(iommu->reg + DMAR_FEUADDR_REG);
3198
3199 spin_unlock_irqrestore(&iommu->register_lock, flag);
3200 }
3201 return 0;
3202
3203nomem:
3204 for_each_active_iommu(iommu, drhd)
3205 kfree(iommu->iommu_state);
3206
3207 return -ENOMEM;
3208}
3209
134fac3f 3210static void iommu_resume(void)
f59c7b69
FY
3211{
3212 struct dmar_drhd_unit *drhd;
3213 struct intel_iommu *iommu = NULL;
3214 unsigned long flag;
3215
3216 if (init_iommu_hw()) {
b779260b
JC
3217 if (force_on)
3218 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3219 else
3220 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
134fac3f 3221 return;
f59c7b69
FY
3222 }
3223
3224 for_each_active_iommu(iommu, drhd) {
3225
3226 spin_lock_irqsave(&iommu->register_lock, flag);
3227
3228 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3229 iommu->reg + DMAR_FECTL_REG);
3230 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3231 iommu->reg + DMAR_FEDATA_REG);
3232 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3233 iommu->reg + DMAR_FEADDR_REG);
3234 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3235 iommu->reg + DMAR_FEUADDR_REG);
3236
3237 spin_unlock_irqrestore(&iommu->register_lock, flag);
3238 }
3239
3240 for_each_active_iommu(iommu, drhd)
3241 kfree(iommu->iommu_state);
f59c7b69
FY
3242}
3243
134fac3f 3244static struct syscore_ops iommu_syscore_ops = {
f59c7b69
FY
3245 .resume = iommu_resume,
3246 .suspend = iommu_suspend,
3247};
3248
134fac3f 3249static void __init init_iommu_pm_ops(void)
f59c7b69 3250{
134fac3f 3251 register_syscore_ops(&iommu_syscore_ops);
f59c7b69
FY
3252}
3253
3254#else
134fac3f 3255static inline void init_iommu_pm_ops(void) { }
f59c7b69
FY
3256#endif /* CONFIG_SUSPEND */
3257
99dcaded
FY
3258/*
3259 * Here we only respond to the action of a device being unbound from its driver.
3260 *
3261 * A newly added device is not attached to its DMAR domain here yet. That will
3262 * happen when the device is first mapped to an iova.
3263 */
3264static int device_notifier(struct notifier_block *nb,
3265 unsigned long action, void *data)
3266{
3267 struct device *dev = data;
3268 struct pci_dev *pdev = to_pci_dev(dev);
3269 struct dmar_domain *domain;
3270
44cd613c
DW
3271 if (iommu_no_mapping(dev))
3272 return 0;
3273
99dcaded
FY
3274 domain = find_domain(pdev);
3275 if (!domain)
3276 return 0;
3277
a97590e5 3278 if (action == BUS_NOTIFY_UNBOUND_DRIVER && !iommu_pass_through) {
99dcaded
FY
3279 domain_remove_one_dev_info(domain, pdev);
3280
a97590e5
AW
3281 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3282 !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
3283 list_empty(&domain->devices))
3284 domain_exit(domain);
3285 }
3286
99dcaded
FY
3287 return 0;
3288}
3289
3290static struct notifier_block device_nb = {
3291 .notifier_call = device_notifier,
3292};
3293
ba395927
KA
3294int __init intel_iommu_init(void)
3295{
3296 int ret = 0;
3297
a59b50e9
JC
3298 /* VT-d is required for a TXT/tboot launch, so enforce that */
3299 force_on = tboot_force_iommu();
3300
3301 if (dmar_table_init()) {
3302 if (force_on)
3303 panic("tboot: Failed to initialize DMAR table\n");
ba395927 3304 return -ENODEV;
a59b50e9 3305 }
ba395927 3306
a59b50e9
JC
3307 if (dmar_dev_scope_init()) {
3308 if (force_on)
3309 panic("tboot: Failed to initialize DMAR device scope\n");
1886e8a9 3310 return -ENODEV;
a59b50e9 3311 }
1886e8a9 3312
2ae21010
SS
3313 /*
3314 * Check the need for DMA-remapping initialization now.
3315 * Above initialization will also be used by Interrupt-remapping.
3316 */
75f1cdf1 3317 if (no_iommu || dmar_disabled)
2ae21010
SS
3318 return -ENODEV;
3319
51a63e67
JC
3320 if (iommu_init_mempool()) {
3321 if (force_on)
3322 panic("tboot: Failed to initialize iommu memory\n");
3323 return -ENODEV;
3324 }
3325
3326 if (dmar_init_reserved_ranges()) {
3327 if (force_on)
3328 panic("tboot: Failed to reserve iommu ranges\n");
3329 return -ENODEV;
3330 }
ba395927
KA
3331
3332 init_no_remapping_devices();
3333
b779260b 3334 ret = init_dmars();
ba395927 3335 if (ret) {
a59b50e9
JC
3336 if (force_on)
3337 panic("tboot: Failed to initialize DMARs\n");
ba395927
KA
3338 printk(KERN_ERR "IOMMU: dmar init failed\n");
3339 put_iova_domain(&reserved_iova_list);
3340 iommu_exit_mempool();
3341 return ret;
3342 }
3343 printk(KERN_INFO
3344 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
3345
5e0d2a6f 3346 init_timer(&unmap_timer);
75f1cdf1
FT
3347#ifdef CONFIG_SWIOTLB
3348 swiotlb = 0;
3349#endif
19943b0e 3350 dma_ops = &intel_dma_ops;
4ed0d3e6 3351
134fac3f 3352 init_iommu_pm_ops();
a8bcbb0d
JR
3353
3354 register_iommu(&intel_iommu_ops);
3355
99dcaded
FY
3356 bus_register_notifier(&pci_bus_type, &device_nb);
3357
ba395927
KA
3358 return 0;
3359}
e820482c 3360
3199aa6b
HW
3361static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
3362 struct pci_dev *pdev)
3363{
3364 struct pci_dev *tmp, *parent;
3365
3366 if (!iommu || !pdev)
3367 return;
3368
3369 /* dependent device detach */
3370 tmp = pci_find_upstream_pcie_bridge(pdev);
3371 /* Secondary interface's bus number and devfn 0 */
3372 if (tmp) {
3373 parent = pdev->bus->self;
3374 while (parent != tmp) {
3375 iommu_detach_dev(iommu, parent->bus->number,
276dbf99 3376 parent->devfn);
3199aa6b
HW
3377 parent = parent->bus->self;
3378 }
45e829ea 3379 if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
3199aa6b
HW
3380 iommu_detach_dev(iommu,
3381 tmp->subordinate->number, 0);
3382 else /* this is a legacy PCI bridge */
276dbf99
DW
3383 iommu_detach_dev(iommu, tmp->bus->number,
3384 tmp->devfn);
3199aa6b
HW
3385 }
3386}
3387
2c2e2c38 3388static void domain_remove_one_dev_info(struct dmar_domain *domain,
c7151a8d
WH
3389 struct pci_dev *pdev)
3390{
3391 struct device_domain_info *info;
3392 struct intel_iommu *iommu;
3393 unsigned long flags;
3394 int found = 0;
3395 struct list_head *entry, *tmp;
3396
276dbf99
DW
3397 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3398 pdev->devfn);
c7151a8d
WH
3399 if (!iommu)
3400 return;
3401
3402 spin_lock_irqsave(&device_domain_lock, flags);
3403 list_for_each_safe(entry, tmp, &domain->devices) {
3404 info = list_entry(entry, struct device_domain_info, link);
276dbf99 3405 /* No need to compare PCI domain; it has to be the same */
c7151a8d
WH
3406 if (info->bus == pdev->bus->number &&
3407 info->devfn == pdev->devfn) {
3408 list_del(&info->link);
3409 list_del(&info->global);
3410 if (info->dev)
3411 info->dev->dev.archdata.iommu = NULL;
3412 spin_unlock_irqrestore(&device_domain_lock, flags);
3413
93a23a72 3414 iommu_disable_dev_iotlb(info);
c7151a8d 3415 iommu_detach_dev(iommu, info->bus, info->devfn);
3199aa6b 3416 iommu_detach_dependent_devices(iommu, pdev);
c7151a8d
WH
3417 free_devinfo_mem(info);
3418
3419 spin_lock_irqsave(&device_domain_lock, flags);
3420
3421 if (found)
3422 break;
3423 else
3424 continue;
3425 }
3426
3427 /* if there are no other devices under the same iommu
3428 * owned by this domain, clear this iommu in iommu_bmp,
3429 * then update the iommu count and coherency
3430 */
276dbf99
DW
3431 if (iommu == device_to_iommu(info->segment, info->bus,
3432 info->devfn))
c7151a8d
WH
3433 found = 1;
3434 }
3435
3436 if (found == 0) {
3437 unsigned long tmp_flags;
3438 spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
3439 clear_bit(iommu->seq_id, &domain->iommu_bmp);
3440 domain->iommu_count--;
58c610bd 3441 domain_update_iommu_cap(domain);
c7151a8d 3442 spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
a97590e5
AW
3443
3444 spin_lock_irqsave(&iommu->lock, tmp_flags);
3445 clear_bit(domain->id, iommu->domain_ids);
3446 iommu->domains[domain->id] = NULL;
3447 spin_unlock_irqrestore(&iommu->lock, tmp_flags);
c7151a8d
WH
3448 }
3449
3450 spin_unlock_irqrestore(&device_domain_lock, flags);
3451}
3452
3453static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
3454{
3455 struct device_domain_info *info;
3456 struct intel_iommu *iommu;
3457 unsigned long flags1, flags2;
3458
3459 spin_lock_irqsave(&device_domain_lock, flags1);
3460 while (!list_empty(&domain->devices)) {
3461 info = list_entry(domain->devices.next,
3462 struct device_domain_info, link);
3463 list_del(&info->link);
3464 list_del(&info->global);
3465 if (info->dev)
3466 info->dev->dev.archdata.iommu = NULL;
3467
3468 spin_unlock_irqrestore(&device_domain_lock, flags1);
3469
93a23a72 3470 iommu_disable_dev_iotlb(info);
276dbf99 3471 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
c7151a8d 3472 iommu_detach_dev(iommu, info->bus, info->devfn);
3199aa6b 3473 iommu_detach_dependent_devices(iommu, info->dev);
c7151a8d
WH
3474
3475 /* clear this iommu in iommu_bmp, update iommu count
58c610bd 3476 * and capabilities
c7151a8d
WH
3477 */
3478 spin_lock_irqsave(&domain->iommu_lock, flags2);
3479 if (test_and_clear_bit(iommu->seq_id,
3480 &domain->iommu_bmp)) {
3481 domain->iommu_count--;
58c610bd 3482 domain_update_iommu_cap(domain);
c7151a8d
WH
3483 }
3484 spin_unlock_irqrestore(&domain->iommu_lock, flags2);
3485
3486 free_devinfo_mem(info);
3487 spin_lock_irqsave(&device_domain_lock, flags1);
3488 }
3489 spin_unlock_irqrestore(&device_domain_lock, flags1);
3490}
3491
5e98c4b1
WH
3492/* domain id for a virtual machine; it won't be set in a context entry */
3493static unsigned long vm_domid;
3494
3495static struct dmar_domain *iommu_alloc_vm_domain(void)
3496{
3497 struct dmar_domain *domain;
3498
3499 domain = alloc_domain_mem();
3500 if (!domain)
3501 return NULL;
3502
3503 domain->id = vm_domid++;
4c923d47 3504 domain->nid = -1;
5e98c4b1
WH
3505 memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
3506 domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
3507
3508 return domain;
3509}
3510
2c2e2c38 3511static int md_domain_init(struct dmar_domain *domain, int guest_width)
5e98c4b1
WH
3512{
3513 int adjust_width;
3514
3515 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
5e98c4b1
WH
3516 spin_lock_init(&domain->iommu_lock);
3517
3518 domain_reserve_special_ranges(domain);
3519
3520 /* calculate AGAW */
3521 domain->gaw = guest_width;
3522 adjust_width = guestwidth_to_adjustwidth(guest_width);
3523 domain->agaw = width_to_agaw(adjust_width);
3524
3525 INIT_LIST_HEAD(&domain->devices);
3526
3527 domain->iommu_count = 0;
3528 domain->iommu_coherency = 0;
c5b15255 3529 domain->iommu_snooping = 0;
fe40f1e0 3530 domain->max_addr = 0;
4c923d47 3531 domain->nid = -1;
5e98c4b1
WH
3532
3533 /* always allocate the top pgd */
4c923d47 3534 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
5e98c4b1
WH
3535 if (!domain->pgd)
3536 return -ENOMEM;
3537 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
3538 return 0;
3539}
3540
3541static void iommu_free_vm_domain(struct dmar_domain *domain)
3542{
3543 unsigned long flags;
3544 struct dmar_drhd_unit *drhd;
3545 struct intel_iommu *iommu;
3546 unsigned long i;
3547 unsigned long ndomains;
3548
3549 for_each_drhd_unit(drhd) {
3550 if (drhd->ignored)
3551 continue;
3552 iommu = drhd->iommu;
3553
3554 ndomains = cap_ndoms(iommu->cap);
a45946ab 3555 for_each_set_bit(i, iommu->domain_ids, ndomains) {
5e98c4b1
WH
3556 if (iommu->domains[i] == domain) {
3557 spin_lock_irqsave(&iommu->lock, flags);
3558 clear_bit(i, iommu->domain_ids);
3559 iommu->domains[i] = NULL;
3560 spin_unlock_irqrestore(&iommu->lock, flags);
3561 break;
3562 }
5e98c4b1
WH
3563 }
3564 }
3565}
3566
3567static void vm_domain_exit(struct dmar_domain *domain)
3568{
5e98c4b1
WH
3569 /* Domain 0 is reserved, so don't process it */
3570 if (!domain)
3571 return;
3572
3573 vm_domain_remove_all_dev_info(domain);
3574 /* destroy iovas */
3575 put_iova_domain(&domain->iovad);
5e98c4b1
WH
3576
3577 /* clear ptes */
595badf5 3578 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
5e98c4b1
WH
3579
3580 /* free page tables */
d794dc9b 3581 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
5e98c4b1
WH
3582
3583 iommu_free_vm_domain(domain);
3584 free_domain_mem(domain);
3585}
3586
5d450806 3587static int intel_iommu_domain_init(struct iommu_domain *domain)
38717946 3588{
5d450806 3589 struct dmar_domain *dmar_domain;
38717946 3590
5d450806
JR
3591 dmar_domain = iommu_alloc_vm_domain();
3592 if (!dmar_domain) {
38717946 3593 printk(KERN_ERR
5d450806
JR
3594 "intel_iommu_domain_init: dmar_domain == NULL\n");
3595 return -ENOMEM;
38717946 3596 }
2c2e2c38 3597 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
38717946 3598 printk(KERN_ERR
5d450806
JR
3599 "intel_iommu_domain_init() failed\n");
3600 vm_domain_exit(dmar_domain);
3601 return -ENOMEM;
38717946 3602 }
5d450806 3603 domain->priv = dmar_domain;
faa3d6f5 3604
5d450806 3605 return 0;
38717946 3606}
38717946 3607
5d450806 3608static void intel_iommu_domain_destroy(struct iommu_domain *domain)
38717946 3609{
5d450806
JR
3610 struct dmar_domain *dmar_domain = domain->priv;
3611
3612 domain->priv = NULL;
3613 vm_domain_exit(dmar_domain);
38717946 3614}
38717946 3615
4c5478c9
JR
3616static int intel_iommu_attach_device(struct iommu_domain *domain,
3617 struct device *dev)
38717946 3618{
4c5478c9
JR
3619 struct dmar_domain *dmar_domain = domain->priv;
3620 struct pci_dev *pdev = to_pci_dev(dev);
fe40f1e0
WH
3621 struct intel_iommu *iommu;
3622 int addr_width;
faa3d6f5
WH
3623
3624 /* normally pdev is not mapped */
3625 if (unlikely(domain_context_mapped(pdev))) {
3626 struct dmar_domain *old_domain;
3627
3628 old_domain = find_domain(pdev);
3629 if (old_domain) {
2c2e2c38
FY
3630 if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
3631 dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)
3632 domain_remove_one_dev_info(old_domain, pdev);
faa3d6f5
WH
3633 else
3634 domain_remove_dev_info(old_domain);
3635 }
3636 }
3637
276dbf99
DW
3638 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3639 pdev->devfn);
fe40f1e0
WH
3640 if (!iommu)
3641 return -ENODEV;
3642
3643 /* check if this iommu agaw is sufficient for max mapped address */
3644 addr_width = agaw_to_width(iommu->agaw);
a99c47a2
TL
3645 if (addr_width > cap_mgaw(iommu->cap))
3646 addr_width = cap_mgaw(iommu->cap);
3647
3648 if (dmar_domain->max_addr > (1LL << addr_width)) {
3649 printk(KERN_ERR "%s: iommu width (%d) is not "
fe40f1e0 3650 "sufficient for the mapped address (%llx)\n",
a99c47a2 3651 __func__, addr_width, dmar_domain->max_addr);
fe40f1e0
WH
3652 return -EFAULT;
3653 }
a99c47a2
TL
3654 dmar_domain->gaw = addr_width;
3655
3656 /*
3657 * Knock out extra levels of page tables if necessary
3658 */
3659 while (iommu->agaw < dmar_domain->agaw) {
3660 struct dma_pte *pte;
3661
3662 pte = dmar_domain->pgd;
3663 if (dma_pte_present(pte)) {
25cbff16
SY
3664 dmar_domain->pgd = (struct dma_pte *)
3665 phys_to_virt(dma_pte_addr(pte));
7a661013 3666 free_pgtable_page(pte);
a99c47a2
TL
3667 }
3668 dmar_domain->agaw--;
3669 }
fe40f1e0 3670
5fe60f4e 3671 return domain_add_dev_info(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
38717946 3672}
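
A worked illustration of the width check and level trimming above, using made-up but plausible numbers (nothing here is taken from a specific chipset):

/*
 * Suppose agaw_to_width(iommu->agaw) and cap_mgaw(iommu->cap) both
 * report 48, so addr_width stays 48.  A domain whose max_addr is,
 * say, 0x7fffff000 (well below 1LL << 48) passes the check and simply
 * has its gaw narrowed to 48.  If that domain had been created with
 * more page-table levels than this unit can walk (dmar_domain->agaw >
 * iommu->agaw), the loop above peels off the unused top-level table
 * pages with free_pgtable_page() until the two agaw values match.
 */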
38717946 3673
4c5478c9
JR
3674static void intel_iommu_detach_device(struct iommu_domain *domain,
3675 struct device *dev)
38717946 3676{
4c5478c9
JR
3677 struct dmar_domain *dmar_domain = domain->priv;
3678 struct pci_dev *pdev = to_pci_dev(dev);
3679
2c2e2c38 3680 domain_remove_one_dev_info(dmar_domain, pdev);
faa3d6f5 3681}
c7151a8d 3682
b146a1c9
JR
3683static int intel_iommu_map(struct iommu_domain *domain,
3684 unsigned long iova, phys_addr_t hpa,
3685 int gfp_order, int iommu_prot)
faa3d6f5 3686{
dde57a21 3687 struct dmar_domain *dmar_domain = domain->priv;
fe40f1e0 3688 u64 max_addr;
dde57a21 3689 int prot = 0;
b146a1c9 3690 size_t size;
faa3d6f5 3691 int ret;
fe40f1e0 3692
dde57a21
JR
3693 if (iommu_prot & IOMMU_READ)
3694 prot |= DMA_PTE_READ;
3695 if (iommu_prot & IOMMU_WRITE)
3696 prot |= DMA_PTE_WRITE;
9cf06697
SY
3697 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
3698 prot |= DMA_PTE_SNP;
dde57a21 3699
b146a1c9 3700 size = PAGE_SIZE << gfp_order;
163cc52c 3701 max_addr = iova + size;
dde57a21 3702 if (dmar_domain->max_addr < max_addr) {
fe40f1e0
WH
3703 u64 end;
3704
3705 /* check if minimum agaw is sufficient for mapped address */
8954da1f 3706 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
fe40f1e0 3707 if (end < max_addr) {
8954da1f 3708 printk(KERN_ERR "%s: iommu width (%d) is not "
fe40f1e0 3709 "sufficient for the mapped address (%llx)\n",
8954da1f 3710 __func__, dmar_domain->gaw, max_addr);
fe40f1e0
WH
3711 return -EFAULT;
3712 }
dde57a21 3713 dmar_domain->max_addr = max_addr;
fe40f1e0 3714 }
ad051221
DW
 3715 /* Round size up to the next multiple of PAGE_SIZE if it, together with
 3716 the low bits of hpa, would take us onto the next page */
88cb6a74 3717 size = aligned_nrpages(hpa, size);
ad051221
DW
3718 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
3719 hpa >> VTD_PAGE_SHIFT, size, prot);
faa3d6f5 3720 return ret;
38717946 3721}
38717946 3722
b146a1c9
JR
3723static int intel_iommu_unmap(struct iommu_domain *domain,
3724 unsigned long iova, int gfp_order)
38717946 3725{
dde57a21 3726 struct dmar_domain *dmar_domain = domain->priv;
b146a1c9 3727 size_t size = PAGE_SIZE << gfp_order;
4b99d352 3728
163cc52c
DW
3729 dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
3730 (iova + size - 1) >> VTD_PAGE_SHIFT);
fe40f1e0 3731
163cc52c
DW
3732 if (dmar_domain->max_addr == iova + size)
3733 dmar_domain->max_addr = iova;
b146a1c9
JR
3734
3735 return gfp_order;
38717946 3736}
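
Both callbacks express the mapping size as a page order rather than a byte count. A quick reference for the size = PAGE_SIZE << gfp_order arithmetic, assuming the usual 4KiB PAGE_SIZE:

/*
 * gfp_order   PAGE_SIZE << gfp_order   pages covered
 *        0                     4 KiB               1
 *        9                     2 MiB             512
 *       18                     1 GiB          262144
 */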
38717946 3737
d14d6577
JR
3738static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
3739 unsigned long iova)
38717946 3740{
d14d6577 3741 struct dmar_domain *dmar_domain = domain->priv;
38717946 3742 struct dma_pte *pte;
faa3d6f5 3743 u64 phys = 0;
38717946 3744
b026fd28 3745 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT);
38717946 3746 if (pte)
faa3d6f5 3747 phys = dma_pte_addr(pte);
38717946 3748
faa3d6f5 3749 return phys;
38717946 3750}
a8bcbb0d 3751
dbb9fd86
SY
3752static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
3753 unsigned long cap)
3754{
3755 struct dmar_domain *dmar_domain = domain->priv;
3756
3757 if (cap == IOMMU_CAP_CACHE_COHERENCY)
3758 return dmar_domain->iommu_snooping;
323f99cb
TL
3759 if (cap == IOMMU_CAP_INTR_REMAP)
3760 return intr_remapping_enabled;
dbb9fd86
SY
3761
3762 return 0;
3763}
3764
a8bcbb0d
JR
3765static struct iommu_ops intel_iommu_ops = {
3766 .domain_init = intel_iommu_domain_init,
3767 .domain_destroy = intel_iommu_domain_destroy,
3768 .attach_dev = intel_iommu_attach_device,
3769 .detach_dev = intel_iommu_detach_device,
b146a1c9
JR
3770 .map = intel_iommu_map,
3771 .unmap = intel_iommu_unmap,
a8bcbb0d 3772 .iova_to_phys = intel_iommu_iova_to_phys,
dbb9fd86 3773 .domain_has_cap = intel_iommu_domain_has_cap,
a8bcbb0d 3774};
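
The intel_iommu_ops table is only ever reached through the generic wrappers in <linux/iommu.h> (iommu_domain_alloc(), iommu_attach_device(), iommu_map(), and so on), as used by consumers such as KVM device assignment. The following is a minimal sketch of that call path, assuming the iommu API of this kernel generation (gfp_order-based map/unmap, argument-less iommu_domain_alloc()); the device, the IOVA 0x100000 and the physical address are purely illustrative, and pa is assumed to be page-aligned:

#include <linux/iommu.h>
#include <linux/pci.h>

static int example_map_one_page(struct pci_dev *pdev, phys_addr_t pa)
{
	struct iommu_domain *dom;
	int ret;

	dom = iommu_domain_alloc();			/* -> intel_iommu_domain_init() */
	if (!dom)
		return -ENOMEM;

	ret = iommu_attach_device(dom, &pdev->dev);	/* -> intel_iommu_attach_device() */
	if (ret)
		goto out_free;

	/* order 0 == a single 4KiB page; -> intel_iommu_map() */
	ret = iommu_map(dom, 0x100000, pa, 0, IOMMU_READ | IOMMU_WRITE);
	if (ret)
		goto out_detach;

	/* -> intel_iommu_iova_to_phys(); should hand back the same frame */
	WARN_ON(iommu_iova_to_phys(dom, 0x100000) != pa);

	iommu_unmap(dom, 0x100000, 0);			/* -> intel_iommu_unmap() */
out_detach:
	iommu_detach_device(dom, &pdev->dev);		/* -> intel_iommu_detach_device() */
out_free:
	iommu_domain_free(dom);				/* -> intel_iommu_domain_destroy() */
	return ret;
}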
9af88143
DW
3775
3776static void __devinit quirk_iommu_rwbf(struct pci_dev *dev)
3777{
3778 /*
3779 * Mobile 4 Series Chipset neglects to set RWBF capability,
3780 * but needs it:
3781 */
3782 printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
3783 rwbf_quirk = 1;
2d9e667e
DW
3784
3785 /* https://bugzilla.redhat.com/show_bug.cgi?id=538163 */
3786 if (dev->revision == 0x07) {
3787 printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
3788 dmar_map_gfx = 0;
3789 }
9af88143
DW
3790}
3791
3792DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
e0fc7e0b 3793
eecfd57f
AJ
3794#define GGC 0x52
3795#define GGC_MEMORY_SIZE_MASK (0xf << 8)
3796#define GGC_MEMORY_SIZE_NONE (0x0 << 8)
3797#define GGC_MEMORY_SIZE_1M (0x1 << 8)
3798#define GGC_MEMORY_SIZE_2M (0x3 << 8)
3799#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
3800#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
3801#define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
3802#define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
3803
9eecabcb
DW
3804static void __devinit quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
3805{
3806 unsigned short ggc;
3807
eecfd57f 3808 if (pci_read_config_word(dev, GGC, &ggc))
9eecabcb
DW
3809 return;
3810
eecfd57f 3811 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
9eecabcb
DW
3812 printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
3813 dmar_map_gfx = 0;
3814 }
3815}
3816DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
3817DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
3818DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
3819DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
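
An example decode of the GGC word the Calpella quirk inspects; the register value is invented for illustration:

/*
 * If pci_read_config_word() returned ggc == 0x0130:
 *   ggc & GGC_MEMORY_SIZE_MASK  == 0x0100  (GGC_MEMORY_SIZE_1M)
 *   ggc & GGC_MEMORY_VT_ENABLED == 0       (no GTT space set aside for VT-d)
 * so quirk_calpella_no_shadow_gtt() prints the "no shadow GTT" message
 * and clears dmar_map_gfx.  A value such as 0x0930 (GGC_MEMORY_SIZE_2M_VT)
 * has the VT-enabled bit set and leaves graphics translation alone.
 */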
3820
e0fc7e0b
DW
3821/* On Tylersburg chipsets, some BIOSes have been known to enable the
3822 ISOCH DMAR unit for the Azalia sound device, but not give it any
3823 TLB entries, which causes it to deadlock. Check for that. We do
3824 this in a function called from init_dmars(), instead of in a PCI
3825 quirk, because we don't want to print the obnoxious "BIOS broken"
3826 message if VT-d is actually disabled.
3827*/
3828static void __init check_tylersburg_isoch(void)
3829{
3830 struct pci_dev *pdev;
3831 uint32_t vtisochctrl;
3832
3833 /* If there's no Azalia in the system anyway, forget it. */
3834 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
3835 if (!pdev)
3836 return;
3837 pci_dev_put(pdev);
3838
3839 /* System Management Registers. Might be hidden, in which case
3840 we can't do the sanity check. But that's OK, because the
3841 known-broken BIOSes _don't_ actually hide it, so far. */
3842 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
3843 if (!pdev)
3844 return;
3845
3846 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
3847 pci_dev_put(pdev);
3848 return;
3849 }
3850
3851 pci_dev_put(pdev);
3852
3853 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
3854 if (vtisochctrl & 1)
3855 return;
3856
3857 /* Drop all bits other than the number of TLB entries */
3858 vtisochctrl &= 0x1c;
3859
3860 /* If we have the recommended number of TLB entries (16), fine. */
3861 if (vtisochctrl == 0x10)
3862 return;
3863
3864 /* Zero TLB entries? You get to ride the short bus to school. */
3865 if (!vtisochctrl) {
3866 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
3867 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
3868 dmi_get_system_info(DMI_BIOS_VENDOR),
3869 dmi_get_system_info(DMI_BIOS_VERSION),
3870 dmi_get_system_info(DMI_PRODUCT_VERSION));
3871 iommu_identity_mapping |= IDENTMAP_AZALIA;
3872 return;
3873 }
3874
3875 printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
3876 vtisochctrl);
3877}
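
A worked pass through the checks above, with an invented VTISOCHCTRL value:

/*
 * Say the dword at offset 0x188 reads 0x08.  Bit 0 is clear, so Azalia
 * DMA really is routed to the isoch DMAR unit and the check continues.
 * Masking with 0x1c leaves 0x08: eight TLB entries instead of the
 * recommended sixteen, so the final KERN_WARNING fires with
 * "your BIOS set 8".  A value of 0x10 (or anything with bit 0 set)
 * would have returned quietly, and 0x00 would have taken the WARN()
 * path and forced identity mapping for Azalia (IDENTMAP_AZALIA).
 */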