/*
 * Copyright (c) 2006, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Copyright (C) 2006-2008 Intel Corporation
 * Author: Ashok Raj <ashok.raj@intel.com>
 * Author: Shaohua Li <shaohua.li@intel.com>
 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 * Author: Fenghua Yu <fenghua.yu@intel.com>
 */

#include <linux/init.h>
#include <linux/bitmap.h>
#include <linux/debugfs.h>
#include <linux/slab.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/dma-mapping.h>
#include <linux/mempool.h>
#include <linux/timer.h>
#include <linux/iova.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
#include <linux/syscore_ops.h>
#include <linux/tboot.h>
#include <linux/dmi.h>
#include <linux/pci-ats.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>

#define ROOT_SIZE		VTD_PAGE_SIZE
#define CONTEXT_SIZE		VTD_PAGE_SIZE

#define IS_BRIDGE_HOST_DEVICE(pdev) \
			    ((pdev->class >> 8) == PCI_CLASS_BRIDGE_HOST)
#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)

#define IOAPIC_RANGE_START	(0xfee00000)
#define IOAPIC_RANGE_END	(0xfeefffff)
#define IOVA_START_ADDR		(0x1000)

#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48

#define MAX_AGAW_WIDTH 64

#define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)

/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
   to match. That way, we can use 'unsigned long' for PFNs with impunity. */
#define DOMAIN_MAX_PFN(gaw)	((unsigned long) min_t(uint64_t, \
				__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
#define DOMAIN_MAX_ADDR(gaw)	(((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)

#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
#define DMA_32BIT_PFN		IOVA_PFN(DMA_BIT_MASK(32))
#define DMA_64BIT_PFN		IOVA_PFN(DMA_BIT_MASK(64))

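/*
 * Worked example (illustration only): with the default 48-bit guest
 * address width, __DOMAIN_MAX_PFN(48) is 2^36 - 1.  On a 64-bit kernel
 * that fits in an unsigned long directly; on a 32-bit kernel
 * DOMAIN_MAX_PFN() clamps it to ULONG_MAX, as described above.
 */
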
/* page table handling */
#define LEVEL_STRIDE		(9)
#define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)

static inline int agaw_to_level(int agaw)
{
	return agaw + 2;
}

static inline int agaw_to_width(int agaw)
{
	return 30 + agaw * LEVEL_STRIDE;
}

static inline int width_to_agaw(int width)
{
	return (width - 30) / LEVEL_STRIDE;
}

static inline unsigned int level_to_offset_bits(int level)
{
	return (level - 1) * LEVEL_STRIDE;
}

static inline int pfn_level_offset(unsigned long pfn, int level)
{
	return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
}

static inline unsigned long level_mask(int level)
{
	return -1UL << level_to_offset_bits(level);
}

static inline unsigned long level_size(int level)
{
	return 1UL << level_to_offset_bits(level);
}

static inline unsigned long align_to_level(unsigned long pfn, int level)
{
	return (pfn + level_size(level) - 1) & level_mask(level);
}
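
/*
 * Worked example (illustration only): the default 48-bit address width
 * gives width_to_agaw(48) == (48 - 30) / 9 == 2, and agaw_to_level(2) == 4,
 * i.e. a 4-level page table.  Each level indexes LEVEL_STRIDE (9) bits of
 * the page frame number, so level_size(2) is 512 VT-d pages (2MiB) and
 * level_size(3) is 262144 pages (1GiB).
 */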

static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
{
	return 1 << ((lvl - 1) * LEVEL_STRIDE);
}

/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
   are never going to work. */
static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
{
	return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
}

static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
{
	return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
}
static inline unsigned long page_to_dma_pfn(struct page *pg)
{
	return mm_to_dma_pfn(page_to_pfn(pg));
}
static inline unsigned long virt_to_dma_pfn(void *p)
{
	return page_to_dma_pfn(virt_to_page(p));
}
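
/*
 * Note (illustration only): with 4KiB kernel pages PAGE_SHIFT equals
 * VTD_PAGE_SHIFT, so the conversions above are identity operations; on
 * configurations with larger kernel pages a single MM pfn corresponds
 * to several consecutive VT-d pfns.
 */
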
145/* global iommu list, set NULL for ignored DMAR units */
146static struct intel_iommu **g_iommus;
147
e0fc7e0b 148static void __init check_tylersburg_isoch(void);
9af88143
DW
149static int rwbf_quirk;
150
b779260b
JC
151/*
152 * set to 1 to panic kernel if can't successfully enable VT-d
153 * (used when kernel is launched w/ TXT)
154 */
155static int force_on = 0;
156
46b08e1a
MM
157/*
158 * 0: Present
159 * 1-11: Reserved
160 * 12-63: Context Ptr (12 - (haw-1))
161 * 64-127: Reserved
162 */
163struct root_entry {
164 u64 val;
165 u64 rsvd1;
166};
167#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
168static inline bool root_present(struct root_entry *root)
169{
170 return (root->val & 1);
171}
172static inline void set_root_present(struct root_entry *root)
173{
174 root->val |= 1;
175}
176static inline void set_root_value(struct root_entry *root, unsigned long value)
177{
178 root->val |= value & VTD_PAGE_MASK;
179}
180
181static inline struct context_entry *
182get_context_addr_from_root(struct root_entry *root)
183{
184 return (struct context_entry *)
185 (root_present(root)?phys_to_virt(
186 root->val & VTD_PAGE_MASK) :
187 NULL);
188}
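
/*
 * Note: the root table has ROOT_ENTRY_NR (4096 / 16 == 256) entries,
 * one per PCI bus.  Each present root entry points to a context table
 * with 256 entries, one per devfn, so a (bus, devfn) pair selects
 * exactly one context entry.
 */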
189
7a8fc25e
MM
190/*
191 * low 64 bits:
192 * 0: present
193 * 1: fault processing disable
194 * 2-3: translation type
195 * 12-63: address space root
196 * high 64 bits:
197 * 0-2: address width
198 * 3-6: aval
199 * 8-23: domain id
200 */
201struct context_entry {
202 u64 lo;
203 u64 hi;
204};
c07e7d21
MM
205
206static inline bool context_present(struct context_entry *context)
207{
208 return (context->lo & 1);
209}
210static inline void context_set_present(struct context_entry *context)
211{
212 context->lo |= 1;
213}
214
215static inline void context_set_fault_enable(struct context_entry *context)
216{
217 context->lo &= (((u64)-1) << 2) | 1;
218}
219
c07e7d21
MM
220static inline void context_set_translation_type(struct context_entry *context,
221 unsigned long value)
222{
223 context->lo &= (((u64)-1) << 4) | 3;
224 context->lo |= (value & 3) << 2;
225}
226
227static inline void context_set_address_root(struct context_entry *context,
228 unsigned long value)
229{
230 context->lo |= value & VTD_PAGE_MASK;
231}
232
233static inline void context_set_address_width(struct context_entry *context,
234 unsigned long value)
235{
236 context->hi |= value & 7;
237}
238
239static inline void context_set_domain_id(struct context_entry *context,
240 unsigned long value)
241{
242 context->hi |= (value & ((1 << 16) - 1)) << 8;
243}
244
245static inline void context_clear_entry(struct context_entry *context)
246{
247 context->lo = 0;
248 context->hi = 0;
249}
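
/*
 * Note (summary of usage below): domain_context_mapping_one() programs a
 * context entry with roughly this sequence:
 *
 *	context_set_domain_id(context, id);
 *	context_set_address_root(context, virt_to_phys(pgd));
 *	context_set_address_width(context, iommu->agaw);
 *	context_set_translation_type(context, translation);
 *	context_set_fault_enable(context);
 *	context_set_present(context);
 *
 * followed by a cache flush of the entry.
 */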
7a8fc25e 250
/*
 * 0: readable
 * 1: writable
 * 2-6: reserved
 * 7: super page
 * 8-10: available
 * 11: snoop behavior
 * 12-63: Host physical address
 */
struct dma_pte {
	u64 val;
};

19c239ce
MM
264static inline void dma_clear_pte(struct dma_pte *pte)
265{
266 pte->val = 0;
267}
268
269static inline void dma_set_pte_readable(struct dma_pte *pte)
270{
271 pte->val |= DMA_PTE_READ;
272}
273
274static inline void dma_set_pte_writable(struct dma_pte *pte)
275{
276 pte->val |= DMA_PTE_WRITE;
277}
278
9cf06697
SY
279static inline void dma_set_pte_snp(struct dma_pte *pte)
280{
281 pte->val |= DMA_PTE_SNP;
282}
283
19c239ce
MM
284static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
285{
286 pte->val = (pte->val & ~3) | (prot & 3);
287}
288
289static inline u64 dma_pte_addr(struct dma_pte *pte)
290{
c85994e4
DW
291#ifdef CONFIG_64BIT
292 return pte->val & VTD_PAGE_MASK;
293#else
294 /* Must have a full atomic 64-bit read */
1a8bd481 295 return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
c85994e4 296#endif
19c239ce
MM
297}
298
dd4e8319 299static inline void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn)
19c239ce 300{
dd4e8319 301 pte->val |= (uint64_t)pfn << VTD_PAGE_SHIFT;
19c239ce
MM
302}
303
304static inline bool dma_pte_present(struct dma_pte *pte)
305{
306 return (pte->val & 3) != 0;
307}
622ba12a 308
75e6bf96
DW
309static inline int first_pte_in_page(struct dma_pte *pte)
310{
311 return !((unsigned long)pte & ~VTD_PAGE_MASK);
312}
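
/*
 * Note: a page-table page holds VTD_PAGE_SIZE / sizeof(struct dma_pte)
 * == 512 PTEs, so first_pte_in_page() is true exactly when 'pte' is the
 * first entry of a table page.  Callers below use it to batch cache
 * flushes one table page at a time.
 */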
313
/*
 * This domain is a statically identity mapping domain.
 *	1. This domain creates a static 1:1 mapping to all usable memory.
 *	2. It maps to each iommu if successful.
 *	3. Each iommu maps to this domain if successful.
 */
19943b0e
DW
320static struct dmar_domain *si_domain;
321static int hw_pass_through = 1;
2c2e2c38 322
3b5410e7 323/* devices under the same p2p bridge are owned in one domain */
cdc7b837 324#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
3b5410e7 325
/* domain represents a virtual machine; more than one device
 * across iommus may be owned in one domain, e.g. kvm guest.
 */
#define DOMAIN_FLAG_VIRTUAL_MACHINE	(1 << 1)
330
/* si_domain contains multiple devices */
#define DOMAIN_FLAG_STATIC_IDENTITY	(1 << 2)
333
99126f7c
MM
334struct dmar_domain {
335 int id; /* domain id */
4c923d47 336 int nid; /* node id */
8c11e798 337 unsigned long iommu_bmp; /* bitmap of iommus this domain uses*/
99126f7c
MM
338
339 struct list_head devices; /* all devices' list */
340 struct iova_domain iovad; /* iova's that belong to this domain */
341
342 struct dma_pte *pgd; /* virtual address */
99126f7c
MM
343 int gaw; /* max guest address width */
344
345 /* adjusted guest address width, 0 is level 2 30-bit */
346 int agaw;
347
3b5410e7 348 int flags; /* flags to find out type of domain */
8e604097
WH
349
350 int iommu_coherency;/* indicate coherency of iommu access */
58c610bd 351 int iommu_snooping; /* indicate snooping control feature*/
c7151a8d 352 int iommu_count; /* reference count of iommu */
6dd9a7c7
YS
353 int iommu_superpage;/* Level of superpages supported:
354 0 == 4KiB (no superpages), 1 == 2MiB,
355 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
c7151a8d 356 spinlock_t iommu_lock; /* protect iommu set in domain */
fe40f1e0 357 u64 max_addr; /* maximum mapped address */
99126f7c
MM
358};
359
a647dacb
MM
360/* PCI domain-device relationship */
361struct device_domain_info {
362 struct list_head link; /* link to domain siblings */
363 struct list_head global; /* link to global list */
276dbf99
DW
364 int segment; /* PCI domain */
365 u8 bus; /* PCI bus number */
a647dacb 366 u8 devfn; /* PCI devfn number */
45e829ea 367 struct pci_dev *dev; /* it's NULL for PCIe-to-PCI bridge */
93a23a72 368 struct intel_iommu *iommu; /* IOMMU used by this device */
a647dacb
MM
369 struct dmar_domain *domain; /* pointer to domain */
370};
371
5e0d2a6f 372static void flush_unmaps_timeout(unsigned long data);
373
374DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
375
80b20dd8 376#define HIGH_WATER_MARK 250
377struct deferred_flush_tables {
378 int next;
379 struct iova *iova[HIGH_WATER_MARK];
380 struct dmar_domain *domain[HIGH_WATER_MARK];
381};
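
/*
 * Note: these tables implement deferred (batched) IOTLB flushing for
 * unmaps.  Freed IOVAs are queued together with their domain and are
 * released in batches by flush_unmaps_timeout(), up to HIGH_WATER_MARK
 * entries at a time.  Booting with intel_iommu=strict disables this
 * batching.
 */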
382
383static struct deferred_flush_tables *deferred_flush;
384
5e0d2a6f 385/* bitmap for indexing intel_iommus */
5e0d2a6f 386static int g_num_of_iommus;
387
388static DEFINE_SPINLOCK(async_umap_flush_lock);
389static LIST_HEAD(unmaps_to_do);
390
391static int timer_on;
392static long list_size;
5e0d2a6f 393
ba395927
KA
394static void domain_remove_dev_info(struct dmar_domain *domain);
395
0cd5c3c8
KM
396#ifdef CONFIG_DMAR_DEFAULT_ON
397int dmar_disabled = 0;
398#else
399int dmar_disabled = 1;
400#endif /*CONFIG_DMAR_DEFAULT_ON*/
401
2d9e667e 402static int dmar_map_gfx = 1;
7d3b03ce 403static int dmar_forcedac;
5e0d2a6f 404static int intel_iommu_strict;
6dd9a7c7 405static int intel_iommu_superpage = 1;
ba395927
KA
406
407#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
408static DEFINE_SPINLOCK(device_domain_lock);
409static LIST_HEAD(device_domain_list);
410
a8bcbb0d
JR
411static struct iommu_ops intel_iommu_ops;
412
ba395927
KA
413static int __init intel_iommu_setup(char *str)
414{
415 if (!str)
416 return -EINVAL;
417 while (*str) {
0cd5c3c8
KM
418 if (!strncmp(str, "on", 2)) {
419 dmar_disabled = 0;
420 printk(KERN_INFO "Intel-IOMMU: enabled\n");
421 } else if (!strncmp(str, "off", 3)) {
ba395927 422 dmar_disabled = 1;
0cd5c3c8 423 printk(KERN_INFO "Intel-IOMMU: disabled\n");
ba395927
KA
424 } else if (!strncmp(str, "igfx_off", 8)) {
425 dmar_map_gfx = 0;
426 printk(KERN_INFO
427 "Intel-IOMMU: disable GFX device mapping\n");
7d3b03ce 428 } else if (!strncmp(str, "forcedac", 8)) {
5e0d2a6f 429 printk(KERN_INFO
7d3b03ce
KA
430 "Intel-IOMMU: Forcing DAC for PCI devices\n");
431 dmar_forcedac = 1;
5e0d2a6f 432 } else if (!strncmp(str, "strict", 6)) {
433 printk(KERN_INFO
434 "Intel-IOMMU: disable batched IOTLB flush\n");
435 intel_iommu_strict = 1;
6dd9a7c7
YS
436 } else if (!strncmp(str, "sp_off", 6)) {
437 printk(KERN_INFO
438 "Intel-IOMMU: disable supported super page\n");
439 intel_iommu_superpage = 0;
ba395927
KA
440 }
441
442 str += strcspn(str, ",");
443 while (*str == ',')
444 str++;
445 }
446 return 0;
447}
448__setup("intel_iommu=", intel_iommu_setup);
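
/*
 * Example (illustration only): options are comma-separated on the kernel
 * command line, e.g.:
 *
 *	intel_iommu=on,strict,sp_off
 *
 * enables the IOMMU, disables batched IOTLB flushing and disables
 * superpage support.
 */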
449
450static struct kmem_cache *iommu_domain_cache;
451static struct kmem_cache *iommu_devinfo_cache;
452static struct kmem_cache *iommu_iova_cache;
453
4c923d47 454static inline void *alloc_pgtable_page(int node)
eb3fa7cb 455{
4c923d47
SS
456 struct page *page;
457 void *vaddr = NULL;
eb3fa7cb 458
4c923d47
SS
459 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
460 if (page)
461 vaddr = page_address(page);
eb3fa7cb 462 return vaddr;
ba395927
KA
463}
464
465static inline void free_pgtable_page(void *vaddr)
466{
467 free_page((unsigned long)vaddr);
468}
469
470static inline void *alloc_domain_mem(void)
471{
354bb65e 472 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
ba395927
KA
473}
474
38717946 475static void free_domain_mem(void *vaddr)
ba395927
KA
476{
477 kmem_cache_free(iommu_domain_cache, vaddr);
478}
479
480static inline void * alloc_devinfo_mem(void)
481{
354bb65e 482 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
ba395927
KA
483}
484
485static inline void free_devinfo_mem(void *vaddr)
486{
487 kmem_cache_free(iommu_devinfo_cache, vaddr);
488}
489
490struct iova *alloc_iova_mem(void)
491{
354bb65e 492 return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
ba395927
KA
493}
494
495void free_iova_mem(struct iova *iova)
496{
497 kmem_cache_free(iommu_iova_cache, iova);
498}
499
1b573683 500
4ed0d3e6 501static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
1b573683
WH
502{
503 unsigned long sagaw;
504 int agaw = -1;
505
506 sagaw = cap_sagaw(iommu->cap);
4ed0d3e6 507 for (agaw = width_to_agaw(max_gaw);
1b573683
WH
508 agaw >= 0; agaw--) {
509 if (test_bit(agaw, &sagaw))
510 break;
511 }
512
513 return agaw;
514}
515
4ed0d3e6
FY
516/*
517 * Calculate max SAGAW for each iommu.
518 */
519int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
520{
521 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
522}
523
/*
 * Calculate agaw for each iommu.
 * "SAGAW" may be different across iommus; use a default agaw, and
 * fall back to a smaller supported agaw for iommus that don't support
 * the default agaw.
 */
529int iommu_calculate_agaw(struct intel_iommu *iommu)
530{
531 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
532}
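
/*
 * Worked example (illustration only): DEFAULT_DOMAIN_ADDRESS_WIDTH is 48,
 * so the search starts at agaw 2 (4-level tables).  If bit 2 of SAGAW is
 * clear, __iommu_calculate_agaw() falls back to agaw 1 (39-bit, 3-level)
 * and then agaw 0 (30-bit, 2-level), returning -1 if none is supported.
 */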
533
/* This function only returns a single iommu in a domain */
8c11e798
WH
535static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
536{
537 int iommu_id;
538
2c2e2c38 539 /* si_domain and vm domain should not get here. */
1ce28feb 540 BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
2c2e2c38 541 BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY);
1ce28feb 542
8c11e798
WH
543 iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
544 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
545 return NULL;
546
547 return g_iommus[iommu_id];
548}
549
8e604097
WH
550static void domain_update_iommu_coherency(struct dmar_domain *domain)
551{
552 int i;
553
554 domain->iommu_coherency = 1;
555
a45946ab 556 for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
8e604097
WH
557 if (!ecap_coherent(g_iommus[i]->ecap)) {
558 domain->iommu_coherency = 0;
559 break;
560 }
8e604097
WH
561 }
562}
563
58c610bd
SY
564static void domain_update_iommu_snooping(struct dmar_domain *domain)
565{
566 int i;
567
568 domain->iommu_snooping = 1;
569
a45946ab 570 for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
58c610bd
SY
571 if (!ecap_sc_support(g_iommus[i]->ecap)) {
572 domain->iommu_snooping = 0;
573 break;
574 }
58c610bd
SY
575 }
576}
577
6dd9a7c7
YS
578static void domain_update_iommu_superpage(struct dmar_domain *domain)
579{
580 int i, mask = 0xf;
581
582 if (!intel_iommu_superpage) {
583 domain->iommu_superpage = 0;
584 return;
585 }
586
587 domain->iommu_superpage = 4; /* 1TiB */
588
589 for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
590 mask |= cap_super_page_val(g_iommus[i]->cap);
591 if (!mask) {
592 break;
593 }
594 }
595 domain->iommu_superpage = fls(mask);
596}
597
58c610bd
SY
598/* Some capabilities may be different across iommus */
599static void domain_update_iommu_cap(struct dmar_domain *domain)
600{
601 domain_update_iommu_coherency(domain);
602 domain_update_iommu_snooping(domain);
6dd9a7c7 603 domain_update_iommu_superpage(domain);
58c610bd
SY
604}
605
276dbf99 606static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
c7151a8d
WH
607{
608 struct dmar_drhd_unit *drhd = NULL;
609 int i;
610
611 for_each_drhd_unit(drhd) {
612 if (drhd->ignored)
613 continue;
276dbf99
DW
614 if (segment != drhd->segment)
615 continue;
c7151a8d 616
924b6231 617 for (i = 0; i < drhd->devices_cnt; i++) {
288e4877
DH
618 if (drhd->devices[i] &&
619 drhd->devices[i]->bus->number == bus &&
c7151a8d
WH
620 drhd->devices[i]->devfn == devfn)
621 return drhd->iommu;
4958c5dc
DW
622 if (drhd->devices[i] &&
623 drhd->devices[i]->subordinate &&
924b6231
DW
624 drhd->devices[i]->subordinate->number <= bus &&
625 drhd->devices[i]->subordinate->subordinate >= bus)
626 return drhd->iommu;
627 }
c7151a8d
WH
628
629 if (drhd->include_all)
630 return drhd->iommu;
631 }
632
633 return NULL;
634}
635
5331fe6f
WH
636static void domain_flush_cache(struct dmar_domain *domain,
637 void *addr, int size)
638{
639 if (!domain->iommu_coherency)
640 clflush_cache_range(addr, size);
641}
642
ba395927
KA
643/* Gets context entry for a given bus and devfn */
644static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
645 u8 bus, u8 devfn)
646{
647 struct root_entry *root;
648 struct context_entry *context;
649 unsigned long phy_addr;
650 unsigned long flags;
651
652 spin_lock_irqsave(&iommu->lock, flags);
653 root = &iommu->root_entry[bus];
654 context = get_context_addr_from_root(root);
655 if (!context) {
4c923d47
SS
656 context = (struct context_entry *)
657 alloc_pgtable_page(iommu->node);
ba395927
KA
658 if (!context) {
659 spin_unlock_irqrestore(&iommu->lock, flags);
660 return NULL;
661 }
5b6985ce 662 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
ba395927
KA
663 phy_addr = virt_to_phys((void *)context);
664 set_root_value(root, phy_addr);
665 set_root_present(root);
666 __iommu_flush_cache(iommu, root, sizeof(*root));
667 }
668 spin_unlock_irqrestore(&iommu->lock, flags);
669 return &context[devfn];
670}
671
672static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
673{
674 struct root_entry *root;
675 struct context_entry *context;
676 int ret;
677 unsigned long flags;
678
679 spin_lock_irqsave(&iommu->lock, flags);
680 root = &iommu->root_entry[bus];
681 context = get_context_addr_from_root(root);
682 if (!context) {
683 ret = 0;
684 goto out;
685 }
c07e7d21 686 ret = context_present(&context[devfn]);
ba395927
KA
687out:
688 spin_unlock_irqrestore(&iommu->lock, flags);
689 return ret;
690}
691
692static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
693{
694 struct root_entry *root;
695 struct context_entry *context;
696 unsigned long flags;
697
698 spin_lock_irqsave(&iommu->lock, flags);
699 root = &iommu->root_entry[bus];
700 context = get_context_addr_from_root(root);
701 if (context) {
c07e7d21 702 context_clear_entry(&context[devfn]);
ba395927
KA
703 __iommu_flush_cache(iommu, &context[devfn], \
704 sizeof(*context));
705 }
706 spin_unlock_irqrestore(&iommu->lock, flags);
707}
708
709static void free_context_table(struct intel_iommu *iommu)
710{
711 struct root_entry *root;
712 int i;
713 unsigned long flags;
714 struct context_entry *context;
715
716 spin_lock_irqsave(&iommu->lock, flags);
717 if (!iommu->root_entry) {
718 goto out;
719 }
720 for (i = 0; i < ROOT_ENTRY_NR; i++) {
721 root = &iommu->root_entry[i];
722 context = get_context_addr_from_root(root);
723 if (context)
724 free_pgtable_page(context);
725 }
726 free_pgtable_page(iommu->root_entry);
727 iommu->root_entry = NULL;
728out:
729 spin_unlock_irqrestore(&iommu->lock, flags);
730}
731
b026fd28 732static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
6dd9a7c7 733 unsigned long pfn, int large_level)
ba395927 734{
b026fd28 735 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
ba395927
KA
736 struct dma_pte *parent, *pte = NULL;
737 int level = agaw_to_level(domain->agaw);
6dd9a7c7 738 int offset, target_level;
ba395927
KA
739
740 BUG_ON(!domain->pgd);
b026fd28 741 BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
ba395927
KA
742 parent = domain->pgd;
743
6dd9a7c7
YS
744 /* Search pte */
745 if (!large_level)
746 target_level = 1;
747 else
748 target_level = large_level;
749
ba395927
KA
750 while (level > 0) {
751 void *tmp_page;
752
b026fd28 753 offset = pfn_level_offset(pfn, level);
ba395927 754 pte = &parent[offset];
6dd9a7c7
YS
755 if (!large_level && (pte->val & DMA_PTE_LARGE_PAGE))
756 break;
757 if (level == target_level)
ba395927
KA
758 break;
759
19c239ce 760 if (!dma_pte_present(pte)) {
c85994e4
DW
761 uint64_t pteval;
762
4c923d47 763 tmp_page = alloc_pgtable_page(domain->nid);
ba395927 764
206a73c1 765 if (!tmp_page)
ba395927 766 return NULL;
206a73c1 767
c85994e4 768 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
64de5af0 769 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
c85994e4
DW
770 if (cmpxchg64(&pte->val, 0ULL, pteval)) {
771 /* Someone else set it while we were thinking; use theirs. */
772 free_pgtable_page(tmp_page);
773 } else {
774 dma_pte_addr(pte);
775 domain_flush_cache(domain, pte, sizeof(*pte));
776 }
ba395927 777 }
19c239ce 778 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
779 level--;
780 }
781
ba395927
KA
782 return pte;
783}
784
6dd9a7c7 785
ba395927 786/* return address's pte at specific level */
90dcfb5e
DW
787static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
788 unsigned long pfn,
6dd9a7c7 789 int level, int *large_page)
ba395927
KA
790{
791 struct dma_pte *parent, *pte = NULL;
792 int total = agaw_to_level(domain->agaw);
793 int offset;
794
795 parent = domain->pgd;
796 while (level <= total) {
90dcfb5e 797 offset = pfn_level_offset(pfn, total);
ba395927
KA
798 pte = &parent[offset];
799 if (level == total)
800 return pte;
801
6dd9a7c7
YS
802 if (!dma_pte_present(pte)) {
803 *large_page = total;
ba395927 804 break;
6dd9a7c7
YS
805 }
806
807 if (pte->val & DMA_PTE_LARGE_PAGE) {
808 *large_page = total;
809 return pte;
810 }
811
19c239ce 812 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
813 total--;
814 }
815 return NULL;
816}
817
ba395927 818/* clear last level pte, a tlb flush should be followed */
595badf5
DW
819static void dma_pte_clear_range(struct dmar_domain *domain,
820 unsigned long start_pfn,
821 unsigned long last_pfn)
ba395927 822{
04b18e65 823 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
6dd9a7c7 824 unsigned int large_page = 1;
310a5ab9 825 struct dma_pte *first_pte, *pte;
66eae846 826
04b18e65 827 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
595badf5 828 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
59c36286 829 BUG_ON(start_pfn > last_pfn);
ba395927 830
04b18e65 831 /* we don't need lock here; nobody else touches the iova range */
59c36286 832 do {
6dd9a7c7
YS
833 large_page = 1;
834 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
310a5ab9 835 if (!pte) {
6dd9a7c7 836 start_pfn = align_to_level(start_pfn + 1, large_page + 1);
310a5ab9
DW
837 continue;
838 }
6dd9a7c7 839 do {
310a5ab9 840 dma_clear_pte(pte);
6dd9a7c7 841 start_pfn += lvl_to_nr_pages(large_page);
310a5ab9 842 pte++;
75e6bf96
DW
843 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
844
310a5ab9
DW
845 domain_flush_cache(domain, first_pte,
846 (void *)pte - (void *)first_pte);
59c36286
DW
847
848 } while (start_pfn && start_pfn <= last_pfn);
ba395927
KA
849}
850
851/* free page table pages. last level pte should already be cleared */
852static void dma_pte_free_pagetable(struct dmar_domain *domain,
d794dc9b
DW
853 unsigned long start_pfn,
854 unsigned long last_pfn)
ba395927 855{
6660c63a 856 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
f3a0a52f 857 struct dma_pte *first_pte, *pte;
ba395927
KA
858 int total = agaw_to_level(domain->agaw);
859 int level;
6660c63a 860 unsigned long tmp;
6dd9a7c7 861 int large_page = 2;
ba395927 862
6660c63a
DW
863 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
864 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
59c36286 865 BUG_ON(start_pfn > last_pfn);
ba395927 866
f3a0a52f 867 /* We don't need lock here; nobody else touches the iova range */
ba395927
KA
868 level = 2;
869 while (level <= total) {
6660c63a
DW
870 tmp = align_to_level(start_pfn, level);
871
f3a0a52f 872 /* If we can't even clear one PTE at this level, we're done */
6660c63a 873 if (tmp + level_size(level) - 1 > last_pfn)
ba395927
KA
874 return;
875
59c36286 876 do {
6dd9a7c7
YS
877 large_page = level;
878 first_pte = pte = dma_pfn_level_pte(domain, tmp, level, &large_page);
879 if (large_page > level)
880 level = large_page + 1;
f3a0a52f
DW
881 if (!pte) {
882 tmp = align_to_level(tmp + 1, level + 1);
883 continue;
884 }
75e6bf96 885 do {
6a43e574
DW
886 if (dma_pte_present(pte)) {
887 free_pgtable_page(phys_to_virt(dma_pte_addr(pte)));
888 dma_clear_pte(pte);
889 }
f3a0a52f
DW
890 pte++;
891 tmp += level_size(level);
75e6bf96
DW
892 } while (!first_pte_in_page(pte) &&
893 tmp + level_size(level) - 1 <= last_pfn);
894
f3a0a52f
DW
895 domain_flush_cache(domain, first_pte,
896 (void *)pte - (void *)first_pte);
897
59c36286 898 } while (tmp && tmp + level_size(level) - 1 <= last_pfn);
ba395927
KA
899 level++;
900 }
901 /* free pgd */
d794dc9b 902 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
ba395927
KA
903 free_pgtable_page(domain->pgd);
904 domain->pgd = NULL;
905 }
906}
907
908/* iommu handling */
909static int iommu_alloc_root_entry(struct intel_iommu *iommu)
910{
911 struct root_entry *root;
912 unsigned long flags;
913
4c923d47 914 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
ba395927
KA
915 if (!root)
916 return -ENOMEM;
917
5b6985ce 918 __iommu_flush_cache(iommu, root, ROOT_SIZE);
ba395927
KA
919
920 spin_lock_irqsave(&iommu->lock, flags);
921 iommu->root_entry = root;
922 spin_unlock_irqrestore(&iommu->lock, flags);
923
924 return 0;
925}
926
ba395927
KA
927static void iommu_set_root_entry(struct intel_iommu *iommu)
928{
929 void *addr;
c416daa9 930 u32 sts;
ba395927
KA
931 unsigned long flag;
932
933 addr = iommu->root_entry;
934
935 spin_lock_irqsave(&iommu->register_lock, flag);
936 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
937
c416daa9 938 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
939
940 /* Make sure hardware complete it */
941 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 942 readl, (sts & DMA_GSTS_RTPS), sts);
ba395927
KA
943
944 spin_unlock_irqrestore(&iommu->register_lock, flag);
945}
946
947static void iommu_flush_write_buffer(struct intel_iommu *iommu)
948{
949 u32 val;
950 unsigned long flag;
951
9af88143 952 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
ba395927 953 return;
ba395927
KA
954
955 spin_lock_irqsave(&iommu->register_lock, flag);
462b60f6 956 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
957
958 /* Make sure hardware complete it */
959 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 960 readl, (!(val & DMA_GSTS_WBFS)), val);
ba395927
KA
961
962 spin_unlock_irqrestore(&iommu->register_lock, flag);
963}
964
965/* return value determine if we need a write buffer flush */
4c25a2c1
DW
966static void __iommu_flush_context(struct intel_iommu *iommu,
967 u16 did, u16 source_id, u8 function_mask,
968 u64 type)
ba395927
KA
969{
970 u64 val = 0;
971 unsigned long flag;
972
ba395927
KA
973 switch (type) {
974 case DMA_CCMD_GLOBAL_INVL:
975 val = DMA_CCMD_GLOBAL_INVL;
976 break;
977 case DMA_CCMD_DOMAIN_INVL:
978 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
979 break;
980 case DMA_CCMD_DEVICE_INVL:
981 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
982 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
983 break;
984 default:
985 BUG();
986 }
987 val |= DMA_CCMD_ICC;
988
989 spin_lock_irqsave(&iommu->register_lock, flag);
990 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
991
992 /* Make sure hardware complete it */
993 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
994 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
995
996 spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
997}
998
ba395927 999/* return value determine if we need a write buffer flush */
1f0ef2aa
DW
1000static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1001 u64 addr, unsigned int size_order, u64 type)
ba395927
KA
1002{
1003 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1004 u64 val = 0, val_iva = 0;
1005 unsigned long flag;
1006
ba395927
KA
1007 switch (type) {
1008 case DMA_TLB_GLOBAL_FLUSH:
1009 /* global flush doesn't need set IVA_REG */
1010 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1011 break;
1012 case DMA_TLB_DSI_FLUSH:
1013 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1014 break;
1015 case DMA_TLB_PSI_FLUSH:
1016 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1017 /* Note: always flush non-leaf currently */
1018 val_iva = size_order | addr;
1019 break;
1020 default:
1021 BUG();
1022 }
1023 /* Note: set drain read/write */
1024#if 0
	/*
	 * This is probably just to be extra safe. It looks like we can
	 * ignore it without any impact.
	 */
1029 if (cap_read_drain(iommu->cap))
1030 val |= DMA_TLB_READ_DRAIN;
1031#endif
1032 if (cap_write_drain(iommu->cap))
1033 val |= DMA_TLB_WRITE_DRAIN;
1034
1035 spin_lock_irqsave(&iommu->register_lock, flag);
1036 /* Note: Only uses first TLB reg currently */
1037 if (val_iva)
1038 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1039 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1040
1041 /* Make sure hardware complete it */
1042 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1043 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1044
1045 spin_unlock_irqrestore(&iommu->register_lock, flag);
1046
1047 /* check IOTLB invalidation granularity */
1048 if (DMA_TLB_IAIG(val) == 0)
1049 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
1050 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1051 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
5b6985ce
FY
1052 (unsigned long long)DMA_TLB_IIRG(type),
1053 (unsigned long long)DMA_TLB_IAIG(val));
ba395927
KA
1054}
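
/*
 * Note: the three IOTLB invalidation granularities above are global
 * (whole TLB), domain-selective (all entries tagged with 'did') and
 * page-selective, where 'addr' must be aligned to 2^size_order pages
 * and size_order is the log2 of the number of pages to invalidate.
 */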
1055
93a23a72
YZ
1056static struct device_domain_info *iommu_support_dev_iotlb(
1057 struct dmar_domain *domain, int segment, u8 bus, u8 devfn)
1058{
1059 int found = 0;
1060 unsigned long flags;
1061 struct device_domain_info *info;
1062 struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn);
1063
1064 if (!ecap_dev_iotlb_support(iommu->ecap))
1065 return NULL;
1066
1067 if (!iommu->qi)
1068 return NULL;
1069
1070 spin_lock_irqsave(&device_domain_lock, flags);
1071 list_for_each_entry(info, &domain->devices, link)
1072 if (info->bus == bus && info->devfn == devfn) {
1073 found = 1;
1074 break;
1075 }
1076 spin_unlock_irqrestore(&device_domain_lock, flags);
1077
1078 if (!found || !info->dev)
1079 return NULL;
1080
1081 if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS))
1082 return NULL;
1083
1084 if (!dmar_find_matched_atsr_unit(info->dev))
1085 return NULL;
1086
1087 info->iommu = iommu;
1088
1089 return info;
1090}
1091
1092static void iommu_enable_dev_iotlb(struct device_domain_info *info)
ba395927 1093{
93a23a72
YZ
1094 if (!info)
1095 return;
1096
1097 pci_enable_ats(info->dev, VTD_PAGE_SHIFT);
1098}
1099
1100static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1101{
1102 if (!info->dev || !pci_ats_enabled(info->dev))
1103 return;
1104
1105 pci_disable_ats(info->dev);
1106}
1107
1108static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1109 u64 addr, unsigned mask)
1110{
1111 u16 sid, qdep;
1112 unsigned long flags;
1113 struct device_domain_info *info;
1114
1115 spin_lock_irqsave(&device_domain_lock, flags);
1116 list_for_each_entry(info, &domain->devices, link) {
1117 if (!info->dev || !pci_ats_enabled(info->dev))
1118 continue;
1119
1120 sid = info->bus << 8 | info->devfn;
1121 qdep = pci_ats_queue_depth(info->dev);
1122 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1123 }
1124 spin_unlock_irqrestore(&device_domain_lock, flags);
1125}
1126
1f0ef2aa 1127static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
82653633 1128 unsigned long pfn, unsigned int pages, int map)
ba395927 1129{
9dd2fe89 1130 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
03d6a246 1131 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
ba395927 1132
ba395927
KA
1133 BUG_ON(pages == 0);
1134
	/*
	 * Fall back to domain-selective flush if no PSI support or the size
	 * is too big.
	 * PSI requires page size to be 2 ^ x, and the base address is
	 * naturally aligned to the size.
	 */
9dd2fe89
YZ
1141 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1142 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1f0ef2aa 1143 DMA_TLB_DSI_FLUSH);
9dd2fe89
YZ
1144 else
1145 iommu->flush.flush_iotlb(iommu, did, addr, mask,
1146 DMA_TLB_PSI_FLUSH);
bf92df30
YZ
1147
1148 /*
82653633
NA
1149 * In caching mode, changes of pages from non-present to present require
1150 * flush. However, device IOTLB doesn't need to be flushed in this case.
bf92df30 1151 */
82653633 1152 if (!cap_caching_mode(iommu->cap) || !map)
93a23a72 1153 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
ba395927
KA
1154}
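
/*
 * Worked example (illustration only): flushing 3 pages gives
 * mask = ilog2(__roundup_pow_of_two(3)) == 2, i.e. a naturally aligned
 * 4-page region is invalidated.  If that mask exceeds the hardware's
 * cap_max_amask_val(), the code above falls back to a domain-selective
 * flush instead.
 */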
1155
f8bab735 1156static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1157{
1158 u32 pmen;
1159 unsigned long flags;
1160
1161 spin_lock_irqsave(&iommu->register_lock, flags);
1162 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1163 pmen &= ~DMA_PMEN_EPM;
1164 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1165
1166 /* wait for the protected region status bit to clear */
1167 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1168 readl, !(pmen & DMA_PMEN_PRS), pmen);
1169
1170 spin_unlock_irqrestore(&iommu->register_lock, flags);
1171}
1172
ba395927
KA
1173static int iommu_enable_translation(struct intel_iommu *iommu)
1174{
1175 u32 sts;
1176 unsigned long flags;
1177
1178 spin_lock_irqsave(&iommu->register_lock, flags);
c416daa9
DW
1179 iommu->gcmd |= DMA_GCMD_TE;
1180 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1181
1182 /* Make sure hardware complete it */
1183 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1184 readl, (sts & DMA_GSTS_TES), sts);
ba395927 1185
ba395927
KA
1186 spin_unlock_irqrestore(&iommu->register_lock, flags);
1187 return 0;
1188}
1189
1190static int iommu_disable_translation(struct intel_iommu *iommu)
1191{
1192 u32 sts;
1193 unsigned long flag;
1194
1195 spin_lock_irqsave(&iommu->register_lock, flag);
1196 iommu->gcmd &= ~DMA_GCMD_TE;
1197 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1198
1199 /* Make sure hardware complete it */
1200 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1201 readl, (!(sts & DMA_GSTS_TES)), sts);
ba395927
KA
1202
1203 spin_unlock_irqrestore(&iommu->register_lock, flag);
1204 return 0;
1205}
1206
3460a6d9 1207
ba395927
KA
1208static int iommu_init_domains(struct intel_iommu *iommu)
1209{
1210 unsigned long ndomains;
1211 unsigned long nlongs;
1212
1213 ndomains = cap_ndoms(iommu->cap);
680a7524
YL
	pr_debug("IOMMU %d: Number of Domains supported <%ld>\n", iommu->seq_id,
		 ndomains);
ba395927
KA
1216 nlongs = BITS_TO_LONGS(ndomains);
1217
94a91b50
DD
1218 spin_lock_init(&iommu->lock);
1219
ba395927
KA
1220 /* TBD: there might be 64K domains,
1221 * consider other allocation for future chip
1222 */
1223 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1224 if (!iommu->domain_ids) {
1225 printk(KERN_ERR "Allocating domain id array failed\n");
1226 return -ENOMEM;
1227 }
1228 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1229 GFP_KERNEL);
1230 if (!iommu->domains) {
1231 printk(KERN_ERR "Allocating domain array failed\n");
ba395927
KA
1232 return -ENOMEM;
1233 }
1234
1235 /*
1236 * if Caching mode is set, then invalid translations are tagged
1237 * with domainid 0. Hence we need to pre-allocate it.
1238 */
1239 if (cap_caching_mode(iommu->cap))
1240 set_bit(0, iommu->domain_ids);
1241 return 0;
1242}
ba395927 1243
ba395927
KA
1244
1245static void domain_exit(struct dmar_domain *domain);
5e98c4b1 1246static void vm_domain_exit(struct dmar_domain *domain);
e61d98d8
SS
1247
1248void free_dmar_iommu(struct intel_iommu *iommu)
ba395927
KA
1249{
1250 struct dmar_domain *domain;
1251 int i;
c7151a8d 1252 unsigned long flags;
ba395927 1253
94a91b50 1254 if ((iommu->domains) && (iommu->domain_ids)) {
a45946ab 1255 for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
94a91b50
DD
1256 domain = iommu->domains[i];
1257 clear_bit(i, iommu->domain_ids);
1258
1259 spin_lock_irqsave(&domain->iommu_lock, flags);
1260 if (--domain->iommu_count == 0) {
1261 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
1262 vm_domain_exit(domain);
1263 else
1264 domain_exit(domain);
1265 }
1266 spin_unlock_irqrestore(&domain->iommu_lock, flags);
5e98c4b1 1267 }
ba395927
KA
1268 }
1269
1270 if (iommu->gcmd & DMA_GCMD_TE)
1271 iommu_disable_translation(iommu);
1272
1273 if (iommu->irq) {
dced35ae 1274 irq_set_handler_data(iommu->irq, NULL);
ba395927
KA
1275 /* This will mask the irq */
1276 free_irq(iommu->irq, iommu);
1277 destroy_irq(iommu->irq);
1278 }
1279
1280 kfree(iommu->domains);
1281 kfree(iommu->domain_ids);
1282
d9630fe9
WH
1283 g_iommus[iommu->seq_id] = NULL;
1284
1285 /* if all iommus are freed, free g_iommus */
1286 for (i = 0; i < g_num_of_iommus; i++) {
1287 if (g_iommus[i])
1288 break;
1289 }
1290
1291 if (i == g_num_of_iommus)
1292 kfree(g_iommus);
1293
ba395927
KA
1294 /* free context mapping */
1295 free_context_table(iommu);
ba395927
KA
1296}
1297
2c2e2c38 1298static struct dmar_domain *alloc_domain(void)
ba395927 1299{
ba395927 1300 struct dmar_domain *domain;
ba395927
KA
1301
1302 domain = alloc_domain_mem();
1303 if (!domain)
1304 return NULL;
1305
4c923d47 1306 domain->nid = -1;
2c2e2c38
FY
1307 memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
1308 domain->flags = 0;
1309
1310 return domain;
1311}
1312
1313static int iommu_attach_domain(struct dmar_domain *domain,
1314 struct intel_iommu *iommu)
1315{
1316 int num;
1317 unsigned long ndomains;
1318 unsigned long flags;
1319
ba395927
KA
1320 ndomains = cap_ndoms(iommu->cap);
1321
1322 spin_lock_irqsave(&iommu->lock, flags);
2c2e2c38 1323
ba395927
KA
1324 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1325 if (num >= ndomains) {
1326 spin_unlock_irqrestore(&iommu->lock, flags);
ba395927 1327 printk(KERN_ERR "IOMMU: no free domain ids\n");
2c2e2c38 1328 return -ENOMEM;
ba395927
KA
1329 }
1330
ba395927 1331 domain->id = num;
2c2e2c38 1332 set_bit(num, iommu->domain_ids);
8c11e798 1333 set_bit(iommu->seq_id, &domain->iommu_bmp);
ba395927
KA
1334 iommu->domains[num] = domain;
1335 spin_unlock_irqrestore(&iommu->lock, flags);
1336
2c2e2c38 1337 return 0;
ba395927
KA
1338}
1339
2c2e2c38
FY
1340static void iommu_detach_domain(struct dmar_domain *domain,
1341 struct intel_iommu *iommu)
ba395927
KA
1342{
1343 unsigned long flags;
2c2e2c38
FY
1344 int num, ndomains;
1345 int found = 0;
ba395927 1346
8c11e798 1347 spin_lock_irqsave(&iommu->lock, flags);
2c2e2c38 1348 ndomains = cap_ndoms(iommu->cap);
a45946ab 1349 for_each_set_bit(num, iommu->domain_ids, ndomains) {
2c2e2c38
FY
1350 if (iommu->domains[num] == domain) {
1351 found = 1;
1352 break;
1353 }
2c2e2c38
FY
1354 }
1355
1356 if (found) {
1357 clear_bit(num, iommu->domain_ids);
1358 clear_bit(iommu->seq_id, &domain->iommu_bmp);
1359 iommu->domains[num] = NULL;
1360 }
8c11e798 1361 spin_unlock_irqrestore(&iommu->lock, flags);
ba395927
KA
1362}
1363
1364static struct iova_domain reserved_iova_list;
8a443df4 1365static struct lock_class_key reserved_rbtree_key;
ba395927 1366
51a63e67 1367static int dmar_init_reserved_ranges(void)
ba395927
KA
1368{
1369 struct pci_dev *pdev = NULL;
1370 struct iova *iova;
1371 int i;
ba395927 1372
f661197e 1373 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
ba395927 1374
8a443df4
MG
1375 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1376 &reserved_rbtree_key);
1377
ba395927
KA
1378 /* IOAPIC ranges shouldn't be accessed by DMA */
1379 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1380 IOVA_PFN(IOAPIC_RANGE_END));
51a63e67 1381 if (!iova) {
ba395927 1382 printk(KERN_ERR "Reserve IOAPIC range failed\n");
51a63e67
JC
1383 return -ENODEV;
1384 }
ba395927
KA
1385
1386 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1387 for_each_pci_dev(pdev) {
1388 struct resource *r;
1389
1390 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1391 r = &pdev->resource[i];
1392 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1393 continue;
1a4a4551
DW
1394 iova = reserve_iova(&reserved_iova_list,
1395 IOVA_PFN(r->start),
1396 IOVA_PFN(r->end));
51a63e67 1397 if (!iova) {
ba395927 1398 printk(KERN_ERR "Reserve iova failed\n");
51a63e67
JC
1399 return -ENODEV;
1400 }
ba395927
KA
1401 }
1402 }
51a63e67 1403 return 0;
ba395927
KA
1404}
1405
1406static void domain_reserve_special_ranges(struct dmar_domain *domain)
1407{
1408 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1409}
1410
1411static inline int guestwidth_to_adjustwidth(int gaw)
1412{
1413 int agaw;
1414 int r = (gaw - 12) % 9;
1415
1416 if (r == 0)
1417 agaw = gaw;
1418 else
1419 agaw = gaw + 9 - r;
1420 if (agaw > 64)
1421 agaw = 64;
1422 return agaw;
1423}
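
/*
 * Worked example (illustration only): a guest width of 48 or 39 is
 * already of the form 12 + 9*n and is returned unchanged, while e.g.
 * gaw == 36 gives r == (36 - 12) % 9 == 6 and is rounded up to
 * 36 + 9 - 6 == 39 bits.
 */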
1424
1425static int domain_init(struct dmar_domain *domain, int guest_width)
1426{
1427 struct intel_iommu *iommu;
1428 int adjust_width, agaw;
1429 unsigned long sagaw;
1430
f661197e 1431 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
c7151a8d 1432 spin_lock_init(&domain->iommu_lock);
ba395927
KA
1433
1434 domain_reserve_special_ranges(domain);
1435
1436 /* calculate AGAW */
8c11e798 1437 iommu = domain_get_iommu(domain);
ba395927
KA
1438 if (guest_width > cap_mgaw(iommu->cap))
1439 guest_width = cap_mgaw(iommu->cap);
1440 domain->gaw = guest_width;
1441 adjust_width = guestwidth_to_adjustwidth(guest_width);
1442 agaw = width_to_agaw(adjust_width);
1443 sagaw = cap_sagaw(iommu->cap);
1444 if (!test_bit(agaw, &sagaw)) {
1445 /* hardware doesn't support it, choose a bigger one */
1446 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1447 agaw = find_next_bit(&sagaw, 5, agaw);
1448 if (agaw >= 5)
1449 return -ENODEV;
1450 }
1451 domain->agaw = agaw;
1452 INIT_LIST_HEAD(&domain->devices);
1453
8e604097
WH
1454 if (ecap_coherent(iommu->ecap))
1455 domain->iommu_coherency = 1;
1456 else
1457 domain->iommu_coherency = 0;
1458
58c610bd
SY
1459 if (ecap_sc_support(iommu->ecap))
1460 domain->iommu_snooping = 1;
1461 else
1462 domain->iommu_snooping = 0;
1463
6dd9a7c7 1464 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
c7151a8d 1465 domain->iommu_count = 1;
4c923d47 1466 domain->nid = iommu->node;
c7151a8d 1467
ba395927 1468 /* always allocate the top pgd */
4c923d47 1469 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
ba395927
KA
1470 if (!domain->pgd)
1471 return -ENOMEM;
5b6985ce 1472 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
ba395927
KA
1473 return 0;
1474}
1475
1476static void domain_exit(struct dmar_domain *domain)
1477{
2c2e2c38
FY
1478 struct dmar_drhd_unit *drhd;
1479 struct intel_iommu *iommu;
ba395927
KA
1480
	/* Domain 0 is reserved, so don't process it */
1482 if (!domain)
1483 return;
1484
7b668357
AW
1485 /* Flush any lazy unmaps that may reference this domain */
1486 if (!intel_iommu_strict)
1487 flush_unmaps_timeout(0);
1488
ba395927
KA
1489 domain_remove_dev_info(domain);
1490 /* destroy iovas */
1491 put_iova_domain(&domain->iovad);
ba395927
KA
1492
1493 /* clear ptes */
595badf5 1494 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
ba395927
KA
1495
1496 /* free page tables */
d794dc9b 1497 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
ba395927 1498
2c2e2c38
FY
1499 for_each_active_iommu(iommu, drhd)
1500 if (test_bit(iommu->seq_id, &domain->iommu_bmp))
1501 iommu_detach_domain(domain, iommu);
1502
ba395927
KA
1503 free_domain_mem(domain);
1504}
1505
4ed0d3e6
FY
1506static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
1507 u8 bus, u8 devfn, int translation)
ba395927
KA
1508{
1509 struct context_entry *context;
ba395927 1510 unsigned long flags;
5331fe6f 1511 struct intel_iommu *iommu;
ea6606b0
WH
1512 struct dma_pte *pgd;
1513 unsigned long num;
1514 unsigned long ndomains;
1515 int id;
1516 int agaw;
93a23a72 1517 struct device_domain_info *info = NULL;
ba395927
KA
1518
1519 pr_debug("Set context mapping for %02x:%02x.%d\n",
1520 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
4ed0d3e6 1521
ba395927 1522 BUG_ON(!domain->pgd);
4ed0d3e6
FY
1523 BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1524 translation != CONTEXT_TT_MULTI_LEVEL);
5331fe6f 1525
276dbf99 1526 iommu = device_to_iommu(segment, bus, devfn);
5331fe6f
WH
1527 if (!iommu)
1528 return -ENODEV;
1529
ba395927
KA
1530 context = device_to_context_entry(iommu, bus, devfn);
1531 if (!context)
1532 return -ENOMEM;
1533 spin_lock_irqsave(&iommu->lock, flags);
c07e7d21 1534 if (context_present(context)) {
ba395927
KA
1535 spin_unlock_irqrestore(&iommu->lock, flags);
1536 return 0;
1537 }
1538
ea6606b0
WH
1539 id = domain->id;
1540 pgd = domain->pgd;
1541
2c2e2c38
FY
1542 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
1543 domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) {
ea6606b0
WH
1544 int found = 0;
1545
1546 /* find an available domain id for this device in iommu */
1547 ndomains = cap_ndoms(iommu->cap);
a45946ab 1548 for_each_set_bit(num, iommu->domain_ids, ndomains) {
ea6606b0
WH
1549 if (iommu->domains[num] == domain) {
1550 id = num;
1551 found = 1;
1552 break;
1553 }
ea6606b0
WH
1554 }
1555
1556 if (found == 0) {
1557 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1558 if (num >= ndomains) {
1559 spin_unlock_irqrestore(&iommu->lock, flags);
1560 printk(KERN_ERR "IOMMU: no free domain ids\n");
1561 return -EFAULT;
1562 }
1563
1564 set_bit(num, iommu->domain_ids);
1565 iommu->domains[num] = domain;
1566 id = num;
1567 }
1568
1569 /* Skip top levels of page tables for
1570 * iommu which has less agaw than default.
1672af11 1571 * Unnecessary for PT mode.
ea6606b0 1572 */
1672af11
CW
1573 if (translation != CONTEXT_TT_PASS_THROUGH) {
1574 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1575 pgd = phys_to_virt(dma_pte_addr(pgd));
1576 if (!dma_pte_present(pgd)) {
1577 spin_unlock_irqrestore(&iommu->lock, flags);
1578 return -ENOMEM;
1579 }
ea6606b0
WH
1580 }
1581 }
1582 }
1583
1584 context_set_domain_id(context, id);
4ed0d3e6 1585
93a23a72
YZ
1586 if (translation != CONTEXT_TT_PASS_THROUGH) {
1587 info = iommu_support_dev_iotlb(domain, segment, bus, devfn);
1588 translation = info ? CONTEXT_TT_DEV_IOTLB :
1589 CONTEXT_TT_MULTI_LEVEL;
1590 }
4ed0d3e6
FY
1591 /*
1592 * In pass through mode, AW must be programmed to indicate the largest
1593 * AGAW value supported by hardware. And ASR is ignored by hardware.
1594 */
93a23a72 1595 if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
4ed0d3e6 1596 context_set_address_width(context, iommu->msagaw);
93a23a72
YZ
1597 else {
1598 context_set_address_root(context, virt_to_phys(pgd));
1599 context_set_address_width(context, iommu->agaw);
1600 }
4ed0d3e6
FY
1601
1602 context_set_translation_type(context, translation);
c07e7d21
MM
1603 context_set_fault_enable(context);
1604 context_set_present(context);
5331fe6f 1605 domain_flush_cache(domain, context, sizeof(*context));
ba395927 1606
4c25a2c1
DW
	/*
	 * It's a non-present to present mapping. If hardware doesn't cache
	 * non-present entries we only need to flush the write-buffer. If it
	 * _does_ cache non-present entries, then it does so in the special
	 * domain #0, which we have to flush:
	 */
1613 if (cap_caching_mode(iommu->cap)) {
1614 iommu->flush.flush_context(iommu, 0,
1615 (((u16)bus) << 8) | devfn,
1616 DMA_CCMD_MASK_NOBIT,
1617 DMA_CCMD_DEVICE_INVL);
82653633 1618 iommu->flush.flush_iotlb(iommu, domain->id, 0, 0, DMA_TLB_DSI_FLUSH);
4c25a2c1 1619 } else {
ba395927 1620 iommu_flush_write_buffer(iommu);
4c25a2c1 1621 }
93a23a72 1622 iommu_enable_dev_iotlb(info);
ba395927 1623 spin_unlock_irqrestore(&iommu->lock, flags);
c7151a8d
WH
1624
1625 spin_lock_irqsave(&domain->iommu_lock, flags);
1626 if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) {
1627 domain->iommu_count++;
4c923d47
SS
1628 if (domain->iommu_count == 1)
1629 domain->nid = iommu->node;
58c610bd 1630 domain_update_iommu_cap(domain);
c7151a8d
WH
1631 }
1632 spin_unlock_irqrestore(&domain->iommu_lock, flags);
ba395927
KA
1633 return 0;
1634}
1635
1636static int
4ed0d3e6
FY
1637domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
1638 int translation)
ba395927
KA
1639{
1640 int ret;
1641 struct pci_dev *tmp, *parent;
1642
276dbf99 1643 ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus),
4ed0d3e6
FY
1644 pdev->bus->number, pdev->devfn,
1645 translation);
ba395927
KA
1646 if (ret)
1647 return ret;
1648
1649 /* dependent device mapping */
1650 tmp = pci_find_upstream_pcie_bridge(pdev);
1651 if (!tmp)
1652 return 0;
1653 /* Secondary interface's bus number and devfn 0 */
1654 parent = pdev->bus->self;
1655 while (parent != tmp) {
276dbf99
DW
1656 ret = domain_context_mapping_one(domain,
1657 pci_domain_nr(parent->bus),
1658 parent->bus->number,
4ed0d3e6 1659 parent->devfn, translation);
ba395927
KA
1660 if (ret)
1661 return ret;
1662 parent = parent->bus->self;
1663 }
45e829ea 1664 if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
ba395927 1665 return domain_context_mapping_one(domain,
276dbf99 1666 pci_domain_nr(tmp->subordinate),
4ed0d3e6
FY
1667 tmp->subordinate->number, 0,
1668 translation);
ba395927
KA
1669 else /* this is a legacy PCI bridge */
1670 return domain_context_mapping_one(domain,
276dbf99
DW
1671 pci_domain_nr(tmp->bus),
1672 tmp->bus->number,
4ed0d3e6
FY
1673 tmp->devfn,
1674 translation);
ba395927
KA
1675}
1676
5331fe6f 1677static int domain_context_mapped(struct pci_dev *pdev)
ba395927
KA
1678{
1679 int ret;
1680 struct pci_dev *tmp, *parent;
5331fe6f
WH
1681 struct intel_iommu *iommu;
1682
276dbf99
DW
1683 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
1684 pdev->devfn);
5331fe6f
WH
1685 if (!iommu)
1686 return -ENODEV;
ba395927 1687
276dbf99 1688 ret = device_context_mapped(iommu, pdev->bus->number, pdev->devfn);
ba395927
KA
1689 if (!ret)
1690 return ret;
1691 /* dependent device mapping */
1692 tmp = pci_find_upstream_pcie_bridge(pdev);
1693 if (!tmp)
1694 return ret;
1695 /* Secondary interface's bus number and devfn 0 */
1696 parent = pdev->bus->self;
1697 while (parent != tmp) {
8c11e798 1698 ret = device_context_mapped(iommu, parent->bus->number,
276dbf99 1699 parent->devfn);
ba395927
KA
1700 if (!ret)
1701 return ret;
1702 parent = parent->bus->self;
1703 }
5f4d91a1 1704 if (pci_is_pcie(tmp))
276dbf99
DW
1705 return device_context_mapped(iommu, tmp->subordinate->number,
1706 0);
ba395927 1707 else
276dbf99
DW
1708 return device_context_mapped(iommu, tmp->bus->number,
1709 tmp->devfn);
ba395927
KA
1710}
1711
f532959b
FY
1712/* Returns a number of VTD pages, but aligned to MM page size */
1713static inline unsigned long aligned_nrpages(unsigned long host_addr,
1714 size_t size)
1715{
1716 host_addr &= ~PAGE_MASK;
1717 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1718}
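
/*
 * Worked example (illustration only): host_addr == 0x1800 and
 * size == 0x1000 leave an in-page offset of 0x800, and
 * PAGE_ALIGN(0x800 + 0x1000) >> VTD_PAGE_SHIFT == 2, i.e. the buffer
 * spans two 4KiB VT-d pages.
 */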
1719
6dd9a7c7
YS
1720/* Return largest possible superpage level for a given mapping */
1721static inline int hardware_largepage_caps(struct dmar_domain *domain,
1722 unsigned long iov_pfn,
1723 unsigned long phy_pfn,
1724 unsigned long pages)
1725{
1726 int support, level = 1;
1727 unsigned long pfnmerge;
1728
1729 support = domain->iommu_superpage;
1730
1731 /* To use a large page, the virtual *and* physical addresses
1732 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
1733 of them will mean we have to use smaller pages. So just
1734 merge them and check both at once. */
1735 pfnmerge = iov_pfn | phy_pfn;
1736
1737 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
1738 pages >>= VTD_STRIDE_SHIFT;
1739 if (!pages)
1740 break;
1741 pfnmerge >>= VTD_STRIDE_SHIFT;
1742 level++;
1743 support--;
1744 }
1745 return level;
1746}
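
/*
 * Worked example (illustration only): if both iov_pfn and phy_pfn have
 * their low 9 bits clear (2MiB aligned), at least 512 pages are being
 * mapped and the domain supports one level of superpages, the loop above
 * returns level 2 and the mapping uses 2MiB pages.
 */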
1747
9051aa02
DW
1748static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1749 struct scatterlist *sg, unsigned long phys_pfn,
1750 unsigned long nr_pages, int prot)
e1605495
DW
1751{
1752 struct dma_pte *first_pte = NULL, *pte = NULL;
9051aa02 1753 phys_addr_t uninitialized_var(pteval);
e1605495 1754 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
9051aa02 1755 unsigned long sg_res;
6dd9a7c7
YS
1756 unsigned int largepage_lvl = 0;
1757 unsigned long lvl_pages = 0;
e1605495
DW
1758
1759 BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
1760
1761 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1762 return -EINVAL;
1763
1764 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1765
9051aa02
DW
1766 if (sg)
1767 sg_res = 0;
1768 else {
1769 sg_res = nr_pages + 1;
1770 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1771 }
1772
6dd9a7c7 1773 while (nr_pages > 0) {
c85994e4
DW
1774 uint64_t tmp;
1775
e1605495 1776 if (!sg_res) {
f532959b 1777 sg_res = aligned_nrpages(sg->offset, sg->length);
e1605495
DW
1778 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
1779 sg->dma_length = sg->length;
1780 pteval = page_to_phys(sg_page(sg)) | prot;
6dd9a7c7 1781 phys_pfn = pteval >> VTD_PAGE_SHIFT;
e1605495 1782 }
6dd9a7c7 1783
e1605495 1784 if (!pte) {
6dd9a7c7
YS
1785 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
1786
1787 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, largepage_lvl);
e1605495
DW
1788 if (!pte)
1789 return -ENOMEM;
6dd9a7c7
YS
1790 /* It is large page*/
1791 if (largepage_lvl > 1)
1792 pteval |= DMA_PTE_LARGE_PAGE;
1793 else
1794 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
1795
e1605495
DW
1796 }
1797 /* We don't need lock here, nobody else
1798 * touches the iova range
1799 */
7766a3fb 1800 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
c85994e4 1801 if (tmp) {
1bf20f0d 1802 static int dumps = 5;
c85994e4
DW
1803 printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
1804 iov_pfn, tmp, (unsigned long long)pteval);
1bf20f0d
DW
1805 if (dumps) {
1806 dumps--;
1807 debug_dma_dump_mappings(NULL);
1808 }
1809 WARN_ON(1);
1810 }
6dd9a7c7
YS
1811
1812 lvl_pages = lvl_to_nr_pages(largepage_lvl);
1813
1814 BUG_ON(nr_pages < lvl_pages);
1815 BUG_ON(sg_res < lvl_pages);
1816
1817 nr_pages -= lvl_pages;
1818 iov_pfn += lvl_pages;
1819 phys_pfn += lvl_pages;
1820 pteval += lvl_pages * VTD_PAGE_SIZE;
1821 sg_res -= lvl_pages;
1822
1823 /* If the next PTE would be the first in a new page, then we
1824 need to flush the cache on the entries we've just written.
1825 And then we'll need to recalculate 'pte', so clear it and
1826 let it get set again in the if (!pte) block above.
1827
1828 If we're done (!nr_pages) we need to flush the cache too.
1829
1830 Also if we've been setting superpages, we may need to
1831 recalculate 'pte' and switch back to smaller pages for the
1832 end of the mapping, if the trailing size is not enough to
1833 use another superpage (i.e. sg_res < lvl_pages). */
e1605495 1834 pte++;
6dd9a7c7
YS
1835 if (!nr_pages || first_pte_in_page(pte) ||
1836 (largepage_lvl > 1 && sg_res < lvl_pages)) {
e1605495
DW
1837 domain_flush_cache(domain, first_pte,
1838 (void *)pte - (void *)first_pte);
1839 pte = NULL;
1840 }
6dd9a7c7
YS
1841
1842 if (!sg_res && nr_pages)
e1605495
DW
1843 sg = sg_next(sg);
1844 }
1845 return 0;
1846}
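/*
 * Illustrative sketch (not part of the driver): the "first PTE in a new
 * page" test that triggers the cache flush at the end of the loop above.
 * A dma_pte is 8 bytes, so a 4KiB page-table page holds 512 entries; once
 * the low 12 bits of the PTE pointer are zero we have just stepped into a
 * fresh page-table page and the previous one must be flushed. Assumes
 * 4KiB VT-d pages; the demo_ name is hypothetical.
 */
static inline int demo_first_pte_in_page(void *pte)
{
	return ((unsigned long)pte & ((1UL << 12) - 1)) == 0;
}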
1847
9051aa02
DW
1848static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1849 struct scatterlist *sg, unsigned long nr_pages,
1850 int prot)
ba395927 1851{
9051aa02
DW
1852 return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
1853}
6f6a00e4 1854
9051aa02
DW
1855static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1856 unsigned long phys_pfn, unsigned long nr_pages,
1857 int prot)
1858{
1859 return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
ba395927
KA
1860}
1861
c7151a8d 1862static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
ba395927 1863{
c7151a8d
WH
1864 if (!iommu)
1865 return;
8c11e798
WH
1866
1867 clear_context_table(iommu, bus, devfn);
1868 iommu->flush.flush_context(iommu, 0, 0, 0,
4c25a2c1 1869 DMA_CCMD_GLOBAL_INVL);
1f0ef2aa 1870 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
ba395927
KA
1871}
1872
1873static void domain_remove_dev_info(struct dmar_domain *domain)
1874{
1875 struct device_domain_info *info;
1876 unsigned long flags;
c7151a8d 1877 struct intel_iommu *iommu;
ba395927
KA
1878
1879 spin_lock_irqsave(&device_domain_lock, flags);
1880 while (!list_empty(&domain->devices)) {
1881 info = list_entry(domain->devices.next,
1882 struct device_domain_info, link);
1883 list_del(&info->link);
1884 list_del(&info->global);
1885 if (info->dev)
358dd8ac 1886 info->dev->dev.archdata.iommu = NULL;
ba395927
KA
1887 spin_unlock_irqrestore(&device_domain_lock, flags);
1888
93a23a72 1889 iommu_disable_dev_iotlb(info);
276dbf99 1890 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
c7151a8d 1891 iommu_detach_dev(iommu, info->bus, info->devfn);
ba395927
KA
1892 free_devinfo_mem(info);
1893
1894 spin_lock_irqsave(&device_domain_lock, flags);
1895 }
1896 spin_unlock_irqrestore(&device_domain_lock, flags);
1897}
1898
1899/*
1900 * find_domain
358dd8ac 1901 * Note: struct pci_dev->dev.archdata.iommu stores the device_domain_info
ba395927 1902 */
38717946 1903static struct dmar_domain *
ba395927
KA
1904find_domain(struct pci_dev *pdev)
1905{
1906 struct device_domain_info *info;
1907
1908 /* No lock here, assumes no domain exit in normal case */
358dd8ac 1909 info = pdev->dev.archdata.iommu;
ba395927
KA
1910 if (info)
1911 return info->domain;
1912 return NULL;
1913}
1914
ba395927
KA
1915/* domain is initialized */
1916static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1917{
1918 struct dmar_domain *domain, *found = NULL;
1919 struct intel_iommu *iommu;
1920 struct dmar_drhd_unit *drhd;
1921 struct device_domain_info *info, *tmp;
1922 struct pci_dev *dev_tmp;
1923 unsigned long flags;
1924 int bus = 0, devfn = 0;
276dbf99 1925 int segment;
2c2e2c38 1926 int ret;
ba395927
KA
1927
1928 domain = find_domain(pdev);
1929 if (domain)
1930 return domain;
1931
276dbf99
DW
1932 segment = pci_domain_nr(pdev->bus);
1933
ba395927
KA
1934 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1935 if (dev_tmp) {
5f4d91a1 1936 if (pci_is_pcie(dev_tmp)) {
ba395927
KA
1937 bus = dev_tmp->subordinate->number;
1938 devfn = 0;
1939 } else {
1940 bus = dev_tmp->bus->number;
1941 devfn = dev_tmp->devfn;
1942 }
1943 spin_lock_irqsave(&device_domain_lock, flags);
1944 list_for_each_entry(info, &device_domain_list, global) {
276dbf99
DW
1945 if (info->segment == segment &&
1946 info->bus == bus && info->devfn == devfn) {
ba395927
KA
1947 found = info->domain;
1948 break;
1949 }
1950 }
1951 spin_unlock_irqrestore(&device_domain_lock, flags);
 1952 /* pcie-pci bridge already has a domain, use it */
1953 if (found) {
1954 domain = found;
1955 goto found_domain;
1956 }
1957 }
1958
2c2e2c38
FY
1959 domain = alloc_domain();
1960 if (!domain)
1961 goto error;
1962
ba395927
KA
1963 /* Allocate new domain for the device */
1964 drhd = dmar_find_matched_drhd_unit(pdev);
1965 if (!drhd) {
1966 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1967 pci_name(pdev));
1968 return NULL;
1969 }
1970 iommu = drhd->iommu;
1971
2c2e2c38
FY
1972 ret = iommu_attach_domain(domain, iommu);
1973 if (ret) {
2fe9723d 1974 free_domain_mem(domain);
ba395927 1975 goto error;
2c2e2c38 1976 }
ba395927
KA
1977
1978 if (domain_init(domain, gaw)) {
1979 domain_exit(domain);
1980 goto error;
1981 }
1982
1983 /* register pcie-to-pci device */
1984 if (dev_tmp) {
1985 info = alloc_devinfo_mem();
1986 if (!info) {
1987 domain_exit(domain);
1988 goto error;
1989 }
276dbf99 1990 info->segment = segment;
ba395927
KA
1991 info->bus = bus;
1992 info->devfn = devfn;
1993 info->dev = NULL;
1994 info->domain = domain;
1995 /* This domain is shared by devices under p2p bridge */
3b5410e7 1996 domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
ba395927
KA
1997
 1998 /* pcie-to-pci bridge already has a domain, use it */
1999 found = NULL;
2000 spin_lock_irqsave(&device_domain_lock, flags);
2001 list_for_each_entry(tmp, &device_domain_list, global) {
276dbf99
DW
2002 if (tmp->segment == segment &&
2003 tmp->bus == bus && tmp->devfn == devfn) {
ba395927
KA
2004 found = tmp->domain;
2005 break;
2006 }
2007 }
2008 if (found) {
00dfff77 2009 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927
KA
2010 free_devinfo_mem(info);
2011 domain_exit(domain);
2012 domain = found;
2013 } else {
2014 list_add(&info->link, &domain->devices);
2015 list_add(&info->global, &device_domain_list);
00dfff77 2016 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927 2017 }
ba395927
KA
2018 }
2019
2020found_domain:
2021 info = alloc_devinfo_mem();
2022 if (!info)
2023 goto error;
276dbf99 2024 info->segment = segment;
ba395927
KA
2025 info->bus = pdev->bus->number;
2026 info->devfn = pdev->devfn;
2027 info->dev = pdev;
2028 info->domain = domain;
2029 spin_lock_irqsave(&device_domain_lock, flags);
 2030 /* somebody else may have beaten us to it */
2031 found = find_domain(pdev);
2032 if (found != NULL) {
2033 spin_unlock_irqrestore(&device_domain_lock, flags);
2034 if (found != domain) {
2035 domain_exit(domain);
2036 domain = found;
2037 }
2038 free_devinfo_mem(info);
2039 return domain;
2040 }
2041 list_add(&info->link, &domain->devices);
2042 list_add(&info->global, &device_domain_list);
358dd8ac 2043 pdev->dev.archdata.iommu = info;
ba395927
KA
2044 spin_unlock_irqrestore(&device_domain_lock, flags);
2045 return domain;
2046error:
2047 /* recheck it here, maybe others set it */
2048 return find_domain(pdev);
2049}
2050
2c2e2c38 2051static int iommu_identity_mapping;
e0fc7e0b
DW
2052#define IDENTMAP_ALL 1
2053#define IDENTMAP_GFX 2
2054#define IDENTMAP_AZALIA 4
2c2e2c38 2055
b213203e
DW
2056static int iommu_domain_identity_map(struct dmar_domain *domain,
2057 unsigned long long start,
2058 unsigned long long end)
ba395927 2059{
c5395d5c
DW
2060 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2061 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2062
2063 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2064 dma_to_mm_pfn(last_vpfn))) {
ba395927 2065 printk(KERN_ERR "IOMMU: reserve iova failed\n");
b213203e 2066 return -ENOMEM;
ba395927
KA
2067 }
2068
c5395d5c
DW
2069 pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
2070 start, end, domain->id);
ba395927
KA
2071 /*
2072 * RMRR range might have overlap with physical memory range,
2073 * clear it first
2074 */
c5395d5c 2075 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
ba395927 2076
c5395d5c
DW
2077 return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2078 last_vpfn - first_vpfn + 1,
61df7443 2079 DMA_PTE_READ|DMA_PTE_WRITE);
b213203e
DW
2080}
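/*
 * Illustrative sketch (not part of the driver): the address-to-vPFN
 * arithmetic used above, assuming 4KiB VT-d pages. For a made-up RMRR
 * [0x000e0000, 0x000fffff] it covers vPFNs 0xe0..0xff, i.e. 32 pages
 * reserved in the iova allocator and identity mapped read/write.
 */
static inline unsigned long demo_identity_nrpages(unsigned long long start,
						   unsigned long long end)
{
	unsigned long first_vpfn = start >> 12;
	unsigned long last_vpfn = end >> 12;

	return last_vpfn - first_vpfn + 1;	/* 32 for the range above */
}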
2081
2082static int iommu_prepare_identity_map(struct pci_dev *pdev,
2083 unsigned long long start,
2084 unsigned long long end)
2085{
2086 struct dmar_domain *domain;
2087 int ret;
2088
c7ab48d2 2089 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
b213203e
DW
2090 if (!domain)
2091 return -ENOMEM;
2092
19943b0e
DW
2093 /* For _hardware_ passthrough, don't bother. But for software
2094 passthrough, we do it anyway -- it may indicate a memory
 2095 range which is reserved in E820 and so didn't get set
2096 up to start with in si_domain */
2097 if (domain == si_domain && hw_pass_through) {
2098 printk("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2099 pci_name(pdev), start, end);
2100 return 0;
2101 }
2102
2103 printk(KERN_INFO
2104 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2105 pci_name(pdev), start, end);
2ff729f5 2106
5595b528
DW
2107 if (end < start) {
2108 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2109 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2110 dmi_get_system_info(DMI_BIOS_VENDOR),
2111 dmi_get_system_info(DMI_BIOS_VERSION),
2112 dmi_get_system_info(DMI_PRODUCT_VERSION));
2113 ret = -EIO;
2114 goto error;
2115 }
2116
2ff729f5
DW
2117 if (end >> agaw_to_width(domain->agaw)) {
2118 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2119 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2120 agaw_to_width(domain->agaw),
2121 dmi_get_system_info(DMI_BIOS_VENDOR),
2122 dmi_get_system_info(DMI_BIOS_VERSION),
2123 dmi_get_system_info(DMI_PRODUCT_VERSION));
2124 ret = -EIO;
2125 goto error;
2126 }
19943b0e 2127
b213203e 2128 ret = iommu_domain_identity_map(domain, start, end);
ba395927
KA
2129 if (ret)
2130 goto error;
2131
2132 /* context entry init */
4ed0d3e6 2133 ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
b213203e
DW
2134 if (ret)
2135 goto error;
2136
2137 return 0;
2138
2139 error:
ba395927
KA
2140 domain_exit(domain);
2141 return ret;
ba395927
KA
2142}
2143
2144static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2145 struct pci_dev *pdev)
2146{
358dd8ac 2147 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927
KA
2148 return 0;
2149 return iommu_prepare_identity_map(pdev, rmrr->base_address,
70e535d1 2150 rmrr->end_address);
ba395927
KA
2151}
2152
49a0429e
KA
2153#ifdef CONFIG_DMAR_FLOPPY_WA
2154static inline void iommu_prepare_isa(void)
2155{
2156 struct pci_dev *pdev;
2157 int ret;
2158
2159 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2160 if (!pdev)
2161 return;
2162
c7ab48d2 2163 printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
70e535d1 2164 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024 - 1);
49a0429e
KA
2165
2166 if (ret)
c7ab48d2
DW
2167 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
2168 "floppy might not work\n");
49a0429e
KA
2169
2170}
2171#else
2172static inline void iommu_prepare_isa(void)
2173{
2174 return;
2175}
2176#endif /* !CONFIG_DMAR_FLOPPY_WA */
2177
2c2e2c38 2178static int md_domain_init(struct dmar_domain *domain, int guest_width);
c7ab48d2
DW
2179
2180static int __init si_domain_work_fn(unsigned long start_pfn,
2181 unsigned long end_pfn, void *datax)
2182{
2183 int *ret = datax;
2184
2185 *ret = iommu_domain_identity_map(si_domain,
2186 (uint64_t)start_pfn << PAGE_SHIFT,
2187 (uint64_t)end_pfn << PAGE_SHIFT);
2188 return *ret;
2189
2190}
2191
071e1374 2192static int __init si_domain_init(int hw)
2c2e2c38
FY
2193{
2194 struct dmar_drhd_unit *drhd;
2195 struct intel_iommu *iommu;
c7ab48d2 2196 int nid, ret = 0;
2c2e2c38
FY
2197
2198 si_domain = alloc_domain();
2199 if (!si_domain)
2200 return -EFAULT;
2201
c7ab48d2 2202 pr_debug("Identity mapping domain is domain %d\n", si_domain->id);
2c2e2c38
FY
2203
2204 for_each_active_iommu(iommu, drhd) {
2205 ret = iommu_attach_domain(si_domain, iommu);
2206 if (ret) {
2207 domain_exit(si_domain);
2208 return -EFAULT;
2209 }
2210 }
2211
2212 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2213 domain_exit(si_domain);
2214 return -EFAULT;
2215 }
2216
2217 si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
2218
19943b0e
DW
2219 if (hw)
2220 return 0;
2221
c7ab48d2
DW
2222 for_each_online_node(nid) {
2223 work_with_active_regions(nid, si_domain_work_fn, &ret);
2224 if (ret)
2225 return ret;
2226 }
2227
2c2e2c38
FY
2228 return 0;
2229}
2230
2231static void domain_remove_one_dev_info(struct dmar_domain *domain,
2232 struct pci_dev *pdev);
2233static int identity_mapping(struct pci_dev *pdev)
2234{
2235 struct device_domain_info *info;
2236
2237 if (likely(!iommu_identity_mapping))
2238 return 0;
2239
cb452a40
MT
2240 info = pdev->dev.archdata.iommu;
2241 if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2242 return (info->domain == si_domain);
2c2e2c38 2243
2c2e2c38
FY
2244 return 0;
2245}
2246
2247static int domain_add_dev_info(struct dmar_domain *domain,
5fe60f4e
DW
2248 struct pci_dev *pdev,
2249 int translation)
2c2e2c38
FY
2250{
2251 struct device_domain_info *info;
2252 unsigned long flags;
5fe60f4e 2253 int ret;
2c2e2c38
FY
2254
2255 info = alloc_devinfo_mem();
2256 if (!info)
2257 return -ENOMEM;
2258
5fe60f4e
DW
2259 ret = domain_context_mapping(domain, pdev, translation);
2260 if (ret) {
2261 free_devinfo_mem(info);
2262 return ret;
2263 }
2264
2c2e2c38
FY
2265 info->segment = pci_domain_nr(pdev->bus);
2266 info->bus = pdev->bus->number;
2267 info->devfn = pdev->devfn;
2268 info->dev = pdev;
2269 info->domain = domain;
2270
2271 spin_lock_irqsave(&device_domain_lock, flags);
2272 list_add(&info->link, &domain->devices);
2273 list_add(&info->global, &device_domain_list);
2274 pdev->dev.archdata.iommu = info;
2275 spin_unlock_irqrestore(&device_domain_lock, flags);
2276
2277 return 0;
2278}
2279
6941af28
DW
2280static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
2281{
e0fc7e0b
DW
2282 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2283 return 1;
2284
2285 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2286 return 1;
2287
2288 if (!(iommu_identity_mapping & IDENTMAP_ALL))
2289 return 0;
6941af28 2290
3dfc813d
DW
2291 /*
2292 * We want to start off with all devices in the 1:1 domain, and
2293 * take them out later if we find they can't access all of memory.
2294 *
2295 * However, we can't do this for PCI devices behind bridges,
2296 * because all PCI devices behind the same bridge will end up
2297 * with the same source-id on their transactions.
2298 *
2299 * Practically speaking, we can't change things around for these
2300 * devices at run-time, because we can't be sure there'll be no
2301 * DMA transactions in flight for any of their siblings.
2302 *
2303 * So PCI devices (unless they're on the root bus) as well as
2304 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2305 * the 1:1 domain, just in _case_ one of their siblings turns out
2306 * not to be able to map all of memory.
2307 */
5f4d91a1 2308 if (!pci_is_pcie(pdev)) {
3dfc813d
DW
2309 if (!pci_is_root_bus(pdev->bus))
2310 return 0;
2311 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2312 return 0;
2313 } else if (pdev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
2314 return 0;
2315
2316 /*
2317 * At boot time, we don't yet know if devices will be 64-bit capable.
2318 * Assume that they will -- if they turn out not to be, then we can
2319 * take them out of the 1:1 domain later.
2320 */
8fcc5372
CW
2321 if (!startup) {
2322 /*
2323 * If the device's dma_mask is less than the system's memory
2324 * size then this is not a candidate for identity mapping.
2325 */
2326 u64 dma_mask = pdev->dma_mask;
2327
2328 if (pdev->dev.coherent_dma_mask &&
2329 pdev->dev.coherent_dma_mask < dma_mask)
2330 dma_mask = pdev->dev.coherent_dma_mask;
2331
2332 return dma_mask >= dma_get_required_mask(&pdev->dev);
2333 }
6941af28
DW
2334
2335 return 1;
2336}
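/*
 * Illustrative sketch (not part of the driver): the run-time half of the
 * check above (startup == 0). A device only stays in the 1:1 domain if
 * its effective DMA mask still covers everything the kernel may hand it;
 * the masks below are example values, not taken from real hardware.
 */
static inline int demo_can_stay_identity(u64 dma_mask, u64 coherent_mask,
					  u64 required_mask)
{
	if (coherent_mask && coherent_mask < dma_mask)
		dma_mask = coherent_mask;
	return dma_mask >= required_mask;
}
/* e.g. a 32-bit-only device (mask 0xffffffff) on a machine that requires
   0x1ffffffff returns 0 and is moved out of si_domain. */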
2337
071e1374 2338static int __init iommu_prepare_static_identity_mapping(int hw)
2c2e2c38 2339{
2c2e2c38
FY
2340 struct pci_dev *pdev = NULL;
2341 int ret;
2342
19943b0e 2343 ret = si_domain_init(hw);
2c2e2c38
FY
2344 if (ret)
2345 return -EFAULT;
2346
2c2e2c38 2347 for_each_pci_dev(pdev) {
825507d6
MT
2348 /* Skip Host/PCI Bridge devices */
2349 if (IS_BRIDGE_HOST_DEVICE(pdev))
2350 continue;
6941af28 2351 if (iommu_should_identity_map(pdev, 1)) {
19943b0e
DW
2352 printk(KERN_INFO "IOMMU: %s identity mapping for device %s\n",
2353 hw ? "hardware" : "software", pci_name(pdev));
62edf5dc 2354
5fe60f4e 2355 ret = domain_add_dev_info(si_domain, pdev,
19943b0e 2356 hw ? CONTEXT_TT_PASS_THROUGH :
62edf5dc
DW
2357 CONTEXT_TT_MULTI_LEVEL);
2358 if (ret)
2359 return ret;
62edf5dc 2360 }
2c2e2c38
FY
2361 }
2362
2363 return 0;
2364}
2365
b779260b 2366static int __init init_dmars(void)
ba395927
KA
2367{
2368 struct dmar_drhd_unit *drhd;
2369 struct dmar_rmrr_unit *rmrr;
2370 struct pci_dev *pdev;
2371 struct intel_iommu *iommu;
9d783ba0 2372 int i, ret;
2c2e2c38 2373
ba395927
KA
2374 /*
2375 * for each drhd
2376 * allocate root
2377 * initialize and program root entry to not present
2378 * endfor
2379 */
2380 for_each_drhd_unit(drhd) {
5e0d2a6f 2381 g_num_of_iommus++;
2382 /*
2383 * lock not needed as this is only incremented in the single
2384 * threaded kernel __init code path all other access are read
2385 * only
2386 */
2387 }
2388
d9630fe9
WH
2389 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2390 GFP_KERNEL);
2391 if (!g_iommus) {
2392 printk(KERN_ERR "Allocating global iommu array failed\n");
2393 ret = -ENOMEM;
2394 goto error;
2395 }
2396
80b20dd8 2397 deferred_flush = kzalloc(g_num_of_iommus *
2398 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2399 if (!deferred_flush) {
5e0d2a6f 2400 ret = -ENOMEM;
2401 goto error;
2402 }
2403
5e0d2a6f 2404 for_each_drhd_unit(drhd) {
2405 if (drhd->ignored)
2406 continue;
1886e8a9
SS
2407
2408 iommu = drhd->iommu;
d9630fe9 2409 g_iommus[iommu->seq_id] = iommu;
ba395927 2410
e61d98d8
SS
2411 ret = iommu_init_domains(iommu);
2412 if (ret)
2413 goto error;
2414
ba395927
KA
2415 /*
2416 * TBD:
2417 * we could share the same root & context tables
25985edc 2418 * among all IOMMUs. Need to split it later.
ba395927
KA
2419 */
2420 ret = iommu_alloc_root_entry(iommu);
2421 if (ret) {
2422 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
2423 goto error;
2424 }
4ed0d3e6 2425 if (!ecap_pass_through(iommu->ecap))
19943b0e 2426 hw_pass_through = 0;
ba395927
KA
2427 }
2428
1531a6a6
SS
2429 /*
2430 * Start from the sane iommu hardware state.
2431 */
a77b67d4
YS
2432 for_each_drhd_unit(drhd) {
2433 if (drhd->ignored)
2434 continue;
2435
2436 iommu = drhd->iommu;
1531a6a6
SS
2437
2438 /*
2439 * If the queued invalidation is already initialized by us
2440 * (for example, while enabling interrupt-remapping) then
2441 * we got the things already rolling from a sane state.
2442 */
2443 if (iommu->qi)
2444 continue;
2445
2446 /*
2447 * Clear any previous faults.
2448 */
2449 dmar_fault(-1, iommu);
2450 /*
2451 * Disable queued invalidation if supported and already enabled
2452 * before OS handover.
2453 */
2454 dmar_disable_qi(iommu);
2455 }
2456
2457 for_each_drhd_unit(drhd) {
2458 if (drhd->ignored)
2459 continue;
2460
2461 iommu = drhd->iommu;
2462
a77b67d4
YS
2463 if (dmar_enable_qi(iommu)) {
2464 /*
2465 * Queued Invalidate not enabled, use Register Based
2466 * Invalidate
2467 */
2468 iommu->flush.flush_context = __iommu_flush_context;
2469 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
680a7524 2470 printk(KERN_INFO "IOMMU %d 0x%Lx: using Register based "
b4e0f9eb 2471 "invalidation\n",
680a7524 2472 iommu->seq_id,
b4e0f9eb 2473 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
2474 } else {
2475 iommu->flush.flush_context = qi_flush_context;
2476 iommu->flush.flush_iotlb = qi_flush_iotlb;
680a7524 2477 printk(KERN_INFO "IOMMU %d 0x%Lx: using Queued "
b4e0f9eb 2478 "invalidation\n",
680a7524 2479 iommu->seq_id,
b4e0f9eb 2480 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
2481 }
2482 }
2483
19943b0e 2484 if (iommu_pass_through)
e0fc7e0b
DW
2485 iommu_identity_mapping |= IDENTMAP_ALL;
2486
19943b0e 2487#ifdef CONFIG_DMAR_BROKEN_GFX_WA
e0fc7e0b 2488 iommu_identity_mapping |= IDENTMAP_GFX;
19943b0e 2489#endif
e0fc7e0b
DW
2490
2491 check_tylersburg_isoch();
2492
ba395927 2493 /*
19943b0e
DW
2494 * If pass through is not set or not enabled, setup context entries for
2495 * identity mappings for rmrr, gfx, and isa and may fall back to static
2496 * identity mapping if iommu_identity_mapping is set.
ba395927 2497 */
19943b0e
DW
2498 if (iommu_identity_mapping) {
2499 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
4ed0d3e6 2500 if (ret) {
19943b0e
DW
2501 printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
2502 goto error;
ba395927
KA
2503 }
2504 }
ba395927 2505 /*
19943b0e
DW
2506 * For each rmrr
2507 * for each dev attached to rmrr
2508 * do
2509 * locate drhd for dev, alloc domain for dev
2510 * allocate free domain
2511 * allocate page table entries for rmrr
2512 * if context not allocated for bus
2513 * allocate and init context
2514 * set present in root table for this bus
2515 * init context with domain, translation etc
2516 * endfor
2517 * endfor
ba395927 2518 */
19943b0e
DW
2519 printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2520 for_each_rmrr_units(rmrr) {
2521 for (i = 0; i < rmrr->devices_cnt; i++) {
2522 pdev = rmrr->devices[i];
2523 /*
 2524 * some BIOSes list non-existent devices in the
 2525 * DMAR table.
2526 */
2527 if (!pdev)
2528 continue;
2529 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
2530 if (ret)
2531 printk(KERN_ERR
2532 "IOMMU: mapping reserved region failed\n");
ba395927 2533 }
4ed0d3e6 2534 }
49a0429e 2535
19943b0e
DW
2536 iommu_prepare_isa();
2537
ba395927
KA
2538 /*
2539 * for each drhd
2540 * enable fault log
2541 * global invalidate context cache
2542 * global invalidate iotlb
2543 * enable translation
2544 */
2545 for_each_drhd_unit(drhd) {
51a63e67
JC
2546 if (drhd->ignored) {
2547 /*
2548 * we always have to disable PMRs or DMA may fail on
2549 * this device
2550 */
2551 if (force_on)
2552 iommu_disable_protect_mem_regions(drhd->iommu);
ba395927 2553 continue;
51a63e67 2554 }
ba395927 2555 iommu = drhd->iommu;
ba395927
KA
2556
2557 iommu_flush_write_buffer(iommu);
2558
3460a6d9
KA
2559 ret = dmar_set_interrupt(iommu);
2560 if (ret)
2561 goto error;
2562
ba395927
KA
2563 iommu_set_root_entry(iommu);
2564
4c25a2c1 2565 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
1f0ef2aa 2566 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
f8bab735 2567
ba395927
KA
2568 ret = iommu_enable_translation(iommu);
2569 if (ret)
2570 goto error;
b94996c9
DW
2571
2572 iommu_disable_protect_mem_regions(iommu);
ba395927
KA
2573 }
2574
2575 return 0;
2576error:
2577 for_each_drhd_unit(drhd) {
2578 if (drhd->ignored)
2579 continue;
2580 iommu = drhd->iommu;
2581 free_iommu(iommu);
2582 }
d9630fe9 2583 kfree(g_iommus);
ba395927
KA
2584 return ret;
2585}
2586
5a5e02a6 2587/* This takes a number of _MM_ pages, not VTD pages */
875764de
DW
2588static struct iova *intel_alloc_iova(struct device *dev,
2589 struct dmar_domain *domain,
2590 unsigned long nrpages, uint64_t dma_mask)
ba395927 2591{
ba395927 2592 struct pci_dev *pdev = to_pci_dev(dev);
ba395927 2593 struct iova *iova = NULL;
ba395927 2594
875764de
DW
2595 /* Restrict dma_mask to the width that the iommu can handle */
2596 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2597
2598 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
ba395927
KA
2599 /*
 2600 * First try to allocate an I/O virtual address in
284901a9 2601 * the DMA_BIT_MASK(32) range and, if that fails, try allocating
3609801e 2602 * from the higher range
ba395927 2603 */
875764de
DW
2604 iova = alloc_iova(&domain->iovad, nrpages,
2605 IOVA_PFN(DMA_BIT_MASK(32)), 1);
2606 if (iova)
2607 return iova;
2608 }
2609 iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2610 if (unlikely(!iova)) {
2611 printk(KERN_ERR "Allocating %ld-page iova for %s failed",
2612 nrpages, pci_name(pdev));
f76aec76
KA
2613 return NULL;
2614 }
2615
2616 return iova;
2617}
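/*
 * Illustrative sketch (not part of the driver): the two-step allocation
 * policy above. Unless forcedac is set, the IOVA is first placed below
 * 4GiB and only taken from the device's full mask if that fails.
 * alloc_below() is a hypothetical stand-in for alloc_iova(), assuming
 * 4KiB pages.
 */
static unsigned long demo_alloc_iova_pfn(unsigned long (*alloc_below)(unsigned long limit_pfn),
					 u64 dma_mask, int forcedac)
{
	unsigned long pfn = 0;

	if (!forcedac && dma_mask > 0xffffffffULL)
		pfn = alloc_below(0xffffffffULL >> 12);	/* try < 4GiB first */
	if (!pfn)
		pfn = alloc_below(dma_mask >> 12);	/* then the full mask */
	return pfn;
}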
2618
147202aa 2619static struct dmar_domain *__get_valid_domain_for_dev(struct pci_dev *pdev)
f76aec76
KA
2620{
2621 struct dmar_domain *domain;
2622 int ret;
2623
2624 domain = get_domain_for_dev(pdev,
2625 DEFAULT_DOMAIN_ADDRESS_WIDTH);
2626 if (!domain) {
2627 printk(KERN_ERR
2628 "Allocating domain for %s failed", pci_name(pdev));
4fe05bbc 2629 return NULL;
ba395927
KA
2630 }
2631
2632 /* make sure context mapping is ok */
5331fe6f 2633 if (unlikely(!domain_context_mapped(pdev))) {
4ed0d3e6
FY
2634 ret = domain_context_mapping(domain, pdev,
2635 CONTEXT_TT_MULTI_LEVEL);
f76aec76
KA
2636 if (ret) {
2637 printk(KERN_ERR
2638 "Domain context map for %s failed",
2639 pci_name(pdev));
4fe05bbc 2640 return NULL;
f76aec76 2641 }
ba395927
KA
2642 }
2643
f76aec76
KA
2644 return domain;
2645}
2646
147202aa
DW
2647static inline struct dmar_domain *get_valid_domain_for_dev(struct pci_dev *dev)
2648{
2649 struct device_domain_info *info;
2650
2651 /* No lock here, assumes no domain exit in normal case */
2652 info = dev->dev.archdata.iommu;
2653 if (likely(info))
2654 return info->domain;
2655
2656 return __get_valid_domain_for_dev(dev);
2657}
2658
2c2e2c38
FY
2659static int iommu_dummy(struct pci_dev *pdev)
2660{
2661 return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2662}
2663
2664/* Check if the pdev needs to go through non-identity map and unmap process.*/
73676832 2665static int iommu_no_mapping(struct device *dev)
2c2e2c38 2666{
73676832 2667 struct pci_dev *pdev;
2c2e2c38
FY
2668 int found;
2669
73676832
DW
2670 if (unlikely(dev->bus != &pci_bus_type))
2671 return 1;
2672
2673 pdev = to_pci_dev(dev);
1e4c64c4
DW
2674 if (iommu_dummy(pdev))
2675 return 1;
2676
2c2e2c38 2677 if (!iommu_identity_mapping)
1e4c64c4 2678 return 0;
2c2e2c38
FY
2679
2680 found = identity_mapping(pdev);
2681 if (found) {
6941af28 2682 if (iommu_should_identity_map(pdev, 0))
2c2e2c38
FY
2683 return 1;
2684 else {
2685 /*
2686 * 32 bit DMA is removed from si_domain and fall back
2687 * to non-identity mapping.
2688 */
2689 domain_remove_one_dev_info(si_domain, pdev);
2690 printk(KERN_INFO "32bit %s uses non-identity mapping\n",
2691 pci_name(pdev));
2692 return 0;
2693 }
2694 } else {
2695 /*
2696 * In case of a detached 64 bit DMA device from vm, the device
2697 * is put into si_domain for identity mapping.
2698 */
6941af28 2699 if (iommu_should_identity_map(pdev, 0)) {
2c2e2c38 2700 int ret;
5fe60f4e
DW
2701 ret = domain_add_dev_info(si_domain, pdev,
2702 hw_pass_through ?
2703 CONTEXT_TT_PASS_THROUGH :
2704 CONTEXT_TT_MULTI_LEVEL);
2c2e2c38
FY
2705 if (!ret) {
2706 printk(KERN_INFO "64bit %s uses identity mapping\n",
2707 pci_name(pdev));
2708 return 1;
2709 }
2710 }
2711 }
2712
1e4c64c4 2713 return 0;
2c2e2c38
FY
2714}
2715
bb9e6d65
FT
2716static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2717 size_t size, int dir, u64 dma_mask)
f76aec76
KA
2718{
2719 struct pci_dev *pdev = to_pci_dev(hwdev);
f76aec76 2720 struct dmar_domain *domain;
5b6985ce 2721 phys_addr_t start_paddr;
f76aec76
KA
2722 struct iova *iova;
2723 int prot = 0;
6865f0d1 2724 int ret;
8c11e798 2725 struct intel_iommu *iommu;
33041ec0 2726 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
f76aec76
KA
2727
2728 BUG_ON(dir == DMA_NONE);
2c2e2c38 2729
73676832 2730 if (iommu_no_mapping(hwdev))
6865f0d1 2731 return paddr;
f76aec76
KA
2732
2733 domain = get_valid_domain_for_dev(pdev);
2734 if (!domain)
2735 return 0;
2736
8c11e798 2737 iommu = domain_get_iommu(domain);
88cb6a74 2738 size = aligned_nrpages(paddr, size);
f76aec76 2739
c681d0ba 2740 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), dma_mask);
f76aec76
KA
2741 if (!iova)
2742 goto error;
2743
ba395927
KA
2744 /*
2745 * Check if DMAR supports zero-length reads on write only
2746 * mappings..
2747 */
2748 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 2749 !cap_zlr(iommu->cap))
ba395927
KA
2750 prot |= DMA_PTE_READ;
2751 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2752 prot |= DMA_PTE_WRITE;
2753 /*
6865f0d1 2754 * paddr - (paddr + size) might be partial page, we should map the whole
ba395927 2755 * page. Note: if two part of one page are separately mapped, we
6865f0d1 2756 * might have two guest_addr mapping to the same host paddr, but this
ba395927
KA
2757 * is not a big problem
2758 */
0ab36de2 2759 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
33041ec0 2760 mm_to_dma_pfn(paddr_pfn), size, prot);
ba395927
KA
2761 if (ret)
2762 goto error;
2763
1f0ef2aa
DW
2764 /* it's a non-present to present mapping. Only flush if caching mode */
2765 if (cap_caching_mode(iommu->cap))
82653633 2766 iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 1);
1f0ef2aa 2767 else
8c11e798 2768 iommu_flush_write_buffer(iommu);
f76aec76 2769
03d6a246
DW
2770 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2771 start_paddr += paddr & ~PAGE_MASK;
2772 return start_paddr;
ba395927 2773
ba395927 2774error:
f76aec76
KA
2775 if (iova)
2776 __free_iova(&domain->iovad, iova);
4cf2e75d 2777 printk(KERN_ERR"Device %s request: %zx@%llx dir %d --- failed\n",
5b6985ce 2778 pci_name(pdev), size, (unsigned long long)paddr, dir);
ba395927
KA
2779 return 0;
2780}
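/*
 * Illustrative sketch (not part of the driver): the protection-bit
 * selection inside __intel_map_single() above. Read permission is kept
 * unless the hardware can do zero-length reads on write-only mappings;
 * write permission follows the DMA direction. The demo_ helper is
 * hypothetical; zlr stands for the cap_zlr() capability bit.
 */
static inline int demo_prot_bits(enum dma_data_direction dir, int zlr)
{
	int prot = 0;

	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || !zlr)
		prot |= DMA_PTE_READ;
	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
		prot |= DMA_PTE_WRITE;
	return prot;
}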
2781
ffbbef5c
FT
2782static dma_addr_t intel_map_page(struct device *dev, struct page *page,
2783 unsigned long offset, size_t size,
2784 enum dma_data_direction dir,
2785 struct dma_attrs *attrs)
bb9e6d65 2786{
ffbbef5c
FT
2787 return __intel_map_single(dev, page_to_phys(page) + offset, size,
2788 dir, to_pci_dev(dev)->dma_mask);
bb9e6d65
FT
2789}
2790
5e0d2a6f 2791static void flush_unmaps(void)
2792{
80b20dd8 2793 int i, j;
5e0d2a6f 2794
5e0d2a6f 2795 timer_on = 0;
2796
2797 /* just flush them all */
2798 for (i = 0; i < g_num_of_iommus; i++) {
a2bb8459
WH
2799 struct intel_iommu *iommu = g_iommus[i];
2800 if (!iommu)
2801 continue;
c42d9f32 2802
9dd2fe89
YZ
2803 if (!deferred_flush[i].next)
2804 continue;
2805
78d5f0f5
NA
2806 /* In caching mode, global flushes turn emulation expensive */
2807 if (!cap_caching_mode(iommu->cap))
2808 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
93a23a72 2809 DMA_TLB_GLOBAL_FLUSH);
9dd2fe89 2810 for (j = 0; j < deferred_flush[i].next; j++) {
93a23a72
YZ
2811 unsigned long mask;
2812 struct iova *iova = deferred_flush[i].iova[j];
78d5f0f5
NA
2813 struct dmar_domain *domain = deferred_flush[i].domain[j];
2814
2815 /* On real hardware multiple invalidations are expensive */
2816 if (cap_caching_mode(iommu->cap))
2817 iommu_flush_iotlb_psi(iommu, domain->id,
2818 iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1, 0);
2819 else {
2820 mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1));
2821 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
2822 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
2823 }
93a23a72 2824 __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
80b20dd8 2825 }
9dd2fe89 2826 deferred_flush[i].next = 0;
5e0d2a6f 2827 }
2828
5e0d2a6f 2829 list_size = 0;
5e0d2a6f 2830}
2831
2832static void flush_unmaps_timeout(unsigned long data)
2833{
80b20dd8 2834 unsigned long flags;
2835
2836 spin_lock_irqsave(&async_umap_flush_lock, flags);
5e0d2a6f 2837 flush_unmaps();
80b20dd8 2838 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
5e0d2a6f 2839}
2840
2841static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2842{
2843 unsigned long flags;
80b20dd8 2844 int next, iommu_id;
8c11e798 2845 struct intel_iommu *iommu;
5e0d2a6f 2846
2847 spin_lock_irqsave(&async_umap_flush_lock, flags);
80b20dd8 2848 if (list_size == HIGH_WATER_MARK)
2849 flush_unmaps();
2850
8c11e798
WH
2851 iommu = domain_get_iommu(dom);
2852 iommu_id = iommu->seq_id;
c42d9f32 2853
80b20dd8 2854 next = deferred_flush[iommu_id].next;
2855 deferred_flush[iommu_id].domain[next] = dom;
2856 deferred_flush[iommu_id].iova[next] = iova;
2857 deferred_flush[iommu_id].next++;
5e0d2a6f 2858
2859 if (!timer_on) {
2860 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2861 timer_on = 1;
2862 }
2863 list_size++;
2864 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2865}
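/*
 * Illustrative sketch (not part of the driver): the batching policy that
 * add_unmap()/flush_unmaps() implement. Unmapped IOVAs are parked per
 * IOMMU and the IOTLB is flushed either when the 10ms timer fires or when
 * the list reaches its high-water mark, so many unmaps share one flush.
 * The demo_ helper is hypothetical.
 */
static inline int demo_should_flush_now(int queued, int high_water_mark,
					 int timer_expired)
{
	return queued >= high_water_mark || timer_expired;
}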
2866
ffbbef5c
FT
2867static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
2868 size_t size, enum dma_data_direction dir,
2869 struct dma_attrs *attrs)
ba395927 2870{
ba395927 2871 struct pci_dev *pdev = to_pci_dev(dev);
f76aec76 2872 struct dmar_domain *domain;
d794dc9b 2873 unsigned long start_pfn, last_pfn;
ba395927 2874 struct iova *iova;
8c11e798 2875 struct intel_iommu *iommu;
ba395927 2876
73676832 2877 if (iommu_no_mapping(dev))
f76aec76 2878 return;
2c2e2c38 2879
ba395927
KA
2880 domain = find_domain(pdev);
2881 BUG_ON(!domain);
2882
8c11e798
WH
2883 iommu = domain_get_iommu(domain);
2884
ba395927 2885 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
85b98276
DW
2886 if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
2887 (unsigned long long)dev_addr))
ba395927 2888 return;
ba395927 2889
d794dc9b
DW
2890 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2891 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
ba395927 2892
d794dc9b
DW
2893 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
2894 pci_name(pdev), start_pfn, last_pfn);
ba395927 2895
f76aec76 2896 /* clear the whole page */
d794dc9b
DW
2897 dma_pte_clear_range(domain, start_pfn, last_pfn);
2898
f76aec76 2899 /* free page tables */
d794dc9b
DW
2900 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2901
5e0d2a6f 2902 if (intel_iommu_strict) {
03d6a246 2903 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
82653633 2904 last_pfn - start_pfn + 1, 0);
5e0d2a6f 2905 /* free iova */
2906 __free_iova(&domain->iovad, iova);
2907 } else {
2908 add_unmap(domain, iova);
2909 /*
2910 * queue up the release of the unmap to save the 1/6th of the
2911 * cpu used up by the iotlb flush operation...
2912 */
5e0d2a6f 2913 }
ba395927
KA
2914}
2915
d7ab5c46
FT
2916static void *intel_alloc_coherent(struct device *hwdev, size_t size,
2917 dma_addr_t *dma_handle, gfp_t flags)
ba395927
KA
2918{
2919 void *vaddr;
2920 int order;
2921
5b6985ce 2922 size = PAGE_ALIGN(size);
ba395927 2923 order = get_order(size);
e8bb910d
AW
2924
2925 if (!iommu_no_mapping(hwdev))
2926 flags &= ~(GFP_DMA | GFP_DMA32);
2927 else if (hwdev->coherent_dma_mask < dma_get_required_mask(hwdev)) {
2928 if (hwdev->coherent_dma_mask < DMA_BIT_MASK(32))
2929 flags |= GFP_DMA;
2930 else
2931 flags |= GFP_DMA32;
2932 }
ba395927
KA
2933
2934 vaddr = (void *)__get_free_pages(flags, order);
2935 if (!vaddr)
2936 return NULL;
2937 memset(vaddr, 0, size);
2938
bb9e6d65
FT
2939 *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2940 DMA_BIDIRECTIONAL,
2941 hwdev->coherent_dma_mask);
ba395927
KA
2942 if (*dma_handle)
2943 return vaddr;
2944 free_pages((unsigned long)vaddr, order);
2945 return NULL;
2946}
2947
d7ab5c46
FT
2948static void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
2949 dma_addr_t dma_handle)
ba395927
KA
2950{
2951 int order;
2952
5b6985ce 2953 size = PAGE_ALIGN(size);
ba395927
KA
2954 order = get_order(size);
2955
0db9b7ae 2956 intel_unmap_page(hwdev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
ba395927
KA
2957 free_pages((unsigned long)vaddr, order);
2958}
2959
d7ab5c46
FT
2960static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2961 int nelems, enum dma_data_direction dir,
2962 struct dma_attrs *attrs)
ba395927 2963{
ba395927
KA
2964 struct pci_dev *pdev = to_pci_dev(hwdev);
2965 struct dmar_domain *domain;
d794dc9b 2966 unsigned long start_pfn, last_pfn;
f76aec76 2967 struct iova *iova;
8c11e798 2968 struct intel_iommu *iommu;
ba395927 2969
73676832 2970 if (iommu_no_mapping(hwdev))
ba395927
KA
2971 return;
2972
2973 domain = find_domain(pdev);
8c11e798
WH
2974 BUG_ON(!domain);
2975
2976 iommu = domain_get_iommu(domain);
ba395927 2977
c03ab37c 2978 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
85b98276
DW
2979 if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
2980 (unsigned long long)sglist[0].dma_address))
f76aec76 2981 return;
f76aec76 2982
d794dc9b
DW
2983 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2984 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
f76aec76
KA
2985
2986 /* clear the whole page */
d794dc9b
DW
2987 dma_pte_clear_range(domain, start_pfn, last_pfn);
2988
f76aec76 2989 /* free page tables */
d794dc9b 2990 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
f76aec76 2991
acea0018
DW
2992 if (intel_iommu_strict) {
2993 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
82653633 2994 last_pfn - start_pfn + 1, 0);
acea0018
DW
2995 /* free iova */
2996 __free_iova(&domain->iovad, iova);
2997 } else {
2998 add_unmap(domain, iova);
2999 /*
3000 * queue up the release of the unmap to save the 1/6th of the
3001 * cpu used up by the iotlb flush operation...
3002 */
3003 }
ba395927
KA
3004}
3005
ba395927 3006static int intel_nontranslate_map_sg(struct device *hddev,
c03ab37c 3007 struct scatterlist *sglist, int nelems, int dir)
ba395927
KA
3008{
3009 int i;
c03ab37c 3010 struct scatterlist *sg;
ba395927 3011
c03ab37c 3012 for_each_sg(sglist, sg, nelems, i) {
12d4d40e 3013 BUG_ON(!sg_page(sg));
4cf2e75d 3014 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
c03ab37c 3015 sg->dma_length = sg->length;
ba395927
KA
3016 }
3017 return nelems;
3018}
3019
d7ab5c46
FT
3020static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
3021 enum dma_data_direction dir, struct dma_attrs *attrs)
ba395927 3022{
ba395927 3023 int i;
ba395927
KA
3024 struct pci_dev *pdev = to_pci_dev(hwdev);
3025 struct dmar_domain *domain;
f76aec76
KA
3026 size_t size = 0;
3027 int prot = 0;
f76aec76
KA
3028 struct iova *iova = NULL;
3029 int ret;
c03ab37c 3030 struct scatterlist *sg;
b536d24d 3031 unsigned long start_vpfn;
8c11e798 3032 struct intel_iommu *iommu;
ba395927
KA
3033
3034 BUG_ON(dir == DMA_NONE);
73676832 3035 if (iommu_no_mapping(hwdev))
c03ab37c 3036 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
ba395927 3037
f76aec76
KA
3038 domain = get_valid_domain_for_dev(pdev);
3039 if (!domain)
3040 return 0;
3041
8c11e798
WH
3042 iommu = domain_get_iommu(domain);
3043
b536d24d 3044 for_each_sg(sglist, sg, nelems, i)
88cb6a74 3045 size += aligned_nrpages(sg->offset, sg->length);
f76aec76 3046
5a5e02a6
DW
3047 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
3048 pdev->dma_mask);
f76aec76 3049 if (!iova) {
c03ab37c 3050 sglist->dma_length = 0;
f76aec76
KA
3051 return 0;
3052 }
3053
3054 /*
3055 * Check if DMAR supports zero-length reads on write only
3056 * mappings..
3057 */
3058 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3059 !cap_zlr(iommu->cap))
f76aec76
KA
3060 prot |= DMA_PTE_READ;
3061 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3062 prot |= DMA_PTE_WRITE;
3063
b536d24d 3064 start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
e1605495 3065
f532959b 3066 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
e1605495
DW
3067 if (unlikely(ret)) {
3068 /* clear the page */
3069 dma_pte_clear_range(domain, start_vpfn,
3070 start_vpfn + size - 1);
3071 /* free page tables */
3072 dma_pte_free_pagetable(domain, start_vpfn,
3073 start_vpfn + size - 1);
3074 /* free iova */
3075 __free_iova(&domain->iovad, iova);
3076 return 0;
ba395927
KA
3077 }
3078
1f0ef2aa
DW
3079 /* it's a non-present to present mapping. Only flush if caching mode */
3080 if (cap_caching_mode(iommu->cap))
82653633 3081 iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 1);
1f0ef2aa 3082 else
8c11e798 3083 iommu_flush_write_buffer(iommu);
1f0ef2aa 3084
ba395927
KA
3085 return nelems;
3086}
3087
dfb805e8
FT
3088static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3089{
3090 return !dma_addr;
3091}
3092
160c1d8e 3093struct dma_map_ops intel_dma_ops = {
ba395927
KA
3094 .alloc_coherent = intel_alloc_coherent,
3095 .free_coherent = intel_free_coherent,
ba395927
KA
3096 .map_sg = intel_map_sg,
3097 .unmap_sg = intel_unmap_sg,
ffbbef5c
FT
3098 .map_page = intel_map_page,
3099 .unmap_page = intel_unmap_page,
dfb805e8 3100 .mapping_error = intel_mapping_error,
ba395927
KA
3101};
3102
3103static inline int iommu_domain_cache_init(void)
3104{
3105 int ret = 0;
3106
3107 iommu_domain_cache = kmem_cache_create("iommu_domain",
3108 sizeof(struct dmar_domain),
3109 0,
3110 SLAB_HWCACHE_ALIGN,
3111
3112 NULL);
3113 if (!iommu_domain_cache) {
3114 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
3115 ret = -ENOMEM;
3116 }
3117
3118 return ret;
3119}
3120
3121static inline int iommu_devinfo_cache_init(void)
3122{
3123 int ret = 0;
3124
3125 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3126 sizeof(struct device_domain_info),
3127 0,
3128 SLAB_HWCACHE_ALIGN,
ba395927
KA
3129 NULL);
3130 if (!iommu_devinfo_cache) {
3131 printk(KERN_ERR "Couldn't create devinfo cache\n");
3132 ret = -ENOMEM;
3133 }
3134
3135 return ret;
3136}
3137
3138static inline int iommu_iova_cache_init(void)
3139{
3140 int ret = 0;
3141
3142 iommu_iova_cache = kmem_cache_create("iommu_iova",
3143 sizeof(struct iova),
3144 0,
3145 SLAB_HWCACHE_ALIGN,
ba395927
KA
3146 NULL);
3147 if (!iommu_iova_cache) {
3148 printk(KERN_ERR "Couldn't create iova cache\n");
3149 ret = -ENOMEM;
3150 }
3151
3152 return ret;
3153}
3154
3155static int __init iommu_init_mempool(void)
3156{
3157 int ret;
3158 ret = iommu_iova_cache_init();
3159 if (ret)
3160 return ret;
3161
3162 ret = iommu_domain_cache_init();
3163 if (ret)
3164 goto domain_error;
3165
3166 ret = iommu_devinfo_cache_init();
3167 if (!ret)
3168 return ret;
3169
3170 kmem_cache_destroy(iommu_domain_cache);
3171domain_error:
3172 kmem_cache_destroy(iommu_iova_cache);
3173
3174 return -ENOMEM;
3175}
3176
3177static void __init iommu_exit_mempool(void)
3178{
3179 kmem_cache_destroy(iommu_devinfo_cache);
3180 kmem_cache_destroy(iommu_domain_cache);
3181 kmem_cache_destroy(iommu_iova_cache);
3182
3183}
3184
556ab45f
DW
3185static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3186{
3187 struct dmar_drhd_unit *drhd;
3188 u32 vtbar;
3189 int rc;
3190
3191 /* We know that this device on this chipset has its own IOMMU.
3192 * If we find it under a different IOMMU, then the BIOS is lying
3193 * to us. Hope that the IOMMU for this device is actually
3194 * disabled, and it needs no translation...
3195 */
3196 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3197 if (rc) {
3198 /* "can't" happen */
3199 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3200 return;
3201 }
3202 vtbar &= 0xffff0000;
3203
 3204 /* we know that this iommu should be at offset 0xa000 from vtbar */
3205 drhd = dmar_find_matched_drhd_unit(pdev);
3206 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3207 TAINT_FIRMWARE_WORKAROUND,
3208 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3209 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3210}
3211DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3212
ba395927
KA
3213static void __init init_no_remapping_devices(void)
3214{
3215 struct dmar_drhd_unit *drhd;
3216
3217 for_each_drhd_unit(drhd) {
3218 if (!drhd->include_all) {
3219 int i;
3220 for (i = 0; i < drhd->devices_cnt; i++)
3221 if (drhd->devices[i] != NULL)
3222 break;
3223 /* ignore DMAR unit if no pci devices exist */
3224 if (i == drhd->devices_cnt)
3225 drhd->ignored = 1;
3226 }
3227 }
3228
3229 if (dmar_map_gfx)
3230 return;
3231
3232 for_each_drhd_unit(drhd) {
3233 int i;
3234 if (drhd->ignored || drhd->include_all)
3235 continue;
3236
3237 for (i = 0; i < drhd->devices_cnt; i++)
3238 if (drhd->devices[i] &&
3239 !IS_GFX_DEVICE(drhd->devices[i]))
3240 break;
3241
3242 if (i < drhd->devices_cnt)
3243 continue;
3244
3245 /* bypass IOMMU if it is just for gfx devices */
3246 drhd->ignored = 1;
3247 for (i = 0; i < drhd->devices_cnt; i++) {
3248 if (!drhd->devices[i])
3249 continue;
358dd8ac 3250 drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
ba395927
KA
3251 }
3252 }
3253}
3254
f59c7b69
FY
3255#ifdef CONFIG_SUSPEND
3256static int init_iommu_hw(void)
3257{
3258 struct dmar_drhd_unit *drhd;
3259 struct intel_iommu *iommu = NULL;
3260
3261 for_each_active_iommu(iommu, drhd)
3262 if (iommu->qi)
3263 dmar_reenable_qi(iommu);
3264
b779260b
JC
3265 for_each_iommu(iommu, drhd) {
3266 if (drhd->ignored) {
3267 /*
3268 * we always have to disable PMRs or DMA may fail on
3269 * this device
3270 */
3271 if (force_on)
3272 iommu_disable_protect_mem_regions(iommu);
3273 continue;
3274 }
3275
f59c7b69
FY
3276 iommu_flush_write_buffer(iommu);
3277
3278 iommu_set_root_entry(iommu);
3279
3280 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3281 DMA_CCMD_GLOBAL_INVL);
f59c7b69 3282 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 3283 DMA_TLB_GLOBAL_FLUSH);
b779260b
JC
3284 if (iommu_enable_translation(iommu))
3285 return 1;
b94996c9 3286 iommu_disable_protect_mem_regions(iommu);
f59c7b69
FY
3287 }
3288
3289 return 0;
3290}
3291
3292static void iommu_flush_all(void)
3293{
3294 struct dmar_drhd_unit *drhd;
3295 struct intel_iommu *iommu;
3296
3297 for_each_active_iommu(iommu, drhd) {
3298 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3299 DMA_CCMD_GLOBAL_INVL);
f59c7b69 3300 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 3301 DMA_TLB_GLOBAL_FLUSH);
f59c7b69
FY
3302 }
3303}
3304
134fac3f 3305static int iommu_suspend(void)
f59c7b69
FY
3306{
3307 struct dmar_drhd_unit *drhd;
3308 struct intel_iommu *iommu = NULL;
3309 unsigned long flag;
3310
3311 for_each_active_iommu(iommu, drhd) {
3312 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3313 GFP_ATOMIC);
3314 if (!iommu->iommu_state)
3315 goto nomem;
3316 }
3317
3318 iommu_flush_all();
3319
3320 for_each_active_iommu(iommu, drhd) {
3321 iommu_disable_translation(iommu);
3322
3323 spin_lock_irqsave(&iommu->register_lock, flag);
3324
3325 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3326 readl(iommu->reg + DMAR_FECTL_REG);
3327 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3328 readl(iommu->reg + DMAR_FEDATA_REG);
3329 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3330 readl(iommu->reg + DMAR_FEADDR_REG);
3331 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3332 readl(iommu->reg + DMAR_FEUADDR_REG);
3333
3334 spin_unlock_irqrestore(&iommu->register_lock, flag);
3335 }
3336 return 0;
3337
3338nomem:
3339 for_each_active_iommu(iommu, drhd)
3340 kfree(iommu->iommu_state);
3341
3342 return -ENOMEM;
3343}
3344
134fac3f 3345static void iommu_resume(void)
f59c7b69
FY
3346{
3347 struct dmar_drhd_unit *drhd;
3348 struct intel_iommu *iommu = NULL;
3349 unsigned long flag;
3350
3351 if (init_iommu_hw()) {
b779260b
JC
3352 if (force_on)
3353 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3354 else
3355 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
134fac3f 3356 return;
f59c7b69
FY
3357 }
3358
3359 for_each_active_iommu(iommu, drhd) {
3360
3361 spin_lock_irqsave(&iommu->register_lock, flag);
3362
3363 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3364 iommu->reg + DMAR_FECTL_REG);
3365 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3366 iommu->reg + DMAR_FEDATA_REG);
3367 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3368 iommu->reg + DMAR_FEADDR_REG);
3369 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3370 iommu->reg + DMAR_FEUADDR_REG);
3371
3372 spin_unlock_irqrestore(&iommu->register_lock, flag);
3373 }
3374
3375 for_each_active_iommu(iommu, drhd)
3376 kfree(iommu->iommu_state);
f59c7b69
FY
3377}
3378
134fac3f 3379static struct syscore_ops iommu_syscore_ops = {
f59c7b69
FY
3380 .resume = iommu_resume,
3381 .suspend = iommu_suspend,
3382};
3383
134fac3f 3384static void __init init_iommu_pm_ops(void)
f59c7b69 3385{
134fac3f 3386 register_syscore_ops(&iommu_syscore_ops);
f59c7b69
FY
3387}
3388
3389#else
99592ba4 3390static inline void init_iommu_pm_ops(void) {}
f59c7b69
FY
3391#endif /* CONFIG_PM */
3392
318fe7df
SS
3393LIST_HEAD(dmar_rmrr_units);
3394
3395static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
3396{
3397 list_add(&rmrr->list, &dmar_rmrr_units);
3398}
3399
3400
3401int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header)
3402{
3403 struct acpi_dmar_reserved_memory *rmrr;
3404 struct dmar_rmrr_unit *rmrru;
3405
3406 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
3407 if (!rmrru)
3408 return -ENOMEM;
3409
3410 rmrru->hdr = header;
3411 rmrr = (struct acpi_dmar_reserved_memory *)header;
3412 rmrru->base_address = rmrr->base_address;
3413 rmrru->end_address = rmrr->end_address;
3414
3415 dmar_register_rmrr_unit(rmrru);
3416 return 0;
3417}
3418
3419static int __init
3420rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
3421{
3422 struct acpi_dmar_reserved_memory *rmrr;
3423 int ret;
3424
3425 rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
3426 ret = dmar_parse_dev_scope((void *)(rmrr + 1),
3427 ((void *)rmrr) + rmrr->header.length,
3428 &rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
3429
3430 if (ret || (rmrru->devices_cnt == 0)) {
3431 list_del(&rmrru->list);
3432 kfree(rmrru);
3433 }
3434 return ret;
3435}
3436
3437static LIST_HEAD(dmar_atsr_units);
3438
3439int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
3440{
3441 struct acpi_dmar_atsr *atsr;
3442 struct dmar_atsr_unit *atsru;
3443
3444 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3445 atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
3446 if (!atsru)
3447 return -ENOMEM;
3448
3449 atsru->hdr = hdr;
3450 atsru->include_all = atsr->flags & 0x1;
3451
3452 list_add(&atsru->list, &dmar_atsr_units);
3453
3454 return 0;
3455}
3456
3457static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru)
3458{
3459 int rc;
3460 struct acpi_dmar_atsr *atsr;
3461
3462 if (atsru->include_all)
3463 return 0;
3464
3465 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3466 rc = dmar_parse_dev_scope((void *)(atsr + 1),
3467 (void *)atsr + atsr->header.length,
3468 &atsru->devices_cnt, &atsru->devices,
3469 atsr->segment);
3470 if (rc || !atsru->devices_cnt) {
3471 list_del(&atsru->list);
3472 kfree(atsru);
3473 }
3474
3475 return rc;
3476}
3477
3478int dmar_find_matched_atsr_unit(struct pci_dev *dev)
3479{
3480 int i;
3481 struct pci_bus *bus;
3482 struct acpi_dmar_atsr *atsr;
3483 struct dmar_atsr_unit *atsru;
3484
3485 dev = pci_physfn(dev);
3486
3487 list_for_each_entry(atsru, &dmar_atsr_units, list) {
3488 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3489 if (atsr->segment == pci_domain_nr(dev->bus))
3490 goto found;
3491 }
3492
3493 return 0;
3494
3495found:
3496 for (bus = dev->bus; bus; bus = bus->parent) {
3497 struct pci_dev *bridge = bus->self;
3498
3499 if (!bridge || !pci_is_pcie(bridge) ||
3500 bridge->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
3501 return 0;
3502
3503 if (bridge->pcie_type == PCI_EXP_TYPE_ROOT_PORT) {
3504 for (i = 0; i < atsru->devices_cnt; i++)
3505 if (atsru->devices[i] == bridge)
3506 return 1;
3507 break;
3508 }
3509 }
3510
3511 if (atsru->include_all)
3512 return 1;
3513
3514 return 0;
3515}
3516
3517int dmar_parse_rmrr_atsr_dev(void)
3518{
3519 struct dmar_rmrr_unit *rmrr, *rmrr_n;
3520 struct dmar_atsr_unit *atsr, *atsr_n;
3521 int ret = 0;
3522
3523 list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) {
3524 ret = rmrr_parse_dev(rmrr);
3525 if (ret)
3526 return ret;
3527 }
3528
3529 list_for_each_entry_safe(atsr, atsr_n, &dmar_atsr_units, list) {
3530 ret = atsr_parse_dev(atsr);
3531 if (ret)
3532 return ret;
3533 }
3534
3535 return ret;
3536}
3537
99dcaded
FY
3538/*
 3539 * Here we only respond to the action of a device being unbound from its driver.
3540 *
3541 * Added device is not attached to its DMAR domain here yet. That will happen
3542 * when mapping the device to iova.
3543 */
3544static int device_notifier(struct notifier_block *nb,
3545 unsigned long action, void *data)
3546{
3547 struct device *dev = data;
3548 struct pci_dev *pdev = to_pci_dev(dev);
3549 struct dmar_domain *domain;
3550
44cd613c
DW
3551 if (iommu_no_mapping(dev))
3552 return 0;
3553
99dcaded
FY
3554 domain = find_domain(pdev);
3555 if (!domain)
3556 return 0;
3557
a97590e5 3558 if (action == BUS_NOTIFY_UNBOUND_DRIVER && !iommu_pass_through) {
99dcaded
FY
3559 domain_remove_one_dev_info(domain, pdev);
3560
a97590e5
AW
3561 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3562 !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
3563 list_empty(&domain->devices))
3564 domain_exit(domain);
3565 }
3566
99dcaded
FY
3567 return 0;
3568}
3569
3570static struct notifier_block device_nb = {
3571 .notifier_call = device_notifier,
3572};
3573
ba395927
KA
3574int __init intel_iommu_init(void)
3575{
3576 int ret = 0;
3577
a59b50e9
JC
3578 /* VT-d is required for a TXT/tboot launch, so enforce that */
3579 force_on = tboot_force_iommu();
3580
3581 if (dmar_table_init()) {
3582 if (force_on)
3583 panic("tboot: Failed to initialize DMAR table\n");
ba395927 3584 return -ENODEV;
a59b50e9 3585 }
ba395927 3586
c2c7286a 3587 if (dmar_dev_scope_init() < 0) {
a59b50e9
JC
3588 if (force_on)
3589 panic("tboot: Failed to initialize DMAR device scope\n");
1886e8a9 3590 return -ENODEV;
a59b50e9 3591 }
1886e8a9 3592
75f1cdf1 3593 if (no_iommu || dmar_disabled)
2ae21010
SS
3594 return -ENODEV;
3595
51a63e67
JC
3596 if (iommu_init_mempool()) {
3597 if (force_on)
3598 panic("tboot: Failed to initialize iommu memory\n");
3599 return -ENODEV;
3600 }
3601
318fe7df
SS
3602 if (list_empty(&dmar_rmrr_units))
3603 printk(KERN_INFO "DMAR: No RMRR found\n");
3604
3605 if (list_empty(&dmar_atsr_units))
3606 printk(KERN_INFO "DMAR: No ATSR found\n");
3607
51a63e67
JC
3608 if (dmar_init_reserved_ranges()) {
3609 if (force_on)
3610 panic("tboot: Failed to reserve iommu ranges\n");
3611 return -ENODEV;
3612 }
ba395927
KA
3613
3614 init_no_remapping_devices();
3615
b779260b 3616 ret = init_dmars();
ba395927 3617 if (ret) {
a59b50e9
JC
3618 if (force_on)
3619 panic("tboot: Failed to initialize DMARs\n");
ba395927
KA
3620 printk(KERN_ERR "IOMMU: dmar init failed\n");
3621 put_iova_domain(&reserved_iova_list);
3622 iommu_exit_mempool();
3623 return ret;
3624 }
3625 printk(KERN_INFO
3626 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
3627
5e0d2a6f 3628 init_timer(&unmap_timer);
75f1cdf1
FT
3629#ifdef CONFIG_SWIOTLB
3630 swiotlb = 0;
3631#endif
19943b0e 3632 dma_ops = &intel_dma_ops;
4ed0d3e6 3633
134fac3f 3634 init_iommu_pm_ops();
a8bcbb0d
JR
3635
3636 register_iommu(&intel_iommu_ops);
3637
99dcaded
FY
3638 bus_register_notifier(&pci_bus_type, &device_nb);
3639
ba395927
KA
3640 return 0;
3641}
e820482c 3642
3199aa6b
HW
3643static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
3644 struct pci_dev *pdev)
3645{
3646 struct pci_dev *tmp, *parent;
3647
3648 if (!iommu || !pdev)
3649 return;
3650
3651 /* dependent device detach */
3652 tmp = pci_find_upstream_pcie_bridge(pdev);
3653 /* Secondary interface's bus number and devfn 0 */
3654 if (tmp) {
3655 parent = pdev->bus->self;
3656 while (parent != tmp) {
3657 iommu_detach_dev(iommu, parent->bus->number,
276dbf99 3658 parent->devfn);
3199aa6b
HW
3659 parent = parent->bus->self;
3660 }
45e829ea 3661 if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
3199aa6b
HW
3662 iommu_detach_dev(iommu,
3663 tmp->subordinate->number, 0);
3664 else /* this is a legacy PCI bridge */
276dbf99
DW
3665 iommu_detach_dev(iommu, tmp->bus->number,
3666 tmp->devfn);
3199aa6b
HW
3667 }
3668}
3669
2c2e2c38 3670static void domain_remove_one_dev_info(struct dmar_domain *domain,
c7151a8d
WH
3671 struct pci_dev *pdev)
3672{
3673 struct device_domain_info *info;
3674 struct intel_iommu *iommu;
3675 unsigned long flags;
3676 int found = 0;
3677 struct list_head *entry, *tmp;
3678
276dbf99
DW
3679 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3680 pdev->devfn);
c7151a8d
WH
3681 if (!iommu)
3682 return;
3683
3684 spin_lock_irqsave(&device_domain_lock, flags);
3685 list_for_each_safe(entry, tmp, &domain->devices) {
3686 info = list_entry(entry, struct device_domain_info, link);
8519dc44
MH
3687 if (info->segment == pci_domain_nr(pdev->bus) &&
3688 info->bus == pdev->bus->number &&
c7151a8d
WH
3689 info->devfn == pdev->devfn) {
3690 list_del(&info->link);
3691 list_del(&info->global);
3692 if (info->dev)
3693 info->dev->dev.archdata.iommu = NULL;
3694 spin_unlock_irqrestore(&device_domain_lock, flags);
3695
93a23a72 3696 iommu_disable_dev_iotlb(info);
c7151a8d 3697 iommu_detach_dev(iommu, info->bus, info->devfn);
3199aa6b 3698 iommu_detach_dependent_devices(iommu, pdev);
c7151a8d
WH
3699 free_devinfo_mem(info);
3700
3701 spin_lock_irqsave(&device_domain_lock, flags);
3702
3703 if (found)
3704 break;
3705 else
3706 continue;
3707 }
3708
 3709 /* if there are no other devices under the same iommu
 3710 * owned by this domain, clear this iommu in iommu_bmp and
 3711 * update the iommu count and coherency
3712 */
276dbf99
DW
3713 if (iommu == device_to_iommu(info->segment, info->bus,
3714 info->devfn))
c7151a8d
WH
3715 found = 1;
3716 }
3717
3718 if (found == 0) {
3719 unsigned long tmp_flags;
3720 spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
3721 clear_bit(iommu->seq_id, &domain->iommu_bmp);
3722 domain->iommu_count--;
58c610bd 3723 domain_update_iommu_cap(domain);
c7151a8d 3724 spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
a97590e5 3725
9b4554b2
AW
3726 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3727 !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) {
3728 spin_lock_irqsave(&iommu->lock, tmp_flags);
3729 clear_bit(domain->id, iommu->domain_ids);
3730 iommu->domains[domain->id] = NULL;
3731 spin_unlock_irqrestore(&iommu->lock, tmp_flags);
3732 }
c7151a8d
WH
3733 }
3734
3735 spin_unlock_irqrestore(&device_domain_lock, flags);
3736}
3737
3738static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
3739{
3740 struct device_domain_info *info;
3741 struct intel_iommu *iommu;
3742 unsigned long flags1, flags2;
3743
3744 spin_lock_irqsave(&device_domain_lock, flags1);
3745 while (!list_empty(&domain->devices)) {
3746 info = list_entry(domain->devices.next,
3747 struct device_domain_info, link);
3748 list_del(&info->link);
3749 list_del(&info->global);
3750 if (info->dev)
3751 info->dev->dev.archdata.iommu = NULL;
3752
3753 spin_unlock_irqrestore(&device_domain_lock, flags1);
3754
93a23a72 3755 iommu_disable_dev_iotlb(info);
276dbf99 3756 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
c7151a8d 3757 iommu_detach_dev(iommu, info->bus, info->devfn);
3199aa6b 3758 iommu_detach_dependent_devices(iommu, info->dev);
c7151a8d
WH
3759
3760 /* clear this iommu in iommu_bmp, update iommu count
58c610bd 3761 * and capabilities
c7151a8d
WH
3762 */
3763 spin_lock_irqsave(&domain->iommu_lock, flags2);
3764 if (test_and_clear_bit(iommu->seq_id,
3765 &domain->iommu_bmp)) {
3766 domain->iommu_count--;
58c610bd 3767 domain_update_iommu_cap(domain);
c7151a8d
WH
3768 }
3769 spin_unlock_irqrestore(&domain->iommu_lock, flags2);
3770
3771 free_devinfo_mem(info);
3772 spin_lock_irqsave(&device_domain_lock, flags1);
3773 }
3774 spin_unlock_irqrestore(&device_domain_lock, flags1);
3775}
3776
5e98c4b1
WH
 3777/* domain id for virtual machine domains; it won't be set in context entries */
3778static unsigned long vm_domid;
3779
3780static struct dmar_domain *iommu_alloc_vm_domain(void)
3781{
3782 struct dmar_domain *domain;
3783
3784 domain = alloc_domain_mem();
3785 if (!domain)
3786 return NULL;
3787
3788 domain->id = vm_domid++;
4c923d47 3789 domain->nid = -1;
5e98c4b1
WH
3790 memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
3791 domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
3792
3793 return domain;
3794}
3795
2c2e2c38 3796static int md_domain_init(struct dmar_domain *domain, int guest_width)
5e98c4b1
WH
3797{
3798 int adjust_width;
3799
3800 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
5e98c4b1
WH
3801 spin_lock_init(&domain->iommu_lock);
3802
3803 domain_reserve_special_ranges(domain);
3804
3805 /* calculate AGAW */
3806 domain->gaw = guest_width;
3807 adjust_width = guestwidth_to_adjustwidth(guest_width);
3808 domain->agaw = width_to_agaw(adjust_width);
3809
3810 INIT_LIST_HEAD(&domain->devices);
3811
3812 domain->iommu_count = 0;
3813 domain->iommu_coherency = 0;
c5b15255 3814 domain->iommu_snooping = 0;
6dd9a7c7 3815 domain->iommu_superpage = 0;
fe40f1e0 3816 domain->max_addr = 0;
4c923d47 3817 domain->nid = -1;
5e98c4b1
WH
3818
3819 /* always allocate the top pgd */
4c923d47 3820 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
5e98c4b1
WH
3821 if (!domain->pgd)
3822 return -ENOMEM;
3823 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
3824 return 0;
3825}
3826
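/*
 * Worked example (assuming the width/AGAW helpers used above): with the
 * default 48-bit guest width the adjusted width stays 48, which selects
 * AGAW 2 and hence a 4-level page table; a 39-bit guest width would give
 * AGAW 1 and a 3-level table.
 */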
3827static void iommu_free_vm_domain(struct dmar_domain *domain)
3828{
3829 unsigned long flags;
3830 struct dmar_drhd_unit *drhd;
3831 struct intel_iommu *iommu;
3832 unsigned long i;
3833 unsigned long ndomains;
3834
3835 for_each_drhd_unit(drhd) {
3836 if (drhd->ignored)
3837 continue;
3838 iommu = drhd->iommu;
3839
3840 ndomains = cap_ndoms(iommu->cap);
a45946ab 3841 for_each_set_bit(i, iommu->domain_ids, ndomains) {
5e98c4b1
WH
3842 if (iommu->domains[i] == domain) {
3843 spin_lock_irqsave(&iommu->lock, flags);
3844 clear_bit(i, iommu->domain_ids);
3845 iommu->domains[i] = NULL;
3846 spin_unlock_irqrestore(&iommu->lock, flags);
3847 break;
3848 }
5e98c4b1
WH
3849 }
3850 }
3851}
3852
3853static void vm_domain_exit(struct dmar_domain *domain)
3854{
5e98c4b1
WH
 3855 /* Domain 0 is reserved, so don't process it */
3856 if (!domain)
3857 return;
3858
3859 vm_domain_remove_all_dev_info(domain);
3860 /* destroy iovas */
3861 put_iova_domain(&domain->iovad);
5e98c4b1
WH
3862
3863 /* clear ptes */
595badf5 3864 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
5e98c4b1
WH
3865
3866 /* free page tables */
d794dc9b 3867 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
5e98c4b1
WH
3868
3869 iommu_free_vm_domain(domain);
3870 free_domain_mem(domain);
3871}
3872
5d450806 3873static int intel_iommu_domain_init(struct iommu_domain *domain)
38717946 3874{
5d450806 3875 struct dmar_domain *dmar_domain;
38717946 3876
5d450806
JR
3877 dmar_domain = iommu_alloc_vm_domain();
3878 if (!dmar_domain) {
38717946 3879 printk(KERN_ERR
5d450806
JR
3880 "intel_iommu_domain_init: dmar_domain == NULL\n");
3881 return -ENOMEM;
38717946 3882 }
2c2e2c38 3883 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
38717946 3884 printk(KERN_ERR
5d450806
JR
3885 "intel_iommu_domain_init() failed\n");
3886 vm_domain_exit(dmar_domain);
3887 return -ENOMEM;
38717946 3888 }
5d450806 3889 domain->priv = dmar_domain;
faa3d6f5 3890
5d450806 3891 return 0;
38717946 3892}
38717946 3893
5d450806 3894static void intel_iommu_domain_destroy(struct iommu_domain *domain)
38717946 3895{
5d450806
JR
3896 struct dmar_domain *dmar_domain = domain->priv;
3897
3898 domain->priv = NULL;
3899 vm_domain_exit(dmar_domain);
38717946 3900}
38717946 3901
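/*
 * Illustrative usage sketch, not part of this file: callers such as the
 * KVM device-assignment code reach the two callbacks above through the
 * generic IOMMU API of this kernel generation (declared in
 * <linux/iommu.h>; iommu_domain_alloc() still takes no bus argument).
 * The helper names are made up for the example.
 */
static struct iommu_domain *example_create_domain(void)
{
	/* ends up in intel_iommu_domain_init() via intel_iommu_ops */
	return iommu_domain_alloc();
}

static void example_destroy_domain(struct iommu_domain *dom)
{
	/* ends up in intel_iommu_domain_destroy() */
	iommu_domain_free(dom);
}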
4c5478c9
JR
3902static int intel_iommu_attach_device(struct iommu_domain *domain,
3903 struct device *dev)
38717946 3904{
4c5478c9
JR
3905 struct dmar_domain *dmar_domain = domain->priv;
3906 struct pci_dev *pdev = to_pci_dev(dev);
fe40f1e0
WH
3907 struct intel_iommu *iommu;
3908 int addr_width;
faa3d6f5
WH
3909
3910 /* normally pdev is not mapped */
3911 if (unlikely(domain_context_mapped(pdev))) {
3912 struct dmar_domain *old_domain;
3913
3914 old_domain = find_domain(pdev);
3915 if (old_domain) {
2c2e2c38
FY
3916 if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
3917 dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)
3918 domain_remove_one_dev_info(old_domain, pdev);
faa3d6f5
WH
3919 else
3920 domain_remove_dev_info(old_domain);
3921 }
3922 }
3923
276dbf99
DW
3924 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3925 pdev->devfn);
fe40f1e0
WH
3926 if (!iommu)
3927 return -ENODEV;
3928
3929 /* check if this iommu agaw is sufficient for max mapped address */
3930 addr_width = agaw_to_width(iommu->agaw);
a99c47a2
TL
3931 if (addr_width > cap_mgaw(iommu->cap))
3932 addr_width = cap_mgaw(iommu->cap);
3933
3934 if (dmar_domain->max_addr > (1LL << addr_width)) {
3935 printk(KERN_ERR "%s: iommu width (%d) is not "
fe40f1e0 3936 "sufficient for the mapped address (%llx)\n",
a99c47a2 3937 __func__, addr_width, dmar_domain->max_addr);
fe40f1e0
WH
3938 return -EFAULT;
3939 }
a99c47a2
TL
3940 dmar_domain->gaw = addr_width;
3941
3942 /*
3943 * Knock out extra levels of page tables if necessary
3944 */
3945 while (iommu->agaw < dmar_domain->agaw) {
3946 struct dma_pte *pte;
3947
3948 pte = dmar_domain->pgd;
3949 if (dma_pte_present(pte)) {
25cbff16
SY
3950 dmar_domain->pgd = (struct dma_pte *)
3951 phys_to_virt(dma_pte_addr(pte));
7a661013 3952 free_pgtable_page(pte);
a99c47a2
TL
3953 }
3954 dmar_domain->agaw--;
3955 }
fe40f1e0 3956
5fe60f4e 3957 return domain_add_dev_info(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
38717946 3958}
38717946 3959
4c5478c9
JR
3960static void intel_iommu_detach_device(struct iommu_domain *domain,
3961 struct device *dev)
38717946 3962{
4c5478c9
JR
3963 struct dmar_domain *dmar_domain = domain->priv;
3964 struct pci_dev *pdev = to_pci_dev(dev);
3965
2c2e2c38 3966 domain_remove_one_dev_info(dmar_domain, pdev);
faa3d6f5 3967}
c7151a8d 3968
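/*
 * Illustrative sketch, not part of this file: attaching and detaching a
 * PCI device, as a device-assignment caller would.  Error handling is
 * trimmed and the function name is made up.
 */
static int example_assign_device(struct iommu_domain *dom, struct pci_dev *pdev)
{
	int ret;

	ret = iommu_attach_device(dom, &pdev->dev);	/* -> intel_iommu_attach_device() */
	if (ret)
		return ret;

	/* ... set up mappings and let the device DMA ... */

	iommu_detach_device(dom, &pdev->dev);		/* -> intel_iommu_detach_device() */
	return 0;
}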
b146a1c9
JR
3969static int intel_iommu_map(struct iommu_domain *domain,
3970 unsigned long iova, phys_addr_t hpa,
3971 int gfp_order, int iommu_prot)
faa3d6f5 3972{
dde57a21 3973 struct dmar_domain *dmar_domain = domain->priv;
fe40f1e0 3974 u64 max_addr;
dde57a21 3975 int prot = 0;
b146a1c9 3976 size_t size;
faa3d6f5 3977 int ret;
fe40f1e0 3978
dde57a21
JR
3979 if (iommu_prot & IOMMU_READ)
3980 prot |= DMA_PTE_READ;
3981 if (iommu_prot & IOMMU_WRITE)
3982 prot |= DMA_PTE_WRITE;
9cf06697
SY
3983 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
3984 prot |= DMA_PTE_SNP;
dde57a21 3985
b146a1c9 3986 size = PAGE_SIZE << gfp_order;
163cc52c 3987 max_addr = iova + size;
dde57a21 3988 if (dmar_domain->max_addr < max_addr) {
fe40f1e0
WH
3989 u64 end;
3990
3991 /* check if minimum agaw is sufficient for mapped address */
8954da1f 3992 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
fe40f1e0 3993 if (end < max_addr) {
8954da1f 3994 printk(KERN_ERR "%s: iommu width (%d) is not "
fe40f1e0 3995 "sufficient for the mapped address (%llx)\n",
8954da1f 3996 __func__, dmar_domain->gaw, max_addr);
fe40f1e0
WH
3997 return -EFAULT;
3998 }
dde57a21 3999 dmar_domain->max_addr = max_addr;
fe40f1e0 4000 }
ad051221
DW
4001 /* Round up size to next multiple of PAGE_SIZE, if it and
4002 the low bits of hpa would take us onto the next page */
88cb6a74 4003 size = aligned_nrpages(hpa, size);
ad051221
DW
4004 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
4005 hpa >> VTD_PAGE_SHIFT, size, prot);
faa3d6f5 4006 return ret;
38717946 4007}
38717946 4008
b146a1c9
JR
4009static int intel_iommu_unmap(struct iommu_domain *domain,
4010 unsigned long iova, int gfp_order)
38717946 4011{
dde57a21 4012 struct dmar_domain *dmar_domain = domain->priv;
b146a1c9 4013 size_t size = PAGE_SIZE << gfp_order;
4b99d352 4014
163cc52c
DW
4015 dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
4016 (iova + size - 1) >> VTD_PAGE_SHIFT);
fe40f1e0 4017
163cc52c
DW
4018 if (dmar_domain->max_addr == iova + size)
4019 dmar_domain->max_addr = iova;
b146a1c9
JR
4020
4021 return gfp_order;
38717946 4022}
38717946 4023
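/*
 * Illustrative sketch, not part of this file: in this API generation the
 * mapping granule is passed as a page order, so with 4KiB pages order 0
 * maps 4KiB and order 9 maps 2MiB (PAGE_SIZE << order), and the generic
 * layer expects the IOVA and physical address to be aligned to that
 * size.  The addresses below are made up.
 */
static int example_map_unmap(struct iommu_domain *dom)
{
	int ret;

	/* map 2MiB at IOVA 2MiB onto physical 1GiB, read/write */
	ret = iommu_map(dom, 0x200000, 0x40000000, 9,
			IOMMU_READ | IOMMU_WRITE);
	if (ret)
		return ret;

	/* tear the same range down; returns the order actually unmapped */
	iommu_unmap(dom, 0x200000, 9);
	return 0;
}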
d14d6577
JR
4024static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
4025 unsigned long iova)
38717946 4026{
d14d6577 4027 struct dmar_domain *dmar_domain = domain->priv;
38717946 4028 struct dma_pte *pte;
faa3d6f5 4029 u64 phys = 0;
38717946 4030
6dd9a7c7 4031 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, 0);
38717946 4032 if (pte)
faa3d6f5 4033 phys = dma_pte_addr(pte);
38717946 4034
faa3d6f5 4035 return phys;
38717946 4036}
a8bcbb0d 4037
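/*
 * Illustrative sketch, not part of this file: looking a mapping back up,
 * e.g. to sanity-check the example above.  A return value of 0 here
 * simply means no PTE was present for that IOVA.
 */
static bool example_mapping_present(struct iommu_domain *dom, unsigned long iova)
{
	return iommu_iova_to_phys(dom, iova) != 0;	/* -> intel_iommu_iova_to_phys() */
}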
dbb9fd86
SY
4038static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
4039 unsigned long cap)
4040{
4041 struct dmar_domain *dmar_domain = domain->priv;
4042
4043 if (cap == IOMMU_CAP_CACHE_COHERENCY)
4044 return dmar_domain->iommu_snooping;
323f99cb
TL
4045 if (cap == IOMMU_CAP_INTR_REMAP)
4046 return intr_remapping_enabled;
dbb9fd86
SY
4047
4048 return 0;
4049}
4050
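/*
 * Illustrative sketch, not part of this file: a caller can probe the
 * snooping capability reported above before asking for IOMMU_CACHE
 * mappings (which become DMA_PTE_SNP in intel_iommu_map()).
 */
static int example_map_prot(struct iommu_domain *dom)
{
	int prot = IOMMU_READ | IOMMU_WRITE;

	if (iommu_domain_has_cap(dom, IOMMU_CAP_CACHE_COHERENCY))
		prot |= IOMMU_CACHE;

	return prot;
}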
a8bcbb0d
JR
4051static struct iommu_ops intel_iommu_ops = {
4052 .domain_init = intel_iommu_domain_init,
4053 .domain_destroy = intel_iommu_domain_destroy,
4054 .attach_dev = intel_iommu_attach_device,
4055 .detach_dev = intel_iommu_detach_device,
b146a1c9
JR
4056 .map = intel_iommu_map,
4057 .unmap = intel_iommu_unmap,
a8bcbb0d 4058 .iova_to_phys = intel_iommu_iova_to_phys,
dbb9fd86 4059 .domain_has_cap = intel_iommu_domain_has_cap,
a8bcbb0d 4060};
9af88143
DW
4061
4062static void __devinit quirk_iommu_rwbf(struct pci_dev *dev)
4063{
4064 /*
4065 * Mobile 4 Series Chipset neglects to set RWBF capability,
4066 * but needs it:
4067 */
4068 printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
4069 rwbf_quirk = 1;
2d9e667e
DW
4070
4071 /* https://bugzilla.redhat.com/show_bug.cgi?id=538163 */
4072 if (dev->revision == 0x07) {
4073 printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
4074 dmar_map_gfx = 0;
4075 }
9af88143
DW
4076}
4077
4078DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
e0fc7e0b 4079
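/*
 * Illustrative sketch, not part of this file: hooking a further chipset
 * to the same workaround only needs one more fixup declaration.  The
 * device ID below is a made-up placeholder, not a known erratum entry.
 */
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1234, quirk_iommu_rwbf);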
eecfd57f
AJ
4080#define GGC 0x52
4081#define GGC_MEMORY_SIZE_MASK (0xf << 8)
4082#define GGC_MEMORY_SIZE_NONE (0x0 << 8)
4083#define GGC_MEMORY_SIZE_1M (0x1 << 8)
4084#define GGC_MEMORY_SIZE_2M (0x3 << 8)
4085#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
4086#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
4087#define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
4088#define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
4089
9eecabcb
DW
4090static void __devinit quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
4091{
4092 unsigned short ggc;
4093
eecfd57f 4094 if (pci_read_config_word(dev, GGC, &ggc))
9eecabcb
DW
4095 return;
4096
eecfd57f 4097 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
9eecabcb
DW
4098 printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
4099 dmar_map_gfx = 0;
4100 }
4101}
4102DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
4103DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
4104DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
4105DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
4106
e0fc7e0b
DW
4107/* On Tylersburg chipsets, some BIOSes have been known to enable the
4108 ISOCH DMAR unit for the Azalia sound device, but not give it any
4109 TLB entries, which causes it to deadlock. Check for that. We do
4110 this in a function called from init_dmars(), instead of in a PCI
4111 quirk, because we don't want to print the obnoxious "BIOS broken"
4112 message if VT-d is actually disabled.
4113*/
4114static void __init check_tylersburg_isoch(void)
4115{
4116 struct pci_dev *pdev;
4117 uint32_t vtisochctrl;
4118
4119 /* If there's no Azalia in the system anyway, forget it. */
4120 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
4121 if (!pdev)
4122 return;
4123 pci_dev_put(pdev);
4124
4125 /* System Management Registers. Might be hidden, in which case
4126 we can't do the sanity check. But that's OK, because the
4127 known-broken BIOSes _don't_ actually hide it, so far. */
4128 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
4129 if (!pdev)
4130 return;
4131
4132 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
4133 pci_dev_put(pdev);
4134 return;
4135 }
4136
4137 pci_dev_put(pdev);
4138
4139 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
4140 if (vtisochctrl & 1)
4141 return;
4142
4143 /* Drop all bits other than the number of TLB entries */
4144 vtisochctrl &= 0x1c;
4145
4146 /* If we have the recommended number of TLB entries (16), fine. */
4147 if (vtisochctrl == 0x10)
4148 return;
4149
4150 /* Zero TLB entries? You get to ride the short bus to school. */
4151 if (!vtisochctrl) {
4152 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
4153 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4154 dmi_get_system_info(DMI_BIOS_VENDOR),
4155 dmi_get_system_info(DMI_BIOS_VERSION),
4156 dmi_get_system_info(DMI_PRODUCT_VERSION));
4157 iommu_identity_mapping |= IDENTMAP_AZALIA;
4158 return;
4159 }
4160
4161 printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
4162 vtisochctrl);
4163}
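/*
 * Worked example for the checks above (register values illustrative):
 * vtisochctrl = 0x0001 -> Azalia DMA goes to the normal DMAR unit,
 * nothing to do; 0x0010 -> isoch unit with the recommended 16 TLB
 * entries, also fine; 0x0008 -> only 8 entries, warn but carry on;
 * 0x0000 -> no TLB entries at all, so Azalia is forced into the identity
 * map (IDENTMAP_AZALIA) to avoid the deadlock.
 */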