drivers/iommu/intel-iommu.c (Linux 3.16-rc3)
ba395927 1/*
ea8ea460 2 * Copyright © 2006-2014 Intel Corporation.
ba395927
KA
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
ea8ea460
DW
13 * Authors: David Woodhouse <dwmw2@infradead.org>,
14 * Ashok Raj <ashok.raj@intel.com>,
15 * Shaohua Li <shaohua.li@intel.com>,
16 * Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
17 * Fenghua Yu <fenghua.yu@intel.com>
ba395927
KA
18 */
19
20#include <linux/init.h>
21#include <linux/bitmap.h>
5e0d2a6f 22#include <linux/debugfs.h>
54485c30 23#include <linux/export.h>
ba395927
KA
24#include <linux/slab.h>
25#include <linux/irq.h>
26#include <linux/interrupt.h>
ba395927
KA
27#include <linux/spinlock.h>
28#include <linux/pci.h>
29#include <linux/dmar.h>
30#include <linux/dma-mapping.h>
31#include <linux/mempool.h>
75f05569 32#include <linux/memory.h>
5e0d2a6f 33#include <linux/timer.h>
38717946 34#include <linux/iova.h>
5d450806 35#include <linux/iommu.h>
38717946 36#include <linux/intel-iommu.h>
134fac3f 37#include <linux/syscore_ops.h>
69575d38 38#include <linux/tboot.h>
adb2fe02 39#include <linux/dmi.h>
5cdede24 40#include <linux/pci-ats.h>
0ee332c1 41#include <linux/memblock.h>
36746436 42#include <linux/dma-contiguous.h>
8a8f422d 43#include <asm/irq_remapping.h>
ba395927 44#include <asm/cacheflush.h>
46a7fa27 45#include <asm/iommu.h>
ba395927 46
078e1ee2 47#include "irq_remapping.h"
61e015ac 48#include "pci.h"
078e1ee2 49
5b6985ce
FY
50#define ROOT_SIZE VTD_PAGE_SIZE
51#define CONTEXT_SIZE VTD_PAGE_SIZE
52
ba395927
KA
53#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
54#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
e0fc7e0b 55#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
ba395927
KA
56
57#define IOAPIC_RANGE_START (0xfee00000)
58#define IOAPIC_RANGE_END (0xfeefffff)
59#define IOVA_START_ADDR (0x1000)
60
61#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
62
4ed0d3e6 63#define MAX_AGAW_WIDTH 64
5c645b35 64#define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
4ed0d3e6 65
2ebe3151
DW
66#define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
67#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
68
69/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
70 to match. That way, we can use 'unsigned long' for PFNs with impunity. */
71#define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
72 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
73#define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
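/*
 * Example: with 4KiB VT-d pages (VTD_PAGE_SHIFT == 12) and the default
 * 48-bit guest address width, __DOMAIN_MAX_PFN(48) evaluates to
 * (1ULL << 36) - 1, i.e. the last addressable 4KiB page frame, and
 * DOMAIN_MAX_ADDR(48) shifts that back up to (1ULL << 48) - 4096.
 */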
ba395927 74
f27be03b 75#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
284901a9 76#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32))
6a35528a 77#define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64))
5e0d2a6f 78
df08cdc7
AM
79/* page table handling */
80#define LEVEL_STRIDE (9)
81#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
82
6d1c56a9
OBC
83/*
 84 * This bitmap is used to advertise the page sizes our hardware supports
85 * to the IOMMU core, which will then use this information to split
86 * physically contiguous memory regions it is mapping into page sizes
87 * that we support.
88 *
89 * Traditionally the IOMMU core just handed us the mappings directly,
 90 * after making sure the size is a power-of-two multiple of the 4KiB page and that the
91 * mapping has natural alignment.
92 *
93 * To retain this behavior, we currently advertise that we support
 94 * all page sizes that are a power-of-two multiple of 4KiB.
95 *
96 * If at some point we'd like to utilize the IOMMU core's new behavior,
97 * we could change this to advertise the real page sizes we support.
98 */
99#define INTEL_IOMMU_PGSIZES (~0xFFFUL)
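/*
 * Illustration, assuming the IOMMU core's usual convention that bit n of
 * this bitmap means "pages of 2^n bytes are supported": ~0xFFFUL clears
 * bits 0-11 and sets every higher bit, so all power-of-two sizes from
 * 4KiB upwards are advertised, as the comment above describes.
 */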
100
df08cdc7
AM
101static inline int agaw_to_level(int agaw)
102{
103 return agaw + 2;
104}
105
106static inline int agaw_to_width(int agaw)
107{
5c645b35 108 return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
df08cdc7
AM
109}
110
111static inline int width_to_agaw(int width)
112{
5c645b35 113 return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
df08cdc7
AM
114}
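/*
 * Example: for the DEFAULT_DOMAIN_ADDRESS_WIDTH of 48 bits,
 * width_to_agaw(48) = DIV_ROUND_UP(48 - 30, 9) = 2, agaw_to_level(2) = 4
 * (a four-level page table) and agaw_to_width(2) = 30 + 2 * 9 = 48, so
 * the helpers above round-trip cleanly.
 */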
115
116static inline unsigned int level_to_offset_bits(int level)
117{
118 return (level - 1) * LEVEL_STRIDE;
119}
120
121static inline int pfn_level_offset(unsigned long pfn, int level)
122{
123 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
124}
125
126static inline unsigned long level_mask(int level)
127{
128 return -1UL << level_to_offset_bits(level);
129}
130
131static inline unsigned long level_size(int level)
132{
133 return 1UL << level_to_offset_bits(level);
134}
135
136static inline unsigned long align_to_level(unsigned long pfn, int level)
137{
138 return (pfn + level_size(level) - 1) & level_mask(level);
139}
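/*
 * Example: at level 2, level_to_offset_bits() is 9, so
 * pfn_level_offset(pfn, 2) is (pfn >> 9) & 0x1ff, level_size(2) is
 * 512 pages (2MiB of IOVA with 4KiB pages), and align_to_level(pfn, 2)
 * rounds pfn up to the next 512-page boundary.
 */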
fd18de50 140
6dd9a7c7
YS
141static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
142{
5c645b35 143 return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
6dd9a7c7
YS
144}
145
dd4e8319
DW
 146/* VT-d pages must never be _larger_ than MM pages. Otherwise the
 147 pfn conversions below are never going to work. */
148static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
149{
150 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
151}
152
153static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
154{
155 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
156}
157static inline unsigned long page_to_dma_pfn(struct page *pg)
158{
159 return mm_to_dma_pfn(page_to_pfn(pg));
160}
161static inline unsigned long virt_to_dma_pfn(void *p)
162{
163 return page_to_dma_pfn(virt_to_page(p));
164}
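/*
 * On x86, PAGE_SHIFT and VTD_PAGE_SHIFT are both 12, so the conversions
 * above shift by zero and are effectively the identity; the shifts only
 * matter on configurations whose CPU page size is larger than the 4KiB
 * VT-d page.
 */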
165
d9630fe9
WH
166/* global iommu list, set NULL for ignored DMAR units */
167static struct intel_iommu **g_iommus;
168
e0fc7e0b 169static void __init check_tylersburg_isoch(void);
9af88143
DW
170static int rwbf_quirk;
171
b779260b
JC
172/*
 173 * set to 1 to panic the kernel if VT-d cannot be successfully enabled
174 * (used when kernel is launched w/ TXT)
175 */
176static int force_on = 0;
177
46b08e1a
MM
178/*
179 * 0: Present
180 * 1-11: Reserved
181 * 12-63: Context Ptr (12 - (haw-1))
182 * 64-127: Reserved
183 */
184struct root_entry {
185 u64 val;
186 u64 rsvd1;
187};
188#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
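/*
 * struct root_entry is two u64s (16 bytes), so ROOT_ENTRY_NR works out
 * to 4096 / 16 = 256 entries, one root entry per possible PCI bus
 * number.
 */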
189static inline bool root_present(struct root_entry *root)
190{
191 return (root->val & 1);
192}
193static inline void set_root_present(struct root_entry *root)
194{
195 root->val |= 1;
196}
197static inline void set_root_value(struct root_entry *root, unsigned long value)
198{
199 root->val |= value & VTD_PAGE_MASK;
200}
201
202static inline struct context_entry *
203get_context_addr_from_root(struct root_entry *root)
204{
205 return (struct context_entry *)
206 (root_present(root)?phys_to_virt(
207 root->val & VTD_PAGE_MASK) :
208 NULL);
209}
210
7a8fc25e
MM
211/*
212 * low 64 bits:
213 * 0: present
214 * 1: fault processing disable
215 * 2-3: translation type
216 * 12-63: address space root
217 * high 64 bits:
218 * 0-2: address width
 219 * 3-6: avail
220 * 8-23: domain id
221 */
222struct context_entry {
223 u64 lo;
224 u64 hi;
225};
c07e7d21
MM
226
227static inline bool context_present(struct context_entry *context)
228{
229 return (context->lo & 1);
230}
231static inline void context_set_present(struct context_entry *context)
232{
233 context->lo |= 1;
234}
235
236static inline void context_set_fault_enable(struct context_entry *context)
237{
238 context->lo &= (((u64)-1) << 2) | 1;
239}
240
c07e7d21
MM
241static inline void context_set_translation_type(struct context_entry *context,
242 unsigned long value)
243{
244 context->lo &= (((u64)-1) << 4) | 3;
245 context->lo |= (value & 3) << 2;
246}
247
248static inline void context_set_address_root(struct context_entry *context,
249 unsigned long value)
250{
251 context->lo |= value & VTD_PAGE_MASK;
252}
253
254static inline void context_set_address_width(struct context_entry *context,
255 unsigned long value)
256{
257 context->hi |= value & 7;
258}
259
260static inline void context_set_domain_id(struct context_entry *context,
261 unsigned long value)
262{
263 context->hi |= (value & ((1 << 16) - 1)) << 8;
264}
265
266static inline void context_clear_entry(struct context_entry *context)
267{
268 context->lo = 0;
269 context->hi = 0;
270}
7a8fc25e 271
622ba12a
MM
272/*
273 * 0: readable
274 * 1: writable
275 * 2-6: reserved
276 * 7: super page
9cf06697
SY
277 * 8-10: available
278 * 11: snoop behavior
622ba12a
MM
 279 * 12-63: Host physical address
280 */
281struct dma_pte {
282 u64 val;
283};
622ba12a 284
19c239ce
MM
285static inline void dma_clear_pte(struct dma_pte *pte)
286{
287 pte->val = 0;
288}
289
19c239ce
MM
290static inline u64 dma_pte_addr(struct dma_pte *pte)
291{
c85994e4
DW
292#ifdef CONFIG_64BIT
293 return pte->val & VTD_PAGE_MASK;
294#else
295 /* Must have a full atomic 64-bit read */
1a8bd481 296 return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
c85994e4 297#endif
19c239ce
MM
298}
299
19c239ce
MM
300static inline bool dma_pte_present(struct dma_pte *pte)
301{
302 return (pte->val & 3) != 0;
303}
622ba12a 304
4399c8bf
AK
305static inline bool dma_pte_superpage(struct dma_pte *pte)
306{
307 return (pte->val & (1 << 7));
308}
309
75e6bf96
DW
310static inline int first_pte_in_page(struct dma_pte *pte)
311{
312 return !((unsigned long)pte & ~VTD_PAGE_MASK);
313}
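/*
 * Each dma_pte is 8 bytes, so a 4KiB page-table page holds 512 of them;
 * this test is true exactly when pte points at slot 0 of a page, which
 * is how the walk loops below, e.g. "while (!first_pte_in_page(++pte))",
 * notice that they have stepped into the next page-table page.
 */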
314
2c2e2c38
FY
315/*
 316 * This domain is a static identity mapping domain.
 317 * 1. This domain creates a static 1:1 mapping to all usable memory.
 318 * 2. It maps to each iommu if successful.
 319 * 3. Each iommu maps to this domain if successful.
320 */
19943b0e
DW
321static struct dmar_domain *si_domain;
322static int hw_pass_through = 1;
2c2e2c38 323
3b5410e7 324/* devices under the same p2p bridge are owned in one domain */
cdc7b837 325#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
3b5410e7 326
1ce28feb
WH
 327/* domain represents a virtual machine; more than one device
 328 * across iommus may be owned in one domain, e.g. a kvm guest.
329 */
330#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1)
331
2c2e2c38
FY
 332/* si_domain contains multiple devices */
333#define DOMAIN_FLAG_STATIC_IDENTITY (1 << 2)
334
1b198bb0
MT
335/* define the limit of IOMMUs supported in each domain */
336#ifdef CONFIG_X86
337# define IOMMU_UNITS_SUPPORTED MAX_IO_APICS
338#else
339# define IOMMU_UNITS_SUPPORTED 64
340#endif
341
99126f7c
MM
342struct dmar_domain {
343 int id; /* domain id */
4c923d47 344 int nid; /* node id */
1b198bb0
MT
345 DECLARE_BITMAP(iommu_bmp, IOMMU_UNITS_SUPPORTED);
 346 /* bitmap of iommus this domain uses */
99126f7c
MM
347
348 struct list_head devices; /* all devices' list */
349 struct iova_domain iovad; /* iova's that belong to this domain */
350
351 struct dma_pte *pgd; /* virtual address */
99126f7c
MM
352 int gaw; /* max guest address width */
353
354 /* adjusted guest address width, 0 is level 2 30-bit */
355 int agaw;
356
3b5410e7 357 int flags; /* flags to find out type of domain */
8e604097
WH
358
359 int iommu_coherency;/* indicate coherency of iommu access */
58c610bd 360 int iommu_snooping; /* indicate snooping control feature*/
c7151a8d 361 int iommu_count; /* reference count of iommu */
6dd9a7c7
YS
362 int iommu_superpage;/* Level of superpages supported:
363 0 == 4KiB (no superpages), 1 == 2MiB,
364 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
c7151a8d 365 spinlock_t iommu_lock; /* protect iommu set in domain */
fe40f1e0 366 u64 max_addr; /* maximum mapped address */
99126f7c
MM
367};
368
a647dacb
MM
369/* PCI domain-device relationship */
370struct device_domain_info {
371 struct list_head link; /* link to domain siblings */
372 struct list_head global; /* link to global list */
276dbf99 373 u8 bus; /* PCI bus number */
a647dacb 374 u8 devfn; /* PCI devfn number */
0bcb3e28 375 struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
93a23a72 376 struct intel_iommu *iommu; /* IOMMU used by this device */
a647dacb
MM
377 struct dmar_domain *domain; /* pointer to domain */
378};
379
b94e4117
JL
380struct dmar_rmrr_unit {
381 struct list_head list; /* list of rmrr units */
382 struct acpi_dmar_header *hdr; /* ACPI header */
383 u64 base_address; /* reserved base address*/
384 u64 end_address; /* reserved end address */
832bd858 385 struct dmar_dev_scope *devices; /* target devices */
b94e4117
JL
386 int devices_cnt; /* target device count */
387};
388
389struct dmar_atsr_unit {
390 struct list_head list; /* list of ATSR units */
391 struct acpi_dmar_header *hdr; /* ACPI header */
832bd858 392 struct dmar_dev_scope *devices; /* target devices */
b94e4117
JL
393 int devices_cnt; /* target device count */
394 u8 include_all:1; /* include all ports */
395};
396
397static LIST_HEAD(dmar_atsr_units);
398static LIST_HEAD(dmar_rmrr_units);
399
400#define for_each_rmrr_units(rmrr) \
401 list_for_each_entry(rmrr, &dmar_rmrr_units, list)
402
5e0d2a6f 403static void flush_unmaps_timeout(unsigned long data);
404
b707cb02 405static DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
5e0d2a6f 406
80b20dd8 407#define HIGH_WATER_MARK 250
408struct deferred_flush_tables {
409 int next;
410 struct iova *iova[HIGH_WATER_MARK];
411 struct dmar_domain *domain[HIGH_WATER_MARK];
ea8ea460 412 struct page *freelist[HIGH_WATER_MARK];
80b20dd8 413};
414
415static struct deferred_flush_tables *deferred_flush;
416
5e0d2a6f 417/* bitmap for indexing intel_iommus */
5e0d2a6f 418static int g_num_of_iommus;
419
420static DEFINE_SPINLOCK(async_umap_flush_lock);
421static LIST_HEAD(unmaps_to_do);
422
423static int timer_on;
424static long list_size;
5e0d2a6f 425
92d03cc8 426static void domain_exit(struct dmar_domain *domain);
ba395927 427static void domain_remove_dev_info(struct dmar_domain *domain);
b94e4117 428static void domain_remove_one_dev_info(struct dmar_domain *domain,
bf9c9eda 429 struct device *dev);
92d03cc8 430static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
0bcb3e28 431 struct device *dev);
ba395927 432
d3f13810 433#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
0cd5c3c8
KM
434int dmar_disabled = 0;
435#else
436int dmar_disabled = 1;
d3f13810 437#endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
0cd5c3c8 438
8bc1f85c
ED
439int intel_iommu_enabled = 0;
440EXPORT_SYMBOL_GPL(intel_iommu_enabled);
441
2d9e667e 442static int dmar_map_gfx = 1;
7d3b03ce 443static int dmar_forcedac;
5e0d2a6f 444static int intel_iommu_strict;
6dd9a7c7 445static int intel_iommu_superpage = 1;
ba395927 446
c0771df8
DW
447int intel_iommu_gfx_mapped;
448EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
449
ba395927
KA
450#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
451static DEFINE_SPINLOCK(device_domain_lock);
452static LIST_HEAD(device_domain_list);
453
a8bcbb0d
JR
454static struct iommu_ops intel_iommu_ops;
455
ba395927
KA
456static int __init intel_iommu_setup(char *str)
457{
458 if (!str)
459 return -EINVAL;
460 while (*str) {
0cd5c3c8
KM
461 if (!strncmp(str, "on", 2)) {
462 dmar_disabled = 0;
463 printk(KERN_INFO "Intel-IOMMU: enabled\n");
464 } else if (!strncmp(str, "off", 3)) {
ba395927 465 dmar_disabled = 1;
0cd5c3c8 466 printk(KERN_INFO "Intel-IOMMU: disabled\n");
ba395927
KA
467 } else if (!strncmp(str, "igfx_off", 8)) {
468 dmar_map_gfx = 0;
469 printk(KERN_INFO
470 "Intel-IOMMU: disable GFX device mapping\n");
7d3b03ce 471 } else if (!strncmp(str, "forcedac", 8)) {
5e0d2a6f 472 printk(KERN_INFO
7d3b03ce
KA
473 "Intel-IOMMU: Forcing DAC for PCI devices\n");
474 dmar_forcedac = 1;
5e0d2a6f 475 } else if (!strncmp(str, "strict", 6)) {
476 printk(KERN_INFO
477 "Intel-IOMMU: disable batched IOTLB flush\n");
478 intel_iommu_strict = 1;
6dd9a7c7
YS
479 } else if (!strncmp(str, "sp_off", 6)) {
480 printk(KERN_INFO
481 "Intel-IOMMU: disable supported super page\n");
482 intel_iommu_superpage = 0;
ba395927
KA
483 }
484
485 str += strcspn(str, ",");
486 while (*str == ',')
487 str++;
488 }
489 return 0;
490}
491__setup("intel_iommu=", intel_iommu_setup);
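/*
 * Usage example: booting with "intel_iommu=on,strict,sp_off" enables the
 * IOMMU, disables batched IOTLB flushing and turns off superpage support;
 * as parsed above, the options are comma separated.
 */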
492
493static struct kmem_cache *iommu_domain_cache;
494static struct kmem_cache *iommu_devinfo_cache;
495static struct kmem_cache *iommu_iova_cache;
496
4c923d47 497static inline void *alloc_pgtable_page(int node)
eb3fa7cb 498{
4c923d47
SS
499 struct page *page;
500 void *vaddr = NULL;
eb3fa7cb 501
4c923d47
SS
502 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
503 if (page)
504 vaddr = page_address(page);
eb3fa7cb 505 return vaddr;
ba395927
KA
506}
507
508static inline void free_pgtable_page(void *vaddr)
509{
510 free_page((unsigned long)vaddr);
511}
512
513static inline void *alloc_domain_mem(void)
514{
354bb65e 515 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
ba395927
KA
516}
517
38717946 518static void free_domain_mem(void *vaddr)
ba395927
KA
519{
520 kmem_cache_free(iommu_domain_cache, vaddr);
521}
522
523static inline void * alloc_devinfo_mem(void)
524{
354bb65e 525 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
ba395927
KA
526}
527
528static inline void free_devinfo_mem(void *vaddr)
529{
530 kmem_cache_free(iommu_devinfo_cache, vaddr);
531}
532
533struct iova *alloc_iova_mem(void)
534{
354bb65e 535 return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
ba395927
KA
536}
537
538void free_iova_mem(struct iova *iova)
539{
540 kmem_cache_free(iommu_iova_cache, iova);
541}
542
1b573683 543
4ed0d3e6 544static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
1b573683
WH
545{
546 unsigned long sagaw;
547 int agaw = -1;
548
549 sagaw = cap_sagaw(iommu->cap);
4ed0d3e6 550 for (agaw = width_to_agaw(max_gaw);
1b573683
WH
551 agaw >= 0; agaw--) {
552 if (test_bit(agaw, &sagaw))
553 break;
554 }
555
556 return agaw;
557}
558
4ed0d3e6
FY
559/*
560 * Calculate max SAGAW for each iommu.
561 */
562int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
563{
564 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
565}
566
567/*
568 * calculate agaw for each iommu.
569 * "SAGAW" may be different across iommus, use a default agaw, and
570 * get a supported less agaw for iommus that don't support the default agaw.
571 */
572int iommu_calculate_agaw(struct intel_iommu *iommu)
573{
574 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
575}
576
2c2e2c38 577/* This function only returns a single iommu in a domain */
8c11e798
WH
578static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
579{
580 int iommu_id;
581
2c2e2c38 582 /* si_domain and vm domain should not get here. */
1ce28feb 583 BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
2c2e2c38 584 BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY);
1ce28feb 585
1b198bb0 586 iommu_id = find_first_bit(domain->iommu_bmp, g_num_of_iommus);
8c11e798
WH
587 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
588 return NULL;
589
590 return g_iommus[iommu_id];
591}
592
8e604097
WH
593static void domain_update_iommu_coherency(struct dmar_domain *domain)
594{
d0501960
DW
595 struct dmar_drhd_unit *drhd;
596 struct intel_iommu *iommu;
597 int i, found = 0;
2e12bc29 598
d0501960 599 domain->iommu_coherency = 1;
8e604097 600
1b198bb0 601 for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
d0501960 602 found = 1;
8e604097
WH
603 if (!ecap_coherent(g_iommus[i]->ecap)) {
604 domain->iommu_coherency = 0;
605 break;
606 }
8e604097 607 }
d0501960
DW
608 if (found)
609 return;
610
611 /* No hardware attached; use lowest common denominator */
612 rcu_read_lock();
613 for_each_active_iommu(iommu, drhd) {
614 if (!ecap_coherent(iommu->ecap)) {
615 domain->iommu_coherency = 0;
616 break;
617 }
618 }
619 rcu_read_unlock();
8e604097
WH
620}
621
58c610bd
SY
622static void domain_update_iommu_snooping(struct dmar_domain *domain)
623{
624 int i;
625
626 domain->iommu_snooping = 1;
627
1b198bb0 628 for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
58c610bd
SY
629 if (!ecap_sc_support(g_iommus[i]->ecap)) {
630 domain->iommu_snooping = 0;
631 break;
632 }
58c610bd
SY
633 }
634}
635
6dd9a7c7
YS
636static void domain_update_iommu_superpage(struct dmar_domain *domain)
637{
8140a95d
AK
638 struct dmar_drhd_unit *drhd;
639 struct intel_iommu *iommu = NULL;
640 int mask = 0xf;
6dd9a7c7
YS
641
642 if (!intel_iommu_superpage) {
643 domain->iommu_superpage = 0;
644 return;
645 }
646
8140a95d 647 /* set iommu_superpage to the smallest common denominator */
0e242612 648 rcu_read_lock();
8140a95d
AK
649 for_each_active_iommu(iommu, drhd) {
650 mask &= cap_super_page_val(iommu->cap);
6dd9a7c7
YS
651 if (!mask) {
652 break;
653 }
654 }
0e242612
JL
655 rcu_read_unlock();
656
6dd9a7c7
YS
657 domain->iommu_superpage = fls(mask);
658}
659
58c610bd
SY
660/* Some capabilities may be different across iommus */
661static void domain_update_iommu_cap(struct dmar_domain *domain)
662{
663 domain_update_iommu_coherency(domain);
664 domain_update_iommu_snooping(domain);
6dd9a7c7 665 domain_update_iommu_superpage(domain);
58c610bd
SY
666}
667
156baca8 668static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
c7151a8d
WH
669{
670 struct dmar_drhd_unit *drhd = NULL;
b683b230 671 struct intel_iommu *iommu;
156baca8
DW
672 struct device *tmp;
673 struct pci_dev *ptmp, *pdev = NULL;
674 u16 segment;
c7151a8d
WH
675 int i;
676
156baca8
DW
677 if (dev_is_pci(dev)) {
678 pdev = to_pci_dev(dev);
679 segment = pci_domain_nr(pdev->bus);
680 } else if (ACPI_COMPANION(dev))
681 dev = &ACPI_COMPANION(dev)->dev;
682
0e242612 683 rcu_read_lock();
b683b230 684 for_each_active_iommu(iommu, drhd) {
156baca8 685 if (pdev && segment != drhd->segment)
276dbf99 686 continue;
c7151a8d 687
b683b230 688 for_each_active_dev_scope(drhd->devices,
156baca8
DW
689 drhd->devices_cnt, i, tmp) {
690 if (tmp == dev) {
691 *bus = drhd->devices[i].bus;
692 *devfn = drhd->devices[i].devfn;
b683b230 693 goto out;
156baca8
DW
694 }
695
696 if (!pdev || !dev_is_pci(tmp))
697 continue;
698
699 ptmp = to_pci_dev(tmp);
700 if (ptmp->subordinate &&
701 ptmp->subordinate->number <= pdev->bus->number &&
702 ptmp->subordinate->busn_res.end >= pdev->bus->number)
703 goto got_pdev;
924b6231 704 }
c7151a8d 705
156baca8
DW
706 if (pdev && drhd->include_all) {
707 got_pdev:
708 *bus = pdev->bus->number;
709 *devfn = pdev->devfn;
b683b230 710 goto out;
156baca8 711 }
c7151a8d 712 }
b683b230 713 iommu = NULL;
156baca8 714 out:
0e242612 715 rcu_read_unlock();
c7151a8d 716
b683b230 717 return iommu;
c7151a8d
WH
718}
719
5331fe6f
WH
720static void domain_flush_cache(struct dmar_domain *domain,
721 void *addr, int size)
722{
723 if (!domain->iommu_coherency)
724 clflush_cache_range(addr, size);
725}
726
ba395927
KA
727/* Gets context entry for a given bus and devfn */
728static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
729 u8 bus, u8 devfn)
730{
731 struct root_entry *root;
732 struct context_entry *context;
733 unsigned long phy_addr;
734 unsigned long flags;
735
736 spin_lock_irqsave(&iommu->lock, flags);
737 root = &iommu->root_entry[bus];
738 context = get_context_addr_from_root(root);
739 if (!context) {
4c923d47
SS
740 context = (struct context_entry *)
741 alloc_pgtable_page(iommu->node);
ba395927
KA
742 if (!context) {
743 spin_unlock_irqrestore(&iommu->lock, flags);
744 return NULL;
745 }
5b6985ce 746 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
ba395927
KA
747 phy_addr = virt_to_phys((void *)context);
748 set_root_value(root, phy_addr);
749 set_root_present(root);
750 __iommu_flush_cache(iommu, root, sizeof(*root));
751 }
752 spin_unlock_irqrestore(&iommu->lock, flags);
753 return &context[devfn];
754}
755
756static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
757{
758 struct root_entry *root;
759 struct context_entry *context;
760 int ret;
761 unsigned long flags;
762
763 spin_lock_irqsave(&iommu->lock, flags);
764 root = &iommu->root_entry[bus];
765 context = get_context_addr_from_root(root);
766 if (!context) {
767 ret = 0;
768 goto out;
769 }
c07e7d21 770 ret = context_present(&context[devfn]);
ba395927
KA
771out:
772 spin_unlock_irqrestore(&iommu->lock, flags);
773 return ret;
774}
775
776static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
777{
778 struct root_entry *root;
779 struct context_entry *context;
780 unsigned long flags;
781
782 spin_lock_irqsave(&iommu->lock, flags);
783 root = &iommu->root_entry[bus];
784 context = get_context_addr_from_root(root);
785 if (context) {
c07e7d21 786 context_clear_entry(&context[devfn]);
ba395927
KA
787 __iommu_flush_cache(iommu, &context[devfn], \
788 sizeof(*context));
789 }
790 spin_unlock_irqrestore(&iommu->lock, flags);
791}
792
793static void free_context_table(struct intel_iommu *iommu)
794{
795 struct root_entry *root;
796 int i;
797 unsigned long flags;
798 struct context_entry *context;
799
800 spin_lock_irqsave(&iommu->lock, flags);
801 if (!iommu->root_entry) {
802 goto out;
803 }
804 for (i = 0; i < ROOT_ENTRY_NR; i++) {
805 root = &iommu->root_entry[i];
806 context = get_context_addr_from_root(root);
807 if (context)
808 free_pgtable_page(context);
809 }
810 free_pgtable_page(iommu->root_entry);
811 iommu->root_entry = NULL;
812out:
813 spin_unlock_irqrestore(&iommu->lock, flags);
814}
815
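/*
 * A *target_level of 0 asks pfn_to_dma_pte() for the deepest existing
 * entry: the walk below stops at the first superpage or non-present PTE
 * (or at level 1) and reports the level it reached back through
 * *target_level. A non-zero value walks down to exactly that level,
 * allocating any missing intermediate page-table pages, e.g. 1 for a
 * 4KiB leaf.
 */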
b026fd28 816static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
5cf0a76f 817 unsigned long pfn, int *target_level)
ba395927 818{
b026fd28 819 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
ba395927
KA
820 struct dma_pte *parent, *pte = NULL;
821 int level = agaw_to_level(domain->agaw);
4399c8bf 822 int offset;
ba395927
KA
823
824 BUG_ON(!domain->pgd);
f9423606
JS
825
826 if (addr_width < BITS_PER_LONG && pfn >> addr_width)
827 /* Address beyond IOMMU's addressing capabilities. */
828 return NULL;
829
ba395927
KA
830 parent = domain->pgd;
831
5cf0a76f 832 while (1) {
ba395927
KA
833 void *tmp_page;
834
b026fd28 835 offset = pfn_level_offset(pfn, level);
ba395927 836 pte = &parent[offset];
5cf0a76f 837 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
6dd9a7c7 838 break;
5cf0a76f 839 if (level == *target_level)
ba395927
KA
840 break;
841
19c239ce 842 if (!dma_pte_present(pte)) {
c85994e4
DW
843 uint64_t pteval;
844
4c923d47 845 tmp_page = alloc_pgtable_page(domain->nid);
ba395927 846
206a73c1 847 if (!tmp_page)
ba395927 848 return NULL;
206a73c1 849
c85994e4 850 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
64de5af0 851 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
c85994e4
DW
852 if (cmpxchg64(&pte->val, 0ULL, pteval)) {
853 /* Someone else set it while we were thinking; use theirs. */
854 free_pgtable_page(tmp_page);
855 } else {
856 dma_pte_addr(pte);
857 domain_flush_cache(domain, pte, sizeof(*pte));
858 }
ba395927 859 }
5cf0a76f
DW
860 if (level == 1)
861 break;
862
19c239ce 863 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
864 level--;
865 }
866
5cf0a76f
DW
867 if (!*target_level)
868 *target_level = level;
869
ba395927
KA
870 return pte;
871}
872
6dd9a7c7 873
ba395927 874/* return address's pte at specific level */
90dcfb5e
DW
875static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
876 unsigned long pfn,
6dd9a7c7 877 int level, int *large_page)
ba395927
KA
878{
879 struct dma_pte *parent, *pte = NULL;
880 int total = agaw_to_level(domain->agaw);
881 int offset;
882
883 parent = domain->pgd;
884 while (level <= total) {
90dcfb5e 885 offset = pfn_level_offset(pfn, total);
ba395927
KA
886 pte = &parent[offset];
887 if (level == total)
888 return pte;
889
6dd9a7c7
YS
890 if (!dma_pte_present(pte)) {
891 *large_page = total;
ba395927 892 break;
6dd9a7c7
YS
893 }
894
895 if (pte->val & DMA_PTE_LARGE_PAGE) {
896 *large_page = total;
897 return pte;
898 }
899
19c239ce 900 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
901 total--;
902 }
903 return NULL;
904}
905
ba395927 906/* clear last level pte, a tlb flush should follow */
5cf0a76f 907static void dma_pte_clear_range(struct dmar_domain *domain,
595badf5
DW
908 unsigned long start_pfn,
909 unsigned long last_pfn)
ba395927 910{
04b18e65 911 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
6dd9a7c7 912 unsigned int large_page = 1;
310a5ab9 913 struct dma_pte *first_pte, *pte;
66eae846 914
04b18e65 915 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
595badf5 916 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
59c36286 917 BUG_ON(start_pfn > last_pfn);
ba395927 918
04b18e65 919 /* we don't need lock here; nobody else touches the iova range */
59c36286 920 do {
6dd9a7c7
YS
921 large_page = 1;
922 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
310a5ab9 923 if (!pte) {
6dd9a7c7 924 start_pfn = align_to_level(start_pfn + 1, large_page + 1);
310a5ab9
DW
925 continue;
926 }
6dd9a7c7 927 do {
310a5ab9 928 dma_clear_pte(pte);
6dd9a7c7 929 start_pfn += lvl_to_nr_pages(large_page);
310a5ab9 930 pte++;
75e6bf96
DW
931 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
932
310a5ab9
DW
933 domain_flush_cache(domain, first_pte,
934 (void *)pte - (void *)first_pte);
59c36286
DW
935
936 } while (start_pfn && start_pfn <= last_pfn);
ba395927
KA
937}
938
3269ee0b
AW
939static void dma_pte_free_level(struct dmar_domain *domain, int level,
940 struct dma_pte *pte, unsigned long pfn,
941 unsigned long start_pfn, unsigned long last_pfn)
942{
943 pfn = max(start_pfn, pfn);
944 pte = &pte[pfn_level_offset(pfn, level)];
945
946 do {
947 unsigned long level_pfn;
948 struct dma_pte *level_pte;
949
950 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
951 goto next;
952
953 level_pfn = pfn & level_mask(level - 1);
954 level_pte = phys_to_virt(dma_pte_addr(pte));
955
956 if (level > 2)
957 dma_pte_free_level(domain, level - 1, level_pte,
958 level_pfn, start_pfn, last_pfn);
959
960 /* If range covers entire pagetable, free it */
961 if (!(start_pfn > level_pfn ||
08336fd2 962 last_pfn < level_pfn + level_size(level) - 1)) {
3269ee0b
AW
963 dma_clear_pte(pte);
964 domain_flush_cache(domain, pte, sizeof(*pte));
965 free_pgtable_page(level_pte);
966 }
967next:
968 pfn += level_size(level);
969 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
970}
971
ba395927
KA
972/* free page table pages. last level pte should already be cleared */
973static void dma_pte_free_pagetable(struct dmar_domain *domain,
d794dc9b
DW
974 unsigned long start_pfn,
975 unsigned long last_pfn)
ba395927 976{
6660c63a 977 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
ba395927 978
6660c63a
DW
979 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
980 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
59c36286 981 BUG_ON(start_pfn > last_pfn);
ba395927 982
f3a0a52f 983 /* We don't need lock here; nobody else touches the iova range */
3269ee0b
AW
984 dma_pte_free_level(domain, agaw_to_level(domain->agaw),
985 domain->pgd, 0, start_pfn, last_pfn);
6660c63a 986
ba395927 987 /* free pgd */
d794dc9b 988 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
ba395927
KA
989 free_pgtable_page(domain->pgd);
990 domain->pgd = NULL;
991 }
992}
993
ea8ea460
DW
994/* When a page at a given level is being unlinked from its parent, we don't
995 need to *modify* it at all. All we need to do is make a list of all the
996 pages which can be freed just as soon as we've flushed the IOTLB and we
997 know the hardware page-walk will no longer touch them.
998 The 'pte' argument is the *parent* PTE, pointing to the page that is to
999 be freed. */
1000static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1001 int level, struct dma_pte *pte,
1002 struct page *freelist)
1003{
1004 struct page *pg;
1005
1006 pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1007 pg->freelist = freelist;
1008 freelist = pg;
1009
1010 if (level == 1)
1011 return freelist;
1012
adeb2590
JL
1013 pte = page_address(pg);
1014 do {
ea8ea460
DW
1015 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1016 freelist = dma_pte_list_pagetables(domain, level - 1,
1017 pte, freelist);
adeb2590
JL
1018 pte++;
1019 } while (!first_pte_in_page(pte));
ea8ea460
DW
1020
1021 return freelist;
1022}
1023
1024static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1025 struct dma_pte *pte, unsigned long pfn,
1026 unsigned long start_pfn,
1027 unsigned long last_pfn,
1028 struct page *freelist)
1029{
1030 struct dma_pte *first_pte = NULL, *last_pte = NULL;
1031
1032 pfn = max(start_pfn, pfn);
1033 pte = &pte[pfn_level_offset(pfn, level)];
1034
1035 do {
1036 unsigned long level_pfn;
1037
1038 if (!dma_pte_present(pte))
1039 goto next;
1040
1041 level_pfn = pfn & level_mask(level);
1042
1043 /* If range covers entire pagetable, free it */
1044 if (start_pfn <= level_pfn &&
1045 last_pfn >= level_pfn + level_size(level) - 1) {
 1046 /* These subordinate page tables are going away entirely. Don't
1047 bother to clear them; we're just going to *free* them. */
1048 if (level > 1 && !dma_pte_superpage(pte))
1049 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1050
1051 dma_clear_pte(pte);
1052 if (!first_pte)
1053 first_pte = pte;
1054 last_pte = pte;
1055 } else if (level > 1) {
1056 /* Recurse down into a level that isn't *entirely* obsolete */
1057 freelist = dma_pte_clear_level(domain, level - 1,
1058 phys_to_virt(dma_pte_addr(pte)),
1059 level_pfn, start_pfn, last_pfn,
1060 freelist);
1061 }
1062next:
1063 pfn += level_size(level);
1064 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1065
1066 if (first_pte)
1067 domain_flush_cache(domain, first_pte,
1068 (void *)++last_pte - (void *)first_pte);
1069
1070 return freelist;
1071}
1072
1073/* We can't just free the pages because the IOMMU may still be walking
1074 the page tables, and may have cached the intermediate levels. The
1075 pages can only be freed after the IOTLB flush has been done. */
1076struct page *domain_unmap(struct dmar_domain *domain,
1077 unsigned long start_pfn,
1078 unsigned long last_pfn)
1079{
1080 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
1081 struct page *freelist = NULL;
1082
1083 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
1084 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
1085 BUG_ON(start_pfn > last_pfn);
1086
1087 /* we don't need lock here; nobody else touches the iova range */
1088 freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1089 domain->pgd, 0, start_pfn, last_pfn, NULL);
1090
1091 /* free pgd */
1092 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1093 struct page *pgd_page = virt_to_page(domain->pgd);
1094 pgd_page->freelist = freelist;
1095 freelist = pgd_page;
1096
1097 domain->pgd = NULL;
1098 }
1099
1100 return freelist;
1101}
1102
1103void dma_free_pagelist(struct page *freelist)
1104{
1105 struct page *pg;
1106
1107 while ((pg = freelist)) {
1108 freelist = pg->freelist;
1109 free_pgtable_page(page_address(pg));
1110 }
1111}
1112
ba395927
KA
1113/* iommu handling */
1114static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1115{
1116 struct root_entry *root;
1117 unsigned long flags;
1118
4c923d47 1119 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
ba395927
KA
1120 if (!root)
1121 return -ENOMEM;
1122
5b6985ce 1123 __iommu_flush_cache(iommu, root, ROOT_SIZE);
ba395927
KA
1124
1125 spin_lock_irqsave(&iommu->lock, flags);
1126 iommu->root_entry = root;
1127 spin_unlock_irqrestore(&iommu->lock, flags);
1128
1129 return 0;
1130}
1131
ba395927
KA
1132static void iommu_set_root_entry(struct intel_iommu *iommu)
1133{
1134 void *addr;
c416daa9 1135 u32 sts;
ba395927
KA
1136 unsigned long flag;
1137
1138 addr = iommu->root_entry;
1139
1f5b3c3f 1140 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1141 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
1142
c416daa9 1143 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1144
1145 /* Make sure hardware complete it */
1146 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1147 readl, (sts & DMA_GSTS_RTPS), sts);
ba395927 1148
1f5b3c3f 1149 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1150}
1151
1152static void iommu_flush_write_buffer(struct intel_iommu *iommu)
1153{
1154 u32 val;
1155 unsigned long flag;
1156
9af88143 1157 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
ba395927 1158 return;
ba395927 1159
1f5b3c3f 1160 raw_spin_lock_irqsave(&iommu->register_lock, flag);
462b60f6 1161 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1162
1163 /* Make sure hardware complete it */
1164 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1165 readl, (!(val & DMA_GSTS_WBFS)), val);
ba395927 1166
1f5b3c3f 1167 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1168}
1169
 1170/* return value determines whether we need a write buffer flush */
4c25a2c1
DW
1171static void __iommu_flush_context(struct intel_iommu *iommu,
1172 u16 did, u16 source_id, u8 function_mask,
1173 u64 type)
ba395927
KA
1174{
1175 u64 val = 0;
1176 unsigned long flag;
1177
ba395927
KA
1178 switch (type) {
1179 case DMA_CCMD_GLOBAL_INVL:
1180 val = DMA_CCMD_GLOBAL_INVL;
1181 break;
1182 case DMA_CCMD_DOMAIN_INVL:
1183 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1184 break;
1185 case DMA_CCMD_DEVICE_INVL:
1186 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1187 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1188 break;
1189 default:
1190 BUG();
1191 }
1192 val |= DMA_CCMD_ICC;
1193
1f5b3c3f 1194 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1195 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1196
1197 /* Make sure hardware complete it */
1198 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1199 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1200
1f5b3c3f 1201 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1202}
1203
ba395927 1204/* return value determines whether we need a write buffer flush */
1f0ef2aa
DW
1205static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1206 u64 addr, unsigned int size_order, u64 type)
ba395927
KA
1207{
1208 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1209 u64 val = 0, val_iva = 0;
1210 unsigned long flag;
1211
ba395927
KA
1212 switch (type) {
1213 case DMA_TLB_GLOBAL_FLUSH:
 1214 /* global flush doesn't need to set IVA_REG */
1215 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1216 break;
1217 case DMA_TLB_DSI_FLUSH:
1218 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1219 break;
1220 case DMA_TLB_PSI_FLUSH:
1221 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
ea8ea460 1222 /* IH bit is passed in as part of address */
ba395927
KA
1223 val_iva = size_order | addr;
1224 break;
1225 default:
1226 BUG();
1227 }
1228 /* Note: set drain read/write */
1229#if 0
1230 /*
1231 * This is probably to be super secure.. Looks like we can
1232 * ignore it without any impact.
1233 */
1234 if (cap_read_drain(iommu->cap))
1235 val |= DMA_TLB_READ_DRAIN;
1236#endif
1237 if (cap_write_drain(iommu->cap))
1238 val |= DMA_TLB_WRITE_DRAIN;
1239
1f5b3c3f 1240 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1241 /* Note: Only uses first TLB reg currently */
1242 if (val_iva)
1243 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1244 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1245
1246 /* Make sure hardware complete it */
1247 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1248 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1249
1f5b3c3f 1250 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1251
1252 /* check IOTLB invalidation granularity */
1253 if (DMA_TLB_IAIG(val) == 0)
1254 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
1255 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1256 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
5b6985ce
FY
1257 (unsigned long long)DMA_TLB_IIRG(type),
1258 (unsigned long long)DMA_TLB_IAIG(val));
ba395927
KA
1259}
1260
64ae892b
DW
1261static struct device_domain_info *
1262iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
1263 u8 bus, u8 devfn)
93a23a72
YZ
1264{
1265 int found = 0;
1266 unsigned long flags;
1267 struct device_domain_info *info;
0bcb3e28 1268 struct pci_dev *pdev;
93a23a72
YZ
1269
1270 if (!ecap_dev_iotlb_support(iommu->ecap))
1271 return NULL;
1272
1273 if (!iommu->qi)
1274 return NULL;
1275
1276 spin_lock_irqsave(&device_domain_lock, flags);
1277 list_for_each_entry(info, &domain->devices, link)
1278 if (info->bus == bus && info->devfn == devfn) {
1279 found = 1;
1280 break;
1281 }
1282 spin_unlock_irqrestore(&device_domain_lock, flags);
1283
0bcb3e28 1284 if (!found || !info->dev || !dev_is_pci(info->dev))
93a23a72
YZ
1285 return NULL;
1286
0bcb3e28
DW
1287 pdev = to_pci_dev(info->dev);
1288
1289 if (!pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS))
93a23a72
YZ
1290 return NULL;
1291
0bcb3e28 1292 if (!dmar_find_matched_atsr_unit(pdev))
93a23a72
YZ
1293 return NULL;
1294
93a23a72
YZ
1295 return info;
1296}
1297
1298static void iommu_enable_dev_iotlb(struct device_domain_info *info)
ba395927 1299{
0bcb3e28 1300 if (!info || !dev_is_pci(info->dev))
93a23a72
YZ
1301 return;
1302
0bcb3e28 1303 pci_enable_ats(to_pci_dev(info->dev), VTD_PAGE_SHIFT);
93a23a72
YZ
1304}
1305
1306static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1307{
0bcb3e28
DW
1308 if (!info->dev || !dev_is_pci(info->dev) ||
1309 !pci_ats_enabled(to_pci_dev(info->dev)))
93a23a72
YZ
1310 return;
1311
0bcb3e28 1312 pci_disable_ats(to_pci_dev(info->dev));
93a23a72
YZ
1313}
1314
1315static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1316 u64 addr, unsigned mask)
1317{
1318 u16 sid, qdep;
1319 unsigned long flags;
1320 struct device_domain_info *info;
1321
1322 spin_lock_irqsave(&device_domain_lock, flags);
1323 list_for_each_entry(info, &domain->devices, link) {
0bcb3e28
DW
1324 struct pci_dev *pdev;
1325 if (!info->dev || !dev_is_pci(info->dev))
1326 continue;
1327
1328 pdev = to_pci_dev(info->dev);
1329 if (!pci_ats_enabled(pdev))
93a23a72
YZ
1330 continue;
1331
1332 sid = info->bus << 8 | info->devfn;
0bcb3e28 1333 qdep = pci_ats_queue_depth(pdev);
93a23a72
YZ
1334 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1335 }
1336 spin_unlock_irqrestore(&device_domain_lock, flags);
1337}
1338
1f0ef2aa 1339static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
ea8ea460 1340 unsigned long pfn, unsigned int pages, int ih, int map)
ba395927 1341{
9dd2fe89 1342 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
03d6a246 1343 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
ba395927 1344
ba395927
KA
1345 BUG_ON(pages == 0);
1346
ea8ea460
DW
1347 if (ih)
1348 ih = 1 << 6;
ba395927 1349 /*
9dd2fe89
YZ
1350 * Fallback to domain selective flush if no PSI support or the size is
1351 * too big.
ba395927
KA
1352 * PSI requires page size to be 2 ^ x, and the base address is naturally
1353 * aligned to the size
1354 */
9dd2fe89
YZ
1355 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1356 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1f0ef2aa 1357 DMA_TLB_DSI_FLUSH);
9dd2fe89 1358 else
ea8ea460 1359 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
9dd2fe89 1360 DMA_TLB_PSI_FLUSH);
bf92df30
YZ
1361
1362 /*
82653633
NA
1363 * In caching mode, changes of pages from non-present to present require
1364 * flush. However, device IOTLB doesn't need to be flushed in this case.
bf92df30 1365 */
82653633 1366 if (!cap_caching_mode(iommu->cap) || !map)
93a23a72 1367 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
ba395927
KA
1368}
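/*
 * Example: pages = 9 rounds up to 16, so mask = 4 and the PSI covers a
 * naturally aligned 64KiB region; if that mask exceeded
 * cap_max_amask_val() the code above would fall back to a
 * domain-selective flush. When ih is set it becomes bit 6 of the
 * address, the invalidation hint passed through to the IOTLB flush.
 */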
1369
f8bab735 1370static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1371{
1372 u32 pmen;
1373 unsigned long flags;
1374
1f5b3c3f 1375 raw_spin_lock_irqsave(&iommu->register_lock, flags);
f8bab735 1376 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1377 pmen &= ~DMA_PMEN_EPM;
1378 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1379
1380 /* wait for the protected region status bit to clear */
1381 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1382 readl, !(pmen & DMA_PMEN_PRS), pmen);
1383
1f5b3c3f 1384 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
f8bab735 1385}
1386
ba395927
KA
1387static int iommu_enable_translation(struct intel_iommu *iommu)
1388{
1389 u32 sts;
1390 unsigned long flags;
1391
1f5b3c3f 1392 raw_spin_lock_irqsave(&iommu->register_lock, flags);
c416daa9
DW
1393 iommu->gcmd |= DMA_GCMD_TE;
1394 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1395
1396 /* Make sure hardware complete it */
1397 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1398 readl, (sts & DMA_GSTS_TES), sts);
ba395927 1399
1f5b3c3f 1400 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
ba395927
KA
1401 return 0;
1402}
1403
1404static int iommu_disable_translation(struct intel_iommu *iommu)
1405{
1406 u32 sts;
1407 unsigned long flag;
1408
1f5b3c3f 1409 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1410 iommu->gcmd &= ~DMA_GCMD_TE;
1411 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1412
1413 /* Make sure hardware complete it */
1414 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1415 readl, (!(sts & DMA_GSTS_TES)), sts);
ba395927 1416
1f5b3c3f 1417 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1418 return 0;
1419}
1420
3460a6d9 1421
ba395927
KA
1422static int iommu_init_domains(struct intel_iommu *iommu)
1423{
1424 unsigned long ndomains;
1425 unsigned long nlongs;
1426
1427 ndomains = cap_ndoms(iommu->cap);
852bdb04
JL
1428 pr_debug("IOMMU%d: Number of Domains supported <%ld>\n",
1429 iommu->seq_id, ndomains);
ba395927
KA
1430 nlongs = BITS_TO_LONGS(ndomains);
1431
94a91b50
DD
1432 spin_lock_init(&iommu->lock);
1433
ba395927
KA
1434 /* TBD: there might be 64K domains,
1435 * consider other allocation for future chip
1436 */
1437 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1438 if (!iommu->domain_ids) {
852bdb04
JL
1439 pr_err("IOMMU%d: allocating domain id array failed\n",
1440 iommu->seq_id);
ba395927
KA
1441 return -ENOMEM;
1442 }
1443 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1444 GFP_KERNEL);
1445 if (!iommu->domains) {
852bdb04
JL
1446 pr_err("IOMMU%d: allocating domain array failed\n",
1447 iommu->seq_id);
1448 kfree(iommu->domain_ids);
1449 iommu->domain_ids = NULL;
ba395927
KA
1450 return -ENOMEM;
1451 }
1452
1453 /*
1454 * if Caching mode is set, then invalid translations are tagged
1455 * with domainid 0. Hence we need to pre-allocate it.
1456 */
1457 if (cap_caching_mode(iommu->cap))
1458 set_bit(0, iommu->domain_ids);
1459 return 0;
1460}
ba395927 1461
a868e6b7 1462static void free_dmar_iommu(struct intel_iommu *iommu)
ba395927
KA
1463{
1464 struct dmar_domain *domain;
5ced12af 1465 int i, count;
c7151a8d 1466 unsigned long flags;
ba395927 1467
94a91b50 1468 if ((iommu->domains) && (iommu->domain_ids)) {
a45946ab 1469 for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
a4eaa86c
JL
1470 /*
1471 * Domain id 0 is reserved for invalid translation
1472 * if hardware supports caching mode.
1473 */
1474 if (cap_caching_mode(iommu->cap) && i == 0)
1475 continue;
1476
94a91b50
DD
1477 domain = iommu->domains[i];
1478 clear_bit(i, iommu->domain_ids);
1479
1480 spin_lock_irqsave(&domain->iommu_lock, flags);
5ced12af
JL
1481 count = --domain->iommu_count;
1482 spin_unlock_irqrestore(&domain->iommu_lock, flags);
92d03cc8
JL
1483 if (count == 0)
1484 domain_exit(domain);
5e98c4b1 1485 }
ba395927
KA
1486 }
1487
1488 if (iommu->gcmd & DMA_GCMD_TE)
1489 iommu_disable_translation(iommu);
1490
ba395927
KA
1491 kfree(iommu->domains);
1492 kfree(iommu->domain_ids);
a868e6b7
JL
1493 iommu->domains = NULL;
1494 iommu->domain_ids = NULL;
ba395927 1495
d9630fe9
WH
1496 g_iommus[iommu->seq_id] = NULL;
1497
ba395927
KA
1498 /* free context mapping */
1499 free_context_table(iommu);
ba395927
KA
1500}
1501
92d03cc8 1502static struct dmar_domain *alloc_domain(bool vm)
ba395927 1503{
92d03cc8
JL
1504 /* domain id for virtual machine, it won't be set in context */
1505 static atomic_t vm_domid = ATOMIC_INIT(0);
ba395927 1506 struct dmar_domain *domain;
ba395927
KA
1507
1508 domain = alloc_domain_mem();
1509 if (!domain)
1510 return NULL;
1511
4c923d47 1512 domain->nid = -1;
92d03cc8 1513 domain->iommu_count = 0;
1b198bb0 1514 memset(domain->iommu_bmp, 0, sizeof(domain->iommu_bmp));
2c2e2c38 1515 domain->flags = 0;
92d03cc8
JL
1516 spin_lock_init(&domain->iommu_lock);
1517 INIT_LIST_HEAD(&domain->devices);
1518 if (vm) {
1519 domain->id = atomic_inc_return(&vm_domid);
1520 domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
1521 }
2c2e2c38
FY
1522
1523 return domain;
1524}
1525
1526static int iommu_attach_domain(struct dmar_domain *domain,
1527 struct intel_iommu *iommu)
1528{
1529 int num;
1530 unsigned long ndomains;
1531 unsigned long flags;
1532
ba395927
KA
1533 ndomains = cap_ndoms(iommu->cap);
1534
1535 spin_lock_irqsave(&iommu->lock, flags);
2c2e2c38 1536
ba395927
KA
1537 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1538 if (num >= ndomains) {
1539 spin_unlock_irqrestore(&iommu->lock, flags);
ba395927 1540 printk(KERN_ERR "IOMMU: no free domain ids\n");
2c2e2c38 1541 return -ENOMEM;
ba395927
KA
1542 }
1543
ba395927 1544 domain->id = num;
9ebd682e 1545 domain->iommu_count++;
2c2e2c38 1546 set_bit(num, iommu->domain_ids);
1b198bb0 1547 set_bit(iommu->seq_id, domain->iommu_bmp);
ba395927
KA
1548 iommu->domains[num] = domain;
1549 spin_unlock_irqrestore(&iommu->lock, flags);
1550
2c2e2c38 1551 return 0;
ba395927
KA
1552}
1553
2c2e2c38
FY
1554static void iommu_detach_domain(struct dmar_domain *domain,
1555 struct intel_iommu *iommu)
ba395927
KA
1556{
1557 unsigned long flags;
2c2e2c38 1558 int num, ndomains;
ba395927 1559
8c11e798 1560 spin_lock_irqsave(&iommu->lock, flags);
2c2e2c38 1561 ndomains = cap_ndoms(iommu->cap);
a45946ab 1562 for_each_set_bit(num, iommu->domain_ids, ndomains) {
2c2e2c38 1563 if (iommu->domains[num] == domain) {
92d03cc8
JL
1564 clear_bit(num, iommu->domain_ids);
1565 iommu->domains[num] = NULL;
2c2e2c38
FY
1566 break;
1567 }
2c2e2c38 1568 }
8c11e798 1569 spin_unlock_irqrestore(&iommu->lock, flags);
ba395927
KA
1570}
1571
1572static struct iova_domain reserved_iova_list;
8a443df4 1573static struct lock_class_key reserved_rbtree_key;
ba395927 1574
51a63e67 1575static int dmar_init_reserved_ranges(void)
ba395927
KA
1576{
1577 struct pci_dev *pdev = NULL;
1578 struct iova *iova;
1579 int i;
ba395927 1580
f661197e 1581 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
ba395927 1582
8a443df4
MG
1583 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1584 &reserved_rbtree_key);
1585
ba395927
KA
1586 /* IOAPIC ranges shouldn't be accessed by DMA */
1587 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1588 IOVA_PFN(IOAPIC_RANGE_END));
51a63e67 1589 if (!iova) {
ba395927 1590 printk(KERN_ERR "Reserve IOAPIC range failed\n");
51a63e67
JC
1591 return -ENODEV;
1592 }
ba395927
KA
1593
1594 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1595 for_each_pci_dev(pdev) {
1596 struct resource *r;
1597
1598 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1599 r = &pdev->resource[i];
1600 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1601 continue;
1a4a4551
DW
1602 iova = reserve_iova(&reserved_iova_list,
1603 IOVA_PFN(r->start),
1604 IOVA_PFN(r->end));
51a63e67 1605 if (!iova) {
ba395927 1606 printk(KERN_ERR "Reserve iova failed\n");
51a63e67
JC
1607 return -ENODEV;
1608 }
ba395927
KA
1609 }
1610 }
51a63e67 1611 return 0;
ba395927
KA
1612}
1613
1614static void domain_reserve_special_ranges(struct dmar_domain *domain)
1615{
1616 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1617}
1618
1619static inline int guestwidth_to_adjustwidth(int gaw)
1620{
1621 int agaw;
1622 int r = (gaw - 12) % 9;
1623
1624 if (r == 0)
1625 agaw = gaw;
1626 else
1627 agaw = gaw + 9 - r;
1628 if (agaw > 64)
1629 agaw = 64;
1630 return agaw;
1631}
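/*
 * Example: this rounds the guest width up to the next 12 + 9 * n value
 * that a 9-bit-per-level page table can cover, so
 * guestwidth_to_adjustwidth(36) = 39, while 39 and 48 are returned
 * unchanged.
 */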
1632
1633static int domain_init(struct dmar_domain *domain, int guest_width)
1634{
1635 struct intel_iommu *iommu;
1636 int adjust_width, agaw;
1637 unsigned long sagaw;
1638
f661197e 1639 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
ba395927
KA
1640 domain_reserve_special_ranges(domain);
1641
1642 /* calculate AGAW */
8c11e798 1643 iommu = domain_get_iommu(domain);
ba395927
KA
1644 if (guest_width > cap_mgaw(iommu->cap))
1645 guest_width = cap_mgaw(iommu->cap);
1646 domain->gaw = guest_width;
1647 adjust_width = guestwidth_to_adjustwidth(guest_width);
1648 agaw = width_to_agaw(adjust_width);
1649 sagaw = cap_sagaw(iommu->cap);
1650 if (!test_bit(agaw, &sagaw)) {
1651 /* hardware doesn't support it, choose a bigger one */
1652 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1653 agaw = find_next_bit(&sagaw, 5, agaw);
1654 if (agaw >= 5)
1655 return -ENODEV;
1656 }
1657 domain->agaw = agaw;
ba395927 1658
8e604097
WH
1659 if (ecap_coherent(iommu->ecap))
1660 domain->iommu_coherency = 1;
1661 else
1662 domain->iommu_coherency = 0;
1663
58c610bd
SY
1664 if (ecap_sc_support(iommu->ecap))
1665 domain->iommu_snooping = 1;
1666 else
1667 domain->iommu_snooping = 0;
1668
214e39aa
DW
1669 if (intel_iommu_superpage)
1670 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1671 else
1672 domain->iommu_superpage = 0;
1673
4c923d47 1674 domain->nid = iommu->node;
c7151a8d 1675
ba395927 1676 /* always allocate the top pgd */
4c923d47 1677 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
ba395927
KA
1678 if (!domain->pgd)
1679 return -ENOMEM;
5b6985ce 1680 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
ba395927
KA
1681 return 0;
1682}
1683
1684static void domain_exit(struct dmar_domain *domain)
1685{
2c2e2c38
FY
1686 struct dmar_drhd_unit *drhd;
1687 struct intel_iommu *iommu;
ea8ea460 1688 struct page *freelist = NULL;
ba395927
KA
1689
 1690 /* Domain 0 is reserved, so don't process it */
1691 if (!domain)
1692 return;
1693
7b668357
AW
1694 /* Flush any lazy unmaps that may reference this domain */
1695 if (!intel_iommu_strict)
1696 flush_unmaps_timeout(0);
1697
92d03cc8 1698 /* remove associated devices */
ba395927 1699 domain_remove_dev_info(domain);
92d03cc8 1700
ba395927
KA
1701 /* destroy iovas */
1702 put_iova_domain(&domain->iovad);
ba395927 1703
ea8ea460 1704 freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
ba395927 1705
92d03cc8 1706 /* clear attached or cached domains */
0e242612 1707 rcu_read_lock();
2c2e2c38 1708 for_each_active_iommu(iommu, drhd)
92d03cc8
JL
1709 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
1710 test_bit(iommu->seq_id, domain->iommu_bmp))
2c2e2c38 1711 iommu_detach_domain(domain, iommu);
0e242612 1712 rcu_read_unlock();
2c2e2c38 1713
ea8ea460
DW
1714 dma_free_pagelist(freelist);
1715
ba395927
KA
1716 free_domain_mem(domain);
1717}
1718
64ae892b
DW
1719static int domain_context_mapping_one(struct dmar_domain *domain,
1720 struct intel_iommu *iommu,
1721 u8 bus, u8 devfn, int translation)
ba395927
KA
1722{
1723 struct context_entry *context;
ba395927 1724 unsigned long flags;
ea6606b0
WH
1725 struct dma_pte *pgd;
1726 unsigned long num;
1727 unsigned long ndomains;
1728 int id;
1729 int agaw;
93a23a72 1730 struct device_domain_info *info = NULL;
ba395927
KA
1731
1732 pr_debug("Set context mapping for %02x:%02x.%d\n",
1733 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
4ed0d3e6 1734
ba395927 1735 BUG_ON(!domain->pgd);
4ed0d3e6
FY
1736 BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1737 translation != CONTEXT_TT_MULTI_LEVEL);
5331fe6f 1738
ba395927
KA
1739 context = device_to_context_entry(iommu, bus, devfn);
1740 if (!context)
1741 return -ENOMEM;
1742 spin_lock_irqsave(&iommu->lock, flags);
c07e7d21 1743 if (context_present(context)) {
ba395927
KA
1744 spin_unlock_irqrestore(&iommu->lock, flags);
1745 return 0;
1746 }
1747
ea6606b0
WH
1748 id = domain->id;
1749 pgd = domain->pgd;
1750
2c2e2c38
FY
1751 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
1752 domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) {
ea6606b0
WH
1753 int found = 0;
1754
1755 /* find an available domain id for this device in iommu */
1756 ndomains = cap_ndoms(iommu->cap);
a45946ab 1757 for_each_set_bit(num, iommu->domain_ids, ndomains) {
ea6606b0
WH
1758 if (iommu->domains[num] == domain) {
1759 id = num;
1760 found = 1;
1761 break;
1762 }
ea6606b0
WH
1763 }
1764
1765 if (found == 0) {
1766 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1767 if (num >= ndomains) {
1768 spin_unlock_irqrestore(&iommu->lock, flags);
1769 printk(KERN_ERR "IOMMU: no free domain ids\n");
1770 return -EFAULT;
1771 }
1772
1773 set_bit(num, iommu->domain_ids);
1774 iommu->domains[num] = domain;
1775 id = num;
1776 }
1777
1778 /* Skip top levels of page tables for
1779 * an iommu whose agaw is smaller than the default.
1672af11 1780 * Unnecessary for PT mode.
ea6606b0 1781 */
1672af11
CW
1782 if (translation != CONTEXT_TT_PASS_THROUGH) {
1783 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1784 pgd = phys_to_virt(dma_pte_addr(pgd));
1785 if (!dma_pte_present(pgd)) {
1786 spin_unlock_irqrestore(&iommu->lock, flags);
1787 return -ENOMEM;
1788 }
ea6606b0
WH
1789 }
1790 }
1791 }
1792
1793 context_set_domain_id(context, id);
4ed0d3e6 1794
93a23a72 1795 if (translation != CONTEXT_TT_PASS_THROUGH) {
64ae892b 1796 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
93a23a72
YZ
1797 translation = info ? CONTEXT_TT_DEV_IOTLB :
1798 CONTEXT_TT_MULTI_LEVEL;
1799 }
4ed0d3e6
FY
1800 /*
1801 * In pass through mode, AW must be programmed to indicate the largest
1802 * AGAW value supported by hardware. And ASR is ignored by hardware.
1803 */
93a23a72 1804 if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
4ed0d3e6 1805 context_set_address_width(context, iommu->msagaw);
93a23a72
YZ
1806 else {
1807 context_set_address_root(context, virt_to_phys(pgd));
1808 context_set_address_width(context, iommu->agaw);
1809 }
4ed0d3e6
FY
1810
1811 context_set_translation_type(context, translation);
c07e7d21
MM
1812 context_set_fault_enable(context);
1813 context_set_present(context);
5331fe6f 1814 domain_flush_cache(domain, context, sizeof(*context));
ba395927 1815
4c25a2c1
DW
1816 /*
1817 * It's a non-present to present mapping. If hardware doesn't cache
1818 * non-present entries we only need to flush the write-buffer. If it
1819 * _does_ cache non-present entries, then it does so in the special
1820 * domain #0, which we have to flush:
1821 */
1822 if (cap_caching_mode(iommu->cap)) {
1823 iommu->flush.flush_context(iommu, 0,
1824 (((u16)bus) << 8) | devfn,
1825 DMA_CCMD_MASK_NOBIT,
1826 DMA_CCMD_DEVICE_INVL);
82653633 1827 iommu->flush.flush_iotlb(iommu, domain->id, 0, 0, DMA_TLB_DSI_FLUSH);
4c25a2c1 1828 } else {
ba395927 1829 iommu_flush_write_buffer(iommu);
4c25a2c1 1830 }
93a23a72 1831 iommu_enable_dev_iotlb(info);
ba395927 1832 spin_unlock_irqrestore(&iommu->lock, flags);
c7151a8d
WH
1833
1834 spin_lock_irqsave(&domain->iommu_lock, flags);
1b198bb0 1835 if (!test_and_set_bit(iommu->seq_id, domain->iommu_bmp)) {
c7151a8d 1836 domain->iommu_count++;
4c923d47
SS
1837 if (domain->iommu_count == 1)
1838 domain->nid = iommu->node;
58c610bd 1839 domain_update_iommu_cap(domain);
c7151a8d
WH
1840 }
1841 spin_unlock_irqrestore(&domain->iommu_lock, flags);
ba395927
KA
1842 return 0;
1843}
1844
1845static int
e1f167f3
DW
1846domain_context_mapping(struct dmar_domain *domain, struct device *dev,
1847 int translation)
ba395927
KA
1848{
1849 int ret;
e1f167f3 1850 struct pci_dev *pdev, *tmp, *parent;
64ae892b 1851 struct intel_iommu *iommu;
156baca8 1852 u8 bus, devfn;
64ae892b 1853
e1f167f3 1854 iommu = device_to_iommu(dev, &bus, &devfn);
64ae892b
DW
1855 if (!iommu)
1856 return -ENODEV;
ba395927 1857
156baca8 1858 ret = domain_context_mapping_one(domain, iommu, bus, devfn,
4ed0d3e6 1859 translation);
e1f167f3 1860 if (ret || !dev_is_pci(dev))
ba395927
KA
1861 return ret;
1862
1863 /* dependent device mapping */
e1f167f3 1864 pdev = to_pci_dev(dev);
ba395927
KA
1865 tmp = pci_find_upstream_pcie_bridge(pdev);
1866 if (!tmp)
1867 return 0;
1868 /* Secondary interface's bus number and devfn 0 */
1869 parent = pdev->bus->self;
1870 while (parent != tmp) {
64ae892b 1871 ret = domain_context_mapping_one(domain, iommu,
276dbf99 1872 parent->bus->number,
4ed0d3e6 1873 parent->devfn, translation);
ba395927
KA
1874 if (ret)
1875 return ret;
1876 parent = parent->bus->self;
1877 }
45e829ea 1878 if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
64ae892b 1879 return domain_context_mapping_one(domain, iommu,
4ed0d3e6
FY
1880 tmp->subordinate->number, 0,
1881 translation);
ba395927 1882 else /* this is a legacy PCI bridge */
64ae892b 1883 return domain_context_mapping_one(domain, iommu,
276dbf99 1884 tmp->bus->number,
4ed0d3e6
FY
1885 tmp->devfn,
1886 translation);
ba395927
KA
1887}
1888
e1f167f3 1889static int domain_context_mapped(struct device *dev)
ba395927
KA
1890{
1891 int ret;
e1f167f3 1892 struct pci_dev *pdev, *tmp, *parent;
5331fe6f 1893 struct intel_iommu *iommu;
156baca8 1894 u8 bus, devfn;
5331fe6f 1895
e1f167f3 1896 iommu = device_to_iommu(dev, &bus, &devfn);
5331fe6f
WH
1897 if (!iommu)
1898 return -ENODEV;
ba395927 1899
156baca8 1900 ret = device_context_mapped(iommu, bus, devfn);
e1f167f3 1901 if (!ret || !dev_is_pci(dev))
ba395927 1902 return ret;
e1f167f3 1903
ba395927 1904 /* dependent device mapping */
e1f167f3 1905 pdev = to_pci_dev(dev);
ba395927
KA
1906 tmp = pci_find_upstream_pcie_bridge(pdev);
1907 if (!tmp)
1908 return ret;
1909 /* Secondary interface's bus number and devfn 0 */
1910 parent = pdev->bus->self;
1911 while (parent != tmp) {
8c11e798 1912 ret = device_context_mapped(iommu, parent->bus->number,
276dbf99 1913 parent->devfn);
ba395927
KA
1914 if (!ret)
1915 return ret;
1916 parent = parent->bus->self;
1917 }
5f4d91a1 1918 if (pci_is_pcie(tmp))
276dbf99
DW
1919 return device_context_mapped(iommu, tmp->subordinate->number,
1920 0);
ba395927 1921 else
276dbf99
DW
1922 return device_context_mapped(iommu, tmp->bus->number,
1923 tmp->devfn);
ba395927
KA
1924}
1925
f532959b
FY
1926/* Returns a number of VTD pages, but aligned to MM page size */
1927static inline unsigned long aligned_nrpages(unsigned long host_addr,
1928 size_t size)
1929{
1930 host_addr &= ~PAGE_MASK;
1931 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1932}
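/* Worked example, with illustrative values (assumes 4KiB pages, so
 * PAGE_SHIFT == VTD_PAGE_SHIFT == 12): aligned_nrpages(0x1234, 0x2000)
 *   host_addr &= ~PAGE_MASK       -> 0x234 (offset within the MM page)
 *   PAGE_ALIGN(0x234 + 0x2000)    -> 0x3000
 *   0x3000 >> VTD_PAGE_SHIFT      -> 3
 * so an 8KiB buffer starting at offset 0x234 needs three 4KiB VT-d pages.
 */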
1933
6dd9a7c7
YS
1934/* Return largest possible superpage level for a given mapping */
1935static inline int hardware_largepage_caps(struct dmar_domain *domain,
1936 unsigned long iov_pfn,
1937 unsigned long phy_pfn,
1938 unsigned long pages)
1939{
1940 int support, level = 1;
1941 unsigned long pfnmerge;
1942
1943 support = domain->iommu_superpage;
1944
1945 /* To use a large page, the virtual *and* physical addresses
1946 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
1947 of them will mean we have to use smaller pages. So just
1948 merge them and check both at once. */
1949 pfnmerge = iov_pfn | phy_pfn;
1950
1951 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
1952 pages >>= VTD_STRIDE_SHIFT;
1953 if (!pages)
1954 break;
1955 pfnmerge >>= VTD_STRIDE_SHIFT;
1956 level++;
1957 support--;
1958 }
1959 return level;
1960}
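/* Worked example, with illustrative values (assumes VTD_STRIDE_SHIFT == 9
 * and 4KiB base pages): iommu_superpage == 1 (2MiB pages supported),
 * iov_pfn == 0x200, phy_pfn == 0x400, pages == 1024. pfnmerge == 0x600 has
 * its low 9 bits clear, so one pass of the loop runs: pages -> 2,
 * level -> 2, support -> 0, and the function returns 2, i.e. a 2MiB
 * superpage can be used for this mapping.
 */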
1961
9051aa02
DW
1962static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1963 struct scatterlist *sg, unsigned long phys_pfn,
1964 unsigned long nr_pages, int prot)
e1605495
DW
1965{
1966 struct dma_pte *first_pte = NULL, *pte = NULL;
9051aa02 1967 phys_addr_t uninitialized_var(pteval);
e1605495 1968 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
9051aa02 1969 unsigned long sg_res;
6dd9a7c7
YS
1970 unsigned int largepage_lvl = 0;
1971 unsigned long lvl_pages = 0;
e1605495
DW
1972
1973 BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
1974
1975 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1976 return -EINVAL;
1977
1978 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1979
9051aa02
DW
1980 if (sg)
1981 sg_res = 0;
1982 else {
1983 sg_res = nr_pages + 1;
1984 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1985 }
1986
6dd9a7c7 1987 while (nr_pages > 0) {
c85994e4
DW
1988 uint64_t tmp;
1989
e1605495 1990 if (!sg_res) {
f532959b 1991 sg_res = aligned_nrpages(sg->offset, sg->length);
e1605495
DW
1992 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
1993 sg->dma_length = sg->length;
1994 pteval = page_to_phys(sg_page(sg)) | prot;
6dd9a7c7 1995 phys_pfn = pteval >> VTD_PAGE_SHIFT;
e1605495 1996 }
6dd9a7c7 1997
e1605495 1998 if (!pte) {
6dd9a7c7
YS
1999 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2000
5cf0a76f 2001 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
e1605495
DW
2002 if (!pte)
2003 return -ENOMEM;
6dd9a7c7 2004 /* It is a large page */
6491d4d0 2005 if (largepage_lvl > 1) {
6dd9a7c7 2006 pteval |= DMA_PTE_LARGE_PAGE;
6491d4d0
WD
2007 /* Ensure that old small page tables are removed to make room
2008 for superpage, if they exist. */
2009 dma_pte_clear_range(domain, iov_pfn,
2010 iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1);
2011 dma_pte_free_pagetable(domain, iov_pfn,
2012 iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1);
2013 } else {
6dd9a7c7 2014 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
6491d4d0 2015 }
6dd9a7c7 2016
e1605495
DW
2017 }
2018 /* We don't need a lock here; nobody else
2019 * touches the iova range
2020 */
7766a3fb 2021 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
c85994e4 2022 if (tmp) {
1bf20f0d 2023 static int dumps = 5;
c85994e4
DW
2024 printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2025 iov_pfn, tmp, (unsigned long long)pteval);
1bf20f0d
DW
2026 if (dumps) {
2027 dumps--;
2028 debug_dma_dump_mappings(NULL);
2029 }
2030 WARN_ON(1);
2031 }
6dd9a7c7
YS
2032
2033 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2034
2035 BUG_ON(nr_pages < lvl_pages);
2036 BUG_ON(sg_res < lvl_pages);
2037
2038 nr_pages -= lvl_pages;
2039 iov_pfn += lvl_pages;
2040 phys_pfn += lvl_pages;
2041 pteval += lvl_pages * VTD_PAGE_SIZE;
2042 sg_res -= lvl_pages;
2043
2044 /* If the next PTE would be the first in a new page, then we
2045 need to flush the cache on the entries we've just written.
2046 And then we'll need to recalculate 'pte', so clear it and
2047 let it get set again in the if (!pte) block above.
2048
2049 If we're done (!nr_pages) we need to flush the cache too.
2050
2051 Also if we've been setting superpages, we may need to
2052 recalculate 'pte' and switch back to smaller pages for the
2053 end of the mapping, if the trailing size is not enough to
2054 use another superpage (i.e. sg_res < lvl_pages). */
e1605495 2055 pte++;
6dd9a7c7
YS
2056 if (!nr_pages || first_pte_in_page(pte) ||
2057 (largepage_lvl > 1 && sg_res < lvl_pages)) {
e1605495
DW
2058 domain_flush_cache(domain, first_pte,
2059 (void *)pte - (void *)first_pte);
2060 pte = NULL;
2061 }
6dd9a7c7
YS
2062
2063 if (!sg_res && nr_pages)
e1605495
DW
2064 sg = sg_next(sg);
2065 }
2066 return 0;
2067}
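/* Illustrative walk-through (hypothetical values): mapping nr_pages == 1024
 * physically contiguous, 2MiB-aligned pages with largepage_lvl == 2 gives
 * lvl_pages == 512, so the loop writes only two level-2 PTEs, each tagged
 * DMA_PTE_LARGE_PAGE, and flushes the cache for them once nr_pages drops to
 * zero (or earlier, whenever pte crosses into a new page-table page).
 */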
2068
9051aa02
DW
2069static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2070 struct scatterlist *sg, unsigned long nr_pages,
2071 int prot)
ba395927 2072{
9051aa02
DW
2073 return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
2074}
6f6a00e4 2075
9051aa02
DW
2076static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2077 unsigned long phys_pfn, unsigned long nr_pages,
2078 int prot)
2079{
2080 return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
ba395927
KA
2081}
2082
c7151a8d 2083static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
ba395927 2084{
c7151a8d
WH
2085 if (!iommu)
2086 return;
8c11e798
WH
2087
2088 clear_context_table(iommu, bus, devfn);
2089 iommu->flush.flush_context(iommu, 0, 0, 0,
4c25a2c1 2090 DMA_CCMD_GLOBAL_INVL);
1f0ef2aa 2091 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
ba395927
KA
2092}
2093
109b9b04
DW
2094static inline void unlink_domain_info(struct device_domain_info *info)
2095{
2096 assert_spin_locked(&device_domain_lock);
2097 list_del(&info->link);
2098 list_del(&info->global);
2099 if (info->dev)
0bcb3e28 2100 info->dev->archdata.iommu = NULL;
109b9b04
DW
2101}
2102
ba395927
KA
2103static void domain_remove_dev_info(struct dmar_domain *domain)
2104{
2105 struct device_domain_info *info;
92d03cc8 2106 unsigned long flags, flags2;
ba395927
KA
2107
2108 spin_lock_irqsave(&device_domain_lock, flags);
2109 while (!list_empty(&domain->devices)) {
2110 info = list_entry(domain->devices.next,
2111 struct device_domain_info, link);
109b9b04 2112 unlink_domain_info(info);
ba395927
KA
2113 spin_unlock_irqrestore(&device_domain_lock, flags);
2114
93a23a72 2115 iommu_disable_dev_iotlb(info);
7c7faa11 2116 iommu_detach_dev(info->iommu, info->bus, info->devfn);
ba395927 2117
92d03cc8 2118 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) {
7c7faa11 2119 iommu_detach_dependent_devices(info->iommu, info->dev);
92d03cc8
JL
2120 /* clear this iommu in iommu_bmp, update iommu count
2121 * and capabilities
2122 */
2123 spin_lock_irqsave(&domain->iommu_lock, flags2);
7c7faa11 2124 if (test_and_clear_bit(info->iommu->seq_id,
92d03cc8
JL
2125 domain->iommu_bmp)) {
2126 domain->iommu_count--;
2127 domain_update_iommu_cap(domain);
2128 }
2129 spin_unlock_irqrestore(&domain->iommu_lock, flags2);
2130 }
2131
2132 free_devinfo_mem(info);
ba395927
KA
2133 spin_lock_irqsave(&device_domain_lock, flags);
2134 }
2135 spin_unlock_irqrestore(&device_domain_lock, flags);
2136}
2137
2138/*
2139 * find_domain
1525a29a 2140 * Note: we use struct device->archdata.iommu to store the info
ba395927 2141 */
1525a29a 2142static struct dmar_domain *find_domain(struct device *dev)
ba395927
KA
2143{
2144 struct device_domain_info *info;
2145
2146 /* No lock here, assumes no domain exit in normal case */
1525a29a 2147 info = dev->archdata.iommu;
ba395927
KA
2148 if (info)
2149 return info->domain;
2150 return NULL;
2151}
2152
5a8f40e8 2153static inline struct device_domain_info *
745f2586
JL
2154dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2155{
2156 struct device_domain_info *info;
2157
2158 list_for_each_entry(info, &device_domain_list, global)
41e80dca 2159 if (info->iommu->segment == segment && info->bus == bus &&
745f2586 2160 info->devfn == devfn)
5a8f40e8 2161 return info;
745f2586
JL
2162
2163 return NULL;
2164}
2165
5a8f40e8 2166static struct dmar_domain *dmar_insert_dev_info(struct intel_iommu *iommu,
41e80dca 2167 int bus, int devfn,
b718cd3d
DW
2168 struct device *dev,
2169 struct dmar_domain *domain)
745f2586 2170{
5a8f40e8 2171 struct dmar_domain *found = NULL;
745f2586
JL
2172 struct device_domain_info *info;
2173 unsigned long flags;
2174
2175 info = alloc_devinfo_mem();
2176 if (!info)
b718cd3d 2177 return NULL;
745f2586 2178
745f2586
JL
2179 info->bus = bus;
2180 info->devfn = devfn;
2181 info->dev = dev;
2182 info->domain = domain;
5a8f40e8 2183 info->iommu = iommu;
745f2586
JL
2184 if (!dev)
2185 domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
2186
2187 spin_lock_irqsave(&device_domain_lock, flags);
2188 if (dev)
0bcb3e28 2189 found = find_domain(dev);
5a8f40e8
DW
2190 else {
2191 struct device_domain_info *info2;
41e80dca 2192 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
5a8f40e8
DW
2193 if (info2)
2194 found = info2->domain;
2195 }
745f2586
JL
2196 if (found) {
2197 spin_unlock_irqrestore(&device_domain_lock, flags);
2198 free_devinfo_mem(info);
b718cd3d
DW
2199 /* Caller must free the original domain */
2200 return found;
745f2586
JL
2201 }
2202
b718cd3d
DW
2203 list_add(&info->link, &domain->devices);
2204 list_add(&info->global, &device_domain_list);
2205 if (dev)
2206 dev->archdata.iommu = info;
2207 spin_unlock_irqrestore(&device_domain_lock, flags);
2208
2209 return domain;
745f2586
JL
2210}
2211
ba395927 2212/* domain is initialized */
146922ec 2213static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
ba395927 2214{
e85bb5d4 2215 struct dmar_domain *domain, *free = NULL;
5a8f40e8
DW
2216 struct intel_iommu *iommu = NULL;
2217 struct device_domain_info *info;
146922ec 2218 struct pci_dev *dev_tmp = NULL;
ba395927 2219 unsigned long flags;
146922ec 2220 u8 bus, devfn, bridge_bus, bridge_devfn;
ba395927 2221
146922ec 2222 domain = find_domain(dev);
ba395927
KA
2223 if (domain)
2224 return domain;
2225
146922ec
DW
2226 if (dev_is_pci(dev)) {
2227 struct pci_dev *pdev = to_pci_dev(dev);
2228 u16 segment;
276dbf99 2229
146922ec
DW
2230 segment = pci_domain_nr(pdev->bus);
2231 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
2232 if (dev_tmp) {
2233 if (pci_is_pcie(dev_tmp)) {
2234 bridge_bus = dev_tmp->subordinate->number;
2235 bridge_devfn = 0;
2236 } else {
2237 bridge_bus = dev_tmp->bus->number;
2238 bridge_devfn = dev_tmp->devfn;
2239 }
2240 spin_lock_irqsave(&device_domain_lock, flags);
9f05d3fb
DW
2241 info = dmar_search_domain_by_dev_info(segment,
2242 bridge_bus,
2243 bridge_devfn);
146922ec
DW
2244 if (info) {
2245 iommu = info->iommu;
2246 domain = info->domain;
2247 }
2248 spin_unlock_irqrestore(&device_domain_lock, flags);
2249 /* the pcie-pci bridge already has a domain, use it */
2250 if (info)
2251 goto found_domain;
5a8f40e8 2252 }
ba395927
KA
2253 }
2254
146922ec
DW
2255 iommu = device_to_iommu(dev, &bus, &devfn);
2256 if (!iommu)
2257 goto error;
ba395927 2258
146922ec 2259 /* Allocate and initialize new domain for the device */
92d03cc8 2260 domain = alloc_domain(false);
745f2586
JL
2261 if (!domain)
2262 goto error;
2263 if (iommu_attach_domain(domain, iommu)) {
2fe9723d 2264 free_domain_mem(domain);
14d40569 2265 domain = NULL;
ba395927 2266 goto error;
2c2e2c38 2267 }
e85bb5d4
JL
2268 free = domain;
2269 if (domain_init(domain, gaw))
ba395927 2270 goto error;
ba395927
KA
2271
2272 /* register pcie-to-pci device */
2273 if (dev_tmp) {
146922ec
DW
2274 domain = dmar_insert_dev_info(iommu, bridge_bus, bridge_devfn,
2275 NULL, domain);
b718cd3d 2276 if (!domain)
ba395927 2277 goto error;
ba395927
KA
2278 }
2279
2280found_domain:
146922ec 2281 domain = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);
ba395927 2282error:
b718cd3d 2283 if (free != domain)
e85bb5d4 2284 domain_exit(free);
b718cd3d
DW
2285
2286 return domain;
ba395927
KA
2287}
2288
2c2e2c38 2289static int iommu_identity_mapping;
e0fc7e0b
DW
2290#define IDENTMAP_ALL 1
2291#define IDENTMAP_GFX 2
2292#define IDENTMAP_AZALIA 4
2c2e2c38 2293
b213203e
DW
2294static int iommu_domain_identity_map(struct dmar_domain *domain,
2295 unsigned long long start,
2296 unsigned long long end)
ba395927 2297{
c5395d5c
DW
2298 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2299 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2300
2301 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2302 dma_to_mm_pfn(last_vpfn))) {
ba395927 2303 printk(KERN_ERR "IOMMU: reserve iova failed\n");
b213203e 2304 return -ENOMEM;
ba395927
KA
2305 }
2306
c5395d5c
DW
2307 pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
2308 start, end, domain->id);
ba395927
KA
2309 /*
2310 * RMRR range might have overlap with physical memory range,
2311 * clear it first
2312 */
c5395d5c 2313 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
ba395927 2314
c5395d5c
DW
2315 return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2316 last_vpfn - first_vpfn + 1,
61df7443 2317 DMA_PTE_READ|DMA_PTE_WRITE);
b213203e
DW
2318}
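/* Worked example: the ISA/floppy workaround below calls this path with
 * start == 0 and end == 16MiB - 1, giving first_vpfn == 0 and
 * last_vpfn == 0xfff, so 4096 page frames are reserved in the iova
 * allocator and mapped 1:1 with read/write permission.
 */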
2319
0b9d9753 2320static int iommu_prepare_identity_map(struct device *dev,
b213203e
DW
2321 unsigned long long start,
2322 unsigned long long end)
2323{
2324 struct dmar_domain *domain;
2325 int ret;
2326
0b9d9753 2327 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
b213203e
DW
2328 if (!domain)
2329 return -ENOMEM;
2330
19943b0e
DW
2331 /* For _hardware_ passthrough, don't bother. But for software
2332 passthrough, we do it anyway -- it may indicate a memory
2333 range which is reserved in E820 and so didn't get set
2334 up in si_domain to start with */
2335 if (domain == si_domain && hw_pass_through) {
2336 printk("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
0b9d9753 2337 dev_name(dev), start, end);
19943b0e
DW
2338 return 0;
2339 }
2340
2341 printk(KERN_INFO
2342 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
0b9d9753 2343 dev_name(dev), start, end);
2ff729f5 2344
5595b528
DW
2345 if (end < start) {
2346 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2347 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2348 dmi_get_system_info(DMI_BIOS_VENDOR),
2349 dmi_get_system_info(DMI_BIOS_VERSION),
2350 dmi_get_system_info(DMI_PRODUCT_VERSION));
2351 ret = -EIO;
2352 goto error;
2353 }
2354
2ff729f5
DW
2355 if (end >> agaw_to_width(domain->agaw)) {
2356 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2357 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2358 agaw_to_width(domain->agaw),
2359 dmi_get_system_info(DMI_BIOS_VENDOR),
2360 dmi_get_system_info(DMI_BIOS_VERSION),
2361 dmi_get_system_info(DMI_PRODUCT_VERSION));
2362 ret = -EIO;
2363 goto error;
2364 }
19943b0e 2365
b213203e 2366 ret = iommu_domain_identity_map(domain, start, end);
ba395927
KA
2367 if (ret)
2368 goto error;
2369
2370 /* context entry init */
0b9d9753 2371 ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);
b213203e
DW
2372 if (ret)
2373 goto error;
2374
2375 return 0;
2376
2377 error:
ba395927
KA
2378 domain_exit(domain);
2379 return ret;
ba395927
KA
2380}
2381
2382static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
0b9d9753 2383 struct device *dev)
ba395927 2384{
0b9d9753 2385 if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927 2386 return 0;
0b9d9753
DW
2387 return iommu_prepare_identity_map(dev, rmrr->base_address,
2388 rmrr->end_address);
ba395927
KA
2389}
2390
d3f13810 2391#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
49a0429e
KA
2392static inline void iommu_prepare_isa(void)
2393{
2394 struct pci_dev *pdev;
2395 int ret;
2396
2397 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2398 if (!pdev)
2399 return;
2400
c7ab48d2 2401 printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
0b9d9753 2402 ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);
49a0429e
KA
2403
2404 if (ret)
c7ab48d2
DW
2405 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
2406 "floppy might not work\n");
49a0429e
KA
2407
2408}
2409#else
2410static inline void iommu_prepare_isa(void)
2411{
2412 return;
2413}
d3f13810 2414#endif /* !CONFIG_INTEL_IOMMU_FLOPPY_WA */
49a0429e 2415
2c2e2c38 2416static int md_domain_init(struct dmar_domain *domain, int guest_width);
c7ab48d2 2417
071e1374 2418static int __init si_domain_init(int hw)
2c2e2c38
FY
2419{
2420 struct dmar_drhd_unit *drhd;
2421 struct intel_iommu *iommu;
c7ab48d2 2422 int nid, ret = 0;
2c2e2c38 2423
92d03cc8 2424 si_domain = alloc_domain(false);
2c2e2c38
FY
2425 if (!si_domain)
2426 return -EFAULT;
2427
92d03cc8
JL
2428 si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
2429
2c2e2c38
FY
2430 for_each_active_iommu(iommu, drhd) {
2431 ret = iommu_attach_domain(si_domain, iommu);
2432 if (ret) {
2433 domain_exit(si_domain);
2434 return -EFAULT;
2435 }
2436 }
2437
2438 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2439 domain_exit(si_domain);
2440 return -EFAULT;
2441 }
2442
9544c003
JL
2443 pr_debug("IOMMU: identity mapping domain is domain %d\n",
2444 si_domain->id);
2c2e2c38 2445
19943b0e
DW
2446 if (hw)
2447 return 0;
2448
c7ab48d2 2449 for_each_online_node(nid) {
5dfe8660
TH
2450 unsigned long start_pfn, end_pfn;
2451 int i;
2452
2453 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2454 ret = iommu_domain_identity_map(si_domain,
2455 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2456 if (ret)
2457 return ret;
2458 }
c7ab48d2
DW
2459 }
2460
2c2e2c38
FY
2461 return 0;
2462}
2463
9b226624 2464static int identity_mapping(struct device *dev)
2c2e2c38
FY
2465{
2466 struct device_domain_info *info;
2467
2468 if (likely(!iommu_identity_mapping))
2469 return 0;
2470
9b226624 2471 info = dev->archdata.iommu;
cb452a40
MT
2472 if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2473 return (info->domain == si_domain);
2c2e2c38 2474
2c2e2c38
FY
2475 return 0;
2476}
2477
2478static int domain_add_dev_info(struct dmar_domain *domain,
5913c9bf 2479 struct device *dev, int translation)
2c2e2c38 2480{
0ac72664 2481 struct dmar_domain *ndomain;
5a8f40e8 2482 struct intel_iommu *iommu;
156baca8 2483 u8 bus, devfn;
5fe60f4e 2484 int ret;
2c2e2c38 2485
5913c9bf 2486 iommu = device_to_iommu(dev, &bus, &devfn);
5a8f40e8
DW
2487 if (!iommu)
2488 return -ENODEV;
2489
5913c9bf 2490 ndomain = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);
0ac72664
DW
2491 if (ndomain != domain)
2492 return -EBUSY;
2c2e2c38 2493
5913c9bf 2494 ret = domain_context_mapping(domain, dev, translation);
e2ad23d0 2495 if (ret) {
5913c9bf 2496 domain_remove_one_dev_info(domain, dev);
e2ad23d0
DW
2497 return ret;
2498 }
2499
2c2e2c38
FY
2500 return 0;
2501}
2502
0b9d9753 2503static bool device_has_rmrr(struct device *dev)
ea2447f7
TM
2504{
2505 struct dmar_rmrr_unit *rmrr;
832bd858 2506 struct device *tmp;
ea2447f7
TM
2507 int i;
2508
0e242612 2509 rcu_read_lock();
ea2447f7 2510 for_each_rmrr_units(rmrr) {
b683b230
JL
2511 /*
2512 * Return TRUE if this RMRR contains the device that
2513 * is passed in.
2514 */
2515 for_each_active_dev_scope(rmrr->devices,
2516 rmrr->devices_cnt, i, tmp)
0b9d9753 2517 if (tmp == dev) {
0e242612 2518 rcu_read_unlock();
ea2447f7 2519 return true;
b683b230 2520 }
ea2447f7 2521 }
0e242612 2522 rcu_read_unlock();
ea2447f7
TM
2523 return false;
2524}
2525
3bdb2591 2526static int iommu_should_identity_map(struct device *dev, int startup)
6941af28 2527{
ea2447f7 2528
3bdb2591
DW
2529 if (dev_is_pci(dev)) {
2530 struct pci_dev *pdev = to_pci_dev(dev);
ea2447f7 2531
3bdb2591
DW
2532 /*
2533 * We want to prevent any device associated with an RMRR from
2534 * getting placed into the SI Domain. This is done because
2535 * problems exist when devices are moved in and out of domains
2536 * and their respective RMRR info is lost. We exempt USB devices
2537 * from this process due to their usage of RMRRs that are known
2538 * to not be needed after BIOS hand-off to OS.
2539 */
2540 if (device_has_rmrr(dev) &&
2541 (pdev->class >> 8) != PCI_CLASS_SERIAL_USB)
2542 return 0;
e0fc7e0b 2543
3bdb2591
DW
2544 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2545 return 1;
e0fc7e0b 2546
3bdb2591
DW
2547 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2548 return 1;
6941af28 2549
3bdb2591 2550 if (!(iommu_identity_mapping & IDENTMAP_ALL))
3dfc813d 2551 return 0;
3bdb2591
DW
2552
2553 /*
2554 * We want to start off with all devices in the 1:1 domain, and
2555 * take them out later if we find they can't access all of memory.
2556 *
2557 * However, we can't do this for PCI devices behind bridges,
2558 * because all PCI devices behind the same bridge will end up
2559 * with the same source-id on their transactions.
2560 *
2561 * Practically speaking, we can't change things around for these
2562 * devices at run-time, because we can't be sure there'll be no
2563 * DMA transactions in flight for any of their siblings.
2564 *
2565 * So PCI devices (unless they're on the root bus) as well as
2566 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2567 * the 1:1 domain, just in _case_ one of their siblings turns out
2568 * not to be able to map all of memory.
2569 */
2570 if (!pci_is_pcie(pdev)) {
2571 if (!pci_is_root_bus(pdev->bus))
2572 return 0;
2573 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2574 return 0;
2575 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
3dfc813d 2576 return 0;
3bdb2591
DW
2577 } else {
2578 if (device_has_rmrr(dev))
2579 return 0;
2580 }
3dfc813d 2581
3bdb2591 2582 /*
3dfc813d 2583 * At boot time, we don't yet know if devices will be 64-bit capable.
3bdb2591 2584 * Assume that they will -- if they turn out not to be, then we can
3dfc813d
DW
2585 * take them out of the 1:1 domain later.
2586 */
8fcc5372
CW
2587 if (!startup) {
2588 /*
2589 * If the device's dma_mask is less than the system's memory
2590 * size then this is not a candidate for identity mapping.
2591 */
3bdb2591 2592 u64 dma_mask = *dev->dma_mask;
8fcc5372 2593
3bdb2591
DW
2594 if (dev->coherent_dma_mask &&
2595 dev->coherent_dma_mask < dma_mask)
2596 dma_mask = dev->coherent_dma_mask;
8fcc5372 2597
3bdb2591 2598 return dma_mask >= dma_get_required_mask(dev);
8fcc5372 2599 }
6941af28
DW
2600
2601 return 1;
2602}
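/* Illustrative example (hypothetical device): at run time (startup == 0), a
 * device that only advertises DMA_BIT_MASK(32) on a machine with RAM above
 * 4GiB fails the dma_get_required_mask() check above, so it is kept out of
 * (or later dropped from) the identity-mapped si_domain.
 */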
2603
cf04eee8
DW
2604static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
2605{
2606 int ret;
2607
2608 if (!iommu_should_identity_map(dev, 1))
2609 return 0;
2610
2611 ret = domain_add_dev_info(si_domain, dev,
2612 hw ? CONTEXT_TT_PASS_THROUGH :
2613 CONTEXT_TT_MULTI_LEVEL);
2614 if (!ret)
2615 pr_info("IOMMU: %s identity mapping for device %s\n",
2616 hw ? "hardware" : "software", dev_name(dev));
2617 else if (ret == -ENODEV)
2618 /* device not associated with an iommu */
2619 ret = 0;
2620
2621 return ret;
2622}
2623
2624
071e1374 2625static int __init iommu_prepare_static_identity_mapping(int hw)
2c2e2c38 2626{
2c2e2c38 2627 struct pci_dev *pdev = NULL;
cf04eee8
DW
2628 struct dmar_drhd_unit *drhd;
2629 struct intel_iommu *iommu;
2630 struct device *dev;
2631 int i;
2632 int ret = 0;
2c2e2c38 2633
19943b0e 2634 ret = si_domain_init(hw);
2c2e2c38
FY
2635 if (ret)
2636 return -EFAULT;
2637
2c2e2c38 2638 for_each_pci_dev(pdev) {
cf04eee8
DW
2639 ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
2640 if (ret)
2641 return ret;
2642 }
2643
2644 for_each_active_iommu(iommu, drhd)
2645 for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
2646 struct acpi_device_physical_node *pn;
2647 struct acpi_device *adev;
2648
2649 if (dev->bus != &acpi_bus_type)
2650 continue;
2651
2652 adev = to_acpi_device(dev);
2653 mutex_lock(&adev->physical_node_lock);
2654 list_for_each_entry(pn, &adev->physical_node_list, node) {
2655 ret = dev_prepare_static_identity_mapping(pn->dev, hw);
2656 if (ret)
2657 break;
eae460b6 2658 }
cf04eee8
DW
2659 mutex_unlock(&adev->physical_node_lock);
2660 if (ret)
2661 return ret;
62edf5dc 2662 }
2c2e2c38
FY
2663
2664 return 0;
2665}
2666
b779260b 2667static int __init init_dmars(void)
ba395927
KA
2668{
2669 struct dmar_drhd_unit *drhd;
2670 struct dmar_rmrr_unit *rmrr;
832bd858 2671 struct device *dev;
ba395927 2672 struct intel_iommu *iommu;
9d783ba0 2673 int i, ret;
2c2e2c38 2674
ba395927
KA
2675 /*
2676 * for each drhd
2677 * allocate root
2678 * initialize and program root entry to not present
2679 * endfor
2680 */
2681 for_each_drhd_unit(drhd) {
5e0d2a6f 2682 /*
2683 * lock not needed as this is only incremented in the single-
2684 * threaded kernel __init code path; all other accesses are
2685 * read-only
2686 */
1b198bb0
MT
2687 if (g_num_of_iommus < IOMMU_UNITS_SUPPORTED) {
2688 g_num_of_iommus++;
2689 continue;
2690 }
2691 printk_once(KERN_ERR "intel-iommu: exceeded %d IOMMUs\n",
2692 IOMMU_UNITS_SUPPORTED);
5e0d2a6f 2693 }
2694
d9630fe9
WH
2695 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2696 GFP_KERNEL);
2697 if (!g_iommus) {
2698 printk(KERN_ERR "Allocating global iommu array failed\n");
2699 ret = -ENOMEM;
2700 goto error;
2701 }
2702
80b20dd8 2703 deferred_flush = kzalloc(g_num_of_iommus *
2704 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2705 if (!deferred_flush) {
5e0d2a6f 2706 ret = -ENOMEM;
989d51fc 2707 goto free_g_iommus;
5e0d2a6f 2708 }
2709
7c919779 2710 for_each_active_iommu(iommu, drhd) {
d9630fe9 2711 g_iommus[iommu->seq_id] = iommu;
ba395927 2712
e61d98d8
SS
2713 ret = iommu_init_domains(iommu);
2714 if (ret)
989d51fc 2715 goto free_iommu;
e61d98d8 2716
ba395927
KA
2717 /*
2718 * TBD:
2719 * we could share the same root & context tables
25985edc 2720 * among all IOMMUs. Need to split it later.
ba395927
KA
2721 */
2722 ret = iommu_alloc_root_entry(iommu);
2723 if (ret) {
2724 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
989d51fc 2725 goto free_iommu;
ba395927 2726 }
4ed0d3e6 2727 if (!ecap_pass_through(iommu->ecap))
19943b0e 2728 hw_pass_through = 0;
ba395927
KA
2729 }
2730
1531a6a6
SS
2731 /*
2732 * Start from a sane iommu hardware state.
2733 */
7c919779 2734 for_each_active_iommu(iommu, drhd) {
1531a6a6
SS
2735 /*
2736 * If the queued invalidation is already initialized by us
2737 * (for example, while enabling interrupt-remapping) then
2738 * we got the things already rolling from a sane state.
2739 */
2740 if (iommu->qi)
2741 continue;
2742
2743 /*
2744 * Clear any previous faults.
2745 */
2746 dmar_fault(-1, iommu);
2747 /*
2748 * Disable queued invalidation if supported and already enabled
2749 * before OS handover.
2750 */
2751 dmar_disable_qi(iommu);
2752 }
2753
7c919779 2754 for_each_active_iommu(iommu, drhd) {
a77b67d4
YS
2755 if (dmar_enable_qi(iommu)) {
2756 /*
2757 * Queued Invalidate not enabled, use Register Based
2758 * Invalidate
2759 */
2760 iommu->flush.flush_context = __iommu_flush_context;
2761 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
680a7524 2762 printk(KERN_INFO "IOMMU %d 0x%Lx: using Register based "
b4e0f9eb 2763 "invalidation\n",
680a7524 2764 iommu->seq_id,
b4e0f9eb 2765 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
2766 } else {
2767 iommu->flush.flush_context = qi_flush_context;
2768 iommu->flush.flush_iotlb = qi_flush_iotlb;
680a7524 2769 printk(KERN_INFO "IOMMU %d 0x%Lx: using Queued "
b4e0f9eb 2770 "invalidation\n",
680a7524 2771 iommu->seq_id,
b4e0f9eb 2772 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
2773 }
2774 }
2775
19943b0e 2776 if (iommu_pass_through)
e0fc7e0b
DW
2777 iommu_identity_mapping |= IDENTMAP_ALL;
2778
d3f13810 2779#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
e0fc7e0b 2780 iommu_identity_mapping |= IDENTMAP_GFX;
19943b0e 2781#endif
e0fc7e0b
DW
2782
2783 check_tylersburg_isoch();
2784
ba395927 2785 /*
19943b0e
DW
2786 * If pass through is not set or not enabled, setup context entries for
2787 * identity mappings for rmrr, gfx, and isa and may fall back to static
2788 * identity mapping if iommu_identity_mapping is set.
ba395927 2789 */
19943b0e
DW
2790 if (iommu_identity_mapping) {
2791 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
4ed0d3e6 2792 if (ret) {
19943b0e 2793 printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
989d51fc 2794 goto free_iommu;
ba395927
KA
2795 }
2796 }
ba395927 2797 /*
19943b0e
DW
2798 * For each rmrr
2799 * for each dev attached to rmrr
2800 * do
2801 * locate drhd for dev, alloc domain for dev
2802 * allocate free domain
2803 * allocate page table entries for rmrr
2804 * if context not allocated for bus
2805 * allocate and init context
2806 * set present in root table for this bus
2807 * init context with domain, translation etc
2808 * endfor
2809 * endfor
ba395927 2810 */
19943b0e
DW
2811 printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2812 for_each_rmrr_units(rmrr) {
b683b230
JL
2813 /* some BIOS lists non-exist devices in DMAR table. */
2814 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
832bd858 2815 i, dev) {
0b9d9753 2816 ret = iommu_prepare_rmrr_dev(rmrr, dev);
19943b0e
DW
2817 if (ret)
2818 printk(KERN_ERR
2819 "IOMMU: mapping reserved region failed\n");
ba395927 2820 }
4ed0d3e6 2821 }
49a0429e 2822
19943b0e
DW
2823 iommu_prepare_isa();
2824
ba395927
KA
2825 /*
2826 * for each drhd
2827 * enable fault log
2828 * global invalidate context cache
2829 * global invalidate iotlb
2830 * enable translation
2831 */
7c919779 2832 for_each_iommu(iommu, drhd) {
51a63e67
JC
2833 if (drhd->ignored) {
2834 /*
2835 * we always have to disable PMRs or DMA may fail on
2836 * this device
2837 */
2838 if (force_on)
7c919779 2839 iommu_disable_protect_mem_regions(iommu);
ba395927 2840 continue;
51a63e67 2841 }
ba395927
KA
2842
2843 iommu_flush_write_buffer(iommu);
2844
3460a6d9
KA
2845 ret = dmar_set_interrupt(iommu);
2846 if (ret)
989d51fc 2847 goto free_iommu;
3460a6d9 2848
ba395927
KA
2849 iommu_set_root_entry(iommu);
2850
4c25a2c1 2851 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
1f0ef2aa 2852 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
f8bab735 2853
ba395927
KA
2854 ret = iommu_enable_translation(iommu);
2855 if (ret)
989d51fc 2856 goto free_iommu;
b94996c9
DW
2857
2858 iommu_disable_protect_mem_regions(iommu);
ba395927
KA
2859 }
2860
2861 return 0;
989d51fc
JL
2862
2863free_iommu:
7c919779 2864 for_each_active_iommu(iommu, drhd)
a868e6b7 2865 free_dmar_iommu(iommu);
9bdc531e 2866 kfree(deferred_flush);
989d51fc 2867free_g_iommus:
d9630fe9 2868 kfree(g_iommus);
989d51fc 2869error:
ba395927
KA
2870 return ret;
2871}
2872
5a5e02a6 2873/* This takes a number of _MM_ pages, not VTD pages */
875764de
DW
2874static struct iova *intel_alloc_iova(struct device *dev,
2875 struct dmar_domain *domain,
2876 unsigned long nrpages, uint64_t dma_mask)
ba395927 2877{
ba395927 2878 struct iova *iova = NULL;
ba395927 2879
875764de
DW
2880 /* Restrict dma_mask to the width that the iommu can handle */
2881 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2882
2883 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
ba395927
KA
2884 /*
2885 * First try to allocate an io virtual address in
284901a9 2886 * DMA_BIT_MASK(32) and if that fails then try allocating
3609801e 2887 * from higher range
ba395927 2888 */
875764de
DW
2889 iova = alloc_iova(&domain->iovad, nrpages,
2890 IOVA_PFN(DMA_BIT_MASK(32)), 1);
2891 if (iova)
2892 return iova;
2893 }
2894 iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2895 if (unlikely(!iova)) {
2896 printk(KERN_ERR "Allocating %ld-page iova for %s failed",
207e3592 2897 nrpages, dev_name(dev));
f76aec76
KA
2898 return NULL;
2899 }
2900
2901 return iova;
2902}
2903
d4b709f4 2904static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
f76aec76
KA
2905{
2906 struct dmar_domain *domain;
2907 int ret;
2908
d4b709f4 2909 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
f76aec76 2910 if (!domain) {
d4b709f4
DW
2911 printk(KERN_ERR "Allocating domain for %s failed",
2912 dev_name(dev));
4fe05bbc 2913 return NULL;
ba395927
KA
2914 }
2915
2916 /* make sure context mapping is ok */
d4b709f4
DW
2917 if (unlikely(!domain_context_mapped(dev))) {
2918 ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);
f76aec76 2919 if (ret) {
d4b709f4
DW
2920 printk(KERN_ERR "Domain context map for %s failed",
2921 dev_name(dev));
4fe05bbc 2922 return NULL;
f76aec76 2923 }
ba395927
KA
2924 }
2925
f76aec76
KA
2926 return domain;
2927}
2928
d4b709f4 2929static inline struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
147202aa
DW
2930{
2931 struct device_domain_info *info;
2932
2933 /* No lock here, assumes no domain exit in normal case */
d4b709f4 2934 info = dev->archdata.iommu;
147202aa
DW
2935 if (likely(info))
2936 return info->domain;
2937
2938 return __get_valid_domain_for_dev(dev);
2939}
2940
3d89194a 2941static int iommu_dummy(struct device *dev)
2c2e2c38 2942{
3d89194a 2943 return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2c2e2c38
FY
2944}
2945
ecb509ec 2946/* Check if the dev needs to go through non-identity map and unmap process.*/
73676832 2947static int iommu_no_mapping(struct device *dev)
2c2e2c38
FY
2948{
2949 int found;
2950
3d89194a 2951 if (iommu_dummy(dev))
1e4c64c4
DW
2952 return 1;
2953
2c2e2c38 2954 if (!iommu_identity_mapping)
1e4c64c4 2955 return 0;
2c2e2c38 2956
9b226624 2957 found = identity_mapping(dev);
2c2e2c38 2958 if (found) {
ecb509ec 2959 if (iommu_should_identity_map(dev, 0))
2c2e2c38
FY
2960 return 1;
2961 else {
2962 /*
2963 * 32 bit DMA is removed from si_domain and fall back
2964 * to non-identity mapping.
2965 */
bf9c9eda 2966 domain_remove_one_dev_info(si_domain, dev);
2c2e2c38 2967 printk(KERN_INFO "32bit %s uses non-identity mapping\n",
ecb509ec 2968 dev_name(dev));
2c2e2c38
FY
2969 return 0;
2970 }
2971 } else {
2972 /*
2973 * In case of a detached 64 bit DMA device from vm, the device
2974 * is put into si_domain for identity mapping.
2975 */
ecb509ec 2976 if (iommu_should_identity_map(dev, 0)) {
2c2e2c38 2977 int ret;
5913c9bf 2978 ret = domain_add_dev_info(si_domain, dev,
5fe60f4e
DW
2979 hw_pass_through ?
2980 CONTEXT_TT_PASS_THROUGH :
2981 CONTEXT_TT_MULTI_LEVEL);
2c2e2c38
FY
2982 if (!ret) {
2983 printk(KERN_INFO "64bit %s uses identity mapping\n",
ecb509ec 2984 dev_name(dev));
2c2e2c38
FY
2985 return 1;
2986 }
2987 }
2988 }
2989
1e4c64c4 2990 return 0;
2c2e2c38
FY
2991}
2992
5040a918 2993static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
bb9e6d65 2994 size_t size, int dir, u64 dma_mask)
f76aec76 2995{
f76aec76 2996 struct dmar_domain *domain;
5b6985ce 2997 phys_addr_t start_paddr;
f76aec76
KA
2998 struct iova *iova;
2999 int prot = 0;
6865f0d1 3000 int ret;
8c11e798 3001 struct intel_iommu *iommu;
33041ec0 3002 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
f76aec76
KA
3003
3004 BUG_ON(dir == DMA_NONE);
2c2e2c38 3005
5040a918 3006 if (iommu_no_mapping(dev))
6865f0d1 3007 return paddr;
f76aec76 3008
5040a918 3009 domain = get_valid_domain_for_dev(dev);
f76aec76
KA
3010 if (!domain)
3011 return 0;
3012
8c11e798 3013 iommu = domain_get_iommu(domain);
88cb6a74 3014 size = aligned_nrpages(paddr, size);
f76aec76 3015
5040a918 3016 iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
f76aec76
KA
3017 if (!iova)
3018 goto error;
3019
ba395927
KA
3020 /*
3021 * Check if DMAR supports zero-length reads on write only
3022 * mappings..
3023 */
3024 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3025 !cap_zlr(iommu->cap))
ba395927
KA
3026 prot |= DMA_PTE_READ;
3027 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3028 prot |= DMA_PTE_WRITE;
3029 /*
6865f0d1 3030 * paddr - (paddr + size) might be partial page, we should map the whole
ba395927 3031 * page. Note: if two part of one page are separately mapped, we
6865f0d1 3032 * might have two guest_addr mapping to the same host paddr, but this
ba395927
KA
3033 * is not a big problem
3034 */
0ab36de2 3035 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
33041ec0 3036 mm_to_dma_pfn(paddr_pfn), size, prot);
ba395927
KA
3037 if (ret)
3038 goto error;
3039
1f0ef2aa
DW
3040 /* it's a non-present to present mapping. Only flush if caching mode */
3041 if (cap_caching_mode(iommu->cap))
ea8ea460 3042 iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 0, 1);
1f0ef2aa 3043 else
8c11e798 3044 iommu_flush_write_buffer(iommu);
f76aec76 3045
03d6a246
DW
3046 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
3047 start_paddr += paddr & ~PAGE_MASK;
3048 return start_paddr;
ba395927 3049
ba395927 3050error:
f76aec76
KA
3051 if (iova)
3052 __free_iova(&domain->iovad, iova);
4cf2e75d 3053 printk(KERN_ERR "Device %s request: %zx@%llx dir %d --- failed\n",
5040a918 3054 dev_name(dev), size, (unsigned long long)paddr, dir);
ba395927
KA
3055 return 0;
3056}
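/* Worked example, with illustrative values (4KiB pages): mapping
 * paddr == 0x12345678 and size == 0x100 gives aligned_nrpages() == 1, so one
 * IOVA page is allocated and the returned DMA handle is
 * (iova->pfn_lo << PAGE_SHIFT) + 0x678, i.e. the page offset is preserved.
 */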
3057
ffbbef5c
FT
3058static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3059 unsigned long offset, size_t size,
3060 enum dma_data_direction dir,
3061 struct dma_attrs *attrs)
bb9e6d65 3062{
ffbbef5c 3063 return __intel_map_single(dev, page_to_phys(page) + offset, size,
46333e37 3064 dir, *dev->dma_mask);
bb9e6d65
FT
3065}
3066
5e0d2a6f 3067static void flush_unmaps(void)
3068{
80b20dd8 3069 int i, j;
5e0d2a6f 3070
5e0d2a6f 3071 timer_on = 0;
3072
3073 /* just flush them all */
3074 for (i = 0; i < g_num_of_iommus; i++) {
a2bb8459
WH
3075 struct intel_iommu *iommu = g_iommus[i];
3076 if (!iommu)
3077 continue;
c42d9f32 3078
9dd2fe89
YZ
3079 if (!deferred_flush[i].next)
3080 continue;
3081
78d5f0f5
NA
3082 /* In caching mode, global flushes turn emulation expensive */
3083 if (!cap_caching_mode(iommu->cap))
3084 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
93a23a72 3085 DMA_TLB_GLOBAL_FLUSH);
9dd2fe89 3086 for (j = 0; j < deferred_flush[i].next; j++) {
93a23a72
YZ
3087 unsigned long mask;
3088 struct iova *iova = deferred_flush[i].iova[j];
78d5f0f5
NA
3089 struct dmar_domain *domain = deferred_flush[i].domain[j];
3090
3091 /* On real hardware multiple invalidations are expensive */
3092 if (cap_caching_mode(iommu->cap))
3093 iommu_flush_iotlb_psi(iommu, domain->id,
ea8ea460
DW
3094 iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1,
3095 !deferred_flush[i].freelist[j], 0);
78d5f0f5
NA
3096 else {
3097 mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1));
3098 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
3099 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
3100 }
93a23a72 3101 __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
ea8ea460
DW
3102 if (deferred_flush[i].freelist[j])
3103 dma_free_pagelist(deferred_flush[i].freelist[j]);
80b20dd8 3104 }
9dd2fe89 3105 deferred_flush[i].next = 0;
5e0d2a6f 3106 }
3107
5e0d2a6f 3108 list_size = 0;
5e0d2a6f 3109}
3110
3111static void flush_unmaps_timeout(unsigned long data)
3112{
80b20dd8 3113 unsigned long flags;
3114
3115 spin_lock_irqsave(&async_umap_flush_lock, flags);
5e0d2a6f 3116 flush_unmaps();
80b20dd8 3117 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
5e0d2a6f 3118}
3119
ea8ea460 3120static void add_unmap(struct dmar_domain *dom, struct iova *iova, struct page *freelist)
5e0d2a6f 3121{
3122 unsigned long flags;
80b20dd8 3123 int next, iommu_id;
8c11e798 3124 struct intel_iommu *iommu;
5e0d2a6f 3125
3126 spin_lock_irqsave(&async_umap_flush_lock, flags);
80b20dd8 3127 if (list_size == HIGH_WATER_MARK)
3128 flush_unmaps();
3129
8c11e798
WH
3130 iommu = domain_get_iommu(dom);
3131 iommu_id = iommu->seq_id;
c42d9f32 3132
80b20dd8 3133 next = deferred_flush[iommu_id].next;
3134 deferred_flush[iommu_id].domain[next] = dom;
3135 deferred_flush[iommu_id].iova[next] = iova;
ea8ea460 3136 deferred_flush[iommu_id].freelist[next] = freelist;
80b20dd8 3137 deferred_flush[iommu_id].next++;
5e0d2a6f 3138
3139 if (!timer_on) {
3140 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
3141 timer_on = 1;
3142 }
3143 list_size++;
3144 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3145}
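/* Note on the deferred (non-strict) path: unmapped IOVAs are queued per
 * IOMMU and only released by flush_unmaps(), either when list_size reaches
 * HIGH_WATER_MARK or when the 10ms unmap_timer fires, so one IOTLB flush
 * can cover many unmaps.
 */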
3146
ffbbef5c
FT
3147static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3148 size_t size, enum dma_data_direction dir,
3149 struct dma_attrs *attrs)
ba395927 3150{
f76aec76 3151 struct dmar_domain *domain;
d794dc9b 3152 unsigned long start_pfn, last_pfn;
ba395927 3153 struct iova *iova;
8c11e798 3154 struct intel_iommu *iommu;
ea8ea460 3155 struct page *freelist;
ba395927 3156
73676832 3157 if (iommu_no_mapping(dev))
f76aec76 3158 return;
2c2e2c38 3159
1525a29a 3160 domain = find_domain(dev);
ba395927
KA
3161 BUG_ON(!domain);
3162
8c11e798
WH
3163 iommu = domain_get_iommu(domain);
3164
ba395927 3165 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
85b98276
DW
3166 if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
3167 (unsigned long long)dev_addr))
ba395927 3168 return;
ba395927 3169
d794dc9b
DW
3170 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3171 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
ba395927 3172
d794dc9b 3173 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
207e3592 3174 dev_name(dev), start_pfn, last_pfn);
ba395927 3175
ea8ea460 3176 freelist = domain_unmap(domain, start_pfn, last_pfn);
d794dc9b 3177
5e0d2a6f 3178 if (intel_iommu_strict) {
03d6a246 3179 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
ea8ea460 3180 last_pfn - start_pfn + 1, !freelist, 0);
5e0d2a6f 3181 /* free iova */
3182 __free_iova(&domain->iovad, iova);
ea8ea460 3183 dma_free_pagelist(freelist);
5e0d2a6f 3184 } else {
ea8ea460 3185 add_unmap(domain, iova, freelist);
5e0d2a6f 3186 /*
3187 * queue up the release of the unmap to save the 1/6th of the
3188 * cpu used up by the iotlb flush operation...
3189 */
5e0d2a6f 3190 }
ba395927
KA
3191}
3192
5040a918 3193static void *intel_alloc_coherent(struct device *dev, size_t size,
baa676fc
AP
3194 dma_addr_t *dma_handle, gfp_t flags,
3195 struct dma_attrs *attrs)
ba395927 3196{
36746436 3197 struct page *page = NULL;
ba395927
KA
3198 int order;
3199
5b6985ce 3200 size = PAGE_ALIGN(size);
ba395927 3201 order = get_order(size);
e8bb910d 3202
5040a918 3203 if (!iommu_no_mapping(dev))
e8bb910d 3204 flags &= ~(GFP_DMA | GFP_DMA32);
5040a918
DW
3205 else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
3206 if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
e8bb910d
AW
3207 flags |= GFP_DMA;
3208 else
3209 flags |= GFP_DMA32;
3210 }
ba395927 3211
36746436
AM
3212 if (flags & __GFP_WAIT) {
3213 unsigned int count = size >> PAGE_SHIFT;
3214
3215 page = dma_alloc_from_contiguous(dev, count, order);
3216 if (page && iommu_no_mapping(dev) &&
3217 page_to_phys(page) + size > dev->coherent_dma_mask) {
3218 dma_release_from_contiguous(dev, page, count);
3219 page = NULL;
3220 }
3221 }
3222
3223 if (!page)
3224 page = alloc_pages(flags, order);
3225 if (!page)
ba395927 3226 return NULL;
36746436 3227 memset(page_address(page), 0, size);
ba395927 3228
36746436 3229 *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
bb9e6d65 3230 DMA_BIDIRECTIONAL,
5040a918 3231 dev->coherent_dma_mask);
ba395927 3232 if (*dma_handle)
36746436
AM
3233 return page_address(page);
3234 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3235 __free_pages(page, order);
3236
ba395927
KA
3237 return NULL;
3238}
3239
5040a918 3240static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
baa676fc 3241 dma_addr_t dma_handle, struct dma_attrs *attrs)
ba395927
KA
3242{
3243 int order;
36746436 3244 struct page *page = virt_to_page(vaddr);
ba395927 3245
5b6985ce 3246 size = PAGE_ALIGN(size);
ba395927
KA
3247 order = get_order(size);
3248
5040a918 3249 intel_unmap_page(dev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
36746436
AM
3250 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3251 __free_pages(page, order);
ba395927
KA
3252}
3253
5040a918 3254static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
d7ab5c46
FT
3255 int nelems, enum dma_data_direction dir,
3256 struct dma_attrs *attrs)
ba395927 3257{
ba395927 3258 struct dmar_domain *domain;
d794dc9b 3259 unsigned long start_pfn, last_pfn;
f76aec76 3260 struct iova *iova;
8c11e798 3261 struct intel_iommu *iommu;
ea8ea460 3262 struct page *freelist;
ba395927 3263
5040a918 3264 if (iommu_no_mapping(dev))
ba395927
KA
3265 return;
3266
5040a918 3267 domain = find_domain(dev);
8c11e798
WH
3268 BUG_ON(!domain);
3269
3270 iommu = domain_get_iommu(domain);
ba395927 3271
c03ab37c 3272 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
85b98276
DW
3273 if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
3274 (unsigned long long)sglist[0].dma_address))
f76aec76 3275 return;
f76aec76 3276
d794dc9b
DW
3277 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3278 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
f76aec76 3279
ea8ea460 3280 freelist = domain_unmap(domain, start_pfn, last_pfn);
f76aec76 3281
acea0018
DW
3282 if (intel_iommu_strict) {
3283 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
ea8ea460 3284 last_pfn - start_pfn + 1, !freelist, 0);
acea0018
DW
3285 /* free iova */
3286 __free_iova(&domain->iovad, iova);
ea8ea460 3287 dma_free_pagelist(freelist);
acea0018 3288 } else {
ea8ea460 3289 add_unmap(domain, iova, freelist);
acea0018
DW
3290 /*
3291 * queue up the release of the unmap to save the 1/6th of the
3292 * cpu used up by the iotlb flush operation...
3293 */
3294 }
ba395927
KA
3295}
3296
ba395927 3297static int intel_nontranslate_map_sg(struct device *hddev,
c03ab37c 3298 struct scatterlist *sglist, int nelems, int dir)
ba395927
KA
3299{
3300 int i;
c03ab37c 3301 struct scatterlist *sg;
ba395927 3302
c03ab37c 3303 for_each_sg(sglist, sg, nelems, i) {
12d4d40e 3304 BUG_ON(!sg_page(sg));
4cf2e75d 3305 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
c03ab37c 3306 sg->dma_length = sg->length;
ba395927
KA
3307 }
3308 return nelems;
3309}
3310
5040a918 3311static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
d7ab5c46 3312 enum dma_data_direction dir, struct dma_attrs *attrs)
ba395927 3313{
ba395927 3314 int i;
ba395927 3315 struct dmar_domain *domain;
f76aec76
KA
3316 size_t size = 0;
3317 int prot = 0;
f76aec76
KA
3318 struct iova *iova = NULL;
3319 int ret;
c03ab37c 3320 struct scatterlist *sg;
b536d24d 3321 unsigned long start_vpfn;
8c11e798 3322 struct intel_iommu *iommu;
ba395927
KA
3323
3324 BUG_ON(dir == DMA_NONE);
5040a918
DW
3325 if (iommu_no_mapping(dev))
3326 return intel_nontranslate_map_sg(dev, sglist, nelems, dir);
ba395927 3327
5040a918 3328 domain = get_valid_domain_for_dev(dev);
f76aec76
KA
3329 if (!domain)
3330 return 0;
3331
8c11e798
WH
3332 iommu = domain_get_iommu(domain);
3333
b536d24d 3334 for_each_sg(sglist, sg, nelems, i)
88cb6a74 3335 size += aligned_nrpages(sg->offset, sg->length);
f76aec76 3336
5040a918
DW
3337 iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
3338 *dev->dma_mask);
f76aec76 3339 if (!iova) {
c03ab37c 3340 sglist->dma_length = 0;
f76aec76
KA
3341 return 0;
3342 }
3343
3344 /*
3345 * Check if DMAR supports zero-length reads on write only
3346 * mappings..
3347 */
3348 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3349 !cap_zlr(iommu->cap))
f76aec76
KA
3350 prot |= DMA_PTE_READ;
3351 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3352 prot |= DMA_PTE_WRITE;
3353
b536d24d 3354 start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
e1605495 3355
f532959b 3356 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
e1605495
DW
3357 if (unlikely(ret)) {
3358 /* clear the page */
3359 dma_pte_clear_range(domain, start_vpfn,
3360 start_vpfn + size - 1);
3361 /* free page tables */
3362 dma_pte_free_pagetable(domain, start_vpfn,
3363 start_vpfn + size - 1);
3364 /* free iova */
3365 __free_iova(&domain->iovad, iova);
3366 return 0;
ba395927
KA
3367 }
3368
1f0ef2aa
DW
3369 /* it's a non-present to present mapping. Only flush if caching mode */
3370 if (cap_caching_mode(iommu->cap))
ea8ea460 3371 iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 0, 1);
1f0ef2aa 3372 else
8c11e798 3373 iommu_flush_write_buffer(iommu);
1f0ef2aa 3374
ba395927
KA
3375 return nelems;
3376}
3377
dfb805e8
FT
3378static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3379{
3380 return !dma_addr;
3381}
3382
160c1d8e 3383struct dma_map_ops intel_dma_ops = {
baa676fc
AP
3384 .alloc = intel_alloc_coherent,
3385 .free = intel_free_coherent,
ba395927
KA
3386 .map_sg = intel_map_sg,
3387 .unmap_sg = intel_unmap_sg,
ffbbef5c
FT
3388 .map_page = intel_map_page,
3389 .unmap_page = intel_unmap_page,
dfb805e8 3390 .mapping_error = intel_mapping_error,
ba395927
KA
3391};
3392
3393static inline int iommu_domain_cache_init(void)
3394{
3395 int ret = 0;
3396
3397 iommu_domain_cache = kmem_cache_create("iommu_domain",
3398 sizeof(struct dmar_domain),
3399 0,
3400 SLAB_HWCACHE_ALIGN,
3401
3402 NULL);
3403 if (!iommu_domain_cache) {
3404 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
3405 ret = -ENOMEM;
3406 }
3407
3408 return ret;
3409}
3410
3411static inline int iommu_devinfo_cache_init(void)
3412{
3413 int ret = 0;
3414
3415 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3416 sizeof(struct device_domain_info),
3417 0,
3418 SLAB_HWCACHE_ALIGN,
ba395927
KA
3419 NULL);
3420 if (!iommu_devinfo_cache) {
3421 printk(KERN_ERR "Couldn't create devinfo cache\n");
3422 ret = -ENOMEM;
3423 }
3424
3425 return ret;
3426}
3427
3428static inline int iommu_iova_cache_init(void)
3429{
3430 int ret = 0;
3431
3432 iommu_iova_cache = kmem_cache_create("iommu_iova",
3433 sizeof(struct iova),
3434 0,
3435 SLAB_HWCACHE_ALIGN,
ba395927
KA
3436 NULL);
3437 if (!iommu_iova_cache) {
3438 printk(KERN_ERR "Couldn't create iova cache\n");
3439 ret = -ENOMEM;
3440 }
3441
3442 return ret;
3443}
3444
3445static int __init iommu_init_mempool(void)
3446{
3447 int ret;
3448 ret = iommu_iova_cache_init();
3449 if (ret)
3450 return ret;
3451
3452 ret = iommu_domain_cache_init();
3453 if (ret)
3454 goto domain_error;
3455
3456 ret = iommu_devinfo_cache_init();
3457 if (!ret)
3458 return ret;
3459
3460 kmem_cache_destroy(iommu_domain_cache);
3461domain_error:
3462 kmem_cache_destroy(iommu_iova_cache);
3463
3464 return -ENOMEM;
3465}
3466
3467static void __init iommu_exit_mempool(void)
3468{
3469 kmem_cache_destroy(iommu_devinfo_cache);
3470 kmem_cache_destroy(iommu_domain_cache);
3471 kmem_cache_destroy(iommu_iova_cache);
3472
3473}
3474
556ab45f
DW
3475static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3476{
3477 struct dmar_drhd_unit *drhd;
3478 u32 vtbar;
3479 int rc;
3480
3481 /* We know that this device on this chipset has its own IOMMU.
3482 * If we find it under a different IOMMU, then the BIOS is lying
3483 * to us. Hope that the IOMMU for this device is actually
3484 * disabled, and it needs no translation...
3485 */
3486 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3487 if (rc) {
3488 /* "can't" happen */
3489 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3490 return;
3491 }
3492 vtbar &= 0xffff0000;
3493
3494 /* we know that this iommu should be at offset 0xa000 from vtbar */
3495 drhd = dmar_find_matched_drhd_unit(pdev);
3496 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3497 TAINT_FIRMWARE_WORKAROUND,
3498 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3499 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3500}
3501DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3502
ba395927
KA
3503static void __init init_no_remapping_devices(void)
3504{
3505 struct dmar_drhd_unit *drhd;
832bd858 3506 struct device *dev;
b683b230 3507 int i;
ba395927
KA
3508
3509 for_each_drhd_unit(drhd) {
3510 if (!drhd->include_all) {
b683b230
JL
3511 for_each_active_dev_scope(drhd->devices,
3512 drhd->devices_cnt, i, dev)
3513 break;
832bd858 3514 /* ignore DMAR unit if no devices exist */
ba395927
KA
3515 if (i == drhd->devices_cnt)
3516 drhd->ignored = 1;
3517 }
3518 }
3519
7c919779 3520 for_each_active_drhd_unit(drhd) {
7c919779 3521 if (drhd->include_all)
ba395927
KA
3522 continue;
3523
b683b230
JL
3524 for_each_active_dev_scope(drhd->devices,
3525 drhd->devices_cnt, i, dev)
832bd858 3526 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
ba395927 3527 break;
ba395927
KA
3528 if (i < drhd->devices_cnt)
3529 continue;
3530
c0771df8
DW
3531 /* This IOMMU has *only* gfx devices. Either bypass it or
3532 set the gfx_mapped flag, as appropriate */
3533 if (dmar_map_gfx) {
3534 intel_iommu_gfx_mapped = 1;
3535 } else {
3536 drhd->ignored = 1;
b683b230
JL
3537 for_each_active_dev_scope(drhd->devices,
3538 drhd->devices_cnt, i, dev)
832bd858 3539 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
ba395927
KA
3540 }
3541 }
3542}
3543
f59c7b69
FY
3544#ifdef CONFIG_SUSPEND
3545static int init_iommu_hw(void)
3546{
3547 struct dmar_drhd_unit *drhd;
3548 struct intel_iommu *iommu = NULL;
3549
3550 for_each_active_iommu(iommu, drhd)
3551 if (iommu->qi)
3552 dmar_reenable_qi(iommu);
3553
b779260b
JC
3554 for_each_iommu(iommu, drhd) {
3555 if (drhd->ignored) {
3556 /*
3557 * we always have to disable PMRs or DMA may fail on
3558 * this device
3559 */
3560 if (force_on)
3561 iommu_disable_protect_mem_regions(iommu);
3562 continue;
3563 }
3564
f59c7b69
FY
3565 iommu_flush_write_buffer(iommu);
3566
3567 iommu_set_root_entry(iommu);
3568
3569 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3570 DMA_CCMD_GLOBAL_INVL);
f59c7b69 3571 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 3572 DMA_TLB_GLOBAL_FLUSH);
b779260b
JC
3573 if (iommu_enable_translation(iommu))
3574 return 1;
b94996c9 3575 iommu_disable_protect_mem_regions(iommu);
f59c7b69
FY
3576 }
3577
3578 return 0;
3579}
3580
3581static void iommu_flush_all(void)
3582{
3583 struct dmar_drhd_unit *drhd;
3584 struct intel_iommu *iommu;
3585
3586 for_each_active_iommu(iommu, drhd) {
3587 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3588 DMA_CCMD_GLOBAL_INVL);
f59c7b69 3589 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 3590 DMA_TLB_GLOBAL_FLUSH);
f59c7b69
FY
3591 }
3592}
3593
134fac3f 3594static int iommu_suspend(void)
f59c7b69
FY
3595{
3596 struct dmar_drhd_unit *drhd;
3597 struct intel_iommu *iommu = NULL;
3598 unsigned long flag;
3599
3600 for_each_active_iommu(iommu, drhd) {
3601 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3602 GFP_ATOMIC);
3603 if (!iommu->iommu_state)
3604 goto nomem;
3605 }
3606
3607 iommu_flush_all();
3608
3609 for_each_active_iommu(iommu, drhd) {
3610 iommu_disable_translation(iommu);
3611
1f5b3c3f 3612 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
3613
3614 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3615 readl(iommu->reg + DMAR_FECTL_REG);
3616 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3617 readl(iommu->reg + DMAR_FEDATA_REG);
3618 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3619 readl(iommu->reg + DMAR_FEADDR_REG);
3620 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3621 readl(iommu->reg + DMAR_FEUADDR_REG);
3622
1f5b3c3f 3623 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
3624 }
3625 return 0;
3626
3627nomem:
3628 for_each_active_iommu(iommu, drhd)
3629 kfree(iommu->iommu_state);
3630
3631 return -ENOMEM;
3632}
3633
134fac3f 3634static void iommu_resume(void)
f59c7b69
FY
3635{
3636 struct dmar_drhd_unit *drhd;
3637 struct intel_iommu *iommu = NULL;
3638 unsigned long flag;
3639
3640 if (init_iommu_hw()) {
b779260b
JC
3641 if (force_on)
 3642			panic("tboot: IOMMU setup failed, DMAR cannot resume!\n");
 3643		else
 3644			WARN(1, "IOMMU setup failed, DMAR cannot resume!\n");
134fac3f 3645 return;
f59c7b69
FY
3646 }
3647
3648 for_each_active_iommu(iommu, drhd) {
3649
1f5b3c3f 3650 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
3651
3652 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3653 iommu->reg + DMAR_FECTL_REG);
3654 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3655 iommu->reg + DMAR_FEDATA_REG);
3656 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3657 iommu->reg + DMAR_FEADDR_REG);
3658 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3659 iommu->reg + DMAR_FEUADDR_REG);
3660
1f5b3c3f 3661 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
3662 }
3663
3664 for_each_active_iommu(iommu, drhd)
3665 kfree(iommu->iommu_state);
f59c7b69
FY
3666}
3667
134fac3f 3668static struct syscore_ops iommu_syscore_ops = {
f59c7b69
FY
3669 .resume = iommu_resume,
3670 .suspend = iommu_suspend,
3671};
3672
134fac3f 3673static void __init init_iommu_pm_ops(void)
f59c7b69 3674{
134fac3f 3675 register_syscore_ops(&iommu_syscore_ops);
f59c7b69
FY
3676}
3677
3678#else
99592ba4 3679static inline void init_iommu_pm_ops(void) {}
f59c7b69
FY
 3680#endif /* CONFIG_SUSPEND */
3681
318fe7df
SS
3682
3683int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header)
3684{
3685 struct acpi_dmar_reserved_memory *rmrr;
3686 struct dmar_rmrr_unit *rmrru;
3687
3688 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
3689 if (!rmrru)
3690 return -ENOMEM;
3691
3692 rmrru->hdr = header;
3693 rmrr = (struct acpi_dmar_reserved_memory *)header;
3694 rmrru->base_address = rmrr->base_address;
3695 rmrru->end_address = rmrr->end_address;
2e455289
JL
3696 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
3697 ((void *)rmrr) + rmrr->header.length,
3698 &rmrru->devices_cnt);
3699 if (rmrru->devices_cnt && rmrru->devices == NULL) {
3700 kfree(rmrru);
3701 return -ENOMEM;
3702 }
318fe7df 3703
2e455289 3704 list_add(&rmrru->list, &dmar_rmrr_units);
318fe7df 3705
2e455289 3706 return 0;
318fe7df
SS
3707}
3708
318fe7df
SS
3709int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
3710{
3711 struct acpi_dmar_atsr *atsr;
3712 struct dmar_atsr_unit *atsru;
3713
3714 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3715 atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
3716 if (!atsru)
3717 return -ENOMEM;
3718
3719 atsru->hdr = hdr;
3720 atsru->include_all = atsr->flags & 0x1;
2e455289
JL
3721 if (!atsru->include_all) {
3722 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
3723 (void *)atsr + atsr->header.length,
3724 &atsru->devices_cnt);
3725 if (atsru->devices_cnt && atsru->devices == NULL) {
3726 kfree(atsru);
3727 return -ENOMEM;
3728 }
3729 }
318fe7df 3730
0e242612 3731 list_add_rcu(&atsru->list, &dmar_atsr_units);
318fe7df
SS
3732
3733 return 0;
3734}
3735
9bdc531e
JL
3736static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
3737{
3738 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
3739 kfree(atsru);
3740}
3741
3742static void intel_iommu_free_dmars(void)
3743{
3744 struct dmar_rmrr_unit *rmrru, *rmrr_n;
3745 struct dmar_atsr_unit *atsru, *atsr_n;
3746
3747 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
3748 list_del(&rmrru->list);
3749 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
3750 kfree(rmrru);
318fe7df
SS
3751 }
3752
9bdc531e
JL
3753 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
3754 list_del(&atsru->list);
3755 intel_iommu_free_atsr(atsru);
3756 }
318fe7df
SS
3757}
3758
3759int dmar_find_matched_atsr_unit(struct pci_dev *dev)
3760{
b683b230 3761 int i, ret = 1;
318fe7df 3762 struct pci_bus *bus;
832bd858
DW
3763 struct pci_dev *bridge = NULL;
3764 struct device *tmp;
318fe7df
SS
3765 struct acpi_dmar_atsr *atsr;
3766 struct dmar_atsr_unit *atsru;
3767
3768 dev = pci_physfn(dev);
318fe7df 3769 for (bus = dev->bus; bus; bus = bus->parent) {
b5f82ddf 3770 bridge = bus->self;
318fe7df 3771 if (!bridge || !pci_is_pcie(bridge) ||
62f87c0e 3772 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
318fe7df 3773 return 0;
b5f82ddf 3774 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
318fe7df 3775 break;
318fe7df 3776 }
b5f82ddf
JL
3777 if (!bridge)
3778 return 0;
318fe7df 3779
0e242612 3780 rcu_read_lock();
b5f82ddf
JL
3781 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
3782 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3783 if (atsr->segment != pci_domain_nr(dev->bus))
3784 continue;
3785
b683b230 3786 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
832bd858 3787 if (tmp == &bridge->dev)
b683b230 3788 goto out;
b5f82ddf
JL
3789
3790 if (atsru->include_all)
b683b230 3791 goto out;
b5f82ddf 3792 }
b683b230
JL
3793 ret = 0;
3794out:
0e242612 3795 rcu_read_unlock();
318fe7df 3796
b683b230 3797 return ret;
318fe7df
SS
3798}
3799
59ce0515
JL
3800int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
3801{
3802 int ret = 0;
3803 struct dmar_rmrr_unit *rmrru;
3804 struct dmar_atsr_unit *atsru;
3805 struct acpi_dmar_atsr *atsr;
3806 struct acpi_dmar_reserved_memory *rmrr;
3807
3808 if (!intel_iommu_enabled && system_state != SYSTEM_BOOTING)
3809 return 0;
3810
3811 list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
3812 rmrr = container_of(rmrru->hdr,
3813 struct acpi_dmar_reserved_memory, header);
3814 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3815 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
3816 ((void *)rmrr) + rmrr->header.length,
3817 rmrr->segment, rmrru->devices,
3818 rmrru->devices_cnt);
27e24950 3819			if (ret < 0)
59ce0515
JL
3820 return ret;
3821 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
27e24950
JL
3822 dmar_remove_dev_scope(info, rmrr->segment,
3823 rmrru->devices, rmrru->devices_cnt);
59ce0515
JL
3824 }
3825 }
3826
3827 list_for_each_entry(atsru, &dmar_atsr_units, list) {
3828 if (atsru->include_all)
3829 continue;
3830
3831 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3832 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3833 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
3834 (void *)atsr + atsr->header.length,
3835 atsr->segment, atsru->devices,
3836 atsru->devices_cnt);
3837 if (ret > 0)
3838 break;
 3839			else if (ret < 0)
3840 return ret;
3841 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
3842 if (dmar_remove_dev_scope(info, atsr->segment,
3843 atsru->devices, atsru->devices_cnt))
3844 break;
3845 }
3846 }
3847
3848 return 0;
3849}
3850
99dcaded
FY
3851/*
 3852 * Here we only respond to a device being unbound from its driver.
 3853 *
 3854 * A newly added device is not attached to its DMAR domain here yet. That
 3855 * happens when the device is first mapped to an iova.
3856 */
3857static int device_notifier(struct notifier_block *nb,
3858 unsigned long action, void *data)
3859{
3860 struct device *dev = data;
99dcaded
FY
3861 struct dmar_domain *domain;
3862
3d89194a 3863 if (iommu_dummy(dev))
44cd613c
DW
3864 return 0;
3865
7e7dfab7
JL
3866 if (action != BUS_NOTIFY_UNBOUND_DRIVER &&
3867 action != BUS_NOTIFY_DEL_DEVICE)
3868 return 0;
3869
1525a29a 3870 domain = find_domain(dev);
99dcaded
FY
3871 if (!domain)
3872 return 0;
3873
3a5670e8 3874 down_read(&dmar_global_lock);
bf9c9eda 3875 domain_remove_one_dev_info(domain, dev);
7e7dfab7
JL
3876 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3877 !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
3878 list_empty(&domain->devices))
3879 domain_exit(domain);
3a5670e8 3880 up_read(&dmar_global_lock);
a97590e5 3881
99dcaded
FY
3882 return 0;
3883}
3884
3885static struct notifier_block device_nb = {
3886 .notifier_call = device_notifier,
3887};
3888
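/*
 * Illustrative sketch (not part of the original driver): device_nb above is
 * a standard bus notifier. The hypothetical "example_nb" below shows the
 * registration pattern in isolation -- a notifier that only reacts to
 * driver-unbind events on the PCI bus, mirroring what device_notifier()
 * filters on.
 */
#include <linux/device.h>
#include <linux/notifier.h>
#include <linux/pci.h>

static int example_notifier(struct notifier_block *nb,
			    unsigned long action, void *data)
{
	struct device *dev = data;

	if (action != BUS_NOTIFY_UNBOUND_DRIVER)
		return 0;

	dev_info(dev, "driver unbound; per-device state could be torn down here\n");
	return 0;
}

static struct notifier_block example_nb = {
	.notifier_call = example_notifier,
};

static int __init example_register(void)
{
	/* same call intel_iommu_init() uses to register device_nb */
	return bus_register_notifier(&pci_bus_type, &example_nb);
}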
75f05569
JL
3889static int intel_iommu_memory_notifier(struct notifier_block *nb,
3890 unsigned long val, void *v)
3891{
3892 struct memory_notify *mhp = v;
3893 unsigned long long start, end;
3894 unsigned long start_vpfn, last_vpfn;
3895
3896 switch (val) {
3897 case MEM_GOING_ONLINE:
3898 start = mhp->start_pfn << PAGE_SHIFT;
3899 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
3900 if (iommu_domain_identity_map(si_domain, start, end)) {
3901 pr_warn("dmar: failed to build identity map for [%llx-%llx]\n",
3902 start, end);
3903 return NOTIFY_BAD;
3904 }
3905 break;
3906
3907 case MEM_OFFLINE:
3908 case MEM_CANCEL_ONLINE:
3909 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
3910 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
3911 while (start_vpfn <= last_vpfn) {
3912 struct iova *iova;
3913 struct dmar_drhd_unit *drhd;
3914 struct intel_iommu *iommu;
ea8ea460 3915 struct page *freelist;
75f05569
JL
3916
3917 iova = find_iova(&si_domain->iovad, start_vpfn);
3918 if (iova == NULL) {
 3919				pr_debug("dmar: failed to get IOVA for PFN %lx\n",
3920 start_vpfn);
3921 break;
3922 }
3923
3924 iova = split_and_remove_iova(&si_domain->iovad, iova,
3925 start_vpfn, last_vpfn);
3926 if (iova == NULL) {
3927 pr_warn("dmar: failed to split IOVA PFN [%lx-%lx]\n",
3928 start_vpfn, last_vpfn);
3929 return NOTIFY_BAD;
3930 }
3931
ea8ea460
DW
3932 freelist = domain_unmap(si_domain, iova->pfn_lo,
3933 iova->pfn_hi);
3934
75f05569
JL
3935 rcu_read_lock();
3936 for_each_active_iommu(iommu, drhd)
3937 iommu_flush_iotlb_psi(iommu, si_domain->id,
3938 iova->pfn_lo,
ea8ea460
DW
3939 iova->pfn_hi - iova->pfn_lo + 1,
3940 !freelist, 0);
75f05569 3941 rcu_read_unlock();
ea8ea460 3942 dma_free_pagelist(freelist);
75f05569
JL
3943
3944 start_vpfn = iova->pfn_hi + 1;
3945 free_iova_mem(iova);
3946 }
3947 break;
3948 }
3949
3950 return NOTIFY_OK;
3951}
3952
3953static struct notifier_block intel_iommu_memory_nb = {
3954 .notifier_call = intel_iommu_memory_notifier,
3955 .priority = 0
3956};
3957
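/*
 * Illustrative sketch (not part of the original driver): a memory-hotplug
 * notifier receives a struct memory_notify describing the affected pfn
 * range. The hypothetical "example_mem_notifier" below shows only the
 * payload handling that the handler above builds on (pfn range -> byte
 * range) and logs the events it sees.
 */
#include <linux/kernel.h>
#include <linux/memory.h>
#include <linux/notifier.h>

static int example_mem_notifier(struct notifier_block *nb,
				unsigned long val, void *v)
{
	struct memory_notify *mhp = v;
	unsigned long long start = (unsigned long long)mhp->start_pfn << PAGE_SHIFT;
	unsigned long long end = start +
		((unsigned long long)mhp->nr_pages << PAGE_SHIFT) - 1;

	switch (val) {
	case MEM_GOING_ONLINE:
		pr_info("memory going online: [%llx-%llx]\n", start, end);
		break;
	case MEM_OFFLINE:
	case MEM_CANCEL_ONLINE:
		pr_info("memory going away: [%llx-%llx]\n", start, end);
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block example_mem_nb = {
	.notifier_call = example_mem_notifier,
};

/* registered the same way intel_iommu_init() registers intel_iommu_memory_nb */
static int __init example_mem_register(void)
{
	return register_memory_notifier(&example_mem_nb);
}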
ba395927
KA
3958int __init intel_iommu_init(void)
3959{
9bdc531e 3960 int ret = -ENODEV;
3a93c841 3961 struct dmar_drhd_unit *drhd;
7c919779 3962 struct intel_iommu *iommu;
ba395927 3963
a59b50e9
JC
3964 /* VT-d is required for a TXT/tboot launch, so enforce that */
3965 force_on = tboot_force_iommu();
3966
3a5670e8
JL
3967 if (iommu_init_mempool()) {
3968 if (force_on)
3969 panic("tboot: Failed to initialize iommu memory\n");
3970 return -ENOMEM;
3971 }
3972
3973 down_write(&dmar_global_lock);
a59b50e9
JC
3974 if (dmar_table_init()) {
3975 if (force_on)
3976 panic("tboot: Failed to initialize DMAR table\n");
9bdc531e 3977 goto out_free_dmar;
a59b50e9 3978 }
ba395927 3979
3a93c841
TI
3980 /*
3981 * Disable translation if already enabled prior to OS handover.
3982 */
7c919779 3983 for_each_active_iommu(iommu, drhd)
3a93c841
TI
3984 if (iommu->gcmd & DMA_GCMD_TE)
3985 iommu_disable_translation(iommu);
3a93c841 3986
c2c7286a 3987 if (dmar_dev_scope_init() < 0) {
a59b50e9
JC
3988 if (force_on)
3989 panic("tboot: Failed to initialize DMAR device scope\n");
9bdc531e 3990 goto out_free_dmar;
a59b50e9 3991 }
1886e8a9 3992
75f1cdf1 3993 if (no_iommu || dmar_disabled)
9bdc531e 3994 goto out_free_dmar;
2ae21010 3995
318fe7df
SS
3996 if (list_empty(&dmar_rmrr_units))
3997 printk(KERN_INFO "DMAR: No RMRR found\n");
3998
3999 if (list_empty(&dmar_atsr_units))
4000 printk(KERN_INFO "DMAR: No ATSR found\n");
4001
51a63e67
JC
4002 if (dmar_init_reserved_ranges()) {
4003 if (force_on)
4004 panic("tboot: Failed to reserve iommu ranges\n");
3a5670e8 4005 goto out_free_reserved_range;
51a63e67 4006 }
ba395927
KA
4007
4008 init_no_remapping_devices();
4009
b779260b 4010 ret = init_dmars();
ba395927 4011 if (ret) {
a59b50e9
JC
4012 if (force_on)
4013 panic("tboot: Failed to initialize DMARs\n");
ba395927 4014 printk(KERN_ERR "IOMMU: dmar init failed\n");
9bdc531e 4015 goto out_free_reserved_range;
ba395927 4016 }
3a5670e8 4017 up_write(&dmar_global_lock);
ba395927
KA
4018 printk(KERN_INFO
4019 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
4020
5e0d2a6f 4021 init_timer(&unmap_timer);
75f1cdf1
FT
4022#ifdef CONFIG_SWIOTLB
4023 swiotlb = 0;
4024#endif
19943b0e 4025 dma_ops = &intel_dma_ops;
4ed0d3e6 4026
134fac3f 4027 init_iommu_pm_ops();
a8bcbb0d 4028
4236d97d 4029 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
99dcaded 4030 bus_register_notifier(&pci_bus_type, &device_nb);
75f05569
JL
4031 if (si_domain && !hw_pass_through)
4032 register_memory_notifier(&intel_iommu_memory_nb);
99dcaded 4033
8bc1f85c
ED
4034 intel_iommu_enabled = 1;
4035
ba395927 4036 return 0;
9bdc531e
JL
4037
4038out_free_reserved_range:
4039 put_iova_domain(&reserved_iova_list);
9bdc531e
JL
4040out_free_dmar:
4041 intel_iommu_free_dmars();
3a5670e8
JL
4042 up_write(&dmar_global_lock);
4043 iommu_exit_mempool();
9bdc531e 4044 return ret;
ba395927 4045}
e820482c 4046
3199aa6b 4047static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
0bcb3e28 4048 struct device *dev)
3199aa6b 4049{
0bcb3e28 4050 struct pci_dev *tmp, *parent, *pdev;
3199aa6b 4051
0bcb3e28 4052 if (!iommu || !dev || !dev_is_pci(dev))
3199aa6b
HW
4053 return;
4054
0bcb3e28
DW
4055 pdev = to_pci_dev(dev);
4056
3199aa6b
HW
4057 /* dependent device detach */
4058 tmp = pci_find_upstream_pcie_bridge(pdev);
4059 /* Secondary interface's bus number and devfn 0 */
4060 if (tmp) {
4061 parent = pdev->bus->self;
4062 while (parent != tmp) {
4063 iommu_detach_dev(iommu, parent->bus->number,
276dbf99 4064 parent->devfn);
3199aa6b
HW
4065 parent = parent->bus->self;
4066 }
45e829ea 4067 if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
3199aa6b
HW
4068 iommu_detach_dev(iommu,
4069 tmp->subordinate->number, 0);
4070 else /* this is a legacy PCI bridge */
276dbf99
DW
4071 iommu_detach_dev(iommu, tmp->bus->number,
4072 tmp->devfn);
3199aa6b
HW
4073 }
4074}
4075
2c2e2c38 4076static void domain_remove_one_dev_info(struct dmar_domain *domain,
bf9c9eda 4077 struct device *dev)
c7151a8d 4078{
bca2b916 4079 struct device_domain_info *info, *tmp;
c7151a8d
WH
4080 struct intel_iommu *iommu;
4081 unsigned long flags;
4082 int found = 0;
156baca8 4083 u8 bus, devfn;
c7151a8d 4084
bf9c9eda 4085 iommu = device_to_iommu(dev, &bus, &devfn);
c7151a8d
WH
4086 if (!iommu)
4087 return;
4088
4089 spin_lock_irqsave(&device_domain_lock, flags);
bca2b916 4090 list_for_each_entry_safe(info, tmp, &domain->devices, link) {
bf9c9eda
DW
4091 if (info->iommu == iommu && info->bus == bus &&
4092 info->devfn == devfn) {
109b9b04 4093 unlink_domain_info(info);
c7151a8d
WH
4094 spin_unlock_irqrestore(&device_domain_lock, flags);
4095
93a23a72 4096 iommu_disable_dev_iotlb(info);
c7151a8d 4097 iommu_detach_dev(iommu, info->bus, info->devfn);
bf9c9eda 4098 iommu_detach_dependent_devices(iommu, dev);
c7151a8d
WH
4099 free_devinfo_mem(info);
4100
4101 spin_lock_irqsave(&device_domain_lock, flags);
4102
4103 if (found)
4104 break;
4105 else
4106 continue;
4107 }
4108
 4109		/* if there are no other devices under the same iommu
 4110		 * owned by this domain, clear this iommu in iommu_bmp,
 4111		 * update the iommu count and coherency
4112 */
8bbc4410 4113 if (info->iommu == iommu)
c7151a8d
WH
4114 found = 1;
4115 }
4116
3e7abe25
RD
4117 spin_unlock_irqrestore(&device_domain_lock, flags);
4118
c7151a8d
WH
4119 if (found == 0) {
4120 unsigned long tmp_flags;
4121 spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
1b198bb0 4122 clear_bit(iommu->seq_id, domain->iommu_bmp);
c7151a8d 4123 domain->iommu_count--;
58c610bd 4124 domain_update_iommu_cap(domain);
c7151a8d 4125 spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
a97590e5 4126
9b4554b2
AW
4127 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
4128 !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) {
4129 spin_lock_irqsave(&iommu->lock, tmp_flags);
4130 clear_bit(domain->id, iommu->domain_ids);
4131 iommu->domains[domain->id] = NULL;
4132 spin_unlock_irqrestore(&iommu->lock, tmp_flags);
4133 }
c7151a8d 4134 }
c7151a8d
WH
4135}
4136
2c2e2c38 4137static int md_domain_init(struct dmar_domain *domain, int guest_width)
5e98c4b1
WH
4138{
4139 int adjust_width;
4140
4141 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
5e98c4b1
WH
4142 domain_reserve_special_ranges(domain);
4143
4144 /* calculate AGAW */
4145 domain->gaw = guest_width;
4146 adjust_width = guestwidth_to_adjustwidth(guest_width);
4147 domain->agaw = width_to_agaw(adjust_width);
4148
5e98c4b1 4149 domain->iommu_coherency = 0;
c5b15255 4150 domain->iommu_snooping = 0;
6dd9a7c7 4151 domain->iommu_superpage = 0;
fe40f1e0 4152 domain->max_addr = 0;
4c923d47 4153 domain->nid = -1;
5e98c4b1
WH
4154
4155 /* always allocate the top pgd */
4c923d47 4156 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
5e98c4b1
WH
4157 if (!domain->pgd)
4158 return -ENOMEM;
4159 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4160 return 0;
4161}
4162
5d450806 4163static int intel_iommu_domain_init(struct iommu_domain *domain)
38717946 4164{
5d450806 4165 struct dmar_domain *dmar_domain;
38717946 4166
92d03cc8 4167 dmar_domain = alloc_domain(true);
5d450806 4168 if (!dmar_domain) {
38717946 4169 printk(KERN_ERR
5d450806
JR
4170 "intel_iommu_domain_init: dmar_domain == NULL\n");
4171 return -ENOMEM;
38717946 4172 }
2c2e2c38 4173 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
38717946 4174 printk(KERN_ERR
5d450806 4175 "intel_iommu_domain_init() failed\n");
92d03cc8 4176 domain_exit(dmar_domain);
5d450806 4177 return -ENOMEM;
38717946 4178 }
8140a95d 4179 domain_update_iommu_cap(dmar_domain);
5d450806 4180 domain->priv = dmar_domain;
faa3d6f5 4181
8a0e715b
JR
4182 domain->geometry.aperture_start = 0;
4183 domain->geometry.aperture_end = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
4184 domain->geometry.force_aperture = true;
4185
5d450806 4186 return 0;
38717946 4187}
38717946 4188
5d450806 4189static void intel_iommu_domain_destroy(struct iommu_domain *domain)
38717946 4190{
5d450806
JR
4191 struct dmar_domain *dmar_domain = domain->priv;
4192
4193 domain->priv = NULL;
92d03cc8 4194 domain_exit(dmar_domain);
38717946 4195}
38717946 4196
4c5478c9
JR
4197static int intel_iommu_attach_device(struct iommu_domain *domain,
4198 struct device *dev)
38717946 4199{
4c5478c9 4200 struct dmar_domain *dmar_domain = domain->priv;
fe40f1e0
WH
4201 struct intel_iommu *iommu;
4202 int addr_width;
156baca8 4203 u8 bus, devfn;
faa3d6f5 4204
7207d8f9
DW
4205 /* normally dev is not mapped */
4206 if (unlikely(domain_context_mapped(dev))) {
faa3d6f5
WH
4207 struct dmar_domain *old_domain;
4208
1525a29a 4209 old_domain = find_domain(dev);
faa3d6f5 4210 if (old_domain) {
2c2e2c38
FY
4211 if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
4212 dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)
bf9c9eda 4213 domain_remove_one_dev_info(old_domain, dev);
faa3d6f5
WH
4214 else
4215 domain_remove_dev_info(old_domain);
4216 }
4217 }
4218
156baca8 4219 iommu = device_to_iommu(dev, &bus, &devfn);
fe40f1e0
WH
4220 if (!iommu)
4221 return -ENODEV;
4222
4223 /* check if this iommu agaw is sufficient for max mapped address */
4224 addr_width = agaw_to_width(iommu->agaw);
a99c47a2
TL
4225 if (addr_width > cap_mgaw(iommu->cap))
4226 addr_width = cap_mgaw(iommu->cap);
4227
4228 if (dmar_domain->max_addr > (1LL << addr_width)) {
4229 printk(KERN_ERR "%s: iommu width (%d) is not "
fe40f1e0 4230 "sufficient for the mapped address (%llx)\n",
a99c47a2 4231 __func__, addr_width, dmar_domain->max_addr);
fe40f1e0
WH
4232 return -EFAULT;
4233 }
a99c47a2
TL
4234 dmar_domain->gaw = addr_width;
4235
4236 /*
4237 * Knock out extra levels of page tables if necessary
4238 */
4239 while (iommu->agaw < dmar_domain->agaw) {
4240 struct dma_pte *pte;
4241
4242 pte = dmar_domain->pgd;
4243 if (dma_pte_present(pte)) {
25cbff16
SY
4244 dmar_domain->pgd = (struct dma_pte *)
4245 phys_to_virt(dma_pte_addr(pte));
7a661013 4246 free_pgtable_page(pte);
a99c47a2
TL
4247 }
4248 dmar_domain->agaw--;
4249 }
fe40f1e0 4250
5913c9bf 4251 return domain_add_dev_info(dmar_domain, dev, CONTEXT_TT_MULTI_LEVEL);
38717946 4252}
38717946 4253
4c5478c9
JR
4254static void intel_iommu_detach_device(struct iommu_domain *domain,
4255 struct device *dev)
38717946 4256{
4c5478c9 4257 struct dmar_domain *dmar_domain = domain->priv;
4c5478c9 4258
bf9c9eda 4259 domain_remove_one_dev_info(dmar_domain, dev);
faa3d6f5 4260}
c7151a8d 4261
b146a1c9
JR
4262static int intel_iommu_map(struct iommu_domain *domain,
4263 unsigned long iova, phys_addr_t hpa,
5009065d 4264 size_t size, int iommu_prot)
faa3d6f5 4265{
dde57a21 4266 struct dmar_domain *dmar_domain = domain->priv;
fe40f1e0 4267 u64 max_addr;
dde57a21 4268 int prot = 0;
faa3d6f5 4269 int ret;
fe40f1e0 4270
dde57a21
JR
4271 if (iommu_prot & IOMMU_READ)
4272 prot |= DMA_PTE_READ;
4273 if (iommu_prot & IOMMU_WRITE)
4274 prot |= DMA_PTE_WRITE;
9cf06697
SY
4275 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
4276 prot |= DMA_PTE_SNP;
dde57a21 4277
163cc52c 4278 max_addr = iova + size;
dde57a21 4279 if (dmar_domain->max_addr < max_addr) {
fe40f1e0
WH
4280 u64 end;
4281
4282 /* check if minimum agaw is sufficient for mapped address */
8954da1f 4283 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
fe40f1e0 4284 if (end < max_addr) {
8954da1f 4285 printk(KERN_ERR "%s: iommu width (%d) is not "
fe40f1e0 4286 "sufficient for the mapped address (%llx)\n",
8954da1f 4287 __func__, dmar_domain->gaw, max_addr);
fe40f1e0
WH
4288 return -EFAULT;
4289 }
dde57a21 4290 dmar_domain->max_addr = max_addr;
fe40f1e0 4291 }
ad051221
DW
 4292	/* Round up size to the next multiple of PAGE_SIZE if it, combined with
 4293	   the low bits of hpa, would take us onto the next page */
88cb6a74 4294 size = aligned_nrpages(hpa, size);
ad051221
DW
4295 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
4296 hpa >> VTD_PAGE_SHIFT, size, prot);
faa3d6f5 4297 return ret;
38717946 4298}
38717946 4299
5009065d 4300static size_t intel_iommu_unmap(struct iommu_domain *domain,
ea8ea460 4301 unsigned long iova, size_t size)
38717946 4302{
dde57a21 4303 struct dmar_domain *dmar_domain = domain->priv;
ea8ea460
DW
4304 struct page *freelist = NULL;
4305 struct intel_iommu *iommu;
4306 unsigned long start_pfn, last_pfn;
4307 unsigned int npages;
4308 int iommu_id, num, ndomains, level = 0;
5cf0a76f
DW
4309
4310 /* Cope with horrid API which requires us to unmap more than the
4311 size argument if it happens to be a large-page mapping. */
4312 if (!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level))
4313 BUG();
4314
4315 if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
4316 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4b99d352 4317
ea8ea460
DW
4318 start_pfn = iova >> VTD_PAGE_SHIFT;
4319 last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
4320
4321 freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
4322
4323 npages = last_pfn - start_pfn + 1;
4324
4325 for_each_set_bit(iommu_id, dmar_domain->iommu_bmp, g_num_of_iommus) {
4326 iommu = g_iommus[iommu_id];
4327
4328 /*
4329 * find bit position of dmar_domain
4330 */
4331 ndomains = cap_ndoms(iommu->cap);
4332 for_each_set_bit(num, iommu->domain_ids, ndomains) {
4333 if (iommu->domains[num] == dmar_domain)
4334 iommu_flush_iotlb_psi(iommu, num, start_pfn,
4335 npages, !freelist, 0);
4336 }
4337
4338 }
4339
4340 dma_free_pagelist(freelist);
fe40f1e0 4341
163cc52c
DW
4342 if (dmar_domain->max_addr == iova + size)
4343 dmar_domain->max_addr = iova;
b146a1c9 4344
5cf0a76f 4345 return size;
38717946 4346}
38717946 4347
d14d6577 4348static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
bb5547ac 4349 dma_addr_t iova)
38717946 4350{
d14d6577 4351 struct dmar_domain *dmar_domain = domain->priv;
38717946 4352 struct dma_pte *pte;
5cf0a76f 4353 int level = 0;
faa3d6f5 4354 u64 phys = 0;
38717946 4355
5cf0a76f 4356 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
38717946 4357 if (pte)
faa3d6f5 4358 phys = dma_pte_addr(pte);
38717946 4359
faa3d6f5 4360 return phys;
38717946 4361}
a8bcbb0d 4362
dbb9fd86
SY
4363static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
4364 unsigned long cap)
4365{
4366 struct dmar_domain *dmar_domain = domain->priv;
4367
4368 if (cap == IOMMU_CAP_CACHE_COHERENCY)
4369 return dmar_domain->iommu_snooping;
323f99cb 4370 if (cap == IOMMU_CAP_INTR_REMAP)
95a02e97 4371 return irq_remapping_enabled;
dbb9fd86
SY
4372
4373 return 0;
4374}
4375
783f157b 4376#define REQ_ACS_FLAGS (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF)
70ae6f0d 4377
abdfdde2
AW
4378static int intel_iommu_add_device(struct device *dev)
4379{
4380 struct pci_dev *pdev = to_pci_dev(dev);
3da4af0a 4381 struct pci_dev *bridge, *dma_pdev = NULL;
abdfdde2
AW
4382 struct iommu_group *group;
4383 int ret;
156baca8 4384 u8 bus, devfn;
70ae6f0d 4385
156baca8 4386 if (!device_to_iommu(dev, &bus, &devfn))
70ae6f0d
AW
4387 return -ENODEV;
4388
4389 bridge = pci_find_upstream_pcie_bridge(pdev);
4390 if (bridge) {
abdfdde2
AW
4391 if (pci_is_pcie(bridge))
4392 dma_pdev = pci_get_domain_bus_and_slot(
4393 pci_domain_nr(pdev->bus),
4394 bridge->subordinate->number, 0);
3da4af0a 4395 if (!dma_pdev)
abdfdde2
AW
4396 dma_pdev = pci_dev_get(bridge);
4397 } else
4398 dma_pdev = pci_dev_get(pdev);
4399
a4ff1fc2 4400 /* Account for quirked devices */
783f157b
AW
4401 swap_pci_ref(&dma_pdev, pci_get_dma_source(dma_pdev));
4402
a4ff1fc2
AW
4403 /*
4404 * If it's a multifunction device that does not support our
c14d2690
AW
 4405	 * required ACS flags, add it to the same group as the lowest numbered
 4406	 * function that also does not support the required ACS flags.
a4ff1fc2 4407 */
783f157b 4408 if (dma_pdev->multifunction &&
c14d2690
AW
4409 !pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS)) {
4410 u8 i, slot = PCI_SLOT(dma_pdev->devfn);
4411
4412 for (i = 0; i < 8; i++) {
4413 struct pci_dev *tmp;
4414
4415 tmp = pci_get_slot(dma_pdev->bus, PCI_DEVFN(slot, i));
4416 if (!tmp)
4417 continue;
4418
4419 if (!pci_acs_enabled(tmp, REQ_ACS_FLAGS)) {
4420 swap_pci_ref(&dma_pdev, tmp);
4421 break;
4422 }
4423 pci_dev_put(tmp);
4424 }
4425 }
783f157b 4426
a4ff1fc2
AW
4427 /*
4428 * Devices on the root bus go through the iommu. If that's not us,
4429 * find the next upstream device and test ACS up to the root bus.
4430 * Finding the next device may require skipping virtual buses.
4431 */
783f157b 4432 while (!pci_is_root_bus(dma_pdev->bus)) {
a4ff1fc2
AW
4433 struct pci_bus *bus = dma_pdev->bus;
4434
4435 while (!bus->self) {
4436 if (!pci_is_root_bus(bus))
4437 bus = bus->parent;
4438 else
4439 goto root_bus;
4440 }
4441
4442 if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS))
783f157b
AW
4443 break;
4444
a4ff1fc2 4445 swap_pci_ref(&dma_pdev, pci_dev_get(bus->self));
783f157b
AW
4446 }
4447
a4ff1fc2 4448root_bus:
abdfdde2
AW
4449 group = iommu_group_get(&dma_pdev->dev);
4450 pci_dev_put(dma_pdev);
4451 if (!group) {
4452 group = iommu_group_alloc();
4453 if (IS_ERR(group))
4454 return PTR_ERR(group);
70ae6f0d
AW
4455 }
4456
abdfdde2 4457 ret = iommu_group_add_device(group, dev);
bcb71abe 4458
abdfdde2
AW
4459 iommu_group_put(group);
4460 return ret;
4461}
70ae6f0d 4462
abdfdde2
AW
4463static void intel_iommu_remove_device(struct device *dev)
4464{
4465 iommu_group_remove_device(dev);
70ae6f0d
AW
4466}
4467
a8bcbb0d
JR
4468static struct iommu_ops intel_iommu_ops = {
4469 .domain_init = intel_iommu_domain_init,
4470 .domain_destroy = intel_iommu_domain_destroy,
4471 .attach_dev = intel_iommu_attach_device,
4472 .detach_dev = intel_iommu_detach_device,
b146a1c9
JR
4473 .map = intel_iommu_map,
4474 .unmap = intel_iommu_unmap,
a8bcbb0d 4475 .iova_to_phys = intel_iommu_iova_to_phys,
dbb9fd86 4476 .domain_has_cap = intel_iommu_domain_has_cap,
abdfdde2
AW
4477 .add_device = intel_iommu_add_device,
4478 .remove_device = intel_iommu_remove_device,
6d1c56a9 4479 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
a8bcbb0d 4480};
9af88143 4481
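/*
 * Illustrative sketch (not part of the original driver): intel_iommu_ops
 * above is driven through the generic IOMMU API, e.g. by VFIO or KVM device
 * assignment. The hypothetical "example_assign" below shows that calling
 * convention as of this kernel, with error handling trimmed to the
 * essentials; the iova value is arbitrary.
 */
#include <linux/iommu.h>
#include <linux/pci.h>

static int example_assign(struct device *dev, phys_addr_t paddr)
{
	struct iommu_domain *domain;
	unsigned long iova = 0x100000;
	int ret;

	/* ends up in intel_iommu_domain_init() */
	domain = iommu_domain_alloc(&pci_bus_type);
	if (!domain)
		return -ENOMEM;

	/* ends up in intel_iommu_attach_device() */
	ret = iommu_attach_device(domain, dev);
	if (ret)
		goto out_free;

	/* ends up in intel_iommu_map(); size must respect pgsize_bitmap */
	ret = iommu_map(domain, iova, paddr, PAGE_SIZE,
			IOMMU_READ | IOMMU_WRITE);
	if (ret)
		goto out_detach;

	/* ... let the device DMA to 'iova' ... */

	iommu_unmap(domain, iova, PAGE_SIZE);	/* intel_iommu_unmap() */
out_detach:
	iommu_detach_device(domain, dev);	/* intel_iommu_detach_device() */
out_free:
	iommu_domain_free(domain);		/* intel_iommu_domain_destroy() */
	return ret;
}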
9452618e
DV
4482static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
4483{
4484 /* G4x/GM45 integrated gfx dmar support is totally busted. */
4485 printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
4486 dmar_map_gfx = 0;
4487}
4488
4489DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
4490DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
4491DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
4492DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
4493DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
4494DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
4495DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
4496
d34d6517 4497static void quirk_iommu_rwbf(struct pci_dev *dev)
9af88143
DW
4498{
4499 /*
4500 * Mobile 4 Series Chipset neglects to set RWBF capability,
210561ff 4501 * but needs it. Same seems to hold for the desktop versions.
9af88143
DW
4502 */
4503 printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
4504 rwbf_quirk = 1;
4505}
4506
4507DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
210561ff
DV
4508DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
4509DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
4510DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
4511DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
4512DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
4513DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
e0fc7e0b 4514
eecfd57f
AJ
4515#define GGC 0x52
4516#define GGC_MEMORY_SIZE_MASK (0xf << 8)
4517#define GGC_MEMORY_SIZE_NONE (0x0 << 8)
4518#define GGC_MEMORY_SIZE_1M (0x1 << 8)
4519#define GGC_MEMORY_SIZE_2M (0x3 << 8)
4520#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
4521#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
4522#define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
4523#define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
4524
d34d6517 4525static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
9eecabcb
DW
4526{
4527 unsigned short ggc;
4528
eecfd57f 4529 if (pci_read_config_word(dev, GGC, &ggc))
9eecabcb
DW
4530 return;
4531
eecfd57f 4532 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
9eecabcb
DW
4533 printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
4534 dmar_map_gfx = 0;
6fbcfb3e
DW
4535 } else if (dmar_map_gfx) {
4536 /* we have to ensure the gfx device is idle before we flush */
4537 printk(KERN_INFO "DMAR: Disabling batched IOTLB flush on Ironlake\n");
4538 intel_iommu_strict = 1;
4539 }
9eecabcb
DW
4540}
4541DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
4542DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
4543DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
4544DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
4545
e0fc7e0b
DW
4546/* On Tylersburg chipsets, some BIOSes have been known to enable the
4547 ISOCH DMAR unit for the Azalia sound device, but not give it any
4548 TLB entries, which causes it to deadlock. Check for that. We do
4549 this in a function called from init_dmars(), instead of in a PCI
4550 quirk, because we don't want to print the obnoxious "BIOS broken"
4551 message if VT-d is actually disabled.
4552*/
4553static void __init check_tylersburg_isoch(void)
4554{
4555 struct pci_dev *pdev;
4556 uint32_t vtisochctrl;
4557
4558 /* If there's no Azalia in the system anyway, forget it. */
4559 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
4560 if (!pdev)
4561 return;
4562 pci_dev_put(pdev);
4563
4564 /* System Management Registers. Might be hidden, in which case
4565 we can't do the sanity check. But that's OK, because the
4566 known-broken BIOSes _don't_ actually hide it, so far. */
4567 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
4568 if (!pdev)
4569 return;
4570
4571 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
4572 pci_dev_put(pdev);
4573 return;
4574 }
4575
4576 pci_dev_put(pdev);
4577
4578 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
4579 if (vtisochctrl & 1)
4580 return;
4581
4582 /* Drop all bits other than the number of TLB entries */
4583 vtisochctrl &= 0x1c;
4584
4585 /* If we have the recommended number of TLB entries (16), fine. */
4586 if (vtisochctrl == 0x10)
4587 return;
4588
4589 /* Zero TLB entries? You get to ride the short bus to school. */
4590 if (!vtisochctrl) {
4591 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
4592 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4593 dmi_get_system_info(DMI_BIOS_VENDOR),
4594 dmi_get_system_info(DMI_BIOS_VERSION),
4595 dmi_get_system_info(DMI_PRODUCT_VERSION));
4596 iommu_identity_mapping |= IDENTMAP_AZALIA;
4597 return;
4598 }
4599
4600 printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
4601 vtisochctrl);
4602}