drivers/iommu/intel-iommu.c (git blame, mirror_ubuntu-zesty-kernel.git @ git.proxmox.com)
iommu/vt-d: Introduce helper functions to make code symmetric for readability
ba395927 1/*
ea8ea460 2 * Copyright © 2006-2014 Intel Corporation.
ba395927 3 *
 4 * This program is free software; you can redistribute it and/or modify it
 5 * under the terms and conditions of the GNU General Public License,
 6 * version 2, as published by the Free Software Foundation.
 7 *
 8 * This program is distributed in the hope it will be useful, but WITHOUT
 9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 11 * more details.
 12 *
ea8ea460 13 * Authors: David Woodhouse <dwmw2@infradead.org>,
 14 * Ashok Raj <ashok.raj@intel.com>,
 15 * Shaohua Li <shaohua.li@intel.com>,
 16 * Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
 17 * Fenghua Yu <fenghua.yu@intel.com>
ba395927 18 */
19
20#include <linux/init.h>
21#include <linux/bitmap.h>
5e0d2a6f 22#include <linux/debugfs.h>
54485c30 23#include <linux/export.h>
ba395927
KA
24#include <linux/slab.h>
25#include <linux/irq.h>
26#include <linux/interrupt.h>
ba395927
KA
27#include <linux/spinlock.h>
28#include <linux/pci.h>
29#include <linux/dmar.h>
30#include <linux/dma-mapping.h>
31#include <linux/mempool.h>
75f05569 32#include <linux/memory.h>
5e0d2a6f 33#include <linux/timer.h>
38717946 34#include <linux/iova.h>
5d450806 35#include <linux/iommu.h>
38717946 36#include <linux/intel-iommu.h>
134fac3f 37#include <linux/syscore_ops.h>
69575d38 38#include <linux/tboot.h>
adb2fe02 39#include <linux/dmi.h>
5cdede24 40#include <linux/pci-ats.h>
0ee332c1 41#include <linux/memblock.h>
36746436 42#include <linux/dma-contiguous.h>
8a8f422d 43#include <asm/irq_remapping.h>
ba395927 44#include <asm/cacheflush.h>
46a7fa27 45#include <asm/iommu.h>
ba395927 46
078e1ee2
JR
47#include "irq_remapping.h"
48
5b6985ce
FY
49#define ROOT_SIZE VTD_PAGE_SIZE
50#define CONTEXT_SIZE VTD_PAGE_SIZE
51
ba395927
KA
52#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
53#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
e0fc7e0b 54#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
ba395927
KA
55
56#define IOAPIC_RANGE_START (0xfee00000)
57#define IOAPIC_RANGE_END (0xfeefffff)
58#define IOVA_START_ADDR (0x1000)
59
60#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
61
4ed0d3e6 62#define MAX_AGAW_WIDTH 64
5c645b35 63#define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
4ed0d3e6 64
2ebe3151
DW
65#define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
66#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
67
68/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
69 to match. That way, we can use 'unsigned long' for PFNs with impunity. */
70#define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
71 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
72#define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
ba395927 73
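/*
 * Illustrative worked example: with the default 48-bit guest address width
 * and VTD_PAGE_SHIFT == 12,
 *	__DOMAIN_MAX_PFN(48) == (1ULL << 36) - 1 == 0xfffffffff
 *	DOMAIN_MAX_ADDR(48)  == 0xfffffffff << 12 == 0xfffffffff000
 * and on a 32-bit kernel DOMAIN_MAX_PFN(48) is clamped to 0xffffffff so
 * that PFNs always fit in an unsigned long.
 */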
f27be03b 74#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
284901a9 75#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32))
6a35528a 76#define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64))
5e0d2a6f 77
df08cdc7
AM
78/* page table handling */
79#define LEVEL_STRIDE (9)
80#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
81
6d1c56a9
OBC
82/*
 83 * This bitmap is used to advertise the page sizes our hardware supports
84 * to the IOMMU core, which will then use this information to split
85 * physically contiguous memory regions it is mapping into page sizes
86 * that we support.
87 *
88 * Traditionally the IOMMU core just handed us the mappings directly,
89 * after making sure the size is an order of a 4KiB page and that the
90 * mapping has natural alignment.
91 *
92 * To retain this behavior, we currently advertise that we support
93 * all page sizes that are an order of 4KiB.
94 *
95 * If at some point we'd like to utilize the IOMMU core's new behavior,
96 * we could change this to advertise the real page sizes we support.
97 */
98#define INTEL_IOMMU_PGSIZES (~0xFFFUL)
99
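/*
 * Illustrative note: ~0xFFFUL has every bit from 12 upwards set, so every
 * power-of-two size >= 4KiB is advertised, e.g. bit 12 (4KiB), bit 21
 * (2MiB) and bit 30 (1GiB).
 */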
df08cdc7
AM
100static inline int agaw_to_level(int agaw)
101{
102 return agaw + 2;
103}
104
105static inline int agaw_to_width(int agaw)
106{
5c645b35 107 return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
df08cdc7
AM
108}
109
110static inline int width_to_agaw(int width)
111{
5c645b35 112 return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
df08cdc7
AM
113}
114
115static inline unsigned int level_to_offset_bits(int level)
116{
117 return (level - 1) * LEVEL_STRIDE;
118}
119
120static inline int pfn_level_offset(unsigned long pfn, int level)
121{
122 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
123}
124
125static inline unsigned long level_mask(int level)
126{
127 return -1UL << level_to_offset_bits(level);
128}
129
130static inline unsigned long level_size(int level)
131{
132 return 1UL << level_to_offset_bits(level);
133}
134
135static inline unsigned long align_to_level(unsigned long pfn, int level)
136{
137 return (pfn + level_size(level) - 1) & level_mask(level);
138}
fd18de50 139
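/*
 * Illustrative worked example for the helpers above: for the default
 * 48-bit width, width_to_agaw(48) == 2 and agaw_to_level(2) == 4, i.e. a
 * four-level page table; each level indexes LEVEL_STRIDE == 9 bits of the
 * PFN, so for instance
 *	pfn_level_offset(pfn, 4) == (pfn >> 27) & 0x1ff
 *	pfn_level_offset(pfn, 1) == pfn & 0x1ff
 */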
6dd9a7c7
YS
140static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
141{
5c645b35 142 return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
6dd9a7c7
YS
143}
144
dd4e8319
DW
145/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
146 are never going to work. */
147static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
148{
149 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
150}
151
152static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
153{
154 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
155}
156static inline unsigned long page_to_dma_pfn(struct page *pg)
157{
158 return mm_to_dma_pfn(page_to_pfn(pg));
159}
160static inline unsigned long virt_to_dma_pfn(void *p)
161{
162 return page_to_dma_pfn(virt_to_page(p));
163}
164
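/*
 * Illustrative note: on x86 PAGE_SHIFT == VTD_PAGE_SHIFT == 12, so these
 * conversions are identities; on a hypothetical kernel built with 16KiB MM
 * pages (PAGE_SHIFT == 14) one MM pfn would correspond to four 4KiB DMA
 * pfns, hence the shift by (PAGE_SHIFT - VTD_PAGE_SHIFT).
 */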
d9630fe9
WH
165/* global iommu list, set NULL for ignored DMAR units */
166static struct intel_iommu **g_iommus;
167
e0fc7e0b 168static void __init check_tylersburg_isoch(void);
9af88143
DW
169static int rwbf_quirk;
170
b779260b
JC
171/*
 172 * set to 1 to panic the kernel if VT-d can't be successfully enabled
173 * (used when kernel is launched w/ TXT)
174 */
175static int force_on = 0;
176
46b08e1a
MM
177/*
178 * 0: Present
179 * 1-11: Reserved
180 * 12-63: Context Ptr (12 - (haw-1))
181 * 64-127: Reserved
182 */
183struct root_entry {
184 u64 val;
185 u64 rsvd1;
186};
187#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
188static inline bool root_present(struct root_entry *root)
189{
190 return (root->val & 1);
191}
192static inline void set_root_present(struct root_entry *root)
193{
194 root->val |= 1;
195}
196static inline void set_root_value(struct root_entry *root, unsigned long value)
197{
198 root->val |= value & VTD_PAGE_MASK;
199}
200
201static inline struct context_entry *
202get_context_addr_from_root(struct root_entry *root)
203{
204 return (struct context_entry *)
205 (root_present(root)?phys_to_virt(
206 root->val & VTD_PAGE_MASK) :
207 NULL);
208}
209
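/*
 * Illustrative sketch (mirrors device_to_context_entry() further down): a
 * root entry is populated by storing the physical address of a context
 * table in bits 12-63 and then setting the present bit:
 *
 *	set_root_value(root, virt_to_phys(context_table));
 *	set_root_present(root);
 *
 * where context_table stands for a page obtained from alloc_pgtable_page().
 */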
7a8fc25e
MM
210/*
211 * low 64 bits:
212 * 0: present
213 * 1: fault processing disable
214 * 2-3: translation type
215 * 12-63: address space root
216 * high 64 bits:
217 * 0-2: address width
218 * 3-6: aval
219 * 8-23: domain id
220 */
221struct context_entry {
222 u64 lo;
223 u64 hi;
224};
c07e7d21
MM
225
226static inline bool context_present(struct context_entry *context)
227{
228 return (context->lo & 1);
229}
230static inline void context_set_present(struct context_entry *context)
231{
232 context->lo |= 1;
233}
234
235static inline void context_set_fault_enable(struct context_entry *context)
236{
237 context->lo &= (((u64)-1) << 2) | 1;
238}
239
c07e7d21
MM
240static inline void context_set_translation_type(struct context_entry *context,
241 unsigned long value)
242{
243 context->lo &= (((u64)-1) << 4) | 3;
244 context->lo |= (value & 3) << 2;
245}
246
247static inline void context_set_address_root(struct context_entry *context,
248 unsigned long value)
249{
250 context->lo |= value & VTD_PAGE_MASK;
251}
252
253static inline void context_set_address_width(struct context_entry *context,
254 unsigned long value)
255{
256 context->hi |= value & 7;
257}
258
259static inline void context_set_domain_id(struct context_entry *context,
260 unsigned long value)
261{
262 context->hi |= (value & ((1 << 16) - 1)) << 8;
263}
264
265static inline void context_clear_entry(struct context_entry *context)
266{
267 context->lo = 0;
268 context->hi = 0;
269}
7a8fc25e 270
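/*
 * Illustrative sketch (mirrors domain_context_mapping_one() further down):
 * a context entry for a device is typically programmed as
 *
 *	context_set_domain_id(context, domain->id);
 *	context_set_address_root(context, virt_to_phys(domain->pgd));
 *	context_set_address_width(context, iommu->agaw);
 *	context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL);
 *	context_set_fault_enable(context);
 *	context_set_present(context);
 */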
622ba12a
MM
271/*
272 * 0: readable
273 * 1: writable
274 * 2-6: reserved
275 * 7: super page
9cf06697
SY
276 * 8-10: available
277 * 11: snoop behavior
622ba12a 278 * 12-63: Host physical address
279 */
280struct dma_pte {
281 u64 val;
282};
622ba12a 283
19c239ce
MM
284static inline void dma_clear_pte(struct dma_pte *pte)
285{
286 pte->val = 0;
287}
288
19c239ce
MM
289static inline u64 dma_pte_addr(struct dma_pte *pte)
290{
c85994e4
DW
291#ifdef CONFIG_64BIT
292 return pte->val & VTD_PAGE_MASK;
293#else
294 /* Must have a full atomic 64-bit read */
1a8bd481 295 return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
c85994e4 296#endif
19c239ce
MM
297}
298
19c239ce
MM
299static inline bool dma_pte_present(struct dma_pte *pte)
300{
301 return (pte->val & 3) != 0;
302}
622ba12a 303
4399c8bf
AK
304static inline bool dma_pte_superpage(struct dma_pte *pte)
305{
c3c75eb7 306 return (pte->val & DMA_PTE_LARGE_PAGE);
4399c8bf
AK
307}
308
75e6bf96
DW
309static inline int first_pte_in_page(struct dma_pte *pte)
310{
311 return !((unsigned long)pte & ~VTD_PAGE_MASK);
312}
313
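/*
 * Illustrative note: bits 0 and 1 of dma_pte.val are the read/write
 * permission bits, which is why dma_pte_present() tests (val & 3);
 * pfn_to_dma_pte() below builds an intermediate entry as
 *
 *	pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) |
 *		 DMA_PTE_READ | DMA_PTE_WRITE;
 */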
2c2e2c38
FY
314/*
 315 * This domain is a static identity mapping domain.
 316 * 1. This domain creates a static 1:1 mapping of all usable memory.
 317 * 2. It maps to each iommu if successful.
 318 * 3. Each iommu maps to this domain if successful.
319 */
19943b0e
DW
320static struct dmar_domain *si_domain;
321static int hw_pass_through = 1;
2c2e2c38 322
1ce28feb
WH
 323/* domain represents a virtual machine; more than one device
 324 * across iommus may be owned by one domain, e.g. a kvm guest.
325 */
ab8dfe25 326#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 0)
1ce28feb 327
2c2e2c38 328/* si_domain contains multiple devices */
ab8dfe25 329#define DOMAIN_FLAG_STATIC_IDENTITY (1 << 1)
2c2e2c38 330
1b198bb0
MT
331/* define the limit of IOMMUs supported in each domain */
332#ifdef CONFIG_X86
333# define IOMMU_UNITS_SUPPORTED MAX_IO_APICS
334#else
335# define IOMMU_UNITS_SUPPORTED 64
336#endif
337
99126f7c
MM
338struct dmar_domain {
339 int id; /* domain id */
4c923d47 340 int nid; /* node id */
1b198bb0
MT
341 DECLARE_BITMAP(iommu_bmp, IOMMU_UNITS_SUPPORTED);
342 /* bitmap of iommus this domain uses*/
99126f7c
MM
343
344 struct list_head devices; /* all devices' list */
345 struct iova_domain iovad; /* iova's that belong to this domain */
346
347 struct dma_pte *pgd; /* virtual address */
99126f7c
MM
348 int gaw; /* max guest address width */
349
350 /* adjusted guest address width, 0 is level 2 30-bit */
351 int agaw;
352
3b5410e7 353 int flags; /* flags to find out type of domain */
8e604097
WH
354
355 int iommu_coherency;/* indicate coherency of iommu access */
58c610bd 356 int iommu_snooping; /* indicate snooping control feature*/
c7151a8d 357 int iommu_count; /* reference count of iommu */
6dd9a7c7
YS
358 int iommu_superpage;/* Level of superpages supported:
359 0 == 4KiB (no superpages), 1 == 2MiB,
360 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
c7151a8d 361 spinlock_t iommu_lock; /* protect iommu set in domain */
fe40f1e0 362 u64 max_addr; /* maximum mapped address */
99126f7c
MM
363};
364
a647dacb
MM
365/* PCI domain-device relationship */
366struct device_domain_info {
367 struct list_head link; /* link to domain siblings */
368 struct list_head global; /* link to global list */
276dbf99 369 u8 bus; /* PCI bus number */
a647dacb 370 u8 devfn; /* PCI devfn number */
0bcb3e28 371 struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
93a23a72 372 struct intel_iommu *iommu; /* IOMMU used by this device */
a647dacb
MM
373 struct dmar_domain *domain; /* pointer to domain */
374};
375
b94e4117
JL
376struct dmar_rmrr_unit {
377 struct list_head list; /* list of rmrr units */
378 struct acpi_dmar_header *hdr; /* ACPI header */
379 u64 base_address; /* reserved base address*/
380 u64 end_address; /* reserved end address */
832bd858 381 struct dmar_dev_scope *devices; /* target devices */
b94e4117
JL
382 int devices_cnt; /* target device count */
383};
384
385struct dmar_atsr_unit {
386 struct list_head list; /* list of ATSR units */
387 struct acpi_dmar_header *hdr; /* ACPI header */
832bd858 388 struct dmar_dev_scope *devices; /* target devices */
b94e4117
JL
389 int devices_cnt; /* target device count */
390 u8 include_all:1; /* include all ports */
391};
392
393static LIST_HEAD(dmar_atsr_units);
394static LIST_HEAD(dmar_rmrr_units);
395
396#define for_each_rmrr_units(rmrr) \
397 list_for_each_entry(rmrr, &dmar_rmrr_units, list)
398
5e0d2a6f 399static void flush_unmaps_timeout(unsigned long data);
400
b707cb02 401static DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
5e0d2a6f 402
80b20dd8 403#define HIGH_WATER_MARK 250
404struct deferred_flush_tables {
405 int next;
406 struct iova *iova[HIGH_WATER_MARK];
407 struct dmar_domain *domain[HIGH_WATER_MARK];
ea8ea460 408 struct page *freelist[HIGH_WATER_MARK];
80b20dd8 409};
410
411static struct deferred_flush_tables *deferred_flush;
412
5e0d2a6f 413/* bitmap for indexing intel_iommus */
5e0d2a6f 414static int g_num_of_iommus;
415
416static DEFINE_SPINLOCK(async_umap_flush_lock);
417static LIST_HEAD(unmaps_to_do);
418
419static int timer_on;
420static long list_size;
5e0d2a6f 421
92d03cc8 422static void domain_exit(struct dmar_domain *domain);
ba395927 423static void domain_remove_dev_info(struct dmar_domain *domain);
b94e4117 424static void domain_remove_one_dev_info(struct dmar_domain *domain,
bf9c9eda 425 struct device *dev);
92d03cc8 426static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
0bcb3e28 427 struct device *dev);
ba395927 428
d3f13810 429#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
0cd5c3c8
KM
430int dmar_disabled = 0;
431#else
432int dmar_disabled = 1;
d3f13810 433#endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
0cd5c3c8 434
8bc1f85c
ED
435int intel_iommu_enabled = 0;
436EXPORT_SYMBOL_GPL(intel_iommu_enabled);
437
2d9e667e 438static int dmar_map_gfx = 1;
7d3b03ce 439static int dmar_forcedac;
5e0d2a6f 440static int intel_iommu_strict;
6dd9a7c7 441static int intel_iommu_superpage = 1;
ba395927 442
c0771df8
DW
443int intel_iommu_gfx_mapped;
444EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
445
ba395927
KA
446#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
447static DEFINE_SPINLOCK(device_domain_lock);
448static LIST_HEAD(device_domain_list);
449
b22f6434 450static const struct iommu_ops intel_iommu_ops;
a8bcbb0d 451
ba395927
KA
452static int __init intel_iommu_setup(char *str)
453{
454 if (!str)
455 return -EINVAL;
456 while (*str) {
0cd5c3c8
KM
457 if (!strncmp(str, "on", 2)) {
458 dmar_disabled = 0;
459 printk(KERN_INFO "Intel-IOMMU: enabled\n");
460 } else if (!strncmp(str, "off", 3)) {
ba395927 461 dmar_disabled = 1;
0cd5c3c8 462 printk(KERN_INFO "Intel-IOMMU: disabled\n");
ba395927
KA
463 } else if (!strncmp(str, "igfx_off", 8)) {
464 dmar_map_gfx = 0;
465 printk(KERN_INFO
466 "Intel-IOMMU: disable GFX device mapping\n");
7d3b03ce 467 } else if (!strncmp(str, "forcedac", 8)) {
5e0d2a6f 468 printk(KERN_INFO
7d3b03ce
KA
469 "Intel-IOMMU: Forcing DAC for PCI devices\n");
470 dmar_forcedac = 1;
5e0d2a6f 471 } else if (!strncmp(str, "strict", 6)) {
472 printk(KERN_INFO
473 "Intel-IOMMU: disable batched IOTLB flush\n");
474 intel_iommu_strict = 1;
6dd9a7c7
YS
475 } else if (!strncmp(str, "sp_off", 6)) {
476 printk(KERN_INFO
477 "Intel-IOMMU: disable supported super page\n");
478 intel_iommu_superpage = 0;
ba395927
KA
479 }
480
481 str += strcspn(str, ",");
482 while (*str == ',')
483 str++;
484 }
485 return 0;
486}
487__setup("intel_iommu=", intel_iommu_setup);
488
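/*
 * Illustrative usage: the options parsed above are comma separated on the
 * kernel command line, e.g.
 *
 *	intel_iommu=on,strict,sp_off
 *
 * which enables the IOMMU, disables batched IOTLB flushing and disables
 * superpage support.
 */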
489static struct kmem_cache *iommu_domain_cache;
490static struct kmem_cache *iommu_devinfo_cache;
491static struct kmem_cache *iommu_iova_cache;
492
4c923d47 493static inline void *alloc_pgtable_page(int node)
eb3fa7cb 494{
4c923d47
SS
495 struct page *page;
496 void *vaddr = NULL;
eb3fa7cb 497
4c923d47
SS
498 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
499 if (page)
500 vaddr = page_address(page);
eb3fa7cb 501 return vaddr;
ba395927
KA
502}
503
504static inline void free_pgtable_page(void *vaddr)
505{
506 free_page((unsigned long)vaddr);
507}
508
509static inline void *alloc_domain_mem(void)
510{
354bb65e 511 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
ba395927
KA
512}
513
38717946 514static void free_domain_mem(void *vaddr)
ba395927
KA
515{
516 kmem_cache_free(iommu_domain_cache, vaddr);
517}
518
519static inline void * alloc_devinfo_mem(void)
520{
354bb65e 521 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
ba395927
KA
522}
523
524static inline void free_devinfo_mem(void *vaddr)
525{
526 kmem_cache_free(iommu_devinfo_cache, vaddr);
527}
528
529struct iova *alloc_iova_mem(void)
530{
354bb65e 531 return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
ba395927
KA
532}
533
534void free_iova_mem(struct iova *iova)
535{
536 kmem_cache_free(iommu_iova_cache, iova);
537}
538
ab8dfe25
JL
539static inline int domain_type_is_vm(struct dmar_domain *domain)
540{
541 return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE;
542}
543
544static inline int domain_type_is_vm_or_si(struct dmar_domain *domain)
545{
546 return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE |
547 DOMAIN_FLAG_STATIC_IDENTITY);
548}
1b573683 549
4ed0d3e6 550static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
1b573683
WH
551{
552 unsigned long sagaw;
553 int agaw = -1;
554
555 sagaw = cap_sagaw(iommu->cap);
4ed0d3e6 556 for (agaw = width_to_agaw(max_gaw);
1b573683
WH
557 agaw >= 0; agaw--) {
558 if (test_bit(agaw, &sagaw))
559 break;
560 }
561
562 return agaw;
563}
564
4ed0d3e6
FY
565/*
566 * Calculate max SAGAW for each iommu.
567 */
568int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
569{
570 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
571}
572
573/*
574 * calculate agaw for each iommu.
 575 * "SAGAW" may be different across iommus: use a default agaw, and
 576 * fall back to a smaller supported agaw for iommus that don't support it.
577 */
578int iommu_calculate_agaw(struct intel_iommu *iommu)
579{
580 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
581}
582
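/*
 * Illustrative worked example: if cap_sagaw() reports only bit 2 set
 * (4-level tables), iommu_calculate_agaw() starts at width_to_agaw(48) == 2,
 * finds bit 2 set and returns 2; hardware that also sets bit 3 lets
 * iommu_calculate_max_sagaw() return 3 (5-level, 57-bit width).
 */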
2c2e2c38 583/* This function only returns a single iommu in a domain */
8c11e798
WH
584static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
585{
586 int iommu_id;
587
2c2e2c38 588 /* si_domain and vm domain should not get here. */
ab8dfe25 589 BUG_ON(domain_type_is_vm_or_si(domain));
1b198bb0 590 iommu_id = find_first_bit(domain->iommu_bmp, g_num_of_iommus);
8c11e798
WH
591 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
592 return NULL;
593
594 return g_iommus[iommu_id];
595}
596
8e604097
WH
597static void domain_update_iommu_coherency(struct dmar_domain *domain)
598{
d0501960
DW
599 struct dmar_drhd_unit *drhd;
600 struct intel_iommu *iommu;
601 int i, found = 0;
2e12bc29 602
d0501960 603 domain->iommu_coherency = 1;
8e604097 604
1b198bb0 605 for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
d0501960 606 found = 1;
8e604097
WH
607 if (!ecap_coherent(g_iommus[i]->ecap)) {
608 domain->iommu_coherency = 0;
609 break;
610 }
8e604097 611 }
d0501960
DW
612 if (found)
613 return;
614
615 /* No hardware attached; use lowest common denominator */
616 rcu_read_lock();
617 for_each_active_iommu(iommu, drhd) {
618 if (!ecap_coherent(iommu->ecap)) {
619 domain->iommu_coherency = 0;
620 break;
621 }
622 }
623 rcu_read_unlock();
8e604097
WH
624}
625
58c610bd
SY
626static void domain_update_iommu_snooping(struct dmar_domain *domain)
627{
628 int i;
629
630 domain->iommu_snooping = 1;
631
1b198bb0 632 for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
58c610bd
SY
633 if (!ecap_sc_support(g_iommus[i]->ecap)) {
634 domain->iommu_snooping = 0;
635 break;
636 }
58c610bd
SY
637 }
638}
639
6dd9a7c7
YS
640static void domain_update_iommu_superpage(struct dmar_domain *domain)
641{
8140a95d
AK
642 struct dmar_drhd_unit *drhd;
643 struct intel_iommu *iommu = NULL;
644 int mask = 0xf;
6dd9a7c7
YS
645
646 if (!intel_iommu_superpage) {
647 domain->iommu_superpage = 0;
648 return;
649 }
650
8140a95d 651 /* set iommu_superpage to the smallest common denominator */
0e242612 652 rcu_read_lock();
8140a95d
AK
653 for_each_active_iommu(iommu, drhd) {
654 mask &= cap_super_page_val(iommu->cap);
6dd9a7c7
YS
655 if (!mask) {
656 break;
657 }
658 }
0e242612
JL
659 rcu_read_unlock();
660
6dd9a7c7
YS
661 domain->iommu_superpage = fls(mask);
662}
663
58c610bd
SY
664/* Some capabilities may be different across iommus */
665static void domain_update_iommu_cap(struct dmar_domain *domain)
666{
667 domain_update_iommu_coherency(domain);
668 domain_update_iommu_snooping(domain);
6dd9a7c7 669 domain_update_iommu_superpage(domain);
58c610bd
SY
670}
671
156baca8 672static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
c7151a8d
WH
673{
674 struct dmar_drhd_unit *drhd = NULL;
b683b230 675 struct intel_iommu *iommu;
156baca8
DW
676 struct device *tmp;
677 struct pci_dev *ptmp, *pdev = NULL;
aa4d066a 678 u16 segment = 0;
c7151a8d
WH
679 int i;
680
156baca8
DW
681 if (dev_is_pci(dev)) {
682 pdev = to_pci_dev(dev);
683 segment = pci_domain_nr(pdev->bus);
684 } else if (ACPI_COMPANION(dev))
685 dev = &ACPI_COMPANION(dev)->dev;
686
0e242612 687 rcu_read_lock();
b683b230 688 for_each_active_iommu(iommu, drhd) {
156baca8 689 if (pdev && segment != drhd->segment)
276dbf99 690 continue;
c7151a8d 691
b683b230 692 for_each_active_dev_scope(drhd->devices,
156baca8
DW
693 drhd->devices_cnt, i, tmp) {
694 if (tmp == dev) {
695 *bus = drhd->devices[i].bus;
696 *devfn = drhd->devices[i].devfn;
b683b230 697 goto out;
156baca8
DW
698 }
699
700 if (!pdev || !dev_is_pci(tmp))
701 continue;
702
703 ptmp = to_pci_dev(tmp);
704 if (ptmp->subordinate &&
705 ptmp->subordinate->number <= pdev->bus->number &&
706 ptmp->subordinate->busn_res.end >= pdev->bus->number)
707 goto got_pdev;
924b6231 708 }
c7151a8d 709
156baca8
DW
710 if (pdev && drhd->include_all) {
711 got_pdev:
712 *bus = pdev->bus->number;
713 *devfn = pdev->devfn;
b683b230 714 goto out;
156baca8 715 }
c7151a8d 716 }
b683b230 717 iommu = NULL;
156baca8 718 out:
0e242612 719 rcu_read_unlock();
c7151a8d 720
b683b230 721 return iommu;
c7151a8d
WH
722}
723
5331fe6f
WH
724static void domain_flush_cache(struct dmar_domain *domain,
725 void *addr, int size)
726{
727 if (!domain->iommu_coherency)
728 clflush_cache_range(addr, size);
729}
730
ba395927
KA
731/* Gets context entry for a given bus and devfn */
732static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
733 u8 bus, u8 devfn)
734{
735 struct root_entry *root;
736 struct context_entry *context;
737 unsigned long phy_addr;
738 unsigned long flags;
739
740 spin_lock_irqsave(&iommu->lock, flags);
741 root = &iommu->root_entry[bus];
742 context = get_context_addr_from_root(root);
743 if (!context) {
4c923d47
SS
744 context = (struct context_entry *)
745 alloc_pgtable_page(iommu->node);
ba395927
KA
746 if (!context) {
747 spin_unlock_irqrestore(&iommu->lock, flags);
748 return NULL;
749 }
5b6985ce 750 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
ba395927
KA
751 phy_addr = virt_to_phys((void *)context);
752 set_root_value(root, phy_addr);
753 set_root_present(root);
754 __iommu_flush_cache(iommu, root, sizeof(*root));
755 }
756 spin_unlock_irqrestore(&iommu->lock, flags);
757 return &context[devfn];
758}
759
760static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
761{
762 struct root_entry *root;
763 struct context_entry *context;
764 int ret;
765 unsigned long flags;
766
767 spin_lock_irqsave(&iommu->lock, flags);
768 root = &iommu->root_entry[bus];
769 context = get_context_addr_from_root(root);
770 if (!context) {
771 ret = 0;
772 goto out;
773 }
c07e7d21 774 ret = context_present(&context[devfn]);
ba395927
KA
775out:
776 spin_unlock_irqrestore(&iommu->lock, flags);
777 return ret;
778}
779
780static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
781{
782 struct root_entry *root;
783 struct context_entry *context;
784 unsigned long flags;
785
786 spin_lock_irqsave(&iommu->lock, flags);
787 root = &iommu->root_entry[bus];
788 context = get_context_addr_from_root(root);
789 if (context) {
c07e7d21 790 context_clear_entry(&context[devfn]);
ba395927
KA
791 __iommu_flush_cache(iommu, &context[devfn], \
792 sizeof(*context));
793 }
794 spin_unlock_irqrestore(&iommu->lock, flags);
795}
796
797static void free_context_table(struct intel_iommu *iommu)
798{
799 struct root_entry *root;
800 int i;
801 unsigned long flags;
802 struct context_entry *context;
803
804 spin_lock_irqsave(&iommu->lock, flags);
805 if (!iommu->root_entry) {
806 goto out;
807 }
808 for (i = 0; i < ROOT_ENTRY_NR; i++) {
809 root = &iommu->root_entry[i];
810 context = get_context_addr_from_root(root);
811 if (context)
812 free_pgtable_page(context);
813 }
814 free_pgtable_page(iommu->root_entry);
815 iommu->root_entry = NULL;
816out:
817 spin_unlock_irqrestore(&iommu->lock, flags);
818}
819
b026fd28 820static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
5cf0a76f 821 unsigned long pfn, int *target_level)
ba395927 822{
b026fd28 823 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
ba395927
KA
824 struct dma_pte *parent, *pte = NULL;
825 int level = agaw_to_level(domain->agaw);
4399c8bf 826 int offset;
ba395927
KA
827
828 BUG_ON(!domain->pgd);
f9423606
JS
829
830 if (addr_width < BITS_PER_LONG && pfn >> addr_width)
831 /* Address beyond IOMMU's addressing capabilities. */
832 return NULL;
833
ba395927
KA
834 parent = domain->pgd;
835
5cf0a76f 836 while (1) {
ba395927
KA
837 void *tmp_page;
838
b026fd28 839 offset = pfn_level_offset(pfn, level);
ba395927 840 pte = &parent[offset];
5cf0a76f 841 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
6dd9a7c7 842 break;
5cf0a76f 843 if (level == *target_level)
ba395927
KA
844 break;
845
19c239ce 846 if (!dma_pte_present(pte)) {
c85994e4
DW
847 uint64_t pteval;
848
4c923d47 849 tmp_page = alloc_pgtable_page(domain->nid);
ba395927 850
206a73c1 851 if (!tmp_page)
ba395927 852 return NULL;
206a73c1 853
c85994e4 854 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
64de5af0 855 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
effad4b5 856 if (cmpxchg64(&pte->val, 0ULL, pteval))
c85994e4
DW
857 /* Someone else set it while we were thinking; use theirs. */
858 free_pgtable_page(tmp_page);
effad4b5 859 else
c85994e4 860 domain_flush_cache(domain, pte, sizeof(*pte));
ba395927 861 }
5cf0a76f
DW
862 if (level == 1)
863 break;
864
19c239ce 865 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
866 level--;
867 }
868
5cf0a76f
DW
869 if (!*target_level)
870 *target_level = level;
871
ba395927
KA
872 return pte;
873}
874
6dd9a7c7 875
ba395927 876/* return address's pte at specific level */
90dcfb5e
DW
877static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
878 unsigned long pfn,
6dd9a7c7 879 int level, int *large_page)
ba395927
KA
880{
881 struct dma_pte *parent, *pte = NULL;
882 int total = agaw_to_level(domain->agaw);
883 int offset;
884
885 parent = domain->pgd;
886 while (level <= total) {
90dcfb5e 887 offset = pfn_level_offset(pfn, total);
ba395927
KA
888 pte = &parent[offset];
889 if (level == total)
890 return pte;
891
6dd9a7c7
YS
892 if (!dma_pte_present(pte)) {
893 *large_page = total;
ba395927 894 break;
6dd9a7c7
YS
895 }
896
e16922af 897 if (dma_pte_superpage(pte)) {
6dd9a7c7
YS
898 *large_page = total;
899 return pte;
900 }
901
19c239ce 902 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
903 total--;
904 }
905 return NULL;
906}
907
ba395927 908/* clear last level pte, a tlb flush should be followed */
5cf0a76f 909static void dma_pte_clear_range(struct dmar_domain *domain,
595badf5
DW
910 unsigned long start_pfn,
911 unsigned long last_pfn)
ba395927 912{
04b18e65 913 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
6dd9a7c7 914 unsigned int large_page = 1;
310a5ab9 915 struct dma_pte *first_pte, *pte;
66eae846 916
04b18e65 917 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
595badf5 918 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
59c36286 919 BUG_ON(start_pfn > last_pfn);
ba395927 920
04b18e65 921 /* we don't need lock here; nobody else touches the iova range */
59c36286 922 do {
6dd9a7c7
YS
923 large_page = 1;
924 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
310a5ab9 925 if (!pte) {
6dd9a7c7 926 start_pfn = align_to_level(start_pfn + 1, large_page + 1);
310a5ab9
DW
927 continue;
928 }
6dd9a7c7 929 do {
310a5ab9 930 dma_clear_pte(pte);
6dd9a7c7 931 start_pfn += lvl_to_nr_pages(large_page);
310a5ab9 932 pte++;
75e6bf96
DW
933 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
934
310a5ab9
DW
935 domain_flush_cache(domain, first_pte,
936 (void *)pte - (void *)first_pte);
59c36286
DW
937
938 } while (start_pfn && start_pfn <= last_pfn);
ba395927
KA
939}
940
3269ee0b
AW
941static void dma_pte_free_level(struct dmar_domain *domain, int level,
942 struct dma_pte *pte, unsigned long pfn,
943 unsigned long start_pfn, unsigned long last_pfn)
944{
945 pfn = max(start_pfn, pfn);
946 pte = &pte[pfn_level_offset(pfn, level)];
947
948 do {
949 unsigned long level_pfn;
950 struct dma_pte *level_pte;
951
952 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
953 goto next;
954
955 level_pfn = pfn & level_mask(level - 1);
956 level_pte = phys_to_virt(dma_pte_addr(pte));
957
958 if (level > 2)
959 dma_pte_free_level(domain, level - 1, level_pte,
960 level_pfn, start_pfn, last_pfn);
961
962 /* If range covers entire pagetable, free it */
963 if (!(start_pfn > level_pfn ||
08336fd2 964 last_pfn < level_pfn + level_size(level) - 1)) {
3269ee0b
AW
965 dma_clear_pte(pte);
966 domain_flush_cache(domain, pte, sizeof(*pte));
967 free_pgtable_page(level_pte);
968 }
969next:
970 pfn += level_size(level);
971 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
972}
973
ba395927
KA
974/* free page table pages. last level pte should already be cleared */
975static void dma_pte_free_pagetable(struct dmar_domain *domain,
d794dc9b
DW
976 unsigned long start_pfn,
977 unsigned long last_pfn)
ba395927 978{
6660c63a 979 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
ba395927 980
6660c63a
DW
981 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
982 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
59c36286 983 BUG_ON(start_pfn > last_pfn);
ba395927 984
f3a0a52f 985 /* We don't need lock here; nobody else touches the iova range */
3269ee0b
AW
986 dma_pte_free_level(domain, agaw_to_level(domain->agaw),
987 domain->pgd, 0, start_pfn, last_pfn);
6660c63a 988
ba395927 989 /* free pgd */
d794dc9b 990 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
ba395927
KA
991 free_pgtable_page(domain->pgd);
992 domain->pgd = NULL;
993 }
994}
995
ea8ea460
DW
996/* When a page at a given level is being unlinked from its parent, we don't
997 need to *modify* it at all. All we need to do is make a list of all the
998 pages which can be freed just as soon as we've flushed the IOTLB and we
999 know the hardware page-walk will no longer touch them.
1000 The 'pte' argument is the *parent* PTE, pointing to the page that is to
1001 be freed. */
1002static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1003 int level, struct dma_pte *pte,
1004 struct page *freelist)
1005{
1006 struct page *pg;
1007
1008 pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1009 pg->freelist = freelist;
1010 freelist = pg;
1011
1012 if (level == 1)
1013 return freelist;
1014
adeb2590
JL
1015 pte = page_address(pg);
1016 do {
ea8ea460
DW
1017 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1018 freelist = dma_pte_list_pagetables(domain, level - 1,
1019 pte, freelist);
adeb2590
JL
1020 pte++;
1021 } while (!first_pte_in_page(pte));
ea8ea460
DW
1022
1023 return freelist;
1024}
1025
1026static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1027 struct dma_pte *pte, unsigned long pfn,
1028 unsigned long start_pfn,
1029 unsigned long last_pfn,
1030 struct page *freelist)
1031{
1032 struct dma_pte *first_pte = NULL, *last_pte = NULL;
1033
1034 pfn = max(start_pfn, pfn);
1035 pte = &pte[pfn_level_offset(pfn, level)];
1036
1037 do {
1038 unsigned long level_pfn;
1039
1040 if (!dma_pte_present(pte))
1041 goto next;
1042
1043 level_pfn = pfn & level_mask(level);
1044
1045 /* If range covers entire pagetable, free it */
1046 if (start_pfn <= level_pfn &&
1047 last_pfn >= level_pfn + level_size(level) - 1) {
 1048 /* These subordinate page tables are going away entirely. Don't
1049 bother to clear them; we're just going to *free* them. */
1050 if (level > 1 && !dma_pte_superpage(pte))
1051 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1052
1053 dma_clear_pte(pte);
1054 if (!first_pte)
1055 first_pte = pte;
1056 last_pte = pte;
1057 } else if (level > 1) {
1058 /* Recurse down into a level that isn't *entirely* obsolete */
1059 freelist = dma_pte_clear_level(domain, level - 1,
1060 phys_to_virt(dma_pte_addr(pte)),
1061 level_pfn, start_pfn, last_pfn,
1062 freelist);
1063 }
1064next:
1065 pfn += level_size(level);
1066 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1067
1068 if (first_pte)
1069 domain_flush_cache(domain, first_pte,
1070 (void *)++last_pte - (void *)first_pte);
1071
1072 return freelist;
1073}
1074
1075/* We can't just free the pages because the IOMMU may still be walking
1076 the page tables, and may have cached the intermediate levels. The
1077 pages can only be freed after the IOTLB flush has been done. */
1078struct page *domain_unmap(struct dmar_domain *domain,
1079 unsigned long start_pfn,
1080 unsigned long last_pfn)
1081{
1082 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
1083 struct page *freelist = NULL;
1084
1085 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
1086 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
1087 BUG_ON(start_pfn > last_pfn);
1088
1089 /* we don't need lock here; nobody else touches the iova range */
1090 freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1091 domain->pgd, 0, start_pfn, last_pfn, NULL);
1092
1093 /* free pgd */
1094 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1095 struct page *pgd_page = virt_to_page(domain->pgd);
1096 pgd_page->freelist = freelist;
1097 freelist = pgd_page;
1098
1099 domain->pgd = NULL;
1100 }
1101
1102 return freelist;
1103}
1104
1105void dma_free_pagelist(struct page *freelist)
1106{
1107 struct page *pg;
1108
1109 while ((pg = freelist)) {
1110 freelist = pg->freelist;
1111 free_pgtable_page(page_address(pg));
1112 }
1113}
1114
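/*
 * Illustrative usage sketch (this is the pattern domain_exit() follows
 * further down):
 *
 *	freelist = domain_unmap(domain, start_pfn, last_pfn);
 *	... flush the IOTLB for that range ...
 *	dma_free_pagelist(freelist);
 *
 * so page-table pages are only returned to the allocator once the hardware
 * can no longer walk them.
 */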
ba395927
KA
1115/* iommu handling */
1116static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1117{
1118 struct root_entry *root;
1119 unsigned long flags;
1120
4c923d47 1121 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
ba395927
KA
1122 if (!root)
1123 return -ENOMEM;
1124
5b6985ce 1125 __iommu_flush_cache(iommu, root, ROOT_SIZE);
ba395927
KA
1126
1127 spin_lock_irqsave(&iommu->lock, flags);
1128 iommu->root_entry = root;
1129 spin_unlock_irqrestore(&iommu->lock, flags);
1130
1131 return 0;
1132}
1133
ba395927
KA
1134static void iommu_set_root_entry(struct intel_iommu *iommu)
1135{
1136 void *addr;
c416daa9 1137 u32 sts;
ba395927
KA
1138 unsigned long flag;
1139
1140 addr = iommu->root_entry;
1141
1f5b3c3f 1142 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1143 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
1144
c416daa9 1145 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1146
1147 /* Make sure hardware complete it */
1148 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1149 readl, (sts & DMA_GSTS_RTPS), sts);
ba395927 1150
1f5b3c3f 1151 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1152}
1153
1154static void iommu_flush_write_buffer(struct intel_iommu *iommu)
1155{
1156 u32 val;
1157 unsigned long flag;
1158
9af88143 1159 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
ba395927 1160 return;
ba395927 1161
1f5b3c3f 1162 raw_spin_lock_irqsave(&iommu->register_lock, flag);
462b60f6 1163 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1164
1165 /* Make sure hardware complete it */
1166 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1167 readl, (!(val & DMA_GSTS_WBFS)), val);
ba395927 1168
1f5b3c3f 1169 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1170}
1171
 1172/* return value determines if we need a write buffer flush */
4c25a2c1
DW
1173static void __iommu_flush_context(struct intel_iommu *iommu,
1174 u16 did, u16 source_id, u8 function_mask,
1175 u64 type)
ba395927
KA
1176{
1177 u64 val = 0;
1178 unsigned long flag;
1179
ba395927
KA
1180 switch (type) {
1181 case DMA_CCMD_GLOBAL_INVL:
1182 val = DMA_CCMD_GLOBAL_INVL;
1183 break;
1184 case DMA_CCMD_DOMAIN_INVL:
1185 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1186 break;
1187 case DMA_CCMD_DEVICE_INVL:
1188 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1189 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1190 break;
1191 default:
1192 BUG();
1193 }
1194 val |= DMA_CCMD_ICC;
1195
1f5b3c3f 1196 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1197 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1198
1199 /* Make sure hardware complete it */
1200 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1201 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1202
1f5b3c3f 1203 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1204}
1205
ba395927 1206/* return value determines if we need a write buffer flush */
1f0ef2aa
DW
1207static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1208 u64 addr, unsigned int size_order, u64 type)
ba395927
KA
1209{
1210 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1211 u64 val = 0, val_iva = 0;
1212 unsigned long flag;
1213
ba395927
KA
1214 switch (type) {
1215 case DMA_TLB_GLOBAL_FLUSH:
 1216 /* global flush doesn't need to set IVA_REG */
1217 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1218 break;
1219 case DMA_TLB_DSI_FLUSH:
1220 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1221 break;
1222 case DMA_TLB_PSI_FLUSH:
1223 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
ea8ea460 1224 /* IH bit is passed in as part of address */
ba395927
KA
1225 val_iva = size_order | addr;
1226 break;
1227 default:
1228 BUG();
1229 }
1230 /* Note: set drain read/write */
1231#if 0
1232 /*
 1233 * This is probably meant to be extra secure. Looks like we can
1234 * ignore it without any impact.
1235 */
1236 if (cap_read_drain(iommu->cap))
1237 val |= DMA_TLB_READ_DRAIN;
1238#endif
1239 if (cap_write_drain(iommu->cap))
1240 val |= DMA_TLB_WRITE_DRAIN;
1241
1f5b3c3f 1242 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1243 /* Note: Only uses first TLB reg currently */
1244 if (val_iva)
1245 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1246 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1247
1248 /* Make sure hardware complete it */
1249 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1250 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1251
1f5b3c3f 1252 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1253
1254 /* check IOTLB invalidation granularity */
1255 if (DMA_TLB_IAIG(val) == 0)
1256 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
1257 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1258 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
5b6985ce
FY
1259 (unsigned long long)DMA_TLB_IIRG(type),
1260 (unsigned long long)DMA_TLB_IAIG(val));
ba395927
KA
1261}
1262
64ae892b
DW
1263static struct device_domain_info *
1264iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
1265 u8 bus, u8 devfn)
93a23a72
YZ
1266{
1267 int found = 0;
1268 unsigned long flags;
1269 struct device_domain_info *info;
0bcb3e28 1270 struct pci_dev *pdev;
93a23a72
YZ
1271
1272 if (!ecap_dev_iotlb_support(iommu->ecap))
1273 return NULL;
1274
1275 if (!iommu->qi)
1276 return NULL;
1277
1278 spin_lock_irqsave(&device_domain_lock, flags);
1279 list_for_each_entry(info, &domain->devices, link)
c3b497c6
JL
1280 if (info->iommu == iommu && info->bus == bus &&
1281 info->devfn == devfn) {
93a23a72
YZ
1282 found = 1;
1283 break;
1284 }
1285 spin_unlock_irqrestore(&device_domain_lock, flags);
1286
0bcb3e28 1287 if (!found || !info->dev || !dev_is_pci(info->dev))
93a23a72
YZ
1288 return NULL;
1289
0bcb3e28
DW
1290 pdev = to_pci_dev(info->dev);
1291
1292 if (!pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS))
93a23a72
YZ
1293 return NULL;
1294
0bcb3e28 1295 if (!dmar_find_matched_atsr_unit(pdev))
93a23a72
YZ
1296 return NULL;
1297
93a23a72
YZ
1298 return info;
1299}
1300
1301static void iommu_enable_dev_iotlb(struct device_domain_info *info)
ba395927 1302{
0bcb3e28 1303 if (!info || !dev_is_pci(info->dev))
93a23a72
YZ
1304 return;
1305
0bcb3e28 1306 pci_enable_ats(to_pci_dev(info->dev), VTD_PAGE_SHIFT);
93a23a72
YZ
1307}
1308
1309static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1310{
0bcb3e28
DW
1311 if (!info->dev || !dev_is_pci(info->dev) ||
1312 !pci_ats_enabled(to_pci_dev(info->dev)))
93a23a72
YZ
1313 return;
1314
0bcb3e28 1315 pci_disable_ats(to_pci_dev(info->dev));
93a23a72
YZ
1316}
1317
1318static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1319 u64 addr, unsigned mask)
1320{
1321 u16 sid, qdep;
1322 unsigned long flags;
1323 struct device_domain_info *info;
1324
1325 spin_lock_irqsave(&device_domain_lock, flags);
1326 list_for_each_entry(info, &domain->devices, link) {
0bcb3e28
DW
1327 struct pci_dev *pdev;
1328 if (!info->dev || !dev_is_pci(info->dev))
1329 continue;
1330
1331 pdev = to_pci_dev(info->dev);
1332 if (!pci_ats_enabled(pdev))
93a23a72
YZ
1333 continue;
1334
1335 sid = info->bus << 8 | info->devfn;
0bcb3e28 1336 qdep = pci_ats_queue_depth(pdev);
93a23a72
YZ
1337 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1338 }
1339 spin_unlock_irqrestore(&device_domain_lock, flags);
1340}
1341
1f0ef2aa 1342static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
ea8ea460 1343 unsigned long pfn, unsigned int pages, int ih, int map)
ba395927 1344{
9dd2fe89 1345 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
03d6a246 1346 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
ba395927 1347
ba395927
KA
1348 BUG_ON(pages == 0);
1349
ea8ea460
DW
1350 if (ih)
1351 ih = 1 << 6;
ba395927 1352 /*
9dd2fe89 1353 * Fall back to domain selective flush if no PSI support or the size is
1354 * too big.
ba395927
KA
1355 * PSI requires page size to be 2 ^ x, and the base address is naturally
1356 * aligned to the size
1357 */
9dd2fe89
YZ
1358 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1359 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1f0ef2aa 1360 DMA_TLB_DSI_FLUSH);
9dd2fe89 1361 else
ea8ea460 1362 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
9dd2fe89 1363 DMA_TLB_PSI_FLUSH);
bf92df30
YZ
1364
1365 /*
82653633
NA
1366 * In caching mode, changes of pages from non-present to present require
1367 * flush. However, device IOTLB doesn't need to be flushed in this case.
bf92df30 1368 */
82653633 1369 if (!cap_caching_mode(iommu->cap) || !map)
93a23a72 1370 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
ba395927
KA
1371}
1372
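/*
 * Illustrative worked example: for pages == 5 the code above computes
 * mask = ilog2(__roundup_pow_of_two(5)) = ilog2(8) = 3, i.e. a
 * page-selective invalidation covering 2^3 = 8 pages; if the mask exceeded
 * cap_max_amask_val() the flush would fall back to a domain-selective one.
 */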
f8bab735 1373static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1374{
1375 u32 pmen;
1376 unsigned long flags;
1377
1f5b3c3f 1378 raw_spin_lock_irqsave(&iommu->register_lock, flags);
f8bab735 1379 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1380 pmen &= ~DMA_PMEN_EPM;
1381 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1382
1383 /* wait for the protected region status bit to clear */
1384 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1385 readl, !(pmen & DMA_PMEN_PRS), pmen);
1386
1f5b3c3f 1387 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
f8bab735 1388}
1389
ba395927
KA
1390static int iommu_enable_translation(struct intel_iommu *iommu)
1391{
1392 u32 sts;
1393 unsigned long flags;
1394
1f5b3c3f 1395 raw_spin_lock_irqsave(&iommu->register_lock, flags);
c416daa9
DW
1396 iommu->gcmd |= DMA_GCMD_TE;
1397 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1398
1399 /* Make sure hardware complete it */
1400 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1401 readl, (sts & DMA_GSTS_TES), sts);
ba395927 1402
1f5b3c3f 1403 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
ba395927
KA
1404 return 0;
1405}
1406
1407static int iommu_disable_translation(struct intel_iommu *iommu)
1408{
1409 u32 sts;
1410 unsigned long flag;
1411
1f5b3c3f 1412 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1413 iommu->gcmd &= ~DMA_GCMD_TE;
1414 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1415
1416 /* Make sure hardware complete it */
1417 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1418 readl, (!(sts & DMA_GSTS_TES)), sts);
ba395927 1419
1f5b3c3f 1420 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1421 return 0;
1422}
1423
3460a6d9 1424
ba395927
KA
1425static int iommu_init_domains(struct intel_iommu *iommu)
1426{
1427 unsigned long ndomains;
1428 unsigned long nlongs;
1429
1430 ndomains = cap_ndoms(iommu->cap);
852bdb04
JL
1431 pr_debug("IOMMU%d: Number of Domains supported <%ld>\n",
1432 iommu->seq_id, ndomains);
ba395927
KA
1433 nlongs = BITS_TO_LONGS(ndomains);
1434
94a91b50
DD
1435 spin_lock_init(&iommu->lock);
1436
ba395927
KA
1437 /* TBD: there might be 64K domains,
1438 * consider other allocation for future chip
1439 */
1440 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1441 if (!iommu->domain_ids) {
852bdb04
JL
1442 pr_err("IOMMU%d: allocating domain id array failed\n",
1443 iommu->seq_id);
ba395927
KA
1444 return -ENOMEM;
1445 }
1446 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1447 GFP_KERNEL);
1448 if (!iommu->domains) {
852bdb04
JL
1449 pr_err("IOMMU%d: allocating domain array failed\n",
1450 iommu->seq_id);
1451 kfree(iommu->domain_ids);
1452 iommu->domain_ids = NULL;
ba395927
KA
1453 return -ENOMEM;
1454 }
1455
1456 /*
1457 * if Caching mode is set, then invalid translations are tagged
1458 * with domainid 0. Hence we need to pre-allocate it.
1459 */
1460 if (cap_caching_mode(iommu->cap))
1461 set_bit(0, iommu->domain_ids);
1462 return 0;
1463}
ba395927 1464
a868e6b7 1465static void free_dmar_iommu(struct intel_iommu *iommu)
ba395927
KA
1466{
1467 struct dmar_domain *domain;
5ced12af 1468 int i, count;
c7151a8d 1469 unsigned long flags;
ba395927 1470
94a91b50 1471 if ((iommu->domains) && (iommu->domain_ids)) {
a45946ab 1472 for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
a4eaa86c
JL
1473 /*
1474 * Domain id 0 is reserved for invalid translation
1475 * if hardware supports caching mode.
1476 */
1477 if (cap_caching_mode(iommu->cap) && i == 0)
1478 continue;
1479
94a91b50
DD
1480 domain = iommu->domains[i];
1481 clear_bit(i, iommu->domain_ids);
1482
1483 spin_lock_irqsave(&domain->iommu_lock, flags);
5ced12af
JL
1484 count = --domain->iommu_count;
1485 spin_unlock_irqrestore(&domain->iommu_lock, flags);
92d03cc8
JL
1486 if (count == 0)
1487 domain_exit(domain);
5e98c4b1 1488 }
ba395927
KA
1489 }
1490
1491 if (iommu->gcmd & DMA_GCMD_TE)
1492 iommu_disable_translation(iommu);
1493
ba395927
KA
1494 kfree(iommu->domains);
1495 kfree(iommu->domain_ids);
a868e6b7
JL
1496 iommu->domains = NULL;
1497 iommu->domain_ids = NULL;
ba395927 1498
d9630fe9
WH
1499 g_iommus[iommu->seq_id] = NULL;
1500
ba395927
KA
1501 /* free context mapping */
1502 free_context_table(iommu);
ba395927
KA
1503}
1504
ab8dfe25 1505static struct dmar_domain *alloc_domain(int flags)
ba395927 1506{
92d03cc8
JL
1507 /* domain id for virtual machine, it won't be set in context */
1508 static atomic_t vm_domid = ATOMIC_INIT(0);
ba395927 1509 struct dmar_domain *domain;
ba395927
KA
1510
1511 domain = alloc_domain_mem();
1512 if (!domain)
1513 return NULL;
1514
ab8dfe25 1515 memset(domain, 0, sizeof(*domain));
4c923d47 1516 domain->nid = -1;
ab8dfe25 1517 domain->flags = flags;
92d03cc8
JL
1518 spin_lock_init(&domain->iommu_lock);
1519 INIT_LIST_HEAD(&domain->devices);
ab8dfe25 1520 if (flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
92d03cc8 1521 domain->id = atomic_inc_return(&vm_domid);
2c2e2c38
FY
1522
1523 return domain;
1524}
1525
fb170fb4
JL
1526static int __iommu_attach_domain(struct dmar_domain *domain,
1527 struct intel_iommu *iommu)
2c2e2c38
FY
1528{
1529 int num;
1530 unsigned long ndomains;
2c2e2c38 1531
ba395927 1532 ndomains = cap_ndoms(iommu->cap);
ba395927 1533 num = find_first_zero_bit(iommu->domain_ids, ndomains);
fb170fb4
JL
1534 if (num < ndomains) {
1535 set_bit(num, iommu->domain_ids);
1536 iommu->domains[num] = domain;
1537 } else {
1538 num = -ENOSPC;
ba395927
KA
1539 }
1540
fb170fb4
JL
1541 return num;
1542}
1543
1544static int iommu_attach_domain(struct dmar_domain *domain,
1545 struct intel_iommu *iommu)
1546{
1547 int num;
1548 unsigned long flags;
1549
1550 spin_lock_irqsave(&iommu->lock, flags);
1551 num = __iommu_attach_domain(domain, iommu);
1552 if (num < 0)
1553 pr_err("IOMMU: no free domain ids\n");
1554 else
1555 domain->id = num;
ba395927
KA
1556 spin_unlock_irqrestore(&iommu->lock, flags);
1557
fb170fb4 1558 return num;
ba395927
KA
1559}
1560
2c2e2c38
FY
1561static void iommu_detach_domain(struct dmar_domain *domain,
1562 struct intel_iommu *iommu)
ba395927
KA
1563{
1564 unsigned long flags;
2c2e2c38 1565 int num, ndomains;
ba395927 1566
8c11e798 1567 spin_lock_irqsave(&iommu->lock, flags);
fb170fb4
JL
1568 if (domain_type_is_vm_or_si(domain)) {
1569 ndomains = cap_ndoms(iommu->cap);
1570 for_each_set_bit(num, iommu->domain_ids, ndomains) {
1571 if (iommu->domains[num] == domain) {
1572 clear_bit(num, iommu->domain_ids);
1573 iommu->domains[num] = NULL;
1574 break;
1575 }
2c2e2c38 1576 }
fb170fb4
JL
1577 } else {
1578 clear_bit(domain->id, iommu->domain_ids);
1579 iommu->domains[domain->id] = NULL;
2c2e2c38 1580 }
8c11e798 1581 spin_unlock_irqrestore(&iommu->lock, flags);
ba395927
KA
1582}
1583
fb170fb4
JL
1584static void domain_attach_iommu(struct dmar_domain *domain,
1585 struct intel_iommu *iommu)
1586{
1587 unsigned long flags;
1588
1589 spin_lock_irqsave(&domain->iommu_lock, flags);
1590 if (!test_and_set_bit(iommu->seq_id, domain->iommu_bmp)) {
1591 domain->iommu_count++;
1592 if (domain->iommu_count == 1)
1593 domain->nid = iommu->node;
1594 domain_update_iommu_cap(domain);
1595 }
1596 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1597}
1598
1599static int domain_detach_iommu(struct dmar_domain *domain,
1600 struct intel_iommu *iommu)
1601{
1602 unsigned long flags;
1603 int count = INT_MAX;
1604
1605 spin_lock_irqsave(&domain->iommu_lock, flags);
1606 if (test_and_clear_bit(iommu->seq_id, domain->iommu_bmp)) {
1607 count = --domain->iommu_count;
1608 domain_update_iommu_cap(domain);
1609 }
1610 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1611
1612 return count;
1613}
1614
ba395927 1615static struct iova_domain reserved_iova_list;
8a443df4 1616static struct lock_class_key reserved_rbtree_key;
ba395927 1617
51a63e67 1618static int dmar_init_reserved_ranges(void)
ba395927
KA
1619{
1620 struct pci_dev *pdev = NULL;
1621 struct iova *iova;
1622 int i;
ba395927 1623
f661197e 1624 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
ba395927 1625
8a443df4
MG
1626 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1627 &reserved_rbtree_key);
1628
ba395927
KA
1629 /* IOAPIC ranges shouldn't be accessed by DMA */
1630 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1631 IOVA_PFN(IOAPIC_RANGE_END));
51a63e67 1632 if (!iova) {
ba395927 1633 printk(KERN_ERR "Reserve IOAPIC range failed\n");
51a63e67
JC
1634 return -ENODEV;
1635 }
ba395927
KA
1636
1637 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1638 for_each_pci_dev(pdev) {
1639 struct resource *r;
1640
1641 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1642 r = &pdev->resource[i];
1643 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1644 continue;
1a4a4551
DW
1645 iova = reserve_iova(&reserved_iova_list,
1646 IOVA_PFN(r->start),
1647 IOVA_PFN(r->end));
51a63e67 1648 if (!iova) {
ba395927 1649 printk(KERN_ERR "Reserve iova failed\n");
51a63e67
JC
1650 return -ENODEV;
1651 }
ba395927
KA
1652 }
1653 }
51a63e67 1654 return 0;
ba395927
KA
1655}
1656
1657static void domain_reserve_special_ranges(struct dmar_domain *domain)
1658{
1659 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1660}
1661
1662static inline int guestwidth_to_adjustwidth(int gaw)
1663{
1664 int agaw;
1665 int r = (gaw - 12) % 9;
1666
1667 if (r == 0)
1668 agaw = gaw;
1669 else
1670 agaw = gaw + 9 - r;
1671 if (agaw > 64)
1672 agaw = 64;
1673 return agaw;
1674}
1675
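/*
 * Illustrative worked example: the adjusted width is rounded up so that
 * (width - 12) is a multiple of the 9-bit level stride, e.g.
 * guestwidth_to_adjustwidth(39) == 39 but guestwidth_to_adjustwidth(40) == 48,
 * and anything above 64 is clamped to 64.
 */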
1676static int domain_init(struct dmar_domain *domain, int guest_width)
1677{
1678 struct intel_iommu *iommu;
1679 int adjust_width, agaw;
1680 unsigned long sagaw;
1681
f661197e 1682 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
ba395927
KA
1683 domain_reserve_special_ranges(domain);
1684
1685 /* calculate AGAW */
8c11e798 1686 iommu = domain_get_iommu(domain);
ba395927
KA
1687 if (guest_width > cap_mgaw(iommu->cap))
1688 guest_width = cap_mgaw(iommu->cap);
1689 domain->gaw = guest_width;
1690 adjust_width = guestwidth_to_adjustwidth(guest_width);
1691 agaw = width_to_agaw(adjust_width);
1692 sagaw = cap_sagaw(iommu->cap);
1693 if (!test_bit(agaw, &sagaw)) {
1694 /* hardware doesn't support it, choose a bigger one */
1695 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1696 agaw = find_next_bit(&sagaw, 5, agaw);
1697 if (agaw >= 5)
1698 return -ENODEV;
1699 }
1700 domain->agaw = agaw;
ba395927 1701
8e604097
WH
1702 if (ecap_coherent(iommu->ecap))
1703 domain->iommu_coherency = 1;
1704 else
1705 domain->iommu_coherency = 0;
1706
58c610bd
SY
1707 if (ecap_sc_support(iommu->ecap))
1708 domain->iommu_snooping = 1;
1709 else
1710 domain->iommu_snooping = 0;
1711
214e39aa
DW
1712 if (intel_iommu_superpage)
1713 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1714 else
1715 domain->iommu_superpage = 0;
1716
4c923d47 1717 domain->nid = iommu->node;
c7151a8d 1718
ba395927 1719 /* always allocate the top pgd */
4c923d47 1720 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
ba395927
KA
1721 if (!domain->pgd)
1722 return -ENOMEM;
5b6985ce 1723 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
ba395927
KA
1724 return 0;
1725}
1726
1727static void domain_exit(struct dmar_domain *domain)
1728{
2c2e2c38
FY
1729 struct dmar_drhd_unit *drhd;
1730 struct intel_iommu *iommu;
ea8ea460 1731 struct page *freelist = NULL;
ba395927
KA
1732
 1733 /* Domain 0 is reserved, so don't process it */
1734 if (!domain)
1735 return;
1736
7b668357
AW
1737 /* Flush any lazy unmaps that may reference this domain */
1738 if (!intel_iommu_strict)
1739 flush_unmaps_timeout(0);
1740
92d03cc8 1741 /* remove associated devices */
ba395927 1742 domain_remove_dev_info(domain);
92d03cc8 1743
ba395927
KA
1744 /* destroy iovas */
1745 put_iova_domain(&domain->iovad);
ba395927 1746
ea8ea460 1747 freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
ba395927 1748
92d03cc8 1749 /* clear attached or cached domains */
0e242612 1750 rcu_read_lock();
2c2e2c38 1751 for_each_active_iommu(iommu, drhd)
fb170fb4 1752 iommu_detach_domain(domain, iommu);
0e242612 1753 rcu_read_unlock();
2c2e2c38 1754
ea8ea460
DW
1755 dma_free_pagelist(freelist);
1756
ba395927
KA
1757 free_domain_mem(domain);
1758}
1759
64ae892b
DW
1760static int domain_context_mapping_one(struct dmar_domain *domain,
1761 struct intel_iommu *iommu,
1762 u8 bus, u8 devfn, int translation)
ba395927
KA
1763{
1764 struct context_entry *context;
ba395927 1765 unsigned long flags;
ea6606b0
WH
1766 struct dma_pte *pgd;
1767 unsigned long num;
1768 unsigned long ndomains;
1769 int id;
1770 int agaw;
93a23a72 1771 struct device_domain_info *info = NULL;
ba395927
KA
1772
1773 pr_debug("Set context mapping for %02x:%02x.%d\n",
1774 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
4ed0d3e6 1775
ba395927 1776 BUG_ON(!domain->pgd);
4ed0d3e6
FY
1777 BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1778 translation != CONTEXT_TT_MULTI_LEVEL);
5331fe6f 1779
ba395927
KA
1780 context = device_to_context_entry(iommu, bus, devfn);
1781 if (!context)
1782 return -ENOMEM;
1783 spin_lock_irqsave(&iommu->lock, flags);
c07e7d21 1784 if (context_present(context)) {
ba395927
KA
1785 spin_unlock_irqrestore(&iommu->lock, flags);
1786 return 0;
1787 }
1788
ea6606b0
WH
1789 id = domain->id;
1790 pgd = domain->pgd;
1791
ab8dfe25 1792 if (domain_type_is_vm_or_si(domain)) {
ea6606b0
WH
1793 int found = 0;
1794
1795 /* find an available domain id for this device in iommu */
1796 ndomains = cap_ndoms(iommu->cap);
a45946ab 1797 for_each_set_bit(num, iommu->domain_ids, ndomains) {
ea6606b0
WH
1798 if (iommu->domains[num] == domain) {
1799 id = num;
1800 found = 1;
1801 break;
1802 }
ea6606b0
WH
1803 }
1804
1805 if (found == 0) {
fb170fb4
JL
1806 id = __iommu_attach_domain(domain, iommu);
1807 if (id < 0) {
ea6606b0 1808 spin_unlock_irqrestore(&iommu->lock, flags);
fb170fb4 1809 pr_err("IOMMU: no free domain ids\n");
ea6606b0
WH
1810 return -EFAULT;
1811 }
ea6606b0
WH
1812 }
1813
1814 /* Skip top levels of page tables for
1815 * an iommu which has less agaw than the default.
1672af11 1816 * Unnecessary for PT mode.
ea6606b0 1817 */
1672af11
CW
1818 if (translation != CONTEXT_TT_PASS_THROUGH) {
1819 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1820 pgd = phys_to_virt(dma_pte_addr(pgd));
1821 if (!dma_pte_present(pgd)) {
1822 spin_unlock_irqrestore(&iommu->lock, flags);
1823 return -ENOMEM;
1824 }
ea6606b0
WH
1825 }
1826 }
1827 }
1828
1829 context_set_domain_id(context, id);
4ed0d3e6 1830
93a23a72 1831 if (translation != CONTEXT_TT_PASS_THROUGH) {
64ae892b 1832 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
93a23a72
YZ
1833 translation = info ? CONTEXT_TT_DEV_IOTLB :
1834 CONTEXT_TT_MULTI_LEVEL;
1835 }
4ed0d3e6
FY
1836 /*
1837 * In pass through mode, AW must be programmed to indicate the largest
1838 * AGAW value supported by hardware. And ASR is ignored by hardware.
1839 */
93a23a72 1840 if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
4ed0d3e6 1841 context_set_address_width(context, iommu->msagaw);
93a23a72
YZ
1842 else {
1843 context_set_address_root(context, virt_to_phys(pgd));
1844 context_set_address_width(context, iommu->agaw);
1845 }
4ed0d3e6
FY
1846
1847 context_set_translation_type(context, translation);
c07e7d21
MM
1848 context_set_fault_enable(context);
1849 context_set_present(context);
5331fe6f 1850 domain_flush_cache(domain, context, sizeof(*context));
ba395927 1851
4c25a2c1
DW
1852 /*
1853 * It's a non-present to present mapping. If hardware doesn't cache
1854 * non-present entries we only need to flush the write-buffer. If it
1855 * _does_ cache non-present entries, then it does so in the special
1856 * domain #0, which we have to flush:
1857 */
1858 if (cap_caching_mode(iommu->cap)) {
1859 iommu->flush.flush_context(iommu, 0,
1860 (((u16)bus) << 8) | devfn,
1861 DMA_CCMD_MASK_NOBIT,
1862 DMA_CCMD_DEVICE_INVL);
18fd779a 1863 iommu->flush.flush_iotlb(iommu, id, 0, 0, DMA_TLB_DSI_FLUSH);
4c25a2c1 1864 } else {
ba395927 1865 iommu_flush_write_buffer(iommu);
4c25a2c1 1866 }
93a23a72 1867 iommu_enable_dev_iotlb(info);
ba395927 1868 spin_unlock_irqrestore(&iommu->lock, flags);
c7151a8d 1869
fb170fb4
JL
1870 domain_attach_iommu(domain, iommu);
1871
ba395927
KA
1872 return 0;
1873}
1874
579305f7
AW
1875struct domain_context_mapping_data {
1876 struct dmar_domain *domain;
1877 struct intel_iommu *iommu;
1878 int translation;
1879};
1880
1881static int domain_context_mapping_cb(struct pci_dev *pdev,
1882 u16 alias, void *opaque)
1883{
1884 struct domain_context_mapping_data *data = opaque;
1885
1886 return domain_context_mapping_one(data->domain, data->iommu,
1887 PCI_BUS_NUM(alias), alias & 0xff,
1888 data->translation);
1889}
1890
ba395927 1891static int
e1f167f3
DW
1892domain_context_mapping(struct dmar_domain *domain, struct device *dev,
1893 int translation)
ba395927 1894{
64ae892b 1895 struct intel_iommu *iommu;
156baca8 1896 u8 bus, devfn;
579305f7 1897 struct domain_context_mapping_data data;
64ae892b 1898
e1f167f3 1899 iommu = device_to_iommu(dev, &bus, &devfn);
64ae892b
DW
1900 if (!iommu)
1901 return -ENODEV;
ba395927 1902
579305f7
AW
1903 if (!dev_is_pci(dev))
1904 return domain_context_mapping_one(domain, iommu, bus, devfn,
4ed0d3e6 1905 translation);
579305f7
AW
1906
1907 data.domain = domain;
1908 data.iommu = iommu;
1909 data.translation = translation;
1910
1911 return pci_for_each_dma_alias(to_pci_dev(dev),
1912 &domain_context_mapping_cb, &data);
1913}
1914
1915static int domain_context_mapped_cb(struct pci_dev *pdev,
1916 u16 alias, void *opaque)
1917{
1918 struct intel_iommu *iommu = opaque;
1919
1920 return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
ba395927
KA
1921}
1922
e1f167f3 1923static int domain_context_mapped(struct device *dev)
ba395927 1924{
5331fe6f 1925 struct intel_iommu *iommu;
156baca8 1926 u8 bus, devfn;
5331fe6f 1927
e1f167f3 1928 iommu = device_to_iommu(dev, &bus, &devfn);
5331fe6f
WH
1929 if (!iommu)
1930 return -ENODEV;
ba395927 1931
579305f7
AW
1932 if (!dev_is_pci(dev))
1933 return device_context_mapped(iommu, bus, devfn);
e1f167f3 1934
579305f7
AW
1935 return !pci_for_each_dma_alias(to_pci_dev(dev),
1936 domain_context_mapped_cb, iommu);
ba395927
KA
1937}
1938
f532959b
FY
1939/* Returns a number of VTD pages, but aligned to MM page size */
1940static inline unsigned long aligned_nrpages(unsigned long host_addr,
1941 size_t size)
1942{
1943 host_addr &= ~PAGE_MASK;
1944 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1945}
1946
6dd9a7c7
YS
1947/* Return largest possible superpage level for a given mapping */
1948static inline int hardware_largepage_caps(struct dmar_domain *domain,
1949 unsigned long iov_pfn,
1950 unsigned long phy_pfn,
1951 unsigned long pages)
1952{
1953 int support, level = 1;
1954 unsigned long pfnmerge;
1955
1956 support = domain->iommu_superpage;
1957
1958 /* To use a large page, the virtual *and* physical addresses
1959 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
1960 of them will mean we have to use smaller pages. So just
1961 merge them and check both at once. */
1962 pfnmerge = iov_pfn | phy_pfn;
1963
1964 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
1965 pages >>= VTD_STRIDE_SHIFT;
1966 if (!pages)
1967 break;
1968 pfnmerge >>= VTD_STRIDE_SHIFT;
1969 level++;
1970 support--;
1971 }
1972 return level;
1973}
1974
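/*
 * __domain_mapping - the core mapping loop.  Maps @nr_pages starting
 * at @iov_pfn either from a scatterlist or from a contiguous physical
 * range (@phys_pfn).  For each chunk it picks the largest superpage
 * level that the alignment and remaining length allow, installs the
 * PTEs with cmpxchg64 (warning if one was already set), and flushes
 * the CPU cache whenever a page-table page is filled or the mapping
 * ends.
 */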
9051aa02
DW
1975static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1976 struct scatterlist *sg, unsigned long phys_pfn,
1977 unsigned long nr_pages, int prot)
e1605495
DW
1978{
1979 struct dma_pte *first_pte = NULL, *pte = NULL;
9051aa02 1980 phys_addr_t uninitialized_var(pteval);
e1605495 1981 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
9051aa02 1982 unsigned long sg_res;
6dd9a7c7
YS
1983 unsigned int largepage_lvl = 0;
1984 unsigned long lvl_pages = 0;
e1605495
DW
1985
1986 BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
1987
1988 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1989 return -EINVAL;
1990
1991 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1992
9051aa02
DW
1993 if (sg)
1994 sg_res = 0;
1995 else {
1996 sg_res = nr_pages + 1;
1997 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1998 }
1999
6dd9a7c7 2000 while (nr_pages > 0) {
c85994e4
DW
2001 uint64_t tmp;
2002
e1605495 2003 if (!sg_res) {
f532959b 2004 sg_res = aligned_nrpages(sg->offset, sg->length);
e1605495
DW
2005 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
2006 sg->dma_length = sg->length;
2007 pteval = page_to_phys(sg_page(sg)) | prot;
6dd9a7c7 2008 phys_pfn = pteval >> VTD_PAGE_SHIFT;
e1605495 2009 }
6dd9a7c7 2010
e1605495 2011 if (!pte) {
6dd9a7c7
YS
2012 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2013
5cf0a76f 2014 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
e1605495
DW
2015 if (!pte)
2016 return -ENOMEM;
6dd9a7c7 2017 /* It is a large page */
6491d4d0 2018 if (largepage_lvl > 1) {
6dd9a7c7 2019 pteval |= DMA_PTE_LARGE_PAGE;
6491d4d0
WD
2020 /* Ensure that old small page tables are removed to make room
2021 for superpage, if they exist. */
2022 dma_pte_clear_range(domain, iov_pfn,
2023 iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1);
2024 dma_pte_free_pagetable(domain, iov_pfn,
2025 iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1);
2026 } else {
6dd9a7c7 2027 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
6491d4d0 2028 }
6dd9a7c7 2029
e1605495
DW
2030 }
2031 /* We don't need lock here, nobody else
2032 * touches the iova range
2033 */
7766a3fb 2034 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
c85994e4 2035 if (tmp) {
1bf20f0d 2036 static int dumps = 5;
c85994e4
DW
2037 printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2038 iov_pfn, tmp, (unsigned long long)pteval);
1bf20f0d
DW
2039 if (dumps) {
2040 dumps--;
2041 debug_dma_dump_mappings(NULL);
2042 }
2043 WARN_ON(1);
2044 }
6dd9a7c7
YS
2045
2046 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2047
2048 BUG_ON(nr_pages < lvl_pages);
2049 BUG_ON(sg_res < lvl_pages);
2050
2051 nr_pages -= lvl_pages;
2052 iov_pfn += lvl_pages;
2053 phys_pfn += lvl_pages;
2054 pteval += lvl_pages * VTD_PAGE_SIZE;
2055 sg_res -= lvl_pages;
2056
2057 /* If the next PTE would be the first in a new page, then we
2058 need to flush the cache on the entries we've just written.
2059 And then we'll need to recalculate 'pte', so clear it and
2060 let it get set again in the if (!pte) block above.
2061
2062 If we're done (!nr_pages) we need to flush the cache too.
2063
2064 Also if we've been setting superpages, we may need to
2065 recalculate 'pte' and switch back to smaller pages for the
2066 end of the mapping, if the trailing size is not enough to
2067 use another superpage (i.e. sg_res < lvl_pages). */
e1605495 2068 pte++;
6dd9a7c7
YS
2069 if (!nr_pages || first_pte_in_page(pte) ||
2070 (largepage_lvl > 1 && sg_res < lvl_pages)) {
e1605495
DW
2071 domain_flush_cache(domain, first_pte,
2072 (void *)pte - (void *)first_pte);
2073 pte = NULL;
2074 }
6dd9a7c7
YS
2075
2076 if (!sg_res && nr_pages)
e1605495
DW
2077 sg = sg_next(sg);
2078 }
2079 return 0;
2080}
2081
9051aa02
DW
2082static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2083 struct scatterlist *sg, unsigned long nr_pages,
2084 int prot)
ba395927 2085{
9051aa02
DW
2086 return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
2087}
6f6a00e4 2088
9051aa02
DW
2089static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2090 unsigned long phys_pfn, unsigned long nr_pages,
2091 int prot)
2092{
2093 return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
ba395927
KA
2094}
2095
c7151a8d 2096static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
ba395927 2097{
c7151a8d
WH
2098 if (!iommu)
2099 return;
8c11e798
WH
2100
2101 clear_context_table(iommu, bus, devfn);
2102 iommu->flush.flush_context(iommu, 0, 0, 0,
4c25a2c1 2103 DMA_CCMD_GLOBAL_INVL);
1f0ef2aa 2104 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
ba395927
KA
2105}
2106
109b9b04
DW
2107static inline void unlink_domain_info(struct device_domain_info *info)
2108{
2109 assert_spin_locked(&device_domain_lock);
2110 list_del(&info->link);
2111 list_del(&info->global);
2112 if (info->dev)
0bcb3e28 2113 info->dev->archdata.iommu = NULL;
109b9b04
DW
2114}
2115
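/*
 * domain_remove_dev_info - detach every device from @domain: unlink
 * each device_domain_info, disable its device-IOTLB, clear its
 * context entry and, for VM domains, also detach dependent devices
 * and drop the domain's reference on the IOMMU.
 */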
ba395927
KA
2116static void domain_remove_dev_info(struct dmar_domain *domain)
2117{
3a74ca01 2118 struct device_domain_info *info, *tmp;
fb170fb4 2119 unsigned long flags;
ba395927
KA
2120
2121 spin_lock_irqsave(&device_domain_lock, flags);
3a74ca01 2122 list_for_each_entry_safe(info, tmp, &domain->devices, link) {
109b9b04 2123 unlink_domain_info(info);
ba395927
KA
2124 spin_unlock_irqrestore(&device_domain_lock, flags);
2125
93a23a72 2126 iommu_disable_dev_iotlb(info);
7c7faa11 2127 iommu_detach_dev(info->iommu, info->bus, info->devfn);
ba395927 2128
ab8dfe25 2129 if (domain_type_is_vm(domain)) {
7c7faa11 2130 iommu_detach_dependent_devices(info->iommu, info->dev);
fb170fb4 2131 domain_detach_iommu(domain, info->iommu);
92d03cc8
JL
2132 }
2133
2134 free_devinfo_mem(info);
ba395927
KA
2135 spin_lock_irqsave(&device_domain_lock, flags);
2136 }
2137 spin_unlock_irqrestore(&device_domain_lock, flags);
2138}
2139
2140/*
2141 * find_domain
1525a29a 2142 * Note: we use struct device->archdata.iommu to store the info
ba395927 2143 */
1525a29a 2144static struct dmar_domain *find_domain(struct device *dev)
ba395927
KA
2145{
2146 struct device_domain_info *info;
2147
2148 /* No lock here, assumes no domain exit in normal case */
1525a29a 2149 info = dev->archdata.iommu;
ba395927
KA
2150 if (info)
2151 return info->domain;
2152 return NULL;
2153}
2154
5a8f40e8 2155static inline struct device_domain_info *
745f2586
JL
2156dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2157{
2158 struct device_domain_info *info;
2159
2160 list_for_each_entry(info, &device_domain_list, global)
41e80dca 2161 if (info->iommu->segment == segment && info->bus == bus &&
745f2586 2162 info->devfn == devfn)
5a8f40e8 2163 return info;
745f2586
JL
2164
2165 return NULL;
2166}
2167
5a8f40e8 2168static struct dmar_domain *dmar_insert_dev_info(struct intel_iommu *iommu,
41e80dca 2169 int bus, int devfn,
b718cd3d
DW
2170 struct device *dev,
2171 struct dmar_domain *domain)
745f2586 2172{
5a8f40e8 2173 struct dmar_domain *found = NULL;
745f2586
JL
2174 struct device_domain_info *info;
2175 unsigned long flags;
2176
2177 info = alloc_devinfo_mem();
2178 if (!info)
b718cd3d 2179 return NULL;
745f2586 2180
745f2586
JL
2181 info->bus = bus;
2182 info->devfn = devfn;
2183 info->dev = dev;
2184 info->domain = domain;
5a8f40e8 2185 info->iommu = iommu;
745f2586
JL
2186
2187 spin_lock_irqsave(&device_domain_lock, flags);
2188 if (dev)
0bcb3e28 2189 found = find_domain(dev);
5a8f40e8
DW
2190 else {
2191 struct device_domain_info *info2;
41e80dca 2192 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
5a8f40e8
DW
2193 if (info2)
2194 found = info2->domain;
2195 }
745f2586
JL
2196 if (found) {
2197 spin_unlock_irqrestore(&device_domain_lock, flags);
2198 free_devinfo_mem(info);
b718cd3d
DW
2199 /* Caller must free the original domain */
2200 return found;
745f2586
JL
2201 }
2202
b718cd3d
DW
2203 list_add(&info->link, &domain->devices);
2204 list_add(&info->global, &device_domain_list);
2205 if (dev)
2206 dev->archdata.iommu = info;
2207 spin_unlock_irqrestore(&device_domain_lock, flags);
2208
2209 return domain;
745f2586
JL
2210}
2211
579305f7
AW
2212static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2213{
2214 *(u16 *)opaque = alias;
2215 return 0;
2216}
2217
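/*
 * get_domain_for_dev - find the domain a device belongs to, creating
 * one if necessary.  For PCI devices the last DMA alias reported by
 * pci_for_each_dma_alias is checked first so that all aliases of a
 * device share one domain; otherwise a fresh domain is allocated,
 * attached to the device's IOMMU and initialized with the requested
 * guest address width.
 */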
ba395927 2218/* domain is initialized */
146922ec 2219static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
ba395927 2220{
579305f7
AW
2221 struct dmar_domain *domain, *tmp;
2222 struct intel_iommu *iommu;
5a8f40e8 2223 struct device_domain_info *info;
579305f7 2224 u16 dma_alias;
ba395927 2225 unsigned long flags;
aa4d066a 2226 u8 bus, devfn;
ba395927 2227
146922ec 2228 domain = find_domain(dev);
ba395927
KA
2229 if (domain)
2230 return domain;
2231
579305f7
AW
2232 iommu = device_to_iommu(dev, &bus, &devfn);
2233 if (!iommu)
2234 return NULL;
2235
146922ec
DW
2236 if (dev_is_pci(dev)) {
2237 struct pci_dev *pdev = to_pci_dev(dev);
276dbf99 2238
579305f7
AW
2239 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2240
2241 spin_lock_irqsave(&device_domain_lock, flags);
2242 info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2243 PCI_BUS_NUM(dma_alias),
2244 dma_alias & 0xff);
2245 if (info) {
2246 iommu = info->iommu;
2247 domain = info->domain;
5a8f40e8 2248 }
579305f7 2249 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927 2250
579305f7
AW
2251 /* DMA alias already has a domain, uses it */
2252 if (info)
2253 goto found_domain;
2254 }
ba395927 2255
146922ec 2256 /* Allocate and initialize new domain for the device */
ab8dfe25 2257 domain = alloc_domain(0);
745f2586 2258 if (!domain)
579305f7
AW
2259 return NULL;
2260
fb170fb4 2261 if (iommu_attach_domain(domain, iommu) < 0) {
2fe9723d 2262 free_domain_mem(domain);
579305f7 2263 return NULL;
2c2e2c38 2264 }
fb170fb4 2265 domain_attach_iommu(domain, iommu);
579305f7
AW
2266 if (domain_init(domain, gaw)) {
2267 domain_exit(domain);
2268 return NULL;
2c2e2c38 2269 }
ba395927 2270
579305f7
AW
2271 /* register PCI DMA alias device */
2272 if (dev_is_pci(dev)) {
2273 tmp = dmar_insert_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2274 dma_alias & 0xff, NULL, domain);
2275
2276 if (!tmp || tmp != domain) {
2277 domain_exit(domain);
2278 domain = tmp;
2279 }
2280
b718cd3d 2281 if (!domain)
579305f7 2282 return NULL;
ba395927
KA
2283 }
2284
2285found_domain:
579305f7
AW
2286 tmp = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);
2287
2288 if (!tmp || tmp != domain) {
2289 domain_exit(domain);
2290 domain = tmp;
2291 }
b718cd3d
DW
2292
2293 return domain;
ba395927
KA
2294}
2295
2c2e2c38 2296static int iommu_identity_mapping;
e0fc7e0b
DW
2297#define IDENTMAP_ALL 1
2298#define IDENTMAP_GFX 2
2299#define IDENTMAP_AZALIA 4
2c2e2c38 2300
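/*
 * iommu_domain_identity_map - reserve the iova range covering
 * [start, end] and install a 1:1 (iova == physical) mapping for it in
 * @domain, clearing any PTEs already present in that range first.
 */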
b213203e
DW
2301static int iommu_domain_identity_map(struct dmar_domain *domain,
2302 unsigned long long start,
2303 unsigned long long end)
ba395927 2304{
c5395d5c
DW
2305 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2306 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2307
2308 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2309 dma_to_mm_pfn(last_vpfn))) {
ba395927 2310 printk(KERN_ERR "IOMMU: reserve iova failed\n");
b213203e 2311 return -ENOMEM;
ba395927
KA
2312 }
2313
c5395d5c
DW
2314 pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
2315 start, end, domain->id);
ba395927
KA
2316 /*
2317 * RMRR range might have overlap with physical memory range,
2318 * clear it first
2319 */
c5395d5c 2320 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
ba395927 2321
c5395d5c
DW
2322 return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2323 last_vpfn - first_vpfn + 1,
61df7443 2324 DMA_PTE_READ|DMA_PTE_WRITE);
b213203e
DW
2325}
2326
0b9d9753 2327static int iommu_prepare_identity_map(struct device *dev,
b213203e
DW
2328 unsigned long long start,
2329 unsigned long long end)
2330{
2331 struct dmar_domain *domain;
2332 int ret;
2333
0b9d9753 2334 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
b213203e
DW
2335 if (!domain)
2336 return -ENOMEM;
2337
19943b0e
DW
2338 /* For _hardware_ passthrough, don't bother. But for software
2339 passthrough, we do it anyway -- it may indicate a memory
2340 range which is reserved in E820 and so didn't get set
2341 up to start with in si_domain */
2342 if (domain == si_domain && hw_pass_through) {
2343 printk("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
0b9d9753 2344 dev_name(dev), start, end);
19943b0e
DW
2345 return 0;
2346 }
2347
2348 printk(KERN_INFO
2349 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
0b9d9753 2350 dev_name(dev), start, end);
2ff729f5 2351
5595b528
DW
2352 if (end < start) {
2353 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2354 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2355 dmi_get_system_info(DMI_BIOS_VENDOR),
2356 dmi_get_system_info(DMI_BIOS_VERSION),
2357 dmi_get_system_info(DMI_PRODUCT_VERSION));
2358 ret = -EIO;
2359 goto error;
2360 }
2361
2ff729f5
DW
2362 if (end >> agaw_to_width(domain->agaw)) {
2363 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2364 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2365 agaw_to_width(domain->agaw),
2366 dmi_get_system_info(DMI_BIOS_VENDOR),
2367 dmi_get_system_info(DMI_BIOS_VERSION),
2368 dmi_get_system_info(DMI_PRODUCT_VERSION));
2369 ret = -EIO;
2370 goto error;
2371 }
19943b0e 2372
b213203e 2373 ret = iommu_domain_identity_map(domain, start, end);
ba395927
KA
2374 if (ret)
2375 goto error;
2376
2377 /* context entry init */
0b9d9753 2378 ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);
b213203e
DW
2379 if (ret)
2380 goto error;
2381
2382 return 0;
2383
2384 error:
ba395927
KA
2385 domain_exit(domain);
2386 return ret;
ba395927
KA
2387}
2388
2389static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
0b9d9753 2390 struct device *dev)
ba395927 2391{
0b9d9753 2392 if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927 2393 return 0;
0b9d9753
DW
2394 return iommu_prepare_identity_map(dev, rmrr->base_address,
2395 rmrr->end_address);
ba395927
KA
2396}
2397
d3f13810 2398#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
49a0429e
KA
2399static inline void iommu_prepare_isa(void)
2400{
2401 struct pci_dev *pdev;
2402 int ret;
2403
2404 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2405 if (!pdev)
2406 return;
2407
c7ab48d2 2408 printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
0b9d9753 2409 ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);
49a0429e
KA
2410
2411 if (ret)
c7ab48d2
DW
2412 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
2413 "floppy might not work\n");
49a0429e 2414
9b27e82d 2415 pci_dev_put(pdev);
49a0429e
KA
2416}
2417#else
2418static inline void iommu_prepare_isa(void)
2419{
2420 return;
2421}
d3f13810 2422#endif /* !CONFIG_INTEL_IOMMU_FLOPPY_WA */
49a0429e 2423
2c2e2c38 2424static int md_domain_init(struct dmar_domain *domain, int guest_width);
c7ab48d2 2425
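/*
 * si_domain_init - build the static identity (si) domain used for
 * passthrough: attach it to every active IOMMU, give it the default
 * address width and, unless hardware passthrough is in use, identity
 * map the usable memory of every online node into it.
 */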
071e1374 2426static int __init si_domain_init(int hw)
2c2e2c38
FY
2427{
2428 struct dmar_drhd_unit *drhd;
2429 struct intel_iommu *iommu;
c7ab48d2 2430 int nid, ret = 0;
2c2e2c38 2431
ab8dfe25 2432 si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2c2e2c38
FY
2433 if (!si_domain)
2434 return -EFAULT;
2435
2c2e2c38
FY
2436 for_each_active_iommu(iommu, drhd) {
2437 ret = iommu_attach_domain(si_domain, iommu);
fb170fb4 2438 if (ret < 0) {
2c2e2c38
FY
2439 domain_exit(si_domain);
2440 return -EFAULT;
2441 }
fb170fb4 2442 domain_attach_iommu(si_domain, iommu);
2c2e2c38
FY
2443 }
2444
2445 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2446 domain_exit(si_domain);
2447 return -EFAULT;
2448 }
2449
9544c003
JL
2450 pr_debug("IOMMU: identity mapping domain is domain %d\n",
2451 si_domain->id);
2c2e2c38 2452
19943b0e
DW
2453 if (hw)
2454 return 0;
2455
c7ab48d2 2456 for_each_online_node(nid) {
5dfe8660
TH
2457 unsigned long start_pfn, end_pfn;
2458 int i;
2459
2460 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2461 ret = iommu_domain_identity_map(si_domain,
2462 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2463 if (ret)
2464 return ret;
2465 }
c7ab48d2
DW
2466 }
2467
2c2e2c38
FY
2468 return 0;
2469}
2470
9b226624 2471static int identity_mapping(struct device *dev)
2c2e2c38
FY
2472{
2473 struct device_domain_info *info;
2474
2475 if (likely(!iommu_identity_mapping))
2476 return 0;
2477
9b226624 2478 info = dev->archdata.iommu;
cb452a40
MT
2479 if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2480 return (info->domain == si_domain);
2c2e2c38 2481
2c2e2c38
FY
2482 return 0;
2483}
2484
2485static int domain_add_dev_info(struct dmar_domain *domain,
5913c9bf 2486 struct device *dev, int translation)
2c2e2c38 2487{
0ac72664 2488 struct dmar_domain *ndomain;
5a8f40e8 2489 struct intel_iommu *iommu;
156baca8 2490 u8 bus, devfn;
5fe60f4e 2491 int ret;
2c2e2c38 2492
5913c9bf 2493 iommu = device_to_iommu(dev, &bus, &devfn);
5a8f40e8
DW
2494 if (!iommu)
2495 return -ENODEV;
2496
5913c9bf 2497 ndomain = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);
0ac72664
DW
2498 if (ndomain != domain)
2499 return -EBUSY;
2c2e2c38 2500
5913c9bf 2501 ret = domain_context_mapping(domain, dev, translation);
e2ad23d0 2502 if (ret) {
5913c9bf 2503 domain_remove_one_dev_info(domain, dev);
e2ad23d0
DW
2504 return ret;
2505 }
2506
2c2e2c38
FY
2507 return 0;
2508}
2509
0b9d9753 2510static bool device_has_rmrr(struct device *dev)
ea2447f7
TM
2511{
2512 struct dmar_rmrr_unit *rmrr;
832bd858 2513 struct device *tmp;
ea2447f7
TM
2514 int i;
2515
0e242612 2516 rcu_read_lock();
ea2447f7 2517 for_each_rmrr_units(rmrr) {
b683b230
JL
2518 /*
2519 * Return TRUE if this RMRR contains the device that
2520 * is passed in.
2521 */
2522 for_each_active_dev_scope(rmrr->devices,
2523 rmrr->devices_cnt, i, tmp)
0b9d9753 2524 if (tmp == dev) {
0e242612 2525 rcu_read_unlock();
ea2447f7 2526 return true;
b683b230 2527 }
ea2447f7 2528 }
0e242612 2529 rcu_read_unlock();
ea2447f7
TM
2530 return false;
2531}
2532
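/*
 * iommu_should_identity_map - policy check deciding whether @dev may
 * live in the identity (1:1) domain.  Devices backed by RMRRs (other
 * than USB), devices behind conventional PCI bridges and, after boot,
 * devices whose DMA mask cannot reach all of memory are excluded.
 */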
3bdb2591 2533static int iommu_should_identity_map(struct device *dev, int startup)
6941af28 2534{
ea2447f7 2535
3bdb2591
DW
2536 if (dev_is_pci(dev)) {
2537 struct pci_dev *pdev = to_pci_dev(dev);
ea2447f7 2538
3bdb2591
DW
2539 /*
2540 * We want to prevent any device associated with an RMRR from
2541 * getting placed into the SI Domain. This is done because
2542 * problems exist when devices are moved in and out of domains
2543 * and their respective RMRR info is lost. We exempt USB devices
2544 * from this process due to their usage of RMRRs that are known
2545 * to not be needed after BIOS hand-off to OS.
2546 */
2547 if (device_has_rmrr(dev) &&
2548 (pdev->class >> 8) != PCI_CLASS_SERIAL_USB)
2549 return 0;
e0fc7e0b 2550
3bdb2591
DW
2551 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2552 return 1;
e0fc7e0b 2553
3bdb2591
DW
2554 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2555 return 1;
6941af28 2556
3bdb2591 2557 if (!(iommu_identity_mapping & IDENTMAP_ALL))
3dfc813d 2558 return 0;
3bdb2591
DW
2559
2560 /*
2561 * We want to start off with all devices in the 1:1 domain, and
2562 * take them out later if we find they can't access all of memory.
2563 *
2564 * However, we can't do this for PCI devices behind bridges,
2565 * because all PCI devices behind the same bridge will end up
2566 * with the same source-id on their transactions.
2567 *
2568 * Practically speaking, we can't change things around for these
2569 * devices at run-time, because we can't be sure there'll be no
2570 * DMA transactions in flight for any of their siblings.
2571 *
2572 * So PCI devices (unless they're on the root bus) as well as
2573 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2574 * the 1:1 domain, just in _case_ one of their siblings turns out
2575 * not to be able to map all of memory.
2576 */
2577 if (!pci_is_pcie(pdev)) {
2578 if (!pci_is_root_bus(pdev->bus))
2579 return 0;
2580 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2581 return 0;
2582 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
3dfc813d 2583 return 0;
3bdb2591
DW
2584 } else {
2585 if (device_has_rmrr(dev))
2586 return 0;
2587 }
3dfc813d 2588
3bdb2591 2589 /*
3dfc813d 2590 * At boot time, we don't yet know if devices will be 64-bit capable.
3bdb2591 2591 * Assume that they will -- if they turn out not to be, then we can
3dfc813d
DW
2592 * take them out of the 1:1 domain later.
2593 */
8fcc5372
CW
2594 if (!startup) {
2595 /*
2596 * If the device's dma_mask is less than the system's memory
2597 * size then this is not a candidate for identity mapping.
2598 */
3bdb2591 2599 u64 dma_mask = *dev->dma_mask;
8fcc5372 2600
3bdb2591
DW
2601 if (dev->coherent_dma_mask &&
2602 dev->coherent_dma_mask < dma_mask)
2603 dma_mask = dev->coherent_dma_mask;
8fcc5372 2604
3bdb2591 2605 return dma_mask >= dma_get_required_mask(dev);
8fcc5372 2606 }
6941af28
DW
2607
2608 return 1;
2609}
2610
cf04eee8
DW
2611static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
2612{
2613 int ret;
2614
2615 if (!iommu_should_identity_map(dev, 1))
2616 return 0;
2617
2618 ret = domain_add_dev_info(si_domain, dev,
2619 hw ? CONTEXT_TT_PASS_THROUGH :
2620 CONTEXT_TT_MULTI_LEVEL);
2621 if (!ret)
2622 pr_info("IOMMU: %s identity mapping for device %s\n",
2623 hw ? "hardware" : "software", dev_name(dev));
2624 else if (ret == -ENODEV)
2625 /* device not associated with an iommu */
2626 ret = 0;
2627
2628 return ret;
2629}
2630
2631
071e1374 2632static int __init iommu_prepare_static_identity_mapping(int hw)
2c2e2c38 2633{
2c2e2c38 2634 struct pci_dev *pdev = NULL;
cf04eee8
DW
2635 struct dmar_drhd_unit *drhd;
2636 struct intel_iommu *iommu;
2637 struct device *dev;
2638 int i;
2639 int ret = 0;
2c2e2c38 2640
19943b0e 2641 ret = si_domain_init(hw);
2c2e2c38
FY
2642 if (ret)
2643 return -EFAULT;
2644
2c2e2c38 2645 for_each_pci_dev(pdev) {
cf04eee8
DW
2646 ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
2647 if (ret)
2648 return ret;
2649 }
2650
2651 for_each_active_iommu(iommu, drhd)
2652 for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
2653 struct acpi_device_physical_node *pn;
2654 struct acpi_device *adev;
2655
2656 if (dev->bus != &acpi_bus_type)
2657 continue;
2658
2659 adev = to_acpi_device(dev);
2660 mutex_lock(&adev->physical_node_lock);
2661 list_for_each_entry(pn, &adev->physical_node_list, node) {
2662 ret = dev_prepare_static_identity_mapping(pn->dev, hw);
2663 if (ret)
2664 break;
eae460b6 2665 }
cf04eee8
DW
2666 mutex_unlock(&adev->physical_node_lock);
2667 if (ret)
2668 return ret;
62edf5dc 2669 }
2c2e2c38
FY
2670
2671 return 0;
2672}
2673
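/*
 * init_dmars - boot-time initialization: allocate the global IOMMU
 * and deferred-flush arrays, set up root entries and domain ids for
 * every unit, choose queued vs. register-based invalidation, create
 * the static identity / RMRR / ISA mappings as configured, and
 * finally enable fault reporting and DMA translation on each IOMMU.
 */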
b779260b 2674static int __init init_dmars(void)
ba395927
KA
2675{
2676 struct dmar_drhd_unit *drhd;
2677 struct dmar_rmrr_unit *rmrr;
832bd858 2678 struct device *dev;
ba395927 2679 struct intel_iommu *iommu;
9d783ba0 2680 int i, ret;
2c2e2c38 2681
ba395927
KA
2682 /*
2683 * for each drhd
2684 * allocate root
2685 * initialize and program root entry to not present
2686 * endfor
2687 */
2688 for_each_drhd_unit(drhd) {
5e0d2a6f 2689 /*
2690 * lock not needed as this is only incremented in the single
2691 * threaded kernel __init code path all other access are read
2692 * only
2693 */
1b198bb0
MT
2694 if (g_num_of_iommus < IOMMU_UNITS_SUPPORTED) {
2695 g_num_of_iommus++;
2696 continue;
2697 }
2698 printk_once(KERN_ERR "intel-iommu: exceeded %d IOMMUs\n",
2699 IOMMU_UNITS_SUPPORTED);
5e0d2a6f 2700 }
2701
d9630fe9
WH
2702 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2703 GFP_KERNEL);
2704 if (!g_iommus) {
2705 printk(KERN_ERR "Allocating global iommu array failed\n");
2706 ret = -ENOMEM;
2707 goto error;
2708 }
2709
80b20dd8 2710 deferred_flush = kzalloc(g_num_of_iommus *
2711 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2712 if (!deferred_flush) {
5e0d2a6f 2713 ret = -ENOMEM;
989d51fc 2714 goto free_g_iommus;
5e0d2a6f 2715 }
2716
7c919779 2717 for_each_active_iommu(iommu, drhd) {
d9630fe9 2718 g_iommus[iommu->seq_id] = iommu;
ba395927 2719
e61d98d8
SS
2720 ret = iommu_init_domains(iommu);
2721 if (ret)
989d51fc 2722 goto free_iommu;
e61d98d8 2723
ba395927
KA
2724 /*
2725 * TBD:
2726 * we could share the same root & context tables
25985edc 2727 * among all IOMMUs. Need to split it later.
ba395927
KA
2728 */
2729 ret = iommu_alloc_root_entry(iommu);
2730 if (ret) {
2731 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
989d51fc 2732 goto free_iommu;
ba395927 2733 }
4ed0d3e6 2734 if (!ecap_pass_through(iommu->ecap))
19943b0e 2735 hw_pass_through = 0;
ba395927
KA
2736 }
2737
1531a6a6
SS
2738 /*
2739 * Start from the sane iommu hardware state.
2740 */
7c919779 2741 for_each_active_iommu(iommu, drhd) {
1531a6a6
SS
2742 /*
2743 * If the queued invalidation is already initialized by us
2744 * (for example, while enabling interrupt-remapping) then
2745 * we got the things already rolling from a sane state.
2746 */
2747 if (iommu->qi)
2748 continue;
2749
2750 /*
2751 * Clear any previous faults.
2752 */
2753 dmar_fault(-1, iommu);
2754 /*
2755 * Disable queued invalidation if supported and already enabled
2756 * before OS handover.
2757 */
2758 dmar_disable_qi(iommu);
2759 }
2760
7c919779 2761 for_each_active_iommu(iommu, drhd) {
a77b67d4
YS
2762 if (dmar_enable_qi(iommu)) {
2763 /*
2764 * Queued Invalidate not enabled, use Register Based
2765 * Invalidate
2766 */
2767 iommu->flush.flush_context = __iommu_flush_context;
2768 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
680a7524 2769 printk(KERN_INFO "IOMMU %d 0x%Lx: using Register based "
b4e0f9eb 2770 "invalidation\n",
680a7524 2771 iommu->seq_id,
b4e0f9eb 2772 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
2773 } else {
2774 iommu->flush.flush_context = qi_flush_context;
2775 iommu->flush.flush_iotlb = qi_flush_iotlb;
680a7524 2776 printk(KERN_INFO "IOMMU %d 0x%Lx: using Queued "
b4e0f9eb 2777 "invalidation\n",
680a7524 2778 iommu->seq_id,
b4e0f9eb 2779 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
2780 }
2781 }
2782
19943b0e 2783 if (iommu_pass_through)
e0fc7e0b
DW
2784 iommu_identity_mapping |= IDENTMAP_ALL;
2785
d3f13810 2786#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
e0fc7e0b 2787 iommu_identity_mapping |= IDENTMAP_GFX;
19943b0e 2788#endif
e0fc7e0b
DW
2789
2790 check_tylersburg_isoch();
2791
ba395927 2792 /*
19943b0e
DW
2793 * If pass through is not set or not enabled, setup context entries for
2794 * identity mappings for rmrr, gfx, and isa and may fall back to static
2795 * identity mapping if iommu_identity_mapping is set.
ba395927 2796 */
19943b0e
DW
2797 if (iommu_identity_mapping) {
2798 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
4ed0d3e6 2799 if (ret) {
19943b0e 2800 printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
989d51fc 2801 goto free_iommu;
ba395927
KA
2802 }
2803 }
ba395927 2804 /*
19943b0e
DW
2805 * For each rmrr
2806 * for each dev attached to rmrr
2807 * do
2808 * locate drhd for dev, alloc domain for dev
2809 * allocate free domain
2810 * allocate page table entries for rmrr
2811 * if context not allocated for bus
2812 * allocate and init context
2813 * set present in root table for this bus
2814 * init context with domain, translation etc
2815 * endfor
2816 * endfor
ba395927 2817 */
19943b0e
DW
2818 printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2819 for_each_rmrr_units(rmrr) {
b683b230
JL
2820 /* some BIOS lists non-exist devices in DMAR table. */
2821 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
832bd858 2822 i, dev) {
0b9d9753 2823 ret = iommu_prepare_rmrr_dev(rmrr, dev);
19943b0e
DW
2824 if (ret)
2825 printk(KERN_ERR
2826 "IOMMU: mapping reserved region failed\n");
ba395927 2827 }
4ed0d3e6 2828 }
49a0429e 2829
19943b0e
DW
2830 iommu_prepare_isa();
2831
ba395927
KA
2832 /*
2833 * for each drhd
2834 * enable fault log
2835 * global invalidate context cache
2836 * global invalidate iotlb
2837 * enable translation
2838 */
7c919779 2839 for_each_iommu(iommu, drhd) {
51a63e67
JC
2840 if (drhd->ignored) {
2841 /*
2842 * we always have to disable PMRs or DMA may fail on
2843 * this device
2844 */
2845 if (force_on)
7c919779 2846 iommu_disable_protect_mem_regions(iommu);
ba395927 2847 continue;
51a63e67 2848 }
ba395927
KA
2849
2850 iommu_flush_write_buffer(iommu);
2851
3460a6d9
KA
2852 ret = dmar_set_interrupt(iommu);
2853 if (ret)
989d51fc 2854 goto free_iommu;
3460a6d9 2855
ba395927
KA
2856 iommu_set_root_entry(iommu);
2857
4c25a2c1 2858 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
1f0ef2aa 2859 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
f8bab735 2860
ba395927
KA
2861 ret = iommu_enable_translation(iommu);
2862 if (ret)
989d51fc 2863 goto free_iommu;
b94996c9
DW
2864
2865 iommu_disable_protect_mem_regions(iommu);
ba395927
KA
2866 }
2867
2868 return 0;
989d51fc
JL
2869
2870free_iommu:
7c919779 2871 for_each_active_iommu(iommu, drhd)
a868e6b7 2872 free_dmar_iommu(iommu);
9bdc531e 2873 kfree(deferred_flush);
989d51fc 2874free_g_iommus:
d9630fe9 2875 kfree(g_iommus);
989d51fc 2876error:
ba395927
KA
2877 return ret;
2878}
2879
5a5e02a6 2880/* This takes a number of _MM_ pages, not VTD pages */
875764de
DW
2881static struct iova *intel_alloc_iova(struct device *dev,
2882 struct dmar_domain *domain,
2883 unsigned long nrpages, uint64_t dma_mask)
ba395927 2884{
ba395927 2885 struct iova *iova = NULL;
ba395927 2886
875764de
DW
2887 /* Restrict dma_mask to the width that the iommu can handle */
2888 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2889
2890 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
ba395927
KA
2891 /*
2892 * First try to allocate an io virtual address in
284901a9 2893 * DMA_BIT_MASK(32) and if that fails then try allocating
3609801e 2894 * from higher range
ba395927 2895 */
875764de
DW
2896 iova = alloc_iova(&domain->iovad, nrpages,
2897 IOVA_PFN(DMA_BIT_MASK(32)), 1);
2898 if (iova)
2899 return iova;
2900 }
2901 iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2902 if (unlikely(!iova)) {
2903 printk(KERN_ERR "Allocating %ld-page iova for %s failed",
207e3592 2904 nrpages, dev_name(dev));
f76aec76
KA
2905 return NULL;
2906 }
2907
2908 return iova;
2909}
2910
d4b709f4 2911static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
f76aec76
KA
2912{
2913 struct dmar_domain *domain;
2914 int ret;
2915
d4b709f4 2916 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
f76aec76 2917 if (!domain) {
d4b709f4
DW
2918 printk(KERN_ERR "Allocating domain for %s failed",
2919 dev_name(dev));
4fe05bbc 2920 return NULL;
ba395927
KA
2921 }
2922
2923 /* make sure context mapping is ok */
d4b709f4
DW
2924 if (unlikely(!domain_context_mapped(dev))) {
2925 ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);
f76aec76 2926 if (ret) {
d4b709f4
DW
2927 printk(KERN_ERR "Domain context map for %s failed",
2928 dev_name(dev));
4fe05bbc 2929 return NULL;
f76aec76 2930 }
ba395927
KA
2931 }
2932
f76aec76
KA
2933 return domain;
2934}
2935
d4b709f4 2936static inline struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
147202aa
DW
2937{
2938 struct device_domain_info *info;
2939
2940 /* No lock here, assumes no domain exit in normal case */
d4b709f4 2941 info = dev->archdata.iommu;
147202aa
DW
2942 if (likely(info))
2943 return info->domain;
2944
2945 return __get_valid_domain_for_dev(dev);
2946}
2947
3d89194a 2948static int iommu_dummy(struct device *dev)
2c2e2c38 2949{
3d89194a 2950 return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2c2e2c38
FY
2951}
2952
ecb509ec 2953/* Check if the dev needs to go through the non-identity map and unmap process. */
73676832 2954static int iommu_no_mapping(struct device *dev)
2c2e2c38
FY
2955{
2956 int found;
2957
3d89194a 2958 if (iommu_dummy(dev))
1e4c64c4
DW
2959 return 1;
2960
2c2e2c38 2961 if (!iommu_identity_mapping)
1e4c64c4 2962 return 0;
2c2e2c38 2963
9b226624 2964 found = identity_mapping(dev);
2c2e2c38 2965 if (found) {
ecb509ec 2966 if (iommu_should_identity_map(dev, 0))
2c2e2c38
FY
2967 return 1;
2968 else {
2969 /*
2970 * 32 bit DMA device is removed from si_domain and falls back
2971 * to non-identity mapping.
2972 */
bf9c9eda 2973 domain_remove_one_dev_info(si_domain, dev);
2c2e2c38 2974 printk(KERN_INFO "32bit %s uses non-identity mapping\n",
ecb509ec 2975 dev_name(dev));
2c2e2c38
FY
2976 return 0;
2977 }
2978 } else {
2979 /*
2981 * In case a 64 bit DMA device is detached from a vm, the device
2981 * is put into si_domain for identity mapping.
2982 */
ecb509ec 2983 if (iommu_should_identity_map(dev, 0)) {
2c2e2c38 2984 int ret;
5913c9bf 2985 ret = domain_add_dev_info(si_domain, dev,
5fe60f4e
DW
2986 hw_pass_through ?
2987 CONTEXT_TT_PASS_THROUGH :
2988 CONTEXT_TT_MULTI_LEVEL);
2c2e2c38
FY
2989 if (!ret) {
2990 printk(KERN_INFO "64bit %s uses identity mapping\n",
ecb509ec 2991 dev_name(dev));
2c2e2c38
FY
2992 return 1;
2993 }
2994 }
2995 }
2996
1e4c64c4 2997 return 0;
2c2e2c38
FY
2998}
2999
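/*
 * __intel_map_single - map one physically contiguous buffer for DMA.
 * Identity-mapped devices get the physical address back unchanged;
 * otherwise an iova below @dma_mask is allocated, the (page-aligned)
 * range is mapped with read/write permission as the direction
 * requires, and the IOTLB is flushed only when caching mode is set.
 */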
5040a918 3000static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
bb9e6d65 3001 size_t size, int dir, u64 dma_mask)
f76aec76 3002{
f76aec76 3003 struct dmar_domain *domain;
5b6985ce 3004 phys_addr_t start_paddr;
f76aec76
KA
3005 struct iova *iova;
3006 int prot = 0;
6865f0d1 3007 int ret;
8c11e798 3008 struct intel_iommu *iommu;
33041ec0 3009 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
f76aec76
KA
3010
3011 BUG_ON(dir == DMA_NONE);
2c2e2c38 3012
5040a918 3013 if (iommu_no_mapping(dev))
6865f0d1 3014 return paddr;
f76aec76 3015
5040a918 3016 domain = get_valid_domain_for_dev(dev);
f76aec76
KA
3017 if (!domain)
3018 return 0;
3019
8c11e798 3020 iommu = domain_get_iommu(domain);
88cb6a74 3021 size = aligned_nrpages(paddr, size);
f76aec76 3022
5040a918 3023 iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
f76aec76
KA
3024 if (!iova)
3025 goto error;
3026
ba395927
KA
3027 /*
3028 * Check if DMAR supports zero-length reads on write only
3029 * mappings..
3030 */
3031 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3032 !cap_zlr(iommu->cap))
ba395927
KA
3033 prot |= DMA_PTE_READ;
3034 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3035 prot |= DMA_PTE_WRITE;
3036 /*
6865f0d1 3037 * paddr through (paddr + size) might be a partial page; we should map
ba395927 3038 * the whole page. Note: if two parts of one page are separately mapped,
6865f0d1 3039 * we might have two guest_addrs mapping to the same host paddr, but this
ba395927
KA
3040 * is not a big problem
3041 */
0ab36de2 3042 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
33041ec0 3043 mm_to_dma_pfn(paddr_pfn), size, prot);
ba395927
KA
3044 if (ret)
3045 goto error;
3046
1f0ef2aa
DW
3047 /* it's a non-present to present mapping. Only flush if caching mode */
3048 if (cap_caching_mode(iommu->cap))
ea8ea460 3049 iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 0, 1);
1f0ef2aa 3050 else
8c11e798 3051 iommu_flush_write_buffer(iommu);
f76aec76 3052
03d6a246
DW
3053 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
3054 start_paddr += paddr & ~PAGE_MASK;
3055 return start_paddr;
ba395927 3056
ba395927 3057error:
f76aec76
KA
3058 if (iova)
3059 __free_iova(&domain->iovad, iova);
4cf2e75d 3060 printk(KERN_ERR "Device %s request: %zx@%llx dir %d --- failed\n",
5040a918 3061 dev_name(dev), size, (unsigned long long)paddr, dir);
ba395927
KA
3062 return 0;
3063}
3064
ffbbef5c
FT
3065static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3066 unsigned long offset, size_t size,
3067 enum dma_data_direction dir,
3068 struct dma_attrs *attrs)
bb9e6d65 3069{
ffbbef5c 3070 return __intel_map_single(dev, page_to_phys(page) + offset, size,
46333e37 3071 dir, *dev->dma_mask);
bb9e6d65
FT
3072}
3073
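/*
 * flush_unmaps - drain the per-IOMMU deferred-unmap queues: flush the
 * IOTLB (globally, or per-iova when caching mode makes global flushes
 * expensive), then free the queued iovas and page-table freelists.
 */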
5e0d2a6f 3074static void flush_unmaps(void)
3075{
80b20dd8 3076 int i, j;
5e0d2a6f 3077
5e0d2a6f 3078 timer_on = 0;
3079
3080 /* just flush them all */
3081 for (i = 0; i < g_num_of_iommus; i++) {
a2bb8459
WH
3082 struct intel_iommu *iommu = g_iommus[i];
3083 if (!iommu)
3084 continue;
c42d9f32 3085
9dd2fe89
YZ
3086 if (!deferred_flush[i].next)
3087 continue;
3088
78d5f0f5
NA
3089 /* In caching mode, global flushes make emulation expensive */
3090 if (!cap_caching_mode(iommu->cap))
3091 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
93a23a72 3092 DMA_TLB_GLOBAL_FLUSH);
9dd2fe89 3093 for (j = 0; j < deferred_flush[i].next; j++) {
93a23a72
YZ
3094 unsigned long mask;
3095 struct iova *iova = deferred_flush[i].iova[j];
78d5f0f5
NA
3096 struct dmar_domain *domain = deferred_flush[i].domain[j];
3097
3098 /* On real hardware multiple invalidations are expensive */
3099 if (cap_caching_mode(iommu->cap))
3100 iommu_flush_iotlb_psi(iommu, domain->id,
ea8ea460
DW
3101 iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1,
3102 !deferred_flush[i].freelist[j], 0);
78d5f0f5
NA
3103 else {
3104 mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1));
3105 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
3106 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
3107 }
93a23a72 3108 __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
ea8ea460
DW
3109 if (deferred_flush[i].freelist[j])
3110 dma_free_pagelist(deferred_flush[i].freelist[j]);
80b20dd8 3111 }
9dd2fe89 3112 deferred_flush[i].next = 0;
5e0d2a6f 3113 }
3114
5e0d2a6f 3115 list_size = 0;
5e0d2a6f 3116}
3117
3118static void flush_unmaps_timeout(unsigned long data)
3119{
80b20dd8 3120 unsigned long flags;
3121
3122 spin_lock_irqsave(&async_umap_flush_lock, flags);
5e0d2a6f 3123 flush_unmaps();
80b20dd8 3124 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
5e0d2a6f 3125}
3126
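/*
 * add_unmap - queue an iova (plus the page-table pages freed with it)
 * for deferred invalidation on the domain's IOMMU.  The queue is
 * drained by a 10ms timer, or immediately once HIGH_WATER_MARK
 * entries are pending.
 */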
ea8ea460 3127static void add_unmap(struct dmar_domain *dom, struct iova *iova, struct page *freelist)
5e0d2a6f 3128{
3129 unsigned long flags;
80b20dd8 3130 int next, iommu_id;
8c11e798 3131 struct intel_iommu *iommu;
5e0d2a6f 3132
3133 spin_lock_irqsave(&async_umap_flush_lock, flags);
80b20dd8 3134 if (list_size == HIGH_WATER_MARK)
3135 flush_unmaps();
3136
8c11e798
WH
3137 iommu = domain_get_iommu(dom);
3138 iommu_id = iommu->seq_id;
c42d9f32 3139
80b20dd8 3140 next = deferred_flush[iommu_id].next;
3141 deferred_flush[iommu_id].domain[next] = dom;
3142 deferred_flush[iommu_id].iova[next] = iova;
ea8ea460 3143 deferred_flush[iommu_id].freelist[next] = freelist;
80b20dd8 3144 deferred_flush[iommu_id].next++;
5e0d2a6f 3145
3146 if (!timer_on) {
3147 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
3148 timer_on = 1;
3149 }
3150 list_size++;
3151 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3152}
3153
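/*
 * intel_unmap_page - tear down a mapping created by intel_map_page:
 * look up the iova covering @dev_addr, unmap the page-table range,
 * and either flush and free immediately (intel_iommu_strict) or hand
 * the iova and freelist to the deferred-flush queue.
 */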
ffbbef5c
FT
3154static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3155 size_t size, enum dma_data_direction dir,
3156 struct dma_attrs *attrs)
ba395927 3157{
f76aec76 3158 struct dmar_domain *domain;
d794dc9b 3159 unsigned long start_pfn, last_pfn;
ba395927 3160 struct iova *iova;
8c11e798 3161 struct intel_iommu *iommu;
ea8ea460 3162 struct page *freelist;
ba395927 3163
73676832 3164 if (iommu_no_mapping(dev))
f76aec76 3165 return;
2c2e2c38 3166
1525a29a 3167 domain = find_domain(dev);
ba395927
KA
3168 BUG_ON(!domain);
3169
8c11e798
WH
3170 iommu = domain_get_iommu(domain);
3171
ba395927 3172 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
85b98276
DW
3173 if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
3174 (unsigned long long)dev_addr))
ba395927 3175 return;
ba395927 3176
d794dc9b
DW
3177 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3178 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
ba395927 3179
d794dc9b 3180 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
207e3592 3181 dev_name(dev), start_pfn, last_pfn);
ba395927 3182
ea8ea460 3183 freelist = domain_unmap(domain, start_pfn, last_pfn);
d794dc9b 3184
5e0d2a6f 3185 if (intel_iommu_strict) {
03d6a246 3186 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
ea8ea460 3187 last_pfn - start_pfn + 1, !freelist, 0);
5e0d2a6f 3188 /* free iova */
3189 __free_iova(&domain->iovad, iova);
ea8ea460 3190 dma_free_pagelist(freelist);
5e0d2a6f 3191 } else {
ea8ea460 3192 add_unmap(domain, iova, freelist);
5e0d2a6f 3193 /*
3194 * queue up the release of the unmap to save the 1/6th of the
3195 * cpu used up by the iotlb flush operation...
3196 */
5e0d2a6f 3197 }
ba395927
KA
3198}
3199
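/*
 * intel_alloc_coherent - allocate and map a coherent DMA buffer.
 * Prefers an allocation from the contiguous memory allocator when
 * sleeping is allowed (falling back to alloc_pages), zeroes it, and
 * maps it bidirectionally through __intel_map_single using the
 * device's coherent DMA mask.
 */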
5040a918 3200static void *intel_alloc_coherent(struct device *dev, size_t size,
baa676fc
AP
3201 dma_addr_t *dma_handle, gfp_t flags,
3202 struct dma_attrs *attrs)
ba395927 3203{
36746436 3204 struct page *page = NULL;
ba395927
KA
3205 int order;
3206
5b6985ce 3207 size = PAGE_ALIGN(size);
ba395927 3208 order = get_order(size);
e8bb910d 3209
5040a918 3210 if (!iommu_no_mapping(dev))
e8bb910d 3211 flags &= ~(GFP_DMA | GFP_DMA32);
5040a918
DW
3212 else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
3213 if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
e8bb910d
AW
3214 flags |= GFP_DMA;
3215 else
3216 flags |= GFP_DMA32;
3217 }
ba395927 3218
36746436
AM
3219 if (flags & __GFP_WAIT) {
3220 unsigned int count = size >> PAGE_SHIFT;
3221
3222 page = dma_alloc_from_contiguous(dev, count, order);
3223 if (page && iommu_no_mapping(dev) &&
3224 page_to_phys(page) + size > dev->coherent_dma_mask) {
3225 dma_release_from_contiguous(dev, page, count);
3226 page = NULL;
3227 }
3228 }
3229
3230 if (!page)
3231 page = alloc_pages(flags, order);
3232 if (!page)
ba395927 3233 return NULL;
36746436 3234 memset(page_address(page), 0, size);
ba395927 3235
36746436 3236 *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
bb9e6d65 3237 DMA_BIDIRECTIONAL,
5040a918 3238 dev->coherent_dma_mask);
ba395927 3239 if (*dma_handle)
36746436
AM
3240 return page_address(page);
3241 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3242 __free_pages(page, order);
3243
ba395927
KA
3244 return NULL;
3245}
3246
5040a918 3247static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
baa676fc 3248 dma_addr_t dma_handle, struct dma_attrs *attrs)
ba395927
KA
3249{
3250 int order;
36746436 3251 struct page *page = virt_to_page(vaddr);
ba395927 3252
5b6985ce 3253 size = PAGE_ALIGN(size);
ba395927
KA
3254 order = get_order(size);
3255
5040a918 3256 intel_unmap_page(dev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
36746436
AM
3257 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3258 __free_pages(page, order);
ba395927
KA
3259}
3260
5040a918 3261static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
d7ab5c46
FT
3262 int nelems, enum dma_data_direction dir,
3263 struct dma_attrs *attrs)
ba395927 3264{
ba395927 3265 struct dmar_domain *domain;
d794dc9b 3266 unsigned long start_pfn, last_pfn;
f76aec76 3267 struct iova *iova;
8c11e798 3268 struct intel_iommu *iommu;
ea8ea460 3269 struct page *freelist;
ba395927 3270
5040a918 3271 if (iommu_no_mapping(dev))
ba395927
KA
3272 return;
3273
5040a918 3274 domain = find_domain(dev);
8c11e798
WH
3275 BUG_ON(!domain);
3276
3277 iommu = domain_get_iommu(domain);
ba395927 3278
c03ab37c 3279 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
85b98276
DW
3280 if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
3281 (unsigned long long)sglist[0].dma_address))
f76aec76 3282 return;
f76aec76 3283
d794dc9b
DW
3284 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3285 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
f76aec76 3286
ea8ea460 3287 freelist = domain_unmap(domain, start_pfn, last_pfn);
f76aec76 3288
acea0018
DW
3289 if (intel_iommu_strict) {
3290 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
ea8ea460 3291 last_pfn - start_pfn + 1, !freelist, 0);
acea0018
DW
3292 /* free iova */
3293 __free_iova(&domain->iovad, iova);
ea8ea460 3294 dma_free_pagelist(freelist);
acea0018 3295 } else {
ea8ea460 3296 add_unmap(domain, iova, freelist);
acea0018
DW
3297 /*
3298 * queue up the release of the unmap to save the 1/6th of the
3299 * cpu used up by the iotlb flush operation...
3300 */
3301 }
ba395927
KA
3302}
3303
ba395927 3304static int intel_nontranslate_map_sg(struct device *hddev,
c03ab37c 3305 struct scatterlist *sglist, int nelems, int dir)
ba395927
KA
3306{
3307 int i;
c03ab37c 3308 struct scatterlist *sg;
ba395927 3309
c03ab37c 3310 for_each_sg(sglist, sg, nelems, i) {
12d4d40e 3311 BUG_ON(!sg_page(sg));
4cf2e75d 3312 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
c03ab37c 3313 sg->dma_length = sg->length;
ba395927
KA
3314 }
3315 return nelems;
3316}
3317
5040a918 3318static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
d7ab5c46 3319 enum dma_data_direction dir, struct dma_attrs *attrs)
ba395927 3320{
ba395927 3321 int i;
ba395927 3322 struct dmar_domain *domain;
f76aec76
KA
3323 size_t size = 0;
3324 int prot = 0;
f76aec76
KA
3325 struct iova *iova = NULL;
3326 int ret;
c03ab37c 3327 struct scatterlist *sg;
b536d24d 3328 unsigned long start_vpfn;
8c11e798 3329 struct intel_iommu *iommu;
ba395927
KA
3330
3331 BUG_ON(dir == DMA_NONE);
5040a918
DW
3332 if (iommu_no_mapping(dev))
3333 return intel_nontranslate_map_sg(dev, sglist, nelems, dir);
ba395927 3334
5040a918 3335 domain = get_valid_domain_for_dev(dev);
f76aec76
KA
3336 if (!domain)
3337 return 0;
3338
8c11e798
WH
3339 iommu = domain_get_iommu(domain);
3340
b536d24d 3341 for_each_sg(sglist, sg, nelems, i)
88cb6a74 3342 size += aligned_nrpages(sg->offset, sg->length);
f76aec76 3343
5040a918
DW
3344 iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
3345 *dev->dma_mask);
f76aec76 3346 if (!iova) {
c03ab37c 3347 sglist->dma_length = 0;
f76aec76
KA
3348 return 0;
3349 }
3350
3351 /*
3352 * Check if DMAR supports zero-length reads on write only
3353 * mappings..
3354 */
3355 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3356 !cap_zlr(iommu->cap))
f76aec76
KA
3357 prot |= DMA_PTE_READ;
3358 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3359 prot |= DMA_PTE_WRITE;
3360
b536d24d 3361 start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
e1605495 3362
f532959b 3363 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
e1605495
DW
3364 if (unlikely(ret)) {
3365 /* clear the page */
3366 dma_pte_clear_range(domain, start_vpfn,
3367 start_vpfn + size - 1);
3368 /* free page tables */
3369 dma_pte_free_pagetable(domain, start_vpfn,
3370 start_vpfn + size - 1);
3371 /* free iova */
3372 __free_iova(&domain->iovad, iova);
3373 return 0;
ba395927
KA
3374 }
3375
1f0ef2aa
DW
3376 /* it's a non-present to present mapping. Only flush if caching mode */
3377 if (cap_caching_mode(iommu->cap))
ea8ea460 3378 iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 0, 1);
1f0ef2aa 3379 else
8c11e798 3380 iommu_flush_write_buffer(iommu);
1f0ef2aa 3381
ba395927
KA
3382 return nelems;
3383}
3384
dfb805e8
FT
3385static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3386{
3387 return !dma_addr;
3388}
3389
160c1d8e 3390struct dma_map_ops intel_dma_ops = {
baa676fc
AP
3391 .alloc = intel_alloc_coherent,
3392 .free = intel_free_coherent,
ba395927
KA
3393 .map_sg = intel_map_sg,
3394 .unmap_sg = intel_unmap_sg,
ffbbef5c
FT
3395 .map_page = intel_map_page,
3396 .unmap_page = intel_unmap_page,
dfb805e8 3397 .mapping_error = intel_mapping_error,
ba395927
KA
3398};
3399
3400static inline int iommu_domain_cache_init(void)
3401{
3402 int ret = 0;
3403
3404 iommu_domain_cache = kmem_cache_create("iommu_domain",
3405 sizeof(struct dmar_domain),
3406 0,
3407 SLAB_HWCACHE_ALIGN,
3408
3409 NULL);
3410 if (!iommu_domain_cache) {
3411 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
3412 ret = -ENOMEM;
3413 }
3414
3415 return ret;
3416}
3417
3418static inline int iommu_devinfo_cache_init(void)
3419{
3420 int ret = 0;
3421
3422 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3423 sizeof(struct device_domain_info),
3424 0,
3425 SLAB_HWCACHE_ALIGN,
ba395927
KA
3426 NULL);
3427 if (!iommu_devinfo_cache) {
3428 printk(KERN_ERR "Couldn't create devinfo cache\n");
3429 ret = -ENOMEM;
3430 }
3431
3432 return ret;
3433}
3434
3435static inline int iommu_iova_cache_init(void)
3436{
3437 int ret = 0;
3438
3439 iommu_iova_cache = kmem_cache_create("iommu_iova",
3440 sizeof(struct iova),
3441 0,
3442 SLAB_HWCACHE_ALIGN,
ba395927
KA
3443 NULL);
3444 if (!iommu_iova_cache) {
3445 printk(KERN_ERR "Couldn't create iova cache\n");
3446 ret = -ENOMEM;
3447 }
3448
3449 return ret;
3450}
3451
3452static int __init iommu_init_mempool(void)
3453{
3454 int ret;
3455 ret = iommu_iova_cache_init();
3456 if (ret)
3457 return ret;
3458
3459 ret = iommu_domain_cache_init();
3460 if (ret)
3461 goto domain_error;
3462
3463 ret = iommu_devinfo_cache_init();
3464 if (!ret)
3465 return ret;
3466
3467 kmem_cache_destroy(iommu_domain_cache);
3468domain_error:
3469 kmem_cache_destroy(iommu_iova_cache);
3470
3471 return -ENOMEM;
3472}
3473
3474static void __init iommu_exit_mempool(void)
3475{
3476 kmem_cache_destroy(iommu_devinfo_cache);
3477 kmem_cache_destroy(iommu_domain_cache);
3478 kmem_cache_destroy(iommu_iova_cache);
3479
3480}
3481
556ab45f
DW
3482static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3483{
3484 struct dmar_drhd_unit *drhd;
3485 u32 vtbar;
3486 int rc;
3487
3488 /* We know that this device on this chipset has its own IOMMU.
3489 * If we find it under a different IOMMU, then the BIOS is lying
3490 * to us. Hope that the IOMMU for this device is actually
3491 * disabled, and it needs no translation...
3492 */
3493 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3494 if (rc) {
3495 /* "can't" happen */
3496 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3497 return;
3498 }
3499 vtbar &= 0xffff0000;
3500
3501 /* we know that this iommu should be at offset 0xa000 from vtbar */
3502 drhd = dmar_find_matched_drhd_unit(pdev);
3503 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3504 TAINT_FIRMWARE_WORKAROUND,
3505 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3506 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3507}
3508DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3509
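/*
 * init_no_remapping_devices - mark DRHD units that cover no devices
 * as ignored, and units that cover only graphics devices either as
 * ignored (when dmar_map_gfx is clear) or note that gfx is mapped.
 */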
ba395927
KA
3510static void __init init_no_remapping_devices(void)
3511{
3512 struct dmar_drhd_unit *drhd;
832bd858 3513 struct device *dev;
b683b230 3514 int i;
ba395927
KA
3515
3516 for_each_drhd_unit(drhd) {
3517 if (!drhd->include_all) {
b683b230
JL
3518 for_each_active_dev_scope(drhd->devices,
3519 drhd->devices_cnt, i, dev)
3520 break;
832bd858 3521 /* ignore DMAR unit if no devices exist */
ba395927
KA
3522 if (i == drhd->devices_cnt)
3523 drhd->ignored = 1;
3524 }
3525 }
3526
7c919779 3527 for_each_active_drhd_unit(drhd) {
7c919779 3528 if (drhd->include_all)
ba395927
KA
3529 continue;
3530
b683b230
JL
3531 for_each_active_dev_scope(drhd->devices,
3532 drhd->devices_cnt, i, dev)
832bd858 3533 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
ba395927 3534 break;
ba395927
KA
3535 if (i < drhd->devices_cnt)
3536 continue;
3537
c0771df8
DW
3538 /* This IOMMU has *only* gfx devices. Either bypass it or
3539 set the gfx_mapped flag, as appropriate */
3540 if (dmar_map_gfx) {
3541 intel_iommu_gfx_mapped = 1;
3542 } else {
3543 drhd->ignored = 1;
b683b230
JL
3544 for_each_active_dev_scope(drhd->devices,
3545 drhd->devices_cnt, i, dev)
832bd858 3546 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
ba395927
KA
3547 }
3548 }
3549}
3550
f59c7b69
FY
3551#ifdef CONFIG_SUSPEND
3552static int init_iommu_hw(void)
3553{
3554 struct dmar_drhd_unit *drhd;
3555 struct intel_iommu *iommu = NULL;
3556
3557 for_each_active_iommu(iommu, drhd)
3558 if (iommu->qi)
3559 dmar_reenable_qi(iommu);
3560
b779260b
JC
3561 for_each_iommu(iommu, drhd) {
3562 if (drhd->ignored) {
3563 /*
3564 * we always have to disable PMRs or DMA may fail on
3565 * this device
3566 */
3567 if (force_on)
3568 iommu_disable_protect_mem_regions(iommu);
3569 continue;
3570 }
3571
f59c7b69
FY
3572 iommu_flush_write_buffer(iommu);
3573
3574 iommu_set_root_entry(iommu);
3575
3576 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3577 DMA_CCMD_GLOBAL_INVL);
f59c7b69 3578 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 3579 DMA_TLB_GLOBAL_FLUSH);
b779260b
JC
3580 if (iommu_enable_translation(iommu))
3581 return 1;
b94996c9 3582 iommu_disable_protect_mem_regions(iommu);
f59c7b69
FY
3583 }
3584
3585 return 0;
3586}
3587
3588static void iommu_flush_all(void)
3589{
3590 struct dmar_drhd_unit *drhd;
3591 struct intel_iommu *iommu;
3592
3593 for_each_active_iommu(iommu, drhd) {
3594 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3595 DMA_CCMD_GLOBAL_INVL);
f59c7b69 3596 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 3597 DMA_TLB_GLOBAL_FLUSH);
f59c7b69
FY
3598 }
3599}
3600
134fac3f 3601static int iommu_suspend(void)
f59c7b69
FY
3602{
3603 struct dmar_drhd_unit *drhd;
3604 struct intel_iommu *iommu = NULL;
3605 unsigned long flag;
3606
3607 for_each_active_iommu(iommu, drhd) {
3608 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3609 GFP_ATOMIC);
3610 if (!iommu->iommu_state)
3611 goto nomem;
3612 }
3613
3614 iommu_flush_all();
3615
3616 for_each_active_iommu(iommu, drhd) {
3617 iommu_disable_translation(iommu);
3618
1f5b3c3f 3619 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
3620
3621 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3622 readl(iommu->reg + DMAR_FECTL_REG);
3623 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3624 readl(iommu->reg + DMAR_FEDATA_REG);
3625 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3626 readl(iommu->reg + DMAR_FEADDR_REG);
3627 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3628 readl(iommu->reg + DMAR_FEUADDR_REG);
3629
1f5b3c3f 3630 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
3631 }
3632 return 0;
3633
3634nomem:
3635 for_each_active_iommu(iommu, drhd)
3636 kfree(iommu->iommu_state);
3637
3638 return -ENOMEM;
3639}
3640
134fac3f 3641static void iommu_resume(void)
f59c7b69
FY
3642{
3643 struct dmar_drhd_unit *drhd;
3644 struct intel_iommu *iommu = NULL;
3645 unsigned long flag;
3646
3647 if (init_iommu_hw()) {
b779260b
JC
3648 if (force_on)
3649 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3650 else
3651 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
134fac3f 3652 return;
f59c7b69
FY
3653 }
3654
3655 for_each_active_iommu(iommu, drhd) {
3656
1f5b3c3f 3657 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
3658
3659 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3660 iommu->reg + DMAR_FECTL_REG);
3661 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3662 iommu->reg + DMAR_FEDATA_REG);
3663 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3664 iommu->reg + DMAR_FEADDR_REG);
3665 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3666 iommu->reg + DMAR_FEUADDR_REG);
3667
1f5b3c3f 3668 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
3669 }
3670
3671 for_each_active_iommu(iommu, drhd)
3672 kfree(iommu->iommu_state);
f59c7b69
FY
3673}
3674
134fac3f 3675static struct syscore_ops iommu_syscore_ops = {
f59c7b69
FY
3676 .resume = iommu_resume,
3677 .suspend = iommu_suspend,
3678};
3679
134fac3f 3680static void __init init_iommu_pm_ops(void)
f59c7b69 3681{
134fac3f 3682 register_syscore_ops(&iommu_syscore_ops);
f59c7b69
FY
3683}
3684
3685#else
99592ba4 3686static inline void init_iommu_pm_ops(void) {}
f59c7b69
FY
3687#endif /* CONFIG_SUSPEND */
3688
318fe7df
SS
3689
3690int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header)
3691{
3692 struct acpi_dmar_reserved_memory *rmrr;
3693 struct dmar_rmrr_unit *rmrru;
3694
3695 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
3696 if (!rmrru)
3697 return -ENOMEM;
3698
3699 rmrru->hdr = header;
3700 rmrr = (struct acpi_dmar_reserved_memory *)header;
3701 rmrru->base_address = rmrr->base_address;
3702 rmrru->end_address = rmrr->end_address;
2e455289
JL
3703 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
3704 ((void *)rmrr) + rmrr->header.length,
3705 &rmrru->devices_cnt);
3706 if (rmrru->devices_cnt && rmrru->devices == NULL) {
3707 kfree(rmrru);
3708 return -ENOMEM;
3709 }
318fe7df 3710
2e455289 3711 list_add(&rmrru->list, &dmar_rmrr_units);
318fe7df 3712
2e455289 3713 return 0;
318fe7df
SS
3714}
3715
318fe7df
SS
3716int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
3717{
3718 struct acpi_dmar_atsr *atsr;
3719 struct dmar_atsr_unit *atsru;
3720
3721 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3722 atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
3723 if (!atsru)
3724 return -ENOMEM;
3725
3726 atsru->hdr = hdr;
3727 atsru->include_all = atsr->flags & 0x1;
2e455289
JL
3728 if (!atsru->include_all) {
3729 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
3730 (void *)atsr + atsr->header.length,
3731 &atsru->devices_cnt);
3732 if (atsru->devices_cnt && atsru->devices == NULL) {
3733 kfree(atsru);
3734 return -ENOMEM;
3735 }
3736 }
318fe7df 3737
0e242612 3738 list_add_rcu(&atsru->list, &dmar_atsr_units);
318fe7df
SS
3739
3740 return 0;
3741}
3742
9bdc531e
JL
3743static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
3744{
3745 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
3746 kfree(atsru);
3747}
3748
3749static void intel_iommu_free_dmars(void)
3750{
3751 struct dmar_rmrr_unit *rmrru, *rmrr_n;
3752 struct dmar_atsr_unit *atsru, *atsr_n;
3753
3754 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
3755 list_del(&rmrru->list);
3756 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
3757 kfree(rmrru);
318fe7df
SS
3758 }
3759
9bdc531e
JL
3760 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
3761 list_del(&atsru->list);
3762 intel_iommu_free_atsr(atsru);
3763 }
318fe7df
SS
3764}
3765
3766int dmar_find_matched_atsr_unit(struct pci_dev *dev)
3767{
b683b230 3768 int i, ret = 1;
318fe7df 3769 struct pci_bus *bus;
832bd858
DW
3770 struct pci_dev *bridge = NULL;
3771 struct device *tmp;
318fe7df
SS
3772 struct acpi_dmar_atsr *atsr;
3773 struct dmar_atsr_unit *atsru;
3774
3775 dev = pci_physfn(dev);
318fe7df 3776 for (bus = dev->bus; bus; bus = bus->parent) {
b5f82ddf 3777 bridge = bus->self;
318fe7df 3778 if (!bridge || !pci_is_pcie(bridge) ||
62f87c0e 3779 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
318fe7df 3780 return 0;
b5f82ddf 3781 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
318fe7df 3782 break;
318fe7df 3783 }
b5f82ddf
JL
3784 if (!bridge)
3785 return 0;
318fe7df 3786
0e242612 3787 rcu_read_lock();
b5f82ddf
JL
3788 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
3789 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3790 if (atsr->segment != pci_domain_nr(dev->bus))
3791 continue;
3792
b683b230 3793 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
832bd858 3794 if (tmp == &bridge->dev)
b683b230 3795 goto out;
b5f82ddf
JL
3796
3797 if (atsru->include_all)
b683b230 3798 goto out;
b5f82ddf 3799 }
b683b230
JL
3800 ret = 0;
3801out:
0e242612 3802 rcu_read_unlock();
318fe7df 3803
b683b230 3804 return ret;
318fe7df
SS
3805}
3806
59ce0515
JL
3807int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
3808{
3809 int ret = 0;
3810 struct dmar_rmrr_unit *rmrru;
3811 struct dmar_atsr_unit *atsru;
3812 struct acpi_dmar_atsr *atsr;
3813 struct acpi_dmar_reserved_memory *rmrr;
3814
3815 if (!intel_iommu_enabled && system_state != SYSTEM_BOOTING)
3816 return 0;
3817
3818 list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
3819 rmrr = container_of(rmrru->hdr,
3820 struct acpi_dmar_reserved_memory, header);
3821 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3822 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
3823 ((void *)rmrr) + rmrr->header.length,
3824 rmrr->segment, rmrru->devices,
3825 rmrru->devices_cnt);
27e24950 3826 if (ret < 0)
59ce0515
JL
3827 return ret;
3828 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
27e24950
JL
3829 dmar_remove_dev_scope(info, rmrr->segment,
3830 rmrru->devices, rmrru->devices_cnt);
59ce0515
JL
3831 }
3832 }
3833
3834 list_for_each_entry(atsru, &dmar_atsr_units, list) {
3835 if (atsru->include_all)
3836 continue;
3837
3838 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3839 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3840 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
3841 (void *)atsr + atsr->header.length,
3842 atsr->segment, atsru->devices,
3843 atsru->devices_cnt);
3844 if (ret > 0)
3845 break;
3846 else if (ret < 0)
3847 return ret;
3848 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
3849 if (dmar_remove_dev_scope(info, atsr->segment,
3850 atsru->devices, atsru->devices_cnt))
3851 break;
3852 }
3853 }
3854
3855 return 0;
3856}
3857
99dcaded
FY
3858/*
3859 * Here we only respond to the action of a device being unbound from its driver.
3860 *
3861 * A newly added device is not attached to its DMAR domain here yet; that
3862 * happens when the device is first mapped to an iova.
3863 */
3864static int device_notifier(struct notifier_block *nb,
3865 unsigned long action, void *data)
3866{
3867 struct device *dev = data;
99dcaded
FY
3868 struct dmar_domain *domain;
3869
3d89194a 3870 if (iommu_dummy(dev))
44cd613c
DW
3871 return 0;
3872
7e7dfab7
JL
3873 if (action != BUS_NOTIFY_UNBOUND_DRIVER &&
3874 action != BUS_NOTIFY_DEL_DEVICE)
3875 return 0;
3876
1525a29a 3877 domain = find_domain(dev);
99dcaded
FY
3878 if (!domain)
3879 return 0;
3880
3a5670e8 3881 down_read(&dmar_global_lock);
bf9c9eda 3882 domain_remove_one_dev_info(domain, dev);
ab8dfe25 3883 if (!domain_type_is_vm_or_si(domain) && list_empty(&domain->devices))
7e7dfab7 3884 domain_exit(domain);
3a5670e8 3885 up_read(&dmar_global_lock);
a97590e5 3886
99dcaded
FY
3887 return 0;
3888}
3889
3890static struct notifier_block device_nb = {
3891 .notifier_call = device_notifier,
3892};
3893
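/*
 * Illustrative sketch, for exposition only: the generic bus-notifier pattern
 * used by device_nb above.  "example_nb" and its callback are hypothetical;
 * the real registration happens in intel_iommu_init() via
 * bus_register_notifier(&pci_bus_type, &device_nb).
 */
static int example_bus_event(struct notifier_block *nb,
			     unsigned long action, void *data)
{
	struct device *dev = data;

	if (action == BUS_NOTIFY_UNBOUND_DRIVER)
		dev_info(dev, "driver unbound\n");

	return NOTIFY_OK;
}

static struct notifier_block example_nb = {
	.notifier_call = example_bus_event,
};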
75f05569
JL
3894static int intel_iommu_memory_notifier(struct notifier_block *nb,
3895 unsigned long val, void *v)
3896{
3897 struct memory_notify *mhp = v;
3898 unsigned long long start, end;
3899 unsigned long start_vpfn, last_vpfn;
3900
3901 switch (val) {
3902 case MEM_GOING_ONLINE:
3903 start = mhp->start_pfn << PAGE_SHIFT;
3904 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
3905 if (iommu_domain_identity_map(si_domain, start, end)) {
3906 pr_warn("dmar: failed to build identity map for [%llx-%llx]\n",
3907 start, end);
3908 return NOTIFY_BAD;
3909 }
3910 break;
3911
3912 case MEM_OFFLINE:
3913 case MEM_CANCEL_ONLINE:
3914 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
3915 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
3916 while (start_vpfn <= last_vpfn) {
3917 struct iova *iova;
3918 struct dmar_drhd_unit *drhd;
3919 struct intel_iommu *iommu;
ea8ea460 3920 struct page *freelist;
75f05569
JL
3921
3922 iova = find_iova(&si_domain->iovad, start_vpfn);
3923 if (iova == NULL) {
3924 pr_debug("dmar: failed to get IOVA for PFN %lx\n",
3925 start_vpfn);
3926 break;
3927 }
3928
3929 iova = split_and_remove_iova(&si_domain->iovad, iova,
3930 start_vpfn, last_vpfn);
3931 if (iova == NULL) {
3932 pr_warn("dmar: failed to split IOVA PFN [%lx-%lx]\n",
3933 start_vpfn, last_vpfn);
3934 return NOTIFY_BAD;
3935 }
3936
ea8ea460
DW
3937 freelist = domain_unmap(si_domain, iova->pfn_lo,
3938 iova->pfn_hi);
3939
75f05569
JL
3940 rcu_read_lock();
3941 for_each_active_iommu(iommu, drhd)
3942 iommu_flush_iotlb_psi(iommu, si_domain->id,
3943 iova->pfn_lo,
ea8ea460
DW
3944 iova->pfn_hi - iova->pfn_lo + 1,
3945 !freelist, 0);
75f05569 3946 rcu_read_unlock();
ea8ea460 3947 dma_free_pagelist(freelist);
75f05569
JL
3948
3949 start_vpfn = iova->pfn_hi + 1;
3950 free_iova_mem(iova);
3951 }
3952 break;
3953 }
3954
3955 return NOTIFY_OK;
3956}
3957
3958static struct notifier_block intel_iommu_memory_nb = {
3959 .notifier_call = intel_iommu_memory_notifier,
3960 .priority = 0
3961};
3962
a5459cfe
AW
3963
3964static ssize_t intel_iommu_show_version(struct device *dev,
3965 struct device_attribute *attr,
3966 char *buf)
3967{
3968 struct intel_iommu *iommu = dev_get_drvdata(dev);
3969 u32 ver = readl(iommu->reg + DMAR_VER_REG);
3970 return sprintf(buf, "%d:%d\n",
3971 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
3972}
3973static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
3974
3975static ssize_t intel_iommu_show_address(struct device *dev,
3976 struct device_attribute *attr,
3977 char *buf)
3978{
3979 struct intel_iommu *iommu = dev_get_drvdata(dev);
3980 return sprintf(buf, "%llx\n", iommu->reg_phys);
3981}
3982static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
3983
3984static ssize_t intel_iommu_show_cap(struct device *dev,
3985 struct device_attribute *attr,
3986 char *buf)
3987{
3988 struct intel_iommu *iommu = dev_get_drvdata(dev);
3989 return sprintf(buf, "%llx\n", iommu->cap);
3990}
3991static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
3992
3993static ssize_t intel_iommu_show_ecap(struct device *dev,
3994 struct device_attribute *attr,
3995 char *buf)
3996{
3997 struct intel_iommu *iommu = dev_get_drvdata(dev);
3998 return sprintf(buf, "%llx\n", iommu->ecap);
3999}
4000static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4001
4002static struct attribute *intel_iommu_attrs[] = {
4003 &dev_attr_version.attr,
4004 &dev_attr_address.attr,
4005 &dev_attr_cap.attr,
4006 &dev_attr_ecap.attr,
4007 NULL,
4008};
4009
4010static struct attribute_group intel_iommu_group = {
4011 .name = "intel-iommu",
4012 .attrs = intel_iommu_attrs,
4013};
4014
4015const struct attribute_group *intel_iommu_groups[] = {
4016 &intel_iommu_group,
4017 NULL,
4018};
4019
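/*
 * Illustrative sketch, for exposition only: one more read-only attribute
 * would follow exactly the same pattern as the four above.  The "gcmd"
 * attribute below is hypothetical and would also need to be added to
 * intel_iommu_attrs[].
 */
static ssize_t intel_iommu_show_gcmd(struct device *dev,
				     struct device_attribute *attr,
				     char *buf)
{
	struct intel_iommu *iommu = dev_get_drvdata(dev);
	return sprintf(buf, "%x\n", iommu->gcmd);
}
static DEVICE_ATTR(gcmd, S_IRUGO, intel_iommu_show_gcmd, NULL);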
ba395927
KA
4020int __init intel_iommu_init(void)
4021{
9bdc531e 4022 int ret = -ENODEV;
3a93c841 4023 struct dmar_drhd_unit *drhd;
7c919779 4024 struct intel_iommu *iommu;
ba395927 4025
a59b50e9
JC
4026 /* VT-d is required for a TXT/tboot launch, so enforce that */
4027 force_on = tboot_force_iommu();
4028
3a5670e8
JL
4029 if (iommu_init_mempool()) {
4030 if (force_on)
4031 panic("tboot: Failed to initialize iommu memory\n");
4032 return -ENOMEM;
4033 }
4034
4035 down_write(&dmar_global_lock);
a59b50e9
JC
4036 if (dmar_table_init()) {
4037 if (force_on)
4038 panic("tboot: Failed to initialize DMAR table\n");
9bdc531e 4039 goto out_free_dmar;
a59b50e9 4040 }
ba395927 4041
3a93c841
TI
4042 /*
4043 * Disable translation if already enabled prior to OS handover.
4044 */
7c919779 4045 for_each_active_iommu(iommu, drhd)
3a93c841
TI
4046 if (iommu->gcmd & DMA_GCMD_TE)
4047 iommu_disable_translation(iommu);
3a93c841 4048
c2c7286a 4049 if (dmar_dev_scope_init() < 0) {
a59b50e9
JC
4050 if (force_on)
4051 panic("tboot: Failed to initialize DMAR device scope\n");
9bdc531e 4052 goto out_free_dmar;
a59b50e9 4053 }
1886e8a9 4054
75f1cdf1 4055 if (no_iommu || dmar_disabled)
9bdc531e 4056 goto out_free_dmar;
2ae21010 4057
318fe7df
SS
4058 if (list_empty(&dmar_rmrr_units))
4059 printk(KERN_INFO "DMAR: No RMRR found\n");
4060
4061 if (list_empty(&dmar_atsr_units))
4062 printk(KERN_INFO "DMAR: No ATSR found\n");
4063
51a63e67
JC
4064 if (dmar_init_reserved_ranges()) {
4065 if (force_on)
4066 panic("tboot: Failed to reserve iommu ranges\n");
3a5670e8 4067 goto out_free_reserved_range;
51a63e67 4068 }
ba395927
KA
4069
4070 init_no_remapping_devices();
4071
b779260b 4072 ret = init_dmars();
ba395927 4073 if (ret) {
a59b50e9
JC
4074 if (force_on)
4075 panic("tboot: Failed to initialize DMARs\n");
ba395927 4076 printk(KERN_ERR "IOMMU: dmar init failed\n");
9bdc531e 4077 goto out_free_reserved_range;
ba395927 4078 }
3a5670e8 4079 up_write(&dmar_global_lock);
ba395927
KA
4080 printk(KERN_INFO
4081 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
4082
5e0d2a6f 4083 init_timer(&unmap_timer);
75f1cdf1
FT
4084#ifdef CONFIG_SWIOTLB
4085 swiotlb = 0;
4086#endif
19943b0e 4087 dma_ops = &intel_dma_ops;
4ed0d3e6 4088
134fac3f 4089 init_iommu_pm_ops();
a8bcbb0d 4090
a5459cfe
AW
4091 for_each_active_iommu(iommu, drhd)
4092 iommu->iommu_dev = iommu_device_create(NULL, iommu,
4093 intel_iommu_groups,
4094 iommu->name);
4095
4236d97d 4096 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
99dcaded 4097 bus_register_notifier(&pci_bus_type, &device_nb);
75f05569
JL
4098 if (si_domain && !hw_pass_through)
4099 register_memory_notifier(&intel_iommu_memory_nb);
99dcaded 4100
8bc1f85c
ED
4101 intel_iommu_enabled = 1;
4102
ba395927 4103 return 0;
9bdc531e
JL
4104
4105out_free_reserved_range:
4106 put_iova_domain(&reserved_iova_list);
9bdc531e
JL
4107out_free_dmar:
4108 intel_iommu_free_dmars();
3a5670e8
JL
4109 up_write(&dmar_global_lock);
4110 iommu_exit_mempool();
9bdc531e 4111 return ret;
ba395927 4112}
e820482c 4113
579305f7
AW
4114static int iommu_detach_dev_cb(struct pci_dev *pdev, u16 alias, void *opaque)
4115{
4116 struct intel_iommu *iommu = opaque;
4117
4118 iommu_detach_dev(iommu, PCI_BUS_NUM(alias), alias & 0xff);
4119 return 0;
4120}
4121
4122/*
4123 * NB - intel-iommu lacks any sort of reference counting for the users of
4124 * dependent devices. If multiple endpoints have intersecting dependent
4125 * devices, unbinding the driver from any one of them may leave the
4126 * others unable to operate.
4127 */
3199aa6b 4128static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
0bcb3e28 4129 struct device *dev)
3199aa6b 4130{
0bcb3e28 4131 if (!iommu || !dev || !dev_is_pci(dev))
3199aa6b
HW
4132 return;
4133
579305f7 4134 pci_for_each_dma_alias(to_pci_dev(dev), &iommu_detach_dev_cb, iommu);
3199aa6b
HW
4135}
4136
2c2e2c38 4137static void domain_remove_one_dev_info(struct dmar_domain *domain,
bf9c9eda 4138 struct device *dev)
c7151a8d 4139{
bca2b916 4140 struct device_domain_info *info, *tmp;
c7151a8d
WH
4141 struct intel_iommu *iommu;
4142 unsigned long flags;
4143 int found = 0;
156baca8 4144 u8 bus, devfn;
c7151a8d 4145
bf9c9eda 4146 iommu = device_to_iommu(dev, &bus, &devfn);
c7151a8d
WH
4147 if (!iommu)
4148 return;
4149
4150 spin_lock_irqsave(&device_domain_lock, flags);
bca2b916 4151 list_for_each_entry_safe(info, tmp, &domain->devices, link) {
bf9c9eda
DW
4152 if (info->iommu == iommu && info->bus == bus &&
4153 info->devfn == devfn) {
109b9b04 4154 unlink_domain_info(info);
c7151a8d
WH
4155 spin_unlock_irqrestore(&device_domain_lock, flags);
4156
93a23a72 4157 iommu_disable_dev_iotlb(info);
c7151a8d 4158 iommu_detach_dev(iommu, info->bus, info->devfn);
bf9c9eda 4159 iommu_detach_dependent_devices(iommu, dev);
c7151a8d
WH
4160 free_devinfo_mem(info);
4161
4162 spin_lock_irqsave(&device_domain_lock, flags);
4163
4164 if (found)
4165 break;
4166 else
4167 continue;
4168 }
4169
4170 /* if there are no other devices under the same iommu
4171 * owned by this domain, clear this iommu from iommu_bmp and
4172 * update the iommu count and coherency
4173 */
8bbc4410 4174 if (info->iommu == iommu)
c7151a8d
WH
4175 found = 1;
4176 }
4177
3e7abe25
RD
4178 spin_unlock_irqrestore(&device_domain_lock, flags);
4179
c7151a8d 4180 if (found == 0) {
fb170fb4
JL
4181 domain_detach_iommu(domain, iommu);
4182 if (!domain_type_is_vm_or_si(domain))
4183 iommu_detach_domain(domain, iommu);
c7151a8d 4184 }
c7151a8d
WH
4185}
4186
2c2e2c38 4187static int md_domain_init(struct dmar_domain *domain, int guest_width)
5e98c4b1
WH
4188{
4189 int adjust_width;
4190
4191 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
5e98c4b1
WH
4192 domain_reserve_special_ranges(domain);
4193
4194 /* calculate AGAW */
4195 domain->gaw = guest_width;
4196 adjust_width = guestwidth_to_adjustwidth(guest_width);
4197 domain->agaw = width_to_agaw(adjust_width);
4198
5e98c4b1 4199 domain->iommu_coherency = 0;
c5b15255 4200 domain->iommu_snooping = 0;
6dd9a7c7 4201 domain->iommu_superpage = 0;
fe40f1e0 4202 domain->max_addr = 0;
5e98c4b1
WH
4203
4204 /* always allocate the top pgd */
4c923d47 4205 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
5e98c4b1
WH
4206 if (!domain->pgd)
4207 return -ENOMEM;
4208 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4209 return 0;
4210}
4211
5d450806 4212static int intel_iommu_domain_init(struct iommu_domain *domain)
38717946 4213{
5d450806 4214 struct dmar_domain *dmar_domain;
38717946 4215
ab8dfe25 4216 dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE);
5d450806 4217 if (!dmar_domain) {
38717946 4218 printk(KERN_ERR
5d450806
JR
4219 "intel_iommu_domain_init: dmar_domain == NULL\n");
4220 return -ENOMEM;
38717946 4221 }
2c2e2c38 4222 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
38717946 4223 printk(KERN_ERR
5d450806 4224 "intel_iommu_domain_init() failed\n");
92d03cc8 4225 domain_exit(dmar_domain);
5d450806 4226 return -ENOMEM;
38717946 4227 }
8140a95d 4228 domain_update_iommu_cap(dmar_domain);
5d450806 4229 domain->priv = dmar_domain;
faa3d6f5 4230
8a0e715b
JR
4231 domain->geometry.aperture_start = 0;
4232 domain->geometry.aperture_end = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
4233 domain->geometry.force_aperture = true;
4234
5d450806 4235 return 0;
38717946 4236}
38717946 4237
5d450806 4238static void intel_iommu_domain_destroy(struct iommu_domain *domain)
38717946 4239{
5d450806
JR
4240 struct dmar_domain *dmar_domain = domain->priv;
4241
4242 domain->priv = NULL;
92d03cc8 4243 domain_exit(dmar_domain);
38717946 4244}
38717946 4245
4c5478c9
JR
4246static int intel_iommu_attach_device(struct iommu_domain *domain,
4247 struct device *dev)
38717946 4248{
4c5478c9 4249 struct dmar_domain *dmar_domain = domain->priv;
fe40f1e0
WH
4250 struct intel_iommu *iommu;
4251 int addr_width;
156baca8 4252 u8 bus, devfn;
faa3d6f5 4253
7207d8f9
DW
4254 /* normally dev is not mapped */
4255 if (unlikely(domain_context_mapped(dev))) {
faa3d6f5
WH
4256 struct dmar_domain *old_domain;
4257
1525a29a 4258 old_domain = find_domain(dev);
faa3d6f5 4259 if (old_domain) {
ab8dfe25 4260 if (domain_type_is_vm_or_si(dmar_domain))
bf9c9eda 4261 domain_remove_one_dev_info(old_domain, dev);
faa3d6f5
WH
4262 else
4263 domain_remove_dev_info(old_domain);
4264 }
4265 }
4266
156baca8 4267 iommu = device_to_iommu(dev, &bus, &devfn);
fe40f1e0
WH
4268 if (!iommu)
4269 return -ENODEV;
4270
4271 /* check if this iommu agaw is sufficient for max mapped address */
4272 addr_width = agaw_to_width(iommu->agaw);
a99c47a2
TL
4273 if (addr_width > cap_mgaw(iommu->cap))
4274 addr_width = cap_mgaw(iommu->cap);
4275
4276 if (dmar_domain->max_addr > (1LL << addr_width)) {
4277 printk(KERN_ERR "%s: iommu width (%d) is not "
fe40f1e0 4278 "sufficient for the mapped address (%llx)\n",
a99c47a2 4279 __func__, addr_width, dmar_domain->max_addr);
fe40f1e0
WH
4280 return -EFAULT;
4281 }
a99c47a2
TL
4282 dmar_domain->gaw = addr_width;
4283
4284 /*
4285 * Knock out extra levels of page tables if necessary
4286 */
4287 while (iommu->agaw < dmar_domain->agaw) {
4288 struct dma_pte *pte;
4289
4290 pte = dmar_domain->pgd;
4291 if (dma_pte_present(pte)) {
25cbff16
SY
4292 dmar_domain->pgd = (struct dma_pte *)
4293 phys_to_virt(dma_pte_addr(pte));
7a661013 4294 free_pgtable_page(pte);
a99c47a2
TL
4295 }
4296 dmar_domain->agaw--;
4297 }
fe40f1e0 4298
5913c9bf 4299 return domain_add_dev_info(dmar_domain, dev, CONTEXT_TT_MULTI_LEVEL);
38717946 4300}
38717946 4301
4c5478c9
JR
4302static void intel_iommu_detach_device(struct iommu_domain *domain,
4303 struct device *dev)
38717946 4304{
4c5478c9 4305 struct dmar_domain *dmar_domain = domain->priv;
4c5478c9 4306
bf9c9eda 4307 domain_remove_one_dev_info(dmar_domain, dev);
faa3d6f5 4308}
c7151a8d 4309
b146a1c9
JR
4310static int intel_iommu_map(struct iommu_domain *domain,
4311 unsigned long iova, phys_addr_t hpa,
5009065d 4312 size_t size, int iommu_prot)
faa3d6f5 4313{
dde57a21 4314 struct dmar_domain *dmar_domain = domain->priv;
fe40f1e0 4315 u64 max_addr;
dde57a21 4316 int prot = 0;
faa3d6f5 4317 int ret;
fe40f1e0 4318
dde57a21
JR
4319 if (iommu_prot & IOMMU_READ)
4320 prot |= DMA_PTE_READ;
4321 if (iommu_prot & IOMMU_WRITE)
4322 prot |= DMA_PTE_WRITE;
9cf06697
SY
4323 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
4324 prot |= DMA_PTE_SNP;
dde57a21 4325
163cc52c 4326 max_addr = iova + size;
dde57a21 4327 if (dmar_domain->max_addr < max_addr) {
fe40f1e0
WH
4328 u64 end;
4329
4330 /* check if minimum agaw is sufficient for mapped address */
8954da1f 4331 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
fe40f1e0 4332 if (end < max_addr) {
8954da1f 4333 printk(KERN_ERR "%s: iommu width (%d) is not "
fe40f1e0 4334 "sufficient for the mapped address (%llx)\n",
8954da1f 4335 __func__, dmar_domain->gaw, max_addr);
fe40f1e0
WH
4336 return -EFAULT;
4337 }
dde57a21 4338 dmar_domain->max_addr = max_addr;
fe40f1e0 4339 }
ad051221
DW
4340 /* Round size up to the next multiple of PAGE_SIZE if it, together with
4341 the low bits of hpa, would take us onto the next page */
88cb6a74 4342 size = aligned_nrpages(hpa, size);
ad051221
DW
4343 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
4344 hpa >> VTD_PAGE_SHIFT, size, prot);
faa3d6f5 4345 return ret;
38717946 4346}
38717946 4347
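/*
 * Illustrative sketch, for exposition only: the rounding performed by
 * aligned_nrpages() above.  32 bytes starting 16 bytes before a 4KiB page
 * boundary straddle two pages, so two page frames must be mapped even though
 * the size argument is tiny.  The values are hypothetical.
 */
static void example_aligned_nrpages_rounding(void)
{
	unsigned long npages = aligned_nrpages(0x1ff0, 0x20);

	WARN_ON(npages != 2);	/* 0x1ff0..0x200f touches two pages */
}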
5009065d 4348static size_t intel_iommu_unmap(struct iommu_domain *domain,
ea8ea460 4349 unsigned long iova, size_t size)
38717946 4350{
dde57a21 4351 struct dmar_domain *dmar_domain = domain->priv;
ea8ea460
DW
4352 struct page *freelist = NULL;
4353 struct intel_iommu *iommu;
4354 unsigned long start_pfn, last_pfn;
4355 unsigned int npages;
4356 int iommu_id, num, ndomains, level = 0;
5cf0a76f
DW
4357
4358 /* Cope with horrid API which requires us to unmap more than the
4359 size argument if it happens to be a large-page mapping. */
4360 if (!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level))
4361 BUG();
4362
4363 if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
4364 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4b99d352 4365
ea8ea460
DW
4366 start_pfn = iova >> VTD_PAGE_SHIFT;
4367 last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
4368
4369 freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
4370
4371 npages = last_pfn - start_pfn + 1;
4372
4373 for_each_set_bit(iommu_id, dmar_domain->iommu_bmp, g_num_of_iommus) {
4374 iommu = g_iommus[iommu_id];
4375
4376 /*
4377 * find bit position of dmar_domain
4378 */
4379 ndomains = cap_ndoms(iommu->cap);
4380 for_each_set_bit(num, iommu->domain_ids, ndomains) {
4381 if (iommu->domains[num] == dmar_domain)
4382 iommu_flush_iotlb_psi(iommu, num, start_pfn,
4383 npages, !freelist, 0);
4384 }
4385
4386 }
4387
4388 dma_free_pagelist(freelist);
fe40f1e0 4389
163cc52c
DW
4390 if (dmar_domain->max_addr == iova + size)
4391 dmar_domain->max_addr = iova;
b146a1c9 4392
5cf0a76f 4393 return size;
38717946 4394}
38717946 4395
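/*
 * Illustrative sketch, for exposition only: the "horrid API" note above in
 * practice.  If "iova" happens to sit inside a 2MiB superpage mapping,
 * requesting a 4KiB unmap still tears down, and reports, the whole 2MiB.
 * "domain" and "iova" are assumed to be supplied by the caller.
 */
static void example_unmap_inside_superpage(struct iommu_domain *domain,
					   unsigned long iova)
{
	size_t unmapped = iommu_unmap(domain, iova, SZ_4K);

	/* unmapped comes back as SZ_2M when a 2MiB superpage was hit */
	WARN_ON(unmapped < SZ_4K);
}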
d14d6577 4396static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
bb5547ac 4397 dma_addr_t iova)
38717946 4398{
d14d6577 4399 struct dmar_domain *dmar_domain = domain->priv;
38717946 4400 struct dma_pte *pte;
5cf0a76f 4401 int level = 0;
faa3d6f5 4402 u64 phys = 0;
38717946 4403
5cf0a76f 4404 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
38717946 4405 if (pte)
faa3d6f5 4406 phys = dma_pte_addr(pte);
38717946 4407
faa3d6f5 4408 return phys;
38717946 4409}
a8bcbb0d 4410
dbb9fd86
SY
4411static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
4412 unsigned long cap)
4413{
4414 struct dmar_domain *dmar_domain = domain->priv;
4415
4416 if (cap == IOMMU_CAP_CACHE_COHERENCY)
4417 return dmar_domain->iommu_snooping;
323f99cb 4418 if (cap == IOMMU_CAP_INTR_REMAP)
95a02e97 4419 return irq_remapping_enabled;
dbb9fd86
SY
4420
4421 return 0;
4422}
4423
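/*
 * Illustrative sketch, for exposition only: how a VFIO/KVM-style caller
 * typically uses the capability query above to decide whether to request
 * snooped (cache-coherent) mappings.  "example_map_prot" is hypothetical.
 */
static int example_map_prot(struct iommu_domain *domain)
{
	int prot = IOMMU_READ | IOMMU_WRITE;

	/* resolves to intel_iommu_domain_has_cap(), i.e. iommu_snooping */
	if (iommu_domain_has_cap(domain, IOMMU_CAP_CACHE_COHERENCY))
		prot |= IOMMU_CACHE;

	return prot;
}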
abdfdde2
AW
4424static int intel_iommu_add_device(struct device *dev)
4425{
a5459cfe 4426 struct intel_iommu *iommu;
abdfdde2 4427 struct iommu_group *group;
156baca8 4428 u8 bus, devfn;
70ae6f0d 4429
a5459cfe
AW
4430 iommu = device_to_iommu(dev, &bus, &devfn);
4431 if (!iommu)
70ae6f0d
AW
4432 return -ENODEV;
4433
a5459cfe 4434 iommu_device_link(iommu->iommu_dev, dev);
a4ff1fc2 4435
e17f9ff4 4436 group = iommu_group_get_for_dev(dev);
783f157b 4437
e17f9ff4
AW
4438 if (IS_ERR(group))
4439 return PTR_ERR(group);
bcb71abe 4440
abdfdde2 4441 iommu_group_put(group);
e17f9ff4 4442 return 0;
abdfdde2 4443}
70ae6f0d 4444
abdfdde2
AW
4445static void intel_iommu_remove_device(struct device *dev)
4446{
a5459cfe
AW
4447 struct intel_iommu *iommu;
4448 u8 bus, devfn;
4449
4450 iommu = device_to_iommu(dev, &bus, &devfn);
4451 if (!iommu)
4452 return;
4453
abdfdde2 4454 iommu_group_remove_device(dev);
a5459cfe
AW
4455
4456 iommu_device_unlink(iommu->iommu_dev, dev);
70ae6f0d
AW
4457}
4458
b22f6434 4459static const struct iommu_ops intel_iommu_ops = {
a8bcbb0d
JR
4460 .domain_init = intel_iommu_domain_init,
4461 .domain_destroy = intel_iommu_domain_destroy,
4462 .attach_dev = intel_iommu_attach_device,
4463 .detach_dev = intel_iommu_detach_device,
b146a1c9
JR
4464 .map = intel_iommu_map,
4465 .unmap = intel_iommu_unmap,
a8bcbb0d 4466 .iova_to_phys = intel_iommu_iova_to_phys,
dbb9fd86 4467 .domain_has_cap = intel_iommu_domain_has_cap,
abdfdde2
AW
4468 .add_device = intel_iommu_add_device,
4469 .remove_device = intel_iommu_remove_device,
6d1c56a9 4470 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
a8bcbb0d 4471};
9af88143 4472
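/*
 * Illustrative sketch, for exposition only: how a hypothetical consumer
 * (VFIO/KVM style) exercises the ops above through the generic IOMMU API.
 * "pdev", "iova", "phys" and "size" are assumed to be supplied by the
 * caller; error handling is kept minimal.
 */
static int example_use_iommu_api(struct pci_dev *pdev, unsigned long iova,
				 phys_addr_t phys, size_t size)
{
	struct iommu_domain *domain;
	int ret;

	domain = iommu_domain_alloc(&pci_bus_type);	/* -> intel_iommu_domain_init() */
	if (!domain)
		return -ENOMEM;

	ret = iommu_attach_device(domain, &pdev->dev);	/* -> intel_iommu_attach_device() */
	if (ret)
		goto out_free;

	ret = iommu_map(domain, iova, phys, size,	/* -> intel_iommu_map() */
			IOMMU_READ | IOMMU_WRITE);
	if (ret)
		goto out_detach;

	/* ... the device may now DMA to [iova, iova + size) ... */

	iommu_unmap(domain, iova, size);		/* -> intel_iommu_unmap() */
out_detach:
	iommu_detach_device(domain, &pdev->dev);	/* -> intel_iommu_detach_device() */
out_free:
	iommu_domain_free(domain);			/* -> intel_iommu_domain_destroy() */
	return ret;
}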
9452618e
DV
4473static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
4474{
4475 /* G4x/GM45 integrated gfx dmar support is totally busted. */
4476 printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
4477 dmar_map_gfx = 0;
4478}
4479
4480DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
4481DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
4482DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
4483DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
4484DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
4485DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
4486DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
4487
d34d6517 4488static void quirk_iommu_rwbf(struct pci_dev *dev)
9af88143
DW
4489{
4490 /*
4491 * Mobile 4 Series Chipset neglects to set RWBF capability,
210561ff 4492 * but needs it. Same seems to hold for the desktop versions.
9af88143
DW
4493 */
4494 printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
4495 rwbf_quirk = 1;
4496}
4497
4498DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
210561ff
DV
4499DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
4500DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
4501DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
4502DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
4503DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
4504DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
e0fc7e0b 4505
eecfd57f
AJ
4506#define GGC 0x52
4507#define GGC_MEMORY_SIZE_MASK (0xf << 8)
4508#define GGC_MEMORY_SIZE_NONE (0x0 << 8)
4509#define GGC_MEMORY_SIZE_1M (0x1 << 8)
4510#define GGC_MEMORY_SIZE_2M (0x3 << 8)
4511#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
4512#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
4513#define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
4514#define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
4515
d34d6517 4516static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
9eecabcb
DW
4517{
4518 unsigned short ggc;
4519
eecfd57f 4520 if (pci_read_config_word(dev, GGC, &ggc))
9eecabcb
DW
4521 return;
4522
eecfd57f 4523 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
9eecabcb
DW
4524 printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
4525 dmar_map_gfx = 0;
6fbcfb3e
DW
4526 } else if (dmar_map_gfx) {
4527 /* we have to ensure the gfx device is idle before we flush */
4528 printk(KERN_INFO "DMAR: Disabling batched IOTLB flush on Ironlake\n");
4529 intel_iommu_strict = 1;
4530 }
9eecabcb
DW
4531}
4532DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
4533DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
4534DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
4535DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
4536
e0fc7e0b
DW
4537/* On Tylersburg chipsets, some BIOSes have been known to enable the
4538 ISOCH DMAR unit for the Azalia sound device, but not give it any
4539 TLB entries, which causes it to deadlock. Check for that. We do
4540 this in a function called from init_dmars(), instead of in a PCI
4541 quirk, because we don't want to print the obnoxious "BIOS broken"
4542 message if VT-d is actually disabled.
4543*/
4544static void __init check_tylersburg_isoch(void)
4545{
4546 struct pci_dev *pdev;
4547 uint32_t vtisochctrl;
4548
4549 /* If there's no Azalia in the system anyway, forget it. */
4550 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
4551 if (!pdev)
4552 return;
4553 pci_dev_put(pdev);
4554
4555 /* System Management Registers. Might be hidden, in which case
4556 we can't do the sanity check. But that's OK, because the
4557 known-broken BIOSes _don't_ actually hide it, so far. */
4558 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
4559 if (!pdev)
4560 return;
4561
4562 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
4563 pci_dev_put(pdev);
4564 return;
4565 }
4566
4567 pci_dev_put(pdev);
4568
4569 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
4570 if (vtisochctrl & 1)
4571 return;
4572
4573 /* Drop all bits other than the number of TLB entries */
4574 vtisochctrl &= 0x1c;
4575
4576 /* If we have the recommended number of TLB entries (16), fine. */
4577 if (vtisochctrl == 0x10)
4578 return;
4579
4580 /* Zero TLB entries? You get to ride the short bus to school. */
4581 if (!vtisochctrl) {
4582 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
4583 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4584 dmi_get_system_info(DMI_BIOS_VENDOR),
4585 dmi_get_system_info(DMI_BIOS_VERSION),
4586 dmi_get_system_info(DMI_PRODUCT_VERSION));
4587 iommu_identity_mapping |= IDENTMAP_AZALIA;
4588 return;
4589 }
4590
4591 printk(KERN_WARNING "DMAR: The recommended number of TLB entries for the ISOCH unit is 16; your BIOS set %d\n",
4592 vtisochctrl);
4593}