/*
 * Copyright © 2006-2014 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * Authors: David Woodhouse <dwmw2@infradead.org>,
 *          Ashok Raj <ashok.raj@intel.com>,
 *          Shaohua Li <shaohua.li@intel.com>,
 *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
 *          Fenghua Yu <fenghua.yu@intel.com>
 */

#include <linux/init.h>
#include <linux/bitmap.h>
#include <linux/debugfs.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/dma-mapping.h>
#include <linux/mempool.h>
#include <linux/memory.h>
#include <linux/timer.h>
#include <linux/iova.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
#include <linux/syscore_ops.h>
#include <linux/tboot.h>
#include <linux/dmi.h>
#include <linux/pci-ats.h>
#include <linux/memblock.h>
#include <linux/dma-contiguous.h>
#include <asm/irq_remapping.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>

#include "irq_remapping.h"

#define ROOT_SIZE		VTD_PAGE_SIZE
#define CONTEXT_SIZE		VTD_PAGE_SIZE

#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)

#define IOAPIC_RANGE_START	(0xfee00000)
#define IOAPIC_RANGE_END	(0xfeefffff)
#define IOVA_START_ADDR		(0x1000)

#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48

#define MAX_AGAW_WIDTH 64
#define MAX_AGAW_PFN_WIDTH	(MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)

#define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)

/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
   to match. That way, we can use 'unsigned long' for PFNs with impunity. */
#define DOMAIN_MAX_PFN(gaw)	((unsigned long) min_t(uint64_t, \
				__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
#define DOMAIN_MAX_ADDR(gaw)	(((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
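/*
 * Illustrative note (not part of the original source): with the default
 * 48-bit guest address width, __DOMAIN_MAX_PFN(48) == (1ULL << 36) - 1,
 * the last 4KiB IOVA page below 256TiB.  On 64-bit builds DOMAIN_MAX_PFN(48)
 * is the same value; on 32-bit it is clamped to (unsigned long)-1 so that
 * PFNs always fit in an unsigned long.
 */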

#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
#define DMA_32BIT_PFN		IOVA_PFN(DMA_BIT_MASK(32))
#define DMA_64BIT_PFN		IOVA_PFN(DMA_BIT_MASK(64))

/* page table handling */
#define LEVEL_STRIDE		(9)
#define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)

/*
 * This bitmap is used to advertise the page sizes our hardware supports
 * to the IOMMU core, which will then use this information to split
 * physically contiguous memory regions it is mapping into page sizes
 * that we support.
 *
 * Traditionally the IOMMU core just handed us the mappings directly,
 * after making sure the size is an order of a 4KiB page and that the
 * mapping has natural alignment.
 *
 * To retain this behavior, we currently advertise that we support
 * all page sizes that are an order of 4KiB.
 *
 * If at some point we'd like to utilize the IOMMU core's new behavior,
 * we could change this to advertise the real page sizes we support.
 */
#define INTEL_IOMMU_PGSIZES	(~0xFFFUL)

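/*
 * Illustrative note (not part of the original source): ~0xFFFUL has every
 * bit from 12 upwards set, so the IOMMU core sees 4KiB, 8KiB, 16KiB, ...
 * as supported sizes and can hand any naturally aligned, power-of-two
 * region to the map callback in a single call.
 */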
df08cdc7
AM
100static inline int agaw_to_level(int agaw)
101{
102 return agaw + 2;
103}
104
105static inline int agaw_to_width(int agaw)
106{
5c645b35 107 return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
df08cdc7
AM
108}
109
110static inline int width_to_agaw(int width)
111{
5c645b35 112 return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
df08cdc7
AM
113}
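/*
 * Illustrative note (not part of the original source): a 39-bit address
 * width maps to agaw 1 and a 3-level page table, 48 bits to agaw 2 and
 * 4 levels; each additional level adds LEVEL_STRIDE == 9 bits of
 * addressable IOVA.
 */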
114
115static inline unsigned int level_to_offset_bits(int level)
116{
117 return (level - 1) * LEVEL_STRIDE;
118}
119
120static inline int pfn_level_offset(unsigned long pfn, int level)
121{
122 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
123}
124
125static inline unsigned long level_mask(int level)
126{
127 return -1UL << level_to_offset_bits(level);
128}
129
130static inline unsigned long level_size(int level)
131{
132 return 1UL << level_to_offset_bits(level);
133}
134
135static inline unsigned long align_to_level(unsigned long pfn, int level)
136{
137 return (pfn + level_size(level) - 1) & level_mask(level);
138}
fd18de50 139
6dd9a7c7
YS
140static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
141{
5c645b35 142 return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
6dd9a7c7
YS
143}
144
dd4e8319
DW
145/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
146 are never going to work. */
147static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
148{
149 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
150}
151
152static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
153{
154 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
155}
156static inline unsigned long page_to_dma_pfn(struct page *pg)
157{
158 return mm_to_dma_pfn(page_to_pfn(pg));
159}
160static inline unsigned long virt_to_dma_pfn(void *p)
161{
162 return page_to_dma_pfn(virt_to_page(p));
163}
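/*
 * Illustrative note (not part of the original source): VTD_PAGE_SHIFT is
 * fixed at 12, so with 4KiB MM pages these conversions are no-ops; they
 * only shift when PAGE_SIZE is larger than the 4KiB VT-d page size.
 */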
164
d9630fe9
WH
165/* global iommu list, set NULL for ignored DMAR units */
166static struct intel_iommu **g_iommus;
167
e0fc7e0b 168static void __init check_tylersburg_isoch(void);
9af88143
DW
169static int rwbf_quirk;
170
b779260b
JC
171/*
172 * set to 1 to panic kernel if can't successfully enable VT-d
173 * (used when kernel is launched w/ TXT)
174 */
175static int force_on = 0;
176
46b08e1a
MM
177/*
178 * 0: Present
179 * 1-11: Reserved
180 * 12-63: Context Ptr (12 - (haw-1))
181 * 64-127: Reserved
182 */
183struct root_entry {
184 u64 val;
185 u64 rsvd1;
186};
187#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
188static inline bool root_present(struct root_entry *root)
189{
190 return (root->val & 1);
191}
192static inline void set_root_present(struct root_entry *root)
193{
194 root->val |= 1;
195}
196static inline void set_root_value(struct root_entry *root, unsigned long value)
197{
198 root->val |= value & VTD_PAGE_MASK;
199}
200
201static inline struct context_entry *
202get_context_addr_from_root(struct root_entry *root)
203{
204 return (struct context_entry *)
205 (root_present(root)?phys_to_virt(
206 root->val & VTD_PAGE_MASK) :
207 NULL);
208}
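/*
 * Illustrative note (not part of the original source): a populated root
 * entry is just the 4KiB-aligned physical address of that bus's context
 * table with bit 0 (present) set, i.e. roughly
 * root->val == virt_to_phys(context_table) | 1.
 */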
209
7a8fc25e
MM
210/*
211 * low 64 bits:
212 * 0: present
213 * 1: fault processing disable
214 * 2-3: translation type
215 * 12-63: address space root
216 * high 64 bits:
217 * 0-2: address width
218 * 3-6: aval
219 * 8-23: domain id
220 */
221struct context_entry {
222 u64 lo;
223 u64 hi;
224};
c07e7d21
MM
225
226static inline bool context_present(struct context_entry *context)
227{
228 return (context->lo & 1);
229}
230static inline void context_set_present(struct context_entry *context)
231{
232 context->lo |= 1;
233}
234
235static inline void context_set_fault_enable(struct context_entry *context)
236{
237 context->lo &= (((u64)-1) << 2) | 1;
238}
239
c07e7d21
MM
240static inline void context_set_translation_type(struct context_entry *context,
241 unsigned long value)
242{
243 context->lo &= (((u64)-1) << 4) | 3;
244 context->lo |= (value & 3) << 2;
245}
246
247static inline void context_set_address_root(struct context_entry *context,
248 unsigned long value)
249{
250 context->lo |= value & VTD_PAGE_MASK;
251}
252
253static inline void context_set_address_width(struct context_entry *context,
254 unsigned long value)
255{
256 context->hi |= value & 7;
257}
258
259static inline void context_set_domain_id(struct context_entry *context,
260 unsigned long value)
261{
262 context->hi |= (value & ((1 << 16) - 1)) << 8;
263}
264
265static inline void context_clear_entry(struct context_entry *context)
266{
267 context->lo = 0;
268 context->hi = 0;
269}
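/*
 * Illustrative note (not part of the original source): after
 * domain_context_mapping_one() has run, the low word holds the page-table
 * root address plus the translation type in bits 2-3 and the present bit,
 * while the high word carries the address width in bits 0-2 and the
 * domain id in bits 8-23, matching the setters above.
 */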
7a8fc25e 270
622ba12a
MM
271/*
272 * 0: readable
273 * 1: writable
274 * 2-6: reserved
275 * 7: super page
9cf06697
SY
276 * 8-10: available
277 * 11: snoop behavior
622ba12a
MM
 278 * 12-63: Host physical address
279 */
280struct dma_pte {
281 u64 val;
282};
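/*
 * Illustrative note (not part of the original source): a leaf PTE for a
 * writable 2MiB superpage would look like
 * (hpa & VTD_PAGE_MASK) | DMA_PTE_LARGE_PAGE | DMA_PTE_WRITE | DMA_PTE_READ,
 * with DMA_PTE_LARGE_PAGE being bit 7 of the layout above.
 */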
622ba12a 283
19c239ce
MM
284static inline void dma_clear_pte(struct dma_pte *pte)
285{
286 pte->val = 0;
287}
288
19c239ce
MM
289static inline u64 dma_pte_addr(struct dma_pte *pte)
290{
c85994e4
DW
291#ifdef CONFIG_64BIT
292 return pte->val & VTD_PAGE_MASK;
293#else
294 /* Must have a full atomic 64-bit read */
1a8bd481 295 return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
c85994e4 296#endif
19c239ce
MM
297}
298
19c239ce
MM
299static inline bool dma_pte_present(struct dma_pte *pte)
300{
301 return (pte->val & 3) != 0;
302}
622ba12a 303
4399c8bf
AK
304static inline bool dma_pte_superpage(struct dma_pte *pte)
305{
c3c75eb7 306 return (pte->val & DMA_PTE_LARGE_PAGE);
4399c8bf
AK
307}
308
75e6bf96
DW
309static inline int first_pte_in_page(struct dma_pte *pte)
310{
311 return !((unsigned long)pte & ~VTD_PAGE_MASK);
312}
313
2c2e2c38
FY
314/*
 315 * This domain is a static identity-mapping domain.
 316 * 1. This domain creates a static 1:1 mapping to all usable memory.
 317 * 2. It maps to each iommu if successful.
 318 * 3. Each iommu maps to this domain if successful.
319 */
19943b0e
DW
320static struct dmar_domain *si_domain;
321static int hw_pass_through = 1;
2c2e2c38 322
1ce28feb
WH
 323/* domain represents a virtual machine; more than one device
 324 * across iommus may be owned by one domain, e.g. a kvm guest.
325 */
ab8dfe25 326#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 0)
1ce28feb 327
2c2e2c38 328/* si_domain contains multiple devices */
ab8dfe25 329#define DOMAIN_FLAG_STATIC_IDENTITY (1 << 1)
2c2e2c38 330
1b198bb0
MT
331/* define the limit of IOMMUs supported in each domain */
332#ifdef CONFIG_X86
333# define IOMMU_UNITS_SUPPORTED MAX_IO_APICS
334#else
335# define IOMMU_UNITS_SUPPORTED 64
336#endif
337
99126f7c
MM
338struct dmar_domain {
339 int id; /* domain id */
4c923d47 340 int nid; /* node id */
1b198bb0
MT
341 DECLARE_BITMAP(iommu_bmp, IOMMU_UNITS_SUPPORTED);
342 /* bitmap of iommus this domain uses*/
99126f7c
MM
343
344 struct list_head devices; /* all devices' list */
345 struct iova_domain iovad; /* iova's that belong to this domain */
346
347 struct dma_pte *pgd; /* virtual address */
99126f7c
MM
348 int gaw; /* max guest address width */
349
350 /* adjusted guest address width, 0 is level 2 30-bit */
351 int agaw;
352
3b5410e7 353 int flags; /* flags to find out type of domain */
8e604097
WH
354
355 int iommu_coherency;/* indicate coherency of iommu access */
58c610bd 356 int iommu_snooping; /* indicate snooping control feature*/
c7151a8d 357 int iommu_count; /* reference count of iommu */
6dd9a7c7
YS
358 int iommu_superpage;/* Level of superpages supported:
359 0 == 4KiB (no superpages), 1 == 2MiB,
360 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
c7151a8d 361 spinlock_t iommu_lock; /* protect iommu set in domain */
fe40f1e0 362 u64 max_addr; /* maximum mapped address */
99126f7c
MM
363};
364
a647dacb
MM
365/* PCI domain-device relationship */
366struct device_domain_info {
367 struct list_head link; /* link to domain siblings */
368 struct list_head global; /* link to global list */
276dbf99 369 u8 bus; /* PCI bus number */
a647dacb 370 u8 devfn; /* PCI devfn number */
0bcb3e28 371 struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
93a23a72 372 struct intel_iommu *iommu; /* IOMMU used by this device */
a647dacb
MM
373 struct dmar_domain *domain; /* pointer to domain */
374};
375
b94e4117
JL
376struct dmar_rmrr_unit {
377 struct list_head list; /* list of rmrr units */
378 struct acpi_dmar_header *hdr; /* ACPI header */
379 u64 base_address; /* reserved base address*/
380 u64 end_address; /* reserved end address */
832bd858 381 struct dmar_dev_scope *devices; /* target devices */
b94e4117
JL
382 int devices_cnt; /* target device count */
383};
384
385struct dmar_atsr_unit {
386 struct list_head list; /* list of ATSR units */
387 struct acpi_dmar_header *hdr; /* ACPI header */
832bd858 388 struct dmar_dev_scope *devices; /* target devices */
b94e4117
JL
389 int devices_cnt; /* target device count */
390 u8 include_all:1; /* include all ports */
391};
392
393static LIST_HEAD(dmar_atsr_units);
394static LIST_HEAD(dmar_rmrr_units);
395
396#define for_each_rmrr_units(rmrr) \
397 list_for_each_entry(rmrr, &dmar_rmrr_units, list)
398
5e0d2a6f 399static void flush_unmaps_timeout(unsigned long data);
400
b707cb02 401static DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
5e0d2a6f 402
80b20dd8 403#define HIGH_WATER_MARK 250
404struct deferred_flush_tables {
405 int next;
406 struct iova *iova[HIGH_WATER_MARK];
407 struct dmar_domain *domain[HIGH_WATER_MARK];
ea8ea460 408 struct page *freelist[HIGH_WATER_MARK];
80b20dd8 409};
410
411static struct deferred_flush_tables *deferred_flush;
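/*
 * Illustrative note (not part of the original source): lazy unmaps queue
 * their iova, domain and page-table freelist here per IOMMU; the batch is
 * drained with a single IOTLB flush once HIGH_WATER_MARK deferred entries
 * accumulate or unmap_timer fires.
 */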
412
5e0d2a6f 413/* bitmap for indexing intel_iommus */
5e0d2a6f 414static int g_num_of_iommus;
415
416static DEFINE_SPINLOCK(async_umap_flush_lock);
417static LIST_HEAD(unmaps_to_do);
418
419static int timer_on;
420static long list_size;
5e0d2a6f 421
92d03cc8 422static void domain_exit(struct dmar_domain *domain);
ba395927 423static void domain_remove_dev_info(struct dmar_domain *domain);
b94e4117 424static void domain_remove_one_dev_info(struct dmar_domain *domain,
bf9c9eda 425 struct device *dev);
92d03cc8 426static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
0bcb3e28 427 struct device *dev);
2a46ddf7
JL
428static int domain_detach_iommu(struct dmar_domain *domain,
429 struct intel_iommu *iommu);
ba395927 430
d3f13810 431#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
0cd5c3c8
KM
432int dmar_disabled = 0;
433#else
434int dmar_disabled = 1;
d3f13810 435#endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
0cd5c3c8 436
8bc1f85c
ED
437int intel_iommu_enabled = 0;
438EXPORT_SYMBOL_GPL(intel_iommu_enabled);
439
2d9e667e 440static int dmar_map_gfx = 1;
7d3b03ce 441static int dmar_forcedac;
5e0d2a6f 442static int intel_iommu_strict;
6dd9a7c7 443static int intel_iommu_superpage = 1;
ba395927 444
c0771df8
DW
445int intel_iommu_gfx_mapped;
446EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
447
ba395927
KA
448#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
449static DEFINE_SPINLOCK(device_domain_lock);
450static LIST_HEAD(device_domain_list);
451
b22f6434 452static const struct iommu_ops intel_iommu_ops;
a8bcbb0d 453
ba395927
KA
454static int __init intel_iommu_setup(char *str)
455{
456 if (!str)
457 return -EINVAL;
458 while (*str) {
0cd5c3c8
KM
459 if (!strncmp(str, "on", 2)) {
460 dmar_disabled = 0;
461 printk(KERN_INFO "Intel-IOMMU: enabled\n");
462 } else if (!strncmp(str, "off", 3)) {
ba395927 463 dmar_disabled = 1;
0cd5c3c8 464 printk(KERN_INFO "Intel-IOMMU: disabled\n");
ba395927
KA
465 } else if (!strncmp(str, "igfx_off", 8)) {
466 dmar_map_gfx = 0;
467 printk(KERN_INFO
468 "Intel-IOMMU: disable GFX device mapping\n");
7d3b03ce 469 } else if (!strncmp(str, "forcedac", 8)) {
5e0d2a6f 470 printk(KERN_INFO
7d3b03ce
KA
471 "Intel-IOMMU: Forcing DAC for PCI devices\n");
472 dmar_forcedac = 1;
5e0d2a6f 473 } else if (!strncmp(str, "strict", 6)) {
474 printk(KERN_INFO
475 "Intel-IOMMU: disable batched IOTLB flush\n");
476 intel_iommu_strict = 1;
6dd9a7c7
YS
477 } else if (!strncmp(str, "sp_off", 6)) {
478 printk(KERN_INFO
479 "Intel-IOMMU: disable supported super page\n");
480 intel_iommu_superpage = 0;
ba395927
KA
481 }
482
483 str += strcspn(str, ",");
484 while (*str == ',')
485 str++;
486 }
487 return 0;
488}
489__setup("intel_iommu=", intel_iommu_setup);
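/*
 * Illustrative note (not part of the original source): the options above
 * can be combined on the kernel command line, e.g.
 * "intel_iommu=on,strict,sp_off" enables the IOMMU, disables batched
 * IOTLB flushing and turns off superpage use.
 */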
490
491static struct kmem_cache *iommu_domain_cache;
492static struct kmem_cache *iommu_devinfo_cache;
493static struct kmem_cache *iommu_iova_cache;
494
4c923d47 495static inline void *alloc_pgtable_page(int node)
eb3fa7cb 496{
4c923d47
SS
497 struct page *page;
498 void *vaddr = NULL;
eb3fa7cb 499
4c923d47
SS
500 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
501 if (page)
502 vaddr = page_address(page);
eb3fa7cb 503 return vaddr;
ba395927
KA
504}
505
506static inline void free_pgtable_page(void *vaddr)
507{
508 free_page((unsigned long)vaddr);
509}
510
511static inline void *alloc_domain_mem(void)
512{
354bb65e 513 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
ba395927
KA
514}
515
38717946 516static void free_domain_mem(void *vaddr)
ba395927
KA
517{
518 kmem_cache_free(iommu_domain_cache, vaddr);
519}
520
521static inline void * alloc_devinfo_mem(void)
522{
354bb65e 523 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
ba395927
KA
524}
525
526static inline void free_devinfo_mem(void *vaddr)
527{
528 kmem_cache_free(iommu_devinfo_cache, vaddr);
529}
530
531struct iova *alloc_iova_mem(void)
532{
354bb65e 533 return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
ba395927
KA
534}
535
536void free_iova_mem(struct iova *iova)
537{
538 kmem_cache_free(iommu_iova_cache, iova);
539}
540
ab8dfe25
JL
541static inline int domain_type_is_vm(struct dmar_domain *domain)
542{
543 return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE;
544}
545
546static inline int domain_type_is_vm_or_si(struct dmar_domain *domain)
547{
548 return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE |
549 DOMAIN_FLAG_STATIC_IDENTITY);
550}
1b573683 551
4ed0d3e6 552static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
1b573683
WH
553{
554 unsigned long sagaw;
555 int agaw = -1;
556
557 sagaw = cap_sagaw(iommu->cap);
4ed0d3e6 558 for (agaw = width_to_agaw(max_gaw);
1b573683
WH
559 agaw >= 0; agaw--) {
560 if (test_bit(agaw, &sagaw))
561 break;
562 }
563
564 return agaw;
565}
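/*
 * Illustrative note (not part of the original source): cap_sagaw() is a
 * 5-bit field in which bit N set means an (N+2)-level page table is
 * supported; e.g. sagaw == 0x4 advertises only 4-level (48-bit)
 * translation, and the loop above then returns agaw == 2.
 */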
566
4ed0d3e6
FY
567/*
568 * Calculate max SAGAW for each iommu.
569 */
570int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
571{
572 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
573}
574
575/*
576 * calculate agaw for each iommu.
577 * "SAGAW" may be different across iommus, use a default agaw, and
578 * get a supported less agaw for iommus that don't support the default agaw.
579 */
580int iommu_calculate_agaw(struct intel_iommu *iommu)
581{
582 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
583}
584
2c2e2c38 585/* This function only returns a single iommu in a domain */
8c11e798
WH
586static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
587{
588 int iommu_id;
589
2c2e2c38 590 /* si_domain and vm domain should not get here. */
ab8dfe25 591 BUG_ON(domain_type_is_vm_or_si(domain));
1b198bb0 592 iommu_id = find_first_bit(domain->iommu_bmp, g_num_of_iommus);
8c11e798
WH
593 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
594 return NULL;
595
596 return g_iommus[iommu_id];
597}
598
8e604097
WH
599static void domain_update_iommu_coherency(struct dmar_domain *domain)
600{
d0501960
DW
601 struct dmar_drhd_unit *drhd;
602 struct intel_iommu *iommu;
603 int i, found = 0;
2e12bc29 604
d0501960 605 domain->iommu_coherency = 1;
8e604097 606
1b198bb0 607 for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
d0501960 608 found = 1;
8e604097
WH
609 if (!ecap_coherent(g_iommus[i]->ecap)) {
610 domain->iommu_coherency = 0;
611 break;
612 }
8e604097 613 }
d0501960
DW
614 if (found)
615 return;
616
617 /* No hardware attached; use lowest common denominator */
618 rcu_read_lock();
619 for_each_active_iommu(iommu, drhd) {
620 if (!ecap_coherent(iommu->ecap)) {
621 domain->iommu_coherency = 0;
622 break;
623 }
624 }
625 rcu_read_unlock();
8e604097
WH
626}
627
58c610bd
SY
628static void domain_update_iommu_snooping(struct dmar_domain *domain)
629{
630 int i;
631
632 domain->iommu_snooping = 1;
633
1b198bb0 634 for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
58c610bd
SY
635 if (!ecap_sc_support(g_iommus[i]->ecap)) {
636 domain->iommu_snooping = 0;
637 break;
638 }
58c610bd
SY
639 }
640}
641
6dd9a7c7
YS
642static void domain_update_iommu_superpage(struct dmar_domain *domain)
643{
8140a95d
AK
644 struct dmar_drhd_unit *drhd;
645 struct intel_iommu *iommu = NULL;
646 int mask = 0xf;
6dd9a7c7
YS
647
648 if (!intel_iommu_superpage) {
649 domain->iommu_superpage = 0;
650 return;
651 }
652
8140a95d 653 /* set iommu_superpage to the smallest common denominator */
0e242612 654 rcu_read_lock();
8140a95d
AK
655 for_each_active_iommu(iommu, drhd) {
656 mask &= cap_super_page_val(iommu->cap);
6dd9a7c7
YS
657 if (!mask) {
658 break;
659 }
660 }
0e242612
JL
661 rcu_read_unlock();
662
6dd9a7c7
YS
663 domain->iommu_superpage = fls(mask);
664}
665
58c610bd
SY
666/* Some capabilities may be different across iommus */
667static void domain_update_iommu_cap(struct dmar_domain *domain)
668{
669 domain_update_iommu_coherency(domain);
670 domain_update_iommu_snooping(domain);
6dd9a7c7 671 domain_update_iommu_superpage(domain);
58c610bd
SY
672}
673
156baca8 674static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
c7151a8d
WH
675{
676 struct dmar_drhd_unit *drhd = NULL;
b683b230 677 struct intel_iommu *iommu;
156baca8
DW
678 struct device *tmp;
679 struct pci_dev *ptmp, *pdev = NULL;
aa4d066a 680 u16 segment = 0;
c7151a8d
WH
681 int i;
682
156baca8
DW
683 if (dev_is_pci(dev)) {
684 pdev = to_pci_dev(dev);
685 segment = pci_domain_nr(pdev->bus);
686 } else if (ACPI_COMPANION(dev))
687 dev = &ACPI_COMPANION(dev)->dev;
688
0e242612 689 rcu_read_lock();
b683b230 690 for_each_active_iommu(iommu, drhd) {
156baca8 691 if (pdev && segment != drhd->segment)
276dbf99 692 continue;
c7151a8d 693
b683b230 694 for_each_active_dev_scope(drhd->devices,
156baca8
DW
695 drhd->devices_cnt, i, tmp) {
696 if (tmp == dev) {
697 *bus = drhd->devices[i].bus;
698 *devfn = drhd->devices[i].devfn;
b683b230 699 goto out;
156baca8
DW
700 }
701
702 if (!pdev || !dev_is_pci(tmp))
703 continue;
704
705 ptmp = to_pci_dev(tmp);
706 if (ptmp->subordinate &&
707 ptmp->subordinate->number <= pdev->bus->number &&
708 ptmp->subordinate->busn_res.end >= pdev->bus->number)
709 goto got_pdev;
924b6231 710 }
c7151a8d 711
156baca8
DW
712 if (pdev && drhd->include_all) {
713 got_pdev:
714 *bus = pdev->bus->number;
715 *devfn = pdev->devfn;
b683b230 716 goto out;
156baca8 717 }
c7151a8d 718 }
b683b230 719 iommu = NULL;
156baca8 720 out:
0e242612 721 rcu_read_unlock();
c7151a8d 722
b683b230 723 return iommu;
c7151a8d
WH
724}
725
5331fe6f
WH
726static void domain_flush_cache(struct dmar_domain *domain,
727 void *addr, int size)
728{
729 if (!domain->iommu_coherency)
730 clflush_cache_range(addr, size);
731}
732
ba395927
KA
733/* Gets context entry for a given bus and devfn */
734static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
735 u8 bus, u8 devfn)
736{
737 struct root_entry *root;
738 struct context_entry *context;
739 unsigned long phy_addr;
740 unsigned long flags;
741
742 spin_lock_irqsave(&iommu->lock, flags);
743 root = &iommu->root_entry[bus];
744 context = get_context_addr_from_root(root);
745 if (!context) {
4c923d47
SS
746 context = (struct context_entry *)
747 alloc_pgtable_page(iommu->node);
ba395927
KA
748 if (!context) {
749 spin_unlock_irqrestore(&iommu->lock, flags);
750 return NULL;
751 }
5b6985ce 752 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
ba395927
KA
753 phy_addr = virt_to_phys((void *)context);
754 set_root_value(root, phy_addr);
755 set_root_present(root);
756 __iommu_flush_cache(iommu, root, sizeof(*root));
757 }
758 spin_unlock_irqrestore(&iommu->lock, flags);
759 return &context[devfn];
760}
761
762static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
763{
764 struct root_entry *root;
765 struct context_entry *context;
766 int ret;
767 unsigned long flags;
768
769 spin_lock_irqsave(&iommu->lock, flags);
770 root = &iommu->root_entry[bus];
771 context = get_context_addr_from_root(root);
772 if (!context) {
773 ret = 0;
774 goto out;
775 }
c07e7d21 776 ret = context_present(&context[devfn]);
ba395927
KA
777out:
778 spin_unlock_irqrestore(&iommu->lock, flags);
779 return ret;
780}
781
782static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
783{
784 struct root_entry *root;
785 struct context_entry *context;
786 unsigned long flags;
787
788 spin_lock_irqsave(&iommu->lock, flags);
789 root = &iommu->root_entry[bus];
790 context = get_context_addr_from_root(root);
791 if (context) {
c07e7d21 792 context_clear_entry(&context[devfn]);
ba395927
KA
793 __iommu_flush_cache(iommu, &context[devfn], \
794 sizeof(*context));
795 }
796 spin_unlock_irqrestore(&iommu->lock, flags);
797}
798
799static void free_context_table(struct intel_iommu *iommu)
800{
801 struct root_entry *root;
802 int i;
803 unsigned long flags;
804 struct context_entry *context;
805
806 spin_lock_irqsave(&iommu->lock, flags);
807 if (!iommu->root_entry) {
808 goto out;
809 }
810 for (i = 0; i < ROOT_ENTRY_NR; i++) {
811 root = &iommu->root_entry[i];
812 context = get_context_addr_from_root(root);
813 if (context)
814 free_pgtable_page(context);
815 }
816 free_pgtable_page(iommu->root_entry);
817 iommu->root_entry = NULL;
818out:
819 spin_unlock_irqrestore(&iommu->lock, flags);
820}
821
b026fd28 822static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
5cf0a76f 823 unsigned long pfn, int *target_level)
ba395927 824{
b026fd28 825 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
ba395927
KA
826 struct dma_pte *parent, *pte = NULL;
827 int level = agaw_to_level(domain->agaw);
4399c8bf 828 int offset;
ba395927
KA
829
830 BUG_ON(!domain->pgd);
f9423606
JS
831
832 if (addr_width < BITS_PER_LONG && pfn >> addr_width)
833 /* Address beyond IOMMU's addressing capabilities. */
834 return NULL;
835
ba395927
KA
836 parent = domain->pgd;
837
5cf0a76f 838 while (1) {
ba395927
KA
839 void *tmp_page;
840
b026fd28 841 offset = pfn_level_offset(pfn, level);
ba395927 842 pte = &parent[offset];
5cf0a76f 843 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
6dd9a7c7 844 break;
5cf0a76f 845 if (level == *target_level)
ba395927
KA
846 break;
847
19c239ce 848 if (!dma_pte_present(pte)) {
c85994e4
DW
849 uint64_t pteval;
850
4c923d47 851 tmp_page = alloc_pgtable_page(domain->nid);
ba395927 852
206a73c1 853 if (!tmp_page)
ba395927 854 return NULL;
206a73c1 855
c85994e4 856 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
64de5af0 857 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
effad4b5 858 if (cmpxchg64(&pte->val, 0ULL, pteval))
c85994e4
DW
859 /* Someone else set it while we were thinking; use theirs. */
860 free_pgtable_page(tmp_page);
effad4b5 861 else
c85994e4 862 domain_flush_cache(domain, pte, sizeof(*pte));
ba395927 863 }
5cf0a76f
DW
864 if (level == 1)
865 break;
866
19c239ce 867 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
868 level--;
869 }
870
5cf0a76f
DW
871 if (!*target_level)
872 *target_level = level;
873
ba395927
KA
874 return pte;
875}
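/*
 * Illustrative note (not part of the original source): callers pass
 * *target_level == 0 to look up whatever mapping already exists (stopping
 * at a superpage or the first non-present entry), or a specific level
 * (1 == 4KiB, 2 == 2MiB, 3 == 1GiB) to have the table populated down to
 * that level; on return *target_level holds the level of the returned PTE.
 */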
876
6dd9a7c7 877
ba395927 878/* return address's pte at specific level */
90dcfb5e
DW
879static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
880 unsigned long pfn,
6dd9a7c7 881 int level, int *large_page)
ba395927
KA
882{
883 struct dma_pte *parent, *pte = NULL;
884 int total = agaw_to_level(domain->agaw);
885 int offset;
886
887 parent = domain->pgd;
888 while (level <= total) {
90dcfb5e 889 offset = pfn_level_offset(pfn, total);
ba395927
KA
890 pte = &parent[offset];
891 if (level == total)
892 return pte;
893
6dd9a7c7
YS
894 if (!dma_pte_present(pte)) {
895 *large_page = total;
ba395927 896 break;
6dd9a7c7
YS
897 }
898
e16922af 899 if (dma_pte_superpage(pte)) {
6dd9a7c7
YS
900 *large_page = total;
901 return pte;
902 }
903
19c239ce 904 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
905 total--;
906 }
907 return NULL;
908}
909
ba395927 910/* clear last level pte, a tlb flush should be followed */
5cf0a76f 911static void dma_pte_clear_range(struct dmar_domain *domain,
595badf5
DW
912 unsigned long start_pfn,
913 unsigned long last_pfn)
ba395927 914{
04b18e65 915 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
6dd9a7c7 916 unsigned int large_page = 1;
310a5ab9 917 struct dma_pte *first_pte, *pte;
66eae846 918
04b18e65 919 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
595badf5 920 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
59c36286 921 BUG_ON(start_pfn > last_pfn);
ba395927 922
04b18e65 923 /* we don't need lock here; nobody else touches the iova range */
59c36286 924 do {
6dd9a7c7
YS
925 large_page = 1;
926 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
310a5ab9 927 if (!pte) {
6dd9a7c7 928 start_pfn = align_to_level(start_pfn + 1, large_page + 1);
310a5ab9
DW
929 continue;
930 }
6dd9a7c7 931 do {
310a5ab9 932 dma_clear_pte(pte);
6dd9a7c7 933 start_pfn += lvl_to_nr_pages(large_page);
310a5ab9 934 pte++;
75e6bf96
DW
935 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
936
310a5ab9
DW
937 domain_flush_cache(domain, first_pte,
938 (void *)pte - (void *)first_pte);
59c36286
DW
939
940 } while (start_pfn && start_pfn <= last_pfn);
ba395927
KA
941}
942
3269ee0b
AW
943static void dma_pte_free_level(struct dmar_domain *domain, int level,
944 struct dma_pte *pte, unsigned long pfn,
945 unsigned long start_pfn, unsigned long last_pfn)
946{
947 pfn = max(start_pfn, pfn);
948 pte = &pte[pfn_level_offset(pfn, level)];
949
950 do {
951 unsigned long level_pfn;
952 struct dma_pte *level_pte;
953
954 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
955 goto next;
956
957 level_pfn = pfn & level_mask(level - 1);
958 level_pte = phys_to_virt(dma_pte_addr(pte));
959
960 if (level > 2)
961 dma_pte_free_level(domain, level - 1, level_pte,
962 level_pfn, start_pfn, last_pfn);
963
964 /* If range covers entire pagetable, free it */
965 if (!(start_pfn > level_pfn ||
08336fd2 966 last_pfn < level_pfn + level_size(level) - 1)) {
3269ee0b
AW
967 dma_clear_pte(pte);
968 domain_flush_cache(domain, pte, sizeof(*pte));
969 free_pgtable_page(level_pte);
970 }
971next:
972 pfn += level_size(level);
973 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
974}
975
ba395927
KA
976/* free page table pages. last level pte should already be cleared */
977static void dma_pte_free_pagetable(struct dmar_domain *domain,
d794dc9b
DW
978 unsigned long start_pfn,
979 unsigned long last_pfn)
ba395927 980{
6660c63a 981 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
ba395927 982
6660c63a
DW
983 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
984 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
59c36286 985 BUG_ON(start_pfn > last_pfn);
ba395927 986
f3a0a52f 987 /* We don't need lock here; nobody else touches the iova range */
3269ee0b
AW
988 dma_pte_free_level(domain, agaw_to_level(domain->agaw),
989 domain->pgd, 0, start_pfn, last_pfn);
6660c63a 990
ba395927 991 /* free pgd */
d794dc9b 992 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
ba395927
KA
993 free_pgtable_page(domain->pgd);
994 domain->pgd = NULL;
995 }
996}
997
ea8ea460
DW
998/* When a page at a given level is being unlinked from its parent, we don't
999 need to *modify* it at all. All we need to do is make a list of all the
1000 pages which can be freed just as soon as we've flushed the IOTLB and we
1001 know the hardware page-walk will no longer touch them.
1002 The 'pte' argument is the *parent* PTE, pointing to the page that is to
1003 be freed. */
1004static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1005 int level, struct dma_pte *pte,
1006 struct page *freelist)
1007{
1008 struct page *pg;
1009
1010 pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1011 pg->freelist = freelist;
1012 freelist = pg;
1013
1014 if (level == 1)
1015 return freelist;
1016
adeb2590
JL
1017 pte = page_address(pg);
1018 do {
ea8ea460
DW
1019 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1020 freelist = dma_pte_list_pagetables(domain, level - 1,
1021 pte, freelist);
adeb2590
JL
1022 pte++;
1023 } while (!first_pte_in_page(pte));
ea8ea460
DW
1024
1025 return freelist;
1026}
1027
1028static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1029 struct dma_pte *pte, unsigned long pfn,
1030 unsigned long start_pfn,
1031 unsigned long last_pfn,
1032 struct page *freelist)
1033{
1034 struct dma_pte *first_pte = NULL, *last_pte = NULL;
1035
1036 pfn = max(start_pfn, pfn);
1037 pte = &pte[pfn_level_offset(pfn, level)];
1038
1039 do {
1040 unsigned long level_pfn;
1041
1042 if (!dma_pte_present(pte))
1043 goto next;
1044
1045 level_pfn = pfn & level_mask(level);
1046
1047 /* If range covers entire pagetable, free it */
1048 if (start_pfn <= level_pfn &&
1049 last_pfn >= level_pfn + level_size(level) - 1) {
 1050 /* These subordinate page tables are going away entirely. Don't
1051 bother to clear them; we're just going to *free* them. */
1052 if (level > 1 && !dma_pte_superpage(pte))
1053 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1054
1055 dma_clear_pte(pte);
1056 if (!first_pte)
1057 first_pte = pte;
1058 last_pte = pte;
1059 } else if (level > 1) {
1060 /* Recurse down into a level that isn't *entirely* obsolete */
1061 freelist = dma_pte_clear_level(domain, level - 1,
1062 phys_to_virt(dma_pte_addr(pte)),
1063 level_pfn, start_pfn, last_pfn,
1064 freelist);
1065 }
1066next:
1067 pfn += level_size(level);
1068 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1069
1070 if (first_pte)
1071 domain_flush_cache(domain, first_pte,
1072 (void *)++last_pte - (void *)first_pte);
1073
1074 return freelist;
1075}
1076
1077/* We can't just free the pages because the IOMMU may still be walking
1078 the page tables, and may have cached the intermediate levels. The
1079 pages can only be freed after the IOTLB flush has been done. */
1080struct page *domain_unmap(struct dmar_domain *domain,
1081 unsigned long start_pfn,
1082 unsigned long last_pfn)
1083{
1084 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
1085 struct page *freelist = NULL;
1086
1087 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
1088 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
1089 BUG_ON(start_pfn > last_pfn);
1090
1091 /* we don't need lock here; nobody else touches the iova range */
1092 freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1093 domain->pgd, 0, start_pfn, last_pfn, NULL);
1094
1095 /* free pgd */
1096 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1097 struct page *pgd_page = virt_to_page(domain->pgd);
1098 pgd_page->freelist = freelist;
1099 freelist = pgd_page;
1100
1101 domain->pgd = NULL;
1102 }
1103
1104 return freelist;
1105}
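/*
 * Illustrative note (not part of the original source): the list returned
 * by domain_unmap() is chained through page->freelist, so a typical caller
 * does freelist = domain_unmap(...); flushes the IOTLB; and only then
 * calls dma_free_pagelist(freelist), ensuring no freed page is still
 * reachable by a hardware page walk.
 */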
1106
1107void dma_free_pagelist(struct page *freelist)
1108{
1109 struct page *pg;
1110
1111 while ((pg = freelist)) {
1112 freelist = pg->freelist;
1113 free_pgtable_page(page_address(pg));
1114 }
1115}
1116
ba395927
KA
1117/* iommu handling */
1118static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1119{
1120 struct root_entry *root;
1121 unsigned long flags;
1122
4c923d47 1123 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
ba395927
KA
1124 if (!root)
1125 return -ENOMEM;
1126
5b6985ce 1127 __iommu_flush_cache(iommu, root, ROOT_SIZE);
ba395927
KA
1128
1129 spin_lock_irqsave(&iommu->lock, flags);
1130 iommu->root_entry = root;
1131 spin_unlock_irqrestore(&iommu->lock, flags);
1132
1133 return 0;
1134}
1135
ba395927
KA
1136static void iommu_set_root_entry(struct intel_iommu *iommu)
1137{
1138 void *addr;
c416daa9 1139 u32 sts;
ba395927
KA
1140 unsigned long flag;
1141
1142 addr = iommu->root_entry;
1143
1f5b3c3f 1144 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1145 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
1146
c416daa9 1147 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1148
1149 /* Make sure hardware complete it */
1150 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1151 readl, (sts & DMA_GSTS_RTPS), sts);
ba395927 1152
1f5b3c3f 1153 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1154}
1155
1156static void iommu_flush_write_buffer(struct intel_iommu *iommu)
1157{
1158 u32 val;
1159 unsigned long flag;
1160
9af88143 1161 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
ba395927 1162 return;
ba395927 1163
1f5b3c3f 1164 raw_spin_lock_irqsave(&iommu->register_lock, flag);
462b60f6 1165 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1166
1167 /* Make sure hardware complete it */
1168 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1169 readl, (!(val & DMA_GSTS_WBFS)), val);
ba395927 1170
1f5b3c3f 1171 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1172}
1173
1174/* return value determine if we need a write buffer flush */
4c25a2c1
DW
1175static void __iommu_flush_context(struct intel_iommu *iommu,
1176 u16 did, u16 source_id, u8 function_mask,
1177 u64 type)
ba395927
KA
1178{
1179 u64 val = 0;
1180 unsigned long flag;
1181
ba395927
KA
1182 switch (type) {
1183 case DMA_CCMD_GLOBAL_INVL:
1184 val = DMA_CCMD_GLOBAL_INVL;
1185 break;
1186 case DMA_CCMD_DOMAIN_INVL:
1187 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1188 break;
1189 case DMA_CCMD_DEVICE_INVL:
1190 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1191 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1192 break;
1193 default:
1194 BUG();
1195 }
1196 val |= DMA_CCMD_ICC;
1197
1f5b3c3f 1198 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1199 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1200
1201 /* Make sure hardware complete it */
1202 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1203 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1204
1f5b3c3f 1205 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1206}
1207
ba395927 1208/* return value determine if we need a write buffer flush */
1f0ef2aa
DW
1209static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1210 u64 addr, unsigned int size_order, u64 type)
ba395927
KA
1211{
1212 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1213 u64 val = 0, val_iva = 0;
1214 unsigned long flag;
1215
ba395927
KA
1216 switch (type) {
1217 case DMA_TLB_GLOBAL_FLUSH:
1218 /* global flush doesn't need set IVA_REG */
1219 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1220 break;
1221 case DMA_TLB_DSI_FLUSH:
1222 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1223 break;
1224 case DMA_TLB_PSI_FLUSH:
1225 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
ea8ea460 1226 /* IH bit is passed in as part of address */
ba395927
KA
1227 val_iva = size_order | addr;
1228 break;
1229 default:
1230 BUG();
1231 }
1232 /* Note: set drain read/write */
1233#if 0
1234 /*
1235 * This is probably to be super secure.. Looks like we can
1236 * ignore it without any impact.
1237 */
1238 if (cap_read_drain(iommu->cap))
1239 val |= DMA_TLB_READ_DRAIN;
1240#endif
1241 if (cap_write_drain(iommu->cap))
1242 val |= DMA_TLB_WRITE_DRAIN;
1243
1f5b3c3f 1244 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1245 /* Note: Only uses first TLB reg currently */
1246 if (val_iva)
1247 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1248 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1249
1250 /* Make sure hardware complete it */
1251 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1252 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1253
1f5b3c3f 1254 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1255
1256 /* check IOTLB invalidation granularity */
1257 if (DMA_TLB_IAIG(val) == 0)
1258 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
1259 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1260 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
5b6985ce
FY
1261 (unsigned long long)DMA_TLB_IIRG(type),
1262 (unsigned long long)DMA_TLB_IAIG(val));
ba395927
KA
1263}
1264
64ae892b
DW
1265static struct device_domain_info *
1266iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
1267 u8 bus, u8 devfn)
93a23a72
YZ
1268{
1269 int found = 0;
1270 unsigned long flags;
1271 struct device_domain_info *info;
0bcb3e28 1272 struct pci_dev *pdev;
93a23a72
YZ
1273
1274 if (!ecap_dev_iotlb_support(iommu->ecap))
1275 return NULL;
1276
1277 if (!iommu->qi)
1278 return NULL;
1279
1280 spin_lock_irqsave(&device_domain_lock, flags);
1281 list_for_each_entry(info, &domain->devices, link)
c3b497c6
JL
1282 if (info->iommu == iommu && info->bus == bus &&
1283 info->devfn == devfn) {
93a23a72
YZ
1284 found = 1;
1285 break;
1286 }
1287 spin_unlock_irqrestore(&device_domain_lock, flags);
1288
0bcb3e28 1289 if (!found || !info->dev || !dev_is_pci(info->dev))
93a23a72
YZ
1290 return NULL;
1291
0bcb3e28
DW
1292 pdev = to_pci_dev(info->dev);
1293
1294 if (!pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS))
93a23a72
YZ
1295 return NULL;
1296
0bcb3e28 1297 if (!dmar_find_matched_atsr_unit(pdev))
93a23a72
YZ
1298 return NULL;
1299
93a23a72
YZ
1300 return info;
1301}
1302
1303static void iommu_enable_dev_iotlb(struct device_domain_info *info)
ba395927 1304{
0bcb3e28 1305 if (!info || !dev_is_pci(info->dev))
93a23a72
YZ
1306 return;
1307
0bcb3e28 1308 pci_enable_ats(to_pci_dev(info->dev), VTD_PAGE_SHIFT);
93a23a72
YZ
1309}
1310
1311static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1312{
0bcb3e28
DW
1313 if (!info->dev || !dev_is_pci(info->dev) ||
1314 !pci_ats_enabled(to_pci_dev(info->dev)))
93a23a72
YZ
1315 return;
1316
0bcb3e28 1317 pci_disable_ats(to_pci_dev(info->dev));
93a23a72
YZ
1318}
1319
1320static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1321 u64 addr, unsigned mask)
1322{
1323 u16 sid, qdep;
1324 unsigned long flags;
1325 struct device_domain_info *info;
1326
1327 spin_lock_irqsave(&device_domain_lock, flags);
1328 list_for_each_entry(info, &domain->devices, link) {
0bcb3e28
DW
1329 struct pci_dev *pdev;
1330 if (!info->dev || !dev_is_pci(info->dev))
1331 continue;
1332
1333 pdev = to_pci_dev(info->dev);
1334 if (!pci_ats_enabled(pdev))
93a23a72
YZ
1335 continue;
1336
1337 sid = info->bus << 8 | info->devfn;
0bcb3e28 1338 qdep = pci_ats_queue_depth(pdev);
93a23a72
YZ
1339 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1340 }
1341 spin_unlock_irqrestore(&device_domain_lock, flags);
1342}
1343
1f0ef2aa 1344static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
ea8ea460 1345 unsigned long pfn, unsigned int pages, int ih, int map)
ba395927 1346{
9dd2fe89 1347 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
03d6a246 1348 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
ba395927 1349
ba395927
KA
1350 BUG_ON(pages == 0);
1351
ea8ea460
DW
1352 if (ih)
1353 ih = 1 << 6;
ba395927 1354 /*
9dd2fe89
YZ
1355 * Fallback to domain selective flush if no PSI support or the size is
1356 * too big.
ba395927
KA
 1357 * PSI requires the number of pages to be a power of two (2 ^ x), and
 1358 * the base address to be naturally aligned to that size.
1359 */
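	/*
	 * Illustrative note (not part of the original source): "mask" above is
	 * the invalidation order, e.g. pages == 5 rounds up to 8, so mask == 3
	 * and the hardware invalidates the naturally aligned 8-page (32KiB)
	 * region containing addr.
	 */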
9dd2fe89
YZ
1360 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1361 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1f0ef2aa 1362 DMA_TLB_DSI_FLUSH);
9dd2fe89 1363 else
ea8ea460 1364 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
9dd2fe89 1365 DMA_TLB_PSI_FLUSH);
bf92df30
YZ
1366
1367 /*
82653633
NA
1368 * In caching mode, changes of pages from non-present to present require
1369 * flush. However, device IOTLB doesn't need to be flushed in this case.
bf92df30 1370 */
82653633 1371 if (!cap_caching_mode(iommu->cap) || !map)
93a23a72 1372 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
ba395927
KA
1373}
1374
f8bab735 1375static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1376{
1377 u32 pmen;
1378 unsigned long flags;
1379
1f5b3c3f 1380 raw_spin_lock_irqsave(&iommu->register_lock, flags);
f8bab735 1381 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1382 pmen &= ~DMA_PMEN_EPM;
1383 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1384
1385 /* wait for the protected region status bit to clear */
1386 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1387 readl, !(pmen & DMA_PMEN_PRS), pmen);
1388
1f5b3c3f 1389 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
f8bab735 1390}
1391
ba395927
KA
1392static int iommu_enable_translation(struct intel_iommu *iommu)
1393{
1394 u32 sts;
1395 unsigned long flags;
1396
1f5b3c3f 1397 raw_spin_lock_irqsave(&iommu->register_lock, flags);
c416daa9
DW
1398 iommu->gcmd |= DMA_GCMD_TE;
1399 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1400
1401 /* Make sure hardware complete it */
1402 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1403 readl, (sts & DMA_GSTS_TES), sts);
ba395927 1404
1f5b3c3f 1405 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
ba395927
KA
1406 return 0;
1407}
1408
1409static int iommu_disable_translation(struct intel_iommu *iommu)
1410{
1411 u32 sts;
1412 unsigned long flag;
1413
1f5b3c3f 1414 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1415 iommu->gcmd &= ~DMA_GCMD_TE;
1416 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1417
1418 /* Make sure hardware complete it */
1419 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1420 readl, (!(sts & DMA_GSTS_TES)), sts);
ba395927 1421
1f5b3c3f 1422 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1423 return 0;
1424}
1425
3460a6d9 1426
ba395927
KA
1427static int iommu_init_domains(struct intel_iommu *iommu)
1428{
1429 unsigned long ndomains;
1430 unsigned long nlongs;
1431
1432 ndomains = cap_ndoms(iommu->cap);
852bdb04
JL
1433 pr_debug("IOMMU%d: Number of Domains supported <%ld>\n",
1434 iommu->seq_id, ndomains);
ba395927
KA
1435 nlongs = BITS_TO_LONGS(ndomains);
1436
94a91b50
DD
1437 spin_lock_init(&iommu->lock);
1438
ba395927
KA
1439 /* TBD: there might be 64K domains,
1440 * consider other allocation for future chip
1441 */
1442 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1443 if (!iommu->domain_ids) {
852bdb04
JL
1444 pr_err("IOMMU%d: allocating domain id array failed\n",
1445 iommu->seq_id);
ba395927
KA
1446 return -ENOMEM;
1447 }
1448 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1449 GFP_KERNEL);
1450 if (!iommu->domains) {
852bdb04
JL
1451 pr_err("IOMMU%d: allocating domain array failed\n",
1452 iommu->seq_id);
1453 kfree(iommu->domain_ids);
1454 iommu->domain_ids = NULL;
ba395927
KA
1455 return -ENOMEM;
1456 }
1457
1458 /*
1459 * if Caching mode is set, then invalid translations are tagged
1460 * with domainid 0. Hence we need to pre-allocate it.
1461 */
1462 if (cap_caching_mode(iommu->cap))
1463 set_bit(0, iommu->domain_ids);
1464 return 0;
1465}
ba395927 1466
a868e6b7 1467static void free_dmar_iommu(struct intel_iommu *iommu)
ba395927
KA
1468{
1469 struct dmar_domain *domain;
2a46ddf7 1470 int i;
ba395927 1471
94a91b50 1472 if ((iommu->domains) && (iommu->domain_ids)) {
a45946ab 1473 for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
a4eaa86c
JL
1474 /*
1475 * Domain id 0 is reserved for invalid translation
1476 * if hardware supports caching mode.
1477 */
1478 if (cap_caching_mode(iommu->cap) && i == 0)
1479 continue;
1480
94a91b50
DD
1481 domain = iommu->domains[i];
1482 clear_bit(i, iommu->domain_ids);
129ad281
JL
1483 if (domain_detach_iommu(domain, iommu) == 0 &&
1484 !domain_type_is_vm(domain))
92d03cc8 1485 domain_exit(domain);
5e98c4b1 1486 }
ba395927
KA
1487 }
1488
1489 if (iommu->gcmd & DMA_GCMD_TE)
1490 iommu_disable_translation(iommu);
1491
ba395927
KA
1492 kfree(iommu->domains);
1493 kfree(iommu->domain_ids);
a868e6b7
JL
1494 iommu->domains = NULL;
1495 iommu->domain_ids = NULL;
ba395927 1496
d9630fe9
WH
1497 g_iommus[iommu->seq_id] = NULL;
1498
ba395927
KA
1499 /* free context mapping */
1500 free_context_table(iommu);
ba395927
KA
1501}
1502
ab8dfe25 1503static struct dmar_domain *alloc_domain(int flags)
ba395927 1504{
92d03cc8
JL
1505 /* domain id for virtual machine, it won't be set in context */
1506 static atomic_t vm_domid = ATOMIC_INIT(0);
ba395927 1507 struct dmar_domain *domain;
ba395927
KA
1508
1509 domain = alloc_domain_mem();
1510 if (!domain)
1511 return NULL;
1512
ab8dfe25 1513 memset(domain, 0, sizeof(*domain));
4c923d47 1514 domain->nid = -1;
ab8dfe25 1515 domain->flags = flags;
92d03cc8
JL
1516 spin_lock_init(&domain->iommu_lock);
1517 INIT_LIST_HEAD(&domain->devices);
ab8dfe25 1518 if (flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
92d03cc8 1519 domain->id = atomic_inc_return(&vm_domid);
2c2e2c38
FY
1520
1521 return domain;
1522}
1523
fb170fb4
JL
1524static int __iommu_attach_domain(struct dmar_domain *domain,
1525 struct intel_iommu *iommu)
2c2e2c38
FY
1526{
1527 int num;
1528 unsigned long ndomains;
2c2e2c38 1529
ba395927 1530 ndomains = cap_ndoms(iommu->cap);
ba395927 1531 num = find_first_zero_bit(iommu->domain_ids, ndomains);
fb170fb4
JL
1532 if (num < ndomains) {
1533 set_bit(num, iommu->domain_ids);
1534 iommu->domains[num] = domain;
1535 } else {
1536 num = -ENOSPC;
ba395927
KA
1537 }
1538
fb170fb4
JL
1539 return num;
1540}
1541
1542static int iommu_attach_domain(struct dmar_domain *domain,
1543 struct intel_iommu *iommu)
1544{
1545 int num;
1546 unsigned long flags;
1547
1548 spin_lock_irqsave(&iommu->lock, flags);
1549 num = __iommu_attach_domain(domain, iommu);
44bde614 1550 spin_unlock_irqrestore(&iommu->lock, flags);
fb170fb4
JL
1551 if (num < 0)
1552 pr_err("IOMMU: no free domain ids\n");
ba395927 1553
fb170fb4 1554 return num;
ba395927
KA
1555}
1556
44bde614
JL
1557static int iommu_attach_vm_domain(struct dmar_domain *domain,
1558 struct intel_iommu *iommu)
1559{
1560 int num;
1561 unsigned long ndomains;
1562
1563 ndomains = cap_ndoms(iommu->cap);
1564 for_each_set_bit(num, iommu->domain_ids, ndomains)
1565 if (iommu->domains[num] == domain)
1566 return num;
1567
1568 return __iommu_attach_domain(domain, iommu);
1569}
1570
2c2e2c38
FY
1571static void iommu_detach_domain(struct dmar_domain *domain,
1572 struct intel_iommu *iommu)
ba395927
KA
1573{
1574 unsigned long flags;
2c2e2c38 1575 int num, ndomains;
ba395927 1576
8c11e798 1577 spin_lock_irqsave(&iommu->lock, flags);
fb170fb4
JL
1578 if (domain_type_is_vm_or_si(domain)) {
1579 ndomains = cap_ndoms(iommu->cap);
1580 for_each_set_bit(num, iommu->domain_ids, ndomains) {
1581 if (iommu->domains[num] == domain) {
1582 clear_bit(num, iommu->domain_ids);
1583 iommu->domains[num] = NULL;
1584 break;
1585 }
2c2e2c38 1586 }
fb170fb4
JL
1587 } else {
1588 clear_bit(domain->id, iommu->domain_ids);
1589 iommu->domains[domain->id] = NULL;
2c2e2c38 1590 }
8c11e798 1591 spin_unlock_irqrestore(&iommu->lock, flags);
ba395927
KA
1592}
1593
fb170fb4
JL
1594static void domain_attach_iommu(struct dmar_domain *domain,
1595 struct intel_iommu *iommu)
1596{
1597 unsigned long flags;
1598
1599 spin_lock_irqsave(&domain->iommu_lock, flags);
1600 if (!test_and_set_bit(iommu->seq_id, domain->iommu_bmp)) {
1601 domain->iommu_count++;
1602 if (domain->iommu_count == 1)
1603 domain->nid = iommu->node;
1604 domain_update_iommu_cap(domain);
1605 }
1606 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1607}
1608
1609static int domain_detach_iommu(struct dmar_domain *domain,
1610 struct intel_iommu *iommu)
1611{
1612 unsigned long flags;
1613 int count = INT_MAX;
1614
1615 spin_lock_irqsave(&domain->iommu_lock, flags);
1616 if (test_and_clear_bit(iommu->seq_id, domain->iommu_bmp)) {
1617 count = --domain->iommu_count;
1618 domain_update_iommu_cap(domain);
1619 }
1620 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1621
1622 return count;
1623}
1624
ba395927 1625static struct iova_domain reserved_iova_list;
8a443df4 1626static struct lock_class_key reserved_rbtree_key;
ba395927 1627
51a63e67 1628static int dmar_init_reserved_ranges(void)
ba395927
KA
1629{
1630 struct pci_dev *pdev = NULL;
1631 struct iova *iova;
1632 int i;
ba395927 1633
f661197e 1634 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
ba395927 1635
8a443df4
MG
1636 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1637 &reserved_rbtree_key);
1638
ba395927
KA
1639 /* IOAPIC ranges shouldn't be accessed by DMA */
1640 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1641 IOVA_PFN(IOAPIC_RANGE_END));
51a63e67 1642 if (!iova) {
ba395927 1643 printk(KERN_ERR "Reserve IOAPIC range failed\n");
51a63e67
JC
1644 return -ENODEV;
1645 }
ba395927
KA
1646
1647 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1648 for_each_pci_dev(pdev) {
1649 struct resource *r;
1650
1651 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1652 r = &pdev->resource[i];
1653 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1654 continue;
1a4a4551
DW
1655 iova = reserve_iova(&reserved_iova_list,
1656 IOVA_PFN(r->start),
1657 IOVA_PFN(r->end));
51a63e67 1658 if (!iova) {
ba395927 1659 printk(KERN_ERR "Reserve iova failed\n");
51a63e67
JC
1660 return -ENODEV;
1661 }
ba395927
KA
1662 }
1663 }
51a63e67 1664 return 0;
ba395927
KA
1665}
1666
1667static void domain_reserve_special_ranges(struct dmar_domain *domain)
1668{
1669 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1670}
1671
1672static inline int guestwidth_to_adjustwidth(int gaw)
1673{
1674 int agaw;
1675 int r = (gaw - 12) % 9;
1676
1677 if (r == 0)
1678 agaw = gaw;
1679 else
1680 agaw = gaw + 9 - r;
1681 if (agaw > 64)
1682 agaw = 64;
1683 return agaw;
1684}
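/*
 * Illustrative note (not part of the original source): the adjusted width
 * rounds the guest width up so that (width - 12) is a multiple of 9;
 * e.g. a 48-bit guest width stays 48, while 44 bits becomes 48
 * ((44 - 12) % 9 == 5, so 44 + 9 - 5 == 48).
 */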
1685
1686static int domain_init(struct dmar_domain *domain, int guest_width)
1687{
1688 struct intel_iommu *iommu;
1689 int adjust_width, agaw;
1690 unsigned long sagaw;
1691
f661197e 1692 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
ba395927
KA
1693 domain_reserve_special_ranges(domain);
1694
1695 /* calculate AGAW */
8c11e798 1696 iommu = domain_get_iommu(domain);
ba395927
KA
1697 if (guest_width > cap_mgaw(iommu->cap))
1698 guest_width = cap_mgaw(iommu->cap);
1699 domain->gaw = guest_width;
1700 adjust_width = guestwidth_to_adjustwidth(guest_width);
1701 agaw = width_to_agaw(adjust_width);
1702 sagaw = cap_sagaw(iommu->cap);
1703 if (!test_bit(agaw, &sagaw)) {
1704 /* hardware doesn't support it, choose a bigger one */
1705 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1706 agaw = find_next_bit(&sagaw, 5, agaw);
1707 if (agaw >= 5)
1708 return -ENODEV;
1709 }
1710 domain->agaw = agaw;
ba395927 1711
8e604097
WH
1712 if (ecap_coherent(iommu->ecap))
1713 domain->iommu_coherency = 1;
1714 else
1715 domain->iommu_coherency = 0;
1716
58c610bd
SY
1717 if (ecap_sc_support(iommu->ecap))
1718 domain->iommu_snooping = 1;
1719 else
1720 domain->iommu_snooping = 0;
1721
214e39aa
DW
1722 if (intel_iommu_superpage)
1723 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1724 else
1725 domain->iommu_superpage = 0;
1726
4c923d47 1727 domain->nid = iommu->node;
c7151a8d 1728
ba395927 1729 /* always allocate the top pgd */
4c923d47 1730 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
ba395927
KA
1731 if (!domain->pgd)
1732 return -ENOMEM;
5b6985ce 1733 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
ba395927
KA
1734 return 0;
1735}
1736
1737static void domain_exit(struct dmar_domain *domain)
1738{
2c2e2c38
FY
1739 struct dmar_drhd_unit *drhd;
1740 struct intel_iommu *iommu;
ea8ea460 1741 struct page *freelist = NULL;
ba395927
KA
1742
 1743 /* Domain 0 is reserved, so don't process it */
1744 if (!domain)
1745 return;
1746
7b668357
AW
1747 /* Flush any lazy unmaps that may reference this domain */
1748 if (!intel_iommu_strict)
1749 flush_unmaps_timeout(0);
1750
92d03cc8 1751 /* remove associated devices */
ba395927 1752 domain_remove_dev_info(domain);
92d03cc8 1753
ba395927
KA
1754 /* destroy iovas */
1755 put_iova_domain(&domain->iovad);
ba395927 1756
ea8ea460 1757 freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
ba395927 1758
92d03cc8 1759 /* clear attached or cached domains */
0e242612 1760 rcu_read_lock();
2c2e2c38 1761 for_each_active_iommu(iommu, drhd)
fb170fb4 1762 iommu_detach_domain(domain, iommu);
0e242612 1763 rcu_read_unlock();
2c2e2c38 1764
ea8ea460
DW
1765 dma_free_pagelist(freelist);
1766
ba395927
KA
1767 free_domain_mem(domain);
1768}
1769
64ae892b
DW
1770static int domain_context_mapping_one(struct dmar_domain *domain,
1771 struct intel_iommu *iommu,
1772 u8 bus, u8 devfn, int translation)
ba395927
KA
1773{
1774 struct context_entry *context;
ba395927 1775 unsigned long flags;
ea6606b0 1776 struct dma_pte *pgd;
ea6606b0
WH
1777 int id;
1778 int agaw;
93a23a72 1779 struct device_domain_info *info = NULL;
ba395927
KA
1780
1781 pr_debug("Set context mapping for %02x:%02x.%d\n",
1782 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
4ed0d3e6 1783
ba395927 1784 BUG_ON(!domain->pgd);
4ed0d3e6
FY
1785 BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1786 translation != CONTEXT_TT_MULTI_LEVEL);
5331fe6f 1787
ba395927
KA
1788 context = device_to_context_entry(iommu, bus, devfn);
1789 if (!context)
1790 return -ENOMEM;
1791 spin_lock_irqsave(&iommu->lock, flags);
c07e7d21 1792 if (context_present(context)) {
ba395927
KA
1793 spin_unlock_irqrestore(&iommu->lock, flags);
1794 return 0;
1795 }
1796
ea6606b0
WH
1797 id = domain->id;
1798 pgd = domain->pgd;
1799
ab8dfe25 1800 if (domain_type_is_vm_or_si(domain)) {
44bde614
JL
1801 if (domain_type_is_vm(domain)) {
1802 id = iommu_attach_vm_domain(domain, iommu);
fb170fb4 1803 if (id < 0) {
ea6606b0 1804 spin_unlock_irqrestore(&iommu->lock, flags);
fb170fb4 1805 pr_err("IOMMU: no free domain ids\n");
ea6606b0
WH
1806 return -EFAULT;
1807 }
ea6606b0
WH
1808 }
1809
1810 /* Skip top levels of page tables for
1811 * iommu which has less agaw than default.
1672af11 1812 * Unnecessary for PT mode.
ea6606b0 1813 */
1672af11
CW
1814 if (translation != CONTEXT_TT_PASS_THROUGH) {
1815 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1816 pgd = phys_to_virt(dma_pte_addr(pgd));
1817 if (!dma_pte_present(pgd)) {
1818 spin_unlock_irqrestore(&iommu->lock, flags);
1819 return -ENOMEM;
1820 }
ea6606b0
WH
1821 }
1822 }
1823 }
1824
1825 context_set_domain_id(context, id);
4ed0d3e6 1826
93a23a72 1827 if (translation != CONTEXT_TT_PASS_THROUGH) {
64ae892b 1828 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
93a23a72
YZ
1829 translation = info ? CONTEXT_TT_DEV_IOTLB :
1830 CONTEXT_TT_MULTI_LEVEL;
1831 }
4ed0d3e6
FY
1832 /*
1833 * In pass through mode, AW must be programmed to indicate the largest
1834 * AGAW value supported by hardware. And ASR is ignored by hardware.
1835 */
93a23a72 1836 if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
4ed0d3e6 1837 context_set_address_width(context, iommu->msagaw);
93a23a72
YZ
1838 else {
1839 context_set_address_root(context, virt_to_phys(pgd));
1840 context_set_address_width(context, iommu->agaw);
1841 }
4ed0d3e6
FY
1842
1843 context_set_translation_type(context, translation);
c07e7d21
MM
1844 context_set_fault_enable(context);
1845 context_set_present(context);
5331fe6f 1846 domain_flush_cache(domain, context, sizeof(*context));
ba395927 1847
4c25a2c1
DW
1848 /*
1849 * It's a non-present to present mapping. If hardware doesn't cache
 1850 * non-present entries we only need to flush the write-buffer. If the
 1851 * hardware _does_ cache non-present entries, then it does so in the special
1852 * domain #0, which we have to flush:
1853 */
1854 if (cap_caching_mode(iommu->cap)) {
1855 iommu->flush.flush_context(iommu, 0,
1856 (((u16)bus) << 8) | devfn,
1857 DMA_CCMD_MASK_NOBIT,
1858 DMA_CCMD_DEVICE_INVL);
18fd779a 1859 iommu->flush.flush_iotlb(iommu, id, 0, 0, DMA_TLB_DSI_FLUSH);
4c25a2c1 1860 } else {
ba395927 1861 iommu_flush_write_buffer(iommu);
4c25a2c1 1862 }
93a23a72 1863 iommu_enable_dev_iotlb(info);
ba395927 1864 spin_unlock_irqrestore(&iommu->lock, flags);
c7151a8d 1865
fb170fb4
JL
1866 domain_attach_iommu(domain, iommu);
1867
ba395927
KA
1868 return 0;
1869}
1870
579305f7
AW
1871struct domain_context_mapping_data {
1872 struct dmar_domain *domain;
1873 struct intel_iommu *iommu;
1874 int translation;
1875};
1876
1877static int domain_context_mapping_cb(struct pci_dev *pdev,
1878 u16 alias, void *opaque)
1879{
1880 struct domain_context_mapping_data *data = opaque;
1881
1882 return domain_context_mapping_one(data->domain, data->iommu,
1883 PCI_BUS_NUM(alias), alias & 0xff,
1884 data->translation);
1885}
1886
ba395927 1887static int
e1f167f3
DW
1888domain_context_mapping(struct dmar_domain *domain, struct device *dev,
1889 int translation)
ba395927 1890{
64ae892b 1891 struct intel_iommu *iommu;
156baca8 1892 u8 bus, devfn;
579305f7 1893 struct domain_context_mapping_data data;
64ae892b 1894
e1f167f3 1895 iommu = device_to_iommu(dev, &bus, &devfn);
64ae892b
DW
1896 if (!iommu)
1897 return -ENODEV;
ba395927 1898
579305f7
AW
1899 if (!dev_is_pci(dev))
1900 return domain_context_mapping_one(domain, iommu, bus, devfn,
4ed0d3e6 1901 translation);
579305f7
AW
1902
1903 data.domain = domain;
1904 data.iommu = iommu;
1905 data.translation = translation;
1906
1907 return pci_for_each_dma_alias(to_pci_dev(dev),
1908 &domain_context_mapping_cb, &data);
1909}
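/*
 * Background note (not from the original file): a device behind a
 * PCIe-to-PCI bridge may issue DMA using the bridge's requester-id
 * rather than its own, so pci_for_each_dma_alias() is used above to
 * program a context entry for every source-id the device can appear as,
 * not just its own bus/devfn.
 */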
1910
1911static int domain_context_mapped_cb(struct pci_dev *pdev,
1912 u16 alias, void *opaque)
1913{
1914 struct intel_iommu *iommu = opaque;
1915
1916 return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
ba395927
KA
1917}
1918
e1f167f3 1919static int domain_context_mapped(struct device *dev)
ba395927 1920{
5331fe6f 1921 struct intel_iommu *iommu;
156baca8 1922 u8 bus, devfn;
5331fe6f 1923
e1f167f3 1924 iommu = device_to_iommu(dev, &bus, &devfn);
5331fe6f
WH
1925 if (!iommu)
1926 return -ENODEV;
ba395927 1927
579305f7
AW
1928 if (!dev_is_pci(dev))
1929 return device_context_mapped(iommu, bus, devfn);
e1f167f3 1930
579305f7
AW
1931 return !pci_for_each_dma_alias(to_pci_dev(dev),
1932 domain_context_mapped_cb, iommu);
ba395927
KA
1933}
1934
f532959b
FY
1935/* Returns a number of VTD pages, but aligned to MM page size */
1936static inline unsigned long aligned_nrpages(unsigned long host_addr,
1937 size_t size)
1938{
1939 host_addr &= ~PAGE_MASK;
1940 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1941}
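/*
 * Illustration (hypothetical values): with 4KiB pages, host_addr =
 * 0x1234 and size = 0x3000 leave an in-page offset of 0x234, so
 * PAGE_ALIGN(0x234 + 0x3000) = 0x4000 and the function returns 4 VT-d
 * pages -- one more than size / 4KiB, because the buffer straddles an
 * extra page boundary.
 */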
1942
6dd9a7c7
YS
1943/* Return largest possible superpage level for a given mapping */
1944static inline int hardware_largepage_caps(struct dmar_domain *domain,
1945 unsigned long iov_pfn,
1946 unsigned long phy_pfn,
1947 unsigned long pages)
1948{
1949 int support, level = 1;
1950 unsigned long pfnmerge;
1951
1952 support = domain->iommu_superpage;
1953
1954 /* To use a large page, the virtual *and* physical addresses
1955 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
1956 of them will mean we have to use smaller pages. So just
1957 merge them and check both at once. */
1958 pfnmerge = iov_pfn | phy_pfn;
1959
1960 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
1961 pages >>= VTD_STRIDE_SHIFT;
1962 if (!pages)
1963 break;
1964 pfnmerge >>= VTD_STRIDE_SHIFT;
1965 level++;
1966 support--;
1967 }
1968 return level;
1969}
1970
9051aa02
DW
1971static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1972 struct scatterlist *sg, unsigned long phys_pfn,
1973 unsigned long nr_pages, int prot)
e1605495
DW
1974{
1975 struct dma_pte *first_pte = NULL, *pte = NULL;
9051aa02 1976 phys_addr_t uninitialized_var(pteval);
e1605495 1977 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
9051aa02 1978 unsigned long sg_res;
6dd9a7c7
YS
1979 unsigned int largepage_lvl = 0;
1980 unsigned long lvl_pages = 0;
e1605495
DW
1981
1982 BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
1983
1984 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1985 return -EINVAL;
1986
1987 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1988
9051aa02
DW
1989 if (sg)
1990 sg_res = 0;
1991 else {
1992 sg_res = nr_pages + 1;
1993 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1994 }
1995
6dd9a7c7 1996 while (nr_pages > 0) {
c85994e4
DW
1997 uint64_t tmp;
1998
e1605495 1999 if (!sg_res) {
f532959b 2000 sg_res = aligned_nrpages(sg->offset, sg->length);
e1605495
DW
2001 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
2002 sg->dma_length = sg->length;
2003 pteval = page_to_phys(sg_page(sg)) | prot;
6dd9a7c7 2004 phys_pfn = pteval >> VTD_PAGE_SHIFT;
e1605495 2005 }
6dd9a7c7 2006
e1605495 2007 if (!pte) {
6dd9a7c7
YS
2008 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2009
5cf0a76f 2010 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
e1605495
DW
2011 if (!pte)
2012 return -ENOMEM;
6dd9a7c7 2013 /* It is a large page */
6491d4d0 2014 if (largepage_lvl > 1) {
6dd9a7c7 2015 pteval |= DMA_PTE_LARGE_PAGE;
6491d4d0
WD
2016 /* Ensure that old small page tables are removed to make room
2017 for superpage, if they exist. */
2018 dma_pte_clear_range(domain, iov_pfn,
2019 iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1);
2020 dma_pte_free_pagetable(domain, iov_pfn,
2021 iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1);
2022 } else {
6dd9a7c7 2023 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
6491d4d0 2024 }
6dd9a7c7 2025
e1605495
DW
2026 }
2027 /* We don't need lock here, nobody else
2028 * touches the iova range
2029 */
7766a3fb 2030 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
c85994e4 2031 if (tmp) {
1bf20f0d 2032 static int dumps = 5;
c85994e4
DW
2033 printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2034 iov_pfn, tmp, (unsigned long long)pteval);
1bf20f0d
DW
2035 if (dumps) {
2036 dumps--;
2037 debug_dma_dump_mappings(NULL);
2038 }
2039 WARN_ON(1);
2040 }
6dd9a7c7
YS
2041
2042 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2043
2044 BUG_ON(nr_pages < lvl_pages);
2045 BUG_ON(sg_res < lvl_pages);
2046
2047 nr_pages -= lvl_pages;
2048 iov_pfn += lvl_pages;
2049 phys_pfn += lvl_pages;
2050 pteval += lvl_pages * VTD_PAGE_SIZE;
2051 sg_res -= lvl_pages;
2052
2053 /* If the next PTE would be the first in a new page, then we
2054 need to flush the cache on the entries we've just written.
2055 And then we'll need to recalculate 'pte', so clear it and
2056 let it get set again in the if (!pte) block above.
2057
2058 If we're done (!nr_pages) we need to flush the cache too.
2059
2060 Also if we've been setting superpages, we may need to
2061 recalculate 'pte' and switch back to smaller pages for the
2062 end of the mapping, if the trailing size is not enough to
2063 use another superpage (i.e. sg_res < lvl_pages). */
e1605495 2064 pte++;
6dd9a7c7
YS
2065 if (!nr_pages || first_pte_in_page(pte) ||
2066 (largepage_lvl > 1 && sg_res < lvl_pages)) {
e1605495
DW
2067 domain_flush_cache(domain, first_pte,
2068 (void *)pte - (void *)first_pte);
2069 pte = NULL;
2070 }
6dd9a7c7
YS
2071
2072 if (!sg_res && nr_pages)
e1605495
DW
2073 sg = sg_next(sg);
2074 }
2075 return 0;
2076}
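/*
 * Walk-through (hypothetical numbers, not original text): mapping 513
 * pages at a 2MiB-aligned iov_pfn/phys_pfn first installs one
 * DMA_PTE_LARGE_PAGE entry covering 512 pages, flushes the CPU cache for
 * that PTE, then falls back to a single 4KiB PTE for the trailing page --
 * the "switch back to smaller pages" case described in the comment at
 * the end of the loop.
 */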
2077
9051aa02
DW
2078static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2079 struct scatterlist *sg, unsigned long nr_pages,
2080 int prot)
ba395927 2081{
9051aa02
DW
2082 return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
2083}
6f6a00e4 2084
9051aa02
DW
2085static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2086 unsigned long phys_pfn, unsigned long nr_pages,
2087 int prot)
2088{
2089 return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
ba395927
KA
2090}
2091
c7151a8d 2092static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
ba395927 2093{
c7151a8d
WH
2094 if (!iommu)
2095 return;
8c11e798
WH
2096
2097 clear_context_table(iommu, bus, devfn);
2098 iommu->flush.flush_context(iommu, 0, 0, 0,
4c25a2c1 2099 DMA_CCMD_GLOBAL_INVL);
1f0ef2aa 2100 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
ba395927
KA
2101}
2102
109b9b04
DW
2103static inline void unlink_domain_info(struct device_domain_info *info)
2104{
2105 assert_spin_locked(&device_domain_lock);
2106 list_del(&info->link);
2107 list_del(&info->global);
2108 if (info->dev)
0bcb3e28 2109 info->dev->archdata.iommu = NULL;
109b9b04
DW
2110}
2111
ba395927
KA
2112static void domain_remove_dev_info(struct dmar_domain *domain)
2113{
3a74ca01 2114 struct device_domain_info *info, *tmp;
fb170fb4 2115 unsigned long flags;
ba395927
KA
2116
2117 spin_lock_irqsave(&device_domain_lock, flags);
3a74ca01 2118 list_for_each_entry_safe(info, tmp, &domain->devices, link) {
109b9b04 2119 unlink_domain_info(info);
ba395927
KA
2120 spin_unlock_irqrestore(&device_domain_lock, flags);
2121
93a23a72 2122 iommu_disable_dev_iotlb(info);
7c7faa11 2123 iommu_detach_dev(info->iommu, info->bus, info->devfn);
ba395927 2124
ab8dfe25 2125 if (domain_type_is_vm(domain)) {
7c7faa11 2126 iommu_detach_dependent_devices(info->iommu, info->dev);
fb170fb4 2127 domain_detach_iommu(domain, info->iommu);
92d03cc8
JL
2128 }
2129
2130 free_devinfo_mem(info);
ba395927
KA
2131 spin_lock_irqsave(&device_domain_lock, flags);
2132 }
2133 spin_unlock_irqrestore(&device_domain_lock, flags);
2134}
2135
2136/*
2137 * find_domain
1525a29a 2138 * Note: struct device->archdata.iommu stores the info
ba395927 2139 */
1525a29a 2140static struct dmar_domain *find_domain(struct device *dev)
ba395927
KA
2141{
2142 struct device_domain_info *info;
2143
2144 /* No lock here, assumes no domain exit in normal case */
1525a29a 2145 info = dev->archdata.iommu;
ba395927
KA
2146 if (info)
2147 return info->domain;
2148 return NULL;
2149}
2150
5a8f40e8 2151static inline struct device_domain_info *
745f2586
JL
2152dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2153{
2154 struct device_domain_info *info;
2155
2156 list_for_each_entry(info, &device_domain_list, global)
41e80dca 2157 if (info->iommu->segment == segment && info->bus == bus &&
745f2586 2158 info->devfn == devfn)
5a8f40e8 2159 return info;
745f2586
JL
2160
2161 return NULL;
2162}
2163
5a8f40e8 2164static struct dmar_domain *dmar_insert_dev_info(struct intel_iommu *iommu,
41e80dca 2165 int bus, int devfn,
b718cd3d
DW
2166 struct device *dev,
2167 struct dmar_domain *domain)
745f2586 2168{
5a8f40e8 2169 struct dmar_domain *found = NULL;
745f2586
JL
2170 struct device_domain_info *info;
2171 unsigned long flags;
2172
2173 info = alloc_devinfo_mem();
2174 if (!info)
b718cd3d 2175 return NULL;
745f2586 2176
745f2586
JL
2177 info->bus = bus;
2178 info->devfn = devfn;
2179 info->dev = dev;
2180 info->domain = domain;
5a8f40e8 2181 info->iommu = iommu;
745f2586
JL
2182
2183 spin_lock_irqsave(&device_domain_lock, flags);
2184 if (dev)
0bcb3e28 2185 found = find_domain(dev);
5a8f40e8
DW
2186 else {
2187 struct device_domain_info *info2;
41e80dca 2188 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
5a8f40e8
DW
2189 if (info2)
2190 found = info2->domain;
2191 }
745f2586
JL
2192 if (found) {
2193 spin_unlock_irqrestore(&device_domain_lock, flags);
2194 free_devinfo_mem(info);
b718cd3d
DW
2195 /* Caller must free the original domain */
2196 return found;
745f2586
JL
2197 }
2198
b718cd3d
DW
2199 list_add(&info->link, &domain->devices);
2200 list_add(&info->global, &device_domain_list);
2201 if (dev)
2202 dev->archdata.iommu = info;
2203 spin_unlock_irqrestore(&device_domain_lock, flags);
2204
2205 return domain;
745f2586
JL
2206}
2207
579305f7
AW
2208static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2209{
2210 *(u16 *)opaque = alias;
2211 return 0;
2212}
2213
ba395927 2214/* domain is initialized */
146922ec 2215static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
ba395927 2216{
579305f7
AW
2217 struct dmar_domain *domain, *tmp;
2218 struct intel_iommu *iommu;
5a8f40e8 2219 struct device_domain_info *info;
579305f7 2220 u16 dma_alias;
ba395927 2221 unsigned long flags;
aa4d066a 2222 u8 bus, devfn;
ba395927 2223
146922ec 2224 domain = find_domain(dev);
ba395927
KA
2225 if (domain)
2226 return domain;
2227
579305f7
AW
2228 iommu = device_to_iommu(dev, &bus, &devfn);
2229 if (!iommu)
2230 return NULL;
2231
146922ec
DW
2232 if (dev_is_pci(dev)) {
2233 struct pci_dev *pdev = to_pci_dev(dev);
276dbf99 2234
579305f7
AW
2235 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2236
2237 spin_lock_irqsave(&device_domain_lock, flags);
2238 info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2239 PCI_BUS_NUM(dma_alias),
2240 dma_alias & 0xff);
2241 if (info) {
2242 iommu = info->iommu;
2243 domain = info->domain;
5a8f40e8 2244 }
579305f7 2245 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927 2246
579305f7
AW
 2247 /* DMA alias already has a domain, use it */
2248 if (info)
2249 goto found_domain;
2250 }
ba395927 2251
146922ec 2252 /* Allocate and initialize new domain for the device */
ab8dfe25 2253 domain = alloc_domain(0);
745f2586 2254 if (!domain)
579305f7 2255 return NULL;
44bde614
JL
2256 domain->id = iommu_attach_domain(domain, iommu);
2257 if (domain->id < 0) {
2fe9723d 2258 free_domain_mem(domain);
579305f7 2259 return NULL;
2c2e2c38 2260 }
fb170fb4 2261 domain_attach_iommu(domain, iommu);
579305f7
AW
2262 if (domain_init(domain, gaw)) {
2263 domain_exit(domain);
2264 return NULL;
2c2e2c38 2265 }
ba395927 2266
579305f7
AW
2267 /* register PCI DMA alias device */
2268 if (dev_is_pci(dev)) {
2269 tmp = dmar_insert_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2270 dma_alias & 0xff, NULL, domain);
2271
2272 if (!tmp || tmp != domain) {
2273 domain_exit(domain);
2274 domain = tmp;
2275 }
2276
b718cd3d 2277 if (!domain)
579305f7 2278 return NULL;
ba395927
KA
2279 }
2280
2281found_domain:
579305f7
AW
2282 tmp = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);
2283
2284 if (!tmp || tmp != domain) {
2285 domain_exit(domain);
2286 domain = tmp;
2287 }
b718cd3d
DW
2288
2289 return domain;
ba395927
KA
2290}
2291
2c2e2c38 2292static int iommu_identity_mapping;
e0fc7e0b
DW
2293#define IDENTMAP_ALL 1
2294#define IDENTMAP_GFX 2
2295#define IDENTMAP_AZALIA 4
2c2e2c38 2296
b213203e
DW
2297static int iommu_domain_identity_map(struct dmar_domain *domain,
2298 unsigned long long start,
2299 unsigned long long end)
ba395927 2300{
c5395d5c
DW
2301 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2302 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2303
2304 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2305 dma_to_mm_pfn(last_vpfn))) {
ba395927 2306 printk(KERN_ERR "IOMMU: reserve iova failed\n");
b213203e 2307 return -ENOMEM;
ba395927
KA
2308 }
2309
c5395d5c
DW
2310 pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
2311 start, end, domain->id);
ba395927
KA
2312 /*
2313 * RMRR range might have overlap with physical memory range,
2314 * clear it first
2315 */
c5395d5c 2316 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
ba395927 2317
c5395d5c
DW
2318 return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2319 last_vpfn - first_vpfn + 1,
61df7443 2320 DMA_PTE_READ|DMA_PTE_WRITE);
b213203e
DW
2321}
2322
0b9d9753 2323static int iommu_prepare_identity_map(struct device *dev,
b213203e
DW
2324 unsigned long long start,
2325 unsigned long long end)
2326{
2327 struct dmar_domain *domain;
2328 int ret;
2329
0b9d9753 2330 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
b213203e
DW
2331 if (!domain)
2332 return -ENOMEM;
2333
19943b0e
DW
2334 /* For _hardware_ passthrough, don't bother. But for software
2335 passthrough, we do it anyway -- it may indicate a memory
 2336 range which is reserved in E820 and so didn't get set
2337 up to start with in si_domain */
2338 if (domain == si_domain && hw_pass_through) {
2339 printk("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
0b9d9753 2340 dev_name(dev), start, end);
19943b0e
DW
2341 return 0;
2342 }
2343
2344 printk(KERN_INFO
2345 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
0b9d9753 2346 dev_name(dev), start, end);
2ff729f5 2347
5595b528
DW
2348 if (end < start) {
2349 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2350 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2351 dmi_get_system_info(DMI_BIOS_VENDOR),
2352 dmi_get_system_info(DMI_BIOS_VERSION),
2353 dmi_get_system_info(DMI_PRODUCT_VERSION));
2354 ret = -EIO;
2355 goto error;
2356 }
2357
2ff729f5
DW
2358 if (end >> agaw_to_width(domain->agaw)) {
2359 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2360 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2361 agaw_to_width(domain->agaw),
2362 dmi_get_system_info(DMI_BIOS_VENDOR),
2363 dmi_get_system_info(DMI_BIOS_VERSION),
2364 dmi_get_system_info(DMI_PRODUCT_VERSION));
2365 ret = -EIO;
2366 goto error;
2367 }
19943b0e 2368
b213203e 2369 ret = iommu_domain_identity_map(domain, start, end);
ba395927
KA
2370 if (ret)
2371 goto error;
2372
2373 /* context entry init */
0b9d9753 2374 ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);
b213203e
DW
2375 if (ret)
2376 goto error;
2377
2378 return 0;
2379
2380 error:
ba395927
KA
2381 domain_exit(domain);
2382 return ret;
ba395927
KA
2383}
2384
2385static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
0b9d9753 2386 struct device *dev)
ba395927 2387{
0b9d9753 2388 if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927 2389 return 0;
0b9d9753
DW
2390 return iommu_prepare_identity_map(dev, rmrr->base_address,
2391 rmrr->end_address);
ba395927
KA
2392}
2393
d3f13810 2394#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
49a0429e
KA
2395static inline void iommu_prepare_isa(void)
2396{
2397 struct pci_dev *pdev;
2398 int ret;
2399
2400 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2401 if (!pdev)
2402 return;
2403
c7ab48d2 2404 printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
0b9d9753 2405 ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);
49a0429e
KA
2406
2407 if (ret)
c7ab48d2
DW
2408 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
2409 "floppy might not work\n");
49a0429e 2410
9b27e82d 2411 pci_dev_put(pdev);
49a0429e
KA
2412}
2413#else
2414static inline void iommu_prepare_isa(void)
2415{
2416 return;
2417}
d3f13810 2418#endif /* !CONFIG_INTEL_IOMMU_FLOPPY_WA */
49a0429e 2419
2c2e2c38 2420static int md_domain_init(struct dmar_domain *domain, int guest_width);
c7ab48d2 2421
071e1374 2422static int __init si_domain_init(int hw)
2c2e2c38
FY
2423{
2424 struct dmar_drhd_unit *drhd;
2425 struct intel_iommu *iommu;
c7ab48d2 2426 int nid, ret = 0;
44bde614 2427 bool first = true;
2c2e2c38 2428
ab8dfe25 2429 si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2c2e2c38
FY
2430 if (!si_domain)
2431 return -EFAULT;
2432
2c2e2c38
FY
2433 for_each_active_iommu(iommu, drhd) {
2434 ret = iommu_attach_domain(si_domain, iommu);
fb170fb4 2435 if (ret < 0) {
2c2e2c38
FY
2436 domain_exit(si_domain);
2437 return -EFAULT;
44bde614
JL
2438 } else if (first) {
2439 si_domain->id = ret;
2440 first = false;
2441 } else if (si_domain->id != ret) {
2442 domain_exit(si_domain);
2443 return -EFAULT;
2c2e2c38 2444 }
fb170fb4 2445 domain_attach_iommu(si_domain, iommu);
2c2e2c38
FY
2446 }
2447
2448 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2449 domain_exit(si_domain);
2450 return -EFAULT;
2451 }
2452
9544c003
JL
2453 pr_debug("IOMMU: identity mapping domain is domain %d\n",
2454 si_domain->id);
2c2e2c38 2455
19943b0e
DW
2456 if (hw)
2457 return 0;
2458
c7ab48d2 2459 for_each_online_node(nid) {
5dfe8660
TH
2460 unsigned long start_pfn, end_pfn;
2461 int i;
2462
2463 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2464 ret = iommu_domain_identity_map(si_domain,
2465 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2466 if (ret)
2467 return ret;
2468 }
c7ab48d2
DW
2469 }
2470
2c2e2c38
FY
2471 return 0;
2472}
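/*
 * Summary (not original text): with hardware pass-through (hw != 0) the
 * function returns before building any page tables, since translation is
 * bypassed entirely.  For software identity mapping it instead walks
 * every memory range of every online node and installs 1:1 mappings for
 * them in si_domain.
 */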
2473
9b226624 2474static int identity_mapping(struct device *dev)
2c2e2c38
FY
2475{
2476 struct device_domain_info *info;
2477
2478 if (likely(!iommu_identity_mapping))
2479 return 0;
2480
9b226624 2481 info = dev->archdata.iommu;
cb452a40
MT
2482 if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2483 return (info->domain == si_domain);
2c2e2c38 2484
2c2e2c38
FY
2485 return 0;
2486}
2487
2488static int domain_add_dev_info(struct dmar_domain *domain,
5913c9bf 2489 struct device *dev, int translation)
2c2e2c38 2490{
0ac72664 2491 struct dmar_domain *ndomain;
5a8f40e8 2492 struct intel_iommu *iommu;
156baca8 2493 u8 bus, devfn;
5fe60f4e 2494 int ret;
2c2e2c38 2495
5913c9bf 2496 iommu = device_to_iommu(dev, &bus, &devfn);
5a8f40e8
DW
2497 if (!iommu)
2498 return -ENODEV;
2499
5913c9bf 2500 ndomain = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);
0ac72664
DW
2501 if (ndomain != domain)
2502 return -EBUSY;
2c2e2c38 2503
5913c9bf 2504 ret = domain_context_mapping(domain, dev, translation);
e2ad23d0 2505 if (ret) {
5913c9bf 2506 domain_remove_one_dev_info(domain, dev);
e2ad23d0
DW
2507 return ret;
2508 }
2509
2c2e2c38
FY
2510 return 0;
2511}
2512
0b9d9753 2513static bool device_has_rmrr(struct device *dev)
ea2447f7
TM
2514{
2515 struct dmar_rmrr_unit *rmrr;
832bd858 2516 struct device *tmp;
ea2447f7
TM
2517 int i;
2518
0e242612 2519 rcu_read_lock();
ea2447f7 2520 for_each_rmrr_units(rmrr) {
b683b230
JL
2521 /*
2522 * Return TRUE if this RMRR contains the device that
2523 * is passed in.
2524 */
2525 for_each_active_dev_scope(rmrr->devices,
2526 rmrr->devices_cnt, i, tmp)
0b9d9753 2527 if (tmp == dev) {
0e242612 2528 rcu_read_unlock();
ea2447f7 2529 return true;
b683b230 2530 }
ea2447f7 2531 }
0e242612 2532 rcu_read_unlock();
ea2447f7
TM
2533 return false;
2534}
2535
3bdb2591 2536static int iommu_should_identity_map(struct device *dev, int startup)
6941af28 2537{
ea2447f7 2538
3bdb2591
DW
2539 if (dev_is_pci(dev)) {
2540 struct pci_dev *pdev = to_pci_dev(dev);
ea2447f7 2541
3bdb2591
DW
2542 /*
2543 * We want to prevent any device associated with an RMRR from
2544 * getting placed into the SI Domain. This is done because
2545 * problems exist when devices are moved in and out of domains
2546 * and their respective RMRR info is lost. We exempt USB devices
2547 * from this process due to their usage of RMRRs that are known
2548 * to not be needed after BIOS hand-off to OS.
2549 */
2550 if (device_has_rmrr(dev) &&
2551 (pdev->class >> 8) != PCI_CLASS_SERIAL_USB)
2552 return 0;
e0fc7e0b 2553
3bdb2591
DW
2554 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2555 return 1;
e0fc7e0b 2556
3bdb2591
DW
2557 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2558 return 1;
6941af28 2559
3bdb2591 2560 if (!(iommu_identity_mapping & IDENTMAP_ALL))
3dfc813d 2561 return 0;
3bdb2591
DW
2562
2563 /*
2564 * We want to start off with all devices in the 1:1 domain, and
2565 * take them out later if we find they can't access all of memory.
2566 *
2567 * However, we can't do this for PCI devices behind bridges,
2568 * because all PCI devices behind the same bridge will end up
2569 * with the same source-id on their transactions.
2570 *
2571 * Practically speaking, we can't change things around for these
2572 * devices at run-time, because we can't be sure there'll be no
2573 * DMA transactions in flight for any of their siblings.
2574 *
2575 * So PCI devices (unless they're on the root bus) as well as
2576 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2577 * the 1:1 domain, just in _case_ one of their siblings turns out
2578 * not to be able to map all of memory.
2579 */
2580 if (!pci_is_pcie(pdev)) {
2581 if (!pci_is_root_bus(pdev->bus))
2582 return 0;
2583 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2584 return 0;
2585 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
3dfc813d 2586 return 0;
3bdb2591
DW
2587 } else {
2588 if (device_has_rmrr(dev))
2589 return 0;
2590 }
3dfc813d 2591
3bdb2591 2592 /*
3dfc813d 2593 * At boot time, we don't yet know if devices will be 64-bit capable.
3bdb2591 2594 * Assume that they will -- if they turn out not to be, then we can
3dfc813d
DW
2595 * take them out of the 1:1 domain later.
2596 */
8fcc5372
CW
2597 if (!startup) {
2598 /*
2599 * If the device's dma_mask is less than the system's memory
2600 * size then this is not a candidate for identity mapping.
2601 */
3bdb2591 2602 u64 dma_mask = *dev->dma_mask;
8fcc5372 2603
3bdb2591
DW
2604 if (dev->coherent_dma_mask &&
2605 dev->coherent_dma_mask < dma_mask)
2606 dma_mask = dev->coherent_dma_mask;
8fcc5372 2607
3bdb2591 2608 return dma_mask >= dma_get_required_mask(dev);
8fcc5372 2609 }
6941af28
DW
2610
2611 return 1;
2612}
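/*
 * Illustration (hypothetical device): after boot (startup == 0) a device
 * whose dma_mask is DMA_BIT_MASK(32) on a machine with more than 4GiB of
 * RAM fails the dma_get_required_mask() check above, so it is kept out
 * of (or later dropped from) the 1:1 si_domain and falls back to real
 * IOVA translation.
 */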
2613
cf04eee8
DW
2614static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
2615{
2616 int ret;
2617
2618 if (!iommu_should_identity_map(dev, 1))
2619 return 0;
2620
2621 ret = domain_add_dev_info(si_domain, dev,
2622 hw ? CONTEXT_TT_PASS_THROUGH :
2623 CONTEXT_TT_MULTI_LEVEL);
2624 if (!ret)
2625 pr_info("IOMMU: %s identity mapping for device %s\n",
2626 hw ? "hardware" : "software", dev_name(dev));
2627 else if (ret == -ENODEV)
2628 /* device not associated with an iommu */
2629 ret = 0;
2630
2631 return ret;
2632}
2633
2634
071e1374 2635static int __init iommu_prepare_static_identity_mapping(int hw)
2c2e2c38 2636{
2c2e2c38 2637 struct pci_dev *pdev = NULL;
cf04eee8
DW
2638 struct dmar_drhd_unit *drhd;
2639 struct intel_iommu *iommu;
2640 struct device *dev;
2641 int i;
2642 int ret = 0;
2c2e2c38 2643
19943b0e 2644 ret = si_domain_init(hw);
2c2e2c38
FY
2645 if (ret)
2646 return -EFAULT;
2647
2c2e2c38 2648 for_each_pci_dev(pdev) {
cf04eee8
DW
2649 ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
2650 if (ret)
2651 return ret;
2652 }
2653
2654 for_each_active_iommu(iommu, drhd)
2655 for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
2656 struct acpi_device_physical_node *pn;
2657 struct acpi_device *adev;
2658
2659 if (dev->bus != &acpi_bus_type)
2660 continue;
2661
 2662 adev = to_acpi_device(dev);
2663 mutex_lock(&adev->physical_node_lock);
2664 list_for_each_entry(pn, &adev->physical_node_list, node) {
2665 ret = dev_prepare_static_identity_mapping(pn->dev, hw);
2666 if (ret)
2667 break;
eae460b6 2668 }
cf04eee8
DW
2669 mutex_unlock(&adev->physical_node_lock);
2670 if (ret)
2671 return ret;
62edf5dc 2672 }
2c2e2c38
FY
2673
2674 return 0;
2675}
2676
b779260b 2677static int __init init_dmars(void)
ba395927
KA
2678{
2679 struct dmar_drhd_unit *drhd;
2680 struct dmar_rmrr_unit *rmrr;
832bd858 2681 struct device *dev;
ba395927 2682 struct intel_iommu *iommu;
9d783ba0 2683 int i, ret;
2c2e2c38 2684
ba395927
KA
2685 /*
2686 * for each drhd
2687 * allocate root
2688 * initialize and program root entry to not present
2689 * endfor
2690 */
2691 for_each_drhd_unit(drhd) {
5e0d2a6f 2692 /*
2693 * lock not needed as this is only incremented in the single
 2694 * threaded kernel __init code path; all other accesses are read
 2695 * only
2696 */
1b198bb0
MT
2697 if (g_num_of_iommus < IOMMU_UNITS_SUPPORTED) {
2698 g_num_of_iommus++;
2699 continue;
2700 }
2701 printk_once(KERN_ERR "intel-iommu: exceeded %d IOMMUs\n",
2702 IOMMU_UNITS_SUPPORTED);
5e0d2a6f 2703 }
2704
d9630fe9
WH
2705 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2706 GFP_KERNEL);
2707 if (!g_iommus) {
2708 printk(KERN_ERR "Allocating global iommu array failed\n");
2709 ret = -ENOMEM;
2710 goto error;
2711 }
2712
80b20dd8 2713 deferred_flush = kzalloc(g_num_of_iommus *
2714 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2715 if (!deferred_flush) {
5e0d2a6f 2716 ret = -ENOMEM;
989d51fc 2717 goto free_g_iommus;
5e0d2a6f 2718 }
2719
7c919779 2720 for_each_active_iommu(iommu, drhd) {
d9630fe9 2721 g_iommus[iommu->seq_id] = iommu;
ba395927 2722
e61d98d8
SS
2723 ret = iommu_init_domains(iommu);
2724 if (ret)
989d51fc 2725 goto free_iommu;
e61d98d8 2726
ba395927
KA
2727 /*
2728 * TBD:
2729 * we could share the same root & context tables
25985edc 2730 * among all IOMMUs. Need to split it later.
ba395927
KA
2731 */
2732 ret = iommu_alloc_root_entry(iommu);
2733 if (ret) {
2734 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
989d51fc 2735 goto free_iommu;
ba395927 2736 }
4ed0d3e6 2737 if (!ecap_pass_through(iommu->ecap))
19943b0e 2738 hw_pass_through = 0;
ba395927
KA
2739 }
2740
1531a6a6
SS
2741 /*
2742 * Start from the sane iommu hardware state.
2743 */
7c919779 2744 for_each_active_iommu(iommu, drhd) {
1531a6a6
SS
2745 /*
2746 * If the queued invalidation is already initialized by us
2747 * (for example, while enabling interrupt-remapping) then
2748 * we got the things already rolling from a sane state.
2749 */
2750 if (iommu->qi)
2751 continue;
2752
2753 /*
2754 * Clear any previous faults.
2755 */
2756 dmar_fault(-1, iommu);
2757 /*
2758 * Disable queued invalidation if supported and already enabled
2759 * before OS handover.
2760 */
2761 dmar_disable_qi(iommu);
2762 }
2763
7c919779 2764 for_each_active_iommu(iommu, drhd) {
a77b67d4
YS
2765 if (dmar_enable_qi(iommu)) {
2766 /*
2767 * Queued Invalidate not enabled, use Register Based
2768 * Invalidate
2769 */
2770 iommu->flush.flush_context = __iommu_flush_context;
2771 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
680a7524 2772 printk(KERN_INFO "IOMMU %d 0x%Lx: using Register based "
b4e0f9eb 2773 "invalidation\n",
680a7524 2774 iommu->seq_id,
b4e0f9eb 2775 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
2776 } else {
2777 iommu->flush.flush_context = qi_flush_context;
2778 iommu->flush.flush_iotlb = qi_flush_iotlb;
680a7524 2779 printk(KERN_INFO "IOMMU %d 0x%Lx: using Queued "
b4e0f9eb 2780 "invalidation\n",
680a7524 2781 iommu->seq_id,
b4e0f9eb 2782 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
2783 }
2784 }
2785
19943b0e 2786 if (iommu_pass_through)
e0fc7e0b
DW
2787 iommu_identity_mapping |= IDENTMAP_ALL;
2788
d3f13810 2789#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
e0fc7e0b 2790 iommu_identity_mapping |= IDENTMAP_GFX;
19943b0e 2791#endif
e0fc7e0b
DW
2792
2793 check_tylersburg_isoch();
2794
ba395927 2795 /*
19943b0e
DW
2796 * If pass through is not set or not enabled, setup context entries for
2797 * identity mappings for rmrr, gfx, and isa and may fall back to static
2798 * identity mapping if iommu_identity_mapping is set.
ba395927 2799 */
19943b0e
DW
2800 if (iommu_identity_mapping) {
2801 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
4ed0d3e6 2802 if (ret) {
19943b0e 2803 printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
989d51fc 2804 goto free_iommu;
ba395927
KA
2805 }
2806 }
ba395927 2807 /*
19943b0e
DW
2808 * For each rmrr
2809 * for each dev attached to rmrr
2810 * do
2811 * locate drhd for dev, alloc domain for dev
2812 * allocate free domain
2813 * allocate page table entries for rmrr
2814 * if context not allocated for bus
2815 * allocate and init context
2816 * set present in root table for this bus
2817 * init context with domain, translation etc
2818 * endfor
2819 * endfor
ba395927 2820 */
19943b0e
DW
2821 printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2822 for_each_rmrr_units(rmrr) {
b683b230
JL
 2823 /* some BIOS lists non-existent devices in DMAR table. */
2824 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
832bd858 2825 i, dev) {
0b9d9753 2826 ret = iommu_prepare_rmrr_dev(rmrr, dev);
19943b0e
DW
2827 if (ret)
2828 printk(KERN_ERR
2829 "IOMMU: mapping reserved region failed\n");
ba395927 2830 }
4ed0d3e6 2831 }
49a0429e 2832
19943b0e
DW
2833 iommu_prepare_isa();
2834
ba395927
KA
2835 /*
2836 * for each drhd
2837 * enable fault log
2838 * global invalidate context cache
2839 * global invalidate iotlb
2840 * enable translation
2841 */
7c919779 2842 for_each_iommu(iommu, drhd) {
51a63e67
JC
2843 if (drhd->ignored) {
2844 /*
2845 * we always have to disable PMRs or DMA may fail on
2846 * this device
2847 */
2848 if (force_on)
7c919779 2849 iommu_disable_protect_mem_regions(iommu);
ba395927 2850 continue;
51a63e67 2851 }
ba395927
KA
2852
2853 iommu_flush_write_buffer(iommu);
2854
3460a6d9
KA
2855 ret = dmar_set_interrupt(iommu);
2856 if (ret)
989d51fc 2857 goto free_iommu;
3460a6d9 2858
ba395927
KA
2859 iommu_set_root_entry(iommu);
2860
4c25a2c1 2861 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
1f0ef2aa 2862 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
f8bab735 2863
ba395927
KA
2864 ret = iommu_enable_translation(iommu);
2865 if (ret)
989d51fc 2866 goto free_iommu;
b94996c9
DW
2867
2868 iommu_disable_protect_mem_regions(iommu);
ba395927
KA
2869 }
2870
2871 return 0;
989d51fc
JL
2872
2873free_iommu:
7c919779 2874 for_each_active_iommu(iommu, drhd)
a868e6b7 2875 free_dmar_iommu(iommu);
9bdc531e 2876 kfree(deferred_flush);
989d51fc 2877free_g_iommus:
d9630fe9 2878 kfree(g_iommus);
989d51fc 2879error:
ba395927
KA
2880 return ret;
2881}
2882
5a5e02a6 2883/* This takes a number of _MM_ pages, not VTD pages */
875764de
DW
2884static struct iova *intel_alloc_iova(struct device *dev,
2885 struct dmar_domain *domain,
2886 unsigned long nrpages, uint64_t dma_mask)
ba395927 2887{
ba395927 2888 struct iova *iova = NULL;
ba395927 2889
875764de
DW
2890 /* Restrict dma_mask to the width that the iommu can handle */
2891 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2892
2893 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
ba395927
KA
2894 /*
2895 * First try to allocate an io virtual address in
284901a9 2896 * DMA_BIT_MASK(32) and if that fails then try allocating
3609801e 2897 * from higher range
ba395927 2898 */
875764de
DW
2899 iova = alloc_iova(&domain->iovad, nrpages,
2900 IOVA_PFN(DMA_BIT_MASK(32)), 1);
2901 if (iova)
2902 return iova;
2903 }
2904 iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2905 if (unlikely(!iova)) {
2906 printk(KERN_ERR "Allocating %ld-page iova for %s failed",
207e3592 2907 nrpages, dev_name(dev));
f76aec76
KA
2908 return NULL;
2909 }
2910
2911 return iova;
2912}
2913
d4b709f4 2914static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
f76aec76
KA
2915{
2916 struct dmar_domain *domain;
2917 int ret;
2918
d4b709f4 2919 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
f76aec76 2920 if (!domain) {
d4b709f4
DW
2921 printk(KERN_ERR "Allocating domain for %s failed",
2922 dev_name(dev));
4fe05bbc 2923 return NULL;
ba395927
KA
2924 }
2925
2926 /* make sure context mapping is ok */
d4b709f4
DW
2927 if (unlikely(!domain_context_mapped(dev))) {
2928 ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);
f76aec76 2929 if (ret) {
d4b709f4
DW
2930 printk(KERN_ERR "Domain context map for %s failed",
2931 dev_name(dev));
4fe05bbc 2932 return NULL;
f76aec76 2933 }
ba395927
KA
2934 }
2935
f76aec76
KA
2936 return domain;
2937}
2938
d4b709f4 2939static inline struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
147202aa
DW
2940{
2941 struct device_domain_info *info;
2942
2943 /* No lock here, assumes no domain exit in normal case */
d4b709f4 2944 info = dev->archdata.iommu;
147202aa
DW
2945 if (likely(info))
2946 return info->domain;
2947
2948 return __get_valid_domain_for_dev(dev);
2949}
2950
3d89194a 2951static int iommu_dummy(struct device *dev)
2c2e2c38 2952{
3d89194a 2953 return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2c2e2c38
FY
2954}
2955
ecb509ec 2956/* Check if the dev needs to go through non-identity map and unmap process.*/
73676832 2957static int iommu_no_mapping(struct device *dev)
2c2e2c38
FY
2958{
2959 int found;
2960
3d89194a 2961 if (iommu_dummy(dev))
1e4c64c4
DW
2962 return 1;
2963
2c2e2c38 2964 if (!iommu_identity_mapping)
1e4c64c4 2965 return 0;
2c2e2c38 2966
9b226624 2967 found = identity_mapping(dev);
2c2e2c38 2968 if (found) {
ecb509ec 2969 if (iommu_should_identity_map(dev, 0))
2c2e2c38
FY
2970 return 1;
2971 else {
2972 /*
 2973 * 32 bit DMA is removed from si_domain and falls back
2974 * to non-identity mapping.
2975 */
bf9c9eda 2976 domain_remove_one_dev_info(si_domain, dev);
2c2e2c38 2977 printk(KERN_INFO "32bit %s uses non-identity mapping\n",
ecb509ec 2978 dev_name(dev));
2c2e2c38
FY
2979 return 0;
2980 }
2981 } else {
2982 /*
2983 * In case of a detached 64 bit DMA device from vm, the device
2984 * is put into si_domain for identity mapping.
2985 */
ecb509ec 2986 if (iommu_should_identity_map(dev, 0)) {
2c2e2c38 2987 int ret;
5913c9bf 2988 ret = domain_add_dev_info(si_domain, dev,
5fe60f4e
DW
2989 hw_pass_through ?
2990 CONTEXT_TT_PASS_THROUGH :
2991 CONTEXT_TT_MULTI_LEVEL);
2c2e2c38
FY
2992 if (!ret) {
2993 printk(KERN_INFO "64bit %s uses identity mapping\n",
ecb509ec 2994 dev_name(dev));
2c2e2c38
FY
2995 return 1;
2996 }
2997 }
2998 }
2999
1e4c64c4 3000 return 0;
2c2e2c38
FY
3001}
3002
5040a918 3003static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
bb9e6d65 3004 size_t size, int dir, u64 dma_mask)
f76aec76 3005{
f76aec76 3006 struct dmar_domain *domain;
5b6985ce 3007 phys_addr_t start_paddr;
f76aec76
KA
3008 struct iova *iova;
3009 int prot = 0;
6865f0d1 3010 int ret;
8c11e798 3011 struct intel_iommu *iommu;
33041ec0 3012 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
f76aec76
KA
3013
3014 BUG_ON(dir == DMA_NONE);
2c2e2c38 3015
5040a918 3016 if (iommu_no_mapping(dev))
6865f0d1 3017 return paddr;
f76aec76 3018
5040a918 3019 domain = get_valid_domain_for_dev(dev);
f76aec76
KA
3020 if (!domain)
3021 return 0;
3022
8c11e798 3023 iommu = domain_get_iommu(domain);
88cb6a74 3024 size = aligned_nrpages(paddr, size);
f76aec76 3025
5040a918 3026 iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
f76aec76
KA
3027 if (!iova)
3028 goto error;
3029
ba395927
KA
3030 /*
3031 * Check if DMAR supports zero-length reads on write only
3032 * mappings..
3033 */
3034 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3035 !cap_zlr(iommu->cap))
ba395927
KA
3036 prot |= DMA_PTE_READ;
3037 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3038 prot |= DMA_PTE_WRITE;
3039 /*
6865f0d1 3040 * paddr - (paddr + size) might be partial page, we should map the whole
ba395927 3041 * page. Note: if two part of one page are separately mapped, we
6865f0d1 3042 * might have two guest_addr mapping to the same host paddr, but this
ba395927
KA
3043 * is not a big problem
3044 */
0ab36de2 3045 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
33041ec0 3046 mm_to_dma_pfn(paddr_pfn), size, prot);
ba395927
KA
3047 if (ret)
3048 goto error;
3049
1f0ef2aa
DW
3050 /* it's a non-present to present mapping. Only flush if caching mode */
3051 if (cap_caching_mode(iommu->cap))
ea8ea460 3052 iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 0, 1);
1f0ef2aa 3053 else
8c11e798 3054 iommu_flush_write_buffer(iommu);
f76aec76 3055
03d6a246
DW
3056 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
3057 start_paddr += paddr & ~PAGE_MASK;
3058 return start_paddr;
ba395927 3059
ba395927 3060error:
f76aec76
KA
3061 if (iova)
3062 __free_iova(&domain->iovad, iova);
4cf2e75d 3063 printk(KERN_ERR"Device %s request: %zx@%llx dir %d --- failed\n",
5040a918 3064 dev_name(dev), size, (unsigned long long)paddr, dir);
ba395927
KA
3065 return 0;
3066}
3067
ffbbef5c
FT
3068static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3069 unsigned long offset, size_t size,
3070 enum dma_data_direction dir,
3071 struct dma_attrs *attrs)
bb9e6d65 3072{
ffbbef5c 3073 return __intel_map_single(dev, page_to_phys(page) + offset, size,
46333e37 3074 dir, *dev->dma_mask);
bb9e6d65
FT
3075}
3076
5e0d2a6f 3077static void flush_unmaps(void)
3078{
80b20dd8 3079 int i, j;
5e0d2a6f 3080
5e0d2a6f 3081 timer_on = 0;
3082
3083 /* just flush them all */
3084 for (i = 0; i < g_num_of_iommus; i++) {
a2bb8459
WH
3085 struct intel_iommu *iommu = g_iommus[i];
3086 if (!iommu)
3087 continue;
c42d9f32 3088
9dd2fe89
YZ
3089 if (!deferred_flush[i].next)
3090 continue;
3091
78d5f0f5
NA
3092 /* In caching mode, global flushes turn emulation expensive */
3093 if (!cap_caching_mode(iommu->cap))
3094 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
93a23a72 3095 DMA_TLB_GLOBAL_FLUSH);
9dd2fe89 3096 for (j = 0; j < deferred_flush[i].next; j++) {
93a23a72
YZ
3097 unsigned long mask;
3098 struct iova *iova = deferred_flush[i].iova[j];
78d5f0f5
NA
3099 struct dmar_domain *domain = deferred_flush[i].domain[j];
3100
3101 /* On real hardware multiple invalidations are expensive */
3102 if (cap_caching_mode(iommu->cap))
3103 iommu_flush_iotlb_psi(iommu, domain->id,
ea8ea460
DW
3104 iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1,
3105 !deferred_flush[i].freelist[j], 0);
78d5f0f5
NA
3106 else {
3107 mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1));
3108 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
3109 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
3110 }
93a23a72 3111 __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
ea8ea460
DW
3112 if (deferred_flush[i].freelist[j])
3113 dma_free_pagelist(deferred_flush[i].freelist[j]);
80b20dd8 3114 }
9dd2fe89 3115 deferred_flush[i].next = 0;
5e0d2a6f 3116 }
3117
5e0d2a6f 3118 list_size = 0;
5e0d2a6f 3119}
3120
3121static void flush_unmaps_timeout(unsigned long data)
3122{
80b20dd8 3123 unsigned long flags;
3124
3125 spin_lock_irqsave(&async_umap_flush_lock, flags);
5e0d2a6f 3126 flush_unmaps();
80b20dd8 3127 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
5e0d2a6f 3128}
3129
ea8ea460 3130static void add_unmap(struct dmar_domain *dom, struct iova *iova, struct page *freelist)
5e0d2a6f 3131{
3132 unsigned long flags;
80b20dd8 3133 int next, iommu_id;
8c11e798 3134 struct intel_iommu *iommu;
5e0d2a6f 3135
3136 spin_lock_irqsave(&async_umap_flush_lock, flags);
80b20dd8 3137 if (list_size == HIGH_WATER_MARK)
3138 flush_unmaps();
3139
8c11e798
WH
3140 iommu = domain_get_iommu(dom);
3141 iommu_id = iommu->seq_id;
c42d9f32 3142
80b20dd8 3143 next = deferred_flush[iommu_id].next;
3144 deferred_flush[iommu_id].domain[next] = dom;
3145 deferred_flush[iommu_id].iova[next] = iova;
ea8ea460 3146 deferred_flush[iommu_id].freelist[next] = freelist;
80b20dd8 3147 deferred_flush[iommu_id].next++;
5e0d2a6f 3148
3149 if (!timer_on) {
3150 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
3151 timer_on = 1;
3152 }
3153 list_size++;
3154 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3155}
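/*
 * Note (summary, not original text): in non-strict mode unmaps are only
 * queued here; the IOTLB is actually flushed when the global list
 * reaches HIGH_WATER_MARK entries or when the 10ms unmap_timer fires,
 * trading a short window of stale IOTLB entries for far fewer
 * invalidation operations.
 */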
3156
ffbbef5c
FT
3157static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3158 size_t size, enum dma_data_direction dir,
3159 struct dma_attrs *attrs)
ba395927 3160{
f76aec76 3161 struct dmar_domain *domain;
d794dc9b 3162 unsigned long start_pfn, last_pfn;
ba395927 3163 struct iova *iova;
8c11e798 3164 struct intel_iommu *iommu;
ea8ea460 3165 struct page *freelist;
ba395927 3166
73676832 3167 if (iommu_no_mapping(dev))
f76aec76 3168 return;
2c2e2c38 3169
1525a29a 3170 domain = find_domain(dev);
ba395927
KA
3171 BUG_ON(!domain);
3172
8c11e798
WH
3173 iommu = domain_get_iommu(domain);
3174
ba395927 3175 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
85b98276
DW
3176 if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
3177 (unsigned long long)dev_addr))
ba395927 3178 return;
ba395927 3179
d794dc9b
DW
3180 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3181 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
ba395927 3182
d794dc9b 3183 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
207e3592 3184 dev_name(dev), start_pfn, last_pfn);
ba395927 3185
ea8ea460 3186 freelist = domain_unmap(domain, start_pfn, last_pfn);
d794dc9b 3187
5e0d2a6f 3188 if (intel_iommu_strict) {
03d6a246 3189 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
ea8ea460 3190 last_pfn - start_pfn + 1, !freelist, 0);
5e0d2a6f 3191 /* free iova */
3192 __free_iova(&domain->iovad, iova);
ea8ea460 3193 dma_free_pagelist(freelist);
5e0d2a6f 3194 } else {
ea8ea460 3195 add_unmap(domain, iova, freelist);
5e0d2a6f 3196 /*
3197 * queue up the release of the unmap to save the 1/6th of the
3198 * cpu used up by the iotlb flush operation...
3199 */
5e0d2a6f 3200 }
ba395927
KA
3201}
3202
5040a918 3203static void *intel_alloc_coherent(struct device *dev, size_t size,
baa676fc
AP
3204 dma_addr_t *dma_handle, gfp_t flags,
3205 struct dma_attrs *attrs)
ba395927 3206{
36746436 3207 struct page *page = NULL;
ba395927
KA
3208 int order;
3209
5b6985ce 3210 size = PAGE_ALIGN(size);
ba395927 3211 order = get_order(size);
e8bb910d 3212
5040a918 3213 if (!iommu_no_mapping(dev))
e8bb910d 3214 flags &= ~(GFP_DMA | GFP_DMA32);
5040a918
DW
3215 else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
3216 if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
e8bb910d
AW
3217 flags |= GFP_DMA;
3218 else
3219 flags |= GFP_DMA32;
3220 }
ba395927 3221
36746436
AM
3222 if (flags & __GFP_WAIT) {
3223 unsigned int count = size >> PAGE_SHIFT;
3224
3225 page = dma_alloc_from_contiguous(dev, count, order);
3226 if (page && iommu_no_mapping(dev) &&
3227 page_to_phys(page) + size > dev->coherent_dma_mask) {
3228 dma_release_from_contiguous(dev, page, count);
3229 page = NULL;
3230 }
3231 }
3232
3233 if (!page)
3234 page = alloc_pages(flags, order);
3235 if (!page)
ba395927 3236 return NULL;
36746436 3237 memset(page_address(page), 0, size);
ba395927 3238
36746436 3239 *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
bb9e6d65 3240 DMA_BIDIRECTIONAL,
5040a918 3241 dev->coherent_dma_mask);
ba395927 3242 if (*dma_handle)
36746436
AM
3243 return page_address(page);
3244 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3245 __free_pages(page, order);
3246
ba395927
KA
3247 return NULL;
3248}
3249
5040a918 3250static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
baa676fc 3251 dma_addr_t dma_handle, struct dma_attrs *attrs)
ba395927
KA
3252{
3253 int order;
36746436 3254 struct page *page = virt_to_page(vaddr);
ba395927 3255
5b6985ce 3256 size = PAGE_ALIGN(size);
ba395927
KA
3257 order = get_order(size);
3258
5040a918 3259 intel_unmap_page(dev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
36746436
AM
3260 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3261 __free_pages(page, order);
ba395927
KA
3262}
3263
5040a918 3264static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
d7ab5c46
FT
3265 int nelems, enum dma_data_direction dir,
3266 struct dma_attrs *attrs)
ba395927 3267{
ba395927 3268 struct dmar_domain *domain;
d794dc9b 3269 unsigned long start_pfn, last_pfn;
f76aec76 3270 struct iova *iova;
8c11e798 3271 struct intel_iommu *iommu;
ea8ea460 3272 struct page *freelist;
ba395927 3273
5040a918 3274 if (iommu_no_mapping(dev))
ba395927
KA
3275 return;
3276
5040a918 3277 domain = find_domain(dev);
8c11e798
WH
3278 BUG_ON(!domain);
3279
3280 iommu = domain_get_iommu(domain);
ba395927 3281
c03ab37c 3282 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
85b98276
DW
3283 if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
3284 (unsigned long long)sglist[0].dma_address))
f76aec76 3285 return;
f76aec76 3286
d794dc9b
DW
3287 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3288 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
f76aec76 3289
ea8ea460 3290 freelist = domain_unmap(domain, start_pfn, last_pfn);
f76aec76 3291
acea0018
DW
3292 if (intel_iommu_strict) {
3293 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
ea8ea460 3294 last_pfn - start_pfn + 1, !freelist, 0);
acea0018
DW
3295 /* free iova */
3296 __free_iova(&domain->iovad, iova);
ea8ea460 3297 dma_free_pagelist(freelist);
acea0018 3298 } else {
ea8ea460 3299 add_unmap(domain, iova, freelist);
acea0018
DW
3300 /*
3301 * queue up the release of the unmap to save the 1/6th of the
3302 * cpu used up by the iotlb flush operation...
3303 */
3304 }
ba395927
KA
3305}
3306
ba395927 3307static int intel_nontranslate_map_sg(struct device *hddev,
c03ab37c 3308 struct scatterlist *sglist, int nelems, int dir)
ba395927
KA
3309{
3310 int i;
c03ab37c 3311 struct scatterlist *sg;
ba395927 3312
c03ab37c 3313 for_each_sg(sglist, sg, nelems, i) {
12d4d40e 3314 BUG_ON(!sg_page(sg));
4cf2e75d 3315 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
c03ab37c 3316 sg->dma_length = sg->length;
ba395927
KA
3317 }
3318 return nelems;
3319}
3320
5040a918 3321static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
d7ab5c46 3322 enum dma_data_direction dir, struct dma_attrs *attrs)
ba395927 3323{
ba395927 3324 int i;
ba395927 3325 struct dmar_domain *domain;
f76aec76
KA
3326 size_t size = 0;
3327 int prot = 0;
f76aec76
KA
3328 struct iova *iova = NULL;
3329 int ret;
c03ab37c 3330 struct scatterlist *sg;
b536d24d 3331 unsigned long start_vpfn;
8c11e798 3332 struct intel_iommu *iommu;
ba395927
KA
3333
3334 BUG_ON(dir == DMA_NONE);
5040a918
DW
3335 if (iommu_no_mapping(dev))
3336 return intel_nontranslate_map_sg(dev, sglist, nelems, dir);
ba395927 3337
5040a918 3338 domain = get_valid_domain_for_dev(dev);
f76aec76
KA
3339 if (!domain)
3340 return 0;
3341
8c11e798
WH
3342 iommu = domain_get_iommu(domain);
3343
b536d24d 3344 for_each_sg(sglist, sg, nelems, i)
88cb6a74 3345 size += aligned_nrpages(sg->offset, sg->length);
f76aec76 3346
5040a918
DW
3347 iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
3348 *dev->dma_mask);
f76aec76 3349 if (!iova) {
c03ab37c 3350 sglist->dma_length = 0;
f76aec76
KA
3351 return 0;
3352 }
3353
3354 /*
3355 * Check if DMAR supports zero-length reads on write only
3356 * mappings..
3357 */
3358 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3359 !cap_zlr(iommu->cap))
f76aec76
KA
3360 prot |= DMA_PTE_READ;
3361 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3362 prot |= DMA_PTE_WRITE;
3363
b536d24d 3364 start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
e1605495 3365
f532959b 3366 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
e1605495
DW
3367 if (unlikely(ret)) {
3368 /* clear the page */
3369 dma_pte_clear_range(domain, start_vpfn,
3370 start_vpfn + size - 1);
3371 /* free page tables */
3372 dma_pte_free_pagetable(domain, start_vpfn,
3373 start_vpfn + size - 1);
3374 /* free iova */
3375 __free_iova(&domain->iovad, iova);
3376 return 0;
ba395927
KA
3377 }
3378
1f0ef2aa
DW
3379 /* it's a non-present to present mapping. Only flush if caching mode */
3380 if (cap_caching_mode(iommu->cap))
ea8ea460 3381 iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 0, 1);
1f0ef2aa 3382 else
8c11e798 3383 iommu_flush_write_buffer(iommu);
1f0ef2aa 3384
ba395927
KA
3385 return nelems;
3386}
3387
dfb805e8
FT
3388static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3389{
3390 return !dma_addr;
3391}
3392
160c1d8e 3393struct dma_map_ops intel_dma_ops = {
baa676fc
AP
3394 .alloc = intel_alloc_coherent,
3395 .free = intel_free_coherent,
ba395927
KA
3396 .map_sg = intel_map_sg,
3397 .unmap_sg = intel_unmap_sg,
ffbbef5c
FT
3398 .map_page = intel_map_page,
3399 .unmap_page = intel_unmap_page,
dfb805e8 3400 .mapping_error = intel_mapping_error,
ba395927
KA
3401};
3402
3403static inline int iommu_domain_cache_init(void)
3404{
3405 int ret = 0;
3406
3407 iommu_domain_cache = kmem_cache_create("iommu_domain",
3408 sizeof(struct dmar_domain),
3409 0,
3410 SLAB_HWCACHE_ALIGN,
3411
3412 NULL);
3413 if (!iommu_domain_cache) {
3414 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
3415 ret = -ENOMEM;
3416 }
3417
3418 return ret;
3419}
3420
3421static inline int iommu_devinfo_cache_init(void)
3422{
3423 int ret = 0;
3424
3425 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3426 sizeof(struct device_domain_info),
3427 0,
3428 SLAB_HWCACHE_ALIGN,
ba395927
KA
3429 NULL);
3430 if (!iommu_devinfo_cache) {
3431 printk(KERN_ERR "Couldn't create devinfo cache\n");
3432 ret = -ENOMEM;
3433 }
3434
3435 return ret;
3436}
3437
3438static inline int iommu_iova_cache_init(void)
3439{
3440 int ret = 0;
3441
3442 iommu_iova_cache = kmem_cache_create("iommu_iova",
3443 sizeof(struct iova),
3444 0,
3445 SLAB_HWCACHE_ALIGN,
ba395927
KA
3446 NULL);
3447 if (!iommu_iova_cache) {
3448 printk(KERN_ERR "Couldn't create iova cache\n");
3449 ret = -ENOMEM;
3450 }
3451
3452 return ret;
3453}
3454
3455static int __init iommu_init_mempool(void)
3456{
3457 int ret;
3458 ret = iommu_iova_cache_init();
3459 if (ret)
3460 return ret;
3461
3462 ret = iommu_domain_cache_init();
3463 if (ret)
3464 goto domain_error;
3465
3466 ret = iommu_devinfo_cache_init();
3467 if (!ret)
3468 return ret;
3469
3470 kmem_cache_destroy(iommu_domain_cache);
3471domain_error:
3472 kmem_cache_destroy(iommu_iova_cache);
3473
3474 return -ENOMEM;
3475}
3476
3477static void __init iommu_exit_mempool(void)
3478{
3479 kmem_cache_destroy(iommu_devinfo_cache);
3480 kmem_cache_destroy(iommu_domain_cache);
3481 kmem_cache_destroy(iommu_iova_cache);
3482
3483}
3484
556ab45f
DW
3485static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3486{
3487 struct dmar_drhd_unit *drhd;
3488 u32 vtbar;
3489 int rc;
3490
3491 /* We know that this device on this chipset has its own IOMMU.
3492 * If we find it under a different IOMMU, then the BIOS is lying
3493 * to us. Hope that the IOMMU for this device is actually
3494 * disabled, and it needs no translation...
3495 */
3496 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3497 if (rc) {
3498 /* "can't" happen */
3499 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3500 return;
3501 }
3502 vtbar &= 0xffff0000;
3503
 3504 /* we know that this iommu should be at offset 0xa000 from vtbar */
3505 drhd = dmar_find_matched_drhd_unit(pdev);
3506 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3507 TAINT_FIRMWARE_WORKAROUND,
3508 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3509 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3510}
3511DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3512
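/*
 * Mark DMAR units that need no translation handling: units whose device
 * scope turns out to be empty are flagged ->ignored, and units that cover
 * *only* graphics devices are either kept (recording intel_iommu_gfx_mapped)
 * or, when dmar_map_gfx is clear, ignored with their devices tagged
 * DUMMY_DEVICE_DOMAIN_INFO so the DMA API bypasses them.
 */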
ba395927
KA
3513static void __init init_no_remapping_devices(void)
3514{
3515 struct dmar_drhd_unit *drhd;
832bd858 3516 struct device *dev;
b683b230 3517 int i;
ba395927
KA
3518
3519 for_each_drhd_unit(drhd) {
3520 if (!drhd->include_all) {
b683b230
JL
3521 for_each_active_dev_scope(drhd->devices,
3522 drhd->devices_cnt, i, dev)
3523 break;
832bd858 3524 /* ignore DMAR unit if no devices exist */
ba395927
KA
3525 if (i == drhd->devices_cnt)
3526 drhd->ignored = 1;
3527 }
3528 }
3529
7c919779 3530 for_each_active_drhd_unit(drhd) {
7c919779 3531 if (drhd->include_all)
ba395927
KA
3532 continue;
3533
b683b230
JL
3534 for_each_active_dev_scope(drhd->devices,
3535 drhd->devices_cnt, i, dev)
832bd858 3536 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
ba395927 3537 break;
ba395927
KA
3538 if (i < drhd->devices_cnt)
3539 continue;
3540
c0771df8
DW
3541 /* This IOMMU has *only* gfx devices. Either bypass it or
3542 set the gfx_mapped flag, as appropriate */
3543 if (dmar_map_gfx) {
3544 intel_iommu_gfx_mapped = 1;
3545 } else {
3546 drhd->ignored = 1;
b683b230
JL
3547 for_each_active_dev_scope(drhd->devices,
3548 drhd->devices_cnt, i, dev)
832bd858 3549 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
ba395927
KA
3550 }
3551 }
3552}
3553
f59c7b69
FY
3554#ifdef CONFIG_SUSPEND
3555static int init_iommu_hw(void)
3556{
3557 struct dmar_drhd_unit *drhd;
3558 struct intel_iommu *iommu = NULL;
3559
3560 for_each_active_iommu(iommu, drhd)
3561 if (iommu->qi)
3562 dmar_reenable_qi(iommu);
3563
b779260b
JC
3564 for_each_iommu(iommu, drhd) {
3565 if (drhd->ignored) {
3566 /*
3567 * we always have to disable PMRs or DMA may fail on
3568 * this device
3569 */
3570 if (force_on)
3571 iommu_disable_protect_mem_regions(iommu);
3572 continue;
3573 }
3574
f59c7b69
FY
3575 iommu_flush_write_buffer(iommu);
3576
3577 iommu_set_root_entry(iommu);
3578
3579 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3580 DMA_CCMD_GLOBAL_INVL);
f59c7b69 3581 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 3582 DMA_TLB_GLOBAL_FLUSH);
b779260b
JC
3583 if (iommu_enable_translation(iommu))
3584 return 1;
b94996c9 3585 iommu_disable_protect_mem_regions(iommu);
f59c7b69
FY
3586 }
3587
3588 return 0;
3589}
3590
3591static void iommu_flush_all(void)
3592{
3593 struct dmar_drhd_unit *drhd;
3594 struct intel_iommu *iommu;
3595
3596 for_each_active_iommu(iommu, drhd) {
3597 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3598 DMA_CCMD_GLOBAL_INVL);
f59c7b69 3599 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 3600 DMA_TLB_GLOBAL_FLUSH);
f59c7b69
FY
3601 }
3602}
3603
134fac3f 3604static int iommu_suspend(void)
f59c7b69
FY
3605{
3606 struct dmar_drhd_unit *drhd;
3607 struct intel_iommu *iommu = NULL;
3608 unsigned long flag;
3609
3610 for_each_active_iommu(iommu, drhd) {
3611 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3612 GFP_ATOMIC);
3613 if (!iommu->iommu_state)
3614 goto nomem;
3615 }
3616
3617 iommu_flush_all();
3618
3619 for_each_active_iommu(iommu, drhd) {
3620 iommu_disable_translation(iommu);
3621
1f5b3c3f 3622 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
3623
3624 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3625 readl(iommu->reg + DMAR_FECTL_REG);
3626 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3627 readl(iommu->reg + DMAR_FEDATA_REG);
3628 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3629 readl(iommu->reg + DMAR_FEADDR_REG);
3630 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3631 readl(iommu->reg + DMAR_FEUADDR_REG);
3632
1f5b3c3f 3633 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
3634 }
3635 return 0;
3636
3637nomem:
3638 for_each_active_iommu(iommu, drhd)
3639 kfree(iommu->iommu_state);
3640
3641 return -ENOMEM;
3642}
3643
134fac3f 3644static void iommu_resume(void)
f59c7b69
FY
3645{
3646 struct dmar_drhd_unit *drhd;
3647 struct intel_iommu *iommu = NULL;
3648 unsigned long flag;
3649
3650 if (init_iommu_hw()) {
b779260b
JC
3651 if (force_on)
3652 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3653 else
3654 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
134fac3f 3655 return;
f59c7b69
FY
3656 }
3657
3658 for_each_active_iommu(iommu, drhd) {
3659
1f5b3c3f 3660 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
3661
3662 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3663 iommu->reg + DMAR_FECTL_REG);
3664 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3665 iommu->reg + DMAR_FEDATA_REG);
3666 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3667 iommu->reg + DMAR_FEADDR_REG);
3668 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3669 iommu->reg + DMAR_FEUADDR_REG);
3670
1f5b3c3f 3671 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
3672 }
3673
3674 for_each_active_iommu(iommu, drhd)
3675 kfree(iommu->iommu_state);
f59c7b69
FY
3676}
3677
134fac3f 3678static struct syscore_ops iommu_syscore_ops = {
f59c7b69
FY
3679 .resume = iommu_resume,
3680 .suspend = iommu_suspend,
3681};
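/*
 * Note that syscore callbacks run with only the boot CPU online and
 * interrupts disabled (late in suspend, early in resume), so the
 * save/restore paths above must not sleep; hence the GFP_ATOMIC
 * allocation of the register state buffers.
 */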
3682
134fac3f 3683static void __init init_iommu_pm_ops(void)
f59c7b69 3684{
134fac3f 3685 register_syscore_ops(&iommu_syscore_ops);
f59c7b69
FY
3686}
3687
3688#else
99592ba4 3689static inline void init_iommu_pm_ops(void) {}
f59c7b69
FY
 3690#endif /* CONFIG_SUSPEND */
3691
318fe7df
SS
3692
3693int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header)
3694{
3695 struct acpi_dmar_reserved_memory *rmrr;
3696 struct dmar_rmrr_unit *rmrru;
3697
3698 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
3699 if (!rmrru)
3700 return -ENOMEM;
3701
3702 rmrru->hdr = header;
3703 rmrr = (struct acpi_dmar_reserved_memory *)header;
3704 rmrru->base_address = rmrr->base_address;
3705 rmrru->end_address = rmrr->end_address;
2e455289
JL
3706 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
3707 ((void *)rmrr) + rmrr->header.length,
3708 &rmrru->devices_cnt);
3709 if (rmrru->devices_cnt && rmrru->devices == NULL) {
3710 kfree(rmrru);
3711 return -ENOMEM;
3712 }
318fe7df 3713
2e455289 3714 list_add(&rmrru->list, &dmar_rmrr_units);
318fe7df 3715
2e455289 3716 return 0;
318fe7df
SS
3717}
3718
318fe7df
SS
3719int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
3720{
3721 struct acpi_dmar_atsr *atsr;
3722 struct dmar_atsr_unit *atsru;
3723
3724 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3725 atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
3726 if (!atsru)
3727 return -ENOMEM;
3728
3729 atsru->hdr = hdr;
3730 atsru->include_all = atsr->flags & 0x1;
2e455289
JL
3731 if (!atsru->include_all) {
3732 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
3733 (void *)atsr + atsr->header.length,
3734 &atsru->devices_cnt);
3735 if (atsru->devices_cnt && atsru->devices == NULL) {
3736 kfree(atsru);
3737 return -ENOMEM;
3738 }
3739 }
318fe7df 3740
0e242612 3741 list_add_rcu(&atsru->list, &dmar_atsr_units);
318fe7df
SS
3742
3743 return 0;
3744}
3745
9bdc531e
JL
3746static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
3747{
3748 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
3749 kfree(atsru);
3750}
3751
3752static void intel_iommu_free_dmars(void)
3753{
3754 struct dmar_rmrr_unit *rmrru, *rmrr_n;
3755 struct dmar_atsr_unit *atsru, *atsr_n;
3756
3757 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
3758 list_del(&rmrru->list);
3759 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
3760 kfree(rmrru);
318fe7df
SS
3761 }
3762
9bdc531e
JL
3763 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
3764 list_del(&atsru->list);
3765 intel_iommu_free_atsr(atsru);
3766 }
318fe7df
SS
3767}
3768
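/*
 * An ATSR entry in the DMAR table reports which PCIe root ports support
 * Address Translation Services. dmar_find_matched_atsr_unit() walks up from
 * @dev to its root port and returns non-zero if that port is covered by an
 * ATSR (or by an include-all ATSR on the same segment), i.e. if the device
 * may use ATS / device-IOTLBs.
 */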
3769int dmar_find_matched_atsr_unit(struct pci_dev *dev)
3770{
b683b230 3771 int i, ret = 1;
318fe7df 3772 struct pci_bus *bus;
832bd858
DW
3773 struct pci_dev *bridge = NULL;
3774 struct device *tmp;
318fe7df
SS
3775 struct acpi_dmar_atsr *atsr;
3776 struct dmar_atsr_unit *atsru;
3777
3778 dev = pci_physfn(dev);
318fe7df 3779 for (bus = dev->bus; bus; bus = bus->parent) {
b5f82ddf 3780 bridge = bus->self;
318fe7df 3781 if (!bridge || !pci_is_pcie(bridge) ||
62f87c0e 3782 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
318fe7df 3783 return 0;
b5f82ddf 3784 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
318fe7df 3785 break;
318fe7df 3786 }
b5f82ddf
JL
3787 if (!bridge)
3788 return 0;
318fe7df 3789
0e242612 3790 rcu_read_lock();
b5f82ddf
JL
3791 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
3792 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3793 if (atsr->segment != pci_domain_nr(dev->bus))
3794 continue;
3795
b683b230 3796 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
832bd858 3797 if (tmp == &bridge->dev)
b683b230 3798 goto out;
b5f82ddf
JL
3799
3800 if (atsru->include_all)
b683b230 3801 goto out;
b5f82ddf 3802 }
b683b230
JL
3803 ret = 0;
3804out:
0e242612 3805 rcu_read_unlock();
318fe7df 3806
b683b230 3807 return ret;
318fe7df
SS
3808}
3809
59ce0515
JL
3810int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
3811{
3812 int ret = 0;
3813 struct dmar_rmrr_unit *rmrru;
3814 struct dmar_atsr_unit *atsru;
3815 struct acpi_dmar_atsr *atsr;
3816 struct acpi_dmar_reserved_memory *rmrr;
3817
3818 if (!intel_iommu_enabled && system_state != SYSTEM_BOOTING)
3819 return 0;
3820
3821 list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
3822 rmrr = container_of(rmrru->hdr,
3823 struct acpi_dmar_reserved_memory, header);
3824 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3825 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
3826 ((void *)rmrr) + rmrr->header.length,
3827 rmrr->segment, rmrru->devices,
3828 rmrru->devices_cnt);
27e24950 3829 if (ret < 0)
59ce0515
JL
3830 return ret;
3831 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
27e24950
JL
3832 dmar_remove_dev_scope(info, rmrr->segment,
3833 rmrru->devices, rmrru->devices_cnt);
59ce0515
JL
3834 }
3835 }
3836
3837 list_for_each_entry(atsru, &dmar_atsr_units, list) {
3838 if (atsru->include_all)
3839 continue;
3840
3841 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3842 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3843 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
3844 (void *)atsr + atsr->header.length,
3845 atsr->segment, atsru->devices,
3846 atsru->devices_cnt);
3847 if (ret > 0)
3848 break;
 3849 else if (ret < 0)
3850 return ret;
3851 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
3852 if (dmar_remove_dev_scope(info, atsr->segment,
3853 atsru->devices, atsru->devices_cnt))
3854 break;
3855 }
3856 }
3857
3858 return 0;
3859}
3860
99dcaded
FY
3861/*
 3862 * Here we only respond to a device being unbound from its driver.
 3863 *
 3864 * A newly added device is not attached to its DMAR domain here yet. That will
 3865 * happen when the device is mapped to an iova.
3866 */
3867static int device_notifier(struct notifier_block *nb,
3868 unsigned long action, void *data)
3869{
3870 struct device *dev = data;
99dcaded
FY
3871 struct dmar_domain *domain;
3872
3d89194a 3873 if (iommu_dummy(dev))
44cd613c
DW
3874 return 0;
3875
7e7dfab7
JL
3876 if (action != BUS_NOTIFY_UNBOUND_DRIVER &&
3877 action != BUS_NOTIFY_DEL_DEVICE)
3878 return 0;
3879
1525a29a 3880 domain = find_domain(dev);
99dcaded
FY
3881 if (!domain)
3882 return 0;
3883
3a5670e8 3884 down_read(&dmar_global_lock);
bf9c9eda 3885 domain_remove_one_dev_info(domain, dev);
ab8dfe25 3886 if (!domain_type_is_vm_or_si(domain) && list_empty(&domain->devices))
7e7dfab7 3887 domain_exit(domain);
3a5670e8 3888 up_read(&dmar_global_lock);
a97590e5 3889
99dcaded
FY
3890 return 0;
3891}
3892
3893static struct notifier_block device_nb = {
3894 .notifier_call = device_notifier,
3895};
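/*
 * Registered on the PCI bus in intel_iommu_init() via
 * bus_register_notifier(&pci_bus_type, &device_nb), so it only ever fires
 * for PCI devices; the callback then ignores everything except driver
 * unbind and device removal.
 */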
3896
75f05569
JL
3897static int intel_iommu_memory_notifier(struct notifier_block *nb,
3898 unsigned long val, void *v)
3899{
3900 struct memory_notify *mhp = v;
3901 unsigned long long start, end;
3902 unsigned long start_vpfn, last_vpfn;
3903
3904 switch (val) {
3905 case MEM_GOING_ONLINE:
3906 start = mhp->start_pfn << PAGE_SHIFT;
3907 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
3908 if (iommu_domain_identity_map(si_domain, start, end)) {
3909 pr_warn("dmar: failed to build identity map for [%llx-%llx]\n",
3910 start, end);
3911 return NOTIFY_BAD;
3912 }
3913 break;
3914
3915 case MEM_OFFLINE:
3916 case MEM_CANCEL_ONLINE:
3917 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
3918 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
3919 while (start_vpfn <= last_vpfn) {
3920 struct iova *iova;
3921 struct dmar_drhd_unit *drhd;
3922 struct intel_iommu *iommu;
ea8ea460 3923 struct page *freelist;
75f05569
JL
3924
3925 iova = find_iova(&si_domain->iovad, start_vpfn);
3926 if (iova == NULL) {
 3927 pr_debug("dmar: failed to get IOVA for PFN %lx\n",
3928 start_vpfn);
3929 break;
3930 }
3931
3932 iova = split_and_remove_iova(&si_domain->iovad, iova,
3933 start_vpfn, last_vpfn);
3934 if (iova == NULL) {
3935 pr_warn("dmar: failed to split IOVA PFN [%lx-%lx]\n",
3936 start_vpfn, last_vpfn);
3937 return NOTIFY_BAD;
3938 }
3939
ea8ea460
DW
3940 freelist = domain_unmap(si_domain, iova->pfn_lo,
3941 iova->pfn_hi);
3942
75f05569
JL
3943 rcu_read_lock();
3944 for_each_active_iommu(iommu, drhd)
3945 iommu_flush_iotlb_psi(iommu, si_domain->id,
3946 iova->pfn_lo,
ea8ea460
DW
3947 iova->pfn_hi - iova->pfn_lo + 1,
3948 !freelist, 0);
75f05569 3949 rcu_read_unlock();
ea8ea460 3950 dma_free_pagelist(freelist);
75f05569
JL
3951
3952 start_vpfn = iova->pfn_hi + 1;
3953 free_iova_mem(iova);
3954 }
3955 break;
3956 }
3957
3958 return NOTIFY_OK;
3959}
3960
3961static struct notifier_block intel_iommu_memory_nb = {
3962 .notifier_call = intel_iommu_memory_notifier,
3963 .priority = 0
3964};
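/*
 * Only registered (from intel_iommu_init() below) when si_domain exists and
 * hardware pass-through is not in use: in that case the software identity
 * map has to be grown to cover hot-added memory and torn down again when
 * the memory goes offline, which is exactly what the notifier above does.
 */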
3965
a5459cfe
AW
3966
3967static ssize_t intel_iommu_show_version(struct device *dev,
3968 struct device_attribute *attr,
3969 char *buf)
3970{
3971 struct intel_iommu *iommu = dev_get_drvdata(dev);
3972 u32 ver = readl(iommu->reg + DMAR_VER_REG);
3973 return sprintf(buf, "%d:%d\n",
3974 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
3975}
3976static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
3977
3978static ssize_t intel_iommu_show_address(struct device *dev,
3979 struct device_attribute *attr,
3980 char *buf)
3981{
3982 struct intel_iommu *iommu = dev_get_drvdata(dev);
3983 return sprintf(buf, "%llx\n", iommu->reg_phys);
3984}
3985static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
3986
3987static ssize_t intel_iommu_show_cap(struct device *dev,
3988 struct device_attribute *attr,
3989 char *buf)
3990{
3991 struct intel_iommu *iommu = dev_get_drvdata(dev);
3992 return sprintf(buf, "%llx\n", iommu->cap);
3993}
3994static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
3995
3996static ssize_t intel_iommu_show_ecap(struct device *dev,
3997 struct device_attribute *attr,
3998 char *buf)
3999{
4000 struct intel_iommu *iommu = dev_get_drvdata(dev);
4001 return sprintf(buf, "%llx\n", iommu->ecap);
4002}
4003static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4004
4005static struct attribute *intel_iommu_attrs[] = {
4006 &dev_attr_version.attr,
4007 &dev_attr_address.attr,
4008 &dev_attr_cap.attr,
4009 &dev_attr_ecap.attr,
4010 NULL,
4011};
4012
4013static struct attribute_group intel_iommu_group = {
4014 .name = "intel-iommu",
4015 .attrs = intel_iommu_attrs,
4016};
4017
4018const struct attribute_group *intel_iommu_groups[] = {
4019 &intel_iommu_group,
4020 NULL,
4021};
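/*
 * These attributes ride on the per-unit class device created by
 * iommu_device_create() in intel_iommu_init(), so they should appear as
 * read-only files along the lines of
 * /sys/class/iommu/dmar0/intel-iommu/{version,address,cap,ecap}.
 */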
4022
ba395927
KA
4023int __init intel_iommu_init(void)
4024{
9bdc531e 4025 int ret = -ENODEV;
3a93c841 4026 struct dmar_drhd_unit *drhd;
7c919779 4027 struct intel_iommu *iommu;
ba395927 4028
a59b50e9
JC
4029 /* VT-d is required for a TXT/tboot launch, so enforce that */
4030 force_on = tboot_force_iommu();
4031
3a5670e8
JL
4032 if (iommu_init_mempool()) {
4033 if (force_on)
4034 panic("tboot: Failed to initialize iommu memory\n");
4035 return -ENOMEM;
4036 }
4037
4038 down_write(&dmar_global_lock);
a59b50e9
JC
4039 if (dmar_table_init()) {
4040 if (force_on)
4041 panic("tboot: Failed to initialize DMAR table\n");
9bdc531e 4042 goto out_free_dmar;
a59b50e9 4043 }
ba395927 4044
3a93c841
TI
4045 /*
4046 * Disable translation if already enabled prior to OS handover.
4047 */
7c919779 4048 for_each_active_iommu(iommu, drhd)
3a93c841
TI
4049 if (iommu->gcmd & DMA_GCMD_TE)
4050 iommu_disable_translation(iommu);
3a93c841 4051
c2c7286a 4052 if (dmar_dev_scope_init() < 0) {
a59b50e9
JC
4053 if (force_on)
4054 panic("tboot: Failed to initialize DMAR device scope\n");
9bdc531e 4055 goto out_free_dmar;
a59b50e9 4056 }
1886e8a9 4057
75f1cdf1 4058 if (no_iommu || dmar_disabled)
9bdc531e 4059 goto out_free_dmar;
2ae21010 4060
318fe7df
SS
4061 if (list_empty(&dmar_rmrr_units))
4062 printk(KERN_INFO "DMAR: No RMRR found\n");
4063
4064 if (list_empty(&dmar_atsr_units))
4065 printk(KERN_INFO "DMAR: No ATSR found\n");
4066
51a63e67
JC
4067 if (dmar_init_reserved_ranges()) {
4068 if (force_on)
4069 panic("tboot: Failed to reserve iommu ranges\n");
3a5670e8 4070 goto out_free_reserved_range;
51a63e67 4071 }
ba395927
KA
4072
4073 init_no_remapping_devices();
4074
b779260b 4075 ret = init_dmars();
ba395927 4076 if (ret) {
a59b50e9
JC
4077 if (force_on)
4078 panic("tboot: Failed to initialize DMARs\n");
ba395927 4079 printk(KERN_ERR "IOMMU: dmar init failed\n");
9bdc531e 4080 goto out_free_reserved_range;
ba395927 4081 }
3a5670e8 4082 up_write(&dmar_global_lock);
ba395927
KA
4083 printk(KERN_INFO
4084 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
4085
5e0d2a6f 4086 init_timer(&unmap_timer);
75f1cdf1
FT
4087#ifdef CONFIG_SWIOTLB
4088 swiotlb = 0;
4089#endif
19943b0e 4090 dma_ops = &intel_dma_ops;
4ed0d3e6 4091
134fac3f 4092 init_iommu_pm_ops();
a8bcbb0d 4093
a5459cfe
AW
4094 for_each_active_iommu(iommu, drhd)
4095 iommu->iommu_dev = iommu_device_create(NULL, iommu,
4096 intel_iommu_groups,
4097 iommu->name);
4098
4236d97d 4099 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
99dcaded 4100 bus_register_notifier(&pci_bus_type, &device_nb);
75f05569
JL
4101 if (si_domain && !hw_pass_through)
4102 register_memory_notifier(&intel_iommu_memory_nb);
99dcaded 4103
8bc1f85c
ED
4104 intel_iommu_enabled = 1;
4105
ba395927 4106 return 0;
9bdc531e
JL
4107
4108out_free_reserved_range:
4109 put_iova_domain(&reserved_iova_list);
9bdc531e
JL
4110out_free_dmar:
4111 intel_iommu_free_dmars();
3a5670e8
JL
4112 up_write(&dmar_global_lock);
4113 iommu_exit_mempool();
9bdc531e 4114 return ret;
ba395927 4115}
e820482c 4116
579305f7
AW
4117static int iommu_detach_dev_cb(struct pci_dev *pdev, u16 alias, void *opaque)
4118{
4119 struct intel_iommu *iommu = opaque;
4120
4121 iommu_detach_dev(iommu, PCI_BUS_NUM(alias), alias & 0xff);
4122 return 0;
4123}
4124
4125/*
4126 * NB - intel-iommu lacks any sort of reference counting for the users of
4127 * dependent devices. If multiple endpoints have intersecting dependent
 4128 * devices, unbinding the driver from any one of them may leave
4129 * the others unable to operate.
4130 */
3199aa6b 4131static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
0bcb3e28 4132 struct device *dev)
3199aa6b 4133{
0bcb3e28 4134 if (!iommu || !dev || !dev_is_pci(dev))
3199aa6b
HW
4135 return;
4136
579305f7 4137 pci_for_each_dma_alias(to_pci_dev(dev), &iommu_detach_dev_cb, iommu);
3199aa6b
HW
4138}
4139
2c2e2c38 4140static void domain_remove_one_dev_info(struct dmar_domain *domain,
bf9c9eda 4141 struct device *dev)
c7151a8d 4142{
bca2b916 4143 struct device_domain_info *info, *tmp;
c7151a8d
WH
4144 struct intel_iommu *iommu;
4145 unsigned long flags;
4146 int found = 0;
156baca8 4147 u8 bus, devfn;
c7151a8d 4148
bf9c9eda 4149 iommu = device_to_iommu(dev, &bus, &devfn);
c7151a8d
WH
4150 if (!iommu)
4151 return;
4152
4153 spin_lock_irqsave(&device_domain_lock, flags);
bca2b916 4154 list_for_each_entry_safe(info, tmp, &domain->devices, link) {
bf9c9eda
DW
4155 if (info->iommu == iommu && info->bus == bus &&
4156 info->devfn == devfn) {
109b9b04 4157 unlink_domain_info(info);
c7151a8d
WH
4158 spin_unlock_irqrestore(&device_domain_lock, flags);
4159
93a23a72 4160 iommu_disable_dev_iotlb(info);
c7151a8d 4161 iommu_detach_dev(iommu, info->bus, info->devfn);
bf9c9eda 4162 iommu_detach_dependent_devices(iommu, dev);
c7151a8d
WH
4163 free_devinfo_mem(info);
4164
4165 spin_lock_irqsave(&device_domain_lock, flags);
4166
4167 if (found)
4168 break;
4169 else
4170 continue;
4171 }
4172
 4173 /* if there are no other devices under the same iommu
 4174 * owned by this domain, clear this iommu in iommu_bmp and
 4175 * update the iommu count and coherency
4176 */
8bbc4410 4177 if (info->iommu == iommu)
c7151a8d
WH
4178 found = 1;
4179 }
4180
3e7abe25
RD
4181 spin_unlock_irqrestore(&device_domain_lock, flags);
4182
c7151a8d 4183 if (found == 0) {
fb170fb4
JL
4184 domain_detach_iommu(domain, iommu);
4185 if (!domain_type_is_vm_or_si(domain))
4186 iommu_detach_domain(domain, iommu);
c7151a8d 4187 }
c7151a8d
WH
4188}
4189
2c2e2c38 4190static int md_domain_init(struct dmar_domain *domain, int guest_width)
5e98c4b1
WH
4191{
4192 int adjust_width;
4193
4194 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
5e98c4b1
WH
4195 domain_reserve_special_ranges(domain);
4196
4197 /* calculate AGAW */
4198 domain->gaw = guest_width;
4199 adjust_width = guestwidth_to_adjustwidth(guest_width);
4200 domain->agaw = width_to_agaw(adjust_width);
4201
5e98c4b1 4202 domain->iommu_coherency = 0;
c5b15255 4203 domain->iommu_snooping = 0;
6dd9a7c7 4204 domain->iommu_superpage = 0;
fe40f1e0 4205 domain->max_addr = 0;
5e98c4b1
WH
4206
4207 /* always allocate the top pgd */
4c923d47 4208 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
5e98c4b1
WH
4209 if (!domain->pgd)
4210 return -ENOMEM;
4211 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4212 return 0;
4213}
4214
5d450806 4215static int intel_iommu_domain_init(struct iommu_domain *domain)
38717946 4216{
5d450806 4217 struct dmar_domain *dmar_domain;
38717946 4218
ab8dfe25 4219 dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE);
5d450806 4220 if (!dmar_domain) {
38717946 4221 printk(KERN_ERR
5d450806
JR
4222 "intel_iommu_domain_init: dmar_domain == NULL\n");
4223 return -ENOMEM;
38717946 4224 }
2c2e2c38 4225 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
38717946 4226 printk(KERN_ERR
5d450806 4227 "intel_iommu_domain_init() failed\n");
92d03cc8 4228 domain_exit(dmar_domain);
5d450806 4229 return -ENOMEM;
38717946 4230 }
8140a95d 4231 domain_update_iommu_cap(dmar_domain);
5d450806 4232 domain->priv = dmar_domain;
faa3d6f5 4233
8a0e715b
JR
4234 domain->geometry.aperture_start = 0;
4235 domain->geometry.aperture_end = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
4236 domain->geometry.force_aperture = true;
4237
5d450806 4238 return 0;
38717946 4239}
38717946 4240
5d450806 4241static void intel_iommu_domain_destroy(struct iommu_domain *domain)
38717946 4242{
5d450806
JR
4243 struct dmar_domain *dmar_domain = domain->priv;
4244
4245 domain->priv = NULL;
92d03cc8 4246 domain_exit(dmar_domain);
38717946 4247}
38717946 4248
4c5478c9
JR
4249static int intel_iommu_attach_device(struct iommu_domain *domain,
4250 struct device *dev)
38717946 4251{
4c5478c9 4252 struct dmar_domain *dmar_domain = domain->priv;
fe40f1e0
WH
4253 struct intel_iommu *iommu;
4254 int addr_width;
156baca8 4255 u8 bus, devfn;
faa3d6f5 4256
7207d8f9
DW
4257 /* normally dev is not mapped */
4258 if (unlikely(domain_context_mapped(dev))) {
faa3d6f5
WH
4259 struct dmar_domain *old_domain;
4260
1525a29a 4261 old_domain = find_domain(dev);
faa3d6f5 4262 if (old_domain) {
ab8dfe25 4263 if (domain_type_is_vm_or_si(dmar_domain))
bf9c9eda 4264 domain_remove_one_dev_info(old_domain, dev);
faa3d6f5
WH
4265 else
4266 domain_remove_dev_info(old_domain);
4267 }
4268 }
4269
156baca8 4270 iommu = device_to_iommu(dev, &bus, &devfn);
fe40f1e0
WH
4271 if (!iommu)
4272 return -ENODEV;
4273
4274 /* check if this iommu agaw is sufficient for max mapped address */
4275 addr_width = agaw_to_width(iommu->agaw);
a99c47a2
TL
4276 if (addr_width > cap_mgaw(iommu->cap))
4277 addr_width = cap_mgaw(iommu->cap);
4278
4279 if (dmar_domain->max_addr > (1LL << addr_width)) {
4280 printk(KERN_ERR "%s: iommu width (%d) is not "
fe40f1e0 4281 "sufficient for the mapped address (%llx)\n",
a99c47a2 4282 __func__, addr_width, dmar_domain->max_addr);
fe40f1e0
WH
4283 return -EFAULT;
4284 }
a99c47a2
TL
4285 dmar_domain->gaw = addr_width;
4286
4287 /*
4288 * Knock out extra levels of page tables if necessary
4289 */
4290 while (iommu->agaw < dmar_domain->agaw) {
4291 struct dma_pte *pte;
4292
4293 pte = dmar_domain->pgd;
4294 if (dma_pte_present(pte)) {
25cbff16
SY
4295 dmar_domain->pgd = (struct dma_pte *)
4296 phys_to_virt(dma_pte_addr(pte));
7a661013 4297 free_pgtable_page(pte);
a99c47a2
TL
4298 }
4299 dmar_domain->agaw--;
4300 }
fe40f1e0 4301
5913c9bf 4302 return domain_add_dev_info(dmar_domain, dev, CONTEXT_TT_MULTI_LEVEL);
38717946 4303}
38717946 4304
4c5478c9
JR
4305static void intel_iommu_detach_device(struct iommu_domain *domain,
4306 struct device *dev)
38717946 4307{
4c5478c9 4308 struct dmar_domain *dmar_domain = domain->priv;
4c5478c9 4309
bf9c9eda 4310 domain_remove_one_dev_info(dmar_domain, dev);
faa3d6f5 4311}
c7151a8d 4312
b146a1c9
JR
4313static int intel_iommu_map(struct iommu_domain *domain,
4314 unsigned long iova, phys_addr_t hpa,
5009065d 4315 size_t size, int iommu_prot)
faa3d6f5 4316{
dde57a21 4317 struct dmar_domain *dmar_domain = domain->priv;
fe40f1e0 4318 u64 max_addr;
dde57a21 4319 int prot = 0;
faa3d6f5 4320 int ret;
fe40f1e0 4321
dde57a21
JR
4322 if (iommu_prot & IOMMU_READ)
4323 prot |= DMA_PTE_READ;
4324 if (iommu_prot & IOMMU_WRITE)
4325 prot |= DMA_PTE_WRITE;
9cf06697
SY
4326 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
4327 prot |= DMA_PTE_SNP;
dde57a21 4328
163cc52c 4329 max_addr = iova + size;
dde57a21 4330 if (dmar_domain->max_addr < max_addr) {
fe40f1e0
WH
4331 u64 end;
4332
4333 /* check if minimum agaw is sufficient for mapped address */
8954da1f 4334 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
fe40f1e0 4335 if (end < max_addr) {
8954da1f 4336 printk(KERN_ERR "%s: iommu width (%d) is not "
fe40f1e0 4337 "sufficient for the mapped address (%llx)\n",
8954da1f 4338 __func__, dmar_domain->gaw, max_addr);
fe40f1e0
WH
4339 return -EFAULT;
4340 }
dde57a21 4341 dmar_domain->max_addr = max_addr;
fe40f1e0 4342 }
ad051221
DW
 4343 /* Round up size to the next multiple of PAGE_SIZE if it, plus
 4344 the low bits of hpa, would take us onto the next page */
88cb6a74 4345 size = aligned_nrpages(hpa, size);
ad051221
DW
4346 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
4347 hpa >> VTD_PAGE_SHIFT, size, prot);
faa3d6f5 4348 return ret;
38717946 4349}
38717946 4350
5009065d 4351static size_t intel_iommu_unmap(struct iommu_domain *domain,
ea8ea460 4352 unsigned long iova, size_t size)
38717946 4353{
dde57a21 4354 struct dmar_domain *dmar_domain = domain->priv;
ea8ea460
DW
4355 struct page *freelist = NULL;
4356 struct intel_iommu *iommu;
4357 unsigned long start_pfn, last_pfn;
4358 unsigned int npages;
4359 int iommu_id, num, ndomains, level = 0;
5cf0a76f
DW
4360
4361 /* Cope with horrid API which requires us to unmap more than the
4362 size argument if it happens to be a large-page mapping. */
4363 if (!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level))
4364 BUG();
4365
4366 if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
4367 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4b99d352 4368
ea8ea460
DW
4369 start_pfn = iova >> VTD_PAGE_SHIFT;
4370 last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
4371
4372 freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
4373
4374 npages = last_pfn - start_pfn + 1;
4375
4376 for_each_set_bit(iommu_id, dmar_domain->iommu_bmp, g_num_of_iommus) {
4377 iommu = g_iommus[iommu_id];
4378
4379 /*
4380 * find bit position of dmar_domain
4381 */
4382 ndomains = cap_ndoms(iommu->cap);
4383 for_each_set_bit(num, iommu->domain_ids, ndomains) {
4384 if (iommu->domains[num] == dmar_domain)
4385 iommu_flush_iotlb_psi(iommu, num, start_pfn,
4386 npages, !freelist, 0);
4387 }
4388
4389 }
4390
4391 dma_free_pagelist(freelist);
fe40f1e0 4392
163cc52c
DW
4393 if (dmar_domain->max_addr == iova + size)
4394 dmar_domain->max_addr = iova;
b146a1c9 4395
5cf0a76f 4396 return size;
38717946 4397}
38717946 4398
d14d6577 4399static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
bb5547ac 4400 dma_addr_t iova)
38717946 4401{
d14d6577 4402 struct dmar_domain *dmar_domain = domain->priv;
38717946 4403 struct dma_pte *pte;
5cf0a76f 4404 int level = 0;
faa3d6f5 4405 u64 phys = 0;
38717946 4406
5cf0a76f 4407 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
38717946 4408 if (pte)
faa3d6f5 4409 phys = dma_pte_addr(pte);
38717946 4410
faa3d6f5 4411 return phys;
38717946 4412}
a8bcbb0d 4413
dbb9fd86
SY
4414static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
4415 unsigned long cap)
4416{
4417 struct dmar_domain *dmar_domain = domain->priv;
4418
4419 if (cap == IOMMU_CAP_CACHE_COHERENCY)
4420 return dmar_domain->iommu_snooping;
323f99cb 4421 if (cap == IOMMU_CAP_INTR_REMAP)
95a02e97 4422 return irq_remapping_enabled;
dbb9fd86
SY
4423
4424 return 0;
4425}
4426
abdfdde2
AW
4427static int intel_iommu_add_device(struct device *dev)
4428{
a5459cfe 4429 struct intel_iommu *iommu;
abdfdde2 4430 struct iommu_group *group;
156baca8 4431 u8 bus, devfn;
70ae6f0d 4432
a5459cfe
AW
4433 iommu = device_to_iommu(dev, &bus, &devfn);
4434 if (!iommu)
70ae6f0d
AW
4435 return -ENODEV;
4436
a5459cfe 4437 iommu_device_link(iommu->iommu_dev, dev);
a4ff1fc2 4438
e17f9ff4 4439 group = iommu_group_get_for_dev(dev);
783f157b 4440
e17f9ff4
AW
4441 if (IS_ERR(group))
4442 return PTR_ERR(group);
bcb71abe 4443
abdfdde2 4444 iommu_group_put(group);
e17f9ff4 4445 return 0;
abdfdde2 4446}
70ae6f0d 4447
abdfdde2
AW
4448static void intel_iommu_remove_device(struct device *dev)
4449{
a5459cfe
AW
4450 struct intel_iommu *iommu;
4451 u8 bus, devfn;
4452
4453 iommu = device_to_iommu(dev, &bus, &devfn);
4454 if (!iommu)
4455 return;
4456
abdfdde2 4457 iommu_group_remove_device(dev);
a5459cfe
AW
4458
4459 iommu_device_unlink(iommu->iommu_dev, dev);
70ae6f0d
AW
4460}
4461
b22f6434 4462static const struct iommu_ops intel_iommu_ops = {
a8bcbb0d
JR
4463 .domain_init = intel_iommu_domain_init,
4464 .domain_destroy = intel_iommu_domain_destroy,
4465 .attach_dev = intel_iommu_attach_device,
4466 .detach_dev = intel_iommu_detach_device,
b146a1c9
JR
4467 .map = intel_iommu_map,
4468 .unmap = intel_iommu_unmap,
a8bcbb0d 4469 .iova_to_phys = intel_iommu_iova_to_phys,
dbb9fd86 4470 .domain_has_cap = intel_iommu_domain_has_cap,
abdfdde2
AW
4471 .add_device = intel_iommu_add_device,
4472 .remove_device = intel_iommu_remove_device,
6d1c56a9 4473 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
a8bcbb0d 4474};
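/*
 * These callbacks implement the generic IOMMU API for devices behind VT-d;
 * they are wired up by bus_set_iommu(&pci_bus_type, &intel_iommu_ops) in
 * intel_iommu_init(). A rough sketch of a consumer (KVM device assignment
 * and VFIO follow this shape), assuming a hypothetical PCI device "pdev"
 * and a page-aligned physical buffer at "phys":
 *
 *	struct iommu_domain *domain = iommu_domain_alloc(&pci_bus_type);
 *
 *	if (!domain)
 *		return -ENOMEM;
 *	if (iommu_attach_device(domain, &pdev->dev))
 *		goto out_free;
 *	if (iommu_map(domain, IOVA_BASE, phys, PAGE_SIZE,
 *		      IOMMU_READ | IOMMU_WRITE))
 *		goto out_detach;
 *	... device DMA to IOVA_BASE now hits "phys" ...
 *	iommu_unmap(domain, IOVA_BASE, PAGE_SIZE);
 * out_detach:
 *	iommu_detach_device(domain, &pdev->dev);
 * out_free:
 *	iommu_domain_free(domain);
 *
 * where IOVA_BASE is an arbitrary, page-aligned IOVA chosen by the caller.
 */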
9af88143 4475
9452618e
DV
4476static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
4477{
4478 /* G4x/GM45 integrated gfx dmar support is totally busted. */
4479 printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
4480 dmar_map_gfx = 0;
4481}
4482
4483DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
4484DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
4485DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
4486DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
4487DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
4488DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
4489DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
4490
d34d6517 4491static void quirk_iommu_rwbf(struct pci_dev *dev)
9af88143
DW
4492{
4493 /*
4494 * Mobile 4 Series Chipset neglects to set RWBF capability,
210561ff 4495 * but needs it. Same seems to hold for the desktop versions.
9af88143
DW
4496 */
4497 printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
4498 rwbf_quirk = 1;
4499}
4500
4501DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
210561ff
DV
4502DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
4503DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
4504DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
4505DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
4506DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
4507DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
e0fc7e0b 4508
eecfd57f
AJ
4509#define GGC 0x52
4510#define GGC_MEMORY_SIZE_MASK (0xf << 8)
4511#define GGC_MEMORY_SIZE_NONE (0x0 << 8)
4512#define GGC_MEMORY_SIZE_1M (0x1 << 8)
4513#define GGC_MEMORY_SIZE_2M (0x3 << 8)
4514#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
4515#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
4516#define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
4517#define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
4518
d34d6517 4519static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
9eecabcb
DW
4520{
4521 unsigned short ggc;
4522
eecfd57f 4523 if (pci_read_config_word(dev, GGC, &ggc))
9eecabcb
DW
4524 return;
4525
eecfd57f 4526 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
9eecabcb
DW
4527 printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
4528 dmar_map_gfx = 0;
6fbcfb3e
DW
4529 } else if (dmar_map_gfx) {
4530 /* we have to ensure the gfx device is idle before we flush */
4531 printk(KERN_INFO "DMAR: Disabling batched IOTLB flush on Ironlake\n");
4532 intel_iommu_strict = 1;
4533 }
9eecabcb
DW
4534}
4535DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
4536DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
4537DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
4538DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
4539
e0fc7e0b
DW
4540/* On Tylersburg chipsets, some BIOSes have been known to enable the
4541 ISOCH DMAR unit for the Azalia sound device, but not give it any
4542 TLB entries, which causes it to deadlock. Check for that. We do
4543 this in a function called from init_dmars(), instead of in a PCI
4544 quirk, because we don't want to print the obnoxious "BIOS broken"
4545 message if VT-d is actually disabled.
4546*/
4547static void __init check_tylersburg_isoch(void)
4548{
4549 struct pci_dev *pdev;
4550 uint32_t vtisochctrl;
4551
4552 /* If there's no Azalia in the system anyway, forget it. */
4553 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
4554 if (!pdev)
4555 return;
4556 pci_dev_put(pdev);
4557
4558 /* System Management Registers. Might be hidden, in which case
4559 we can't do the sanity check. But that's OK, because the
4560 known-broken BIOSes _don't_ actually hide it, so far. */
4561 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
4562 if (!pdev)
4563 return;
4564
4565 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
4566 pci_dev_put(pdev);
4567 return;
4568 }
4569
4570 pci_dev_put(pdev);
4571
4572 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
4573 if (vtisochctrl & 1)
4574 return;
4575
4576 /* Drop all bits other than the number of TLB entries */
4577 vtisochctrl &= 0x1c;
4578
4579 /* If we have the recommended number of TLB entries (16), fine. */
4580 if (vtisochctrl == 0x10)
4581 return;
4582
4583 /* Zero TLB entries? You get to ride the short bus to school. */
4584 if (!vtisochctrl) {
4585 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
4586 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4587 dmi_get_system_info(DMI_BIOS_VENDOR),
4588 dmi_get_system_info(DMI_BIOS_VERSION),
4589 dmi_get_system_info(DMI_PRODUCT_VERSION));
4590 iommu_identity_mapping |= IDENTMAP_AZALIA;
4591 return;
4592 }
4593
4594 printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
4595 vtisochctrl);
4596}