// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright © 2006-2014 Intel Corporation.
 *
 * Authors: David Woodhouse <dwmw2@infradead.org>,
 *          Ashok Raj <ashok.raj@intel.com>,
 *          Shaohua Li <shaohua.li@intel.com>,
 *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
 *          Fenghua Yu <fenghua.yu@intel.com>
 *          Joerg Roedel <jroedel@suse.de>
 */

#define pr_fmt(fmt)	"DMAR: " fmt
#define dev_fmt(fmt)	pr_fmt(fmt)

#include <linux/init.h>
#include <linux/bitmap.h>
#include <linux/debugfs.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/dma-mapping.h>
#include <linux/mempool.h>
#include <linux/memory.h>
#include <linux/cpu.h>
#include <linux/timer.h>
#include <linux/io.h>
#include <linux/iova.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
#include <linux/syscore_ops.h>
#include <linux/tboot.h>
#include <linux/dmi.h>
#include <linux/pci-ats.h>
#include <linux/memblock.h>
#include <linux/dma-contiguous.h>
#include <linux/dma-direct.h>
#include <linux/crash_dump.h>
#include <linux/numa.h>
#include <asm/irq_remapping.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>

#include "irq_remapping.h"
#include "intel-pasid.h"

#define ROOT_SIZE		VTD_PAGE_SIZE
#define CONTEXT_SIZE		VTD_PAGE_SIZE

#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)

#define IOAPIC_RANGE_START	(0xfee00000)
#define IOAPIC_RANGE_END	(0xfeefffff)
#define IOVA_START_ADDR		(0x1000)

#define DEFAULT_DOMAIN_ADDRESS_WIDTH 57

#define MAX_AGAW_WIDTH 64
#define MAX_AGAW_PFN_WIDTH	(MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)

#define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)

/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
   to match. That way, we can use 'unsigned long' for PFNs with impunity. */
#define DOMAIN_MAX_PFN(gaw)	((unsigned long) min_t(uint64_t, \
				__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
#define DOMAIN_MAX_ADDR(gaw)	(((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
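/*
 * Worked example for the macros above (illustrative only, assuming gaw = 48
 * and VTD_PAGE_SHIFT = 12): __DOMAIN_MAX_PFN(48) = 2^36 - 1 and
 * __DOMAIN_MAX_ADDR(48) = 2^48 - 1.  On a 64-bit build DOMAIN_MAX_PFN(48)
 * stays 2^36 - 1; on a 32-bit build it is clamped to ULONG_MAX so that PFNs
 * always fit in an unsigned long.
 */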

/* IO virtual address start page frame number */
#define IOVA_START_PFN		(1)

#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)

/* page table handling */
#define LEVEL_STRIDE		(9)
#define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)

/*
 * This bitmap is used to advertise the page sizes our hardware supports
 * to the IOMMU core, which will then use this information to split
 * physically contiguous memory regions it is mapping into page sizes
 * that we support.
 *
 * Traditionally the IOMMU core just handed us the mappings directly,
 * after making sure the size is an order of a 4KiB page and that the
 * mapping has natural alignment.
 *
 * To retain this behavior, we currently advertise that we support
 * all page sizes that are an order of 4KiB.
 *
 * If at some point we'd like to utilize the IOMMU core's new behavior,
 * we could change this to advertise the real page sizes we support.
 */
#define INTEL_IOMMU_PGSIZES	(~0xFFFUL)

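/*
 * Example (illustrative): ~0xFFFUL sets every bit from bit 12 upwards, so
 * the IOMMU core sees 4KiB, 8KiB, 16KiB, ... as "supported" orders and will
 * not split a naturally aligned power-of-two region before handing it to us.
 */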
static inline int agaw_to_level(int agaw)
{
	return agaw + 2;
}

static inline int agaw_to_width(int agaw)
{
	return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
}

static inline int width_to_agaw(int width)
{
	return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
}

static inline unsigned int level_to_offset_bits(int level)
{
	return (level - 1) * LEVEL_STRIDE;
}

static inline int pfn_level_offset(unsigned long pfn, int level)
{
	return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
}

static inline unsigned long level_mask(int level)
{
	return -1UL << level_to_offset_bits(level);
}

static inline unsigned long level_size(int level)
{
	return 1UL << level_to_offset_bits(level);
}

static inline unsigned long align_to_level(unsigned long pfn, int level)
{
	return (pfn + level_size(level) - 1) & level_mask(level);
}
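/*
 * Worked example for the helpers above (illustrative): agaw 2 describes a
 * 4-level table (agaw_to_level(2) == 4) covering 30 + 2 * 9 = 48 bits of
 * address, and width_to_agaw(48) maps back to 2.  level_size(2) is 2^9
 * pages, i.e. one level-2 PTE spans a 2MiB aligned chunk of IOVA space.
 */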

static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
{
	return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
}

/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
   are never going to work. */
static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
{
	return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
}

static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
{
	return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
}
static inline unsigned long page_to_dma_pfn(struct page *pg)
{
	return mm_to_dma_pfn(page_to_pfn(pg));
}
static inline unsigned long virt_to_dma_pfn(void *p)
{
	return page_to_dma_pfn(virt_to_page(p));
}
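/*
 * Illustrative note: with 4KiB MM pages (PAGE_SHIFT == VTD_PAGE_SHIFT == 12)
 * the two conversions above are identity operations; with 16KiB MM pages one
 * mm pfn corresponds to four consecutive VT-d (dma) pfns, hence the shifts.
 */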
168
d9630fe9
WH
169/* global iommu list, set NULL for ignored DMAR units */
170static struct intel_iommu **g_iommus;
171
e0fc7e0b 172static void __init check_tylersburg_isoch(void);
9af88143
DW
173static int rwbf_quirk;
174
b779260b
JC
175/*
176 * set to 1 to panic kernel if can't successfully enable VT-d
177 * (used when kernel is launched w/ TXT)
178 */
179static int force_on = 0;
bfd20f1c 180int intel_iommu_tboot_noforce;
89a6079d 181static int no_platform_optin;
b779260b 182
46b08e1a 183#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
46b08e1a 184
091d42e4
JR
185/*
186 * Take a root_entry and return the Lower Context Table Pointer (LCTP)
187 * if marked present.
188 */
189static phys_addr_t root_entry_lctp(struct root_entry *re)
190{
191 if (!(re->lo & 1))
192 return 0;
193
194 return re->lo & VTD_PAGE_MASK;
195}
196
197/*
198 * Take a root_entry and return the Upper Context Table Pointer (UCTP)
199 * if marked present.
200 */
201static phys_addr_t root_entry_uctp(struct root_entry *re)
202{
203 if (!(re->hi & 1))
204 return 0;
46b08e1a 205
091d42e4
JR
206 return re->hi & VTD_PAGE_MASK;
207}
c07e7d21 208
cf484d0e
JR
209static inline void context_clear_pasid_enable(struct context_entry *context)
210{
211 context->lo &= ~(1ULL << 11);
212}
213
214static inline bool context_pasid_enabled(struct context_entry *context)
215{
216 return !!(context->lo & (1ULL << 11));
217}
218
219static inline void context_set_copied(struct context_entry *context)
220{
221 context->hi |= (1ull << 3);
222}
223
224static inline bool context_copied(struct context_entry *context)
225{
226 return !!(context->hi & (1ULL << 3));
227}
228
229static inline bool __context_present(struct context_entry *context)
c07e7d21
MM
230{
231 return (context->lo & 1);
232}
cf484d0e 233
26b86092 234bool context_present(struct context_entry *context)
cf484d0e
JR
235{
236 return context_pasid_enabled(context) ?
237 __context_present(context) :
238 __context_present(context) && !context_copied(context);
239}
240
c07e7d21
MM
241static inline void context_set_present(struct context_entry *context)
242{
243 context->lo |= 1;
244}
245
246static inline void context_set_fault_enable(struct context_entry *context)
247{
248 context->lo &= (((u64)-1) << 2) | 1;
249}
250
c07e7d21
MM
251static inline void context_set_translation_type(struct context_entry *context,
252 unsigned long value)
253{
254 context->lo &= (((u64)-1) << 4) | 3;
255 context->lo |= (value & 3) << 2;
256}
257
258static inline void context_set_address_root(struct context_entry *context,
259 unsigned long value)
260{
1a2262f9 261 context->lo &= ~VTD_PAGE_MASK;
c07e7d21
MM
262 context->lo |= value & VTD_PAGE_MASK;
263}
264
265static inline void context_set_address_width(struct context_entry *context,
266 unsigned long value)
267{
268 context->hi |= value & 7;
269}
270
271static inline void context_set_domain_id(struct context_entry *context,
272 unsigned long value)
273{
274 context->hi |= (value & ((1 << 16) - 1)) << 8;
275}
276
dbcd861f
JR
277static inline int context_domain_id(struct context_entry *c)
278{
279 return((c->hi >> 8) & 0xffff);
280}
281
c07e7d21
MM
282static inline void context_clear_entry(struct context_entry *context)
283{
284 context->lo = 0;
285 context->hi = 0;
286}
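/*
 * Summary of the context-entry bit fields touched by the helpers above
 * (derived from the setters; the VT-d spec is authoritative):
 *   lo[0]      present bit
 *   lo[1]      fault processing disable (cleared by context_set_fault_enable)
 *   lo[3:2]    translation type
 *   lo[11]     PASID-enable marker consulted while copying tables
 *   lo[63:12]  second-level page-table root (VTD_PAGE_MASK)
 *   hi[2:0]    address width (AGAW)
 *   hi[3]      software "copied" marker
 *   hi[23:8]   domain id
 */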

/*
 * This domain is a statically identity mapping domain.
 *	1. This domain creates a static 1:1 mapping to all usable memory.
 *	2. It maps to each iommu if successful.
 *	3. Each iommu maps to this domain if successful.
 */
static struct dmar_domain *si_domain;
static int hw_pass_through = 1;

/*
 * Domain represents a virtual machine; more than one device
 * across iommus may be owned by one domain, e.g. kvm guest.
 */
#define DOMAIN_FLAG_VIRTUAL_MACHINE	(1 << 0)

/* si_domain contains multiple devices */
#define DOMAIN_FLAG_STATIC_IDENTITY	(1 << 1)

#define for_each_domain_iommu(idx, domain)			\
	for (idx = 0; idx < g_num_of_iommus; idx++)		\
		if (domain->iommu_refcnt[idx])

struct dmar_rmrr_unit {
	struct list_head list;		/* list of rmrr units	*/
	struct acpi_dmar_header *hdr;	/* ACPI header		*/
	u64	base_address;		/* reserved base address */
	u64	end_address;		/* reserved end address */
	struct dmar_dev_scope *devices;	/* target devices */
	int	devices_cnt;		/* target device count */
	struct iommu_resv_region *resv;	/* reserved region handle */
};

struct dmar_atsr_unit {
	struct list_head list;		/* list of ATSR units */
	struct acpi_dmar_header *hdr;	/* ACPI header */
	struct dmar_dev_scope *devices;	/* target devices */
	int devices_cnt;		/* target device count */
	u8 include_all:1;		/* include all ports */
};

static LIST_HEAD(dmar_atsr_units);
static LIST_HEAD(dmar_rmrr_units);

#define for_each_rmrr_units(rmrr) \
	list_for_each_entry(rmrr, &dmar_rmrr_units, list)

/* bitmap for indexing intel_iommus */
static int g_num_of_iommus;

static void domain_exit(struct dmar_domain *domain);
static void domain_remove_dev_info(struct dmar_domain *domain);
static void dmar_remove_one_dev_info(struct device *dev);
static void __dmar_remove_one_dev_info(struct device_domain_info *info);
static void domain_context_clear(struct intel_iommu *iommu,
				 struct device *dev);
static int domain_detach_iommu(struct dmar_domain *domain,
			       struct intel_iommu *iommu);

#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
int dmar_disabled = 0;
#else
int dmar_disabled = 1;
#endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/

int intel_iommu_enabled = 0;
EXPORT_SYMBOL_GPL(intel_iommu_enabled);

static int dmar_map_gfx = 1;
static int dmar_forcedac;
static int intel_iommu_strict;
static int intel_iommu_superpage = 1;
static int intel_iommu_sm;
static int iommu_identity_mapping;

#define IDENTMAP_ALL		1
#define IDENTMAP_GFX		2
#define IDENTMAP_AZALIA		4

#define sm_supported(iommu)	(intel_iommu_sm && ecap_smts((iommu)->ecap))
#define pasid_supported(iommu)	(sm_supported(iommu) &&			\
				 ecap_pasid((iommu)->ecap))

int intel_iommu_gfx_mapped;
EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);

#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
static DEFINE_SPINLOCK(device_domain_lock);
static LIST_HEAD(device_domain_list);

/*
 * Iterate over elements in device_domain_list and call the specified
 * callback @fn against each element.
 */
int for_each_device_domain(int (*fn)(struct device_domain_info *info,
				     void *data), void *data)
{
	int ret = 0;
	unsigned long flags;
	struct device_domain_info *info;

	spin_lock_irqsave(&device_domain_lock, flags);
	list_for_each_entry(info, &device_domain_list, global) {
		ret = fn(info, data);
		if (ret) {
			spin_unlock_irqrestore(&device_domain_lock, flags);
			return ret;
		}
	}
	spin_unlock_irqrestore(&device_domain_lock, flags);

	return 0;
}
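/*
 * Usage sketch for for_each_device_domain() (hypothetical caller, not part
 * of this file): the callback runs under device_domain_lock and a non-zero
 * return value stops the walk early.
 *
 *	static int count_info(struct device_domain_info *info, void *data)
 *	{
 *		(*(int *)data)++;
 *		return 0;
 *	}
 *	...
 *	int n = 0;
 *	for_each_device_domain(count_info, &n);
 */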

const struct iommu_ops intel_iommu_ops;

static bool translation_pre_enabled(struct intel_iommu *iommu)
{
	return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
}

static void clear_translation_pre_enabled(struct intel_iommu *iommu)
{
	iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
}

static void init_translation_status(struct intel_iommu *iommu)
{
	u32 gsts;

	gsts = readl(iommu->reg + DMAR_GSTS_REG);
	if (gsts & DMA_GSTS_TES)
		iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
}

/* Convert a generic 'struct iommu_domain' to the private 'struct dmar_domain' */
static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
{
	return container_of(dom, struct dmar_domain, domain);
}

static int __init intel_iommu_setup(char *str)
{
	if (!str)
		return -EINVAL;
	while (*str) {
		if (!strncmp(str, "on", 2)) {
			dmar_disabled = 0;
			pr_info("IOMMU enabled\n");
		} else if (!strncmp(str, "off", 3)) {
			dmar_disabled = 1;
			no_platform_optin = 1;
			pr_info("IOMMU disabled\n");
		} else if (!strncmp(str, "igfx_off", 8)) {
			dmar_map_gfx = 0;
			pr_info("Disable GFX device mapping\n");
		} else if (!strncmp(str, "forcedac", 8)) {
			pr_info("Forcing DAC for PCI devices\n");
			dmar_forcedac = 1;
		} else if (!strncmp(str, "strict", 6)) {
			pr_info("Disable batched IOTLB flush\n");
			intel_iommu_strict = 1;
		} else if (!strncmp(str, "sp_off", 6)) {
			pr_info("Disable supported super page\n");
			intel_iommu_superpage = 0;
		} else if (!strncmp(str, "sm_on", 5)) {
			pr_info("Intel-IOMMU: scalable mode supported\n");
			intel_iommu_sm = 1;
		} else if (!strncmp(str, "tboot_noforce", 13)) {
			printk(KERN_INFO
				"Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
			intel_iommu_tboot_noforce = 1;
		}

		str += strcspn(str, ",");
		while (*str == ',')
			str++;
	}
	return 0;
}
__setup("intel_iommu=", intel_iommu_setup);

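/*
 * Example kernel command line accepted by the parser above (illustrative):
 *
 *	intel_iommu=on,sm_on,strict
 *
 * enables the IOMMU, opts in to scalable mode and disables batched IOTLB
 * flushing; options are comma separated and unknown tokens are ignored.
 */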
static struct kmem_cache *iommu_domain_cache;
static struct kmem_cache *iommu_devinfo_cache;

static struct dmar_domain* get_iommu_domain(struct intel_iommu *iommu, u16 did)
{
	struct dmar_domain **domains;
	int idx = did >> 8;

	domains = iommu->domains[idx];
	if (!domains)
		return NULL;

	return domains[did & 0xff];
}

static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
			     struct dmar_domain *domain)
{
	struct dmar_domain **domains;
	int idx = did >> 8;

	if (!iommu->domains[idx]) {
		size_t size = 256 * sizeof(struct dmar_domain *);
		iommu->domains[idx] = kzalloc(size, GFP_ATOMIC);
	}

	domains = iommu->domains[idx];
	if (WARN_ON(!domains))
		return;
	else
		domains[did & 0xff] = domain;
}

void *alloc_pgtable_page(int node)
{
	struct page *page;
	void *vaddr = NULL;

	page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
	if (page)
		vaddr = page_address(page);
	return vaddr;
}

void free_pgtable_page(void *vaddr)
{
	free_page((unsigned long)vaddr);
}

static inline void *alloc_domain_mem(void)
{
	return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
}

static void free_domain_mem(void *vaddr)
{
	kmem_cache_free(iommu_domain_cache, vaddr);
}

static inline void *alloc_devinfo_mem(void)
{
	return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
}

static inline void free_devinfo_mem(void *vaddr)
{
	kmem_cache_free(iommu_devinfo_cache, vaddr);
}

static inline int domain_type_is_vm(struct dmar_domain *domain)
{
	return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE;
}

static inline int domain_type_is_si(struct dmar_domain *domain)
{
	return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
}

static inline int domain_type_is_vm_or_si(struct dmar_domain *domain)
{
	return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE |
				DOMAIN_FLAG_STATIC_IDENTITY);
}

static inline int domain_pfn_supported(struct dmar_domain *domain,
					unsigned long pfn)
{
	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;

	return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
}

static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
{
	unsigned long sagaw;
	int agaw = -1;

	sagaw = cap_sagaw(iommu->cap);
	for (agaw = width_to_agaw(max_gaw);
	     agaw >= 0; agaw--) {
		if (test_bit(agaw, &sagaw))
			break;
	}

	return agaw;
}

/*
 * Calculate max SAGAW for each iommu.
 */
int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
{
	return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
}

/*
 * Calculate agaw for each iommu.
 * "SAGAW" may be different across iommus; use a default agaw, and
 * fall back to a smaller supported agaw for iommus that don't support
 * the default agaw.
 */
int iommu_calculate_agaw(struct intel_iommu *iommu)
{
	return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
}

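/*
 * Illustrative example for the two helpers above: with
 * DEFAULT_DOMAIN_ADDRESS_WIDTH == 57, width_to_agaw(57) == 3 (a 5-level
 * table).  If the iommu's SAGAW field does not advertise bit 3, the loop in
 * __iommu_calculate_agaw() falls back to agaw 2 (4-level, 48-bit) and so on,
 * and returns -1 if nothing in SAGAW matches.
 */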
/* This function only returns a single iommu in a domain */
struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
{
	int iommu_id;

	/* si_domain and vm domain should not get here. */
	BUG_ON(domain_type_is_vm_or_si(domain));
	for_each_domain_iommu(iommu_id, domain)
		break;

	if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
		return NULL;

	return g_iommus[iommu_id];
}

static void domain_update_iommu_coherency(struct dmar_domain *domain)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	bool found = false;
	int i;

	domain->iommu_coherency = 1;

	for_each_domain_iommu(i, domain) {
		found = true;
		if (!ecap_coherent(g_iommus[i]->ecap)) {
			domain->iommu_coherency = 0;
			break;
		}
	}
	if (found)
		return;

	/* No hardware attached; use lowest common denominator */
	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (!ecap_coherent(iommu->ecap)) {
			domain->iommu_coherency = 0;
			break;
		}
	}
	rcu_read_unlock();
}

static int domain_update_iommu_snooping(struct intel_iommu *skip)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	int ret = 1;

	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (iommu != skip) {
			if (!ecap_sc_support(iommu->ecap)) {
				ret = 0;
				break;
			}
		}
	}
	rcu_read_unlock();

	return ret;
}

static int domain_update_iommu_superpage(struct intel_iommu *skip)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	int mask = 0xf;

	if (!intel_iommu_superpage) {
		return 0;
	}

	/* set iommu_superpage to the smallest common denominator */
	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (iommu != skip) {
			mask &= cap_super_page_val(iommu->cap);
			if (!mask)
				break;
		}
	}
	rcu_read_unlock();

	return fls(mask);
}

/* Some capabilities may be different across iommus */
static void domain_update_iommu_cap(struct dmar_domain *domain)
{
	domain_update_iommu_coherency(domain);
	domain->iommu_snooping = domain_update_iommu_snooping(NULL);
	domain->iommu_superpage = domain_update_iommu_superpage(NULL);
}

struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
					 u8 devfn, int alloc)
{
	struct root_entry *root = &iommu->root_entry[bus];
	struct context_entry *context;
	u64 *entry;

	entry = &root->lo;
	if (sm_supported(iommu)) {
		if (devfn >= 0x80) {
			devfn -= 0x80;
			entry = &root->hi;
		}
		devfn *= 2;
	}
	if (*entry & 1)
		context = phys_to_virt(*entry & VTD_PAGE_MASK);
	else {
		unsigned long phy_addr;
		if (!alloc)
			return NULL;

		context = alloc_pgtable_page(iommu->node);
		if (!context)
			return NULL;

		__iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
		phy_addr = virt_to_phys((void *)context);
		*entry = phy_addr | 1;
		__iommu_flush_cache(iommu, entry, sizeof(*entry));
	}
	return &context[devfn];
}

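/*
 * Note on the lookup above (summarised from the code; the VT-d spec is
 * authoritative): in legacy mode one 128-bit root entry per bus points to a
 * single 256-entry context table via root->lo.  In scalable mode the root
 * entry is split, root->lo covering devfn 0x00-0x7f and root->hi covering
 * devfn 0x80-0xff, and each half points to a table of 128 scalable-mode
 * context entries that are twice the legacy size (hence "devfn *= 2").
 */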
static int iommu_dummy(struct device *dev)
{
	return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
}

static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
{
	struct dmar_drhd_unit *drhd = NULL;
	struct intel_iommu *iommu;
	struct device *tmp;
	struct pci_dev *ptmp, *pdev = NULL;
	u16 segment = 0;
	int i;

	if (iommu_dummy(dev))
		return NULL;

	if (dev_is_pci(dev)) {
		struct pci_dev *pf_pdev;

		pdev = to_pci_dev(dev);

#ifdef CONFIG_X86
		/* VMD child devices currently cannot be handled individually */
		if (is_vmd(pdev->bus))
			return NULL;
#endif

		/* VFs aren't listed in scope tables; we need to look up
		 * the PF instead to find the IOMMU. */
		pf_pdev = pci_physfn(pdev);
		dev = &pf_pdev->dev;
		segment = pci_domain_nr(pdev->bus);
	} else if (has_acpi_companion(dev))
		dev = &ACPI_COMPANION(dev)->dev;

	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (pdev && segment != drhd->segment)
			continue;

		for_each_active_dev_scope(drhd->devices,
					  drhd->devices_cnt, i, tmp) {
			if (tmp == dev) {
				/* For a VF use its original BDF# not that of the PF
				 * which we used for the IOMMU lookup. Strictly speaking
				 * we could do this for all PCI devices; we only need to
				 * get the BDF# from the scope table for ACPI matches. */
				if (pdev && pdev->is_virtfn)
					goto got_pdev;

				*bus = drhd->devices[i].bus;
				*devfn = drhd->devices[i].devfn;
				goto out;
			}

			if (!pdev || !dev_is_pci(tmp))
				continue;

			ptmp = to_pci_dev(tmp);
			if (ptmp->subordinate &&
			    ptmp->subordinate->number <= pdev->bus->number &&
			    ptmp->subordinate->busn_res.end >= pdev->bus->number)
				goto got_pdev;
		}

		if (pdev && drhd->include_all) {
		got_pdev:
			*bus = pdev->bus->number;
			*devfn = pdev->devfn;
			goto out;
		}
	}
	iommu = NULL;
 out:
	rcu_read_unlock();

	return iommu;
}

static void domain_flush_cache(struct dmar_domain *domain,
			       void *addr, int size)
{
	if (!domain->iommu_coherency)
		clflush_cache_range(addr, size);
}

static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
	struct context_entry *context;
	int ret = 0;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	context = iommu_context_addr(iommu, bus, devfn, 0);
	if (context)
		ret = context_present(context);
	spin_unlock_irqrestore(&iommu->lock, flags);
	return ret;
}

static void free_context_table(struct intel_iommu *iommu)
{
	int i;
	unsigned long flags;
	struct context_entry *context;

	spin_lock_irqsave(&iommu->lock, flags);
	if (!iommu->root_entry) {
		goto out;
	}
	for (i = 0; i < ROOT_ENTRY_NR; i++) {
		context = iommu_context_addr(iommu, i, 0, 0);
		if (context)
			free_pgtable_page(context);

		if (!sm_supported(iommu))
			continue;

		context = iommu_context_addr(iommu, i, 0x80, 0);
		if (context)
			free_pgtable_page(context);

	}
	free_pgtable_page(iommu->root_entry);
	iommu->root_entry = NULL;
out:
	spin_unlock_irqrestore(&iommu->lock, flags);
}

static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
				      unsigned long pfn, int *target_level)
{
	struct dma_pte *parent, *pte;
	int level = agaw_to_level(domain->agaw);
	int offset;

	BUG_ON(!domain->pgd);

	if (!domain_pfn_supported(domain, pfn))
		/* Address beyond IOMMU's addressing capabilities. */
		return NULL;

	parent = domain->pgd;

	while (1) {
		void *tmp_page;

		offset = pfn_level_offset(pfn, level);
		pte = &parent[offset];
		if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
			break;
		if (level == *target_level)
			break;

		if (!dma_pte_present(pte)) {
			uint64_t pteval;

			tmp_page = alloc_pgtable_page(domain->nid);

			if (!tmp_page)
				return NULL;

			domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
			pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
			if (cmpxchg64(&pte->val, 0ULL, pteval))
				/* Someone else set it while we were thinking; use theirs. */
				free_pgtable_page(tmp_page);
			else
				domain_flush_cache(domain, pte, sizeof(*pte));
		}
		if (level == 1)
			break;

		parent = phys_to_virt(dma_pte_addr(pte));
		level--;
	}

	if (!*target_level)
		*target_level = level;

	return pte;
}
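/*
 * Illustrative walk (assuming agaw 2, i.e. a 4-level/48-bit table): for a
 * request with *target_level == 1, pfn_to_dma_pte() indexes 9 bits of the
 * pfn per level (pfn_level_offset()), allocating missing intermediate
 * tables with cmpxchg64() so concurrent mappers can race safely, and returns
 * the level-1 (4KiB) PTE slot.  Passing *target_level == 2 stops one level
 * higher and yields the slot used for a 2MiB superpage.
 */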

/* return address's pte at specific level */
static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
					 unsigned long pfn,
					 int level, int *large_page)
{
	struct dma_pte *parent, *pte;
	int total = agaw_to_level(domain->agaw);
	int offset;

	parent = domain->pgd;
	while (level <= total) {
		offset = pfn_level_offset(pfn, total);
		pte = &parent[offset];
		if (level == total)
			return pte;

		if (!dma_pte_present(pte)) {
			*large_page = total;
			break;
		}

		if (dma_pte_superpage(pte)) {
			*large_page = total;
			return pte;
		}

		parent = phys_to_virt(dma_pte_addr(pte));
		total--;
	}
	return NULL;
}

/* clear last level pte, a tlb flush should be followed */
static void dma_pte_clear_range(struct dmar_domain *domain,
				unsigned long start_pfn,
				unsigned long last_pfn)
{
	unsigned int large_page;
	struct dma_pte *first_pte, *pte;

	BUG_ON(!domain_pfn_supported(domain, start_pfn));
	BUG_ON(!domain_pfn_supported(domain, last_pfn));
	BUG_ON(start_pfn > last_pfn);

	/* we don't need lock here; nobody else touches the iova range */
	do {
		large_page = 1;
		first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
		if (!pte) {
			start_pfn = align_to_level(start_pfn + 1, large_page + 1);
			continue;
		}
		do {
			dma_clear_pte(pte);
			start_pfn += lvl_to_nr_pages(large_page);
			pte++;
		} while (start_pfn <= last_pfn && !first_pte_in_page(pte));

		domain_flush_cache(domain, first_pte,
				   (void *)pte - (void *)first_pte);

	} while (start_pfn && start_pfn <= last_pfn);
}

static void dma_pte_free_level(struct dmar_domain *domain, int level,
			       int retain_level, struct dma_pte *pte,
			       unsigned long pfn, unsigned long start_pfn,
			       unsigned long last_pfn)
{
	pfn = max(start_pfn, pfn);
	pte = &pte[pfn_level_offset(pfn, level)];

	do {
		unsigned long level_pfn;
		struct dma_pte *level_pte;

		if (!dma_pte_present(pte) || dma_pte_superpage(pte))
			goto next;

		level_pfn = pfn & level_mask(level);
		level_pte = phys_to_virt(dma_pte_addr(pte));

		if (level > 2) {
			dma_pte_free_level(domain, level - 1, retain_level,
					   level_pte, level_pfn, start_pfn,
					   last_pfn);
		}

		/*
		 * Free the page table if we're below the level we want to
		 * retain and the range covers the entire table.
		 */
		if (level < retain_level && !(start_pfn > level_pfn ||
		      last_pfn < level_pfn + level_size(level) - 1)) {
			dma_clear_pte(pte);
			domain_flush_cache(domain, pte, sizeof(*pte));
			free_pgtable_page(level_pte);
		}
next:
		pfn += level_size(level);
	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);
}

/*
 * clear last level (leaf) ptes and free page table pages below the
 * level we wish to keep intact.
 */
static void dma_pte_free_pagetable(struct dmar_domain *domain,
				   unsigned long start_pfn,
				   unsigned long last_pfn,
				   int retain_level)
{
	BUG_ON(!domain_pfn_supported(domain, start_pfn));
	BUG_ON(!domain_pfn_supported(domain, last_pfn));
	BUG_ON(start_pfn > last_pfn);

	dma_pte_clear_range(domain, start_pfn, last_pfn);

	/* We don't need lock here; nobody else touches the iova range */
	dma_pte_free_level(domain, agaw_to_level(domain->agaw), retain_level,
			   domain->pgd, 0, start_pfn, last_pfn);

	/* free pgd */
	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
		free_pgtable_page(domain->pgd);
		domain->pgd = NULL;
	}
}

/* When a page at a given level is being unlinked from its parent, we don't
   need to *modify* it at all. All we need to do is make a list of all the
   pages which can be freed just as soon as we've flushed the IOTLB and we
   know the hardware page-walk will no longer touch them.
   The 'pte' argument is the *parent* PTE, pointing to the page that is to
   be freed. */
static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
					    int level, struct dma_pte *pte,
					    struct page *freelist)
{
	struct page *pg;

	pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
	pg->freelist = freelist;
	freelist = pg;

	if (level == 1)
		return freelist;

	pte = page_address(pg);
	do {
		if (dma_pte_present(pte) && !dma_pte_superpage(pte))
			freelist = dma_pte_list_pagetables(domain, level - 1,
							   pte, freelist);
		pte++;
	} while (!first_pte_in_page(pte));

	return freelist;
}

static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
					struct dma_pte *pte, unsigned long pfn,
					unsigned long start_pfn,
					unsigned long last_pfn,
					struct page *freelist)
{
	struct dma_pte *first_pte = NULL, *last_pte = NULL;

	pfn = max(start_pfn, pfn);
	pte = &pte[pfn_level_offset(pfn, level)];

	do {
		unsigned long level_pfn;

		if (!dma_pte_present(pte))
			goto next;

		level_pfn = pfn & level_mask(level);

		/* If range covers entire pagetable, free it */
		if (start_pfn <= level_pfn &&
		    last_pfn >= level_pfn + level_size(level) - 1) {
			/* These subordinate page tables are going away entirely. Don't
			   bother to clear them; we're just going to *free* them. */
			if (level > 1 && !dma_pte_superpage(pte))
				freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);

			dma_clear_pte(pte);
			if (!first_pte)
				first_pte = pte;
			last_pte = pte;
		} else if (level > 1) {
			/* Recurse down into a level that isn't *entirely* obsolete */
			freelist = dma_pte_clear_level(domain, level - 1,
						       phys_to_virt(dma_pte_addr(pte)),
						       level_pfn, start_pfn, last_pfn,
						       freelist);
		}
next:
		pfn += level_size(level);
	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);

	if (first_pte)
		domain_flush_cache(domain, first_pte,
				   (void *)++last_pte - (void *)first_pte);

	return freelist;
}

/* We can't just free the pages because the IOMMU may still be walking
   the page tables, and may have cached the intermediate levels. The
   pages can only be freed after the IOTLB flush has been done. */
static struct page *domain_unmap(struct dmar_domain *domain,
				 unsigned long start_pfn,
				 unsigned long last_pfn)
{
	struct page *freelist;

	BUG_ON(!domain_pfn_supported(domain, start_pfn));
	BUG_ON(!domain_pfn_supported(domain, last_pfn));
	BUG_ON(start_pfn > last_pfn);

	/* we don't need lock here; nobody else touches the iova range */
	freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
				       domain->pgd, 0, start_pfn, last_pfn, NULL);

	/* free pgd */
	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
		struct page *pgd_page = virt_to_page(domain->pgd);
		pgd_page->freelist = freelist;
		freelist = pgd_page;

		domain->pgd = NULL;
	}

	return freelist;
}

static void dma_free_pagelist(struct page *freelist)
{
	struct page *pg;

	while ((pg = freelist)) {
		freelist = pg->freelist;
		free_pgtable_page(page_address(pg));
	}
}
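/*
 * Flow sketch for the freelist machinery above (descriptive only): a caller
 * unmapping a range does roughly
 *
 *	freelist = domain_unmap(domain, start_pfn, last_pfn);
 *	... flush the IOTLB for the range (e.g. iommu_flush_iotlb_psi()) ...
 *	dma_free_pagelist(freelist);
 *
 * so page-table pages are only recycled once the hardware can no longer be
 * walking them; page->freelist chains the pages in the meantime.
 */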
1158
13cf0174
JR
1159static void iova_entry_free(unsigned long data)
1160{
1161 struct page *freelist = (struct page *)data;
1162
1163 dma_free_pagelist(freelist);
1164}
1165
ba395927
KA
1166/* iommu handling */
1167static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1168{
1169 struct root_entry *root;
1170 unsigned long flags;
1171
4c923d47 1172 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
ffebeb46 1173 if (!root) {
9f10e5bf 1174 pr_err("Allocating root entry for %s failed\n",
ffebeb46 1175 iommu->name);
ba395927 1176 return -ENOMEM;
ffebeb46 1177 }
ba395927 1178
5b6985ce 1179 __iommu_flush_cache(iommu, root, ROOT_SIZE);
ba395927
KA
1180
1181 spin_lock_irqsave(&iommu->lock, flags);
1182 iommu->root_entry = root;
1183 spin_unlock_irqrestore(&iommu->lock, flags);
1184
1185 return 0;
1186}
1187
ba395927
KA
1188static void iommu_set_root_entry(struct intel_iommu *iommu)
1189{
03ecc32c 1190 u64 addr;
c416daa9 1191 u32 sts;
ba395927
KA
1192 unsigned long flag;
1193
03ecc32c 1194 addr = virt_to_phys(iommu->root_entry);
7373a8cc
LB
1195 if (sm_supported(iommu))
1196 addr |= DMA_RTADDR_SMT;
ba395927 1197
1f5b3c3f 1198 raw_spin_lock_irqsave(&iommu->register_lock, flag);
03ecc32c 1199 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
ba395927 1200
c416daa9 1201 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1202
1203 /* Make sure hardware complete it */
1204 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1205 readl, (sts & DMA_GSTS_RTPS), sts);
ba395927 1206
1f5b3c3f 1207 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1208}
1209
6f7db75e 1210void iommu_flush_write_buffer(struct intel_iommu *iommu)
ba395927
KA
1211{
1212 u32 val;
1213 unsigned long flag;
1214
9af88143 1215 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
ba395927 1216 return;
ba395927 1217
1f5b3c3f 1218 raw_spin_lock_irqsave(&iommu->register_lock, flag);
462b60f6 1219 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1220
1221 /* Make sure hardware complete it */
1222 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1223 readl, (!(val & DMA_GSTS_WBFS)), val);
ba395927 1224
1f5b3c3f 1225 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1226}
1227
1228/* return value determine if we need a write buffer flush */
4c25a2c1
DW
1229static void __iommu_flush_context(struct intel_iommu *iommu,
1230 u16 did, u16 source_id, u8 function_mask,
1231 u64 type)
ba395927
KA
1232{
1233 u64 val = 0;
1234 unsigned long flag;
1235
ba395927
KA
1236 switch (type) {
1237 case DMA_CCMD_GLOBAL_INVL:
1238 val = DMA_CCMD_GLOBAL_INVL;
1239 break;
1240 case DMA_CCMD_DOMAIN_INVL:
1241 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1242 break;
1243 case DMA_CCMD_DEVICE_INVL:
1244 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1245 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1246 break;
1247 default:
1248 BUG();
1249 }
1250 val |= DMA_CCMD_ICC;
1251
1f5b3c3f 1252 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1253 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1254
1255 /* Make sure hardware complete it */
1256 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1257 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1258
1f5b3c3f 1259 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1260}
1261
ba395927 1262/* return value determine if we need a write buffer flush */
1f0ef2aa
DW
1263static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1264 u64 addr, unsigned int size_order, u64 type)
ba395927
KA
1265{
1266 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1267 u64 val = 0, val_iva = 0;
1268 unsigned long flag;
1269
ba395927
KA
1270 switch (type) {
1271 case DMA_TLB_GLOBAL_FLUSH:
1272 /* global flush doesn't need set IVA_REG */
1273 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1274 break;
1275 case DMA_TLB_DSI_FLUSH:
1276 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1277 break;
1278 case DMA_TLB_PSI_FLUSH:
1279 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
ea8ea460 1280 /* IH bit is passed in as part of address */
ba395927
KA
1281 val_iva = size_order | addr;
1282 break;
1283 default:
1284 BUG();
1285 }
1286 /* Note: set drain read/write */
1287#if 0
1288 /*
1289 * This is probably to be super secure.. Looks like we can
1290 * ignore it without any impact.
1291 */
1292 if (cap_read_drain(iommu->cap))
1293 val |= DMA_TLB_READ_DRAIN;
1294#endif
1295 if (cap_write_drain(iommu->cap))
1296 val |= DMA_TLB_WRITE_DRAIN;
1297
1f5b3c3f 1298 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1299 /* Note: Only uses first TLB reg currently */
1300 if (val_iva)
1301 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1302 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1303
1304 /* Make sure hardware complete it */
1305 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1306 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1307
1f5b3c3f 1308 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1309
1310 /* check IOTLB invalidation granularity */
1311 if (DMA_TLB_IAIG(val) == 0)
9f10e5bf 1312 pr_err("Flush IOTLB failed\n");
ba395927 1313 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
9f10e5bf 1314 pr_debug("TLB flush request %Lx, actual %Lx\n",
5b6985ce
FY
1315 (unsigned long long)DMA_TLB_IIRG(type),
1316 (unsigned long long)DMA_TLB_IAIG(val));
ba395927
KA
1317}
1318
64ae892b
DW
1319static struct device_domain_info *
1320iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
1321 u8 bus, u8 devfn)
93a23a72 1322{
93a23a72 1323 struct device_domain_info *info;
93a23a72 1324
55d94043
JR
1325 assert_spin_locked(&device_domain_lock);
1326
93a23a72
YZ
1327 if (!iommu->qi)
1328 return NULL;
1329
93a23a72 1330 list_for_each_entry(info, &domain->devices, link)
c3b497c6
JL
1331 if (info->iommu == iommu && info->bus == bus &&
1332 info->devfn == devfn) {
b16d0cb9
DW
1333 if (info->ats_supported && info->dev)
1334 return info;
93a23a72
YZ
1335 break;
1336 }
93a23a72 1337
b16d0cb9 1338 return NULL;
93a23a72
YZ
1339}
1340
0824c592
OP
1341static void domain_update_iotlb(struct dmar_domain *domain)
1342{
1343 struct device_domain_info *info;
1344 bool has_iotlb_device = false;
1345
1346 assert_spin_locked(&device_domain_lock);
1347
1348 list_for_each_entry(info, &domain->devices, link) {
1349 struct pci_dev *pdev;
1350
1351 if (!info->dev || !dev_is_pci(info->dev))
1352 continue;
1353
1354 pdev = to_pci_dev(info->dev);
1355 if (pdev->ats_enabled) {
1356 has_iotlb_device = true;
1357 break;
1358 }
1359 }
1360
1361 domain->has_iotlb_device = has_iotlb_device;
1362}
1363
93a23a72 1364static void iommu_enable_dev_iotlb(struct device_domain_info *info)
ba395927 1365{
fb0cc3aa
BH
1366 struct pci_dev *pdev;
1367
0824c592
OP
1368 assert_spin_locked(&device_domain_lock);
1369
0bcb3e28 1370 if (!info || !dev_is_pci(info->dev))
93a23a72
YZ
1371 return;
1372
fb0cc3aa 1373 pdev = to_pci_dev(info->dev);
1c48db44
JP
1374 /* For IOMMU that supports device IOTLB throttling (DIT), we assign
1375 * PFSID to the invalidation desc of a VF such that IOMMU HW can gauge
1376 * queue depth at PF level. If DIT is not set, PFSID will be treated as
1377 * reserved, which should be set to 0.
1378 */
1379 if (!ecap_dit(info->iommu->ecap))
1380 info->pfsid = 0;
1381 else {
1382 struct pci_dev *pf_pdev;
1383
1384 /* pdev will be returned if device is not a vf */
1385 pf_pdev = pci_physfn(pdev);
cc49baa9 1386 info->pfsid = pci_dev_id(pf_pdev);
1c48db44 1387 }
fb0cc3aa 1388
b16d0cb9
DW
1389#ifdef CONFIG_INTEL_IOMMU_SVM
1390 /* The PCIe spec, in its wisdom, declares that the behaviour of
1391 the device if you enable PASID support after ATS support is
1392 undefined. So always enable PASID support on devices which
1393 have it, even if we can't yet know if we're ever going to
1394 use it. */
1395 if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
1396 info->pasid_enabled = 1;
1397
1b84778a
KS
1398 if (info->pri_supported &&
1399 (info->pasid_enabled ? pci_prg_resp_pasid_required(pdev) : 1) &&
1400 !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
b16d0cb9
DW
1401 info->pri_enabled = 1;
1402#endif
fb58fdcd 1403 if (!pdev->untrusted && info->ats_supported &&
61363c14 1404 pci_ats_page_aligned(pdev) &&
fb58fdcd 1405 !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
b16d0cb9 1406 info->ats_enabled = 1;
0824c592 1407 domain_update_iotlb(info->domain);
b16d0cb9
DW
1408 info->ats_qdep = pci_ats_queue_depth(pdev);
1409 }
93a23a72
YZ
1410}
1411
1412static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1413{
b16d0cb9
DW
1414 struct pci_dev *pdev;
1415
0824c592
OP
1416 assert_spin_locked(&device_domain_lock);
1417
da972fb1 1418 if (!dev_is_pci(info->dev))
93a23a72
YZ
1419 return;
1420
b16d0cb9
DW
1421 pdev = to_pci_dev(info->dev);
1422
1423 if (info->ats_enabled) {
1424 pci_disable_ats(pdev);
1425 info->ats_enabled = 0;
0824c592 1426 domain_update_iotlb(info->domain);
b16d0cb9
DW
1427 }
1428#ifdef CONFIG_INTEL_IOMMU_SVM
1429 if (info->pri_enabled) {
1430 pci_disable_pri(pdev);
1431 info->pri_enabled = 0;
1432 }
1433 if (info->pasid_enabled) {
1434 pci_disable_pasid(pdev);
1435 info->pasid_enabled = 0;
1436 }
1437#endif
93a23a72
YZ
1438}
1439
1440static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1441 u64 addr, unsigned mask)
1442{
1443 u16 sid, qdep;
1444 unsigned long flags;
1445 struct device_domain_info *info;
1446
0824c592
OP
1447 if (!domain->has_iotlb_device)
1448 return;
1449
93a23a72
YZ
1450 spin_lock_irqsave(&device_domain_lock, flags);
1451 list_for_each_entry(info, &domain->devices, link) {
b16d0cb9 1452 if (!info->ats_enabled)
93a23a72
YZ
1453 continue;
1454
1455 sid = info->bus << 8 | info->devfn;
b16d0cb9 1456 qdep = info->ats_qdep;
1c48db44
JP
1457 qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
1458 qdep, addr, mask);
93a23a72
YZ
1459 }
1460 spin_unlock_irqrestore(&device_domain_lock, flags);
1461}
1462
a1ddcbe9
JR
1463static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
1464 struct dmar_domain *domain,
1465 unsigned long pfn, unsigned int pages,
1466 int ih, int map)
ba395927 1467{
9dd2fe89 1468 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
03d6a246 1469 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
a1ddcbe9 1470 u16 did = domain->iommu_did[iommu->seq_id];
ba395927 1471
ba395927
KA
1472 BUG_ON(pages == 0);
1473
ea8ea460
DW
1474 if (ih)
1475 ih = 1 << 6;
ba395927 1476 /*
9dd2fe89
YZ
1477 * Fallback to domain selective flush if no PSI support or the size is
1478 * too big.
ba395927
KA
1479 * PSI requires page size to be 2 ^ x, and the base address is naturally
1480 * aligned to the size
1481 */
9dd2fe89
YZ
1482 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1483 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1f0ef2aa 1484 DMA_TLB_DSI_FLUSH);
9dd2fe89 1485 else
ea8ea460 1486 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
9dd2fe89 1487 DMA_TLB_PSI_FLUSH);
bf92df30
YZ
1488
1489 /*
82653633
NA
1490 * In caching mode, changes of pages from non-present to present require
1491 * flush. However, device IOTLB doesn't need to be flushed in this case.
bf92df30 1492 */
82653633 1493 if (!cap_caching_mode(iommu->cap) || !map)
9d2e6505 1494 iommu_flush_dev_iotlb(domain, addr, mask);
ba395927
KA
1495}
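/*
 * Worked example for the mask computation above (illustrative): unmapping
 * pages == 9 gives mask = ilog2(roundup_pow_of_two(9)) = 4, so the PSI
 * invalidation covers 16 pages and addr is expected to be 64KiB aligned;
 * if mask exceeds cap_max_amask_val() the code falls back to a
 * domain-selective flush instead.
 */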
1496
eed91a0b
PX
1497/* Notification for newly created mappings */
1498static inline void __mapping_notify_one(struct intel_iommu *iommu,
1499 struct dmar_domain *domain,
1500 unsigned long pfn, unsigned int pages)
1501{
1502 /* It's a non-present to present mapping. Only flush if caching mode */
1503 if (cap_caching_mode(iommu->cap))
1504 iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1);
1505 else
1506 iommu_flush_write_buffer(iommu);
1507}
1508
13cf0174
JR
1509static void iommu_flush_iova(struct iova_domain *iovad)
1510{
1511 struct dmar_domain *domain;
1512 int idx;
1513
1514 domain = container_of(iovad, struct dmar_domain, iovad);
1515
1516 for_each_domain_iommu(idx, domain) {
1517 struct intel_iommu *iommu = g_iommus[idx];
1518 u16 did = domain->iommu_did[iommu->seq_id];
1519
1520 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
1521
1522 if (!cap_caching_mode(iommu->cap))
1523 iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
1524 0, MAX_AGAW_PFN_WIDTH);
1525 }
1526}
1527
f8bab735 1528static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1529{
1530 u32 pmen;
1531 unsigned long flags;
1532
5bb71fc7
LB
1533 if (!cap_plmr(iommu->cap) && !cap_phmr(iommu->cap))
1534 return;
1535
1f5b3c3f 1536 raw_spin_lock_irqsave(&iommu->register_lock, flags);
f8bab735 1537 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1538 pmen &= ~DMA_PMEN_EPM;
1539 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1540
1541 /* wait for the protected region status bit to clear */
1542 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1543 readl, !(pmen & DMA_PMEN_PRS), pmen);
1544
1f5b3c3f 1545 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
f8bab735 1546}
1547
2a41ccee 1548static void iommu_enable_translation(struct intel_iommu *iommu)
ba395927
KA
1549{
1550 u32 sts;
1551 unsigned long flags;
1552
1f5b3c3f 1553 raw_spin_lock_irqsave(&iommu->register_lock, flags);
c416daa9
DW
1554 iommu->gcmd |= DMA_GCMD_TE;
1555 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1556
1557 /* Make sure hardware complete it */
1558 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1559 readl, (sts & DMA_GSTS_TES), sts);
ba395927 1560
1f5b3c3f 1561 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
ba395927
KA
1562}
1563
2a41ccee 1564static void iommu_disable_translation(struct intel_iommu *iommu)
ba395927
KA
1565{
1566 u32 sts;
1567 unsigned long flag;
1568
1f5b3c3f 1569 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1570 iommu->gcmd &= ~DMA_GCMD_TE;
1571 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1572
1573 /* Make sure hardware complete it */
1574 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1575 readl, (!(sts & DMA_GSTS_TES)), sts);
ba395927 1576
1f5b3c3f 1577 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1578}
1579
3460a6d9 1580
ba395927
KA
1581static int iommu_init_domains(struct intel_iommu *iommu)
1582{
8bf47816
JR
1583 u32 ndomains, nlongs;
1584 size_t size;
ba395927
KA
1585
1586 ndomains = cap_ndoms(iommu->cap);
8bf47816 1587 pr_debug("%s: Number of Domains supported <%d>\n",
9f10e5bf 1588 iommu->name, ndomains);
ba395927
KA
1589 nlongs = BITS_TO_LONGS(ndomains);
1590
94a91b50
DD
1591 spin_lock_init(&iommu->lock);
1592
ba395927
KA
1593 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1594 if (!iommu->domain_ids) {
9f10e5bf
JR
1595 pr_err("%s: Allocating domain id array failed\n",
1596 iommu->name);
ba395927
KA
1597 return -ENOMEM;
1598 }
8bf47816 1599
86f004c7 1600 size = (ALIGN(ndomains, 256) >> 8) * sizeof(struct dmar_domain **);
8bf47816
JR
1601 iommu->domains = kzalloc(size, GFP_KERNEL);
1602
1603 if (iommu->domains) {
1604 size = 256 * sizeof(struct dmar_domain *);
1605 iommu->domains[0] = kzalloc(size, GFP_KERNEL);
1606 }
1607
1608 if (!iommu->domains || !iommu->domains[0]) {
9f10e5bf
JR
1609 pr_err("%s: Allocating domain array failed\n",
1610 iommu->name);
852bdb04 1611 kfree(iommu->domain_ids);
8bf47816 1612 kfree(iommu->domains);
852bdb04 1613 iommu->domain_ids = NULL;
8bf47816 1614 iommu->domains = NULL;
ba395927
KA
1615 return -ENOMEM;
1616 }
1617
8bf47816
JR
1618
1619
ba395927 1620 /*
c0e8a6c8
JR
1621 * If Caching mode is set, then invalid translations are tagged
1622 * with domain-id 0, hence we need to pre-allocate it. We also
1623 * use domain-id 0 as a marker for non-allocated domain-id, so
1624 * make sure it is not used for a real domain.
ba395927 1625 */
c0e8a6c8
JR
1626 set_bit(0, iommu->domain_ids);
1627
3b33d4ab
LB
1628 /*
1629 * Vt-d spec rev3.0 (section 6.2.3.1) requires that each pasid
1630 * entry for first-level or pass-through translation modes should
1631 * be programmed with a domain id different from those used for
1632 * second-level or nested translation. We reserve a domain id for
1633 * this purpose.
1634 */
1635 if (sm_supported(iommu))
1636 set_bit(FLPT_DEFAULT_DID, iommu->domain_ids);
1637
ba395927
KA
1638 return 0;
1639}
ba395927 1640
ffebeb46 1641static void disable_dmar_iommu(struct intel_iommu *iommu)
ba395927 1642{
29a27719 1643 struct device_domain_info *info, *tmp;
55d94043 1644 unsigned long flags;
ba395927 1645
29a27719
JR
1646 if (!iommu->domains || !iommu->domain_ids)
1647 return;
a4eaa86c 1648
bea64033 1649again:
55d94043 1650 spin_lock_irqsave(&device_domain_lock, flags);
29a27719
JR
1651 list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
1652 struct dmar_domain *domain;
1653
1654 if (info->iommu != iommu)
1655 continue;
1656
1657 if (!info->dev || !info->domain)
1658 continue;
1659
1660 domain = info->domain;
1661
bea64033 1662 __dmar_remove_one_dev_info(info);
29a27719 1663
bea64033
JR
1664 if (!domain_type_is_vm_or_si(domain)) {
1665 /*
1666 * The domain_exit() function can't be called under
1667 * device_domain_lock, as it takes this lock itself.
1668 * So release the lock here and re-run the loop
1669 * afterwards.
1670 */
1671 spin_unlock_irqrestore(&device_domain_lock, flags);
29a27719 1672 domain_exit(domain);
bea64033
JR
1673 goto again;
1674 }
ba395927 1675 }
55d94043 1676 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927
KA
1677
1678 if (iommu->gcmd & DMA_GCMD_TE)
1679 iommu_disable_translation(iommu);
ffebeb46 1680}
ba395927 1681
ffebeb46
JL
1682static void free_dmar_iommu(struct intel_iommu *iommu)
1683{
1684 if ((iommu->domains) && (iommu->domain_ids)) {
86f004c7 1685 int elems = ALIGN(cap_ndoms(iommu->cap), 256) >> 8;
8bf47816
JR
1686 int i;
1687
1688 for (i = 0; i < elems; i++)
1689 kfree(iommu->domains[i]);
ffebeb46
JL
1690 kfree(iommu->domains);
1691 kfree(iommu->domain_ids);
1692 iommu->domains = NULL;
1693 iommu->domain_ids = NULL;
1694 }
ba395927 1695
d9630fe9
WH
1696 g_iommus[iommu->seq_id] = NULL;
1697
ba395927
KA
1698 /* free context mapping */
1699 free_context_table(iommu);
8a94ade4
DW
1700
1701#ifdef CONFIG_INTEL_IOMMU_SVM
765b6a98 1702 if (pasid_supported(iommu)) {
a222a7f0
DW
1703 if (ecap_prs(iommu->ecap))
1704 intel_svm_finish_prq(iommu);
a222a7f0 1705 }
8a94ade4 1706#endif
ba395927
KA
1707}
1708
ab8dfe25 1709static struct dmar_domain *alloc_domain(int flags)
ba395927 1710{
ba395927 1711 struct dmar_domain *domain;
ba395927
KA
1712
1713 domain = alloc_domain_mem();
1714 if (!domain)
1715 return NULL;
1716
ab8dfe25 1717 memset(domain, 0, sizeof(*domain));
98fa15f3 1718 domain->nid = NUMA_NO_NODE;
ab8dfe25 1719 domain->flags = flags;
0824c592 1720 domain->has_iotlb_device = false;
92d03cc8 1721 INIT_LIST_HEAD(&domain->devices);
2c2e2c38
FY
1722
1723 return domain;
1724}
1725
d160aca5
JR
1726/* Must be called with iommu->lock */
1727static int domain_attach_iommu(struct dmar_domain *domain,
fb170fb4
JL
1728 struct intel_iommu *iommu)
1729{
44bde614 1730 unsigned long ndomains;
55d94043 1731 int num;
44bde614 1732
55d94043 1733 assert_spin_locked(&device_domain_lock);
d160aca5 1734 assert_spin_locked(&iommu->lock);
ba395927 1735
29a27719
JR
1736 domain->iommu_refcnt[iommu->seq_id] += 1;
1737 domain->iommu_count += 1;
1738 if (domain->iommu_refcnt[iommu->seq_id] == 1) {
fb170fb4 1739 ndomains = cap_ndoms(iommu->cap);
d160aca5
JR
1740 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1741
1742 if (num >= ndomains) {
1743 pr_err("%s: No free domain ids\n", iommu->name);
1744 domain->iommu_refcnt[iommu->seq_id] -= 1;
1745 domain->iommu_count -= 1;
55d94043 1746 return -ENOSPC;
2c2e2c38 1747 }
ba395927 1748
d160aca5
JR
1749 set_bit(num, iommu->domain_ids);
1750 set_iommu_domain(iommu, num, domain);
1751
1752 domain->iommu_did[iommu->seq_id] = num;
1753 domain->nid = iommu->node;
fb170fb4 1754
fb170fb4
JL
1755 domain_update_iommu_cap(domain);
1756 }
d160aca5 1757
55d94043 1758 return 0;
fb170fb4
JL
1759}
1760
1761static int domain_detach_iommu(struct dmar_domain *domain,
1762 struct intel_iommu *iommu)
1763{
e083ea5b 1764 int num, count;
d160aca5 1765
55d94043 1766 assert_spin_locked(&device_domain_lock);
d160aca5 1767 assert_spin_locked(&iommu->lock);
fb170fb4 1768
29a27719
JR
1769 domain->iommu_refcnt[iommu->seq_id] -= 1;
1770 count = --domain->iommu_count;
1771 if (domain->iommu_refcnt[iommu->seq_id] == 0) {
d160aca5
JR
1772 num = domain->iommu_did[iommu->seq_id];
1773 clear_bit(num, iommu->domain_ids);
1774 set_iommu_domain(iommu, num, NULL);
fb170fb4 1775
fb170fb4 1776 domain_update_iommu_cap(domain);
c0e8a6c8 1777 domain->iommu_did[iommu->seq_id] = 0;
fb170fb4 1778 }
fb170fb4
JL
1779
1780 return count;
1781}
1782
ba395927 1783static struct iova_domain reserved_iova_list;
8a443df4 1784static struct lock_class_key reserved_rbtree_key;
ba395927 1785
51a63e67 1786static int dmar_init_reserved_ranges(void)
ba395927
KA
1787{
1788 struct pci_dev *pdev = NULL;
1789 struct iova *iova;
1790 int i;
ba395927 1791
aa3ac946 1792 init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN);
ba395927 1793
8a443df4
MG
1794 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1795 &reserved_rbtree_key);
1796
ba395927
KA
1797 /* IOAPIC ranges shouldn't be accessed by DMA */
1798 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1799 IOVA_PFN(IOAPIC_RANGE_END));
51a63e67 1800 if (!iova) {
9f10e5bf 1801 pr_err("Reserve IOAPIC range failed\n");
51a63e67
JC
1802 return -ENODEV;
1803 }
ba395927
KA
1804
1805 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1806 for_each_pci_dev(pdev) {
1807 struct resource *r;
1808
1809 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1810 r = &pdev->resource[i];
1811 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1812 continue;
1a4a4551
DW
1813 iova = reserve_iova(&reserved_iova_list,
1814 IOVA_PFN(r->start),
1815 IOVA_PFN(r->end));
51a63e67 1816 if (!iova) {
932a6523 1817 pci_err(pdev, "Reserve iova for %pR failed\n", r);
51a63e67
JC
1818 return -ENODEV;
1819 }
ba395927
KA
1820 }
1821 }
51a63e67 1822 return 0;
ba395927
KA
1823}
1824
1825static void domain_reserve_special_ranges(struct dmar_domain *domain)
1826{
1827 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1828}
1829
1830static inline int guestwidth_to_adjustwidth(int gaw)
1831{
1832 int agaw;
1833 int r = (gaw - 12) % 9;
1834
1835 if (r == 0)
1836 agaw = gaw;
1837 else
1838 agaw = gaw + 9 - r;
1839 if (agaw > 64)
1840 agaw = 64;
1841 return agaw;
1842}
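/*
 * Illustrative note (added here, not part of the original source): each
 * extra page-table level resolves another 9 bits on top of the 12-bit
 * page offset, so the adjusted width is snapped up to the next level
 * boundary. For example:
 *
 *	guestwidth_to_adjustwidth(39) == 39	(3-level table, exact fit)
 *	guestwidth_to_adjustwidth(40) == 48	(rounded up to 4 levels)
 *	guestwidth_to_adjustwidth(48) == 48
 */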
1843
dc534b25
JR
1844static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
1845 int guest_width)
ba395927 1846{
ba395927
KA
1847 int adjust_width, agaw;
1848 unsigned long sagaw;
13cf0174 1849 int err;
ba395927 1850
aa3ac946 1851 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
13cf0174
JR
1852
1853 err = init_iova_flush_queue(&domain->iovad,
1854 iommu_flush_iova, iova_entry_free);
1855 if (err)
1856 return err;
1857
ba395927
KA
1858 domain_reserve_special_ranges(domain);
1859
1860 /* calculate AGAW */
ba395927
KA
1861 if (guest_width > cap_mgaw(iommu->cap))
1862 guest_width = cap_mgaw(iommu->cap);
1863 domain->gaw = guest_width;
1864 adjust_width = guestwidth_to_adjustwidth(guest_width);
1865 agaw = width_to_agaw(adjust_width);
1866 sagaw = cap_sagaw(iommu->cap);
1867 if (!test_bit(agaw, &sagaw)) {
1868 /* hardware doesn't support it, choose a bigger one */
9f10e5bf 1869 pr_debug("Hardware doesn't support agaw %d\n", agaw);
ba395927
KA
1870 agaw = find_next_bit(&sagaw, 5, agaw);
1871 if (agaw >= 5)
1872 return -ENODEV;
1873 }
1874 domain->agaw = agaw;
ba395927 1875
8e604097
WH
1876 if (ecap_coherent(iommu->ecap))
1877 domain->iommu_coherency = 1;
1878 else
1879 domain->iommu_coherency = 0;
1880
58c610bd
SY
1881 if (ecap_sc_support(iommu->ecap))
1882 domain->iommu_snooping = 1;
1883 else
1884 domain->iommu_snooping = 0;
1885
214e39aa
DW
1886 if (intel_iommu_superpage)
1887 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1888 else
1889 domain->iommu_superpage = 0;
1890
4c923d47 1891 domain->nid = iommu->node;
c7151a8d 1892
ba395927 1893 /* always allocate the top pgd */
4c923d47 1894 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
ba395927
KA
1895 if (!domain->pgd)
1896 return -ENOMEM;
5b6985ce 1897 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
ba395927
KA
1898 return 0;
1899}
1900
1901static void domain_exit(struct dmar_domain *domain)
1902{
e083ea5b 1903 struct page *freelist;
ba395927 1904
d160aca5
JR
1905 /* Remove associated devices and clear attached or cached domains */
1906 rcu_read_lock();
ba395927 1907 domain_remove_dev_info(domain);
d160aca5 1908 rcu_read_unlock();
92d03cc8 1909
ba395927
KA
1910 /* destroy iovas */
1911 put_iova_domain(&domain->iovad);
ba395927 1912
ea8ea460 1913 freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
ba395927 1914
ea8ea460
DW
1915 dma_free_pagelist(freelist);
1916
ba395927
KA
1917 free_domain_mem(domain);
1918}
1919
7373a8cc
LB
1920/*
1921 * Get the PASID directory size for a scalable mode context entry.
1922 * The value of X in the PDTS field of a scalable mode context entry
1923 * indicates a PASID directory with 2^(X + 7) entries.
1924 */
1925static inline unsigned long context_get_sm_pds(struct pasid_table *table)
1926{
1927 int pds, max_pde;
1928
1929 max_pde = table->max_pasid >> PASID_PDE_SHIFT;
1930 pds = find_first_bit((unsigned long *)&max_pde, MAX_NR_PASID_BITS);
1931 if (pds < 7)
1932 return 0;
1933
1934 return pds - 7;
1935}
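/*
 * Illustrative note (added here, not part of the original source),
 * assuming PASID_PDE_SHIFT is 6 (64 PASID-table entries per directory
 * entry): for a table with max_pasid = 1 << 20, max_pde is 1 << 14, the
 * first set bit is 14, and the function returns 14 - 7 = 7, i.e. a
 * directory of 2^(7 + 7) = 16384 entries. Smaller tables that fit in
 * 2^7 directory entries are simply coded as 0, the minimum the PDTS
 * field can express.
 */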
1936
1937/*
1938 * Set the RID_PASID field of a scalable mode context entry. The
1939 * IOMMU hardware will use the PASID value set in this field for
1940 * DMA translations of DMA requests without PASID.
1941 */
1942static inline void
1943context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid)
1944{
1945 context->hi |= pasid & ((1 << 20) - 1);
1946 context->hi |= (1 << 20);
1947}
1948
1949/*
1950 * Set the DTE(Device-TLB Enable) field of a scalable mode context
1951 * entry.
1952 */
1953static inline void context_set_sm_dte(struct context_entry *context)
1954{
1955 context->lo |= (1 << 2);
1956}
1957
1958/*
1959 * Set the PRE(Page Request Enable) field of a scalable mode context
1960 * entry.
1961 */
1962static inline void context_set_sm_pre(struct context_entry *context)
1963{
1964 context->lo |= (1 << 4);
1965}
1966
1967/* Convert value to context PASID directory size field coding. */
1968#define context_pdts(pds) (((pds) & 0x7) << 9)
1969
64ae892b
DW
1970static int domain_context_mapping_one(struct dmar_domain *domain,
1971 struct intel_iommu *iommu,
ca6e322d 1972 struct pasid_table *table,
28ccce0d 1973 u8 bus, u8 devfn)
ba395927 1974{
c6c2cebd 1975 u16 did = domain->iommu_did[iommu->seq_id];
28ccce0d
JR
1976 int translation = CONTEXT_TT_MULTI_LEVEL;
1977 struct device_domain_info *info = NULL;
ba395927 1978 struct context_entry *context;
ba395927 1979 unsigned long flags;
7373a8cc 1980 int ret;
28ccce0d 1981
c6c2cebd
JR
1982 WARN_ON(did == 0);
1983
28ccce0d
JR
1984 if (hw_pass_through && domain_type_is_si(domain))
1985 translation = CONTEXT_TT_PASS_THROUGH;
ba395927
KA
1986
1987 pr_debug("Set context mapping for %02x:%02x.%d\n",
1988 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
4ed0d3e6 1989
ba395927 1990 BUG_ON(!domain->pgd);
5331fe6f 1991
55d94043
JR
1992 spin_lock_irqsave(&device_domain_lock, flags);
1993 spin_lock(&iommu->lock);
1994
1995 ret = -ENOMEM;
03ecc32c 1996 context = iommu_context_addr(iommu, bus, devfn, 1);
ba395927 1997 if (!context)
55d94043 1998 goto out_unlock;
ba395927 1999
55d94043
JR
2000 ret = 0;
2001 if (context_present(context))
2002 goto out_unlock;
cf484d0e 2003
aec0e861
XP
2004 /*
2005 * For kdump cases, old valid entries may be cached due to the
2006 * in-flight DMA and copied pgtable, but there is no unmapping
2007 * behaviour for them, thus we need an explicit cache flush for
2008 * the newly-mapped device. For kdump, at this point, the device
2009 * is supposed to finish reset at its driver probe stage, so no
2010 * in-flight DMA will exist, and we don't need to worry anymore
2011 * hereafter.
2012 */
2013 if (context_copied(context)) {
2014 u16 did_old = context_domain_id(context);
2015
b117e038 2016 if (did_old < cap_ndoms(iommu->cap)) {
aec0e861
XP
2017 iommu->flush.flush_context(iommu, did_old,
2018 (((u16)bus) << 8) | devfn,
2019 DMA_CCMD_MASK_NOBIT,
2020 DMA_CCMD_DEVICE_INVL);
f73a7eee
KA
2021 iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
2022 DMA_TLB_DSI_FLUSH);
2023 }
aec0e861
XP
2024 }
2025
de24e553 2026 context_clear_entry(context);
ea6606b0 2027
7373a8cc
LB
2028 if (sm_supported(iommu)) {
2029 unsigned long pds;
4ed0d3e6 2030
7373a8cc
LB
2031 WARN_ON(!table);
2032
2033 /* Setup the PASID DIR pointer: */
2034 pds = context_get_sm_pds(table);
2035 context->lo = (u64)virt_to_phys(table->table) |
2036 context_pdts(pds);
2037
2038 /* Setup the RID_PASID field: */
2039 context_set_sm_rid2pasid(context, PASID_RID2PASID);
de24e553 2040
de24e553 2041 /*
7373a8cc
LB
2042 * Setup the Device-TLB enable bit and Page request
2043 * Enable bit:
de24e553 2044 */
7373a8cc
LB
2045 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2046 if (info && info->ats_supported)
2047 context_set_sm_dte(context);
2048 if (info && info->pri_supported)
2049 context_set_sm_pre(context);
2050 } else {
2051 struct dma_pte *pgd = domain->pgd;
2052 int agaw;
2053
2054 context_set_domain_id(context, did);
7373a8cc
LB
2055
2056 if (translation != CONTEXT_TT_PASS_THROUGH) {
2057 /*
2058 * Skip top levels of page tables for iommu which has
2059 * less agaw than default. Unnecessary for PT mode.
2060 */
2061 for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
2062 ret = -ENOMEM;
2063 pgd = phys_to_virt(dma_pte_addr(pgd));
2064 if (!dma_pte_present(pgd))
2065 goto out_unlock;
2066 }
2067
2068 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2069 if (info && info->ats_supported)
2070 translation = CONTEXT_TT_DEV_IOTLB;
2071 else
2072 translation = CONTEXT_TT_MULTI_LEVEL;
2073
2074 context_set_address_root(context, virt_to_phys(pgd));
2075 context_set_address_width(context, agaw);
2076 } else {
2077 /*
2078 * In pass through mode, AW must be programmed to
2079 * indicate the largest AGAW value supported by
2080 * hardware. And ASR is ignored by hardware.
2081 */
2082 context_set_address_width(context, iommu->msagaw);
2083 }
41b80db2
LB
2084
2085 context_set_translation_type(context, translation);
93a23a72 2086 }
4ed0d3e6 2087
c07e7d21
MM
2088 context_set_fault_enable(context);
2089 context_set_present(context);
5331fe6f 2090 domain_flush_cache(domain, context, sizeof(*context));
ba395927 2091
4c25a2c1
DW
2092 /*
2093 * It's a non-present to present mapping. If hardware doesn't cache
2094 * non-present entries we only need to flush the write-buffer. If it
2095 * _does_ cache non-present entries, then it does so in the special
2096 * domain #0, which we have to flush:
2097 */
2098 if (cap_caching_mode(iommu->cap)) {
2099 iommu->flush.flush_context(iommu, 0,
2100 (((u16)bus) << 8) | devfn,
2101 DMA_CCMD_MASK_NOBIT,
2102 DMA_CCMD_DEVICE_INVL);
c6c2cebd 2103 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
4c25a2c1 2104 } else {
ba395927 2105 iommu_flush_write_buffer(iommu);
4c25a2c1 2106 }
93a23a72 2107 iommu_enable_dev_iotlb(info);
c7151a8d 2108
55d94043
JR
2109 ret = 0;
2110
2111out_unlock:
2112 spin_unlock(&iommu->lock);
2113 spin_unlock_irqrestore(&device_domain_lock, flags);
fb170fb4 2114
5c365d18 2115 return ret;
ba395927
KA
2116}
2117
579305f7
AW
2118struct domain_context_mapping_data {
2119 struct dmar_domain *domain;
2120 struct intel_iommu *iommu;
ca6e322d 2121 struct pasid_table *table;
579305f7
AW
2122};
2123
2124static int domain_context_mapping_cb(struct pci_dev *pdev,
2125 u16 alias, void *opaque)
2126{
2127 struct domain_context_mapping_data *data = opaque;
2128
2129 return domain_context_mapping_one(data->domain, data->iommu,
ca6e322d
LB
2130 data->table, PCI_BUS_NUM(alias),
2131 alias & 0xff);
579305f7
AW
2132}
2133
ba395927 2134static int
28ccce0d 2135domain_context_mapping(struct dmar_domain *domain, struct device *dev)
ba395927 2136{
ca6e322d
LB
2137 struct domain_context_mapping_data data;
2138 struct pasid_table *table;
64ae892b 2139 struct intel_iommu *iommu;
156baca8 2140 u8 bus, devfn;
64ae892b 2141
e1f167f3 2142 iommu = device_to_iommu(dev, &bus, &devfn);
64ae892b
DW
2143 if (!iommu)
2144 return -ENODEV;
ba395927 2145
ca6e322d
LB
2146 table = intel_pasid_get_table(dev);
2147
579305f7 2148 if (!dev_is_pci(dev))
ca6e322d
LB
2149 return domain_context_mapping_one(domain, iommu, table,
2150 bus, devfn);
579305f7
AW
2151
2152 data.domain = domain;
2153 data.iommu = iommu;
ca6e322d 2154 data.table = table;
579305f7
AW
2155
2156 return pci_for_each_dma_alias(to_pci_dev(dev),
2157 &domain_context_mapping_cb, &data);
2158}
2159
2160static int domain_context_mapped_cb(struct pci_dev *pdev,
2161 u16 alias, void *opaque)
2162{
2163 struct intel_iommu *iommu = opaque;
2164
2165 return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
ba395927
KA
2166}
2167
e1f167f3 2168static int domain_context_mapped(struct device *dev)
ba395927 2169{
5331fe6f 2170 struct intel_iommu *iommu;
156baca8 2171 u8 bus, devfn;
5331fe6f 2172
e1f167f3 2173 iommu = device_to_iommu(dev, &bus, &devfn);
5331fe6f
WH
2174 if (!iommu)
2175 return -ENODEV;
ba395927 2176
579305f7
AW
2177 if (!dev_is_pci(dev))
2178 return device_context_mapped(iommu, bus, devfn);
e1f167f3 2179
579305f7
AW
2180 return !pci_for_each_dma_alias(to_pci_dev(dev),
2181 domain_context_mapped_cb, iommu);
ba395927
KA
2182}
2183
f532959b
FY
2184/* Returns a number of VTD pages, but aligned to MM page size */
2185static inline unsigned long aligned_nrpages(unsigned long host_addr,
2186 size_t size)
2187{
2188 host_addr &= ~PAGE_MASK;
2189 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
2190}
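/*
 * Illustrative note (added here, not part of the original source): with
 * 4KiB MM and VTD pages,
 *
 *	aligned_nrpages(0x1234, 0x3000)
 *
 * keeps the 0x234 in-page offset, rounds 0x234 + 0x3000 = 0x3234 up to
 * 0x4000 and therefore returns 4, one VTD page more than the raw length
 * alone would suggest.
 */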
2191
6dd9a7c7
YS
2192/* Return largest possible superpage level for a given mapping */
2193static inline int hardware_largepage_caps(struct dmar_domain *domain,
2194 unsigned long iov_pfn,
2195 unsigned long phy_pfn,
2196 unsigned long pages)
2197{
2198 int support, level = 1;
2199 unsigned long pfnmerge;
2200
2201 support = domain->iommu_superpage;
2202
2203 /* To use a large page, the virtual *and* physical addresses
2204 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2205 of them will mean we have to use smaller pages. So just
2206 merge them and check both at once. */
2207 pfnmerge = iov_pfn | phy_pfn;
2208
2209 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
2210 pages >>= VTD_STRIDE_SHIFT;
2211 if (!pages)
2212 break;
2213 pfnmerge >>= VTD_STRIDE_SHIFT;
2214 level++;
2215 support--;
2216 }
2217 return level;
2218}
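/*
 * Illustrative note (added here, not part of the original source): with
 * domain->iommu_superpage == 1 (2MiB superpages only), an IOVA PFN and a
 * physical PFN that are both 512-page aligned merge to a value with the
 * low nine bits clear, so a run of at least 512 pages returns level 2;
 * shorter runs, or any misalignment in either address, fall back to
 * level 1 (4KiB mappings).
 */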
2219
9051aa02
DW
2220static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2221 struct scatterlist *sg, unsigned long phys_pfn,
2222 unsigned long nr_pages, int prot)
e1605495
DW
2223{
2224 struct dma_pte *first_pte = NULL, *pte = NULL;
9051aa02 2225 phys_addr_t uninitialized_var(pteval);
cc4f14aa 2226 unsigned long sg_res = 0;
6dd9a7c7
YS
2227 unsigned int largepage_lvl = 0;
2228 unsigned long lvl_pages = 0;
e1605495 2229
162d1b10 2230 BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
e1605495
DW
2231
2232 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2233 return -EINVAL;
2234
2235 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
2236
cc4f14aa
JL
2237 if (!sg) {
2238 sg_res = nr_pages;
9051aa02
DW
2239 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
2240 }
2241
6dd9a7c7 2242 while (nr_pages > 0) {
c85994e4
DW
2243 uint64_t tmp;
2244
e1605495 2245 if (!sg_res) {
29a90b70
RM
2246 unsigned int pgoff = sg->offset & ~PAGE_MASK;
2247
f532959b 2248 sg_res = aligned_nrpages(sg->offset, sg->length);
29a90b70 2249 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + pgoff;
e1605495 2250 sg->dma_length = sg->length;
29a90b70 2251 pteval = (sg_phys(sg) - pgoff) | prot;
6dd9a7c7 2252 phys_pfn = pteval >> VTD_PAGE_SHIFT;
e1605495 2253 }
6dd9a7c7 2254
e1605495 2255 if (!pte) {
6dd9a7c7
YS
2256 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2257
5cf0a76f 2258 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
e1605495
DW
2259 if (!pte)
2260 return -ENOMEM;
6dd9a7c7 2261 /* It is a large page */
6491d4d0 2262 if (largepage_lvl > 1) {
ba2374fd
CZ
2263 unsigned long nr_superpages, end_pfn;
2264
6dd9a7c7 2265 pteval |= DMA_PTE_LARGE_PAGE;
d41a4adb 2266 lvl_pages = lvl_to_nr_pages(largepage_lvl);
ba2374fd
CZ
2267
2268 nr_superpages = sg_res / lvl_pages;
2269 end_pfn = iov_pfn + nr_superpages * lvl_pages - 1;
2270
d41a4adb
JL
2271 /*
2272 * Ensure that old small page tables are
ba2374fd 2273 * removed to make room for superpage(s).
bc24c571
DD
2274 * We're adding new large pages, so make sure
2275 * we don't remove their parent tables.
d41a4adb 2276 */
bc24c571
DD
2277 dma_pte_free_pagetable(domain, iov_pfn, end_pfn,
2278 largepage_lvl + 1);
6491d4d0 2279 } else {
6dd9a7c7 2280 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
6491d4d0 2281 }
6dd9a7c7 2282
e1605495
DW
2283 }
2284 /* We don't need a lock here; nobody else
2285 * touches the iova range
2286 */
7766a3fb 2287 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
c85994e4 2288 if (tmp) {
1bf20f0d 2289 static int dumps = 5;
9f10e5bf
JR
2290 pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2291 iov_pfn, tmp, (unsigned long long)pteval);
1bf20f0d
DW
2292 if (dumps) {
2293 dumps--;
2294 debug_dma_dump_mappings(NULL);
2295 }
2296 WARN_ON(1);
2297 }
6dd9a7c7
YS
2298
2299 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2300
2301 BUG_ON(nr_pages < lvl_pages);
2302 BUG_ON(sg_res < lvl_pages);
2303
2304 nr_pages -= lvl_pages;
2305 iov_pfn += lvl_pages;
2306 phys_pfn += lvl_pages;
2307 pteval += lvl_pages * VTD_PAGE_SIZE;
2308 sg_res -= lvl_pages;
2309
2310 /* If the next PTE would be the first in a new page, then we
2311 need to flush the cache on the entries we've just written.
2312 And then we'll need to recalculate 'pte', so clear it and
2313 let it get set again in the if (!pte) block above.
2314
2315 If we're done (!nr_pages) we need to flush the cache too.
2316
2317 Also if we've been setting superpages, we may need to
2318 recalculate 'pte' and switch back to smaller pages for the
2319 end of the mapping, if the trailing size is not enough to
2320 use another superpage (i.e. sg_res < lvl_pages). */
e1605495 2321 pte++;
6dd9a7c7
YS
2322 if (!nr_pages || first_pte_in_page(pte) ||
2323 (largepage_lvl > 1 && sg_res < lvl_pages)) {
e1605495
DW
2324 domain_flush_cache(domain, first_pte,
2325 (void *)pte - (void *)first_pte);
2326 pte = NULL;
2327 }
6dd9a7c7
YS
2328
2329 if (!sg_res && nr_pages)
e1605495
DW
2330 sg = sg_next(sg);
2331 }
2332 return 0;
2333}
2334
87684fd9 2335static int domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
095303e0
LB
2336 struct scatterlist *sg, unsigned long phys_pfn,
2337 unsigned long nr_pages, int prot)
2338{
2339 int ret;
2340 struct intel_iommu *iommu;
2341
2342 /* Do the real mapping first */
2343 ret = __domain_mapping(domain, iov_pfn, sg, phys_pfn, nr_pages, prot);
2344 if (ret)
2345 return ret;
2346
2347 /* Notify about the new mapping */
2348 if (domain_type_is_vm(domain)) {
2349 /* VM-typed domains can have more than one IOMMU */
2350 int iommu_id;
2351
2352 for_each_domain_iommu(iommu_id, domain) {
2353 iommu = g_iommus[iommu_id];
2354 __mapping_notify_one(iommu, domain, iov_pfn, nr_pages);
2355 }
2356 } else {
2357 /* General domains only have one IOMMU */
2358 iommu = domain_get_iommu(domain);
2359 __mapping_notify_one(iommu, domain, iov_pfn, nr_pages);
2360 }
2361
2362 return 0;
87684fd9
PX
2363}
2364
9051aa02
DW
2365static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2366 struct scatterlist *sg, unsigned long nr_pages,
2367 int prot)
ba395927 2368{
87684fd9 2369 return domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
9051aa02 2370}
6f6a00e4 2371
9051aa02
DW
2372static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2373 unsigned long phys_pfn, unsigned long nr_pages,
2374 int prot)
2375{
87684fd9 2376 return domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
ba395927
KA
2377}
2378
2452d9db 2379static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn)
ba395927 2380{
5082219b
FS
2381 unsigned long flags;
2382 struct context_entry *context;
2383 u16 did_old;
2384
c7151a8d
WH
2385 if (!iommu)
2386 return;
8c11e798 2387
5082219b
FS
2388 spin_lock_irqsave(&iommu->lock, flags);
2389 context = iommu_context_addr(iommu, bus, devfn, 0);
2390 if (!context) {
2391 spin_unlock_irqrestore(&iommu->lock, flags);
2392 return;
2393 }
2394 did_old = context_domain_id(context);
2395 context_clear_entry(context);
2396 __iommu_flush_cache(iommu, context, sizeof(*context));
2397 spin_unlock_irqrestore(&iommu->lock, flags);
2398 iommu->flush.flush_context(iommu,
2399 did_old,
2400 (((u16)bus) << 8) | devfn,
2401 DMA_CCMD_MASK_NOBIT,
2402 DMA_CCMD_DEVICE_INVL);
2403 iommu->flush.flush_iotlb(iommu,
2404 did_old,
2405 0,
2406 0,
2407 DMA_TLB_DSI_FLUSH);
ba395927
KA
2408}
2409
109b9b04
DW
2410static inline void unlink_domain_info(struct device_domain_info *info)
2411{
2412 assert_spin_locked(&device_domain_lock);
2413 list_del(&info->link);
2414 list_del(&info->global);
2415 if (info->dev)
0bcb3e28 2416 info->dev->archdata.iommu = NULL;
109b9b04
DW
2417}
2418
ba395927
KA
2419static void domain_remove_dev_info(struct dmar_domain *domain)
2420{
3a74ca01 2421 struct device_domain_info *info, *tmp;
fb170fb4 2422 unsigned long flags;
ba395927
KA
2423
2424 spin_lock_irqsave(&device_domain_lock, flags);
76f45fe3 2425 list_for_each_entry_safe(info, tmp, &domain->devices, link)
127c7615 2426 __dmar_remove_one_dev_info(info);
ba395927
KA
2427 spin_unlock_irqrestore(&device_domain_lock, flags);
2428}
2429
2430/*
2431 * find_domain
1525a29a 2432 * Note: we use struct device->archdata.iommu to store the info
ba395927 2433 */
1525a29a 2434static struct dmar_domain *find_domain(struct device *dev)
ba395927
KA
2435{
2436 struct device_domain_info *info;
2437
2438 /* No lock here, assumes no domain exit in normal case */
1525a29a 2439 info = dev->archdata.iommu;
b316d02a 2440 if (likely(info))
ba395927
KA
2441 return info->domain;
2442 return NULL;
2443}
2444
5a8f40e8 2445static inline struct device_domain_info *
745f2586
JL
2446dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2447{
2448 struct device_domain_info *info;
2449
2450 list_for_each_entry(info, &device_domain_list, global)
41e80dca 2451 if (info->iommu->segment == segment && info->bus == bus &&
745f2586 2452 info->devfn == devfn)
5a8f40e8 2453 return info;
745f2586
JL
2454
2455 return NULL;
2456}
2457
5db31569
JR
2458static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
2459 int bus, int devfn,
2460 struct device *dev,
2461 struct dmar_domain *domain)
745f2586 2462{
5a8f40e8 2463 struct dmar_domain *found = NULL;
745f2586
JL
2464 struct device_domain_info *info;
2465 unsigned long flags;
d160aca5 2466 int ret;
745f2586
JL
2467
2468 info = alloc_devinfo_mem();
2469 if (!info)
b718cd3d 2470 return NULL;
745f2586 2471
745f2586
JL
2472 info->bus = bus;
2473 info->devfn = devfn;
b16d0cb9
DW
2474 info->ats_supported = info->pasid_supported = info->pri_supported = 0;
2475 info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
2476 info->ats_qdep = 0;
745f2586
JL
2477 info->dev = dev;
2478 info->domain = domain;
5a8f40e8 2479 info->iommu = iommu;
cc580e41 2480 info->pasid_table = NULL;
95587a75 2481 info->auxd_enabled = 0;
67b8e02b 2482 INIT_LIST_HEAD(&info->auxiliary_domains);
745f2586 2483
b16d0cb9
DW
2484 if (dev && dev_is_pci(dev)) {
2485 struct pci_dev *pdev = to_pci_dev(info->dev);
2486
d8b85910
LB
2487 if (!pdev->untrusted &&
2488 !pci_ats_disabled() &&
cef74409 2489 ecap_dev_iotlb_support(iommu->ecap) &&
b16d0cb9
DW
2490 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) &&
2491 dmar_find_matched_atsr_unit(pdev))
2492 info->ats_supported = 1;
2493
765b6a98
LB
2494 if (sm_supported(iommu)) {
2495 if (pasid_supported(iommu)) {
b16d0cb9
DW
2496 int features = pci_pasid_features(pdev);
2497 if (features >= 0)
2498 info->pasid_supported = features | 1;
2499 }
2500
2501 if (info->ats_supported && ecap_prs(iommu->ecap) &&
2502 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
2503 info->pri_supported = 1;
2504 }
2505 }
2506
745f2586
JL
2507 spin_lock_irqsave(&device_domain_lock, flags);
2508 if (dev)
0bcb3e28 2509 found = find_domain(dev);
f303e507
JR
2510
2511 if (!found) {
5a8f40e8 2512 struct device_domain_info *info2;
41e80dca 2513 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
f303e507
JR
2514 if (info2) {
2515 found = info2->domain;
2516 info2->dev = dev;
2517 }
5a8f40e8 2518 }
f303e507 2519
745f2586
JL
2520 if (found) {
2521 spin_unlock_irqrestore(&device_domain_lock, flags);
2522 free_devinfo_mem(info);
b718cd3d
DW
2523 /* Caller must free the original domain */
2524 return found;
745f2586
JL
2525 }
2526
d160aca5
JR
2527 spin_lock(&iommu->lock);
2528 ret = domain_attach_iommu(domain, iommu);
2529 spin_unlock(&iommu->lock);
2530
2531 if (ret) {
c6c2cebd 2532 spin_unlock_irqrestore(&device_domain_lock, flags);
499f3aa4 2533 free_devinfo_mem(info);
c6c2cebd
JR
2534 return NULL;
2535 }
c6c2cebd 2536
b718cd3d
DW
2537 list_add(&info->link, &domain->devices);
2538 list_add(&info->global, &device_domain_list);
2539 if (dev)
2540 dev->archdata.iommu = info;
0bbeb01a 2541 spin_unlock_irqrestore(&device_domain_lock, flags);
a7fc93fe 2542
0bbeb01a
LB
2543 /* PASID table is mandatory for a PCI device in scalable mode. */
2544 if (dev && dev_is_pci(dev) && sm_supported(iommu)) {
a7fc93fe
LB
2545 ret = intel_pasid_alloc_table(dev);
2546 if (ret) {
932a6523 2547 dev_err(dev, "PASID table allocation failed\n");
71753239 2548 dmar_remove_one_dev_info(dev);
0bbeb01a 2549 return NULL;
a7fc93fe 2550 }
ef848b7e
LB
2551
2552 /* Setup the PASID entry for requests without PASID: */
2553 spin_lock(&iommu->lock);
2554 if (hw_pass_through && domain_type_is_si(domain))
2555 ret = intel_pasid_setup_pass_through(iommu, domain,
2556 dev, PASID_RID2PASID);
2557 else
2558 ret = intel_pasid_setup_second_level(iommu, domain,
2559 dev, PASID_RID2PASID);
2560 spin_unlock(&iommu->lock);
2561 if (ret) {
932a6523 2562 dev_err(dev, "Setup RID2PASID failed\n");
71753239 2563 dmar_remove_one_dev_info(dev);
ef848b7e 2564 return NULL;
a7fc93fe
LB
2565 }
2566 }
b718cd3d 2567
cc4e2575 2568 if (dev && domain_context_mapping(domain, dev)) {
932a6523 2569 dev_err(dev, "Domain context map failed\n");
71753239 2570 dmar_remove_one_dev_info(dev);
cc4e2575
JR
2571 return NULL;
2572 }
2573
b718cd3d 2574 return domain;
745f2586
JL
2575}
2576
579305f7
AW
2577static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2578{
2579 *(u16 *)opaque = alias;
2580 return 0;
2581}
2582
76208356 2583static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw)
ba395927 2584{
e083ea5b 2585 struct device_domain_info *info;
76208356 2586 struct dmar_domain *domain = NULL;
579305f7 2587 struct intel_iommu *iommu;
fcc35c63 2588 u16 dma_alias;
ba395927 2589 unsigned long flags;
aa4d066a 2590 u8 bus, devfn;
ba395927 2591
579305f7
AW
2592 iommu = device_to_iommu(dev, &bus, &devfn);
2593 if (!iommu)
2594 return NULL;
2595
146922ec
DW
2596 if (dev_is_pci(dev)) {
2597 struct pci_dev *pdev = to_pci_dev(dev);
276dbf99 2598
579305f7
AW
2599 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2600
2601 spin_lock_irqsave(&device_domain_lock, flags);
2602 info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2603 PCI_BUS_NUM(dma_alias),
2604 dma_alias & 0xff);
2605 if (info) {
2606 iommu = info->iommu;
2607 domain = info->domain;
5a8f40e8 2608 }
579305f7 2609 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927 2610
76208356 2611 /* DMA alias already has a domain, use it */
579305f7 2612 if (info)
76208356 2613 goto out;
579305f7 2614 }
ba395927 2615
146922ec 2616 /* Allocate and initialize new domain for the device */
ab8dfe25 2617 domain = alloc_domain(0);
745f2586 2618 if (!domain)
579305f7 2619 return NULL;
dc534b25 2620 if (domain_init(domain, iommu, gaw)) {
579305f7
AW
2621 domain_exit(domain);
2622 return NULL;
2c2e2c38 2623 }
ba395927 2624
76208356 2625out:
579305f7 2626
76208356
JR
2627 return domain;
2628}
579305f7 2629
76208356
JR
2630static struct dmar_domain *set_domain_for_dev(struct device *dev,
2631 struct dmar_domain *domain)
2632{
2633 struct intel_iommu *iommu;
2634 struct dmar_domain *tmp;
2635 u16 req_id, dma_alias;
2636 u8 bus, devfn;
2637
2638 iommu = device_to_iommu(dev, &bus, &devfn);
2639 if (!iommu)
2640 return NULL;
2641
2642 req_id = ((u16)bus << 8) | devfn;
2643
2644 if (dev_is_pci(dev)) {
2645 struct pci_dev *pdev = to_pci_dev(dev);
2646
2647 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2648
2649 /* register PCI DMA alias device */
2650 if (req_id != dma_alias) {
2651 tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2652 dma_alias & 0xff, NULL, domain);
2653
2654 if (!tmp || tmp != domain)
2655 return tmp;
2656 }
ba395927
KA
2657 }
2658
5db31569 2659 tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
76208356
JR
2660 if (!tmp || tmp != domain)
2661 return tmp;
2662
2663 return domain;
2664}
579305f7 2665
76208356
JR
2666static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
2667{
2668 struct dmar_domain *domain, *tmp;
2669
2670 domain = find_domain(dev);
2671 if (domain)
2672 goto out;
2673
2674 domain = find_or_alloc_domain(dev, gaw);
2675 if (!domain)
2676 goto out;
2677
2678 tmp = set_domain_for_dev(dev, domain);
2679 if (!tmp || domain != tmp) {
579305f7
AW
2680 domain_exit(domain);
2681 domain = tmp;
2682 }
b718cd3d 2683
76208356
JR
2684out:
2685
b718cd3d 2686 return domain;
ba395927
KA
2687}
2688
b213203e
DW
2689static int iommu_domain_identity_map(struct dmar_domain *domain,
2690 unsigned long long start,
2691 unsigned long long end)
ba395927 2692{
c5395d5c
DW
2693 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2694 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2695
2696 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2697 dma_to_mm_pfn(last_vpfn))) {
9f10e5bf 2698 pr_err("Reserving iova failed\n");
b213203e 2699 return -ENOMEM;
ba395927
KA
2700 }
2701
af1089ce 2702 pr_debug("Mapping reserved region %llx-%llx\n", start, end);
ba395927
KA
2703 /*
2704 * RMRR range might have overlap with physical memory range,
2705 * clear it first
2706 */
c5395d5c 2707 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
ba395927 2708
87684fd9
PX
2709 return __domain_mapping(domain, first_vpfn, NULL,
2710 first_vpfn, last_vpfn - first_vpfn + 1,
2711 DMA_PTE_READ|DMA_PTE_WRITE);
b213203e
DW
2712}
2713
d66ce54b
JR
2714static int domain_prepare_identity_map(struct device *dev,
2715 struct dmar_domain *domain,
2716 unsigned long long start,
2717 unsigned long long end)
b213203e 2718{
19943b0e
DW
2719 /* For _hardware_ passthrough, don't bother. But for software
2720 passthrough, we do it anyway -- it may indicate a memory
2721 range which is reserved in E820 and so didn't get set
2722 up to start with in si_domain */
2723 if (domain == si_domain && hw_pass_through) {
932a6523
BH
2724 dev_warn(dev, "Ignoring identity map for HW passthrough [0x%Lx - 0x%Lx]\n",
2725 start, end);
19943b0e
DW
2726 return 0;
2727 }
2728
932a6523 2729 dev_info(dev, "Setting identity map [0x%Lx - 0x%Lx]\n", start, end);
9f10e5bf 2730
5595b528
DW
2731 if (end < start) {
2732 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2733 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2734 dmi_get_system_info(DMI_BIOS_VENDOR),
2735 dmi_get_system_info(DMI_BIOS_VERSION),
2736 dmi_get_system_info(DMI_PRODUCT_VERSION));
d66ce54b 2737 return -EIO;
5595b528
DW
2738 }
2739
2ff729f5
DW
2740 if (end >> agaw_to_width(domain->agaw)) {
2741 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2742 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2743 agaw_to_width(domain->agaw),
2744 dmi_get_system_info(DMI_BIOS_VENDOR),
2745 dmi_get_system_info(DMI_BIOS_VERSION),
2746 dmi_get_system_info(DMI_PRODUCT_VERSION));
d66ce54b 2747 return -EIO;
2ff729f5 2748 }
19943b0e 2749
d66ce54b
JR
2750 return iommu_domain_identity_map(domain, start, end);
2751}
ba395927 2752
d66ce54b
JR
2753static int iommu_prepare_identity_map(struct device *dev,
2754 unsigned long long start,
2755 unsigned long long end)
2756{
2757 struct dmar_domain *domain;
2758 int ret;
2759
2760 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2761 if (!domain)
2762 return -ENOMEM;
2763
2764 ret = domain_prepare_identity_map(dev, domain, start, end);
2765 if (ret)
2766 domain_exit(domain);
b213203e 2767
ba395927 2768 return ret;
ba395927
KA
2769}
2770
2771static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
0b9d9753 2772 struct device *dev)
ba395927 2773{
0b9d9753 2774 if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927 2775 return 0;
0b9d9753
DW
2776 return iommu_prepare_identity_map(dev, rmrr->base_address,
2777 rmrr->end_address);
ba395927
KA
2778}
2779
d3f13810 2780#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
49a0429e
KA
2781static inline void iommu_prepare_isa(void)
2782{
2783 struct pci_dev *pdev;
2784 int ret;
2785
2786 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2787 if (!pdev)
2788 return;
2789
9f10e5bf 2790 pr_info("Prepare 0-16MiB unity mapping for LPC\n");
0b9d9753 2791 ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);
49a0429e
KA
2792
2793 if (ret)
9f10e5bf 2794 pr_err("Failed to create 0-16MiB identity map - floppy might not work\n");
49a0429e 2795
9b27e82d 2796 pci_dev_put(pdev);
49a0429e
KA
2797}
2798#else
2799static inline void iommu_prepare_isa(void)
2800{
2801 return;
2802}
d3f13810 2803#endif /* !CONFIG_INTEL_IOMMU_FLOPPY_WA */
49a0429e 2804
2c2e2c38 2805static int md_domain_init(struct dmar_domain *domain, int guest_width);
c7ab48d2 2806
071e1374 2807static int __init si_domain_init(int hw)
2c2e2c38 2808{
e083ea5b 2809 int nid, ret;
2c2e2c38 2810
ab8dfe25 2811 si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2c2e2c38
FY
2812 if (!si_domain)
2813 return -EFAULT;
2814
2c2e2c38
FY
2815 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2816 domain_exit(si_domain);
2817 return -EFAULT;
2818 }
2819
0dc79715 2820 pr_debug("Identity mapping domain allocated\n");
2c2e2c38 2821
19943b0e
DW
2822 if (hw)
2823 return 0;
2824
c7ab48d2 2825 for_each_online_node(nid) {
5dfe8660
TH
2826 unsigned long start_pfn, end_pfn;
2827 int i;
2828
2829 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2830 ret = iommu_domain_identity_map(si_domain,
2831 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2832 if (ret)
2833 return ret;
2834 }
c7ab48d2
DW
2835 }
2836
2c2e2c38
FY
2837 return 0;
2838}
2839
9b226624 2840static int identity_mapping(struct device *dev)
2c2e2c38
FY
2841{
2842 struct device_domain_info *info;
2843
2844 if (likely(!iommu_identity_mapping))
2845 return 0;
2846
9b226624 2847 info = dev->archdata.iommu;
cb452a40
MT
2848 if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2849 return (info->domain == si_domain);
2c2e2c38 2850
2c2e2c38
FY
2851 return 0;
2852}
2853
28ccce0d 2854static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
2c2e2c38 2855{
0ac72664 2856 struct dmar_domain *ndomain;
5a8f40e8 2857 struct intel_iommu *iommu;
156baca8 2858 u8 bus, devfn;
2c2e2c38 2859
5913c9bf 2860 iommu = device_to_iommu(dev, &bus, &devfn);
5a8f40e8
DW
2861 if (!iommu)
2862 return -ENODEV;
2863
5db31569 2864 ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
0ac72664
DW
2865 if (ndomain != domain)
2866 return -EBUSY;
2c2e2c38
FY
2867
2868 return 0;
2869}
2870
0b9d9753 2871static bool device_has_rmrr(struct device *dev)
ea2447f7
TM
2872{
2873 struct dmar_rmrr_unit *rmrr;
832bd858 2874 struct device *tmp;
ea2447f7
TM
2875 int i;
2876
0e242612 2877 rcu_read_lock();
ea2447f7 2878 for_each_rmrr_units(rmrr) {
b683b230
JL
2879 /*
2880 * Return TRUE if this RMRR contains the device that
2881 * is passed in.
2882 */
2883 for_each_active_dev_scope(rmrr->devices,
2884 rmrr->devices_cnt, i, tmp)
0b9d9753 2885 if (tmp == dev) {
0e242612 2886 rcu_read_unlock();
ea2447f7 2887 return true;
b683b230 2888 }
ea2447f7 2889 }
0e242612 2890 rcu_read_unlock();
ea2447f7
TM
2891 return false;
2892}
2893
c875d2c1
AW
2894/*
2895 * There are a couple cases where we need to restrict the functionality of
2896 * devices associated with RMRRs. The first is when evaluating a device for
2897 * identity mapping because problems exist when devices are moved in and out
2898 * of domains and their respective RMRR information is lost. This means that
2899 * a device with associated RMRRs will never be in a "passthrough" domain.
2900 * The second is use of the device through the IOMMU API. This interface
2901 * expects to have full control of the IOVA space for the device. We cannot
2902 * satisfy both the requirement that RMRR access is maintained and have an
2903 * unencumbered IOVA space. We also have no ability to quiesce the device's
2904 * use of the RMRR space or even inform the IOMMU API user of the restriction.
2905 * We therefore prevent devices associated with an RMRR from participating in
2906 * the IOMMU API, which eliminates them from device assignment.
2907 *
2908 * In both cases we assume that PCI USB devices with RMRRs have them largely
2909 * for historical reasons and that the RMRR space is not actively used post
2910 * boot. This exclusion may change if vendors begin to abuse it.
18436afd
DW
2911 *
2912 * The same exception is made for graphics devices, with the requirement that
2913 * any use of the RMRR regions will be torn down before assigning the device
2914 * to a guest.
c875d2c1
AW
2915 */
2916static bool device_is_rmrr_locked(struct device *dev)
2917{
2918 if (!device_has_rmrr(dev))
2919 return false;
2920
2921 if (dev_is_pci(dev)) {
2922 struct pci_dev *pdev = to_pci_dev(dev);
2923
18436afd 2924 if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
c875d2c1
AW
2925 return false;
2926 }
2927
2928 return true;
2929}
2930
3bdb2591 2931static int iommu_should_identity_map(struct device *dev, int startup)
6941af28 2932{
3bdb2591
DW
2933 if (dev_is_pci(dev)) {
2934 struct pci_dev *pdev = to_pci_dev(dev);
ea2447f7 2935
c875d2c1 2936 if (device_is_rmrr_locked(dev))
3bdb2591 2937 return 0;
e0fc7e0b 2938
89a6079d
LB
2939 /*
2940 * Prevent any device marked as untrusted from getting
2941 * placed into the statically identity mapping domain.
2942 */
2943 if (pdev->untrusted)
2944 return 0;
2945
3bdb2591
DW
2946 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2947 return 1;
e0fc7e0b 2948
3bdb2591
DW
2949 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2950 return 1;
6941af28 2951
3bdb2591 2952 if (!(iommu_identity_mapping & IDENTMAP_ALL))
3dfc813d 2953 return 0;
3bdb2591
DW
2954
2955 /*
2956 * We want to start off with all devices in the 1:1 domain, and
2957 * take them out later if we find they can't access all of memory.
2958 *
2959 * However, we can't do this for PCI devices behind bridges,
2960 * because all PCI devices behind the same bridge will end up
2961 * with the same source-id on their transactions.
2962 *
2963 * Practically speaking, we can't change things around for these
2964 * devices at run-time, because we can't be sure there'll be no
2965 * DMA transactions in flight for any of their siblings.
2966 *
2967 * So PCI devices (unless they're on the root bus) as well as
2968 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2969 * the 1:1 domain, just in _case_ one of their siblings turns out
2970 * not to be able to map all of memory.
2971 */
2972 if (!pci_is_pcie(pdev)) {
2973 if (!pci_is_root_bus(pdev->bus))
2974 return 0;
2975 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2976 return 0;
2977 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
3dfc813d 2978 return 0;
3bdb2591
DW
2979 } else {
2980 if (device_has_rmrr(dev))
2981 return 0;
2982 }
3dfc813d 2983
3bdb2591 2984 /*
3dfc813d 2985 * At boot time, we don't yet know if devices will be 64-bit capable.
3bdb2591 2986 * Assume that they will — if they turn out not to be, then we can
3dfc813d
DW
2987 * take them out of the 1:1 domain later.
2988 */
8fcc5372
CW
2989 if (!startup) {
2990 /*
2991 * If the device's dma_mask is less than the system's memory
2992 * size then this is not a candidate for identity mapping.
2993 */
3bdb2591 2994 u64 dma_mask = *dev->dma_mask;
8fcc5372 2995
3bdb2591
DW
2996 if (dev->coherent_dma_mask &&
2997 dev->coherent_dma_mask < dma_mask)
2998 dma_mask = dev->coherent_dma_mask;
8fcc5372 2999
3bdb2591 3000 return dma_mask >= dma_get_required_mask(dev);
8fcc5372 3001 }
6941af28
DW
3002
3003 return 1;
3004}
3005
cf04eee8
DW
3006static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
3007{
3008 int ret;
3009
3010 if (!iommu_should_identity_map(dev, 1))
3011 return 0;
3012
28ccce0d 3013 ret = domain_add_dev_info(si_domain, dev);
cf04eee8 3014 if (!ret)
932a6523
BH
3015 dev_info(dev, "%s identity mapping\n",
3016 hw ? "Hardware" : "Software");
cf04eee8
DW
3017 else if (ret == -ENODEV)
3018 /* device not associated with an iommu */
3019 ret = 0;
3020
3021 return ret;
3022}
3023
3024
071e1374 3025static int __init iommu_prepare_static_identity_mapping(int hw)
2c2e2c38 3026{
2c2e2c38 3027 struct pci_dev *pdev = NULL;
cf04eee8 3028 struct dmar_drhd_unit *drhd;
d3ed71e5
QC
3029 /* To avoid a -Wunused-but-set-variable warning. */
3030 struct intel_iommu *iommu __maybe_unused;
cf04eee8
DW
3031 struct device *dev;
3032 int i;
3033 int ret = 0;
2c2e2c38 3034
2c2e2c38 3035 for_each_pci_dev(pdev) {
cf04eee8
DW
3036 ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
3037 if (ret)
3038 return ret;
3039 }
3040
3041 for_each_active_iommu(iommu, drhd)
3042 for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
3043 struct acpi_device_physical_node *pn;
3044 struct acpi_device *adev;
3045
3046 if (dev->bus != &acpi_bus_type)
3047 continue;
86080ccc 3048
cf04eee8
DW
3049 adev= to_acpi_device(dev);
3050 mutex_lock(&adev->physical_node_lock);
3051 list_for_each_entry(pn, &adev->physical_node_list, node) {
3052 ret = dev_prepare_static_identity_mapping(pn->dev, hw);
3053 if (ret)
3054 break;
eae460b6 3055 }
cf04eee8
DW
3056 mutex_unlock(&adev->physical_node_lock);
3057 if (ret)
3058 return ret;
62edf5dc 3059 }
2c2e2c38
FY
3060
3061 return 0;
3062}
3063
ffebeb46
JL
3064static void intel_iommu_init_qi(struct intel_iommu *iommu)
3065{
3066 /*
3067 * Start from the sane iommu hardware state.
3068 * If the queued invalidation is already initialized by us
3069 * (for example, while enabling interrupt-remapping) then
3070 * things are already rolling from a sane state.
3071 */
3072 if (!iommu->qi) {
3073 /*
3074 * Clear any previous faults.
3075 */
3076 dmar_fault(-1, iommu);
3077 /*
3078 * Disable queued invalidation if supported and already enabled
3079 * before OS handover.
3080 */
3081 dmar_disable_qi(iommu);
3082 }
3083
3084 if (dmar_enable_qi(iommu)) {
3085 /*
3086 * Queued Invalidate not enabled, use Register Based Invalidate
3087 */
3088 iommu->flush.flush_context = __iommu_flush_context;
3089 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
9f10e5bf 3090 pr_info("%s: Using Register based invalidation\n",
ffebeb46
JL
3091 iommu->name);
3092 } else {
3093 iommu->flush.flush_context = qi_flush_context;
3094 iommu->flush.flush_iotlb = qi_flush_iotlb;
9f10e5bf 3095 pr_info("%s: Using Queued invalidation\n", iommu->name);
ffebeb46
JL
3096 }
3097}
3098
091d42e4 3099static int copy_context_table(struct intel_iommu *iommu,
dfddb969 3100 struct root_entry *old_re,
091d42e4
JR
3101 struct context_entry **tbl,
3102 int bus, bool ext)
3103{
dbcd861f 3104 int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
543c8dcf 3105 struct context_entry *new_ce = NULL, ce;
dfddb969 3106 struct context_entry *old_ce = NULL;
543c8dcf 3107 struct root_entry re;
091d42e4
JR
3108 phys_addr_t old_ce_phys;
3109
3110 tbl_idx = ext ? bus * 2 : bus;
dfddb969 3111 memcpy(&re, old_re, sizeof(re));
091d42e4
JR
3112
3113 for (devfn = 0; devfn < 256; devfn++) {
3114 /* First calculate the correct index */
3115 idx = (ext ? devfn * 2 : devfn) % 256;
3116
3117 if (idx == 0) {
3118 /* First save what we may have and clean up */
3119 if (new_ce) {
3120 tbl[tbl_idx] = new_ce;
3121 __iommu_flush_cache(iommu, new_ce,
3122 VTD_PAGE_SIZE);
3123 pos = 1;
3124 }
3125
3126 if (old_ce)
829383e1 3127 memunmap(old_ce);
091d42e4
JR
3128
3129 ret = 0;
3130 if (devfn < 0x80)
543c8dcf 3131 old_ce_phys = root_entry_lctp(&re);
091d42e4 3132 else
543c8dcf 3133 old_ce_phys = root_entry_uctp(&re);
091d42e4
JR
3134
3135 if (!old_ce_phys) {
3136 if (ext && devfn == 0) {
3137 /* No LCTP, try UCTP */
3138 devfn = 0x7f;
3139 continue;
3140 } else {
3141 goto out;
3142 }
3143 }
3144
3145 ret = -ENOMEM;
dfddb969
DW
3146 old_ce = memremap(old_ce_phys, PAGE_SIZE,
3147 MEMREMAP_WB);
091d42e4
JR
3148 if (!old_ce)
3149 goto out;
3150
3151 new_ce = alloc_pgtable_page(iommu->node);
3152 if (!new_ce)
3153 goto out_unmap;
3154
3155 ret = 0;
3156 }
3157
3158 /* Now copy the context entry */
dfddb969 3159 memcpy(&ce, old_ce + idx, sizeof(ce));
091d42e4 3160
cf484d0e 3161 if (!__context_present(&ce))
091d42e4
JR
3162 continue;
3163
dbcd861f
JR
3164 did = context_domain_id(&ce);
3165 if (did >= 0 && did < cap_ndoms(iommu->cap))
3166 set_bit(did, iommu->domain_ids);
3167
cf484d0e
JR
3168 /*
3169 * We need a marker for copied context entries. This
3170 * marker needs to work for the old format as well as
3171 * for extended context entries.
3172 *
3173 * Bit 67 of the context entry is used. In the old
3174 * format this bit is available to software, in the
3175 * extended format it is the PGE bit, but PGE is ignored
3176 * by HW if PASIDs are disabled (and thus still
3177 * available).
3178 *
3179 * So disable PASIDs first and then mark the entry
3180 * copied. This means that we don't copy PASID
3181 * translations from the old kernel, but this is fine as
3182 * faults there are not fatal.
3183 */
3184 context_clear_pasid_enable(&ce);
3185 context_set_copied(&ce);
3186
091d42e4
JR
3187 new_ce[idx] = ce;
3188 }
3189
3190 tbl[tbl_idx + pos] = new_ce;
3191
3192 __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
3193
3194out_unmap:
dfddb969 3195 memunmap(old_ce);
091d42e4
JR
3196
3197out:
3198 return ret;
3199}
3200
3201static int copy_translation_tables(struct intel_iommu *iommu)
3202{
3203 struct context_entry **ctxt_tbls;
dfddb969 3204 struct root_entry *old_rt;
091d42e4
JR
3205 phys_addr_t old_rt_phys;
3206 int ctxt_table_entries;
3207 unsigned long flags;
3208 u64 rtaddr_reg;
3209 int bus, ret;
c3361f2f 3210 bool new_ext, ext;
091d42e4
JR
3211
3212 rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
3213 ext = !!(rtaddr_reg & DMA_RTADDR_RTT);
c3361f2f
JR
3214 new_ext = !!ecap_ecs(iommu->ecap);
3215
3216 /*
3217 * The RTT bit can only be changed when translation is disabled,
3218 * but disabling translation means to open a window for data
3219 * corruption. So bail out and don't copy anything if we would
3220 * have to change the bit.
3221 */
3222 if (new_ext != ext)
3223 return -EINVAL;
091d42e4
JR
3224
3225 old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
3226 if (!old_rt_phys)
3227 return -EINVAL;
3228
dfddb969 3229 old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
091d42e4
JR
3230 if (!old_rt)
3231 return -ENOMEM;
3232
3233 /* This is too big for the stack - allocate it from slab */
3234 ctxt_table_entries = ext ? 512 : 256;
3235 ret = -ENOMEM;
6396bb22 3236 ctxt_tbls = kcalloc(ctxt_table_entries, sizeof(void *), GFP_KERNEL);
091d42e4
JR
3237 if (!ctxt_tbls)
3238 goto out_unmap;
3239
3240 for (bus = 0; bus < 256; bus++) {
3241 ret = copy_context_table(iommu, &old_rt[bus],
3242 ctxt_tbls, bus, ext);
3243 if (ret) {
3244 pr_err("%s: Failed to copy context table for bus %d\n",
3245 iommu->name, bus);
3246 continue;
3247 }
3248 }
3249
3250 spin_lock_irqsave(&iommu->lock, flags);
3251
3252 /* Context tables are copied, now write them to the root_entry table */
3253 for (bus = 0; bus < 256; bus++) {
3254 int idx = ext ? bus * 2 : bus;
3255 u64 val;
3256
3257 if (ctxt_tbls[idx]) {
3258 val = virt_to_phys(ctxt_tbls[idx]) | 1;
3259 iommu->root_entry[bus].lo = val;
3260 }
3261
3262 if (!ext || !ctxt_tbls[idx + 1])
3263 continue;
3264
3265 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
3266 iommu->root_entry[bus].hi = val;
3267 }
3268
3269 spin_unlock_irqrestore(&iommu->lock, flags);
3270
3271 kfree(ctxt_tbls);
3272
3273 __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
3274
3275 ret = 0;
3276
3277out_unmap:
dfddb969 3278 memunmap(old_rt);
091d42e4
JR
3279
3280 return ret;
3281}
3282
b779260b 3283static int __init init_dmars(void)
ba395927
KA
3284{
3285 struct dmar_drhd_unit *drhd;
3286 struct dmar_rmrr_unit *rmrr;
a87f4918 3287 bool copied_tables = false;
832bd858 3288 struct device *dev;
ba395927 3289 struct intel_iommu *iommu;
13cf0174 3290 int i, ret;
2c2e2c38 3291
ba395927
KA
3292 /*
3293 * for each drhd
3294 * allocate root
3295 * initialize and program root entry to not present
3296 * endfor
3297 */
3298 for_each_drhd_unit(drhd) {
5e0d2a6f 3299 /*
3300 * lock not needed as this is only incremented in the
3301 * single-threaded kernel __init code path; all other accesses
3302 * are read-only
3303 */
78d8e704 3304 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
1b198bb0
MT
3305 g_num_of_iommus++;
3306 continue;
3307 }
9f10e5bf 3308 pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
5e0d2a6f 3309 }
3310
ffebeb46
JL
3311 /* Preallocate enough resources for IOMMU hot-addition */
3312 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
3313 g_num_of_iommus = DMAR_UNITS_SUPPORTED;
3314
d9630fe9
WH
3315 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
3316 GFP_KERNEL);
3317 if (!g_iommus) {
9f10e5bf 3318 pr_err("Allocating global iommu array failed\n");
d9630fe9
WH
3319 ret = -ENOMEM;
3320 goto error;
3321 }
3322
7c919779 3323 for_each_active_iommu(iommu, drhd) {
56283174
LB
3324 /*
3325 * Find the max pasid size of all IOMMUs in the system.
3326 * We need to ensure the system pasid table is no bigger
3327 * than the smallest supported.
3328 */
765b6a98 3329 if (pasid_supported(iommu)) {
56283174
LB
3330 u32 temp = 2 << ecap_pss(iommu->ecap);
3331
3332 intel_pasid_max_id = min_t(u32, temp,
3333 intel_pasid_max_id);
3334 }
3335
d9630fe9 3336 g_iommus[iommu->seq_id] = iommu;
ba395927 3337
b63d80d1
JR
3338 intel_iommu_init_qi(iommu);
3339
e61d98d8
SS
3340 ret = iommu_init_domains(iommu);
3341 if (ret)
989d51fc 3342 goto free_iommu;
e61d98d8 3343
4158c2ec
JR
3344 init_translation_status(iommu);
3345
091d42e4
JR
3346 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
3347 iommu_disable_translation(iommu);
3348 clear_translation_pre_enabled(iommu);
3349 pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3350 iommu->name);
3351 }
4158c2ec 3352
ba395927
KA
3353 /*
3354 * TBD:
3355 * we could share the same root & context tables
25985edc 3356 * among all IOMMUs. Need to split it later.
ba395927
KA
3357 */
3358 ret = iommu_alloc_root_entry(iommu);
ffebeb46 3359 if (ret)
989d51fc 3360 goto free_iommu;
5f0a7f76 3361
091d42e4
JR
3362 if (translation_pre_enabled(iommu)) {
3363 pr_info("Translation already enabled - trying to copy translation structures\n");
3364
3365 ret = copy_translation_tables(iommu);
3366 if (ret) {
3367 /*
3368 * We found the IOMMU with translation
3369 * enabled - but failed to copy over the
3370 * old root-entry table. Try to proceed
3371 * by disabling translation now and
3372 * allocating a clean root-entry table.
3373 * This might cause DMAR faults, but
3374 * probably the dump will still succeed.
3375 */
3376 pr_err("Failed to copy translation tables from previous kernel for %s\n",
3377 iommu->name);
3378 iommu_disable_translation(iommu);
3379 clear_translation_pre_enabled(iommu);
3380 } else {
3381 pr_info("Copied translation tables from previous kernel for %s\n",
3382 iommu->name);
a87f4918 3383 copied_tables = true;
091d42e4
JR
3384 }
3385 }
3386
4ed0d3e6 3387 if (!ecap_pass_through(iommu->ecap))
19943b0e 3388 hw_pass_through = 0;
8a94ade4 3389#ifdef CONFIG_INTEL_IOMMU_SVM
765b6a98 3390 if (pasid_supported(iommu))
d9737953 3391 intel_svm_init(iommu);
8a94ade4 3392#endif
ba395927
KA
3393 }
3394
a4c34ff1
JR
3395 /*
3396 * Now that qi is enabled on all iommus, set the root entry and flush
3397 * caches. This is required on some Intel X58 chipsets, otherwise the
3398 * flush_context function will loop forever and the boot hangs.
3399 */
3400 for_each_active_iommu(iommu, drhd) {
3401 iommu_flush_write_buffer(iommu);
3402 iommu_set_root_entry(iommu);
3403 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
3404 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3405 }
3406
19943b0e 3407 if (iommu_pass_through)
e0fc7e0b
DW
3408 iommu_identity_mapping |= IDENTMAP_ALL;
3409
d3f13810 3410#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
5daab580 3411 dmar_map_gfx = 0;
19943b0e 3412#endif
e0fc7e0b 3413
5daab580
LB
3414 if (!dmar_map_gfx)
3415 iommu_identity_mapping |= IDENTMAP_GFX;
3416
21e722c4
AR
3417 check_tylersburg_isoch();
3418
86080ccc
JR
3419 if (iommu_identity_mapping) {
3420 ret = si_domain_init(hw_pass_through);
3421 if (ret)
3422 goto free_iommu;
3423 }
3424
e0fc7e0b 3425
a87f4918
JR
3426 /*
3427 * If we copied translations from a previous kernel in the kdump
3428 * case, we cannot assign the devices to domains now, as that
3429 * would eliminate the old mappings. So skip this part and defer
3430 * the assignment to device driver initialization time.
3431 */
3432 if (copied_tables)
3433 goto domains_done;
3434
ba395927 3435 /*
19943b0e
DW
3436 * If pass through is not set or not enabled, set up context entries for
3437 * identity mappings for rmrr, gfx, and isa, which may fall back to static
3438 * identity mapping if iommu_identity_mapping is set.
ba395927 3439 */
19943b0e
DW
3440 if (iommu_identity_mapping) {
3441 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
4ed0d3e6 3442 if (ret) {
9f10e5bf 3443 pr_crit("Failed to setup IOMMU pass-through\n");
989d51fc 3444 goto free_iommu;
ba395927
KA
3445 }
3446 }
ba395927 3447 /*
19943b0e
DW
3448 * For each rmrr
3449 * for each dev attached to rmrr
3450 * do
3451 * locate drhd for dev, alloc domain for dev
3452 * allocate free domain
3453 * allocate page table entries for rmrr
3454 * if context not allocated for bus
3455 * allocate and init context
3456 * set present in root table for this bus
3457 * init context with domain, translation etc
3458 * endfor
3459 * endfor
ba395927 3460 */
9f10e5bf 3461 pr_info("Setting RMRR:\n");
19943b0e 3462 for_each_rmrr_units(rmrr) {
b683b230
JL
3463 /* some BIOSes list nonexistent devices in the DMAR table. */
3464 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
832bd858 3465 i, dev) {
0b9d9753 3466 ret = iommu_prepare_rmrr_dev(rmrr, dev);
19943b0e 3467 if (ret)
9f10e5bf 3468 pr_err("Mapping reserved region failed\n");
ba395927 3469 }
4ed0d3e6 3470 }
49a0429e 3471
19943b0e
DW
3472 iommu_prepare_isa();
3473
a87f4918
JR
3474domains_done:
3475
ba395927
KA
3476 /*
3477 * for each drhd
3478 * enable fault log
3479 * global invalidate context cache
3480 * global invalidate iotlb
3481 * enable translation
3482 */
7c919779 3483 for_each_iommu(iommu, drhd) {
51a63e67
JC
3484 if (drhd->ignored) {
3485 /*
3486 * we always have to disable PMRs or DMA may fail on
3487 * this device
3488 */
3489 if (force_on)
7c919779 3490 iommu_disable_protect_mem_regions(iommu);
ba395927 3491 continue;
51a63e67 3492 }
ba395927
KA
3493
3494 iommu_flush_write_buffer(iommu);
3495
a222a7f0 3496#ifdef CONFIG_INTEL_IOMMU_SVM
765b6a98 3497 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
a7755c3c
LB
3498 /*
3499 * Call dmar_alloc_hwirq() with dmar_global_lock held,
3500 * could cause possible lock race condition.
3501 */
3502 up_write(&dmar_global_lock);
a222a7f0 3503 ret = intel_svm_enable_prq(iommu);
a7755c3c 3504 down_write(&dmar_global_lock);
a222a7f0
DW
3505 if (ret)
3506 goto free_iommu;
3507 }
3508#endif
3460a6d9
KA
3509 ret = dmar_set_interrupt(iommu);
3510 if (ret)
989d51fc 3511 goto free_iommu;
3460a6d9 3512
8939ddf6
JR
3513 if (!translation_pre_enabled(iommu))
3514 iommu_enable_translation(iommu);
3515
b94996c9 3516 iommu_disable_protect_mem_regions(iommu);
ba395927
KA
3517 }
3518
3519 return 0;
989d51fc
JL
3520
3521free_iommu:
ffebeb46
JL
3522 for_each_active_iommu(iommu, drhd) {
3523 disable_dmar_iommu(iommu);
a868e6b7 3524 free_dmar_iommu(iommu);
ffebeb46 3525 }
13cf0174 3526
d9630fe9 3527 kfree(g_iommus);
13cf0174 3528
989d51fc 3529error:
ba395927
KA
3530 return ret;
3531}
3532
5a5e02a6 3533/* This takes a number of _MM_ pages, not VTD pages */
2aac6304 3534static unsigned long intel_alloc_iova(struct device *dev,
875764de
DW
3535 struct dmar_domain *domain,
3536 unsigned long nrpages, uint64_t dma_mask)
ba395927 3537{
e083ea5b 3538 unsigned long iova_pfn;
ba395927 3539
875764de
DW
3540 /* Restrict dma_mask to the width that the iommu can handle */
3541 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
8f6429c7
RM
3542 /* Ensure we reserve the whole size-aligned region */
3543 nrpages = __roundup_pow_of_two(nrpages);
875764de
DW
3544
3545 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
ba395927
KA
3546 /*
3547 * First try to allocate an io virtual address in
284901a9 3548 * DMA_BIT_MASK(32) and if that fails then try allocating
3609801e 3549 * from higher range
ba395927 3550 */
22e2f9fa 3551 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
538d5b33 3552 IOVA_PFN(DMA_BIT_MASK(32)), false);
22e2f9fa
OP
3553 if (iova_pfn)
3554 return iova_pfn;
875764de 3555 }
538d5b33
TN
3556 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
3557 IOVA_PFN(dma_mask), true);
22e2f9fa 3558 if (unlikely(!iova_pfn)) {
932a6523 3559 dev_err(dev, "Allocating %ld-page iova failed", nrpages);
2aac6304 3560 return 0;
f76aec76
KA
3561 }
3562
22e2f9fa 3563 return iova_pfn;
f76aec76
KA
3564}
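/*
 * Illustrative sketch, not part of the original file: how a caller is
 * expected to feed intel_alloc_iova() above.  The page count argument is in
 * CPU (_MM_) pages, so a byte range is first sized in VT-d pages with
 * aligned_nrpages() and then converted with dma_to_mm_pfn(); the returned
 * PFN is likewise an MM PFN and is converted back with mm_to_dma_pfn()
 * before PTEs are built, as __intel_map_single() below does.  The function
 * name is hypothetical.
 */
#if 0	/* example only, not compiled */
static unsigned long example_alloc_iova(struct device *dev,
					struct dmar_domain *domain,
					phys_addr_t paddr, size_t size,
					u64 dma_mask)
{
	/* Number of VT-d pages covering [paddr, paddr + size)... */
	unsigned long nrpages = aligned_nrpages(paddr, size);

	/* ...converted to MM pages for the allocator. */
	unsigned long iova_pfn = intel_alloc_iova(dev, domain,
						  dma_to_mm_pfn(nrpages),
						  dma_mask);

	/*
	 * On success, mm_to_dma_pfn(iova_pfn) is the first VT-d page frame
	 * of the allocated range; 0 means allocation failure.
	 */
	return iova_pfn;
}
#endif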
3565
9ddbfb42 3566struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
f76aec76 3567{
1c5ebba9 3568 struct dmar_domain *domain, *tmp;
b1ce5b79 3569 struct dmar_rmrr_unit *rmrr;
b1ce5b79
JR
3570 struct device *i_dev;
3571 int i, ret;
f76aec76 3572
1c5ebba9
JR
3573 domain = find_domain(dev);
3574 if (domain)
3575 goto out;
3576
3577 domain = find_or_alloc_domain(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
3578 if (!domain)
3579 goto out;
ba395927 3580
b1ce5b79
JR
3581 /* We have a new domain - setup possible RMRRs for the device */
3582 rcu_read_lock();
3583 for_each_rmrr_units(rmrr) {
3584 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
3585 i, i_dev) {
3586 if (i_dev != dev)
3587 continue;
3588
3589 ret = domain_prepare_identity_map(dev, domain,
3590 rmrr->base_address,
3591 rmrr->end_address);
3592 if (ret)
3593 dev_err(dev, "Mapping reserved region failed\n");
3594 }
3595 }
3596 rcu_read_unlock();
3597
1c5ebba9
JR
3598 tmp = set_domain_for_dev(dev, domain);
3599 if (!tmp || domain != tmp) {
3600 domain_exit(domain);
3601 domain = tmp;
3602 }
3603
3604out:
3605
3606 if (!domain)
932a6523 3607 dev_err(dev, "Allocating domain failed\n");
1c5ebba9
JR
3608
3609
f76aec76
KA
3610 return domain;
3611}
3612
ecb509ec 3613/* Check if the dev needs to go through non-identity map and unmap process.*/
48b2c937 3614static bool iommu_need_mapping(struct device *dev)
2c2e2c38
FY
3615{
3616 int found;
3617
3d89194a 3618 if (iommu_dummy(dev))
48b2c937 3619 return false;
1e4c64c4 3620
2c2e2c38 3621 if (!iommu_identity_mapping)
48b2c937 3622 return true;
2c2e2c38 3623
9b226624 3624 found = identity_mapping(dev);
2c2e2c38 3625 if (found) {
ecb509ec 3626 if (iommu_should_identity_map(dev, 0))
48b2c937
CH
3627 return false;
3628
3629 /*
3630 * The 32-bit DMA device is removed from si_domain and falls back
3631 * to non-identity mapping.
3632 */
3633 dmar_remove_one_dev_info(dev);
3634 dev_info(dev, "32bit DMA uses non-identity mapping\n");
2c2e2c38
FY
3635 } else {
3636 /*
3637 * In case of a 64-bit DMA device detached from a VM, the device
3638 * is put into si_domain for identity mapping.
3639 */
48b2c937
CH
3640 if (iommu_should_identity_map(dev, 0) &&
3641 !domain_add_dev_info(si_domain, dev)) {
3642 dev_info(dev, "64bit DMA uses identity mapping\n");
3643 return false;
2c2e2c38
FY
3644 }
3645 }
3646
48b2c937 3647 return true;
2c2e2c38
FY
3648}
3649
21d5d27c
LG
3650static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
3651 size_t size, int dir, u64 dma_mask)
f76aec76 3652{
f76aec76 3653 struct dmar_domain *domain;
5b6985ce 3654 phys_addr_t start_paddr;
2aac6304 3655 unsigned long iova_pfn;
f76aec76 3656 int prot = 0;
6865f0d1 3657 int ret;
8c11e798 3658 struct intel_iommu *iommu;
33041ec0 3659 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
f76aec76
KA
3660
3661 BUG_ON(dir == DMA_NONE);
2c2e2c38 3662
5040a918 3663 domain = get_valid_domain_for_dev(dev);
f76aec76 3664 if (!domain)
524a669b 3665 return DMA_MAPPING_ERROR;
f76aec76 3666
8c11e798 3667 iommu = domain_get_iommu(domain);
88cb6a74 3668 size = aligned_nrpages(paddr, size);
f76aec76 3669
2aac6304
OP
3670 iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
3671 if (!iova_pfn)
f76aec76
KA
3672 goto error;
3673
ba395927
KA
3674 /*
3675 * Check if DMAR supports zero-length reads on write-only
3676 * mappings.
3677 */
3678 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3679 !cap_zlr(iommu->cap))
ba395927
KA
3680 prot |= DMA_PTE_READ;
3681 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3682 prot |= DMA_PTE_WRITE;
3683 /*
6865f0d1 3684 * paddr - (paddr + size) might be a partial page; we should map the whole
ba395927 3685 * page. Note: if two parts of one page are separately mapped, we
6865f0d1 3686 * might have two guest_addrs mapping to the same host paddr, but this
ba395927
KA
3687 * is not a big problem
3688 */
2aac6304 3689 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova_pfn),
33041ec0 3690 mm_to_dma_pfn(paddr_pfn), size, prot);
ba395927
KA
3691 if (ret)
3692 goto error;
3693
2aac6304 3694 start_paddr = (phys_addr_t)iova_pfn << PAGE_SHIFT;
03d6a246
DW
3695 start_paddr += paddr & ~PAGE_MASK;
3696 return start_paddr;
ba395927 3697
ba395927 3698error:
2aac6304 3699 if (iova_pfn)
22e2f9fa 3700 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
932a6523
BH
3701 dev_err(dev, "Device request: %zx@%llx dir %d --- failed\n",
3702 size, (unsigned long long)paddr, dir);
524a669b 3703 return DMA_MAPPING_ERROR;
ba395927
KA
3704}
3705
ffbbef5c
FT
3706static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3707 unsigned long offset, size_t size,
3708 enum dma_data_direction dir,
00085f1e 3709 unsigned long attrs)
bb9e6d65 3710{
9cc0c2af
CH
3711 if (iommu_need_mapping(dev))
3712 return __intel_map_single(dev, page_to_phys(page) + offset,
3713 size, dir, *dev->dma_mask);
3714 return dma_direct_map_page(dev, page, offset, size, dir, attrs);
21d5d27c
LG
3715}
3716
3717static dma_addr_t intel_map_resource(struct device *dev, phys_addr_t phys_addr,
3718 size_t size, enum dma_data_direction dir,
3719 unsigned long attrs)
3720{
9cc0c2af
CH
3721 if (iommu_need_mapping(dev))
3722 return __intel_map_single(dev, phys_addr, size, dir,
3723 *dev->dma_mask);
3724 return dma_direct_map_resource(dev, phys_addr, size, dir, attrs);
bb9e6d65
FT
3725}
3726
769530e4 3727static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
ba395927 3728{
f76aec76 3729 struct dmar_domain *domain;
d794dc9b 3730 unsigned long start_pfn, last_pfn;
769530e4 3731 unsigned long nrpages;
2aac6304 3732 unsigned long iova_pfn;
8c11e798 3733 struct intel_iommu *iommu;
ea8ea460 3734 struct page *freelist;
f7b0c4ce 3735 struct pci_dev *pdev = NULL;
ba395927 3736
1525a29a 3737 domain = find_domain(dev);
ba395927
KA
3738 BUG_ON(!domain);
3739
8c11e798
WH
3740 iommu = domain_get_iommu(domain);
3741
2aac6304 3742 iova_pfn = IOVA_PFN(dev_addr);
ba395927 3743
769530e4 3744 nrpages = aligned_nrpages(dev_addr, size);
2aac6304 3745 start_pfn = mm_to_dma_pfn(iova_pfn);
769530e4 3746 last_pfn = start_pfn + nrpages - 1;
ba395927 3747
f7b0c4ce
LB
3748 if (dev_is_pci(dev))
3749 pdev = to_pci_dev(dev);
3750
932a6523 3751 dev_dbg(dev, "Device unmapping: pfn %lx-%lx\n", start_pfn, last_pfn);
ba395927 3752
ea8ea460 3753 freelist = domain_unmap(domain, start_pfn, last_pfn);
d794dc9b 3754
f7b0c4ce 3755 if (intel_iommu_strict || (pdev && pdev->untrusted)) {
a1ddcbe9 3756 iommu_flush_iotlb_psi(iommu, domain, start_pfn,
769530e4 3757 nrpages, !freelist, 0);
5e0d2a6f 3758 /* free iova */
22e2f9fa 3759 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages));
ea8ea460 3760 dma_free_pagelist(freelist);
5e0d2a6f 3761 } else {
13cf0174
JR
3762 queue_iova(&domain->iovad, iova_pfn, nrpages,
3763 (unsigned long)freelist);
5e0d2a6f 3764 /*
3765 * queue up the release of the unmap to save the 1/6th of the
3766 * cpu used up by the iotlb flush operation...
3767 */
5e0d2a6f 3768 }
ba395927
KA
3769}
3770
d41a4adb
JL
3771static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3772 size_t size, enum dma_data_direction dir,
00085f1e 3773 unsigned long attrs)
d41a4adb 3774{
9cc0c2af
CH
3775 if (iommu_need_mapping(dev))
3776 intel_unmap(dev, dev_addr, size);
3777 else
3778 dma_direct_unmap_page(dev, dev_addr, size, dir, attrs);
3779}
3780
3781static void intel_unmap_resource(struct device *dev, dma_addr_t dev_addr,
3782 size_t size, enum dma_data_direction dir, unsigned long attrs)
3783{
3784 if (iommu_need_mapping(dev))
3785 intel_unmap(dev, dev_addr, size);
d41a4adb
JL
3786}
3787
5040a918 3788static void *intel_alloc_coherent(struct device *dev, size_t size,
baa676fc 3789 dma_addr_t *dma_handle, gfp_t flags,
00085f1e 3790 unsigned long attrs)
ba395927 3791{
7ec916f8
CH
3792 struct page *page = NULL;
3793 int order;
ba395927 3794
9cc0c2af
CH
3795 if (!iommu_need_mapping(dev))
3796 return dma_direct_alloc(dev, size, dma_handle, flags, attrs);
3797
7ec916f8
CH
3798 size = PAGE_ALIGN(size);
3799 order = get_order(size);
7ec916f8
CH
3800
3801 if (gfpflags_allow_blocking(flags)) {
3802 unsigned int count = size >> PAGE_SHIFT;
3803
d834c5ab
MS
3804 page = dma_alloc_from_contiguous(dev, count, order,
3805 flags & __GFP_NOWARN);
7ec916f8
CH
3806 }
3807
3808 if (!page)
3809 page = alloc_pages(flags, order);
3810 if (!page)
3811 return NULL;
3812 memset(page_address(page), 0, size);
3813
21d5d27c
LG
3814 *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
3815 DMA_BIDIRECTIONAL,
3816 dev->coherent_dma_mask);
524a669b 3817 if (*dma_handle != DMA_MAPPING_ERROR)
7ec916f8
CH
3818 return page_address(page);
3819 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3820 __free_pages(page, order);
36746436 3821
ba395927
KA
3822 return NULL;
3823}
3824
5040a918 3825static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
00085f1e 3826 dma_addr_t dma_handle, unsigned long attrs)
ba395927 3827{
7ec916f8
CH
3828 int order;
3829 struct page *page = virt_to_page(vaddr);
3830
9cc0c2af
CH
3831 if (!iommu_need_mapping(dev))
3832 return dma_direct_free(dev, size, vaddr, dma_handle, attrs);
3833
7ec916f8
CH
3834 size = PAGE_ALIGN(size);
3835 order = get_order(size);
3836
3837 intel_unmap(dev, dma_handle, size);
3838 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3839 __free_pages(page, order);
ba395927
KA
3840}
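/*
 * Illustrative sketch, not part of the original file: the driver-side view
 * of the two callbacks above.  A driver never calls intel_alloc_coherent()
 * directly; it uses the generic DMA API, which dispatches through the
 * dma_map_ops installed later in this file.  "dev" is a hypothetical device
 * that is already behind a DMAR unit.
 */
#if 0	/* example only, not compiled */
static int example_coherent_buffer(struct device *dev)
{
	dma_addr_t dma_handle;
	void *cpu_addr;

	/* Ends up in intel_alloc_coherent() above. */
	cpu_addr = dma_alloc_coherent(dev, PAGE_SIZE, &dma_handle, GFP_KERNEL);
	if (!cpu_addr)
		return -ENOMEM;

	/* ... program dma_handle into the device, touch cpu_addr from the CPU ... */

	/* Ends up in intel_free_coherent() above. */
	dma_free_coherent(dev, PAGE_SIZE, cpu_addr, dma_handle);
	return 0;
}
#endif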
3841
5040a918 3842static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
d7ab5c46 3843 int nelems, enum dma_data_direction dir,
00085f1e 3844 unsigned long attrs)
ba395927 3845{
769530e4
OP
3846 dma_addr_t startaddr = sg_dma_address(sglist) & PAGE_MASK;
3847 unsigned long nrpages = 0;
3848 struct scatterlist *sg;
3849 int i;
3850
9cc0c2af
CH
3851 if (!iommu_need_mapping(dev))
3852 return dma_direct_unmap_sg(dev, sglist, nelems, dir, attrs);
3853
769530e4
OP
3854 for_each_sg(sglist, sg, nelems, i) {
3855 nrpages += aligned_nrpages(sg_dma_address(sg), sg_dma_len(sg));
3856 }
3857
3858 intel_unmap(dev, startaddr, nrpages << VTD_PAGE_SHIFT);
ba395927
KA
3859}
3860
5040a918 3861static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
00085f1e 3862 enum dma_data_direction dir, unsigned long attrs)
ba395927 3863{
ba395927 3864 int i;
ba395927 3865 struct dmar_domain *domain;
f76aec76
KA
3866 size_t size = 0;
3867 int prot = 0;
2aac6304 3868 unsigned long iova_pfn;
f76aec76 3869 int ret;
c03ab37c 3870 struct scatterlist *sg;
b536d24d 3871 unsigned long start_vpfn;
8c11e798 3872 struct intel_iommu *iommu;
ba395927
KA
3873
3874 BUG_ON(dir == DMA_NONE);
48b2c937 3875 if (!iommu_need_mapping(dev))
9cc0c2af 3876 return dma_direct_map_sg(dev, sglist, nelems, dir, attrs);
ba395927 3877
5040a918 3878 domain = get_valid_domain_for_dev(dev);
f76aec76
KA
3879 if (!domain)
3880 return 0;
3881
8c11e798
WH
3882 iommu = domain_get_iommu(domain);
3883
b536d24d 3884 for_each_sg(sglist, sg, nelems, i)
88cb6a74 3885 size += aligned_nrpages(sg->offset, sg->length);
f76aec76 3886
2aac6304 3887 iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
5040a918 3888 *dev->dma_mask);
2aac6304 3889 if (!iova_pfn) {
c03ab37c 3890 sglist->dma_length = 0;
f76aec76
KA
3891 return 0;
3892 }
3893
3894 /*
3895 * Check if DMAR supports zero-length reads on write-only
3896 * mappings.
3897 */
3898 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3899 !cap_zlr(iommu->cap))
f76aec76
KA
3900 prot |= DMA_PTE_READ;
3901 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3902 prot |= DMA_PTE_WRITE;
3903
2aac6304 3904 start_vpfn = mm_to_dma_pfn(iova_pfn);
e1605495 3905
f532959b 3906 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
e1605495 3907 if (unlikely(ret)) {
e1605495 3908 dma_pte_free_pagetable(domain, start_vpfn,
bc24c571
DD
3909 start_vpfn + size - 1,
3910 agaw_to_level(domain->agaw) + 1);
22e2f9fa 3911 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
e1605495 3912 return 0;
ba395927
KA
3913 }
3914
ba395927
KA
3915 return nelems;
3916}
3917
02b4da5f 3918static const struct dma_map_ops intel_dma_ops = {
baa676fc
AP
3919 .alloc = intel_alloc_coherent,
3920 .free = intel_free_coherent,
ba395927
KA
3921 .map_sg = intel_map_sg,
3922 .unmap_sg = intel_unmap_sg,
ffbbef5c
FT
3923 .map_page = intel_map_page,
3924 .unmap_page = intel_unmap_page,
21d5d27c 3925 .map_resource = intel_map_resource,
9cc0c2af 3926 .unmap_resource = intel_unmap_resource,
fec777c3 3927 .dma_supported = dma_direct_supported,
ba395927
KA
3928};
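/*
 * Illustrative sketch, not part of the original file: a streaming mapping as
 * seen from a device driver once dma_ops has been pointed at intel_dma_ops
 * (see intel_iommu_init()).  dma_map_single()/dma_unmap_single() resolve to
 * the .map_page/.unmap_page callbacks above; the dma_mapping_error() check
 * matters because __intel_map_single() returns DMA_MAPPING_ERROR on failure.
 * "dev" and "buf" are hypothetical.
 */
#if 0	/* example only, not compiled */
static int example_streaming_map(struct device *dev, void *buf, size_t len)
{
	dma_addr_t dma = dma_map_single(dev, buf, len, DMA_TO_DEVICE);

	if (dma_mapping_error(dev, dma))
		return -ENOMEM;

	/* ... hand "dma" to the device and run the transfer ... */

	dma_unmap_single(dev, dma, len, DMA_TO_DEVICE);
	return 0;
}
#endif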
3929
3930static inline int iommu_domain_cache_init(void)
3931{
3932 int ret = 0;
3933
3934 iommu_domain_cache = kmem_cache_create("iommu_domain",
3935 sizeof(struct dmar_domain),
3936 0,
3937 SLAB_HWCACHE_ALIGN,
3938
3939 NULL);
3940 if (!iommu_domain_cache) {
9f10e5bf 3941 pr_err("Couldn't create iommu_domain cache\n");
ba395927
KA
3942 ret = -ENOMEM;
3943 }
3944
3945 return ret;
3946}
3947
3948static inline int iommu_devinfo_cache_init(void)
3949{
3950 int ret = 0;
3951
3952 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3953 sizeof(struct device_domain_info),
3954 0,
3955 SLAB_HWCACHE_ALIGN,
ba395927
KA
3956 NULL);
3957 if (!iommu_devinfo_cache) {
9f10e5bf 3958 pr_err("Couldn't create devinfo cache\n");
ba395927
KA
3959 ret = -ENOMEM;
3960 }
3961
3962 return ret;
3963}
3964
ba395927
KA
3965static int __init iommu_init_mempool(void)
3966{
3967 int ret;
ae1ff3d6 3968 ret = iova_cache_get();
ba395927
KA
3969 if (ret)
3970 return ret;
3971
3972 ret = iommu_domain_cache_init();
3973 if (ret)
3974 goto domain_error;
3975
3976 ret = iommu_devinfo_cache_init();
3977 if (!ret)
3978 return ret;
3979
3980 kmem_cache_destroy(iommu_domain_cache);
3981domain_error:
ae1ff3d6 3982 iova_cache_put();
ba395927
KA
3983
3984 return -ENOMEM;
3985}
3986
3987static void __init iommu_exit_mempool(void)
3988{
3989 kmem_cache_destroy(iommu_devinfo_cache);
3990 kmem_cache_destroy(iommu_domain_cache);
ae1ff3d6 3991 iova_cache_put();
ba395927
KA
3992}
3993
556ab45f
DW
3994static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3995{
3996 struct dmar_drhd_unit *drhd;
3997 u32 vtbar;
3998 int rc;
3999
4000 /* We know that this device on this chipset has its own IOMMU.
4001 * If we find it under a different IOMMU, then the BIOS is lying
4002 * to us. Hope that the IOMMU for this device is actually
4003 * disabled, and it needs no translation...
4004 */
4005 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
4006 if (rc) {
4007 /* "can't" happen */
4008 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
4009 return;
4010 }
4011 vtbar &= 0xffff0000;
4012
4013 /* we know that this iommu should be at offset 0xa000 from the vtbar */
4014 drhd = dmar_find_matched_drhd_unit(pdev);
4015 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
4016 TAINT_FIRMWARE_WORKAROUND,
4017 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
4018 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
4019}
4020DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
4021
ba395927
KA
4022static void __init init_no_remapping_devices(void)
4023{
4024 struct dmar_drhd_unit *drhd;
832bd858 4025 struct device *dev;
b683b230 4026 int i;
ba395927
KA
4027
4028 for_each_drhd_unit(drhd) {
4029 if (!drhd->include_all) {
b683b230
JL
4030 for_each_active_dev_scope(drhd->devices,
4031 drhd->devices_cnt, i, dev)
4032 break;
832bd858 4033 /* ignore DMAR unit if no devices exist */
ba395927
KA
4034 if (i == drhd->devices_cnt)
4035 drhd->ignored = 1;
4036 }
4037 }
4038
7c919779 4039 for_each_active_drhd_unit(drhd) {
7c919779 4040 if (drhd->include_all)
ba395927
KA
4041 continue;
4042
b683b230
JL
4043 for_each_active_dev_scope(drhd->devices,
4044 drhd->devices_cnt, i, dev)
832bd858 4045 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
ba395927 4046 break;
ba395927
KA
4047 if (i < drhd->devices_cnt)
4048 continue;
4049
c0771df8
DW
4050 /* This IOMMU has *only* gfx devices. Either bypass it or
4051 set the gfx_mapped flag, as appropriate */
cf1ec453 4052 if (!dmar_map_gfx) {
c0771df8 4053 drhd->ignored = 1;
b683b230
JL
4054 for_each_active_dev_scope(drhd->devices,
4055 drhd->devices_cnt, i, dev)
832bd858 4056 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
ba395927
KA
4057 }
4058 }
4059}
4060
f59c7b69
FY
4061#ifdef CONFIG_SUSPEND
4062static int init_iommu_hw(void)
4063{
4064 struct dmar_drhd_unit *drhd;
4065 struct intel_iommu *iommu = NULL;
4066
4067 for_each_active_iommu(iommu, drhd)
4068 if (iommu->qi)
4069 dmar_reenable_qi(iommu);
4070
b779260b
JC
4071 for_each_iommu(iommu, drhd) {
4072 if (drhd->ignored) {
4073 /*
4074 * we always have to disable PMRs or DMA may fail on
4075 * this device
4076 */
4077 if (force_on)
4078 iommu_disable_protect_mem_regions(iommu);
4079 continue;
4080 }
095303e0 4081
f59c7b69
FY
4082 iommu_flush_write_buffer(iommu);
4083
4084 iommu_set_root_entry(iommu);
4085
4086 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 4087 DMA_CCMD_GLOBAL_INVL);
2a41ccee
JL
4088 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4089 iommu_enable_translation(iommu);
b94996c9 4090 iommu_disable_protect_mem_regions(iommu);
f59c7b69
FY
4091 }
4092
4093 return 0;
4094}
4095
4096static void iommu_flush_all(void)
4097{
4098 struct dmar_drhd_unit *drhd;
4099 struct intel_iommu *iommu;
4100
4101 for_each_active_iommu(iommu, drhd) {
4102 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 4103 DMA_CCMD_GLOBAL_INVL);
f59c7b69 4104 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 4105 DMA_TLB_GLOBAL_FLUSH);
f59c7b69
FY
4106 }
4107}
4108
134fac3f 4109static int iommu_suspend(void)
f59c7b69
FY
4110{
4111 struct dmar_drhd_unit *drhd;
4112 struct intel_iommu *iommu = NULL;
4113 unsigned long flag;
4114
4115 for_each_active_iommu(iommu, drhd) {
6396bb22 4116 iommu->iommu_state = kcalloc(MAX_SR_DMAR_REGS, sizeof(u32),
f59c7b69
FY
4117 GFP_ATOMIC);
4118 if (!iommu->iommu_state)
4119 goto nomem;
4120 }
4121
4122 iommu_flush_all();
4123
4124 for_each_active_iommu(iommu, drhd) {
4125 iommu_disable_translation(iommu);
4126
1f5b3c3f 4127 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
4128
4129 iommu->iommu_state[SR_DMAR_FECTL_REG] =
4130 readl(iommu->reg + DMAR_FECTL_REG);
4131 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
4132 readl(iommu->reg + DMAR_FEDATA_REG);
4133 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
4134 readl(iommu->reg + DMAR_FEADDR_REG);
4135 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
4136 readl(iommu->reg + DMAR_FEUADDR_REG);
4137
1f5b3c3f 4138 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
4139 }
4140 return 0;
4141
4142nomem:
4143 for_each_active_iommu(iommu, drhd)
4144 kfree(iommu->iommu_state);
4145
4146 return -ENOMEM;
4147}
4148
134fac3f 4149static void iommu_resume(void)
f59c7b69
FY
4150{
4151 struct dmar_drhd_unit *drhd;
4152 struct intel_iommu *iommu = NULL;
4153 unsigned long flag;
4154
4155 if (init_iommu_hw()) {
b779260b
JC
4156 if (force_on)
4157 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
4158 else
4159 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
134fac3f 4160 return;
f59c7b69
FY
4161 }
4162
4163 for_each_active_iommu(iommu, drhd) {
4164
1f5b3c3f 4165 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
4166
4167 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
4168 iommu->reg + DMAR_FECTL_REG);
4169 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
4170 iommu->reg + DMAR_FEDATA_REG);
4171 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
4172 iommu->reg + DMAR_FEADDR_REG);
4173 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
4174 iommu->reg + DMAR_FEUADDR_REG);
4175
1f5b3c3f 4176 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
4177 }
4178
4179 for_each_active_iommu(iommu, drhd)
4180 kfree(iommu->iommu_state);
f59c7b69
FY
4181}
4182
134fac3f 4183static struct syscore_ops iommu_syscore_ops = {
f59c7b69
FY
4184 .resume = iommu_resume,
4185 .suspend = iommu_suspend,
4186};
4187
134fac3f 4188static void __init init_iommu_pm_ops(void)
f59c7b69 4189{
134fac3f 4190 register_syscore_ops(&iommu_syscore_ops);
f59c7b69
FY
4191}
4192
4193#else
99592ba4 4194static inline void init_iommu_pm_ops(void) {}
f59c7b69
FY
4195#endif /* CONFIG_PM */
4196
318fe7df 4197
c2a0b538 4198int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
318fe7df
SS
4199{
4200 struct acpi_dmar_reserved_memory *rmrr;
0659b8dc 4201 int prot = DMA_PTE_READ|DMA_PTE_WRITE;
318fe7df 4202 struct dmar_rmrr_unit *rmrru;
0659b8dc 4203 size_t length;
318fe7df
SS
4204
4205 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
4206 if (!rmrru)
0659b8dc 4207 goto out;
318fe7df
SS
4208
4209 rmrru->hdr = header;
4210 rmrr = (struct acpi_dmar_reserved_memory *)header;
4211 rmrru->base_address = rmrr->base_address;
4212 rmrru->end_address = rmrr->end_address;
0659b8dc
EA
4213
4214 length = rmrr->end_address - rmrr->base_address + 1;
4215 rmrru->resv = iommu_alloc_resv_region(rmrr->base_address, length, prot,
4216 IOMMU_RESV_DIRECT);
4217 if (!rmrru->resv)
4218 goto free_rmrru;
4219
2e455289
JL
4220 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
4221 ((void *)rmrr) + rmrr->header.length,
4222 &rmrru->devices_cnt);
0659b8dc
EA
4223 if (rmrru->devices_cnt && rmrru->devices == NULL)
4224 goto free_all;
318fe7df 4225
2e455289 4226 list_add(&rmrru->list, &dmar_rmrr_units);
318fe7df 4227
2e455289 4228 return 0;
0659b8dc
EA
4229free_all:
4230 kfree(rmrru->resv);
4231free_rmrru:
4232 kfree(rmrru);
4233out:
4234 return -ENOMEM;
318fe7df
SS
4235}
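/*
 * Illustrative note, not part of the original file: the RMRR entry parsed
 * above comes straight from the ACPI DMAR table.  Per the VT-d
 * specification (and ACPICA's table headers), it is a DMAR sub-table header
 * followed by a reserved word, the PCI segment number, an inclusive
 * physical address range and a variable number of device scope entries:
 *
 *	struct acpi_dmar_reserved_memory {
 *		struct acpi_dmar_header header;
 *		u16 reserved;
 *		u16 segment;
 *		u64 base_address;
 *		u64 end_address;
 *	};
 *
 * The inclusive range is why the reserved-region length above is computed
 * as end_address - base_address + 1.
 */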
4236
6b197249
JL
4237static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
4238{
4239 struct dmar_atsr_unit *atsru;
4240 struct acpi_dmar_atsr *tmp;
4241
4242 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4243 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
4244 if (atsr->segment != tmp->segment)
4245 continue;
4246 if (atsr->header.length != tmp->header.length)
4247 continue;
4248 if (memcmp(atsr, tmp, atsr->header.length) == 0)
4249 return atsru;
4250 }
4251
4252 return NULL;
4253}
4254
4255int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
318fe7df
SS
4256{
4257 struct acpi_dmar_atsr *atsr;
4258 struct dmar_atsr_unit *atsru;
4259
b608fe35 4260 if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
6b197249
JL
4261 return 0;
4262
318fe7df 4263 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
6b197249
JL
4264 atsru = dmar_find_atsr(atsr);
4265 if (atsru)
4266 return 0;
4267
4268 atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
318fe7df
SS
4269 if (!atsru)
4270 return -ENOMEM;
4271
6b197249
JL
4272 /*
4273 * If memory is allocated from the slab by the ACPI _DSM method, we need to
4274 * copy the memory content because the memory buffer will be freed
4275 * on return.
4276 */
4277 atsru->hdr = (void *)(atsru + 1);
4278 memcpy(atsru->hdr, hdr, hdr->length);
318fe7df 4279 atsru->include_all = atsr->flags & 0x1;
2e455289
JL
4280 if (!atsru->include_all) {
4281 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
4282 (void *)atsr + atsr->header.length,
4283 &atsru->devices_cnt);
4284 if (atsru->devices_cnt && atsru->devices == NULL) {
4285 kfree(atsru);
4286 return -ENOMEM;
4287 }
4288 }
318fe7df 4289
0e242612 4290 list_add_rcu(&atsru->list, &dmar_atsr_units);
318fe7df
SS
4291
4292 return 0;
4293}
4294
9bdc531e
JL
4295static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
4296{
4297 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
4298 kfree(atsru);
4299}
4300
6b197249
JL
4301int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4302{
4303 struct acpi_dmar_atsr *atsr;
4304 struct dmar_atsr_unit *atsru;
4305
4306 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4307 atsru = dmar_find_atsr(atsr);
4308 if (atsru) {
4309 list_del_rcu(&atsru->list);
4310 synchronize_rcu();
4311 intel_iommu_free_atsr(atsru);
4312 }
4313
4314 return 0;
4315}
4316
4317int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4318{
4319 int i;
4320 struct device *dev;
4321 struct acpi_dmar_atsr *atsr;
4322 struct dmar_atsr_unit *atsru;
4323
4324 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4325 atsru = dmar_find_atsr(atsr);
4326 if (!atsru)
4327 return 0;
4328
194dc870 4329 if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
6b197249
JL
4330 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
4331 i, dev)
4332 return -EBUSY;
194dc870 4333 }
6b197249
JL
4334
4335 return 0;
4336}
4337
ffebeb46
JL
4338static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
4339{
e083ea5b 4340 int sp, ret;
ffebeb46
JL
4341 struct intel_iommu *iommu = dmaru->iommu;
4342
4343 if (g_iommus[iommu->seq_id])
4344 return 0;
4345
4346 if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
9f10e5bf 4347 pr_warn("%s: Doesn't support hardware pass through.\n",
ffebeb46
JL
4348 iommu->name);
4349 return -ENXIO;
4350 }
4351 if (!ecap_sc_support(iommu->ecap) &&
4352 domain_update_iommu_snooping(iommu)) {
9f10e5bf 4353 pr_warn("%s: Doesn't support snooping.\n",
ffebeb46
JL
4354 iommu->name);
4355 return -ENXIO;
4356 }
4357 sp = domain_update_iommu_superpage(iommu) - 1;
4358 if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
9f10e5bf 4359 pr_warn("%s: Doesn't support large page.\n",
ffebeb46
JL
4360 iommu->name);
4361 return -ENXIO;
4362 }
4363
4364 /*
4365 * Disable translation if already enabled prior to OS handover.
4366 */
4367 if (iommu->gcmd & DMA_GCMD_TE)
4368 iommu_disable_translation(iommu);
4369
4370 g_iommus[iommu->seq_id] = iommu;
4371 ret = iommu_init_domains(iommu);
4372 if (ret == 0)
4373 ret = iommu_alloc_root_entry(iommu);
4374 if (ret)
4375 goto out;
4376
8a94ade4 4377#ifdef CONFIG_INTEL_IOMMU_SVM
765b6a98 4378 if (pasid_supported(iommu))
d9737953 4379 intel_svm_init(iommu);
8a94ade4
DW
4380#endif
4381
ffebeb46
JL
4382 if (dmaru->ignored) {
4383 /*
4384 * we always have to disable PMRs or DMA may fail on this device
4385 */
4386 if (force_on)
4387 iommu_disable_protect_mem_regions(iommu);
4388 return 0;
4389 }
4390
4391 intel_iommu_init_qi(iommu);
4392 iommu_flush_write_buffer(iommu);
a222a7f0
DW
4393
4394#ifdef CONFIG_INTEL_IOMMU_SVM
765b6a98 4395 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
a222a7f0
DW
4396 ret = intel_svm_enable_prq(iommu);
4397 if (ret)
4398 goto disable_iommu;
4399 }
4400#endif
ffebeb46
JL
4401 ret = dmar_set_interrupt(iommu);
4402 if (ret)
4403 goto disable_iommu;
4404
4405 iommu_set_root_entry(iommu);
4406 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
4407 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4408 iommu_enable_translation(iommu);
4409
ffebeb46
JL
4410 iommu_disable_protect_mem_regions(iommu);
4411 return 0;
4412
4413disable_iommu:
4414 disable_dmar_iommu(iommu);
4415out:
4416 free_dmar_iommu(iommu);
4417 return ret;
4418}
4419
6b197249
JL
4420int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
4421{
ffebeb46
JL
4422 int ret = 0;
4423 struct intel_iommu *iommu = dmaru->iommu;
4424
4425 if (!intel_iommu_enabled)
4426 return 0;
4427 if (iommu == NULL)
4428 return -EINVAL;
4429
4430 if (insert) {
4431 ret = intel_iommu_add(dmaru);
4432 } else {
4433 disable_dmar_iommu(iommu);
4434 free_dmar_iommu(iommu);
4435 }
4436
4437 return ret;
6b197249
JL
4438}
4439
9bdc531e
JL
4440static void intel_iommu_free_dmars(void)
4441{
4442 struct dmar_rmrr_unit *rmrru, *rmrr_n;
4443 struct dmar_atsr_unit *atsru, *atsr_n;
4444
4445 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
4446 list_del(&rmrru->list);
4447 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
0659b8dc 4448 kfree(rmrru->resv);
9bdc531e 4449 kfree(rmrru);
318fe7df
SS
4450 }
4451
9bdc531e
JL
4452 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
4453 list_del(&atsru->list);
4454 intel_iommu_free_atsr(atsru);
4455 }
318fe7df
SS
4456}
4457
4458int dmar_find_matched_atsr_unit(struct pci_dev *dev)
4459{
b683b230 4460 int i, ret = 1;
318fe7df 4461 struct pci_bus *bus;
832bd858
DW
4462 struct pci_dev *bridge = NULL;
4463 struct device *tmp;
318fe7df
SS
4464 struct acpi_dmar_atsr *atsr;
4465 struct dmar_atsr_unit *atsru;
4466
4467 dev = pci_physfn(dev);
318fe7df 4468 for (bus = dev->bus; bus; bus = bus->parent) {
b5f82ddf 4469 bridge = bus->self;
d14053b3
DW
4470 /* If it's an integrated device, allow ATS */
4471 if (!bridge)
4472 return 1;
4473 /* Connected via non-PCIe: no ATS */
4474 if (!pci_is_pcie(bridge) ||
62f87c0e 4475 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
318fe7df 4476 return 0;
d14053b3 4477 /* If we found the root port, look it up in the ATSR */
b5f82ddf 4478 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
318fe7df 4479 break;
318fe7df
SS
4480 }
4481
0e242612 4482 rcu_read_lock();
b5f82ddf
JL
4483 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4484 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4485 if (atsr->segment != pci_domain_nr(dev->bus))
4486 continue;
4487
b683b230 4488 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
832bd858 4489 if (tmp == &bridge->dev)
b683b230 4490 goto out;
b5f82ddf
JL
4491
4492 if (atsru->include_all)
b683b230 4493 goto out;
b5f82ddf 4494 }
b683b230
JL
4495 ret = 0;
4496out:
0e242612 4497 rcu_read_unlock();
318fe7df 4498
b683b230 4499 return ret;
318fe7df
SS
4500}
4501
59ce0515
JL
4502int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
4503{
e083ea5b 4504 int ret;
59ce0515
JL
4505 struct dmar_rmrr_unit *rmrru;
4506 struct dmar_atsr_unit *atsru;
4507 struct acpi_dmar_atsr *atsr;
4508 struct acpi_dmar_reserved_memory *rmrr;
4509
b608fe35 4510 if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING)
59ce0515
JL
4511 return 0;
4512
4513 list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
4514 rmrr = container_of(rmrru->hdr,
4515 struct acpi_dmar_reserved_memory, header);
4516 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4517 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
4518 ((void *)rmrr) + rmrr->header.length,
4519 rmrr->segment, rmrru->devices,
4520 rmrru->devices_cnt);
e083ea5b 4521 if (ret < 0)
59ce0515 4522 return ret;
e6a8c9b3 4523 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
27e24950
JL
4524 dmar_remove_dev_scope(info, rmrr->segment,
4525 rmrru->devices, rmrru->devices_cnt);
59ce0515
JL
4526 }
4527 }
4528
4529 list_for_each_entry(atsru, &dmar_atsr_units, list) {
4530 if (atsru->include_all)
4531 continue;
4532
4533 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4534 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4535 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
4536 (void *)atsr + atsr->header.length,
4537 atsr->segment, atsru->devices,
4538 atsru->devices_cnt);
4539 if (ret > 0)
4540 break;
e083ea5b 4541 else if (ret < 0)
59ce0515 4542 return ret;
e6a8c9b3 4543 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
59ce0515
JL
4544 if (dmar_remove_dev_scope(info, atsr->segment,
4545 atsru->devices, atsru->devices_cnt))
4546 break;
4547 }
4548 }
4549
4550 return 0;
4551}
4552
99dcaded
FY
4553/*
4554 * Here we only respond to a device being unbound from its driver.
4555 *
4556 * An added device is not attached to its DMAR domain here yet. That will
4557 * happen when the device is mapped to an iova.
4558 */
4559static int device_notifier(struct notifier_block *nb,
4560 unsigned long action, void *data)
4561{
4562 struct device *dev = data;
99dcaded
FY
4563 struct dmar_domain *domain;
4564
3d89194a 4565 if (iommu_dummy(dev))
44cd613c
DW
4566 return 0;
4567
117266fd
LB
4568 if (action == BUS_NOTIFY_REMOVED_DEVICE) {
4569 domain = find_domain(dev);
4570 if (!domain)
4571 return 0;
99dcaded 4572
117266fd
LB
4573 dmar_remove_one_dev_info(dev);
4574 if (!domain_type_is_vm_or_si(domain) &&
4575 list_empty(&domain->devices))
4576 domain_exit(domain);
4577 } else if (action == BUS_NOTIFY_ADD_DEVICE) {
4578 if (iommu_should_identity_map(dev, 1))
4579 domain_add_dev_info(si_domain, dev);
4580 }
a97590e5 4581
99dcaded
FY
4582 return 0;
4583}
4584
4585static struct notifier_block device_nb = {
4586 .notifier_call = device_notifier,
4587};
4588
75f05569
JL
4589static int intel_iommu_memory_notifier(struct notifier_block *nb,
4590 unsigned long val, void *v)
4591{
4592 struct memory_notify *mhp = v;
4593 unsigned long long start, end;
4594 unsigned long start_vpfn, last_vpfn;
4595
4596 switch (val) {
4597 case MEM_GOING_ONLINE:
4598 start = mhp->start_pfn << PAGE_SHIFT;
4599 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
4600 if (iommu_domain_identity_map(si_domain, start, end)) {
9f10e5bf 4601 pr_warn("Failed to build identity map for [%llx-%llx]\n",
75f05569
JL
4602 start, end);
4603 return NOTIFY_BAD;
4604 }
4605 break;
4606
4607 case MEM_OFFLINE:
4608 case MEM_CANCEL_ONLINE:
4609 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4610 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
4611 while (start_vpfn <= last_vpfn) {
4612 struct iova *iova;
4613 struct dmar_drhd_unit *drhd;
4614 struct intel_iommu *iommu;
ea8ea460 4615 struct page *freelist;
75f05569
JL
4616
4617 iova = find_iova(&si_domain->iovad, start_vpfn);
4618 if (iova == NULL) {
9f10e5bf 4619 pr_debug("Failed get IOVA for PFN %lx\n",
75f05569
JL
4620 start_vpfn);
4621 break;
4622 }
4623
4624 iova = split_and_remove_iova(&si_domain->iovad, iova,
4625 start_vpfn, last_vpfn);
4626 if (iova == NULL) {
9f10e5bf 4627 pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
75f05569
JL
4628 start_vpfn, last_vpfn);
4629 return NOTIFY_BAD;
4630 }
4631
ea8ea460
DW
4632 freelist = domain_unmap(si_domain, iova->pfn_lo,
4633 iova->pfn_hi);
4634
75f05569
JL
4635 rcu_read_lock();
4636 for_each_active_iommu(iommu, drhd)
a1ddcbe9 4637 iommu_flush_iotlb_psi(iommu, si_domain,
a156ef99 4638 iova->pfn_lo, iova_size(iova),
ea8ea460 4639 !freelist, 0);
75f05569 4640 rcu_read_unlock();
ea8ea460 4641 dma_free_pagelist(freelist);
75f05569
JL
4642
4643 start_vpfn = iova->pfn_hi + 1;
4644 free_iova_mem(iova);
4645 }
4646 break;
4647 }
4648
4649 return NOTIFY_OK;
4650}
4651
4652static struct notifier_block intel_iommu_memory_nb = {
4653 .notifier_call = intel_iommu_memory_notifier,
4654 .priority = 0
4655};
4656
22e2f9fa
OP
4657static void free_all_cpu_cached_iovas(unsigned int cpu)
4658{
4659 int i;
4660
4661 for (i = 0; i < g_num_of_iommus; i++) {
4662 struct intel_iommu *iommu = g_iommus[i];
4663 struct dmar_domain *domain;
0caa7616 4664 int did;
22e2f9fa
OP
4665
4666 if (!iommu)
4667 continue;
4668
3bd4f911 4669 for (did = 0; did < cap_ndoms(iommu->cap); did++) {
0caa7616 4670 domain = get_iommu_domain(iommu, (u16)did);
22e2f9fa
OP
4671
4672 if (!domain)
4673 continue;
4674 free_cpu_cached_iovas(cpu, &domain->iovad);
4675 }
4676 }
4677}
4678
21647615 4679static int intel_iommu_cpu_dead(unsigned int cpu)
aa473240 4680{
21647615 4681 free_all_cpu_cached_iovas(cpu);
21647615 4682 return 0;
aa473240
OP
4683}
4684
161b28aa
JR
4685static void intel_disable_iommus(void)
4686{
4687 struct intel_iommu *iommu = NULL;
4688 struct dmar_drhd_unit *drhd;
4689
4690 for_each_iommu(iommu, drhd)
4691 iommu_disable_translation(iommu);
4692}
4693
a7fdb6e6
JR
4694static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
4695{
2926a2aa
JR
4696 struct iommu_device *iommu_dev = dev_to_iommu_device(dev);
4697
4698 return container_of(iommu_dev, struct intel_iommu, iommu);
a7fdb6e6
JR
4699}
4700
a5459cfe
AW
4701static ssize_t intel_iommu_show_version(struct device *dev,
4702 struct device_attribute *attr,
4703 char *buf)
4704{
a7fdb6e6 4705 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4706 u32 ver = readl(iommu->reg + DMAR_VER_REG);
4707 return sprintf(buf, "%d:%d\n",
4708 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4709}
4710static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
4711
4712static ssize_t intel_iommu_show_address(struct device *dev,
4713 struct device_attribute *attr,
4714 char *buf)
4715{
a7fdb6e6 4716 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4717 return sprintf(buf, "%llx\n", iommu->reg_phys);
4718}
4719static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
4720
4721static ssize_t intel_iommu_show_cap(struct device *dev,
4722 struct device_attribute *attr,
4723 char *buf)
4724{
a7fdb6e6 4725 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4726 return sprintf(buf, "%llx\n", iommu->cap);
4727}
4728static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
4729
4730static ssize_t intel_iommu_show_ecap(struct device *dev,
4731 struct device_attribute *attr,
4732 char *buf)
4733{
a7fdb6e6 4734 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4735 return sprintf(buf, "%llx\n", iommu->ecap);
4736}
4737static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4738
2238c082
AW
4739static ssize_t intel_iommu_show_ndoms(struct device *dev,
4740 struct device_attribute *attr,
4741 char *buf)
4742{
a7fdb6e6 4743 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2238c082
AW
4744 return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
4745}
4746static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL);
4747
4748static ssize_t intel_iommu_show_ndoms_used(struct device *dev,
4749 struct device_attribute *attr,
4750 char *buf)
4751{
a7fdb6e6 4752 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2238c082
AW
4753 return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
4754 cap_ndoms(iommu->cap)));
4755}
4756static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL);
4757
a5459cfe
AW
4758static struct attribute *intel_iommu_attrs[] = {
4759 &dev_attr_version.attr,
4760 &dev_attr_address.attr,
4761 &dev_attr_cap.attr,
4762 &dev_attr_ecap.attr,
2238c082
AW
4763 &dev_attr_domains_supported.attr,
4764 &dev_attr_domains_used.attr,
a5459cfe
AW
4765 NULL,
4766};
4767
4768static struct attribute_group intel_iommu_group = {
4769 .name = "intel-iommu",
4770 .attrs = intel_iommu_attrs,
4771};
4772
4773const struct attribute_group *intel_iommu_groups[] = {
4774 &intel_iommu_group,
4775 NULL,
4776};
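/*
 * Illustrative note, not part of the original file: once each unit is
 * registered via iommu_device_sysfs_add() in intel_iommu_init() with these
 * groups, the attributes above are expected to appear in sysfs roughly as
 *
 *	/sys/class/iommu/dmar0/intel-iommu/version
 *	/sys/class/iommu/dmar0/intel-iommu/cap
 *	/sys/class/iommu/dmar0/intel-iommu/domains_used
 *
 * where "dmar0" is the per-unit name handed to iommu_device_sysfs_add().
 */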
4777
89a6079d
LB
4778static int __init platform_optin_force_iommu(void)
4779{
4780 struct pci_dev *pdev = NULL;
4781 bool has_untrusted_dev = false;
4782
4783 if (!dmar_platform_optin() || no_platform_optin)
4784 return 0;
4785
4786 for_each_pci_dev(pdev) {
4787 if (pdev->untrusted) {
4788 has_untrusted_dev = true;
4789 break;
4790 }
4791 }
4792
4793 if (!has_untrusted_dev)
4794 return 0;
4795
4796 if (no_iommu || dmar_disabled)
4797 pr_info("Intel-IOMMU force enabled due to platform opt in\n");
4798
4799 /*
4800 * If Intel-IOMMU is disabled by default, we will apply identity
4801 * map for all devices except those marked as being untrusted.
4802 */
4803 if (dmar_disabled)
4804 iommu_identity_mapping |= IDENTMAP_ALL;
4805
4806 dmar_disabled = 0;
4807#if defined(CONFIG_X86) && defined(CONFIG_SWIOTLB)
4808 swiotlb = 0;
4809#endif
4810 no_iommu = 0;
4811
4812 return 1;
4813}
4814
ba395927
KA
4815int __init intel_iommu_init(void)
4816{
9bdc531e 4817 int ret = -ENODEV;
3a93c841 4818 struct dmar_drhd_unit *drhd;
7c919779 4819 struct intel_iommu *iommu;
ba395927 4820
89a6079d
LB
4821 /*
4822 * Intel IOMMU is required for a TXT/tboot launch or platform
4823 * opt in, so enforce that.
4824 */
4825 force_on = tboot_force_iommu() || platform_optin_force_iommu();
a59b50e9 4826
3a5670e8
JL
4827 if (iommu_init_mempool()) {
4828 if (force_on)
4829 panic("tboot: Failed to initialize iommu memory\n");
4830 return -ENOMEM;
4831 }
4832
4833 down_write(&dmar_global_lock);
a59b50e9
JC
4834 if (dmar_table_init()) {
4835 if (force_on)
4836 panic("tboot: Failed to initialize DMAR table\n");
9bdc531e 4837 goto out_free_dmar;
a59b50e9 4838 }
ba395927 4839
c2c7286a 4840 if (dmar_dev_scope_init() < 0) {
a59b50e9
JC
4841 if (force_on)
4842 panic("tboot: Failed to initialize DMAR device scope\n");
9bdc531e 4843 goto out_free_dmar;
a59b50e9 4844 }
1886e8a9 4845
ec154bf5
JR
4846 up_write(&dmar_global_lock);
4847
4848 /*
4849 * The bus notifier takes the dmar_global_lock, so lockdep will
4850 * complain later when we register it under the lock.
4851 */
4852 dmar_register_bus_notifier();
4853
4854 down_write(&dmar_global_lock);
4855
161b28aa 4856 if (no_iommu || dmar_disabled) {
bfd20f1c
SL
4857 /*
4858 * We exit the function here to ensure IOMMU's remapping and
4859 * mempool aren't setup, which means that the IOMMU's PMRs
4860 * won't be disabled via the call to init_dmars(). So disable
4861 * it explicitly here. The PMRs were setup by tboot prior to
4862 * calling SENTER, but the kernel is expected to reset/tear
4863 * down the PMRs.
4864 */
4865 if (intel_iommu_tboot_noforce) {
4866 for_each_iommu(iommu, drhd)
4867 iommu_disable_protect_mem_regions(iommu);
4868 }
4869
161b28aa
JR
4870 /*
4871 * Make sure the IOMMUs are switched off, even when we
4872 * boot into a kexec kernel and the previous kernel left
4873 * them enabled
4874 */
4875 intel_disable_iommus();
9bdc531e 4876 goto out_free_dmar;
161b28aa 4877 }
2ae21010 4878
318fe7df 4879 if (list_empty(&dmar_rmrr_units))
9f10e5bf 4880 pr_info("No RMRR found\n");
318fe7df
SS
4881
4882 if (list_empty(&dmar_atsr_units))
9f10e5bf 4883 pr_info("No ATSR found\n");
318fe7df 4884
51a63e67
JC
4885 if (dmar_init_reserved_ranges()) {
4886 if (force_on)
4887 panic("tboot: Failed to reserve iommu ranges\n");
3a5670e8 4888 goto out_free_reserved_range;
51a63e67 4889 }
ba395927 4890
cf1ec453
LB
4891 if (dmar_map_gfx)
4892 intel_iommu_gfx_mapped = 1;
4893
ba395927
KA
4894 init_no_remapping_devices();
4895
b779260b 4896 ret = init_dmars();
ba395927 4897 if (ret) {
a59b50e9
JC
4898 if (force_on)
4899 panic("tboot: Failed to initialize DMARs\n");
9f10e5bf 4900 pr_err("Initialization failed\n");
9bdc531e 4901 goto out_free_reserved_range;
ba395927 4902 }
3a5670e8 4903 up_write(&dmar_global_lock);
9f10e5bf 4904 pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
ba395927 4905
4fac8076 4906#if defined(CONFIG_X86) && defined(CONFIG_SWIOTLB)
75f1cdf1
FT
4907 swiotlb = 0;
4908#endif
19943b0e 4909 dma_ops = &intel_dma_ops;
4ed0d3e6 4910
134fac3f 4911 init_iommu_pm_ops();
a8bcbb0d 4912
39ab9555
JR
4913 for_each_active_iommu(iommu, drhd) {
4914 iommu_device_sysfs_add(&iommu->iommu, NULL,
4915 intel_iommu_groups,
4916 "%s", iommu->name);
4917 iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
4918 iommu_device_register(&iommu->iommu);
4919 }
a5459cfe 4920
4236d97d 4921 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
99dcaded 4922 bus_register_notifier(&pci_bus_type, &device_nb);
75f05569
JL
4923 if (si_domain && !hw_pass_through)
4924 register_memory_notifier(&intel_iommu_memory_nb);
21647615
AMG
4925 cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL,
4926 intel_iommu_cpu_dead);
8bc1f85c 4927 intel_iommu_enabled = 1;
ee2636b8 4928 intel_iommu_debugfs_init();
8bc1f85c 4929
ba395927 4930 return 0;
9bdc531e
JL
4931
4932out_free_reserved_range:
4933 put_iova_domain(&reserved_iova_list);
9bdc531e
JL
4934out_free_dmar:
4935 intel_iommu_free_dmars();
3a5670e8
JL
4936 up_write(&dmar_global_lock);
4937 iommu_exit_mempool();
9bdc531e 4938 return ret;
ba395927 4939}
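/*
 * Illustrative note, not part of the original file: the flags consulted in
 * intel_iommu_init() (no_iommu, dmar_disabled, intel_iommu_tboot_noforce,
 * ...) are normally driven by kernel command-line options such as
 * "intel_iommu=on", "intel_iommu=off", "intel_iommu=sm_on",
 * "intel_iommu=tboot_noforce" and "iommu=pt"; see
 * Documentation/admin-guide/kernel-parameters.txt for the authoritative
 * list for this kernel.
 */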
e820482c 4940
2452d9db 4941static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
579305f7
AW
4942{
4943 struct intel_iommu *iommu = opaque;
4944
2452d9db 4945 domain_context_clear_one(iommu, PCI_BUS_NUM(alias), alias & 0xff);
579305f7
AW
4946 return 0;
4947}
4948
4949/*
4950 * NB - intel-iommu lacks any sort of reference counting for the users of
4951 * dependent devices. If multiple endpoints have intersecting dependent
4952 * devices, unbinding the driver from any one of them will possibly leave
4953 * the others unable to operate.
4954 */
2452d9db 4955static void domain_context_clear(struct intel_iommu *iommu, struct device *dev)
3199aa6b 4956{
0bcb3e28 4957 if (!iommu || !dev || !dev_is_pci(dev))
3199aa6b
HW
4958 return;
4959
2452d9db 4960 pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_clear_one_cb, iommu);
3199aa6b
HW
4961}
4962
127c7615 4963static void __dmar_remove_one_dev_info(struct device_domain_info *info)
c7151a8d 4964{
c7151a8d
WH
4965 struct intel_iommu *iommu;
4966 unsigned long flags;
c7151a8d 4967
55d94043
JR
4968 assert_spin_locked(&device_domain_lock);
4969
127c7615 4970 if (WARN_ON(!info))
c7151a8d
WH
4971 return;
4972
127c7615 4973 iommu = info->iommu;
c7151a8d 4974
127c7615 4975 if (info->dev) {
ef848b7e
LB
4976 if (dev_is_pci(info->dev) && sm_supported(iommu))
4977 intel_pasid_tear_down_entry(iommu, info->dev,
4978 PASID_RID2PASID);
4979
127c7615
JR
4980 iommu_disable_dev_iotlb(info);
4981 domain_context_clear(iommu, info->dev);
a7fc93fe 4982 intel_pasid_free_table(info->dev);
127c7615 4983 }
c7151a8d 4984
b608ac3b 4985 unlink_domain_info(info);
c7151a8d 4986
d160aca5 4987 spin_lock_irqsave(&iommu->lock, flags);
127c7615 4988 domain_detach_iommu(info->domain, iommu);
d160aca5 4989 spin_unlock_irqrestore(&iommu->lock, flags);
c7151a8d 4990
127c7615 4991 free_devinfo_mem(info);
c7151a8d 4992}
c7151a8d 4993
71753239 4994static void dmar_remove_one_dev_info(struct device *dev)
55d94043 4995{
127c7615 4996 struct device_domain_info *info;
55d94043 4997 unsigned long flags;
3e7abe25 4998
55d94043 4999 spin_lock_irqsave(&device_domain_lock, flags);
127c7615
JR
5000 info = dev->archdata.iommu;
5001 __dmar_remove_one_dev_info(info);
55d94043 5002 spin_unlock_irqrestore(&device_domain_lock, flags);
c7151a8d
WH
5003}
5004
2c2e2c38 5005static int md_domain_init(struct dmar_domain *domain, int guest_width)
5e98c4b1
WH
5006{
5007 int adjust_width;
5008
aa3ac946 5009 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
5e98c4b1
WH
5010 domain_reserve_special_ranges(domain);
5011
5012 /* calculate AGAW */
5013 domain->gaw = guest_width;
5014 adjust_width = guestwidth_to_adjustwidth(guest_width);
5015 domain->agaw = width_to_agaw(adjust_width);
5016
5e98c4b1 5017 domain->iommu_coherency = 0;
c5b15255 5018 domain->iommu_snooping = 0;
6dd9a7c7 5019 domain->iommu_superpage = 0;
fe40f1e0 5020 domain->max_addr = 0;
5e98c4b1
WH
5021
5022 /* always allocate the top pgd */
4c923d47 5023 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
5e98c4b1
WH
5024 if (!domain->pgd)
5025 return -ENOMEM;
5026 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
5027 return 0;
5028}
5029
00a77deb 5030static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
38717946 5031{
5d450806 5032 struct dmar_domain *dmar_domain;
00a77deb
JR
5033 struct iommu_domain *domain;
5034
5035 if (type != IOMMU_DOMAIN_UNMANAGED)
5036 return NULL;
38717946 5037
ab8dfe25 5038 dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE);
5d450806 5039 if (!dmar_domain) {
9f10e5bf 5040 pr_err("Can't allocate dmar_domain\n");
00a77deb 5041 return NULL;
38717946 5042 }
2c2e2c38 5043 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
9f10e5bf 5044 pr_err("Domain initialization failed\n");
92d03cc8 5045 domain_exit(dmar_domain);
00a77deb 5046 return NULL;
38717946 5047 }
8140a95d 5048 domain_update_iommu_cap(dmar_domain);
faa3d6f5 5049
00a77deb 5050 domain = &dmar_domain->domain;
8a0e715b
JR
5051 domain->geometry.aperture_start = 0;
5052 domain->geometry.aperture_end = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
5053 domain->geometry.force_aperture = true;
5054
00a77deb 5055 return domain;
38717946 5056}
38717946 5057
00a77deb 5058static void intel_iommu_domain_free(struct iommu_domain *domain)
38717946 5059{
00a77deb 5060 domain_exit(to_dmar_domain(domain));
38717946 5061}
38717946 5062
67b8e02b
LB
5063/*
5064 * Check whether a @domain could be attached to the @dev through the
5065 * aux-domain attach/detach APIs.
5066 */
5067static inline bool
5068is_aux_domain(struct device *dev, struct iommu_domain *domain)
5069{
5070 struct device_domain_info *info = dev->archdata.iommu;
5071
5072 return info && info->auxd_enabled &&
5073 domain->type == IOMMU_DOMAIN_UNMANAGED;
5074}
5075
5076static void auxiliary_link_device(struct dmar_domain *domain,
5077 struct device *dev)
5078{
5079 struct device_domain_info *info = dev->archdata.iommu;
5080
5081 assert_spin_locked(&device_domain_lock);
5082 if (WARN_ON(!info))
5083 return;
5084
5085 domain->auxd_refcnt++;
5086 list_add(&domain->auxd, &info->auxiliary_domains);
5087}
5088
5089static void auxiliary_unlink_device(struct dmar_domain *domain,
5090 struct device *dev)
5091{
5092 struct device_domain_info *info = dev->archdata.iommu;
5093
5094 assert_spin_locked(&device_domain_lock);
5095 if (WARN_ON(!info))
5096 return;
5097
5098 list_del(&domain->auxd);
5099 domain->auxd_refcnt--;
5100
5101 if (!domain->auxd_refcnt && domain->default_pasid > 0)
5102 intel_pasid_free_id(domain->default_pasid);
5103}
5104
5105static int aux_domain_add_dev(struct dmar_domain *domain,
5106 struct device *dev)
5107{
5108 int ret;
5109 u8 bus, devfn;
5110 unsigned long flags;
5111 struct intel_iommu *iommu;
5112
5113 iommu = device_to_iommu(dev, &bus, &devfn);
5114 if (!iommu)
5115 return -ENODEV;
5116
5117 if (domain->default_pasid <= 0) {
5118 int pasid;
5119
5120 pasid = intel_pasid_alloc_id(domain, PASID_MIN,
5121 pci_max_pasids(to_pci_dev(dev)),
5122 GFP_KERNEL);
5123 if (pasid <= 0) {
5124 pr_err("Can't allocate default pasid\n");
5125 return -ENODEV;
5126 }
5127 domain->default_pasid = pasid;
5128 }
5129
5130 spin_lock_irqsave(&device_domain_lock, flags);
5131 /*
5132 * iommu->lock must be held to attach domain to iommu and setup the
5133 * pasid entry for second level translation.
5134 */
5135 spin_lock(&iommu->lock);
5136 ret = domain_attach_iommu(domain, iommu);
5137 if (ret)
5138 goto attach_failed;
5139
5140 /* Setup the PASID entry for mediated devices: */
5141 ret = intel_pasid_setup_second_level(iommu, domain, dev,
5142 domain->default_pasid);
5143 if (ret)
5144 goto table_failed;
5145 spin_unlock(&iommu->lock);
5146
5147 auxiliary_link_device(domain, dev);
5148
5149 spin_unlock_irqrestore(&device_domain_lock, flags);
5150
5151 return 0;
5152
5153table_failed:
5154 domain_detach_iommu(domain, iommu);
5155attach_failed:
5156 spin_unlock(&iommu->lock);
5157 spin_unlock_irqrestore(&device_domain_lock, flags);
5158 if (!domain->auxd_refcnt && domain->default_pasid > 0)
5159 intel_pasid_free_id(domain->default_pasid);
5160
5161 return ret;
5162}
5163
5164static void aux_domain_remove_dev(struct dmar_domain *domain,
5165 struct device *dev)
5166{
5167 struct device_domain_info *info;
5168 struct intel_iommu *iommu;
5169 unsigned long flags;
5170
5171 if (!is_aux_domain(dev, &domain->domain))
5172 return;
5173
5174 spin_lock_irqsave(&device_domain_lock, flags);
5175 info = dev->archdata.iommu;
5176 iommu = info->iommu;
5177
5178 auxiliary_unlink_device(domain, dev);
5179
5180 spin_lock(&iommu->lock);
5181 intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid);
5182 domain_detach_iommu(domain, iommu);
5183 spin_unlock(&iommu->lock);
5184
5185 spin_unlock_irqrestore(&device_domain_lock, flags);
5186}
5187
8cc3759a
LB
5188static int prepare_domain_attach_device(struct iommu_domain *domain,
5189 struct device *dev)
38717946 5190{
00a77deb 5191 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
fe40f1e0
WH
5192 struct intel_iommu *iommu;
5193 int addr_width;
156baca8 5194 u8 bus, devfn;
faa3d6f5 5195
156baca8 5196 iommu = device_to_iommu(dev, &bus, &devfn);
fe40f1e0
WH
5197 if (!iommu)
5198 return -ENODEV;
5199
5200 /* check if this iommu agaw is sufficient for max mapped address */
5201 addr_width = agaw_to_width(iommu->agaw);
a99c47a2
TL
5202 if (addr_width > cap_mgaw(iommu->cap))
5203 addr_width = cap_mgaw(iommu->cap);
5204
5205 if (dmar_domain->max_addr > (1LL << addr_width)) {
932a6523
BH
5206 dev_err(dev, "%s: iommu width (%d) is not "
5207 "sufficient for the mapped address (%llx)\n",
5208 __func__, addr_width, dmar_domain->max_addr);
fe40f1e0
WH
5209 return -EFAULT;
5210 }
a99c47a2
TL
5211 dmar_domain->gaw = addr_width;
5212
5213 /*
5214 * Knock out extra levels of page tables if necessary
5215 */
5216 while (iommu->agaw < dmar_domain->agaw) {
5217 struct dma_pte *pte;
5218
5219 pte = dmar_domain->pgd;
5220 if (dma_pte_present(pte)) {
25cbff16
SY
5221 dmar_domain->pgd = (struct dma_pte *)
5222 phys_to_virt(dma_pte_addr(pte));
7a661013 5223 free_pgtable_page(pte);
a99c47a2
TL
5224 }
5225 dmar_domain->agaw--;
5226 }
fe40f1e0 5227
8cc3759a
LB
5228 return 0;
5229}
5230
5231static int intel_iommu_attach_device(struct iommu_domain *domain,
5232 struct device *dev)
5233{
5234 int ret;
5235
5236 if (device_is_rmrr_locked(dev)) {
5237 dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n");
5238 return -EPERM;
5239 }
5240
67b8e02b
LB
5241 if (is_aux_domain(dev, domain))
5242 return -EPERM;
5243
8cc3759a
LB
5244 /* normally dev is not mapped */
5245 if (unlikely(domain_context_mapped(dev))) {
5246 struct dmar_domain *old_domain;
5247
5248 old_domain = find_domain(dev);
5249 if (old_domain) {
5250 rcu_read_lock();
5251 dmar_remove_one_dev_info(dev);
5252 rcu_read_unlock();
5253
5254 if (!domain_type_is_vm_or_si(old_domain) &&
5255 list_empty(&old_domain->devices))
5256 domain_exit(old_domain);
5257 }
5258 }
5259
5260 ret = prepare_domain_attach_device(domain, dev);
5261 if (ret)
5262 return ret;
5263
5264 return domain_add_dev_info(to_dmar_domain(domain), dev);
38717946 5265}
38717946 5266
67b8e02b
LB
5267static int intel_iommu_aux_attach_device(struct iommu_domain *domain,
5268 struct device *dev)
5269{
5270 int ret;
5271
5272 if (!is_aux_domain(dev, domain))
5273 return -EPERM;
5274
5275 ret = prepare_domain_attach_device(domain, dev);
5276 if (ret)
5277 return ret;
5278
5279 return aux_domain_add_dev(to_dmar_domain(domain), dev);
5280}
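/*
 * Illustrative sketch, not part of the original file: how a mediated-device
 * style caller is expected to reach the aux attach path above.  It goes
 * through the generic aux-domain API assumed to be provided by
 * linux/iommu.h in this kernel (iommu_dev_enable_feature(),
 * iommu_aux_attach_device()), never through these callbacks directly.
 * "dev" is a hypothetical parent device.
 */
#if 0	/* example only, not compiled */
static struct iommu_domain *example_aux_attach(struct device *dev)
{
	struct iommu_domain *domain;

	if (iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_AUX))
		return NULL;

	domain = iommu_domain_alloc(dev->bus);
	if (!domain)
		return NULL;

	/* Dispatches to intel_iommu_aux_attach_device() above. */
	if (iommu_aux_attach_device(domain, dev)) {
		iommu_domain_free(domain);
		return NULL;
	}

	return domain;
}
#endif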
5281
4c5478c9
JR
5282static void intel_iommu_detach_device(struct iommu_domain *domain,
5283 struct device *dev)
38717946 5284{
71753239 5285 dmar_remove_one_dev_info(dev);
faa3d6f5 5286}
c7151a8d 5287
67b8e02b
LB
5288static void intel_iommu_aux_detach_device(struct iommu_domain *domain,
5289 struct device *dev)
5290{
5291 aux_domain_remove_dev(to_dmar_domain(domain), dev);
5292}
5293
b146a1c9
JR
5294static int intel_iommu_map(struct iommu_domain *domain,
5295 unsigned long iova, phys_addr_t hpa,
5009065d 5296 size_t size, int iommu_prot)
faa3d6f5 5297{
00a77deb 5298 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
fe40f1e0 5299 u64 max_addr;
dde57a21 5300 int prot = 0;
faa3d6f5 5301 int ret;
fe40f1e0 5302
dde57a21
JR
5303 if (iommu_prot & IOMMU_READ)
5304 prot |= DMA_PTE_READ;
5305 if (iommu_prot & IOMMU_WRITE)
5306 prot |= DMA_PTE_WRITE;
9cf06697
SY
5307 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
5308 prot |= DMA_PTE_SNP;
dde57a21 5309
163cc52c 5310 max_addr = iova + size;
dde57a21 5311 if (dmar_domain->max_addr < max_addr) {
fe40f1e0
WH
5312 u64 end;
5313
5314 /* check if minimum agaw is sufficient for mapped address */
8954da1f 5315 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
fe40f1e0 5316 if (end < max_addr) {
9f10e5bf 5317 pr_err("%s: iommu width (%d) is not "
fe40f1e0 5318 "sufficient for the mapped address (%llx)\n",
8954da1f 5319 __func__, dmar_domain->gaw, max_addr);
fe40f1e0
WH
5320 return -EFAULT;
5321 }
dde57a21 5322 dmar_domain->max_addr = max_addr;
fe40f1e0 5323 }
ad051221
DW
5324 /* Round up size to next multiple of PAGE_SIZE, if it and
5325 the low bits of hpa would take us onto the next page */
88cb6a74 5326 size = aligned_nrpages(hpa, size);
ad051221
DW
5327 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
5328 hpa >> VTD_PAGE_SHIFT, size, prot);
faa3d6f5 5329 return ret;
38717946 5330}
38717946 5331
5009065d 5332static size_t intel_iommu_unmap(struct iommu_domain *domain,
ea8ea460 5333 unsigned long iova, size_t size)
38717946 5334{
00a77deb 5335 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
ea8ea460 5336 struct page *freelist = NULL;
ea8ea460
DW
5337 unsigned long start_pfn, last_pfn;
5338 unsigned int npages;
42e8c186 5339 int iommu_id, level = 0;
5cf0a76f
DW
5340
5341 /* Cope with horrid API which requires us to unmap more than the
5342 size argument if it happens to be a large-page mapping. */
dc02e46e 5343 BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
5cf0a76f
DW
5344
5345 if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
5346 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4b99d352 5347
ea8ea460
DW
5348 start_pfn = iova >> VTD_PAGE_SHIFT;
5349 last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
5350
5351 freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
5352
5353 npages = last_pfn - start_pfn + 1;
5354
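	/* Flush the IOTLB of every IOMMU unit this domain is attached to. */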
f746a025 5355 for_each_domain_iommu(iommu_id, dmar_domain)
42e8c186
JR
5356 iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
5357 start_pfn, npages, !freelist, 0);
ea8ea460
DW
5358
5359 dma_free_pagelist(freelist);
fe40f1e0 5360
163cc52c
DW
5361 if (dmar_domain->max_addr == iova + size)
5362 dmar_domain->max_addr = iova;
b146a1c9 5363
5cf0a76f 5364 return size;
38717946 5365}
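/*
 * Illustrative sketch (not part of this file): the two callbacks above are
 * reached through the generic iommu_map()/iommu_unmap() helpers, e.g.:
 *
 *	int rc = iommu_map(dom, iova, page_to_phys(page), PAGE_SIZE,
 *			   IOMMU_READ | IOMMU_WRITE);
 *	...
 *	iommu_unmap(dom, iova, PAGE_SIZE);
 *
 * where "dom", "iova" and "page" are assumed to be set up by the caller.
 */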
38717946 5366
d14d6577 5367static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
bb5547ac 5368 dma_addr_t iova)
38717946 5369{
00a77deb 5370 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
38717946 5371 struct dma_pte *pte;
5cf0a76f 5372 int level = 0;
faa3d6f5 5373 u64 phys = 0;
38717946 5374
5cf0a76f 5375 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
38717946 5376 if (pte)
faa3d6f5 5377 phys = dma_pte_addr(pte);
38717946 5378
faa3d6f5 5379 return phys;
38717946 5380}
a8bcbb0d 5381
95587a75
LB
5382static inline bool scalable_mode_support(void)
5383{
5384 struct dmar_drhd_unit *drhd;
5385 struct intel_iommu *iommu;
5386 bool ret = true;
5387
5388 rcu_read_lock();
5389 for_each_active_iommu(iommu, drhd) {
5390 if (!sm_supported(iommu)) {
5391 ret = false;
5392 break;
5393 }
5394 }
5395 rcu_read_unlock();
5396
5397 return ret;
5398}
5399
5400static inline bool iommu_pasid_support(void)
5401{
5402 struct dmar_drhd_unit *drhd;
5403 struct intel_iommu *iommu;
5404 bool ret = true;
5405
5406 rcu_read_lock();
5407 for_each_active_iommu(iommu, drhd) {
5408 if (!pasid_supported(iommu)) {
5409 ret = false;
5410 break;
5411 }
5412 }
5413 rcu_read_unlock();
5414
5415 return ret;
5416}
5417
5d587b8d 5418static bool intel_iommu_capable(enum iommu_cap cap)
dbb9fd86 5419{
dbb9fd86 5420 if (cap == IOMMU_CAP_CACHE_COHERENCY)
5d587b8d 5421 return domain_update_iommu_snooping(NULL) == 1;
323f99cb 5422 if (cap == IOMMU_CAP_INTR_REMAP)
5d587b8d 5423 return irq_remapping_enabled == 1;
dbb9fd86 5424
5d587b8d 5425 return false;
dbb9fd86
SY
5426}
5427
abdfdde2
AW
5428static int intel_iommu_add_device(struct device *dev)
5429{
a5459cfe 5430 struct intel_iommu *iommu;
abdfdde2 5431 struct iommu_group *group;
156baca8 5432 u8 bus, devfn;
70ae6f0d 5433
a5459cfe
AW
5434 iommu = device_to_iommu(dev, &bus, &devfn);
5435 if (!iommu)
70ae6f0d
AW
5436 return -ENODEV;
5437
e3d10af1 5438 iommu_device_link(&iommu->iommu, dev);
a4ff1fc2 5439
e17f9ff4 5440 group = iommu_group_get_for_dev(dev);
783f157b 5441
e17f9ff4
AW
5442 if (IS_ERR(group))
5443 return PTR_ERR(group);
bcb71abe 5444
abdfdde2 5445 iommu_group_put(group);
e17f9ff4 5446 return 0;
abdfdde2 5447}
70ae6f0d 5448
abdfdde2
AW
5449static void intel_iommu_remove_device(struct device *dev)
5450{
a5459cfe
AW
5451 struct intel_iommu *iommu;
5452 u8 bus, devfn;
5453
5454 iommu = device_to_iommu(dev, &bus, &devfn);
5455 if (!iommu)
5456 return;
5457
abdfdde2 5458 iommu_group_remove_device(dev);
a5459cfe 5459
e3d10af1 5460 iommu_device_unlink(&iommu->iommu, dev);
70ae6f0d
AW
5461}
5462
0659b8dc
EA
5463static void intel_iommu_get_resv_regions(struct device *device,
5464 struct list_head *head)
5465{
5466 struct iommu_resv_region *reg;
5467 struct dmar_rmrr_unit *rmrr;
5468 struct device *i_dev;
5469 int i;
5470
5471 rcu_read_lock();
5472 for_each_rmrr_units(rmrr) {
5473 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
5474 i, i_dev) {
5475 if (i_dev != device)
5476 continue;
5477
5478 list_add_tail(&rmrr->resv->list, head);
5479 }
5480 }
5481 rcu_read_unlock();
5482
5483 reg = iommu_alloc_resv_region(IOAPIC_RANGE_START,
5484 IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1,
9d3a4de4 5485 0, IOMMU_RESV_MSI);
0659b8dc
EA
5486 if (!reg)
5487 return;
5488 list_add_tail(&reg->list, head);
5489}
5490
5491static void intel_iommu_put_resv_regions(struct device *dev,
5492 struct list_head *head)
5493{
5494 struct iommu_resv_region *entry, *next;
5495
5496 list_for_each_entry_safe(entry, next, head, list) {
198bc325 5497 if (entry->type == IOMMU_RESV_MSI)
0659b8dc
EA
5498 kfree(entry);
5499 }
70ae6f0d
AW
5500}
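/*
 * Illustrative sketch (not part of this file): the core exposes the two
 * callbacks above through iommu_get_resv_regions()/iommu_put_resv_regions():
 *
 *	LIST_HEAD(resv);
 *	struct iommu_resv_region *r;
 *
 *	iommu_get_resv_regions(dev, &resv);
 *	list_for_each_entry(r, &resv, list)
 *		pr_info("reserved: %pa + %zu\n", &r->start, r->length);
 *	iommu_put_resv_regions(dev, &resv);
 */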
5501
d7cbc0f3 5502int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev)
2f26e0a9
DW
5503{
5504 struct device_domain_info *info;
5505 struct context_entry *context;
5506 struct dmar_domain *domain;
5507 unsigned long flags;
5508 u64 ctx_lo;
5509 int ret;
5510
d7cbc0f3 5511 domain = get_valid_domain_for_dev(dev);
2f26e0a9
DW
5512 if (!domain)
5513 return -EINVAL;
5514
5515 spin_lock_irqsave(&device_domain_lock, flags);
5516 spin_lock(&iommu->lock);
5517
5518 ret = -EINVAL;
d7cbc0f3 5519 info = dev->archdata.iommu;
2f26e0a9
DW
5520 if (!info || !info->pasid_supported)
5521 goto out;
5522
5523 context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
5524 if (WARN_ON(!context))
5525 goto out;
5526
5527 ctx_lo = context[0].lo;
5528
2f26e0a9 5529 if (!(ctx_lo & CONTEXT_PASIDE)) {
2f26e0a9
DW
5530 ctx_lo |= CONTEXT_PASIDE;
5531 context[0].lo = ctx_lo;
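		/*
		 * Make sure the updated context entry is visible in memory
		 * before the context-cache invalidation below.
		 */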
5532 wmb();
d7cbc0f3
LB
5533 iommu->flush.flush_context(iommu,
5534 domain->iommu_did[iommu->seq_id],
5535 PCI_DEVID(info->bus, info->devfn),
2f26e0a9
DW
5536 DMA_CCMD_MASK_NOBIT,
5537 DMA_CCMD_DEVICE_INVL);
5538 }
5539
5540 /* Enable PASID support in the device, if it wasn't already */
5541 if (!info->pasid_enabled)
5542 iommu_enable_dev_iotlb(info);
5543
2f26e0a9
DW
5544 ret = 0;
5545
5546 out:
5547 spin_unlock(&iommu->lock);
5548 spin_unlock_irqrestore(&device_domain_lock, flags);
5549
5550 return ret;
5551}
5552
d7cbc0f3 5553#ifdef CONFIG_INTEL_IOMMU_SVM
2f26e0a9
DW
5554struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
5555{
5556 struct intel_iommu *iommu;
5557 u8 bus, devfn;
5558
5559 if (iommu_dummy(dev)) {
5560 dev_warn(dev,
5561 "No IOMMU translation for device; cannot enable SVM\n");
5562 return NULL;
5563 }
5564
5565 iommu = device_to_iommu(dev, &bus, &devfn);
5566 if (!iommu) {
b9997e38 5567 dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
2f26e0a9
DW
5568 return NULL;
5569 }
5570
2f26e0a9
DW
5571 return iommu;
5572}
5573#endif /* CONFIG_INTEL_IOMMU_SVM */
5574
95587a75
LB
5575static int intel_iommu_enable_auxd(struct device *dev)
5576{
5577 struct device_domain_info *info;
5578 struct intel_iommu *iommu;
5579 unsigned long flags;
5580 u8 bus, devfn;
5581 int ret;
5582
5583 iommu = device_to_iommu(dev, &bus, &devfn);
5584 if (!iommu || dmar_disabled)
5585 return -EINVAL;
5586
5587 if (!sm_supported(iommu) || !pasid_supported(iommu))
5588 return -EINVAL;
5589
5590 ret = intel_iommu_enable_pasid(iommu, dev);
5591 if (ret)
5592 return -ENODEV;
5593
5594 spin_lock_irqsave(&device_domain_lock, flags);
5595 info = dev->archdata.iommu;
5596 info->auxd_enabled = 1;
5597 spin_unlock_irqrestore(&device_domain_lock, flags);
5598
5599 return 0;
5600}
5601
5602static int intel_iommu_disable_auxd(struct device *dev)
5603{
5604 struct device_domain_info *info;
5605 unsigned long flags;
5606
5607 spin_lock_irqsave(&device_domain_lock, flags);
5608 info = dev->archdata.iommu;
5609 if (!WARN_ON(!info))
5610 info->auxd_enabled = 0;
5611 spin_unlock_irqrestore(&device_domain_lock, flags);
5612
5613 return 0;
5614}
5615
5616/*
5617 * A PCI Express Designated Vendor-Specific Extended Capability is
5618 * defined in section 3.7 of the Intel Scalable I/O Virtualization
5619 * technical spec so that system software and tools can detect endpoint
5620 * devices that support Intel Scalable I/O Virtualization without a host
5621 * driver dependency.
5622 *
5623 * Returns the config space offset of the matching extended capability
5624 * structure, or 0 if the device does not support it.
5625 */
5626static int siov_find_pci_dvsec(struct pci_dev *pdev)
5627{
5628 int pos;
5629 u16 vendor, id;
5630
5631 pos = pci_find_next_ext_capability(pdev, 0, 0x23);
5632 while (pos) {
5633 pci_read_config_word(pdev, pos + 4, &vendor);
5634 pci_read_config_word(pdev, pos + 8, &id);
5635 if (vendor == PCI_VENDOR_ID_INTEL && id == 5)
5636 return pos;
5637
5638 pos = pci_find_next_ext_capability(pdev, pos, 0x23);
5639 }
5640
5641 return 0;
5642}
5643
5644static bool
5645intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat)
5646{
5647 if (feat == IOMMU_DEV_FEAT_AUX) {
5648 int ret;
5649
5650 if (!dev_is_pci(dev) || dmar_disabled ||
5651 !scalable_mode_support() || !iommu_pasid_support())
5652 return false;
5653
5654 ret = pci_pasid_features(to_pci_dev(dev));
5655 if (ret < 0)
5656 return false;
5657
5658 return !!siov_find_pci_dvsec(to_pci_dev(dev));
5659 }
5660
5661 return false;
5662}
5663
5664static int
5665intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
5666{
5667 if (feat == IOMMU_DEV_FEAT_AUX)
5668 return intel_iommu_enable_auxd(dev);
5669
5670 return -ENODEV;
5671}
5672
5673static int
5674intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat)
5675{
5676 if (feat == IOMMU_DEV_FEAT_AUX)
5677 return intel_iommu_disable_auxd(dev);
5678
5679 return -ENODEV;
5680}
5681
5682static bool
5683intel_iommu_dev_feat_enabled(struct device *dev, enum iommu_dev_features feat)
5684{
5685 struct device_domain_info *info = dev->archdata.iommu;
5686
5687 if (feat == IOMMU_DEV_FEAT_AUX)
5688 return scalable_mode_support() && info && info->auxd_enabled;
5689
5690 return false;
5691}
5692
0e8000f8
LB
5693static int
5694intel_iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev)
5695{
5696 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5697
5698 return dmar_domain->default_pasid > 0 ?
5699 dmar_domain->default_pasid : -EINVAL;
5700}
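/*
 * Illustrative sketch (not part of this file): a caller (e.g. a mediated
 * device framework) would normally drive the aux-domain callbacks above
 * through the generic API, roughly:
 *
 *	int pasid = -1;
 *
 *	if (iommu_dev_has_feature(dev, IOMMU_DEV_FEAT_AUX) &&
 *	    !iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_AUX) &&
 *	    !iommu_aux_attach_device(domain, dev))
 *		pasid = iommu_aux_get_pasid(domain, dev);
 */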
5701
b0119e87 5702const struct iommu_ops intel_iommu_ops = {
0659b8dc
EA
5703 .capable = intel_iommu_capable,
5704 .domain_alloc = intel_iommu_domain_alloc,
5705 .domain_free = intel_iommu_domain_free,
5706 .attach_dev = intel_iommu_attach_device,
5707 .detach_dev = intel_iommu_detach_device,
67b8e02b
LB
5708 .aux_attach_dev = intel_iommu_aux_attach_device,
5709 .aux_detach_dev = intel_iommu_aux_detach_device,
0e8000f8 5710 .aux_get_pasid = intel_iommu_aux_get_pasid,
0659b8dc
EA
5711 .map = intel_iommu_map,
5712 .unmap = intel_iommu_unmap,
0659b8dc
EA
5713 .iova_to_phys = intel_iommu_iova_to_phys,
5714 .add_device = intel_iommu_add_device,
5715 .remove_device = intel_iommu_remove_device,
5716 .get_resv_regions = intel_iommu_get_resv_regions,
5717 .put_resv_regions = intel_iommu_put_resv_regions,
5718 .device_group = pci_device_group,
95587a75
LB
5719 .dev_has_feat = intel_iommu_dev_has_feat,
5720 .dev_feat_enabled = intel_iommu_dev_feat_enabled,
5721 .dev_enable_feat = intel_iommu_dev_enable_feat,
5722 .dev_disable_feat = intel_iommu_dev_disable_feat,
0659b8dc 5723 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
a8bcbb0d 5724};
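/*
 * Note: this ops table is registered with the IOMMU core elsewhere in this
 * file, e.g. via bus_set_iommu(&pci_bus_type, &intel_iommu_ops) during
 * intel_iommu_init().
 */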
9af88143 5725
9452618e
DV
5726static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
5727{
5728 /* G4x/GM45 integrated gfx dmar support is totally busted. */
932a6523 5729 pci_info(dev, "Disabling IOMMU for graphics on this chipset\n");
9452618e
DV
5730 dmar_map_gfx = 0;
5731}
5732
5733DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
5734DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
5735DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
5736DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
5737DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
5738DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
5739DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
5740
d34d6517 5741static void quirk_iommu_rwbf(struct pci_dev *dev)
9af88143
DW
5742{
5743 /*
5744 * Mobile 4 Series Chipset neglects to set RWBF capability,
210561ff 5745 * but needs it. Same seems to hold for the desktop versions.
9af88143 5746 */
932a6523 5747 pci_info(dev, "Forcing write-buffer flush capability\n");
9af88143
DW
5748 rwbf_quirk = 1;
5749}
5750
5751DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
210561ff
DV
5752DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
5753DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
5754DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
5755DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
5756DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
5757DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
e0fc7e0b 5758
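/*
 * GGC is the GMCH Graphics Control register (config offset 0x52) on the
 * integrated-graphics host bridge; the GGC_MEMORY_* fields below describe
 * how much GTT stolen memory the BIOS set aside and whether a VT-d capable
 * (shadow GTT) allocation was made.
 */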
eecfd57f
AJ
5759#define GGC 0x52
5760#define GGC_MEMORY_SIZE_MASK (0xf << 8)
5761#define GGC_MEMORY_SIZE_NONE (0x0 << 8)
5762#define GGC_MEMORY_SIZE_1M (0x1 << 8)
5763#define GGC_MEMORY_SIZE_2M (0x3 << 8)
5764#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
5765#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
5766#define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
5767#define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
5768
d34d6517 5769static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
9eecabcb
DW
5770{
5771 unsigned short ggc;
5772
eecfd57f 5773 if (pci_read_config_word(dev, GGC, &ggc))
9eecabcb
DW
5774 return;
5775
eecfd57f 5776 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
932a6523 5777 pci_info(dev, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
9eecabcb 5778 dmar_map_gfx = 0;
6fbcfb3e
DW
5779 } else if (dmar_map_gfx) {
5780 /* we have to ensure the gfx device is idle before we flush */
932a6523 5781 pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n");
6fbcfb3e
DW
5782 intel_iommu_strict = 1;
5783 }
9eecabcb
DW
5784}
5785DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
5786DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
5787DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
5788DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
5789
e0fc7e0b
DW
5790/* On Tylersburg chipsets, some BIOSes have been known to enable the
5791 ISOCH DMAR unit for the Azalia sound device, but not give it any
5792 TLB entries, which causes it to deadlock. Check for that. We do
5793 this in a function called from init_dmars(), instead of in a PCI
5794 quirk, because we don't want to print the obnoxious "BIOS broken"
5795 message if VT-d is actually disabled.
5796*/
5797static void __init check_tylersburg_isoch(void)
5798{
5799 struct pci_dev *pdev;
5800 uint32_t vtisochctrl;
5801
5802 /* If there's no Azalia in the system anyway, forget it. */
5803 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
5804 if (!pdev)
5805 return;
5806 pci_dev_put(pdev);
5807
5808 /* System Management Registers. Might be hidden, in which case
5809 we can't do the sanity check. But that's OK, because the
5810 known-broken BIOSes _don't_ actually hide it, so far. */
5811 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
5812 if (!pdev)
5813 return;
5814
5815 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
5816 pci_dev_put(pdev);
5817 return;
5818 }
5819
5820 pci_dev_put(pdev);
5821
5822 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
5823 if (vtisochctrl & 1)
5824 return;
5825
5826 /* Drop all bits other than the number of TLB entries */
5827 vtisochctrl &= 0x1c;
5828
5829 /* If we have the recommended number of TLB entries (16), fine. */
5830 if (vtisochctrl == 0x10)
5831 return;
5832
5833 /* Zero TLB entries? That guarantees a deadlock; warn and apply the workaround. */
5834 if (!vtisochctrl) {
5835 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
5836 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
5837 dmi_get_system_info(DMI_BIOS_VENDOR),
5838 dmi_get_system_info(DMI_BIOS_VERSION),
5839 dmi_get_system_info(DMI_PRODUCT_VERSION));
5840 iommu_identity_mapping |= IDENTMAP_AZALIA;
5841 return;
5842 }
9f10e5bf
JR
5843
5844 pr_warn("Recommended number of TLB entries for the ISOCH unit is 16; your BIOS set %d\n",
e0fc7e0b
DW
5845 vtisochctrl);
5846}