// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright © 2006-2014 Intel Corporation.
 *
 * Authors: David Woodhouse <dwmw2@infradead.org>,
 *          Ashok Raj <ashok.raj@intel.com>,
 *          Shaohua Li <shaohua.li@intel.com>,
 *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
 *          Fenghua Yu <fenghua.yu@intel.com>
 *          Joerg Roedel <jroedel@suse.de>
 */

#define pr_fmt(fmt)	"DMAR: " fmt
#define dev_fmt(fmt)	pr_fmt(fmt)

#include <linux/init.h>
#include <linux/bitmap.h>
#include <linux/debugfs.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/dma-mapping.h>
#include <linux/mempool.h>
#include <linux/memory.h>
#include <linux/cpu.h>
#include <linux/timer.h>
#include <linux/io.h>
#include <linux/iova.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
#include <linux/syscore_ops.h>
#include <linux/tboot.h>
#include <linux/dmi.h>
#include <linux/pci-ats.h>
#include <linux/memblock.h>
#include <linux/dma-contiguous.h>
#include <linux/dma-direct.h>
#include <linux/crash_dump.h>
#include <linux/numa.h>
#include <asm/irq_remapping.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>

#include "irq_remapping.h"
#include "intel-pasid.h"

#define ROOT_SIZE		VTD_PAGE_SIZE
#define CONTEXT_SIZE		VTD_PAGE_SIZE

#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)

#define IOAPIC_RANGE_START	(0xfee00000)
#define IOAPIC_RANGE_END	(0xfeefffff)
#define IOVA_START_ADDR		(0x1000)

#define DEFAULT_DOMAIN_ADDRESS_WIDTH 57

#define MAX_AGAW_WIDTH 64
#define MAX_AGAW_PFN_WIDTH	(MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)

#define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)

/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
   to match. That way, we can use 'unsigned long' for PFNs with impunity. */
#define DOMAIN_MAX_PFN(gaw)	((unsigned long) min_t(uint64_t, \
				__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
#define DOMAIN_MAX_ADDR(gaw)	(((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)

/* IO virtual address start page frame number */
#define IOVA_START_PFN		(1)

#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)

/* page table handling */
#define LEVEL_STRIDE		(9)
#define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)

/*
 * This bitmap is used to advertise the page sizes our hardware supports
 * to the IOMMU core, which will then use this information to split
 * physically contiguous memory regions it is mapping into page sizes
 * that we support.
 *
 * Traditionally the IOMMU core just handed us the mappings directly,
 * after making sure the size is an order of a 4KiB page and that the
 * mapping has natural alignment.
 *
 * To retain this behavior, we currently advertise that we support
 * all page sizes that are an order of 4KiB.
 *
 * If at some point we'd like to utilize the IOMMU core's new behavior,
 * we could change this to advertise the real page sizes we support.
 */
#define INTEL_IOMMU_PGSIZES	(~0xFFFUL)

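/*
 * Editor's note: in the IOMMU core's page-size bitmap each set bit k
 * advertises support for 2^k-byte pages, so ~0xFFFUL (bits 12 and up)
 * claims every power-of-two size from 4KiB upwards, which is the
 * "order of 4KiB" behaviour described in the comment above.
 */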
static inline int agaw_to_level(int agaw)
{
	return agaw + 2;
}

static inline int agaw_to_width(int agaw)
{
	return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
}

static inline int width_to_agaw(int width)
{
	return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
}

static inline unsigned int level_to_offset_bits(int level)
{
	return (level - 1) * LEVEL_STRIDE;
}

static inline int pfn_level_offset(unsigned long pfn, int level)
{
	return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
}

static inline unsigned long level_mask(int level)
{
	return -1UL << level_to_offset_bits(level);
}

static inline unsigned long level_size(int level)
{
	return 1UL << level_to_offset_bits(level);
}

static inline unsigned long align_to_level(unsigned long pfn, int level)
{
	return (pfn + level_size(level) - 1) & level_mask(level);
}

static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
{
	return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
}

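/*
 * Editor's note: worked example of the AGAW arithmetic above. A 4-level
 * table walks 4 * LEVEL_STRIDE = 36 PFN bits plus the 12-bit page offset,
 * i.e. a 48-bit address width; that corresponds to agaw_to_width(2) == 48,
 * width_to_agaw(48) == 2 and agaw_to_level(2) == 4.
 */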
/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
   are never going to work. */
static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
{
	return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
}

static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
{
	return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
}
static inline unsigned long page_to_dma_pfn(struct page *pg)
{
	return mm_to_dma_pfn(page_to_pfn(pg));
}
static inline unsigned long virt_to_dma_pfn(void *p)
{
	return page_to_dma_pfn(virt_to_page(p));
}

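/*
 * Editor's note: with 4KiB kernel pages (PAGE_SHIFT == VTD_PAGE_SHIFT == 12)
 * the shift above is zero and the two PFN spaces coincide; with larger
 * kernel pages (e.g. 64KiB) one MM PFN covers several VT-d PFNs and
 * mm_to_dma_pfn() returns the first of them.
 */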
/* global iommu list, set NULL for ignored DMAR units */
static struct intel_iommu **g_iommus;

static void __init check_tylersburg_isoch(void);
static int rwbf_quirk;

/*
 * set to 1 to panic kernel if can't successfully enable VT-d
 * (used when kernel is launched w/ TXT)
 */
static int force_on = 0;
int intel_iommu_tboot_noforce;
static int no_platform_optin;

#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))

/*
 * Take a root_entry and return the Lower Context Table Pointer (LCTP)
 * if marked present.
 */
static phys_addr_t root_entry_lctp(struct root_entry *re)
{
	if (!(re->lo & 1))
		return 0;

	return re->lo & VTD_PAGE_MASK;
}

/*
 * Take a root_entry and return the Upper Context Table Pointer (UCTP)
 * if marked present.
 */
static phys_addr_t root_entry_uctp(struct root_entry *re)
{
	if (!(re->hi & 1))
		return 0;

	return re->hi & VTD_PAGE_MASK;
}

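/*
 * Editor's note: bit 0 of each half of the root entry is the present bit
 * and the page-aligned remainder is the physical address of a context
 * table. In scalable mode the lower and upper halves point at separate
 * context tables covering devfn 0x00-0x7f and 0x80-0xff respectively
 * (see iommu_context_addr() below).
 */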
static inline void context_clear_pasid_enable(struct context_entry *context)
{
	context->lo &= ~(1ULL << 11);
}

static inline bool context_pasid_enabled(struct context_entry *context)
{
	return !!(context->lo & (1ULL << 11));
}

static inline void context_set_copied(struct context_entry *context)
{
	context->hi |= (1ull << 3);
}

static inline bool context_copied(struct context_entry *context)
{
	return !!(context->hi & (1ULL << 3));
}

static inline bool __context_present(struct context_entry *context)
{
	return (context->lo & 1);
}

bool context_present(struct context_entry *context)
{
	return context_pasid_enabled(context) ?
	     __context_present(context) :
	     __context_present(context) && !context_copied(context);
}

static inline void context_set_present(struct context_entry *context)
{
	context->lo |= 1;
}

static inline void context_set_fault_enable(struct context_entry *context)
{
	context->lo &= (((u64)-1) << 2) | 1;
}

static inline void context_set_translation_type(struct context_entry *context,
						unsigned long value)
{
	context->lo &= (((u64)-1) << 4) | 3;
	context->lo |= (value & 3) << 2;
}

static inline void context_set_address_root(struct context_entry *context,
					    unsigned long value)
{
	context->lo &= ~VTD_PAGE_MASK;
	context->lo |= value & VTD_PAGE_MASK;
}

static inline void context_set_address_width(struct context_entry *context,
					     unsigned long value)
{
	context->hi |= value & 7;
}

static inline void context_set_domain_id(struct context_entry *context,
					 unsigned long value)
{
	context->hi |= (value & ((1 << 16) - 1)) << 8;
}

static inline int context_domain_id(struct context_entry *c)
{
	return((c->hi >> 8) & 0xffff);
}

static inline void context_clear_entry(struct context_entry *context)
{
	context->lo = 0;
	context->hi = 0;
}

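/*
 * Editor's note: the legacy context-entry layout implied by the helpers
 * above is: lo bit 0 present, lo bit 1 fault-processing disable, lo bits
 * 3:2 translation type, lo bits 63:12 the second-level page-table root;
 * hi bits 2:0 the address width (AGAW) and hi bits 23:8 the 16-bit
 * domain id.
 */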
/*
 * This domain is a statically identity mapping domain.
 *	1. This domain creates a static 1:1 mapping to all usable memory.
 *	2. It maps to each iommu if successful.
 *	3. Each iommu maps to this domain if successful.
 */
static struct dmar_domain *si_domain;
static int hw_pass_through = 1;

/*
 * Domain represents a virtual machine; more than one device across
 * iommus may be owned by one domain, e.g. a kvm guest.
 */
#define DOMAIN_FLAG_VIRTUAL_MACHINE	(1 << 0)

/* si_domain contains multiple devices */
#define DOMAIN_FLAG_STATIC_IDENTITY	(1 << 1)

#define for_each_domain_iommu(idx, domain)			\
	for (idx = 0; idx < g_num_of_iommus; idx++)		\
		if (domain->iommu_refcnt[idx])

struct dmar_rmrr_unit {
	struct list_head list;		/* list of rmrr units	*/
	struct acpi_dmar_header *hdr;	/* ACPI header		*/
	u64	base_address;		/* reserved base address */
	u64	end_address;		/* reserved end address */
	struct dmar_dev_scope *devices;	/* target devices */
	int	devices_cnt;		/* target device count */
	struct iommu_resv_region *resv;	/* reserved region handle */
};

struct dmar_atsr_unit {
	struct list_head list;		/* list of ATSR units */
	struct acpi_dmar_header *hdr;	/* ACPI header */
	struct dmar_dev_scope *devices;	/* target devices */
	int devices_cnt;		/* target device count */
	u8 include_all:1;		/* include all ports */
};

static LIST_HEAD(dmar_atsr_units);
static LIST_HEAD(dmar_rmrr_units);

#define for_each_rmrr_units(rmrr) \
	list_for_each_entry(rmrr, &dmar_rmrr_units, list)

/* bitmap for indexing intel_iommus */
static int g_num_of_iommus;

static void domain_exit(struct dmar_domain *domain);
static void domain_remove_dev_info(struct dmar_domain *domain);
static void dmar_remove_one_dev_info(struct device *dev);
static void __dmar_remove_one_dev_info(struct device_domain_info *info);
static void domain_context_clear(struct intel_iommu *iommu,
				 struct device *dev);
static int domain_detach_iommu(struct dmar_domain *domain,
			       struct intel_iommu *iommu);

#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
int dmar_disabled = 0;
#else
int dmar_disabled = 1;
#endif /* CONFIG_INTEL_IOMMU_DEFAULT_ON */

int intel_iommu_enabled = 0;
EXPORT_SYMBOL_GPL(intel_iommu_enabled);

static int dmar_map_gfx = 1;
static int dmar_forcedac;
static int intel_iommu_strict;
static int intel_iommu_superpage = 1;
static int intel_iommu_sm;
static int iommu_identity_mapping;

#define IDENTMAP_ALL		1
#define IDENTMAP_GFX		2
#define IDENTMAP_AZALIA		4

#define sm_supported(iommu)	(intel_iommu_sm && ecap_smts((iommu)->ecap))
#define pasid_supported(iommu)	(sm_supported(iommu) &&			\
				 ecap_pasid((iommu)->ecap))

int intel_iommu_gfx_mapped;
EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);

#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
static DEFINE_SPINLOCK(device_domain_lock);
static LIST_HEAD(device_domain_list);

/*
 * Iterate over elements in device_domain_list and call the specified
 * callback @fn against each element.
 */
int for_each_device_domain(int (*fn)(struct device_domain_info *info,
				     void *data), void *data)
{
	int ret = 0;
	unsigned long flags;
	struct device_domain_info *info;

	spin_lock_irqsave(&device_domain_lock, flags);
	list_for_each_entry(info, &device_domain_list, global) {
		ret = fn(info, data);
		if (ret) {
			spin_unlock_irqrestore(&device_domain_lock, flags);
			return ret;
		}
	}
	spin_unlock_irqrestore(&device_domain_lock, flags);

	return 0;
}

const struct iommu_ops intel_iommu_ops;

static bool translation_pre_enabled(struct intel_iommu *iommu)
{
	return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
}

static void clear_translation_pre_enabled(struct intel_iommu *iommu)
{
	iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
}

static void init_translation_status(struct intel_iommu *iommu)
{
	u32 gsts;

	gsts = readl(iommu->reg + DMAR_GSTS_REG);
	if (gsts & DMA_GSTS_TES)
		iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
}

/* Convert a generic 'struct iommu_domain' to the private 'struct dmar_domain' */
static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
{
	return container_of(dom, struct dmar_domain, domain);
}

static int __init intel_iommu_setup(char *str)
{
	if (!str)
		return -EINVAL;
	while (*str) {
		if (!strncmp(str, "on", 2)) {
			dmar_disabled = 0;
			pr_info("IOMMU enabled\n");
		} else if (!strncmp(str, "off", 3)) {
			dmar_disabled = 1;
			no_platform_optin = 1;
			pr_info("IOMMU disabled\n");
		} else if (!strncmp(str, "igfx_off", 8)) {
			dmar_map_gfx = 0;
			pr_info("Disable GFX device mapping\n");
		} else if (!strncmp(str, "forcedac", 8)) {
			pr_info("Forcing DAC for PCI devices\n");
			dmar_forcedac = 1;
		} else if (!strncmp(str, "strict", 6)) {
			pr_info("Disable batched IOTLB flush\n");
			intel_iommu_strict = 1;
		} else if (!strncmp(str, "sp_off", 6)) {
			pr_info("Disable supported super page\n");
			intel_iommu_superpage = 0;
		} else if (!strncmp(str, "sm_on", 5)) {
			pr_info("Intel-IOMMU: scalable mode supported\n");
			intel_iommu_sm = 1;
		} else if (!strncmp(str, "tboot_noforce", 13)) {
			printk(KERN_INFO
				"Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
			intel_iommu_tboot_noforce = 1;
		}

		str += strcspn(str, ",");
		while (*str == ',')
			str++;
	}
	return 0;
}
__setup("intel_iommu=", intel_iommu_setup);

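/*
 * Editor's note (illustrative, not part of the original file): booting
 * with "intel_iommu=on,sm_on,strict" is parsed by the loop above as
 * three comma-separated tokens, enabling DMA remapping, scalable mode
 * and strict (unbatched) IOTLB flushing respectively.
 */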
static struct kmem_cache *iommu_domain_cache;
static struct kmem_cache *iommu_devinfo_cache;

static struct dmar_domain *get_iommu_domain(struct intel_iommu *iommu, u16 did)
{
	struct dmar_domain **domains;
	int idx = did >> 8;

	domains = iommu->domains[idx];
	if (!domains)
		return NULL;

	return domains[did & 0xff];
}

static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
			     struct dmar_domain *domain)
{
	struct dmar_domain **domains;
	int idx = did >> 8;

	if (!iommu->domains[idx]) {
		size_t size = 256 * sizeof(struct dmar_domain *);
		iommu->domains[idx] = kzalloc(size, GFP_ATOMIC);
	}

	domains = iommu->domains[idx];
	if (WARN_ON(!domains))
		return;
	else
		domains[did & 0xff] = domain;
}

void *alloc_pgtable_page(int node)
{
	struct page *page;
	void *vaddr = NULL;

	page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
	if (page)
		vaddr = page_address(page);
	return vaddr;
}

void free_pgtable_page(void *vaddr)
{
	free_page((unsigned long)vaddr);
}

static inline void *alloc_domain_mem(void)
{
	return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
}

static void free_domain_mem(void *vaddr)
{
	kmem_cache_free(iommu_domain_cache, vaddr);
}

static inline void *alloc_devinfo_mem(void)
{
	return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
}

static inline void free_devinfo_mem(void *vaddr)
{
	kmem_cache_free(iommu_devinfo_cache, vaddr);
}

static inline int domain_type_is_vm(struct dmar_domain *domain)
{
	return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE;
}

static inline int domain_type_is_si(struct dmar_domain *domain)
{
	return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
}

static inline int domain_type_is_vm_or_si(struct dmar_domain *domain)
{
	return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE |
				DOMAIN_FLAG_STATIC_IDENTITY);
}

static inline int domain_pfn_supported(struct dmar_domain *domain,
					unsigned long pfn)
{
	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;

	return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
}

static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
{
	unsigned long sagaw;
	int agaw = -1;

	sagaw = cap_sagaw(iommu->cap);
	for (agaw = width_to_agaw(max_gaw);
	     agaw >= 0; agaw--) {
		if (test_bit(agaw, &sagaw))
			break;
	}

	return agaw;
}

/*
 * Calculate max SAGAW for each iommu.
 */
int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
{
	return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
}

/*
 * Calculate agaw for each iommu.
 * "SAGAW" may be different across iommus, so use a default agaw and
 * fall back to a lesser supported agaw for iommus that don't support
 * the default.
 */
int iommu_calculate_agaw(struct intel_iommu *iommu)
{
	return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
}

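/*
 * Editor's note: __iommu_calculate_agaw() walks the SAGAW capability
 * bitmap downwards from the requested width. With the default domain
 * address width of 57 it starts at agaw 3 (5-level); on hardware that
 * only sets bit 2 in SAGAW it settles on agaw 2, i.e. a 4-level,
 * 48-bit page table.
 */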
/* This function only returns a single iommu in a domain */
struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
{
	int iommu_id;

	/* si_domain and vm domain should not get here. */
	BUG_ON(domain_type_is_vm_or_si(domain));
	for_each_domain_iommu(iommu_id, domain)
		break;

	if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
		return NULL;

	return g_iommus[iommu_id];
}

static void domain_update_iommu_coherency(struct dmar_domain *domain)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	bool found = false;
	int i;

	domain->iommu_coherency = 1;

	for_each_domain_iommu(i, domain) {
		found = true;
		if (!ecap_coherent(g_iommus[i]->ecap)) {
			domain->iommu_coherency = 0;
			break;
		}
	}
	if (found)
		return;

	/* No hardware attached; use lowest common denominator */
	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (!ecap_coherent(iommu->ecap)) {
			domain->iommu_coherency = 0;
			break;
		}
	}
	rcu_read_unlock();
}

static int domain_update_iommu_snooping(struct intel_iommu *skip)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	int ret = 1;

	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (iommu != skip) {
			if (!ecap_sc_support(iommu->ecap)) {
				ret = 0;
				break;
			}
		}
	}
	rcu_read_unlock();

	return ret;
}

static int domain_update_iommu_superpage(struct intel_iommu *skip)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	int mask = 0xf;

	if (!intel_iommu_superpage) {
		return 0;
	}

	/* set iommu_superpage to the smallest common denominator */
	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (iommu != skip) {
			mask &= cap_super_page_val(iommu->cap);
			if (!mask)
				break;
		}
	}
	rcu_read_unlock();

	return fls(mask);
}

/* Some capabilities may be different across iommus */
static void domain_update_iommu_cap(struct dmar_domain *domain)
{
	domain_update_iommu_coherency(domain);
	domain->iommu_snooping = domain_update_iommu_snooping(NULL);
	domain->iommu_superpage = domain_update_iommu_superpage(NULL);
}

struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
					 u8 devfn, int alloc)
{
	struct root_entry *root = &iommu->root_entry[bus];
	struct context_entry *context;
	u64 *entry;

	entry = &root->lo;
	if (sm_supported(iommu)) {
		if (devfn >= 0x80) {
			devfn -= 0x80;
			entry = &root->hi;
		}
		devfn *= 2;
	}
	if (*entry & 1)
		context = phys_to_virt(*entry & VTD_PAGE_MASK);
	else {
		unsigned long phy_addr;
		if (!alloc)
			return NULL;

		context = alloc_pgtable_page(iommu->node);
		if (!context)
			return NULL;

		__iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
		phy_addr = virt_to_phys((void *)context);
		*entry = phy_addr | 1;
		__iommu_flush_cache(iommu, entry, sizeof(*entry));
	}
	return &context[devfn];
}

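/*
 * Editor's note: in scalable mode a context entry is twice the legacy
 * size, so each 4KiB context table only covers 128 device functions.
 * That is why the code above halves the devfn range per table (lo/hi
 * root pointers) and doubles the index with "devfn *= 2".
 */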
static int iommu_dummy(struct device *dev)
{
	return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
}

static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
{
	struct dmar_drhd_unit *drhd = NULL;
	struct intel_iommu *iommu;
	struct device *tmp;
	struct pci_dev *ptmp, *pdev = NULL;
	u16 segment = 0;
	int i;

	if (iommu_dummy(dev))
		return NULL;

	if (dev_is_pci(dev)) {
		struct pci_dev *pf_pdev;

		pdev = to_pci_dev(dev);

#ifdef CONFIG_X86
		/* VMD child devices currently cannot be handled individually */
		if (is_vmd(pdev->bus))
			return NULL;
#endif

		/* VFs aren't listed in scope tables; we need to look up
		 * the PF instead to find the IOMMU. */
		pf_pdev = pci_physfn(pdev);
		dev = &pf_pdev->dev;
		segment = pci_domain_nr(pdev->bus);
	} else if (has_acpi_companion(dev))
		dev = &ACPI_COMPANION(dev)->dev;

	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (pdev && segment != drhd->segment)
			continue;

		for_each_active_dev_scope(drhd->devices,
					  drhd->devices_cnt, i, tmp) {
			if (tmp == dev) {
				/* For a VF use its original BDF# not that of the PF
				 * which we used for the IOMMU lookup. Strictly speaking
				 * we could do this for all PCI devices; we only need to
				 * get the BDF# from the scope table for ACPI matches. */
				if (pdev && pdev->is_virtfn)
					goto got_pdev;

				*bus = drhd->devices[i].bus;
				*devfn = drhd->devices[i].devfn;
				goto out;
			}

			if (!pdev || !dev_is_pci(tmp))
				continue;

			ptmp = to_pci_dev(tmp);
			if (ptmp->subordinate &&
			    ptmp->subordinate->number <= pdev->bus->number &&
			    ptmp->subordinate->busn_res.end >= pdev->bus->number)
				goto got_pdev;
		}

		if (pdev && drhd->include_all) {
		got_pdev:
			*bus = pdev->bus->number;
			*devfn = pdev->devfn;
			goto out;
		}
	}
	iommu = NULL;
 out:
	rcu_read_unlock();

	return iommu;
}

static void domain_flush_cache(struct dmar_domain *domain,
			       void *addr, int size)
{
	if (!domain->iommu_coherency)
		clflush_cache_range(addr, size);
}

static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
	struct context_entry *context;
	int ret = 0;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	context = iommu_context_addr(iommu, bus, devfn, 0);
	if (context)
		ret = context_present(context);
	spin_unlock_irqrestore(&iommu->lock, flags);
	return ret;
}

static void free_context_table(struct intel_iommu *iommu)
{
	int i;
	unsigned long flags;
	struct context_entry *context;

	spin_lock_irqsave(&iommu->lock, flags);
	if (!iommu->root_entry) {
		goto out;
	}
	for (i = 0; i < ROOT_ENTRY_NR; i++) {
		context = iommu_context_addr(iommu, i, 0, 0);
		if (context)
			free_pgtable_page(context);

		if (!sm_supported(iommu))
			continue;

		context = iommu_context_addr(iommu, i, 0x80, 0);
		if (context)
			free_pgtable_page(context);

	}
	free_pgtable_page(iommu->root_entry);
	iommu->root_entry = NULL;
out:
	spin_unlock_irqrestore(&iommu->lock, flags);
}

static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
				      unsigned long pfn, int *target_level)
{
	struct dma_pte *parent, *pte;
	int level = agaw_to_level(domain->agaw);
	int offset;

	BUG_ON(!domain->pgd);

	if (!domain_pfn_supported(domain, pfn))
		/* Address beyond IOMMU's addressing capabilities. */
		return NULL;

	parent = domain->pgd;

	while (1) {
		void *tmp_page;

		offset = pfn_level_offset(pfn, level);
		pte = &parent[offset];
		if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
			break;
		if (level == *target_level)
			break;

		if (!dma_pte_present(pte)) {
			uint64_t pteval;

			tmp_page = alloc_pgtable_page(domain->nid);

			if (!tmp_page)
				return NULL;

			domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
			pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
			if (cmpxchg64(&pte->val, 0ULL, pteval))
				/* Someone else set it while we were thinking; use theirs. */
				free_pgtable_page(tmp_page);
			else
				domain_flush_cache(domain, pte, sizeof(*pte));
		}
		if (level == 1)
			break;

		parent = phys_to_virt(dma_pte_addr(pte));
		level--;
	}

	if (!*target_level)
		*target_level = level;

	return pte;
}

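/*
 * Editor's note on *target_level in pfn_to_dma_pte(): a value of 0 means
 * "walk as far down as the existing table goes", stopping at the first
 * superpage or non-present entry, with the level actually reached written
 * back; a non-zero value makes the walk stop (allocating intermediate
 * tables as needed) at exactly that level.
 */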

/* return address's pte at specific level */
static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
					 unsigned long pfn,
					 int level, int *large_page)
{
	struct dma_pte *parent, *pte;
	int total = agaw_to_level(domain->agaw);
	int offset;

	parent = domain->pgd;
	while (level <= total) {
		offset = pfn_level_offset(pfn, total);
		pte = &parent[offset];
		if (level == total)
			return pte;

		if (!dma_pte_present(pte)) {
			*large_page = total;
			break;
		}

		if (dma_pte_superpage(pte)) {
			*large_page = total;
			return pte;
		}

		parent = phys_to_virt(dma_pte_addr(pte));
		total--;
	}
	return NULL;
}

/* clear last level pte, a tlb flush should be followed */
static void dma_pte_clear_range(struct dmar_domain *domain,
				unsigned long start_pfn,
				unsigned long last_pfn)
{
	unsigned int large_page;
	struct dma_pte *first_pte, *pte;

	BUG_ON(!domain_pfn_supported(domain, start_pfn));
	BUG_ON(!domain_pfn_supported(domain, last_pfn));
	BUG_ON(start_pfn > last_pfn);

	/* we don't need lock here; nobody else touches the iova range */
	do {
		large_page = 1;
		first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
		if (!pte) {
			start_pfn = align_to_level(start_pfn + 1, large_page + 1);
			continue;
		}
		do {
			dma_clear_pte(pte);
			start_pfn += lvl_to_nr_pages(large_page);
			pte++;
		} while (start_pfn <= last_pfn && !first_pte_in_page(pte));

		domain_flush_cache(domain, first_pte,
				   (void *)pte - (void *)first_pte);

	} while (start_pfn && start_pfn <= last_pfn);
}

static void dma_pte_free_level(struct dmar_domain *domain, int level,
			       int retain_level, struct dma_pte *pte,
			       unsigned long pfn, unsigned long start_pfn,
			       unsigned long last_pfn)
{
	pfn = max(start_pfn, pfn);
	pte = &pte[pfn_level_offset(pfn, level)];

	do {
		unsigned long level_pfn;
		struct dma_pte *level_pte;

		if (!dma_pte_present(pte) || dma_pte_superpage(pte))
			goto next;

		level_pfn = pfn & level_mask(level);
		level_pte = phys_to_virt(dma_pte_addr(pte));

		if (level > 2) {
			dma_pte_free_level(domain, level - 1, retain_level,
					   level_pte, level_pfn, start_pfn,
					   last_pfn);
		}

		/*
		 * Free the page table if we're below the level we want to
		 * retain and the range covers the entire table.
		 */
		if (level < retain_level && !(start_pfn > level_pfn ||
		      last_pfn < level_pfn + level_size(level) - 1)) {
			dma_clear_pte(pte);
			domain_flush_cache(domain, pte, sizeof(*pte));
			free_pgtable_page(level_pte);
		}
next:
		pfn += level_size(level);
	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);
}

/*
 * clear last level (leaf) ptes and free page table pages below the
 * level we wish to keep intact.
 */
static void dma_pte_free_pagetable(struct dmar_domain *domain,
				   unsigned long start_pfn,
				   unsigned long last_pfn,
				   int retain_level)
{
	BUG_ON(!domain_pfn_supported(domain, start_pfn));
	BUG_ON(!domain_pfn_supported(domain, last_pfn));
	BUG_ON(start_pfn > last_pfn);

	dma_pte_clear_range(domain, start_pfn, last_pfn);

	/* We don't need lock here; nobody else touches the iova range */
	dma_pte_free_level(domain, agaw_to_level(domain->agaw), retain_level,
			   domain->pgd, 0, start_pfn, last_pfn);

	/* free pgd */
	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
		free_pgtable_page(domain->pgd);
		domain->pgd = NULL;
	}
}

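/*
 * Editor's note: retain_level bounds how much of the tree
 * dma_pte_free_pagetable() tears down. Leaf PTEs in [start_pfn, last_pfn]
 * are always cleared, but a page-table page is only freed when its level
 * is below retain_level and the range covers that table entirely.
 */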
/* When a page at a given level is being unlinked from its parent, we don't
   need to *modify* it at all. All we need to do is make a list of all the
   pages which can be freed just as soon as we've flushed the IOTLB and we
   know the hardware page-walk will no longer touch them.
   The 'pte' argument is the *parent* PTE, pointing to the page that is to
   be freed. */
static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
					    int level, struct dma_pte *pte,
					    struct page *freelist)
{
	struct page *pg;

	pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
	pg->freelist = freelist;
	freelist = pg;

	if (level == 1)
		return freelist;

	pte = page_address(pg);
	do {
		if (dma_pte_present(pte) && !dma_pte_superpage(pte))
			freelist = dma_pte_list_pagetables(domain, level - 1,
							   pte, freelist);
		pte++;
	} while (!first_pte_in_page(pte));

	return freelist;
}

static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
					struct dma_pte *pte, unsigned long pfn,
					unsigned long start_pfn,
					unsigned long last_pfn,
					struct page *freelist)
{
	struct dma_pte *first_pte = NULL, *last_pte = NULL;

	pfn = max(start_pfn, pfn);
	pte = &pte[pfn_level_offset(pfn, level)];

	do {
		unsigned long level_pfn;

		if (!dma_pte_present(pte))
			goto next;

		level_pfn = pfn & level_mask(level);

		/* If range covers entire pagetable, free it */
		if (start_pfn <= level_pfn &&
		    last_pfn >= level_pfn + level_size(level) - 1) {
			/* These subordinate page tables are going away entirely. Don't
			   bother to clear them; we're just going to *free* them. */
			if (level > 1 && !dma_pte_superpage(pte))
				freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);

			dma_clear_pte(pte);
			if (!first_pte)
				first_pte = pte;
			last_pte = pte;
		} else if (level > 1) {
			/* Recurse down into a level that isn't *entirely* obsolete */
			freelist = dma_pte_clear_level(domain, level - 1,
						       phys_to_virt(dma_pte_addr(pte)),
						       level_pfn, start_pfn, last_pfn,
						       freelist);
		}
next:
		pfn += level_size(level);
	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);

	if (first_pte)
		domain_flush_cache(domain, first_pte,
				   (void *)++last_pte - (void *)first_pte);

	return freelist;
}

/* We can't just free the pages because the IOMMU may still be walking
   the page tables, and may have cached the intermediate levels. The
   pages can only be freed after the IOTLB flush has been done. */
static struct page *domain_unmap(struct dmar_domain *domain,
				 unsigned long start_pfn,
				 unsigned long last_pfn)
{
	struct page *freelist;

	BUG_ON(!domain_pfn_supported(domain, start_pfn));
	BUG_ON(!domain_pfn_supported(domain, last_pfn));
	BUG_ON(start_pfn > last_pfn);

	/* we don't need lock here; nobody else touches the iova range */
	freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
				       domain->pgd, 0, start_pfn, last_pfn, NULL);

	/* free pgd */
	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
		struct page *pgd_page = virt_to_page(domain->pgd);
		pgd_page->freelist = freelist;
		freelist = pgd_page;

		domain->pgd = NULL;
	}

	return freelist;
}

static void dma_free_pagelist(struct page *freelist)
{
	struct page *pg;

	while ((pg = freelist)) {
		freelist = pg->freelist;
		free_pgtable_page(page_address(pg));
	}
}

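/*
 * Editor's note: the freelist built by domain_unmap() is an intrusive
 * singly-linked list threaded through page->freelist, so no allocation
 * is needed at unmap time. dma_free_pagelist() walks the chain and
 * releases each page-table page; per the comment above domain_unmap(),
 * callers only do so after the IOTLB flush has completed.
 */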
static void iova_entry_free(unsigned long data)
{
	struct page *freelist = (struct page *)data;

	dma_free_pagelist(freelist);
}

/* iommu handling */
static int iommu_alloc_root_entry(struct intel_iommu *iommu)
{
	struct root_entry *root;
	unsigned long flags;

	root = (struct root_entry *)alloc_pgtable_page(iommu->node);
	if (!root) {
		pr_err("Allocating root entry for %s failed\n",
			iommu->name);
		return -ENOMEM;
	}

	__iommu_flush_cache(iommu, root, ROOT_SIZE);

	spin_lock_irqsave(&iommu->lock, flags);
	iommu->root_entry = root;
	spin_unlock_irqrestore(&iommu->lock, flags);

	return 0;
}

static void iommu_set_root_entry(struct intel_iommu *iommu)
{
	u64 addr;
	u32 sts;
	unsigned long flag;

	addr = virt_to_phys(iommu->root_entry);
	if (sm_supported(iommu))
		addr |= DMA_RTADDR_SMT;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);

	writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (sts & DMA_GSTS_RTPS), sts);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

void iommu_flush_write_buffer(struct intel_iommu *iommu)
{
	u32 val;
	unsigned long flag;

	if (!rwbf_quirk && !cap_rwbf(iommu->cap))
		return;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (!(val & DMA_GSTS_WBFS)), val);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

/* return value determines if we need a write buffer flush */
static void __iommu_flush_context(struct intel_iommu *iommu,
				  u16 did, u16 source_id, u8 function_mask,
				  u64 type)
{
	u64 val = 0;
	unsigned long flag;

	switch (type) {
	case DMA_CCMD_GLOBAL_INVL:
		val = DMA_CCMD_GLOBAL_INVL;
		break;
	case DMA_CCMD_DOMAIN_INVL:
		val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
		break;
	case DMA_CCMD_DEVICE_INVL:
		val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
			| DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
		break;
	default:
		BUG();
	}
	val |= DMA_CCMD_ICC;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
		      dmar_readq, (!(val & DMA_CCMD_ICC)), val);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

/* return value determines if we need a write buffer flush */
static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
				u64 addr, unsigned int size_order, u64 type)
{
	int tlb_offset = ecap_iotlb_offset(iommu->ecap);
	u64 val = 0, val_iva = 0;
	unsigned long flag;

	switch (type) {
	case DMA_TLB_GLOBAL_FLUSH:
		/* global flush doesn't need set IVA_REG */
		val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
		break;
	case DMA_TLB_DSI_FLUSH:
		val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		break;
	case DMA_TLB_PSI_FLUSH:
		val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		/* IH bit is passed in as part of address */
		val_iva = size_order | addr;
		break;
	default:
		BUG();
	}
	/* Note: set drain read/write */
#if 0
	/*
	 * This is probably to be super secure.. Looks like we can
	 * ignore it without any impact.
	 */
	if (cap_read_drain(iommu->cap))
		val |= DMA_TLB_READ_DRAIN;
#endif
	if (cap_write_drain(iommu->cap))
		val |= DMA_TLB_WRITE_DRAIN;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	/* Note: Only uses first TLB reg currently */
	if (val_iva)
		dmar_writeq(iommu->reg + tlb_offset, val_iva);
	dmar_writeq(iommu->reg + tlb_offset + 8, val);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, tlb_offset + 8,
		      dmar_readq, (!(val & DMA_TLB_IVT)), val);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);

	/* check IOTLB invalidation granularity */
	if (DMA_TLB_IAIG(val) == 0)
		pr_err("Flush IOTLB failed\n");
	if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
		pr_debug("TLB flush request %Lx, actual %Lx\n",
			(unsigned long long)DMA_TLB_IIRG(type),
			(unsigned long long)DMA_TLB_IAIG(val));
}

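/*
 * Editor's note: for page-selective (PSI) flushes the invalidation
 * address register is written with the 4KiB-aligned base address OR'd
 * with size_order, whose low bits carry the address mask, i.e. how many
 * page-address bits to ignore: size_order 0 invalidates a single 4KiB
 * page, 1 an aligned pair, and so on in powers of two.
 */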
64ae892b
DW
1319static struct device_domain_info *
1320iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
1321 u8 bus, u8 devfn)
93a23a72 1322{
93a23a72 1323 struct device_domain_info *info;
93a23a72 1324
55d94043
JR
1325 assert_spin_locked(&device_domain_lock);
1326
93a23a72
YZ
1327 if (!iommu->qi)
1328 return NULL;
1329
93a23a72 1330 list_for_each_entry(info, &domain->devices, link)
c3b497c6
JL
1331 if (info->iommu == iommu && info->bus == bus &&
1332 info->devfn == devfn) {
b16d0cb9
DW
1333 if (info->ats_supported && info->dev)
1334 return info;
93a23a72
YZ
1335 break;
1336 }
93a23a72 1337
b16d0cb9 1338 return NULL;
93a23a72
YZ
1339}
1340
0824c592
OP
1341static void domain_update_iotlb(struct dmar_domain *domain)
1342{
1343 struct device_domain_info *info;
1344 bool has_iotlb_device = false;
1345
1346 assert_spin_locked(&device_domain_lock);
1347
1348 list_for_each_entry(info, &domain->devices, link) {
1349 struct pci_dev *pdev;
1350
1351 if (!info->dev || !dev_is_pci(info->dev))
1352 continue;
1353
1354 pdev = to_pci_dev(info->dev);
1355 if (pdev->ats_enabled) {
1356 has_iotlb_device = true;
1357 break;
1358 }
1359 }
1360
1361 domain->has_iotlb_device = has_iotlb_device;
1362}
1363
93a23a72 1364static void iommu_enable_dev_iotlb(struct device_domain_info *info)
ba395927 1365{
fb0cc3aa
BH
1366 struct pci_dev *pdev;
1367
0824c592
OP
1368 assert_spin_locked(&device_domain_lock);
1369
0bcb3e28 1370 if (!info || !dev_is_pci(info->dev))
93a23a72
YZ
1371 return;
1372
fb0cc3aa 1373 pdev = to_pci_dev(info->dev);
1c48db44
JP
1374 /* For IOMMU that supports device IOTLB throttling (DIT), we assign
1375 * PFSID to the invalidation desc of a VF such that IOMMU HW can gauge
1376 * queue depth at PF level. If DIT is not set, PFSID will be treated as
1377 * reserved, which should be set to 0.
1378 */
1379 if (!ecap_dit(info->iommu->ecap))
1380 info->pfsid = 0;
1381 else {
1382 struct pci_dev *pf_pdev;
1383
1384 /* pdev will be returned if device is not a vf */
1385 pf_pdev = pci_physfn(pdev);
cc49baa9 1386 info->pfsid = pci_dev_id(pf_pdev);
1c48db44 1387 }
fb0cc3aa 1388
b16d0cb9
DW
1389#ifdef CONFIG_INTEL_IOMMU_SVM
1390 /* The PCIe spec, in its wisdom, declares that the behaviour of
1391 the device if you enable PASID support after ATS support is
1392 undefined. So always enable PASID support on devices which
1393 have it, even if we can't yet know if we're ever going to
1394 use it. */
1395 if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
1396 info->pasid_enabled = 1;
1397
1b84778a
KS
1398 if (info->pri_supported &&
1399 (info->pasid_enabled ? pci_prg_resp_pasid_required(pdev) : 1) &&
1400 !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
b16d0cb9
DW
1401 info->pri_enabled = 1;
1402#endif
fb58fdcd 1403 if (!pdev->untrusted && info->ats_supported &&
61363c14 1404 pci_ats_page_aligned(pdev) &&
fb58fdcd 1405 !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
b16d0cb9 1406 info->ats_enabled = 1;
0824c592 1407 domain_update_iotlb(info->domain);
b16d0cb9
DW
1408 info->ats_qdep = pci_ats_queue_depth(pdev);
1409 }
93a23a72
YZ
1410}
1411
1412static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1413{
b16d0cb9
DW
1414 struct pci_dev *pdev;
1415
0824c592
OP
1416 assert_spin_locked(&device_domain_lock);
1417
da972fb1 1418 if (!dev_is_pci(info->dev))
93a23a72
YZ
1419 return;
1420
b16d0cb9
DW
1421 pdev = to_pci_dev(info->dev);
1422
1423 if (info->ats_enabled) {
1424 pci_disable_ats(pdev);
1425 info->ats_enabled = 0;
0824c592 1426 domain_update_iotlb(info->domain);
b16d0cb9
DW
1427 }
1428#ifdef CONFIG_INTEL_IOMMU_SVM
1429 if (info->pri_enabled) {
1430 pci_disable_pri(pdev);
1431 info->pri_enabled = 0;
1432 }
1433 if (info->pasid_enabled) {
1434 pci_disable_pasid(pdev);
1435 info->pasid_enabled = 0;
1436 }
1437#endif
93a23a72
YZ
1438}
1439
1440static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1441 u64 addr, unsigned mask)
1442{
1443 u16 sid, qdep;
1444 unsigned long flags;
1445 struct device_domain_info *info;
1446
0824c592
OP
1447 if (!domain->has_iotlb_device)
1448 return;
1449
93a23a72
YZ
1450 spin_lock_irqsave(&device_domain_lock, flags);
1451 list_for_each_entry(info, &domain->devices, link) {
b16d0cb9 1452 if (!info->ats_enabled)
93a23a72
YZ
1453 continue;
1454
1455 sid = info->bus << 8 | info->devfn;
b16d0cb9 1456 qdep = info->ats_qdep;
1c48db44
JP
1457 qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
1458 qdep, addr, mask);
93a23a72
YZ
1459 }
1460 spin_unlock_irqrestore(&device_domain_lock, flags);
1461}
1462
a1ddcbe9
JR
1463static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
1464 struct dmar_domain *domain,
1465 unsigned long pfn, unsigned int pages,
1466 int ih, int map)
ba395927 1467{
9dd2fe89 1468 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
03d6a246 1469 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
a1ddcbe9 1470 u16 did = domain->iommu_did[iommu->seq_id];
ba395927 1471
ba395927
KA
1472 BUG_ON(pages == 0);
1473
ea8ea460
DW
1474 if (ih)
1475 ih = 1 << 6;
ba395927 1476 /*
9dd2fe89
YZ
1477 * Fallback to domain selective flush if no PSI support or the size is
1478 * too big.
ba395927
KA
1479 * PSI requires page size to be 2 ^ x, and the base address is naturally
1480 * aligned to the size
1481 */
9dd2fe89
YZ
1482 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1483 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1f0ef2aa 1484 DMA_TLB_DSI_FLUSH);
9dd2fe89 1485 else
ea8ea460 1486 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
9dd2fe89 1487 DMA_TLB_PSI_FLUSH);
bf92df30
YZ
1488
1489 /*
82653633
NA
1490 * In caching mode, changes of pages from non-present to present require
1491 * flush. However, device IOTLB doesn't need to be flushed in this case.
bf92df30 1492 */
82653633 1493 if (!cap_caching_mode(iommu->cap) || !map)
9d2e6505 1494 iommu_flush_dev_iotlb(domain, addr, mask);
ba395927
KA
1495}
1496
eed91a0b
PX
1497/* Notification for newly created mappings */
1498static inline void __mapping_notify_one(struct intel_iommu *iommu,
1499 struct dmar_domain *domain,
1500 unsigned long pfn, unsigned int pages)
1501{
1502 /* It's a non-present to present mapping. Only flush if caching mode */
1503 if (cap_caching_mode(iommu->cap))
1504 iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1);
1505 else
1506 iommu_flush_write_buffer(iommu);
1507}
1508
13cf0174
JR
1509static void iommu_flush_iova(struct iova_domain *iovad)
1510{
1511 struct dmar_domain *domain;
1512 int idx;
1513
1514 domain = container_of(iovad, struct dmar_domain, iovad);
1515
1516 for_each_domain_iommu(idx, domain) {
1517 struct intel_iommu *iommu = g_iommus[idx];
1518 u16 did = domain->iommu_did[iommu->seq_id];
1519
1520 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
1521
1522 if (!cap_caching_mode(iommu->cap))
1523 iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
1524 0, MAX_AGAW_PFN_WIDTH);
1525 }
1526}
1527
f8bab735 1528static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1529{
1530 u32 pmen;
1531 unsigned long flags;
1532
5bb71fc7
LB
1533 if (!cap_plmr(iommu->cap) && !cap_phmr(iommu->cap))
1534 return;
1535
1f5b3c3f 1536 raw_spin_lock_irqsave(&iommu->register_lock, flags);
f8bab735 1537 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1538 pmen &= ~DMA_PMEN_EPM;
1539 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1540
1541 /* wait for the protected region status bit to clear */
1542 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1543 readl, !(pmen & DMA_PMEN_PRS), pmen);
1544
1f5b3c3f 1545 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
f8bab735 1546}
1547
2a41ccee 1548static void iommu_enable_translation(struct intel_iommu *iommu)
ba395927
KA
1549{
1550 u32 sts;
1551 unsigned long flags;
1552
1f5b3c3f 1553 raw_spin_lock_irqsave(&iommu->register_lock, flags);
c416daa9
DW
1554 iommu->gcmd |= DMA_GCMD_TE;
1555 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1556
1557 /* Make sure hardware complete it */
1558 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1559 readl, (sts & DMA_GSTS_TES), sts);
ba395927 1560
1f5b3c3f 1561 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
ba395927
KA
1562}
1563
2a41ccee 1564static void iommu_disable_translation(struct intel_iommu *iommu)
ba395927
KA
1565{
1566 u32 sts;
1567 unsigned long flag;
1568
1f5b3c3f 1569 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1570 iommu->gcmd &= ~DMA_GCMD_TE;
1571 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1572
1573 /* Make sure hardware complete it */
1574 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1575 readl, (!(sts & DMA_GSTS_TES)), sts);
ba395927 1576
1f5b3c3f 1577 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1578}
1579
3460a6d9 1580
ba395927
KA
1581static int iommu_init_domains(struct intel_iommu *iommu)
1582{
8bf47816
JR
1583 u32 ndomains, nlongs;
1584 size_t size;
ba395927
KA
1585
1586 ndomains = cap_ndoms(iommu->cap);
8bf47816 1587 pr_debug("%s: Number of Domains supported <%d>\n",
9f10e5bf 1588 iommu->name, ndomains);
ba395927
KA
1589 nlongs = BITS_TO_LONGS(ndomains);
1590
94a91b50
DD
1591 spin_lock_init(&iommu->lock);
1592
ba395927
KA
1593 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1594 if (!iommu->domain_ids) {
9f10e5bf
JR
1595 pr_err("%s: Allocating domain id array failed\n",
1596 iommu->name);
ba395927
KA
1597 return -ENOMEM;
1598 }
8bf47816 1599
86f004c7 1600 size = (ALIGN(ndomains, 256) >> 8) * sizeof(struct dmar_domain **);
8bf47816
JR
1601 iommu->domains = kzalloc(size, GFP_KERNEL);
1602
1603 if (iommu->domains) {
1604 size = 256 * sizeof(struct dmar_domain *);
1605 iommu->domains[0] = kzalloc(size, GFP_KERNEL);
1606 }
1607
1608 if (!iommu->domains || !iommu->domains[0]) {
9f10e5bf
JR
1609 pr_err("%s: Allocating domain array failed\n",
1610 iommu->name);
852bdb04 1611 kfree(iommu->domain_ids);
8bf47816 1612 kfree(iommu->domains);
852bdb04 1613 iommu->domain_ids = NULL;
8bf47816 1614 iommu->domains = NULL;
ba395927
KA
1615 return -ENOMEM;
1616 }
1617
8bf47816
JR
1618
1619
ba395927 1620 /*
c0e8a6c8
JR
1621 * If Caching mode is set, then invalid translations are tagged
1622 * with domain-id 0, hence we need to pre-allocate it. We also
1623 * use domain-id 0 as a marker for non-allocated domain-id, so
1624 * make sure it is not used for a real domain.
ba395927 1625 */
c0e8a6c8
JR
1626 set_bit(0, iommu->domain_ids);
1627
3b33d4ab
LB
1628 /*
1629 * Vt-d spec rev3.0 (section 6.2.3.1) requires that each pasid
1630 * entry for first-level or pass-through translation modes should
1631 * be programmed with a domain id different from those used for
1632 * second-level or nested translation. We reserve a domain id for
1633 * this purpose.
1634 */
1635 if (sm_supported(iommu))
1636 set_bit(FLPT_DEFAULT_DID, iommu->domain_ids);
1637
ba395927
KA
1638 return 0;
1639}
ba395927 1640
ffebeb46 1641static void disable_dmar_iommu(struct intel_iommu *iommu)
ba395927 1642{
29a27719 1643 struct device_domain_info *info, *tmp;
55d94043 1644 unsigned long flags;
ba395927 1645
29a27719
JR
1646 if (!iommu->domains || !iommu->domain_ids)
1647 return;
a4eaa86c 1648
bea64033 1649again:
55d94043 1650 spin_lock_irqsave(&device_domain_lock, flags);
29a27719
JR
1651 list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
1652 struct dmar_domain *domain;
1653
1654 if (info->iommu != iommu)
1655 continue;
1656
1657 if (!info->dev || !info->domain)
1658 continue;
1659
1660 domain = info->domain;
1661
bea64033 1662 __dmar_remove_one_dev_info(info);
29a27719 1663
bea64033
JR
1664 if (!domain_type_is_vm_or_si(domain)) {
1665 /*
1666 * The domain_exit() function can't be called under
1667 * device_domain_lock, as it takes this lock itself.
1668 * So release the lock here and re-run the loop
1669 * afterwards.
1670 */
1671 spin_unlock_irqrestore(&device_domain_lock, flags);
29a27719 1672 domain_exit(domain);
bea64033
JR
1673 goto again;
1674 }
ba395927 1675 }
55d94043 1676 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927
KA
1677
1678 if (iommu->gcmd & DMA_GCMD_TE)
1679 iommu_disable_translation(iommu);
ffebeb46 1680}
ba395927 1681
ffebeb46
JL
1682static void free_dmar_iommu(struct intel_iommu *iommu)
1683{
1684 if ((iommu->domains) && (iommu->domain_ids)) {
86f004c7 1685 int elems = ALIGN(cap_ndoms(iommu->cap), 256) >> 8;
8bf47816
JR
1686 int i;
1687
1688 for (i = 0; i < elems; i++)
1689 kfree(iommu->domains[i]);
ffebeb46
JL
1690 kfree(iommu->domains);
1691 kfree(iommu->domain_ids);
1692 iommu->domains = NULL;
1693 iommu->domain_ids = NULL;
1694 }
ba395927 1695
d9630fe9
WH
1696 g_iommus[iommu->seq_id] = NULL;
1697
ba395927
KA
1698 /* free context mapping */
1699 free_context_table(iommu);
8a94ade4
DW
1700
1701#ifdef CONFIG_INTEL_IOMMU_SVM
765b6a98 1702 if (pasid_supported(iommu)) {
a222a7f0
DW
1703 if (ecap_prs(iommu->ecap))
1704 intel_svm_finish_prq(iommu);
a222a7f0 1705 }
8a94ade4 1706#endif
ba395927
KA
1707}
1708
ab8dfe25 1709static struct dmar_domain *alloc_domain(int flags)
ba395927 1710{
ba395927 1711 struct dmar_domain *domain;
ba395927
KA
1712
1713 domain = alloc_domain_mem();
1714 if (!domain)
1715 return NULL;
1716
ab8dfe25 1717 memset(domain, 0, sizeof(*domain));
98fa15f3 1718 domain->nid = NUMA_NO_NODE;
ab8dfe25 1719 domain->flags = flags;
0824c592 1720 domain->has_iotlb_device = false;
92d03cc8 1721 INIT_LIST_HEAD(&domain->devices);
2c2e2c38
FY
1722
1723 return domain;
1724}
1725
d160aca5
JR
1726/* Must be called with iommu->lock */
1727static int domain_attach_iommu(struct dmar_domain *domain,
fb170fb4
JL
1728 struct intel_iommu *iommu)
1729{
44bde614 1730 unsigned long ndomains;
55d94043 1731 int num;
44bde614 1732
55d94043 1733 assert_spin_locked(&device_domain_lock);
d160aca5 1734 assert_spin_locked(&iommu->lock);
ba395927 1735
29a27719
JR
1736 domain->iommu_refcnt[iommu->seq_id] += 1;
1737 domain->iommu_count += 1;
1738 if (domain->iommu_refcnt[iommu->seq_id] == 1) {
fb170fb4 1739 ndomains = cap_ndoms(iommu->cap);
d160aca5
JR
1740 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1741
1742 if (num >= ndomains) {
1743 pr_err("%s: No free domain ids\n", iommu->name);
1744 domain->iommu_refcnt[iommu->seq_id] -= 1;
1745 domain->iommu_count -= 1;
55d94043 1746 return -ENOSPC;
2c2e2c38 1747 }
ba395927 1748
d160aca5
JR
1749 set_bit(num, iommu->domain_ids);
1750 set_iommu_domain(iommu, num, domain);
1751
1752 domain->iommu_did[iommu->seq_id] = num;
1753 domain->nid = iommu->node;
fb170fb4 1754
fb170fb4
JL
1755 domain_update_iommu_cap(domain);
1756 }
d160aca5 1757
55d94043 1758 return 0;
fb170fb4
JL
1759}
1760
1761static int domain_detach_iommu(struct dmar_domain *domain,
1762 struct intel_iommu *iommu)
1763{
e083ea5b 1764 int num, count;
d160aca5 1765
55d94043 1766 assert_spin_locked(&device_domain_lock);
d160aca5 1767 assert_spin_locked(&iommu->lock);
fb170fb4 1768
29a27719
JR
1769 domain->iommu_refcnt[iommu->seq_id] -= 1;
1770 count = --domain->iommu_count;
1771 if (domain->iommu_refcnt[iommu->seq_id] == 0) {
d160aca5
JR
1772 num = domain->iommu_did[iommu->seq_id];
1773 clear_bit(num, iommu->domain_ids);
1774 set_iommu_domain(iommu, num, NULL);
fb170fb4 1775
fb170fb4 1776 domain_update_iommu_cap(domain);
c0e8a6c8 1777 domain->iommu_did[iommu->seq_id] = 0;
fb170fb4 1778 }
fb170fb4
JL
1779
1780 return count;
1781}
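/*
 * Note on the two helpers above (illustrative summary): the first attach
 * of a domain to a given IOMMU (refcount 0 -> 1) allocates a domain ID
 * from that IOMMU's bitmap; later attaches only bump the refcount.
 * Detaching releases the ID again once the per-IOMMU refcount reaches
 * zero, and returns how many IOMMUs still reference the domain.
 */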
1782
ba395927 1783static struct iova_domain reserved_iova_list;
8a443df4 1784static struct lock_class_key reserved_rbtree_key;
ba395927 1785
51a63e67 1786static int dmar_init_reserved_ranges(void)
ba395927
KA
1787{
1788 struct pci_dev *pdev = NULL;
1789 struct iova *iova;
1790 int i;
ba395927 1791
aa3ac946 1792 init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN);
ba395927 1793
8a443df4
MG
1794 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1795 &reserved_rbtree_key);
1796
ba395927
KA
1797 /* IOAPIC ranges shouldn't be accessed by DMA */
1798 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1799 IOVA_PFN(IOAPIC_RANGE_END));
51a63e67 1800 if (!iova) {
9f10e5bf 1801 pr_err("Reserve IOAPIC range failed\n");
51a63e67
JC
1802 return -ENODEV;
1803 }
ba395927
KA
1804
1805 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1806 for_each_pci_dev(pdev) {
1807 struct resource *r;
1808
1809 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1810 r = &pdev->resource[i];
1811 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1812 continue;
1a4a4551
DW
1813 iova = reserve_iova(&reserved_iova_list,
1814 IOVA_PFN(r->start),
1815 IOVA_PFN(r->end));
51a63e67 1816 if (!iova) {
932a6523 1817 pci_err(pdev, "Reserve iova for %pR failed\n", r);
51a63e67
JC
1818 return -ENODEV;
1819 }
ba395927
KA
1820 }
1821 }
51a63e67 1822 return 0;
ba395927
KA
1823}
1824
1825static void domain_reserve_special_ranges(struct dmar_domain *domain)
1826{
1827 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1828}
1829
1830static inline int guestwidth_to_adjustwidth(int gaw)
1831{
1832 int agaw;
1833 int r = (gaw - 12) % 9;
1834
1835 if (r == 0)
1836 agaw = gaw;
1837 else
1838 agaw = gaw + 9 - r;
1839 if (agaw > 64)
1840 agaw = 64;
1841 return agaw;
1842}
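/*
 * Worked examples (illustrative): the adjusted width is the guest width
 * rounded up to a page-table boundary (12 + n*9 bits), capped at 64:
 *
 *	gaw = 39: r = (39 - 12) % 9 = 0 -> agaw = 39
 *	gaw = 48: r = (48 - 12) % 9 = 0 -> agaw = 48
 *	gaw = 40: r = (40 - 12) % 9 = 1 -> agaw = 40 + 9 - 1 = 48
 */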
1843
dc534b25
JR
1844static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
1845 int guest_width)
ba395927 1846{
ba395927
KA
1847 int adjust_width, agaw;
1848 unsigned long sagaw;
13cf0174 1849 int err;
ba395927 1850
aa3ac946 1851 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
13cf0174
JR
1852
1853 err = init_iova_flush_queue(&domain->iovad,
1854 iommu_flush_iova, iova_entry_free);
1855 if (err)
1856 return err;
1857
ba395927
KA
1858 domain_reserve_special_ranges(domain);
1859
1860 /* calculate AGAW */
ba395927
KA
1861 if (guest_width > cap_mgaw(iommu->cap))
1862 guest_width = cap_mgaw(iommu->cap);
1863 domain->gaw = guest_width;
1864 adjust_width = guestwidth_to_adjustwidth(guest_width);
1865 agaw = width_to_agaw(adjust_width);
1866 sagaw = cap_sagaw(iommu->cap);
1867 if (!test_bit(agaw, &sagaw)) {
1868 /* hardware doesn't support it, choose a bigger one */
9f10e5bf 1869 pr_debug("Hardware doesn't support agaw %d\n", agaw);
ba395927
KA
1870 agaw = find_next_bit(&sagaw, 5, agaw);
1871 if (agaw >= 5)
1872 return -ENODEV;
1873 }
1874 domain->agaw = agaw;
ba395927 1875
8e604097
WH
1876 if (ecap_coherent(iommu->ecap))
1877 domain->iommu_coherency = 1;
1878 else
1879 domain->iommu_coherency = 0;
1880
58c610bd
SY
1881 if (ecap_sc_support(iommu->ecap))
1882 domain->iommu_snooping = 1;
1883 else
1884 domain->iommu_snooping = 0;
1885
214e39aa
DW
1886 if (intel_iommu_superpage)
1887 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1888 else
1889 domain->iommu_superpage = 0;
1890
4c923d47 1891 domain->nid = iommu->node;
c7151a8d 1892
ba395927 1893 /* always allocate the top pgd */
4c923d47 1894 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
ba395927
KA
1895 if (!domain->pgd)
1896 return -ENOMEM;
5b6985ce 1897 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
ba395927
KA
1898 return 0;
1899}
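/*
 * Worked example (illustrative): a requested guest_width of 57 on an
 * IOMMU whose cap_mgaw() is 48 is clamped to 48; the adjusted width is
 * also 48 (see guestwidth_to_adjustwidth() above), so a 4-level page
 * table covering 48 bits is chosen if cap_sagaw() advertises it.
 */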
1900
1901static void domain_exit(struct dmar_domain *domain)
1902{
e083ea5b 1903 struct page *freelist;
ba395927 1904
d160aca5
JR
1905 /* Remove associated devices and clear attached or cached domains */
1906 rcu_read_lock();
ba395927 1907 domain_remove_dev_info(domain);
d160aca5 1908 rcu_read_unlock();
92d03cc8 1909
ba395927
KA
1910 /* destroy iovas */
1911 put_iova_domain(&domain->iovad);
ba395927 1912
ea8ea460 1913 freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
ba395927 1914
ea8ea460
DW
1915 dma_free_pagelist(freelist);
1916
ba395927
KA
1917 free_domain_mem(domain);
1918}
1919
7373a8cc
LB
1920/*
1921 * Get the PASID directory size for scalable mode context entry.
1922 * Value of X in the PDTS field of a scalable mode context entry
1923 * indicates PASID directory with 2^(X + 7) entries.
1924 */
1925static inline unsigned long context_get_sm_pds(struct pasid_table *table)
1926{
1927 int pds, max_pde;
1928
1929 max_pde = table->max_pasid >> PASID_PDE_SHIFT;
1930 pds = find_first_bit((unsigned long *)&max_pde, MAX_NR_PASID_BITS);
1931 if (pds < 7)
1932 return 0;
1933
1934 return pds - 7;
1935}
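/*
 * Example (illustrative): a returned value of 0 encodes a 2^7 = 128-entry
 * PASID directory, 1 encodes 256 entries, and so on, following the
 * 2^(X + 7) coding described above.
 */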
1936
1937/*
1938 * Set the RID_PASID field of a scalable mode context entry. The
1939 * IOMMU hardware will use the PASID value set in this field for
1940 * DMA translations of DMA requests without PASID.
1941 */
1942static inline void
1943context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid)
1944{
1945 context->hi |= pasid & ((1 << 20) - 1);
1946 context->hi |= (1 << 20);
1947}
1948
1949/*
1950 * Set the DTE(Device-TLB Enable) field of a scalable mode context
1951 * entry.
1952 */
1953static inline void context_set_sm_dte(struct context_entry *context)
1954{
1955 context->lo |= (1 << 2);
1956}
1957
1958/*
1959 * Set the PRE(Page Request Enable) field of a scalable mode context
1960 * entry.
1961 */
1962static inline void context_set_sm_pre(struct context_entry *context)
1963{
1964 context->lo |= (1 << 4);
1965}
1966
1967/* Convert value to context PASID directory size field coding. */
1968#define context_pdts(pds) (((pds) & 0x7) << 9)
1969
64ae892b
DW
1970static int domain_context_mapping_one(struct dmar_domain *domain,
1971 struct intel_iommu *iommu,
ca6e322d 1972 struct pasid_table *table,
28ccce0d 1973 u8 bus, u8 devfn)
ba395927 1974{
c6c2cebd 1975 u16 did = domain->iommu_did[iommu->seq_id];
28ccce0d
JR
1976 int translation = CONTEXT_TT_MULTI_LEVEL;
1977 struct device_domain_info *info = NULL;
ba395927 1978 struct context_entry *context;
ba395927 1979 unsigned long flags;
7373a8cc 1980 int ret;
28ccce0d 1981
c6c2cebd
JR
1982 WARN_ON(did == 0);
1983
28ccce0d
JR
1984 if (hw_pass_through && domain_type_is_si(domain))
1985 translation = CONTEXT_TT_PASS_THROUGH;
ba395927
KA
1986
1987 pr_debug("Set context mapping for %02x:%02x.%d\n",
1988 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
4ed0d3e6 1989
ba395927 1990 BUG_ON(!domain->pgd);
5331fe6f 1991
55d94043
JR
1992 spin_lock_irqsave(&device_domain_lock, flags);
1993 spin_lock(&iommu->lock);
1994
1995 ret = -ENOMEM;
03ecc32c 1996 context = iommu_context_addr(iommu, bus, devfn, 1);
ba395927 1997 if (!context)
55d94043 1998 goto out_unlock;
ba395927 1999
55d94043
JR
2000 ret = 0;
2001 if (context_present(context))
2002 goto out_unlock;
cf484d0e 2003
aec0e861
XP
2004 /*
2005 * For kdump cases, old valid entries may be cached due to the
2006 * in-flight DMA and copied pgtable, but there is no unmapping
2007 * behaviour for them, thus we need an explicit cache flush for
2008 * the newly-mapped device. For kdump, at this point, the device
2009 * is supposed to finish reset at its driver probe stage, so no
2010 * in-flight DMA will exist, and we don't need to worry anymore
2011 * hereafter.
2012 */
2013 if (context_copied(context)) {
2014 u16 did_old = context_domain_id(context);
2015
b117e038 2016 if (did_old < cap_ndoms(iommu->cap)) {
aec0e861
XP
2017 iommu->flush.flush_context(iommu, did_old,
2018 (((u16)bus) << 8) | devfn,
2019 DMA_CCMD_MASK_NOBIT,
2020 DMA_CCMD_DEVICE_INVL);
f73a7eee
KA
2021 iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
2022 DMA_TLB_DSI_FLUSH);
2023 }
aec0e861
XP
2024 }
2025
de24e553 2026 context_clear_entry(context);
ea6606b0 2027
7373a8cc
LB
2028 if (sm_supported(iommu)) {
2029 unsigned long pds;
4ed0d3e6 2030
7373a8cc
LB
2031 WARN_ON(!table);
2032
2033 /* Setup the PASID DIR pointer: */
2034 pds = context_get_sm_pds(table);
2035 context->lo = (u64)virt_to_phys(table->table) |
2036 context_pdts(pds);
2037
2038 /* Setup the RID_PASID field: */
2039 context_set_sm_rid2pasid(context, PASID_RID2PASID);
de24e553 2040
de24e553 2041 /*
7373a8cc
LB
2042 * Setup the Device-TLB enable bit and Page request
2043 * Enable bit:
de24e553 2044 */
7373a8cc
LB
2045 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2046 if (info && info->ats_supported)
2047 context_set_sm_dte(context);
2048 if (info && info->pri_supported)
2049 context_set_sm_pre(context);
2050 } else {
2051 struct dma_pte *pgd = domain->pgd;
2052 int agaw;
2053
2054 context_set_domain_id(context, did);
7373a8cc
LB
2055
2056 if (translation != CONTEXT_TT_PASS_THROUGH) {
2057 /*
2058 * Skip top levels of page tables for iommu which has
2059 * less agaw than default. Unnecessary for PT mode.
2060 */
2061 for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
2062 ret = -ENOMEM;
2063 pgd = phys_to_virt(dma_pte_addr(pgd));
2064 if (!dma_pte_present(pgd))
2065 goto out_unlock;
2066 }
2067
2068 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2069 if (info && info->ats_supported)
2070 translation = CONTEXT_TT_DEV_IOTLB;
2071 else
2072 translation = CONTEXT_TT_MULTI_LEVEL;
2073
2074 context_set_address_root(context, virt_to_phys(pgd));
2075 context_set_address_width(context, agaw);
2076 } else {
2077 /*
2078 * In pass through mode, AW must be programmed to
2079 * indicate the largest AGAW value supported by
2080 * hardware. And ASR is ignored by hardware.
2081 */
2082 context_set_address_width(context, iommu->msagaw);
2083 }
41b80db2
LB
2084
2085 context_set_translation_type(context, translation);
93a23a72 2086 }
4ed0d3e6 2087
c07e7d21
MM
2088 context_set_fault_enable(context);
2089 context_set_present(context);
5331fe6f 2090 domain_flush_cache(domain, context, sizeof(*context));
ba395927 2091
4c25a2c1
DW
2092 /*
2093 * It's a non-present to present mapping. If hardware doesn't cache
2094 * non-present entries we only need to flush the write-buffer. If it
2095 * _does_ cache non-present entries, then it does so in the special
2096 * domain #0, which we have to flush:
2097 */
2098 if (cap_caching_mode(iommu->cap)) {
2099 iommu->flush.flush_context(iommu, 0,
2100 (((u16)bus) << 8) | devfn,
2101 DMA_CCMD_MASK_NOBIT,
2102 DMA_CCMD_DEVICE_INVL);
c6c2cebd 2103 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
4c25a2c1 2104 } else {
ba395927 2105 iommu_flush_write_buffer(iommu);
4c25a2c1 2106 }
93a23a72 2107 iommu_enable_dev_iotlb(info);
c7151a8d 2108
55d94043
JR
2109 ret = 0;
2110
2111out_unlock:
2112 spin_unlock(&iommu->lock);
2113 spin_unlock_irqrestore(&device_domain_lock, flags);
fb170fb4 2114
5c365d18 2115 return ret;
ba395927
KA
2116}
2117
579305f7
AW
2118struct domain_context_mapping_data {
2119 struct dmar_domain *domain;
2120 struct intel_iommu *iommu;
ca6e322d 2121 struct pasid_table *table;
579305f7
AW
2122};
2123
2124static int domain_context_mapping_cb(struct pci_dev *pdev,
2125 u16 alias, void *opaque)
2126{
2127 struct domain_context_mapping_data *data = opaque;
2128
2129 return domain_context_mapping_one(data->domain, data->iommu,
ca6e322d
LB
2130 data->table, PCI_BUS_NUM(alias),
2131 alias & 0xff);
579305f7
AW
2132}
2133
ba395927 2134static int
28ccce0d 2135domain_context_mapping(struct dmar_domain *domain, struct device *dev)
ba395927 2136{
ca6e322d
LB
2137 struct domain_context_mapping_data data;
2138 struct pasid_table *table;
64ae892b 2139 struct intel_iommu *iommu;
156baca8 2140 u8 bus, devfn;
64ae892b 2141
e1f167f3 2142 iommu = device_to_iommu(dev, &bus, &devfn);
64ae892b
DW
2143 if (!iommu)
2144 return -ENODEV;
ba395927 2145
ca6e322d
LB
2146 table = intel_pasid_get_table(dev);
2147
579305f7 2148 if (!dev_is_pci(dev))
ca6e322d
LB
2149 return domain_context_mapping_one(domain, iommu, table,
2150 bus, devfn);
579305f7
AW
2151
2152 data.domain = domain;
2153 data.iommu = iommu;
ca6e322d 2154 data.table = table;
579305f7
AW
2155
2156 return pci_for_each_dma_alias(to_pci_dev(dev),
2157 &domain_context_mapping_cb, &data);
2158}
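/*
 * Note (illustrative): for PCI devices the context entry is programmed
 * for every DMA alias reported by pci_for_each_dma_alias(), e.g. the
 * requester ID a PCIe-to-PCI bridge uses on behalf of devices behind it,
 * so the mapping holds whichever alias appears on the bus.
 */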
2159
2160static int domain_context_mapped_cb(struct pci_dev *pdev,
2161 u16 alias, void *opaque)
2162{
2163 struct intel_iommu *iommu = opaque;
2164
2165 return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
ba395927
KA
2166}
2167
e1f167f3 2168static int domain_context_mapped(struct device *dev)
ba395927 2169{
5331fe6f 2170 struct intel_iommu *iommu;
156baca8 2171 u8 bus, devfn;
5331fe6f 2172
e1f167f3 2173 iommu = device_to_iommu(dev, &bus, &devfn);
5331fe6f
WH
2174 if (!iommu)
2175 return -ENODEV;
ba395927 2176
579305f7
AW
2177 if (!dev_is_pci(dev))
2178 return device_context_mapped(iommu, bus, devfn);
e1f167f3 2179
579305f7
AW
2180 return !pci_for_each_dma_alias(to_pci_dev(dev),
2181 domain_context_mapped_cb, iommu);
ba395927
KA
2182}
2183
f532959b
FY
2184/* Returns a number of VTD pages, but aligned to MM page size */
2185static inline unsigned long aligned_nrpages(unsigned long host_addr,
2186 size_t size)
2187{
2188 host_addr &= ~PAGE_MASK;
2189 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
2190}
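/*
 * Worked example (illustrative, assuming 4KiB MM and VTD pages):
 * host_addr = 0x1003, size = 0x2000 -> offset within the page = 0x3,
 * PAGE_ALIGN(0x3 + 0x2000) = 0x3000 -> 3 VTD pages are needed.
 */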
2191
6dd9a7c7
YS
2192/* Return largest possible superpage level for a given mapping */
2193static inline int hardware_largepage_caps(struct dmar_domain *domain,
2194 unsigned long iov_pfn,
2195 unsigned long phy_pfn,
2196 unsigned long pages)
2197{
2198 int support, level = 1;
2199 unsigned long pfnmerge;
2200
2201 support = domain->iommu_superpage;
2202
2203 /* To use a large page, the virtual *and* physical addresses
2204 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2205 of them will mean we have to use smaller pages. So just
2206 merge them and check both at once. */
2207 pfnmerge = iov_pfn | phy_pfn;
2208
2209 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
2210 pages >>= VTD_STRIDE_SHIFT;
2211 if (!pages)
2212 break;
2213 pfnmerge >>= VTD_STRIDE_SHIFT;
2214 level++;
2215 support--;
2216 }
2217 return level;
2218}
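/*
 * Worked example (illustrative): with one level of superpage support
 * (2MiB), iov_pfn = 0x200, phy_pfn = 0x400 and pages = 0x200: both pfns
 * are 512-page aligned and at least 512 pages remain, so the function
 * returns level 2 and the caller can use a single 2MiB page.
 */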
2219
9051aa02
DW
2220static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2221 struct scatterlist *sg, unsigned long phys_pfn,
2222 unsigned long nr_pages, int prot)
e1605495
DW
2223{
2224 struct dma_pte *first_pte = NULL, *pte = NULL;
9051aa02 2225 phys_addr_t uninitialized_var(pteval);
cc4f14aa 2226 unsigned long sg_res = 0;
6dd9a7c7
YS
2227 unsigned int largepage_lvl = 0;
2228 unsigned long lvl_pages = 0;
e1605495 2229
162d1b10 2230 BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
e1605495
DW
2231
2232 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2233 return -EINVAL;
2234
2235 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
2236
cc4f14aa
JL
2237 if (!sg) {
2238 sg_res = nr_pages;
9051aa02
DW
2239 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
2240 }
2241
6dd9a7c7 2242 while (nr_pages > 0) {
c85994e4
DW
2243 uint64_t tmp;
2244
e1605495 2245 if (!sg_res) {
29a90b70
RM
2246 unsigned int pgoff = sg->offset & ~PAGE_MASK;
2247
f532959b 2248 sg_res = aligned_nrpages(sg->offset, sg->length);
29a90b70 2249 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + pgoff;
e1605495 2250 sg->dma_length = sg->length;
29a90b70 2251 pteval = (sg_phys(sg) - pgoff) | prot;
6dd9a7c7 2252 phys_pfn = pteval >> VTD_PAGE_SHIFT;
e1605495 2253 }
6dd9a7c7 2254
e1605495 2255 if (!pte) {
6dd9a7c7
YS
2256 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2257
5cf0a76f 2258 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
e1605495
DW
2259 if (!pte)
2260 return -ENOMEM;
6dd9a7c7 2261 /* It is a large page */
6491d4d0 2262 if (largepage_lvl > 1) {
ba2374fd
CZ
2263 unsigned long nr_superpages, end_pfn;
2264
6dd9a7c7 2265 pteval |= DMA_PTE_LARGE_PAGE;
d41a4adb 2266 lvl_pages = lvl_to_nr_pages(largepage_lvl);
ba2374fd
CZ
2267
2268 nr_superpages = sg_res / lvl_pages;
2269 end_pfn = iov_pfn + nr_superpages * lvl_pages - 1;
2270
d41a4adb
JL
2271 /*
2272 * Ensure that old small page tables are
ba2374fd 2273 * removed to make room for superpage(s).
bc24c571
DD
2274 * We're adding new large pages, so make sure
2275 * we don't remove their parent tables.
d41a4adb 2276 */
bc24c571
DD
2277 dma_pte_free_pagetable(domain, iov_pfn, end_pfn,
2278 largepage_lvl + 1);
6491d4d0 2279 } else {
6dd9a7c7 2280 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
6491d4d0 2281 }
6dd9a7c7 2282
e1605495
DW
2283 }
2284 /* We don't need a lock here, nobody else
2285 * touches the iova range
2286 */
7766a3fb 2287 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
c85994e4 2288 if (tmp) {
1bf20f0d 2289 static int dumps = 5;
9f10e5bf
JR
2290 pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2291 iov_pfn, tmp, (unsigned long long)pteval);
1bf20f0d
DW
2292 if (dumps) {
2293 dumps--;
2294 debug_dma_dump_mappings(NULL);
2295 }
2296 WARN_ON(1);
2297 }
6dd9a7c7
YS
2298
2299 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2300
2301 BUG_ON(nr_pages < lvl_pages);
2302 BUG_ON(sg_res < lvl_pages);
2303
2304 nr_pages -= lvl_pages;
2305 iov_pfn += lvl_pages;
2306 phys_pfn += lvl_pages;
2307 pteval += lvl_pages * VTD_PAGE_SIZE;
2308 sg_res -= lvl_pages;
2309
2310 /* If the next PTE would be the first in a new page, then we
2311 need to flush the cache on the entries we've just written.
2312 And then we'll need to recalculate 'pte', so clear it and
2313 let it get set again in the if (!pte) block above.
2314
2315 If we're done (!nr_pages) we need to flush the cache too.
2316
2317 Also if we've been setting superpages, we may need to
2318 recalculate 'pte' and switch back to smaller pages for the
2319 end of the mapping, if the trailing size is not enough to
2320 use another superpage (i.e. sg_res < lvl_pages). */
e1605495 2321 pte++;
6dd9a7c7
YS
2322 if (!nr_pages || first_pte_in_page(pte) ||
2323 (largepage_lvl > 1 && sg_res < lvl_pages)) {
e1605495
DW
2324 domain_flush_cache(domain, first_pte,
2325 (void *)pte - (void *)first_pte);
2326 pte = NULL;
2327 }
6dd9a7c7
YS
2328
2329 if (!sg_res && nr_pages)
e1605495
DW
2330 sg = sg_next(sg);
2331 }
2332 return 0;
2333}
2334
87684fd9 2335static int domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
095303e0
LB
2336 struct scatterlist *sg, unsigned long phys_pfn,
2337 unsigned long nr_pages, int prot)
2338{
2339 int ret;
2340 struct intel_iommu *iommu;
2341
2342 /* Do the real mapping first */
2343 ret = __domain_mapping(domain, iov_pfn, sg, phys_pfn, nr_pages, prot);
2344 if (ret)
2345 return ret;
2346
2347 /* Notify about the new mapping */
2348 if (domain_type_is_vm(domain)) {
2349 /* VM typed domains can have more than one IOMMUs */
2350 int iommu_id;
2351
2352 for_each_domain_iommu(iommu_id, domain) {
2353 iommu = g_iommus[iommu_id];
2354 __mapping_notify_one(iommu, domain, iov_pfn, nr_pages);
2355 }
2356 } else {
2357 /* General domains only have one IOMMU */
2358 iommu = domain_get_iommu(domain);
2359 __mapping_notify_one(iommu, domain, iov_pfn, nr_pages);
2360 }
2361
2362 return 0;
87684fd9
PX
2363}
2364
9051aa02
DW
2365static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2366 struct scatterlist *sg, unsigned long nr_pages,
2367 int prot)
ba395927 2368{
87684fd9 2369 return domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
9051aa02 2370}
6f6a00e4 2371
9051aa02
DW
2372static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2373 unsigned long phys_pfn, unsigned long nr_pages,
2374 int prot)
2375{
87684fd9 2376 return domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
ba395927
KA
2377}
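/*
 * Usage sketch (illustrative, caller-supplied values): map nr_pages of
 * physically contiguous memory starting at phys_pfn to IOVA iov_pfn:
 *
 *	ret = domain_pfn_mapping(domain, iov_pfn, phys_pfn, nr_pages,
 *				 DMA_PTE_READ | DMA_PTE_WRITE);
 *
 * domain_sg_mapping() is the scatterlist variant; the physical addresses
 * are then taken from the sg entries rather than from phys_pfn.
 */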
2378
2452d9db 2379static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn)
ba395927 2380{
5082219b
FS
2381 unsigned long flags;
2382 struct context_entry *context;
2383 u16 did_old;
2384
c7151a8d
WH
2385 if (!iommu)
2386 return;
8c11e798 2387
5082219b
FS
2388 spin_lock_irqsave(&iommu->lock, flags);
2389 context = iommu_context_addr(iommu, bus, devfn, 0);
2390 if (!context) {
2391 spin_unlock_irqrestore(&iommu->lock, flags);
2392 return;
2393 }
2394 did_old = context_domain_id(context);
2395 context_clear_entry(context);
2396 __iommu_flush_cache(iommu, context, sizeof(*context));
2397 spin_unlock_irqrestore(&iommu->lock, flags);
2398 iommu->flush.flush_context(iommu,
2399 did_old,
2400 (((u16)bus) << 8) | devfn,
2401 DMA_CCMD_MASK_NOBIT,
2402 DMA_CCMD_DEVICE_INVL);
2403 iommu->flush.flush_iotlb(iommu,
2404 did_old,
2405 0,
2406 0,
2407 DMA_TLB_DSI_FLUSH);
ba395927
KA
2408}
2409
109b9b04
DW
2410static inline void unlink_domain_info(struct device_domain_info *info)
2411{
2412 assert_spin_locked(&device_domain_lock);
2413 list_del(&info->link);
2414 list_del(&info->global);
2415 if (info->dev)
0bcb3e28 2416 info->dev->archdata.iommu = NULL;
109b9b04
DW
2417}
2418
ba395927
KA
2419static void domain_remove_dev_info(struct dmar_domain *domain)
2420{
3a74ca01 2421 struct device_domain_info *info, *tmp;
fb170fb4 2422 unsigned long flags;
ba395927
KA
2423
2424 spin_lock_irqsave(&device_domain_lock, flags);
76f45fe3 2425 list_for_each_entry_safe(info, tmp, &domain->devices, link)
127c7615 2426 __dmar_remove_one_dev_info(info);
ba395927
KA
2427 spin_unlock_irqrestore(&device_domain_lock, flags);
2428}
2429
2430/*
2431 * find_domain
1525a29a 2432 * Note: we use struct device->archdata.iommu to store the info
ba395927 2433 */
1525a29a 2434static struct dmar_domain *find_domain(struct device *dev)
ba395927
KA
2435{
2436 struct device_domain_info *info;
2437
2438 /* No lock here, assumes no domain exit in normal case */
1525a29a 2439 info = dev->archdata.iommu;
b316d02a 2440 if (likely(info))
ba395927
KA
2441 return info->domain;
2442 return NULL;
2443}
2444
5a8f40e8 2445static inline struct device_domain_info *
745f2586
JL
2446dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2447{
2448 struct device_domain_info *info;
2449
2450 list_for_each_entry(info, &device_domain_list, global)
41e80dca 2451 if (info->iommu->segment == segment && info->bus == bus &&
745f2586 2452 info->devfn == devfn)
5a8f40e8 2453 return info;
745f2586
JL
2454
2455 return NULL;
2456}
2457
5db31569
JR
2458static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
2459 int bus, int devfn,
2460 struct device *dev,
2461 struct dmar_domain *domain)
745f2586 2462{
5a8f40e8 2463 struct dmar_domain *found = NULL;
745f2586
JL
2464 struct device_domain_info *info;
2465 unsigned long flags;
d160aca5 2466 int ret;
745f2586
JL
2467
2468 info = alloc_devinfo_mem();
2469 if (!info)
b718cd3d 2470 return NULL;
745f2586 2471
745f2586
JL
2472 info->bus = bus;
2473 info->devfn = devfn;
b16d0cb9
DW
2474 info->ats_supported = info->pasid_supported = info->pri_supported = 0;
2475 info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
2476 info->ats_qdep = 0;
745f2586
JL
2477 info->dev = dev;
2478 info->domain = domain;
5a8f40e8 2479 info->iommu = iommu;
cc580e41 2480 info->pasid_table = NULL;
95587a75 2481 info->auxd_enabled = 0;
67b8e02b 2482 INIT_LIST_HEAD(&info->auxiliary_domains);
745f2586 2483
b16d0cb9
DW
2484 if (dev && dev_is_pci(dev)) {
2485 struct pci_dev *pdev = to_pci_dev(info->dev);
2486
d8b85910
LB
2487 if (!pdev->untrusted &&
2488 !pci_ats_disabled() &&
cef74409 2489 ecap_dev_iotlb_support(iommu->ecap) &&
b16d0cb9
DW
2490 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) &&
2491 dmar_find_matched_atsr_unit(pdev))
2492 info->ats_supported = 1;
2493
765b6a98
LB
2494 if (sm_supported(iommu)) {
2495 if (pasid_supported(iommu)) {
b16d0cb9
DW
2496 int features = pci_pasid_features(pdev);
2497 if (features >= 0)
2498 info->pasid_supported = features | 1;
2499 }
2500
2501 if (info->ats_supported && ecap_prs(iommu->ecap) &&
2502 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
2503 info->pri_supported = 1;
2504 }
2505 }
2506
7560cc3c 2507 spin_lock(&iommu->lock);
745f2586
JL
2508 spin_lock_irqsave(&device_domain_lock, flags);
2509 if (dev)
0bcb3e28 2510 found = find_domain(dev);
f303e507
JR
2511
2512 if (!found) {
5a8f40e8 2513 struct device_domain_info *info2;
41e80dca 2514 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
f303e507
JR
2515 if (info2) {
2516 found = info2->domain;
2517 info2->dev = dev;
2518 }
5a8f40e8 2519 }
f303e507 2520
745f2586
JL
2521 if (found) {
2522 spin_unlock_irqrestore(&device_domain_lock, flags);
7560cc3c 2523 spin_unlock(&iommu->lock);
745f2586 2524 free_devinfo_mem(info);
b718cd3d
DW
2525 /* Caller must free the original domain */
2526 return found;
745f2586
JL
2527 }
2528
d160aca5 2529 ret = domain_attach_iommu(domain, iommu);
d160aca5 2530 if (ret) {
c6c2cebd 2531 spin_unlock_irqrestore(&device_domain_lock, flags);
7560cc3c 2532 spin_unlock(&iommu->lock);
499f3aa4 2533 free_devinfo_mem(info);
c6c2cebd
JR
2534 return NULL;
2535 }
c6c2cebd 2536
b718cd3d
DW
2537 list_add(&info->link, &domain->devices);
2538 list_add(&info->global, &device_domain_list);
2539 if (dev)
2540 dev->archdata.iommu = info;
0bbeb01a 2541 spin_unlock_irqrestore(&device_domain_lock, flags);
7560cc3c 2542 spin_unlock(&iommu->lock);
a7fc93fe 2543
0bbeb01a
LB
2544 /* PASID table is mandatory for a PCI device in scalable mode. */
2545 if (dev && dev_is_pci(dev) && sm_supported(iommu)) {
a7fc93fe
LB
2546 ret = intel_pasid_alloc_table(dev);
2547 if (ret) {
932a6523 2548 dev_err(dev, "PASID table allocation failed\n");
71753239 2549 dmar_remove_one_dev_info(dev);
0bbeb01a 2550 return NULL;
a7fc93fe 2551 }
ef848b7e
LB
2552
2553 /* Setup the PASID entry for requests without PASID: */
2554 spin_lock(&iommu->lock);
2555 if (hw_pass_through && domain_type_is_si(domain))
2556 ret = intel_pasid_setup_pass_through(iommu, domain,
2557 dev, PASID_RID2PASID);
2558 else
2559 ret = intel_pasid_setup_second_level(iommu, domain,
2560 dev, PASID_RID2PASID);
2561 spin_unlock(&iommu->lock);
2562 if (ret) {
932a6523 2563 dev_err(dev, "Setup RID2PASID failed\n");
71753239 2564 dmar_remove_one_dev_info(dev);
ef848b7e 2565 return NULL;
a7fc93fe
LB
2566 }
2567 }
b718cd3d 2568
cc4e2575 2569 if (dev && domain_context_mapping(domain, dev)) {
932a6523 2570 dev_err(dev, "Domain context map failed\n");
71753239 2571 dmar_remove_one_dev_info(dev);
cc4e2575
JR
2572 return NULL;
2573 }
2574
b718cd3d 2575 return domain;
745f2586
JL
2576}
2577
579305f7
AW
2578static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2579{
2580 *(u16 *)opaque = alias;
2581 return 0;
2582}
2583
76208356 2584static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw)
ba395927 2585{
e083ea5b 2586 struct device_domain_info *info;
76208356 2587 struct dmar_domain *domain = NULL;
579305f7 2588 struct intel_iommu *iommu;
fcc35c63 2589 u16 dma_alias;
ba395927 2590 unsigned long flags;
aa4d066a 2591 u8 bus, devfn;
ba395927 2592
579305f7
AW
2593 iommu = device_to_iommu(dev, &bus, &devfn);
2594 if (!iommu)
2595 return NULL;
2596
146922ec
DW
2597 if (dev_is_pci(dev)) {
2598 struct pci_dev *pdev = to_pci_dev(dev);
276dbf99 2599
579305f7
AW
2600 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2601
2602 spin_lock_irqsave(&device_domain_lock, flags);
2603 info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2604 PCI_BUS_NUM(dma_alias),
2605 dma_alias & 0xff);
2606 if (info) {
2607 iommu = info->iommu;
2608 domain = info->domain;
5a8f40e8 2609 }
579305f7 2610 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927 2611
76208356 2612 /* DMA alias already has a domain, use it */
579305f7 2613 if (info)
76208356 2614 goto out;
579305f7 2615 }
ba395927 2616
146922ec 2617 /* Allocate and initialize new domain for the device */
ab8dfe25 2618 domain = alloc_domain(0);
745f2586 2619 if (!domain)
579305f7 2620 return NULL;
dc534b25 2621 if (domain_init(domain, iommu, gaw)) {
579305f7
AW
2622 domain_exit(domain);
2623 return NULL;
2c2e2c38 2624 }
ba395927 2625
76208356 2626out:
579305f7 2627
76208356
JR
2628 return domain;
2629}
579305f7 2630
76208356
JR
2631static struct dmar_domain *set_domain_for_dev(struct device *dev,
2632 struct dmar_domain *domain)
2633{
2634 struct intel_iommu *iommu;
2635 struct dmar_domain *tmp;
2636 u16 req_id, dma_alias;
2637 u8 bus, devfn;
2638
2639 iommu = device_to_iommu(dev, &bus, &devfn);
2640 if (!iommu)
2641 return NULL;
2642
2643 req_id = ((u16)bus << 8) | devfn;
2644
2645 if (dev_is_pci(dev)) {
2646 struct pci_dev *pdev = to_pci_dev(dev);
2647
2648 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2649
2650 /* register PCI DMA alias device */
2651 if (req_id != dma_alias) {
2652 tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2653 dma_alias & 0xff, NULL, domain);
2654
2655 if (!tmp || tmp != domain)
2656 return tmp;
2657 }
ba395927
KA
2658 }
2659
5db31569 2660 tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
76208356
JR
2661 if (!tmp || tmp != domain)
2662 return tmp;
2663
2664 return domain;
2665}
579305f7 2666
76208356
JR
2667static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
2668{
2669 struct dmar_domain *domain, *tmp;
2670
2671 domain = find_domain(dev);
2672 if (domain)
2673 goto out;
2674
2675 domain = find_or_alloc_domain(dev, gaw);
2676 if (!domain)
2677 goto out;
2678
2679 tmp = set_domain_for_dev(dev, domain);
2680 if (!tmp || domain != tmp) {
579305f7
AW
2681 domain_exit(domain);
2682 domain = tmp;
2683 }
b718cd3d 2684
76208356
JR
2685out:
2686
b718cd3d 2687 return domain;
ba395927
KA
2688}
2689
b213203e
DW
2690static int iommu_domain_identity_map(struct dmar_domain *domain,
2691 unsigned long long start,
2692 unsigned long long end)
ba395927 2693{
c5395d5c
DW
2694 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2695 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2696
2697 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2698 dma_to_mm_pfn(last_vpfn))) {
9f10e5bf 2699 pr_err("Reserving iova failed\n");
b213203e 2700 return -ENOMEM;
ba395927
KA
2701 }
2702
af1089ce 2703 pr_debug("Mapping reserved region %llx-%llx\n", start, end);
ba395927
KA
2704 /*
2705 * RMRR range might have overlap with physical memory range,
2706 * clear it first
2707 */
c5395d5c 2708 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
ba395927 2709
87684fd9
PX
2710 return __domain_mapping(domain, first_vpfn, NULL,
2711 first_vpfn, last_vpfn - first_vpfn + 1,
2712 DMA_PTE_READ|DMA_PTE_WRITE);
b213203e
DW
2713}
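/*
 * Worked example (illustrative, assuming 4KiB VTD pages): identity-mapping
 * start = 0x100000, end = 0x1fffff gives first_vpfn = 0x100 and
 * last_vpfn = 0x1ff, i.e. 256 pages mapped with IOVA == physical address
 * and read/write permission.
 */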
2714
d66ce54b
JR
2715static int domain_prepare_identity_map(struct device *dev,
2716 struct dmar_domain *domain,
2717 unsigned long long start,
2718 unsigned long long end)
b213203e 2719{
19943b0e
DW
2720 /* For _hardware_ passthrough, don't bother. But for software
2721 passthrough, we do it anyway -- it may indicate a memory
2722 range which is reserved in E820 and so didn't get set
2723 up to start with in si_domain */
2724 if (domain == si_domain && hw_pass_through) {
932a6523
BH
2725 dev_warn(dev, "Ignoring identity map for HW passthrough [0x%Lx - 0x%Lx]\n",
2726 start, end);
19943b0e
DW
2727 return 0;
2728 }
2729
932a6523 2730 dev_info(dev, "Setting identity map [0x%Lx - 0x%Lx]\n", start, end);
9f10e5bf 2731
5595b528
DW
2732 if (end < start) {
2733 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2734 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2735 dmi_get_system_info(DMI_BIOS_VENDOR),
2736 dmi_get_system_info(DMI_BIOS_VERSION),
2737 dmi_get_system_info(DMI_PRODUCT_VERSION));
d66ce54b 2738 return -EIO;
5595b528
DW
2739 }
2740
2ff729f5
DW
2741 if (end >> agaw_to_width(domain->agaw)) {
2742 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2743 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2744 agaw_to_width(domain->agaw),
2745 dmi_get_system_info(DMI_BIOS_VENDOR),
2746 dmi_get_system_info(DMI_BIOS_VERSION),
2747 dmi_get_system_info(DMI_PRODUCT_VERSION));
d66ce54b 2748 return -EIO;
2ff729f5 2749 }
19943b0e 2750
d66ce54b
JR
2751 return iommu_domain_identity_map(domain, start, end);
2752}
ba395927 2753
d66ce54b
JR
2754static int iommu_prepare_identity_map(struct device *dev,
2755 unsigned long long start,
2756 unsigned long long end)
2757{
2758 struct dmar_domain *domain;
2759 int ret;
2760
2761 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2762 if (!domain)
2763 return -ENOMEM;
2764
2765 ret = domain_prepare_identity_map(dev, domain, start, end);
2766 if (ret)
2767 domain_exit(domain);
b213203e 2768
ba395927 2769 return ret;
ba395927
KA
2770}
2771
2772static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
0b9d9753 2773 struct device *dev)
ba395927 2774{
0b9d9753 2775 if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927 2776 return 0;
0b9d9753
DW
2777 return iommu_prepare_identity_map(dev, rmrr->base_address,
2778 rmrr->end_address);
ba395927
KA
2779}
2780
d3f13810 2781#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
49a0429e
KA
2782static inline void iommu_prepare_isa(void)
2783{
2784 struct pci_dev *pdev;
2785 int ret;
2786
2787 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2788 if (!pdev)
2789 return;
2790
9f10e5bf 2791 pr_info("Prepare 0-16MiB unity mapping for LPC\n");
0b9d9753 2792 ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);
49a0429e
KA
2793
2794 if (ret)
9f10e5bf 2795 pr_err("Failed to create 0-16MiB identity map - floppy might not work\n");
49a0429e 2796
9b27e82d 2797 pci_dev_put(pdev);
49a0429e
KA
2798}
2799#else
2800static inline void iommu_prepare_isa(void)
2801{
2802 return;
2803}
d3f13810 2804#endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */
49a0429e 2805
2c2e2c38 2806static int md_domain_init(struct dmar_domain *domain, int guest_width);
c7ab48d2 2807
071e1374 2808static int __init si_domain_init(int hw)
2c2e2c38 2809{
e083ea5b 2810 int nid, ret;
2c2e2c38 2811
ab8dfe25 2812 si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2c2e2c38
FY
2813 if (!si_domain)
2814 return -EFAULT;
2815
2c2e2c38
FY
2816 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2817 domain_exit(si_domain);
2818 return -EFAULT;
2819 }
2820
0dc79715 2821 pr_debug("Identity mapping domain allocated\n");
2c2e2c38 2822
19943b0e
DW
2823 if (hw)
2824 return 0;
2825
c7ab48d2 2826 for_each_online_node(nid) {
5dfe8660
TH
2827 unsigned long start_pfn, end_pfn;
2828 int i;
2829
2830 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2831 ret = iommu_domain_identity_map(si_domain,
2832 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2833 if (ret)
2834 return ret;
2835 }
c7ab48d2
DW
2836 }
2837
2c2e2c38
FY
2838 return 0;
2839}
2840
9b226624 2841static int identity_mapping(struct device *dev)
2c2e2c38
FY
2842{
2843 struct device_domain_info *info;
2844
2845 if (likely(!iommu_identity_mapping))
2846 return 0;
2847
9b226624 2848 info = dev->archdata.iommu;
cb452a40
MT
2849 if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2850 return (info->domain == si_domain);
2c2e2c38 2851
2c2e2c38
FY
2852 return 0;
2853}
2854
28ccce0d 2855static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
2c2e2c38 2856{
0ac72664 2857 struct dmar_domain *ndomain;
5a8f40e8 2858 struct intel_iommu *iommu;
156baca8 2859 u8 bus, devfn;
2c2e2c38 2860
5913c9bf 2861 iommu = device_to_iommu(dev, &bus, &devfn);
5a8f40e8
DW
2862 if (!iommu)
2863 return -ENODEV;
2864
5db31569 2865 ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
0ac72664
DW
2866 if (ndomain != domain)
2867 return -EBUSY;
2c2e2c38
FY
2868
2869 return 0;
2870}
2871
0b9d9753 2872static bool device_has_rmrr(struct device *dev)
ea2447f7
TM
2873{
2874 struct dmar_rmrr_unit *rmrr;
832bd858 2875 struct device *tmp;
ea2447f7
TM
2876 int i;
2877
0e242612 2878 rcu_read_lock();
ea2447f7 2879 for_each_rmrr_units(rmrr) {
b683b230
JL
2880 /*
2881 * Return TRUE if this RMRR contains the device that
2882 * is passed in.
2883 */
2884 for_each_active_dev_scope(rmrr->devices,
2885 rmrr->devices_cnt, i, tmp)
0b9d9753 2886 if (tmp == dev) {
0e242612 2887 rcu_read_unlock();
ea2447f7 2888 return true;
b683b230 2889 }
ea2447f7 2890 }
0e242612 2891 rcu_read_unlock();
ea2447f7
TM
2892 return false;
2893}
2894
c875d2c1
AW
2895/*
2896 * There are a couple cases where we need to restrict the functionality of
2897 * devices associated with RMRRs. The first is when evaluating a device for
2898 * identity mapping because problems exist when devices are moved in and out
2899 * of domains and their respective RMRR information is lost. This means that
2900 * a device with associated RMRRs will never be in a "passthrough" domain.
2901 * The second is use of the device through the IOMMU API. This interface
2902 * expects to have full control of the IOVA space for the device. We cannot
2903 * satisfy both the requirement that RMRR access is maintained and have an
2904 * unencumbered IOVA space. We also have no ability to quiesce the device's
2905 * use of the RMRR space or even inform the IOMMU API user of the restriction.
2906 * We therefore prevent devices associated with an RMRR from participating in
2907 * the IOMMU API, which eliminates them from device assignment.
2908 *
2909 * In both cases we assume that PCI USB devices with RMRRs have them largely
2910 * for historical reasons and that the RMRR space is not actively used post
2911 * boot. This exclusion may change if vendors begin to abuse it.
18436afd
DW
2912 *
2913 * The same exception is made for graphics devices, with the requirement that
2914 * any use of the RMRR regions will be torn down before assigning the device
2915 * to a guest.
c875d2c1
AW
2916 */
2917static bool device_is_rmrr_locked(struct device *dev)
2918{
2919 if (!device_has_rmrr(dev))
2920 return false;
2921
2922 if (dev_is_pci(dev)) {
2923 struct pci_dev *pdev = to_pci_dev(dev);
2924
18436afd 2925 if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
c875d2c1
AW
2926 return false;
2927 }
2928
2929 return true;
2930}
2931
3bdb2591 2932static int iommu_should_identity_map(struct device *dev, int startup)
6941af28 2933{
3bdb2591
DW
2934 if (dev_is_pci(dev)) {
2935 struct pci_dev *pdev = to_pci_dev(dev);
ea2447f7 2936
c875d2c1 2937 if (device_is_rmrr_locked(dev))
3bdb2591 2938 return 0;
e0fc7e0b 2939
89a6079d
LB
2940 /*
2941 * Prevent any device marked as untrusted from getting
2942 * placed into the statically identity mapping domain.
2943 */
2944 if (pdev->untrusted)
2945 return 0;
2946
3bdb2591
DW
2947 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2948 return 1;
e0fc7e0b 2949
3bdb2591
DW
2950 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2951 return 1;
6941af28 2952
3bdb2591 2953 if (!(iommu_identity_mapping & IDENTMAP_ALL))
3dfc813d 2954 return 0;
3bdb2591
DW
2955
2956 /*
2957 * We want to start off with all devices in the 1:1 domain, and
2958 * take them out later if we find they can't access all of memory.
2959 *
2960 * However, we can't do this for PCI devices behind bridges,
2961 * because all PCI devices behind the same bridge will end up
2962 * with the same source-id on their transactions.
2963 *
2964 * Practically speaking, we can't change things around for these
2965 * devices at run-time, because we can't be sure there'll be no
2966 * DMA transactions in flight for any of their siblings.
2967 *
2968 * So PCI devices (unless they're on the root bus) as well as
2969 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2970 * the 1:1 domain, just in _case_ one of their siblings turns out
2971 * not to be able to map all of memory.
2972 */
2973 if (!pci_is_pcie(pdev)) {
2974 if (!pci_is_root_bus(pdev->bus))
2975 return 0;
2976 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2977 return 0;
2978 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
3dfc813d 2979 return 0;
3bdb2591
DW
2980 } else {
2981 if (device_has_rmrr(dev))
2982 return 0;
2983 }
3dfc813d 2984
3bdb2591 2985 /*
3dfc813d 2986 * At boot time, we don't yet know if devices will be 64-bit capable.
3bdb2591 2987 * Assume that they will be; if they turn out not to be, then we can
3dfc813d
DW
2988 * take them out of the 1:1 domain later.
2989 */
8fcc5372
CW
2990 if (!startup) {
2991 /*
2992 * If the device's dma_mask is less than the system's memory
2993 * size then this is not a candidate for identity mapping.
2994 */
3bdb2591 2995 u64 dma_mask = *dev->dma_mask;
8fcc5372 2996
3bdb2591
DW
2997 if (dev->coherent_dma_mask &&
2998 dev->coherent_dma_mask < dma_mask)
2999 dma_mask = dev->coherent_dma_mask;
8fcc5372 3000
3bdb2591 3001 return dma_mask >= dma_get_required_mask(dev);
8fcc5372 3002 }
6941af28
DW
3003
3004 return 1;
3005}
3006
cf04eee8
DW
3007static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
3008{
3009 int ret;
3010
3011 if (!iommu_should_identity_map(dev, 1))
3012 return 0;
3013
28ccce0d 3014 ret = domain_add_dev_info(si_domain, dev);
cf04eee8 3015 if (!ret)
932a6523
BH
3016 dev_info(dev, "%s identity mapping\n",
3017 hw ? "Hardware" : "Software");
cf04eee8
DW
3018 else if (ret == -ENODEV)
3019 /* device not associated with an iommu */
3020 ret = 0;
3021
3022 return ret;
3023}
3024
3025
071e1374 3026static int __init iommu_prepare_static_identity_mapping(int hw)
2c2e2c38 3027{
2c2e2c38 3028 struct pci_dev *pdev = NULL;
cf04eee8 3029 struct dmar_drhd_unit *drhd;
d3ed71e5
QC
3030 /* To avoid a -Wunused-but-set-variable warning. */
3031 struct intel_iommu *iommu __maybe_unused;
cf04eee8
DW
3032 struct device *dev;
3033 int i;
3034 int ret = 0;
2c2e2c38 3035
2c2e2c38 3036 for_each_pci_dev(pdev) {
cf04eee8
DW
3037 ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
3038 if (ret)
3039 return ret;
3040 }
3041
3042 for_each_active_iommu(iommu, drhd)
3043 for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
3044 struct acpi_device_physical_node *pn;
3045 struct acpi_device *adev;
3046
3047 if (dev->bus != &acpi_bus_type)
3048 continue;
86080ccc 3049
cf04eee8
DW
3050 adev = to_acpi_device(dev);
3051 mutex_lock(&adev->physical_node_lock);
3052 list_for_each_entry(pn, &adev->physical_node_list, node) {
3053 ret = dev_prepare_static_identity_mapping(pn->dev, hw);
3054 if (ret)
3055 break;
eae460b6 3056 }
cf04eee8
DW
3057 mutex_unlock(&adev->physical_node_lock);
3058 if (ret)
3059 return ret;
62edf5dc 3060 }
2c2e2c38
FY
3061
3062 return 0;
3063}
3064
ffebeb46
JL
3065static void intel_iommu_init_qi(struct intel_iommu *iommu)
3066{
3067 /*
3068 * Start from the sane iommu hardware state.
3069 * If the queued invalidation is already initialized by us
3070 * (for example, while enabling interrupt-remapping) then
3071 * we got the things already rolling from a sane state.
3072 */
3073 if (!iommu->qi) {
3074 /*
3075 * Clear any previous faults.
3076 */
3077 dmar_fault(-1, iommu);
3078 /*
3079 * Disable queued invalidation if supported and already enabled
3080 * before OS handover.
3081 */
3082 dmar_disable_qi(iommu);
3083 }
3084
3085 if (dmar_enable_qi(iommu)) {
3086 /*
3087 * Queued Invalidate not enabled, use Register Based Invalidate
3088 */
3089 iommu->flush.flush_context = __iommu_flush_context;
3090 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
9f10e5bf 3091 pr_info("%s: Using Register based invalidation\n",
ffebeb46
JL
3092 iommu->name);
3093 } else {
3094 iommu->flush.flush_context = qi_flush_context;
3095 iommu->flush.flush_iotlb = qi_flush_iotlb;
9f10e5bf 3096 pr_info("%s: Using Queued invalidation\n", iommu->name);
ffebeb46
JL
3097 }
3098}
3099
091d42e4 3100static int copy_context_table(struct intel_iommu *iommu,
dfddb969 3101 struct root_entry *old_re,
091d42e4
JR
3102 struct context_entry **tbl,
3103 int bus, bool ext)
3104{
dbcd861f 3105 int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
543c8dcf 3106 struct context_entry *new_ce = NULL, ce;
dfddb969 3107 struct context_entry *old_ce = NULL;
543c8dcf 3108 struct root_entry re;
091d42e4
JR
3109 phys_addr_t old_ce_phys;
3110
3111 tbl_idx = ext ? bus * 2 : bus;
dfddb969 3112 memcpy(&re, old_re, sizeof(re));
091d42e4
JR
3113
3114 for (devfn = 0; devfn < 256; devfn++) {
3115 /* First calculate the correct index */
3116 idx = (ext ? devfn * 2 : devfn) % 256;
3117
3118 if (idx == 0) {
3119 /* First save what we may have and clean up */
3120 if (new_ce) {
3121 tbl[tbl_idx] = new_ce;
3122 __iommu_flush_cache(iommu, new_ce,
3123 VTD_PAGE_SIZE);
3124 pos = 1;
3125 }
3126
3127 if (old_ce)
829383e1 3128 memunmap(old_ce);
091d42e4
JR
3129
3130 ret = 0;
3131 if (devfn < 0x80)
543c8dcf 3132 old_ce_phys = root_entry_lctp(&re);
091d42e4 3133 else
543c8dcf 3134 old_ce_phys = root_entry_uctp(&re);
091d42e4
JR
3135
3136 if (!old_ce_phys) {
3137 if (ext && devfn == 0) {
3138 /* No LCTP, try UCTP */
3139 devfn = 0x7f;
3140 continue;
3141 } else {
3142 goto out;
3143 }
3144 }
3145
3146 ret = -ENOMEM;
dfddb969
DW
3147 old_ce = memremap(old_ce_phys, PAGE_SIZE,
3148 MEMREMAP_WB);
091d42e4
JR
3149 if (!old_ce)
3150 goto out;
3151
3152 new_ce = alloc_pgtable_page(iommu->node);
3153 if (!new_ce)
3154 goto out_unmap;
3155
3156 ret = 0;
3157 }
3158
3159 /* Now copy the context entry */
dfddb969 3160 memcpy(&ce, old_ce + idx, sizeof(ce));
091d42e4 3161
cf484d0e 3162 if (!__context_present(&ce))
091d42e4
JR
3163 continue;
3164
dbcd861f
JR
3165 did = context_domain_id(&ce);
3166 if (did >= 0 && did < cap_ndoms(iommu->cap))
3167 set_bit(did, iommu->domain_ids);
3168
cf484d0e
JR
3169 /*
3170 * We need a marker for copied context entries. This
3171 * marker needs to work for the old format as well as
3172 * for extended context entries.
3173 *
3174 * Bit 67 of the context entry is used. In the old
3175 * format this bit is available to software, in the
3176 * extended format it is the PGE bit, but PGE is ignored
3177 * by HW if PASIDs are disabled (and thus still
3178 * available).
3179 *
3180 * So disable PASIDs first and then mark the entry
3181 * copied. This means that we don't copy PASID
3182 * translations from the old kernel, but this is fine as
3183 * faults there are not fatal.
3184 */
3185 context_clear_pasid_enable(&ce);
3186 context_set_copied(&ce);
3187
091d42e4
JR
3188 new_ce[idx] = ce;
3189 }
3190
3191 tbl[tbl_idx + pos] = new_ce;
3192
3193 __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
3194
3195out_unmap:
dfddb969 3196 memunmap(old_ce);
091d42e4
JR
3197
3198out:
3199 return ret;
3200}
3201
3202static int copy_translation_tables(struct intel_iommu *iommu)
3203{
3204 struct context_entry **ctxt_tbls;
dfddb969 3205 struct root_entry *old_rt;
091d42e4
JR
3206 phys_addr_t old_rt_phys;
3207 int ctxt_table_entries;
3208 unsigned long flags;
3209 u64 rtaddr_reg;
3210 int bus, ret;
c3361f2f 3211 bool new_ext, ext;
091d42e4
JR
3212
3213 rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
3214 ext = !!(rtaddr_reg & DMA_RTADDR_RTT);
c3361f2f
JR
3215 new_ext = !!ecap_ecs(iommu->ecap);
3216
3217 /*
3218 * The RTT bit can only be changed when translation is disabled,
3219 * but disabling translation means to open a window for data
3220 * corruption. So bail out and don't copy anything if we would
3221 * have to change the bit.
3222 */
3223 if (new_ext != ext)
3224 return -EINVAL;
091d42e4
JR
3225
3226 old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
3227 if (!old_rt_phys)
3228 return -EINVAL;
3229
dfddb969 3230 old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
091d42e4
JR
3231 if (!old_rt)
3232 return -ENOMEM;
3233
3234 /* This is too big for the stack - allocate it from slab */
3235 ctxt_table_entries = ext ? 512 : 256;
3236 ret = -ENOMEM;
6396bb22 3237 ctxt_tbls = kcalloc(ctxt_table_entries, sizeof(void *), GFP_KERNEL);
091d42e4
JR
3238 if (!ctxt_tbls)
3239 goto out_unmap;
3240
3241 for (bus = 0; bus < 256; bus++) {
3242 ret = copy_context_table(iommu, &old_rt[bus],
3243 ctxt_tbls, bus, ext);
3244 if (ret) {
3245 pr_err("%s: Failed to copy context table for bus %d\n",
3246 iommu->name, bus);
3247 continue;
3248 }
3249 }
3250
3251 spin_lock_irqsave(&iommu->lock, flags);
3252
3253 /* Context tables are copied, now write them to the root_entry table */
3254 for (bus = 0; bus < 256; bus++) {
3255 int idx = ext ? bus * 2 : bus;
3256 u64 val;
3257
3258 if (ctxt_tbls[idx]) {
3259 val = virt_to_phys(ctxt_tbls[idx]) | 1;
3260 iommu->root_entry[bus].lo = val;
3261 }
3262
3263 if (!ext || !ctxt_tbls[idx + 1])
3264 continue;
3265
3266 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
3267 iommu->root_entry[bus].hi = val;
3268 }
3269
3270 spin_unlock_irqrestore(&iommu->lock, flags);
3271
3272 kfree(ctxt_tbls);
3273
3274 __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
3275
3276 ret = 0;
3277
3278out_unmap:
dfddb969 3279 memunmap(old_rt);
091d42e4
JR
3280
3281 return ret;
3282}
3283
b779260b 3284static int __init init_dmars(void)
ba395927
KA
3285{
3286 struct dmar_drhd_unit *drhd;
3287 struct dmar_rmrr_unit *rmrr;
a87f4918 3288 bool copied_tables = false;
832bd858 3289 struct device *dev;
ba395927 3290 struct intel_iommu *iommu;
13cf0174 3291 int i, ret;
2c2e2c38 3292
ba395927
KA
3293 /*
3294 * for each drhd
3295 * allocate root
3296 * initialize and program root entry to not present
3297 * endfor
3298 */
3299 for_each_drhd_unit(drhd) {
5e0d2a6f 3300 /*
3301 * lock not needed as this is only incremented in the single
3302 * threaded kernel __init code path all other access are read
3303 * only
3304 */
78d8e704 3305 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
1b198bb0
MT
3306 g_num_of_iommus++;
3307 continue;
3308 }
9f10e5bf 3309 pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
5e0d2a6f 3310 }
3311
ffebeb46
JL
3312 /* Preallocate enough resources for IOMMU hot-addition */
3313 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
3314 g_num_of_iommus = DMAR_UNITS_SUPPORTED;
3315
d9630fe9
WH
3316 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
3317 GFP_KERNEL);
3318 if (!g_iommus) {
9f10e5bf 3319 pr_err("Allocating global iommu array failed\n");
d9630fe9
WH
3320 ret = -ENOMEM;
3321 goto error;
3322 }
3323
7c919779 3324 for_each_active_iommu(iommu, drhd) {
56283174
LB
3325 /*
3326 * Find the max pasid size of all IOMMU's in the system.
3327 * We need to ensure the system pasid table is no bigger
3328 * than the smallest supported.
3329 */
765b6a98 3330 if (pasid_supported(iommu)) {
56283174
LB
3331 u32 temp = 2 << ecap_pss(iommu->ecap);
3332
3333 intel_pasid_max_id = min_t(u32, temp,
3334 intel_pasid_max_id);
3335 }
3336
d9630fe9 3337 g_iommus[iommu->seq_id] = iommu;
ba395927 3338
b63d80d1
JR
3339 intel_iommu_init_qi(iommu);
3340
e61d98d8
SS
3341 ret = iommu_init_domains(iommu);
3342 if (ret)
989d51fc 3343 goto free_iommu;
e61d98d8 3344
4158c2ec
JR
3345 init_translation_status(iommu);
3346
091d42e4
JR
3347 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
3348 iommu_disable_translation(iommu);
3349 clear_translation_pre_enabled(iommu);
3350 pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3351 iommu->name);
3352 }
4158c2ec 3353
ba395927
KA
3354 /*
3355 * TBD:
3356 * we could share the same root & context tables
25985edc 3357 * among all IOMMU's. Need to Split it later.
ba395927
KA
3358 */
3359 ret = iommu_alloc_root_entry(iommu);
ffebeb46 3360 if (ret)
989d51fc 3361 goto free_iommu;
5f0a7f76 3362
091d42e4
JR
3363 if (translation_pre_enabled(iommu)) {
3364 pr_info("Translation already enabled - trying to copy translation structures\n");
3365
3366 ret = copy_translation_tables(iommu);
3367 if (ret) {
3368 /*
3369 * We found the IOMMU with translation
3370 * enabled - but failed to copy over the
3371 * old root-entry table. Try to proceed
3372 * by disabling translation now and
3373 * allocating a clean root-entry table.
3374 * This might cause DMAR faults, but
3375 * probably the dump will still succeed.
3376 */
3377 pr_err("Failed to copy translation tables from previous kernel for %s\n",
3378 iommu->name);
3379 iommu_disable_translation(iommu);
3380 clear_translation_pre_enabled(iommu);
3381 } else {
3382 pr_info("Copied translation tables from previous kernel for %s\n",
3383 iommu->name);
a87f4918 3384 copied_tables = true;
091d42e4
JR
3385 }
3386 }
3387
4ed0d3e6 3388 if (!ecap_pass_through(iommu->ecap))
19943b0e 3389 hw_pass_through = 0;
8a94ade4 3390#ifdef CONFIG_INTEL_IOMMU_SVM
765b6a98 3391 if (pasid_supported(iommu))
d9737953 3392 intel_svm_init(iommu);
8a94ade4 3393#endif
ba395927
KA
3394 }
3395
a4c34ff1
JR
3396 /*
3397 * Now that qi is enabled on all iommus, set the root entry and flush
3398 * caches. This is required on some Intel X58 chipsets, otherwise the
3399 * flush_context function will loop forever and the boot hangs.
3400 */
3401 for_each_active_iommu(iommu, drhd) {
3402 iommu_flush_write_buffer(iommu);
3403 iommu_set_root_entry(iommu);
3404 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
3405 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3406 }
3407
19943b0e 3408 if (iommu_pass_through)
e0fc7e0b
DW
3409 iommu_identity_mapping |= IDENTMAP_ALL;
3410
d3f13810 3411#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
5daab580 3412 dmar_map_gfx = 0;
19943b0e 3413#endif
e0fc7e0b 3414
5daab580
LB
3415 if (!dmar_map_gfx)
3416 iommu_identity_mapping |= IDENTMAP_GFX;
3417
21e722c4
AR
3418 check_tylersburg_isoch();
3419
86080ccc
JR
3420 if (iommu_identity_mapping) {
3421 ret = si_domain_init(hw_pass_through);
3422 if (ret)
3423 goto free_iommu;
3424 }
3425
e0fc7e0b 3426
a87f4918
JR
3427 /*
3428 * If we copied translations from a previous kernel in the kdump
3429 * case, we can not assign the devices to domains now, as that
3430 * would eliminate the old mappings. So skip this part and defer
3431 * the assignment to device driver initialization time.
3432 */
3433 if (copied_tables)
3434 goto domains_done;
3435
ba395927 3436 /*
19943b0e
DW
3437 * If pass through is not set or not enabled, set up context entries for
3438 * identity mappings for rmrr, gfx, and isa and may fall back to static
3439 * identity mapping if iommu_identity_mapping is set.
ba395927 3440 */
19943b0e
DW
3441 if (iommu_identity_mapping) {
3442 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
4ed0d3e6 3443 if (ret) {
9f10e5bf 3444 pr_crit("Failed to setup IOMMU pass-through\n");
989d51fc 3445 goto free_iommu;
ba395927
KA
3446 }
3447 }
ba395927 3448 /*
19943b0e
DW
3449 * For each rmrr
3450 * for each dev attached to rmrr
3451 * do
3452 * locate drhd for dev, alloc domain for dev
3453 * allocate free domain
3454 * allocate page table entries for rmrr
3455 * if context not allocated for bus
3456 * allocate and init context
3457 * set present in root table for this bus
3458 * init context with domain, translation etc
3459 * endfor
3460 * endfor
ba395927 3461 */
9f10e5bf 3462 pr_info("Setting RMRR:\n");
19943b0e 3463 for_each_rmrr_units(rmrr) {
b683b230
JL
3464 /* some BIOS lists non-exist devices in DMAR table. */
3465 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
832bd858 3466 i, dev) {
0b9d9753 3467 ret = iommu_prepare_rmrr_dev(rmrr, dev);
19943b0e 3468 if (ret)
9f10e5bf 3469 pr_err("Mapping reserved region failed\n");
ba395927 3470 }
4ed0d3e6 3471 }
49a0429e 3472
19943b0e
DW
3473 iommu_prepare_isa();
3474
a87f4918
JR
3475domains_done:
3476
ba395927
KA
3477 /*
3478 * for each drhd
3479 * enable fault log
3480 * global invalidate context cache
3481 * global invalidate iotlb
3482 * enable translation
3483 */
7c919779 3484 for_each_iommu(iommu, drhd) {
51a63e67
JC
3485 if (drhd->ignored) {
3486 /*
3487 * we always have to disable PMRs or DMA may fail on
3488 * this device
3489 */
3490 if (force_on)
7c919779 3491 iommu_disable_protect_mem_regions(iommu);
ba395927 3492 continue;
51a63e67 3493 }
ba395927
KA
3494
3495 iommu_flush_write_buffer(iommu);
3496
a222a7f0 3497#ifdef CONFIG_INTEL_IOMMU_SVM
765b6a98 3498 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
a7755c3c
LB
3499 /*
3500 * Call dmar_alloc_hwirq() with dmar_global_lock held,
3501 * could cause possible lock race condition.
3502 */
3503 up_write(&dmar_global_lock);
a222a7f0 3504 ret = intel_svm_enable_prq(iommu);
a7755c3c 3505 down_write(&dmar_global_lock);
a222a7f0
DW
3506 if (ret)
3507 goto free_iommu;
3508 }
3509#endif
3460a6d9
KA
3510 ret = dmar_set_interrupt(iommu);
3511 if (ret)
989d51fc 3512 goto free_iommu;
3460a6d9 3513
8939ddf6
JR
3514 if (!translation_pre_enabled(iommu))
3515 iommu_enable_translation(iommu);
3516
b94996c9 3517 iommu_disable_protect_mem_regions(iommu);
ba395927
KA
3518 }
3519
3520 return 0;
989d51fc
JL
3521
3522free_iommu:
ffebeb46
JL
3523 for_each_active_iommu(iommu, drhd) {
3524 disable_dmar_iommu(iommu);
a868e6b7 3525 free_dmar_iommu(iommu);
ffebeb46 3526 }
13cf0174 3527
d9630fe9 3528 kfree(g_iommus);
13cf0174 3529
989d51fc 3530error:
ba395927
KA
3531 return ret;
3532}
3533
5a5e02a6 3534/* This takes a number of _MM_ pages, not VTD pages */
2aac6304 3535static unsigned long intel_alloc_iova(struct device *dev,
875764de
DW
3536 struct dmar_domain *domain,
3537 unsigned long nrpages, uint64_t dma_mask)
ba395927 3538{
e083ea5b 3539 unsigned long iova_pfn;
ba395927 3540
875764de
DW
3541 /* Restrict dma_mask to the width that the iommu can handle */
3542 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
8f6429c7
RM
3543 /* Ensure we reserve the whole size-aligned region */
3544 nrpages = __roundup_pow_of_two(nrpages);
875764de
DW
3545
3546 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
ba395927
KA
3547 /*
3548 * First try to allocate an io virtual address in
284901a9 3549 * DMA_BIT_MASK(32), and if that fails then try allocating
3609801e 3550 * from the higher range.
ba395927 3551 */
22e2f9fa 3552 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
538d5b33 3553 IOVA_PFN(DMA_BIT_MASK(32)), false);
22e2f9fa
OP
3554 if (iova_pfn)
3555 return iova_pfn;
875764de 3556 }
538d5b33
TN
3557 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
3558 IOVA_PFN(dma_mask), true);
22e2f9fa 3559 if (unlikely(!iova_pfn)) {
932a6523 3560 dev_err(dev, "Allocating %ld-page iova failed", nrpages);
2aac6304 3561 return 0;
f76aec76
KA
3562 }
3563
22e2f9fa 3564 return iova_pfn;
f76aec76
KA
3565}
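/*
 * Editor's note (illustrative, not part of the original file): a worked
 * example of the rounding done above, assuming 4KiB mm pages.  A request
 * for 5 pages is first rounded up to a power of two:
 *
 *	nrpages = __roundup_pow_of_two(5) = 8
 *
 * so the IOVA allocator always hands back a naturally size-aligned block.
 * For a device whose dma_mask covers more than 32 bits (and unless
 * dmar_forcedac is set), the first alloc_iova_fast() call is still limited
 * to IOVA_PFN(DMA_BIT_MASK(32)), i.e. page frame numbers up to roughly
 * 1 << 20 with 4KiB pages; only if that 32-bit window is exhausted does
 * the second call search the full range permitted by dma_mask.
 */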
3566
9ddbfb42 3567struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
f76aec76 3568{
1c5ebba9 3569 struct dmar_domain *domain, *tmp;
b1ce5b79 3570 struct dmar_rmrr_unit *rmrr;
b1ce5b79
JR
3571 struct device *i_dev;
3572 int i, ret;
f76aec76 3573
1c5ebba9
JR
3574 domain = find_domain(dev);
3575 if (domain)
3576 goto out;
3577
3578 domain = find_or_alloc_domain(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
3579 if (!domain)
3580 goto out;
ba395927 3581
b1ce5b79
JR
3582 /* We have a new domain - setup possible RMRRs for the device */
3583 rcu_read_lock();
3584 for_each_rmrr_units(rmrr) {
3585 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
3586 i, i_dev) {
3587 if (i_dev != dev)
3588 continue;
3589
3590 ret = domain_prepare_identity_map(dev, domain,
3591 rmrr->base_address,
3592 rmrr->end_address);
3593 if (ret)
3594 dev_err(dev, "Mapping reserved region failed\n");
3595 }
3596 }
3597 rcu_read_unlock();
3598
1c5ebba9
JR
3599 tmp = set_domain_for_dev(dev, domain);
3600 if (!tmp || domain != tmp) {
3601 domain_exit(domain);
3602 domain = tmp;
3603 }
3604
3605out:
3606
3607 if (!domain)
932a6523 3608 dev_err(dev, "Allocating domain failed\n");
1c5ebba9
JR
3609
3610
f76aec76
KA
3611 return domain;
3612}
3613
ecb509ec 3614/* Check if the dev needs to go through the non-identity map and unmap process. */
48b2c937 3615static bool iommu_need_mapping(struct device *dev)
2c2e2c38
FY
3616{
3617 int found;
3618
3d89194a 3619 if (iommu_dummy(dev))
48b2c937 3620 return false;
1e4c64c4 3621
2c2e2c38 3622 if (!iommu_identity_mapping)
48b2c937 3623 return true;
2c2e2c38 3624
9b226624 3625 found = identity_mapping(dev);
2c2e2c38 3626 if (found) {
ecb509ec 3627 if (iommu_should_identity_map(dev, 0))
48b2c937
CH
3628 return false;
3629
3630 /*
3631 * The 32 bit DMA device is removed from si_domain and falls
3632 * back to non-identity mapping.
3633 */
3634 dmar_remove_one_dev_info(dev);
3635 dev_info(dev, "32bit DMA uses non-identity mapping\n");
2c2e2c38
FY
3636 } else {
3637 /*
3638 * In case a 64 bit DMA device is detached from a VM, the device
3639 * is put into si_domain for identity mapping.
3640 */
48b2c937
CH
3641 if (iommu_should_identity_map(dev, 0) &&
3642 !domain_add_dev_info(si_domain, dev)) {
3643 dev_info(dev, "64bit DMA uses identity mapping\n");
3644 return false;
2c2e2c38
FY
3645 }
3646 }
3647
48b2c937 3648 return true;
2c2e2c38
FY
3649}
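/*
 * Editor's summary (illustrative, not part of the original file): the
 * outcome of iommu_need_mapping() as used by the map/unmap paths below,
 * case by case:
 *
 *	iommu_dummy(dev)                          -> false (bypass IOMMU)
 *	!iommu_identity_mapping                   -> true  (use IOMMU mapping)
 *	in si_domain and still identity-eligible  -> false (direct DMA)
 *	in si_domain but no longer eligible       -> true, after moving the
 *	                                             device out of si_domain
 *	not in si_domain but identity-eligible    -> false, after adding the
 *	                                             device to si_domain
 *
 * A "false" return makes intel_map_page() and friends fall through to the
 * dma_direct_* helpers.
 */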
3650
21d5d27c
LG
3651static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
3652 size_t size, int dir, u64 dma_mask)
f76aec76 3653{
f76aec76 3654 struct dmar_domain *domain;
5b6985ce 3655 phys_addr_t start_paddr;
2aac6304 3656 unsigned long iova_pfn;
f76aec76 3657 int prot = 0;
6865f0d1 3658 int ret;
8c11e798 3659 struct intel_iommu *iommu;
33041ec0 3660 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
f76aec76
KA
3661
3662 BUG_ON(dir == DMA_NONE);
2c2e2c38 3663
5040a918 3664 domain = get_valid_domain_for_dev(dev);
f76aec76 3665 if (!domain)
524a669b 3666 return DMA_MAPPING_ERROR;
f76aec76 3667
8c11e798 3668 iommu = domain_get_iommu(domain);
88cb6a74 3669 size = aligned_nrpages(paddr, size);
f76aec76 3670
2aac6304
OP
3671 iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
3672 if (!iova_pfn)
f76aec76
KA
3673 goto error;
3674
ba395927
KA
3675 /*
3676 * Check if DMAR supports zero-length reads on write only
3677 * mappings.
3678 */
3679 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3680 !cap_zlr(iommu->cap))
ba395927
KA
3681 prot |= DMA_PTE_READ;
3682 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3683 prot |= DMA_PTE_WRITE;
3684 /*
6865f0d1 3685 * paddr to (paddr + size) might span a partial page; we should map the
ba395927 3686 * whole page. Note: if two parts of one page are mapped separately, we
6865f0d1 3687 * might have two guest_addrs mapping to the same host paddr, but this
ba395927
KA
3688 * is not a big problem
3689 */
2aac6304 3690 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova_pfn),
33041ec0 3691 mm_to_dma_pfn(paddr_pfn), size, prot);
ba395927
KA
3692 if (ret)
3693 goto error;
3694
2aac6304 3695 start_paddr = (phys_addr_t)iova_pfn << PAGE_SHIFT;
03d6a246
DW
3696 start_paddr += paddr & ~PAGE_MASK;
3697 return start_paddr;
ba395927 3698
ba395927 3699error:
2aac6304 3700 if (iova_pfn)
22e2f9fa 3701 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
932a6523
BH
3702 dev_err(dev, "Device request: %zx@%llx dir %d --- failed\n",
3703 size, (unsigned long long)paddr, dir);
524a669b 3704 return DMA_MAPPING_ERROR;
ba395927
KA
3705}
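/*
 * Editor's worked example (illustrative, not part of the original file),
 * assuming 4KiB VT-d pages: mapping paddr = 0x12345, size = 0x2000 gives
 *
 *	offset in page = 0x345, span = 0x345 + 0x2000 = 0x2345 bytes,
 *	which rounds up to 0x3000, so aligned_nrpages() returns 3 pages.
 *
 * Three whole pages are mapped, and the returned DMA address is the start
 * of the allocated IOVA block plus the original sub-page offset 0x345, so
 * the device sees the buffer at the same offset within the page.
 */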
3706
ffbbef5c
FT
3707static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3708 unsigned long offset, size_t size,
3709 enum dma_data_direction dir,
00085f1e 3710 unsigned long attrs)
bb9e6d65 3711{
9cc0c2af
CH
3712 if (iommu_need_mapping(dev))
3713 return __intel_map_single(dev, page_to_phys(page) + offset,
3714 size, dir, *dev->dma_mask);
3715 return dma_direct_map_page(dev, page, offset, size, dir, attrs);
21d5d27c
LG
3716}
3717
3718static dma_addr_t intel_map_resource(struct device *dev, phys_addr_t phys_addr,
3719 size_t size, enum dma_data_direction dir,
3720 unsigned long attrs)
3721{
9cc0c2af
CH
3722 if (iommu_need_mapping(dev))
3723 return __intel_map_single(dev, phys_addr, size, dir,
3724 *dev->dma_mask);
3725 return dma_direct_map_resource(dev, phys_addr, size, dir, attrs);
bb9e6d65
FT
3726}
3727
769530e4 3728static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
ba395927 3729{
f76aec76 3730 struct dmar_domain *domain;
d794dc9b 3731 unsigned long start_pfn, last_pfn;
769530e4 3732 unsigned long nrpages;
2aac6304 3733 unsigned long iova_pfn;
8c11e798 3734 struct intel_iommu *iommu;
ea8ea460 3735 struct page *freelist;
f7b0c4ce 3736 struct pci_dev *pdev = NULL;
ba395927 3737
1525a29a 3738 domain = find_domain(dev);
ba395927
KA
3739 BUG_ON(!domain);
3740
8c11e798
WH
3741 iommu = domain_get_iommu(domain);
3742
2aac6304 3743 iova_pfn = IOVA_PFN(dev_addr);
ba395927 3744
769530e4 3745 nrpages = aligned_nrpages(dev_addr, size);
2aac6304 3746 start_pfn = mm_to_dma_pfn(iova_pfn);
769530e4 3747 last_pfn = start_pfn + nrpages - 1;
ba395927 3748
f7b0c4ce
LB
3749 if (dev_is_pci(dev))
3750 pdev = to_pci_dev(dev);
3751
932a6523 3752 dev_dbg(dev, "Device unmapping: pfn %lx-%lx\n", start_pfn, last_pfn);
ba395927 3753
ea8ea460 3754 freelist = domain_unmap(domain, start_pfn, last_pfn);
d794dc9b 3755
f7b0c4ce 3756 if (intel_iommu_strict || (pdev && pdev->untrusted)) {
a1ddcbe9 3757 iommu_flush_iotlb_psi(iommu, domain, start_pfn,
769530e4 3758 nrpages, !freelist, 0);
5e0d2a6f 3759 /* free iova */
22e2f9fa 3760 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages));
ea8ea460 3761 dma_free_pagelist(freelist);
5e0d2a6f 3762 } else {
13cf0174
JR
3763 queue_iova(&domain->iovad, iova_pfn, nrpages,
3764 (unsigned long)freelist);
5e0d2a6f 3765 /*
3766 * queue up the release of the unmap to avoid the roughly 1/6th of
3767 * the cpu time otherwise spent in the iotlb flush operation...
3768 */
5e0d2a6f 3769 }
ba395927
KA
3770}
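/*
 * Editor's note (illustrative, not part of the original file): the branch
 * above implements the usual strict vs. deferred invalidation trade-off.
 * With intel_iommu=strict on the kernel command line (or for untrusted
 * PCI devices), the IOTLB is flushed and the IOVA freed synchronously on
 * every unmap; otherwise the IOVA and page freelist are queued and flushed
 * lazily in batches, trading a short window in which stale translations
 * may remain for noticeably lower unmap overhead.
 */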
3771
d41a4adb
JL
3772static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3773 size_t size, enum dma_data_direction dir,
00085f1e 3774 unsigned long attrs)
d41a4adb 3775{
9cc0c2af
CH
3776 if (iommu_need_mapping(dev))
3777 intel_unmap(dev, dev_addr, size);
3778 else
3779 dma_direct_unmap_page(dev, dev_addr, size, dir, attrs);
3780}
3781
3782static void intel_unmap_resource(struct device *dev, dma_addr_t dev_addr,
3783 size_t size, enum dma_data_direction dir, unsigned long attrs)
3784{
3785 if (iommu_need_mapping(dev))
3786 intel_unmap(dev, dev_addr, size);
d41a4adb
JL
3787}
3788
5040a918 3789static void *intel_alloc_coherent(struct device *dev, size_t size,
baa676fc 3790 dma_addr_t *dma_handle, gfp_t flags,
00085f1e 3791 unsigned long attrs)
ba395927 3792{
7ec916f8
CH
3793 struct page *page = NULL;
3794 int order;
ba395927 3795
9cc0c2af
CH
3796 if (!iommu_need_mapping(dev))
3797 return dma_direct_alloc(dev, size, dma_handle, flags, attrs);
3798
7ec916f8
CH
3799 size = PAGE_ALIGN(size);
3800 order = get_order(size);
7ec916f8
CH
3801
3802 if (gfpflags_allow_blocking(flags)) {
3803 unsigned int count = size >> PAGE_SHIFT;
3804
d834c5ab
MS
3805 page = dma_alloc_from_contiguous(dev, count, order,
3806 flags & __GFP_NOWARN);
7ec916f8
CH
3807 }
3808
3809 if (!page)
3810 page = alloc_pages(flags, order);
3811 if (!page)
3812 return NULL;
3813 memset(page_address(page), 0, size);
3814
21d5d27c
LG
3815 *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
3816 DMA_BIDIRECTIONAL,
3817 dev->coherent_dma_mask);
524a669b 3818 if (*dma_handle != DMA_MAPPING_ERROR)
7ec916f8
CH
3819 return page_address(page);
3820 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3821 __free_pages(page, order);
36746436 3822
ba395927
KA
3823 return NULL;
3824}
3825
5040a918 3826static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
00085f1e 3827 dma_addr_t dma_handle, unsigned long attrs)
ba395927 3828{
7ec916f8
CH
3829 int order;
3830 struct page *page = virt_to_page(vaddr);
3831
9cc0c2af
CH
3832 if (!iommu_need_mapping(dev))
3833 return dma_direct_free(dev, size, vaddr, dma_handle, attrs);
3834
7ec916f8
CH
3835 size = PAGE_ALIGN(size);
3836 order = get_order(size);
3837
3838 intel_unmap(dev, dma_handle, size);
3839 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3840 __free_pages(page, order);
ba395927
KA
3841}
3842
5040a918 3843static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
d7ab5c46 3844 int nelems, enum dma_data_direction dir,
00085f1e 3845 unsigned long attrs)
ba395927 3846{
769530e4
OP
3847 dma_addr_t startaddr = sg_dma_address(sglist) & PAGE_MASK;
3848 unsigned long nrpages = 0;
3849 struct scatterlist *sg;
3850 int i;
3851
9cc0c2af
CH
3852 if (!iommu_need_mapping(dev))
3853 return dma_direct_unmap_sg(dev, sglist, nelems, dir, attrs);
3854
769530e4
OP
3855 for_each_sg(sglist, sg, nelems, i) {
3856 nrpages += aligned_nrpages(sg_dma_address(sg), sg_dma_len(sg));
3857 }
3858
3859 intel_unmap(dev, startaddr, nrpages << VTD_PAGE_SHIFT);
ba395927
KA
3860}
3861
5040a918 3862static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
00085f1e 3863 enum dma_data_direction dir, unsigned long attrs)
ba395927 3864{
ba395927 3865 int i;
ba395927 3866 struct dmar_domain *domain;
f76aec76
KA
3867 size_t size = 0;
3868 int prot = 0;
2aac6304 3869 unsigned long iova_pfn;
f76aec76 3870 int ret;
c03ab37c 3871 struct scatterlist *sg;
b536d24d 3872 unsigned long start_vpfn;
8c11e798 3873 struct intel_iommu *iommu;
ba395927
KA
3874
3875 BUG_ON(dir == DMA_NONE);
48b2c937 3876 if (!iommu_need_mapping(dev))
9cc0c2af 3877 return dma_direct_map_sg(dev, sglist, nelems, dir, attrs);
ba395927 3878
5040a918 3879 domain = get_valid_domain_for_dev(dev);
f76aec76
KA
3880 if (!domain)
3881 return 0;
3882
8c11e798
WH
3883 iommu = domain_get_iommu(domain);
3884
b536d24d 3885 for_each_sg(sglist, sg, nelems, i)
88cb6a74 3886 size += aligned_nrpages(sg->offset, sg->length);
f76aec76 3887
2aac6304 3888 iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
5040a918 3889 *dev->dma_mask);
2aac6304 3890 if (!iova_pfn) {
c03ab37c 3891 sglist->dma_length = 0;
f76aec76
KA
3892 return 0;
3893 }
3894
3895 /*
3896 * Check if DMAR supports zero-length reads on write only
3897 * mappings.
3898 */
3899 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3900 !cap_zlr(iommu->cap))
f76aec76
KA
3901 prot |= DMA_PTE_READ;
3902 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3903 prot |= DMA_PTE_WRITE;
3904
2aac6304 3905 start_vpfn = mm_to_dma_pfn(iova_pfn);
e1605495 3906
f532959b 3907 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
e1605495 3908 if (unlikely(ret)) {
e1605495 3909 dma_pte_free_pagetable(domain, start_vpfn,
bc24c571
DD
3910 start_vpfn + size - 1,
3911 agaw_to_level(domain->agaw) + 1);
22e2f9fa 3912 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
e1605495 3913 return 0;
ba395927
KA
3914 }
3915
ba395927
KA
3916 return nelems;
3917}
3918
02b4da5f 3919static const struct dma_map_ops intel_dma_ops = {
baa676fc
AP
3920 .alloc = intel_alloc_coherent,
3921 .free = intel_free_coherent,
ba395927
KA
3922 .map_sg = intel_map_sg,
3923 .unmap_sg = intel_unmap_sg,
ffbbef5c
FT
3924 .map_page = intel_map_page,
3925 .unmap_page = intel_unmap_page,
21d5d27c 3926 .map_resource = intel_map_resource,
9cc0c2af 3927 .unmap_resource = intel_unmap_resource,
fec777c3 3928 .dma_supported = dma_direct_supported,
ba395927
KA
3929};
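/*
 * Editor's sketch (not part of the original file): a driver never calls
 * these ops directly; it goes through the generic DMA API, which
 * dispatches to intel_map_page()/intel_unmap_page() above once dma_ops
 * has been pointed at intel_dma_ops.  A minimal, hypothetical driver-side
 * use would look roughly like this:
 */
#if 0	/* illustrative only */
static int example_dma_xfer(struct device *dev, void *buf, size_t len)
{
	/* ends up in intel_map_page() via the map_page callback */
	dma_addr_t dma = dma_map_single(dev, buf, len, DMA_TO_DEVICE);

	if (dma_mapping_error(dev, dma))
		return -ENOMEM;

	/* ... program the device with 'dma' and wait for completion ... */

	/* ends up in intel_unmap_page() via the unmap_page callback */
	dma_unmap_single(dev, dma, len, DMA_TO_DEVICE);
	return 0;
}
#endif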
3930
3931static inline int iommu_domain_cache_init(void)
3932{
3933 int ret = 0;
3934
3935 iommu_domain_cache = kmem_cache_create("iommu_domain",
3936 sizeof(struct dmar_domain),
3937 0,
3938 SLAB_HWCACHE_ALIGN,
3940 NULL);
3941 if (!iommu_domain_cache) {
9f10e5bf 3942 pr_err("Couldn't create iommu_domain cache\n");
ba395927
KA
3943 ret = -ENOMEM;
3944 }
3945
3946 return ret;
3947}
3948
3949static inline int iommu_devinfo_cache_init(void)
3950{
3951 int ret = 0;
3952
3953 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3954 sizeof(struct device_domain_info),
3955 0,
3956 SLAB_HWCACHE_ALIGN,
ba395927
KA
3957 NULL);
3958 if (!iommu_devinfo_cache) {
9f10e5bf 3959 pr_err("Couldn't create devinfo cache\n");
ba395927
KA
3960 ret = -ENOMEM;
3961 }
3962
3963 return ret;
3964}
3965
ba395927
KA
3966static int __init iommu_init_mempool(void)
3967{
3968 int ret;
ae1ff3d6 3969 ret = iova_cache_get();
ba395927
KA
3970 if (ret)
3971 return ret;
3972
3973 ret = iommu_domain_cache_init();
3974 if (ret)
3975 goto domain_error;
3976
3977 ret = iommu_devinfo_cache_init();
3978 if (!ret)
3979 return ret;
3980
3981 kmem_cache_destroy(iommu_domain_cache);
3982domain_error:
ae1ff3d6 3983 iova_cache_put();
ba395927
KA
3984
3985 return -ENOMEM;
3986}
3987
3988static void __init iommu_exit_mempool(void)
3989{
3990 kmem_cache_destroy(iommu_devinfo_cache);
3991 kmem_cache_destroy(iommu_domain_cache);
ae1ff3d6 3992 iova_cache_put();
ba395927
KA
3993}
3994
556ab45f
DW
3995static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3996{
3997 struct dmar_drhd_unit *drhd;
3998 u32 vtbar;
3999 int rc;
4000
4001 /* We know that this device on this chipset has its own IOMMU.
4002 * If we find it under a different IOMMU, then the BIOS is lying
4003 * to us. Hope that the IOMMU for this device is actually
4004 * disabled, and it needs no translation...
4005 */
4006 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
4007 if (rc) {
4008 /* "can't" happen */
4009 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
4010 return;
4011 }
4012 vtbar &= 0xffff0000;
4013
4014 /* we know that this iommu should be at offset 0xa000 from vtbar */
4015 drhd = dmar_find_matched_drhd_unit(pdev);
4016 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
4017 TAINT_FIRMWARE_WORKAROUND,
4018 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
4019 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
4020}
4021DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
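/*
 * Editor's worked example (illustrative, not part of the original file):
 * if the config dword read at offset 0xb0 returned, say, the hypothetical
 * value 0xfed91001, then vtbar &= 0xffff0000 leaves 0xfed90000, and the
 * quirk expects the DRHD that the DMAR table associates with this IOAT
 * device to live at 0xfed90000 + 0xa000 = 0xfed9a000.  Any other
 * drhd->reg_base_addr means the BIOS DMAR table is wrong, so the device
 * is marked as a dummy and bypasses translation.
 */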
4022
ba395927
KA
4023static void __init init_no_remapping_devices(void)
4024{
4025 struct dmar_drhd_unit *drhd;
832bd858 4026 struct device *dev;
b683b230 4027 int i;
ba395927
KA
4028
4029 for_each_drhd_unit(drhd) {
4030 if (!drhd->include_all) {
b683b230
JL
4031 for_each_active_dev_scope(drhd->devices,
4032 drhd->devices_cnt, i, dev)
4033 break;
832bd858 4034 /* ignore DMAR unit if no devices exist */
ba395927
KA
4035 if (i == drhd->devices_cnt)
4036 drhd->ignored = 1;
4037 }
4038 }
4039
7c919779 4040 for_each_active_drhd_unit(drhd) {
7c919779 4041 if (drhd->include_all)
ba395927
KA
4042 continue;
4043
b683b230
JL
4044 for_each_active_dev_scope(drhd->devices,
4045 drhd->devices_cnt, i, dev)
832bd858 4046 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
ba395927 4047 break;
ba395927
KA
4048 if (i < drhd->devices_cnt)
4049 continue;
4050
c0771df8
DW
4051 /* This IOMMU has *only* gfx devices. Either bypass it or
4052 set the gfx_mapped flag, as appropriate */
cf1ec453 4053 if (!dmar_map_gfx) {
c0771df8 4054 drhd->ignored = 1;
b683b230
JL
4055 for_each_active_dev_scope(drhd->devices,
4056 drhd->devices_cnt, i, dev)
832bd858 4057 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
ba395927
KA
4058 }
4059 }
4060}
4061
f59c7b69
FY
4062#ifdef CONFIG_SUSPEND
4063static int init_iommu_hw(void)
4064{
4065 struct dmar_drhd_unit *drhd;
4066 struct intel_iommu *iommu = NULL;
4067
4068 for_each_active_iommu(iommu, drhd)
4069 if (iommu->qi)
4070 dmar_reenable_qi(iommu);
4071
b779260b
JC
4072 for_each_iommu(iommu, drhd) {
4073 if (drhd->ignored) {
4074 /*
4075 * we always have to disable PMRs or DMA may fail on
4076 * this device
4077 */
4078 if (force_on)
4079 iommu_disable_protect_mem_regions(iommu);
4080 continue;
4081 }
095303e0 4082
f59c7b69
FY
4083 iommu_flush_write_buffer(iommu);
4084
4085 iommu_set_root_entry(iommu);
4086
4087 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 4088 DMA_CCMD_GLOBAL_INVL);
2a41ccee
JL
4089 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4090 iommu_enable_translation(iommu);
b94996c9 4091 iommu_disable_protect_mem_regions(iommu);
f59c7b69
FY
4092 }
4093
4094 return 0;
4095}
4096
4097static void iommu_flush_all(void)
4098{
4099 struct dmar_drhd_unit *drhd;
4100 struct intel_iommu *iommu;
4101
4102 for_each_active_iommu(iommu, drhd) {
4103 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 4104 DMA_CCMD_GLOBAL_INVL);
f59c7b69 4105 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 4106 DMA_TLB_GLOBAL_FLUSH);
f59c7b69
FY
4107 }
4108}
4109
134fac3f 4110static int iommu_suspend(void)
f59c7b69
FY
4111{
4112 struct dmar_drhd_unit *drhd;
4113 struct intel_iommu *iommu = NULL;
4114 unsigned long flag;
4115
4116 for_each_active_iommu(iommu, drhd) {
6396bb22 4117 iommu->iommu_state = kcalloc(MAX_SR_DMAR_REGS, sizeof(u32),
f59c7b69
FY
4118 GFP_ATOMIC);
4119 if (!iommu->iommu_state)
4120 goto nomem;
4121 }
4122
4123 iommu_flush_all();
4124
4125 for_each_active_iommu(iommu, drhd) {
4126 iommu_disable_translation(iommu);
4127
1f5b3c3f 4128 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
4129
4130 iommu->iommu_state[SR_DMAR_FECTL_REG] =
4131 readl(iommu->reg + DMAR_FECTL_REG);
4132 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
4133 readl(iommu->reg + DMAR_FEDATA_REG);
4134 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
4135 readl(iommu->reg + DMAR_FEADDR_REG);
4136 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
4137 readl(iommu->reg + DMAR_FEUADDR_REG);
4138
1f5b3c3f 4139 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
4140 }
4141 return 0;
4142
4143nomem:
4144 for_each_active_iommu(iommu, drhd)
4145 kfree(iommu->iommu_state);
4146
4147 return -ENOMEM;
4148}
4149
134fac3f 4150static void iommu_resume(void)
f59c7b69
FY
4151{
4152 struct dmar_drhd_unit *drhd;
4153 struct intel_iommu *iommu = NULL;
4154 unsigned long flag;
4155
4156 if (init_iommu_hw()) {
b779260b
JC
4157 if (force_on)
4158 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
4159 else
4160 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
134fac3f 4161 return;
f59c7b69
FY
4162 }
4163
4164 for_each_active_iommu(iommu, drhd) {
4165
1f5b3c3f 4166 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
4167
4168 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
4169 iommu->reg + DMAR_FECTL_REG);
4170 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
4171 iommu->reg + DMAR_FEDATA_REG);
4172 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
4173 iommu->reg + DMAR_FEADDR_REG);
4174 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
4175 iommu->reg + DMAR_FEUADDR_REG);
4176
1f5b3c3f 4177 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
4178 }
4179
4180 for_each_active_iommu(iommu, drhd)
4181 kfree(iommu->iommu_state);
f59c7b69
FY
4182}
4183
134fac3f 4184static struct syscore_ops iommu_syscore_ops = {
f59c7b69
FY
4185 .resume = iommu_resume,
4186 .suspend = iommu_suspend,
4187};
4188
134fac3f 4189static void __init init_iommu_pm_ops(void)
f59c7b69 4190{
134fac3f 4191 register_syscore_ops(&iommu_syscore_ops);
f59c7b69
FY
4192}
4193
4194#else
99592ba4 4195static inline void init_iommu_pm_ops(void) {}
f59c7b69
FY
4196#endif /* CONFIG_SUSPEND */
4197
318fe7df 4198
c2a0b538 4199int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
318fe7df
SS
4200{
4201 struct acpi_dmar_reserved_memory *rmrr;
0659b8dc 4202 int prot = DMA_PTE_READ|DMA_PTE_WRITE;
318fe7df 4203 struct dmar_rmrr_unit *rmrru;
0659b8dc 4204 size_t length;
318fe7df
SS
4205
4206 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
4207 if (!rmrru)
0659b8dc 4208 goto out;
318fe7df
SS
4209
4210 rmrru->hdr = header;
4211 rmrr = (struct acpi_dmar_reserved_memory *)header;
4212 rmrru->base_address = rmrr->base_address;
4213 rmrru->end_address = rmrr->end_address;
0659b8dc
EA
4214
4215 length = rmrr->end_address - rmrr->base_address + 1;
4216 rmrru->resv = iommu_alloc_resv_region(rmrr->base_address, length, prot,
4217 IOMMU_RESV_DIRECT);
4218 if (!rmrru->resv)
4219 goto free_rmrru;
4220
2e455289
JL
4221 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
4222 ((void *)rmrr) + rmrr->header.length,
4223 &rmrru->devices_cnt);
0659b8dc
EA
4224 if (rmrru->devices_cnt && rmrru->devices == NULL)
4225 goto free_all;
318fe7df 4226
2e455289 4227 list_add(&rmrru->list, &dmar_rmrr_units);
318fe7df 4228
2e455289 4229 return 0;
0659b8dc
EA
4230free_all:
4231 kfree(rmrru->resv);
4232free_rmrru:
4233 kfree(rmrru);
4234out:
4235 return -ENOMEM;
318fe7df
SS
4236}
4237
6b197249
JL
4238static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
4239{
4240 struct dmar_atsr_unit *atsru;
4241 struct acpi_dmar_atsr *tmp;
4242
4243 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4244 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
4245 if (atsr->segment != tmp->segment)
4246 continue;
4247 if (atsr->header.length != tmp->header.length)
4248 continue;
4249 if (memcmp(atsr, tmp, atsr->header.length) == 0)
4250 return atsru;
4251 }
4252
4253 return NULL;
4254}
4255
4256int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
318fe7df
SS
4257{
4258 struct acpi_dmar_atsr *atsr;
4259 struct dmar_atsr_unit *atsru;
4260
b608fe35 4261 if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
6b197249
JL
4262 return 0;
4263
318fe7df 4264 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
6b197249
JL
4265 atsru = dmar_find_atsr(atsr);
4266 if (atsru)
4267 return 0;
4268
4269 atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
318fe7df
SS
4270 if (!atsru)
4271 return -ENOMEM;
4272
6b197249
JL
4273 /*
4274 * If memory is allocated from slab by ACPI _DSM method, we need to
4275 * copy the memory content because the memory buffer will be freed
4276 * on return.
4277 */
4278 atsru->hdr = (void *)(atsru + 1);
4279 memcpy(atsru->hdr, hdr, hdr->length);
318fe7df 4280 atsru->include_all = atsr->flags & 0x1;
2e455289
JL
4281 if (!atsru->include_all) {
4282 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
4283 (void *)atsr + atsr->header.length,
4284 &atsru->devices_cnt);
4285 if (atsru->devices_cnt && atsru->devices == NULL) {
4286 kfree(atsru);
4287 return -ENOMEM;
4288 }
4289 }
318fe7df 4290
0e242612 4291 list_add_rcu(&atsru->list, &dmar_atsr_units);
318fe7df
SS
4292
4293 return 0;
4294}
4295
9bdc531e
JL
4296static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
4297{
4298 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
4299 kfree(atsru);
4300}
4301
6b197249
JL
4302int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4303{
4304 struct acpi_dmar_atsr *atsr;
4305 struct dmar_atsr_unit *atsru;
4306
4307 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4308 atsru = dmar_find_atsr(atsr);
4309 if (atsru) {
4310 list_del_rcu(&atsru->list);
4311 synchronize_rcu();
4312 intel_iommu_free_atsr(atsru);
4313 }
4314
4315 return 0;
4316}
4317
4318int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4319{
4320 int i;
4321 struct device *dev;
4322 struct acpi_dmar_atsr *atsr;
4323 struct dmar_atsr_unit *atsru;
4324
4325 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4326 atsru = dmar_find_atsr(atsr);
4327 if (!atsru)
4328 return 0;
4329
194dc870 4330 if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
6b197249
JL
4331 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
4332 i, dev)
4333 return -EBUSY;
194dc870 4334 }
6b197249
JL
4335
4336 return 0;
4337}
4338
ffebeb46
JL
4339static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
4340{
e083ea5b 4341 int sp, ret;
ffebeb46
JL
4342 struct intel_iommu *iommu = dmaru->iommu;
4343
4344 if (g_iommus[iommu->seq_id])
4345 return 0;
4346
4347 if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
9f10e5bf 4348 pr_warn("%s: Doesn't support hardware pass through.\n",
ffebeb46
JL
4349 iommu->name);
4350 return -ENXIO;
4351 }
4352 if (!ecap_sc_support(iommu->ecap) &&
4353 domain_update_iommu_snooping(iommu)) {
9f10e5bf 4354 pr_warn("%s: Doesn't support snooping.\n",
ffebeb46
JL
4355 iommu->name);
4356 return -ENXIO;
4357 }
4358 sp = domain_update_iommu_superpage(iommu) - 1;
4359 if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
9f10e5bf 4360 pr_warn("%s: Doesn't support large page.\n",
ffebeb46
JL
4361 iommu->name);
4362 return -ENXIO;
4363 }
4364
4365 /*
4366 * Disable translation if already enabled prior to OS handover.
4367 */
4368 if (iommu->gcmd & DMA_GCMD_TE)
4369 iommu_disable_translation(iommu);
4370
4371 g_iommus[iommu->seq_id] = iommu;
4372 ret = iommu_init_domains(iommu);
4373 if (ret == 0)
4374 ret = iommu_alloc_root_entry(iommu);
4375 if (ret)
4376 goto out;
4377
8a94ade4 4378#ifdef CONFIG_INTEL_IOMMU_SVM
765b6a98 4379 if (pasid_supported(iommu))
d9737953 4380 intel_svm_init(iommu);
8a94ade4
DW
4381#endif
4382
ffebeb46
JL
4383 if (dmaru->ignored) {
4384 /*
4385 * we always have to disable PMRs or DMA may fail on this device
4386 */
4387 if (force_on)
4388 iommu_disable_protect_mem_regions(iommu);
4389 return 0;
4390 }
4391
4392 intel_iommu_init_qi(iommu);
4393 iommu_flush_write_buffer(iommu);
a222a7f0
DW
4394
4395#ifdef CONFIG_INTEL_IOMMU_SVM
765b6a98 4396 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
a222a7f0
DW
4397 ret = intel_svm_enable_prq(iommu);
4398 if (ret)
4399 goto disable_iommu;
4400 }
4401#endif
ffebeb46
JL
4402 ret = dmar_set_interrupt(iommu);
4403 if (ret)
4404 goto disable_iommu;
4405
4406 iommu_set_root_entry(iommu);
4407 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
4408 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4409 iommu_enable_translation(iommu);
4410
ffebeb46
JL
4411 iommu_disable_protect_mem_regions(iommu);
4412 return 0;
4413
4414disable_iommu:
4415 disable_dmar_iommu(iommu);
4416out:
4417 free_dmar_iommu(iommu);
4418 return ret;
4419}
4420
6b197249
JL
4421int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
4422{
ffebeb46
JL
4423 int ret = 0;
4424 struct intel_iommu *iommu = dmaru->iommu;
4425
4426 if (!intel_iommu_enabled)
4427 return 0;
4428 if (iommu == NULL)
4429 return -EINVAL;
4430
4431 if (insert) {
4432 ret = intel_iommu_add(dmaru);
4433 } else {
4434 disable_dmar_iommu(iommu);
4435 free_dmar_iommu(iommu);
4436 }
4437
4438 return ret;
6b197249
JL
4439}
4440
9bdc531e
JL
4441static void intel_iommu_free_dmars(void)
4442{
4443 struct dmar_rmrr_unit *rmrru, *rmrr_n;
4444 struct dmar_atsr_unit *atsru, *atsr_n;
4445
4446 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
4447 list_del(&rmrru->list);
4448 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
0659b8dc 4449 kfree(rmrru->resv);
9bdc531e 4450 kfree(rmrru);
318fe7df
SS
4451 }
4452
9bdc531e
JL
4453 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
4454 list_del(&atsru->list);
4455 intel_iommu_free_atsr(atsru);
4456 }
318fe7df
SS
4457}
4458
4459int dmar_find_matched_atsr_unit(struct pci_dev *dev)
4460{
b683b230 4461 int i, ret = 1;
318fe7df 4462 struct pci_bus *bus;
832bd858
DW
4463 struct pci_dev *bridge = NULL;
4464 struct device *tmp;
318fe7df
SS
4465 struct acpi_dmar_atsr *atsr;
4466 struct dmar_atsr_unit *atsru;
4467
4468 dev = pci_physfn(dev);
318fe7df 4469 for (bus = dev->bus; bus; bus = bus->parent) {
b5f82ddf 4470 bridge = bus->self;
d14053b3
DW
4471 /* If it's an integrated device, allow ATS */
4472 if (!bridge)
4473 return 1;
4474 /* Connected via non-PCIe: no ATS */
4475 if (!pci_is_pcie(bridge) ||
62f87c0e 4476 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
318fe7df 4477 return 0;
d14053b3 4478 /* If we found the root port, look it up in the ATSR */
b5f82ddf 4479 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
318fe7df 4480 break;
318fe7df
SS
4481 }
4482
0e242612 4483 rcu_read_lock();
b5f82ddf
JL
4484 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4485 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4486 if (atsr->segment != pci_domain_nr(dev->bus))
4487 continue;
4488
b683b230 4489 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
832bd858 4490 if (tmp == &bridge->dev)
b683b230 4491 goto out;
b5f82ddf
JL
4492
4493 if (atsru->include_all)
b683b230 4494 goto out;
b5f82ddf 4495 }
b683b230
JL
4496 ret = 0;
4497out:
0e242612 4498 rcu_read_unlock();
318fe7df 4499
b683b230 4500 return ret;
318fe7df
SS
4501}
4502
59ce0515
JL
4503int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
4504{
e083ea5b 4505 int ret;
59ce0515
JL
4506 struct dmar_rmrr_unit *rmrru;
4507 struct dmar_atsr_unit *atsru;
4508 struct acpi_dmar_atsr *atsr;
4509 struct acpi_dmar_reserved_memory *rmrr;
4510
b608fe35 4511 if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING)
59ce0515
JL
4512 return 0;
4513
4514 list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
4515 rmrr = container_of(rmrru->hdr,
4516 struct acpi_dmar_reserved_memory, header);
4517 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4518 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
4519 ((void *)rmrr) + rmrr->header.length,
4520 rmrr->segment, rmrru->devices,
4521 rmrru->devices_cnt);
e083ea5b 4522 if (ret < 0)
59ce0515 4523 return ret;
e6a8c9b3 4524 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
27e24950
JL
4525 dmar_remove_dev_scope(info, rmrr->segment,
4526 rmrru->devices, rmrru->devices_cnt);
59ce0515
JL
4527 }
4528 }
4529
4530 list_for_each_entry(atsru, &dmar_atsr_units, list) {
4531 if (atsru->include_all)
4532 continue;
4533
4534 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4535 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4536 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
4537 (void *)atsr + atsr->header.length,
4538 atsr->segment, atsru->devices,
4539 atsru->devices_cnt);
4540 if (ret > 0)
4541 break;
e083ea5b 4542 else if (ret < 0)
59ce0515 4543 return ret;
e6a8c9b3 4544 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
59ce0515
JL
4545 if (dmar_remove_dev_scope(info, atsr->segment,
4546 atsru->devices, atsru->devices_cnt))
4547 break;
4548 }
4549 }
4550
4551 return 0;
4552}
4553
99dcaded
FY
4554/*
4555 * Here we only respond to a device being unbound from its driver.
4556 *
4557 * An added device is not attached to its DMAR domain here yet. That will
4558 * happen when the device is mapped to an iova.
4559 */
4560static int device_notifier(struct notifier_block *nb,
4561 unsigned long action, void *data)
4562{
4563 struct device *dev = data;
99dcaded
FY
4564 struct dmar_domain *domain;
4565
3d89194a 4566 if (iommu_dummy(dev))
44cd613c
DW
4567 return 0;
4568
117266fd
LB
4569 if (action == BUS_NOTIFY_REMOVED_DEVICE) {
4570 domain = find_domain(dev);
4571 if (!domain)
4572 return 0;
99dcaded 4573
117266fd
LB
4574 dmar_remove_one_dev_info(dev);
4575 if (!domain_type_is_vm_or_si(domain) &&
4576 list_empty(&domain->devices))
4577 domain_exit(domain);
4578 } else if (action == BUS_NOTIFY_ADD_DEVICE) {
4579 if (iommu_should_identity_map(dev, 1))
4580 domain_add_dev_info(si_domain, dev);
4581 }
a97590e5 4582
99dcaded
FY
4583 return 0;
4584}
4585
4586static struct notifier_block device_nb = {
4587 .notifier_call = device_notifier,
4588};
4589
75f05569
JL
4590static int intel_iommu_memory_notifier(struct notifier_block *nb,
4591 unsigned long val, void *v)
4592{
4593 struct memory_notify *mhp = v;
4594 unsigned long long start, end;
4595 unsigned long start_vpfn, last_vpfn;
4596
4597 switch (val) {
4598 case MEM_GOING_ONLINE:
4599 start = mhp->start_pfn << PAGE_SHIFT;
4600 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
4601 if (iommu_domain_identity_map(si_domain, start, end)) {
9f10e5bf 4602 pr_warn("Failed to build identity map for [%llx-%llx]\n",
75f05569
JL
4603 start, end);
4604 return NOTIFY_BAD;
4605 }
4606 break;
4607
4608 case MEM_OFFLINE:
4609 case MEM_CANCEL_ONLINE:
4610 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4611 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
4612 while (start_vpfn <= last_vpfn) {
4613 struct iova *iova;
4614 struct dmar_drhd_unit *drhd;
4615 struct intel_iommu *iommu;
ea8ea460 4616 struct page *freelist;
75f05569
JL
4617
4618 iova = find_iova(&si_domain->iovad, start_vpfn);
4619 if (iova == NULL) {
9f10e5bf 4620 pr_debug("Failed get IOVA for PFN %lx\n",
75f05569
JL
4621 start_vpfn);
4622 break;
4623 }
4624
4625 iova = split_and_remove_iova(&si_domain->iovad, iova,
4626 start_vpfn, last_vpfn);
4627 if (iova == NULL) {
9f10e5bf 4628 pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
75f05569
JL
4629 start_vpfn, last_vpfn);
4630 return NOTIFY_BAD;
4631 }
4632
ea8ea460
DW
4633 freelist = domain_unmap(si_domain, iova->pfn_lo,
4634 iova->pfn_hi);
4635
75f05569
JL
4636 rcu_read_lock();
4637 for_each_active_iommu(iommu, drhd)
a1ddcbe9 4638 iommu_flush_iotlb_psi(iommu, si_domain,
a156ef99 4639 iova->pfn_lo, iova_size(iova),
ea8ea460 4640 !freelist, 0);
75f05569 4641 rcu_read_unlock();
ea8ea460 4642 dma_free_pagelist(freelist);
75f05569
JL
4643
4644 start_vpfn = iova->pfn_hi + 1;
4645 free_iova_mem(iova);
4646 }
4647 break;
4648 }
4649
4650 return NOTIFY_OK;
4651}
4652
4653static struct notifier_block intel_iommu_memory_nb = {
4654 .notifier_call = intel_iommu_memory_notifier,
4655 .priority = 0
4656};
4657
22e2f9fa
OP
4658static void free_all_cpu_cached_iovas(unsigned int cpu)
4659{
4660 int i;
4661
4662 for (i = 0; i < g_num_of_iommus; i++) {
4663 struct intel_iommu *iommu = g_iommus[i];
4664 struct dmar_domain *domain;
0caa7616 4665 int did;
22e2f9fa
OP
4666
4667 if (!iommu)
4668 continue;
4669
3bd4f911 4670 for (did = 0; did < cap_ndoms(iommu->cap); did++) {
0caa7616 4671 domain = get_iommu_domain(iommu, (u16)did);
22e2f9fa
OP
4672
4673 if (!domain)
4674 continue;
4675 free_cpu_cached_iovas(cpu, &domain->iovad);
4676 }
4677 }
4678}
4679
21647615 4680static int intel_iommu_cpu_dead(unsigned int cpu)
aa473240 4681{
21647615 4682 free_all_cpu_cached_iovas(cpu);
21647615 4683 return 0;
aa473240
OP
4684}
4685
161b28aa
JR
4686static void intel_disable_iommus(void)
4687{
4688 struct intel_iommu *iommu = NULL;
4689 struct dmar_drhd_unit *drhd;
4690
4691 for_each_iommu(iommu, drhd)
4692 iommu_disable_translation(iommu);
4693}
4694
a7fdb6e6
JR
4695static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
4696{
2926a2aa
JR
4697 struct iommu_device *iommu_dev = dev_to_iommu_device(dev);
4698
4699 return container_of(iommu_dev, struct intel_iommu, iommu);
a7fdb6e6
JR
4700}
4701
a5459cfe
AW
4702static ssize_t intel_iommu_show_version(struct device *dev,
4703 struct device_attribute *attr,
4704 char *buf)
4705{
a7fdb6e6 4706 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4707 u32 ver = readl(iommu->reg + DMAR_VER_REG);
4708 return sprintf(buf, "%d:%d\n",
4709 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4710}
4711static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
4712
4713static ssize_t intel_iommu_show_address(struct device *dev,
4714 struct device_attribute *attr,
4715 char *buf)
4716{
a7fdb6e6 4717 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4718 return sprintf(buf, "%llx\n", iommu->reg_phys);
4719}
4720static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
4721
4722static ssize_t intel_iommu_show_cap(struct device *dev,
4723 struct device_attribute *attr,
4724 char *buf)
4725{
a7fdb6e6 4726 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4727 return sprintf(buf, "%llx\n", iommu->cap);
4728}
4729static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
4730
4731static ssize_t intel_iommu_show_ecap(struct device *dev,
4732 struct device_attribute *attr,
4733 char *buf)
4734{
a7fdb6e6 4735 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4736 return sprintf(buf, "%llx\n", iommu->ecap);
4737}
4738static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4739
2238c082
AW
4740static ssize_t intel_iommu_show_ndoms(struct device *dev,
4741 struct device_attribute *attr,
4742 char *buf)
4743{
a7fdb6e6 4744 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2238c082
AW
4745 return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
4746}
4747static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL);
4748
4749static ssize_t intel_iommu_show_ndoms_used(struct device *dev,
4750 struct device_attribute *attr,
4751 char *buf)
4752{
a7fdb6e6 4753 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2238c082
AW
4754 return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
4755 cap_ndoms(iommu->cap)));
4756}
4757static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL);
4758
a5459cfe
AW
4759static struct attribute *intel_iommu_attrs[] = {
4760 &dev_attr_version.attr,
4761 &dev_attr_address.attr,
4762 &dev_attr_cap.attr,
4763 &dev_attr_ecap.attr,
2238c082
AW
4764 &dev_attr_domains_supported.attr,
4765 &dev_attr_domains_used.attr,
a5459cfe
AW
4766 NULL,
4767};
4768
4769static struct attribute_group intel_iommu_group = {
4770 .name = "intel-iommu",
4771 .attrs = intel_iommu_attrs,
4772};
4773
4774const struct attribute_group *intel_iommu_groups[] = {
4775 &intel_iommu_group,
4776 NULL,
4777};
4778
89a6079d
LB
4779static int __init platform_optin_force_iommu(void)
4780{
4781 struct pci_dev *pdev = NULL;
4782 bool has_untrusted_dev = false;
4783
4784 if (!dmar_platform_optin() || no_platform_optin)
4785 return 0;
4786
4787 for_each_pci_dev(pdev) {
4788 if (pdev->untrusted) {
4789 has_untrusted_dev = true;
4790 break;
4791 }
4792 }
4793
4794 if (!has_untrusted_dev)
4795 return 0;
4796
4797 if (no_iommu || dmar_disabled)
4798 pr_info("Intel-IOMMU force enabled due to platform opt in\n");
4799
4800 /*
4801 * If Intel-IOMMU is disabled by default, we will apply identity
4802 * map for all devices except those marked as being untrusted.
4803 */
4804 if (dmar_disabled)
4805 iommu_identity_mapping |= IDENTMAP_ALL;
4806
4807 dmar_disabled = 0;
4808#if defined(CONFIG_X86) && defined(CONFIG_SWIOTLB)
4809 swiotlb = 0;
4810#endif
4811 no_iommu = 0;
4812
4813 return 1;
4814}
4815
ba395927
KA
4816int __init intel_iommu_init(void)
4817{
9bdc531e 4818 int ret = -ENODEV;
3a93c841 4819 struct dmar_drhd_unit *drhd;
7c919779 4820 struct intel_iommu *iommu;
ba395927 4821
89a6079d
LB
4822 /*
4823 * Intel IOMMU is required for a TXT/tboot launch or platform
4824 * opt in, so enforce that.
4825 */
4826 force_on = tboot_force_iommu() || platform_optin_force_iommu();
a59b50e9 4827
3a5670e8
JL
4828 if (iommu_init_mempool()) {
4829 if (force_on)
4830 panic("tboot: Failed to initialize iommu memory\n");
4831 return -ENOMEM;
4832 }
4833
4834 down_write(&dmar_global_lock);
a59b50e9
JC
4835 if (dmar_table_init()) {
4836 if (force_on)
4837 panic("tboot: Failed to initialize DMAR table\n");
9bdc531e 4838 goto out_free_dmar;
a59b50e9 4839 }
ba395927 4840
c2c7286a 4841 if (dmar_dev_scope_init() < 0) {
a59b50e9
JC
4842 if (force_on)
4843 panic("tboot: Failed to initialize DMAR device scope\n");
9bdc531e 4844 goto out_free_dmar;
a59b50e9 4845 }
1886e8a9 4846
ec154bf5
JR
4847 up_write(&dmar_global_lock);
4848
4849 /*
4850 * The bus notifier takes the dmar_global_lock, so lockdep will
4851 * complain later when we register it under the lock.
4852 */
4853 dmar_register_bus_notifier();
4854
4855 down_write(&dmar_global_lock);
4856
161b28aa 4857 if (no_iommu || dmar_disabled) {
bfd20f1c
SL
4858 /*
4859 * We exit the function here to ensure IOMMU's remapping and
4860 * mempool aren't set up, which means that the IOMMU's PMRs
4861 * won't be disabled via the call to init_dmars(). So disable
4862 * them explicitly here. The PMRs were set up by tboot prior to
4863 * calling SENTER, but the kernel is expected to reset/tear
4864 * down the PMRs.
4865 */
4866 if (intel_iommu_tboot_noforce) {
4867 for_each_iommu(iommu, drhd)
4868 iommu_disable_protect_mem_regions(iommu);
4869 }
4870
161b28aa
JR
4871 /*
4872 * Make sure the IOMMUs are switched off, even when we
4873 * boot into a kexec kernel and the previous kernel left
4874 * them enabled
4875 */
4876 intel_disable_iommus();
9bdc531e 4877 goto out_free_dmar;
161b28aa 4878 }
2ae21010 4879
318fe7df 4880 if (list_empty(&dmar_rmrr_units))
9f10e5bf 4881 pr_info("No RMRR found\n");
318fe7df
SS
4882
4883 if (list_empty(&dmar_atsr_units))
9f10e5bf 4884 pr_info("No ATSR found\n");
318fe7df 4885
51a63e67
JC
4886 if (dmar_init_reserved_ranges()) {
4887 if (force_on)
4888 panic("tboot: Failed to reserve iommu ranges\n");
3a5670e8 4889 goto out_free_reserved_range;
51a63e67 4890 }
ba395927 4891
cf1ec453
LB
4892 if (dmar_map_gfx)
4893 intel_iommu_gfx_mapped = 1;
4894
ba395927
KA
4895 init_no_remapping_devices();
4896
b779260b 4897 ret = init_dmars();
ba395927 4898 if (ret) {
a59b50e9
JC
4899 if (force_on)
4900 panic("tboot: Failed to initialize DMARs\n");
9f10e5bf 4901 pr_err("Initialization failed\n");
9bdc531e 4902 goto out_free_reserved_range;
ba395927 4903 }
3a5670e8 4904 up_write(&dmar_global_lock);
9f10e5bf 4905 pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
ba395927 4906
4fac8076 4907#if defined(CONFIG_X86) && defined(CONFIG_SWIOTLB)
75f1cdf1
FT
4908 swiotlb = 0;
4909#endif
19943b0e 4910 dma_ops = &intel_dma_ops;
4ed0d3e6 4911
134fac3f 4912 init_iommu_pm_ops();
a8bcbb0d 4913
39ab9555
JR
4914 for_each_active_iommu(iommu, drhd) {
4915 iommu_device_sysfs_add(&iommu->iommu, NULL,
4916 intel_iommu_groups,
4917 "%s", iommu->name);
4918 iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
4919 iommu_device_register(&iommu->iommu);
4920 }
a5459cfe 4921
4236d97d 4922 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
99dcaded 4923 bus_register_notifier(&pci_bus_type, &device_nb);
75f05569
JL
4924 if (si_domain && !hw_pass_through)
4925 register_memory_notifier(&intel_iommu_memory_nb);
21647615
AMG
4926 cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL,
4927 intel_iommu_cpu_dead);
8bc1f85c 4928 intel_iommu_enabled = 1;
ee2636b8 4929 intel_iommu_debugfs_init();
8bc1f85c 4930
ba395927 4931 return 0;
9bdc531e
JL
4932
4933out_free_reserved_range:
4934 put_iova_domain(&reserved_iova_list);
9bdc531e
JL
4935out_free_dmar:
4936 intel_iommu_free_dmars();
3a5670e8
JL
4937 up_write(&dmar_global_lock);
4938 iommu_exit_mempool();
9bdc531e 4939 return ret;
ba395927 4940}
e820482c 4941
2452d9db 4942static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
579305f7
AW
4943{
4944 struct intel_iommu *iommu = opaque;
4945
2452d9db 4946 domain_context_clear_one(iommu, PCI_BUS_NUM(alias), alias & 0xff);
579305f7
AW
4947 return 0;
4948}
4949
4950/*
4951 * NB - intel-iommu lacks any sort of reference counting for the users of
4952 * dependent devices. If multiple endpoints have intersecting dependent
4953 * devices, unbinding the driver from any one of them will possibly leave
4954 * the others unable to operate.
4955 */
2452d9db 4956static void domain_context_clear(struct intel_iommu *iommu, struct device *dev)
3199aa6b 4957{
0bcb3e28 4958 if (!iommu || !dev || !dev_is_pci(dev))
3199aa6b
HW
4959 return;
4960
2452d9db 4961 pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_clear_one_cb, iommu);
3199aa6b
HW
4962}
4963
127c7615 4964static void __dmar_remove_one_dev_info(struct device_domain_info *info)
c7151a8d 4965{
c7151a8d
WH
4966 struct intel_iommu *iommu;
4967 unsigned long flags;
c7151a8d 4968
55d94043
JR
4969 assert_spin_locked(&device_domain_lock);
4970
127c7615 4971 if (WARN_ON(!info))
c7151a8d
WH
4972 return;
4973
127c7615 4974 iommu = info->iommu;
c7151a8d 4975
127c7615 4976 if (info->dev) {
ef848b7e
LB
4977 if (dev_is_pci(info->dev) && sm_supported(iommu))
4978 intel_pasid_tear_down_entry(iommu, info->dev,
4979 PASID_RID2PASID);
4980
127c7615
JR
4981 iommu_disable_dev_iotlb(info);
4982 domain_context_clear(iommu, info->dev);
a7fc93fe 4983 intel_pasid_free_table(info->dev);
127c7615 4984 }
c7151a8d 4985
b608ac3b 4986 unlink_domain_info(info);
c7151a8d 4987
d160aca5 4988 spin_lock_irqsave(&iommu->lock, flags);
127c7615 4989 domain_detach_iommu(info->domain, iommu);
d160aca5 4990 spin_unlock_irqrestore(&iommu->lock, flags);
c7151a8d 4991
127c7615 4992 free_devinfo_mem(info);
c7151a8d 4993}
c7151a8d 4994
71753239 4995static void dmar_remove_one_dev_info(struct device *dev)
55d94043 4996{
127c7615 4997 struct device_domain_info *info;
55d94043 4998 unsigned long flags;
3e7abe25 4999
55d94043 5000 spin_lock_irqsave(&device_domain_lock, flags);
127c7615
JR
5001 info = dev->archdata.iommu;
5002 __dmar_remove_one_dev_info(info);
55d94043 5003 spin_unlock_irqrestore(&device_domain_lock, flags);
c7151a8d
WH
5004}
5005
2c2e2c38 5006static int md_domain_init(struct dmar_domain *domain, int guest_width)
5e98c4b1
WH
5007{
5008 int adjust_width;
5009
aa3ac946 5010 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
5e98c4b1
WH
5011 domain_reserve_special_ranges(domain);
5012
5013 /* calculate AGAW */
5014 domain->gaw = guest_width;
5015 adjust_width = guestwidth_to_adjustwidth(guest_width);
5016 domain->agaw = width_to_agaw(adjust_width);
5017
5e98c4b1 5018 domain->iommu_coherency = 0;
c5b15255 5019 domain->iommu_snooping = 0;
6dd9a7c7 5020 domain->iommu_superpage = 0;
fe40f1e0 5021 domain->max_addr = 0;
5e98c4b1
WH
5022
5023 /* always allocate the top pgd */
4c923d47 5024 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
5e98c4b1
WH
5025 if (!domain->pgd)
5026 return -ENOMEM;
5027 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
5028 return 0;
5029}
5030
00a77deb 5031static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
38717946 5032{
5d450806 5033 struct dmar_domain *dmar_domain;
00a77deb
JR
5034 struct iommu_domain *domain;
5035
5036 if (type != IOMMU_DOMAIN_UNMANAGED)
5037 return NULL;
38717946 5038
ab8dfe25 5039 dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE);
5d450806 5040 if (!dmar_domain) {
9f10e5bf 5041 pr_err("Can't allocate dmar_domain\n");
00a77deb 5042 return NULL;
38717946 5043 }
2c2e2c38 5044 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
9f10e5bf 5045 pr_err("Domain initialization failed\n");
92d03cc8 5046 domain_exit(dmar_domain);
00a77deb 5047 return NULL;
38717946 5048 }
8140a95d 5049 domain_update_iommu_cap(dmar_domain);
faa3d6f5 5050
00a77deb 5051 domain = &dmar_domain->domain;
8a0e715b
JR
5052 domain->geometry.aperture_start = 0;
5053 domain->geometry.aperture_end = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
5054 domain->geometry.force_aperture = true;
5055
00a77deb 5056 return domain;
38717946 5057}
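/*
 * Editor's sketch (not part of the original file): these domain_alloc/
 * attach/map callbacks are driven through the generic IOMMU API, e.g. by
 * VFIO.  A minimal, hypothetical caller under this era's API could look
 * like the following:
 */
#if 0	/* illustrative only */
static int example_use_unmanaged_domain(struct device *dev,
					phys_addr_t pa, dma_addr_t iova)
{
	struct iommu_domain *dom = iommu_domain_alloc(&pci_bus_type);
	int ret;

	if (!dom)
		return -ENOMEM;

	ret = iommu_attach_device(dom, dev);	/* -> intel_iommu_attach_device() */
	if (ret)
		goto out_free;

	/* -> intel_iommu_map(); prot bits become DMA_PTE_READ/WRITE */
	ret = iommu_map(dom, iova, pa, SZ_4K, IOMMU_READ | IOMMU_WRITE);
	if (ret)
		goto out_detach;

	/* ... use the mapping ... */

	iommu_unmap(dom, iova, SZ_4K);		/* -> intel_iommu_unmap() */
out_detach:
	iommu_detach_device(dom, dev);		/* -> intel_iommu_detach_device() */
out_free:
	iommu_domain_free(dom);
	return ret;
}
#endif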
38717946 5058
00a77deb 5059static void intel_iommu_domain_free(struct iommu_domain *domain)
38717946 5060{
00a77deb 5061 domain_exit(to_dmar_domain(domain));
38717946 5062}
38717946 5063
67b8e02b
LB
5064/*
5065 * Check whether a @domain could be attached to the @dev through the
5066 * aux-domain attach/detach APIs.
5067 */
5068static inline bool
5069is_aux_domain(struct device *dev, struct iommu_domain *domain)
5070{
5071 struct device_domain_info *info = dev->archdata.iommu;
5072
5073 return info && info->auxd_enabled &&
5074 domain->type == IOMMU_DOMAIN_UNMANAGED;
5075}
5076
5077static void auxiliary_link_device(struct dmar_domain *domain,
5078 struct device *dev)
5079{
5080 struct device_domain_info *info = dev->archdata.iommu;
5081
5082 assert_spin_locked(&device_domain_lock);
5083 if (WARN_ON(!info))
5084 return;
5085
5086 domain->auxd_refcnt++;
5087 list_add(&domain->auxd, &info->auxiliary_domains);
5088}
5089
5090static void auxiliary_unlink_device(struct dmar_domain *domain,
5091 struct device *dev)
5092{
5093 struct device_domain_info *info = dev->archdata.iommu;
5094
5095 assert_spin_locked(&device_domain_lock);
5096 if (WARN_ON(!info))
5097 return;
5098
5099 list_del(&domain->auxd);
5100 domain->auxd_refcnt--;
5101
5102 if (!domain->auxd_refcnt && domain->default_pasid > 0)
5103 intel_pasid_free_id(domain->default_pasid);
5104}
5105
5106static int aux_domain_add_dev(struct dmar_domain *domain,
5107 struct device *dev)
5108{
5109 int ret;
5110 u8 bus, devfn;
5111 unsigned long flags;
5112 struct intel_iommu *iommu;
5113
5114 iommu = device_to_iommu(dev, &bus, &devfn);
5115 if (!iommu)
5116 return -ENODEV;
5117
5118 if (domain->default_pasid <= 0) {
5119 int pasid;
5120
5121 pasid = intel_pasid_alloc_id(domain, PASID_MIN,
5122 pci_max_pasids(to_pci_dev(dev)),
5123 GFP_KERNEL);
5124 if (pasid <= 0) {
5125 pr_err("Can't allocate default pasid\n");
5126 return -ENODEV;
5127 }
5128 domain->default_pasid = pasid;
5129 }
5130
5131 spin_lock_irqsave(&device_domain_lock, flags);
5132 /*
5133 * iommu->lock must be held to attach domain to iommu and set up the
5134 * pasid entry for second level translation.
5135 */
5136 spin_lock(&iommu->lock);
5137 ret = domain_attach_iommu(domain, iommu);
5138 if (ret)
5139 goto attach_failed;
5140
5141 /* Setup the PASID entry for mediated devices: */
5142 ret = intel_pasid_setup_second_level(iommu, domain, dev,
5143 domain->default_pasid);
5144 if (ret)
5145 goto table_failed;
5146 spin_unlock(&iommu->lock);
5147
5148 auxiliary_link_device(domain, dev);
5149
5150 spin_unlock_irqrestore(&device_domain_lock, flags);
5151
5152 return 0;
5153
5154table_failed:
5155 domain_detach_iommu(domain, iommu);
5156attach_failed:
5157 spin_unlock(&iommu->lock);
5158 spin_unlock_irqrestore(&device_domain_lock, flags);
5159 if (!domain->auxd_refcnt && domain->default_pasid > 0)
5160 intel_pasid_free_id(domain->default_pasid);
5161
5162 return ret;
5163}
5164
5165static void aux_domain_remove_dev(struct dmar_domain *domain,
5166 struct device *dev)
5167{
5168 struct device_domain_info *info;
5169 struct intel_iommu *iommu;
5170 unsigned long flags;
5171
5172 if (!is_aux_domain(dev, &domain->domain))
5173 return;
5174
5175 spin_lock_irqsave(&device_domain_lock, flags);
5176 info = dev->archdata.iommu;
5177 iommu = info->iommu;
5178
5179 auxiliary_unlink_device(domain, dev);
5180
5181 spin_lock(&iommu->lock);
5182 intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid);
5183 domain_detach_iommu(domain, iommu);
5184 spin_unlock(&iommu->lock);
5185
5186 spin_unlock_irqrestore(&device_domain_lock, flags);
5187}
5188
8cc3759a
LB
5189static int prepare_domain_attach_device(struct iommu_domain *domain,
5190 struct device *dev)
38717946 5191{
00a77deb 5192 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
fe40f1e0
WH
5193 struct intel_iommu *iommu;
5194 int addr_width;
156baca8 5195 u8 bus, devfn;
faa3d6f5 5196
156baca8 5197 iommu = device_to_iommu(dev, &bus, &devfn);
fe40f1e0
WH
5198 if (!iommu)
5199 return -ENODEV;
5200
5201 /* check if this iommu agaw is sufficient for max mapped address */
5202 addr_width = agaw_to_width(iommu->agaw);
a99c47a2
TL
5203 if (addr_width > cap_mgaw(iommu->cap))
5204 addr_width = cap_mgaw(iommu->cap);
5205
5206 if (dmar_domain->max_addr > (1LL << addr_width)) {
932a6523
BH
5207 dev_err(dev, "%s: iommu width (%d) is not "
5208 "sufficient for the mapped address (%llx)\n",
5209 __func__, addr_width, dmar_domain->max_addr);
fe40f1e0
WH
5210 return -EFAULT;
5211 }
a99c47a2
TL
5212 dmar_domain->gaw = addr_width;
5213
5214 /*
5215 * Knock out extra levels of page tables if necessary
5216 */
5217 while (iommu->agaw < dmar_domain->agaw) {
5218 struct dma_pte *pte;
5219
5220 pte = dmar_domain->pgd;
5221 if (dma_pte_present(pte)) {
25cbff16
SY
5222 dmar_domain->pgd = (struct dma_pte *)
5223 phys_to_virt(dma_pte_addr(pte));
7a661013 5224 free_pgtable_page(pte);
a99c47a2
TL
5225 }
5226 dmar_domain->agaw--;
5227 }
fe40f1e0 5228
8cc3759a
LB
5229 return 0;
5230}
5231
5232static int intel_iommu_attach_device(struct iommu_domain *domain,
5233 struct device *dev)
5234{
5235 int ret;
5236
5237 if (device_is_rmrr_locked(dev)) {
5238 dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n");
5239 return -EPERM;
5240 }
5241
67b8e02b
LB
5242 if (is_aux_domain(dev, domain))
5243 return -EPERM;
5244
8cc3759a
LB
5245 /* normally dev is not mapped */
5246 if (unlikely(domain_context_mapped(dev))) {
5247 struct dmar_domain *old_domain;
5248
5249 old_domain = find_domain(dev);
5250 if (old_domain) {
5251 rcu_read_lock();
5252 dmar_remove_one_dev_info(dev);
5253 rcu_read_unlock();
5254
5255 if (!domain_type_is_vm_or_si(old_domain) &&
5256 list_empty(&old_domain->devices))
5257 domain_exit(old_domain);
5258 }
5259 }
5260
5261 ret = prepare_domain_attach_device(domain, dev);
5262 if (ret)
5263 return ret;
5264
5265 return domain_add_dev_info(to_dmar_domain(domain), dev);
38717946 5266}
38717946 5267
67b8e02b
LB
5268static int intel_iommu_aux_attach_device(struct iommu_domain *domain,
5269 struct device *dev)
5270{
5271 int ret;
5272
5273 if (!is_aux_domain(dev, domain))
5274 return -EPERM;
5275
5276 ret = prepare_domain_attach_device(domain, dev);
5277 if (ret)
5278 return ret;
5279
5280 return aux_domain_add_dev(to_dmar_domain(domain), dev);
5281}
5282
4c5478c9
JR
5283static void intel_iommu_detach_device(struct iommu_domain *domain,
5284 struct device *dev)
38717946 5285{
71753239 5286 dmar_remove_one_dev_info(dev);
faa3d6f5 5287}
c7151a8d 5288
67b8e02b
LB
5289static void intel_iommu_aux_detach_device(struct iommu_domain *domain,
5290 struct device *dev)
5291{
5292 aux_domain_remove_dev(to_dmar_domain(domain), dev);
5293}
5294
b146a1c9
JR
5295static int intel_iommu_map(struct iommu_domain *domain,
5296 unsigned long iova, phys_addr_t hpa,
5009065d 5297 size_t size, int iommu_prot)
faa3d6f5 5298{
00a77deb 5299 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
fe40f1e0 5300 u64 max_addr;
dde57a21 5301 int prot = 0;
faa3d6f5 5302 int ret;
fe40f1e0 5303
dde57a21
JR
5304 if (iommu_prot & IOMMU_READ)
5305 prot |= DMA_PTE_READ;
5306 if (iommu_prot & IOMMU_WRITE)
5307 prot |= DMA_PTE_WRITE;
9cf06697
SY
5308 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
5309 prot |= DMA_PTE_SNP;
dde57a21 5310
163cc52c 5311 max_addr = iova + size;
dde57a21 5312 if (dmar_domain->max_addr < max_addr) {
fe40f1e0
WH
5313 u64 end;
5314
5315 /* check if minimum agaw is sufficient for mapped address */
8954da1f 5316 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
fe40f1e0 5317 if (end < max_addr) {
9f10e5bf 5318 pr_err("%s: iommu width (%d) is not "
fe40f1e0 5319 "sufficient for the mapped address (%llx)\n",
8954da1f 5320 __func__, dmar_domain->gaw, max_addr);
fe40f1e0
WH
5321 return -EFAULT;
5322 }
dde57a21 5323 dmar_domain->max_addr = max_addr;
fe40f1e0 5324 }
ad051221
DW
5325 /* Round up size to next multiple of PAGE_SIZE, if it and
5326 the low bits of hpa would take us onto the next page */
88cb6a74 5327 size = aligned_nrpages(hpa, size);
ad051221
DW
5328 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
5329 hpa >> VTD_PAGE_SHIFT, size, prot);
faa3d6f5 5330 return ret;
38717946 5331}
38717946 5332
5009065d 5333static size_t intel_iommu_unmap(struct iommu_domain *domain,
ea8ea460 5334 unsigned long iova, size_t size)
38717946 5335{
00a77deb 5336 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
ea8ea460 5337 struct page *freelist = NULL;
ea8ea460
DW
5338 unsigned long start_pfn, last_pfn;
5339 unsigned int npages;
42e8c186 5340 int iommu_id, level = 0;
5cf0a76f
DW
5341
 5342 /* Cope with the horrid API, which requires us to unmap more than the
 5343 size argument asks for if the range is a large-page mapping. */
dc02e46e 5344 BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
5cf0a76f
DW
5345
5346 if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
5347 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4b99d352 5348
ea8ea460
DW
5349 start_pfn = iova >> VTD_PAGE_SHIFT;
5350 last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
5351
5352 freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
5353
5354 npages = last_pfn - start_pfn + 1;
5355
f746a025 5356 for_each_domain_iommu(iommu_id, dmar_domain)
42e8c186
JR
5357 iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
5358 start_pfn, npages, !freelist, 0);
ea8ea460
DW
5359
5360 dma_free_pagelist(freelist);
fe40f1e0 5361
163cc52c
DW
5362 if (dmar_domain->max_addr == iova + size)
5363 dmar_domain->max_addr = iova;
b146a1c9 5364
5cf0a76f 5365 return size;
38717946 5366}
38717946 5367
d14d6577 5368static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
bb5547ac 5369 dma_addr_t iova)
38717946 5370{
00a77deb 5371 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
38717946 5372 struct dma_pte *pte;
5cf0a76f 5373 int level = 0;
faa3d6f5 5374 u64 phys = 0;
38717946 5375
5cf0a76f 5376 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
38717946 5377 if (pte)
faa3d6f5 5378 phys = dma_pte_addr(pte);
38717946 5379
faa3d6f5 5380 return phys;
38717946 5381}
a8bcbb0d 5382
95587a75
LB
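/*
 * True only if every active DMAR unit advertises scalable mode; a
 * single legacy-mode IOMMU disables it system-wide.  The helper below
 * applies the same all-or-nothing rule to PASID support.
 */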
5383static inline bool scalable_mode_support(void)
5384{
5385 struct dmar_drhd_unit *drhd;
5386 struct intel_iommu *iommu;
5387 bool ret = true;
5388
5389 rcu_read_lock();
5390 for_each_active_iommu(iommu, drhd) {
5391 if (!sm_supported(iommu)) {
5392 ret = false;
5393 break;
5394 }
5395 }
5396 rcu_read_unlock();
5397
5398 return ret;
5399}
5400
5401static inline bool iommu_pasid_support(void)
5402{
5403 struct dmar_drhd_unit *drhd;
5404 struct intel_iommu *iommu;
5405 bool ret = true;
5406
5407 rcu_read_lock();
5408 for_each_active_iommu(iommu, drhd) {
5409 if (!pasid_supported(iommu)) {
5410 ret = false;
5411 break;
5412 }
5413 }
5414 rcu_read_unlock();
5415
5416 return ret;
5417}
5418
5d587b8d 5419static bool intel_iommu_capable(enum iommu_cap cap)
dbb9fd86 5420{
dbb9fd86 5421 if (cap == IOMMU_CAP_CACHE_COHERENCY)
5d587b8d 5422 return domain_update_iommu_snooping(NULL) == 1;
323f99cb 5423 if (cap == IOMMU_CAP_INTR_REMAP)
5d587b8d 5424 return irq_remapping_enabled == 1;
dbb9fd86 5425
5d587b8d 5426 return false;
dbb9fd86
SY
5427}
5428
abdfdde2
AW
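/*
 * Called when a device is added: link it to its IOMMU in sysfs and
 * place it in (or create) an IOMMU group.
 */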
5429static int intel_iommu_add_device(struct device *dev)
5430{
a5459cfe 5431 struct intel_iommu *iommu;
abdfdde2 5432 struct iommu_group *group;
156baca8 5433 u8 bus, devfn;
70ae6f0d 5434
a5459cfe
AW
5435 iommu = device_to_iommu(dev, &bus, &devfn);
5436 if (!iommu)
70ae6f0d
AW
5437 return -ENODEV;
5438
e3d10af1 5439 iommu_device_link(&iommu->iommu, dev);
a4ff1fc2 5440
e17f9ff4 5441 group = iommu_group_get_for_dev(dev);
783f157b 5442
e17f9ff4
AW
5443 if (IS_ERR(group))
5444 return PTR_ERR(group);
bcb71abe 5445
abdfdde2 5446 iommu_group_put(group);
e17f9ff4 5447 return 0;
abdfdde2 5448}
70ae6f0d 5449
abdfdde2
AW
5450static void intel_iommu_remove_device(struct device *dev)
5451{
a5459cfe
AW
5452 struct intel_iommu *iommu;
5453 u8 bus, devfn;
5454
5455 iommu = device_to_iommu(dev, &bus, &devfn);
5456 if (!iommu)
5457 return;
5458
abdfdde2 5459 iommu_group_remove_device(dev);
a5459cfe 5460
e3d10af1 5461 iommu_device_unlink(&iommu->iommu, dev);
70ae6f0d
AW
5462}
5463
0659b8dc
EA
5464static void intel_iommu_get_resv_regions(struct device *device,
5465 struct list_head *head)
5466{
5467 struct iommu_resv_region *reg;
5468 struct dmar_rmrr_unit *rmrr;
5469 struct device *i_dev;
5470 int i;
5471
5472 rcu_read_lock();
5473 for_each_rmrr_units(rmrr) {
5474 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
5475 i, i_dev) {
5476 if (i_dev != device)
5477 continue;
5478
5479 list_add_tail(&rmrr->resv->list, head);
5480 }
5481 }
5482 rcu_read_unlock();
5483
5484 reg = iommu_alloc_resv_region(IOAPIC_RANGE_START,
5485 IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1,
9d3a4de4 5486 0, IOMMU_RESV_MSI);
0659b8dc
EA
5487 if (!reg)
5488 return;
5489 list_add_tail(&reg->list, head);
5490}
5491
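/*
 * Only the MSI region allocated in intel_iommu_get_resv_regions() is
 * freed here; the RMRR entries belong to the global rmrr list and must
 * stay allocated.
 */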
5492static void intel_iommu_put_resv_regions(struct device *dev,
5493 struct list_head *head)
5494{
5495 struct iommu_resv_region *entry, *next;
5496
5497 list_for_each_entry_safe(entry, next, head, list) {
198bc325 5498 if (entry->type == IOMMU_RESV_MSI)
0659b8dc
EA
5499 kfree(entry);
5500 }
70ae6f0d
AW
5501}
5502
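/*
 * Set the PASID-enable bit in the device's context entry, flushing the
 * context cache if it was previously clear, and then enable the PASID
 * capability on the device itself if needed.
 */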
d7cbc0f3 5503int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev)
2f26e0a9
DW
5504{
5505 struct device_domain_info *info;
5506 struct context_entry *context;
5507 struct dmar_domain *domain;
5508 unsigned long flags;
5509 u64 ctx_lo;
5510 int ret;
5511
d7cbc0f3 5512 domain = get_valid_domain_for_dev(dev);
2f26e0a9
DW
5513 if (!domain)
5514 return -EINVAL;
5515
5516 spin_lock_irqsave(&device_domain_lock, flags);
5517 spin_lock(&iommu->lock);
5518
5519 ret = -EINVAL;
d7cbc0f3 5520 info = dev->archdata.iommu;
2f26e0a9
DW
5521 if (!info || !info->pasid_supported)
5522 goto out;
5523
5524 context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
5525 if (WARN_ON(!context))
5526 goto out;
5527
5528 ctx_lo = context[0].lo;
5529
2f26e0a9 5530 if (!(ctx_lo & CONTEXT_PASIDE)) {
2f26e0a9
DW
5531 ctx_lo |= CONTEXT_PASIDE;
5532 context[0].lo = ctx_lo;
5533 wmb();
d7cbc0f3
LB
5534 iommu->flush.flush_context(iommu,
5535 domain->iommu_did[iommu->seq_id],
5536 PCI_DEVID(info->bus, info->devfn),
2f26e0a9
DW
5537 DMA_CCMD_MASK_NOBIT,
5538 DMA_CCMD_DEVICE_INVL);
5539 }
5540
5541 /* Enable PASID support in the device, if it wasn't already */
5542 if (!info->pasid_enabled)
5543 iommu_enable_dev_iotlb(info);
5544
2f26e0a9
DW
5545 ret = 0;
5546
5547 out:
5548 spin_unlock(&iommu->lock);
5549 spin_unlock_irqrestore(&device_domain_lock, flags);
5550
5551 return ret;
5552}
5553
d7cbc0f3 5554#ifdef CONFIG_INTEL_IOMMU_SVM
2f26e0a9
DW
5555struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
5556{
5557 struct intel_iommu *iommu;
5558 u8 bus, devfn;
5559
5560 if (iommu_dummy(dev)) {
5561 dev_warn(dev,
5562 "No IOMMU translation for device; cannot enable SVM\n");
5563 return NULL;
5564 }
5565
5566 iommu = device_to_iommu(dev, &bus, &devfn);
 5567 if (!iommu) {
b9997e38 5568 dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
2f26e0a9
DW
5569 return NULL;
5570 }
5571
2f26e0a9
DW
5572 return iommu;
5573}
5574#endif /* CONFIG_INTEL_IOMMU_SVM */
5575
95587a75
LB
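/*
 * Allow @dev to use auxiliary domains: requires a scalable-mode IOMMU
 * with PASID support, and PASID must first be enabled for the device.
 */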
5576static int intel_iommu_enable_auxd(struct device *dev)
5577{
5578 struct device_domain_info *info;
5579 struct intel_iommu *iommu;
5580 unsigned long flags;
5581 u8 bus, devfn;
5582 int ret;
5583
5584 iommu = device_to_iommu(dev, &bus, &devfn);
5585 if (!iommu || dmar_disabled)
5586 return -EINVAL;
5587
5588 if (!sm_supported(iommu) || !pasid_supported(iommu))
5589 return -EINVAL;
5590
5591 ret = intel_iommu_enable_pasid(iommu, dev);
5592 if (ret)
5593 return -ENODEV;
5594
5595 spin_lock_irqsave(&device_domain_lock, flags);
5596 info = dev->archdata.iommu;
5597 info->auxd_enabled = 1;
5598 spin_unlock_irqrestore(&device_domain_lock, flags);
5599
5600 return 0;
5601}
5602
5603static int intel_iommu_disable_auxd(struct device *dev)
5604{
5605 struct device_domain_info *info;
5606 unsigned long flags;
5607
5608 spin_lock_irqsave(&device_domain_lock, flags);
5609 info = dev->archdata.iommu;
5610 if (!WARN_ON(!info))
5611 info->auxd_enabled = 0;
5612 spin_unlock_irqrestore(&device_domain_lock, flags);
5613
5614 return 0;
5615}
5616
5617/*
 5618 * A PCI Express Designated Vendor-Specific Extended Capability (DVSEC) is
 5619 * defined in section 3.7 of the Intel Scalable I/O Virtualization technical
 5620 * spec so that system software and tools can detect endpoint devices that
 5621 * support Intel Scalable I/O Virtualization without a host driver dependency.
 5622 *
 5623 * Returns the offset of the matching extended capability structure within
 5624 * the device's PCI configuration space, or 0 if the device does not
 5625 * support it.
5626 */
5627static int siov_find_pci_dvsec(struct pci_dev *pdev)
5628{
5629 int pos;
5630 u16 vendor, id;
5631
 5632 pos = pci_find_next_ext_capability(pdev, 0, 0x23); /* 0x23: DVSEC capability */
 5633 while (pos) {
 5634 pci_read_config_word(pdev, pos + 4, &vendor); /* DVSEC header 1: vendor ID */
 5635 pci_read_config_word(pdev, pos + 8, &id); /* DVSEC header 2: DVSEC ID */
 5636 if (vendor == PCI_VENDOR_ID_INTEL && id == 5) /* DVSEC ID 5: Scalable IOV */
5637 return pos;
5638
5639 pos = pci_find_next_ext_capability(pdev, pos, 0x23);
5640 }
5641
5642 return 0;
5643}
5644
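/*
 * A device qualifies for IOMMU_DEV_FEAT_AUX only if every IOMMU runs in
 * scalable mode with PASID support and the device itself exposes both
 * the PCI PASID capability and the Scalable IOV DVSEC.
 */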
5645static bool
5646intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat)
5647{
5648 if (feat == IOMMU_DEV_FEAT_AUX) {
5649 int ret;
5650
5651 if (!dev_is_pci(dev) || dmar_disabled ||
5652 !scalable_mode_support() || !iommu_pasid_support())
5653 return false;
5654
5655 ret = pci_pasid_features(to_pci_dev(dev));
5656 if (ret < 0)
5657 return false;
5658
5659 return !!siov_find_pci_dvsec(to_pci_dev(dev));
5660 }
5661
5662 return false;
5663}
5664
5665static int
5666intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
5667{
5668 if (feat == IOMMU_DEV_FEAT_AUX)
5669 return intel_iommu_enable_auxd(dev);
5670
5671 return -ENODEV;
5672}
5673
5674static int
5675intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat)
5676{
5677 if (feat == IOMMU_DEV_FEAT_AUX)
5678 return intel_iommu_disable_auxd(dev);
5679
5680 return -ENODEV;
5681}
5682
5683static bool
5684intel_iommu_dev_feat_enabled(struct device *dev, enum iommu_dev_features feat)
5685{
5686 struct device_domain_info *info = dev->archdata.iommu;
5687
5688 if (feat == IOMMU_DEV_FEAT_AUX)
5689 return scalable_mode_support() && info && info->auxd_enabled;
5690
5691 return false;
5692}
5693
0e8000f8
LB
5694static int
5695intel_iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev)
5696{
5697 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5698
5699 return dmar_domain->default_pasid > 0 ?
5700 dmar_domain->default_pasid : -EINVAL;
5701}
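/*
 * Roughly how a caller (e.g. an mdev/VFIO backend) is expected to drive
 * the aux-domain ops above through the generic IOMMU API; an
 * illustrative sketch only, with error handling omitted:
 *
 *	if (iommu_dev_has_feature(dev, IOMMU_DEV_FEAT_AUX) &&
 *	    !iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_AUX)) {
 *		struct iommu_domain *dom = iommu_domain_alloc(dev->bus);
 *		int pasid;
 *
 *		iommu_aux_attach_device(dom, dev);
 *		pasid = iommu_aux_get_pasid(dom, dev);
 *		... program the parent device to tag DMA with pasid ...
 *	}
 */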
5702
b0119e87 5703const struct iommu_ops intel_iommu_ops = {
0659b8dc
EA
5704 .capable = intel_iommu_capable,
5705 .domain_alloc = intel_iommu_domain_alloc,
5706 .domain_free = intel_iommu_domain_free,
5707 .attach_dev = intel_iommu_attach_device,
5708 .detach_dev = intel_iommu_detach_device,
67b8e02b
LB
5709 .aux_attach_dev = intel_iommu_aux_attach_device,
5710 .aux_detach_dev = intel_iommu_aux_detach_device,
0e8000f8 5711 .aux_get_pasid = intel_iommu_aux_get_pasid,
0659b8dc
EA
5712 .map = intel_iommu_map,
5713 .unmap = intel_iommu_unmap,
0659b8dc
EA
5714 .iova_to_phys = intel_iommu_iova_to_phys,
5715 .add_device = intel_iommu_add_device,
5716 .remove_device = intel_iommu_remove_device,
5717 .get_resv_regions = intel_iommu_get_resv_regions,
5718 .put_resv_regions = intel_iommu_put_resv_regions,
5719 .device_group = pci_device_group,
95587a75
LB
5720 .dev_has_feat = intel_iommu_dev_has_feat,
5721 .dev_feat_enabled = intel_iommu_dev_feat_enabled,
5722 .dev_enable_feat = intel_iommu_dev_enable_feat,
5723 .dev_disable_feat = intel_iommu_dev_disable_feat,
0659b8dc 5724 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
a8bcbb0d 5725};
9af88143 5726
9452618e
DV
5727static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
5728{
5729 /* G4x/GM45 integrated gfx dmar support is totally busted. */
932a6523 5730 pci_info(dev, "Disabling IOMMU for graphics on this chipset\n");
9452618e
DV
5731 dmar_map_gfx = 0;
5732}
5733
5734DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
5735DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
5736DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
5737DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
5738DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
5739DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
5740DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
5741
d34d6517 5742static void quirk_iommu_rwbf(struct pci_dev *dev)
9af88143
DW
5743{
5744 /*
5745 * Mobile 4 Series Chipset neglects to set RWBF capability,
210561ff 5746 * but needs it. Same seems to hold for the desktop versions.
9af88143 5747 */
932a6523 5748 pci_info(dev, "Forcing write-buffer flush capability\n");
9af88143
DW
5749 rwbf_quirk = 1;
5750}
5751
5752DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
210561ff
DV
5753DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
5754DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
5755DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
5756DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
5757DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
5758DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
e0fc7e0b 5759
eecfd57f
AJ
5760#define GGC 0x52
5761#define GGC_MEMORY_SIZE_MASK (0xf << 8)
5762#define GGC_MEMORY_SIZE_NONE (0x0 << 8)
5763#define GGC_MEMORY_SIZE_1M (0x1 << 8)
5764#define GGC_MEMORY_SIZE_2M (0x3 << 8)
5765#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
5766#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
5767#define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
5768#define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
5769
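/*
 * Per the GGC macros above, bits 11:8 of the graphics control register
 * encode how much memory the BIOS stole for graphics and whether a
 * VT-d-capable (shadow GTT) allocation was made; the quirk below only
 * cares about GGC_MEMORY_VT_ENABLED.
 */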
d34d6517 5770static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
9eecabcb
DW
5771{
5772 unsigned short ggc;
5773
eecfd57f 5774 if (pci_read_config_word(dev, GGC, &ggc))
9eecabcb
DW
5775 return;
5776
eecfd57f 5777 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
932a6523 5778 pci_info(dev, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
9eecabcb 5779 dmar_map_gfx = 0;
6fbcfb3e
DW
5780 } else if (dmar_map_gfx) {
5781 /* we have to ensure the gfx device is idle before we flush */
932a6523 5782 pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n");
6fbcfb3e
DW
5783 intel_iommu_strict = 1;
5784 }
9eecabcb
DW
5785}
5786DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
5787DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
5788DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
5789DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
5790
e0fc7e0b
DW
5791/* On Tylersburg chipsets, some BIOSes have been known to enable the
5792 ISOCH DMAR unit for the Azalia sound device, but not give it any
5793 TLB entries, which causes it to deadlock. Check for that. We do
5794 this in a function called from init_dmars(), instead of in a PCI
5795 quirk, because we don't want to print the obnoxious "BIOS broken"
5796 message if VT-d is actually disabled.
5797*/
5798static void __init check_tylersburg_isoch(void)
5799{
5800 struct pci_dev *pdev;
5801 uint32_t vtisochctrl;
5802
5803 /* If there's no Azalia in the system anyway, forget it. */
5804 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
5805 if (!pdev)
5806 return;
5807 pci_dev_put(pdev);
5808
5809 /* System Management Registers. Might be hidden, in which case
5810 we can't do the sanity check. But that's OK, because the
5811 known-broken BIOSes _don't_ actually hide it, so far. */
5812 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
5813 if (!pdev)
5814 return;
5815
5816 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
5817 pci_dev_put(pdev);
5818 return;
5819 }
5820
5821 pci_dev_put(pdev);
5822
5823 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
5824 if (vtisochctrl & 1)
5825 return;
5826
5827 /* Drop all bits other than the number of TLB entries */
5828 vtisochctrl &= 0x1c;
5829
5830 /* If we have the recommended number of TLB entries (16), fine. */
5831 if (vtisochctrl == 0x10)
5832 return;
5833
5834 /* Zero TLB entries? You get to ride the short bus to school. */
5835 if (!vtisochctrl) {
5836 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
5837 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
5838 dmi_get_system_info(DMI_BIOS_VENDOR),
5839 dmi_get_system_info(DMI_BIOS_VERSION),
5840 dmi_get_system_info(DMI_PRODUCT_VERSION));
5841 iommu_identity_mapping |= IDENTMAP_AZALIA;
5842 return;
5843 }
9f10e5bf
JR
5844
 5845 pr_warn("Recommended number of TLB entries for the ISOCH unit is 16; your BIOS set %d\n",
e0fc7e0b
DW
5846 vtisochctrl);
5847}