2025cf9e 1// SPDX-License-Identifier: GPL-2.0-only
ba395927 2/*
ea8ea460 3 * Copyright © 2006-2014 Intel Corporation.
ba395927 4 *
ea8ea460
DW
5 * Authors: David Woodhouse <dwmw2@infradead.org>,
6 * Ashok Raj <ashok.raj@intel.com>,
7 * Shaohua Li <shaohua.li@intel.com>,
8 * Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
9 * Fenghua Yu <fenghua.yu@intel.com>
9f10e5bf 10 * Joerg Roedel <jroedel@suse.de>
ba395927
KA
11 */
12
9f10e5bf 13#define pr_fmt(fmt) "DMAR: " fmt
932a6523 14#define dev_fmt(fmt) pr_fmt(fmt)
9f10e5bf 15
ba395927
KA
16#include <linux/init.h>
17#include <linux/bitmap.h>
5e0d2a6f 18#include <linux/debugfs.h>
54485c30 19#include <linux/export.h>
ba395927
KA
20#include <linux/slab.h>
21#include <linux/irq.h>
22#include <linux/interrupt.h>
ba395927
KA
23#include <linux/spinlock.h>
24#include <linux/pci.h>
25#include <linux/dmar.h>
26#include <linux/dma-mapping.h>
27#include <linux/mempool.h>
75f05569 28#include <linux/memory.h>
aa473240 29#include <linux/cpu.h>
5e0d2a6f 30#include <linux/timer.h>
dfddb969 31#include <linux/io.h>
38717946 32#include <linux/iova.h>
5d450806 33#include <linux/iommu.h>
38717946 34#include <linux/intel-iommu.h>
134fac3f 35#include <linux/syscore_ops.h>
69575d38 36#include <linux/tboot.h>
adb2fe02 37#include <linux/dmi.h>
5cdede24 38#include <linux/pci-ats.h>
0ee332c1 39#include <linux/memblock.h>
36746436 40#include <linux/dma-contiguous.h>
fec777c3 41#include <linux/dma-direct.h>
091d42e4 42#include <linux/crash_dump.h>
98fa15f3 43#include <linux/numa.h>
8a8f422d 44#include <asm/irq_remapping.h>
ba395927 45#include <asm/cacheflush.h>
46a7fa27 46#include <asm/iommu.h>
ba395927 47
078e1ee2 48#include "irq_remapping.h"
56283174 49#include "intel-pasid.h"
078e1ee2 50
5b6985ce
FY
51#define ROOT_SIZE VTD_PAGE_SIZE
52#define CONTEXT_SIZE VTD_PAGE_SIZE
53
ba395927 54#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
18436afd 55#define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
ba395927 56#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
e0fc7e0b 57#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
ba395927
KA
58
59#define IOAPIC_RANGE_START (0xfee00000)
60#define IOAPIC_RANGE_END (0xfeefffff)
61#define IOVA_START_ADDR (0x1000)
62
5e3b4a15 63#define DEFAULT_DOMAIN_ADDRESS_WIDTH 57
ba395927 64
4ed0d3e6 65#define MAX_AGAW_WIDTH 64
5c645b35 66#define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
4ed0d3e6 67
2ebe3151
DW
68#define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
69#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
70
71/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
72 to match. That way, we can use 'unsigned long' for PFNs with impunity. */
73#define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
74 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
75#define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
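/*
 * Worked example (editor's illustration, not in the original source):
 * with gaw == 48 and VTD_PAGE_SHIFT == 12, __DOMAIN_MAX_PFN(48) is
 * (1ULL << 36) - 1, the last addressable 4KiB page frame, and
 * DOMAIN_MAX_ADDR(48) is that PFN shifted back up, 0xfffffffff000ULL.
 * On 32-bit kernels DOMAIN_MAX_PFN() additionally clamps the result to
 * ULONG_MAX so PFNs always fit in an unsigned long, as noted above.
 */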
ba395927 76
1b722500
RM
77/* IO virtual address start page frame number */
78#define IOVA_START_PFN (1)
79
f27be03b 80#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
5e0d2a6f 81
df08cdc7
AM
82/* page table handling */
83#define LEVEL_STRIDE (9)
84#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
85
6d1c56a9
OBC
86/*
 87 * This bitmap is used to advertise the page sizes our hardware supports
88 * to the IOMMU core, which will then use this information to split
89 * physically contiguous memory regions it is mapping into page sizes
90 * that we support.
91 *
92 * Traditionally the IOMMU core just handed us the mappings directly,
93 * after making sure the size is an order of a 4KiB page and that the
94 * mapping has natural alignment.
95 *
96 * To retain this behavior, we currently advertise that we support
97 * all page sizes that are an order of 4KiB.
98 *
99 * If at some point we'd like to utilize the IOMMU core's new behavior,
100 * we could change this to advertise the real page sizes we support.
101 */
102#define INTEL_IOMMU_PGSIZES (~0xFFFUL)
103
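/*
 * Editor's note for illustration: ~0xFFFUL has every bit at or above
 * bit 12 set, so this advertises 4KiB, 8KiB, 16KiB, ... i.e. every
 * power-of-two size that is a multiple of 4KiB, matching the "order of
 * 4KiB" behaviour the comment above describes.
 */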
df08cdc7
AM
104static inline int agaw_to_level(int agaw)
105{
106 return agaw + 2;
107}
108
109static inline int agaw_to_width(int agaw)
110{
5c645b35 111 return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
df08cdc7
AM
112}
113
114static inline int width_to_agaw(int width)
115{
5c645b35 116 return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
df08cdc7
AM
117}
118
119static inline unsigned int level_to_offset_bits(int level)
120{
121 return (level - 1) * LEVEL_STRIDE;
122}
123
124static inline int pfn_level_offset(unsigned long pfn, int level)
125{
126 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
127}
128
129static inline unsigned long level_mask(int level)
130{
131 return -1UL << level_to_offset_bits(level);
132}
133
134static inline unsigned long level_size(int level)
135{
136 return 1UL << level_to_offset_bits(level);
137}
138
139static inline unsigned long align_to_level(unsigned long pfn, int level)
140{
141 return (pfn + level_size(level) - 1) & level_mask(level);
142}
fd18de50 143
6dd9a7c7
YS
144static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
145{
5c645b35 146 return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
6dd9a7c7
YS
147}
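/*
 * Worked example (editor's illustration): a domain with agaw == 2 uses
 * agaw_to_level(2) == 4 page-table levels and covers agaw_to_width(2) ==
 * 30 + 2 * 9 == 48 address bits. For pfn 0x12345678, pfn_level_offset()
 * at level 4 returns (0x12345678 >> 27) & 0x1ff == 0x2 (the top-level
 * table index), while level 1 uses the low nine bits, 0x78.
 */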
148
dd4e8319
DW
149/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
150 are never going to work. */
151static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
152{
153 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
154}
155
156static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
157{
158 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
159}
160static inline unsigned long page_to_dma_pfn(struct page *pg)
161{
162 return mm_to_dma_pfn(page_to_pfn(pg));
163}
164static inline unsigned long virt_to_dma_pfn(void *p)
165{
166 return page_to_dma_pfn(virt_to_page(p));
167}
168
d9630fe9
WH
169/* global iommu list, set NULL for ignored DMAR units */
170static struct intel_iommu **g_iommus;
171
e0fc7e0b 172static void __init check_tylersburg_isoch(void);
9af88143
DW
173static int rwbf_quirk;
174
b779260b
JC
175/*
 176 * set to 1 to panic the kernel if VT-d can't be successfully enabled
177 * (used when kernel is launched w/ TXT)
178 */
179static int force_on = 0;
bfd20f1c 180int intel_iommu_tboot_noforce;
89a6079d 181static int no_platform_optin;
b779260b 182
46b08e1a 183#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
46b08e1a 184
091d42e4
JR
185/*
186 * Take a root_entry and return the Lower Context Table Pointer (LCTP)
187 * if marked present.
188 */
189static phys_addr_t root_entry_lctp(struct root_entry *re)
190{
191 if (!(re->lo & 1))
192 return 0;
193
194 return re->lo & VTD_PAGE_MASK;
195}
196
197/*
198 * Take a root_entry and return the Upper Context Table Pointer (UCTP)
199 * if marked present.
200 */
201static phys_addr_t root_entry_uctp(struct root_entry *re)
202{
203 if (!(re->hi & 1))
204 return 0;
46b08e1a 205
091d42e4
JR
206 return re->hi & VTD_PAGE_MASK;
207}
c07e7d21 208
cf484d0e
JR
209static inline void context_clear_pasid_enable(struct context_entry *context)
210{
211 context->lo &= ~(1ULL << 11);
212}
213
214static inline bool context_pasid_enabled(struct context_entry *context)
215{
216 return !!(context->lo & (1ULL << 11));
217}
218
219static inline void context_set_copied(struct context_entry *context)
220{
221 context->hi |= (1ull << 3);
222}
223
224static inline bool context_copied(struct context_entry *context)
225{
226 return !!(context->hi & (1ULL << 3));
227}
228
229static inline bool __context_present(struct context_entry *context)
c07e7d21
MM
230{
231 return (context->lo & 1);
232}
cf484d0e 233
26b86092 234bool context_present(struct context_entry *context)
cf484d0e
JR
235{
236 return context_pasid_enabled(context) ?
237 __context_present(context) :
238 __context_present(context) && !context_copied(context);
239}
240
c07e7d21
MM
241static inline void context_set_present(struct context_entry *context)
242{
243 context->lo |= 1;
244}
245
246static inline void context_set_fault_enable(struct context_entry *context)
247{
248 context->lo &= (((u64)-1) << 2) | 1;
249}
250
c07e7d21
MM
251static inline void context_set_translation_type(struct context_entry *context,
252 unsigned long value)
253{
254 context->lo &= (((u64)-1) << 4) | 3;
255 context->lo |= (value & 3) << 2;
256}
257
258static inline void context_set_address_root(struct context_entry *context,
259 unsigned long value)
260{
1a2262f9 261 context->lo &= ~VTD_PAGE_MASK;
c07e7d21
MM
262 context->lo |= value & VTD_PAGE_MASK;
263}
264
265static inline void context_set_address_width(struct context_entry *context,
266 unsigned long value)
267{
268 context->hi |= value & 7;
269}
270
271static inline void context_set_domain_id(struct context_entry *context,
272 unsigned long value)
273{
274 context->hi |= (value & ((1 << 16) - 1)) << 8;
275}
276
dbcd861f
JR
277static inline int context_domain_id(struct context_entry *c)
278{
279 return((c->hi >> 8) & 0xffff);
280}
281
c07e7d21
MM
282static inline void context_clear_entry(struct context_entry *context)
283{
284 context->lo = 0;
285 context->hi = 0;
286}
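/*
 * Editor's summary of the legacy-mode context-entry layout implied by
 * the accessors above: lo bit 0 = present, bit 1 = fault processing
 * disable, bits 3:2 = translation type, bits 63:12 = second-level page
 * table address; hi bits 2:0 = address width (AGAW), bits 23:8 = domain
 * id. Bit 11 of lo (PASID enable) and bit 3 of hi ("copied") are used
 * by this driver when inheriting tables from a previous kernel. See the
 * VT-d specification for the authoritative field definitions.
 */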
7a8fc25e 287
2c2e2c38
FY
288/*
 289 * This domain is a static identity mapping domain.
 290 * 1. This domain creates a static 1:1 mapping of all usable memory.
 291 * 2. It maps to each iommu if successful.
 292 * 3. Each iommu maps to this domain if successful.
293 */
19943b0e
DW
294static struct dmar_domain *si_domain;
295static int hw_pass_through = 1;
2c2e2c38 296
2c2e2c38 297/* si_domain contains multiple devices */
fa954e68 298#define DOMAIN_FLAG_STATIC_IDENTITY BIT(0)
2c2e2c38 299
942067f1
LB
300/*
301 * This is a DMA domain allocated through the iommu domain allocation
302 * interface. But one or more devices belonging to this domain have
 303 * been chosen to use a private domain. We should avoid using the
304 * map/unmap/iova_to_phys APIs on it.
305 */
306#define DOMAIN_FLAG_LOSE_CHILDREN BIT(1)
307
29a27719
JR
308#define for_each_domain_iommu(idx, domain) \
309 for (idx = 0; idx < g_num_of_iommus; idx++) \
310 if (domain->iommu_refcnt[idx])
311
b94e4117
JL
312struct dmar_rmrr_unit {
313 struct list_head list; /* list of rmrr units */
314 struct acpi_dmar_header *hdr; /* ACPI header */
315 u64 base_address; /* reserved base address*/
316 u64 end_address; /* reserved end address */
832bd858 317 struct dmar_dev_scope *devices; /* target devices */
b94e4117
JL
318 int devices_cnt; /* target device count */
319};
320
321struct dmar_atsr_unit {
322 struct list_head list; /* list of ATSR units */
323 struct acpi_dmar_header *hdr; /* ACPI header */
832bd858 324 struct dmar_dev_scope *devices; /* target devices */
b94e4117
JL
325 int devices_cnt; /* target device count */
326 u8 include_all:1; /* include all ports */
327};
328
329static LIST_HEAD(dmar_atsr_units);
330static LIST_HEAD(dmar_rmrr_units);
331
332#define for_each_rmrr_units(rmrr) \
333 list_for_each_entry(rmrr, &dmar_rmrr_units, list)
334
5e0d2a6f 335/* bitmap for indexing intel_iommus */
5e0d2a6f 336static int g_num_of_iommus;
337
92d03cc8 338static void domain_exit(struct dmar_domain *domain);
ba395927 339static void domain_remove_dev_info(struct dmar_domain *domain);
71753239 340static void dmar_remove_one_dev_info(struct device *dev);
127c7615 341static void __dmar_remove_one_dev_info(struct device_domain_info *info);
2a46ddf7
JL
342static int domain_detach_iommu(struct dmar_domain *domain,
343 struct intel_iommu *iommu);
4de354ec 344static bool device_is_rmrr_locked(struct device *dev);
8af46c78
LB
345static int intel_iommu_attach_device(struct iommu_domain *domain,
346 struct device *dev);
ba395927 347
d3f13810 348#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
0cd5c3c8
KM
349int dmar_disabled = 0;
350#else
351int dmar_disabled = 1;
d3f13810 352#endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
0cd5c3c8 353
cdd3a249 354int intel_iommu_sm;
8bc1f85c
ED
355int intel_iommu_enabled = 0;
356EXPORT_SYMBOL_GPL(intel_iommu_enabled);
357
2d9e667e 358static int dmar_map_gfx = 1;
7d3b03ce 359static int dmar_forcedac;
5e0d2a6f 360static int intel_iommu_strict;
6dd9a7c7 361static int intel_iommu_superpage = 1;
ae853ddb 362static int iommu_identity_mapping;
e5e04d05 363static int intel_no_bounce;
c83b2f20 364
ae853ddb
DW
365#define IDENTMAP_ALL 1
366#define IDENTMAP_GFX 2
367#define IDENTMAP_AZALIA 4
c83b2f20 368
c0771df8
DW
369int intel_iommu_gfx_mapped;
370EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
371
ba395927 372#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
8af46c78 373#define DEFER_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-2))
ba395927
KA
374static DEFINE_SPINLOCK(device_domain_lock);
375static LIST_HEAD(device_domain_list);
376
e5e04d05
LB
377#define device_needs_bounce(d) (!intel_no_bounce && dev_is_pci(d) && \
378 to_pci_dev(d)->untrusted)
379
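/*
 * Editor's note (assumption based on this patch series): the DMA map
 * paths later in this file are expected to consult device_needs_bounce()
 * so that an untrusted PCI device (one the PCI core marked
 * pci_dev->untrusted, e.g. behind an external-facing port) gets its DMA
 * bounced through swiotlb buffers instead of mapping the original pages.
 */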
85319dcc
LB
380/*
381 * Iterate over elements in device_domain_list and call the specified
0bbeb01a 382 * callback @fn against each element.
85319dcc
LB
383 */
384int for_each_device_domain(int (*fn)(struct device_domain_info *info,
385 void *data), void *data)
386{
387 int ret = 0;
0bbeb01a 388 unsigned long flags;
85319dcc
LB
389 struct device_domain_info *info;
390
0bbeb01a 391 spin_lock_irqsave(&device_domain_lock, flags);
85319dcc
LB
392 list_for_each_entry(info, &device_domain_list, global) {
393 ret = fn(info, data);
0bbeb01a
LB
394 if (ret) {
395 spin_unlock_irqrestore(&device_domain_lock, flags);
85319dcc 396 return ret;
0bbeb01a 397 }
85319dcc 398 }
0bbeb01a 399 spin_unlock_irqrestore(&device_domain_lock, flags);
85319dcc
LB
400
401 return 0;
402}
403
b0119e87 404const struct iommu_ops intel_iommu_ops;
a8bcbb0d 405
4158c2ec
JR
406static bool translation_pre_enabled(struct intel_iommu *iommu)
407{
408 return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
409}
410
091d42e4
JR
411static void clear_translation_pre_enabled(struct intel_iommu *iommu)
412{
413 iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
414}
415
4158c2ec
JR
416static void init_translation_status(struct intel_iommu *iommu)
417{
418 u32 gsts;
419
420 gsts = readl(iommu->reg + DMAR_GSTS_REG);
421 if (gsts & DMA_GSTS_TES)
422 iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
423}
424
00a77deb
JR
 425/* Convert a generic 'struct iommu_domain' to the private 'struct dmar_domain' */
426static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
427{
428 return container_of(dom, struct dmar_domain, domain);
429}
430
ba395927
KA
431static int __init intel_iommu_setup(char *str)
432{
433 if (!str)
434 return -EINVAL;
435 while (*str) {
0cd5c3c8
KM
436 if (!strncmp(str, "on", 2)) {
437 dmar_disabled = 0;
9f10e5bf 438 pr_info("IOMMU enabled\n");
0cd5c3c8 439 } else if (!strncmp(str, "off", 3)) {
ba395927 440 dmar_disabled = 1;
89a6079d 441 no_platform_optin = 1;
9f10e5bf 442 pr_info("IOMMU disabled\n");
ba395927
KA
443 } else if (!strncmp(str, "igfx_off", 8)) {
444 dmar_map_gfx = 0;
9f10e5bf 445 pr_info("Disable GFX device mapping\n");
7d3b03ce 446 } else if (!strncmp(str, "forcedac", 8)) {
9f10e5bf 447 pr_info("Forcing DAC for PCI devices\n");
7d3b03ce 448 dmar_forcedac = 1;
5e0d2a6f 449 } else if (!strncmp(str, "strict", 6)) {
9f10e5bf 450 pr_info("Disable batched IOTLB flush\n");
5e0d2a6f 451 intel_iommu_strict = 1;
6dd9a7c7 452 } else if (!strncmp(str, "sp_off", 6)) {
9f10e5bf 453 pr_info("Disable supported super page\n");
6dd9a7c7 454 intel_iommu_superpage = 0;
8950dcd8
LB
455 } else if (!strncmp(str, "sm_on", 5)) {
456 pr_info("Intel-IOMMU: scalable mode supported\n");
457 intel_iommu_sm = 1;
bfd20f1c
SL
458 } else if (!strncmp(str, "tboot_noforce", 13)) {
459 printk(KERN_INFO
460 "Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
461 intel_iommu_tboot_noforce = 1;
e5e04d05
LB
462 } else if (!strncmp(str, "nobounce", 8)) {
463 pr_info("Intel-IOMMU: No bounce buffer. This could expose security risks of DMA attacks\n");
464 intel_no_bounce = 1;
ba395927
KA
465 }
466
467 str += strcspn(str, ",");
468 while (*str == ',')
469 str++;
470 }
471 return 0;
472}
473__setup("intel_iommu=", intel_iommu_setup);
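/*
 * Usage illustration (editor's note): the options above are parsed as a
 * comma-separated list on the kernel command line, e.g.
 * "intel_iommu=on,sm_on,strict" enables the IOMMU, turns on scalable
 * mode and disables batched IOTLB flushing, while "intel_iommu=off"
 * disables DMA remapping entirely.
 */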
474
475static struct kmem_cache *iommu_domain_cache;
476static struct kmem_cache *iommu_devinfo_cache;
ba395927 477
9452d5bf
JR
478static struct dmar_domain* get_iommu_domain(struct intel_iommu *iommu, u16 did)
479{
8bf47816
JR
480 struct dmar_domain **domains;
481 int idx = did >> 8;
482
483 domains = iommu->domains[idx];
484 if (!domains)
485 return NULL;
486
487 return domains[did & 0xff];
9452d5bf
JR
488}
489
490static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
491 struct dmar_domain *domain)
492{
8bf47816
JR
493 struct dmar_domain **domains;
494 int idx = did >> 8;
495
496 if (!iommu->domains[idx]) {
497 size_t size = 256 * sizeof(struct dmar_domain *);
498 iommu->domains[idx] = kzalloc(size, GFP_ATOMIC);
499 }
500
501 domains = iommu->domains[idx];
502 if (WARN_ON(!domains))
503 return;
504 else
505 domains[did & 0xff] = domain;
9452d5bf
JR
506}
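/*
 * Worked example (editor's illustration): the per-iommu domain pointers
 * live in a two-level array of 256-entry pages, so domain-id 0x1234 is
 * stored at iommu->domains[0x12][0x34]; the second-level page is only
 * allocated the first time a domain-id in that 256-wide range is used.
 */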
507
9ddbfb42 508void *alloc_pgtable_page(int node)
eb3fa7cb 509{
4c923d47
SS
510 struct page *page;
511 void *vaddr = NULL;
eb3fa7cb 512
4c923d47
SS
513 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
514 if (page)
515 vaddr = page_address(page);
eb3fa7cb 516 return vaddr;
ba395927
KA
517}
518
9ddbfb42 519void free_pgtable_page(void *vaddr)
ba395927
KA
520{
521 free_page((unsigned long)vaddr);
522}
523
524static inline void *alloc_domain_mem(void)
525{
354bb65e 526 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
ba395927
KA
527}
528
38717946 529static void free_domain_mem(void *vaddr)
ba395927
KA
530{
531 kmem_cache_free(iommu_domain_cache, vaddr);
532}
533
534static inline void * alloc_devinfo_mem(void)
535{
354bb65e 536 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
ba395927
KA
537}
538
539static inline void free_devinfo_mem(void *vaddr)
540{
541 kmem_cache_free(iommu_devinfo_cache, vaddr);
542}
543
28ccce0d
JR
544static inline int domain_type_is_si(struct dmar_domain *domain)
545{
546 return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
547}
548
162d1b10
JL
549static inline int domain_pfn_supported(struct dmar_domain *domain,
550 unsigned long pfn)
551{
552 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
553
554 return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
555}
556
4ed0d3e6 557static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
1b573683
WH
558{
559 unsigned long sagaw;
560 int agaw = -1;
561
562 sagaw = cap_sagaw(iommu->cap);
4ed0d3e6 563 for (agaw = width_to_agaw(max_gaw);
1b573683
WH
564 agaw >= 0; agaw--) {
565 if (test_bit(agaw, &sagaw))
566 break;
567 }
568
569 return agaw;
570}
571
4ed0d3e6
FY
572/*
573 * Calculate max SAGAW for each iommu.
574 */
575int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
576{
577 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
578}
579
580/*
 581 * Calculate the agaw for each iommu.
 582 * "SAGAW" may be different across iommus; use a default agaw, and
 583 * fall back to a smaller supported agaw for iommus that don't support the default agaw.
584 */
585int iommu_calculate_agaw(struct intel_iommu *iommu)
586{
587 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
588}
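/*
 * Worked example (editor's note): cap_sagaw() is a bitmap where bit N
 * means agaw N (address width 30 + 9 * N) is supported. With
 * DEFAULT_DOMAIN_ADDRESS_WIDTH == 57, width_to_agaw(57) == 3, so the
 * search starts at agaw 3 (57-bit, 5-level tables) and walks down; an
 * IOMMU that only supports 4-level tables (bit 2 set) yields agaw 2,
 * i.e. a 48-bit domain address width.
 */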
589
2c2e2c38 590/* This function only returns a single iommu in a domain */
9ddbfb42 591struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
8c11e798
WH
592{
593 int iommu_id;
594
2c2e2c38 595 /* si_domain and vm domain should not get here. */
fa954e68
LB
596 if (WARN_ON(domain->domain.type != IOMMU_DOMAIN_DMA))
597 return NULL;
598
29a27719
JR
599 for_each_domain_iommu(iommu_id, domain)
600 break;
601
8c11e798
WH
602 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
603 return NULL;
604
605 return g_iommus[iommu_id];
606}
607
8e604097
WH
608static void domain_update_iommu_coherency(struct dmar_domain *domain)
609{
d0501960
DW
610 struct dmar_drhd_unit *drhd;
611 struct intel_iommu *iommu;
2f119c78
QL
612 bool found = false;
613 int i;
2e12bc29 614
d0501960 615 domain->iommu_coherency = 1;
8e604097 616
29a27719 617 for_each_domain_iommu(i, domain) {
2f119c78 618 found = true;
8e604097
WH
619 if (!ecap_coherent(g_iommus[i]->ecap)) {
620 domain->iommu_coherency = 0;
621 break;
622 }
8e604097 623 }
d0501960
DW
624 if (found)
625 return;
626
627 /* No hardware attached; use lowest common denominator */
628 rcu_read_lock();
629 for_each_active_iommu(iommu, drhd) {
630 if (!ecap_coherent(iommu->ecap)) {
631 domain->iommu_coherency = 0;
632 break;
633 }
634 }
635 rcu_read_unlock();
8e604097
WH
636}
637
161f6934 638static int domain_update_iommu_snooping(struct intel_iommu *skip)
58c610bd 639{
161f6934
JL
640 struct dmar_drhd_unit *drhd;
641 struct intel_iommu *iommu;
642 int ret = 1;
58c610bd 643
161f6934
JL
644 rcu_read_lock();
645 for_each_active_iommu(iommu, drhd) {
646 if (iommu != skip) {
647 if (!ecap_sc_support(iommu->ecap)) {
648 ret = 0;
649 break;
650 }
58c610bd 651 }
58c610bd 652 }
161f6934
JL
653 rcu_read_unlock();
654
655 return ret;
58c610bd
SY
656}
657
161f6934 658static int domain_update_iommu_superpage(struct intel_iommu *skip)
6dd9a7c7 659{
8140a95d 660 struct dmar_drhd_unit *drhd;
161f6934 661 struct intel_iommu *iommu;
8140a95d 662 int mask = 0xf;
6dd9a7c7
YS
663
664 if (!intel_iommu_superpage) {
161f6934 665 return 0;
6dd9a7c7
YS
666 }
667
8140a95d 668 /* set iommu_superpage to the smallest common denominator */
0e242612 669 rcu_read_lock();
8140a95d 670 for_each_active_iommu(iommu, drhd) {
161f6934
JL
671 if (iommu != skip) {
672 mask &= cap_super_page_val(iommu->cap);
673 if (!mask)
674 break;
6dd9a7c7
YS
675 }
676 }
0e242612
JL
677 rcu_read_unlock();
678
161f6934 679 return fls(mask);
6dd9a7c7
YS
680}
681
58c610bd
SY
682/* Some capabilities may be different across iommus */
683static void domain_update_iommu_cap(struct dmar_domain *domain)
684{
685 domain_update_iommu_coherency(domain);
161f6934
JL
686 domain->iommu_snooping = domain_update_iommu_snooping(NULL);
687 domain->iommu_superpage = domain_update_iommu_superpage(NULL);
58c610bd
SY
688}
689
26b86092
SM
690struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
691 u8 devfn, int alloc)
03ecc32c
DW
692{
693 struct root_entry *root = &iommu->root_entry[bus];
694 struct context_entry *context;
695 u64 *entry;
696
4df4eab1 697 entry = &root->lo;
765b6a98 698 if (sm_supported(iommu)) {
03ecc32c
DW
699 if (devfn >= 0x80) {
700 devfn -= 0x80;
701 entry = &root->hi;
702 }
703 devfn *= 2;
704 }
03ecc32c
DW
705 if (*entry & 1)
706 context = phys_to_virt(*entry & VTD_PAGE_MASK);
707 else {
708 unsigned long phy_addr;
709 if (!alloc)
710 return NULL;
711
712 context = alloc_pgtable_page(iommu->node);
713 if (!context)
714 return NULL;
715
716 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
717 phy_addr = virt_to_phys((void *)context);
718 *entry = phy_addr | 1;
719 __iommu_flush_cache(iommu, entry, sizeof(*entry));
720 }
721 return &context[devfn];
722}
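/*
 * Worked example (editor's illustration): in scalable mode each root
 * entry is split in two, so devfn 0x85 on bus 3 selects
 * root_entry[3].hi (devfn >= 0x80) and, after rebasing and doubling,
 * ends up at index (0x85 - 0x80) * 2 == 0x0a of that context table. In
 * legacy mode the whole 256-entry context table hangs off
 * root_entry[3].lo and is indexed directly by devfn.
 */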
723
4ed6a540
DW
724static int iommu_dummy(struct device *dev)
725{
726 return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
727}
728
b9a7f981
EA
729/**
730 * is_downstream_to_pci_bridge - test if a device belongs to the PCI
731 * sub-hierarchy of a candidate PCI-PCI bridge
732 * @dev: candidate PCI device belonging to @bridge PCI sub-hierarchy
733 * @bridge: the candidate PCI-PCI bridge
734 *
735 * Return: true if @dev belongs to @bridge PCI sub-hierarchy, else false.
736 */
737static bool
738is_downstream_to_pci_bridge(struct device *dev, struct device *bridge)
739{
740 struct pci_dev *pdev, *pbridge;
741
742 if (!dev_is_pci(dev) || !dev_is_pci(bridge))
743 return false;
744
745 pdev = to_pci_dev(dev);
746 pbridge = to_pci_dev(bridge);
747
748 if (pbridge->subordinate &&
749 pbridge->subordinate->number <= pdev->bus->number &&
750 pbridge->subordinate->busn_res.end >= pdev->bus->number)
751 return true;
752
753 return false;
754}
755
156baca8 756static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
c7151a8d
WH
757{
758 struct dmar_drhd_unit *drhd = NULL;
b683b230 759 struct intel_iommu *iommu;
156baca8 760 struct device *tmp;
b9a7f981 761 struct pci_dev *pdev = NULL;
aa4d066a 762 u16 segment = 0;
c7151a8d
WH
763 int i;
764
4ed6a540
DW
765 if (iommu_dummy(dev))
766 return NULL;
767
156baca8 768 if (dev_is_pci(dev)) {
1c387188
AR
769 struct pci_dev *pf_pdev;
770
156baca8 771 pdev = to_pci_dev(dev);
5823e330
JD
772
773#ifdef CONFIG_X86
774 /* VMD child devices currently cannot be handled individually */
775 if (is_vmd(pdev->bus))
776 return NULL;
777#endif
778
1c387188
AR
779 /* VFs aren't listed in scope tables; we need to look up
780 * the PF instead to find the IOMMU. */
781 pf_pdev = pci_physfn(pdev);
782 dev = &pf_pdev->dev;
156baca8 783 segment = pci_domain_nr(pdev->bus);
ca5b74d2 784 } else if (has_acpi_companion(dev))
156baca8
DW
785 dev = &ACPI_COMPANION(dev)->dev;
786
0e242612 787 rcu_read_lock();
b683b230 788 for_each_active_iommu(iommu, drhd) {
156baca8 789 if (pdev && segment != drhd->segment)
276dbf99 790 continue;
c7151a8d 791
b683b230 792 for_each_active_dev_scope(drhd->devices,
156baca8
DW
793 drhd->devices_cnt, i, tmp) {
794 if (tmp == dev) {
1c387188
AR
795 /* For a VF use its original BDF# not that of the PF
796 * which we used for the IOMMU lookup. Strictly speaking
797 * we could do this for all PCI devices; we only need to
798 * get the BDF# from the scope table for ACPI matches. */
5003ae1e 799 if (pdev && pdev->is_virtfn)
1c387188
AR
800 goto got_pdev;
801
156baca8
DW
802 *bus = drhd->devices[i].bus;
803 *devfn = drhd->devices[i].devfn;
b683b230 804 goto out;
156baca8
DW
805 }
806
b9a7f981 807 if (is_downstream_to_pci_bridge(dev, tmp))
156baca8 808 goto got_pdev;
924b6231 809 }
c7151a8d 810
156baca8
DW
811 if (pdev && drhd->include_all) {
812 got_pdev:
813 *bus = pdev->bus->number;
814 *devfn = pdev->devfn;
b683b230 815 goto out;
156baca8 816 }
c7151a8d 817 }
b683b230 818 iommu = NULL;
156baca8 819 out:
0e242612 820 rcu_read_unlock();
c7151a8d 821
b683b230 822 return iommu;
c7151a8d
WH
823}
824
5331fe6f
WH
825static void domain_flush_cache(struct dmar_domain *domain,
826 void *addr, int size)
827{
828 if (!domain->iommu_coherency)
829 clflush_cache_range(addr, size);
830}
831
ba395927
KA
832static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
833{
ba395927 834 struct context_entry *context;
03ecc32c 835 int ret = 0;
ba395927
KA
836 unsigned long flags;
837
838 spin_lock_irqsave(&iommu->lock, flags);
03ecc32c
DW
839 context = iommu_context_addr(iommu, bus, devfn, 0);
840 if (context)
841 ret = context_present(context);
ba395927
KA
842 spin_unlock_irqrestore(&iommu->lock, flags);
843 return ret;
844}
845
ba395927
KA
846static void free_context_table(struct intel_iommu *iommu)
847{
ba395927
KA
848 int i;
849 unsigned long flags;
850 struct context_entry *context;
851
852 spin_lock_irqsave(&iommu->lock, flags);
853 if (!iommu->root_entry) {
854 goto out;
855 }
856 for (i = 0; i < ROOT_ENTRY_NR; i++) {
03ecc32c 857 context = iommu_context_addr(iommu, i, 0, 0);
ba395927
KA
858 if (context)
859 free_pgtable_page(context);
03ecc32c 860
765b6a98 861 if (!sm_supported(iommu))
03ecc32c
DW
862 continue;
863
864 context = iommu_context_addr(iommu, i, 0x80, 0);
865 if (context)
866 free_pgtable_page(context);
867
ba395927
KA
868 }
869 free_pgtable_page(iommu->root_entry);
870 iommu->root_entry = NULL;
871out:
872 spin_unlock_irqrestore(&iommu->lock, flags);
873}
874
b026fd28 875static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
5cf0a76f 876 unsigned long pfn, int *target_level)
ba395927 877{
e083ea5b 878 struct dma_pte *parent, *pte;
ba395927 879 int level = agaw_to_level(domain->agaw);
4399c8bf 880 int offset;
ba395927
KA
881
882 BUG_ON(!domain->pgd);
f9423606 883
162d1b10 884 if (!domain_pfn_supported(domain, pfn))
f9423606
JS
885 /* Address beyond IOMMU's addressing capabilities. */
886 return NULL;
887
ba395927
KA
888 parent = domain->pgd;
889
5cf0a76f 890 while (1) {
ba395927
KA
891 void *tmp_page;
892
b026fd28 893 offset = pfn_level_offset(pfn, level);
ba395927 894 pte = &parent[offset];
5cf0a76f 895 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
6dd9a7c7 896 break;
5cf0a76f 897 if (level == *target_level)
ba395927
KA
898 break;
899
19c239ce 900 if (!dma_pte_present(pte)) {
c85994e4
DW
901 uint64_t pteval;
902
4c923d47 903 tmp_page = alloc_pgtable_page(domain->nid);
ba395927 904
206a73c1 905 if (!tmp_page)
ba395927 906 return NULL;
206a73c1 907
c85994e4 908 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
64de5af0 909 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
effad4b5 910 if (cmpxchg64(&pte->val, 0ULL, pteval))
c85994e4
DW
911 /* Someone else set it while we were thinking; use theirs. */
912 free_pgtable_page(tmp_page);
effad4b5 913 else
c85994e4 914 domain_flush_cache(domain, pte, sizeof(*pte));
ba395927 915 }
5cf0a76f
DW
916 if (level == 1)
917 break;
918
19c239ce 919 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
920 level--;
921 }
922
5cf0a76f
DW
923 if (!*target_level)
924 *target_level = level;
925
ba395927
KA
926 return pte;
927}
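/*
 * Editor's note: pfn_to_dma_pte() walks the second-level page table
 * top-down, indexing each level with pfn_level_offset(), and (when a
 * target level is requested) allocates any missing intermediate table
 * with alloc_pgtable_page(); the cmpxchg64() resolves the race where
 * another CPU installs the same entry first, in which case the freshly
 * allocated page is simply freed again.
 */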
928
929/* return address's pte at specific level */
90dcfb5e
DW
930static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
931 unsigned long pfn,
6dd9a7c7 932 int level, int *large_page)
ba395927 933{
e083ea5b 934 struct dma_pte *parent, *pte;
ba395927
KA
935 int total = agaw_to_level(domain->agaw);
936 int offset;
937
938 parent = domain->pgd;
939 while (level <= total) {
90dcfb5e 940 offset = pfn_level_offset(pfn, total);
ba395927
KA
941 pte = &parent[offset];
942 if (level == total)
943 return pte;
944
6dd9a7c7
YS
945 if (!dma_pte_present(pte)) {
946 *large_page = total;
ba395927 947 break;
6dd9a7c7
YS
948 }
949
e16922af 950 if (dma_pte_superpage(pte)) {
6dd9a7c7
YS
951 *large_page = total;
952 return pte;
953 }
954
19c239ce 955 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
956 total--;
957 }
958 return NULL;
959}
960
ba395927 961/* clear last level pte, a tlb flush should be followed */
5cf0a76f 962static void dma_pte_clear_range(struct dmar_domain *domain,
595badf5
DW
963 unsigned long start_pfn,
964 unsigned long last_pfn)
ba395927 965{
e083ea5b 966 unsigned int large_page;
310a5ab9 967 struct dma_pte *first_pte, *pte;
66eae846 968
162d1b10
JL
969 BUG_ON(!domain_pfn_supported(domain, start_pfn));
970 BUG_ON(!domain_pfn_supported(domain, last_pfn));
59c36286 971 BUG_ON(start_pfn > last_pfn);
ba395927 972
04b18e65 973 /* we don't need lock here; nobody else touches the iova range */
59c36286 974 do {
6dd9a7c7
YS
975 large_page = 1;
976 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
310a5ab9 977 if (!pte) {
6dd9a7c7 978 start_pfn = align_to_level(start_pfn + 1, large_page + 1);
310a5ab9
DW
979 continue;
980 }
6dd9a7c7 981 do {
310a5ab9 982 dma_clear_pte(pte);
6dd9a7c7 983 start_pfn += lvl_to_nr_pages(large_page);
310a5ab9 984 pte++;
75e6bf96
DW
985 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
986
310a5ab9
DW
987 domain_flush_cache(domain, first_pte,
988 (void *)pte - (void *)first_pte);
59c36286
DW
989
990 } while (start_pfn && start_pfn <= last_pfn);
ba395927
KA
991}
992
3269ee0b 993static void dma_pte_free_level(struct dmar_domain *domain, int level,
bc24c571
DD
994 int retain_level, struct dma_pte *pte,
995 unsigned long pfn, unsigned long start_pfn,
996 unsigned long last_pfn)
3269ee0b
AW
997{
998 pfn = max(start_pfn, pfn);
999 pte = &pte[pfn_level_offset(pfn, level)];
1000
1001 do {
1002 unsigned long level_pfn;
1003 struct dma_pte *level_pte;
1004
1005 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
1006 goto next;
1007
f7116e11 1008 level_pfn = pfn & level_mask(level);
3269ee0b
AW
1009 level_pte = phys_to_virt(dma_pte_addr(pte));
1010
bc24c571
DD
1011 if (level > 2) {
1012 dma_pte_free_level(domain, level - 1, retain_level,
1013 level_pte, level_pfn, start_pfn,
1014 last_pfn);
1015 }
3269ee0b 1016
bc24c571
DD
1017 /*
1018 * Free the page table if we're below the level we want to
1019 * retain and the range covers the entire table.
1020 */
1021 if (level < retain_level && !(start_pfn > level_pfn ||
08336fd2 1022 last_pfn < level_pfn + level_size(level) - 1)) {
3269ee0b
AW
1023 dma_clear_pte(pte);
1024 domain_flush_cache(domain, pte, sizeof(*pte));
1025 free_pgtable_page(level_pte);
1026 }
1027next:
1028 pfn += level_size(level);
1029 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1030}
1031
bc24c571
DD
1032/*
1033 * clear last level (leaf) ptes and free page table pages below the
1034 * level we wish to keep intact.
1035 */
ba395927 1036static void dma_pte_free_pagetable(struct dmar_domain *domain,
d794dc9b 1037 unsigned long start_pfn,
bc24c571
DD
1038 unsigned long last_pfn,
1039 int retain_level)
ba395927 1040{
162d1b10
JL
1041 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1042 BUG_ON(!domain_pfn_supported(domain, last_pfn));
59c36286 1043 BUG_ON(start_pfn > last_pfn);
ba395927 1044
d41a4adb
JL
1045 dma_pte_clear_range(domain, start_pfn, last_pfn);
1046
f3a0a52f 1047 /* We don't need lock here; nobody else touches the iova range */
bc24c571 1048 dma_pte_free_level(domain, agaw_to_level(domain->agaw), retain_level,
3269ee0b 1049 domain->pgd, 0, start_pfn, last_pfn);
6660c63a 1050
ba395927 1051 /* free pgd */
d794dc9b 1052 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
ba395927
KA
1053 free_pgtable_page(domain->pgd);
1054 domain->pgd = NULL;
1055 }
1056}
1057
ea8ea460
DW
1058/* When a page at a given level is being unlinked from its parent, we don't
1059 need to *modify* it at all. All we need to do is make a list of all the
1060 pages which can be freed just as soon as we've flushed the IOTLB and we
1061 know the hardware page-walk will no longer touch them.
1062 The 'pte' argument is the *parent* PTE, pointing to the page that is to
1063 be freed. */
1064static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1065 int level, struct dma_pte *pte,
1066 struct page *freelist)
1067{
1068 struct page *pg;
1069
1070 pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1071 pg->freelist = freelist;
1072 freelist = pg;
1073
1074 if (level == 1)
1075 return freelist;
1076
adeb2590
JL
1077 pte = page_address(pg);
1078 do {
ea8ea460
DW
1079 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1080 freelist = dma_pte_list_pagetables(domain, level - 1,
1081 pte, freelist);
adeb2590
JL
1082 pte++;
1083 } while (!first_pte_in_page(pte));
ea8ea460
DW
1084
1085 return freelist;
1086}
1087
1088static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1089 struct dma_pte *pte, unsigned long pfn,
1090 unsigned long start_pfn,
1091 unsigned long last_pfn,
1092 struct page *freelist)
1093{
1094 struct dma_pte *first_pte = NULL, *last_pte = NULL;
1095
1096 pfn = max(start_pfn, pfn);
1097 pte = &pte[pfn_level_offset(pfn, level)];
1098
1099 do {
1100 unsigned long level_pfn;
1101
1102 if (!dma_pte_present(pte))
1103 goto next;
1104
1105 level_pfn = pfn & level_mask(level);
1106
1107 /* If range covers entire pagetable, free it */
1108 if (start_pfn <= level_pfn &&
1109 last_pfn >= level_pfn + level_size(level) - 1) {
 1110 /* These subordinate page tables are going away entirely. Don't
1111 bother to clear them; we're just going to *free* them. */
1112 if (level > 1 && !dma_pte_superpage(pte))
1113 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1114
1115 dma_clear_pte(pte);
1116 if (!first_pte)
1117 first_pte = pte;
1118 last_pte = pte;
1119 } else if (level > 1) {
1120 /* Recurse down into a level that isn't *entirely* obsolete */
1121 freelist = dma_pte_clear_level(domain, level - 1,
1122 phys_to_virt(dma_pte_addr(pte)),
1123 level_pfn, start_pfn, last_pfn,
1124 freelist);
1125 }
1126next:
1127 pfn += level_size(level);
1128 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1129
1130 if (first_pte)
1131 domain_flush_cache(domain, first_pte,
1132 (void *)++last_pte - (void *)first_pte);
1133
1134 return freelist;
1135}
1136
1137/* We can't just free the pages because the IOMMU may still be walking
1138 the page tables, and may have cached the intermediate levels. The
1139 pages can only be freed after the IOTLB flush has been done. */
b690420a
JR
1140static struct page *domain_unmap(struct dmar_domain *domain,
1141 unsigned long start_pfn,
1142 unsigned long last_pfn)
ea8ea460 1143{
e083ea5b 1144 struct page *freelist;
ea8ea460 1145
162d1b10
JL
1146 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1147 BUG_ON(!domain_pfn_supported(domain, last_pfn));
ea8ea460
DW
1148 BUG_ON(start_pfn > last_pfn);
1149
1150 /* we don't need lock here; nobody else touches the iova range */
1151 freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1152 domain->pgd, 0, start_pfn, last_pfn, NULL);
1153
1154 /* free pgd */
1155 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1156 struct page *pgd_page = virt_to_page(domain->pgd);
1157 pgd_page->freelist = freelist;
1158 freelist = pgd_page;
1159
1160 domain->pgd = NULL;
1161 }
1162
1163 return freelist;
1164}
1165
b690420a 1166static void dma_free_pagelist(struct page *freelist)
ea8ea460
DW
1167{
1168 struct page *pg;
1169
1170 while ((pg = freelist)) {
1171 freelist = pg->freelist;
1172 free_pgtable_page(page_address(pg));
1173 }
1174}
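/*
 * Typical call sequence (editor's sketch of how these helpers are used
 * elsewhere in this driver): freelist = domain_unmap(domain, start, last)
 * collects the unlinked page-table pages, the IOTLB is then flushed for
 * every IOMMU the domain is attached to (iommu_flush_iotlb_psi()), and
 * only afterwards is dma_free_pagelist(freelist) called, so the hardware
 * can never walk page tables that have already been freed.
 */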
1175
13cf0174
JR
1176static void iova_entry_free(unsigned long data)
1177{
1178 struct page *freelist = (struct page *)data;
1179
1180 dma_free_pagelist(freelist);
1181}
1182
ba395927
KA
1183/* iommu handling */
1184static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1185{
1186 struct root_entry *root;
1187 unsigned long flags;
1188
4c923d47 1189 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
ffebeb46 1190 if (!root) {
9f10e5bf 1191 pr_err("Allocating root entry for %s failed\n",
ffebeb46 1192 iommu->name);
ba395927 1193 return -ENOMEM;
ffebeb46 1194 }
ba395927 1195
5b6985ce 1196 __iommu_flush_cache(iommu, root, ROOT_SIZE);
ba395927
KA
1197
1198 spin_lock_irqsave(&iommu->lock, flags);
1199 iommu->root_entry = root;
1200 spin_unlock_irqrestore(&iommu->lock, flags);
1201
1202 return 0;
1203}
1204
ba395927
KA
1205static void iommu_set_root_entry(struct intel_iommu *iommu)
1206{
03ecc32c 1207 u64 addr;
c416daa9 1208 u32 sts;
ba395927
KA
1209 unsigned long flag;
1210
03ecc32c 1211 addr = virt_to_phys(iommu->root_entry);
7373a8cc
LB
1212 if (sm_supported(iommu))
1213 addr |= DMA_RTADDR_SMT;
ba395927 1214
1f5b3c3f 1215 raw_spin_lock_irqsave(&iommu->register_lock, flag);
03ecc32c 1216 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
ba395927 1217
c416daa9 1218 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1219
1220 /* Make sure hardware complete it */
1221 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1222 readl, (sts & DMA_GSTS_RTPS), sts);
ba395927 1223
1f5b3c3f 1224 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1225}
1226
6f7db75e 1227void iommu_flush_write_buffer(struct intel_iommu *iommu)
ba395927
KA
1228{
1229 u32 val;
1230 unsigned long flag;
1231
9af88143 1232 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
ba395927 1233 return;
ba395927 1234
1f5b3c3f 1235 raw_spin_lock_irqsave(&iommu->register_lock, flag);
462b60f6 1236 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1237
1238 /* Make sure hardware complete it */
1239 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1240 readl, (!(val & DMA_GSTS_WBFS)), val);
ba395927 1241
1f5b3c3f 1242 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1243}
1244
1245/* return value determine if we need a write buffer flush */
4c25a2c1
DW
1246static void __iommu_flush_context(struct intel_iommu *iommu,
1247 u16 did, u16 source_id, u8 function_mask,
1248 u64 type)
ba395927
KA
1249{
1250 u64 val = 0;
1251 unsigned long flag;
1252
ba395927
KA
1253 switch (type) {
1254 case DMA_CCMD_GLOBAL_INVL:
1255 val = DMA_CCMD_GLOBAL_INVL;
1256 break;
1257 case DMA_CCMD_DOMAIN_INVL:
1258 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1259 break;
1260 case DMA_CCMD_DEVICE_INVL:
1261 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1262 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1263 break;
1264 default:
1265 BUG();
1266 }
1267 val |= DMA_CCMD_ICC;
1268
1f5b3c3f 1269 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1270 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1271
1272 /* Make sure hardware complete it */
1273 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1274 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1275
1f5b3c3f 1276 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1277}
1278
ba395927 1279/* return value determine if we need a write buffer flush */
1f0ef2aa
DW
1280static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1281 u64 addr, unsigned int size_order, u64 type)
ba395927
KA
1282{
1283 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1284 u64 val = 0, val_iva = 0;
1285 unsigned long flag;
1286
ba395927
KA
1287 switch (type) {
1288 case DMA_TLB_GLOBAL_FLUSH:
1289 /* global flush doesn't need set IVA_REG */
1290 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1291 break;
1292 case DMA_TLB_DSI_FLUSH:
1293 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1294 break;
1295 case DMA_TLB_PSI_FLUSH:
1296 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
ea8ea460 1297 /* IH bit is passed in as part of address */
ba395927
KA
1298 val_iva = size_order | addr;
1299 break;
1300 default:
1301 BUG();
1302 }
1303 /* Note: set drain read/write */
1304#if 0
1305 /*
 1306 * This is probably meant to be extra secure. Looks like we can
1307 * ignore it without any impact.
1308 */
1309 if (cap_read_drain(iommu->cap))
1310 val |= DMA_TLB_READ_DRAIN;
1311#endif
1312 if (cap_write_drain(iommu->cap))
1313 val |= DMA_TLB_WRITE_DRAIN;
1314
1f5b3c3f 1315 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1316 /* Note: Only uses first TLB reg currently */
1317 if (val_iva)
1318 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1319 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1320
1321 /* Make sure hardware complete it */
1322 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1323 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1324
1f5b3c3f 1325 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1326
1327 /* check IOTLB invalidation granularity */
1328 if (DMA_TLB_IAIG(val) == 0)
9f10e5bf 1329 pr_err("Flush IOTLB failed\n");
ba395927 1330 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
9f10e5bf 1331 pr_debug("TLB flush request %Lx, actual %Lx\n",
5b6985ce
FY
1332 (unsigned long long)DMA_TLB_IIRG(type),
1333 (unsigned long long)DMA_TLB_IAIG(val));
ba395927
KA
1334}
1335
64ae892b
DW
1336static struct device_domain_info *
1337iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
1338 u8 bus, u8 devfn)
93a23a72 1339{
93a23a72 1340 struct device_domain_info *info;
93a23a72 1341
55d94043
JR
1342 assert_spin_locked(&device_domain_lock);
1343
93a23a72
YZ
1344 if (!iommu->qi)
1345 return NULL;
1346
93a23a72 1347 list_for_each_entry(info, &domain->devices, link)
c3b497c6
JL
1348 if (info->iommu == iommu && info->bus == bus &&
1349 info->devfn == devfn) {
b16d0cb9
DW
1350 if (info->ats_supported && info->dev)
1351 return info;
93a23a72
YZ
1352 break;
1353 }
93a23a72 1354
b16d0cb9 1355 return NULL;
93a23a72
YZ
1356}
1357
0824c592
OP
1358static void domain_update_iotlb(struct dmar_domain *domain)
1359{
1360 struct device_domain_info *info;
1361 bool has_iotlb_device = false;
1362
1363 assert_spin_locked(&device_domain_lock);
1364
1365 list_for_each_entry(info, &domain->devices, link) {
1366 struct pci_dev *pdev;
1367
1368 if (!info->dev || !dev_is_pci(info->dev))
1369 continue;
1370
1371 pdev = to_pci_dev(info->dev);
1372 if (pdev->ats_enabled) {
1373 has_iotlb_device = true;
1374 break;
1375 }
1376 }
1377
1378 domain->has_iotlb_device = has_iotlb_device;
1379}
1380
93a23a72 1381static void iommu_enable_dev_iotlb(struct device_domain_info *info)
ba395927 1382{
fb0cc3aa
BH
1383 struct pci_dev *pdev;
1384
0824c592
OP
1385 assert_spin_locked(&device_domain_lock);
1386
0bcb3e28 1387 if (!info || !dev_is_pci(info->dev))
93a23a72
YZ
1388 return;
1389
fb0cc3aa 1390 pdev = to_pci_dev(info->dev);
1c48db44
JP
1391 /* For IOMMU that supports device IOTLB throttling (DIT), we assign
1392 * PFSID to the invalidation desc of a VF such that IOMMU HW can gauge
1393 * queue depth at PF level. If DIT is not set, PFSID will be treated as
1394 * reserved, which should be set to 0.
1395 */
1396 if (!ecap_dit(info->iommu->ecap))
1397 info->pfsid = 0;
1398 else {
1399 struct pci_dev *pf_pdev;
1400
1401 /* pdev will be returned if device is not a vf */
1402 pf_pdev = pci_physfn(pdev);
cc49baa9 1403 info->pfsid = pci_dev_id(pf_pdev);
1c48db44 1404 }
fb0cc3aa 1405
b16d0cb9
DW
1406#ifdef CONFIG_INTEL_IOMMU_SVM
1407 /* The PCIe spec, in its wisdom, declares that the behaviour of
1408 the device if you enable PASID support after ATS support is
1409 undefined. So always enable PASID support on devices which
1410 have it, even if we can't yet know if we're ever going to
1411 use it. */
1412 if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
1413 info->pasid_enabled = 1;
1414
1b84778a
KS
1415 if (info->pri_supported &&
1416 (info->pasid_enabled ? pci_prg_resp_pasid_required(pdev) : 1) &&
1417 !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
b16d0cb9
DW
1418 info->pri_enabled = 1;
1419#endif
fb58fdcd 1420 if (!pdev->untrusted && info->ats_supported &&
61363c14 1421 pci_ats_page_aligned(pdev) &&
fb58fdcd 1422 !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
b16d0cb9 1423 info->ats_enabled = 1;
0824c592 1424 domain_update_iotlb(info->domain);
b16d0cb9
DW
1425 info->ats_qdep = pci_ats_queue_depth(pdev);
1426 }
93a23a72
YZ
1427}
1428
1429static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1430{
b16d0cb9
DW
1431 struct pci_dev *pdev;
1432
0824c592
OP
1433 assert_spin_locked(&device_domain_lock);
1434
da972fb1 1435 if (!dev_is_pci(info->dev))
93a23a72
YZ
1436 return;
1437
b16d0cb9
DW
1438 pdev = to_pci_dev(info->dev);
1439
1440 if (info->ats_enabled) {
1441 pci_disable_ats(pdev);
1442 info->ats_enabled = 0;
0824c592 1443 domain_update_iotlb(info->domain);
b16d0cb9
DW
1444 }
1445#ifdef CONFIG_INTEL_IOMMU_SVM
1446 if (info->pri_enabled) {
1447 pci_disable_pri(pdev);
1448 info->pri_enabled = 0;
1449 }
1450 if (info->pasid_enabled) {
1451 pci_disable_pasid(pdev);
1452 info->pasid_enabled = 0;
1453 }
1454#endif
93a23a72
YZ
1455}
1456
1457static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1458 u64 addr, unsigned mask)
1459{
1460 u16 sid, qdep;
1461 unsigned long flags;
1462 struct device_domain_info *info;
1463
0824c592
OP
1464 if (!domain->has_iotlb_device)
1465 return;
1466
93a23a72
YZ
1467 spin_lock_irqsave(&device_domain_lock, flags);
1468 list_for_each_entry(info, &domain->devices, link) {
b16d0cb9 1469 if (!info->ats_enabled)
93a23a72
YZ
1470 continue;
1471
1472 sid = info->bus << 8 | info->devfn;
b16d0cb9 1473 qdep = info->ats_qdep;
1c48db44
JP
1474 qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
1475 qdep, addr, mask);
93a23a72
YZ
1476 }
1477 spin_unlock_irqrestore(&device_domain_lock, flags);
1478}
1479
a1ddcbe9
JR
1480static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
1481 struct dmar_domain *domain,
1482 unsigned long pfn, unsigned int pages,
1483 int ih, int map)
ba395927 1484{
9dd2fe89 1485 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
03d6a246 1486 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
a1ddcbe9 1487 u16 did = domain->iommu_did[iommu->seq_id];
ba395927 1488
ba395927
KA
1489 BUG_ON(pages == 0);
1490
ea8ea460
DW
1491 if (ih)
1492 ih = 1 << 6;
ba395927 1493 /*
9dd2fe89
YZ
 1494 * Fall back to domain-selective flush if there is no PSI support or the
 1495 * size is too big.
ba395927
KA
 1496 * PSI requires the page size to be 2 ^ x, and the base address to be
 1497 * naturally aligned to the size.
1498 */
9dd2fe89
YZ
1499 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1500 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1f0ef2aa 1501 DMA_TLB_DSI_FLUSH);
9dd2fe89 1502 else
ea8ea460 1503 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
9dd2fe89 1504 DMA_TLB_PSI_FLUSH);
bf92df30
YZ
1505
1506 /*
82653633
NA
1507 * In caching mode, changes of pages from non-present to present require
1508 * flush. However, device IOTLB doesn't need to be flushed in this case.
bf92df30 1509 */
82653633 1510 if (!cap_caching_mode(iommu->cap) || !map)
9d2e6505 1511 iommu_flush_dev_iotlb(domain, addr, mask);
ba395927
KA
1512}
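/*
 * Worked example (editor's illustration): flushing 9 pages at pfn
 * 0x1000 rounds up to a 16-page invalidation (mask == 4) at address
 * 0x1000000; if the IOMMU lacks page-selective invalidation, or mask
 * exceeds cap_max_amask_val(), the whole domain is flushed (DSI)
 * instead.
 */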
1513
eed91a0b
PX
1514/* Notification for newly created mappings */
1515static inline void __mapping_notify_one(struct intel_iommu *iommu,
1516 struct dmar_domain *domain,
1517 unsigned long pfn, unsigned int pages)
1518{
1519 /* It's a non-present to present mapping. Only flush if caching mode */
1520 if (cap_caching_mode(iommu->cap))
1521 iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1);
1522 else
1523 iommu_flush_write_buffer(iommu);
1524}
1525
13cf0174
JR
1526static void iommu_flush_iova(struct iova_domain *iovad)
1527{
1528 struct dmar_domain *domain;
1529 int idx;
1530
1531 domain = container_of(iovad, struct dmar_domain, iovad);
1532
1533 for_each_domain_iommu(idx, domain) {
1534 struct intel_iommu *iommu = g_iommus[idx];
1535 u16 did = domain->iommu_did[iommu->seq_id];
1536
1537 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
1538
1539 if (!cap_caching_mode(iommu->cap))
1540 iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
1541 0, MAX_AGAW_PFN_WIDTH);
1542 }
1543}
1544
f8bab735 1545static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1546{
1547 u32 pmen;
1548 unsigned long flags;
1549
5bb71fc7
LB
1550 if (!cap_plmr(iommu->cap) && !cap_phmr(iommu->cap))
1551 return;
1552
1f5b3c3f 1553 raw_spin_lock_irqsave(&iommu->register_lock, flags);
f8bab735 1554 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1555 pmen &= ~DMA_PMEN_EPM;
1556 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1557
1558 /* wait for the protected region status bit to clear */
1559 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1560 readl, !(pmen & DMA_PMEN_PRS), pmen);
1561
1f5b3c3f 1562 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
f8bab735 1563}
1564
2a41ccee 1565static void iommu_enable_translation(struct intel_iommu *iommu)
ba395927
KA
1566{
1567 u32 sts;
1568 unsigned long flags;
1569
1f5b3c3f 1570 raw_spin_lock_irqsave(&iommu->register_lock, flags);
c416daa9
DW
1571 iommu->gcmd |= DMA_GCMD_TE;
1572 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1573
1574 /* Make sure hardware complete it */
1575 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1576 readl, (sts & DMA_GSTS_TES), sts);
ba395927 1577
1f5b3c3f 1578 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
ba395927
KA
1579}
1580
2a41ccee 1581static void iommu_disable_translation(struct intel_iommu *iommu)
ba395927
KA
1582{
1583 u32 sts;
1584 unsigned long flag;
1585
1f5b3c3f 1586 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1587 iommu->gcmd &= ~DMA_GCMD_TE;
1588 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1589
1590 /* Make sure hardware complete it */
1591 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1592 readl, (!(sts & DMA_GSTS_TES)), sts);
ba395927 1593
1f5b3c3f 1594 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1595}
1596
1597static int iommu_init_domains(struct intel_iommu *iommu)
1598{
8bf47816
JR
1599 u32 ndomains, nlongs;
1600 size_t size;
ba395927
KA
1601
1602 ndomains = cap_ndoms(iommu->cap);
8bf47816 1603 pr_debug("%s: Number of Domains supported <%d>\n",
9f10e5bf 1604 iommu->name, ndomains);
ba395927
KA
1605 nlongs = BITS_TO_LONGS(ndomains);
1606
94a91b50
DD
1607 spin_lock_init(&iommu->lock);
1608
ba395927
KA
1609 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1610 if (!iommu->domain_ids) {
9f10e5bf
JR
1611 pr_err("%s: Allocating domain id array failed\n",
1612 iommu->name);
ba395927
KA
1613 return -ENOMEM;
1614 }
8bf47816 1615
86f004c7 1616 size = (ALIGN(ndomains, 256) >> 8) * sizeof(struct dmar_domain **);
8bf47816
JR
1617 iommu->domains = kzalloc(size, GFP_KERNEL);
1618
1619 if (iommu->domains) {
1620 size = 256 * sizeof(struct dmar_domain *);
1621 iommu->domains[0] = kzalloc(size, GFP_KERNEL);
1622 }
1623
1624 if (!iommu->domains || !iommu->domains[0]) {
9f10e5bf
JR
1625 pr_err("%s: Allocating domain array failed\n",
1626 iommu->name);
852bdb04 1627 kfree(iommu->domain_ids);
8bf47816 1628 kfree(iommu->domains);
852bdb04 1629 iommu->domain_ids = NULL;
8bf47816 1630 iommu->domains = NULL;
ba395927
KA
1631 return -ENOMEM;
1632 }
1633
1634 /*
c0e8a6c8
JR
1635 * If Caching mode is set, then invalid translations are tagged
1636 * with domain-id 0, hence we need to pre-allocate it. We also
1637 * use domain-id 0 as a marker for non-allocated domain-id, so
1638 * make sure it is not used for a real domain.
ba395927 1639 */
c0e8a6c8
JR
1640 set_bit(0, iommu->domain_ids);
1641
3b33d4ab
LB
1642 /*
1643 * Vt-d spec rev3.0 (section 6.2.3.1) requires that each pasid
1644 * entry for first-level or pass-through translation modes should
1645 * be programmed with a domain id different from those used for
1646 * second-level or nested translation. We reserve a domain id for
1647 * this purpose.
1648 */
1649 if (sm_supported(iommu))
1650 set_bit(FLPT_DEFAULT_DID, iommu->domain_ids);
1651
ba395927
KA
1652 return 0;
1653}
ba395927 1654
ffebeb46 1655static void disable_dmar_iommu(struct intel_iommu *iommu)
ba395927 1656{
29a27719 1657 struct device_domain_info *info, *tmp;
55d94043 1658 unsigned long flags;
ba395927 1659
29a27719
JR
1660 if (!iommu->domains || !iommu->domain_ids)
1661 return;
a4eaa86c 1662
55d94043 1663 spin_lock_irqsave(&device_domain_lock, flags);
29a27719 1664 list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
29a27719
JR
1665 if (info->iommu != iommu)
1666 continue;
1667
1668 if (!info->dev || !info->domain)
1669 continue;
1670
bea64033 1671 __dmar_remove_one_dev_info(info);
ba395927 1672 }
55d94043 1673 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927
KA
1674
1675 if (iommu->gcmd & DMA_GCMD_TE)
1676 iommu_disable_translation(iommu);
ffebeb46 1677}
ba395927 1678
ffebeb46
JL
1679static void free_dmar_iommu(struct intel_iommu *iommu)
1680{
1681 if ((iommu->domains) && (iommu->domain_ids)) {
86f004c7 1682 int elems = ALIGN(cap_ndoms(iommu->cap), 256) >> 8;
8bf47816
JR
1683 int i;
1684
1685 for (i = 0; i < elems; i++)
1686 kfree(iommu->domains[i]);
ffebeb46
JL
1687 kfree(iommu->domains);
1688 kfree(iommu->domain_ids);
1689 iommu->domains = NULL;
1690 iommu->domain_ids = NULL;
1691 }
ba395927 1692
d9630fe9
WH
1693 g_iommus[iommu->seq_id] = NULL;
1694
ba395927
KA
1695 /* free context mapping */
1696 free_context_table(iommu);
8a94ade4
DW
1697
1698#ifdef CONFIG_INTEL_IOMMU_SVM
765b6a98 1699 if (pasid_supported(iommu)) {
a222a7f0
DW
1700 if (ecap_prs(iommu->ecap))
1701 intel_svm_finish_prq(iommu);
a222a7f0 1702 }
8a94ade4 1703#endif
ba395927
KA
1704}
1705
ab8dfe25 1706static struct dmar_domain *alloc_domain(int flags)
ba395927 1707{
ba395927 1708 struct dmar_domain *domain;
ba395927
KA
1709
1710 domain = alloc_domain_mem();
1711 if (!domain)
1712 return NULL;
1713
ab8dfe25 1714 memset(domain, 0, sizeof(*domain));
98fa15f3 1715 domain->nid = NUMA_NO_NODE;
ab8dfe25 1716 domain->flags = flags;
0824c592 1717 domain->has_iotlb_device = false;
92d03cc8 1718 INIT_LIST_HEAD(&domain->devices);
2c2e2c38
FY
1719
1720 return domain;
1721}
1722
d160aca5
JR
1723/* Must be called with iommu->lock */
1724static int domain_attach_iommu(struct dmar_domain *domain,
fb170fb4
JL
1725 struct intel_iommu *iommu)
1726{
44bde614 1727 unsigned long ndomains;
55d94043 1728 int num;
44bde614 1729
55d94043 1730 assert_spin_locked(&device_domain_lock);
d160aca5 1731 assert_spin_locked(&iommu->lock);
ba395927 1732
29a27719
JR
1733 domain->iommu_refcnt[iommu->seq_id] += 1;
1734 domain->iommu_count += 1;
1735 if (domain->iommu_refcnt[iommu->seq_id] == 1) {
fb170fb4 1736 ndomains = cap_ndoms(iommu->cap);
d160aca5
JR
1737 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1738
1739 if (num >= ndomains) {
1740 pr_err("%s: No free domain ids\n", iommu->name);
1741 domain->iommu_refcnt[iommu->seq_id] -= 1;
1742 domain->iommu_count -= 1;
55d94043 1743 return -ENOSPC;
2c2e2c38 1744 }
ba395927 1745
d160aca5
JR
1746 set_bit(num, iommu->domain_ids);
1747 set_iommu_domain(iommu, num, domain);
1748
1749 domain->iommu_did[iommu->seq_id] = num;
1750 domain->nid = iommu->node;
fb170fb4 1751
fb170fb4
JL
1752 domain_update_iommu_cap(domain);
1753 }
d160aca5 1754
55d94043 1755 return 0;
fb170fb4
JL
1756}
1757
1758static int domain_detach_iommu(struct dmar_domain *domain,
1759 struct intel_iommu *iommu)
1760{
e083ea5b 1761 int num, count;
d160aca5 1762
55d94043 1763 assert_spin_locked(&device_domain_lock);
d160aca5 1764 assert_spin_locked(&iommu->lock);
fb170fb4 1765
29a27719
JR
1766 domain->iommu_refcnt[iommu->seq_id] -= 1;
1767 count = --domain->iommu_count;
1768 if (domain->iommu_refcnt[iommu->seq_id] == 0) {
d160aca5
JR
1769 num = domain->iommu_did[iommu->seq_id];
1770 clear_bit(num, iommu->domain_ids);
1771 set_iommu_domain(iommu, num, NULL);
fb170fb4 1772
fb170fb4 1773 domain_update_iommu_cap(domain);
c0e8a6c8 1774 domain->iommu_did[iommu->seq_id] = 0;
fb170fb4 1775 }
fb170fb4
JL
1776
1777 return count;
1778}
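/*
 * Illustrative note on the attach/detach pairing above: the first
 * domain_attach_iommu() of a domain to a given iommu allocates a domain
 * id from that iommu's domain_ids bitmap and records it in
 * domain->iommu_did[seq_id]; later attaches only bump the per-iommu
 * refcount. domain_detach_iommu() reverses this and frees the id once
 * the per-iommu refcount drops back to zero.
 */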
1779
ba395927 1780static struct iova_domain reserved_iova_list;
8a443df4 1781static struct lock_class_key reserved_rbtree_key;
ba395927 1782
51a63e67 1783static int dmar_init_reserved_ranges(void)
ba395927
KA
1784{
1785 struct pci_dev *pdev = NULL;
1786 struct iova *iova;
1787 int i;
ba395927 1788
aa3ac946 1789 init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN);
ba395927 1790
8a443df4
MG
1791 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1792 &reserved_rbtree_key);
1793
ba395927
KA
1794 /* IOAPIC ranges shouldn't be accessed by DMA */
1795 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1796 IOVA_PFN(IOAPIC_RANGE_END));
51a63e67 1797 if (!iova) {
9f10e5bf 1798 pr_err("Reserve IOAPIC range failed\n");
51a63e67
JC
1799 return -ENODEV;
1800 }
ba395927
KA
1801
1802 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1803 for_each_pci_dev(pdev) {
1804 struct resource *r;
1805
1806 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1807 r = &pdev->resource[i];
1808 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1809 continue;
1a4a4551
DW
1810 iova = reserve_iova(&reserved_iova_list,
1811 IOVA_PFN(r->start),
1812 IOVA_PFN(r->end));
51a63e67 1813 if (!iova) {
932a6523 1814 pci_err(pdev, "Reserve iova for %pR failed\n", r);
51a63e67
JC
1815 return -ENODEV;
1816 }
ba395927
KA
1817 }
1818 }
51a63e67 1819 return 0;
ba395927
KA
1820}
1821
1822static void domain_reserve_special_ranges(struct dmar_domain *domain)
1823{
1824 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1825}
1826
1827static inline int guestwidth_to_adjustwidth(int gaw)
1828{
1829 int agaw;
1830 int r = (gaw - 12) % 9;
1831
1832 if (r == 0)
1833 agaw = gaw;
1834 else
1835 agaw = gaw + 9 - r;
1836 if (agaw > 64)
1837 agaw = 64;
1838 return agaw;
1839}
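/*
 * A minimal worked example of the rounding above: a guest width that is
 * already 12 plus a multiple of 9 maps to itself (gaw 39 -> agaw 39,
 * gaw 48 -> agaw 48), while anything in between is rounded up to the
 * next page-table step (gaw 40: r = (40 - 12) % 9 = 1, so
 * agaw = 40 + 9 - 1 = 48). Values above 64 are clamped to 64.
 */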
1840
301e7ee1
JR
1841static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
1842 int guest_width)
1843{
1844 int adjust_width, agaw;
1845 unsigned long sagaw;
1846 int err;
1847
1848 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
1849
1850 err = init_iova_flush_queue(&domain->iovad,
1851 iommu_flush_iova, iova_entry_free);
1852 if (err)
1853 return err;
1854
1855 domain_reserve_special_ranges(domain);
1856
1857 /* calculate AGAW */
1858 if (guest_width > cap_mgaw(iommu->cap))
1859 guest_width = cap_mgaw(iommu->cap);
1860 domain->gaw = guest_width;
1861 adjust_width = guestwidth_to_adjustwidth(guest_width);
1862 agaw = width_to_agaw(adjust_width);
1863 sagaw = cap_sagaw(iommu->cap);
1864 if (!test_bit(agaw, &sagaw)) {
1865 /* hardware doesn't support it, choose a bigger one */
1866 pr_debug("Hardware doesn't support agaw %d\n", agaw);
1867 agaw = find_next_bit(&sagaw, 5, agaw);
1868 if (agaw >= 5)
1869 return -ENODEV;
1870 }
1871 domain->agaw = agaw;
1872
1873 if (ecap_coherent(iommu->ecap))
1874 domain->iommu_coherency = 1;
1875 else
1876 domain->iommu_coherency = 0;
1877
1878 if (ecap_sc_support(iommu->ecap))
1879 domain->iommu_snooping = 1;
1880 else
1881 domain->iommu_snooping = 0;
1882
1883 if (intel_iommu_superpage)
1884 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1885 else
1886 domain->iommu_superpage = 0;
1887
1888 domain->nid = iommu->node;
1889
1890 /* always allocate the top pgd */
1891 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1892 if (!domain->pgd)
1893 return -ENOMEM;
1894 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1895 return 0;
1896}
1897
ba395927
KA
1898static void domain_exit(struct dmar_domain *domain)
1899{
ba395927 1900
d160aca5 1901 /* Remove associated devices and clear attached or cached domains */
ba395927 1902 domain_remove_dev_info(domain);
92d03cc8 1903
ba395927
KA
1904 /* destroy iovas */
1905 put_iova_domain(&domain->iovad);
ba395927 1906
3ee9eca7
DS
1907 if (domain->pgd) {
1908 struct page *freelist;
ba395927 1909
3ee9eca7
DS
1910 freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1911 dma_free_pagelist(freelist);
1912 }
ea8ea460 1913
ba395927
KA
1914 free_domain_mem(domain);
1915}
1916
7373a8cc
LB
1917/*
1918 * Get the PASID directory size for scalable mode context entry.
1919 * Value of X in the PDTS field of a scalable mode context entry
1920 * indicates PASID directory with 2^(X + 7) entries.
1921 */
1922static inline unsigned long context_get_sm_pds(struct pasid_table *table)
1923{
1924 int pds, max_pde;
1925
1926 max_pde = table->max_pasid >> PASID_PDE_SHIFT;
1927 pds = find_first_bit((unsigned long *)&max_pde, MAX_NR_PASID_BITS);
1928 if (pds < 7)
1929 return 0;
1930
1931 return pds - 7;
1932}
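/*
 * Worked example for the PDS computation above, assuming max_pasid is a
 * power of two so that max_pde has a single bit set: max_pde = 0x4000
 * (bit 14) gives pds = 14 - 7 = 7, which the context entry's PDTS field
 * encodes as a PASID directory with 2^(7 + 7) = 16384 entries.
 */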
1933
1934/*
1935 * Set the RID_PASID field of a scalable mode context entry. The
1936 * IOMMU hardware will use the PASID value set in this field for
1937 * DMA translations of DMA requests without PASID.
1938 */
1939static inline void
1940context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid)
1941{
1942 context->hi |= pasid & ((1 << 20) - 1);
1943 context->hi |= (1 << 20);
1944}
1945
1946/*
1947 * Set the DTE(Device-TLB Enable) field of a scalable mode context
1948 * entry.
1949 */
1950static inline void context_set_sm_dte(struct context_entry *context)
1951{
1952 context->lo |= (1 << 2);
1953}
1954
1955/*
1956 * Set the PRE(Page Request Enable) field of a scalable mode context
1957 * entry.
1958 */
1959static inline void context_set_sm_pre(struct context_entry *context)
1960{
1961 context->lo |= (1 << 4);
1962}
1963
1964/* Convert value to context PASID directory size field coding. */
1965#define context_pdts(pds) (((pds) & 0x7) << 9)
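/* The 3-bit PDS value encoded above lands in bits 11:9 of context->lo. */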
1966
64ae892b
DW
1967static int domain_context_mapping_one(struct dmar_domain *domain,
1968 struct intel_iommu *iommu,
ca6e322d 1969 struct pasid_table *table,
28ccce0d 1970 u8 bus, u8 devfn)
ba395927 1971{
c6c2cebd 1972 u16 did = domain->iommu_did[iommu->seq_id];
28ccce0d
JR
1973 int translation = CONTEXT_TT_MULTI_LEVEL;
1974 struct device_domain_info *info = NULL;
ba395927 1975 struct context_entry *context;
ba395927 1976 unsigned long flags;
7373a8cc 1977 int ret;
28ccce0d 1978
c6c2cebd
JR
1979 WARN_ON(did == 0);
1980
28ccce0d
JR
1981 if (hw_pass_through && domain_type_is_si(domain))
1982 translation = CONTEXT_TT_PASS_THROUGH;
ba395927
KA
1983
1984 pr_debug("Set context mapping for %02x:%02x.%d\n",
1985 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
4ed0d3e6 1986
ba395927 1987 BUG_ON(!domain->pgd);
5331fe6f 1988
55d94043
JR
1989 spin_lock_irqsave(&device_domain_lock, flags);
1990 spin_lock(&iommu->lock);
1991
1992 ret = -ENOMEM;
03ecc32c 1993 context = iommu_context_addr(iommu, bus, devfn, 1);
ba395927 1994 if (!context)
55d94043 1995 goto out_unlock;
ba395927 1996
55d94043
JR
1997 ret = 0;
1998 if (context_present(context))
1999 goto out_unlock;
cf484d0e 2000
aec0e861
XP
2001 /*
2002 * For kdump cases, old valid entries may be cached due to the
2003 * in-flight DMA and copied pgtable, but there is no unmapping
2004 * behaviour for them, thus we need an explicit cache flush for
2005 * the newly-mapped device. For kdump, at this point, the device
2006 * is supposed to have finished reset at its driver probe stage, so no
2007 * in-flight DMA will exist, and we don't need to worry about it
2008 * hereafter.
2009 */
2010 if (context_copied(context)) {
2011 u16 did_old = context_domain_id(context);
2012
b117e038 2013 if (did_old < cap_ndoms(iommu->cap)) {
aec0e861
XP
2014 iommu->flush.flush_context(iommu, did_old,
2015 (((u16)bus) << 8) | devfn,
2016 DMA_CCMD_MASK_NOBIT,
2017 DMA_CCMD_DEVICE_INVL);
f73a7eee
KA
2018 iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
2019 DMA_TLB_DSI_FLUSH);
2020 }
aec0e861
XP
2021 }
2022
de24e553 2023 context_clear_entry(context);
ea6606b0 2024
7373a8cc
LB
2025 if (sm_supported(iommu)) {
2026 unsigned long pds;
4ed0d3e6 2027
7373a8cc
LB
2028 WARN_ON(!table);
2029
2030 /* Setup the PASID DIR pointer: */
2031 pds = context_get_sm_pds(table);
2032 context->lo = (u64)virt_to_phys(table->table) |
2033 context_pdts(pds);
2034
2035 /* Setup the RID_PASID field: */
2036 context_set_sm_rid2pasid(context, PASID_RID2PASID);
de24e553 2037
de24e553 2038 /*
7373a8cc
LB
2039 * Setup the Device-TLB enable bit and Page request
2040 * Enable bit:
de24e553 2041 */
7373a8cc
LB
2042 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2043 if (info && info->ats_supported)
2044 context_set_sm_dte(context);
2045 if (info && info->pri_supported)
2046 context_set_sm_pre(context);
2047 } else {
2048 struct dma_pte *pgd = domain->pgd;
2049 int agaw;
2050
2051 context_set_domain_id(context, did);
7373a8cc
LB
2052
2053 if (translation != CONTEXT_TT_PASS_THROUGH) {
2054 /*
2055 * Skip top levels of page tables for an iommu which has
2056 * less agaw than the default. Unnecessary for PT mode.
2057 */
2058 for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
2059 ret = -ENOMEM;
2060 pgd = phys_to_virt(dma_pte_addr(pgd));
2061 if (!dma_pte_present(pgd))
2062 goto out_unlock;
2063 }
2064
2065 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2066 if (info && info->ats_supported)
2067 translation = CONTEXT_TT_DEV_IOTLB;
2068 else
2069 translation = CONTEXT_TT_MULTI_LEVEL;
2070
2071 context_set_address_root(context, virt_to_phys(pgd));
2072 context_set_address_width(context, agaw);
2073 } else {
2074 /*
2075 * In pass-through mode, AW must be programmed to
2076 * indicate the largest AGAW value supported by
2077 * hardware, and ASR is ignored by hardware.
2078 */
2079 context_set_address_width(context, iommu->msagaw);
2080 }
41b80db2
LB
2081
2082 context_set_translation_type(context, translation);
93a23a72 2083 }
4ed0d3e6 2084
c07e7d21
MM
2085 context_set_fault_enable(context);
2086 context_set_present(context);
5331fe6f 2087 domain_flush_cache(domain, context, sizeof(*context));
ba395927 2088
4c25a2c1
DW
2089 /*
2090 * It's a non-present to present mapping. If hardware doesn't cache
2091 * non-present entries we only need to flush the write-buffer. If it
2092 * _does_ cache non-present entries, then it does so in the special
2093 * domain #0, which we have to flush:
2094 */
2095 if (cap_caching_mode(iommu->cap)) {
2096 iommu->flush.flush_context(iommu, 0,
2097 (((u16)bus) << 8) | devfn,
2098 DMA_CCMD_MASK_NOBIT,
2099 DMA_CCMD_DEVICE_INVL);
c6c2cebd 2100 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
4c25a2c1 2101 } else {
ba395927 2102 iommu_flush_write_buffer(iommu);
4c25a2c1 2103 }
93a23a72 2104 iommu_enable_dev_iotlb(info);
c7151a8d 2105
55d94043
JR
2106 ret = 0;
2107
2108out_unlock:
2109 spin_unlock(&iommu->lock);
2110 spin_unlock_irqrestore(&device_domain_lock, flags);
fb170fb4 2111
5c365d18 2112 return ret;
ba395927
KA
2113}
2114
2115static int
28ccce0d 2116domain_context_mapping(struct dmar_domain *domain, struct device *dev)
ba395927 2117{
ca6e322d 2118 struct pasid_table *table;
64ae892b 2119 struct intel_iommu *iommu;
156baca8 2120 u8 bus, devfn;
64ae892b 2121
e1f167f3 2122 iommu = device_to_iommu(dev, &bus, &devfn);
64ae892b
DW
2123 if (!iommu)
2124 return -ENODEV;
ba395927 2125
ca6e322d 2126 table = intel_pasid_get_table(dev);
55752949 2127 return domain_context_mapping_one(domain, iommu, table, bus, devfn);
579305f7
AW
2128}
2129
2130static int domain_context_mapped_cb(struct pci_dev *pdev,
2131 u16 alias, void *opaque)
2132{
2133 struct intel_iommu *iommu = opaque;
2134
2135 return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
ba395927
KA
2136}
2137
e1f167f3 2138static int domain_context_mapped(struct device *dev)
ba395927 2139{
5331fe6f 2140 struct intel_iommu *iommu;
156baca8 2141 u8 bus, devfn;
5331fe6f 2142
e1f167f3 2143 iommu = device_to_iommu(dev, &bus, &devfn);
5331fe6f
WH
2144 if (!iommu)
2145 return -ENODEV;
ba395927 2146
579305f7
AW
2147 if (!dev_is_pci(dev))
2148 return device_context_mapped(iommu, bus, devfn);
e1f167f3 2149
579305f7
AW
2150 return !pci_for_each_dma_alias(to_pci_dev(dev),
2151 domain_context_mapped_cb, iommu);
ba395927
KA
2152}
2153
f532959b
FY
2154/* Returns a number of VTD pages, but aligned to MM page size */
2155static inline unsigned long aligned_nrpages(unsigned long host_addr,
2156 size_t size)
2157{
2158 host_addr &= ~PAGE_MASK;
2159 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
2160}
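/*
 * Illustrative example, assuming 4KiB MM and VTD pages: host_addr 0x1080
 * with size 0x2000 keeps the intra-page offset 0x80, and
 * PAGE_ALIGN(0x80 + 0x2000) >> 12 = 3, i.e. the mapping spans three VTD
 * pages even though the size alone is only two pages.
 */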
2161
6dd9a7c7
YS
2162/* Return largest possible superpage level for a given mapping */
2163static inline int hardware_largepage_caps(struct dmar_domain *domain,
2164 unsigned long iov_pfn,
2165 unsigned long phy_pfn,
2166 unsigned long pages)
2167{
2168 int support, level = 1;
2169 unsigned long pfnmerge;
2170
2171 support = domain->iommu_superpage;
2172
2173 /* To use a large page, the virtual *and* physical addresses
2174 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2175 of them will mean we have to use smaller pages. So just
2176 merge them and check both at once. */
2177 pfnmerge = iov_pfn | phy_pfn;
2178
2179 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
2180 pages >>= VTD_STRIDE_SHIFT;
2181 if (!pages)
2182 break;
2183 pfnmerge >>= VTD_STRIDE_SHIFT;
2184 level++;
2185 support--;
2186 }
2187 return level;
2188}
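/*
 * Example of the level computation above: with superpage support of 1
 * (2MiB pages), iov_pfn and phy_pfn both 2MiB-aligned (low 9 bits of
 * pfnmerge clear) and at least 512 pages to map, the loop runs once and
 * returns level 2; any misalignment or a shorter run keeps level 1.
 */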
2189
9051aa02
DW
2190static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2191 struct scatterlist *sg, unsigned long phys_pfn,
2192 unsigned long nr_pages, int prot)
e1605495
DW
2193{
2194 struct dma_pte *first_pte = NULL, *pte = NULL;
9051aa02 2195 phys_addr_t uninitialized_var(pteval);
cc4f14aa 2196 unsigned long sg_res = 0;
6dd9a7c7
YS
2197 unsigned int largepage_lvl = 0;
2198 unsigned long lvl_pages = 0;
e1605495 2199
162d1b10 2200 BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
e1605495
DW
2201
2202 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2203 return -EINVAL;
2204
2205 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
2206
cc4f14aa
JL
2207 if (!sg) {
2208 sg_res = nr_pages;
9051aa02
DW
2209 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
2210 }
2211
6dd9a7c7 2212 while (nr_pages > 0) {
c85994e4
DW
2213 uint64_t tmp;
2214
e1605495 2215 if (!sg_res) {
29a90b70
RM
2216 unsigned int pgoff = sg->offset & ~PAGE_MASK;
2217
f532959b 2218 sg_res = aligned_nrpages(sg->offset, sg->length);
29a90b70 2219 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + pgoff;
e1605495 2220 sg->dma_length = sg->length;
29a90b70 2221 pteval = (sg_phys(sg) - pgoff) | prot;
6dd9a7c7 2222 phys_pfn = pteval >> VTD_PAGE_SHIFT;
e1605495 2223 }
6dd9a7c7 2224
e1605495 2225 if (!pte) {
6dd9a7c7
YS
2226 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2227
5cf0a76f 2228 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
e1605495
DW
2229 if (!pte)
2230 return -ENOMEM;
6dd9a7c7 2231 /* It is a large page */
6491d4d0 2232 if (largepage_lvl > 1) {
ba2374fd
CZ
2233 unsigned long nr_superpages, end_pfn;
2234
6dd9a7c7 2235 pteval |= DMA_PTE_LARGE_PAGE;
d41a4adb 2236 lvl_pages = lvl_to_nr_pages(largepage_lvl);
ba2374fd
CZ
2237
2238 nr_superpages = sg_res / lvl_pages;
2239 end_pfn = iov_pfn + nr_superpages * lvl_pages - 1;
2240
d41a4adb
JL
2241 /*
2242 * Ensure that old small page tables are
ba2374fd 2243 * removed to make room for superpage(s).
bc24c571
DD
2244 * We're adding new large pages, so make sure
2245 * we don't remove their parent tables.
d41a4adb 2246 */
bc24c571
DD
2247 dma_pte_free_pagetable(domain, iov_pfn, end_pfn,
2248 largepage_lvl + 1);
6491d4d0 2249 } else {
6dd9a7c7 2250 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
6491d4d0 2251 }
6dd9a7c7 2252
e1605495
DW
2253 }
2254 /* We don't need a lock here; nobody else
2255 * touches the iova range
2256 */
7766a3fb 2257 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
c85994e4 2258 if (tmp) {
1bf20f0d 2259 static int dumps = 5;
9f10e5bf
JR
2260 pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2261 iov_pfn, tmp, (unsigned long long)pteval);
1bf20f0d
DW
2262 if (dumps) {
2263 dumps--;
2264 debug_dma_dump_mappings(NULL);
2265 }
2266 WARN_ON(1);
2267 }
6dd9a7c7
YS
2268
2269 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2270
2271 BUG_ON(nr_pages < lvl_pages);
2272 BUG_ON(sg_res < lvl_pages);
2273
2274 nr_pages -= lvl_pages;
2275 iov_pfn += lvl_pages;
2276 phys_pfn += lvl_pages;
2277 pteval += lvl_pages * VTD_PAGE_SIZE;
2278 sg_res -= lvl_pages;
2279
2280 /* If the next PTE would be the first in a new page, then we
2281 need to flush the cache on the entries we've just written.
2282 And then we'll need to recalculate 'pte', so clear it and
2283 let it get set again in the if (!pte) block above.
2284
2285 If we're done (!nr_pages) we need to flush the cache too.
2286
2287 Also if we've been setting superpages, we may need to
2288 recalculate 'pte' and switch back to smaller pages for the
2289 end of the mapping, if the trailing size is not enough to
2290 use another superpage (i.e. sg_res < lvl_pages). */
e1605495 2291 pte++;
6dd9a7c7
YS
2292 if (!nr_pages || first_pte_in_page(pte) ||
2293 (largepage_lvl > 1 && sg_res < lvl_pages)) {
e1605495
DW
2294 domain_flush_cache(domain, first_pte,
2295 (void *)pte - (void *)first_pte);
2296 pte = NULL;
2297 }
6dd9a7c7
YS
2298
2299 if (!sg_res && nr_pages)
e1605495
DW
2300 sg = sg_next(sg);
2301 }
2302 return 0;
2303}
2304
87684fd9 2305static int domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
095303e0
LB
2306 struct scatterlist *sg, unsigned long phys_pfn,
2307 unsigned long nr_pages, int prot)
2308{
fa954e68 2309 int iommu_id, ret;
095303e0
LB
2310 struct intel_iommu *iommu;
2311
2312 /* Do the real mapping first */
2313 ret = __domain_mapping(domain, iov_pfn, sg, phys_pfn, nr_pages, prot);
2314 if (ret)
2315 return ret;
2316
fa954e68
LB
2317 for_each_domain_iommu(iommu_id, domain) {
2318 iommu = g_iommus[iommu_id];
095303e0
LB
2319 __mapping_notify_one(iommu, domain, iov_pfn, nr_pages);
2320 }
2321
2322 return 0;
87684fd9
PX
2323}
2324
9051aa02
DW
2325static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2326 struct scatterlist *sg, unsigned long nr_pages,
2327 int prot)
ba395927 2328{
87684fd9 2329 return domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
9051aa02 2330}
6f6a00e4 2331
9051aa02
DW
2332static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2333 unsigned long phys_pfn, unsigned long nr_pages,
2334 int prot)
2335{
87684fd9 2336 return domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
ba395927
KA
2337}
2338
2452d9db 2339static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn)
ba395927 2340{
5082219b
FS
2341 unsigned long flags;
2342 struct context_entry *context;
2343 u16 did_old;
2344
c7151a8d
WH
2345 if (!iommu)
2346 return;
8c11e798 2347
5082219b
FS
2348 spin_lock_irqsave(&iommu->lock, flags);
2349 context = iommu_context_addr(iommu, bus, devfn, 0);
2350 if (!context) {
2351 spin_unlock_irqrestore(&iommu->lock, flags);
2352 return;
2353 }
2354 did_old = context_domain_id(context);
2355 context_clear_entry(context);
2356 __iommu_flush_cache(iommu, context, sizeof(*context));
2357 spin_unlock_irqrestore(&iommu->lock, flags);
2358 iommu->flush.flush_context(iommu,
2359 did_old,
2360 (((u16)bus) << 8) | devfn,
2361 DMA_CCMD_MASK_NOBIT,
2362 DMA_CCMD_DEVICE_INVL);
2363 iommu->flush.flush_iotlb(iommu,
2364 did_old,
2365 0,
2366 0,
2367 DMA_TLB_DSI_FLUSH);
ba395927
KA
2368}
2369
109b9b04
DW
2370static inline void unlink_domain_info(struct device_domain_info *info)
2371{
2372 assert_spin_locked(&device_domain_lock);
2373 list_del(&info->link);
2374 list_del(&info->global);
2375 if (info->dev)
0bcb3e28 2376 info->dev->archdata.iommu = NULL;
109b9b04
DW
2377}
2378
ba395927
KA
2379static void domain_remove_dev_info(struct dmar_domain *domain)
2380{
3a74ca01 2381 struct device_domain_info *info, *tmp;
fb170fb4 2382 unsigned long flags;
ba395927
KA
2383
2384 spin_lock_irqsave(&device_domain_lock, flags);
76f45fe3 2385 list_for_each_entry_safe(info, tmp, &domain->devices, link)
127c7615 2386 __dmar_remove_one_dev_info(info);
ba395927
KA
2387 spin_unlock_irqrestore(&device_domain_lock, flags);
2388}
2389
2390/*
2391 * find_domain
1525a29a 2392 * Note: we use struct device->archdata.iommu to store the info
ba395927 2393 */
1525a29a 2394static struct dmar_domain *find_domain(struct device *dev)
ba395927
KA
2395{
2396 struct device_domain_info *info;
2397
8af46c78
LB
2398 if (unlikely(dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO)) {
2399 struct iommu_domain *domain;
2400
2401 dev->archdata.iommu = NULL;
2402 domain = iommu_get_domain_for_dev(dev);
2403 if (domain)
2404 intel_iommu_attach_device(domain, dev);
2405 }
2406
ba395927 2407 /* No lock here, assumes no domain exit in normal case */
1525a29a 2408 info = dev->archdata.iommu;
8af46c78 2409
b316d02a 2410 if (likely(info))
ba395927
KA
2411 return info->domain;
2412 return NULL;
2413}
2414
5a8f40e8 2415static inline struct device_domain_info *
745f2586
JL
2416dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2417{
2418 struct device_domain_info *info;
2419
2420 list_for_each_entry(info, &device_domain_list, global)
41e80dca 2421 if (info->iommu->segment == segment && info->bus == bus &&
745f2586 2422 info->devfn == devfn)
5a8f40e8 2423 return info;
745f2586
JL
2424
2425 return NULL;
2426}
2427
5db31569
JR
2428static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
2429 int bus, int devfn,
2430 struct device *dev,
2431 struct dmar_domain *domain)
745f2586 2432{
5a8f40e8 2433 struct dmar_domain *found = NULL;
745f2586
JL
2434 struct device_domain_info *info;
2435 unsigned long flags;
d160aca5 2436 int ret;
745f2586
JL
2437
2438 info = alloc_devinfo_mem();
2439 if (!info)
b718cd3d 2440 return NULL;
745f2586 2441
745f2586
JL
2442 info->bus = bus;
2443 info->devfn = devfn;
b16d0cb9
DW
2444 info->ats_supported = info->pasid_supported = info->pri_supported = 0;
2445 info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
2446 info->ats_qdep = 0;
745f2586
JL
2447 info->dev = dev;
2448 info->domain = domain;
5a8f40e8 2449 info->iommu = iommu;
cc580e41 2450 info->pasid_table = NULL;
95587a75 2451 info->auxd_enabled = 0;
67b8e02b 2452 INIT_LIST_HEAD(&info->auxiliary_domains);
745f2586 2453
b16d0cb9
DW
2454 if (dev && dev_is_pci(dev)) {
2455 struct pci_dev *pdev = to_pci_dev(info->dev);
2456
d8b85910
LB
2457 if (!pdev->untrusted &&
2458 !pci_ats_disabled() &&
cef74409 2459 ecap_dev_iotlb_support(iommu->ecap) &&
b16d0cb9
DW
2460 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) &&
2461 dmar_find_matched_atsr_unit(pdev))
2462 info->ats_supported = 1;
2463
765b6a98
LB
2464 if (sm_supported(iommu)) {
2465 if (pasid_supported(iommu)) {
b16d0cb9
DW
2466 int features = pci_pasid_features(pdev);
2467 if (features >= 0)
2468 info->pasid_supported = features | 1;
2469 }
2470
2471 if (info->ats_supported && ecap_prs(iommu->ecap) &&
2472 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
2473 info->pri_supported = 1;
2474 }
2475 }
2476
745f2586
JL
2477 spin_lock_irqsave(&device_domain_lock, flags);
2478 if (dev)
0bcb3e28 2479 found = find_domain(dev);
f303e507
JR
2480
2481 if (!found) {
5a8f40e8 2482 struct device_domain_info *info2;
41e80dca 2483 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
f303e507
JR
2484 if (info2) {
2485 found = info2->domain;
2486 info2->dev = dev;
2487 }
5a8f40e8 2488 }
f303e507 2489
745f2586
JL
2490 if (found) {
2491 spin_unlock_irqrestore(&device_domain_lock, flags);
2492 free_devinfo_mem(info);
b718cd3d
DW
2493 /* Caller must free the original domain */
2494 return found;
745f2586
JL
2495 }
2496
d160aca5
JR
2497 spin_lock(&iommu->lock);
2498 ret = domain_attach_iommu(domain, iommu);
2499 spin_unlock(&iommu->lock);
2500
2501 if (ret) {
c6c2cebd 2502 spin_unlock_irqrestore(&device_domain_lock, flags);
499f3aa4 2503 free_devinfo_mem(info);
c6c2cebd
JR
2504 return NULL;
2505 }
c6c2cebd 2506
b718cd3d
DW
2507 list_add(&info->link, &domain->devices);
2508 list_add(&info->global, &device_domain_list);
2509 if (dev)
2510 dev->archdata.iommu = info;
0bbeb01a 2511 spin_unlock_irqrestore(&device_domain_lock, flags);
a7fc93fe 2512
0bbeb01a
LB
2513 /* PASID table is mandatory for a PCI device in scalable mode. */
2514 if (dev && dev_is_pci(dev) && sm_supported(iommu)) {
a7fc93fe
LB
2515 ret = intel_pasid_alloc_table(dev);
2516 if (ret) {
932a6523 2517 dev_err(dev, "PASID table allocation failed\n");
71753239 2518 dmar_remove_one_dev_info(dev);
0bbeb01a 2519 return NULL;
a7fc93fe 2520 }
ef848b7e
LB
2521
2522 /* Setup the PASID entry for requests without PASID: */
2523 spin_lock(&iommu->lock);
2524 if (hw_pass_through && domain_type_is_si(domain))
2525 ret = intel_pasid_setup_pass_through(iommu, domain,
2526 dev, PASID_RID2PASID);
2527 else
2528 ret = intel_pasid_setup_second_level(iommu, domain,
2529 dev, PASID_RID2PASID);
2530 spin_unlock(&iommu->lock);
2531 if (ret) {
932a6523 2532 dev_err(dev, "Setup RID2PASID failed\n");
71753239 2533 dmar_remove_one_dev_info(dev);
ef848b7e 2534 return NULL;
a7fc93fe
LB
2535 }
2536 }
b718cd3d 2537
cc4e2575 2538 if (dev && domain_context_mapping(domain, dev)) {
932a6523 2539 dev_err(dev, "Domain context map failed\n");
71753239 2540 dmar_remove_one_dev_info(dev);
cc4e2575
JR
2541 return NULL;
2542 }
2543
b718cd3d 2544 return domain;
745f2586
JL
2545}
2546
579305f7
AW
2547static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2548{
2549 *(u16 *)opaque = alias;
2550 return 0;
2551}
2552
76208356 2553static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw)
ba395927 2554{
e083ea5b 2555 struct device_domain_info *info;
76208356 2556 struct dmar_domain *domain = NULL;
579305f7 2557 struct intel_iommu *iommu;
fcc35c63 2558 u16 dma_alias;
ba395927 2559 unsigned long flags;
aa4d066a 2560 u8 bus, devfn;
ba395927 2561
579305f7
AW
2562 iommu = device_to_iommu(dev, &bus, &devfn);
2563 if (!iommu)
2564 return NULL;
2565
146922ec
DW
2566 if (dev_is_pci(dev)) {
2567 struct pci_dev *pdev = to_pci_dev(dev);
276dbf99 2568
579305f7
AW
2569 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2570
2571 spin_lock_irqsave(&device_domain_lock, flags);
2572 info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2573 PCI_BUS_NUM(dma_alias),
2574 dma_alias & 0xff);
2575 if (info) {
2576 iommu = info->iommu;
2577 domain = info->domain;
5a8f40e8 2578 }
579305f7 2579 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927 2580
76208356 2581 /* DMA alias already has a domain, use it */
579305f7 2582 if (info)
76208356 2583 goto out;
579305f7 2584 }
ba395927 2585
146922ec 2586 /* Allocate and initialize new domain for the device */
ab8dfe25 2587 domain = alloc_domain(0);
745f2586 2588 if (!domain)
579305f7 2589 return NULL;
301e7ee1 2590 if (domain_init(domain, iommu, gaw)) {
579305f7
AW
2591 domain_exit(domain);
2592 return NULL;
2c2e2c38 2593 }
ba395927 2594
76208356 2595out:
76208356
JR
2596 return domain;
2597}
579305f7 2598
76208356
JR
2599static struct dmar_domain *set_domain_for_dev(struct device *dev,
2600 struct dmar_domain *domain)
2601{
2602 struct intel_iommu *iommu;
2603 struct dmar_domain *tmp;
2604 u16 req_id, dma_alias;
2605 u8 bus, devfn;
2606
2607 iommu = device_to_iommu(dev, &bus, &devfn);
2608 if (!iommu)
2609 return NULL;
2610
2611 req_id = ((u16)bus << 8) | devfn;
2612
2613 if (dev_is_pci(dev)) {
2614 struct pci_dev *pdev = to_pci_dev(dev);
2615
2616 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2617
2618 /* register PCI DMA alias device */
2619 if (req_id != dma_alias) {
2620 tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2621 dma_alias & 0xff, NULL, domain);
2622
2623 if (!tmp || tmp != domain)
2624 return tmp;
2625 }
ba395927
KA
2626 }
2627
5db31569 2628 tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
76208356
JR
2629 if (!tmp || tmp != domain)
2630 return tmp;
2631
2632 return domain;
2633}
579305f7 2634
b213203e
DW
2635static int iommu_domain_identity_map(struct dmar_domain *domain,
2636 unsigned long long start,
2637 unsigned long long end)
ba395927 2638{
c5395d5c
DW
2639 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2640 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2641
2642 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2643 dma_to_mm_pfn(last_vpfn))) {
9f10e5bf 2644 pr_err("Reserving iova failed\n");
b213203e 2645 return -ENOMEM;
ba395927
KA
2646 }
2647
af1089ce 2648 pr_debug("Mapping reserved region %llx-%llx\n", start, end);
ba395927
KA
2649 /*
2650 * RMRR range might overlap with the physical memory range,
2651 * clear it first
2652 */
c5395d5c 2653 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
ba395927 2654
87684fd9
PX
2655 return __domain_mapping(domain, first_vpfn, NULL,
2656 first_vpfn, last_vpfn - first_vpfn + 1,
2657 DMA_PTE_READ|DMA_PTE_WRITE);
b213203e
DW
2658}
2659
d66ce54b
JR
2660static int domain_prepare_identity_map(struct device *dev,
2661 struct dmar_domain *domain,
2662 unsigned long long start,
2663 unsigned long long end)
b213203e 2664{
19943b0e
DW
2665 /* For _hardware_ passthrough, don't bother. But for software
2666 passthrough, we do it anyway -- it may indicate a memory
2667 range which is reserved in E820 and so didn't get set
2668 up to start with in si_domain */
2669 if (domain == si_domain && hw_pass_through) {
932a6523
BH
2670 dev_warn(dev, "Ignoring identity map for HW passthrough [0x%Lx - 0x%Lx]\n",
2671 start, end);
19943b0e
DW
2672 return 0;
2673 }
2674
932a6523 2675 dev_info(dev, "Setting identity map [0x%Lx - 0x%Lx]\n", start, end);
9f10e5bf 2676
5595b528
DW
2677 if (end < start) {
2678 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2679 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2680 dmi_get_system_info(DMI_BIOS_VENDOR),
2681 dmi_get_system_info(DMI_BIOS_VERSION),
2682 dmi_get_system_info(DMI_PRODUCT_VERSION));
d66ce54b 2683 return -EIO;
5595b528
DW
2684 }
2685
2ff729f5
DW
2686 if (end >> agaw_to_width(domain->agaw)) {
2687 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2688 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2689 agaw_to_width(domain->agaw),
2690 dmi_get_system_info(DMI_BIOS_VENDOR),
2691 dmi_get_system_info(DMI_BIOS_VERSION),
2692 dmi_get_system_info(DMI_PRODUCT_VERSION));
d66ce54b 2693 return -EIO;
2ff729f5 2694 }
19943b0e 2695
d66ce54b
JR
2696 return iommu_domain_identity_map(domain, start, end);
2697}
ba395927 2698
301e7ee1
JR
2699static int md_domain_init(struct dmar_domain *domain, int guest_width);
2700
071e1374 2701static int __init si_domain_init(int hw)
2c2e2c38 2702{
4de354ec
LB
2703 struct dmar_rmrr_unit *rmrr;
2704 struct device *dev;
2705 int i, nid, ret;
2c2e2c38 2706
ab8dfe25 2707 si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2c2e2c38
FY
2708 if (!si_domain)
2709 return -EFAULT;
2710
301e7ee1 2711 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2c2e2c38
FY
2712 domain_exit(si_domain);
2713 return -EFAULT;
2714 }
2715
19943b0e
DW
2716 if (hw)
2717 return 0;
2718
c7ab48d2 2719 for_each_online_node(nid) {
5dfe8660
TH
2720 unsigned long start_pfn, end_pfn;
2721 int i;
2722
2723 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2724 ret = iommu_domain_identity_map(si_domain,
2725 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2726 if (ret)
2727 return ret;
2728 }
c7ab48d2
DW
2729 }
2730
4de354ec
LB
2731 /*
2732 * Normally we use DMA domains for devices which have RMRRs. But we
2733 * relax this requirement for graphics and USB devices. Identity map
2734 * the RMRRs for graphics and USB devices so that they can use the
2735 * si_domain.
2736 */
2737 for_each_rmrr_units(rmrr) {
2738 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
2739 i, dev) {
2740 unsigned long long start = rmrr->base_address;
2741 unsigned long long end = rmrr->end_address;
2742
2743 if (device_is_rmrr_locked(dev))
2744 continue;
2745
2746 if (WARN_ON(end < start ||
2747 end >> agaw_to_width(si_domain->agaw)))
2748 continue;
2749
2750 ret = iommu_domain_identity_map(si_domain, start, end);
2751 if (ret)
2752 return ret;
2753 }
2754 }
2755
2c2e2c38
FY
2756 return 0;
2757}
2758
9b226624 2759static int identity_mapping(struct device *dev)
2c2e2c38
FY
2760{
2761 struct device_domain_info *info;
2762
9b226624 2763 info = dev->archdata.iommu;
cb452a40
MT
2764 if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2765 return (info->domain == si_domain);
2c2e2c38 2766
2c2e2c38
FY
2767 return 0;
2768}
2769
28ccce0d 2770static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
2c2e2c38 2771{
0ac72664 2772 struct dmar_domain *ndomain;
5a8f40e8 2773 struct intel_iommu *iommu;
156baca8 2774 u8 bus, devfn;
2c2e2c38 2775
5913c9bf 2776 iommu = device_to_iommu(dev, &bus, &devfn);
5a8f40e8
DW
2777 if (!iommu)
2778 return -ENODEV;
2779
5db31569 2780 ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
0ac72664
DW
2781 if (ndomain != domain)
2782 return -EBUSY;
2c2e2c38
FY
2783
2784 return 0;
2785}
2786
0b9d9753 2787static bool device_has_rmrr(struct device *dev)
ea2447f7
TM
2788{
2789 struct dmar_rmrr_unit *rmrr;
832bd858 2790 struct device *tmp;
ea2447f7
TM
2791 int i;
2792
0e242612 2793 rcu_read_lock();
ea2447f7 2794 for_each_rmrr_units(rmrr) {
b683b230
JL
2795 /*
2796 * Return TRUE if this RMRR contains the device that
2797 * is passed in.
2798 */
2799 for_each_active_dev_scope(rmrr->devices,
2800 rmrr->devices_cnt, i, tmp)
e143fd45
EA
2801 if (tmp == dev ||
2802 is_downstream_to_pci_bridge(dev, tmp)) {
0e242612 2803 rcu_read_unlock();
ea2447f7 2804 return true;
b683b230 2805 }
ea2447f7 2806 }
0e242612 2807 rcu_read_unlock();
ea2447f7
TM
2808 return false;
2809}
2810
1c5c59fb
EA
2811/**
2812 * device_rmrr_is_relaxable - Test whether the RMRR of this device
2813 * is relaxable (ie. is allowed to be not enforced under some conditions)
2814 * @dev: device handle
2815 *
2816 * We assume that PCI USB devices with RMRRs have them largely
2817 * for historical reasons and that the RMRR space is not actively used post
2818 * boot. This exclusion may change if vendors begin to abuse it.
2819 *
2820 * The same exception is made for graphics devices, with the requirement that
2821 * any use of the RMRR regions will be torn down before assigning the device
2822 * to a guest.
2823 *
2824 * Return: true if the RMRR is relaxable, false otherwise
2825 */
2826static bool device_rmrr_is_relaxable(struct device *dev)
2827{
2828 struct pci_dev *pdev;
2829
2830 if (!dev_is_pci(dev))
2831 return false;
2832
2833 pdev = to_pci_dev(dev);
2834 if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
2835 return true;
2836 else
2837 return false;
2838}
2839
c875d2c1
AW
2840/*
2841 * There are a couple of cases where we need to restrict the functionality of
2842 * devices associated with RMRRs. The first is when evaluating a device for
2843 * identity mapping because problems exist when devices are moved in and out
2844 * of domains and their respective RMRR information is lost. This means that
2845 * a device with associated RMRRs will never be in a "passthrough" domain.
2846 * The second is use of the device through the IOMMU API. This interface
2847 * expects to have full control of the IOVA space for the device. We cannot
2848 * satisfy both the requirement that RMRR access is maintained and have an
2849 * unencumbered IOVA space. We also have no ability to quiesce the device's
2850 * use of the RMRR space or even inform the IOMMU API user of the restriction.
2851 * We therefore prevent devices associated with an RMRR from participating in
2852 * the IOMMU API, which eliminates them from device assignment.
2853 *
1c5c59fb
EA
2854 * In both cases, devices which have relaxable RMRRs are not concerned by this
2855 * restriction. See device_rmrr_is_relaxable comment.
c875d2c1
AW
2856 */
2857static bool device_is_rmrr_locked(struct device *dev)
2858{
2859 if (!device_has_rmrr(dev))
2860 return false;
2861
1c5c59fb
EA
2862 if (device_rmrr_is_relaxable(dev))
2863 return false;
c875d2c1
AW
2864
2865 return true;
2866}
2867
f273a453
LB
2868/*
2869 * Return the required default domain type for a specific device.
2870 *
2871 * @dev: the device in question
2872 * @startup: true if this is during early boot
2873 *
2874 * Returns:
2875 * - IOMMU_DOMAIN_DMA: device requires a dynamic mapping domain
2876 * - IOMMU_DOMAIN_IDENTITY: device requires an identical mapping domain
2877 * - 0: both identity and dynamic domains work for this device
2878 */
0e31a726 2879static int device_def_domain_type(struct device *dev)
6941af28 2880{
3bdb2591
DW
2881 if (dev_is_pci(dev)) {
2882 struct pci_dev *pdev = to_pci_dev(dev);
ea2447f7 2883
c875d2c1 2884 if (device_is_rmrr_locked(dev))
f273a453 2885 return IOMMU_DOMAIN_DMA;
e0fc7e0b 2886
89a6079d
LB
2887 /*
2888 * Prevent any device marked as untrusted from getting
2889 * placed into the statically identity mapping domain.
2890 */
2891 if (pdev->untrusted)
f273a453 2892 return IOMMU_DOMAIN_DMA;
89a6079d 2893
3bdb2591 2894 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
f273a453 2895 return IOMMU_DOMAIN_IDENTITY;
e0fc7e0b 2896
3bdb2591 2897 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
f273a453 2898 return IOMMU_DOMAIN_IDENTITY;
3bdb2591
DW
2899
2900 /*
2901 * We want to start off with all devices in the 1:1 domain, and
2902 * take them out later if we find they can't access all of memory.
2903 *
2904 * However, we can't do this for PCI devices behind bridges,
2905 * because all PCI devices behind the same bridge will end up
2906 * with the same source-id on their transactions.
2907 *
2908 * Practically speaking, we can't change things around for these
2909 * devices at run-time, because we can't be sure there'll be no
2910 * DMA transactions in flight for any of their siblings.
2911 *
2912 * So PCI devices (unless they're on the root bus) as well as
2913 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2914 * the 1:1 domain, just in _case_ one of their siblings turns out
2915 * not to be able to map all of memory.
2916 */
2917 if (!pci_is_pcie(pdev)) {
2918 if (!pci_is_root_bus(pdev->bus))
f273a453 2919 return IOMMU_DOMAIN_DMA;
3bdb2591 2920 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
f273a453 2921 return IOMMU_DOMAIN_DMA;
3bdb2591 2922 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
f273a453 2923 return IOMMU_DOMAIN_DMA;
3bdb2591
DW
2924 } else {
2925 if (device_has_rmrr(dev))
f273a453 2926 return IOMMU_DOMAIN_DMA;
3bdb2591 2927 }
3dfc813d 2928
f273a453
LB
2929 return (iommu_identity_mapping & IDENTMAP_ALL) ?
2930 IOMMU_DOMAIN_IDENTITY : 0;
2931}
2932
ffebeb46
JL
2933static void intel_iommu_init_qi(struct intel_iommu *iommu)
2934{
2935 /*
2936 * Start from the sane iommu hardware state.
2937 * If the queued invalidation is already initialized by us
2938 * (for example, while enabling interrupt-remapping) then
2939 * we've got things already rolling from a sane state.
2940 */
2941 if (!iommu->qi) {
2942 /*
2943 * Clear any previous faults.
2944 */
2945 dmar_fault(-1, iommu);
2946 /*
2947 * Disable queued invalidation if supported and already enabled
2948 * before OS handover.
2949 */
2950 dmar_disable_qi(iommu);
2951 }
2952
2953 if (dmar_enable_qi(iommu)) {
2954 /*
2955 * Queued Invalidate not enabled, use Register Based Invalidate
2956 */
2957 iommu->flush.flush_context = __iommu_flush_context;
2958 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
9f10e5bf 2959 pr_info("%s: Using Register based invalidation\n",
ffebeb46
JL
2960 iommu->name);
2961 } else {
2962 iommu->flush.flush_context = qi_flush_context;
2963 iommu->flush.flush_iotlb = qi_flush_iotlb;
9f10e5bf 2964 pr_info("%s: Using Queued invalidation\n", iommu->name);
ffebeb46
JL
2965 }
2966}
2967
091d42e4 2968static int copy_context_table(struct intel_iommu *iommu,
dfddb969 2969 struct root_entry *old_re,
091d42e4
JR
2970 struct context_entry **tbl,
2971 int bus, bool ext)
2972{
dbcd861f 2973 int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
543c8dcf 2974 struct context_entry *new_ce = NULL, ce;
dfddb969 2975 struct context_entry *old_ce = NULL;
543c8dcf 2976 struct root_entry re;
091d42e4
JR
2977 phys_addr_t old_ce_phys;
2978
2979 tbl_idx = ext ? bus * 2 : bus;
dfddb969 2980 memcpy(&re, old_re, sizeof(re));
091d42e4
JR
2981
2982 for (devfn = 0; devfn < 256; devfn++) {
2983 /* First calculate the correct index */
2984 idx = (ext ? devfn * 2 : devfn) % 256;
2985
2986 if (idx == 0) {
2987 /* First save what we may have and clean up */
2988 if (new_ce) {
2989 tbl[tbl_idx] = new_ce;
2990 __iommu_flush_cache(iommu, new_ce,
2991 VTD_PAGE_SIZE);
2992 pos = 1;
2993 }
2994
2995 if (old_ce)
829383e1 2996 memunmap(old_ce);
091d42e4
JR
2997
2998 ret = 0;
2999 if (devfn < 0x80)
543c8dcf 3000 old_ce_phys = root_entry_lctp(&re);
091d42e4 3001 else
543c8dcf 3002 old_ce_phys = root_entry_uctp(&re);
091d42e4
JR
3003
3004 if (!old_ce_phys) {
3005 if (ext && devfn == 0) {
3006 /* No LCTP, try UCTP */
3007 devfn = 0x7f;
3008 continue;
3009 } else {
3010 goto out;
3011 }
3012 }
3013
3014 ret = -ENOMEM;
dfddb969
DW
3015 old_ce = memremap(old_ce_phys, PAGE_SIZE,
3016 MEMREMAP_WB);
091d42e4
JR
3017 if (!old_ce)
3018 goto out;
3019
3020 new_ce = alloc_pgtable_page(iommu->node);
3021 if (!new_ce)
3022 goto out_unmap;
3023
3024 ret = 0;
3025 }
3026
3027 /* Now copy the context entry */
dfddb969 3028 memcpy(&ce, old_ce + idx, sizeof(ce));
091d42e4 3029
cf484d0e 3030 if (!__context_present(&ce))
091d42e4
JR
3031 continue;
3032
dbcd861f
JR
3033 did = context_domain_id(&ce);
3034 if (did >= 0 && did < cap_ndoms(iommu->cap))
3035 set_bit(did, iommu->domain_ids);
3036
cf484d0e
JR
3037 /*
3038 * We need a marker for copied context entries. This
3039 * marker needs to work for the old format as well as
3040 * for extended context entries.
3041 *
3042 * Bit 67 of the context entry is used. In the old
3043 * format this bit is available to software, in the
3044 * extended format it is the PGE bit, but PGE is ignored
3045 * by HW if PASIDs are disabled (and thus still
3046 * available).
3047 *
3048 * So disable PASIDs first and then mark the entry
3049 * copied. This means that we don't copy PASID
3050 * translations from the old kernel, but this is fine as
3051 * faults there are not fatal.
3052 */
3053 context_clear_pasid_enable(&ce);
3054 context_set_copied(&ce);
3055
091d42e4
JR
3056 new_ce[idx] = ce;
3057 }
3058
3059 tbl[tbl_idx + pos] = new_ce;
3060
3061 __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
3062
3063out_unmap:
dfddb969 3064 memunmap(old_ce);
091d42e4
JR
3065
3066out:
3067 return ret;
3068}
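/*
 * Layout note for the copy above: in the extended format each bus
 * consumes two context tables (tbl[bus * 2] for devfn 0x00-0x7f from the
 * LCTP, tbl[bus * 2 + 1] for devfn 0x80-0xff from the UCTP), which is
 * why ctxt_table_entries below is 512 rather than 256.
 */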
3069
3070static int copy_translation_tables(struct intel_iommu *iommu)
3071{
3072 struct context_entry **ctxt_tbls;
dfddb969 3073 struct root_entry *old_rt;
091d42e4
JR
3074 phys_addr_t old_rt_phys;
3075 int ctxt_table_entries;
3076 unsigned long flags;
3077 u64 rtaddr_reg;
3078 int bus, ret;
c3361f2f 3079 bool new_ext, ext;
091d42e4
JR
3080
3081 rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
3082 ext = !!(rtaddr_reg & DMA_RTADDR_RTT);
c3361f2f
JR
3083 new_ext = !!ecap_ecs(iommu->ecap);
3084
3085 /*
3086 * The RTT bit can only be changed when translation is disabled,
3087 * but disabling translation means opening a window for data
3088 * corruption. So bail out and don't copy anything if we would
3089 * have to change the bit.
3090 */
3091 if (new_ext != ext)
3092 return -EINVAL;
091d42e4
JR
3093
3094 old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
3095 if (!old_rt_phys)
3096 return -EINVAL;
3097
dfddb969 3098 old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
091d42e4
JR
3099 if (!old_rt)
3100 return -ENOMEM;
3101
3102 /* This is too big for the stack - allocate it from slab */
3103 ctxt_table_entries = ext ? 512 : 256;
3104 ret = -ENOMEM;
6396bb22 3105 ctxt_tbls = kcalloc(ctxt_table_entries, sizeof(void *), GFP_KERNEL);
091d42e4
JR
3106 if (!ctxt_tbls)
3107 goto out_unmap;
3108
3109 for (bus = 0; bus < 256; bus++) {
3110 ret = copy_context_table(iommu, &old_rt[bus],
3111 ctxt_tbls, bus, ext);
3112 if (ret) {
3113 pr_err("%s: Failed to copy context table for bus %d\n",
3114 iommu->name, bus);
3115 continue;
3116 }
3117 }
3118
3119 spin_lock_irqsave(&iommu->lock, flags);
3120
3121 /* Context tables are copied, now write them to the root_entry table */
3122 for (bus = 0; bus < 256; bus++) {
3123 int idx = ext ? bus * 2 : bus;
3124 u64 val;
3125
3126 if (ctxt_tbls[idx]) {
3127 val = virt_to_phys(ctxt_tbls[idx]) | 1;
3128 iommu->root_entry[bus].lo = val;
3129 }
3130
3131 if (!ext || !ctxt_tbls[idx + 1])
3132 continue;
3133
3134 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
3135 iommu->root_entry[bus].hi = val;
3136 }
3137
3138 spin_unlock_irqrestore(&iommu->lock, flags);
3139
3140 kfree(ctxt_tbls);
3141
3142 __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
3143
3144 ret = 0;
3145
3146out_unmap:
dfddb969 3147 memunmap(old_rt);
091d42e4
JR
3148
3149 return ret;
3150}
3151
b779260b 3152static int __init init_dmars(void)
ba395927
KA
3153{
3154 struct dmar_drhd_unit *drhd;
ba395927 3155 struct intel_iommu *iommu;
df4f3c60 3156 int ret;
2c2e2c38 3157
ba395927
KA
3158 /*
3159 * for each drhd
3160 * allocate root
3161 * initialize and program root entry to not present
3162 * endfor
3163 */
3164 for_each_drhd_unit(drhd) {
5e0d2a6f 3165 /*
3166 * lock not needed as this is only incremented in the single-
3167 * threaded kernel __init code path; all other accesses are read
3168 * only
3169 */
78d8e704 3170 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
1b198bb0
MT
3171 g_num_of_iommus++;
3172 continue;
3173 }
9f10e5bf 3174 pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
5e0d2a6f 3175 }
3176
ffebeb46
JL
3177 /* Preallocate enough resources for IOMMU hot-addition */
3178 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
3179 g_num_of_iommus = DMAR_UNITS_SUPPORTED;
3180
d9630fe9
WH
3181 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
3182 GFP_KERNEL);
3183 if (!g_iommus) {
9f10e5bf 3184 pr_err("Allocating global iommu array failed\n");
d9630fe9
WH
3185 ret = -ENOMEM;
3186 goto error;
3187 }
3188
6a8c6748
LB
3189 for_each_iommu(iommu, drhd) {
3190 if (drhd->ignored) {
3191 iommu_disable_translation(iommu);
3192 continue;
3193 }
3194
56283174
LB
3195 /*
3196 * Find the max pasid size of all IOMMUs in the system.
3197 * We need to ensure the system pasid table is no bigger
3198 * than the smallest supported size.
3199 */
765b6a98 3200 if (pasid_supported(iommu)) {
56283174
LB
3201 u32 temp = 2 << ecap_pss(iommu->ecap);
3202
3203 intel_pasid_max_id = min_t(u32, temp,
3204 intel_pasid_max_id);
3205 }
3206
d9630fe9 3207 g_iommus[iommu->seq_id] = iommu;
ba395927 3208
b63d80d1
JR
3209 intel_iommu_init_qi(iommu);
3210
e61d98d8
SS
3211 ret = iommu_init_domains(iommu);
3212 if (ret)
989d51fc 3213 goto free_iommu;
e61d98d8 3214
4158c2ec
JR
3215 init_translation_status(iommu);
3216
091d42e4
JR
3217 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
3218 iommu_disable_translation(iommu);
3219 clear_translation_pre_enabled(iommu);
3220 pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3221 iommu->name);
3222 }
4158c2ec 3223
ba395927
KA
3224 /*
3225 * TBD:
3226 * we could share the same root & context tables
25985edc 3227 * among all IOMMUs. Need to split it later.
ba395927
KA
3228 */
3229 ret = iommu_alloc_root_entry(iommu);
ffebeb46 3230 if (ret)
989d51fc 3231 goto free_iommu;
5f0a7f76 3232
091d42e4
JR
3233 if (translation_pre_enabled(iommu)) {
3234 pr_info("Translation already enabled - trying to copy translation structures\n");
3235
3236 ret = copy_translation_tables(iommu);
3237 if (ret) {
3238 /*
3239 * We found the IOMMU with translation
3240 * enabled - but failed to copy over the
3241 * old root-entry table. Try to proceed
3242 * by disabling translation now and
3243 * allocating a clean root-entry table.
3244 * This might cause DMAR faults, but
3245 * probably the dump will still succeed.
3246 */
3247 pr_err("Failed to copy translation tables from previous kernel for %s\n",
3248 iommu->name);
3249 iommu_disable_translation(iommu);
3250 clear_translation_pre_enabled(iommu);
3251 } else {
3252 pr_info("Copied translation tables from previous kernel for %s\n",
3253 iommu->name);
3254 }
3255 }
3256
4ed0d3e6 3257 if (!ecap_pass_through(iommu->ecap))
19943b0e 3258 hw_pass_through = 0;
8a94ade4 3259#ifdef CONFIG_INTEL_IOMMU_SVM
765b6a98 3260 if (pasid_supported(iommu))
d9737953 3261 intel_svm_init(iommu);
8a94ade4 3262#endif
ba395927
KA
3263 }
3264
a4c34ff1
JR
3265 /*
3266 * Now that qi is enabled on all iommus, set the root entry and flush
3267 * caches. This is required on some Intel X58 chipsets, otherwise the
3268 * flush_context function will loop forever and the boot hangs.
3269 */
3270 for_each_active_iommu(iommu, drhd) {
3271 iommu_flush_write_buffer(iommu);
3272 iommu_set_root_entry(iommu);
3273 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
3274 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3275 }
3276
19943b0e 3277 if (iommu_pass_through)
e0fc7e0b
DW
3278 iommu_identity_mapping |= IDENTMAP_ALL;
3279
d3f13810 3280#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
5daab580 3281 dmar_map_gfx = 0;
19943b0e 3282#endif
e0fc7e0b 3283
5daab580
LB
3284 if (!dmar_map_gfx)
3285 iommu_identity_mapping |= IDENTMAP_GFX;
3286
21e722c4
AR
3287 check_tylersburg_isoch();
3288
4de354ec
LB
3289 ret = si_domain_init(hw_pass_through);
3290 if (ret)
3291 goto free_iommu;
86080ccc 3292
ba395927
KA
3293 /*
3294 * for each drhd
3295 * enable fault log
3296 * global invalidate context cache
3297 * global invalidate iotlb
3298 * enable translation
3299 */
7c919779 3300 for_each_iommu(iommu, drhd) {
51a63e67
JC
3301 if (drhd->ignored) {
3302 /*
3303 * we always have to disable PMRs or DMA may fail on
3304 * this device
3305 */
3306 if (force_on)
7c919779 3307 iommu_disable_protect_mem_regions(iommu);
ba395927 3308 continue;
51a63e67 3309 }
ba395927
KA
3310
3311 iommu_flush_write_buffer(iommu);
3312
a222a7f0 3313#ifdef CONFIG_INTEL_IOMMU_SVM
765b6a98 3314 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
a7755c3c
LB
3315 /*
3316 * Call dmar_alloc_hwirq() with dmar_global_lock held,
3317 * could cause possible lock race condition.
3318 */
3319 up_write(&dmar_global_lock);
a222a7f0 3320 ret = intel_svm_enable_prq(iommu);
a7755c3c 3321 down_write(&dmar_global_lock);
a222a7f0
DW
3322 if (ret)
3323 goto free_iommu;
3324 }
3325#endif
3460a6d9
KA
3326 ret = dmar_set_interrupt(iommu);
3327 if (ret)
989d51fc 3328 goto free_iommu;
ba395927
KA
3329 }
3330
3331 return 0;
989d51fc
JL
3332
3333free_iommu:
ffebeb46
JL
3334 for_each_active_iommu(iommu, drhd) {
3335 disable_dmar_iommu(iommu);
a868e6b7 3336 free_dmar_iommu(iommu);
ffebeb46 3337 }
13cf0174 3338
d9630fe9 3339 kfree(g_iommus);
13cf0174 3340
989d51fc 3341error:
ba395927
KA
3342 return ret;
3343}
3344
5a5e02a6 3345/* This takes a number of _MM_ pages, not VTD pages */
2aac6304 3346static unsigned long intel_alloc_iova(struct device *dev,
875764de
DW
3347 struct dmar_domain *domain,
3348 unsigned long nrpages, uint64_t dma_mask)
ba395927 3349{
e083ea5b 3350 unsigned long iova_pfn;
ba395927 3351
875764de
DW
3352 /* Restrict dma_mask to the width that the iommu can handle */
3353 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
8f6429c7
RM
3354 /* Ensure we reserve the whole size-aligned region */
3355 nrpages = __roundup_pow_of_two(nrpages);
875764de
DW
3356
3357 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
ba395927
KA
3358 /*
3359 * First try to allocate an io virtual address in
284901a9 3360 * DMA_BIT_MASK(32) and if that fails then try allocating
3609801e 3361 * from a higher range
ba395927 3362 */
22e2f9fa 3363 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
538d5b33 3364 IOVA_PFN(DMA_BIT_MASK(32)), false);
22e2f9fa
OP
3365 if (iova_pfn)
3366 return iova_pfn;
875764de 3367 }
538d5b33
TN
3368 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
3369 IOVA_PFN(dma_mask), true);
22e2f9fa 3370 if (unlikely(!iova_pfn)) {
932a6523 3371 dev_err(dev, "Allocating %ld-page iova failed", nrpages);
2aac6304 3372 return 0;
f76aec76
KA
3373 }
3374
22e2f9fa 3375 return iova_pfn;
f76aec76
KA
3376}
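/*
 * Note on the allocator above: nrpages is rounded up to a power of two
 * (e.g. a 5-page request reserves 8 pages) so the IOVA region is
 * size-aligned, and unless dmar_forcedac is set or the device mask is
 * <= 32 bits, the DMA_BIT_MASK(32) range is tried before falling back
 * to the full mask.
 */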
3377
4ec066c7 3378static struct dmar_domain *get_private_domain_for_dev(struct device *dev)
f76aec76 3379{
1c5ebba9 3380 struct dmar_domain *domain, *tmp;
b1ce5b79 3381 struct dmar_rmrr_unit *rmrr;
b1ce5b79
JR
3382 struct device *i_dev;
3383 int i, ret;
f76aec76 3384
4ec066c7 3385 /* Device shouldn't be attached to any domain yet. */
1c5ebba9
JR
3386 domain = find_domain(dev);
3387 if (domain)
4ec066c7 3388 return NULL;
1c5ebba9
JR
3389
3390 domain = find_or_alloc_domain(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
3391 if (!domain)
3392 goto out;
ba395927 3393
b1ce5b79
JR
3394 /* We have a new domain - set up possible RMRRs for the device */
3395 rcu_read_lock();
3396 for_each_rmrr_units(rmrr) {
3397 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
3398 i, i_dev) {
3399 if (i_dev != dev)
3400 continue;
3401
3402 ret = domain_prepare_identity_map(dev, domain,
3403 rmrr->base_address,
3404 rmrr->end_address);
3405 if (ret)
3406 dev_err(dev, "Mapping reserved region failed\n");
3407 }
3408 }
3409 rcu_read_unlock();
3410
1c5ebba9
JR
3411 tmp = set_domain_for_dev(dev, domain);
3412 if (!tmp || domain != tmp) {
3413 domain_exit(domain);
3414 domain = tmp;
3415 }
3416
3417out:
1c5ebba9 3418 if (!domain)
932a6523 3419 dev_err(dev, "Allocating domain failed\n");
c57b260a
LB
3420 else
3421 domain->domain.type = IOMMU_DOMAIN_DMA;
1c5ebba9 3422
f76aec76
KA
3423 return domain;
3424}
3425
ecb509ec 3426/* Check if the dev needs to go through non-identity map and unmap process.*/
48b2c937 3427static bool iommu_need_mapping(struct device *dev)
2c2e2c38 3428{
98b2fffb 3429 int ret;
2c2e2c38 3430
3d89194a 3431 if (iommu_dummy(dev))
48b2c937 3432 return false;
1e4c64c4 3433
98b2fffb
LB
3434 ret = identity_mapping(dev);
3435 if (ret) {
3436 u64 dma_mask = *dev->dma_mask;
3437
3438 if (dev->coherent_dma_mask && dev->coherent_dma_mask < dma_mask)
3439 dma_mask = dev->coherent_dma_mask;
3440
3441 if (dma_mask >= dma_get_required_mask(dev))
48b2c937
CH
3442 return false;
3443
3444 /*
3445 * For 32-bit DMA the device is removed from si_domain and falls back to
3446 * non-identity mapping.
3447 */
3448 dmar_remove_one_dev_info(dev);
98b2fffb
LB
3449 ret = iommu_request_dma_domain_for_dev(dev);
3450 if (ret) {
3451 struct iommu_domain *domain;
3452 struct dmar_domain *dmar_domain;
3453
3454 domain = iommu_get_domain_for_dev(dev);
3455 if (domain) {
3456 dmar_domain = to_dmar_domain(domain);
3457 dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
3458 }
ae23bfb6 3459 dmar_remove_one_dev_info(dev);
4ec066c7 3460 get_private_domain_for_dev(dev);
2c2e2c38 3461 }
98b2fffb
LB
3462
3463 dev_info(dev, "32bit DMA uses non-identity mapping\n");
2c2e2c38
FY
3464 }
3465
48b2c937 3466 return true;
2c2e2c38
FY
3467}
3468
21d5d27c
LG
3469static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
3470 size_t size, int dir, u64 dma_mask)
f76aec76 3471{
f76aec76 3472 struct dmar_domain *domain;
5b6985ce 3473 phys_addr_t start_paddr;
2aac6304 3474 unsigned long iova_pfn;
f76aec76 3475 int prot = 0;
6865f0d1 3476 int ret;
8c11e798 3477 struct intel_iommu *iommu;
33041ec0 3478 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
f76aec76
KA
3479
3480 BUG_ON(dir == DMA_NONE);
2c2e2c38 3481
4ec066c7 3482 domain = find_domain(dev);
f76aec76 3483 if (!domain)
524a669b 3484 return DMA_MAPPING_ERROR;
f76aec76 3485
8c11e798 3486 iommu = domain_get_iommu(domain);
88cb6a74 3487 size = aligned_nrpages(paddr, size);
f76aec76 3488
2aac6304
OP
3489 iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
3490 if (!iova_pfn)
f76aec76
KA
3491 goto error;
3492
ba395927
KA
3493 /*
3494 * Check if DMAR supports zero-length reads on write-only
3495 * mappings.
3496 */
3497 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3498 !cap_zlr(iommu->cap))
ba395927
KA
3499 prot |= DMA_PTE_READ;
3500 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3501 prot |= DMA_PTE_WRITE;
3502 /*
6865f0d1 3503 * paddr - (paddr + size) might be partial page, we should map the whole
ba395927 3504 * page. Note: if two part of one page are separately mapped, we
6865f0d1 3505 * might have two guest_addr mapping to the same host paddr, but this
ba395927
KA
3506 * is not a big problem
3507 */
2aac6304 3508 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova_pfn),
33041ec0 3509 mm_to_dma_pfn(paddr_pfn), size, prot);
ba395927
KA
3510 if (ret)
3511 goto error;
3512
2aac6304 3513 start_paddr = (phys_addr_t)iova_pfn << PAGE_SHIFT;
03d6a246
DW
3514 start_paddr += paddr & ~PAGE_MASK;
3515 return start_paddr;
ba395927 3516
ba395927 3517error:
2aac6304 3518 if (iova_pfn)
22e2f9fa 3519 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
932a6523
BH
3520 dev_err(dev, "Device request: %zx@%llx dir %d --- failed\n",
3521 size, (unsigned long long)paddr, dir);
524a669b 3522 return DMA_MAPPING_ERROR;
ba395927
KA
3523}
3524
ffbbef5c
FT
3525static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3526 unsigned long offset, size_t size,
3527 enum dma_data_direction dir,
00085f1e 3528 unsigned long attrs)
bb9e6d65 3529{
9cc0c2af
CH
3530 if (iommu_need_mapping(dev))
3531 return __intel_map_single(dev, page_to_phys(page) + offset,
3532 size, dir, *dev->dma_mask);
3533 return dma_direct_map_page(dev, page, offset, size, dir, attrs);
21d5d27c
LG
3534}
3535
3536static dma_addr_t intel_map_resource(struct device *dev, phys_addr_t phys_addr,
3537 size_t size, enum dma_data_direction dir,
3538 unsigned long attrs)
3539{
9cc0c2af
CH
3540 if (iommu_need_mapping(dev))
3541 return __intel_map_single(dev, phys_addr, size, dir,
3542 *dev->dma_mask);
3543 return dma_direct_map_resource(dev, phys_addr, size, dir, attrs);
bb9e6d65
FT
3544}
3545
769530e4 3546static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
ba395927 3547{
f76aec76 3548 struct dmar_domain *domain;
d794dc9b 3549 unsigned long start_pfn, last_pfn;
769530e4 3550 unsigned long nrpages;
2aac6304 3551 unsigned long iova_pfn;
8c11e798 3552 struct intel_iommu *iommu;
ea8ea460 3553 struct page *freelist;
f7b0c4ce 3554 struct pci_dev *pdev = NULL;
ba395927 3555
1525a29a 3556 domain = find_domain(dev);
ba395927
KA
3557 BUG_ON(!domain);
3558
8c11e798
WH
3559 iommu = domain_get_iommu(domain);
3560
2aac6304 3561 iova_pfn = IOVA_PFN(dev_addr);
ba395927 3562
769530e4 3563 nrpages = aligned_nrpages(dev_addr, size);
2aac6304 3564 start_pfn = mm_to_dma_pfn(iova_pfn);
769530e4 3565 last_pfn = start_pfn + nrpages - 1;
ba395927 3566
f7b0c4ce
LB
3567 if (dev_is_pci(dev))
3568 pdev = to_pci_dev(dev);
3569
932a6523 3570 dev_dbg(dev, "Device unmapping: pfn %lx-%lx\n", start_pfn, last_pfn);
ba395927 3571
ea8ea460 3572 freelist = domain_unmap(domain, start_pfn, last_pfn);
d794dc9b 3573
effa4678
DS
3574 if (intel_iommu_strict || (pdev && pdev->untrusted) ||
3575 !has_iova_flush_queue(&domain->iovad)) {
a1ddcbe9 3576 iommu_flush_iotlb_psi(iommu, domain, start_pfn,
769530e4 3577 nrpages, !freelist, 0);
5e0d2a6f 3578 /* free iova */
22e2f9fa 3579 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages));
ea8ea460 3580 dma_free_pagelist(freelist);
5e0d2a6f 3581 } else {
13cf0174
JR
3582 queue_iova(&domain->iovad, iova_pfn, nrpages,
3583 (unsigned long)freelist);
5e0d2a6f 3584 /*
3585 * queue up the release of the unmap to save the 1/6th of the
3586 * CPU time used up by the iotlb flush operation...
3587 */
5e0d2a6f 3588 }
ba395927
KA
3589}
3590
d41a4adb
JL
3591static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3592 size_t size, enum dma_data_direction dir,
00085f1e 3593 unsigned long attrs)
d41a4adb 3594{
9cc0c2af
CH
3595 if (iommu_need_mapping(dev))
3596 intel_unmap(dev, dev_addr, size);
3597 else
3598 dma_direct_unmap_page(dev, dev_addr, size, dir, attrs);
3599}
3600
3601static void intel_unmap_resource(struct device *dev, dma_addr_t dev_addr,
3602 size_t size, enum dma_data_direction dir, unsigned long attrs)
3603{
3604 if (iommu_need_mapping(dev))
3605 intel_unmap(dev, dev_addr, size);
d41a4adb
JL
3606}
3607
5040a918 3608static void *intel_alloc_coherent(struct device *dev, size_t size,
baa676fc 3609 dma_addr_t *dma_handle, gfp_t flags,
00085f1e 3610 unsigned long attrs)
ba395927 3611{
7ec916f8
CH
3612 struct page *page = NULL;
3613 int order;
ba395927 3614
9cc0c2af
CH
3615 if (!iommu_need_mapping(dev))
3616 return dma_direct_alloc(dev, size, dma_handle, flags, attrs);
3617
7ec916f8
CH
3618 size = PAGE_ALIGN(size);
3619 order = get_order(size);
7ec916f8
CH
3620
3621 if (gfpflags_allow_blocking(flags)) {
3622 unsigned int count = size >> PAGE_SHIFT;
3623
d834c5ab
MS
3624 page = dma_alloc_from_contiguous(dev, count, order,
3625 flags & __GFP_NOWARN);
7ec916f8
CH
3626 }
3627
3628 if (!page)
3629 page = alloc_pages(flags, order);
3630 if (!page)
3631 return NULL;
3632 memset(page_address(page), 0, size);
3633
21d5d27c
LG
3634 *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
3635 DMA_BIDIRECTIONAL,
3636 dev->coherent_dma_mask);
524a669b 3637 if (*dma_handle != DMA_MAPPING_ERROR)
7ec916f8
CH
3638 return page_address(page);
3639 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3640 __free_pages(page, order);
36746436 3641
ba395927
KA
3642 return NULL;
3643}
3644
5040a918 3645static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
00085f1e 3646 dma_addr_t dma_handle, unsigned long attrs)
ba395927 3647{
7ec916f8
CH
3648 int order;
3649 struct page *page = virt_to_page(vaddr);
3650
9cc0c2af
CH
3651 if (!iommu_need_mapping(dev))
3652 return dma_direct_free(dev, size, vaddr, dma_handle, attrs);
3653
7ec916f8
CH
3654 size = PAGE_ALIGN(size);
3655 order = get_order(size);
3656
3657 intel_unmap(dev, dma_handle, size);
3658 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3659 __free_pages(page, order);
ba395927
KA
3660}
3661
5040a918 3662static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
d7ab5c46 3663 int nelems, enum dma_data_direction dir,
00085f1e 3664 unsigned long attrs)
ba395927 3665{
769530e4
OP
3666 dma_addr_t startaddr = sg_dma_address(sglist) & PAGE_MASK;
3667 unsigned long nrpages = 0;
3668 struct scatterlist *sg;
3669 int i;
3670
9cc0c2af
CH
3671 if (!iommu_need_mapping(dev))
3672 return dma_direct_unmap_sg(dev, sglist, nelems, dir, attrs);
3673
769530e4
OP
3674 for_each_sg(sglist, sg, nelems, i) {
3675 nrpages += aligned_nrpages(sg_dma_address(sg), sg_dma_len(sg));
3676 }
3677
3678 intel_unmap(dev, startaddr, nrpages << VTD_PAGE_SHIFT);
ba395927
KA
3679}
3680
5040a918 3681static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
00085f1e 3682 enum dma_data_direction dir, unsigned long attrs)
ba395927 3683{
ba395927 3684 int i;
ba395927 3685 struct dmar_domain *domain;
f76aec76
KA
3686 size_t size = 0;
3687 int prot = 0;
2aac6304 3688 unsigned long iova_pfn;
f76aec76 3689 int ret;
c03ab37c 3690 struct scatterlist *sg;
b536d24d 3691 unsigned long start_vpfn;
8c11e798 3692 struct intel_iommu *iommu;
ba395927
KA
3693
3694 BUG_ON(dir == DMA_NONE);
48b2c937 3695 if (!iommu_need_mapping(dev))
9cc0c2af 3696 return dma_direct_map_sg(dev, sglist, nelems, dir, attrs);
ba395927 3697
4ec066c7 3698 domain = find_domain(dev);
f76aec76
KA
3699 if (!domain)
3700 return 0;
3701
8c11e798
WH
3702 iommu = domain_get_iommu(domain);
3703
b536d24d 3704 for_each_sg(sglist, sg, nelems, i)
88cb6a74 3705 size += aligned_nrpages(sg->offset, sg->length);
f76aec76 3706
2aac6304 3707 iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
5040a918 3708 *dev->dma_mask);
2aac6304 3709 if (!iova_pfn) {
c03ab37c 3710 sglist->dma_length = 0;
f76aec76
KA
3711 return 0;
3712 }
3713
3714 /*
3715 * Check if DMAR supports zero-length reads on write-only
3716 * mappings.
3717 */
3718 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3719 !cap_zlr(iommu->cap))
f76aec76
KA
3720 prot |= DMA_PTE_READ;
3721 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3722 prot |= DMA_PTE_WRITE;
3723
2aac6304 3724 start_vpfn = mm_to_dma_pfn(iova_pfn);
e1605495 3725
f532959b 3726 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
e1605495 3727 if (unlikely(ret)) {
e1605495 3728 dma_pte_free_pagetable(domain, start_vpfn,
bc24c571
DD
3729 start_vpfn + size - 1,
3730 agaw_to_level(domain->agaw) + 1);
22e2f9fa 3731 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
e1605495 3732 return 0;
ba395927
KA
3733 }
3734
ba395927
KA
3735 return nelems;
3736}
3737
02b4da5f 3738static const struct dma_map_ops intel_dma_ops = {
baa676fc
AP
3739 .alloc = intel_alloc_coherent,
3740 .free = intel_free_coherent,
ba395927
KA
3741 .map_sg = intel_map_sg,
3742 .unmap_sg = intel_unmap_sg,
ffbbef5c
FT
3743 .map_page = intel_map_page,
3744 .unmap_page = intel_unmap_page,
21d5d27c 3745 .map_resource = intel_map_resource,
9cc0c2af 3746 .unmap_resource = intel_unmap_resource,
fec777c3 3747 .dma_supported = dma_direct_supported,
ba395927
KA
3748};
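/*
 * These ops are installed as the global dma_ops from intel_iommu_init(),
 * so drivers using the generic DMA API (dma_map_page(), dma_map_sg(), ...)
 * are routed here and transparently either remapped through the IOMMU or
 * handed off to dma-direct, depending on iommu_need_mapping().
 */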
3749
3750static inline int iommu_domain_cache_init(void)
3751{
3752 int ret = 0;
3753
3754 iommu_domain_cache = kmem_cache_create("iommu_domain",
3755 sizeof(struct dmar_domain),
3756 0,
3757 SLAB_HWCACHE_ALIGN,
3758
3759 NULL);
3760 if (!iommu_domain_cache) {
9f10e5bf 3761 pr_err("Couldn't create iommu_domain cache\n");
ba395927
KA
3762 ret = -ENOMEM;
3763 }
3764
3765 return ret;
3766}
3767
3768static inline int iommu_devinfo_cache_init(void)
3769{
3770 int ret = 0;
3771
3772 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3773 sizeof(struct device_domain_info),
3774 0,
3775 SLAB_HWCACHE_ALIGN,
ba395927
KA
3776 NULL);
3777 if (!iommu_devinfo_cache) {
9f10e5bf 3778 pr_err("Couldn't create devinfo cache\n");
ba395927
KA
3779 ret = -ENOMEM;
3780 }
3781
3782 return ret;
3783}
3784
ba395927
KA
3785static int __init iommu_init_mempool(void)
3786{
3787 int ret;
ae1ff3d6 3788 ret = iova_cache_get();
ba395927
KA
3789 if (ret)
3790 return ret;
3791
3792 ret = iommu_domain_cache_init();
3793 if (ret)
3794 goto domain_error;
3795
3796 ret = iommu_devinfo_cache_init();
3797 if (!ret)
3798 return ret;
3799
3800 kmem_cache_destroy(iommu_domain_cache);
3801domain_error:
ae1ff3d6 3802 iova_cache_put();
ba395927
KA
3803
3804 return -ENOMEM;
3805}
3806
3807static void __init iommu_exit_mempool(void)
3808{
3809 kmem_cache_destroy(iommu_devinfo_cache);
3810 kmem_cache_destroy(iommu_domain_cache);
ae1ff3d6 3811 iova_cache_put();
ba395927
KA
3812}
3813
556ab45f
DW
3814static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3815{
3816 struct dmar_drhd_unit *drhd;
3817 u32 vtbar;
3818 int rc;
3819
3820 /* We know that this device on this chipset has its own IOMMU.
3821 * If we find it under a different IOMMU, then the BIOS is lying
3822 * to us. Hope that the IOMMU for this device is actually
3823 * disabled, and it needs no translation...
3824 */
3825 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3826 if (rc) {
3827 /* "can't" happen */
3828 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3829 return;
3830 }
3831 vtbar &= 0xffff0000;
3832
3833 /* we know that this iommu should be at offset 0xa000 from vtbar */
3834 drhd = dmar_find_matched_drhd_unit(pdev);
3835 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3836 TAINT_FIRMWARE_WORKAROUND,
3837 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3838 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3839}
3840DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3841
ba395927
KA
3842static void __init init_no_remapping_devices(void)
3843{
3844 struct dmar_drhd_unit *drhd;
832bd858 3845 struct device *dev;
b683b230 3846 int i;
ba395927
KA
3847
3848 for_each_drhd_unit(drhd) {
3849 if (!drhd->include_all) {
b683b230
JL
3850 for_each_active_dev_scope(drhd->devices,
3851 drhd->devices_cnt, i, dev)
3852 break;
832bd858 3853 /* ignore DMAR unit if no devices exist */
ba395927
KA
3854 if (i == drhd->devices_cnt)
3855 drhd->ignored = 1;
3856 }
3857 }
3858
7c919779 3859 for_each_active_drhd_unit(drhd) {
7c919779 3860 if (drhd->include_all)
ba395927
KA
3861 continue;
3862
b683b230
JL
3863 for_each_active_dev_scope(drhd->devices,
3864 drhd->devices_cnt, i, dev)
832bd858 3865 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
ba395927 3866 break;
ba395927
KA
3867 if (i < drhd->devices_cnt)
3868 continue;
3869
c0771df8
DW
3870 /* This IOMMU has *only* gfx devices. Either bypass it or
3871 set the gfx_mapped flag, as appropriate */
cf1ec453 3872 if (!dmar_map_gfx) {
c0771df8 3873 drhd->ignored = 1;
b683b230
JL
3874 for_each_active_dev_scope(drhd->devices,
3875 drhd->devices_cnt, i, dev)
832bd858 3876 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
ba395927
KA
3877 }
3878 }
3879}
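/*
 * Net effect of the above: DRHD units whose scope contains no devices are
 * simply marked ignored; units that cover nothing but graphics devices
 * are, when dmar_map_gfx is clear, also ignored and their devices pointed
 * at the dummy domain, so no translation is ever set up for them.
 */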
3880
f59c7b69
FY
3881#ifdef CONFIG_SUSPEND
3882static int init_iommu_hw(void)
3883{
3884 struct dmar_drhd_unit *drhd;
3885 struct intel_iommu *iommu = NULL;
3886
3887 for_each_active_iommu(iommu, drhd)
3888 if (iommu->qi)
3889 dmar_reenable_qi(iommu);
3890
b779260b
JC
3891 for_each_iommu(iommu, drhd) {
3892 if (drhd->ignored) {
3893 /*
3894 * we always have to disable PMRs or DMA may fail on
3895 * this device
3896 */
3897 if (force_on)
3898 iommu_disable_protect_mem_regions(iommu);
3899 continue;
3900 }
095303e0 3901
f59c7b69
FY
3902 iommu_flush_write_buffer(iommu);
3903
3904 iommu_set_root_entry(iommu);
3905
3906 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3907 DMA_CCMD_GLOBAL_INVL);
2a41ccee
JL
3908 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3909 iommu_enable_translation(iommu);
b94996c9 3910 iommu_disable_protect_mem_regions(iommu);
f59c7b69
FY
3911 }
3912
3913 return 0;
3914}
3915
3916static void iommu_flush_all(void)
3917{
3918 struct dmar_drhd_unit *drhd;
3919 struct intel_iommu *iommu;
3920
3921 for_each_active_iommu(iommu, drhd) {
3922 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3923 DMA_CCMD_GLOBAL_INVL);
f59c7b69 3924 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 3925 DMA_TLB_GLOBAL_FLUSH);
f59c7b69
FY
3926 }
3927}
3928
134fac3f 3929static int iommu_suspend(void)
f59c7b69
FY
3930{
3931 struct dmar_drhd_unit *drhd;
3932 struct intel_iommu *iommu = NULL;
3933 unsigned long flag;
3934
3935 for_each_active_iommu(iommu, drhd) {
6396bb22 3936 iommu->iommu_state = kcalloc(MAX_SR_DMAR_REGS, sizeof(u32),
f59c7b69
FY
3937 GFP_ATOMIC);
3938 if (!iommu->iommu_state)
3939 goto nomem;
3940 }
3941
3942 iommu_flush_all();
3943
3944 for_each_active_iommu(iommu, drhd) {
3945 iommu_disable_translation(iommu);
3946
1f5b3c3f 3947 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
3948
3949 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3950 readl(iommu->reg + DMAR_FECTL_REG);
3951 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3952 readl(iommu->reg + DMAR_FEDATA_REG);
3953 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3954 readl(iommu->reg + DMAR_FEADDR_REG);
3955 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3956 readl(iommu->reg + DMAR_FEUADDR_REG);
3957
1f5b3c3f 3958 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
3959 }
3960 return 0;
3961
3962nomem:
3963 for_each_active_iommu(iommu, drhd)
3964 kfree(iommu->iommu_state);
3965
3966 return -ENOMEM;
3967}
3968
134fac3f 3969static void iommu_resume(void)
f59c7b69
FY
3970{
3971 struct dmar_drhd_unit *drhd;
3972 struct intel_iommu *iommu = NULL;
3973 unsigned long flag;
3974
3975 if (init_iommu_hw()) {
b779260b
JC
3976 if (force_on)
3977 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3978 else
3979 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
134fac3f 3980 return;
f59c7b69
FY
3981 }
3982
3983 for_each_active_iommu(iommu, drhd) {
3984
1f5b3c3f 3985 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
3986
3987 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3988 iommu->reg + DMAR_FECTL_REG);
3989 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3990 iommu->reg + DMAR_FEDATA_REG);
3991 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3992 iommu->reg + DMAR_FEADDR_REG);
3993 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3994 iommu->reg + DMAR_FEUADDR_REG);
3995
1f5b3c3f 3996 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
3997 }
3998
3999 for_each_active_iommu(iommu, drhd)
4000 kfree(iommu->iommu_state);
f59c7b69
FY
4001}
4002
134fac3f 4003static struct syscore_ops iommu_syscore_ops = {
f59c7b69
FY
4004 .resume = iommu_resume,
4005 .suspend = iommu_suspend,
4006};
4007
134fac3f 4008static void __init init_iommu_pm_ops(void)
f59c7b69 4009{
134fac3f 4010 register_syscore_ops(&iommu_syscore_ops);
f59c7b69
FY
4011}
4012
4013#else
99592ba4 4014static inline void init_iommu_pm_ops(void) {}
f59c7b69
FY
4015#endif /* CONFIG_PM */
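/*
 * Across suspend/resume only the fault event registers (FECTL, FEDATA,
 * FEADDR, FEUADDR) are saved and restored here; everything else is
 * rebuilt by init_iommu_hw(), which re-enables queued invalidation,
 * reprograms the root entry, flushes the context and IOTLB caches and
 * turns translation back on.
 */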
4016
c2a0b538 4017int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
318fe7df
SS
4018{
4019 struct acpi_dmar_reserved_memory *rmrr;
4020 struct dmar_rmrr_unit *rmrru;
4021
4022 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
4023 if (!rmrru)
0659b8dc 4024 goto out;
318fe7df
SS
4025
4026 rmrru->hdr = header;
4027 rmrr = (struct acpi_dmar_reserved_memory *)header;
4028 rmrru->base_address = rmrr->base_address;
4029 rmrru->end_address = rmrr->end_address;
0659b8dc 4030
2e455289
JL
4031 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
4032 ((void *)rmrr) + rmrr->header.length,
4033 &rmrru->devices_cnt);
0659b8dc 4034 if (rmrru->devices_cnt && rmrru->devices == NULL)
5f64ce54 4035 goto free_rmrru;
318fe7df 4036
2e455289 4037 list_add(&rmrru->list, &dmar_rmrr_units);
318fe7df 4038
2e455289 4039 return 0;
0659b8dc
EA
4040free_rmrru:
4041 kfree(rmrru);
4042out:
4043 return -ENOMEM;
318fe7df
SS
4044}
4045
6b197249
JL
4046static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
4047{
4048 struct dmar_atsr_unit *atsru;
4049 struct acpi_dmar_atsr *tmp;
4050
4051 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4052 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
4053 if (atsr->segment != tmp->segment)
4054 continue;
4055 if (atsr->header.length != tmp->header.length)
4056 continue;
4057 if (memcmp(atsr, tmp, atsr->header.length) == 0)
4058 return atsru;
4059 }
4060
4061 return NULL;
4062}
4063
4064int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
318fe7df
SS
4065{
4066 struct acpi_dmar_atsr *atsr;
4067 struct dmar_atsr_unit *atsru;
4068
b608fe35 4069 if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
6b197249
JL
4070 return 0;
4071
318fe7df 4072 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
6b197249
JL
4073 atsru = dmar_find_atsr(atsr);
4074 if (atsru)
4075 return 0;
4076
4077 atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
318fe7df
SS
4078 if (!atsru)
4079 return -ENOMEM;
4080
6b197249
JL
4081 /*
4082 * If memory is allocated from slab by the ACPI _DSM method, we need to
4083 * copy the memory content because the memory buffer will be freed
4084 * on return.
4085 */
4086 atsru->hdr = (void *)(atsru + 1);
4087 memcpy(atsru->hdr, hdr, hdr->length);
318fe7df 4088 atsru->include_all = atsr->flags & 0x1;
2e455289
JL
4089 if (!atsru->include_all) {
4090 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
4091 (void *)atsr + atsr->header.length,
4092 &atsru->devices_cnt);
4093 if (atsru->devices_cnt && atsru->devices == NULL) {
4094 kfree(atsru);
4095 return -ENOMEM;
4096 }
4097 }
318fe7df 4098
0e242612 4099 list_add_rcu(&atsru->list, &dmar_atsr_units);
318fe7df
SS
4100
4101 return 0;
4102}
4103
9bdc531e
JL
4104static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
4105{
4106 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
4107 kfree(atsru);
4108}
4109
6b197249
JL
4110int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4111{
4112 struct acpi_dmar_atsr *atsr;
4113 struct dmar_atsr_unit *atsru;
4114
4115 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4116 atsru = dmar_find_atsr(atsr);
4117 if (atsru) {
4118 list_del_rcu(&atsru->list);
4119 synchronize_rcu();
4120 intel_iommu_free_atsr(atsru);
4121 }
4122
4123 return 0;
4124}
4125
4126int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4127{
4128 int i;
4129 struct device *dev;
4130 struct acpi_dmar_atsr *atsr;
4131 struct dmar_atsr_unit *atsru;
4132
4133 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4134 atsru = dmar_find_atsr(atsr);
4135 if (!atsru)
4136 return 0;
4137
194dc870 4138 if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
6b197249
JL
4139 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
4140 i, dev)
4141 return -EBUSY;
194dc870 4142 }
6b197249
JL
4143
4144 return 0;
4145}
4146
ffebeb46
JL
4147static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
4148{
e083ea5b 4149 int sp, ret;
ffebeb46
JL
4150 struct intel_iommu *iommu = dmaru->iommu;
4151
4152 if (g_iommus[iommu->seq_id])
4153 return 0;
4154
4155 if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
9f10e5bf 4156 pr_warn("%s: Doesn't support hardware pass through.\n",
ffebeb46
JL
4157 iommu->name);
4158 return -ENXIO;
4159 }
4160 if (!ecap_sc_support(iommu->ecap) &&
4161 domain_update_iommu_snooping(iommu)) {
9f10e5bf 4162 pr_warn("%s: Doesn't support snooping.\n",
ffebeb46
JL
4163 iommu->name);
4164 return -ENXIO;
4165 }
4166 sp = domain_update_iommu_superpage(iommu) - 1;
4167 if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
9f10e5bf 4168 pr_warn("%s: Doesn't support large page.\n",
ffebeb46
JL
4169 iommu->name);
4170 return -ENXIO;
4171 }
4172
4173 /*
4174 * Disable translation if already enabled prior to OS handover.
4175 */
4176 if (iommu->gcmd & DMA_GCMD_TE)
4177 iommu_disable_translation(iommu);
4178
4179 g_iommus[iommu->seq_id] = iommu;
4180 ret = iommu_init_domains(iommu);
4181 if (ret == 0)
4182 ret = iommu_alloc_root_entry(iommu);
4183 if (ret)
4184 goto out;
4185
8a94ade4 4186#ifdef CONFIG_INTEL_IOMMU_SVM
765b6a98 4187 if (pasid_supported(iommu))
d9737953 4188 intel_svm_init(iommu);
8a94ade4
DW
4189#endif
4190
ffebeb46
JL
4191 if (dmaru->ignored) {
4192 /*
4193 * we always have to disable PMRs or DMA may fail on this device
4194 */
4195 if (force_on)
4196 iommu_disable_protect_mem_regions(iommu);
4197 return 0;
4198 }
4199
4200 intel_iommu_init_qi(iommu);
4201 iommu_flush_write_buffer(iommu);
a222a7f0
DW
4202
4203#ifdef CONFIG_INTEL_IOMMU_SVM
765b6a98 4204 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
a222a7f0
DW
4205 ret = intel_svm_enable_prq(iommu);
4206 if (ret)
4207 goto disable_iommu;
4208 }
4209#endif
ffebeb46
JL
4210 ret = dmar_set_interrupt(iommu);
4211 if (ret)
4212 goto disable_iommu;
4213
4214 iommu_set_root_entry(iommu);
4215 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
4216 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4217 iommu_enable_translation(iommu);
4218
ffebeb46
JL
4219 iommu_disable_protect_mem_regions(iommu);
4220 return 0;
4221
4222disable_iommu:
4223 disable_dmar_iommu(iommu);
4224out:
4225 free_dmar_iommu(iommu);
4226 return ret;
4227}
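/*
 * Bring-up order for a hot-added DRHD unit broadly mirrors boot time:
 * domain ids and the root entry are allocated first, then optional
 * SVM/PASID state, queued invalidation, the page request queue and the
 * fault interrupt, and only then is the root entry programmed, the
 * caches flushed and translation enabled.
 */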
4228
6b197249
JL
4229int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
4230{
ffebeb46
JL
4231 int ret = 0;
4232 struct intel_iommu *iommu = dmaru->iommu;
4233
4234 if (!intel_iommu_enabled)
4235 return 0;
4236 if (iommu == NULL)
4237 return -EINVAL;
4238
4239 if (insert) {
4240 ret = intel_iommu_add(dmaru);
4241 } else {
4242 disable_dmar_iommu(iommu);
4243 free_dmar_iommu(iommu);
4244 }
4245
4246 return ret;
6b197249
JL
4247}
4248
9bdc531e
JL
4249static void intel_iommu_free_dmars(void)
4250{
4251 struct dmar_rmrr_unit *rmrru, *rmrr_n;
4252 struct dmar_atsr_unit *atsru, *atsr_n;
4253
4254 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
4255 list_del(&rmrru->list);
4256 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
4257 kfree(rmrru);
318fe7df
SS
4258 }
4259
9bdc531e
JL
4260 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
4261 list_del(&atsru->list);
4262 intel_iommu_free_atsr(atsru);
4263 }
318fe7df
SS
4264}
4265
4266int dmar_find_matched_atsr_unit(struct pci_dev *dev)
4267{
b683b230 4268 int i, ret = 1;
318fe7df 4269 struct pci_bus *bus;
832bd858
DW
4270 struct pci_dev *bridge = NULL;
4271 struct device *tmp;
318fe7df
SS
4272 struct acpi_dmar_atsr *atsr;
4273 struct dmar_atsr_unit *atsru;
4274
4275 dev = pci_physfn(dev);
318fe7df 4276 for (bus = dev->bus; bus; bus = bus->parent) {
b5f82ddf 4277 bridge = bus->self;
d14053b3
DW
4278 /* If it's an integrated device, allow ATS */
4279 if (!bridge)
4280 return 1;
4281 /* Connected via non-PCIe: no ATS */
4282 if (!pci_is_pcie(bridge) ||
62f87c0e 4283 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
318fe7df 4284 return 0;
d14053b3 4285 /* If we found the root port, look it up in the ATSR */
b5f82ddf 4286 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
318fe7df 4287 break;
318fe7df
SS
4288 }
4289
0e242612 4290 rcu_read_lock();
b5f82ddf
JL
4291 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4292 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4293 if (atsr->segment != pci_domain_nr(dev->bus))
4294 continue;
4295
b683b230 4296 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
832bd858 4297 if (tmp == &bridge->dev)
b683b230 4298 goto out;
b5f82ddf
JL
4299
4300 if (atsru->include_all)
b683b230 4301 goto out;
b5f82ddf 4302 }
b683b230
JL
4303 ret = 0;
4304out:
0e242612 4305 rcu_read_unlock();
318fe7df 4306
b683b230 4307 return ret;
318fe7df
SS
4308}
4309
59ce0515
JL
4310int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
4311{
e083ea5b 4312 int ret;
59ce0515
JL
4313 struct dmar_rmrr_unit *rmrru;
4314 struct dmar_atsr_unit *atsru;
4315 struct acpi_dmar_atsr *atsr;
4316 struct acpi_dmar_reserved_memory *rmrr;
4317
b608fe35 4318 if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING)
59ce0515
JL
4319 return 0;
4320
4321 list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
4322 rmrr = container_of(rmrru->hdr,
4323 struct acpi_dmar_reserved_memory, header);
4324 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4325 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
4326 ((void *)rmrr) + rmrr->header.length,
4327 rmrr->segment, rmrru->devices,
4328 rmrru->devices_cnt);
e083ea5b 4329 if (ret < 0)
59ce0515 4330 return ret;
e6a8c9b3 4331 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
27e24950
JL
4332 dmar_remove_dev_scope(info, rmrr->segment,
4333 rmrru->devices, rmrru->devices_cnt);
59ce0515
JL
4334 }
4335 }
4336
4337 list_for_each_entry(atsru, &dmar_atsr_units, list) {
4338 if (atsru->include_all)
4339 continue;
4340
4341 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4342 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4343 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
4344 (void *)atsr + atsr->header.length,
4345 atsr->segment, atsru->devices,
4346 atsru->devices_cnt);
4347 if (ret > 0)
4348 break;
e083ea5b 4349 else if (ret < 0)
59ce0515 4350 return ret;
e6a8c9b3 4351 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
59ce0515
JL
4352 if (dmar_remove_dev_scope(info, atsr->segment,
4353 atsru->devices, atsru->devices_cnt))
4354 break;
4355 }
4356 }
4357
4358 return 0;
4359}
4360
75f05569
JL
4361static int intel_iommu_memory_notifier(struct notifier_block *nb,
4362 unsigned long val, void *v)
4363{
4364 struct memory_notify *mhp = v;
4365 unsigned long long start, end;
4366 unsigned long start_vpfn, last_vpfn;
4367
4368 switch (val) {
4369 case MEM_GOING_ONLINE:
4370 start = mhp->start_pfn << PAGE_SHIFT;
4371 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
4372 if (iommu_domain_identity_map(si_domain, start, end)) {
9f10e5bf 4373 pr_warn("Failed to build identity map for [%llx-%llx]\n",
75f05569
JL
4374 start, end);
4375 return NOTIFY_BAD;
4376 }
4377 break;
4378
4379 case MEM_OFFLINE:
4380 case MEM_CANCEL_ONLINE:
4381 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4382 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
4383 while (start_vpfn <= last_vpfn) {
4384 struct iova *iova;
4385 struct dmar_drhd_unit *drhd;
4386 struct intel_iommu *iommu;
ea8ea460 4387 struct page *freelist;
75f05569
JL
4388
4389 iova = find_iova(&si_domain->iovad, start_vpfn);
4390 if (iova == NULL) {
9f10e5bf 4391 pr_debug("Failed get IOVA for PFN %lx\n",
75f05569
JL
4392 start_vpfn);
4393 break;
4394 }
4395
4396 iova = split_and_remove_iova(&si_domain->iovad, iova,
4397 start_vpfn, last_vpfn);
4398 if (iova == NULL) {
9f10e5bf 4399 pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
75f05569
JL
4400 start_vpfn, last_vpfn);
4401 return NOTIFY_BAD;
4402 }
4403
ea8ea460
DW
4404 freelist = domain_unmap(si_domain, iova->pfn_lo,
4405 iova->pfn_hi);
4406
75f05569
JL
4407 rcu_read_lock();
4408 for_each_active_iommu(iommu, drhd)
a1ddcbe9 4409 iommu_flush_iotlb_psi(iommu, si_domain,
a156ef99 4410 iova->pfn_lo, iova_size(iova),
ea8ea460 4411 !freelist, 0);
75f05569 4412 rcu_read_unlock();
ea8ea460 4413 dma_free_pagelist(freelist);
75f05569
JL
4414
4415 start_vpfn = iova->pfn_hi + 1;
4416 free_iova_mem(iova);
4417 }
4418 break;
4419 }
4420
4421 return NOTIFY_OK;
4422}
4423
4424static struct notifier_block intel_iommu_memory_nb = {
4425 .notifier_call = intel_iommu_memory_notifier,
4426 .priority = 0
4427};
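/*
 * The memory hotplug notifier keeps the si_domain 1:1 map in sync with
 * RAM: new ranges are identity-mapped before they go online, and on
 * offline the corresponding IOVAs are unmapped, the IOTLBs of all active
 * IOMMUs are flushed and the freed page-table pages released.
 */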
4428
22e2f9fa
OP
4429static void free_all_cpu_cached_iovas(unsigned int cpu)
4430{
4431 int i;
4432
4433 for (i = 0; i < g_num_of_iommus; i++) {
4434 struct intel_iommu *iommu = g_iommus[i];
4435 struct dmar_domain *domain;
0caa7616 4436 int did;
22e2f9fa
OP
4437
4438 if (!iommu)
4439 continue;
4440
3bd4f911 4441 for (did = 0; did < cap_ndoms(iommu->cap); did++) {
0caa7616 4442 domain = get_iommu_domain(iommu, (u16)did);
22e2f9fa
OP
4443
4444 if (!domain)
4445 continue;
4446 free_cpu_cached_iovas(cpu, &domain->iovad);
4447 }
4448 }
4449}
4450
21647615 4451static int intel_iommu_cpu_dead(unsigned int cpu)
aa473240 4452{
21647615 4453 free_all_cpu_cached_iovas(cpu);
21647615 4454 return 0;
aa473240
OP
4455}
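/*
 * Registered below for CPUHP_IOMMU_INTEL_DEAD: when a CPU goes offline
 * its per-CPU cached IOVAs are returned to the domains' IOVA allocators
 * so they are not stranded on the dead CPU.
 */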
4456
161b28aa
JR
4457static void intel_disable_iommus(void)
4458{
4459 struct intel_iommu *iommu = NULL;
4460 struct dmar_drhd_unit *drhd;
4461
4462 for_each_iommu(iommu, drhd)
4463 iommu_disable_translation(iommu);
4464}
4465
a7fdb6e6
JR
4466static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
4467{
2926a2aa
JR
4468 struct iommu_device *iommu_dev = dev_to_iommu_device(dev);
4469
4470 return container_of(iommu_dev, struct intel_iommu, iommu);
a7fdb6e6
JR
4471}
4472
a5459cfe
AW
4473static ssize_t intel_iommu_show_version(struct device *dev,
4474 struct device_attribute *attr,
4475 char *buf)
4476{
a7fdb6e6 4477 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4478 u32 ver = readl(iommu->reg + DMAR_VER_REG);
4479 return sprintf(buf, "%d:%d\n",
4480 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4481}
4482static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
4483
4484static ssize_t intel_iommu_show_address(struct device *dev,
4485 struct device_attribute *attr,
4486 char *buf)
4487{
a7fdb6e6 4488 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4489 return sprintf(buf, "%llx\n", iommu->reg_phys);
4490}
4491static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
4492
4493static ssize_t intel_iommu_show_cap(struct device *dev,
4494 struct device_attribute *attr,
4495 char *buf)
4496{
a7fdb6e6 4497 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4498 return sprintf(buf, "%llx\n", iommu->cap);
4499}
4500static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
4501
4502static ssize_t intel_iommu_show_ecap(struct device *dev,
4503 struct device_attribute *attr,
4504 char *buf)
4505{
a7fdb6e6 4506 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4507 return sprintf(buf, "%llx\n", iommu->ecap);
4508}
4509static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4510
2238c082
AW
4511static ssize_t intel_iommu_show_ndoms(struct device *dev,
4512 struct device_attribute *attr,
4513 char *buf)
4514{
a7fdb6e6 4515 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2238c082
AW
4516 return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
4517}
4518static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL);
4519
4520static ssize_t intel_iommu_show_ndoms_used(struct device *dev,
4521 struct device_attribute *attr,
4522 char *buf)
4523{
a7fdb6e6 4524 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2238c082
AW
4525 return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
4526 cap_ndoms(iommu->cap)));
4527}
4528static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL);
4529
a5459cfe
AW
4530static struct attribute *intel_iommu_attrs[] = {
4531 &dev_attr_version.attr,
4532 &dev_attr_address.attr,
4533 &dev_attr_cap.attr,
4534 &dev_attr_ecap.attr,
2238c082
AW
4535 &dev_attr_domains_supported.attr,
4536 &dev_attr_domains_used.attr,
a5459cfe
AW
4537 NULL,
4538};
4539
4540static struct attribute_group intel_iommu_group = {
4541 .name = "intel-iommu",
4542 .attrs = intel_iommu_attrs,
4543};
4544
4545const struct attribute_group *intel_iommu_groups[] = {
4546 &intel_iommu_group,
4547 NULL,
4548};
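/*
 * These attributes are registered per IOMMU via iommu_device_sysfs_add()
 * in intel_iommu_init() and typically appear as read-only files under
 * /sys/class/iommu/dmar<N>/intel-iommu/ (version, address, cap, ecap,
 * domains_supported, domains_used).
 */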
4549
89a6079d
LB
4550static int __init platform_optin_force_iommu(void)
4551{
4552 struct pci_dev *pdev = NULL;
4553 bool has_untrusted_dev = false;
4554
4555 if (!dmar_platform_optin() || no_platform_optin)
4556 return 0;
4557
4558 for_each_pci_dev(pdev) {
4559 if (pdev->untrusted) {
4560 has_untrusted_dev = true;
4561 break;
4562 }
4563 }
4564
4565 if (!has_untrusted_dev)
4566 return 0;
4567
4568 if (no_iommu || dmar_disabled)
4569 pr_info("Intel-IOMMU force enabled due to platform opt in\n");
4570
4571 /*
4572 * If Intel-IOMMU is disabled by default, we will apply identity
4573 * map for all devices except those marked as being untrusted.
4574 */
4575 if (dmar_disabled)
4576 iommu_identity_mapping |= IDENTMAP_ALL;
4577
4578 dmar_disabled = 0;
4579#if defined(CONFIG_X86) && defined(CONFIG_SWIOTLB)
4580 swiotlb = 0;
4581#endif
4582 no_iommu = 0;
4583
4584 return 1;
4585}
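/*
 * DMAR platform opt-in: if the firmware set the opt-in flag and at least
 * one untrusted PCI device is present, the IOMMU is force-enabled even if
 * it was disabled on the command line; when remapping had been disabled
 * by default, IDENTMAP_ALL keeps everything except the untrusted devices
 * identity-mapped.
 */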
4586
fa212a97
LB
4587static int __init probe_acpi_namespace_devices(void)
4588{
4589 struct dmar_drhd_unit *drhd;
af88ec39
QC
4590 /* To avoid a -Wunused-but-set-variable warning. */
4591 struct intel_iommu *iommu __maybe_unused;
fa212a97
LB
4592 struct device *dev;
4593 int i, ret = 0;
4594
4595 for_each_active_iommu(iommu, drhd) {
4596 for_each_active_dev_scope(drhd->devices,
4597 drhd->devices_cnt, i, dev) {
4598 struct acpi_device_physical_node *pn;
4599 struct iommu_group *group;
4600 struct acpi_device *adev;
4601
4602 if (dev->bus != &acpi_bus_type)
4603 continue;
4604
4605 adev = to_acpi_device(dev);
4606 mutex_lock(&adev->physical_node_lock);
4607 list_for_each_entry(pn,
4608 &adev->physical_node_list, node) {
4609 group = iommu_group_get(pn->dev);
4610 if (group) {
4611 iommu_group_put(group);
4612 continue;
4613 }
4614
4615 pn->dev->bus->iommu_ops = &intel_iommu_ops;
4616 ret = iommu_probe_device(pn->dev);
4617 if (ret)
4618 break;
4619 }
4620 mutex_unlock(&adev->physical_node_lock);
4621
4622 if (ret)
4623 return ret;
4624 }
4625 }
4626
4627 return 0;
4628}
4629
ba395927
KA
4630int __init intel_iommu_init(void)
4631{
9bdc531e 4632 int ret = -ENODEV;
3a93c841 4633 struct dmar_drhd_unit *drhd;
7c919779 4634 struct intel_iommu *iommu;
ba395927 4635
89a6079d
LB
4636 /*
4637 * Intel IOMMU is required for a TXT/tboot launch or platform
4638 * opt in, so enforce that.
4639 */
4640 force_on = tboot_force_iommu() || platform_optin_force_iommu();
a59b50e9 4641
3a5670e8
JL
4642 if (iommu_init_mempool()) {
4643 if (force_on)
4644 panic("tboot: Failed to initialize iommu memory\n");
4645 return -ENOMEM;
4646 }
4647
4648 down_write(&dmar_global_lock);
a59b50e9
JC
4649 if (dmar_table_init()) {
4650 if (force_on)
4651 panic("tboot: Failed to initialize DMAR table\n");
9bdc531e 4652 goto out_free_dmar;
a59b50e9 4653 }
ba395927 4654
c2c7286a 4655 if (dmar_dev_scope_init() < 0) {
a59b50e9
JC
4656 if (force_on)
4657 panic("tboot: Failed to initialize DMAR device scope\n");
9bdc531e 4658 goto out_free_dmar;
a59b50e9 4659 }
1886e8a9 4660
ec154bf5
JR
4661 up_write(&dmar_global_lock);
4662
4663 /*
4664 * The bus notifier takes the dmar_global_lock, so lockdep will
4665 * complain later when we register it under the lock.
4666 */
4667 dmar_register_bus_notifier();
4668
4669 down_write(&dmar_global_lock);
4670
161b28aa 4671 if (no_iommu || dmar_disabled) {
bfd20f1c
SL
4672 /*
4673 * We exit the function here to ensure IOMMU's remapping and
4674 * mempool aren't set up, which means that the IOMMU's PMRs
4675 * won't be disabled via the call to init_dmars(). So disable
4676 * it explicitly here. The PMRs were setup by tboot prior to
4677 * calling SENTER, but the kernel is expected to reset/tear
4678 * down the PMRs.
4679 */
4680 if (intel_iommu_tboot_noforce) {
4681 for_each_iommu(iommu, drhd)
4682 iommu_disable_protect_mem_regions(iommu);
4683 }
4684
161b28aa
JR
4685 /*
4686 * Make sure the IOMMUs are switched off, even when we
4687 * boot into a kexec kernel and the previous kernel left
4688 * them enabled
4689 */
4690 intel_disable_iommus();
9bdc531e 4691 goto out_free_dmar;
161b28aa 4692 }
2ae21010 4693
318fe7df 4694 if (list_empty(&dmar_rmrr_units))
9f10e5bf 4695 pr_info("No RMRR found\n");
318fe7df
SS
4696
4697 if (list_empty(&dmar_atsr_units))
9f10e5bf 4698 pr_info("No ATSR found\n");
318fe7df 4699
51a63e67
JC
4700 if (dmar_init_reserved_ranges()) {
4701 if (force_on)
4702 panic("tboot: Failed to reserve iommu ranges\n");
3a5670e8 4703 goto out_free_reserved_range;
51a63e67 4704 }
ba395927 4705
cf1ec453
LB
4706 if (dmar_map_gfx)
4707 intel_iommu_gfx_mapped = 1;
4708
ba395927
KA
4709 init_no_remapping_devices();
4710
b779260b 4711 ret = init_dmars();
ba395927 4712 if (ret) {
a59b50e9
JC
4713 if (force_on)
4714 panic("tboot: Failed to initialize DMARs\n");
9f10e5bf 4715 pr_err("Initialization failed\n");
9bdc531e 4716 goto out_free_reserved_range;
ba395927 4717 }
3a5670e8 4718 up_write(&dmar_global_lock);
ba395927 4719
4fac8076 4720#if defined(CONFIG_X86) && defined(CONFIG_SWIOTLB)
75f1cdf1
FT
4721 swiotlb = 0;
4722#endif
19943b0e 4723 dma_ops = &intel_dma_ops;
4ed0d3e6 4724
134fac3f 4725 init_iommu_pm_ops();
a8bcbb0d 4726
39ab9555
JR
4727 for_each_active_iommu(iommu, drhd) {
4728 iommu_device_sysfs_add(&iommu->iommu, NULL,
4729 intel_iommu_groups,
4730 "%s", iommu->name);
4731 iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
4732 iommu_device_register(&iommu->iommu);
4733 }
a5459cfe 4734
4236d97d 4735 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
75f05569
JL
4736 if (si_domain && !hw_pass_through)
4737 register_memory_notifier(&intel_iommu_memory_nb);
21647615
AMG
4738 cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL,
4739 intel_iommu_cpu_dead);
d8190dc6 4740
d5692d4a 4741 down_read(&dmar_global_lock);
fa212a97
LB
4742 if (probe_acpi_namespace_devices())
4743 pr_warn("ACPI name space devices didn't probe correctly\n");
d5692d4a 4744 up_read(&dmar_global_lock);
fa212a97 4745
d8190dc6
LB
4746 /* Finally, we enable the DMA remapping hardware. */
4747 for_each_iommu(iommu, drhd) {
6a8c6748 4748 if (!drhd->ignored && !translation_pre_enabled(iommu))
d8190dc6
LB
4749 iommu_enable_translation(iommu);
4750
4751 iommu_disable_protect_mem_regions(iommu);
4752 }
4753 pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
4754
8bc1f85c 4755 intel_iommu_enabled = 1;
ee2636b8 4756 intel_iommu_debugfs_init();
8bc1f85c 4757
ba395927 4758 return 0;
9bdc531e
JL
4759
4760out_free_reserved_range:
4761 put_iova_domain(&reserved_iova_list);
9bdc531e
JL
4762out_free_dmar:
4763 intel_iommu_free_dmars();
3a5670e8
JL
4764 up_write(&dmar_global_lock);
4765 iommu_exit_mempool();
9bdc531e 4766 return ret;
ba395927 4767}
e820482c 4768
127c7615 4769static void __dmar_remove_one_dev_info(struct device_domain_info *info)
c7151a8d 4770{
942067f1 4771 struct dmar_domain *domain;
c7151a8d
WH
4772 struct intel_iommu *iommu;
4773 unsigned long flags;
c7151a8d 4774
55d94043
JR
4775 assert_spin_locked(&device_domain_lock);
4776
127c7615 4777 if (WARN_ON(!info))
c7151a8d
WH
4778 return;
4779
127c7615 4780 iommu = info->iommu;
942067f1 4781 domain = info->domain;
c7151a8d 4782
127c7615 4783 if (info->dev) {
ef848b7e
LB
4784 if (dev_is_pci(info->dev) && sm_supported(iommu))
4785 intel_pasid_tear_down_entry(iommu, info->dev,
4786 PASID_RID2PASID);
4787
127c7615 4788 iommu_disable_dev_iotlb(info);
55752949 4789 domain_context_clear_one(iommu, info->bus, info->devfn);
a7fc93fe 4790 intel_pasid_free_table(info->dev);
127c7615 4791 }
c7151a8d 4792
b608ac3b 4793 unlink_domain_info(info);
c7151a8d 4794
d160aca5 4795 spin_lock_irqsave(&iommu->lock, flags);
942067f1 4796 domain_detach_iommu(domain, iommu);
d160aca5 4797 spin_unlock_irqrestore(&iommu->lock, flags);
c7151a8d 4798
942067f1
LB
4799 /* free the private domain */
4800 if (domain->flags & DOMAIN_FLAG_LOSE_CHILDREN &&
3a18844d
LB
4801 !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
4802 list_empty(&domain->devices))
942067f1
LB
4803 domain_exit(info->domain);
4804
127c7615 4805 free_devinfo_mem(info);
c7151a8d 4806}
c7151a8d 4807
71753239 4808static void dmar_remove_one_dev_info(struct device *dev)
55d94043 4809{
127c7615 4810 struct device_domain_info *info;
55d94043 4811 unsigned long flags;
3e7abe25 4812
55d94043 4813 spin_lock_irqsave(&device_domain_lock, flags);
127c7615 4814 info = dev->archdata.iommu;
ae23bfb6
LB
4815 if (info)
4816 __dmar_remove_one_dev_info(info);
55d94043 4817 spin_unlock_irqrestore(&device_domain_lock, flags);
c7151a8d
WH
4818}
4819
301e7ee1
JR
4820static int md_domain_init(struct dmar_domain *domain, int guest_width)
4821{
4822 int adjust_width;
4823
4824 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
4825 domain_reserve_special_ranges(domain);
4826
4827 /* calculate AGAW */
4828 domain->gaw = guest_width;
4829 adjust_width = guestwidth_to_adjustwidth(guest_width);
4830 domain->agaw = width_to_agaw(adjust_width);
4831
4832 domain->iommu_coherency = 0;
4833 domain->iommu_snooping = 0;
4834 domain->iommu_superpage = 0;
4835 domain->max_addr = 0;
4836
4837 /* always allocate the top pgd */
4838 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
4839 if (!domain->pgd)
4840 return -ENOMEM;
4841 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4842 return 0;
4843}
4844
00a77deb 4845static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
38717946 4846{
5d450806 4847 struct dmar_domain *dmar_domain;
00a77deb
JR
4848 struct iommu_domain *domain;
4849
4de354ec 4850 switch (type) {
fa954e68
LB
4851 case IOMMU_DOMAIN_DMA:
4852 /* fallthrough */
4de354ec 4853 case IOMMU_DOMAIN_UNMANAGED:
fa954e68 4854 dmar_domain = alloc_domain(0);
4de354ec
LB
4855 if (!dmar_domain) {
4856 pr_err("Can't allocate dmar_domain\n");
4857 return NULL;
4858 }
301e7ee1 4859 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
4de354ec
LB
4860 pr_err("Domain initialization failed\n");
4861 domain_exit(dmar_domain);
4862 return NULL;
4863 }
fa954e68
LB
4864
4865 if (type == IOMMU_DOMAIN_DMA &&
4866 init_iova_flush_queue(&dmar_domain->iovad,
4867 iommu_flush_iova, iova_entry_free)) {
4868 pr_warn("iova flush queue initialization failed\n");
4869 intel_iommu_strict = 1;
4870 }
4871
4de354ec 4872 domain_update_iommu_cap(dmar_domain);
38717946 4873
4de354ec
LB
4874 domain = &dmar_domain->domain;
4875 domain->geometry.aperture_start = 0;
4876 domain->geometry.aperture_end =
4877 __DOMAIN_MAX_ADDR(dmar_domain->gaw);
4878 domain->geometry.force_aperture = true;
4879
4880 return domain;
4881 case IOMMU_DOMAIN_IDENTITY:
4882 return &si_domain->domain;
4883 default:
00a77deb 4884 return NULL;
38717946 4885 }
8a0e715b 4886
4de354ec 4887 return NULL;
38717946 4888}
38717946 4889
00a77deb 4890static void intel_iommu_domain_free(struct iommu_domain *domain)
38717946 4891{
4de354ec
LB
4892 if (domain != &si_domain->domain)
4893 domain_exit(to_dmar_domain(domain));
38717946 4894}
38717946 4895
67b8e02b
LB
4896/*
4897 * Check whether a @domain could be attached to the @dev through the
4898 * aux-domain attach/detach APIs.
4899 */
4900static inline bool
4901is_aux_domain(struct device *dev, struct iommu_domain *domain)
4902{
4903 struct device_domain_info *info = dev->archdata.iommu;
4904
4905 return info && info->auxd_enabled &&
4906 domain->type == IOMMU_DOMAIN_UNMANAGED;
4907}
4908
4909static void auxiliary_link_device(struct dmar_domain *domain,
4910 struct device *dev)
4911{
4912 struct device_domain_info *info = dev->archdata.iommu;
4913
4914 assert_spin_locked(&device_domain_lock);
4915 if (WARN_ON(!info))
4916 return;
4917
4918 domain->auxd_refcnt++;
4919 list_add(&domain->auxd, &info->auxiliary_domains);
4920}
4921
4922static void auxiliary_unlink_device(struct dmar_domain *domain,
4923 struct device *dev)
4924{
4925 struct device_domain_info *info = dev->archdata.iommu;
4926
4927 assert_spin_locked(&device_domain_lock);
4928 if (WARN_ON(!info))
4929 return;
4930
4931 list_del(&domain->auxd);
4932 domain->auxd_refcnt--;
4933
4934 if (!domain->auxd_refcnt && domain->default_pasid > 0)
4935 intel_pasid_free_id(domain->default_pasid);
4936}
4937
4938static int aux_domain_add_dev(struct dmar_domain *domain,
4939 struct device *dev)
4940{
4941 int ret;
4942 u8 bus, devfn;
4943 unsigned long flags;
4944 struct intel_iommu *iommu;
4945
4946 iommu = device_to_iommu(dev, &bus, &devfn);
4947 if (!iommu)
4948 return -ENODEV;
4949
4950 if (domain->default_pasid <= 0) {
4951 int pasid;
4952
4953 pasid = intel_pasid_alloc_id(domain, PASID_MIN,
4954 pci_max_pasids(to_pci_dev(dev)),
4955 GFP_KERNEL);
4956 if (pasid <= 0) {
4957 pr_err("Can't allocate default pasid\n");
4958 return -ENODEV;
4959 }
4960 domain->default_pasid = pasid;
4961 }
4962
4963 spin_lock_irqsave(&device_domain_lock, flags);
4964 /*
4965 * iommu->lock must be held to attach domain to iommu and setup the
4966 * pasid entry for second level translation.
4967 */
4968 spin_lock(&iommu->lock);
4969 ret = domain_attach_iommu(domain, iommu);
4970 if (ret)
4971 goto attach_failed;
4972
4973 /* Setup the PASID entry for mediated devices: */
4974 ret = intel_pasid_setup_second_level(iommu, domain, dev,
4975 domain->default_pasid);
4976 if (ret)
4977 goto table_failed;
4978 spin_unlock(&iommu->lock);
4979
4980 auxiliary_link_device(domain, dev);
4981
4982 spin_unlock_irqrestore(&device_domain_lock, flags);
4983
4984 return 0;
4985
4986table_failed:
4987 domain_detach_iommu(domain, iommu);
4988attach_failed:
4989 spin_unlock(&iommu->lock);
4990 spin_unlock_irqrestore(&device_domain_lock, flags);
4991 if (!domain->auxd_refcnt && domain->default_pasid > 0)
4992 intel_pasid_free_id(domain->default_pasid);
4993
4994 return ret;
4995}
4996
4997static void aux_domain_remove_dev(struct dmar_domain *domain,
4998 struct device *dev)
4999{
5000 struct device_domain_info *info;
5001 struct intel_iommu *iommu;
5002 unsigned long flags;
5003
5004 if (!is_aux_domain(dev, &domain->domain))
5005 return;
5006
5007 spin_lock_irqsave(&device_domain_lock, flags);
5008 info = dev->archdata.iommu;
5009 iommu = info->iommu;
5010
5011 auxiliary_unlink_device(domain, dev);
5012
5013 spin_lock(&iommu->lock);
5014 intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid);
5015 domain_detach_iommu(domain, iommu);
5016 spin_unlock(&iommu->lock);
5017
5018 spin_unlock_irqrestore(&device_domain_lock, flags);
5019}
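/*
 * Auxiliary domains back mediated devices on scalable-mode hardware:
 * rather than owning the whole requester ID, the domain is attached
 * under a default PASID with second-level translation, and detached by
 * tearing that PASID entry down again.
 */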
5020
8cc3759a
LB
5021static int prepare_domain_attach_device(struct iommu_domain *domain,
5022 struct device *dev)
38717946 5023{
00a77deb 5024 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
fe40f1e0
WH
5025 struct intel_iommu *iommu;
5026 int addr_width;
156baca8 5027 u8 bus, devfn;
faa3d6f5 5028
156baca8 5029 iommu = device_to_iommu(dev, &bus, &devfn);
fe40f1e0
WH
5030 if (!iommu)
5031 return -ENODEV;
5032
5033 /* check if this iommu agaw is sufficient for max mapped address */
5034 addr_width = agaw_to_width(iommu->agaw);
a99c47a2
TL
5035 if (addr_width > cap_mgaw(iommu->cap))
5036 addr_width = cap_mgaw(iommu->cap);
5037
5038 if (dmar_domain->max_addr > (1LL << addr_width)) {
932a6523
BH
5039 dev_err(dev, "%s: iommu width (%d) is not "
5040 "sufficient for the mapped address (%llx)\n",
5041 __func__, addr_width, dmar_domain->max_addr);
fe40f1e0
WH
5042 return -EFAULT;
5043 }
a99c47a2
TL
5044 dmar_domain->gaw = addr_width;
5045
5046 /*
5047 * Knock out extra levels of page tables if necessary
5048 */
5049 while (iommu->agaw < dmar_domain->agaw) {
5050 struct dma_pte *pte;
5051
5052 pte = dmar_domain->pgd;
5053 if (dma_pte_present(pte)) {
25cbff16
SY
5054 dmar_domain->pgd = (struct dma_pte *)
5055 phys_to_virt(dma_pte_addr(pte));
7a661013 5056 free_pgtable_page(pte);
a99c47a2
TL
5057 }
5058 dmar_domain->agaw--;
5059 }
fe40f1e0 5060
8cc3759a
LB
5061 return 0;
5062}
5063
5064static int intel_iommu_attach_device(struct iommu_domain *domain,
5065 struct device *dev)
5066{
5067 int ret;
5068
5679582c
LB
5069 if (domain->type == IOMMU_DOMAIN_UNMANAGED &&
5070 device_is_rmrr_locked(dev)) {
8cc3759a
LB
5071 dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n");
5072 return -EPERM;
5073 }
5074
67b8e02b
LB
5075 if (is_aux_domain(dev, domain))
5076 return -EPERM;
5077
8cc3759a
LB
5078 /* normally dev is not mapped */
5079 if (unlikely(domain_context_mapped(dev))) {
5080 struct dmar_domain *old_domain;
5081
5082 old_domain = find_domain(dev);
fa954e68 5083 if (old_domain)
8cc3759a 5084 dmar_remove_one_dev_info(dev);
8cc3759a
LB
5085 }
5086
5087 ret = prepare_domain_attach_device(domain, dev);
5088 if (ret)
5089 return ret;
5090
5091 return domain_add_dev_info(to_dmar_domain(domain), dev);
38717946 5092}
38717946 5093
67b8e02b
LB
5094static int intel_iommu_aux_attach_device(struct iommu_domain *domain,
5095 struct device *dev)
5096{
5097 int ret;
5098
5099 if (!is_aux_domain(dev, domain))
5100 return -EPERM;
5101
5102 ret = prepare_domain_attach_device(domain, dev);
5103 if (ret)
5104 return ret;
5105
5106 return aux_domain_add_dev(to_dmar_domain(domain), dev);
5107}
5108
4c5478c9
JR
5109static void intel_iommu_detach_device(struct iommu_domain *domain,
5110 struct device *dev)
38717946 5111{
71753239 5112 dmar_remove_one_dev_info(dev);
faa3d6f5 5113}
c7151a8d 5114
67b8e02b
LB
5115static void intel_iommu_aux_detach_device(struct iommu_domain *domain,
5116 struct device *dev)
5117{
5118 aux_domain_remove_dev(to_dmar_domain(domain), dev);
5119}
5120
b146a1c9
JR
5121static int intel_iommu_map(struct iommu_domain *domain,
5122 unsigned long iova, phys_addr_t hpa,
5009065d 5123 size_t size, int iommu_prot)
faa3d6f5 5124{
00a77deb 5125 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
fe40f1e0 5126 u64 max_addr;
dde57a21 5127 int prot = 0;
faa3d6f5 5128 int ret;
fe40f1e0 5129
942067f1
LB
5130 if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN)
5131 return -EINVAL;
5132
dde57a21
JR
5133 if (iommu_prot & IOMMU_READ)
5134 prot |= DMA_PTE_READ;
5135 if (iommu_prot & IOMMU_WRITE)
5136 prot |= DMA_PTE_WRITE;
9cf06697
SY
5137 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
5138 prot |= DMA_PTE_SNP;
dde57a21 5139
163cc52c 5140 max_addr = iova + size;
dde57a21 5141 if (dmar_domain->max_addr < max_addr) {
fe40f1e0
WH
5142 u64 end;
5143
5144 /* check if minimum agaw is sufficient for mapped address */
8954da1f 5145 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
fe40f1e0 5146 if (end < max_addr) {
9f10e5bf 5147 pr_err("%s: iommu width (%d) is not "
fe40f1e0 5148 "sufficient for the mapped address (%llx)\n",
8954da1f 5149 __func__, dmar_domain->gaw, max_addr);
fe40f1e0
WH
5150 return -EFAULT;
5151 }
dde57a21 5152 dmar_domain->max_addr = max_addr;
fe40f1e0 5153 }
ad051221
DW
5154 /* Round up size to the next multiple of PAGE_SIZE if it, together with
5155 the low bits of hpa, would take us onto the next page */
88cb6a74 5156 size = aligned_nrpages(hpa, size);
ad051221
DW
5157 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
5158 hpa >> VTD_PAGE_SHIFT, size, prot);
faa3d6f5 5159 return ret;
38717946 5160}
38717946 5161
5009065d 5162static size_t intel_iommu_unmap(struct iommu_domain *domain,
ea8ea460 5163 unsigned long iova, size_t size)
38717946 5164{
00a77deb 5165 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
ea8ea460 5166 struct page *freelist = NULL;
ea8ea460
DW
5167 unsigned long start_pfn, last_pfn;
5168 unsigned int npages;
42e8c186 5169 int iommu_id, level = 0;
5cf0a76f
DW
5170
5171 /* Cope with horrid API which requires us to unmap more than the
5172 size argument if it happens to be a large-page mapping. */
dc02e46e 5173 BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
942067f1
LB
5174 if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN)
5175 return 0;
5cf0a76f
DW
5176
5177 if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
5178 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4b99d352 5179
ea8ea460
DW
5180 start_pfn = iova >> VTD_PAGE_SHIFT;
5181 last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
5182
5183 freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
5184
5185 npages = last_pfn - start_pfn + 1;
5186
f746a025 5187 for_each_domain_iommu(iommu_id, dmar_domain)
42e8c186
JR
5188 iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
5189 start_pfn, npages, !freelist, 0);
ea8ea460
DW
5190
5191 dma_free_pagelist(freelist);
fe40f1e0 5192
163cc52c
DW
5193 if (dmar_domain->max_addr == iova + size)
5194 dmar_domain->max_addr = iova;
b146a1c9 5195
5cf0a76f 5196 return size;
38717946 5197}
38717946 5198
d14d6577 5199static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
bb5547ac 5200 dma_addr_t iova)
38717946 5201{
00a77deb 5202 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
38717946 5203 struct dma_pte *pte;
5cf0a76f 5204 int level = 0;
faa3d6f5 5205 u64 phys = 0;
38717946 5206
942067f1
LB
5207 if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN)
5208 return 0;
5209
5cf0a76f 5210 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
38717946 5211 if (pte)
faa3d6f5 5212 phys = dma_pte_addr(pte);
38717946 5213
faa3d6f5 5214 return phys;
38717946 5215}
a8bcbb0d 5216
95587a75
LB
5217static inline bool scalable_mode_support(void)
5218{
5219 struct dmar_drhd_unit *drhd;
5220 struct intel_iommu *iommu;
5221 bool ret = true;
5222
5223 rcu_read_lock();
5224 for_each_active_iommu(iommu, drhd) {
5225 if (!sm_supported(iommu)) {
5226 ret = false;
5227 break;
5228 }
5229 }
5230 rcu_read_unlock();
5231
5232 return ret;
5233}
5234
5235static inline bool iommu_pasid_support(void)
5236{
5237 struct dmar_drhd_unit *drhd;
5238 struct intel_iommu *iommu;
5239 bool ret = true;
5240
5241 rcu_read_lock();
5242 for_each_active_iommu(iommu, drhd) {
5243 if (!pasid_supported(iommu)) {
5244 ret = false;
5245 break;
5246 }
5247 }
5248 rcu_read_unlock();
5249
5250 return ret;
5251}
5252
5d587b8d 5253static bool intel_iommu_capable(enum iommu_cap cap)
dbb9fd86 5254{
dbb9fd86 5255 if (cap == IOMMU_CAP_CACHE_COHERENCY)
5d587b8d 5256 return domain_update_iommu_snooping(NULL) == 1;
323f99cb 5257 if (cap == IOMMU_CAP_INTR_REMAP)
5d587b8d 5258 return irq_remapping_enabled == 1;
dbb9fd86 5259
5d587b8d 5260 return false;
dbb9fd86
SY
5261}
5262
abdfdde2
AW
5263static int intel_iommu_add_device(struct device *dev)
5264{
942067f1
LB
5265 struct dmar_domain *dmar_domain;
5266 struct iommu_domain *domain;
a5459cfe 5267 struct intel_iommu *iommu;
abdfdde2 5268 struct iommu_group *group;
156baca8 5269 u8 bus, devfn;
942067f1 5270 int ret;
70ae6f0d 5271
a5459cfe
AW
5272 iommu = device_to_iommu(dev, &bus, &devfn);
5273 if (!iommu)
70ae6f0d
AW
5274 return -ENODEV;
5275
e3d10af1 5276 iommu_device_link(&iommu->iommu, dev);
a4ff1fc2 5277
8af46c78
LB
5278 if (translation_pre_enabled(iommu))
5279 dev->archdata.iommu = DEFER_DEVICE_DOMAIN_INFO;
5280
e17f9ff4 5281 group = iommu_group_get_for_dev(dev);
783f157b 5282
e17f9ff4
AW
5283 if (IS_ERR(group))
5284 return PTR_ERR(group);
bcb71abe 5285
abdfdde2 5286 iommu_group_put(group);
942067f1
LB
5287
5288 domain = iommu_get_domain_for_dev(dev);
5289 dmar_domain = to_dmar_domain(domain);
5290 if (domain->type == IOMMU_DOMAIN_DMA) {
0e31a726 5291 if (device_def_domain_type(dev) == IOMMU_DOMAIN_IDENTITY) {
942067f1
LB
5292 ret = iommu_request_dm_for_dev(dev);
5293 if (ret) {
ae23bfb6 5294 dmar_remove_one_dev_info(dev);
942067f1
LB
5295 dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
5296 domain_add_dev_info(si_domain, dev);
5297 dev_info(dev,
5298 "Device uses a private identity domain.\n");
942067f1 5299 }
942067f1
LB
5300 }
5301 } else {
0e31a726 5302 if (device_def_domain_type(dev) == IOMMU_DOMAIN_DMA) {
942067f1
LB
5303 ret = iommu_request_dma_domain_for_dev(dev);
5304 if (ret) {
ae23bfb6 5305 dmar_remove_one_dev_info(dev);
942067f1 5306 dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
4ec066c7 5307 if (!get_private_domain_for_dev(dev)) {
942067f1
LB
5308 dev_warn(dev,
5309 "Failed to get a private domain.\n");
5310 return -ENOMEM;
5311 }
5312
5313 dev_info(dev,
5314 "Device uses a private dma domain.\n");
942067f1 5315 }
942067f1
LB
5316 }
5317 }
5318
e17f9ff4 5319 return 0;
abdfdde2 5320}
70ae6f0d 5321
abdfdde2
AW
5322static void intel_iommu_remove_device(struct device *dev)
5323{
a5459cfe
AW
5324 struct intel_iommu *iommu;
5325 u8 bus, devfn;
5326
5327 iommu = device_to_iommu(dev, &bus, &devfn);
5328 if (!iommu)
5329 return;
5330
458b7c8e
LB
5331 dmar_remove_one_dev_info(dev);
5332
abdfdde2 5333 iommu_group_remove_device(dev);
a5459cfe 5334
e3d10af1 5335 iommu_device_unlink(&iommu->iommu, dev);
70ae6f0d
AW
5336}
5337
0659b8dc
EA
5338static void intel_iommu_get_resv_regions(struct device *device,
5339 struct list_head *head)
5340{
5f64ce54 5341 int prot = DMA_PTE_READ | DMA_PTE_WRITE;
0659b8dc
EA
5342 struct iommu_resv_region *reg;
5343 struct dmar_rmrr_unit *rmrr;
5344 struct device *i_dev;
5345 int i;
5346
5f64ce54 5347 down_read(&dmar_global_lock);
0659b8dc
EA
5348 for_each_rmrr_units(rmrr) {
5349 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
5350 i, i_dev) {
5f64ce54 5351 struct iommu_resv_region *resv;
1c5c59fb 5352 enum iommu_resv_type type;
5f64ce54
EA
5353 size_t length;
5354
3855ba2d
EA
5355 if (i_dev != device &&
5356 !is_downstream_to_pci_bridge(device, i_dev))
0659b8dc
EA
5357 continue;
5358
5f64ce54 5359 length = rmrr->end_address - rmrr->base_address + 1;
1c5c59fb
EA
5360
5361 type = device_rmrr_is_relaxable(device) ?
5362 IOMMU_RESV_DIRECT_RELAXABLE : IOMMU_RESV_DIRECT;
5363
5f64ce54 5364 resv = iommu_alloc_resv_region(rmrr->base_address,
1c5c59fb 5365 length, prot, type);
5f64ce54
EA
5366 if (!resv)
5367 break;
5368
5369 list_add_tail(&resv->list, head);
0659b8dc
EA
5370 }
5371 }
5f64ce54 5372 up_read(&dmar_global_lock);
0659b8dc 5373
d850c2ee
LB
5374#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
5375 if (dev_is_pci(device)) {
5376 struct pci_dev *pdev = to_pci_dev(device);
5377
5378 if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) {
5379 reg = iommu_alloc_resv_region(0, 1UL << 24, 0,
5380 IOMMU_RESV_DIRECT);
5381 if (reg)
5382 list_add_tail(&reg->list, head);
5383 }
5384 }
5385#endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */
5386
0659b8dc
EA
5387 reg = iommu_alloc_resv_region(IOAPIC_RANGE_START,
5388 IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1,
9d3a4de4 5389 0, IOMMU_RESV_MSI);
0659b8dc
EA
5390 if (!reg)
5391 return;
5392 list_add_tail(&reg->list, head);
5393}
5394
5395static void intel_iommu_put_resv_regions(struct device *dev,
5396 struct list_head *head)
5397{
5398 struct iommu_resv_region *entry, *next;
5399
5f64ce54
EA
5400 list_for_each_entry_safe(entry, next, head, list)
5401 kfree(entry);
70ae6f0d
AW
5402}
5403
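/*
 * Illustrative sketch, not in the driver: a caller walks a device's reserved
 * regions through the core helpers, which dispatch to the
 * ->get_resv_regions/->put_resv_regions callbacks above.  The function name
 * is hypothetical.
 */
static void __maybe_unused example_dump_resv_regions(struct device *dev)
{
	struct iommu_resv_region *region;
	LIST_HEAD(resv_regions);

	iommu_get_resv_regions(dev, &resv_regions);
	list_for_each_entry(region, &resv_regions, list)
		dev_info(dev, "reserved [0x%llx - 0x%llx] type %d\n",
			 (u64)region->start,
			 (u64)(region->start + region->length - 1),
			 region->type);
	iommu_put_resv_regions(dev, &resv_regions);
}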
d7cbc0f3 5404int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev)
2f26e0a9
DW
5405{
5406 struct device_domain_info *info;
5407 struct context_entry *context;
5408 struct dmar_domain *domain;
5409 unsigned long flags;
5410 u64 ctx_lo;
5411 int ret;
5412
4ec066c7 5413 domain = find_domain(dev);
2f26e0a9
DW
5414 if (!domain)
5415 return -EINVAL;
5416
5417 spin_lock_irqsave(&device_domain_lock, flags);
5418 spin_lock(&iommu->lock);
5419
5420 ret = -EINVAL;
d7cbc0f3 5421 info = dev->archdata.iommu;
2f26e0a9
DW
5422 if (!info || !info->pasid_supported)
5423 goto out;
5424
5425 context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
5426 if (WARN_ON(!context))
5427 goto out;
5428
5429 ctx_lo = context[0].lo;
5430
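	/*
	 * If this context entry does not yet have PASIDs enabled, set the
	 * enable bit, publish the update with wmb(), and then invalidate the
	 * context cache so hardware re-fetches the updated entry rather than
	 * using a stale cached copy.
	 */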
2f26e0a9 5431 if (!(ctx_lo & CONTEXT_PASIDE)) {
2f26e0a9
DW
5432 ctx_lo |= CONTEXT_PASIDE;
5433 context[0].lo = ctx_lo;
5434 wmb();
d7cbc0f3
LB
5435 iommu->flush.flush_context(iommu,
5436 domain->iommu_did[iommu->seq_id],
5437 PCI_DEVID(info->bus, info->devfn),
2f26e0a9
DW
5438 DMA_CCMD_MASK_NOBIT,
5439 DMA_CCMD_DEVICE_INVL);
5440 }
5441
5442 /* Enable PASID support in the device, if it wasn't already */
5443 if (!info->pasid_enabled)
5444 iommu_enable_dev_iotlb(info);
5445
2f26e0a9
DW
5446 ret = 0;
5447
5448 out:
5449 spin_unlock(&iommu->lock);
5450 spin_unlock_irqrestore(&device_domain_lock, flags);
5451
5452 return ret;
5453}
5454
73bcbdc9
JS
5455static void intel_iommu_apply_resv_region(struct device *dev,
5456 struct iommu_domain *domain,
5457 struct iommu_resv_region *region)
5458{
5459 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5460 unsigned long start, end;
5461
5462 start = IOVA_PFN(region->start);
5463 end = IOVA_PFN(region->start + region->length - 1);
5464
5465 WARN_ON_ONCE(!reserve_iova(&dmar_domain->iovad, start, end));
5466}
5467
d7cbc0f3 5468#ifdef CONFIG_INTEL_IOMMU_SVM
2f26e0a9
DW
5469struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
5470{
5471 struct intel_iommu *iommu;
5472 u8 bus, devfn;
5473
5474 if (iommu_dummy(dev)) {
5475 dev_warn(dev,
5476 "No IOMMU translation for device; cannot enable SVM\n");
5477 return NULL;
5478 }
5479
5480 iommu = device_to_iommu(dev, &bus, &devfn);
5481 if (!iommu) {
b9997e38 5482 dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
2f26e0a9
DW
5483 return NULL;
5484 }
5485
2f26e0a9
DW
5486 return iommu;
5487}
5488#endif /* CONFIG_INTEL_IOMMU_SVM */
5489
95587a75
LB
5490static int intel_iommu_enable_auxd(struct device *dev)
5491{
5492 struct device_domain_info *info;
5493 struct intel_iommu *iommu;
5494 unsigned long flags;
5495 u8 bus, devfn;
5496 int ret;
5497
5498 iommu = device_to_iommu(dev, &bus, &devfn);
5499 if (!iommu || dmar_disabled)
5500 return -EINVAL;
5501
5502 if (!sm_supported(iommu) || !pasid_supported(iommu))
5503 return -EINVAL;
5504
5505 ret = intel_iommu_enable_pasid(iommu, dev);
5506 if (ret)
5507 return -ENODEV;
5508
5509 spin_lock_irqsave(&device_domain_lock, flags);
5510 info = dev->archdata.iommu;
5511 info->auxd_enabled = 1;
5512 spin_unlock_irqrestore(&device_domain_lock, flags);
5513
5514 return 0;
5515}
5516
5517static int intel_iommu_disable_auxd(struct device *dev)
5518{
5519 struct device_domain_info *info;
5520 unsigned long flags;
5521
5522 spin_lock_irqsave(&device_domain_lock, flags);
5523 info = dev->archdata.iommu;
5524 if (!WARN_ON(!info))
5525 info->auxd_enabled = 0;
5526 spin_unlock_irqrestore(&device_domain_lock, flags);
5527
5528 return 0;
5529}
5530
5531/*
5532 * A PCI Express Designated Vendor-Specific Extended Capability (DVSEC) is
5533 * defined in section 3.7 of the Intel Scalable I/O Virtualization technical
5534 * specification so that system software and tools can detect endpoint
5535 * devices supporting Intel Scalable I/O Virtualization without a host driver dependency.
5536 *
5537 * Returns the address of the matching extended capability structure within
5538 * the device's PCI configuration space or 0 if the device does not support
5539 * it.
5540 */
5541static int siov_find_pci_dvsec(struct pci_dev *pdev)
5542{
5543 int pos;
5544 u16 vendor, id;
5545
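	/*
	 * Walk all DVSEC structures (extended capability ID 0x23).  Within
	 * each, the 16-bit DVSEC vendor ID sits at offset 0x4 and the 16-bit
	 * DVSEC ID at offset 0x8; Intel uses DVSEC ID 5 for Scalable IOV.
	 */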
5546 pos = pci_find_next_ext_capability(pdev, 0, 0x23);
5547 while (pos) {
5548 pci_read_config_word(pdev, pos + 4, &vendor);
5549 pci_read_config_word(pdev, pos + 8, &id);
5550 if (vendor == PCI_VENDOR_ID_INTEL && id == 5)
5551 return pos;
5552
5553 pos = pci_find_next_ext_capability(pdev, pos, 0x23);
5554 }
5555
5556 return 0;
5557}
5558
5559static bool
5560intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat)
5561{
5562 if (feat == IOMMU_DEV_FEAT_AUX) {
5563 int ret;
5564
5565 if (!dev_is_pci(dev) || dmar_disabled ||
5566 !scalable_mode_support() || !iommu_pasid_support())
5567 return false;
5568
5569 ret = pci_pasid_features(to_pci_dev(dev));
5570 if (ret < 0)
5571 return false;
5572
5573 return !!siov_find_pci_dvsec(to_pci_dev(dev));
5574 }
5575
5576 return false;
5577}
5578
5579static int
5580intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
5581{
5582 if (feat == IOMMU_DEV_FEAT_AUX)
5583 return intel_iommu_enable_auxd(dev);
5584
5585 return -ENODEV;
5586}
5587
5588static int
5589intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat)
5590{
5591 if (feat == IOMMU_DEV_FEAT_AUX)
5592 return intel_iommu_disable_auxd(dev);
5593
5594 return -ENODEV;
5595}
5596
5597static bool
5598intel_iommu_dev_feat_enabled(struct device *dev, enum iommu_dev_features feat)
5599{
5600 struct device_domain_info *info = dev->archdata.iommu;
5601
5602 if (feat == IOMMU_DEV_FEAT_AUX)
5603 return scalable_mode_support() && info && info->auxd_enabled;
5604
5605 return false;
5606}
5607
0e8000f8
LB
5608static int
5609intel_iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev)
5610{
5611 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5612
5613 return dmar_domain->default_pasid > 0 ?
5614 dmar_domain->default_pasid : -EINVAL;
5615}
5616
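/*
 * Illustrative sketch, not in the driver: the intended calling sequence, from
 * a device driver's point of view, for the aux-domain hooks wired up above,
 * using the generic helpers this kernel's IOMMU API exposes for them
 * (iommu_dev_has_feature() and friends).  Error handling is trimmed and the
 * function name is hypothetical.
 */
static int __maybe_unused example_use_aux_domain(struct device *dev)
{
	struct iommu_domain *domain;
	int pasid, ret;

	if (!iommu_dev_has_feature(dev, IOMMU_DEV_FEAT_AUX))
		return -ENODEV;

	ret = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_AUX);
	if (ret)
		return ret;

	domain = iommu_domain_alloc(dev->bus);
	if (!domain)
		return -ENOMEM;

	/* Attach as an aux domain; hardware tags its traffic with a PASID. */
	ret = iommu_aux_attach_device(domain, dev);
	if (ret)
		goto out_free;

	pasid = iommu_aux_get_pasid(domain, dev);
	dev_info(dev, "aux domain attached, PASID %d\n", pasid);

	iommu_aux_detach_device(domain, dev);
out_free:
	iommu_domain_free(domain);
	return ret;
}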
8af46c78
LB
5617static bool intel_iommu_is_attach_deferred(struct iommu_domain *domain,
5618 struct device *dev)
5619{
5620 return dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO;
5621}
5622
b0119e87 5623const struct iommu_ops intel_iommu_ops = {
0659b8dc
EA
5624 .capable = intel_iommu_capable,
5625 .domain_alloc = intel_iommu_domain_alloc,
5626 .domain_free = intel_iommu_domain_free,
5627 .attach_dev = intel_iommu_attach_device,
5628 .detach_dev = intel_iommu_detach_device,
67b8e02b
LB
5629 .aux_attach_dev = intel_iommu_aux_attach_device,
5630 .aux_detach_dev = intel_iommu_aux_detach_device,
0e8000f8 5631 .aux_get_pasid = intel_iommu_aux_get_pasid,
0659b8dc
EA
5632 .map = intel_iommu_map,
5633 .unmap = intel_iommu_unmap,
0659b8dc
EA
5634 .iova_to_phys = intel_iommu_iova_to_phys,
5635 .add_device = intel_iommu_add_device,
5636 .remove_device = intel_iommu_remove_device,
5637 .get_resv_regions = intel_iommu_get_resv_regions,
5638 .put_resv_regions = intel_iommu_put_resv_regions,
73bcbdc9 5639 .apply_resv_region = intel_iommu_apply_resv_region,
0659b8dc 5640 .device_group = pci_device_group,
95587a75
LB
5641 .dev_has_feat = intel_iommu_dev_has_feat,
5642 .dev_feat_enabled = intel_iommu_dev_feat_enabled,
5643 .dev_enable_feat = intel_iommu_dev_enable_feat,
5644 .dev_disable_feat = intel_iommu_dev_disable_feat,
8af46c78 5645 .is_attach_deferred = intel_iommu_is_attach_deferred,
0659b8dc 5646 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
a8bcbb0d 5647};
9af88143 5648
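/*
 * These ops are registered for the PCI bus elsewhere in this file during
 * initialization, roughly:
 *
 *	bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
 *
 * after which the generic IOMMU core dispatches to the callbacks above.
 */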
9452618e
DV
5649static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
5650{
5651 /* G4x/GM45 integrated gfx dmar support is totally busted. */
932a6523 5652 pci_info(dev, "Disabling IOMMU for graphics on this chipset\n");
9452618e
DV
5653 dmar_map_gfx = 0;
5654}
5655
5656DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
5657DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
5658DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
5659DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
5660DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
5661DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
5662DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
5663
d34d6517 5664static void quirk_iommu_rwbf(struct pci_dev *dev)
9af88143
DW
5665{
5666 /*
5667 * Mobile 4 Series Chipset neglects to set RWBF capability,
210561ff 5668 * but needs it. Same seems to hold for the desktop versions.
9af88143 5669 */
932a6523 5670 pci_info(dev, "Forcing write-buffer flush capability\n");
9af88143
DW
5671 rwbf_quirk = 1;
5672}
5673
5674DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
210561ff
DV
5675DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
5676DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
5677DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
5678DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
5679DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
5680DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
e0fc7e0b 5681
eecfd57f
AJ
5682#define GGC 0x52
5683#define GGC_MEMORY_SIZE_MASK (0xf << 8)
5684#define GGC_MEMORY_SIZE_NONE (0x0 << 8)
5685#define GGC_MEMORY_SIZE_1M (0x1 << 8)
5686#define GGC_MEMORY_SIZE_2M (0x3 << 8)
5687#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
5688#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
5689#define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
5690#define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
5691
d34d6517 5692static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
9eecabcb
DW
5693{
5694 unsigned short ggc;
5695
eecfd57f 5696 if (pci_read_config_word(dev, GGC, &ggc))
9eecabcb
DW
5697 return;
5698
eecfd57f 5699 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
932a6523 5700 pci_info(dev, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
9eecabcb 5701 dmar_map_gfx = 0;
6fbcfb3e
DW
5702 } else if (dmar_map_gfx) {
5703 /* we have to ensure the gfx device is idle before we flush */
932a6523 5704 pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n");
6fbcfb3e
DW
5705 intel_iommu_strict = 1;
5706 }
9eecabcb
DW
5707}
5708DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
5709DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
5710DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
5711DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
5712
e0fc7e0b
DW
5713/* On Tylersburg chipsets, some BIOSes have been known to enable the
5714 ISOCH DMAR unit for the Azalia sound device, but not give it any
5715 TLB entries, which causes it to deadlock. Check for that. We do
5716 this in a function called from init_dmars(), instead of in a PCI
5717 quirk, because we don't want to print the obnoxious "BIOS broken"
5718 message if VT-d is actually disabled.
5719*/
5720static void __init check_tylersburg_isoch(void)
5721{
5722 struct pci_dev *pdev;
5723 uint32_t vtisochctrl;
5724
5725 /* If there's no Azalia in the system anyway, forget it. */
5726 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
5727 if (!pdev)
5728 return;
5729 pci_dev_put(pdev);
5730
5731 /* System Management Registers. Might be hidden, in which case
5732 we can't do the sanity check. But that's OK, because the
5733 known-broken BIOSes _don't_ actually hide it, so far. */
5734 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
5735 if (!pdev)
5736 return;
5737
5738 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
5739 pci_dev_put(pdev);
5740 return;
5741 }
5742
5743 pci_dev_put(pdev);
5744
5745 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
5746 if (vtisochctrl & 1)
5747 return;
5748
5749 /* Drop all bits other than the number of TLB entries */
5750 vtisochctrl &= 0x1c;
5751
5752 /* If we have the recommended number of TLB entries (16), fine. */
5753 if (vtisochctrl == 0x10)
5754 return;
5755
5756 /* Zero TLB entries? You get to ride the short bus to school. */
5757 if (!vtisochctrl) {
5758 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
5759 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
5760 dmi_get_system_info(DMI_BIOS_VENDOR),
5761 dmi_get_system_info(DMI_BIOS_VERSION),
5762 dmi_get_system_info(DMI_PRODUCT_VERSION));
5763 iommu_identity_mapping |= IDENTMAP_AZALIA;
5764 return;
5765 }
9f10e5bf
JR
5766
5767 pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
e0fc7e0b
DW
5768 vtisochctrl);
5769}