drivers/iommu/intel-iommu.c
blame listing at commit: iommu: Introduce IOMMU_RESV_DIRECT_RELAXABLE reserved memory regions
ba395927 1/*
ea8ea460 2 * Copyright © 2006-2014 Intel Corporation.
ba395927
KA
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
ea8ea460
DW
13 * Authors: David Woodhouse <dwmw2@infradead.org>,
14 * Ashok Raj <ashok.raj@intel.com>,
15 * Shaohua Li <shaohua.li@intel.com>,
16 * Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
17 * Fenghua Yu <fenghua.yu@intel.com>
9f10e5bf 18 * Joerg Roedel <jroedel@suse.de>
ba395927
KA
19 */
20
9f10e5bf 21#define pr_fmt(fmt) "DMAR: " fmt
932a6523 22#define dev_fmt(fmt) pr_fmt(fmt)
9f10e5bf 23
ba395927
KA
24#include <linux/init.h>
25#include <linux/bitmap.h>
5e0d2a6f 26#include <linux/debugfs.h>
54485c30 27#include <linux/export.h>
ba395927
KA
28#include <linux/slab.h>
29#include <linux/irq.h>
30#include <linux/interrupt.h>
ba395927
KA
31#include <linux/spinlock.h>
32#include <linux/pci.h>
33#include <linux/dmar.h>
34#include <linux/dma-mapping.h>
35#include <linux/mempool.h>
75f05569 36#include <linux/memory.h>
aa473240 37#include <linux/cpu.h>
5e0d2a6f 38#include <linux/timer.h>
dfddb969 39#include <linux/io.h>
38717946 40#include <linux/iova.h>
5d450806 41#include <linux/iommu.h>
38717946 42#include <linux/intel-iommu.h>
134fac3f 43#include <linux/syscore_ops.h>
69575d38 44#include <linux/tboot.h>
adb2fe02 45#include <linux/dmi.h>
5cdede24 46#include <linux/pci-ats.h>
0ee332c1 47#include <linux/memblock.h>
36746436 48#include <linux/dma-contiguous.h>
fec777c3 49#include <linux/dma-direct.h>
091d42e4 50#include <linux/crash_dump.h>
98fa15f3 51#include <linux/numa.h>
8a8f422d 52#include <asm/irq_remapping.h>
ba395927 53#include <asm/cacheflush.h>
46a7fa27 54#include <asm/iommu.h>
ba395927 55
078e1ee2 56#include "irq_remapping.h"
56283174 57#include "intel-pasid.h"
078e1ee2 58
5b6985ce
FY
59#define ROOT_SIZE VTD_PAGE_SIZE
60#define CONTEXT_SIZE VTD_PAGE_SIZE
61
ba395927 62#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
18436afd 63#define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
ba395927 64#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
e0fc7e0b 65#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
ba395927
KA
66
67#define IOAPIC_RANGE_START (0xfee00000)
68#define IOAPIC_RANGE_END (0xfeefffff)
69#define IOVA_START_ADDR (0x1000)
70
5e3b4a15 71#define DEFAULT_DOMAIN_ADDRESS_WIDTH 57
ba395927 72
4ed0d3e6 73#define MAX_AGAW_WIDTH 64
5c645b35 74#define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
4ed0d3e6 75
2ebe3151
DW
76#define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
77#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
78
79/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
80 to match. That way, we can use 'unsigned long' for PFNs with impunity. */
81#define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
82 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
83#define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
ba395927 84
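/*
 * Illustrative example (not part of the original source): with
 * VTD_PAGE_SHIFT == 12, a 48-bit guest address width gives
 *
 *	__DOMAIN_MAX_PFN(48)  == (1ULL << 36) - 1
 *	__DOMAIN_MAX_ADDR(48) == (1ULL << 48) - 1
 *
 * and DOMAIN_MAX_PFN() additionally clamps the result to ULONG_MAX so
 * it always fits in an unsigned long, as the comment above explains.
 */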
1b722500
RM
85/* IO virtual address start page frame number */
86#define IOVA_START_PFN (1)
87
f27be03b 88#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
5e0d2a6f 89
df08cdc7
AM
90/* page table handling */
91#define LEVEL_STRIDE (9)
92#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
93
6d1c56a9
OBC
94/*
 95 * This bitmap is used to advertise the page sizes our hardware supports
96 * to the IOMMU core, which will then use this information to split
97 * physically contiguous memory regions it is mapping into page sizes
98 * that we support.
99 *
100 * Traditionally the IOMMU core just handed us the mappings directly,
 101 * after making sure the size is a power-of-two multiple of 4KiB and that the
102 * mapping has natural alignment.
103 *
104 * To retain this behavior, we currently advertise that we support
 105 * all page sizes that are a power-of-two multiple of 4KiB.
106 *
107 * If at some point we'd like to utilize the IOMMU core's new behavior,
108 * we could change this to advertise the real page sizes we support.
109 */
110#define INTEL_IOMMU_PGSIZES (~0xFFFUL)
111
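/*
 * Illustrative example (not part of the original source): in the bitmap
 * handed to the IOMMU core, bit N set means "page size 1 << N is
 * supported", so ~0xFFFUL sets bits 12 and up, i.e. every power-of-two
 * size from 4KiB upwards:
 *
 *	INTEL_IOMMU_PGSIZES & (1UL << 12)	!= 0	(4KiB supported)
 *	INTEL_IOMMU_PGSIZES & (1UL << 21)	!= 0	(2MiB supported)
 *	INTEL_IOMMU_PGSIZES & (1UL << 11)	== 0	(2KiB rejected)
 */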
df08cdc7
AM
112static inline int agaw_to_level(int agaw)
113{
114 return agaw + 2;
115}
116
117static inline int agaw_to_width(int agaw)
118{
5c645b35 119 return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
df08cdc7
AM
120}
121
122static inline int width_to_agaw(int width)
123{
5c645b35 124 return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
df08cdc7
AM
125}
126
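/*
 * Illustrative example (not part of the original source): with
 * LEVEL_STRIDE == 9 the helpers relate as follows for a 48-bit width:
 *
 *	width_to_agaw(48) == DIV_ROUND_UP(48 - 30, 9) == 2
 *	agaw_to_width(2)  == 30 + 2 * 9               == 48
 *	agaw_to_level(2)  == 2 + 2                    == 4	(4-level table)
 *
 * and agaw 3 corresponds to a 57-bit, 5-level page table.
 */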
127static inline unsigned int level_to_offset_bits(int level)
128{
129 return (level - 1) * LEVEL_STRIDE;
130}
131
132static inline int pfn_level_offset(unsigned long pfn, int level)
133{
134 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
135}
136
137static inline unsigned long level_mask(int level)
138{
139 return -1UL << level_to_offset_bits(level);
140}
141
142static inline unsigned long level_size(int level)
143{
144 return 1UL << level_to_offset_bits(level);
145}
146
147static inline unsigned long align_to_level(unsigned long pfn, int level)
148{
149 return (pfn + level_size(level) - 1) & level_mask(level);
150}
fd18de50 151
6dd9a7c7
YS
152static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
153{
5c645b35 154 return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
6dd9a7c7
YS
155}
156
dd4e8319
DW
 157/* VT-d pages must never be larger than MM pages. Otherwise things
 158 are never going to work. */
159static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
160{
161 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
162}
163
164static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
165{
166 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
167}
168static inline unsigned long page_to_dma_pfn(struct page *pg)
169{
170 return mm_to_dma_pfn(page_to_pfn(pg));
171}
172static inline unsigned long virt_to_dma_pfn(void *p)
173{
174 return page_to_dma_pfn(virt_to_page(p));
175}
176
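/*
 * Illustrative note (not part of the original source): on x86 the MM
 * page size is 4KiB, so PAGE_SHIFT == VTD_PAGE_SHIFT == 12 and the two
 * conversions above shift by zero; they only do real work if the MM
 * page size ever exceeds the 4KiB VT-d page, in which case one mm pfn
 * spans several dma pfns.
 */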
d9630fe9
WH
177/* global iommu list, set NULL for ignored DMAR units */
178static struct intel_iommu **g_iommus;
179
e0fc7e0b 180static void __init check_tylersburg_isoch(void);
9af88143
DW
181static int rwbf_quirk;
182
b779260b
JC
183/*
184 * set to 1 to panic kernel if can't successfully enable VT-d
185 * (used when kernel is launched w/ TXT)
186 */
187static int force_on = 0;
bfd20f1c 188int intel_iommu_tboot_noforce;
89a6079d 189static int no_platform_optin;
b779260b 190
46b08e1a 191#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
46b08e1a 192
091d42e4
JR
193/*
194 * Take a root_entry and return the Lower Context Table Pointer (LCTP)
195 * if marked present.
196 */
197static phys_addr_t root_entry_lctp(struct root_entry *re)
198{
199 if (!(re->lo & 1))
200 return 0;
201
202 return re->lo & VTD_PAGE_MASK;
203}
204
205/*
206 * Take a root_entry and return the Upper Context Table Pointer (UCTP)
207 * if marked present.
208 */
209static phys_addr_t root_entry_uctp(struct root_entry *re)
210{
211 if (!(re->hi & 1))
212 return 0;
46b08e1a 213
091d42e4
JR
214 return re->hi & VTD_PAGE_MASK;
215}
c07e7d21 216
cf484d0e
JR
217static inline void context_clear_pasid_enable(struct context_entry *context)
218{
219 context->lo &= ~(1ULL << 11);
220}
221
222static inline bool context_pasid_enabled(struct context_entry *context)
223{
224 return !!(context->lo & (1ULL << 11));
225}
226
227static inline void context_set_copied(struct context_entry *context)
228{
229 context->hi |= (1ull << 3);
230}
231
232static inline bool context_copied(struct context_entry *context)
233{
234 return !!(context->hi & (1ULL << 3));
235}
236
237static inline bool __context_present(struct context_entry *context)
c07e7d21
MM
238{
239 return (context->lo & 1);
240}
cf484d0e 241
26b86092 242bool context_present(struct context_entry *context)
cf484d0e
JR
243{
244 return context_pasid_enabled(context) ?
245 __context_present(context) :
246 __context_present(context) && !context_copied(context);
247}
248
c07e7d21
MM
249static inline void context_set_present(struct context_entry *context)
250{
251 context->lo |= 1;
252}
253
254static inline void context_set_fault_enable(struct context_entry *context)
255{
256 context->lo &= (((u64)-1) << 2) | 1;
257}
258
c07e7d21
MM
259static inline void context_set_translation_type(struct context_entry *context,
260 unsigned long value)
261{
262 context->lo &= (((u64)-1) << 4) | 3;
263 context->lo |= (value & 3) << 2;
264}
265
266static inline void context_set_address_root(struct context_entry *context,
267 unsigned long value)
268{
1a2262f9 269 context->lo &= ~VTD_PAGE_MASK;
c07e7d21
MM
270 context->lo |= value & VTD_PAGE_MASK;
271}
272
273static inline void context_set_address_width(struct context_entry *context,
274 unsigned long value)
275{
276 context->hi |= value & 7;
277}
278
279static inline void context_set_domain_id(struct context_entry *context,
280 unsigned long value)
281{
282 context->hi |= (value & ((1 << 16) - 1)) << 8;
283}
284
dbcd861f
JR
285static inline int context_domain_id(struct context_entry *c)
286{
287 return((c->hi >> 8) & 0xffff);
288}
289
c07e7d21
MM
290static inline void context_clear_entry(struct context_entry *context)
291{
292 context->lo = 0;
293 context->hi = 0;
294}
7a8fc25e 295
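/*
 * Illustrative summary (not part of the original source) of the
 * legacy-mode context entry bits manipulated by the helpers above:
 *
 *	lo bit  0	present
 *	lo bit  1	fault processing disable (cleared by
 *			context_set_fault_enable())
 *	lo bits 2-3	translation type
 *	lo bits 12-63	page-aligned address root (VTD_PAGE_MASK)
 *	hi bits 0-2	address width (AGAW)
 *	hi bits 8-23	domain id
 */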
2c2e2c38
FY
296/*
297 * This domain is a statically identity mapping domain.
 298 * 1. This domain creates a static 1:1 mapping to all usable memory.
 299 * 2. It maps to each iommu if successful.
 300 * 3. Each iommu maps to this domain if successful.
301 */
19943b0e
DW
302static struct dmar_domain *si_domain;
303static int hw_pass_through = 1;
2c2e2c38 304
2c2e2c38 305/* si_domain contains mulitple devices */
fa954e68 306#define DOMAIN_FLAG_STATIC_IDENTITY BIT(0)
2c2e2c38 307
942067f1
LB
308/*
309 * This is a DMA domain allocated through the iommu domain allocation
310 * interface. But one or more devices belonging to this domain have
 311 * been chosen to use a private domain. We should avoid using the
312 * map/unmap/iova_to_phys APIs on it.
313 */
314#define DOMAIN_FLAG_LOSE_CHILDREN BIT(1)
315
29a27719
JR
316#define for_each_domain_iommu(idx, domain) \
317 for (idx = 0; idx < g_num_of_iommus; idx++) \
318 if (domain->iommu_refcnt[idx])
319
b94e4117
JL
320struct dmar_rmrr_unit {
321 struct list_head list; /* list of rmrr units */
322 struct acpi_dmar_header *hdr; /* ACPI header */
323 u64 base_address; /* reserved base address*/
324 u64 end_address; /* reserved end address */
832bd858 325 struct dmar_dev_scope *devices; /* target devices */
b94e4117
JL
326 int devices_cnt; /* target device count */
327};
328
329struct dmar_atsr_unit {
330 struct list_head list; /* list of ATSR units */
331 struct acpi_dmar_header *hdr; /* ACPI header */
832bd858 332 struct dmar_dev_scope *devices; /* target devices */
b94e4117
JL
333 int devices_cnt; /* target device count */
334 u8 include_all:1; /* include all ports */
335};
336
337static LIST_HEAD(dmar_atsr_units);
338static LIST_HEAD(dmar_rmrr_units);
339
340#define for_each_rmrr_units(rmrr) \
341 list_for_each_entry(rmrr, &dmar_rmrr_units, list)
342
5e0d2a6f 343/* bitmap for indexing intel_iommus */
5e0d2a6f 344static int g_num_of_iommus;
345
92d03cc8 346static void domain_exit(struct dmar_domain *domain);
ba395927 347static void domain_remove_dev_info(struct dmar_domain *domain);
71753239 348static void dmar_remove_one_dev_info(struct device *dev);
127c7615 349static void __dmar_remove_one_dev_info(struct device_domain_info *info);
2452d9db
JR
350static void domain_context_clear(struct intel_iommu *iommu,
351 struct device *dev);
2a46ddf7
JL
352static int domain_detach_iommu(struct dmar_domain *domain,
353 struct intel_iommu *iommu);
4de354ec 354static bool device_is_rmrr_locked(struct device *dev);
8af46c78
LB
355static int intel_iommu_attach_device(struct iommu_domain *domain,
356 struct device *dev);
ba395927 357
d3f13810 358#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
0cd5c3c8
KM
359int dmar_disabled = 0;
360#else
361int dmar_disabled = 1;
d3f13810 362#endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
0cd5c3c8 363
cdd3a249 364int intel_iommu_sm;
8bc1f85c
ED
365int intel_iommu_enabled = 0;
366EXPORT_SYMBOL_GPL(intel_iommu_enabled);
367
2d9e667e 368static int dmar_map_gfx = 1;
7d3b03ce 369static int dmar_forcedac;
5e0d2a6f 370static int intel_iommu_strict;
6dd9a7c7 371static int intel_iommu_superpage = 1;
ae853ddb 372static int iommu_identity_mapping;
c83b2f20 373
ae853ddb
DW
374#define IDENTMAP_ALL 1
375#define IDENTMAP_GFX 2
376#define IDENTMAP_AZALIA 4
c83b2f20 377
c0771df8
DW
378int intel_iommu_gfx_mapped;
379EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
380
ba395927 381#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
8af46c78 382#define DEFER_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-2))
ba395927
KA
383static DEFINE_SPINLOCK(device_domain_lock);
384static LIST_HEAD(device_domain_list);
385
85319dcc
LB
386/*
387 * Iterate over elements in device_domain_list and call the specified
0bbeb01a 388 * callback @fn against each element.
85319dcc
LB
389 */
390int for_each_device_domain(int (*fn)(struct device_domain_info *info,
391 void *data), void *data)
392{
393 int ret = 0;
0bbeb01a 394 unsigned long flags;
85319dcc
LB
395 struct device_domain_info *info;
396
0bbeb01a 397 spin_lock_irqsave(&device_domain_lock, flags);
85319dcc
LB
398 list_for_each_entry(info, &device_domain_list, global) {
399 ret = fn(info, data);
0bbeb01a
LB
400 if (ret) {
401 spin_unlock_irqrestore(&device_domain_lock, flags);
85319dcc 402 return ret;
0bbeb01a 403 }
85319dcc 404 }
0bbeb01a 405 spin_unlock_irqrestore(&device_domain_lock, flags);
85319dcc
LB
406
407 return 0;
408}
409
b0119e87 410const struct iommu_ops intel_iommu_ops;
a8bcbb0d 411
4158c2ec
JR
412static bool translation_pre_enabled(struct intel_iommu *iommu)
413{
414 return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
415}
416
091d42e4
JR
417static void clear_translation_pre_enabled(struct intel_iommu *iommu)
418{
419 iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
420}
421
4158c2ec
JR
422static void init_translation_status(struct intel_iommu *iommu)
423{
424 u32 gsts;
425
426 gsts = readl(iommu->reg + DMAR_GSTS_REG);
427 if (gsts & DMA_GSTS_TES)
428 iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
429}
430
00a77deb
JR
 431/* Convert generic 'struct iommu_domain' to private struct dmar_domain */
432static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
433{
434 return container_of(dom, struct dmar_domain, domain);
435}
436
ba395927
KA
437static int __init intel_iommu_setup(char *str)
438{
439 if (!str)
440 return -EINVAL;
441 while (*str) {
0cd5c3c8
KM
442 if (!strncmp(str, "on", 2)) {
443 dmar_disabled = 0;
9f10e5bf 444 pr_info("IOMMU enabled\n");
0cd5c3c8 445 } else if (!strncmp(str, "off", 3)) {
ba395927 446 dmar_disabled = 1;
89a6079d 447 no_platform_optin = 1;
9f10e5bf 448 pr_info("IOMMU disabled\n");
ba395927
KA
449 } else if (!strncmp(str, "igfx_off", 8)) {
450 dmar_map_gfx = 0;
9f10e5bf 451 pr_info("Disable GFX device mapping\n");
7d3b03ce 452 } else if (!strncmp(str, "forcedac", 8)) {
9f10e5bf 453 pr_info("Forcing DAC for PCI devices\n");
7d3b03ce 454 dmar_forcedac = 1;
5e0d2a6f 455 } else if (!strncmp(str, "strict", 6)) {
9f10e5bf 456 pr_info("Disable batched IOTLB flush\n");
5e0d2a6f 457 intel_iommu_strict = 1;
6dd9a7c7 458 } else if (!strncmp(str, "sp_off", 6)) {
9f10e5bf 459 pr_info("Disable supported super page\n");
6dd9a7c7 460 intel_iommu_superpage = 0;
8950dcd8
LB
461 } else if (!strncmp(str, "sm_on", 5)) {
462 pr_info("Intel-IOMMU: scalable mode supported\n");
463 intel_iommu_sm = 1;
bfd20f1c
SL
464 } else if (!strncmp(str, "tboot_noforce", 13)) {
465 printk(KERN_INFO
466 "Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
467 intel_iommu_tboot_noforce = 1;
ba395927
KA
468 }
469
470 str += strcspn(str, ",");
471 while (*str == ',')
472 str++;
473 }
474 return 0;
475}
476__setup("intel_iommu=", intel_iommu_setup);
477
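/*
 * Illustrative usage (not part of the original source): the parser
 * above takes a comma-separated option list on the kernel command
 * line, e.g.
 *
 *	intel_iommu=on,sm_on,strict
 *
 * which enables DMAR, scalable mode and strict (unbatched) IOTLB
 * flushing, matching the strncmp() cases handled above.
 */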
478static struct kmem_cache *iommu_domain_cache;
479static struct kmem_cache *iommu_devinfo_cache;
ba395927 480
9452d5bf
JR
481static struct dmar_domain* get_iommu_domain(struct intel_iommu *iommu, u16 did)
482{
8bf47816
JR
483 struct dmar_domain **domains;
484 int idx = did >> 8;
485
486 domains = iommu->domains[idx];
487 if (!domains)
488 return NULL;
489
490 return domains[did & 0xff];
9452d5bf
JR
491}
492
493static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
494 struct dmar_domain *domain)
495{
8bf47816
JR
496 struct dmar_domain **domains;
497 int idx = did >> 8;
498
499 if (!iommu->domains[idx]) {
500 size_t size = 256 * sizeof(struct dmar_domain *);
501 iommu->domains[idx] = kzalloc(size, GFP_ATOMIC);
502 }
503
504 domains = iommu->domains[idx];
505 if (WARN_ON(!domains))
506 return;
507 else
508 domains[did & 0xff] = domain;
9452d5bf
JR
509}
510
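/*
 * Illustrative note (not part of the original source): iommu->domains
 * is a two-level table indexed by domain id, so e.g. did 0x1234 is
 * found at iommu->domains[0x12][0x34]; the 256-entry second-level
 * pages are allocated lazily in set_iommu_domain() above.
 */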
9ddbfb42 511void *alloc_pgtable_page(int node)
eb3fa7cb 512{
4c923d47
SS
513 struct page *page;
514 void *vaddr = NULL;
eb3fa7cb 515
4c923d47
SS
516 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
517 if (page)
518 vaddr = page_address(page);
eb3fa7cb 519 return vaddr;
ba395927
KA
520}
521
9ddbfb42 522void free_pgtable_page(void *vaddr)
ba395927
KA
523{
524 free_page((unsigned long)vaddr);
525}
526
527static inline void *alloc_domain_mem(void)
528{
354bb65e 529 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
ba395927
KA
530}
531
38717946 532static void free_domain_mem(void *vaddr)
ba395927
KA
533{
534 kmem_cache_free(iommu_domain_cache, vaddr);
535}
536
537static inline void * alloc_devinfo_mem(void)
538{
354bb65e 539 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
ba395927
KA
540}
541
542static inline void free_devinfo_mem(void *vaddr)
543{
544 kmem_cache_free(iommu_devinfo_cache, vaddr);
545}
546
28ccce0d
JR
547static inline int domain_type_is_si(struct dmar_domain *domain)
548{
549 return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
550}
551
162d1b10
JL
552static inline int domain_pfn_supported(struct dmar_domain *domain,
553 unsigned long pfn)
554{
555 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
556
557 return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
558}
559
4ed0d3e6 560static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
1b573683
WH
561{
562 unsigned long sagaw;
563 int agaw = -1;
564
565 sagaw = cap_sagaw(iommu->cap);
4ed0d3e6 566 for (agaw = width_to_agaw(max_gaw);
1b573683
WH
567 agaw >= 0; agaw--) {
568 if (test_bit(agaw, &sagaw))
569 break;
570 }
571
572 return agaw;
573}
574
4ed0d3e6
FY
575/*
576 * Calculate max SAGAW for each iommu.
577 */
578int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
579{
580 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
581}
582
583/*
584 * calculate agaw for each iommu.
585 * "SAGAW" may be different across iommus, use a default agaw, and
 586 * fall back to a smaller supported agaw for iommus that don't support the default agaw.
587 */
588int iommu_calculate_agaw(struct intel_iommu *iommu)
589{
590 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
591}
592
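/*
 * Illustrative example (not part of the original source): with
 * DEFAULT_DOMAIN_ADDRESS_WIDTH == 57, __iommu_calculate_agaw() starts
 * at width_to_agaw(57) == 3 (57-bit, 5-level tables) and walks down
 * the SAGAW capability bits; hardware that only reports 4-level
 * support yields agaw 2 (48-bit) instead.
 */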
2c2e2c38 593/* This function only returns a single iommu in a domain */
9ddbfb42 594struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
8c11e798
WH
595{
596 int iommu_id;
597
2c2e2c38 598 /* si_domain and vm domain should not get here. */
fa954e68
LB
599 if (WARN_ON(domain->domain.type != IOMMU_DOMAIN_DMA))
600 return NULL;
601
29a27719
JR
602 for_each_domain_iommu(iommu_id, domain)
603 break;
604
8c11e798
WH
605 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
606 return NULL;
607
608 return g_iommus[iommu_id];
609}
610
8e604097
WH
611static void domain_update_iommu_coherency(struct dmar_domain *domain)
612{
d0501960
DW
613 struct dmar_drhd_unit *drhd;
614 struct intel_iommu *iommu;
2f119c78
QL
615 bool found = false;
616 int i;
2e12bc29 617
d0501960 618 domain->iommu_coherency = 1;
8e604097 619
29a27719 620 for_each_domain_iommu(i, domain) {
2f119c78 621 found = true;
8e604097
WH
622 if (!ecap_coherent(g_iommus[i]->ecap)) {
623 domain->iommu_coherency = 0;
624 break;
625 }
8e604097 626 }
d0501960
DW
627 if (found)
628 return;
629
630 /* No hardware attached; use lowest common denominator */
631 rcu_read_lock();
632 for_each_active_iommu(iommu, drhd) {
633 if (!ecap_coherent(iommu->ecap)) {
634 domain->iommu_coherency = 0;
635 break;
636 }
637 }
638 rcu_read_unlock();
8e604097
WH
639}
640
161f6934 641static int domain_update_iommu_snooping(struct intel_iommu *skip)
58c610bd 642{
161f6934
JL
643 struct dmar_drhd_unit *drhd;
644 struct intel_iommu *iommu;
645 int ret = 1;
58c610bd 646
161f6934
JL
647 rcu_read_lock();
648 for_each_active_iommu(iommu, drhd) {
649 if (iommu != skip) {
650 if (!ecap_sc_support(iommu->ecap)) {
651 ret = 0;
652 break;
653 }
58c610bd 654 }
58c610bd 655 }
161f6934
JL
656 rcu_read_unlock();
657
658 return ret;
58c610bd
SY
659}
660
161f6934 661static int domain_update_iommu_superpage(struct intel_iommu *skip)
6dd9a7c7 662{
8140a95d 663 struct dmar_drhd_unit *drhd;
161f6934 664 struct intel_iommu *iommu;
8140a95d 665 int mask = 0xf;
6dd9a7c7
YS
666
667 if (!intel_iommu_superpage) {
161f6934 668 return 0;
6dd9a7c7
YS
669 }
670
8140a95d 671 /* set iommu_superpage to the smallest common denominator */
0e242612 672 rcu_read_lock();
8140a95d 673 for_each_active_iommu(iommu, drhd) {
161f6934
JL
674 if (iommu != skip) {
675 mask &= cap_super_page_val(iommu->cap);
676 if (!mask)
677 break;
6dd9a7c7
YS
678 }
679 }
0e242612
JL
680 rcu_read_unlock();
681
161f6934 682 return fls(mask);
6dd9a7c7
YS
683}
684
58c610bd
SY
685/* Some capabilities may be different across iommus */
686static void domain_update_iommu_cap(struct dmar_domain *domain)
687{
688 domain_update_iommu_coherency(domain);
161f6934
JL
689 domain->iommu_snooping = domain_update_iommu_snooping(NULL);
690 domain->iommu_superpage = domain_update_iommu_superpage(NULL);
58c610bd
SY
691}
692
26b86092
SM
693struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
694 u8 devfn, int alloc)
03ecc32c
DW
695{
696 struct root_entry *root = &iommu->root_entry[bus];
697 struct context_entry *context;
698 u64 *entry;
699
4df4eab1 700 entry = &root->lo;
765b6a98 701 if (sm_supported(iommu)) {
03ecc32c
DW
702 if (devfn >= 0x80) {
703 devfn -= 0x80;
704 entry = &root->hi;
705 }
706 devfn *= 2;
707 }
03ecc32c
DW
708 if (*entry & 1)
709 context = phys_to_virt(*entry & VTD_PAGE_MASK);
710 else {
711 unsigned long phy_addr;
712 if (!alloc)
713 return NULL;
714
715 context = alloc_pgtable_page(iommu->node);
716 if (!context)
717 return NULL;
718
719 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
720 phy_addr = virt_to_phys((void *)context);
721 *entry = phy_addr | 1;
722 __iommu_flush_cache(iommu, entry, sizeof(*entry));
723 }
724 return &context[devfn];
725}
726
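/*
 * Illustrative note (not part of the original source): in scalable
 * mode a root entry still covers one bus, but its lower half (lo)
 * addresses devfn 0x00-0x7f and its upper half (hi) addresses devfn
 * 0x80-0xff, and each scalable-mode context entry is twice the size
 * of a legacy one, hence the "devfn *= 2" above.
 */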
4ed6a540
DW
727static int iommu_dummy(struct device *dev)
728{
729 return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
730}
731
b9a7f981
EA
732/**
733 * is_downstream_to_pci_bridge - test if a device belongs to the PCI
734 * sub-hierarchy of a candidate PCI-PCI bridge
735 * @dev: candidate PCI device belonging to @bridge PCI sub-hierarchy
736 * @bridge: the candidate PCI-PCI bridge
737 *
738 * Return: true if @dev belongs to @bridge PCI sub-hierarchy, else false.
739 */
740static bool
741is_downstream_to_pci_bridge(struct device *dev, struct device *bridge)
742{
743 struct pci_dev *pdev, *pbridge;
744
745 if (!dev_is_pci(dev) || !dev_is_pci(bridge))
746 return false;
747
748 pdev = to_pci_dev(dev);
749 pbridge = to_pci_dev(bridge);
750
751 if (pbridge->subordinate &&
752 pbridge->subordinate->number <= pdev->bus->number &&
753 pbridge->subordinate->busn_res.end >= pdev->bus->number)
754 return true;
755
756 return false;
757}
758
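/*
 * Illustrative example (not part of the original source): if the bus
 * range behind @bridge spans 0x04-0x07, a device on bus 0x05 passes
 * the check above and is reported as downstream of the bridge, while
 * a device on bus 0x09 is not.
 */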
156baca8 759static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
c7151a8d
WH
760{
761 struct dmar_drhd_unit *drhd = NULL;
b683b230 762 struct intel_iommu *iommu;
156baca8 763 struct device *tmp;
b9a7f981 764 struct pci_dev *pdev = NULL;
aa4d066a 765 u16 segment = 0;
c7151a8d
WH
766 int i;
767
4ed6a540
DW
768 if (iommu_dummy(dev))
769 return NULL;
770
156baca8 771 if (dev_is_pci(dev)) {
1c387188
AR
772 struct pci_dev *pf_pdev;
773
156baca8 774 pdev = to_pci_dev(dev);
5823e330
JD
775
776#ifdef CONFIG_X86
777 /* VMD child devices currently cannot be handled individually */
778 if (is_vmd(pdev->bus))
779 return NULL;
780#endif
781
1c387188
AR
782 /* VFs aren't listed in scope tables; we need to look up
783 * the PF instead to find the IOMMU. */
784 pf_pdev = pci_physfn(pdev);
785 dev = &pf_pdev->dev;
156baca8 786 segment = pci_domain_nr(pdev->bus);
ca5b74d2 787 } else if (has_acpi_companion(dev))
156baca8
DW
788 dev = &ACPI_COMPANION(dev)->dev;
789
0e242612 790 rcu_read_lock();
b683b230 791 for_each_active_iommu(iommu, drhd) {
156baca8 792 if (pdev && segment != drhd->segment)
276dbf99 793 continue;
c7151a8d 794
b683b230 795 for_each_active_dev_scope(drhd->devices,
156baca8
DW
796 drhd->devices_cnt, i, tmp) {
797 if (tmp == dev) {
1c387188
AR
798 /* For a VF use its original BDF# not that of the PF
799 * which we used for the IOMMU lookup. Strictly speaking
800 * we could do this for all PCI devices; we only need to
801 * get the BDF# from the scope table for ACPI matches. */
5003ae1e 802 if (pdev && pdev->is_virtfn)
1c387188
AR
803 goto got_pdev;
804
156baca8
DW
805 *bus = drhd->devices[i].bus;
806 *devfn = drhd->devices[i].devfn;
b683b230 807 goto out;
156baca8
DW
808 }
809
b9a7f981 810 if (is_downstream_to_pci_bridge(dev, tmp))
156baca8 811 goto got_pdev;
924b6231 812 }
c7151a8d 813
156baca8
DW
814 if (pdev && drhd->include_all) {
815 got_pdev:
816 *bus = pdev->bus->number;
817 *devfn = pdev->devfn;
b683b230 818 goto out;
156baca8 819 }
c7151a8d 820 }
b683b230 821 iommu = NULL;
156baca8 822 out:
0e242612 823 rcu_read_unlock();
c7151a8d 824
b683b230 825 return iommu;
c7151a8d
WH
826}
827
5331fe6f
WH
828static void domain_flush_cache(struct dmar_domain *domain,
829 void *addr, int size)
830{
831 if (!domain->iommu_coherency)
832 clflush_cache_range(addr, size);
833}
834
ba395927
KA
835static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
836{
ba395927 837 struct context_entry *context;
03ecc32c 838 int ret = 0;
ba395927
KA
839 unsigned long flags;
840
841 spin_lock_irqsave(&iommu->lock, flags);
03ecc32c
DW
842 context = iommu_context_addr(iommu, bus, devfn, 0);
843 if (context)
844 ret = context_present(context);
ba395927
KA
845 spin_unlock_irqrestore(&iommu->lock, flags);
846 return ret;
847}
848
ba395927
KA
849static void free_context_table(struct intel_iommu *iommu)
850{
ba395927
KA
851 int i;
852 unsigned long flags;
853 struct context_entry *context;
854
855 spin_lock_irqsave(&iommu->lock, flags);
856 if (!iommu->root_entry) {
857 goto out;
858 }
859 for (i = 0; i < ROOT_ENTRY_NR; i++) {
03ecc32c 860 context = iommu_context_addr(iommu, i, 0, 0);
ba395927
KA
861 if (context)
862 free_pgtable_page(context);
03ecc32c 863
765b6a98 864 if (!sm_supported(iommu))
03ecc32c
DW
865 continue;
866
867 context = iommu_context_addr(iommu, i, 0x80, 0);
868 if (context)
869 free_pgtable_page(context);
870
ba395927
KA
871 }
872 free_pgtable_page(iommu->root_entry);
873 iommu->root_entry = NULL;
874out:
875 spin_unlock_irqrestore(&iommu->lock, flags);
876}
877
b026fd28 878static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
5cf0a76f 879 unsigned long pfn, int *target_level)
ba395927 880{
e083ea5b 881 struct dma_pte *parent, *pte;
ba395927 882 int level = agaw_to_level(domain->agaw);
4399c8bf 883 int offset;
ba395927
KA
884
885 BUG_ON(!domain->pgd);
f9423606 886
162d1b10 887 if (!domain_pfn_supported(domain, pfn))
f9423606
JS
888 /* Address beyond IOMMU's addressing capabilities. */
889 return NULL;
890
ba395927
KA
891 parent = domain->pgd;
892
5cf0a76f 893 while (1) {
ba395927
KA
894 void *tmp_page;
895
b026fd28 896 offset = pfn_level_offset(pfn, level);
ba395927 897 pte = &parent[offset];
5cf0a76f 898 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
6dd9a7c7 899 break;
5cf0a76f 900 if (level == *target_level)
ba395927
KA
901 break;
902
19c239ce 903 if (!dma_pte_present(pte)) {
c85994e4
DW
904 uint64_t pteval;
905
4c923d47 906 tmp_page = alloc_pgtable_page(domain->nid);
ba395927 907
206a73c1 908 if (!tmp_page)
ba395927 909 return NULL;
206a73c1 910
c85994e4 911 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
64de5af0 912 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
effad4b5 913 if (cmpxchg64(&pte->val, 0ULL, pteval))
c85994e4
DW
914 /* Someone else set it while we were thinking; use theirs. */
915 free_pgtable_page(tmp_page);
effad4b5 916 else
c85994e4 917 domain_flush_cache(domain, pte, sizeof(*pte));
ba395927 918 }
5cf0a76f
DW
919 if (level == 1)
920 break;
921
19c239ce 922 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
923 level--;
924 }
925
5cf0a76f
DW
926 if (!*target_level)
927 *target_level = level;
928
ba395927
KA
929 return pte;
930}
931
6dd9a7c7 932
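/*
 * Illustrative note (not part of the original source): the walk above
 * installs missing intermediate levels without taking a lock; the
 * cmpxchg64() lets concurrent walkers race safely, and the loser just
 * frees its freshly allocated page and reuses the winner's, as the
 * "Someone else set it while we were thinking" comment says.
 */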
ba395927 933/* return address's pte at specific level */
90dcfb5e
DW
934static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
935 unsigned long pfn,
6dd9a7c7 936 int level, int *large_page)
ba395927 937{
e083ea5b 938 struct dma_pte *parent, *pte;
ba395927
KA
939 int total = agaw_to_level(domain->agaw);
940 int offset;
941
942 parent = domain->pgd;
943 while (level <= total) {
90dcfb5e 944 offset = pfn_level_offset(pfn, total);
ba395927
KA
945 pte = &parent[offset];
946 if (level == total)
947 return pte;
948
6dd9a7c7
YS
949 if (!dma_pte_present(pte)) {
950 *large_page = total;
ba395927 951 break;
6dd9a7c7
YS
952 }
953
e16922af 954 if (dma_pte_superpage(pte)) {
6dd9a7c7
YS
955 *large_page = total;
956 return pte;
957 }
958
19c239ce 959 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
960 total--;
961 }
962 return NULL;
963}
964
ba395927 965/* clear last level pte, a tlb flush should be followed */
5cf0a76f 966static void dma_pte_clear_range(struct dmar_domain *domain,
595badf5
DW
967 unsigned long start_pfn,
968 unsigned long last_pfn)
ba395927 969{
e083ea5b 970 unsigned int large_page;
310a5ab9 971 struct dma_pte *first_pte, *pte;
66eae846 972
162d1b10
JL
973 BUG_ON(!domain_pfn_supported(domain, start_pfn));
974 BUG_ON(!domain_pfn_supported(domain, last_pfn));
59c36286 975 BUG_ON(start_pfn > last_pfn);
ba395927 976
04b18e65 977 /* we don't need lock here; nobody else touches the iova range */
59c36286 978 do {
6dd9a7c7
YS
979 large_page = 1;
980 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
310a5ab9 981 if (!pte) {
6dd9a7c7 982 start_pfn = align_to_level(start_pfn + 1, large_page + 1);
310a5ab9
DW
983 continue;
984 }
6dd9a7c7 985 do {
310a5ab9 986 dma_clear_pte(pte);
6dd9a7c7 987 start_pfn += lvl_to_nr_pages(large_page);
310a5ab9 988 pte++;
75e6bf96
DW
989 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
990
310a5ab9
DW
991 domain_flush_cache(domain, first_pte,
992 (void *)pte - (void *)first_pte);
59c36286
DW
993
994 } while (start_pfn && start_pfn <= last_pfn);
ba395927
KA
995}
996
3269ee0b 997static void dma_pte_free_level(struct dmar_domain *domain, int level,
bc24c571
DD
998 int retain_level, struct dma_pte *pte,
999 unsigned long pfn, unsigned long start_pfn,
1000 unsigned long last_pfn)
3269ee0b
AW
1001{
1002 pfn = max(start_pfn, pfn);
1003 pte = &pte[pfn_level_offset(pfn, level)];
1004
1005 do {
1006 unsigned long level_pfn;
1007 struct dma_pte *level_pte;
1008
1009 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
1010 goto next;
1011
f7116e11 1012 level_pfn = pfn & level_mask(level);
3269ee0b
AW
1013 level_pte = phys_to_virt(dma_pte_addr(pte));
1014
bc24c571
DD
1015 if (level > 2) {
1016 dma_pte_free_level(domain, level - 1, retain_level,
1017 level_pte, level_pfn, start_pfn,
1018 last_pfn);
1019 }
3269ee0b 1020
bc24c571
DD
1021 /*
1022 * Free the page table if we're below the level we want to
1023 * retain and the range covers the entire table.
1024 */
1025 if (level < retain_level && !(start_pfn > level_pfn ||
08336fd2 1026 last_pfn < level_pfn + level_size(level) - 1)) {
3269ee0b
AW
1027 dma_clear_pte(pte);
1028 domain_flush_cache(domain, pte, sizeof(*pte));
1029 free_pgtable_page(level_pte);
1030 }
1031next:
1032 pfn += level_size(level);
1033 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1034}
1035
bc24c571
DD
1036/*
1037 * clear last level (leaf) ptes and free page table pages below the
1038 * level we wish to keep intact.
1039 */
ba395927 1040static void dma_pte_free_pagetable(struct dmar_domain *domain,
d794dc9b 1041 unsigned long start_pfn,
bc24c571
DD
1042 unsigned long last_pfn,
1043 int retain_level)
ba395927 1044{
162d1b10
JL
1045 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1046 BUG_ON(!domain_pfn_supported(domain, last_pfn));
59c36286 1047 BUG_ON(start_pfn > last_pfn);
ba395927 1048
d41a4adb
JL
1049 dma_pte_clear_range(domain, start_pfn, last_pfn);
1050
f3a0a52f 1051 /* We don't need lock here; nobody else touches the iova range */
bc24c571 1052 dma_pte_free_level(domain, agaw_to_level(domain->agaw), retain_level,
3269ee0b 1053 domain->pgd, 0, start_pfn, last_pfn);
6660c63a 1054
ba395927 1055 /* free pgd */
d794dc9b 1056 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
ba395927
KA
1057 free_pgtable_page(domain->pgd);
1058 domain->pgd = NULL;
1059 }
1060}
1061
ea8ea460
DW
1062/* When a page at a given level is being unlinked from its parent, we don't
1063 need to *modify* it at all. All we need to do is make a list of all the
1064 pages which can be freed just as soon as we've flushed the IOTLB and we
1065 know the hardware page-walk will no longer touch them.
1066 The 'pte' argument is the *parent* PTE, pointing to the page that is to
1067 be freed. */
1068static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1069 int level, struct dma_pte *pte,
1070 struct page *freelist)
1071{
1072 struct page *pg;
1073
1074 pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1075 pg->freelist = freelist;
1076 freelist = pg;
1077
1078 if (level == 1)
1079 return freelist;
1080
adeb2590
JL
1081 pte = page_address(pg);
1082 do {
ea8ea460
DW
1083 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1084 freelist = dma_pte_list_pagetables(domain, level - 1,
1085 pte, freelist);
adeb2590
JL
1086 pte++;
1087 } while (!first_pte_in_page(pte));
ea8ea460
DW
1088
1089 return freelist;
1090}
1091
1092static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1093 struct dma_pte *pte, unsigned long pfn,
1094 unsigned long start_pfn,
1095 unsigned long last_pfn,
1096 struct page *freelist)
1097{
1098 struct dma_pte *first_pte = NULL, *last_pte = NULL;
1099
1100 pfn = max(start_pfn, pfn);
1101 pte = &pte[pfn_level_offset(pfn, level)];
1102
1103 do {
1104 unsigned long level_pfn;
1105
1106 if (!dma_pte_present(pte))
1107 goto next;
1108
1109 level_pfn = pfn & level_mask(level);
1110
1111 /* If range covers entire pagetable, free it */
1112 if (start_pfn <= level_pfn &&
1113 last_pfn >= level_pfn + level_size(level) - 1) {
 1114 /* These subordinate page tables are going away entirely. Don't
1115 bother to clear them; we're just going to *free* them. */
1116 if (level > 1 && !dma_pte_superpage(pte))
1117 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1118
1119 dma_clear_pte(pte);
1120 if (!first_pte)
1121 first_pte = pte;
1122 last_pte = pte;
1123 } else if (level > 1) {
1124 /* Recurse down into a level that isn't *entirely* obsolete */
1125 freelist = dma_pte_clear_level(domain, level - 1,
1126 phys_to_virt(dma_pte_addr(pte)),
1127 level_pfn, start_pfn, last_pfn,
1128 freelist);
1129 }
1130next:
1131 pfn += level_size(level);
1132 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1133
1134 if (first_pte)
1135 domain_flush_cache(domain, first_pte,
1136 (void *)++last_pte - (void *)first_pte);
1137
1138 return freelist;
1139}
1140
1141/* We can't just free the pages because the IOMMU may still be walking
1142 the page tables, and may have cached the intermediate levels. The
1143 pages can only be freed after the IOTLB flush has been done. */
b690420a
JR
1144static struct page *domain_unmap(struct dmar_domain *domain,
1145 unsigned long start_pfn,
1146 unsigned long last_pfn)
ea8ea460 1147{
e083ea5b 1148 struct page *freelist;
ea8ea460 1149
162d1b10
JL
1150 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1151 BUG_ON(!domain_pfn_supported(domain, last_pfn));
ea8ea460
DW
1152 BUG_ON(start_pfn > last_pfn);
1153
1154 /* we don't need lock here; nobody else touches the iova range */
1155 freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1156 domain->pgd, 0, start_pfn, last_pfn, NULL);
1157
1158 /* free pgd */
1159 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1160 struct page *pgd_page = virt_to_page(domain->pgd);
1161 pgd_page->freelist = freelist;
1162 freelist = pgd_page;
1163
1164 domain->pgd = NULL;
1165 }
1166
1167 return freelist;
1168}
1169
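/*
 * Illustrative sketch (not part of the original source) of the
 * intended deferred-free flow described above:
 *
 *	freelist = domain_unmap(domain, start_pfn, last_pfn);
 *	iommu_flush_iotlb_psi(...);	(hardware stops walking)
 *	dma_free_pagelist(freelist);	(now safe to free the pages)
 *
 * i.e. the page-table pages are only returned to the allocator after
 * the IOTLB flush, chained through page->freelist.
 */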
b690420a 1170static void dma_free_pagelist(struct page *freelist)
ea8ea460
DW
1171{
1172 struct page *pg;
1173
1174 while ((pg = freelist)) {
1175 freelist = pg->freelist;
1176 free_pgtable_page(page_address(pg));
1177 }
1178}
1179
13cf0174
JR
1180static void iova_entry_free(unsigned long data)
1181{
1182 struct page *freelist = (struct page *)data;
1183
1184 dma_free_pagelist(freelist);
1185}
1186
ba395927
KA
1187/* iommu handling */
1188static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1189{
1190 struct root_entry *root;
1191 unsigned long flags;
1192
4c923d47 1193 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
ffebeb46 1194 if (!root) {
9f10e5bf 1195 pr_err("Allocating root entry for %s failed\n",
ffebeb46 1196 iommu->name);
ba395927 1197 return -ENOMEM;
ffebeb46 1198 }
ba395927 1199
5b6985ce 1200 __iommu_flush_cache(iommu, root, ROOT_SIZE);
ba395927
KA
1201
1202 spin_lock_irqsave(&iommu->lock, flags);
1203 iommu->root_entry = root;
1204 spin_unlock_irqrestore(&iommu->lock, flags);
1205
1206 return 0;
1207}
1208
ba395927
KA
1209static void iommu_set_root_entry(struct intel_iommu *iommu)
1210{
03ecc32c 1211 u64 addr;
c416daa9 1212 u32 sts;
ba395927
KA
1213 unsigned long flag;
1214
03ecc32c 1215 addr = virt_to_phys(iommu->root_entry);
7373a8cc
LB
1216 if (sm_supported(iommu))
1217 addr |= DMA_RTADDR_SMT;
ba395927 1218
1f5b3c3f 1219 raw_spin_lock_irqsave(&iommu->register_lock, flag);
03ecc32c 1220 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
ba395927 1221
c416daa9 1222 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1223
1224 /* Make sure hardware complete it */
1225 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1226 readl, (sts & DMA_GSTS_RTPS), sts);
ba395927 1227
1f5b3c3f 1228 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1229}
1230
6f7db75e 1231void iommu_flush_write_buffer(struct intel_iommu *iommu)
ba395927
KA
1232{
1233 u32 val;
1234 unsigned long flag;
1235
9af88143 1236 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
ba395927 1237 return;
ba395927 1238
1f5b3c3f 1239 raw_spin_lock_irqsave(&iommu->register_lock, flag);
462b60f6 1240 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1241
1242 /* Make sure hardware complete it */
1243 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1244 readl, (!(val & DMA_GSTS_WBFS)), val);
ba395927 1245
1f5b3c3f 1246 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1247}
1248
1249/* return value determine if we need a write buffer flush */
4c25a2c1
DW
1250static void __iommu_flush_context(struct intel_iommu *iommu,
1251 u16 did, u16 source_id, u8 function_mask,
1252 u64 type)
ba395927
KA
1253{
1254 u64 val = 0;
1255 unsigned long flag;
1256
ba395927
KA
1257 switch (type) {
1258 case DMA_CCMD_GLOBAL_INVL:
1259 val = DMA_CCMD_GLOBAL_INVL;
1260 break;
1261 case DMA_CCMD_DOMAIN_INVL:
1262 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1263 break;
1264 case DMA_CCMD_DEVICE_INVL:
1265 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1266 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1267 break;
1268 default:
1269 BUG();
1270 }
1271 val |= DMA_CCMD_ICC;
1272
1f5b3c3f 1273 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1274 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1275
1276 /* Make sure hardware complete it */
1277 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1278 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1279
1f5b3c3f 1280 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1281}
1282
ba395927 1283/* return value determine if we need a write buffer flush */
1f0ef2aa
DW
1284static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1285 u64 addr, unsigned int size_order, u64 type)
ba395927
KA
1286{
1287 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1288 u64 val = 0, val_iva = 0;
1289 unsigned long flag;
1290
ba395927
KA
1291 switch (type) {
1292 case DMA_TLB_GLOBAL_FLUSH:
1293 /* global flush doesn't need set IVA_REG */
1294 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1295 break;
1296 case DMA_TLB_DSI_FLUSH:
1297 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1298 break;
1299 case DMA_TLB_PSI_FLUSH:
1300 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
ea8ea460 1301 /* IH bit is passed in as part of address */
ba395927
KA
1302 val_iva = size_order | addr;
1303 break;
1304 default:
1305 BUG();
1306 }
1307 /* Note: set drain read/write */
1308#if 0
1309 /*
1310 * This is probably to be super secure.. Looks like we can
1311 * ignore it without any impact.
1312 */
1313 if (cap_read_drain(iommu->cap))
1314 val |= DMA_TLB_READ_DRAIN;
1315#endif
1316 if (cap_write_drain(iommu->cap))
1317 val |= DMA_TLB_WRITE_DRAIN;
1318
1f5b3c3f 1319 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1320 /* Note: Only uses first TLB reg currently */
1321 if (val_iva)
1322 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1323 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1324
1325 /* Make sure hardware complete it */
1326 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1327 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1328
1f5b3c3f 1329 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1330
1331 /* check IOTLB invalidation granularity */
1332 if (DMA_TLB_IAIG(val) == 0)
9f10e5bf 1333 pr_err("Flush IOTLB failed\n");
ba395927 1334 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
9f10e5bf 1335 pr_debug("TLB flush request %Lx, actual %Lx\n",
5b6985ce
FY
1336 (unsigned long long)DMA_TLB_IIRG(type),
1337 (unsigned long long)DMA_TLB_IAIG(val));
ba395927
KA
1338}
1339
64ae892b
DW
1340static struct device_domain_info *
1341iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
1342 u8 bus, u8 devfn)
93a23a72 1343{
93a23a72 1344 struct device_domain_info *info;
93a23a72 1345
55d94043
JR
1346 assert_spin_locked(&device_domain_lock);
1347
93a23a72
YZ
1348 if (!iommu->qi)
1349 return NULL;
1350
93a23a72 1351 list_for_each_entry(info, &domain->devices, link)
c3b497c6
JL
1352 if (info->iommu == iommu && info->bus == bus &&
1353 info->devfn == devfn) {
b16d0cb9
DW
1354 if (info->ats_supported && info->dev)
1355 return info;
93a23a72
YZ
1356 break;
1357 }
93a23a72 1358
b16d0cb9 1359 return NULL;
93a23a72
YZ
1360}
1361
0824c592
OP
1362static void domain_update_iotlb(struct dmar_domain *domain)
1363{
1364 struct device_domain_info *info;
1365 bool has_iotlb_device = false;
1366
1367 assert_spin_locked(&device_domain_lock);
1368
1369 list_for_each_entry(info, &domain->devices, link) {
1370 struct pci_dev *pdev;
1371
1372 if (!info->dev || !dev_is_pci(info->dev))
1373 continue;
1374
1375 pdev = to_pci_dev(info->dev);
1376 if (pdev->ats_enabled) {
1377 has_iotlb_device = true;
1378 break;
1379 }
1380 }
1381
1382 domain->has_iotlb_device = has_iotlb_device;
1383}
1384
93a23a72 1385static void iommu_enable_dev_iotlb(struct device_domain_info *info)
ba395927 1386{
fb0cc3aa
BH
1387 struct pci_dev *pdev;
1388
0824c592
OP
1389 assert_spin_locked(&device_domain_lock);
1390
0bcb3e28 1391 if (!info || !dev_is_pci(info->dev))
93a23a72
YZ
1392 return;
1393
fb0cc3aa 1394 pdev = to_pci_dev(info->dev);
1c48db44
JP
1395 /* For IOMMU that supports device IOTLB throttling (DIT), we assign
1396 * PFSID to the invalidation desc of a VF such that IOMMU HW can gauge
1397 * queue depth at PF level. If DIT is not set, PFSID will be treated as
1398 * reserved, which should be set to 0.
1399 */
1400 if (!ecap_dit(info->iommu->ecap))
1401 info->pfsid = 0;
1402 else {
1403 struct pci_dev *pf_pdev;
1404
1405 /* pdev will be returned if device is not a vf */
1406 pf_pdev = pci_physfn(pdev);
cc49baa9 1407 info->pfsid = pci_dev_id(pf_pdev);
1c48db44 1408 }
fb0cc3aa 1409
b16d0cb9
DW
1410#ifdef CONFIG_INTEL_IOMMU_SVM
1411 /* The PCIe spec, in its wisdom, declares that the behaviour of
1412 the device if you enable PASID support after ATS support is
1413 undefined. So always enable PASID support on devices which
1414 have it, even if we can't yet know if we're ever going to
1415 use it. */
1416 if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
1417 info->pasid_enabled = 1;
1418
1b84778a
KS
1419 if (info->pri_supported &&
1420 (info->pasid_enabled ? pci_prg_resp_pasid_required(pdev) : 1) &&
1421 !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
b16d0cb9
DW
1422 info->pri_enabled = 1;
1423#endif
fb58fdcd 1424 if (!pdev->untrusted && info->ats_supported &&
61363c14 1425 pci_ats_page_aligned(pdev) &&
fb58fdcd 1426 !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
b16d0cb9 1427 info->ats_enabled = 1;
0824c592 1428 domain_update_iotlb(info->domain);
b16d0cb9
DW
1429 info->ats_qdep = pci_ats_queue_depth(pdev);
1430 }
93a23a72
YZ
1431}
1432
1433static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1434{
b16d0cb9
DW
1435 struct pci_dev *pdev;
1436
0824c592
OP
1437 assert_spin_locked(&device_domain_lock);
1438
da972fb1 1439 if (!dev_is_pci(info->dev))
93a23a72
YZ
1440 return;
1441
b16d0cb9
DW
1442 pdev = to_pci_dev(info->dev);
1443
1444 if (info->ats_enabled) {
1445 pci_disable_ats(pdev);
1446 info->ats_enabled = 0;
0824c592 1447 domain_update_iotlb(info->domain);
b16d0cb9
DW
1448 }
1449#ifdef CONFIG_INTEL_IOMMU_SVM
1450 if (info->pri_enabled) {
1451 pci_disable_pri(pdev);
1452 info->pri_enabled = 0;
1453 }
1454 if (info->pasid_enabled) {
1455 pci_disable_pasid(pdev);
1456 info->pasid_enabled = 0;
1457 }
1458#endif
93a23a72
YZ
1459}
1460
1461static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1462 u64 addr, unsigned mask)
1463{
1464 u16 sid, qdep;
1465 unsigned long flags;
1466 struct device_domain_info *info;
1467
0824c592
OP
1468 if (!domain->has_iotlb_device)
1469 return;
1470
93a23a72
YZ
1471 spin_lock_irqsave(&device_domain_lock, flags);
1472 list_for_each_entry(info, &domain->devices, link) {
b16d0cb9 1473 if (!info->ats_enabled)
93a23a72
YZ
1474 continue;
1475
1476 sid = info->bus << 8 | info->devfn;
b16d0cb9 1477 qdep = info->ats_qdep;
1c48db44
JP
1478 qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
1479 qdep, addr, mask);
93a23a72
YZ
1480 }
1481 spin_unlock_irqrestore(&device_domain_lock, flags);
1482}
1483
a1ddcbe9
JR
1484static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
1485 struct dmar_domain *domain,
1486 unsigned long pfn, unsigned int pages,
1487 int ih, int map)
ba395927 1488{
9dd2fe89 1489 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
03d6a246 1490 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
a1ddcbe9 1491 u16 did = domain->iommu_did[iommu->seq_id];
ba395927 1492
ba395927
KA
1493 BUG_ON(pages == 0);
1494
ea8ea460
DW
1495 if (ih)
1496 ih = 1 << 6;
ba395927 1497 /*
9dd2fe89
YZ
 1498 * Fall back to domain-selective flush if no PSI support or the size is
1499 * too big.
ba395927
KA
1500 * PSI requires page size to be 2 ^ x, and the base address is naturally
1501 * aligned to the size
1502 */
9dd2fe89
YZ
1503 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1504 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1f0ef2aa 1505 DMA_TLB_DSI_FLUSH);
9dd2fe89 1506 else
ea8ea460 1507 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
9dd2fe89 1508 DMA_TLB_PSI_FLUSH);
bf92df30
YZ
1509
1510 /*
82653633
NA
1511 * In caching mode, changes of pages from non-present to present require
1512 * flush. However, device IOTLB doesn't need to be flushed in this case.
bf92df30 1513 */
82653633 1514 if (!cap_caching_mode(iommu->cap) || !map)
9d2e6505 1515 iommu_flush_dev_iotlb(domain, addr, mask);
ba395927
KA
1516}
1517
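/*
 * Illustrative example (not part of the original source): flushing
 * pages == 300 gives mask == ilog2(__roundup_pow_of_two(300)) == 9,
 * so one PSI invalidation covers a 512-page (2MiB) aligned region; if
 * mask exceeded cap_max_amask_val() the code above falls back to a
 * domain-selective flush instead.
 */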
eed91a0b
PX
1518/* Notification for newly created mappings */
1519static inline void __mapping_notify_one(struct intel_iommu *iommu,
1520 struct dmar_domain *domain,
1521 unsigned long pfn, unsigned int pages)
1522{
1523 /* It's a non-present to present mapping. Only flush if caching mode */
1524 if (cap_caching_mode(iommu->cap))
1525 iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1);
1526 else
1527 iommu_flush_write_buffer(iommu);
1528}
1529
13cf0174
JR
1530static void iommu_flush_iova(struct iova_domain *iovad)
1531{
1532 struct dmar_domain *domain;
1533 int idx;
1534
1535 domain = container_of(iovad, struct dmar_domain, iovad);
1536
1537 for_each_domain_iommu(idx, domain) {
1538 struct intel_iommu *iommu = g_iommus[idx];
1539 u16 did = domain->iommu_did[iommu->seq_id];
1540
1541 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
1542
1543 if (!cap_caching_mode(iommu->cap))
1544 iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
1545 0, MAX_AGAW_PFN_WIDTH);
1546 }
1547}
1548
f8bab735 1549static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1550{
1551 u32 pmen;
1552 unsigned long flags;
1553
5bb71fc7
LB
1554 if (!cap_plmr(iommu->cap) && !cap_phmr(iommu->cap))
1555 return;
1556
1f5b3c3f 1557 raw_spin_lock_irqsave(&iommu->register_lock, flags);
f8bab735 1558 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1559 pmen &= ~DMA_PMEN_EPM;
1560 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1561
1562 /* wait for the protected region status bit to clear */
1563 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1564 readl, !(pmen & DMA_PMEN_PRS), pmen);
1565
1f5b3c3f 1566 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
f8bab735 1567}
1568
2a41ccee 1569static void iommu_enable_translation(struct intel_iommu *iommu)
ba395927
KA
1570{
1571 u32 sts;
1572 unsigned long flags;
1573
1f5b3c3f 1574 raw_spin_lock_irqsave(&iommu->register_lock, flags);
c416daa9
DW
1575 iommu->gcmd |= DMA_GCMD_TE;
1576 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1577
1578 /* Make sure hardware complete it */
1579 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1580 readl, (sts & DMA_GSTS_TES), sts);
ba395927 1581
1f5b3c3f 1582 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
ba395927
KA
1583}
1584
2a41ccee 1585static void iommu_disable_translation(struct intel_iommu *iommu)
ba395927
KA
1586{
1587 u32 sts;
1588 unsigned long flag;
1589
1f5b3c3f 1590 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1591 iommu->gcmd &= ~DMA_GCMD_TE;
1592 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1593
1594 /* Make sure hardware complete it */
1595 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1596 readl, (!(sts & DMA_GSTS_TES)), sts);
ba395927 1597
1f5b3c3f 1598 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1599}
1600
3460a6d9 1601
ba395927
KA
1602static int iommu_init_domains(struct intel_iommu *iommu)
1603{
8bf47816
JR
1604 u32 ndomains, nlongs;
1605 size_t size;
ba395927
KA
1606
1607 ndomains = cap_ndoms(iommu->cap);
8bf47816 1608 pr_debug("%s: Number of Domains supported <%d>\n",
9f10e5bf 1609 iommu->name, ndomains);
ba395927
KA
1610 nlongs = BITS_TO_LONGS(ndomains);
1611
94a91b50
DD
1612 spin_lock_init(&iommu->lock);
1613
ba395927
KA
1614 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1615 if (!iommu->domain_ids) {
9f10e5bf
JR
1616 pr_err("%s: Allocating domain id array failed\n",
1617 iommu->name);
ba395927
KA
1618 return -ENOMEM;
1619 }
8bf47816 1620
86f004c7 1621 size = (ALIGN(ndomains, 256) >> 8) * sizeof(struct dmar_domain **);
8bf47816
JR
1622 iommu->domains = kzalloc(size, GFP_KERNEL);
1623
1624 if (iommu->domains) {
1625 size = 256 * sizeof(struct dmar_domain *);
1626 iommu->domains[0] = kzalloc(size, GFP_KERNEL);
1627 }
1628
1629 if (!iommu->domains || !iommu->domains[0]) {
9f10e5bf
JR
1630 pr_err("%s: Allocating domain array failed\n",
1631 iommu->name);
852bdb04 1632 kfree(iommu->domain_ids);
8bf47816 1633 kfree(iommu->domains);
852bdb04 1634 iommu->domain_ids = NULL;
8bf47816 1635 iommu->domains = NULL;
ba395927
KA
1636 return -ENOMEM;
1637 }
1638
8bf47816
JR
1639
1640
ba395927 1641 /*
c0e8a6c8
JR
1642 * If Caching mode is set, then invalid translations are tagged
1643 * with domain-id 0, hence we need to pre-allocate it. We also
1644 * use domain-id 0 as a marker for non-allocated domain-id, so
1645 * make sure it is not used for a real domain.
ba395927 1646 */
c0e8a6c8
JR
1647 set_bit(0, iommu->domain_ids);
1648
3b33d4ab
LB
1649 /*
1650 * Vt-d spec rev3.0 (section 6.2.3.1) requires that each pasid
1651 * entry for first-level or pass-through translation modes should
1652 * be programmed with a domain id different from those used for
1653 * second-level or nested translation. We reserve a domain id for
1654 * this purpose.
1655 */
1656 if (sm_supported(iommu))
1657 set_bit(FLPT_DEFAULT_DID, iommu->domain_ids);
1658
ba395927
KA
1659 return 0;
1660}
ba395927 1661
ffebeb46 1662static void disable_dmar_iommu(struct intel_iommu *iommu)
ba395927 1663{
29a27719 1664 struct device_domain_info *info, *tmp;
55d94043 1665 unsigned long flags;
ba395927 1666
29a27719
JR
1667 if (!iommu->domains || !iommu->domain_ids)
1668 return;
a4eaa86c 1669
55d94043 1670 spin_lock_irqsave(&device_domain_lock, flags);
29a27719 1671 list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
29a27719
JR
1672 if (info->iommu != iommu)
1673 continue;
1674
1675 if (!info->dev || !info->domain)
1676 continue;
1677
bea64033 1678 __dmar_remove_one_dev_info(info);
ba395927 1679 }
55d94043 1680 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927
KA
1681
1682 if (iommu->gcmd & DMA_GCMD_TE)
1683 iommu_disable_translation(iommu);
ffebeb46 1684}
ba395927 1685
ffebeb46
JL
1686static void free_dmar_iommu(struct intel_iommu *iommu)
1687{
1688 if ((iommu->domains) && (iommu->domain_ids)) {
86f004c7 1689 int elems = ALIGN(cap_ndoms(iommu->cap), 256) >> 8;
8bf47816
JR
1690 int i;
1691
1692 for (i = 0; i < elems; i++)
1693 kfree(iommu->domains[i]);
ffebeb46
JL
1694 kfree(iommu->domains);
1695 kfree(iommu->domain_ids);
1696 iommu->domains = NULL;
1697 iommu->domain_ids = NULL;
1698 }
ba395927 1699
d9630fe9
WH
1700 g_iommus[iommu->seq_id] = NULL;
1701
ba395927
KA
1702 /* free context mapping */
1703 free_context_table(iommu);
8a94ade4
DW
1704
1705#ifdef CONFIG_INTEL_IOMMU_SVM
765b6a98 1706 if (pasid_supported(iommu)) {
a222a7f0
DW
1707 if (ecap_prs(iommu->ecap))
1708 intel_svm_finish_prq(iommu);
a222a7f0 1709 }
8a94ade4 1710#endif
ba395927
KA
1711}
1712
ab8dfe25 1713static struct dmar_domain *alloc_domain(int flags)
ba395927 1714{
ba395927 1715 struct dmar_domain *domain;
ba395927
KA
1716
1717 domain = alloc_domain_mem();
1718 if (!domain)
1719 return NULL;
1720
ab8dfe25 1721 memset(domain, 0, sizeof(*domain));
98fa15f3 1722 domain->nid = NUMA_NO_NODE;
ab8dfe25 1723 domain->flags = flags;
0824c592 1724 domain->has_iotlb_device = false;
92d03cc8 1725 INIT_LIST_HEAD(&domain->devices);
2c2e2c38
FY
1726
1727 return domain;
1728}
1729
d160aca5
JR
1730/* Must be called with iommu->lock */
1731static int domain_attach_iommu(struct dmar_domain *domain,
fb170fb4
JL
1732 struct intel_iommu *iommu)
1733{
44bde614 1734 unsigned long ndomains;
55d94043 1735 int num;
44bde614 1736
55d94043 1737 assert_spin_locked(&device_domain_lock);
d160aca5 1738 assert_spin_locked(&iommu->lock);
ba395927 1739
29a27719
JR
1740 domain->iommu_refcnt[iommu->seq_id] += 1;
1741 domain->iommu_count += 1;
1742 if (domain->iommu_refcnt[iommu->seq_id] == 1) {
fb170fb4 1743 ndomains = cap_ndoms(iommu->cap);
d160aca5
JR
1744 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1745
1746 if (num >= ndomains) {
1747 pr_err("%s: No free domain ids\n", iommu->name);
1748 domain->iommu_refcnt[iommu->seq_id] -= 1;
1749 domain->iommu_count -= 1;
55d94043 1750 return -ENOSPC;
2c2e2c38 1751 }
ba395927 1752
d160aca5
JR
1753 set_bit(num, iommu->domain_ids);
1754 set_iommu_domain(iommu, num, domain);
1755
1756 domain->iommu_did[iommu->seq_id] = num;
1757 domain->nid = iommu->node;
fb170fb4 1758
fb170fb4
JL
1759 domain_update_iommu_cap(domain);
1760 }
d160aca5 1761
55d94043 1762 return 0;
fb170fb4
JL
1763}
1764
1765static int domain_detach_iommu(struct dmar_domain *domain,
1766 struct intel_iommu *iommu)
1767{
e083ea5b 1768 int num, count;
d160aca5 1769
55d94043 1770 assert_spin_locked(&device_domain_lock);
d160aca5 1771 assert_spin_locked(&iommu->lock);
fb170fb4 1772
29a27719
JR
1773 domain->iommu_refcnt[iommu->seq_id] -= 1;
1774 count = --domain->iommu_count;
1775 if (domain->iommu_refcnt[iommu->seq_id] == 0) {
d160aca5
JR
1776 num = domain->iommu_did[iommu->seq_id];
1777 clear_bit(num, iommu->domain_ids);
1778 set_iommu_domain(iommu, num, NULL);
fb170fb4 1779
fb170fb4 1780 domain_update_iommu_cap(domain);
c0e8a6c8 1781 domain->iommu_did[iommu->seq_id] = 0;
fb170fb4 1782 }
fb170fb4
JL
1783
1784 return count;
1785}
1786
ba395927 1787static struct iova_domain reserved_iova_list;
8a443df4 1788static struct lock_class_key reserved_rbtree_key;
ba395927 1789
51a63e67 1790static int dmar_init_reserved_ranges(void)
ba395927
KA
1791{
1792 struct pci_dev *pdev = NULL;
1793 struct iova *iova;
1794 int i;
ba395927 1795
aa3ac946 1796 init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN);
ba395927 1797
8a443df4
MG
1798 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1799 &reserved_rbtree_key);
1800
ba395927
KA
1801 /* IOAPIC ranges shouldn't be accessed by DMA */
1802 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1803 IOVA_PFN(IOAPIC_RANGE_END));
51a63e67 1804 if (!iova) {
9f10e5bf 1805 pr_err("Reserve IOAPIC range failed\n");
51a63e67
JC
1806 return -ENODEV;
1807 }
ba395927
KA
1808
1809 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1810 for_each_pci_dev(pdev) {
1811 struct resource *r;
1812
1813 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1814 r = &pdev->resource[i];
1815 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1816 continue;
1a4a4551
DW
1817 iova = reserve_iova(&reserved_iova_list,
1818 IOVA_PFN(r->start),
1819 IOVA_PFN(r->end));
51a63e67 1820 if (!iova) {
932a6523 1821 pci_err(pdev, "Reserve iova for %pR failed\n", r);
51a63e67
JC
1822 return -ENODEV;
1823 }
ba395927
KA
1824 }
1825 }
51a63e67 1826 return 0;
ba395927
KA
1827}
1828
1829static void domain_reserve_special_ranges(struct dmar_domain *domain)
1830{
1831 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1832}
1833
1834static inline int guestwidth_to_adjustwidth(int gaw)
1835{
1836 int agaw;
1837 int r = (gaw - 12) % 9;
1838
1839 if (r == 0)
1840 agaw = gaw;
1841 else
1842 agaw = gaw + 9 - r;
1843 if (agaw > 64)
1844 agaw = 64;
1845 return agaw;
1846}
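/*
 * Illustrative sketch, not part of the driver: the rounding above in
 * numbers. Each VT-d page-table level resolves 9 address bits on top of
 * the 12-bit page offset, so a guest width is rounded up to the next
 * 12 + 9*n boundary and clamped at 64 bits. The helper name is
 * hypothetical and exists only to show the arithmetic.
 */
static inline void example_adjustwidth_demo(void)
{
        /* 39- and 48-bit widths already sit on a level boundary */
        WARN_ON(guestwidth_to_adjustwidth(39) != 39);
        WARN_ON(guestwidth_to_adjustwidth(48) != 48);
        /* 40 bits rounds up to the next boundary, 48 */
        WARN_ON(guestwidth_to_adjustwidth(40) != 48);
        /* anything above 64 bits is clamped to 64 */
        WARN_ON(guestwidth_to_adjustwidth(70) != 64);
}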
1847
dc534b25
JR
1848static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
1849 int guest_width)
ba395927 1850{
ba395927
KA
1851 int adjust_width, agaw;
1852 unsigned long sagaw;
13cf0174 1853 int err;
ba395927 1854
aa3ac946 1855 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
13cf0174
JR
1856
1857 err = init_iova_flush_queue(&domain->iovad,
1858 iommu_flush_iova, iova_entry_free);
1859 if (err)
1860 return err;
1861
ba395927
KA
1862 domain_reserve_special_ranges(domain);
1863
1864 /* calculate AGAW */
ba395927
KA
1865 if (guest_width > cap_mgaw(iommu->cap))
1866 guest_width = cap_mgaw(iommu->cap);
1867 domain->gaw = guest_width;
1868 adjust_width = guestwidth_to_adjustwidth(guest_width);
1869 agaw = width_to_agaw(adjust_width);
1870 sagaw = cap_sagaw(iommu->cap);
1871 if (!test_bit(agaw, &sagaw)) {
1872 /* hardware doesn't support it, choose a bigger one */
9f10e5bf 1873 pr_debug("Hardware doesn't support agaw %d\n", agaw);
ba395927
KA
1874 agaw = find_next_bit(&sagaw, 5, agaw);
1875 if (agaw >= 5)
1876 return -ENODEV;
1877 }
1878 domain->agaw = agaw;
ba395927 1879
8e604097
WH
1880 if (ecap_coherent(iommu->ecap))
1881 domain->iommu_coherency = 1;
1882 else
1883 domain->iommu_coherency = 0;
1884
58c610bd
SY
1885 if (ecap_sc_support(iommu->ecap))
1886 domain->iommu_snooping = 1;
1887 else
1888 domain->iommu_snooping = 0;
1889
214e39aa
DW
1890 if (intel_iommu_superpage)
1891 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1892 else
1893 domain->iommu_superpage = 0;
1894
4c923d47 1895 domain->nid = iommu->node;
c7151a8d 1896
ba395927 1897 /* always allocate the top pgd */
4c923d47 1898 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
ba395927
KA
1899 if (!domain->pgd)
1900 return -ENOMEM;
5b6985ce 1901 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
ba395927
KA
1902 return 0;
1903}
1904
1905static void domain_exit(struct dmar_domain *domain)
1906{
e083ea5b 1907 struct page *freelist;
ba395927 1908
d160aca5 1909 /* Remove associated devices and clear attached or cached domains */
ba395927 1910 domain_remove_dev_info(domain);
92d03cc8 1911
ba395927
KA
1912 /* destroy iovas */
1913 put_iova_domain(&domain->iovad);
ba395927 1914
ea8ea460 1915 freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
ba395927 1916
ea8ea460
DW
1917 dma_free_pagelist(freelist);
1918
ba395927
KA
1919 free_domain_mem(domain);
1920}
1921
7373a8cc
LB
1922/*
1923 * Get the PASID directory size for a scalable mode context entry.
1924 * A value of X in the PDTS field of a scalable mode context entry
1925 * indicates a PASID directory with 2^(X + 7) entries.
1926 */
1927static inline unsigned long context_get_sm_pds(struct pasid_table *table)
1928{
1929 int pds, max_pde;
1930
1931 max_pde = table->max_pasid >> PASID_PDE_SHIFT;
1932 pds = find_first_bit((unsigned long *)&max_pde, MAX_NR_PASID_BITS);
1933 if (pds < 7)
1934 return 0;
1935
1936 return pds - 7;
1937}
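/*
 * Illustrative sketch, not part of the driver: the PDTS formula above in
 * numbers, assuming the usual 64 PASIDs per directory entry
 * (PASID_PDE_SHIFT == 6). A hypothetical 20-bit PASID space needs a
 * 2^14-entry directory, so the helper returns 7 and the hardware decodes
 * 2^(7 + 7) = 16384 entries. The function name below is hypothetical.
 */
static inline void example_sm_pds_demo(void)
{
        struct pasid_table tbl = { .max_pasid = 1 << 20 };

        pr_debug("PDTS value for a 20-bit PASID space: %lu\n",
                 context_get_sm_pds(&tbl));
}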
1938
1939/*
1940 * Set the RID_PASID field of a scalable mode context entry. The
1941 * IOMMU hardware will use the PASID value set in this field for
1942 * DMA translations of DMA requests without PASID.
1943 */
1944static inline void
1945context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid)
1946{
1947 context->hi |= pasid & ((1 << 20) - 1);
1948 context->hi |= (1 << 20);
1949}
1950
1951/*
1952 * Set the DTE(Device-TLB Enable) field of a scalable mode context
1953 * entry.
1954 */
1955static inline void context_set_sm_dte(struct context_entry *context)
1956{
1957 context->lo |= (1 << 2);
1958}
1959
1960/*
1961 * Set the PRE(Page Request Enable) field of a scalable mode context
1962 * entry.
1963 */
1964static inline void context_set_sm_pre(struct context_entry *context)
1965{
1966 context->lo |= (1 << 4);
1967}
1968
1969/* Convert value to context PASID directory size field coding. */
1970#define context_pdts(pds) (((pds) & 0x7) << 9)
1971
64ae892b
DW
1972static int domain_context_mapping_one(struct dmar_domain *domain,
1973 struct intel_iommu *iommu,
ca6e322d 1974 struct pasid_table *table,
28ccce0d 1975 u8 bus, u8 devfn)
ba395927 1976{
c6c2cebd 1977 u16 did = domain->iommu_did[iommu->seq_id];
28ccce0d
JR
1978 int translation = CONTEXT_TT_MULTI_LEVEL;
1979 struct device_domain_info *info = NULL;
ba395927 1980 struct context_entry *context;
ba395927 1981 unsigned long flags;
7373a8cc 1982 int ret;
28ccce0d 1983
c6c2cebd
JR
1984 WARN_ON(did == 0);
1985
28ccce0d
JR
1986 if (hw_pass_through && domain_type_is_si(domain))
1987 translation = CONTEXT_TT_PASS_THROUGH;
ba395927
KA
1988
1989 pr_debug("Set context mapping for %02x:%02x.%d\n",
1990 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
4ed0d3e6 1991
ba395927 1992 BUG_ON(!domain->pgd);
5331fe6f 1993
55d94043
JR
1994 spin_lock_irqsave(&device_domain_lock, flags);
1995 spin_lock(&iommu->lock);
1996
1997 ret = -ENOMEM;
03ecc32c 1998 context = iommu_context_addr(iommu, bus, devfn, 1);
ba395927 1999 if (!context)
55d94043 2000 goto out_unlock;
ba395927 2001
55d94043
JR
2002 ret = 0;
2003 if (context_present(context))
2004 goto out_unlock;
cf484d0e 2005
aec0e861
XP
2006 /*
2007 * For kdump cases, old valid entries may be cached due to the
2008 * in-flight DMA and copied pgtable, but there is no unmapping
2009 * behaviour for them, thus we need an explicit cache flush for
2010 * the newly-mapped device. For kdump, at this point, the device
2011 * is supposed to finish reset at its driver probe stage, so no
2012 * in-flight DMA will exist, and we don't need to worry about it
2013 * hereafter.
2014 */
2015 if (context_copied(context)) {
2016 u16 did_old = context_domain_id(context);
2017
b117e038 2018 if (did_old < cap_ndoms(iommu->cap)) {
aec0e861
XP
2019 iommu->flush.flush_context(iommu, did_old,
2020 (((u16)bus) << 8) | devfn,
2021 DMA_CCMD_MASK_NOBIT,
2022 DMA_CCMD_DEVICE_INVL);
f73a7eee
KA
2023 iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
2024 DMA_TLB_DSI_FLUSH);
2025 }
aec0e861
XP
2026 }
2027
de24e553 2028 context_clear_entry(context);
ea6606b0 2029
7373a8cc
LB
2030 if (sm_supported(iommu)) {
2031 unsigned long pds;
4ed0d3e6 2032
7373a8cc
LB
2033 WARN_ON(!table);
2034
2035 /* Setup the PASID DIR pointer: */
2036 pds = context_get_sm_pds(table);
2037 context->lo = (u64)virt_to_phys(table->table) |
2038 context_pdts(pds);
2039
2040 /* Setup the RID_PASID field: */
2041 context_set_sm_rid2pasid(context, PASID_RID2PASID);
de24e553 2042
de24e553 2043 /*
7373a8cc
LB
2044 * Setup the Device-TLB enable bit and Page request
2045 * Enable bit:
de24e553 2046 */
7373a8cc
LB
2047 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2048 if (info && info->ats_supported)
2049 context_set_sm_dte(context);
2050 if (info && info->pri_supported)
2051 context_set_sm_pre(context);
2052 } else {
2053 struct dma_pte *pgd = domain->pgd;
2054 int agaw;
2055
2056 context_set_domain_id(context, did);
7373a8cc
LB
2057
2058 if (translation != CONTEXT_TT_PASS_THROUGH) {
2059 /*
2060 * Skip top levels of page tables for iommu which has
2061 * less agaw than default. Unnecessary for PT mode.
2062 */
2063 for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
2064 ret = -ENOMEM;
2065 pgd = phys_to_virt(dma_pte_addr(pgd));
2066 if (!dma_pte_present(pgd))
2067 goto out_unlock;
2068 }
2069
2070 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2071 if (info && info->ats_supported)
2072 translation = CONTEXT_TT_DEV_IOTLB;
2073 else
2074 translation = CONTEXT_TT_MULTI_LEVEL;
2075
2076 context_set_address_root(context, virt_to_phys(pgd));
2077 context_set_address_width(context, agaw);
2078 } else {
2079 /*
2080 * In pass through mode, AW must be programmed to
2081 * indicate the largest AGAW value supported by
2082 * hardware. And ASR is ignored by hardware.
2083 */
2084 context_set_address_width(context, iommu->msagaw);
2085 }
41b80db2
LB
2086
2087 context_set_translation_type(context, translation);
93a23a72 2088 }
4ed0d3e6 2089
c07e7d21
MM
2090 context_set_fault_enable(context);
2091 context_set_present(context);
5331fe6f 2092 domain_flush_cache(domain, context, sizeof(*context));
ba395927 2093
4c25a2c1
DW
2094 /*
2095 * It's a non-present to present mapping. If hardware doesn't cache
2096 * non-present entries we only need to flush the write-buffer. If it
2097 * _does_ cache non-present entries, then it does so in the special
2098 * domain #0, which we have to flush:
2099 */
2100 if (cap_caching_mode(iommu->cap)) {
2101 iommu->flush.flush_context(iommu, 0,
2102 (((u16)bus) << 8) | devfn,
2103 DMA_CCMD_MASK_NOBIT,
2104 DMA_CCMD_DEVICE_INVL);
c6c2cebd 2105 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
4c25a2c1 2106 } else {
ba395927 2107 iommu_flush_write_buffer(iommu);
4c25a2c1 2108 }
93a23a72 2109 iommu_enable_dev_iotlb(info);
c7151a8d 2110
55d94043
JR
2111 ret = 0;
2112
2113out_unlock:
2114 spin_unlock(&iommu->lock);
2115 spin_unlock_irqrestore(&device_domain_lock, flags);
fb170fb4 2116
5c365d18 2117 return ret;
ba395927
KA
2118}
2119
579305f7
AW
2120struct domain_context_mapping_data {
2121 struct dmar_domain *domain;
2122 struct intel_iommu *iommu;
ca6e322d 2123 struct pasid_table *table;
579305f7
AW
2124};
2125
2126static int domain_context_mapping_cb(struct pci_dev *pdev,
2127 u16 alias, void *opaque)
2128{
2129 struct domain_context_mapping_data *data = opaque;
2130
2131 return domain_context_mapping_one(data->domain, data->iommu,
ca6e322d
LB
2132 data->table, PCI_BUS_NUM(alias),
2133 alias & 0xff);
579305f7
AW
2134}
2135
ba395927 2136static int
28ccce0d 2137domain_context_mapping(struct dmar_domain *domain, struct device *dev)
ba395927 2138{
ca6e322d
LB
2139 struct domain_context_mapping_data data;
2140 struct pasid_table *table;
64ae892b 2141 struct intel_iommu *iommu;
156baca8 2142 u8 bus, devfn;
64ae892b 2143
e1f167f3 2144 iommu = device_to_iommu(dev, &bus, &devfn);
64ae892b
DW
2145 if (!iommu)
2146 return -ENODEV;
ba395927 2147
ca6e322d
LB
2148 table = intel_pasid_get_table(dev);
2149
579305f7 2150 if (!dev_is_pci(dev))
ca6e322d
LB
2151 return domain_context_mapping_one(domain, iommu, table,
2152 bus, devfn);
579305f7
AW
2153
2154 data.domain = domain;
2155 data.iommu = iommu;
ca6e322d 2156 data.table = table;
579305f7
AW
2157
2158 return pci_for_each_dma_alias(to_pci_dev(dev),
2159 &domain_context_mapping_cb, &data);
2160}
2161
2162static int domain_context_mapped_cb(struct pci_dev *pdev,
2163 u16 alias, void *opaque)
2164{
2165 struct intel_iommu *iommu = opaque;
2166
2167 return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
ba395927
KA
2168}
2169
e1f167f3 2170static int domain_context_mapped(struct device *dev)
ba395927 2171{
5331fe6f 2172 struct intel_iommu *iommu;
156baca8 2173 u8 bus, devfn;
5331fe6f 2174
e1f167f3 2175 iommu = device_to_iommu(dev, &bus, &devfn);
5331fe6f
WH
2176 if (!iommu)
2177 return -ENODEV;
ba395927 2178
579305f7
AW
2179 if (!dev_is_pci(dev))
2180 return device_context_mapped(iommu, bus, devfn);
e1f167f3 2181
579305f7
AW
2182 return !pci_for_each_dma_alias(to_pci_dev(dev),
2183 domain_context_mapped_cb, iommu);
ba395927
KA
2184}
2185
f532959b
FY
2186/* Returns a number of VTD pages, but aligned to MM page size */
2187static inline unsigned long aligned_nrpages(unsigned long host_addr,
2188 size_t size)
2189{
2190 host_addr &= ~PAGE_MASK;
2191 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
2192}
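/*
 * Illustrative sketch, not part of the driver: aligned_nrpages() in
 * numbers, assuming 4KiB pages so MM pages and VTD pages coincide. Only
 * the offset of host_addr within its page matters. The helper name is
 * hypothetical.
 */
static inline void example_aligned_nrpages_demo(void)
{
        /* a 4KiB buffer starting 0x800 into a page straddles two pages */
        WARN_ON(aligned_nrpages(0x800, 0x1000) != 2);
        /* the same buffer starting on a page boundary needs only one */
        WARN_ON(aligned_nrpages(0, 0x1000) != 1);
}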
2193
6dd9a7c7
YS
2194/* Return largest possible superpage level for a given mapping */
2195static inline int hardware_largepage_caps(struct dmar_domain *domain,
2196 unsigned long iov_pfn,
2197 unsigned long phy_pfn,
2198 unsigned long pages)
2199{
2200 int support, level = 1;
2201 unsigned long pfnmerge;
2202
2203 support = domain->iommu_superpage;
2204
2205 /* To use a large page, the virtual *and* physical addresses
2206 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2207 of them will mean we have to use smaller pages. So just
2208 merge them and check both at once. */
2209 pfnmerge = iov_pfn | phy_pfn;
2210
2211 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
2212 pages >>= VTD_STRIDE_SHIFT;
2213 if (!pages)
2214 break;
2215 pfnmerge >>= VTD_STRIDE_SHIFT;
2216 level++;
2217 support--;
2218 }
2219 return level;
2220}
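/*
 * Illustrative sketch, not part of the driver: the merge-and-mask test
 * above in numbers. With a 9-bit stride, a level-2 (2MiB) superpage is
 * only usable when both the IOVA pfn and the physical pfn have their low
 * nine bits clear and at least 512 pages remain. The values and helper
 * name are hypothetical.
 */
static inline void example_superpage_alignment_demo(void)
{
        unsigned long iov_pfn = 0x200;  /* 2MiB-aligned IOVA */
        unsigned long phy_pfn = 0x400;  /* 2MiB-aligned physical address */

        /* both aligned: merged pfn has no bits below the stride */
        WARN_ON((iov_pfn | phy_pfn) & ~VTD_STRIDE_MASK);
        /* one misaligned pfn spoils the superpage for both */
        WARN_ON(!((iov_pfn | (phy_pfn + 1)) & ~VTD_STRIDE_MASK));
}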
2221
9051aa02
DW
2222static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2223 struct scatterlist *sg, unsigned long phys_pfn,
2224 unsigned long nr_pages, int prot)
e1605495
DW
2225{
2226 struct dma_pte *first_pte = NULL, *pte = NULL;
9051aa02 2227 phys_addr_t uninitialized_var(pteval);
cc4f14aa 2228 unsigned long sg_res = 0;
6dd9a7c7
YS
2229 unsigned int largepage_lvl = 0;
2230 unsigned long lvl_pages = 0;
e1605495 2231
162d1b10 2232 BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
e1605495
DW
2233
2234 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2235 return -EINVAL;
2236
2237 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
2238
cc4f14aa
JL
2239 if (!sg) {
2240 sg_res = nr_pages;
9051aa02
DW
2241 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
2242 }
2243
6dd9a7c7 2244 while (nr_pages > 0) {
c85994e4
DW
2245 uint64_t tmp;
2246
e1605495 2247 if (!sg_res) {
29a90b70
RM
2248 unsigned int pgoff = sg->offset & ~PAGE_MASK;
2249
f532959b 2250 sg_res = aligned_nrpages(sg->offset, sg->length);
29a90b70 2251 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + pgoff;
e1605495 2252 sg->dma_length = sg->length;
29a90b70 2253 pteval = (sg_phys(sg) - pgoff) | prot;
6dd9a7c7 2254 phys_pfn = pteval >> VTD_PAGE_SHIFT;
e1605495 2255 }
6dd9a7c7 2256
e1605495 2257 if (!pte) {
6dd9a7c7
YS
2258 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2259
5cf0a76f 2260 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
e1605495
DW
2261 if (!pte)
2262 return -ENOMEM;
6dd9a7c7 2264 /* It is a large page */
6491d4d0 2264 if (largepage_lvl > 1) {
ba2374fd
CZ
2265 unsigned long nr_superpages, end_pfn;
2266
6dd9a7c7 2267 pteval |= DMA_PTE_LARGE_PAGE;
d41a4adb 2268 lvl_pages = lvl_to_nr_pages(largepage_lvl);
ba2374fd
CZ
2269
2270 nr_superpages = sg_res / lvl_pages;
2271 end_pfn = iov_pfn + nr_superpages * lvl_pages - 1;
2272
d41a4adb
JL
2273 /*
2274 * Ensure that old small page tables are
ba2374fd 2275 * removed to make room for superpage(s).
bc24c571
DD
2276 * We're adding new large pages, so make sure
2277 * we don't remove their parent tables.
d41a4adb 2278 */
bc24c571
DD
2279 dma_pte_free_pagetable(domain, iov_pfn, end_pfn,
2280 largepage_lvl + 1);
6491d4d0 2281 } else {
6dd9a7c7 2282 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
6491d4d0 2283 }
6dd9a7c7 2284
e1605495
DW
2285 }
2286 /* We don't need lock here, nobody else
2287 * touches the iova range
2288 */
7766a3fb 2289 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
c85994e4 2290 if (tmp) {
1bf20f0d 2291 static int dumps = 5;
9f10e5bf
JR
2292 pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2293 iov_pfn, tmp, (unsigned long long)pteval);
1bf20f0d
DW
2294 if (dumps) {
2295 dumps--;
2296 debug_dma_dump_mappings(NULL);
2297 }
2298 WARN_ON(1);
2299 }
6dd9a7c7
YS
2300
2301 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2302
2303 BUG_ON(nr_pages < lvl_pages);
2304 BUG_ON(sg_res < lvl_pages);
2305
2306 nr_pages -= lvl_pages;
2307 iov_pfn += lvl_pages;
2308 phys_pfn += lvl_pages;
2309 pteval += lvl_pages * VTD_PAGE_SIZE;
2310 sg_res -= lvl_pages;
2311
2312 /* If the next PTE would be the first in a new page, then we
2313 need to flush the cache on the entries we've just written.
2314 And then we'll need to recalculate 'pte', so clear it and
2315 let it get set again in the if (!pte) block above.
2316
2317 If we're done (!nr_pages) we need to flush the cache too.
2318
2319 Also if we've been setting superpages, we may need to
2320 recalculate 'pte' and switch back to smaller pages for the
2321 end of the mapping, if the trailing size is not enough to
2322 use another superpage (i.e. sg_res < lvl_pages). */
e1605495 2323 pte++;
6dd9a7c7
YS
2324 if (!nr_pages || first_pte_in_page(pte) ||
2325 (largepage_lvl > 1 && sg_res < lvl_pages)) {
e1605495
DW
2326 domain_flush_cache(domain, first_pte,
2327 (void *)pte - (void *)first_pte);
2328 pte = NULL;
2329 }
6dd9a7c7
YS
2330
2331 if (!sg_res && nr_pages)
e1605495
DW
2332 sg = sg_next(sg);
2333 }
2334 return 0;
2335}
2336
87684fd9 2337static int domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
095303e0
LB
2338 struct scatterlist *sg, unsigned long phys_pfn,
2339 unsigned long nr_pages, int prot)
2340{
fa954e68 2341 int iommu_id, ret;
095303e0
LB
2342 struct intel_iommu *iommu;
2343
2344 /* Do the real mapping first */
2345 ret = __domain_mapping(domain, iov_pfn, sg, phys_pfn, nr_pages, prot);
2346 if (ret)
2347 return ret;
2348
fa954e68
LB
2349 for_each_domain_iommu(iommu_id, domain) {
2350 iommu = g_iommus[iommu_id];
095303e0
LB
2351 __mapping_notify_one(iommu, domain, iov_pfn, nr_pages);
2352 }
2353
2354 return 0;
87684fd9
PX
2355}
2356
9051aa02
DW
2357static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2358 struct scatterlist *sg, unsigned long nr_pages,
2359 int prot)
ba395927 2360{
87684fd9 2361 return domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
9051aa02 2362}
6f6a00e4 2363
9051aa02
DW
2364static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2365 unsigned long phys_pfn, unsigned long nr_pages,
2366 int prot)
2367{
87684fd9 2368 return domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
ba395927
KA
2369}
2370
2452d9db 2371static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn)
ba395927 2372{
5082219b
FS
2373 unsigned long flags;
2374 struct context_entry *context;
2375 u16 did_old;
2376
c7151a8d
WH
2377 if (!iommu)
2378 return;
8c11e798 2379
5082219b
FS
2380 spin_lock_irqsave(&iommu->lock, flags);
2381 context = iommu_context_addr(iommu, bus, devfn, 0);
2382 if (!context) {
2383 spin_unlock_irqrestore(&iommu->lock, flags);
2384 return;
2385 }
2386 did_old = context_domain_id(context);
2387 context_clear_entry(context);
2388 __iommu_flush_cache(iommu, context, sizeof(*context));
2389 spin_unlock_irqrestore(&iommu->lock, flags);
2390 iommu->flush.flush_context(iommu,
2391 did_old,
2392 (((u16)bus) << 8) | devfn,
2393 DMA_CCMD_MASK_NOBIT,
2394 DMA_CCMD_DEVICE_INVL);
2395 iommu->flush.flush_iotlb(iommu,
2396 did_old,
2397 0,
2398 0,
2399 DMA_TLB_DSI_FLUSH);
ba395927
KA
2400}
2401
109b9b04
DW
2402static inline void unlink_domain_info(struct device_domain_info *info)
2403{
2404 assert_spin_locked(&device_domain_lock);
2405 list_del(&info->link);
2406 list_del(&info->global);
2407 if (info->dev)
0bcb3e28 2408 info->dev->archdata.iommu = NULL;
109b9b04
DW
2409}
2410
ba395927
KA
2411static void domain_remove_dev_info(struct dmar_domain *domain)
2412{
3a74ca01 2413 struct device_domain_info *info, *tmp;
fb170fb4 2414 unsigned long flags;
ba395927
KA
2415
2416 spin_lock_irqsave(&device_domain_lock, flags);
76f45fe3 2417 list_for_each_entry_safe(info, tmp, &domain->devices, link)
127c7615 2418 __dmar_remove_one_dev_info(info);
ba395927
KA
2419 spin_unlock_irqrestore(&device_domain_lock, flags);
2420}
2421
2422/*
2423 * find_domain
1525a29a 2424 * Note: we use struct device->archdata.iommu to store the info
ba395927 2425 */
1525a29a 2426static struct dmar_domain *find_domain(struct device *dev)
ba395927
KA
2427{
2428 struct device_domain_info *info;
2429
8af46c78
LB
2430 if (unlikely(dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO)) {
2431 struct iommu_domain *domain;
2432
2433 dev->archdata.iommu = NULL;
2434 domain = iommu_get_domain_for_dev(dev);
2435 if (domain)
2436 intel_iommu_attach_device(domain, dev);
2437 }
2438
ba395927 2439 /* No lock here, assumes no domain exit in normal case */
1525a29a 2440 info = dev->archdata.iommu;
8af46c78 2441
b316d02a 2442 if (likely(info))
ba395927
KA
2443 return info->domain;
2444 return NULL;
2445}
2446
5a8f40e8 2447static inline struct device_domain_info *
745f2586
JL
2448dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2449{
2450 struct device_domain_info *info;
2451
2452 list_for_each_entry(info, &device_domain_list, global)
41e80dca 2453 if (info->iommu->segment == segment && info->bus == bus &&
745f2586 2454 info->devfn == devfn)
5a8f40e8 2455 return info;
745f2586
JL
2456
2457 return NULL;
2458}
2459
5db31569
JR
2460static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
2461 int bus, int devfn,
2462 struct device *dev,
2463 struct dmar_domain *domain)
745f2586 2464{
5a8f40e8 2465 struct dmar_domain *found = NULL;
745f2586
JL
2466 struct device_domain_info *info;
2467 unsigned long flags;
d160aca5 2468 int ret;
745f2586
JL
2469
2470 info = alloc_devinfo_mem();
2471 if (!info)
b718cd3d 2472 return NULL;
745f2586 2473
745f2586
JL
2474 info->bus = bus;
2475 info->devfn = devfn;
b16d0cb9
DW
2476 info->ats_supported = info->pasid_supported = info->pri_supported = 0;
2477 info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
2478 info->ats_qdep = 0;
745f2586
JL
2479 info->dev = dev;
2480 info->domain = domain;
5a8f40e8 2481 info->iommu = iommu;
cc580e41 2482 info->pasid_table = NULL;
95587a75 2483 info->auxd_enabled = 0;
67b8e02b 2484 INIT_LIST_HEAD(&info->auxiliary_domains);
745f2586 2485
b16d0cb9
DW
2486 if (dev && dev_is_pci(dev)) {
2487 struct pci_dev *pdev = to_pci_dev(info->dev);
2488
d8b85910
LB
2489 if (!pdev->untrusted &&
2490 !pci_ats_disabled() &&
cef74409 2491 ecap_dev_iotlb_support(iommu->ecap) &&
b16d0cb9
DW
2492 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) &&
2493 dmar_find_matched_atsr_unit(pdev))
2494 info->ats_supported = 1;
2495
765b6a98
LB
2496 if (sm_supported(iommu)) {
2497 if (pasid_supported(iommu)) {
b16d0cb9
DW
2498 int features = pci_pasid_features(pdev);
2499 if (features >= 0)
2500 info->pasid_supported = features | 1;
2501 }
2502
2503 if (info->ats_supported && ecap_prs(iommu->ecap) &&
2504 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
2505 info->pri_supported = 1;
2506 }
2507 }
2508
745f2586
JL
2509 spin_lock_irqsave(&device_domain_lock, flags);
2510 if (dev)
0bcb3e28 2511 found = find_domain(dev);
f303e507
JR
2512
2513 if (!found) {
5a8f40e8 2514 struct device_domain_info *info2;
41e80dca 2515 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
f303e507
JR
2516 if (info2) {
2517 found = info2->domain;
2518 info2->dev = dev;
2519 }
5a8f40e8 2520 }
f303e507 2521
745f2586
JL
2522 if (found) {
2523 spin_unlock_irqrestore(&device_domain_lock, flags);
2524 free_devinfo_mem(info);
b718cd3d
DW
2525 /* Caller must free the original domain */
2526 return found;
745f2586
JL
2527 }
2528
d160aca5
JR
2529 spin_lock(&iommu->lock);
2530 ret = domain_attach_iommu(domain, iommu);
2531 spin_unlock(&iommu->lock);
2532
2533 if (ret) {
c6c2cebd 2534 spin_unlock_irqrestore(&device_domain_lock, flags);
499f3aa4 2535 free_devinfo_mem(info);
c6c2cebd
JR
2536 return NULL;
2537 }
c6c2cebd 2538
b718cd3d
DW
2539 list_add(&info->link, &domain->devices);
2540 list_add(&info->global, &device_domain_list);
2541 if (dev)
2542 dev->archdata.iommu = info;
0bbeb01a 2543 spin_unlock_irqrestore(&device_domain_lock, flags);
a7fc93fe 2544
0bbeb01a
LB
2545 /* PASID table is mandatory for a PCI device in scalable mode. */
2546 if (dev && dev_is_pci(dev) && sm_supported(iommu)) {
a7fc93fe
LB
2547 ret = intel_pasid_alloc_table(dev);
2548 if (ret) {
932a6523 2549 dev_err(dev, "PASID table allocation failed\n");
71753239 2550 dmar_remove_one_dev_info(dev);
0bbeb01a 2551 return NULL;
a7fc93fe 2552 }
ef848b7e
LB
2553
2554 /* Setup the PASID entry for requests without PASID: */
2555 spin_lock(&iommu->lock);
2556 if (hw_pass_through && domain_type_is_si(domain))
2557 ret = intel_pasid_setup_pass_through(iommu, domain,
2558 dev, PASID_RID2PASID);
2559 else
2560 ret = intel_pasid_setup_second_level(iommu, domain,
2561 dev, PASID_RID2PASID);
2562 spin_unlock(&iommu->lock);
2563 if (ret) {
932a6523 2564 dev_err(dev, "Setup RID2PASID failed\n");
71753239 2565 dmar_remove_one_dev_info(dev);
ef848b7e 2566 return NULL;
a7fc93fe
LB
2567 }
2568 }
b718cd3d 2569
cc4e2575 2570 if (dev && domain_context_mapping(domain, dev)) {
932a6523 2571 dev_err(dev, "Domain context map failed\n");
71753239 2572 dmar_remove_one_dev_info(dev);
cc4e2575
JR
2573 return NULL;
2574 }
2575
b718cd3d 2576 return domain;
745f2586
JL
2577}
2578
579305f7
AW
2579static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2580{
2581 *(u16 *)opaque = alias;
2582 return 0;
2583}
2584
76208356 2585static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw)
ba395927 2586{
e083ea5b 2587 struct device_domain_info *info;
76208356 2588 struct dmar_domain *domain = NULL;
579305f7 2589 struct intel_iommu *iommu;
fcc35c63 2590 u16 dma_alias;
ba395927 2591 unsigned long flags;
aa4d066a 2592 u8 bus, devfn;
ba395927 2593
579305f7
AW
2594 iommu = device_to_iommu(dev, &bus, &devfn);
2595 if (!iommu)
2596 return NULL;
2597
146922ec
DW
2598 if (dev_is_pci(dev)) {
2599 struct pci_dev *pdev = to_pci_dev(dev);
276dbf99 2600
579305f7
AW
2601 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2602
2603 spin_lock_irqsave(&device_domain_lock, flags);
2604 info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2605 PCI_BUS_NUM(dma_alias),
2606 dma_alias & 0xff);
2607 if (info) {
2608 iommu = info->iommu;
2609 domain = info->domain;
5a8f40e8 2610 }
579305f7 2611 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927 2612
76208356 2613 /* DMA alias already has a domain, use it */
579305f7 2614 if (info)
76208356 2615 goto out;
579305f7 2616 }
ba395927 2617
146922ec 2618 /* Allocate and initialize new domain for the device */
ab8dfe25 2619 domain = alloc_domain(0);
745f2586 2620 if (!domain)
579305f7 2621 return NULL;
dc534b25 2622 if (domain_init(domain, iommu, gaw)) {
579305f7
AW
2623 domain_exit(domain);
2624 return NULL;
2c2e2c38 2625 }
ba395927 2626
76208356 2627out:
76208356
JR
2628 return domain;
2629}
579305f7 2630
76208356
JR
2631static struct dmar_domain *set_domain_for_dev(struct device *dev,
2632 struct dmar_domain *domain)
2633{
2634 struct intel_iommu *iommu;
2635 struct dmar_domain *tmp;
2636 u16 req_id, dma_alias;
2637 u8 bus, devfn;
2638
2639 iommu = device_to_iommu(dev, &bus, &devfn);
2640 if (!iommu)
2641 return NULL;
2642
2643 req_id = ((u16)bus << 8) | devfn;
2644
2645 if (dev_is_pci(dev)) {
2646 struct pci_dev *pdev = to_pci_dev(dev);
2647
2648 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2649
2650 /* register PCI DMA alias device */
2651 if (req_id != dma_alias) {
2652 tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2653 dma_alias & 0xff, NULL, domain);
2654
2655 if (!tmp || tmp != domain)
2656 return tmp;
2657 }
ba395927
KA
2658 }
2659
5db31569 2660 tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
76208356
JR
2661 if (!tmp || tmp != domain)
2662 return tmp;
2663
2664 return domain;
2665}
579305f7 2666
76208356
JR
2667static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
2668{
2669 struct dmar_domain *domain, *tmp;
2670
2671 domain = find_domain(dev);
2672 if (domain)
2673 goto out;
2674
2675 domain = find_or_alloc_domain(dev, gaw);
2676 if (!domain)
2677 goto out;
2678
2679 tmp = set_domain_for_dev(dev, domain);
2680 if (!tmp || domain != tmp) {
579305f7
AW
2681 domain_exit(domain);
2682 domain = tmp;
2683 }
b718cd3d 2684
76208356
JR
2685out:
2686
b718cd3d 2687 return domain;
ba395927
KA
2688}
2689
b213203e
DW
2690static int iommu_domain_identity_map(struct dmar_domain *domain,
2691 unsigned long long start,
2692 unsigned long long end)
ba395927 2693{
c5395d5c
DW
2694 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2695 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2696
2697 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2698 dma_to_mm_pfn(last_vpfn))) {
9f10e5bf 2699 pr_err("Reserving iova failed\n");
b213203e 2700 return -ENOMEM;
ba395927
KA
2701 }
2702
af1089ce 2703 pr_debug("Mapping reserved region %llx-%llx\n", start, end);
ba395927
KA
2704 /*
2705 * RMRR range might have overlap with physical memory range,
2706 * clear it first
2707 */
c5395d5c 2708 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
ba395927 2709
87684fd9
PX
2710 return __domain_mapping(domain, first_vpfn, NULL,
2711 first_vpfn, last_vpfn - first_vpfn + 1,
2712 DMA_PTE_READ|DMA_PTE_WRITE);
b213203e
DW
2713}
2714
d66ce54b
JR
2715static int domain_prepare_identity_map(struct device *dev,
2716 struct dmar_domain *domain,
2717 unsigned long long start,
2718 unsigned long long end)
b213203e 2719{
19943b0e
DW
2720 /* For _hardware_ passthrough, don't bother. But for software
2721 passthrough, we do it anyway -- it may indicate a memory
2722 range which is reserved in E820 and so didn't get set
2723 up to start with in si_domain */
2724 if (domain == si_domain && hw_pass_through) {
932a6523
BH
2725 dev_warn(dev, "Ignoring identity map for HW passthrough [0x%Lx - 0x%Lx]\n",
2726 start, end);
19943b0e
DW
2727 return 0;
2728 }
2729
932a6523 2730 dev_info(dev, "Setting identity map [0x%Lx - 0x%Lx]\n", start, end);
9f10e5bf 2731
5595b528
DW
2732 if (end < start) {
2733 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2734 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2735 dmi_get_system_info(DMI_BIOS_VENDOR),
2736 dmi_get_system_info(DMI_BIOS_VERSION),
2737 dmi_get_system_info(DMI_PRODUCT_VERSION));
d66ce54b 2738 return -EIO;
5595b528
DW
2739 }
2740
2ff729f5
DW
2741 if (end >> agaw_to_width(domain->agaw)) {
2742 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2743 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2744 agaw_to_width(domain->agaw),
2745 dmi_get_system_info(DMI_BIOS_VENDOR),
2746 dmi_get_system_info(DMI_BIOS_VERSION),
2747 dmi_get_system_info(DMI_PRODUCT_VERSION));
d66ce54b 2748 return -EIO;
2ff729f5 2749 }
19943b0e 2750
d66ce54b
JR
2751 return iommu_domain_identity_map(domain, start, end);
2752}
ba395927 2753
d66ce54b
JR
2754static int iommu_prepare_identity_map(struct device *dev,
2755 unsigned long long start,
2756 unsigned long long end)
2757{
2758 struct dmar_domain *domain;
2759 int ret;
2760
2761 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2762 if (!domain)
2763 return -ENOMEM;
2764
2765 ret = domain_prepare_identity_map(dev, domain, start, end);
2766 if (ret)
2767 domain_exit(domain);
b213203e 2768
ba395927 2769 return ret;
ba395927
KA
2770}
2771
2772static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
0b9d9753 2773 struct device *dev)
ba395927 2774{
0b9d9753 2775 if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927 2776 return 0;
0b9d9753
DW
2777 return iommu_prepare_identity_map(dev, rmrr->base_address,
2778 rmrr->end_address);
ba395927
KA
2779}
2780
2c2e2c38 2781static int md_domain_init(struct dmar_domain *domain, int guest_width);
c7ab48d2 2782
071e1374 2783static int __init si_domain_init(int hw)
2c2e2c38 2784{
4de354ec
LB
2785 struct dmar_rmrr_unit *rmrr;
2786 struct device *dev;
2787 int i, nid, ret;
2c2e2c38 2788
ab8dfe25 2789 si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2c2e2c38
FY
2790 if (!si_domain)
2791 return -EFAULT;
2792
2c2e2c38
FY
2793 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2794 domain_exit(si_domain);
2795 return -EFAULT;
2796 }
2797
19943b0e
DW
2798 if (hw)
2799 return 0;
2800
c7ab48d2 2801 for_each_online_node(nid) {
5dfe8660
TH
2802 unsigned long start_pfn, end_pfn;
2803 int i;
2804
2805 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2806 ret = iommu_domain_identity_map(si_domain,
2807 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2808 if (ret)
2809 return ret;
2810 }
c7ab48d2
DW
2811 }
2812
4de354ec
LB
2813 /*
2814 * Normally we use DMA domains for devices which have RMRRs. But we
2815 * relax this requirement for graphics and USB devices. Identity map
2816 * the RMRRs for graphics and USB devices so that they can use the
2817 * si_domain.
2818 */
2819 for_each_rmrr_units(rmrr) {
2820 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
2821 i, dev) {
2822 unsigned long long start = rmrr->base_address;
2823 unsigned long long end = rmrr->end_address;
2824
2825 if (device_is_rmrr_locked(dev))
2826 continue;
2827
2828 if (WARN_ON(end < start ||
2829 end >> agaw_to_width(si_domain->agaw)))
2830 continue;
2831
2832 ret = iommu_domain_identity_map(si_domain, start, end);
2833 if (ret)
2834 return ret;
2835 }
2836 }
2837
2c2e2c38
FY
2838 return 0;
2839}
2840
9b226624 2841static int identity_mapping(struct device *dev)
2c2e2c38
FY
2842{
2843 struct device_domain_info *info;
2844
9b226624 2845 info = dev->archdata.iommu;
cb452a40
MT
2846 if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2847 return (info->domain == si_domain);
2c2e2c38 2848
2c2e2c38
FY
2849 return 0;
2850}
2851
28ccce0d 2852static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
2c2e2c38 2853{
0ac72664 2854 struct dmar_domain *ndomain;
5a8f40e8 2855 struct intel_iommu *iommu;
156baca8 2856 u8 bus, devfn;
2c2e2c38 2857
5913c9bf 2858 iommu = device_to_iommu(dev, &bus, &devfn);
5a8f40e8
DW
2859 if (!iommu)
2860 return -ENODEV;
2861
5db31569 2862 ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
0ac72664
DW
2863 if (ndomain != domain)
2864 return -EBUSY;
2c2e2c38
FY
2865
2866 return 0;
2867}
2868
0b9d9753 2869static bool device_has_rmrr(struct device *dev)
ea2447f7
TM
2870{
2871 struct dmar_rmrr_unit *rmrr;
832bd858 2872 struct device *tmp;
ea2447f7
TM
2873 int i;
2874
0e242612 2875 rcu_read_lock();
ea2447f7 2876 for_each_rmrr_units(rmrr) {
b683b230
JL
2877 /*
2878 * Return TRUE if this RMRR contains the device that
2879 * is passed in.
2880 */
2881 for_each_active_dev_scope(rmrr->devices,
2882 rmrr->devices_cnt, i, tmp)
e143fd45
EA
2883 if (tmp == dev ||
2884 is_downstream_to_pci_bridge(dev, tmp)) {
0e242612 2885 rcu_read_unlock();
ea2447f7 2886 return true;
b683b230 2887 }
ea2447f7 2888 }
0e242612 2889 rcu_read_unlock();
ea2447f7
TM
2890 return false;
2891}
2892
c875d2c1
AW
2893/*
2894 * There are a couple of cases where we need to restrict the functionality of
2895 * devices associated with RMRRs. The first is when evaluating a device for
2896 * identity mapping because problems exist when devices are moved in and out
2897 * of domains and their respective RMRR information is lost. This means that
2898 * a device with associated RMRRs will never be in a "passthrough" domain.
2899 * The second is use of the device through the IOMMU API. This interface
2900 * expects to have full control of the IOVA space for the device. We cannot
2901 * satisfy both the requirement that RMRR access is maintained and have an
2902 * unencumbered IOVA space. We also have no ability to quiesce the device's
2903 * use of the RMRR space or even inform the IOMMU API user of the restriction.
2904 * We therefore prevent devices associated with an RMRR from participating in
2905 * the IOMMU API, which eliminates them from device assignment.
2906 *
2907 * In both cases we assume that PCI USB devices with RMRRs have them largely
2908 * for historical reasons and that the RMRR space is not actively used post
2909 * boot. This exclusion may change if vendors begin to abuse it.
18436afd
DW
2910 *
2911 * The same exception is made for graphics devices, with the requirement that
2912 * any use of the RMRR regions will be torn down before assigning the device
2913 * to a guest.
c875d2c1
AW
2914 */
2915static bool device_is_rmrr_locked(struct device *dev)
2916{
2917 if (!device_has_rmrr(dev))
2918 return false;
2919
2920 if (dev_is_pci(dev)) {
2921 struct pci_dev *pdev = to_pci_dev(dev);
2922
18436afd 2923 if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
c875d2c1
AW
2924 return false;
2925 }
2926
2927 return true;
2928}
2929
f273a453
LB
2930/*
2931 * Return the required default domain type for a specific device.
2932 *
2933 * @dev: the device in question
2935 *
2936 * Returns:
2937 * - IOMMU_DOMAIN_DMA: device requires a dynamic mapping domain
2938 * - IOMMU_DOMAIN_IDENTITY: device requires an identical mapping domain
2939 * - 0: both identity and dynamic domains work for this device
2940 */
0e31a726 2941static int device_def_domain_type(struct device *dev)
6941af28 2942{
3bdb2591
DW
2943 if (dev_is_pci(dev)) {
2944 struct pci_dev *pdev = to_pci_dev(dev);
ea2447f7 2945
c875d2c1 2946 if (device_is_rmrr_locked(dev))
f273a453 2947 return IOMMU_DOMAIN_DMA;
e0fc7e0b 2948
89a6079d
LB
2949 /*
2950 * Prevent any device marked as untrusted from getting
2951 * placed into the statically identity mapping domain.
2952 */
2953 if (pdev->untrusted)
f273a453 2954 return IOMMU_DOMAIN_DMA;
89a6079d 2955
3bdb2591 2956 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
f273a453 2957 return IOMMU_DOMAIN_IDENTITY;
e0fc7e0b 2958
3bdb2591 2959 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
f273a453 2960 return IOMMU_DOMAIN_IDENTITY;
3bdb2591
DW
2961
2962 /*
2963 * We want to start off with all devices in the 1:1 domain, and
2964 * take them out later if we find they can't access all of memory.
2965 *
2966 * However, we can't do this for PCI devices behind bridges,
2967 * because all PCI devices behind the same bridge will end up
2968 * with the same source-id on their transactions.
2969 *
2970 * Practically speaking, we can't change things around for these
2971 * devices at run-time, because we can't be sure there'll be no
2972 * DMA transactions in flight for any of their siblings.
2973 *
2974 * So PCI devices (unless they're on the root bus) as well as
2975 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2976 * the 1:1 domain, just in _case_ one of their siblings turns out
2977 * not to be able to map all of memory.
2978 */
2979 if (!pci_is_pcie(pdev)) {
2980 if (!pci_is_root_bus(pdev->bus))
f273a453 2981 return IOMMU_DOMAIN_DMA;
3bdb2591 2982 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
f273a453 2983 return IOMMU_DOMAIN_DMA;
3bdb2591 2984 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
f273a453 2985 return IOMMU_DOMAIN_DMA;
3bdb2591
DW
2986 } else {
2987 if (device_has_rmrr(dev))
f273a453 2988 return IOMMU_DOMAIN_DMA;
3bdb2591 2989 }
3dfc813d 2990
f273a453
LB
2991 return (iommu_identity_mapping & IDENTMAP_ALL) ?
2992 IOMMU_DOMAIN_IDENTITY : 0;
2993}
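/*
 * Illustrative sketch, not part of the driver: how a caller might act on
 * the three return values documented above. The helper name is
 * hypothetical; in the real driver the decision is consumed by the
 * device probe / domain selection path.
 */
static inline bool example_wants_identity_domain(struct device *dev)
{
        switch (device_def_domain_type(dev)) {
        case IOMMU_DOMAIN_IDENTITY:
                return true;    /* device requires a 1:1 mapping */
        case IOMMU_DOMAIN_DMA:
                return false;   /* device requires a dynamic mapping */
        default:
                return false;   /* no constraint; caller picks the default */
        }
}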
2994
ffebeb46
JL
2995static void intel_iommu_init_qi(struct intel_iommu *iommu)
2996{
2997 /*
2998 * Start from the sane iommu hardware state.
2999 * If the queued invalidation is already initialized by us
3000 * (for example, while enabling interrupt-remapping) then
3001 * we got the things already rolling from a sane state.
3002 */
3003 if (!iommu->qi) {
3004 /*
3005 * Clear any previous faults.
3006 */
3007 dmar_fault(-1, iommu);
3008 /*
3009 * Disable queued invalidation if supported and already enabled
3010 * before OS handover.
3011 */
3012 dmar_disable_qi(iommu);
3013 }
3014
3015 if (dmar_enable_qi(iommu)) {
3016 /*
3017 * Queued Invalidate not enabled, use Register Based Invalidate
3018 */
3019 iommu->flush.flush_context = __iommu_flush_context;
3020 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
9f10e5bf 3021 pr_info("%s: Using Register based invalidation\n",
ffebeb46
JL
3022 iommu->name);
3023 } else {
3024 iommu->flush.flush_context = qi_flush_context;
3025 iommu->flush.flush_iotlb = qi_flush_iotlb;
9f10e5bf 3026 pr_info("%s: Using Queued invalidation\n", iommu->name);
ffebeb46
JL
3027 }
3028}
3029
091d42e4 3030static int copy_context_table(struct intel_iommu *iommu,
dfddb969 3031 struct root_entry *old_re,
091d42e4
JR
3032 struct context_entry **tbl,
3033 int bus, bool ext)
3034{
dbcd861f 3035 int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
543c8dcf 3036 struct context_entry *new_ce = NULL, ce;
dfddb969 3037 struct context_entry *old_ce = NULL;
543c8dcf 3038 struct root_entry re;
091d42e4
JR
3039 phys_addr_t old_ce_phys;
3040
3041 tbl_idx = ext ? bus * 2 : bus;
dfddb969 3042 memcpy(&re, old_re, sizeof(re));
091d42e4
JR
3043
3044 for (devfn = 0; devfn < 256; devfn++) {
3045 /* First calculate the correct index */
3046 idx = (ext ? devfn * 2 : devfn) % 256;
3047
3048 if (idx == 0) {
3049 /* First save what we may have and clean up */
3050 if (new_ce) {
3051 tbl[tbl_idx] = new_ce;
3052 __iommu_flush_cache(iommu, new_ce,
3053 VTD_PAGE_SIZE);
3054 pos = 1;
3055 }
3056
3057 if (old_ce)
829383e1 3058 memunmap(old_ce);
091d42e4
JR
3059
3060 ret = 0;
3061 if (devfn < 0x80)
543c8dcf 3062 old_ce_phys = root_entry_lctp(&re);
091d42e4 3063 else
543c8dcf 3064 old_ce_phys = root_entry_uctp(&re);
091d42e4
JR
3065
3066 if (!old_ce_phys) {
3067 if (ext && devfn == 0) {
3068 /* No LCTP, try UCTP */
3069 devfn = 0x7f;
3070 continue;
3071 } else {
3072 goto out;
3073 }
3074 }
3075
3076 ret = -ENOMEM;
dfddb969
DW
3077 old_ce = memremap(old_ce_phys, PAGE_SIZE,
3078 MEMREMAP_WB);
091d42e4
JR
3079 if (!old_ce)
3080 goto out;
3081
3082 new_ce = alloc_pgtable_page(iommu->node);
3083 if (!new_ce)
3084 goto out_unmap;
3085
3086 ret = 0;
3087 }
3088
3089 /* Now copy the context entry */
dfddb969 3090 memcpy(&ce, old_ce + idx, sizeof(ce));
091d42e4 3091
cf484d0e 3092 if (!__context_present(&ce))
091d42e4
JR
3093 continue;
3094
dbcd861f
JR
3095 did = context_domain_id(&ce);
3096 if (did >= 0 && did < cap_ndoms(iommu->cap))
3097 set_bit(did, iommu->domain_ids);
3098
cf484d0e
JR
3099 /*
3100 * We need a marker for copied context entries. This
3101 * marker needs to work for the old format as well as
3102 * for extended context entries.
3103 *
3104 * Bit 67 of the context entry is used. In the old
3105 * format this bit is available to software, in the
3106 * extended format it is the PGE bit, but PGE is ignored
3107 * by HW if PASIDs are disabled (and thus still
3108 * available).
3109 *
3110 * So disable PASIDs first and then mark the entry
3111 * copied. This means that we don't copy PASID
3112 * translations from the old kernel, but this is fine as
3113 * faults there are not fatal.
3114 */
3115 context_clear_pasid_enable(&ce);
3116 context_set_copied(&ce);
3117
091d42e4
JR
3118 new_ce[idx] = ce;
3119 }
3120
3121 tbl[tbl_idx + pos] = new_ce;
3122
3123 __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
3124
3125out_unmap:
dfddb969 3126 memunmap(old_ce);
091d42e4
JR
3127
3128out:
3129 return ret;
3130}
3131
3132static int copy_translation_tables(struct intel_iommu *iommu)
3133{
3134 struct context_entry **ctxt_tbls;
dfddb969 3135 struct root_entry *old_rt;
091d42e4
JR
3136 phys_addr_t old_rt_phys;
3137 int ctxt_table_entries;
3138 unsigned long flags;
3139 u64 rtaddr_reg;
3140 int bus, ret;
c3361f2f 3141 bool new_ext, ext;
091d42e4
JR
3142
3143 rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
3144 ext = !!(rtaddr_reg & DMA_RTADDR_RTT);
c3361f2f
JR
3145 new_ext = !!ecap_ecs(iommu->ecap);
3146
3147 /*
3148 * The RTT bit can only be changed when translation is disabled,
3149 * but disabling translation would open a window for data
3150 * corruption. So bail out and don't copy anything if we would
3151 * have to change the bit.
3152 */
3153 if (new_ext != ext)
3154 return -EINVAL;
091d42e4
JR
3155
3156 old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
3157 if (!old_rt_phys)
3158 return -EINVAL;
3159
dfddb969 3160 old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
091d42e4
JR
3161 if (!old_rt)
3162 return -ENOMEM;
3163
3164 /* This is too big for the stack - allocate it from slab */
3165 ctxt_table_entries = ext ? 512 : 256;
3166 ret = -ENOMEM;
6396bb22 3167 ctxt_tbls = kcalloc(ctxt_table_entries, sizeof(void *), GFP_KERNEL);
091d42e4
JR
3168 if (!ctxt_tbls)
3169 goto out_unmap;
3170
3171 for (bus = 0; bus < 256; bus++) {
3172 ret = copy_context_table(iommu, &old_rt[bus],
3173 ctxt_tbls, bus, ext);
3174 if (ret) {
3175 pr_err("%s: Failed to copy context table for bus %d\n",
3176 iommu->name, bus);
3177 continue;
3178 }
3179 }
3180
3181 spin_lock_irqsave(&iommu->lock, flags);
3182
3183 /* Context tables are copied, now write them to the root_entry table */
3184 for (bus = 0; bus < 256; bus++) {
3185 int idx = ext ? bus * 2 : bus;
3186 u64 val;
3187
3188 if (ctxt_tbls[idx]) {
3189 val = virt_to_phys(ctxt_tbls[idx]) | 1;
3190 iommu->root_entry[bus].lo = val;
3191 }
3192
3193 if (!ext || !ctxt_tbls[idx + 1])
3194 continue;
3195
3196 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
3197 iommu->root_entry[bus].hi = val;
3198 }
3199
3200 spin_unlock_irqrestore(&iommu->lock, flags);
3201
3202 kfree(ctxt_tbls);
3203
3204 __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
3205
3206 ret = 0;
3207
3208out_unmap:
dfddb969 3209 memunmap(old_rt);
091d42e4
JR
3210
3211 return ret;
3212}
3213
b779260b 3214static int __init init_dmars(void)
ba395927
KA
3215{
3216 struct dmar_drhd_unit *drhd;
ba395927 3217 struct intel_iommu *iommu;
df4f3c60 3218 int ret;
2c2e2c38 3219
ba395927
KA
3220 /*
3221 * for each drhd
3222 * allocate root
3223 * initialize and program root entry to not present
3224 * endfor
3225 */
3226 for_each_drhd_unit(drhd) {
5e0d2a6f 3227 /*
3228 * lock not needed as this is only incremented in the
3229 * single-threaded kernel __init code path; all other accesses
3230 * are read only
3231 */
78d8e704 3232 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
1b198bb0
MT
3233 g_num_of_iommus++;
3234 continue;
3235 }
9f10e5bf 3236 pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
5e0d2a6f 3237 }
3238
ffebeb46
JL
3239 /* Preallocate enough resources for IOMMU hot-addition */
3240 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
3241 g_num_of_iommus = DMAR_UNITS_SUPPORTED;
3242
d9630fe9
WH
3243 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
3244 GFP_KERNEL);
3245 if (!g_iommus) {
9f10e5bf 3246 pr_err("Allocating global iommu array failed\n");
d9630fe9
WH
3247 ret = -ENOMEM;
3248 goto error;
3249 }
3250
7c919779 3251 for_each_active_iommu(iommu, drhd) {
56283174
LB
3252 /*
3253 * Find the max PASID size of all IOMMUs in the system.
3254 * We need to ensure the system pasid table is no bigger
3255 * than the smallest supported.
3256 */
765b6a98 3257 if (pasid_supported(iommu)) {
56283174
LB
3258 u32 temp = 2 << ecap_pss(iommu->ecap);
3259
3260 intel_pasid_max_id = min_t(u32, temp,
3261 intel_pasid_max_id);
3262 }
3263
d9630fe9 3264 g_iommus[iommu->seq_id] = iommu;
ba395927 3265
b63d80d1
JR
3266 intel_iommu_init_qi(iommu);
3267
e61d98d8
SS
3268 ret = iommu_init_domains(iommu);
3269 if (ret)
989d51fc 3270 goto free_iommu;
e61d98d8 3271
4158c2ec
JR
3272 init_translation_status(iommu);
3273
091d42e4
JR
3274 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
3275 iommu_disable_translation(iommu);
3276 clear_translation_pre_enabled(iommu);
3277 pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3278 iommu->name);
3279 }
4158c2ec 3280
ba395927
KA
3281 /*
3282 * TBD:
3283 * we could share the same root & context tables
25985edc 3284 * among all IOMMUs. Need to split it later.
ba395927
KA
3285 */
3286 ret = iommu_alloc_root_entry(iommu);
ffebeb46 3287 if (ret)
989d51fc 3288 goto free_iommu;
5f0a7f76 3289
091d42e4
JR
3290 if (translation_pre_enabled(iommu)) {
3291 pr_info("Translation already enabled - trying to copy translation structures\n");
3292
3293 ret = copy_translation_tables(iommu);
3294 if (ret) {
3295 /*
3296 * We found the IOMMU with translation
3297 * enabled - but failed to copy over the
3298 * old root-entry table. Try to proceed
3299 * by disabling translation now and
3300 * allocating a clean root-entry table.
3301 * This might cause DMAR faults, but
3302 * probably the dump will still succeed.
3303 */
3304 pr_err("Failed to copy translation tables from previous kernel for %s\n",
3305 iommu->name);
3306 iommu_disable_translation(iommu);
3307 clear_translation_pre_enabled(iommu);
3308 } else {
3309 pr_info("Copied translation tables from previous kernel for %s\n",
3310 iommu->name);
3311 }
3312 }
3313
4ed0d3e6 3314 if (!ecap_pass_through(iommu->ecap))
19943b0e 3315 hw_pass_through = 0;
8a94ade4 3316#ifdef CONFIG_INTEL_IOMMU_SVM
765b6a98 3317 if (pasid_supported(iommu))
d9737953 3318 intel_svm_init(iommu);
8a94ade4 3319#endif
ba395927
KA
3320 }
3321
a4c34ff1
JR
3322 /*
3323 * Now that qi is enabled on all iommus, set the root entry and flush
3324 * caches. This is required on some Intel X58 chipsets, otherwise the
3325 * flush_context function will loop forever and the boot hangs.
3326 */
3327 for_each_active_iommu(iommu, drhd) {
3328 iommu_flush_write_buffer(iommu);
3329 iommu_set_root_entry(iommu);
3330 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
3331 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3332 }
3333
19943b0e 3334 if (iommu_pass_through)
e0fc7e0b
DW
3335 iommu_identity_mapping |= IDENTMAP_ALL;
3336
d3f13810 3337#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
5daab580 3338 dmar_map_gfx = 0;
19943b0e 3339#endif
e0fc7e0b 3340
5daab580
LB
3341 if (!dmar_map_gfx)
3342 iommu_identity_mapping |= IDENTMAP_GFX;
3343
21e722c4
AR
3344 check_tylersburg_isoch();
3345
4de354ec
LB
3346 ret = si_domain_init(hw_pass_through);
3347 if (ret)
3348 goto free_iommu;
86080ccc 3349
ba395927
KA
3350 /*
3351 * for each drhd
3352 * enable fault log
3353 * global invalidate context cache
3354 * global invalidate iotlb
3355 * enable translation
3356 */
7c919779 3357 for_each_iommu(iommu, drhd) {
51a63e67
JC
3358 if (drhd->ignored) {
3359 /*
3360 * we always have to disable PMRs or DMA may fail on
3361 * this device
3362 */
3363 if (force_on)
7c919779 3364 iommu_disable_protect_mem_regions(iommu);
ba395927 3365 continue;
51a63e67 3366 }
ba395927
KA
3367
3368 iommu_flush_write_buffer(iommu);
3369
a222a7f0 3370#ifdef CONFIG_INTEL_IOMMU_SVM
765b6a98 3371 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
a7755c3c
LB
3372 /*
3373 * Call dmar_alloc_hwirq() with dmar_global_lock held,
3374 * could cause possible lock race condition.
3375 */
3376 up_write(&dmar_global_lock);
a222a7f0 3377 ret = intel_svm_enable_prq(iommu);
a7755c3c 3378 down_write(&dmar_global_lock);
a222a7f0
DW
3379 if (ret)
3380 goto free_iommu;
3381 }
3382#endif
3460a6d9
KA
3383 ret = dmar_set_interrupt(iommu);
3384 if (ret)
989d51fc 3385 goto free_iommu;
ba395927
KA
3386 }
3387
3388 return 0;
989d51fc
JL
3389
3390free_iommu:
ffebeb46
JL
3391 for_each_active_iommu(iommu, drhd) {
3392 disable_dmar_iommu(iommu);
a868e6b7 3393 free_dmar_iommu(iommu);
ffebeb46 3394 }
13cf0174 3395
d9630fe9 3396 kfree(g_iommus);
13cf0174 3397
989d51fc 3398error:
ba395927
KA
3399 return ret;
3400}
3401
5a5e02a6 3402/* This takes a number of _MM_ pages, not VTD pages */
2aac6304 3403static unsigned long intel_alloc_iova(struct device *dev,
875764de
DW
3404 struct dmar_domain *domain,
3405 unsigned long nrpages, uint64_t dma_mask)
ba395927 3406{
e083ea5b 3407 unsigned long iova_pfn;
ba395927 3408
875764de
DW
3409 /* Restrict dma_mask to the width that the iommu can handle */
3410 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
8f6429c7
RM
3411 /* Ensure we reserve the whole size-aligned region */
3412 nrpages = __roundup_pow_of_two(nrpages);
875764de
DW
3413
3414 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
ba395927
KA
3415 /*
3416 * First try to allocate an io virtual address in
284901a9 3417 * DMA_BIT_MASK(32) and if that fails then try allocating
3609801e 3418 * from higher range
ba395927 3419 */
22e2f9fa 3420 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
538d5b33 3421 IOVA_PFN(DMA_BIT_MASK(32)), false);
22e2f9fa
OP
3422 if (iova_pfn)
3423 return iova_pfn;
875764de 3424 }
538d5b33
TN
3425 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
3426 IOVA_PFN(dma_mask), true);
22e2f9fa 3427 if (unlikely(!iova_pfn)) {
932a6523 3428 dev_err(dev, "Allocating %ld-page iova failed\n", nrpages);
2aac6304 3429 return 0;
f76aec76
KA
3430 }
3431
22e2f9fa 3432 return iova_pfn;
f76aec76
KA
3433}
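/*
 * Illustrative sketch, not part of the driver: the size rounding used
 * above in numbers. Rounding the page count up to a power of two matches
 * the "reserve the whole size-aligned region" comment in
 * intel_alloc_iova(); the checks below just show the rounding. The
 * helper name is hypothetical.
 */
static inline void example_iova_rounding_demo(void)
{
        /* a 3-page request reserves 4 pages, a 5-page request reserves 8 */
        WARN_ON(__roundup_pow_of_two(3) != 4);
        WARN_ON(__roundup_pow_of_two(5) != 8);
}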
3434
4ec066c7 3435static struct dmar_domain *get_private_domain_for_dev(struct device *dev)
f76aec76 3436{
1c5ebba9 3437 struct dmar_domain *domain, *tmp;
b1ce5b79 3438 struct dmar_rmrr_unit *rmrr;
b1ce5b79
JR
3439 struct device *i_dev;
3440 int i, ret;
f76aec76 3441
4ec066c7 3442 /* Device shouldn't be attached to any domain. */
1c5ebba9
JR
3443 domain = find_domain(dev);
3444 if (domain)
4ec066c7 3445 return NULL;
1c5ebba9
JR
3446
3447 domain = find_or_alloc_domain(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
3448 if (!domain)
3449 goto out;
ba395927 3450
b1ce5b79
JR
3451 /* We have a new domain - setup possible RMRRs for the device */
3452 rcu_read_lock();
3453 for_each_rmrr_units(rmrr) {
3454 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
3455 i, i_dev) {
3456 if (i_dev != dev)
3457 continue;
3458
3459 ret = domain_prepare_identity_map(dev, domain,
3460 rmrr->base_address,
3461 rmrr->end_address);
3462 if (ret)
3463 dev_err(dev, "Mapping reserved region failed\n");
3464 }
3465 }
3466 rcu_read_unlock();
3467
1c5ebba9
JR
3468 tmp = set_domain_for_dev(dev, domain);
3469 if (!tmp || domain != tmp) {
3470 domain_exit(domain);
3471 domain = tmp;
3472 }
3473
3474out:
1c5ebba9 3475 if (!domain)
932a6523 3476 dev_err(dev, "Allocating domain failed\n");
1c5ebba9 3477
f76aec76
KA
3478 return domain;
3479}
3480
ecb509ec 3481/* Check if the dev needs to go through the non-identity map and unmap process. */
48b2c937 3482static bool iommu_need_mapping(struct device *dev)
2c2e2c38 3483{
98b2fffb 3484 int ret;
2c2e2c38 3485
3d89194a 3486 if (iommu_dummy(dev))
48b2c937 3487 return false;
1e4c64c4 3488
98b2fffb
LB
3489 ret = identity_mapping(dev);
3490 if (ret) {
3491 u64 dma_mask = *dev->dma_mask;
3492
3493 if (dev->coherent_dma_mask && dev->coherent_dma_mask < dma_mask)
3494 dma_mask = dev->coherent_dma_mask;
3495
3496 if (dma_mask >= dma_get_required_mask(dev))
48b2c937
CH
3497 return false;
3498
3499 /*
3500 * For 32 bit DMA the device is removed from si_domain and falls
3501 * back to non-identity mapping.
3502 */
3503 dmar_remove_one_dev_info(dev);
98b2fffb
LB
3504 ret = iommu_request_dma_domain_for_dev(dev);
3505 if (ret) {
3506 struct iommu_domain *domain;
3507 struct dmar_domain *dmar_domain;
3508
3509 domain = iommu_get_domain_for_dev(dev);
3510 if (domain) {
3511 dmar_domain = to_dmar_domain(domain);
3512 dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
3513 }
4ec066c7 3514 get_private_domain_for_dev(dev);
2c2e2c38 3515 }
98b2fffb
LB
3516
3517 dev_info(dev, "32bit DMA uses non-identity mapping\n");
2c2e2c38
FY
3518 }
3519
48b2c937 3520 return true;
2c2e2c38
FY
3521}
3522
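/*
 * Illustrative sketch (not driver code): the decision iommu_need_mapping()
 * makes above for an identity-mapped device, as a self-contained program.
 * "required_mask" stands in for dma_get_required_mask(dev), i.e. a mask
 * wide enough to reach all installed RAM; the masks below are examples.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool needs_dma_domain(uint64_t dma_mask, uint64_t coherent_mask,
			     uint64_t required_mask)
{
	/* The narrower of the two masks governs, as in the code above. */
	if (coherent_mask && coherent_mask < dma_mask)
		dma_mask = coherent_mask;

	/* A mask that covers all memory may stay in the identity domain. */
	return dma_mask < required_mask;
}

int main(void)
{
	uint64_t required = (1ULL << 36) - 1;	/* e.g. 64 GiB of RAM */

	printf("32-bit device: %s\n",
	       needs_dma_domain(0xffffffffULL, 0, required) ?
	       "switch to DMA domain" : "keep identity map");
	printf("64-bit device: %s\n",
	       needs_dma_domain(~0ULL, 0, required) ?
	       "switch to DMA domain" : "keep identity map");
	return 0;
}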
21d5d27c
LG
3523static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
3524 size_t size, int dir, u64 dma_mask)
f76aec76 3525{
f76aec76 3526 struct dmar_domain *domain;
5b6985ce 3527 phys_addr_t start_paddr;
2aac6304 3528 unsigned long iova_pfn;
f76aec76 3529 int prot = 0;
6865f0d1 3530 int ret;
8c11e798 3531 struct intel_iommu *iommu;
33041ec0 3532 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
f76aec76
KA
3533
3534 BUG_ON(dir == DMA_NONE);
2c2e2c38 3535
4ec066c7 3536 domain = find_domain(dev);
f76aec76 3537 if (!domain)
524a669b 3538 return DMA_MAPPING_ERROR;
f76aec76 3539
8c11e798 3540 iommu = domain_get_iommu(domain);
88cb6a74 3541 size = aligned_nrpages(paddr, size);
f76aec76 3542
2aac6304
OP
3543 iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
3544 if (!iova_pfn)
f76aec76
KA
3545 goto error;
3546
ba395927
KA
3547 /*
	3548	 * Check if DMAR supports zero-length reads on write-only
	3549	 * mappings.
3550 */
3551 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3552 !cap_zlr(iommu->cap))
ba395927
KA
3553 prot |= DMA_PTE_READ;
3554 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3555 prot |= DMA_PTE_WRITE;
3556 /*
6865f0d1	3557	 * paddr - (paddr + size) might be a partial page, so we should map the whole
ba395927	3558	 * page.  Note: if two parts of one page are separately mapped, we
6865f0d1	3559	 * might have two guest_addr mappings to the same host paddr, but this
ba395927
KA
3560 * is not a big problem
3561 */
2aac6304 3562 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova_pfn),
33041ec0 3563 mm_to_dma_pfn(paddr_pfn), size, prot);
ba395927
KA
3564 if (ret)
3565 goto error;
3566
2aac6304 3567 start_paddr = (phys_addr_t)iova_pfn << PAGE_SHIFT;
03d6a246
DW
3568 start_paddr += paddr & ~PAGE_MASK;
3569 return start_paddr;
ba395927 3570
ba395927 3571error:
2aac6304 3572 if (iova_pfn)
22e2f9fa 3573 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
932a6523
BH
3574 dev_err(dev, "Device request: %zx@%llx dir %d --- failed\n",
3575 size, (unsigned long long)paddr, dir);
524a669b 3576 return DMA_MAPPING_ERROR;
ba395927
KA
3577}
3578
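/*
 * Illustrative sketch (not driver code): the page arithmetic used by
 * __intel_map_single() above, assuming 4 KiB pages.  aligned_nrpages() is
 * re-derived here, and iova_pfn is just an example value, not the result
 * of a real IOVA allocation.
 */
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PAGE_MASK	(~(PAGE_SIZE - 1))

static unsigned long aligned_nrpages(unsigned long paddr, size_t size)
{
	unsigned long offset = paddr & ~PAGE_MASK;

	return (offset + size + PAGE_SIZE - 1) >> PAGE_SHIFT;
}

int main(void)
{
	unsigned long paddr = 0x1234567;	/* arbitrary, not page aligned */
	size_t size = 0x2000;			/* 8 KiB */
	unsigned long iova_pfn = 0xfff00;	/* pretend allocation result */
	uint64_t dma_addr;

	/* A 0x567 sub-page offset plus 8 KiB spans three 4 KiB pages. */
	printf("pages to map: %lu\n", aligned_nrpages(paddr, size));

	/* The device sees the IOVA page plus the original sub-page offset. */
	dma_addr = ((uint64_t)iova_pfn << PAGE_SHIFT) + (paddr & ~PAGE_MASK);
	printf("dma addr: %#llx\n", (unsigned long long)dma_addr);
	return 0;
}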
ffbbef5c
FT
3579static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3580 unsigned long offset, size_t size,
3581 enum dma_data_direction dir,
00085f1e 3582 unsigned long attrs)
bb9e6d65 3583{
9cc0c2af
CH
3584 if (iommu_need_mapping(dev))
3585 return __intel_map_single(dev, page_to_phys(page) + offset,
3586 size, dir, *dev->dma_mask);
3587 return dma_direct_map_page(dev, page, offset, size, dir, attrs);
21d5d27c
LG
3588}
3589
3590static dma_addr_t intel_map_resource(struct device *dev, phys_addr_t phys_addr,
3591 size_t size, enum dma_data_direction dir,
3592 unsigned long attrs)
3593{
9cc0c2af
CH
3594 if (iommu_need_mapping(dev))
3595 return __intel_map_single(dev, phys_addr, size, dir,
3596 *dev->dma_mask);
3597 return dma_direct_map_resource(dev, phys_addr, size, dir, attrs);
bb9e6d65
FT
3598}
3599
769530e4 3600static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
ba395927 3601{
f76aec76 3602 struct dmar_domain *domain;
d794dc9b 3603 unsigned long start_pfn, last_pfn;
769530e4 3604 unsigned long nrpages;
2aac6304 3605 unsigned long iova_pfn;
8c11e798 3606 struct intel_iommu *iommu;
ea8ea460 3607 struct page *freelist;
f7b0c4ce 3608 struct pci_dev *pdev = NULL;
ba395927 3609
1525a29a 3610 domain = find_domain(dev);
ba395927
KA
3611 BUG_ON(!domain);
3612
8c11e798
WH
3613 iommu = domain_get_iommu(domain);
3614
2aac6304 3615 iova_pfn = IOVA_PFN(dev_addr);
ba395927 3616
769530e4 3617 nrpages = aligned_nrpages(dev_addr, size);
2aac6304 3618 start_pfn = mm_to_dma_pfn(iova_pfn);
769530e4 3619 last_pfn = start_pfn + nrpages - 1;
ba395927 3620
f7b0c4ce
LB
3621 if (dev_is_pci(dev))
3622 pdev = to_pci_dev(dev);
3623
932a6523 3624 dev_dbg(dev, "Device unmapping: pfn %lx-%lx\n", start_pfn, last_pfn);
ba395927 3625
ea8ea460 3626 freelist = domain_unmap(domain, start_pfn, last_pfn);
d794dc9b 3627
f7b0c4ce 3628 if (intel_iommu_strict || (pdev && pdev->untrusted)) {
a1ddcbe9 3629 iommu_flush_iotlb_psi(iommu, domain, start_pfn,
769530e4 3630 nrpages, !freelist, 0);
5e0d2a6f 3631 /* free iova */
22e2f9fa 3632 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages));
ea8ea460 3633 dma_free_pagelist(freelist);
5e0d2a6f 3634 } else {
13cf0174
JR
3635 queue_iova(&domain->iovad, iova_pfn, nrpages,
3636 (unsigned long)freelist);
5e0d2a6f 3637 /*
	3638	 * queue up the release of the unmap to save roughly 1/6th of
	3639	 * the CPU time otherwise spent on the iotlb flush operation...
3640 */
5e0d2a6f 3641 }
ba395927
KA
3642}
3643
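/*
 * Illustrative sketch (not driver code): how intel_unmap() above derives
 * the page-frame range to tear down from a DMA handle and a size.  The
 * values are examples only; IOVA_PFN() is simply addr >> PAGE_SHIFT here.
 */
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PAGE_MASK	(~(PAGE_SIZE - 1))

int main(void)
{
	uint64_t dev_addr = 0xfff00567ULL;	/* handle from the map path */
	size_t size = 0x2000;
	unsigned long start_pfn, last_pfn, nrpages;

	start_pfn = dev_addr >> PAGE_SHIFT;
	nrpages = ((dev_addr & ~PAGE_MASK) + size + PAGE_SIZE - 1) >> PAGE_SHIFT;
	last_pfn = start_pfn + nrpages - 1;

	/*
	 * In strict mode (or for untrusted devices) the driver flushes the
	 * IOTLB for exactly this range right away; otherwise the range is
	 * queued and flushed in batches to amortize the cost.
	 */
	printf("unmap pfns %#lx-%#lx (%lu pages)\n", start_pfn, last_pfn, nrpages);
	return 0;
}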
d41a4adb
JL
3644static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3645 size_t size, enum dma_data_direction dir,
00085f1e 3646 unsigned long attrs)
d41a4adb 3647{
9cc0c2af
CH
3648 if (iommu_need_mapping(dev))
3649 intel_unmap(dev, dev_addr, size);
3650 else
3651 dma_direct_unmap_page(dev, dev_addr, size, dir, attrs);
3652}
3653
3654static void intel_unmap_resource(struct device *dev, dma_addr_t dev_addr,
3655 size_t size, enum dma_data_direction dir, unsigned long attrs)
3656{
3657 if (iommu_need_mapping(dev))
3658 intel_unmap(dev, dev_addr, size);
d41a4adb
JL
3659}
3660
5040a918 3661static void *intel_alloc_coherent(struct device *dev, size_t size,
baa676fc 3662 dma_addr_t *dma_handle, gfp_t flags,
00085f1e 3663 unsigned long attrs)
ba395927 3664{
7ec916f8
CH
3665 struct page *page = NULL;
3666 int order;
ba395927 3667
9cc0c2af
CH
3668 if (!iommu_need_mapping(dev))
3669 return dma_direct_alloc(dev, size, dma_handle, flags, attrs);
3670
7ec916f8
CH
3671 size = PAGE_ALIGN(size);
3672 order = get_order(size);
7ec916f8
CH
3673
3674 if (gfpflags_allow_blocking(flags)) {
3675 unsigned int count = size >> PAGE_SHIFT;
3676
d834c5ab
MS
3677 page = dma_alloc_from_contiguous(dev, count, order,
3678 flags & __GFP_NOWARN);
7ec916f8
CH
3679 }
3680
3681 if (!page)
3682 page = alloc_pages(flags, order);
3683 if (!page)
3684 return NULL;
3685 memset(page_address(page), 0, size);
3686
21d5d27c
LG
3687 *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
3688 DMA_BIDIRECTIONAL,
3689 dev->coherent_dma_mask);
524a669b 3690 if (*dma_handle != DMA_MAPPING_ERROR)
7ec916f8
CH
3691 return page_address(page);
3692 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3693 __free_pages(page, order);
36746436 3694
ba395927
KA
3695 return NULL;
3696}
3697
5040a918 3698static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
00085f1e 3699 dma_addr_t dma_handle, unsigned long attrs)
ba395927 3700{
7ec916f8
CH
3701 int order;
3702 struct page *page = virt_to_page(vaddr);
3703
9cc0c2af
CH
3704 if (!iommu_need_mapping(dev))
3705 return dma_direct_free(dev, size, vaddr, dma_handle, attrs);
3706
7ec916f8
CH
3707 size = PAGE_ALIGN(size);
3708 order = get_order(size);
3709
3710 intel_unmap(dev, dma_handle, size);
3711 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3712 __free_pages(page, order);
ba395927
KA
3713}
3714
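/*
 * Illustrative sketch (not driver code): the size-to-order conversion used
 * by intel_alloc_coherent()/intel_free_coherent() above, i.e. what
 * PAGE_ALIGN() and get_order() boil down to for 4 KiB pages.
 */
#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)

static int get_order_demo(unsigned long size)
{
	unsigned long pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
	int order = 0;

	while ((1UL << order) < pages)
		order++;
	return order;
}

int main(void)
{
	/* A 20 KiB request spans five pages and needs an order-3 (8-page) block. */
	unsigned long size = 20 * 1024;
	int order = get_order_demo(size);

	printf("size %lu -> order %d (%lu pages allocated)\n",
	       size, order, 1UL << order);
	return 0;
}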
5040a918 3715static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
d7ab5c46 3716 int nelems, enum dma_data_direction dir,
00085f1e 3717 unsigned long attrs)
ba395927 3718{
769530e4
OP
3719 dma_addr_t startaddr = sg_dma_address(sglist) & PAGE_MASK;
3720 unsigned long nrpages = 0;
3721 struct scatterlist *sg;
3722 int i;
3723
9cc0c2af
CH
3724 if (!iommu_need_mapping(dev))
3725 return dma_direct_unmap_sg(dev, sglist, nelems, dir, attrs);
3726
769530e4
OP
3727 for_each_sg(sglist, sg, nelems, i) {
3728 nrpages += aligned_nrpages(sg_dma_address(sg), sg_dma_len(sg));
3729 }
3730
3731 intel_unmap(dev, startaddr, nrpages << VTD_PAGE_SHIFT);
ba395927
KA
3732}
3733
5040a918 3734static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
00085f1e 3735 enum dma_data_direction dir, unsigned long attrs)
ba395927 3736{
ba395927 3737 int i;
ba395927 3738 struct dmar_domain *domain;
f76aec76
KA
3739 size_t size = 0;
3740 int prot = 0;
2aac6304 3741 unsigned long iova_pfn;
f76aec76 3742 int ret;
c03ab37c 3743 struct scatterlist *sg;
b536d24d 3744 unsigned long start_vpfn;
8c11e798 3745 struct intel_iommu *iommu;
ba395927
KA
3746
3747 BUG_ON(dir == DMA_NONE);
48b2c937 3748 if (!iommu_need_mapping(dev))
9cc0c2af 3749 return dma_direct_map_sg(dev, sglist, nelems, dir, attrs);
ba395927 3750
4ec066c7 3751 domain = find_domain(dev);
f76aec76
KA
3752 if (!domain)
3753 return 0;
3754
8c11e798
WH
3755 iommu = domain_get_iommu(domain);
3756
b536d24d 3757 for_each_sg(sglist, sg, nelems, i)
88cb6a74 3758 size += aligned_nrpages(sg->offset, sg->length);
f76aec76 3759
2aac6304 3760 iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
5040a918 3761 *dev->dma_mask);
2aac6304 3762 if (!iova_pfn) {
c03ab37c 3763 sglist->dma_length = 0;
f76aec76
KA
3764 return 0;
3765 }
3766
3767 /*
3768 * Check if DMAR supports zero-length reads on write only
3769 * mappings..
3770 */
3771 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3772 !cap_zlr(iommu->cap))
f76aec76
KA
3773 prot |= DMA_PTE_READ;
3774 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3775 prot |= DMA_PTE_WRITE;
3776
2aac6304 3777 start_vpfn = mm_to_dma_pfn(iova_pfn);
e1605495 3778
f532959b 3779 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
e1605495 3780 if (unlikely(ret)) {
e1605495 3781 dma_pte_free_pagetable(domain, start_vpfn,
bc24c571
DD
3782 start_vpfn + size - 1,
3783 agaw_to_level(domain->agaw) + 1);
22e2f9fa 3784 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
e1605495 3785 return 0;
ba395927
KA
3786 }
3787
ba395927
KA
3788 return nelems;
3789}
3790
02b4da5f 3791static const struct dma_map_ops intel_dma_ops = {
baa676fc
AP
3792 .alloc = intel_alloc_coherent,
3793 .free = intel_free_coherent,
ba395927
KA
3794 .map_sg = intel_map_sg,
3795 .unmap_sg = intel_unmap_sg,
ffbbef5c
FT
3796 .map_page = intel_map_page,
3797 .unmap_page = intel_unmap_page,
21d5d27c 3798 .map_resource = intel_map_resource,
9cc0c2af 3799 .unmap_resource = intel_unmap_resource,
fec777c3 3800 .dma_supported = dma_direct_supported,
ba395927
KA
3801};
3802
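/*
 * Illustrative sketch (not part of this file): once intel_dma_ops is
 * installed as the global dma_ops, an ordinary driver reaches it through
 * the generic DMA API.  "my_dev", "buf" and "len" are hypothetical; the
 * dma_map_single()/dma_unmap_single() calls are real kernel API and end up
 * in intel_map_page()/intel_unmap_page() via the ops table above.
 */
#include <linux/dma-mapping.h>
#include <linux/errno.h>

static int example_dma_xfer(struct device *my_dev, void *buf, size_t len)
{
	dma_addr_t handle;

	handle = dma_map_single(my_dev, buf, len, DMA_TO_DEVICE);
	if (dma_mapping_error(my_dev, handle))
		return -ENOMEM;

	/* ... program the device with "handle" and wait for completion ... */

	dma_unmap_single(my_dev, handle, len, DMA_TO_DEVICE);
	return 0;
}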
3803static inline int iommu_domain_cache_init(void)
3804{
3805 int ret = 0;
3806
3807 iommu_domain_cache = kmem_cache_create("iommu_domain",
3808 sizeof(struct dmar_domain),
3809 0,
3810 SLAB_HWCACHE_ALIGN,
3811
3812 NULL);
3813 if (!iommu_domain_cache) {
9f10e5bf 3814 pr_err("Couldn't create iommu_domain cache\n");
ba395927
KA
3815 ret = -ENOMEM;
3816 }
3817
3818 return ret;
3819}
3820
3821static inline int iommu_devinfo_cache_init(void)
3822{
3823 int ret = 0;
3824
3825 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3826 sizeof(struct device_domain_info),
3827 0,
3828 SLAB_HWCACHE_ALIGN,
ba395927
KA
3829 NULL);
3830 if (!iommu_devinfo_cache) {
9f10e5bf 3831 pr_err("Couldn't create devinfo cache\n");
ba395927
KA
3832 ret = -ENOMEM;
3833 }
3834
3835 return ret;
3836}
3837
ba395927
KA
3838static int __init iommu_init_mempool(void)
3839{
3840 int ret;
ae1ff3d6 3841 ret = iova_cache_get();
ba395927
KA
3842 if (ret)
3843 return ret;
3844
3845 ret = iommu_domain_cache_init();
3846 if (ret)
3847 goto domain_error;
3848
3849 ret = iommu_devinfo_cache_init();
3850 if (!ret)
3851 return ret;
3852
3853 kmem_cache_destroy(iommu_domain_cache);
3854domain_error:
ae1ff3d6 3855 iova_cache_put();
ba395927
KA
3856
3857 return -ENOMEM;
3858}
3859
3860static void __init iommu_exit_mempool(void)
3861{
3862 kmem_cache_destroy(iommu_devinfo_cache);
3863 kmem_cache_destroy(iommu_domain_cache);
ae1ff3d6 3864 iova_cache_put();
ba395927
KA
3865}
3866
556ab45f
DW
3867static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3868{
3869 struct dmar_drhd_unit *drhd;
3870 u32 vtbar;
3871 int rc;
3872
3873 /* We know that this device on this chipset has its own IOMMU.
3874 * If we find it under a different IOMMU, then the BIOS is lying
3875 * to us. Hope that the IOMMU for this device is actually
3876 * disabled, and it needs no translation...
3877 */
3878 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3879 if (rc) {
3880 /* "can't" happen */
3881 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3882 return;
3883 }
3884 vtbar &= 0xffff0000;
3885
	3886	/* we know that this iommu should be at offset 0xa000 from vtbar */
3887 drhd = dmar_find_matched_drhd_unit(pdev);
3888 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3889 TAINT_FIRMWARE_WORKAROUND,
3890 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3891 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3892}
3893DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3894
ba395927
KA
3895static void __init init_no_remapping_devices(void)
3896{
3897 struct dmar_drhd_unit *drhd;
832bd858 3898 struct device *dev;
b683b230 3899 int i;
ba395927
KA
3900
3901 for_each_drhd_unit(drhd) {
3902 if (!drhd->include_all) {
b683b230
JL
3903 for_each_active_dev_scope(drhd->devices,
3904 drhd->devices_cnt, i, dev)
3905 break;
832bd858 3906 /* ignore DMAR unit if no devices exist */
ba395927
KA
3907 if (i == drhd->devices_cnt)
3908 drhd->ignored = 1;
3909 }
3910 }
3911
7c919779 3912 for_each_active_drhd_unit(drhd) {
7c919779 3913 if (drhd->include_all)
ba395927
KA
3914 continue;
3915
b683b230
JL
3916 for_each_active_dev_scope(drhd->devices,
3917 drhd->devices_cnt, i, dev)
832bd858 3918 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
ba395927 3919 break;
ba395927
KA
3920 if (i < drhd->devices_cnt)
3921 continue;
3922
c0771df8
DW
3923 /* This IOMMU has *only* gfx devices. Either bypass it or
3924 set the gfx_mapped flag, as appropriate */
cf1ec453 3925 if (!dmar_map_gfx) {
c0771df8 3926 drhd->ignored = 1;
b683b230
JL
3927 for_each_active_dev_scope(drhd->devices,
3928 drhd->devices_cnt, i, dev)
832bd858 3929 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
ba395927
KA
3930 }
3931 }
3932}
3933
f59c7b69
FY
3934#ifdef CONFIG_SUSPEND
3935static int init_iommu_hw(void)
3936{
3937 struct dmar_drhd_unit *drhd;
3938 struct intel_iommu *iommu = NULL;
3939
3940 for_each_active_iommu(iommu, drhd)
3941 if (iommu->qi)
3942 dmar_reenable_qi(iommu);
3943
b779260b
JC
3944 for_each_iommu(iommu, drhd) {
3945 if (drhd->ignored) {
3946 /*
3947 * we always have to disable PMRs or DMA may fail on
3948 * this device
3949 */
3950 if (force_on)
3951 iommu_disable_protect_mem_regions(iommu);
3952 continue;
3953 }
095303e0 3954
f59c7b69
FY
3955 iommu_flush_write_buffer(iommu);
3956
3957 iommu_set_root_entry(iommu);
3958
3959 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3960 DMA_CCMD_GLOBAL_INVL);
2a41ccee
JL
3961 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3962 iommu_enable_translation(iommu);
b94996c9 3963 iommu_disable_protect_mem_regions(iommu);
f59c7b69
FY
3964 }
3965
3966 return 0;
3967}
3968
3969static void iommu_flush_all(void)
3970{
3971 struct dmar_drhd_unit *drhd;
3972 struct intel_iommu *iommu;
3973
3974 for_each_active_iommu(iommu, drhd) {
3975 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3976 DMA_CCMD_GLOBAL_INVL);
f59c7b69 3977 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 3978 DMA_TLB_GLOBAL_FLUSH);
f59c7b69
FY
3979 }
3980}
3981
134fac3f 3982static int iommu_suspend(void)
f59c7b69
FY
3983{
3984 struct dmar_drhd_unit *drhd;
3985 struct intel_iommu *iommu = NULL;
3986 unsigned long flag;
3987
3988 for_each_active_iommu(iommu, drhd) {
6396bb22 3989 iommu->iommu_state = kcalloc(MAX_SR_DMAR_REGS, sizeof(u32),
f59c7b69
FY
3990 GFP_ATOMIC);
3991 if (!iommu->iommu_state)
3992 goto nomem;
3993 }
3994
3995 iommu_flush_all();
3996
3997 for_each_active_iommu(iommu, drhd) {
3998 iommu_disable_translation(iommu);
3999
1f5b3c3f 4000 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
4001
4002 iommu->iommu_state[SR_DMAR_FECTL_REG] =
4003 readl(iommu->reg + DMAR_FECTL_REG);
4004 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
4005 readl(iommu->reg + DMAR_FEDATA_REG);
4006 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
4007 readl(iommu->reg + DMAR_FEADDR_REG);
4008 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
4009 readl(iommu->reg + DMAR_FEUADDR_REG);
4010
1f5b3c3f 4011 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
4012 }
4013 return 0;
4014
4015nomem:
4016 for_each_active_iommu(iommu, drhd)
4017 kfree(iommu->iommu_state);
4018
4019 return -ENOMEM;
4020}
4021
134fac3f 4022static void iommu_resume(void)
f59c7b69
FY
4023{
4024 struct dmar_drhd_unit *drhd;
4025 struct intel_iommu *iommu = NULL;
4026 unsigned long flag;
4027
4028 if (init_iommu_hw()) {
b779260b
JC
4029 if (force_on)
4030 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
4031 else
4032 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
134fac3f 4033 return;
f59c7b69
FY
4034 }
4035
4036 for_each_active_iommu(iommu, drhd) {
4037
1f5b3c3f 4038 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
4039
4040 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
4041 iommu->reg + DMAR_FECTL_REG);
4042 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
4043 iommu->reg + DMAR_FEDATA_REG);
4044 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
4045 iommu->reg + DMAR_FEADDR_REG);
4046 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
4047 iommu->reg + DMAR_FEUADDR_REG);
4048
1f5b3c3f 4049 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
4050 }
4051
4052 for_each_active_iommu(iommu, drhd)
4053 kfree(iommu->iommu_state);
f59c7b69
FY
4054}
4055
134fac3f 4056static struct syscore_ops iommu_syscore_ops = {
f59c7b69
FY
4057 .resume = iommu_resume,
4058 .suspend = iommu_suspend,
4059};
4060
134fac3f 4061static void __init init_iommu_pm_ops(void)
f59c7b69 4062{
134fac3f 4063 register_syscore_ops(&iommu_syscore_ops);
f59c7b69
FY
4064}
4065
4066#else
99592ba4 4067static inline void init_iommu_pm_ops(void) {}
f59c7b69
FY
	4068#endif /* CONFIG_SUSPEND */
4069
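/*
 * Illustrative sketch (not part of this file): the save/restore pattern
 * used by iommu_suspend()/iommu_resume() above, reduced to one hypothetical
 * MMIO unit.  "my_regs" and the register offsets are made up;
 * register_syscore_ops(), readl() and writel() are real kernel API.
 */
#include <linux/io.h>
#include <linux/syscore_ops.h>

static void __iomem *my_regs;	/* assumed to be ioremap()ed elsewhere */
static u32 my_state[2];
static const unsigned int my_offsets[2] = { 0x30, 0x34 };

static int my_unit_suspend(void)
{
	int i;

	for (i = 0; i < 2; i++)
		my_state[i] = readl(my_regs + my_offsets[i]);
	return 0;
}

static void my_unit_resume(void)
{
	int i;

	for (i = 0; i < 2; i++)
		writel(my_state[i], my_regs + my_offsets[i]);
}

static struct syscore_ops my_unit_syscore_ops = {
	.suspend = my_unit_suspend,
	.resume = my_unit_resume,
};

/* Called from the unit's init path, mirroring init_iommu_pm_ops(). */
static void my_unit_pm_init(void)
{
	register_syscore_ops(&my_unit_syscore_ops);
}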
318fe7df 4070
c2a0b538 4071int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
318fe7df
SS
4072{
4073 struct acpi_dmar_reserved_memory *rmrr;
4074 struct dmar_rmrr_unit *rmrru;
0659b8dc 4075 size_t length;
318fe7df
SS
4076
4077 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
4078 if (!rmrru)
0659b8dc 4079 goto out;
318fe7df
SS
4080
4081 rmrru->hdr = header;
4082 rmrr = (struct acpi_dmar_reserved_memory *)header;
4083 rmrru->base_address = rmrr->base_address;
4084 rmrru->end_address = rmrr->end_address;
0659b8dc
EA
4085
4086 length = rmrr->end_address - rmrr->base_address + 1;
0659b8dc 4087
2e455289
JL
4088 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
4089 ((void *)rmrr) + rmrr->header.length,
4090 &rmrru->devices_cnt);
0659b8dc 4091 if (rmrru->devices_cnt && rmrru->devices == NULL)
5f64ce54 4092 goto free_rmrru;
318fe7df 4093
2e455289 4094 list_add(&rmrru->list, &dmar_rmrr_units);
318fe7df 4095
2e455289 4096 return 0;
0659b8dc
EA
4097free_rmrru:
4098 kfree(rmrru);
4099out:
4100 return -ENOMEM;
318fe7df
SS
4101}
4102
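/*
 * Illustrative sketch (not driver code): the layout assumptions behind
 * dmar_parse_one_rmrr() above.  The struct below only mimics the fields of
 * ACPI's RMRR entry that the parser uses; it is not the real
 * acpi_dmar_reserved_memory definition, and the addresses are examples.
 */
#include <stdint.h>
#include <stdio.h>

struct fake_rmrr {
	uint16_t type;		/* DMAR sub-table header: type ... */
	uint16_t length;	/* ... and total length, incl. device scopes */
	uint16_t reserved;
	uint16_t segment;
	uint64_t base_address;	/* first byte of the reserved region */
	uint64_t end_address;	/* last byte, inclusive */
	/* variable-length device-scope entries follow here */
};

int main(void)
{
	struct fake_rmrr rmrr = {
		.length = sizeof(rmrr) + 8,	/* pretend one scope entry */
		.base_address = 0x7a000000,
		.end_address  = 0x7a1fffff,
	};

	/* end_address is inclusive, hence the +1 when sizing the region. */
	printf("RMRR covers %llu bytes; scope entries occupy %zu bytes\n",
	       (unsigned long long)(rmrr.end_address - rmrr.base_address + 1),
	       (size_t)rmrr.length - sizeof(rmrr));
	return 0;
}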
6b197249
JL
4103static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
4104{
4105 struct dmar_atsr_unit *atsru;
4106 struct acpi_dmar_atsr *tmp;
4107
4108 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4109 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
4110 if (atsr->segment != tmp->segment)
4111 continue;
4112 if (atsr->header.length != tmp->header.length)
4113 continue;
4114 if (memcmp(atsr, tmp, atsr->header.length) == 0)
4115 return atsru;
4116 }
4117
4118 return NULL;
4119}
4120
4121int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
318fe7df
SS
4122{
4123 struct acpi_dmar_atsr *atsr;
4124 struct dmar_atsr_unit *atsru;
4125
b608fe35 4126 if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
6b197249
JL
4127 return 0;
4128
318fe7df 4129 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
6b197249
JL
4130 atsru = dmar_find_atsr(atsr);
4131 if (atsru)
4132 return 0;
4133
4134 atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
318fe7df
SS
4135 if (!atsru)
4136 return -ENOMEM;
4137
6b197249
JL
4138 /*
4139 * If memory is allocated from slab by ACPI _DSM method, we need to
4140 * copy the memory content because the memory buffer will be freed
4141 * on return.
4142 */
4143 atsru->hdr = (void *)(atsru + 1);
4144 memcpy(atsru->hdr, hdr, hdr->length);
318fe7df 4145 atsru->include_all = atsr->flags & 0x1;
2e455289
JL
4146 if (!atsru->include_all) {
4147 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
4148 (void *)atsr + atsr->header.length,
4149 &atsru->devices_cnt);
4150 if (atsru->devices_cnt && atsru->devices == NULL) {
4151 kfree(atsru);
4152 return -ENOMEM;
4153 }
4154 }
318fe7df 4155
0e242612 4156 list_add_rcu(&atsru->list, &dmar_atsr_units);
318fe7df
SS
4157
4158 return 0;
4159}
4160
9bdc531e
JL
4161static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
4162{
4163 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
4164 kfree(atsru);
4165}
4166
6b197249
JL
4167int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4168{
4169 struct acpi_dmar_atsr *atsr;
4170 struct dmar_atsr_unit *atsru;
4171
4172 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4173 atsru = dmar_find_atsr(atsr);
4174 if (atsru) {
4175 list_del_rcu(&atsru->list);
4176 synchronize_rcu();
4177 intel_iommu_free_atsr(atsru);
4178 }
4179
4180 return 0;
4181}
4182
4183int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4184{
4185 int i;
4186 struct device *dev;
4187 struct acpi_dmar_atsr *atsr;
4188 struct dmar_atsr_unit *atsru;
4189
4190 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4191 atsru = dmar_find_atsr(atsr);
4192 if (!atsru)
4193 return 0;
4194
194dc870 4195 if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
6b197249
JL
4196 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
4197 i, dev)
4198 return -EBUSY;
194dc870 4199 }
6b197249
JL
4200
4201 return 0;
4202}
4203
ffebeb46
JL
4204static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
4205{
e083ea5b 4206 int sp, ret;
ffebeb46
JL
4207 struct intel_iommu *iommu = dmaru->iommu;
4208
4209 if (g_iommus[iommu->seq_id])
4210 return 0;
4211
4212 if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
9f10e5bf 4213 pr_warn("%s: Doesn't support hardware pass through.\n",
ffebeb46
JL
4214 iommu->name);
4215 return -ENXIO;
4216 }
4217 if (!ecap_sc_support(iommu->ecap) &&
4218 domain_update_iommu_snooping(iommu)) {
9f10e5bf 4219 pr_warn("%s: Doesn't support snooping.\n",
ffebeb46
JL
4220 iommu->name);
4221 return -ENXIO;
4222 }
4223 sp = domain_update_iommu_superpage(iommu) - 1;
4224 if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
9f10e5bf 4225 pr_warn("%s: Doesn't support large page.\n",
ffebeb46
JL
4226 iommu->name);
4227 return -ENXIO;
4228 }
4229
4230 /*
4231 * Disable translation if already enabled prior to OS handover.
4232 */
4233 if (iommu->gcmd & DMA_GCMD_TE)
4234 iommu_disable_translation(iommu);
4235
4236 g_iommus[iommu->seq_id] = iommu;
4237 ret = iommu_init_domains(iommu);
4238 if (ret == 0)
4239 ret = iommu_alloc_root_entry(iommu);
4240 if (ret)
4241 goto out;
4242
8a94ade4 4243#ifdef CONFIG_INTEL_IOMMU_SVM
765b6a98 4244 if (pasid_supported(iommu))
d9737953 4245 intel_svm_init(iommu);
8a94ade4
DW
4246#endif
4247
ffebeb46
JL
4248 if (dmaru->ignored) {
4249 /*
4250 * we always have to disable PMRs or DMA may fail on this device
4251 */
4252 if (force_on)
4253 iommu_disable_protect_mem_regions(iommu);
4254 return 0;
4255 }
4256
4257 intel_iommu_init_qi(iommu);
4258 iommu_flush_write_buffer(iommu);
a222a7f0
DW
4259
4260#ifdef CONFIG_INTEL_IOMMU_SVM
765b6a98 4261 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
a222a7f0
DW
4262 ret = intel_svm_enable_prq(iommu);
4263 if (ret)
4264 goto disable_iommu;
4265 }
4266#endif
ffebeb46
JL
4267 ret = dmar_set_interrupt(iommu);
4268 if (ret)
4269 goto disable_iommu;
4270
4271 iommu_set_root_entry(iommu);
4272 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
4273 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4274 iommu_enable_translation(iommu);
4275
ffebeb46
JL
4276 iommu_disable_protect_mem_regions(iommu);
4277 return 0;
4278
4279disable_iommu:
4280 disable_dmar_iommu(iommu);
4281out:
4282 free_dmar_iommu(iommu);
4283 return ret;
4284}
4285
6b197249
JL
4286int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
4287{
ffebeb46
JL
4288 int ret = 0;
4289 struct intel_iommu *iommu = dmaru->iommu;
4290
4291 if (!intel_iommu_enabled)
4292 return 0;
4293 if (iommu == NULL)
4294 return -EINVAL;
4295
4296 if (insert) {
4297 ret = intel_iommu_add(dmaru);
4298 } else {
4299 disable_dmar_iommu(iommu);
4300 free_dmar_iommu(iommu);
4301 }
4302
4303 return ret;
6b197249
JL
4304}
4305
9bdc531e
JL
4306static void intel_iommu_free_dmars(void)
4307{
4308 struct dmar_rmrr_unit *rmrru, *rmrr_n;
4309 struct dmar_atsr_unit *atsru, *atsr_n;
4310
4311 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
4312 list_del(&rmrru->list);
4313 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
4314 kfree(rmrru);
318fe7df
SS
4315 }
4316
9bdc531e
JL
4317 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
4318 list_del(&atsru->list);
4319 intel_iommu_free_atsr(atsru);
4320 }
318fe7df
SS
4321}
4322
4323int dmar_find_matched_atsr_unit(struct pci_dev *dev)
4324{
b683b230 4325 int i, ret = 1;
318fe7df 4326 struct pci_bus *bus;
832bd858
DW
4327 struct pci_dev *bridge = NULL;
4328 struct device *tmp;
318fe7df
SS
4329 struct acpi_dmar_atsr *atsr;
4330 struct dmar_atsr_unit *atsru;
4331
4332 dev = pci_physfn(dev);
318fe7df 4333 for (bus = dev->bus; bus; bus = bus->parent) {
b5f82ddf 4334 bridge = bus->self;
d14053b3
DW
4335 /* If it's an integrated device, allow ATS */
4336 if (!bridge)
4337 return 1;
4338 /* Connected via non-PCIe: no ATS */
4339 if (!pci_is_pcie(bridge) ||
62f87c0e 4340 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
318fe7df 4341 return 0;
d14053b3 4342 /* If we found the root port, look it up in the ATSR */
b5f82ddf 4343 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
318fe7df 4344 break;
318fe7df
SS
4345 }
4346
0e242612 4347 rcu_read_lock();
b5f82ddf
JL
4348 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4349 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4350 if (atsr->segment != pci_domain_nr(dev->bus))
4351 continue;
4352
b683b230 4353 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
832bd858 4354 if (tmp == &bridge->dev)
b683b230 4355 goto out;
b5f82ddf
JL
4356
4357 if (atsru->include_all)
b683b230 4358 goto out;
b5f82ddf 4359 }
b683b230
JL
4360 ret = 0;
4361out:
0e242612 4362 rcu_read_unlock();
318fe7df 4363
b683b230 4364 return ret;
318fe7df
SS
4365}
4366
59ce0515
JL
4367int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
4368{
e083ea5b 4369 int ret;
59ce0515
JL
4370 struct dmar_rmrr_unit *rmrru;
4371 struct dmar_atsr_unit *atsru;
4372 struct acpi_dmar_atsr *atsr;
4373 struct acpi_dmar_reserved_memory *rmrr;
4374
b608fe35 4375 if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING)
59ce0515
JL
4376 return 0;
4377
4378 list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
4379 rmrr = container_of(rmrru->hdr,
4380 struct acpi_dmar_reserved_memory, header);
4381 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4382 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
4383 ((void *)rmrr) + rmrr->header.length,
4384 rmrr->segment, rmrru->devices,
4385 rmrru->devices_cnt);
e083ea5b 4386 if (ret < 0)
59ce0515 4387 return ret;
e6a8c9b3 4388 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
27e24950
JL
4389 dmar_remove_dev_scope(info, rmrr->segment,
4390 rmrru->devices, rmrru->devices_cnt);
59ce0515
JL
4391 }
4392 }
4393
4394 list_for_each_entry(atsru, &dmar_atsr_units, list) {
4395 if (atsru->include_all)
4396 continue;
4397
4398 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4399 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4400 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
4401 (void *)atsr + atsr->header.length,
4402 atsr->segment, atsru->devices,
4403 atsru->devices_cnt);
4404 if (ret > 0)
4405 break;
e083ea5b 4406 else if (ret < 0)
59ce0515 4407 return ret;
e6a8c9b3 4408 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
59ce0515
JL
4409 if (dmar_remove_dev_scope(info, atsr->segment,
4410 atsru->devices, atsru->devices_cnt))
4411 break;
4412 }
4413 }
4414
4415 return 0;
4416}
4417
75f05569
JL
4418static int intel_iommu_memory_notifier(struct notifier_block *nb,
4419 unsigned long val, void *v)
4420{
4421 struct memory_notify *mhp = v;
4422 unsigned long long start, end;
4423 unsigned long start_vpfn, last_vpfn;
4424
4425 switch (val) {
4426 case MEM_GOING_ONLINE:
4427 start = mhp->start_pfn << PAGE_SHIFT;
4428 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
4429 if (iommu_domain_identity_map(si_domain, start, end)) {
9f10e5bf 4430 pr_warn("Failed to build identity map for [%llx-%llx]\n",
75f05569
JL
4431 start, end);
4432 return NOTIFY_BAD;
4433 }
4434 break;
4435
4436 case MEM_OFFLINE:
4437 case MEM_CANCEL_ONLINE:
4438 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4439 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
4440 while (start_vpfn <= last_vpfn) {
4441 struct iova *iova;
4442 struct dmar_drhd_unit *drhd;
4443 struct intel_iommu *iommu;
ea8ea460 4444 struct page *freelist;
75f05569
JL
4445
4446 iova = find_iova(&si_domain->iovad, start_vpfn);
4447 if (iova == NULL) {
9f10e5bf 4448 pr_debug("Failed get IOVA for PFN %lx\n",
75f05569
JL
4449 start_vpfn);
4450 break;
4451 }
4452
4453 iova = split_and_remove_iova(&si_domain->iovad, iova,
4454 start_vpfn, last_vpfn);
4455 if (iova == NULL) {
9f10e5bf 4456 pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
75f05569
JL
4457 start_vpfn, last_vpfn);
4458 return NOTIFY_BAD;
4459 }
4460
ea8ea460
DW
4461 freelist = domain_unmap(si_domain, iova->pfn_lo,
4462 iova->pfn_hi);
4463
75f05569
JL
4464 rcu_read_lock();
4465 for_each_active_iommu(iommu, drhd)
a1ddcbe9 4466 iommu_flush_iotlb_psi(iommu, si_domain,
a156ef99 4467 iova->pfn_lo, iova_size(iova),
ea8ea460 4468 !freelist, 0);
75f05569 4469 rcu_read_unlock();
ea8ea460 4470 dma_free_pagelist(freelist);
75f05569
JL
4471
4472 start_vpfn = iova->pfn_hi + 1;
4473 free_iova_mem(iova);
4474 }
4475 break;
4476 }
4477
4478 return NOTIFY_OK;
4479}
4480
4481static struct notifier_block intel_iommu_memory_nb = {
4482 .notifier_call = intel_iommu_memory_notifier,
4483 .priority = 0
4484};
4485
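/*
 * Illustrative sketch (not part of this file): the shape of a memory
 * hot-plug notifier like intel_iommu_memory_nb above.  "my_mem_event" and
 * "my_mem_nb" are hypothetical; register_memory_notifier() and the MEM_*
 * events are real kernel API from linux/memory.h.
 */
#include <linux/memory.h>
#include <linux/notifier.h>
#include <linux/printk.h>

static int my_mem_event(struct notifier_block *nb, unsigned long val, void *v)
{
	struct memory_notify *mhp = v;

	switch (val) {
	case MEM_GOING_ONLINE:
		/* e.g. pre-map the new range, as done for si_domain above */
		pr_info("memory going online at pfn %#lx (%lu pages)\n",
			mhp->start_pfn, mhp->nr_pages);
		break;
	case MEM_OFFLINE:
	case MEM_CANCEL_ONLINE:
		/* e.g. tear the range back down */
		break;
	}

	return NOTIFY_OK;
}

static struct notifier_block my_mem_nb = {
	.notifier_call = my_mem_event,
	.priority = 0,
};

static void my_mem_nb_init(void)
{
	register_memory_notifier(&my_mem_nb);
}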
22e2f9fa
OP
4486static void free_all_cpu_cached_iovas(unsigned int cpu)
4487{
4488 int i;
4489
4490 for (i = 0; i < g_num_of_iommus; i++) {
4491 struct intel_iommu *iommu = g_iommus[i];
4492 struct dmar_domain *domain;
0caa7616 4493 int did;
22e2f9fa
OP
4494
4495 if (!iommu)
4496 continue;
4497
3bd4f911 4498 for (did = 0; did < cap_ndoms(iommu->cap); did++) {
0caa7616 4499 domain = get_iommu_domain(iommu, (u16)did);
22e2f9fa
OP
4500
4501 if (!domain)
4502 continue;
4503 free_cpu_cached_iovas(cpu, &domain->iovad);
4504 }
4505 }
4506}
4507
21647615 4508static int intel_iommu_cpu_dead(unsigned int cpu)
aa473240 4509{
21647615 4510 free_all_cpu_cached_iovas(cpu);
21647615 4511 return 0;
aa473240
OP
4512}
4513
161b28aa
JR
4514static void intel_disable_iommus(void)
4515{
4516 struct intel_iommu *iommu = NULL;
4517 struct dmar_drhd_unit *drhd;
4518
4519 for_each_iommu(iommu, drhd)
4520 iommu_disable_translation(iommu);
4521}
4522
a7fdb6e6
JR
4523static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
4524{
2926a2aa
JR
4525 struct iommu_device *iommu_dev = dev_to_iommu_device(dev);
4526
4527 return container_of(iommu_dev, struct intel_iommu, iommu);
a7fdb6e6
JR
4528}
4529
a5459cfe
AW
4530static ssize_t intel_iommu_show_version(struct device *dev,
4531 struct device_attribute *attr,
4532 char *buf)
4533{
a7fdb6e6 4534 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4535 u32 ver = readl(iommu->reg + DMAR_VER_REG);
4536 return sprintf(buf, "%d:%d\n",
4537 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4538}
4539static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
4540
4541static ssize_t intel_iommu_show_address(struct device *dev,
4542 struct device_attribute *attr,
4543 char *buf)
4544{
a7fdb6e6 4545 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4546 return sprintf(buf, "%llx\n", iommu->reg_phys);
4547}
4548static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
4549
4550static ssize_t intel_iommu_show_cap(struct device *dev,
4551 struct device_attribute *attr,
4552 char *buf)
4553{
a7fdb6e6 4554 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4555 return sprintf(buf, "%llx\n", iommu->cap);
4556}
4557static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
4558
4559static ssize_t intel_iommu_show_ecap(struct device *dev,
4560 struct device_attribute *attr,
4561 char *buf)
4562{
a7fdb6e6 4563 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4564 return sprintf(buf, "%llx\n", iommu->ecap);
4565}
4566static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4567
2238c082
AW
4568static ssize_t intel_iommu_show_ndoms(struct device *dev,
4569 struct device_attribute *attr,
4570 char *buf)
4571{
a7fdb6e6 4572 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2238c082
AW
4573 return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
4574}
4575static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL);
4576
4577static ssize_t intel_iommu_show_ndoms_used(struct device *dev,
4578 struct device_attribute *attr,
4579 char *buf)
4580{
a7fdb6e6 4581 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2238c082
AW
4582 return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
4583 cap_ndoms(iommu->cap)));
4584}
4585static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL);
4586
a5459cfe
AW
4587static struct attribute *intel_iommu_attrs[] = {
4588 &dev_attr_version.attr,
4589 &dev_attr_address.attr,
4590 &dev_attr_cap.attr,
4591 &dev_attr_ecap.attr,
2238c082
AW
4592 &dev_attr_domains_supported.attr,
4593 &dev_attr_domains_used.attr,
a5459cfe
AW
4594 NULL,
4595};
4596
4597static struct attribute_group intel_iommu_group = {
4598 .name = "intel-iommu",
4599 .attrs = intel_iommu_attrs,
4600};
4601
4602const struct attribute_group *intel_iommu_groups[] = {
4603 &intel_iommu_group,
4604 NULL,
4605};
4606
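/*
 * Illustrative sketch (not driver code): reading the attributes exported
 * above from user space.  The sysfs path assumes an IOMMU registered as
 * "dmar0"; adjust it for the DMAR unit actually present on the machine.
 */
#include <stdio.h>

int main(void)
{
	const char *path = "/sys/class/iommu/dmar0/intel-iommu/cap";
	char buf[64];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return 1;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("capability register: %s", buf);
	fclose(f);
	return 0;
}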
89a6079d
LB
4607static int __init platform_optin_force_iommu(void)
4608{
4609 struct pci_dev *pdev = NULL;
4610 bool has_untrusted_dev = false;
4611
4612 if (!dmar_platform_optin() || no_platform_optin)
4613 return 0;
4614
4615 for_each_pci_dev(pdev) {
4616 if (pdev->untrusted) {
4617 has_untrusted_dev = true;
4618 break;
4619 }
4620 }
4621
4622 if (!has_untrusted_dev)
4623 return 0;
4624
4625 if (no_iommu || dmar_disabled)
4626 pr_info("Intel-IOMMU force enabled due to platform opt in\n");
4627
4628 /*
4629 * If Intel-IOMMU is disabled by default, we will apply identity
4630 * map for all devices except those marked as being untrusted.
4631 */
4632 if (dmar_disabled)
4633 iommu_identity_mapping |= IDENTMAP_ALL;
4634
4635 dmar_disabled = 0;
4636#if defined(CONFIG_X86) && defined(CONFIG_SWIOTLB)
4637 swiotlb = 0;
4638#endif
4639 no_iommu = 0;
4640
4641 return 1;
4642}
4643
fa212a97
LB
4644static int __init probe_acpi_namespace_devices(void)
4645{
4646 struct dmar_drhd_unit *drhd;
4647 struct intel_iommu *iommu;
4648 struct device *dev;
4649 int i, ret = 0;
4650
4651 for_each_active_iommu(iommu, drhd) {
4652 for_each_active_dev_scope(drhd->devices,
4653 drhd->devices_cnt, i, dev) {
4654 struct acpi_device_physical_node *pn;
4655 struct iommu_group *group;
4656 struct acpi_device *adev;
4657
4658 if (dev->bus != &acpi_bus_type)
4659 continue;
4660
4661 adev = to_acpi_device(dev);
4662 mutex_lock(&adev->physical_node_lock);
4663 list_for_each_entry(pn,
4664 &adev->physical_node_list, node) {
4665 group = iommu_group_get(pn->dev);
4666 if (group) {
4667 iommu_group_put(group);
4668 continue;
4669 }
4670
4671 pn->dev->bus->iommu_ops = &intel_iommu_ops;
4672 ret = iommu_probe_device(pn->dev);
4673 if (ret)
4674 break;
4675 }
4676 mutex_unlock(&adev->physical_node_lock);
4677
4678 if (ret)
4679 return ret;
4680 }
4681 }
4682
4683 return 0;
4684}
4685
ba395927
KA
4686int __init intel_iommu_init(void)
4687{
9bdc531e 4688 int ret = -ENODEV;
3a93c841 4689 struct dmar_drhd_unit *drhd;
7c919779 4690 struct intel_iommu *iommu;
ba395927 4691
89a6079d
LB
4692 /*
4693 * Intel IOMMU is required for a TXT/tboot launch or platform
4694 * opt in, so enforce that.
4695 */
4696 force_on = tboot_force_iommu() || platform_optin_force_iommu();
a59b50e9 4697
3a5670e8
JL
4698 if (iommu_init_mempool()) {
4699 if (force_on)
4700 panic("tboot: Failed to initialize iommu memory\n");
4701 return -ENOMEM;
4702 }
4703
4704 down_write(&dmar_global_lock);
a59b50e9
JC
4705 if (dmar_table_init()) {
4706 if (force_on)
4707 panic("tboot: Failed to initialize DMAR table\n");
9bdc531e 4708 goto out_free_dmar;
a59b50e9 4709 }
ba395927 4710
c2c7286a 4711 if (dmar_dev_scope_init() < 0) {
a59b50e9
JC
4712 if (force_on)
4713 panic("tboot: Failed to initialize DMAR device scope\n");
9bdc531e 4714 goto out_free_dmar;
a59b50e9 4715 }
1886e8a9 4716
ec154bf5
JR
4717 up_write(&dmar_global_lock);
4718
4719 /*
4720 * The bus notifier takes the dmar_global_lock, so lockdep will
4721 * complain later when we register it under the lock.
4722 */
4723 dmar_register_bus_notifier();
4724
4725 down_write(&dmar_global_lock);
4726
161b28aa 4727 if (no_iommu || dmar_disabled) {
bfd20f1c
SL
4728 /*
	4729	 * We exit the function here to ensure IOMMU's remapping and
	4730	 * mempool aren't set up, which means that the IOMMU's PMRs
	4731	 * won't be disabled via the call to init_dmars(). So disable
	4732	 * them explicitly here. The PMRs were set up by tboot prior to
	4733	 * calling SENTER, but the kernel is expected to reset/tear
	4734	 * down the PMRs.
4735 */
4736 if (intel_iommu_tboot_noforce) {
4737 for_each_iommu(iommu, drhd)
4738 iommu_disable_protect_mem_regions(iommu);
4739 }
4740
161b28aa
JR
4741 /*
4742 * Make sure the IOMMUs are switched off, even when we
4743 * boot into a kexec kernel and the previous kernel left
4744 * them enabled
4745 */
4746 intel_disable_iommus();
9bdc531e 4747 goto out_free_dmar;
161b28aa 4748 }
2ae21010 4749
318fe7df 4750 if (list_empty(&dmar_rmrr_units))
9f10e5bf 4751 pr_info("No RMRR found\n");
318fe7df
SS
4752
4753 if (list_empty(&dmar_atsr_units))
9f10e5bf 4754 pr_info("No ATSR found\n");
318fe7df 4755
51a63e67
JC
4756 if (dmar_init_reserved_ranges()) {
4757 if (force_on)
4758 panic("tboot: Failed to reserve iommu ranges\n");
3a5670e8 4759 goto out_free_reserved_range;
51a63e67 4760 }
ba395927 4761
cf1ec453
LB
4762 if (dmar_map_gfx)
4763 intel_iommu_gfx_mapped = 1;
4764
ba395927
KA
4765 init_no_remapping_devices();
4766
b779260b 4767 ret = init_dmars();
ba395927 4768 if (ret) {
a59b50e9
JC
4769 if (force_on)
4770 panic("tboot: Failed to initialize DMARs\n");
9f10e5bf 4771 pr_err("Initialization failed\n");
9bdc531e 4772 goto out_free_reserved_range;
ba395927 4773 }
3a5670e8 4774 up_write(&dmar_global_lock);
ba395927 4775
4fac8076 4776#if defined(CONFIG_X86) && defined(CONFIG_SWIOTLB)
75f1cdf1
FT
4777 swiotlb = 0;
4778#endif
19943b0e 4779 dma_ops = &intel_dma_ops;
4ed0d3e6 4780
134fac3f 4781 init_iommu_pm_ops();
a8bcbb0d 4782
39ab9555
JR
4783 for_each_active_iommu(iommu, drhd) {
4784 iommu_device_sysfs_add(&iommu->iommu, NULL,
4785 intel_iommu_groups,
4786 "%s", iommu->name);
4787 iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
4788 iommu_device_register(&iommu->iommu);
4789 }
a5459cfe 4790
4236d97d 4791 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
75f05569
JL
4792 if (si_domain && !hw_pass_through)
4793 register_memory_notifier(&intel_iommu_memory_nb);
21647615
AMG
4794 cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL,
4795 intel_iommu_cpu_dead);
d8190dc6 4796
fa212a97
LB
4797 if (probe_acpi_namespace_devices())
4798 pr_warn("ACPI name space devices didn't probe correctly\n");
4799
d8190dc6
LB
4800 /* Finally, we enable the DMA remapping hardware. */
4801 for_each_iommu(iommu, drhd) {
4802 if (!translation_pre_enabled(iommu))
4803 iommu_enable_translation(iommu);
4804
4805 iommu_disable_protect_mem_regions(iommu);
4806 }
4807 pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
4808
8bc1f85c 4809 intel_iommu_enabled = 1;
ee2636b8 4810 intel_iommu_debugfs_init();
8bc1f85c 4811
ba395927 4812 return 0;
9bdc531e
JL
4813
4814out_free_reserved_range:
4815 put_iova_domain(&reserved_iova_list);
9bdc531e
JL
4816out_free_dmar:
4817 intel_iommu_free_dmars();
3a5670e8
JL
4818 up_write(&dmar_global_lock);
4819 iommu_exit_mempool();
9bdc531e 4820 return ret;
ba395927 4821}
e820482c 4822
2452d9db 4823static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
579305f7
AW
4824{
4825 struct intel_iommu *iommu = opaque;
4826
2452d9db 4827 domain_context_clear_one(iommu, PCI_BUS_NUM(alias), alias & 0xff);
579305f7
AW
4828 return 0;
4829}
4830
4831/*
4832 * NB - intel-iommu lacks any sort of reference counting for the users of
4833 * dependent devices. If multiple endpoints have intersecting dependent
4834 * devices, unbinding the driver from any one of them will possibly leave
4835 * the others unable to operate.
4836 */
2452d9db 4837static void domain_context_clear(struct intel_iommu *iommu, struct device *dev)
3199aa6b 4838{
0bcb3e28 4839 if (!iommu || !dev || !dev_is_pci(dev))
3199aa6b
HW
4840 return;
4841
2452d9db 4842 pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_clear_one_cb, iommu);
3199aa6b
HW
4843}
4844
127c7615 4845static void __dmar_remove_one_dev_info(struct device_domain_info *info)
c7151a8d 4846{
942067f1 4847 struct dmar_domain *domain;
c7151a8d
WH
4848 struct intel_iommu *iommu;
4849 unsigned long flags;
c7151a8d 4850
55d94043
JR
4851 assert_spin_locked(&device_domain_lock);
4852
127c7615 4853 if (WARN_ON(!info))
c7151a8d
WH
4854 return;
4855
127c7615 4856 iommu = info->iommu;
942067f1 4857 domain = info->domain;
c7151a8d 4858
127c7615 4859 if (info->dev) {
ef848b7e
LB
4860 if (dev_is_pci(info->dev) && sm_supported(iommu))
4861 intel_pasid_tear_down_entry(iommu, info->dev,
4862 PASID_RID2PASID);
4863
127c7615
JR
4864 iommu_disable_dev_iotlb(info);
4865 domain_context_clear(iommu, info->dev);
a7fc93fe 4866 intel_pasid_free_table(info->dev);
127c7615 4867 }
c7151a8d 4868
b608ac3b 4869 unlink_domain_info(info);
c7151a8d 4870
d160aca5 4871 spin_lock_irqsave(&iommu->lock, flags);
942067f1 4872 domain_detach_iommu(domain, iommu);
d160aca5 4873 spin_unlock_irqrestore(&iommu->lock, flags);
c7151a8d 4874
942067f1
LB
4875 /* free the private domain */
4876 if (domain->flags & DOMAIN_FLAG_LOSE_CHILDREN &&
4877 !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY))
4878 domain_exit(info->domain);
4879
127c7615 4880 free_devinfo_mem(info);
c7151a8d 4881}
c7151a8d 4882
71753239 4883static void dmar_remove_one_dev_info(struct device *dev)
55d94043 4884{
127c7615 4885 struct device_domain_info *info;
55d94043 4886 unsigned long flags;
3e7abe25 4887
55d94043 4888 spin_lock_irqsave(&device_domain_lock, flags);
127c7615
JR
4889 info = dev->archdata.iommu;
4890 __dmar_remove_one_dev_info(info);
55d94043 4891 spin_unlock_irqrestore(&device_domain_lock, flags);
c7151a8d
WH
4892}
4893
2c2e2c38 4894static int md_domain_init(struct dmar_domain *domain, int guest_width)
5e98c4b1
WH
4895{
4896 int adjust_width;
4897
aa3ac946 4898 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
5e98c4b1
WH
4899 domain_reserve_special_ranges(domain);
4900
4901 /* calculate AGAW */
4902 domain->gaw = guest_width;
4903 adjust_width = guestwidth_to_adjustwidth(guest_width);
4904 domain->agaw = width_to_agaw(adjust_width);
4905
5e98c4b1 4906 domain->iommu_coherency = 0;
c5b15255 4907 domain->iommu_snooping = 0;
6dd9a7c7 4908 domain->iommu_superpage = 0;
fe40f1e0 4909 domain->max_addr = 0;
5e98c4b1
WH
4910
4911 /* always allocate the top pgd */
4c923d47 4912 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
5e98c4b1
WH
4913 if (!domain->pgd)
4914 return -ENOMEM;
4915 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4916 return 0;
4917}
4918
00a77deb 4919static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
38717946 4920{
5d450806 4921 struct dmar_domain *dmar_domain;
00a77deb
JR
4922 struct iommu_domain *domain;
4923
4de354ec 4924 switch (type) {
fa954e68
LB
4925 case IOMMU_DOMAIN_DMA:
4926 /* fallthrough */
4de354ec 4927 case IOMMU_DOMAIN_UNMANAGED:
fa954e68 4928 dmar_domain = alloc_domain(0);
4de354ec
LB
4929 if (!dmar_domain) {
4930 pr_err("Can't allocate dmar_domain\n");
4931 return NULL;
4932 }
4933 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
4934 pr_err("Domain initialization failed\n");
4935 domain_exit(dmar_domain);
4936 return NULL;
4937 }
fa954e68
LB
4938
4939 if (type == IOMMU_DOMAIN_DMA &&
4940 init_iova_flush_queue(&dmar_domain->iovad,
4941 iommu_flush_iova, iova_entry_free)) {
4942 pr_warn("iova flush queue initialization failed\n");
4943 intel_iommu_strict = 1;
4944 }
4945
4de354ec 4946 domain_update_iommu_cap(dmar_domain);
38717946 4947
4de354ec
LB
4948 domain = &dmar_domain->domain;
4949 domain->geometry.aperture_start = 0;
4950 domain->geometry.aperture_end =
4951 __DOMAIN_MAX_ADDR(dmar_domain->gaw);
4952 domain->geometry.force_aperture = true;
4953
4954 return domain;
4955 case IOMMU_DOMAIN_IDENTITY:
4956 return &si_domain->domain;
4957 default:
00a77deb 4958 return NULL;
38717946 4959 }
8a0e715b 4960
4de354ec 4961 return NULL;
38717946 4962}
38717946 4963
00a77deb 4964static void intel_iommu_domain_free(struct iommu_domain *domain)
38717946 4965{
4de354ec
LB
4966 if (domain != &si_domain->domain)
4967 domain_exit(to_dmar_domain(domain));
38717946 4968}
38717946 4969
67b8e02b
LB
4970/*
4971 * Check whether a @domain could be attached to the @dev through the
4972 * aux-domain attach/detach APIs.
4973 */
4974static inline bool
4975is_aux_domain(struct device *dev, struct iommu_domain *domain)
4976{
4977 struct device_domain_info *info = dev->archdata.iommu;
4978
4979 return info && info->auxd_enabled &&
4980 domain->type == IOMMU_DOMAIN_UNMANAGED;
4981}
4982
4983static void auxiliary_link_device(struct dmar_domain *domain,
4984 struct device *dev)
4985{
4986 struct device_domain_info *info = dev->archdata.iommu;
4987
4988 assert_spin_locked(&device_domain_lock);
4989 if (WARN_ON(!info))
4990 return;
4991
4992 domain->auxd_refcnt++;
4993 list_add(&domain->auxd, &info->auxiliary_domains);
4994}
4995
4996static void auxiliary_unlink_device(struct dmar_domain *domain,
4997 struct device *dev)
4998{
4999 struct device_domain_info *info = dev->archdata.iommu;
5000
5001 assert_spin_locked(&device_domain_lock);
5002 if (WARN_ON(!info))
5003 return;
5004
5005 list_del(&domain->auxd);
5006 domain->auxd_refcnt--;
5007
5008 if (!domain->auxd_refcnt && domain->default_pasid > 0)
5009 intel_pasid_free_id(domain->default_pasid);
5010}
5011
5012static int aux_domain_add_dev(struct dmar_domain *domain,
5013 struct device *dev)
5014{
5015 int ret;
5016 u8 bus, devfn;
5017 unsigned long flags;
5018 struct intel_iommu *iommu;
5019
5020 iommu = device_to_iommu(dev, &bus, &devfn);
5021 if (!iommu)
5022 return -ENODEV;
5023
5024 if (domain->default_pasid <= 0) {
5025 int pasid;
5026
5027 pasid = intel_pasid_alloc_id(domain, PASID_MIN,
5028 pci_max_pasids(to_pci_dev(dev)),
5029 GFP_KERNEL);
5030 if (pasid <= 0) {
5031 pr_err("Can't allocate default pasid\n");
5032 return -ENODEV;
5033 }
5034 domain->default_pasid = pasid;
5035 }
5036
5037 spin_lock_irqsave(&device_domain_lock, flags);
5038 /*
	5039	 * iommu->lock must be held to attach the domain to the iommu and set
	5040	 * up the pasid entry for second level translation.
5041 */
5042 spin_lock(&iommu->lock);
5043 ret = domain_attach_iommu(domain, iommu);
5044 if (ret)
5045 goto attach_failed;
5046
5047 /* Setup the PASID entry for mediated devices: */
5048 ret = intel_pasid_setup_second_level(iommu, domain, dev,
5049 domain->default_pasid);
5050 if (ret)
5051 goto table_failed;
5052 spin_unlock(&iommu->lock);
5053
5054 auxiliary_link_device(domain, dev);
5055
5056 spin_unlock_irqrestore(&device_domain_lock, flags);
5057
5058 return 0;
5059
5060table_failed:
5061 domain_detach_iommu(domain, iommu);
5062attach_failed:
5063 spin_unlock(&iommu->lock);
5064 spin_unlock_irqrestore(&device_domain_lock, flags);
5065 if (!domain->auxd_refcnt && domain->default_pasid > 0)
5066 intel_pasid_free_id(domain->default_pasid);
5067
5068 return ret;
5069}
5070
5071static void aux_domain_remove_dev(struct dmar_domain *domain,
5072 struct device *dev)
5073{
5074 struct device_domain_info *info;
5075 struct intel_iommu *iommu;
5076 unsigned long flags;
5077
5078 if (!is_aux_domain(dev, &domain->domain))
5079 return;
5080
5081 spin_lock_irqsave(&device_domain_lock, flags);
5082 info = dev->archdata.iommu;
5083 iommu = info->iommu;
5084
5085 auxiliary_unlink_device(domain, dev);
5086
5087 spin_lock(&iommu->lock);
5088 intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid);
5089 domain_detach_iommu(domain, iommu);
5090 spin_unlock(&iommu->lock);
5091
5092 spin_unlock_irqrestore(&device_domain_lock, flags);
5093}
5094
8cc3759a
LB
5095static int prepare_domain_attach_device(struct iommu_domain *domain,
5096 struct device *dev)
38717946 5097{
00a77deb 5098 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
fe40f1e0
WH
5099 struct intel_iommu *iommu;
5100 int addr_width;
156baca8 5101 u8 bus, devfn;
faa3d6f5 5102
156baca8 5103 iommu = device_to_iommu(dev, &bus, &devfn);
fe40f1e0
WH
5104 if (!iommu)
5105 return -ENODEV;
5106
5107 /* check if this iommu agaw is sufficient for max mapped address */
5108 addr_width = agaw_to_width(iommu->agaw);
a99c47a2
TL
5109 if (addr_width > cap_mgaw(iommu->cap))
5110 addr_width = cap_mgaw(iommu->cap);
5111
5112 if (dmar_domain->max_addr > (1LL << addr_width)) {
932a6523
BH
5113 dev_err(dev, "%s: iommu width (%d) is not "
5114 "sufficient for the mapped address (%llx)\n",
5115 __func__, addr_width, dmar_domain->max_addr);
fe40f1e0
WH
5116 return -EFAULT;
5117 }
a99c47a2
TL
5118 dmar_domain->gaw = addr_width;
5119
5120 /*
5121 * Knock out extra levels of page tables if necessary
5122 */
5123 while (iommu->agaw < dmar_domain->agaw) {
5124 struct dma_pte *pte;
5125
5126 pte = dmar_domain->pgd;
5127 if (dma_pte_present(pte)) {
25cbff16
SY
5128 dmar_domain->pgd = (struct dma_pte *)
5129 phys_to_virt(dma_pte_addr(pte));
7a661013 5130 free_pgtable_page(pte);
a99c47a2
TL
5131 }
5132 dmar_domain->agaw--;
5133 }
fe40f1e0 5134
8cc3759a
LB
5135 return 0;
5136}
5137
5138static int intel_iommu_attach_device(struct iommu_domain *domain,
5139 struct device *dev)
5140{
5141 int ret;
5142
5143 if (device_is_rmrr_locked(dev)) {
5144 dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n");
5145 return -EPERM;
5146 }
5147
67b8e02b
LB
5148 if (is_aux_domain(dev, domain))
5149 return -EPERM;
5150
8cc3759a
LB
5151 /* normally dev is not mapped */
5152 if (unlikely(domain_context_mapped(dev))) {
5153 struct dmar_domain *old_domain;
5154
5155 old_domain = find_domain(dev);
fa954e68 5156 if (old_domain)
8cc3759a 5157 dmar_remove_one_dev_info(dev);
8cc3759a
LB
5158 }
5159
5160 ret = prepare_domain_attach_device(domain, dev);
5161 if (ret)
5162 return ret;
5163
5164 return domain_add_dev_info(to_dmar_domain(domain), dev);
38717946 5165}
38717946 5166
67b8e02b
LB
5167static int intel_iommu_aux_attach_device(struct iommu_domain *domain,
5168 struct device *dev)
5169{
5170 int ret;
5171
5172 if (!is_aux_domain(dev, domain))
5173 return -EPERM;
5174
5175 ret = prepare_domain_attach_device(domain, dev);
5176 if (ret)
5177 return ret;
5178
5179 return aux_domain_add_dev(to_dmar_domain(domain), dev);
5180}
5181
4c5478c9
JR
5182static void intel_iommu_detach_device(struct iommu_domain *domain,
5183 struct device *dev)
38717946 5184{
71753239 5185 dmar_remove_one_dev_info(dev);
faa3d6f5 5186}
c7151a8d 5187
67b8e02b
LB
5188static void intel_iommu_aux_detach_device(struct iommu_domain *domain,
5189 struct device *dev)
5190{
5191 aux_domain_remove_dev(to_dmar_domain(domain), dev);
5192}
5193
b146a1c9
JR
5194static int intel_iommu_map(struct iommu_domain *domain,
5195 unsigned long iova, phys_addr_t hpa,
5009065d 5196 size_t size, int iommu_prot)
faa3d6f5 5197{
00a77deb 5198 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
fe40f1e0 5199 u64 max_addr;
dde57a21 5200 int prot = 0;
faa3d6f5 5201 int ret;
fe40f1e0 5202
942067f1
LB
5203 if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN)
5204 return -EINVAL;
5205
dde57a21
JR
5206 if (iommu_prot & IOMMU_READ)
5207 prot |= DMA_PTE_READ;
5208 if (iommu_prot & IOMMU_WRITE)
5209 prot |= DMA_PTE_WRITE;
9cf06697
SY
5210 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
5211 prot |= DMA_PTE_SNP;
dde57a21 5212
163cc52c 5213 max_addr = iova + size;
dde57a21 5214 if (dmar_domain->max_addr < max_addr) {
fe40f1e0
WH
5215 u64 end;
5216
5217 /* check if minimum agaw is sufficient for mapped address */
8954da1f 5218 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
fe40f1e0 5219 if (end < max_addr) {
9f10e5bf 5220 pr_err("%s: iommu width (%d) is not "
fe40f1e0 5221 "sufficient for the mapped address (%llx)\n",
8954da1f 5222 __func__, dmar_domain->gaw, max_addr);
fe40f1e0
WH
5223 return -EFAULT;
5224 }
dde57a21 5225 dmar_domain->max_addr = max_addr;
fe40f1e0 5226 }
ad051221
DW
5227 /* Round up size to next multiple of PAGE_SIZE, if it and
5228 the low bits of hpa would take us onto the next page */
88cb6a74 5229 size = aligned_nrpages(hpa, size);
ad051221
DW
5230 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
5231 hpa >> VTD_PAGE_SHIFT, size, prot);
faa3d6f5 5232 return ret;
38717946 5233}
38717946 5234
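/*
 * Illustrative sketch (not part of this file): how a kernel consumer
 * (e.g. a VFIO-like user) exercises the map/unmap callbacks above through
 * the generic IOMMU API.  "dev", the IOVA and the physical address are
 * hypothetical; the iommu_* calls are real kernel API from linux/iommu.h.
 */
#include <linux/errno.h>
#include <linux/iommu.h>
#include <linux/sizes.h>

static int example_iommu_map(struct device *dev, phys_addr_t paddr)
{
	struct iommu_domain *dom;
	const unsigned long iova = 0x100000;	/* arbitrary device address */
	int ret;

	dom = iommu_domain_alloc(dev->bus);	/* lands in intel_iommu_domain_alloc() */
	if (!dom)
		return -ENOMEM;

	ret = iommu_attach_device(dom, dev);
	if (ret)
		goto out_free;

	/* One 4 KiB read/write mapping, served by intel_iommu_map() above. */
	ret = iommu_map(dom, iova, paddr, SZ_4K, IOMMU_READ | IOMMU_WRITE);
	if (!ret)
		iommu_unmap(dom, iova, SZ_4K);

	iommu_detach_device(dom, dev);
out_free:
	iommu_domain_free(dom);
	return ret;
}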
5009065d 5235static size_t intel_iommu_unmap(struct iommu_domain *domain,
ea8ea460 5236 unsigned long iova, size_t size)
38717946 5237{
00a77deb 5238 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
ea8ea460 5239 struct page *freelist = NULL;
ea8ea460
DW
5240 unsigned long start_pfn, last_pfn;
5241 unsigned int npages;
42e8c186 5242 int iommu_id, level = 0;
5cf0a76f
DW
5243
5244 /* Cope with horrid API which requires us to unmap more than the
5245 size argument if it happens to be a large-page mapping. */
dc02e46e 5246 BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
942067f1
LB
5247 if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN)
5248 return 0;
5cf0a76f
DW
5249
5250 if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
5251 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4b99d352 5252
ea8ea460
DW
5253 start_pfn = iova >> VTD_PAGE_SHIFT;
5254 last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
5255
5256 freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
5257
5258 npages = last_pfn - start_pfn + 1;
5259
f746a025 5260 for_each_domain_iommu(iommu_id, dmar_domain)
42e8c186
JR
5261 iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
5262 start_pfn, npages, !freelist, 0);
ea8ea460
DW
5263
5264 dma_free_pagelist(freelist);
fe40f1e0 5265
163cc52c
DW
5266 if (dmar_domain->max_addr == iova + size)
5267 dmar_domain->max_addr = iova;
b146a1c9 5268
5cf0a76f 5269 return size;
38717946 5270}
38717946 5271
d14d6577 5272static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
bb5547ac 5273 dma_addr_t iova)
38717946 5274{
00a77deb 5275 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
38717946 5276 struct dma_pte *pte;
5cf0a76f 5277 int level = 0;
faa3d6f5 5278 u64 phys = 0;
38717946 5279
942067f1
LB
5280 if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN)
5281 return 0;
5282
5cf0a76f 5283 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
38717946 5284 if (pte)
faa3d6f5 5285 phys = dma_pte_addr(pte);
38717946 5286
faa3d6f5 5287 return phys;
38717946 5288}
a8bcbb0d 5289
95587a75
LB
5290static inline bool scalable_mode_support(void)
5291{
5292 struct dmar_drhd_unit *drhd;
5293 struct intel_iommu *iommu;
5294 bool ret = true;
5295
5296 rcu_read_lock();
5297 for_each_active_iommu(iommu, drhd) {
5298 if (!sm_supported(iommu)) {
5299 ret = false;
5300 break;
5301 }
5302 }
5303 rcu_read_unlock();
5304
5305 return ret;
5306}
5307
5308static inline bool iommu_pasid_support(void)
5309{
5310 struct dmar_drhd_unit *drhd;
5311 struct intel_iommu *iommu;
5312 bool ret = true;
5313
5314 rcu_read_lock();
5315 for_each_active_iommu(iommu, drhd) {
5316 if (!pasid_supported(iommu)) {
5317 ret = false;
5318 break;
5319 }
5320 }
5321 rcu_read_unlock();
5322
5323 return ret;
5324}
5325
5d587b8d 5326static bool intel_iommu_capable(enum iommu_cap cap)
dbb9fd86 5327{
dbb9fd86 5328 if (cap == IOMMU_CAP_CACHE_COHERENCY)
5d587b8d 5329 return domain_update_iommu_snooping(NULL) == 1;
323f99cb 5330 if (cap == IOMMU_CAP_INTR_REMAP)
5d587b8d 5331 return irq_remapping_enabled == 1;
dbb9fd86 5332
5d587b8d 5333 return false;
dbb9fd86
SY
5334}
5335
abdfdde2
AW
5336static int intel_iommu_add_device(struct device *dev)
5337{
942067f1
LB
5338 struct dmar_domain *dmar_domain;
5339 struct iommu_domain *domain;
a5459cfe 5340 struct intel_iommu *iommu;
abdfdde2 5341 struct iommu_group *group;
156baca8 5342 u8 bus, devfn;
942067f1 5343 int ret;
70ae6f0d 5344
a5459cfe
AW
5345 iommu = device_to_iommu(dev, &bus, &devfn);
5346 if (!iommu)
70ae6f0d
AW
5347 return -ENODEV;
5348
e3d10af1 5349 iommu_device_link(&iommu->iommu, dev);
a4ff1fc2 5350
8af46c78
LB
5351 if (translation_pre_enabled(iommu))
5352 dev->archdata.iommu = DEFER_DEVICE_DOMAIN_INFO;
5353
e17f9ff4 5354 group = iommu_group_get_for_dev(dev);
783f157b 5355
e17f9ff4
AW
5356 if (IS_ERR(group))
5357 return PTR_ERR(group);
bcb71abe 5358
abdfdde2 5359 iommu_group_put(group);
942067f1
LB
5360
5361 domain = iommu_get_domain_for_dev(dev);
5362 dmar_domain = to_dmar_domain(domain);
5363 if (domain->type == IOMMU_DOMAIN_DMA) {
0e31a726 5364 if (device_def_domain_type(dev) == IOMMU_DOMAIN_IDENTITY) {
942067f1
LB
5365 ret = iommu_request_dm_for_dev(dev);
5366 if (ret) {
5367 dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
5368 domain_add_dev_info(si_domain, dev);
5369 dev_info(dev,
5370 "Device uses a private identity domain.\n");
5371 return 0;
5372 }
5373
5374 return -ENODEV;
5375 }
5376 } else {
0e31a726 5377 if (device_def_domain_type(dev) == IOMMU_DOMAIN_DMA) {
942067f1
LB
5378 ret = iommu_request_dma_domain_for_dev(dev);
5379 if (ret) {
5380 dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
4ec066c7 5381 if (!get_private_domain_for_dev(dev)) {
942067f1
LB
5382 dev_warn(dev,
5383 "Failed to get a private domain.\n");
5384 return -ENOMEM;
5385 }
5386
5387 dev_info(dev,
5388 "Device uses a private dma domain.\n");
5389 return 0;
5390 }
5391
5392 return -ENODEV;
5393 }
5394 }
5395
e17f9ff4 5396 return 0;
abdfdde2 5397}
70ae6f0d 5398
abdfdde2
AW
5399static void intel_iommu_remove_device(struct device *dev)
5400{
a5459cfe
AW
5401 struct intel_iommu *iommu;
5402 u8 bus, devfn;
5403
5404 iommu = device_to_iommu(dev, &bus, &devfn);
5405 if (!iommu)
5406 return;
5407
abdfdde2 5408 iommu_group_remove_device(dev);
a5459cfe 5409
e3d10af1 5410 iommu_device_unlink(&iommu->iommu, dev);
70ae6f0d
AW
5411}
5412
0659b8dc
EA
5413static void intel_iommu_get_resv_regions(struct device *device,
5414 struct list_head *head)
5415{
5f64ce54 5416 int prot = DMA_PTE_READ | DMA_PTE_WRITE;
0659b8dc
EA
5417 struct iommu_resv_region *reg;
5418 struct dmar_rmrr_unit *rmrr;
5419 struct device *i_dev;
5420 int i;
5421
5f64ce54 5422 down_read(&dmar_global_lock);
0659b8dc
EA
5423 for_each_rmrr_units(rmrr) {
5424 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
5425 i, i_dev) {
5f64ce54
EA
5426 struct iommu_resv_region *resv;
5427 size_t length;
5428
3855ba2d
EA
5429 if (i_dev != device &&
5430 !is_downstream_to_pci_bridge(device, i_dev))
0659b8dc
EA
5431 continue;
5432
5f64ce54
EA
5433 length = rmrr->end_address - rmrr->base_address + 1;
5434 resv = iommu_alloc_resv_region(rmrr->base_address,
5435 length, prot,
5436 IOMMU_RESV_DIRECT);
5437 if (!resv)
5438 break;
5439
5440 list_add_tail(&resv->list, head);
0659b8dc
EA
5441 }
5442 }
5f64ce54 5443 up_read(&dmar_global_lock);
0659b8dc 5444
d850c2ee
LB
5445#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
5446 if (dev_is_pci(device)) {
5447 struct pci_dev *pdev = to_pci_dev(device);
5448
5449 if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) {
5450 reg = iommu_alloc_resv_region(0, 1UL << 24, 0,
5451 IOMMU_RESV_DIRECT);
5452 if (reg)
5453 list_add_tail(&reg->list, head);
5454 }
5455 }
5456#endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */
5457
0659b8dc
EA
5458 reg = iommu_alloc_resv_region(IOAPIC_RANGE_START,
5459 IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1,
9d3a4de4 5460 0, IOMMU_RESV_MSI);
0659b8dc
EA
5461 if (!reg)
5462 return;
5463 list_add_tail(&reg->list, head);
5464}
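
The RMRR, ISA and MSI ranges registered here are what user space later sees through the iommu-group sysfs ABI. The following reader is only a sketch, assuming a path of the form /sys/kernel/iommu_groups/<group>/reserved_regions where each line is "start end type" (the exact type strings, e.g. "direct" or "msi", come from the kernel and are not defined here).

#include <stdio.h>

int main(int argc, char **argv)
{
	unsigned long long start, end;
	char type[32];
	FILE *f;

	if (argc != 2) {
		fprintf(stderr,
			"usage: %s /sys/kernel/iommu_groups/<group>/reserved_regions\n",
			argv[0]);
		return 1;
	}

	f = fopen(argv[1], "r");
	if (!f) {
		perror("fopen");
		return 1;
	}

	/* Each line: start-of-range end-of-range type */
	while (fscanf(f, "%llx %llx %31s", &start, &end, type) == 3)
		printf("%-8s 0x%llx-0x%llx (%llu bytes)\n",
		       type, start, end, end - start + 1);

	fclose(f);
	return 0;
}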
5465
5466static void intel_iommu_put_resv_regions(struct device *dev,
5467 struct list_head *head)
5468{
5469 struct iommu_resv_region *entry, *next;
5470
5f64ce54
EA
5471 list_for_each_entry_safe(entry, next, head, list)
5472 kfree(entry);
70ae6f0d
AW
5473}
5474
d7cbc0f3 5475int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev)
2f26e0a9
DW
5476{
5477 struct device_domain_info *info;
5478 struct context_entry *context;
5479 struct dmar_domain *domain;
5480 unsigned long flags;
5481 u64 ctx_lo;
5482 int ret;
5483
4ec066c7 5484 domain = find_domain(dev);
2f26e0a9
DW
5485 if (!domain)
5486 return -EINVAL;
5487
5488 spin_lock_irqsave(&device_domain_lock, flags);
5489 spin_lock(&iommu->lock);
5490
5491 ret = -EINVAL;
d7cbc0f3 5492 info = dev->archdata.iommu;
2f26e0a9
DW
5493 if (!info || !info->pasid_supported)
5494 goto out;
5495
5496 context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
5497 if (WARN_ON(!context))
5498 goto out;
5499
5500 ctx_lo = context[0].lo;
5501
2f26e0a9 5502 if (!(ctx_lo & CONTEXT_PASIDE)) {
2f26e0a9
DW
5503 ctx_lo |= CONTEXT_PASIDE;
5504 context[0].lo = ctx_lo;
5505 wmb();
d7cbc0f3
LB
5506 iommu->flush.flush_context(iommu,
5507 domain->iommu_did[iommu->seq_id],
5508 PCI_DEVID(info->bus, info->devfn),
2f26e0a9
DW
5509 DMA_CCMD_MASK_NOBIT,
5510 DMA_CCMD_DEVICE_INVL);
5511 }
5512
5513 /* Enable PASID support in the device, if it wasn't already */
5514 if (!info->pasid_enabled)
5515 iommu_enable_dev_iotlb(info);
5516
2f26e0a9
DW
5517 ret = 0;
5518
5519 out:
5520 spin_unlock(&iommu->lock);
5521 spin_unlock_irqrestore(&device_domain_lock, flags);
5522
5523 return ret;
5524}
5525
73bcbdc9
JS
5526static void intel_iommu_apply_resv_region(struct device *dev,
5527 struct iommu_domain *domain,
5528 struct iommu_resv_region *region)
5529{
5530 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5531 unsigned long start, end;
5532
5533 start = IOVA_PFN(region->start);
5534 end = IOVA_PFN(region->start + region->length - 1);
5535
5536 WARN_ON_ONCE(!reserve_iova(&dmar_domain->iovad, start, end));
5537}
5538
d7cbc0f3 5539#ifdef CONFIG_INTEL_IOMMU_SVM
2f26e0a9
DW
5540struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
5541{
5542 struct intel_iommu *iommu;
5543 u8 bus, devfn;
5544
5545 if (iommu_dummy(dev)) {
5546 dev_warn(dev,
5547 "No IOMMU translation for device; cannot enable SVM\n");
5548 return NULL;
5549 }
5550
5551 iommu = device_to_iommu(dev, &bus, &devfn);
5552 if ((!iommu)) {
b9997e38 5553 dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
2f26e0a9
DW
5554 return NULL;
5555 }
5556
2f26e0a9
DW
5557 return iommu;
5558}
5559#endif /* CONFIG_INTEL_IOMMU_SVM */
5560
95587a75
LB
5561static int intel_iommu_enable_auxd(struct device *dev)
5562{
5563 struct device_domain_info *info;
5564 struct intel_iommu *iommu;
5565 unsigned long flags;
5566 u8 bus, devfn;
5567 int ret;
5568
5569 iommu = device_to_iommu(dev, &bus, &devfn);
5570 if (!iommu || dmar_disabled)
5571 return -EINVAL;
5572
5573 if (!sm_supported(iommu) || !pasid_supported(iommu))
5574 return -EINVAL;
5575
5576 ret = intel_iommu_enable_pasid(iommu, dev);
5577 if (ret)
5578 return -ENODEV;
5579
5580 spin_lock_irqsave(&device_domain_lock, flags);
5581 info = dev->archdata.iommu;
5582 info->auxd_enabled = 1;
5583 spin_unlock_irqrestore(&device_domain_lock, flags);
5584
5585 return 0;
5586}
5587
5588static int intel_iommu_disable_auxd(struct device *dev)
5589{
5590 struct device_domain_info *info;
5591 unsigned long flags;
5592
5593 spin_lock_irqsave(&device_domain_lock, flags);
5594 info = dev->archdata.iommu;
5595 if (!WARN_ON(!info))
5596 info->auxd_enabled = 0;
5597 spin_unlock_irqrestore(&device_domain_lock, flags);
5598
5599 return 0;
5600}
5601
5602/*
 5603 * A PCI Express Designated Vendor-Specific Extended Capability (DVSEC) is
 5604 * defined in section 3.7 of the Intel Scalable I/O Virtualization technical
 5605 * spec so that system software and tools can detect endpoint devices that
 5606 * support Intel Scalable I/O Virtualization without a host driver dependency.
5607 *
5608 * Returns the address of the matching extended capability structure within
5609 * the device's PCI configuration space or 0 if the device does not support
5610 * it.
5611 */
5612static int siov_find_pci_dvsec(struct pci_dev *pdev)
5613{
5614 int pos;
5615 u16 vendor, id;
5616
5617 pos = pci_find_next_ext_capability(pdev, 0, 0x23);
5618 while (pos) {
5619 pci_read_config_word(pdev, pos + 4, &vendor);
5620 pci_read_config_word(pdev, pos + 8, &id);
5621 if (vendor == PCI_VENDOR_ID_INTEL && id == 5)
5622 return pos;
5623
5624 pos = pci_find_next_ext_capability(pdev, pos, 0x23);
5625 }
5626
5627 return 0;
5628}
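
The same DVSEC probe can be reproduced from user space by walking a device's extended capability list in its sysfs config file. The sketch below makes a few assumptions: the sysfs "config" path is illustrative, root privileges are needed to read past the first 256 bytes of config space, and the capability ID 0x23, vendor 0x8086 and DVSEC ID 5 simply mirror the check above.

#include <stdint.h>
#include <stdio.h>

static uint32_t cfg_read32(FILE *f, long off)
{
	uint32_t v = 0;

	if (fseek(f, off, SEEK_SET) || fread(&v, sizeof(v), 1, f) != 1)
		return 0;
	return v;
}

int main(int argc, char **argv)
{
	long pos = 0x100;	/* PCIe extended capabilities start here */
	FILE *f;

	if (argc != 2) {
		fprintf(stderr, "usage: %s /sys/bus/pci/devices/<bdf>/config\n", argv[0]);
		return 1;
	}

	f = fopen(argv[1], "rb");
	if (!f) {
		perror("fopen");
		return 1;
	}

	while (pos) {
		uint32_t hdr = cfg_read32(f, pos);

		/* Extended capability header: [15:0] ID, [31:20] next offset */
		if ((hdr & 0xffff) == 0x23) {
			uint16_t vendor = cfg_read32(f, pos + 4) & 0xffff;
			uint16_t id = cfg_read32(f, pos + 8) & 0xffff;

			if (vendor == 0x8086 && id == 5) {
				printf("SIOV DVSEC found at 0x%lx\n", pos);
				break;
			}
		}
		pos = hdr >> 20;	/* zero terminates the list */
	}

	fclose(f);
	return 0;
}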
5629
5630static bool
5631intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat)
5632{
5633 if (feat == IOMMU_DEV_FEAT_AUX) {
5634 int ret;
5635
5636 if (!dev_is_pci(dev) || dmar_disabled ||
5637 !scalable_mode_support() || !iommu_pasid_support())
5638 return false;
5639
5640 ret = pci_pasid_features(to_pci_dev(dev));
5641 if (ret < 0)
5642 return false;
5643
5644 return !!siov_find_pci_dvsec(to_pci_dev(dev));
5645 }
5646
5647 return false;
5648}
5649
5650static int
5651intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
5652{
5653 if (feat == IOMMU_DEV_FEAT_AUX)
5654 return intel_iommu_enable_auxd(dev);
5655
5656 return -ENODEV;
5657}
5658
5659static int
5660intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat)
5661{
5662 if (feat == IOMMU_DEV_FEAT_AUX)
5663 return intel_iommu_disable_auxd(dev);
5664
5665 return -ENODEV;
5666}
5667
5668static bool
5669intel_iommu_dev_feat_enabled(struct device *dev, enum iommu_dev_features feat)
5670{
5671 struct device_domain_info *info = dev->archdata.iommu;
5672
5673 if (feat == IOMMU_DEV_FEAT_AUX)
5674 return scalable_mode_support() && info && info->auxd_enabled;
5675
5676 return false;
5677}
5678
0e8000f8
LB
5679static int
5680intel_iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev)
5681{
5682 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5683
5684 return dmar_domain->default_pasid > 0 ?
5685 dmar_domain->default_pasid : -EINVAL;
5686}
5687
8af46c78
LB
5688static bool intel_iommu_is_attach_deferred(struct iommu_domain *domain,
5689 struct device *dev)
5690{
5691 return dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO;
5692}
5693
b0119e87 5694const struct iommu_ops intel_iommu_ops = {
0659b8dc
EA
5695 .capable = intel_iommu_capable,
5696 .domain_alloc = intel_iommu_domain_alloc,
5697 .domain_free = intel_iommu_domain_free,
5698 .attach_dev = intel_iommu_attach_device,
5699 .detach_dev = intel_iommu_detach_device,
67b8e02b
LB
5700 .aux_attach_dev = intel_iommu_aux_attach_device,
5701 .aux_detach_dev = intel_iommu_aux_detach_device,
0e8000f8 5702 .aux_get_pasid = intel_iommu_aux_get_pasid,
0659b8dc
EA
5703 .map = intel_iommu_map,
5704 .unmap = intel_iommu_unmap,
0659b8dc
EA
5705 .iova_to_phys = intel_iommu_iova_to_phys,
5706 .add_device = intel_iommu_add_device,
5707 .remove_device = intel_iommu_remove_device,
5708 .get_resv_regions = intel_iommu_get_resv_regions,
5709 .put_resv_regions = intel_iommu_put_resv_regions,
73bcbdc9 5710 .apply_resv_region = intel_iommu_apply_resv_region,
0659b8dc 5711 .device_group = pci_device_group,
95587a75
LB
5712 .dev_has_feat = intel_iommu_dev_has_feat,
5713 .dev_feat_enabled = intel_iommu_dev_feat_enabled,
5714 .dev_enable_feat = intel_iommu_dev_enable_feat,
5715 .dev_disable_feat = intel_iommu_dev_disable_feat,
8af46c78 5716 .is_attach_deferred = intel_iommu_is_attach_deferred,
0659b8dc 5717 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
a8bcbb0d 5718};
9af88143 5719
9452618e
DV
5720static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
5721{
5722 /* G4x/GM45 integrated gfx dmar support is totally busted. */
932a6523 5723 pci_info(dev, "Disabling IOMMU for graphics on this chipset\n");
9452618e
DV
5724 dmar_map_gfx = 0;
5725}
5726
5727DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
5728DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
5729DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
5730DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
5731DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
5732DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
5733DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
5734
d34d6517 5735static void quirk_iommu_rwbf(struct pci_dev *dev)
9af88143
DW
5736{
5737 /*
5738 * Mobile 4 Series Chipset neglects to set RWBF capability,
210561ff 5739 * but needs it. Same seems to hold for the desktop versions.
9af88143 5740 */
932a6523 5741 pci_info(dev, "Forcing write-buffer flush capability\n");
9af88143
DW
5742 rwbf_quirk = 1;
5743}
5744
5745DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
210561ff
DV
5746DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
5747DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
5748DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
5749DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
5750DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
5751DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
e0fc7e0b 5752
eecfd57f
AJ
5753#define GGC 0x52
5754#define GGC_MEMORY_SIZE_MASK (0xf << 8)
5755#define GGC_MEMORY_SIZE_NONE (0x0 << 8)
5756#define GGC_MEMORY_SIZE_1M (0x1 << 8)
5757#define GGC_MEMORY_SIZE_2M (0x3 << 8)
5758#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
5759#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
5760#define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
5761#define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
5762
d34d6517 5763static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
9eecabcb
DW
5764{
5765 unsigned short ggc;
5766
eecfd57f 5767 if (pci_read_config_word(dev, GGC, &ggc))
9eecabcb
DW
5768 return;
5769
eecfd57f 5770 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
932a6523 5771 pci_info(dev, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
9eecabcb 5772 dmar_map_gfx = 0;
6fbcfb3e
DW
5773 } else if (dmar_map_gfx) {
5774 /* we have to ensure the gfx device is idle before we flush */
932a6523 5775 pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n");
6fbcfb3e
DW
5776 intel_iommu_strict = 1;
5777 }
9eecabcb
DW
5778}
5779DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
5780DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
5781DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
5782DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
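
For reference, here is a stand-alone decode of the check above; the GGC bit layout is copied from the #defines earlier in this file, and the sample register values are illustrative only.

#include <stdbool.h>
#include <stdio.h>

#define GGC_MEMORY_VT_ENABLED	(0x8 << 8)

/*
 * True when the BIOS programmed a VT-enabled graphics stolen-memory size,
 * i.e. there is room for a shadow GTT and graphics DMAR can stay enabled;
 * mirrors the test in quirk_calpella_no_shadow_gtt().
 */
static bool ggc_vt_enabled(unsigned short ggc)
{
	return ggc & GGC_MEMORY_VT_ENABLED;
}

int main(void)
{
	printf("%d\n", ggc_vt_enabled(0x0b00));	/* 1: GGC_MEMORY_SIZE_4M_VT */
	printf("%d\n", ggc_vt_enabled(0x0300));	/* 0: no shadow GTT allocated */
	return 0;
}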
5783
e0fc7e0b
DW
5784/* On Tylersburg chipsets, some BIOSes have been known to enable the
5785 ISOCH DMAR unit for the Azalia sound device, but not give it any
5786 TLB entries, which causes it to deadlock. Check for that. We do
5787 this in a function called from init_dmars(), instead of in a PCI
5788 quirk, because we don't want to print the obnoxious "BIOS broken"
5789 message if VT-d is actually disabled.
5790*/
5791static void __init check_tylersburg_isoch(void)
5792{
5793 struct pci_dev *pdev;
5794 uint32_t vtisochctrl;
5795
5796 /* If there's no Azalia in the system anyway, forget it. */
5797 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
5798 if (!pdev)
5799 return;
5800 pci_dev_put(pdev);
5801
5802 /* System Management Registers. Might be hidden, in which case
5803 we can't do the sanity check. But that's OK, because the
5804 known-broken BIOSes _don't_ actually hide it, so far. */
5805 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
5806 if (!pdev)
5807 return;
5808
5809 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
5810 pci_dev_put(pdev);
5811 return;
5812 }
5813
5814 pci_dev_put(pdev);
5815
5816 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
5817 if (vtisochctrl & 1)
5818 return;
5819
5820 /* Drop all bits other than the number of TLB entries */
5821 vtisochctrl &= 0x1c;
5822
5823 /* If we have the recommended number of TLB entries (16), fine. */
5824 if (vtisochctrl == 0x10)
5825 return;
5826
 5827 /* Zero TLB entries? The unit is unusable; warn and identity-map Azalia. */
5828 if (!vtisochctrl) {
5829 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
5830 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
5831 dmi_get_system_info(DMI_BIOS_VENDOR),
5832 dmi_get_system_info(DMI_BIOS_VERSION),
5833 dmi_get_system_info(DMI_PRODUCT_VERSION));
5834 iommu_identity_mapping |= IDENTMAP_AZALIA;
5835 return;
5836 }
9f10e5bf
JR
5837
5838 pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
e0fc7e0b
DW
5839 vtisochctrl);
5840}
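
A stand-alone sketch of the same VTISOCHCTRL interpretation follows; the bit meanings (bit 0 = Azalia DMA routed to the non-isoch DMAR unit, the 0x1c-masked value read as the TLB entry count) are taken from the function above rather than from a public datasheet.

#include <stdint.h>
#include <stdio.h>

static void decode_vtisochctrl(uint32_t val)
{
	if (val & 1) {
		puts("Azalia DMA routed to the non-isoch DMAR unit: OK");
		return;
	}

	val &= 0x1c;	/* keep only the TLB-entry field, as the kernel does */
	if (val == 0x10)
		puts("Isoch DMAR unit has the recommended 16 TLB entries: OK");
	else if (!val)
		puts("Isoch DMAR unit has no TLB entries: broken BIOS, identity-map Azalia");
	else
		printf("Isoch DMAR unit has %u TLB entries (16 recommended)\n", val);
}

int main(void)
{
	decode_vtisochctrl(0x0);	/* broken BIOS case */
	decode_vtisochctrl(0x10);	/* recommended configuration */
	decode_vtisochctrl(0x1);	/* non-isoch routing */
	return 0;
}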