/*
 * Copyright © 2006-2014 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * Authors: David Woodhouse <dwmw2@infradead.org>,
 *          Ashok Raj <ashok.raj@intel.com>,
 *          Shaohua Li <shaohua.li@intel.com>,
 *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
 *          Fenghua Yu <fenghua.yu@intel.com>
 *          Joerg Roedel <jroedel@suse.de>
 */

#define pr_fmt(fmt)	"DMAR: " fmt
#define dev_fmt(fmt)	pr_fmt(fmt)

#include <linux/init.h>
#include <linux/bitmap.h>
#include <linux/debugfs.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/dma-mapping.h>
#include <linux/mempool.h>
#include <linux/memory.h>
#include <linux/cpu.h>
#include <linux/timer.h>
#include <linux/io.h>
#include <linux/iova.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
#include <linux/syscore_ops.h>
#include <linux/tboot.h>
#include <linux/dmi.h>
#include <linux/pci-ats.h>
#include <linux/memblock.h>
#include <linux/dma-contiguous.h>
#include <linux/dma-direct.h>
#include <linux/crash_dump.h>
#include <linux/numa.h>
#include <asm/irq_remapping.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>

#include "irq_remapping.h"
#include "intel-pasid.h"

#define ROOT_SIZE		VTD_PAGE_SIZE
#define CONTEXT_SIZE		VTD_PAGE_SIZE

#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)

#define IOAPIC_RANGE_START	(0xfee00000)
#define IOAPIC_RANGE_END	(0xfeefffff)
#define IOVA_START_ADDR		(0x1000)

#define DEFAULT_DOMAIN_ADDRESS_WIDTH 57

#define MAX_AGAW_WIDTH 64
#define MAX_AGAW_PFN_WIDTH	(MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)

#define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)

/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
   to match. That way, we can use 'unsigned long' for PFNs with impunity. */
#define DOMAIN_MAX_PFN(gaw)	((unsigned long) min_t(uint64_t, \
				__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
#define DOMAIN_MAX_ADDR(gaw)	(((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)

/* IO virtual address start page frame number */
#define IOVA_START_PFN		(1)

#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)

/* page table handling */
#define LEVEL_STRIDE		(9)
#define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)

/*
 * This bitmap is used to advertise the page sizes our hardware supports
 * to the IOMMU core, which will then use this information to split
 * physically contiguous memory regions it is mapping into page sizes
 * that we support.
 *
 * Traditionally the IOMMU core just handed us the mappings directly,
 * after making sure the size is an order of a 4KiB page and that the
 * mapping has natural alignment.
 *
 * To retain this behavior, we currently advertise that we support
 * all page sizes that are an order of 4KiB.
 *
 * If at some point we'd like to utilize the IOMMU core's new behavior,
 * we could change this to advertise the real page sizes we support.
 */
#define INTEL_IOMMU_PGSIZES	(~0xFFFUL)

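/*
 * AGAW ("adjusted guest address width") encodes the depth of the second-level
 * page table used by the helpers below: agaw 1/2/3 correspond to 3/4/5-level
 * tables covering 39/48/57 bits of address. For example, width_to_agaw(48)
 * returns 2 and agaw_to_level(2) returns a 4-level table.
 */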
static inline int agaw_to_level(int agaw)
{
	return agaw + 2;
}

static inline int agaw_to_width(int agaw)
{
	return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
}

static inline int width_to_agaw(int width)
{
	return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
}

static inline unsigned int level_to_offset_bits(int level)
{
	return (level - 1) * LEVEL_STRIDE;
}

static inline int pfn_level_offset(unsigned long pfn, int level)
{
	return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
}

static inline unsigned long level_mask(int level)
{
	return -1UL << level_to_offset_bits(level);
}

static inline unsigned long level_size(int level)
{
	return 1UL << level_to_offset_bits(level);
}

static inline unsigned long align_to_level(unsigned long pfn, int level)
{
	return (pfn + level_size(level) - 1) & level_mask(level);
}

static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
{
	return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
}

/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
   are never going to work. */
static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
{
	return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
}

static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
{
	return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
}
static inline unsigned long page_to_dma_pfn(struct page *pg)
{
	return mm_to_dma_pfn(page_to_pfn(pg));
}
static inline unsigned long virt_to_dma_pfn(void *p)
{
	return page_to_dma_pfn(virt_to_page(p));
}

/* global iommu list, set NULL for ignored DMAR units */
static struct intel_iommu **g_iommus;

static void __init check_tylersburg_isoch(void);
static int rwbf_quirk;

/*
 * set to 1 to panic kernel if can't successfully enable VT-d
 * (used when kernel is launched w/ TXT)
 */
static int force_on = 0;
int intel_iommu_tboot_noforce;
static int no_platform_optin;

#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))

/*
 * Take a root_entry and return the Lower Context Table Pointer (LCTP)
 * if marked present.
 */
static phys_addr_t root_entry_lctp(struct root_entry *re)
{
	if (!(re->lo & 1))
		return 0;

	return re->lo & VTD_PAGE_MASK;
}

/*
 * Take a root_entry and return the Upper Context Table Pointer (UCTP)
 * if marked present.
 */
static phys_addr_t root_entry_uctp(struct root_entry *re)
{
	if (!(re->hi & 1))
		return 0;

	return re->hi & VTD_PAGE_MASK;
}

static inline void context_clear_pasid_enable(struct context_entry *context)
{
	context->lo &= ~(1ULL << 11);
}

static inline bool context_pasid_enabled(struct context_entry *context)
{
	return !!(context->lo & (1ULL << 11));
}

static inline void context_set_copied(struct context_entry *context)
{
	context->hi |= (1ull << 3);
}

static inline bool context_copied(struct context_entry *context)
{
	return !!(context->hi & (1ULL << 3));
}

static inline bool __context_present(struct context_entry *context)
{
	return (context->lo & 1);
}

bool context_present(struct context_entry *context)
{
	return context_pasid_enabled(context) ?
	     __context_present(context) :
	     __context_present(context) && !context_copied(context);
}

static inline void context_set_present(struct context_entry *context)
{
	context->lo |= 1;
}

static inline void context_set_fault_enable(struct context_entry *context)
{
	context->lo &= (((u64)-1) << 2) | 1;
}

static inline void context_set_translation_type(struct context_entry *context,
						unsigned long value)
{
	context->lo &= (((u64)-1) << 4) | 3;
	context->lo |= (value & 3) << 2;
}

static inline void context_set_address_root(struct context_entry *context,
					    unsigned long value)
{
	context->lo &= ~VTD_PAGE_MASK;
	context->lo |= value & VTD_PAGE_MASK;
}

static inline void context_set_address_width(struct context_entry *context,
					     unsigned long value)
{
	context->hi |= value & 7;
}

static inline void context_set_domain_id(struct context_entry *context,
					 unsigned long value)
{
	context->hi |= (value & ((1 << 16) - 1)) << 8;
}

static inline int context_domain_id(struct context_entry *c)
{
	return((c->hi >> 8) & 0xffff);
}

static inline void context_clear_entry(struct context_entry *context)
{
	context->lo = 0;
	context->hi = 0;
}

/*
 * This domain is a static identity mapping domain.
 *	1. This domain creates a static 1:1 mapping to all usable memory.
 *	2. It maps to each iommu if successful.
 *	3. Each iommu maps to this domain if successful.
 */
static struct dmar_domain *si_domain;
static int hw_pass_through = 1;

/* si_domain contains multiple devices */
#define DOMAIN_FLAG_STATIC_IDENTITY		BIT(0)

/*
 * This is a DMA domain allocated through the iommu domain allocation
 * interface. But one or more devices belonging to this domain have
 * been chosen to use a private domain. We should avoid using the
 * map/unmap/iova_to_phys APIs on it.
 */
#define DOMAIN_FLAG_LOSE_CHILDREN		BIT(1)

#define for_each_domain_iommu(idx, domain)			\
	for (idx = 0; idx < g_num_of_iommus; idx++)		\
		if (domain->iommu_refcnt[idx])

struct dmar_rmrr_unit {
	struct list_head list;		/* list of rmrr units	*/
	struct acpi_dmar_header *hdr;	/* ACPI header		*/
	u64	base_address;		/* reserved base address*/
	u64	end_address;		/* reserved end address */
	struct dmar_dev_scope *devices;	/* target devices */
	int	devices_cnt;		/* target device count */
	struct iommu_resv_region *resv;	/* reserved region handle */
};

struct dmar_atsr_unit {
	struct list_head list;		/* list of ATSR units */
	struct acpi_dmar_header *hdr;	/* ACPI header */
	struct dmar_dev_scope *devices;	/* target devices */
	int devices_cnt;		/* target device count */
	u8 include_all:1;		/* include all ports */
};

static LIST_HEAD(dmar_atsr_units);
static LIST_HEAD(dmar_rmrr_units);

#define for_each_rmrr_units(rmrr) \
	list_for_each_entry(rmrr, &dmar_rmrr_units, list)

/* bitmap for indexing intel_iommus */
static int g_num_of_iommus;

static void domain_exit(struct dmar_domain *domain);
static void domain_remove_dev_info(struct dmar_domain *domain);
static void dmar_remove_one_dev_info(struct device *dev);
static void __dmar_remove_one_dev_info(struct device_domain_info *info);
static void domain_context_clear(struct intel_iommu *iommu,
				 struct device *dev);
static int domain_detach_iommu(struct dmar_domain *domain,
			       struct intel_iommu *iommu);
static bool device_is_rmrr_locked(struct device *dev);
static int intel_iommu_attach_device(struct iommu_domain *domain,
				     struct device *dev);

#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
int dmar_disabled = 0;
#else
int dmar_disabled = 1;
#endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/

int intel_iommu_sm;
int intel_iommu_enabled = 0;
EXPORT_SYMBOL_GPL(intel_iommu_enabled);

static int dmar_map_gfx = 1;
static int dmar_forcedac;
static int intel_iommu_strict;
static int intel_iommu_superpage = 1;
static int iommu_identity_mapping;

#define IDENTMAP_ALL		1
#define IDENTMAP_GFX		2
#define IDENTMAP_AZALIA		4

int intel_iommu_gfx_mapped;
EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);

#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
#define DEFER_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-2))
static DEFINE_SPINLOCK(device_domain_lock);
static LIST_HEAD(device_domain_list);

/*
 * Iterate over elements in device_domain_list and call the specified
 * callback @fn against each element.
 */
int for_each_device_domain(int (*fn)(struct device_domain_info *info,
				     void *data), void *data)
{
	int ret = 0;
	unsigned long flags;
	struct device_domain_info *info;

	spin_lock_irqsave(&device_domain_lock, flags);
	list_for_each_entry(info, &device_domain_list, global) {
		ret = fn(info, data);
		if (ret) {
			spin_unlock_irqrestore(&device_domain_lock, flags);
			return ret;
		}
	}
	spin_unlock_irqrestore(&device_domain_lock, flags);

	return 0;
}

const struct iommu_ops intel_iommu_ops;

static bool translation_pre_enabled(struct intel_iommu *iommu)
{
	return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
}

static void clear_translation_pre_enabled(struct intel_iommu *iommu)
{
	iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
}

static void init_translation_status(struct intel_iommu *iommu)
{
	u32 gsts;

	gsts = readl(iommu->reg + DMAR_GSTS_REG);
	if (gsts & DMA_GSTS_TES)
		iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
}

/* Convert a generic 'struct iommu_domain' to the private 'struct dmar_domain' */
static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
{
	return container_of(dom, struct dmar_domain, domain);
}

static int __init intel_iommu_setup(char *str)
{
	if (!str)
		return -EINVAL;
	while (*str) {
		if (!strncmp(str, "on", 2)) {
			dmar_disabled = 0;
			pr_info("IOMMU enabled\n");
		} else if (!strncmp(str, "off", 3)) {
			dmar_disabled = 1;
			no_platform_optin = 1;
			pr_info("IOMMU disabled\n");
		} else if (!strncmp(str, "igfx_off", 8)) {
			dmar_map_gfx = 0;
			pr_info("Disable GFX device mapping\n");
		} else if (!strncmp(str, "forcedac", 8)) {
			pr_info("Forcing DAC for PCI devices\n");
			dmar_forcedac = 1;
		} else if (!strncmp(str, "strict", 6)) {
			pr_info("Disable batched IOTLB flush\n");
			intel_iommu_strict = 1;
		} else if (!strncmp(str, "sp_off", 6)) {
			pr_info("Disable supported super page\n");
			intel_iommu_superpage = 0;
		} else if (!strncmp(str, "sm_on", 5)) {
			pr_info("Intel-IOMMU: scalable mode supported\n");
			intel_iommu_sm = 1;
		} else if (!strncmp(str, "tboot_noforce", 13)) {
			printk(KERN_INFO
				"Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
			intel_iommu_tboot_noforce = 1;
		}

		str += strcspn(str, ",");
		while (*str == ',')
			str++;
	}
	return 0;
}
__setup("intel_iommu=", intel_iommu_setup);

static struct kmem_cache *iommu_domain_cache;
static struct kmem_cache *iommu_devinfo_cache;

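/*
 * iommu->domains is a two-level table: the high byte of a domain ID selects a
 * lazily allocated block of 256 dmar_domain pointers and the low byte indexes
 * within that block, so memory is only committed for domain-ID ranges that
 * are actually in use.
 */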
static struct dmar_domain* get_iommu_domain(struct intel_iommu *iommu, u16 did)
{
	struct dmar_domain **domains;
	int idx = did >> 8;

	domains = iommu->domains[idx];
	if (!domains)
		return NULL;

	return domains[did & 0xff];
}

static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
			     struct dmar_domain *domain)
{
	struct dmar_domain **domains;
	int idx = did >> 8;

	if (!iommu->domains[idx]) {
		size_t size = 256 * sizeof(struct dmar_domain *);
		iommu->domains[idx] = kzalloc(size, GFP_ATOMIC);
	}

	domains = iommu->domains[idx];
	if (WARN_ON(!domains))
		return;
	else
		domains[did & 0xff] = domain;
}

void *alloc_pgtable_page(int node)
{
	struct page *page;
	void *vaddr = NULL;

	page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
	if (page)
		vaddr = page_address(page);
	return vaddr;
}

void free_pgtable_page(void *vaddr)
{
	free_page((unsigned long)vaddr);
}

static inline void *alloc_domain_mem(void)
{
	return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
}

static void free_domain_mem(void *vaddr)
{
	kmem_cache_free(iommu_domain_cache, vaddr);
}

static inline void *alloc_devinfo_mem(void)
{
	return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
}

static inline void free_devinfo_mem(void *vaddr)
{
	kmem_cache_free(iommu_devinfo_cache, vaddr);
}

static inline int domain_type_is_si(struct dmar_domain *domain)
{
	return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
}

static inline int domain_pfn_supported(struct dmar_domain *domain,
					unsigned long pfn)
{
	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;

	return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
}

static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
{
	unsigned long sagaw;
	int agaw = -1;

	sagaw = cap_sagaw(iommu->cap);
	for (agaw = width_to_agaw(max_gaw);
	     agaw >= 0; agaw--) {
		if (test_bit(agaw, &sagaw))
			break;
	}

	return agaw;
}

/*
 * Calculate max SAGAW for each iommu.
 */
int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
{
	return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
}

/*
 * calculate agaw for each iommu.
 * "SAGAW" may be different across iommus, use a default agaw, and
 * get a supported less agaw for iommus that don't support the default agaw.
 */
int iommu_calculate_agaw(struct intel_iommu *iommu)
{
	return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
}

/* This function only returns a single iommu in a domain */
struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
{
	int iommu_id;

	/* si_domain and vm domain should not get here. */
	if (WARN_ON(domain->domain.type != IOMMU_DOMAIN_DMA))
		return NULL;

	for_each_domain_iommu(iommu_id, domain)
		break;

	if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
		return NULL;

	return g_iommus[iommu_id];
}

static void domain_update_iommu_coherency(struct dmar_domain *domain)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	bool found = false;
	int i;

	domain->iommu_coherency = 1;

	for_each_domain_iommu(i, domain) {
		found = true;
		if (!ecap_coherent(g_iommus[i]->ecap)) {
			domain->iommu_coherency = 0;
			break;
		}
	}
	if (found)
		return;

	/* No hardware attached; use lowest common denominator */
	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (!ecap_coherent(iommu->ecap)) {
			domain->iommu_coherency = 0;
			break;
		}
	}
	rcu_read_unlock();
}

static int domain_update_iommu_snooping(struct intel_iommu *skip)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	int ret = 1;

	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (iommu != skip) {
			if (!ecap_sc_support(iommu->ecap)) {
				ret = 0;
				break;
			}
		}
	}
	rcu_read_unlock();

	return ret;
}

static int domain_update_iommu_superpage(struct intel_iommu *skip)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	int mask = 0xf;

	if (!intel_iommu_superpage) {
		return 0;
	}

	/* set iommu_superpage to the smallest common denominator */
	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (iommu != skip) {
			mask &= cap_super_page_val(iommu->cap);
			if (!mask)
				break;
		}
	}
	rcu_read_unlock();

	return fls(mask);
}

/* Some capabilities may be different across iommus */
static void domain_update_iommu_cap(struct dmar_domain *domain)
{
	domain_update_iommu_coherency(domain);
	domain->iommu_snooping = domain_update_iommu_snooping(NULL);
	domain->iommu_superpage = domain_update_iommu_superpage(NULL);
}

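/*
 * Return the context entry for (bus, devfn), allocating the context table on
 * demand when @alloc is set. In scalable mode each half of the root entry
 * (lo/hi) covers 128 device functions and context entries are twice the
 * legacy size, which is why devfn is folded and doubled below.
 */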
struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
					 u8 devfn, int alloc)
{
	struct root_entry *root = &iommu->root_entry[bus];
	struct context_entry *context;
	u64 *entry;

	entry = &root->lo;
	if (sm_supported(iommu)) {
		if (devfn >= 0x80) {
			devfn -= 0x80;
			entry = &root->hi;
		}
		devfn *= 2;
	}
	if (*entry & 1)
		context = phys_to_virt(*entry & VTD_PAGE_MASK);
	else {
		unsigned long phy_addr;
		if (!alloc)
			return NULL;

		context = alloc_pgtable_page(iommu->node);
		if (!context)
			return NULL;

		__iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
		phy_addr = virt_to_phys((void *)context);
		*entry = phy_addr | 1;
		__iommu_flush_cache(iommu, entry, sizeof(*entry));
	}
	return &context[devfn];
}

static int iommu_dummy(struct device *dev)
{
	return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
}

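/*
 * Find the IOMMU (DRHD unit) whose scope covers @dev and report the bus/devfn
 * to use when programming its context entries. SR-IOV VFs are looked up via
 * their PF, and devices behind a bridge listed in the scope match through the
 * bridge's subordinate bus range.
 */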
static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
{
	struct dmar_drhd_unit *drhd = NULL;
	struct intel_iommu *iommu;
	struct device *tmp;
	struct pci_dev *ptmp, *pdev = NULL;
	u16 segment = 0;
	int i;

	if (iommu_dummy(dev))
		return NULL;

	if (dev_is_pci(dev)) {
		struct pci_dev *pf_pdev;

		pdev = to_pci_dev(dev);

#ifdef CONFIG_X86
		/* VMD child devices currently cannot be handled individually */
		if (is_vmd(pdev->bus))
			return NULL;
#endif

		/* VFs aren't listed in scope tables; we need to look up
		 * the PF instead to find the IOMMU. */
		pf_pdev = pci_physfn(pdev);
		dev = &pf_pdev->dev;
		segment = pci_domain_nr(pdev->bus);
	} else if (has_acpi_companion(dev))
		dev = &ACPI_COMPANION(dev)->dev;

	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (pdev && segment != drhd->segment)
			continue;

		for_each_active_dev_scope(drhd->devices,
					  drhd->devices_cnt, i, tmp) {
			if (tmp == dev) {
				/* For a VF use its original BDF# not that of the PF
				 * which we used for the IOMMU lookup. Strictly speaking
				 * we could do this for all PCI devices; we only need to
				 * get the BDF# from the scope table for ACPI matches. */
				if (pdev && pdev->is_virtfn)
					goto got_pdev;

				*bus = drhd->devices[i].bus;
				*devfn = drhd->devices[i].devfn;
				goto out;
			}

			if (!pdev || !dev_is_pci(tmp))
				continue;

			ptmp = to_pci_dev(tmp);
			if (ptmp->subordinate &&
			    ptmp->subordinate->number <= pdev->bus->number &&
			    ptmp->subordinate->busn_res.end >= pdev->bus->number)
				goto got_pdev;
		}

		if (pdev && drhd->include_all) {
		got_pdev:
			*bus = pdev->bus->number;
			*devfn = pdev->devfn;
			goto out;
		}
	}
	iommu = NULL;
 out:
	rcu_read_unlock();

	return iommu;
}

static void domain_flush_cache(struct dmar_domain *domain,
			       void *addr, int size)
{
	if (!domain->iommu_coherency)
		clflush_cache_range(addr, size);
}

static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
	struct context_entry *context;
	int ret = 0;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	context = iommu_context_addr(iommu, bus, devfn, 0);
	if (context)
		ret = context_present(context);
	spin_unlock_irqrestore(&iommu->lock, flags);
	return ret;
}

static void free_context_table(struct intel_iommu *iommu)
{
	int i;
	unsigned long flags;
	struct context_entry *context;

	spin_lock_irqsave(&iommu->lock, flags);
	if (!iommu->root_entry) {
		goto out;
	}
	for (i = 0; i < ROOT_ENTRY_NR; i++) {
		context = iommu_context_addr(iommu, i, 0, 0);
		if (context)
			free_pgtable_page(context);

		if (!sm_supported(iommu))
			continue;

		context = iommu_context_addr(iommu, i, 0x80, 0);
		if (context)
			free_pgtable_page(context);

	}
	free_pgtable_page(iommu->root_entry);
	iommu->root_entry = NULL;
out:
	spin_unlock_irqrestore(&iommu->lock, flags);
}

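/*
 * Walk (and, if needed, build) the page table down to the PTE that maps @pfn.
 * On entry *target_level selects the level whose PTE the caller wants, with 0
 * meaning "stop at whatever leaf or superpage is present"; on return it holds
 * the level at which the walk actually stopped.
 */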
static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
				      unsigned long pfn, int *target_level)
{
	struct dma_pte *parent, *pte;
	int level = agaw_to_level(domain->agaw);
	int offset;

	BUG_ON(!domain->pgd);

	if (!domain_pfn_supported(domain, pfn))
		/* Address beyond IOMMU's addressing capabilities. */
		return NULL;

	parent = domain->pgd;

	while (1) {
		void *tmp_page;

		offset = pfn_level_offset(pfn, level);
		pte = &parent[offset];
		if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
			break;
		if (level == *target_level)
			break;

		if (!dma_pte_present(pte)) {
			uint64_t pteval;

			tmp_page = alloc_pgtable_page(domain->nid);

			if (!tmp_page)
				return NULL;

			domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
			pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
			if (cmpxchg64(&pte->val, 0ULL, pteval))
				/* Someone else set it while we were thinking; use theirs. */
				free_pgtable_page(tmp_page);
			else
				domain_flush_cache(domain, pte, sizeof(*pte));
		}
		if (level == 1)
			break;

		parent = phys_to_virt(dma_pte_addr(pte));
		level--;
	}

	if (!*target_level)
		*target_level = level;

	return pte;
}

/* return address's pte at specific level */
static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
					 unsigned long pfn,
					 int level, int *large_page)
{
	struct dma_pte *parent, *pte;
	int total = agaw_to_level(domain->agaw);
	int offset;

	parent = domain->pgd;
	while (level <= total) {
		offset = pfn_level_offset(pfn, total);
		pte = &parent[offset];
		if (level == total)
			return pte;

		if (!dma_pte_present(pte)) {
			*large_page = total;
			break;
		}

		if (dma_pte_superpage(pte)) {
			*large_page = total;
			return pte;
		}

		parent = phys_to_virt(dma_pte_addr(pte));
		total--;
	}
	return NULL;
}

/* clear last level pte, a tlb flush should be followed */
static void dma_pte_clear_range(struct dmar_domain *domain,
				unsigned long start_pfn,
				unsigned long last_pfn)
{
	unsigned int large_page;
	struct dma_pte *first_pte, *pte;

	BUG_ON(!domain_pfn_supported(domain, start_pfn));
	BUG_ON(!domain_pfn_supported(domain, last_pfn));
	BUG_ON(start_pfn > last_pfn);

	/* we don't need lock here; nobody else touches the iova range */
	do {
		large_page = 1;
		first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
		if (!pte) {
			start_pfn = align_to_level(start_pfn + 1, large_page + 1);
			continue;
		}
		do {
			dma_clear_pte(pte);
			start_pfn += lvl_to_nr_pages(large_page);
			pte++;
		} while (start_pfn <= last_pfn && !first_pte_in_page(pte));

		domain_flush_cache(domain, first_pte,
				   (void *)pte - (void *)first_pte);

	} while (start_pfn && start_pfn <= last_pfn);
}

static void dma_pte_free_level(struct dmar_domain *domain, int level,
			       int retain_level, struct dma_pte *pte,
			       unsigned long pfn, unsigned long start_pfn,
			       unsigned long last_pfn)
{
	pfn = max(start_pfn, pfn);
	pte = &pte[pfn_level_offset(pfn, level)];

	do {
		unsigned long level_pfn;
		struct dma_pte *level_pte;

		if (!dma_pte_present(pte) || dma_pte_superpage(pte))
			goto next;

		level_pfn = pfn & level_mask(level);
		level_pte = phys_to_virt(dma_pte_addr(pte));

		if (level > 2) {
			dma_pte_free_level(domain, level - 1, retain_level,
					   level_pte, level_pfn, start_pfn,
					   last_pfn);
		}

		/*
		 * Free the page table if we're below the level we want to
		 * retain and the range covers the entire table.
		 */
		if (level < retain_level && !(start_pfn > level_pfn ||
		      last_pfn < level_pfn + level_size(level) - 1)) {
			dma_clear_pte(pte);
			domain_flush_cache(domain, pte, sizeof(*pte));
			free_pgtable_page(level_pte);
		}
next:
		pfn += level_size(level);
	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);
}

/*
 * clear last level (leaf) ptes and free page table pages below the
 * level we wish to keep intact.
 */
static void dma_pte_free_pagetable(struct dmar_domain *domain,
				   unsigned long start_pfn,
				   unsigned long last_pfn,
				   int retain_level)
{
	BUG_ON(!domain_pfn_supported(domain, start_pfn));
	BUG_ON(!domain_pfn_supported(domain, last_pfn));
	BUG_ON(start_pfn > last_pfn);

	dma_pte_clear_range(domain, start_pfn, last_pfn);

	/* We don't need lock here; nobody else touches the iova range */
	dma_pte_free_level(domain, agaw_to_level(domain->agaw), retain_level,
			   domain->pgd, 0, start_pfn, last_pfn);

	/* free pgd */
	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
		free_pgtable_page(domain->pgd);
		domain->pgd = NULL;
	}
}

/* When a page at a given level is being unlinked from its parent, we don't
   need to *modify* it at all. All we need to do is make a list of all the
   pages which can be freed just as soon as we've flushed the IOTLB and we
   know the hardware page-walk will no longer touch them.
   The 'pte' argument is the *parent* PTE, pointing to the page that is to
   be freed. */
static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
					    int level, struct dma_pte *pte,
					    struct page *freelist)
{
	struct page *pg;

	pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
	pg->freelist = freelist;
	freelist = pg;

	if (level == 1)
		return freelist;

	pte = page_address(pg);
	do {
		if (dma_pte_present(pte) && !dma_pte_superpage(pte))
			freelist = dma_pte_list_pagetables(domain, level - 1,
							   pte, freelist);
		pte++;
	} while (!first_pte_in_page(pte));

	return freelist;
}

static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
					struct dma_pte *pte, unsigned long pfn,
					unsigned long start_pfn,
					unsigned long last_pfn,
					struct page *freelist)
{
	struct dma_pte *first_pte = NULL, *last_pte = NULL;

	pfn = max(start_pfn, pfn);
	pte = &pte[pfn_level_offset(pfn, level)];

	do {
		unsigned long level_pfn;

		if (!dma_pte_present(pte))
			goto next;

		level_pfn = pfn & level_mask(level);

		/* If range covers entire pagetable, free it */
		if (start_pfn <= level_pfn &&
		    last_pfn >= level_pfn + level_size(level) - 1) {
			/* These subordinate page tables are going away entirely. Don't
			   bother to clear them; we're just going to *free* them. */
			if (level > 1 && !dma_pte_superpage(pte))
				freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);

			dma_clear_pte(pte);
			if (!first_pte)
				first_pte = pte;
			last_pte = pte;
		} else if (level > 1) {
			/* Recurse down into a level that isn't *entirely* obsolete */
			freelist = dma_pte_clear_level(domain, level - 1,
						       phys_to_virt(dma_pte_addr(pte)),
						       level_pfn, start_pfn, last_pfn,
						       freelist);
		}
next:
		pfn += level_size(level);
	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);

	if (first_pte)
		domain_flush_cache(domain, first_pte,
				   (void *)++last_pte - (void *)first_pte);

	return freelist;
}

/* We can't just free the pages because the IOMMU may still be walking
   the page tables, and may have cached the intermediate levels. The
   pages can only be freed after the IOTLB flush has been done. */
static struct page *domain_unmap(struct dmar_domain *domain,
				 unsigned long start_pfn,
				 unsigned long last_pfn)
{
	struct page *freelist;

	BUG_ON(!domain_pfn_supported(domain, start_pfn));
	BUG_ON(!domain_pfn_supported(domain, last_pfn));
	BUG_ON(start_pfn > last_pfn);

	/* we don't need lock here; nobody else touches the iova range */
	freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
				       domain->pgd, 0, start_pfn, last_pfn, NULL);

	/* free pgd */
	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
		struct page *pgd_page = virt_to_page(domain->pgd);
		pgd_page->freelist = freelist;
		freelist = pgd_page;

		domain->pgd = NULL;
	}

	return freelist;
}

static void dma_free_pagelist(struct page *freelist)
{
	struct page *pg;

	while ((pg = freelist)) {
		freelist = pg->freelist;
		free_pgtable_page(page_address(pg));
	}
}

static void iova_entry_free(unsigned long data)
{
	struct page *freelist = (struct page *)data;

	dma_free_pagelist(freelist);
}

/* iommu handling */
static int iommu_alloc_root_entry(struct intel_iommu *iommu)
{
	struct root_entry *root;
	unsigned long flags;

	root = (struct root_entry *)alloc_pgtable_page(iommu->node);
	if (!root) {
		pr_err("Allocating root entry for %s failed\n",
			iommu->name);
		return -ENOMEM;
	}

	__iommu_flush_cache(iommu, root, ROOT_SIZE);

	spin_lock_irqsave(&iommu->lock, flags);
	iommu->root_entry = root;
	spin_unlock_irqrestore(&iommu->lock, flags);

	return 0;
}

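/*
 * Program the (scalable-mode aware) root table address into the hardware and
 * issue a Set Root Table Pointer command, spinning until the status register
 * reports completion.
 */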
static void iommu_set_root_entry(struct intel_iommu *iommu)
{
	u64 addr;
	u32 sts;
	unsigned long flag;

	addr = virt_to_phys(iommu->root_entry);
	if (sm_supported(iommu))
		addr |= DMA_RTADDR_SMT;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);

	writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware completes it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (sts & DMA_GSTS_RTPS), sts);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

void iommu_flush_write_buffer(struct intel_iommu *iommu)
{
	u32 val;
	unsigned long flag;

	if (!rwbf_quirk && !cap_rwbf(iommu->cap))
		return;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware completes it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (!(val & DMA_GSTS_WBFS)), val);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

/* The return value determines whether we need a write buffer flush */
static void __iommu_flush_context(struct intel_iommu *iommu,
				  u16 did, u16 source_id, u8 function_mask,
				  u64 type)
{
	u64 val = 0;
	unsigned long flag;

	switch (type) {
	case DMA_CCMD_GLOBAL_INVL:
		val = DMA_CCMD_GLOBAL_INVL;
		break;
	case DMA_CCMD_DOMAIN_INVL:
		val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
		break;
	case DMA_CCMD_DEVICE_INVL:
		val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
			| DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
		break;
	default:
		BUG();
	}
	val |= DMA_CCMD_ICC;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);

	/* Make sure hardware completes it */
	IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
		dmar_readq, (!(val & DMA_CCMD_ICC)), val);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

/* The return value determines whether we need a write buffer flush */
static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
				u64 addr, unsigned int size_order, u64 type)
{
	int tlb_offset = ecap_iotlb_offset(iommu->ecap);
	u64 val = 0, val_iva = 0;
	unsigned long flag;

	switch (type) {
	case DMA_TLB_GLOBAL_FLUSH:
		/* global flush doesn't need set IVA_REG */
		val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
		break;
	case DMA_TLB_DSI_FLUSH:
		val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		break;
	case DMA_TLB_PSI_FLUSH:
		val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		/* IH bit is passed in as part of address */
		val_iva = size_order | addr;
		break;
	default:
		BUG();
	}
	/* Note: set drain read/write */
#if 0
	/*
	 * This is probably to be super secure.. Looks like we can
	 * ignore it without any impact.
	 */
	if (cap_read_drain(iommu->cap))
		val |= DMA_TLB_READ_DRAIN;
#endif
	if (cap_write_drain(iommu->cap))
		val |= DMA_TLB_WRITE_DRAIN;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	/* Note: Only uses first TLB reg currently */
	if (val_iva)
		dmar_writeq(iommu->reg + tlb_offset, val_iva);
	dmar_writeq(iommu->reg + tlb_offset + 8, val);

	/* Make sure hardware completes it */
	IOMMU_WAIT_OP(iommu, tlb_offset + 8,
		dmar_readq, (!(val & DMA_TLB_IVT)), val);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);

	/* check IOTLB invalidation granularity */
	if (DMA_TLB_IAIG(val) == 0)
		pr_err("Flush IOTLB failed\n");
	if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
		pr_debug("TLB flush request %Lx, actual %Lx\n",
			(unsigned long long)DMA_TLB_IIRG(type),
			(unsigned long long)DMA_TLB_IAIG(val));
}

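/*
 * Return the device_domain_info for (bus, devfn) on @iommu if that device can
 * use a device IOTLB (i.e. ATS is supported and a queued-invalidation
 * interface exists), or NULL otherwise.
 */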
static struct device_domain_info *
iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
			 u8 bus, u8 devfn)
{
	struct device_domain_info *info;

	assert_spin_locked(&device_domain_lock);

	if (!iommu->qi)
		return NULL;

	list_for_each_entry(info, &domain->devices, link)
		if (info->iommu == iommu && info->bus == bus &&
		    info->devfn == devfn) {
			if (info->ats_supported && info->dev)
				return info;
			break;
		}

	return NULL;
}

static void domain_update_iotlb(struct dmar_domain *domain)
{
	struct device_domain_info *info;
	bool has_iotlb_device = false;

	assert_spin_locked(&device_domain_lock);

	list_for_each_entry(info, &domain->devices, link) {
		struct pci_dev *pdev;

		if (!info->dev || !dev_is_pci(info->dev))
			continue;

		pdev = to_pci_dev(info->dev);
		if (pdev->ats_enabled) {
			has_iotlb_device = true;
			break;
		}
	}

	domain->has_iotlb_device = has_iotlb_device;
}

static void iommu_enable_dev_iotlb(struct device_domain_info *info)
{
	struct pci_dev *pdev;

	assert_spin_locked(&device_domain_lock);

	if (!info || !dev_is_pci(info->dev))
		return;

	pdev = to_pci_dev(info->dev);
	/* For IOMMU that supports device IOTLB throttling (DIT), we assign
	 * PFSID to the invalidation desc of a VF such that IOMMU HW can gauge
	 * queue depth at PF level. If DIT is not set, PFSID will be treated as
	 * reserved, which should be set to 0.
	 */
	if (!ecap_dit(info->iommu->ecap))
		info->pfsid = 0;
	else {
		struct pci_dev *pf_pdev;

		/* pdev will be returned if device is not a vf */
		pf_pdev = pci_physfn(pdev);
		info->pfsid = pci_dev_id(pf_pdev);
	}

#ifdef CONFIG_INTEL_IOMMU_SVM
	/* The PCIe spec, in its wisdom, declares that the behaviour of
	   the device if you enable PASID support after ATS support is
	   undefined. So always enable PASID support on devices which
	   have it, even if we can't yet know if we're ever going to
	   use it. */
	if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
		info->pasid_enabled = 1;

	if (info->pri_supported &&
	    (info->pasid_enabled ? pci_prg_resp_pasid_required(pdev) : 1)  &&
	    !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
		info->pri_enabled = 1;
#endif
	if (!pdev->untrusted && info->ats_supported &&
	    pci_ats_page_aligned(pdev) &&
	    !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
		info->ats_enabled = 1;
		domain_update_iotlb(info->domain);
		info->ats_qdep = pci_ats_queue_depth(pdev);
	}
}

static void iommu_disable_dev_iotlb(struct device_domain_info *info)
{
	struct pci_dev *pdev;

	assert_spin_locked(&device_domain_lock);

	if (!dev_is_pci(info->dev))
		return;

	pdev = to_pci_dev(info->dev);

	if (info->ats_enabled) {
		pci_disable_ats(pdev);
		info->ats_enabled = 0;
		domain_update_iotlb(info->domain);
	}
#ifdef CONFIG_INTEL_IOMMU_SVM
	if (info->pri_enabled) {
		pci_disable_pri(pdev);
		info->pri_enabled = 0;
	}
	if (info->pasid_enabled) {
		pci_disable_pasid(pdev);
		info->pasid_enabled = 0;
	}
#endif
}

static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
				  u64 addr, unsigned mask)
{
	u16 sid, qdep;
	unsigned long flags;
	struct device_domain_info *info;

	if (!domain->has_iotlb_device)
		return;

	spin_lock_irqsave(&device_domain_lock, flags);
	list_for_each_entry(info, &domain->devices, link) {
		if (!info->ats_enabled)
			continue;

		sid = info->bus << 8 | info->devfn;
		qdep = info->ats_qdep;
		qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
				qdep, addr, mask);
	}
	spin_unlock_irqrestore(&device_domain_lock, flags);
}

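/*
 * Flush the IOTLB for @pages pages starting at @pfn in @domain on @iommu,
 * using a page-selective invalidation (PSI) when the hardware supports it and
 * the range is small enough, and falling back to a domain-selective flush
 * otherwise.
 */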
static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
				  struct dmar_domain *domain,
				  unsigned long pfn, unsigned int pages,
				  int ih, int map)
{
	unsigned int mask = ilog2(__roundup_pow_of_two(pages));
	uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
	u16 did = domain->iommu_did[iommu->seq_id];

	BUG_ON(pages == 0);

	if (ih)
		ih = 1 << 6;
	/*
	 * Fall back to a domain-selective flush if there is no PSI support or
	 * the size is too big.
	 * PSI requires the page size to be 2 ^ x, and the base address is
	 * naturally aligned to the size.
	 */
	if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
		iommu->flush.flush_iotlb(iommu, did, 0, 0,
						DMA_TLB_DSI_FLUSH);
	else
		iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
						DMA_TLB_PSI_FLUSH);

	/*
	 * In caching mode, changes of pages from non-present to present require
	 * a flush. However, the device IOTLB doesn't need to be flushed in this
	 * case.
	 */
	if (!cap_caching_mode(iommu->cap) || !map)
		iommu_flush_dev_iotlb(domain, addr, mask);
}

/* Notification for newly created mappings */
static inline void __mapping_notify_one(struct intel_iommu *iommu,
					struct dmar_domain *domain,
					unsigned long pfn, unsigned int pages)
{
	/* It's a non-present to present mapping. Only flush if caching mode */
	if (cap_caching_mode(iommu->cap))
		iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1);
	else
		iommu_flush_write_buffer(iommu);
}

static void iommu_flush_iova(struct iova_domain *iovad)
{
	struct dmar_domain *domain;
	int idx;

	domain = container_of(iovad, struct dmar_domain, iovad);

	for_each_domain_iommu(idx, domain) {
		struct intel_iommu *iommu = g_iommus[idx];
		u16 did = domain->iommu_did[iommu->seq_id];

		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);

		if (!cap_caching_mode(iommu->cap))
			iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
					      0, MAX_AGAW_PFN_WIDTH);
	}
}

static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
{
	u32 pmen;
	unsigned long flags;

	if (!cap_plmr(iommu->cap) && !cap_phmr(iommu->cap))
		return;

	raw_spin_lock_irqsave(&iommu->register_lock, flags);
	pmen = readl(iommu->reg + DMAR_PMEN_REG);
	pmen &= ~DMA_PMEN_EPM;
	writel(pmen, iommu->reg + DMAR_PMEN_REG);

	/* wait for the protected region status bit to clear */
	IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
		readl, !(pmen & DMA_PMEN_PRS), pmen);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
}

static void iommu_enable_translation(struct intel_iommu *iommu)
{
	u32 sts;
	unsigned long flags;

	raw_spin_lock_irqsave(&iommu->register_lock, flags);
	iommu->gcmd |= DMA_GCMD_TE;
	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware completes it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (sts & DMA_GSTS_TES), sts);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
}

static void iommu_disable_translation(struct intel_iommu *iommu)
{
	u32 sts;
	unsigned long flag;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	iommu->gcmd &= ~DMA_GCMD_TE;
	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware completes it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (!(sts & DMA_GSTS_TES)), sts);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

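/*
 * Allocate the per-IOMMU domain-ID bitmap and the lazily populated domain
 * pointer table, and reserve the IDs that must never be handed out to
 * ordinary domains (ID 0, plus FLPT_DEFAULT_DID in scalable mode).
 */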
ba395927
KA
1582static int iommu_init_domains(struct intel_iommu *iommu)
1583{
8bf47816
JR
1584 u32 ndomains, nlongs;
1585 size_t size;
ba395927
KA
1586
1587 ndomains = cap_ndoms(iommu->cap);
8bf47816 1588 pr_debug("%s: Number of Domains supported <%d>\n",
9f10e5bf 1589 iommu->name, ndomains);
ba395927
KA
1590 nlongs = BITS_TO_LONGS(ndomains);
1591
94a91b50
DD
1592 spin_lock_init(&iommu->lock);
1593
ba395927
KA
1594 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1595 if (!iommu->domain_ids) {
9f10e5bf
JR
1596 pr_err("%s: Allocating domain id array failed\n",
1597 iommu->name);
ba395927
KA
1598 return -ENOMEM;
1599 }
8bf47816 1600
86f004c7 1601 size = (ALIGN(ndomains, 256) >> 8) * sizeof(struct dmar_domain **);
8bf47816
JR
1602 iommu->domains = kzalloc(size, GFP_KERNEL);
1603
1604 if (iommu->domains) {
1605 size = 256 * sizeof(struct dmar_domain *);
1606 iommu->domains[0] = kzalloc(size, GFP_KERNEL);
1607 }
1608
1609 if (!iommu->domains || !iommu->domains[0]) {
9f10e5bf
JR
1610 pr_err("%s: Allocating domain array failed\n",
1611 iommu->name);
852bdb04 1612 kfree(iommu->domain_ids);
8bf47816 1613 kfree(iommu->domains);
852bdb04 1614 iommu->domain_ids = NULL;
8bf47816 1615 iommu->domains = NULL;
ba395927
KA
1616 return -ENOMEM;
1617 }
1618
8bf47816
JR
1619
1620
ba395927 1621 /*
c0e8a6c8
JR
1622 * If Caching mode is set, then invalid translations are tagged
1623 * with domain-id 0, hence we need to pre-allocate it. We also
1624 * use domain-id 0 as a marker for non-allocated domain-id, so
1625 * make sure it is not used for a real domain.
ba395927 1626 */
c0e8a6c8
JR
1627 set_bit(0, iommu->domain_ids);
1628
3b33d4ab
LB
1629 /*
1630 * Vt-d spec rev3.0 (section 6.2.3.1) requires that each pasid
1631 * entry for first-level or pass-through translation modes should
1632 * be programmed with a domain id different from those used for
1633 * second-level or nested translation. We reserve a domain id for
1634 * this purpose.
1635 */
1636 if (sm_supported(iommu))
1637 set_bit(FLPT_DEFAULT_DID, iommu->domain_ids);
1638
ba395927
KA
1639 return 0;
1640}
ba395927 1641
ffebeb46 1642static void disable_dmar_iommu(struct intel_iommu *iommu)
ba395927 1643{
29a27719 1644 struct device_domain_info *info, *tmp;
55d94043 1645 unsigned long flags;
ba395927 1646
29a27719
JR
1647 if (!iommu->domains || !iommu->domain_ids)
1648 return;
a4eaa86c 1649
55d94043 1650 spin_lock_irqsave(&device_domain_lock, flags);
29a27719
JR
1651 list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
1652 struct dmar_domain *domain;
1653
1654 if (info->iommu != iommu)
1655 continue;
1656
1657 if (!info->dev || !info->domain)
1658 continue;
1659
1660 domain = info->domain;
1661
bea64033 1662 __dmar_remove_one_dev_info(info);
ba395927 1663 }
55d94043 1664 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927
KA
1665
1666 if (iommu->gcmd & DMA_GCMD_TE)
1667 iommu_disable_translation(iommu);
ffebeb46 1668}
ba395927 1669
ffebeb46
JL
1670static void free_dmar_iommu(struct intel_iommu *iommu)
1671{
1672 if ((iommu->domains) && (iommu->domain_ids)) {
86f004c7 1673 int elems = ALIGN(cap_ndoms(iommu->cap), 256) >> 8;
8bf47816
JR
1674 int i;
1675
1676 for (i = 0; i < elems; i++)
1677 kfree(iommu->domains[i]);
ffebeb46
JL
1678 kfree(iommu->domains);
1679 kfree(iommu->domain_ids);
1680 iommu->domains = NULL;
1681 iommu->domain_ids = NULL;
1682 }
ba395927 1683
d9630fe9
WH
1684 g_iommus[iommu->seq_id] = NULL;
1685
ba395927
KA
1686 /* free context mapping */
1687 free_context_table(iommu);
8a94ade4
DW
1688
1689#ifdef CONFIG_INTEL_IOMMU_SVM
765b6a98 1690 if (pasid_supported(iommu)) {
a222a7f0
DW
1691 if (ecap_prs(iommu->ecap))
1692 intel_svm_finish_prq(iommu);
a222a7f0 1693 }
8a94ade4 1694#endif
ba395927
KA
1695}
1696
ab8dfe25 1697static struct dmar_domain *alloc_domain(int flags)
ba395927 1698{
ba395927 1699 struct dmar_domain *domain;
ba395927
KA
1700
1701 domain = alloc_domain_mem();
1702 if (!domain)
1703 return NULL;
1704
ab8dfe25 1705 memset(domain, 0, sizeof(*domain));
98fa15f3 1706 domain->nid = NUMA_NO_NODE;
ab8dfe25 1707 domain->flags = flags;
0824c592 1708 domain->has_iotlb_device = false;
92d03cc8 1709 INIT_LIST_HEAD(&domain->devices);
2c2e2c38
FY
1710
1711 return domain;
1712}
1713
d160aca5
JR
1714/* Must be called with iommu->lock */
1715static int domain_attach_iommu(struct dmar_domain *domain,
fb170fb4
JL
1716 struct intel_iommu *iommu)
1717{
44bde614 1718 unsigned long ndomains;
55d94043 1719 int num;
44bde614 1720
55d94043 1721 assert_spin_locked(&device_domain_lock);
d160aca5 1722 assert_spin_locked(&iommu->lock);
ba395927 1723
29a27719
JR
1724 domain->iommu_refcnt[iommu->seq_id] += 1;
1725 domain->iommu_count += 1;
1726 if (domain->iommu_refcnt[iommu->seq_id] == 1) {
fb170fb4 1727 ndomains = cap_ndoms(iommu->cap);
d160aca5
JR
1728 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1729
1730 if (num >= ndomains) {
1731 pr_err("%s: No free domain ids\n", iommu->name);
1732 domain->iommu_refcnt[iommu->seq_id] -= 1;
1733 domain->iommu_count -= 1;
55d94043 1734 return -ENOSPC;
2c2e2c38 1735 }
ba395927 1736
d160aca5
JR
1737 set_bit(num, iommu->domain_ids);
1738 set_iommu_domain(iommu, num, domain);
1739
1740 domain->iommu_did[iommu->seq_id] = num;
1741 domain->nid = iommu->node;
fb170fb4 1742
fb170fb4
JL
1743 domain_update_iommu_cap(domain);
1744 }
d160aca5 1745
55d94043 1746 return 0;
fb170fb4
JL
1747}
1748
1749static int domain_detach_iommu(struct dmar_domain *domain,
1750 struct intel_iommu *iommu)
1751{
e083ea5b 1752 int num, count;
d160aca5 1753
55d94043 1754 assert_spin_locked(&device_domain_lock);
d160aca5 1755 assert_spin_locked(&iommu->lock);
fb170fb4 1756
29a27719
JR
1757 domain->iommu_refcnt[iommu->seq_id] -= 1;
1758 count = --domain->iommu_count;
1759 if (domain->iommu_refcnt[iommu->seq_id] == 0) {
d160aca5
JR
1760 num = domain->iommu_did[iommu->seq_id];
1761 clear_bit(num, iommu->domain_ids);
1762 set_iommu_domain(iommu, num, NULL);
fb170fb4 1763
fb170fb4 1764 domain_update_iommu_cap(domain);
c0e8a6c8 1765 domain->iommu_did[iommu->seq_id] = 0;
fb170fb4 1766 }
fb170fb4
JL
1767
1768 return count;
1769}
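
The attach/detach pair above refcounts each (domain, IOMMU) pairing and hands out a hardware domain ID from the iommu->domain_ids bitmap only on the first attach, releasing it again on the last detach. A hedged, standalone sketch of that bookkeeping for a single IOMMU follows; the names and the plain-array bitmap are illustrative stand-ins, not kernel APIs.

#include <assert.h>
#include <stdbool.h>

#define NDOMAINS 64			/* stand-in for cap_ndoms(iommu->cap) */

static bool domain_ids[NDOMAINS];	/* stand-in for the iommu->domain_ids bitmap */
static int iommu_refcnt;		/* stand-in for domain->iommu_refcnt[seq_id] */
static int domain_did;			/* stand-in for domain->iommu_did[seq_id] */

static int toy_attach(void)
{
	if (++iommu_refcnt == 1) {	/* first device of this domain on this IOMMU */
		int num;

		for (num = 0; num < NDOMAINS && domain_ids[num]; num++)
			;		/* find_first_zero_bit() analogue */
		if (num == NDOMAINS) {
			iommu_refcnt--;
			return -1;	/* -ENOSPC in the kernel */
		}
		domain_ids[num] = true;
		domain_did = num;
	}
	return 0;
}

static void toy_detach(void)
{
	if (--iommu_refcnt == 0) {	/* last device gone: release the ID */
		domain_ids[domain_did] = false;
		domain_did = 0;
	}
}

int main(void)
{
	assert(toy_attach() == 0);	/* first attach allocates an ID */
	assert(toy_attach() == 0);	/* second attach only bumps the refcount */
	toy_detach();
	toy_detach();			/* last detach frees the ID again */
	assert(!domain_ids[0]);
	return 0;
}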
1770
ba395927 1771static struct iova_domain reserved_iova_list;
8a443df4 1772static struct lock_class_key reserved_rbtree_key;
ba395927 1773
51a63e67 1774static int dmar_init_reserved_ranges(void)
ba395927
KA
1775{
1776 struct pci_dev *pdev = NULL;
1777 struct iova *iova;
1778 int i;
ba395927 1779
aa3ac946 1780 init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN);
ba395927 1781
8a443df4
MG
1782 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1783 &reserved_rbtree_key);
1784
ba395927
KA
1785 /* IOAPIC ranges shouldn't be accessed by DMA */
1786 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1787 IOVA_PFN(IOAPIC_RANGE_END));
51a63e67 1788 if (!iova) {
9f10e5bf 1789 pr_err("Reserve IOAPIC range failed\n");
51a63e67
JC
1790 return -ENODEV;
1791 }
ba395927
KA
1792
1793 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1794 for_each_pci_dev(pdev) {
1795 struct resource *r;
1796
1797 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1798 r = &pdev->resource[i];
1799 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1800 continue;
1a4a4551
DW
1801 iova = reserve_iova(&reserved_iova_list,
1802 IOVA_PFN(r->start),
1803 IOVA_PFN(r->end));
51a63e67 1804 if (!iova) {
932a6523 1805 pci_err(pdev, "Reserve iova for %pR failed\n", r);
51a63e67
JC
1806 return -ENODEV;
1807 }
ba395927
KA
1808 }
1809 }
51a63e67 1810 return 0;
ba395927
KA
1811}
1812
1813static void domain_reserve_special_ranges(struct dmar_domain *domain)
1814{
1815 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1816}
1817
1818static inline int guestwidth_to_adjustwidth(int gaw)
1819{
1820 int agaw;
1821 int r = (gaw - 12) % 9;
1822
1823 if (r == 0)
1824 agaw = gaw;
1825 else
1826 agaw = gaw + 9 - r;
1827 if (agaw > 64)
1828 agaw = 64;
1829 return agaw;
1830}
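
A worked example of the rounding above, with illustrative widths only: the function rounds the guest address width up to the next value of the form 12 + 9*n, which is what whole VT-d page-table levels can cover.

#include <assert.h>

/* Same arithmetic as guestwidth_to_adjustwidth(), applied to sample widths. */
static int adjust(int gaw)
{
	int r = (gaw - 12) % 9;
	int agaw = r ? gaw + 9 - r : gaw;

	return agaw > 64 ? 64 : agaw;
}

int main(void)
{
	assert(adjust(39) == 39);	/* 3-level tables: 12 + 9*3 */
	assert(adjust(48) == 48);	/* 4-level tables: 12 + 9*4 */
	assert(adjust(40) == 48);	/* rounds up to the next level boundary */
	assert(adjust(57) == 57);	/* 5-level tables: 12 + 9*5 */
	return 0;
}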
1831
dc534b25
JR
1832static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
1833 int guest_width)
ba395927 1834{
ba395927
KA
1835 int adjust_width, agaw;
1836 unsigned long sagaw;
13cf0174 1837 int err;
ba395927 1838
aa3ac946 1839 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
13cf0174
JR
1840
1841 err = init_iova_flush_queue(&domain->iovad,
1842 iommu_flush_iova, iova_entry_free);
1843 if (err)
1844 return err;
1845
ba395927
KA
1846 domain_reserve_special_ranges(domain);
1847
1848 /* calculate AGAW */
ba395927
KA
1849 if (guest_width > cap_mgaw(iommu->cap))
1850 guest_width = cap_mgaw(iommu->cap);
1851 domain->gaw = guest_width;
1852 adjust_width = guestwidth_to_adjustwidth(guest_width);
1853 agaw = width_to_agaw(adjust_width);
1854 sagaw = cap_sagaw(iommu->cap);
1855 if (!test_bit(agaw, &sagaw)) {
1856 /* hardware doesn't support it, choose a bigger one */
9f10e5bf 1857 pr_debug("Hardware doesn't support agaw %d\n", agaw);
ba395927
KA
1858 agaw = find_next_bit(&sagaw, 5, agaw);
1859 if (agaw >= 5)
1860 return -ENODEV;
1861 }
1862 domain->agaw = agaw;
ba395927 1863
8e604097
WH
1864 if (ecap_coherent(iommu->ecap))
1865 domain->iommu_coherency = 1;
1866 else
1867 domain->iommu_coherency = 0;
1868
58c610bd
SY
1869 if (ecap_sc_support(iommu->ecap))
1870 domain->iommu_snooping = 1;
1871 else
1872 domain->iommu_snooping = 0;
1873
214e39aa
DW
1874 if (intel_iommu_superpage)
1875 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1876 else
1877 domain->iommu_superpage = 0;
1878
4c923d47 1879 domain->nid = iommu->node;
c7151a8d 1880
ba395927 1881 /* always allocate the top pgd */
4c923d47 1882 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
ba395927
KA
1883 if (!domain->pgd)
1884 return -ENOMEM;
5b6985ce 1885 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
ba395927
KA
1886 return 0;
1887}
1888
1889static void domain_exit(struct dmar_domain *domain)
1890{
e083ea5b 1891 struct page *freelist;
ba395927 1892
d160aca5 1893 /* Remove associated devices and clear attached or cached domains */
ba395927 1894 domain_remove_dev_info(domain);
92d03cc8 1895
ba395927
KA
1896 /* destroy iovas */
1897 put_iova_domain(&domain->iovad);
ba395927 1898
ea8ea460 1899 freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
ba395927 1900
ea8ea460
DW
1901 dma_free_pagelist(freelist);
1902
ba395927
KA
1903 free_domain_mem(domain);
1904}
1905
7373a8cc
LB
1906/*
 1907 * Get the PASID directory size for a scalable mode context entry.
 1908 * A value of X in the PDTS field of a scalable mode context entry
 1909 * indicates a PASID directory with 2^(X + 7) entries.
1910 */
1911static inline unsigned long context_get_sm_pds(struct pasid_table *table)
1912{
1913 int pds, max_pde;
1914
1915 max_pde = table->max_pasid >> PASID_PDE_SHIFT;
1916 pds = find_first_bit((unsigned long *)&max_pde, MAX_NR_PASID_BITS);
1917 if (pds < 7)
1918 return 0;
1919
1920 return pds - 7;
1921}
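
As a hedged, standalone illustration of the PDTS encoding described above: assuming 64 PASIDs per directory entry (i.e. PASID_PDE_SHIFT of 6, which is an assumption here, not taken from this file), a 20-bit PASID space needs a 2^14-entry directory, and the PDTS field encodes that as X = 7 because 2^(7 + 7) = 2^14.

#include <assert.h>

int main(void)
{
	unsigned long max_pasid = 1UL << 20;	/* 20-bit PASID space */
	unsigned long max_pde = max_pasid >> 6;	/* directory entries, 64 PASIDs each */
	int x = 0;

	while ((1UL << (x + 7)) < max_pde)	/* smallest X with 2^(X+7) >= entries */
		x++;

	assert(max_pde == 1UL << 14);
	assert(x == 7);				/* PDTS value programmed into the entry */
	return 0;
}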
1922
1923/*
1924 * Set the RID_PASID field of a scalable mode context entry. The
1925 * IOMMU hardware will use the PASID value set in this field for
1926 * DMA translations of DMA requests without PASID.
1927 */
1928static inline void
1929context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid)
1930{
1931 context->hi |= pasid & ((1 << 20) - 1);
1932 context->hi |= (1 << 20);
1933}
1934
1935/*
1936 * Set the DTE(Device-TLB Enable) field of a scalable mode context
1937 * entry.
1938 */
1939static inline void context_set_sm_dte(struct context_entry *context)
1940{
1941 context->lo |= (1 << 2);
1942}
1943
1944/*
1945 * Set the PRE(Page Request Enable) field of a scalable mode context
1946 * entry.
1947 */
1948static inline void context_set_sm_pre(struct context_entry *context)
1949{
1950 context->lo |= (1 << 4);
1951}
1952
1953/* Convert value to context PASID directory size field coding. */
1954#define context_pdts(pds) (((pds) & 0x7) << 9)
1955
64ae892b
DW
1956static int domain_context_mapping_one(struct dmar_domain *domain,
1957 struct intel_iommu *iommu,
ca6e322d 1958 struct pasid_table *table,
28ccce0d 1959 u8 bus, u8 devfn)
ba395927 1960{
c6c2cebd 1961 u16 did = domain->iommu_did[iommu->seq_id];
28ccce0d
JR
1962 int translation = CONTEXT_TT_MULTI_LEVEL;
1963 struct device_domain_info *info = NULL;
ba395927 1964 struct context_entry *context;
ba395927 1965 unsigned long flags;
7373a8cc 1966 int ret;
28ccce0d 1967
c6c2cebd
JR
1968 WARN_ON(did == 0);
1969
28ccce0d
JR
1970 if (hw_pass_through && domain_type_is_si(domain))
1971 translation = CONTEXT_TT_PASS_THROUGH;
ba395927
KA
1972
1973 pr_debug("Set context mapping for %02x:%02x.%d\n",
1974 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
4ed0d3e6 1975
ba395927 1976 BUG_ON(!domain->pgd);
5331fe6f 1977
55d94043
JR
1978 spin_lock_irqsave(&device_domain_lock, flags);
1979 spin_lock(&iommu->lock);
1980
1981 ret = -ENOMEM;
03ecc32c 1982 context = iommu_context_addr(iommu, bus, devfn, 1);
ba395927 1983 if (!context)
55d94043 1984 goto out_unlock;
ba395927 1985
55d94043
JR
1986 ret = 0;
1987 if (context_present(context))
1988 goto out_unlock;
cf484d0e 1989
aec0e861
XP
1990 /*
1991 * For kdump cases, old valid entries may be cached due to the
1992 * in-flight DMA and copied pgtable, but there is no unmapping
1993 * behaviour for them, thus we need an explicit cache flush for
1994 * the newly-mapped device. For kdump, at this point, the device
1995 * is supposed to finish reset at its driver probe stage, so no
 1996 * in-flight DMA will exist, and we don't need to worry about it
1997 * hereafter.
1998 */
1999 if (context_copied(context)) {
2000 u16 did_old = context_domain_id(context);
2001
b117e038 2002 if (did_old < cap_ndoms(iommu->cap)) {
aec0e861
XP
2003 iommu->flush.flush_context(iommu, did_old,
2004 (((u16)bus) << 8) | devfn,
2005 DMA_CCMD_MASK_NOBIT,
2006 DMA_CCMD_DEVICE_INVL);
f73a7eee
KA
2007 iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
2008 DMA_TLB_DSI_FLUSH);
2009 }
aec0e861
XP
2010 }
2011
de24e553 2012 context_clear_entry(context);
ea6606b0 2013
7373a8cc
LB
2014 if (sm_supported(iommu)) {
2015 unsigned long pds;
4ed0d3e6 2016
7373a8cc
LB
2017 WARN_ON(!table);
2018
2019 /* Setup the PASID DIR pointer: */
2020 pds = context_get_sm_pds(table);
2021 context->lo = (u64)virt_to_phys(table->table) |
2022 context_pdts(pds);
2023
2024 /* Setup the RID_PASID field: */
2025 context_set_sm_rid2pasid(context, PASID_RID2PASID);
de24e553 2026
de24e553 2027 /*
7373a8cc
LB
2028 * Setup the Device-TLB enable bit and Page request
2029 * Enable bit:
de24e553 2030 */
7373a8cc
LB
2031 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2032 if (info && info->ats_supported)
2033 context_set_sm_dte(context);
2034 if (info && info->pri_supported)
2035 context_set_sm_pre(context);
2036 } else {
2037 struct dma_pte *pgd = domain->pgd;
2038 int agaw;
2039
2040 context_set_domain_id(context, did);
7373a8cc
LB
2041
2042 if (translation != CONTEXT_TT_PASS_THROUGH) {
2043 /*
2044 * Skip top levels of page tables for iommu which has
2045 * less agaw than default. Unnecessary for PT mode.
2046 */
2047 for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
2048 ret = -ENOMEM;
2049 pgd = phys_to_virt(dma_pte_addr(pgd));
2050 if (!dma_pte_present(pgd))
2051 goto out_unlock;
2052 }
2053
2054 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2055 if (info && info->ats_supported)
2056 translation = CONTEXT_TT_DEV_IOTLB;
2057 else
2058 translation = CONTEXT_TT_MULTI_LEVEL;
2059
2060 context_set_address_root(context, virt_to_phys(pgd));
2061 context_set_address_width(context, agaw);
2062 } else {
2063 /*
2064 * In pass through mode, AW must be programmed to
2065 * indicate the largest AGAW value supported by
2066 * hardware. And ASR is ignored by hardware.
2067 */
2068 context_set_address_width(context, iommu->msagaw);
2069 }
41b80db2
LB
2070
2071 context_set_translation_type(context, translation);
93a23a72 2072 }
4ed0d3e6 2073
c07e7d21
MM
2074 context_set_fault_enable(context);
2075 context_set_present(context);
5331fe6f 2076 domain_flush_cache(domain, context, sizeof(*context));
ba395927 2077
4c25a2c1
DW
2078 /*
2079 * It's a non-present to present mapping. If hardware doesn't cache
 2080 * non-present entries we only need to flush the write-buffer. If it
2081 * _does_ cache non-present entries, then it does so in the special
2082 * domain #0, which we have to flush:
2083 */
2084 if (cap_caching_mode(iommu->cap)) {
2085 iommu->flush.flush_context(iommu, 0,
2086 (((u16)bus) << 8) | devfn,
2087 DMA_CCMD_MASK_NOBIT,
2088 DMA_CCMD_DEVICE_INVL);
c6c2cebd 2089 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
4c25a2c1 2090 } else {
ba395927 2091 iommu_flush_write_buffer(iommu);
4c25a2c1 2092 }
93a23a72 2093 iommu_enable_dev_iotlb(info);
c7151a8d 2094
55d94043
JR
2095 ret = 0;
2096
2097out_unlock:
2098 spin_unlock(&iommu->lock);
2099 spin_unlock_irqrestore(&device_domain_lock, flags);
fb170fb4 2100
5c365d18 2101 return ret;
ba395927
KA
2102}
2103
579305f7
AW
2104struct domain_context_mapping_data {
2105 struct dmar_domain *domain;
2106 struct intel_iommu *iommu;
ca6e322d 2107 struct pasid_table *table;
579305f7
AW
2108};
2109
2110static int domain_context_mapping_cb(struct pci_dev *pdev,
2111 u16 alias, void *opaque)
2112{
2113 struct domain_context_mapping_data *data = opaque;
2114
2115 return domain_context_mapping_one(data->domain, data->iommu,
ca6e322d
LB
2116 data->table, PCI_BUS_NUM(alias),
2117 alias & 0xff);
579305f7
AW
2118}
2119
ba395927 2120static int
28ccce0d 2121domain_context_mapping(struct dmar_domain *domain, struct device *dev)
ba395927 2122{
ca6e322d
LB
2123 struct domain_context_mapping_data data;
2124 struct pasid_table *table;
64ae892b 2125 struct intel_iommu *iommu;
156baca8 2126 u8 bus, devfn;
64ae892b 2127
e1f167f3 2128 iommu = device_to_iommu(dev, &bus, &devfn);
64ae892b
DW
2129 if (!iommu)
2130 return -ENODEV;
ba395927 2131
ca6e322d
LB
2132 table = intel_pasid_get_table(dev);
2133
579305f7 2134 if (!dev_is_pci(dev))
ca6e322d
LB
2135 return domain_context_mapping_one(domain, iommu, table,
2136 bus, devfn);
579305f7
AW
2137
2138 data.domain = domain;
2139 data.iommu = iommu;
ca6e322d 2140 data.table = table;
579305f7
AW
2141
2142 return pci_for_each_dma_alias(to_pci_dev(dev),
2143 &domain_context_mapping_cb, &data);
2144}
2145
2146static int domain_context_mapped_cb(struct pci_dev *pdev,
2147 u16 alias, void *opaque)
2148{
2149 struct intel_iommu *iommu = opaque;
2150
2151 return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
ba395927
KA
2152}
2153
e1f167f3 2154static int domain_context_mapped(struct device *dev)
ba395927 2155{
5331fe6f 2156 struct intel_iommu *iommu;
156baca8 2157 u8 bus, devfn;
5331fe6f 2158
e1f167f3 2159 iommu = device_to_iommu(dev, &bus, &devfn);
5331fe6f
WH
2160 if (!iommu)
2161 return -ENODEV;
ba395927 2162
579305f7
AW
2163 if (!dev_is_pci(dev))
2164 return device_context_mapped(iommu, bus, devfn);
e1f167f3 2165
579305f7
AW
2166 return !pci_for_each_dma_alias(to_pci_dev(dev),
2167 domain_context_mapped_cb, iommu);
ba395927
KA
2168}
2169
f532959b
FY
2170/* Returns a number of VTD pages, but aligned to MM page size */
2171static inline unsigned long aligned_nrpages(unsigned long host_addr,
2172 size_t size)
2173{
2174 host_addr &= ~PAGE_MASK;
2175 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
2176}
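
A quick standalone check of what aligned_nrpages() computes, assuming 4KiB pages so that the MM page size and the VT-d page size coincide; the macros below are illustrative stand-ins for the kernel's PAGE_MASK/PAGE_ALIGN/VTD_PAGE_SHIFT.

#include <assert.h>

#define PAGE_SIZE	4096UL
#define PAGE_MASK	(~(PAGE_SIZE - 1))
#define PAGE_ALIGN(x)	(((x) + PAGE_SIZE - 1) & PAGE_MASK)
#define VTD_PAGE_SHIFT	12

static unsigned long nrpages(unsigned long host_addr, unsigned long size)
{
	host_addr &= ~PAGE_MASK;	/* keep only the offset within the page */
	return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
}

int main(void)
{
	assert(nrpages(0x1000, 0x1000) == 1);	/* page-aligned, exactly one page */
	assert(nrpages(0x1ff0, 0x20) == 2);	/* 32 bytes straddling a page boundary */
	assert(nrpages(0x1001, 0x1000) == 2);	/* unaligned start pulls in a second page */
	return 0;
}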
2177
6dd9a7c7
YS
2178/* Return largest possible superpage level for a given mapping */
2179static inline int hardware_largepage_caps(struct dmar_domain *domain,
2180 unsigned long iov_pfn,
2181 unsigned long phy_pfn,
2182 unsigned long pages)
2183{
2184 int support, level = 1;
2185 unsigned long pfnmerge;
2186
2187 support = domain->iommu_superpage;
2188
2189 /* To use a large page, the virtual *and* physical addresses
2190 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2191 of them will mean we have to use smaller pages. So just
2192 merge them and check both at once. */
2193 pfnmerge = iov_pfn | phy_pfn;
2194
2195 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
2196 pages >>= VTD_STRIDE_SHIFT;
2197 if (!pages)
2198 break;
2199 pfnmerge >>= VTD_STRIDE_SHIFT;
2200 level++;
2201 support--;
2202 }
2203 return level;
2204}
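
A standalone illustration of the alignment test above, assuming the usual 9-bit VT-d stride so that a level-2 superpage spans 512 4KiB pages (2MiB); the low-bits mask used here is the complement of the kernel's VTD_STRIDE_MASK, and the helper mirrors the loop for a domain that supports one superpage level.

#include <assert.h>

#define STRIDE_SHIFT	9
#define STRIDE_MASK	((1UL << STRIDE_SHIFT) - 1)

/* Mirrors hardware_largepage_caps() for 'support' extra page-size levels. */
static int largepage_level(int support, unsigned long iov_pfn,
			   unsigned long phy_pfn, unsigned long pages)
{
	unsigned long pfnmerge = iov_pfn | phy_pfn;	/* both must be aligned */
	int level = 1;

	while (support && !(pfnmerge & STRIDE_MASK)) {
		pages >>= STRIDE_SHIFT;
		if (!pages)
			break;
		pfnmerge >>= STRIDE_SHIFT;
		level++;
		support--;
	}
	return level;
}

int main(void)
{
	/* 2MiB-aligned IOVA and physical PFNs with 512 pages: a superpage fits. */
	assert(largepage_level(1, 0x200, 0x400, 512) == 2);
	/* A misaligned physical PFN forces 4KiB mappings. */
	assert(largepage_level(1, 0x200, 0x401, 512) == 1);
	/* Too few pages to fill a superpage also stays at level 1. */
	assert(largepage_level(1, 0x200, 0x400, 256) == 1);
	return 0;
}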
2205
9051aa02
DW
2206static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2207 struct scatterlist *sg, unsigned long phys_pfn,
2208 unsigned long nr_pages, int prot)
e1605495
DW
2209{
2210 struct dma_pte *first_pte = NULL, *pte = NULL;
9051aa02 2211 phys_addr_t uninitialized_var(pteval);
cc4f14aa 2212 unsigned long sg_res = 0;
6dd9a7c7
YS
2213 unsigned int largepage_lvl = 0;
2214 unsigned long lvl_pages = 0;
e1605495 2215
162d1b10 2216 BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
e1605495
DW
2217
2218 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2219 return -EINVAL;
2220
2221 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
2222
cc4f14aa
JL
2223 if (!sg) {
2224 sg_res = nr_pages;
9051aa02
DW
2225 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
2226 }
2227
6dd9a7c7 2228 while (nr_pages > 0) {
c85994e4
DW
2229 uint64_t tmp;
2230
e1605495 2231 if (!sg_res) {
29a90b70
RM
2232 unsigned int pgoff = sg->offset & ~PAGE_MASK;
2233
f532959b 2234 sg_res = aligned_nrpages(sg->offset, sg->length);
29a90b70 2235 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + pgoff;
e1605495 2236 sg->dma_length = sg->length;
29a90b70 2237 pteval = (sg_phys(sg) - pgoff) | prot;
6dd9a7c7 2238 phys_pfn = pteval >> VTD_PAGE_SHIFT;
e1605495 2239 }
6dd9a7c7 2240
e1605495 2241 if (!pte) {
6dd9a7c7
YS
2242 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2243
5cf0a76f 2244 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
e1605495
DW
2245 if (!pte)
2246 return -ENOMEM;
6dd9a7c7 2247 /* It is a large page */
6491d4d0 2248 if (largepage_lvl > 1) {
ba2374fd
CZ
2249 unsigned long nr_superpages, end_pfn;
2250
6dd9a7c7 2251 pteval |= DMA_PTE_LARGE_PAGE;
d41a4adb 2252 lvl_pages = lvl_to_nr_pages(largepage_lvl);
ba2374fd
CZ
2253
2254 nr_superpages = sg_res / lvl_pages;
2255 end_pfn = iov_pfn + nr_superpages * lvl_pages - 1;
2256
d41a4adb
JL
2257 /*
2258 * Ensure that old small page tables are
ba2374fd 2259 * removed to make room for superpage(s).
bc24c571
DD
2260 * We're adding new large pages, so make sure
2261 * we don't remove their parent tables.
d41a4adb 2262 */
bc24c571
DD
2263 dma_pte_free_pagetable(domain, iov_pfn, end_pfn,
2264 largepage_lvl + 1);
6491d4d0 2265 } else {
6dd9a7c7 2266 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
6491d4d0 2267 }
6dd9a7c7 2268
e1605495
DW
2269 }
 2270 /* We don't need a lock here; nobody else
2271 * touches the iova range
2272 */
7766a3fb 2273 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
c85994e4 2274 if (tmp) {
1bf20f0d 2275 static int dumps = 5;
9f10e5bf
JR
2276 pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2277 iov_pfn, tmp, (unsigned long long)pteval);
1bf20f0d
DW
2278 if (dumps) {
2279 dumps--;
2280 debug_dma_dump_mappings(NULL);
2281 }
2282 WARN_ON(1);
2283 }
6dd9a7c7
YS
2284
2285 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2286
2287 BUG_ON(nr_pages < lvl_pages);
2288 BUG_ON(sg_res < lvl_pages);
2289
2290 nr_pages -= lvl_pages;
2291 iov_pfn += lvl_pages;
2292 phys_pfn += lvl_pages;
2293 pteval += lvl_pages * VTD_PAGE_SIZE;
2294 sg_res -= lvl_pages;
2295
2296 /* If the next PTE would be the first in a new page, then we
2297 need to flush the cache on the entries we've just written.
2298 And then we'll need to recalculate 'pte', so clear it and
2299 let it get set again in the if (!pte) block above.
2300
2301 If we're done (!nr_pages) we need to flush the cache too.
2302
2303 Also if we've been setting superpages, we may need to
2304 recalculate 'pte' and switch back to smaller pages for the
2305 end of the mapping, if the trailing size is not enough to
2306 use another superpage (i.e. sg_res < lvl_pages). */
e1605495 2307 pte++;
6dd9a7c7
YS
2308 if (!nr_pages || first_pte_in_page(pte) ||
2309 (largepage_lvl > 1 && sg_res < lvl_pages)) {
e1605495
DW
2310 domain_flush_cache(domain, first_pte,
2311 (void *)pte - (void *)first_pte);
2312 pte = NULL;
2313 }
6dd9a7c7
YS
2314
2315 if (!sg_res && nr_pages)
e1605495
DW
2316 sg = sg_next(sg);
2317 }
2318 return 0;
2319}
2320
87684fd9 2321static int domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
095303e0
LB
2322 struct scatterlist *sg, unsigned long phys_pfn,
2323 unsigned long nr_pages, int prot)
2324{
fa954e68 2325 int iommu_id, ret;
095303e0
LB
2326 struct intel_iommu *iommu;
2327
2328 /* Do the real mapping first */
2329 ret = __domain_mapping(domain, iov_pfn, sg, phys_pfn, nr_pages, prot);
2330 if (ret)
2331 return ret;
2332
fa954e68
LB
2333 for_each_domain_iommu(iommu_id, domain) {
2334 iommu = g_iommus[iommu_id];
095303e0
LB
2335 __mapping_notify_one(iommu, domain, iov_pfn, nr_pages);
2336 }
2337
2338 return 0;
87684fd9
PX
2339}
2340
9051aa02
DW
2341static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2342 struct scatterlist *sg, unsigned long nr_pages,
2343 int prot)
ba395927 2344{
87684fd9 2345 return domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
9051aa02 2346}
6f6a00e4 2347
9051aa02
DW
2348static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2349 unsigned long phys_pfn, unsigned long nr_pages,
2350 int prot)
2351{
87684fd9 2352 return domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
ba395927
KA
2353}
2354
2452d9db 2355static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn)
ba395927 2356{
5082219b
FS
2357 unsigned long flags;
2358 struct context_entry *context;
2359 u16 did_old;
2360
c7151a8d
WH
2361 if (!iommu)
2362 return;
8c11e798 2363
5082219b
FS
2364 spin_lock_irqsave(&iommu->lock, flags);
2365 context = iommu_context_addr(iommu, bus, devfn, 0);
2366 if (!context) {
2367 spin_unlock_irqrestore(&iommu->lock, flags);
2368 return;
2369 }
2370 did_old = context_domain_id(context);
2371 context_clear_entry(context);
2372 __iommu_flush_cache(iommu, context, sizeof(*context));
2373 spin_unlock_irqrestore(&iommu->lock, flags);
2374 iommu->flush.flush_context(iommu,
2375 did_old,
2376 (((u16)bus) << 8) | devfn,
2377 DMA_CCMD_MASK_NOBIT,
2378 DMA_CCMD_DEVICE_INVL);
2379 iommu->flush.flush_iotlb(iommu,
2380 did_old,
2381 0,
2382 0,
2383 DMA_TLB_DSI_FLUSH);
ba395927
KA
2384}
2385
109b9b04
DW
2386static inline void unlink_domain_info(struct device_domain_info *info)
2387{
2388 assert_spin_locked(&device_domain_lock);
2389 list_del(&info->link);
2390 list_del(&info->global);
2391 if (info->dev)
0bcb3e28 2392 info->dev->archdata.iommu = NULL;
109b9b04
DW
2393}
2394
ba395927
KA
2395static void domain_remove_dev_info(struct dmar_domain *domain)
2396{
3a74ca01 2397 struct device_domain_info *info, *tmp;
fb170fb4 2398 unsigned long flags;
ba395927
KA
2399
2400 spin_lock_irqsave(&device_domain_lock, flags);
76f45fe3 2401 list_for_each_entry_safe(info, tmp, &domain->devices, link)
127c7615 2402 __dmar_remove_one_dev_info(info);
ba395927
KA
2403 spin_unlock_irqrestore(&device_domain_lock, flags);
2404}
2405
2406/*
2407 * find_domain
1525a29a 2408 * Note: we use struct device->archdata.iommu to store the info
ba395927 2409 */
1525a29a 2410static struct dmar_domain *find_domain(struct device *dev)
ba395927
KA
2411{
2412 struct device_domain_info *info;
2413
8af46c78
LB
2414 if (unlikely(dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO)) {
2415 struct iommu_domain *domain;
2416
2417 dev->archdata.iommu = NULL;
2418 domain = iommu_get_domain_for_dev(dev);
2419 if (domain)
2420 intel_iommu_attach_device(domain, dev);
2421 }
2422
ba395927 2423 /* No lock here, assumes no domain exit in normal case */
1525a29a 2424 info = dev->archdata.iommu;
8af46c78 2425
b316d02a 2426 if (likely(info))
ba395927
KA
2427 return info->domain;
2428 return NULL;
2429}
2430
5a8f40e8 2431static inline struct device_domain_info *
745f2586
JL
2432dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2433{
2434 struct device_domain_info *info;
2435
2436 list_for_each_entry(info, &device_domain_list, global)
41e80dca 2437 if (info->iommu->segment == segment && info->bus == bus &&
745f2586 2438 info->devfn == devfn)
5a8f40e8 2439 return info;
745f2586
JL
2440
2441 return NULL;
2442}
2443
5db31569
JR
2444static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
2445 int bus, int devfn,
2446 struct device *dev,
2447 struct dmar_domain *domain)
745f2586 2448{
5a8f40e8 2449 struct dmar_domain *found = NULL;
745f2586
JL
2450 struct device_domain_info *info;
2451 unsigned long flags;
d160aca5 2452 int ret;
745f2586
JL
2453
2454 info = alloc_devinfo_mem();
2455 if (!info)
b718cd3d 2456 return NULL;
745f2586 2457
745f2586
JL
2458 info->bus = bus;
2459 info->devfn = devfn;
b16d0cb9
DW
2460 info->ats_supported = info->pasid_supported = info->pri_supported = 0;
2461 info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
2462 info->ats_qdep = 0;
745f2586
JL
2463 info->dev = dev;
2464 info->domain = domain;
5a8f40e8 2465 info->iommu = iommu;
cc580e41 2466 info->pasid_table = NULL;
95587a75 2467 info->auxd_enabled = 0;
67b8e02b 2468 INIT_LIST_HEAD(&info->auxiliary_domains);
745f2586 2469
b16d0cb9
DW
2470 if (dev && dev_is_pci(dev)) {
2471 struct pci_dev *pdev = to_pci_dev(info->dev);
2472
d8b85910
LB
2473 if (!pdev->untrusted &&
2474 !pci_ats_disabled() &&
cef74409 2475 ecap_dev_iotlb_support(iommu->ecap) &&
b16d0cb9
DW
2476 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) &&
2477 dmar_find_matched_atsr_unit(pdev))
2478 info->ats_supported = 1;
2479
765b6a98
LB
2480 if (sm_supported(iommu)) {
2481 if (pasid_supported(iommu)) {
b16d0cb9
DW
2482 int features = pci_pasid_features(pdev);
2483 if (features >= 0)
2484 info->pasid_supported = features | 1;
2485 }
2486
2487 if (info->ats_supported && ecap_prs(iommu->ecap) &&
2488 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
2489 info->pri_supported = 1;
2490 }
2491 }
2492
745f2586
JL
2493 spin_lock_irqsave(&device_domain_lock, flags);
2494 if (dev)
0bcb3e28 2495 found = find_domain(dev);
f303e507
JR
2496
2497 if (!found) {
5a8f40e8 2498 struct device_domain_info *info2;
41e80dca 2499 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
f303e507
JR
2500 if (info2) {
2501 found = info2->domain;
2502 info2->dev = dev;
2503 }
5a8f40e8 2504 }
f303e507 2505
745f2586
JL
2506 if (found) {
2507 spin_unlock_irqrestore(&device_domain_lock, flags);
2508 free_devinfo_mem(info);
b718cd3d
DW
2509 /* Caller must free the original domain */
2510 return found;
745f2586
JL
2511 }
2512
d160aca5
JR
2513 spin_lock(&iommu->lock);
2514 ret = domain_attach_iommu(domain, iommu);
2515 spin_unlock(&iommu->lock);
2516
2517 if (ret) {
c6c2cebd 2518 spin_unlock_irqrestore(&device_domain_lock, flags);
499f3aa4 2519 free_devinfo_mem(info);
c6c2cebd
JR
2520 return NULL;
2521 }
c6c2cebd 2522
b718cd3d
DW
2523 list_add(&info->link, &domain->devices);
2524 list_add(&info->global, &device_domain_list);
2525 if (dev)
2526 dev->archdata.iommu = info;
0bbeb01a 2527 spin_unlock_irqrestore(&device_domain_lock, flags);
a7fc93fe 2528
0bbeb01a
LB
2529 /* PASID table is mandatory for a PCI device in scalable mode. */
2530 if (dev && dev_is_pci(dev) && sm_supported(iommu)) {
a7fc93fe
LB
2531 ret = intel_pasid_alloc_table(dev);
2532 if (ret) {
932a6523 2533 dev_err(dev, "PASID table allocation failed\n");
71753239 2534 dmar_remove_one_dev_info(dev);
0bbeb01a 2535 return NULL;
a7fc93fe 2536 }
ef848b7e
LB
2537
2538 /* Setup the PASID entry for requests without PASID: */
2539 spin_lock(&iommu->lock);
2540 if (hw_pass_through && domain_type_is_si(domain))
2541 ret = intel_pasid_setup_pass_through(iommu, domain,
2542 dev, PASID_RID2PASID);
2543 else
2544 ret = intel_pasid_setup_second_level(iommu, domain,
2545 dev, PASID_RID2PASID);
2546 spin_unlock(&iommu->lock);
2547 if (ret) {
932a6523 2548 dev_err(dev, "Setup RID2PASID failed\n");
71753239 2549 dmar_remove_one_dev_info(dev);
ef848b7e 2550 return NULL;
a7fc93fe
LB
2551 }
2552 }
b718cd3d 2553
cc4e2575 2554 if (dev && domain_context_mapping(domain, dev)) {
932a6523 2555 dev_err(dev, "Domain context map failed\n");
71753239 2556 dmar_remove_one_dev_info(dev);
cc4e2575
JR
2557 return NULL;
2558 }
2559
b718cd3d 2560 return domain;
745f2586
JL
2561}
2562
579305f7
AW
2563static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2564{
2565 *(u16 *)opaque = alias;
2566 return 0;
2567}
2568
76208356 2569static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw)
ba395927 2570{
e083ea5b 2571 struct device_domain_info *info;
76208356 2572 struct dmar_domain *domain = NULL;
579305f7 2573 struct intel_iommu *iommu;
fcc35c63 2574 u16 dma_alias;
ba395927 2575 unsigned long flags;
aa4d066a 2576 u8 bus, devfn;
ba395927 2577
579305f7
AW
2578 iommu = device_to_iommu(dev, &bus, &devfn);
2579 if (!iommu)
2580 return NULL;
2581
146922ec
DW
2582 if (dev_is_pci(dev)) {
2583 struct pci_dev *pdev = to_pci_dev(dev);
276dbf99 2584
579305f7
AW
2585 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2586
2587 spin_lock_irqsave(&device_domain_lock, flags);
2588 info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2589 PCI_BUS_NUM(dma_alias),
2590 dma_alias & 0xff);
2591 if (info) {
2592 iommu = info->iommu;
2593 domain = info->domain;
5a8f40e8 2594 }
579305f7 2595 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927 2596
76208356 2597 /* DMA alias already has a domain, use it */
579305f7 2598 if (info)
76208356 2599 goto out;
579305f7 2600 }
ba395927 2601
146922ec 2602 /* Allocate and initialize new domain for the device */
ab8dfe25 2603 domain = alloc_domain(0);
745f2586 2604 if (!domain)
579305f7 2605 return NULL;
dc534b25 2606 if (domain_init(domain, iommu, gaw)) {
579305f7
AW
2607 domain_exit(domain);
2608 return NULL;
2c2e2c38 2609 }
ba395927 2610
76208356 2611out:
76208356
JR
2612 return domain;
2613}
579305f7 2614
76208356
JR
2615static struct dmar_domain *set_domain_for_dev(struct device *dev,
2616 struct dmar_domain *domain)
2617{
2618 struct intel_iommu *iommu;
2619 struct dmar_domain *tmp;
2620 u16 req_id, dma_alias;
2621 u8 bus, devfn;
2622
2623 iommu = device_to_iommu(dev, &bus, &devfn);
2624 if (!iommu)
2625 return NULL;
2626
2627 req_id = ((u16)bus << 8) | devfn;
2628
2629 if (dev_is_pci(dev)) {
2630 struct pci_dev *pdev = to_pci_dev(dev);
2631
2632 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2633
2634 /* register PCI DMA alias device */
2635 if (req_id != dma_alias) {
2636 tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2637 dma_alias & 0xff, NULL, domain);
2638
2639 if (!tmp || tmp != domain)
2640 return tmp;
2641 }
ba395927
KA
2642 }
2643
5db31569 2644 tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
76208356
JR
2645 if (!tmp || tmp != domain)
2646 return tmp;
2647
2648 return domain;
2649}
579305f7 2650
76208356
JR
2651static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
2652{
2653 struct dmar_domain *domain, *tmp;
2654
2655 domain = find_domain(dev);
2656 if (domain)
2657 goto out;
2658
2659 domain = find_or_alloc_domain(dev, gaw);
2660 if (!domain)
2661 goto out;
2662
2663 tmp = set_domain_for_dev(dev, domain);
2664 if (!tmp || domain != tmp) {
579305f7
AW
2665 domain_exit(domain);
2666 domain = tmp;
2667 }
b718cd3d 2668
76208356
JR
2669out:
2670
b718cd3d 2671 return domain;
ba395927
KA
2672}
2673
b213203e
DW
2674static int iommu_domain_identity_map(struct dmar_domain *domain,
2675 unsigned long long start,
2676 unsigned long long end)
ba395927 2677{
c5395d5c
DW
2678 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2679 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2680
2681 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2682 dma_to_mm_pfn(last_vpfn))) {
9f10e5bf 2683 pr_err("Reserving iova failed\n");
b213203e 2684 return -ENOMEM;
ba395927
KA
2685 }
2686
af1089ce 2687 pr_debug("Mapping reserved region %llx-%llx\n", start, end);
ba395927
KA
2688 /*
2689 * RMRR range might have overlap with physical memory range,
2690 * clear it first
2691 */
c5395d5c 2692 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
ba395927 2693
87684fd9
PX
2694 return __domain_mapping(domain, first_vpfn, NULL,
2695 first_vpfn, last_vpfn - first_vpfn + 1,
2696 DMA_PTE_READ|DMA_PTE_WRITE);
b213203e
DW
2697}
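
The range-to-PFN conversion above is just a shift by the VT-d page size. A small standalone example with made-up numbers (the sample range is illustrative, not a real RMRR), using the 4KiB VT-d page shift of 12:

#include <assert.h>

#define VTD_PAGE_SHIFT	12

int main(void)
{
	unsigned long long start = 0x000e0000ULL;	/* sample base address */
	unsigned long long end   = 0x000fffffULL;	/* sample end address (inclusive) */
	unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
	unsigned long last_vpfn  = end >> VTD_PAGE_SHIFT;

	assert(first_vpfn == 0xe0);
	assert(last_vpfn == 0xff);
	/* The 1:1 mapping therefore covers last_vpfn - first_vpfn + 1 = 32 pages. */
	assert(last_vpfn - first_vpfn + 1 == 32);
	return 0;
}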
2698
d66ce54b
JR
2699static int domain_prepare_identity_map(struct device *dev,
2700 struct dmar_domain *domain,
2701 unsigned long long start,
2702 unsigned long long end)
b213203e 2703{
19943b0e
DW
2704 /* For _hardware_ passthrough, don't bother. But for software
2705 passthrough, we do it anyway -- it may indicate a memory
2706 range which is reserved in E820, so which didn't get set
2707 up to start with in si_domain */
2708 if (domain == si_domain && hw_pass_through) {
932a6523
BH
2709 dev_warn(dev, "Ignoring identity map for HW passthrough [0x%Lx - 0x%Lx]\n",
2710 start, end);
19943b0e
DW
2711 return 0;
2712 }
2713
932a6523 2714 dev_info(dev, "Setting identity map [0x%Lx - 0x%Lx]\n", start, end);
9f10e5bf 2715
5595b528
DW
2716 if (end < start) {
2717 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2718 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2719 dmi_get_system_info(DMI_BIOS_VENDOR),
2720 dmi_get_system_info(DMI_BIOS_VERSION),
2721 dmi_get_system_info(DMI_PRODUCT_VERSION));
d66ce54b 2722 return -EIO;
5595b528
DW
2723 }
2724
2ff729f5
DW
2725 if (end >> agaw_to_width(domain->agaw)) {
2726 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2727 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2728 agaw_to_width(domain->agaw),
2729 dmi_get_system_info(DMI_BIOS_VENDOR),
2730 dmi_get_system_info(DMI_BIOS_VERSION),
2731 dmi_get_system_info(DMI_PRODUCT_VERSION));
d66ce54b 2732 return -EIO;
2ff729f5 2733 }
19943b0e 2734
d66ce54b
JR
2735 return iommu_domain_identity_map(domain, start, end);
2736}
ba395927 2737
d66ce54b
JR
2738static int iommu_prepare_identity_map(struct device *dev,
2739 unsigned long long start,
2740 unsigned long long end)
2741{
2742 struct dmar_domain *domain;
2743 int ret;
2744
2745 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2746 if (!domain)
2747 return -ENOMEM;
2748
2749 ret = domain_prepare_identity_map(dev, domain, start, end);
2750 if (ret)
2751 domain_exit(domain);
b213203e 2752
ba395927 2753 return ret;
ba395927
KA
2754}
2755
2756static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
0b9d9753 2757 struct device *dev)
ba395927 2758{
0b9d9753 2759 if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927 2760 return 0;
0b9d9753
DW
2761 return iommu_prepare_identity_map(dev, rmrr->base_address,
2762 rmrr->end_address);
ba395927
KA
2763}
2764
d3f13810 2765#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
49a0429e
KA
2766static inline void iommu_prepare_isa(void)
2767{
2768 struct pci_dev *pdev;
2769 int ret;
2770
2771 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2772 if (!pdev)
2773 return;
2774
9f10e5bf 2775 pr_info("Prepare 0-16MiB unity mapping for LPC\n");
0b9d9753 2776 ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);
49a0429e
KA
2777
2778 if (ret)
9f10e5bf 2779 pr_err("Failed to create 0-16MiB identity map - floppy might not work\n");
49a0429e 2780
9b27e82d 2781 pci_dev_put(pdev);
49a0429e
KA
2782}
2783#else
2784static inline void iommu_prepare_isa(void)
2785{
2786 return;
2787}
d3f13810 2788#endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */
49a0429e 2789
2c2e2c38 2790static int md_domain_init(struct dmar_domain *domain, int guest_width);
c7ab48d2 2791
071e1374 2792static int __init si_domain_init(int hw)
2c2e2c38 2793{
4de354ec
LB
2794 struct dmar_rmrr_unit *rmrr;
2795 struct device *dev;
2796 int i, nid, ret;
2c2e2c38 2797
ab8dfe25 2798 si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2c2e2c38
FY
2799 if (!si_domain)
2800 return -EFAULT;
2801
2c2e2c38
FY
2802 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2803 domain_exit(si_domain);
2804 return -EFAULT;
2805 }
2806
19943b0e
DW
2807 if (hw)
2808 return 0;
2809
c7ab48d2 2810 for_each_online_node(nid) {
5dfe8660
TH
2811 unsigned long start_pfn, end_pfn;
2812 int i;
2813
2814 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2815 ret = iommu_domain_identity_map(si_domain,
2816 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2817 if (ret)
2818 return ret;
2819 }
c7ab48d2
DW
2820 }
2821
4de354ec
LB
2822 /*
2823 * Normally we use DMA domains for devices which have RMRRs. But we
 2824 * lose this requirement for graphics and USB devices. Identity map
 2825 * the RMRRs for graphics and USB devices so that they can use the
2826 * si_domain.
2827 */
2828 for_each_rmrr_units(rmrr) {
2829 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
2830 i, dev) {
2831 unsigned long long start = rmrr->base_address;
2832 unsigned long long end = rmrr->end_address;
2833
2834 if (device_is_rmrr_locked(dev))
2835 continue;
2836
2837 if (WARN_ON(end < start ||
2838 end >> agaw_to_width(si_domain->agaw)))
2839 continue;
2840
2841 ret = iommu_domain_identity_map(si_domain, start, end);
2842 if (ret)
2843 return ret;
2844 }
2845 }
2846
2c2e2c38
FY
2847 return 0;
2848}
2849
9b226624 2850static int identity_mapping(struct device *dev)
2c2e2c38
FY
2851{
2852 struct device_domain_info *info;
2853
9b226624 2854 info = dev->archdata.iommu;
cb452a40
MT
2855 if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2856 return (info->domain == si_domain);
2c2e2c38 2857
2c2e2c38
FY
2858 return 0;
2859}
2860
28ccce0d 2861static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
2c2e2c38 2862{
0ac72664 2863 struct dmar_domain *ndomain;
5a8f40e8 2864 struct intel_iommu *iommu;
156baca8 2865 u8 bus, devfn;
2c2e2c38 2866
5913c9bf 2867 iommu = device_to_iommu(dev, &bus, &devfn);
5a8f40e8
DW
2868 if (!iommu)
2869 return -ENODEV;
2870
5db31569 2871 ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
0ac72664
DW
2872 if (ndomain != domain)
2873 return -EBUSY;
2c2e2c38
FY
2874
2875 return 0;
2876}
2877
0b9d9753 2878static bool device_has_rmrr(struct device *dev)
ea2447f7
TM
2879{
2880 struct dmar_rmrr_unit *rmrr;
832bd858 2881 struct device *tmp;
ea2447f7
TM
2882 int i;
2883
0e242612 2884 rcu_read_lock();
ea2447f7 2885 for_each_rmrr_units(rmrr) {
b683b230
JL
2886 /*
2887 * Return TRUE if this RMRR contains the device that
2888 * is passed in.
2889 */
2890 for_each_active_dev_scope(rmrr->devices,
2891 rmrr->devices_cnt, i, tmp)
0b9d9753 2892 if (tmp == dev) {
0e242612 2893 rcu_read_unlock();
ea2447f7 2894 return true;
b683b230 2895 }
ea2447f7 2896 }
0e242612 2897 rcu_read_unlock();
ea2447f7
TM
2898 return false;
2899}
2900
c875d2c1
AW
2901/*
 2902 * There are a couple of cases where we need to restrict the functionality of
2903 * devices associated with RMRRs. The first is when evaluating a device for
2904 * identity mapping because problems exist when devices are moved in and out
2905 * of domains and their respective RMRR information is lost. This means that
2906 * a device with associated RMRRs will never be in a "passthrough" domain.
2907 * The second is use of the device through the IOMMU API. This interface
2908 * expects to have full control of the IOVA space for the device. We cannot
2909 * satisfy both the requirement that RMRR access is maintained and have an
2910 * unencumbered IOVA space. We also have no ability to quiesce the device's
2911 * use of the RMRR space or even inform the IOMMU API user of the restriction.
2912 * We therefore prevent devices associated with an RMRR from participating in
2913 * the IOMMU API, which eliminates them from device assignment.
2914 *
2915 * In both cases we assume that PCI USB devices with RMRRs have them largely
2916 * for historical reasons and that the RMRR space is not actively used post
2917 * boot. This exclusion may change if vendors begin to abuse it.
18436afd
DW
2918 *
2919 * The same exception is made for graphics devices, with the requirement that
2920 * any use of the RMRR regions will be torn down before assigning the device
2921 * to a guest.
c875d2c1
AW
2922 */
2923static bool device_is_rmrr_locked(struct device *dev)
2924{
2925 if (!device_has_rmrr(dev))
2926 return false;
2927
2928 if (dev_is_pci(dev)) {
2929 struct pci_dev *pdev = to_pci_dev(dev);
2930
18436afd 2931 if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
c875d2c1
AW
2932 return false;
2933 }
2934
2935 return true;
2936}
2937
f273a453
LB
2938/*
2939 * Return the required default domain type for a specific device.
2940 *
 2941 * @dev: the device in question
2942 * @startup: true if this is during early boot
2943 *
2944 * Returns:
2945 * - IOMMU_DOMAIN_DMA: device requires a dynamic mapping domain
 2946 * - IOMMU_DOMAIN_IDENTITY: device requires an identity mapping domain
2947 * - 0: both identity and dynamic domains work for this device
2948 */
2949static int device_def_domain_type(struct device *dev, int startup)
6941af28 2950{
3bdb2591
DW
2951 if (dev_is_pci(dev)) {
2952 struct pci_dev *pdev = to_pci_dev(dev);
ea2447f7 2953
c875d2c1 2954 if (device_is_rmrr_locked(dev))
f273a453 2955 return IOMMU_DOMAIN_DMA;
e0fc7e0b 2956
89a6079d
LB
2957 /*
2958 * Prevent any device marked as untrusted from getting
2959 * placed into the statically identity mapping domain.
2960 */
2961 if (pdev->untrusted)
f273a453 2962 return IOMMU_DOMAIN_DMA;
89a6079d 2963
3bdb2591 2964 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
f273a453 2965 return IOMMU_DOMAIN_IDENTITY;
e0fc7e0b 2966
3bdb2591 2967 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
f273a453 2968 return IOMMU_DOMAIN_IDENTITY;
3bdb2591
DW
2969
2970 /*
2971 * We want to start off with all devices in the 1:1 domain, and
2972 * take them out later if we find they can't access all of memory.
2973 *
2974 * However, we can't do this for PCI devices behind bridges,
2975 * because all PCI devices behind the same bridge will end up
2976 * with the same source-id on their transactions.
2977 *
2978 * Practically speaking, we can't change things around for these
2979 * devices at run-time, because we can't be sure there'll be no
2980 * DMA transactions in flight for any of their siblings.
2981 *
2982 * So PCI devices (unless they're on the root bus) as well as
2983 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2984 * the 1:1 domain, just in _case_ one of their siblings turns out
2985 * not to be able to map all of memory.
2986 */
2987 if (!pci_is_pcie(pdev)) {
2988 if (!pci_is_root_bus(pdev->bus))
f273a453 2989 return IOMMU_DOMAIN_DMA;
3bdb2591 2990 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
f273a453 2991 return IOMMU_DOMAIN_DMA;
3bdb2591 2992 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
f273a453 2993 return IOMMU_DOMAIN_DMA;
3bdb2591
DW
2994 } else {
2995 if (device_has_rmrr(dev))
f273a453 2996 return IOMMU_DOMAIN_DMA;
3bdb2591 2997 }
3dfc813d 2998
f273a453
LB
2999 return (iommu_identity_mapping & IDENTMAP_ALL) ?
3000 IOMMU_DOMAIN_IDENTITY : 0;
3001}
3002
3003static inline int iommu_should_identity_map(struct device *dev, int startup)
3004{
3005 return device_def_domain_type(dev, startup) == IOMMU_DOMAIN_IDENTITY;
6941af28
DW
3006}
3007
cf04eee8
DW
3008static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
3009{
3010 int ret;
3011
3012 if (!iommu_should_identity_map(dev, 1))
3013 return 0;
3014
28ccce0d 3015 ret = domain_add_dev_info(si_domain, dev);
cf04eee8 3016 if (!ret)
932a6523
BH
3017 dev_info(dev, "%s identity mapping\n",
3018 hw ? "Hardware" : "Software");
cf04eee8
DW
3019 else if (ret == -ENODEV)
3020 /* device not associated with an iommu */
3021 ret = 0;
3022
3023 return ret;
3024}
3025
3026
071e1374 3027static int __init iommu_prepare_static_identity_mapping(int hw)
2c2e2c38 3028{
2c2e2c38 3029 struct pci_dev *pdev = NULL;
cf04eee8
DW
3030 struct dmar_drhd_unit *drhd;
3031 struct intel_iommu *iommu;
3032 struct device *dev;
3033 int i;
3034 int ret = 0;
2c2e2c38 3035
2c2e2c38 3036 for_each_pci_dev(pdev) {
cf04eee8
DW
3037 ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
3038 if (ret)
3039 return ret;
3040 }
3041
3042 for_each_active_iommu(iommu, drhd)
3043 for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
3044 struct acpi_device_physical_node *pn;
3045 struct acpi_device *adev;
3046
3047 if (dev->bus != &acpi_bus_type)
3048 continue;
86080ccc 3049
cf04eee8
DW
3050 adev= to_acpi_device(dev);
3051 mutex_lock(&adev->physical_node_lock);
3052 list_for_each_entry(pn, &adev->physical_node_list, node) {
3053 ret = dev_prepare_static_identity_mapping(pn->dev, hw);
3054 if (ret)
3055 break;
eae460b6 3056 }
cf04eee8
DW
3057 mutex_unlock(&adev->physical_node_lock);
3058 if (ret)
3059 return ret;
62edf5dc 3060 }
2c2e2c38
FY
3061
3062 return 0;
3063}
3064
ffebeb46
JL
3065static void intel_iommu_init_qi(struct intel_iommu *iommu)
3066{
3067 /*
3068 * Start from the sane iommu hardware state.
3069 * If the queued invalidation is already initialized by us
3070 * (for example, while enabling interrupt-remapping) then
3071 * we got the things already rolling from a sane state.
3072 */
3073 if (!iommu->qi) {
3074 /*
3075 * Clear any previous faults.
3076 */
3077 dmar_fault(-1, iommu);
3078 /*
3079 * Disable queued invalidation if supported and already enabled
3080 * before OS handover.
3081 */
3082 dmar_disable_qi(iommu);
3083 }
3084
3085 if (dmar_enable_qi(iommu)) {
3086 /*
3087 * Queued Invalidate not enabled, use Register Based Invalidate
3088 */
3089 iommu->flush.flush_context = __iommu_flush_context;
3090 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
9f10e5bf 3091 pr_info("%s: Using Register based invalidation\n",
ffebeb46
JL
3092 iommu->name);
3093 } else {
3094 iommu->flush.flush_context = qi_flush_context;
3095 iommu->flush.flush_iotlb = qi_flush_iotlb;
9f10e5bf 3096 pr_info("%s: Using Queued invalidation\n", iommu->name);
ffebeb46
JL
3097 }
3098}
3099
091d42e4 3100static int copy_context_table(struct intel_iommu *iommu,
dfddb969 3101 struct root_entry *old_re,
091d42e4
JR
3102 struct context_entry **tbl,
3103 int bus, bool ext)
3104{
dbcd861f 3105 int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
543c8dcf 3106 struct context_entry *new_ce = NULL, ce;
dfddb969 3107 struct context_entry *old_ce = NULL;
543c8dcf 3108 struct root_entry re;
091d42e4
JR
3109 phys_addr_t old_ce_phys;
3110
3111 tbl_idx = ext ? bus * 2 : bus;
dfddb969 3112 memcpy(&re, old_re, sizeof(re));
091d42e4
JR
3113
3114 for (devfn = 0; devfn < 256; devfn++) {
3115 /* First calculate the correct index */
3116 idx = (ext ? devfn * 2 : devfn) % 256;
3117
3118 if (idx == 0) {
3119 /* First save what we may have and clean up */
3120 if (new_ce) {
3121 tbl[tbl_idx] = new_ce;
3122 __iommu_flush_cache(iommu, new_ce,
3123 VTD_PAGE_SIZE);
3124 pos = 1;
3125 }
3126
3127 if (old_ce)
829383e1 3128 memunmap(old_ce);
091d42e4
JR
3129
3130 ret = 0;
3131 if (devfn < 0x80)
543c8dcf 3132 old_ce_phys = root_entry_lctp(&re);
091d42e4 3133 else
543c8dcf 3134 old_ce_phys = root_entry_uctp(&re);
091d42e4
JR
3135
3136 if (!old_ce_phys) {
3137 if (ext && devfn == 0) {
3138 /* No LCTP, try UCTP */
3139 devfn = 0x7f;
3140 continue;
3141 } else {
3142 goto out;
3143 }
3144 }
3145
3146 ret = -ENOMEM;
dfddb969
DW
3147 old_ce = memremap(old_ce_phys, PAGE_SIZE,
3148 MEMREMAP_WB);
091d42e4
JR
3149 if (!old_ce)
3150 goto out;
3151
3152 new_ce = alloc_pgtable_page(iommu->node);
3153 if (!new_ce)
3154 goto out_unmap;
3155
3156 ret = 0;
3157 }
3158
3159 /* Now copy the context entry */
dfddb969 3160 memcpy(&ce, old_ce + idx, sizeof(ce));
091d42e4 3161
cf484d0e 3162 if (!__context_present(&ce))
091d42e4
JR
3163 continue;
3164
dbcd861f
JR
3165 did = context_domain_id(&ce);
3166 if (did >= 0 && did < cap_ndoms(iommu->cap))
3167 set_bit(did, iommu->domain_ids);
3168
cf484d0e
JR
3169 /*
3170 * We need a marker for copied context entries. This
3171 * marker needs to work for the old format as well as
3172 * for extended context entries.
3173 *
3174 * Bit 67 of the context entry is used. In the old
3175 * format this bit is available to software, in the
3176 * extended format it is the PGE bit, but PGE is ignored
3177 * by HW if PASIDs are disabled (and thus still
3178 * available).
3179 *
3180 * So disable PASIDs first and then mark the entry
3181 * copied. This means that we don't copy PASID
3182 * translations from the old kernel, but this is fine as
3183 * faults there are not fatal.
3184 */
3185 context_clear_pasid_enable(&ce);
3186 context_set_copied(&ce);
3187
091d42e4
JR
3188 new_ce[idx] = ce;
3189 }
3190
3191 tbl[tbl_idx + pos] = new_ce;
3192
3193 __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
3194
3195out_unmap:
dfddb969 3196 memunmap(old_ce);
091d42e4
JR
3197
3198out:
3199 return ret;
3200}
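
The copied-entry marker described in the comment inside the loop above lives in bit 67 of the 128-bit context entry, i.e. bit 3 of the high 64-bit word. A hedged, standalone sketch of that bit manipulation follows; the struct and helper names are illustrative, not the kernel's context_set_copied()/context_copied() definitions.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

struct toy_context_entry {
	uint64_t lo;
	uint64_t hi;
};

#define COPIED_BIT	(1ULL << (67 - 64))	/* bit 67 of the entry = bit 3 of 'hi' */

static void toy_set_copied(struct toy_context_entry *ce)
{
	ce->hi |= COPIED_BIT;
}

static bool toy_copied(const struct toy_context_entry *ce)
{
	return ce->hi & COPIED_BIT;
}

int main(void)
{
	struct toy_context_entry ce = { 0, 0 };

	assert(!toy_copied(&ce));
	toy_set_copied(&ce);
	assert(toy_copied(&ce));
	return 0;
}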
3201
3202static int copy_translation_tables(struct intel_iommu *iommu)
3203{
3204 struct context_entry **ctxt_tbls;
dfddb969 3205 struct root_entry *old_rt;
091d42e4
JR
3206 phys_addr_t old_rt_phys;
3207 int ctxt_table_entries;
3208 unsigned long flags;
3209 u64 rtaddr_reg;
3210 int bus, ret;
c3361f2f 3211 bool new_ext, ext;
091d42e4
JR
3212
3213 rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
3214 ext = !!(rtaddr_reg & DMA_RTADDR_RTT);
c3361f2f
JR
3215 new_ext = !!ecap_ecs(iommu->ecap);
3216
3217 /*
3218 * The RTT bit can only be changed when translation is disabled,
3219 * but disabling translation means to open a window for data
3220 * corruption. So bail out and don't copy anything if we would
3221 * have to change the bit.
3222 */
3223 if (new_ext != ext)
3224 return -EINVAL;
091d42e4
JR
3225
3226 old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
3227 if (!old_rt_phys)
3228 return -EINVAL;
3229
dfddb969 3230 old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
091d42e4
JR
3231 if (!old_rt)
3232 return -ENOMEM;
3233
3234 /* This is too big for the stack - allocate it from slab */
3235 ctxt_table_entries = ext ? 512 : 256;
3236 ret = -ENOMEM;
6396bb22 3237 ctxt_tbls = kcalloc(ctxt_table_entries, sizeof(void *), GFP_KERNEL);
091d42e4
JR
3238 if (!ctxt_tbls)
3239 goto out_unmap;
3240
3241 for (bus = 0; bus < 256; bus++) {
3242 ret = copy_context_table(iommu, &old_rt[bus],
3243 ctxt_tbls, bus, ext);
3244 if (ret) {
3245 pr_err("%s: Failed to copy context table for bus %d\n",
3246 iommu->name, bus);
3247 continue;
3248 }
3249 }
3250
3251 spin_lock_irqsave(&iommu->lock, flags);
3252
3253 /* Context tables are copied, now write them to the root_entry table */
3254 for (bus = 0; bus < 256; bus++) {
3255 int idx = ext ? bus * 2 : bus;
3256 u64 val;
3257
3258 if (ctxt_tbls[idx]) {
3259 val = virt_to_phys(ctxt_tbls[idx]) | 1;
3260 iommu->root_entry[bus].lo = val;
3261 }
3262
3263 if (!ext || !ctxt_tbls[idx + 1])
3264 continue;
3265
3266 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
3267 iommu->root_entry[bus].hi = val;
3268 }
3269
3270 spin_unlock_irqrestore(&iommu->lock, flags);
3271
3272 kfree(ctxt_tbls);
3273
3274 __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
3275
3276 ret = 0;
3277
3278out_unmap:
dfddb969 3279 memunmap(old_rt);
091d42e4
JR
3280
3281 return ret;
3282}
3283
b779260b 3284static int __init init_dmars(void)
ba395927
KA
3285{
3286 struct dmar_drhd_unit *drhd;
3287 struct dmar_rmrr_unit *rmrr;
a87f4918 3288 bool copied_tables = false;
832bd858 3289 struct device *dev;
ba395927 3290 struct intel_iommu *iommu;
13cf0174 3291 int i, ret;
2c2e2c38 3292
ba395927
KA
3293 /*
3294 * for each drhd
3295 * allocate root
3296 * initialize and program root entry to not present
3297 * endfor
3298 */
3299 for_each_drhd_unit(drhd) {
5e0d2a6f 3300 /*
3301 * lock not needed as this is only incremented in the single
3302 * threaded kernel __init code path all other access are read
3303 * only
3304 */
78d8e704 3305 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
1b198bb0
MT
3306 g_num_of_iommus++;
3307 continue;
3308 }
9f10e5bf 3309 pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
5e0d2a6f 3310 }
3311
ffebeb46
JL
3312 /* Preallocate enough resources for IOMMU hot-addition */
3313 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
3314 g_num_of_iommus = DMAR_UNITS_SUPPORTED;
3315
d9630fe9
WH
3316 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
3317 GFP_KERNEL);
3318 if (!g_iommus) {
9f10e5bf 3319 pr_err("Allocating global iommu array failed\n");
d9630fe9
WH
3320 ret = -ENOMEM;
3321 goto error;
3322 }
3323
7c919779 3324 for_each_active_iommu(iommu, drhd) {
56283174
LB
3325 /*
3326 * Find the max pasid size of all IOMMU's in the system.
3327 * We need to ensure the system pasid table is no bigger
3328 * than the smallest supported.
3329 */
765b6a98 3330 if (pasid_supported(iommu)) {
56283174
LB
3331 u32 temp = 2 << ecap_pss(iommu->ecap);
3332
3333 intel_pasid_max_id = min_t(u32, temp,
3334 intel_pasid_max_id);
3335 }
3336
d9630fe9 3337 g_iommus[iommu->seq_id] = iommu;
ba395927 3338
b63d80d1
JR
3339 intel_iommu_init_qi(iommu);
3340
e61d98d8
SS
3341 ret = iommu_init_domains(iommu);
3342 if (ret)
989d51fc 3343 goto free_iommu;
e61d98d8 3344
4158c2ec
JR
3345 init_translation_status(iommu);
3346
091d42e4
JR
3347 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
3348 iommu_disable_translation(iommu);
3349 clear_translation_pre_enabled(iommu);
3350 pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3351 iommu->name);
3352 }
4158c2ec 3353
ba395927
KA
3354 /*
3355 * TBD:
3356 * we could share the same root & context tables
25985edc 3357 * among all IOMMUs. Need to split it later.
ba395927
KA
3358 */
3359 ret = iommu_alloc_root_entry(iommu);
ffebeb46 3360 if (ret)
989d51fc 3361 goto free_iommu;
5f0a7f76 3362
091d42e4
JR
3363 if (translation_pre_enabled(iommu)) {
3364 pr_info("Translation already enabled - trying to copy translation structures\n");
3365
3366 ret = copy_translation_tables(iommu);
3367 if (ret) {
3368 /*
3369 * We found the IOMMU with translation
3370 * enabled - but failed to copy over the
3371 * old root-entry table. Try to proceed
3372 * by disabling translation now and
3373 * allocating a clean root-entry table.
3374 * This might cause DMAR faults, but
3375 * probably the dump will still succeed.
3376 */
3377 pr_err("Failed to copy translation tables from previous kernel for %s\n",
3378 iommu->name);
3379 iommu_disable_translation(iommu);
3380 clear_translation_pre_enabled(iommu);
3381 } else {
3382 pr_info("Copied translation tables from previous kernel for %s\n",
3383 iommu->name);
a87f4918 3384 copied_tables = true;
091d42e4
JR
3385 }
3386 }
3387
4ed0d3e6 3388 if (!ecap_pass_through(iommu->ecap))
19943b0e 3389 hw_pass_through = 0;
8a94ade4 3390#ifdef CONFIG_INTEL_IOMMU_SVM
765b6a98 3391 if (pasid_supported(iommu))
d9737953 3392 intel_svm_init(iommu);
8a94ade4 3393#endif
ba395927
KA
3394 }
3395
a4c34ff1
JR
3396 /*
3397 * Now that qi is enabled on all iommus, set the root entry and flush
3398 * caches. This is required on some Intel X58 chipsets, otherwise the
3399 * flush_context function will loop forever and the boot hangs.
3400 */
3401 for_each_active_iommu(iommu, drhd) {
3402 iommu_flush_write_buffer(iommu);
3403 iommu_set_root_entry(iommu);
3404 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
3405 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3406 }
3407
19943b0e 3408 if (iommu_pass_through)
e0fc7e0b
DW
3409 iommu_identity_mapping |= IDENTMAP_ALL;
3410
d3f13810 3411#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
5daab580 3412 dmar_map_gfx = 0;
19943b0e 3413#endif
e0fc7e0b 3414
5daab580
LB
3415 if (!dmar_map_gfx)
3416 iommu_identity_mapping |= IDENTMAP_GFX;
3417
21e722c4
AR
3418 check_tylersburg_isoch();
3419
4de354ec
LB
3420 ret = si_domain_init(hw_pass_through);
3421 if (ret)
3422 goto free_iommu;
86080ccc 3423
e0fc7e0b 3424
a87f4918
JR
3425 /*
3426 * If we copied translations from a previous kernel in the kdump
 3427 * case, we cannot assign the devices to domains now, as that
3428 * would eliminate the old mappings. So skip this part and defer
3429 * the assignment to device driver initialization time.
3430 */
3431 if (copied_tables)
3432 goto domains_done;
3433
ba395927 3434 /*
19943b0e
DW
3435 * If pass through is not set or not enabled, setup context entries for
3436 * identity mappings for rmrr, gfx, and isa and may fall back to static
3437 * identity mapping if iommu_identity_mapping is set.
ba395927 3438 */
19943b0e
DW
3439 if (iommu_identity_mapping) {
3440 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
4ed0d3e6 3441 if (ret) {
9f10e5bf 3442 pr_crit("Failed to setup IOMMU pass-through\n");
989d51fc 3443 goto free_iommu;
ba395927
KA
3444 }
3445 }
ba395927 3446 /*
19943b0e
DW
3447 * For each rmrr
3448 * for each dev attached to rmrr
3449 * do
3450 * locate drhd for dev, alloc domain for dev
3451 * allocate free domain
3452 * allocate page table entries for rmrr
3453 * if context not allocated for bus
3454 * allocate and init context
3455 * set present in root table for this bus
3456 * init context with domain, translation etc
3457 * endfor
3458 * endfor
ba395927 3459 */
9f10e5bf 3460 pr_info("Setting RMRR:\n");
19943b0e 3461 for_each_rmrr_units(rmrr) {
b683b230
JL
3462 /* some BIOS lists non-exist devices in DMAR table. */
3463 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
832bd858 3464 i, dev) {
0b9d9753 3465 ret = iommu_prepare_rmrr_dev(rmrr, dev);
19943b0e 3466 if (ret)
9f10e5bf 3467 pr_err("Mapping reserved region failed\n");
ba395927 3468 }
4ed0d3e6 3469 }
49a0429e 3470
19943b0e
DW
3471 iommu_prepare_isa();
3472
a87f4918
JR
3473domains_done:
3474
ba395927
KA
3475 /*
3476 * for each drhd
3477 * enable fault log
3478 * global invalidate context cache
3479 * global invalidate iotlb
3480 * enable translation
3481 */
7c919779 3482 for_each_iommu(iommu, drhd) {
51a63e67
JC
3483 if (drhd->ignored) {
3484 /*
3485 * we always have to disable PMRs or DMA may fail on
3486 * this device
3487 */
3488 if (force_on)
7c919779 3489 iommu_disable_protect_mem_regions(iommu);
ba395927 3490 continue;
51a63e67 3491 }
ba395927
KA
3492
3493 iommu_flush_write_buffer(iommu);
3494
a222a7f0 3495#ifdef CONFIG_INTEL_IOMMU_SVM
765b6a98 3496 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
a7755c3c
LB
3497 /*
3498 * Call dmar_alloc_hwirq() with dmar_global_lock held,
3499 * could cause possible lock race condition.
3500 */
3501 up_write(&dmar_global_lock);
a222a7f0 3502 ret = intel_svm_enable_prq(iommu);
a7755c3c 3503 down_write(&dmar_global_lock);
a222a7f0
DW
3504 if (ret)
3505 goto free_iommu;
3506 }
3507#endif
3460a6d9
KA
3508 ret = dmar_set_interrupt(iommu);
3509 if (ret)
989d51fc 3510 goto free_iommu;
ba395927
KA
3511 }
3512
3513 return 0;
989d51fc
JL
3514
3515free_iommu:
ffebeb46
JL
3516 for_each_active_iommu(iommu, drhd) {
3517 disable_dmar_iommu(iommu);
a868e6b7 3518 free_dmar_iommu(iommu);
ffebeb46 3519 }
13cf0174 3520
d9630fe9 3521 kfree(g_iommus);
13cf0174 3522
989d51fc 3523error:
ba395927
KA
3524 return ret;
3525}
3526
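/*
 * Callers pass the page count to intel_alloc_iova() as CPU pages,
 * typically dma_to_mm_pfn(aligned_nrpages(paddr, size)), and treat a
 * return value of 0 as allocation failure.  The device's dma_mask is
 * clamped to DOMAIN_MAX_ADDR(domain->gaw) first, so an IOVA is never
 * handed out beyond what the domain's page tables can describe.
 */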
5a5e02a6 3527/* This takes a number of _MM_ pages, not VTD pages */
2aac6304 3528static unsigned long intel_alloc_iova(struct device *dev,
875764de
DW
3529 struct dmar_domain *domain,
3530 unsigned long nrpages, uint64_t dma_mask)
ba395927 3531{
e083ea5b 3532 unsigned long iova_pfn;
ba395927 3533
875764de
DW
3534 /* Restrict dma_mask to the width that the iommu can handle */
3535 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
8f6429c7
RM
3536 /* Ensure we reserve the whole size-aligned region */
3537 nrpages = __roundup_pow_of_two(nrpages);
875764de
DW
3538
3539 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
ba395927
KA
3540 /*
3541 * First try to allocate an io virtual address in
284901a9 3542 * DMA_BIT_MASK(32) and if that fails then try allocating
3609801e 3543		 * from the higher range
ba395927 3544 */
22e2f9fa 3545 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
538d5b33 3546 IOVA_PFN(DMA_BIT_MASK(32)), false);
22e2f9fa
OP
3547 if (iova_pfn)
3548 return iova_pfn;
875764de 3549 }
538d5b33
TN
3550 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
3551 IOVA_PFN(dma_mask), true);
22e2f9fa 3552 if (unlikely(!iova_pfn)) {
932a6523 3553		dev_err(dev, "Allocating %ld-page iova failed\n", nrpages);
2aac6304 3554 return 0;
f76aec76
KA
3555 }
3556
22e2f9fa 3557 return iova_pfn;
f76aec76
KA
3558}
3559
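/*
 * get_private_domain_for_dev() gives a device that is not yet attached
 * to any domain a DMA domain of its own: allocate (or find) a domain of
 * DEFAULT_DOMAIN_ADDRESS_WIDTH, replay any RMRR ranges that name this
 * device as identity mappings inside it, and publish it with
 * set_domain_for_dev(), falling back to whatever domain won a race to
 * attach the device first.
 */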
4ec066c7 3560static struct dmar_domain *get_private_domain_for_dev(struct device *dev)
f76aec76 3561{
1c5ebba9 3562 struct dmar_domain *domain, *tmp;
b1ce5b79 3563 struct dmar_rmrr_unit *rmrr;
b1ce5b79
JR
3564 struct device *i_dev;
3565 int i, ret;
f76aec76 3566
4ec066c7 3567	/* The device shouldn't be attached to any domain yet. */
1c5ebba9
JR
3568 domain = find_domain(dev);
3569 if (domain)
4ec066c7 3570 return NULL;
1c5ebba9
JR
3571
3572 domain = find_or_alloc_domain(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
3573 if (!domain)
3574 goto out;
ba395927 3575
b1ce5b79
JR
3576 /* We have a new domain - setup possible RMRRs for the device */
3577 rcu_read_lock();
3578 for_each_rmrr_units(rmrr) {
3579 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
3580 i, i_dev) {
3581 if (i_dev != dev)
3582 continue;
3583
3584 ret = domain_prepare_identity_map(dev, domain,
3585 rmrr->base_address,
3586 rmrr->end_address);
3587 if (ret)
3588 dev_err(dev, "Mapping reserved region failed\n");
3589 }
3590 }
3591 rcu_read_unlock();
3592
1c5ebba9
JR
3593 tmp = set_domain_for_dev(dev, domain);
3594 if (!tmp || domain != tmp) {
3595 domain_exit(domain);
3596 domain = tmp;
3597 }
3598
3599out:
1c5ebba9 3600 if (!domain)
932a6523 3601 dev_err(dev, "Allocating domain failed\n");
1c5ebba9 3602
f76aec76
KA
3603 return domain;
3604}
3605
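/*
 * A device bypasses the IOVA map/unmap path in two cases: it is a
 * "dummy" device the IOMMU ignores, or it is identity mapped and its
 * DMA mask already covers dma_get_required_mask().  An identity-mapped
 * device with a narrower mask is moved to a dynamic DMA domain below
 * (falling back to a private domain if that request fails) and from
 * then on goes through the non-identity path.
 */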
ecb509ec 3606	/* Check if the dev needs to go through the non-identity map and unmap process. */
48b2c937 3607static bool iommu_need_mapping(struct device *dev)
2c2e2c38 3608{
98b2fffb 3609 int ret;
2c2e2c38 3610
3d89194a 3611 if (iommu_dummy(dev))
48b2c937 3612 return false;
1e4c64c4 3613
98b2fffb
LB
3614 ret = identity_mapping(dev);
3615 if (ret) {
3616 u64 dma_mask = *dev->dma_mask;
3617
3618 if (dev->coherent_dma_mask && dev->coherent_dma_mask < dma_mask)
3619 dma_mask = dev->coherent_dma_mask;
3620
3621 if (dma_mask >= dma_get_required_mask(dev))
48b2c937
CH
3622 return false;
3623
3624 /*
 3625		 * Devices limited to 32-bit DMA are removed from si_domain and fall
 3626		 * back to non-identity mapping.
3627 */
3628 dmar_remove_one_dev_info(dev);
98b2fffb
LB
3629 ret = iommu_request_dma_domain_for_dev(dev);
3630 if (ret) {
3631 struct iommu_domain *domain;
3632 struct dmar_domain *dmar_domain;
3633
3634 domain = iommu_get_domain_for_dev(dev);
3635 if (domain) {
3636 dmar_domain = to_dmar_domain(domain);
3637 dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
3638 }
4ec066c7 3639 get_private_domain_for_dev(dev);
2c2e2c38 3640 }
98b2fffb
LB
3641
3642 dev_info(dev, "32bit DMA uses non-identity mapping\n");
2c2e2c38
FY
3643 }
3644
48b2c937 3645 return true;
2c2e2c38
FY
3646}
3647
21d5d27c
LG
3648static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
3649 size_t size, int dir, u64 dma_mask)
f76aec76 3650{
f76aec76 3651 struct dmar_domain *domain;
5b6985ce 3652 phys_addr_t start_paddr;
2aac6304 3653 unsigned long iova_pfn;
f76aec76 3654 int prot = 0;
6865f0d1 3655 int ret;
8c11e798 3656 struct intel_iommu *iommu;
33041ec0 3657 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
f76aec76
KA
3658
3659 BUG_ON(dir == DMA_NONE);
2c2e2c38 3660
4ec066c7 3661 domain = find_domain(dev);
f76aec76 3662 if (!domain)
524a669b 3663 return DMA_MAPPING_ERROR;
f76aec76 3664
8c11e798 3665 iommu = domain_get_iommu(domain);
88cb6a74 3666 size = aligned_nrpages(paddr, size);
f76aec76 3667
2aac6304
OP
3668 iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
3669 if (!iova_pfn)
f76aec76
KA
3670 goto error;
3671
ba395927
KA
3672 /*
 3673	 * Check if DMAR supports zero-length reads on write-only
 3674	 * mappings.
3675 */
3676 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3677 !cap_zlr(iommu->cap))
ba395927
KA
3678 prot |= DMA_PTE_READ;
3679 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3680 prot |= DMA_PTE_WRITE;
3681 /*
6865f0d1 3682	 * paddr .. (paddr + size) might cover only part of a page, so we should
ba395927 3683	 * map the whole page. Note: if two parts of one page are mapped
6865f0d1 3684	 * separately, we might end up with two guest addresses mapping to the
ba395927
KA
 3685	 * same host paddr, but this is not a big problem.
3686 */
2aac6304 3687 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova_pfn),
33041ec0 3688 mm_to_dma_pfn(paddr_pfn), size, prot);
ba395927
KA
3689 if (ret)
3690 goto error;
3691
2aac6304 3692 start_paddr = (phys_addr_t)iova_pfn << PAGE_SHIFT;
03d6a246
DW
3693 start_paddr += paddr & ~PAGE_MASK;
3694 return start_paddr;
ba395927 3695
ba395927 3696error:
2aac6304 3697 if (iova_pfn)
22e2f9fa 3698 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
932a6523
BH
3699 dev_err(dev, "Device request: %zx@%llx dir %d --- failed\n",
3700 size, (unsigned long long)paddr, dir);
524a669b 3701 return DMA_MAPPING_ERROR;
ba395927
KA
3702}
3703
ffbbef5c
FT
3704static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3705 unsigned long offset, size_t size,
3706 enum dma_data_direction dir,
00085f1e 3707 unsigned long attrs)
bb9e6d65 3708{
9cc0c2af
CH
3709 if (iommu_need_mapping(dev))
3710 return __intel_map_single(dev, page_to_phys(page) + offset,
3711 size, dir, *dev->dma_mask);
3712 return dma_direct_map_page(dev, page, offset, size, dir, attrs);
21d5d27c
LG
3713}
3714
3715static dma_addr_t intel_map_resource(struct device *dev, phys_addr_t phys_addr,
3716 size_t size, enum dma_data_direction dir,
3717 unsigned long attrs)
3718{
9cc0c2af
CH
3719 if (iommu_need_mapping(dev))
3720 return __intel_map_single(dev, phys_addr, size, dir,
3721 *dev->dma_mask);
3722 return dma_direct_map_resource(dev, phys_addr, size, dir, attrs);
bb9e6d65
FT
3723}
3724
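/*
 * intel_unmap() below tears down the IOTLB mapping for a dma_addr range.
 * In strict mode, or when the buffer belonged to an untrusted PCI device,
 * the IOTLB is flushed synchronously and the IOVA plus the page-table
 * pages on the freelist are released immediately; otherwise the work is
 * handed to the IOVA flush queue and batched, which is cheaper but leaves
 * a window where the device could still reach the old pages.
 */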
769530e4 3725static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
ba395927 3726{
f76aec76 3727 struct dmar_domain *domain;
d794dc9b 3728 unsigned long start_pfn, last_pfn;
769530e4 3729 unsigned long nrpages;
2aac6304 3730 unsigned long iova_pfn;
8c11e798 3731 struct intel_iommu *iommu;
ea8ea460 3732 struct page *freelist;
f7b0c4ce 3733 struct pci_dev *pdev = NULL;
ba395927 3734
1525a29a 3735 domain = find_domain(dev);
ba395927
KA
3736 BUG_ON(!domain);
3737
8c11e798
WH
3738 iommu = domain_get_iommu(domain);
3739
2aac6304 3740 iova_pfn = IOVA_PFN(dev_addr);
ba395927 3741
769530e4 3742 nrpages = aligned_nrpages(dev_addr, size);
2aac6304 3743 start_pfn = mm_to_dma_pfn(iova_pfn);
769530e4 3744 last_pfn = start_pfn + nrpages - 1;
ba395927 3745
f7b0c4ce
LB
3746 if (dev_is_pci(dev))
3747 pdev = to_pci_dev(dev);
3748
932a6523 3749 dev_dbg(dev, "Device unmapping: pfn %lx-%lx\n", start_pfn, last_pfn);
ba395927 3750
ea8ea460 3751 freelist = domain_unmap(domain, start_pfn, last_pfn);
d794dc9b 3752
f7b0c4ce 3753 if (intel_iommu_strict || (pdev && pdev->untrusted)) {
a1ddcbe9 3754 iommu_flush_iotlb_psi(iommu, domain, start_pfn,
769530e4 3755 nrpages, !freelist, 0);
5e0d2a6f 3756 /* free iova */
22e2f9fa 3757 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages));
ea8ea460 3758 dma_free_pagelist(freelist);
5e0d2a6f 3759 } else {
13cf0174
JR
3760 queue_iova(&domain->iovad, iova_pfn, nrpages,
3761 (unsigned long)freelist);
5e0d2a6f 3762 /*
 3763		 * Queue up the release of the unmap to save the 1/6th of the CPU
 3764		 * time used up by the iotlb flush operation...
3765 */
5e0d2a6f 3766 }
ba395927
KA
3767}
3768
d41a4adb
JL
3769static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3770 size_t size, enum dma_data_direction dir,
00085f1e 3771 unsigned long attrs)
d41a4adb 3772{
9cc0c2af
CH
3773 if (iommu_need_mapping(dev))
3774 intel_unmap(dev, dev_addr, size);
3775 else
3776 dma_direct_unmap_page(dev, dev_addr, size, dir, attrs);
3777}
3778
3779static void intel_unmap_resource(struct device *dev, dma_addr_t dev_addr,
3780 size_t size, enum dma_data_direction dir, unsigned long attrs)
3781{
3782 if (iommu_need_mapping(dev))
3783 intel_unmap(dev, dev_addr, size);
d41a4adb
JL
3784}
3785
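/*
 * Coherent allocations follow the same split as streaming DMA: devices
 * that do not need IOMMU mapping use the dma-direct allocator, everyone
 * else gets pages from CMA or alloc_pages() which are then mapped
 * DMA_BIDIRECTIONAL through __intel_map_single() against the device's
 * coherent_dma_mask.
 */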
5040a918 3786static void *intel_alloc_coherent(struct device *dev, size_t size,
baa676fc 3787 dma_addr_t *dma_handle, gfp_t flags,
00085f1e 3788 unsigned long attrs)
ba395927 3789{
7ec916f8
CH
3790 struct page *page = NULL;
3791 int order;
ba395927 3792
9cc0c2af
CH
3793 if (!iommu_need_mapping(dev))
3794 return dma_direct_alloc(dev, size, dma_handle, flags, attrs);
3795
7ec916f8
CH
3796 size = PAGE_ALIGN(size);
3797 order = get_order(size);
7ec916f8
CH
3798
3799 if (gfpflags_allow_blocking(flags)) {
3800 unsigned int count = size >> PAGE_SHIFT;
3801
d834c5ab
MS
3802 page = dma_alloc_from_contiguous(dev, count, order,
3803 flags & __GFP_NOWARN);
7ec916f8
CH
3804 }
3805
3806 if (!page)
3807 page = alloc_pages(flags, order);
3808 if (!page)
3809 return NULL;
3810 memset(page_address(page), 0, size);
3811
21d5d27c
LG
3812 *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
3813 DMA_BIDIRECTIONAL,
3814 dev->coherent_dma_mask);
524a669b 3815 if (*dma_handle != DMA_MAPPING_ERROR)
7ec916f8
CH
3816 return page_address(page);
3817 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3818 __free_pages(page, order);
36746436 3819
ba395927
KA
3820 return NULL;
3821}
3822
5040a918 3823static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
00085f1e 3824 dma_addr_t dma_handle, unsigned long attrs)
ba395927 3825{
7ec916f8
CH
3826 int order;
3827 struct page *page = virt_to_page(vaddr);
3828
9cc0c2af
CH
3829 if (!iommu_need_mapping(dev))
3830 return dma_direct_free(dev, size, vaddr, dma_handle, attrs);
3831
7ec916f8
CH
3832 size = PAGE_ALIGN(size);
3833 order = get_order(size);
3834
3835 intel_unmap(dev, dma_handle, size);
3836 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3837 __free_pages(page, order);
ba395927
KA
3838}
3839
5040a918 3840static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
d7ab5c46 3841 int nelems, enum dma_data_direction dir,
00085f1e 3842 unsigned long attrs)
ba395927 3843{
769530e4
OP
3844 dma_addr_t startaddr = sg_dma_address(sglist) & PAGE_MASK;
3845 unsigned long nrpages = 0;
3846 struct scatterlist *sg;
3847 int i;
3848
9cc0c2af
CH
3849 if (!iommu_need_mapping(dev))
3850 return dma_direct_unmap_sg(dev, sglist, nelems, dir, attrs);
3851
769530e4
OP
3852 for_each_sg(sglist, sg, nelems, i) {
3853 nrpages += aligned_nrpages(sg_dma_address(sg), sg_dma_len(sg));
3854 }
3855
3856 intel_unmap(dev, startaddr, nrpages << VTD_PAGE_SHIFT);
ba395927
KA
3857}
3858
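/*
 * intel_map_sg() maps a whole scatterlist into one contiguous IOVA block:
 * the per-segment page counts are summed with aligned_nrpages(), a single
 * IOVA range of that size is allocated, and domain_sg_mapping() fills in
 * the page tables.  On failure the partially built page tables and the
 * IOVA are released and 0 is returned, as the DMA API expects.
 */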
5040a918 3859static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
00085f1e 3860 enum dma_data_direction dir, unsigned long attrs)
ba395927 3861{
ba395927 3862 int i;
ba395927 3863 struct dmar_domain *domain;
f76aec76
KA
3864 size_t size = 0;
3865 int prot = 0;
2aac6304 3866 unsigned long iova_pfn;
f76aec76 3867 int ret;
c03ab37c 3868 struct scatterlist *sg;
b536d24d 3869 unsigned long start_vpfn;
8c11e798 3870 struct intel_iommu *iommu;
ba395927
KA
3871
3872 BUG_ON(dir == DMA_NONE);
48b2c937 3873 if (!iommu_need_mapping(dev))
9cc0c2af 3874 return dma_direct_map_sg(dev, sglist, nelems, dir, attrs);
ba395927 3875
4ec066c7 3876 domain = find_domain(dev);
f76aec76
KA
3877 if (!domain)
3878 return 0;
3879
8c11e798
WH
3880 iommu = domain_get_iommu(domain);
3881
b536d24d 3882 for_each_sg(sglist, sg, nelems, i)
88cb6a74 3883 size += aligned_nrpages(sg->offset, sg->length);
f76aec76 3884
2aac6304 3885 iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
5040a918 3886 *dev->dma_mask);
2aac6304 3887 if (!iova_pfn) {
c03ab37c 3888 sglist->dma_length = 0;
f76aec76
KA
3889 return 0;
3890 }
3891
3892 /*
 3893	 * Check if DMAR supports zero-length reads on write-only
 3894	 * mappings.
3895 */
3896 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3897 !cap_zlr(iommu->cap))
f76aec76
KA
3898 prot |= DMA_PTE_READ;
3899 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3900 prot |= DMA_PTE_WRITE;
3901
2aac6304 3902 start_vpfn = mm_to_dma_pfn(iova_pfn);
e1605495 3903
f532959b 3904 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
e1605495 3905 if (unlikely(ret)) {
e1605495 3906 dma_pte_free_pagetable(domain, start_vpfn,
bc24c571
DD
3907 start_vpfn + size - 1,
3908 agaw_to_level(domain->agaw) + 1);
22e2f9fa 3909 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
e1605495 3910 return 0;
ba395927
KA
3911 }
3912
ba395927
KA
3913 return nelems;
3914}
3915
02b4da5f 3916static const struct dma_map_ops intel_dma_ops = {
baa676fc
AP
3917 .alloc = intel_alloc_coherent,
3918 .free = intel_free_coherent,
ba395927
KA
3919 .map_sg = intel_map_sg,
3920 .unmap_sg = intel_unmap_sg,
ffbbef5c
FT
3921 .map_page = intel_map_page,
3922 .unmap_page = intel_unmap_page,
21d5d27c 3923 .map_resource = intel_map_resource,
9cc0c2af 3924 .unmap_resource = intel_unmap_resource,
fec777c3 3925 .dma_supported = dma_direct_supported,
ba395927
KA
3926};
3927
3928static inline int iommu_domain_cache_init(void)
3929{
3930 int ret = 0;
3931
3932 iommu_domain_cache = kmem_cache_create("iommu_domain",
3933 sizeof(struct dmar_domain),
3934 0,
3935 SLAB_HWCACHE_ALIGN,
3936
3937 NULL);
3938 if (!iommu_domain_cache) {
9f10e5bf 3939 pr_err("Couldn't create iommu_domain cache\n");
ba395927
KA
3940 ret = -ENOMEM;
3941 }
3942
3943 return ret;
3944}
3945
3946static inline int iommu_devinfo_cache_init(void)
3947{
3948 int ret = 0;
3949
3950 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3951 sizeof(struct device_domain_info),
3952 0,
3953 SLAB_HWCACHE_ALIGN,
ba395927
KA
3954 NULL);
3955 if (!iommu_devinfo_cache) {
9f10e5bf 3956 pr_err("Couldn't create devinfo cache\n");
ba395927
KA
3957 ret = -ENOMEM;
3958 }
3959
3960 return ret;
3961}
3962
ba395927
KA
3963static int __init iommu_init_mempool(void)
3964{
3965 int ret;
ae1ff3d6 3966 ret = iova_cache_get();
ba395927
KA
3967 if (ret)
3968 return ret;
3969
3970 ret = iommu_domain_cache_init();
3971 if (ret)
3972 goto domain_error;
3973
3974 ret = iommu_devinfo_cache_init();
3975 if (!ret)
3976 return ret;
3977
3978 kmem_cache_destroy(iommu_domain_cache);
3979domain_error:
ae1ff3d6 3980 iova_cache_put();
ba395927
KA
3981
3982 return -ENOMEM;
3983}
3984
3985static void __init iommu_exit_mempool(void)
3986{
3987 kmem_cache_destroy(iommu_devinfo_cache);
3988 kmem_cache_destroy(iommu_domain_cache);
ae1ff3d6 3989 iova_cache_put();
ba395927
KA
3990}
3991
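/*
 * The QuickData (IOAT) fixup below guards against firmware that assigns
 * the device to the wrong DMAR unit: the VT-d registers for it sit at
 * VTBAR + 0xa000 (VTBAR read from config offset 0xb0 of devfn 0 on the
 * same bus), and if the DRHD found for the device is anywhere else the
 * device is marked DUMMY_DEVICE_DOMAIN_INFO so no translation is used.
 */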
556ab45f
DW
3992static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3993{
3994 struct dmar_drhd_unit *drhd;
3995 u32 vtbar;
3996 int rc;
3997
3998 /* We know that this device on this chipset has its own IOMMU.
3999 * If we find it under a different IOMMU, then the BIOS is lying
4000 * to us. Hope that the IOMMU for this device is actually
4001 * disabled, and it needs no translation...
4002 */
4003 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
4004 if (rc) {
4005 /* "can't" happen */
4006 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
4007 return;
4008 }
4009 vtbar &= 0xffff0000;
4010
 4011	/* we know that this iommu should be at offset 0xa000 from vtbar */
4012 drhd = dmar_find_matched_drhd_unit(pdev);
4013 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
4014 TAINT_FIRMWARE_WORKAROUND,
4015 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
4016 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
4017}
4018DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
4019
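/*
 * init_no_remapping_devices() marks DMAR units that can be skipped:
 * a unit whose device scope turned out to be empty is simply ignored,
 * and a unit that covers *only* graphics devices is ignored as well when
 * dmar_map_gfx is clear, with each of its devices flagged as
 * DUMMY_DEVICE_DOMAIN_INFO so the DMA API bypasses translation for them.
 */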
ba395927
KA
4020static void __init init_no_remapping_devices(void)
4021{
4022 struct dmar_drhd_unit *drhd;
832bd858 4023 struct device *dev;
b683b230 4024 int i;
ba395927
KA
4025
4026 for_each_drhd_unit(drhd) {
4027 if (!drhd->include_all) {
b683b230
JL
4028 for_each_active_dev_scope(drhd->devices,
4029 drhd->devices_cnt, i, dev)
4030 break;
832bd858 4031 /* ignore DMAR unit if no devices exist */
ba395927
KA
4032 if (i == drhd->devices_cnt)
4033 drhd->ignored = 1;
4034 }
4035 }
4036
7c919779 4037 for_each_active_drhd_unit(drhd) {
7c919779 4038 if (drhd->include_all)
ba395927
KA
4039 continue;
4040
b683b230
JL
4041 for_each_active_dev_scope(drhd->devices,
4042 drhd->devices_cnt, i, dev)
832bd858 4043 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
ba395927 4044 break;
ba395927
KA
4045 if (i < drhd->devices_cnt)
4046 continue;
4047
c0771df8
DW
4048 /* This IOMMU has *only* gfx devices. Either bypass it or
4049 set the gfx_mapped flag, as appropriate */
cf1ec453 4050 if (!dmar_map_gfx) {
c0771df8 4051 drhd->ignored = 1;
b683b230
JL
4052 for_each_active_dev_scope(drhd->devices,
4053 drhd->devices_cnt, i, dev)
832bd858 4054 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
ba395927
KA
4055 }
4056 }
4057}
4058
f59c7b69
FY
4059#ifdef CONFIG_SUSPEND
4060static int init_iommu_hw(void)
4061{
4062 struct dmar_drhd_unit *drhd;
4063 struct intel_iommu *iommu = NULL;
4064
4065 for_each_active_iommu(iommu, drhd)
4066 if (iommu->qi)
4067 dmar_reenable_qi(iommu);
4068
b779260b
JC
4069 for_each_iommu(iommu, drhd) {
4070 if (drhd->ignored) {
4071 /*
4072 * we always have to disable PMRs or DMA may fail on
4073 * this device
4074 */
4075 if (force_on)
4076 iommu_disable_protect_mem_regions(iommu);
4077 continue;
4078 }
095303e0 4079
f59c7b69
FY
4080 iommu_flush_write_buffer(iommu);
4081
4082 iommu_set_root_entry(iommu);
4083
4084 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 4085 DMA_CCMD_GLOBAL_INVL);
2a41ccee
JL
4086 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4087 iommu_enable_translation(iommu);
b94996c9 4088 iommu_disable_protect_mem_regions(iommu);
f59c7b69
FY
4089 }
4090
4091 return 0;
4092}
4093
4094static void iommu_flush_all(void)
4095{
4096 struct dmar_drhd_unit *drhd;
4097 struct intel_iommu *iommu;
4098
4099 for_each_active_iommu(iommu, drhd) {
4100 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 4101 DMA_CCMD_GLOBAL_INVL);
f59c7b69 4102 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 4103 DMA_TLB_GLOBAL_FLUSH);
f59c7b69
FY
4104 }
4105}
4106
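/*
 * Across suspend/resume only the fault-event registers (FECTL, FEDATA,
 * FEADDR, FEUADDR) are saved and restored by hand; everything else is
 * rebuilt by init_iommu_hw(), which re-enables queued invalidation,
 * reinstalls the root entry, flushes the context and IOTLB caches and
 * turns translation back on for every unit that is not ignored.
 */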
134fac3f 4107static int iommu_suspend(void)
f59c7b69
FY
4108{
4109 struct dmar_drhd_unit *drhd;
4110 struct intel_iommu *iommu = NULL;
4111 unsigned long flag;
4112
4113 for_each_active_iommu(iommu, drhd) {
6396bb22 4114 iommu->iommu_state = kcalloc(MAX_SR_DMAR_REGS, sizeof(u32),
f59c7b69
FY
4115 GFP_ATOMIC);
4116 if (!iommu->iommu_state)
4117 goto nomem;
4118 }
4119
4120 iommu_flush_all();
4121
4122 for_each_active_iommu(iommu, drhd) {
4123 iommu_disable_translation(iommu);
4124
1f5b3c3f 4125 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
4126
4127 iommu->iommu_state[SR_DMAR_FECTL_REG] =
4128 readl(iommu->reg + DMAR_FECTL_REG);
4129 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
4130 readl(iommu->reg + DMAR_FEDATA_REG);
4131 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
4132 readl(iommu->reg + DMAR_FEADDR_REG);
4133 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
4134 readl(iommu->reg + DMAR_FEUADDR_REG);
4135
1f5b3c3f 4136 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
4137 }
4138 return 0;
4139
4140nomem:
4141 for_each_active_iommu(iommu, drhd)
4142 kfree(iommu->iommu_state);
4143
4144 return -ENOMEM;
4145}
4146
134fac3f 4147static void iommu_resume(void)
f59c7b69
FY
4148{
4149 struct dmar_drhd_unit *drhd;
4150 struct intel_iommu *iommu = NULL;
4151 unsigned long flag;
4152
4153 if (init_iommu_hw()) {
b779260b
JC
4154 if (force_on)
4155 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
4156 else
4157 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
134fac3f 4158 return;
f59c7b69
FY
4159 }
4160
4161 for_each_active_iommu(iommu, drhd) {
4162
1f5b3c3f 4163 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
4164
4165 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
4166 iommu->reg + DMAR_FECTL_REG);
4167 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
4168 iommu->reg + DMAR_FEDATA_REG);
4169 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
4170 iommu->reg + DMAR_FEADDR_REG);
4171 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
4172 iommu->reg + DMAR_FEUADDR_REG);
4173
1f5b3c3f 4174 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
4175 }
4176
4177 for_each_active_iommu(iommu, drhd)
4178 kfree(iommu->iommu_state);
f59c7b69
FY
4179}
4180
134fac3f 4181static struct syscore_ops iommu_syscore_ops = {
f59c7b69
FY
4182 .resume = iommu_resume,
4183 .suspend = iommu_suspend,
4184};
4185
134fac3f 4186static void __init init_iommu_pm_ops(void)
f59c7b69 4187{
134fac3f 4188 register_syscore_ops(&iommu_syscore_ops);
f59c7b69
FY
4189}
4190
4191#else
99592ba4 4192static inline void init_iommu_pm_ops(void) {}
f59c7b69
FY
 4193#endif /* CONFIG_SUSPEND */
4194
318fe7df 4195
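/*
 * dmar_parse_one_rmrr() records one ACPI RMRR entry: the base/end
 * addresses are copied into a dmar_rmrr_unit, an IOMMU_RESV_DIRECT
 * reserved region of the same extent is allocated so the core IOMMU code
 * can report it, the device scope is parsed, and the unit is added to
 * dmar_rmrr_units.  Any allocation failure unwinds and returns -ENOMEM.
 */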
c2a0b538 4196int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
318fe7df
SS
4197{
4198 struct acpi_dmar_reserved_memory *rmrr;
0659b8dc 4199 int prot = DMA_PTE_READ|DMA_PTE_WRITE;
318fe7df 4200 struct dmar_rmrr_unit *rmrru;
0659b8dc 4201 size_t length;
318fe7df
SS
4202
4203 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
4204 if (!rmrru)
0659b8dc 4205 goto out;
318fe7df
SS
4206
4207 rmrru->hdr = header;
4208 rmrr = (struct acpi_dmar_reserved_memory *)header;
4209 rmrru->base_address = rmrr->base_address;
4210 rmrru->end_address = rmrr->end_address;
0659b8dc
EA
4211
4212 length = rmrr->end_address - rmrr->base_address + 1;
4213 rmrru->resv = iommu_alloc_resv_region(rmrr->base_address, length, prot,
4214 IOMMU_RESV_DIRECT);
4215 if (!rmrru->resv)
4216 goto free_rmrru;
4217
2e455289
JL
4218 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
4219 ((void *)rmrr) + rmrr->header.length,
4220 &rmrru->devices_cnt);
0659b8dc
EA
4221 if (rmrru->devices_cnt && rmrru->devices == NULL)
4222 goto free_all;
318fe7df 4223
2e455289 4224 list_add(&rmrru->list, &dmar_rmrr_units);
318fe7df 4225
2e455289 4226 return 0;
0659b8dc
EA
4227free_all:
4228 kfree(rmrru->resv);
4229free_rmrru:
4230 kfree(rmrru);
4231out:
4232 return -ENOMEM;
318fe7df
SS
4233}
4234
6b197249
JL
4235static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
4236{
4237 struct dmar_atsr_unit *atsru;
4238 struct acpi_dmar_atsr *tmp;
4239
4240 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4241 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
4242 if (atsr->segment != tmp->segment)
4243 continue;
4244 if (atsr->header.length != tmp->header.length)
4245 continue;
4246 if (memcmp(atsr, tmp, atsr->header.length) == 0)
4247 return atsru;
4248 }
4249
4250 return NULL;
4251}
4252
4253int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
318fe7df
SS
4254{
4255 struct acpi_dmar_atsr *atsr;
4256 struct dmar_atsr_unit *atsru;
4257
b608fe35 4258 if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
6b197249
JL
4259 return 0;
4260
318fe7df 4261 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
6b197249
JL
4262 atsru = dmar_find_atsr(atsr);
4263 if (atsru)
4264 return 0;
4265
4266 atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
318fe7df
SS
4267 if (!atsru)
4268 return -ENOMEM;
4269
6b197249
JL
4270 /*
 4271	 * If memory is allocated from slab by the ACPI _DSM method, we need to
4272 * copy the memory content because the memory buffer will be freed
4273 * on return.
4274 */
4275 atsru->hdr = (void *)(atsru + 1);
4276 memcpy(atsru->hdr, hdr, hdr->length);
318fe7df 4277 atsru->include_all = atsr->flags & 0x1;
2e455289
JL
4278 if (!atsru->include_all) {
4279 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
4280 (void *)atsr + atsr->header.length,
4281 &atsru->devices_cnt);
4282 if (atsru->devices_cnt && atsru->devices == NULL) {
4283 kfree(atsru);
4284 return -ENOMEM;
4285 }
4286 }
318fe7df 4287
0e242612 4288 list_add_rcu(&atsru->list, &dmar_atsr_units);
318fe7df
SS
4289
4290 return 0;
4291}
4292
9bdc531e
JL
4293static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
4294{
4295 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
4296 kfree(atsru);
4297}
4298
6b197249
JL
4299int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4300{
4301 struct acpi_dmar_atsr *atsr;
4302 struct dmar_atsr_unit *atsru;
4303
4304 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4305 atsru = dmar_find_atsr(atsr);
4306 if (atsru) {
4307 list_del_rcu(&atsru->list);
4308 synchronize_rcu();
4309 intel_iommu_free_atsr(atsru);
4310 }
4311
4312 return 0;
4313}
4314
4315int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4316{
4317 int i;
4318 struct device *dev;
4319 struct acpi_dmar_atsr *atsr;
4320 struct dmar_atsr_unit *atsru;
4321
4322 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4323 atsru = dmar_find_atsr(atsr);
4324 if (!atsru)
4325 return 0;
4326
194dc870 4327 if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
6b197249
JL
4328 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
4329 i, dev)
4330 return -EBUSY;
194dc870 4331 }
6b197249
JL
4332
4333 return 0;
4334}
4335
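/*
 * intel_iommu_add() brings a hot-added DMAR unit up to the same state the
 * boot-time units reached in init_dmars(): it refuses units that would
 * weaken global capabilities (pass-through, snooping, superpage), then
 * allocates domains and a root entry, optionally enables the SVM page
 * request queue, sets up the fault interrupt, flushes the caches and
 * enables translation, unless the unit is marked ignored.
 */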
ffebeb46
JL
4336static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
4337{
e083ea5b 4338 int sp, ret;
ffebeb46
JL
4339 struct intel_iommu *iommu = dmaru->iommu;
4340
4341 if (g_iommus[iommu->seq_id])
4342 return 0;
4343
4344 if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
9f10e5bf 4345 pr_warn("%s: Doesn't support hardware pass through.\n",
ffebeb46
JL
4346 iommu->name);
4347 return -ENXIO;
4348 }
4349 if (!ecap_sc_support(iommu->ecap) &&
4350 domain_update_iommu_snooping(iommu)) {
9f10e5bf 4351 pr_warn("%s: Doesn't support snooping.\n",
ffebeb46
JL
4352 iommu->name);
4353 return -ENXIO;
4354 }
4355 sp = domain_update_iommu_superpage(iommu) - 1;
4356 if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
9f10e5bf 4357 pr_warn("%s: Doesn't support large page.\n",
ffebeb46
JL
4358 iommu->name);
4359 return -ENXIO;
4360 }
4361
4362 /*
4363 * Disable translation if already enabled prior to OS handover.
4364 */
4365 if (iommu->gcmd & DMA_GCMD_TE)
4366 iommu_disable_translation(iommu);
4367
4368 g_iommus[iommu->seq_id] = iommu;
4369 ret = iommu_init_domains(iommu);
4370 if (ret == 0)
4371 ret = iommu_alloc_root_entry(iommu);
4372 if (ret)
4373 goto out;
4374
8a94ade4 4375#ifdef CONFIG_INTEL_IOMMU_SVM
765b6a98 4376 if (pasid_supported(iommu))
d9737953 4377 intel_svm_init(iommu);
8a94ade4
DW
4378#endif
4379
ffebeb46
JL
4380 if (dmaru->ignored) {
4381 /*
4382 * we always have to disable PMRs or DMA may fail on this device
4383 */
4384 if (force_on)
4385 iommu_disable_protect_mem_regions(iommu);
4386 return 0;
4387 }
4388
4389 intel_iommu_init_qi(iommu);
4390 iommu_flush_write_buffer(iommu);
a222a7f0
DW
4391
4392#ifdef CONFIG_INTEL_IOMMU_SVM
765b6a98 4393 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
a222a7f0
DW
4394 ret = intel_svm_enable_prq(iommu);
4395 if (ret)
4396 goto disable_iommu;
4397 }
4398#endif
ffebeb46
JL
4399 ret = dmar_set_interrupt(iommu);
4400 if (ret)
4401 goto disable_iommu;
4402
4403 iommu_set_root_entry(iommu);
4404 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
4405 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4406 iommu_enable_translation(iommu);
4407
ffebeb46
JL
4408 iommu_disable_protect_mem_regions(iommu);
4409 return 0;
4410
4411disable_iommu:
4412 disable_dmar_iommu(iommu);
4413out:
4414 free_dmar_iommu(iommu);
4415 return ret;
4416}
4417
6b197249
JL
4418int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
4419{
ffebeb46
JL
4420 int ret = 0;
4421 struct intel_iommu *iommu = dmaru->iommu;
4422
4423 if (!intel_iommu_enabled)
4424 return 0;
4425 if (iommu == NULL)
4426 return -EINVAL;
4427
4428 if (insert) {
4429 ret = intel_iommu_add(dmaru);
4430 } else {
4431 disable_dmar_iommu(iommu);
4432 free_dmar_iommu(iommu);
4433 }
4434
4435 return ret;
6b197249
JL
4436}
4437
9bdc531e
JL
4438static void intel_iommu_free_dmars(void)
4439{
4440 struct dmar_rmrr_unit *rmrru, *rmrr_n;
4441 struct dmar_atsr_unit *atsru, *atsr_n;
4442
4443 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
4444 list_del(&rmrru->list);
4445 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
0659b8dc 4446 kfree(rmrru->resv);
9bdc531e 4447 kfree(rmrru);
318fe7df
SS
4448 }
4449
9bdc531e
JL
4450 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
4451 list_del(&atsru->list);
4452 intel_iommu_free_atsr(atsru);
4453 }
318fe7df
SS
4454}
4455
4456int dmar_find_matched_atsr_unit(struct pci_dev *dev)
4457{
b683b230 4458 int i, ret = 1;
318fe7df 4459 struct pci_bus *bus;
832bd858
DW
4460 struct pci_dev *bridge = NULL;
4461 struct device *tmp;
318fe7df
SS
4462 struct acpi_dmar_atsr *atsr;
4463 struct dmar_atsr_unit *atsru;
4464
4465 dev = pci_physfn(dev);
318fe7df 4466 for (bus = dev->bus; bus; bus = bus->parent) {
b5f82ddf 4467 bridge = bus->self;
d14053b3
DW
4468 /* If it's an integrated device, allow ATS */
4469 if (!bridge)
4470 return 1;
4471 /* Connected via non-PCIe: no ATS */
4472 if (!pci_is_pcie(bridge) ||
62f87c0e 4473 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
318fe7df 4474 return 0;
d14053b3 4475 /* If we found the root port, look it up in the ATSR */
b5f82ddf 4476 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
318fe7df 4477 break;
318fe7df
SS
4478 }
4479
0e242612 4480 rcu_read_lock();
b5f82ddf
JL
4481 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4482 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4483 if (atsr->segment != pci_domain_nr(dev->bus))
4484 continue;
4485
b683b230 4486 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
832bd858 4487 if (tmp == &bridge->dev)
b683b230 4488 goto out;
b5f82ddf
JL
4489
4490 if (atsru->include_all)
b683b230 4491 goto out;
b5f82ddf 4492 }
b683b230
JL
4493 ret = 0;
4494out:
0e242612 4495 rcu_read_unlock();
318fe7df 4496
b683b230 4497 return ret;
318fe7df
SS
4498}
4499
59ce0515
JL
4500int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
4501{
e083ea5b 4502 int ret;
59ce0515
JL
4503 struct dmar_rmrr_unit *rmrru;
4504 struct dmar_atsr_unit *atsru;
4505 struct acpi_dmar_atsr *atsr;
4506 struct acpi_dmar_reserved_memory *rmrr;
4507
b608fe35 4508 if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING)
59ce0515
JL
4509 return 0;
4510
4511 list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
4512 rmrr = container_of(rmrru->hdr,
4513 struct acpi_dmar_reserved_memory, header);
4514 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4515 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
4516 ((void *)rmrr) + rmrr->header.length,
4517 rmrr->segment, rmrru->devices,
4518 rmrru->devices_cnt);
e083ea5b 4519 if (ret < 0)
59ce0515 4520 return ret;
e6a8c9b3 4521 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
27e24950
JL
4522 dmar_remove_dev_scope(info, rmrr->segment,
4523 rmrru->devices, rmrru->devices_cnt);
59ce0515
JL
4524 }
4525 }
4526
4527 list_for_each_entry(atsru, &dmar_atsr_units, list) {
4528 if (atsru->include_all)
4529 continue;
4530
4531 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4532 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4533 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
4534 (void *)atsr + atsr->header.length,
4535 atsr->segment, atsru->devices,
4536 atsru->devices_cnt);
4537 if (ret > 0)
4538 break;
e083ea5b 4539 else if (ret < 0)
59ce0515 4540 return ret;
e6a8c9b3 4541 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
59ce0515
JL
4542 if (dmar_remove_dev_scope(info, atsr->segment,
4543 atsru->devices, atsru->devices_cnt))
4544 break;
4545 }
4546 }
4547
4548 return 0;
4549}
4550
99dcaded
FY
4551/*
 4552 * Here we only respond to the action of a device being unbound from its driver.
 4553 *
 4554 * A newly added device is not attached to its DMAR domain here yet. That will
 4555 * happen when mapping the device to iova.
4556 */
4557static int device_notifier(struct notifier_block *nb,
4558 unsigned long action, void *data)
4559{
4560 struct device *dev = data;
99dcaded
FY
4561 struct dmar_domain *domain;
4562
3d89194a 4563 if (iommu_dummy(dev))
44cd613c
DW
4564 return 0;
4565
117266fd
LB
4566 if (action == BUS_NOTIFY_REMOVED_DEVICE) {
4567 domain = find_domain(dev);
4568 if (!domain)
4569 return 0;
99dcaded 4570
117266fd 4571 dmar_remove_one_dev_info(dev);
117266fd
LB
4572 } else if (action == BUS_NOTIFY_ADD_DEVICE) {
4573 if (iommu_should_identity_map(dev, 1))
4574 domain_add_dev_info(si_domain, dev);
4575 }
a97590e5 4576
99dcaded
FY
4577 return 0;
4578}
4579
4580static struct notifier_block device_nb = {
4581 .notifier_call = device_notifier,
4582};
4583
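/*
 * Memory hotplug keeps the static identity (si) domain in sync: newly
 * onlined ranges get a 1:1 mapping added up front, and ranges going
 * offline have their IOVAs found, split out, unmapped and flushed on
 * every active IOMMU before the freelist pages are released.
 */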
75f05569
JL
4584static int intel_iommu_memory_notifier(struct notifier_block *nb,
4585 unsigned long val, void *v)
4586{
4587 struct memory_notify *mhp = v;
4588 unsigned long long start, end;
4589 unsigned long start_vpfn, last_vpfn;
4590
4591 switch (val) {
4592 case MEM_GOING_ONLINE:
4593 start = mhp->start_pfn << PAGE_SHIFT;
4594 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
4595 if (iommu_domain_identity_map(si_domain, start, end)) {
9f10e5bf 4596 pr_warn("Failed to build identity map for [%llx-%llx]\n",
75f05569
JL
4597 start, end);
4598 return NOTIFY_BAD;
4599 }
4600 break;
4601
4602 case MEM_OFFLINE:
4603 case MEM_CANCEL_ONLINE:
4604 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4605 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
4606 while (start_vpfn <= last_vpfn) {
4607 struct iova *iova;
4608 struct dmar_drhd_unit *drhd;
4609 struct intel_iommu *iommu;
ea8ea460 4610 struct page *freelist;
75f05569
JL
4611
4612 iova = find_iova(&si_domain->iovad, start_vpfn);
4613 if (iova == NULL) {
9f10e5bf 4614 pr_debug("Failed get IOVA for PFN %lx\n",
75f05569
JL
4615 start_vpfn);
4616 break;
4617 }
4618
4619 iova = split_and_remove_iova(&si_domain->iovad, iova,
4620 start_vpfn, last_vpfn);
4621 if (iova == NULL) {
9f10e5bf 4622 pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
75f05569
JL
4623 start_vpfn, last_vpfn);
4624 return NOTIFY_BAD;
4625 }
4626
ea8ea460
DW
4627 freelist = domain_unmap(si_domain, iova->pfn_lo,
4628 iova->pfn_hi);
4629
75f05569
JL
4630 rcu_read_lock();
4631 for_each_active_iommu(iommu, drhd)
a1ddcbe9 4632 iommu_flush_iotlb_psi(iommu, si_domain,
a156ef99 4633 iova->pfn_lo, iova_size(iova),
ea8ea460 4634 !freelist, 0);
75f05569 4635 rcu_read_unlock();
ea8ea460 4636 dma_free_pagelist(freelist);
75f05569
JL
4637
4638 start_vpfn = iova->pfn_hi + 1;
4639 free_iova_mem(iova);
4640 }
4641 break;
4642 }
4643
4644 return NOTIFY_OK;
4645}
4646
4647static struct notifier_block intel_iommu_memory_nb = {
4648 .notifier_call = intel_iommu_memory_notifier,
4649 .priority = 0
4650};
4651
22e2f9fa
OP
4652static void free_all_cpu_cached_iovas(unsigned int cpu)
4653{
4654 int i;
4655
4656 for (i = 0; i < g_num_of_iommus; i++) {
4657 struct intel_iommu *iommu = g_iommus[i];
4658 struct dmar_domain *domain;
0caa7616 4659 int did;
22e2f9fa
OP
4660
4661 if (!iommu)
4662 continue;
4663
3bd4f911 4664 for (did = 0; did < cap_ndoms(iommu->cap); did++) {
0caa7616 4665 domain = get_iommu_domain(iommu, (u16)did);
22e2f9fa
OP
4666
4667 if (!domain)
4668 continue;
4669 free_cpu_cached_iovas(cpu, &domain->iovad);
4670 }
4671 }
4672}
4673
21647615 4674static int intel_iommu_cpu_dead(unsigned int cpu)
aa473240 4675{
21647615 4676 free_all_cpu_cached_iovas(cpu);
21647615 4677 return 0;
aa473240
OP
4678}
4679
161b28aa
JR
4680static void intel_disable_iommus(void)
4681{
4682 struct intel_iommu *iommu = NULL;
4683 struct dmar_drhd_unit *drhd;
4684
4685 for_each_iommu(iommu, drhd)
4686 iommu_disable_translation(iommu);
4687}
4688
a7fdb6e6
JR
4689static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
4690{
2926a2aa
JR
4691 struct iommu_device *iommu_dev = dev_to_iommu_device(dev);
4692
4693 return container_of(iommu_dev, struct intel_iommu, iommu);
a7fdb6e6
JR
4694}
4695
a5459cfe
AW
4696static ssize_t intel_iommu_show_version(struct device *dev,
4697 struct device_attribute *attr,
4698 char *buf)
4699{
a7fdb6e6 4700 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4701 u32 ver = readl(iommu->reg + DMAR_VER_REG);
4702 return sprintf(buf, "%d:%d\n",
4703 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4704}
4705static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
4706
4707static ssize_t intel_iommu_show_address(struct device *dev,
4708 struct device_attribute *attr,
4709 char *buf)
4710{
a7fdb6e6 4711 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4712 return sprintf(buf, "%llx\n", iommu->reg_phys);
4713}
4714static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
4715
4716static ssize_t intel_iommu_show_cap(struct device *dev,
4717 struct device_attribute *attr,
4718 char *buf)
4719{
a7fdb6e6 4720 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4721 return sprintf(buf, "%llx\n", iommu->cap);
4722}
4723static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
4724
4725static ssize_t intel_iommu_show_ecap(struct device *dev,
4726 struct device_attribute *attr,
4727 char *buf)
4728{
a7fdb6e6 4729 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4730 return sprintf(buf, "%llx\n", iommu->ecap);
4731}
4732static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4733
2238c082
AW
4734static ssize_t intel_iommu_show_ndoms(struct device *dev,
4735 struct device_attribute *attr,
4736 char *buf)
4737{
a7fdb6e6 4738 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2238c082
AW
4739 return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
4740}
4741static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL);
4742
4743static ssize_t intel_iommu_show_ndoms_used(struct device *dev,
4744 struct device_attribute *attr,
4745 char *buf)
4746{
a7fdb6e6 4747 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2238c082
AW
4748 return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
4749 cap_ndoms(iommu->cap)));
4750}
4751static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL);
4752
a5459cfe
AW
4753static struct attribute *intel_iommu_attrs[] = {
4754 &dev_attr_version.attr,
4755 &dev_attr_address.attr,
4756 &dev_attr_cap.attr,
4757 &dev_attr_ecap.attr,
2238c082
AW
4758 &dev_attr_domains_supported.attr,
4759 &dev_attr_domains_used.attr,
a5459cfe
AW
4760 NULL,
4761};
4762
4763static struct attribute_group intel_iommu_group = {
4764 .name = "intel-iommu",
4765 .attrs = intel_iommu_attrs,
4766};
4767
4768const struct attribute_group *intel_iommu_groups[] = {
4769 &intel_iommu_group,
4770 NULL,
4771};
4772
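/*
 * platform_optin_force_iommu() honours the DMAR platform opt-in flag:
 * if the firmware opted in and at least one untrusted PCI device is
 * present, the IOMMU is force-enabled even when the user disabled it on
 * the command line, with identity mapping applied to everything except
 * the untrusted devices in that case.
 */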
89a6079d
LB
4773static int __init platform_optin_force_iommu(void)
4774{
4775 struct pci_dev *pdev = NULL;
4776 bool has_untrusted_dev = false;
4777
4778 if (!dmar_platform_optin() || no_platform_optin)
4779 return 0;
4780
4781 for_each_pci_dev(pdev) {
4782 if (pdev->untrusted) {
4783 has_untrusted_dev = true;
4784 break;
4785 }
4786 }
4787
4788 if (!has_untrusted_dev)
4789 return 0;
4790
4791 if (no_iommu || dmar_disabled)
4792 pr_info("Intel-IOMMU force enabled due to platform opt in\n");
4793
4794 /*
4795 * If Intel-IOMMU is disabled by default, we will apply identity
4796 * map for all devices except those marked as being untrusted.
4797 */
4798 if (dmar_disabled)
4799 iommu_identity_mapping |= IDENTMAP_ALL;
4800
4801 dmar_disabled = 0;
4802#if defined(CONFIG_X86) && defined(CONFIG_SWIOTLB)
4803 swiotlb = 0;
4804#endif
4805 no_iommu = 0;
4806
4807 return 1;
4808}
4809
fa212a97
LB
4810static int __init probe_acpi_namespace_devices(void)
4811{
4812 struct dmar_drhd_unit *drhd;
4813 struct intel_iommu *iommu;
4814 struct device *dev;
4815 int i, ret = 0;
4816
4817 for_each_active_iommu(iommu, drhd) {
4818 for_each_active_dev_scope(drhd->devices,
4819 drhd->devices_cnt, i, dev) {
4820 struct acpi_device_physical_node *pn;
4821 struct iommu_group *group;
4822 struct acpi_device *adev;
4823
4824 if (dev->bus != &acpi_bus_type)
4825 continue;
4826
4827 adev = to_acpi_device(dev);
4828 mutex_lock(&adev->physical_node_lock);
4829 list_for_each_entry(pn,
4830 &adev->physical_node_list, node) {
4831 group = iommu_group_get(pn->dev);
4832 if (group) {
4833 iommu_group_put(group);
4834 continue;
4835 }
4836
4837 pn->dev->bus->iommu_ops = &intel_iommu_ops;
4838 ret = iommu_probe_device(pn->dev);
4839 if (ret)
4840 break;
4841 }
4842 mutex_unlock(&adev->physical_node_lock);
4843
4844 if (ret)
4845 return ret;
4846 }
4847 }
4848
4849 return 0;
4850}
4851
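/*
 * intel_iommu_init() is the top-level bring-up path: parse the DMAR
 * tables and device scopes, bail out (after disabling any IOMMUs a
 * previous kernel left enabled) if remapping is disabled, reserve the
 * special IOVA ranges, run init_dmars(), install intel_dma_ops, register
 * sysfs entries and the bus/memory/cpuhp notifiers, probe ACPI namespace
 * devices, and finally enable translation on every unit.
 */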
ba395927
KA
4852int __init intel_iommu_init(void)
4853{
9bdc531e 4854 int ret = -ENODEV;
3a93c841 4855 struct dmar_drhd_unit *drhd;
7c919779 4856 struct intel_iommu *iommu;
ba395927 4857
89a6079d
LB
4858 /*
4859 * Intel IOMMU is required for a TXT/tboot launch or platform
4860 * opt in, so enforce that.
4861 */
4862 force_on = tboot_force_iommu() || platform_optin_force_iommu();
a59b50e9 4863
3a5670e8
JL
4864 if (iommu_init_mempool()) {
4865 if (force_on)
4866 panic("tboot: Failed to initialize iommu memory\n");
4867 return -ENOMEM;
4868 }
4869
4870 down_write(&dmar_global_lock);
a59b50e9
JC
4871 if (dmar_table_init()) {
4872 if (force_on)
4873 panic("tboot: Failed to initialize DMAR table\n");
9bdc531e 4874 goto out_free_dmar;
a59b50e9 4875 }
ba395927 4876
c2c7286a 4877 if (dmar_dev_scope_init() < 0) {
a59b50e9
JC
4878 if (force_on)
4879 panic("tboot: Failed to initialize DMAR device scope\n");
9bdc531e 4880 goto out_free_dmar;
a59b50e9 4881 }
1886e8a9 4882
ec154bf5
JR
4883 up_write(&dmar_global_lock);
4884
4885 /*
4886 * The bus notifier takes the dmar_global_lock, so lockdep will
4887 * complain later when we register it under the lock.
4888 */
4889 dmar_register_bus_notifier();
4890
4891 down_write(&dmar_global_lock);
4892
161b28aa 4893 if (no_iommu || dmar_disabled) {
bfd20f1c
SL
4894 /*
4895 * We exit the function here to ensure IOMMU's remapping and
 4896		 * mempool aren't set up, which means that the IOMMU's PMRs
 4897		 * won't be disabled via the call to init_dmars(). So disable
 4898		 * them explicitly here. The PMRs were set up by tboot prior to
4899 * calling SENTER, but the kernel is expected to reset/tear
4900 * down the PMRs.
4901 */
4902 if (intel_iommu_tboot_noforce) {
4903 for_each_iommu(iommu, drhd)
4904 iommu_disable_protect_mem_regions(iommu);
4905 }
4906
161b28aa
JR
4907 /*
4908 * Make sure the IOMMUs are switched off, even when we
4909 * boot into a kexec kernel and the previous kernel left
4910 * them enabled
4911 */
4912 intel_disable_iommus();
9bdc531e 4913 goto out_free_dmar;
161b28aa 4914 }
2ae21010 4915
318fe7df 4916 if (list_empty(&dmar_rmrr_units))
9f10e5bf 4917 pr_info("No RMRR found\n");
318fe7df
SS
4918
4919 if (list_empty(&dmar_atsr_units))
9f10e5bf 4920 pr_info("No ATSR found\n");
318fe7df 4921
51a63e67
JC
4922 if (dmar_init_reserved_ranges()) {
4923 if (force_on)
4924 panic("tboot: Failed to reserve iommu ranges\n");
3a5670e8 4925 goto out_free_reserved_range;
51a63e67 4926 }
ba395927 4927
cf1ec453
LB
4928 if (dmar_map_gfx)
4929 intel_iommu_gfx_mapped = 1;
4930
ba395927
KA
4931 init_no_remapping_devices();
4932
b779260b 4933 ret = init_dmars();
ba395927 4934 if (ret) {
a59b50e9
JC
4935 if (force_on)
4936 panic("tboot: Failed to initialize DMARs\n");
9f10e5bf 4937 pr_err("Initialization failed\n");
9bdc531e 4938 goto out_free_reserved_range;
ba395927 4939 }
3a5670e8 4940 up_write(&dmar_global_lock);
ba395927 4941
4fac8076 4942#if defined(CONFIG_X86) && defined(CONFIG_SWIOTLB)
75f1cdf1
FT
4943 swiotlb = 0;
4944#endif
19943b0e 4945 dma_ops = &intel_dma_ops;
4ed0d3e6 4946
134fac3f 4947 init_iommu_pm_ops();
a8bcbb0d 4948
39ab9555
JR
4949 for_each_active_iommu(iommu, drhd) {
4950 iommu_device_sysfs_add(&iommu->iommu, NULL,
4951 intel_iommu_groups,
4952 "%s", iommu->name);
4953 iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
4954 iommu_device_register(&iommu->iommu);
4955 }
a5459cfe 4956
4236d97d 4957 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
99dcaded 4958 bus_register_notifier(&pci_bus_type, &device_nb);
75f05569
JL
4959 if (si_domain && !hw_pass_through)
4960 register_memory_notifier(&intel_iommu_memory_nb);
21647615
AMG
4961 cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL,
4962 intel_iommu_cpu_dead);
d8190dc6 4963
fa212a97
LB
4964 if (probe_acpi_namespace_devices())
4965 pr_warn("ACPI name space devices didn't probe correctly\n");
4966
d8190dc6
LB
4967 /* Finally, we enable the DMA remapping hardware. */
4968 for_each_iommu(iommu, drhd) {
4969 if (!translation_pre_enabled(iommu))
4970 iommu_enable_translation(iommu);
4971
4972 iommu_disable_protect_mem_regions(iommu);
4973 }
4974 pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
4975
8bc1f85c 4976 intel_iommu_enabled = 1;
ee2636b8 4977 intel_iommu_debugfs_init();
8bc1f85c 4978
ba395927 4979 return 0;
9bdc531e
JL
4980
4981out_free_reserved_range:
4982 put_iova_domain(&reserved_iova_list);
9bdc531e
JL
4983out_free_dmar:
4984 intel_iommu_free_dmars();
3a5670e8
JL
4985 up_write(&dmar_global_lock);
4986 iommu_exit_mempool();
9bdc531e 4987 return ret;
ba395927 4988}
e820482c 4989
2452d9db 4990static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
579305f7
AW
4991{
4992 struct intel_iommu *iommu = opaque;
4993
2452d9db 4994 domain_context_clear_one(iommu, PCI_BUS_NUM(alias), alias & 0xff);
579305f7
AW
4995 return 0;
4996}
4997
4998/*
4999 * NB - intel-iommu lacks any sort of reference counting for the users of
5000 * dependent devices. If multiple endpoints have intersecting dependent
5001 * devices, unbinding the driver from any one of them will possibly leave
5002 * the others unable to operate.
5003 */
2452d9db 5004static void domain_context_clear(struct intel_iommu *iommu, struct device *dev)
3199aa6b 5005{
0bcb3e28 5006 if (!iommu || !dev || !dev_is_pci(dev))
3199aa6b
HW
5007 return;
5008
2452d9db 5009 pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_clear_one_cb, iommu);
3199aa6b
HW
5010}
5011
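/*
 * __dmar_remove_one_dev_info() is the common teardown path when a device
 * leaves its domain: the scalable-mode PASID entry is torn down, the
 * device IOTLB is disabled, the context entries are cleared for every DMA
 * alias, the PASID table is freed, and the domain is detached from the
 * IOMMU.  A private domain (one flagged DOMAIN_FLAG_LOSE_CHILDREN) is
 * freed as well, unless it is the static identity domain.
 */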
127c7615 5012static void __dmar_remove_one_dev_info(struct device_domain_info *info)
c7151a8d 5013{
942067f1 5014 struct dmar_domain *domain;
c7151a8d
WH
5015 struct intel_iommu *iommu;
5016 unsigned long flags;
c7151a8d 5017
55d94043
JR
5018 assert_spin_locked(&device_domain_lock);
5019
127c7615 5020 if (WARN_ON(!info))
c7151a8d
WH
5021 return;
5022
127c7615 5023 iommu = info->iommu;
942067f1 5024 domain = info->domain;
c7151a8d 5025
127c7615 5026 if (info->dev) {
ef848b7e
LB
5027 if (dev_is_pci(info->dev) && sm_supported(iommu))
5028 intel_pasid_tear_down_entry(iommu, info->dev,
5029 PASID_RID2PASID);
5030
127c7615
JR
5031 iommu_disable_dev_iotlb(info);
5032 domain_context_clear(iommu, info->dev);
a7fc93fe 5033 intel_pasid_free_table(info->dev);
127c7615 5034 }
c7151a8d 5035
b608ac3b 5036 unlink_domain_info(info);
c7151a8d 5037
d160aca5 5038 spin_lock_irqsave(&iommu->lock, flags);
942067f1 5039 domain_detach_iommu(domain, iommu);
d160aca5 5040 spin_unlock_irqrestore(&iommu->lock, flags);
c7151a8d 5041
942067f1
LB
5042 /* free the private domain */
5043 if (domain->flags & DOMAIN_FLAG_LOSE_CHILDREN &&
5044 !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY))
5045 domain_exit(info->domain);
5046
127c7615 5047 free_devinfo_mem(info);
c7151a8d 5048}
c7151a8d 5049
71753239 5050static void dmar_remove_one_dev_info(struct device *dev)
55d94043 5051{
127c7615 5052 struct device_domain_info *info;
55d94043 5053 unsigned long flags;
3e7abe25 5054
55d94043 5055 spin_lock_irqsave(&device_domain_lock, flags);
127c7615
JR
5056 info = dev->archdata.iommu;
5057 __dmar_remove_one_dev_info(info);
55d94043 5058 spin_unlock_irqrestore(&device_domain_lock, flags);
c7151a8d
WH
5059}
5060
2c2e2c38 5061static int md_domain_init(struct dmar_domain *domain, int guest_width)
5e98c4b1
WH
5062{
5063 int adjust_width;
5064
aa3ac946 5065 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
5e98c4b1
WH
5066 domain_reserve_special_ranges(domain);
5067
5068 /* calculate AGAW */
5069 domain->gaw = guest_width;
5070 adjust_width = guestwidth_to_adjustwidth(guest_width);
5071 domain->agaw = width_to_agaw(adjust_width);
5072
5e98c4b1 5073 domain->iommu_coherency = 0;
c5b15255 5074 domain->iommu_snooping = 0;
6dd9a7c7 5075 domain->iommu_superpage = 0;
fe40f1e0 5076 domain->max_addr = 0;
5e98c4b1
WH
5077
5078 /* always allocate the top pgd */
4c923d47 5079 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
5e98c4b1
WH
5080 if (!domain->pgd)
5081 return -ENOMEM;
5082 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
5083 return 0;
5084}
5085
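/*
 * intel_iommu_domain_alloc() backs the generic IOMMU API: DMA and
 * unmanaged domains get a freshly initialised dmar_domain (DMA domains
 * additionally get an IOVA flush queue, falling back to strict mode if
 * that fails), while IOMMU_DOMAIN_IDENTITY simply hands back the shared
 * si_domain.  Other domain types are not supported.
 */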
00a77deb 5086static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
38717946 5087{
5d450806 5088 struct dmar_domain *dmar_domain;
00a77deb
JR
5089 struct iommu_domain *domain;
5090
4de354ec 5091 switch (type) {
fa954e68
LB
5092 case IOMMU_DOMAIN_DMA:
5093 /* fallthrough */
4de354ec 5094 case IOMMU_DOMAIN_UNMANAGED:
fa954e68 5095 dmar_domain = alloc_domain(0);
4de354ec
LB
5096 if (!dmar_domain) {
5097 pr_err("Can't allocate dmar_domain\n");
5098 return NULL;
5099 }
5100 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
5101 pr_err("Domain initialization failed\n");
5102 domain_exit(dmar_domain);
5103 return NULL;
5104 }
fa954e68
LB
5105
5106 if (type == IOMMU_DOMAIN_DMA &&
5107 init_iova_flush_queue(&dmar_domain->iovad,
5108 iommu_flush_iova, iova_entry_free)) {
5109 pr_warn("iova flush queue initialization failed\n");
5110 intel_iommu_strict = 1;
5111 }
5112
4de354ec 5113 domain_update_iommu_cap(dmar_domain);
38717946 5114
4de354ec
LB
5115 domain = &dmar_domain->domain;
5116 domain->geometry.aperture_start = 0;
5117 domain->geometry.aperture_end =
5118 __DOMAIN_MAX_ADDR(dmar_domain->gaw);
5119 domain->geometry.force_aperture = true;
5120
5121 return domain;
5122 case IOMMU_DOMAIN_IDENTITY:
5123 return &si_domain->domain;
5124 default:
00a77deb 5125 return NULL;
38717946 5126 }
8a0e715b 5127
4de354ec 5128 return NULL;
38717946 5129}
38717946 5130
00a77deb 5131static void intel_iommu_domain_free(struct iommu_domain *domain)
38717946 5132{
4de354ec
LB
5133 if (domain != &si_domain->domain)
5134 domain_exit(to_dmar_domain(domain));
38717946 5135}
38717946 5136
67b8e02b
LB
5137/*
5138 * Check whether a @domain could be attached to the @dev through the
5139 * aux-domain attach/detach APIs.
5140 */
5141static inline bool
5142is_aux_domain(struct device *dev, struct iommu_domain *domain)
5143{
5144 struct device_domain_info *info = dev->archdata.iommu;
5145
5146 return info && info->auxd_enabled &&
5147 domain->type == IOMMU_DOMAIN_UNMANAGED;
5148}
5149
5150static void auxiliary_link_device(struct dmar_domain *domain,
5151 struct device *dev)
5152{
5153 struct device_domain_info *info = dev->archdata.iommu;
5154
5155 assert_spin_locked(&device_domain_lock);
5156 if (WARN_ON(!info))
5157 return;
5158
5159 domain->auxd_refcnt++;
5160 list_add(&domain->auxd, &info->auxiliary_domains);
5161}
5162
5163static void auxiliary_unlink_device(struct dmar_domain *domain,
5164 struct device *dev)
5165{
5166 struct device_domain_info *info = dev->archdata.iommu;
5167
5168 assert_spin_locked(&device_domain_lock);
5169 if (WARN_ON(!info))
5170 return;
5171
5172 list_del(&domain->auxd);
5173 domain->auxd_refcnt--;
5174
5175 if (!domain->auxd_refcnt && domain->default_pasid > 0)
5176 intel_pasid_free_id(domain->default_pasid);
5177}
5178
5179static int aux_domain_add_dev(struct dmar_domain *domain,
5180 struct device *dev)
5181{
5182 int ret;
5183 u8 bus, devfn;
5184 unsigned long flags;
5185 struct intel_iommu *iommu;
5186
5187 iommu = device_to_iommu(dev, &bus, &devfn);
5188 if (!iommu)
5189 return -ENODEV;
5190
5191 if (domain->default_pasid <= 0) {
5192 int pasid;
5193
5194 pasid = intel_pasid_alloc_id(domain, PASID_MIN,
5195 pci_max_pasids(to_pci_dev(dev)),
5196 GFP_KERNEL);
5197 if (pasid <= 0) {
5198 pr_err("Can't allocate default pasid\n");
5199 return -ENODEV;
5200 }
5201 domain->default_pasid = pasid;
5202 }
5203
5204 spin_lock_irqsave(&device_domain_lock, flags);
5205 /*
5206 * iommu->lock must be held to attach domain to iommu and setup the
5207 * pasid entry for second level translation.
5208 */
5209 spin_lock(&iommu->lock);
5210 ret = domain_attach_iommu(domain, iommu);
5211 if (ret)
5212 goto attach_failed;
5213
5214 /* Setup the PASID entry for mediated devices: */
5215 ret = intel_pasid_setup_second_level(iommu, domain, dev,
5216 domain->default_pasid);
5217 if (ret)
5218 goto table_failed;
5219 spin_unlock(&iommu->lock);
5220
5221 auxiliary_link_device(domain, dev);
5222
5223 spin_unlock_irqrestore(&device_domain_lock, flags);
5224
5225 return 0;
5226
5227table_failed:
5228 domain_detach_iommu(domain, iommu);
5229attach_failed:
5230 spin_unlock(&iommu->lock);
5231 spin_unlock_irqrestore(&device_domain_lock, flags);
5232 if (!domain->auxd_refcnt && domain->default_pasid > 0)
5233 intel_pasid_free_id(domain->default_pasid);
5234
5235 return ret;
5236}
5237
5238static void aux_domain_remove_dev(struct dmar_domain *domain,
5239 struct device *dev)
5240{
5241 struct device_domain_info *info;
5242 struct intel_iommu *iommu;
5243 unsigned long flags;
5244
5245 if (!is_aux_domain(dev, &domain->domain))
5246 return;
5247
5248 spin_lock_irqsave(&device_domain_lock, flags);
5249 info = dev->archdata.iommu;
5250 iommu = info->iommu;
5251
5252 auxiliary_unlink_device(domain, dev);
5253
5254 spin_lock(&iommu->lock);
5255 intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid);
5256 domain_detach_iommu(domain, iommu);
5257 spin_unlock(&iommu->lock);
5258
5259 spin_unlock_irqrestore(&device_domain_lock, flags);
5260}
5261
8cc3759a
LB
5262static int prepare_domain_attach_device(struct iommu_domain *domain,
5263 struct device *dev)
38717946 5264{
00a77deb 5265 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
fe40f1e0
WH
5266 struct intel_iommu *iommu;
5267 int addr_width;
156baca8 5268 u8 bus, devfn;
faa3d6f5 5269
156baca8 5270 iommu = device_to_iommu(dev, &bus, &devfn);
fe40f1e0
WH
5271 if (!iommu)
5272 return -ENODEV;
5273
5274 /* check if this iommu agaw is sufficient for max mapped address */
5275 addr_width = agaw_to_width(iommu->agaw);
a99c47a2
TL
5276 if (addr_width > cap_mgaw(iommu->cap))
5277 addr_width = cap_mgaw(iommu->cap);
5278
5279 if (dmar_domain->max_addr > (1LL << addr_width)) {
932a6523
BH
5280 dev_err(dev, "%s: iommu width (%d) is not "
5281 "sufficient for the mapped address (%llx)\n",
5282 __func__, addr_width, dmar_domain->max_addr);
fe40f1e0
WH
5283 return -EFAULT;
5284 }
a99c47a2
TL
5285 dmar_domain->gaw = addr_width;
5286
5287 /*
5288 * Knock out extra levels of page tables if necessary
5289 */
5290 while (iommu->agaw < dmar_domain->agaw) {
5291 struct dma_pte *pte;
5292
5293 pte = dmar_domain->pgd;
5294 if (dma_pte_present(pte)) {
25cbff16
SY
5295 dmar_domain->pgd = (struct dma_pte *)
5296 phys_to_virt(dma_pte_addr(pte));
7a661013 5297 free_pgtable_page(pte);
a99c47a2
TL
5298 }
5299 dmar_domain->agaw--;
5300 }
fe40f1e0 5301
8cc3759a
LB
5302 return 0;
5303}
5304
5305static int intel_iommu_attach_device(struct iommu_domain *domain,
5306 struct device *dev)
5307{
5308 int ret;
5309
5310 if (device_is_rmrr_locked(dev)) {
5311 dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n");
5312 return -EPERM;
5313 }
5314
67b8e02b
LB
5315 if (is_aux_domain(dev, domain))
5316 return -EPERM;
5317
8cc3759a
LB
5318 /* normally dev is not mapped */
5319 if (unlikely(domain_context_mapped(dev))) {
5320 struct dmar_domain *old_domain;
5321
5322 old_domain = find_domain(dev);
fa954e68 5323 if (old_domain)
8cc3759a 5324 dmar_remove_one_dev_info(dev);
8cc3759a
LB
5325 }
5326
5327 ret = prepare_domain_attach_device(domain, dev);
5328 if (ret)
5329 return ret;
5330
5331 return domain_add_dev_info(to_dmar_domain(domain), dev);
38717946 5332}
38717946 5333
67b8e02b
LB
5334static int intel_iommu_aux_attach_device(struct iommu_domain *domain,
5335 struct device *dev)
5336{
5337 int ret;
5338
5339 if (!is_aux_domain(dev, domain))
5340 return -EPERM;
5341
5342 ret = prepare_domain_attach_device(domain, dev);
5343 if (ret)
5344 return ret;
5345
5346 return aux_domain_add_dev(to_dmar_domain(domain), dev);
5347}
5348
4c5478c9
JR
5349static void intel_iommu_detach_device(struct iommu_domain *domain,
5350 struct device *dev)
38717946 5351{
71753239 5352 dmar_remove_one_dev_info(dev);
faa3d6f5 5353}
c7151a8d 5354
67b8e02b
LB
5355static void intel_iommu_aux_detach_device(struct iommu_domain *domain,
5356 struct device *dev)
5357{
5358 aux_domain_remove_dev(to_dmar_domain(domain), dev);
5359}
5360
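/*
 * Map [iova, iova + size) to hpa with the requested read/write/snoop
 * permissions; fails if the range exceeds what the domain's address
 * width can cover.
 */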
b146a1c9
JR
5361static int intel_iommu_map(struct iommu_domain *domain,
5362 unsigned long iova, phys_addr_t hpa,
5009065d 5363 size_t size, int iommu_prot)
faa3d6f5 5364{
00a77deb 5365 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
fe40f1e0 5366 u64 max_addr;
dde57a21 5367 int prot = 0;
faa3d6f5 5368 int ret;
fe40f1e0 5369
942067f1
LB
5370 if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN)
5371 return -EINVAL;
5372
dde57a21
JR
5373 if (iommu_prot & IOMMU_READ)
5374 prot |= DMA_PTE_READ;
5375 if (iommu_prot & IOMMU_WRITE)
5376 prot |= DMA_PTE_WRITE;
9cf06697
SY
5377 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
5378 prot |= DMA_PTE_SNP;
dde57a21 5379
163cc52c 5380 max_addr = iova + size;
dde57a21 5381 if (dmar_domain->max_addr < max_addr) {
fe40f1e0
WH
5382 u64 end;
5383
5384 /* check if minimum agaw is sufficient for mapped address */
8954da1f 5385 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
fe40f1e0 5386 if (end < max_addr) {
9f10e5bf 5387 pr_err("%s: iommu width (%d) is not "
fe40f1e0 5388 "sufficient for the mapped address (%llx)\n",
8954da1f 5389 __func__, dmar_domain->gaw, max_addr);
fe40f1e0
WH
5390 return -EFAULT;
5391 }
dde57a21 5392 dmar_domain->max_addr = max_addr;
fe40f1e0 5393 }
ad051221
DW
5394	/* Convert size to a count of VTD pages, rounding up if the low
5395	   bits of hpa push the mapping onto an additional page. */
88cb6a74 5396 size = aligned_nrpages(hpa, size);
ad051221
DW
5397 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
5398 hpa >> VTD_PAGE_SHIFT, size, prot);
faa3d6f5 5399 return ret;
38717946 5400}
38717946 5401
5009065d 5402static size_t intel_iommu_unmap(struct iommu_domain *domain,
ea8ea460 5403 unsigned long iova, size_t size)
38717946 5404{
00a77deb 5405 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
ea8ea460 5406 struct page *freelist = NULL;
ea8ea460
DW
5407 unsigned long start_pfn, last_pfn;
5408 unsigned int npages;
42e8c186 5409 int iommu_id, level = 0;
5cf0a76f
DW
5410
5411 /* Cope with horrid API which requires us to unmap more than the
5412 size argument if it happens to be a large-page mapping. */
dc02e46e 5413 BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
942067f1
LB
5414 if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN)
5415 return 0;
5cf0a76f
DW
5416
5417 if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
5418 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4b99d352 5419
ea8ea460
DW
5420 start_pfn = iova >> VTD_PAGE_SHIFT;
5421 last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
5422
5423 freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
5424
5425 npages = last_pfn - start_pfn + 1;
5426
f746a025 5427 for_each_domain_iommu(iommu_id, dmar_domain)
42e8c186
JR
5428 iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
5429 start_pfn, npages, !freelist, 0);
ea8ea460
DW
5430
5431 dma_free_pagelist(freelist);
fe40f1e0 5432
163cc52c
DW
5433 if (dmar_domain->max_addr == iova + size)
5434 dmar_domain->max_addr = iova;
b146a1c9 5435
5cf0a76f 5436 return size;
38717946 5437}
38717946 5438
d14d6577 5439static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
bb5547ac 5440 dma_addr_t iova)
38717946 5441{
00a77deb 5442 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
38717946 5443 struct dma_pte *pte;
5cf0a76f 5444 int level = 0;
faa3d6f5 5445 u64 phys = 0;
38717946 5446
942067f1
LB
5447 if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN)
5448 return 0;
5449
5cf0a76f 5450 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
38717946 5451 if (pte)
faa3d6f5 5452 phys = dma_pte_addr(pte);
38717946 5453
faa3d6f5 5454 return phys;
38717946 5455}
a8bcbb0d 5456
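/* True only if every active IOMMU in the system supports scalable mode. */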
95587a75
LB
5457static inline bool scalable_mode_support(void)
5458{
5459 struct dmar_drhd_unit *drhd;
5460 struct intel_iommu *iommu;
5461 bool ret = true;
5462
5463 rcu_read_lock();
5464 for_each_active_iommu(iommu, drhd) {
5465 if (!sm_supported(iommu)) {
5466 ret = false;
5467 break;
5468 }
5469 }
5470 rcu_read_unlock();
5471
5472 return ret;
5473}
5474
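/* True only if every active IOMMU in the system supports PASID. */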
5475static inline bool iommu_pasid_support(void)
5476{
5477 struct dmar_drhd_unit *drhd;
5478 struct intel_iommu *iommu;
5479 bool ret = true;
5480
5481 rcu_read_lock();
5482 for_each_active_iommu(iommu, drhd) {
5483 if (!pasid_supported(iommu)) {
5484 ret = false;
5485 break;
5486 }
5487 }
5488 rcu_read_unlock();
5489
5490 return ret;
5491}
5492
5d587b8d 5493static bool intel_iommu_capable(enum iommu_cap cap)
dbb9fd86 5494{
dbb9fd86 5495 if (cap == IOMMU_CAP_CACHE_COHERENCY)
5d587b8d 5496 return domain_update_iommu_snooping(NULL) == 1;
323f99cb 5497 if (cap == IOMMU_CAP_INTR_REMAP)
5d587b8d 5498 return irq_remapping_enabled == 1;
dbb9fd86 5499
5d587b8d 5500 return false;
dbb9fd86
SY
5501}
5502
abdfdde2
AW
5503static int intel_iommu_add_device(struct device *dev)
5504{
942067f1
LB
5505 struct dmar_domain *dmar_domain;
5506 struct iommu_domain *domain;
a5459cfe 5507 struct intel_iommu *iommu;
abdfdde2 5508 struct iommu_group *group;
156baca8 5509 u8 bus, devfn;
942067f1 5510 int ret;
70ae6f0d 5511
a5459cfe
AW
5512 iommu = device_to_iommu(dev, &bus, &devfn);
5513 if (!iommu)
70ae6f0d
AW
5514 return -ENODEV;
5515
e3d10af1 5516 iommu_device_link(&iommu->iommu, dev);
a4ff1fc2 5517
8af46c78
LB
5518 if (translation_pre_enabled(iommu))
5519 dev->archdata.iommu = DEFER_DEVICE_DOMAIN_INFO;
5520
e17f9ff4 5521 group = iommu_group_get_for_dev(dev);
783f157b 5522
e17f9ff4
AW
5523 if (IS_ERR(group))
5524 return PTR_ERR(group);
bcb71abe 5525
abdfdde2 5526 iommu_group_put(group);
942067f1
LB
5527
5528 domain = iommu_get_domain_for_dev(dev);
5529 dmar_domain = to_dmar_domain(domain);
5530 if (domain->type == IOMMU_DOMAIN_DMA) {
5531 if (device_def_domain_type(dev, 1) == IOMMU_DOMAIN_IDENTITY) {
5532 ret = iommu_request_dm_for_dev(dev);
5533 if (ret) {
5534 dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
5535 domain_add_dev_info(si_domain, dev);
5536 dev_info(dev,
5537 "Device uses a private identity domain.\n");
5538 return 0;
5539 }
5540
5541 return -ENODEV;
5542 }
5543 } else {
5544 if (device_def_domain_type(dev, 1) == IOMMU_DOMAIN_DMA) {
5545 ret = iommu_request_dma_domain_for_dev(dev);
5546 if (ret) {
5547 dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
4ec066c7 5548 if (!get_private_domain_for_dev(dev)) {
942067f1
LB
5549 dev_warn(dev,
5550 "Failed to get a private domain.\n");
5551 return -ENOMEM;
5552 }
5553
5554 dev_info(dev,
5555 "Device uses a private dma domain.\n");
5556 return 0;
5557 }
5558
5559 return -ENODEV;
5560 }
5561 }
5562
e17f9ff4 5563 return 0;
abdfdde2 5564}
70ae6f0d 5565
abdfdde2
AW
5566static void intel_iommu_remove_device(struct device *dev)
5567{
a5459cfe
AW
5568 struct intel_iommu *iommu;
5569 u8 bus, devfn;
5570
5571 iommu = device_to_iommu(dev, &bus, &devfn);
5572 if (!iommu)
5573 return;
5574
abdfdde2 5575 iommu_group_remove_device(dev);
a5459cfe 5576
e3d10af1 5577 iommu_device_unlink(&iommu->iommu, dev);
70ae6f0d
AW
5578}
5579
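/*
 * Report the RMRR ranges for @device, the 0-16MB direct-map window for
 * ISA bridges (floppy workaround, when configured) and the IOAPIC MSI
 * range as reserved regions.
 */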
0659b8dc
EA
5580static void intel_iommu_get_resv_regions(struct device *device,
5581 struct list_head *head)
5582{
5583 struct iommu_resv_region *reg;
5584 struct dmar_rmrr_unit *rmrr;
5585 struct device *i_dev;
5586 int i;
5587
5588 rcu_read_lock();
5589 for_each_rmrr_units(rmrr) {
5590 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
5591 i, i_dev) {
5592 if (i_dev != device)
5593 continue;
5594
5595 list_add_tail(&rmrr->resv->list, head);
5596 }
5597 }
5598 rcu_read_unlock();
5599
d850c2ee
LB
5600#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
5601 if (dev_is_pci(device)) {
5602 struct pci_dev *pdev = to_pci_dev(device);
5603
5604 if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) {
5605 reg = iommu_alloc_resv_region(0, 1UL << 24, 0,
5606 IOMMU_RESV_DIRECT);
5607 if (reg)
5608 list_add_tail(&reg->list, head);
5609 }
5610 }
5611#endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */
5612
0659b8dc
EA
5613 reg = iommu_alloc_resv_region(IOAPIC_RANGE_START,
5614 IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1,
9d3a4de4 5615 0, IOMMU_RESV_MSI);
0659b8dc
EA
5616 if (!reg)
5617 return;
5618 list_add_tail(&reg->list, head);
5619}
5620
5621static void intel_iommu_put_resv_regions(struct device *dev,
5622 struct list_head *head)
5623{
5624 struct iommu_resv_region *entry, *next;
5625
5626 list_for_each_entry_safe(entry, next, head, list) {
198bc325 5627 if (entry->type == IOMMU_RESV_MSI)
0659b8dc
EA
5628 kfree(entry);
5629 }
70ae6f0d
AW
5630}
5631
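/*
 * Enable PASID support for @dev: set CONTEXT_PASIDE in its context entry
 * (flushing the context cache) and enable PASID on the device side if it
 * was not enabled already.
 */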
d7cbc0f3 5632int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev)
2f26e0a9
DW
5633{
5634 struct device_domain_info *info;
5635 struct context_entry *context;
5636 struct dmar_domain *domain;
5637 unsigned long flags;
5638 u64 ctx_lo;
5639 int ret;
5640
4ec066c7 5641 domain = find_domain(dev);
2f26e0a9
DW
5642 if (!domain)
5643 return -EINVAL;
5644
5645 spin_lock_irqsave(&device_domain_lock, flags);
5646 spin_lock(&iommu->lock);
5647
5648 ret = -EINVAL;
d7cbc0f3 5649 info = dev->archdata.iommu;
2f26e0a9
DW
5650 if (!info || !info->pasid_supported)
5651 goto out;
5652
5653 context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
5654 if (WARN_ON(!context))
5655 goto out;
5656
5657 ctx_lo = context[0].lo;
5658
2f26e0a9 5659 if (!(ctx_lo & CONTEXT_PASIDE)) {
2f26e0a9
DW
5660 ctx_lo |= CONTEXT_PASIDE;
5661 context[0].lo = ctx_lo;
5662 wmb();
d7cbc0f3
LB
5663 iommu->flush.flush_context(iommu,
5664 domain->iommu_did[iommu->seq_id],
5665 PCI_DEVID(info->bus, info->devfn),
2f26e0a9
DW
5666 DMA_CCMD_MASK_NOBIT,
5667 DMA_CCMD_DEVICE_INVL);
5668 }
5669
5670 /* Enable PASID support in the device, if it wasn't already */
5671 if (!info->pasid_enabled)
5672 iommu_enable_dev_iotlb(info);
5673
2f26e0a9
DW
5674 ret = 0;
5675
5676 out:
5677 spin_unlock(&iommu->lock);
5678 spin_unlock_irqrestore(&device_domain_lock, flags);
5679
5680 return ret;
5681}
5682
73bcbdc9
JS
5683static void intel_iommu_apply_resv_region(struct device *dev,
5684 struct iommu_domain *domain,
5685 struct iommu_resv_region *region)
5686{
5687 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5688 unsigned long start, end;
5689
5690 start = IOVA_PFN(region->start);
5691 end = IOVA_PFN(region->start + region->length - 1);
5692
5693 WARN_ON_ONCE(!reserve_iova(&dmar_domain->iovad, start, end));
5694}
5695
d7cbc0f3 5696#ifdef CONFIG_INTEL_IOMMU_SVM
2f26e0a9
DW
5697struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
5698{
5699 struct intel_iommu *iommu;
5700 u8 bus, devfn;
5701
5702 if (iommu_dummy(dev)) {
5703 dev_warn(dev,
5704 "No IOMMU translation for device; cannot enable SVM\n");
5705 return NULL;
5706 }
5707
5708 iommu = device_to_iommu(dev, &bus, &devfn);
5709	if (!iommu) {
b9997e38 5710 dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
2f26e0a9
DW
5711 return NULL;
5712 }
5713
2f26e0a9
DW
5714 return iommu;
5715}
5716#endif /* CONFIG_INTEL_IOMMU_SVM */
5717
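/*
 * Enable auxiliary domain support for @dev: requires scalable mode and
 * PASID support on the IOMMU, and PASID enabled on the device.
 */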
95587a75
LB
5718static int intel_iommu_enable_auxd(struct device *dev)
5719{
5720 struct device_domain_info *info;
5721 struct intel_iommu *iommu;
5722 unsigned long flags;
5723 u8 bus, devfn;
5724 int ret;
5725
5726 iommu = device_to_iommu(dev, &bus, &devfn);
5727 if (!iommu || dmar_disabled)
5728 return -EINVAL;
5729
5730 if (!sm_supported(iommu) || !pasid_supported(iommu))
5731 return -EINVAL;
5732
5733 ret = intel_iommu_enable_pasid(iommu, dev);
5734 if (ret)
5735 return -ENODEV;
5736
5737 spin_lock_irqsave(&device_domain_lock, flags);
5738 info = dev->archdata.iommu;
5739 info->auxd_enabled = 1;
5740 spin_unlock_irqrestore(&device_domain_lock, flags);
5741
5742 return 0;
5743}
5744
5745static int intel_iommu_disable_auxd(struct device *dev)
5746{
5747 struct device_domain_info *info;
5748 unsigned long flags;
5749
5750 spin_lock_irqsave(&device_domain_lock, flags);
5751 info = dev->archdata.iommu;
5752 if (!WARN_ON(!info))
5753 info->auxd_enabled = 0;
5754 spin_unlock_irqrestore(&device_domain_lock, flags);
5755
5756 return 0;
5757}
5758
5759/*
5760 * A PCI Express Designated Vendor-Specific Extended Capability is defined
5761 * in section 3.7 of the Intel Scalable I/O Virtualization technical spec,
5762 * allowing system software and tools to detect endpoint devices that support
5763 * Intel Scalable I/O Virtualization without any host driver dependency.
5764 *
5765 * Returns the address of the matching extended capability structure within
5766 * the device's PCI configuration space or 0 if the device does not support
5767 * it.
5768 */
5769static int siov_find_pci_dvsec(struct pci_dev *pdev)
5770{
5771 int pos;
5772 u16 vendor, id;
5773
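	/* Walk the PCI Express DVSEC structures (extended capability ID 0x23). */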
5774 pos = pci_find_next_ext_capability(pdev, 0, 0x23);
5775 while (pos) {
5776 pci_read_config_word(pdev, pos + 4, &vendor);
5777 pci_read_config_word(pdev, pos + 8, &id);
5778 if (vendor == PCI_VENDOR_ID_INTEL && id == 5)
5779 return pos;
5780
5781 pos = pci_find_next_ext_capability(pdev, pos, 0x23);
5782 }
5783
5784 return 0;
5785}
5786
5787static bool
5788intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat)
5789{
5790 if (feat == IOMMU_DEV_FEAT_AUX) {
5791 int ret;
5792
5793 if (!dev_is_pci(dev) || dmar_disabled ||
5794 !scalable_mode_support() || !iommu_pasid_support())
5795 return false;
5796
5797 ret = pci_pasid_features(to_pci_dev(dev));
5798 if (ret < 0)
5799 return false;
5800
5801 return !!siov_find_pci_dvsec(to_pci_dev(dev));
5802 }
5803
5804 return false;
5805}
5806
5807static int
5808intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
5809{
5810 if (feat == IOMMU_DEV_FEAT_AUX)
5811 return intel_iommu_enable_auxd(dev);
5812
5813 return -ENODEV;
5814}
5815
5816static int
5817intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat)
5818{
5819 if (feat == IOMMU_DEV_FEAT_AUX)
5820 return intel_iommu_disable_auxd(dev);
5821
5822 return -ENODEV;
5823}
5824
5825static bool
5826intel_iommu_dev_feat_enabled(struct device *dev, enum iommu_dev_features feat)
5827{
5828 struct device_domain_info *info = dev->archdata.iommu;
5829
5830 if (feat == IOMMU_DEV_FEAT_AUX)
5831 return scalable_mode_support() && info && info->auxd_enabled;
5832
5833 return false;
5834}
5835
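/* Return the default PASID used for auxiliary domain attach, or -EINVAL. */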
0e8000f8
LB
5836static int
5837intel_iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev)
5838{
5839 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5840
5841 return dmar_domain->default_pasid > 0 ?
5842 dmar_domain->default_pasid : -EINVAL;
5843}
5844
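/*
 * Domain attach is deferred for devices whose DMA translation was left
 * enabled by the firmware (see intel_iommu_add_device()).
 */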
8af46c78
LB
5845static bool intel_iommu_is_attach_deferred(struct iommu_domain *domain,
5846 struct device *dev)
5847{
5848 return dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO;
5849}
5850
b0119e87 5851const struct iommu_ops intel_iommu_ops = {
0659b8dc
EA
5852 .capable = intel_iommu_capable,
5853 .domain_alloc = intel_iommu_domain_alloc,
5854 .domain_free = intel_iommu_domain_free,
5855 .attach_dev = intel_iommu_attach_device,
5856 .detach_dev = intel_iommu_detach_device,
67b8e02b
LB
5857 .aux_attach_dev = intel_iommu_aux_attach_device,
5858 .aux_detach_dev = intel_iommu_aux_detach_device,
0e8000f8 5859 .aux_get_pasid = intel_iommu_aux_get_pasid,
0659b8dc
EA
5860 .map = intel_iommu_map,
5861 .unmap = intel_iommu_unmap,
0659b8dc
EA
5862 .iova_to_phys = intel_iommu_iova_to_phys,
5863 .add_device = intel_iommu_add_device,
5864 .remove_device = intel_iommu_remove_device,
5865 .get_resv_regions = intel_iommu_get_resv_regions,
5866 .put_resv_regions = intel_iommu_put_resv_regions,
73bcbdc9 5867 .apply_resv_region = intel_iommu_apply_resv_region,
0659b8dc 5868 .device_group = pci_device_group,
95587a75
LB
5869 .dev_has_feat = intel_iommu_dev_has_feat,
5870 .dev_feat_enabled = intel_iommu_dev_feat_enabled,
5871 .dev_enable_feat = intel_iommu_dev_enable_feat,
5872 .dev_disable_feat = intel_iommu_dev_disable_feat,
8af46c78 5873 .is_attach_deferred = intel_iommu_is_attach_deferred,
0659b8dc 5874 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
a8bcbb0d 5875};
9af88143 5876
9452618e
DV
5877static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
5878{
5879 /* G4x/GM45 integrated gfx dmar support is totally busted. */
932a6523 5880 pci_info(dev, "Disabling IOMMU for graphics on this chipset\n");
9452618e
DV
5881 dmar_map_gfx = 0;
5882}
5883
5884DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
5885DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
5886DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
5887DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
5888DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
5889DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
5890DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
5891
d34d6517 5892static void quirk_iommu_rwbf(struct pci_dev *dev)
9af88143
DW
5893{
5894 /*
5895 * Mobile 4 Series Chipset neglects to set RWBF capability,
210561ff 5896 * but needs it. Same seems to hold for the desktop versions.
9af88143 5897 */
932a6523 5898 pci_info(dev, "Forcing write-buffer flush capability\n");
9af88143
DW
5899 rwbf_quirk = 1;
5900}
5901
5902DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
210561ff
DV
5903DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
5904DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
5905DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
5906DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
5907DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
5908DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
e0fc7e0b 5909
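/* GGC is the graphics control register in the host bridge's config space. */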
eecfd57f
AJ
5910#define GGC 0x52
5911#define GGC_MEMORY_SIZE_MASK (0xf << 8)
5912#define GGC_MEMORY_SIZE_NONE (0x0 << 8)
5913#define GGC_MEMORY_SIZE_1M (0x1 << 8)
5914#define GGC_MEMORY_SIZE_2M (0x3 << 8)
5915#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
5916#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
5917#define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
5918#define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
5919
d34d6517 5920static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
9eecabcb
DW
5921{
5922 unsigned short ggc;
5923
eecfd57f 5924 if (pci_read_config_word(dev, GGC, &ggc))
9eecabcb
DW
5925 return;
5926
eecfd57f 5927 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
932a6523 5928 pci_info(dev, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
9eecabcb 5929 dmar_map_gfx = 0;
6fbcfb3e
DW
5930 } else if (dmar_map_gfx) {
5931 /* we have to ensure the gfx device is idle before we flush */
932a6523 5932 pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n");
6fbcfb3e
DW
5933 intel_iommu_strict = 1;
5934 }
9eecabcb
DW
5935}
5936DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
5937DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
5938DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
5939DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
5940
e0fc7e0b
DW
5941/* On Tylersburg chipsets, some BIOSes have been known to enable the
5942 ISOCH DMAR unit for the Azalia sound device, but not give it any
5943 TLB entries, which causes it to deadlock. Check for that. We do
5944 this in a function called from init_dmars(), instead of in a PCI
5945 quirk, because we don't want to print the obnoxious "BIOS broken"
5946 message if VT-d is actually disabled.
5947*/
5948static void __init check_tylersburg_isoch(void)
5949{
5950 struct pci_dev *pdev;
5951 uint32_t vtisochctrl;
5952
5953 /* If there's no Azalia in the system anyway, forget it. */
5954 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
5955 if (!pdev)
5956 return;
5957 pci_dev_put(pdev);
5958
5959 /* System Management Registers. Might be hidden, in which case
5960 we can't do the sanity check. But that's OK, because the
5961 known-broken BIOSes _don't_ actually hide it, so far. */
5962 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
5963 if (!pdev)
5964 return;
5965
5966 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
5967 pci_dev_put(pdev);
5968 return;
5969 }
5970
5971 pci_dev_put(pdev);
5972
5973 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
5974 if (vtisochctrl & 1)
5975 return;
5976
5977 /* Drop all bits other than the number of TLB entries */
5978 vtisochctrl &= 0x1c;
5979
5980 /* If we have the recommended number of TLB entries (16), fine. */
5981 if (vtisochctrl == 0x10)
5982 return;
5983
5984 /* Zero TLB entries? You get to ride the short bus to school. */
5985 if (!vtisochctrl) {
5986 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
5987 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
5988 dmi_get_system_info(DMI_BIOS_VENDOR),
5989 dmi_get_system_info(DMI_BIOS_VERSION),
5990 dmi_get_system_info(DMI_PRODUCT_VERSION));
5991 iommu_identity_mapping |= IDENTMAP_AZALIA;
5992 return;
5993 }
9f10e5bf
JR
5994
5995 pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
e0fc7e0b
DW
5996 vtisochctrl);
5997}