// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright © 2006-2014 Intel Corporation.
 *
 * Authors: David Woodhouse <dwmw2@infradead.org>,
 *          Ashok Raj <ashok.raj@intel.com>,
 *          Shaohua Li <shaohua.li@intel.com>,
 *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
 *          Fenghua Yu <fenghua.yu@intel.com>
 *          Joerg Roedel <jroedel@suse.de>
 */

#define pr_fmt(fmt)     "DMAR: " fmt
#define dev_fmt(fmt)    pr_fmt(fmt)

#include <linux/init.h>
#include <linux/bitmap.h>
#include <linux/debugfs.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/dma-mapping.h>
#include <linux/mempool.h>
#include <linux/memory.h>
#include <linux/cpu.h>
#include <linux/timer.h>
#include <linux/io.h>
#include <linux/iova.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
#include <linux/syscore_ops.h>
#include <linux/tboot.h>
#include <linux/dmi.h>
#include <linux/pci-ats.h>
#include <linux/memblock.h>
#include <linux/dma-contiguous.h>
#include <linux/dma-direct.h>
#include <linux/crash_dump.h>
#include <linux/numa.h>
#include <linux/swiotlb.h>
#include <asm/irq_remapping.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>
#include <trace/events/intel_iommu.h>

#include "irq_remapping.h"
#include "intel-pasid.h"

#define ROOT_SIZE		VTD_PAGE_SIZE
#define CONTEXT_SIZE		VTD_PAGE_SIZE

#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)

#define IOAPIC_RANGE_START	(0xfee00000)
#define IOAPIC_RANGE_END	(0xfeefffff)
#define IOVA_START_ADDR		(0x1000)

#define DEFAULT_DOMAIN_ADDRESS_WIDTH 57

#define MAX_AGAW_WIDTH 64
#define MAX_AGAW_PFN_WIDTH	(MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)

#define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)

/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
   to match. That way, we can use 'unsigned long' for PFNs with impunity. */
#define DOMAIN_MAX_PFN(gaw)	((unsigned long) min_t(uint64_t, \
				__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
#define DOMAIN_MAX_ADDR(gaw)	(((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)

/* IO virtual address start page frame number */
#define IOVA_START_PFN		(1)

#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)

/* page table handling */
#define LEVEL_STRIDE		(9)
#define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)

/*
 * This bitmap is used to advertise the page sizes our hardware supports
 * to the IOMMU core, which will then use this information to split
 * physically contiguous memory regions it is mapping into page sizes
 * that we support.
 *
 * Traditionally the IOMMU core just handed us the mappings directly,
 * after making sure the size is an order of a 4KiB page and that the
 * mapping has natural alignment.
 *
 * To retain this behavior, we currently advertise that we support
 * all page sizes that are an order of 4KiB.
 *
 * If at some point we'd like to utilize the IOMMU core's new behavior,
 * we could change this to advertise the real page sizes we support.
 */
#define INTEL_IOMMU_PGSIZES	(~0xFFFUL)

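/*
 * Adjusted guest address width (AGAW) helpers.  AGAW 0 corresponds to a
 * 2-level page table covering a 30-bit address space; each increment adds
 * one page-table level, i.e. LEVEL_STRIDE (9) more bits of address width,
 * capped at MAX_AGAW_WIDTH.
 */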
static inline int agaw_to_level(int agaw)
{
	return agaw + 2;
}

static inline int agaw_to_width(int agaw)
{
	return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
}

static inline int width_to_agaw(int width)
{
	return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
}

static inline unsigned int level_to_offset_bits(int level)
{
	return (level - 1) * LEVEL_STRIDE;
}

static inline int pfn_level_offset(unsigned long pfn, int level)
{
	return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
}

static inline unsigned long level_mask(int level)
{
	return -1UL << level_to_offset_bits(level);
}

static inline unsigned long level_size(int level)
{
	return 1UL << level_to_offset_bits(level);
}

static inline unsigned long align_to_level(unsigned long pfn, int level)
{
	return (pfn + level_size(level) - 1) & level_mask(level);
}

static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
{
	return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
}

/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
   are never going to work. */
static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
{
	return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
}

static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
{
	return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
}
static inline unsigned long page_to_dma_pfn(struct page *pg)
{
	return mm_to_dma_pfn(page_to_pfn(pg));
}
static inline unsigned long virt_to_dma_pfn(void *p)
{
	return page_to_dma_pfn(virt_to_page(p));
}

/* global iommu list, set NULL for ignored DMAR units */
static struct intel_iommu **g_iommus;

static void __init check_tylersburg_isoch(void);
static int rwbf_quirk;

/*
 * set to 1 to panic kernel if we can't successfully enable VT-d
 * (used when kernel is launched w/ TXT)
 */
static int force_on = 0;
int intel_iommu_tboot_noforce;
static int no_platform_optin;

#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))

/*
 * Take a root_entry and return the Lower Context Table Pointer (LCTP)
 * if marked present.
 */
static phys_addr_t root_entry_lctp(struct root_entry *re)
{
	if (!(re->lo & 1))
		return 0;

	return re->lo & VTD_PAGE_MASK;
}

/*
 * Take a root_entry and return the Upper Context Table Pointer (UCTP)
 * if marked present.
 */
static phys_addr_t root_entry_uctp(struct root_entry *re)
{
	if (!(re->hi & 1))
		return 0;

	return re->hi & VTD_PAGE_MASK;
}

static inline void context_clear_pasid_enable(struct context_entry *context)
{
	context->lo &= ~(1ULL << 11);
}

static inline bool context_pasid_enabled(struct context_entry *context)
{
	return !!(context->lo & (1ULL << 11));
}

static inline void context_set_copied(struct context_entry *context)
{
	context->hi |= (1ull << 3);
}

static inline bool context_copied(struct context_entry *context)
{
	return !!(context->hi & (1ULL << 3));
}

static inline bool __context_present(struct context_entry *context)
{
	return (context->lo & 1);
}

bool context_present(struct context_entry *context)
{
	return context_pasid_enabled(context) ?
	     __context_present(context) :
	     __context_present(context) && !context_copied(context);
}

static inline void context_set_present(struct context_entry *context)
{
	context->lo |= 1;
}

static inline void context_set_fault_enable(struct context_entry *context)
{
	context->lo &= (((u64)-1) << 2) | 1;
}

static inline void context_set_translation_type(struct context_entry *context,
						unsigned long value)
{
	context->lo &= (((u64)-1) << 4) | 3;
	context->lo |= (value & 3) << 2;
}

static inline void context_set_address_root(struct context_entry *context,
					    unsigned long value)
{
	context->lo &= ~VTD_PAGE_MASK;
	context->lo |= value & VTD_PAGE_MASK;
}

static inline void context_set_address_width(struct context_entry *context,
					     unsigned long value)
{
	context->hi |= value & 7;
}

static inline void context_set_domain_id(struct context_entry *context,
					 unsigned long value)
{
	context->hi |= (value & ((1 << 16) - 1)) << 8;
}

static inline int context_domain_id(struct context_entry *c)
{
	return((c->hi >> 8) & 0xffff);
}

static inline void context_clear_entry(struct context_entry *context)
{
	context->lo = 0;
	context->hi = 0;
}

/*
 * This domain is a statically identity mapping domain.
 *	1. This domain creates a static 1:1 mapping to all usable memory.
 *	2. It maps to each iommu if successful.
 *	3. Each iommu maps to this domain if successful.
 */
static struct dmar_domain *si_domain;
static int hw_pass_through = 1;

/* si_domain contains multiple devices */
#define DOMAIN_FLAG_STATIC_IDENTITY		BIT(0)

/*
 * This is a DMA domain allocated through the iommu domain allocation
 * interface. But one or more devices belonging to this domain have
 * been chosen to use a private domain. We should avoid using the
 * map/unmap/iova_to_phys APIs on it.
 */
#define DOMAIN_FLAG_LOSE_CHILDREN		BIT(1)

#define for_each_domain_iommu(idx, domain)			\
	for (idx = 0; idx < g_num_of_iommus; idx++)		\
		if (domain->iommu_refcnt[idx])

struct dmar_rmrr_unit {
	struct list_head list;		/* list of rmrr units	*/
	struct acpi_dmar_header *hdr;	/* ACPI header		*/
	u64	base_address;		/* reserved base address*/
	u64	end_address;		/* reserved end address */
	struct dmar_dev_scope *devices;	/* target devices */
	int	devices_cnt;		/* target device count */
};

struct dmar_atsr_unit {
	struct list_head list;		/* list of ATSR units */
	struct acpi_dmar_header *hdr;	/* ACPI header */
	struct dmar_dev_scope *devices;	/* target devices */
	int devices_cnt;		/* target device count */
	u8 include_all:1;		/* include all ports */
};

static LIST_HEAD(dmar_atsr_units);
static LIST_HEAD(dmar_rmrr_units);

#define for_each_rmrr_units(rmrr) \
	list_for_each_entry(rmrr, &dmar_rmrr_units, list)

/* bitmap for indexing intel_iommus */
static int g_num_of_iommus;

static void domain_exit(struct dmar_domain *domain);
static void domain_remove_dev_info(struct dmar_domain *domain);
static void dmar_remove_one_dev_info(struct device *dev);
static void __dmar_remove_one_dev_info(struct device_domain_info *info);
static void domain_context_clear(struct intel_iommu *iommu,
				 struct device *dev);
static int domain_detach_iommu(struct dmar_domain *domain,
			       struct intel_iommu *iommu);
static bool device_is_rmrr_locked(struct device *dev);
static int intel_iommu_attach_device(struct iommu_domain *domain,
				     struct device *dev);
static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
					    dma_addr_t iova);

#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
int dmar_disabled = 0;
#else
int dmar_disabled = 1;
#endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/

int intel_iommu_sm;
int intel_iommu_enabled = 0;
EXPORT_SYMBOL_GPL(intel_iommu_enabled);

static int dmar_map_gfx = 1;
static int dmar_forcedac;
static int intel_iommu_strict;
static int intel_iommu_superpage = 1;
static int iommu_identity_mapping;
static int intel_no_bounce;

#define IDENTMAP_ALL		1
#define IDENTMAP_GFX		2
#define IDENTMAP_AZALIA		4

int intel_iommu_gfx_mapped;
EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);

#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
#define DEFER_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-2))
static DEFINE_SPINLOCK(device_domain_lock);
static LIST_HEAD(device_domain_list);

#define device_needs_bounce(d) (!intel_no_bounce && dev_is_pci(d) &&	\
				to_pci_dev(d)->untrusted)

/*
 * Iterate over elements in device_domain_list and call the specified
 * callback @fn against each element.
 */
int for_each_device_domain(int (*fn)(struct device_domain_info *info,
				     void *data), void *data)
{
	int ret = 0;
	unsigned long flags;
	struct device_domain_info *info;

	spin_lock_irqsave(&device_domain_lock, flags);
	list_for_each_entry(info, &device_domain_list, global) {
		ret = fn(info, data);
		if (ret) {
			spin_unlock_irqrestore(&device_domain_lock, flags);
			return ret;
		}
	}
	spin_unlock_irqrestore(&device_domain_lock, flags);

	return 0;
}

const struct iommu_ops intel_iommu_ops;

static bool translation_pre_enabled(struct intel_iommu *iommu)
{
	return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
}

static void clear_translation_pre_enabled(struct intel_iommu *iommu)
{
	iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
}

static void init_translation_status(struct intel_iommu *iommu)
{
	u32 gsts;

	gsts = readl(iommu->reg + DMAR_GSTS_REG);
	if (gsts & DMA_GSTS_TES)
		iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
}

/* Convert a generic 'struct iommu_domain' to the private 'struct dmar_domain' */
static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
{
	return container_of(dom, struct dmar_domain, domain);
}

static int __init intel_iommu_setup(char *str)
{
	if (!str)
		return -EINVAL;
	while (*str) {
		if (!strncmp(str, "on", 2)) {
			dmar_disabled = 0;
			pr_info("IOMMU enabled\n");
		} else if (!strncmp(str, "off", 3)) {
			dmar_disabled = 1;
			no_platform_optin = 1;
			pr_info("IOMMU disabled\n");
		} else if (!strncmp(str, "igfx_off", 8)) {
			dmar_map_gfx = 0;
			pr_info("Disable GFX device mapping\n");
		} else if (!strncmp(str, "forcedac", 8)) {
			pr_info("Forcing DAC for PCI devices\n");
			dmar_forcedac = 1;
		} else if (!strncmp(str, "strict", 6)) {
			pr_info("Disable batched IOTLB flush\n");
			intel_iommu_strict = 1;
		} else if (!strncmp(str, "sp_off", 6)) {
			pr_info("Disable supported super page\n");
			intel_iommu_superpage = 0;
		} else if (!strncmp(str, "sm_on", 5)) {
			pr_info("Intel-IOMMU: scalable mode supported\n");
			intel_iommu_sm = 1;
		} else if (!strncmp(str, "tboot_noforce", 13)) {
			printk(KERN_INFO
				"Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
			intel_iommu_tboot_noforce = 1;
		} else if (!strncmp(str, "nobounce", 8)) {
			pr_info("Intel-IOMMU: No bounce buffer. This could expose security risks of DMA attacks\n");
			intel_no_bounce = 1;
		}

		str += strcspn(str, ",");
		while (*str == ',')
			str++;
	}
	return 0;
}
__setup("intel_iommu=", intel_iommu_setup);

static struct kmem_cache *iommu_domain_cache;
static struct kmem_cache *iommu_devinfo_cache;

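/*
 * iommu->domains is a two-level array indexed by domain ID: the top level
 * holds pointers to chunks of 256 dmar_domain pointers, allocated on demand
 * (see iommu_init_domains() and set_iommu_domain() below).
 */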
static struct dmar_domain *get_iommu_domain(struct intel_iommu *iommu, u16 did)
{
	struct dmar_domain **domains;
	int idx = did >> 8;

	domains = iommu->domains[idx];
	if (!domains)
		return NULL;

	return domains[did & 0xff];
}

static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
			     struct dmar_domain *domain)
{
	struct dmar_domain **domains;
	int idx = did >> 8;

	if (!iommu->domains[idx]) {
		size_t size = 256 * sizeof(struct dmar_domain *);
		iommu->domains[idx] = kzalloc(size, GFP_ATOMIC);
	}

	domains = iommu->domains[idx];
	if (WARN_ON(!domains))
		return;
	else
		domains[did & 0xff] = domain;
}

void *alloc_pgtable_page(int node)
{
	struct page *page;
	void *vaddr = NULL;

	page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
	if (page)
		vaddr = page_address(page);
	return vaddr;
}

void free_pgtable_page(void *vaddr)
{
	free_page((unsigned long)vaddr);
}

static inline void *alloc_domain_mem(void)
{
	return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
}

static void free_domain_mem(void *vaddr)
{
	kmem_cache_free(iommu_domain_cache, vaddr);
}

static inline void *alloc_devinfo_mem(void)
{
	return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
}

static inline void free_devinfo_mem(void *vaddr)
{
	kmem_cache_free(iommu_devinfo_cache, vaddr);
}

static inline int domain_type_is_si(struct dmar_domain *domain)
{
	return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
}

static inline int domain_pfn_supported(struct dmar_domain *domain,
				       unsigned long pfn)
{
	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;

	return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
}

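/*
 * Pick the highest adjusted guest address width (AGAW) supported by this
 * IOMMU that does not exceed the width needed for @max_gaw, based on the
 * SAGAW field of the capability register.
 */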
static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
{
	unsigned long sagaw;
	int agaw = -1;

	sagaw = cap_sagaw(iommu->cap);
	for (agaw = width_to_agaw(max_gaw);
	     agaw >= 0; agaw--) {
		if (test_bit(agaw, &sagaw))
			break;
	}

	return agaw;
}

/*
 * Calculate max SAGAW for each iommu.
 */
int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
{
	return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
}

/*
 * Calculate agaw for each iommu.
 * "SAGAW" may be different across iommus; use a default agaw, and fall
 * back to a smaller supported agaw for iommus that don't support the
 * default agaw.
 */
int iommu_calculate_agaw(struct intel_iommu *iommu)
{
	return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
}

/* This function only returns a single iommu in a domain */
struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
{
	int iommu_id;

	/* si_domain and vm domain should not get here. */
	if (WARN_ON(domain->domain.type != IOMMU_DOMAIN_DMA))
		return NULL;

	for_each_domain_iommu(iommu_id, domain)
		break;

	if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
		return NULL;

	return g_iommus[iommu_id];
}

static void domain_update_iommu_coherency(struct dmar_domain *domain)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	bool found = false;
	int i;

	domain->iommu_coherency = 1;

	for_each_domain_iommu(i, domain) {
		found = true;
		if (!ecap_coherent(g_iommus[i]->ecap)) {
			domain->iommu_coherency = 0;
			break;
		}
	}
	if (found)
		return;

	/* No hardware attached; use lowest common denominator */
	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (!ecap_coherent(iommu->ecap)) {
			domain->iommu_coherency = 0;
			break;
		}
	}
	rcu_read_unlock();
}

static int domain_update_iommu_snooping(struct intel_iommu *skip)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	int ret = 1;

	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (iommu != skip) {
			if (!ecap_sc_support(iommu->ecap)) {
				ret = 0;
				break;
			}
		}
	}
	rcu_read_unlock();

	return ret;
}

static int domain_update_iommu_superpage(struct intel_iommu *skip)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	int mask = 0xf;

	if (!intel_iommu_superpage) {
		return 0;
	}

	/* set iommu_superpage to the smallest common denominator */
	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (iommu != skip) {
			mask &= cap_super_page_val(iommu->cap);
			if (!mask)
				break;
		}
	}
	rcu_read_unlock();

	return fls(mask);
}

/* Some capabilities may be different across iommus */
static void domain_update_iommu_cap(struct dmar_domain *domain)
{
	domain_update_iommu_coherency(domain);
	domain->iommu_snooping = domain_update_iommu_snooping(NULL);
	domain->iommu_superpage = domain_update_iommu_superpage(NULL);
}

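/*
 * Look up the context entry for (@bus, @devfn), allocating the context
 * table if @alloc is set.  In scalable mode each root entry points to two
 * context tables (the low half for devfn 0x00-0x7f, the high half for
 * 0x80-0xff) and context entries are twice the legacy size, hence the
 * devfn adjustment below.
 */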
struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
					 u8 devfn, int alloc)
{
	struct root_entry *root = &iommu->root_entry[bus];
	struct context_entry *context;
	u64 *entry;

	entry = &root->lo;
	if (sm_supported(iommu)) {
		if (devfn >= 0x80) {
			devfn -= 0x80;
			entry = &root->hi;
		}
		devfn *= 2;
	}
	if (*entry & 1)
		context = phys_to_virt(*entry & VTD_PAGE_MASK);
	else {
		unsigned long phy_addr;
		if (!alloc)
			return NULL;

		context = alloc_pgtable_page(iommu->node);
		if (!context)
			return NULL;

		__iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
		phy_addr = virt_to_phys((void *)context);
		*entry = phy_addr | 1;
		__iommu_flush_cache(iommu, entry, sizeof(*entry));
	}
	return &context[devfn];
}

static int iommu_dummy(struct device *dev)
{
	return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
}

/**
 * is_downstream_to_pci_bridge - test if a device belongs to the PCI
 *				 sub-hierarchy of a candidate PCI-PCI bridge
 * @dev: candidate PCI device belonging to @bridge PCI sub-hierarchy
 * @bridge: the candidate PCI-PCI bridge
 *
 * Return: true if @dev belongs to @bridge PCI sub-hierarchy, else false.
 */
static bool
is_downstream_to_pci_bridge(struct device *dev, struct device *bridge)
{
	struct pci_dev *pdev, *pbridge;

	if (!dev_is_pci(dev) || !dev_is_pci(bridge))
		return false;

	pdev = to_pci_dev(dev);
	pbridge = to_pci_dev(bridge);

	if (pbridge->subordinate &&
	    pbridge->subordinate->number <= pdev->bus->number &&
	    pbridge->subordinate->busn_res.end >= pdev->bus->number)
		return true;

	return false;
}

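/*
 * Find the IOMMU (DRHD unit) whose device scope covers @dev and report the
 * bus/devfn to use when programming context entries for it.  VFs are
 * matched via their PF but keep their own BDF; non-PCI devices with an
 * ACPI companion are matched through that companion.
 */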
static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
{
	struct dmar_drhd_unit *drhd = NULL;
	struct intel_iommu *iommu;
	struct device *tmp;
	struct pci_dev *pdev = NULL;
	u16 segment = 0;
	int i;

	if (iommu_dummy(dev))
		return NULL;

	if (dev_is_pci(dev)) {
		struct pci_dev *pf_pdev;

		pdev = to_pci_dev(dev);

#ifdef CONFIG_X86
		/* VMD child devices currently cannot be handled individually */
		if (is_vmd(pdev->bus))
			return NULL;
#endif

		/* VFs aren't listed in scope tables; we need to look up
		 * the PF instead to find the IOMMU. */
		pf_pdev = pci_physfn(pdev);
		dev = &pf_pdev->dev;
		segment = pci_domain_nr(pdev->bus);
	} else if (has_acpi_companion(dev))
		dev = &ACPI_COMPANION(dev)->dev;

	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (pdev && segment != drhd->segment)
			continue;

		for_each_active_dev_scope(drhd->devices,
					  drhd->devices_cnt, i, tmp) {
			if (tmp == dev) {
				/* For a VF use its original BDF# not that of the PF
				 * which we used for the IOMMU lookup. Strictly speaking
				 * we could do this for all PCI devices; we only need to
				 * get the BDF# from the scope table for ACPI matches. */
				if (pdev && pdev->is_virtfn)
					goto got_pdev;

				*bus = drhd->devices[i].bus;
				*devfn = drhd->devices[i].devfn;
				goto out;
			}

			if (is_downstream_to_pci_bridge(dev, tmp))
				goto got_pdev;
		}

		if (pdev && drhd->include_all) {
		got_pdev:
			*bus = pdev->bus->number;
			*devfn = pdev->devfn;
			goto out;
		}
	}
	iommu = NULL;
out:
	rcu_read_unlock();

	return iommu;
}

static void domain_flush_cache(struct dmar_domain *domain,
			       void *addr, int size)
{
	if (!domain->iommu_coherency)
		clflush_cache_range(addr, size);
}

static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
	struct context_entry *context;
	int ret = 0;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	context = iommu_context_addr(iommu, bus, devfn, 0);
	if (context)
		ret = context_present(context);
	spin_unlock_irqrestore(&iommu->lock, flags);
	return ret;
}

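/*
 * Free every context table still referenced from the root table, then the
 * root table itself.  With scalable mode both halves of each root entry
 * (devfn 0x00-0x7f and 0x80-0xff) are walked.
 */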
static void free_context_table(struct intel_iommu *iommu)
{
	int i;
	unsigned long flags;
	struct context_entry *context;

	spin_lock_irqsave(&iommu->lock, flags);
	if (!iommu->root_entry) {
		goto out;
	}
	for (i = 0; i < ROOT_ENTRY_NR; i++) {
		context = iommu_context_addr(iommu, i, 0, 0);
		if (context)
			free_pgtable_page(context);

		if (!sm_supported(iommu))
			continue;

		context = iommu_context_addr(iommu, i, 0x80, 0);
		if (context)
			free_pgtable_page(context);

	}
	free_pgtable_page(iommu->root_entry);
	iommu->root_entry = NULL;
out:
	spin_unlock_irqrestore(&iommu->lock, flags);
}

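/*
 * Walk the domain's page table to the PTE that maps @pfn, allocating any
 * missing intermediate page-table pages on the way down.  If *target_level
 * is zero, stop at the first leaf (present superpage or level-1) entry and
 * report the level actually reached back through *target_level; otherwise
 * stop at the requested level.
 */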
static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
				      unsigned long pfn, int *target_level)
{
	struct dma_pte *parent, *pte;
	int level = agaw_to_level(domain->agaw);
	int offset;

	BUG_ON(!domain->pgd);

	if (!domain_pfn_supported(domain, pfn))
		/* Address beyond IOMMU's addressing capabilities. */
		return NULL;

	parent = domain->pgd;

	while (1) {
		void *tmp_page;

		offset = pfn_level_offset(pfn, level);
		pte = &parent[offset];
		if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
			break;
		if (level == *target_level)
			break;

		if (!dma_pte_present(pte)) {
			uint64_t pteval;

			tmp_page = alloc_pgtable_page(domain->nid);

			if (!tmp_page)
				return NULL;

			domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
			pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
			if (cmpxchg64(&pte->val, 0ULL, pteval))
				/* Someone else set it while we were thinking; use theirs. */
				free_pgtable_page(tmp_page);
			else
				domain_flush_cache(domain, pte, sizeof(*pte));
		}
		if (level == 1)
			break;

		parent = phys_to_virt(dma_pte_addr(pte));
		level--;
	}

	if (!*target_level)
		*target_level = level;

	return pte;
}

/* return address's pte at specific level */
static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
					 unsigned long pfn,
					 int level, int *large_page)
{
	struct dma_pte *parent, *pte;
	int total = agaw_to_level(domain->agaw);
	int offset;

	parent = domain->pgd;
	while (level <= total) {
		offset = pfn_level_offset(pfn, total);
		pte = &parent[offset];
		if (level == total)
			return pte;

		if (!dma_pte_present(pte)) {
			*large_page = total;
			break;
		}

		if (dma_pte_superpage(pte)) {
			*large_page = total;
			return pte;
		}

		parent = phys_to_virt(dma_pte_addr(pte));
		total--;
	}
	return NULL;
}

/* clear last level pte, a tlb flush should follow */
static void dma_pte_clear_range(struct dmar_domain *domain,
				unsigned long start_pfn,
				unsigned long last_pfn)
{
	unsigned int large_page;
	struct dma_pte *first_pte, *pte;

	BUG_ON(!domain_pfn_supported(domain, start_pfn));
	BUG_ON(!domain_pfn_supported(domain, last_pfn));
	BUG_ON(start_pfn > last_pfn);

	/* we don't need lock here; nobody else touches the iova range */
	do {
		large_page = 1;
		first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
		if (!pte) {
			start_pfn = align_to_level(start_pfn + 1, large_page + 1);
			continue;
		}
		do {
			dma_clear_pte(pte);
			start_pfn += lvl_to_nr_pages(large_page);
			pte++;
		} while (start_pfn <= last_pfn && !first_pte_in_page(pte));

		domain_flush_cache(domain, first_pte,
				   (void *)pte - (void *)first_pte);

	} while (start_pfn && start_pfn <= last_pfn);
}

static void dma_pte_free_level(struct dmar_domain *domain, int level,
			       int retain_level, struct dma_pte *pte,
			       unsigned long pfn, unsigned long start_pfn,
			       unsigned long last_pfn)
{
	pfn = max(start_pfn, pfn);
	pte = &pte[pfn_level_offset(pfn, level)];

	do {
		unsigned long level_pfn;
		struct dma_pte *level_pte;

		if (!dma_pte_present(pte) || dma_pte_superpage(pte))
			goto next;

		level_pfn = pfn & level_mask(level);
		level_pte = phys_to_virt(dma_pte_addr(pte));

		if (level > 2) {
			dma_pte_free_level(domain, level - 1, retain_level,
					   level_pte, level_pfn, start_pfn,
					   last_pfn);
		}

		/*
		 * Free the page table if we're below the level we want to
		 * retain and the range covers the entire table.
		 */
		if (level < retain_level && !(start_pfn > level_pfn ||
		      last_pfn < level_pfn + level_size(level) - 1)) {
			dma_clear_pte(pte);
			domain_flush_cache(domain, pte, sizeof(*pte));
			free_pgtable_page(level_pte);
		}
next:
		pfn += level_size(level);
	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);
}

/*
 * clear last level (leaf) ptes and free page table pages below the
 * level we wish to keep intact.
 */
static void dma_pte_free_pagetable(struct dmar_domain *domain,
				   unsigned long start_pfn,
				   unsigned long last_pfn,
				   int retain_level)
{
	BUG_ON(!domain_pfn_supported(domain, start_pfn));
	BUG_ON(!domain_pfn_supported(domain, last_pfn));
	BUG_ON(start_pfn > last_pfn);

	dma_pte_clear_range(domain, start_pfn, last_pfn);

	/* We don't need lock here; nobody else touches the iova range */
	dma_pte_free_level(domain, agaw_to_level(domain->agaw), retain_level,
			   domain->pgd, 0, start_pfn, last_pfn);

	/* free pgd */
	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
		free_pgtable_page(domain->pgd);
		domain->pgd = NULL;
	}
}

/* When a page at a given level is being unlinked from its parent, we don't
   need to *modify* it at all. All we need to do is make a list of all the
   pages which can be freed just as soon as we've flushed the IOTLB and we
   know the hardware page-walk will no longer touch them.
   The 'pte' argument is the *parent* PTE, pointing to the page that is to
   be freed. */
static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
					    int level, struct dma_pte *pte,
					    struct page *freelist)
{
	struct page *pg;

	pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
	pg->freelist = freelist;
	freelist = pg;

	if (level == 1)
		return freelist;

	pte = page_address(pg);
	do {
		if (dma_pte_present(pte) && !dma_pte_superpage(pte))
			freelist = dma_pte_list_pagetables(domain, level - 1,
							   pte, freelist);
		pte++;
	} while (!first_pte_in_page(pte));

	return freelist;
}

static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
					struct dma_pte *pte, unsigned long pfn,
					unsigned long start_pfn,
					unsigned long last_pfn,
					struct page *freelist)
{
	struct dma_pte *first_pte = NULL, *last_pte = NULL;

	pfn = max(start_pfn, pfn);
	pte = &pte[pfn_level_offset(pfn, level)];

	do {
		unsigned long level_pfn;

		if (!dma_pte_present(pte))
			goto next;

		level_pfn = pfn & level_mask(level);

		/* If range covers entire pagetable, free it */
		if (start_pfn <= level_pfn &&
		    last_pfn >= level_pfn + level_size(level) - 1) {
			/* These subordinate page tables are going away entirely. Don't
			   bother to clear them; we're just going to *free* them. */
			if (level > 1 && !dma_pte_superpage(pte))
				freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);

			dma_clear_pte(pte);
			if (!first_pte)
				first_pte = pte;
			last_pte = pte;
		} else if (level > 1) {
			/* Recurse down into a level that isn't *entirely* obsolete */
			freelist = dma_pte_clear_level(domain, level - 1,
						       phys_to_virt(dma_pte_addr(pte)),
						       level_pfn, start_pfn, last_pfn,
						       freelist);
		}
next:
		pfn += level_size(level);
	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);

	if (first_pte)
		domain_flush_cache(domain, first_pte,
				   (void *)++last_pte - (void *)first_pte);

	return freelist;
}

/* We can't just free the pages because the IOMMU may still be walking
   the page tables, and may have cached the intermediate levels. The
   pages can only be freed after the IOTLB flush has been done. */
static struct page *domain_unmap(struct dmar_domain *domain,
				 unsigned long start_pfn,
				 unsigned long last_pfn)
{
	struct page *freelist;

	BUG_ON(!domain_pfn_supported(domain, start_pfn));
	BUG_ON(!domain_pfn_supported(domain, last_pfn));
	BUG_ON(start_pfn > last_pfn);

	/* we don't need lock here; nobody else touches the iova range */
	freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
				       domain->pgd, 0, start_pfn, last_pfn, NULL);

	/* free pgd */
	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
		struct page *pgd_page = virt_to_page(domain->pgd);
		pgd_page->freelist = freelist;
		freelist = pgd_page;

		domain->pgd = NULL;
	}

	return freelist;
}

static void dma_free_pagelist(struct page *freelist)
{
	struct page *pg;

	while ((pg = freelist)) {
		freelist = pg->freelist;
		free_pgtable_page(page_address(pg));
	}
}

static void iova_entry_free(unsigned long data)
{
	struct page *freelist = (struct page *)data;

	dma_free_pagelist(freelist);
}

/* iommu handling */
static int iommu_alloc_root_entry(struct intel_iommu *iommu)
{
	struct root_entry *root;
	unsigned long flags;

	root = (struct root_entry *)alloc_pgtable_page(iommu->node);
	if (!root) {
		pr_err("Allocating root entry for %s failed\n",
			iommu->name);
		return -ENOMEM;
	}

	__iommu_flush_cache(iommu, root, ROOT_SIZE);

	spin_lock_irqsave(&iommu->lock, flags);
	iommu->root_entry = root;
	spin_unlock_irqrestore(&iommu->lock, flags);

	return 0;
}

static void iommu_set_root_entry(struct intel_iommu *iommu)
{
	u64 addr;
	u32 sts;
	unsigned long flag;

	addr = virt_to_phys(iommu->root_entry);
	if (sm_supported(iommu))
		addr |= DMA_RTADDR_SMT;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);

	writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (sts & DMA_GSTS_RTPS), sts);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

void iommu_flush_write_buffer(struct intel_iommu *iommu)
{
	u32 val;
	unsigned long flag;

	if (!rwbf_quirk && !cap_rwbf(iommu->cap))
		return;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (!(val & DMA_GSTS_WBFS)), val);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

/* return value determines whether we need a write buffer flush */
static void __iommu_flush_context(struct intel_iommu *iommu,
				  u16 did, u16 source_id, u8 function_mask,
				  u64 type)
{
	u64 val = 0;
	unsigned long flag;

	switch (type) {
	case DMA_CCMD_GLOBAL_INVL:
		val = DMA_CCMD_GLOBAL_INVL;
		break;
	case DMA_CCMD_DOMAIN_INVL:
		val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
		break;
	case DMA_CCMD_DEVICE_INVL:
		val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
			| DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
		break;
	default:
		BUG();
	}
	val |= DMA_CCMD_ICC;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
		      dmar_readq, (!(val & DMA_CCMD_ICC)), val);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

/* return value determines whether we need a write buffer flush */
static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
				u64 addr, unsigned int size_order, u64 type)
{
	int tlb_offset = ecap_iotlb_offset(iommu->ecap);
	u64 val = 0, val_iva = 0;
	unsigned long flag;

	switch (type) {
	case DMA_TLB_GLOBAL_FLUSH:
		/* global flush doesn't need to set IVA_REG */
		val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
		break;
	case DMA_TLB_DSI_FLUSH:
		val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		break;
	case DMA_TLB_PSI_FLUSH:
		val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		/* IH bit is passed in as part of address */
		val_iva = size_order | addr;
		break;
	default:
		BUG();
	}
	/* Note: set drain read/write */
#if 0
	/*
	 * This is probably to be super secure.. Looks like we can
	 * ignore it without any impact.
	 */
	if (cap_read_drain(iommu->cap))
		val |= DMA_TLB_READ_DRAIN;
#endif
	if (cap_write_drain(iommu->cap))
		val |= DMA_TLB_WRITE_DRAIN;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	/* Note: Only uses first TLB reg currently */
	if (val_iva)
		dmar_writeq(iommu->reg + tlb_offset, val_iva);
	dmar_writeq(iommu->reg + tlb_offset + 8, val);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, tlb_offset + 8,
		      dmar_readq, (!(val & DMA_TLB_IVT)), val);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);

	/* check IOTLB invalidation granularity */
	if (DMA_TLB_IAIG(val) == 0)
		pr_err("Flush IOTLB failed\n");
	if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
		pr_debug("TLB flush request %Lx, actual %Lx\n",
			(unsigned long long)DMA_TLB_IIRG(type),
			(unsigned long long)DMA_TLB_IAIG(val));
}

static struct device_domain_info *
iommu_support_dev_iotlb(struct dmar_domain *domain, struct intel_iommu *iommu,
			u8 bus, u8 devfn)
{
	struct device_domain_info *info;

	assert_spin_locked(&device_domain_lock);

	if (!iommu->qi)
		return NULL;

	list_for_each_entry(info, &domain->devices, link)
		if (info->iommu == iommu && info->bus == bus &&
		    info->devfn == devfn) {
			if (info->ats_supported && info->dev)
				return info;
			break;
		}

	return NULL;
}

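/*
 * Recompute domain->has_iotlb_device: true if at least one PCI device in
 * the domain currently has ATS enabled, i.e. device IOTLB flushes are
 * needed for this domain.
 */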
static void domain_update_iotlb(struct dmar_domain *domain)
{
	struct device_domain_info *info;
	bool has_iotlb_device = false;

	assert_spin_locked(&device_domain_lock);

	list_for_each_entry(info, &domain->devices, link) {
		struct pci_dev *pdev;

		if (!info->dev || !dev_is_pci(info->dev))
			continue;

		pdev = to_pci_dev(info->dev);
		if (pdev->ats_enabled) {
			has_iotlb_device = true;
			break;
		}
	}

	domain->has_iotlb_device = has_iotlb_device;
}

static void iommu_enable_dev_iotlb(struct device_domain_info *info)
{
	struct pci_dev *pdev;

	assert_spin_locked(&device_domain_lock);

	if (!info || !dev_is_pci(info->dev))
		return;

	pdev = to_pci_dev(info->dev);
	/* For IOMMU that supports device IOTLB throttling (DIT), we assign
	 * PFSID to the invalidation desc of a VF such that IOMMU HW can gauge
	 * queue depth at PF level. If DIT is not set, PFSID will be treated as
	 * reserved, which should be set to 0.
	 */
	if (!ecap_dit(info->iommu->ecap))
		info->pfsid = 0;
	else {
		struct pci_dev *pf_pdev;

		/* pdev will be returned if device is not a vf */
		pf_pdev = pci_physfn(pdev);
		info->pfsid = pci_dev_id(pf_pdev);
	}

#ifdef CONFIG_INTEL_IOMMU_SVM
	/* The PCIe spec, in its wisdom, declares that the behaviour of
	   the device if you enable PASID support after ATS support is
	   undefined. So always enable PASID support on devices which
	   have it, even if we can't yet know if we're ever going to
	   use it. */
	if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
		info->pasid_enabled = 1;

	if (info->pri_supported &&
	    (info->pasid_enabled ? pci_prg_resp_pasid_required(pdev) : 1) &&
	    !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
		info->pri_enabled = 1;
#endif
	if (!pdev->untrusted && info->ats_supported &&
	    pci_ats_page_aligned(pdev) &&
	    !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
		info->ats_enabled = 1;
		domain_update_iotlb(info->domain);
		info->ats_qdep = pci_ats_queue_depth(pdev);
	}
}

static void iommu_disable_dev_iotlb(struct device_domain_info *info)
{
	struct pci_dev *pdev;

	assert_spin_locked(&device_domain_lock);

	if (!dev_is_pci(info->dev))
		return;

	pdev = to_pci_dev(info->dev);

	if (info->ats_enabled) {
		pci_disable_ats(pdev);
		info->ats_enabled = 0;
		domain_update_iotlb(info->domain);
	}
#ifdef CONFIG_INTEL_IOMMU_SVM
	if (info->pri_enabled) {
		pci_disable_pri(pdev);
		info->pri_enabled = 0;
	}
	if (info->pasid_enabled) {
		pci_disable_pasid(pdev);
		info->pasid_enabled = 0;
	}
#endif
}

static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
				  u64 addr, unsigned mask)
{
	u16 sid, qdep;
	unsigned long flags;
	struct device_domain_info *info;

	if (!domain->has_iotlb_device)
		return;

	spin_lock_irqsave(&device_domain_lock, flags);
	list_for_each_entry(info, &domain->devices, link) {
		if (!info->ats_enabled)
			continue;

		sid = info->bus << 8 | info->devfn;
		qdep = info->ats_qdep;
		qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
				qdep, addr, mask);
	}
	spin_unlock_irqrestore(&device_domain_lock, flags);
}

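/*
 * Flush the IOTLB for @pages pages starting at @pfn in @domain on @iommu.
 * Page-selective invalidation is used when the hardware supports it and the
 * range is small enough; otherwise this falls back to a domain-selective
 * flush.  Device IOTLBs are flushed too, except for caching-mode updates
 * that only went from non-present to present.
 */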
static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
				  struct dmar_domain *domain,
				  unsigned long pfn, unsigned int pages,
				  int ih, int map)
{
	unsigned int mask = ilog2(__roundup_pow_of_two(pages));
	uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
	u16 did = domain->iommu_did[iommu->seq_id];

	BUG_ON(pages == 0);

	if (ih)
		ih = 1 << 6;
	/*
	 * Fallback to domain selective flush if no PSI support or the size is
	 * too big.
	 * PSI requires page size to be 2 ^ x, and the base address is naturally
	 * aligned to the size
	 */
	if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
		iommu->flush.flush_iotlb(iommu, did, 0, 0,
						DMA_TLB_DSI_FLUSH);
	else
		iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
						DMA_TLB_PSI_FLUSH);

	/*
	 * In caching mode, changes of pages from non-present to present require
	 * flush. However, device IOTLB doesn't need to be flushed in this case.
	 */
	if (!cap_caching_mode(iommu->cap) || !map)
		iommu_flush_dev_iotlb(domain, addr, mask);
}

/* Notification for newly created mappings */
static inline void __mapping_notify_one(struct intel_iommu *iommu,
					struct dmar_domain *domain,
					unsigned long pfn, unsigned int pages)
{
	/* It's a non-present to present mapping. Only flush if caching mode */
	if (cap_caching_mode(iommu->cap))
		iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1);
	else
		iommu_flush_write_buffer(iommu);
}

static void iommu_flush_iova(struct iova_domain *iovad)
{
	struct dmar_domain *domain;
	int idx;

	domain = container_of(iovad, struct dmar_domain, iovad);

	for_each_domain_iommu(idx, domain) {
		struct intel_iommu *iommu = g_iommus[idx];
		u16 did = domain->iommu_did[iommu->seq_id];

		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);

		if (!cap_caching_mode(iommu->cap))
			iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
					      0, MAX_AGAW_PFN_WIDTH);
	}
}

static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
{
	u32 pmen;
	unsigned long flags;

	if (!cap_plmr(iommu->cap) && !cap_phmr(iommu->cap))
		return;

	raw_spin_lock_irqsave(&iommu->register_lock, flags);
	pmen = readl(iommu->reg + DMAR_PMEN_REG);
	pmen &= ~DMA_PMEN_EPM;
	writel(pmen, iommu->reg + DMAR_PMEN_REG);

	/* wait for the protected region status bit to clear */
	IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
		readl, !(pmen & DMA_PMEN_PRS), pmen);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
}

static void iommu_enable_translation(struct intel_iommu *iommu)
{
	u32 sts;
	unsigned long flags;

	raw_spin_lock_irqsave(&iommu->register_lock, flags);
	iommu->gcmd |= DMA_GCMD_TE;
	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (sts & DMA_GSTS_TES), sts);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
}

static void iommu_disable_translation(struct intel_iommu *iommu)
{
	u32 sts;
	unsigned long flag;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	iommu->gcmd &= ~DMA_GCMD_TE;
	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (!(sts & DMA_GSTS_TES)), sts);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

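/*
 * Allocate this IOMMU's domain-ID bitmap and the top level of its two-level
 * domains array.  Domain ID 0 is reserved as a marker for non-allocated IDs
 * (and for invalid translations in caching mode); in scalable mode an extra
 * ID is reserved for first-level/pass-through PASID entries.
 */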
static int iommu_init_domains(struct intel_iommu *iommu)
{
	u32 ndomains, nlongs;
	size_t size;

	ndomains = cap_ndoms(iommu->cap);
	pr_debug("%s: Number of Domains supported <%d>\n",
		 iommu->name, ndomains);
	nlongs = BITS_TO_LONGS(ndomains);

	spin_lock_init(&iommu->lock);

	iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
	if (!iommu->domain_ids) {
		pr_err("%s: Allocating domain id array failed\n",
		       iommu->name);
		return -ENOMEM;
	}

	size = (ALIGN(ndomains, 256) >> 8) * sizeof(struct dmar_domain **);
	iommu->domains = kzalloc(size, GFP_KERNEL);

	if (iommu->domains) {
		size = 256 * sizeof(struct dmar_domain *);
		iommu->domains[0] = kzalloc(size, GFP_KERNEL);
	}

	if (!iommu->domains || !iommu->domains[0]) {
		pr_err("%s: Allocating domain array failed\n",
		       iommu->name);
		kfree(iommu->domain_ids);
		kfree(iommu->domains);
		iommu->domain_ids = NULL;
		iommu->domains = NULL;
		return -ENOMEM;
	}

	/*
	 * If Caching mode is set, then invalid translations are tagged
	 * with domain-id 0, hence we need to pre-allocate it. We also
	 * use domain-id 0 as a marker for non-allocated domain-id, so
	 * make sure it is not used for a real domain.
	 */
	set_bit(0, iommu->domain_ids);

	/*
	 * Vt-d spec rev3.0 (section 6.2.3.1) requires that each pasid
	 * entry for first-level or pass-through translation modes should
	 * be programmed with a domain id different from those used for
	 * second-level or nested translation. We reserve a domain id for
	 * this purpose.
	 */
	if (sm_supported(iommu))
		set_bit(FLPT_DEFAULT_DID, iommu->domain_ids);

	return 0;
}

static void disable_dmar_iommu(struct intel_iommu *iommu)
{
	struct device_domain_info *info, *tmp;
	unsigned long flags;

	if (!iommu->domains || !iommu->domain_ids)
		return;

	spin_lock_irqsave(&device_domain_lock, flags);
	list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
		if (info->iommu != iommu)
			continue;

		if (!info->dev || !info->domain)
			continue;

		__dmar_remove_one_dev_info(info);
	}
	spin_unlock_irqrestore(&device_domain_lock, flags);

	if (iommu->gcmd & DMA_GCMD_TE)
		iommu_disable_translation(iommu);
}

static void free_dmar_iommu(struct intel_iommu *iommu)
{
	if ((iommu->domains) && (iommu->domain_ids)) {
		int elems = ALIGN(cap_ndoms(iommu->cap), 256) >> 8;
		int i;

		for (i = 0; i < elems; i++)
			kfree(iommu->domains[i]);
		kfree(iommu->domains);
		kfree(iommu->domain_ids);
		iommu->domains = NULL;
		iommu->domain_ids = NULL;
	}

	g_iommus[iommu->seq_id] = NULL;

	/* free context mapping */
	free_context_table(iommu);

#ifdef CONFIG_INTEL_IOMMU_SVM
	if (pasid_supported(iommu)) {
		if (ecap_prs(iommu->ecap))
			intel_svm_finish_prq(iommu);
	}
#endif
}

static struct dmar_domain *alloc_domain(int flags)
{
	struct dmar_domain *domain;

	domain = alloc_domain_mem();
	if (!domain)
		return NULL;

	memset(domain, 0, sizeof(*domain));
	domain->nid = NUMA_NO_NODE;
	domain->flags = flags;
	domain->has_iotlb_device = false;
	INIT_LIST_HEAD(&domain->devices);

	return domain;
}

d160aca5
JR
1729/* Must be called with iommu->lock */
1730static int domain_attach_iommu(struct dmar_domain *domain,
fb170fb4
JL
1731 struct intel_iommu *iommu)
1732{
44bde614 1733 unsigned long ndomains;
55d94043 1734 int num;
44bde614 1735
55d94043 1736 assert_spin_locked(&device_domain_lock);
d160aca5 1737 assert_spin_locked(&iommu->lock);
ba395927 1738
29a27719
JR
1739 domain->iommu_refcnt[iommu->seq_id] += 1;
1740 domain->iommu_count += 1;
1741 if (domain->iommu_refcnt[iommu->seq_id] == 1) {
fb170fb4 1742 ndomains = cap_ndoms(iommu->cap);
d160aca5
JR
1743 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1744
1745 if (num >= ndomains) {
1746 pr_err("%s: No free domain ids\n", iommu->name);
1747 domain->iommu_refcnt[iommu->seq_id] -= 1;
1748 domain->iommu_count -= 1;
55d94043 1749 return -ENOSPC;
2c2e2c38 1750 }
ba395927 1751
d160aca5
JR
1752 set_bit(num, iommu->domain_ids);
1753 set_iommu_domain(iommu, num, domain);
1754
1755 domain->iommu_did[iommu->seq_id] = num;
1756 domain->nid = iommu->node;
fb170fb4 1757
fb170fb4
JL
1758 domain_update_iommu_cap(domain);
1759 }
d160aca5 1760
55d94043 1761 return 0;
fb170fb4
JL
1762}
1763
1764static int domain_detach_iommu(struct dmar_domain *domain,
1765 struct intel_iommu *iommu)
1766{
e083ea5b 1767 int num, count;
d160aca5 1768
55d94043 1769 assert_spin_locked(&device_domain_lock);
d160aca5 1770 assert_spin_locked(&iommu->lock);
fb170fb4 1771
29a27719
JR
1772 domain->iommu_refcnt[iommu->seq_id] -= 1;
1773 count = --domain->iommu_count;
1774 if (domain->iommu_refcnt[iommu->seq_id] == 0) {
d160aca5
JR
1775 num = domain->iommu_did[iommu->seq_id];
1776 clear_bit(num, iommu->domain_ids);
1777 set_iommu_domain(iommu, num, NULL);
fb170fb4 1778
fb170fb4 1779 domain_update_iommu_cap(domain);
c0e8a6c8 1780 domain->iommu_did[iommu->seq_id] = 0;
fb170fb4 1781 }
fb170fb4
JL
1782
1783 return count;
1784}
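/*
 * Editor's illustration (not part of the driver): domain_attach_iommu() and
 * domain_detach_iommu() above amount to a per-IOMMU first-fit domain-id
 * allocator guarded by a reference count, with id 0 kept reserved.  A
 * minimal user-space sketch of the same idea follows; the model_* names and
 * the plain bool array standing in for the kernel bitmap API are
 * hypothetical.
 */
#if 0	/* illustrative sketch only, never compiled into the driver */
#include <stdbool.h>
#include <stdio.h>

#define MODEL_NDOMAINS 16

static bool model_ids[MODEL_NDOMAINS];		/* models iommu->domain_ids      */
static int  model_refcnt[MODEL_NDOMAINS];	/* models domain->iommu_refcnt[] */

static int model_attach(int id)
{
	int i;

	if (id > 0) {				/* domain already owns an id here */
		model_refcnt[id]++;
		return id;
	}

	for (i = 1; i < MODEL_NDOMAINS; i++) {	/* id 0 stays reserved */
		if (!model_ids[i]) {
			model_ids[i] = true;
			model_refcnt[i] = 1;
			return i;
		}
	}
	return -1;				/* -ENOSPC in the driver */
}

static void model_detach(int id)
{
	if (id > 0 && model_ids[id] && --model_refcnt[id] == 0)
		model_ids[id] = false;		/* id may be reused now */
}

int main(void)
{
	int id = model_attach(0);

	printf("allocated domain id %d\n", id);
	model_attach(id);			/* second reference */
	model_detach(id);
	model_detach(id);			/* last reference frees the id */
	return 0;
}
#endif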
1785
ba395927 1786static struct iova_domain reserved_iova_list;
8a443df4 1787static struct lock_class_key reserved_rbtree_key;
ba395927 1788
51a63e67 1789static int dmar_init_reserved_ranges(void)
ba395927
KA
1790{
1791 struct pci_dev *pdev = NULL;
1792 struct iova *iova;
1793 int i;
ba395927 1794
aa3ac946 1795 init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN);
ba395927 1796
8a443df4
MG
1797 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1798 &reserved_rbtree_key);
1799
ba395927
KA
1800 /* IOAPIC ranges shouldn't be accessed by DMA */
1801 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1802 IOVA_PFN(IOAPIC_RANGE_END));
51a63e67 1803 if (!iova) {
9f10e5bf 1804 pr_err("Reserve IOAPIC range failed\n");
51a63e67
JC
1805 return -ENODEV;
1806 }
ba395927
KA
1807
1808 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1809 for_each_pci_dev(pdev) {
1810 struct resource *r;
1811
1812 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1813 r = &pdev->resource[i];
1814 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1815 continue;
1a4a4551
DW
1816 iova = reserve_iova(&reserved_iova_list,
1817 IOVA_PFN(r->start),
1818 IOVA_PFN(r->end));
51a63e67 1819 if (!iova) {
932a6523 1820 pci_err(pdev, "Reserve iova for %pR failed\n", r);
51a63e67
JC
1821 return -ENODEV;
1822 }
ba395927
KA
1823 }
1824 }
51a63e67 1825 return 0;
ba395927
KA
1826}
1827
1828static void domain_reserve_special_ranges(struct dmar_domain *domain)
1829{
1830 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1831}
1832
1833static inline int guestwidth_to_adjustwidth(int gaw)
1834{
1835 int agaw;
1836 int r = (gaw - 12) % 9;
1837
1838 if (r == 0)
1839 agaw = gaw;
1840 else
1841 agaw = gaw + 9 - r;
1842 if (agaw > 64)
1843 agaw = 64;
1844 return agaw;
1845}
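/*
 * Editor's note, a worked example of the rounding above (illustrative, not
 * part of the original source): the adjusted width is the guest width
 * rounded up so that (agaw - 12) is a multiple of the 9-bit stride.  A gaw
 * of 48 stays 48 since (48 - 12) % 9 == 0, while a gaw of 40 gives
 * r == (40 - 12) % 9 == 1 and is rounded up to 40 + 9 - 1 == 48; anything
 * above 64 is clamped to 64.
 */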
1846
301e7ee1
JR
1847static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
1848 int guest_width)
1849{
1850 int adjust_width, agaw;
1851 unsigned long sagaw;
1852 int err;
1853
1854 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
1855
1856 err = init_iova_flush_queue(&domain->iovad,
1857 iommu_flush_iova, iova_entry_free);
1858 if (err)
1859 return err;
1860
1861 domain_reserve_special_ranges(domain);
1862
1863 /* calculate AGAW */
1864 if (guest_width > cap_mgaw(iommu->cap))
1865 guest_width = cap_mgaw(iommu->cap);
1866 domain->gaw = guest_width;
1867 adjust_width = guestwidth_to_adjustwidth(guest_width);
1868 agaw = width_to_agaw(adjust_width);
1869 sagaw = cap_sagaw(iommu->cap);
1870 if (!test_bit(agaw, &sagaw)) {
1871 /* hardware doesn't support it, choose a bigger one */
1872 pr_debug("Hardware doesn't support agaw %d\n", agaw);
1873 agaw = find_next_bit(&sagaw, 5, agaw);
1874 if (agaw >= 5)
1875 return -ENODEV;
1876 }
1877 domain->agaw = agaw;
1878
1879 if (ecap_coherent(iommu->ecap))
1880 domain->iommu_coherency = 1;
1881 else
1882 domain->iommu_coherency = 0;
1883
1884 if (ecap_sc_support(iommu->ecap))
1885 domain->iommu_snooping = 1;
1886 else
1887 domain->iommu_snooping = 0;
1888
1889 if (intel_iommu_superpage)
1890 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1891 else
1892 domain->iommu_superpage = 0;
1893
1894 domain->nid = iommu->node;
1895
1896 /* always allocate the top pgd */
1897 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1898 if (!domain->pgd)
1899 return -ENOMEM;
1900 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1901 return 0;
1902}
1903
ba395927
KA
1904static void domain_exit(struct dmar_domain *domain)
1905{
ba395927 1906
d160aca5 1907 /* Remove associated devices and clear attached or cached domains */
ba395927 1908 domain_remove_dev_info(domain);
92d03cc8 1909
ba395927
KA
1910 /* destroy iovas */
1911 put_iova_domain(&domain->iovad);
ba395927 1912
3ee9eca7
DS
1913 if (domain->pgd) {
1914 struct page *freelist;
ba395927 1915
3ee9eca7
DS
1916 freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1917 dma_free_pagelist(freelist);
1918 }
ea8ea460 1919
ba395927
KA
1920 free_domain_mem(domain);
1921}
1922
7373a8cc
LB
1923 /*
1924 * Get the PASID directory size for a scalable mode context entry.
1925 * A value of X in the PDTS field of a scalable mode context entry
1926 * indicates a PASID directory with 2^(X + 7) entries.
1927 */
1928static inline unsigned long context_get_sm_pds(struct pasid_table *table)
1929{
1930 int pds, max_pde;
1931
1932 max_pde = table->max_pasid >> PASID_PDE_SHIFT;
1933 pds = find_first_bit((unsigned long *)&max_pde, MAX_NR_PASID_BITS);
1934 if (pds < 7)
1935 return 0;
1936
1937 return pds - 7;
1938}
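/*
 * Editor's note, a worked example (illustrative, not from the original
 * source): if max_pde came out as 0x4000 (only bit 14 set),
 * find_first_bit() returns 14 and the function returns 7, i.e. the PDTS
 * field encodes a PASID directory of 2^(7 + 7) = 16384 entries; a nonzero
 * max_pde below 0x80 yields 0, the minimum directory size of 2^7 = 128
 * entries.
 */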
1939
1940 /*
1941 * Set the RID_PASID field of a scalable mode context entry. The
1942 * IOMMU hardware will use the PASID value set in this field for
1943 * DMA translations of DMA requests without PASID.
1944 */
1945static inline void
1946context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid)
1947{
1948 context->hi |= pasid & ((1 << 20) - 1);
1949 context->hi |= (1 << 20);
1950}
1951
1952/*
1953 * Set the DTE(Device-TLB Enable) field of a scalable mode context
1954 * entry.
1955 */
1956static inline void context_set_sm_dte(struct context_entry *context)
1957{
1958 context->lo |= (1 << 2);
1959}
1960
1961/*
1962 * Set the PRE(Page Request Enable) field of a scalable mode context
1963 * entry.
1964 */
1965static inline void context_set_sm_pre(struct context_entry *context)
1966{
1967 context->lo |= (1 << 4);
1968}
1969
1970/* Convert value to context PASID directory size field coding. */
1971#define context_pdts(pds) (((pds) & 0x7) << 9)
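/*
 * Editor's note, an illustrative composition of the helpers above (not part
 * of the original source): for a scalable-mode context entry with pds == 2
 * and both ATS and PRI usable, domain_context_mapping_one() below builds
 * context->lo as virt_to_phys(table->table) | context_pdts(2) | (1 << 4)
 * (PRE, via context_set_sm_pre()) | (1 << 2) (DTE, via
 * context_set_sm_dte()), and context_set_sm_rid2pasid() places
 * PASID_RID2PASID in the low 20 bits of context->hi.
 */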
1972
64ae892b
DW
1973static int domain_context_mapping_one(struct dmar_domain *domain,
1974 struct intel_iommu *iommu,
ca6e322d 1975 struct pasid_table *table,
28ccce0d 1976 u8 bus, u8 devfn)
ba395927 1977{
c6c2cebd 1978 u16 did = domain->iommu_did[iommu->seq_id];
28ccce0d
JR
1979 int translation = CONTEXT_TT_MULTI_LEVEL;
1980 struct device_domain_info *info = NULL;
ba395927 1981 struct context_entry *context;
ba395927 1982 unsigned long flags;
7373a8cc 1983 int ret;
28ccce0d 1984
c6c2cebd
JR
1985 WARN_ON(did == 0);
1986
28ccce0d
JR
1987 if (hw_pass_through && domain_type_is_si(domain))
1988 translation = CONTEXT_TT_PASS_THROUGH;
ba395927
KA
1989
1990 pr_debug("Set context mapping for %02x:%02x.%d\n",
1991 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
4ed0d3e6 1992
ba395927 1993 BUG_ON(!domain->pgd);
5331fe6f 1994
55d94043
JR
1995 spin_lock_irqsave(&device_domain_lock, flags);
1996 spin_lock(&iommu->lock);
1997
1998 ret = -ENOMEM;
03ecc32c 1999 context = iommu_context_addr(iommu, bus, devfn, 1);
ba395927 2000 if (!context)
55d94043 2001 goto out_unlock;
ba395927 2002
55d94043
JR
2003 ret = 0;
2004 if (context_present(context))
2005 goto out_unlock;
cf484d0e 2006
aec0e861
XP
2007 /*
2008 * For kdump cases, old valid entries may be cached due to the
2009 * in-flight DMA and copied pgtable, but there is no unmapping
2010 * behaviour for them, so we need an explicit cache flush for
2011 * the newly-mapped device. In the kdump case the device is
2012 * expected to have finished its reset during driver probe, so
2013 * no in-flight DMA will exist and nothing further needs to be
2014 * done afterwards.
2015 */
2016 if (context_copied(context)) {
2017 u16 did_old = context_domain_id(context);
2018
b117e038 2019 if (did_old < cap_ndoms(iommu->cap)) {
aec0e861
XP
2020 iommu->flush.flush_context(iommu, did_old,
2021 (((u16)bus) << 8) | devfn,
2022 DMA_CCMD_MASK_NOBIT,
2023 DMA_CCMD_DEVICE_INVL);
f73a7eee
KA
2024 iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
2025 DMA_TLB_DSI_FLUSH);
2026 }
aec0e861
XP
2027 }
2028
de24e553 2029 context_clear_entry(context);
ea6606b0 2030
7373a8cc
LB
2031 if (sm_supported(iommu)) {
2032 unsigned long pds;
4ed0d3e6 2033
7373a8cc
LB
2034 WARN_ON(!table);
2035
2036 /* Setup the PASID DIR pointer: */
2037 pds = context_get_sm_pds(table);
2038 context->lo = (u64)virt_to_phys(table->table) |
2039 context_pdts(pds);
2040
2041 /* Setup the RID_PASID field: */
2042 context_set_sm_rid2pasid(context, PASID_RID2PASID);
de24e553 2043
de24e553 2044 /*
7373a8cc
LB
2045 * Setup the Device-TLB enable bit and Page request
2046 * Enable bit:
de24e553 2047 */
7373a8cc
LB
2048 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2049 if (info && info->ats_supported)
2050 context_set_sm_dte(context);
2051 if (info && info->pri_supported)
2052 context_set_sm_pre(context);
2053 } else {
2054 struct dma_pte *pgd = domain->pgd;
2055 int agaw;
2056
2057 context_set_domain_id(context, did);
7373a8cc
LB
2058
2059 if (translation != CONTEXT_TT_PASS_THROUGH) {
2060 /*
2061 * Skip top levels of page tables for iommu which has
2062 * less agaw than default. Unnecessary for PT mode.
2063 */
2064 for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
2065 ret = -ENOMEM;
2066 pgd = phys_to_virt(dma_pte_addr(pgd));
2067 if (!dma_pte_present(pgd))
2068 goto out_unlock;
2069 }
2070
2071 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2072 if (info && info->ats_supported)
2073 translation = CONTEXT_TT_DEV_IOTLB;
2074 else
2075 translation = CONTEXT_TT_MULTI_LEVEL;
2076
2077 context_set_address_root(context, virt_to_phys(pgd));
2078 context_set_address_width(context, agaw);
2079 } else {
2080 /*
2081 * In pass through mode, AW must be programmed to
2082 * indicate the largest AGAW value supported by
2083 * hardware. And ASR is ignored by hardware.
2084 */
2085 context_set_address_width(context, iommu->msagaw);
2086 }
41b80db2
LB
2087
2088 context_set_translation_type(context, translation);
93a23a72 2089 }
4ed0d3e6 2090
c07e7d21
MM
2091 context_set_fault_enable(context);
2092 context_set_present(context);
5331fe6f 2093 domain_flush_cache(domain, context, sizeof(*context));
ba395927 2094
4c25a2c1
DW
2095 /*
2096 * It's a non-present to present mapping. If hardware doesn't cache
2097 * non-present entries we only need to flush the write-buffer. If it
2098 * _does_ cache non-present entries, then it does so in the special
2099 * domain #0, which we have to flush:
2100 */
2101 if (cap_caching_mode(iommu->cap)) {
2102 iommu->flush.flush_context(iommu, 0,
2103 (((u16)bus) << 8) | devfn,
2104 DMA_CCMD_MASK_NOBIT,
2105 DMA_CCMD_DEVICE_INVL);
c6c2cebd 2106 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
4c25a2c1 2107 } else {
ba395927 2108 iommu_flush_write_buffer(iommu);
4c25a2c1 2109 }
93a23a72 2110 iommu_enable_dev_iotlb(info);
c7151a8d 2111
55d94043
JR
2112 ret = 0;
2113
2114out_unlock:
2115 spin_unlock(&iommu->lock);
2116 spin_unlock_irqrestore(&device_domain_lock, flags);
fb170fb4 2117
5c365d18 2118 return ret;
ba395927
KA
2119}
2120
0ce4a85f
LB
2121struct domain_context_mapping_data {
2122 struct dmar_domain *domain;
2123 struct intel_iommu *iommu;
2124 struct pasid_table *table;
2125};
2126
2127static int domain_context_mapping_cb(struct pci_dev *pdev,
2128 u16 alias, void *opaque)
2129{
2130 struct domain_context_mapping_data *data = opaque;
2131
2132 return domain_context_mapping_one(data->domain, data->iommu,
2133 data->table, PCI_BUS_NUM(alias),
2134 alias & 0xff);
2135}
2136
ba395927 2137static int
28ccce0d 2138domain_context_mapping(struct dmar_domain *domain, struct device *dev)
ba395927 2139{
0ce4a85f 2140 struct domain_context_mapping_data data;
ca6e322d 2141 struct pasid_table *table;
64ae892b 2142 struct intel_iommu *iommu;
156baca8 2143 u8 bus, devfn;
64ae892b 2144
e1f167f3 2145 iommu = device_to_iommu(dev, &bus, &devfn);
64ae892b
DW
2146 if (!iommu)
2147 return -ENODEV;
ba395927 2148
ca6e322d 2149 table = intel_pasid_get_table(dev);
0ce4a85f
LB
2150
2151 if (!dev_is_pci(dev))
2152 return domain_context_mapping_one(domain, iommu, table,
2153 bus, devfn);
2154
2155 data.domain = domain;
2156 data.iommu = iommu;
2157 data.table = table;
2158
2159 return pci_for_each_dma_alias(to_pci_dev(dev),
2160 &domain_context_mapping_cb, &data);
579305f7
AW
2161}
2162
2163static int domain_context_mapped_cb(struct pci_dev *pdev,
2164 u16 alias, void *opaque)
2165{
2166 struct intel_iommu *iommu = opaque;
2167
2168 return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
ba395927
KA
2169}
2170
e1f167f3 2171static int domain_context_mapped(struct device *dev)
ba395927 2172{
5331fe6f 2173 struct intel_iommu *iommu;
156baca8 2174 u8 bus, devfn;
5331fe6f 2175
e1f167f3 2176 iommu = device_to_iommu(dev, &bus, &devfn);
5331fe6f
WH
2177 if (!iommu)
2178 return -ENODEV;
ba395927 2179
579305f7
AW
2180 if (!dev_is_pci(dev))
2181 return device_context_mapped(iommu, bus, devfn);
e1f167f3 2182
579305f7
AW
2183 return !pci_for_each_dma_alias(to_pci_dev(dev),
2184 domain_context_mapped_cb, iommu);
ba395927
KA
2185}
2186
f532959b
FY
2187 /* Return the number of VT-d pages, rounded up to the MM page size */
2188static inline unsigned long aligned_nrpages(unsigned long host_addr,
2189 size_t size)
2190{
2191 host_addr &= ~PAGE_MASK;
2192 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
2193}
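/*
 * Editor's note, a worked example (illustrative, assuming 4KiB MM pages so
 * PAGE_SHIFT == VTD_PAGE_SHIFT == 12): for host_addr == 0x1234 and
 * size == 0x2000, the in-page offset is 0x234, PAGE_ALIGN(0x234 + 0x2000)
 * is 0x3000, and the function returns 3 VT-d pages even though the raw
 * length is only two pages.
 */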
2194
6dd9a7c7
YS
2195/* Return largest possible superpage level for a given mapping */
2196static inline int hardware_largepage_caps(struct dmar_domain *domain,
2197 unsigned long iov_pfn,
2198 unsigned long phy_pfn,
2199 unsigned long pages)
2200{
2201 int support, level = 1;
2202 unsigned long pfnmerge;
2203
2204 support = domain->iommu_superpage;
2205
2206 /* To use a large page, the virtual *and* physical addresses
2207 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2208 of them will mean we have to use smaller pages. So just
2209 merge them and check both at once. */
2210 pfnmerge = iov_pfn | phy_pfn;
2211
2212 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
2213 pages >>= VTD_STRIDE_SHIFT;
2214 if (!pages)
2215 break;
2216 pfnmerge >>= VTD_STRIDE_SHIFT;
2217 level++;
2218 support--;
2219 }
2220 return level;
2221}
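/*
 * Editor's note, a worked example (illustrative, assuming VTD_STRIDE_SHIFT
 * is 9 and ~VTD_STRIDE_MASK selects the low nine bits): with
 * iommu_superpage == 1 (2MiB pages available), iov_pfn == phy_pfn == 0x200
 * (both 2MiB-aligned) and pages == 1024 (4MiB), the low nine bits of
 * pfnmerge are clear, so one loop iteration runs and level 2 is returned,
 * i.e. 2MiB superpages are used.  If either pfn had any of its low nine
 * bits set, the loop would not run and level 1 (4KiB pages) would be used.
 */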
2222
9051aa02
DW
2223static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2224 struct scatterlist *sg, unsigned long phys_pfn,
2225 unsigned long nr_pages, int prot)
e1605495
DW
2226{
2227 struct dma_pte *first_pte = NULL, *pte = NULL;
9051aa02 2228 phys_addr_t uninitialized_var(pteval);
cc4f14aa 2229 unsigned long sg_res = 0;
6dd9a7c7
YS
2230 unsigned int largepage_lvl = 0;
2231 unsigned long lvl_pages = 0;
e1605495 2232
162d1b10 2233 BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
e1605495
DW
2234
2235 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2236 return -EINVAL;
2237
2238 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
2239
cc4f14aa
JL
2240 if (!sg) {
2241 sg_res = nr_pages;
9051aa02
DW
2242 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
2243 }
2244
6dd9a7c7 2245 while (nr_pages > 0) {
c85994e4
DW
2246 uint64_t tmp;
2247
e1605495 2248 if (!sg_res) {
29a90b70
RM
2249 unsigned int pgoff = sg->offset & ~PAGE_MASK;
2250
f532959b 2251 sg_res = aligned_nrpages(sg->offset, sg->length);
29a90b70 2252 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + pgoff;
e1605495 2253 sg->dma_length = sg->length;
29a90b70 2254 pteval = (sg_phys(sg) - pgoff) | prot;
6dd9a7c7 2255 phys_pfn = pteval >> VTD_PAGE_SHIFT;
e1605495 2256 }
6dd9a7c7 2257
e1605495 2258 if (!pte) {
6dd9a7c7
YS
2259 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2260
5cf0a76f 2261 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
e1605495
DW
2262 if (!pte)
2263 return -ENOMEM;
6dd9a7c7 2264 /* It is a large page */
6491d4d0 2265 if (largepage_lvl > 1) {
ba2374fd
CZ
2266 unsigned long nr_superpages, end_pfn;
2267
6dd9a7c7 2268 pteval |= DMA_PTE_LARGE_PAGE;
d41a4adb 2269 lvl_pages = lvl_to_nr_pages(largepage_lvl);
ba2374fd
CZ
2270
2271 nr_superpages = sg_res / lvl_pages;
2272 end_pfn = iov_pfn + nr_superpages * lvl_pages - 1;
2273
d41a4adb
JL
2274 /*
2275 * Ensure that old small page tables are
ba2374fd 2276 * removed to make room for superpage(s).
bc24c571
DD
2277 * We're adding new large pages, so make sure
2278 * we don't remove their parent tables.
d41a4adb 2279 */
bc24c571
DD
2280 dma_pte_free_pagetable(domain, iov_pfn, end_pfn,
2281 largepage_lvl + 1);
6491d4d0 2282 } else {
6dd9a7c7 2283 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
6491d4d0 2284 }
6dd9a7c7 2285
e1605495
DW
2286 }
2287 /* We don't need a lock here; nobody else
2288 * touches this iova range
2289 */
7766a3fb 2290 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
c85994e4 2291 if (tmp) {
1bf20f0d 2292 static int dumps = 5;
9f10e5bf
JR
2293 pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2294 iov_pfn, tmp, (unsigned long long)pteval);
1bf20f0d
DW
2295 if (dumps) {
2296 dumps--;
2297 debug_dma_dump_mappings(NULL);
2298 }
2299 WARN_ON(1);
2300 }
6dd9a7c7
YS
2301
2302 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2303
2304 BUG_ON(nr_pages < lvl_pages);
2305 BUG_ON(sg_res < lvl_pages);
2306
2307 nr_pages -= lvl_pages;
2308 iov_pfn += lvl_pages;
2309 phys_pfn += lvl_pages;
2310 pteval += lvl_pages * VTD_PAGE_SIZE;
2311 sg_res -= lvl_pages;
2312
2313 /* If the next PTE would be the first in a new page, then we
2314 need to flush the cache on the entries we've just written.
2315 And then we'll need to recalculate 'pte', so clear it and
2316 let it get set again in the if (!pte) block above.
2317
2318 If we're done (!nr_pages) we need to flush the cache too.
2319
2320 Also if we've been setting superpages, we may need to
2321 recalculate 'pte' and switch back to smaller pages for the
2322 end of the mapping, if the trailing size is not enough to
2323 use another superpage (i.e. sg_res < lvl_pages). */
e1605495 2324 pte++;
6dd9a7c7
YS
2325 if (!nr_pages || first_pte_in_page(pte) ||
2326 (largepage_lvl > 1 && sg_res < lvl_pages)) {
e1605495
DW
2327 domain_flush_cache(domain, first_pte,
2328 (void *)pte - (void *)first_pte);
2329 pte = NULL;
2330 }
6dd9a7c7
YS
2331
2332 if (!sg_res && nr_pages)
e1605495
DW
2333 sg = sg_next(sg);
2334 }
2335 return 0;
2336}
2337
87684fd9 2338static int domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
095303e0
LB
2339 struct scatterlist *sg, unsigned long phys_pfn,
2340 unsigned long nr_pages, int prot)
2341{
fa954e68 2342 int iommu_id, ret;
095303e0
LB
2343 struct intel_iommu *iommu;
2344
2345 /* Do the real mapping first */
2346 ret = __domain_mapping(domain, iov_pfn, sg, phys_pfn, nr_pages, prot);
2347 if (ret)
2348 return ret;
2349
fa954e68
LB
2350 for_each_domain_iommu(iommu_id, domain) {
2351 iommu = g_iommus[iommu_id];
095303e0
LB
2352 __mapping_notify_one(iommu, domain, iov_pfn, nr_pages);
2353 }
2354
2355 return 0;
87684fd9
PX
2356}
2357
9051aa02
DW
2358static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2359 struct scatterlist *sg, unsigned long nr_pages,
2360 int prot)
ba395927 2361{
87684fd9 2362 return domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
9051aa02 2363}
6f6a00e4 2364
9051aa02
DW
2365static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2366 unsigned long phys_pfn, unsigned long nr_pages,
2367 int prot)
2368{
87684fd9 2369 return domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
ba395927
KA
2370}
2371
2452d9db 2372static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn)
ba395927 2373{
5082219b
FS
2374 unsigned long flags;
2375 struct context_entry *context;
2376 u16 did_old;
2377
c7151a8d
WH
2378 if (!iommu)
2379 return;
8c11e798 2380
5082219b
FS
2381 spin_lock_irqsave(&iommu->lock, flags);
2382 context = iommu_context_addr(iommu, bus, devfn, 0);
2383 if (!context) {
2384 spin_unlock_irqrestore(&iommu->lock, flags);
2385 return;
2386 }
2387 did_old = context_domain_id(context);
2388 context_clear_entry(context);
2389 __iommu_flush_cache(iommu, context, sizeof(*context));
2390 spin_unlock_irqrestore(&iommu->lock, flags);
2391 iommu->flush.flush_context(iommu,
2392 did_old,
2393 (((u16)bus) << 8) | devfn,
2394 DMA_CCMD_MASK_NOBIT,
2395 DMA_CCMD_DEVICE_INVL);
2396 iommu->flush.flush_iotlb(iommu,
2397 did_old,
2398 0,
2399 0,
2400 DMA_TLB_DSI_FLUSH);
ba395927
KA
2401}
2402
109b9b04
DW
2403static inline void unlink_domain_info(struct device_domain_info *info)
2404{
2405 assert_spin_locked(&device_domain_lock);
2406 list_del(&info->link);
2407 list_del(&info->global);
2408 if (info->dev)
0bcb3e28 2409 info->dev->archdata.iommu = NULL;
109b9b04
DW
2410}
2411
ba395927
KA
2412static void domain_remove_dev_info(struct dmar_domain *domain)
2413{
3a74ca01 2414 struct device_domain_info *info, *tmp;
fb170fb4 2415 unsigned long flags;
ba395927
KA
2416
2417 spin_lock_irqsave(&device_domain_lock, flags);
76f45fe3 2418 list_for_each_entry_safe(info, tmp, &domain->devices, link)
127c7615 2419 __dmar_remove_one_dev_info(info);
ba395927
KA
2420 spin_unlock_irqrestore(&device_domain_lock, flags);
2421}
2422
1525a29a 2423static struct dmar_domain *find_domain(struct device *dev)
ba395927
KA
2424{
2425 struct device_domain_info *info;
2426
1ee0186b
LB
2427 if (unlikely(dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO ||
2428 dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO))
2429 return NULL;
2430
2431 /* No lock here, assumes no domain exit in normal case */
2432 info = dev->archdata.iommu;
2433 if (likely(info))
2434 return info->domain;
2435
2436 return NULL;
2437}
2438
2439static struct dmar_domain *deferred_attach_domain(struct device *dev)
2440{
8af46c78
LB
2441 if (unlikely(dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO)) {
2442 struct iommu_domain *domain;
2443
2444 dev->archdata.iommu = NULL;
2445 domain = iommu_get_domain_for_dev(dev);
2446 if (domain)
2447 intel_iommu_attach_device(domain, dev);
2448 }
2449
1ee0186b 2450 return find_domain(dev);
ba395927
KA
2451}
2452
5a8f40e8 2453static inline struct device_domain_info *
745f2586
JL
2454dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2455{
2456 struct device_domain_info *info;
2457
2458 list_for_each_entry(info, &device_domain_list, global)
41e80dca 2459 if (info->iommu->segment == segment && info->bus == bus &&
745f2586 2460 info->devfn == devfn)
5a8f40e8 2461 return info;
745f2586
JL
2462
2463 return NULL;
2464}
2465
5db31569
JR
2466static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
2467 int bus, int devfn,
2468 struct device *dev,
2469 struct dmar_domain *domain)
745f2586 2470{
5a8f40e8 2471 struct dmar_domain *found = NULL;
745f2586
JL
2472 struct device_domain_info *info;
2473 unsigned long flags;
d160aca5 2474 int ret;
745f2586
JL
2475
2476 info = alloc_devinfo_mem();
2477 if (!info)
b718cd3d 2478 return NULL;
745f2586 2479
745f2586
JL
2480 info->bus = bus;
2481 info->devfn = devfn;
b16d0cb9
DW
2482 info->ats_supported = info->pasid_supported = info->pri_supported = 0;
2483 info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
2484 info->ats_qdep = 0;
745f2586
JL
2485 info->dev = dev;
2486 info->domain = domain;
5a8f40e8 2487 info->iommu = iommu;
cc580e41 2488 info->pasid_table = NULL;
95587a75 2489 info->auxd_enabled = 0;
67b8e02b 2490 INIT_LIST_HEAD(&info->auxiliary_domains);
745f2586 2491
b16d0cb9
DW
2492 if (dev && dev_is_pci(dev)) {
2493 struct pci_dev *pdev = to_pci_dev(info->dev);
2494
d8b85910
LB
2495 if (!pdev->untrusted &&
2496 !pci_ats_disabled() &&
cef74409 2497 ecap_dev_iotlb_support(iommu->ecap) &&
b16d0cb9
DW
2498 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) &&
2499 dmar_find_matched_atsr_unit(pdev))
2500 info->ats_supported = 1;
2501
765b6a98
LB
2502 if (sm_supported(iommu)) {
2503 if (pasid_supported(iommu)) {
b16d0cb9
DW
2504 int features = pci_pasid_features(pdev);
2505 if (features >= 0)
2506 info->pasid_supported = features | 1;
2507 }
2508
2509 if (info->ats_supported && ecap_prs(iommu->ecap) &&
2510 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
2511 info->pri_supported = 1;
2512 }
2513 }
2514
745f2586
JL
2515 spin_lock_irqsave(&device_domain_lock, flags);
2516 if (dev)
0bcb3e28 2517 found = find_domain(dev);
f303e507
JR
2518
2519 if (!found) {
5a8f40e8 2520 struct device_domain_info *info2;
41e80dca 2521 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
f303e507
JR
2522 if (info2) {
2523 found = info2->domain;
2524 info2->dev = dev;
2525 }
5a8f40e8 2526 }
f303e507 2527
745f2586
JL
2528 if (found) {
2529 spin_unlock_irqrestore(&device_domain_lock, flags);
2530 free_devinfo_mem(info);
b718cd3d
DW
2531 /* Caller must free the original domain */
2532 return found;
745f2586
JL
2533 }
2534
d160aca5
JR
2535 spin_lock(&iommu->lock);
2536 ret = domain_attach_iommu(domain, iommu);
2537 spin_unlock(&iommu->lock);
2538
2539 if (ret) {
c6c2cebd 2540 spin_unlock_irqrestore(&device_domain_lock, flags);
499f3aa4 2541 free_devinfo_mem(info);
c6c2cebd
JR
2542 return NULL;
2543 }
c6c2cebd 2544
b718cd3d
DW
2545 list_add(&info->link, &domain->devices);
2546 list_add(&info->global, &device_domain_list);
2547 if (dev)
2548 dev->archdata.iommu = info;
0bbeb01a 2549 spin_unlock_irqrestore(&device_domain_lock, flags);
a7fc93fe 2550
0bbeb01a
LB
2551 /* PASID table is mandatory for a PCI device in scalable mode. */
2552 if (dev && dev_is_pci(dev) && sm_supported(iommu)) {
a7fc93fe
LB
2553 ret = intel_pasid_alloc_table(dev);
2554 if (ret) {
932a6523 2555 dev_err(dev, "PASID table allocation failed\n");
71753239 2556 dmar_remove_one_dev_info(dev);
0bbeb01a 2557 return NULL;
a7fc93fe 2558 }
ef848b7e
LB
2559
2560 /* Setup the PASID entry for requests without PASID: */
2561 spin_lock(&iommu->lock);
2562 if (hw_pass_through && domain_type_is_si(domain))
2563 ret = intel_pasid_setup_pass_through(iommu, domain,
2564 dev, PASID_RID2PASID);
2565 else
2566 ret = intel_pasid_setup_second_level(iommu, domain,
2567 dev, PASID_RID2PASID);
2568 spin_unlock(&iommu->lock);
2569 if (ret) {
932a6523 2570 dev_err(dev, "Setup RID2PASID failed\n");
71753239 2571 dmar_remove_one_dev_info(dev);
ef848b7e 2572 return NULL;
a7fc93fe
LB
2573 }
2574 }
b718cd3d 2575
cc4e2575 2576 if (dev && domain_context_mapping(domain, dev)) {
932a6523 2577 dev_err(dev, "Domain context map failed\n");
71753239 2578 dmar_remove_one_dev_info(dev);
cc4e2575
JR
2579 return NULL;
2580 }
2581
b718cd3d 2582 return domain;
745f2586
JL
2583}
2584
579305f7
AW
2585static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2586{
2587 *(u16 *)opaque = alias;
2588 return 0;
2589}
2590
76208356 2591static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw)
ba395927 2592{
e083ea5b 2593 struct device_domain_info *info;
76208356 2594 struct dmar_domain *domain = NULL;
579305f7 2595 struct intel_iommu *iommu;
fcc35c63 2596 u16 dma_alias;
ba395927 2597 unsigned long flags;
aa4d066a 2598 u8 bus, devfn;
ba395927 2599
579305f7
AW
2600 iommu = device_to_iommu(dev, &bus, &devfn);
2601 if (!iommu)
2602 return NULL;
2603
146922ec
DW
2604 if (dev_is_pci(dev)) {
2605 struct pci_dev *pdev = to_pci_dev(dev);
276dbf99 2606
579305f7
AW
2607 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2608
2609 spin_lock_irqsave(&device_domain_lock, flags);
2610 info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2611 PCI_BUS_NUM(dma_alias),
2612 dma_alias & 0xff);
2613 if (info) {
2614 iommu = info->iommu;
2615 domain = info->domain;
5a8f40e8 2616 }
579305f7 2617 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927 2618
76208356 2619 /* DMA alias already has a domain, use it */
579305f7 2620 if (info)
76208356 2621 goto out;
579305f7 2622 }
ba395927 2623
146922ec 2624 /* Allocate and initialize new domain for the device */
ab8dfe25 2625 domain = alloc_domain(0);
745f2586 2626 if (!domain)
579305f7 2627 return NULL;
301e7ee1 2628 if (domain_init(domain, iommu, gaw)) {
579305f7
AW
2629 domain_exit(domain);
2630 return NULL;
2c2e2c38 2631 }
ba395927 2632
76208356 2633out:
76208356
JR
2634 return domain;
2635}
579305f7 2636
76208356
JR
2637static struct dmar_domain *set_domain_for_dev(struct device *dev,
2638 struct dmar_domain *domain)
2639{
2640 struct intel_iommu *iommu;
2641 struct dmar_domain *tmp;
2642 u16 req_id, dma_alias;
2643 u8 bus, devfn;
2644
2645 iommu = device_to_iommu(dev, &bus, &devfn);
2646 if (!iommu)
2647 return NULL;
2648
2649 req_id = ((u16)bus << 8) | devfn;
2650
2651 if (dev_is_pci(dev)) {
2652 struct pci_dev *pdev = to_pci_dev(dev);
2653
2654 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2655
2656 /* register PCI DMA alias device */
2657 if (req_id != dma_alias) {
2658 tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2659 dma_alias & 0xff, NULL, domain);
2660
2661 if (!tmp || tmp != domain)
2662 return tmp;
2663 }
ba395927
KA
2664 }
2665
5db31569 2666 tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
76208356
JR
2667 if (!tmp || tmp != domain)
2668 return tmp;
2669
2670 return domain;
2671}
579305f7 2672
b213203e
DW
2673static int iommu_domain_identity_map(struct dmar_domain *domain,
2674 unsigned long long start,
2675 unsigned long long end)
ba395927 2676{
c5395d5c
DW
2677 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2678 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2679
2680 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2681 dma_to_mm_pfn(last_vpfn))) {
9f10e5bf 2682 pr_err("Reserving iova failed\n");
b213203e 2683 return -ENOMEM;
ba395927
KA
2684 }
2685
af1089ce 2686 pr_debug("Mapping reserved region %llx-%llx\n", start, end);
ba395927
KA
2687 /*
2688 * RMRR range might have overlap with physical memory range,
2689 * clear it first
2690 */
c5395d5c 2691 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
ba395927 2692
87684fd9
PX
2693 return __domain_mapping(domain, first_vpfn, NULL,
2694 first_vpfn, last_vpfn - first_vpfn + 1,
2695 DMA_PTE_READ|DMA_PTE_WRITE);
b213203e
DW
2696}
2697
d66ce54b
JR
2698static int domain_prepare_identity_map(struct device *dev,
2699 struct dmar_domain *domain,
2700 unsigned long long start,
2701 unsigned long long end)
b213203e 2702{
19943b0e
DW
2703 /* For _hardware_ passthrough, don't bother. But for software
2704 passthrough, we do it anyway -- it may indicate a memory
2705 range which is reserved in E820 and therefore was never set
2706 up in si_domain to begin with */
2707 if (domain == si_domain && hw_pass_through) {
932a6523
BH
2708 dev_warn(dev, "Ignoring identity map for HW passthrough [0x%Lx - 0x%Lx]\n",
2709 start, end);
19943b0e
DW
2710 return 0;
2711 }
2712
932a6523 2713 dev_info(dev, "Setting identity map [0x%Lx - 0x%Lx]\n", start, end);
9f10e5bf 2714
5595b528
DW
2715 if (end < start) {
2716 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2717 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2718 dmi_get_system_info(DMI_BIOS_VENDOR),
2719 dmi_get_system_info(DMI_BIOS_VERSION),
2720 dmi_get_system_info(DMI_PRODUCT_VERSION));
d66ce54b 2721 return -EIO;
5595b528
DW
2722 }
2723
2ff729f5
DW
2724 if (end >> agaw_to_width(domain->agaw)) {
2725 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2726 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2727 agaw_to_width(domain->agaw),
2728 dmi_get_system_info(DMI_BIOS_VENDOR),
2729 dmi_get_system_info(DMI_BIOS_VERSION),
2730 dmi_get_system_info(DMI_PRODUCT_VERSION));
d66ce54b 2731 return -EIO;
2ff729f5 2732 }
19943b0e 2733
d66ce54b
JR
2734 return iommu_domain_identity_map(domain, start, end);
2735}
ba395927 2736
301e7ee1
JR
2737static int md_domain_init(struct dmar_domain *domain, int guest_width);
2738
071e1374 2739static int __init si_domain_init(int hw)
2c2e2c38 2740{
4de354ec
LB
2741 struct dmar_rmrr_unit *rmrr;
2742 struct device *dev;
2743 int i, nid, ret;
2c2e2c38 2744
ab8dfe25 2745 si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2c2e2c38
FY
2746 if (!si_domain)
2747 return -EFAULT;
2748
301e7ee1 2749 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2c2e2c38
FY
2750 domain_exit(si_domain);
2751 return -EFAULT;
2752 }
2753
19943b0e
DW
2754 if (hw)
2755 return 0;
2756
c7ab48d2 2757 for_each_online_node(nid) {
5dfe8660
TH
2758 unsigned long start_pfn, end_pfn;
2759 int i;
2760
2761 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2762 ret = iommu_domain_identity_map(si_domain,
2763 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2764 if (ret)
2765 return ret;
2766 }
c7ab48d2
DW
2767 }
2768
4de354ec
LB
2769 /*
2770 * Normally we use DMA domains for devices which have RMRRs. But we
2771 * relax this requirement for graphics and USB devices. Identity-map
2772 * the RMRRs for graphics and USB devices so that they can use the
2773 * si_domain.
2774 */
2775 for_each_rmrr_units(rmrr) {
2776 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
2777 i, dev) {
2778 unsigned long long start = rmrr->base_address;
2779 unsigned long long end = rmrr->end_address;
2780
2781 if (device_is_rmrr_locked(dev))
2782 continue;
2783
2784 if (WARN_ON(end < start ||
2785 end >> agaw_to_width(si_domain->agaw)))
2786 continue;
2787
2788 ret = iommu_domain_identity_map(si_domain, start, end);
2789 if (ret)
2790 return ret;
2791 }
2792 }
2793
2c2e2c38
FY
2794 return 0;
2795}
2796
9b226624 2797static int identity_mapping(struct device *dev)
2c2e2c38
FY
2798{
2799 struct device_domain_info *info;
2800
9b226624 2801 info = dev->archdata.iommu;
160c63f9 2802 if (info && info != DUMMY_DEVICE_DOMAIN_INFO && info != DEFER_DEVICE_DOMAIN_INFO)
cb452a40 2803 return (info->domain == si_domain);
2c2e2c38 2804
2c2e2c38
FY
2805 return 0;
2806}
2807
28ccce0d 2808static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
2c2e2c38 2809{
0ac72664 2810 struct dmar_domain *ndomain;
5a8f40e8 2811 struct intel_iommu *iommu;
156baca8 2812 u8 bus, devfn;
2c2e2c38 2813
5913c9bf 2814 iommu = device_to_iommu(dev, &bus, &devfn);
5a8f40e8
DW
2815 if (!iommu)
2816 return -ENODEV;
2817
5db31569 2818 ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
0ac72664
DW
2819 if (ndomain != domain)
2820 return -EBUSY;
2c2e2c38
FY
2821
2822 return 0;
2823}
2824
0b9d9753 2825static bool device_has_rmrr(struct device *dev)
ea2447f7
TM
2826{
2827 struct dmar_rmrr_unit *rmrr;
832bd858 2828 struct device *tmp;
ea2447f7
TM
2829 int i;
2830
0e242612 2831 rcu_read_lock();
ea2447f7 2832 for_each_rmrr_units(rmrr) {
b683b230
JL
2833 /*
2834 * Return TRUE if this RMRR contains the device that
2835 * is passed in.
2836 */
2837 for_each_active_dev_scope(rmrr->devices,
2838 rmrr->devices_cnt, i, tmp)
e143fd45
EA
2839 if (tmp == dev ||
2840 is_downstream_to_pci_bridge(dev, tmp)) {
0e242612 2841 rcu_read_unlock();
ea2447f7 2842 return true;
b683b230 2843 }
ea2447f7 2844 }
0e242612 2845 rcu_read_unlock();
ea2447f7
TM
2846 return false;
2847}
2848
1c5c59fb
EA
2849/**
2850 * device_rmrr_is_relaxable - Test whether the RMRR of this device
2851 * is relaxable (ie. is allowed to be not enforced under some conditions)
2852 * @dev: device handle
2853 *
2854 * We assume that PCI USB devices with RMRRs have them largely
2855 * for historical reasons and that the RMRR space is not actively used post
2856 * boot. This exclusion may change if vendors begin to abuse it.
2857 *
2858 * The same exception is made for graphics devices, with the requirement that
2859 * any use of the RMRR regions will be torn down before assigning the device
2860 * to a guest.
2861 *
2862 * Return: true if the RMRR is relaxable, false otherwise
2863 */
2864static bool device_rmrr_is_relaxable(struct device *dev)
2865{
2866 struct pci_dev *pdev;
2867
2868 if (!dev_is_pci(dev))
2869 return false;
2870
2871 pdev = to_pci_dev(dev);
2872 if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
2873 return true;
2874 else
2875 return false;
2876}
2877
c875d2c1
AW
2878/*
2879 * There are a couple of cases where we need to restrict the functionality of
2880 * devices associated with RMRRs. The first is when evaluating a device for
2881 * identity mapping because problems exist when devices are moved in and out
2882 * of domains and their respective RMRR information is lost. This means that
2883 * a device with associated RMRRs will never be in a "passthrough" domain.
2884 * The second is use of the device through the IOMMU API. This interface
2885 * expects to have full control of the IOVA space for the device. We cannot
2886 * satisfy both the requirement that RMRR access is maintained and have an
2887 * unencumbered IOVA space. We also have no ability to quiesce the device's
2888 * use of the RMRR space or even inform the IOMMU API user of the restriction.
2889 * We therefore prevent devices associated with an RMRR from participating in
2890 * the IOMMU API, which eliminates them from device assignment.
2891 *
1c5c59fb
EA
2892 * In both cases, devices which have relaxable RMRRs are not concerned by this
2893 * restriction. See device_rmrr_is_relaxable comment.
c875d2c1
AW
2894 */
2895static bool device_is_rmrr_locked(struct device *dev)
2896{
2897 if (!device_has_rmrr(dev))
2898 return false;
2899
1c5c59fb
EA
2900 if (device_rmrr_is_relaxable(dev))
2901 return false;
c875d2c1
AW
2902
2903 return true;
2904}
2905
f273a453
LB
2906/*
2907 * Return the required default domain type for a specific device.
2908 *
2909 * @dev: the device in query
2910 * @startup: true if this is during early boot
2911 *
2912 * Returns:
2913 * - IOMMU_DOMAIN_DMA: device requires a dynamic mapping domain
2914 * - IOMMU_DOMAIN_IDENTITY: device requires an identical mapping domain
2915 * - 0: both identity and dynamic domains work for this device
2916 */
0e31a726 2917static int device_def_domain_type(struct device *dev)
6941af28 2918{
3bdb2591
DW
2919 if (dev_is_pci(dev)) {
2920 struct pci_dev *pdev = to_pci_dev(dev);
ea2447f7 2921
c875d2c1 2922 if (device_is_rmrr_locked(dev))
f273a453 2923 return IOMMU_DOMAIN_DMA;
e0fc7e0b 2924
89a6079d
LB
2925 /*
2926 * Prevent any device marked as untrusted from getting
2927 * placed into the statically identity mapping domain.
2928 */
2929 if (pdev->untrusted)
f273a453 2930 return IOMMU_DOMAIN_DMA;
89a6079d 2931
3bdb2591 2932 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
f273a453 2933 return IOMMU_DOMAIN_IDENTITY;
e0fc7e0b 2934
3bdb2591 2935 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
f273a453 2936 return IOMMU_DOMAIN_IDENTITY;
3bdb2591
DW
2937
2938 /*
2939 * We want to start off with all devices in the 1:1 domain, and
2940 * take them out later if we find they can't access all of memory.
2941 *
2942 * However, we can't do this for PCI devices behind bridges,
2943 * because all PCI devices behind the same bridge will end up
2944 * with the same source-id on their transactions.
2945 *
2946 * Practically speaking, we can't change things around for these
2947 * devices at run-time, because we can't be sure there'll be no
2948 * DMA transactions in flight for any of their siblings.
2949 *
2950 * So PCI devices (unless they're on the root bus) as well as
2951 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2952 * the 1:1 domain, just in _case_ one of their siblings turns out
2953 * not to be able to map all of memory.
2954 */
2955 if (!pci_is_pcie(pdev)) {
2956 if (!pci_is_root_bus(pdev->bus))
f273a453 2957 return IOMMU_DOMAIN_DMA;
3bdb2591 2958 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
f273a453 2959 return IOMMU_DOMAIN_DMA;
3bdb2591 2960 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
f273a453 2961 return IOMMU_DOMAIN_DMA;
3bdb2591
DW
2962 } else {
2963 if (device_has_rmrr(dev))
f273a453 2964 return IOMMU_DOMAIN_DMA;
3bdb2591 2965 }
3dfc813d 2966
f273a453
LB
2967 return (iommu_identity_mapping & IDENTMAP_ALL) ?
2968 IOMMU_DOMAIN_IDENTITY : 0;
2969}
2970
ffebeb46
JL
2971static void intel_iommu_init_qi(struct intel_iommu *iommu)
2972{
2973 /*
2974 * Start from a sane IOMMU hardware state.
2975 * If the queued invalidation was already initialized by us
2976 * (for example, while enabling interrupt remapping) then
2977 * things are already rolling from a sane state.
2978 */
2979 if (!iommu->qi) {
2980 /*
2981 * Clear any previous faults.
2982 */
2983 dmar_fault(-1, iommu);
2984 /*
2985 * Disable queued invalidation if supported and already enabled
2986 * before OS handover.
2987 */
2988 dmar_disable_qi(iommu);
2989 }
2990
2991 if (dmar_enable_qi(iommu)) {
2992 /*
2993 * Queued Invalidate not enabled, use Register Based Invalidate
2994 */
2995 iommu->flush.flush_context = __iommu_flush_context;
2996 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
9f10e5bf 2997 pr_info("%s: Using Register based invalidation\n",
ffebeb46
JL
2998 iommu->name);
2999 } else {
3000 iommu->flush.flush_context = qi_flush_context;
3001 iommu->flush.flush_iotlb = qi_flush_iotlb;
9f10e5bf 3002 pr_info("%s: Using Queued invalidation\n", iommu->name);
ffebeb46
JL
3003 }
3004}
3005
091d42e4 3006static int copy_context_table(struct intel_iommu *iommu,
dfddb969 3007 struct root_entry *old_re,
091d42e4
JR
3008 struct context_entry **tbl,
3009 int bus, bool ext)
3010{
dbcd861f 3011 int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
543c8dcf 3012 struct context_entry *new_ce = NULL, ce;
dfddb969 3013 struct context_entry *old_ce = NULL;
543c8dcf 3014 struct root_entry re;
091d42e4
JR
3015 phys_addr_t old_ce_phys;
3016
3017 tbl_idx = ext ? bus * 2 : bus;
dfddb969 3018 memcpy(&re, old_re, sizeof(re));
091d42e4
JR
3019
3020 for (devfn = 0; devfn < 256; devfn++) {
3021 /* First calculate the correct index */
3022 idx = (ext ? devfn * 2 : devfn) % 256;
3023
3024 if (idx == 0) {
3025 /* First save what we may have and clean up */
3026 if (new_ce) {
3027 tbl[tbl_idx] = new_ce;
3028 __iommu_flush_cache(iommu, new_ce,
3029 VTD_PAGE_SIZE);
3030 pos = 1;
3031 }
3032
3033 if (old_ce)
829383e1 3034 memunmap(old_ce);
091d42e4
JR
3035
3036 ret = 0;
3037 if (devfn < 0x80)
543c8dcf 3038 old_ce_phys = root_entry_lctp(&re);
091d42e4 3039 else
543c8dcf 3040 old_ce_phys = root_entry_uctp(&re);
091d42e4
JR
3041
3042 if (!old_ce_phys) {
3043 if (ext && devfn == 0) {
3044 /* No LCTP, try UCTP */
3045 devfn = 0x7f;
3046 continue;
3047 } else {
3048 goto out;
3049 }
3050 }
3051
3052 ret = -ENOMEM;
dfddb969
DW
3053 old_ce = memremap(old_ce_phys, PAGE_SIZE,
3054 MEMREMAP_WB);
091d42e4
JR
3055 if (!old_ce)
3056 goto out;
3057
3058 new_ce = alloc_pgtable_page(iommu->node);
3059 if (!new_ce)
3060 goto out_unmap;
3061
3062 ret = 0;
3063 }
3064
3065 /* Now copy the context entry */
dfddb969 3066 memcpy(&ce, old_ce + idx, sizeof(ce));
091d42e4 3067
cf484d0e 3068 if (!__context_present(&ce))
091d42e4
JR
3069 continue;
3070
dbcd861f
JR
3071 did = context_domain_id(&ce);
3072 if (did >= 0 && did < cap_ndoms(iommu->cap))
3073 set_bit(did, iommu->domain_ids);
3074
cf484d0e
JR
3075 /*
3076 * We need a marker for copied context entries. This
3077 * marker needs to work for the old format as well as
3078 * for extended context entries.
3079 *
3080 * Bit 67 of the context entry is used. In the old
3081 * format this bit is available to software, in the
3082 * extended format it is the PGE bit, but PGE is ignored
3083 * by HW if PASIDs are disabled (and thus still
3084 * available).
3085 *
3086 * So disable PASIDs first and then mark the entry
3087 * copied. This means that we don't copy PASID
3088 * translations from the old kernel, but this is fine as
3089 * faults there are not fatal.
3090 */
3091 context_clear_pasid_enable(&ce);
3092 context_set_copied(&ce);
3093
091d42e4
JR
3094 new_ce[idx] = ce;
3095 }
3096
3097 tbl[tbl_idx + pos] = new_ce;
3098
3099 __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
3100
3101out_unmap:
dfddb969 3102 memunmap(old_ce);
091d42e4
JR
3103
3104out:
3105 return ret;
3106}
3107
3108static int copy_translation_tables(struct intel_iommu *iommu)
3109{
3110 struct context_entry **ctxt_tbls;
dfddb969 3111 struct root_entry *old_rt;
091d42e4
JR
3112 phys_addr_t old_rt_phys;
3113 int ctxt_table_entries;
3114 unsigned long flags;
3115 u64 rtaddr_reg;
3116 int bus, ret;
c3361f2f 3117 bool new_ext, ext;
091d42e4
JR
3118
3119 rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
3120 ext = !!(rtaddr_reg & DMA_RTADDR_RTT);
c3361f2f
JR
3121 new_ext = !!ecap_ecs(iommu->ecap);
3122
3123 /*
3124 * The RTT bit can only be changed when translation is disabled,
3125 * but disabling translation means opening a window for data
3126 * corruption. So bail out and don't copy anything if we would
3127 * have to change the bit.
3128 */
3129 if (new_ext != ext)
3130 return -EINVAL;
091d42e4
JR
3131
3132 old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
3133 if (!old_rt_phys)
3134 return -EINVAL;
3135
dfddb969 3136 old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
091d42e4
JR
3137 if (!old_rt)
3138 return -ENOMEM;
3139
3140 /* This is too big for the stack - allocate it from slab */
3141 ctxt_table_entries = ext ? 512 : 256;
3142 ret = -ENOMEM;
6396bb22 3143 ctxt_tbls = kcalloc(ctxt_table_entries, sizeof(void *), GFP_KERNEL);
091d42e4
JR
3144 if (!ctxt_tbls)
3145 goto out_unmap;
3146
3147 for (bus = 0; bus < 256; bus++) {
3148 ret = copy_context_table(iommu, &old_rt[bus],
3149 ctxt_tbls, bus, ext);
3150 if (ret) {
3151 pr_err("%s: Failed to copy context table for bus %d\n",
3152 iommu->name, bus);
3153 continue;
3154 }
3155 }
3156
3157 spin_lock_irqsave(&iommu->lock, flags);
3158
3159 /* Context tables are copied, now write them to the root_entry table */
3160 for (bus = 0; bus < 256; bus++) {
3161 int idx = ext ? bus * 2 : bus;
3162 u64 val;
3163
3164 if (ctxt_tbls[idx]) {
3165 val = virt_to_phys(ctxt_tbls[idx]) | 1;
3166 iommu->root_entry[bus].lo = val;
3167 }
3168
3169 if (!ext || !ctxt_tbls[idx + 1])
3170 continue;
3171
3172 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
3173 iommu->root_entry[bus].hi = val;
3174 }
3175
3176 spin_unlock_irqrestore(&iommu->lock, flags);
3177
3178 kfree(ctxt_tbls);
3179
3180 __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
3181
3182 ret = 0;
3183
3184out_unmap:
dfddb969 3185 memunmap(old_rt);
091d42e4
JR
3186
3187 return ret;
3188}
3189
b779260b 3190static int __init init_dmars(void)
ba395927
KA
3191{
3192 struct dmar_drhd_unit *drhd;
ba395927 3193 struct intel_iommu *iommu;
df4f3c60 3194 int ret;
2c2e2c38 3195
ba395927
KA
3196 /*
3197 * for each drhd
3198 * allocate root
3199 * initialize and program root entry to not present
3200 * endfor
3201 */
3202 for_each_drhd_unit(drhd) {
5e0d2a6f 3203 /*
3204 * lock not needed as this is only incremented in the single-
3205 * threaded kernel __init code path; all other accesses are
3206 * read-only
3207 */
78d8e704 3208 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
1b198bb0
MT
3209 g_num_of_iommus++;
3210 continue;
3211 }
9f10e5bf 3212 pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
5e0d2a6f 3213 }
3214
ffebeb46
JL
3215 /* Preallocate enough resources for IOMMU hot-addition */
3216 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
3217 g_num_of_iommus = DMAR_UNITS_SUPPORTED;
3218
d9630fe9
WH
3219 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
3220 GFP_KERNEL);
3221 if (!g_iommus) {
9f10e5bf 3222 pr_err("Allocating global iommu array failed\n");
d9630fe9
WH
3223 ret = -ENOMEM;
3224 goto error;
3225 }
3226
6a8c6748
LB
3227 for_each_iommu(iommu, drhd) {
3228 if (drhd->ignored) {
3229 iommu_disable_translation(iommu);
3230 continue;
3231 }
3232
56283174
LB
3233 /*
3234 * Find the max PASID size of all IOMMUs in the system.
3235 * We need to ensure the system pasid table is no bigger
3236 * than the smallest supported.
3237 */
765b6a98 3238 if (pasid_supported(iommu)) {
56283174
LB
3239 u32 temp = 2 << ecap_pss(iommu->ecap);
3240
3241 intel_pasid_max_id = min_t(u32, temp,
3242 intel_pasid_max_id);
3243 }
3244
d9630fe9 3245 g_iommus[iommu->seq_id] = iommu;
ba395927 3246
b63d80d1
JR
3247 intel_iommu_init_qi(iommu);
3248
e61d98d8
SS
3249 ret = iommu_init_domains(iommu);
3250 if (ret)
989d51fc 3251 goto free_iommu;
e61d98d8 3252
4158c2ec
JR
3253 init_translation_status(iommu);
3254
091d42e4
JR
3255 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
3256 iommu_disable_translation(iommu);
3257 clear_translation_pre_enabled(iommu);
3258 pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3259 iommu->name);
3260 }
4158c2ec 3261
ba395927
KA
3262 /*
3263 * TBD:
3264 * we could share the same root & context tables
25985edc 3265 * among all IOMMUs. Need to split this later.
ba395927
KA
3266 */
3267 ret = iommu_alloc_root_entry(iommu);
ffebeb46 3268 if (ret)
989d51fc 3269 goto free_iommu;
5f0a7f76 3270
091d42e4
JR
3271 if (translation_pre_enabled(iommu)) {
3272 pr_info("Translation already enabled - trying to copy translation structures\n");
3273
3274 ret = copy_translation_tables(iommu);
3275 if (ret) {
3276 /*
3277 * We found the IOMMU with translation
3278 * enabled - but failed to copy over the
3279 * old root-entry table. Try to proceed
3280 * by disabling translation now and
3281 * allocating a clean root-entry table.
3282 * This might cause DMAR faults, but
3283 * probably the dump will still succeed.
3284 */
3285 pr_err("Failed to copy translation tables from previous kernel for %s\n",
3286 iommu->name);
3287 iommu_disable_translation(iommu);
3288 clear_translation_pre_enabled(iommu);
3289 } else {
3290 pr_info("Copied translation tables from previous kernel for %s\n",
3291 iommu->name);
3292 }
3293 }
3294
4ed0d3e6 3295 if (!ecap_pass_through(iommu->ecap))
19943b0e 3296 hw_pass_through = 0;
8a94ade4 3297#ifdef CONFIG_INTEL_IOMMU_SVM
765b6a98 3298 if (pasid_supported(iommu))
d9737953 3299 intel_svm_init(iommu);
8a94ade4 3300#endif
ba395927
KA
3301 }
3302
a4c34ff1
JR
3303 /*
3304 * Now that qi is enabled on all iommus, set the root entry and flush
3305 * caches. This is required on some Intel X58 chipsets, otherwise the
3306 * flush_context function will loop forever and the boot hangs.
3307 */
3308 for_each_active_iommu(iommu, drhd) {
3309 iommu_flush_write_buffer(iommu);
3310 iommu_set_root_entry(iommu);
3311 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
3312 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3313 }
3314
6b9a7d3a 3315 if (iommu_default_passthrough())
e0fc7e0b
DW
3316 iommu_identity_mapping |= IDENTMAP_ALL;
3317
d3f13810 3318#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
5daab580 3319 dmar_map_gfx = 0;
19943b0e 3320#endif
e0fc7e0b 3321
5daab580
LB
3322 if (!dmar_map_gfx)
3323 iommu_identity_mapping |= IDENTMAP_GFX;
3324
21e722c4
AR
3325 check_tylersburg_isoch();
3326
4de354ec
LB
3327 ret = si_domain_init(hw_pass_through);
3328 if (ret)
3329 goto free_iommu;
86080ccc 3330
ba395927
KA
3331 /*
3332 * for each drhd
3333 * enable fault log
3334 * global invalidate context cache
3335 * global invalidate iotlb
3336 * enable translation
3337 */
7c919779 3338 for_each_iommu(iommu, drhd) {
51a63e67
JC
3339 if (drhd->ignored) {
3340 /*
3341 * we always have to disable PMRs or DMA may fail on
3342 * this device
3343 */
3344 if (force_on)
7c919779 3345 iommu_disable_protect_mem_regions(iommu);
ba395927 3346 continue;
51a63e67 3347 }
ba395927
KA
3348
3349 iommu_flush_write_buffer(iommu);
3350
a222a7f0 3351#ifdef CONFIG_INTEL_IOMMU_SVM
765b6a98 3352 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
a7755c3c
LB
3353 /*
3354 * Calling dmar_alloc_hwirq() with dmar_global_lock held
3355 * could cause a lock race condition.
3356 */
3357 up_write(&dmar_global_lock);
a222a7f0 3358 ret = intel_svm_enable_prq(iommu);
a7755c3c 3359 down_write(&dmar_global_lock);
a222a7f0
DW
3360 if (ret)
3361 goto free_iommu;
3362 }
3363#endif
3460a6d9
KA
3364 ret = dmar_set_interrupt(iommu);
3365 if (ret)
989d51fc 3366 goto free_iommu;
ba395927
KA
3367 }
3368
3369 return 0;
989d51fc
JL
3370
3371free_iommu:
ffebeb46
JL
3372 for_each_active_iommu(iommu, drhd) {
3373 disable_dmar_iommu(iommu);
a868e6b7 3374 free_dmar_iommu(iommu);
ffebeb46 3375 }
13cf0174 3376
d9630fe9 3377 kfree(g_iommus);
13cf0174 3378
989d51fc 3379error:
ba395927
KA
3380 return ret;
3381}
3382
5a5e02a6 3383/* This takes a number of _MM_ pages, not VTD pages */
2aac6304 3384static unsigned long intel_alloc_iova(struct device *dev,
875764de
DW
3385 struct dmar_domain *domain,
3386 unsigned long nrpages, uint64_t dma_mask)
ba395927 3387{
e083ea5b 3388 unsigned long iova_pfn;
ba395927 3389
875764de
DW
3390 /* Restrict dma_mask to the width that the iommu can handle */
3391 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
8f6429c7
RM
3392 /* Ensure we reserve the whole size-aligned region */
3393 nrpages = __roundup_pow_of_two(nrpages);
875764de
DW
3394
3395 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
ba395927
KA
3396 /*
3397 * First try to allocate an IO virtual address in
284901a9 3398 * DMA_BIT_MASK(32), and if that fails then try allocating
3609801e 3399 * from the higher range
ba395927 3400 */
22e2f9fa 3401 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
538d5b33 3402 IOVA_PFN(DMA_BIT_MASK(32)), false);
22e2f9fa
OP
3403 if (iova_pfn)
3404 return iova_pfn;
875764de 3405 }
538d5b33
TN
3406 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
3407 IOVA_PFN(dma_mask), true);
22e2f9fa 3408 if (unlikely(!iova_pfn)) {
932a6523 3409 dev_err(dev, "Allocating %ld-page iova failed", nrpages);
2aac6304 3410 return 0;
f76aec76
KA
3411 }
3412
22e2f9fa 3413 return iova_pfn;
f76aec76
KA
3414}
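/*
 * Editor's note (illustrative, not from the original source): the
 * __roundup_pow_of_two() above means a request for, say, 5 MM pages
 * reserves an 8-page, size-aligned IOVA region.  With a 64-bit dma_mask
 * the allocator first tries to place the region below 4GiB
 * (DMA_BIT_MASK(32)) and only falls back to the full mask if that
 * allocation fails.
 */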
3415
4ec066c7 3416static struct dmar_domain *get_private_domain_for_dev(struct device *dev)
f76aec76 3417{
1c5ebba9 3418 struct dmar_domain *domain, *tmp;
b1ce5b79 3419 struct dmar_rmrr_unit *rmrr;
b1ce5b79
JR
3420 struct device *i_dev;
3421 int i, ret;
f76aec76 3422
4ec066c7 3423 /* The device shouldn't be attached to any domain yet. */
1c5ebba9
JR
3424 domain = find_domain(dev);
3425 if (domain)
4ec066c7 3426 return NULL;
1c5ebba9
JR
3427
3428 domain = find_or_alloc_domain(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
3429 if (!domain)
3430 goto out;
ba395927 3431
b1ce5b79
JR
3432 /* We have a new domain - setup possible RMRRs for the device */
3433 rcu_read_lock();
3434 for_each_rmrr_units(rmrr) {
3435 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
3436 i, i_dev) {
3437 if (i_dev != dev)
3438 continue;
3439
3440 ret = domain_prepare_identity_map(dev, domain,
3441 rmrr->base_address,
3442 rmrr->end_address);
3443 if (ret)
3444 dev_err(dev, "Mapping reserved region failed\n");
3445 }
3446 }
3447 rcu_read_unlock();
3448
1c5ebba9
JR
3449 tmp = set_domain_for_dev(dev, domain);
3450 if (!tmp || domain != tmp) {
3451 domain_exit(domain);
3452 domain = tmp;
3453 }
3454
3455out:
1c5ebba9 3456 if (!domain)
932a6523 3457 dev_err(dev, "Allocating domain failed\n");
c57b260a
LB
3458 else
3459 domain->domain.type = IOMMU_DOMAIN_DMA;
1c5ebba9 3460
f76aec76
KA
3461 return domain;
3462}
3463
ecb509ec 3464/* Check if the dev needs to go through non-identity map and unmap process.*/
48b2c937 3465static bool iommu_need_mapping(struct device *dev)
2c2e2c38 3466{
98b2fffb 3467 int ret;
2c2e2c38 3468
3d89194a 3469 if (iommu_dummy(dev))
48b2c937 3470 return false;
1e4c64c4 3471
98b2fffb
LB
3472 ret = identity_mapping(dev);
3473 if (ret) {
3474 u64 dma_mask = *dev->dma_mask;
3475
3476 if (dev->coherent_dma_mask && dev->coherent_dma_mask < dma_mask)
3477 dma_mask = dev->coherent_dma_mask;
3478
9c24eaf8 3479 if (dma_mask >= dma_direct_get_required_mask(dev))
48b2c937
CH
3480 return false;
3481
3482 /*
3483 * The 32-bit DMA device is removed from si_domain and falls
3484 * back to non-identity mapping.
3485 */
3486 dmar_remove_one_dev_info(dev);
98b2fffb
LB
3487 ret = iommu_request_dma_domain_for_dev(dev);
3488 if (ret) {
3489 struct iommu_domain *domain;
3490 struct dmar_domain *dmar_domain;
3491
3492 domain = iommu_get_domain_for_dev(dev);
3493 if (domain) {
3494 dmar_domain = to_dmar_domain(domain);
3495 dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
3496 }
ae23bfb6 3497 dmar_remove_one_dev_info(dev);
4ec066c7 3498 get_private_domain_for_dev(dev);
2c2e2c38 3499 }
98b2fffb
LB
3500
3501 dev_info(dev, "32bit DMA uses non-identity mapping\n");
2c2e2c38
FY
3502 }
3503
48b2c937 3504 return true;
2c2e2c38
FY
3505}
3506
21d5d27c
LG
3507static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
3508 size_t size, int dir, u64 dma_mask)
f76aec76 3509{
f76aec76 3510 struct dmar_domain *domain;
5b6985ce 3511 phys_addr_t start_paddr;
2aac6304 3512 unsigned long iova_pfn;
f76aec76 3513 int prot = 0;
6865f0d1 3514 int ret;
8c11e798 3515 struct intel_iommu *iommu;
33041ec0 3516 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
f76aec76
KA
3517
3518 BUG_ON(dir == DMA_NONE);
2c2e2c38 3519
1ee0186b 3520 domain = deferred_attach_domain(dev);
f76aec76 3521 if (!domain)
524a669b 3522 return DMA_MAPPING_ERROR;
f76aec76 3523
8c11e798 3524 iommu = domain_get_iommu(domain);
88cb6a74 3525 size = aligned_nrpages(paddr, size);
f76aec76 3526
2aac6304
OP
3527 iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
3528 if (!iova_pfn)
f76aec76
KA
3529 goto error;
3530
ba395927
KA
3531 /*
 3532 * Check if the DMAR hardware supports zero-length reads on
 3533 * write-only mappings.
3534 */
3535 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3536 !cap_zlr(iommu->cap))
ba395927
KA
3537 prot |= DMA_PTE_READ;
3538 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3539 prot |= DMA_PTE_WRITE;
3540 /*
6865f0d1 3541 * paddr..(paddr + size) might cover only part of a page, but we must map
ba395927 3542 * the whole page. Note: if two parts of one page are mapped separately,
6865f0d1 3543 * we might have two guest addresses mapping to the same host paddr, but
ba395927
KA
 3544 * this is not a big problem.
3545 */
2aac6304 3546 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova_pfn),
33041ec0 3547 mm_to_dma_pfn(paddr_pfn), size, prot);
ba395927
KA
3548 if (ret)
3549 goto error;
3550
2aac6304 3551 start_paddr = (phys_addr_t)iova_pfn << PAGE_SHIFT;
03d6a246 3552 start_paddr += paddr & ~PAGE_MASK;
3b53034c
LB
3553
3554 trace_map_single(dev, start_paddr, paddr, size << VTD_PAGE_SHIFT);
3555
03d6a246 3556 return start_paddr;
ba395927 3557
ba395927 3558error:
2aac6304 3559 if (iova_pfn)
22e2f9fa 3560 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
932a6523
BH
3561 dev_err(dev, "Device request: %zx@%llx dir %d --- failed\n",
3562 size, (unsigned long long)paddr, dir);
524a669b 3563 return DMA_MAPPING_ERROR;
ba395927
KA
3564}
3565
ffbbef5c
FT
3566static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3567 unsigned long offset, size_t size,
3568 enum dma_data_direction dir,
00085f1e 3569 unsigned long attrs)
bb9e6d65 3570{
9cc0c2af
CH
3571 if (iommu_need_mapping(dev))
3572 return __intel_map_single(dev, page_to_phys(page) + offset,
3573 size, dir, *dev->dma_mask);
3574 return dma_direct_map_page(dev, page, offset, size, dir, attrs);
21d5d27c
LG
3575}
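/*
 * Caller's view (illustrative sketch, not code from this driver): drivers
 * never call intel_map_page() directly; they use the generic DMA API,
 * which dispatches to the dma_map_ops installed for the device. The
 * function name example_dma_usage() is hypothetical.
 */
#if 0
static int example_dma_usage(struct device *dev, void *buf, size_t len)
{
	dma_addr_t dma = dma_map_single(dev, buf, len, DMA_TO_DEVICE);

	if (dma_mapping_error(dev, dma))
		return -ENOMEM;

	/* ... hand 'dma' to the device and wait for completion ... */

	dma_unmap_single(dev, dma, len, DMA_TO_DEVICE);
	return 0;
}
#endif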
3576
3577static dma_addr_t intel_map_resource(struct device *dev, phys_addr_t phys_addr,
3578 size_t size, enum dma_data_direction dir,
3579 unsigned long attrs)
3580{
9cc0c2af
CH
3581 if (iommu_need_mapping(dev))
3582 return __intel_map_single(dev, phys_addr, size, dir,
3583 *dev->dma_mask);
3584 return dma_direct_map_resource(dev, phys_addr, size, dir, attrs);
bb9e6d65
FT
3585}
3586
769530e4 3587static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
ba395927 3588{
f76aec76 3589 struct dmar_domain *domain;
d794dc9b 3590 unsigned long start_pfn, last_pfn;
769530e4 3591 unsigned long nrpages;
2aac6304 3592 unsigned long iova_pfn;
8c11e798 3593 struct intel_iommu *iommu;
ea8ea460 3594 struct page *freelist;
f7b0c4ce 3595 struct pci_dev *pdev = NULL;
ba395927 3596
1525a29a 3597 domain = find_domain(dev);
ba395927
KA
3598 BUG_ON(!domain);
3599
8c11e798
WH
3600 iommu = domain_get_iommu(domain);
3601
2aac6304 3602 iova_pfn = IOVA_PFN(dev_addr);
ba395927 3603
769530e4 3604 nrpages = aligned_nrpages(dev_addr, size);
2aac6304 3605 start_pfn = mm_to_dma_pfn(iova_pfn);
769530e4 3606 last_pfn = start_pfn + nrpages - 1;
ba395927 3607
f7b0c4ce
LB
3608 if (dev_is_pci(dev))
3609 pdev = to_pci_dev(dev);
3610
ea8ea460 3611 freelist = domain_unmap(domain, start_pfn, last_pfn);
effa4678
DS
3612 if (intel_iommu_strict || (pdev && pdev->untrusted) ||
3613 !has_iova_flush_queue(&domain->iovad)) {
a1ddcbe9 3614 iommu_flush_iotlb_psi(iommu, domain, start_pfn,
769530e4 3615 nrpages, !freelist, 0);
5e0d2a6f 3616 /* free iova */
22e2f9fa 3617 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages));
ea8ea460 3618 dma_free_pagelist(freelist);
5e0d2a6f 3619 } else {
13cf0174
JR
3620 queue_iova(&domain->iovad, iova_pfn, nrpages,
3621 (unsigned long)freelist);
5e0d2a6f 3622 /*
 3623 * Queue up the release of the unmap to save roughly 1/6th of the
 3624 * CPU time otherwise consumed by the IOTLB flush operation.
3625 */
5e0d2a6f 3626 }
3b53034c
LB
3627
3628 trace_unmap_single(dev, dev_addr, size);
ba395927
KA
3629}
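/*
 * Illustrative note: with intel_iommu_strict set, an untrusted device, or
 * no flush queue available, the IOTLB is invalidated synchronously above
 * and the IOVA freed immediately; otherwise the IOVA and the freed
 * page-table pages are queued and flushed in batches, trading a short
 * window of stale TLB entries for much lower per-unmap CPU cost.
 */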
3630
d41a4adb
JL
3631static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3632 size_t size, enum dma_data_direction dir,
00085f1e 3633 unsigned long attrs)
d41a4adb 3634{
9cc0c2af
CH
3635 if (iommu_need_mapping(dev))
3636 intel_unmap(dev, dev_addr, size);
3637 else
3638 dma_direct_unmap_page(dev, dev_addr, size, dir, attrs);
3639}
3640
3641static void intel_unmap_resource(struct device *dev, dma_addr_t dev_addr,
3642 size_t size, enum dma_data_direction dir, unsigned long attrs)
3643{
3644 if (iommu_need_mapping(dev))
3645 intel_unmap(dev, dev_addr, size);
d41a4adb
JL
3646}
3647
5040a918 3648static void *intel_alloc_coherent(struct device *dev, size_t size,
baa676fc 3649 dma_addr_t *dma_handle, gfp_t flags,
00085f1e 3650 unsigned long attrs)
ba395927 3651{
7ec916f8
CH
3652 struct page *page = NULL;
3653 int order;
ba395927 3654
9cc0c2af
CH
3655 if (!iommu_need_mapping(dev))
3656 return dma_direct_alloc(dev, size, dma_handle, flags, attrs);
3657
7ec916f8
CH
3658 size = PAGE_ALIGN(size);
3659 order = get_order(size);
7ec916f8
CH
3660
3661 if (gfpflags_allow_blocking(flags)) {
3662 unsigned int count = size >> PAGE_SHIFT;
3663
d834c5ab
MS
3664 page = dma_alloc_from_contiguous(dev, count, order,
3665 flags & __GFP_NOWARN);
7ec916f8
CH
3666 }
3667
3668 if (!page)
3669 page = alloc_pages(flags, order);
3670 if (!page)
3671 return NULL;
3672 memset(page_address(page), 0, size);
3673
21d5d27c
LG
3674 *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
3675 DMA_BIDIRECTIONAL,
3676 dev->coherent_dma_mask);
524a669b 3677 if (*dma_handle != DMA_MAPPING_ERROR)
7ec916f8
CH
3678 return page_address(page);
3679 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3680 __free_pages(page, order);
36746436 3681
ba395927
KA
3682 return NULL;
3683}
3684
5040a918 3685static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
00085f1e 3686 dma_addr_t dma_handle, unsigned long attrs)
ba395927 3687{
7ec916f8
CH
3688 int order;
3689 struct page *page = virt_to_page(vaddr);
3690
9cc0c2af
CH
3691 if (!iommu_need_mapping(dev))
3692 return dma_direct_free(dev, size, vaddr, dma_handle, attrs);
3693
7ec916f8
CH
3694 size = PAGE_ALIGN(size);
3695 order = get_order(size);
3696
3697 intel_unmap(dev, dma_handle, size);
3698 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3699 __free_pages(page, order);
ba395927
KA
3700}
3701
5040a918 3702static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
d7ab5c46 3703 int nelems, enum dma_data_direction dir,
00085f1e 3704 unsigned long attrs)
ba395927 3705{
769530e4
OP
3706 dma_addr_t startaddr = sg_dma_address(sglist) & PAGE_MASK;
3707 unsigned long nrpages = 0;
3708 struct scatterlist *sg;
3709 int i;
3710
9cc0c2af
CH
3711 if (!iommu_need_mapping(dev))
3712 return dma_direct_unmap_sg(dev, sglist, nelems, dir, attrs);
3713
769530e4
OP
3714 for_each_sg(sglist, sg, nelems, i) {
3715 nrpages += aligned_nrpages(sg_dma_address(sg), sg_dma_len(sg));
3716 }
3717
3718 intel_unmap(dev, startaddr, nrpages << VTD_PAGE_SHIFT);
3b53034c
LB
3719
3720 trace_unmap_sg(dev, startaddr, nrpages << VTD_PAGE_SHIFT);
ba395927
KA
3721}
3722
5040a918 3723static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
00085f1e 3724 enum dma_data_direction dir, unsigned long attrs)
ba395927 3725{
ba395927 3726 int i;
ba395927 3727 struct dmar_domain *domain;
f76aec76
KA
3728 size_t size = 0;
3729 int prot = 0;
2aac6304 3730 unsigned long iova_pfn;
f76aec76 3731 int ret;
c03ab37c 3732 struct scatterlist *sg;
b536d24d 3733 unsigned long start_vpfn;
8c11e798 3734 struct intel_iommu *iommu;
ba395927
KA
3735
3736 BUG_ON(dir == DMA_NONE);
48b2c937 3737 if (!iommu_need_mapping(dev))
9cc0c2af 3738 return dma_direct_map_sg(dev, sglist, nelems, dir, attrs);
ba395927 3739
1ee0186b 3740 domain = deferred_attach_domain(dev);
f76aec76
KA
3741 if (!domain)
3742 return 0;
3743
8c11e798
WH
3744 iommu = domain_get_iommu(domain);
3745
b536d24d 3746 for_each_sg(sglist, sg, nelems, i)
88cb6a74 3747 size += aligned_nrpages(sg->offset, sg->length);
f76aec76 3748
2aac6304 3749 iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
5040a918 3750 *dev->dma_mask);
2aac6304 3751 if (!iova_pfn) {
c03ab37c 3752 sglist->dma_length = 0;
f76aec76
KA
3753 return 0;
3754 }
3755
3756 /*
 3757 * Check if the DMAR hardware supports zero-length reads on
 3758 * write-only mappings.
3759 */
3760 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3761 !cap_zlr(iommu->cap))
f76aec76
KA
3762 prot |= DMA_PTE_READ;
3763 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3764 prot |= DMA_PTE_WRITE;
3765
2aac6304 3766 start_vpfn = mm_to_dma_pfn(iova_pfn);
e1605495 3767
f532959b 3768 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
e1605495 3769 if (unlikely(ret)) {
e1605495 3770 dma_pte_free_pagetable(domain, start_vpfn,
bc24c571
DD
3771 start_vpfn + size - 1,
3772 agaw_to_level(domain->agaw) + 1);
22e2f9fa 3773 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
e1605495 3774 return 0;
ba395927
KA
3775 }
3776
3b53034c
LB
3777 trace_map_sg(dev, iova_pfn << PAGE_SHIFT,
3778 sg_phys(sglist), size << VTD_PAGE_SHIFT);
3779
ba395927
KA
3780 return nelems;
3781}
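/*
 * Caller's view (illustrative sketch, not code from this driver):
 * scatter-gather mappings also go through the generic DMA API, which
 * ends up in intel_map_sg() when these dma_map_ops are installed. The
 * helper name example_map_sg() is hypothetical.
 */
#if 0
static int example_map_sg(struct device *dev, struct scatterlist *sgl,
			  int nents)
{
	int mapped = dma_map_sg(dev, sgl, nents, DMA_FROM_DEVICE);

	if (!mapped)
		return -ENOMEM;

	/* ... program the device using sg_dma_address()/sg_dma_len() ... */

	dma_unmap_sg(dev, sgl, nents, DMA_FROM_DEVICE);
	return 0;
}
#endif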
3782
9c24eaf8
AS
3783static u64 intel_get_required_mask(struct device *dev)
3784{
3785 if (!iommu_need_mapping(dev))
3786 return dma_direct_get_required_mask(dev);
3787 return DMA_BIT_MASK(32);
3788}
3789
02b4da5f 3790static const struct dma_map_ops intel_dma_ops = {
baa676fc
AP
3791 .alloc = intel_alloc_coherent,
3792 .free = intel_free_coherent,
ba395927
KA
3793 .map_sg = intel_map_sg,
3794 .unmap_sg = intel_unmap_sg,
ffbbef5c
FT
3795 .map_page = intel_map_page,
3796 .unmap_page = intel_unmap_page,
21d5d27c 3797 .map_resource = intel_map_resource,
9cc0c2af 3798 .unmap_resource = intel_unmap_resource,
fec777c3 3799 .dma_supported = dma_direct_supported,
f9f3232a
CH
3800 .mmap = dma_common_mmap,
3801 .get_sgtable = dma_common_get_sgtable,
9c24eaf8 3802 .get_required_mask = intel_get_required_mask,
ba395927
KA
3803};
3804
cfb94a37
LB
3805static void
3806bounce_sync_single(struct device *dev, dma_addr_t addr, size_t size,
3807 enum dma_data_direction dir, enum dma_sync_target target)
3808{
3809 struct dmar_domain *domain;
3810 phys_addr_t tlb_addr;
3811
3812 domain = find_domain(dev);
3813 if (WARN_ON(!domain))
3814 return;
3815
3816 tlb_addr = intel_iommu_iova_to_phys(&domain->domain, addr);
3817 if (is_swiotlb_buffer(tlb_addr))
3818 swiotlb_tbl_sync_single(dev, tlb_addr, size, dir, target);
3819}
3820
3821static dma_addr_t
3822bounce_map_single(struct device *dev, phys_addr_t paddr, size_t size,
3823 enum dma_data_direction dir, unsigned long attrs,
3824 u64 dma_mask)
3825{
3826 size_t aligned_size = ALIGN(size, VTD_PAGE_SIZE);
3827 struct dmar_domain *domain;
3828 struct intel_iommu *iommu;
3829 unsigned long iova_pfn;
3830 unsigned long nrpages;
3831 phys_addr_t tlb_addr;
3832 int prot = 0;
3833 int ret;
3834
1ee0186b 3835 domain = deferred_attach_domain(dev);
cfb94a37
LB
3836 if (WARN_ON(dir == DMA_NONE || !domain))
3837 return DMA_MAPPING_ERROR;
3838
3839 iommu = domain_get_iommu(domain);
3840 if (WARN_ON(!iommu))
3841 return DMA_MAPPING_ERROR;
3842
3843 nrpages = aligned_nrpages(0, size);
3844 iova_pfn = intel_alloc_iova(dev, domain,
3845 dma_to_mm_pfn(nrpages), dma_mask);
3846 if (!iova_pfn)
3847 return DMA_MAPPING_ERROR;
3848
3849 /*
 3850 * Check if the DMAR hardware supports zero-length reads on
 3851 * write-only mappings.
3852 */
3853 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
3854 !cap_zlr(iommu->cap))
3855 prot |= DMA_PTE_READ;
3856 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3857 prot |= DMA_PTE_WRITE;
3858
3859 /*
3860 * If both the physical buffer start address and size are
3861 * page aligned, we don't need to use a bounce page.
3862 */
3863 if (!IS_ALIGNED(paddr | size, VTD_PAGE_SIZE)) {
3864 tlb_addr = swiotlb_tbl_map_single(dev,
3865 __phys_to_dma(dev, io_tlb_start),
3866 paddr, size, aligned_size, dir, attrs);
3867 if (tlb_addr == DMA_MAPPING_ERROR) {
3868 goto swiotlb_error;
3869 } else {
 3870 /* Clean up the padding area. */
3871 void *padding_start = phys_to_virt(tlb_addr);
3872 size_t padding_size = aligned_size;
3873
3874 if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
3875 (dir == DMA_TO_DEVICE ||
3876 dir == DMA_BIDIRECTIONAL)) {
3877 padding_start += size;
3878 padding_size -= size;
3879 }
3880
3881 memset(padding_start, 0, padding_size);
3882 }
3883 } else {
3884 tlb_addr = paddr;
3885 }
3886
3887 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova_pfn),
3888 tlb_addr >> VTD_PAGE_SHIFT, nrpages, prot);
3889 if (ret)
3890 goto mapping_error;
3891
3892 trace_bounce_map_single(dev, iova_pfn << PAGE_SHIFT, paddr, size);
3893
3894 return (phys_addr_t)iova_pfn << PAGE_SHIFT;
3895
3896mapping_error:
3897 if (is_swiotlb_buffer(tlb_addr))
3898 swiotlb_tbl_unmap_single(dev, tlb_addr, size,
3899 aligned_size, dir, attrs);
3900swiotlb_error:
3901 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages));
3902 dev_err(dev, "Device bounce map: %zx@%llx dir %d --- failed\n",
3903 size, (unsigned long long)paddr, dir);
3904
3905 return DMA_MAPPING_ERROR;
3906}
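/*
 * Worked example (illustrative): with VTD_PAGE_SIZE == 4 KiB, a buffer at
 * paddr 0x12345100 of size 0x200 fails the IS_ALIGNED() check above, so
 * it is bounced through a swiotlb slot and only the aligned bounce buffer
 * is mapped by the IOMMU. A buffer at 0x12345000 of size 0x2000 is already
 * page aligned and is mapped in place, with no copy.
 */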
3907
3908static void
3909bounce_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
3910 enum dma_data_direction dir, unsigned long attrs)
3911{
3912 size_t aligned_size = ALIGN(size, VTD_PAGE_SIZE);
3913 struct dmar_domain *domain;
3914 phys_addr_t tlb_addr;
3915
3916 domain = find_domain(dev);
3917 if (WARN_ON(!domain))
3918 return;
3919
3920 tlb_addr = intel_iommu_iova_to_phys(&domain->domain, dev_addr);
3921 if (WARN_ON(!tlb_addr))
3922 return;
3923
3924 intel_unmap(dev, dev_addr, size);
3925 if (is_swiotlb_buffer(tlb_addr))
3926 swiotlb_tbl_unmap_single(dev, tlb_addr, size,
3927 aligned_size, dir, attrs);
3928
3929 trace_bounce_unmap_single(dev, dev_addr, size);
3930}
3931
3932static dma_addr_t
3933bounce_map_page(struct device *dev, struct page *page, unsigned long offset,
3934 size_t size, enum dma_data_direction dir, unsigned long attrs)
3935{
3936 return bounce_map_single(dev, page_to_phys(page) + offset,
3937 size, dir, attrs, *dev->dma_mask);
3938}
3939
3940static dma_addr_t
3941bounce_map_resource(struct device *dev, phys_addr_t phys_addr, size_t size,
3942 enum dma_data_direction dir, unsigned long attrs)
3943{
3944 return bounce_map_single(dev, phys_addr, size,
3945 dir, attrs, *dev->dma_mask);
3946}
3947
3948static void
3949bounce_unmap_page(struct device *dev, dma_addr_t dev_addr, size_t size,
3950 enum dma_data_direction dir, unsigned long attrs)
3951{
3952 bounce_unmap_single(dev, dev_addr, size, dir, attrs);
3953}
3954
3955static void
3956bounce_unmap_resource(struct device *dev, dma_addr_t dev_addr, size_t size,
3957 enum dma_data_direction dir, unsigned long attrs)
3958{
3959 bounce_unmap_single(dev, dev_addr, size, dir, attrs);
3960}
3961
3962static void
3963bounce_unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems,
3964 enum dma_data_direction dir, unsigned long attrs)
3965{
3966 struct scatterlist *sg;
3967 int i;
3968
3969 for_each_sg(sglist, sg, nelems, i)
3970 bounce_unmap_page(dev, sg->dma_address,
3971 sg_dma_len(sg), dir, attrs);
3972}
3973
3974static int
3975bounce_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
3976 enum dma_data_direction dir, unsigned long attrs)
3977{
3978 int i;
3979 struct scatterlist *sg;
3980
3981 for_each_sg(sglist, sg, nelems, i) {
3982 sg->dma_address = bounce_map_page(dev, sg_page(sg),
3983 sg->offset, sg->length,
3984 dir, attrs);
3985 if (sg->dma_address == DMA_MAPPING_ERROR)
3986 goto out_unmap;
3987 sg_dma_len(sg) = sg->length;
3988 }
3989
3990 return nelems;
3991
3992out_unmap:
3993 bounce_unmap_sg(dev, sglist, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
3994 return 0;
3995}
3996
3997static void
3998bounce_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
3999 size_t size, enum dma_data_direction dir)
4000{
4001 bounce_sync_single(dev, addr, size, dir, SYNC_FOR_CPU);
4002}
4003
4004static void
4005bounce_sync_single_for_device(struct device *dev, dma_addr_t addr,
4006 size_t size, enum dma_data_direction dir)
4007{
4008 bounce_sync_single(dev, addr, size, dir, SYNC_FOR_DEVICE);
4009}
4010
4011static void
4012bounce_sync_sg_for_cpu(struct device *dev, struct scatterlist *sglist,
4013 int nelems, enum dma_data_direction dir)
4014{
4015 struct scatterlist *sg;
4016 int i;
4017
4018 for_each_sg(sglist, sg, nelems, i)
4019 bounce_sync_single(dev, sg_dma_address(sg),
4020 sg_dma_len(sg), dir, SYNC_FOR_CPU);
4021}
4022
4023static void
4024bounce_sync_sg_for_device(struct device *dev, struct scatterlist *sglist,
4025 int nelems, enum dma_data_direction dir)
4026{
4027 struct scatterlist *sg;
4028 int i;
4029
4030 for_each_sg(sglist, sg, nelems, i)
4031 bounce_sync_single(dev, sg_dma_address(sg),
4032 sg_dma_len(sg), dir, SYNC_FOR_DEVICE);
4033}
4034
4035static const struct dma_map_ops bounce_dma_ops = {
4036 .alloc = intel_alloc_coherent,
4037 .free = intel_free_coherent,
4038 .map_sg = bounce_map_sg,
4039 .unmap_sg = bounce_unmap_sg,
4040 .map_page = bounce_map_page,
4041 .unmap_page = bounce_unmap_page,
4042 .sync_single_for_cpu = bounce_sync_single_for_cpu,
4043 .sync_single_for_device = bounce_sync_single_for_device,
4044 .sync_sg_for_cpu = bounce_sync_sg_for_cpu,
4045 .sync_sg_for_device = bounce_sync_sg_for_device,
4046 .map_resource = bounce_map_resource,
4047 .unmap_resource = bounce_unmap_resource,
4048 .dma_supported = dma_direct_supported,
4049};
4050
ba395927
KA
4051static inline int iommu_domain_cache_init(void)
4052{
4053 int ret = 0;
4054
4055 iommu_domain_cache = kmem_cache_create("iommu_domain",
4056 sizeof(struct dmar_domain),
4057 0,
4058 SLAB_HWCACHE_ALIGN,
4059
4060 NULL);
4061 if (!iommu_domain_cache) {
9f10e5bf 4062 pr_err("Couldn't create iommu_domain cache\n");
ba395927
KA
4063 ret = -ENOMEM;
4064 }
4065
4066 return ret;
4067}
4068
4069static inline int iommu_devinfo_cache_init(void)
4070{
4071 int ret = 0;
4072
4073 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
4074 sizeof(struct device_domain_info),
4075 0,
4076 SLAB_HWCACHE_ALIGN,
ba395927
KA
4077 NULL);
4078 if (!iommu_devinfo_cache) {
9f10e5bf 4079 pr_err("Couldn't create devinfo cache\n");
ba395927
KA
4080 ret = -ENOMEM;
4081 }
4082
4083 return ret;
4084}
4085
ba395927
KA
4086static int __init iommu_init_mempool(void)
4087{
4088 int ret;
ae1ff3d6 4089 ret = iova_cache_get();
ba395927
KA
4090 if (ret)
4091 return ret;
4092
4093 ret = iommu_domain_cache_init();
4094 if (ret)
4095 goto domain_error;
4096
4097 ret = iommu_devinfo_cache_init();
4098 if (!ret)
4099 return ret;
4100
4101 kmem_cache_destroy(iommu_domain_cache);
4102domain_error:
ae1ff3d6 4103 iova_cache_put();
ba395927
KA
4104
4105 return -ENOMEM;
4106}
4107
4108static void __init iommu_exit_mempool(void)
4109{
4110 kmem_cache_destroy(iommu_devinfo_cache);
4111 kmem_cache_destroy(iommu_domain_cache);
ae1ff3d6 4112 iova_cache_put();
ba395927
KA
4113}
4114
556ab45f
DW
4115static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
4116{
4117 struct dmar_drhd_unit *drhd;
4118 u32 vtbar;
4119 int rc;
4120
4121 /* We know that this device on this chipset has its own IOMMU.
4122 * If we find it under a different IOMMU, then the BIOS is lying
4123 * to us. Hope that the IOMMU for this device is actually
4124 * disabled, and it needs no translation...
4125 */
4126 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
4127 if (rc) {
4128 /* "can't" happen */
4129 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
4130 return;
4131 }
4132 vtbar &= 0xffff0000;
4133
 4134 /* we know that this IOMMU should be at offset 0xa000 from vtbar */
4135 drhd = dmar_find_matched_drhd_unit(pdev);
4136 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
4137 TAINT_FIRMWARE_WORKAROUND,
4138 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
4139 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
4140}
4141DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
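/*
 * Worked example (illustrative): if the config read above returns
 * vtbar == 0xfed91000, masking leaves 0xfed90000, so the quirk expects the
 * matched DRHD's reg_base_addr to be 0xfed9a000 (vtbar + 0xa000). Any
 * other base address indicates lying firmware, and the device is marked
 * DUMMY_DEVICE_DOMAIN_INFO so it is never translated.
 */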
4142
ba395927
KA
4143static void __init init_no_remapping_devices(void)
4144{
4145 struct dmar_drhd_unit *drhd;
832bd858 4146 struct device *dev;
b683b230 4147 int i;
ba395927
KA
4148
4149 for_each_drhd_unit(drhd) {
4150 if (!drhd->include_all) {
b683b230
JL
4151 for_each_active_dev_scope(drhd->devices,
4152 drhd->devices_cnt, i, dev)
4153 break;
832bd858 4154 /* ignore DMAR unit if no devices exist */
ba395927
KA
4155 if (i == drhd->devices_cnt)
4156 drhd->ignored = 1;
4157 }
4158 }
4159
7c919779 4160 for_each_active_drhd_unit(drhd) {
7c919779 4161 if (drhd->include_all)
ba395927
KA
4162 continue;
4163
b683b230
JL
4164 for_each_active_dev_scope(drhd->devices,
4165 drhd->devices_cnt, i, dev)
832bd858 4166 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
ba395927 4167 break;
ba395927
KA
4168 if (i < drhd->devices_cnt)
4169 continue;
4170
c0771df8
DW
4171 /* This IOMMU has *only* gfx devices. Either bypass it or
4172 set the gfx_mapped flag, as appropriate */
cf1ec453 4173 if (!dmar_map_gfx) {
c0771df8 4174 drhd->ignored = 1;
b683b230
JL
4175 for_each_active_dev_scope(drhd->devices,
4176 drhd->devices_cnt, i, dev)
832bd858 4177 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
ba395927
KA
4178 }
4179 }
4180}
4181
f59c7b69
FY
4182#ifdef CONFIG_SUSPEND
4183static int init_iommu_hw(void)
4184{
4185 struct dmar_drhd_unit *drhd;
4186 struct intel_iommu *iommu = NULL;
4187
4188 for_each_active_iommu(iommu, drhd)
4189 if (iommu->qi)
4190 dmar_reenable_qi(iommu);
4191
b779260b
JC
4192 for_each_iommu(iommu, drhd) {
4193 if (drhd->ignored) {
4194 /*
4195 * we always have to disable PMRs or DMA may fail on
4196 * this device
4197 */
4198 if (force_on)
4199 iommu_disable_protect_mem_regions(iommu);
4200 continue;
4201 }
095303e0 4202
f59c7b69
FY
4203 iommu_flush_write_buffer(iommu);
4204
4205 iommu_set_root_entry(iommu);
4206
4207 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 4208 DMA_CCMD_GLOBAL_INVL);
2a41ccee
JL
4209 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4210 iommu_enable_translation(iommu);
b94996c9 4211 iommu_disable_protect_mem_regions(iommu);
f59c7b69
FY
4212 }
4213
4214 return 0;
4215}
4216
4217static void iommu_flush_all(void)
4218{
4219 struct dmar_drhd_unit *drhd;
4220 struct intel_iommu *iommu;
4221
4222 for_each_active_iommu(iommu, drhd) {
4223 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 4224 DMA_CCMD_GLOBAL_INVL);
f59c7b69 4225 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 4226 DMA_TLB_GLOBAL_FLUSH);
f59c7b69
FY
4227 }
4228}
4229
134fac3f 4230static int iommu_suspend(void)
f59c7b69
FY
4231{
4232 struct dmar_drhd_unit *drhd;
4233 struct intel_iommu *iommu = NULL;
4234 unsigned long flag;
4235
4236 for_each_active_iommu(iommu, drhd) {
6396bb22 4237 iommu->iommu_state = kcalloc(MAX_SR_DMAR_REGS, sizeof(u32),
f59c7b69
FY
4238 GFP_ATOMIC);
4239 if (!iommu->iommu_state)
4240 goto nomem;
4241 }
4242
4243 iommu_flush_all();
4244
4245 for_each_active_iommu(iommu, drhd) {
4246 iommu_disable_translation(iommu);
4247
1f5b3c3f 4248 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
4249
4250 iommu->iommu_state[SR_DMAR_FECTL_REG] =
4251 readl(iommu->reg + DMAR_FECTL_REG);
4252 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
4253 readl(iommu->reg + DMAR_FEDATA_REG);
4254 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
4255 readl(iommu->reg + DMAR_FEADDR_REG);
4256 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
4257 readl(iommu->reg + DMAR_FEUADDR_REG);
4258
1f5b3c3f 4259 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
4260 }
4261 return 0;
4262
4263nomem:
4264 for_each_active_iommu(iommu, drhd)
4265 kfree(iommu->iommu_state);
4266
4267 return -ENOMEM;
4268}
4269
134fac3f 4270static void iommu_resume(void)
f59c7b69
FY
4271{
4272 struct dmar_drhd_unit *drhd;
4273 struct intel_iommu *iommu = NULL;
4274 unsigned long flag;
4275
4276 if (init_iommu_hw()) {
b779260b
JC
4277 if (force_on)
4278 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
4279 else
4280 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
134fac3f 4281 return;
f59c7b69
FY
4282 }
4283
4284 for_each_active_iommu(iommu, drhd) {
4285
1f5b3c3f 4286 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
4287
4288 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
4289 iommu->reg + DMAR_FECTL_REG);
4290 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
4291 iommu->reg + DMAR_FEDATA_REG);
4292 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
4293 iommu->reg + DMAR_FEADDR_REG);
4294 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
4295 iommu->reg + DMAR_FEUADDR_REG);
4296
1f5b3c3f 4297 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
4298 }
4299
4300 for_each_active_iommu(iommu, drhd)
4301 kfree(iommu->iommu_state);
f59c7b69
FY
4302}
4303
134fac3f 4304static struct syscore_ops iommu_syscore_ops = {
f59c7b69
FY
4305 .resume = iommu_resume,
4306 .suspend = iommu_suspend,
4307};
4308
134fac3f 4309static void __init init_iommu_pm_ops(void)
f59c7b69 4310{
134fac3f 4311 register_syscore_ops(&iommu_syscore_ops);
f59c7b69
FY
4312}
4313
4314#else
99592ba4 4315static inline void init_iommu_pm_ops(void) {}
f59c7b69
FY
4316#endif /* CONFIG_PM */
4317
c2a0b538 4318int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
318fe7df
SS
4319{
4320 struct acpi_dmar_reserved_memory *rmrr;
4321 struct dmar_rmrr_unit *rmrru;
f036c7fa
YC
4322 int ret;
4323
4324 rmrr = (struct acpi_dmar_reserved_memory *)header;
4325 ret = arch_rmrr_sanity_check(rmrr);
4326 if (ret)
4327 return ret;
318fe7df
SS
4328
4329 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
4330 if (!rmrru)
0659b8dc 4331 goto out;
318fe7df
SS
4332
4333 rmrru->hdr = header;
f036c7fa 4334
318fe7df
SS
4335 rmrru->base_address = rmrr->base_address;
4336 rmrru->end_address = rmrr->end_address;
0659b8dc 4337
2e455289
JL
4338 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
4339 ((void *)rmrr) + rmrr->header.length,
4340 &rmrru->devices_cnt);
0659b8dc 4341 if (rmrru->devices_cnt && rmrru->devices == NULL)
5f64ce54 4342 goto free_rmrru;
318fe7df 4343
2e455289 4344 list_add(&rmrru->list, &dmar_rmrr_units);
318fe7df 4345
2e455289 4346 return 0;
0659b8dc
EA
4347free_rmrru:
4348 kfree(rmrru);
4349out:
4350 return -ENOMEM;
318fe7df
SS
4351}
4352
6b197249
JL
4353static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
4354{
4355 struct dmar_atsr_unit *atsru;
4356 struct acpi_dmar_atsr *tmp;
4357
4358 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4359 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
4360 if (atsr->segment != tmp->segment)
4361 continue;
4362 if (atsr->header.length != tmp->header.length)
4363 continue;
4364 if (memcmp(atsr, tmp, atsr->header.length) == 0)
4365 return atsru;
4366 }
4367
4368 return NULL;
4369}
4370
4371int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
318fe7df
SS
4372{
4373 struct acpi_dmar_atsr *atsr;
4374 struct dmar_atsr_unit *atsru;
4375
b608fe35 4376 if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
6b197249
JL
4377 return 0;
4378
318fe7df 4379 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
6b197249
JL
4380 atsru = dmar_find_atsr(atsr);
4381 if (atsru)
4382 return 0;
4383
4384 atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
318fe7df
SS
4385 if (!atsru)
4386 return -ENOMEM;
4387
6b197249
JL
4388 /*
4389 * If memory is allocated from slab by ACPI _DSM method, we need to
4390 * copy the memory content because the memory buffer will be freed
4391 * on return.
4392 */
4393 atsru->hdr = (void *)(atsru + 1);
4394 memcpy(atsru->hdr, hdr, hdr->length);
318fe7df 4395 atsru->include_all = atsr->flags & 0x1;
2e455289
JL
4396 if (!atsru->include_all) {
4397 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
4398 (void *)atsr + atsr->header.length,
4399 &atsru->devices_cnt);
4400 if (atsru->devices_cnt && atsru->devices == NULL) {
4401 kfree(atsru);
4402 return -ENOMEM;
4403 }
4404 }
318fe7df 4405
0e242612 4406 list_add_rcu(&atsru->list, &dmar_atsr_units);
318fe7df
SS
4407
4408 return 0;
4409}
4410
9bdc531e
JL
4411static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
4412{
4413 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
4414 kfree(atsru);
4415}
4416
6b197249
JL
4417int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4418{
4419 struct acpi_dmar_atsr *atsr;
4420 struct dmar_atsr_unit *atsru;
4421
4422 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4423 atsru = dmar_find_atsr(atsr);
4424 if (atsru) {
4425 list_del_rcu(&atsru->list);
4426 synchronize_rcu();
4427 intel_iommu_free_atsr(atsru);
4428 }
4429
4430 return 0;
4431}
4432
4433int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4434{
4435 int i;
4436 struct device *dev;
4437 struct acpi_dmar_atsr *atsr;
4438 struct dmar_atsr_unit *atsru;
4439
4440 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4441 atsru = dmar_find_atsr(atsr);
4442 if (!atsru)
4443 return 0;
4444
194dc870 4445 if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
6b197249
JL
4446 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
4447 i, dev)
4448 return -EBUSY;
194dc870 4449 }
6b197249
JL
4450
4451 return 0;
4452}
4453
ffebeb46
JL
4454static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
4455{
e083ea5b 4456 int sp, ret;
ffebeb46
JL
4457 struct intel_iommu *iommu = dmaru->iommu;
4458
4459 if (g_iommus[iommu->seq_id])
4460 return 0;
4461
4462 if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
9f10e5bf 4463 pr_warn("%s: Doesn't support hardware pass through.\n",
ffebeb46
JL
4464 iommu->name);
4465 return -ENXIO;
4466 }
4467 if (!ecap_sc_support(iommu->ecap) &&
4468 domain_update_iommu_snooping(iommu)) {
9f10e5bf 4469 pr_warn("%s: Doesn't support snooping.\n",
ffebeb46
JL
4470 iommu->name);
4471 return -ENXIO;
4472 }
4473 sp = domain_update_iommu_superpage(iommu) - 1;
4474 if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
9f10e5bf 4475 pr_warn("%s: Doesn't support large page.\n",
ffebeb46
JL
4476 iommu->name);
4477 return -ENXIO;
4478 }
4479
4480 /*
4481 * Disable translation if already enabled prior to OS handover.
4482 */
4483 if (iommu->gcmd & DMA_GCMD_TE)
4484 iommu_disable_translation(iommu);
4485
4486 g_iommus[iommu->seq_id] = iommu;
4487 ret = iommu_init_domains(iommu);
4488 if (ret == 0)
4489 ret = iommu_alloc_root_entry(iommu);
4490 if (ret)
4491 goto out;
4492
8a94ade4 4493#ifdef CONFIG_INTEL_IOMMU_SVM
765b6a98 4494 if (pasid_supported(iommu))
d9737953 4495 intel_svm_init(iommu);
8a94ade4
DW
4496#endif
4497
ffebeb46
JL
4498 if (dmaru->ignored) {
4499 /*
4500 * we always have to disable PMRs or DMA may fail on this device
4501 */
4502 if (force_on)
4503 iommu_disable_protect_mem_regions(iommu);
4504 return 0;
4505 }
4506
4507 intel_iommu_init_qi(iommu);
4508 iommu_flush_write_buffer(iommu);
a222a7f0
DW
4509
4510#ifdef CONFIG_INTEL_IOMMU_SVM
765b6a98 4511 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
a222a7f0
DW
4512 ret = intel_svm_enable_prq(iommu);
4513 if (ret)
4514 goto disable_iommu;
4515 }
4516#endif
ffebeb46
JL
4517 ret = dmar_set_interrupt(iommu);
4518 if (ret)
4519 goto disable_iommu;
4520
4521 iommu_set_root_entry(iommu);
4522 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
4523 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4524 iommu_enable_translation(iommu);
4525
ffebeb46
JL
4526 iommu_disable_protect_mem_regions(iommu);
4527 return 0;
4528
4529disable_iommu:
4530 disable_dmar_iommu(iommu);
4531out:
4532 free_dmar_iommu(iommu);
4533 return ret;
4534}
4535
6b197249
JL
4536int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
4537{
ffebeb46
JL
4538 int ret = 0;
4539 struct intel_iommu *iommu = dmaru->iommu;
4540
4541 if (!intel_iommu_enabled)
4542 return 0;
4543 if (iommu == NULL)
4544 return -EINVAL;
4545
4546 if (insert) {
4547 ret = intel_iommu_add(dmaru);
4548 } else {
4549 disable_dmar_iommu(iommu);
4550 free_dmar_iommu(iommu);
4551 }
4552
4553 return ret;
6b197249
JL
4554}
4555
9bdc531e
JL
4556static void intel_iommu_free_dmars(void)
4557{
4558 struct dmar_rmrr_unit *rmrru, *rmrr_n;
4559 struct dmar_atsr_unit *atsru, *atsr_n;
4560
4561 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
4562 list_del(&rmrru->list);
4563 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
4564 kfree(rmrru);
318fe7df
SS
4565 }
4566
9bdc531e
JL
4567 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
4568 list_del(&atsru->list);
4569 intel_iommu_free_atsr(atsru);
4570 }
318fe7df
SS
4571}
4572
4573int dmar_find_matched_atsr_unit(struct pci_dev *dev)
4574{
b683b230 4575 int i, ret = 1;
318fe7df 4576 struct pci_bus *bus;
832bd858
DW
4577 struct pci_dev *bridge = NULL;
4578 struct device *tmp;
318fe7df
SS
4579 struct acpi_dmar_atsr *atsr;
4580 struct dmar_atsr_unit *atsru;
4581
4582 dev = pci_physfn(dev);
318fe7df 4583 for (bus = dev->bus; bus; bus = bus->parent) {
b5f82ddf 4584 bridge = bus->self;
d14053b3
DW
4585 /* If it's an integrated device, allow ATS */
4586 if (!bridge)
4587 return 1;
4588 /* Connected via non-PCIe: no ATS */
4589 if (!pci_is_pcie(bridge) ||
62f87c0e 4590 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
318fe7df 4591 return 0;
d14053b3 4592 /* If we found the root port, look it up in the ATSR */
b5f82ddf 4593 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
318fe7df 4594 break;
318fe7df
SS
4595 }
4596
0e242612 4597 rcu_read_lock();
b5f82ddf
JL
4598 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4599 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4600 if (atsr->segment != pci_domain_nr(dev->bus))
4601 continue;
4602
b683b230 4603 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
832bd858 4604 if (tmp == &bridge->dev)
b683b230 4605 goto out;
b5f82ddf
JL
4606
4607 if (atsru->include_all)
b683b230 4608 goto out;
b5f82ddf 4609 }
b683b230
JL
4610 ret = 0;
4611out:
0e242612 4612 rcu_read_unlock();
318fe7df 4613
b683b230 4614 return ret;
318fe7df
SS
4615}
4616
59ce0515
JL
4617int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
4618{
e083ea5b 4619 int ret;
59ce0515
JL
4620 struct dmar_rmrr_unit *rmrru;
4621 struct dmar_atsr_unit *atsru;
4622 struct acpi_dmar_atsr *atsr;
4623 struct acpi_dmar_reserved_memory *rmrr;
4624
b608fe35 4625 if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING)
59ce0515
JL
4626 return 0;
4627
4628 list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
4629 rmrr = container_of(rmrru->hdr,
4630 struct acpi_dmar_reserved_memory, header);
4631 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4632 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
4633 ((void *)rmrr) + rmrr->header.length,
4634 rmrr->segment, rmrru->devices,
4635 rmrru->devices_cnt);
e083ea5b 4636 if (ret < 0)
59ce0515 4637 return ret;
e6a8c9b3 4638 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
27e24950
JL
4639 dmar_remove_dev_scope(info, rmrr->segment,
4640 rmrru->devices, rmrru->devices_cnt);
59ce0515
JL
4641 }
4642 }
4643
4644 list_for_each_entry(atsru, &dmar_atsr_units, list) {
4645 if (atsru->include_all)
4646 continue;
4647
4648 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4649 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4650 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
4651 (void *)atsr + atsr->header.length,
4652 atsr->segment, atsru->devices,
4653 atsru->devices_cnt);
4654 if (ret > 0)
4655 break;
e083ea5b 4656 else if (ret < 0)
59ce0515 4657 return ret;
e6a8c9b3 4658 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
59ce0515
JL
4659 if (dmar_remove_dev_scope(info, atsr->segment,
4660 atsru->devices, atsru->devices_cnt))
4661 break;
4662 }
4663 }
4664
4665 return 0;
4666}
4667
75f05569
JL
4668static int intel_iommu_memory_notifier(struct notifier_block *nb,
4669 unsigned long val, void *v)
4670{
4671 struct memory_notify *mhp = v;
4672 unsigned long long start, end;
4673 unsigned long start_vpfn, last_vpfn;
4674
4675 switch (val) {
4676 case MEM_GOING_ONLINE:
4677 start = mhp->start_pfn << PAGE_SHIFT;
4678 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
4679 if (iommu_domain_identity_map(si_domain, start, end)) {
9f10e5bf 4680 pr_warn("Failed to build identity map for [%llx-%llx]\n",
75f05569
JL
4681 start, end);
4682 return NOTIFY_BAD;
4683 }
4684 break;
4685
4686 case MEM_OFFLINE:
4687 case MEM_CANCEL_ONLINE:
4688 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4689 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
4690 while (start_vpfn <= last_vpfn) {
4691 struct iova *iova;
4692 struct dmar_drhd_unit *drhd;
4693 struct intel_iommu *iommu;
ea8ea460 4694 struct page *freelist;
75f05569
JL
4695
4696 iova = find_iova(&si_domain->iovad, start_vpfn);
4697 if (iova == NULL) {
9f10e5bf 4698 pr_debug("Failed get IOVA for PFN %lx\n",
75f05569
JL
4699 start_vpfn);
4700 break;
4701 }
4702
4703 iova = split_and_remove_iova(&si_domain->iovad, iova,
4704 start_vpfn, last_vpfn);
4705 if (iova == NULL) {
9f10e5bf 4706 pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
75f05569
JL
4707 start_vpfn, last_vpfn);
4708 return NOTIFY_BAD;
4709 }
4710
ea8ea460
DW
4711 freelist = domain_unmap(si_domain, iova->pfn_lo,
4712 iova->pfn_hi);
4713
75f05569
JL
4714 rcu_read_lock();
4715 for_each_active_iommu(iommu, drhd)
a1ddcbe9 4716 iommu_flush_iotlb_psi(iommu, si_domain,
a156ef99 4717 iova->pfn_lo, iova_size(iova),
ea8ea460 4718 !freelist, 0);
75f05569 4719 rcu_read_unlock();
ea8ea460 4720 dma_free_pagelist(freelist);
75f05569
JL
4721
4722 start_vpfn = iova->pfn_hi + 1;
4723 free_iova_mem(iova);
4724 }
4725 break;
4726 }
4727
4728 return NOTIFY_OK;
4729}
4730
4731static struct notifier_block intel_iommu_memory_nb = {
4732 .notifier_call = intel_iommu_memory_notifier,
4733 .priority = 0
4734};
4735
22e2f9fa
OP
4736static void free_all_cpu_cached_iovas(unsigned int cpu)
4737{
4738 int i;
4739
4740 for (i = 0; i < g_num_of_iommus; i++) {
4741 struct intel_iommu *iommu = g_iommus[i];
4742 struct dmar_domain *domain;
0caa7616 4743 int did;
22e2f9fa
OP
4744
4745 if (!iommu)
4746 continue;
4747
3bd4f911 4748 for (did = 0; did < cap_ndoms(iommu->cap); did++) {
0caa7616 4749 domain = get_iommu_domain(iommu, (u16)did);
22e2f9fa
OP
4750
4751 if (!domain)
4752 continue;
4753 free_cpu_cached_iovas(cpu, &domain->iovad);
4754 }
4755 }
4756}
4757
21647615 4758static int intel_iommu_cpu_dead(unsigned int cpu)
aa473240 4759{
21647615 4760 free_all_cpu_cached_iovas(cpu);
21647615 4761 return 0;
aa473240
OP
4762}
4763
161b28aa
JR
4764static void intel_disable_iommus(void)
4765{
4766 struct intel_iommu *iommu = NULL;
4767 struct dmar_drhd_unit *drhd;
4768
4769 for_each_iommu(iommu, drhd)
4770 iommu_disable_translation(iommu);
4771}
4772
6c3a44ed
DD
4773void intel_iommu_shutdown(void)
4774{
4775 struct dmar_drhd_unit *drhd;
4776 struct intel_iommu *iommu = NULL;
4777
4778 if (no_iommu || dmar_disabled)
4779 return;
4780
4781 down_write(&dmar_global_lock);
4782
4783 /* Disable PMRs explicitly here. */
4784 for_each_iommu(iommu, drhd)
4785 iommu_disable_protect_mem_regions(iommu);
4786
4787 /* Make sure the IOMMUs are switched off */
4788 intel_disable_iommus();
4789
4790 up_write(&dmar_global_lock);
4791}
4792
a7fdb6e6
JR
4793static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
4794{
2926a2aa
JR
4795 struct iommu_device *iommu_dev = dev_to_iommu_device(dev);
4796
4797 return container_of(iommu_dev, struct intel_iommu, iommu);
a7fdb6e6
JR
4798}
4799
a5459cfe
AW
4800static ssize_t intel_iommu_show_version(struct device *dev,
4801 struct device_attribute *attr,
4802 char *buf)
4803{
a7fdb6e6 4804 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4805 u32 ver = readl(iommu->reg + DMAR_VER_REG);
4806 return sprintf(buf, "%d:%d\n",
4807 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4808}
4809static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
4810
4811static ssize_t intel_iommu_show_address(struct device *dev,
4812 struct device_attribute *attr,
4813 char *buf)
4814{
a7fdb6e6 4815 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4816 return sprintf(buf, "%llx\n", iommu->reg_phys);
4817}
4818static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
4819
4820static ssize_t intel_iommu_show_cap(struct device *dev,
4821 struct device_attribute *attr,
4822 char *buf)
4823{
a7fdb6e6 4824 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4825 return sprintf(buf, "%llx\n", iommu->cap);
4826}
4827static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
4828
4829static ssize_t intel_iommu_show_ecap(struct device *dev,
4830 struct device_attribute *attr,
4831 char *buf)
4832{
a7fdb6e6 4833 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4834 return sprintf(buf, "%llx\n", iommu->ecap);
4835}
4836static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4837
2238c082
AW
4838static ssize_t intel_iommu_show_ndoms(struct device *dev,
4839 struct device_attribute *attr,
4840 char *buf)
4841{
a7fdb6e6 4842 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2238c082
AW
4843 return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
4844}
4845static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL);
4846
4847static ssize_t intel_iommu_show_ndoms_used(struct device *dev,
4848 struct device_attribute *attr,
4849 char *buf)
4850{
a7fdb6e6 4851 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2238c082
AW
4852 return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
4853 cap_ndoms(iommu->cap)));
4854}
4855static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL);
4856
a5459cfe
AW
4857static struct attribute *intel_iommu_attrs[] = {
4858 &dev_attr_version.attr,
4859 &dev_attr_address.attr,
4860 &dev_attr_cap.attr,
4861 &dev_attr_ecap.attr,
2238c082
AW
4862 &dev_attr_domains_supported.attr,
4863 &dev_attr_domains_used.attr,
a5459cfe
AW
4864 NULL,
4865};
4866
4867static struct attribute_group intel_iommu_group = {
4868 .name = "intel-iommu",
4869 .attrs = intel_iommu_attrs,
4870};
4871
4872const struct attribute_group *intel_iommu_groups[] = {
4873 &intel_iommu_group,
4874 NULL,
4875};
4876
c5a5dc4c 4877static inline bool has_untrusted_dev(void)
89a6079d
LB
4878{
4879 struct pci_dev *pdev = NULL;
89a6079d 4880
c5a5dc4c
LB
4881 for_each_pci_dev(pdev)
4882 if (pdev->untrusted)
4883 return true;
89a6079d 4884
c5a5dc4c
LB
4885 return false;
4886}
89a6079d 4887
c5a5dc4c
LB
4888static int __init platform_optin_force_iommu(void)
4889{
4890 if (!dmar_platform_optin() || no_platform_optin || !has_untrusted_dev())
89a6079d
LB
4891 return 0;
4892
4893 if (no_iommu || dmar_disabled)
4894 pr_info("Intel-IOMMU force enabled due to platform opt in\n");
4895
4896 /*
 4897 * If Intel-IOMMU is disabled by default, we will apply an identity
 4898 * map to all devices except those marked as untrusted.
4899 */
4900 if (dmar_disabled)
4901 iommu_identity_mapping |= IDENTMAP_ALL;
4902
4903 dmar_disabled = 0;
89a6079d
LB
4904 no_iommu = 0;
4905
4906 return 1;
4907}
4908
fa212a97
LB
4909static int __init probe_acpi_namespace_devices(void)
4910{
4911 struct dmar_drhd_unit *drhd;
af88ec39
QC
4912 /* To avoid a -Wunused-but-set-variable warning. */
4913 struct intel_iommu *iommu __maybe_unused;
fa212a97
LB
4914 struct device *dev;
4915 int i, ret = 0;
4916
4917 for_each_active_iommu(iommu, drhd) {
4918 for_each_active_dev_scope(drhd->devices,
4919 drhd->devices_cnt, i, dev) {
4920 struct acpi_device_physical_node *pn;
4921 struct iommu_group *group;
4922 struct acpi_device *adev;
4923
4924 if (dev->bus != &acpi_bus_type)
4925 continue;
4926
4927 adev = to_acpi_device(dev);
4928 mutex_lock(&adev->physical_node_lock);
4929 list_for_each_entry(pn,
4930 &adev->physical_node_list, node) {
4931 group = iommu_group_get(pn->dev);
4932 if (group) {
4933 iommu_group_put(group);
4934 continue;
4935 }
4936
4937 pn->dev->bus->iommu_ops = &intel_iommu_ops;
4938 ret = iommu_probe_device(pn->dev);
4939 if (ret)
4940 break;
4941 }
4942 mutex_unlock(&adev->physical_node_lock);
4943
4944 if (ret)
4945 return ret;
4946 }
4947 }
4948
4949 return 0;
4950}
4951
ba395927
KA
4952int __init intel_iommu_init(void)
4953{
9bdc531e 4954 int ret = -ENODEV;
3a93c841 4955 struct dmar_drhd_unit *drhd;
7c919779 4956 struct intel_iommu *iommu;
ba395927 4957
89a6079d
LB
4958 /*
4959 * Intel IOMMU is required for a TXT/tboot launch or platform
4960 * opt in, so enforce that.
4961 */
4962 force_on = tboot_force_iommu() || platform_optin_force_iommu();
a59b50e9 4963
3a5670e8
JL
4964 if (iommu_init_mempool()) {
4965 if (force_on)
4966 panic("tboot: Failed to initialize iommu memory\n");
4967 return -ENOMEM;
4968 }
4969
4970 down_write(&dmar_global_lock);
a59b50e9
JC
4971 if (dmar_table_init()) {
4972 if (force_on)
4973 panic("tboot: Failed to initialize DMAR table\n");
9bdc531e 4974 goto out_free_dmar;
a59b50e9 4975 }
ba395927 4976
c2c7286a 4977 if (dmar_dev_scope_init() < 0) {
a59b50e9
JC
4978 if (force_on)
4979 panic("tboot: Failed to initialize DMAR device scope\n");
9bdc531e 4980 goto out_free_dmar;
a59b50e9 4981 }
1886e8a9 4982
ec154bf5
JR
4983 up_write(&dmar_global_lock);
4984
4985 /*
4986 * The bus notifier takes the dmar_global_lock, so lockdep will
4987 * complain later when we register it under the lock.
4988 */
4989 dmar_register_bus_notifier();
4990
4991 down_write(&dmar_global_lock);
4992
161b28aa 4993 if (no_iommu || dmar_disabled) {
bfd20f1c
SL
4994 /*
4995 * We exit the function here to ensure IOMMU's remapping and
4996 * mempool aren't setup, which means that the IOMMU's PMRs
4997 * won't be disabled via the call to init_dmars(). So disable
4998 * it explicitly here. The PMRs were setup by tboot prior to
4999 * calling SENTER, but the kernel is expected to reset/tear
5000 * down the PMRs.
5001 */
5002 if (intel_iommu_tboot_noforce) {
5003 for_each_iommu(iommu, drhd)
5004 iommu_disable_protect_mem_regions(iommu);
5005 }
5006
161b28aa
JR
5007 /*
5008 * Make sure the IOMMUs are switched off, even when we
5009 * boot into a kexec kernel and the previous kernel left
5010 * them enabled
5011 */
5012 intel_disable_iommus();
9bdc531e 5013 goto out_free_dmar;
161b28aa 5014 }
2ae21010 5015
318fe7df 5016 if (list_empty(&dmar_rmrr_units))
9f10e5bf 5017 pr_info("No RMRR found\n");
318fe7df
SS
5018
5019 if (list_empty(&dmar_atsr_units))
9f10e5bf 5020 pr_info("No ATSR found\n");
318fe7df 5021
51a63e67
JC
5022 if (dmar_init_reserved_ranges()) {
5023 if (force_on)
5024 panic("tboot: Failed to reserve iommu ranges\n");
3a5670e8 5025 goto out_free_reserved_range;
51a63e67 5026 }
ba395927 5027
cf1ec453
LB
5028 if (dmar_map_gfx)
5029 intel_iommu_gfx_mapped = 1;
5030
ba395927
KA
5031 init_no_remapping_devices();
5032
b779260b 5033 ret = init_dmars();
ba395927 5034 if (ret) {
a59b50e9
JC
5035 if (force_on)
5036 panic("tboot: Failed to initialize DMARs\n");
9f10e5bf 5037 pr_err("Initialization failed\n");
9bdc531e 5038 goto out_free_reserved_range;
ba395927 5039 }
3a5670e8 5040 up_write(&dmar_global_lock);
ba395927 5041
4fac8076 5042#if defined(CONFIG_X86) && defined(CONFIG_SWIOTLB)
c5a5dc4c
LB
5043 /*
5044 * If the system has no untrusted device or the user has decided
5045 * to disable the bounce page mechanisms, we don't need swiotlb.
5046 * Mark this and the pre-allocated bounce pages will be released
5047 * later.
5048 */
5049 if (!has_untrusted_dev() || intel_no_bounce)
5050 swiotlb = 0;
75f1cdf1 5051#endif
19943b0e 5052 dma_ops = &intel_dma_ops;
4ed0d3e6 5053
134fac3f 5054 init_iommu_pm_ops();
a8bcbb0d 5055
39ab9555
JR
5056 for_each_active_iommu(iommu, drhd) {
5057 iommu_device_sysfs_add(&iommu->iommu, NULL,
5058 intel_iommu_groups,
5059 "%s", iommu->name);
5060 iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
5061 iommu_device_register(&iommu->iommu);
5062 }
a5459cfe 5063
4236d97d 5064 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
75f05569
JL
5065 if (si_domain && !hw_pass_through)
5066 register_memory_notifier(&intel_iommu_memory_nb);
21647615
AMG
5067 cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL,
5068 intel_iommu_cpu_dead);
d8190dc6 5069
d5692d4a 5070 down_read(&dmar_global_lock);
fa212a97
LB
5071 if (probe_acpi_namespace_devices())
5072 pr_warn("ACPI name space devices didn't probe correctly\n");
d5692d4a 5073 up_read(&dmar_global_lock);
fa212a97 5074
d8190dc6
LB
5075 /* Finally, we enable the DMA remapping hardware. */
5076 for_each_iommu(iommu, drhd) {
6a8c6748 5077 if (!drhd->ignored && !translation_pre_enabled(iommu))
d8190dc6
LB
5078 iommu_enable_translation(iommu);
5079
5080 iommu_disable_protect_mem_regions(iommu);
5081 }
5082 pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
5083
8bc1f85c 5084 intel_iommu_enabled = 1;
ee2636b8 5085 intel_iommu_debugfs_init();
8bc1f85c 5086
ba395927 5087 return 0;
9bdc531e
JL
5088
5089out_free_reserved_range:
5090 put_iova_domain(&reserved_iova_list);
9bdc531e
JL
5091out_free_dmar:
5092 intel_iommu_free_dmars();
3a5670e8
JL
5093 up_write(&dmar_global_lock);
5094 iommu_exit_mempool();
9bdc531e 5095 return ret;
ba395927 5096}
e820482c 5097
0ce4a85f
LB
5098static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
5099{
5100 struct intel_iommu *iommu = opaque;
5101
5102 domain_context_clear_one(iommu, PCI_BUS_NUM(alias), alias & 0xff);
5103 return 0;
5104}
5105
5106/*
5107 * NB - intel-iommu lacks any sort of reference counting for the users of
5108 * dependent devices. If multiple endpoints have intersecting dependent
5109 * devices, unbinding the driver from any one of them will possibly leave
5110 * the others unable to operate.
5111 */
5112static void domain_context_clear(struct intel_iommu *iommu, struct device *dev)
5113{
5114 if (!iommu || !dev || !dev_is_pci(dev))
5115 return;
5116
5117 pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_clear_one_cb, iommu);
5118}
5119
127c7615 5120static void __dmar_remove_one_dev_info(struct device_domain_info *info)
c7151a8d 5121{
942067f1 5122 struct dmar_domain *domain;
c7151a8d
WH
5123 struct intel_iommu *iommu;
5124 unsigned long flags;
c7151a8d 5125
55d94043
JR
5126 assert_spin_locked(&device_domain_lock);
5127
127c7615 5128 if (WARN_ON(!info))
c7151a8d
WH
5129 return;
5130
127c7615 5131 iommu = info->iommu;
942067f1 5132 domain = info->domain;
c7151a8d 5133
127c7615 5134 if (info->dev) {
ef848b7e
LB
5135 if (dev_is_pci(info->dev) && sm_supported(iommu))
5136 intel_pasid_tear_down_entry(iommu, info->dev,
5137 PASID_RID2PASID);
5138
127c7615 5139 iommu_disable_dev_iotlb(info);
0ce4a85f 5140 domain_context_clear(iommu, info->dev);
a7fc93fe 5141 intel_pasid_free_table(info->dev);
127c7615 5142 }
c7151a8d 5143
b608ac3b 5144 unlink_domain_info(info);
c7151a8d 5145
d160aca5 5146 spin_lock_irqsave(&iommu->lock, flags);
942067f1 5147 domain_detach_iommu(domain, iommu);
d160aca5 5148 spin_unlock_irqrestore(&iommu->lock, flags);
c7151a8d 5149
942067f1
LB
5150 /* free the private domain */
5151 if (domain->flags & DOMAIN_FLAG_LOSE_CHILDREN &&
3a18844d
LB
5152 !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
5153 list_empty(&domain->devices))
942067f1
LB
5154 domain_exit(info->domain);
5155
127c7615 5156 free_devinfo_mem(info);
c7151a8d 5157}
c7151a8d 5158
71753239 5159static void dmar_remove_one_dev_info(struct device *dev)
55d94043 5160{
127c7615 5161 struct device_domain_info *info;
55d94043 5162 unsigned long flags;
3e7abe25 5163
55d94043 5164 spin_lock_irqsave(&device_domain_lock, flags);
127c7615 5165 info = dev->archdata.iommu;
ae23bfb6
LB
5166 if (info)
5167 __dmar_remove_one_dev_info(info);
55d94043 5168 spin_unlock_irqrestore(&device_domain_lock, flags);
c7151a8d
WH
5169}
5170
301e7ee1
JR
5171static int md_domain_init(struct dmar_domain *domain, int guest_width)
5172{
5173 int adjust_width;
5174
5175 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
5176 domain_reserve_special_ranges(domain);
5177
5178 /* calculate AGAW */
5179 domain->gaw = guest_width;
5180 adjust_width = guestwidth_to_adjustwidth(guest_width);
5181 domain->agaw = width_to_agaw(adjust_width);
5182
5183 domain->iommu_coherency = 0;
5184 domain->iommu_snooping = 0;
5185 domain->iommu_superpage = 0;
5186 domain->max_addr = 0;
5187
5188 /* always allocate the top pgd */
5189 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
5190 if (!domain->pgd)
5191 return -ENOMEM;
5192 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
5193 return 0;
5194}
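/*
 * Worked example (illustrative): each page-table level resolves 9 address
 * bits on top of the 12-bit page offset, so a 48-bit guest width
 * (12 + 4 * 9) fits exactly in a 4-level table; guestwidth_to_adjustwidth()
 * rounds other widths up to the next such boundary before width_to_agaw()
 * derives the AGAW encoding.
 */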
5195
00a77deb 5196static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
38717946 5197{
5d450806 5198 struct dmar_domain *dmar_domain;
00a77deb
JR
5199 struct iommu_domain *domain;
5200
4de354ec 5201 switch (type) {
fa954e68
LB
5202 case IOMMU_DOMAIN_DMA:
5203 /* fallthrough */
4de354ec 5204 case IOMMU_DOMAIN_UNMANAGED:
fa954e68 5205 dmar_domain = alloc_domain(0);
4de354ec
LB
5206 if (!dmar_domain) {
5207 pr_err("Can't allocate dmar_domain\n");
5208 return NULL;
5209 }
301e7ee1 5210 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
4de354ec
LB
5211 pr_err("Domain initialization failed\n");
5212 domain_exit(dmar_domain);
5213 return NULL;
5214 }
fa954e68
LB
5215
5216 if (type == IOMMU_DOMAIN_DMA &&
5217 init_iova_flush_queue(&dmar_domain->iovad,
5218 iommu_flush_iova, iova_entry_free)) {
5219 pr_warn("iova flush queue initialization failed\n");
5220 intel_iommu_strict = 1;
5221 }
5222
4de354ec 5223 domain_update_iommu_cap(dmar_domain);
38717946 5224
4de354ec
LB
5225 domain = &dmar_domain->domain;
5226 domain->geometry.aperture_start = 0;
5227 domain->geometry.aperture_end =
5228 __DOMAIN_MAX_ADDR(dmar_domain->gaw);
5229 domain->geometry.force_aperture = true;
5230
5231 return domain;
5232 case IOMMU_DOMAIN_IDENTITY:
5233 return &si_domain->domain;
5234 default:
00a77deb 5235 return NULL;
38717946 5236 }
8a0e715b 5237
4de354ec 5238 return NULL;
38717946 5239}
38717946 5240
00a77deb 5241static void intel_iommu_domain_free(struct iommu_domain *domain)
38717946 5242{
4de354ec
LB
5243 if (domain != &si_domain->domain)
5244 domain_exit(to_dmar_domain(domain));
38717946 5245}
38717946 5246
67b8e02b
LB
5247/*
5248 * Check whether a @domain could be attached to the @dev through the
5249 * aux-domain attach/detach APIs.
5250 */
5251static inline bool
5252is_aux_domain(struct device *dev, struct iommu_domain *domain)
5253{
5254 struct device_domain_info *info = dev->archdata.iommu;
5255
5256 return info && info->auxd_enabled &&
5257 domain->type == IOMMU_DOMAIN_UNMANAGED;
5258}
5259
5260static void auxiliary_link_device(struct dmar_domain *domain,
5261 struct device *dev)
5262{
5263 struct device_domain_info *info = dev->archdata.iommu;
5264
5265 assert_spin_locked(&device_domain_lock);
5266 if (WARN_ON(!info))
5267 return;
5268
5269 domain->auxd_refcnt++;
5270 list_add(&domain->auxd, &info->auxiliary_domains);
5271}
5272
5273static void auxiliary_unlink_device(struct dmar_domain *domain,
5274 struct device *dev)
5275{
5276 struct device_domain_info *info = dev->archdata.iommu;
5277
5278 assert_spin_locked(&device_domain_lock);
5279 if (WARN_ON(!info))
5280 return;
5281
5282 list_del(&domain->auxd);
5283 domain->auxd_refcnt--;
5284
5285 if (!domain->auxd_refcnt && domain->default_pasid > 0)
5286 intel_pasid_free_id(domain->default_pasid);
5287}
5288
5289static int aux_domain_add_dev(struct dmar_domain *domain,
5290 struct device *dev)
5291{
5292 int ret;
5293 u8 bus, devfn;
5294 unsigned long flags;
5295 struct intel_iommu *iommu;
5296
5297 iommu = device_to_iommu(dev, &bus, &devfn);
5298 if (!iommu)
5299 return -ENODEV;
5300
5301 if (domain->default_pasid <= 0) {
5302 int pasid;
5303
5304 pasid = intel_pasid_alloc_id(domain, PASID_MIN,
5305 pci_max_pasids(to_pci_dev(dev)),
5306 GFP_KERNEL);
5307 if (pasid <= 0) {
5308 pr_err("Can't allocate default pasid\n");
5309 return -ENODEV;
5310 }
5311 domain->default_pasid = pasid;
5312 }
5313
5314 spin_lock_irqsave(&device_domain_lock, flags);
5315 /*
 5316 * iommu->lock must be held to attach the domain to the iommu and set
 5317 * up the pasid entry for second-level translation.
5318 */
5319 spin_lock(&iommu->lock);
5320 ret = domain_attach_iommu(domain, iommu);
5321 if (ret)
5322 goto attach_failed;
5323
 5324 /* Set up the PASID entry for mediated devices: */
5325 ret = intel_pasid_setup_second_level(iommu, domain, dev,
5326 domain->default_pasid);
5327 if (ret)
5328 goto table_failed;
5329 spin_unlock(&iommu->lock);
5330
5331 auxiliary_link_device(domain, dev);
5332
5333 spin_unlock_irqrestore(&device_domain_lock, flags);
5334
5335 return 0;
5336
5337table_failed:
5338 domain_detach_iommu(domain, iommu);
5339attach_failed:
5340 spin_unlock(&iommu->lock);
5341 spin_unlock_irqrestore(&device_domain_lock, flags);
5342 if (!domain->auxd_refcnt && domain->default_pasid > 0)
5343 intel_pasid_free_id(domain->default_pasid);
5344
5345 return ret;
5346}
5347
5348static void aux_domain_remove_dev(struct dmar_domain *domain,
5349 struct device *dev)
5350{
5351 struct device_domain_info *info;
5352 struct intel_iommu *iommu;
5353 unsigned long flags;
5354
5355 if (!is_aux_domain(dev, &domain->domain))
5356 return;
5357
5358 spin_lock_irqsave(&device_domain_lock, flags);
5359 info = dev->archdata.iommu;
5360 iommu = info->iommu;
5361
5362 auxiliary_unlink_device(domain, dev);
5363
5364 spin_lock(&iommu->lock);
5365 intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid);
5366 domain_detach_iommu(domain, iommu);
5367 spin_unlock(&iommu->lock);
5368
5369 spin_unlock_irqrestore(&device_domain_lock, flags);
5370}
5371
8cc3759a
LB
5372static int prepare_domain_attach_device(struct iommu_domain *domain,
5373 struct device *dev)
38717946 5374{
00a77deb 5375 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
fe40f1e0
WH
5376 struct intel_iommu *iommu;
5377 int addr_width;
156baca8 5378 u8 bus, devfn;
faa3d6f5 5379
156baca8 5380 iommu = device_to_iommu(dev, &bus, &devfn);
fe40f1e0
WH
5381 if (!iommu)
5382 return -ENODEV;
5383
5384 /* check if this iommu agaw is sufficient for max mapped address */
5385 addr_width = agaw_to_width(iommu->agaw);
a99c47a2
TL
5386 if (addr_width > cap_mgaw(iommu->cap))
5387 addr_width = cap_mgaw(iommu->cap);
5388
5389 if (dmar_domain->max_addr > (1LL << addr_width)) {
932a6523
BH
5390 dev_err(dev, "%s: iommu width (%d) is not "
5391 "sufficient for the mapped address (%llx)\n",
5392 __func__, addr_width, dmar_domain->max_addr);
fe40f1e0
WH
5393 return -EFAULT;
5394 }
a99c47a2
TL
5395 dmar_domain->gaw = addr_width;
5396
5397 /*
5398 * Knock out extra levels of page tables if necessary
5399 */
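	/*
	 * For example, a domain built with 4-level (48-bit) page tables
	 * being attached to an IOMMU that only supports 3-level (39-bit)
	 * tables drops its top-level directory here; after the gaw clamp
	 * above, only the first entry of that directory can be populated,
	 * so it becomes the new root.
	 */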
5400 while (iommu->agaw < dmar_domain->agaw) {
5401 struct dma_pte *pte;
5402
5403 pte = dmar_domain->pgd;
5404 if (dma_pte_present(pte)) {
25cbff16
SY
5405 dmar_domain->pgd = (struct dma_pte *)
5406 phys_to_virt(dma_pte_addr(pte));
7a661013 5407 free_pgtable_page(pte);
a99c47a2
TL
5408 }
5409 dmar_domain->agaw--;
5410 }
fe40f1e0 5411
8cc3759a
LB
5412 return 0;
5413}
5414
5415static int intel_iommu_attach_device(struct iommu_domain *domain,
5416 struct device *dev)
5417{
5418 int ret;
5419
5679582c
LB
5420 if (domain->type == IOMMU_DOMAIN_UNMANAGED &&
5421 device_is_rmrr_locked(dev)) {
8cc3759a
LB
5422 dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n");
5423 return -EPERM;
5424 }
5425
67b8e02b
LB
5426 if (is_aux_domain(dev, domain))
5427 return -EPERM;
5428
8cc3759a
LB
5429 /* normally dev is not mapped */
5430 if (unlikely(domain_context_mapped(dev))) {
5431 struct dmar_domain *old_domain;
5432
5433 old_domain = find_domain(dev);
fa954e68 5434 if (old_domain)
8cc3759a 5435 dmar_remove_one_dev_info(dev);
8cc3759a
LB
5436 }
5437
5438 ret = prepare_domain_attach_device(domain, dev);
5439 if (ret)
5440 return ret;
5441
5442 return domain_add_dev_info(to_dmar_domain(domain), dev);
38717946 5443}
38717946 5444
67b8e02b
LB
5445static int intel_iommu_aux_attach_device(struct iommu_domain *domain,
5446 struct device *dev)
5447{
5448 int ret;
5449
5450 if (!is_aux_domain(dev, domain))
5451 return -EPERM;
5452
5453 ret = prepare_domain_attach_device(domain, dev);
5454 if (ret)
5455 return ret;
5456
5457 return aux_domain_add_dev(to_dmar_domain(domain), dev);
5458}
5459
4c5478c9
JR
5460static void intel_iommu_detach_device(struct iommu_domain *domain,
5461 struct device *dev)
38717946 5462{
71753239 5463 dmar_remove_one_dev_info(dev);
faa3d6f5 5464}
c7151a8d 5465
67b8e02b
LB
5466static void intel_iommu_aux_detach_device(struct iommu_domain *domain,
5467 struct device *dev)
5468{
5469 aux_domain_remove_dev(to_dmar_domain(domain), dev);
5470}
5471
b146a1c9
JR
5472static int intel_iommu_map(struct iommu_domain *domain,
5473 unsigned long iova, phys_addr_t hpa,
781ca2de 5474 size_t size, int iommu_prot, gfp_t gfp)
faa3d6f5 5475{
00a77deb 5476 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
fe40f1e0 5477 u64 max_addr;
dde57a21 5478 int prot = 0;
faa3d6f5 5479 int ret;
fe40f1e0 5480
dde57a21
JR
5481 if (iommu_prot & IOMMU_READ)
5482 prot |= DMA_PTE_READ;
5483 if (iommu_prot & IOMMU_WRITE)
5484 prot |= DMA_PTE_WRITE;
9cf06697
SY
5485 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
5486 prot |= DMA_PTE_SNP;
dde57a21 5487
163cc52c 5488 max_addr = iova + size;
dde57a21 5489 if (dmar_domain->max_addr < max_addr) {
fe40f1e0
WH
5490 u64 end;
5491
5492 /* check if minimum agaw is sufficient for mapped address */
8954da1f 5493 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
fe40f1e0 5494 if (end < max_addr) {
9f10e5bf 5495 pr_err("%s: iommu width (%d) is not "
fe40f1e0 5496 "sufficient for the mapped address (%llx)\n",
8954da1f 5497 __func__, dmar_domain->gaw, max_addr);
fe40f1e0
WH
5498 return -EFAULT;
5499 }
dde57a21 5500 dmar_domain->max_addr = max_addr;
fe40f1e0 5501 }
ad051221
DW
 5502 /* Convert size into a page count, rounding up: the low bits of
 5503 hpa can push the end of the mapping onto an additional page. */
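	/*
	 * For example, hpa = 0x1001 and size = 0x1000 straddle two 4KiB
	 * pages, so aligned_nrpages() yields 2 rather than 1.
	 */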
88cb6a74 5504 size = aligned_nrpages(hpa, size);
ad051221
DW
5505 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
5506 hpa >> VTD_PAGE_SHIFT, size, prot);
faa3d6f5 5507 return ret;
38717946 5508}
38717946 5509
5009065d 5510static size_t intel_iommu_unmap(struct iommu_domain *domain,
56f8af5e
WD
5511 unsigned long iova, size_t size,
5512 struct iommu_iotlb_gather *gather)
38717946 5513{
00a77deb 5514 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
ea8ea460 5515 struct page *freelist = NULL;
ea8ea460
DW
5516 unsigned long start_pfn, last_pfn;
5517 unsigned int npages;
42e8c186 5518 int iommu_id, level = 0;
5cf0a76f
DW
5519
 5520 /* Cope with the horrid API, which requires us to unmap more than the
 5521 size argument if the IOVA falls inside a large-page mapping. */
dc02e46e 5522 BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
5cf0a76f
DW
5523
5524 if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
5525 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
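	/*
	 * For example, a 4KiB unmap request that falls inside a 2MiB
	 * superpage (level 2) is widened to the full 2MiB here, and that
	 * widened size is what gets returned to the caller.
	 */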
4b99d352 5526
ea8ea460
DW
5527 start_pfn = iova >> VTD_PAGE_SHIFT;
5528 last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
5529
5530 freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
5531
5532 npages = last_pfn - start_pfn + 1;
5533
f746a025 5534 for_each_domain_iommu(iommu_id, dmar_domain)
42e8c186
JR
5535 iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
5536 start_pfn, npages, !freelist, 0);
ea8ea460
DW
5537
5538 dma_free_pagelist(freelist);
fe40f1e0 5539
163cc52c
DW
5540 if (dmar_domain->max_addr == iova + size)
5541 dmar_domain->max_addr = iova;
b146a1c9 5542
5cf0a76f 5543 return size;
38717946 5544}
38717946 5545
d14d6577 5546static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
bb5547ac 5547 dma_addr_t iova)
38717946 5548{
00a77deb 5549 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
38717946 5550 struct dma_pte *pte;
5cf0a76f 5551 int level = 0;
faa3d6f5 5552 u64 phys = 0;
38717946 5553
5cf0a76f 5554 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
38717946 5555 if (pte)
faa3d6f5 5556 phys = dma_pte_addr(pte);
38717946 5557
faa3d6f5 5558 return phys;
38717946 5559}
a8bcbb0d 5560
95587a75
LB
5561static inline bool scalable_mode_support(void)
5562{
5563 struct dmar_drhd_unit *drhd;
5564 struct intel_iommu *iommu;
5565 bool ret = true;
5566
5567 rcu_read_lock();
5568 for_each_active_iommu(iommu, drhd) {
5569 if (!sm_supported(iommu)) {
5570 ret = false;
5571 break;
5572 }
5573 }
5574 rcu_read_unlock();
5575
5576 return ret;
5577}
5578
5579static inline bool iommu_pasid_support(void)
5580{
5581 struct dmar_drhd_unit *drhd;
5582 struct intel_iommu *iommu;
5583 bool ret = true;
5584
5585 rcu_read_lock();
5586 for_each_active_iommu(iommu, drhd) {
5587 if (!pasid_supported(iommu)) {
5588 ret = false;
5589 break;
5590 }
5591 }
5592 rcu_read_unlock();
5593
5594 return ret;
5595}
5596
5d587b8d 5597static bool intel_iommu_capable(enum iommu_cap cap)
dbb9fd86 5598{
dbb9fd86 5599 if (cap == IOMMU_CAP_CACHE_COHERENCY)
5d587b8d 5600 return domain_update_iommu_snooping(NULL) == 1;
323f99cb 5601 if (cap == IOMMU_CAP_INTR_REMAP)
5d587b8d 5602 return irq_remapping_enabled == 1;
dbb9fd86 5603
5d587b8d 5604 return false;
dbb9fd86
SY
5605}
5606
abdfdde2
AW
5607static int intel_iommu_add_device(struct device *dev)
5608{
942067f1
LB
5609 struct dmar_domain *dmar_domain;
5610 struct iommu_domain *domain;
a5459cfe 5611 struct intel_iommu *iommu;
abdfdde2 5612 struct iommu_group *group;
156baca8 5613 u8 bus, devfn;
942067f1 5614 int ret;
70ae6f0d 5615
a5459cfe
AW
5616 iommu = device_to_iommu(dev, &bus, &devfn);
5617 if (!iommu)
70ae6f0d
AW
5618 return -ENODEV;
5619
e3d10af1 5620 iommu_device_link(&iommu->iommu, dev);
a4ff1fc2 5621
8af46c78
LB
5622 if (translation_pre_enabled(iommu))
5623 dev->archdata.iommu = DEFER_DEVICE_DOMAIN_INFO;
5624
e17f9ff4 5625 group = iommu_group_get_for_dev(dev);
783f157b 5626
e17f9ff4
AW
5627 if (IS_ERR(group))
5628 return PTR_ERR(group);
bcb71abe 5629
abdfdde2 5630 iommu_group_put(group);
942067f1
LB
5631
5632 domain = iommu_get_domain_for_dev(dev);
5633 dmar_domain = to_dmar_domain(domain);
5634 if (domain->type == IOMMU_DOMAIN_DMA) {
0e31a726 5635 if (device_def_domain_type(dev) == IOMMU_DOMAIN_IDENTITY) {
942067f1
LB
5636 ret = iommu_request_dm_for_dev(dev);
5637 if (ret) {
ae23bfb6 5638 dmar_remove_one_dev_info(dev);
942067f1
LB
5639 dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
5640 domain_add_dev_info(si_domain, dev);
5641 dev_info(dev,
5642 "Device uses a private identity domain.\n");
942067f1 5643 }
942067f1
LB
5644 }
5645 } else {
0e31a726 5646 if (device_def_domain_type(dev) == IOMMU_DOMAIN_DMA) {
942067f1
LB
5647 ret = iommu_request_dma_domain_for_dev(dev);
5648 if (ret) {
ae23bfb6 5649 dmar_remove_one_dev_info(dev);
942067f1 5650 dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
4ec066c7 5651 if (!get_private_domain_for_dev(dev)) {
942067f1
LB
5652 dev_warn(dev,
5653 "Failed to get a private domain.\n");
5654 return -ENOMEM;
5655 }
5656
5657 dev_info(dev,
5658 "Device uses a private dma domain.\n");
942067f1 5659 }
942067f1
LB
5660 }
5661 }
5662
cfb94a37
LB
5663 if (device_needs_bounce(dev)) {
5664 dev_info(dev, "Use Intel IOMMU bounce page dma_ops\n");
5665 set_dma_ops(dev, &bounce_dma_ops);
5666 }
5667
e17f9ff4 5668 return 0;
abdfdde2 5669}
70ae6f0d 5670
abdfdde2
AW
5671static void intel_iommu_remove_device(struct device *dev)
5672{
a5459cfe
AW
5673 struct intel_iommu *iommu;
5674 u8 bus, devfn;
5675
5676 iommu = device_to_iommu(dev, &bus, &devfn);
5677 if (!iommu)
5678 return;
5679
458b7c8e
LB
5680 dmar_remove_one_dev_info(dev);
5681
abdfdde2 5682 iommu_group_remove_device(dev);
a5459cfe 5683
e3d10af1 5684 iommu_device_unlink(&iommu->iommu, dev);
cfb94a37
LB
5685
5686 if (device_needs_bounce(dev))
5687 set_dma_ops(dev, NULL);
70ae6f0d
AW
5688}
5689
0659b8dc
EA
5690static void intel_iommu_get_resv_regions(struct device *device,
5691 struct list_head *head)
5692{
5f64ce54 5693 int prot = DMA_PTE_READ | DMA_PTE_WRITE;
0659b8dc
EA
5694 struct iommu_resv_region *reg;
5695 struct dmar_rmrr_unit *rmrr;
5696 struct device *i_dev;
5697 int i;
5698
5f64ce54 5699 down_read(&dmar_global_lock);
0659b8dc
EA
5700 for_each_rmrr_units(rmrr) {
5701 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
5702 i, i_dev) {
5f64ce54 5703 struct iommu_resv_region *resv;
1c5c59fb 5704 enum iommu_resv_type type;
5f64ce54
EA
5705 size_t length;
5706
3855ba2d
EA
5707 if (i_dev != device &&
5708 !is_downstream_to_pci_bridge(device, i_dev))
0659b8dc
EA
5709 continue;
5710
5f64ce54 5711 length = rmrr->end_address - rmrr->base_address + 1;
1c5c59fb
EA
5712
5713 type = device_rmrr_is_relaxable(device) ?
5714 IOMMU_RESV_DIRECT_RELAXABLE : IOMMU_RESV_DIRECT;
5715
5f64ce54 5716 resv = iommu_alloc_resv_region(rmrr->base_address,
1c5c59fb 5717 length, prot, type);
5f64ce54
EA
5718 if (!resv)
5719 break;
5720
5721 list_add_tail(&resv->list, head);
0659b8dc
EA
5722 }
5723 }
5f64ce54 5724 up_read(&dmar_global_lock);
0659b8dc 5725
d850c2ee
LB
5726#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
5727 if (dev_is_pci(device)) {
5728 struct pci_dev *pdev = to_pci_dev(device);
5729
5730 if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) {
cde9319e 5731 reg = iommu_alloc_resv_region(0, 1UL << 24, prot,
d8018a0e 5732 IOMMU_RESV_DIRECT_RELAXABLE);
d850c2ee
LB
5733 if (reg)
5734 list_add_tail(&reg->list, head);
5735 }
5736 }
5737#endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */
5738
0659b8dc
EA
5739 reg = iommu_alloc_resv_region(IOAPIC_RANGE_START,
5740 IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1,
9d3a4de4 5741 0, IOMMU_RESV_MSI);
0659b8dc
EA
5742 if (!reg)
5743 return;
5744 list_add_tail(&reg->list, head);
5745}
5746
5747static void intel_iommu_put_resv_regions(struct device *dev,
5748 struct list_head *head)
5749{
5750 struct iommu_resv_region *entry, *next;
5751
5f64ce54
EA
5752 list_for_each_entry_safe(entry, next, head, list)
5753 kfree(entry);
70ae6f0d
AW
5754}
5755
d7cbc0f3 5756int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev)
2f26e0a9
DW
5757{
5758 struct device_domain_info *info;
5759 struct context_entry *context;
5760 struct dmar_domain *domain;
5761 unsigned long flags;
5762 u64 ctx_lo;
5763 int ret;
5764
4ec066c7 5765 domain = find_domain(dev);
2f26e0a9
DW
5766 if (!domain)
5767 return -EINVAL;
5768
5769 spin_lock_irqsave(&device_domain_lock, flags);
5770 spin_lock(&iommu->lock);
5771
5772 ret = -EINVAL;
d7cbc0f3 5773 info = dev->archdata.iommu;
2f26e0a9
DW
5774 if (!info || !info->pasid_supported)
5775 goto out;
5776
5777 context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
5778 if (WARN_ON(!context))
5779 goto out;
5780
5781 ctx_lo = context[0].lo;
5782
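	/*
	 * Set the PASID-enable bit in the context entry and publish the
	 * change with a device-selective context-cache invalidation.
	 */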
2f26e0a9 5783 if (!(ctx_lo & CONTEXT_PASIDE)) {
2f26e0a9
DW
5784 ctx_lo |= CONTEXT_PASIDE;
5785 context[0].lo = ctx_lo;
5786 wmb();
d7cbc0f3
LB
5787 iommu->flush.flush_context(iommu,
5788 domain->iommu_did[iommu->seq_id],
5789 PCI_DEVID(info->bus, info->devfn),
2f26e0a9
DW
5790 DMA_CCMD_MASK_NOBIT,
5791 DMA_CCMD_DEVICE_INVL);
5792 }
5793
5794 /* Enable PASID support in the device, if it wasn't already */
5795 if (!info->pasid_enabled)
5796 iommu_enable_dev_iotlb(info);
5797
2f26e0a9
DW
5798 ret = 0;
5799
5800 out:
5801 spin_unlock(&iommu->lock);
5802 spin_unlock_irqrestore(&device_domain_lock, flags);
5803
5804 return ret;
5805}
5806
73bcbdc9
JS
5807static void intel_iommu_apply_resv_region(struct device *dev,
5808 struct iommu_domain *domain,
5809 struct iommu_resv_region *region)
5810{
5811 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5812 unsigned long start, end;
5813
5814 start = IOVA_PFN(region->start);
5815 end = IOVA_PFN(region->start + region->length - 1);
5816
5817 WARN_ON_ONCE(!reserve_iova(&dmar_domain->iovad, start, end));
5818}
5819
d7cbc0f3 5820#ifdef CONFIG_INTEL_IOMMU_SVM
2f26e0a9
DW
5821struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
5822{
5823 struct intel_iommu *iommu;
5824 u8 bus, devfn;
5825
5826 if (iommu_dummy(dev)) {
5827 dev_warn(dev,
5828 "No IOMMU translation for device; cannot enable SVM\n");
5829 return NULL;
5830 }
5831
5832 iommu = device_to_iommu(dev, &bus, &devfn);
 5833 if (!iommu) {
b9997e38 5834 dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
2f26e0a9
DW
5835 return NULL;
5836 }
5837
2f26e0a9
DW
5838 return iommu;
5839}
5840#endif /* CONFIG_INTEL_IOMMU_SVM */
5841
95587a75
LB
5842static int intel_iommu_enable_auxd(struct device *dev)
5843{
5844 struct device_domain_info *info;
5845 struct intel_iommu *iommu;
5846 unsigned long flags;
5847 u8 bus, devfn;
5848 int ret;
5849
5850 iommu = device_to_iommu(dev, &bus, &devfn);
5851 if (!iommu || dmar_disabled)
5852 return -EINVAL;
5853
5854 if (!sm_supported(iommu) || !pasid_supported(iommu))
5855 return -EINVAL;
5856
5857 ret = intel_iommu_enable_pasid(iommu, dev);
5858 if (ret)
5859 return -ENODEV;
5860
5861 spin_lock_irqsave(&device_domain_lock, flags);
5862 info = dev->archdata.iommu;
5863 info->auxd_enabled = 1;
5864 spin_unlock_irqrestore(&device_domain_lock, flags);
5865
5866 return 0;
5867}
5868
5869static int intel_iommu_disable_auxd(struct device *dev)
5870{
5871 struct device_domain_info *info;
5872 unsigned long flags;
5873
5874 spin_lock_irqsave(&device_domain_lock, flags);
5875 info = dev->archdata.iommu;
5876 if (!WARN_ON(!info))
5877 info->auxd_enabled = 0;
5878 spin_unlock_irqrestore(&device_domain_lock, flags);
5879
5880 return 0;
5881}
5882
5883/*
 5884 * A PCI Express Designated Vendor-Specific Extended Capability (DVSEC)
 5885 * is defined in section 3.7 of the Intel Scalable I/O Virtualization spec
 5886 * so that system software and tools can detect endpoint devices that
 5887 * support Intel Scalable I/O Virtualization without a host driver dependency.
5888 *
5889 * Returns the address of the matching extended capability structure within
5890 * the device's PCI configuration space or 0 if the device does not support
5891 * it.
5892 */
5893static int siov_find_pci_dvsec(struct pci_dev *pdev)
5894{
5895 int pos;
5896 u16 vendor, id;
5897
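	/*
	 * 0x23 is the PCIe DVSEC extended capability ID; within each DVSEC
	 * instance, offset 4 holds the DVSEC vendor ID and offset 8 the
	 * DVSEC ID. Intel advertises Scalable IOV with DVSEC ID 5, per the
	 * spec section referenced above.
	 */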
5898 pos = pci_find_next_ext_capability(pdev, 0, 0x23);
5899 while (pos) {
5900 pci_read_config_word(pdev, pos + 4, &vendor);
5901 pci_read_config_word(pdev, pos + 8, &id);
5902 if (vendor == PCI_VENDOR_ID_INTEL && id == 5)
5903 return pos;
5904
5905 pos = pci_find_next_ext_capability(pdev, pos, 0x23);
5906 }
5907
5908 return 0;
5909}
5910
5911static bool
5912intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat)
5913{
5914 if (feat == IOMMU_DEV_FEAT_AUX) {
5915 int ret;
5916
5917 if (!dev_is_pci(dev) || dmar_disabled ||
5918 !scalable_mode_support() || !iommu_pasid_support())
5919 return false;
5920
5921 ret = pci_pasid_features(to_pci_dev(dev));
5922 if (ret < 0)
5923 return false;
5924
5925 return !!siov_find_pci_dvsec(to_pci_dev(dev));
5926 }
5927
5928 return false;
5929}
5930
5931static int
5932intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
5933{
5934 if (feat == IOMMU_DEV_FEAT_AUX)
5935 return intel_iommu_enable_auxd(dev);
5936
5937 return -ENODEV;
5938}
5939
5940static int
5941intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat)
5942{
5943 if (feat == IOMMU_DEV_FEAT_AUX)
5944 return intel_iommu_disable_auxd(dev);
5945
5946 return -ENODEV;
5947}
5948
5949static bool
5950intel_iommu_dev_feat_enabled(struct device *dev, enum iommu_dev_features feat)
5951{
5952 struct device_domain_info *info = dev->archdata.iommu;
5953
5954 if (feat == IOMMU_DEV_FEAT_AUX)
5955 return scalable_mode_support() && info && info->auxd_enabled;
5956
5957 return false;
5958}
5959
0e8000f8
LB
5960static int
5961intel_iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev)
5962{
5963 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5964
5965 return dmar_domain->default_pasid > 0 ?
5966 dmar_domain->default_pasid : -EINVAL;
5967}
5968
8af46c78
LB
5969static bool intel_iommu_is_attach_deferred(struct iommu_domain *domain,
5970 struct device *dev)
5971{
5972 return dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO;
5973}
5974
b0119e87 5975const struct iommu_ops intel_iommu_ops = {
0659b8dc
EA
5976 .capable = intel_iommu_capable,
5977 .domain_alloc = intel_iommu_domain_alloc,
5978 .domain_free = intel_iommu_domain_free,
5979 .attach_dev = intel_iommu_attach_device,
5980 .detach_dev = intel_iommu_detach_device,
67b8e02b
LB
5981 .aux_attach_dev = intel_iommu_aux_attach_device,
5982 .aux_detach_dev = intel_iommu_aux_detach_device,
0e8000f8 5983 .aux_get_pasid = intel_iommu_aux_get_pasid,
0659b8dc
EA
5984 .map = intel_iommu_map,
5985 .unmap = intel_iommu_unmap,
0659b8dc
EA
5986 .iova_to_phys = intel_iommu_iova_to_phys,
5987 .add_device = intel_iommu_add_device,
5988 .remove_device = intel_iommu_remove_device,
5989 .get_resv_regions = intel_iommu_get_resv_regions,
5990 .put_resv_regions = intel_iommu_put_resv_regions,
73bcbdc9 5991 .apply_resv_region = intel_iommu_apply_resv_region,
0659b8dc 5992 .device_group = pci_device_group,
95587a75
LB
5993 .dev_has_feat = intel_iommu_dev_has_feat,
5994 .dev_feat_enabled = intel_iommu_dev_feat_enabled,
5995 .dev_enable_feat = intel_iommu_dev_enable_feat,
5996 .dev_disable_feat = intel_iommu_dev_disable_feat,
8af46c78 5997 .is_attach_deferred = intel_iommu_is_attach_deferred,
0659b8dc 5998 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
a8bcbb0d 5999};
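/*
 * Illustrative sketch (not part of the driver): the ops above are normally
 * invoked through the generic IOMMU API rather than directly. The function
 * name, IOVA, physical address and size below are made-up example values.
 */
static int __maybe_unused example_iommu_ops_usage(struct device *dev)
{
	struct iommu_domain *domain;
	int ret;

	domain = iommu_domain_alloc(dev->bus);	/* -> intel_iommu_domain_alloc() */
	if (!domain)
		return -ENOMEM;

	ret = iommu_attach_device(domain, dev);	/* -> intel_iommu_attach_device() */
	if (ret)
		goto out_free;

	/* Map one 4KiB page at IOVA 0x100000 to physical 0x200000, RW. */
	ret = iommu_map(domain, 0x100000, 0x200000, 0x1000,
			IOMMU_READ | IOMMU_WRITE);	/* -> intel_iommu_map() */
	if (!ret)
		iommu_unmap(domain, 0x100000, 0x1000);	/* -> intel_iommu_unmap() */

	iommu_detach_device(domain, dev);	/* -> intel_iommu_detach_device() */
out_free:
	iommu_domain_free(domain);		/* -> intel_iommu_domain_free() */
	return ret;
}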
9af88143 6000
1f76249c 6001static void quirk_iommu_igfx(struct pci_dev *dev)
9452618e 6002{
932a6523 6003 pci_info(dev, "Disabling IOMMU for graphics on this chipset\n");
9452618e
DV
6004 dmar_map_gfx = 0;
6005}
6006
1f76249c
CW
6007/* G4x/GM45 integrated gfx dmar support is totally busted. */
6008DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_igfx);
6009DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_igfx);
6010DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_igfx);
6011DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_igfx);
6012DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_igfx);
6013DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_igfx);
6014DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_igfx);
6015
6016/* Broadwell igfx malfunctions with dmar */
6017DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1606, quirk_iommu_igfx);
6018DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160B, quirk_iommu_igfx);
6019DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160E, quirk_iommu_igfx);
6020DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1602, quirk_iommu_igfx);
6021DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160A, quirk_iommu_igfx);
6022DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160D, quirk_iommu_igfx);
6023DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1616, quirk_iommu_igfx);
6024DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161B, quirk_iommu_igfx);
6025DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161E, quirk_iommu_igfx);
6026DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1612, quirk_iommu_igfx);
6027DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161A, quirk_iommu_igfx);
6028DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161D, quirk_iommu_igfx);
6029DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1626, quirk_iommu_igfx);
6030DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162B, quirk_iommu_igfx);
6031DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162E, quirk_iommu_igfx);
6032DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1622, quirk_iommu_igfx);
6033DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162A, quirk_iommu_igfx);
6034DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162D, quirk_iommu_igfx);
6035DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1636, quirk_iommu_igfx);
6036DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163B, quirk_iommu_igfx);
6037DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163E, quirk_iommu_igfx);
6038DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx);
6039DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx);
6040DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx);
9452618e 6041
d34d6517 6042static void quirk_iommu_rwbf(struct pci_dev *dev)
9af88143
DW
6043{
6044 /*
6045 * Mobile 4 Series Chipset neglects to set RWBF capability,
210561ff 6046 * but needs it. Same seems to hold for the desktop versions.
9af88143 6047 */
932a6523 6048 pci_info(dev, "Forcing write-buffer flush capability\n");
9af88143
DW
6049 rwbf_quirk = 1;
6050}
6051
6052DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
210561ff
DV
6053DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
6054DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
6055DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
6056DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
6057DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
6058DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
e0fc7e0b 6059
eecfd57f
AJ
6060#define GGC 0x52
6061#define GGC_MEMORY_SIZE_MASK (0xf << 8)
6062#define GGC_MEMORY_SIZE_NONE (0x0 << 8)
6063#define GGC_MEMORY_SIZE_1M (0x1 << 8)
6064#define GGC_MEMORY_SIZE_2M (0x3 << 8)
6065#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
6066#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
6067#define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
6068#define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
6069
d34d6517 6070static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
9eecabcb
DW
6071{
6072 unsigned short ggc;
6073
eecfd57f 6074 if (pci_read_config_word(dev, GGC, &ggc))
9eecabcb
DW
6075 return;
6076
eecfd57f 6077 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
932a6523 6078 pci_info(dev, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
9eecabcb 6079 dmar_map_gfx = 0;
6fbcfb3e
DW
6080 } else if (dmar_map_gfx) {
6081 /* we have to ensure the gfx device is idle before we flush */
932a6523 6082 pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n");
6fbcfb3e
DW
6083 intel_iommu_strict = 1;
6084 }
9eecabcb
DW
6085}
6086DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
6087DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
6088DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
6089DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
6090
e0fc7e0b
DW
6091/* On Tylersburg chipsets, some BIOSes have been known to enable the
6092 ISOCH DMAR unit for the Azalia sound device, but not give it any
6093 TLB entries, which causes it to deadlock. Check for that. We do
6094 this in a function called from init_dmars(), instead of in a PCI
6095 quirk, because we don't want to print the obnoxious "BIOS broken"
6096 message if VT-d is actually disabled.
6097*/
6098static void __init check_tylersburg_isoch(void)
6099{
6100 struct pci_dev *pdev;
6101 uint32_t vtisochctrl;
6102
6103 /* If there's no Azalia in the system anyway, forget it. */
6104 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
6105 if (!pdev)
6106 return;
6107 pci_dev_put(pdev);
6108
6109 /* System Management Registers. Might be hidden, in which case
6110 we can't do the sanity check. But that's OK, because the
6111 known-broken BIOSes _don't_ actually hide it, so far. */
6112 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
6113 if (!pdev)
6114 return;
6115
6116 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
6117 pci_dev_put(pdev);
6118 return;
6119 }
6120
6121 pci_dev_put(pdev);
6122
6123 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
6124 if (vtisochctrl & 1)
6125 return;
6126
6127 /* Drop all bits other than the number of TLB entries */
6128 vtisochctrl &= 0x1c;
6129
6130 /* If we have the recommended number of TLB entries (16), fine. */
6131 if (vtisochctrl == 0x10)
6132 return;
6133
6134 /* Zero TLB entries? You get to ride the short bus to school. */
6135 if (!vtisochctrl) {
6136 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
6137 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
6138 dmi_get_system_info(DMI_BIOS_VENDOR),
6139 dmi_get_system_info(DMI_BIOS_VERSION),
6140 dmi_get_system_info(DMI_PRODUCT_VERSION));
6141 iommu_identity_mapping |= IDENTMAP_AZALIA;
6142 return;
6143 }
9f10e5bf
JR
6144
 6145 pr_warn("Recommended number of TLB entries for ISOCH unit is 16; your BIOS set %d\n",
e0fc7e0b
DW
6146 vtisochctrl);
6147}