/*
 * Copyright © 2006-2014 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * Authors: David Woodhouse <dwmw2@infradead.org>,
 *          Ashok Raj <ashok.raj@intel.com>,
 *          Shaohua Li <shaohua.li@intel.com>,
 *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
 *          Fenghua Yu <fenghua.yu@intel.com>
 *          Joerg Roedel <jroedel@suse.de>
 */

#define pr_fmt(fmt)	"DMAR: " fmt
#define dev_fmt(fmt)	pr_fmt(fmt)

#include <linux/init.h>
#include <linux/bitmap.h>
#include <linux/debugfs.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/dma-mapping.h>
#include <linux/mempool.h>
#include <linux/memory.h>
#include <linux/cpu.h>
#include <linux/timer.h>
#include <linux/io.h>
#include <linux/iova.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
#include <linux/syscore_ops.h>
#include <linux/tboot.h>
#include <linux/dmi.h>
#include <linux/pci-ats.h>
#include <linux/memblock.h>
#include <linux/dma-contiguous.h>
#include <linux/dma-direct.h>
#include <linux/crash_dump.h>
#include <asm/irq_remapping.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>

#include "irq_remapping.h"
#include "intel-pasid.h"

#define ROOT_SIZE		VTD_PAGE_SIZE
#define CONTEXT_SIZE		VTD_PAGE_SIZE

#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)

#define IOAPIC_RANGE_START	(0xfee00000)
#define IOAPIC_RANGE_END	(0xfeefffff)
#define IOVA_START_ADDR		(0x1000)

#define DEFAULT_DOMAIN_ADDRESS_WIDTH 57

#define MAX_AGAW_WIDTH 64
#define MAX_AGAW_PFN_WIDTH	(MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)

#define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)

/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
   to match. That way, we can use 'unsigned long' for PFNs with impunity. */
#define DOMAIN_MAX_PFN(gaw)	((unsigned long) min_t(uint64_t, \
				__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
#define DOMAIN_MAX_ADDR(gaw)	(((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)

/* IO virtual address start page frame number */
#define IOVA_START_PFN		(1)

#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)

/* page table handling */
#define LEVEL_STRIDE		(9)
#define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)

/*
 * This bitmap is used to advertise the page sizes our hardware support
 * to the IOMMU core, which will then use this information to split
 * physically contiguous memory regions it is mapping into page sizes
 * that we support.
 *
 * Traditionally the IOMMU core just handed us the mappings directly,
 * after making sure the size is an order of a 4KiB page and that the
 * mapping has natural alignment.
 *
 * To retain this behavior, we currently advertise that we support
 * all page sizes that are an order of 4KiB.
 *
 * If at some point we'd like to utilize the IOMMU core's new behavior,
 * we could change this to advertise the real page sizes we support.
 */
#define INTEL_IOMMU_PGSIZES	(~0xFFFUL)

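/*
 * AGAW (adjusted guest address width) <-> page-table geometry helpers.
 * With a 9-bit stride per level, an AGAW of N corresponds to an
 * (N + 2)-level page table covering 30 + N * LEVEL_STRIDE bits of
 * address space, capped at MAX_AGAW_WIDTH.
 */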
static inline int agaw_to_level(int agaw)
{
	return agaw + 2;
}

static inline int agaw_to_width(int agaw)
{
	return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
}

static inline int width_to_agaw(int width)
{
	return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
}

static inline unsigned int level_to_offset_bits(int level)
{
	return (level - 1) * LEVEL_STRIDE;
}

static inline int pfn_level_offset(unsigned long pfn, int level)
{
	return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
}

static inline unsigned long level_mask(int level)
{
	return -1UL << level_to_offset_bits(level);
}

static inline unsigned long level_size(int level)
{
	return 1UL << level_to_offset_bits(level);
}

static inline unsigned long align_to_level(unsigned long pfn, int level)
{
	return (pfn + level_size(level) - 1) & level_mask(level);
}

static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
{
	return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
}

/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
   are never going to work. */
static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
{
	return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
}

static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
{
	return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
}
static inline unsigned long page_to_dma_pfn(struct page *pg)
{
	return mm_to_dma_pfn(page_to_pfn(pg));
}
static inline unsigned long virt_to_dma_pfn(void *p)
{
	return page_to_dma_pfn(virt_to_page(p));
}

/* global iommu list, set NULL for ignored DMAR units */
static struct intel_iommu **g_iommus;

static void __init check_tylersburg_isoch(void);
static int rwbf_quirk;

/*
 * set to 1 to panic kernel if can't successfully enable VT-d
 * (used when kernel is launched w/ TXT)
 */
static int force_on = 0;
int intel_iommu_tboot_noforce;
static int no_platform_optin;

#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))

/*
 * Take a root_entry and return the Lower Context Table Pointer (LCTP)
 * if marked present.
 */
static phys_addr_t root_entry_lctp(struct root_entry *re)
{
	if (!(re->lo & 1))
		return 0;

	return re->lo & VTD_PAGE_MASK;
}

/*
 * Take a root_entry and return the Upper Context Table Pointer (UCTP)
 * if marked present.
 */
static phys_addr_t root_entry_uctp(struct root_entry *re)
{
	if (!(re->hi & 1))
		return 0;

	return re->hi & VTD_PAGE_MASK;
}

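/*
 * Bit-level accessors for context entries. The low qword holds the
 * present bit, the translation type and the second-level page-table
 * pointer; the high qword holds the address width and domain id. The
 * "copied" flag lives in a software-available bit and marks entries
 * inherited from a previous kernel (e.g. across kexec/kdump).
 */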
static inline void context_clear_pasid_enable(struct context_entry *context)
{
	context->lo &= ~(1ULL << 11);
}

static inline bool context_pasid_enabled(struct context_entry *context)
{
	return !!(context->lo & (1ULL << 11));
}

static inline void context_set_copied(struct context_entry *context)
{
	context->hi |= (1ull << 3);
}

static inline bool context_copied(struct context_entry *context)
{
	return !!(context->hi & (1ULL << 3));
}

static inline bool __context_present(struct context_entry *context)
{
	return (context->lo & 1);
}

bool context_present(struct context_entry *context)
{
	return context_pasid_enabled(context) ?
	     __context_present(context) :
	     __context_present(context) && !context_copied(context);
}

static inline void context_set_present(struct context_entry *context)
{
	context->lo |= 1;
}

static inline void context_set_fault_enable(struct context_entry *context)
{
	context->lo &= (((u64)-1) << 2) | 1;
}

static inline void context_set_translation_type(struct context_entry *context,
						unsigned long value)
{
	context->lo &= (((u64)-1) << 4) | 3;
	context->lo |= (value & 3) << 2;
}

static inline void context_set_address_root(struct context_entry *context,
					    unsigned long value)
{
	context->lo &= ~VTD_PAGE_MASK;
	context->lo |= value & VTD_PAGE_MASK;
}

static inline void context_set_address_width(struct context_entry *context,
					     unsigned long value)
{
	context->hi |= value & 7;
}

static inline void context_set_domain_id(struct context_entry *context,
					 unsigned long value)
{
	context->hi |= (value & ((1 << 16) - 1)) << 8;
}

static inline int context_domain_id(struct context_entry *c)
{
	return((c->hi >> 8) & 0xffff);
}

static inline void context_clear_entry(struct context_entry *context)
{
	context->lo = 0;
	context->hi = 0;
}

/*
 * This domain is a statically identity mapping domain.
 *	1. This domain creates a static 1:1 mapping to all usable memory.
 *	2. It maps to each iommu if successful.
 *	3. Each iommu maps to this domain if successful.
 */
static struct dmar_domain *si_domain;
static int hw_pass_through = 1;

/*
 * Domain represents a virtual machine; more than one device
 * across iommus may be owned by one domain, e.g. a kvm guest.
 */
#define DOMAIN_FLAG_VIRTUAL_MACHINE	(1 << 0)

/* si_domain contains multiple devices */
#define DOMAIN_FLAG_STATIC_IDENTITY	(1 << 1)

#define for_each_domain_iommu(idx, domain)			\
	for (idx = 0; idx < g_num_of_iommus; idx++)		\
		if (domain->iommu_refcnt[idx])

struct dmar_rmrr_unit {
	struct list_head list;		/* list of rmrr units	*/
	struct acpi_dmar_header *hdr;	/* ACPI header		*/
	u64	base_address;		/* reserved base address*/
	u64	end_address;		/* reserved end address */
	struct dmar_dev_scope *devices;	/* target devices */
	int	devices_cnt;		/* target device count */
	struct iommu_resv_region *resv;	/* reserved region handle */
};

struct dmar_atsr_unit {
	struct list_head list;		/* list of ATSR units */
	struct acpi_dmar_header *hdr;	/* ACPI header */
	struct dmar_dev_scope *devices;	/* target devices */
	int devices_cnt;		/* target device count */
	u8 include_all:1;		/* include all ports */
};

static LIST_HEAD(dmar_atsr_units);
static LIST_HEAD(dmar_rmrr_units);

#define for_each_rmrr_units(rmrr) \
	list_for_each_entry(rmrr, &dmar_rmrr_units, list)

/* bitmap for indexing intel_iommus */
static int g_num_of_iommus;

static void domain_exit(struct dmar_domain *domain);
static void domain_remove_dev_info(struct dmar_domain *domain);
static void dmar_remove_one_dev_info(struct device *dev);
static void __dmar_remove_one_dev_info(struct device_domain_info *info);
static void domain_context_clear(struct intel_iommu *iommu,
				 struct device *dev);
static int domain_detach_iommu(struct dmar_domain *domain,
			       struct intel_iommu *iommu);

#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
int dmar_disabled = 0;
#else
int dmar_disabled = 1;
#endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/

int intel_iommu_enabled = 0;
EXPORT_SYMBOL_GPL(intel_iommu_enabled);

static int dmar_map_gfx = 1;
static int dmar_forcedac;
static int intel_iommu_strict;
static int intel_iommu_superpage = 1;
static int intel_iommu_sm = 1;
static int iommu_identity_mapping;

#define IDENTMAP_ALL		1
#define IDENTMAP_GFX		2
#define IDENTMAP_AZALIA		4

#define sm_supported(iommu)	(intel_iommu_sm && ecap_smts((iommu)->ecap))
#define pasid_supported(iommu)	(sm_supported(iommu) &&			\
				 ecap_pasid((iommu)->ecap))

int intel_iommu_gfx_mapped;
EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);

#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
static DEFINE_SPINLOCK(device_domain_lock);
static LIST_HEAD(device_domain_list);

/*
 * Iterate over elements in device_domain_list and call the specified
 * callback @fn against each element.
 */
int for_each_device_domain(int (*fn)(struct device_domain_info *info,
				     void *data), void *data)
{
	int ret = 0;
	unsigned long flags;
	struct device_domain_info *info;

	spin_lock_irqsave(&device_domain_lock, flags);
	list_for_each_entry(info, &device_domain_list, global) {
		ret = fn(info, data);
		if (ret) {
			spin_unlock_irqrestore(&device_domain_lock, flags);
			return ret;
		}
	}
	spin_unlock_irqrestore(&device_domain_lock, flags);

	return 0;
}

const struct iommu_ops intel_iommu_ops;

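/*
 * Track whether the BIOS or a previous kernel left DMA translation
 * enabled (DMA_GSTS_TES set), so the driver knows it must take over
 * live root/context tables instead of starting from scratch.
 */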
static bool translation_pre_enabled(struct intel_iommu *iommu)
{
	return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
}

static void clear_translation_pre_enabled(struct intel_iommu *iommu)
{
	iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
}

static void init_translation_status(struct intel_iommu *iommu)
{
	u32 gsts;

	gsts = readl(iommu->reg + DMAR_GSTS_REG);
	if (gsts & DMA_GSTS_TES)
		iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
}

/* Convert a generic 'struct iommu_domain' to the private 'struct dmar_domain' */
static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
{
	return container_of(dom, struct dmar_domain, domain);
}

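/* Parse the intel_iommu= early command line parameter. */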
static int __init intel_iommu_setup(char *str)
{
	if (!str)
		return -EINVAL;
	while (*str) {
		if (!strncmp(str, "on", 2)) {
			dmar_disabled = 0;
			pr_info("IOMMU enabled\n");
		} else if (!strncmp(str, "off", 3)) {
			dmar_disabled = 1;
			no_platform_optin = 1;
			pr_info("IOMMU disabled\n");
		} else if (!strncmp(str, "igfx_off", 8)) {
			dmar_map_gfx = 0;
			pr_info("Disable GFX device mapping\n");
		} else if (!strncmp(str, "forcedac", 8)) {
			pr_info("Forcing DAC for PCI devices\n");
			dmar_forcedac = 1;
		} else if (!strncmp(str, "strict", 6)) {
			pr_info("Disable batched IOTLB flush\n");
			intel_iommu_strict = 1;
		} else if (!strncmp(str, "sp_off", 6)) {
			pr_info("Disable supported super page\n");
			intel_iommu_superpage = 0;
		} else if (!strncmp(str, "sm_off", 6)) {
			pr_info("Intel-IOMMU: disable scalable mode support\n");
			intel_iommu_sm = 0;
		} else if (!strncmp(str, "tboot_noforce", 13)) {
			printk(KERN_INFO
				"Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
			intel_iommu_tboot_noforce = 1;
		}

		str += strcspn(str, ",");
		while (*str == ',')
			str++;
	}
	return 0;
}
__setup("intel_iommu=", intel_iommu_setup);

static struct kmem_cache *iommu_domain_cache;
static struct kmem_cache *iommu_devinfo_cache;

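/*
 * Per-IOMMU domain-id to dmar_domain lookup. The table is two levels
 * deep: the high byte of the DID selects a lazily allocated page of 256
 * pointers, the low byte selects the slot within that page.
 */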
static struct dmar_domain* get_iommu_domain(struct intel_iommu *iommu, u16 did)
{
	struct dmar_domain **domains;
	int idx = did >> 8;

	domains = iommu->domains[idx];
	if (!domains)
		return NULL;

	return domains[did & 0xff];
}

static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
			     struct dmar_domain *domain)
{
	struct dmar_domain **domains;
	int idx = did >> 8;

	if (!iommu->domains[idx]) {
		size_t size = 256 * sizeof(struct dmar_domain *);
		iommu->domains[idx] = kzalloc(size, GFP_ATOMIC);
	}

	domains = iommu->domains[idx];
	if (WARN_ON(!domains))
		return;
	else
		domains[did & 0xff] = domain;
}

void *alloc_pgtable_page(int node)
{
	struct page *page;
	void *vaddr = NULL;

	page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
	if (page)
		vaddr = page_address(page);
	return vaddr;
}

void free_pgtable_page(void *vaddr)
{
	free_page((unsigned long)vaddr);
}

static inline void *alloc_domain_mem(void)
{
	return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
}

static void free_domain_mem(void *vaddr)
{
	kmem_cache_free(iommu_domain_cache, vaddr);
}

static inline void *alloc_devinfo_mem(void)
{
	return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
}

static inline void free_devinfo_mem(void *vaddr)
{
	kmem_cache_free(iommu_devinfo_cache, vaddr);
}

static inline int domain_type_is_vm(struct dmar_domain *domain)
{
	return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE;
}

static inline int domain_type_is_si(struct dmar_domain *domain)
{
	return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
}

static inline int domain_type_is_vm_or_si(struct dmar_domain *domain)
{
	return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE |
				DOMAIN_FLAG_STATIC_IDENTITY);
}

static inline int domain_pfn_supported(struct dmar_domain *domain,
				       unsigned long pfn)
{
	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;

	return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
}

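/*
 * Pick the largest AGAW that does not exceed @max_gaw and is advertised
 * in the IOMMU's SAGAW capability field, or -1 if none is supported.
 */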
static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
{
	unsigned long sagaw;
	int agaw = -1;

	sagaw = cap_sagaw(iommu->cap);
	for (agaw = width_to_agaw(max_gaw);
	     agaw >= 0; agaw--) {
		if (test_bit(agaw, &sagaw))
			break;
	}

	return agaw;
}

/*
 * Calculate max SAGAW for each iommu.
 */
int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
{
	return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
}

/*
 * Calculate the agaw for each iommu.
 * "SAGAW" may be different across iommus; use a default agaw, and fall
 * back to a smaller supported agaw for iommus that don't support the
 * default.
 */
int iommu_calculate_agaw(struct intel_iommu *iommu)
{
	return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
}

/* This function only returns a single iommu in a domain */
struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
{
	int iommu_id;

	/* si_domain and vm domain should not get here. */
	BUG_ON(domain_type_is_vm_or_si(domain));
	for_each_domain_iommu(iommu_id, domain)
		break;

	if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
		return NULL;

	return g_iommus[iommu_id];
}

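/*
 * Recompute whether every IOMMU backing this domain is cache coherent
 * for page-table walks; when it is not, page-table updates must be
 * flushed explicitly (see domain_flush_cache()).
 */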
static void domain_update_iommu_coherency(struct dmar_domain *domain)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	bool found = false;
	int i;

	domain->iommu_coherency = 1;

	for_each_domain_iommu(i, domain) {
		found = true;
		if (!ecap_coherent(g_iommus[i]->ecap)) {
			domain->iommu_coherency = 0;
			break;
		}
	}
	if (found)
		return;

	/* No hardware attached; use lowest common denominator */
	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (!ecap_coherent(iommu->ecap)) {
			domain->iommu_coherency = 0;
			break;
		}
	}
	rcu_read_unlock();
}

static int domain_update_iommu_snooping(struct intel_iommu *skip)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	int ret = 1;

	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (iommu != skip) {
			if (!ecap_sc_support(iommu->ecap)) {
				ret = 0;
				break;
			}
		}
	}
	rcu_read_unlock();

	return ret;
}

static int domain_update_iommu_superpage(struct intel_iommu *skip)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	int mask = 0xf;

	if (!intel_iommu_superpage) {
		return 0;
	}

	/* set iommu_superpage to the smallest common denominator */
	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (iommu != skip) {
			mask &= cap_super_page_val(iommu->cap);
			if (!mask)
				break;
		}
	}
	rcu_read_unlock();

	return fls(mask);
}

/* Some capabilities may be different across iommus */
static void domain_update_iommu_cap(struct dmar_domain *domain)
{
	domain_update_iommu_coherency(domain);
	domain->iommu_snooping = domain_update_iommu_snooping(NULL);
	domain->iommu_superpage = domain_update_iommu_superpage(NULL);
}

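/*
 * Return the context entry for source-id (bus, devfn), allocating the
 * context table on demand when @alloc is set. In scalable mode each
 * half of the root entry covers 128 devfns and context entries are
 * twice the legacy size, which the devfn adjustment below accounts for.
 */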
struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
					 u8 devfn, int alloc)
{
	struct root_entry *root = &iommu->root_entry[bus];
	struct context_entry *context;
	u64 *entry;

	entry = &root->lo;
	if (sm_supported(iommu)) {
		if (devfn >= 0x80) {
			devfn -= 0x80;
			entry = &root->hi;
		}
		devfn *= 2;
	}
	if (*entry & 1)
		context = phys_to_virt(*entry & VTD_PAGE_MASK);
	else {
		unsigned long phy_addr;
		if (!alloc)
			return NULL;

		context = alloc_pgtable_page(iommu->node);
		if (!context)
			return NULL;

		__iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
		phy_addr = virt_to_phys((void *)context);
		*entry = phy_addr | 1;
		__iommu_flush_cache(iommu, entry, sizeof(*entry));
	}
	return &context[devfn];
}

static int iommu_dummy(struct device *dev)
{
	return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
}

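/*
 * Walk the DRHD device-scope tables to find the IOMMU that translates
 * @dev and the bus/devfn to use when programming its context entry.
 * VFs are looked up via their PF, and devices whose upstream bridge is
 * listed in a scope table (or a catch-all INCLUDE_ALL unit) match with
 * their own BDF.
 */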
static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
{
	struct dmar_drhd_unit *drhd = NULL;
	struct intel_iommu *iommu;
	struct device *tmp;
	struct pci_dev *ptmp, *pdev = NULL;
	u16 segment = 0;
	int i;

	if (iommu_dummy(dev))
		return NULL;

	if (dev_is_pci(dev)) {
		struct pci_dev *pf_pdev;

		pdev = to_pci_dev(dev);

#ifdef CONFIG_X86
		/* VMD child devices currently cannot be handled individually */
		if (is_vmd(pdev->bus))
			return NULL;
#endif

		/* VFs aren't listed in scope tables; we need to look up
		 * the PF instead to find the IOMMU. */
		pf_pdev = pci_physfn(pdev);
		dev = &pf_pdev->dev;
		segment = pci_domain_nr(pdev->bus);
	} else if (has_acpi_companion(dev))
		dev = &ACPI_COMPANION(dev)->dev;

	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (pdev && segment != drhd->segment)
			continue;

		for_each_active_dev_scope(drhd->devices,
					  drhd->devices_cnt, i, tmp) {
			if (tmp == dev) {
				/* For a VF use its original BDF# not that of the PF
				 * which we used for the IOMMU lookup. Strictly speaking
				 * we could do this for all PCI devices; we only need to
				 * get the BDF# from the scope table for ACPI matches. */
				if (pdev && pdev->is_virtfn)
					goto got_pdev;

				*bus = drhd->devices[i].bus;
				*devfn = drhd->devices[i].devfn;
				goto out;
			}

			if (!pdev || !dev_is_pci(tmp))
				continue;

			ptmp = to_pci_dev(tmp);
			if (ptmp->subordinate &&
			    ptmp->subordinate->number <= pdev->bus->number &&
			    ptmp->subordinate->busn_res.end >= pdev->bus->number)
				goto got_pdev;
		}

		if (pdev && drhd->include_all) {
		got_pdev:
			*bus = pdev->bus->number;
			*devfn = pdev->devfn;
			goto out;
		}
	}
	iommu = NULL;
 out:
	rcu_read_unlock();

	return iommu;
}

static void domain_flush_cache(struct dmar_domain *domain,
			       void *addr, int size)
{
	if (!domain->iommu_coherency)
		clflush_cache_range(addr, size);
}

static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
	struct context_entry *context;
	int ret = 0;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	context = iommu_context_addr(iommu, bus, devfn, 0);
	if (context)
		ret = context_present(context);
	spin_unlock_irqrestore(&iommu->lock, flags);
	return ret;
}

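/*
 * Free every lower/upper context table referenced by the root table,
 * then the root table itself.
 */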
static void free_context_table(struct intel_iommu *iommu)
{
	int i;
	unsigned long flags;
	struct context_entry *context;

	spin_lock_irqsave(&iommu->lock, flags);
	if (!iommu->root_entry) {
		goto out;
	}
	for (i = 0; i < ROOT_ENTRY_NR; i++) {
		context = iommu_context_addr(iommu, i, 0, 0);
		if (context)
			free_pgtable_page(context);

		if (!sm_supported(iommu))
			continue;

		context = iommu_context_addr(iommu, i, 0x80, 0);
		if (context)
			free_pgtable_page(context);

	}
	free_pgtable_page(iommu->root_entry);
	iommu->root_entry = NULL;
out:
	spin_unlock_irqrestore(&iommu->lock, flags);
}

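/*
 * Walk (and, when needed, build) the page table down to *target_level
 * for @pfn and return the PTE found there. Passing *target_level == 0
 * stops at the first superpage or non-present entry and reports the
 * level actually reached back through *target_level.
 */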
static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
				      unsigned long pfn, int *target_level)
{
	struct dma_pte *parent, *pte;
	int level = agaw_to_level(domain->agaw);
	int offset;

	BUG_ON(!domain->pgd);

	if (!domain_pfn_supported(domain, pfn))
		/* Address beyond IOMMU's addressing capabilities. */
		return NULL;

	parent = domain->pgd;

	while (1) {
		void *tmp_page;

		offset = pfn_level_offset(pfn, level);
		pte = &parent[offset];
		if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
			break;
		if (level == *target_level)
			break;

		if (!dma_pte_present(pte)) {
			uint64_t pteval;

			tmp_page = alloc_pgtable_page(domain->nid);

			if (!tmp_page)
				return NULL;

			domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
			pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
			if (cmpxchg64(&pte->val, 0ULL, pteval))
				/* Someone else set it while we were thinking; use theirs. */
				free_pgtable_page(tmp_page);
			else
				domain_flush_cache(domain, pte, sizeof(*pte));
		}
		if (level == 1)
			break;

		parent = phys_to_virt(dma_pte_addr(pte));
		level--;
	}

	if (!*target_level)
		*target_level = level;

	return pte;
}
917
6dd9a7c7 918
ba395927 919/* return address's pte at specific level */
90dcfb5e
DW
920static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
921 unsigned long pfn,
6dd9a7c7 922 int level, int *large_page)
ba395927 923{
e083ea5b 924 struct dma_pte *parent, *pte;
ba395927
KA
925 int total = agaw_to_level(domain->agaw);
926 int offset;
927
928 parent = domain->pgd;
929 while (level <= total) {
90dcfb5e 930 offset = pfn_level_offset(pfn, total);
ba395927
KA
931 pte = &parent[offset];
932 if (level == total)
933 return pte;
934
6dd9a7c7
YS
935 if (!dma_pte_present(pte)) {
936 *large_page = total;
ba395927 937 break;
6dd9a7c7
YS
938 }
939
e16922af 940 if (dma_pte_superpage(pte)) {
6dd9a7c7
YS
941 *large_page = total;
942 return pte;
943 }
944
19c239ce 945 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
946 total--;
947 }
948 return NULL;
949}
950
ba395927 951/* clear last level pte, a tlb flush should be followed */
5cf0a76f 952static void dma_pte_clear_range(struct dmar_domain *domain,
595badf5
DW
953 unsigned long start_pfn,
954 unsigned long last_pfn)
ba395927 955{
e083ea5b 956 unsigned int large_page;
310a5ab9 957 struct dma_pte *first_pte, *pte;
66eae846 958
162d1b10
JL
959 BUG_ON(!domain_pfn_supported(domain, start_pfn));
960 BUG_ON(!domain_pfn_supported(domain, last_pfn));
59c36286 961 BUG_ON(start_pfn > last_pfn);
ba395927 962
04b18e65 963 /* we don't need lock here; nobody else touches the iova range */
59c36286 964 do {
6dd9a7c7
YS
965 large_page = 1;
966 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
310a5ab9 967 if (!pte) {
6dd9a7c7 968 start_pfn = align_to_level(start_pfn + 1, large_page + 1);
310a5ab9
DW
969 continue;
970 }
6dd9a7c7 971 do {
310a5ab9 972 dma_clear_pte(pte);
6dd9a7c7 973 start_pfn += lvl_to_nr_pages(large_page);
310a5ab9 974 pte++;
75e6bf96
DW
975 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
976
310a5ab9
DW
977 domain_flush_cache(domain, first_pte,
978 (void *)pte - (void *)first_pte);
59c36286
DW
979
980 } while (start_pfn && start_pfn <= last_pfn);
ba395927
KA
981}
982
3269ee0b 983static void dma_pte_free_level(struct dmar_domain *domain, int level,
bc24c571
DD
984 int retain_level, struct dma_pte *pte,
985 unsigned long pfn, unsigned long start_pfn,
986 unsigned long last_pfn)
3269ee0b
AW
987{
988 pfn = max(start_pfn, pfn);
989 pte = &pte[pfn_level_offset(pfn, level)];
990
991 do {
992 unsigned long level_pfn;
993 struct dma_pte *level_pte;
994
995 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
996 goto next;
997
f7116e11 998 level_pfn = pfn & level_mask(level);
3269ee0b
AW
999 level_pte = phys_to_virt(dma_pte_addr(pte));
1000
bc24c571
DD
1001 if (level > 2) {
1002 dma_pte_free_level(domain, level - 1, retain_level,
1003 level_pte, level_pfn, start_pfn,
1004 last_pfn);
1005 }
3269ee0b 1006
bc24c571
DD
1007 /*
1008 * Free the page table if we're below the level we want to
1009 * retain and the range covers the entire table.
1010 */
1011 if (level < retain_level && !(start_pfn > level_pfn ||
08336fd2 1012 last_pfn < level_pfn + level_size(level) - 1)) {
3269ee0b
AW
1013 dma_clear_pte(pte);
1014 domain_flush_cache(domain, pte, sizeof(*pte));
1015 free_pgtable_page(level_pte);
1016 }
1017next:
1018 pfn += level_size(level);
1019 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1020}
1021
bc24c571
DD
1022/*
1023 * clear last level (leaf) ptes and free page table pages below the
1024 * level we wish to keep intact.
1025 */
ba395927 1026static void dma_pte_free_pagetable(struct dmar_domain *domain,
d794dc9b 1027 unsigned long start_pfn,
bc24c571
DD
1028 unsigned long last_pfn,
1029 int retain_level)
ba395927 1030{
162d1b10
JL
1031 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1032 BUG_ON(!domain_pfn_supported(domain, last_pfn));
59c36286 1033 BUG_ON(start_pfn > last_pfn);
ba395927 1034
d41a4adb
JL
1035 dma_pte_clear_range(domain, start_pfn, last_pfn);
1036
f3a0a52f 1037 /* We don't need lock here; nobody else touches the iova range */
bc24c571 1038 dma_pte_free_level(domain, agaw_to_level(domain->agaw), retain_level,
3269ee0b 1039 domain->pgd, 0, start_pfn, last_pfn);
6660c63a 1040
ba395927 1041 /* free pgd */
d794dc9b 1042 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
ba395927
KA
1043 free_pgtable_page(domain->pgd);
1044 domain->pgd = NULL;
1045 }
1046}
1047
ea8ea460
DW
1048/* When a page at a given level is being unlinked from its parent, we don't
1049 need to *modify* it at all. All we need to do is make a list of all the
1050 pages which can be freed just as soon as we've flushed the IOTLB and we
1051 know the hardware page-walk will no longer touch them.
1052 The 'pte' argument is the *parent* PTE, pointing to the page that is to
1053 be freed. */
1054static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1055 int level, struct dma_pte *pte,
1056 struct page *freelist)
1057{
1058 struct page *pg;
1059
1060 pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1061 pg->freelist = freelist;
1062 freelist = pg;
1063
1064 if (level == 1)
1065 return freelist;
1066
adeb2590
JL
1067 pte = page_address(pg);
1068 do {
ea8ea460
DW
1069 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1070 freelist = dma_pte_list_pagetables(domain, level - 1,
1071 pte, freelist);
adeb2590
JL
1072 pte++;
1073 } while (!first_pte_in_page(pte));
ea8ea460
DW
1074
1075 return freelist;
1076}
1077
1078static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1079 struct dma_pte *pte, unsigned long pfn,
1080 unsigned long start_pfn,
1081 unsigned long last_pfn,
1082 struct page *freelist)
1083{
1084 struct dma_pte *first_pte = NULL, *last_pte = NULL;
1085
1086 pfn = max(start_pfn, pfn);
1087 pte = &pte[pfn_level_offset(pfn, level)];
1088
1089 do {
1090 unsigned long level_pfn;
1091
1092 if (!dma_pte_present(pte))
1093 goto next;
1094
1095 level_pfn = pfn & level_mask(level);
1096
1097 /* If range covers entire pagetable, free it */
1098 if (start_pfn <= level_pfn &&
1099 last_pfn >= level_pfn + level_size(level) - 1) {
1100 /* These suborbinate page tables are going away entirely. Don't
1101 bother to clear them; we're just going to *free* them. */
1102 if (level > 1 && !dma_pte_superpage(pte))
1103 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1104
1105 dma_clear_pte(pte);
1106 if (!first_pte)
1107 first_pte = pte;
1108 last_pte = pte;
1109 } else if (level > 1) {
1110 /* Recurse down into a level that isn't *entirely* obsolete */
1111 freelist = dma_pte_clear_level(domain, level - 1,
1112 phys_to_virt(dma_pte_addr(pte)),
1113 level_pfn, start_pfn, last_pfn,
1114 freelist);
1115 }
1116next:
1117 pfn += level_size(level);
1118 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1119
1120 if (first_pte)
1121 domain_flush_cache(domain, first_pte,
1122 (void *)++last_pte - (void *)first_pte);
1123
1124 return freelist;
1125}
1126
1127/* We can't just free the pages because the IOMMU may still be walking
1128 the page tables, and may have cached the intermediate levels. The
1129 pages can only be freed after the IOTLB flush has been done. */
b690420a
JR
1130static struct page *domain_unmap(struct dmar_domain *domain,
1131 unsigned long start_pfn,
1132 unsigned long last_pfn)
ea8ea460 1133{
e083ea5b 1134 struct page *freelist;
ea8ea460 1135
162d1b10
JL
1136 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1137 BUG_ON(!domain_pfn_supported(domain, last_pfn));
ea8ea460
DW
1138 BUG_ON(start_pfn > last_pfn);
1139
1140 /* we don't need lock here; nobody else touches the iova range */
1141 freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1142 domain->pgd, 0, start_pfn, last_pfn, NULL);
1143
1144 /* free pgd */
1145 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1146 struct page *pgd_page = virt_to_page(domain->pgd);
1147 pgd_page->freelist = freelist;
1148 freelist = pgd_page;
1149
1150 domain->pgd = NULL;
1151 }
1152
1153 return freelist;
1154}
1155
b690420a 1156static void dma_free_pagelist(struct page *freelist)
ea8ea460
DW
1157{
1158 struct page *pg;
1159
1160 while ((pg = freelist)) {
1161 freelist = pg->freelist;
1162 free_pgtable_page(page_address(pg));
1163 }
1164}
1165
13cf0174
JR
1166static void iova_entry_free(unsigned long data)
1167{
1168 struct page *freelist = (struct page *)data;
1169
1170 dma_free_pagelist(freelist);
1171}
1172
ba395927
KA
1173/* iommu handling */
1174static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1175{
1176 struct root_entry *root;
1177 unsigned long flags;
1178
4c923d47 1179 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
ffebeb46 1180 if (!root) {
9f10e5bf 1181 pr_err("Allocating root entry for %s failed\n",
ffebeb46 1182 iommu->name);
ba395927 1183 return -ENOMEM;
ffebeb46 1184 }
ba395927 1185
5b6985ce 1186 __iommu_flush_cache(iommu, root, ROOT_SIZE);
ba395927
KA
1187
1188 spin_lock_irqsave(&iommu->lock, flags);
1189 iommu->root_entry = root;
1190 spin_unlock_irqrestore(&iommu->lock, flags);
1191
1192 return 0;
1193}
1194
ba395927
KA
1195static void iommu_set_root_entry(struct intel_iommu *iommu)
1196{
03ecc32c 1197 u64 addr;
c416daa9 1198 u32 sts;
ba395927
KA
1199 unsigned long flag;
1200
03ecc32c 1201 addr = virt_to_phys(iommu->root_entry);
7373a8cc
LB
1202 if (sm_supported(iommu))
1203 addr |= DMA_RTADDR_SMT;
ba395927 1204
1f5b3c3f 1205 raw_spin_lock_irqsave(&iommu->register_lock, flag);
03ecc32c 1206 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
ba395927 1207
c416daa9 1208 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1209
1210 /* Make sure hardware complete it */
1211 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1212 readl, (sts & DMA_GSTS_RTPS), sts);
ba395927 1213
1f5b3c3f 1214 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1215}
1216
6f7db75e 1217void iommu_flush_write_buffer(struct intel_iommu *iommu)
ba395927
KA
1218{
1219 u32 val;
1220 unsigned long flag;
1221
9af88143 1222 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
ba395927 1223 return;
ba395927 1224
1f5b3c3f 1225 raw_spin_lock_irqsave(&iommu->register_lock, flag);
462b60f6 1226 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1227
1228 /* Make sure hardware complete it */
1229 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1230 readl, (!(val & DMA_GSTS_WBFS)), val);
ba395927 1231
1f5b3c3f 1232 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1233}
1234
1235/* return value determine if we need a write buffer flush */
4c25a2c1
DW
1236static void __iommu_flush_context(struct intel_iommu *iommu,
1237 u16 did, u16 source_id, u8 function_mask,
1238 u64 type)
ba395927
KA
1239{
1240 u64 val = 0;
1241 unsigned long flag;
1242
ba395927
KA
1243 switch (type) {
1244 case DMA_CCMD_GLOBAL_INVL:
1245 val = DMA_CCMD_GLOBAL_INVL;
1246 break;
1247 case DMA_CCMD_DOMAIN_INVL:
1248 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1249 break;
1250 case DMA_CCMD_DEVICE_INVL:
1251 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1252 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1253 break;
1254 default:
1255 BUG();
1256 }
1257 val |= DMA_CCMD_ICC;
1258
1f5b3c3f 1259 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1260 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1261
1262 /* Make sure hardware complete it */
1263 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1264 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1265
1f5b3c3f 1266 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1267}
1268
ba395927 1269/* return value determine if we need a write buffer flush */
1f0ef2aa
DW
1270static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1271 u64 addr, unsigned int size_order, u64 type)
ba395927
KA
1272{
1273 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1274 u64 val = 0, val_iva = 0;
1275 unsigned long flag;
1276
ba395927
KA
1277 switch (type) {
1278 case DMA_TLB_GLOBAL_FLUSH:
1279 /* global flush doesn't need set IVA_REG */
1280 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1281 break;
1282 case DMA_TLB_DSI_FLUSH:
1283 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1284 break;
1285 case DMA_TLB_PSI_FLUSH:
1286 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
ea8ea460 1287 /* IH bit is passed in as part of address */
ba395927
KA
1288 val_iva = size_order | addr;
1289 break;
1290 default:
1291 BUG();
1292 }
1293 /* Note: set drain read/write */
1294#if 0
1295 /*
1296 * This is probably to be super secure.. Looks like we can
1297 * ignore it without any impact.
1298 */
1299 if (cap_read_drain(iommu->cap))
1300 val |= DMA_TLB_READ_DRAIN;
1301#endif
1302 if (cap_write_drain(iommu->cap))
1303 val |= DMA_TLB_WRITE_DRAIN;
1304
1f5b3c3f 1305 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1306 /* Note: Only uses first TLB reg currently */
1307 if (val_iva)
1308 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1309 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1310
1311 /* Make sure hardware complete it */
1312 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1313 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1314
1f5b3c3f 1315 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1316
1317 /* check IOTLB invalidation granularity */
1318 if (DMA_TLB_IAIG(val) == 0)
9f10e5bf 1319 pr_err("Flush IOTLB failed\n");
ba395927 1320 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
9f10e5bf 1321 pr_debug("TLB flush request %Lx, actual %Lx\n",
5b6985ce
FY
1322 (unsigned long long)DMA_TLB_IIRG(type),
1323 (unsigned long long)DMA_TLB_IAIG(val));
ba395927
KA
1324}
1325
64ae892b
DW
1326static struct device_domain_info *
1327iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
1328 u8 bus, u8 devfn)
93a23a72 1329{
93a23a72 1330 struct device_domain_info *info;
93a23a72 1331
55d94043
JR
1332 assert_spin_locked(&device_domain_lock);
1333
93a23a72
YZ
1334 if (!iommu->qi)
1335 return NULL;
1336
93a23a72 1337 list_for_each_entry(info, &domain->devices, link)
c3b497c6
JL
1338 if (info->iommu == iommu && info->bus == bus &&
1339 info->devfn == devfn) {
b16d0cb9
DW
1340 if (info->ats_supported && info->dev)
1341 return info;
93a23a72
YZ
1342 break;
1343 }
93a23a72 1344
b16d0cb9 1345 return NULL;
93a23a72
YZ
1346}
1347
0824c592
OP
1348static void domain_update_iotlb(struct dmar_domain *domain)
1349{
1350 struct device_domain_info *info;
1351 bool has_iotlb_device = false;
1352
1353 assert_spin_locked(&device_domain_lock);
1354
1355 list_for_each_entry(info, &domain->devices, link) {
1356 struct pci_dev *pdev;
1357
1358 if (!info->dev || !dev_is_pci(info->dev))
1359 continue;
1360
1361 pdev = to_pci_dev(info->dev);
1362 if (pdev->ats_enabled) {
1363 has_iotlb_device = true;
1364 break;
1365 }
1366 }
1367
1368 domain->has_iotlb_device = has_iotlb_device;
1369}
1370
93a23a72 1371static void iommu_enable_dev_iotlb(struct device_domain_info *info)
ba395927 1372{
fb0cc3aa
BH
1373 struct pci_dev *pdev;
1374
0824c592
OP
1375 assert_spin_locked(&device_domain_lock);
1376
0bcb3e28 1377 if (!info || !dev_is_pci(info->dev))
93a23a72
YZ
1378 return;
1379
fb0cc3aa 1380 pdev = to_pci_dev(info->dev);
1c48db44
JP
1381 /* For IOMMU that supports device IOTLB throttling (DIT), we assign
1382 * PFSID to the invalidation desc of a VF such that IOMMU HW can gauge
1383 * queue depth at PF level. If DIT is not set, PFSID will be treated as
1384 * reserved, which should be set to 0.
1385 */
1386 if (!ecap_dit(info->iommu->ecap))
1387 info->pfsid = 0;
1388 else {
1389 struct pci_dev *pf_pdev;
1390
1391 /* pdev will be returned if device is not a vf */
1392 pf_pdev = pci_physfn(pdev);
1393 info->pfsid = PCI_DEVID(pf_pdev->bus->number, pf_pdev->devfn);
1394 }
fb0cc3aa 1395
b16d0cb9
DW
1396#ifdef CONFIG_INTEL_IOMMU_SVM
1397 /* The PCIe spec, in its wisdom, declares that the behaviour of
1398 the device if you enable PASID support after ATS support is
1399 undefined. So always enable PASID support on devices which
1400 have it, even if we can't yet know if we're ever going to
1401 use it. */
1402 if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
1403 info->pasid_enabled = 1;
1404
1b84778a
KS
1405 if (info->pri_supported &&
1406 (info->pasid_enabled ? pci_prg_resp_pasid_required(pdev) : 1) &&
1407 !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
b16d0cb9
DW
1408 info->pri_enabled = 1;
1409#endif
fb58fdcd 1410 if (!pdev->untrusted && info->ats_supported &&
61363c14 1411 pci_ats_page_aligned(pdev) &&
fb58fdcd 1412 !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
b16d0cb9 1413 info->ats_enabled = 1;
0824c592 1414 domain_update_iotlb(info->domain);
b16d0cb9
DW
1415 info->ats_qdep = pci_ats_queue_depth(pdev);
1416 }
93a23a72
YZ
1417}
1418
1419static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1420{
b16d0cb9
DW
1421 struct pci_dev *pdev;
1422
0824c592
OP
1423 assert_spin_locked(&device_domain_lock);
1424
da972fb1 1425 if (!dev_is_pci(info->dev))
93a23a72
YZ
1426 return;
1427
b16d0cb9
DW
1428 pdev = to_pci_dev(info->dev);
1429
1430 if (info->ats_enabled) {
1431 pci_disable_ats(pdev);
1432 info->ats_enabled = 0;
0824c592 1433 domain_update_iotlb(info->domain);
b16d0cb9
DW
1434 }
1435#ifdef CONFIG_INTEL_IOMMU_SVM
1436 if (info->pri_enabled) {
1437 pci_disable_pri(pdev);
1438 info->pri_enabled = 0;
1439 }
1440 if (info->pasid_enabled) {
1441 pci_disable_pasid(pdev);
1442 info->pasid_enabled = 0;
1443 }
1444#endif
93a23a72
YZ
1445}
1446
1447static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1448 u64 addr, unsigned mask)
1449{
1450 u16 sid, qdep;
1451 unsigned long flags;
1452 struct device_domain_info *info;
1453
0824c592
OP
1454 if (!domain->has_iotlb_device)
1455 return;
1456
93a23a72
YZ
1457 spin_lock_irqsave(&device_domain_lock, flags);
1458 list_for_each_entry(info, &domain->devices, link) {
b16d0cb9 1459 if (!info->ats_enabled)
93a23a72
YZ
1460 continue;
1461
1462 sid = info->bus << 8 | info->devfn;
b16d0cb9 1463 qdep = info->ats_qdep;
1c48db44
JP
1464 qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
1465 qdep, addr, mask);
93a23a72
YZ
1466 }
1467 spin_unlock_irqrestore(&device_domain_lock, flags);
1468}
1469
a1ddcbe9
JR
1470static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
1471 struct dmar_domain *domain,
1472 unsigned long pfn, unsigned int pages,
1473 int ih, int map)
ba395927 1474{
9dd2fe89 1475 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
03d6a246 1476 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
a1ddcbe9 1477 u16 did = domain->iommu_did[iommu->seq_id];
ba395927 1478
ba395927
KA
1479 BUG_ON(pages == 0);
1480
ea8ea460
DW
1481 if (ih)
1482 ih = 1 << 6;
ba395927 1483 /*
9dd2fe89
YZ
1484 * Fallback to domain selective flush if no PSI support or the size is
1485 * too big.
ba395927
KA
1486 * PSI requires page size to be 2 ^ x, and the base address is naturally
1487 * aligned to the size
1488 */
9dd2fe89
YZ
1489 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1490 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1f0ef2aa 1491 DMA_TLB_DSI_FLUSH);
9dd2fe89 1492 else
ea8ea460 1493 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
9dd2fe89 1494 DMA_TLB_PSI_FLUSH);
bf92df30
YZ
1495
1496 /*
82653633
NA
1497 * In caching mode, changes of pages from non-present to present require
1498 * flush. However, device IOTLB doesn't need to be flushed in this case.
bf92df30 1499 */
82653633 1500 if (!cap_caching_mode(iommu->cap) || !map)
9d2e6505 1501 iommu_flush_dev_iotlb(domain, addr, mask);
ba395927
KA
1502}
1503
eed91a0b
PX
1504/* Notification for newly created mappings */
1505static inline void __mapping_notify_one(struct intel_iommu *iommu,
1506 struct dmar_domain *domain,
1507 unsigned long pfn, unsigned int pages)
1508{
1509 /* It's a non-present to present mapping. Only flush if caching mode */
1510 if (cap_caching_mode(iommu->cap))
1511 iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1);
1512 else
1513 iommu_flush_write_buffer(iommu);
1514}
1515
13cf0174
JR
1516static void iommu_flush_iova(struct iova_domain *iovad)
1517{
1518 struct dmar_domain *domain;
1519 int idx;
1520
1521 domain = container_of(iovad, struct dmar_domain, iovad);
1522
1523 for_each_domain_iommu(idx, domain) {
1524 struct intel_iommu *iommu = g_iommus[idx];
1525 u16 did = domain->iommu_did[iommu->seq_id];
1526
1527 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
1528
1529 if (!cap_caching_mode(iommu->cap))
1530 iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
1531 0, MAX_AGAW_PFN_WIDTH);
1532 }
1533}
1534
f8bab735 1535static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1536{
1537 u32 pmen;
1538 unsigned long flags;
1539
1f5b3c3f 1540 raw_spin_lock_irqsave(&iommu->register_lock, flags);
f8bab735 1541 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1542 pmen &= ~DMA_PMEN_EPM;
1543 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1544
1545 /* wait for the protected region status bit to clear */
1546 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1547 readl, !(pmen & DMA_PMEN_PRS), pmen);
1548
1f5b3c3f 1549 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
f8bab735 1550}
1551
2a41ccee 1552static void iommu_enable_translation(struct intel_iommu *iommu)
ba395927
KA
1553{
1554 u32 sts;
1555 unsigned long flags;
1556
1f5b3c3f 1557 raw_spin_lock_irqsave(&iommu->register_lock, flags);
c416daa9
DW
1558 iommu->gcmd |= DMA_GCMD_TE;
1559 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1560
1561 /* Make sure hardware complete it */
1562 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1563 readl, (sts & DMA_GSTS_TES), sts);
ba395927 1564
1f5b3c3f 1565 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
ba395927
KA
1566}
1567
2a41ccee 1568static void iommu_disable_translation(struct intel_iommu *iommu)
ba395927
KA
1569{
1570 u32 sts;
1571 unsigned long flag;
1572
1f5b3c3f 1573 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1574 iommu->gcmd &= ~DMA_GCMD_TE;
1575 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1576
1577 /* Make sure hardware complete it */
1578 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1579 readl, (!(sts & DMA_GSTS_TES)), sts);
ba395927 1580
1f5b3c3f 1581 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1582}
1583
3460a6d9 1584
ba395927
KA
1585static int iommu_init_domains(struct intel_iommu *iommu)
1586{
8bf47816
JR
1587 u32 ndomains, nlongs;
1588 size_t size;
ba395927
KA
1589
1590 ndomains = cap_ndoms(iommu->cap);
8bf47816 1591 pr_debug("%s: Number of Domains supported <%d>\n",
9f10e5bf 1592 iommu->name, ndomains);
ba395927
KA
1593 nlongs = BITS_TO_LONGS(ndomains);
1594
94a91b50
DD
1595 spin_lock_init(&iommu->lock);
1596
ba395927
KA
1597 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1598 if (!iommu->domain_ids) {
9f10e5bf
JR
1599 pr_err("%s: Allocating domain id array failed\n",
1600 iommu->name);
ba395927
KA
1601 return -ENOMEM;
1602 }
8bf47816 1603
86f004c7 1604 size = (ALIGN(ndomains, 256) >> 8) * sizeof(struct dmar_domain **);
8bf47816
JR
1605 iommu->domains = kzalloc(size, GFP_KERNEL);
1606
1607 if (iommu->domains) {
1608 size = 256 * sizeof(struct dmar_domain *);
1609 iommu->domains[0] = kzalloc(size, GFP_KERNEL);
1610 }
1611
1612 if (!iommu->domains || !iommu->domains[0]) {
9f10e5bf
JR
1613 pr_err("%s: Allocating domain array failed\n",
1614 iommu->name);
852bdb04 1615 kfree(iommu->domain_ids);
8bf47816 1616 kfree(iommu->domains);
852bdb04 1617 iommu->domain_ids = NULL;
8bf47816 1618 iommu->domains = NULL;
ba395927
KA
1619 return -ENOMEM;
1620 }
1621
8bf47816
JR
1622
1623
ba395927 1624 /*
c0e8a6c8
JR
1625 * If Caching mode is set, then invalid translations are tagged
1626 * with domain-id 0, hence we need to pre-allocate it. We also
1627 * use domain-id 0 as a marker for non-allocated domain-id, so
1628 * make sure it is not used for a real domain.
ba395927 1629 */
c0e8a6c8
JR
1630 set_bit(0, iommu->domain_ids);
1631
3b33d4ab
LB
1632 /*
1633 * Vt-d spec rev3.0 (section 6.2.3.1) requires that each pasid
1634 * entry for first-level or pass-through translation modes should
1635 * be programmed with a domain id different from those used for
1636 * second-level or nested translation. We reserve a domain id for
1637 * this purpose.
1638 */
1639 if (sm_supported(iommu))
1640 set_bit(FLPT_DEFAULT_DID, iommu->domain_ids);
1641
ba395927
KA
1642 return 0;
1643}
ba395927 1644
ffebeb46 1645static void disable_dmar_iommu(struct intel_iommu *iommu)
ba395927 1646{
29a27719 1647 struct device_domain_info *info, *tmp;
55d94043 1648 unsigned long flags;
ba395927 1649
29a27719
JR
1650 if (!iommu->domains || !iommu->domain_ids)
1651 return;
a4eaa86c 1652
bea64033 1653again:
55d94043 1654 spin_lock_irqsave(&device_domain_lock, flags);
29a27719
JR
1655 list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
1656 struct dmar_domain *domain;
1657
1658 if (info->iommu != iommu)
1659 continue;
1660
1661 if (!info->dev || !info->domain)
1662 continue;
1663
1664 domain = info->domain;
1665
bea64033 1666 __dmar_remove_one_dev_info(info);
29a27719 1667
bea64033
JR
1668 if (!domain_type_is_vm_or_si(domain)) {
1669 /*
1670 * The domain_exit() function can't be called under
1671 * device_domain_lock, as it takes this lock itself.
1672 * So release the lock here and re-run the loop
1673 * afterwards.
1674 */
1675 spin_unlock_irqrestore(&device_domain_lock, flags);
29a27719 1676 domain_exit(domain);
bea64033
JR
1677 goto again;
1678 }
ba395927 1679 }
55d94043 1680 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927
KA
1681
1682 if (iommu->gcmd & DMA_GCMD_TE)
1683 iommu_disable_translation(iommu);
ffebeb46 1684}
ba395927 1685
ffebeb46
JL
1686static void free_dmar_iommu(struct intel_iommu *iommu)
1687{
1688 if ((iommu->domains) && (iommu->domain_ids)) {
86f004c7 1689 int elems = ALIGN(cap_ndoms(iommu->cap), 256) >> 8;
8bf47816
JR
1690 int i;
1691
1692 for (i = 0; i < elems; i++)
1693 kfree(iommu->domains[i]);
ffebeb46
JL
1694 kfree(iommu->domains);
1695 kfree(iommu->domain_ids);
1696 iommu->domains = NULL;
1697 iommu->domain_ids = NULL;
1698 }
ba395927 1699
d9630fe9
WH
1700 g_iommus[iommu->seq_id] = NULL;
1701
ba395927
KA
1702 /* free context mapping */
1703 free_context_table(iommu);
8a94ade4
DW
1704
1705#ifdef CONFIG_INTEL_IOMMU_SVM
765b6a98 1706 if (pasid_supported(iommu)) {
a222a7f0
DW
1707 if (ecap_prs(iommu->ecap))
1708 intel_svm_finish_prq(iommu);
a222a7f0 1709 }
8a94ade4 1710#endif
ba395927
KA
1711}
1712
ab8dfe25 1713static struct dmar_domain *alloc_domain(int flags)
ba395927 1714{
ba395927 1715 struct dmar_domain *domain;
ba395927
KA
1716
1717 domain = alloc_domain_mem();
1718 if (!domain)
1719 return NULL;
1720
ab8dfe25 1721 memset(domain, 0, sizeof(*domain));
4c923d47 1722 domain->nid = -1;
ab8dfe25 1723 domain->flags = flags;
0824c592 1724 domain->has_iotlb_device = false;
92d03cc8 1725 INIT_LIST_HEAD(&domain->devices);
2c2e2c38
FY
1726
1727 return domain;
1728}
1729
d160aca5
JR
1730/* Must be called with iommu->lock */
1731static int domain_attach_iommu(struct dmar_domain *domain,
fb170fb4
JL
1732 struct intel_iommu *iommu)
1733{
44bde614 1734 unsigned long ndomains;
55d94043 1735 int num;
44bde614 1736
55d94043 1737 assert_spin_locked(&device_domain_lock);
d160aca5 1738 assert_spin_locked(&iommu->lock);
ba395927 1739
29a27719
JR
1740 domain->iommu_refcnt[iommu->seq_id] += 1;
1741 domain->iommu_count += 1;
1742 if (domain->iommu_refcnt[iommu->seq_id] == 1) {
fb170fb4 1743 ndomains = cap_ndoms(iommu->cap);
d160aca5
JR
1744 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1745
1746 if (num >= ndomains) {
1747 pr_err("%s: No free domain ids\n", iommu->name);
1748 domain->iommu_refcnt[iommu->seq_id] -= 1;
1749 domain->iommu_count -= 1;
55d94043 1750 return -ENOSPC;
2c2e2c38 1751 }
ba395927 1752
d160aca5
JR
1753 set_bit(num, iommu->domain_ids);
1754 set_iommu_domain(iommu, num, domain);
1755
1756 domain->iommu_did[iommu->seq_id] = num;
1757 domain->nid = iommu->node;
fb170fb4 1758
fb170fb4
JL
1759 domain_update_iommu_cap(domain);
1760 }
d160aca5 1761
55d94043 1762 return 0;
fb170fb4
JL
1763}
1764
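/*
 * Drop the domain's reference to @iommu and release the domain id on that
 * IOMMU once the last device using it is gone; returns the domain's
 * remaining iommu_count.
 */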
1765static int domain_detach_iommu(struct dmar_domain *domain,
1766 struct intel_iommu *iommu)
1767{
e083ea5b 1768 int num, count;
d160aca5 1769
55d94043 1770 assert_spin_locked(&device_domain_lock);
d160aca5 1771 assert_spin_locked(&iommu->lock);
fb170fb4 1772
29a27719
JR
1773 domain->iommu_refcnt[iommu->seq_id] -= 1;
1774 count = --domain->iommu_count;
1775 if (domain->iommu_refcnt[iommu->seq_id] == 0) {
d160aca5
JR
1776 num = domain->iommu_did[iommu->seq_id];
1777 clear_bit(num, iommu->domain_ids);
1778 set_iommu_domain(iommu, num, NULL);
fb170fb4 1779
fb170fb4 1780 domain_update_iommu_cap(domain);
c0e8a6c8 1781 domain->iommu_did[iommu->seq_id] = 0;
fb170fb4 1782 }
fb170fb4
JL
1783
1784 return count;
1785}
1786
ba395927 1787static struct iova_domain reserved_iova_list;
8a443df4 1788static struct lock_class_key reserved_rbtree_key;
ba395927 1789
51a63e67 1790static int dmar_init_reserved_ranges(void)
ba395927
KA
1791{
1792 struct pci_dev *pdev = NULL;
1793 struct iova *iova;
1794 int i;
ba395927 1795
aa3ac946 1796 init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN);
ba395927 1797
8a443df4
MG
1798 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1799 &reserved_rbtree_key);
1800
ba395927
KA
1801 /* IOAPIC ranges shouldn't be accessed by DMA */
1802 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1803 IOVA_PFN(IOAPIC_RANGE_END));
51a63e67 1804 if (!iova) {
9f10e5bf 1805 pr_err("Reserve IOAPIC range failed\n");
51a63e67
JC
1806 return -ENODEV;
1807 }
ba395927
KA
1808
1809 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1810 for_each_pci_dev(pdev) {
1811 struct resource *r;
1812
1813 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1814 r = &pdev->resource[i];
1815 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1816 continue;
1a4a4551
DW
1817 iova = reserve_iova(&reserved_iova_list,
1818 IOVA_PFN(r->start),
1819 IOVA_PFN(r->end));
51a63e67 1820 if (!iova) {
932a6523 1821 pci_err(pdev, "Reserve iova for %pR failed\n", r);
51a63e67
JC
1822 return -ENODEV;
1823 }
ba395927
KA
1824 }
1825 }
51a63e67 1826 return 0;
ba395927
KA
1827}
1828
1829static void domain_reserve_special_ranges(struct dmar_domain *domain)
1830{
1831 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1832}
1833
1834static inline int guestwidth_to_adjustwidth(int gaw)
1835{
1836 int agaw;
1837 int r = (gaw - 12) % 9;
1838
1839 if (r == 0)
1840 agaw = gaw;
1841 else
1842 agaw = gaw + 9 - r;
1843 if (agaw > 64)
1844 agaw = 64;
1845 return agaw;
1846}
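/*
 * Worked example: gaw = 48 gives r = (48 - 12) % 9 = 0, so agaw = 48;
 * gaw = 50 gives r = 2, so agaw = 50 + 9 - 2 = 57. Results above 64 are
 * capped at 64.
 */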
1847
dc534b25
JR
1848static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
1849 int guest_width)
ba395927 1850{
ba395927
KA
1851 int adjust_width, agaw;
1852 unsigned long sagaw;
13cf0174 1853 int err;
ba395927 1854
aa3ac946 1855 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
13cf0174
JR
1856
1857 err = init_iova_flush_queue(&domain->iovad,
1858 iommu_flush_iova, iova_entry_free);
1859 if (err)
1860 return err;
1861
ba395927
KA
1862 domain_reserve_special_ranges(domain);
1863
1864 /* calculate AGAW */
ba395927
KA
1865 if (guest_width > cap_mgaw(iommu->cap))
1866 guest_width = cap_mgaw(iommu->cap);
1867 domain->gaw = guest_width;
1868 adjust_width = guestwidth_to_adjustwidth(guest_width);
1869 agaw = width_to_agaw(adjust_width);
1870 sagaw = cap_sagaw(iommu->cap);
1871 if (!test_bit(agaw, &sagaw)) {
1872 /* hardware doesn't support it, choose a bigger one */
9f10e5bf 1873 pr_debug("Hardware doesn't support agaw %d\n", agaw);
ba395927
KA
1874 agaw = find_next_bit(&sagaw, 5, agaw);
1875 if (agaw >= 5)
1876 return -ENODEV;
1877 }
1878 domain->agaw = agaw;
ba395927 1879
8e604097
WH
1880 if (ecap_coherent(iommu->ecap))
1881 domain->iommu_coherency = 1;
1882 else
1883 domain->iommu_coherency = 0;
1884
58c610bd
SY
1885 if (ecap_sc_support(iommu->ecap))
1886 domain->iommu_snooping = 1;
1887 else
1888 domain->iommu_snooping = 0;
1889
214e39aa
DW
1890 if (intel_iommu_superpage)
1891 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1892 else
1893 domain->iommu_superpage = 0;
1894
4c923d47 1895 domain->nid = iommu->node;
c7151a8d 1896
ba395927 1897 /* always allocate the top pgd */
4c923d47 1898 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
ba395927
KA
1899 if (!domain->pgd)
1900 return -ENOMEM;
5b6985ce 1901 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
ba395927
KA
1902 return 0;
1903}
1904
1905static void domain_exit(struct dmar_domain *domain)
1906{
e083ea5b 1907 struct page *freelist;
ba395927 1908
d160aca5
JR
1909 /* Remove associated devices and clear attached or cached domains */
1910 rcu_read_lock();
ba395927 1911 domain_remove_dev_info(domain);
d160aca5 1912 rcu_read_unlock();
92d03cc8 1913
ba395927
KA
1914 /* destroy iovas */
1915 put_iova_domain(&domain->iovad);
ba395927 1916
ea8ea460 1917 freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
ba395927 1918
ea8ea460
DW
1919 dma_free_pagelist(freelist);
1920
ba395927
KA
1921 free_domain_mem(domain);
1922}
1923
7373a8cc
LB
1924/*
1925 * Get the PASID directory size for a scalable mode context entry.
1926 * A value of X in the PDTS field of a scalable mode context entry
1927 * indicates a PASID directory with 2^(X + 7) entries.
1928 */
1929static inline unsigned long context_get_sm_pds(struct pasid_table *table)
1930{
1931 int pds, max_pde;
1932
1933 max_pde = table->max_pasid >> PASID_PDE_SHIFT;
1934 pds = find_first_bit((unsigned long *)&max_pde, MAX_NR_PASID_BITS);
1935 if (pds < 7)
1936 return 0;
1937
1938 return pds - 7;
1939}
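/*
 * Worked example (assuming PASID_PDE_SHIFT is 6): a 20-bit max_pasid gives
 * max_pde = 1 << 14, so pds = 14 and the function returns 7, i.e. a PASID
 * directory with 2^(7 + 7) = 2^14 entries. The 3-bit result is later encoded
 * into the context entry via the context_pdts() macro below.
 */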
1940
1941/*
1942 * Set the RID_PASID field of a scalable mode context entry. The
1943 * IOMMU hardware will use the PASID value set in this field for
1944 * DMA translations of DMA requests without PASID.
1945 */
1946static inline void
1947context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid)
1948{
1949 context->hi |= pasid & ((1 << 20) - 1);
1950 context->hi |= (1 << 20);
1951}
1952
1953/*
1954 * Set the DTE(Device-TLB Enable) field of a scalable mode context
1955 * entry.
1956 */
1957static inline void context_set_sm_dte(struct context_entry *context)
1958{
1959 context->lo |= (1 << 2);
1960}
1961
1962/*
1963 * Set the PRE(Page Request Enable) field of a scalable mode context
1964 * entry.
1965 */
1966static inline void context_set_sm_pre(struct context_entry *context)
1967{
1968 context->lo |= (1 << 4);
1969}
1970
1971/* Convert value to context PASID directory size field coding. */
1972#define context_pdts(pds) (((pds) & 0x7) << 9)
1973
64ae892b
DW
1974static int domain_context_mapping_one(struct dmar_domain *domain,
1975 struct intel_iommu *iommu,
ca6e322d 1976 struct pasid_table *table,
28ccce0d 1977 u8 bus, u8 devfn)
ba395927 1978{
c6c2cebd 1979 u16 did = domain->iommu_did[iommu->seq_id];
28ccce0d
JR
1980 int translation = CONTEXT_TT_MULTI_LEVEL;
1981 struct device_domain_info *info = NULL;
ba395927 1982 struct context_entry *context;
ba395927 1983 unsigned long flags;
7373a8cc 1984 int ret;
28ccce0d 1985
c6c2cebd
JR
1986 WARN_ON(did == 0);
1987
28ccce0d
JR
1988 if (hw_pass_through && domain_type_is_si(domain))
1989 translation = CONTEXT_TT_PASS_THROUGH;
ba395927
KA
1990
1991 pr_debug("Set context mapping for %02x:%02x.%d\n",
1992 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
4ed0d3e6 1993
ba395927 1994 BUG_ON(!domain->pgd);
5331fe6f 1995
55d94043
JR
1996 spin_lock_irqsave(&device_domain_lock, flags);
1997 spin_lock(&iommu->lock);
1998
1999 ret = -ENOMEM;
03ecc32c 2000 context = iommu_context_addr(iommu, bus, devfn, 1);
ba395927 2001 if (!context)
55d94043 2002 goto out_unlock;
ba395927 2003
55d94043
JR
2004 ret = 0;
2005 if (context_present(context))
2006 goto out_unlock;
cf484d0e 2007
aec0e861
XP
2008 /*
2009 * For kdump cases, old valid entries may be cached due to the
2010 * in-flight DMA and copied pgtable, but there is no unmapping
2011 * behaviour for them, thus we need an explicit cache flush for
2012 * the newly-mapped device. For kdump, at this point, the device
2013 * is supposed to finish reset at its driver probe stage, so no
2014 * in-flight DMA will exist, and we don't need to worry about it
2015 * hereafter.
2016 */
2017 if (context_copied(context)) {
2018 u16 did_old = context_domain_id(context);
2019
b117e038 2020 if (did_old < cap_ndoms(iommu->cap)) {
aec0e861
XP
2021 iommu->flush.flush_context(iommu, did_old,
2022 (((u16)bus) << 8) | devfn,
2023 DMA_CCMD_MASK_NOBIT,
2024 DMA_CCMD_DEVICE_INVL);
f73a7eee
KA
2025 iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
2026 DMA_TLB_DSI_FLUSH);
2027 }
aec0e861
XP
2028 }
2029
de24e553 2030 context_clear_entry(context);
ea6606b0 2031
7373a8cc
LB
2032 if (sm_supported(iommu)) {
2033 unsigned long pds;
4ed0d3e6 2034
7373a8cc
LB
2035 WARN_ON(!table);
2036
2037 /* Setup the PASID DIR pointer: */
2038 pds = context_get_sm_pds(table);
2039 context->lo = (u64)virt_to_phys(table->table) |
2040 context_pdts(pds);
2041
2042 /* Setup the RID_PASID field: */
2043 context_set_sm_rid2pasid(context, PASID_RID2PASID);
de24e553 2044
de24e553 2045 /*
7373a8cc
LB
2046 * Setup the Device-TLB enable bit and Page request
2047 * Enable bit:
de24e553 2048 */
7373a8cc
LB
2049 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2050 if (info && info->ats_supported)
2051 context_set_sm_dte(context);
2052 if (info && info->pri_supported)
2053 context_set_sm_pre(context);
2054 } else {
2055 struct dma_pte *pgd = domain->pgd;
2056 int agaw;
2057
2058 context_set_domain_id(context, did);
2059 context_set_translation_type(context, translation);
2060
2061 if (translation != CONTEXT_TT_PASS_THROUGH) {
2062 /*
2063 * Skip top levels of page tables for iommu which has
2064 * less agaw than default. Unnecessary for PT mode.
2065 */
2066 for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
2067 ret = -ENOMEM;
2068 pgd = phys_to_virt(dma_pte_addr(pgd));
2069 if (!dma_pte_present(pgd))
2070 goto out_unlock;
2071 }
2072
2073 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2074 if (info && info->ats_supported)
2075 translation = CONTEXT_TT_DEV_IOTLB;
2076 else
2077 translation = CONTEXT_TT_MULTI_LEVEL;
2078
2079 context_set_address_root(context, virt_to_phys(pgd));
2080 context_set_address_width(context, agaw);
2081 } else {
2082 /*
2083 * In pass through mode, AW must be programmed to
2084 * indicate the largest AGAW value supported by
2085 * hardware. And ASR is ignored by hardware.
2086 */
2087 context_set_address_width(context, iommu->msagaw);
2088 }
93a23a72 2089 }
4ed0d3e6 2090
c07e7d21
MM
2091 context_set_fault_enable(context);
2092 context_set_present(context);
5331fe6f 2093 domain_flush_cache(domain, context, sizeof(*context));
ba395927 2094
4c25a2c1
DW
2095 /*
2096 * It's a non-present to present mapping. If hardware doesn't cache
2097 * non-present entries, we only need to flush the write-buffer. If it
2098 * _does_ cache non-present entries, then it does so in the special
2099 * domain #0, which we have to flush:
2100 */
2101 if (cap_caching_mode(iommu->cap)) {
2102 iommu->flush.flush_context(iommu, 0,
2103 (((u16)bus) << 8) | devfn,
2104 DMA_CCMD_MASK_NOBIT,
2105 DMA_CCMD_DEVICE_INVL);
c6c2cebd 2106 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
4c25a2c1 2107 } else {
ba395927 2108 iommu_flush_write_buffer(iommu);
4c25a2c1 2109 }
93a23a72 2110 iommu_enable_dev_iotlb(info);
c7151a8d 2111
55d94043
JR
2112 ret = 0;
2113
2114out_unlock:
2115 spin_unlock(&iommu->lock);
2116 spin_unlock_irqrestore(&device_domain_lock, flags);
fb170fb4 2117
5c365d18 2118 return ret;
ba395927
KA
2119}
2120
579305f7
AW
2121struct domain_context_mapping_data {
2122 struct dmar_domain *domain;
2123 struct intel_iommu *iommu;
ca6e322d 2124 struct pasid_table *table;
579305f7
AW
2125};
2126
2127static int domain_context_mapping_cb(struct pci_dev *pdev,
2128 u16 alias, void *opaque)
2129{
2130 struct domain_context_mapping_data *data = opaque;
2131
2132 return domain_context_mapping_one(data->domain, data->iommu,
ca6e322d
LB
2133 data->table, PCI_BUS_NUM(alias),
2134 alias & 0xff);
579305f7
AW
2135}
2136
ba395927 2137static int
28ccce0d 2138domain_context_mapping(struct dmar_domain *domain, struct device *dev)
ba395927 2139{
ca6e322d
LB
2140 struct domain_context_mapping_data data;
2141 struct pasid_table *table;
64ae892b 2142 struct intel_iommu *iommu;
156baca8 2143 u8 bus, devfn;
64ae892b 2144
e1f167f3 2145 iommu = device_to_iommu(dev, &bus, &devfn);
64ae892b
DW
2146 if (!iommu)
2147 return -ENODEV;
ba395927 2148
ca6e322d
LB
2149 table = intel_pasid_get_table(dev);
2150
579305f7 2151 if (!dev_is_pci(dev))
ca6e322d
LB
2152 return domain_context_mapping_one(domain, iommu, table,
2153 bus, devfn);
579305f7
AW
2154
2155 data.domain = domain;
2156 data.iommu = iommu;
ca6e322d 2157 data.table = table;
579305f7
AW
2158
2159 return pci_for_each_dma_alias(to_pci_dev(dev),
2160 &domain_context_mapping_cb, &data);
2161}
2162
2163static int domain_context_mapped_cb(struct pci_dev *pdev,
2164 u16 alias, void *opaque)
2165{
2166 struct intel_iommu *iommu = opaque;
2167
2168 return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
ba395927
KA
2169}
2170
e1f167f3 2171static int domain_context_mapped(struct device *dev)
ba395927 2172{
5331fe6f 2173 struct intel_iommu *iommu;
156baca8 2174 u8 bus, devfn;
5331fe6f 2175
e1f167f3 2176 iommu = device_to_iommu(dev, &bus, &devfn);
5331fe6f
WH
2177 if (!iommu)
2178 return -ENODEV;
ba395927 2179
579305f7
AW
2180 if (!dev_is_pci(dev))
2181 return device_context_mapped(iommu, bus, devfn);
e1f167f3 2182
579305f7
AW
2183 return !pci_for_each_dma_alias(to_pci_dev(dev),
2184 domain_context_mapped_cb, iommu);
ba395927
KA
2185}
2186
f532959b
FY
2187/* Returns a number of VTD pages, but aligned to MM page size */
2188static inline unsigned long aligned_nrpages(unsigned long host_addr,
2189 size_t size)
2190{
2191 host_addr &= ~PAGE_MASK;
2192 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
2193}
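/*
 * Example with 4KiB pages: a host offset of 0x800 and a size of 0x2000 give
 * PAGE_ALIGN(0x2800) >> VTD_PAGE_SHIFT = 0x3000 >> 12 = 3 VTD pages.
 */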
2194
6dd9a7c7
YS
2195/* Return largest possible superpage level for a given mapping */
2196static inline int hardware_largepage_caps(struct dmar_domain *domain,
2197 unsigned long iov_pfn,
2198 unsigned long phy_pfn,
2199 unsigned long pages)
2200{
2201 int support, level = 1;
2202 unsigned long pfnmerge;
2203
2204 support = domain->iommu_superpage;
2205
2206 /* To use a large page, the virtual *and* physical addresses
2207 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2208 of them will mean we have to use smaller pages. So just
2209 merge them and check both at once. */
2210 pfnmerge = iov_pfn | phy_pfn;
2211
2212 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
2213 pages >>= VTD_STRIDE_SHIFT;
2214 if (!pages)
2215 break;
2216 pfnmerge >>= VTD_STRIDE_SHIFT;
2217 level++;
2218 support--;
2219 }
2220 return level;
2221}
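/*
 * Example (assuming VTD_STRIDE_SHIFT is 9): if iov_pfn and phy_pfn are both
 * 2MiB aligned (low 9 bits clear), at least 512 pages remain and the IOMMU
 * advertises one level of superpage support, this returns level 2, i.e. a
 * 2MiB superpage can be used.
 */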
2222
9051aa02
DW
2223static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2224 struct scatterlist *sg, unsigned long phys_pfn,
2225 unsigned long nr_pages, int prot)
e1605495
DW
2226{
2227 struct dma_pte *first_pte = NULL, *pte = NULL;
9051aa02 2228 phys_addr_t uninitialized_var(pteval);
cc4f14aa 2229 unsigned long sg_res = 0;
6dd9a7c7
YS
2230 unsigned int largepage_lvl = 0;
2231 unsigned long lvl_pages = 0;
e1605495 2232
162d1b10 2233 BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
e1605495
DW
2234
2235 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2236 return -EINVAL;
2237
2238 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
2239
cc4f14aa
JL
2240 if (!sg) {
2241 sg_res = nr_pages;
9051aa02
DW
2242 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
2243 }
2244
6dd9a7c7 2245 while (nr_pages > 0) {
c85994e4
DW
2246 uint64_t tmp;
2247
e1605495 2248 if (!sg_res) {
29a90b70
RM
2249 unsigned int pgoff = sg->offset & ~PAGE_MASK;
2250
f532959b 2251 sg_res = aligned_nrpages(sg->offset, sg->length);
29a90b70 2252 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + pgoff;
e1605495 2253 sg->dma_length = sg->length;
29a90b70 2254 pteval = (sg_phys(sg) - pgoff) | prot;
6dd9a7c7 2255 phys_pfn = pteval >> VTD_PAGE_SHIFT;
e1605495 2256 }
6dd9a7c7 2257
e1605495 2258 if (!pte) {
6dd9a7c7
YS
2259 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2260
5cf0a76f 2261 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
e1605495
DW
2262 if (!pte)
2263 return -ENOMEM;
6dd9a7c7 2264 /* It is a large page */
6491d4d0 2265 if (largepage_lvl > 1) {
ba2374fd
CZ
2266 unsigned long nr_superpages, end_pfn;
2267
6dd9a7c7 2268 pteval |= DMA_PTE_LARGE_PAGE;
d41a4adb 2269 lvl_pages = lvl_to_nr_pages(largepage_lvl);
ba2374fd
CZ
2270
2271 nr_superpages = sg_res / lvl_pages;
2272 end_pfn = iov_pfn + nr_superpages * lvl_pages - 1;
2273
d41a4adb
JL
2274 /*
2275 * Ensure that old small page tables are
ba2374fd 2276 * removed to make room for superpage(s).
bc24c571
DD
2277 * We're adding new large pages, so make sure
2278 * we don't remove their parent tables.
d41a4adb 2279 */
bc24c571
DD
2280 dma_pte_free_pagetable(domain, iov_pfn, end_pfn,
2281 largepage_lvl + 1);
6491d4d0 2282 } else {
6dd9a7c7 2283 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
6491d4d0 2284 }
6dd9a7c7 2285
e1605495
DW
2286 }
2287 /* We don't need the lock here; nobody else
2288 * touches this iova range
2289 */
7766a3fb 2290 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
c85994e4 2291 if (tmp) {
1bf20f0d 2292 static int dumps = 5;
9f10e5bf
JR
2293 pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2294 iov_pfn, tmp, (unsigned long long)pteval);
1bf20f0d
DW
2295 if (dumps) {
2296 dumps--;
2297 debug_dma_dump_mappings(NULL);
2298 }
2299 WARN_ON(1);
2300 }
6dd9a7c7
YS
2301
2302 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2303
2304 BUG_ON(nr_pages < lvl_pages);
2305 BUG_ON(sg_res < lvl_pages);
2306
2307 nr_pages -= lvl_pages;
2308 iov_pfn += lvl_pages;
2309 phys_pfn += lvl_pages;
2310 pteval += lvl_pages * VTD_PAGE_SIZE;
2311 sg_res -= lvl_pages;
2312
2313 /* If the next PTE would be the first in a new page, then we
2314 need to flush the cache on the entries we've just written.
2315 And then we'll need to recalculate 'pte', so clear it and
2316 let it get set again in the if (!pte) block above.
2317
2318 If we're done (!nr_pages) we need to flush the cache too.
2319
2320 Also if we've been setting superpages, we may need to
2321 recalculate 'pte' and switch back to smaller pages for the
2322 end of the mapping, if the trailing size is not enough to
2323 use another superpage (i.e. sg_res < lvl_pages). */
e1605495 2324 pte++;
6dd9a7c7
YS
2325 if (!nr_pages || first_pte_in_page(pte) ||
2326 (largepage_lvl > 1 && sg_res < lvl_pages)) {
e1605495
DW
2327 domain_flush_cache(domain, first_pte,
2328 (void *)pte - (void *)first_pte);
2329 pte = NULL;
2330 }
6dd9a7c7
YS
2331
2332 if (!sg_res && nr_pages)
e1605495
DW
2333 sg = sg_next(sg);
2334 }
2335 return 0;
2336}
2337
87684fd9
PX
2338static int domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2339 struct scatterlist *sg, unsigned long phys_pfn,
2340 unsigned long nr_pages, int prot)
2341{
2342 int ret;
2343 struct intel_iommu *iommu;
2344
2345 /* Do the real mapping first */
2346 ret = __domain_mapping(domain, iov_pfn, sg, phys_pfn, nr_pages, prot);
2347 if (ret)
2348 return ret;
2349
2350 /* Notify about the new mapping */
2351 if (domain_type_is_vm(domain)) {
2352 /* VM typed domains can have more than one IOMMU */
2353 int iommu_id;
2354 for_each_domain_iommu(iommu_id, domain) {
2355 iommu = g_iommus[iommu_id];
2356 __mapping_notify_one(iommu, domain, iov_pfn, nr_pages);
2357 }
2358 } else {
2359 /* General domains only have one IOMMU */
2360 iommu = domain_get_iommu(domain);
2361 __mapping_notify_one(iommu, domain, iov_pfn, nr_pages);
2362 }
2363
2364 return 0;
2365}
2366
9051aa02
DW
2367static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2368 struct scatterlist *sg, unsigned long nr_pages,
2369 int prot)
ba395927 2370{
87684fd9 2371 return domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
9051aa02 2372}
6f6a00e4 2373
9051aa02
DW
2374static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2375 unsigned long phys_pfn, unsigned long nr_pages,
2376 int prot)
2377{
87684fd9 2378 return domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
ba395927
KA
2379}
2380
2452d9db 2381static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn)
ba395927 2382{
5082219b
FS
2383 unsigned long flags;
2384 struct context_entry *context;
2385 u16 did_old;
2386
c7151a8d
WH
2387 if (!iommu)
2388 return;
8c11e798 2389
5082219b
FS
2390 spin_lock_irqsave(&iommu->lock, flags);
2391 context = iommu_context_addr(iommu, bus, devfn, 0);
2392 if (!context) {
2393 spin_unlock_irqrestore(&iommu->lock, flags);
2394 return;
2395 }
2396 did_old = context_domain_id(context);
2397 context_clear_entry(context);
2398 __iommu_flush_cache(iommu, context, sizeof(*context));
2399 spin_unlock_irqrestore(&iommu->lock, flags);
2400 iommu->flush.flush_context(iommu,
2401 did_old,
2402 (((u16)bus) << 8) | devfn,
2403 DMA_CCMD_MASK_NOBIT,
2404 DMA_CCMD_DEVICE_INVL);
2405 iommu->flush.flush_iotlb(iommu,
2406 did_old,
2407 0,
2408 0,
2409 DMA_TLB_DSI_FLUSH);
ba395927
KA
2410}
2411
109b9b04
DW
2412static inline void unlink_domain_info(struct device_domain_info *info)
2413{
2414 assert_spin_locked(&device_domain_lock);
2415 list_del(&info->link);
2416 list_del(&info->global);
2417 if (info->dev)
0bcb3e28 2418 info->dev->archdata.iommu = NULL;
109b9b04
DW
2419}
2420
ba395927
KA
2421static void domain_remove_dev_info(struct dmar_domain *domain)
2422{
3a74ca01 2423 struct device_domain_info *info, *tmp;
fb170fb4 2424 unsigned long flags;
ba395927
KA
2425
2426 spin_lock_irqsave(&device_domain_lock, flags);
76f45fe3 2427 list_for_each_entry_safe(info, tmp, &domain->devices, link)
127c7615 2428 __dmar_remove_one_dev_info(info);
ba395927
KA
2429 spin_unlock_irqrestore(&device_domain_lock, flags);
2430}
2431
2432/*
2433 * find_domain
1525a29a 2434 * Note: we use struct device->archdata.iommu to store the info
ba395927 2435 */
1525a29a 2436static struct dmar_domain *find_domain(struct device *dev)
ba395927
KA
2437{
2438 struct device_domain_info *info;
2439
2440 /* No lock here, assumes no domain exit in normal case */
1525a29a 2441 info = dev->archdata.iommu;
b316d02a 2442 if (likely(info))
ba395927
KA
2443 return info->domain;
2444 return NULL;
2445}
2446
5a8f40e8 2447static inline struct device_domain_info *
745f2586
JL
2448dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2449{
2450 struct device_domain_info *info;
2451
2452 list_for_each_entry(info, &device_domain_list, global)
41e80dca 2453 if (info->iommu->segment == segment && info->bus == bus &&
745f2586 2454 info->devfn == devfn)
5a8f40e8 2455 return info;
745f2586
JL
2456
2457 return NULL;
2458}
2459
5db31569
JR
2460static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
2461 int bus, int devfn,
2462 struct device *dev,
2463 struct dmar_domain *domain)
745f2586 2464{
5a8f40e8 2465 struct dmar_domain *found = NULL;
745f2586
JL
2466 struct device_domain_info *info;
2467 unsigned long flags;
d160aca5 2468 int ret;
745f2586
JL
2469
2470 info = alloc_devinfo_mem();
2471 if (!info)
b718cd3d 2472 return NULL;
745f2586 2473
745f2586
JL
2474 info->bus = bus;
2475 info->devfn = devfn;
b16d0cb9
DW
2476 info->ats_supported = info->pasid_supported = info->pri_supported = 0;
2477 info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
2478 info->ats_qdep = 0;
745f2586
JL
2479 info->dev = dev;
2480 info->domain = domain;
5a8f40e8 2481 info->iommu = iommu;
cc580e41 2482 info->pasid_table = NULL;
745f2586 2483
b16d0cb9
DW
2484 if (dev && dev_is_pci(dev)) {
2485 struct pci_dev *pdev = to_pci_dev(info->dev);
2486
cef74409
GK
2487 if (!pci_ats_disabled() &&
2488 ecap_dev_iotlb_support(iommu->ecap) &&
b16d0cb9
DW
2489 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) &&
2490 dmar_find_matched_atsr_unit(pdev))
2491 info->ats_supported = 1;
2492
765b6a98
LB
2493 if (sm_supported(iommu)) {
2494 if (pasid_supported(iommu)) {
b16d0cb9
DW
2495 int features = pci_pasid_features(pdev);
2496 if (features >= 0)
2497 info->pasid_supported = features | 1;
2498 }
2499
2500 if (info->ats_supported && ecap_prs(iommu->ecap) &&
2501 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
2502 info->pri_supported = 1;
2503 }
2504 }
2505
745f2586
JL
2506 spin_lock_irqsave(&device_domain_lock, flags);
2507 if (dev)
0bcb3e28 2508 found = find_domain(dev);
f303e507
JR
2509
2510 if (!found) {
5a8f40e8 2511 struct device_domain_info *info2;
41e80dca 2512 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
f303e507
JR
2513 if (info2) {
2514 found = info2->domain;
2515 info2->dev = dev;
2516 }
5a8f40e8 2517 }
f303e507 2518
745f2586
JL
2519 if (found) {
2520 spin_unlock_irqrestore(&device_domain_lock, flags);
2521 free_devinfo_mem(info);
b718cd3d
DW
2522 /* Caller must free the original domain */
2523 return found;
745f2586
JL
2524 }
2525
d160aca5
JR
2526 spin_lock(&iommu->lock);
2527 ret = domain_attach_iommu(domain, iommu);
2528 spin_unlock(&iommu->lock);
2529
2530 if (ret) {
c6c2cebd 2531 spin_unlock_irqrestore(&device_domain_lock, flags);
499f3aa4 2532 free_devinfo_mem(info);
c6c2cebd
JR
2533 return NULL;
2534 }
c6c2cebd 2535
b718cd3d
DW
2536 list_add(&info->link, &domain->devices);
2537 list_add(&info->global, &device_domain_list);
2538 if (dev)
2539 dev->archdata.iommu = info;
0bbeb01a 2540 spin_unlock_irqrestore(&device_domain_lock, flags);
a7fc93fe 2541
0bbeb01a
LB
2542 /* PASID table is mandatory for a PCI device in scalable mode. */
2543 if (dev && dev_is_pci(dev) && sm_supported(iommu)) {
a7fc93fe
LB
2544 ret = intel_pasid_alloc_table(dev);
2545 if (ret) {
932a6523 2546 dev_err(dev, "PASID table allocation failed\n");
71753239 2547 dmar_remove_one_dev_info(dev);
0bbeb01a 2548 return NULL;
a7fc93fe 2549 }
ef848b7e
LB
2550
2551 /* Setup the PASID entry for requests without PASID: */
2552 spin_lock(&iommu->lock);
2553 if (hw_pass_through && domain_type_is_si(domain))
2554 ret = intel_pasid_setup_pass_through(iommu, domain,
2555 dev, PASID_RID2PASID);
2556 else
2557 ret = intel_pasid_setup_second_level(iommu, domain,
2558 dev, PASID_RID2PASID);
2559 spin_unlock(&iommu->lock);
2560 if (ret) {
932a6523 2561 dev_err(dev, "Setup RID2PASID failed\n");
71753239 2562 dmar_remove_one_dev_info(dev);
ef848b7e 2563 return NULL;
a7fc93fe
LB
2564 }
2565 }
b718cd3d 2566
cc4e2575 2567 if (dev && domain_context_mapping(domain, dev)) {
932a6523 2568 dev_err(dev, "Domain context map failed\n");
71753239 2569 dmar_remove_one_dev_info(dev);
cc4e2575
JR
2570 return NULL;
2571 }
2572
b718cd3d 2573 return domain;
745f2586
JL
2574}
2575
579305f7
AW
2576static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2577{
2578 *(u16 *)opaque = alias;
2579 return 0;
2580}
2581
76208356 2582static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw)
ba395927 2583{
e083ea5b 2584 struct device_domain_info *info;
76208356 2585 struct dmar_domain *domain = NULL;
579305f7 2586 struct intel_iommu *iommu;
fcc35c63 2587 u16 dma_alias;
ba395927 2588 unsigned long flags;
aa4d066a 2589 u8 bus, devfn;
ba395927 2590
579305f7
AW
2591 iommu = device_to_iommu(dev, &bus, &devfn);
2592 if (!iommu)
2593 return NULL;
2594
146922ec
DW
2595 if (dev_is_pci(dev)) {
2596 struct pci_dev *pdev = to_pci_dev(dev);
276dbf99 2597
579305f7
AW
2598 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2599
2600 spin_lock_irqsave(&device_domain_lock, flags);
2601 info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2602 PCI_BUS_NUM(dma_alias),
2603 dma_alias & 0xff);
2604 if (info) {
2605 iommu = info->iommu;
2606 domain = info->domain;
5a8f40e8 2607 }
579305f7 2608 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927 2609
76208356 2610 /* DMA alias already has a domain, use it */
579305f7 2611 if (info)
76208356 2612 goto out;
579305f7 2613 }
ba395927 2614
146922ec 2615 /* Allocate and initialize new domain for the device */
ab8dfe25 2616 domain = alloc_domain(0);
745f2586 2617 if (!domain)
579305f7 2618 return NULL;
dc534b25 2619 if (domain_init(domain, iommu, gaw)) {
579305f7
AW
2620 domain_exit(domain);
2621 return NULL;
2c2e2c38 2622 }
ba395927 2623
76208356 2624out:
579305f7 2625
76208356
JR
2626 return domain;
2627}
579305f7 2628
76208356
JR
2629static struct dmar_domain *set_domain_for_dev(struct device *dev,
2630 struct dmar_domain *domain)
2631{
2632 struct intel_iommu *iommu;
2633 struct dmar_domain *tmp;
2634 u16 req_id, dma_alias;
2635 u8 bus, devfn;
2636
2637 iommu = device_to_iommu(dev, &bus, &devfn);
2638 if (!iommu)
2639 return NULL;
2640
2641 req_id = ((u16)bus << 8) | devfn;
2642
2643 if (dev_is_pci(dev)) {
2644 struct pci_dev *pdev = to_pci_dev(dev);
2645
2646 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2647
2648 /* register PCI DMA alias device */
2649 if (req_id != dma_alias) {
2650 tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2651 dma_alias & 0xff, NULL, domain);
2652
2653 if (!tmp || tmp != domain)
2654 return tmp;
2655 }
ba395927
KA
2656 }
2657
5db31569 2658 tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
76208356
JR
2659 if (!tmp || tmp != domain)
2660 return tmp;
2661
2662 return domain;
2663}
579305f7 2664
76208356
JR
2665static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
2666{
2667 struct dmar_domain *domain, *tmp;
2668
2669 domain = find_domain(dev);
2670 if (domain)
2671 goto out;
2672
2673 domain = find_or_alloc_domain(dev, gaw);
2674 if (!domain)
2675 goto out;
2676
2677 tmp = set_domain_for_dev(dev, domain);
2678 if (!tmp || domain != tmp) {
579305f7
AW
2679 domain_exit(domain);
2680 domain = tmp;
2681 }
b718cd3d 2682
76208356
JR
2683out:
2684
b718cd3d 2685 return domain;
ba395927
KA
2686}
2687
b213203e
DW
2688static int iommu_domain_identity_map(struct dmar_domain *domain,
2689 unsigned long long start,
2690 unsigned long long end)
ba395927 2691{
c5395d5c
DW
2692 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2693 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2694
2695 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2696 dma_to_mm_pfn(last_vpfn))) {
9f10e5bf 2697 pr_err("Reserving iova failed\n");
b213203e 2698 return -ENOMEM;
ba395927
KA
2699 }
2700
af1089ce 2701 pr_debug("Mapping reserved region %llx-%llx\n", start, end);
ba395927
KA
2702 /*
2703 * RMRR range might have overlap with physical memory range,
2704 * clear it first
2705 */
c5395d5c 2706 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
ba395927 2707
87684fd9
PX
2708 return __domain_mapping(domain, first_vpfn, NULL,
2709 first_vpfn, last_vpfn - first_vpfn + 1,
2710 DMA_PTE_READ|DMA_PTE_WRITE);
b213203e
DW
2711}
2712
d66ce54b
JR
2713static int domain_prepare_identity_map(struct device *dev,
2714 struct dmar_domain *domain,
2715 unsigned long long start,
2716 unsigned long long end)
b213203e 2717{
19943b0e
DW
2718 /* For _hardware_ passthrough, don't bother. But for software
2719 passthrough, we do it anyway -- it may indicate a memory
2720 range which is reserved in E820 and so didn't get set
2721 up to start with in si_domain */
2722 if (domain == si_domain && hw_pass_through) {
932a6523
BH
2723 dev_warn(dev, "Ignoring identity map for HW passthrough [0x%Lx - 0x%Lx]\n",
2724 start, end);
19943b0e
DW
2725 return 0;
2726 }
2727
932a6523 2728 dev_info(dev, "Setting identity map [0x%Lx - 0x%Lx]\n", start, end);
9f10e5bf 2729
5595b528
DW
2730 if (end < start) {
2731 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2732 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2733 dmi_get_system_info(DMI_BIOS_VENDOR),
2734 dmi_get_system_info(DMI_BIOS_VERSION),
2735 dmi_get_system_info(DMI_PRODUCT_VERSION));
d66ce54b 2736 return -EIO;
5595b528
DW
2737 }
2738
2ff729f5
DW
2739 if (end >> agaw_to_width(domain->agaw)) {
2740 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2741 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2742 agaw_to_width(domain->agaw),
2743 dmi_get_system_info(DMI_BIOS_VENDOR),
2744 dmi_get_system_info(DMI_BIOS_VERSION),
2745 dmi_get_system_info(DMI_PRODUCT_VERSION));
d66ce54b 2746 return -EIO;
2ff729f5 2747 }
19943b0e 2748
d66ce54b
JR
2749 return iommu_domain_identity_map(domain, start, end);
2750}
ba395927 2751
d66ce54b
JR
2752static int iommu_prepare_identity_map(struct device *dev,
2753 unsigned long long start,
2754 unsigned long long end)
2755{
2756 struct dmar_domain *domain;
2757 int ret;
2758
2759 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2760 if (!domain)
2761 return -ENOMEM;
2762
2763 ret = domain_prepare_identity_map(dev, domain, start, end);
2764 if (ret)
2765 domain_exit(domain);
b213203e 2766
ba395927 2767 return ret;
ba395927
KA
2768}
2769
2770static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
0b9d9753 2771 struct device *dev)
ba395927 2772{
0b9d9753 2773 if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927 2774 return 0;
0b9d9753
DW
2775 return iommu_prepare_identity_map(dev, rmrr->base_address,
2776 rmrr->end_address);
ba395927
KA
2777}
2778
d3f13810 2779#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
49a0429e
KA
2780static inline void iommu_prepare_isa(void)
2781{
2782 struct pci_dev *pdev;
2783 int ret;
2784
2785 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2786 if (!pdev)
2787 return;
2788
9f10e5bf 2789 pr_info("Prepare 0-16MiB unity mapping for LPC\n");
0b9d9753 2790 ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);
49a0429e
KA
2791
2792 if (ret)
9f10e5bf 2793 pr_err("Failed to create 0-16MiB identity map - floppy might not work\n");
49a0429e 2794
9b27e82d 2795 pci_dev_put(pdev);
49a0429e
KA
2796}
2797#else
2798static inline void iommu_prepare_isa(void)
2799{
2800 return;
2801}
d3f13810 2802#endif /* !CONFIG_INTEL_IOMMU_FLOPPY_WA */
49a0429e 2803
2c2e2c38 2804static int md_domain_init(struct dmar_domain *domain, int guest_width);
c7ab48d2 2805
071e1374 2806static int __init si_domain_init(int hw)
2c2e2c38 2807{
e083ea5b 2808 int nid, ret;
2c2e2c38 2809
ab8dfe25 2810 si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2c2e2c38
FY
2811 if (!si_domain)
2812 return -EFAULT;
2813
2c2e2c38
FY
2814 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2815 domain_exit(si_domain);
2816 return -EFAULT;
2817 }
2818
0dc79715 2819 pr_debug("Identity mapping domain allocated\n");
2c2e2c38 2820
19943b0e
DW
2821 if (hw)
2822 return 0;
2823
c7ab48d2 2824 for_each_online_node(nid) {
5dfe8660
TH
2825 unsigned long start_pfn, end_pfn;
2826 int i;
2827
2828 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2829 ret = iommu_domain_identity_map(si_domain,
2830 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2831 if (ret)
2832 return ret;
2833 }
c7ab48d2
DW
2834 }
2835
2c2e2c38
FY
2836 return 0;
2837}
2838
9b226624 2839static int identity_mapping(struct device *dev)
2c2e2c38
FY
2840{
2841 struct device_domain_info *info;
2842
2843 if (likely(!iommu_identity_mapping))
2844 return 0;
2845
9b226624 2846 info = dev->archdata.iommu;
cb452a40
MT
2847 if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2848 return (info->domain == si_domain);
2c2e2c38 2849
2c2e2c38
FY
2850 return 0;
2851}
2852
28ccce0d 2853static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
2c2e2c38 2854{
0ac72664 2855 struct dmar_domain *ndomain;
5a8f40e8 2856 struct intel_iommu *iommu;
156baca8 2857 u8 bus, devfn;
2c2e2c38 2858
5913c9bf 2859 iommu = device_to_iommu(dev, &bus, &devfn);
5a8f40e8
DW
2860 if (!iommu)
2861 return -ENODEV;
2862
5db31569 2863 ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
0ac72664
DW
2864 if (ndomain != domain)
2865 return -EBUSY;
2c2e2c38
FY
2866
2867 return 0;
2868}
2869
0b9d9753 2870static bool device_has_rmrr(struct device *dev)
ea2447f7
TM
2871{
2872 struct dmar_rmrr_unit *rmrr;
832bd858 2873 struct device *tmp;
ea2447f7
TM
2874 int i;
2875
0e242612 2876 rcu_read_lock();
ea2447f7 2877 for_each_rmrr_units(rmrr) {
b683b230
JL
2878 /*
2879 * Return TRUE if this RMRR contains the device that
2880 * is passed in.
2881 */
2882 for_each_active_dev_scope(rmrr->devices,
2883 rmrr->devices_cnt, i, tmp)
0b9d9753 2884 if (tmp == dev) {
0e242612 2885 rcu_read_unlock();
ea2447f7 2886 return true;
b683b230 2887 }
ea2447f7 2888 }
0e242612 2889 rcu_read_unlock();
ea2447f7
TM
2890 return false;
2891}
2892
c875d2c1
AW
2893/*
2894 * There are a couple of cases where we need to restrict the functionality of
2895 * devices associated with RMRRs. The first is when evaluating a device for
2896 * identity mapping because problems exist when devices are moved in and out
2897 * of domains and their respective RMRR information is lost. This means that
2898 * a device with associated RMRRs will never be in a "passthrough" domain.
2899 * The second is use of the device through the IOMMU API. This interface
2900 * expects to have full control of the IOVA space for the device. We cannot
2901 * satisfy both the requirement that RMRR access is maintained and have an
2902 * unencumbered IOVA space. We also have no ability to quiesce the device's
2903 * use of the RMRR space or even inform the IOMMU API user of the restriction.
2904 * We therefore prevent devices associated with an RMRR from participating in
2905 * the IOMMU API, which eliminates them from device assignment.
2906 *
2907 * In both cases we assume that PCI USB devices with RMRRs have them largely
2908 * for historical reasons and that the RMRR space is not actively used post
2909 * boot. This exclusion may change if vendors begin to abuse it.
18436afd
DW
2910 *
2911 * The same exception is made for graphics devices, with the requirement that
2912 * any use of the RMRR regions will be torn down before assigning the device
2913 * to a guest.
c875d2c1
AW
2914 */
2915static bool device_is_rmrr_locked(struct device *dev)
2916{
2917 if (!device_has_rmrr(dev))
2918 return false;
2919
2920 if (dev_is_pci(dev)) {
2921 struct pci_dev *pdev = to_pci_dev(dev);
2922
18436afd 2923 if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
c875d2c1
AW
2924 return false;
2925 }
2926
2927 return true;
2928}
2929
3bdb2591 2930static int iommu_should_identity_map(struct device *dev, int startup)
6941af28 2931{
3bdb2591
DW
2932 if (dev_is_pci(dev)) {
2933 struct pci_dev *pdev = to_pci_dev(dev);
ea2447f7 2934
c875d2c1 2935 if (device_is_rmrr_locked(dev))
3bdb2591 2936 return 0;
e0fc7e0b 2937
89a6079d
LB
2938 /*
2939 * Prevent any device marked as untrusted from getting
2940 * placed into the statically identity mapping domain.
2941 */
2942 if (pdev->untrusted)
2943 return 0;
2944
3bdb2591
DW
2945 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2946 return 1;
e0fc7e0b 2947
3bdb2591
DW
2948 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2949 return 1;
6941af28 2950
3bdb2591 2951 if (!(iommu_identity_mapping & IDENTMAP_ALL))
3dfc813d 2952 return 0;
3bdb2591
DW
2953
2954 /*
2955 * We want to start off with all devices in the 1:1 domain, and
2956 * take them out later if we find they can't access all of memory.
2957 *
2958 * However, we can't do this for PCI devices behind bridges,
2959 * because all PCI devices behind the same bridge will end up
2960 * with the same source-id on their transactions.
2961 *
2962 * Practically speaking, we can't change things around for these
2963 * devices at run-time, because we can't be sure there'll be no
2964 * DMA transactions in flight for any of their siblings.
2965 *
2966 * So PCI devices (unless they're on the root bus) as well as
2967 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2968 * the 1:1 domain, just in _case_ one of their siblings turns out
2969 * not to be able to map all of memory.
2970 */
2971 if (!pci_is_pcie(pdev)) {
2972 if (!pci_is_root_bus(pdev->bus))
2973 return 0;
2974 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2975 return 0;
2976 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
3dfc813d 2977 return 0;
3bdb2591
DW
2978 } else {
2979 if (device_has_rmrr(dev))
2980 return 0;
2981 }
3dfc813d 2982
3bdb2591 2983 /*
3dfc813d 2984 * At boot time, we don't yet know if devices will be 64-bit capable.
3bdb2591 2985 * Assume that they will — if they turn out not to be, then we can
3dfc813d
DW
2986 * take them out of the 1:1 domain later.
2987 */
8fcc5372
CW
2988 if (!startup) {
2989 /*
2990 * If the device's dma_mask is less than the system's memory
2991 * size then this is not a candidate for identity mapping.
2992 */
3bdb2591 2993 u64 dma_mask = *dev->dma_mask;
8fcc5372 2994
3bdb2591
DW
2995 if (dev->coherent_dma_mask &&
2996 dev->coherent_dma_mask < dma_mask)
2997 dma_mask = dev->coherent_dma_mask;
8fcc5372 2998
3bdb2591 2999 return dma_mask >= dma_get_required_mask(dev);
8fcc5372 3000 }
6941af28
DW
3001
3002 return 1;
3003}
3004
cf04eee8
DW
3005static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
3006{
3007 int ret;
3008
3009 if (!iommu_should_identity_map(dev, 1))
3010 return 0;
3011
28ccce0d 3012 ret = domain_add_dev_info(si_domain, dev);
cf04eee8 3013 if (!ret)
932a6523
BH
3014 dev_info(dev, "%s identity mapping\n",
3015 hw ? "Hardware" : "Software");
cf04eee8
DW
3016 else if (ret == -ENODEV)
3017 /* device not associated with an iommu */
3018 ret = 0;
3019
3020 return ret;
3021}
3022
3023
071e1374 3024static int __init iommu_prepare_static_identity_mapping(int hw)
2c2e2c38 3025{
2c2e2c38 3026 struct pci_dev *pdev = NULL;
cf04eee8
DW
3027 struct dmar_drhd_unit *drhd;
3028 struct intel_iommu *iommu;
3029 struct device *dev;
3030 int i;
3031 int ret = 0;
2c2e2c38 3032
2c2e2c38 3033 for_each_pci_dev(pdev) {
cf04eee8
DW
3034 ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
3035 if (ret)
3036 return ret;
3037 }
3038
3039 for_each_active_iommu(iommu, drhd)
3040 for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
3041 struct acpi_device_physical_node *pn;
3042 struct acpi_device *adev;
3043
3044 if (dev->bus != &acpi_bus_type)
3045 continue;
86080ccc 3046
cf04eee8
DW
3047 adev= to_acpi_device(dev);
3048 mutex_lock(&adev->physical_node_lock);
3049 list_for_each_entry(pn, &adev->physical_node_list, node) {
3050 ret = dev_prepare_static_identity_mapping(pn->dev, hw);
3051 if (ret)
3052 break;
eae460b6 3053 }
cf04eee8
DW
3054 mutex_unlock(&adev->physical_node_lock);
3055 if (ret)
3056 return ret;
62edf5dc 3057 }
2c2e2c38
FY
3058
3059 return 0;
3060}
3061
ffebeb46
JL
3062static void intel_iommu_init_qi(struct intel_iommu *iommu)
3063{
3064 /*
3065 * Start from a sane iommu hardware state.
3066 * If queued invalidation was already initialized by us
3067 * (for example, while enabling interrupt-remapping) then
3068 * things are already rolling from a sane state.
3069 */
3070 if (!iommu->qi) {
3071 /*
3072 * Clear any previous faults.
3073 */
3074 dmar_fault(-1, iommu);
3075 /*
3076 * Disable queued invalidation if supported and already enabled
3077 * before OS handover.
3078 */
3079 dmar_disable_qi(iommu);
3080 }
3081
3082 if (dmar_enable_qi(iommu)) {
3083 /*
3084 * Queued Invalidate not enabled, use Register Based Invalidate
3085 */
3086 iommu->flush.flush_context = __iommu_flush_context;
3087 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
9f10e5bf 3088 pr_info("%s: Using Register based invalidation\n",
ffebeb46
JL
3089 iommu->name);
3090 } else {
3091 iommu->flush.flush_context = qi_flush_context;
3092 iommu->flush.flush_iotlb = qi_flush_iotlb;
9f10e5bf 3093 pr_info("%s: Using Queued invalidation\n", iommu->name);
ffebeb46
JL
3094 }
3095}
3096
091d42e4 3097static int copy_context_table(struct intel_iommu *iommu,
dfddb969 3098 struct root_entry *old_re,
091d42e4
JR
3099 struct context_entry **tbl,
3100 int bus, bool ext)
3101{
dbcd861f 3102 int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
543c8dcf 3103 struct context_entry *new_ce = NULL, ce;
dfddb969 3104 struct context_entry *old_ce = NULL;
543c8dcf 3105 struct root_entry re;
091d42e4
JR
3106 phys_addr_t old_ce_phys;
3107
3108 tbl_idx = ext ? bus * 2 : bus;
dfddb969 3109 memcpy(&re, old_re, sizeof(re));
091d42e4
JR
3110
3111 for (devfn = 0; devfn < 256; devfn++) {
3112 /* First calculate the correct index */
3113 idx = (ext ? devfn * 2 : devfn) % 256;
3114
3115 if (idx == 0) {
3116 /* First save what we may have and clean up */
3117 if (new_ce) {
3118 tbl[tbl_idx] = new_ce;
3119 __iommu_flush_cache(iommu, new_ce,
3120 VTD_PAGE_SIZE);
3121 pos = 1;
3122 }
3123
3124 if (old_ce)
829383e1 3125 memunmap(old_ce);
091d42e4
JR
3126
3127 ret = 0;
3128 if (devfn < 0x80)
543c8dcf 3129 old_ce_phys = root_entry_lctp(&re);
091d42e4 3130 else
543c8dcf 3131 old_ce_phys = root_entry_uctp(&re);
091d42e4
JR
3132
3133 if (!old_ce_phys) {
3134 if (ext && devfn == 0) {
3135 /* No LCTP, try UCTP */
3136 devfn = 0x7f;
3137 continue;
3138 } else {
3139 goto out;
3140 }
3141 }
3142
3143 ret = -ENOMEM;
dfddb969
DW
3144 old_ce = memremap(old_ce_phys, PAGE_SIZE,
3145 MEMREMAP_WB);
091d42e4
JR
3146 if (!old_ce)
3147 goto out;
3148
3149 new_ce = alloc_pgtable_page(iommu->node);
3150 if (!new_ce)
3151 goto out_unmap;
3152
3153 ret = 0;
3154 }
3155
3156 /* Now copy the context entry */
dfddb969 3157 memcpy(&ce, old_ce + idx, sizeof(ce));
091d42e4 3158
cf484d0e 3159 if (!__context_present(&ce))
091d42e4
JR
3160 continue;
3161
dbcd861f
JR
3162 did = context_domain_id(&ce);
3163 if (did >= 0 && did < cap_ndoms(iommu->cap))
3164 set_bit(did, iommu->domain_ids);
3165
cf484d0e
JR
3166 /*
3167 * We need a marker for copied context entries. This
3168 * marker needs to work for the old format as well as
3169 * for extended context entries.
3170 *
3171 * Bit 67 of the context entry is used. In the old
3172 * format this bit is available to software, in the
3173 * extended format it is the PGE bit, but PGE is ignored
3174 * by HW if PASIDs are disabled (and thus still
3175 * available).
3176 *
3177 * So disable PASIDs first and then mark the entry
3178 * copied. This means that we don't copy PASID
3179 * translations from the old kernel, but this is fine as
3180 * faults there are not fatal.
3181 */
3182 context_clear_pasid_enable(&ce);
3183 context_set_copied(&ce);
3184
091d42e4
JR
3185 new_ce[idx] = ce;
3186 }
3187
3188 tbl[tbl_idx + pos] = new_ce;
3189
3190 __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
3191
3192out_unmap:
dfddb969 3193 memunmap(old_ce);
091d42e4
JR
3194
3195out:
3196 return ret;
3197}
3198
3199static int copy_translation_tables(struct intel_iommu *iommu)
3200{
3201 struct context_entry **ctxt_tbls;
dfddb969 3202 struct root_entry *old_rt;
091d42e4
JR
3203 phys_addr_t old_rt_phys;
3204 int ctxt_table_entries;
3205 unsigned long flags;
3206 u64 rtaddr_reg;
3207 int bus, ret;
c3361f2f 3208 bool new_ext, ext;
091d42e4
JR
3209
3210 rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
3211 ext = !!(rtaddr_reg & DMA_RTADDR_RTT);
c3361f2f
JR
3212 new_ext = !!ecap_ecs(iommu->ecap);
3213
3214 /*
3215 * The RTT bit can only be changed when translation is disabled,
3216 * but disabling translation means to open a window for data
3217 * corruption. So bail out and don't copy anything if we would
3218 * have to change the bit.
3219 */
3220 if (new_ext != ext)
3221 return -EINVAL;
091d42e4
JR
3222
3223 old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
3224 if (!old_rt_phys)
3225 return -EINVAL;
3226
dfddb969 3227 old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
091d42e4
JR
3228 if (!old_rt)
3229 return -ENOMEM;
3230
3231 /* This is too big for the stack - allocate it from slab */
3232 ctxt_table_entries = ext ? 512 : 256;
3233 ret = -ENOMEM;
6396bb22 3234 ctxt_tbls = kcalloc(ctxt_table_entries, sizeof(void *), GFP_KERNEL);
091d42e4
JR
3235 if (!ctxt_tbls)
3236 goto out_unmap;
3237
3238 for (bus = 0; bus < 256; bus++) {
3239 ret = copy_context_table(iommu, &old_rt[bus],
3240 ctxt_tbls, bus, ext);
3241 if (ret) {
3242 pr_err("%s: Failed to copy context table for bus %d\n",
3243 iommu->name, bus);
3244 continue;
3245 }
3246 }
3247
3248 spin_lock_irqsave(&iommu->lock, flags);
3249
3250 /* Context tables are copied, now write them to the root_entry table */
3251 for (bus = 0; bus < 256; bus++) {
3252 int idx = ext ? bus * 2 : bus;
3253 u64 val;
3254
3255 if (ctxt_tbls[idx]) {
3256 val = virt_to_phys(ctxt_tbls[idx]) | 1;
3257 iommu->root_entry[bus].lo = val;
3258 }
3259
3260 if (!ext || !ctxt_tbls[idx + 1])
3261 continue;
3262
3263 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
3264 iommu->root_entry[bus].hi = val;
3265 }
3266
3267 spin_unlock_irqrestore(&iommu->lock, flags);
3268
3269 kfree(ctxt_tbls);
3270
3271 __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
3272
3273 ret = 0;
3274
3275out_unmap:
dfddb969 3276 memunmap(old_rt);
091d42e4
JR
3277
3278 return ret;
3279}
3280
b779260b 3281static int __init init_dmars(void)
ba395927
KA
3282{
3283 struct dmar_drhd_unit *drhd;
3284 struct dmar_rmrr_unit *rmrr;
a87f4918 3285 bool copied_tables = false;
832bd858 3286 struct device *dev;
ba395927 3287 struct intel_iommu *iommu;
13cf0174 3288 int i, ret;
2c2e2c38 3289
ba395927
KA
3290 /*
3291 * for each drhd
3292 * allocate root
3293 * initialize and program root entry to not present
3294 * endfor
3295 */
3296 for_each_drhd_unit(drhd) {
5e0d2a6f 3297 /*
3298 * lock not needed as this is only incremented in the single-
3299 * threaded kernel __init code path; all other accesses are
3300 * read-only
3301 */
78d8e704 3302 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
1b198bb0
MT
3303 g_num_of_iommus++;
3304 continue;
3305 }
9f10e5bf 3306 pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
5e0d2a6f 3307 }
3308
ffebeb46
JL
3309 /* Preallocate enough resources for IOMMU hot-addition */
3310 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
3311 g_num_of_iommus = DMAR_UNITS_SUPPORTED;
3312
d9630fe9
WH
3313 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
3314 GFP_KERNEL);
3315 if (!g_iommus) {
9f10e5bf 3316 pr_err("Allocating global iommu array failed\n");
d9630fe9
WH
3317 ret = -ENOMEM;
3318 goto error;
3319 }
3320
7c919779 3321 for_each_active_iommu(iommu, drhd) {
56283174
LB
3322 /*
3323 * Find the max pasid size of all IOMMUs in the system.
3324 * We need to ensure the system pasid table is no bigger
3325 * than the smallest supported.
3326 */
765b6a98 3327 if (pasid_supported(iommu)) {
56283174
LB
3328 u32 temp = 2 << ecap_pss(iommu->ecap);
3329
3330 intel_pasid_max_id = min_t(u32, temp,
3331 intel_pasid_max_id);
3332 }
3333
d9630fe9 3334 g_iommus[iommu->seq_id] = iommu;
ba395927 3335
b63d80d1
JR
3336 intel_iommu_init_qi(iommu);
3337
e61d98d8
SS
3338 ret = iommu_init_domains(iommu);
3339 if (ret)
989d51fc 3340 goto free_iommu;
e61d98d8 3341
4158c2ec
JR
3342 init_translation_status(iommu);
3343
091d42e4
JR
3344 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
3345 iommu_disable_translation(iommu);
3346 clear_translation_pre_enabled(iommu);
3347 pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3348 iommu->name);
3349 }
4158c2ec 3350
ba395927
KA
3351 /*
3352 * TBD:
3353 * we could share the same root & context tables
25985edc 3354 * among all IOMMUs. This needs to be split later.
ba395927
KA
3355 */
3356 ret = iommu_alloc_root_entry(iommu);
ffebeb46 3357 if (ret)
989d51fc 3358 goto free_iommu;
5f0a7f76 3359
091d42e4
JR
3360 if (translation_pre_enabled(iommu)) {
3361 pr_info("Translation already enabled - trying to copy translation structures\n");
3362
3363 ret = copy_translation_tables(iommu);
3364 if (ret) {
3365 /*
3366 * We found the IOMMU with translation
3367 * enabled - but failed to copy over the
3368 * old root-entry table. Try to proceed
3369 * by disabling translation now and
3370 * allocating a clean root-entry table.
3371 * This might cause DMAR faults, but
3372 * probably the dump will still succeed.
3373 */
3374 pr_err("Failed to copy translation tables from previous kernel for %s\n",
3375 iommu->name);
3376 iommu_disable_translation(iommu);
3377 clear_translation_pre_enabled(iommu);
3378 } else {
3379 pr_info("Copied translation tables from previous kernel for %s\n",
3380 iommu->name);
a87f4918 3381 copied_tables = true;
091d42e4
JR
3382 }
3383 }
3384
4ed0d3e6 3385 if (!ecap_pass_through(iommu->ecap))
19943b0e 3386 hw_pass_through = 0;
8a94ade4 3387#ifdef CONFIG_INTEL_IOMMU_SVM
765b6a98 3388 if (pasid_supported(iommu))
d9737953 3389 intel_svm_init(iommu);
8a94ade4 3390#endif
ba395927
KA
3391 }
3392
a4c34ff1
JR
3393 /*
3394 * Now that qi is enabled on all iommus, set the root entry and flush
3395 * caches. This is required on some Intel X58 chipsets, otherwise the
3396 * flush_context function will loop forever and the boot hangs.
3397 */
3398 for_each_active_iommu(iommu, drhd) {
3399 iommu_flush_write_buffer(iommu);
3400 iommu_set_root_entry(iommu);
3401 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
3402 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3403 }
3404
19943b0e 3405 if (iommu_pass_through)
e0fc7e0b
DW
3406 iommu_identity_mapping |= IDENTMAP_ALL;
3407
d3f13810 3408#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
e0fc7e0b 3409 iommu_identity_mapping |= IDENTMAP_GFX;
19943b0e 3410#endif
e0fc7e0b 3411
21e722c4
AR
3412 check_tylersburg_isoch();
3413
86080ccc
JR
3414 if (iommu_identity_mapping) {
3415 ret = si_domain_init(hw_pass_through);
3416 if (ret)
3417 goto free_iommu;
3418 }
3419
e0fc7e0b 3420
a87f4918
JR
3421 /*
3422 * If we copied translations from a previous kernel in the kdump
3423 * case, we can not assign the devices to domains now, as that
3424 * would eliminate the old mappings. So skip this part and defer
3425 * the assignment to device driver initialization time.
3426 */
3427 if (copied_tables)
3428 goto domains_done;
3429
ba395927 3430 /*
19943b0e
DW
3431 * If pass through is not set or not enabled, set up context entries for
3432 * identity mappings for rmrr, gfx, and isa; this may fall back to the
3433 * static identity mapping if iommu_identity_mapping is set.
ba395927 3434 */
19943b0e
DW
3435 if (iommu_identity_mapping) {
3436 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
4ed0d3e6 3437 if (ret) {
9f10e5bf 3438 pr_crit("Failed to setup IOMMU pass-through\n");
989d51fc 3439 goto free_iommu;
ba395927
KA
3440 }
3441 }
ba395927 3442 /*
19943b0e
DW
3443 * For each rmrr
3444 * for each dev attached to rmrr
3445 * do
3446 * locate drhd for dev, alloc domain for dev
3447 * allocate free domain
3448 * allocate page table entries for rmrr
3449 * if context not allocated for bus
3450 * allocate and init context
3451 * set present in root table for this bus
3452 * init context with domain, translation etc
3453 * endfor
3454 * endfor
ba395927 3455 */
9f10e5bf 3456 pr_info("Setting RMRR:\n");
19943b0e 3457 for_each_rmrr_units(rmrr) {
b683b230
JL
3458 /* some BIOS lists non-exist devices in DMAR table. */
3459 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
832bd858 3460 i, dev) {
0b9d9753 3461 ret = iommu_prepare_rmrr_dev(rmrr, dev);
19943b0e 3462 if (ret)
9f10e5bf 3463 pr_err("Mapping reserved region failed\n");
ba395927 3464 }
4ed0d3e6 3465 }
49a0429e 3466
19943b0e
DW
3467 iommu_prepare_isa();
3468
a87f4918
JR
3469domains_done:
3470
ba395927
KA
3471 /*
3472 * for each drhd
3473 * enable fault log
3474 * global invalidate context cache
3475 * global invalidate iotlb
3476 * enable translation
3477 */
7c919779 3478 for_each_iommu(iommu, drhd) {
51a63e67
JC
3479 if (drhd->ignored) {
3480 /*
3481 * we always have to disable PMRs or DMA may fail on
3482 * this device
3483 */
3484 if (force_on)
7c919779 3485 iommu_disable_protect_mem_regions(iommu);
ba395927 3486 continue;
51a63e67 3487 }
ba395927
KA
3488
3489 iommu_flush_write_buffer(iommu);
3490
a222a7f0 3491#ifdef CONFIG_INTEL_IOMMU_SVM
765b6a98 3492 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
a222a7f0
DW
3493 ret = intel_svm_enable_prq(iommu);
3494 if (ret)
3495 goto free_iommu;
3496 }
3497#endif
3460a6d9
KA
3498 ret = dmar_set_interrupt(iommu);
3499 if (ret)
989d51fc 3500 goto free_iommu;
3460a6d9 3501
8939ddf6
JR
3502 if (!translation_pre_enabled(iommu))
3503 iommu_enable_translation(iommu);
3504
b94996c9 3505 iommu_disable_protect_mem_regions(iommu);
ba395927
KA
3506 }
3507
3508 return 0;
989d51fc
JL
3509
3510free_iommu:
ffebeb46
JL
3511 for_each_active_iommu(iommu, drhd) {
3512 disable_dmar_iommu(iommu);
a868e6b7 3513 free_dmar_iommu(iommu);
ffebeb46 3514 }
13cf0174 3515
d9630fe9 3516 kfree(g_iommus);
13cf0174 3517
989d51fc 3518error:
ba395927
KA
3519 return ret;
3520}
3521
5a5e02a6 3522/* This takes a number of _MM_ pages, not VTD pages */
2aac6304 3523static unsigned long intel_alloc_iova(struct device *dev,
875764de
DW
3524 struct dmar_domain *domain,
3525 unsigned long nrpages, uint64_t dma_mask)
ba395927 3526{
e083ea5b 3527 unsigned long iova_pfn;
ba395927 3528
875764de
DW
3529 /* Restrict dma_mask to the width that the iommu can handle */
3530 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
8f6429c7
RM
3531 /* Ensure we reserve the whole size-aligned region */
3532 nrpages = __roundup_pow_of_two(nrpages);
875764de
DW
3533
3534 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
ba395927
KA
3535 /*
3536 * First try to allocate an I/O virtual address within
284901a9 3537 * DMA_BIT_MASK(32); if that fails, then try allocating
3609801e 3538 * from the higher range
ba395927 3539 */
22e2f9fa 3540 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
538d5b33 3541 IOVA_PFN(DMA_BIT_MASK(32)), false);
22e2f9fa
OP
3542 if (iova_pfn)
3543 return iova_pfn;
875764de 3544 }
538d5b33
TN
3545 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
3546 IOVA_PFN(dma_mask), true);
22e2f9fa 3547 if (unlikely(!iova_pfn)) {
932a6523 3548 dev_err(dev, "Allocating %ld-page iova failed", nrpages);
2aac6304 3549 return 0;
f76aec76
KA
3550 }
3551
22e2f9fa 3552 return iova_pfn;
f76aec76
KA
3553}
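/*
 * Illustrative caller sketch (mirrors the callers below in this file; "dev",
 * "domain" and "nr_vtd_pages" are placeholders): the request is in MM pages
 * and is rounded up to a power of two internally, so asking for e.g. 5 pages
 * reserves 8, and a return value of 0 means the allocation failed.
 *
 *	unsigned long iova_pfn;
 *
 *	iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(nr_vtd_pages),
 *				    *dev->dma_mask);
 *	if (!iova_pfn)
 *		return DMA_MAPPING_ERROR;
 */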
3554
9ddbfb42 3555struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
f76aec76 3556{
1c5ebba9 3557 struct dmar_domain *domain, *tmp;
b1ce5b79 3558 struct dmar_rmrr_unit *rmrr;
b1ce5b79
JR
3559 struct device *i_dev;
3560 int i, ret;
f76aec76 3561
1c5ebba9
JR
3562 domain = find_domain(dev);
3563 if (domain)
3564 goto out;
3565
3566 domain = find_or_alloc_domain(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
3567 if (!domain)
3568 goto out;
ba395927 3569
b1ce5b79
JR
3570 /* We have a new domain - setup possible RMRRs for the device */
3571 rcu_read_lock();
3572 for_each_rmrr_units(rmrr) {
3573 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
3574 i, i_dev) {
3575 if (i_dev != dev)
3576 continue;
3577
3578 ret = domain_prepare_identity_map(dev, domain,
3579 rmrr->base_address,
3580 rmrr->end_address);
3581 if (ret)
3582 dev_err(dev, "Mapping reserved region failed\n");
3583 }
3584 }
3585 rcu_read_unlock();
3586
1c5ebba9
JR
3587 tmp = set_domain_for_dev(dev, domain);
3588 if (!tmp || domain != tmp) {
3589 domain_exit(domain);
3590 domain = tmp;
3591 }
3592
3593out:
3594
3595 if (!domain)
932a6523 3596 dev_err(dev, "Allocating domain failed\n");
1c5ebba9
JR
3597
3598
f76aec76
KA
3599 return domain;
3600}
3601
ecb509ec 3602/* Check if the dev needs to go through non-identity map and unmap process.*/
73676832 3603static int iommu_no_mapping(struct device *dev)
2c2e2c38
FY
3604{
3605 int found;
3606
3d89194a 3607 if (iommu_dummy(dev))
1e4c64c4
DW
3608 return 1;
3609
2c2e2c38 3610 if (!iommu_identity_mapping)
1e4c64c4 3611 return 0;
2c2e2c38 3612
9b226624 3613 found = identity_mapping(dev);
2c2e2c38 3614 if (found) {
ecb509ec 3615 if (iommu_should_identity_map(dev, 0))
2c2e2c38
FY
3616 return 1;
3617 else {
3618 /*
3619 * A 32-bit DMA device is removed from si_domain and falls
3620 * back to non-identity mapping.
3621 */
71753239 3622 dmar_remove_one_dev_info(dev);
932a6523 3623 dev_info(dev, "32bit DMA uses non-identity mapping\n");
2c2e2c38
FY
3624 return 0;
3625 }
3626 } else {
3627 /*
3628 * If a 64-bit DMA device has been detached from a VM, the device
3629 * is put back into si_domain for identity mapping.
3630 */
ecb509ec 3631 if (iommu_should_identity_map(dev, 0)) {
2c2e2c38 3632 int ret;
28ccce0d 3633 ret = domain_add_dev_info(si_domain, dev);
2c2e2c38 3634 if (!ret) {
932a6523 3635 dev_info(dev, "64bit DMA uses identity mapping\n");
2c2e2c38
FY
3636 return 1;
3637 }
3638 }
3639 }
3640
1e4c64c4 3641 return 0;
2c2e2c38
FY
3642}
3643
21d5d27c
LG
3644static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
3645 size_t size, int dir, u64 dma_mask)
f76aec76 3646{
f76aec76 3647 struct dmar_domain *domain;
5b6985ce 3648 phys_addr_t start_paddr;
2aac6304 3649 unsigned long iova_pfn;
f76aec76 3650 int prot = 0;
6865f0d1 3651 int ret;
8c11e798 3652 struct intel_iommu *iommu;
33041ec0 3653 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
f76aec76
KA
3654
3655 BUG_ON(dir == DMA_NONE);
2c2e2c38 3656
5040a918 3657 if (iommu_no_mapping(dev))
6865f0d1 3658 return paddr;
f76aec76 3659
5040a918 3660 domain = get_valid_domain_for_dev(dev);
f76aec76 3661 if (!domain)
524a669b 3662 return DMA_MAPPING_ERROR;
f76aec76 3663
8c11e798 3664 iommu = domain_get_iommu(domain);
88cb6a74 3665 size = aligned_nrpages(paddr, size);
f76aec76 3666
2aac6304
OP
3667 iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
3668 if (!iova_pfn)
f76aec76
KA
3669 goto error;
3670
ba395927
KA
3671 /*
3672 * Check if DMAR supports zero-length reads on write-only
3673 * mappings.
3674 */
3675 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3676 !cap_zlr(iommu->cap))
ba395927
KA
3677 prot |= DMA_PTE_READ;
3678 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3679 prot |= DMA_PTE_WRITE;
3680 /*
6865f0d1 3681 * paddr .. (paddr + size) might cover only part of a page, so we should map
ba395927 3682 * the whole page. Note: if two parts of one page are mapped separately, we
6865f0d1 3683 * might have two guest addresses mapping to the same host paddr, but this
ba395927
KA
3684 * is not a big problem
3685 */
2aac6304 3686 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova_pfn),
33041ec0 3687 mm_to_dma_pfn(paddr_pfn), size, prot);
ba395927
KA
3688 if (ret)
3689 goto error;
3690
2aac6304 3691 start_paddr = (phys_addr_t)iova_pfn << PAGE_SHIFT;
03d6a246
DW
3692 start_paddr += paddr & ~PAGE_MASK;
3693 return start_paddr;
ba395927 3694
ba395927 3695error:
2aac6304 3696 if (iova_pfn)
22e2f9fa 3697 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
932a6523
BH
3698 dev_err(dev, "Device request: %zx@%llx dir %d --- failed\n",
3699 size, (unsigned long long)paddr, dir);
524a669b 3700 return DMA_MAPPING_ERROR;
ba395927
KA
3701}
3702
ffbbef5c
FT
3703static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3704 unsigned long offset, size_t size,
3705 enum dma_data_direction dir,
00085f1e 3706 unsigned long attrs)
bb9e6d65 3707{
21d5d27c
LG
3708 return __intel_map_single(dev, page_to_phys(page) + offset, size,
3709 dir, *dev->dma_mask);
3710}
3711
3712static dma_addr_t intel_map_resource(struct device *dev, phys_addr_t phys_addr,
3713 size_t size, enum dma_data_direction dir,
3714 unsigned long attrs)
3715{
3716 return __intel_map_single(dev, phys_addr, size, dir, *dev->dma_mask);
bb9e6d65
FT
3717}
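/*
 * Illustrative driver-side sketch (not from this file; "dev", "bar_phys" and
 * "bar_len" are placeholders): dma_map_resource() is the generic entry point
 * that ends up in intel_map_resource() above, e.g. when mapping a peer
 * device's MMIO region for DMA.
 *
 *	static dma_addr_t example_map_peer_bar(struct device *dev,
 *					       phys_addr_t bar_phys,
 *					       size_t bar_len)
 *	{
 *		dma_addr_t dma;
 *
 *		dma = dma_map_resource(dev, bar_phys, bar_len,
 *				       DMA_BIDIRECTIONAL, 0);
 *		if (dma_mapping_error(dev, dma))
 *			return DMA_MAPPING_ERROR;
 *		return dma;
 *	}
 */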
3718
769530e4 3719static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
ba395927 3720{
f76aec76 3721 struct dmar_domain *domain;
d794dc9b 3722 unsigned long start_pfn, last_pfn;
769530e4 3723 unsigned long nrpages;
2aac6304 3724 unsigned long iova_pfn;
8c11e798 3725 struct intel_iommu *iommu;
ea8ea460 3726 struct page *freelist;
ba395927 3727
73676832 3728 if (iommu_no_mapping(dev))
f76aec76 3729 return;
2c2e2c38 3730
1525a29a 3731 domain = find_domain(dev);
ba395927
KA
3732 BUG_ON(!domain);
3733
8c11e798
WH
3734 iommu = domain_get_iommu(domain);
3735
2aac6304 3736 iova_pfn = IOVA_PFN(dev_addr);
ba395927 3737
769530e4 3738 nrpages = aligned_nrpages(dev_addr, size);
2aac6304 3739 start_pfn = mm_to_dma_pfn(iova_pfn);
769530e4 3740 last_pfn = start_pfn + nrpages - 1;
ba395927 3741
932a6523 3742 dev_dbg(dev, "Device unmapping: pfn %lx-%lx\n", start_pfn, last_pfn);
ba395927 3743
ea8ea460 3744 freelist = domain_unmap(domain, start_pfn, last_pfn);
d794dc9b 3745
5e0d2a6f 3746 if (intel_iommu_strict) {
a1ddcbe9 3747 iommu_flush_iotlb_psi(iommu, domain, start_pfn,
769530e4 3748 nrpages, !freelist, 0);
5e0d2a6f 3749 /* free iova */
22e2f9fa 3750 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages));
ea8ea460 3751 dma_free_pagelist(freelist);
5e0d2a6f 3752 } else {
13cf0174
JR
3753 queue_iova(&domain->iovad, iova_pfn, nrpages,
3754 (unsigned long)freelist);
5e0d2a6f 3755 /*
3756 * queue up the release of the unmap to save the 1/6th of the
3757 * cpu used up by the iotlb flush operation...
3758 */
5e0d2a6f 3759 }
ba395927
KA
3760}
3761
d41a4adb
JL
3762static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3763 size_t size, enum dma_data_direction dir,
00085f1e 3764 unsigned long attrs)
d41a4adb 3765{
769530e4 3766 intel_unmap(dev, dev_addr, size);
d41a4adb
JL
3767}
3768
5040a918 3769static void *intel_alloc_coherent(struct device *dev, size_t size,
baa676fc 3770 dma_addr_t *dma_handle, gfp_t flags,
00085f1e 3771 unsigned long attrs)
ba395927 3772{
7ec916f8
CH
3773 struct page *page = NULL;
3774 int order;
ba395927 3775
7ec916f8
CH
3776 size = PAGE_ALIGN(size);
3777 order = get_order(size);
36746436 3778
7ec916f8
CH
3779 if (!iommu_no_mapping(dev))
3780 flags &= ~(GFP_DMA | GFP_DMA32);
3781 else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
3782 if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
3783 flags |= GFP_DMA;
3784 else
3785 flags |= GFP_DMA32;
3786 }
3787
3788 if (gfpflags_allow_blocking(flags)) {
3789 unsigned int count = size >> PAGE_SHIFT;
3790
d834c5ab
MS
3791 page = dma_alloc_from_contiguous(dev, count, order,
3792 flags & __GFP_NOWARN);
7ec916f8
CH
3793 if (page && iommu_no_mapping(dev) &&
3794 page_to_phys(page) + size > dev->coherent_dma_mask) {
3795 dma_release_from_contiguous(dev, page, count);
3796 page = NULL;
3797 }
3798 }
3799
3800 if (!page)
3801 page = alloc_pages(flags, order);
3802 if (!page)
3803 return NULL;
3804 memset(page_address(page), 0, size);
3805
21d5d27c
LG
3806 *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
3807 DMA_BIDIRECTIONAL,
3808 dev->coherent_dma_mask);
524a669b 3809 if (*dma_handle != DMA_MAPPING_ERROR)
7ec916f8
CH
3810 return page_address(page);
3811 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3812 __free_pages(page, order);
36746436 3813
ba395927
KA
3814 return NULL;
3815}
3816
5040a918 3817static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
00085f1e 3818 dma_addr_t dma_handle, unsigned long attrs)
ba395927 3819{
7ec916f8
CH
3820 int order;
3821 struct page *page = virt_to_page(vaddr);
3822
3823 size = PAGE_ALIGN(size);
3824 order = get_order(size);
3825
3826 intel_unmap(dev, dma_handle, size);
3827 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3828 __free_pages(page, order);
ba395927
KA
3829}
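/*
 * Illustrative driver-side sketch (not from this file; "dev" is assumed to be
 * a device behind this IOMMU): dma_alloc_coherent() is the generic entry
 * point that lands in intel_alloc_coherent() above once intel_dma_ops is
 * installed as the dma_map_ops.
 *
 *	static int example_alloc_ring(struct device *dev)
 *	{
 *		dma_addr_t ring_dma;
 *		void *ring;
 *
 *		ring = dma_alloc_coherent(dev, PAGE_SIZE, &ring_dma, GFP_KERNEL);
 *		if (!ring)
 *			return -ENOMEM;
 *
 *		... program ring_dma into the device and use the ring ...
 *
 *		dma_free_coherent(dev, PAGE_SIZE, ring, ring_dma);
 *		return 0;
 *	}
 */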
3830
5040a918 3831static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
d7ab5c46 3832 int nelems, enum dma_data_direction dir,
00085f1e 3833 unsigned long attrs)
ba395927 3834{
769530e4
OP
3835 dma_addr_t startaddr = sg_dma_address(sglist) & PAGE_MASK;
3836 unsigned long nrpages = 0;
3837 struct scatterlist *sg;
3838 int i;
3839
3840 for_each_sg(sglist, sg, nelems, i) {
3841 nrpages += aligned_nrpages(sg_dma_address(sg), sg_dma_len(sg));
3842 }
3843
3844 intel_unmap(dev, startaddr, nrpages << VTD_PAGE_SHIFT);
ba395927
KA
3845}
3846
ba395927 3847static int intel_nontranslate_map_sg(struct device *hddev,
c03ab37c 3848 struct scatterlist *sglist, int nelems, int dir)
ba395927
KA
3849{
3850 int i;
c03ab37c 3851 struct scatterlist *sg;
ba395927 3852
c03ab37c 3853 for_each_sg(sglist, sg, nelems, i) {
12d4d40e 3854 BUG_ON(!sg_page(sg));
29a90b70 3855 sg->dma_address = sg_phys(sg);
c03ab37c 3856 sg->dma_length = sg->length;
ba395927
KA
3857 }
3858 return nelems;
3859}
3860
5040a918 3861static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
00085f1e 3862 enum dma_data_direction dir, unsigned long attrs)
ba395927 3863{
ba395927 3864 int i;
ba395927 3865 struct dmar_domain *domain;
f76aec76
KA
3866 size_t size = 0;
3867 int prot = 0;
2aac6304 3868 unsigned long iova_pfn;
f76aec76 3869 int ret;
c03ab37c 3870 struct scatterlist *sg;
b536d24d 3871 unsigned long start_vpfn;
8c11e798 3872 struct intel_iommu *iommu;
ba395927
KA
3873
3874 BUG_ON(dir == DMA_NONE);
5040a918
DW
3875 if (iommu_no_mapping(dev))
3876 return intel_nontranslate_map_sg(dev, sglist, nelems, dir);
ba395927 3877
5040a918 3878 domain = get_valid_domain_for_dev(dev);
f76aec76
KA
3879 if (!domain)
3880 return 0;
3881
8c11e798
WH
3882 iommu = domain_get_iommu(domain);
3883
b536d24d 3884 for_each_sg(sglist, sg, nelems, i)
88cb6a74 3885 size += aligned_nrpages(sg->offset, sg->length);
f76aec76 3886
2aac6304 3887 iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
5040a918 3888 *dev->dma_mask);
2aac6304 3889 if (!iova_pfn) {
c03ab37c 3890 sglist->dma_length = 0;
f76aec76
KA
3891 return 0;
3892 }
3893
3894 /*
3895 * Check if DMAR supports zero-length reads on write-only
3896 * mappings.
3897 */
3898 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3899 !cap_zlr(iommu->cap))
f76aec76
KA
3900 prot |= DMA_PTE_READ;
3901 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3902 prot |= DMA_PTE_WRITE;
3903
2aac6304 3904 start_vpfn = mm_to_dma_pfn(iova_pfn);
e1605495 3905
f532959b 3906 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
e1605495 3907 if (unlikely(ret)) {
e1605495 3908 dma_pte_free_pagetable(domain, start_vpfn,
bc24c571
DD
3909 start_vpfn + size - 1,
3910 agaw_to_level(domain->agaw) + 1);
22e2f9fa 3911 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
e1605495 3912 return 0;
ba395927
KA
3913 }
3914
ba395927
KA
3915 return nelems;
3916}
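/*
 * Illustrative driver-side sketch (not from this file; "dev", "sgl" and
 * "nents" are placeholders for an already initialised scatterlist):
 * dma_map_sg() funnels into intel_map_sg() above; the returned count may be
 * smaller than nents, and dma_unmap_sg() must be called with the original
 * nents, not the returned value.
 *
 *	int mapped;
 *
 *	mapped = dma_map_sg(dev, sgl, nents, DMA_TO_DEVICE);
 *	if (!mapped)
 *		return -EIO;
 *	... hand the mapped list to the hardware ...
 *	dma_unmap_sg(dev, sgl, nents, DMA_TO_DEVICE);
 */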
3917
02b4da5f 3918static const struct dma_map_ops intel_dma_ops = {
baa676fc
AP
3919 .alloc = intel_alloc_coherent,
3920 .free = intel_free_coherent,
ba395927
KA
3921 .map_sg = intel_map_sg,
3922 .unmap_sg = intel_unmap_sg,
ffbbef5c
FT
3923 .map_page = intel_map_page,
3924 .unmap_page = intel_unmap_page,
21d5d27c
LG
3925 .map_resource = intel_map_resource,
3926 .unmap_resource = intel_unmap_page,
fec777c3 3927 .dma_supported = dma_direct_supported,
ba395927
KA
3928};
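/*
 * Illustrative driver-side sketch (not from this file; "dev", "buf" and "len"
 * are placeholders): with intel_dma_ops installed as the dma_map_ops,
 * dma_map_single() on a kmalloc'ed buffer resolves to intel_map_page() above
 * and dma_unmap_single() to intel_unmap_page().
 *
 *	dma_addr_t dma;
 *
 *	dma = dma_map_single(dev, buf, len, DMA_FROM_DEVICE);
 *	if (dma_mapping_error(dev, dma))
 *		return -ENOMEM;
 *	... start the device transfer and wait for completion ...
 *	dma_unmap_single(dev, dma, len, DMA_FROM_DEVICE);
 */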
3929
3930static inline int iommu_domain_cache_init(void)
3931{
3932 int ret = 0;
3933
3934 iommu_domain_cache = kmem_cache_create("iommu_domain",
3935 sizeof(struct dmar_domain),
3936 0,
3937 SLAB_HWCACHE_ALIGN,
3938
3939 NULL);
3940 if (!iommu_domain_cache) {
9f10e5bf 3941 pr_err("Couldn't create iommu_domain cache\n");
ba395927
KA
3942 ret = -ENOMEM;
3943 }
3944
3945 return ret;
3946}
3947
3948static inline int iommu_devinfo_cache_init(void)
3949{
3950 int ret = 0;
3951
3952 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3953 sizeof(struct device_domain_info),
3954 0,
3955 SLAB_HWCACHE_ALIGN,
ba395927
KA
3956 NULL);
3957 if (!iommu_devinfo_cache) {
9f10e5bf 3958 pr_err("Couldn't create devinfo cache\n");
ba395927
KA
3959 ret = -ENOMEM;
3960 }
3961
3962 return ret;
3963}
3964
ba395927
KA
3965static int __init iommu_init_mempool(void)
3966{
3967 int ret;
ae1ff3d6 3968 ret = iova_cache_get();
ba395927
KA
3969 if (ret)
3970 return ret;
3971
3972 ret = iommu_domain_cache_init();
3973 if (ret)
3974 goto domain_error;
3975
3976 ret = iommu_devinfo_cache_init();
3977 if (!ret)
3978 return ret;
3979
3980 kmem_cache_destroy(iommu_domain_cache);
3981domain_error:
ae1ff3d6 3982 iova_cache_put();
ba395927
KA
3983
3984 return -ENOMEM;
3985}
3986
3987static void __init iommu_exit_mempool(void)
3988{
3989 kmem_cache_destroy(iommu_devinfo_cache);
3990 kmem_cache_destroy(iommu_domain_cache);
ae1ff3d6 3991 iova_cache_put();
ba395927
KA
3992}
3993
556ab45f
DW
3994static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3995{
3996 struct dmar_drhd_unit *drhd;
3997 u32 vtbar;
3998 int rc;
3999
4000 /* We know that this device on this chipset has its own IOMMU.
4001 * If we find it under a different IOMMU, then the BIOS is lying
4002 * to us. Hope that the IOMMU for this device is actually
4003 * disabled, and it needs no translation...
4004 */
4005 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
4006 if (rc) {
4007 /* "can't" happen */
4008 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
4009 return;
4010 }
4011 vtbar &= 0xffff0000;
4012
4013 /* we know that this iommu should be at offset 0xa000 from vtbar */
4014 drhd = dmar_find_matched_drhd_unit(pdev);
4015 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
4016 TAINT_FIRMWARE_WORKAROUND,
4017 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
4018 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
4019}
4020DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
4021
ba395927
KA
4022static void __init init_no_remapping_devices(void)
4023{
4024 struct dmar_drhd_unit *drhd;
832bd858 4025 struct device *dev;
b683b230 4026 int i;
ba395927
KA
4027
4028 for_each_drhd_unit(drhd) {
4029 if (!drhd->include_all) {
b683b230
JL
4030 for_each_active_dev_scope(drhd->devices,
4031 drhd->devices_cnt, i, dev)
4032 break;
832bd858 4033 /* ignore DMAR unit if no devices exist */
ba395927
KA
4034 if (i == drhd->devices_cnt)
4035 drhd->ignored = 1;
4036 }
4037 }
4038
7c919779 4039 for_each_active_drhd_unit(drhd) {
7c919779 4040 if (drhd->include_all)
ba395927
KA
4041 continue;
4042
b683b230
JL
4043 for_each_active_dev_scope(drhd->devices,
4044 drhd->devices_cnt, i, dev)
832bd858 4045 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
ba395927 4046 break;
ba395927
KA
4047 if (i < drhd->devices_cnt)
4048 continue;
4049
c0771df8
DW
4050 /* This IOMMU has *only* gfx devices. Either bypass it or
4051 set the gfx_mapped flag, as appropriate */
4052 if (dmar_map_gfx) {
4053 intel_iommu_gfx_mapped = 1;
4054 } else {
4055 drhd->ignored = 1;
b683b230
JL
4056 for_each_active_dev_scope(drhd->devices,
4057 drhd->devices_cnt, i, dev)
832bd858 4058 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
ba395927
KA
4059 }
4060 }
4061}
4062
f59c7b69
FY
4063#ifdef CONFIG_SUSPEND
4064static int init_iommu_hw(void)
4065{
4066 struct dmar_drhd_unit *drhd;
4067 struct intel_iommu *iommu = NULL;
4068
4069 for_each_active_iommu(iommu, drhd)
4070 if (iommu->qi)
4071 dmar_reenable_qi(iommu);
4072
b779260b
JC
4073 for_each_iommu(iommu, drhd) {
4074 if (drhd->ignored) {
4075 /*
4076 * we always have to disable PMRs or DMA may fail on
4077 * this device
4078 */
4079 if (force_on)
4080 iommu_disable_protect_mem_regions(iommu);
4081 continue;
4082 }
4083
f59c7b69
FY
4084 iommu_flush_write_buffer(iommu);
4085
4086 iommu_set_root_entry(iommu);
4087
4088 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 4089 DMA_CCMD_GLOBAL_INVL);
2a41ccee
JL
4090 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4091 iommu_enable_translation(iommu);
b94996c9 4092 iommu_disable_protect_mem_regions(iommu);
f59c7b69
FY
4093 }
4094
4095 return 0;
4096}
4097
4098static void iommu_flush_all(void)
4099{
4100 struct dmar_drhd_unit *drhd;
4101 struct intel_iommu *iommu;
4102
4103 for_each_active_iommu(iommu, drhd) {
4104 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 4105 DMA_CCMD_GLOBAL_INVL);
f59c7b69 4106 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 4107 DMA_TLB_GLOBAL_FLUSH);
f59c7b69
FY
4108 }
4109}
4110
134fac3f 4111static int iommu_suspend(void)
f59c7b69
FY
4112{
4113 struct dmar_drhd_unit *drhd;
4114 struct intel_iommu *iommu = NULL;
4115 unsigned long flag;
4116
4117 for_each_active_iommu(iommu, drhd) {
6396bb22 4118 iommu->iommu_state = kcalloc(MAX_SR_DMAR_REGS, sizeof(u32),
f59c7b69
FY
4119 GFP_ATOMIC);
4120 if (!iommu->iommu_state)
4121 goto nomem;
4122 }
4123
4124 iommu_flush_all();
4125
4126 for_each_active_iommu(iommu, drhd) {
4127 iommu_disable_translation(iommu);
4128
1f5b3c3f 4129 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
4130
4131 iommu->iommu_state[SR_DMAR_FECTL_REG] =
4132 readl(iommu->reg + DMAR_FECTL_REG);
4133 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
4134 readl(iommu->reg + DMAR_FEDATA_REG);
4135 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
4136 readl(iommu->reg + DMAR_FEADDR_REG);
4137 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
4138 readl(iommu->reg + DMAR_FEUADDR_REG);
4139
1f5b3c3f 4140 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
4141 }
4142 return 0;
4143
4144nomem:
4145 for_each_active_iommu(iommu, drhd)
4146 kfree(iommu->iommu_state);
4147
4148 return -ENOMEM;
4149}
4150
134fac3f 4151static void iommu_resume(void)
f59c7b69
FY
4152{
4153 struct dmar_drhd_unit *drhd;
4154 struct intel_iommu *iommu = NULL;
4155 unsigned long flag;
4156
4157 if (init_iommu_hw()) {
b779260b
JC
4158 if (force_on)
4159 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
4160 else
4161 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
134fac3f 4162 return;
f59c7b69
FY
4163 }
4164
4165 for_each_active_iommu(iommu, drhd) {
4166
1f5b3c3f 4167 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
4168
4169 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
4170 iommu->reg + DMAR_FECTL_REG);
4171 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
4172 iommu->reg + DMAR_FEDATA_REG);
4173 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
4174 iommu->reg + DMAR_FEADDR_REG);
4175 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
4176 iommu->reg + DMAR_FEUADDR_REG);
4177
1f5b3c3f 4178 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
4179 }
4180
4181 for_each_active_iommu(iommu, drhd)
4182 kfree(iommu->iommu_state);
f59c7b69
FY
4183}
4184
134fac3f 4185static struct syscore_ops iommu_syscore_ops = {
f59c7b69
FY
4186 .resume = iommu_resume,
4187 .suspend = iommu_suspend,
4188};
4189
134fac3f 4190static void __init init_iommu_pm_ops(void)
f59c7b69 4191{
134fac3f 4192 register_syscore_ops(&iommu_syscore_ops);
f59c7b69
FY
4193}
4194
4195#else
99592ba4 4196static inline void init_iommu_pm_ops(void) {}
f59c7b69
FY
4197#endif /* CONFIG_PM */
4198
318fe7df 4199
c2a0b538 4200int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
318fe7df
SS
4201{
4202 struct acpi_dmar_reserved_memory *rmrr;
0659b8dc 4203 int prot = DMA_PTE_READ|DMA_PTE_WRITE;
318fe7df 4204 struct dmar_rmrr_unit *rmrru;
0659b8dc 4205 size_t length;
318fe7df
SS
4206
4207 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
4208 if (!rmrru)
0659b8dc 4209 goto out;
318fe7df
SS
4210
4211 rmrru->hdr = header;
4212 rmrr = (struct acpi_dmar_reserved_memory *)header;
4213 rmrru->base_address = rmrr->base_address;
4214 rmrru->end_address = rmrr->end_address;
0659b8dc
EA
4215
4216 length = rmrr->end_address - rmrr->base_address + 1;
4217 rmrru->resv = iommu_alloc_resv_region(rmrr->base_address, length, prot,
4218 IOMMU_RESV_DIRECT);
4219 if (!rmrru->resv)
4220 goto free_rmrru;
4221
2e455289
JL
4222 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
4223 ((void *)rmrr) + rmrr->header.length,
4224 &rmrru->devices_cnt);
0659b8dc
EA
4225 if (rmrru->devices_cnt && rmrru->devices == NULL)
4226 goto free_all;
318fe7df 4227
2e455289 4228 list_add(&rmrru->list, &dmar_rmrr_units);
318fe7df 4229
2e455289 4230 return 0;
0659b8dc
EA
4231free_all:
4232 kfree(rmrru->resv);
4233free_rmrru:
4234 kfree(rmrru);
4235out:
4236 return -ENOMEM;
318fe7df
SS
4237}
4238
6b197249
JL
4239static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
4240{
4241 struct dmar_atsr_unit *atsru;
4242 struct acpi_dmar_atsr *tmp;
4243
4244 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4245 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
4246 if (atsr->segment != tmp->segment)
4247 continue;
4248 if (atsr->header.length != tmp->header.length)
4249 continue;
4250 if (memcmp(atsr, tmp, atsr->header.length) == 0)
4251 return atsru;
4252 }
4253
4254 return NULL;
4255}
4256
4257int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
318fe7df
SS
4258{
4259 struct acpi_dmar_atsr *atsr;
4260 struct dmar_atsr_unit *atsru;
4261
b608fe35 4262 if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
6b197249
JL
4263 return 0;
4264
318fe7df 4265 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
6b197249
JL
4266 atsru = dmar_find_atsr(atsr);
4267 if (atsru)
4268 return 0;
4269
4270 atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
318fe7df
SS
4271 if (!atsru)
4272 return -ENOMEM;
4273
6b197249
JL
4274 /*
4275 * If memory is allocated from slab by ACPI _DSM method, we need to
4276 * copy the memory content because the memory buffer will be freed
4277 * on return.
4278 */
4279 atsru->hdr = (void *)(atsru + 1);
4280 memcpy(atsru->hdr, hdr, hdr->length);
318fe7df 4281 atsru->include_all = atsr->flags & 0x1;
2e455289
JL
4282 if (!atsru->include_all) {
4283 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
4284 (void *)atsr + atsr->header.length,
4285 &atsru->devices_cnt);
4286 if (atsru->devices_cnt && atsru->devices == NULL) {
4287 kfree(atsru);
4288 return -ENOMEM;
4289 }
4290 }
318fe7df 4291
0e242612 4292 list_add_rcu(&atsru->list, &dmar_atsr_units);
318fe7df
SS
4293
4294 return 0;
4295}
4296
9bdc531e
JL
4297static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
4298{
4299 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
4300 kfree(atsru);
4301}
4302
6b197249
JL
4303int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4304{
4305 struct acpi_dmar_atsr *atsr;
4306 struct dmar_atsr_unit *atsru;
4307
4308 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4309 atsru = dmar_find_atsr(atsr);
4310 if (atsru) {
4311 list_del_rcu(&atsru->list);
4312 synchronize_rcu();
4313 intel_iommu_free_atsr(atsru);
4314 }
4315
4316 return 0;
4317}
4318
4319int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4320{
4321 int i;
4322 struct device *dev;
4323 struct acpi_dmar_atsr *atsr;
4324 struct dmar_atsr_unit *atsru;
4325
4326 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4327 atsru = dmar_find_atsr(atsr);
4328 if (!atsru)
4329 return 0;
4330
194dc870 4331 if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
6b197249
JL
4332 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
4333 i, dev)
4334 return -EBUSY;
194dc870 4335 }
6b197249
JL
4336
4337 return 0;
4338}
4339
ffebeb46
JL
4340static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
4341{
e083ea5b 4342 int sp, ret;
ffebeb46
JL
4343 struct intel_iommu *iommu = dmaru->iommu;
4344
4345 if (g_iommus[iommu->seq_id])
4346 return 0;
4347
4348 if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
9f10e5bf 4349 pr_warn("%s: Doesn't support hardware pass through.\n",
ffebeb46
JL
4350 iommu->name);
4351 return -ENXIO;
4352 }
4353 if (!ecap_sc_support(iommu->ecap) &&
4354 domain_update_iommu_snooping(iommu)) {
9f10e5bf 4355 pr_warn("%s: Doesn't support snooping.\n",
ffebeb46
JL
4356 iommu->name);
4357 return -ENXIO;
4358 }
4359 sp = domain_update_iommu_superpage(iommu) - 1;
4360 if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
9f10e5bf 4361 pr_warn("%s: Doesn't support large page.\n",
ffebeb46
JL
4362 iommu->name);
4363 return -ENXIO;
4364 }
4365
4366 /*
4367 * Disable translation if already enabled prior to OS handover.
4368 */
4369 if (iommu->gcmd & DMA_GCMD_TE)
4370 iommu_disable_translation(iommu);
4371
4372 g_iommus[iommu->seq_id] = iommu;
4373 ret = iommu_init_domains(iommu);
4374 if (ret == 0)
4375 ret = iommu_alloc_root_entry(iommu);
4376 if (ret)
4377 goto out;
4378
8a94ade4 4379#ifdef CONFIG_INTEL_IOMMU_SVM
765b6a98 4380 if (pasid_supported(iommu))
d9737953 4381 intel_svm_init(iommu);
8a94ade4
DW
4382#endif
4383
ffebeb46
JL
4384 if (dmaru->ignored) {
4385 /*
4386 * we always have to disable PMRs or DMA may fail on this device
4387 */
4388 if (force_on)
4389 iommu_disable_protect_mem_regions(iommu);
4390 return 0;
4391 }
4392
4393 intel_iommu_init_qi(iommu);
4394 iommu_flush_write_buffer(iommu);
a222a7f0
DW
4395
4396#ifdef CONFIG_INTEL_IOMMU_SVM
765b6a98 4397 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
a222a7f0
DW
4398 ret = intel_svm_enable_prq(iommu);
4399 if (ret)
4400 goto disable_iommu;
4401 }
4402#endif
ffebeb46
JL
4403 ret = dmar_set_interrupt(iommu);
4404 if (ret)
4405 goto disable_iommu;
4406
4407 iommu_set_root_entry(iommu);
4408 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
4409 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4410 iommu_enable_translation(iommu);
4411
ffebeb46
JL
4412 iommu_disable_protect_mem_regions(iommu);
4413 return 0;
4414
4415disable_iommu:
4416 disable_dmar_iommu(iommu);
4417out:
4418 free_dmar_iommu(iommu);
4419 return ret;
4420}
4421
6b197249
JL
4422int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
4423{
ffebeb46
JL
4424 int ret = 0;
4425 struct intel_iommu *iommu = dmaru->iommu;
4426
4427 if (!intel_iommu_enabled)
4428 return 0;
4429 if (iommu == NULL)
4430 return -EINVAL;
4431
4432 if (insert) {
4433 ret = intel_iommu_add(dmaru);
4434 } else {
4435 disable_dmar_iommu(iommu);
4436 free_dmar_iommu(iommu);
4437 }
4438
4439 return ret;
6b197249
JL
4440}
4441
9bdc531e
JL
4442static void intel_iommu_free_dmars(void)
4443{
4444 struct dmar_rmrr_unit *rmrru, *rmrr_n;
4445 struct dmar_atsr_unit *atsru, *atsr_n;
4446
4447 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
4448 list_del(&rmrru->list);
4449 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
0659b8dc 4450 kfree(rmrru->resv);
9bdc531e 4451 kfree(rmrru);
318fe7df
SS
4452 }
4453
9bdc531e
JL
4454 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
4455 list_del(&atsru->list);
4456 intel_iommu_free_atsr(atsru);
4457 }
318fe7df
SS
4458}
4459
4460int dmar_find_matched_atsr_unit(struct pci_dev *dev)
4461{
b683b230 4462 int i, ret = 1;
318fe7df 4463 struct pci_bus *bus;
832bd858
DW
4464 struct pci_dev *bridge = NULL;
4465 struct device *tmp;
318fe7df
SS
4466 struct acpi_dmar_atsr *atsr;
4467 struct dmar_atsr_unit *atsru;
4468
4469 dev = pci_physfn(dev);
318fe7df 4470 for (bus = dev->bus; bus; bus = bus->parent) {
b5f82ddf 4471 bridge = bus->self;
d14053b3
DW
4472 /* If it's an integrated device, allow ATS */
4473 if (!bridge)
4474 return 1;
4475 /* Connected via non-PCIe: no ATS */
4476 if (!pci_is_pcie(bridge) ||
62f87c0e 4477 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
318fe7df 4478 return 0;
d14053b3 4479 /* If we found the root port, look it up in the ATSR */
b5f82ddf 4480 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
318fe7df 4481 break;
318fe7df
SS
4482 }
4483
0e242612 4484 rcu_read_lock();
b5f82ddf
JL
4485 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4486 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4487 if (atsr->segment != pci_domain_nr(dev->bus))
4488 continue;
4489
b683b230 4490 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
832bd858 4491 if (tmp == &bridge->dev)
b683b230 4492 goto out;
b5f82ddf
JL
4493
4494 if (atsru->include_all)
b683b230 4495 goto out;
b5f82ddf 4496 }
b683b230
JL
4497 ret = 0;
4498out:
0e242612 4499 rcu_read_unlock();
318fe7df 4500
b683b230 4501 return ret;
318fe7df
SS
4502}
4503
59ce0515
JL
4504int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
4505{
e083ea5b 4506 int ret;
59ce0515
JL
4507 struct dmar_rmrr_unit *rmrru;
4508 struct dmar_atsr_unit *atsru;
4509 struct acpi_dmar_atsr *atsr;
4510 struct acpi_dmar_reserved_memory *rmrr;
4511
b608fe35 4512 if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING)
59ce0515
JL
4513 return 0;
4514
4515 list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
4516 rmrr = container_of(rmrru->hdr,
4517 struct acpi_dmar_reserved_memory, header);
4518 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4519 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
4520 ((void *)rmrr) + rmrr->header.length,
4521 rmrr->segment, rmrru->devices,
4522 rmrru->devices_cnt);
e083ea5b 4523 if (ret < 0)
59ce0515 4524 return ret;
e6a8c9b3 4525 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
27e24950
JL
4526 dmar_remove_dev_scope(info, rmrr->segment,
4527 rmrru->devices, rmrru->devices_cnt);
59ce0515
JL
4528 }
4529 }
4530
4531 list_for_each_entry(atsru, &dmar_atsr_units, list) {
4532 if (atsru->include_all)
4533 continue;
4534
4535 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4536 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4537 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
4538 (void *)atsr + atsr->header.length,
4539 atsr->segment, atsru->devices,
4540 atsru->devices_cnt);
4541 if (ret > 0)
4542 break;
e083ea5b 4543 else if (ret < 0)
59ce0515 4544 return ret;
e6a8c9b3 4545 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
59ce0515
JL
4546 if (dmar_remove_dev_scope(info, atsr->segment,
4547 atsru->devices, atsru->devices_cnt))
4548 break;
4549 }
4550 }
4551
4552 return 0;
4553}
4554
99dcaded
FY
4555/*
4556 * Here we only respond to a device being unbound from its driver.
4557 *
4558 * A newly added device is not attached to its DMAR domain here yet. That will
4559 * happen when the device is mapped to an iova.
4560 */
4561static int device_notifier(struct notifier_block *nb,
4562 unsigned long action, void *data)
4563{
4564 struct device *dev = data;
99dcaded
FY
4565 struct dmar_domain *domain;
4566
3d89194a 4567 if (iommu_dummy(dev))
44cd613c
DW
4568 return 0;
4569
117266fd
LB
4570 if (action == BUS_NOTIFY_REMOVED_DEVICE) {
4571 domain = find_domain(dev);
4572 if (!domain)
4573 return 0;
99dcaded 4574
117266fd
LB
4575 dmar_remove_one_dev_info(dev);
4576 if (!domain_type_is_vm_or_si(domain) &&
4577 list_empty(&domain->devices))
4578 domain_exit(domain);
4579 } else if (action == BUS_NOTIFY_ADD_DEVICE) {
4580 if (iommu_should_identity_map(dev, 1))
4581 domain_add_dev_info(si_domain, dev);
4582 }
a97590e5 4583
99dcaded
FY
4584 return 0;
4585}
4586
4587static struct notifier_block device_nb = {
4588 .notifier_call = device_notifier,
4589};
4590
75f05569
JL
4591static int intel_iommu_memory_notifier(struct notifier_block *nb,
4592 unsigned long val, void *v)
4593{
4594 struct memory_notify *mhp = v;
4595 unsigned long long start, end;
4596 unsigned long start_vpfn, last_vpfn;
4597
4598 switch (val) {
4599 case MEM_GOING_ONLINE:
4600 start = mhp->start_pfn << PAGE_SHIFT;
4601 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
4602 if (iommu_domain_identity_map(si_domain, start, end)) {
9f10e5bf 4603 pr_warn("Failed to build identity map for [%llx-%llx]\n",
75f05569
JL
4604 start, end);
4605 return NOTIFY_BAD;
4606 }
4607 break;
4608
4609 case MEM_OFFLINE:
4610 case MEM_CANCEL_ONLINE:
4611 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4612 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
4613 while (start_vpfn <= last_vpfn) {
4614 struct iova *iova;
4615 struct dmar_drhd_unit *drhd;
4616 struct intel_iommu *iommu;
ea8ea460 4617 struct page *freelist;
75f05569
JL
4618
4619 iova = find_iova(&si_domain->iovad, start_vpfn);
4620 if (iova == NULL) {
9f10e5bf 4621 pr_debug("Failed get IOVA for PFN %lx\n",
75f05569
JL
4622 start_vpfn);
4623 break;
4624 }
4625
4626 iova = split_and_remove_iova(&si_domain->iovad, iova,
4627 start_vpfn, last_vpfn);
4628 if (iova == NULL) {
9f10e5bf 4629 pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
75f05569
JL
4630 start_vpfn, last_vpfn);
4631 return NOTIFY_BAD;
4632 }
4633
ea8ea460
DW
4634 freelist = domain_unmap(si_domain, iova->pfn_lo,
4635 iova->pfn_hi);
4636
75f05569
JL
4637 rcu_read_lock();
4638 for_each_active_iommu(iommu, drhd)
a1ddcbe9 4639 iommu_flush_iotlb_psi(iommu, si_domain,
a156ef99 4640 iova->pfn_lo, iova_size(iova),
ea8ea460 4641 !freelist, 0);
75f05569 4642 rcu_read_unlock();
ea8ea460 4643 dma_free_pagelist(freelist);
75f05569
JL
4644
4645 start_vpfn = iova->pfn_hi + 1;
4646 free_iova_mem(iova);
4647 }
4648 break;
4649 }
4650
4651 return NOTIFY_OK;
4652}
4653
4654static struct notifier_block intel_iommu_memory_nb = {
4655 .notifier_call = intel_iommu_memory_notifier,
4656 .priority = 0
4657};
4658
22e2f9fa
OP
4659static void free_all_cpu_cached_iovas(unsigned int cpu)
4660{
4661 int i;
4662
4663 for (i = 0; i < g_num_of_iommus; i++) {
4664 struct intel_iommu *iommu = g_iommus[i];
4665 struct dmar_domain *domain;
0caa7616 4666 int did;
22e2f9fa
OP
4667
4668 if (!iommu)
4669 continue;
4670
3bd4f911 4671 for (did = 0; did < cap_ndoms(iommu->cap); did++) {
0caa7616 4672 domain = get_iommu_domain(iommu, (u16)did);
22e2f9fa
OP
4673
4674 if (!domain)
4675 continue;
4676 free_cpu_cached_iovas(cpu, &domain->iovad);
4677 }
4678 }
4679}
4680
21647615 4681static int intel_iommu_cpu_dead(unsigned int cpu)
aa473240 4682{
21647615 4683 free_all_cpu_cached_iovas(cpu);
21647615 4684 return 0;
aa473240
OP
4685}
4686
161b28aa
JR
4687static void intel_disable_iommus(void)
4688{
4689 struct intel_iommu *iommu = NULL;
4690 struct dmar_drhd_unit *drhd;
4691
4692 for_each_iommu(iommu, drhd)
4693 iommu_disable_translation(iommu);
4694}
4695
a7fdb6e6
JR
4696static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
4697{
2926a2aa
JR
4698 struct iommu_device *iommu_dev = dev_to_iommu_device(dev);
4699
4700 return container_of(iommu_dev, struct intel_iommu, iommu);
a7fdb6e6
JR
4701}
4702
a5459cfe
AW
4703static ssize_t intel_iommu_show_version(struct device *dev,
4704 struct device_attribute *attr,
4705 char *buf)
4706{
a7fdb6e6 4707 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4708 u32 ver = readl(iommu->reg + DMAR_VER_REG);
4709 return sprintf(buf, "%d:%d\n",
4710 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4711}
4712static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
4713
4714static ssize_t intel_iommu_show_address(struct device *dev,
4715 struct device_attribute *attr,
4716 char *buf)
4717{
a7fdb6e6 4718 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4719 return sprintf(buf, "%llx\n", iommu->reg_phys);
4720}
4721static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
4722
4723static ssize_t intel_iommu_show_cap(struct device *dev,
4724 struct device_attribute *attr,
4725 char *buf)
4726{
a7fdb6e6 4727 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4728 return sprintf(buf, "%llx\n", iommu->cap);
4729}
4730static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
4731
4732static ssize_t intel_iommu_show_ecap(struct device *dev,
4733 struct device_attribute *attr,
4734 char *buf)
4735{
a7fdb6e6 4736 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4737 return sprintf(buf, "%llx\n", iommu->ecap);
4738}
4739static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4740
2238c082
AW
4741static ssize_t intel_iommu_show_ndoms(struct device *dev,
4742 struct device_attribute *attr,
4743 char *buf)
4744{
a7fdb6e6 4745 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2238c082
AW
4746 return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
4747}
4748static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL);
4749
4750static ssize_t intel_iommu_show_ndoms_used(struct device *dev,
4751 struct device_attribute *attr,
4752 char *buf)
4753{
a7fdb6e6 4754 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2238c082
AW
4755 return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
4756 cap_ndoms(iommu->cap)));
4757}
4758static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL);
4759
a5459cfe
AW
4760static struct attribute *intel_iommu_attrs[] = {
4761 &dev_attr_version.attr,
4762 &dev_attr_address.attr,
4763 &dev_attr_cap.attr,
4764 &dev_attr_ecap.attr,
2238c082
AW
4765 &dev_attr_domains_supported.attr,
4766 &dev_attr_domains_used.attr,
a5459cfe
AW
4767 NULL,
4768};
4769
4770static struct attribute_group intel_iommu_group = {
4771 .name = "intel-iommu",
4772 .attrs = intel_iommu_attrs,
4773};
4774
4775const struct attribute_group *intel_iommu_groups[] = {
4776 &intel_iommu_group,
4777 NULL,
4778};
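/*
 * These attribute groups are registered through iommu_device_sysfs_add() in
 * intel_iommu_init() below, so the files typically appear under
 * /sys/class/iommu/dmar<N>/intel-iommu/, e.g. (the unit name and output
 * shown are illustrative and hardware dependent):
 *
 *	$ cat /sys/class/iommu/dmar0/intel-iommu/version
 *	1:0
 */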
4779
89a6079d
LB
4780static int __init platform_optin_force_iommu(void)
4781{
4782 struct pci_dev *pdev = NULL;
4783 bool has_untrusted_dev = false;
4784
4785 if (!dmar_platform_optin() || no_platform_optin)
4786 return 0;
4787
4788 for_each_pci_dev(pdev) {
4789 if (pdev->untrusted) {
4790 has_untrusted_dev = true;
4791 break;
4792 }
4793 }
4794
4795 if (!has_untrusted_dev)
4796 return 0;
4797
4798 if (no_iommu || dmar_disabled)
4799 pr_info("Intel-IOMMU force enabled due to platform opt in\n");
4800
4801 /*
4802 * If Intel-IOMMU is disabled by default, we will apply identity
4803 * map for all devices except those marked as being untrusted.
4804 */
4805 if (dmar_disabled)
4806 iommu_identity_mapping |= IDENTMAP_ALL;
4807
4808 dmar_disabled = 0;
4809#if defined(CONFIG_X86) && defined(CONFIG_SWIOTLB)
4810 swiotlb = 0;
4811#endif
4812 no_iommu = 0;
4813
4814 return 1;
4815}
4816
ba395927
KA
4817int __init intel_iommu_init(void)
4818{
9bdc531e 4819 int ret = -ENODEV;
3a93c841 4820 struct dmar_drhd_unit *drhd;
7c919779 4821 struct intel_iommu *iommu;
ba395927 4822
89a6079d
LB
4823 /*
4824 * Intel IOMMU is required for a TXT/tboot launch or platform
4825 * opt in, so enforce that.
4826 */
4827 force_on = tboot_force_iommu() || platform_optin_force_iommu();
a59b50e9 4828
3a5670e8
JL
4829 if (iommu_init_mempool()) {
4830 if (force_on)
4831 panic("tboot: Failed to initialize iommu memory\n");
4832 return -ENOMEM;
4833 }
4834
4835 down_write(&dmar_global_lock);
a59b50e9
JC
4836 if (dmar_table_init()) {
4837 if (force_on)
4838 panic("tboot: Failed to initialize DMAR table\n");
9bdc531e 4839 goto out_free_dmar;
a59b50e9 4840 }
ba395927 4841
c2c7286a 4842 if (dmar_dev_scope_init() < 0) {
a59b50e9
JC
4843 if (force_on)
4844 panic("tboot: Failed to initialize DMAR device scope\n");
9bdc531e 4845 goto out_free_dmar;
a59b50e9 4846 }
1886e8a9 4847
ec154bf5
JR
4848 up_write(&dmar_global_lock);
4849
4850 /*
4851 * The bus notifier takes the dmar_global_lock, so lockdep will
4852 * complain later when we register it under the lock.
4853 */
4854 dmar_register_bus_notifier();
4855
4856 down_write(&dmar_global_lock);
4857
161b28aa 4858 if (no_iommu || dmar_disabled) {
bfd20f1c
SL
4859 /*
4860 * We exit the function here to ensure IOMMU's remapping and
4861 * mempool aren't setup, which means that the IOMMU's PMRs
4862 * won't be disabled via the call to init_dmars(). So disable
4863 * it explicitly here. The PMRs were setup by tboot prior to
4864 * calling SENTER, but the kernel is expected to reset/tear
4865 * down the PMRs.
4866 */
4867 if (intel_iommu_tboot_noforce) {
4868 for_each_iommu(iommu, drhd)
4869 iommu_disable_protect_mem_regions(iommu);
4870 }
4871
161b28aa
JR
4872 /*
4873 * Make sure the IOMMUs are switched off, even when we
4874 * boot into a kexec kernel and the previous kernel left
4875 * them enabled
4876 */
4877 intel_disable_iommus();
9bdc531e 4878 goto out_free_dmar;
161b28aa 4879 }
2ae21010 4880
318fe7df 4881 if (list_empty(&dmar_rmrr_units))
9f10e5bf 4882 pr_info("No RMRR found\n");
318fe7df
SS
4883
4884 if (list_empty(&dmar_atsr_units))
9f10e5bf 4885 pr_info("No ATSR found\n");
318fe7df 4886
51a63e67
JC
4887 if (dmar_init_reserved_ranges()) {
4888 if (force_on)
4889 panic("tboot: Failed to reserve iommu ranges\n");
3a5670e8 4890 goto out_free_reserved_range;
51a63e67 4891 }
ba395927
KA
4892
4893 init_no_remapping_devices();
4894
b779260b 4895 ret = init_dmars();
ba395927 4896 if (ret) {
a59b50e9
JC
4897 if (force_on)
4898 panic("tboot: Failed to initialize DMARs\n");
9f10e5bf 4899 pr_err("Initialization failed\n");
9bdc531e 4900 goto out_free_reserved_range;
ba395927 4901 }
3a5670e8 4902 up_write(&dmar_global_lock);
9f10e5bf 4903 pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
ba395927 4904
4fac8076 4905#if defined(CONFIG_X86) && defined(CONFIG_SWIOTLB)
75f1cdf1
FT
4906 swiotlb = 0;
4907#endif
19943b0e 4908 dma_ops = &intel_dma_ops;
4ed0d3e6 4909
134fac3f 4910 init_iommu_pm_ops();
a8bcbb0d 4911
39ab9555
JR
4912 for_each_active_iommu(iommu, drhd) {
4913 iommu_device_sysfs_add(&iommu->iommu, NULL,
4914 intel_iommu_groups,
4915 "%s", iommu->name);
4916 iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
4917 iommu_device_register(&iommu->iommu);
4918 }
a5459cfe 4919
4236d97d 4920 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
99dcaded 4921 bus_register_notifier(&pci_bus_type, &device_nb);
75f05569
JL
4922 if (si_domain && !hw_pass_through)
4923 register_memory_notifier(&intel_iommu_memory_nb);
21647615
AMG
4924 cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL,
4925 intel_iommu_cpu_dead);
8bc1f85c 4926 intel_iommu_enabled = 1;
ee2636b8 4927 intel_iommu_debugfs_init();
8bc1f85c 4928
ba395927 4929 return 0;
9bdc531e
JL
4930
4931out_free_reserved_range:
4932 put_iova_domain(&reserved_iova_list);
9bdc531e
JL
4933out_free_dmar:
4934 intel_iommu_free_dmars();
3a5670e8
JL
4935 up_write(&dmar_global_lock);
4936 iommu_exit_mempool();
9bdc531e 4937 return ret;
ba395927 4938}
e820482c 4939
2452d9db 4940static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
579305f7
AW
4941{
4942 struct intel_iommu *iommu = opaque;
4943
2452d9db 4944 domain_context_clear_one(iommu, PCI_BUS_NUM(alias), alias & 0xff);
579305f7
AW
4945 return 0;
4946}
4947
4948/*
4949 * NB - intel-iommu lacks any sort of reference counting for the users of
4950 * dependent devices. If multiple endpoints have intersecting dependent
4951 * devices, unbinding the driver from any one of them will possibly leave
4952 * the others unable to operate.
4953 */
2452d9db 4954static void domain_context_clear(struct intel_iommu *iommu, struct device *dev)
3199aa6b 4955{
0bcb3e28 4956 if (!iommu || !dev || !dev_is_pci(dev))
3199aa6b
HW
4957 return;
4958
2452d9db 4959 pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_clear_one_cb, iommu);
3199aa6b
HW
4960}
4961
127c7615 4962static void __dmar_remove_one_dev_info(struct device_domain_info *info)
c7151a8d 4963{
c7151a8d
WH
4964 struct intel_iommu *iommu;
4965 unsigned long flags;
c7151a8d 4966
55d94043
JR
4967 assert_spin_locked(&device_domain_lock);
4968
127c7615 4969 if (WARN_ON(!info))
c7151a8d
WH
4970 return;
4971
127c7615 4972 iommu = info->iommu;
c7151a8d 4973
127c7615 4974 if (info->dev) {
ef848b7e
LB
4975 if (dev_is_pci(info->dev) && sm_supported(iommu))
4976 intel_pasid_tear_down_entry(iommu, info->dev,
4977 PASID_RID2PASID);
4978
127c7615
JR
4979 iommu_disable_dev_iotlb(info);
4980 domain_context_clear(iommu, info->dev);
a7fc93fe 4981 intel_pasid_free_table(info->dev);
127c7615 4982 }
c7151a8d 4983
b608ac3b 4984 unlink_domain_info(info);
c7151a8d 4985
d160aca5 4986 spin_lock_irqsave(&iommu->lock, flags);
127c7615 4987 domain_detach_iommu(info->domain, iommu);
d160aca5 4988 spin_unlock_irqrestore(&iommu->lock, flags);
c7151a8d 4989
127c7615 4990 free_devinfo_mem(info);
c7151a8d 4991}
c7151a8d 4992
71753239 4993static void dmar_remove_one_dev_info(struct device *dev)
55d94043 4994{
127c7615 4995 struct device_domain_info *info;
55d94043 4996 unsigned long flags;
3e7abe25 4997
55d94043 4998 spin_lock_irqsave(&device_domain_lock, flags);
127c7615
JR
4999 info = dev->archdata.iommu;
5000 __dmar_remove_one_dev_info(info);
55d94043 5001 spin_unlock_irqrestore(&device_domain_lock, flags);
c7151a8d
WH
5002}
5003
2c2e2c38 5004static int md_domain_init(struct dmar_domain *domain, int guest_width)
5e98c4b1
WH
5005{
5006 int adjust_width;
5007
aa3ac946 5008 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
5e98c4b1
WH
5009 domain_reserve_special_ranges(domain);
5010
5011 /* calculate AGAW */
5012 domain->gaw = guest_width;
5013 adjust_width = guestwidth_to_adjustwidth(guest_width);
5014 domain->agaw = width_to_agaw(adjust_width);
5015
5e98c4b1 5016 domain->iommu_coherency = 0;
c5b15255 5017 domain->iommu_snooping = 0;
6dd9a7c7 5018 domain->iommu_superpage = 0;
fe40f1e0 5019 domain->max_addr = 0;
5e98c4b1
WH
5020
5021 /* always allocate the top pgd */
4c923d47 5022 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
5e98c4b1
WH
5023 if (!domain->pgd)
5024 return -ENOMEM;
5025 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
5026 return 0;
5027}
5028
00a77deb 5029static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
38717946 5030{
5d450806 5031 struct dmar_domain *dmar_domain;
00a77deb
JR
5032 struct iommu_domain *domain;
5033
5034 if (type != IOMMU_DOMAIN_UNMANAGED)
5035 return NULL;
38717946 5036
ab8dfe25 5037 dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE);
5d450806 5038 if (!dmar_domain) {
9f10e5bf 5039 pr_err("Can't allocate dmar_domain\n");
00a77deb 5040 return NULL;
38717946 5041 }
2c2e2c38 5042 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
9f10e5bf 5043 pr_err("Domain initialization failed\n");
92d03cc8 5044 domain_exit(dmar_domain);
00a77deb 5045 return NULL;
38717946 5046 }
8140a95d 5047 domain_update_iommu_cap(dmar_domain);
faa3d6f5 5048
00a77deb 5049 domain = &dmar_domain->domain;
8a0e715b
JR
5050 domain->geometry.aperture_start = 0;
5051 domain->geometry.aperture_end = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
5052 domain->geometry.force_aperture = true;
5053
00a77deb 5054 return domain;
38717946 5055}
38717946 5056
00a77deb 5057static void intel_iommu_domain_free(struct iommu_domain *domain)
38717946 5058{
00a77deb 5059 domain_exit(to_dmar_domain(domain));
38717946 5060}
38717946 5061
4c5478c9
JR
5062static int intel_iommu_attach_device(struct iommu_domain *domain,
5063 struct device *dev)
38717946 5064{
00a77deb 5065 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
fe40f1e0
WH
5066 struct intel_iommu *iommu;
5067 int addr_width;
156baca8 5068 u8 bus, devfn;
faa3d6f5 5069
c875d2c1
AW
5070 if (device_is_rmrr_locked(dev)) {
5071 dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n");
5072 return -EPERM;
5073 }
5074
7207d8f9
DW
5075 /* normally dev is not mapped */
5076 if (unlikely(domain_context_mapped(dev))) {
faa3d6f5
WH
5077 struct dmar_domain *old_domain;
5078
1525a29a 5079 old_domain = find_domain(dev);
faa3d6f5 5080 if (old_domain) {
d160aca5 5081 rcu_read_lock();
71753239 5082 dmar_remove_one_dev_info(dev);
d160aca5 5083 rcu_read_unlock();
62c22167
JR
5084
5085 if (!domain_type_is_vm_or_si(old_domain) &&
5086 list_empty(&old_domain->devices))
5087 domain_exit(old_domain);
faa3d6f5
WH
5088 }
5089 }
5090
156baca8 5091 iommu = device_to_iommu(dev, &bus, &devfn);
fe40f1e0
WH
5092 if (!iommu)
5093 return -ENODEV;
5094
5095 /* check if this iommu agaw is sufficient for max mapped address */
5096 addr_width = agaw_to_width(iommu->agaw);
a99c47a2
TL
5097 if (addr_width > cap_mgaw(iommu->cap))
5098 addr_width = cap_mgaw(iommu->cap);
5099
5100 if (dmar_domain->max_addr > (1LL << addr_width)) {
932a6523
BH
5101 dev_err(dev, "%s: iommu width (%d) is not "
5102 "sufficient for the mapped address (%llx)\n",
5103 __func__, addr_width, dmar_domain->max_addr);
fe40f1e0
WH
5104 return -EFAULT;
5105 }
a99c47a2
TL
5106 dmar_domain->gaw = addr_width;
5107
5108 /*
5109 * Knock out extra levels of page tables if necessary
5110 */
5111 while (iommu->agaw < dmar_domain->agaw) {
5112 struct dma_pte *pte;
5113
5114 pte = dmar_domain->pgd;
5115 if (dma_pte_present(pte)) {
25cbff16
SY
5116 dmar_domain->pgd = (struct dma_pte *)
5117 phys_to_virt(dma_pte_addr(pte));
7a661013 5118 free_pgtable_page(pte);
a99c47a2
TL
5119 }
5120 dmar_domain->agaw--;
5121 }
fe40f1e0 5122
28ccce0d 5123 return domain_add_dev_info(dmar_domain, dev);
38717946 5124}
38717946 5125
4c5478c9
JR
5126static void intel_iommu_detach_device(struct iommu_domain *domain,
5127 struct device *dev)
38717946 5128{
71753239 5129 dmar_remove_one_dev_info(dev);
faa3d6f5 5130}
c7151a8d 5131
b146a1c9
JR
5132static int intel_iommu_map(struct iommu_domain *domain,
5133 unsigned long iova, phys_addr_t hpa,
5009065d 5134 size_t size, int iommu_prot)
faa3d6f5 5135{
00a77deb 5136 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
fe40f1e0 5137 u64 max_addr;
dde57a21 5138 int prot = 0;
faa3d6f5 5139 int ret;
fe40f1e0 5140
dde57a21
JR
5141 if (iommu_prot & IOMMU_READ)
5142 prot |= DMA_PTE_READ;
5143 if (iommu_prot & IOMMU_WRITE)
5144 prot |= DMA_PTE_WRITE;
9cf06697
SY
5145 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
5146 prot |= DMA_PTE_SNP;
dde57a21 5147
163cc52c 5148 max_addr = iova + size;
dde57a21 5149 if (dmar_domain->max_addr < max_addr) {
fe40f1e0
WH
5150 u64 end;
5151
5152 /* check if minimum agaw is sufficient for mapped address */
8954da1f 5153 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
fe40f1e0 5154 if (end < max_addr) {
9f10e5bf 5155 pr_err("%s: iommu width (%d) is not "
fe40f1e0 5156 "sufficient for the mapped address (%llx)\n",
8954da1f 5157 __func__, dmar_domain->gaw, max_addr);
fe40f1e0
WH
5158 return -EFAULT;
5159 }
dde57a21 5160 dmar_domain->max_addr = max_addr;
fe40f1e0 5161 }
ad051221
DW
5162 /* Round up size to next multiple of PAGE_SIZE, if it and
5163 the low bits of hpa would take us onto the next page */
88cb6a74 5164 size = aligned_nrpages(hpa, size);
ad051221
DW
5165 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
5166 hpa >> VTD_PAGE_SHIFT, size, prot);
faa3d6f5 5167 return ret;
38717946 5168}
38717946 5169
5009065d 5170static size_t intel_iommu_unmap(struct iommu_domain *domain,
ea8ea460 5171 unsigned long iova, size_t size)
38717946 5172{
00a77deb 5173 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
ea8ea460 5174 struct page *freelist = NULL;
ea8ea460
DW
5175 unsigned long start_pfn, last_pfn;
5176 unsigned int npages;
42e8c186 5177 int iommu_id, level = 0;
5cf0a76f
DW
5178
5179 /* Cope with horrid API which requires us to unmap more than the
5180 size argument if it happens to be a large-page mapping. */
dc02e46e 5181 BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
5cf0a76f
DW
5182
5183 if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
5184 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4b99d352 5185
ea8ea460
DW
5186 start_pfn = iova >> VTD_PAGE_SHIFT;
5187 last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
5188
5189 freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
5190
5191 npages = last_pfn - start_pfn + 1;
5192
f746a025 5193 for_each_domain_iommu(iommu_id, dmar_domain)
42e8c186
JR
5194 iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
5195 start_pfn, npages, !freelist, 0);
ea8ea460
DW
5196
5197 dma_free_pagelist(freelist);
fe40f1e0 5198
163cc52c
DW
5199 if (dmar_domain->max_addr == iova + size)
5200 dmar_domain->max_addr = iova;
b146a1c9 5201
5cf0a76f 5202 return size;
38717946 5203}
38717946 5204
d14d6577 5205static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
bb5547ac 5206 dma_addr_t iova)
38717946 5207{
00a77deb 5208 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
38717946 5209 struct dma_pte *pte;
5cf0a76f 5210 int level = 0;
faa3d6f5 5211 u64 phys = 0;
38717946 5212
5cf0a76f 5213 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
38717946 5214 if (pte)
faa3d6f5 5215 phys = dma_pte_addr(pte);
38717946 5216
faa3d6f5 5217 return phys;
38717946 5218}
a8bcbb0d 5219
5d587b8d 5220static bool intel_iommu_capable(enum iommu_cap cap)
dbb9fd86 5221{
dbb9fd86 5222 if (cap == IOMMU_CAP_CACHE_COHERENCY)
5d587b8d 5223 return domain_update_iommu_snooping(NULL) == 1;
323f99cb 5224 if (cap == IOMMU_CAP_INTR_REMAP)
5d587b8d 5225 return irq_remapping_enabled == 1;
dbb9fd86 5226
5d587b8d 5227 return false;
dbb9fd86
SY
5228}
5229
abdfdde2
AW
5230static int intel_iommu_add_device(struct device *dev)
5231{
a5459cfe 5232 struct intel_iommu *iommu;
abdfdde2 5233 struct iommu_group *group;
156baca8 5234 u8 bus, devfn;
70ae6f0d 5235
a5459cfe
AW
5236 iommu = device_to_iommu(dev, &bus, &devfn);
5237 if (!iommu)
70ae6f0d
AW
5238 return -ENODEV;
5239
e3d10af1 5240 iommu_device_link(&iommu->iommu, dev);
a4ff1fc2 5241
e17f9ff4 5242 group = iommu_group_get_for_dev(dev);
783f157b 5243
e17f9ff4
AW
5244 if (IS_ERR(group))
5245 return PTR_ERR(group);
bcb71abe 5246
abdfdde2 5247 iommu_group_put(group);
e17f9ff4 5248 return 0;
abdfdde2 5249}
70ae6f0d 5250
abdfdde2
AW
5251static void intel_iommu_remove_device(struct device *dev)
5252{
a5459cfe
AW
5253 struct intel_iommu *iommu;
5254 u8 bus, devfn;
5255
5256 iommu = device_to_iommu(dev, &bus, &devfn);
5257 if (!iommu)
5258 return;
5259
abdfdde2 5260 iommu_group_remove_device(dev);
a5459cfe 5261
e3d10af1 5262 iommu_device_unlink(&iommu->iommu, dev);
70ae6f0d
AW
5263}
5264
0659b8dc
EA
5265static void intel_iommu_get_resv_regions(struct device *device,
5266 struct list_head *head)
5267{
5268 struct iommu_resv_region *reg;
5269 struct dmar_rmrr_unit *rmrr;
5270 struct device *i_dev;
5271 int i;
5272
5273 rcu_read_lock();
5274 for_each_rmrr_units(rmrr) {
5275 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
5276 i, i_dev) {
5277 if (i_dev != device)
5278 continue;
5279
5280 list_add_tail(&rmrr->resv->list, head);
5281 }
5282 }
5283 rcu_read_unlock();
5284
5285 reg = iommu_alloc_resv_region(IOAPIC_RANGE_START,
5286 IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1,
9d3a4de4 5287 0, IOMMU_RESV_MSI);
0659b8dc
EA
5288 if (!reg)
5289 return;
5290 list_add_tail(&reg->list, head);
5291}
5292
5293static void intel_iommu_put_resv_regions(struct device *dev,
5294 struct list_head *head)
5295{
5296 struct iommu_resv_region *entry, *next;
5297
5298 list_for_each_entry_safe(entry, next, head, list) {
5299 if (entry->type == IOMMU_RESV_RESERVED)
5300 kfree(entry);
5301 }
70ae6f0d
AW
5302}
5303
2f26e0a9
DW
5304#ifdef CONFIG_INTEL_IOMMU_SVM
5305int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev)
5306{
5307 struct device_domain_info *info;
5308 struct context_entry *context;
5309 struct dmar_domain *domain;
5310 unsigned long flags;
5311 u64 ctx_lo;
5312 int ret;
5313
5314 domain = get_valid_domain_for_dev(sdev->dev);
5315 if (!domain)
5316 return -EINVAL;
5317
5318 spin_lock_irqsave(&device_domain_lock, flags);
5319 spin_lock(&iommu->lock);
5320
5321 ret = -EINVAL;
5322 info = sdev->dev->archdata.iommu;
5323 if (!info || !info->pasid_supported)
5324 goto out;
5325
5326 context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
5327 if (WARN_ON(!context))
5328 goto out;
5329
5330 ctx_lo = context[0].lo;
5331
5332 sdev->did = domain->iommu_did[iommu->seq_id];
5333 sdev->sid = PCI_DEVID(info->bus, info->devfn);
5334
5335 if (!(ctx_lo & CONTEXT_PASIDE)) {
2f26e0a9
DW
5336 ctx_lo |= CONTEXT_PASIDE;
5337 context[0].lo = ctx_lo;
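		/* Publish the updated context entry before issuing the context-cache flush below. */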
5338 wmb();
5339 iommu->flush.flush_context(iommu, sdev->did, sdev->sid,
5340 DMA_CCMD_MASK_NOBIT,
5341 DMA_CCMD_DEVICE_INVL);
5342 }
5343
5344 /* Enable PASID support in the device, if it wasn't already */
5345 if (!info->pasid_enabled)
5346 iommu_enable_dev_iotlb(info);
5347
5348 if (info->ats_enabled) {
5349 sdev->dev_iotlb = 1;
5350 sdev->qdep = info->ats_qdep;
5351 if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
5352 sdev->qdep = 0;
5353 }
5354 ret = 0;
5355
5356 out:
5357 spin_unlock(&iommu->lock);
5358 spin_unlock_irqrestore(&device_domain_lock, flags);
5359
5360 return ret;
5361}
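/*
 * Hypothetical caller sketch (assumption, not code from this driver): the
 * SVM bind path is expected to resolve the IOMMU for the device and enable
 * PASID support in its context entry before installing a PASID-table
 * entry, roughly:
 *
 *	iommu = intel_svm_device_to_iommu(dev);
 *	if (!iommu)
 *		return -ENXIO;
 *	ret = intel_iommu_enable_pasid(iommu, sdev);
 *	if (ret)
 *		goto free_sdev;
 */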
5362
5363struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
5364{
5365 struct intel_iommu *iommu;
5366 u8 bus, devfn;
5367
5368 if (iommu_dummy(dev)) {
5369 dev_warn(dev,
5370 "No IOMMU translation for device; cannot enable SVM\n");
5371 return NULL;
5372 }
5373
5374 iommu = device_to_iommu(dev, &bus, &devfn);
 5375 	if (!iommu) {
b9997e38 5376 dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
2f26e0a9
DW
5377 return NULL;
5378 }
5379
2f26e0a9
DW
5380 return iommu;
5381}
5382#endif /* CONFIG_INTEL_IOMMU_SVM */
5383
b0119e87 5384const struct iommu_ops intel_iommu_ops = {
0659b8dc
EA
5385 .capable = intel_iommu_capable,
5386 .domain_alloc = intel_iommu_domain_alloc,
5387 .domain_free = intel_iommu_domain_free,
5388 .attach_dev = intel_iommu_attach_device,
5389 .detach_dev = intel_iommu_detach_device,
5390 .map = intel_iommu_map,
5391 .unmap = intel_iommu_unmap,
0659b8dc
EA
5392 .iova_to_phys = intel_iommu_iova_to_phys,
5393 .add_device = intel_iommu_add_device,
5394 .remove_device = intel_iommu_remove_device,
5395 .get_resv_regions = intel_iommu_get_resv_regions,
5396 .put_resv_regions = intel_iommu_put_resv_regions,
5397 .device_group = pci_device_group,
5398 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
a8bcbb0d 5399};
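/*
 * Minimal sketch of how this table is wired up (assuming the usual
 * bus_set_iommu() registration performed during driver initialisation):
 * once bound to the PCI bus, the generic IOMMU core dispatches all of the
 * operations above through this structure.
 *
 *	if (bus_set_iommu(&pci_bus_type, &intel_iommu_ops))
 *		pr_warn("Failed to bind IOMMU ops to the PCI bus\n");
 */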
9af88143 5400
9452618e
DV
5401static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
5402{
 5403 	/* DMAR support for G4x/GM45 integrated graphics is broken; disable it. */
932a6523 5404 pci_info(dev, "Disabling IOMMU for graphics on this chipset\n");
9452618e
DV
5405 dmar_map_gfx = 0;
5406}
5407
5408DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
5409DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
5410DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
5411DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
5412DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
5413DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
5414DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
5415
d34d6517 5416static void quirk_iommu_rwbf(struct pci_dev *dev)
9af88143
DW
5417{
5418 /*
5419 * Mobile 4 Series Chipset neglects to set RWBF capability,
210561ff 5420 * but needs it. Same seems to hold for the desktop versions.
9af88143 5421 */
932a6523 5422 pci_info(dev, "Forcing write-buffer flush capability\n");
9af88143
DW
5423 rwbf_quirk = 1;
5424}
5425
5426DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
210561ff
DV
5427DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
5428DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
5429DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
5430DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
5431DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
5432DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
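/*
 * Sketch of how the flag above is consumed (assumed from the write-buffer
 * flush helper elsewhere in this file): the flush is skipped only when
 * neither the RWBF capability bit nor the quirk is set, roughly:
 *
 *	if (!rwbf_quirk && !cap_rwbf(iommu->cap))
 *		return;
 */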
e0fc7e0b 5433
eecfd57f
AJ
5434#define GGC 0x52
5435#define GGC_MEMORY_SIZE_MASK (0xf << 8)
5436#define GGC_MEMORY_SIZE_NONE (0x0 << 8)
5437#define GGC_MEMORY_SIZE_1M (0x1 << 8)
5438#define GGC_MEMORY_SIZE_2M (0x3 << 8)
5439#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
5440#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
5441#define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
5442#define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
5443
d34d6517 5444static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
9eecabcb
DW
5445{
5446 unsigned short ggc;
5447
eecfd57f 5448 if (pci_read_config_word(dev, GGC, &ggc))
9eecabcb
DW
5449 return;
5450
eecfd57f 5451 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
932a6523 5452 pci_info(dev, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
9eecabcb 5453 dmar_map_gfx = 0;
6fbcfb3e
DW
5454 } else if (dmar_map_gfx) {
5455 /* we have to ensure the gfx device is idle before we flush */
932a6523 5456 pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n");
6fbcfb3e
DW
5457 intel_iommu_strict = 1;
5458 }
9eecabcb
DW
5459}
5460DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
5461DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
5462DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
5463DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
5464
e0fc7e0b
DW
5465/* On Tylersburg chipsets, some BIOSes have been known to enable the
5466 ISOCH DMAR unit for the Azalia sound device, but not give it any
5467 TLB entries, which causes it to deadlock. Check for that. We do
5468 this in a function called from init_dmars(), instead of in a PCI
5469 quirk, because we don't want to print the obnoxious "BIOS broken"
5470 message if VT-d is actually disabled.
5471*/
5472static void __init check_tylersburg_isoch(void)
5473{
5474 struct pci_dev *pdev;
5475 uint32_t vtisochctrl;
5476
5477 /* If there's no Azalia in the system anyway, forget it. */
5478 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
5479 if (!pdev)
5480 return;
5481 pci_dev_put(pdev);
5482
5483 /* System Management Registers. Might be hidden, in which case
5484 we can't do the sanity check. But that's OK, because the
5485 known-broken BIOSes _don't_ actually hide it, so far. */
5486 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
5487 if (!pdev)
5488 return;
5489
5490 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
5491 pci_dev_put(pdev);
5492 return;
5493 }
5494
5495 pci_dev_put(pdev);
5496
5497 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
5498 if (vtisochctrl & 1)
5499 return;
5500
5501 /* Drop all bits other than the number of TLB entries */
5502 vtisochctrl &= 0x1c;
5503
5504 /* If we have the recommended number of TLB entries (16), fine. */
5505 if (vtisochctrl == 0x10)
5506 return;
5507
 5508 	/* Zero TLB entries is a fatally broken BIOS configuration; warn loudly. */
5509 if (!vtisochctrl) {
5510 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
5511 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
5512 dmi_get_system_info(DMI_BIOS_VENDOR),
5513 dmi_get_system_info(DMI_BIOS_VERSION),
5514 dmi_get_system_info(DMI_PRODUCT_VERSION));
5515 iommu_identity_mapping |= IDENTMAP_AZALIA;
5516 return;
5517 }
9f10e5bf
JR
5518
 5519 	pr_warn("The recommended number of TLB entries for the ISOCH unit is 16; your BIOS set %d\n",
e0fc7e0b
DW
5520 vtisochctrl);
5521}