/*
 * Copyright © 2006-2014 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * Authors: David Woodhouse <dwmw2@infradead.org>,
 *          Ashok Raj <ashok.raj@intel.com>,
 *          Shaohua Li <shaohua.li@intel.com>,
 *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
 *          Fenghua Yu <fenghua.yu@intel.com>
 *          Joerg Roedel <jroedel@suse.de>
 */

#define pr_fmt(fmt)     "DMAR: " fmt

#include <linux/init.h>
#include <linux/bitmap.h>
#include <linux/debugfs.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/dma-mapping.h>
#include <linux/mempool.h>
#include <linux/memory.h>
#include <linux/cpu.h>
#include <linux/timer.h>
#include <linux/io.h>
#include <linux/iova.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
#include <linux/syscore_ops.h>
#include <linux/tboot.h>
#include <linux/dmi.h>
#include <linux/pci-ats.h>
#include <linux/memblock.h>
#include <linux/dma-contiguous.h>
#include <linux/crash_dump.h>
#include <asm/irq_remapping.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>

#include "irq_remapping.h"

#define ROOT_SIZE		VTD_PAGE_SIZE
#define CONTEXT_SIZE		VTD_PAGE_SIZE

#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)

#define IOAPIC_RANGE_START	(0xfee00000)
#define IOAPIC_RANGE_END	(0xfeefffff)
#define IOVA_START_ADDR		(0x1000)

#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48

#define MAX_AGAW_WIDTH 64
#define MAX_AGAW_PFN_WIDTH	(MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)

#define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)

/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
   to match. That way, we can use 'unsigned long' for PFNs with impunity. */
#define DOMAIN_MAX_PFN(gaw)	((unsigned long) min_t(uint64_t, \
				__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
#define DOMAIN_MAX_ADDR(gaw)	(((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)

/* IO virtual address start page frame number */
#define IOVA_START_PFN		(1)

#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
#define DMA_32BIT_PFN		IOVA_PFN(DMA_BIT_MASK(32))
#define DMA_64BIT_PFN		IOVA_PFN(DMA_BIT_MASK(64))

/* page table handling */
#define LEVEL_STRIDE		(9)
#define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)

/*
 * This bitmap is used to advertise the page sizes our hardware support
 * to the IOMMU core, which will then use this information to split
 * physically contiguous memory regions it is mapping into page sizes
 * that we support.
 *
 * Traditionally the IOMMU core just handed us the mappings directly,
 * after making sure the size is an order of a 4KiB page and that the
 * mapping has natural alignment.
 *
 * To retain this behavior, we currently advertise that we support
 * all page sizes that are an order of 4KiB.
 *
 * If at some point we'd like to utilize the IOMMU core's new behavior,
 * we could change this to advertise the real page sizes we support.
 */
#define INTEL_IOMMU_PGSIZES	(~0xFFFUL)

static inline int agaw_to_level(int agaw)
{
	return agaw + 2;
}

static inline int agaw_to_width(int agaw)
{
	return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
}

static inline int width_to_agaw(int width)
{
	return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
}

static inline unsigned int level_to_offset_bits(int level)
{
	return (level - 1) * LEVEL_STRIDE;
}

static inline int pfn_level_offset(unsigned long pfn, int level)
{
	return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
}

static inline unsigned long level_mask(int level)
{
	return -1UL << level_to_offset_bits(level);
}

static inline unsigned long level_size(int level)
{
	return 1UL << level_to_offset_bits(level);
}

static inline unsigned long align_to_level(unsigned long pfn, int level)
{
	return (pfn + level_size(level) - 1) & level_mask(level);
}

static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
{
	return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
}

/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
   are never going to work. */
static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
{
	return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
}

static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
{
	return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
}
static inline unsigned long page_to_dma_pfn(struct page *pg)
{
	return mm_to_dma_pfn(page_to_pfn(pg));
}
static inline unsigned long virt_to_dma_pfn(void *p)
{
	return page_to_dma_pfn(virt_to_page(p));
}

/* global iommu list, set NULL for ignored DMAR units */
static struct intel_iommu **g_iommus;

static void __init check_tylersburg_isoch(void);
static int rwbf_quirk;

/*
 * set to 1 to panic kernel if can't successfully enable VT-d
 * (used when kernel is launched w/ TXT)
 */
static int force_on = 0;
int intel_iommu_tboot_noforce;

/*
 * 0: Present
 * 1-11: Reserved
 * 12-63: Context Ptr (12 - (haw-1))
 * 64-127: Reserved
 */
struct root_entry {
	u64	lo;
	u64	hi;
};
#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))

/*
 * Take a root_entry and return the Lower Context Table Pointer (LCTP)
 * if marked present.
 */
static phys_addr_t root_entry_lctp(struct root_entry *re)
{
	if (!(re->lo & 1))
		return 0;

	return re->lo & VTD_PAGE_MASK;
}

/*
 * Take a root_entry and return the Upper Context Table Pointer (UCTP)
 * if marked present.
 */
static phys_addr_t root_entry_uctp(struct root_entry *re)
{
	if (!(re->hi & 1))
		return 0;

	return re->hi & VTD_PAGE_MASK;
}
/*
 * low 64 bits:
 * 0: present
 * 1: fault processing disable
 * 2-3: translation type
 * 12-63: address space root
 * high 64 bits:
 * 0-2: address width
 * 3-6: aval
 * 8-23: domain id
 */
struct context_entry {
	u64 lo;
	u64 hi;
};

static inline void context_clear_pasid_enable(struct context_entry *context)
{
	context->lo &= ~(1ULL << 11);
}

static inline bool context_pasid_enabled(struct context_entry *context)
{
	return !!(context->lo & (1ULL << 11));
}

static inline void context_set_copied(struct context_entry *context)
{
	context->hi |= (1ull << 3);
}

static inline bool context_copied(struct context_entry *context)
{
	return !!(context->hi & (1ULL << 3));
}

static inline bool __context_present(struct context_entry *context)
{
	return (context->lo & 1);
}

static inline bool context_present(struct context_entry *context)
{
	return context_pasid_enabled(context) ?
	     __context_present(context) :
	     __context_present(context) && !context_copied(context);
}

static inline void context_set_present(struct context_entry *context)
{
	context->lo |= 1;
}

static inline void context_set_fault_enable(struct context_entry *context)
{
	context->lo &= (((u64)-1) << 2) | 1;
}

static inline void context_set_translation_type(struct context_entry *context,
						unsigned long value)
{
	context->lo &= (((u64)-1) << 4) | 3;
	context->lo |= (value & 3) << 2;
}

static inline void context_set_address_root(struct context_entry *context,
					    unsigned long value)
{
	context->lo &= ~VTD_PAGE_MASK;
	context->lo |= value & VTD_PAGE_MASK;
}

static inline void context_set_address_width(struct context_entry *context,
					     unsigned long value)
{
	context->hi |= value & 7;
}

static inline void context_set_domain_id(struct context_entry *context,
					 unsigned long value)
{
	context->hi |= (value & ((1 << 16) - 1)) << 8;
}

static inline int context_domain_id(struct context_entry *c)
{
	return((c->hi >> 8) & 0xffff);
}

static inline void context_clear_entry(struct context_entry *context)
{
	context->lo = 0;
	context->hi = 0;
}

/*
 * 0: readable
 * 1: writable
 * 2-6: reserved
 * 7: super page
 * 8-10: available
 * 11: snoop behavior
 * 12-63: Host physical address
 */
struct dma_pte {
	u64 val;
};

static inline void dma_clear_pte(struct dma_pte *pte)
{
	pte->val = 0;
}

static inline u64 dma_pte_addr(struct dma_pte *pte)
{
#ifdef CONFIG_64BIT
	return pte->val & VTD_PAGE_MASK;
#else
	/* Must have a full atomic 64-bit read */
	return  __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
#endif
}

static inline bool dma_pte_present(struct dma_pte *pte)
{
	return (pte->val & 3) != 0;
}

static inline bool dma_pte_superpage(struct dma_pte *pte)
{
	return (pte->val & DMA_PTE_LARGE_PAGE);
}

static inline int first_pte_in_page(struct dma_pte *pte)
{
	return !((unsigned long)pte & ~VTD_PAGE_MASK);
}

/*
 * This domain is a statically identity mapping domain.
 *	1. This domain creates a static 1:1 mapping to all usable memory.
 *	2. It maps to each iommu if successful.
 *	3. Each iommu maps to this domain if successful.
 */
static struct dmar_domain *si_domain;
static int hw_pass_through = 1;

/*
 * Domain represents a virtual machine, more than one devices
 * across iommus may be owned in one domain, e.g. kvm guest.
 */
#define DOMAIN_FLAG_VIRTUAL_MACHINE	(1 << 0)

/* si_domain contains multiple devices */
#define DOMAIN_FLAG_STATIC_IDENTITY	(1 << 1)

#define for_each_domain_iommu(idx, domain)			\
	for (idx = 0; idx < g_num_of_iommus; idx++)		\
		if (domain->iommu_refcnt[idx])

99126f7c 383struct dmar_domain {
4c923d47 384 int nid; /* node id */
29a27719
JR
385
386 unsigned iommu_refcnt[DMAR_UNITS_SUPPORTED];
387 /* Refcount of devices per iommu */
388
99126f7c 389
c0e8a6c8
JR
390 u16 iommu_did[DMAR_UNITS_SUPPORTED];
391 /* Domain ids per IOMMU. Use u16 since
392 * domain ids are 16 bit wide according
393 * to VT-d spec, section 9.3 */
99126f7c 394
0824c592 395 bool has_iotlb_device;
00a77deb 396 struct list_head devices; /* all devices' list */
99126f7c
MM
397 struct iova_domain iovad; /* iova's that belong to this domain */
398
399 struct dma_pte *pgd; /* virtual address */
99126f7c
MM
400 int gaw; /* max guest address width */
401
402 /* adjusted guest address width, 0 is level 2 30-bit */
403 int agaw;
404
3b5410e7 405 int flags; /* flags to find out type of domain */
8e604097
WH
406
407 int iommu_coherency;/* indicate coherency of iommu access */
58c610bd 408 int iommu_snooping; /* indicate snooping control feature*/
c7151a8d 409 int iommu_count; /* reference count of iommu */
6dd9a7c7
YS
410 int iommu_superpage;/* Level of superpages supported:
411 0 == 4KiB (no superpages), 1 == 2MiB,
412 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
fe40f1e0 413 u64 max_addr; /* maximum mapped address */
00a77deb
JR
414
415 struct iommu_domain domain; /* generic domain data structure for
416 iommu core */
99126f7c
MM
417};
418
a647dacb
MM
419/* PCI domain-device relationship */
420struct device_domain_info {
421 struct list_head link; /* link to domain siblings */
422 struct list_head global; /* link to global list */
276dbf99 423 u8 bus; /* PCI bus number */
a647dacb 424 u8 devfn; /* PCI devfn number */
b16d0cb9
DW
425 u8 pasid_supported:3;
426 u8 pasid_enabled:1;
427 u8 pri_supported:1;
428 u8 pri_enabled:1;
429 u8 ats_supported:1;
430 u8 ats_enabled:1;
431 u8 ats_qdep;
0bcb3e28 432 struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
93a23a72 433 struct intel_iommu *iommu; /* IOMMU used by this device */
a647dacb
MM
434 struct dmar_domain *domain; /* pointer to domain */
435};
436
struct dmar_rmrr_unit {
	struct list_head list;		/* list of rmrr units	*/
	struct acpi_dmar_header *hdr;	/* ACPI header		*/
	u64	base_address;		/* reserved base address*/
	u64	end_address;		/* reserved end address */
	struct dmar_dev_scope *devices;	/* target devices */
	int	devices_cnt;		/* target device count */
	struct iommu_resv_region *resv;	/* reserved region handle */
};

struct dmar_atsr_unit {
	struct list_head list;		/* list of ATSR units */
	struct acpi_dmar_header *hdr;	/* ACPI header */
	struct dmar_dev_scope *devices;	/* target devices */
	int devices_cnt;		/* target device count */
	u8 include_all:1;		/* include all ports */
};

static LIST_HEAD(dmar_atsr_units);
static LIST_HEAD(dmar_rmrr_units);

#define for_each_rmrr_units(rmrr) \
	list_for_each_entry(rmrr, &dmar_rmrr_units, list)

/* bitmap for indexing intel_iommus */
static int g_num_of_iommus;

static void domain_exit(struct dmar_domain *domain);
static void domain_remove_dev_info(struct dmar_domain *domain);
static void dmar_remove_one_dev_info(struct dmar_domain *domain,
				     struct device *dev);
static void __dmar_remove_one_dev_info(struct device_domain_info *info);
static void domain_context_clear(struct intel_iommu *iommu,
				 struct device *dev);
static int domain_detach_iommu(struct dmar_domain *domain,
			       struct intel_iommu *iommu);

#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
int dmar_disabled = 0;
#else
int dmar_disabled = 1;
#endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/

int intel_iommu_enabled = 0;
EXPORT_SYMBOL_GPL(intel_iommu_enabled);

static int dmar_map_gfx = 1;
static int dmar_forcedac;
static int intel_iommu_strict;
static int intel_iommu_superpage = 1;
static int intel_iommu_ecs = 1;
static int intel_iommu_pasid28;
static int iommu_identity_mapping;

#define IDENTMAP_ALL		1
#define IDENTMAP_GFX		2
#define IDENTMAP_AZALIA		4

/* Broadwell and Skylake have broken ECS support — normal so-called "second
 * level" translation of DMA requests-without-PASID doesn't actually happen
 * unless you also set the NESTE bit in an extended context-entry. Which of
 * course means that SVM doesn't work because it's trying to do nested
 * translation of the physical addresses it finds in the process page tables,
 * through the IOVA->phys mapping found in the "second level" page tables.
 *
 * The VT-d specification was retroactively changed to change the definition
 * of the capability bits and pretend that Broadwell/Skylake never happened...
 * but unfortunately the wrong bit was changed. It's ECS which is broken, but
 * for some reason it was the PASID capability bit which was redefined (from
 * bit 28 on BDW/SKL to bit 40 in future).
 *
 * So our test for ECS needs to eschew those implementations which set the old
 * PASID capability bit 28, since those are the ones on which ECS is broken.
 * Unless we are working around the 'pasid28' limitations, that is, by putting
 * the device into passthrough mode for normal DMA and thus masking the bug.
 */
#define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap) && \
			    (intel_iommu_pasid28 || !ecap_broken_pasid(iommu->ecap)))
/* PASID support is thus enabled if ECS is enabled and *either* of the old
 * or new capability bits are set. */
#define pasid_enabled(iommu) (ecs_enabled(iommu) &&			\
			      (ecap_pasid(iommu->ecap) || ecap_broken_pasid(iommu->ecap)))

int intel_iommu_gfx_mapped;
EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);

#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
static DEFINE_SPINLOCK(device_domain_lock);
static LIST_HEAD(device_domain_list);

const struct iommu_ops intel_iommu_ops;

static bool translation_pre_enabled(struct intel_iommu *iommu)
{
	return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
}

static void clear_translation_pre_enabled(struct intel_iommu *iommu)
{
	iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
}

static void init_translation_status(struct intel_iommu *iommu)
{
	u32 gsts;

	gsts = readl(iommu->reg + DMAR_GSTS_REG);
	if (gsts & DMA_GSTS_TES)
		iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
}

/* Convert generic 'struct iommu_domain to private struct dmar_domain */
static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
{
	return container_of(dom, struct dmar_domain, domain);
}

static int __init intel_iommu_setup(char *str)
{
	if (!str)
		return -EINVAL;
	while (*str) {
		if (!strncmp(str, "on", 2)) {
			dmar_disabled = 0;
			pr_info("IOMMU enabled\n");
		} else if (!strncmp(str, "off", 3)) {
			dmar_disabled = 1;
			pr_info("IOMMU disabled\n");
		} else if (!strncmp(str, "igfx_off", 8)) {
			dmar_map_gfx = 0;
			pr_info("Disable GFX device mapping\n");
		} else if (!strncmp(str, "forcedac", 8)) {
			pr_info("Forcing DAC for PCI devices\n");
			dmar_forcedac = 1;
		} else if (!strncmp(str, "strict", 6)) {
			pr_info("Disable batched IOTLB flush\n");
			intel_iommu_strict = 1;
		} else if (!strncmp(str, "sp_off", 6)) {
			pr_info("Disable supported super page\n");
			intel_iommu_superpage = 0;
		} else if (!strncmp(str, "ecs_off", 7)) {
			printk(KERN_INFO
				"Intel-IOMMU: disable extended context table support\n");
			intel_iommu_ecs = 0;
		} else if (!strncmp(str, "pasid28", 7)) {
			printk(KERN_INFO
				"Intel-IOMMU: enable pre-production PASID support\n");
			intel_iommu_pasid28 = 1;
			iommu_identity_mapping |= IDENTMAP_GFX;
		} else if (!strncmp(str, "tboot_noforce", 13)) {
			printk(KERN_INFO
				"Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
			intel_iommu_tboot_noforce = 1;
		}

		str += strcspn(str, ",");
		while (*str == ',')
			str++;
	}
	return 0;
}
__setup("intel_iommu=", intel_iommu_setup);

static struct kmem_cache *iommu_domain_cache;
static struct kmem_cache *iommu_devinfo_cache;

static struct dmar_domain* get_iommu_domain(struct intel_iommu *iommu, u16 did)
{
	struct dmar_domain **domains;
	int idx = did >> 8;

	domains = iommu->domains[idx];
	if (!domains)
		return NULL;

	return domains[did & 0xff];
}

static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
			     struct dmar_domain *domain)
{
	struct dmar_domain **domains;
	int idx = did >> 8;

	if (!iommu->domains[idx]) {
		size_t size = 256 * sizeof(struct dmar_domain *);
		iommu->domains[idx] = kzalloc(size, GFP_ATOMIC);
	}

	domains = iommu->domains[idx];
	if (WARN_ON(!domains))
		return;
	else
		domains[did & 0xff] = domain;
}

static inline void *alloc_pgtable_page(int node)
{
	struct page *page;
	void *vaddr = NULL;

	page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
	if (page)
		vaddr = page_address(page);
	return vaddr;
}

static inline void free_pgtable_page(void *vaddr)
{
	free_page((unsigned long)vaddr);
}

static inline void *alloc_domain_mem(void)
{
	return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
}

static void free_domain_mem(void *vaddr)
{
	kmem_cache_free(iommu_domain_cache, vaddr);
}

static inline void * alloc_devinfo_mem(void)
{
	return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
}

static inline void free_devinfo_mem(void *vaddr)
{
	kmem_cache_free(iommu_devinfo_cache, vaddr);
}

static inline int domain_type_is_vm(struct dmar_domain *domain)
{
	return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE;
}

static inline int domain_type_is_si(struct dmar_domain *domain)
{
	return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
}

static inline int domain_type_is_vm_or_si(struct dmar_domain *domain)
{
	return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE |
				DOMAIN_FLAG_STATIC_IDENTITY);
}

static inline int domain_pfn_supported(struct dmar_domain *domain,
				       unsigned long pfn)
{
	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;

	return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
}

static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
{
	unsigned long sagaw;
	int agaw = -1;

	sagaw = cap_sagaw(iommu->cap);
	for (agaw = width_to_agaw(max_gaw);
	     agaw >= 0; agaw--) {
		if (test_bit(agaw, &sagaw))
			break;
	}

	return agaw;
}

/*
 * Calculate max SAGAW for each iommu.
 */
int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
{
	return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
}

/*
 * calculate agaw for each iommu.
 * "SAGAW" may be different across iommus, use a default agaw, and
 * get a supported less agaw for iommus that don't support the default agaw.
 */
int iommu_calculate_agaw(struct intel_iommu *iommu)
{
	return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
}

/* This function only returns single iommu in a domain */
static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
{
	int iommu_id;

	/* si_domain and vm domain should not get here. */
	BUG_ON(domain_type_is_vm_or_si(domain));
	for_each_domain_iommu(iommu_id, domain)
		break;

	if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
		return NULL;

	return g_iommus[iommu_id];
}

static void domain_update_iommu_coherency(struct dmar_domain *domain)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	bool found = false;
	int i;

	domain->iommu_coherency = 1;

	for_each_domain_iommu(i, domain) {
		found = true;
		if (!ecap_coherent(g_iommus[i]->ecap)) {
			domain->iommu_coherency = 0;
			break;
		}
	}
	if (found)
		return;

	/* No hardware attached; use lowest common denominator */
	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (!ecap_coherent(iommu->ecap)) {
			domain->iommu_coherency = 0;
			break;
		}
	}
	rcu_read_unlock();
}

static int domain_update_iommu_snooping(struct intel_iommu *skip)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	int ret = 1;

	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (iommu != skip) {
			if (!ecap_sc_support(iommu->ecap)) {
				ret = 0;
				break;
			}
		}
	}
	rcu_read_unlock();

	return ret;
}

static int domain_update_iommu_superpage(struct intel_iommu *skip)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	int mask = 0xf;

	if (!intel_iommu_superpage) {
		return 0;
	}

	/* set iommu_superpage to the smallest common denominator */
	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (iommu != skip) {
			mask &= cap_super_page_val(iommu->cap);
			if (!mask)
				break;
		}
	}
	rcu_read_unlock();

	return fls(mask);
}

/* Some capabilities may be different across iommus */
static void domain_update_iommu_cap(struct dmar_domain *domain)
{
	domain_update_iommu_coherency(domain);
	domain->iommu_snooping = domain_update_iommu_snooping(NULL);
	domain->iommu_superpage = domain_update_iommu_superpage(NULL);
}

static inline struct context_entry *iommu_context_addr(struct intel_iommu *iommu,
						       u8 bus, u8 devfn, int alloc)
{
	struct root_entry *root = &iommu->root_entry[bus];
	struct context_entry *context;
	u64 *entry;

	entry = &root->lo;
	if (ecs_enabled(iommu)) {
		if (devfn >= 0x80) {
			devfn -= 0x80;
			entry = &root->hi;
		}
		devfn *= 2;
	}
	if (*entry & 1)
		context = phys_to_virt(*entry & VTD_PAGE_MASK);
	else {
		unsigned long phy_addr;
		if (!alloc)
			return NULL;

		context = alloc_pgtable_page(iommu->node);
		if (!context)
			return NULL;

		__iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
		phy_addr = virt_to_phys((void *)context);
		*entry = phy_addr | 1;
		__iommu_flush_cache(iommu, entry, sizeof(*entry));
	}
	return &context[devfn];
}

static int iommu_dummy(struct device *dev)
{
	return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
}

static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
{
	struct dmar_drhd_unit *drhd = NULL;
	struct intel_iommu *iommu;
	struct device *tmp;
	struct pci_dev *ptmp, *pdev = NULL;
	u16 segment = 0;
	int i;

	if (iommu_dummy(dev))
		return NULL;

	if (dev_is_pci(dev)) {
		struct pci_dev *pf_pdev;

		pdev = to_pci_dev(dev);

#ifdef CONFIG_X86
		/* VMD child devices currently cannot be handled individually */
		if (is_vmd(pdev->bus))
			return NULL;
#endif

		/* VFs aren't listed in scope tables; we need to look up
		 * the PF instead to find the IOMMU. */
		pf_pdev = pci_physfn(pdev);
		dev = &pf_pdev->dev;
		segment = pci_domain_nr(pdev->bus);
	} else if (has_acpi_companion(dev))
		dev = &ACPI_COMPANION(dev)->dev;

	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (pdev && segment != drhd->segment)
			continue;

		for_each_active_dev_scope(drhd->devices,
					  drhd->devices_cnt, i, tmp) {
			if (tmp == dev) {
				/* For a VF use its original BDF# not that of the PF
				 * which we used for the IOMMU lookup. Strictly speaking
				 * we could do this for all PCI devices; we only need to
				 * get the BDF# from the scope table for ACPI matches. */
				if (pdev && pdev->is_virtfn)
					goto got_pdev;

				*bus = drhd->devices[i].bus;
				*devfn = drhd->devices[i].devfn;
				goto out;
			}

			if (!pdev || !dev_is_pci(tmp))
				continue;

			ptmp = to_pci_dev(tmp);
			if (ptmp->subordinate &&
			    ptmp->subordinate->number <= pdev->bus->number &&
			    ptmp->subordinate->busn_res.end >= pdev->bus->number)
				goto got_pdev;
		}

		if (pdev && drhd->include_all) {
		got_pdev:
			*bus = pdev->bus->number;
			*devfn = pdev->devfn;
			goto out;
		}
	}
	iommu = NULL;
 out:
	rcu_read_unlock();

	return iommu;
}

static void domain_flush_cache(struct dmar_domain *domain,
			       void *addr, int size)
{
	if (!domain->iommu_coherency)
		clflush_cache_range(addr, size);
}

static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
	struct context_entry *context;
	int ret = 0;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	context = iommu_context_addr(iommu, bus, devfn, 0);
	if (context)
		ret = context_present(context);
	spin_unlock_irqrestore(&iommu->lock, flags);
	return ret;
}

static void free_context_table(struct intel_iommu *iommu)
{
	int i;
	unsigned long flags;
	struct context_entry *context;

	spin_lock_irqsave(&iommu->lock, flags);
	if (!iommu->root_entry) {
		goto out;
	}
	for (i = 0; i < ROOT_ENTRY_NR; i++) {
		context = iommu_context_addr(iommu, i, 0, 0);
		if (context)
			free_pgtable_page(context);

		if (!ecs_enabled(iommu))
			continue;

		context = iommu_context_addr(iommu, i, 0x80, 0);
		if (context)
			free_pgtable_page(context);

	}
	free_pgtable_page(iommu->root_entry);
	iommu->root_entry = NULL;
out:
	spin_unlock_irqrestore(&iommu->lock, flags);
}

static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
				      unsigned long pfn, int *target_level)
{
	struct dma_pte *parent, *pte = NULL;
	int level = agaw_to_level(domain->agaw);
	int offset;

	BUG_ON(!domain->pgd);

	if (!domain_pfn_supported(domain, pfn))
		/* Address beyond IOMMU's addressing capabilities. */
		return NULL;

	parent = domain->pgd;

	while (1) {
		void *tmp_page;

		offset = pfn_level_offset(pfn, level);
		pte = &parent[offset];
		if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
			break;
		if (level == *target_level)
			break;

		if (!dma_pte_present(pte)) {
			uint64_t pteval;

			tmp_page = alloc_pgtable_page(domain->nid);

			if (!tmp_page)
				return NULL;

			domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
			pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
			if (cmpxchg64(&pte->val, 0ULL, pteval))
				/* Someone else set it while we were thinking; use theirs. */
				free_pgtable_page(tmp_page);
			else
				domain_flush_cache(domain, pte, sizeof(*pte));
		}
		if (level == 1)
			break;

		parent = phys_to_virt(dma_pte_addr(pte));
		level--;
	}

	if (!*target_level)
		*target_level = level;

	return pte;
}


/* return address's pte at specific level */
static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
					 unsigned long pfn,
					 int level, int *large_page)
{
	struct dma_pte *parent, *pte = NULL;
	int total = agaw_to_level(domain->agaw);
	int offset;

	parent = domain->pgd;
	while (level <= total) {
		offset = pfn_level_offset(pfn, total);
		pte = &parent[offset];
		if (level == total)
			return pte;

		if (!dma_pte_present(pte)) {
			*large_page = total;
			break;
		}

		if (dma_pte_superpage(pte)) {
			*large_page = total;
			return pte;
		}

		parent = phys_to_virt(dma_pte_addr(pte));
		total--;
	}
	return NULL;
}

/* clear last level pte, a tlb flush should be followed */
static void dma_pte_clear_range(struct dmar_domain *domain,
				unsigned long start_pfn,
				unsigned long last_pfn)
{
	unsigned int large_page = 1;
	struct dma_pte *first_pte, *pte;

	BUG_ON(!domain_pfn_supported(domain, start_pfn));
	BUG_ON(!domain_pfn_supported(domain, last_pfn));
	BUG_ON(start_pfn > last_pfn);

	/* we don't need lock here; nobody else touches the iova range */
	do {
		large_page = 1;
		first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
		if (!pte) {
			start_pfn = align_to_level(start_pfn + 1, large_page + 1);
			continue;
		}
		do {
			dma_clear_pte(pte);
			start_pfn += lvl_to_nr_pages(large_page);
			pte++;
		} while (start_pfn <= last_pfn && !first_pte_in_page(pte));

		domain_flush_cache(domain, first_pte,
				   (void *)pte - (void *)first_pte);

	} while (start_pfn && start_pfn <= last_pfn);
}

static void dma_pte_free_level(struct dmar_domain *domain, int level,
			       int retain_level, struct dma_pte *pte,
			       unsigned long pfn, unsigned long start_pfn,
			       unsigned long last_pfn)
{
	pfn = max(start_pfn, pfn);
	pte = &pte[pfn_level_offset(pfn, level)];

	do {
		unsigned long level_pfn;
		struct dma_pte *level_pte;

		if (!dma_pte_present(pte) || dma_pte_superpage(pte))
			goto next;

		level_pfn = pfn & level_mask(level);
		level_pte = phys_to_virt(dma_pte_addr(pte));

		if (level > 2) {
			dma_pte_free_level(domain, level - 1, retain_level,
					   level_pte, level_pfn, start_pfn,
					   last_pfn);
		}

		/*
		 * Free the page table if we're below the level we want to
		 * retain and the range covers the entire table.
		 */
		if (level < retain_level && !(start_pfn > level_pfn ||
		      last_pfn < level_pfn + level_size(level) - 1)) {
			dma_clear_pte(pte);
			domain_flush_cache(domain, pte, sizeof(*pte));
			free_pgtable_page(level_pte);
		}
next:
		pfn += level_size(level);
	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);
}

/*
 * clear last level (leaf) ptes and free page table pages below the
 * level we wish to keep intact.
 */
static void dma_pte_free_pagetable(struct dmar_domain *domain,
				   unsigned long start_pfn,
				   unsigned long last_pfn,
				   int retain_level)
{
	BUG_ON(!domain_pfn_supported(domain, start_pfn));
	BUG_ON(!domain_pfn_supported(domain, last_pfn));
	BUG_ON(start_pfn > last_pfn);

	dma_pte_clear_range(domain, start_pfn, last_pfn);

	/* We don't need lock here; nobody else touches the iova range */
	dma_pte_free_level(domain, agaw_to_level(domain->agaw), retain_level,
			   domain->pgd, 0, start_pfn, last_pfn);

	/* free pgd */
	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
		free_pgtable_page(domain->pgd);
		domain->pgd = NULL;
	}
}

/* When a page at a given level is being unlinked from its parent, we don't
   need to *modify* it at all. All we need to do is make a list of all the
   pages which can be freed just as soon as we've flushed the IOTLB and we
   know the hardware page-walk will no longer touch them.
   The 'pte' argument is the *parent* PTE, pointing to the page that is to
   be freed. */
static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
					    int level, struct dma_pte *pte,
					    struct page *freelist)
{
	struct page *pg;

	pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
	pg->freelist = freelist;
	freelist = pg;

	if (level == 1)
		return freelist;

	pte = page_address(pg);
	do {
		if (dma_pte_present(pte) && !dma_pte_superpage(pte))
			freelist = dma_pte_list_pagetables(domain, level - 1,
							   pte, freelist);
		pte++;
	} while (!first_pte_in_page(pte));

	return freelist;
}

static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
					struct dma_pte *pte, unsigned long pfn,
					unsigned long start_pfn,
					unsigned long last_pfn,
					struct page *freelist)
{
	struct dma_pte *first_pte = NULL, *last_pte = NULL;

	pfn = max(start_pfn, pfn);
	pte = &pte[pfn_level_offset(pfn, level)];

	do {
		unsigned long level_pfn;

		if (!dma_pte_present(pte))
			goto next;

		level_pfn = pfn & level_mask(level);

		/* If range covers entire pagetable, free it */
		if (start_pfn <= level_pfn &&
		    last_pfn >= level_pfn + level_size(level) - 1) {
			/* These subordinate page tables are going away entirely. Don't
			   bother to clear them; we're just going to *free* them. */
			if (level > 1 && !dma_pte_superpage(pte))
				freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);

			dma_clear_pte(pte);
			if (!first_pte)
				first_pte = pte;
			last_pte = pte;
		} else if (level > 1) {
			/* Recurse down into a level that isn't *entirely* obsolete */
			freelist = dma_pte_clear_level(domain, level - 1,
						       phys_to_virt(dma_pte_addr(pte)),
						       level_pfn, start_pfn, last_pfn,
						       freelist);
		}
next:
		pfn += level_size(level);
	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);

	if (first_pte)
		domain_flush_cache(domain, first_pte,
				   (void *)++last_pte - (void *)first_pte);

	return freelist;
}

/* We can't just free the pages because the IOMMU may still be walking
   the page tables, and may have cached the intermediate levels. The
   pages can only be freed after the IOTLB flush has been done. */
static struct page *domain_unmap(struct dmar_domain *domain,
				 unsigned long start_pfn,
				 unsigned long last_pfn)
{
	struct page *freelist = NULL;

	BUG_ON(!domain_pfn_supported(domain, start_pfn));
	BUG_ON(!domain_pfn_supported(domain, last_pfn));
	BUG_ON(start_pfn > last_pfn);

	/* we don't need lock here; nobody else touches the iova range */
	freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
				       domain->pgd, 0, start_pfn, last_pfn, NULL);

	/* free pgd */
	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
		struct page *pgd_page = virt_to_page(domain->pgd);
		pgd_page->freelist = freelist;
		freelist = pgd_page;

		domain->pgd = NULL;
	}

	return freelist;
}

static void dma_free_pagelist(struct page *freelist)
{
	struct page *pg;

	while ((pg = freelist)) {
		freelist = pg->freelist;
		free_pgtable_page(page_address(pg));
	}
}

static void iova_entry_free(unsigned long data)
{
	struct page *freelist = (struct page *)data;

	dma_free_pagelist(freelist);
}

/* iommu handling */
static int iommu_alloc_root_entry(struct intel_iommu *iommu)
{
	struct root_entry *root;
	unsigned long flags;

	root = (struct root_entry *)alloc_pgtable_page(iommu->node);
	if (!root) {
		pr_err("Allocating root entry for %s failed\n",
			iommu->name);
		return -ENOMEM;
	}

	__iommu_flush_cache(iommu, root, ROOT_SIZE);

	spin_lock_irqsave(&iommu->lock, flags);
	iommu->root_entry = root;
	spin_unlock_irqrestore(&iommu->lock, flags);

	return 0;
}

static void iommu_set_root_entry(struct intel_iommu *iommu)
{
	u64 addr;
	u32 sts;
	unsigned long flag;

	addr = virt_to_phys(iommu->root_entry);
	if (ecs_enabled(iommu))
		addr |= DMA_RTADDR_RTT;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);

	writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (sts & DMA_GSTS_RTPS), sts);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

static void iommu_flush_write_buffer(struct intel_iommu *iommu)
{
	u32 val;
	unsigned long flag;

	if (!rwbf_quirk && !cap_rwbf(iommu->cap))
		return;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (!(val & DMA_GSTS_WBFS)), val);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

4c25a2c1
DW
1360static void __iommu_flush_context(struct intel_iommu *iommu,
1361 u16 did, u16 source_id, u8 function_mask,
1362 u64 type)
ba395927
KA
1363{
1364 u64 val = 0;
1365 unsigned long flag;
1366
ba395927
KA
1367 switch (type) {
1368 case DMA_CCMD_GLOBAL_INVL:
1369 val = DMA_CCMD_GLOBAL_INVL;
1370 break;
1371 case DMA_CCMD_DOMAIN_INVL:
1372 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1373 break;
1374 case DMA_CCMD_DEVICE_INVL:
1375 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1376 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1377 break;
1378 default:
1379 BUG();
1380 }
1381 val |= DMA_CCMD_ICC;
1382
1f5b3c3f 1383 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1384 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1385
1386 /* Make sure hardware complete it */
1387 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1388 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1389
1f5b3c3f 1390 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1391}
1392
/* return value determines if we need a write buffer flush */
static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
				u64 addr, unsigned int size_order, u64 type)
{
	int tlb_offset = ecap_iotlb_offset(iommu->ecap);
	u64 val = 0, val_iva = 0;
	unsigned long flag;

	switch (type) {
	case DMA_TLB_GLOBAL_FLUSH:
		/* global flush doesn't need set IVA_REG */
		val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
		break;
	case DMA_TLB_DSI_FLUSH:
		val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		break;
	case DMA_TLB_PSI_FLUSH:
		val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		/* IH bit is passed in as part of address */
		val_iva = size_order | addr;
		break;
	default:
		BUG();
	}
	/* Note: set drain read/write */
#if 0
	/*
	 * This is probably to be super secure.. Looks like we can
	 * ignore it without any impact.
	 */
	if (cap_read_drain(iommu->cap))
		val |= DMA_TLB_READ_DRAIN;
#endif
	if (cap_write_drain(iommu->cap))
		val |= DMA_TLB_WRITE_DRAIN;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	/* Note: Only uses first TLB reg currently */
	if (val_iva)
		dmar_writeq(iommu->reg + tlb_offset, val_iva);
	dmar_writeq(iommu->reg + tlb_offset + 8, val);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, tlb_offset + 8,
		dmar_readq, (!(val & DMA_TLB_IVT)), val);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);

	/* check IOTLB invalidation granularity */
	if (DMA_TLB_IAIG(val) == 0)
		pr_err("Flush IOTLB failed\n");
	if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
		pr_debug("TLB flush request %Lx, actual %Lx\n",
			(unsigned long long)DMA_TLB_IIRG(type),
			(unsigned long long)DMA_TLB_IAIG(val));
}

DW
1450static struct device_domain_info *
1451iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
1452 u8 bus, u8 devfn)
93a23a72 1453{
93a23a72 1454 struct device_domain_info *info;
93a23a72 1455
55d94043
JR
1456 assert_spin_locked(&device_domain_lock);
1457
93a23a72
YZ
1458 if (!iommu->qi)
1459 return NULL;
1460
93a23a72 1461 list_for_each_entry(info, &domain->devices, link)
c3b497c6
JL
1462 if (info->iommu == iommu && info->bus == bus &&
1463 info->devfn == devfn) {
b16d0cb9
DW
1464 if (info->ats_supported && info->dev)
1465 return info;
93a23a72
YZ
1466 break;
1467 }
93a23a72 1468
b16d0cb9 1469 return NULL;
93a23a72
YZ
1470}
1471
static void domain_update_iotlb(struct dmar_domain *domain)
{
	struct device_domain_info *info;
	bool has_iotlb_device = false;

	assert_spin_locked(&device_domain_lock);

	list_for_each_entry(info, &domain->devices, link) {
		struct pci_dev *pdev;

		if (!info->dev || !dev_is_pci(info->dev))
			continue;

		pdev = to_pci_dev(info->dev);
		if (pdev->ats_enabled) {
			has_iotlb_device = true;
			break;
		}
	}

	domain->has_iotlb_device = has_iotlb_device;
}

static void iommu_enable_dev_iotlb(struct device_domain_info *info)
{
	struct pci_dev *pdev;

	assert_spin_locked(&device_domain_lock);

	if (!info || !dev_is_pci(info->dev))
		return;

	pdev = to_pci_dev(info->dev);

#ifdef CONFIG_INTEL_IOMMU_SVM
	/* The PCIe spec, in its wisdom, declares that the behaviour of
	   the device if you enable PASID support after ATS support is
	   undefined. So always enable PASID support on devices which
	   have it, even if we can't yet know if we're ever going to
	   use it. */
	if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
		info->pasid_enabled = 1;

	if (info->pri_supported && !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
		info->pri_enabled = 1;
#endif
	if (info->ats_supported && !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
		info->ats_enabled = 1;
		domain_update_iotlb(info->domain);
		info->ats_qdep = pci_ats_queue_depth(pdev);
	}
}

static void iommu_disable_dev_iotlb(struct device_domain_info *info)
{
	struct pci_dev *pdev;

	assert_spin_locked(&device_domain_lock);

	if (!dev_is_pci(info->dev))
		return;

	pdev = to_pci_dev(info->dev);

	if (info->ats_enabled) {
		pci_disable_ats(pdev);
		info->ats_enabled = 0;
		domain_update_iotlb(info->domain);
	}
#ifdef CONFIG_INTEL_IOMMU_SVM
	if (info->pri_enabled) {
		pci_disable_pri(pdev);
		info->pri_enabled = 0;
	}
	if (info->pasid_enabled) {
		pci_disable_pasid(pdev);
		info->pasid_enabled = 0;
	}
#endif
}

static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
				  u64 addr, unsigned mask)
{
	u16 sid, qdep;
	unsigned long flags;
	struct device_domain_info *info;

	if (!domain->has_iotlb_device)
		return;

	spin_lock_irqsave(&device_domain_lock, flags);
	list_for_each_entry(info, &domain->devices, link) {
		if (!info->ats_enabled)
			continue;

		sid = info->bus << 8 | info->devfn;
		qdep = info->ats_qdep;
		qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
	}
	spin_unlock_irqrestore(&device_domain_lock, flags);
}

static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
				  struct dmar_domain *domain,
				  unsigned long pfn, unsigned int pages,
				  int ih, int map)
{
	unsigned int mask = ilog2(__roundup_pow_of_two(pages));
	uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
	u16 did = domain->iommu_did[iommu->seq_id];

	BUG_ON(pages == 0);

	if (ih)
		ih = 1 << 6;
	/*
	 * Fallback to domain selective flush if no PSI support or the size is
	 * too big.
	 * PSI requires page size to be 2 ^ x, and the base address is naturally
	 * aligned to the size
	 */
	if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
		iommu->flush.flush_iotlb(iommu, did, 0, 0,
						DMA_TLB_DSI_FLUSH);
	else
		iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
						DMA_TLB_PSI_FLUSH);

	/*
	 * In caching mode, changes of pages from non-present to present require
	 * flush. However, device IOTLB doesn't need to be flushed in this case.
	 */
	if (!cap_caching_mode(iommu->cap) || !map)
		iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
				      addr, mask);
}

static void iommu_flush_iova(struct iova_domain *iovad)
{
	struct dmar_domain *domain;
	int idx;

	domain = container_of(iovad, struct dmar_domain, iovad);

	for_each_domain_iommu(idx, domain) {
		struct intel_iommu *iommu = g_iommus[idx];
		u16 did = domain->iommu_did[iommu->seq_id];

		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);

		if (!cap_caching_mode(iommu->cap))
			iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
					      0, MAX_AGAW_PFN_WIDTH);
	}
}

static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
{
	u32 pmen;
	unsigned long flags;

	raw_spin_lock_irqsave(&iommu->register_lock, flags);
	pmen = readl(iommu->reg + DMAR_PMEN_REG);
	pmen &= ~DMA_PMEN_EPM;
	writel(pmen, iommu->reg + DMAR_PMEN_REG);

	/* wait for the protected region status bit to clear */
	IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
		readl, !(pmen & DMA_PMEN_PRS), pmen);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
}

static void iommu_enable_translation(struct intel_iommu *iommu)
{
	u32 sts;
	unsigned long flags;

	raw_spin_lock_irqsave(&iommu->register_lock, flags);
	iommu->gcmd |= DMA_GCMD_TE;
	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (sts & DMA_GSTS_TES), sts);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
}

static void iommu_disable_translation(struct intel_iommu *iommu)
{
	u32 sts;
	unsigned long flag;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	iommu->gcmd &= ~DMA_GCMD_TE;
	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (!(sts & DMA_GSTS_TES)), sts);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}


static int iommu_init_domains(struct intel_iommu *iommu)
{
	u32 ndomains, nlongs;
	size_t size;

	ndomains = cap_ndoms(iommu->cap);
	pr_debug("%s: Number of Domains supported <%d>\n",
		 iommu->name, ndomains);
	nlongs = BITS_TO_LONGS(ndomains);

	spin_lock_init(&iommu->lock);

	iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
	if (!iommu->domain_ids) {
		pr_err("%s: Allocating domain id array failed\n",
		       iommu->name);
		return -ENOMEM;
	}

	size = (ALIGN(ndomains, 256) >> 8) * sizeof(struct dmar_domain **);
	iommu->domains = kzalloc(size, GFP_KERNEL);

	if (iommu->domains) {
		size = 256 * sizeof(struct dmar_domain *);
		iommu->domains[0] = kzalloc(size, GFP_KERNEL);
	}

	if (!iommu->domains || !iommu->domains[0]) {
		pr_err("%s: Allocating domain array failed\n",
		       iommu->name);
		kfree(iommu->domain_ids);
		kfree(iommu->domains);
		iommu->domain_ids = NULL;
		iommu->domains    = NULL;
		return -ENOMEM;
	}



	/*
	 * If Caching mode is set, then invalid translations are tagged
	 * with domain-id 0, hence we need to pre-allocate it. We also
	 * use domain-id 0 as a marker for non-allocated domain-id, so
	 * make sure it is not used for a real domain.
	 */
	set_bit(0, iommu->domain_ids);

	return 0;
}

static void disable_dmar_iommu(struct intel_iommu *iommu)
{
	struct device_domain_info *info, *tmp;
	unsigned long flags;

	if (!iommu->domains || !iommu->domain_ids)
		return;

again:
	spin_lock_irqsave(&device_domain_lock, flags);
	list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
		struct dmar_domain *domain;

		if (info->iommu != iommu)
			continue;

		if (!info->dev || !info->domain)
			continue;

		domain = info->domain;

		__dmar_remove_one_dev_info(info);

		if (!domain_type_is_vm_or_si(domain)) {
			/*
			 * The domain_exit() function can't be called under
			 * device_domain_lock, as it takes this lock itself.
			 * So release the lock here and re-run the loop
			 * afterwards.
			 */
			spin_unlock_irqrestore(&device_domain_lock, flags);
			domain_exit(domain);
			goto again;
		}
	}
	spin_unlock_irqrestore(&device_domain_lock, flags);

	if (iommu->gcmd & DMA_GCMD_TE)
		iommu_disable_translation(iommu);
}

static void free_dmar_iommu(struct intel_iommu *iommu)
{
	if ((iommu->domains) && (iommu->domain_ids)) {
		int elems = ALIGN(cap_ndoms(iommu->cap), 256) >> 8;
		int i;

		for (i = 0; i < elems; i++)
			kfree(iommu->domains[i]);
		kfree(iommu->domains);
		kfree(iommu->domain_ids);
		iommu->domains = NULL;
		iommu->domain_ids = NULL;
	}

	g_iommus[iommu->seq_id] = NULL;

	/* free context mapping */
	free_context_table(iommu);

#ifdef CONFIG_INTEL_IOMMU_SVM
	if (pasid_enabled(iommu)) {
		if (ecap_prs(iommu->ecap))
			intel_svm_finish_prq(iommu);
		intel_svm_free_pasid_tables(iommu);
	}
#endif
}

static struct dmar_domain *alloc_domain(int flags)
{
	struct dmar_domain *domain;

	domain = alloc_domain_mem();
	if (!domain)
		return NULL;

	memset(domain, 0, sizeof(*domain));
	domain->nid = -1;
	domain->flags = flags;
	domain->has_iotlb_device = false;
	INIT_LIST_HEAD(&domain->devices);

	return domain;
}

/* Must be called with iommu->lock */
static int domain_attach_iommu(struct dmar_domain *domain,
			       struct intel_iommu *iommu)
{
	unsigned long ndomains;
	int num;

	assert_spin_locked(&device_domain_lock);
	assert_spin_locked(&iommu->lock);

	domain->iommu_refcnt[iommu->seq_id] += 1;
	domain->iommu_count += 1;
	if (domain->iommu_refcnt[iommu->seq_id] == 1) {
		ndomains = cap_ndoms(iommu->cap);
		num      = find_first_zero_bit(iommu->domain_ids, ndomains);

		if (num >= ndomains) {
			pr_err("%s: No free domain ids\n", iommu->name);
			domain->iommu_refcnt[iommu->seq_id] -= 1;
			domain->iommu_count -= 1;
			return -ENOSPC;
		}

		set_bit(num, iommu->domain_ids);
		set_iommu_domain(iommu, num, domain);

		domain->iommu_did[iommu->seq_id] = num;
		domain->nid		 = iommu->node;

		domain_update_iommu_cap(domain);
	}

	return 0;
}

static int domain_detach_iommu(struct dmar_domain *domain,
			       struct intel_iommu *iommu)
{
	int num, count = INT_MAX;

	assert_spin_locked(&device_domain_lock);
	assert_spin_locked(&iommu->lock);

	domain->iommu_refcnt[iommu->seq_id] -= 1;
	count = --domain->iommu_count;
	if (domain->iommu_refcnt[iommu->seq_id] == 0) {
d160aca5
JR
1861 num = domain->iommu_did[iommu->seq_id];
1862 clear_bit(num, iommu->domain_ids);
1863 set_iommu_domain(iommu, num, NULL);
fb170fb4 1864
fb170fb4 1865 domain_update_iommu_cap(domain);
c0e8a6c8 1866 domain->iommu_did[iommu->seq_id] = 0;
fb170fb4 1867 }
fb170fb4
JL
1868
1869 return count;
1870}
1871
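/*
 * Editorial sketch of the expected caller pattern (mirroring
 * dmar_insert_one_dev_info() further down in this file): both helpers
 * above are called with device_domain_lock and iommu->lock held,
 * roughly:
 *
 *	spin_lock_irqsave(&device_domain_lock, flags);
 *	spin_lock(&iommu->lock);
 *	ret = domain_attach_iommu(domain, iommu);
 *	spin_unlock(&iommu->lock);
 *	spin_unlock_irqrestore(&device_domain_lock, flags);
 */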
ba395927 1872static struct iova_domain reserved_iova_list;
8a443df4 1873static struct lock_class_key reserved_rbtree_key;
ba395927 1874
51a63e67 1875static int dmar_init_reserved_ranges(void)
ba395927
KA
1876{
1877 struct pci_dev *pdev = NULL;
1878 struct iova *iova;
1879 int i;
ba395927 1880
0fb5fe87
RM
1881 init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN,
1882 DMA_32BIT_PFN);
ba395927 1883
8a443df4
MG
1884 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1885 &reserved_rbtree_key);
1886
ba395927
KA
1887 /* IOAPIC ranges shouldn't be accessed by DMA */
1888 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1889 IOVA_PFN(IOAPIC_RANGE_END));
51a63e67 1890 if (!iova) {
9f10e5bf 1891 pr_err("Reserve IOAPIC range failed\n");
51a63e67
JC
1892 return -ENODEV;
1893 }
ba395927
KA
1894
1895 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1896 for_each_pci_dev(pdev) {
1897 struct resource *r;
1898
1899 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1900 r = &pdev->resource[i];
1901 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1902 continue;
1a4a4551
DW
1903 iova = reserve_iova(&reserved_iova_list,
1904 IOVA_PFN(r->start),
1905 IOVA_PFN(r->end));
51a63e67 1906 if (!iova) {
9f10e5bf 1907 pr_err("Reserve iova failed\n");
51a63e67
JC
1908 return -ENODEV;
1909 }
ba395927
KA
1910 }
1911 }
51a63e67 1912 return 0;
ba395927
KA
1913}
1914
1915static void domain_reserve_special_ranges(struct dmar_domain *domain)
1916{
1917 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1918}
1919
1920static inline int guestwidth_to_adjustwidth(int gaw)
1921{
1922 int agaw;
1923 int r = (gaw - 12) % 9;
1924
1925 if (r == 0)
1926 agaw = gaw;
1927 else
1928 agaw = gaw + 9 - r;
1929 if (agaw > 64)
1930 agaw = 64;
1931 return agaw;
1932}
1933
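/*
 * Editorial worked example for guestwidth_to_adjustwidth(): each
 * page-table level covers 9 address bits on top of the 12-bit page
 * offset, so the guest width is rounded up to 12 + 9*n (and clamped
 * to 64):
 *
 *	gaw = 39:  r = (39 - 12) % 9 = 0  ->  agaw = 39  (3 levels)
 *	gaw = 40:  r = (40 - 12) % 9 = 1  ->  agaw = 40 + 9 - 1 = 48
 *	gaw = 48:  r = 0                  ->  agaw = 48  (4 levels)
 */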
dc534b25
JR
1934static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
1935 int guest_width)
ba395927 1936{
ba395927
KA
1937 int adjust_width, agaw;
1938 unsigned long sagaw;
13cf0174 1939 int err;
ba395927 1940
0fb5fe87
RM
1941 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
1942 DMA_32BIT_PFN);
13cf0174
JR
1943
1944 err = init_iova_flush_queue(&domain->iovad,
1945 iommu_flush_iova, iova_entry_free);
1946 if (err)
1947 return err;
1948
ba395927
KA
1949 domain_reserve_special_ranges(domain);
1950
1951 /* calculate AGAW */
ba395927
KA
1952 if (guest_width > cap_mgaw(iommu->cap))
1953 guest_width = cap_mgaw(iommu->cap);
1954 domain->gaw = guest_width;
1955 adjust_width = guestwidth_to_adjustwidth(guest_width);
1956 agaw = width_to_agaw(adjust_width);
1957 sagaw = cap_sagaw(iommu->cap);
1958 if (!test_bit(agaw, &sagaw)) {
1959 /* hardware doesn't support it, choose a bigger one */
9f10e5bf 1960 pr_debug("Hardware doesn't support agaw %d\n", agaw);
ba395927
KA
1961 agaw = find_next_bit(&sagaw, 5, agaw);
1962 if (agaw >= 5)
1963 return -ENODEV;
1964 }
1965 domain->agaw = agaw;
ba395927 1966
8e604097
WH
1967 if (ecap_coherent(iommu->ecap))
1968 domain->iommu_coherency = 1;
1969 else
1970 domain->iommu_coherency = 0;
1971
58c610bd
SY
1972 if (ecap_sc_support(iommu->ecap))
1973 domain->iommu_snooping = 1;
1974 else
1975 domain->iommu_snooping = 0;
1976
214e39aa
DW
1977 if (intel_iommu_superpage)
1978 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1979 else
1980 domain->iommu_superpage = 0;
1981
4c923d47 1982 domain->nid = iommu->node;
c7151a8d 1983
ba395927 1984 /* always allocate the top pgd */
4c923d47 1985 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
ba395927
KA
1986 if (!domain->pgd)
1987 return -ENOMEM;
5b6985ce 1988 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
ba395927
KA
1989 return 0;
1990}
1991
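/*
 * Editorial worked example for the AGAW selection above (assuming the
 * usual encoding where agaw N corresponds to an (N+2)-level table):
 * with guest_width = 48, adjust_width stays 48 and agaw becomes 2
 * (4-level). If cap_sagaw() only advertises bit 3 (5-level support),
 * test_bit(2, &sagaw) fails and find_next_bit() bumps agaw to 3, so
 * the domain simply uses the deeper table.
 */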
1992static void domain_exit(struct dmar_domain *domain)
1993{
ea8ea460 1994 struct page *freelist = NULL;
ba395927
KA
1995
1996 /* Domain 0 is reserved, so don't process it */
1997 if (!domain)
1998 return;
1999
d160aca5
JR
2000 /* Remove associated devices and clear attached or cached domains */
2001 rcu_read_lock();
ba395927 2002 domain_remove_dev_info(domain);
d160aca5 2003 rcu_read_unlock();
92d03cc8 2004
ba395927
KA
2005 /* destroy iovas */
2006 put_iova_domain(&domain->iovad);
ba395927 2007
ea8ea460 2008 freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
ba395927 2009
ea8ea460
DW
2010 dma_free_pagelist(freelist);
2011
ba395927
KA
2012 free_domain_mem(domain);
2013}
2014
64ae892b
DW
2015static int domain_context_mapping_one(struct dmar_domain *domain,
2016 struct intel_iommu *iommu,
28ccce0d 2017 u8 bus, u8 devfn)
ba395927 2018{
c6c2cebd 2019 u16 did = domain->iommu_did[iommu->seq_id];
28ccce0d
JR
2020 int translation = CONTEXT_TT_MULTI_LEVEL;
2021 struct device_domain_info *info = NULL;
ba395927 2022 struct context_entry *context;
ba395927 2023 unsigned long flags;
ea6606b0 2024 struct dma_pte *pgd;
55d94043 2025 int ret, agaw;
28ccce0d 2026
c6c2cebd
JR
2027 WARN_ON(did == 0);
2028
28ccce0d
JR
2029 if (hw_pass_through && domain_type_is_si(domain))
2030 translation = CONTEXT_TT_PASS_THROUGH;
ba395927
KA
2031
2032 pr_debug("Set context mapping for %02x:%02x.%d\n",
2033 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
4ed0d3e6 2034
ba395927 2035 BUG_ON(!domain->pgd);
5331fe6f 2036
55d94043
JR
2037 spin_lock_irqsave(&device_domain_lock, flags);
2038 spin_lock(&iommu->lock);
2039
2040 ret = -ENOMEM;
03ecc32c 2041 context = iommu_context_addr(iommu, bus, devfn, 1);
ba395927 2042 if (!context)
55d94043 2043 goto out_unlock;
ba395927 2044
55d94043
JR
2045 ret = 0;
2046 if (context_present(context))
2047 goto out_unlock;
cf484d0e 2048
aec0e861
XP
2049 /*
2050 * For kdump cases, old valid entries may be cached due to the
2051 * in-flight DMA and copied pgtable, but there is no unmapping
2052 * behaviour for them, thus we need an explicit cache flush for
2053 * the newly-mapped device. For kdump, at this point, the device
2055 * is supposed to have finished its reset at driver probe stage, so no
2056 * in-flight DMA will exist, and we don't need to worry about it
2057 * hereafter.
2057 */
2058 if (context_copied(context)) {
2059 u16 did_old = context_domain_id(context);
2060
f73a7eee 2061 if (did_old >= 0 && did_old < cap_ndoms(iommu->cap)) {
aec0e861
XP
2062 iommu->flush.flush_context(iommu, did_old,
2063 (((u16)bus) << 8) | devfn,
2064 DMA_CCMD_MASK_NOBIT,
2065 DMA_CCMD_DEVICE_INVL);
f73a7eee
KA
2066 iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
2067 DMA_TLB_DSI_FLUSH);
2068 }
aec0e861
XP
2069 }
2070
ea6606b0
WH
2071 pgd = domain->pgd;
2072
de24e553 2073 context_clear_entry(context);
c6c2cebd 2074 context_set_domain_id(context, did);
ea6606b0 2075
de24e553
JR
2076 /*
2077 * Skip top levels of page tables for iommu which has less agaw
2078 * than default. Unnecessary for PT mode.
2079 */
93a23a72 2080 if (translation != CONTEXT_TT_PASS_THROUGH) {
de24e553 2081 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
55d94043 2082 ret = -ENOMEM;
de24e553 2083 pgd = phys_to_virt(dma_pte_addr(pgd));
55d94043
JR
2084 if (!dma_pte_present(pgd))
2085 goto out_unlock;
ea6606b0 2086 }
4ed0d3e6 2087
64ae892b 2088 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
b16d0cb9
DW
2089 if (info && info->ats_supported)
2090 translation = CONTEXT_TT_DEV_IOTLB;
2091 else
2092 translation = CONTEXT_TT_MULTI_LEVEL;
de24e553 2093
93a23a72
YZ
2094 context_set_address_root(context, virt_to_phys(pgd));
2095 context_set_address_width(context, iommu->agaw);
de24e553
JR
2096 } else {
2097 /*
2098 * In pass through mode, AW must be programmed to
2099 * indicate the largest AGAW value supported by
2100 * hardware. And ASR is ignored by hardware.
2101 */
2102 context_set_address_width(context, iommu->msagaw);
93a23a72 2103 }
4ed0d3e6
FY
2104
2105 context_set_translation_type(context, translation);
c07e7d21
MM
2106 context_set_fault_enable(context);
2107 context_set_present(context);
5331fe6f 2108 domain_flush_cache(domain, context, sizeof(*context));
ba395927 2109
4c25a2c1
DW
2110 /*
2111 * It's a non-present to present mapping. If the hardware doesn't cache
2112 * non-present entries we only need to flush the write-buffer. If it
2113 * _does_ cache non-present entries, then it does so in the special
2114 * domain #0, which we have to flush:
2115 */
2116 if (cap_caching_mode(iommu->cap)) {
2117 iommu->flush.flush_context(iommu, 0,
2118 (((u16)bus) << 8) | devfn,
2119 DMA_CCMD_MASK_NOBIT,
2120 DMA_CCMD_DEVICE_INVL);
c6c2cebd 2121 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
4c25a2c1 2122 } else {
ba395927 2123 iommu_flush_write_buffer(iommu);
4c25a2c1 2124 }
93a23a72 2125 iommu_enable_dev_iotlb(info);
c7151a8d 2126
55d94043
JR
2127 ret = 0;
2128
2129out_unlock:
2130 spin_unlock(&iommu->lock);
2131 spin_unlock_irqrestore(&device_domain_lock, flags);
fb170fb4 2132
5c365d18 2133 return ret;
ba395927
KA
2134}
2135
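/*
 * Editorial worked example for the AGAW-skipping loop in
 * domain_context_mapping_one() above (assuming the usual encoding
 * where agaw 1/2/3 means a 3/4/5-level table): if the domain was
 * built with agaw 2 (48-bit, 4 levels) but this IOMMU only supports
 * agaw 1 (39-bit, 3 levels), the loop runs once and replaces pgd with
 * the table referenced by its first entry, so the context entry's
 * address root and address width describe a 3-level table this IOMMU
 * can actually walk.
 */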
579305f7
AW
2136struct domain_context_mapping_data {
2137 struct dmar_domain *domain;
2138 struct intel_iommu *iommu;
579305f7
AW
2139};
2140
2141static int domain_context_mapping_cb(struct pci_dev *pdev,
2142 u16 alias, void *opaque)
2143{
2144 struct domain_context_mapping_data *data = opaque;
2145
2146 return domain_context_mapping_one(data->domain, data->iommu,
28ccce0d 2147 PCI_BUS_NUM(alias), alias & 0xff);
579305f7
AW
2148}
2149
ba395927 2150static int
28ccce0d 2151domain_context_mapping(struct dmar_domain *domain, struct device *dev)
ba395927 2152{
64ae892b 2153 struct intel_iommu *iommu;
156baca8 2154 u8 bus, devfn;
579305f7 2155 struct domain_context_mapping_data data;
64ae892b 2156
e1f167f3 2157 iommu = device_to_iommu(dev, &bus, &devfn);
64ae892b
DW
2158 if (!iommu)
2159 return -ENODEV;
ba395927 2160
579305f7 2161 if (!dev_is_pci(dev))
28ccce0d 2162 return domain_context_mapping_one(domain, iommu, bus, devfn);
579305f7
AW
2163
2164 data.domain = domain;
2165 data.iommu = iommu;
579305f7
AW
2166
2167 return pci_for_each_dma_alias(to_pci_dev(dev),
2168 &domain_context_mapping_cb, &data);
2169}
2170
2171static int domain_context_mapped_cb(struct pci_dev *pdev,
2172 u16 alias, void *opaque)
2173{
2174 struct intel_iommu *iommu = opaque;
2175
2176 return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
ba395927
KA
2177}
2178
e1f167f3 2179static int domain_context_mapped(struct device *dev)
ba395927 2180{
5331fe6f 2181 struct intel_iommu *iommu;
156baca8 2182 u8 bus, devfn;
5331fe6f 2183
e1f167f3 2184 iommu = device_to_iommu(dev, &bus, &devfn);
5331fe6f
WH
2185 if (!iommu)
2186 return -ENODEV;
ba395927 2187
579305f7
AW
2188 if (!dev_is_pci(dev))
2189 return device_context_mapped(iommu, bus, devfn);
e1f167f3 2190
579305f7
AW
2191 return !pci_for_each_dma_alias(to_pci_dev(dev),
2192 domain_context_mapped_cb, iommu);
ba395927
KA
2193}
2194
f532959b
FY
2195/* Returns a number of VTD pages, but aligned to MM page size */
2196static inline unsigned long aligned_nrpages(unsigned long host_addr,
2197 size_t size)
2198{
2199 host_addr &= ~PAGE_MASK;
2200 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
2201}
2202
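/*
 * Editorial worked example for aligned_nrpages(), assuming 4KiB pages:
 * host_addr = 0x12345678, size = 0x2000. The in-page offset is 0x678,
 * PAGE_ALIGN(0x678 + 0x2000) = 0x3000, and 0x3000 >> VTD_PAGE_SHIFT = 3,
 * i.e. the partially used first and last pages are both counted.
 */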
6dd9a7c7
YS
2203/* Return largest possible superpage level for a given mapping */
2204static inline int hardware_largepage_caps(struct dmar_domain *domain,
2205 unsigned long iov_pfn,
2206 unsigned long phy_pfn,
2207 unsigned long pages)
2208{
2209 int support, level = 1;
2210 unsigned long pfnmerge;
2211
2212 support = domain->iommu_superpage;
2213
2214 /* To use a large page, the virtual *and* physical addresses
2215 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2216 of them will mean we have to use smaller pages. So just
2217 merge them and check both at once. */
2218 pfnmerge = iov_pfn | phy_pfn;
2219
2220 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
2221 pages >>= VTD_STRIDE_SHIFT;
2222 if (!pages)
2223 break;
2224 pfnmerge >>= VTD_STRIDE_SHIFT;
2225 level++;
2226 support--;
2227 }
2228 return level;
2229}
2230
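/*
 * Editorial worked example for hardware_largepage_caps(): with
 * domain->iommu_superpage = 1 (2MiB superpages supported) and iov_pfn,
 * phy_pfn both multiples of 512 (2MiB aligned), a request for
 * pages = 1024 has no low stride bits set in the merged pfn, so one
 * loop iteration succeeds and level 2 (2MiB) is returned. If either
 * pfn were not 512-aligned, the loop would not run and the function
 * would return level 1 (4KiB pages).
 */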
9051aa02
DW
2231static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2232 struct scatterlist *sg, unsigned long phys_pfn,
2233 unsigned long nr_pages, int prot)
e1605495
DW
2234{
2235 struct dma_pte *first_pte = NULL, *pte = NULL;
9051aa02 2236 phys_addr_t uninitialized_var(pteval);
cc4f14aa 2237 unsigned long sg_res = 0;
6dd9a7c7
YS
2238 unsigned int largepage_lvl = 0;
2239 unsigned long lvl_pages = 0;
e1605495 2240
162d1b10 2241 BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
e1605495
DW
2242
2243 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2244 return -EINVAL;
2245
2246 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
2247
cc4f14aa
JL
2248 if (!sg) {
2249 sg_res = nr_pages;
9051aa02
DW
2250 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
2251 }
2252
6dd9a7c7 2253 while (nr_pages > 0) {
c85994e4
DW
2254 uint64_t tmp;
2255
e1605495 2256 if (!sg_res) {
f532959b 2257 sg_res = aligned_nrpages(sg->offset, sg->length);
e1605495
DW
2258 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
2259 sg->dma_length = sg->length;
3e6110fd 2260 pteval = page_to_phys(sg_page(sg)) | prot;
6dd9a7c7 2261 phys_pfn = pteval >> VTD_PAGE_SHIFT;
e1605495 2262 }
6dd9a7c7 2263
e1605495 2264 if (!pte) {
6dd9a7c7
YS
2265 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2266
5cf0a76f 2267 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
e1605495
DW
2268 if (!pte)
2269 return -ENOMEM;
6dd9a7c7 2270 /* It is a large page */
6491d4d0 2271 if (largepage_lvl > 1) {
ba2374fd
CZ
2272 unsigned long nr_superpages, end_pfn;
2273
6dd9a7c7 2274 pteval |= DMA_PTE_LARGE_PAGE;
d41a4adb 2275 lvl_pages = lvl_to_nr_pages(largepage_lvl);
ba2374fd
CZ
2276
2277 nr_superpages = sg_res / lvl_pages;
2278 end_pfn = iov_pfn + nr_superpages * lvl_pages - 1;
2279
d41a4adb
JL
2280 /*
2281 * Ensure that old small page tables are
ba2374fd 2282 * removed to make room for superpage(s).
bc24c571
DD
2283 * We're adding new large pages, so make sure
2284 * we don't remove their parent tables.
d41a4adb 2285 */
bc24c571
DD
2286 dma_pte_free_pagetable(domain, iov_pfn, end_pfn,
2287 largepage_lvl + 1);
6491d4d0 2288 } else {
6dd9a7c7 2289 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
6491d4d0 2290 }
6dd9a7c7 2291
e1605495
DW
2292 }
2293 /* We don't need a lock here; nobody else
2294 * touches the iova range
2295 */
7766a3fb 2296 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
c85994e4 2297 if (tmp) {
1bf20f0d 2298 static int dumps = 5;
9f10e5bf
JR
2299 pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2300 iov_pfn, tmp, (unsigned long long)pteval);
1bf20f0d
DW
2301 if (dumps) {
2302 dumps--;
2303 debug_dma_dump_mappings(NULL);
2304 }
2305 WARN_ON(1);
2306 }
6dd9a7c7
YS
2307
2308 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2309
2310 BUG_ON(nr_pages < lvl_pages);
2311 BUG_ON(sg_res < lvl_pages);
2312
2313 nr_pages -= lvl_pages;
2314 iov_pfn += lvl_pages;
2315 phys_pfn += lvl_pages;
2316 pteval += lvl_pages * VTD_PAGE_SIZE;
2317 sg_res -= lvl_pages;
2318
2319 /* If the next PTE would be the first in a new page, then we
2320 need to flush the cache on the entries we've just written.
2321 And then we'll need to recalculate 'pte', so clear it and
2322 let it get set again in the if (!pte) block above.
2323
2324 If we're done (!nr_pages) we need to flush the cache too.
2325
2326 Also if we've been setting superpages, we may need to
2327 recalculate 'pte' and switch back to smaller pages for the
2328 end of the mapping, if the trailing size is not enough to
2329 use another superpage (i.e. sg_res < lvl_pages). */
e1605495 2330 pte++;
6dd9a7c7
YS
2331 if (!nr_pages || first_pte_in_page(pte) ||
2332 (largepage_lvl > 1 && sg_res < lvl_pages)) {
e1605495
DW
2333 domain_flush_cache(domain, first_pte,
2334 (void *)pte - (void *)first_pte);
2335 pte = NULL;
2336 }
6dd9a7c7
YS
2337
2338 if (!sg_res && nr_pages)
e1605495
DW
2339 sg = sg_next(sg);
2340 }
2341 return 0;
2342}
2343
9051aa02
DW
2344static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2345 struct scatterlist *sg, unsigned long nr_pages,
2346 int prot)
ba395927 2347{
9051aa02
DW
2348 return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
2349}
6f6a00e4 2350
9051aa02
DW
2351static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2352 unsigned long phys_pfn, unsigned long nr_pages,
2353 int prot)
2354{
2355 return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
ba395927
KA
2356}
2357
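/*
 * Editorial usage sketch (assumption, not part of the original file):
 * identity-mapping a single 4KiB page read/write through the wrapper
 * above; iommu_domain_identity_map() further down does the same for
 * whole reserved regions.
 */
static inline int example_identity_map_one_page(struct dmar_domain *domain,
						unsigned long pfn)
{
	/* iov_pfn == phys_pfn gives a 1:1 mapping of one VT-d page */
	return domain_pfn_mapping(domain, pfn, pfn, 1,
				  DMA_PTE_READ | DMA_PTE_WRITE);
}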
2452d9db 2358static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn)
ba395927 2359{
5082219b
FS
2360 unsigned long flags;
2361 struct context_entry *context;
2362 u16 did_old;
2363
c7151a8d
WH
2364 if (!iommu)
2365 return;
8c11e798 2366
5082219b
FS
2367 spin_lock_irqsave(&iommu->lock, flags);
2368 context = iommu_context_addr(iommu, bus, devfn, 0);
2369 if (!context) {
2370 spin_unlock_irqrestore(&iommu->lock, flags);
2371 return;
2372 }
2373 did_old = context_domain_id(context);
2374 context_clear_entry(context);
2375 __iommu_flush_cache(iommu, context, sizeof(*context));
2376 spin_unlock_irqrestore(&iommu->lock, flags);
2377 iommu->flush.flush_context(iommu,
2378 did_old,
2379 (((u16)bus) << 8) | devfn,
2380 DMA_CCMD_MASK_NOBIT,
2381 DMA_CCMD_DEVICE_INVL);
2382 iommu->flush.flush_iotlb(iommu,
2383 did_old,
2384 0,
2385 0,
2386 DMA_TLB_DSI_FLUSH);
ba395927
KA
2387}
2388
109b9b04
DW
2389static inline void unlink_domain_info(struct device_domain_info *info)
2390{
2391 assert_spin_locked(&device_domain_lock);
2392 list_del(&info->link);
2393 list_del(&info->global);
2394 if (info->dev)
0bcb3e28 2395 info->dev->archdata.iommu = NULL;
109b9b04
DW
2396}
2397
ba395927
KA
2398static void domain_remove_dev_info(struct dmar_domain *domain)
2399{
3a74ca01 2400 struct device_domain_info *info, *tmp;
fb170fb4 2401 unsigned long flags;
ba395927
KA
2402
2403 spin_lock_irqsave(&device_domain_lock, flags);
76f45fe3 2404 list_for_each_entry_safe(info, tmp, &domain->devices, link)
127c7615 2405 __dmar_remove_one_dev_info(info);
ba395927
KA
2406 spin_unlock_irqrestore(&device_domain_lock, flags);
2407}
2408
2409/*
2410 * find_domain
1525a29a 2411 * Note: the info is stored in struct device->archdata.iommu
ba395927 2412 */
1525a29a 2413static struct dmar_domain *find_domain(struct device *dev)
ba395927
KA
2414{
2415 struct device_domain_info *info;
2416
2417 /* No lock here, assumes no domain exit in normal case */
1525a29a 2418 info = dev->archdata.iommu;
b316d02a 2419 if (likely(info))
ba395927
KA
2420 return info->domain;
2421 return NULL;
2422}
2423
5a8f40e8 2424static inline struct device_domain_info *
745f2586
JL
2425dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2426{
2427 struct device_domain_info *info;
2428
2429 list_for_each_entry(info, &device_domain_list, global)
41e80dca 2430 if (info->iommu->segment == segment && info->bus == bus &&
745f2586 2431 info->devfn == devfn)
5a8f40e8 2432 return info;
745f2586
JL
2433
2434 return NULL;
2435}
2436
5db31569
JR
2437static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
2438 int bus, int devfn,
2439 struct device *dev,
2440 struct dmar_domain *domain)
745f2586 2441{
5a8f40e8 2442 struct dmar_domain *found = NULL;
745f2586
JL
2443 struct device_domain_info *info;
2444 unsigned long flags;
d160aca5 2445 int ret;
745f2586
JL
2446
2447 info = alloc_devinfo_mem();
2448 if (!info)
b718cd3d 2449 return NULL;
745f2586 2450
745f2586
JL
2451 info->bus = bus;
2452 info->devfn = devfn;
b16d0cb9
DW
2453 info->ats_supported = info->pasid_supported = info->pri_supported = 0;
2454 info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
2455 info->ats_qdep = 0;
745f2586
JL
2456 info->dev = dev;
2457 info->domain = domain;
5a8f40e8 2458 info->iommu = iommu;
745f2586 2459
b16d0cb9
DW
2460 if (dev && dev_is_pci(dev)) {
2461 struct pci_dev *pdev = to_pci_dev(info->dev);
2462
2463 if (ecap_dev_iotlb_support(iommu->ecap) &&
2464 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) &&
2465 dmar_find_matched_atsr_unit(pdev))
2466 info->ats_supported = 1;
2467
2468 if (ecs_enabled(iommu)) {
2469 if (pasid_enabled(iommu)) {
2470 int features = pci_pasid_features(pdev);
2471 if (features >= 0)
2472 info->pasid_supported = features | 1;
2473 }
2474
2475 if (info->ats_supported && ecap_prs(iommu->ecap) &&
2476 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
2477 info->pri_supported = 1;
2478 }
2479 }
2480
745f2586
JL
2481 spin_lock_irqsave(&device_domain_lock, flags);
2482 if (dev)
0bcb3e28 2483 found = find_domain(dev);
f303e507
JR
2484
2485 if (!found) {
5a8f40e8 2486 struct device_domain_info *info2;
41e80dca 2487 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
f303e507
JR
2488 if (info2) {
2489 found = info2->domain;
2490 info2->dev = dev;
2491 }
5a8f40e8 2492 }
f303e507 2493
745f2586
JL
2494 if (found) {
2495 spin_unlock_irqrestore(&device_domain_lock, flags);
2496 free_devinfo_mem(info);
b718cd3d
DW
2497 /* Caller must free the original domain */
2498 return found;
745f2586
JL
2499 }
2500
d160aca5
JR
2501 spin_lock(&iommu->lock);
2502 ret = domain_attach_iommu(domain, iommu);
2503 spin_unlock(&iommu->lock);
2504
2505 if (ret) {
c6c2cebd 2506 spin_unlock_irqrestore(&device_domain_lock, flags);
499f3aa4 2507 free_devinfo_mem(info);
c6c2cebd
JR
2508 return NULL;
2509 }
c6c2cebd 2510
b718cd3d
DW
2511 list_add(&info->link, &domain->devices);
2512 list_add(&info->global, &device_domain_list);
2513 if (dev)
2514 dev->archdata.iommu = info;
2515 spin_unlock_irqrestore(&device_domain_lock, flags);
2516
cc4e2575
JR
2517 if (dev && domain_context_mapping(domain, dev)) {
2518 pr_err("Domain context map for %s failed\n", dev_name(dev));
e6de0f8d 2519 dmar_remove_one_dev_info(domain, dev);
cc4e2575
JR
2520 return NULL;
2521 }
2522
b718cd3d 2523 return domain;
745f2586
JL
2524}
2525
579305f7
AW
2526static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2527{
2528 *(u16 *)opaque = alias;
2529 return 0;
2530}
2531
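/*
 * Editorial note (derived from how the callers below unpack the value):
 * the u16 alias packs bus and devfn as (bus << 8) | devfn, so a
 * requester at 02:00.0 arrives here as 0x0200, and
 * PCI_BUS_NUM(0x0200) = 0x02, 0x0200 & 0xff = 0x00. For devices behind
 * PCI bridges the last alias reported can be the bridge rather than the
 * device itself, which is why callers compare it against the device's
 * own requester id.
 */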
76208356 2532static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw)
ba395927 2533{
cc4e2575 2534 struct device_domain_info *info = NULL;
76208356 2535 struct dmar_domain *domain = NULL;
579305f7 2536 struct intel_iommu *iommu;
08a7f456 2537 u16 req_id, dma_alias;
ba395927 2538 unsigned long flags;
aa4d066a 2539 u8 bus, devfn;
ba395927 2540
579305f7
AW
2541 iommu = device_to_iommu(dev, &bus, &devfn);
2542 if (!iommu)
2543 return NULL;
2544
08a7f456
JR
2545 req_id = ((u16)bus << 8) | devfn;
2546
146922ec
DW
2547 if (dev_is_pci(dev)) {
2548 struct pci_dev *pdev = to_pci_dev(dev);
276dbf99 2549
579305f7
AW
2550 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2551
2552 spin_lock_irqsave(&device_domain_lock, flags);
2553 info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2554 PCI_BUS_NUM(dma_alias),
2555 dma_alias & 0xff);
2556 if (info) {
2557 iommu = info->iommu;
2558 domain = info->domain;
5a8f40e8 2559 }
579305f7 2560 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927 2561
76208356 2562 /* DMA alias already has a domain, use it */
579305f7 2563 if (info)
76208356 2564 goto out;
579305f7 2565 }
ba395927 2566
146922ec 2567 /* Allocate and initialize new domain for the device */
ab8dfe25 2568 domain = alloc_domain(0);
745f2586 2569 if (!domain)
579305f7 2570 return NULL;
dc534b25 2571 if (domain_init(domain, iommu, gaw)) {
579305f7
AW
2572 domain_exit(domain);
2573 return NULL;
2c2e2c38 2574 }
ba395927 2575
76208356 2576out:
579305f7 2577
76208356
JR
2578 return domain;
2579}
579305f7 2580
76208356
JR
2581static struct dmar_domain *set_domain_for_dev(struct device *dev,
2582 struct dmar_domain *domain)
2583{
2584 struct intel_iommu *iommu;
2585 struct dmar_domain *tmp;
2586 u16 req_id, dma_alias;
2587 u8 bus, devfn;
2588
2589 iommu = device_to_iommu(dev, &bus, &devfn);
2590 if (!iommu)
2591 return NULL;
2592
2593 req_id = ((u16)bus << 8) | devfn;
2594
2595 if (dev_is_pci(dev)) {
2596 struct pci_dev *pdev = to_pci_dev(dev);
2597
2598 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2599
2600 /* register PCI DMA alias device */
2601 if (req_id != dma_alias) {
2602 tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2603 dma_alias & 0xff, NULL, domain);
2604
2605 if (!tmp || tmp != domain)
2606 return tmp;
2607 }
ba395927
KA
2608 }
2609
5db31569 2610 tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
76208356
JR
2611 if (!tmp || tmp != domain)
2612 return tmp;
2613
2614 return domain;
2615}
579305f7 2616
76208356
JR
2617static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
2618{
2619 struct dmar_domain *domain, *tmp;
2620
2621 domain = find_domain(dev);
2622 if (domain)
2623 goto out;
2624
2625 domain = find_or_alloc_domain(dev, gaw);
2626 if (!domain)
2627 goto out;
2628
2629 tmp = set_domain_for_dev(dev, domain);
2630 if (!tmp || domain != tmp) {
579305f7
AW
2631 domain_exit(domain);
2632 domain = tmp;
2633 }
b718cd3d 2634
76208356
JR
2635out:
2636
b718cd3d 2637 return domain;
ba395927
KA
2638}
2639
b213203e
DW
2640static int iommu_domain_identity_map(struct dmar_domain *domain,
2641 unsigned long long start,
2642 unsigned long long end)
ba395927 2643{
c5395d5c
DW
2644 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2645 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2646
2647 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2648 dma_to_mm_pfn(last_vpfn))) {
9f10e5bf 2649 pr_err("Reserving iova failed\n");
b213203e 2650 return -ENOMEM;
ba395927
KA
2651 }
2652
af1089ce 2653 pr_debug("Mapping reserved region %llx-%llx\n", start, end);
ba395927
KA
2654 /*
2655 * The RMRR range might overlap with the physical memory range,
2656 * so clear it first.
2657 */
c5395d5c 2658 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
ba395927 2659
c5395d5c
DW
2660 return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2661 last_vpfn - first_vpfn + 1,
61df7443 2662 DMA_PTE_READ|DMA_PTE_WRITE);
b213203e
DW
2663}
2664
d66ce54b
JR
2665static int domain_prepare_identity_map(struct device *dev,
2666 struct dmar_domain *domain,
2667 unsigned long long start,
2668 unsigned long long end)
b213203e 2669{
19943b0e
DW
2670 /* For _hardware_ passthrough, don't bother. But for software
2671 passthrough, we do it anyway -- it may indicate a memory
2672 range which is reserved in E820, so which didn't get set
2673 up to start with in si_domain */
2674 if (domain == si_domain && hw_pass_through) {
9f10e5bf
JR
2675 pr_warn("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2676 dev_name(dev), start, end);
19943b0e
DW
2677 return 0;
2678 }
2679
9f10e5bf
JR
2680 pr_info("Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2681 dev_name(dev), start, end);
2682
5595b528
DW
2683 if (end < start) {
2684 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2685 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2686 dmi_get_system_info(DMI_BIOS_VENDOR),
2687 dmi_get_system_info(DMI_BIOS_VERSION),
2688 dmi_get_system_info(DMI_PRODUCT_VERSION));
d66ce54b 2689 return -EIO;
5595b528
DW
2690 }
2691
2ff729f5
DW
2692 if (end >> agaw_to_width(domain->agaw)) {
2693 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2694 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2695 agaw_to_width(domain->agaw),
2696 dmi_get_system_info(DMI_BIOS_VENDOR),
2697 dmi_get_system_info(DMI_BIOS_VERSION),
2698 dmi_get_system_info(DMI_PRODUCT_VERSION));
d66ce54b 2699 return -EIO;
2ff729f5 2700 }
19943b0e 2701
d66ce54b
JR
2702 return iommu_domain_identity_map(domain, start, end);
2703}
ba395927 2704
d66ce54b
JR
2705static int iommu_prepare_identity_map(struct device *dev,
2706 unsigned long long start,
2707 unsigned long long end)
2708{
2709 struct dmar_domain *domain;
2710 int ret;
2711
2712 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2713 if (!domain)
2714 return -ENOMEM;
2715
2716 ret = domain_prepare_identity_map(dev, domain, start, end);
2717 if (ret)
2718 domain_exit(domain);
b213203e 2719
ba395927 2720 return ret;
ba395927
KA
2721}
2722
2723static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
0b9d9753 2724 struct device *dev)
ba395927 2725{
0b9d9753 2726 if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927 2727 return 0;
0b9d9753
DW
2728 return iommu_prepare_identity_map(dev, rmrr->base_address,
2729 rmrr->end_address);
ba395927
KA
2730}
2731
d3f13810 2732#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
49a0429e
KA
2733static inline void iommu_prepare_isa(void)
2734{
2735 struct pci_dev *pdev;
2736 int ret;
2737
2738 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2739 if (!pdev)
2740 return;
2741
9f10e5bf 2742 pr_info("Prepare 0-16MiB unity mapping for LPC\n");
0b9d9753 2743 ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);
49a0429e
KA
2744
2745 if (ret)
9f10e5bf 2746 pr_err("Failed to create 0-16MiB identity map - floppy might not work\n");
49a0429e 2747
9b27e82d 2748 pci_dev_put(pdev);
49a0429e
KA
2749}
2750#else
2751static inline void iommu_prepare_isa(void)
2752{
2753 return;
2754}
d3f13810 2755#endif /* !CONFIG_INTEL_IOMMU_FLOPPY_WA */
49a0429e 2756
2c2e2c38 2757static int md_domain_init(struct dmar_domain *domain, int guest_width);
c7ab48d2 2758
071e1374 2759static int __init si_domain_init(int hw)
2c2e2c38 2760{
c7ab48d2 2761 int nid, ret = 0;
2c2e2c38 2762
ab8dfe25 2763 si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2c2e2c38
FY
2764 if (!si_domain)
2765 return -EFAULT;
2766
2c2e2c38
FY
2767 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2768 domain_exit(si_domain);
2769 return -EFAULT;
2770 }
2771
0dc79715 2772 pr_debug("Identity mapping domain allocated\n");
2c2e2c38 2773
19943b0e
DW
2774 if (hw)
2775 return 0;
2776
c7ab48d2 2777 for_each_online_node(nid) {
5dfe8660
TH
2778 unsigned long start_pfn, end_pfn;
2779 int i;
2780
2781 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2782 ret = iommu_domain_identity_map(si_domain,
2783 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2784 if (ret)
2785 return ret;
2786 }
c7ab48d2
DW
2787 }
2788
2c2e2c38
FY
2789 return 0;
2790}
2791
9b226624 2792static int identity_mapping(struct device *dev)
2c2e2c38
FY
2793{
2794 struct device_domain_info *info;
2795
2796 if (likely(!iommu_identity_mapping))
2797 return 0;
2798
9b226624 2799 info = dev->archdata.iommu;
cb452a40
MT
2800 if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2801 return (info->domain == si_domain);
2c2e2c38 2802
2c2e2c38
FY
2803 return 0;
2804}
2805
28ccce0d 2806static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
2c2e2c38 2807{
0ac72664 2808 struct dmar_domain *ndomain;
5a8f40e8 2809 struct intel_iommu *iommu;
156baca8 2810 u8 bus, devfn;
2c2e2c38 2811
5913c9bf 2812 iommu = device_to_iommu(dev, &bus, &devfn);
5a8f40e8
DW
2813 if (!iommu)
2814 return -ENODEV;
2815
5db31569 2816 ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
0ac72664
DW
2817 if (ndomain != domain)
2818 return -EBUSY;
2c2e2c38
FY
2819
2820 return 0;
2821}
2822
0b9d9753 2823static bool device_has_rmrr(struct device *dev)
ea2447f7
TM
2824{
2825 struct dmar_rmrr_unit *rmrr;
832bd858 2826 struct device *tmp;
ea2447f7
TM
2827 int i;
2828
0e242612 2829 rcu_read_lock();
ea2447f7 2830 for_each_rmrr_units(rmrr) {
b683b230
JL
2831 /*
2832 * Return TRUE if this RMRR contains the device that
2833 * is passed in.
2834 */
2835 for_each_active_dev_scope(rmrr->devices,
2836 rmrr->devices_cnt, i, tmp)
0b9d9753 2837 if (tmp == dev) {
0e242612 2838 rcu_read_unlock();
ea2447f7 2839 return true;
b683b230 2840 }
ea2447f7 2841 }
0e242612 2842 rcu_read_unlock();
ea2447f7
TM
2843 return false;
2844}
2845
c875d2c1
AW
2846/*
2847 * There are a couple of cases where we need to restrict the functionality of
2848 * devices associated with RMRRs. The first is when evaluating a device for
2849 * identity mapping because problems exist when devices are moved in and out
2850 * of domains and their respective RMRR information is lost. This means that
2851 * a device with associated RMRRs will never be in a "passthrough" domain.
2852 * The second is use of the device through the IOMMU API. This interface
2853 * expects to have full control of the IOVA space for the device. We cannot
2854 * satisfy both the requirement that RMRR access is maintained and have an
2855 * unencumbered IOVA space. We also have no ability to quiesce the device's
2856 * use of the RMRR space or even inform the IOMMU API user of the restriction.
2857 * We therefore prevent devices associated with an RMRR from participating in
2858 * the IOMMU API, which eliminates them from device assignment.
2859 *
2860 * In both cases we assume that PCI USB devices with RMRRs have them largely
2861 * for historical reasons and that the RMRR space is not actively used post
2862 * boot. This exclusion may change if vendors begin to abuse it.
18436afd
DW
2863 *
2864 * The same exception is made for graphics devices, with the requirement that
2865 * any use of the RMRR regions will be torn down before assigning the device
2866 * to a guest.
c875d2c1
AW
2867 */
2868static bool device_is_rmrr_locked(struct device *dev)
2869{
2870 if (!device_has_rmrr(dev))
2871 return false;
2872
2873 if (dev_is_pci(dev)) {
2874 struct pci_dev *pdev = to_pci_dev(dev);
2875
18436afd 2876 if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
c875d2c1
AW
2877 return false;
2878 }
2879
2880 return true;
2881}
2882
3bdb2591 2883static int iommu_should_identity_map(struct device *dev, int startup)
6941af28 2884{
ea2447f7 2885
3bdb2591
DW
2886 if (dev_is_pci(dev)) {
2887 struct pci_dev *pdev = to_pci_dev(dev);
ea2447f7 2888
c875d2c1 2889 if (device_is_rmrr_locked(dev))
3bdb2591 2890 return 0;
e0fc7e0b 2891
3bdb2591
DW
2892 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2893 return 1;
e0fc7e0b 2894
3bdb2591
DW
2895 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2896 return 1;
6941af28 2897
3bdb2591 2898 if (!(iommu_identity_mapping & IDENTMAP_ALL))
3dfc813d 2899 return 0;
3bdb2591
DW
2900
2901 /*
2902 * We want to start off with all devices in the 1:1 domain, and
2903 * take them out later if we find they can't access all of memory.
2904 *
2905 * However, we can't do this for PCI devices behind bridges,
2906 * because all PCI devices behind the same bridge will end up
2907 * with the same source-id on their transactions.
2908 *
2909 * Practically speaking, we can't change things around for these
2910 * devices at run-time, because we can't be sure there'll be no
2911 * DMA transactions in flight for any of their siblings.
2912 *
2913 * So PCI devices (unless they're on the root bus) as well as
2914 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2915 * the 1:1 domain, just in _case_ one of their siblings turns out
2916 * not to be able to map all of memory.
2917 */
2918 if (!pci_is_pcie(pdev)) {
2919 if (!pci_is_root_bus(pdev->bus))
2920 return 0;
2921 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2922 return 0;
2923 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
3dfc813d 2924 return 0;
3bdb2591
DW
2925 } else {
2926 if (device_has_rmrr(dev))
2927 return 0;
2928 }
3dfc813d 2929
3bdb2591 2930 /*
3dfc813d 2931 * At boot time, we don't yet know if devices will be 64-bit capable.
3bdb2591 2932 * Assume that they will — if they turn out not to be, then we can
3dfc813d
DW
2933 * take them out of the 1:1 domain later.
2934 */
8fcc5372
CW
2935 if (!startup) {
2936 /*
2937 * If the device's dma_mask is less than the system's memory
2938 * size then this is not a candidate for identity mapping.
2939 */
3bdb2591 2940 u64 dma_mask = *dev->dma_mask;
8fcc5372 2941
3bdb2591
DW
2942 if (dev->coherent_dma_mask &&
2943 dev->coherent_dma_mask < dma_mask)
2944 dma_mask = dev->coherent_dma_mask;
8fcc5372 2945
3bdb2591 2946 return dma_mask >= dma_get_required_mask(dev);
8fcc5372 2947 }
6941af28
DW
2948
2949 return 1;
2950}
2951
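/*
 * Editorial worked example for the dma_mask check above: on a host
 * with 16GiB of RAM, dma_get_required_mask() reports a mask wider than
 * 32 bits, so a device whose dma_mask (and coherent_dma_mask) is
 * DMA_BIT_MASK(32) fails the comparison and is not a candidate for
 * identity mapping, while a 64-bit capable device passes and stays
 * identity mapped.
 */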
cf04eee8
DW
2952static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
2953{
2954 int ret;
2955
2956 if (!iommu_should_identity_map(dev, 1))
2957 return 0;
2958
28ccce0d 2959 ret = domain_add_dev_info(si_domain, dev);
cf04eee8 2960 if (!ret)
9f10e5bf
JR
2961 pr_info("%s identity mapping for device %s\n",
2962 hw ? "Hardware" : "Software", dev_name(dev));
cf04eee8
DW
2963 else if (ret == -ENODEV)
2964 /* device not associated with an iommu */
2965 ret = 0;
2966
2967 return ret;
2968}
2969
2970
071e1374 2971static int __init iommu_prepare_static_identity_mapping(int hw)
2c2e2c38 2972{
2c2e2c38 2973 struct pci_dev *pdev = NULL;
cf04eee8
DW
2974 struct dmar_drhd_unit *drhd;
2975 struct intel_iommu *iommu;
2976 struct device *dev;
2977 int i;
2978 int ret = 0;
2c2e2c38 2979
2c2e2c38 2980 for_each_pci_dev(pdev) {
cf04eee8
DW
2981 ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
2982 if (ret)
2983 return ret;
2984 }
2985
2986 for_each_active_iommu(iommu, drhd)
2987 for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
2988 struct acpi_device_physical_node *pn;
2989 struct acpi_device *adev;
2990
2991 if (dev->bus != &acpi_bus_type)
2992 continue;
86080ccc 2993
cf04eee8
DW
2994 adev= to_acpi_device(dev);
2995 mutex_lock(&adev->physical_node_lock);
2996 list_for_each_entry(pn, &adev->physical_node_list, node) {
2997 ret = dev_prepare_static_identity_mapping(pn->dev, hw);
2998 if (ret)
2999 break;
eae460b6 3000 }
cf04eee8
DW
3001 mutex_unlock(&adev->physical_node_lock);
3002 if (ret)
3003 return ret;
62edf5dc 3004 }
2c2e2c38
FY
3005
3006 return 0;
3007}
3008
ffebeb46
JL
3009static void intel_iommu_init_qi(struct intel_iommu *iommu)
3010{
3011 /*
3012 * Start from a sane iommu hardware state.
3013 * If the queued invalidation was already initialized by us
3014 * (for example, while enabling interrupt-remapping), then
3015 * things are already rolling from a sane state.
3016 */
3017 if (!iommu->qi) {
3018 /*
3019 * Clear any previous faults.
3020 */
3021 dmar_fault(-1, iommu);
3022 /*
3023 * Disable queued invalidation if supported and already enabled
3024 * before OS handover.
3025 */
3026 dmar_disable_qi(iommu);
3027 }
3028
3029 if (dmar_enable_qi(iommu)) {
3030 /*
3031 * Queued Invalidate not enabled, use Register Based Invalidate
3032 */
3033 iommu->flush.flush_context = __iommu_flush_context;
3034 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
9f10e5bf 3035 pr_info("%s: Using Register based invalidation\n",
ffebeb46
JL
3036 iommu->name);
3037 } else {
3038 iommu->flush.flush_context = qi_flush_context;
3039 iommu->flush.flush_iotlb = qi_flush_iotlb;
9f10e5bf 3040 pr_info("%s: Using Queued invalidation\n", iommu->name);
ffebeb46
JL
3041 }
3042}
3043
091d42e4 3044static int copy_context_table(struct intel_iommu *iommu,
dfddb969 3045 struct root_entry *old_re,
091d42e4
JR
3046 struct context_entry **tbl,
3047 int bus, bool ext)
3048{
dbcd861f 3049 int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
543c8dcf 3050 struct context_entry *new_ce = NULL, ce;
dfddb969 3051 struct context_entry *old_ce = NULL;
543c8dcf 3052 struct root_entry re;
091d42e4
JR
3053 phys_addr_t old_ce_phys;
3054
3055 tbl_idx = ext ? bus * 2 : bus;
dfddb969 3056 memcpy(&re, old_re, sizeof(re));
091d42e4
JR
3057
3058 for (devfn = 0; devfn < 256; devfn++) {
3059 /* First calculate the correct index */
3060 idx = (ext ? devfn * 2 : devfn) % 256;
3061
3062 if (idx == 0) {
3063 /* First save what we may have and clean up */
3064 if (new_ce) {
3065 tbl[tbl_idx] = new_ce;
3066 __iommu_flush_cache(iommu, new_ce,
3067 VTD_PAGE_SIZE);
3068 pos = 1;
3069 }
3070
3071 if (old_ce)
3072 iounmap(old_ce);
3073
3074 ret = 0;
3075 if (devfn < 0x80)
543c8dcf 3076 old_ce_phys = root_entry_lctp(&re);
091d42e4 3077 else
543c8dcf 3078 old_ce_phys = root_entry_uctp(&re);
091d42e4
JR
3079
3080 if (!old_ce_phys) {
3081 if (ext && devfn == 0) {
3082 /* No LCTP, try UCTP */
3083 devfn = 0x7f;
3084 continue;
3085 } else {
3086 goto out;
3087 }
3088 }
3089
3090 ret = -ENOMEM;
dfddb969
DW
3091 old_ce = memremap(old_ce_phys, PAGE_SIZE,
3092 MEMREMAP_WB);
091d42e4
JR
3093 if (!old_ce)
3094 goto out;
3095
3096 new_ce = alloc_pgtable_page(iommu->node);
3097 if (!new_ce)
3098 goto out_unmap;
3099
3100 ret = 0;
3101 }
3102
3103 /* Now copy the context entry */
dfddb969 3104 memcpy(&ce, old_ce + idx, sizeof(ce));
091d42e4 3105
cf484d0e 3106 if (!__context_present(&ce))
091d42e4
JR
3107 continue;
3108
dbcd861f
JR
3109 did = context_domain_id(&ce);
3110 if (did >= 0 && did < cap_ndoms(iommu->cap))
3111 set_bit(did, iommu->domain_ids);
3112
cf484d0e
JR
3113 /*
3114 * We need a marker for copied context entries. This
3115 * marker needs to work for the old format as well as
3116 * for extended context entries.
3117 *
3118 * Bit 67 of the context entry is used. In the old
3119 * format this bit is available to software, in the
3120 * extended format it is the PGE bit, but PGE is ignored
3121 * by HW if PASIDs are disabled (and thus still
3122 * available).
3123 *
3124 * So disable PASIDs first and then mark the entry
3125 * copied. This means that we don't copy PASID
3126 * translations from the old kernel, but this is fine as
3127 * faults there are not fatal.
3128 */
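/*
 * Editorial sketch (assumption about the helpers, which are defined
 * earlier in this file): with the usual
 * "struct context_entry { u64 lo; u64 hi; }" layout, bit 67 of the
 * 128-bit entry is bit 3 of 'hi', so marking the entry as copied is
 * expected to amount to roughly:
 *
 *	ce.hi |= 1ULL << (67 - 64);
 *
 * and context_copied() simply tests that bit.
 */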
3129 context_clear_pasid_enable(&ce);
3130 context_set_copied(&ce);
3131
091d42e4
JR
3132 new_ce[idx] = ce;
3133 }
3134
3135 tbl[tbl_idx + pos] = new_ce;
3136
3137 __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
3138
3139out_unmap:
dfddb969 3140 memunmap(old_ce);
091d42e4
JR
3141
3142out:
3143 return ret;
3144}
3145
3146static int copy_translation_tables(struct intel_iommu *iommu)
3147{
3148 struct context_entry **ctxt_tbls;
dfddb969 3149 struct root_entry *old_rt;
091d42e4
JR
3150 phys_addr_t old_rt_phys;
3151 int ctxt_table_entries;
3152 unsigned long flags;
3153 u64 rtaddr_reg;
3154 int bus, ret;
c3361f2f 3155 bool new_ext, ext;
091d42e4
JR
3156
3157 rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
3158 ext = !!(rtaddr_reg & DMA_RTADDR_RTT);
c3361f2f
JR
3159 new_ext = !!ecap_ecs(iommu->ecap);
3160
3161 /*
3162 * The RTT bit can only be changed when translation is disabled,
3163 * but disabling translation means to open a window for data
3164 * corruption. So bail out and don't copy anything if we would
3165 * have to change the bit.
3166 */
3167 if (new_ext != ext)
3168 return -EINVAL;
091d42e4
JR
3169
3170 old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
3171 if (!old_rt_phys)
3172 return -EINVAL;
3173
dfddb969 3174 old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
091d42e4
JR
3175 if (!old_rt)
3176 return -ENOMEM;
3177
3178 /* This is too big for the stack - allocate it from slab */
3179 ctxt_table_entries = ext ? 512 : 256;
3180 ret = -ENOMEM;
3181 ctxt_tbls = kzalloc(ctxt_table_entries * sizeof(void *), GFP_KERNEL);
3182 if (!ctxt_tbls)
3183 goto out_unmap;
3184
3185 for (bus = 0; bus < 256; bus++) {
3186 ret = copy_context_table(iommu, &old_rt[bus],
3187 ctxt_tbls, bus, ext);
3188 if (ret) {
3189 pr_err("%s: Failed to copy context table for bus %d\n",
3190 iommu->name, bus);
3191 continue;
3192 }
3193 }
3194
3195 spin_lock_irqsave(&iommu->lock, flags);
3196
3197 /* Context tables are copied, now write them to the root_entry table */
3198 for (bus = 0; bus < 256; bus++) {
3199 int idx = ext ? bus * 2 : bus;
3200 u64 val;
3201
3202 if (ctxt_tbls[idx]) {
3203 val = virt_to_phys(ctxt_tbls[idx]) | 1;
3204 iommu->root_entry[bus].lo = val;
3205 }
3206
3207 if (!ext || !ctxt_tbls[idx + 1])
3208 continue;
3209
3210 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
3211 iommu->root_entry[bus].hi = val;
3212 }
3213
3214 spin_unlock_irqrestore(&iommu->lock, flags);
3215
3216 kfree(ctxt_tbls);
3217
3218 __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
3219
3220 ret = 0;
3221
3222out_unmap:
dfddb969 3223 memunmap(old_rt);
091d42e4
JR
3224
3225 return ret;
3226}
3227
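/*
 * Editorial worked example of the layout used above (derived from the
 * index arithmetic in copy_context_table() and here): in legacy mode
 * bus B owns one 256-entry context table referenced by
 * root_entry[B].lo, and device B:D.F uses slot devfn. In extended
 * (ECS) mode bus B owns two tables: ctxt_tbls[2*B] goes into
 * root_entry[B].lo and serves devfn 0x00-0x7f, while ctxt_tbls[2*B + 1]
 * goes into root_entry[B].hi and serves devfn 0x80-0xff, with each
 * devfn occupying two consecutive 128-bit slots.
 */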
b779260b 3228static int __init init_dmars(void)
ba395927
KA
3229{
3230 struct dmar_drhd_unit *drhd;
3231 struct dmar_rmrr_unit *rmrr;
a87f4918 3232 bool copied_tables = false;
832bd858 3233 struct device *dev;
ba395927 3234 struct intel_iommu *iommu;
13cf0174 3235 int i, ret;
2c2e2c38 3236
ba395927
KA
3237 /*
3238 * for each drhd
3239 * allocate root
3240 * initialize and program root entry to not present
3241 * endfor
3242 */
3243 for_each_drhd_unit(drhd) {
5e0d2a6f 3244 /*
3245 * Lock not needed as this is only incremented in the single-
3246 * threaded kernel __init code path; all other accesses are
3247 * read only.
3248 */
78d8e704 3249 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
1b198bb0
MT
3250 g_num_of_iommus++;
3251 continue;
3252 }
9f10e5bf 3253 pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
5e0d2a6f 3254 }
3255
ffebeb46
JL
3256 /* Preallocate enough resources for IOMMU hot-addition */
3257 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
3258 g_num_of_iommus = DMAR_UNITS_SUPPORTED;
3259
d9630fe9
WH
3260 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
3261 GFP_KERNEL);
3262 if (!g_iommus) {
9f10e5bf 3263 pr_err("Allocating global iommu array failed\n");
d9630fe9
WH
3264 ret = -ENOMEM;
3265 goto error;
3266 }
3267
7c919779 3268 for_each_active_iommu(iommu, drhd) {
d9630fe9 3269 g_iommus[iommu->seq_id] = iommu;
ba395927 3270
b63d80d1
JR
3271 intel_iommu_init_qi(iommu);
3272
e61d98d8
SS
3273 ret = iommu_init_domains(iommu);
3274 if (ret)
989d51fc 3275 goto free_iommu;
e61d98d8 3276
4158c2ec
JR
3277 init_translation_status(iommu);
3278
091d42e4
JR
3279 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
3280 iommu_disable_translation(iommu);
3281 clear_translation_pre_enabled(iommu);
3282 pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3283 iommu->name);
3284 }
4158c2ec 3285
ba395927
KA
3286 /*
3287 * TBD:
3288 * we could share the same root & context tables
25985edc 3289 * among all IOMMU's. Need to Split it later.
ba395927
KA
3290 */
3291 ret = iommu_alloc_root_entry(iommu);
ffebeb46 3292 if (ret)
989d51fc 3293 goto free_iommu;
5f0a7f76 3294
091d42e4
JR
3295 if (translation_pre_enabled(iommu)) {
3296 pr_info("Translation already enabled - trying to copy translation structures\n");
3297
3298 ret = copy_translation_tables(iommu);
3299 if (ret) {
3300 /*
3301 * We found the IOMMU with translation
3302 * enabled - but failed to copy over the
3303 * old root-entry table. Try to proceed
3304 * by disabling translation now and
3305 * allocating a clean root-entry table.
3306 * This might cause DMAR faults, but
3307 * probably the dump will still succeed.
3308 */
3309 pr_err("Failed to copy translation tables from previous kernel for %s\n",
3310 iommu->name);
3311 iommu_disable_translation(iommu);
3312 clear_translation_pre_enabled(iommu);
3313 } else {
3314 pr_info("Copied translation tables from previous kernel for %s\n",
3315 iommu->name);
a87f4918 3316 copied_tables = true;
091d42e4
JR
3317 }
3318 }
3319
4ed0d3e6 3320 if (!ecap_pass_through(iommu->ecap))
19943b0e 3321 hw_pass_through = 0;
8a94ade4
DW
3322#ifdef CONFIG_INTEL_IOMMU_SVM
3323 if (pasid_enabled(iommu))
3324 intel_svm_alloc_pasid_tables(iommu);
3325#endif
ba395927
KA
3326 }
3327
a4c34ff1
JR
3328 /*
3329 * Now that qi is enabled on all iommus, set the root entry and flush
3330 * caches. This is required on some Intel X58 chipsets, otherwise the
3331 * flush_context function will loop forever and the boot hangs.
3332 */
3333 for_each_active_iommu(iommu, drhd) {
3334 iommu_flush_write_buffer(iommu);
3335 iommu_set_root_entry(iommu);
3336 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
3337 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3338 }
3339
19943b0e 3340 if (iommu_pass_through)
e0fc7e0b
DW
3341 iommu_identity_mapping |= IDENTMAP_ALL;
3342
d3f13810 3343#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
e0fc7e0b 3344 iommu_identity_mapping |= IDENTMAP_GFX;
19943b0e 3345#endif
e0fc7e0b 3346
21e722c4
AR
3347 check_tylersburg_isoch();
3348
86080ccc
JR
3349 if (iommu_identity_mapping) {
3350 ret = si_domain_init(hw_pass_through);
3351 if (ret)
3352 goto free_iommu;
3353 }
3354
e0fc7e0b 3355
a87f4918
JR
3356 /*
3357 * If we copied translations from a previous kernel in the kdump
3358 * case, we can not assign the devices to domains now, as that
3359 * would eliminate the old mappings. So skip this part and defer
3360 * the assignment to device driver initialization time.
3361 */
3362 if (copied_tables)
3363 goto domains_done;
3364
ba395927 3365 /*
19943b0e
DW
3366 * If pass-through is not set or not enabled, set up context entries for
3367 * identity mappings for RMRR, GFX and ISA, and possibly fall back to static
3368 * identity mapping if iommu_identity_mapping is set.
ba395927 3369 */
19943b0e
DW
3370 if (iommu_identity_mapping) {
3371 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
4ed0d3e6 3372 if (ret) {
9f10e5bf 3373 pr_crit("Failed to setup IOMMU pass-through\n");
989d51fc 3374 goto free_iommu;
ba395927
KA
3375 }
3376 }
ba395927 3377 /*
19943b0e
DW
3378 * For each rmrr
3379 * for each dev attached to rmrr
3380 * do
3381 * locate drhd for dev, alloc domain for dev
3382 * allocate free domain
3383 * allocate page table entries for rmrr
3384 * if context not allocated for bus
3385 * allocate and init context
3386 * set present in root table for this bus
3387 * init context with domain, translation etc
3388 * endfor
3389 * endfor
ba395927 3390 */
9f10e5bf 3391 pr_info("Setting RMRR:\n");
19943b0e 3392 for_each_rmrr_units(rmrr) {
b683b230
JL
3393 /* some BIOSes list non-existent devices in the DMAR table. */
3394 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
832bd858 3395 i, dev) {
0b9d9753 3396 ret = iommu_prepare_rmrr_dev(rmrr, dev);
19943b0e 3397 if (ret)
9f10e5bf 3398 pr_err("Mapping reserved region failed\n");
ba395927 3399 }
4ed0d3e6 3400 }
49a0429e 3401
19943b0e
DW
3402 iommu_prepare_isa();
3403
a87f4918
JR
3404domains_done:
3405
ba395927
KA
3406 /*
3407 * for each drhd
3408 * enable fault log
3409 * global invalidate context cache
3410 * global invalidate iotlb
3411 * enable translation
3412 */
7c919779 3413 for_each_iommu(iommu, drhd) {
51a63e67
JC
3414 if (drhd->ignored) {
3415 /*
3416 * we always have to disable PMRs or DMA may fail on
3417 * this device
3418 */
3419 if (force_on)
7c919779 3420 iommu_disable_protect_mem_regions(iommu);
ba395927 3421 continue;
51a63e67 3422 }
ba395927
KA
3423
3424 iommu_flush_write_buffer(iommu);
3425
a222a7f0
DW
3426#ifdef CONFIG_INTEL_IOMMU_SVM
3427 if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) {
3428 ret = intel_svm_enable_prq(iommu);
3429 if (ret)
3430 goto free_iommu;
3431 }
3432#endif
3460a6d9
KA
3433 ret = dmar_set_interrupt(iommu);
3434 if (ret)
989d51fc 3435 goto free_iommu;
3460a6d9 3436
8939ddf6
JR
3437 if (!translation_pre_enabled(iommu))
3438 iommu_enable_translation(iommu);
3439
b94996c9 3440 iommu_disable_protect_mem_regions(iommu);
ba395927
KA
3441 }
3442
3443 return 0;
989d51fc
JL
3444
3445free_iommu:
ffebeb46
JL
3446 for_each_active_iommu(iommu, drhd) {
3447 disable_dmar_iommu(iommu);
a868e6b7 3448 free_dmar_iommu(iommu);
ffebeb46 3449 }
13cf0174 3450
d9630fe9 3451 kfree(g_iommus);
13cf0174 3452
989d51fc 3453error:
ba395927
KA
3454 return ret;
3455}
3456
5a5e02a6 3457/* This takes a number of _MM_ pages, not VTD pages */
2aac6304 3458static unsigned long intel_alloc_iova(struct device *dev,
875764de
DW
3459 struct dmar_domain *domain,
3460 unsigned long nrpages, uint64_t dma_mask)
ba395927 3461{
22e2f9fa 3462 unsigned long iova_pfn = 0;
ba395927 3463
875764de
DW
3464 /* Restrict dma_mask to the width that the iommu can handle */
3465 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
8f6429c7
RM
3466 /* Ensure we reserve the whole size-aligned region */
3467 nrpages = __roundup_pow_of_two(nrpages);
875764de
DW
3468
3469 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
ba395927
KA
3470 /*
3471 * First try to allocate an I/O virtual address in
284901a9 3472 * DMA_BIT_MASK(32); if that fails, then try allocating
3609801e 3473 * from the higher range.
ba395927 3474 */
22e2f9fa
OP
3475 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
3476 IOVA_PFN(DMA_BIT_MASK(32)));
3477 if (iova_pfn)
3478 return iova_pfn;
875764de 3479 }
22e2f9fa
OP
3480 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages, IOVA_PFN(dma_mask));
3481 if (unlikely(!iova_pfn)) {
9f10e5bf 3482 pr_err("Allocating %ld-page iova for %s failed",
207e3592 3483 nrpages, dev_name(dev));
2aac6304 3484 return 0;
f76aec76
KA
3485 }
3486
22e2f9fa 3487 return iova_pfn;
f76aec76
KA
3488}
3489
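/*
 * Editorial worked example for intel_alloc_iova() above: a request for
 * 3 MM pages is rounded up to 4 so the allocation stays size-aligned.
 * For a 64-bit capable device (with dmar_forcedac off), the first
 * alloc_iova_fast() attempt is capped at IOVA_PFN(DMA_BIT_MASK(32)) to
 * keep the address below 4GiB; only if that range is exhausted does
 * the second attempt retry with the device's full dma_mask.
 */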
b316d02a 3490static struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
f76aec76 3491{
1c5ebba9 3492 struct dmar_domain *domain, *tmp;
b1ce5b79 3493 struct dmar_rmrr_unit *rmrr;
b1ce5b79
JR
3494 struct device *i_dev;
3495 int i, ret;
f76aec76 3496
1c5ebba9
JR
3497 domain = find_domain(dev);
3498 if (domain)
3499 goto out;
3500
3501 domain = find_or_alloc_domain(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
3502 if (!domain)
3503 goto out;
ba395927 3504
b1ce5b79
JR
3505 /* We have a new domain - setup possible RMRRs for the device */
3506 rcu_read_lock();
3507 for_each_rmrr_units(rmrr) {
3508 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
3509 i, i_dev) {
3510 if (i_dev != dev)
3511 continue;
3512
3513 ret = domain_prepare_identity_map(dev, domain,
3514 rmrr->base_address,
3515 rmrr->end_address);
3516 if (ret)
3517 dev_err(dev, "Mapping reserved region failed\n");
3518 }
3519 }
3520 rcu_read_unlock();
3521
1c5ebba9
JR
3522 tmp = set_domain_for_dev(dev, domain);
3523 if (!tmp || domain != tmp) {
3524 domain_exit(domain);
3525 domain = tmp;
3526 }
3527
3528out:
3529
3530 if (!domain)
3531 pr_err("Allocating domain for %s failed\n", dev_name(dev));
3532
3533
f76aec76
KA
3534 return domain;
3535}
3536
ecb509ec 3537/* Check if the dev needs to go through non-identity map and unmap process.*/
73676832 3538static int iommu_no_mapping(struct device *dev)
2c2e2c38
FY
3539{
3540 int found;
3541
3d89194a 3542 if (iommu_dummy(dev))
1e4c64c4
DW
3543 return 1;
3544
2c2e2c38 3545 if (!iommu_identity_mapping)
1e4c64c4 3546 return 0;
2c2e2c38 3547
9b226624 3548 found = identity_mapping(dev);
2c2e2c38 3549 if (found) {
ecb509ec 3550 if (iommu_should_identity_map(dev, 0))
2c2e2c38
FY
3551 return 1;
3552 else {
3553 /*
3554 * The 32 bit DMA device is removed from si_domain and falls back
3555 * to non-identity mapping.
3556 */
e6de0f8d 3557 dmar_remove_one_dev_info(si_domain, dev);
9f10e5bf
JR
3558 pr_info("32bit %s uses non-identity mapping\n",
3559 dev_name(dev));
2c2e2c38
FY
3560 return 0;
3561 }
3562 } else {
3563 /*
3564 * In case a 64 bit DMA device is detached from a vm, the device
3565 * is put into si_domain for identity mapping.
3566 */
ecb509ec 3567 if (iommu_should_identity_map(dev, 0)) {
2c2e2c38 3568 int ret;
28ccce0d 3569 ret = domain_add_dev_info(si_domain, dev);
2c2e2c38 3570 if (!ret) {
9f10e5bf
JR
3571 pr_info("64bit %s uses identity mapping\n",
3572 dev_name(dev));
2c2e2c38
FY
3573 return 1;
3574 }
3575 }
3576 }
3577
1e4c64c4 3578 return 0;
2c2e2c38
FY
3579}
3580
5040a918 3581static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
bb9e6d65 3582 size_t size, int dir, u64 dma_mask)
f76aec76 3583{
f76aec76 3584 struct dmar_domain *domain;
5b6985ce 3585 phys_addr_t start_paddr;
2aac6304 3586 unsigned long iova_pfn;
f76aec76 3587 int prot = 0;
6865f0d1 3588 int ret;
8c11e798 3589 struct intel_iommu *iommu;
33041ec0 3590 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
f76aec76
KA
3591
3592 BUG_ON(dir == DMA_NONE);
2c2e2c38 3593
5040a918 3594 if (iommu_no_mapping(dev))
6865f0d1 3595 return paddr;
f76aec76 3596
5040a918 3597 domain = get_valid_domain_for_dev(dev);
f76aec76
KA
3598 if (!domain)
3599 return 0;
3600
8c11e798 3601 iommu = domain_get_iommu(domain);
88cb6a74 3602 size = aligned_nrpages(paddr, size);
f76aec76 3603
2aac6304
OP
3604 iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
3605 if (!iova_pfn)
f76aec76
KA
3606 goto error;
3607
ba395927
KA
3608 /*
3609 * Check if DMAR supports zero-length reads on write only
3610 * mappings..
3611 */
3612 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3613 !cap_zlr(iommu->cap))
ba395927
KA
3614 prot |= DMA_PTE_READ;
3615 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3616 prot |= DMA_PTE_WRITE;
3617 /*
6865f0d1 3618 * paddr through (paddr + size) might cover only a partial page; we should map the whole
ba395927 3619 * page. Note: if two parts of one page are separately mapped, we
6865f0d1 3620 * might have two guest_addr mappings to the same host paddr, but this
ba395927
KA
3621 * is not a big problem
3622 */
2aac6304 3623 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova_pfn),
33041ec0 3624 mm_to_dma_pfn(paddr_pfn), size, prot);
ba395927
KA
3625 if (ret)
3626 goto error;
3627
1f0ef2aa
DW
3628 /* it's a non-present to present mapping. Only flush if caching mode */
3629 if (cap_caching_mode(iommu->cap))
a1ddcbe9 3630 iommu_flush_iotlb_psi(iommu, domain,
2aac6304 3631 mm_to_dma_pfn(iova_pfn),
a1ddcbe9 3632 size, 0, 1);
1f0ef2aa 3633 else
8c11e798 3634 iommu_flush_write_buffer(iommu);
f76aec76 3635
2aac6304 3636 start_paddr = (phys_addr_t)iova_pfn << PAGE_SHIFT;
03d6a246
DW
3637 start_paddr += paddr & ~PAGE_MASK;
3638 return start_paddr;
ba395927 3639
ba395927 3640error:
2aac6304 3641 if (iova_pfn)
22e2f9fa 3642 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
9f10e5bf 3643 pr_err("Device %s request: %zx@%llx dir %d --- failed\n",
5040a918 3644 dev_name(dev), size, (unsigned long long)paddr, dir);
ba395927
KA
3645 return 0;
3646}
3647
ffbbef5c
FT
3648static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3649 unsigned long offset, size_t size,
3650 enum dma_data_direction dir,
00085f1e 3651 unsigned long attrs)
bb9e6d65 3652{
ffbbef5c 3653 return __intel_map_single(dev, page_to_phys(page) + offset, size,
46333e37 3654 dir, *dev->dma_mask);
bb9e6d65
FT
3655}
3656
769530e4 3657static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
ba395927 3658{
f76aec76 3659 struct dmar_domain *domain;
d794dc9b 3660 unsigned long start_pfn, last_pfn;
769530e4 3661 unsigned long nrpages;
2aac6304 3662 unsigned long iova_pfn;
8c11e798 3663 struct intel_iommu *iommu;
ea8ea460 3664 struct page *freelist;
ba395927 3665
73676832 3666 if (iommu_no_mapping(dev))
f76aec76 3667 return;
2c2e2c38 3668
1525a29a 3669 domain = find_domain(dev);
ba395927
KA
3670 BUG_ON(!domain);
3671
8c11e798
WH
3672 iommu = domain_get_iommu(domain);
3673
2aac6304 3674 iova_pfn = IOVA_PFN(dev_addr);
ba395927 3675
769530e4 3676 nrpages = aligned_nrpages(dev_addr, size);
2aac6304 3677 start_pfn = mm_to_dma_pfn(iova_pfn);
769530e4 3678 last_pfn = start_pfn + nrpages - 1;
ba395927 3679
d794dc9b 3680 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
207e3592 3681 dev_name(dev), start_pfn, last_pfn);
ba395927 3682
ea8ea460 3683 freelist = domain_unmap(domain, start_pfn, last_pfn);
d794dc9b 3684
5e0d2a6f 3685 if (intel_iommu_strict) {
a1ddcbe9 3686 iommu_flush_iotlb_psi(iommu, domain, start_pfn,
769530e4 3687 nrpages, !freelist, 0);
5e0d2a6f 3688 /* free iova */
22e2f9fa 3689 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages));
ea8ea460 3690 dma_free_pagelist(freelist);
5e0d2a6f 3691 } else {
13cf0174
JR
3692 queue_iova(&domain->iovad, iova_pfn, nrpages,
3693 (unsigned long)freelist);
5e0d2a6f 3694 /*
3695 * queue up the release of the unmap to save roughly 1/6th of the
3696 * CPU time used up by the iotlb flush operation...
3697 */
5e0d2a6f 3698 }
ba395927
KA
3699}
3700
d41a4adb
JL
3701static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3702 size_t size, enum dma_data_direction dir,
00085f1e 3703 unsigned long attrs)
d41a4adb 3704{
769530e4 3705 intel_unmap(dev, dev_addr, size);
d41a4adb
JL
3706}
3707
5040a918 3708static void *intel_alloc_coherent(struct device *dev, size_t size,
baa676fc 3709 dma_addr_t *dma_handle, gfp_t flags,
00085f1e 3710 unsigned long attrs)
ba395927 3711{
36746436 3712 struct page *page = NULL;
ba395927
KA
3713 int order;
3714
5b6985ce 3715 size = PAGE_ALIGN(size);
ba395927 3716 order = get_order(size);
e8bb910d 3717
5040a918 3718 if (!iommu_no_mapping(dev))
e8bb910d 3719 flags &= ~(GFP_DMA | GFP_DMA32);
5040a918
DW
3720 else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
3721 if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
e8bb910d
AW
3722 flags |= GFP_DMA;
3723 else
3724 flags |= GFP_DMA32;
3725 }
ba395927 3726
d0164adc 3727 if (gfpflags_allow_blocking(flags)) {
36746436
AM
3728 unsigned int count = size >> PAGE_SHIFT;
3729
712c604d 3730 page = dma_alloc_from_contiguous(dev, count, order, flags);
36746436
AM
3731 if (page && iommu_no_mapping(dev) &&
3732 page_to_phys(page) + size > dev->coherent_dma_mask) {
3733 dma_release_from_contiguous(dev, page, count);
3734 page = NULL;
3735 }
3736 }
3737
3738 if (!page)
3739 page = alloc_pages(flags, order);
3740 if (!page)
ba395927 3741 return NULL;
36746436 3742 memset(page_address(page), 0, size);
ba395927 3743
36746436 3744 *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
bb9e6d65 3745 DMA_BIDIRECTIONAL,
5040a918 3746 dev->coherent_dma_mask);
ba395927 3747 if (*dma_handle)
36746436
AM
3748 return page_address(page);
3749 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3750 __free_pages(page, order);
3751
ba395927
KA
3752 return NULL;
3753}
3754
5040a918 3755static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
00085f1e 3756 dma_addr_t dma_handle, unsigned long attrs)
ba395927
KA
3757{
3758 int order;
36746436 3759 struct page *page = virt_to_page(vaddr);
ba395927 3760
5b6985ce 3761 size = PAGE_ALIGN(size);
ba395927
KA
3762 order = get_order(size);
3763
769530e4 3764 intel_unmap(dev, dma_handle, size);
36746436
AM
3765 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3766 __free_pages(page, order);
ba395927
KA
3767}
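/*
 * Illustrative sketch (not part of intel-iommu.c): a driver-side coherent
 * allocation that reaches intel_alloc_coherent()/intel_free_coherent()
 * above through the generic DMA API. The ring-buffer naming is purely an
 * assumption; only dma_alloc_coherent()/dma_free_coherent() are real calls.
 */
#include <linux/dma-mapping.h>
#include <linux/gfp.h>

static void *example_alloc_ring(struct device *dev, size_t size,
				dma_addr_t *dma_handle)
{
	/* Mapped DMA_BIDIRECTIONAL for the lifetime of the buffer. */
	return dma_alloc_coherent(dev, size, dma_handle, GFP_KERNEL);
}

static void example_free_ring(struct device *dev, size_t size,
			      void *cpu_addr, dma_addr_t dma_handle)
{
	dma_free_coherent(dev, size, cpu_addr, dma_handle);
}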
3768
5040a918 3769static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
d7ab5c46 3770 int nelems, enum dma_data_direction dir,
00085f1e 3771 unsigned long attrs)
ba395927 3772{
769530e4
OP
3773 dma_addr_t startaddr = sg_dma_address(sglist) & PAGE_MASK;
3774 unsigned long nrpages = 0;
3775 struct scatterlist *sg;
3776 int i;
3777
3778 for_each_sg(sglist, sg, nelems, i) {
3779 nrpages += aligned_nrpages(sg_dma_address(sg), sg_dma_len(sg));
3780 }
3781
3782 intel_unmap(dev, startaddr, nrpages << VTD_PAGE_SHIFT);
ba395927
KA
3783}
3784
ba395927 3785static int intel_nontranslate_map_sg(struct device *hddev,
c03ab37c 3786 struct scatterlist *sglist, int nelems, int dir)
ba395927
KA
3787{
3788 int i;
c03ab37c 3789 struct scatterlist *sg;
ba395927 3790
c03ab37c 3791 for_each_sg(sglist, sg, nelems, i) {
12d4d40e 3792 BUG_ON(!sg_page(sg));
3e6110fd 3793 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
c03ab37c 3794 sg->dma_length = sg->length;
ba395927
KA
3795 }
3796 return nelems;
3797}
3798
5040a918 3799static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
00085f1e 3800 enum dma_data_direction dir, unsigned long attrs)
ba395927 3801{
ba395927 3802 int i;
ba395927 3803 struct dmar_domain *domain;
f76aec76
KA
3804 size_t size = 0;
3805 int prot = 0;
2aac6304 3806 unsigned long iova_pfn;
f76aec76 3807 int ret;
c03ab37c 3808 struct scatterlist *sg;
b536d24d 3809 unsigned long start_vpfn;
8c11e798 3810 struct intel_iommu *iommu;
ba395927
KA
3811
3812 BUG_ON(dir == DMA_NONE);
5040a918
DW
3813 if (iommu_no_mapping(dev))
3814 return intel_nontranslate_map_sg(dev, sglist, nelems, dir);
ba395927 3815
5040a918 3816 domain = get_valid_domain_for_dev(dev);
f76aec76
KA
3817 if (!domain)
3818 return 0;
3819
8c11e798
WH
3820 iommu = domain_get_iommu(domain);
3821
b536d24d 3822 for_each_sg(sglist, sg, nelems, i)
88cb6a74 3823 size += aligned_nrpages(sg->offset, sg->length);
f76aec76 3824
2aac6304 3825 iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
5040a918 3826 *dev->dma_mask);
2aac6304 3827 if (!iova_pfn) {
c03ab37c 3828 sglist->dma_length = 0;
f76aec76
KA
3829 return 0;
3830 }
3831
3832 /*
3833 * Check if DMAR supports zero-length reads on write only
3834 * mappings..
3835 */
3836 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3837 !cap_zlr(iommu->cap))
f76aec76
KA
3838 prot |= DMA_PTE_READ;
3839 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3840 prot |= DMA_PTE_WRITE;
3841
2aac6304 3842 start_vpfn = mm_to_dma_pfn(iova_pfn);
e1605495 3843
f532959b 3844 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
e1605495 3845 if (unlikely(ret)) {
e1605495 3846 dma_pte_free_pagetable(domain, start_vpfn,
bc24c571
DD
3847 start_vpfn + size - 1,
3848 agaw_to_level(domain->agaw) + 1);
22e2f9fa 3849 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
e1605495 3850 return 0;
ba395927
KA
3851 }
3852
1f0ef2aa
DW
3853 /* it's a non-present to present mapping. Only flush if caching mode */
3854 if (cap_caching_mode(iommu->cap))
a1ddcbe9 3855 iommu_flush_iotlb_psi(iommu, domain, start_vpfn, size, 0, 1);
1f0ef2aa 3856 else
8c11e798 3857 iommu_flush_write_buffer(iommu);
1f0ef2aa 3858
ba395927
KA
3859 return nelems;
3860}
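/*
 * Illustrative sketch (not part of intel-iommu.c): mapping a two-entry
 * scatterlist with the generic DMA API, which lands in intel_map_sg()
 * above when this driver provides the dma_map_ops. The function name and
 * the two pre-allocated pages are assumptions for illustration.
 */
#include <linux/scatterlist.h>
#include <linux/dma-mapping.h>
#include <linux/device.h>

static int example_map_two_pages(struct device *dev,
				 struct page *p0, struct page *p1)
{
	struct scatterlist sgl[2];
	struct scatterlist *sg;
	int mapped, i;

	sg_init_table(sgl, 2);
	sg_set_page(&sgl[0], p0, PAGE_SIZE, 0);
	sg_set_page(&sgl[1], p1, PAGE_SIZE, 0);

	mapped = dma_map_sg(dev, sgl, 2, DMA_FROM_DEVICE);
	if (!mapped)
		return -ENOMEM;

	/* Program the device with the IOVA of each mapped segment. */
	for_each_sg(sgl, sg, mapped, i)
		dev_dbg(dev, "seg %d: iova 0x%llx len %u\n", i,
			(unsigned long long)sg_dma_address(sg),
			sg_dma_len(sg));

	dma_unmap_sg(dev, sgl, 2, DMA_FROM_DEVICE);
	return 0;
}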
3861
dfb805e8
FT
3862static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3863{
3864 return !dma_addr;
3865}
3866
01e1932a 3867const struct dma_map_ops intel_dma_ops = {
baa676fc
AP
3868 .alloc = intel_alloc_coherent,
3869 .free = intel_free_coherent,
ba395927
KA
3870 .map_sg = intel_map_sg,
3871 .unmap_sg = intel_unmap_sg,
ffbbef5c
FT
3872 .map_page = intel_map_page,
3873 .unmap_page = intel_unmap_page,
dfb805e8 3874 .mapping_error = intel_mapping_error,
5860acc1
CH
3875#ifdef CONFIG_X86
3876 .dma_supported = x86_dma_supported,
3877#endif
ba395927
KA
3878};
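/*
 * Illustrative sketch (not part of intel-iommu.c): a single-buffer
 * streaming mapping seen from a device driver. With intel_dma_ops
 * installed as dma_ops, dma_map_single() below dispatches to
 * intel_map_page(), dma_mapping_error() to intel_mapping_error(), and
 * dma_unmap_single() to intel_unmap_page(). The helper and its arguments
 * are assumptions for illustration.
 */
#include <linux/device.h>
#include <linux/dma-mapping.h>

static int example_streaming_xfer(struct device *dev, void *buf, size_t len)
{
	dma_addr_t handle;

	handle = dma_map_single(dev, buf, len, DMA_TO_DEVICE);
	if (dma_mapping_error(dev, handle))
		return -ENOMEM;

	/* ... hand 'handle' to the device and wait for the DMA to finish ... */

	dma_unmap_single(dev, handle, len, DMA_TO_DEVICE);
	return 0;
}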
3879
3880static inline int iommu_domain_cache_init(void)
3881{
3882 int ret = 0;
3883
3884 iommu_domain_cache = kmem_cache_create("iommu_domain",
3885 sizeof(struct dmar_domain),
3886 0,
3887 SLAB_HWCACHE_ALIGN,
3888
3889 NULL);
3890 if (!iommu_domain_cache) {
9f10e5bf 3891 pr_err("Couldn't create iommu_domain cache\n");
ba395927
KA
3892 ret = -ENOMEM;
3893 }
3894
3895 return ret;
3896}
3897
3898static inline int iommu_devinfo_cache_init(void)
3899{
3900 int ret = 0;
3901
3902 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3903 sizeof(struct device_domain_info),
3904 0,
3905 SLAB_HWCACHE_ALIGN,
ba395927
KA
3906 NULL);
3907 if (!iommu_devinfo_cache) {
9f10e5bf 3908 pr_err("Couldn't create devinfo cache\n");
ba395927
KA
3909 ret = -ENOMEM;
3910 }
3911
3912 return ret;
3913}
3914
ba395927
KA
3915static int __init iommu_init_mempool(void)
3916{
3917 int ret;
ae1ff3d6 3918 ret = iova_cache_get();
ba395927
KA
3919 if (ret)
3920 return ret;
3921
3922 ret = iommu_domain_cache_init();
3923 if (ret)
3924 goto domain_error;
3925
3926 ret = iommu_devinfo_cache_init();
3927 if (!ret)
3928 return ret;
3929
3930 kmem_cache_destroy(iommu_domain_cache);
3931domain_error:
ae1ff3d6 3932 iova_cache_put();
ba395927
KA
3933
3934 return -ENOMEM;
3935}
3936
3937static void __init iommu_exit_mempool(void)
3938{
3939 kmem_cache_destroy(iommu_devinfo_cache);
3940 kmem_cache_destroy(iommu_domain_cache);
ae1ff3d6 3941 iova_cache_put();
ba395927
KA
3942}
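/*
 * Illustrative sketch (not part of intel-iommu.c): how objects come out of
 * the kmem caches created above. Helpers elsewhere in this file (e.g.
 * free_devinfo_mem(), used later in this listing) wrap the same pattern;
 * the function names and GFP_KERNEL context here are assumptions.
 */
#include <linux/slab.h>

static struct dmar_domain *example_domain_alloc(void)
{
	/* Zeroed allocation from the dedicated "iommu_domain" cache. */
	return kmem_cache_zalloc(iommu_domain_cache, GFP_KERNEL);
}

static void example_domain_free(struct dmar_domain *domain)
{
	kmem_cache_free(iommu_domain_cache, domain);
}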
3943
556ab45f
DW
3944static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3945{
3946 struct dmar_drhd_unit *drhd;
3947 u32 vtbar;
3948 int rc;
3949
3950 /* We know that this device on this chipset has its own IOMMU.
3951 * If we find it under a different IOMMU, then the BIOS is lying
3952 * to us. Hope that the IOMMU for this device is actually
3953 * disabled, and it needs no translation...
3954 */
3955 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3956 if (rc) {
3957 /* "can't" happen */
3958 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3959 return;
3960 }
3961 vtbar &= 0xffff0000;
3962
3963 /* we know that this iommu should be at offset 0xa000 from vtbar */
3964 drhd = dmar_find_matched_drhd_unit(pdev);
3965 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3966 TAINT_FIRMWARE_WORKAROUND,
3967 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3968 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3969}
3970DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3971
ba395927
KA
3972static void __init init_no_remapping_devices(void)
3973{
3974 struct dmar_drhd_unit *drhd;
832bd858 3975 struct device *dev;
b683b230 3976 int i;
ba395927
KA
3977
3978 for_each_drhd_unit(drhd) {
3979 if (!drhd->include_all) {
b683b230
JL
3980 for_each_active_dev_scope(drhd->devices,
3981 drhd->devices_cnt, i, dev)
3982 break;
832bd858 3983 /* ignore DMAR unit if no devices exist */
ba395927
KA
3984 if (i == drhd->devices_cnt)
3985 drhd->ignored = 1;
3986 }
3987 }
3988
7c919779 3989 for_each_active_drhd_unit(drhd) {
7c919779 3990 if (drhd->include_all)
ba395927
KA
3991 continue;
3992
b683b230
JL
3993 for_each_active_dev_scope(drhd->devices,
3994 drhd->devices_cnt, i, dev)
832bd858 3995 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
ba395927 3996 break;
ba395927
KA
3997 if (i < drhd->devices_cnt)
3998 continue;
3999
c0771df8
DW
4000 /* This IOMMU has *only* gfx devices. Either bypass it or
4001 set the gfx_mapped flag, as appropriate */
4002 if (dmar_map_gfx) {
4003 intel_iommu_gfx_mapped = 1;
4004 } else {
4005 drhd->ignored = 1;
b683b230
JL
4006 for_each_active_dev_scope(drhd->devices,
4007 drhd->devices_cnt, i, dev)
832bd858 4008 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
ba395927
KA
4009 }
4010 }
4011}
4012
f59c7b69
FY
4013#ifdef CONFIG_SUSPEND
4014static int init_iommu_hw(void)
4015{
4016 struct dmar_drhd_unit *drhd;
4017 struct intel_iommu *iommu = NULL;
4018
4019 for_each_active_iommu(iommu, drhd)
4020 if (iommu->qi)
4021 dmar_reenable_qi(iommu);
4022
b779260b
JC
4023 for_each_iommu(iommu, drhd) {
4024 if (drhd->ignored) {
4025 /*
4026 * we always have to disable PMRs or DMA may fail on
4027 * this device
4028 */
4029 if (force_on)
4030 iommu_disable_protect_mem_regions(iommu);
4031 continue;
4032 }
4033
f59c7b69
FY
4034 iommu_flush_write_buffer(iommu);
4035
4036 iommu_set_root_entry(iommu);
4037
4038 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 4039 DMA_CCMD_GLOBAL_INVL);
2a41ccee
JL
4040 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4041 iommu_enable_translation(iommu);
b94996c9 4042 iommu_disable_protect_mem_regions(iommu);
f59c7b69
FY
4043 }
4044
4045 return 0;
4046}
4047
4048static void iommu_flush_all(void)
4049{
4050 struct dmar_drhd_unit *drhd;
4051 struct intel_iommu *iommu;
4052
4053 for_each_active_iommu(iommu, drhd) {
4054 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 4055 DMA_CCMD_GLOBAL_INVL);
f59c7b69 4056 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 4057 DMA_TLB_GLOBAL_FLUSH);
f59c7b69
FY
4058 }
4059}
4060
134fac3f 4061static int iommu_suspend(void)
f59c7b69
FY
4062{
4063 struct dmar_drhd_unit *drhd;
4064 struct intel_iommu *iommu = NULL;
4065 unsigned long flag;
4066
4067 for_each_active_iommu(iommu, drhd) {
4068 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
4069 GFP_ATOMIC);
4070 if (!iommu->iommu_state)
4071 goto nomem;
4072 }
4073
4074 iommu_flush_all();
4075
4076 for_each_active_iommu(iommu, drhd) {
4077 iommu_disable_translation(iommu);
4078
1f5b3c3f 4079 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
4080
4081 iommu->iommu_state[SR_DMAR_FECTL_REG] =
4082 readl(iommu->reg + DMAR_FECTL_REG);
4083 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
4084 readl(iommu->reg + DMAR_FEDATA_REG);
4085 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
4086 readl(iommu->reg + DMAR_FEADDR_REG);
4087 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
4088 readl(iommu->reg + DMAR_FEUADDR_REG);
4089
1f5b3c3f 4090 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
4091 }
4092 return 0;
4093
4094nomem:
4095 for_each_active_iommu(iommu, drhd)
4096 kfree(iommu->iommu_state);
4097
4098 return -ENOMEM;
4099}
4100
134fac3f 4101static void iommu_resume(void)
f59c7b69
FY
4102{
4103 struct dmar_drhd_unit *drhd;
4104 struct intel_iommu *iommu = NULL;
4105 unsigned long flag;
4106
4107 if (init_iommu_hw()) {
b779260b
JC
4108 if (force_on)
4109 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
4110 else
4111 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
134fac3f 4112 return;
f59c7b69
FY
4113 }
4114
4115 for_each_active_iommu(iommu, drhd) {
4116
1f5b3c3f 4117 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
4118
4119 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
4120 iommu->reg + DMAR_FECTL_REG);
4121 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
4122 iommu->reg + DMAR_FEDATA_REG);
4123 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
4124 iommu->reg + DMAR_FEADDR_REG);
4125 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
4126 iommu->reg + DMAR_FEUADDR_REG);
4127
1f5b3c3f 4128 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
4129 }
4130
4131 for_each_active_iommu(iommu, drhd)
4132 kfree(iommu->iommu_state);
f59c7b69
FY
4133}
4134
134fac3f 4135static struct syscore_ops iommu_syscore_ops = {
f59c7b69
FY
4136 .resume = iommu_resume,
4137 .suspend = iommu_suspend,
4138};
4139
134fac3f 4140static void __init init_iommu_pm_ops(void)
f59c7b69 4141{
134fac3f 4142 register_syscore_ops(&iommu_syscore_ops);
f59c7b69
FY
4143}
4144
4145#else
99592ba4 4146static inline void init_iommu_pm_ops(void) {}
f59c7b69
FY
4147#endif /* CONFIG_PM */
4148
318fe7df 4149
c2a0b538 4150int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
318fe7df
SS
4151{
4152 struct acpi_dmar_reserved_memory *rmrr;
0659b8dc 4153 int prot = DMA_PTE_READ|DMA_PTE_WRITE;
318fe7df 4154 struct dmar_rmrr_unit *rmrru;
0659b8dc 4155 size_t length;
318fe7df
SS
4156
4157 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
4158 if (!rmrru)
0659b8dc 4159 goto out;
318fe7df
SS
4160
4161 rmrru->hdr = header;
4162 rmrr = (struct acpi_dmar_reserved_memory *)header;
4163 rmrru->base_address = rmrr->base_address;
4164 rmrru->end_address = rmrr->end_address;
0659b8dc
EA
4165
4166 length = rmrr->end_address - rmrr->base_address + 1;
4167 rmrru->resv = iommu_alloc_resv_region(rmrr->base_address, length, prot,
4168 IOMMU_RESV_DIRECT);
4169 if (!rmrru->resv)
4170 goto free_rmrru;
4171
2e455289
JL
4172 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
4173 ((void *)rmrr) + rmrr->header.length,
4174 &rmrru->devices_cnt);
0659b8dc
EA
4175 if (rmrru->devices_cnt && rmrru->devices == NULL)
4176 goto free_all;
318fe7df 4177
2e455289 4178 list_add(&rmrru->list, &dmar_rmrr_units);
318fe7df 4179
2e455289 4180 return 0;
0659b8dc
EA
4181free_all:
4182 kfree(rmrru->resv);
4183free_rmrru:
4184 kfree(rmrru);
4185out:
4186 return -ENOMEM;
318fe7df
SS
4187}
4188
6b197249
JL
4189static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
4190{
4191 struct dmar_atsr_unit *atsru;
4192 struct acpi_dmar_atsr *tmp;
4193
4194 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4195 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
4196 if (atsr->segment != tmp->segment)
4197 continue;
4198 if (atsr->header.length != tmp->header.length)
4199 continue;
4200 if (memcmp(atsr, tmp, atsr->header.length) == 0)
4201 return atsru;
4202 }
4203
4204 return NULL;
4205}
4206
4207int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
318fe7df
SS
4208{
4209 struct acpi_dmar_atsr *atsr;
4210 struct dmar_atsr_unit *atsru;
4211
b608fe35 4212 if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
6b197249
JL
4213 return 0;
4214
318fe7df 4215 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
6b197249
JL
4216 atsru = dmar_find_atsr(atsr);
4217 if (atsru)
4218 return 0;
4219
4220 atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
318fe7df
SS
4221 if (!atsru)
4222 return -ENOMEM;
4223
6b197249
JL
4224 /*
4225 * If memory is allocated from slab by ACPI _DSM method, we need to
4226 * copy the memory content because the memory buffer will be freed
4227 * on return.
4228 */
4229 atsru->hdr = (void *)(atsru + 1);
4230 memcpy(atsru->hdr, hdr, hdr->length);
318fe7df 4231 atsru->include_all = atsr->flags & 0x1;
2e455289
JL
4232 if (!atsru->include_all) {
4233 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
4234 (void *)atsr + atsr->header.length,
4235 &atsru->devices_cnt);
4236 if (atsru->devices_cnt && atsru->devices == NULL) {
4237 kfree(atsru);
4238 return -ENOMEM;
4239 }
4240 }
318fe7df 4241
0e242612 4242 list_add_rcu(&atsru->list, &dmar_atsr_units);
318fe7df
SS
4243
4244 return 0;
4245}
4246
9bdc531e
JL
4247static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
4248{
4249 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
4250 kfree(atsru);
4251}
4252
6b197249
JL
4253int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4254{
4255 struct acpi_dmar_atsr *atsr;
4256 struct dmar_atsr_unit *atsru;
4257
4258 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4259 atsru = dmar_find_atsr(atsr);
4260 if (atsru) {
4261 list_del_rcu(&atsru->list);
4262 synchronize_rcu();
4263 intel_iommu_free_atsr(atsru);
4264 }
4265
4266 return 0;
4267}
4268
4269int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4270{
4271 int i;
4272 struct device *dev;
4273 struct acpi_dmar_atsr *atsr;
4274 struct dmar_atsr_unit *atsru;
4275
4276 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4277 atsru = dmar_find_atsr(atsr);
4278 if (!atsru)
4279 return 0;
4280
194dc870 4281 if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
6b197249
JL
4282 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
4283 i, dev)
4284 return -EBUSY;
194dc870 4285 }
6b197249
JL
4286
4287 return 0;
4288}
4289
ffebeb46
JL
4290static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
4291{
4292 int sp, ret = 0;
4293 struct intel_iommu *iommu = dmaru->iommu;
4294
4295 if (g_iommus[iommu->seq_id])
4296 return 0;
4297
4298 if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
9f10e5bf 4299 pr_warn("%s: Doesn't support hardware pass through.\n",
ffebeb46
JL
4300 iommu->name);
4301 return -ENXIO;
4302 }
4303 if (!ecap_sc_support(iommu->ecap) &&
4304 domain_update_iommu_snooping(iommu)) {
9f10e5bf 4305 pr_warn("%s: Doesn't support snooping.\n",
ffebeb46
JL
4306 iommu->name);
4307 return -ENXIO;
4308 }
4309 sp = domain_update_iommu_superpage(iommu) - 1;
4310 if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
9f10e5bf 4311 pr_warn("%s: Doesn't support large page.\n",
ffebeb46
JL
4312 iommu->name);
4313 return -ENXIO;
4314 }
4315
4316 /*
4317 * Disable translation if already enabled prior to OS handover.
4318 */
4319 if (iommu->gcmd & DMA_GCMD_TE)
4320 iommu_disable_translation(iommu);
4321
4322 g_iommus[iommu->seq_id] = iommu;
4323 ret = iommu_init_domains(iommu);
4324 if (ret == 0)
4325 ret = iommu_alloc_root_entry(iommu);
4326 if (ret)
4327 goto out;
4328
8a94ade4
DW
4329#ifdef CONFIG_INTEL_IOMMU_SVM
4330 if (pasid_enabled(iommu))
4331 intel_svm_alloc_pasid_tables(iommu);
4332#endif
4333
ffebeb46
JL
4334 if (dmaru->ignored) {
4335 /*
4336 * we always have to disable PMRs or DMA may fail on this device
4337 */
4338 if (force_on)
4339 iommu_disable_protect_mem_regions(iommu);
4340 return 0;
4341 }
4342
4343 intel_iommu_init_qi(iommu);
4344 iommu_flush_write_buffer(iommu);
a222a7f0
DW
4345
4346#ifdef CONFIG_INTEL_IOMMU_SVM
4347 if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) {
4348 ret = intel_svm_enable_prq(iommu);
4349 if (ret)
4350 goto disable_iommu;
4351 }
4352#endif
ffebeb46
JL
4353 ret = dmar_set_interrupt(iommu);
4354 if (ret)
4355 goto disable_iommu;
4356
4357 iommu_set_root_entry(iommu);
4358 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
4359 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4360 iommu_enable_translation(iommu);
4361
ffebeb46
JL
4362 iommu_disable_protect_mem_regions(iommu);
4363 return 0;
4364
4365disable_iommu:
4366 disable_dmar_iommu(iommu);
4367out:
4368 free_dmar_iommu(iommu);
4369 return ret;
4370}
4371
6b197249
JL
4372int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
4373{
ffebeb46
JL
4374 int ret = 0;
4375 struct intel_iommu *iommu = dmaru->iommu;
4376
4377 if (!intel_iommu_enabled)
4378 return 0;
4379 if (iommu == NULL)
4380 return -EINVAL;
4381
4382 if (insert) {
4383 ret = intel_iommu_add(dmaru);
4384 } else {
4385 disable_dmar_iommu(iommu);
4386 free_dmar_iommu(iommu);
4387 }
4388
4389 return ret;
6b197249
JL
4390}
4391
9bdc531e
JL
4392static void intel_iommu_free_dmars(void)
4393{
4394 struct dmar_rmrr_unit *rmrru, *rmrr_n;
4395 struct dmar_atsr_unit *atsru, *atsr_n;
4396
4397 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
4398 list_del(&rmrru->list);
4399 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
0659b8dc 4400 kfree(rmrru->resv);
9bdc531e 4401 kfree(rmrru);
318fe7df
SS
4402 }
4403
9bdc531e
JL
4404 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
4405 list_del(&atsru->list);
4406 intel_iommu_free_atsr(atsru);
4407 }
318fe7df
SS
4408}
4409
4410int dmar_find_matched_atsr_unit(struct pci_dev *dev)
4411{
b683b230 4412 int i, ret = 1;
318fe7df 4413 struct pci_bus *bus;
832bd858
DW
4414 struct pci_dev *bridge = NULL;
4415 struct device *tmp;
318fe7df
SS
4416 struct acpi_dmar_atsr *atsr;
4417 struct dmar_atsr_unit *atsru;
4418
4419 dev = pci_physfn(dev);
318fe7df 4420 for (bus = dev->bus; bus; bus = bus->parent) {
b5f82ddf 4421 bridge = bus->self;
d14053b3
DW
4422 /* If it's an integrated device, allow ATS */
4423 if (!bridge)
4424 return 1;
4425 /* Connected via non-PCIe: no ATS */
4426 if (!pci_is_pcie(bridge) ||
62f87c0e 4427 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
318fe7df 4428 return 0;
d14053b3 4429 /* If we found the root port, look it up in the ATSR */
b5f82ddf 4430 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
318fe7df 4431 break;
318fe7df
SS
4432 }
4433
0e242612 4434 rcu_read_lock();
b5f82ddf
JL
4435 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4436 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4437 if (atsr->segment != pci_domain_nr(dev->bus))
4438 continue;
4439
b683b230 4440 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
832bd858 4441 if (tmp == &bridge->dev)
b683b230 4442 goto out;
b5f82ddf
JL
4443
4444 if (atsru->include_all)
b683b230 4445 goto out;
b5f82ddf 4446 }
b683b230
JL
4447 ret = 0;
4448out:
0e242612 4449 rcu_read_unlock();
318fe7df 4450
b683b230 4451 return ret;
318fe7df
SS
4452}
4453
59ce0515
JL
4454int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
4455{
4456 int ret = 0;
4457 struct dmar_rmrr_unit *rmrru;
4458 struct dmar_atsr_unit *atsru;
4459 struct acpi_dmar_atsr *atsr;
4460 struct acpi_dmar_reserved_memory *rmrr;
4461
b608fe35 4462 if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING)
59ce0515
JL
4463 return 0;
4464
4465 list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
4466 rmrr = container_of(rmrru->hdr,
4467 struct acpi_dmar_reserved_memory, header);
4468 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4469 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
4470 ((void *)rmrr) + rmrr->header.length,
4471 rmrr->segment, rmrru->devices,
4472 rmrru->devices_cnt);
27e24950 4473 if (ret < 0)
59ce0515 4474 return ret;
e6a8c9b3 4475 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
27e24950
JL
4476 dmar_remove_dev_scope(info, rmrr->segment,
4477 rmrru->devices, rmrru->devices_cnt);
59ce0515
JL
4478 }
4479 }
4480
4481 list_for_each_entry(atsru, &dmar_atsr_units, list) {
4482 if (atsru->include_all)
4483 continue;
4484
4485 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4486 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4487 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
4488 (void *)atsr + atsr->header.length,
4489 atsr->segment, atsru->devices,
4490 atsru->devices_cnt);
4491 if (ret > 0)
4492 break;
4493 else if (ret < 0)
4494 return ret;
e6a8c9b3 4495 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
59ce0515
JL
4496 if (dmar_remove_dev_scope(info, atsr->segment,
4497 atsru->devices, atsru->devices_cnt))
4498 break;
4499 }
4500 }
4501
4502 return 0;
4503}
4504
99dcaded
FY
4505/*
4506 * Here we only respond to the action of a device being unbound from its driver.
4507 *
4508 * Added device is not attached to its DMAR domain here yet. That will happen
4509 * when mapping the device to iova.
4510 */
4511static int device_notifier(struct notifier_block *nb,
4512 unsigned long action, void *data)
4513{
4514 struct device *dev = data;
99dcaded
FY
4515 struct dmar_domain *domain;
4516
3d89194a 4517 if (iommu_dummy(dev))
44cd613c
DW
4518 return 0;
4519
1196c2fb 4520 if (action != BUS_NOTIFY_REMOVED_DEVICE)
7e7dfab7
JL
4521 return 0;
4522
1525a29a 4523 domain = find_domain(dev);
99dcaded
FY
4524 if (!domain)
4525 return 0;
4526
e6de0f8d 4527 dmar_remove_one_dev_info(domain, dev);
ab8dfe25 4528 if (!domain_type_is_vm_or_si(domain) && list_empty(&domain->devices))
7e7dfab7 4529 domain_exit(domain);
a97590e5 4530
99dcaded
FY
4531 return 0;
4532}
4533
4534static struct notifier_block device_nb = {
4535 .notifier_call = device_notifier,
4536};
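/*
 * Illustrative sketch (not part of intel-iommu.c): the same bus-notifier
 * pattern used by device_notifier() above, reduced to a minimal client
 * that only logs removals. The notifier name and message are assumptions;
 * registration against &pci_bus_type mirrors the call made later in
 * intel_iommu_init().
 */
#include <linux/notifier.h>
#include <linux/device.h>
#include <linux/pci.h>

static int example_device_notifier(struct notifier_block *nb,
				   unsigned long action, void *data)
{
	struct device *dev = data;

	if (action == BUS_NOTIFY_REMOVED_DEVICE)
		dev_info(dev, "device removed from its bus\n");

	return NOTIFY_OK;
}

static struct notifier_block example_nb = {
	.notifier_call = example_device_notifier,
};

/* Somewhere in init code: bus_register_notifier(&pci_bus_type, &example_nb); */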
4537
75f05569
JL
4538static int intel_iommu_memory_notifier(struct notifier_block *nb,
4539 unsigned long val, void *v)
4540{
4541 struct memory_notify *mhp = v;
4542 unsigned long long start, end;
4543 unsigned long start_vpfn, last_vpfn;
4544
4545 switch (val) {
4546 case MEM_GOING_ONLINE:
4547 start = mhp->start_pfn << PAGE_SHIFT;
4548 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
4549 if (iommu_domain_identity_map(si_domain, start, end)) {
9f10e5bf 4550 pr_warn("Failed to build identity map for [%llx-%llx]\n",
75f05569
JL
4551 start, end);
4552 return NOTIFY_BAD;
4553 }
4554 break;
4555
4556 case MEM_OFFLINE:
4557 case MEM_CANCEL_ONLINE:
4558 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4559 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
4560 while (start_vpfn <= last_vpfn) {
4561 struct iova *iova;
4562 struct dmar_drhd_unit *drhd;
4563 struct intel_iommu *iommu;
ea8ea460 4564 struct page *freelist;
75f05569
JL
4565
4566 iova = find_iova(&si_domain->iovad, start_vpfn);
4567 if (iova == NULL) {
9f10e5bf 4568 pr_debug("Failed get IOVA for PFN %lx\n",
75f05569
JL
4569 start_vpfn);
4570 break;
4571 }
4572
4573 iova = split_and_remove_iova(&si_domain->iovad, iova,
4574 start_vpfn, last_vpfn);
4575 if (iova == NULL) {
9f10e5bf 4576 pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
75f05569
JL
4577 start_vpfn, last_vpfn);
4578 return NOTIFY_BAD;
4579 }
4580
ea8ea460
DW
4581 freelist = domain_unmap(si_domain, iova->pfn_lo,
4582 iova->pfn_hi);
4583
75f05569
JL
4584 rcu_read_lock();
4585 for_each_active_iommu(iommu, drhd)
a1ddcbe9 4586 iommu_flush_iotlb_psi(iommu, si_domain,
a156ef99 4587 iova->pfn_lo, iova_size(iova),
ea8ea460 4588 !freelist, 0);
75f05569 4589 rcu_read_unlock();
ea8ea460 4590 dma_free_pagelist(freelist);
75f05569
JL
4591
4592 start_vpfn = iova->pfn_hi + 1;
4593 free_iova_mem(iova);
4594 }
4595 break;
4596 }
4597
4598 return NOTIFY_OK;
4599}
4600
4601static struct notifier_block intel_iommu_memory_nb = {
4602 .notifier_call = intel_iommu_memory_notifier,
4603 .priority = 0
4604};
4605
22e2f9fa
OP
4606static void free_all_cpu_cached_iovas(unsigned int cpu)
4607{
4608 int i;
4609
4610 for (i = 0; i < g_num_of_iommus; i++) {
4611 struct intel_iommu *iommu = g_iommus[i];
4612 struct dmar_domain *domain;
0caa7616 4613 int did;
22e2f9fa
OP
4614
4615 if (!iommu)
4616 continue;
4617
3bd4f911 4618 for (did = 0; did < cap_ndoms(iommu->cap); did++) {
0caa7616 4619 domain = get_iommu_domain(iommu, (u16)did);
22e2f9fa
OP
4620
4621 if (!domain)
4622 continue;
4623 free_cpu_cached_iovas(cpu, &domain->iovad);
4624 }
4625 }
4626}
4627
21647615 4628static int intel_iommu_cpu_dead(unsigned int cpu)
aa473240 4629{
21647615 4630 free_all_cpu_cached_iovas(cpu);
21647615 4631 return 0;
aa473240
OP
4632}
4633
161b28aa
JR
4634static void intel_disable_iommus(void)
4635{
4636 struct intel_iommu *iommu = NULL;
4637 struct dmar_drhd_unit *drhd;
4638
4639 for_each_iommu(iommu, drhd)
4640 iommu_disable_translation(iommu);
4641}
4642
a7fdb6e6
JR
4643static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
4644{
2926a2aa
JR
4645 struct iommu_device *iommu_dev = dev_to_iommu_device(dev);
4646
4647 return container_of(iommu_dev, struct intel_iommu, iommu);
a7fdb6e6
JR
4648}
4649
a5459cfe
AW
4650static ssize_t intel_iommu_show_version(struct device *dev,
4651 struct device_attribute *attr,
4652 char *buf)
4653{
a7fdb6e6 4654 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4655 u32 ver = readl(iommu->reg + DMAR_VER_REG);
4656 return sprintf(buf, "%d:%d\n",
4657 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4658}
4659static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
4660
4661static ssize_t intel_iommu_show_address(struct device *dev,
4662 struct device_attribute *attr,
4663 char *buf)
4664{
a7fdb6e6 4665 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4666 return sprintf(buf, "%llx\n", iommu->reg_phys);
4667}
4668static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
4669
4670static ssize_t intel_iommu_show_cap(struct device *dev,
4671 struct device_attribute *attr,
4672 char *buf)
4673{
a7fdb6e6 4674 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4675 return sprintf(buf, "%llx\n", iommu->cap);
4676}
4677static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
4678
4679static ssize_t intel_iommu_show_ecap(struct device *dev,
4680 struct device_attribute *attr,
4681 char *buf)
4682{
a7fdb6e6 4683 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4684 return sprintf(buf, "%llx\n", iommu->ecap);
4685}
4686static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4687
2238c082
AW
4688static ssize_t intel_iommu_show_ndoms(struct device *dev,
4689 struct device_attribute *attr,
4690 char *buf)
4691{
a7fdb6e6 4692 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2238c082
AW
4693 return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
4694}
4695static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL);
4696
4697static ssize_t intel_iommu_show_ndoms_used(struct device *dev,
4698 struct device_attribute *attr,
4699 char *buf)
4700{
a7fdb6e6 4701 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2238c082
AW
4702 return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
4703 cap_ndoms(iommu->cap)));
4704}
4705static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL);
4706
a5459cfe
AW
4707static struct attribute *intel_iommu_attrs[] = {
4708 &dev_attr_version.attr,
4709 &dev_attr_address.attr,
4710 &dev_attr_cap.attr,
4711 &dev_attr_ecap.attr,
2238c082
AW
4712 &dev_attr_domains_supported.attr,
4713 &dev_attr_domains_used.attr,
a5459cfe
AW
4714 NULL,
4715};
4716
4717static struct attribute_group intel_iommu_group = {
4718 .name = "intel-iommu",
4719 .attrs = intel_iommu_attrs,
4720};
4721
4722const struct attribute_group *intel_iommu_groups[] = {
4723 &intel_iommu_group,
4724 NULL,
4725};
4726
ba395927
KA
4727int __init intel_iommu_init(void)
4728{
9bdc531e 4729 int ret = -ENODEV;
3a93c841 4730 struct dmar_drhd_unit *drhd;
7c919779 4731 struct intel_iommu *iommu;
ba395927 4732
a59b50e9
JC
4733 /* VT-d is required for a TXT/tboot launch, so enforce that */
4734 force_on = tboot_force_iommu();
4735
3a5670e8
JL
4736 if (iommu_init_mempool()) {
4737 if (force_on)
4738 panic("tboot: Failed to initialize iommu memory\n");
4739 return -ENOMEM;
4740 }
4741
4742 down_write(&dmar_global_lock);
a59b50e9
JC
4743 if (dmar_table_init()) {
4744 if (force_on)
4745 panic("tboot: Failed to initialize DMAR table\n");
9bdc531e 4746 goto out_free_dmar;
a59b50e9 4747 }
ba395927 4748
c2c7286a 4749 if (dmar_dev_scope_init() < 0) {
a59b50e9
JC
4750 if (force_on)
4751 panic("tboot: Failed to initialize DMAR device scope\n");
9bdc531e 4752 goto out_free_dmar;
a59b50e9 4753 }
1886e8a9 4754
161b28aa 4755 if (no_iommu || dmar_disabled) {
bfd20f1c
SL
4756 /*
4757 * We exit the function here to ensure IOMMU's remapping and
4758 * mempool aren't setup, which means that the IOMMU's PMRs
4759 * won't be disabled via the call to init_dmars(). So disable
4760 * it explicitly here. The PMRs were setup by tboot prior to
4761 * calling SENTER, but the kernel is expected to reset/tear
4762 * down the PMRs.
4763 */
4764 if (intel_iommu_tboot_noforce) {
4765 for_each_iommu(iommu, drhd)
4766 iommu_disable_protect_mem_regions(iommu);
4767 }
4768
161b28aa
JR
4769 /*
4770 * Make sure the IOMMUs are switched off, even when we
4771 * boot into a kexec kernel and the previous kernel left
4772 * them enabled
4773 */
4774 intel_disable_iommus();
9bdc531e 4775 goto out_free_dmar;
161b28aa 4776 }
2ae21010 4777
318fe7df 4778 if (list_empty(&dmar_rmrr_units))
9f10e5bf 4779 pr_info("No RMRR found\n");
318fe7df
SS
4780
4781 if (list_empty(&dmar_atsr_units))
9f10e5bf 4782 pr_info("No ATSR found\n");
318fe7df 4783
51a63e67
JC
4784 if (dmar_init_reserved_ranges()) {
4785 if (force_on)
4786 panic("tboot: Failed to reserve iommu ranges\n");
3a5670e8 4787 goto out_free_reserved_range;
51a63e67 4788 }
ba395927
KA
4789
4790 init_no_remapping_devices();
4791
b779260b 4792 ret = init_dmars();
ba395927 4793 if (ret) {
a59b50e9
JC
4794 if (force_on)
4795 panic("tboot: Failed to initialize DMARs\n");
9f10e5bf 4796 pr_err("Initialization failed\n");
9bdc531e 4797 goto out_free_reserved_range;
ba395927 4798 }
3a5670e8 4799 up_write(&dmar_global_lock);
9f10e5bf 4800 pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
ba395927 4801
75f1cdf1
FT
4802#ifdef CONFIG_SWIOTLB
4803 swiotlb = 0;
4804#endif
19943b0e 4805 dma_ops = &intel_dma_ops;
4ed0d3e6 4806
134fac3f 4807 init_iommu_pm_ops();
a8bcbb0d 4808
39ab9555
JR
4809 for_each_active_iommu(iommu, drhd) {
4810 iommu_device_sysfs_add(&iommu->iommu, NULL,
4811 intel_iommu_groups,
4812 "%s", iommu->name);
4813 iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
4814 iommu_device_register(&iommu->iommu);
4815 }
a5459cfe 4816
4236d97d 4817 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
99dcaded 4818 bus_register_notifier(&pci_bus_type, &device_nb);
75f05569
JL
4819 if (si_domain && !hw_pass_through)
4820 register_memory_notifier(&intel_iommu_memory_nb);
21647615
AMG
4821 cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL,
4822 intel_iommu_cpu_dead);
8bc1f85c
ED
4823 intel_iommu_enabled = 1;
4824
ba395927 4825 return 0;
9bdc531e
JL
4826
4827out_free_reserved_range:
4828 put_iova_domain(&reserved_iova_list);
9bdc531e
JL
4829out_free_dmar:
4830 intel_iommu_free_dmars();
3a5670e8
JL
4831 up_write(&dmar_global_lock);
4832 iommu_exit_mempool();
9bdc531e 4833 return ret;
ba395927 4834}
e820482c 4835
2452d9db 4836static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
579305f7
AW
4837{
4838 struct intel_iommu *iommu = opaque;
4839
2452d9db 4840 domain_context_clear_one(iommu, PCI_BUS_NUM(alias), alias & 0xff);
579305f7
AW
4841 return 0;
4842}
4843
4844/*
4845 * NB - intel-iommu lacks any sort of reference counting for the users of
4846 * dependent devices. If multiple endpoints have intersecting dependent
4847 * devices, unbinding the driver from any one of them will possibly leave
4848 * the others unable to operate.
4849 */
2452d9db 4850static void domain_context_clear(struct intel_iommu *iommu, struct device *dev)
3199aa6b 4851{
0bcb3e28 4852 if (!iommu || !dev || !dev_is_pci(dev))
3199aa6b
HW
4853 return;
4854
2452d9db 4855 pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_clear_one_cb, iommu);
3199aa6b
HW
4856}
4857
127c7615 4858static void __dmar_remove_one_dev_info(struct device_domain_info *info)
c7151a8d 4859{
c7151a8d
WH
4860 struct intel_iommu *iommu;
4861 unsigned long flags;
c7151a8d 4862
55d94043
JR
4863 assert_spin_locked(&device_domain_lock);
4864
127c7615 4865 if (WARN_ON(!info))
c7151a8d
WH
4866 return;
4867
127c7615 4868 iommu = info->iommu;
c7151a8d 4869
127c7615
JR
4870 if (info->dev) {
4871 iommu_disable_dev_iotlb(info);
4872 domain_context_clear(iommu, info->dev);
4873 }
c7151a8d 4874
b608ac3b 4875 unlink_domain_info(info);
c7151a8d 4876
d160aca5 4877 spin_lock_irqsave(&iommu->lock, flags);
127c7615 4878 domain_detach_iommu(info->domain, iommu);
d160aca5 4879 spin_unlock_irqrestore(&iommu->lock, flags);
c7151a8d 4880
127c7615 4881 free_devinfo_mem(info);
c7151a8d 4882}
c7151a8d 4883
55d94043
JR
4884static void dmar_remove_one_dev_info(struct dmar_domain *domain,
4885 struct device *dev)
4886{
127c7615 4887 struct device_domain_info *info;
55d94043 4888 unsigned long flags;
3e7abe25 4889
55d94043 4890 spin_lock_irqsave(&device_domain_lock, flags);
127c7615
JR
4891 info = dev->archdata.iommu;
4892 __dmar_remove_one_dev_info(info);
55d94043 4893 spin_unlock_irqrestore(&device_domain_lock, flags);
c7151a8d
WH
4894}
4895
2c2e2c38 4896static int md_domain_init(struct dmar_domain *domain, int guest_width)
5e98c4b1
WH
4897{
4898 int adjust_width;
4899
0fb5fe87
RM
4900 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
4901 DMA_32BIT_PFN);
5e98c4b1
WH
4902 domain_reserve_special_ranges(domain);
4903
4904 /* calculate AGAW */
4905 domain->gaw = guest_width;
4906 adjust_width = guestwidth_to_adjustwidth(guest_width);
4907 domain->agaw = width_to_agaw(adjust_width);
4908
5e98c4b1 4909 domain->iommu_coherency = 0;
c5b15255 4910 domain->iommu_snooping = 0;
6dd9a7c7 4911 domain->iommu_superpage = 0;
fe40f1e0 4912 domain->max_addr = 0;
5e98c4b1
WH
4913
4914 /* always allocate the top pgd */
4c923d47 4915 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
5e98c4b1
WH
4916 if (!domain->pgd)
4917 return -ENOMEM;
4918 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4919 return 0;
4920}
4921
00a77deb 4922static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
38717946 4923{
5d450806 4924 struct dmar_domain *dmar_domain;
00a77deb
JR
4925 struct iommu_domain *domain;
4926
4927 if (type != IOMMU_DOMAIN_UNMANAGED)
4928 return NULL;
38717946 4929
ab8dfe25 4930 dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE);
5d450806 4931 if (!dmar_domain) {
9f10e5bf 4932 pr_err("Can't allocate dmar_domain\n");
00a77deb 4933 return NULL;
38717946 4934 }
2c2e2c38 4935 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
9f10e5bf 4936 pr_err("Domain initialization failed\n");
92d03cc8 4937 domain_exit(dmar_domain);
00a77deb 4938 return NULL;
38717946 4939 }
8140a95d 4940 domain_update_iommu_cap(dmar_domain);
faa3d6f5 4941
00a77deb 4942 domain = &dmar_domain->domain;
8a0e715b
JR
4943 domain->geometry.aperture_start = 0;
4944 domain->geometry.aperture_end = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
4945 domain->geometry.force_aperture = true;
4946
00a77deb 4947 return domain;
38717946 4948}
38717946 4949
00a77deb 4950static void intel_iommu_domain_free(struct iommu_domain *domain)
38717946 4951{
00a77deb 4952 domain_exit(to_dmar_domain(domain));
38717946 4953}
38717946 4954
4c5478c9
JR
4955static int intel_iommu_attach_device(struct iommu_domain *domain,
4956 struct device *dev)
38717946 4957{
00a77deb 4958 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
fe40f1e0
WH
4959 struct intel_iommu *iommu;
4960 int addr_width;
156baca8 4961 u8 bus, devfn;
faa3d6f5 4962
c875d2c1
AW
4963 if (device_is_rmrr_locked(dev)) {
4964 dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n");
4965 return -EPERM;
4966 }
4967
7207d8f9
DW
4968 /* normally dev is not mapped */
4969 if (unlikely(domain_context_mapped(dev))) {
faa3d6f5
WH
4970 struct dmar_domain *old_domain;
4971
1525a29a 4972 old_domain = find_domain(dev);
faa3d6f5 4973 if (old_domain) {
d160aca5 4974 rcu_read_lock();
de7e8886 4975 dmar_remove_one_dev_info(old_domain, dev);
d160aca5 4976 rcu_read_unlock();
62c22167
JR
4977
4978 if (!domain_type_is_vm_or_si(old_domain) &&
4979 list_empty(&old_domain->devices))
4980 domain_exit(old_domain);
faa3d6f5
WH
4981 }
4982 }
4983
156baca8 4984 iommu = device_to_iommu(dev, &bus, &devfn);
fe40f1e0
WH
4985 if (!iommu)
4986 return -ENODEV;
4987
4988 /* check if this iommu agaw is sufficient for max mapped address */
4989 addr_width = agaw_to_width(iommu->agaw);
a99c47a2
TL
4990 if (addr_width > cap_mgaw(iommu->cap))
4991 addr_width = cap_mgaw(iommu->cap);
4992
4993 if (dmar_domain->max_addr > (1LL << addr_width)) {
9f10e5bf 4994 pr_err("%s: iommu width (%d) is not "
fe40f1e0 4995 "sufficient for the mapped address (%llx)\n",
a99c47a2 4996 __func__, addr_width, dmar_domain->max_addr);
fe40f1e0
WH
4997 return -EFAULT;
4998 }
a99c47a2
TL
4999 dmar_domain->gaw = addr_width;
5000
5001 /*
5002 * Knock out extra levels of page tables if necessary
5003 */
5004 while (iommu->agaw < dmar_domain->agaw) {
5005 struct dma_pte *pte;
5006
5007 pte = dmar_domain->pgd;
5008 if (dma_pte_present(pte)) {
25cbff16
SY
5009 dmar_domain->pgd = (struct dma_pte *)
5010 phys_to_virt(dma_pte_addr(pte));
7a661013 5011 free_pgtable_page(pte);
a99c47a2
TL
5012 }
5013 dmar_domain->agaw--;
5014 }
fe40f1e0 5015
28ccce0d 5016 return domain_add_dev_info(dmar_domain, dev);
38717946 5017}
38717946 5018
4c5478c9
JR
5019static void intel_iommu_detach_device(struct iommu_domain *domain,
5020 struct device *dev)
38717946 5021{
e6de0f8d 5022 dmar_remove_one_dev_info(to_dmar_domain(domain), dev);
faa3d6f5 5023}
c7151a8d 5024
b146a1c9
JR
5025static int intel_iommu_map(struct iommu_domain *domain,
5026 unsigned long iova, phys_addr_t hpa,
5009065d 5027 size_t size, int iommu_prot)
faa3d6f5 5028{
00a77deb 5029 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
fe40f1e0 5030 u64 max_addr;
dde57a21 5031 int prot = 0;
faa3d6f5 5032 int ret;
fe40f1e0 5033
dde57a21
JR
5034 if (iommu_prot & IOMMU_READ)
5035 prot |= DMA_PTE_READ;
5036 if (iommu_prot & IOMMU_WRITE)
5037 prot |= DMA_PTE_WRITE;
9cf06697
SY
5038 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
5039 prot |= DMA_PTE_SNP;
dde57a21 5040
163cc52c 5041 max_addr = iova + size;
dde57a21 5042 if (dmar_domain->max_addr < max_addr) {
fe40f1e0
WH
5043 u64 end;
5044
5045 /* check if minimum agaw is sufficient for mapped address */
8954da1f 5046 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
fe40f1e0 5047 if (end < max_addr) {
9f10e5bf 5048 pr_err("%s: iommu width (%d) is not "
fe40f1e0 5049 "sufficient for the mapped address (%llx)\n",
8954da1f 5050 __func__, dmar_domain->gaw, max_addr);
fe40f1e0
WH
5051 return -EFAULT;
5052 }
dde57a21 5053 dmar_domain->max_addr = max_addr;
fe40f1e0 5054 }
ad051221
DW
5055 /* Round up size to next multiple of PAGE_SIZE, if it and
5056 the low bits of hpa would take us onto the next page */
88cb6a74 5057 size = aligned_nrpages(hpa, size);
ad051221
DW
5058 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
5059 hpa >> VTD_PAGE_SHIFT, size, prot);
faa3d6f5 5060 return ret;
38717946 5061}
38717946 5062
5009065d 5063static size_t intel_iommu_unmap(struct iommu_domain *domain,
ea8ea460 5064 unsigned long iova, size_t size)
38717946 5065{
00a77deb 5066 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
ea8ea460
DW
5067 struct page *freelist = NULL;
5068 struct intel_iommu *iommu;
5069 unsigned long start_pfn, last_pfn;
5070 unsigned int npages;
42e8c186 5071 int iommu_id, level = 0;
5cf0a76f
DW
5072
5073 /* Cope with horrid API which requires us to unmap more than the
5074 size argument if it happens to be a large-page mapping. */
dc02e46e 5075 BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
5cf0a76f
DW
5076
5077 if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
5078 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4b99d352 5079
ea8ea460
DW
5080 start_pfn = iova >> VTD_PAGE_SHIFT;
5081 last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
5082
5083 freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
5084
5085 npages = last_pfn - start_pfn + 1;
5086
29a27719 5087 for_each_domain_iommu(iommu_id, dmar_domain) {
a1ddcbe9 5088 iommu = g_iommus[iommu_id];
ea8ea460 5089
42e8c186
JR
5090 iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
5091 start_pfn, npages, !freelist, 0);
ea8ea460
DW
5092 }
5093
5094 dma_free_pagelist(freelist);
fe40f1e0 5095
163cc52c
DW
5096 if (dmar_domain->max_addr == iova + size)
5097 dmar_domain->max_addr = iova;
b146a1c9 5098
5cf0a76f 5099 return size;
38717946 5100}
38717946 5101
d14d6577 5102static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
bb5547ac 5103 dma_addr_t iova)
38717946 5104{
00a77deb 5105 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
38717946 5106 struct dma_pte *pte;
5cf0a76f 5107 int level = 0;
faa3d6f5 5108 u64 phys = 0;
38717946 5109
5cf0a76f 5110 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
38717946 5111 if (pte)
faa3d6f5 5112 phys = dma_pte_addr(pte);
38717946 5113
faa3d6f5 5114 return phys;
38717946 5115}
a8bcbb0d 5116
5d587b8d 5117static bool intel_iommu_capable(enum iommu_cap cap)
dbb9fd86 5118{
dbb9fd86 5119 if (cap == IOMMU_CAP_CACHE_COHERENCY)
5d587b8d 5120 return domain_update_iommu_snooping(NULL) == 1;
323f99cb 5121 if (cap == IOMMU_CAP_INTR_REMAP)
5d587b8d 5122 return irq_remapping_enabled == 1;
dbb9fd86 5123
5d587b8d 5124 return false;
dbb9fd86
SY
5125}
5126
abdfdde2
AW
5127static int intel_iommu_add_device(struct device *dev)
5128{
a5459cfe 5129 struct intel_iommu *iommu;
abdfdde2 5130 struct iommu_group *group;
156baca8 5131 u8 bus, devfn;
70ae6f0d 5132
a5459cfe
AW
5133 iommu = device_to_iommu(dev, &bus, &devfn);
5134 if (!iommu)
70ae6f0d
AW
5135 return -ENODEV;
5136
e3d10af1 5137 iommu_device_link(&iommu->iommu, dev);
a4ff1fc2 5138
e17f9ff4 5139 group = iommu_group_get_for_dev(dev);
783f157b 5140
e17f9ff4
AW
5141 if (IS_ERR(group))
5142 return PTR_ERR(group);
bcb71abe 5143
abdfdde2 5144 iommu_group_put(group);
e17f9ff4 5145 return 0;
abdfdde2 5146}
70ae6f0d 5147
abdfdde2
AW
5148static void intel_iommu_remove_device(struct device *dev)
5149{
a5459cfe
AW
5150 struct intel_iommu *iommu;
5151 u8 bus, devfn;
5152
5153 iommu = device_to_iommu(dev, &bus, &devfn);
5154 if (!iommu)
5155 return;
5156
abdfdde2 5157 iommu_group_remove_device(dev);
a5459cfe 5158
e3d10af1 5159 iommu_device_unlink(&iommu->iommu, dev);
70ae6f0d
AW
5160}
5161
0659b8dc
EA
5162static void intel_iommu_get_resv_regions(struct device *device,
5163 struct list_head *head)
5164{
5165 struct iommu_resv_region *reg;
5166 struct dmar_rmrr_unit *rmrr;
5167 struct device *i_dev;
5168 int i;
5169
5170 rcu_read_lock();
5171 for_each_rmrr_units(rmrr) {
5172 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
5173 i, i_dev) {
5174 if (i_dev != device)
5175 continue;
5176
5177 list_add_tail(&rmrr->resv->list, head);
5178 }
5179 }
5180 rcu_read_unlock();
5181
5182 reg = iommu_alloc_resv_region(IOAPIC_RANGE_START,
5183 IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1,
9d3a4de4 5184 0, IOMMU_RESV_MSI);
0659b8dc
EA
5185 if (!reg)
5186 return;
5187 list_add_tail(&reg->list, head);
5188}
5189
5190static void intel_iommu_put_resv_regions(struct device *dev,
5191 struct list_head *head)
5192{
5193 struct iommu_resv_region *entry, *next;
5194
5195 list_for_each_entry_safe(entry, next, head, list) {
5196 if (entry->type == IOMMU_RESV_RESERVED)
5197 kfree(entry);
5198 }
70ae6f0d
AW
5199}
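/*
 * Illustrative sketch (not part of intel-iommu.c): enumerating a device's
 * reserved regions through the generic IOMMU API, which calls back into
 * the two helpers above. The walker function is an assumption; the
 * iommu_get_resv_regions()/iommu_put_resv_regions() entry points and the
 * struct iommu_resv_region fields come from <linux/iommu.h>.
 */
#include <linux/iommu.h>
#include <linux/device.h>

static void example_dump_resv_regions(struct device *dev)
{
	struct iommu_resv_region *region;
	LIST_HEAD(resv);

	iommu_get_resv_regions(dev, &resv);
	list_for_each_entry(region, &resv, list)
		dev_info(dev, "reserved: %pa + %zx (type %d)\n",
			 &region->start, region->length, region->type);
	iommu_put_resv_regions(dev, &resv);
}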
5200
2f26e0a9 5201#ifdef CONFIG_INTEL_IOMMU_SVM
65ca7f5f
JP
5202#define MAX_NR_PASID_BITS (20)
5203static inline unsigned long intel_iommu_get_pts(struct intel_iommu *iommu)
5204{
5205 /*
5206 * Convert ecap_pss to extend context entry pts encoding, also
5207 * respect the soft pasid_max value set by the iommu.
5208 * - number of PASID bits = ecap_pss + 1
5209 * - number of PASID table entries = 2^(pts + 5)
5210 * Therefore, pts = ecap_pss - 4
5211 * e.g. KBL ecap_pss = 0x13, PASID has 20 bits, pts = 15
5212 */
5213 if (ecap_pss(iommu->ecap) < 5)
5214 return 0;
5215
5216 /* pasid_max is encoded as actual number of entries not the bits */
5217 return find_first_bit((unsigned long *)&iommu->pasid_max,
5218 MAX_NR_PASID_BITS) - 5;
5219}
5220
2f26e0a9
DW
5221int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev)
5222{
5223 struct device_domain_info *info;
5224 struct context_entry *context;
5225 struct dmar_domain *domain;
5226 unsigned long flags;
5227 u64 ctx_lo;
5228 int ret;
5229
5230 domain = get_valid_domain_for_dev(sdev->dev);
5231 if (!domain)
5232 return -EINVAL;
5233
5234 spin_lock_irqsave(&device_domain_lock, flags);
5235 spin_lock(&iommu->lock);
5236
5237 ret = -EINVAL;
5238 info = sdev->dev->archdata.iommu;
5239 if (!info || !info->pasid_supported)
5240 goto out;
5241
5242 context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
5243 if (WARN_ON(!context))
5244 goto out;
5245
5246 ctx_lo = context[0].lo;
5247
5248 sdev->did = domain->iommu_did[iommu->seq_id];
5249 sdev->sid = PCI_DEVID(info->bus, info->devfn);
5250
5251 if (!(ctx_lo & CONTEXT_PASIDE)) {
11b93ebf
AR
5252 if (iommu->pasid_state_table)
5253 context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table);
65ca7f5f
JP
5254 context[1].lo = (u64)virt_to_phys(iommu->pasid_table) |
5255 intel_iommu_get_pts(iommu);
5256
2f26e0a9
DW
5257 wmb();
5258 /* CONTEXT_TT_MULTI_LEVEL and CONTEXT_TT_DEV_IOTLB are both
5259 * extended to permit requests-with-PASID if the PASIDE bit
5260 * is set. which makes sense. For CONTEXT_TT_PASS_THROUGH,
5261 * however, the PASIDE bit is ignored and requests-with-PASID
5262 * are unconditionally blocked. Which makes less sense.
5263 * So convert from CONTEXT_TT_PASS_THROUGH to one of the new
5264 * "guest mode" translation types depending on whether ATS
5265 * is available or not. Annoyingly, we can't use the new
5266 * modes *unless* PASIDE is set. */
5267 if ((ctx_lo & CONTEXT_TT_MASK) == (CONTEXT_TT_PASS_THROUGH << 2)) {
5268 ctx_lo &= ~CONTEXT_TT_MASK;
5269 if (info->ats_supported)
5270 ctx_lo |= CONTEXT_TT_PT_PASID_DEV_IOTLB << 2;
5271 else
5272 ctx_lo |= CONTEXT_TT_PT_PASID << 2;
5273 }
5274 ctx_lo |= CONTEXT_PASIDE;
907fea34
DW
5275 if (iommu->pasid_state_table)
5276 ctx_lo |= CONTEXT_DINVE;
a222a7f0
DW
5277 if (info->pri_supported)
5278 ctx_lo |= CONTEXT_PRS;
5279 context[0].lo = ctx_lo;
5280 wmb();
5281 iommu->flush.flush_context(iommu, sdev->did, sdev->sid,
5282 DMA_CCMD_MASK_NOBIT,
5283 DMA_CCMD_DEVICE_INVL);
5284 }
5285
5286 /* Enable PASID support in the device, if it wasn't already */
5287 if (!info->pasid_enabled)
5288 iommu_enable_dev_iotlb(info);
5289
5290 if (info->ats_enabled) {
5291 sdev->dev_iotlb = 1;
5292 sdev->qdep = info->ats_qdep;
5293 if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
5294 sdev->qdep = 0;
5295 }
5296 ret = 0;
5297
5298 out:
5299 spin_unlock(&iommu->lock);
5300 spin_unlock_irqrestore(&device_domain_lock, flags);
5301
5302 return ret;
5303}
5304
5305struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
5306{
5307 struct intel_iommu *iommu;
5308 u8 bus, devfn;
5309
5310 if (iommu_dummy(dev)) {
5311 dev_warn(dev,
5312 "No IOMMU translation for device; cannot enable SVM\n");
5313 return NULL;
5314 }
5315
5316 iommu = device_to_iommu(dev, &bus, &devfn);
5317	if (!iommu) {
b9997e38 5318 dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
5319 return NULL;
5320 }
5321
5322 if (!iommu->pasid_table) {
b9997e38 5323 dev_err(dev, "PASID not enabled on IOMMU; cannot enable SVM\n");
5324 return NULL;
5325 }
5326
5327 return iommu;
5328}
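/*
 * Rough usage sketch (how the SVM code in intel-svm.c is expected to use
 * the two helpers above; simplified, error handling elided):
 *
 *	iommu = intel_svm_device_to_iommu(dev);
 *	if (iommu)
 *		ret = intel_iommu_enable_pasid(iommu, sdev);
 */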
5329#endif /* CONFIG_INTEL_IOMMU_SVM */
5330
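/* The set of callbacks this driver registers with the generic IOMMU core. */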
b0119e87 5331const struct iommu_ops intel_iommu_ops = {
5332 .capable = intel_iommu_capable,
5333 .domain_alloc = intel_iommu_domain_alloc,
5334 .domain_free = intel_iommu_domain_free,
5335 .attach_dev = intel_iommu_attach_device,
5336 .detach_dev = intel_iommu_detach_device,
5337 .map = intel_iommu_map,
5338 .unmap = intel_iommu_unmap,
5339 .map_sg = default_iommu_map_sg,
5340 .iova_to_phys = intel_iommu_iova_to_phys,
5341 .add_device = intel_iommu_add_device,
5342 .remove_device = intel_iommu_remove_device,
5343 .get_resv_regions = intel_iommu_get_resv_regions,
5344 .put_resv_regions = intel_iommu_put_resv_regions,
5345 .device_group = pci_device_group,
5346 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
a8bcbb0d 5347};
9af88143 5348
5349static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
5350{
5351	/* G4x/GM45 integrated graphics DMAR support is hopelessly broken. */
9f10e5bf 5352 pr_info("Disabling IOMMU for graphics on this chipset\n");
5353 dmar_map_gfx = 0;
5354}
5355
5356DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
5357DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
5358DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
5359DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
5360DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
5361DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
5362DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
5363
d34d6517 5364static void quirk_iommu_rwbf(struct pci_dev *dev)
5365{
5366 /*
5367 * Mobile 4 Series Chipset neglects to set RWBF capability,
210561ff 5368	 * but needs it. The same seems to hold for the desktop versions.
9af88143 5369 */
9f10e5bf 5370 pr_info("Forcing write-buffer flush capability\n");
5371 rwbf_quirk = 1;
5372}
5373
5374DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
5375DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
5376DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
5377DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
5378DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
5379DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
5380DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
e0fc7e0b 5381
5382#define GGC 0x52
5383#define GGC_MEMORY_SIZE_MASK (0xf << 8)
5384#define GGC_MEMORY_SIZE_NONE (0x0 << 8)
5385#define GGC_MEMORY_SIZE_1M (0x1 << 8)
5386#define GGC_MEMORY_SIZE_2M (0x3 << 8)
5387#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
5388#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
5389#define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
5390#define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
5391
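/*
 * GGC is the graphics control configuration word at offset 0x52 of the
 * devices quirked below.  Bits 11:8 (GGC_MEMORY_SIZE_MASK) encode how much
 * GTT stolen memory the BIOS reserved; the *_VT values (bit 11,
 * GGC_MEMORY_VT_ENABLED) indicate that the allocation includes the shadow
 * GTT space VT-d needs for the graphics device.
 */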
d34d6517 5392static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
5393{
5394 unsigned short ggc;
5395
eecfd57f 5396 if (pci_read_config_word(dev, GGC, &ggc))
5397 return;
5398
eecfd57f 5399 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
9f10e5bf 5400 pr_info("BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
9eecabcb 5401 dmar_map_gfx = 0;
5402 } else if (dmar_map_gfx) {
5403 /* we have to ensure the gfx device is idle before we flush */
9f10e5bf 5404 pr_info("Disabling batched IOTLB flush on Ironlake\n");
5405 intel_iommu_strict = 1;
5406 }
5407}
5408DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
5409DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
5410DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
5411DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
5412
5413/* On Tylersburg chipsets, some BIOSes have been known to enable the
5414 ISOCH DMAR unit for the Azalia sound device, but not give it any
5415 TLB entries, which causes it to deadlock. Check for that. We do
5416 this in a function called from init_dmars(), instead of in a PCI
5417 quirk, because we don't want to print the obnoxious "BIOS broken"
5418 message if VT-d is actually disabled.
5419*/
5420static void __init check_tylersburg_isoch(void)
5421{
5422 struct pci_dev *pdev;
5423 uint32_t vtisochctrl;
5424
5425 /* If there's no Azalia in the system anyway, forget it. */
5426 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
5427 if (!pdev)
5428 return;
5429 pci_dev_put(pdev);
5430
5431	/* System Management Registers. These might be hidden, in which case
5432	   we can't do the sanity check. But that's OK, because the
5433	   known-broken BIOSes _don't_ actually hide them, so far. */
5434 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
5435 if (!pdev)
5436 return;
5437
5438 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
5439 pci_dev_put(pdev);
5440 return;
5441 }
5442
5443 pci_dev_put(pdev);
5444
5445 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
5446 if (vtisochctrl & 1)
5447 return;
5448
5449 /* Drop all bits other than the number of TLB entries */
5450 vtisochctrl &= 0x1c;
5451
5452 /* If we have the recommended number of TLB entries (16), fine. */
5453 if (vtisochctrl == 0x10)
5454 return;
5455
5456	/* Zero TLB entries? That is completely broken; warn loudly. */
5457 if (!vtisochctrl) {
5458 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
5459 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
5460 dmi_get_system_info(DMI_BIOS_VENDOR),
5461 dmi_get_system_info(DMI_BIOS_VERSION),
5462 dmi_get_system_info(DMI_PRODUCT_VERSION));
5463 iommu_identity_mapping |= IDENTMAP_AZALIA;
5464 return;
5465 }
5466
5467 pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
5468 vtisochctrl);
5469}