drivers/iommu/intel-iommu.c
ba395927 1/*
ea8ea460 2 * Copyright © 2006-2014 Intel Corporation.
ba395927
KA
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
ea8ea460
DW
13 * Authors: David Woodhouse <dwmw2@infradead.org>,
14 * Ashok Raj <ashok.raj@intel.com>,
15 * Shaohua Li <shaohua.li@intel.com>,
16 * Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
17 * Fenghua Yu <fenghua.yu@intel.com>
9f10e5bf 18 * Joerg Roedel <jroedel@suse.de>
ba395927
KA
19 */
20
9f10e5bf
JR
21#define pr_fmt(fmt) "DMAR: " fmt
22
ba395927
KA
23#include <linux/init.h>
24#include <linux/bitmap.h>
5e0d2a6f 25#include <linux/debugfs.h>
54485c30 26#include <linux/export.h>
ba395927
KA
27#include <linux/slab.h>
28#include <linux/irq.h>
29#include <linux/interrupt.h>
ba395927
KA
30#include <linux/spinlock.h>
31#include <linux/pci.h>
32#include <linux/dmar.h>
33#include <linux/dma-mapping.h>
34#include <linux/mempool.h>
75f05569 35#include <linux/memory.h>
aa473240 36#include <linux/cpu.h>
5e0d2a6f 37#include <linux/timer.h>
dfddb969 38#include <linux/io.h>
38717946 39#include <linux/iova.h>
5d450806 40#include <linux/iommu.h>
38717946 41#include <linux/intel-iommu.h>
134fac3f 42#include <linux/syscore_ops.h>
69575d38 43#include <linux/tboot.h>
adb2fe02 44#include <linux/dmi.h>
5cdede24 45#include <linux/pci-ats.h>
0ee332c1 46#include <linux/memblock.h>
36746436 47#include <linux/dma-contiguous.h>
091d42e4 48#include <linux/crash_dump.h>
8a8f422d 49#include <asm/irq_remapping.h>
ba395927 50#include <asm/cacheflush.h>
46a7fa27 51#include <asm/iommu.h>
ba395927 52
078e1ee2
JR
53#include "irq_remapping.h"
54
5b6985ce
FY
55#define ROOT_SIZE VTD_PAGE_SIZE
56#define CONTEXT_SIZE VTD_PAGE_SIZE
57
ba395927 58#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
18436afd 59#define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
ba395927 60#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
e0fc7e0b 61#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
ba395927
KA
62
63#define IOAPIC_RANGE_START (0xfee00000)
64#define IOAPIC_RANGE_END (0xfeefffff)
65#define IOVA_START_ADDR (0x1000)
66
67#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
68
4ed0d3e6 69#define MAX_AGAW_WIDTH 64
5c645b35 70#define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
4ed0d3e6 71
2ebe3151
DW
72#define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
73#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
74
75/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
76 to match. That way, we can use 'unsigned long' for PFNs with impunity. */
77#define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
78 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
79#define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
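/*
 * Illustrative example, assuming the default 48-bit guest address width:
 * __DOMAIN_MAX_PFN(48) = (1ULL << 36) - 1 (64G 4KiB page frames) and
 * DOMAIN_MAX_ADDR(48) = ((1ULL << 36) - 1) << 12, just under 256TiB.
 * On a 64-bit kernel DOMAIN_MAX_PFN(48) is the same value; on 32-bit it
 * is clamped to ULONG_MAX so PFNs still fit in an unsigned long.
 */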
ba395927 80
1b722500
RM
81/* IO virtual address start page frame number */
82#define IOVA_START_PFN (1)
83
f27be03b 84#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
5e0d2a6f 85
df08cdc7
AM
86/* page table handling */
87#define LEVEL_STRIDE (9)
88#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
89
6d1c56a9
OBC
90/*
 91 * This bitmap is used to advertise the page sizes our hardware supports
92 * to the IOMMU core, which will then use this information to split
93 * physically contiguous memory regions it is mapping into page sizes
94 * that we support.
95 *
96 * Traditionally the IOMMU core just handed us the mappings directly,
97 * after making sure the size is an order of a 4KiB page and that the
98 * mapping has natural alignment.
99 *
100 * To retain this behavior, we currently advertise that we support
101 * all page sizes that are an order of 4KiB.
102 *
103 * If at some point we'd like to utilize the IOMMU core's new behavior,
104 * we could change this to advertise the real page sizes we support.
105 */
106#define INTEL_IOMMU_PGSIZES (~0xFFFUL)
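/*
 * (~0xFFFUL leaves every bit from 12 upwards set, so the core sees 4KiB,
 * 8KiB, 16KiB, ... as supported page-size orders.)
 */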
107
df08cdc7
AM
108static inline int agaw_to_level(int agaw)
109{
110 return agaw + 2;
111}
112
113static inline int agaw_to_width(int agaw)
114{
5c645b35 115 return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
df08cdc7
AM
116}
117
118static inline int width_to_agaw(int width)
119{
5c645b35 120 return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
df08cdc7
AM
121}
122
123static inline unsigned int level_to_offset_bits(int level)
124{
125 return (level - 1) * LEVEL_STRIDE;
126}
127
128static inline int pfn_level_offset(unsigned long pfn, int level)
129{
130 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
131}
132
133static inline unsigned long level_mask(int level)
134{
135 return -1UL << level_to_offset_bits(level);
136}
137
138static inline unsigned long level_size(int level)
139{
140 return 1UL << level_to_offset_bits(level);
141}
142
143static inline unsigned long align_to_level(unsigned long pfn, int level)
144{
145 return (pfn + level_size(level) - 1) & level_mask(level);
146}
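/*
 * Worked example of the helpers above: a level-2 (2MiB) entry covers
 * level_size(2) = 1 << 9 = 512 4KiB pages, level_mask(2) = ~0x1ffUL, and
 * align_to_level(0x1234, 2) rounds pfn 0x1234 up to 0x1400. Likewise
 * agaw 2 gives a 4-level table (agaw_to_level), a 48-bit address width
 * (agaw_to_width: 30 + 2 * 9), and width_to_agaw(48) maps back to 2.
 */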
fd18de50 147
6dd9a7c7
YS
148static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
149{
5c645b35 150 return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
6dd9a7c7
YS
151}
152
dd4e8319
DW
153/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
154 are never going to work. */
155static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
156{
157 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
158}
159
160static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
161{
162 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
163}
164static inline unsigned long page_to_dma_pfn(struct page *pg)
165{
166 return mm_to_dma_pfn(page_to_pfn(pg));
167}
168static inline unsigned long virt_to_dma_pfn(void *p)
169{
170 return page_to_dma_pfn(virt_to_page(p));
171}
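/*
 * On x86 PAGE_SHIFT == VTD_PAGE_SHIFT == 12, so these conversions are
 * no-ops; they only shift when the kernel page size is larger than the
 * 4KiB VT-d page size (hypothetically, a kernel built with 64KiB pages
 * would have mm pfn 1 correspond to dma pfns 16-31).
 */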
172
d9630fe9
WH
173/* global iommu list, set NULL for ignored DMAR units */
174static struct intel_iommu **g_iommus;
175
e0fc7e0b 176static void __init check_tylersburg_isoch(void);
9af88143
DW
177static int rwbf_quirk;
178
b779260b
JC
179/*
 180 * set to 1 to panic the kernel if VT-d cannot be successfully enabled
 181 * (used when the kernel is launched w/ TXT)
182 */
183static int force_on = 0;
bfd20f1c 184int intel_iommu_tboot_noforce;
b779260b 185
46b08e1a
MM
186/*
187 * 0: Present
188 * 1-11: Reserved
189 * 12-63: Context Ptr (12 - (haw-1))
190 * 64-127: Reserved
191 */
192struct root_entry {
03ecc32c
DW
193 u64 lo;
194 u64 hi;
46b08e1a
MM
195};
196#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
46b08e1a 197
091d42e4
JR
198/*
199 * Take a root_entry and return the Lower Context Table Pointer (LCTP)
200 * if marked present.
201 */
202static phys_addr_t root_entry_lctp(struct root_entry *re)
203{
204 if (!(re->lo & 1))
205 return 0;
206
207 return re->lo & VTD_PAGE_MASK;
208}
209
210/*
211 * Take a root_entry and return the Upper Context Table Pointer (UCTP)
212 * if marked present.
213 */
214static phys_addr_t root_entry_uctp(struct root_entry *re)
215{
216 if (!(re->hi & 1))
217 return 0;
46b08e1a 218
091d42e4
JR
219 return re->hi & VTD_PAGE_MASK;
220}
7a8fc25e
MM
221/*
222 * low 64 bits:
223 * 0: present
224 * 1: fault processing disable
225 * 2-3: translation type
226 * 12-63: address space root
227 * high 64 bits:
228 * 0-2: address width
229 * 3-6: aval
230 * 8-23: domain id
231 */
232struct context_entry {
233 u64 lo;
234 u64 hi;
235};
c07e7d21 236
cf484d0e
JR
237static inline void context_clear_pasid_enable(struct context_entry *context)
238{
239 context->lo &= ~(1ULL << 11);
240}
241
242static inline bool context_pasid_enabled(struct context_entry *context)
243{
244 return !!(context->lo & (1ULL << 11));
245}
246
247static inline void context_set_copied(struct context_entry *context)
248{
249 context->hi |= (1ull << 3);
250}
251
252static inline bool context_copied(struct context_entry *context)
253{
254 return !!(context->hi & (1ULL << 3));
255}
256
257static inline bool __context_present(struct context_entry *context)
c07e7d21
MM
258{
259 return (context->lo & 1);
260}
cf484d0e
JR
261
262static inline bool context_present(struct context_entry *context)
263{
264 return context_pasid_enabled(context) ?
265 __context_present(context) :
266 __context_present(context) && !context_copied(context);
267}
268
c07e7d21
MM
269static inline void context_set_present(struct context_entry *context)
270{
271 context->lo |= 1;
272}
273
274static inline void context_set_fault_enable(struct context_entry *context)
275{
276 context->lo &= (((u64)-1) << 2) | 1;
277}
278
c07e7d21
MM
279static inline void context_set_translation_type(struct context_entry *context,
280 unsigned long value)
281{
282 context->lo &= (((u64)-1) << 4) | 3;
283 context->lo |= (value & 3) << 2;
284}
285
286static inline void context_set_address_root(struct context_entry *context,
287 unsigned long value)
288{
1a2262f9 289 context->lo &= ~VTD_PAGE_MASK;
c07e7d21
MM
290 context->lo |= value & VTD_PAGE_MASK;
291}
292
293static inline void context_set_address_width(struct context_entry *context,
294 unsigned long value)
295{
296 context->hi |= value & 7;
297}
298
299static inline void context_set_domain_id(struct context_entry *context,
300 unsigned long value)
301{
302 context->hi |= (value & ((1 << 16) - 1)) << 8;
303}
304
dbcd861f
JR
305static inline int context_domain_id(struct context_entry *c)
306{
307 return((c->hi >> 8) & 0xffff);
308}
309
c07e7d21
MM
310static inline void context_clear_entry(struct context_entry *context)
311{
312 context->lo = 0;
313 context->hi = 0;
314}
7a8fc25e 315
622ba12a
MM
316/*
317 * 0: readable
318 * 1: writable
319 * 2-6: reserved
320 * 7: super page
9cf06697
SY
321 * 8-10: available
322 * 11: snoop behavior
622ba12a
MM
 323 * 12-63: Host physical address
324 */
325struct dma_pte {
326 u64 val;
327};
622ba12a 328
19c239ce
MM
329static inline void dma_clear_pte(struct dma_pte *pte)
330{
331 pte->val = 0;
332}
333
19c239ce
MM
334static inline u64 dma_pte_addr(struct dma_pte *pte)
335{
c85994e4
DW
336#ifdef CONFIG_64BIT
337 return pte->val & VTD_PAGE_MASK;
338#else
339 /* Must have a full atomic 64-bit read */
1a8bd481 340 return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
c85994e4 341#endif
19c239ce
MM
342}
343
19c239ce
MM
344static inline bool dma_pte_present(struct dma_pte *pte)
345{
346 return (pte->val & 3) != 0;
347}
622ba12a 348
4399c8bf
AK
349static inline bool dma_pte_superpage(struct dma_pte *pte)
350{
c3c75eb7 351 return (pte->val & DMA_PTE_LARGE_PAGE);
4399c8bf
AK
352}
353
75e6bf96
DW
354static inline int first_pte_in_page(struct dma_pte *pte)
355{
356 return !((unsigned long)pte & ~VTD_PAGE_MASK);
357}
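/*
 * A page table page holds VTD_PAGE_SIZE / sizeof(struct dma_pte) = 512
 * entries, so first_pte_in_page() simply asks "is this pte pointer 4KiB
 * aligned"; the clearing/freeing loops below use it to stop at page
 * table boundaries.
 */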
358
2c2e2c38
FY
359/*
 360 * This domain is a static identity mapping domain.
 361 * 1. This domain creates a static 1:1 mapping to all usable memory.
 362 * 2. It maps to each iommu if successful.
 363 * 3. Each iommu maps to this domain if successful.
364 */
19943b0e
DW
365static struct dmar_domain *si_domain;
366static int hw_pass_through = 1;
2c2e2c38 367
28ccce0d
JR
368/*
 369 * Domain represents a virtual machine; more than one device
1ce28feb
WH
 370 * across iommus may be owned by one domain, e.g. a kvm guest.
371 */
ab8dfe25 372#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 0)
1ce28feb 373
2c2e2c38 374/* si_domain contains multiple devices */
ab8dfe25 375#define DOMAIN_FLAG_STATIC_IDENTITY (1 << 1)
2c2e2c38 376
29a27719
JR
377#define for_each_domain_iommu(idx, domain) \
378 for (idx = 0; idx < g_num_of_iommus; idx++) \
379 if (domain->iommu_refcnt[idx])
380
99126f7c 381struct dmar_domain {
4c923d47 382 int nid; /* node id */
29a27719
JR
383
384 unsigned iommu_refcnt[DMAR_UNITS_SUPPORTED];
385 /* Refcount of devices per iommu */
386
99126f7c 387
c0e8a6c8
JR
388 u16 iommu_did[DMAR_UNITS_SUPPORTED];
389 /* Domain ids per IOMMU. Use u16 since
390 * domain ids are 16 bit wide according
391 * to VT-d spec, section 9.3 */
99126f7c 392
0824c592 393 bool has_iotlb_device;
00a77deb 394 struct list_head devices; /* all devices' list */
99126f7c
MM
395 struct iova_domain iovad; /* iova's that belong to this domain */
396
397 struct dma_pte *pgd; /* virtual address */
99126f7c
MM
398 int gaw; /* max guest address width */
399
400 /* adjusted guest address width, 0 is level 2 30-bit */
401 int agaw;
402
3b5410e7 403 int flags; /* flags to find out type of domain */
8e604097
WH
404
405 int iommu_coherency;/* indicate coherency of iommu access */
58c610bd 406 int iommu_snooping; /* indicate snooping control feature*/
c7151a8d 407 int iommu_count; /* reference count of iommu */
6dd9a7c7
YS
408 int iommu_superpage;/* Level of superpages supported:
409 0 == 4KiB (no superpages), 1 == 2MiB,
410 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
fe40f1e0 411 u64 max_addr; /* maximum mapped address */
00a77deb
JR
412
413 struct iommu_domain domain; /* generic domain data structure for
414 iommu core */
99126f7c
MM
415};
416
a647dacb
MM
417/* PCI domain-device relationship */
418struct device_domain_info {
419 struct list_head link; /* link to domain siblings */
420 struct list_head global; /* link to global list */
276dbf99 421 u8 bus; /* PCI bus number */
a647dacb 422 u8 devfn; /* PCI devfn number */
b16d0cb9
DW
423 u8 pasid_supported:3;
424 u8 pasid_enabled:1;
425 u8 pri_supported:1;
426 u8 pri_enabled:1;
427 u8 ats_supported:1;
428 u8 ats_enabled:1;
429 u8 ats_qdep;
0bcb3e28 430 struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
93a23a72 431 struct intel_iommu *iommu; /* IOMMU used by this device */
a647dacb
MM
432 struct dmar_domain *domain; /* pointer to domain */
433};
434
b94e4117
JL
435struct dmar_rmrr_unit {
436 struct list_head list; /* list of rmrr units */
437 struct acpi_dmar_header *hdr; /* ACPI header */
438 u64 base_address; /* reserved base address*/
439 u64 end_address; /* reserved end address */
832bd858 440 struct dmar_dev_scope *devices; /* target devices */
b94e4117 441 int devices_cnt; /* target device count */
0659b8dc 442 struct iommu_resv_region *resv; /* reserved region handle */
b94e4117
JL
443};
444
445struct dmar_atsr_unit {
446 struct list_head list; /* list of ATSR units */
447 struct acpi_dmar_header *hdr; /* ACPI header */
832bd858 448 struct dmar_dev_scope *devices; /* target devices */
b94e4117
JL
449 int devices_cnt; /* target device count */
450 u8 include_all:1; /* include all ports */
451};
452
453static LIST_HEAD(dmar_atsr_units);
454static LIST_HEAD(dmar_rmrr_units);
455
456#define for_each_rmrr_units(rmrr) \
457 list_for_each_entry(rmrr, &dmar_rmrr_units, list)
458
5e0d2a6f 459/* bitmap for indexing intel_iommus */
5e0d2a6f 460static int g_num_of_iommus;
461
92d03cc8 462static void domain_exit(struct dmar_domain *domain);
ba395927 463static void domain_remove_dev_info(struct dmar_domain *domain);
e6de0f8d
JR
464static void dmar_remove_one_dev_info(struct dmar_domain *domain,
465 struct device *dev);
127c7615 466static void __dmar_remove_one_dev_info(struct device_domain_info *info);
2452d9db
JR
467static void domain_context_clear(struct intel_iommu *iommu,
468 struct device *dev);
2a46ddf7
JL
469static int domain_detach_iommu(struct dmar_domain *domain,
470 struct intel_iommu *iommu);
ba395927 471
d3f13810 472#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
0cd5c3c8
KM
473int dmar_disabled = 0;
474#else
475int dmar_disabled = 1;
d3f13810 476#endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
0cd5c3c8 477
8bc1f85c
ED
478int intel_iommu_enabled = 0;
479EXPORT_SYMBOL_GPL(intel_iommu_enabled);
480
2d9e667e 481static int dmar_map_gfx = 1;
7d3b03ce 482static int dmar_forcedac;
5e0d2a6f 483static int intel_iommu_strict;
6dd9a7c7 484static int intel_iommu_superpage = 1;
c83b2f20 485static int intel_iommu_ecs = 1;
ae853ddb
DW
486static int intel_iommu_pasid28;
487static int iommu_identity_mapping;
c83b2f20 488
ae853ddb
DW
489#define IDENTMAP_ALL 1
490#define IDENTMAP_GFX 2
491#define IDENTMAP_AZALIA 4
c83b2f20 492
d42fde70
DW
493/* Broadwell and Skylake have broken ECS support — normal so-called "second
494 * level" translation of DMA requests-without-PASID doesn't actually happen
495 * unless you also set the NESTE bit in an extended context-entry. Which of
496 * course means that SVM doesn't work because it's trying to do nested
497 * translation of the physical addresses it finds in the process page tables,
498 * through the IOVA->phys mapping found in the "second level" page tables.
499 *
500 * The VT-d specification was retroactively changed to change the definition
501 * of the capability bits and pretend that Broadwell/Skylake never happened...
502 * but unfortunately the wrong bit was changed. It's ECS which is broken, but
503 * for some reason it was the PASID capability bit which was redefined (from
504 * bit 28 on BDW/SKL to bit 40 in future).
505 *
506 * So our test for ECS needs to eschew those implementations which set the old
 507 * PASID capability bit 28, since those are the ones on which ECS is broken.
508 * Unless we are working around the 'pasid28' limitations, that is, by putting
509 * the device into passthrough mode for normal DMA and thus masking the bug.
510 */
c83b2f20 511#define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap) && \
d42fde70
DW
512 (intel_iommu_pasid28 || !ecap_broken_pasid(iommu->ecap)))
513/* PASID support is thus enabled if ECS is enabled and *either* of the old
514 * or new capability bits are set. */
515#define pasid_enabled(iommu) (ecs_enabled(iommu) && \
516 (ecap_pasid(iommu->ecap) || ecap_broken_pasid(iommu->ecap)))
ba395927 517
c0771df8
DW
518int intel_iommu_gfx_mapped;
519EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
520
ba395927
KA
521#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
522static DEFINE_SPINLOCK(device_domain_lock);
523static LIST_HEAD(device_domain_list);
524
b0119e87 525const struct iommu_ops intel_iommu_ops;
a8bcbb0d 526
4158c2ec
JR
527static bool translation_pre_enabled(struct intel_iommu *iommu)
528{
529 return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
530}
531
091d42e4
JR
532static void clear_translation_pre_enabled(struct intel_iommu *iommu)
533{
534 iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
535}
536
4158c2ec
JR
537static void init_translation_status(struct intel_iommu *iommu)
538{
539 u32 gsts;
540
541 gsts = readl(iommu->reg + DMAR_GSTS_REG);
542 if (gsts & DMA_GSTS_TES)
543 iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
544}
545
00a77deb
JR
 546/* Convert generic 'struct iommu_domain' to private 'struct dmar_domain' */
547static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
548{
549 return container_of(dom, struct dmar_domain, domain);
550}
551
ba395927
KA
552static int __init intel_iommu_setup(char *str)
553{
554 if (!str)
555 return -EINVAL;
556 while (*str) {
0cd5c3c8
KM
557 if (!strncmp(str, "on", 2)) {
558 dmar_disabled = 0;
9f10e5bf 559 pr_info("IOMMU enabled\n");
0cd5c3c8 560 } else if (!strncmp(str, "off", 3)) {
ba395927 561 dmar_disabled = 1;
9f10e5bf 562 pr_info("IOMMU disabled\n");
ba395927
KA
563 } else if (!strncmp(str, "igfx_off", 8)) {
564 dmar_map_gfx = 0;
9f10e5bf 565 pr_info("Disable GFX device mapping\n");
7d3b03ce 566 } else if (!strncmp(str, "forcedac", 8)) {
9f10e5bf 567 pr_info("Forcing DAC for PCI devices\n");
7d3b03ce 568 dmar_forcedac = 1;
5e0d2a6f 569 } else if (!strncmp(str, "strict", 6)) {
9f10e5bf 570 pr_info("Disable batched IOTLB flush\n");
5e0d2a6f 571 intel_iommu_strict = 1;
6dd9a7c7 572 } else if (!strncmp(str, "sp_off", 6)) {
9f10e5bf 573 pr_info("Disable supported super page\n");
6dd9a7c7 574 intel_iommu_superpage = 0;
c83b2f20
DW
575 } else if (!strncmp(str, "ecs_off", 7)) {
576 printk(KERN_INFO
577 "Intel-IOMMU: disable extended context table support\n");
578 intel_iommu_ecs = 0;
ae853ddb
DW
579 } else if (!strncmp(str, "pasid28", 7)) {
580 printk(KERN_INFO
581 "Intel-IOMMU: enable pre-production PASID support\n");
582 intel_iommu_pasid28 = 1;
583 iommu_identity_mapping |= IDENTMAP_GFX;
bfd20f1c
SL
584 } else if (!strncmp(str, "tboot_noforce", 13)) {
585 printk(KERN_INFO
586 "Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
587 intel_iommu_tboot_noforce = 1;
ba395927
KA
588 }
589
590 str += strcspn(str, ",");
591 while (*str == ',')
592 str++;
593 }
594 return 0;
595}
596__setup("intel_iommu=", intel_iommu_setup);
597
598static struct kmem_cache *iommu_domain_cache;
599static struct kmem_cache *iommu_devinfo_cache;
ba395927 600
9452d5bf
JR
601static struct dmar_domain* get_iommu_domain(struct intel_iommu *iommu, u16 did)
602{
8bf47816
JR
603 struct dmar_domain **domains;
604 int idx = did >> 8;
605
606 domains = iommu->domains[idx];
607 if (!domains)
608 return NULL;
609
610 return domains[did & 0xff];
9452d5bf
JR
611}
612
613static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
614 struct dmar_domain *domain)
615{
8bf47816
JR
616 struct dmar_domain **domains;
617 int idx = did >> 8;
618
619 if (!iommu->domains[idx]) {
620 size_t size = 256 * sizeof(struct dmar_domain *);
621 iommu->domains[idx] = kzalloc(size, GFP_ATOMIC);
622 }
623
624 domains = iommu->domains[idx];
625 if (WARN_ON(!domains))
626 return;
627 else
628 domains[did & 0xff] = domain;
9452d5bf
JR
629}
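/*
 * The domain-id lookup is two-level: iommu->domains[did >> 8] points to a
 * page of 256 dmar_domain pointers indexed by the low byte, e.g. did
 * 0x1234 lives in iommu->domains[0x12][0x34]. The second-level pages are
 * only allocated once a domain id in that range is actually used.
 */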
630
4c923d47 631static inline void *alloc_pgtable_page(int node)
eb3fa7cb 632{
4c923d47
SS
633 struct page *page;
634 void *vaddr = NULL;
eb3fa7cb 635
4c923d47
SS
636 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
637 if (page)
638 vaddr = page_address(page);
eb3fa7cb 639 return vaddr;
ba395927
KA
640}
641
642static inline void free_pgtable_page(void *vaddr)
643{
644 free_page((unsigned long)vaddr);
645}
646
647static inline void *alloc_domain_mem(void)
648{
354bb65e 649 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
ba395927
KA
650}
651
38717946 652static void free_domain_mem(void *vaddr)
ba395927
KA
653{
654 kmem_cache_free(iommu_domain_cache, vaddr);
655}
656
657static inline void * alloc_devinfo_mem(void)
658{
354bb65e 659 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
ba395927
KA
660}
661
662static inline void free_devinfo_mem(void *vaddr)
663{
664 kmem_cache_free(iommu_devinfo_cache, vaddr);
665}
666
ab8dfe25
JL
667static inline int domain_type_is_vm(struct dmar_domain *domain)
668{
669 return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE;
670}
671
28ccce0d
JR
672static inline int domain_type_is_si(struct dmar_domain *domain)
673{
674 return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
675}
676
ab8dfe25
JL
677static inline int domain_type_is_vm_or_si(struct dmar_domain *domain)
678{
679 return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE |
680 DOMAIN_FLAG_STATIC_IDENTITY);
681}
1b573683 682
162d1b10
JL
683static inline int domain_pfn_supported(struct dmar_domain *domain,
684 unsigned long pfn)
685{
686 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
687
688 return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
689}
690
4ed0d3e6 691static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
1b573683
WH
692{
693 unsigned long sagaw;
694 int agaw = -1;
695
696 sagaw = cap_sagaw(iommu->cap);
4ed0d3e6 697 for (agaw = width_to_agaw(max_gaw);
1b573683
WH
698 agaw >= 0; agaw--) {
699 if (test_bit(agaw, &sagaw))
700 break;
701 }
702
703 return agaw;
704}
705
4ed0d3e6
FY
706/*
707 * Calculate max SAGAW for each iommu.
708 */
709int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
710{
711 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
712}
713
714/*
 715 * Calculate agaw for each iommu.
 716 * "SAGAW" may be different across iommus; use a default agaw and fall
 717 * back to a smaller supported agaw for iommus that don't support it.
718 */
719int iommu_calculate_agaw(struct intel_iommu *iommu)
720{
721 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
722}
723
2c2e2c38 724/* This function only returns a single iommu in a domain */
8c11e798
WH
725static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
726{
727 int iommu_id;
728
2c2e2c38 729 /* si_domain and vm domain should not get here. */
ab8dfe25 730 BUG_ON(domain_type_is_vm_or_si(domain));
29a27719
JR
731 for_each_domain_iommu(iommu_id, domain)
732 break;
733
8c11e798
WH
734 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
735 return NULL;
736
737 return g_iommus[iommu_id];
738}
739
8e604097
WH
740static void domain_update_iommu_coherency(struct dmar_domain *domain)
741{
d0501960
DW
742 struct dmar_drhd_unit *drhd;
743 struct intel_iommu *iommu;
2f119c78
QL
744 bool found = false;
745 int i;
2e12bc29 746
d0501960 747 domain->iommu_coherency = 1;
8e604097 748
29a27719 749 for_each_domain_iommu(i, domain) {
2f119c78 750 found = true;
8e604097
WH
751 if (!ecap_coherent(g_iommus[i]->ecap)) {
752 domain->iommu_coherency = 0;
753 break;
754 }
8e604097 755 }
d0501960
DW
756 if (found)
757 return;
758
759 /* No hardware attached; use lowest common denominator */
760 rcu_read_lock();
761 for_each_active_iommu(iommu, drhd) {
762 if (!ecap_coherent(iommu->ecap)) {
763 domain->iommu_coherency = 0;
764 break;
765 }
766 }
767 rcu_read_unlock();
8e604097
WH
768}
769
161f6934 770static int domain_update_iommu_snooping(struct intel_iommu *skip)
58c610bd 771{
161f6934
JL
772 struct dmar_drhd_unit *drhd;
773 struct intel_iommu *iommu;
774 int ret = 1;
58c610bd 775
161f6934
JL
776 rcu_read_lock();
777 for_each_active_iommu(iommu, drhd) {
778 if (iommu != skip) {
779 if (!ecap_sc_support(iommu->ecap)) {
780 ret = 0;
781 break;
782 }
58c610bd 783 }
58c610bd 784 }
161f6934
JL
785 rcu_read_unlock();
786
787 return ret;
58c610bd
SY
788}
789
161f6934 790static int domain_update_iommu_superpage(struct intel_iommu *skip)
6dd9a7c7 791{
8140a95d 792 struct dmar_drhd_unit *drhd;
161f6934 793 struct intel_iommu *iommu;
8140a95d 794 int mask = 0xf;
6dd9a7c7
YS
795
796 if (!intel_iommu_superpage) {
161f6934 797 return 0;
6dd9a7c7
YS
798 }
799
8140a95d 800 /* set iommu_superpage to the smallest common denominator */
0e242612 801 rcu_read_lock();
8140a95d 802 for_each_active_iommu(iommu, drhd) {
161f6934
JL
803 if (iommu != skip) {
804 mask &= cap_super_page_val(iommu->cap);
805 if (!mask)
806 break;
6dd9a7c7
YS
807 }
808 }
0e242612
JL
809 rcu_read_unlock();
810
161f6934 811 return fls(mask);
6dd9a7c7
YS
812}
813
58c610bd
SY
814/* Some capabilities may be different across iommus */
815static void domain_update_iommu_cap(struct dmar_domain *domain)
816{
817 domain_update_iommu_coherency(domain);
161f6934
JL
818 domain->iommu_snooping = domain_update_iommu_snooping(NULL);
819 domain->iommu_superpage = domain_update_iommu_superpage(NULL);
58c610bd
SY
820}
821
03ecc32c
DW
822static inline struct context_entry *iommu_context_addr(struct intel_iommu *iommu,
823 u8 bus, u8 devfn, int alloc)
824{
825 struct root_entry *root = &iommu->root_entry[bus];
826 struct context_entry *context;
827 u64 *entry;
828
4df4eab1 829 entry = &root->lo;
c83b2f20 830 if (ecs_enabled(iommu)) {
03ecc32c
DW
831 if (devfn >= 0x80) {
832 devfn -= 0x80;
833 entry = &root->hi;
834 }
835 devfn *= 2;
836 }
03ecc32c
DW
837 if (*entry & 1)
838 context = phys_to_virt(*entry & VTD_PAGE_MASK);
839 else {
840 unsigned long phy_addr;
841 if (!alloc)
842 return NULL;
843
844 context = alloc_pgtable_page(iommu->node);
845 if (!context)
846 return NULL;
847
848 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
849 phy_addr = virt_to_phys((void *)context);
850 *entry = phy_addr | 1;
851 __iommu_flush_cache(iommu, entry, sizeof(*entry));
852 }
853 return &context[devfn];
854}
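/*
 * With extended context support (ecs_enabled()) each root entry covers
 * only half of the 256 devfns: devfns 0x00-0x7f are reached through
 * root->lo and devfns 0x80-0xff through root->hi, and devfn is doubled
 * because extended context entries are twice the size of legacy ones.
 */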
855
4ed6a540
DW
856static int iommu_dummy(struct device *dev)
857{
858 return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
859}
860
156baca8 861static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
c7151a8d
WH
862{
863 struct dmar_drhd_unit *drhd = NULL;
b683b230 864 struct intel_iommu *iommu;
156baca8
DW
865 struct device *tmp;
866 struct pci_dev *ptmp, *pdev = NULL;
aa4d066a 867 u16 segment = 0;
c7151a8d
WH
868 int i;
869
4ed6a540
DW
870 if (iommu_dummy(dev))
871 return NULL;
872
156baca8 873 if (dev_is_pci(dev)) {
1c387188
AR
874 struct pci_dev *pf_pdev;
875
156baca8 876 pdev = to_pci_dev(dev);
5823e330
JD
877
878#ifdef CONFIG_X86
879 /* VMD child devices currently cannot be handled individually */
880 if (is_vmd(pdev->bus))
881 return NULL;
882#endif
883
1c387188
AR
884 /* VFs aren't listed in scope tables; we need to look up
885 * the PF instead to find the IOMMU. */
886 pf_pdev = pci_physfn(pdev);
887 dev = &pf_pdev->dev;
156baca8 888 segment = pci_domain_nr(pdev->bus);
ca5b74d2 889 } else if (has_acpi_companion(dev))
156baca8
DW
890 dev = &ACPI_COMPANION(dev)->dev;
891
0e242612 892 rcu_read_lock();
b683b230 893 for_each_active_iommu(iommu, drhd) {
156baca8 894 if (pdev && segment != drhd->segment)
276dbf99 895 continue;
c7151a8d 896
b683b230 897 for_each_active_dev_scope(drhd->devices,
156baca8
DW
898 drhd->devices_cnt, i, tmp) {
899 if (tmp == dev) {
1c387188
AR
900 /* For a VF use its original BDF# not that of the PF
901 * which we used for the IOMMU lookup. Strictly speaking
902 * we could do this for all PCI devices; we only need to
903 * get the BDF# from the scope table for ACPI matches. */
5003ae1e 904 if (pdev && pdev->is_virtfn)
1c387188
AR
905 goto got_pdev;
906
156baca8
DW
907 *bus = drhd->devices[i].bus;
908 *devfn = drhd->devices[i].devfn;
b683b230 909 goto out;
156baca8
DW
910 }
911
912 if (!pdev || !dev_is_pci(tmp))
913 continue;
914
915 ptmp = to_pci_dev(tmp);
916 if (ptmp->subordinate &&
917 ptmp->subordinate->number <= pdev->bus->number &&
918 ptmp->subordinate->busn_res.end >= pdev->bus->number)
919 goto got_pdev;
924b6231 920 }
c7151a8d 921
156baca8
DW
922 if (pdev && drhd->include_all) {
923 got_pdev:
924 *bus = pdev->bus->number;
925 *devfn = pdev->devfn;
b683b230 926 goto out;
156baca8 927 }
c7151a8d 928 }
b683b230 929 iommu = NULL;
156baca8 930 out:
0e242612 931 rcu_read_unlock();
c7151a8d 932
b683b230 933 return iommu;
c7151a8d
WH
934}
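/*
 * Typical use (sketch): callers pass the device plus two u8 out-parameters
 * and get back the IOMMU that owns it together with the bus/devfn to use
 * for context-table programming, e.g.
 *
 *	u8 bus, devfn;
 *	struct intel_iommu *iommu = device_to_iommu(dev, &bus, &devfn);
 *	if (!iommu)
 *		return -ENODEV;
 */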
935
5331fe6f
WH
936static void domain_flush_cache(struct dmar_domain *domain,
937 void *addr, int size)
938{
939 if (!domain->iommu_coherency)
940 clflush_cache_range(addr, size);
941}
942
ba395927
KA
943static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
944{
ba395927 945 struct context_entry *context;
03ecc32c 946 int ret = 0;
ba395927
KA
947 unsigned long flags;
948
949 spin_lock_irqsave(&iommu->lock, flags);
03ecc32c
DW
950 context = iommu_context_addr(iommu, bus, devfn, 0);
951 if (context)
952 ret = context_present(context);
ba395927
KA
953 spin_unlock_irqrestore(&iommu->lock, flags);
954 return ret;
955}
956
ba395927
KA
957static void free_context_table(struct intel_iommu *iommu)
958{
ba395927
KA
959 int i;
960 unsigned long flags;
961 struct context_entry *context;
962
963 spin_lock_irqsave(&iommu->lock, flags);
964 if (!iommu->root_entry) {
965 goto out;
966 }
967 for (i = 0; i < ROOT_ENTRY_NR; i++) {
03ecc32c 968 context = iommu_context_addr(iommu, i, 0, 0);
ba395927
KA
969 if (context)
970 free_pgtable_page(context);
03ecc32c 971
c83b2f20 972 if (!ecs_enabled(iommu))
03ecc32c
DW
973 continue;
974
975 context = iommu_context_addr(iommu, i, 0x80, 0);
976 if (context)
977 free_pgtable_page(context);
978
ba395927
KA
979 }
980 free_pgtable_page(iommu->root_entry);
981 iommu->root_entry = NULL;
982out:
983 spin_unlock_irqrestore(&iommu->lock, flags);
984}
985
b026fd28 986static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
5cf0a76f 987 unsigned long pfn, int *target_level)
ba395927 988{
ba395927
KA
989 struct dma_pte *parent, *pte = NULL;
990 int level = agaw_to_level(domain->agaw);
4399c8bf 991 int offset;
ba395927
KA
992
993 BUG_ON(!domain->pgd);
f9423606 994
162d1b10 995 if (!domain_pfn_supported(domain, pfn))
f9423606
JS
996 /* Address beyond IOMMU's addressing capabilities. */
997 return NULL;
998
ba395927
KA
999 parent = domain->pgd;
1000
5cf0a76f 1001 while (1) {
ba395927
KA
1002 void *tmp_page;
1003
b026fd28 1004 offset = pfn_level_offset(pfn, level);
ba395927 1005 pte = &parent[offset];
5cf0a76f 1006 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
6dd9a7c7 1007 break;
5cf0a76f 1008 if (level == *target_level)
ba395927
KA
1009 break;
1010
19c239ce 1011 if (!dma_pte_present(pte)) {
c85994e4
DW
1012 uint64_t pteval;
1013
4c923d47 1014 tmp_page = alloc_pgtable_page(domain->nid);
ba395927 1015
206a73c1 1016 if (!tmp_page)
ba395927 1017 return NULL;
206a73c1 1018
c85994e4 1019 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
64de5af0 1020 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
effad4b5 1021 if (cmpxchg64(&pte->val, 0ULL, pteval))
c85994e4
DW
1022 /* Someone else set it while we were thinking; use theirs. */
1023 free_pgtable_page(tmp_page);
effad4b5 1024 else
c85994e4 1025 domain_flush_cache(domain, pte, sizeof(*pte));
ba395927 1026 }
5cf0a76f
DW
1027 if (level == 1)
1028 break;
1029
19c239ce 1030 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
1031 level--;
1032 }
1033
5cf0a76f
DW
1034 if (!*target_level)
1035 *target_level = level;
1036
ba395927
KA
1037 return pte;
1038}
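/*
 * Note on *target_level: a non-zero value asks for the pte at exactly that
 * level, allocating intermediate page tables on the way down; passing 0
 * stops at the first superpage or non-present entry instead, and the level
 * actually reached is written back through the pointer.
 */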
1039
6dd9a7c7 1040
ba395927 1041/* return address's pte at specific level */
90dcfb5e
DW
1042static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
1043 unsigned long pfn,
6dd9a7c7 1044 int level, int *large_page)
ba395927
KA
1045{
1046 struct dma_pte *parent, *pte = NULL;
1047 int total = agaw_to_level(domain->agaw);
1048 int offset;
1049
1050 parent = domain->pgd;
1051 while (level <= total) {
90dcfb5e 1052 offset = pfn_level_offset(pfn, total);
ba395927
KA
1053 pte = &parent[offset];
1054 if (level == total)
1055 return pte;
1056
6dd9a7c7
YS
1057 if (!dma_pte_present(pte)) {
1058 *large_page = total;
ba395927 1059 break;
6dd9a7c7
YS
1060 }
1061
e16922af 1062 if (dma_pte_superpage(pte)) {
6dd9a7c7
YS
1063 *large_page = total;
1064 return pte;
1065 }
1066
19c239ce 1067 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
1068 total--;
1069 }
1070 return NULL;
1071}
1072
ba395927 1073/* clear last level pte, a tlb flush should be followed */
5cf0a76f 1074static void dma_pte_clear_range(struct dmar_domain *domain,
595badf5
DW
1075 unsigned long start_pfn,
1076 unsigned long last_pfn)
ba395927 1077{
6dd9a7c7 1078 unsigned int large_page = 1;
310a5ab9 1079 struct dma_pte *first_pte, *pte;
66eae846 1080
162d1b10
JL
1081 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1082 BUG_ON(!domain_pfn_supported(domain, last_pfn));
59c36286 1083 BUG_ON(start_pfn > last_pfn);
ba395927 1084
04b18e65 1085 /* we don't need lock here; nobody else touches the iova range */
59c36286 1086 do {
6dd9a7c7
YS
1087 large_page = 1;
1088 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
310a5ab9 1089 if (!pte) {
6dd9a7c7 1090 start_pfn = align_to_level(start_pfn + 1, large_page + 1);
310a5ab9
DW
1091 continue;
1092 }
6dd9a7c7 1093 do {
310a5ab9 1094 dma_clear_pte(pte);
6dd9a7c7 1095 start_pfn += lvl_to_nr_pages(large_page);
310a5ab9 1096 pte++;
75e6bf96
DW
1097 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
1098
310a5ab9
DW
1099 domain_flush_cache(domain, first_pte,
1100 (void *)pte - (void *)first_pte);
59c36286
DW
1101
1102 } while (start_pfn && start_pfn <= last_pfn);
ba395927
KA
1103}
1104
3269ee0b 1105static void dma_pte_free_level(struct dmar_domain *domain, int level,
bc24c571
DD
1106 int retain_level, struct dma_pte *pte,
1107 unsigned long pfn, unsigned long start_pfn,
1108 unsigned long last_pfn)
3269ee0b
AW
1109{
1110 pfn = max(start_pfn, pfn);
1111 pte = &pte[pfn_level_offset(pfn, level)];
1112
1113 do {
1114 unsigned long level_pfn;
1115 struct dma_pte *level_pte;
1116
1117 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
1118 goto next;
1119
f7116e11 1120 level_pfn = pfn & level_mask(level);
3269ee0b
AW
1121 level_pte = phys_to_virt(dma_pte_addr(pte));
1122
bc24c571
DD
1123 if (level > 2) {
1124 dma_pte_free_level(domain, level - 1, retain_level,
1125 level_pte, level_pfn, start_pfn,
1126 last_pfn);
1127 }
3269ee0b 1128
bc24c571
DD
1129 /*
1130 * Free the page table if we're below the level we want to
1131 * retain and the range covers the entire table.
1132 */
1133 if (level < retain_level && !(start_pfn > level_pfn ||
08336fd2 1134 last_pfn < level_pfn + level_size(level) - 1)) {
3269ee0b
AW
1135 dma_clear_pte(pte);
1136 domain_flush_cache(domain, pte, sizeof(*pte));
1137 free_pgtable_page(level_pte);
1138 }
1139next:
1140 pfn += level_size(level);
1141 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1142}
1143
bc24c571
DD
1144/*
1145 * clear last level (leaf) ptes and free page table pages below the
1146 * level we wish to keep intact.
1147 */
ba395927 1148static void dma_pte_free_pagetable(struct dmar_domain *domain,
d794dc9b 1149 unsigned long start_pfn,
bc24c571
DD
1150 unsigned long last_pfn,
1151 int retain_level)
ba395927 1152{
162d1b10
JL
1153 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1154 BUG_ON(!domain_pfn_supported(domain, last_pfn));
59c36286 1155 BUG_ON(start_pfn > last_pfn);
ba395927 1156
d41a4adb
JL
1157 dma_pte_clear_range(domain, start_pfn, last_pfn);
1158
f3a0a52f 1159 /* We don't need lock here; nobody else touches the iova range */
bc24c571 1160 dma_pte_free_level(domain, agaw_to_level(domain->agaw), retain_level,
3269ee0b 1161 domain->pgd, 0, start_pfn, last_pfn);
6660c63a 1162
ba395927 1163 /* free pgd */
d794dc9b 1164 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
ba395927
KA
1165 free_pgtable_page(domain->pgd);
1166 domain->pgd = NULL;
1167 }
1168}
1169
ea8ea460
DW
1170/* When a page at a given level is being unlinked from its parent, we don't
1171 need to *modify* it at all. All we need to do is make a list of all the
1172 pages which can be freed just as soon as we've flushed the IOTLB and we
1173 know the hardware page-walk will no longer touch them.
1174 The 'pte' argument is the *parent* PTE, pointing to the page that is to
1175 be freed. */
1176static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1177 int level, struct dma_pte *pte,
1178 struct page *freelist)
1179{
1180 struct page *pg;
1181
1182 pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1183 pg->freelist = freelist;
1184 freelist = pg;
1185
1186 if (level == 1)
1187 return freelist;
1188
adeb2590
JL
1189 pte = page_address(pg);
1190 do {
ea8ea460
DW
1191 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1192 freelist = dma_pte_list_pagetables(domain, level - 1,
1193 pte, freelist);
adeb2590
JL
1194 pte++;
1195 } while (!first_pte_in_page(pte));
ea8ea460
DW
1196
1197 return freelist;
1198}
1199
1200static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1201 struct dma_pte *pte, unsigned long pfn,
1202 unsigned long start_pfn,
1203 unsigned long last_pfn,
1204 struct page *freelist)
1205{
1206 struct dma_pte *first_pte = NULL, *last_pte = NULL;
1207
1208 pfn = max(start_pfn, pfn);
1209 pte = &pte[pfn_level_offset(pfn, level)];
1210
1211 do {
1212 unsigned long level_pfn;
1213
1214 if (!dma_pte_present(pte))
1215 goto next;
1216
1217 level_pfn = pfn & level_mask(level);
1218
1219 /* If range covers entire pagetable, free it */
1220 if (start_pfn <= level_pfn &&
1221 last_pfn >= level_pfn + level_size(level) - 1) {
 1222 /* These subordinate page tables are going away entirely. Don't
1223 bother to clear them; we're just going to *free* them. */
1224 if (level > 1 && !dma_pte_superpage(pte))
1225 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1226
1227 dma_clear_pte(pte);
1228 if (!first_pte)
1229 first_pte = pte;
1230 last_pte = pte;
1231 } else if (level > 1) {
1232 /* Recurse down into a level that isn't *entirely* obsolete */
1233 freelist = dma_pte_clear_level(domain, level - 1,
1234 phys_to_virt(dma_pte_addr(pte)),
1235 level_pfn, start_pfn, last_pfn,
1236 freelist);
1237 }
1238next:
1239 pfn += level_size(level);
1240 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1241
1242 if (first_pte)
1243 domain_flush_cache(domain, first_pte,
1244 (void *)++last_pte - (void *)first_pte);
1245
1246 return freelist;
1247}
1248
1249/* We can't just free the pages because the IOMMU may still be walking
1250 the page tables, and may have cached the intermediate levels. The
1251 pages can only be freed after the IOTLB flush has been done. */
b690420a
JR
1252static struct page *domain_unmap(struct dmar_domain *domain,
1253 unsigned long start_pfn,
1254 unsigned long last_pfn)
ea8ea460 1255{
ea8ea460
DW
1256 struct page *freelist = NULL;
1257
162d1b10
JL
1258 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1259 BUG_ON(!domain_pfn_supported(domain, last_pfn));
ea8ea460
DW
1260 BUG_ON(start_pfn > last_pfn);
1261
1262 /* we don't need lock here; nobody else touches the iova range */
1263 freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1264 domain->pgd, 0, start_pfn, last_pfn, NULL);
1265
1266 /* free pgd */
1267 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1268 struct page *pgd_page = virt_to_page(domain->pgd);
1269 pgd_page->freelist = freelist;
1270 freelist = pgd_page;
1271
1272 domain->pgd = NULL;
1273 }
1274
1275 return freelist;
1276}
1277
b690420a 1278static void dma_free_pagelist(struct page *freelist)
ea8ea460
DW
1279{
1280 struct page *pg;
1281
1282 while ((pg = freelist)) {
1283 freelist = pg->freelist;
1284 free_pgtable_page(page_address(pg));
1285 }
1286}
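/*
 * domain_unmap() strings the no-longer-referenced page table pages
 * together through page->freelist rather than freeing them immediately;
 * only after the IOTLB (and device-IOTLB) flush has completed is the
 * chain handed to dma_free_pagelist(), so the hardware never walks freed
 * memory.
 */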
1287
13cf0174
JR
1288static void iova_entry_free(unsigned long data)
1289{
1290 struct page *freelist = (struct page *)data;
1291
1292 dma_free_pagelist(freelist);
1293}
1294
ba395927
KA
1295/* iommu handling */
1296static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1297{
1298 struct root_entry *root;
1299 unsigned long flags;
1300
4c923d47 1301 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
ffebeb46 1302 if (!root) {
9f10e5bf 1303 pr_err("Allocating root entry for %s failed\n",
ffebeb46 1304 iommu->name);
ba395927 1305 return -ENOMEM;
ffebeb46 1306 }
ba395927 1307
5b6985ce 1308 __iommu_flush_cache(iommu, root, ROOT_SIZE);
ba395927
KA
1309
1310 spin_lock_irqsave(&iommu->lock, flags);
1311 iommu->root_entry = root;
1312 spin_unlock_irqrestore(&iommu->lock, flags);
1313
1314 return 0;
1315}
1316
ba395927
KA
1317static void iommu_set_root_entry(struct intel_iommu *iommu)
1318{
03ecc32c 1319 u64 addr;
c416daa9 1320 u32 sts;
ba395927
KA
1321 unsigned long flag;
1322
03ecc32c 1323 addr = virt_to_phys(iommu->root_entry);
c83b2f20 1324 if (ecs_enabled(iommu))
03ecc32c 1325 addr |= DMA_RTADDR_RTT;
ba395927 1326
1f5b3c3f 1327 raw_spin_lock_irqsave(&iommu->register_lock, flag);
03ecc32c 1328 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
ba395927 1329
c416daa9 1330 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1331
1332 /* Make sure hardware complete it */
1333 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1334 readl, (sts & DMA_GSTS_RTPS), sts);
ba395927 1335
1f5b3c3f 1336 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1337}
1338
1339static void iommu_flush_write_buffer(struct intel_iommu *iommu)
1340{
1341 u32 val;
1342 unsigned long flag;
1343
9af88143 1344 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
ba395927 1345 return;
ba395927 1346
1f5b3c3f 1347 raw_spin_lock_irqsave(&iommu->register_lock, flag);
462b60f6 1348 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1349
1350 /* Make sure hardware complete it */
1351 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1352 readl, (!(val & DMA_GSTS_WBFS)), val);
ba395927 1353
1f5b3c3f 1354 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1355}
1356
 1357 /* return value determines if we need a write buffer flush */
4c25a2c1
DW
1358static void __iommu_flush_context(struct intel_iommu *iommu,
1359 u16 did, u16 source_id, u8 function_mask,
1360 u64 type)
ba395927
KA
1361{
1362 u64 val = 0;
1363 unsigned long flag;
1364
ba395927
KA
1365 switch (type) {
1366 case DMA_CCMD_GLOBAL_INVL:
1367 val = DMA_CCMD_GLOBAL_INVL;
1368 break;
1369 case DMA_CCMD_DOMAIN_INVL:
1370 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1371 break;
1372 case DMA_CCMD_DEVICE_INVL:
1373 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1374 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1375 break;
1376 default:
1377 BUG();
1378 }
1379 val |= DMA_CCMD_ICC;
1380
1f5b3c3f 1381 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1382 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1383
1384 /* Make sure hardware complete it */
1385 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1386 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1387
1f5b3c3f 1388 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1389}
1390
ba395927 1391/* return value determines if we need a write buffer flush */
1f0ef2aa
DW
1392static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1393 u64 addr, unsigned int size_order, u64 type)
ba395927
KA
1394{
1395 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1396 u64 val = 0, val_iva = 0;
1397 unsigned long flag;
1398
ba395927
KA
1399 switch (type) {
1400 case DMA_TLB_GLOBAL_FLUSH:
1401 /* global flush doesn't need set IVA_REG */
1402 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1403 break;
1404 case DMA_TLB_DSI_FLUSH:
1405 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1406 break;
1407 case DMA_TLB_PSI_FLUSH:
1408 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
ea8ea460 1409 /* IH bit is passed in as part of address */
ba395927
KA
1410 val_iva = size_order | addr;
1411 break;
1412 default:
1413 BUG();
1414 }
1415 /* Note: set drain read/write */
1416#if 0
1417 /*
1418 * This is probably to be super secure.. Looks like we can
1419 * ignore it without any impact.
1420 */
1421 if (cap_read_drain(iommu->cap))
1422 val |= DMA_TLB_READ_DRAIN;
1423#endif
1424 if (cap_write_drain(iommu->cap))
1425 val |= DMA_TLB_WRITE_DRAIN;
1426
1f5b3c3f 1427 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1428 /* Note: Only uses first TLB reg currently */
1429 if (val_iva)
1430 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1431 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1432
1433 /* Make sure hardware complete it */
1434 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1435 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1436
1f5b3c3f 1437 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1438
1439 /* check IOTLB invalidation granularity */
1440 if (DMA_TLB_IAIG(val) == 0)
9f10e5bf 1441 pr_err("Flush IOTLB failed\n");
ba395927 1442 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
9f10e5bf 1443 pr_debug("TLB flush request %Lx, actual %Lx\n",
5b6985ce
FY
1444 (unsigned long long)DMA_TLB_IIRG(type),
1445 (unsigned long long)DMA_TLB_IAIG(val));
ba395927
KA
1446}
1447
64ae892b
DW
1448static struct device_domain_info *
1449iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
1450 u8 bus, u8 devfn)
93a23a72 1451{
93a23a72 1452 struct device_domain_info *info;
93a23a72 1453
55d94043
JR
1454 assert_spin_locked(&device_domain_lock);
1455
93a23a72
YZ
1456 if (!iommu->qi)
1457 return NULL;
1458
93a23a72 1459 list_for_each_entry(info, &domain->devices, link)
c3b497c6
JL
1460 if (info->iommu == iommu && info->bus == bus &&
1461 info->devfn == devfn) {
b16d0cb9
DW
1462 if (info->ats_supported && info->dev)
1463 return info;
93a23a72
YZ
1464 break;
1465 }
93a23a72 1466
b16d0cb9 1467 return NULL;
93a23a72
YZ
1468}
1469
0824c592
OP
1470static void domain_update_iotlb(struct dmar_domain *domain)
1471{
1472 struct device_domain_info *info;
1473 bool has_iotlb_device = false;
1474
1475 assert_spin_locked(&device_domain_lock);
1476
1477 list_for_each_entry(info, &domain->devices, link) {
1478 struct pci_dev *pdev;
1479
1480 if (!info->dev || !dev_is_pci(info->dev))
1481 continue;
1482
1483 pdev = to_pci_dev(info->dev);
1484 if (pdev->ats_enabled) {
1485 has_iotlb_device = true;
1486 break;
1487 }
1488 }
1489
1490 domain->has_iotlb_device = has_iotlb_device;
1491}
1492
93a23a72 1493static void iommu_enable_dev_iotlb(struct device_domain_info *info)
ba395927 1494{
fb0cc3aa
BH
1495 struct pci_dev *pdev;
1496
0824c592
OP
1497 assert_spin_locked(&device_domain_lock);
1498
0bcb3e28 1499 if (!info || !dev_is_pci(info->dev))
93a23a72
YZ
1500 return;
1501
fb0cc3aa 1502 pdev = to_pci_dev(info->dev);
fb0cc3aa 1503
b16d0cb9
DW
1504#ifdef CONFIG_INTEL_IOMMU_SVM
1505 /* The PCIe spec, in its wisdom, declares that the behaviour of
1506 the device if you enable PASID support after ATS support is
1507 undefined. So always enable PASID support on devices which
1508 have it, even if we can't yet know if we're ever going to
1509 use it. */
1510 if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
1511 info->pasid_enabled = 1;
1512
1513 if (info->pri_supported && !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
1514 info->pri_enabled = 1;
1515#endif
1516 if (info->ats_supported && !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
1517 info->ats_enabled = 1;
0824c592 1518 domain_update_iotlb(info->domain);
b16d0cb9
DW
1519 info->ats_qdep = pci_ats_queue_depth(pdev);
1520 }
93a23a72
YZ
1521}
1522
1523static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1524{
b16d0cb9
DW
1525 struct pci_dev *pdev;
1526
0824c592
OP
1527 assert_spin_locked(&device_domain_lock);
1528
da972fb1 1529 if (!dev_is_pci(info->dev))
93a23a72
YZ
1530 return;
1531
b16d0cb9
DW
1532 pdev = to_pci_dev(info->dev);
1533
1534 if (info->ats_enabled) {
1535 pci_disable_ats(pdev);
1536 info->ats_enabled = 0;
0824c592 1537 domain_update_iotlb(info->domain);
b16d0cb9
DW
1538 }
1539#ifdef CONFIG_INTEL_IOMMU_SVM
1540 if (info->pri_enabled) {
1541 pci_disable_pri(pdev);
1542 info->pri_enabled = 0;
1543 }
1544 if (info->pasid_enabled) {
1545 pci_disable_pasid(pdev);
1546 info->pasid_enabled = 0;
1547 }
1548#endif
93a23a72
YZ
1549}
1550
1551static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1552 u64 addr, unsigned mask)
1553{
1554 u16 sid, qdep;
1555 unsigned long flags;
1556 struct device_domain_info *info;
1557
0824c592
OP
1558 if (!domain->has_iotlb_device)
1559 return;
1560
93a23a72
YZ
1561 spin_lock_irqsave(&device_domain_lock, flags);
1562 list_for_each_entry(info, &domain->devices, link) {
b16d0cb9 1563 if (!info->ats_enabled)
93a23a72
YZ
1564 continue;
1565
1566 sid = info->bus << 8 | info->devfn;
b16d0cb9 1567 qdep = info->ats_qdep;
93a23a72
YZ
1568 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1569 }
1570 spin_unlock_irqrestore(&device_domain_lock, flags);
1571}
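/*
 * The source-id handed to qi_flush_dev_iotlb() is just bus:devfn packed
 * into 16 bits, e.g. bus 0x3a, devfn 0x10 (device 2, function 0) gives
 * sid 0x3a10.
 */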
1572
a1ddcbe9
JR
1573static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
1574 struct dmar_domain *domain,
1575 unsigned long pfn, unsigned int pages,
1576 int ih, int map)
ba395927 1577{
9dd2fe89 1578 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
03d6a246 1579 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
a1ddcbe9 1580 u16 did = domain->iommu_did[iommu->seq_id];
ba395927 1581
ba395927
KA
1582 BUG_ON(pages == 0);
1583
ea8ea460
DW
1584 if (ih)
1585 ih = 1 << 6;
ba395927 1586 /*
9dd2fe89
YZ
 1587 * Fall back to domain-selective flush if there is no PSI support or the
 1588 * size is too big.
ba395927
KA
 1589 * PSI requires the page size to be 2 ^ x, and the base address to be
 1590 * naturally aligned to the size.
1591 */
9dd2fe89
YZ
1592 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1593 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1f0ef2aa 1594 DMA_TLB_DSI_FLUSH);
9dd2fe89 1595 else
ea8ea460 1596 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
9dd2fe89 1597 DMA_TLB_PSI_FLUSH);
bf92df30
YZ
1598
1599 /*
82653633
NA
1600 * In caching mode, changes of pages from non-present to present require
 1601 * a flush. However, the device IOTLB doesn't need to be flushed in this case.
bf92df30 1602 */
82653633 1603 if (!cap_caching_mode(iommu->cap) || !map)
00dabee5 1604 iommu_flush_dev_iotlb(domain, addr, mask);
ba395927
KA
1605}
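/*
 * Example of the mask computation above: flushing 9 pages gives
 * mask = ilog2(__roundup_pow_of_two(9)) = ilog2(16) = 4, i.e. a
 * page-selective invalidation covering 16 pages; if mask exceeds
 * cap_max_amask_val() (or PSI isn't supported at all) the code falls
 * back to a domain-selective flush instead.
 */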
1606
13cf0174
JR
1607static void iommu_flush_iova(struct iova_domain *iovad)
1608{
1609 struct dmar_domain *domain;
1610 int idx;
1611
1612 domain = container_of(iovad, struct dmar_domain, iovad);
1613
1614 for_each_domain_iommu(idx, domain) {
1615 struct intel_iommu *iommu = g_iommus[idx];
1616 u16 did = domain->iommu_did[iommu->seq_id];
1617
1618 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
1619
1620 if (!cap_caching_mode(iommu->cap))
1621 iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
1622 0, MAX_AGAW_PFN_WIDTH);
1623 }
1624}
1625
f8bab735 1626static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1627{
1628 u32 pmen;
1629 unsigned long flags;
1630
1f5b3c3f 1631 raw_spin_lock_irqsave(&iommu->register_lock, flags);
f8bab735 1632 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1633 pmen &= ~DMA_PMEN_EPM;
1634 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1635
1636 /* wait for the protected region status bit to clear */
1637 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1638 readl, !(pmen & DMA_PMEN_PRS), pmen);
1639
1f5b3c3f 1640 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
f8bab735 1641}
1642
2a41ccee 1643static void iommu_enable_translation(struct intel_iommu *iommu)
ba395927
KA
1644{
1645 u32 sts;
1646 unsigned long flags;
1647
1f5b3c3f 1648 raw_spin_lock_irqsave(&iommu->register_lock, flags);
c416daa9
DW
1649 iommu->gcmd |= DMA_GCMD_TE;
1650 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1651
1652 /* Make sure hardware complete it */
1653 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1654 readl, (sts & DMA_GSTS_TES), sts);
ba395927 1655
1f5b3c3f 1656 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
ba395927
KA
1657}
1658
2a41ccee 1659static void iommu_disable_translation(struct intel_iommu *iommu)
ba395927
KA
1660{
1661 u32 sts;
1662 unsigned long flag;
1663
1f5b3c3f 1664 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1665 iommu->gcmd &= ~DMA_GCMD_TE;
1666 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1667
1668 /* Make sure hardware complete it */
1669 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1670 readl, (!(sts & DMA_GSTS_TES)), sts);
ba395927 1671
1f5b3c3f 1672 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1673}
1674
3460a6d9 1675
ba395927
KA
1676static int iommu_init_domains(struct intel_iommu *iommu)
1677{
8bf47816
JR
1678 u32 ndomains, nlongs;
1679 size_t size;
ba395927
KA
1680
1681 ndomains = cap_ndoms(iommu->cap);
8bf47816 1682 pr_debug("%s: Number of Domains supported <%d>\n",
9f10e5bf 1683 iommu->name, ndomains);
ba395927
KA
1684 nlongs = BITS_TO_LONGS(ndomains);
1685
94a91b50
DD
1686 spin_lock_init(&iommu->lock);
1687
ba395927
KA
1688 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1689 if (!iommu->domain_ids) {
9f10e5bf
JR
1690 pr_err("%s: Allocating domain id array failed\n",
1691 iommu->name);
ba395927
KA
1692 return -ENOMEM;
1693 }
8bf47816 1694
86f004c7 1695 size = (ALIGN(ndomains, 256) >> 8) * sizeof(struct dmar_domain **);
8bf47816
JR
1696 iommu->domains = kzalloc(size, GFP_KERNEL);
1697
1698 if (iommu->domains) {
1699 size = 256 * sizeof(struct dmar_domain *);
1700 iommu->domains[0] = kzalloc(size, GFP_KERNEL);
1701 }
1702
1703 if (!iommu->domains || !iommu->domains[0]) {
9f10e5bf
JR
1704 pr_err("%s: Allocating domain array failed\n",
1705 iommu->name);
852bdb04 1706 kfree(iommu->domain_ids);
8bf47816 1707 kfree(iommu->domains);
852bdb04 1708 iommu->domain_ids = NULL;
8bf47816 1709 iommu->domains = NULL;
ba395927
KA
1710 return -ENOMEM;
1711 }
1712
8bf47816
JR
1713
1714
ba395927 1715 /*
c0e8a6c8
JR
1716 * If Caching mode is set, then invalid translations are tagged
1717 * with domain-id 0, hence we need to pre-allocate it. We also
1718 * use domain-id 0 as a marker for non-allocated domain-id, so
1719 * make sure it is not used for a real domain.
ba395927 1720 */
c0e8a6c8
JR
1721 set_bit(0, iommu->domain_ids);
1722
ba395927
KA
1723 return 0;
1724}
ba395927 1725
ffebeb46 1726static void disable_dmar_iommu(struct intel_iommu *iommu)
ba395927 1727{
29a27719 1728 struct device_domain_info *info, *tmp;
55d94043 1729 unsigned long flags;
ba395927 1730
29a27719
JR
1731 if (!iommu->domains || !iommu->domain_ids)
1732 return;
a4eaa86c 1733
bea64033 1734again:
55d94043 1735 spin_lock_irqsave(&device_domain_lock, flags);
29a27719
JR
1736 list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
1737 struct dmar_domain *domain;
1738
1739 if (info->iommu != iommu)
1740 continue;
1741
1742 if (!info->dev || !info->domain)
1743 continue;
1744
1745 domain = info->domain;
1746
bea64033 1747 __dmar_remove_one_dev_info(info);
29a27719 1748
bea64033
JR
1749 if (!domain_type_is_vm_or_si(domain)) {
1750 /*
1751 * The domain_exit() function can't be called under
1752 * device_domain_lock, as it takes this lock itself.
1753 * So release the lock here and re-run the loop
1754 * afterwards.
1755 */
1756 spin_unlock_irqrestore(&device_domain_lock, flags);
29a27719 1757 domain_exit(domain);
bea64033
JR
1758 goto again;
1759 }
ba395927 1760 }
55d94043 1761 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927
KA
1762
1763 if (iommu->gcmd & DMA_GCMD_TE)
1764 iommu_disable_translation(iommu);
ffebeb46 1765}
ba395927 1766
ffebeb46
JL
1767static void free_dmar_iommu(struct intel_iommu *iommu)
1768{
1769 if ((iommu->domains) && (iommu->domain_ids)) {
86f004c7 1770 int elems = ALIGN(cap_ndoms(iommu->cap), 256) >> 8;
8bf47816
JR
1771 int i;
1772
1773 for (i = 0; i < elems; i++)
1774 kfree(iommu->domains[i]);
ffebeb46
JL
1775 kfree(iommu->domains);
1776 kfree(iommu->domain_ids);
1777 iommu->domains = NULL;
1778 iommu->domain_ids = NULL;
1779 }
ba395927 1780
d9630fe9
WH
1781 g_iommus[iommu->seq_id] = NULL;
1782
ba395927
KA
1783 /* free context mapping */
1784 free_context_table(iommu);
8a94ade4
DW
1785
1786#ifdef CONFIG_INTEL_IOMMU_SVM
a222a7f0
DW
1787 if (pasid_enabled(iommu)) {
1788 if (ecap_prs(iommu->ecap))
1789 intel_svm_finish_prq(iommu);
8a94ade4 1790 intel_svm_free_pasid_tables(iommu);
a222a7f0 1791 }
8a94ade4 1792#endif
ba395927
KA
1793}
1794
ab8dfe25 1795static struct dmar_domain *alloc_domain(int flags)
ba395927 1796{
ba395927 1797 struct dmar_domain *domain;
ba395927
KA
1798
1799 domain = alloc_domain_mem();
1800 if (!domain)
1801 return NULL;
1802
ab8dfe25 1803 memset(domain, 0, sizeof(*domain));
4c923d47 1804 domain->nid = -1;
ab8dfe25 1805 domain->flags = flags;
0824c592 1806 domain->has_iotlb_device = false;
92d03cc8 1807 INIT_LIST_HEAD(&domain->devices);
2c2e2c38
FY
1808
1809 return domain;
1810}
1811
d160aca5
JR
1812/* Must be called with iommu->lock */
1813static int domain_attach_iommu(struct dmar_domain *domain,
fb170fb4
JL
1814 struct intel_iommu *iommu)
1815{
44bde614 1816 unsigned long ndomains;
55d94043 1817 int num;
44bde614 1818
55d94043 1819 assert_spin_locked(&device_domain_lock);
d160aca5 1820 assert_spin_locked(&iommu->lock);
ba395927 1821
29a27719
JR
1822 domain->iommu_refcnt[iommu->seq_id] += 1;
1823 domain->iommu_count += 1;
1824 if (domain->iommu_refcnt[iommu->seq_id] == 1) {
fb170fb4 1825 ndomains = cap_ndoms(iommu->cap);
d160aca5
JR
1826 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1827
1828 if (num >= ndomains) {
1829 pr_err("%s: No free domain ids\n", iommu->name);
1830 domain->iommu_refcnt[iommu->seq_id] -= 1;
1831 domain->iommu_count -= 1;
55d94043 1832 return -ENOSPC;
2c2e2c38 1833 }
ba395927 1834
d160aca5
JR
1835 set_bit(num, iommu->domain_ids);
1836 set_iommu_domain(iommu, num, domain);
1837
1838 domain->iommu_did[iommu->seq_id] = num;
1839 domain->nid = iommu->node;
fb170fb4 1840
fb170fb4
JL
1841 domain_update_iommu_cap(domain);
1842 }
d160aca5 1843
55d94043 1844 return 0;
fb170fb4
JL
1845}
1846
1847static int domain_detach_iommu(struct dmar_domain *domain,
1848 struct intel_iommu *iommu)
1849{
d160aca5 1850 int num, count = INT_MAX;
d160aca5 1851
55d94043 1852 assert_spin_locked(&device_domain_lock);
d160aca5 1853 assert_spin_locked(&iommu->lock);
fb170fb4 1854
29a27719
JR
1855 domain->iommu_refcnt[iommu->seq_id] -= 1;
1856 count = --domain->iommu_count;
1857 if (domain->iommu_refcnt[iommu->seq_id] == 0) {
d160aca5
JR
1858 num = domain->iommu_did[iommu->seq_id];
1859 clear_bit(num, iommu->domain_ids);
1860 set_iommu_domain(iommu, num, NULL);
fb170fb4 1861
fb170fb4 1862 domain_update_iommu_cap(domain);
c0e8a6c8 1863 domain->iommu_did[iommu->seq_id] = 0;
fb170fb4 1864 }
fb170fb4
JL
1865
1866 return count;
1867}
1868
ba395927 1869static struct iova_domain reserved_iova_list;
8a443df4 1870static struct lock_class_key reserved_rbtree_key;
ba395927 1871
51a63e67 1872static int dmar_init_reserved_ranges(void)
ba395927
KA
1873{
1874 struct pci_dev *pdev = NULL;
1875 struct iova *iova;
1876 int i;
ba395927 1877
aa3ac946 1878 init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN);
ba395927 1879
8a443df4
MG
1880 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1881 &reserved_rbtree_key);
1882
ba395927
KA
1883 /* IOAPIC ranges shouldn't be accessed by DMA */
1884 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1885 IOVA_PFN(IOAPIC_RANGE_END));
51a63e67 1886 if (!iova) {
9f10e5bf 1887 pr_err("Reserve IOAPIC range failed\n");
51a63e67
JC
1888 return -ENODEV;
1889 }
ba395927
KA
1890
1891 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1892 for_each_pci_dev(pdev) {
1893 struct resource *r;
1894
1895 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1896 r = &pdev->resource[i];
1897 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1898 continue;
1a4a4551
DW
1899 iova = reserve_iova(&reserved_iova_list,
1900 IOVA_PFN(r->start),
1901 IOVA_PFN(r->end));
51a63e67 1902 if (!iova) {
9f10e5bf 1903 pr_err("Reserve iova failed\n");
51a63e67
JC
1904 return -ENODEV;
1905 }
ba395927
KA
1906 }
1907 }
51a63e67 1908 return 0;
ba395927
KA
1909}
1910
1911static void domain_reserve_special_ranges(struct dmar_domain *domain)
1912{
1913 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1914}
1915
1916static inline int guestwidth_to_adjustwidth(int gaw)
1917{
1918 int agaw;
1919 int r = (gaw - 12) % 9;
1920
1921 if (r == 0)
1922 agaw = gaw;
1923 else
1924 agaw = gaw + 9 - r;
1925 if (agaw > 64)
1926 agaw = 64;
1927 return agaw;
1928}
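
Worked examples of the rounding above, derived purely from the arithmetic in guestwidth_to_adjustwidth(): the width above the 12-bit page offset is rounded up to whole 9-bit levels and the result is capped at 64.

/*
 * gaw = 48: (48 - 12) % 9 == 0  ->  agaw = 48             (36 bits = 4 levels)
 * gaw = 39: (39 - 12) % 9 == 0  ->  agaw = 39             (27 bits = 3 levels)
 * gaw = 40: (40 - 12) % 9 == 1  ->  agaw = 40 + 9 - 1 = 48
 * gaw = 62: (62 - 12) % 9 == 5  ->  agaw = 62 + 9 - 5 = 66, capped to 64
 */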
1929
dc534b25
JR
1930static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
1931 int guest_width)
ba395927 1932{
ba395927
KA
1933 int adjust_width, agaw;
1934 unsigned long sagaw;
13cf0174 1935 int err;
ba395927 1936
aa3ac946 1937 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
13cf0174
JR
1938
1939 err = init_iova_flush_queue(&domain->iovad,
1940 iommu_flush_iova, iova_entry_free);
1941 if (err)
1942 return err;
1943
ba395927
KA
1944 domain_reserve_special_ranges(domain);
1945
1946 /* calculate AGAW */
ba395927
KA
1947 if (guest_width > cap_mgaw(iommu->cap))
1948 guest_width = cap_mgaw(iommu->cap);
1949 domain->gaw = guest_width;
1950 adjust_width = guestwidth_to_adjustwidth(guest_width);
1951 agaw = width_to_agaw(adjust_width);
1952 sagaw = cap_sagaw(iommu->cap);
1953 if (!test_bit(agaw, &sagaw)) {
1954 /* hardware doesn't support it, choose a bigger one */
9f10e5bf 1955 pr_debug("Hardware doesn't support agaw %d\n", agaw);
ba395927
KA
1956 agaw = find_next_bit(&sagaw, 5, agaw);
1957 if (agaw >= 5)
1958 return -ENODEV;
1959 }
1960 domain->agaw = agaw;
ba395927 1961
8e604097
WH
1962 if (ecap_coherent(iommu->ecap))
1963 domain->iommu_coherency = 1;
1964 else
1965 domain->iommu_coherency = 0;
1966
58c610bd
SY
1967 if (ecap_sc_support(iommu->ecap))
1968 domain->iommu_snooping = 1;
1969 else
1970 domain->iommu_snooping = 0;
1971
214e39aa
DW
1972 if (intel_iommu_superpage)
1973 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1974 else
1975 domain->iommu_superpage = 0;
1976
4c923d47 1977 domain->nid = iommu->node;
c7151a8d 1978
ba395927 1979 /* always allocate the top pgd */
4c923d47 1980 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
ba395927
KA
1981 if (!domain->pgd)
1982 return -ENOMEM;
5b6985ce 1983 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
ba395927
KA
1984 return 0;
1985}
1986
1987static void domain_exit(struct dmar_domain *domain)
1988{
ea8ea460 1989 struct page *freelist = NULL;
ba395927
KA
1990
 1991 /* Domain 0 is reserved, so don't process it */
1992 if (!domain)
1993 return;
1994
d160aca5
JR
1995 /* Remove associated devices and clear attached or cached domains */
1996 rcu_read_lock();
ba395927 1997 domain_remove_dev_info(domain);
d160aca5 1998 rcu_read_unlock();
92d03cc8 1999
ba395927
KA
2000 /* destroy iovas */
2001 put_iova_domain(&domain->iovad);
ba395927 2002
ea8ea460 2003 freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
ba395927 2004
ea8ea460
DW
2005 dma_free_pagelist(freelist);
2006
ba395927
KA
2007 free_domain_mem(domain);
2008}
2009
64ae892b
DW
2010static int domain_context_mapping_one(struct dmar_domain *domain,
2011 struct intel_iommu *iommu,
28ccce0d 2012 u8 bus, u8 devfn)
ba395927 2013{
c6c2cebd 2014 u16 did = domain->iommu_did[iommu->seq_id];
28ccce0d
JR
2015 int translation = CONTEXT_TT_MULTI_LEVEL;
2016 struct device_domain_info *info = NULL;
ba395927 2017 struct context_entry *context;
ba395927 2018 unsigned long flags;
ea6606b0 2019 struct dma_pte *pgd;
55d94043 2020 int ret, agaw;
28ccce0d 2021
c6c2cebd
JR
2022 WARN_ON(did == 0);
2023
28ccce0d
JR
2024 if (hw_pass_through && domain_type_is_si(domain))
2025 translation = CONTEXT_TT_PASS_THROUGH;
ba395927
KA
2026
2027 pr_debug("Set context mapping for %02x:%02x.%d\n",
2028 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
4ed0d3e6 2029
ba395927 2030 BUG_ON(!domain->pgd);
5331fe6f 2031
55d94043
JR
2032 spin_lock_irqsave(&device_domain_lock, flags);
2033 spin_lock(&iommu->lock);
2034
2035 ret = -ENOMEM;
03ecc32c 2036 context = iommu_context_addr(iommu, bus, devfn, 1);
ba395927 2037 if (!context)
55d94043 2038 goto out_unlock;
ba395927 2039
55d94043
JR
2040 ret = 0;
2041 if (context_present(context))
2042 goto out_unlock;
cf484d0e 2043
aec0e861
XP
2044 /*
 2045 * For kdump cases, old valid entries may be cached due to
 2046 * in-flight DMA and the copied pgtable, but there is no
 2047 * unmapping behaviour for them, so we need an explicit cache
 2048 * flush for the newly-mapped device. For kdump, at this point,
 2049 * the device is supposed to have finished its reset at driver
 2050 * probe stage, so no in-flight DMA will exist, and we don't
 2051 * need to worry about it hereafter.
2052 */
2053 if (context_copied(context)) {
2054 u16 did_old = context_domain_id(context);
2055
b117e038 2056 if (did_old < cap_ndoms(iommu->cap)) {
aec0e861
XP
2057 iommu->flush.flush_context(iommu, did_old,
2058 (((u16)bus) << 8) | devfn,
2059 DMA_CCMD_MASK_NOBIT,
2060 DMA_CCMD_DEVICE_INVL);
f73a7eee
KA
2061 iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
2062 DMA_TLB_DSI_FLUSH);
2063 }
aec0e861
XP
2064 }
2065
ea6606b0
WH
2066 pgd = domain->pgd;
2067
de24e553 2068 context_clear_entry(context);
c6c2cebd 2069 context_set_domain_id(context, did);
ea6606b0 2070
de24e553
JR
2071 /*
 2072 * Skip top levels of page tables for an iommu which has a smaller
 2073 * agaw than the default. Unnecessary for PT mode.
2074 */
93a23a72 2075 if (translation != CONTEXT_TT_PASS_THROUGH) {
de24e553 2076 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
55d94043 2077 ret = -ENOMEM;
de24e553 2078 pgd = phys_to_virt(dma_pte_addr(pgd));
55d94043
JR
2079 if (!dma_pte_present(pgd))
2080 goto out_unlock;
ea6606b0 2081 }
4ed0d3e6 2082
64ae892b 2083 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
b16d0cb9
DW
2084 if (info && info->ats_supported)
2085 translation = CONTEXT_TT_DEV_IOTLB;
2086 else
2087 translation = CONTEXT_TT_MULTI_LEVEL;
de24e553 2088
93a23a72
YZ
2089 context_set_address_root(context, virt_to_phys(pgd));
2090 context_set_address_width(context, iommu->agaw);
de24e553
JR
2091 } else {
2092 /*
2093 * In pass through mode, AW must be programmed to
2094 * indicate the largest AGAW value supported by
2095 * hardware. And ASR is ignored by hardware.
2096 */
2097 context_set_address_width(context, iommu->msagaw);
93a23a72 2098 }
4ed0d3e6
FY
2099
2100 context_set_translation_type(context, translation);
c07e7d21
MM
2101 context_set_fault_enable(context);
2102 context_set_present(context);
5331fe6f 2103 domain_flush_cache(domain, context, sizeof(*context));
ba395927 2104
4c25a2c1
DW
2105 /*
 2106 * It's a non-present to present mapping. If hardware doesn't cache
 2107 * non-present entries we only need to flush the write-buffer. If it
 2108 * _does_ cache non-present entries, then it does so in the special
2109 * domain #0, which we have to flush:
2110 */
2111 if (cap_caching_mode(iommu->cap)) {
2112 iommu->flush.flush_context(iommu, 0,
2113 (((u16)bus) << 8) | devfn,
2114 DMA_CCMD_MASK_NOBIT,
2115 DMA_CCMD_DEVICE_INVL);
c6c2cebd 2116 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
4c25a2c1 2117 } else {
ba395927 2118 iommu_flush_write_buffer(iommu);
4c25a2c1 2119 }
93a23a72 2120 iommu_enable_dev_iotlb(info);
c7151a8d 2121
55d94043
JR
2122 ret = 0;
2123
2124out_unlock:
2125 spin_unlock(&iommu->lock);
2126 spin_unlock_irqrestore(&device_domain_lock, flags);
fb170fb4 2127
5c365d18 2128 return ret;
ba395927
KA
2129}
2130
579305f7
AW
2131struct domain_context_mapping_data {
2132 struct dmar_domain *domain;
2133 struct intel_iommu *iommu;
579305f7
AW
2134};
2135
2136static int domain_context_mapping_cb(struct pci_dev *pdev,
2137 u16 alias, void *opaque)
2138{
2139 struct domain_context_mapping_data *data = opaque;
2140
2141 return domain_context_mapping_one(data->domain, data->iommu,
28ccce0d 2142 PCI_BUS_NUM(alias), alias & 0xff);
579305f7
AW
2143}
2144
ba395927 2145static int
28ccce0d 2146domain_context_mapping(struct dmar_domain *domain, struct device *dev)
ba395927 2147{
64ae892b 2148 struct intel_iommu *iommu;
156baca8 2149 u8 bus, devfn;
579305f7 2150 struct domain_context_mapping_data data;
64ae892b 2151
e1f167f3 2152 iommu = device_to_iommu(dev, &bus, &devfn);
64ae892b
DW
2153 if (!iommu)
2154 return -ENODEV;
ba395927 2155
579305f7 2156 if (!dev_is_pci(dev))
28ccce0d 2157 return domain_context_mapping_one(domain, iommu, bus, devfn);
579305f7
AW
2158
2159 data.domain = domain;
2160 data.iommu = iommu;
579305f7
AW
2161
2162 return pci_for_each_dma_alias(to_pci_dev(dev),
2163 &domain_context_mapping_cb, &data);
2164}
2165
2166static int domain_context_mapped_cb(struct pci_dev *pdev,
2167 u16 alias, void *opaque)
2168{
2169 struct intel_iommu *iommu = opaque;
2170
2171 return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
ba395927
KA
2172}
2173
e1f167f3 2174static int domain_context_mapped(struct device *dev)
ba395927 2175{
5331fe6f 2176 struct intel_iommu *iommu;
156baca8 2177 u8 bus, devfn;
5331fe6f 2178
e1f167f3 2179 iommu = device_to_iommu(dev, &bus, &devfn);
5331fe6f
WH
2180 if (!iommu)
2181 return -ENODEV;
ba395927 2182
579305f7
AW
2183 if (!dev_is_pci(dev))
2184 return device_context_mapped(iommu, bus, devfn);
e1f167f3 2185
579305f7
AW
2186 return !pci_for_each_dma_alias(to_pci_dev(dev),
2187 domain_context_mapped_cb, iommu);
ba395927
KA
2188}
2189
f532959b
FY
2190/* Returns a number of VTD pages, but aligned to MM page size */
2191static inline unsigned long aligned_nrpages(unsigned long host_addr,
2192 size_t size)
2193{
2194 host_addr &= ~PAGE_MASK;
2195 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
2196}
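
A short worked example for the helper above, assuming 4KiB pages on both sides (PAGE_SHIFT == VTD_PAGE_SHIFT == 12); only the offset inside the MM page is kept from host_addr before rounding up.

/*
 * aligned_nrpages(0x1234800, 0x1000):
 *   host_addr &= ~PAGE_MASK        -> 0x800   (offset within the page)
 *   PAGE_ALIGN(0x800 + 0x1000)     -> 0x2000  (rounded to 2 MM pages)
 *   0x2000 >> VTD_PAGE_SHIFT       -> 2       VT-d pages to map
 *
 * A 4KiB buffer that straddles a page boundary therefore still needs
 * two PTEs even though its size alone fits in one page.
 */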
2197
6dd9a7c7
YS
2198/* Return largest possible superpage level for a given mapping */
2199static inline int hardware_largepage_caps(struct dmar_domain *domain,
2200 unsigned long iov_pfn,
2201 unsigned long phy_pfn,
2202 unsigned long pages)
2203{
2204 int support, level = 1;
2205 unsigned long pfnmerge;
2206
2207 support = domain->iommu_superpage;
2208
2209 /* To use a large page, the virtual *and* physical addresses
2210 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2211 of them will mean we have to use smaller pages. So just
2212 merge them and check both at once. */
2213 pfnmerge = iov_pfn | phy_pfn;
2214
2215 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
2216 pages >>= VTD_STRIDE_SHIFT;
2217 if (!pages)
2218 break;
2219 pfnmerge >>= VTD_STRIDE_SHIFT;
2220 level++;
2221 support--;
2222 }
2223 return level;
2224}
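
With VTD_STRIDE_SHIFT == 9 each extra level multiplies the page size by 512: level 1 is a 4KiB page, level 2 a 2MiB superpage, level 3 a 1GiB superpage. A sketch of the loop above landing on level 2 (numbers chosen for illustration only):

/*
 * iov_pfn = 0x80000, phy_pfn = 0x40000, pages = 1024,
 * domain->iommu_superpage = 2 (hardware offers 2MiB and 1GiB):
 *
 *   pfnmerge = 0x80000 | 0x40000 = 0xc0000    low 9 bits clear
 *   pass 1: pages = 1024 >> 9 = 2, pfnmerge >>= 9, level = 2
 *   pass 2: alignment would still allow 1GiB, but pages = 2 >> 9 = 0,
 *           so the loop breaks
 *
 * Result: level 2, i.e. the caller can start with 2MiB superpages
 * (512 VT-d pages each) and falls back to smaller pages for any
 * unaligned or short tail.
 */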
2225
9051aa02
DW
2226static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2227 struct scatterlist *sg, unsigned long phys_pfn,
2228 unsigned long nr_pages, int prot)
e1605495
DW
2229{
2230 struct dma_pte *first_pte = NULL, *pte = NULL;
9051aa02 2231 phys_addr_t uninitialized_var(pteval);
cc4f14aa 2232 unsigned long sg_res = 0;
6dd9a7c7
YS
2233 unsigned int largepage_lvl = 0;
2234 unsigned long lvl_pages = 0;
e1605495 2235
162d1b10 2236 BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
e1605495
DW
2237
2238 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2239 return -EINVAL;
2240
2241 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
2242
cc4f14aa
JL
2243 if (!sg) {
2244 sg_res = nr_pages;
9051aa02
DW
2245 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
2246 }
2247
6dd9a7c7 2248 while (nr_pages > 0) {
c85994e4
DW
2249 uint64_t tmp;
2250
e1605495 2251 if (!sg_res) {
29a90b70
RM
2252 unsigned int pgoff = sg->offset & ~PAGE_MASK;
2253
f532959b 2254 sg_res = aligned_nrpages(sg->offset, sg->length);
29a90b70 2255 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + pgoff;
e1605495 2256 sg->dma_length = sg->length;
29a90b70 2257 pteval = (sg_phys(sg) - pgoff) | prot;
6dd9a7c7 2258 phys_pfn = pteval >> VTD_PAGE_SHIFT;
e1605495 2259 }
6dd9a7c7 2260
e1605495 2261 if (!pte) {
6dd9a7c7
YS
2262 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2263
5cf0a76f 2264 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
e1605495
DW
2265 if (!pte)
2266 return -ENOMEM;
6dd9a7c7 2267 /* It is large page*/
6491d4d0 2268 if (largepage_lvl > 1) {
ba2374fd
CZ
2269 unsigned long nr_superpages, end_pfn;
2270
6dd9a7c7 2271 pteval |= DMA_PTE_LARGE_PAGE;
d41a4adb 2272 lvl_pages = lvl_to_nr_pages(largepage_lvl);
ba2374fd
CZ
2273
2274 nr_superpages = sg_res / lvl_pages;
2275 end_pfn = iov_pfn + nr_superpages * lvl_pages - 1;
2276
d41a4adb
JL
2277 /*
2278 * Ensure that old small page tables are
ba2374fd 2279 * removed to make room for superpage(s).
bc24c571
DD
2280 * We're adding new large pages, so make sure
2281 * we don't remove their parent tables.
d41a4adb 2282 */
bc24c571
DD
2283 dma_pte_free_pagetable(domain, iov_pfn, end_pfn,
2284 largepage_lvl + 1);
6491d4d0 2285 } else {
6dd9a7c7 2286 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
6491d4d0 2287 }
6dd9a7c7 2288
e1605495
DW
2289 }
 2290 /* We don't need a lock here; nobody else
2291 * touches the iova range
2292 */
7766a3fb 2293 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
c85994e4 2294 if (tmp) {
1bf20f0d 2295 static int dumps = 5;
9f10e5bf
JR
2296 pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2297 iov_pfn, tmp, (unsigned long long)pteval);
1bf20f0d
DW
2298 if (dumps) {
2299 dumps--;
2300 debug_dma_dump_mappings(NULL);
2301 }
2302 WARN_ON(1);
2303 }
6dd9a7c7
YS
2304
2305 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2306
2307 BUG_ON(nr_pages < lvl_pages);
2308 BUG_ON(sg_res < lvl_pages);
2309
2310 nr_pages -= lvl_pages;
2311 iov_pfn += lvl_pages;
2312 phys_pfn += lvl_pages;
2313 pteval += lvl_pages * VTD_PAGE_SIZE;
2314 sg_res -= lvl_pages;
2315
2316 /* If the next PTE would be the first in a new page, then we
2317 need to flush the cache on the entries we've just written.
2318 And then we'll need to recalculate 'pte', so clear it and
2319 let it get set again in the if (!pte) block above.
2320
2321 If we're done (!nr_pages) we need to flush the cache too.
2322
2323 Also if we've been setting superpages, we may need to
2324 recalculate 'pte' and switch back to smaller pages for the
2325 end of the mapping, if the trailing size is not enough to
2326 use another superpage (i.e. sg_res < lvl_pages). */
e1605495 2327 pte++;
6dd9a7c7
YS
2328 if (!nr_pages || first_pte_in_page(pte) ||
2329 (largepage_lvl > 1 && sg_res < lvl_pages)) {
e1605495
DW
2330 domain_flush_cache(domain, first_pte,
2331 (void *)pte - (void *)first_pte);
2332 pte = NULL;
2333 }
6dd9a7c7
YS
2334
2335 if (!sg_res && nr_pages)
e1605495
DW
2336 sg = sg_next(sg);
2337 }
2338 return 0;
2339}
2340
9051aa02
DW
2341static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2342 struct scatterlist *sg, unsigned long nr_pages,
2343 int prot)
ba395927 2344{
9051aa02
DW
2345 return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
2346}
6f6a00e4 2347
9051aa02
DW
2348static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2349 unsigned long phys_pfn, unsigned long nr_pages,
2350 int prot)
2351{
2352 return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
ba395927
KA
2353}
2354
2452d9db 2355static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn)
ba395927 2356{
5082219b
FS
2357 unsigned long flags;
2358 struct context_entry *context;
2359 u16 did_old;
2360
c7151a8d
WH
2361 if (!iommu)
2362 return;
8c11e798 2363
5082219b
FS
2364 spin_lock_irqsave(&iommu->lock, flags);
2365 context = iommu_context_addr(iommu, bus, devfn, 0);
2366 if (!context) {
2367 spin_unlock_irqrestore(&iommu->lock, flags);
2368 return;
2369 }
2370 did_old = context_domain_id(context);
2371 context_clear_entry(context);
2372 __iommu_flush_cache(iommu, context, sizeof(*context));
2373 spin_unlock_irqrestore(&iommu->lock, flags);
2374 iommu->flush.flush_context(iommu,
2375 did_old,
2376 (((u16)bus) << 8) | devfn,
2377 DMA_CCMD_MASK_NOBIT,
2378 DMA_CCMD_DEVICE_INVL);
2379 iommu->flush.flush_iotlb(iommu,
2380 did_old,
2381 0,
2382 0,
2383 DMA_TLB_DSI_FLUSH);
ba395927
KA
2384}
2385
109b9b04
DW
2386static inline void unlink_domain_info(struct device_domain_info *info)
2387{
2388 assert_spin_locked(&device_domain_lock);
2389 list_del(&info->link);
2390 list_del(&info->global);
2391 if (info->dev)
0bcb3e28 2392 info->dev->archdata.iommu = NULL;
109b9b04
DW
2393}
2394
ba395927
KA
2395static void domain_remove_dev_info(struct dmar_domain *domain)
2396{
3a74ca01 2397 struct device_domain_info *info, *tmp;
fb170fb4 2398 unsigned long flags;
ba395927
KA
2399
2400 spin_lock_irqsave(&device_domain_lock, flags);
76f45fe3 2401 list_for_each_entry_safe(info, tmp, &domain->devices, link)
127c7615 2402 __dmar_remove_one_dev_info(info);
ba395927
KA
2403 spin_unlock_irqrestore(&device_domain_lock, flags);
2404}
2405
2406/*
2407 * find_domain
1525a29a 2408 * Note: we use struct device->archdata.iommu to store the info
ba395927 2409 */
1525a29a 2410static struct dmar_domain *find_domain(struct device *dev)
ba395927
KA
2411{
2412 struct device_domain_info *info;
2413
2414 /* No lock here, assumes no domain exit in normal case */
1525a29a 2415 info = dev->archdata.iommu;
b316d02a 2416 if (likely(info))
ba395927
KA
2417 return info->domain;
2418 return NULL;
2419}
2420
5a8f40e8 2421static inline struct device_domain_info *
745f2586
JL
2422dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2423{
2424 struct device_domain_info *info;
2425
2426 list_for_each_entry(info, &device_domain_list, global)
41e80dca 2427 if (info->iommu->segment == segment && info->bus == bus &&
745f2586 2428 info->devfn == devfn)
5a8f40e8 2429 return info;
745f2586
JL
2430
2431 return NULL;
2432}
2433
5db31569
JR
2434static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
2435 int bus, int devfn,
2436 struct device *dev,
2437 struct dmar_domain *domain)
745f2586 2438{
5a8f40e8 2439 struct dmar_domain *found = NULL;
745f2586
JL
2440 struct device_domain_info *info;
2441 unsigned long flags;
d160aca5 2442 int ret;
745f2586
JL
2443
2444 info = alloc_devinfo_mem();
2445 if (!info)
b718cd3d 2446 return NULL;
745f2586 2447
745f2586
JL
2448 info->bus = bus;
2449 info->devfn = devfn;
b16d0cb9
DW
2450 info->ats_supported = info->pasid_supported = info->pri_supported = 0;
2451 info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
2452 info->ats_qdep = 0;
745f2586
JL
2453 info->dev = dev;
2454 info->domain = domain;
5a8f40e8 2455 info->iommu = iommu;
745f2586 2456
b16d0cb9
DW
2457 if (dev && dev_is_pci(dev)) {
2458 struct pci_dev *pdev = to_pci_dev(info->dev);
2459
2460 if (ecap_dev_iotlb_support(iommu->ecap) &&
2461 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) &&
2462 dmar_find_matched_atsr_unit(pdev))
2463 info->ats_supported = 1;
2464
2465 if (ecs_enabled(iommu)) {
2466 if (pasid_enabled(iommu)) {
2467 int features = pci_pasid_features(pdev);
2468 if (features >= 0)
2469 info->pasid_supported = features | 1;
2470 }
2471
2472 if (info->ats_supported && ecap_prs(iommu->ecap) &&
2473 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
2474 info->pri_supported = 1;
2475 }
2476 }
2477
745f2586
JL
2478 spin_lock_irqsave(&device_domain_lock, flags);
2479 if (dev)
0bcb3e28 2480 found = find_domain(dev);
f303e507
JR
2481
2482 if (!found) {
5a8f40e8 2483 struct device_domain_info *info2;
41e80dca 2484 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
f303e507
JR
2485 if (info2) {
2486 found = info2->domain;
2487 info2->dev = dev;
2488 }
5a8f40e8 2489 }
f303e507 2490
745f2586
JL
2491 if (found) {
2492 spin_unlock_irqrestore(&device_domain_lock, flags);
2493 free_devinfo_mem(info);
b718cd3d
DW
2494 /* Caller must free the original domain */
2495 return found;
745f2586
JL
2496 }
2497
d160aca5
JR
2498 spin_lock(&iommu->lock);
2499 ret = domain_attach_iommu(domain, iommu);
2500 spin_unlock(&iommu->lock);
2501
2502 if (ret) {
c6c2cebd 2503 spin_unlock_irqrestore(&device_domain_lock, flags);
499f3aa4 2504 free_devinfo_mem(info);
c6c2cebd
JR
2505 return NULL;
2506 }
c6c2cebd 2507
b718cd3d
DW
2508 list_add(&info->link, &domain->devices);
2509 list_add(&info->global, &device_domain_list);
2510 if (dev)
2511 dev->archdata.iommu = info;
2512 spin_unlock_irqrestore(&device_domain_lock, flags);
2513
cc4e2575
JR
2514 if (dev && domain_context_mapping(domain, dev)) {
2515 pr_err("Domain context map for %s failed\n", dev_name(dev));
e6de0f8d 2516 dmar_remove_one_dev_info(domain, dev);
cc4e2575
JR
2517 return NULL;
2518 }
2519
b718cd3d 2520 return domain;
745f2586
JL
2521}
2522
579305f7
AW
2523static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2524{
2525 *(u16 *)opaque = alias;
2526 return 0;
2527}
2528
76208356 2529static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw)
ba395927 2530{
cc4e2575 2531 struct device_domain_info *info = NULL;
76208356 2532 struct dmar_domain *domain = NULL;
579305f7 2533 struct intel_iommu *iommu;
08a7f456 2534 u16 req_id, dma_alias;
ba395927 2535 unsigned long flags;
aa4d066a 2536 u8 bus, devfn;
ba395927 2537
579305f7
AW
2538 iommu = device_to_iommu(dev, &bus, &devfn);
2539 if (!iommu)
2540 return NULL;
2541
08a7f456
JR
2542 req_id = ((u16)bus << 8) | devfn;
2543
146922ec
DW
2544 if (dev_is_pci(dev)) {
2545 struct pci_dev *pdev = to_pci_dev(dev);
276dbf99 2546
579305f7
AW
2547 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2548
2549 spin_lock_irqsave(&device_domain_lock, flags);
2550 info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2551 PCI_BUS_NUM(dma_alias),
2552 dma_alias & 0xff);
2553 if (info) {
2554 iommu = info->iommu;
2555 domain = info->domain;
5a8f40e8 2556 }
579305f7 2557 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927 2558
76208356 2559 /* DMA alias already has a domain, use it */
579305f7 2560 if (info)
76208356 2561 goto out;
579305f7 2562 }
ba395927 2563
146922ec 2564 /* Allocate and initialize new domain for the device */
ab8dfe25 2565 domain = alloc_domain(0);
745f2586 2566 if (!domain)
579305f7 2567 return NULL;
dc534b25 2568 if (domain_init(domain, iommu, gaw)) {
579305f7
AW
2569 domain_exit(domain);
2570 return NULL;
2c2e2c38 2571 }
ba395927 2572
76208356 2573out:
579305f7 2574
76208356
JR
2575 return domain;
2576}
579305f7 2577
76208356
JR
2578static struct dmar_domain *set_domain_for_dev(struct device *dev,
2579 struct dmar_domain *domain)
2580{
2581 struct intel_iommu *iommu;
2582 struct dmar_domain *tmp;
2583 u16 req_id, dma_alias;
2584 u8 bus, devfn;
2585
2586 iommu = device_to_iommu(dev, &bus, &devfn);
2587 if (!iommu)
2588 return NULL;
2589
2590 req_id = ((u16)bus << 8) | devfn;
2591
2592 if (dev_is_pci(dev)) {
2593 struct pci_dev *pdev = to_pci_dev(dev);
2594
2595 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2596
2597 /* register PCI DMA alias device */
2598 if (req_id != dma_alias) {
2599 tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2600 dma_alias & 0xff, NULL, domain);
2601
2602 if (!tmp || tmp != domain)
2603 return tmp;
2604 }
ba395927
KA
2605 }
2606
5db31569 2607 tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
76208356
JR
2608 if (!tmp || tmp != domain)
2609 return tmp;
2610
2611 return domain;
2612}
579305f7 2613
76208356
JR
2614static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
2615{
2616 struct dmar_domain *domain, *tmp;
2617
2618 domain = find_domain(dev);
2619 if (domain)
2620 goto out;
2621
2622 domain = find_or_alloc_domain(dev, gaw);
2623 if (!domain)
2624 goto out;
2625
2626 tmp = set_domain_for_dev(dev, domain);
2627 if (!tmp || domain != tmp) {
579305f7
AW
2628 domain_exit(domain);
2629 domain = tmp;
2630 }
b718cd3d 2631
76208356
JR
2632out:
2633
b718cd3d 2634 return domain;
ba395927
KA
2635}
2636
b213203e
DW
2637static int iommu_domain_identity_map(struct dmar_domain *domain,
2638 unsigned long long start,
2639 unsigned long long end)
ba395927 2640{
c5395d5c
DW
2641 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2642 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2643
2644 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2645 dma_to_mm_pfn(last_vpfn))) {
9f10e5bf 2646 pr_err("Reserving iova failed\n");
b213203e 2647 return -ENOMEM;
ba395927
KA
2648 }
2649
af1089ce 2650 pr_debug("Mapping reserved region %llx-%llx\n", start, end);
ba395927
KA
2651 /*
 2652 * The RMRR range might overlap with the physical memory range,
 2653 * so clear it first
2654 */
c5395d5c 2655 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
ba395927 2656
c5395d5c
DW
2657 return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2658 last_vpfn - first_vpfn + 1,
61df7443 2659 DMA_PTE_READ|DMA_PTE_WRITE);
b213203e
DW
2660}
2661
d66ce54b
JR
2662static int domain_prepare_identity_map(struct device *dev,
2663 struct dmar_domain *domain,
2664 unsigned long long start,
2665 unsigned long long end)
b213203e 2666{
19943b0e
DW
2667 /* For _hardware_ passthrough, don't bother. But for software
2668 passthrough, we do it anyway -- it may indicate a memory
 2669 range which is reserved in E820 and so didn't get set
 2670 up in si_domain to start with */
2671 if (domain == si_domain && hw_pass_through) {
9f10e5bf
JR
2672 pr_warn("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2673 dev_name(dev), start, end);
19943b0e
DW
2674 return 0;
2675 }
2676
9f10e5bf
JR
2677 pr_info("Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2678 dev_name(dev), start, end);
2679
5595b528
DW
2680 if (end < start) {
2681 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2682 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2683 dmi_get_system_info(DMI_BIOS_VENDOR),
2684 dmi_get_system_info(DMI_BIOS_VERSION),
2685 dmi_get_system_info(DMI_PRODUCT_VERSION));
d66ce54b 2686 return -EIO;
5595b528
DW
2687 }
2688
2ff729f5
DW
2689 if (end >> agaw_to_width(domain->agaw)) {
2690 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2691 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2692 agaw_to_width(domain->agaw),
2693 dmi_get_system_info(DMI_BIOS_VENDOR),
2694 dmi_get_system_info(DMI_BIOS_VERSION),
2695 dmi_get_system_info(DMI_PRODUCT_VERSION));
d66ce54b 2696 return -EIO;
2ff729f5 2697 }
19943b0e 2698
d66ce54b
JR
2699 return iommu_domain_identity_map(domain, start, end);
2700}
ba395927 2701
d66ce54b
JR
2702static int iommu_prepare_identity_map(struct device *dev,
2703 unsigned long long start,
2704 unsigned long long end)
2705{
2706 struct dmar_domain *domain;
2707 int ret;
2708
2709 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2710 if (!domain)
2711 return -ENOMEM;
2712
2713 ret = domain_prepare_identity_map(dev, domain, start, end);
2714 if (ret)
2715 domain_exit(domain);
b213203e 2716
ba395927 2717 return ret;
ba395927
KA
2718}
2719
2720static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
0b9d9753 2721 struct device *dev)
ba395927 2722{
0b9d9753 2723 if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927 2724 return 0;
0b9d9753
DW
2725 return iommu_prepare_identity_map(dev, rmrr->base_address,
2726 rmrr->end_address);
ba395927
KA
2727}
2728
d3f13810 2729#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
49a0429e
KA
2730static inline void iommu_prepare_isa(void)
2731{
2732 struct pci_dev *pdev;
2733 int ret;
2734
2735 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2736 if (!pdev)
2737 return;
2738
9f10e5bf 2739 pr_info("Prepare 0-16MiB unity mapping for LPC\n");
0b9d9753 2740 ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);
49a0429e
KA
2741
2742 if (ret)
9f10e5bf 2743 pr_err("Failed to create 0-16MiB identity map - floppy might not work\n");
49a0429e 2744
9b27e82d 2745 pci_dev_put(pdev);
49a0429e
KA
2746}
2747#else
2748static inline void iommu_prepare_isa(void)
2749{
2750 return;
2751}
d3f13810 2752#endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */
49a0429e 2753
2c2e2c38 2754static int md_domain_init(struct dmar_domain *domain, int guest_width);
c7ab48d2 2755
071e1374 2756static int __init si_domain_init(int hw)
2c2e2c38 2757{
c7ab48d2 2758 int nid, ret = 0;
2c2e2c38 2759
ab8dfe25 2760 si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2c2e2c38
FY
2761 if (!si_domain)
2762 return -EFAULT;
2763
2c2e2c38
FY
2764 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2765 domain_exit(si_domain);
2766 return -EFAULT;
2767 }
2768
0dc79715 2769 pr_debug("Identity mapping domain allocated\n");
2c2e2c38 2770
19943b0e
DW
2771 if (hw)
2772 return 0;
2773
c7ab48d2 2774 for_each_online_node(nid) {
5dfe8660
TH
2775 unsigned long start_pfn, end_pfn;
2776 int i;
2777
2778 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2779 ret = iommu_domain_identity_map(si_domain,
2780 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2781 if (ret)
2782 return ret;
2783 }
c7ab48d2
DW
2784 }
2785
2c2e2c38
FY
2786 return 0;
2787}
2788
9b226624 2789static int identity_mapping(struct device *dev)
2c2e2c38
FY
2790{
2791 struct device_domain_info *info;
2792
2793 if (likely(!iommu_identity_mapping))
2794 return 0;
2795
9b226624 2796 info = dev->archdata.iommu;
cb452a40
MT
2797 if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2798 return (info->domain == si_domain);
2c2e2c38 2799
2c2e2c38
FY
2800 return 0;
2801}
2802
28ccce0d 2803static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
2c2e2c38 2804{
0ac72664 2805 struct dmar_domain *ndomain;
5a8f40e8 2806 struct intel_iommu *iommu;
156baca8 2807 u8 bus, devfn;
2c2e2c38 2808
5913c9bf 2809 iommu = device_to_iommu(dev, &bus, &devfn);
5a8f40e8
DW
2810 if (!iommu)
2811 return -ENODEV;
2812
5db31569 2813 ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
0ac72664
DW
2814 if (ndomain != domain)
2815 return -EBUSY;
2c2e2c38
FY
2816
2817 return 0;
2818}
2819
0b9d9753 2820static bool device_has_rmrr(struct device *dev)
ea2447f7
TM
2821{
2822 struct dmar_rmrr_unit *rmrr;
832bd858 2823 struct device *tmp;
ea2447f7
TM
2824 int i;
2825
0e242612 2826 rcu_read_lock();
ea2447f7 2827 for_each_rmrr_units(rmrr) {
b683b230
JL
2828 /*
2829 * Return TRUE if this RMRR contains the device that
2830 * is passed in.
2831 */
2832 for_each_active_dev_scope(rmrr->devices,
2833 rmrr->devices_cnt, i, tmp)
0b9d9753 2834 if (tmp == dev) {
0e242612 2835 rcu_read_unlock();
ea2447f7 2836 return true;
b683b230 2837 }
ea2447f7 2838 }
0e242612 2839 rcu_read_unlock();
ea2447f7
TM
2840 return false;
2841}
2842
c875d2c1
AW
2843/*
 2844 * There are a couple of cases where we need to restrict the functionality of
2845 * devices associated with RMRRs. The first is when evaluating a device for
2846 * identity mapping because problems exist when devices are moved in and out
2847 * of domains and their respective RMRR information is lost. This means that
2848 * a device with associated RMRRs will never be in a "passthrough" domain.
2849 * The second is use of the device through the IOMMU API. This interface
2850 * expects to have full control of the IOVA space for the device. We cannot
2851 * satisfy both the requirement that RMRR access is maintained and have an
2852 * unencumbered IOVA space. We also have no ability to quiesce the device's
2853 * use of the RMRR space or even inform the IOMMU API user of the restriction.
2854 * We therefore prevent devices associated with an RMRR from participating in
2855 * the IOMMU API, which eliminates them from device assignment.
2856 *
2857 * In both cases we assume that PCI USB devices with RMRRs have them largely
2858 * for historical reasons and that the RMRR space is not actively used post
2859 * boot. This exclusion may change if vendors begin to abuse it.
18436afd
DW
2860 *
2861 * The same exception is made for graphics devices, with the requirement that
2862 * any use of the RMRR regions will be torn down before assigning the device
2863 * to a guest.
c875d2c1
AW
2864 */
2865static bool device_is_rmrr_locked(struct device *dev)
2866{
2867 if (!device_has_rmrr(dev))
2868 return false;
2869
2870 if (dev_is_pci(dev)) {
2871 struct pci_dev *pdev = to_pci_dev(dev);
2872
18436afd 2873 if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
c875d2c1
AW
2874 return false;
2875 }
2876
2877 return true;
2878}
2879
3bdb2591 2880static int iommu_should_identity_map(struct device *dev, int startup)
6941af28 2881{
ea2447f7 2882
3bdb2591
DW
2883 if (dev_is_pci(dev)) {
2884 struct pci_dev *pdev = to_pci_dev(dev);
ea2447f7 2885
c875d2c1 2886 if (device_is_rmrr_locked(dev))
3bdb2591 2887 return 0;
e0fc7e0b 2888
3bdb2591
DW
2889 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2890 return 1;
e0fc7e0b 2891
3bdb2591
DW
2892 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2893 return 1;
6941af28 2894
3bdb2591 2895 if (!(iommu_identity_mapping & IDENTMAP_ALL))
3dfc813d 2896 return 0;
3bdb2591
DW
2897
2898 /*
2899 * We want to start off with all devices in the 1:1 domain, and
2900 * take them out later if we find they can't access all of memory.
2901 *
2902 * However, we can't do this for PCI devices behind bridges,
2903 * because all PCI devices behind the same bridge will end up
2904 * with the same source-id on their transactions.
2905 *
2906 * Practically speaking, we can't change things around for these
2907 * devices at run-time, because we can't be sure there'll be no
2908 * DMA transactions in flight for any of their siblings.
2909 *
2910 * So PCI devices (unless they're on the root bus) as well as
2911 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2912 * the 1:1 domain, just in _case_ one of their siblings turns out
2913 * not to be able to map all of memory.
2914 */
2915 if (!pci_is_pcie(pdev)) {
2916 if (!pci_is_root_bus(pdev->bus))
2917 return 0;
2918 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2919 return 0;
2920 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
3dfc813d 2921 return 0;
3bdb2591
DW
2922 } else {
2923 if (device_has_rmrr(dev))
2924 return 0;
2925 }
3dfc813d 2926
3bdb2591 2927 /*
3dfc813d 2928 * At boot time, we don't yet know if devices will be 64-bit capable.
3bdb2591 2929 * Assume that they will — if they turn out not to be, then we can
3dfc813d
DW
2930 * take them out of the 1:1 domain later.
2931 */
8fcc5372
CW
2932 if (!startup) {
2933 /*
2934 * If the device's dma_mask is less than the system's memory
2935 * size then this is not a candidate for identity mapping.
2936 */
3bdb2591 2937 u64 dma_mask = *dev->dma_mask;
8fcc5372 2938
3bdb2591
DW
2939 if (dev->coherent_dma_mask &&
2940 dev->coherent_dma_mask < dma_mask)
2941 dma_mask = dev->coherent_dma_mask;
8fcc5372 2942
3bdb2591 2943 return dma_mask >= dma_get_required_mask(dev);
8fcc5372 2944 }
6941af28
DW
2945
2946 return 1;
2947}
2948
cf04eee8
DW
2949static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
2950{
2951 int ret;
2952
2953 if (!iommu_should_identity_map(dev, 1))
2954 return 0;
2955
28ccce0d 2956 ret = domain_add_dev_info(si_domain, dev);
cf04eee8 2957 if (!ret)
9f10e5bf
JR
2958 pr_info("%s identity mapping for device %s\n",
2959 hw ? "Hardware" : "Software", dev_name(dev));
cf04eee8
DW
2960 else if (ret == -ENODEV)
2961 /* device not associated with an iommu */
2962 ret = 0;
2963
2964 return ret;
2965}
2966
2967
071e1374 2968static int __init iommu_prepare_static_identity_mapping(int hw)
2c2e2c38 2969{
2c2e2c38 2970 struct pci_dev *pdev = NULL;
cf04eee8
DW
2971 struct dmar_drhd_unit *drhd;
2972 struct intel_iommu *iommu;
2973 struct device *dev;
2974 int i;
2975 int ret = 0;
2c2e2c38 2976
2c2e2c38 2977 for_each_pci_dev(pdev) {
cf04eee8
DW
2978 ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
2979 if (ret)
2980 return ret;
2981 }
2982
2983 for_each_active_iommu(iommu, drhd)
2984 for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
2985 struct acpi_device_physical_node *pn;
2986 struct acpi_device *adev;
2987
2988 if (dev->bus != &acpi_bus_type)
2989 continue;
86080ccc 2990
cf04eee8
DW
2991 adev= to_acpi_device(dev);
2992 mutex_lock(&adev->physical_node_lock);
2993 list_for_each_entry(pn, &adev->physical_node_list, node) {
2994 ret = dev_prepare_static_identity_mapping(pn->dev, hw);
2995 if (ret)
2996 break;
eae460b6 2997 }
cf04eee8
DW
2998 mutex_unlock(&adev->physical_node_lock);
2999 if (ret)
3000 return ret;
62edf5dc 3001 }
2c2e2c38
FY
3002
3003 return 0;
3004}
3005
ffebeb46
JL
3006static void intel_iommu_init_qi(struct intel_iommu *iommu)
3007{
3008 /*
 3009 * Start from a sane iommu hardware state.
 3010 * If queued invalidation was already initialized by us
 3011 * (for example, while enabling interrupt-remapping) then
 3012 * things are already rolling from a sane state.
3013 */
3014 if (!iommu->qi) {
3015 /*
3016 * Clear any previous faults.
3017 */
3018 dmar_fault(-1, iommu);
3019 /*
3020 * Disable queued invalidation if supported and already enabled
3021 * before OS handover.
3022 */
3023 dmar_disable_qi(iommu);
3024 }
3025
3026 if (dmar_enable_qi(iommu)) {
3027 /*
 3028 * Queued Invalidation not enabled, use Register Based Invalidation
3029 */
3030 iommu->flush.flush_context = __iommu_flush_context;
3031 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
9f10e5bf 3032 pr_info("%s: Using Register based invalidation\n",
ffebeb46
JL
3033 iommu->name);
3034 } else {
3035 iommu->flush.flush_context = qi_flush_context;
3036 iommu->flush.flush_iotlb = qi_flush_iotlb;
9f10e5bf 3037 pr_info("%s: Using Queued invalidation\n", iommu->name);
ffebeb46
JL
3038 }
3039}
3040
091d42e4 3041static int copy_context_table(struct intel_iommu *iommu,
dfddb969 3042 struct root_entry *old_re,
091d42e4
JR
3043 struct context_entry **tbl,
3044 int bus, bool ext)
3045{
dbcd861f 3046 int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
543c8dcf 3047 struct context_entry *new_ce = NULL, ce;
dfddb969 3048 struct context_entry *old_ce = NULL;
543c8dcf 3049 struct root_entry re;
091d42e4
JR
3050 phys_addr_t old_ce_phys;
3051
3052 tbl_idx = ext ? bus * 2 : bus;
dfddb969 3053 memcpy(&re, old_re, sizeof(re));
091d42e4
JR
3054
3055 for (devfn = 0; devfn < 256; devfn++) {
3056 /* First calculate the correct index */
3057 idx = (ext ? devfn * 2 : devfn) % 256;
3058
3059 if (idx == 0) {
3060 /* First save what we may have and clean up */
3061 if (new_ce) {
3062 tbl[tbl_idx] = new_ce;
3063 __iommu_flush_cache(iommu, new_ce,
3064 VTD_PAGE_SIZE);
3065 pos = 1;
3066 }
3067
3068 if (old_ce)
3069 iounmap(old_ce);
3070
3071 ret = 0;
3072 if (devfn < 0x80)
543c8dcf 3073 old_ce_phys = root_entry_lctp(&re);
091d42e4 3074 else
543c8dcf 3075 old_ce_phys = root_entry_uctp(&re);
091d42e4
JR
3076
3077 if (!old_ce_phys) {
3078 if (ext && devfn == 0) {
3079 /* No LCTP, try UCTP */
3080 devfn = 0x7f;
3081 continue;
3082 } else {
3083 goto out;
3084 }
3085 }
3086
3087 ret = -ENOMEM;
dfddb969
DW
3088 old_ce = memremap(old_ce_phys, PAGE_SIZE,
3089 MEMREMAP_WB);
091d42e4
JR
3090 if (!old_ce)
3091 goto out;
3092
3093 new_ce = alloc_pgtable_page(iommu->node);
3094 if (!new_ce)
3095 goto out_unmap;
3096
3097 ret = 0;
3098 }
3099
3100 /* Now copy the context entry */
dfddb969 3101 memcpy(&ce, old_ce + idx, sizeof(ce));
091d42e4 3102
cf484d0e 3103 if (!__context_present(&ce))
091d42e4
JR
3104 continue;
3105
dbcd861f
JR
3106 did = context_domain_id(&ce);
3107 if (did >= 0 && did < cap_ndoms(iommu->cap))
3108 set_bit(did, iommu->domain_ids);
3109
cf484d0e
JR
3110 /*
3111 * We need a marker for copied context entries. This
3112 * marker needs to work for the old format as well as
3113 * for extended context entries.
3114 *
3115 * Bit 67 of the context entry is used. In the old
3116 * format this bit is available to software, in the
3117 * extended format it is the PGE bit, but PGE is ignored
3118 * by HW if PASIDs are disabled (and thus still
3119 * available).
3120 *
3121 * So disable PASIDs first and then mark the entry
3122 * copied. This means that we don't copy PASID
3123 * translations from the old kernel, but this is fine as
3124 * faults there are not fatal.
3125 */
3126 context_clear_pasid_enable(&ce);
3127 context_set_copied(&ce);
3128
091d42e4
JR
3129 new_ce[idx] = ce;
3130 }
3131
3132 tbl[tbl_idx + pos] = new_ce;
3133
3134 __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
3135
3136out_unmap:
dfddb969 3137 memunmap(old_ce);
091d42e4
JR
3138
3139out:
3140 return ret;
3141}
3142
3143static int copy_translation_tables(struct intel_iommu *iommu)
3144{
3145 struct context_entry **ctxt_tbls;
dfddb969 3146 struct root_entry *old_rt;
091d42e4
JR
3147 phys_addr_t old_rt_phys;
3148 int ctxt_table_entries;
3149 unsigned long flags;
3150 u64 rtaddr_reg;
3151 int bus, ret;
c3361f2f 3152 bool new_ext, ext;
091d42e4
JR
3153
3154 rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
3155 ext = !!(rtaddr_reg & DMA_RTADDR_RTT);
c3361f2f
JR
3156 new_ext = !!ecap_ecs(iommu->ecap);
3157
3158 /*
3159 * The RTT bit can only be changed when translation is disabled,
3160 * but disabling translation means to open a window for data
3161 * corruption. So bail out and don't copy anything if we would
3162 * have to change the bit.
3163 */
3164 if (new_ext != ext)
3165 return -EINVAL;
091d42e4
JR
3166
3167 old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
3168 if (!old_rt_phys)
3169 return -EINVAL;
3170
dfddb969 3171 old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
091d42e4
JR
3172 if (!old_rt)
3173 return -ENOMEM;
3174
3175 /* This is too big for the stack - allocate it from slab */
3176 ctxt_table_entries = ext ? 512 : 256;
3177 ret = -ENOMEM;
3178 ctxt_tbls = kzalloc(ctxt_table_entries * sizeof(void *), GFP_KERNEL);
3179 if (!ctxt_tbls)
3180 goto out_unmap;
3181
3182 for (bus = 0; bus < 256; bus++) {
3183 ret = copy_context_table(iommu, &old_rt[bus],
3184 ctxt_tbls, bus, ext);
3185 if (ret) {
3186 pr_err("%s: Failed to copy context table for bus %d\n",
3187 iommu->name, bus);
3188 continue;
3189 }
3190 }
3191
3192 spin_lock_irqsave(&iommu->lock, flags);
3193
3194 /* Context tables are copied, now write them to the root_entry table */
3195 for (bus = 0; bus < 256; bus++) {
3196 int idx = ext ? bus * 2 : bus;
3197 u64 val;
3198
3199 if (ctxt_tbls[idx]) {
3200 val = virt_to_phys(ctxt_tbls[idx]) | 1;
3201 iommu->root_entry[bus].lo = val;
3202 }
3203
3204 if (!ext || !ctxt_tbls[idx + 1])
3205 continue;
3206
3207 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
3208 iommu->root_entry[bus].hi = val;
3209 }
3210
3211 spin_unlock_irqrestore(&iommu->lock, flags);
3212
3213 kfree(ctxt_tbls);
3214
3215 __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
3216
3217 ret = 0;
3218
3219out_unmap:
dfddb969 3220 memunmap(old_rt);
091d42e4
JR
3221
3222 return ret;
3223}
3224
b779260b 3225static int __init init_dmars(void)
ba395927
KA
3226{
3227 struct dmar_drhd_unit *drhd;
3228 struct dmar_rmrr_unit *rmrr;
a87f4918 3229 bool copied_tables = false;
832bd858 3230 struct device *dev;
ba395927 3231 struct intel_iommu *iommu;
13cf0174 3232 int i, ret;
2c2e2c38 3233
ba395927
KA
3234 /*
3235 * for each drhd
3236 * allocate root
3237 * initialize and program root entry to not present
3238 * endfor
3239 */
3240 for_each_drhd_unit(drhd) {
5e0d2a6f 3241 /*
 3242 * lock not needed as this is only incremented in the single
 3243 * threaded kernel __init code path; all other accesses are
 3244 * read only
3245 */
78d8e704 3246 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
1b198bb0
MT
3247 g_num_of_iommus++;
3248 continue;
3249 }
9f10e5bf 3250 pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
5e0d2a6f 3251 }
3252
ffebeb46
JL
3253 /* Preallocate enough resources for IOMMU hot-addition */
3254 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
3255 g_num_of_iommus = DMAR_UNITS_SUPPORTED;
3256
d9630fe9
WH
3257 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
3258 GFP_KERNEL);
3259 if (!g_iommus) {
9f10e5bf 3260 pr_err("Allocating global iommu array failed\n");
d9630fe9
WH
3261 ret = -ENOMEM;
3262 goto error;
3263 }
3264
7c919779 3265 for_each_active_iommu(iommu, drhd) {
d9630fe9 3266 g_iommus[iommu->seq_id] = iommu;
ba395927 3267
b63d80d1
JR
3268 intel_iommu_init_qi(iommu);
3269
e61d98d8
SS
3270 ret = iommu_init_domains(iommu);
3271 if (ret)
989d51fc 3272 goto free_iommu;
e61d98d8 3273
4158c2ec
JR
3274 init_translation_status(iommu);
3275
091d42e4
JR
3276 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
3277 iommu_disable_translation(iommu);
3278 clear_translation_pre_enabled(iommu);
3279 pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3280 iommu->name);
3281 }
4158c2ec 3282
ba395927
KA
3283 /*
3284 * TBD:
3285 * we could share the same root & context tables
25985edc 3286 * among all IOMMUs. Need to split it later.
ba395927
KA
3287 */
3288 ret = iommu_alloc_root_entry(iommu);
ffebeb46 3289 if (ret)
989d51fc 3290 goto free_iommu;
5f0a7f76 3291
091d42e4
JR
3292 if (translation_pre_enabled(iommu)) {
3293 pr_info("Translation already enabled - trying to copy translation structures\n");
3294
3295 ret = copy_translation_tables(iommu);
3296 if (ret) {
3297 /*
3298 * We found the IOMMU with translation
3299 * enabled - but failed to copy over the
3300 * old root-entry table. Try to proceed
3301 * by disabling translation now and
3302 * allocating a clean root-entry table.
3303 * This might cause DMAR faults, but
3304 * probably the dump will still succeed.
3305 */
3306 pr_err("Failed to copy translation tables from previous kernel for %s\n",
3307 iommu->name);
3308 iommu_disable_translation(iommu);
3309 clear_translation_pre_enabled(iommu);
3310 } else {
3311 pr_info("Copied translation tables from previous kernel for %s\n",
3312 iommu->name);
a87f4918 3313 copied_tables = true;
091d42e4
JR
3314 }
3315 }
3316
4ed0d3e6 3317 if (!ecap_pass_through(iommu->ecap))
19943b0e 3318 hw_pass_through = 0;
8a94ade4
DW
3319#ifdef CONFIG_INTEL_IOMMU_SVM
3320 if (pasid_enabled(iommu))
3321 intel_svm_alloc_pasid_tables(iommu);
3322#endif
ba395927
KA
3323 }
3324
a4c34ff1
JR
3325 /*
3326 * Now that qi is enabled on all iommus, set the root entry and flush
3327 * caches. This is required on some Intel X58 chipsets, otherwise the
3328 * flush_context function will loop forever and the boot hangs.
3329 */
3330 for_each_active_iommu(iommu, drhd) {
3331 iommu_flush_write_buffer(iommu);
3332 iommu_set_root_entry(iommu);
3333 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
3334 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3335 }
3336
19943b0e 3337 if (iommu_pass_through)
e0fc7e0b
DW
3338 iommu_identity_mapping |= IDENTMAP_ALL;
3339
d3f13810 3340#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
e0fc7e0b 3341 iommu_identity_mapping |= IDENTMAP_GFX;
19943b0e 3342#endif
e0fc7e0b 3343
21e722c4
AR
3344 check_tylersburg_isoch();
3345
86080ccc
JR
3346 if (iommu_identity_mapping) {
3347 ret = si_domain_init(hw_pass_through);
3348 if (ret)
3349 goto free_iommu;
3350 }
3351
e0fc7e0b 3352
a87f4918
JR
3353 /*
3354 * If we copied translations from a previous kernel in the kdump
 3355 * case, we cannot assign the devices to domains now, as that
3356 * would eliminate the old mappings. So skip this part and defer
3357 * the assignment to device driver initialization time.
3358 */
3359 if (copied_tables)
3360 goto domains_done;
3361
ba395927 3362 /*
19943b0e
DW
 3363 * If pass through is not set or not enabled, set up context entries for
3364 * identity mappings for rmrr, gfx, and isa and may fall back to static
3365 * identity mapping if iommu_identity_mapping is set.
ba395927 3366 */
19943b0e
DW
3367 if (iommu_identity_mapping) {
3368 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
4ed0d3e6 3369 if (ret) {
9f10e5bf 3370 pr_crit("Failed to setup IOMMU pass-through\n");
989d51fc 3371 goto free_iommu;
ba395927
KA
3372 }
3373 }
ba395927 3374 /*
19943b0e
DW
3375 * For each rmrr
3376 * for each dev attached to rmrr
3377 * do
3378 * locate drhd for dev, alloc domain for dev
3379 * allocate free domain
3380 * allocate page table entries for rmrr
3381 * if context not allocated for bus
3382 * allocate and init context
3383 * set present in root table for this bus
3384 * init context with domain, translation etc
3385 * endfor
3386 * endfor
ba395927 3387 */
9f10e5bf 3388 pr_info("Setting RMRR:\n");
19943b0e 3389 for_each_rmrr_units(rmrr) {
b683b230
JL
3390 /* some BIOS lists non-exist devices in DMAR table. */
3391 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
832bd858 3392 i, dev) {
0b9d9753 3393 ret = iommu_prepare_rmrr_dev(rmrr, dev);
19943b0e 3394 if (ret)
9f10e5bf 3395 pr_err("Mapping reserved region failed\n");
ba395927 3396 }
4ed0d3e6 3397 }
49a0429e 3398
19943b0e
DW
3399 iommu_prepare_isa();
3400
a87f4918
JR
3401domains_done:
3402
ba395927
KA
3403 /*
3404 * for each drhd
3405 * enable fault log
3406 * global invalidate context cache
3407 * global invalidate iotlb
3408 * enable translation
3409 */
7c919779 3410 for_each_iommu(iommu, drhd) {
51a63e67
JC
3411 if (drhd->ignored) {
3412 /*
3413 * we always have to disable PMRs or DMA may fail on
3414 * this device
3415 */
3416 if (force_on)
7c919779 3417 iommu_disable_protect_mem_regions(iommu);
ba395927 3418 continue;
51a63e67 3419 }
ba395927
KA
3420
3421 iommu_flush_write_buffer(iommu);
3422
a222a7f0
DW
3423#ifdef CONFIG_INTEL_IOMMU_SVM
3424 if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) {
3425 ret = intel_svm_enable_prq(iommu);
3426 if (ret)
3427 goto free_iommu;
3428 }
3429#endif
3460a6d9
KA
3430 ret = dmar_set_interrupt(iommu);
3431 if (ret)
989d51fc 3432 goto free_iommu;
3460a6d9 3433
8939ddf6
JR
3434 if (!translation_pre_enabled(iommu))
3435 iommu_enable_translation(iommu);
3436
b94996c9 3437 iommu_disable_protect_mem_regions(iommu);
ba395927
KA
3438 }
3439
3440 return 0;
989d51fc
JL
3441
3442free_iommu:
ffebeb46
JL
3443 for_each_active_iommu(iommu, drhd) {
3444 disable_dmar_iommu(iommu);
a868e6b7 3445 free_dmar_iommu(iommu);
ffebeb46 3446 }
13cf0174 3447
d9630fe9 3448 kfree(g_iommus);
13cf0174 3449
989d51fc 3450error:
ba395927
KA
3451 return ret;
3452}
3453
5a5e02a6 3454/* This takes a number of _MM_ pages, not VTD pages */
2aac6304 3455static unsigned long intel_alloc_iova(struct device *dev,
875764de
DW
3456 struct dmar_domain *domain,
3457 unsigned long nrpages, uint64_t dma_mask)
ba395927 3458{
22e2f9fa 3459 unsigned long iova_pfn = 0;
ba395927 3460
875764de
DW
3461 /* Restrict dma_mask to the width that the iommu can handle */
3462 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
8f6429c7
RM
3463 /* Ensure we reserve the whole size-aligned region */
3464 nrpages = __roundup_pow_of_two(nrpages);
875764de
DW
3465
3466 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
ba395927
KA
3467 /*
3468 * First try to allocate an io virtual address in
284901a9 3469 * DMA_BIT_MASK(32) and if that fails then try allocating
3609801e 3470 * from the higher range
ba395927 3471 */
22e2f9fa 3472 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
538d5b33 3473 IOVA_PFN(DMA_BIT_MASK(32)), false);
22e2f9fa
OP
3474 if (iova_pfn)
3475 return iova_pfn;
875764de 3476 }
538d5b33
TN
3477 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
3478 IOVA_PFN(dma_mask), true);
22e2f9fa 3479 if (unlikely(!iova_pfn)) {
9f10e5bf 3480 pr_err("Allocating %ld-page iova for %s failed",
207e3592 3481 nrpages, dev_name(dev));
2aac6304 3482 return 0;
f76aec76
KA
3483 }
3484
22e2f9fa 3485 return iova_pfn;
f76aec76
KA
3486}
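
In short, the allocator above rounds the request up to a power of two and makes at most two passes: one capped at 4GiB (skipped when dmar_forcedac is set or the device cannot address beyond 32 bits anyway), then one capped at the full DMA mask. A worked illustration, assuming a 3-page request from a 64-bit capable device in a domain with gaw = 48:

/*
 * dma_mask  = min(DOMAIN_MAX_ADDR(48), DMA_BIT_MASK(64))
 *           = DOMAIN_MAX_ADDR(48), i.e. just under 1ULL << 48
 * nrpages   = __roundup_pow_of_two(3) = 4   (keeps IOVAs size-aligned)
 * 1st pass  : alloc_iova_fast(..., IOVA_PFN(DMA_BIT_MASK(32)), false)
 *             keeps the allocation below 4GiB so 32-bit-only devices
 *             sharing the domain still have room
 * 2nd pass  : alloc_iova_fast(..., IOVA_PFN(dma_mask), true)
 *             only reached if the low space is exhausted
 */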
3487
b316d02a 3488static struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
f76aec76 3489{
1c5ebba9 3490 struct dmar_domain *domain, *tmp;
b1ce5b79 3491 struct dmar_rmrr_unit *rmrr;
b1ce5b79
JR
3492 struct device *i_dev;
3493 int i, ret;
f76aec76 3494
1c5ebba9
JR
3495 domain = find_domain(dev);
3496 if (domain)
3497 goto out;
3498
3499 domain = find_or_alloc_domain(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
3500 if (!domain)
3501 goto out;
ba395927 3502
b1ce5b79
JR
3503 /* We have a new domain - setup possible RMRRs for the device */
3504 rcu_read_lock();
3505 for_each_rmrr_units(rmrr) {
3506 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
3507 i, i_dev) {
3508 if (i_dev != dev)
3509 continue;
3510
3511 ret = domain_prepare_identity_map(dev, domain,
3512 rmrr->base_address,
3513 rmrr->end_address);
3514 if (ret)
3515 dev_err(dev, "Mapping reserved region failed\n");
3516 }
3517 }
3518 rcu_read_unlock();
3519
1c5ebba9
JR
3520 tmp = set_domain_for_dev(dev, domain);
3521 if (!tmp || domain != tmp) {
3522 domain_exit(domain);
3523 domain = tmp;
3524 }
3525
3526out:
3527
3528 if (!domain)
3529 pr_err("Allocating domain for %s failed\n", dev_name(dev));
3530
3531
f76aec76
KA
3532 return domain;
3533}
3534
ecb509ec 3535 /* Check if the dev needs to go through non-identity map and unmap process. */
73676832 3536static int iommu_no_mapping(struct device *dev)
2c2e2c38
FY
3537{
3538 int found;
3539
3d89194a 3540 if (iommu_dummy(dev))
1e4c64c4
DW
3541 return 1;
3542
2c2e2c38 3543 if (!iommu_identity_mapping)
1e4c64c4 3544 return 0;
2c2e2c38 3545
9b226624 3546 found = identity_mapping(dev);
2c2e2c38 3547 if (found) {
ecb509ec 3548 if (iommu_should_identity_map(dev, 0))
2c2e2c38
FY
3549 return 1;
3550 else {
3551 /*
3552 * 32 bit DMA device is removed from si_domain and falls back
3553 * to non-identity mapping.
3554 */
e6de0f8d 3555 dmar_remove_one_dev_info(si_domain, dev);
9f10e5bf
JR
3556 pr_info("32bit %s uses non-identity mapping\n",
3557 dev_name(dev));
2c2e2c38
FY
3558 return 0;
3559 }
3560 } else {
3561 /*
3562 * When a 64 bit DMA device is detached from a VM, the device
3563 * is put into si_domain for identity mapping.
3564 */
ecb509ec 3565 if (iommu_should_identity_map(dev, 0)) {
2c2e2c38 3566 int ret;
28ccce0d 3567 ret = domain_add_dev_info(si_domain, dev);
2c2e2c38 3568 if (!ret) {
9f10e5bf
JR
3569 pr_info("64bit %s uses identity mapping\n",
3570 dev_name(dev));
2c2e2c38
FY
3571 return 1;
3572 }
3573 }
3574 }
3575
1e4c64c4 3576 return 0;
2c2e2c38
FY
3577}
3578
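/*
 * Map a single physically contiguous buffer for DMA: allocate an IOVA
 * range large enough for @size, create page-table entries with read/write
 * permission derived from @dir, then flush the IOTLB when in caching mode
 * (otherwise only the write buffer). Returns the bus address, or 0 on
 * failure.
 */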
5040a918 3579static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
bb9e6d65 3580 size_t size, int dir, u64 dma_mask)
f76aec76 3581{
f76aec76 3582 struct dmar_domain *domain;
5b6985ce 3583 phys_addr_t start_paddr;
2aac6304 3584 unsigned long iova_pfn;
f76aec76 3585 int prot = 0;
6865f0d1 3586 int ret;
8c11e798 3587 struct intel_iommu *iommu;
33041ec0 3588 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
f76aec76
KA
3589
3590 BUG_ON(dir == DMA_NONE);
2c2e2c38 3591
5040a918 3592 if (iommu_no_mapping(dev))
6865f0d1 3593 return paddr;
f76aec76 3594
5040a918 3595 domain = get_valid_domain_for_dev(dev);
f76aec76
KA
3596 if (!domain)
3597 return 0;
3598
8c11e798 3599 iommu = domain_get_iommu(domain);
88cb6a74 3600 size = aligned_nrpages(paddr, size);
f76aec76 3601
2aac6304
OP
3602 iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
3603 if (!iova_pfn)
f76aec76
KA
3604 goto error;
3605
ba395927
KA
3606 /*
3607 * Check if DMAR supports zero-length reads on write only
3608 * mappings.
3609 */
3610 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3611 !cap_zlr(iommu->cap))
ba395927
KA
3612 prot |= DMA_PTE_READ;
3613 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3614 prot |= DMA_PTE_WRITE;
3615 /*
6865f0d1 3616 * paddr through (paddr + size) might span a partial page; we should map the whole
ba395927 3617 * page. Note: if two parts of one page are mapped separately, we
6865f0d1 3618 * might have two guest_addr mappings to the same host paddr, but this
ba395927
KA
3619 * is not a big problem
3620 */
2aac6304 3621 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova_pfn),
33041ec0 3622 mm_to_dma_pfn(paddr_pfn), size, prot);
ba395927
KA
3623 if (ret)
3624 goto error;
3625
1f0ef2aa
DW
3626 /* it's a non-present to present mapping. Only flush if caching mode */
3627 if (cap_caching_mode(iommu->cap))
a1ddcbe9 3628 iommu_flush_iotlb_psi(iommu, domain,
2aac6304 3629 mm_to_dma_pfn(iova_pfn),
a1ddcbe9 3630 size, 0, 1);
1f0ef2aa 3631 else
8c11e798 3632 iommu_flush_write_buffer(iommu);
f76aec76 3633
2aac6304 3634 start_paddr = (phys_addr_t)iova_pfn << PAGE_SHIFT;
03d6a246
DW
3635 start_paddr += paddr & ~PAGE_MASK;
3636 return start_paddr;
ba395927 3637
ba395927 3638error:
2aac6304 3639 if (iova_pfn)
22e2f9fa 3640 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
9f10e5bf 3641 pr_err("Device %s request: %zx@%llx dir %d --- failed\n",
5040a918 3642 dev_name(dev), size, (unsigned long long)paddr, dir);
ba395927
KA
3643 return 0;
3644}
3645
ffbbef5c
FT
3646static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3647 unsigned long offset, size_t size,
3648 enum dma_data_direction dir,
00085f1e 3649 unsigned long attrs)
bb9e6d65 3650{
ffbbef5c 3651 return __intel_map_single(dev, page_to_phys(page) + offset, size,
46333e37 3652 dir, *dev->dma_mask);
bb9e6d65
FT
3653}
3654
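/*
 * Tear down the mapping at @dev_addr: unmap the page-table range and
 * either flush the IOTLB and free the IOVA immediately (strict mode) or
 * queue the IOVA so the flush can be batched later.
 */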
769530e4 3655static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
ba395927 3656{
f76aec76 3657 struct dmar_domain *domain;
d794dc9b 3658 unsigned long start_pfn, last_pfn;
769530e4 3659 unsigned long nrpages;
2aac6304 3660 unsigned long iova_pfn;
8c11e798 3661 struct intel_iommu *iommu;
ea8ea460 3662 struct page *freelist;
ba395927 3663
73676832 3664 if (iommu_no_mapping(dev))
f76aec76 3665 return;
2c2e2c38 3666
1525a29a 3667 domain = find_domain(dev);
ba395927
KA
3668 BUG_ON(!domain);
3669
8c11e798
WH
3670 iommu = domain_get_iommu(domain);
3671
2aac6304 3672 iova_pfn = IOVA_PFN(dev_addr);
ba395927 3673
769530e4 3674 nrpages = aligned_nrpages(dev_addr, size);
2aac6304 3675 start_pfn = mm_to_dma_pfn(iova_pfn);
769530e4 3676 last_pfn = start_pfn + nrpages - 1;
ba395927 3677
d794dc9b 3678 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
207e3592 3679 dev_name(dev), start_pfn, last_pfn);
ba395927 3680
ea8ea460 3681 freelist = domain_unmap(domain, start_pfn, last_pfn);
d794dc9b 3682
5e0d2a6f 3683 if (intel_iommu_strict) {
a1ddcbe9 3684 iommu_flush_iotlb_psi(iommu, domain, start_pfn,
769530e4 3685 nrpages, !freelist, 0);
5e0d2a6f 3686 /* free iova */
22e2f9fa 3687 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages));
ea8ea460 3688 dma_free_pagelist(freelist);
5e0d2a6f 3689 } else {
13cf0174
JR
3690 queue_iova(&domain->iovad, iova_pfn, nrpages,
3691 (unsigned long)freelist);
5e0d2a6f 3692 /*
3693 * queue up the release of the unmap to save the roughly 1/6th of
3694 * the cpu time used up by the iotlb flush operation...
3695 */
5e0d2a6f 3696 }
ba395927
KA
3697}
3698
d41a4adb
JL
3699static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3700 size_t size, enum dma_data_direction dir,
00085f1e 3701 unsigned long attrs)
d41a4adb 3702{
769530e4 3703 intel_unmap(dev, dev_addr, size);
d41a4adb
JL
3704}
3705
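/*
 * Allocate a coherent buffer: try the contiguous (CMA) allocator for
 * blocking requests, fall back to alloc_pages(), zero the memory, then
 * map it bidirectionally through __intel_map_single().
 */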
5040a918 3706static void *intel_alloc_coherent(struct device *dev, size_t size,
baa676fc 3707 dma_addr_t *dma_handle, gfp_t flags,
00085f1e 3708 unsigned long attrs)
ba395927 3709{
36746436 3710 struct page *page = NULL;
ba395927
KA
3711 int order;
3712
5b6985ce 3713 size = PAGE_ALIGN(size);
ba395927 3714 order = get_order(size);
e8bb910d 3715
5040a918 3716 if (!iommu_no_mapping(dev))
e8bb910d 3717 flags &= ~(GFP_DMA | GFP_DMA32);
5040a918
DW
3718 else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
3719 if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
e8bb910d
AW
3720 flags |= GFP_DMA;
3721 else
3722 flags |= GFP_DMA32;
3723 }
ba395927 3724
d0164adc 3725 if (gfpflags_allow_blocking(flags)) {
36746436
AM
3726 unsigned int count = size >> PAGE_SHIFT;
3727
712c604d 3728 page = dma_alloc_from_contiguous(dev, count, order, flags);
36746436
AM
3729 if (page && iommu_no_mapping(dev) &&
3730 page_to_phys(page) + size > dev->coherent_dma_mask) {
3731 dma_release_from_contiguous(dev, page, count);
3732 page = NULL;
3733 }
3734 }
3735
3736 if (!page)
3737 page = alloc_pages(flags, order);
3738 if (!page)
ba395927 3739 return NULL;
36746436 3740 memset(page_address(page), 0, size);
ba395927 3741
36746436 3742 *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
bb9e6d65 3743 DMA_BIDIRECTIONAL,
5040a918 3744 dev->coherent_dma_mask);
ba395927 3745 if (*dma_handle)
36746436
AM
3746 return page_address(page);
3747 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3748 __free_pages(page, order);
3749
ba395927
KA
3750 return NULL;
3751}
3752
5040a918 3753static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
00085f1e 3754 dma_addr_t dma_handle, unsigned long attrs)
ba395927
KA
3755{
3756 int order;
36746436 3757 struct page *page = virt_to_page(vaddr);
ba395927 3758
5b6985ce 3759 size = PAGE_ALIGN(size);
ba395927
KA
3760 order = get_order(size);
3761
769530e4 3762 intel_unmap(dev, dma_handle, size);
36746436
AM
3763 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3764 __free_pages(page, order);
ba395927
KA
3765}
3766
5040a918 3767static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
d7ab5c46 3768 int nelems, enum dma_data_direction dir,
00085f1e 3769 unsigned long attrs)
ba395927 3770{
769530e4
OP
3771 dma_addr_t startaddr = sg_dma_address(sglist) & PAGE_MASK;
3772 unsigned long nrpages = 0;
3773 struct scatterlist *sg;
3774 int i;
3775
3776 for_each_sg(sglist, sg, nelems, i) {
3777 nrpages += aligned_nrpages(sg_dma_address(sg), sg_dma_len(sg));
3778 }
3779
3780 intel_unmap(dev, startaddr, nrpages << VTD_PAGE_SHIFT);
ba395927
KA
3781}
3782
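/*
 * Used when the device bypasses translation: simply publish the physical
 * address and length of each scatterlist entry as its DMA address.
 */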
ba395927 3783static int intel_nontranslate_map_sg(struct device *hddev,
c03ab37c 3784 struct scatterlist *sglist, int nelems, int dir)
ba395927
KA
3785{
3786 int i;
c03ab37c 3787 struct scatterlist *sg;
ba395927 3788
c03ab37c 3789 for_each_sg(sglist, sg, nelems, i) {
12d4d40e 3790 BUG_ON(!sg_page(sg));
29a90b70 3791 sg->dma_address = sg_phys(sg);
c03ab37c 3792 sg->dma_length = sg->length;
ba395927
KA
3793 }
3794 return nelems;
3795}
3796
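/*
 * Map a scatterlist into one contiguous IOVA range sized to the sum of
 * the page-aligned segment lengths; on failure the partially built page
 * tables and the IOVA allocation are released.
 */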
5040a918 3797static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
00085f1e 3798 enum dma_data_direction dir, unsigned long attrs)
ba395927 3799{
ba395927 3800 int i;
ba395927 3801 struct dmar_domain *domain;
f76aec76
KA
3802 size_t size = 0;
3803 int prot = 0;
2aac6304 3804 unsigned long iova_pfn;
f76aec76 3805 int ret;
c03ab37c 3806 struct scatterlist *sg;
b536d24d 3807 unsigned long start_vpfn;
8c11e798 3808 struct intel_iommu *iommu;
ba395927
KA
3809
3810 BUG_ON(dir == DMA_NONE);
5040a918
DW
3811 if (iommu_no_mapping(dev))
3812 return intel_nontranslate_map_sg(dev, sglist, nelems, dir);
ba395927 3813
5040a918 3814 domain = get_valid_domain_for_dev(dev);
f76aec76
KA
3815 if (!domain)
3816 return 0;
3817
8c11e798
WH
3818 iommu = domain_get_iommu(domain);
3819
b536d24d 3820 for_each_sg(sglist, sg, nelems, i)
88cb6a74 3821 size += aligned_nrpages(sg->offset, sg->length);
f76aec76 3822
2aac6304 3823 iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
5040a918 3824 *dev->dma_mask);
2aac6304 3825 if (!iova_pfn) {
c03ab37c 3826 sglist->dma_length = 0;
f76aec76
KA
3827 return 0;
3828 }
3829
3830 /*
3831 * Check if DMAR supports zero-length reads on write only
3832 * mappings.
3833 */
3834 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3835 !cap_zlr(iommu->cap))
f76aec76
KA
3836 prot |= DMA_PTE_READ;
3837 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3838 prot |= DMA_PTE_WRITE;
3839
2aac6304 3840 start_vpfn = mm_to_dma_pfn(iova_pfn);
e1605495 3841
f532959b 3842 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
e1605495 3843 if (unlikely(ret)) {
e1605495 3844 dma_pte_free_pagetable(domain, start_vpfn,
bc24c571
DD
3845 start_vpfn + size - 1,
3846 agaw_to_level(domain->agaw) + 1);
22e2f9fa 3847 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
e1605495 3848 return 0;
ba395927
KA
3849 }
3850
1f0ef2aa
DW
3851 /* it's a non-present to present mapping. Only flush if caching mode */
3852 if (cap_caching_mode(iommu->cap))
a1ddcbe9 3853 iommu_flush_iotlb_psi(iommu, domain, start_vpfn, size, 0, 1);
1f0ef2aa 3854 else
8c11e798 3855 iommu_flush_write_buffer(iommu);
1f0ef2aa 3856
ba395927
KA
3857 return nelems;
3858}
3859
dfb805e8
FT
3860static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3861{
3862 return !dma_addr;
3863}
3864
01e1932a 3865const struct dma_map_ops intel_dma_ops = {
baa676fc
AP
3866 .alloc = intel_alloc_coherent,
3867 .free = intel_free_coherent,
ba395927
KA
3868 .map_sg = intel_map_sg,
3869 .unmap_sg = intel_unmap_sg,
ffbbef5c
FT
3870 .map_page = intel_map_page,
3871 .unmap_page = intel_unmap_page,
dfb805e8 3872 .mapping_error = intel_mapping_error,
5860acc1
CH
3873#ifdef CONFIG_X86
3874 .dma_supported = x86_dma_supported,
3875#endif
ba395927
KA
3876};
3877
3878static inline int iommu_domain_cache_init(void)
3879{
3880 int ret = 0;
3881
3882 iommu_domain_cache = kmem_cache_create("iommu_domain",
3883 sizeof(struct dmar_domain),
3884 0,
3885 SLAB_HWCACHE_ALIGN,
3886
3887 NULL);
3888 if (!iommu_domain_cache) {
9f10e5bf 3889 pr_err("Couldn't create iommu_domain cache\n");
ba395927
KA
3890 ret = -ENOMEM;
3891 }
3892
3893 return ret;
3894}
3895
3896static inline int iommu_devinfo_cache_init(void)
3897{
3898 int ret = 0;
3899
3900 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3901 sizeof(struct device_domain_info),
3902 0,
3903 SLAB_HWCACHE_ALIGN,
ba395927
KA
3904 NULL);
3905 if (!iommu_devinfo_cache) {
9f10e5bf 3906 pr_err("Couldn't create devinfo cache\n");
ba395927
KA
3907 ret = -ENOMEM;
3908 }
3909
3910 return ret;
3911}
3912
ba395927
KA
3913static int __init iommu_init_mempool(void)
3914{
3915 int ret;
ae1ff3d6 3916 ret = iova_cache_get();
ba395927
KA
3917 if (ret)
3918 return ret;
3919
3920 ret = iommu_domain_cache_init();
3921 if (ret)
3922 goto domain_error;
3923
3924 ret = iommu_devinfo_cache_init();
3925 if (!ret)
3926 return ret;
3927
3928 kmem_cache_destroy(iommu_domain_cache);
3929domain_error:
ae1ff3d6 3930 iova_cache_put();
ba395927
KA
3931
3932 return -ENOMEM;
3933}
3934
3935static void __init iommu_exit_mempool(void)
3936{
3937 kmem_cache_destroy(iommu_devinfo_cache);
3938 kmem_cache_destroy(iommu_domain_cache);
ae1ff3d6 3939 iova_cache_put();
ba395927
KA
3940}
3941
556ab45f
DW
3942static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3943{
3944 struct dmar_drhd_unit *drhd;
3945 u32 vtbar;
3946 int rc;
3947
3948 /* We know that this device on this chipset has its own IOMMU.
3949 * If we find it under a different IOMMU, then the BIOS is lying
3950 * to us. Hope that the IOMMU for this device is actually
3951 * disabled, and it needs no translation...
3952 */
3953 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3954 if (rc) {
3955 /* "can't" happen */
3956 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3957 return;
3958 }
3959 vtbar &= 0xffff0000;
3960
3961 /* we know that this iommu should be at offset 0xa000 from vtbar */
3962 drhd = dmar_find_matched_drhd_unit(pdev);
3963 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3964 TAINT_FIRMWARE_WORKAROUND,
3965 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3966 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3967}
3968DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3969
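/*
 * Mark DRHD units that can be skipped: units whose device scope is empty
 * are ignored outright, and units that cover only graphics devices are
 * either flagged via intel_iommu_gfx_mapped or bypassed entirely when
 * dmar_map_gfx is clear.
 */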
ba395927
KA
3970static void __init init_no_remapping_devices(void)
3971{
3972 struct dmar_drhd_unit *drhd;
832bd858 3973 struct device *dev;
b683b230 3974 int i;
ba395927
KA
3975
3976 for_each_drhd_unit(drhd) {
3977 if (!drhd->include_all) {
b683b230
JL
3978 for_each_active_dev_scope(drhd->devices,
3979 drhd->devices_cnt, i, dev)
3980 break;
832bd858 3981 /* ignore DMAR unit if no devices exist */
ba395927
KA
3982 if (i == drhd->devices_cnt)
3983 drhd->ignored = 1;
3984 }
3985 }
3986
7c919779 3987 for_each_active_drhd_unit(drhd) {
7c919779 3988 if (drhd->include_all)
ba395927
KA
3989 continue;
3990
b683b230
JL
3991 for_each_active_dev_scope(drhd->devices,
3992 drhd->devices_cnt, i, dev)
832bd858 3993 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
ba395927 3994 break;
ba395927
KA
3995 if (i < drhd->devices_cnt)
3996 continue;
3997
c0771df8
DW
3998 /* This IOMMU has *only* gfx devices. Either bypass it or
3999 set the gfx_mapped flag, as appropriate */
4000 if (dmar_map_gfx) {
4001 intel_iommu_gfx_mapped = 1;
4002 } else {
4003 drhd->ignored = 1;
b683b230
JL
4004 for_each_active_dev_scope(drhd->devices,
4005 drhd->devices_cnt, i, dev)
832bd858 4006 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
ba395927
KA
4007 }
4008 }
4009}
4010
f59c7b69
FY
4011#ifdef CONFIG_SUSPEND
4012static int init_iommu_hw(void)
4013{
4014 struct dmar_drhd_unit *drhd;
4015 struct intel_iommu *iommu = NULL;
4016
4017 for_each_active_iommu(iommu, drhd)
4018 if (iommu->qi)
4019 dmar_reenable_qi(iommu);
4020
b779260b
JC
4021 for_each_iommu(iommu, drhd) {
4022 if (drhd->ignored) {
4023 /*
4024 * we always have to disable PMRs or DMA may fail on
4025 * this device
4026 */
4027 if (force_on)
4028 iommu_disable_protect_mem_regions(iommu);
4029 continue;
4030 }
4031
f59c7b69
FY
4032 iommu_flush_write_buffer(iommu);
4033
4034 iommu_set_root_entry(iommu);
4035
4036 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 4037 DMA_CCMD_GLOBAL_INVL);
2a41ccee
JL
4038 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4039 iommu_enable_translation(iommu);
b94996c9 4040 iommu_disable_protect_mem_regions(iommu);
f59c7b69
FY
4041 }
4042
4043 return 0;
4044}
4045
4046static void iommu_flush_all(void)
4047{
4048 struct dmar_drhd_unit *drhd;
4049 struct intel_iommu *iommu;
4050
4051 for_each_active_iommu(iommu, drhd) {
4052 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 4053 DMA_CCMD_GLOBAL_INVL);
f59c7b69 4054 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 4055 DMA_TLB_GLOBAL_FLUSH);
f59c7b69
FY
4056 }
4057}
4058
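/*
 * Flush all caches, disable translation and save the fault-event
 * registers of every active IOMMU so iommu_resume() can restore them
 * after re-enabling the hardware via init_iommu_hw().
 */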
134fac3f 4059static int iommu_suspend(void)
f59c7b69
FY
4060{
4061 struct dmar_drhd_unit *drhd;
4062 struct intel_iommu *iommu = NULL;
4063 unsigned long flag;
4064
4065 for_each_active_iommu(iommu, drhd) {
4066 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
4067 GFP_ATOMIC);
4068 if (!iommu->iommu_state)
4069 goto nomem;
4070 }
4071
4072 iommu_flush_all();
4073
4074 for_each_active_iommu(iommu, drhd) {
4075 iommu_disable_translation(iommu);
4076
1f5b3c3f 4077 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
4078
4079 iommu->iommu_state[SR_DMAR_FECTL_REG] =
4080 readl(iommu->reg + DMAR_FECTL_REG);
4081 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
4082 readl(iommu->reg + DMAR_FEDATA_REG);
4083 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
4084 readl(iommu->reg + DMAR_FEADDR_REG);
4085 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
4086 readl(iommu->reg + DMAR_FEUADDR_REG);
4087
1f5b3c3f 4088 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
4089 }
4090 return 0;
4091
4092nomem:
4093 for_each_active_iommu(iommu, drhd)
4094 kfree(iommu->iommu_state);
4095
4096 return -ENOMEM;
4097}
4098
134fac3f 4099static void iommu_resume(void)
f59c7b69
FY
4100{
4101 struct dmar_drhd_unit *drhd;
4102 struct intel_iommu *iommu = NULL;
4103 unsigned long flag;
4104
4105 if (init_iommu_hw()) {
b779260b
JC
4106 if (force_on)
4107 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
4108 else
4109 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
134fac3f 4110 return;
f59c7b69
FY
4111 }
4112
4113 for_each_active_iommu(iommu, drhd) {
4114
1f5b3c3f 4115 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
4116
4117 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
4118 iommu->reg + DMAR_FECTL_REG);
4119 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
4120 iommu->reg + DMAR_FEDATA_REG);
4121 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
4122 iommu->reg + DMAR_FEADDR_REG);
4123 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
4124 iommu->reg + DMAR_FEUADDR_REG);
4125
1f5b3c3f 4126 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
4127 }
4128
4129 for_each_active_iommu(iommu, drhd)
4130 kfree(iommu->iommu_state);
f59c7b69
FY
4131}
4132
134fac3f 4133static struct syscore_ops iommu_syscore_ops = {
f59c7b69
FY
4134 .resume = iommu_resume,
4135 .suspend = iommu_suspend,
4136};
4137
134fac3f 4138static void __init init_iommu_pm_ops(void)
f59c7b69 4139{
134fac3f 4140 register_syscore_ops(&iommu_syscore_ops);
f59c7b69
FY
4141}
4142
4143#else
99592ba4 4144static inline void init_iommu_pm_ops(void) {}
f59c7b69
FY
4145#endif /* CONFIG_PM */
4146
318fe7df 4147
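/*
 * Parse one ACPI RMRR structure into a dmar_rmrr_unit: record the
 * reserved range, allocate a matching direct-mapped reserved region and
 * the device scope list, then add the unit to dmar_rmrr_units.
 */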
c2a0b538 4148int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
318fe7df
SS
4149{
4150 struct acpi_dmar_reserved_memory *rmrr;
0659b8dc 4151 int prot = DMA_PTE_READ|DMA_PTE_WRITE;
318fe7df 4152 struct dmar_rmrr_unit *rmrru;
0659b8dc 4153 size_t length;
318fe7df
SS
4154
4155 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
4156 if (!rmrru)
0659b8dc 4157 goto out;
318fe7df
SS
4158
4159 rmrru->hdr = header;
4160 rmrr = (struct acpi_dmar_reserved_memory *)header;
4161 rmrru->base_address = rmrr->base_address;
4162 rmrru->end_address = rmrr->end_address;
0659b8dc
EA
4163
4164 length = rmrr->end_address - rmrr->base_address + 1;
4165 rmrru->resv = iommu_alloc_resv_region(rmrr->base_address, length, prot,
4166 IOMMU_RESV_DIRECT);
4167 if (!rmrru->resv)
4168 goto free_rmrru;
4169
2e455289
JL
4170 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
4171 ((void *)rmrr) + rmrr->header.length,
4172 &rmrru->devices_cnt);
0659b8dc
EA
4173 if (rmrru->devices_cnt && rmrru->devices == NULL)
4174 goto free_all;
318fe7df 4175
2e455289 4176 list_add(&rmrru->list, &dmar_rmrr_units);
318fe7df 4177
2e455289 4178 return 0;
0659b8dc
EA
4179free_all:
4180 kfree(rmrru->resv);
4181free_rmrru:
4182 kfree(rmrru);
4183out:
4184 return -ENOMEM;
318fe7df
SS
4185}
4186
6b197249
JL
4187static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
4188{
4189 struct dmar_atsr_unit *atsru;
4190 struct acpi_dmar_atsr *tmp;
4191
4192 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4193 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
4194 if (atsr->segment != tmp->segment)
4195 continue;
4196 if (atsr->header.length != tmp->header.length)
4197 continue;
4198 if (memcmp(atsr, tmp, atsr->header.length) == 0)
4199 return atsru;
4200 }
4201
4202 return NULL;
4203}
4204
4205int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
318fe7df
SS
4206{
4207 struct acpi_dmar_atsr *atsr;
4208 struct dmar_atsr_unit *atsru;
4209
b608fe35 4210 if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
6b197249
JL
4211 return 0;
4212
318fe7df 4213 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
6b197249
JL
4214 atsru = dmar_find_atsr(atsr);
4215 if (atsru)
4216 return 0;
4217
4218 atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
318fe7df
SS
4219 if (!atsru)
4220 return -ENOMEM;
4221
6b197249
JL
4222 /*
4223 * If memory is allocated from slab by ACPI _DSM method, we need to
4224 * copy the memory content because the memory buffer will be freed
4225 * on return.
4226 */
4227 atsru->hdr = (void *)(atsru + 1);
4228 memcpy(atsru->hdr, hdr, hdr->length);
318fe7df 4229 atsru->include_all = atsr->flags & 0x1;
2e455289
JL
4230 if (!atsru->include_all) {
4231 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
4232 (void *)atsr + atsr->header.length,
4233 &atsru->devices_cnt);
4234 if (atsru->devices_cnt && atsru->devices == NULL) {
4235 kfree(atsru);
4236 return -ENOMEM;
4237 }
4238 }
318fe7df 4239
0e242612 4240 list_add_rcu(&atsru->list, &dmar_atsr_units);
318fe7df
SS
4241
4242 return 0;
4243}
4244
9bdc531e
JL
4245static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
4246{
4247 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
4248 kfree(atsru);
4249}
4250
6b197249
JL
4251int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4252{
4253 struct acpi_dmar_atsr *atsr;
4254 struct dmar_atsr_unit *atsru;
4255
4256 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4257 atsru = dmar_find_atsr(atsr);
4258 if (atsru) {
4259 list_del_rcu(&atsru->list);
4260 synchronize_rcu();
4261 intel_iommu_free_atsr(atsru);
4262 }
4263
4264 return 0;
4265}
4266
4267int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4268{
4269 int i;
4270 struct device *dev;
4271 struct acpi_dmar_atsr *atsr;
4272 struct dmar_atsr_unit *atsru;
4273
4274 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4275 atsru = dmar_find_atsr(atsr);
4276 if (!atsru)
4277 return 0;
4278
194dc870 4279 if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
6b197249
JL
4280 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
4281 i, dev)
4282 return -EBUSY;
194dc870 4283 }
6b197249
JL
4284
4285 return 0;
4286}
4287
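/*
 * Bring up a hot-added DMAR unit: check that its capabilities match the
 * running configuration (pass-through, snooping, superpage), allocate
 * domains and a root entry, then enable queued invalidation, the fault
 * interrupt and translation unless the unit is ignored.
 */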
ffebeb46
JL
4288static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
4289{
4290 int sp, ret = 0;
4291 struct intel_iommu *iommu = dmaru->iommu;
4292
4293 if (g_iommus[iommu->seq_id])
4294 return 0;
4295
4296 if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
9f10e5bf 4297 pr_warn("%s: Doesn't support hardware pass through.\n",
ffebeb46
JL
4298 iommu->name);
4299 return -ENXIO;
4300 }
4301 if (!ecap_sc_support(iommu->ecap) &&
4302 domain_update_iommu_snooping(iommu)) {
9f10e5bf 4303 pr_warn("%s: Doesn't support snooping.\n",
ffebeb46
JL
4304 iommu->name);
4305 return -ENXIO;
4306 }
4307 sp = domain_update_iommu_superpage(iommu) - 1;
4308 if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
9f10e5bf 4309 pr_warn("%s: Doesn't support large page.\n",
ffebeb46
JL
4310 iommu->name);
4311 return -ENXIO;
4312 }
4313
4314 /*
4315 * Disable translation if already enabled prior to OS handover.
4316 */
4317 if (iommu->gcmd & DMA_GCMD_TE)
4318 iommu_disable_translation(iommu);
4319
4320 g_iommus[iommu->seq_id] = iommu;
4321 ret = iommu_init_domains(iommu);
4322 if (ret == 0)
4323 ret = iommu_alloc_root_entry(iommu);
4324 if (ret)
4325 goto out;
4326
8a94ade4
DW
4327#ifdef CONFIG_INTEL_IOMMU_SVM
4328 if (pasid_enabled(iommu))
4329 intel_svm_alloc_pasid_tables(iommu);
4330#endif
4331
ffebeb46
JL
4332 if (dmaru->ignored) {
4333 /*
4334 * we always have to disable PMRs or DMA may fail on this device
4335 */
4336 if (force_on)
4337 iommu_disable_protect_mem_regions(iommu);
4338 return 0;
4339 }
4340
4341 intel_iommu_init_qi(iommu);
4342 iommu_flush_write_buffer(iommu);
a222a7f0
DW
4343
4344#ifdef CONFIG_INTEL_IOMMU_SVM
4345 if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) {
4346 ret = intel_svm_enable_prq(iommu);
4347 if (ret)
4348 goto disable_iommu;
4349 }
4350#endif
ffebeb46
JL
4351 ret = dmar_set_interrupt(iommu);
4352 if (ret)
4353 goto disable_iommu;
4354
4355 iommu_set_root_entry(iommu);
4356 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
4357 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4358 iommu_enable_translation(iommu);
4359
ffebeb46
JL
4360 iommu_disable_protect_mem_regions(iommu);
4361 return 0;
4362
4363disable_iommu:
4364 disable_dmar_iommu(iommu);
4365out:
4366 free_dmar_iommu(iommu);
4367 return ret;
4368}
4369
6b197249
JL
4370int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
4371{
ffebeb46
JL
4372 int ret = 0;
4373 struct intel_iommu *iommu = dmaru->iommu;
4374
4375 if (!intel_iommu_enabled)
4376 return 0;
4377 if (iommu == NULL)
4378 return -EINVAL;
4379
4380 if (insert) {
4381 ret = intel_iommu_add(dmaru);
4382 } else {
4383 disable_dmar_iommu(iommu);
4384 free_dmar_iommu(iommu);
4385 }
4386
4387 return ret;
6b197249
JL
4388}
4389
9bdc531e
JL
4390static void intel_iommu_free_dmars(void)
4391{
4392 struct dmar_rmrr_unit *rmrru, *rmrr_n;
4393 struct dmar_atsr_unit *atsru, *atsr_n;
4394
4395 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
4396 list_del(&rmrru->list);
4397 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
0659b8dc 4398 kfree(rmrru->resv);
9bdc531e 4399 kfree(rmrru);
318fe7df
SS
4400 }
4401
9bdc531e
JL
4402 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
4403 list_del(&atsru->list);
4404 intel_iommu_free_atsr(atsru);
4405 }
318fe7df
SS
4406}
4407
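/*
 * Walk up from @dev to its PCIe root port and check whether that port is
 * covered by an ATSR for the device's segment (or by an include_all
 * ATSR). Returns 1 if ATS may be used, 0 otherwise.
 */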
4408int dmar_find_matched_atsr_unit(struct pci_dev *dev)
4409{
b683b230 4410 int i, ret = 1;
318fe7df 4411 struct pci_bus *bus;
832bd858
DW
4412 struct pci_dev *bridge = NULL;
4413 struct device *tmp;
318fe7df
SS
4414 struct acpi_dmar_atsr *atsr;
4415 struct dmar_atsr_unit *atsru;
4416
4417 dev = pci_physfn(dev);
318fe7df 4418 for (bus = dev->bus; bus; bus = bus->parent) {
b5f82ddf 4419 bridge = bus->self;
d14053b3
DW
4420 /* If it's an integrated device, allow ATS */
4421 if (!bridge)
4422 return 1;
4423 /* Connected via non-PCIe: no ATS */
4424 if (!pci_is_pcie(bridge) ||
62f87c0e 4425 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
318fe7df 4426 return 0;
d14053b3 4427 /* If we found the root port, look it up in the ATSR */
b5f82ddf 4428 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
318fe7df 4429 break;
318fe7df
SS
4430 }
4431
0e242612 4432 rcu_read_lock();
b5f82ddf
JL
4433 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4434 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4435 if (atsr->segment != pci_domain_nr(dev->bus))
4436 continue;
4437
b683b230 4438 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
832bd858 4439 if (tmp == &bridge->dev)
b683b230 4440 goto out;
b5f82ddf
JL
4441
4442 if (atsru->include_all)
b683b230 4443 goto out;
b5f82ddf 4444 }
b683b230
JL
4445 ret = 0;
4446out:
0e242612 4447 rcu_read_unlock();
318fe7df 4448
b683b230 4449 return ret;
318fe7df
SS
4450}
4451
59ce0515
JL
4452int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
4453{
4454 int ret = 0;
4455 struct dmar_rmrr_unit *rmrru;
4456 struct dmar_atsr_unit *atsru;
4457 struct acpi_dmar_atsr *atsr;
4458 struct acpi_dmar_reserved_memory *rmrr;
4459
b608fe35 4460 if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING)
59ce0515
JL
4461 return 0;
4462
4463 list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
4464 rmrr = container_of(rmrru->hdr,
4465 struct acpi_dmar_reserved_memory, header);
4466 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4467 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
4468 ((void *)rmrr) + rmrr->header.length,
4469 rmrr->segment, rmrru->devices,
4470 rmrru->devices_cnt);
27e24950 4471 if (ret < 0)
59ce0515 4472 return ret;
e6a8c9b3 4473 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
27e24950
JL
4474 dmar_remove_dev_scope(info, rmrr->segment,
4475 rmrru->devices, rmrru->devices_cnt);
59ce0515
JL
4476 }
4477 }
4478
4479 list_for_each_entry(atsru, &dmar_atsr_units, list) {
4480 if (atsru->include_all)
4481 continue;
4482
4483 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4484 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4485 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
4486 (void *)atsr + atsr->header.length,
4487 atsr->segment, atsru->devices,
4488 atsru->devices_cnt);
4489 if (ret > 0)
4490 break;
4491 else if (ret < 0)
4492 return ret;
e6a8c9b3 4493 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
59ce0515
JL
4494 if (dmar_remove_dev_scope(info, atsr->segment,
4495 atsru->devices, atsru->devices_cnt))
4496 break;
4497 }
4498 }
4499
4500 return 0;
4501}
4502
99dcaded
FY
4503/*
4504 * Here we only respond to a device being unbound from its driver.
4505 *
4506 * An added device is not attached to its DMAR domain here yet. That will
4507 * happen when the device is mapped to an iova.
4508 */
4509static int device_notifier(struct notifier_block *nb,
4510 unsigned long action, void *data)
4511{
4512 struct device *dev = data;
99dcaded
FY
4513 struct dmar_domain *domain;
4514
3d89194a 4515 if (iommu_dummy(dev))
44cd613c
DW
4516 return 0;
4517
1196c2fb 4518 if (action != BUS_NOTIFY_REMOVED_DEVICE)
7e7dfab7
JL
4519 return 0;
4520
1525a29a 4521 domain = find_domain(dev);
99dcaded
FY
4522 if (!domain)
4523 return 0;
4524
e6de0f8d 4525 dmar_remove_one_dev_info(domain, dev);
ab8dfe25 4526 if (!domain_type_is_vm_or_si(domain) && list_empty(&domain->devices))
7e7dfab7 4527 domain_exit(domain);
a97590e5 4528
99dcaded
FY
4529 return 0;
4530}
4531
4532static struct notifier_block device_nb = {
4533 .notifier_call = device_notifier,
4534};
4535
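/*
 * Memory hotplug notifier for the static identity (si_domain) setup:
 * add identity mappings when memory goes online and unmap the range,
 * flushing the IOTLB of every active IOMMU, when it is taken offline.
 */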
75f05569
JL
4536static int intel_iommu_memory_notifier(struct notifier_block *nb,
4537 unsigned long val, void *v)
4538{
4539 struct memory_notify *mhp = v;
4540 unsigned long long start, end;
4541 unsigned long start_vpfn, last_vpfn;
4542
4543 switch (val) {
4544 case MEM_GOING_ONLINE:
4545 start = mhp->start_pfn << PAGE_SHIFT;
4546 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
4547 if (iommu_domain_identity_map(si_domain, start, end)) {
9f10e5bf 4548 pr_warn("Failed to build identity map for [%llx-%llx]\n",
75f05569
JL
4549 start, end);
4550 return NOTIFY_BAD;
4551 }
4552 break;
4553
4554 case MEM_OFFLINE:
4555 case MEM_CANCEL_ONLINE:
4556 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4557 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
4558 while (start_vpfn <= last_vpfn) {
4559 struct iova *iova;
4560 struct dmar_drhd_unit *drhd;
4561 struct intel_iommu *iommu;
ea8ea460 4562 struct page *freelist;
75f05569
JL
4563
4564 iova = find_iova(&si_domain->iovad, start_vpfn);
4565 if (iova == NULL) {
9f10e5bf 4566 pr_debug("Failed get IOVA for PFN %lx\n",
75f05569
JL
4567 start_vpfn);
4568 break;
4569 }
4570
4571 iova = split_and_remove_iova(&si_domain->iovad, iova,
4572 start_vpfn, last_vpfn);
4573 if (iova == NULL) {
9f10e5bf 4574 pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
75f05569
JL
4575 start_vpfn, last_vpfn);
4576 return NOTIFY_BAD;
4577 }
4578
ea8ea460
DW
4579 freelist = domain_unmap(si_domain, iova->pfn_lo,
4580 iova->pfn_hi);
4581
75f05569
JL
4582 rcu_read_lock();
4583 for_each_active_iommu(iommu, drhd)
a1ddcbe9 4584 iommu_flush_iotlb_psi(iommu, si_domain,
a156ef99 4585 iova->pfn_lo, iova_size(iova),
ea8ea460 4586 !freelist, 0);
75f05569 4587 rcu_read_unlock();
ea8ea460 4588 dma_free_pagelist(freelist);
75f05569
JL
4589
4590 start_vpfn = iova->pfn_hi + 1;
4591 free_iova_mem(iova);
4592 }
4593 break;
4594 }
4595
4596 return NOTIFY_OK;
4597}
4598
4599static struct notifier_block intel_iommu_memory_nb = {
4600 .notifier_call = intel_iommu_memory_notifier,
4601 .priority = 0
4602};
4603
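/*
 * Invoked from the CPU hotplug "dead" callback: release the per-CPU IOVA
 * caches of every domain on every IOMMU so the dead CPU's cached IOVAs
 * are returned to the allocator.
 */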
22e2f9fa
OP
4604static void free_all_cpu_cached_iovas(unsigned int cpu)
4605{
4606 int i;
4607
4608 for (i = 0; i < g_num_of_iommus; i++) {
4609 struct intel_iommu *iommu = g_iommus[i];
4610 struct dmar_domain *domain;
0caa7616 4611 int did;
22e2f9fa
OP
4612
4613 if (!iommu)
4614 continue;
4615
3bd4f911 4616 for (did = 0; did < cap_ndoms(iommu->cap); did++) {
0caa7616 4617 domain = get_iommu_domain(iommu, (u16)did);
22e2f9fa
OP
4618
4619 if (!domain)
4620 continue;
4621 free_cpu_cached_iovas(cpu, &domain->iovad);
4622 }
4623 }
4624}
4625
21647615 4626static int intel_iommu_cpu_dead(unsigned int cpu)
aa473240 4627{
21647615 4628 free_all_cpu_cached_iovas(cpu);
21647615 4629 return 0;
aa473240
OP
4630}
4631
161b28aa
JR
4632static void intel_disable_iommus(void)
4633{
4634 struct intel_iommu *iommu = NULL;
4635 struct dmar_drhd_unit *drhd;
4636
4637 for_each_iommu(iommu, drhd)
4638 iommu_disable_translation(iommu);
4639}
4640
a7fdb6e6
JR
4641static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
4642{
2926a2aa
JR
4643 struct iommu_device *iommu_dev = dev_to_iommu_device(dev);
4644
4645 return container_of(iommu_dev, struct intel_iommu, iommu);
a7fdb6e6
JR
4646}
4647
a5459cfe
AW
4648static ssize_t intel_iommu_show_version(struct device *dev,
4649 struct device_attribute *attr,
4650 char *buf)
4651{
a7fdb6e6 4652 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4653 u32 ver = readl(iommu->reg + DMAR_VER_REG);
4654 return sprintf(buf, "%d:%d\n",
4655 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4656}
4657static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
4658
4659static ssize_t intel_iommu_show_address(struct device *dev,
4660 struct device_attribute *attr,
4661 char *buf)
4662{
a7fdb6e6 4663 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4664 return sprintf(buf, "%llx\n", iommu->reg_phys);
4665}
4666static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
4667
4668static ssize_t intel_iommu_show_cap(struct device *dev,
4669 struct device_attribute *attr,
4670 char *buf)
4671{
a7fdb6e6 4672 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4673 return sprintf(buf, "%llx\n", iommu->cap);
4674}
4675static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
4676
4677static ssize_t intel_iommu_show_ecap(struct device *dev,
4678 struct device_attribute *attr,
4679 char *buf)
4680{
a7fdb6e6 4681 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4682 return sprintf(buf, "%llx\n", iommu->ecap);
4683}
4684static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4685
2238c082
AW
4686static ssize_t intel_iommu_show_ndoms(struct device *dev,
4687 struct device_attribute *attr,
4688 char *buf)
4689{
a7fdb6e6 4690 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2238c082
AW
4691 return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
4692}
4693static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL);
4694
4695static ssize_t intel_iommu_show_ndoms_used(struct device *dev,
4696 struct device_attribute *attr,
4697 char *buf)
4698{
a7fdb6e6 4699 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2238c082
AW
4700 return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
4701 cap_ndoms(iommu->cap)));
4702}
4703static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL);
4704
a5459cfe
AW
4705static struct attribute *intel_iommu_attrs[] = {
4706 &dev_attr_version.attr,
4707 &dev_attr_address.attr,
4708 &dev_attr_cap.attr,
4709 &dev_attr_ecap.attr,
2238c082
AW
4710 &dev_attr_domains_supported.attr,
4711 &dev_attr_domains_used.attr,
a5459cfe
AW
4712 NULL,
4713};
4714
4715static struct attribute_group intel_iommu_group = {
4716 .name = "intel-iommu",
4717 .attrs = intel_iommu_attrs,
4718};
4719
4720const struct attribute_group *intel_iommu_groups[] = {
4721 &intel_iommu_group,
4722 NULL,
4723};
4724
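/*
 * Main initialization entry point: parse the DMAR table and device
 * scopes, initialize every DMAR unit via init_dmars(), install
 * intel_dma_ops as the DMA API backend, and register the IOMMUs with
 * sysfs, the IOMMU core and the bus/memory/cpuhp notifiers.
 */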
ba395927
KA
4725int __init intel_iommu_init(void)
4726{
9bdc531e 4727 int ret = -ENODEV;
3a93c841 4728 struct dmar_drhd_unit *drhd;
7c919779 4729 struct intel_iommu *iommu;
ba395927 4730
a59b50e9
JC
4731 /* VT-d is required for a TXT/tboot launch, so enforce that */
4732 force_on = tboot_force_iommu();
4733
3a5670e8
JL
4734 if (iommu_init_mempool()) {
4735 if (force_on)
4736 panic("tboot: Failed to initialize iommu memory\n");
4737 return -ENOMEM;
4738 }
4739
4740 down_write(&dmar_global_lock);
a59b50e9
JC
4741 if (dmar_table_init()) {
4742 if (force_on)
4743 panic("tboot: Failed to initialize DMAR table\n");
9bdc531e 4744 goto out_free_dmar;
a59b50e9 4745 }
ba395927 4746
c2c7286a 4747 if (dmar_dev_scope_init() < 0) {
a59b50e9
JC
4748 if (force_on)
4749 panic("tboot: Failed to initialize DMAR device scope\n");
9bdc531e 4750 goto out_free_dmar;
a59b50e9 4751 }
1886e8a9 4752
ec154bf5
JR
4753 up_write(&dmar_global_lock);
4754
4755 /*
4756 * The bus notifier takes the dmar_global_lock, so lockdep will
4757 * complain later when we register it under the lock.
4758 */
4759 dmar_register_bus_notifier();
4760
4761 down_write(&dmar_global_lock);
4762
161b28aa 4763 if (no_iommu || dmar_disabled) {
bfd20f1c
SL
4764 /*
4765 * We exit the function here to ensure the IOMMU's remapping and
4766 * mempool aren't set up, which means that the IOMMU's PMRs
4767 * won't be disabled via the call to init_dmars(). So disable
4768 * them explicitly here. The PMRs were set up by tboot prior to
4769 * calling SENTER, but the kernel is expected to reset/tear
4770 * down the PMRs.
4771 */
4772 if (intel_iommu_tboot_noforce) {
4773 for_each_iommu(iommu, drhd)
4774 iommu_disable_protect_mem_regions(iommu);
4775 }
4776
161b28aa
JR
4777 /*
4778 * Make sure the IOMMUs are switched off, even when we
4779 * boot into a kexec kernel and the previous kernel left
4780 * them enabled
4781 */
4782 intel_disable_iommus();
9bdc531e 4783 goto out_free_dmar;
161b28aa 4784 }
2ae21010 4785
318fe7df 4786 if (list_empty(&dmar_rmrr_units))
9f10e5bf 4787 pr_info("No RMRR found\n");
318fe7df
SS
4788
4789 if (list_empty(&dmar_atsr_units))
9f10e5bf 4790 pr_info("No ATSR found\n");
318fe7df 4791
51a63e67
JC
4792 if (dmar_init_reserved_ranges()) {
4793 if (force_on)
4794 panic("tboot: Failed to reserve iommu ranges\n");
3a5670e8 4795 goto out_free_reserved_range;
51a63e67 4796 }
ba395927
KA
4797
4798 init_no_remapping_devices();
4799
b779260b 4800 ret = init_dmars();
ba395927 4801 if (ret) {
a59b50e9
JC
4802 if (force_on)
4803 panic("tboot: Failed to initialize DMARs\n");
9f10e5bf 4804 pr_err("Initialization failed\n");
9bdc531e 4805 goto out_free_reserved_range;
ba395927 4806 }
3a5670e8 4807 up_write(&dmar_global_lock);
9f10e5bf 4808 pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
ba395927 4809
75f1cdf1
FT
4810#ifdef CONFIG_SWIOTLB
4811 swiotlb = 0;
4812#endif
19943b0e 4813 dma_ops = &intel_dma_ops;
4ed0d3e6 4814
134fac3f 4815 init_iommu_pm_ops();
a8bcbb0d 4816
39ab9555
JR
4817 for_each_active_iommu(iommu, drhd) {
4818 iommu_device_sysfs_add(&iommu->iommu, NULL,
4819 intel_iommu_groups,
4820 "%s", iommu->name);
4821 iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
4822 iommu_device_register(&iommu->iommu);
4823 }
a5459cfe 4824
4236d97d 4825 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
99dcaded 4826 bus_register_notifier(&pci_bus_type, &device_nb);
75f05569
JL
4827 if (si_domain && !hw_pass_through)
4828 register_memory_notifier(&intel_iommu_memory_nb);
21647615
AMG
4829 cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL,
4830 intel_iommu_cpu_dead);
8bc1f85c
ED
4831 intel_iommu_enabled = 1;
4832
ba395927 4833 return 0;
9bdc531e
JL
4834
4835out_free_reserved_range:
4836 put_iova_domain(&reserved_iova_list);
9bdc531e
JL
4837out_free_dmar:
4838 intel_iommu_free_dmars();
3a5670e8
JL
4839 up_write(&dmar_global_lock);
4840 iommu_exit_mempool();
9bdc531e 4841 return ret;
ba395927 4842}
e820482c 4843
2452d9db 4844static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
579305f7
AW
4845{
4846 struct intel_iommu *iommu = opaque;
4847
2452d9db 4848 domain_context_clear_one(iommu, PCI_BUS_NUM(alias), alias & 0xff);
579305f7
AW
4849 return 0;
4850}
4851
4852/*
4853 * NB - intel-iommu lacks any sort of reference counting for the users of
4854 * dependent devices. If multiple endpoints have intersecting dependent
4855 * devices, unbinding the driver from any one of them will possibly leave
4856 * the others unable to operate.
4857 */
2452d9db 4858static void domain_context_clear(struct intel_iommu *iommu, struct device *dev)
3199aa6b 4859{
0bcb3e28 4860 if (!iommu || !dev || !dev_is_pci(dev))
3199aa6b
HW
4861 return;
4862
2452d9db 4863 pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_clear_one_cb, iommu);
3199aa6b
HW
4864}
4865
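/*
 * Caller must hold device_domain_lock. Disable the device IOTLB, clear
 * the context entries for the device and all its DMA aliases, unlink the
 * device_domain_info and detach its domain from the IOMMU.
 */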
127c7615 4866static void __dmar_remove_one_dev_info(struct device_domain_info *info)
c7151a8d 4867{
c7151a8d
WH
4868 struct intel_iommu *iommu;
4869 unsigned long flags;
c7151a8d 4870
55d94043
JR
4871 assert_spin_locked(&device_domain_lock);
4872
127c7615 4873 if (WARN_ON(!info))
c7151a8d
WH
4874 return;
4875
127c7615 4876 iommu = info->iommu;
c7151a8d 4877
127c7615
JR
4878 if (info->dev) {
4879 iommu_disable_dev_iotlb(info);
4880 domain_context_clear(iommu, info->dev);
4881 }
c7151a8d 4882
b608ac3b 4883 unlink_domain_info(info);
c7151a8d 4884
d160aca5 4885 spin_lock_irqsave(&iommu->lock, flags);
127c7615 4886 domain_detach_iommu(info->domain, iommu);
d160aca5 4887 spin_unlock_irqrestore(&iommu->lock, flags);
c7151a8d 4888
127c7615 4889 free_devinfo_mem(info);
c7151a8d 4890}
c7151a8d 4891
55d94043
JR
4892static void dmar_remove_one_dev_info(struct dmar_domain *domain,
4893 struct device *dev)
4894{
127c7615 4895 struct device_domain_info *info;
55d94043 4896 unsigned long flags;
3e7abe25 4897
55d94043 4898 spin_lock_irqsave(&device_domain_lock, flags);
127c7615
JR
4899 info = dev->archdata.iommu;
4900 __dmar_remove_one_dev_info(info);
55d94043 4901 spin_unlock_irqrestore(&device_domain_lock, flags);
c7151a8d
WH
4902}
4903
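/*
 * Initialize a domain allocated through the IOMMU API: set up its IOVA
 * allocator and reserved ranges, derive agaw from the requested guest
 * width, and allocate the top-level page directory.
 */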
2c2e2c38 4904static int md_domain_init(struct dmar_domain *domain, int guest_width)
5e98c4b1
WH
4905{
4906 int adjust_width;
4907
aa3ac946 4908 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
5e98c4b1
WH
4909 domain_reserve_special_ranges(domain);
4910
4911 /* calculate AGAW */
4912 domain->gaw = guest_width;
4913 adjust_width = guestwidth_to_adjustwidth(guest_width);
4914 domain->agaw = width_to_agaw(adjust_width);
4915
5e98c4b1 4916 domain->iommu_coherency = 0;
c5b15255 4917 domain->iommu_snooping = 0;
6dd9a7c7 4918 domain->iommu_superpage = 0;
fe40f1e0 4919 domain->max_addr = 0;
5e98c4b1
WH
4920
4921 /* always allocate the top pgd */
4c923d47 4922 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
5e98c4b1
WH
4923 if (!domain->pgd)
4924 return -ENOMEM;
4925 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4926 return 0;
4927}
4928
00a77deb 4929static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
38717946 4930{
5d450806 4931 struct dmar_domain *dmar_domain;
00a77deb
JR
4932 struct iommu_domain *domain;
4933
4934 if (type != IOMMU_DOMAIN_UNMANAGED)
4935 return NULL;
38717946 4936
ab8dfe25 4937 dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE);
5d450806 4938 if (!dmar_domain) {
9f10e5bf 4939 pr_err("Can't allocate dmar_domain\n");
00a77deb 4940 return NULL;
38717946 4941 }
2c2e2c38 4942 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
9f10e5bf 4943 pr_err("Domain initialization failed\n");
92d03cc8 4944 domain_exit(dmar_domain);
00a77deb 4945 return NULL;
38717946 4946 }
8140a95d 4947 domain_update_iommu_cap(dmar_domain);
faa3d6f5 4948
00a77deb 4949 domain = &dmar_domain->domain;
8a0e715b
JR
4950 domain->geometry.aperture_start = 0;
4951 domain->geometry.aperture_end = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
4952 domain->geometry.force_aperture = true;
4953
00a77deb 4954 return domain;
38717946 4955}
38717946 4956
00a77deb 4957static void intel_iommu_domain_free(struct iommu_domain *domain)
38717946 4958{
00a77deb 4959 domain_exit(to_dmar_domain(domain));
38717946 4960}
38717946 4961
4c5478c9
JR
4962static int intel_iommu_attach_device(struct iommu_domain *domain,
4963 struct device *dev)
38717946 4964{
00a77deb 4965 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
fe40f1e0
WH
4966 struct intel_iommu *iommu;
4967 int addr_width;
156baca8 4968 u8 bus, devfn;
faa3d6f5 4969
c875d2c1
AW
4970 if (device_is_rmrr_locked(dev)) {
4971 dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n");
4972 return -EPERM;
4973 }
4974
7207d8f9
DW
4975 /* normally dev is not mapped */
4976 if (unlikely(domain_context_mapped(dev))) {
faa3d6f5
WH
4977 struct dmar_domain *old_domain;
4978
1525a29a 4979 old_domain = find_domain(dev);
faa3d6f5 4980 if (old_domain) {
d160aca5 4981 rcu_read_lock();
de7e8886 4982 dmar_remove_one_dev_info(old_domain, dev);
d160aca5 4983 rcu_read_unlock();
62c22167
JR
4984
4985 if (!domain_type_is_vm_or_si(old_domain) &&
4986 list_empty(&old_domain->devices))
4987 domain_exit(old_domain);
faa3d6f5
WH
4988 }
4989 }
4990
156baca8 4991 iommu = device_to_iommu(dev, &bus, &devfn);
fe40f1e0
WH
4992 if (!iommu)
4993 return -ENODEV;
4994
4995 /* check if this iommu agaw is sufficient for max mapped address */
4996 addr_width = agaw_to_width(iommu->agaw);
a99c47a2
TL
4997 if (addr_width > cap_mgaw(iommu->cap))
4998 addr_width = cap_mgaw(iommu->cap);
4999
5000 if (dmar_domain->max_addr > (1LL << addr_width)) {
9f10e5bf 5001 pr_err("%s: iommu width (%d) is not "
fe40f1e0 5002 "sufficient for the mapped address (%llx)\n",
a99c47a2 5003 __func__, addr_width, dmar_domain->max_addr);
fe40f1e0
WH
5004 return -EFAULT;
5005 }
a99c47a2
TL
5006 dmar_domain->gaw = addr_width;
5007
5008 /*
5009 * Knock out extra levels of page tables if necessary
5010 */
5011 while (iommu->agaw < dmar_domain->agaw) {
5012 struct dma_pte *pte;
5013
5014 pte = dmar_domain->pgd;
5015 if (dma_pte_present(pte)) {
25cbff16
SY
5016 dmar_domain->pgd = (struct dma_pte *)
5017 phys_to_virt(dma_pte_addr(pte));
7a661013 5018 free_pgtable_page(pte);
a99c47a2
TL
5019 }
5020 dmar_domain->agaw--;
5021 }
fe40f1e0 5022
28ccce0d 5023 return domain_add_dev_info(dmar_domain, dev);
38717946 5024}
38717946 5025
4c5478c9
JR
5026static void intel_iommu_detach_device(struct iommu_domain *domain,
5027 struct device *dev)
38717946 5028{
e6de0f8d 5029 dmar_remove_one_dev_info(to_dmar_domain(domain), dev);
faa3d6f5 5030}
c7151a8d 5031
b146a1c9
JR
5032static int intel_iommu_map(struct iommu_domain *domain,
5033 unsigned long iova, phys_addr_t hpa,
5009065d 5034 size_t size, int iommu_prot)
faa3d6f5 5035{
00a77deb 5036 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
fe40f1e0 5037 u64 max_addr;
dde57a21 5038 int prot = 0;
faa3d6f5 5039 int ret;
fe40f1e0 5040
dde57a21
JR
5041 if (iommu_prot & IOMMU_READ)
5042 prot |= DMA_PTE_READ;
5043 if (iommu_prot & IOMMU_WRITE)
5044 prot |= DMA_PTE_WRITE;
9cf06697
SY
5045 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
5046 prot |= DMA_PTE_SNP;
dde57a21 5047
163cc52c 5048 max_addr = iova + size;
dde57a21 5049 if (dmar_domain->max_addr < max_addr) {
fe40f1e0
WH
5050 u64 end;
5051
5052 /* check if minimum agaw is sufficient for mapped address */
8954da1f 5053 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
fe40f1e0 5054 if (end < max_addr) {
9f10e5bf 5055 pr_err("%s: iommu width (%d) is not "
fe40f1e0 5056 "sufficient for the mapped address (%llx)\n",
8954da1f 5057 __func__, dmar_domain->gaw, max_addr);
fe40f1e0
WH
5058 return -EFAULT;
5059 }
dde57a21 5060 dmar_domain->max_addr = max_addr;
fe40f1e0 5061 }
ad051221
DW
5062 /* Round up size to next multiple of PAGE_SIZE, if it and
5063 the low bits of hpa would take us onto the next page */
88cb6a74 5064 size = aligned_nrpages(hpa, size);
ad051221
DW
5065 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
5066 hpa >> VTD_PAGE_SHIFT, size, prot);
faa3d6f5 5067 return ret;
38717946 5068}
38717946 5069
5009065d 5070static size_t intel_iommu_unmap(struct iommu_domain *domain,
ea8ea460 5071 unsigned long iova, size_t size)
38717946 5072{
00a77deb 5073 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
ea8ea460
DW
5074 struct page *freelist = NULL;
5075 struct intel_iommu *iommu;
5076 unsigned long start_pfn, last_pfn;
5077 unsigned int npages;
42e8c186 5078 int iommu_id, level = 0;
5cf0a76f
DW
5079
5080 /* Cope with horrid API which requires us to unmap more than the
5081 size argument if it happens to be a large-page mapping. */
dc02e46e 5082 BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
5cf0a76f
DW
5083
5084 if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
5085 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4b99d352 5086
ea8ea460
DW
5087 start_pfn = iova >> VTD_PAGE_SHIFT;
5088 last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
5089
5090 freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
5091
5092 npages = last_pfn - start_pfn + 1;
5093
29a27719 5094 for_each_domain_iommu(iommu_id, dmar_domain) {
a1ddcbe9 5095 iommu = g_iommus[iommu_id];
ea8ea460 5096
42e8c186
JR
5097 iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
5098 start_pfn, npages, !freelist, 0);
ea8ea460
DW
5099 }
5100
5101 dma_free_pagelist(freelist);
fe40f1e0 5102
163cc52c
DW
5103 if (dmar_domain->max_addr == iova + size)
5104 dmar_domain->max_addr = iova;
b146a1c9 5105
5cf0a76f 5106 return size;
38717946 5107}
38717946 5108
d14d6577 5109static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
bb5547ac 5110 dma_addr_t iova)
38717946 5111{
00a77deb 5112 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
38717946 5113 struct dma_pte *pte;
5cf0a76f 5114 int level = 0;
faa3d6f5 5115 u64 phys = 0;
38717946 5116
5cf0a76f 5117 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
38717946 5118 if (pte)
faa3d6f5 5119 phys = dma_pte_addr(pte);
38717946 5120
faa3d6f5 5121 return phys;
38717946 5122}
a8bcbb0d 5123
5d587b8d 5124static bool intel_iommu_capable(enum iommu_cap cap)
dbb9fd86 5125{
dbb9fd86 5126 if (cap == IOMMU_CAP_CACHE_COHERENCY)
5d587b8d 5127 return domain_update_iommu_snooping(NULL) == 1;
323f99cb 5128 if (cap == IOMMU_CAP_INTR_REMAP)
5d587b8d 5129 return irq_remapping_enabled == 1;
dbb9fd86 5130
5d587b8d 5131 return false;
dbb9fd86
SY
5132}
5133
abdfdde2
AW
5134static int intel_iommu_add_device(struct device *dev)
5135{
a5459cfe 5136 struct intel_iommu *iommu;
abdfdde2 5137 struct iommu_group *group;
156baca8 5138 u8 bus, devfn;
70ae6f0d 5139
a5459cfe
AW
5140 iommu = device_to_iommu(dev, &bus, &devfn);
5141 if (!iommu)
70ae6f0d
AW
5142 return -ENODEV;
5143
e3d10af1 5144 iommu_device_link(&iommu->iommu, dev);
a4ff1fc2 5145
e17f9ff4 5146 group = iommu_group_get_for_dev(dev);
783f157b 5147
e17f9ff4
AW
5148 if (IS_ERR(group))
5149 return PTR_ERR(group);
bcb71abe 5150
abdfdde2 5151 iommu_group_put(group);
e17f9ff4 5152 return 0;
abdfdde2 5153}
70ae6f0d 5154
abdfdde2
AW
5155static void intel_iommu_remove_device(struct device *dev)
5156{
a5459cfe
AW
5157 struct intel_iommu *iommu;
5158 u8 bus, devfn;
5159
5160 iommu = device_to_iommu(dev, &bus, &devfn);
5161 if (!iommu)
5162 return;
5163
abdfdde2 5164 iommu_group_remove_device(dev);
a5459cfe 5165
e3d10af1 5166 iommu_device_unlink(&iommu->iommu, dev);
70ae6f0d
AW
5167}
5168
0659b8dc
EA
5169static void intel_iommu_get_resv_regions(struct device *device,
5170 struct list_head *head)
5171{
5172 struct iommu_resv_region *reg;
5173 struct dmar_rmrr_unit *rmrr;
5174 struct device *i_dev;
5175 int i;
5176
5177 rcu_read_lock();
5178 for_each_rmrr_units(rmrr) {
5179 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
5180 i, i_dev) {
5181 if (i_dev != device)
5182 continue;
5183
5184 list_add_tail(&rmrr->resv->list, head);
5185 }
5186 }
5187 rcu_read_unlock();
5188
5189 reg = iommu_alloc_resv_region(IOAPIC_RANGE_START,
5190 IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1,
9d3a4de4 5191 0, IOMMU_RESV_MSI);
0659b8dc
EA
5192 if (!reg)
5193 return;
5194 list_add_tail(&reg->list, head);
5195}
5196
5197static void intel_iommu_put_resv_regions(struct device *dev,
5198 struct list_head *head)
5199{
5200 struct iommu_resv_region *entry, *next;
5201
5202 list_for_each_entry_safe(entry, next, head, list) {
5203 if (entry->type == IOMMU_RESV_RESERVED)
5204 kfree(entry);
5205 }
70ae6f0d
AW
5206}
5207
2f26e0a9 5208#ifdef CONFIG_INTEL_IOMMU_SVM
65ca7f5f
JP
5209#define MAX_NR_PASID_BITS (20)
5210static inline unsigned long intel_iommu_get_pts(struct intel_iommu *iommu)
5211{
5212 /*
5213 * Convert ecap_pss to the extended context entry pts encoding; also
5214 * respect the soft pasid_max value set by the iommu.
5215 * - number of PASID bits = ecap_pss + 1
5216 * - number of PASID table entries = 2^(pts + 5)
5217 * Therefore, pts = ecap_pss - 4
5218 * e.g. KBL ecap_pss = 0x13, PASID has 20 bits, pts = 15
5219 */
5220 if (ecap_pss(iommu->ecap) < 5)
5221 return 0;
5222
5223 /* pasid_max is encoded as actual number of entries not the bits */
5224 return find_first_bit((unsigned long *)&iommu->pasid_max,
5225 MAX_NR_PASID_BITS) - 5;
5226}
5227
2f26e0a9
DW
5228int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev)
5229{
5230 struct device_domain_info *info;
5231 struct context_entry *context;
5232 struct dmar_domain *domain;
5233 unsigned long flags;
5234 u64 ctx_lo;
5235 int ret;
5236
5237 domain = get_valid_domain_for_dev(sdev->dev);
5238 if (!domain)
5239 return -EINVAL;
5240
5241 spin_lock_irqsave(&device_domain_lock, flags);
5242 spin_lock(&iommu->lock);
5243
5244 ret = -EINVAL;
5245 info = sdev->dev->archdata.iommu;
5246 if (!info || !info->pasid_supported)
5247 goto out;
5248
5249 context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
5250 if (WARN_ON(!context))
5251 goto out;
5252
5253 ctx_lo = context[0].lo;
5254
5255 sdev->did = domain->iommu_did[iommu->seq_id];
5256 sdev->sid = PCI_DEVID(info->bus, info->devfn);
5257
5258 if (!(ctx_lo & CONTEXT_PASIDE)) {
11b93ebf
AR
5259 if (iommu->pasid_state_table)
5260 context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table);
65ca7f5f
JP
5261 context[1].lo = (u64)virt_to_phys(iommu->pasid_table) |
5262 intel_iommu_get_pts(iommu);
5263
2f26e0a9
DW
5264 wmb();
5265 /* CONTEXT_TT_MULTI_LEVEL and CONTEXT_TT_DEV_IOTLB are both
5266 * extended to permit requests-with-PASID if the PASIDE bit
5267 * is set. which makes sense. For CONTEXT_TT_PASS_THROUGH,
5268 * however, the PASIDE bit is ignored and requests-with-PASID
5269 * are unconditionally blocked. Which makes less sense.
5270 * So convert from CONTEXT_TT_PASS_THROUGH to one of the new
5271 * "guest mode" translation types depending on whether ATS
5272 * is available or not. Annoyingly, we can't use the new
5273 * modes *unless* PASIDE is set. */
5274 if ((ctx_lo & CONTEXT_TT_MASK) == (CONTEXT_TT_PASS_THROUGH << 2)) {
5275 ctx_lo &= ~CONTEXT_TT_MASK;
5276 if (info->ats_supported)
5277 ctx_lo |= CONTEXT_TT_PT_PASID_DEV_IOTLB << 2;
5278 else
5279 ctx_lo |= CONTEXT_TT_PT_PASID << 2;
5280 }
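		/*
		 * Example of the effect (illustrative): a pass-through
		 * context entry on a device reporting ATS support gets its
		 * translation type rewritten to CONTEXT_TT_PT_PASID_DEV_IOTLB,
		 * otherwise to CONTEXT_TT_PT_PASID; the intent is to keep
		 * pass-through behaviour for ordinary requests while letting
		 * requests-with-PASID be translated once PASIDE is set below.
		 */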
5281 ctx_lo |= CONTEXT_PASIDE;
5282 if (iommu->pasid_state_table)
5283 ctx_lo |= CONTEXT_DINVE;
5284 if (info->pri_supported)
5285 ctx_lo |= CONTEXT_PRS;
5286 context[0].lo = ctx_lo;
5287 wmb();
5288 iommu->flush.flush_context(iommu, sdev->did, sdev->sid,
5289 DMA_CCMD_MASK_NOBIT,
5290 DMA_CCMD_DEVICE_INVL);
5291 }
5292
5293 /* Enable PASID support in the device, if it wasn't already */
5294 if (!info->pasid_enabled)
5295 iommu_enable_dev_iotlb(info);
5296
5297 if (info->ats_enabled) {
5298 sdev->dev_iotlb = 1;
5299 sdev->qdep = info->ats_qdep;
5300 if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
5301 sdev->qdep = 0;
5302 }
5303 ret = 0;
5304
5305 out:
5306 spin_unlock(&iommu->lock);
5307 spin_unlock_irqrestore(&device_domain_lock, flags);
5308
5309 return ret;
5310}
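/*
 * Rough sketch of the extended context entry as programmed above
 * (illustrative, field positions abbreviated):
 *
 *	context[1].hi -> physical address of the PASID state table (if any)
 *	context[1].lo -> physical address of the PASID table | pts (size encoding)
 *	context[0].lo -> translation type (converted away from plain
 *			 pass-through) | CONTEXT_PASIDE, plus CONTEXT_DINVE
 *			 and CONTEXT_PRS where supported
 *
 * followed by a device-selective context-cache flush so the IOMMU
 * re-fetches the entry.
 */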
5311
5312struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
5313{
5314 struct intel_iommu *iommu;
5315 u8 bus, devfn;
5316
5317 if (iommu_dummy(dev)) {
5318 dev_warn(dev,
5319 "No IOMMU translation for device; cannot enable SVM\n");
5320 return NULL;
5321 }
5322
5323 iommu = device_to_iommu(dev, &bus, &devfn);
5324	if (!iommu) {
b9997e38 5325 dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
5326 return NULL;
5327 }
5328
5329 if (!iommu->pasid_table) {
b9997e38 5330 dev_err(dev, "PASID not enabled on IOMMU; cannot enable SVM\n");
5331 return NULL;
5332 }
5333
5334 return iommu;
5335}
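/*
 * Hedged usage note (assumed call order, not guaranteed by this file):
 * the SVM bind path is expected to look the unit up with
 * intel_svm_device_to_iommu() first and, when a device is bound, enable
 * PASID support on its context entry via intel_iommu_enable_pasid().
 */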
5336#endif /* CONFIG_INTEL_IOMMU_SVM */
5337
b0119e87 5338const struct iommu_ops intel_iommu_ops = {
5339 .capable = intel_iommu_capable,
5340 .domain_alloc = intel_iommu_domain_alloc,
5341 .domain_free = intel_iommu_domain_free,
5342 .attach_dev = intel_iommu_attach_device,
5343 .detach_dev = intel_iommu_detach_device,
5344 .map = intel_iommu_map,
5345 .unmap = intel_iommu_unmap,
5346 .map_sg = default_iommu_map_sg,
5347 .iova_to_phys = intel_iommu_iova_to_phys,
5348 .add_device = intel_iommu_add_device,
5349 .remove_device = intel_iommu_remove_device,
5350 .get_resv_regions = intel_iommu_get_resv_regions,
5351 .put_resv_regions = intel_iommu_put_resv_regions,
5352 .device_group = pci_device_group,
5353 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
a8bcbb0d 5354};
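/*
 * Minimal sketch (assuming the IOMMU core API of this kernel
 * generation): the init path publishes this table for all PCI devices
 * with something along the lines of
 *
 *	bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
 *
 * after which the core invokes .add_device, .domain_alloc and friends
 * on behalf of each device on the bus.
 */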
9af88143 5355
5356static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
5357{
5358 /* G4x/GM45 integrated gfx dmar support is totally busted. */
9f10e5bf 5359 pr_info("Disabling IOMMU for graphics on this chipset\n");
5360 dmar_map_gfx = 0;
5361}
5362
5363DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
5364DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
5365DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
5366DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
5367DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
5368DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
5369DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
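/*
 * (Illustrative note: the DECLARE_PCI_FIXUP_HEADER() entries above and
 * below register each quirk with the PCI core's header fixup pass, so
 * the callback runs as the matching device is enumerated.)
 */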
5370
d34d6517 5371static void quirk_iommu_rwbf(struct pci_dev *dev)
5372{
5373 /*
5374 * Mobile 4 Series Chipset neglects to set RWBF capability,
210561ff 5375 * but needs it. Same seems to hold for the desktop versions.
9af88143 5376 */
9f10e5bf 5377 pr_info("Forcing write-buffer flush capability\n");
5378 rwbf_quirk = 1;
5379}
5380
5381DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
5382DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
5383DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
5384DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
5385DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
5386DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
5387DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
e0fc7e0b 5388
5389#define GGC 0x52
5390#define GGC_MEMORY_SIZE_MASK (0xf << 8)
5391#define GGC_MEMORY_SIZE_NONE (0x0 << 8)
5392#define GGC_MEMORY_SIZE_1M (0x1 << 8)
5393#define GGC_MEMORY_SIZE_2M (0x3 << 8)
5394#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
5395#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
5396#define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
5397#define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
5398
d34d6517 5399static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
5400{
5401 unsigned short ggc;
5402
eecfd57f 5403 if (pci_read_config_word(dev, GGC, &ggc))
5404 return;
5405
eecfd57f 5406 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
9f10e5bf 5407 pr_info("BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
9eecabcb 5408 dmar_map_gfx = 0;
5409 } else if (dmar_map_gfx) {
5410 /* we have to ensure the gfx device is idle before we flush */
9f10e5bf 5411 pr_info("Disabling batched IOTLB flush on Ironlake\n");
5412 intel_iommu_strict = 1;
5413 }
5414}
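/*
 * Worked example (illustrative): GGC_MEMORY_VT_ENABLED is bit 11 of the
 * GGC register (0x8 << 8).  A BIOS that programs e.g. GGC_MEMORY_SIZE_2M
 * (0x3 << 8) has allocated graphics stolen memory but left the VT bit
 * clear, so there is no shadow GTT and the quirk disables DMAR for the
 * GPU; a value such as GGC_MEMORY_SIZE_2M_VT (0x9 << 8) passes the check
 * and, if graphics mapping stays enabled, merely forces strict IOTLB
 * flushing on these Ironlake parts.
 */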
5415DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
5416DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
5417DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
5418DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
5419
5420/* On Tylersburg chipsets, some BIOSes have been known to enable the
5421 ISOCH DMAR unit for the Azalia sound device, but not give it any
5422 TLB entries, which causes it to deadlock. Check for that. We do
5423 this in a function called from init_dmars(), instead of in a PCI
5424 quirk, because we don't want to print the obnoxious "BIOS broken"
5425 message if VT-d is actually disabled.
5426*/
5427static void __init check_tylersburg_isoch(void)
5428{
5429 struct pci_dev *pdev;
5430 uint32_t vtisochctrl;
5431
5432 /* If there's no Azalia in the system anyway, forget it. */
5433 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
5434 if (!pdev)
5435 return;
5436 pci_dev_put(pdev);
5437
5438 /* System Management Registers. Might be hidden, in which case
5439 we can't do the sanity check. But that's OK, because the
5440 known-broken BIOSes _don't_ actually hide it, so far. */
5441 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
5442 if (!pdev)
5443 return;
5444
5445 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
5446 pci_dev_put(pdev);
5447 return;
5448 }
5449
5450 pci_dev_put(pdev);
5451
5452 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
5453 if (vtisochctrl & 1)
5454 return;
5455
5456 /* Drop all bits other than the number of TLB entries */
5457 vtisochctrl &= 0x1c;
5458
5459 /* If we have the recommended number of TLB entries (16), fine. */
5460 if (vtisochctrl == 0x10)
5461 return;
5462
5463 /* Zero TLB entries? You get to ride the short bus to school. */
5464 if (!vtisochctrl) {
5465 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
5466 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
5467 dmi_get_system_info(DMI_BIOS_VENDOR),
5468 dmi_get_system_info(DMI_BIOS_VERSION),
5469 dmi_get_system_info(DMI_PRODUCT_VERSION));
5470 iommu_identity_mapping |= IDENTMAP_AZALIA;
5471 return;
5472 }
5473
5474	pr_warn("Recommended number of TLB entries for ISOCH unit is 16; your BIOS set %d\n",
5475 vtisochctrl);
5476}
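/*
 * Worked example (illustrative): the 0x1c mask above keeps bits 4:2 of
 * VTISOCHCTRL, and the code treats the masked value directly as the
 * number of TLB entries given to the isoch unit.  So 0x10 is the
 * recommended 16 entries and returns quietly, 0 trips the WARN and
 * forces identity mapping for the Azalia device, and anything else just
 * gets the pr_warn above.
 */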