/*
 * Copyright © 2006-2014 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * Authors: David Woodhouse <dwmw2@infradead.org>,
 *          Ashok Raj <ashok.raj@intel.com>,
 *          Shaohua Li <shaohua.li@intel.com>,
 *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
 *          Fenghua Yu <fenghua.yu@intel.com>
 *          Joerg Roedel <jroedel@suse.de>
 */

#define pr_fmt(fmt)	"DMAR: " fmt

#include <linux/init.h>
#include <linux/bitmap.h>
#include <linux/debugfs.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/dma-mapping.h>
#include <linux/mempool.h>
#include <linux/memory.h>
#include <linux/cpu.h>
#include <linux/timer.h>
#include <linux/io.h>
#include <linux/iova.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
#include <linux/syscore_ops.h>
#include <linux/tboot.h>
#include <linux/dmi.h>
#include <linux/pci-ats.h>
#include <linux/memblock.h>
#include <linux/dma-contiguous.h>
#include <linux/crash_dump.h>
#include <asm/irq_remapping.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>

#include "irq_remapping.h"

#define ROOT_SIZE		VTD_PAGE_SIZE
#define CONTEXT_SIZE		VTD_PAGE_SIZE

#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)

#define IOAPIC_RANGE_START	(0xfee00000)
#define IOAPIC_RANGE_END	(0xfeefffff)
#define IOVA_START_ADDR		(0x1000)

#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48

#define MAX_AGAW_WIDTH 64
#define MAX_AGAW_PFN_WIDTH	(MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)

#define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)

/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
   to match. That way, we can use 'unsigned long' for PFNs with impunity. */
#define DOMAIN_MAX_PFN(gaw)	((unsigned long) min_t(uint64_t, \
				__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
#define DOMAIN_MAX_ADDR(gaw)	(((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
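
/*
 * Worked example (editorial illustration, not in the original source):
 * with the default gaw of 48 bits and VTD_PAGE_SHIFT == 12,
 * __DOMAIN_MAX_PFN(48) == (1ULL << 36) - 1.  On 64-bit builds
 * DOMAIN_MAX_PFN(48) keeps that value; on 32-bit it clamps to ULONG_MAX
 * so PFNs still fit in an unsigned long.  DOMAIN_MAX_ADDR(48) is that
 * PFN shifted back up, i.e. 256 TiB minus one 4 KiB page.
 */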

/* IO virtual address start page frame number */
#define IOVA_START_PFN		(1)

#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
#define DMA_32BIT_PFN		IOVA_PFN(DMA_BIT_MASK(32))
#define DMA_64BIT_PFN		IOVA_PFN(DMA_BIT_MASK(64))

/* page table handling */
#define LEVEL_STRIDE		(9)
#define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)

/*
 * This bitmap is used to advertise the page sizes our hardware supports
 * to the IOMMU core, which will then use this information to split
 * physically contiguous memory regions it is mapping into page sizes
 * that we support.
 *
 * Traditionally the IOMMU core just handed us the mappings directly,
 * after making sure the size is an order of a 4KiB page and that the
 * mapping has natural alignment.
 *
 * To retain this behavior, we currently advertise that we support
 * all page sizes that are an order of 4KiB.
 *
 * If at some point we'd like to utilize the IOMMU core's new behavior,
 * we could change this to advertise the real page sizes we support.
 */
#define INTEL_IOMMU_PGSIZES	(~0xFFFUL)

static inline int agaw_to_level(int agaw)
{
	return agaw + 2;
}

static inline int agaw_to_width(int agaw)
{
	return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
}

static inline int width_to_agaw(int width)
{
	return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
}

static inline unsigned int level_to_offset_bits(int level)
{
	return (level - 1) * LEVEL_STRIDE;
}

static inline int pfn_level_offset(unsigned long pfn, int level)
{
	return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
}

static inline unsigned long level_mask(int level)
{
	return -1UL << level_to_offset_bits(level);
}

static inline unsigned long level_size(int level)
{
	return 1UL << level_to_offset_bits(level);
}

static inline unsigned long align_to_level(unsigned long pfn, int level)
{
	return (pfn + level_size(level) - 1) & level_mask(level);
}
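
/*
 * Worked example (editorial illustration, not in the original source):
 * DEFAULT_DOMAIN_ADDRESS_WIDTH is 48, so width_to_agaw(48) == 2 and
 * agaw_to_level(2) == 4, i.e. a four-level page table.  Each level
 * resolves LEVEL_STRIDE == 9 bits of the PFN, so at level 4
 * pfn_level_offset(pfn, 4) == (pfn >> 27) & 0x1ff, and level_size(2)
 * == 512 pages, i.e. 2 MiB of IOVA per level-2 entry.
 */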

static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
{
	return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
}

/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
   are never going to work. */
static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
{
	return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
}

static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
{
	return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
}
static inline unsigned long page_to_dma_pfn(struct page *pg)
{
	return mm_to_dma_pfn(page_to_pfn(pg));
}
static inline unsigned long virt_to_dma_pfn(void *p)
{
	return page_to_dma_pfn(virt_to_page(p));
}

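/*
 * Editorial note (not in the original source): on x86 PAGE_SHIFT and
 * VTD_PAGE_SHIFT are both 12, so these conversions are identity; they
 * exist so the arithmetic stays correct if the kernel page size ever
 * exceeds the fixed 4 KiB VT-d page size.
 */
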
/* global iommu list, set NULL for ignored DMAR units */
static struct intel_iommu **g_iommus;

static void __init check_tylersburg_isoch(void);
static int rwbf_quirk;

/*
 * set to 1 to panic kernel if can't successfully enable VT-d
 * (used when kernel is launched w/ TXT)
 */
static int force_on = 0;
int intel_iommu_tboot_noforce;

/*
 * 0: Present
 * 1-11: Reserved
 * 12-63: Context Ptr (12 - (haw-1))
 * 64-127: Reserved
 */
struct root_entry {
	u64	lo;
	u64	hi;
};
#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))

/*
 * Take a root_entry and return the Lower Context Table Pointer (LCTP)
 * if marked present.
 */
static phys_addr_t root_entry_lctp(struct root_entry *re)
{
	if (!(re->lo & 1))
		return 0;

	return re->lo & VTD_PAGE_MASK;
}

/*
 * Take a root_entry and return the Upper Context Table Pointer (UCTP)
 * if marked present.
 */
static phys_addr_t root_entry_uctp(struct root_entry *re)
{
	if (!(re->hi & 1))
		return 0;

	return re->hi & VTD_PAGE_MASK;
}
/*
 * low 64 bits:
 * 0: present
 * 1: fault processing disable
 * 2-3: translation type
 * 12-63: address space root
 * high 64 bits:
 * 0-2: address width
 * 3-6: aval
 * 8-23: domain id
 */
struct context_entry {
	u64 lo;
	u64 hi;
};

static inline void context_clear_pasid_enable(struct context_entry *context)
{
	context->lo &= ~(1ULL << 11);
}

static inline bool context_pasid_enabled(struct context_entry *context)
{
	return !!(context->lo & (1ULL << 11));
}

static inline void context_set_copied(struct context_entry *context)
{
	context->hi |= (1ull << 3);
}

static inline bool context_copied(struct context_entry *context)
{
	return !!(context->hi & (1ULL << 3));
}

static inline bool __context_present(struct context_entry *context)
{
	return (context->lo & 1);
}

static inline bool context_present(struct context_entry *context)
{
	return context_pasid_enabled(context) ?
	     __context_present(context) :
	     __context_present(context) && !context_copied(context);
}

static inline void context_set_present(struct context_entry *context)
{
	context->lo |= 1;
}

static inline void context_set_fault_enable(struct context_entry *context)
{
	context->lo &= (((u64)-1) << 2) | 1;
}

static inline void context_set_translation_type(struct context_entry *context,
						unsigned long value)
{
	context->lo &= (((u64)-1) << 4) | 3;
	context->lo |= (value & 3) << 2;
}

static inline void context_set_address_root(struct context_entry *context,
					    unsigned long value)
{
	context->lo &= ~VTD_PAGE_MASK;
	context->lo |= value & VTD_PAGE_MASK;
}

static inline void context_set_address_width(struct context_entry *context,
					     unsigned long value)
{
	context->hi |= value & 7;
}

static inline void context_set_domain_id(struct context_entry *context,
					 unsigned long value)
{
	context->hi |= (value & ((1 << 16) - 1)) << 8;
}

static inline int context_domain_id(struct context_entry *c)
{
	return((c->hi >> 8) & 0xffff);
}

static inline void context_clear_entry(struct context_entry *context)
{
	context->lo = 0;
	context->hi = 0;
}
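
/*
 * Editorial sketch (not in the original source): a context entry is
 * typically assembled with these helpers roughly as follows, cf.
 * domain_context_mapping_one() further down in this file:
 *
 *	context_clear_entry(context);
 *	context_set_domain_id(context, did);
 *	context_set_address_width(context, domain->agaw);
 *	context_set_address_root(context, virt_to_phys(domain->pgd));
 *	context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL);
 *	context_set_fault_enable(context);
 *	context_set_present(context);
 */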

/*
 * 0: readable
 * 1: writable
 * 2-6: reserved
 * 7: super page
 * 8-10: available
 * 11: snoop behavior
 * 12-63: Host physical address
 */
struct dma_pte {
	u64 val;
};

static inline void dma_clear_pte(struct dma_pte *pte)
{
	pte->val = 0;
}

static inline u64 dma_pte_addr(struct dma_pte *pte)
{
#ifdef CONFIG_64BIT
	return pte->val & VTD_PAGE_MASK;
#else
	/* Must have a full atomic 64-bit read */
	return  __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
#endif
}

static inline bool dma_pte_present(struct dma_pte *pte)
{
	return (pte->val & 3) != 0;
}

static inline bool dma_pte_superpage(struct dma_pte *pte)
{
	return (pte->val & DMA_PTE_LARGE_PAGE);
}

static inline int first_pte_in_page(struct dma_pte *pte)
{
	return !((unsigned long)pte & ~VTD_PAGE_MASK);
}
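
/*
 * Editorial illustration (not in the original source): a leaf PTE value
 * of 0x12345003 encodes host physical page 0x12345000 with DMA_PTE_READ
 * and DMA_PTE_WRITE set, so dma_pte_present() is true; setting bit 7
 * (DMA_PTE_LARGE_PAGE) instead marks the entry as a superpage mapping.
 */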

/*
 * This domain is a statically identity mapping domain.
 * 1. This domain creates a static 1:1 mapping to all usable memory.
 * 2. It maps to each iommu if successful.
 * 3. Each iommu maps to this domain if successful.
 */
static struct dmar_domain *si_domain;
static int hw_pass_through = 1;

/*
 * Domain represents a virtual machine; more than one device
 * across iommus may be owned by one domain, e.g. a kvm guest.
 */
#define DOMAIN_FLAG_VIRTUAL_MACHINE	(1 << 0)

/* si_domain contains multiple devices */
#define DOMAIN_FLAG_STATIC_IDENTITY	(1 << 1)

#define for_each_domain_iommu(idx, domain)			\
	for (idx = 0; idx < g_num_of_iommus; idx++)		\
		if (domain->iommu_refcnt[idx])

struct dmar_domain {
	int	nid;			/* node id */

	unsigned	iommu_refcnt[DMAR_UNITS_SUPPORTED];
					/* Refcount of devices per iommu */


	u16		iommu_did[DMAR_UNITS_SUPPORTED];
					/* Domain ids per IOMMU. Use u16 since
					 * domain ids are 16 bit wide according
					 * to VT-d spec, section 9.3 */

	bool has_iotlb_device;
	struct list_head devices;	/* all devices' list */
	struct iova_domain iovad;	/* iova's that belong to this domain */

	struct dma_pte	*pgd;		/* virtual address */
	int		gaw;		/* max guest address width */

	/* adjusted guest address width, 0 is level 2 30-bit */
	int		agaw;

	int		flags;		/* flags to find out type of domain */

	int		iommu_coherency;/* indicate coherency of iommu access */
	int		iommu_snooping; /* indicate snooping control feature*/
	int		iommu_count;	/* reference count of iommu */
	int		iommu_superpage;/* Level of superpages supported:
					   0 == 4KiB (no superpages), 1 == 2MiB,
					   2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
	u64		max_addr;	/* maximum mapped address */

	struct iommu_domain domain;	/* generic domain data structure for
					   iommu core */
};

/* PCI domain-device relationship */
struct device_domain_info {
	struct list_head link;	/* link to domain siblings */
	struct list_head global; /* link to global list */
	u8 bus;			/* PCI bus number */
	u8 devfn;		/* PCI devfn number */
	u8 pasid_supported:3;
	u8 pasid_enabled:1;
	u8 pri_supported:1;
	u8 pri_enabled:1;
	u8 ats_supported:1;
	u8 ats_enabled:1;
	u8 ats_qdep;
	struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
	struct intel_iommu *iommu; /* IOMMU used by this device */
	struct dmar_domain *domain; /* pointer to domain */
};

struct dmar_rmrr_unit {
	struct list_head list;		/* list of rmrr units	*/
	struct acpi_dmar_header *hdr;	/* ACPI header		*/
	u64	base_address;		/* reserved base address*/
	u64	end_address;		/* reserved end address */
	struct dmar_dev_scope *devices;	/* target devices */
	int	devices_cnt;		/* target device count */
	struct iommu_resv_region *resv;	/* reserved region handle */
};

struct dmar_atsr_unit {
	struct list_head list;		/* list of ATSR units */
	struct acpi_dmar_header *hdr;	/* ACPI header */
	struct dmar_dev_scope *devices;	/* target devices */
	int devices_cnt;		/* target device count */
	u8 include_all:1;		/* include all ports */
};

static LIST_HEAD(dmar_atsr_units);
static LIST_HEAD(dmar_rmrr_units);

#define for_each_rmrr_units(rmrr) \
	list_for_each_entry(rmrr, &dmar_rmrr_units, list)
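
/*
 * Editorial usage sketch (not in the original source):
 *
 *	struct dmar_rmrr_unit *rmrr;
 *
 *	for_each_rmrr_units(rmrr) {
 *		// e.g. test a device against rmrr->base_address..end_address
 *	}
 */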

static void flush_unmaps_timeout(unsigned long data);

struct deferred_flush_entry {
	unsigned long iova_pfn;
	unsigned long nrpages;
	struct dmar_domain *domain;
	struct page *freelist;
};

#define HIGH_WATER_MARK 250
struct deferred_flush_table {
	int next;
	struct deferred_flush_entry entries[HIGH_WATER_MARK];
};

struct deferred_flush_data {
	spinlock_t lock;
	int timer_on;
	struct timer_list timer;
	long size;
	struct deferred_flush_table *tables;
};

static DEFINE_PER_CPU(struct deferred_flush_data, deferred_flush);

/* bitmap for indexing intel_iommus */
static int g_num_of_iommus;

static void domain_exit(struct dmar_domain *domain);
static void domain_remove_dev_info(struct dmar_domain *domain);
static void dmar_remove_one_dev_info(struct dmar_domain *domain,
				     struct device *dev);
static void __dmar_remove_one_dev_info(struct device_domain_info *info);
static void domain_context_clear(struct intel_iommu *iommu,
				 struct device *dev);
static int domain_detach_iommu(struct dmar_domain *domain,
			       struct intel_iommu *iommu);

#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
int dmar_disabled = 0;
#else
int dmar_disabled = 1;
#endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/

int intel_iommu_enabled = 0;
EXPORT_SYMBOL_GPL(intel_iommu_enabled);

static int dmar_map_gfx = 1;
static int dmar_forcedac;
static int intel_iommu_strict;
static int intel_iommu_superpage = 1;
static int intel_iommu_ecs = 1;
static int intel_iommu_pasid28;
static int iommu_identity_mapping;

#define IDENTMAP_ALL		1
#define IDENTMAP_GFX		2
#define IDENTMAP_AZALIA		4

/* Broadwell and Skylake have broken ECS support - normal so-called "second
 * level" translation of DMA requests-without-PASID doesn't actually happen
 * unless you also set the NESTE bit in an extended context-entry. Which of
 * course means that SVM doesn't work because it's trying to do nested
 * translation of the physical addresses it finds in the process page tables,
 * through the IOVA->phys mapping found in the "second level" page tables.
 *
 * The VT-d specification was retroactively changed to change the definition
 * of the capability bits and pretend that Broadwell/Skylake never happened...
 * but unfortunately the wrong bit was changed. It's ECS which is broken, but
 * for some reason it was the PASID capability bit which was redefined (from
 * bit 28 on BDW/SKL to bit 40 in future).
 *
 * So our test for ECS needs to eschew those implementations which set the old
 * PASID capability bit 28, since those are the ones on which ECS is broken.
 * Unless we are working around the 'pasid28' limitations, that is, by putting
 * the device into passthrough mode for normal DMA and thus masking the bug.
 */
#define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap) && \
			    (intel_iommu_pasid28 || !ecap_broken_pasid(iommu->ecap)))
/* PASID support is thus enabled if ECS is enabled and *either* of the old
 * or new capability bits are set. */
#define pasid_enabled(iommu) (ecs_enabled(iommu) &&			\
			      (ecap_pasid(iommu->ecap) || ecap_broken_pasid(iommu->ecap)))

int intel_iommu_gfx_mapped;
EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);

#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
static DEFINE_SPINLOCK(device_domain_lock);
static LIST_HEAD(device_domain_list);

const struct iommu_ops intel_iommu_ops;

4158c2ec
JR
554static bool translation_pre_enabled(struct intel_iommu *iommu)
555{
556 return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
557}
558
091d42e4
JR
559static void clear_translation_pre_enabled(struct intel_iommu *iommu)
560{
561 iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
562}
563
4158c2ec
JR
564static void init_translation_status(struct intel_iommu *iommu)
565{
566 u32 gsts;
567
568 gsts = readl(iommu->reg + DMAR_GSTS_REG);
569 if (gsts & DMA_GSTS_TES)
570 iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
571}
572
00a77deb
JR
573/* Convert generic 'struct iommu_domain to private struct dmar_domain */
574static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
575{
576 return container_of(dom, struct dmar_domain, domain);
577}
578
ba395927
KA
579static int __init intel_iommu_setup(char *str)
580{
581 if (!str)
582 return -EINVAL;
583 while (*str) {
0cd5c3c8
KM
584 if (!strncmp(str, "on", 2)) {
585 dmar_disabled = 0;
9f10e5bf 586 pr_info("IOMMU enabled\n");
0cd5c3c8 587 } else if (!strncmp(str, "off", 3)) {
ba395927 588 dmar_disabled = 1;
9f10e5bf 589 pr_info("IOMMU disabled\n");
ba395927
KA
590 } else if (!strncmp(str, "igfx_off", 8)) {
591 dmar_map_gfx = 0;
9f10e5bf 592 pr_info("Disable GFX device mapping\n");
7d3b03ce 593 } else if (!strncmp(str, "forcedac", 8)) {
9f10e5bf 594 pr_info("Forcing DAC for PCI devices\n");
7d3b03ce 595 dmar_forcedac = 1;
5e0d2a6f 596 } else if (!strncmp(str, "strict", 6)) {
9f10e5bf 597 pr_info("Disable batched IOTLB flush\n");
5e0d2a6f 598 intel_iommu_strict = 1;
6dd9a7c7 599 } else if (!strncmp(str, "sp_off", 6)) {
9f10e5bf 600 pr_info("Disable supported super page\n");
6dd9a7c7 601 intel_iommu_superpage = 0;
c83b2f20
DW
602 } else if (!strncmp(str, "ecs_off", 7)) {
603 printk(KERN_INFO
604 "Intel-IOMMU: disable extended context table support\n");
605 intel_iommu_ecs = 0;
ae853ddb
DW
606 } else if (!strncmp(str, "pasid28", 7)) {
607 printk(KERN_INFO
608 "Intel-IOMMU: enable pre-production PASID support\n");
609 intel_iommu_pasid28 = 1;
610 iommu_identity_mapping |= IDENTMAP_GFX;
bfd20f1c
SL
611 } else if (!strncmp(str, "tboot_noforce", 13)) {
612 printk(KERN_INFO
613 "Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
614 intel_iommu_tboot_noforce = 1;
ba395927
KA
615 }
616
617 str += strcspn(str, ",");
618 while (*str == ',')
619 str++;
620 }
621 return 0;
622}
623__setup("intel_iommu=", intel_iommu_setup);
624
625static struct kmem_cache *iommu_domain_cache;
626static struct kmem_cache *iommu_devinfo_cache;
ba395927 627
9452d5bf
JR
628static struct dmar_domain* get_iommu_domain(struct intel_iommu *iommu, u16 did)
629{
8bf47816
JR
630 struct dmar_domain **domains;
631 int idx = did >> 8;
632
633 domains = iommu->domains[idx];
634 if (!domains)
635 return NULL;
636
637 return domains[did & 0xff];
9452d5bf
JR
638}
639
640static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
641 struct dmar_domain *domain)
642{
8bf47816
JR
643 struct dmar_domain **domains;
644 int idx = did >> 8;
645
646 if (!iommu->domains[idx]) {
647 size_t size = 256 * sizeof(struct dmar_domain *);
648 iommu->domains[idx] = kzalloc(size, GFP_ATOMIC);
649 }
650
651 domains = iommu->domains[idx];
652 if (WARN_ON(!domains))
653 return;
654 else
655 domains[did & 0xff] = domain;
9452d5bf
JR
656}
657
4c923d47 658static inline void *alloc_pgtable_page(int node)
eb3fa7cb 659{
4c923d47
SS
660 struct page *page;
661 void *vaddr = NULL;
eb3fa7cb 662
4c923d47
SS
663 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
664 if (page)
665 vaddr = page_address(page);
eb3fa7cb 666 return vaddr;
ba395927
KA
667}
668
669static inline void free_pgtable_page(void *vaddr)
670{
671 free_page((unsigned long)vaddr);
672}
673
674static inline void *alloc_domain_mem(void)
675{
354bb65e 676 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
ba395927
KA
677}
678
38717946 679static void free_domain_mem(void *vaddr)
ba395927
KA
680{
681 kmem_cache_free(iommu_domain_cache, vaddr);
682}
683
684static inline void * alloc_devinfo_mem(void)
685{
354bb65e 686 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
ba395927
KA
687}
688
689static inline void free_devinfo_mem(void *vaddr)
690{
691 kmem_cache_free(iommu_devinfo_cache, vaddr);
692}
693
ab8dfe25
JL
694static inline int domain_type_is_vm(struct dmar_domain *domain)
695{
696 return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE;
697}
698
28ccce0d
JR
699static inline int domain_type_is_si(struct dmar_domain *domain)
700{
701 return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
702}
703
ab8dfe25
JL
704static inline int domain_type_is_vm_or_si(struct dmar_domain *domain)
705{
706 return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE |
707 DOMAIN_FLAG_STATIC_IDENTITY);
708}
1b573683 709
162d1b10
JL
710static inline int domain_pfn_supported(struct dmar_domain *domain,
711 unsigned long pfn)
712{
713 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
714
715 return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
716}
717
4ed0d3e6 718static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
1b573683
WH
719{
720 unsigned long sagaw;
721 int agaw = -1;
722
723 sagaw = cap_sagaw(iommu->cap);
4ed0d3e6 724 for (agaw = width_to_agaw(max_gaw);
1b573683
WH
725 agaw >= 0; agaw--) {
726 if (test_bit(agaw, &sagaw))
727 break;
728 }
729
730 return agaw;
731}
732
4ed0d3e6
FY
733/*
734 * Calculate max SAGAW for each iommu.
735 */
736int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
737{
738 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
739}
740
741/*
742 * calculate agaw for each iommu.
743 * "SAGAW" may be different across iommus, use a default agaw, and
744 * get a supported less agaw for iommus that don't support the default agaw.
745 */
746int iommu_calculate_agaw(struct intel_iommu *iommu)
747{
748 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
749}
750
2c2e2c38 751/* This function only returns a single iommu in a domain */
8c11e798
WH
752static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
753{
754 int iommu_id;
755
2c2e2c38 756 /* si_domain and vm domain should not get here. */
ab8dfe25 757 BUG_ON(domain_type_is_vm_or_si(domain));
29a27719
JR
758 for_each_domain_iommu(iommu_id, domain)
759 break;
760
8c11e798
WH
761 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
762 return NULL;
763
764 return g_iommus[iommu_id];
765}
766
8e604097
WH
767static void domain_update_iommu_coherency(struct dmar_domain *domain)
768{
d0501960
DW
769 struct dmar_drhd_unit *drhd;
770 struct intel_iommu *iommu;
2f119c78
QL
771 bool found = false;
772 int i;
2e12bc29 773
d0501960 774 domain->iommu_coherency = 1;
8e604097 775
29a27719 776 for_each_domain_iommu(i, domain) {
2f119c78 777 found = true;
8e604097
WH
778 if (!ecap_coherent(g_iommus[i]->ecap)) {
779 domain->iommu_coherency = 0;
780 break;
781 }
8e604097 782 }
d0501960
DW
783 if (found)
784 return;
785
786 /* No hardware attached; use lowest common denominator */
787 rcu_read_lock();
788 for_each_active_iommu(iommu, drhd) {
789 if (!ecap_coherent(iommu->ecap)) {
790 domain->iommu_coherency = 0;
791 break;
792 }
793 }
794 rcu_read_unlock();
8e604097
WH
795}
796
161f6934 797static int domain_update_iommu_snooping(struct intel_iommu *skip)
58c610bd 798{
161f6934
JL
799 struct dmar_drhd_unit *drhd;
800 struct intel_iommu *iommu;
801 int ret = 1;
58c610bd 802
161f6934
JL
803 rcu_read_lock();
804 for_each_active_iommu(iommu, drhd) {
805 if (iommu != skip) {
806 if (!ecap_sc_support(iommu->ecap)) {
807 ret = 0;
808 break;
809 }
58c610bd 810 }
58c610bd 811 }
161f6934
JL
812 rcu_read_unlock();
813
814 return ret;
58c610bd
SY
815}
816
161f6934 817static int domain_update_iommu_superpage(struct intel_iommu *skip)
6dd9a7c7 818{
8140a95d 819 struct dmar_drhd_unit *drhd;
161f6934 820 struct intel_iommu *iommu;
8140a95d 821 int mask = 0xf;
6dd9a7c7
YS
822
823 if (!intel_iommu_superpage) {
161f6934 824 return 0;
6dd9a7c7
YS
825 }
826
8140a95d 827 /* set iommu_superpage to the smallest common denominator */
0e242612 828 rcu_read_lock();
8140a95d 829 for_each_active_iommu(iommu, drhd) {
161f6934
JL
830 if (iommu != skip) {
831 mask &= cap_super_page_val(iommu->cap);
832 if (!mask)
833 break;
6dd9a7c7
YS
834 }
835 }
0e242612
JL
836 rcu_read_unlock();
837
161f6934 838 return fls(mask);
6dd9a7c7
YS
839}
840
58c610bd
SY
841/* Some capabilities may be different across iommus */
842static void domain_update_iommu_cap(struct dmar_domain *domain)
843{
844 domain_update_iommu_coherency(domain);
161f6934
JL
845 domain->iommu_snooping = domain_update_iommu_snooping(NULL);
846 domain->iommu_superpage = domain_update_iommu_superpage(NULL);
58c610bd
SY
847}
848
03ecc32c
DW
849static inline struct context_entry *iommu_context_addr(struct intel_iommu *iommu,
850 u8 bus, u8 devfn, int alloc)
851{
852 struct root_entry *root = &iommu->root_entry[bus];
853 struct context_entry *context;
854 u64 *entry;
855
4df4eab1 856 entry = &root->lo;
c83b2f20 857 if (ecs_enabled(iommu)) {
03ecc32c
DW
858 if (devfn >= 0x80) {
859 devfn -= 0x80;
860 entry = &root->hi;
861 }
862 devfn *= 2;
863 }
03ecc32c
DW
864 if (*entry & 1)
865 context = phys_to_virt(*entry & VTD_PAGE_MASK);
866 else {
867 unsigned long phy_addr;
868 if (!alloc)
869 return NULL;
870
871 context = alloc_pgtable_page(iommu->node);
872 if (!context)
873 return NULL;
874
875 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
876 phy_addr = virt_to_phys((void *)context);
877 *entry = phy_addr | 1;
878 __iommu_flush_cache(iommu, entry, sizeof(*entry));
879 }
880 return &context[devfn];
881}
882
4ed6a540
DW
883static int iommu_dummy(struct device *dev)
884{
885 return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
886}
887
156baca8 888static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
c7151a8d
WH
889{
890 struct dmar_drhd_unit *drhd = NULL;
b683b230 891 struct intel_iommu *iommu;
156baca8
DW
892 struct device *tmp;
893 struct pci_dev *ptmp, *pdev = NULL;
aa4d066a 894 u16 segment = 0;
c7151a8d
WH
895 int i;
896
4ed6a540
DW
897 if (iommu_dummy(dev))
898 return NULL;
899
156baca8 900 if (dev_is_pci(dev)) {
1c387188
AR
901 struct pci_dev *pf_pdev;
902
156baca8 903 pdev = to_pci_dev(dev);
1c387188
AR
904 /* VFs aren't listed in scope tables; we need to look up
905 * the PF instead to find the IOMMU. */
906 pf_pdev = pci_physfn(pdev);
907 dev = &pf_pdev->dev;
156baca8 908 segment = pci_domain_nr(pdev->bus);
ca5b74d2 909 } else if (has_acpi_companion(dev))
156baca8
DW
910 dev = &ACPI_COMPANION(dev)->dev;
911
0e242612 912 rcu_read_lock();
b683b230 913 for_each_active_iommu(iommu, drhd) {
156baca8 914 if (pdev && segment != drhd->segment)
276dbf99 915 continue;
c7151a8d 916
b683b230 917 for_each_active_dev_scope(drhd->devices,
156baca8
DW
918 drhd->devices_cnt, i, tmp) {
919 if (tmp == dev) {
1c387188
AR
920 /* For a VF use its original BDF# not that of the PF
921 * which we used for the IOMMU lookup. Strictly speaking
922 * we could do this for all PCI devices; we only need to
923 * get the BDF# from the scope table for ACPI matches. */
5003ae1e 924 if (pdev && pdev->is_virtfn)
1c387188
AR
925 goto got_pdev;
926
156baca8
DW
927 *bus = drhd->devices[i].bus;
928 *devfn = drhd->devices[i].devfn;
b683b230 929 goto out;
156baca8
DW
930 }
931
932 if (!pdev || !dev_is_pci(tmp))
933 continue;
934
935 ptmp = to_pci_dev(tmp);
936 if (ptmp->subordinate &&
937 ptmp->subordinate->number <= pdev->bus->number &&
938 ptmp->subordinate->busn_res.end >= pdev->bus->number)
939 goto got_pdev;
924b6231 940 }
c7151a8d 941
156baca8
DW
942 if (pdev && drhd->include_all) {
943 got_pdev:
944 *bus = pdev->bus->number;
945 *devfn = pdev->devfn;
b683b230 946 goto out;
156baca8 947 }
c7151a8d 948 }
b683b230 949 iommu = NULL;
156baca8 950 out:
0e242612 951 rcu_read_unlock();
c7151a8d 952
b683b230 953 return iommu;
c7151a8d
WH
954}
955
5331fe6f
WH
956static void domain_flush_cache(struct dmar_domain *domain,
957 void *addr, int size)
958{
959 if (!domain->iommu_coherency)
960 clflush_cache_range(addr, size);
961}
962
ba395927
KA
963static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
964{
ba395927 965 struct context_entry *context;
03ecc32c 966 int ret = 0;
ba395927
KA
967 unsigned long flags;
968
969 spin_lock_irqsave(&iommu->lock, flags);
03ecc32c
DW
970 context = iommu_context_addr(iommu, bus, devfn, 0);
971 if (context)
972 ret = context_present(context);
ba395927
KA
973 spin_unlock_irqrestore(&iommu->lock, flags);
974 return ret;
975}
976
977static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
978{
ba395927
KA
979 struct context_entry *context;
980 unsigned long flags;
981
982 spin_lock_irqsave(&iommu->lock, flags);
03ecc32c 983 context = iommu_context_addr(iommu, bus, devfn, 0);
ba395927 984 if (context) {
03ecc32c
DW
985 context_clear_entry(context);
986 __iommu_flush_cache(iommu, context, sizeof(*context));
ba395927
KA
987 }
988 spin_unlock_irqrestore(&iommu->lock, flags);
989}
990
991static void free_context_table(struct intel_iommu *iommu)
992{
ba395927
KA
993 int i;
994 unsigned long flags;
995 struct context_entry *context;
996
997 spin_lock_irqsave(&iommu->lock, flags);
998 if (!iommu->root_entry) {
999 goto out;
1000 }
1001 for (i = 0; i < ROOT_ENTRY_NR; i++) {
03ecc32c 1002 context = iommu_context_addr(iommu, i, 0, 0);
ba395927
KA
1003 if (context)
1004 free_pgtable_page(context);
03ecc32c 1005
c83b2f20 1006 if (!ecs_enabled(iommu))
03ecc32c
DW
1007 continue;
1008
1009 context = iommu_context_addr(iommu, i, 0x80, 0);
1010 if (context)
1011 free_pgtable_page(context);
1012
ba395927
KA
1013 }
1014 free_pgtable_page(iommu->root_entry);
1015 iommu->root_entry = NULL;
1016out:
1017 spin_unlock_irqrestore(&iommu->lock, flags);
1018}
1019
b026fd28 1020static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
5cf0a76f 1021 unsigned long pfn, int *target_level)
ba395927 1022{
ba395927
KA
1023 struct dma_pte *parent, *pte = NULL;
1024 int level = agaw_to_level(domain->agaw);
4399c8bf 1025 int offset;
ba395927
KA
1026
1027 BUG_ON(!domain->pgd);
f9423606 1028
162d1b10 1029 if (!domain_pfn_supported(domain, pfn))
f9423606
JS
1030 /* Address beyond IOMMU's addressing capabilities. */
1031 return NULL;
1032
ba395927
KA
1033 parent = domain->pgd;
1034
5cf0a76f 1035 while (1) {
ba395927
KA
1036 void *tmp_page;
1037
b026fd28 1038 offset = pfn_level_offset(pfn, level);
ba395927 1039 pte = &parent[offset];
5cf0a76f 1040 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
6dd9a7c7 1041 break;
5cf0a76f 1042 if (level == *target_level)
ba395927
KA
1043 break;
1044
19c239ce 1045 if (!dma_pte_present(pte)) {
c85994e4
DW
1046 uint64_t pteval;
1047
4c923d47 1048 tmp_page = alloc_pgtable_page(domain->nid);
ba395927 1049
206a73c1 1050 if (!tmp_page)
ba395927 1051 return NULL;
206a73c1 1052
c85994e4 1053 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
64de5af0 1054 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
effad4b5 1055 if (cmpxchg64(&pte->val, 0ULL, pteval))
c85994e4
DW
1056 /* Someone else set it while we were thinking; use theirs. */
1057 free_pgtable_page(tmp_page);
effad4b5 1058 else
c85994e4 1059 domain_flush_cache(domain, pte, sizeof(*pte));
ba395927 1060 }
5cf0a76f
DW
1061 if (level == 1)
1062 break;
1063
19c239ce 1064 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
1065 level--;
1066 }
1067
5cf0a76f
DW
1068 if (!*target_level)
1069 *target_level = level;
1070
ba395927
KA
1071 return pte;
1072}
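
/*
 * Editorial illustration (not in the original source): with agaw == 2
 * (a four-level table) pfn_to_dma_pte() indexes each table with
 * pfn_level_offset(), i.e. PFN bits 27-35 at level 4, 18-26 at level 3,
 * 9-17 at level 2 and 0-8 at level 1, allocating missing intermediate
 * pages as it descends.  Passing *target_level == 0 means descend as far
 * as possible, stopping early at a superpage or the first non-present
 * entry.
 */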
1073
6dd9a7c7 1074
ba395927 1075/* return address's pte at specific level */
90dcfb5e
DW
1076static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
1077 unsigned long pfn,
6dd9a7c7 1078 int level, int *large_page)
ba395927
KA
1079{
1080 struct dma_pte *parent, *pte = NULL;
1081 int total = agaw_to_level(domain->agaw);
1082 int offset;
1083
1084 parent = domain->pgd;
1085 while (level <= total) {
90dcfb5e 1086 offset = pfn_level_offset(pfn, total);
ba395927
KA
1087 pte = &parent[offset];
1088 if (level == total)
1089 return pte;
1090
6dd9a7c7
YS
1091 if (!dma_pte_present(pte)) {
1092 *large_page = total;
ba395927 1093 break;
6dd9a7c7
YS
1094 }
1095
e16922af 1096 if (dma_pte_superpage(pte)) {
6dd9a7c7
YS
1097 *large_page = total;
1098 return pte;
1099 }
1100
19c239ce 1101 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
1102 total--;
1103 }
1104 return NULL;
1105}
1106
ba395927 1107/* clear last level pte, a tlb flush should be followed */
5cf0a76f 1108static void dma_pte_clear_range(struct dmar_domain *domain,
595badf5
DW
1109 unsigned long start_pfn,
1110 unsigned long last_pfn)
ba395927 1111{
6dd9a7c7 1112 unsigned int large_page = 1;
310a5ab9 1113 struct dma_pte *first_pte, *pte;
66eae846 1114
162d1b10
JL
1115 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1116 BUG_ON(!domain_pfn_supported(domain, last_pfn));
59c36286 1117 BUG_ON(start_pfn > last_pfn);
ba395927 1118
04b18e65 1119 /* we don't need lock here; nobody else touches the iova range */
59c36286 1120 do {
6dd9a7c7
YS
1121 large_page = 1;
1122 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
310a5ab9 1123 if (!pte) {
6dd9a7c7 1124 start_pfn = align_to_level(start_pfn + 1, large_page + 1);
310a5ab9
DW
1125 continue;
1126 }
6dd9a7c7 1127 do {
310a5ab9 1128 dma_clear_pte(pte);
6dd9a7c7 1129 start_pfn += lvl_to_nr_pages(large_page);
310a5ab9 1130 pte++;
75e6bf96
DW
1131 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
1132
310a5ab9
DW
1133 domain_flush_cache(domain, first_pte,
1134 (void *)pte - (void *)first_pte);
59c36286
DW
1135
1136 } while (start_pfn && start_pfn <= last_pfn);
ba395927
KA
1137}
1138
3269ee0b
AW
1139static void dma_pte_free_level(struct dmar_domain *domain, int level,
1140 struct dma_pte *pte, unsigned long pfn,
1141 unsigned long start_pfn, unsigned long last_pfn)
1142{
1143 pfn = max(start_pfn, pfn);
1144 pte = &pte[pfn_level_offset(pfn, level)];
1145
1146 do {
1147 unsigned long level_pfn;
1148 struct dma_pte *level_pte;
1149
1150 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
1151 goto next;
1152
f7116e11 1153 level_pfn = pfn & level_mask(level);
3269ee0b
AW
1154 level_pte = phys_to_virt(dma_pte_addr(pte));
1155
1156 if (level > 2)
1157 dma_pte_free_level(domain, level - 1, level_pte,
1158 level_pfn, start_pfn, last_pfn);
1159
1160 /* If range covers entire pagetable, free it */
1161 if (!(start_pfn > level_pfn ||
08336fd2 1162 last_pfn < level_pfn + level_size(level) - 1)) {
3269ee0b
AW
1163 dma_clear_pte(pte);
1164 domain_flush_cache(domain, pte, sizeof(*pte));
1165 free_pgtable_page(level_pte);
1166 }
1167next:
1168 pfn += level_size(level);
1169 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1170}
1171
3d1a2442 1172/* clear last level (leaf) ptes and free page table pages. */
ba395927 1173static void dma_pte_free_pagetable(struct dmar_domain *domain,
d794dc9b
DW
1174 unsigned long start_pfn,
1175 unsigned long last_pfn)
ba395927 1176{
162d1b10
JL
1177 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1178 BUG_ON(!domain_pfn_supported(domain, last_pfn));
59c36286 1179 BUG_ON(start_pfn > last_pfn);
ba395927 1180
d41a4adb
JL
1181 dma_pte_clear_range(domain, start_pfn, last_pfn);
1182
f3a0a52f 1183 /* We don't need lock here; nobody else touches the iova range */
3269ee0b
AW
1184 dma_pte_free_level(domain, agaw_to_level(domain->agaw),
1185 domain->pgd, 0, start_pfn, last_pfn);
6660c63a 1186
ba395927 1187 /* free pgd */
d794dc9b 1188 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
ba395927
KA
1189 free_pgtable_page(domain->pgd);
1190 domain->pgd = NULL;
1191 }
1192}
1193
ea8ea460
DW
1194/* When a page at a given level is being unlinked from its parent, we don't
1195 need to *modify* it at all. All we need to do is make a list of all the
1196 pages which can be freed just as soon as we've flushed the IOTLB and we
1197 know the hardware page-walk will no longer touch them.
1198 The 'pte' argument is the *parent* PTE, pointing to the page that is to
1199 be freed. */
1200static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1201 int level, struct dma_pte *pte,
1202 struct page *freelist)
1203{
1204 struct page *pg;
1205
1206 pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1207 pg->freelist = freelist;
1208 freelist = pg;
1209
1210 if (level == 1)
1211 return freelist;
1212
adeb2590
JL
1213 pte = page_address(pg);
1214 do {
ea8ea460
DW
1215 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1216 freelist = dma_pte_list_pagetables(domain, level - 1,
1217 pte, freelist);
adeb2590
JL
1218 pte++;
1219 } while (!first_pte_in_page(pte));
ea8ea460
DW
1220
1221 return freelist;
1222}
1223
1224static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1225 struct dma_pte *pte, unsigned long pfn,
1226 unsigned long start_pfn,
1227 unsigned long last_pfn,
1228 struct page *freelist)
1229{
1230 struct dma_pte *first_pte = NULL, *last_pte = NULL;
1231
1232 pfn = max(start_pfn, pfn);
1233 pte = &pte[pfn_level_offset(pfn, level)];
1234
1235 do {
1236 unsigned long level_pfn;
1237
1238 if (!dma_pte_present(pte))
1239 goto next;
1240
1241 level_pfn = pfn & level_mask(level);
1242
1243 /* If range covers entire pagetable, free it */
1244 if (start_pfn <= level_pfn &&
1245 last_pfn >= level_pfn + level_size(level) - 1) {
 1246 /* These subordinate page tables are going away entirely. Don't
 1247 bother to clear them; we're just going to *free* them. */
1248 if (level > 1 && !dma_pte_superpage(pte))
1249 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1250
1251 dma_clear_pte(pte);
1252 if (!first_pte)
1253 first_pte = pte;
1254 last_pte = pte;
1255 } else if (level > 1) {
1256 /* Recurse down into a level that isn't *entirely* obsolete */
1257 freelist = dma_pte_clear_level(domain, level - 1,
1258 phys_to_virt(dma_pte_addr(pte)),
1259 level_pfn, start_pfn, last_pfn,
1260 freelist);
1261 }
1262next:
1263 pfn += level_size(level);
1264 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1265
1266 if (first_pte)
1267 domain_flush_cache(domain, first_pte,
1268 (void *)++last_pte - (void *)first_pte);
1269
1270 return freelist;
1271}
1272
1273/* We can't just free the pages because the IOMMU may still be walking
1274 the page tables, and may have cached the intermediate levels. The
1275 pages can only be freed after the IOTLB flush has been done. */
b690420a
JR
1276static struct page *domain_unmap(struct dmar_domain *domain,
1277 unsigned long start_pfn,
1278 unsigned long last_pfn)
ea8ea460 1279{
ea8ea460
DW
1280 struct page *freelist = NULL;
1281
162d1b10
JL
1282 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1283 BUG_ON(!domain_pfn_supported(domain, last_pfn));
ea8ea460
DW
1284 BUG_ON(start_pfn > last_pfn);
1285
1286 /* we don't need lock here; nobody else touches the iova range */
1287 freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1288 domain->pgd, 0, start_pfn, last_pfn, NULL);
1289
1290 /* free pgd */
1291 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1292 struct page *pgd_page = virt_to_page(domain->pgd);
1293 pgd_page->freelist = freelist;
1294 freelist = pgd_page;
1295
1296 domain->pgd = NULL;
1297 }
1298
1299 return freelist;
1300}
1301
b690420a 1302static void dma_free_pagelist(struct page *freelist)
ea8ea460
DW
1303{
1304 struct page *pg;
1305
1306 while ((pg = freelist)) {
1307 freelist = pg->freelist;
1308 free_pgtable_page(page_address(pg));
1309 }
1310}
1311
ba395927
KA
1312/* iommu handling */
1313static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1314{
1315 struct root_entry *root;
1316 unsigned long flags;
1317
4c923d47 1318 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
ffebeb46 1319 if (!root) {
9f10e5bf 1320 pr_err("Allocating root entry for %s failed\n",
ffebeb46 1321 iommu->name);
ba395927 1322 return -ENOMEM;
ffebeb46 1323 }
ba395927 1324
5b6985ce 1325 __iommu_flush_cache(iommu, root, ROOT_SIZE);
ba395927
KA
1326
1327 spin_lock_irqsave(&iommu->lock, flags);
1328 iommu->root_entry = root;
1329 spin_unlock_irqrestore(&iommu->lock, flags);
1330
1331 return 0;
1332}
1333
ba395927
KA
1334static void iommu_set_root_entry(struct intel_iommu *iommu)
1335{
03ecc32c 1336 u64 addr;
c416daa9 1337 u32 sts;
ba395927
KA
1338 unsigned long flag;
1339
03ecc32c 1340 addr = virt_to_phys(iommu->root_entry);
c83b2f20 1341 if (ecs_enabled(iommu))
03ecc32c 1342 addr |= DMA_RTADDR_RTT;
ba395927 1343
1f5b3c3f 1344 raw_spin_lock_irqsave(&iommu->register_lock, flag);
03ecc32c 1345 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
ba395927 1346
c416daa9 1347 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1348
1349 /* Make sure hardware complete it */
1350 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1351 readl, (sts & DMA_GSTS_RTPS), sts);
ba395927 1352
1f5b3c3f 1353 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1354}
1355
1356static void iommu_flush_write_buffer(struct intel_iommu *iommu)
1357{
1358 u32 val;
1359 unsigned long flag;
1360
9af88143 1361 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
ba395927 1362 return;
ba395927 1363
1f5b3c3f 1364 raw_spin_lock_irqsave(&iommu->register_lock, flag);
462b60f6 1365 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1366
1367 /* Make sure hardware complete it */
1368 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1369 readl, (!(val & DMA_GSTS_WBFS)), val);
ba395927 1370
1f5b3c3f 1371 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1372}
1373
1374/* return value determine if we need a write buffer flush */
4c25a2c1
DW
1375static void __iommu_flush_context(struct intel_iommu *iommu,
1376 u16 did, u16 source_id, u8 function_mask,
1377 u64 type)
ba395927
KA
1378{
1379 u64 val = 0;
1380 unsigned long flag;
1381
ba395927
KA
1382 switch (type) {
1383 case DMA_CCMD_GLOBAL_INVL:
1384 val = DMA_CCMD_GLOBAL_INVL;
1385 break;
1386 case DMA_CCMD_DOMAIN_INVL:
1387 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1388 break;
1389 case DMA_CCMD_DEVICE_INVL:
1390 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1391 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1392 break;
1393 default:
1394 BUG();
1395 }
1396 val |= DMA_CCMD_ICC;
1397
1f5b3c3f 1398 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1399 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1400
1401 /* Make sure hardware complete it */
1402 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1403 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1404
1f5b3c3f 1405 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1406}
1407
ba395927 1408/* return value determine if we need a write buffer flush */
1f0ef2aa
DW
1409static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1410 u64 addr, unsigned int size_order, u64 type)
ba395927
KA
1411{
1412 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1413 u64 val = 0, val_iva = 0;
1414 unsigned long flag;
1415
ba395927
KA
1416 switch (type) {
1417 case DMA_TLB_GLOBAL_FLUSH:
1418 /* global flush doesn't need set IVA_REG */
1419 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1420 break;
1421 case DMA_TLB_DSI_FLUSH:
1422 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1423 break;
1424 case DMA_TLB_PSI_FLUSH:
1425 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
ea8ea460 1426 /* IH bit is passed in as part of address */
ba395927
KA
1427 val_iva = size_order | addr;
1428 break;
1429 default:
1430 BUG();
1431 }
1432 /* Note: set drain read/write */
1433#if 0
1434 /*
1435 * This is probably to be super secure.. Looks like we can
1436 * ignore it without any impact.
1437 */
1438 if (cap_read_drain(iommu->cap))
1439 val |= DMA_TLB_READ_DRAIN;
1440#endif
1441 if (cap_write_drain(iommu->cap))
1442 val |= DMA_TLB_WRITE_DRAIN;
1443
1f5b3c3f 1444 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1445 /* Note: Only uses first TLB reg currently */
1446 if (val_iva)
1447 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1448 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1449
1450 /* Make sure hardware complete it */
1451 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1452 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1453
1f5b3c3f 1454 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1455
1456 /* check IOTLB invalidation granularity */
1457 if (DMA_TLB_IAIG(val) == 0)
9f10e5bf 1458 pr_err("Flush IOTLB failed\n");
ba395927 1459 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
9f10e5bf 1460 pr_debug("TLB flush request %Lx, actual %Lx\n",
5b6985ce
FY
1461 (unsigned long long)DMA_TLB_IIRG(type),
1462 (unsigned long long)DMA_TLB_IAIG(val));
ba395927
KA
1463}
1464
64ae892b
DW
1465static struct device_domain_info *
1466iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
1467 u8 bus, u8 devfn)
93a23a72 1468{
93a23a72 1469 struct device_domain_info *info;
93a23a72 1470
55d94043
JR
1471 assert_spin_locked(&device_domain_lock);
1472
93a23a72
YZ
1473 if (!iommu->qi)
1474 return NULL;
1475
93a23a72 1476 list_for_each_entry(info, &domain->devices, link)
c3b497c6
JL
1477 if (info->iommu == iommu && info->bus == bus &&
1478 info->devfn == devfn) {
b16d0cb9
DW
1479 if (info->ats_supported && info->dev)
1480 return info;
93a23a72
YZ
1481 break;
1482 }
93a23a72 1483
b16d0cb9 1484 return NULL;
93a23a72
YZ
1485}
1486
0824c592
OP
1487static void domain_update_iotlb(struct dmar_domain *domain)
1488{
1489 struct device_domain_info *info;
1490 bool has_iotlb_device = false;
1491
1492 assert_spin_locked(&device_domain_lock);
1493
1494 list_for_each_entry(info, &domain->devices, link) {
1495 struct pci_dev *pdev;
1496
1497 if (!info->dev || !dev_is_pci(info->dev))
1498 continue;
1499
1500 pdev = to_pci_dev(info->dev);
1501 if (pdev->ats_enabled) {
1502 has_iotlb_device = true;
1503 break;
1504 }
1505 }
1506
1507 domain->has_iotlb_device = has_iotlb_device;
1508}
1509
93a23a72 1510static void iommu_enable_dev_iotlb(struct device_domain_info *info)
ba395927 1511{
fb0cc3aa
BH
1512 struct pci_dev *pdev;
1513
0824c592
OP
1514 assert_spin_locked(&device_domain_lock);
1515
0bcb3e28 1516 if (!info || !dev_is_pci(info->dev))
93a23a72
YZ
1517 return;
1518
fb0cc3aa 1519 pdev = to_pci_dev(info->dev);
fb0cc3aa 1520
b16d0cb9
DW
1521#ifdef CONFIG_INTEL_IOMMU_SVM
1522 /* The PCIe spec, in its wisdom, declares that the behaviour of
1523 the device if you enable PASID support after ATS support is
1524 undefined. So always enable PASID support on devices which
1525 have it, even if we can't yet know if we're ever going to
1526 use it. */
1527 if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
1528 info->pasid_enabled = 1;
1529
1530 if (info->pri_supported && !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
1531 info->pri_enabled = 1;
1532#endif
1533 if (info->ats_supported && !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
1534 info->ats_enabled = 1;
0824c592 1535 domain_update_iotlb(info->domain);
b16d0cb9
DW
1536 info->ats_qdep = pci_ats_queue_depth(pdev);
1537 }
93a23a72
YZ
1538}
1539
1540static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1541{
b16d0cb9
DW
1542 struct pci_dev *pdev;
1543
0824c592
OP
1544 assert_spin_locked(&device_domain_lock);
1545
da972fb1 1546 if (!dev_is_pci(info->dev))
93a23a72
YZ
1547 return;
1548
b16d0cb9
DW
1549 pdev = to_pci_dev(info->dev);
1550
1551 if (info->ats_enabled) {
1552 pci_disable_ats(pdev);
1553 info->ats_enabled = 0;
0824c592 1554 domain_update_iotlb(info->domain);
b16d0cb9
DW
1555 }
1556#ifdef CONFIG_INTEL_IOMMU_SVM
1557 if (info->pri_enabled) {
1558 pci_disable_pri(pdev);
1559 info->pri_enabled = 0;
1560 }
1561 if (info->pasid_enabled) {
1562 pci_disable_pasid(pdev);
1563 info->pasid_enabled = 0;
1564 }
1565#endif
93a23a72
YZ
1566}
1567
1568static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1569 u64 addr, unsigned mask)
1570{
1571 u16 sid, qdep;
1572 unsigned long flags;
1573 struct device_domain_info *info;
1574
0824c592
OP
1575 if (!domain->has_iotlb_device)
1576 return;
1577
93a23a72
YZ
1578 spin_lock_irqsave(&device_domain_lock, flags);
1579 list_for_each_entry(info, &domain->devices, link) {
b16d0cb9 1580 if (!info->ats_enabled)
93a23a72
YZ
1581 continue;
1582
1583 sid = info->bus << 8 | info->devfn;
b16d0cb9 1584 qdep = info->ats_qdep;
93a23a72
YZ
1585 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1586 }
1587 spin_unlock_irqrestore(&device_domain_lock, flags);
1588}
1589
a1ddcbe9
JR
1590static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
1591 struct dmar_domain *domain,
1592 unsigned long pfn, unsigned int pages,
1593 int ih, int map)
ba395927 1594{
9dd2fe89 1595 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
03d6a246 1596 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
a1ddcbe9 1597 u16 did = domain->iommu_did[iommu->seq_id];
ba395927 1598
ba395927
KA
1599 BUG_ON(pages == 0);
1600
ea8ea460
DW
1601 if (ih)
1602 ih = 1 << 6;
ba395927 1603 /*
9dd2fe89
YZ
1604 * Fallback to domain selective flush if no PSI support or the size is
1605 * too big.
ba395927
KA
1606 * PSI requires page size to be 2 ^ x, and the base address is naturally
1607 * aligned to the size
1608 */
9dd2fe89
YZ
1609 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1610 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1f0ef2aa 1611 DMA_TLB_DSI_FLUSH);
9dd2fe89 1612 else
ea8ea460 1613 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
9dd2fe89 1614 DMA_TLB_PSI_FLUSH);
bf92df30
YZ
1615
1616 /*
82653633
NA
1617 * In caching mode, changes of pages from non-present to present require
1618 * flush. However, device IOTLB doesn't need to be flushed in this case.
bf92df30 1619 */
82653633 1620 if (!cap_caching_mode(iommu->cap) || !map)
9452d5bf
JR
1621 iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
1622 addr, mask);
ba395927
KA
1623}
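
/*
 * Editorial illustration (not in the original source): flushing 9 pages
 * gives mask = ilog2(__roundup_pow_of_two(9)) = 4, i.e. a PSI covering a
 * 16-page aligned region; if the IOMMU lacks page-selective invalidation
 * or mask exceeds cap_max_amask_val(), the code above falls back to a
 * domain-selective flush instead.
 */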
1624
f8bab735 1625static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1626{
1627 u32 pmen;
1628 unsigned long flags;
1629
1f5b3c3f 1630 raw_spin_lock_irqsave(&iommu->register_lock, flags);
f8bab735 1631 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1632 pmen &= ~DMA_PMEN_EPM;
1633 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1634
1635 /* wait for the protected region status bit to clear */
1636 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1637 readl, !(pmen & DMA_PMEN_PRS), pmen);
1638
1f5b3c3f 1639 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
f8bab735 1640}
1641
2a41ccee 1642static void iommu_enable_translation(struct intel_iommu *iommu)
ba395927
KA
1643{
1644 u32 sts;
1645 unsigned long flags;
1646
1f5b3c3f 1647 raw_spin_lock_irqsave(&iommu->register_lock, flags);
c416daa9
DW
1648 iommu->gcmd |= DMA_GCMD_TE;
1649 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1650
1651 /* Make sure hardware complete it */
1652 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1653 readl, (sts & DMA_GSTS_TES), sts);
ba395927 1654
1f5b3c3f 1655 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
ba395927
KA
1656}
1657
2a41ccee 1658static void iommu_disable_translation(struct intel_iommu *iommu)
ba395927
KA
1659{
1660 u32 sts;
1661 unsigned long flag;
1662
1f5b3c3f 1663 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1664 iommu->gcmd &= ~DMA_GCMD_TE;
1665 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1666
1667 /* Make sure hardware complete it */
1668 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1669 readl, (!(sts & DMA_GSTS_TES)), sts);
ba395927 1670
1f5b3c3f 1671 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1672}
1673
3460a6d9 1674
ba395927
KA
1675static int iommu_init_domains(struct intel_iommu *iommu)
1676{
8bf47816
JR
1677 u32 ndomains, nlongs;
1678 size_t size;
ba395927
KA
1679
1680 ndomains = cap_ndoms(iommu->cap);
8bf47816 1681 pr_debug("%s: Number of Domains supported <%d>\n",
9f10e5bf 1682 iommu->name, ndomains);
ba395927
KA
1683 nlongs = BITS_TO_LONGS(ndomains);
1684
94a91b50
DD
1685 spin_lock_init(&iommu->lock);
1686
ba395927
KA
1687 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1688 if (!iommu->domain_ids) {
9f10e5bf
JR
1689 pr_err("%s: Allocating domain id array failed\n",
1690 iommu->name);
ba395927
KA
1691 return -ENOMEM;
1692 }
8bf47816 1693
86f004c7 1694 size = (ALIGN(ndomains, 256) >> 8) * sizeof(struct dmar_domain **);
8bf47816
JR
1695 iommu->domains = kzalloc(size, GFP_KERNEL);
1696
1697 if (iommu->domains) {
1698 size = 256 * sizeof(struct dmar_domain *);
1699 iommu->domains[0] = kzalloc(size, GFP_KERNEL);
1700 }
1701
1702 if (!iommu->domains || !iommu->domains[0]) {
9f10e5bf
JR
1703 pr_err("%s: Allocating domain array failed\n",
1704 iommu->name);
852bdb04 1705 kfree(iommu->domain_ids);
8bf47816 1706 kfree(iommu->domains);
852bdb04 1707 iommu->domain_ids = NULL;
8bf47816 1708 iommu->domains = NULL;
ba395927
KA
1709 return -ENOMEM;
1710 }
1711
8bf47816
JR
1712
1713
ba395927 1714 /*
c0e8a6c8
JR
1715 * If Caching mode is set, then invalid translations are tagged
1716 * with domain-id 0, hence we need to pre-allocate it. We also
1717 * use domain-id 0 as a marker for non-allocated domain-id, so
1718 * make sure it is not used for a real domain.
ba395927 1719 */
c0e8a6c8
JR
1720 set_bit(0, iommu->domain_ids);
1721
ba395927
KA
1722 return 0;
1723}
ba395927 1724
ffebeb46 1725static void disable_dmar_iommu(struct intel_iommu *iommu)
ba395927 1726{
29a27719 1727 struct device_domain_info *info, *tmp;
55d94043 1728 unsigned long flags;
ba395927 1729
29a27719
JR
1730 if (!iommu->domains || !iommu->domain_ids)
1731 return;
a4eaa86c 1732
bea64033 1733again:
55d94043 1734 spin_lock_irqsave(&device_domain_lock, flags);
29a27719
JR
1735 list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
1736 struct dmar_domain *domain;
1737
1738 if (info->iommu != iommu)
1739 continue;
1740
1741 if (!info->dev || !info->domain)
1742 continue;
1743
1744 domain = info->domain;
1745
bea64033 1746 __dmar_remove_one_dev_info(info);
29a27719 1747
bea64033
JR
1748 if (!domain_type_is_vm_or_si(domain)) {
1749 /*
1750 * The domain_exit() function can't be called under
1751 * device_domain_lock, as it takes this lock itself.
1752 * So release the lock here and re-run the loop
1753 * afterwards.
1754 */
1755 spin_unlock_irqrestore(&device_domain_lock, flags);
29a27719 1756 domain_exit(domain);
bea64033
JR
1757 goto again;
1758 }
ba395927 1759 }
55d94043 1760 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927
KA
1761
1762 if (iommu->gcmd & DMA_GCMD_TE)
1763 iommu_disable_translation(iommu);
ffebeb46 1764}
ba395927 1765
ffebeb46
JL
1766static void free_dmar_iommu(struct intel_iommu *iommu)
1767{
1768 if ((iommu->domains) && (iommu->domain_ids)) {
86f004c7 1769 int elems = ALIGN(cap_ndoms(iommu->cap), 256) >> 8;
8bf47816
JR
1770 int i;
1771
1772 for (i = 0; i < elems; i++)
1773 kfree(iommu->domains[i]);
ffebeb46
JL
1774 kfree(iommu->domains);
1775 kfree(iommu->domain_ids);
1776 iommu->domains = NULL;
1777 iommu->domain_ids = NULL;
1778 }
ba395927 1779
d9630fe9
WH
1780 g_iommus[iommu->seq_id] = NULL;
1781
ba395927
KA
1782 /* free context mapping */
1783 free_context_table(iommu);
8a94ade4
DW
1784
1785#ifdef CONFIG_INTEL_IOMMU_SVM
a222a7f0
DW
1786 if (pasid_enabled(iommu)) {
1787 if (ecap_prs(iommu->ecap))
1788 intel_svm_finish_prq(iommu);
8a94ade4 1789 intel_svm_free_pasid_tables(iommu);
a222a7f0 1790 }
8a94ade4 1791#endif
ba395927
KA
1792}
1793
ab8dfe25 1794static struct dmar_domain *alloc_domain(int flags)
ba395927 1795{
ba395927 1796 struct dmar_domain *domain;
ba395927
KA
1797
1798 domain = alloc_domain_mem();
1799 if (!domain)
1800 return NULL;
1801
ab8dfe25 1802 memset(domain, 0, sizeof(*domain));
4c923d47 1803 domain->nid = -1;
ab8dfe25 1804 domain->flags = flags;
0824c592 1805 domain->has_iotlb_device = false;
92d03cc8 1806 INIT_LIST_HEAD(&domain->devices);
2c2e2c38
FY
1807
1808 return domain;
1809}
1810
d160aca5
JR
1811/* Must be called with device_domain_lock and iommu->lock held */
1812static int domain_attach_iommu(struct dmar_domain *domain,
fb170fb4
JL
1813 struct intel_iommu *iommu)
1814{
44bde614 1815 unsigned long ndomains;
55d94043 1816 int num;
44bde614 1817
55d94043 1818 assert_spin_locked(&device_domain_lock);
d160aca5 1819 assert_spin_locked(&iommu->lock);
ba395927 1820
29a27719
JR
1821 domain->iommu_refcnt[iommu->seq_id] += 1;
1822 domain->iommu_count += 1;
1823 if (domain->iommu_refcnt[iommu->seq_id] == 1) {
fb170fb4 1824 ndomains = cap_ndoms(iommu->cap);
d160aca5
JR
1825 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1826
1827 if (num >= ndomains) {
1828 pr_err("%s: No free domain ids\n", iommu->name);
1829 domain->iommu_refcnt[iommu->seq_id] -= 1;
1830 domain->iommu_count -= 1;
55d94043 1831 return -ENOSPC;
2c2e2c38 1832 }
ba395927 1833
d160aca5
JR
1834 set_bit(num, iommu->domain_ids);
1835 set_iommu_domain(iommu, num, domain);
1836
1837 domain->iommu_did[iommu->seq_id] = num;
1838 domain->nid = iommu->node;
fb170fb4 1839
fb170fb4
JL
1840 domain_update_iommu_cap(domain);
1841 }
d160aca5 1842
55d94043 1843 return 0;
fb170fb4
JL
1844}
1845
1846static int domain_detach_iommu(struct dmar_domain *domain,
1847 struct intel_iommu *iommu)
1848{
d160aca5 1849 int num, count = INT_MAX;
d160aca5 1850
55d94043 1851 assert_spin_locked(&device_domain_lock);
d160aca5 1852 assert_spin_locked(&iommu->lock);
fb170fb4 1853
29a27719
JR
1854 domain->iommu_refcnt[iommu->seq_id] -= 1;
1855 count = --domain->iommu_count;
1856 if (domain->iommu_refcnt[iommu->seq_id] == 0) {
d160aca5
JR
1857 num = domain->iommu_did[iommu->seq_id];
1858 clear_bit(num, iommu->domain_ids);
1859 set_iommu_domain(iommu, num, NULL);
fb170fb4 1860
fb170fb4 1861 domain_update_iommu_cap(domain);
c0e8a6c8 1862 domain->iommu_did[iommu->seq_id] = 0;
fb170fb4 1863 }
fb170fb4
JL
1864
1865 return count;
1866}
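/*
 * Worked example (illustrative): a domain whose devices sit behind two
 * DMAR units holds one reference and one independently allocated domain
 * id per unit in iommu_refcnt[]/iommu_did[]. When the last device behind
 * one unit is detached, that unit's refcount drops to zero and only that
 * unit's id is released via clear_bit() above; the return value tells
 * the caller when iommu_count has reached zero and the domain is no
 * longer attached to any unit at all.
 */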
1867
ba395927 1868static struct iova_domain reserved_iova_list;
8a443df4 1869static struct lock_class_key reserved_rbtree_key;
ba395927 1870
51a63e67 1871static int dmar_init_reserved_ranges(void)
ba395927
KA
1872{
1873 struct pci_dev *pdev = NULL;
1874 struct iova *iova;
1875 int i;
ba395927 1876
0fb5fe87
RM
1877 init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN,
1878 DMA_32BIT_PFN);
ba395927 1879
8a443df4
MG
1880 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1881 &reserved_rbtree_key);
1882
ba395927
KA
1883 /* IOAPIC ranges shouldn't be accessed by DMA */
1884 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1885 IOVA_PFN(IOAPIC_RANGE_END));
51a63e67 1886 if (!iova) {
9f10e5bf 1887 pr_err("Reserve IOAPIC range failed\n");
51a63e67
JC
1888 return -ENODEV;
1889 }
ba395927
KA
1890
1891 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1892 for_each_pci_dev(pdev) {
1893 struct resource *r;
1894
1895 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1896 r = &pdev->resource[i];
1897 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1898 continue;
1a4a4551
DW
1899 iova = reserve_iova(&reserved_iova_list,
1900 IOVA_PFN(r->start),
1901 IOVA_PFN(r->end));
51a63e67 1902 if (!iova) {
9f10e5bf 1903 pr_err("Reserve iova failed\n");
51a63e67
JC
1904 return -ENODEV;
1905 }
ba395927
KA
1906 }
1907 }
51a63e67 1908 return 0;
ba395927
KA
1909}
1910
1911static void domain_reserve_special_ranges(struct dmar_domain *domain)
1912{
1913 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1914}
1915
1916static inline int guestwidth_to_adjustwidth(int gaw)
1917{
1918 int agaw;
1919 int r = (gaw - 12) % 9;
1920
1921 if (r == 0)
1922 agaw = gaw;
1923 else
1924 agaw = gaw + 9 - r;
1925 if (agaw > 64)
1926 agaw = 64;
1927 return agaw;
1928}
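/*
 * Worked examples (illustrative): the adjusted width is the guest width
 * rounded up to 12 plus a multiple of the 9-bit page-table stride,
 * capped at 64:
 *
 *	gaw = 32  ->  r = (32 - 12) % 9 = 2  ->  agaw = 32 + 9 - 2 = 39
 *	gaw = 39  ->  r = 0                  ->  agaw = 39
 *	gaw = 40  ->  r = 1                  ->  agaw = 48
 *	gaw = 66  ->  r = 0                  ->  agaw = 66, capped to 64
 */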
1929
dc534b25
JR
1930static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
1931 int guest_width)
ba395927 1932{
ba395927
KA
1933 int adjust_width, agaw;
1934 unsigned long sagaw;
1935
0fb5fe87
RM
1936 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
1937 DMA_32BIT_PFN);
ba395927
KA
1938 domain_reserve_special_ranges(domain);
1939
1940 /* calculate AGAW */
ba395927
KA
1941 if (guest_width > cap_mgaw(iommu->cap))
1942 guest_width = cap_mgaw(iommu->cap);
1943 domain->gaw = guest_width;
1944 adjust_width = guestwidth_to_adjustwidth(guest_width);
1945 agaw = width_to_agaw(adjust_width);
1946 sagaw = cap_sagaw(iommu->cap);
1947 if (!test_bit(agaw, &sagaw)) {
1948 /* hardware doesn't support it, choose a bigger one */
9f10e5bf 1949 pr_debug("Hardware doesn't support agaw %d\n", agaw);
ba395927
KA
1950 agaw = find_next_bit(&sagaw, 5, agaw);
1951 if (agaw >= 5)
1952 return -ENODEV;
1953 }
1954 domain->agaw = agaw;
ba395927 1955
8e604097
WH
1956 if (ecap_coherent(iommu->ecap))
1957 domain->iommu_coherency = 1;
1958 else
1959 domain->iommu_coherency = 0;
1960
58c610bd
SY
1961 if (ecap_sc_support(iommu->ecap))
1962 domain->iommu_snooping = 1;
1963 else
1964 domain->iommu_snooping = 0;
1965
214e39aa
DW
1966 if (intel_iommu_superpage)
1967 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1968 else
1969 domain->iommu_superpage = 0;
1970
4c923d47 1971 domain->nid = iommu->node;
c7151a8d 1972
ba395927 1973 /* always allocate the top pgd */
4c923d47 1974 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
ba395927
KA
1975 if (!domain->pgd)
1976 return -ENOMEM;
5b6985ce 1977 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
ba395927
KA
1978 return 0;
1979}
1980
1981static void domain_exit(struct dmar_domain *domain)
1982{
ea8ea460 1983 struct page *freelist = NULL;
ba395927
KA
1984
1985 /* Domain 0 is reserved, so don't process it */
1986 if (!domain)
1987 return;
1988
7b668357 1989 /* Flush any lazy unmaps that may reference this domain */
aa473240
OP
1990 if (!intel_iommu_strict) {
1991 int cpu;
1992
1993 for_each_possible_cpu(cpu)
1994 flush_unmaps_timeout(cpu);
1995 }
7b668357 1996
d160aca5
JR
1997 /* Remove associated devices and clear attached or cached domains */
1998 rcu_read_lock();
ba395927 1999 domain_remove_dev_info(domain);
d160aca5 2000 rcu_read_unlock();
92d03cc8 2001
ba395927
KA
2002 /* destroy iovas */
2003 put_iova_domain(&domain->iovad);
ba395927 2004
ea8ea460 2005 freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
ba395927 2006
ea8ea460
DW
2007 dma_free_pagelist(freelist);
2008
ba395927
KA
2009 free_domain_mem(domain);
2010}
2011
64ae892b
DW
2012static int domain_context_mapping_one(struct dmar_domain *domain,
2013 struct intel_iommu *iommu,
28ccce0d 2014 u8 bus, u8 devfn)
ba395927 2015{
c6c2cebd 2016 u16 did = domain->iommu_did[iommu->seq_id];
28ccce0d
JR
2017 int translation = CONTEXT_TT_MULTI_LEVEL;
2018 struct device_domain_info *info = NULL;
ba395927 2019 struct context_entry *context;
ba395927 2020 unsigned long flags;
ea6606b0 2021 struct dma_pte *pgd;
55d94043 2022 int ret, agaw;
28ccce0d 2023
c6c2cebd
JR
2024 WARN_ON(did == 0);
2025
28ccce0d
JR
2026 if (hw_pass_through && domain_type_is_si(domain))
2027 translation = CONTEXT_TT_PASS_THROUGH;
ba395927
KA
2028
2029 pr_debug("Set context mapping for %02x:%02x.%d\n",
2030 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
4ed0d3e6 2031
ba395927 2032 BUG_ON(!domain->pgd);
5331fe6f 2033
55d94043
JR
2034 spin_lock_irqsave(&device_domain_lock, flags);
2035 spin_lock(&iommu->lock);
2036
2037 ret = -ENOMEM;
03ecc32c 2038 context = iommu_context_addr(iommu, bus, devfn, 1);
ba395927 2039 if (!context)
55d94043 2040 goto out_unlock;
ba395927 2041
55d94043
JR
2042 ret = 0;
2043 if (context_present(context))
2044 goto out_unlock;
cf484d0e 2045
aec0e861
XP
2046 /*
2047 * For kdump cases, old valid entries may be cached due to the
2048 * in-flight DMA and copied pgtable, but there is no unmapping
2049 * behaviour for them, thus we need an explicit cache flush for
2050 * the newly-mapped device. For kdump, at this point, the device
2051 * is supposed to have finished reset at its driver probe stage, so no
2052 * in-flight DMA will exist, and we don't need to worry about it
2053 * hereafter.
2054 */
2055 if (context_copied(context)) {
2056 u16 did_old = context_domain_id(context);
2057
f73a7eee 2058 if (did_old >= 0 && did_old < cap_ndoms(iommu->cap)) {
aec0e861
XP
2059 iommu->flush.flush_context(iommu, did_old,
2060 (((u16)bus) << 8) | devfn,
2061 DMA_CCMD_MASK_NOBIT,
2062 DMA_CCMD_DEVICE_INVL);
f73a7eee
KA
2063 iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
2064 DMA_TLB_DSI_FLUSH);
2065 }
aec0e861
XP
2066 }
2067
ea6606b0
WH
2068 pgd = domain->pgd;
2069
de24e553 2070 context_clear_entry(context);
c6c2cebd 2071 context_set_domain_id(context, did);
ea6606b0 2072
de24e553
JR
2073 /*
2074 * Skip top levels of the page table for an IOMMU whose agaw is
2075 * smaller than the domain's default. Unnecessary for PT mode.
2076 */
93a23a72 2077 if (translation != CONTEXT_TT_PASS_THROUGH) {
de24e553 2078 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
55d94043 2079 ret = -ENOMEM;
de24e553 2080 pgd = phys_to_virt(dma_pte_addr(pgd));
55d94043
JR
2081 if (!dma_pte_present(pgd))
2082 goto out_unlock;
ea6606b0 2083 }
4ed0d3e6 2084
64ae892b 2085 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
b16d0cb9
DW
2086 if (info && info->ats_supported)
2087 translation = CONTEXT_TT_DEV_IOTLB;
2088 else
2089 translation = CONTEXT_TT_MULTI_LEVEL;
de24e553 2090
93a23a72
YZ
2091 context_set_address_root(context, virt_to_phys(pgd));
2092 context_set_address_width(context, iommu->agaw);
de24e553
JR
2093 } else {
2094 /*
2095 * In pass through mode, AW must be programmed to
2096 * indicate the largest AGAW value supported by
2097 * hardware. And ASR is ignored by hardware.
2098 */
2099 context_set_address_width(context, iommu->msagaw);
93a23a72 2100 }
4ed0d3e6
FY
2101
2102 context_set_translation_type(context, translation);
c07e7d21
MM
2103 context_set_fault_enable(context);
2104 context_set_present(context);
5331fe6f 2105 domain_flush_cache(domain, context, sizeof(*context));
ba395927 2106
4c25a2c1
DW
2107 /*
2108 * It's a non-present to present mapping. If hardware doesn't cache
2109 * non-present entry we only need to flush the write-buffer. If the
2110 * _does_ cache non-present entries, then it does so in the special
2111 * domain #0, which we have to flush:
2112 */
2113 if (cap_caching_mode(iommu->cap)) {
2114 iommu->flush.flush_context(iommu, 0,
2115 (((u16)bus) << 8) | devfn,
2116 DMA_CCMD_MASK_NOBIT,
2117 DMA_CCMD_DEVICE_INVL);
c6c2cebd 2118 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
4c25a2c1 2119 } else {
ba395927 2120 iommu_flush_write_buffer(iommu);
4c25a2c1 2121 }
93a23a72 2122 iommu_enable_dev_iotlb(info);
c7151a8d 2123
55d94043
JR
2124 ret = 0;
2125
2126out_unlock:
2127 spin_unlock(&iommu->lock);
2128 spin_unlock_irqrestore(&device_domain_lock, flags);
fb170fb4 2129
5c365d18 2130 return ret;
ba395927
KA
2131}
2132
579305f7
AW
2133struct domain_context_mapping_data {
2134 struct dmar_domain *domain;
2135 struct intel_iommu *iommu;
579305f7
AW
2136};
2137
2138static int domain_context_mapping_cb(struct pci_dev *pdev,
2139 u16 alias, void *opaque)
2140{
2141 struct domain_context_mapping_data *data = opaque;
2142
2143 return domain_context_mapping_one(data->domain, data->iommu,
28ccce0d 2144 PCI_BUS_NUM(alias), alias & 0xff);
579305f7
AW
2145}
2146
ba395927 2147static int
28ccce0d 2148domain_context_mapping(struct dmar_domain *domain, struct device *dev)
ba395927 2149{
64ae892b 2150 struct intel_iommu *iommu;
156baca8 2151 u8 bus, devfn;
579305f7 2152 struct domain_context_mapping_data data;
64ae892b 2153
e1f167f3 2154 iommu = device_to_iommu(dev, &bus, &devfn);
64ae892b
DW
2155 if (!iommu)
2156 return -ENODEV;
ba395927 2157
579305f7 2158 if (!dev_is_pci(dev))
28ccce0d 2159 return domain_context_mapping_one(domain, iommu, bus, devfn);
579305f7
AW
2160
2161 data.domain = domain;
2162 data.iommu = iommu;
579305f7
AW
2163
2164 return pci_for_each_dma_alias(to_pci_dev(dev),
2165 &domain_context_mapping_cb, &data);
2166}
2167
2168static int domain_context_mapped_cb(struct pci_dev *pdev,
2169 u16 alias, void *opaque)
2170{
2171 struct intel_iommu *iommu = opaque;
2172
2173 return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
ba395927
KA
2174}
2175
e1f167f3 2176static int domain_context_mapped(struct device *dev)
ba395927 2177{
5331fe6f 2178 struct intel_iommu *iommu;
156baca8 2179 u8 bus, devfn;
5331fe6f 2180
e1f167f3 2181 iommu = device_to_iommu(dev, &bus, &devfn);
5331fe6f
WH
2182 if (!iommu)
2183 return -ENODEV;
ba395927 2184
579305f7
AW
2185 if (!dev_is_pci(dev))
2186 return device_context_mapped(iommu, bus, devfn);
e1f167f3 2187
579305f7
AW
2188 return !pci_for_each_dma_alias(to_pci_dev(dev),
2189 domain_context_mapped_cb, iommu);
ba395927
KA
2190}
2191
f532959b
FY
2192/* Returns a number of VTD pages, but aligned to MM page size */
2193static inline unsigned long aligned_nrpages(unsigned long host_addr,
2194 size_t size)
2195{
2196 host_addr &= ~PAGE_MASK;
2197 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
2198}
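/*
 * Worked example (illustrative, assuming 4KiB MM pages): a buffer at
 * host_addr = 0x1ff0 with size = 0x20 straddles a page boundary, so
 * aligned_nrpages(0x1ff0, 0x20) = PAGE_ALIGN(0xff0 + 0x20) >> 12
 *                               = 0x2000 >> 12 = 2 VTD pages.
 */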
2199
6dd9a7c7
YS
2200/* Return largest possible superpage level for a given mapping */
2201static inline int hardware_largepage_caps(struct dmar_domain *domain,
2202 unsigned long iov_pfn,
2203 unsigned long phy_pfn,
2204 unsigned long pages)
2205{
2206 int support, level = 1;
2207 unsigned long pfnmerge;
2208
2209 support = domain->iommu_superpage;
2210
2211 /* To use a large page, the virtual *and* physical addresses
2212 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2213 of them will mean we have to use smaller pages. So just
2214 merge them and check both at once. */
2215 pfnmerge = iov_pfn | phy_pfn;
2216
2217 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
2218 pages >>= VTD_STRIDE_SHIFT;
2219 if (!pages)
2220 break;
2221 pfnmerge >>= VTD_STRIDE_SHIFT;
2222 level++;
2223 support--;
2224 }
2225 return level;
2226}
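/*
 * Worked example (illustrative): with domain->iommu_superpage = 2 (2MiB
 * and 1GiB pages usable), iov_pfn = phy_pfn = 0x200 and pages = 0x400
 * (both addresses 2MiB aligned, a 4MiB extent), the first pass sees the
 * merged pfn stride-aligned and promotes to level 2 (2MiB pages); the
 * second pass stops because 0x200 >> 9 = 1 is not aligned to the next
 * (1GiB) stride, so level 2 is returned.
 */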
2227
9051aa02
DW
2228static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2229 struct scatterlist *sg, unsigned long phys_pfn,
2230 unsigned long nr_pages, int prot)
e1605495
DW
2231{
2232 struct dma_pte *first_pte = NULL, *pte = NULL;
9051aa02 2233 phys_addr_t uninitialized_var(pteval);
cc4f14aa 2234 unsigned long sg_res = 0;
6dd9a7c7
YS
2235 unsigned int largepage_lvl = 0;
2236 unsigned long lvl_pages = 0;
e1605495 2237
162d1b10 2238 BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
e1605495
DW
2239
2240 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2241 return -EINVAL;
2242
2243 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
2244
cc4f14aa
JL
2245 if (!sg) {
2246 sg_res = nr_pages;
9051aa02
DW
2247 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
2248 }
2249
6dd9a7c7 2250 while (nr_pages > 0) {
c85994e4
DW
2251 uint64_t tmp;
2252
e1605495 2253 if (!sg_res) {
f532959b 2254 sg_res = aligned_nrpages(sg->offset, sg->length);
e1605495
DW
2255 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
2256 sg->dma_length = sg->length;
3e6110fd 2257 pteval = page_to_phys(sg_page(sg)) | prot;
6dd9a7c7 2258 phys_pfn = pteval >> VTD_PAGE_SHIFT;
e1605495 2259 }
6dd9a7c7 2260
e1605495 2261 if (!pte) {
6dd9a7c7
YS
2262 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2263
5cf0a76f 2264 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
e1605495
DW
2265 if (!pte)
2266 return -ENOMEM;
6dd9a7c7 2267 /* It is a large page */
6491d4d0 2268 if (largepage_lvl > 1) {
ba2374fd
CZ
2269 unsigned long nr_superpages, end_pfn;
2270
6dd9a7c7 2271 pteval |= DMA_PTE_LARGE_PAGE;
d41a4adb 2272 lvl_pages = lvl_to_nr_pages(largepage_lvl);
ba2374fd
CZ
2273
2274 nr_superpages = sg_res / lvl_pages;
2275 end_pfn = iov_pfn + nr_superpages * lvl_pages - 1;
2276
d41a4adb
JL
2277 /*
2278 * Ensure that old small page tables are
ba2374fd 2279 * removed to make room for superpage(s).
d41a4adb 2280 */
ba2374fd 2281 dma_pte_free_pagetable(domain, iov_pfn, end_pfn);
6491d4d0 2282 } else {
6dd9a7c7 2283 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
6491d4d0 2284 }
6dd9a7c7 2285
e1605495
DW
2286 }
2287 /* We don't need a lock here, nobody else
2288 * touches the iova range
2289 */
7766a3fb 2290 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
c85994e4 2291 if (tmp) {
1bf20f0d 2292 static int dumps = 5;
9f10e5bf
JR
2293 pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2294 iov_pfn, tmp, (unsigned long long)pteval);
1bf20f0d
DW
2295 if (dumps) {
2296 dumps--;
2297 debug_dma_dump_mappings(NULL);
2298 }
2299 WARN_ON(1);
2300 }
6dd9a7c7
YS
2301
2302 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2303
2304 BUG_ON(nr_pages < lvl_pages);
2305 BUG_ON(sg_res < lvl_pages);
2306
2307 nr_pages -= lvl_pages;
2308 iov_pfn += lvl_pages;
2309 phys_pfn += lvl_pages;
2310 pteval += lvl_pages * VTD_PAGE_SIZE;
2311 sg_res -= lvl_pages;
2312
2313 /* If the next PTE would be the first in a new page, then we
2314 need to flush the cache on the entries we've just written.
2315 And then we'll need to recalculate 'pte', so clear it and
2316 let it get set again in the if (!pte) block above.
2317
2318 If we're done (!nr_pages) we need to flush the cache too.
2319
2320 Also if we've been setting superpages, we may need to
2321 recalculate 'pte' and switch back to smaller pages for the
2322 end of the mapping, if the trailing size is not enough to
2323 use another superpage (i.e. sg_res < lvl_pages). */
e1605495 2324 pte++;
6dd9a7c7
YS
2325 if (!nr_pages || first_pte_in_page(pte) ||
2326 (largepage_lvl > 1 && sg_res < lvl_pages)) {
e1605495
DW
2327 domain_flush_cache(domain, first_pte,
2328 (void *)pte - (void *)first_pte);
2329 pte = NULL;
2330 }
6dd9a7c7
YS
2331
2332 if (!sg_res && nr_pages)
e1605495
DW
2333 sg = sg_next(sg);
2334 }
2335 return 0;
2336}
2337
9051aa02
DW
2338static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2339 struct scatterlist *sg, unsigned long nr_pages,
2340 int prot)
ba395927 2341{
9051aa02
DW
2342 return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
2343}
6f6a00e4 2344
9051aa02
DW
2345static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2346 unsigned long phys_pfn, unsigned long nr_pages,
2347 int prot)
2348{
2349 return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
ba395927
KA
2350}
2351
2452d9db 2352static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn)
ba395927 2353{
c7151a8d
WH
2354 if (!iommu)
2355 return;
8c11e798
WH
2356
2357 clear_context_table(iommu, bus, devfn);
2358 iommu->flush.flush_context(iommu, 0, 0, 0,
4c25a2c1 2359 DMA_CCMD_GLOBAL_INVL);
1f0ef2aa 2360 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
ba395927
KA
2361}
2362
109b9b04
DW
2363static inline void unlink_domain_info(struct device_domain_info *info)
2364{
2365 assert_spin_locked(&device_domain_lock);
2366 list_del(&info->link);
2367 list_del(&info->global);
2368 if (info->dev)
0bcb3e28 2369 info->dev->archdata.iommu = NULL;
109b9b04
DW
2370}
2371
ba395927
KA
2372static void domain_remove_dev_info(struct dmar_domain *domain)
2373{
3a74ca01 2374 struct device_domain_info *info, *tmp;
fb170fb4 2375 unsigned long flags;
ba395927
KA
2376
2377 spin_lock_irqsave(&device_domain_lock, flags);
76f45fe3 2378 list_for_each_entry_safe(info, tmp, &domain->devices, link)
127c7615 2379 __dmar_remove_one_dev_info(info);
ba395927
KA
2380 spin_unlock_irqrestore(&device_domain_lock, flags);
2381}
2382
2383/*
2384 * find_domain
1525a29a 2385 * Note: struct device->archdata.iommu stores the device's domain info
ba395927 2386 */
1525a29a 2387static struct dmar_domain *find_domain(struct device *dev)
ba395927
KA
2388{
2389 struct device_domain_info *info;
2390
2391 /* No lock here, assumes no domain exit in normal case */
1525a29a 2392 info = dev->archdata.iommu;
b316d02a 2393 if (likely(info))
ba395927
KA
2394 return info->domain;
2395 return NULL;
2396}
2397
5a8f40e8 2398static inline struct device_domain_info *
745f2586
JL
2399dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2400{
2401 struct device_domain_info *info;
2402
2403 list_for_each_entry(info, &device_domain_list, global)
41e80dca 2404 if (info->iommu->segment == segment && info->bus == bus &&
745f2586 2405 info->devfn == devfn)
5a8f40e8 2406 return info;
745f2586
JL
2407
2408 return NULL;
2409}
2410
5db31569
JR
2411static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
2412 int bus, int devfn,
2413 struct device *dev,
2414 struct dmar_domain *domain)
745f2586 2415{
5a8f40e8 2416 struct dmar_domain *found = NULL;
745f2586
JL
2417 struct device_domain_info *info;
2418 unsigned long flags;
d160aca5 2419 int ret;
745f2586
JL
2420
2421 info = alloc_devinfo_mem();
2422 if (!info)
b718cd3d 2423 return NULL;
745f2586 2424
745f2586
JL
2425 info->bus = bus;
2426 info->devfn = devfn;
b16d0cb9
DW
2427 info->ats_supported = info->pasid_supported = info->pri_supported = 0;
2428 info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
2429 info->ats_qdep = 0;
745f2586
JL
2430 info->dev = dev;
2431 info->domain = domain;
5a8f40e8 2432 info->iommu = iommu;
745f2586 2433
b16d0cb9
DW
2434 if (dev && dev_is_pci(dev)) {
2435 struct pci_dev *pdev = to_pci_dev(info->dev);
2436
2437 if (ecap_dev_iotlb_support(iommu->ecap) &&
2438 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) &&
2439 dmar_find_matched_atsr_unit(pdev))
2440 info->ats_supported = 1;
2441
2442 if (ecs_enabled(iommu)) {
2443 if (pasid_enabled(iommu)) {
2444 int features = pci_pasid_features(pdev);
2445 if (features >= 0)
2446 info->pasid_supported = features | 1;
2447 }
2448
2449 if (info->ats_supported && ecap_prs(iommu->ecap) &&
2450 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
2451 info->pri_supported = 1;
2452 }
2453 }
2454
745f2586
JL
2455 spin_lock_irqsave(&device_domain_lock, flags);
2456 if (dev)
0bcb3e28 2457 found = find_domain(dev);
f303e507
JR
2458
2459 if (!found) {
5a8f40e8 2460 struct device_domain_info *info2;
41e80dca 2461 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
f303e507
JR
2462 if (info2) {
2463 found = info2->domain;
2464 info2->dev = dev;
2465 }
5a8f40e8 2466 }
f303e507 2467
745f2586
JL
2468 if (found) {
2469 spin_unlock_irqrestore(&device_domain_lock, flags);
2470 free_devinfo_mem(info);
b718cd3d
DW
2471 /* Caller must free the original domain */
2472 return found;
745f2586
JL
2473 }
2474
d160aca5
JR
2475 spin_lock(&iommu->lock);
2476 ret = domain_attach_iommu(domain, iommu);
2477 spin_unlock(&iommu->lock);
2478
2479 if (ret) {
c6c2cebd 2480 spin_unlock_irqrestore(&device_domain_lock, flags);
499f3aa4 2481 free_devinfo_mem(info);
c6c2cebd
JR
2482 return NULL;
2483 }
c6c2cebd 2484
b718cd3d
DW
2485 list_add(&info->link, &domain->devices);
2486 list_add(&info->global, &device_domain_list);
2487 if (dev)
2488 dev->archdata.iommu = info;
2489 spin_unlock_irqrestore(&device_domain_lock, flags);
2490
cc4e2575
JR
2491 if (dev && domain_context_mapping(domain, dev)) {
2492 pr_err("Domain context map for %s failed\n", dev_name(dev));
e6de0f8d 2493 dmar_remove_one_dev_info(domain, dev);
cc4e2575
JR
2494 return NULL;
2495 }
2496
b718cd3d 2497 return domain;
745f2586
JL
2498}
2499
579305f7
AW
2500static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2501{
2502 *(u16 *)opaque = alias;
2503 return 0;
2504}
2505
76208356 2506static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw)
ba395927 2507{
cc4e2575 2508 struct device_domain_info *info = NULL;
76208356 2509 struct dmar_domain *domain = NULL;
579305f7 2510 struct intel_iommu *iommu;
08a7f456 2511 u16 req_id, dma_alias;
ba395927 2512 unsigned long flags;
aa4d066a 2513 u8 bus, devfn;
ba395927 2514
579305f7
AW
2515 iommu = device_to_iommu(dev, &bus, &devfn);
2516 if (!iommu)
2517 return NULL;
2518
08a7f456
JR
2519 req_id = ((u16)bus << 8) | devfn;
2520
146922ec
DW
2521 if (dev_is_pci(dev)) {
2522 struct pci_dev *pdev = to_pci_dev(dev);
276dbf99 2523
579305f7
AW
2524 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2525
2526 spin_lock_irqsave(&device_domain_lock, flags);
2527 info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2528 PCI_BUS_NUM(dma_alias),
2529 dma_alias & 0xff);
2530 if (info) {
2531 iommu = info->iommu;
2532 domain = info->domain;
5a8f40e8 2533 }
579305f7 2534 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927 2535
76208356 2536 /* DMA alias already has a domain, use it */
579305f7 2537 if (info)
76208356 2538 goto out;
579305f7 2539 }
ba395927 2540
146922ec 2541 /* Allocate and initialize new domain for the device */
ab8dfe25 2542 domain = alloc_domain(0);
745f2586 2543 if (!domain)
579305f7 2544 return NULL;
dc534b25 2545 if (domain_init(domain, iommu, gaw)) {
579305f7
AW
2546 domain_exit(domain);
2547 return NULL;
2c2e2c38 2548 }
ba395927 2549
76208356 2550out:
579305f7 2551
76208356
JR
2552 return domain;
2553}
579305f7 2554
76208356
JR
2555static struct dmar_domain *set_domain_for_dev(struct device *dev,
2556 struct dmar_domain *domain)
2557{
2558 struct intel_iommu *iommu;
2559 struct dmar_domain *tmp;
2560 u16 req_id, dma_alias;
2561 u8 bus, devfn;
2562
2563 iommu = device_to_iommu(dev, &bus, &devfn);
2564 if (!iommu)
2565 return NULL;
2566
2567 req_id = ((u16)bus << 8) | devfn;
2568
2569 if (dev_is_pci(dev)) {
2570 struct pci_dev *pdev = to_pci_dev(dev);
2571
2572 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2573
2574 /* register PCI DMA alias device */
2575 if (req_id != dma_alias) {
2576 tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2577 dma_alias & 0xff, NULL, domain);
2578
2579 if (!tmp || tmp != domain)
2580 return tmp;
2581 }
ba395927
KA
2582 }
2583
5db31569 2584 tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
76208356
JR
2585 if (!tmp || tmp != domain)
2586 return tmp;
2587
2588 return domain;
2589}
579305f7 2590
76208356
JR
2591static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
2592{
2593 struct dmar_domain *domain, *tmp;
2594
2595 domain = find_domain(dev);
2596 if (domain)
2597 goto out;
2598
2599 domain = find_or_alloc_domain(dev, gaw);
2600 if (!domain)
2601 goto out;
2602
2603 tmp = set_domain_for_dev(dev, domain);
2604 if (!tmp || domain != tmp) {
579305f7
AW
2605 domain_exit(domain);
2606 domain = tmp;
2607 }
b718cd3d 2608
76208356
JR
2609out:
2610
b718cd3d 2611 return domain;
ba395927
KA
2612}
2613
b213203e
DW
2614static int iommu_domain_identity_map(struct dmar_domain *domain,
2615 unsigned long long start,
2616 unsigned long long end)
ba395927 2617{
c5395d5c
DW
2618 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2619 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2620
2621 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2622 dma_to_mm_pfn(last_vpfn))) {
9f10e5bf 2623 pr_err("Reserving iova failed\n");
b213203e 2624 return -ENOMEM;
ba395927
KA
2625 }
2626
af1089ce 2627 pr_debug("Mapping reserved region %llx-%llx\n", start, end);
ba395927
KA
2628 /*
2629 * RMRR range might have overlap with physical memory range,
2630 * clear it first
2631 */
c5395d5c 2632 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
ba395927 2633
c5395d5c
DW
2634 return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2635 last_vpfn - first_vpfn + 1,
61df7443 2636 DMA_PTE_READ|DMA_PTE_WRITE);
b213203e
DW
2637}
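/*
 * Worked example (illustrative, hypothetical addresses): an RMRR
 * covering 0xbf7ec000-0xbf7effff gives first_vpfn = 0xbf7ec and
 * last_vpfn = 0xbf7ef, so four 4KiB pages are reserved in the iova tree
 * and then mapped 1:1 (pfn == vpfn) with read/write permission.
 */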
2638
d66ce54b
JR
2639static int domain_prepare_identity_map(struct device *dev,
2640 struct dmar_domain *domain,
2641 unsigned long long start,
2642 unsigned long long end)
b213203e 2643{
19943b0e
DW
2644 /* For _hardware_ passthrough, don't bother. But for software
2645 passthrough, we do it anyway -- it may indicate a memory
2646 range which is reserved in E820, and so didn't get set
2647 up in si_domain to start with */
2648 if (domain == si_domain && hw_pass_through) {
9f10e5bf
JR
2649 pr_warn("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2650 dev_name(dev), start, end);
19943b0e
DW
2651 return 0;
2652 }
2653
9f10e5bf
JR
2654 pr_info("Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2655 dev_name(dev), start, end);
2656
5595b528
DW
2657 if (end < start) {
2658 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2659 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2660 dmi_get_system_info(DMI_BIOS_VENDOR),
2661 dmi_get_system_info(DMI_BIOS_VERSION),
2662 dmi_get_system_info(DMI_PRODUCT_VERSION));
d66ce54b 2663 return -EIO;
5595b528
DW
2664 }
2665
2ff729f5
DW
2666 if (end >> agaw_to_width(domain->agaw)) {
2667 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2668 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2669 agaw_to_width(domain->agaw),
2670 dmi_get_system_info(DMI_BIOS_VENDOR),
2671 dmi_get_system_info(DMI_BIOS_VERSION),
2672 dmi_get_system_info(DMI_PRODUCT_VERSION));
d66ce54b 2673 return -EIO;
2ff729f5 2674 }
19943b0e 2675
d66ce54b
JR
2676 return iommu_domain_identity_map(domain, start, end);
2677}
ba395927 2678
d66ce54b
JR
2679static int iommu_prepare_identity_map(struct device *dev,
2680 unsigned long long start,
2681 unsigned long long end)
2682{
2683 struct dmar_domain *domain;
2684 int ret;
2685
2686 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2687 if (!domain)
2688 return -ENOMEM;
2689
2690 ret = domain_prepare_identity_map(dev, domain, start, end);
2691 if (ret)
2692 domain_exit(domain);
b213203e 2693
ba395927 2694 return ret;
ba395927
KA
2695}
2696
2697static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
0b9d9753 2698 struct device *dev)
ba395927 2699{
0b9d9753 2700 if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927 2701 return 0;
0b9d9753
DW
2702 return iommu_prepare_identity_map(dev, rmrr->base_address,
2703 rmrr->end_address);
ba395927
KA
2704}
2705
d3f13810 2706#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
49a0429e
KA
2707static inline void iommu_prepare_isa(void)
2708{
2709 struct pci_dev *pdev;
2710 int ret;
2711
2712 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2713 if (!pdev)
2714 return;
2715
9f10e5bf 2716 pr_info("Prepare 0-16MiB unity mapping for LPC\n");
0b9d9753 2717 ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);
49a0429e
KA
2718
2719 if (ret)
9f10e5bf 2720 pr_err("Failed to create 0-16MiB identity map - floppy might not work\n");
49a0429e 2721
9b27e82d 2722 pci_dev_put(pdev);
49a0429e
KA
2723}
2724#else
2725static inline void iommu_prepare_isa(void)
2726{
2727 return;
2728}
d3f13810 2729#endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */
49a0429e 2730
2c2e2c38 2731static int md_domain_init(struct dmar_domain *domain, int guest_width);
c7ab48d2 2732
071e1374 2733static int __init si_domain_init(int hw)
2c2e2c38 2734{
c7ab48d2 2735 int nid, ret = 0;
2c2e2c38 2736
ab8dfe25 2737 si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2c2e2c38
FY
2738 if (!si_domain)
2739 return -EFAULT;
2740
2c2e2c38
FY
2741 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2742 domain_exit(si_domain);
2743 return -EFAULT;
2744 }
2745
0dc79715 2746 pr_debug("Identity mapping domain allocated\n");
2c2e2c38 2747
19943b0e
DW
2748 if (hw)
2749 return 0;
2750
c7ab48d2 2751 for_each_online_node(nid) {
5dfe8660
TH
2752 unsigned long start_pfn, end_pfn;
2753 int i;
2754
2755 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2756 ret = iommu_domain_identity_map(si_domain,
2757 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2758 if (ret)
2759 return ret;
2760 }
c7ab48d2
DW
2761 }
2762
2c2e2c38
FY
2763 return 0;
2764}
2765
9b226624 2766static int identity_mapping(struct device *dev)
2c2e2c38
FY
2767{
2768 struct device_domain_info *info;
2769
2770 if (likely(!iommu_identity_mapping))
2771 return 0;
2772
9b226624 2773 info = dev->archdata.iommu;
cb452a40
MT
2774 if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2775 return (info->domain == si_domain);
2c2e2c38 2776
2c2e2c38
FY
2777 return 0;
2778}
2779
28ccce0d 2780static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
2c2e2c38 2781{
0ac72664 2782 struct dmar_domain *ndomain;
5a8f40e8 2783 struct intel_iommu *iommu;
156baca8 2784 u8 bus, devfn;
2c2e2c38 2785
5913c9bf 2786 iommu = device_to_iommu(dev, &bus, &devfn);
5a8f40e8
DW
2787 if (!iommu)
2788 return -ENODEV;
2789
5db31569 2790 ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
0ac72664
DW
2791 if (ndomain != domain)
2792 return -EBUSY;
2c2e2c38
FY
2793
2794 return 0;
2795}
2796
0b9d9753 2797static bool device_has_rmrr(struct device *dev)
ea2447f7
TM
2798{
2799 struct dmar_rmrr_unit *rmrr;
832bd858 2800 struct device *tmp;
ea2447f7
TM
2801 int i;
2802
0e242612 2803 rcu_read_lock();
ea2447f7 2804 for_each_rmrr_units(rmrr) {
b683b230
JL
2805 /*
2806 * Return TRUE if this RMRR contains the device that
2807 * is passed in.
2808 */
2809 for_each_active_dev_scope(rmrr->devices,
2810 rmrr->devices_cnt, i, tmp)
0b9d9753 2811 if (tmp == dev) {
0e242612 2812 rcu_read_unlock();
ea2447f7 2813 return true;
b683b230 2814 }
ea2447f7 2815 }
0e242612 2816 rcu_read_unlock();
ea2447f7
TM
2817 return false;
2818}
2819
c875d2c1
AW
2820/*
2821 * There are a couple cases where we need to restrict the functionality of
2822 * devices associated with RMRRs. The first is when evaluating a device for
2823 * identity mapping because problems exist when devices are moved in and out
2824 * of domains and their respective RMRR information is lost. This means that
2825 * a device with associated RMRRs will never be in a "passthrough" domain.
2826 * The second is use of the device through the IOMMU API. This interface
2827 * expects to have full control of the IOVA space for the device. We cannot
2828 * satisfy both the requirement that RMRR access is maintained and have an
2829 * unencumbered IOVA space. We also have no ability to quiesce the device's
2830 * use of the RMRR space or even inform the IOMMU API user of the restriction.
2831 * We therefore prevent devices associated with an RMRR from participating in
2832 * the IOMMU API, which eliminates them from device assignment.
2833 *
2834 * In both cases we assume that PCI USB devices with RMRRs have them largely
2835 * for historical reasons and that the RMRR space is not actively used post
2836 * boot. This exclusion may change if vendors begin to abuse it.
18436afd
DW
2837 *
2838 * The same exception is made for graphics devices, with the requirement that
2839 * any use of the RMRR regions will be torn down before assigning the device
2840 * to a guest.
c875d2c1
AW
2841 */
2842static bool device_is_rmrr_locked(struct device *dev)
2843{
2844 if (!device_has_rmrr(dev))
2845 return false;
2846
2847 if (dev_is_pci(dev)) {
2848 struct pci_dev *pdev = to_pci_dev(dev);
2849
18436afd 2850 if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
c875d2c1
AW
2851 return false;
2852 }
2853
2854 return true;
2855}
2856
3bdb2591 2857static int iommu_should_identity_map(struct device *dev, int startup)
6941af28 2858{
ea2447f7 2859
3bdb2591
DW
2860 if (dev_is_pci(dev)) {
2861 struct pci_dev *pdev = to_pci_dev(dev);
ea2447f7 2862
c875d2c1 2863 if (device_is_rmrr_locked(dev))
3bdb2591 2864 return 0;
e0fc7e0b 2865
3bdb2591
DW
2866 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2867 return 1;
e0fc7e0b 2868
3bdb2591
DW
2869 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2870 return 1;
6941af28 2871
3bdb2591 2872 if (!(iommu_identity_mapping & IDENTMAP_ALL))
3dfc813d 2873 return 0;
3bdb2591
DW
2874
2875 /*
2876 * We want to start off with all devices in the 1:1 domain, and
2877 * take them out later if we find they can't access all of memory.
2878 *
2879 * However, we can't do this for PCI devices behind bridges,
2880 * because all PCI devices behind the same bridge will end up
2881 * with the same source-id on their transactions.
2882 *
2883 * Practically speaking, we can't change things around for these
2884 * devices at run-time, because we can't be sure there'll be no
2885 * DMA transactions in flight for any of their siblings.
2886 *
2887 * So PCI devices (unless they're on the root bus) as well as
2888 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2889 * the 1:1 domain, just in _case_ one of their siblings turns out
2890 * not to be able to map all of memory.
2891 */
2892 if (!pci_is_pcie(pdev)) {
2893 if (!pci_is_root_bus(pdev->bus))
2894 return 0;
2895 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2896 return 0;
2897 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
3dfc813d 2898 return 0;
3bdb2591
DW
2899 } else {
2900 if (device_has_rmrr(dev))
2901 return 0;
2902 }
3dfc813d 2903
3bdb2591 2904 /*
3dfc813d 2905 * At boot time, we don't yet know if devices will be 64-bit capable.
3bdb2591 2906 * Assume that they will — if they turn out not to be, then we can
3dfc813d
DW
2907 * take them out of the 1:1 domain later.
2908 */
8fcc5372
CW
2909 if (!startup) {
2910 /*
2911 * If the device's dma_mask is less than the system's memory
2912 * size then this is not a candidate for identity mapping.
2913 */
3bdb2591 2914 u64 dma_mask = *dev->dma_mask;
8fcc5372 2915
3bdb2591
DW
2916 if (dev->coherent_dma_mask &&
2917 dev->coherent_dma_mask < dma_mask)
2918 dma_mask = dev->coherent_dma_mask;
8fcc5372 2919
3bdb2591 2920 return dma_mask >= dma_get_required_mask(dev);
8fcc5372 2921 }
6941af28
DW
2922
2923 return 1;
2924}
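/*
 * Worked example (illustrative): after boot, a device advertising only
 * a 32-bit dma_mask on a machine with more than 4GiB of RAM fails the
 * dma_get_required_mask() comparison above and so is not (or no longer)
 * identity mapped; its DMA goes through real IOVA mappings instead.
 */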
2925
cf04eee8
DW
2926static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
2927{
2928 int ret;
2929
2930 if (!iommu_should_identity_map(dev, 1))
2931 return 0;
2932
28ccce0d 2933 ret = domain_add_dev_info(si_domain, dev);
cf04eee8 2934 if (!ret)
9f10e5bf
JR
2935 pr_info("%s identity mapping for device %s\n",
2936 hw ? "Hardware" : "Software", dev_name(dev));
cf04eee8
DW
2937 else if (ret == -ENODEV)
2938 /* device not associated with an iommu */
2939 ret = 0;
2940
2941 return ret;
2942}
2943
2944
071e1374 2945static int __init iommu_prepare_static_identity_mapping(int hw)
2c2e2c38 2946{
2c2e2c38 2947 struct pci_dev *pdev = NULL;
cf04eee8
DW
2948 struct dmar_drhd_unit *drhd;
2949 struct intel_iommu *iommu;
2950 struct device *dev;
2951 int i;
2952 int ret = 0;
2c2e2c38 2953
2c2e2c38 2954 for_each_pci_dev(pdev) {
cf04eee8
DW
2955 ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
2956 if (ret)
2957 return ret;
2958 }
2959
2960 for_each_active_iommu(iommu, drhd)
2961 for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
2962 struct acpi_device_physical_node *pn;
2963 struct acpi_device *adev;
2964
2965 if (dev->bus != &acpi_bus_type)
2966 continue;
86080ccc 2967
cf04eee8
DW
2968 adev = to_acpi_device(dev);
2969 mutex_lock(&adev->physical_node_lock);
2970 list_for_each_entry(pn, &adev->physical_node_list, node) {
2971 ret = dev_prepare_static_identity_mapping(pn->dev, hw);
2972 if (ret)
2973 break;
eae460b6 2974 }
cf04eee8
DW
2975 mutex_unlock(&adev->physical_node_lock);
2976 if (ret)
2977 return ret;
62edf5dc 2978 }
2c2e2c38
FY
2979
2980 return 0;
2981}
2982
ffebeb46
JL
2983static void intel_iommu_init_qi(struct intel_iommu *iommu)
2984{
2985 /*
2986 * Start from the sane iommu hardware state.
2987 * If the queued invalidation is already initialized by us
2988 * (for example, while enabling interrupt-remapping) then
2989 * we got the things already rolling from a sane state.
2990 */
2991 if (!iommu->qi) {
2992 /*
2993 * Clear any previous faults.
2994 */
2995 dmar_fault(-1, iommu);
2996 /*
2997 * Disable queued invalidation if supported and already enabled
2998 * before OS handover.
2999 */
3000 dmar_disable_qi(iommu);
3001 }
3002
3003 if (dmar_enable_qi(iommu)) {
3004 /*
3005 * Queued Invalidate not enabled, use Register Based Invalidate
3006 */
3007 iommu->flush.flush_context = __iommu_flush_context;
3008 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
9f10e5bf 3009 pr_info("%s: Using Register based invalidation\n",
ffebeb46
JL
3010 iommu->name);
3011 } else {
3012 iommu->flush.flush_context = qi_flush_context;
3013 iommu->flush.flush_iotlb = qi_flush_iotlb;
9f10e5bf 3014 pr_info("%s: Using Queued invalidation\n", iommu->name);
ffebeb46
JL
3015 }
3016}
3017
091d42e4 3018static int copy_context_table(struct intel_iommu *iommu,
dfddb969 3019 struct root_entry *old_re,
091d42e4
JR
3020 struct context_entry **tbl,
3021 int bus, bool ext)
3022{
dbcd861f 3023 int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
543c8dcf 3024 struct context_entry *new_ce = NULL, ce;
dfddb969 3025 struct context_entry *old_ce = NULL;
543c8dcf 3026 struct root_entry re;
091d42e4
JR
3027 phys_addr_t old_ce_phys;
3028
3029 tbl_idx = ext ? bus * 2 : bus;
dfddb969 3030 memcpy(&re, old_re, sizeof(re));
091d42e4
JR
3031
3032 for (devfn = 0; devfn < 256; devfn++) {
3033 /* First calculate the correct index */
3034 idx = (ext ? devfn * 2 : devfn) % 256;
3035
3036 if (idx == 0) {
3037 /* First save what we may have and clean up */
3038 if (new_ce) {
3039 tbl[tbl_idx] = new_ce;
3040 __iommu_flush_cache(iommu, new_ce,
3041 VTD_PAGE_SIZE);
3042 pos = 1;
3043 }
3044
3045 if (old_ce)
3046 iounmap(old_ce);
3047
3048 ret = 0;
3049 if (devfn < 0x80)
543c8dcf 3050 old_ce_phys = root_entry_lctp(&re);
091d42e4 3051 else
543c8dcf 3052 old_ce_phys = root_entry_uctp(&re);
091d42e4
JR
3053
3054 if (!old_ce_phys) {
3055 if (ext && devfn == 0) {
3056 /* No LCTP, try UCTP */
3057 devfn = 0x7f;
3058 continue;
3059 } else {
3060 goto out;
3061 }
3062 }
3063
3064 ret = -ENOMEM;
dfddb969
DW
3065 old_ce = memremap(old_ce_phys, PAGE_SIZE,
3066 MEMREMAP_WB);
091d42e4
JR
3067 if (!old_ce)
3068 goto out;
3069
3070 new_ce = alloc_pgtable_page(iommu->node);
3071 if (!new_ce)
3072 goto out_unmap;
3073
3074 ret = 0;
3075 }
3076
3077 /* Now copy the context entry */
dfddb969 3078 memcpy(&ce, old_ce + idx, sizeof(ce));
091d42e4 3079
cf484d0e 3080 if (!__context_present(&ce))
091d42e4
JR
3081 continue;
3082
dbcd861f
JR
3083 did = context_domain_id(&ce);
3084 if (did >= 0 && did < cap_ndoms(iommu->cap))
3085 set_bit(did, iommu->domain_ids);
3086
cf484d0e
JR
3087 /*
3088 * We need a marker for copied context entries. This
3089 * marker needs to work for the old format as well as
3090 * for extended context entries.
3091 *
3092 * Bit 67 of the context entry is used. In the old
3093 * format this bit is available to software, in the
3094 * extended format it is the PGE bit, but PGE is ignored
3095 * by HW if PASIDs are disabled (and thus still
3096 * available).
3097 *
3098 * So disable PASIDs first and then mark the entry
3099 * copied. This means that we don't copy PASID
3100 * translations from the old kernel, but this is fine as
3101 * faults there are not fatal.
3102 */
3103 context_clear_pasid_enable(&ce);
3104 context_set_copied(&ce);
3105
091d42e4
JR
3106 new_ce[idx] = ce;
3107 }
3108
3109 tbl[tbl_idx + pos] = new_ce;
3110
3111 __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
3112
3113out_unmap:
dfddb969 3114 memunmap(old_ce);
091d42e4
JR
3115
3116out:
3117 return ret;
3118}
3119
3120static int copy_translation_tables(struct intel_iommu *iommu)
3121{
3122 struct context_entry **ctxt_tbls;
dfddb969 3123 struct root_entry *old_rt;
091d42e4
JR
3124 phys_addr_t old_rt_phys;
3125 int ctxt_table_entries;
3126 unsigned long flags;
3127 u64 rtaddr_reg;
3128 int bus, ret;
c3361f2f 3129 bool new_ext, ext;
091d42e4
JR
3130
3131 rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
3132 ext = !!(rtaddr_reg & DMA_RTADDR_RTT);
c3361f2f
JR
3133 new_ext = !!ecap_ecs(iommu->ecap);
3134
3135 /*
3136 * The RTT bit can only be changed when translation is disabled,
3137 * but disabling translation means to open a window for data
3138 * corruption. So bail out and don't copy anything if we would
3139 * have to change the bit.
3140 */
3141 if (new_ext != ext)
3142 return -EINVAL;
091d42e4
JR
3143
3144 old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
3145 if (!old_rt_phys)
3146 return -EINVAL;
3147
dfddb969 3148 old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
091d42e4
JR
3149 if (!old_rt)
3150 return -ENOMEM;
3151
3152 /* This is too big for the stack - allocate it from slab */
3153 ctxt_table_entries = ext ? 512 : 256;
3154 ret = -ENOMEM;
3155 ctxt_tbls = kzalloc(ctxt_table_entries * sizeof(void *), GFP_KERNEL);
3156 if (!ctxt_tbls)
3157 goto out_unmap;
3158
3159 for (bus = 0; bus < 256; bus++) {
3160 ret = copy_context_table(iommu, &old_rt[bus],
3161 ctxt_tbls, bus, ext);
3162 if (ret) {
3163 pr_err("%s: Failed to copy context table for bus %d\n",
3164 iommu->name, bus);
3165 continue;
3166 }
3167 }
3168
3169 spin_lock_irqsave(&iommu->lock, flags);
3170
3171 /* Context tables are copied, now write them to the root_entry table */
3172 for (bus = 0; bus < 256; bus++) {
3173 int idx = ext ? bus * 2 : bus;
3174 u64 val;
3175
3176 if (ctxt_tbls[idx]) {
3177 val = virt_to_phys(ctxt_tbls[idx]) | 1;
3178 iommu->root_entry[bus].lo = val;
3179 }
3180
3181 if (!ext || !ctxt_tbls[idx + 1])
3182 continue;
3183
3184 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
3185 iommu->root_entry[bus].hi = val;
3186 }
3187
3188 spin_unlock_irqrestore(&iommu->lock, flags);
3189
3190 kfree(ctxt_tbls);
3191
3192 __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
3193
3194 ret = 0;
3195
3196out_unmap:
dfddb969 3197 memunmap(old_rt);
091d42e4
JR
3198
3199 return ret;
3200}
3201
b779260b 3202static int __init init_dmars(void)
ba395927
KA
3203{
3204 struct dmar_drhd_unit *drhd;
3205 struct dmar_rmrr_unit *rmrr;
a87f4918 3206 bool copied_tables = false;
832bd858 3207 struct device *dev;
ba395927 3208 struct intel_iommu *iommu;
aa473240 3209 int i, ret, cpu;
2c2e2c38 3210
ba395927
KA
3211 /*
3212 * for each drhd
3213 * allocate root
3214 * initialize and program root entry to not present
3215 * endfor
3216 */
3217 for_each_drhd_unit(drhd) {
5e0d2a6f 3218 /*
3219 * lock not needed as this is only incremented in the single
3220 * threaded kernel __init code path; all other accesses are
3221 * read only
3222 */
78d8e704 3223 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
1b198bb0
MT
3224 g_num_of_iommus++;
3225 continue;
3226 }
9f10e5bf 3227 pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
5e0d2a6f 3228 }
3229
ffebeb46
JL
3230 /* Preallocate enough resources for IOMMU hot-addition */
3231 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
3232 g_num_of_iommus = DMAR_UNITS_SUPPORTED;
3233
d9630fe9
WH
3234 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
3235 GFP_KERNEL);
3236 if (!g_iommus) {
9f10e5bf 3237 pr_err("Allocating global iommu array failed\n");
d9630fe9
WH
3238 ret = -ENOMEM;
3239 goto error;
3240 }
3241
aa473240
OP
3242 for_each_possible_cpu(cpu) {
3243 struct deferred_flush_data *dfd = per_cpu_ptr(&deferred_flush,
3244 cpu);
3245
3246 dfd->tables = kzalloc(g_num_of_iommus *
3247 sizeof(struct deferred_flush_table),
3248 GFP_KERNEL);
3249 if (!dfd->tables) {
3250 ret = -ENOMEM;
3251 goto free_g_iommus;
3252 }
3253
3254 spin_lock_init(&dfd->lock);
3255 setup_timer(&dfd->timer, flush_unmaps_timeout, cpu);
5e0d2a6f 3256 }
3257
7c919779 3258 for_each_active_iommu(iommu, drhd) {
d9630fe9 3259 g_iommus[iommu->seq_id] = iommu;
ba395927 3260
b63d80d1
JR
3261 intel_iommu_init_qi(iommu);
3262
e61d98d8
SS
3263 ret = iommu_init_domains(iommu);
3264 if (ret)
989d51fc 3265 goto free_iommu;
e61d98d8 3266
4158c2ec
JR
3267 init_translation_status(iommu);
3268
091d42e4
JR
3269 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
3270 iommu_disable_translation(iommu);
3271 clear_translation_pre_enabled(iommu);
3272 pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3273 iommu->name);
3274 }
4158c2ec 3275
ba395927
KA
3276 /*
3277 * TBD:
3278 * we could share the same root & context tables
25985edc 3279 * among all IOMMUs. Need to split it later.
ba395927
KA
3280 */
3281 ret = iommu_alloc_root_entry(iommu);
ffebeb46 3282 if (ret)
989d51fc 3283 goto free_iommu;
5f0a7f76 3284
091d42e4
JR
3285 if (translation_pre_enabled(iommu)) {
3286 pr_info("Translation already enabled - trying to copy translation structures\n");
3287
3288 ret = copy_translation_tables(iommu);
3289 if (ret) {
3290 /*
3291 * We found the IOMMU with translation
3292 * enabled - but failed to copy over the
3293 * old root-entry table. Try to proceed
3294 * by disabling translation now and
3295 * allocating a clean root-entry table.
3296 * This might cause DMAR faults, but
3297 * probably the dump will still succeed.
3298 */
3299 pr_err("Failed to copy translation tables from previous kernel for %s\n",
3300 iommu->name);
3301 iommu_disable_translation(iommu);
3302 clear_translation_pre_enabled(iommu);
3303 } else {
3304 pr_info("Copied translation tables from previous kernel for %s\n",
3305 iommu->name);
a87f4918 3306 copied_tables = true;
091d42e4
JR
3307 }
3308 }
3309
4ed0d3e6 3310 if (!ecap_pass_through(iommu->ecap))
19943b0e 3311 hw_pass_through = 0;
8a94ade4
DW
3312#ifdef CONFIG_INTEL_IOMMU_SVM
3313 if (pasid_enabled(iommu))
3314 intel_svm_alloc_pasid_tables(iommu);
3315#endif
ba395927
KA
3316 }
3317
a4c34ff1
JR
3318 /*
3319 * Now that qi is enabled on all iommus, set the root entry and flush
3320 * caches. This is required on some Intel X58 chipsets, otherwise the
3321 * flush_context function will loop forever and the boot hangs.
3322 */
3323 for_each_active_iommu(iommu, drhd) {
3324 iommu_flush_write_buffer(iommu);
3325 iommu_set_root_entry(iommu);
3326 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
3327 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3328 }
3329
19943b0e 3330 if (iommu_pass_through)
e0fc7e0b
DW
3331 iommu_identity_mapping |= IDENTMAP_ALL;
3332
d3f13810 3333#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
e0fc7e0b 3334 iommu_identity_mapping |= IDENTMAP_GFX;
19943b0e 3335#endif
e0fc7e0b 3336
21e722c4
AR
3337 check_tylersburg_isoch();
3338
86080ccc
JR
3339 if (iommu_identity_mapping) {
3340 ret = si_domain_init(hw_pass_through);
3341 if (ret)
3342 goto free_iommu;
3343 }
3344
e0fc7e0b 3345
a87f4918
JR
3346 /*
3347 * If we copied translations from a previous kernel in the kdump
3348 * case, we can not assign the devices to domains now, as that
3349 * would eliminate the old mappings. So skip this part and defer
3350 * the assignment to device driver initialization time.
3351 */
3352 if (copied_tables)
3353 goto domains_done;
3354
ba395927 3355 /*
19943b0e
DW
3356 * If pass through is not set or not enabled, set up context entries for
3357 * identity mappings for rmrr, gfx, and isa, and possibly fall back to
3358 * static identity mapping if iommu_identity_mapping is set.
ba395927 3359 */
19943b0e
DW
3360 if (iommu_identity_mapping) {
3361 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
4ed0d3e6 3362 if (ret) {
9f10e5bf 3363 pr_crit("Failed to setup IOMMU pass-through\n");
989d51fc 3364 goto free_iommu;
ba395927
KA
3365 }
3366 }
ba395927 3367 /*
19943b0e
DW
3368 * For each rmrr
3369 * for each dev attached to rmrr
3370 * do
3371 * locate drhd for dev, alloc domain for dev
3372 * allocate free domain
3373 * allocate page table entries for rmrr
3374 * if context not allocated for bus
3375 * allocate and init context
3376 * set present in root table for this bus
3377 * init context with domain, translation etc
3378 * endfor
3379 * endfor
ba395927 3380 */
9f10e5bf 3381 pr_info("Setting RMRR:\n");
19943b0e 3382 for_each_rmrr_units(rmrr) {
b683b230
JL
3383 /* some BIOSes list non-existent devices in the DMAR table. */
3384 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
832bd858 3385 i, dev) {
0b9d9753 3386 ret = iommu_prepare_rmrr_dev(rmrr, dev);
19943b0e 3387 if (ret)
9f10e5bf 3388 pr_err("Mapping reserved region failed\n");
ba395927 3389 }
4ed0d3e6 3390 }
49a0429e 3391
19943b0e
DW
3392 iommu_prepare_isa();
3393
a87f4918
JR
3394domains_done:
3395
ba395927
KA
3396 /*
3397 * for each drhd
3398 * enable fault log
3399 * global invalidate context cache
3400 * global invalidate iotlb
3401 * enable translation
3402 */
7c919779 3403 for_each_iommu(iommu, drhd) {
51a63e67
JC
3404 if (drhd->ignored) {
3405 /*
3406 * we always have to disable PMRs or DMA may fail on
3407 * this device
3408 */
3409 if (force_on)
7c919779 3410 iommu_disable_protect_mem_regions(iommu);
ba395927 3411 continue;
51a63e67 3412 }
ba395927
KA
3413
3414 iommu_flush_write_buffer(iommu);
3415
a222a7f0
DW
3416#ifdef CONFIG_INTEL_IOMMU_SVM
3417 if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) {
3418 ret = intel_svm_enable_prq(iommu);
3419 if (ret)
3420 goto free_iommu;
3421 }
3422#endif
3460a6d9
KA
3423 ret = dmar_set_interrupt(iommu);
3424 if (ret)
989d51fc 3425 goto free_iommu;
3460a6d9 3426
8939ddf6
JR
3427 if (!translation_pre_enabled(iommu))
3428 iommu_enable_translation(iommu);
3429
b94996c9 3430 iommu_disable_protect_mem_regions(iommu);
ba395927
KA
3431 }
3432
3433 return 0;
989d51fc
JL
3434
3435free_iommu:
ffebeb46
JL
3436 for_each_active_iommu(iommu, drhd) {
3437 disable_dmar_iommu(iommu);
a868e6b7 3438 free_dmar_iommu(iommu);
ffebeb46 3439 }
989d51fc 3440free_g_iommus:
aa473240
OP
3441 for_each_possible_cpu(cpu)
3442 kfree(per_cpu_ptr(&deferred_flush, cpu)->tables);
d9630fe9 3443 kfree(g_iommus);
989d51fc 3444error:
ba395927
KA
3445 return ret;
3446}
3447
5a5e02a6 3448/* This takes a number of _MM_ pages, not VTD pages */
2aac6304 3449static unsigned long intel_alloc_iova(struct device *dev,
875764de
DW
3450 struct dmar_domain *domain,
3451 unsigned long nrpages, uint64_t dma_mask)
ba395927 3452{
22e2f9fa 3453 unsigned long iova_pfn = 0;
ba395927 3454
875764de
DW
3455 /* Restrict dma_mask to the width that the iommu can handle */
3456 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
8f6429c7
RM
3457 /* Ensure we reserve the whole size-aligned region */
3458 nrpages = __roundup_pow_of_two(nrpages);
875764de
DW
3459
3460 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
ba395927
KA
3461 /*
3462 * First try to allocate an io virtual address in
284901a9 3463 * DMA_BIT_MASK(32) and if that fails then try allocating
3609801e 3464 * from a higher range
ba395927 3465 */
22e2f9fa
OP
3466 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
3467 IOVA_PFN(DMA_BIT_MASK(32)));
3468 if (iova_pfn)
3469 return iova_pfn;
875764de 3470 }
22e2f9fa
OP
3471 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages, IOVA_PFN(dma_mask));
3472 if (unlikely(!iova_pfn)) {
9f10e5bf 3473 pr_err("Allocating %ld-page iova for %s failed\n",
207e3592 3474 nrpages, dev_name(dev));
2aac6304 3475 return 0;
f76aec76
KA
3476 }
3477
22e2f9fa 3478 return iova_pfn;
f76aec76
KA
3479}
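/*
 * Worked example (illustrative, assuming dmar_forcedac is not set): a
 * request for 3 MM pages from a device with a 64-bit dma_mask is first
 * rounded up to 4 pages by __roundup_pow_of_two(), then tried below
 * 4GiB via alloc_iova_fast(..., IOVA_PFN(DMA_BIT_MASK(32))); only if
 * that range is exhausted does the allocation fall back to the full
 * dma_mask-limited space.
 */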
3480
b316d02a 3481static struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
f76aec76 3482{
1c5ebba9 3483 struct dmar_domain *domain, *tmp;
b1ce5b79 3484 struct dmar_rmrr_unit *rmrr;
b1ce5b79
JR
3485 struct device *i_dev;
3486 int i, ret;
f76aec76 3487
1c5ebba9
JR
3488 domain = find_domain(dev);
3489 if (domain)
3490 goto out;
3491
3492 domain = find_or_alloc_domain(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
3493 if (!domain)
3494 goto out;
ba395927 3495
b1ce5b79
JR
3496 /* We have a new domain - setup possible RMRRs for the device */
3497 rcu_read_lock();
3498 for_each_rmrr_units(rmrr) {
3499 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
3500 i, i_dev) {
3501 if (i_dev != dev)
3502 continue;
3503
3504 ret = domain_prepare_identity_map(dev, domain,
3505 rmrr->base_address,
3506 rmrr->end_address);
3507 if (ret)
3508 dev_err(dev, "Mapping reserved region failed\n");
3509 }
3510 }
3511 rcu_read_unlock();
3512
1c5ebba9
JR
3513 tmp = set_domain_for_dev(dev, domain);
3514 if (!tmp || domain != tmp) {
3515 domain_exit(domain);
3516 domain = tmp;
3517 }
3518
3519out:
3520
3521 if (!domain)
3522 pr_err("Allocating domain for %s failed\n", dev_name(dev));
3523
3524
f76aec76
KA
3525 return domain;
3526}
3527
ecb509ec 3528/* Check if the dev needs to go through the non-identity map and unmap process. */
73676832 3529static int iommu_no_mapping(struct device *dev)
2c2e2c38
FY
3530{
3531 int found;
3532
3d89194a 3533 if (iommu_dummy(dev))
1e4c64c4
DW
3534 return 1;
3535
2c2e2c38 3536 if (!iommu_identity_mapping)
1e4c64c4 3537 return 0;
2c2e2c38 3538
9b226624 3539 found = identity_mapping(dev);
2c2e2c38 3540 if (found) {
ecb509ec 3541 if (iommu_should_identity_map(dev, 0))
2c2e2c38
FY
3542 return 1;
3543 else {
3544 /*
3545 * A device restricted to 32 bit DMA is removed from si_domain and
3546 * falls back to non-identity mapping.
3547 */
e6de0f8d 3548 dmar_remove_one_dev_info(si_domain, dev);
9f10e5bf
JR
3549 pr_info("32bit %s uses non-identity mapping\n",
3550 dev_name(dev));
2c2e2c38
FY
3551 return 0;
3552 }
3553 } else {
3554 /*
3555 * When a 64 bit DMA device is detached from a VM, the device
3556 * is put into si_domain for identity mapping.
3557 */
ecb509ec 3558 if (iommu_should_identity_map(dev, 0)) {
2c2e2c38 3559 int ret;
28ccce0d 3560 ret = domain_add_dev_info(si_domain, dev);
2c2e2c38 3561 if (!ret) {
9f10e5bf
JR
3562 pr_info("64bit %s uses identity mapping\n",
3563 dev_name(dev));
2c2e2c38
FY
3564 return 1;
3565 }
3566 }
3567 }
3568
1e4c64c4 3569 return 0;
2c2e2c38
FY
3570}
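
/*
 * Summary of iommu_no_mapping() above: a dummy device always bypasses
 * translation; with identity mapping disabled every device is translated;
 * a device found in si_domain that should no longer be identity mapped
 * (e.g. it is limited to 32 bit DMA) is removed and translated from then
 * on, while an eligible device not yet in si_domain is added to it and
 * bypasses translation.
 */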
3571
5040a918 3572static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
bb9e6d65 3573 size_t size, int dir, u64 dma_mask)
f76aec76 3574{
f76aec76 3575 struct dmar_domain *domain;
5b6985ce 3576 phys_addr_t start_paddr;
2aac6304 3577 unsigned long iova_pfn;
f76aec76 3578 int prot = 0;
6865f0d1 3579 int ret;
8c11e798 3580 struct intel_iommu *iommu;
33041ec0 3581 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
f76aec76
KA
3582
3583 BUG_ON(dir == DMA_NONE);
2c2e2c38 3584
5040a918 3585 if (iommu_no_mapping(dev))
6865f0d1 3586 return paddr;
f76aec76 3587
5040a918 3588 domain = get_valid_domain_for_dev(dev);
f76aec76
KA
3589 if (!domain)
3590 return 0;
3591
8c11e798 3592 iommu = domain_get_iommu(domain);
88cb6a74 3593 size = aligned_nrpages(paddr, size);
f76aec76 3594
2aac6304
OP
3595 iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
3596 if (!iova_pfn)
f76aec76
KA
3597 goto error;
3598
ba395927
KA
3599 /*
3600 * Check if DMAR supports zero-length reads on write-only
3601 * mappings.
3602 */
3603 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3604 !cap_zlr(iommu->cap))
ba395927
KA
3605 prot |= DMA_PTE_READ;
3606 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3607 prot |= DMA_PTE_WRITE;
3608 /*
6865f0d1 3609 * paddr - (paddr + size) might be partial page, we should map the whole
ba395927 3610 * page. Note: if two part of one page are separately mapped, we
6865f0d1 3611 * might have two guest_addr mapping to the same host paddr, but this
ba395927
KA
3612 * is not a big problem
3613 */
2aac6304 3614 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova_pfn),
33041ec0 3615 mm_to_dma_pfn(paddr_pfn), size, prot);
ba395927
KA
3616 if (ret)
3617 goto error;
3618
1f0ef2aa
DW
3619 /* it's a non-present to present mapping. Only flush if caching mode */
3620 if (cap_caching_mode(iommu->cap))
a1ddcbe9 3621 iommu_flush_iotlb_psi(iommu, domain,
2aac6304 3622 mm_to_dma_pfn(iova_pfn),
a1ddcbe9 3623 size, 0, 1);
1f0ef2aa 3624 else
8c11e798 3625 iommu_flush_write_buffer(iommu);
f76aec76 3626
2aac6304 3627 start_paddr = (phys_addr_t)iova_pfn << PAGE_SHIFT;
03d6a246
DW
3628 start_paddr += paddr & ~PAGE_MASK;
3629 return start_paddr;
ba395927 3630
ba395927 3631error:
2aac6304 3632 if (iova_pfn)
22e2f9fa 3633 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
9f10e5bf 3634 pr_err("Device %s request: %zx@%llx dir %d --- failed\n",
5040a918 3635 dev_name(dev), size, (unsigned long long)paddr, dir);
ba395927
KA
3636 return 0;
3637}
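
/*
 * Worked example for __intel_map_single() above: mapping 0x20 bytes at
 * paddr 0x12fff spans two VT-d pages, so aligned_nrpages() (defined
 * earlier in this file) yields 2 and both pages are mapped; the page
 * offset of paddr is added back to start_paddr so the caller gets a
 * byte-accurate DMA address.
 */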
3638
ffbbef5c
FT
3639static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3640 unsigned long offset, size_t size,
3641 enum dma_data_direction dir,
00085f1e 3642 unsigned long attrs)
bb9e6d65 3643{
ffbbef5c 3644 return __intel_map_single(dev, page_to_phys(page) + offset, size,
46333e37 3645 dir, *dev->dma_mask);
bb9e6d65
FT
3646}
3647
aa473240 3648static void flush_unmaps(struct deferred_flush_data *flush_data)
5e0d2a6f 3649{
80b20dd8 3650 int i, j;
5e0d2a6f 3651
aa473240 3652 flush_data->timer_on = 0;
5e0d2a6f 3653
3654 /* just flush them all */
3655 for (i = 0; i < g_num_of_iommus; i++) {
a2bb8459 3656 struct intel_iommu *iommu = g_iommus[i];
aa473240
OP
3657 struct deferred_flush_table *flush_table =
3658 &flush_data->tables[i];
a2bb8459
WH
3659 if (!iommu)
3660 continue;
c42d9f32 3661
aa473240 3662 if (!flush_table->next)
9dd2fe89
YZ
3663 continue;
3664
78d5f0f5
NA
3665 /* In caching mode, global flushes turn emulation expensive */
3666 if (!cap_caching_mode(iommu->cap))
3667 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
93a23a72 3668 DMA_TLB_GLOBAL_FLUSH);
aa473240 3669 for (j = 0; j < flush_table->next; j++) {
93a23a72 3670 unsigned long mask;
314f1dc1 3671 struct deferred_flush_entry *entry =
aa473240 3672 &flush_table->entries[j];
2aac6304 3673 unsigned long iova_pfn = entry->iova_pfn;
769530e4 3674 unsigned long nrpages = entry->nrpages;
314f1dc1
OP
3675 struct dmar_domain *domain = entry->domain;
3676 struct page *freelist = entry->freelist;
78d5f0f5
NA
3677
3678 /* On real hardware multiple invalidations are expensive */
3679 if (cap_caching_mode(iommu->cap))
a1ddcbe9 3680 iommu_flush_iotlb_psi(iommu, domain,
2aac6304 3681 mm_to_dma_pfn(iova_pfn),
769530e4 3682 nrpages, !freelist, 0);
78d5f0f5 3683 else {
769530e4 3684 mask = ilog2(nrpages);
314f1dc1 3685 iommu_flush_dev_iotlb(domain,
2aac6304 3686 (uint64_t)iova_pfn << PAGE_SHIFT, mask);
78d5f0f5 3687 }
22e2f9fa 3688 free_iova_fast(&domain->iovad, iova_pfn, nrpages);
314f1dc1
OP
3689 if (freelist)
3690 dma_free_pagelist(freelist);
80b20dd8 3691 }
aa473240 3692 flush_table->next = 0;
5e0d2a6f 3693 }
3694
aa473240 3695 flush_data->size = 0;
5e0d2a6f 3696}
3697
aa473240 3698static void flush_unmaps_timeout(unsigned long cpuid)
5e0d2a6f 3699{
aa473240 3700 struct deferred_flush_data *flush_data = per_cpu_ptr(&deferred_flush, cpuid);
80b20dd8 3701 unsigned long flags;
3702
aa473240
OP
3703 spin_lock_irqsave(&flush_data->lock, flags);
3704 flush_unmaps(flush_data);
3705 spin_unlock_irqrestore(&flush_data->lock, flags);
5e0d2a6f 3706}
3707
2aac6304 3708static void add_unmap(struct dmar_domain *dom, unsigned long iova_pfn,
769530e4 3709 unsigned long nrpages, struct page *freelist)
5e0d2a6f 3710{
3711 unsigned long flags;
314f1dc1 3712 int entry_id, iommu_id;
8c11e798 3713 struct intel_iommu *iommu;
314f1dc1 3714 struct deferred_flush_entry *entry;
aa473240 3715 struct deferred_flush_data *flush_data;
5e0d2a6f 3716
58c4a95f 3717 flush_data = raw_cpu_ptr(&deferred_flush);
aa473240
OP
3718
3719 /* Flush all CPUs' entries to avoid deferring too much. If
3720 * this becomes a bottleneck, we can flush just this CPU's entries and
3721 * rely on the flush timer for the rest.
3722 */
3723 if (flush_data->size == HIGH_WATER_MARK) {
3724 int cpu;
3725
3726 for_each_online_cpu(cpu)
3727 flush_unmaps_timeout(cpu);
3728 }
3729
3730 spin_lock_irqsave(&flush_data->lock, flags);
80b20dd8 3731
8c11e798
WH
3732 iommu = domain_get_iommu(dom);
3733 iommu_id = iommu->seq_id;
c42d9f32 3734
aa473240
OP
3735 entry_id = flush_data->tables[iommu_id].next;
3736 ++(flush_data->tables[iommu_id].next);
5e0d2a6f 3737
aa473240 3738 entry = &flush_data->tables[iommu_id].entries[entry_id];
314f1dc1 3739 entry->domain = dom;
2aac6304 3740 entry->iova_pfn = iova_pfn;
769530e4 3741 entry->nrpages = nrpages;
314f1dc1 3742 entry->freelist = freelist;
5e0d2a6f 3743
aa473240
OP
3744 if (!flush_data->timer_on) {
3745 mod_timer(&flush_data->timer, jiffies + msecs_to_jiffies(10));
3746 flush_data->timer_on = 1;
5e0d2a6f 3747 }
aa473240
OP
3748 flush_data->size++;
3749 spin_unlock_irqrestore(&flush_data->lock, flags);
5e0d2a6f 3750}
3751
769530e4 3752static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
ba395927 3753{
f76aec76 3754 struct dmar_domain *domain;
d794dc9b 3755 unsigned long start_pfn, last_pfn;
769530e4 3756 unsigned long nrpages;
2aac6304 3757 unsigned long iova_pfn;
8c11e798 3758 struct intel_iommu *iommu;
ea8ea460 3759 struct page *freelist;
ba395927 3760
73676832 3761 if (iommu_no_mapping(dev))
f76aec76 3762 return;
2c2e2c38 3763
1525a29a 3764 domain = find_domain(dev);
ba395927
KA
3765 BUG_ON(!domain);
3766
8c11e798
WH
3767 iommu = domain_get_iommu(domain);
3768
2aac6304 3769 iova_pfn = IOVA_PFN(dev_addr);
ba395927 3770
769530e4 3771 nrpages = aligned_nrpages(dev_addr, size);
2aac6304 3772 start_pfn = mm_to_dma_pfn(iova_pfn);
769530e4 3773 last_pfn = start_pfn + nrpages - 1;
ba395927 3774
d794dc9b 3775 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
207e3592 3776 dev_name(dev), start_pfn, last_pfn);
ba395927 3777
ea8ea460 3778 freelist = domain_unmap(domain, start_pfn, last_pfn);
d794dc9b 3779
5e0d2a6f 3780 if (intel_iommu_strict) {
a1ddcbe9 3781 iommu_flush_iotlb_psi(iommu, domain, start_pfn,
769530e4 3782 nrpages, !freelist, 0);
5e0d2a6f 3783 /* free iova */
22e2f9fa 3784 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages));
ea8ea460 3785 dma_free_pagelist(freelist);
5e0d2a6f 3786 } else {
2aac6304 3787 add_unmap(domain, iova_pfn, nrpages, freelist);
5e0d2a6f 3788 /*
3789 * queue up the release of the unmap to save the roughly 1/6th of the
3790 * CPU time used up by the iotlb flush operation...
3791 */
5e0d2a6f 3792 }
ba395927
KA
3793}
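
/*
 * Note on the two unmap paths above: with intel_iommu_strict the IOTLB is
 * invalidated and the IOVA freed synchronously, while the default path
 * queues the work in the per-CPU deferred_flush tables via add_unmap();
 * those are drained either when HIGH_WATER_MARK entries accumulate or
 * when the 10 ms timer armed in add_unmap() fires.
 */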
3794
d41a4adb
JL
3795static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3796 size_t size, enum dma_data_direction dir,
00085f1e 3797 unsigned long attrs)
d41a4adb 3798{
769530e4 3799 intel_unmap(dev, dev_addr, size);
d41a4adb
JL
3800}
3801
5040a918 3802static void *intel_alloc_coherent(struct device *dev, size_t size,
baa676fc 3803 dma_addr_t *dma_handle, gfp_t flags,
00085f1e 3804 unsigned long attrs)
ba395927 3805{
36746436 3806 struct page *page = NULL;
ba395927
KA
3807 int order;
3808
5b6985ce 3809 size = PAGE_ALIGN(size);
ba395927 3810 order = get_order(size);
e8bb910d 3811
5040a918 3812 if (!iommu_no_mapping(dev))
e8bb910d 3813 flags &= ~(GFP_DMA | GFP_DMA32);
5040a918
DW
3814 else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
3815 if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
e8bb910d
AW
3816 flags |= GFP_DMA;
3817 else
3818 flags |= GFP_DMA32;
3819 }
ba395927 3820
d0164adc 3821 if (gfpflags_allow_blocking(flags)) {
36746436
AM
3822 unsigned int count = size >> PAGE_SHIFT;
3823
712c604d 3824 page = dma_alloc_from_contiguous(dev, count, order, flags);
36746436
AM
3825 if (page && iommu_no_mapping(dev) &&
3826 page_to_phys(page) + size > dev->coherent_dma_mask) {
3827 dma_release_from_contiguous(dev, page, count);
3828 page = NULL;
3829 }
3830 }
3831
3832 if (!page)
3833 page = alloc_pages(flags, order);
3834 if (!page)
ba395927 3835 return NULL;
36746436 3836 memset(page_address(page), 0, size);
ba395927 3837
36746436 3838 *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
bb9e6d65 3839 DMA_BIDIRECTIONAL,
5040a918 3840 dev->coherent_dma_mask);
ba395927 3841 if (*dma_handle)
36746436
AM
3842 return page_address(page);
3843 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3844 __free_pages(page, order);
3845
ba395927
KA
3846 return NULL;
3847}
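
/*
 * Note on the flag handling in intel_alloc_coherent() above: for a device
 * translated by the IOMMU, GFP_DMA/GFP_DMA32 are stripped because the
 * IOMMU can remap any page into the device's reachable range; for an
 * identity-mapped device the zone is chosen from coherent_dma_mask, and a
 * CMA page that still ends up above that mask is released and replaced by
 * a plain alloc_pages() attempt.
 */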
3848
5040a918 3849static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
00085f1e 3850 dma_addr_t dma_handle, unsigned long attrs)
ba395927
KA
3851{
3852 int order;
36746436 3853 struct page *page = virt_to_page(vaddr);
ba395927 3854
5b6985ce 3855 size = PAGE_ALIGN(size);
ba395927
KA
3856 order = get_order(size);
3857
769530e4 3858 intel_unmap(dev, dma_handle, size);
36746436
AM
3859 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3860 __free_pages(page, order);
ba395927
KA
3861}
3862
5040a918 3863static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
d7ab5c46 3864 int nelems, enum dma_data_direction dir,
00085f1e 3865 unsigned long attrs)
ba395927 3866{
769530e4
OP
3867 dma_addr_t startaddr = sg_dma_address(sglist) & PAGE_MASK;
3868 unsigned long nrpages = 0;
3869 struct scatterlist *sg;
3870 int i;
3871
3872 for_each_sg(sglist, sg, nelems, i) {
3873 nrpages += aligned_nrpages(sg_dma_address(sg), sg_dma_len(sg));
3874 }
3875
3876 intel_unmap(dev, startaddr, nrpages << VTD_PAGE_SHIFT);
ba395927
KA
3877}
3878
ba395927 3879static int intel_nontranslate_map_sg(struct device *hddev,
c03ab37c 3880 struct scatterlist *sglist, int nelems, int dir)
ba395927
KA
3881{
3882 int i;
c03ab37c 3883 struct scatterlist *sg;
ba395927 3884
c03ab37c 3885 for_each_sg(sglist, sg, nelems, i) {
12d4d40e 3886 BUG_ON(!sg_page(sg));
3e6110fd 3887 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
c03ab37c 3888 sg->dma_length = sg->length;
ba395927
KA
3889 }
3890 return nelems;
3891}
3892
5040a918 3893static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
00085f1e 3894 enum dma_data_direction dir, unsigned long attrs)
ba395927 3895{
ba395927 3896 int i;
ba395927 3897 struct dmar_domain *domain;
f76aec76
KA
3898 size_t size = 0;
3899 int prot = 0;
2aac6304 3900 unsigned long iova_pfn;
f76aec76 3901 int ret;
c03ab37c 3902 struct scatterlist *sg;
b536d24d 3903 unsigned long start_vpfn;
8c11e798 3904 struct intel_iommu *iommu;
ba395927
KA
3905
3906 BUG_ON(dir == DMA_NONE);
5040a918
DW
3907 if (iommu_no_mapping(dev))
3908 return intel_nontranslate_map_sg(dev, sglist, nelems, dir);
ba395927 3909
5040a918 3910 domain = get_valid_domain_for_dev(dev);
f76aec76
KA
3911 if (!domain)
3912 return 0;
3913
8c11e798
WH
3914 iommu = domain_get_iommu(domain);
3915
b536d24d 3916 for_each_sg(sglist, sg, nelems, i)
88cb6a74 3917 size += aligned_nrpages(sg->offset, sg->length);
f76aec76 3918
2aac6304 3919 iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
5040a918 3920 *dev->dma_mask);
2aac6304 3921 if (!iova_pfn) {
c03ab37c 3922 sglist->dma_length = 0;
f76aec76
KA
3923 return 0;
3924 }
3925
3926 /*
3927 * Check if DMAR supports zero-length reads on write-only
3928 * mappings.
3929 */
3930 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3931 !cap_zlr(iommu->cap))
f76aec76
KA
3932 prot |= DMA_PTE_READ;
3933 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3934 prot |= DMA_PTE_WRITE;
3935
2aac6304 3936 start_vpfn = mm_to_dma_pfn(iova_pfn);
e1605495 3937
f532959b 3938 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
e1605495 3939 if (unlikely(ret)) {
e1605495
DW
3940 dma_pte_free_pagetable(domain, start_vpfn,
3941 start_vpfn + size - 1);
22e2f9fa 3942 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
e1605495 3943 return 0;
ba395927
KA
3944 }
3945
1f0ef2aa
DW
3946 /* it's a non-present to present mapping. Only flush if caching mode */
3947 if (cap_caching_mode(iommu->cap))
a1ddcbe9 3948 iommu_flush_iotlb_psi(iommu, domain, start_vpfn, size, 0, 1);
1f0ef2aa 3949 else
8c11e798 3950 iommu_flush_write_buffer(iommu);
1f0ef2aa 3951
ba395927
KA
3952 return nelems;
3953}
3954
dfb805e8
FT
3955static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3956{
3957 return !dma_addr;
3958}
3959
01e1932a 3960const struct dma_map_ops intel_dma_ops = {
baa676fc
AP
3961 .alloc = intel_alloc_coherent,
3962 .free = intel_free_coherent,
ba395927
KA
3963 .map_sg = intel_map_sg,
3964 .unmap_sg = intel_unmap_sg,
ffbbef5c
FT
3965 .map_page = intel_map_page,
3966 .unmap_page = intel_unmap_page,
dfb805e8 3967 .mapping_error = intel_mapping_error,
5860acc1
CH
3968#ifdef CONFIG_X86
3969 .dma_supported = x86_dma_supported,
3970#endif
ba395927
KA
3971};
3972
3973static inline int iommu_domain_cache_init(void)
3974{
3975 int ret = 0;
3976
3977 iommu_domain_cache = kmem_cache_create("iommu_domain",
3978 sizeof(struct dmar_domain),
3979 0,
3980 SLAB_HWCACHE_ALIGN,
3981
3982 NULL);
3983 if (!iommu_domain_cache) {
9f10e5bf 3984 pr_err("Couldn't create iommu_domain cache\n");
ba395927
KA
3985 ret = -ENOMEM;
3986 }
3987
3988 return ret;
3989}
3990
3991static inline int iommu_devinfo_cache_init(void)
3992{
3993 int ret = 0;
3994
3995 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3996 sizeof(struct device_domain_info),
3997 0,
3998 SLAB_HWCACHE_ALIGN,
ba395927
KA
3999 NULL);
4000 if (!iommu_devinfo_cache) {
9f10e5bf 4001 pr_err("Couldn't create devinfo cache\n");
ba395927
KA
4002 ret = -ENOMEM;
4003 }
4004
4005 return ret;
4006}
4007
ba395927
KA
4008static int __init iommu_init_mempool(void)
4009{
4010 int ret;
ae1ff3d6 4011 ret = iova_cache_get();
ba395927
KA
4012 if (ret)
4013 return ret;
4014
4015 ret = iommu_domain_cache_init();
4016 if (ret)
4017 goto domain_error;
4018
4019 ret = iommu_devinfo_cache_init();
4020 if (!ret)
4021 return ret;
4022
4023 kmem_cache_destroy(iommu_domain_cache);
4024domain_error:
ae1ff3d6 4025 iova_cache_put();
ba395927
KA
4026
4027 return -ENOMEM;
4028}
4029
4030static void __init iommu_exit_mempool(void)
4031{
4032 kmem_cache_destroy(iommu_devinfo_cache);
4033 kmem_cache_destroy(iommu_domain_cache);
ae1ff3d6 4034 iova_cache_put();
ba395927
KA
4035}
4036
556ab45f
DW
4037static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
4038{
4039 struct dmar_drhd_unit *drhd;
4040 u32 vtbar;
4041 int rc;
4042
4043 /* We know that this device on this chipset has its own IOMMU.
4044 * If we find it under a different IOMMU, then the BIOS is lying
4045 * to us. Hope that the IOMMU for this device is actually
4046 * disabled, and it needs no translation...
4047 */
4048 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
4049 if (rc) {
4050 /* "can't" happen */
4051 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
4052 return;
4053 }
4054 vtbar &= 0xffff0000;
4055
4056 /* we know that this iommu should be at offset 0xa000 from vtbar */
4057 drhd = dmar_find_matched_drhd_unit(pdev);
4058 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
4059 TAINT_FIRMWARE_WORKAROUND,
4060 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
4061 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
4062}
4063DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
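
/*
 * Note on the quirk above: vtbar is masked to its 64 KB-aligned base and
 * the DRHD that the DMAR tables claim for this IOAT device is expected to
 * sit exactly 0xa000 above it; on a mismatch the BIOS is assumed to be
 * wrong and the device is excluded from translation via
 * DUMMY_DEVICE_DOMAIN_INFO.
 */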
4064
ba395927
KA
4065static void __init init_no_remapping_devices(void)
4066{
4067 struct dmar_drhd_unit *drhd;
832bd858 4068 struct device *dev;
b683b230 4069 int i;
ba395927
KA
4070
4071 for_each_drhd_unit(drhd) {
4072 if (!drhd->include_all) {
b683b230
JL
4073 for_each_active_dev_scope(drhd->devices,
4074 drhd->devices_cnt, i, dev)
4075 break;
832bd858 4076 /* ignore DMAR unit if no devices exist */
ba395927
KA
4077 if (i == drhd->devices_cnt)
4078 drhd->ignored = 1;
4079 }
4080 }
4081
7c919779 4082 for_each_active_drhd_unit(drhd) {
7c919779 4083 if (drhd->include_all)
ba395927
KA
4084 continue;
4085
b683b230
JL
4086 for_each_active_dev_scope(drhd->devices,
4087 drhd->devices_cnt, i, dev)
832bd858 4088 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
ba395927 4089 break;
ba395927
KA
4090 if (i < drhd->devices_cnt)
4091 continue;
4092
c0771df8
DW
4093 /* This IOMMU has *only* gfx devices. Either bypass it or
4094 set the gfx_mapped flag, as appropriate */
4095 if (dmar_map_gfx) {
4096 intel_iommu_gfx_mapped = 1;
4097 } else {
4098 drhd->ignored = 1;
b683b230
JL
4099 for_each_active_dev_scope(drhd->devices,
4100 drhd->devices_cnt, i, dev)
832bd858 4101 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
ba395927
KA
4102 }
4103 }
4104}
4105
f59c7b69
FY
4106#ifdef CONFIG_SUSPEND
4107static int init_iommu_hw(void)
4108{
4109 struct dmar_drhd_unit *drhd;
4110 struct intel_iommu *iommu = NULL;
4111
4112 for_each_active_iommu(iommu, drhd)
4113 if (iommu->qi)
4114 dmar_reenable_qi(iommu);
4115
b779260b
JC
4116 for_each_iommu(iommu, drhd) {
4117 if (drhd->ignored) {
4118 /*
4119 * we always have to disable PMRs or DMA may fail on
4120 * this device
4121 */
4122 if (force_on)
4123 iommu_disable_protect_mem_regions(iommu);
4124 continue;
4125 }
4126
f59c7b69
FY
4127 iommu_flush_write_buffer(iommu);
4128
4129 iommu_set_root_entry(iommu);
4130
4131 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 4132 DMA_CCMD_GLOBAL_INVL);
2a41ccee
JL
4133 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4134 iommu_enable_translation(iommu);
b94996c9 4135 iommu_disable_protect_mem_regions(iommu);
f59c7b69
FY
4136 }
4137
4138 return 0;
4139}
4140
4141static void iommu_flush_all(void)
4142{
4143 struct dmar_drhd_unit *drhd;
4144 struct intel_iommu *iommu;
4145
4146 for_each_active_iommu(iommu, drhd) {
4147 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 4148 DMA_CCMD_GLOBAL_INVL);
f59c7b69 4149 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 4150 DMA_TLB_GLOBAL_FLUSH);
f59c7b69
FY
4151 }
4152}
4153
134fac3f 4154static int iommu_suspend(void)
f59c7b69
FY
4155{
4156 struct dmar_drhd_unit *drhd;
4157 struct intel_iommu *iommu = NULL;
4158 unsigned long flag;
4159
4160 for_each_active_iommu(iommu, drhd) {
4161 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
4162 GFP_ATOMIC);
4163 if (!iommu->iommu_state)
4164 goto nomem;
4165 }
4166
4167 iommu_flush_all();
4168
4169 for_each_active_iommu(iommu, drhd) {
4170 iommu_disable_translation(iommu);
4171
1f5b3c3f 4172 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
4173
4174 iommu->iommu_state[SR_DMAR_FECTL_REG] =
4175 readl(iommu->reg + DMAR_FECTL_REG);
4176 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
4177 readl(iommu->reg + DMAR_FEDATA_REG);
4178 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
4179 readl(iommu->reg + DMAR_FEADDR_REG);
4180 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
4181 readl(iommu->reg + DMAR_FEUADDR_REG);
4182
1f5b3c3f 4183 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
4184 }
4185 return 0;
4186
4187nomem:
4188 for_each_active_iommu(iommu, drhd)
4189 kfree(iommu->iommu_state);
4190
4191 return -ENOMEM;
4192}
4193
134fac3f 4194static void iommu_resume(void)
f59c7b69
FY
4195{
4196 struct dmar_drhd_unit *drhd;
4197 struct intel_iommu *iommu = NULL;
4198 unsigned long flag;
4199
4200 if (init_iommu_hw()) {
b779260b
JC
4201 if (force_on)
4202 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
4203 else
4204 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
134fac3f 4205 return;
f59c7b69
FY
4206 }
4207
4208 for_each_active_iommu(iommu, drhd) {
4209
1f5b3c3f 4210 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
4211
4212 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
4213 iommu->reg + DMAR_FECTL_REG);
4214 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
4215 iommu->reg + DMAR_FEDATA_REG);
4216 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
4217 iommu->reg + DMAR_FEADDR_REG);
4218 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
4219 iommu->reg + DMAR_FEUADDR_REG);
4220
1f5b3c3f 4221 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
4222 }
4223
4224 for_each_active_iommu(iommu, drhd)
4225 kfree(iommu->iommu_state);
f59c7b69
FY
4226}
4227
134fac3f 4228static struct syscore_ops iommu_syscore_ops = {
f59c7b69
FY
4229 .resume = iommu_resume,
4230 .suspend = iommu_suspend,
4231};
4232
134fac3f 4233static void __init init_iommu_pm_ops(void)
f59c7b69 4234{
134fac3f 4235 register_syscore_ops(&iommu_syscore_ops);
f59c7b69
FY
4236}
4237
4238#else
99592ba4 4239static inline void init_iommu_pm_ops(void) {}
f59c7b69
FY
4240#endif /* CONFIG_SUSPEND */
4241
318fe7df 4242
c2a0b538 4243int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
318fe7df
SS
4244{
4245 struct acpi_dmar_reserved_memory *rmrr;
0659b8dc 4246 int prot = DMA_PTE_READ|DMA_PTE_WRITE;
318fe7df 4247 struct dmar_rmrr_unit *rmrru;
0659b8dc 4248 size_t length;
318fe7df
SS
4249
4250 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
4251 if (!rmrru)
0659b8dc 4252 goto out;
318fe7df
SS
4253
4254 rmrru->hdr = header;
4255 rmrr = (struct acpi_dmar_reserved_memory *)header;
4256 rmrru->base_address = rmrr->base_address;
4257 rmrru->end_address = rmrr->end_address;
0659b8dc
EA
4258
4259 length = rmrr->end_address - rmrr->base_address + 1;
4260 rmrru->resv = iommu_alloc_resv_region(rmrr->base_address, length, prot,
4261 IOMMU_RESV_DIRECT);
4262 if (!rmrru->resv)
4263 goto free_rmrru;
4264
2e455289
JL
4265 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
4266 ((void *)rmrr) + rmrr->header.length,
4267 &rmrru->devices_cnt);
0659b8dc
EA
4268 if (rmrru->devices_cnt && rmrru->devices == NULL)
4269 goto free_all;
318fe7df 4270
2e455289 4271 list_add(&rmrru->list, &dmar_rmrr_units);
318fe7df 4272
2e455289 4273 return 0;
0659b8dc
EA
4274free_all:
4275 kfree(rmrru->resv);
4276free_rmrru:
4277 kfree(rmrru);
4278out:
4279 return -ENOMEM;
318fe7df
SS
4280}
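
/*
 * Note on dmar_parse_one_rmrr() above: each RMRR is recorded both as a
 * dmar_rmrr_unit for the identity mappings set up elsewhere and as an
 * IOMMU_RESV_DIRECT reserved region of length
 * end_address - base_address + 1, which intel_iommu_get_resv_regions()
 * later in this file reports to the generic IOMMU layer.
 */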
4281
6b197249
JL
4282static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
4283{
4284 struct dmar_atsr_unit *atsru;
4285 struct acpi_dmar_atsr *tmp;
4286
4287 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4288 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
4289 if (atsr->segment != tmp->segment)
4290 continue;
4291 if (atsr->header.length != tmp->header.length)
4292 continue;
4293 if (memcmp(atsr, tmp, atsr->header.length) == 0)
4294 return atsru;
4295 }
4296
4297 return NULL;
4298}
4299
4300int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
318fe7df
SS
4301{
4302 struct acpi_dmar_atsr *atsr;
4303 struct dmar_atsr_unit *atsru;
4304
b608fe35 4305 if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
6b197249
JL
4306 return 0;
4307
318fe7df 4308 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
6b197249
JL
4309 atsru = dmar_find_atsr(atsr);
4310 if (atsru)
4311 return 0;
4312
4313 atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
318fe7df
SS
4314 if (!atsru)
4315 return -ENOMEM;
4316
6b197249
JL
4317 /*
4318 * If memory is allocated from slab by ACPI _DSM method, we need to
4319 * copy the memory content because the memory buffer will be freed
4320 * on return.
4321 */
4322 atsru->hdr = (void *)(atsru + 1);
4323 memcpy(atsru->hdr, hdr, hdr->length);
318fe7df 4324 atsru->include_all = atsr->flags & 0x1;
2e455289
JL
4325 if (!atsru->include_all) {
4326 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
4327 (void *)atsr + atsr->header.length,
4328 &atsru->devices_cnt);
4329 if (atsru->devices_cnt && atsru->devices == NULL) {
4330 kfree(atsru);
4331 return -ENOMEM;
4332 }
4333 }
318fe7df 4334
0e242612 4335 list_add_rcu(&atsru->list, &dmar_atsr_units);
318fe7df
SS
4336
4337 return 0;
4338}
4339
9bdc531e
JL
4340static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
4341{
4342 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
4343 kfree(atsru);
4344}
4345
6b197249
JL
4346int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4347{
4348 struct acpi_dmar_atsr *atsr;
4349 struct dmar_atsr_unit *atsru;
4350
4351 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4352 atsru = dmar_find_atsr(atsr);
4353 if (atsru) {
4354 list_del_rcu(&atsru->list);
4355 synchronize_rcu();
4356 intel_iommu_free_atsr(atsru);
4357 }
4358
4359 return 0;
4360}
4361
4362int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4363{
4364 int i;
4365 struct device *dev;
4366 struct acpi_dmar_atsr *atsr;
4367 struct dmar_atsr_unit *atsru;
4368
4369 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4370 atsru = dmar_find_atsr(atsr);
4371 if (!atsru)
4372 return 0;
4373
194dc870 4374 if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
6b197249
JL
4375 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
4376 i, dev)
4377 return -EBUSY;
194dc870 4378 }
6b197249
JL
4379
4380 return 0;
4381}
4382
ffebeb46
JL
4383static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
4384{
4385 int sp, ret = 0;
4386 struct intel_iommu *iommu = dmaru->iommu;
4387
4388 if (g_iommus[iommu->seq_id])
4389 return 0;
4390
4391 if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
9f10e5bf 4392 pr_warn("%s: Doesn't support hardware pass through.\n",
ffebeb46
JL
4393 iommu->name);
4394 return -ENXIO;
4395 }
4396 if (!ecap_sc_support(iommu->ecap) &&
4397 domain_update_iommu_snooping(iommu)) {
9f10e5bf 4398 pr_warn("%s: Doesn't support snooping.\n",
ffebeb46
JL
4399 iommu->name);
4400 return -ENXIO;
4401 }
4402 sp = domain_update_iommu_superpage(iommu) - 1;
4403 if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
9f10e5bf 4404 pr_warn("%s: Doesn't support large page.\n",
ffebeb46
JL
4405 iommu->name);
4406 return -ENXIO;
4407 }
4408
4409 /*
4410 * Disable translation if already enabled prior to OS handover.
4411 */
4412 if (iommu->gcmd & DMA_GCMD_TE)
4413 iommu_disable_translation(iommu);
4414
4415 g_iommus[iommu->seq_id] = iommu;
4416 ret = iommu_init_domains(iommu);
4417 if (ret == 0)
4418 ret = iommu_alloc_root_entry(iommu);
4419 if (ret)
4420 goto out;
4421
8a94ade4
DW
4422#ifdef CONFIG_INTEL_IOMMU_SVM
4423 if (pasid_enabled(iommu))
4424 intel_svm_alloc_pasid_tables(iommu);
4425#endif
4426
ffebeb46
JL
4427 if (dmaru->ignored) {
4428 /*
4429 * we always have to disable PMRs or DMA may fail on this device
4430 */
4431 if (force_on)
4432 iommu_disable_protect_mem_regions(iommu);
4433 return 0;
4434 }
4435
4436 intel_iommu_init_qi(iommu);
4437 iommu_flush_write_buffer(iommu);
a222a7f0
DW
4438
4439#ifdef CONFIG_INTEL_IOMMU_SVM
4440 if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) {
4441 ret = intel_svm_enable_prq(iommu);
4442 if (ret)
4443 goto disable_iommu;
4444 }
4445#endif
ffebeb46
JL
4446 ret = dmar_set_interrupt(iommu);
4447 if (ret)
4448 goto disable_iommu;
4449
4450 iommu_set_root_entry(iommu);
4451 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
4452 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4453 iommu_enable_translation(iommu);
4454
ffebeb46
JL
4455 iommu_disable_protect_mem_regions(iommu);
4456 return 0;
4457
4458disable_iommu:
4459 disable_dmar_iommu(iommu);
4460out:
4461 free_dmar_iommu(iommu);
4462 return ret;
4463}
4464
6b197249
JL
4465int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
4466{
ffebeb46
JL
4467 int ret = 0;
4468 struct intel_iommu *iommu = dmaru->iommu;
4469
4470 if (!intel_iommu_enabled)
4471 return 0;
4472 if (iommu == NULL)
4473 return -EINVAL;
4474
4475 if (insert) {
4476 ret = intel_iommu_add(dmaru);
4477 } else {
4478 disable_dmar_iommu(iommu);
4479 free_dmar_iommu(iommu);
4480 }
4481
4482 return ret;
6b197249
JL
4483}
4484
9bdc531e
JL
4485static void intel_iommu_free_dmars(void)
4486{
4487 struct dmar_rmrr_unit *rmrru, *rmrr_n;
4488 struct dmar_atsr_unit *atsru, *atsr_n;
4489
4490 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
4491 list_del(&rmrru->list);
4492 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
0659b8dc 4493 kfree(rmrru->resv);
9bdc531e 4494 kfree(rmrru);
318fe7df
SS
4495 }
4496
9bdc531e
JL
4497 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
4498 list_del(&atsru->list);
4499 intel_iommu_free_atsr(atsru);
4500 }
318fe7df
SS
4501}
4502
4503int dmar_find_matched_atsr_unit(struct pci_dev *dev)
4504{
b683b230 4505 int i, ret = 1;
318fe7df 4506 struct pci_bus *bus;
832bd858
DW
4507 struct pci_dev *bridge = NULL;
4508 struct device *tmp;
318fe7df
SS
4509 struct acpi_dmar_atsr *atsr;
4510 struct dmar_atsr_unit *atsru;
4511
4512 dev = pci_physfn(dev);
318fe7df 4513 for (bus = dev->bus; bus; bus = bus->parent) {
b5f82ddf 4514 bridge = bus->self;
d14053b3
DW
4515 /* If it's an integrated device, allow ATS */
4516 if (!bridge)
4517 return 1;
4518 /* Connected via non-PCIe: no ATS */
4519 if (!pci_is_pcie(bridge) ||
62f87c0e 4520 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
318fe7df 4521 return 0;
d14053b3 4522 /* If we found the root port, look it up in the ATSR */
b5f82ddf 4523 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
318fe7df 4524 break;
318fe7df
SS
4525 }
4526
0e242612 4527 rcu_read_lock();
b5f82ddf
JL
4528 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4529 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4530 if (atsr->segment != pci_domain_nr(dev->bus))
4531 continue;
4532
b683b230 4533 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
832bd858 4534 if (tmp == &bridge->dev)
b683b230 4535 goto out;
b5f82ddf
JL
4536
4537 if (atsru->include_all)
b683b230 4538 goto out;
b5f82ddf 4539 }
b683b230
JL
4540 ret = 0;
4541out:
0e242612 4542 rcu_read_unlock();
318fe7df 4543
b683b230 4544 return ret;
318fe7df
SS
4545}
4546
59ce0515
JL
4547int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
4548{
4549 int ret = 0;
4550 struct dmar_rmrr_unit *rmrru;
4551 struct dmar_atsr_unit *atsru;
4552 struct acpi_dmar_atsr *atsr;
4553 struct acpi_dmar_reserved_memory *rmrr;
4554
b608fe35 4555 if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING)
59ce0515
JL
4556 return 0;
4557
4558 list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
4559 rmrr = container_of(rmrru->hdr,
4560 struct acpi_dmar_reserved_memory, header);
4561 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4562 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
4563 ((void *)rmrr) + rmrr->header.length,
4564 rmrr->segment, rmrru->devices,
4565 rmrru->devices_cnt);
27e24950 4566 if (ret < 0)
59ce0515 4567 return ret;
e6a8c9b3 4568 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
27e24950
JL
4569 dmar_remove_dev_scope(info, rmrr->segment,
4570 rmrru->devices, rmrru->devices_cnt);
59ce0515
JL
4571 }
4572 }
4573
4574 list_for_each_entry(atsru, &dmar_atsr_units, list) {
4575 if (atsru->include_all)
4576 continue;
4577
4578 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4579 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4580 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
4581 (void *)atsr + atsr->header.length,
4582 atsr->segment, atsru->devices,
4583 atsru->devices_cnt);
4584 if (ret > 0)
4585 break;
4586 else if (ret < 0)
4587 return ret;
e6a8c9b3 4588 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
59ce0515
JL
4589 if (dmar_remove_dev_scope(info, atsr->segment,
4590 atsru->devices, atsru->devices_cnt))
4591 break;
4592 }
4593 }
4594
4595 return 0;
4596}
4597
99dcaded
FY
4598/*
4599 * Here we only respond to the action of a device being unbound from its driver.
4600 *
4601 * An added device is not attached to its DMAR domain here yet. That will happen
4602 * when the device is mapped to an iova.
4603 */
4604static int device_notifier(struct notifier_block *nb,
4605 unsigned long action, void *data)
4606{
4607 struct device *dev = data;
99dcaded
FY
4608 struct dmar_domain *domain;
4609
3d89194a 4610 if (iommu_dummy(dev))
44cd613c
DW
4611 return 0;
4612
1196c2fb 4613 if (action != BUS_NOTIFY_REMOVED_DEVICE)
7e7dfab7
JL
4614 return 0;
4615
1525a29a 4616 domain = find_domain(dev);
99dcaded
FY
4617 if (!domain)
4618 return 0;
4619
e6de0f8d 4620 dmar_remove_one_dev_info(domain, dev);
ab8dfe25 4621 if (!domain_type_is_vm_or_si(domain) && list_empty(&domain->devices))
7e7dfab7 4622 domain_exit(domain);
a97590e5 4623
99dcaded
FY
4624 return 0;
4625}
4626
4627static struct notifier_block device_nb = {
4628 .notifier_call = device_notifier,
4629};
4630
75f05569
JL
4631static int intel_iommu_memory_notifier(struct notifier_block *nb,
4632 unsigned long val, void *v)
4633{
4634 struct memory_notify *mhp = v;
4635 unsigned long long start, end;
4636 unsigned long start_vpfn, last_vpfn;
4637
4638 switch (val) {
4639 case MEM_GOING_ONLINE:
4640 start = mhp->start_pfn << PAGE_SHIFT;
4641 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
4642 if (iommu_domain_identity_map(si_domain, start, end)) {
9f10e5bf 4643 pr_warn("Failed to build identity map for [%llx-%llx]\n",
75f05569
JL
4644 start, end);
4645 return NOTIFY_BAD;
4646 }
4647 break;
4648
4649 case MEM_OFFLINE:
4650 case MEM_CANCEL_ONLINE:
4651 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4652 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
4653 while (start_vpfn <= last_vpfn) {
4654 struct iova *iova;
4655 struct dmar_drhd_unit *drhd;
4656 struct intel_iommu *iommu;
ea8ea460 4657 struct page *freelist;
75f05569
JL
4658
4659 iova = find_iova(&si_domain->iovad, start_vpfn);
4660 if (iova == NULL) {
9f10e5bf 4661 pr_debug("Failed to get IOVA for PFN %lx\n",
75f05569
JL
4662 start_vpfn);
4663 break;
4664 }
4665
4666 iova = split_and_remove_iova(&si_domain->iovad, iova,
4667 start_vpfn, last_vpfn);
4668 if (iova == NULL) {
9f10e5bf 4669 pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
75f05569
JL
4670 start_vpfn, last_vpfn);
4671 return NOTIFY_BAD;
4672 }
4673
ea8ea460
DW
4674 freelist = domain_unmap(si_domain, iova->pfn_lo,
4675 iova->pfn_hi);
4676
75f05569
JL
4677 rcu_read_lock();
4678 for_each_active_iommu(iommu, drhd)
a1ddcbe9 4679 iommu_flush_iotlb_psi(iommu, si_domain,
a156ef99 4680 iova->pfn_lo, iova_size(iova),
ea8ea460 4681 !freelist, 0);
75f05569 4682 rcu_read_unlock();
ea8ea460 4683 dma_free_pagelist(freelist);
75f05569
JL
4684
4685 start_vpfn = iova->pfn_hi + 1;
4686 free_iova_mem(iova);
4687 }
4688 break;
4689 }
4690
4691 return NOTIFY_OK;
4692}
4693
4694static struct notifier_block intel_iommu_memory_nb = {
4695 .notifier_call = intel_iommu_memory_notifier,
4696 .priority = 0
4697};
4698
22e2f9fa
OP
4699static void free_all_cpu_cached_iovas(unsigned int cpu)
4700{
4701 int i;
4702
4703 for (i = 0; i < g_num_of_iommus; i++) {
4704 struct intel_iommu *iommu = g_iommus[i];
4705 struct dmar_domain *domain;
0caa7616 4706 int did;
22e2f9fa
OP
4707
4708 if (!iommu)
4709 continue;
4710
3bd4f911 4711 for (did = 0; did < cap_ndoms(iommu->cap); did++) {
0caa7616 4712 domain = get_iommu_domain(iommu, (u16)did);
22e2f9fa
OP
4713
4714 if (!domain)
4715 continue;
4716 free_cpu_cached_iovas(cpu, &domain->iovad);
4717 }
4718 }
4719}
4720
21647615 4721static int intel_iommu_cpu_dead(unsigned int cpu)
aa473240 4722{
21647615
AMG
4723 free_all_cpu_cached_iovas(cpu);
4724 flush_unmaps_timeout(cpu);
4725 return 0;
aa473240
OP
4726}
4727
161b28aa
JR
4728static void intel_disable_iommus(void)
4729{
4730 struct intel_iommu *iommu = NULL;
4731 struct dmar_drhd_unit *drhd;
4732
4733 for_each_iommu(iommu, drhd)
4734 iommu_disable_translation(iommu);
4735}
4736
a7fdb6e6
JR
4737static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
4738{
2926a2aa
JR
4739 struct iommu_device *iommu_dev = dev_to_iommu_device(dev);
4740
4741 return container_of(iommu_dev, struct intel_iommu, iommu);
a7fdb6e6
JR
4742}
4743
a5459cfe
AW
4744static ssize_t intel_iommu_show_version(struct device *dev,
4745 struct device_attribute *attr,
4746 char *buf)
4747{
a7fdb6e6 4748 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4749 u32 ver = readl(iommu->reg + DMAR_VER_REG);
4750 return sprintf(buf, "%d:%d\n",
4751 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4752}
4753static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
4754
4755static ssize_t intel_iommu_show_address(struct device *dev,
4756 struct device_attribute *attr,
4757 char *buf)
4758{
a7fdb6e6 4759 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4760 return sprintf(buf, "%llx\n", iommu->reg_phys);
4761}
4762static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
4763
4764static ssize_t intel_iommu_show_cap(struct device *dev,
4765 struct device_attribute *attr,
4766 char *buf)
4767{
a7fdb6e6 4768 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4769 return sprintf(buf, "%llx\n", iommu->cap);
4770}
4771static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
4772
4773static ssize_t intel_iommu_show_ecap(struct device *dev,
4774 struct device_attribute *attr,
4775 char *buf)
4776{
a7fdb6e6 4777 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4778 return sprintf(buf, "%llx\n", iommu->ecap);
4779}
4780static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4781
2238c082
AW
4782static ssize_t intel_iommu_show_ndoms(struct device *dev,
4783 struct device_attribute *attr,
4784 char *buf)
4785{
a7fdb6e6 4786 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2238c082
AW
4787 return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
4788}
4789static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL);
4790
4791static ssize_t intel_iommu_show_ndoms_used(struct device *dev,
4792 struct device_attribute *attr,
4793 char *buf)
4794{
a7fdb6e6 4795 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2238c082
AW
4796 return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
4797 cap_ndoms(iommu->cap)));
4798}
4799static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL);
4800
a5459cfe
AW
4801static struct attribute *intel_iommu_attrs[] = {
4802 &dev_attr_version.attr,
4803 &dev_attr_address.attr,
4804 &dev_attr_cap.attr,
4805 &dev_attr_ecap.attr,
2238c082
AW
4806 &dev_attr_domains_supported.attr,
4807 &dev_attr_domains_used.attr,
a5459cfe
AW
4808 NULL,
4809};
4810
4811static struct attribute_group intel_iommu_group = {
4812 .name = "intel-iommu",
4813 .attrs = intel_iommu_attrs,
4814};
4815
4816const struct attribute_group *intel_iommu_groups[] = {
4817 &intel_iommu_group,
4818 NULL,
4819};
4820
ba395927
KA
4821int __init intel_iommu_init(void)
4822{
9bdc531e 4823 int ret = -ENODEV;
3a93c841 4824 struct dmar_drhd_unit *drhd;
7c919779 4825 struct intel_iommu *iommu;
ba395927 4826
a59b50e9
JC
4827 /* VT-d is required for a TXT/tboot launch, so enforce that */
4828 force_on = tboot_force_iommu();
4829
3a5670e8
JL
4830 if (iommu_init_mempool()) {
4831 if (force_on)
4832 panic("tboot: Failed to initialize iommu memory\n");
4833 return -ENOMEM;
4834 }
4835
4836 down_write(&dmar_global_lock);
a59b50e9
JC
4837 if (dmar_table_init()) {
4838 if (force_on)
4839 panic("tboot: Failed to initialize DMAR table\n");
9bdc531e 4840 goto out_free_dmar;
a59b50e9 4841 }
ba395927 4842
c2c7286a 4843 if (dmar_dev_scope_init() < 0) {
a59b50e9
JC
4844 if (force_on)
4845 panic("tboot: Failed to initialize DMAR device scope\n");
9bdc531e 4846 goto out_free_dmar;
a59b50e9 4847 }
1886e8a9 4848
161b28aa 4849 if (no_iommu || dmar_disabled) {
bfd20f1c
SL
4850 /*
4851 * We exit the function here to ensure IOMMU's remapping and
4852 * mempool aren't set up, which means that the IOMMU's PMRs
4853 * won't be disabled via the call to init_dmars(). So disable
4854 * them explicitly here. The PMRs were set up by tboot prior to
4855 * calling SENTER, but the kernel is expected to reset/tear
4856 * down the PMRs.
4857 */
4858 if (intel_iommu_tboot_noforce) {
4859 for_each_iommu(iommu, drhd)
4860 iommu_disable_protect_mem_regions(iommu);
4861 }
4862
161b28aa
JR
4863 /*
4864 * Make sure the IOMMUs are switched off, even when we
4865 * boot into a kexec kernel and the previous kernel left
4866 * them enabled
4867 */
4868 intel_disable_iommus();
9bdc531e 4869 goto out_free_dmar;
161b28aa 4870 }
2ae21010 4871
318fe7df 4872 if (list_empty(&dmar_rmrr_units))
9f10e5bf 4873 pr_info("No RMRR found\n");
318fe7df
SS
4874
4875 if (list_empty(&dmar_atsr_units))
9f10e5bf 4876 pr_info("No ATSR found\n");
318fe7df 4877
51a63e67
JC
4878 if (dmar_init_reserved_ranges()) {
4879 if (force_on)
4880 panic("tboot: Failed to reserve iommu ranges\n");
3a5670e8 4881 goto out_free_reserved_range;
51a63e67 4882 }
ba395927
KA
4883
4884 init_no_remapping_devices();
4885
b779260b 4886 ret = init_dmars();
ba395927 4887 if (ret) {
a59b50e9
JC
4888 if (force_on)
4889 panic("tboot: Failed to initialize DMARs\n");
9f10e5bf 4890 pr_err("Initialization failed\n");
9bdc531e 4891 goto out_free_reserved_range;
ba395927 4892 }
3a5670e8 4893 up_write(&dmar_global_lock);
9f10e5bf 4894 pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
ba395927 4895
75f1cdf1
FT
4896#ifdef CONFIG_SWIOTLB
4897 swiotlb = 0;
4898#endif
19943b0e 4899 dma_ops = &intel_dma_ops;
4ed0d3e6 4900
134fac3f 4901 init_iommu_pm_ops();
a8bcbb0d 4902
39ab9555
JR
4903 for_each_active_iommu(iommu, drhd) {
4904 iommu_device_sysfs_add(&iommu->iommu, NULL,
4905 intel_iommu_groups,
4906 "%s", iommu->name);
4907 iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
4908 iommu_device_register(&iommu->iommu);
4909 }
a5459cfe 4910
4236d97d 4911 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
99dcaded 4912 bus_register_notifier(&pci_bus_type, &device_nb);
75f05569
JL
4913 if (si_domain && !hw_pass_through)
4914 register_memory_notifier(&intel_iommu_memory_nb);
21647615
AMG
4915 cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL,
4916 intel_iommu_cpu_dead);
8bc1f85c
ED
4917 intel_iommu_enabled = 1;
4918
ba395927 4919 return 0;
9bdc531e
JL
4920
4921out_free_reserved_range:
4922 put_iova_domain(&reserved_iova_list);
9bdc531e
JL
4923out_free_dmar:
4924 intel_iommu_free_dmars();
3a5670e8
JL
4925 up_write(&dmar_global_lock);
4926 iommu_exit_mempool();
9bdc531e 4927 return ret;
ba395927 4928}
e820482c 4929
2452d9db 4930static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
579305f7
AW
4931{
4932 struct intel_iommu *iommu = opaque;
4933
2452d9db 4934 domain_context_clear_one(iommu, PCI_BUS_NUM(alias), alias & 0xff);
579305f7
AW
4935 return 0;
4936}
4937
4938/*
4939 * NB - intel-iommu lacks any sort of reference counting for the users of
4940 * dependent devices. If multiple endpoints have intersecting dependent
4941 * devices, unbinding the driver from any one of them will possibly leave
4942 * the others unable to operate.
4943 */
2452d9db 4944static void domain_context_clear(struct intel_iommu *iommu, struct device *dev)
3199aa6b 4945{
0bcb3e28 4946 if (!iommu || !dev || !dev_is_pci(dev))
3199aa6b
HW
4947 return;
4948
2452d9db 4949 pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_clear_one_cb, iommu);
3199aa6b
HW
4950}
4951
127c7615 4952static void __dmar_remove_one_dev_info(struct device_domain_info *info)
c7151a8d 4953{
c7151a8d
WH
4954 struct intel_iommu *iommu;
4955 unsigned long flags;
c7151a8d 4956
55d94043
JR
4957 assert_spin_locked(&device_domain_lock);
4958
127c7615 4959 if (WARN_ON(!info))
c7151a8d
WH
4960 return;
4961
127c7615 4962 iommu = info->iommu;
c7151a8d 4963
127c7615
JR
4964 if (info->dev) {
4965 iommu_disable_dev_iotlb(info);
4966 domain_context_clear(iommu, info->dev);
4967 }
c7151a8d 4968
b608ac3b 4969 unlink_domain_info(info);
c7151a8d 4970
d160aca5 4971 spin_lock_irqsave(&iommu->lock, flags);
127c7615 4972 domain_detach_iommu(info->domain, iommu);
d160aca5 4973 spin_unlock_irqrestore(&iommu->lock, flags);
c7151a8d 4974
127c7615 4975 free_devinfo_mem(info);
c7151a8d 4976}
c7151a8d 4977
55d94043
JR
4978static void dmar_remove_one_dev_info(struct dmar_domain *domain,
4979 struct device *dev)
4980{
127c7615 4981 struct device_domain_info *info;
55d94043 4982 unsigned long flags;
3e7abe25 4983
55d94043 4984 spin_lock_irqsave(&device_domain_lock, flags);
127c7615
JR
4985 info = dev->archdata.iommu;
4986 __dmar_remove_one_dev_info(info);
55d94043 4987 spin_unlock_irqrestore(&device_domain_lock, flags);
c7151a8d
WH
4988}
4989
2c2e2c38 4990static int md_domain_init(struct dmar_domain *domain, int guest_width)
5e98c4b1
WH
4991{
4992 int adjust_width;
4993
0fb5fe87
RM
4994 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
4995 DMA_32BIT_PFN);
5e98c4b1
WH
4996 domain_reserve_special_ranges(domain);
4997
4998 /* calculate AGAW */
4999 domain->gaw = guest_width;
5000 adjust_width = guestwidth_to_adjustwidth(guest_width);
5001 domain->agaw = width_to_agaw(adjust_width);
5002
5e98c4b1 5003 domain->iommu_coherency = 0;
c5b15255 5004 domain->iommu_snooping = 0;
6dd9a7c7 5005 domain->iommu_superpage = 0;
fe40f1e0 5006 domain->max_addr = 0;
5e98c4b1
WH
5007
5008 /* always allocate the top pgd */
4c923d47 5009 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
5e98c4b1
WH
5010 if (!domain->pgd)
5011 return -ENOMEM;
5012 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
5013 return 0;
5014}
5015
00a77deb 5016static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
38717946 5017{
5d450806 5018 struct dmar_domain *dmar_domain;
00a77deb
JR
5019 struct iommu_domain *domain;
5020
5021 if (type != IOMMU_DOMAIN_UNMANAGED)
5022 return NULL;
38717946 5023
ab8dfe25 5024 dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE);
5d450806 5025 if (!dmar_domain) {
9f10e5bf 5026 pr_err("Can't allocate dmar_domain\n");
00a77deb 5027 return NULL;
38717946 5028 }
2c2e2c38 5029 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
9f10e5bf 5030 pr_err("Domain initialization failed\n");
92d03cc8 5031 domain_exit(dmar_domain);
00a77deb 5032 return NULL;
38717946 5033 }
8140a95d 5034 domain_update_iommu_cap(dmar_domain);
faa3d6f5 5035
00a77deb 5036 domain = &dmar_domain->domain;
8a0e715b
JR
5037 domain->geometry.aperture_start = 0;
5038 domain->geometry.aperture_end = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
5039 domain->geometry.force_aperture = true;
5040
00a77deb 5041 return domain;
38717946 5042}
38717946 5043
00a77deb 5044static void intel_iommu_domain_free(struct iommu_domain *domain)
38717946 5045{
00a77deb 5046 domain_exit(to_dmar_domain(domain));
38717946 5047}
38717946 5048
4c5478c9
JR
5049static int intel_iommu_attach_device(struct iommu_domain *domain,
5050 struct device *dev)
38717946 5051{
00a77deb 5052 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
fe40f1e0
WH
5053 struct intel_iommu *iommu;
5054 int addr_width;
156baca8 5055 u8 bus, devfn;
faa3d6f5 5056
c875d2c1
AW
5057 if (device_is_rmrr_locked(dev)) {
5058 dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n");
5059 return -EPERM;
5060 }
5061
7207d8f9
DW
5062 /* normally dev is not mapped */
5063 if (unlikely(domain_context_mapped(dev))) {
faa3d6f5
WH
5064 struct dmar_domain *old_domain;
5065
1525a29a 5066 old_domain = find_domain(dev);
faa3d6f5 5067 if (old_domain) {
d160aca5 5068 rcu_read_lock();
de7e8886 5069 dmar_remove_one_dev_info(old_domain, dev);
d160aca5 5070 rcu_read_unlock();
62c22167
JR
5071
5072 if (!domain_type_is_vm_or_si(old_domain) &&
5073 list_empty(&old_domain->devices))
5074 domain_exit(old_domain);
faa3d6f5
WH
5075 }
5076 }
5077
156baca8 5078 iommu = device_to_iommu(dev, &bus, &devfn);
fe40f1e0
WH
5079 if (!iommu)
5080 return -ENODEV;
5081
5082 /* check if this iommu agaw is sufficient for max mapped address */
5083 addr_width = agaw_to_width(iommu->agaw);
a99c47a2
TL
5084 if (addr_width > cap_mgaw(iommu->cap))
5085 addr_width = cap_mgaw(iommu->cap);
5086
5087 if (dmar_domain->max_addr > (1LL << addr_width)) {
9f10e5bf 5088 pr_err("%s: iommu width (%d) is not "
fe40f1e0 5089 "sufficient for the mapped address (%llx)\n",
a99c47a2 5090 __func__, addr_width, dmar_domain->max_addr);
fe40f1e0
WH
5091 return -EFAULT;
5092 }
a99c47a2
TL
5093 dmar_domain->gaw = addr_width;
5094
5095 /*
5096 * Knock out extra levels of page tables if necessary
5097 */
5098 while (iommu->agaw < dmar_domain->agaw) {
5099 struct dma_pte *pte;
5100
5101 pte = dmar_domain->pgd;
5102 if (dma_pte_present(pte)) {
25cbff16
SY
5103 dmar_domain->pgd = (struct dma_pte *)
5104 phys_to_virt(dma_pte_addr(pte));
7a661013 5105 free_pgtable_page(pte);
a99c47a2
TL
5106 }
5107 dmar_domain->agaw--;
5108 }
fe40f1e0 5109
28ccce0d 5110 return domain_add_dev_info(dmar_domain, dev);
38717946 5111}
38717946 5112
4c5478c9
JR
5113static void intel_iommu_detach_device(struct iommu_domain *domain,
5114 struct device *dev)
38717946 5115{
e6de0f8d 5116 dmar_remove_one_dev_info(to_dmar_domain(domain), dev);
faa3d6f5 5117}
c7151a8d 5118
b146a1c9
JR
5119static int intel_iommu_map(struct iommu_domain *domain,
5120 unsigned long iova, phys_addr_t hpa,
5009065d 5121 size_t size, int iommu_prot)
faa3d6f5 5122{
00a77deb 5123 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
fe40f1e0 5124 u64 max_addr;
dde57a21 5125 int prot = 0;
faa3d6f5 5126 int ret;
fe40f1e0 5127
dde57a21
JR
5128 if (iommu_prot & IOMMU_READ)
5129 prot |= DMA_PTE_READ;
5130 if (iommu_prot & IOMMU_WRITE)
5131 prot |= DMA_PTE_WRITE;
9cf06697
SY
5132 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
5133 prot |= DMA_PTE_SNP;
dde57a21 5134
163cc52c 5135 max_addr = iova + size;
dde57a21 5136 if (dmar_domain->max_addr < max_addr) {
fe40f1e0
WH
5137 u64 end;
5138
5139 /* check if minimum agaw is sufficient for mapped address */
8954da1f 5140 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
fe40f1e0 5141 if (end < max_addr) {
9f10e5bf 5142 pr_err("%s: iommu width (%d) is not "
fe40f1e0 5143 "sufficient for the mapped address (%llx)\n",
8954da1f 5144 __func__, dmar_domain->gaw, max_addr);
fe40f1e0
WH
5145 return -EFAULT;
5146 }
dde57a21 5147 dmar_domain->max_addr = max_addr;
fe40f1e0 5148 }
ad051221
DW
5149 /* Round up size to the next multiple of PAGE_SIZE, if it and
5150 the low bits of hpa would take us onto the next page */
88cb6a74 5151 size = aligned_nrpages(hpa, size);
ad051221
DW
5152 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
5153 hpa >> VTD_PAGE_SHIFT, size, prot);
faa3d6f5 5154 return ret;
38717946 5155}
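
/*
 * Worked example for intel_iommu_map() above: mapping 4 bytes at
 * hpa 0x2ffe crosses a page boundary, so aligned_nrpages() rounds the
 * request up to 2 VT-d pages before domain_pfn_mapping() is called; the
 * max_addr check beforehand rejects any mapping that would not fit under
 * the domain's guest address width.
 */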
38717946 5156
5009065d 5157static size_t intel_iommu_unmap(struct iommu_domain *domain,
ea8ea460 5158 unsigned long iova, size_t size)
38717946 5159{
00a77deb 5160 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
ea8ea460
DW
5161 struct page *freelist = NULL;
5162 struct intel_iommu *iommu;
5163 unsigned long start_pfn, last_pfn;
5164 unsigned int npages;
42e8c186 5165 int iommu_id, level = 0;
5cf0a76f
DW
5166
5167 /* Cope with horrid API which requires us to unmap more than the
5168 size argument if it happens to be a large-page mapping. */
dc02e46e 5169 BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
5cf0a76f
DW
5170
5171 if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
5172 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4b99d352 5173
ea8ea460
DW
5174 start_pfn = iova >> VTD_PAGE_SHIFT;
5175 last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
5176
5177 freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
5178
5179 npages = last_pfn - start_pfn + 1;
5180
29a27719 5181 for_each_domain_iommu(iommu_id, dmar_domain) {
a1ddcbe9 5182 iommu = g_iommus[iommu_id];
ea8ea460 5183
42e8c186
JR
5184 iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
5185 start_pfn, npages, !freelist, 0);
ea8ea460
DW
5186 }
5187
5188 dma_free_pagelist(freelist);
fe40f1e0 5189
163cc52c
DW
5190 if (dmar_domain->max_addr == iova + size)
5191 dmar_domain->max_addr = iova;
b146a1c9 5192
5cf0a76f 5193 return size;
38717946 5194}
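
/*
 * Worked example for intel_iommu_unmap() above: a 4 KB unmap that lands
 * inside a 2 MB superpage mapping makes pfn_to_dma_pte() report level 2,
 * so size is widened to VTD_PAGE_SIZE << level_to_offset_bits(2), i.e.
 * the whole 2 MB, and that widened size is what gets returned to the
 * caller - hence the "horrid API" remark above.
 */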
38717946 5195
d14d6577 5196static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
bb5547ac 5197 dma_addr_t iova)
38717946 5198{
00a77deb 5199 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
38717946 5200 struct dma_pte *pte;
5cf0a76f 5201 int level = 0;
faa3d6f5 5202 u64 phys = 0;
38717946 5203
5cf0a76f 5204 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
38717946 5205 if (pte)
faa3d6f5 5206 phys = dma_pte_addr(pte);
38717946 5207
faa3d6f5 5208 return phys;
38717946 5209}
a8bcbb0d 5210
5d587b8d 5211static bool intel_iommu_capable(enum iommu_cap cap)
dbb9fd86 5212{
dbb9fd86 5213 if (cap == IOMMU_CAP_CACHE_COHERENCY)
5d587b8d 5214 return domain_update_iommu_snooping(NULL) == 1;
323f99cb 5215 if (cap == IOMMU_CAP_INTR_REMAP)
5d587b8d 5216 return irq_remapping_enabled == 1;
dbb9fd86 5217
5d587b8d 5218 return false;
dbb9fd86
SY
5219}
5220
abdfdde2
AW
5221static int intel_iommu_add_device(struct device *dev)
5222{
a5459cfe 5223 struct intel_iommu *iommu;
abdfdde2 5224 struct iommu_group *group;
156baca8 5225 u8 bus, devfn;
70ae6f0d 5226
a5459cfe
AW
5227 iommu = device_to_iommu(dev, &bus, &devfn);
5228 if (!iommu)
70ae6f0d
AW
5229 return -ENODEV;
5230
e3d10af1 5231 iommu_device_link(&iommu->iommu, dev);
a4ff1fc2 5232
e17f9ff4 5233 group = iommu_group_get_for_dev(dev);
783f157b 5234
e17f9ff4
AW
5235 if (IS_ERR(group))
5236 return PTR_ERR(group);
bcb71abe 5237
abdfdde2 5238 iommu_group_put(group);
e17f9ff4 5239 return 0;
abdfdde2 5240}
70ae6f0d 5241
abdfdde2
AW
5242static void intel_iommu_remove_device(struct device *dev)
5243{
a5459cfe
AW
5244 struct intel_iommu *iommu;
5245 u8 bus, devfn;
5246
5247 iommu = device_to_iommu(dev, &bus, &devfn);
5248 if (!iommu)
5249 return;
5250
abdfdde2 5251 iommu_group_remove_device(dev);
a5459cfe 5252
e3d10af1 5253 iommu_device_unlink(&iommu->iommu, dev);
70ae6f0d
AW
5254}
5255
0659b8dc
EA
5256static void intel_iommu_get_resv_regions(struct device *device,
5257 struct list_head *head)
5258{
5259 struct iommu_resv_region *reg;
5260 struct dmar_rmrr_unit *rmrr;
5261 struct device *i_dev;
5262 int i;
5263
5264 rcu_read_lock();
5265 for_each_rmrr_units(rmrr) {
5266 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
5267 i, i_dev) {
5268 if (i_dev != device)
5269 continue;
5270
5271 list_add_tail(&rmrr->resv->list, head);
5272 }
5273 }
5274 rcu_read_unlock();
5275
5276 reg = iommu_alloc_resv_region(IOAPIC_RANGE_START,
5277 IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1,
9d3a4de4 5278 0, IOMMU_RESV_MSI);
0659b8dc
EA
5279 if (!reg)
5280 return;
5281 list_add_tail(&reg->list, head);
5282}
5283
5284static void intel_iommu_put_resv_regions(struct device *dev,
5285 struct list_head *head)
5286{
5287 struct iommu_resv_region *entry, *next;
5288
5289 list_for_each_entry_safe(entry, next, head, list) {
5290 if (entry->type == IOMMU_RESV_MSI)
5291 kfree(entry);
5292 }
70ae6f0d
AW
5293}
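/*
 * A minimal consumer-side sketch, assuming the caller goes through the
 * generic helpers: every list obtained via the get callback must be
 * handed back through the put callback so the MSI entry allocated above
 * is freed again.
 *
 *	LIST_HEAD(resv_regions);
 *	struct iommu_resv_region *r;
 *
 *	iommu_get_resv_regions(dev, &resv_regions);
 *	list_for_each_entry(r, &resv_regions, list)
 *		reserve_iova_range(r->start, r->start + r->length - 1);
 *	iommu_put_resv_regions(dev, &resv_regions);
 *
 * (reserve_iova_range() is a placeholder for whatever the caller does
 * with each region.)
 */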
5294
2f26e0a9 5295#ifdef CONFIG_INTEL_IOMMU_SVM
65ca7f5f
JP
5296#define MAX_NR_PASID_BITS (20)
5297static inline unsigned long intel_iommu_get_pts(struct intel_iommu *iommu)
5298{
5299 /*
5300 * Convert ecap_pss to the extended context entry pts encoding, and
5301 * also respect the soft pasid_max value set by the iommu.
5302 * - number of PASID bits = ecap_pss + 1
5303 * - number of PASID table entries = 2^(pts + 5)
5304 * Therefore, pts = ecap_pss - 4
5305 * e.g. KBL ecap_pss = 0x13, PASID has 20 bits, pts = 15
5306 */
5307 if (ecap_pss(iommu->ecap) < 5)
5308 return 0;
5309
5310 /* pasid_max is encoded as the actual number of entries, not the bit width */
5311 return find_first_bit((unsigned long *)&iommu->pasid_max,
5312 MAX_NR_PASID_BITS) - 5;
5313}
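/*
 * Worked example of the encoding above, assuming the soft limit is a
 * power of two: with iommu->pasid_max = 0x4000 (16384 PASID table
 * entries) the first set bit is bit 14, so pts = 14 - 5 = 9, which the
 * hardware decodes back to 2^(9 + 5) = 16384 entries.  The soft
 * pasid_max therefore overrides the raw ecap_pss capability.
 */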
5314
2f26e0a9
DW
5315int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev)
5316{
5317 struct device_domain_info *info;
5318 struct context_entry *context;
5319 struct dmar_domain *domain;
5320 unsigned long flags;
5321 u64 ctx_lo;
5322 int ret;
5323
5324 domain = get_valid_domain_for_dev(sdev->dev);
5325 if (!domain)
5326 return -EINVAL;
5327
5328 spin_lock_irqsave(&device_domain_lock, flags);
5329 spin_lock(&iommu->lock);
5330
5331 ret = -EINVAL;
5332 info = sdev->dev->archdata.iommu;
5333 if (!info || !info->pasid_supported)
5334 goto out;
5335
5336 context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
5337 if (WARN_ON(!context))
5338 goto out;
5339
5340 ctx_lo = context[0].lo;
5341
5342 sdev->did = domain->iommu_did[iommu->seq_id];
5343 sdev->sid = PCI_DEVID(info->bus, info->devfn);
5344
5345 if (!(ctx_lo & CONTEXT_PASIDE)) {
5346 context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table);
65ca7f5f
JP
5347 context[1].lo = (u64)virt_to_phys(iommu->pasid_table) |
5348 intel_iommu_get_pts(iommu);
5349
2f26e0a9
DW
5350 wmb();
5351 /* CONTEXT_TT_MULTI_LEVEL and CONTEXT_TT_DEV_IOTLB are both
5352 * extended to permit requests-with-PASID if the PASIDE bit
5353 * is set, which makes sense. For CONTEXT_TT_PASS_THROUGH,
5354 * however, the PASIDE bit is ignored and requests-with-PASID
5355 * are unconditionally blocked, which makes less sense.
5356 * So convert from CONTEXT_TT_PASS_THROUGH to one of the new
5357 * "guest mode" translation types depending on whether ATS
5358 * is available or not. Annoyingly, we can't use the new
5359 * modes *unless* PASIDE is set. */
5360 if ((ctx_lo & CONTEXT_TT_MASK) == (CONTEXT_TT_PASS_THROUGH << 2)) {
5361 ctx_lo &= ~CONTEXT_TT_MASK;
5362 if (info->ats_supported)
5363 ctx_lo |= CONTEXT_TT_PT_PASID_DEV_IOTLB << 2;
5364 else
5365 ctx_lo |= CONTEXT_TT_PT_PASID << 2;
5366 }
5367 ctx_lo |= CONTEXT_PASIDE;
907fea34
DW
5368 if (iommu->pasid_state_table)
5369 ctx_lo |= CONTEXT_DINVE;
a222a7f0
DW
5370 if (info->pri_supported)
5371 ctx_lo |= CONTEXT_PRS;
2f26e0a9
DW
5372 context[0].lo = ctx_lo;
5373 wmb();
5374 iommu->flush.flush_context(iommu, sdev->did, sdev->sid,
5375 DMA_CCMD_MASK_NOBIT,
5376 DMA_CCMD_DEVICE_INVL);
5377 }
5378
5379 /* Enable PASID support in the device, if it wasn't already */
5380 if (!info->pasid_enabled)
5381 iommu_enable_dev_iotlb(info);
5382
5383 if (info->ats_enabled) {
5384 sdev->dev_iotlb = 1;
5385 sdev->qdep = info->ats_qdep;
5386 if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
5387 sdev->qdep = 0;
5388 }
5389 ret = 0;
5390
5391 out:
5392 spin_unlock(&iommu->lock);
5393 spin_unlock_irqrestore(&device_domain_lock, flags);
5394
5395 return ret;
5396}
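/*
 * A minimal sketch of the intended call path (an assumption based on
 * the SVM support in intel-svm.c, not a contract): on the first bind of
 * a process address space to a device, the SVM code does roughly
 *
 *	iommu = intel_svm_device_to_iommu(dev);
 *	...
 *	ret = intel_iommu_enable_pasid(iommu, sdev);
 *	if (ret)
 *		goto out;
 *
 * after which requests-with-PASID from the device are translated via
 * the per-PASID tables instead of being blocked.
 */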
5397
5398struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
5399{
5400 struct intel_iommu *iommu;
5401 u8 bus, devfn;
5402
5403 if (iommu_dummy(dev)) {
5404 dev_warn(dev,
5405 "No IOMMU translation for device; cannot enable SVM\n");
5406 return NULL;
5407 }
5408
5409 iommu = device_to_iommu(dev, &bus, &devfn);
5410 if (!iommu) {
b9997e38 5411 dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
2f26e0a9
DW
5412 return NULL;
5413 }
5414
5415 if (!iommu->pasid_table) {
b9997e38 5416 dev_err(dev, "PASID not enabled on IOMMU; cannot enable SVM\n");
2f26e0a9
DW
5417 return NULL;
5418 }
5419
5420 return iommu;
5421}
5422#endif /* CONFIG_INTEL_IOMMU_SVM */
5423
b0119e87 5424const struct iommu_ops intel_iommu_ops = {
0659b8dc
EA
5425 .capable = intel_iommu_capable,
5426 .domain_alloc = intel_iommu_domain_alloc,
5427 .domain_free = intel_iommu_domain_free,
5428 .attach_dev = intel_iommu_attach_device,
5429 .detach_dev = intel_iommu_detach_device,
5430 .map = intel_iommu_map,
5431 .unmap = intel_iommu_unmap,
5432 .map_sg = default_iommu_map_sg,
5433 .iova_to_phys = intel_iommu_iova_to_phys,
5434 .add_device = intel_iommu_add_device,
5435 .remove_device = intel_iommu_remove_device,
5436 .get_resv_regions = intel_iommu_get_resv_regions,
5437 .put_resv_regions = intel_iommu_put_resv_regions,
5438 .device_group = pci_device_group,
5439 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
a8bcbb0d 5440};
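/*
 * These callbacks are not invoked directly; they are reached through
 * the generic IOMMU API once the ops are registered for the PCI bus.
 * A minimal sketch of a typical external user (assumed usage, in the
 * style of VFIO):
 *
 *	struct iommu_domain *domain = iommu_domain_alloc(&pci_bus_type);
 *
 *	iommu_attach_device(domain, dev);
 *	iommu_map(domain, iova, phys, size, IOMMU_READ | IOMMU_WRITE);
 *	...
 *	iommu_unmap(domain, iova, size);
 *	iommu_detach_device(domain, dev);
 *	iommu_domain_free(domain);
 */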
9af88143 5441
9452618e
DV
5442static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
5443{
5444 /* G4x/GM45 integrated gfx dmar support is totally busted. */
9f10e5bf 5445 pr_info("Disabling IOMMU for graphics on this chipset\n");
9452618e
DV
5446 dmar_map_gfx = 0;
5447}
5448
5449DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
5450DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
5451DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
5452DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
5453DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
5454DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
5455DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
5456
d34d6517 5457static void quirk_iommu_rwbf(struct pci_dev *dev)
9af88143
DW
5458{
5459 /*
5460 * Mobile 4 Series Chipset neglects to set RWBF capability,
210561ff 5461 * but needs it. Same seems to hold for the desktop versions.
9af88143 5462 */
9f10e5bf 5463 pr_info("Forcing write-buffer flush capability\n");
9af88143
DW
5464 rwbf_quirk = 1;
5465}
5466
5467DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
210561ff
DV
5468DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
5469DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
5470DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
5471DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
5472DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
5473DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
e0fc7e0b 5474
eecfd57f
AJ
5475#define GGC 0x52
5476#define GGC_MEMORY_SIZE_MASK (0xf << 8)
5477#define GGC_MEMORY_SIZE_NONE (0x0 << 8)
5478#define GGC_MEMORY_SIZE_1M (0x1 << 8)
5479#define GGC_MEMORY_SIZE_2M (0x3 << 8)
5480#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
5481#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
5482#define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
5483#define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
5484
d34d6517 5485static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
9eecabcb
DW
5486{
5487 unsigned short ggc;
5488
eecfd57f 5489 if (pci_read_config_word(dev, GGC, &ggc))
9eecabcb
DW
5490 return;
5491
eecfd57f 5492 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
9f10e5bf 5493 pr_info("BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
9eecabcb 5494 dmar_map_gfx = 0;
6fbcfb3e
DW
5495 } else if (dmar_map_gfx) {
5496 /* we have to ensure the gfx device is idle before we flush */
9f10e5bf 5497 pr_info("Disabling batched IOTLB flush on Ironlake\n");
6fbcfb3e
DW
5498 intel_iommu_strict = 1;
5499 }
9eecabcb
DW
5500}
5501DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
5502DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
5503DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
5504DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
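/*
 * Worked example of the GGC check above: a BIOS that programs bits 11:8
 * of GGC to 0x9 (GGC_MEMORY_SIZE_2M_VT) leaves GGC_MEMORY_VT_ENABLED
 * set, so the IGD keeps its IOMMU and only batched IOTLB flushing is
 * turned off; a value of 0x1 (GGC_MEMORY_SIZE_1M) has the VT bit clear,
 * and dmar_map_gfx is disabled entirely.
 */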
5505
e0fc7e0b
DW
5506/* On Tylersburg chipsets, some BIOSes have been known to enable the
5507 ISOCH DMAR unit for the Azalia sound device, but not give it any
5508 TLB entries, which causes it to deadlock. Check for that. We do
5509 this in a function called from init_dmars(), instead of in a PCI
5510 quirk, because we don't want to print the obnoxious "BIOS broken"
5511 message if VT-d is actually disabled.
5512*/
5513static void __init check_tylersburg_isoch(void)
5514{
5515 struct pci_dev *pdev;
5516 uint32_t vtisochctrl;
5517
5518 /* If there's no Azalia in the system anyway, forget it. */
5519 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
5520 if (!pdev)
5521 return;
5522 pci_dev_put(pdev);
5523
5524 /* System Management Registers. Might be hidden, in which case
5525 we can't do the sanity check. But that's OK, because the
5526 known-broken BIOSes _don't_ actually hide it, so far. */
5527 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
5528 if (!pdev)
5529 return;
5530
5531 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
5532 pci_dev_put(pdev);
5533 return;
5534 }
5535
5536 pci_dev_put(pdev);
5537
5538 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
5539 if (vtisochctrl & 1)
5540 return;
5541
5542 /* Drop all bits other than the number of TLB entries */
5543 vtisochctrl &= 0x1c;
5544
5545 /* If we have the recommended number of TLB entries (16), fine. */
5546 if (vtisochctrl == 0x10)
5547 return;
5548
5549 /* Zero TLB entries? That can never work. */
5550 if (!vtisochctrl) {
5551 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
5552 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
5553 dmi_get_system_info(DMI_BIOS_VENDOR),
5554 dmi_get_system_info(DMI_BIOS_VERSION),
5555 dmi_get_system_info(DMI_PRODUCT_VERSION));
5556 iommu_identity_mapping |= IDENTMAP_AZALIA;
5557 return;
5558 }
9f10e5bf
JR
5559
5560 pr_warn("Recommended number of TLB entries for ISOCH unit is 16; your BIOS set %d\n",
e0fc7e0b
DW
5561 vtisochctrl);
5562}
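/*
 * Worked example of the VTISOCHCTRL decode above: bit 0 set means
 * Azalia DMA is routed to the non-isoch DMAR unit and nothing needs to
 * be done; with bit 0 clear, a masked value of 0x10 means the isoch
 * unit has the recommended 16 TLB entries and is also fine; a masked
 * value of 0 means no TLB entries at all, and the Azalia device is put
 * into the identity map (IDENTMAP_AZALIA) as a workaround.
 */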