]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blame - drivers/iommu/intel-iommu.c
iommu/vt-d: Use lo_hi_readq() / lo_hi_writeq()
[mirror_ubuntu-jammy-kernel.git] / drivers / iommu / intel-iommu.c
CommitLineData
ba395927 1/*
ea8ea460 2 * Copyright © 2006-2014 Intel Corporation.
ba395927
KA
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
ea8ea460
DW
13 * Authors: David Woodhouse <dwmw2@infradead.org>,
14 * Ashok Raj <ashok.raj@intel.com>,
15 * Shaohua Li <shaohua.li@intel.com>,
16 * Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
17 * Fenghua Yu <fenghua.yu@intel.com>
9f10e5bf 18 * Joerg Roedel <jroedel@suse.de>
ba395927
KA
19 */
20
9f10e5bf
JR
21#define pr_fmt(fmt) "DMAR: " fmt
22
ba395927
KA
23#include <linux/init.h>
24#include <linux/bitmap.h>
5e0d2a6f 25#include <linux/debugfs.h>
54485c30 26#include <linux/export.h>
ba395927
KA
27#include <linux/slab.h>
28#include <linux/irq.h>
29#include <linux/interrupt.h>
ba395927
KA
30#include <linux/spinlock.h>
31#include <linux/pci.h>
32#include <linux/dmar.h>
33#include <linux/dma-mapping.h>
34#include <linux/mempool.h>
75f05569 35#include <linux/memory.h>
aa473240 36#include <linux/cpu.h>
5e0d2a6f 37#include <linux/timer.h>
dfddb969 38#include <linux/io.h>
38717946 39#include <linux/iova.h>
5d450806 40#include <linux/iommu.h>
38717946 41#include <linux/intel-iommu.h>
134fac3f 42#include <linux/syscore_ops.h>
69575d38 43#include <linux/tboot.h>
adb2fe02 44#include <linux/dmi.h>
5cdede24 45#include <linux/pci-ats.h>
0ee332c1 46#include <linux/memblock.h>
36746436 47#include <linux/dma-contiguous.h>
091d42e4 48#include <linux/crash_dump.h>
8a8f422d 49#include <asm/irq_remapping.h>
ba395927 50#include <asm/cacheflush.h>
46a7fa27 51#include <asm/iommu.h>
ba395927 52
078e1ee2
JR
53#include "irq_remapping.h"
54
5b6985ce
FY
55#define ROOT_SIZE VTD_PAGE_SIZE
56#define CONTEXT_SIZE VTD_PAGE_SIZE
57
ba395927 58#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
18436afd 59#define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
ba395927 60#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
e0fc7e0b 61#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
ba395927
KA
62
63#define IOAPIC_RANGE_START (0xfee00000)
64#define IOAPIC_RANGE_END (0xfeefffff)
65#define IOVA_START_ADDR (0x1000)
66
67#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
68
4ed0d3e6 69#define MAX_AGAW_WIDTH 64
5c645b35 70#define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
4ed0d3e6 71
2ebe3151
DW
72#define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
73#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
74
75/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
76 to match. That way, we can use 'unsigned long' for PFNs with impunity. */
77#define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
78 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
79#define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
ba395927 80
1b722500
RM
81/* IO virtual address start page frame number */
82#define IOVA_START_PFN (1)
83
f27be03b 84#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
284901a9 85#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32))
6a35528a 86#define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64))
5e0d2a6f 87
df08cdc7
AM
88/* page table handling */
89#define LEVEL_STRIDE (9)
90#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
91
6d1c56a9
OBC
92/*
93 * This bitmap is used to advertise the page sizes our hardware support
94 * to the IOMMU core, which will then use this information to split
95 * physically contiguous memory regions it is mapping into page sizes
96 * that we support.
97 *
98 * Traditionally the IOMMU core just handed us the mappings directly,
99 * after making sure the size is an order of a 4KiB page and that the
100 * mapping has natural alignment.
101 *
102 * To retain this behavior, we currently advertise that we support
103 * all page sizes that are an order of 4KiB.
104 *
105 * If at some point we'd like to utilize the IOMMU core's new behavior,
106 * we could change this to advertise the real page sizes we support.
107 */
108#define INTEL_IOMMU_PGSIZES (~0xFFFUL)
109
df08cdc7
AM
110static inline int agaw_to_level(int agaw)
111{
112 return agaw + 2;
113}
114
115static inline int agaw_to_width(int agaw)
116{
5c645b35 117 return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
df08cdc7
AM
118}
119
120static inline int width_to_agaw(int width)
121{
5c645b35 122 return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
df08cdc7
AM
123}
124
125static inline unsigned int level_to_offset_bits(int level)
126{
127 return (level - 1) * LEVEL_STRIDE;
128}
129
130static inline int pfn_level_offset(unsigned long pfn, int level)
131{
132 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
133}
134
135static inline unsigned long level_mask(int level)
136{
137 return -1UL << level_to_offset_bits(level);
138}
139
140static inline unsigned long level_size(int level)
141{
142 return 1UL << level_to_offset_bits(level);
143}
144
145static inline unsigned long align_to_level(unsigned long pfn, int level)
146{
147 return (pfn + level_size(level) - 1) & level_mask(level);
148}
fd18de50 149
6dd9a7c7
YS
150static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
151{
5c645b35 152 return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
6dd9a7c7
YS
153}
154
dd4e8319
DW
155/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
156 are never going to work. */
157static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
158{
159 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
160}
161
162static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
163{
164 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
165}
166static inline unsigned long page_to_dma_pfn(struct page *pg)
167{
168 return mm_to_dma_pfn(page_to_pfn(pg));
169}
170static inline unsigned long virt_to_dma_pfn(void *p)
171{
172 return page_to_dma_pfn(virt_to_page(p));
173}
174
d9630fe9
WH
175/* global iommu list, set NULL for ignored DMAR units */
176static struct intel_iommu **g_iommus;
177
e0fc7e0b 178static void __init check_tylersburg_isoch(void);
9af88143
DW
179static int rwbf_quirk;
180
b779260b
JC
181/*
182 * set to 1 to panic kernel if can't successfully enable VT-d
183 * (used when kernel is launched w/ TXT)
184 */
185static int force_on = 0;
186
46b08e1a
MM
187/*
188 * 0: Present
189 * 1-11: Reserved
190 * 12-63: Context Ptr (12 - (haw-1))
191 * 64-127: Reserved
192 */
193struct root_entry {
03ecc32c
DW
194 u64 lo;
195 u64 hi;
46b08e1a
MM
196};
197#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
46b08e1a 198
091d42e4
JR
199/*
200 * Take a root_entry and return the Lower Context Table Pointer (LCTP)
201 * if marked present.
202 */
203static phys_addr_t root_entry_lctp(struct root_entry *re)
204{
205 if (!(re->lo & 1))
206 return 0;
207
208 return re->lo & VTD_PAGE_MASK;
209}
210
211/*
212 * Take a root_entry and return the Upper Context Table Pointer (UCTP)
213 * if marked present.
214 */
215static phys_addr_t root_entry_uctp(struct root_entry *re)
216{
217 if (!(re->hi & 1))
218 return 0;
46b08e1a 219
091d42e4
JR
220 return re->hi & VTD_PAGE_MASK;
221}
7a8fc25e
MM
222/*
223 * low 64 bits:
224 * 0: present
225 * 1: fault processing disable
226 * 2-3: translation type
227 * 12-63: address space root
228 * high 64 bits:
229 * 0-2: address width
230 * 3-6: aval
231 * 8-23: domain id
232 */
233struct context_entry {
234 u64 lo;
235 u64 hi;
236};
c07e7d21 237
cf484d0e
JR
238static inline void context_clear_pasid_enable(struct context_entry *context)
239{
240 context->lo &= ~(1ULL << 11);
241}
242
243static inline bool context_pasid_enabled(struct context_entry *context)
244{
245 return !!(context->lo & (1ULL << 11));
246}
247
248static inline void context_set_copied(struct context_entry *context)
249{
250 context->hi |= (1ull << 3);
251}
252
253static inline bool context_copied(struct context_entry *context)
254{
255 return !!(context->hi & (1ULL << 3));
256}
257
258static inline bool __context_present(struct context_entry *context)
c07e7d21
MM
259{
260 return (context->lo & 1);
261}
cf484d0e
JR
262
263static inline bool context_present(struct context_entry *context)
264{
265 return context_pasid_enabled(context) ?
266 __context_present(context) :
267 __context_present(context) && !context_copied(context);
268}
269
c07e7d21
MM
270static inline void context_set_present(struct context_entry *context)
271{
272 context->lo |= 1;
273}
274
275static inline void context_set_fault_enable(struct context_entry *context)
276{
277 context->lo &= (((u64)-1) << 2) | 1;
278}
279
c07e7d21
MM
280static inline void context_set_translation_type(struct context_entry *context,
281 unsigned long value)
282{
283 context->lo &= (((u64)-1) << 4) | 3;
284 context->lo |= (value & 3) << 2;
285}
286
287static inline void context_set_address_root(struct context_entry *context,
288 unsigned long value)
289{
1a2262f9 290 context->lo &= ~VTD_PAGE_MASK;
c07e7d21
MM
291 context->lo |= value & VTD_PAGE_MASK;
292}
293
294static inline void context_set_address_width(struct context_entry *context,
295 unsigned long value)
296{
297 context->hi |= value & 7;
298}
299
300static inline void context_set_domain_id(struct context_entry *context,
301 unsigned long value)
302{
303 context->hi |= (value & ((1 << 16) - 1)) << 8;
304}
305
dbcd861f
JR
306static inline int context_domain_id(struct context_entry *c)
307{
308 return((c->hi >> 8) & 0xffff);
309}
310
c07e7d21
MM
311static inline void context_clear_entry(struct context_entry *context)
312{
313 context->lo = 0;
314 context->hi = 0;
315}
7a8fc25e 316
622ba12a
MM
317/*
318 * 0: readable
319 * 1: writable
320 * 2-6: reserved
321 * 7: super page
9cf06697
SY
322 * 8-10: available
323 * 11: snoop behavior
622ba12a
MM
324 * 12-63: Host physcial address
325 */
326struct dma_pte {
327 u64 val;
328};
622ba12a 329
19c239ce
MM
330static inline void dma_clear_pte(struct dma_pte *pte)
331{
332 pte->val = 0;
333}
334
19c239ce
MM
335static inline u64 dma_pte_addr(struct dma_pte *pte)
336{
c85994e4
DW
337#ifdef CONFIG_64BIT
338 return pte->val & VTD_PAGE_MASK;
339#else
340 /* Must have a full atomic 64-bit read */
1a8bd481 341 return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
c85994e4 342#endif
19c239ce
MM
343}
344
19c239ce
MM
345static inline bool dma_pte_present(struct dma_pte *pte)
346{
347 return (pte->val & 3) != 0;
348}
622ba12a 349
4399c8bf
AK
350static inline bool dma_pte_superpage(struct dma_pte *pte)
351{
c3c75eb7 352 return (pte->val & DMA_PTE_LARGE_PAGE);
4399c8bf
AK
353}
354
75e6bf96
DW
355static inline int first_pte_in_page(struct dma_pte *pte)
356{
357 return !((unsigned long)pte & ~VTD_PAGE_MASK);
358}
359
2c2e2c38
FY
360/*
361 * This domain is a statically identity mapping domain.
362 * 1. This domain creats a static 1:1 mapping to all usable memory.
363 * 2. It maps to each iommu if successful.
364 * 3. Each iommu mapps to this domain if successful.
365 */
19943b0e
DW
366static struct dmar_domain *si_domain;
367static int hw_pass_through = 1;
2c2e2c38 368
28ccce0d
JR
369/*
370 * Domain represents a virtual machine, more than one devices
1ce28feb
WH
371 * across iommus may be owned in one domain, e.g. kvm guest.
372 */
ab8dfe25 373#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 0)
1ce28feb 374
2c2e2c38 375/* si_domain contains mulitple devices */
ab8dfe25 376#define DOMAIN_FLAG_STATIC_IDENTITY (1 << 1)
2c2e2c38 377
29a27719
JR
378#define for_each_domain_iommu(idx, domain) \
379 for (idx = 0; idx < g_num_of_iommus; idx++) \
380 if (domain->iommu_refcnt[idx])
381
99126f7c 382struct dmar_domain {
4c923d47 383 int nid; /* node id */
29a27719
JR
384
385 unsigned iommu_refcnt[DMAR_UNITS_SUPPORTED];
386 /* Refcount of devices per iommu */
387
99126f7c 388
c0e8a6c8
JR
389 u16 iommu_did[DMAR_UNITS_SUPPORTED];
390 /* Domain ids per IOMMU. Use u16 since
391 * domain ids are 16 bit wide according
392 * to VT-d spec, section 9.3 */
99126f7c 393
0824c592 394 bool has_iotlb_device;
00a77deb 395 struct list_head devices; /* all devices' list */
99126f7c
MM
396 struct iova_domain iovad; /* iova's that belong to this domain */
397
398 struct dma_pte *pgd; /* virtual address */
99126f7c
MM
399 int gaw; /* max guest address width */
400
401 /* adjusted guest address width, 0 is level 2 30-bit */
402 int agaw;
403
3b5410e7 404 int flags; /* flags to find out type of domain */
8e604097
WH
405
406 int iommu_coherency;/* indicate coherency of iommu access */
58c610bd 407 int iommu_snooping; /* indicate snooping control feature*/
c7151a8d 408 int iommu_count; /* reference count of iommu */
6dd9a7c7
YS
409 int iommu_superpage;/* Level of superpages supported:
410 0 == 4KiB (no superpages), 1 == 2MiB,
411 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
fe40f1e0 412 u64 max_addr; /* maximum mapped address */
00a77deb
JR
413
414 struct iommu_domain domain; /* generic domain data structure for
415 iommu core */
99126f7c
MM
416};
417
a647dacb
MM
418/* PCI domain-device relationship */
419struct device_domain_info {
420 struct list_head link; /* link to domain siblings */
421 struct list_head global; /* link to global list */
276dbf99 422 u8 bus; /* PCI bus number */
a647dacb 423 u8 devfn; /* PCI devfn number */
b16d0cb9
DW
424 u8 pasid_supported:3;
425 u8 pasid_enabled:1;
426 u8 pri_supported:1;
427 u8 pri_enabled:1;
428 u8 ats_supported:1;
429 u8 ats_enabled:1;
430 u8 ats_qdep;
0bcb3e28 431 struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
93a23a72 432 struct intel_iommu *iommu; /* IOMMU used by this device */
a647dacb
MM
433 struct dmar_domain *domain; /* pointer to domain */
434};
435
b94e4117
JL
436struct dmar_rmrr_unit {
437 struct list_head list; /* list of rmrr units */
438 struct acpi_dmar_header *hdr; /* ACPI header */
439 u64 base_address; /* reserved base address*/
440 u64 end_address; /* reserved end address */
832bd858 441 struct dmar_dev_scope *devices; /* target devices */
b94e4117 442 int devices_cnt; /* target device count */
0659b8dc 443 struct iommu_resv_region *resv; /* reserved region handle */
b94e4117
JL
444};
445
446struct dmar_atsr_unit {
447 struct list_head list; /* list of ATSR units */
448 struct acpi_dmar_header *hdr; /* ACPI header */
832bd858 449 struct dmar_dev_scope *devices; /* target devices */
b94e4117
JL
450 int devices_cnt; /* target device count */
451 u8 include_all:1; /* include all ports */
452};
453
454static LIST_HEAD(dmar_atsr_units);
455static LIST_HEAD(dmar_rmrr_units);
456
457#define for_each_rmrr_units(rmrr) \
458 list_for_each_entry(rmrr, &dmar_rmrr_units, list)
459
5e0d2a6f 460static void flush_unmaps_timeout(unsigned long data);
461
314f1dc1 462struct deferred_flush_entry {
2aac6304 463 unsigned long iova_pfn;
769530e4 464 unsigned long nrpages;
314f1dc1
OP
465 struct dmar_domain *domain;
466 struct page *freelist;
467};
5e0d2a6f 468
80b20dd8 469#define HIGH_WATER_MARK 250
314f1dc1 470struct deferred_flush_table {
80b20dd8 471 int next;
314f1dc1 472 struct deferred_flush_entry entries[HIGH_WATER_MARK];
80b20dd8 473};
474
aa473240
OP
475struct deferred_flush_data {
476 spinlock_t lock;
477 int timer_on;
478 struct timer_list timer;
479 long size;
480 struct deferred_flush_table *tables;
80b20dd8 481};
482
aa473240 483DEFINE_PER_CPU(struct deferred_flush_data, deferred_flush);
80b20dd8 484
5e0d2a6f 485/* bitmap for indexing intel_iommus */
5e0d2a6f 486static int g_num_of_iommus;
487
92d03cc8 488static void domain_exit(struct dmar_domain *domain);
ba395927 489static void domain_remove_dev_info(struct dmar_domain *domain);
e6de0f8d
JR
490static void dmar_remove_one_dev_info(struct dmar_domain *domain,
491 struct device *dev);
127c7615 492static void __dmar_remove_one_dev_info(struct device_domain_info *info);
2452d9db
JR
493static void domain_context_clear(struct intel_iommu *iommu,
494 struct device *dev);
2a46ddf7
JL
495static int domain_detach_iommu(struct dmar_domain *domain,
496 struct intel_iommu *iommu);
ba395927 497
d3f13810 498#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
0cd5c3c8
KM
499int dmar_disabled = 0;
500#else
501int dmar_disabled = 1;
d3f13810 502#endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
0cd5c3c8 503
8bc1f85c
ED
504int intel_iommu_enabled = 0;
505EXPORT_SYMBOL_GPL(intel_iommu_enabled);
506
2d9e667e 507static int dmar_map_gfx = 1;
7d3b03ce 508static int dmar_forcedac;
5e0d2a6f 509static int intel_iommu_strict;
6dd9a7c7 510static int intel_iommu_superpage = 1;
c83b2f20 511static int intel_iommu_ecs = 1;
ae853ddb
DW
512static int intel_iommu_pasid28;
513static int iommu_identity_mapping;
c83b2f20 514
ae853ddb
DW
515#define IDENTMAP_ALL 1
516#define IDENTMAP_GFX 2
517#define IDENTMAP_AZALIA 4
c83b2f20 518
d42fde70
DW
519/* Broadwell and Skylake have broken ECS support — normal so-called "second
520 * level" translation of DMA requests-without-PASID doesn't actually happen
521 * unless you also set the NESTE bit in an extended context-entry. Which of
522 * course means that SVM doesn't work because it's trying to do nested
523 * translation of the physical addresses it finds in the process page tables,
524 * through the IOVA->phys mapping found in the "second level" page tables.
525 *
526 * The VT-d specification was retroactively changed to change the definition
527 * of the capability bits and pretend that Broadwell/Skylake never happened...
528 * but unfortunately the wrong bit was changed. It's ECS which is broken, but
529 * for some reason it was the PASID capability bit which was redefined (from
530 * bit 28 on BDW/SKL to bit 40 in future).
531 *
532 * So our test for ECS needs to eschew those implementations which set the old
533 * PASID capabiity bit 28, since those are the ones on which ECS is broken.
534 * Unless we are working around the 'pasid28' limitations, that is, by putting
535 * the device into passthrough mode for normal DMA and thus masking the bug.
536 */
c83b2f20 537#define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap) && \
d42fde70
DW
538 (intel_iommu_pasid28 || !ecap_broken_pasid(iommu->ecap)))
539/* PASID support is thus enabled if ECS is enabled and *either* of the old
540 * or new capability bits are set. */
541#define pasid_enabled(iommu) (ecs_enabled(iommu) && \
542 (ecap_pasid(iommu->ecap) || ecap_broken_pasid(iommu->ecap)))
ba395927 543
c0771df8
DW
544int intel_iommu_gfx_mapped;
545EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
546
ba395927
KA
547#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
548static DEFINE_SPINLOCK(device_domain_lock);
549static LIST_HEAD(device_domain_list);
550
b0119e87 551const struct iommu_ops intel_iommu_ops;
a8bcbb0d 552
4158c2ec
JR
553static bool translation_pre_enabled(struct intel_iommu *iommu)
554{
555 return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
556}
557
091d42e4
JR
558static void clear_translation_pre_enabled(struct intel_iommu *iommu)
559{
560 iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
561}
562
4158c2ec
JR
563static void init_translation_status(struct intel_iommu *iommu)
564{
565 u32 gsts;
566
567 gsts = readl(iommu->reg + DMAR_GSTS_REG);
568 if (gsts & DMA_GSTS_TES)
569 iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
570}
571
00a77deb
JR
572/* Convert generic 'struct iommu_domain to private struct dmar_domain */
573static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
574{
575 return container_of(dom, struct dmar_domain, domain);
576}
577
ba395927
KA
578static int __init intel_iommu_setup(char *str)
579{
580 if (!str)
581 return -EINVAL;
582 while (*str) {
0cd5c3c8
KM
583 if (!strncmp(str, "on", 2)) {
584 dmar_disabled = 0;
9f10e5bf 585 pr_info("IOMMU enabled\n");
0cd5c3c8 586 } else if (!strncmp(str, "off", 3)) {
ba395927 587 dmar_disabled = 1;
9f10e5bf 588 pr_info("IOMMU disabled\n");
ba395927
KA
589 } else if (!strncmp(str, "igfx_off", 8)) {
590 dmar_map_gfx = 0;
9f10e5bf 591 pr_info("Disable GFX device mapping\n");
7d3b03ce 592 } else if (!strncmp(str, "forcedac", 8)) {
9f10e5bf 593 pr_info("Forcing DAC for PCI devices\n");
7d3b03ce 594 dmar_forcedac = 1;
5e0d2a6f 595 } else if (!strncmp(str, "strict", 6)) {
9f10e5bf 596 pr_info("Disable batched IOTLB flush\n");
5e0d2a6f 597 intel_iommu_strict = 1;
6dd9a7c7 598 } else if (!strncmp(str, "sp_off", 6)) {
9f10e5bf 599 pr_info("Disable supported super page\n");
6dd9a7c7 600 intel_iommu_superpage = 0;
c83b2f20
DW
601 } else if (!strncmp(str, "ecs_off", 7)) {
602 printk(KERN_INFO
603 "Intel-IOMMU: disable extended context table support\n");
604 intel_iommu_ecs = 0;
ae853ddb
DW
605 } else if (!strncmp(str, "pasid28", 7)) {
606 printk(KERN_INFO
607 "Intel-IOMMU: enable pre-production PASID support\n");
608 intel_iommu_pasid28 = 1;
609 iommu_identity_mapping |= IDENTMAP_GFX;
ba395927
KA
610 }
611
612 str += strcspn(str, ",");
613 while (*str == ',')
614 str++;
615 }
616 return 0;
617}
618__setup("intel_iommu=", intel_iommu_setup);
619
620static struct kmem_cache *iommu_domain_cache;
621static struct kmem_cache *iommu_devinfo_cache;
ba395927 622
9452d5bf
JR
623static struct dmar_domain* get_iommu_domain(struct intel_iommu *iommu, u16 did)
624{
8bf47816
JR
625 struct dmar_domain **domains;
626 int idx = did >> 8;
627
628 domains = iommu->domains[idx];
629 if (!domains)
630 return NULL;
631
632 return domains[did & 0xff];
9452d5bf
JR
633}
634
635static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
636 struct dmar_domain *domain)
637{
8bf47816
JR
638 struct dmar_domain **domains;
639 int idx = did >> 8;
640
641 if (!iommu->domains[idx]) {
642 size_t size = 256 * sizeof(struct dmar_domain *);
643 iommu->domains[idx] = kzalloc(size, GFP_ATOMIC);
644 }
645
646 domains = iommu->domains[idx];
647 if (WARN_ON(!domains))
648 return;
649 else
650 domains[did & 0xff] = domain;
9452d5bf
JR
651}
652
4c923d47 653static inline void *alloc_pgtable_page(int node)
eb3fa7cb 654{
4c923d47
SS
655 struct page *page;
656 void *vaddr = NULL;
eb3fa7cb 657
4c923d47
SS
658 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
659 if (page)
660 vaddr = page_address(page);
eb3fa7cb 661 return vaddr;
ba395927
KA
662}
663
664static inline void free_pgtable_page(void *vaddr)
665{
666 free_page((unsigned long)vaddr);
667}
668
669static inline void *alloc_domain_mem(void)
670{
354bb65e 671 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
ba395927
KA
672}
673
38717946 674static void free_domain_mem(void *vaddr)
ba395927
KA
675{
676 kmem_cache_free(iommu_domain_cache, vaddr);
677}
678
679static inline void * alloc_devinfo_mem(void)
680{
354bb65e 681 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
ba395927
KA
682}
683
684static inline void free_devinfo_mem(void *vaddr)
685{
686 kmem_cache_free(iommu_devinfo_cache, vaddr);
687}
688
ab8dfe25
JL
689static inline int domain_type_is_vm(struct dmar_domain *domain)
690{
691 return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE;
692}
693
28ccce0d
JR
694static inline int domain_type_is_si(struct dmar_domain *domain)
695{
696 return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
697}
698
ab8dfe25
JL
699static inline int domain_type_is_vm_or_si(struct dmar_domain *domain)
700{
701 return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE |
702 DOMAIN_FLAG_STATIC_IDENTITY);
703}
1b573683 704
162d1b10
JL
705static inline int domain_pfn_supported(struct dmar_domain *domain,
706 unsigned long pfn)
707{
708 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
709
710 return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
711}
712
4ed0d3e6 713static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
1b573683
WH
714{
715 unsigned long sagaw;
716 int agaw = -1;
717
718 sagaw = cap_sagaw(iommu->cap);
4ed0d3e6 719 for (agaw = width_to_agaw(max_gaw);
1b573683
WH
720 agaw >= 0; agaw--) {
721 if (test_bit(agaw, &sagaw))
722 break;
723 }
724
725 return agaw;
726}
727
4ed0d3e6
FY
728/*
729 * Calculate max SAGAW for each iommu.
730 */
731int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
732{
733 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
734}
735
736/*
737 * calculate agaw for each iommu.
738 * "SAGAW" may be different across iommus, use a default agaw, and
739 * get a supported less agaw for iommus that don't support the default agaw.
740 */
741int iommu_calculate_agaw(struct intel_iommu *iommu)
742{
743 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
744}
745
2c2e2c38 746/* This functionin only returns single iommu in a domain */
8c11e798
WH
747static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
748{
749 int iommu_id;
750
2c2e2c38 751 /* si_domain and vm domain should not get here. */
ab8dfe25 752 BUG_ON(domain_type_is_vm_or_si(domain));
29a27719
JR
753 for_each_domain_iommu(iommu_id, domain)
754 break;
755
8c11e798
WH
756 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
757 return NULL;
758
759 return g_iommus[iommu_id];
760}
761
8e604097
WH
762static void domain_update_iommu_coherency(struct dmar_domain *domain)
763{
d0501960
DW
764 struct dmar_drhd_unit *drhd;
765 struct intel_iommu *iommu;
2f119c78
QL
766 bool found = false;
767 int i;
2e12bc29 768
d0501960 769 domain->iommu_coherency = 1;
8e604097 770
29a27719 771 for_each_domain_iommu(i, domain) {
2f119c78 772 found = true;
8e604097
WH
773 if (!ecap_coherent(g_iommus[i]->ecap)) {
774 domain->iommu_coherency = 0;
775 break;
776 }
8e604097 777 }
d0501960
DW
778 if (found)
779 return;
780
781 /* No hardware attached; use lowest common denominator */
782 rcu_read_lock();
783 for_each_active_iommu(iommu, drhd) {
784 if (!ecap_coherent(iommu->ecap)) {
785 domain->iommu_coherency = 0;
786 break;
787 }
788 }
789 rcu_read_unlock();
8e604097
WH
790}
791
161f6934 792static int domain_update_iommu_snooping(struct intel_iommu *skip)
58c610bd 793{
161f6934
JL
794 struct dmar_drhd_unit *drhd;
795 struct intel_iommu *iommu;
796 int ret = 1;
58c610bd 797
161f6934
JL
798 rcu_read_lock();
799 for_each_active_iommu(iommu, drhd) {
800 if (iommu != skip) {
801 if (!ecap_sc_support(iommu->ecap)) {
802 ret = 0;
803 break;
804 }
58c610bd 805 }
58c610bd 806 }
161f6934
JL
807 rcu_read_unlock();
808
809 return ret;
58c610bd
SY
810}
811
161f6934 812static int domain_update_iommu_superpage(struct intel_iommu *skip)
6dd9a7c7 813{
8140a95d 814 struct dmar_drhd_unit *drhd;
161f6934 815 struct intel_iommu *iommu;
8140a95d 816 int mask = 0xf;
6dd9a7c7
YS
817
818 if (!intel_iommu_superpage) {
161f6934 819 return 0;
6dd9a7c7
YS
820 }
821
8140a95d 822 /* set iommu_superpage to the smallest common denominator */
0e242612 823 rcu_read_lock();
8140a95d 824 for_each_active_iommu(iommu, drhd) {
161f6934
JL
825 if (iommu != skip) {
826 mask &= cap_super_page_val(iommu->cap);
827 if (!mask)
828 break;
6dd9a7c7
YS
829 }
830 }
0e242612
JL
831 rcu_read_unlock();
832
161f6934 833 return fls(mask);
6dd9a7c7
YS
834}
835
58c610bd
SY
836/* Some capabilities may be different across iommus */
837static void domain_update_iommu_cap(struct dmar_domain *domain)
838{
839 domain_update_iommu_coherency(domain);
161f6934
JL
840 domain->iommu_snooping = domain_update_iommu_snooping(NULL);
841 domain->iommu_superpage = domain_update_iommu_superpage(NULL);
58c610bd
SY
842}
843
03ecc32c
DW
844static inline struct context_entry *iommu_context_addr(struct intel_iommu *iommu,
845 u8 bus, u8 devfn, int alloc)
846{
847 struct root_entry *root = &iommu->root_entry[bus];
848 struct context_entry *context;
849 u64 *entry;
850
4df4eab1 851 entry = &root->lo;
c83b2f20 852 if (ecs_enabled(iommu)) {
03ecc32c
DW
853 if (devfn >= 0x80) {
854 devfn -= 0x80;
855 entry = &root->hi;
856 }
857 devfn *= 2;
858 }
03ecc32c
DW
859 if (*entry & 1)
860 context = phys_to_virt(*entry & VTD_PAGE_MASK);
861 else {
862 unsigned long phy_addr;
863 if (!alloc)
864 return NULL;
865
866 context = alloc_pgtable_page(iommu->node);
867 if (!context)
868 return NULL;
869
870 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
871 phy_addr = virt_to_phys((void *)context);
872 *entry = phy_addr | 1;
873 __iommu_flush_cache(iommu, entry, sizeof(*entry));
874 }
875 return &context[devfn];
876}
877
4ed6a540
DW
878static int iommu_dummy(struct device *dev)
879{
880 return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
881}
882
156baca8 883static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
c7151a8d
WH
884{
885 struct dmar_drhd_unit *drhd = NULL;
b683b230 886 struct intel_iommu *iommu;
156baca8
DW
887 struct device *tmp;
888 struct pci_dev *ptmp, *pdev = NULL;
aa4d066a 889 u16 segment = 0;
c7151a8d
WH
890 int i;
891
4ed6a540
DW
892 if (iommu_dummy(dev))
893 return NULL;
894
156baca8 895 if (dev_is_pci(dev)) {
1c387188
AR
896 struct pci_dev *pf_pdev;
897
156baca8 898 pdev = to_pci_dev(dev);
1c387188
AR
899 /* VFs aren't listed in scope tables; we need to look up
900 * the PF instead to find the IOMMU. */
901 pf_pdev = pci_physfn(pdev);
902 dev = &pf_pdev->dev;
156baca8 903 segment = pci_domain_nr(pdev->bus);
ca5b74d2 904 } else if (has_acpi_companion(dev))
156baca8
DW
905 dev = &ACPI_COMPANION(dev)->dev;
906
0e242612 907 rcu_read_lock();
b683b230 908 for_each_active_iommu(iommu, drhd) {
156baca8 909 if (pdev && segment != drhd->segment)
276dbf99 910 continue;
c7151a8d 911
b683b230 912 for_each_active_dev_scope(drhd->devices,
156baca8
DW
913 drhd->devices_cnt, i, tmp) {
914 if (tmp == dev) {
1c387188
AR
915 /* For a VF use its original BDF# not that of the PF
916 * which we used for the IOMMU lookup. Strictly speaking
917 * we could do this for all PCI devices; we only need to
918 * get the BDF# from the scope table for ACPI matches. */
919 if (pdev->is_virtfn)
920 goto got_pdev;
921
156baca8
DW
922 *bus = drhd->devices[i].bus;
923 *devfn = drhd->devices[i].devfn;
b683b230 924 goto out;
156baca8
DW
925 }
926
927 if (!pdev || !dev_is_pci(tmp))
928 continue;
929
930 ptmp = to_pci_dev(tmp);
931 if (ptmp->subordinate &&
932 ptmp->subordinate->number <= pdev->bus->number &&
933 ptmp->subordinate->busn_res.end >= pdev->bus->number)
934 goto got_pdev;
924b6231 935 }
c7151a8d 936
156baca8
DW
937 if (pdev && drhd->include_all) {
938 got_pdev:
939 *bus = pdev->bus->number;
940 *devfn = pdev->devfn;
b683b230 941 goto out;
156baca8 942 }
c7151a8d 943 }
b683b230 944 iommu = NULL;
156baca8 945 out:
0e242612 946 rcu_read_unlock();
c7151a8d 947
b683b230 948 return iommu;
c7151a8d
WH
949}
950
5331fe6f
WH
951static void domain_flush_cache(struct dmar_domain *domain,
952 void *addr, int size)
953{
954 if (!domain->iommu_coherency)
955 clflush_cache_range(addr, size);
956}
957
ba395927
KA
958static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
959{
ba395927 960 struct context_entry *context;
03ecc32c 961 int ret = 0;
ba395927
KA
962 unsigned long flags;
963
964 spin_lock_irqsave(&iommu->lock, flags);
03ecc32c
DW
965 context = iommu_context_addr(iommu, bus, devfn, 0);
966 if (context)
967 ret = context_present(context);
ba395927
KA
968 spin_unlock_irqrestore(&iommu->lock, flags);
969 return ret;
970}
971
972static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
973{
ba395927
KA
974 struct context_entry *context;
975 unsigned long flags;
976
977 spin_lock_irqsave(&iommu->lock, flags);
03ecc32c 978 context = iommu_context_addr(iommu, bus, devfn, 0);
ba395927 979 if (context) {
03ecc32c
DW
980 context_clear_entry(context);
981 __iommu_flush_cache(iommu, context, sizeof(*context));
ba395927
KA
982 }
983 spin_unlock_irqrestore(&iommu->lock, flags);
984}
985
986static void free_context_table(struct intel_iommu *iommu)
987{
ba395927
KA
988 int i;
989 unsigned long flags;
990 struct context_entry *context;
991
992 spin_lock_irqsave(&iommu->lock, flags);
993 if (!iommu->root_entry) {
994 goto out;
995 }
996 for (i = 0; i < ROOT_ENTRY_NR; i++) {
03ecc32c 997 context = iommu_context_addr(iommu, i, 0, 0);
ba395927
KA
998 if (context)
999 free_pgtable_page(context);
03ecc32c 1000
c83b2f20 1001 if (!ecs_enabled(iommu))
03ecc32c
DW
1002 continue;
1003
1004 context = iommu_context_addr(iommu, i, 0x80, 0);
1005 if (context)
1006 free_pgtable_page(context);
1007
ba395927
KA
1008 }
1009 free_pgtable_page(iommu->root_entry);
1010 iommu->root_entry = NULL;
1011out:
1012 spin_unlock_irqrestore(&iommu->lock, flags);
1013}
1014
b026fd28 1015static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
5cf0a76f 1016 unsigned long pfn, int *target_level)
ba395927 1017{
ba395927
KA
1018 struct dma_pte *parent, *pte = NULL;
1019 int level = agaw_to_level(domain->agaw);
4399c8bf 1020 int offset;
ba395927
KA
1021
1022 BUG_ON(!domain->pgd);
f9423606 1023
162d1b10 1024 if (!domain_pfn_supported(domain, pfn))
f9423606
JS
1025 /* Address beyond IOMMU's addressing capabilities. */
1026 return NULL;
1027
ba395927
KA
1028 parent = domain->pgd;
1029
5cf0a76f 1030 while (1) {
ba395927
KA
1031 void *tmp_page;
1032
b026fd28 1033 offset = pfn_level_offset(pfn, level);
ba395927 1034 pte = &parent[offset];
5cf0a76f 1035 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
6dd9a7c7 1036 break;
5cf0a76f 1037 if (level == *target_level)
ba395927
KA
1038 break;
1039
19c239ce 1040 if (!dma_pte_present(pte)) {
c85994e4
DW
1041 uint64_t pteval;
1042
4c923d47 1043 tmp_page = alloc_pgtable_page(domain->nid);
ba395927 1044
206a73c1 1045 if (!tmp_page)
ba395927 1046 return NULL;
206a73c1 1047
c85994e4 1048 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
64de5af0 1049 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
effad4b5 1050 if (cmpxchg64(&pte->val, 0ULL, pteval))
c85994e4
DW
1051 /* Someone else set it while we were thinking; use theirs. */
1052 free_pgtable_page(tmp_page);
effad4b5 1053 else
c85994e4 1054 domain_flush_cache(domain, pte, sizeof(*pte));
ba395927 1055 }
5cf0a76f
DW
1056 if (level == 1)
1057 break;
1058
19c239ce 1059 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
1060 level--;
1061 }
1062
5cf0a76f
DW
1063 if (!*target_level)
1064 *target_level = level;
1065
ba395927
KA
1066 return pte;
1067}
1068
6dd9a7c7 1069
ba395927 1070/* return address's pte at specific level */
90dcfb5e
DW
1071static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
1072 unsigned long pfn,
6dd9a7c7 1073 int level, int *large_page)
ba395927
KA
1074{
1075 struct dma_pte *parent, *pte = NULL;
1076 int total = agaw_to_level(domain->agaw);
1077 int offset;
1078
1079 parent = domain->pgd;
1080 while (level <= total) {
90dcfb5e 1081 offset = pfn_level_offset(pfn, total);
ba395927
KA
1082 pte = &parent[offset];
1083 if (level == total)
1084 return pte;
1085
6dd9a7c7
YS
1086 if (!dma_pte_present(pte)) {
1087 *large_page = total;
ba395927 1088 break;
6dd9a7c7
YS
1089 }
1090
e16922af 1091 if (dma_pte_superpage(pte)) {
6dd9a7c7
YS
1092 *large_page = total;
1093 return pte;
1094 }
1095
19c239ce 1096 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
1097 total--;
1098 }
1099 return NULL;
1100}
1101
ba395927 1102/* clear last level pte, a tlb flush should be followed */
5cf0a76f 1103static void dma_pte_clear_range(struct dmar_domain *domain,
595badf5
DW
1104 unsigned long start_pfn,
1105 unsigned long last_pfn)
ba395927 1106{
6dd9a7c7 1107 unsigned int large_page = 1;
310a5ab9 1108 struct dma_pte *first_pte, *pte;
66eae846 1109
162d1b10
JL
1110 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1111 BUG_ON(!domain_pfn_supported(domain, last_pfn));
59c36286 1112 BUG_ON(start_pfn > last_pfn);
ba395927 1113
04b18e65 1114 /* we don't need lock here; nobody else touches the iova range */
59c36286 1115 do {
6dd9a7c7
YS
1116 large_page = 1;
1117 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
310a5ab9 1118 if (!pte) {
6dd9a7c7 1119 start_pfn = align_to_level(start_pfn + 1, large_page + 1);
310a5ab9
DW
1120 continue;
1121 }
6dd9a7c7 1122 do {
310a5ab9 1123 dma_clear_pte(pte);
6dd9a7c7 1124 start_pfn += lvl_to_nr_pages(large_page);
310a5ab9 1125 pte++;
75e6bf96
DW
1126 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
1127
310a5ab9
DW
1128 domain_flush_cache(domain, first_pte,
1129 (void *)pte - (void *)first_pte);
59c36286
DW
1130
1131 } while (start_pfn && start_pfn <= last_pfn);
ba395927
KA
1132}
1133
3269ee0b
AW
1134static void dma_pte_free_level(struct dmar_domain *domain, int level,
1135 struct dma_pte *pte, unsigned long pfn,
1136 unsigned long start_pfn, unsigned long last_pfn)
1137{
1138 pfn = max(start_pfn, pfn);
1139 pte = &pte[pfn_level_offset(pfn, level)];
1140
1141 do {
1142 unsigned long level_pfn;
1143 struct dma_pte *level_pte;
1144
1145 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
1146 goto next;
1147
f7116e11 1148 level_pfn = pfn & level_mask(level);
3269ee0b
AW
1149 level_pte = phys_to_virt(dma_pte_addr(pte));
1150
1151 if (level > 2)
1152 dma_pte_free_level(domain, level - 1, level_pte,
1153 level_pfn, start_pfn, last_pfn);
1154
1155 /* If range covers entire pagetable, free it */
1156 if (!(start_pfn > level_pfn ||
08336fd2 1157 last_pfn < level_pfn + level_size(level) - 1)) {
3269ee0b
AW
1158 dma_clear_pte(pte);
1159 domain_flush_cache(domain, pte, sizeof(*pte));
1160 free_pgtable_page(level_pte);
1161 }
1162next:
1163 pfn += level_size(level);
1164 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1165}
1166
3d1a2442 1167/* clear last level (leaf) ptes and free page table pages. */
ba395927 1168static void dma_pte_free_pagetable(struct dmar_domain *domain,
d794dc9b
DW
1169 unsigned long start_pfn,
1170 unsigned long last_pfn)
ba395927 1171{
162d1b10
JL
1172 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1173 BUG_ON(!domain_pfn_supported(domain, last_pfn));
59c36286 1174 BUG_ON(start_pfn > last_pfn);
ba395927 1175
d41a4adb
JL
1176 dma_pte_clear_range(domain, start_pfn, last_pfn);
1177
f3a0a52f 1178 /* We don't need lock here; nobody else touches the iova range */
3269ee0b
AW
1179 dma_pte_free_level(domain, agaw_to_level(domain->agaw),
1180 domain->pgd, 0, start_pfn, last_pfn);
6660c63a 1181
ba395927 1182 /* free pgd */
d794dc9b 1183 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
ba395927
KA
1184 free_pgtable_page(domain->pgd);
1185 domain->pgd = NULL;
1186 }
1187}
1188
ea8ea460
DW
1189/* When a page at a given level is being unlinked from its parent, we don't
1190 need to *modify* it at all. All we need to do is make a list of all the
1191 pages which can be freed just as soon as we've flushed the IOTLB and we
1192 know the hardware page-walk will no longer touch them.
1193 The 'pte' argument is the *parent* PTE, pointing to the page that is to
1194 be freed. */
1195static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1196 int level, struct dma_pte *pte,
1197 struct page *freelist)
1198{
1199 struct page *pg;
1200
1201 pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1202 pg->freelist = freelist;
1203 freelist = pg;
1204
1205 if (level == 1)
1206 return freelist;
1207
adeb2590
JL
1208 pte = page_address(pg);
1209 do {
ea8ea460
DW
1210 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1211 freelist = dma_pte_list_pagetables(domain, level - 1,
1212 pte, freelist);
adeb2590
JL
1213 pte++;
1214 } while (!first_pte_in_page(pte));
ea8ea460
DW
1215
1216 return freelist;
1217}
1218
1219static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1220 struct dma_pte *pte, unsigned long pfn,
1221 unsigned long start_pfn,
1222 unsigned long last_pfn,
1223 struct page *freelist)
1224{
1225 struct dma_pte *first_pte = NULL, *last_pte = NULL;
1226
1227 pfn = max(start_pfn, pfn);
1228 pte = &pte[pfn_level_offset(pfn, level)];
1229
1230 do {
1231 unsigned long level_pfn;
1232
1233 if (!dma_pte_present(pte))
1234 goto next;
1235
1236 level_pfn = pfn & level_mask(level);
1237
1238 /* If range covers entire pagetable, free it */
1239 if (start_pfn <= level_pfn &&
1240 last_pfn >= level_pfn + level_size(level) - 1) {
1241 /* These suborbinate page tables are going away entirely. Don't
1242 bother to clear them; we're just going to *free* them. */
1243 if (level > 1 && !dma_pte_superpage(pte))
1244 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1245
1246 dma_clear_pte(pte);
1247 if (!first_pte)
1248 first_pte = pte;
1249 last_pte = pte;
1250 } else if (level > 1) {
1251 /* Recurse down into a level that isn't *entirely* obsolete */
1252 freelist = dma_pte_clear_level(domain, level - 1,
1253 phys_to_virt(dma_pte_addr(pte)),
1254 level_pfn, start_pfn, last_pfn,
1255 freelist);
1256 }
1257next:
1258 pfn += level_size(level);
1259 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1260
1261 if (first_pte)
1262 domain_flush_cache(domain, first_pte,
1263 (void *)++last_pte - (void *)first_pte);
1264
1265 return freelist;
1266}
1267
1268/* We can't just free the pages because the IOMMU may still be walking
1269 the page tables, and may have cached the intermediate levels. The
1270 pages can only be freed after the IOTLB flush has been done. */
b690420a
JR
1271static struct page *domain_unmap(struct dmar_domain *domain,
1272 unsigned long start_pfn,
1273 unsigned long last_pfn)
ea8ea460 1274{
ea8ea460
DW
1275 struct page *freelist = NULL;
1276
162d1b10
JL
1277 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1278 BUG_ON(!domain_pfn_supported(domain, last_pfn));
ea8ea460
DW
1279 BUG_ON(start_pfn > last_pfn);
1280
1281 /* we don't need lock here; nobody else touches the iova range */
1282 freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1283 domain->pgd, 0, start_pfn, last_pfn, NULL);
1284
1285 /* free pgd */
1286 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1287 struct page *pgd_page = virt_to_page(domain->pgd);
1288 pgd_page->freelist = freelist;
1289 freelist = pgd_page;
1290
1291 domain->pgd = NULL;
1292 }
1293
1294 return freelist;
1295}
1296
b690420a 1297static void dma_free_pagelist(struct page *freelist)
ea8ea460
DW
1298{
1299 struct page *pg;
1300
1301 while ((pg = freelist)) {
1302 freelist = pg->freelist;
1303 free_pgtable_page(page_address(pg));
1304 }
1305}
1306
ba395927
KA
1307/* iommu handling */
1308static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1309{
1310 struct root_entry *root;
1311 unsigned long flags;
1312
4c923d47 1313 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
ffebeb46 1314 if (!root) {
9f10e5bf 1315 pr_err("Allocating root entry for %s failed\n",
ffebeb46 1316 iommu->name);
ba395927 1317 return -ENOMEM;
ffebeb46 1318 }
ba395927 1319
5b6985ce 1320 __iommu_flush_cache(iommu, root, ROOT_SIZE);
ba395927
KA
1321
1322 spin_lock_irqsave(&iommu->lock, flags);
1323 iommu->root_entry = root;
1324 spin_unlock_irqrestore(&iommu->lock, flags);
1325
1326 return 0;
1327}
1328
ba395927
KA
1329static void iommu_set_root_entry(struct intel_iommu *iommu)
1330{
03ecc32c 1331 u64 addr;
c416daa9 1332 u32 sts;
ba395927
KA
1333 unsigned long flag;
1334
03ecc32c 1335 addr = virt_to_phys(iommu->root_entry);
c83b2f20 1336 if (ecs_enabled(iommu))
03ecc32c 1337 addr |= DMA_RTADDR_RTT;
ba395927 1338
1f5b3c3f 1339 raw_spin_lock_irqsave(&iommu->register_lock, flag);
03ecc32c 1340 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
ba395927 1341
c416daa9 1342 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1343
1344 /* Make sure hardware complete it */
1345 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1346 readl, (sts & DMA_GSTS_RTPS), sts);
ba395927 1347
1f5b3c3f 1348 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1349}
1350
1351static void iommu_flush_write_buffer(struct intel_iommu *iommu)
1352{
1353 u32 val;
1354 unsigned long flag;
1355
9af88143 1356 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
ba395927 1357 return;
ba395927 1358
1f5b3c3f 1359 raw_spin_lock_irqsave(&iommu->register_lock, flag);
462b60f6 1360 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1361
1362 /* Make sure hardware complete it */
1363 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1364 readl, (!(val & DMA_GSTS_WBFS)), val);
ba395927 1365
1f5b3c3f 1366 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1367}
1368
1369/* return value determine if we need a write buffer flush */
4c25a2c1
DW
1370static void __iommu_flush_context(struct intel_iommu *iommu,
1371 u16 did, u16 source_id, u8 function_mask,
1372 u64 type)
ba395927
KA
1373{
1374 u64 val = 0;
1375 unsigned long flag;
1376
ba395927
KA
1377 switch (type) {
1378 case DMA_CCMD_GLOBAL_INVL:
1379 val = DMA_CCMD_GLOBAL_INVL;
1380 break;
1381 case DMA_CCMD_DOMAIN_INVL:
1382 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1383 break;
1384 case DMA_CCMD_DEVICE_INVL:
1385 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1386 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1387 break;
1388 default:
1389 BUG();
1390 }
1391 val |= DMA_CCMD_ICC;
1392
1f5b3c3f 1393 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1394 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1395
1396 /* Make sure hardware complete it */
1397 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1398 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1399
1f5b3c3f 1400 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1401}
1402
ba395927 1403/* return value determine if we need a write buffer flush */
1f0ef2aa
DW
1404static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1405 u64 addr, unsigned int size_order, u64 type)
ba395927
KA
1406{
1407 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1408 u64 val = 0, val_iva = 0;
1409 unsigned long flag;
1410
ba395927
KA
1411 switch (type) {
1412 case DMA_TLB_GLOBAL_FLUSH:
1413 /* global flush doesn't need set IVA_REG */
1414 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1415 break;
1416 case DMA_TLB_DSI_FLUSH:
1417 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1418 break;
1419 case DMA_TLB_PSI_FLUSH:
1420 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
ea8ea460 1421 /* IH bit is passed in as part of address */
ba395927
KA
1422 val_iva = size_order | addr;
1423 break;
1424 default:
1425 BUG();
1426 }
1427 /* Note: set drain read/write */
1428#if 0
1429 /*
1430 * This is probably to be super secure.. Looks like we can
1431 * ignore it without any impact.
1432 */
1433 if (cap_read_drain(iommu->cap))
1434 val |= DMA_TLB_READ_DRAIN;
1435#endif
1436 if (cap_write_drain(iommu->cap))
1437 val |= DMA_TLB_WRITE_DRAIN;
1438
1f5b3c3f 1439 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1440 /* Note: Only uses first TLB reg currently */
1441 if (val_iva)
1442 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1443 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1444
1445 /* Make sure hardware complete it */
1446 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1447 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1448
1f5b3c3f 1449 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1450
1451 /* check IOTLB invalidation granularity */
1452 if (DMA_TLB_IAIG(val) == 0)
9f10e5bf 1453 pr_err("Flush IOTLB failed\n");
ba395927 1454 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
9f10e5bf 1455 pr_debug("TLB flush request %Lx, actual %Lx\n",
5b6985ce
FY
1456 (unsigned long long)DMA_TLB_IIRG(type),
1457 (unsigned long long)DMA_TLB_IAIG(val));
ba395927
KA
1458}
1459
64ae892b
DW
1460static struct device_domain_info *
1461iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
1462 u8 bus, u8 devfn)
93a23a72 1463{
93a23a72 1464 struct device_domain_info *info;
93a23a72 1465
55d94043
JR
1466 assert_spin_locked(&device_domain_lock);
1467
93a23a72
YZ
1468 if (!iommu->qi)
1469 return NULL;
1470
93a23a72 1471 list_for_each_entry(info, &domain->devices, link)
c3b497c6
JL
1472 if (info->iommu == iommu && info->bus == bus &&
1473 info->devfn == devfn) {
b16d0cb9
DW
1474 if (info->ats_supported && info->dev)
1475 return info;
93a23a72
YZ
1476 break;
1477 }
93a23a72 1478
b16d0cb9 1479 return NULL;
93a23a72
YZ
1480}
1481
0824c592
OP
1482static void domain_update_iotlb(struct dmar_domain *domain)
1483{
1484 struct device_domain_info *info;
1485 bool has_iotlb_device = false;
1486
1487 assert_spin_locked(&device_domain_lock);
1488
1489 list_for_each_entry(info, &domain->devices, link) {
1490 struct pci_dev *pdev;
1491
1492 if (!info->dev || !dev_is_pci(info->dev))
1493 continue;
1494
1495 pdev = to_pci_dev(info->dev);
1496 if (pdev->ats_enabled) {
1497 has_iotlb_device = true;
1498 break;
1499 }
1500 }
1501
1502 domain->has_iotlb_device = has_iotlb_device;
1503}
1504
93a23a72 1505static void iommu_enable_dev_iotlb(struct device_domain_info *info)
ba395927 1506{
fb0cc3aa
BH
1507 struct pci_dev *pdev;
1508
0824c592
OP
1509 assert_spin_locked(&device_domain_lock);
1510
0bcb3e28 1511 if (!info || !dev_is_pci(info->dev))
93a23a72
YZ
1512 return;
1513
fb0cc3aa 1514 pdev = to_pci_dev(info->dev);
fb0cc3aa 1515
b16d0cb9
DW
1516#ifdef CONFIG_INTEL_IOMMU_SVM
1517 /* The PCIe spec, in its wisdom, declares that the behaviour of
1518 the device if you enable PASID support after ATS support is
1519 undefined. So always enable PASID support on devices which
1520 have it, even if we can't yet know if we're ever going to
1521 use it. */
1522 if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
1523 info->pasid_enabled = 1;
1524
1525 if (info->pri_supported && !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
1526 info->pri_enabled = 1;
1527#endif
1528 if (info->ats_supported && !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
1529 info->ats_enabled = 1;
0824c592 1530 domain_update_iotlb(info->domain);
b16d0cb9
DW
1531 info->ats_qdep = pci_ats_queue_depth(pdev);
1532 }
93a23a72
YZ
1533}
1534
1535static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1536{
b16d0cb9
DW
1537 struct pci_dev *pdev;
1538
0824c592
OP
1539 assert_spin_locked(&device_domain_lock);
1540
da972fb1 1541 if (!dev_is_pci(info->dev))
93a23a72
YZ
1542 return;
1543
b16d0cb9
DW
1544 pdev = to_pci_dev(info->dev);
1545
1546 if (info->ats_enabled) {
1547 pci_disable_ats(pdev);
1548 info->ats_enabled = 0;
0824c592 1549 domain_update_iotlb(info->domain);
b16d0cb9
DW
1550 }
1551#ifdef CONFIG_INTEL_IOMMU_SVM
1552 if (info->pri_enabled) {
1553 pci_disable_pri(pdev);
1554 info->pri_enabled = 0;
1555 }
1556 if (info->pasid_enabled) {
1557 pci_disable_pasid(pdev);
1558 info->pasid_enabled = 0;
1559 }
1560#endif
93a23a72
YZ
1561}
1562
1563static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1564 u64 addr, unsigned mask)
1565{
1566 u16 sid, qdep;
1567 unsigned long flags;
1568 struct device_domain_info *info;
1569
0824c592
OP
1570 if (!domain->has_iotlb_device)
1571 return;
1572
93a23a72
YZ
1573 spin_lock_irqsave(&device_domain_lock, flags);
1574 list_for_each_entry(info, &domain->devices, link) {
b16d0cb9 1575 if (!info->ats_enabled)
93a23a72
YZ
1576 continue;
1577
1578 sid = info->bus << 8 | info->devfn;
b16d0cb9 1579 qdep = info->ats_qdep;
93a23a72
YZ
1580 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1581 }
1582 spin_unlock_irqrestore(&device_domain_lock, flags);
1583}
1584
a1ddcbe9
JR
1585static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
1586 struct dmar_domain *domain,
1587 unsigned long pfn, unsigned int pages,
1588 int ih, int map)
ba395927 1589{
9dd2fe89 1590 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
03d6a246 1591 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
a1ddcbe9 1592 u16 did = domain->iommu_did[iommu->seq_id];
ba395927 1593
ba395927
KA
1594 BUG_ON(pages == 0);
1595
ea8ea460
DW
1596 if (ih)
1597 ih = 1 << 6;
ba395927 1598 /*
9dd2fe89
YZ
1599 * Fallback to domain selective flush if no PSI support or the size is
1600 * too big.
ba395927
KA
1601 * PSI requires page size to be 2 ^ x, and the base address is naturally
1602 * aligned to the size
1603 */
9dd2fe89
YZ
1604 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1605 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1f0ef2aa 1606 DMA_TLB_DSI_FLUSH);
9dd2fe89 1607 else
ea8ea460 1608 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
9dd2fe89 1609 DMA_TLB_PSI_FLUSH);
bf92df30
YZ
1610
1611 /*
82653633
NA
1612 * In caching mode, changes of pages from non-present to present require
1613 * flush. However, device IOTLB doesn't need to be flushed in this case.
bf92df30 1614 */
82653633 1615 if (!cap_caching_mode(iommu->cap) || !map)
9452d5bf
JR
1616 iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
1617 addr, mask);
ba395927
KA
1618}
1619
f8bab735 1620static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1621{
1622 u32 pmen;
1623 unsigned long flags;
1624
1f5b3c3f 1625 raw_spin_lock_irqsave(&iommu->register_lock, flags);
f8bab735 1626 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1627 pmen &= ~DMA_PMEN_EPM;
1628 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1629
1630 /* wait for the protected region status bit to clear */
1631 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1632 readl, !(pmen & DMA_PMEN_PRS), pmen);
1633
1f5b3c3f 1634 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
f8bab735 1635}
1636
2a41ccee 1637static void iommu_enable_translation(struct intel_iommu *iommu)
ba395927
KA
1638{
1639 u32 sts;
1640 unsigned long flags;
1641
1f5b3c3f 1642 raw_spin_lock_irqsave(&iommu->register_lock, flags);
c416daa9
DW
1643 iommu->gcmd |= DMA_GCMD_TE;
1644 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1645
1646 /* Make sure hardware complete it */
1647 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1648 readl, (sts & DMA_GSTS_TES), sts);
ba395927 1649
1f5b3c3f 1650 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
ba395927
KA
1651}
1652
2a41ccee 1653static void iommu_disable_translation(struct intel_iommu *iommu)
ba395927
KA
1654{
1655 u32 sts;
1656 unsigned long flag;
1657
1f5b3c3f 1658 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1659 iommu->gcmd &= ~DMA_GCMD_TE;
1660 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1661
1662 /* Make sure hardware complete it */
1663 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1664 readl, (!(sts & DMA_GSTS_TES)), sts);
ba395927 1665
1f5b3c3f 1666 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1667}
1668
3460a6d9 1669
ba395927
KA
1670static int iommu_init_domains(struct intel_iommu *iommu)
1671{
8bf47816
JR
1672 u32 ndomains, nlongs;
1673 size_t size;
ba395927
KA
1674
1675 ndomains = cap_ndoms(iommu->cap);
8bf47816 1676 pr_debug("%s: Number of Domains supported <%d>\n",
9f10e5bf 1677 iommu->name, ndomains);
ba395927
KA
1678 nlongs = BITS_TO_LONGS(ndomains);
1679
94a91b50
DD
1680 spin_lock_init(&iommu->lock);
1681
ba395927
KA
1682 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1683 if (!iommu->domain_ids) {
9f10e5bf
JR
1684 pr_err("%s: Allocating domain id array failed\n",
1685 iommu->name);
ba395927
KA
1686 return -ENOMEM;
1687 }
8bf47816 1688
86f004c7 1689 size = (ALIGN(ndomains, 256) >> 8) * sizeof(struct dmar_domain **);
8bf47816
JR
1690 iommu->domains = kzalloc(size, GFP_KERNEL);
1691
1692 if (iommu->domains) {
1693 size = 256 * sizeof(struct dmar_domain *);
1694 iommu->domains[0] = kzalloc(size, GFP_KERNEL);
1695 }
1696
1697 if (!iommu->domains || !iommu->domains[0]) {
9f10e5bf
JR
1698 pr_err("%s: Allocating domain array failed\n",
1699 iommu->name);
852bdb04 1700 kfree(iommu->domain_ids);
8bf47816 1701 kfree(iommu->domains);
852bdb04 1702 iommu->domain_ids = NULL;
8bf47816 1703 iommu->domains = NULL;
ba395927
KA
1704 return -ENOMEM;
1705 }
1706
8bf47816
JR
1707
1708
ba395927 1709 /*
c0e8a6c8
JR
1710 * If Caching mode is set, then invalid translations are tagged
1711 * with domain-id 0, hence we need to pre-allocate it. We also
1712 * use domain-id 0 as a marker for non-allocated domain-id, so
1713 * make sure it is not used for a real domain.
ba395927 1714 */
c0e8a6c8
JR
1715 set_bit(0, iommu->domain_ids);
1716
ba395927
KA
1717 return 0;
1718}
ba395927 1719
ffebeb46 1720static void disable_dmar_iommu(struct intel_iommu *iommu)
ba395927 1721{
29a27719 1722 struct device_domain_info *info, *tmp;
55d94043 1723 unsigned long flags;
ba395927 1724
29a27719
JR
1725 if (!iommu->domains || !iommu->domain_ids)
1726 return;
a4eaa86c 1727
bea64033 1728again:
55d94043 1729 spin_lock_irqsave(&device_domain_lock, flags);
29a27719
JR
1730 list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
1731 struct dmar_domain *domain;
1732
1733 if (info->iommu != iommu)
1734 continue;
1735
1736 if (!info->dev || !info->domain)
1737 continue;
1738
1739 domain = info->domain;
1740
bea64033 1741 __dmar_remove_one_dev_info(info);
29a27719 1742
bea64033
JR
1743 if (!domain_type_is_vm_or_si(domain)) {
1744 /*
1745 * The domain_exit() function can't be called under
1746 * device_domain_lock, as it takes this lock itself.
1747 * So release the lock here and re-run the loop
1748 * afterwards.
1749 */
1750 spin_unlock_irqrestore(&device_domain_lock, flags);
29a27719 1751 domain_exit(domain);
bea64033
JR
1752 goto again;
1753 }
ba395927 1754 }
55d94043 1755 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927
KA
1756
1757 if (iommu->gcmd & DMA_GCMD_TE)
1758 iommu_disable_translation(iommu);
ffebeb46 1759}
ba395927 1760
ffebeb46
JL
1761static void free_dmar_iommu(struct intel_iommu *iommu)
1762{
1763 if ((iommu->domains) && (iommu->domain_ids)) {
86f004c7 1764 int elems = ALIGN(cap_ndoms(iommu->cap), 256) >> 8;
8bf47816
JR
1765 int i;
1766
1767 for (i = 0; i < elems; i++)
1768 kfree(iommu->domains[i]);
ffebeb46
JL
1769 kfree(iommu->domains);
1770 kfree(iommu->domain_ids);
1771 iommu->domains = NULL;
1772 iommu->domain_ids = NULL;
1773 }
ba395927 1774
d9630fe9
WH
1775 g_iommus[iommu->seq_id] = NULL;
1776
ba395927
KA
1777 /* free context mapping */
1778 free_context_table(iommu);
8a94ade4
DW
1779
1780#ifdef CONFIG_INTEL_IOMMU_SVM
a222a7f0
DW
1781 if (pasid_enabled(iommu)) {
1782 if (ecap_prs(iommu->ecap))
1783 intel_svm_finish_prq(iommu);
8a94ade4 1784 intel_svm_free_pasid_tables(iommu);
a222a7f0 1785 }
8a94ade4 1786#endif
ba395927
KA
1787}
1788
ab8dfe25 1789static struct dmar_domain *alloc_domain(int flags)
ba395927 1790{
ba395927 1791 struct dmar_domain *domain;
ba395927
KA
1792
1793 domain = alloc_domain_mem();
1794 if (!domain)
1795 return NULL;
1796
ab8dfe25 1797 memset(domain, 0, sizeof(*domain));
4c923d47 1798 domain->nid = -1;
ab8dfe25 1799 domain->flags = flags;
0824c592 1800 domain->has_iotlb_device = false;
92d03cc8 1801 INIT_LIST_HEAD(&domain->devices);
2c2e2c38
FY
1802
1803 return domain;
1804}
1805
d160aca5
JR
1806/* Must be called with iommu->lock */
1807static int domain_attach_iommu(struct dmar_domain *domain,
fb170fb4
JL
1808 struct intel_iommu *iommu)
1809{
44bde614 1810 unsigned long ndomains;
55d94043 1811 int num;
44bde614 1812
55d94043 1813 assert_spin_locked(&device_domain_lock);
d160aca5 1814 assert_spin_locked(&iommu->lock);
ba395927 1815
29a27719
JR
1816 domain->iommu_refcnt[iommu->seq_id] += 1;
1817 domain->iommu_count += 1;
1818 if (domain->iommu_refcnt[iommu->seq_id] == 1) {
fb170fb4 1819 ndomains = cap_ndoms(iommu->cap);
d160aca5
JR
1820 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1821
1822 if (num >= ndomains) {
1823 pr_err("%s: No free domain ids\n", iommu->name);
1824 domain->iommu_refcnt[iommu->seq_id] -= 1;
1825 domain->iommu_count -= 1;
55d94043 1826 return -ENOSPC;
2c2e2c38 1827 }
ba395927 1828
d160aca5
JR
1829 set_bit(num, iommu->domain_ids);
1830 set_iommu_domain(iommu, num, domain);
1831
1832 domain->iommu_did[iommu->seq_id] = num;
1833 domain->nid = iommu->node;
fb170fb4 1834
fb170fb4
JL
1835 domain_update_iommu_cap(domain);
1836 }
d160aca5 1837
55d94043 1838 return 0;
fb170fb4
JL
1839}
1840
1841static int domain_detach_iommu(struct dmar_domain *domain,
1842 struct intel_iommu *iommu)
1843{
d160aca5 1844 int num, count = INT_MAX;
d160aca5 1845
55d94043 1846 assert_spin_locked(&device_domain_lock);
d160aca5 1847 assert_spin_locked(&iommu->lock);
fb170fb4 1848
29a27719
JR
1849 domain->iommu_refcnt[iommu->seq_id] -= 1;
1850 count = --domain->iommu_count;
1851 if (domain->iommu_refcnt[iommu->seq_id] == 0) {
d160aca5
JR
1852 num = domain->iommu_did[iommu->seq_id];
1853 clear_bit(num, iommu->domain_ids);
1854 set_iommu_domain(iommu, num, NULL);
fb170fb4 1855
fb170fb4 1856 domain_update_iommu_cap(domain);
c0e8a6c8 1857 domain->iommu_did[iommu->seq_id] = 0;
fb170fb4 1858 }
fb170fb4
JL
1859
1860 return count;
1861}
1862
ba395927 1863static struct iova_domain reserved_iova_list;
8a443df4 1864static struct lock_class_key reserved_rbtree_key;
ba395927 1865
51a63e67 1866static int dmar_init_reserved_ranges(void)
ba395927
KA
1867{
1868 struct pci_dev *pdev = NULL;
1869 struct iova *iova;
1870 int i;
ba395927 1871
0fb5fe87
RM
1872 init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN,
1873 DMA_32BIT_PFN);
ba395927 1874
8a443df4
MG
1875 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1876 &reserved_rbtree_key);
1877
ba395927
KA
1878 /* IOAPIC ranges shouldn't be accessed by DMA */
1879 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1880 IOVA_PFN(IOAPIC_RANGE_END));
51a63e67 1881 if (!iova) {
9f10e5bf 1882 pr_err("Reserve IOAPIC range failed\n");
51a63e67
JC
1883 return -ENODEV;
1884 }
ba395927
KA
1885
1886 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1887 for_each_pci_dev(pdev) {
1888 struct resource *r;
1889
1890 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1891 r = &pdev->resource[i];
1892 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1893 continue;
1a4a4551
DW
1894 iova = reserve_iova(&reserved_iova_list,
1895 IOVA_PFN(r->start),
1896 IOVA_PFN(r->end));
51a63e67 1897 if (!iova) {
9f10e5bf 1898 pr_err("Reserve iova failed\n");
51a63e67
JC
1899 return -ENODEV;
1900 }
ba395927
KA
1901 }
1902 }
51a63e67 1903 return 0;
ba395927
KA
1904}
1905
1906static void domain_reserve_special_ranges(struct dmar_domain *domain)
1907{
1908 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1909}
1910
1911static inline int guestwidth_to_adjustwidth(int gaw)
1912{
1913 int agaw;
1914 int r = (gaw - 12) % 9;
1915
1916 if (r == 0)
1917 agaw = gaw;
1918 else
1919 agaw = gaw + 9 - r;
1920 if (agaw > 64)
1921 agaw = 64;
1922 return agaw;
1923}
1924
dc534b25
JR
1925static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
1926 int guest_width)
ba395927 1927{
ba395927
KA
1928 int adjust_width, agaw;
1929 unsigned long sagaw;
1930
0fb5fe87
RM
1931 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
1932 DMA_32BIT_PFN);
ba395927
KA
1933 domain_reserve_special_ranges(domain);
1934
1935 /* calculate AGAW */
ba395927
KA
1936 if (guest_width > cap_mgaw(iommu->cap))
1937 guest_width = cap_mgaw(iommu->cap);
1938 domain->gaw = guest_width;
1939 adjust_width = guestwidth_to_adjustwidth(guest_width);
1940 agaw = width_to_agaw(adjust_width);
1941 sagaw = cap_sagaw(iommu->cap);
1942 if (!test_bit(agaw, &sagaw)) {
1943 /* hardware doesn't support it, choose a bigger one */
9f10e5bf 1944 pr_debug("Hardware doesn't support agaw %d\n", agaw);
ba395927
KA
1945 agaw = find_next_bit(&sagaw, 5, agaw);
1946 if (agaw >= 5)
1947 return -ENODEV;
1948 }
1949 domain->agaw = agaw;
ba395927 1950
8e604097
WH
1951 if (ecap_coherent(iommu->ecap))
1952 domain->iommu_coherency = 1;
1953 else
1954 domain->iommu_coherency = 0;
1955
58c610bd
SY
1956 if (ecap_sc_support(iommu->ecap))
1957 domain->iommu_snooping = 1;
1958 else
1959 domain->iommu_snooping = 0;
1960
214e39aa
DW
1961 if (intel_iommu_superpage)
1962 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1963 else
1964 domain->iommu_superpage = 0;
1965
4c923d47 1966 domain->nid = iommu->node;
c7151a8d 1967
ba395927 1968 /* always allocate the top pgd */
4c923d47 1969 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
ba395927
KA
1970 if (!domain->pgd)
1971 return -ENOMEM;
5b6985ce 1972 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
ba395927
KA
1973 return 0;
1974}
1975
1976static void domain_exit(struct dmar_domain *domain)
1977{
ea8ea460 1978 struct page *freelist = NULL;
ba395927
KA
1979
1980 /* Domain 0 is reserved, so dont process it */
1981 if (!domain)
1982 return;
1983
7b668357 1984 /* Flush any lazy unmaps that may reference this domain */
aa473240
OP
1985 if (!intel_iommu_strict) {
1986 int cpu;
1987
1988 for_each_possible_cpu(cpu)
1989 flush_unmaps_timeout(cpu);
1990 }
7b668357 1991
d160aca5
JR
1992 /* Remove associated devices and clear attached or cached domains */
1993 rcu_read_lock();
ba395927 1994 domain_remove_dev_info(domain);
d160aca5 1995 rcu_read_unlock();
92d03cc8 1996
ba395927
KA
1997 /* destroy iovas */
1998 put_iova_domain(&domain->iovad);
ba395927 1999
ea8ea460 2000 freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
ba395927 2001
ea8ea460
DW
2002 dma_free_pagelist(freelist);
2003
ba395927
KA
2004 free_domain_mem(domain);
2005}
2006
64ae892b
DW
2007static int domain_context_mapping_one(struct dmar_domain *domain,
2008 struct intel_iommu *iommu,
28ccce0d 2009 u8 bus, u8 devfn)
ba395927 2010{
c6c2cebd 2011 u16 did = domain->iommu_did[iommu->seq_id];
28ccce0d
JR
2012 int translation = CONTEXT_TT_MULTI_LEVEL;
2013 struct device_domain_info *info = NULL;
ba395927 2014 struct context_entry *context;
ba395927 2015 unsigned long flags;
ea6606b0 2016 struct dma_pte *pgd;
55d94043 2017 int ret, agaw;
28ccce0d 2018
c6c2cebd
JR
2019 WARN_ON(did == 0);
2020
28ccce0d
JR
2021 if (hw_pass_through && domain_type_is_si(domain))
2022 translation = CONTEXT_TT_PASS_THROUGH;
ba395927
KA
2023
2024 pr_debug("Set context mapping for %02x:%02x.%d\n",
2025 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
4ed0d3e6 2026
ba395927 2027 BUG_ON(!domain->pgd);
5331fe6f 2028
55d94043
JR
2029 spin_lock_irqsave(&device_domain_lock, flags);
2030 spin_lock(&iommu->lock);
2031
2032 ret = -ENOMEM;
03ecc32c 2033 context = iommu_context_addr(iommu, bus, devfn, 1);
ba395927 2034 if (!context)
55d94043 2035 goto out_unlock;
ba395927 2036
55d94043
JR
2037 ret = 0;
2038 if (context_present(context))
2039 goto out_unlock;
cf484d0e 2040
aec0e861
XP
2041 /*
2042 * For kdump cases, old valid entries may be cached due to the
2043 * in-flight DMA and copied pgtable, but there is no unmapping
2044 * behaviour for them, thus we need an explicit cache flush for
2045 * the newly-mapped device. For kdump, at this point, the device
2046 * is supposed to finish reset at its driver probe stage, so no
2047 * in-flight DMA will exist, and we don't need to worry anymore
2048 * hereafter.
2049 */
2050 if (context_copied(context)) {
2051 u16 did_old = context_domain_id(context);
2052
2053 if (did_old >= 0 && did_old < cap_ndoms(iommu->cap))
2054 iommu->flush.flush_context(iommu, did_old,
2055 (((u16)bus) << 8) | devfn,
2056 DMA_CCMD_MASK_NOBIT,
2057 DMA_CCMD_DEVICE_INVL);
2058 }
2059
ea6606b0
WH
2060 pgd = domain->pgd;
2061
de24e553 2062 context_clear_entry(context);
c6c2cebd 2063 context_set_domain_id(context, did);
ea6606b0 2064
de24e553
JR
2065 /*
2066 * Skip top levels of page tables for iommu which has less agaw
2067 * than default. Unnecessary for PT mode.
2068 */
93a23a72 2069 if (translation != CONTEXT_TT_PASS_THROUGH) {
de24e553 2070 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
55d94043 2071 ret = -ENOMEM;
de24e553 2072 pgd = phys_to_virt(dma_pte_addr(pgd));
55d94043
JR
2073 if (!dma_pte_present(pgd))
2074 goto out_unlock;
ea6606b0 2075 }
4ed0d3e6 2076
64ae892b 2077 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
b16d0cb9
DW
2078 if (info && info->ats_supported)
2079 translation = CONTEXT_TT_DEV_IOTLB;
2080 else
2081 translation = CONTEXT_TT_MULTI_LEVEL;
de24e553 2082
93a23a72
YZ
2083 context_set_address_root(context, virt_to_phys(pgd));
2084 context_set_address_width(context, iommu->agaw);
de24e553
JR
2085 } else {
2086 /*
2087 * In pass through mode, AW must be programmed to
2088 * indicate the largest AGAW value supported by
2089 * hardware. And ASR is ignored by hardware.
2090 */
2091 context_set_address_width(context, iommu->msagaw);
93a23a72 2092 }
4ed0d3e6
FY
2093
2094 context_set_translation_type(context, translation);
c07e7d21
MM
2095 context_set_fault_enable(context);
2096 context_set_present(context);
5331fe6f 2097 domain_flush_cache(domain, context, sizeof(*context));
ba395927 2098
4c25a2c1
DW
2099 /*
2100 * It's a non-present to present mapping. If hardware doesn't cache
2101 * non-present entry we only need to flush the write-buffer. If the
2102 * _does_ cache non-present entries, then it does so in the special
2103 * domain #0, which we have to flush:
2104 */
2105 if (cap_caching_mode(iommu->cap)) {
2106 iommu->flush.flush_context(iommu, 0,
2107 (((u16)bus) << 8) | devfn,
2108 DMA_CCMD_MASK_NOBIT,
2109 DMA_CCMD_DEVICE_INVL);
c6c2cebd 2110 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
4c25a2c1 2111 } else {
ba395927 2112 iommu_flush_write_buffer(iommu);
4c25a2c1 2113 }
93a23a72 2114 iommu_enable_dev_iotlb(info);
c7151a8d 2115
55d94043
JR
2116 ret = 0;
2117
2118out_unlock:
2119 spin_unlock(&iommu->lock);
2120 spin_unlock_irqrestore(&device_domain_lock, flags);
fb170fb4 2121
5c365d18 2122 return ret;
ba395927
KA
2123}
2124
579305f7
AW
2125struct domain_context_mapping_data {
2126 struct dmar_domain *domain;
2127 struct intel_iommu *iommu;
579305f7
AW
2128};
2129
2130static int domain_context_mapping_cb(struct pci_dev *pdev,
2131 u16 alias, void *opaque)
2132{
2133 struct domain_context_mapping_data *data = opaque;
2134
2135 return domain_context_mapping_one(data->domain, data->iommu,
28ccce0d 2136 PCI_BUS_NUM(alias), alias & 0xff);
579305f7
AW
2137}
2138
ba395927 2139static int
28ccce0d 2140domain_context_mapping(struct dmar_domain *domain, struct device *dev)
ba395927 2141{
64ae892b 2142 struct intel_iommu *iommu;
156baca8 2143 u8 bus, devfn;
579305f7 2144 struct domain_context_mapping_data data;
64ae892b 2145
e1f167f3 2146 iommu = device_to_iommu(dev, &bus, &devfn);
64ae892b
DW
2147 if (!iommu)
2148 return -ENODEV;
ba395927 2149
579305f7 2150 if (!dev_is_pci(dev))
28ccce0d 2151 return domain_context_mapping_one(domain, iommu, bus, devfn);
579305f7
AW
2152
2153 data.domain = domain;
2154 data.iommu = iommu;
579305f7
AW
2155
2156 return pci_for_each_dma_alias(to_pci_dev(dev),
2157 &domain_context_mapping_cb, &data);
2158}
2159
2160static int domain_context_mapped_cb(struct pci_dev *pdev,
2161 u16 alias, void *opaque)
2162{
2163 struct intel_iommu *iommu = opaque;
2164
2165 return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
ba395927
KA
2166}
2167
e1f167f3 2168static int domain_context_mapped(struct device *dev)
ba395927 2169{
5331fe6f 2170 struct intel_iommu *iommu;
156baca8 2171 u8 bus, devfn;
5331fe6f 2172
e1f167f3 2173 iommu = device_to_iommu(dev, &bus, &devfn);
5331fe6f
WH
2174 if (!iommu)
2175 return -ENODEV;
ba395927 2176
579305f7
AW
2177 if (!dev_is_pci(dev))
2178 return device_context_mapped(iommu, bus, devfn);
e1f167f3 2179
579305f7
AW
2180 return !pci_for_each_dma_alias(to_pci_dev(dev),
2181 domain_context_mapped_cb, iommu);
ba395927
KA
2182}
2183
f532959b
FY
2184/* Returns a number of VTD pages, but aligned to MM page size */
2185static inline unsigned long aligned_nrpages(unsigned long host_addr,
2186 size_t size)
2187{
2188 host_addr &= ~PAGE_MASK;
2189 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
2190}
2191
6dd9a7c7
YS
2192/* Return largest possible superpage level for a given mapping */
2193static inline int hardware_largepage_caps(struct dmar_domain *domain,
2194 unsigned long iov_pfn,
2195 unsigned long phy_pfn,
2196 unsigned long pages)
2197{
2198 int support, level = 1;
2199 unsigned long pfnmerge;
2200
2201 support = domain->iommu_superpage;
2202
2203 /* To use a large page, the virtual *and* physical addresses
2204 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2205 of them will mean we have to use smaller pages. So just
2206 merge them and check both at once. */
2207 pfnmerge = iov_pfn | phy_pfn;
2208
2209 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
2210 pages >>= VTD_STRIDE_SHIFT;
2211 if (!pages)
2212 break;
2213 pfnmerge >>= VTD_STRIDE_SHIFT;
2214 level++;
2215 support--;
2216 }
2217 return level;
2218}
2219
9051aa02
DW
2220static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2221 struct scatterlist *sg, unsigned long phys_pfn,
2222 unsigned long nr_pages, int prot)
e1605495
DW
2223{
2224 struct dma_pte *first_pte = NULL, *pte = NULL;
9051aa02 2225 phys_addr_t uninitialized_var(pteval);
cc4f14aa 2226 unsigned long sg_res = 0;
6dd9a7c7
YS
2227 unsigned int largepage_lvl = 0;
2228 unsigned long lvl_pages = 0;
e1605495 2229
162d1b10 2230 BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
e1605495
DW
2231
2232 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2233 return -EINVAL;
2234
2235 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
2236
cc4f14aa
JL
2237 if (!sg) {
2238 sg_res = nr_pages;
9051aa02
DW
2239 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
2240 }
2241
6dd9a7c7 2242 while (nr_pages > 0) {
c85994e4
DW
2243 uint64_t tmp;
2244
e1605495 2245 if (!sg_res) {
f532959b 2246 sg_res = aligned_nrpages(sg->offset, sg->length);
e1605495
DW
2247 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
2248 sg->dma_length = sg->length;
3e6110fd 2249 pteval = page_to_phys(sg_page(sg)) | prot;
6dd9a7c7 2250 phys_pfn = pteval >> VTD_PAGE_SHIFT;
e1605495 2251 }
6dd9a7c7 2252
e1605495 2253 if (!pte) {
6dd9a7c7
YS
2254 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2255
5cf0a76f 2256 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
e1605495
DW
2257 if (!pte)
2258 return -ENOMEM;
6dd9a7c7 2259 /* It is large page*/
6491d4d0 2260 if (largepage_lvl > 1) {
ba2374fd
CZ
2261 unsigned long nr_superpages, end_pfn;
2262
6dd9a7c7 2263 pteval |= DMA_PTE_LARGE_PAGE;
d41a4adb 2264 lvl_pages = lvl_to_nr_pages(largepage_lvl);
ba2374fd
CZ
2265
2266 nr_superpages = sg_res / lvl_pages;
2267 end_pfn = iov_pfn + nr_superpages * lvl_pages - 1;
2268
d41a4adb
JL
2269 /*
2270 * Ensure that old small page tables are
ba2374fd 2271 * removed to make room for superpage(s).
d41a4adb 2272 */
ba2374fd 2273 dma_pte_free_pagetable(domain, iov_pfn, end_pfn);
6491d4d0 2274 } else {
6dd9a7c7 2275 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
6491d4d0 2276 }
6dd9a7c7 2277
e1605495
DW
2278 }
2279 /* We don't need lock here, nobody else
2280 * touches the iova range
2281 */
7766a3fb 2282 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
c85994e4 2283 if (tmp) {
1bf20f0d 2284 static int dumps = 5;
9f10e5bf
JR
2285 pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2286 iov_pfn, tmp, (unsigned long long)pteval);
1bf20f0d
DW
2287 if (dumps) {
2288 dumps--;
2289 debug_dma_dump_mappings(NULL);
2290 }
2291 WARN_ON(1);
2292 }
6dd9a7c7
YS
2293
2294 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2295
2296 BUG_ON(nr_pages < lvl_pages);
2297 BUG_ON(sg_res < lvl_pages);
2298
2299 nr_pages -= lvl_pages;
2300 iov_pfn += lvl_pages;
2301 phys_pfn += lvl_pages;
2302 pteval += lvl_pages * VTD_PAGE_SIZE;
2303 sg_res -= lvl_pages;
2304
2305 /* If the next PTE would be the first in a new page, then we
2306 need to flush the cache on the entries we've just written.
2307 And then we'll need to recalculate 'pte', so clear it and
2308 let it get set again in the if (!pte) block above.
2309
2310 If we're done (!nr_pages) we need to flush the cache too.
2311
2312 Also if we've been setting superpages, we may need to
2313 recalculate 'pte' and switch back to smaller pages for the
2314 end of the mapping, if the trailing size is not enough to
2315 use another superpage (i.e. sg_res < lvl_pages). */
e1605495 2316 pte++;
6dd9a7c7
YS
2317 if (!nr_pages || first_pte_in_page(pte) ||
2318 (largepage_lvl > 1 && sg_res < lvl_pages)) {
e1605495
DW
2319 domain_flush_cache(domain, first_pte,
2320 (void *)pte - (void *)first_pte);
2321 pte = NULL;
2322 }
6dd9a7c7
YS
2323
2324 if (!sg_res && nr_pages)
e1605495
DW
2325 sg = sg_next(sg);
2326 }
2327 return 0;
2328}
2329
9051aa02
DW
2330static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2331 struct scatterlist *sg, unsigned long nr_pages,
2332 int prot)
ba395927 2333{
9051aa02
DW
2334 return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
2335}
6f6a00e4 2336
9051aa02
DW
2337static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2338 unsigned long phys_pfn, unsigned long nr_pages,
2339 int prot)
2340{
2341 return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
ba395927
KA
2342}
2343
2452d9db 2344static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn)
ba395927 2345{
c7151a8d
WH
2346 if (!iommu)
2347 return;
8c11e798
WH
2348
2349 clear_context_table(iommu, bus, devfn);
2350 iommu->flush.flush_context(iommu, 0, 0, 0,
4c25a2c1 2351 DMA_CCMD_GLOBAL_INVL);
1f0ef2aa 2352 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
ba395927
KA
2353}
2354
109b9b04
DW
2355static inline void unlink_domain_info(struct device_domain_info *info)
2356{
2357 assert_spin_locked(&device_domain_lock);
2358 list_del(&info->link);
2359 list_del(&info->global);
2360 if (info->dev)
0bcb3e28 2361 info->dev->archdata.iommu = NULL;
109b9b04
DW
2362}
2363
ba395927
KA
2364static void domain_remove_dev_info(struct dmar_domain *domain)
2365{
3a74ca01 2366 struct device_domain_info *info, *tmp;
fb170fb4 2367 unsigned long flags;
ba395927
KA
2368
2369 spin_lock_irqsave(&device_domain_lock, flags);
76f45fe3 2370 list_for_each_entry_safe(info, tmp, &domain->devices, link)
127c7615 2371 __dmar_remove_one_dev_info(info);
ba395927
KA
2372 spin_unlock_irqrestore(&device_domain_lock, flags);
2373}
2374
2375/*
2376 * find_domain
1525a29a 2377 * Note: we use struct device->archdata.iommu stores the info
ba395927 2378 */
1525a29a 2379static struct dmar_domain *find_domain(struct device *dev)
ba395927
KA
2380{
2381 struct device_domain_info *info;
2382
2383 /* No lock here, assumes no domain exit in normal case */
1525a29a 2384 info = dev->archdata.iommu;
ba395927
KA
2385 if (info)
2386 return info->domain;
2387 return NULL;
2388}
2389
5a8f40e8 2390static inline struct device_domain_info *
745f2586
JL
2391dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2392{
2393 struct device_domain_info *info;
2394
2395 list_for_each_entry(info, &device_domain_list, global)
41e80dca 2396 if (info->iommu->segment == segment && info->bus == bus &&
745f2586 2397 info->devfn == devfn)
5a8f40e8 2398 return info;
745f2586
JL
2399
2400 return NULL;
2401}
2402
5db31569
JR
2403static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
2404 int bus, int devfn,
2405 struct device *dev,
2406 struct dmar_domain *domain)
745f2586 2407{
5a8f40e8 2408 struct dmar_domain *found = NULL;
745f2586
JL
2409 struct device_domain_info *info;
2410 unsigned long flags;
d160aca5 2411 int ret;
745f2586
JL
2412
2413 info = alloc_devinfo_mem();
2414 if (!info)
b718cd3d 2415 return NULL;
745f2586 2416
745f2586
JL
2417 info->bus = bus;
2418 info->devfn = devfn;
b16d0cb9
DW
2419 info->ats_supported = info->pasid_supported = info->pri_supported = 0;
2420 info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
2421 info->ats_qdep = 0;
745f2586
JL
2422 info->dev = dev;
2423 info->domain = domain;
5a8f40e8 2424 info->iommu = iommu;
745f2586 2425
b16d0cb9
DW
2426 if (dev && dev_is_pci(dev)) {
2427 struct pci_dev *pdev = to_pci_dev(info->dev);
2428
2429 if (ecap_dev_iotlb_support(iommu->ecap) &&
2430 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) &&
2431 dmar_find_matched_atsr_unit(pdev))
2432 info->ats_supported = 1;
2433
2434 if (ecs_enabled(iommu)) {
2435 if (pasid_enabled(iommu)) {
2436 int features = pci_pasid_features(pdev);
2437 if (features >= 0)
2438 info->pasid_supported = features | 1;
2439 }
2440
2441 if (info->ats_supported && ecap_prs(iommu->ecap) &&
2442 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
2443 info->pri_supported = 1;
2444 }
2445 }
2446
745f2586
JL
2447 spin_lock_irqsave(&device_domain_lock, flags);
2448 if (dev)
0bcb3e28 2449 found = find_domain(dev);
f303e507
JR
2450
2451 if (!found) {
5a8f40e8 2452 struct device_domain_info *info2;
41e80dca 2453 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
f303e507
JR
2454 if (info2) {
2455 found = info2->domain;
2456 info2->dev = dev;
2457 }
5a8f40e8 2458 }
f303e507 2459
745f2586
JL
2460 if (found) {
2461 spin_unlock_irqrestore(&device_domain_lock, flags);
2462 free_devinfo_mem(info);
b718cd3d
DW
2463 /* Caller must free the original domain */
2464 return found;
745f2586
JL
2465 }
2466
d160aca5
JR
2467 spin_lock(&iommu->lock);
2468 ret = domain_attach_iommu(domain, iommu);
2469 spin_unlock(&iommu->lock);
2470
2471 if (ret) {
c6c2cebd 2472 spin_unlock_irqrestore(&device_domain_lock, flags);
499f3aa4 2473 free_devinfo_mem(info);
c6c2cebd
JR
2474 return NULL;
2475 }
c6c2cebd 2476
b718cd3d
DW
2477 list_add(&info->link, &domain->devices);
2478 list_add(&info->global, &device_domain_list);
2479 if (dev)
2480 dev->archdata.iommu = info;
2481 spin_unlock_irqrestore(&device_domain_lock, flags);
2482
cc4e2575
JR
2483 if (dev && domain_context_mapping(domain, dev)) {
2484 pr_err("Domain context map for %s failed\n", dev_name(dev));
e6de0f8d 2485 dmar_remove_one_dev_info(domain, dev);
cc4e2575
JR
2486 return NULL;
2487 }
2488
b718cd3d 2489 return domain;
745f2586
JL
2490}
2491
579305f7
AW
2492static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2493{
2494 *(u16 *)opaque = alias;
2495 return 0;
2496}
2497
76208356 2498static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw)
ba395927 2499{
cc4e2575 2500 struct device_domain_info *info = NULL;
76208356 2501 struct dmar_domain *domain = NULL;
579305f7 2502 struct intel_iommu *iommu;
08a7f456 2503 u16 req_id, dma_alias;
ba395927 2504 unsigned long flags;
aa4d066a 2505 u8 bus, devfn;
ba395927 2506
579305f7
AW
2507 iommu = device_to_iommu(dev, &bus, &devfn);
2508 if (!iommu)
2509 return NULL;
2510
08a7f456
JR
2511 req_id = ((u16)bus << 8) | devfn;
2512
146922ec
DW
2513 if (dev_is_pci(dev)) {
2514 struct pci_dev *pdev = to_pci_dev(dev);
276dbf99 2515
579305f7
AW
2516 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2517
2518 spin_lock_irqsave(&device_domain_lock, flags);
2519 info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2520 PCI_BUS_NUM(dma_alias),
2521 dma_alias & 0xff);
2522 if (info) {
2523 iommu = info->iommu;
2524 domain = info->domain;
5a8f40e8 2525 }
579305f7 2526 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927 2527
76208356 2528 /* DMA alias already has a domain, use it */
579305f7 2529 if (info)
76208356 2530 goto out;
579305f7 2531 }
ba395927 2532
146922ec 2533 /* Allocate and initialize new domain for the device */
ab8dfe25 2534 domain = alloc_domain(0);
745f2586 2535 if (!domain)
579305f7 2536 return NULL;
dc534b25 2537 if (domain_init(domain, iommu, gaw)) {
579305f7
AW
2538 domain_exit(domain);
2539 return NULL;
2c2e2c38 2540 }
ba395927 2541
76208356 2542out:
579305f7 2543
76208356
JR
2544 return domain;
2545}
579305f7 2546
76208356
JR
2547static struct dmar_domain *set_domain_for_dev(struct device *dev,
2548 struct dmar_domain *domain)
2549{
2550 struct intel_iommu *iommu;
2551 struct dmar_domain *tmp;
2552 u16 req_id, dma_alias;
2553 u8 bus, devfn;
2554
2555 iommu = device_to_iommu(dev, &bus, &devfn);
2556 if (!iommu)
2557 return NULL;
2558
2559 req_id = ((u16)bus << 8) | devfn;
2560
2561 if (dev_is_pci(dev)) {
2562 struct pci_dev *pdev = to_pci_dev(dev);
2563
2564 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2565
2566 /* register PCI DMA alias device */
2567 if (req_id != dma_alias) {
2568 tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2569 dma_alias & 0xff, NULL, domain);
2570
2571 if (!tmp || tmp != domain)
2572 return tmp;
2573 }
ba395927
KA
2574 }
2575
5db31569 2576 tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
76208356
JR
2577 if (!tmp || tmp != domain)
2578 return tmp;
2579
2580 return domain;
2581}
579305f7 2582
76208356
JR
2583static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
2584{
2585 struct dmar_domain *domain, *tmp;
2586
2587 domain = find_domain(dev);
2588 if (domain)
2589 goto out;
2590
2591 domain = find_or_alloc_domain(dev, gaw);
2592 if (!domain)
2593 goto out;
2594
2595 tmp = set_domain_for_dev(dev, domain);
2596 if (!tmp || domain != tmp) {
579305f7
AW
2597 domain_exit(domain);
2598 domain = tmp;
2599 }
b718cd3d 2600
76208356
JR
2601out:
2602
b718cd3d 2603 return domain;
ba395927
KA
2604}
2605
b213203e
DW
2606static int iommu_domain_identity_map(struct dmar_domain *domain,
2607 unsigned long long start,
2608 unsigned long long end)
ba395927 2609{
c5395d5c
DW
2610 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2611 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2612
2613 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2614 dma_to_mm_pfn(last_vpfn))) {
9f10e5bf 2615 pr_err("Reserving iova failed\n");
b213203e 2616 return -ENOMEM;
ba395927
KA
2617 }
2618
af1089ce 2619 pr_debug("Mapping reserved region %llx-%llx\n", start, end);
ba395927
KA
2620 /*
2621 * RMRR range might have overlap with physical memory range,
2622 * clear it first
2623 */
c5395d5c 2624 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
ba395927 2625
c5395d5c
DW
2626 return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2627 last_vpfn - first_vpfn + 1,
61df7443 2628 DMA_PTE_READ|DMA_PTE_WRITE);
b213203e
DW
2629}
2630
d66ce54b
JR
2631static int domain_prepare_identity_map(struct device *dev,
2632 struct dmar_domain *domain,
2633 unsigned long long start,
2634 unsigned long long end)
b213203e 2635{
19943b0e
DW
2636 /* For _hardware_ passthrough, don't bother. But for software
2637 passthrough, we do it anyway -- it may indicate a memory
2638 range which is reserved in E820, so which didn't get set
2639 up to start with in si_domain */
2640 if (domain == si_domain && hw_pass_through) {
9f10e5bf
JR
2641 pr_warn("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2642 dev_name(dev), start, end);
19943b0e
DW
2643 return 0;
2644 }
2645
9f10e5bf
JR
2646 pr_info("Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2647 dev_name(dev), start, end);
2648
5595b528
DW
2649 if (end < start) {
2650 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2651 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2652 dmi_get_system_info(DMI_BIOS_VENDOR),
2653 dmi_get_system_info(DMI_BIOS_VERSION),
2654 dmi_get_system_info(DMI_PRODUCT_VERSION));
d66ce54b 2655 return -EIO;
5595b528
DW
2656 }
2657
2ff729f5
DW
2658 if (end >> agaw_to_width(domain->agaw)) {
2659 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2660 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2661 agaw_to_width(domain->agaw),
2662 dmi_get_system_info(DMI_BIOS_VENDOR),
2663 dmi_get_system_info(DMI_BIOS_VERSION),
2664 dmi_get_system_info(DMI_PRODUCT_VERSION));
d66ce54b 2665 return -EIO;
2ff729f5 2666 }
19943b0e 2667
d66ce54b
JR
2668 return iommu_domain_identity_map(domain, start, end);
2669}
ba395927 2670
d66ce54b
JR
2671static int iommu_prepare_identity_map(struct device *dev,
2672 unsigned long long start,
2673 unsigned long long end)
2674{
2675 struct dmar_domain *domain;
2676 int ret;
2677
2678 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2679 if (!domain)
2680 return -ENOMEM;
2681
2682 ret = domain_prepare_identity_map(dev, domain, start, end);
2683 if (ret)
2684 domain_exit(domain);
b213203e 2685
ba395927 2686 return ret;
ba395927
KA
2687}
2688
2689static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
0b9d9753 2690 struct device *dev)
ba395927 2691{
0b9d9753 2692 if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927 2693 return 0;
0b9d9753
DW
2694 return iommu_prepare_identity_map(dev, rmrr->base_address,
2695 rmrr->end_address);
ba395927
KA
2696}
2697
d3f13810 2698#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
49a0429e
KA
2699static inline void iommu_prepare_isa(void)
2700{
2701 struct pci_dev *pdev;
2702 int ret;
2703
2704 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2705 if (!pdev)
2706 return;
2707
9f10e5bf 2708 pr_info("Prepare 0-16MiB unity mapping for LPC\n");
0b9d9753 2709 ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);
49a0429e
KA
2710
2711 if (ret)
9f10e5bf 2712 pr_err("Failed to create 0-16MiB identity map - floppy might not work\n");
49a0429e 2713
9b27e82d 2714 pci_dev_put(pdev);
49a0429e
KA
2715}
2716#else
2717static inline void iommu_prepare_isa(void)
2718{
2719 return;
2720}
d3f13810 2721#endif /* !CONFIG_INTEL_IOMMU_FLPY_WA */
49a0429e 2722
2c2e2c38 2723static int md_domain_init(struct dmar_domain *domain, int guest_width);
c7ab48d2 2724
071e1374 2725static int __init si_domain_init(int hw)
2c2e2c38 2726{
c7ab48d2 2727 int nid, ret = 0;
2c2e2c38 2728
ab8dfe25 2729 si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2c2e2c38
FY
2730 if (!si_domain)
2731 return -EFAULT;
2732
2c2e2c38
FY
2733 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2734 domain_exit(si_domain);
2735 return -EFAULT;
2736 }
2737
0dc79715 2738 pr_debug("Identity mapping domain allocated\n");
2c2e2c38 2739
19943b0e
DW
2740 if (hw)
2741 return 0;
2742
c7ab48d2 2743 for_each_online_node(nid) {
5dfe8660
TH
2744 unsigned long start_pfn, end_pfn;
2745 int i;
2746
2747 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2748 ret = iommu_domain_identity_map(si_domain,
2749 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2750 if (ret)
2751 return ret;
2752 }
c7ab48d2
DW
2753 }
2754
2c2e2c38
FY
2755 return 0;
2756}
2757
9b226624 2758static int identity_mapping(struct device *dev)
2c2e2c38
FY
2759{
2760 struct device_domain_info *info;
2761
2762 if (likely(!iommu_identity_mapping))
2763 return 0;
2764
9b226624 2765 info = dev->archdata.iommu;
cb452a40
MT
2766 if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2767 return (info->domain == si_domain);
2c2e2c38 2768
2c2e2c38
FY
2769 return 0;
2770}
2771
28ccce0d 2772static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
2c2e2c38 2773{
0ac72664 2774 struct dmar_domain *ndomain;
5a8f40e8 2775 struct intel_iommu *iommu;
156baca8 2776 u8 bus, devfn;
2c2e2c38 2777
5913c9bf 2778 iommu = device_to_iommu(dev, &bus, &devfn);
5a8f40e8
DW
2779 if (!iommu)
2780 return -ENODEV;
2781
5db31569 2782 ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
0ac72664
DW
2783 if (ndomain != domain)
2784 return -EBUSY;
2c2e2c38
FY
2785
2786 return 0;
2787}
2788
0b9d9753 2789static bool device_has_rmrr(struct device *dev)
ea2447f7
TM
2790{
2791 struct dmar_rmrr_unit *rmrr;
832bd858 2792 struct device *tmp;
ea2447f7
TM
2793 int i;
2794
0e242612 2795 rcu_read_lock();
ea2447f7 2796 for_each_rmrr_units(rmrr) {
b683b230
JL
2797 /*
2798 * Return TRUE if this RMRR contains the device that
2799 * is passed in.
2800 */
2801 for_each_active_dev_scope(rmrr->devices,
2802 rmrr->devices_cnt, i, tmp)
0b9d9753 2803 if (tmp == dev) {
0e242612 2804 rcu_read_unlock();
ea2447f7 2805 return true;
b683b230 2806 }
ea2447f7 2807 }
0e242612 2808 rcu_read_unlock();
ea2447f7
TM
2809 return false;
2810}
2811
c875d2c1
AW
2812/*
2813 * There are a couple cases where we need to restrict the functionality of
2814 * devices associated with RMRRs. The first is when evaluating a device for
2815 * identity mapping because problems exist when devices are moved in and out
2816 * of domains and their respective RMRR information is lost. This means that
2817 * a device with associated RMRRs will never be in a "passthrough" domain.
2818 * The second is use of the device through the IOMMU API. This interface
2819 * expects to have full control of the IOVA space for the device. We cannot
2820 * satisfy both the requirement that RMRR access is maintained and have an
2821 * unencumbered IOVA space. We also have no ability to quiesce the device's
2822 * use of the RMRR space or even inform the IOMMU API user of the restriction.
2823 * We therefore prevent devices associated with an RMRR from participating in
2824 * the IOMMU API, which eliminates them from device assignment.
2825 *
2826 * In both cases we assume that PCI USB devices with RMRRs have them largely
2827 * for historical reasons and that the RMRR space is not actively used post
2828 * boot. This exclusion may change if vendors begin to abuse it.
18436afd
DW
2829 *
2830 * The same exception is made for graphics devices, with the requirement that
2831 * any use of the RMRR regions will be torn down before assigning the device
2832 * to a guest.
c875d2c1
AW
2833 */
2834static bool device_is_rmrr_locked(struct device *dev)
2835{
2836 if (!device_has_rmrr(dev))
2837 return false;
2838
2839 if (dev_is_pci(dev)) {
2840 struct pci_dev *pdev = to_pci_dev(dev);
2841
18436afd 2842 if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
c875d2c1
AW
2843 return false;
2844 }
2845
2846 return true;
2847}
2848
3bdb2591 2849static int iommu_should_identity_map(struct device *dev, int startup)
6941af28 2850{
ea2447f7 2851
3bdb2591
DW
2852 if (dev_is_pci(dev)) {
2853 struct pci_dev *pdev = to_pci_dev(dev);
ea2447f7 2854
c875d2c1 2855 if (device_is_rmrr_locked(dev))
3bdb2591 2856 return 0;
e0fc7e0b 2857
3bdb2591
DW
2858 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2859 return 1;
e0fc7e0b 2860
3bdb2591
DW
2861 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2862 return 1;
6941af28 2863
3bdb2591 2864 if (!(iommu_identity_mapping & IDENTMAP_ALL))
3dfc813d 2865 return 0;
3bdb2591
DW
2866
2867 /*
2868 * We want to start off with all devices in the 1:1 domain, and
2869 * take them out later if we find they can't access all of memory.
2870 *
2871 * However, we can't do this for PCI devices behind bridges,
2872 * because all PCI devices behind the same bridge will end up
2873 * with the same source-id on their transactions.
2874 *
2875 * Practically speaking, we can't change things around for these
2876 * devices at run-time, because we can't be sure there'll be no
2877 * DMA transactions in flight for any of their siblings.
2878 *
2879 * So PCI devices (unless they're on the root bus) as well as
2880 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2881 * the 1:1 domain, just in _case_ one of their siblings turns out
2882 * not to be able to map all of memory.
2883 */
2884 if (!pci_is_pcie(pdev)) {
2885 if (!pci_is_root_bus(pdev->bus))
2886 return 0;
2887 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2888 return 0;
2889 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
3dfc813d 2890 return 0;
3bdb2591
DW
2891 } else {
2892 if (device_has_rmrr(dev))
2893 return 0;
2894 }
3dfc813d 2895
3bdb2591 2896 /*
3dfc813d 2897 * At boot time, we don't yet know if devices will be 64-bit capable.
3bdb2591 2898 * Assume that they will — if they turn out not to be, then we can
3dfc813d
DW
2899 * take them out of the 1:1 domain later.
2900 */
8fcc5372
CW
2901 if (!startup) {
2902 /*
2903 * If the device's dma_mask is less than the system's memory
2904 * size then this is not a candidate for identity mapping.
2905 */
3bdb2591 2906 u64 dma_mask = *dev->dma_mask;
8fcc5372 2907
3bdb2591
DW
2908 if (dev->coherent_dma_mask &&
2909 dev->coherent_dma_mask < dma_mask)
2910 dma_mask = dev->coherent_dma_mask;
8fcc5372 2911
3bdb2591 2912 return dma_mask >= dma_get_required_mask(dev);
8fcc5372 2913 }
6941af28
DW
2914
2915 return 1;
2916}
2917
cf04eee8
DW
2918static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
2919{
2920 int ret;
2921
2922 if (!iommu_should_identity_map(dev, 1))
2923 return 0;
2924
28ccce0d 2925 ret = domain_add_dev_info(si_domain, dev);
cf04eee8 2926 if (!ret)
9f10e5bf
JR
2927 pr_info("%s identity mapping for device %s\n",
2928 hw ? "Hardware" : "Software", dev_name(dev));
cf04eee8
DW
2929 else if (ret == -ENODEV)
2930 /* device not associated with an iommu */
2931 ret = 0;
2932
2933 return ret;
2934}
2935
2936
071e1374 2937static int __init iommu_prepare_static_identity_mapping(int hw)
2c2e2c38 2938{
2c2e2c38 2939 struct pci_dev *pdev = NULL;
cf04eee8
DW
2940 struct dmar_drhd_unit *drhd;
2941 struct intel_iommu *iommu;
2942 struct device *dev;
2943 int i;
2944 int ret = 0;
2c2e2c38 2945
2c2e2c38 2946 for_each_pci_dev(pdev) {
cf04eee8
DW
2947 ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
2948 if (ret)
2949 return ret;
2950 }
2951
2952 for_each_active_iommu(iommu, drhd)
2953 for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
2954 struct acpi_device_physical_node *pn;
2955 struct acpi_device *adev;
2956
2957 if (dev->bus != &acpi_bus_type)
2958 continue;
86080ccc 2959
cf04eee8
DW
2960 adev= to_acpi_device(dev);
2961 mutex_lock(&adev->physical_node_lock);
2962 list_for_each_entry(pn, &adev->physical_node_list, node) {
2963 ret = dev_prepare_static_identity_mapping(pn->dev, hw);
2964 if (ret)
2965 break;
eae460b6 2966 }
cf04eee8
DW
2967 mutex_unlock(&adev->physical_node_lock);
2968 if (ret)
2969 return ret;
62edf5dc 2970 }
2c2e2c38
FY
2971
2972 return 0;
2973}
2974
ffebeb46
JL
2975static void intel_iommu_init_qi(struct intel_iommu *iommu)
2976{
2977 /*
2978 * Start from the sane iommu hardware state.
2979 * If the queued invalidation is already initialized by us
2980 * (for example, while enabling interrupt-remapping) then
2981 * we got the things already rolling from a sane state.
2982 */
2983 if (!iommu->qi) {
2984 /*
2985 * Clear any previous faults.
2986 */
2987 dmar_fault(-1, iommu);
2988 /*
2989 * Disable queued invalidation if supported and already enabled
2990 * before OS handover.
2991 */
2992 dmar_disable_qi(iommu);
2993 }
2994
2995 if (dmar_enable_qi(iommu)) {
2996 /*
2997 * Queued Invalidate not enabled, use Register Based Invalidate
2998 */
2999 iommu->flush.flush_context = __iommu_flush_context;
3000 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
9f10e5bf 3001 pr_info("%s: Using Register based invalidation\n",
ffebeb46
JL
3002 iommu->name);
3003 } else {
3004 iommu->flush.flush_context = qi_flush_context;
3005 iommu->flush.flush_iotlb = qi_flush_iotlb;
9f10e5bf 3006 pr_info("%s: Using Queued invalidation\n", iommu->name);
ffebeb46
JL
3007 }
3008}
3009
091d42e4 3010static int copy_context_table(struct intel_iommu *iommu,
dfddb969 3011 struct root_entry *old_re,
091d42e4
JR
3012 struct context_entry **tbl,
3013 int bus, bool ext)
3014{
dbcd861f 3015 int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
543c8dcf 3016 struct context_entry *new_ce = NULL, ce;
dfddb969 3017 struct context_entry *old_ce = NULL;
543c8dcf 3018 struct root_entry re;
091d42e4
JR
3019 phys_addr_t old_ce_phys;
3020
3021 tbl_idx = ext ? bus * 2 : bus;
dfddb969 3022 memcpy(&re, old_re, sizeof(re));
091d42e4
JR
3023
3024 for (devfn = 0; devfn < 256; devfn++) {
3025 /* First calculate the correct index */
3026 idx = (ext ? devfn * 2 : devfn) % 256;
3027
3028 if (idx == 0) {
3029 /* First save what we may have and clean up */
3030 if (new_ce) {
3031 tbl[tbl_idx] = new_ce;
3032 __iommu_flush_cache(iommu, new_ce,
3033 VTD_PAGE_SIZE);
3034 pos = 1;
3035 }
3036
3037 if (old_ce)
3038 iounmap(old_ce);
3039
3040 ret = 0;
3041 if (devfn < 0x80)
543c8dcf 3042 old_ce_phys = root_entry_lctp(&re);
091d42e4 3043 else
543c8dcf 3044 old_ce_phys = root_entry_uctp(&re);
091d42e4
JR
3045
3046 if (!old_ce_phys) {
3047 if (ext && devfn == 0) {
3048 /* No LCTP, try UCTP */
3049 devfn = 0x7f;
3050 continue;
3051 } else {
3052 goto out;
3053 }
3054 }
3055
3056 ret = -ENOMEM;
dfddb969
DW
3057 old_ce = memremap(old_ce_phys, PAGE_SIZE,
3058 MEMREMAP_WB);
091d42e4
JR
3059 if (!old_ce)
3060 goto out;
3061
3062 new_ce = alloc_pgtable_page(iommu->node);
3063 if (!new_ce)
3064 goto out_unmap;
3065
3066 ret = 0;
3067 }
3068
3069 /* Now copy the context entry */
dfddb969 3070 memcpy(&ce, old_ce + idx, sizeof(ce));
091d42e4 3071
cf484d0e 3072 if (!__context_present(&ce))
091d42e4
JR
3073 continue;
3074
dbcd861f
JR
3075 did = context_domain_id(&ce);
3076 if (did >= 0 && did < cap_ndoms(iommu->cap))
3077 set_bit(did, iommu->domain_ids);
3078
cf484d0e
JR
3079 /*
3080 * We need a marker for copied context entries. This
3081 * marker needs to work for the old format as well as
3082 * for extended context entries.
3083 *
3084 * Bit 67 of the context entry is used. In the old
3085 * format this bit is available to software, in the
3086 * extended format it is the PGE bit, but PGE is ignored
3087 * by HW if PASIDs are disabled (and thus still
3088 * available).
3089 *
3090 * So disable PASIDs first and then mark the entry
3091 * copied. This means that we don't copy PASID
3092 * translations from the old kernel, but this is fine as
3093 * faults there are not fatal.
3094 */
3095 context_clear_pasid_enable(&ce);
3096 context_set_copied(&ce);
3097
091d42e4
JR
3098 new_ce[idx] = ce;
3099 }
3100
3101 tbl[tbl_idx + pos] = new_ce;
3102
3103 __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
3104
3105out_unmap:
dfddb969 3106 memunmap(old_ce);
091d42e4
JR
3107
3108out:
3109 return ret;
3110}
3111
3112static int copy_translation_tables(struct intel_iommu *iommu)
3113{
3114 struct context_entry **ctxt_tbls;
dfddb969 3115 struct root_entry *old_rt;
091d42e4
JR
3116 phys_addr_t old_rt_phys;
3117 int ctxt_table_entries;
3118 unsigned long flags;
3119 u64 rtaddr_reg;
3120 int bus, ret;
c3361f2f 3121 bool new_ext, ext;
091d42e4
JR
3122
3123 rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
3124 ext = !!(rtaddr_reg & DMA_RTADDR_RTT);
c3361f2f
JR
3125 new_ext = !!ecap_ecs(iommu->ecap);
3126
3127 /*
3128 * The RTT bit can only be changed when translation is disabled,
3129 * but disabling translation means to open a window for data
3130 * corruption. So bail out and don't copy anything if we would
3131 * have to change the bit.
3132 */
3133 if (new_ext != ext)
3134 return -EINVAL;
091d42e4
JR
3135
3136 old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
3137 if (!old_rt_phys)
3138 return -EINVAL;
3139
dfddb969 3140 old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
091d42e4
JR
3141 if (!old_rt)
3142 return -ENOMEM;
3143
3144 /* This is too big for the stack - allocate it from slab */
3145 ctxt_table_entries = ext ? 512 : 256;
3146 ret = -ENOMEM;
3147 ctxt_tbls = kzalloc(ctxt_table_entries * sizeof(void *), GFP_KERNEL);
3148 if (!ctxt_tbls)
3149 goto out_unmap;
3150
3151 for (bus = 0; bus < 256; bus++) {
3152 ret = copy_context_table(iommu, &old_rt[bus],
3153 ctxt_tbls, bus, ext);
3154 if (ret) {
3155 pr_err("%s: Failed to copy context table for bus %d\n",
3156 iommu->name, bus);
3157 continue;
3158 }
3159 }
3160
3161 spin_lock_irqsave(&iommu->lock, flags);
3162
3163 /* Context tables are copied, now write them to the root_entry table */
3164 for (bus = 0; bus < 256; bus++) {
3165 int idx = ext ? bus * 2 : bus;
3166 u64 val;
3167
3168 if (ctxt_tbls[idx]) {
3169 val = virt_to_phys(ctxt_tbls[idx]) | 1;
3170 iommu->root_entry[bus].lo = val;
3171 }
3172
3173 if (!ext || !ctxt_tbls[idx + 1])
3174 continue;
3175
3176 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
3177 iommu->root_entry[bus].hi = val;
3178 }
3179
3180 spin_unlock_irqrestore(&iommu->lock, flags);
3181
3182 kfree(ctxt_tbls);
3183
3184 __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
3185
3186 ret = 0;
3187
3188out_unmap:
dfddb969 3189 memunmap(old_rt);
091d42e4
JR
3190
3191 return ret;
3192}
3193
b779260b 3194static int __init init_dmars(void)
ba395927
KA
3195{
3196 struct dmar_drhd_unit *drhd;
3197 struct dmar_rmrr_unit *rmrr;
a87f4918 3198 bool copied_tables = false;
832bd858 3199 struct device *dev;
ba395927 3200 struct intel_iommu *iommu;
aa473240 3201 int i, ret, cpu;
2c2e2c38 3202
ba395927
KA
3203 /*
3204 * for each drhd
3205 * allocate root
3206 * initialize and program root entry to not present
3207 * endfor
3208 */
3209 for_each_drhd_unit(drhd) {
5e0d2a6f 3210 /*
3211 * lock not needed as this is only incremented in the single
3212 * threaded kernel __init code path all other access are read
3213 * only
3214 */
78d8e704 3215 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
1b198bb0
MT
3216 g_num_of_iommus++;
3217 continue;
3218 }
9f10e5bf 3219 pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
5e0d2a6f 3220 }
3221
ffebeb46
JL
3222 /* Preallocate enough resources for IOMMU hot-addition */
3223 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
3224 g_num_of_iommus = DMAR_UNITS_SUPPORTED;
3225
d9630fe9
WH
3226 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
3227 GFP_KERNEL);
3228 if (!g_iommus) {
9f10e5bf 3229 pr_err("Allocating global iommu array failed\n");
d9630fe9
WH
3230 ret = -ENOMEM;
3231 goto error;
3232 }
3233
aa473240
OP
3234 for_each_possible_cpu(cpu) {
3235 struct deferred_flush_data *dfd = per_cpu_ptr(&deferred_flush,
3236 cpu);
3237
3238 dfd->tables = kzalloc(g_num_of_iommus *
3239 sizeof(struct deferred_flush_table),
3240 GFP_KERNEL);
3241 if (!dfd->tables) {
3242 ret = -ENOMEM;
3243 goto free_g_iommus;
3244 }
3245
3246 spin_lock_init(&dfd->lock);
3247 setup_timer(&dfd->timer, flush_unmaps_timeout, cpu);
5e0d2a6f 3248 }
3249
7c919779 3250 for_each_active_iommu(iommu, drhd) {
d9630fe9 3251 g_iommus[iommu->seq_id] = iommu;
ba395927 3252
b63d80d1
JR
3253 intel_iommu_init_qi(iommu);
3254
e61d98d8
SS
3255 ret = iommu_init_domains(iommu);
3256 if (ret)
989d51fc 3257 goto free_iommu;
e61d98d8 3258
4158c2ec
JR
3259 init_translation_status(iommu);
3260
091d42e4
JR
3261 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
3262 iommu_disable_translation(iommu);
3263 clear_translation_pre_enabled(iommu);
3264 pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3265 iommu->name);
3266 }
4158c2ec 3267
ba395927
KA
3268 /*
3269 * TBD:
3270 * we could share the same root & context tables
25985edc 3271 * among all IOMMU's. Need to Split it later.
ba395927
KA
3272 */
3273 ret = iommu_alloc_root_entry(iommu);
ffebeb46 3274 if (ret)
989d51fc 3275 goto free_iommu;
5f0a7f76 3276
091d42e4
JR
3277 if (translation_pre_enabled(iommu)) {
3278 pr_info("Translation already enabled - trying to copy translation structures\n");
3279
3280 ret = copy_translation_tables(iommu);
3281 if (ret) {
3282 /*
3283 * We found the IOMMU with translation
3284 * enabled - but failed to copy over the
3285 * old root-entry table. Try to proceed
3286 * by disabling translation now and
3287 * allocating a clean root-entry table.
3288 * This might cause DMAR faults, but
3289 * probably the dump will still succeed.
3290 */
3291 pr_err("Failed to copy translation tables from previous kernel for %s\n",
3292 iommu->name);
3293 iommu_disable_translation(iommu);
3294 clear_translation_pre_enabled(iommu);
3295 } else {
3296 pr_info("Copied translation tables from previous kernel for %s\n",
3297 iommu->name);
a87f4918 3298 copied_tables = true;
091d42e4
JR
3299 }
3300 }
3301
4ed0d3e6 3302 if (!ecap_pass_through(iommu->ecap))
19943b0e 3303 hw_pass_through = 0;
8a94ade4
DW
3304#ifdef CONFIG_INTEL_IOMMU_SVM
3305 if (pasid_enabled(iommu))
3306 intel_svm_alloc_pasid_tables(iommu);
3307#endif
ba395927
KA
3308 }
3309
a4c34ff1
JR
3310 /*
3311 * Now that qi is enabled on all iommus, set the root entry and flush
3312 * caches. This is required on some Intel X58 chipsets, otherwise the
3313 * flush_context function will loop forever and the boot hangs.
3314 */
3315 for_each_active_iommu(iommu, drhd) {
3316 iommu_flush_write_buffer(iommu);
3317 iommu_set_root_entry(iommu);
3318 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
3319 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3320 }
3321
19943b0e 3322 if (iommu_pass_through)
e0fc7e0b
DW
3323 iommu_identity_mapping |= IDENTMAP_ALL;
3324
d3f13810 3325#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
e0fc7e0b 3326 iommu_identity_mapping |= IDENTMAP_GFX;
19943b0e 3327#endif
e0fc7e0b 3328
21e722c4
AR
3329 check_tylersburg_isoch();
3330
86080ccc
JR
3331 if (iommu_identity_mapping) {
3332 ret = si_domain_init(hw_pass_through);
3333 if (ret)
3334 goto free_iommu;
3335 }
3336
e0fc7e0b 3337
a87f4918
JR
3338 /*
3339 * If we copied translations from a previous kernel in the kdump
3340 * case, we can not assign the devices to domains now, as that
3341 * would eliminate the old mappings. So skip this part and defer
3342 * the assignment to device driver initialization time.
3343 */
3344 if (copied_tables)
3345 goto domains_done;
3346
ba395927 3347 /*
19943b0e
DW
3348 * If pass through is not set or not enabled, setup context entries for
3349 * identity mappings for rmrr, gfx, and isa and may fall back to static
3350 * identity mapping if iommu_identity_mapping is set.
ba395927 3351 */
19943b0e
DW
3352 if (iommu_identity_mapping) {
3353 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
4ed0d3e6 3354 if (ret) {
9f10e5bf 3355 pr_crit("Failed to setup IOMMU pass-through\n");
989d51fc 3356 goto free_iommu;
ba395927
KA
3357 }
3358 }
ba395927 3359 /*
19943b0e
DW
3360 * For each rmrr
3361 * for each dev attached to rmrr
3362 * do
3363 * locate drhd for dev, alloc domain for dev
3364 * allocate free domain
3365 * allocate page table entries for rmrr
3366 * if context not allocated for bus
3367 * allocate and init context
3368 * set present in root table for this bus
3369 * init context with domain, translation etc
3370 * endfor
3371 * endfor
ba395927 3372 */
9f10e5bf 3373 pr_info("Setting RMRR:\n");
19943b0e 3374 for_each_rmrr_units(rmrr) {
b683b230
JL
3375 /* some BIOS lists non-exist devices in DMAR table. */
3376 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
832bd858 3377 i, dev) {
0b9d9753 3378 ret = iommu_prepare_rmrr_dev(rmrr, dev);
19943b0e 3379 if (ret)
9f10e5bf 3380 pr_err("Mapping reserved region failed\n");
ba395927 3381 }
4ed0d3e6 3382 }
49a0429e 3383
19943b0e
DW
3384 iommu_prepare_isa();
3385
a87f4918
JR
3386domains_done:
3387
ba395927
KA
3388 /*
3389 * for each drhd
3390 * enable fault log
3391 * global invalidate context cache
3392 * global invalidate iotlb
3393 * enable translation
3394 */
7c919779 3395 for_each_iommu(iommu, drhd) {
51a63e67
JC
3396 if (drhd->ignored) {
3397 /*
3398 * we always have to disable PMRs or DMA may fail on
3399 * this device
3400 */
3401 if (force_on)
7c919779 3402 iommu_disable_protect_mem_regions(iommu);
ba395927 3403 continue;
51a63e67 3404 }
ba395927
KA
3405
3406 iommu_flush_write_buffer(iommu);
3407
a222a7f0
DW
3408#ifdef CONFIG_INTEL_IOMMU_SVM
3409 if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) {
3410 ret = intel_svm_enable_prq(iommu);
3411 if (ret)
3412 goto free_iommu;
3413 }
3414#endif
3460a6d9
KA
3415 ret = dmar_set_interrupt(iommu);
3416 if (ret)
989d51fc 3417 goto free_iommu;
3460a6d9 3418
8939ddf6
JR
3419 if (!translation_pre_enabled(iommu))
3420 iommu_enable_translation(iommu);
3421
b94996c9 3422 iommu_disable_protect_mem_regions(iommu);
ba395927
KA
3423 }
3424
3425 return 0;
989d51fc
JL
3426
3427free_iommu:
ffebeb46
JL
3428 for_each_active_iommu(iommu, drhd) {
3429 disable_dmar_iommu(iommu);
a868e6b7 3430 free_dmar_iommu(iommu);
ffebeb46 3431 }
989d51fc 3432free_g_iommus:
aa473240
OP
3433 for_each_possible_cpu(cpu)
3434 kfree(per_cpu_ptr(&deferred_flush, cpu)->tables);
d9630fe9 3435 kfree(g_iommus);
989d51fc 3436error:
ba395927
KA
3437 return ret;
3438}
3439
5a5e02a6 3440/* This takes a number of _MM_ pages, not VTD pages */
2aac6304 3441static unsigned long intel_alloc_iova(struct device *dev,
875764de
DW
3442 struct dmar_domain *domain,
3443 unsigned long nrpages, uint64_t dma_mask)
ba395927 3444{
22e2f9fa 3445 unsigned long iova_pfn = 0;
ba395927 3446
875764de
DW
3447 /* Restrict dma_mask to the width that the iommu can handle */
3448 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
8f6429c7
RM
3449 /* Ensure we reserve the whole size-aligned region */
3450 nrpages = __roundup_pow_of_two(nrpages);
875764de
DW
3451
3452 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
ba395927
KA
3453 /*
3454 * First try to allocate an io virtual address in
284901a9 3455 * DMA_BIT_MASK(32) and if that fails then try allocating
3609801e 3456 * from higher range
ba395927 3457 */
22e2f9fa
OP
3458 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
3459 IOVA_PFN(DMA_BIT_MASK(32)));
3460 if (iova_pfn)
3461 return iova_pfn;
875764de 3462 }
22e2f9fa
OP
3463 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages, IOVA_PFN(dma_mask));
3464 if (unlikely(!iova_pfn)) {
9f10e5bf 3465 pr_err("Allocating %ld-page iova for %s failed",
207e3592 3466 nrpages, dev_name(dev));
2aac6304 3467 return 0;
f76aec76
KA
3468 }
3469
22e2f9fa 3470 return iova_pfn;
f76aec76
KA
3471}
3472
d4b709f4 3473static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
f76aec76 3474{
1c5ebba9 3475 struct dmar_domain *domain, *tmp;
b1ce5b79 3476 struct dmar_rmrr_unit *rmrr;
b1ce5b79
JR
3477 struct device *i_dev;
3478 int i, ret;
f76aec76 3479
1c5ebba9
JR
3480 domain = find_domain(dev);
3481 if (domain)
3482 goto out;
3483
3484 domain = find_or_alloc_domain(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
3485 if (!domain)
3486 goto out;
ba395927 3487
b1ce5b79
JR
3488 /* We have a new domain - setup possible RMRRs for the device */
3489 rcu_read_lock();
3490 for_each_rmrr_units(rmrr) {
3491 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
3492 i, i_dev) {
3493 if (i_dev != dev)
3494 continue;
3495
3496 ret = domain_prepare_identity_map(dev, domain,
3497 rmrr->base_address,
3498 rmrr->end_address);
3499 if (ret)
3500 dev_err(dev, "Mapping reserved region failed\n");
3501 }
3502 }
3503 rcu_read_unlock();
3504
1c5ebba9
JR
3505 tmp = set_domain_for_dev(dev, domain);
3506 if (!tmp || domain != tmp) {
3507 domain_exit(domain);
3508 domain = tmp;
3509 }
3510
3511out:
3512
3513 if (!domain)
3514 pr_err("Allocating domain for %s failed\n", dev_name(dev));
3515
3516
f76aec76
KA
3517 return domain;
3518}
3519
d4b709f4 3520static inline struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
147202aa
DW
3521{
3522 struct device_domain_info *info;
3523
3524 /* No lock here, assumes no domain exit in normal case */
d4b709f4 3525 info = dev->archdata.iommu;
147202aa
DW
3526 if (likely(info))
3527 return info->domain;
3528
3529 return __get_valid_domain_for_dev(dev);
3530}
3531
ecb509ec 3532/* Check if the dev needs to go through non-identity map and unmap process.*/
73676832 3533static int iommu_no_mapping(struct device *dev)
2c2e2c38
FY
3534{
3535 int found;
3536
3d89194a 3537 if (iommu_dummy(dev))
1e4c64c4
DW
3538 return 1;
3539
2c2e2c38 3540 if (!iommu_identity_mapping)
1e4c64c4 3541 return 0;
2c2e2c38 3542
9b226624 3543 found = identity_mapping(dev);
2c2e2c38 3544 if (found) {
ecb509ec 3545 if (iommu_should_identity_map(dev, 0))
2c2e2c38
FY
3546 return 1;
3547 else {
3548 /*
3549 * 32 bit DMA is removed from si_domain and fall back
3550 * to non-identity mapping.
3551 */
e6de0f8d 3552 dmar_remove_one_dev_info(si_domain, dev);
9f10e5bf
JR
3553 pr_info("32bit %s uses non-identity mapping\n",
3554 dev_name(dev));
2c2e2c38
FY
3555 return 0;
3556 }
3557 } else {
3558 /*
3559 * In case of a detached 64 bit DMA device from vm, the device
3560 * is put into si_domain for identity mapping.
3561 */
ecb509ec 3562 if (iommu_should_identity_map(dev, 0)) {
2c2e2c38 3563 int ret;
28ccce0d 3564 ret = domain_add_dev_info(si_domain, dev);
2c2e2c38 3565 if (!ret) {
9f10e5bf
JR
3566 pr_info("64bit %s uses identity mapping\n",
3567 dev_name(dev));
2c2e2c38
FY
3568 return 1;
3569 }
3570 }
3571 }
3572
1e4c64c4 3573 return 0;
2c2e2c38
FY
3574}
3575
5040a918 3576static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
bb9e6d65 3577 size_t size, int dir, u64 dma_mask)
f76aec76 3578{
f76aec76 3579 struct dmar_domain *domain;
5b6985ce 3580 phys_addr_t start_paddr;
2aac6304 3581 unsigned long iova_pfn;
f76aec76 3582 int prot = 0;
6865f0d1 3583 int ret;
8c11e798 3584 struct intel_iommu *iommu;
33041ec0 3585 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
f76aec76
KA
3586
3587 BUG_ON(dir == DMA_NONE);
2c2e2c38 3588
5040a918 3589 if (iommu_no_mapping(dev))
6865f0d1 3590 return paddr;
f76aec76 3591
5040a918 3592 domain = get_valid_domain_for_dev(dev);
f76aec76
KA
3593 if (!domain)
3594 return 0;
3595
8c11e798 3596 iommu = domain_get_iommu(domain);
88cb6a74 3597 size = aligned_nrpages(paddr, size);
f76aec76 3598
2aac6304
OP
3599 iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
3600 if (!iova_pfn)
f76aec76
KA
3601 goto error;
3602
ba395927
KA
3603 /*
3604 * Check if DMAR supports zero-length reads on write only
3605 * mappings..
3606 */
3607 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3608 !cap_zlr(iommu->cap))
ba395927
KA
3609 prot |= DMA_PTE_READ;
3610 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3611 prot |= DMA_PTE_WRITE;
3612 /*
6865f0d1 3613 * paddr - (paddr + size) might be partial page, we should map the whole
ba395927 3614 * page. Note: if two part of one page are separately mapped, we
6865f0d1 3615 * might have two guest_addr mapping to the same host paddr, but this
ba395927
KA
3616 * is not a big problem
3617 */
2aac6304 3618 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova_pfn),
33041ec0 3619 mm_to_dma_pfn(paddr_pfn), size, prot);
ba395927
KA
3620 if (ret)
3621 goto error;
3622
1f0ef2aa
DW
3623 /* it's a non-present to present mapping. Only flush if caching mode */
3624 if (cap_caching_mode(iommu->cap))
a1ddcbe9 3625 iommu_flush_iotlb_psi(iommu, domain,
2aac6304 3626 mm_to_dma_pfn(iova_pfn),
a1ddcbe9 3627 size, 0, 1);
1f0ef2aa 3628 else
8c11e798 3629 iommu_flush_write_buffer(iommu);
f76aec76 3630
2aac6304 3631 start_paddr = (phys_addr_t)iova_pfn << PAGE_SHIFT;
03d6a246
DW
3632 start_paddr += paddr & ~PAGE_MASK;
3633 return start_paddr;
ba395927 3634
ba395927 3635error:
2aac6304 3636 if (iova_pfn)
22e2f9fa 3637 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
9f10e5bf 3638 pr_err("Device %s request: %zx@%llx dir %d --- failed\n",
5040a918 3639 dev_name(dev), size, (unsigned long long)paddr, dir);
ba395927
KA
3640 return 0;
3641}
3642
ffbbef5c
FT
3643static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3644 unsigned long offset, size_t size,
3645 enum dma_data_direction dir,
00085f1e 3646 unsigned long attrs)
bb9e6d65 3647{
ffbbef5c 3648 return __intel_map_single(dev, page_to_phys(page) + offset, size,
46333e37 3649 dir, *dev->dma_mask);
bb9e6d65
FT
3650}
3651
aa473240 3652static void flush_unmaps(struct deferred_flush_data *flush_data)
5e0d2a6f 3653{
80b20dd8 3654 int i, j;
5e0d2a6f 3655
aa473240 3656 flush_data->timer_on = 0;
5e0d2a6f 3657
3658 /* just flush them all */
3659 for (i = 0; i < g_num_of_iommus; i++) {
a2bb8459 3660 struct intel_iommu *iommu = g_iommus[i];
aa473240
OP
3661 struct deferred_flush_table *flush_table =
3662 &flush_data->tables[i];
a2bb8459
WH
3663 if (!iommu)
3664 continue;
c42d9f32 3665
aa473240 3666 if (!flush_table->next)
9dd2fe89
YZ
3667 continue;
3668
78d5f0f5
NA
3669 /* In caching mode, global flushes turn emulation expensive */
3670 if (!cap_caching_mode(iommu->cap))
3671 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
93a23a72 3672 DMA_TLB_GLOBAL_FLUSH);
aa473240 3673 for (j = 0; j < flush_table->next; j++) {
93a23a72 3674 unsigned long mask;
314f1dc1 3675 struct deferred_flush_entry *entry =
aa473240 3676 &flush_table->entries[j];
2aac6304 3677 unsigned long iova_pfn = entry->iova_pfn;
769530e4 3678 unsigned long nrpages = entry->nrpages;
314f1dc1
OP
3679 struct dmar_domain *domain = entry->domain;
3680 struct page *freelist = entry->freelist;
78d5f0f5
NA
3681
3682 /* On real hardware multiple invalidations are expensive */
3683 if (cap_caching_mode(iommu->cap))
a1ddcbe9 3684 iommu_flush_iotlb_psi(iommu, domain,
2aac6304 3685 mm_to_dma_pfn(iova_pfn),
769530e4 3686 nrpages, !freelist, 0);
78d5f0f5 3687 else {
769530e4 3688 mask = ilog2(nrpages);
314f1dc1 3689 iommu_flush_dev_iotlb(domain,
2aac6304 3690 (uint64_t)iova_pfn << PAGE_SHIFT, mask);
78d5f0f5 3691 }
22e2f9fa 3692 free_iova_fast(&domain->iovad, iova_pfn, nrpages);
314f1dc1
OP
3693 if (freelist)
3694 dma_free_pagelist(freelist);
80b20dd8 3695 }
aa473240 3696 flush_table->next = 0;
5e0d2a6f 3697 }
3698
aa473240 3699 flush_data->size = 0;
5e0d2a6f 3700}
3701
aa473240 3702static void flush_unmaps_timeout(unsigned long cpuid)
5e0d2a6f 3703{
aa473240 3704 struct deferred_flush_data *flush_data = per_cpu_ptr(&deferred_flush, cpuid);
80b20dd8 3705 unsigned long flags;
3706
aa473240
OP
3707 spin_lock_irqsave(&flush_data->lock, flags);
3708 flush_unmaps(flush_data);
3709 spin_unlock_irqrestore(&flush_data->lock, flags);
5e0d2a6f 3710}
3711
2aac6304 3712static void add_unmap(struct dmar_domain *dom, unsigned long iova_pfn,
769530e4 3713 unsigned long nrpages, struct page *freelist)
5e0d2a6f 3714{
3715 unsigned long flags;
314f1dc1 3716 int entry_id, iommu_id;
8c11e798 3717 struct intel_iommu *iommu;
314f1dc1 3718 struct deferred_flush_entry *entry;
aa473240
OP
3719 struct deferred_flush_data *flush_data;
3720 unsigned int cpuid;
5e0d2a6f 3721
aa473240
OP
3722 cpuid = get_cpu();
3723 flush_data = per_cpu_ptr(&deferred_flush, cpuid);
3724
3725 /* Flush all CPUs' entries to avoid deferring too much. If
3726 * this becomes a bottleneck, can just flush us, and rely on
3727 * flush timer for the rest.
3728 */
3729 if (flush_data->size == HIGH_WATER_MARK) {
3730 int cpu;
3731
3732 for_each_online_cpu(cpu)
3733 flush_unmaps_timeout(cpu);
3734 }
3735
3736 spin_lock_irqsave(&flush_data->lock, flags);
80b20dd8 3737
8c11e798
WH
3738 iommu = domain_get_iommu(dom);
3739 iommu_id = iommu->seq_id;
c42d9f32 3740
aa473240
OP
3741 entry_id = flush_data->tables[iommu_id].next;
3742 ++(flush_data->tables[iommu_id].next);
5e0d2a6f 3743
aa473240 3744 entry = &flush_data->tables[iommu_id].entries[entry_id];
314f1dc1 3745 entry->domain = dom;
2aac6304 3746 entry->iova_pfn = iova_pfn;
769530e4 3747 entry->nrpages = nrpages;
314f1dc1 3748 entry->freelist = freelist;
5e0d2a6f 3749
aa473240
OP
3750 if (!flush_data->timer_on) {
3751 mod_timer(&flush_data->timer, jiffies + msecs_to_jiffies(10));
3752 flush_data->timer_on = 1;
5e0d2a6f 3753 }
aa473240
OP
3754 flush_data->size++;
3755 spin_unlock_irqrestore(&flush_data->lock, flags);
3756
3757 put_cpu();
5e0d2a6f 3758}
3759
769530e4 3760static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
ba395927 3761{
f76aec76 3762 struct dmar_domain *domain;
d794dc9b 3763 unsigned long start_pfn, last_pfn;
769530e4 3764 unsigned long nrpages;
2aac6304 3765 unsigned long iova_pfn;
8c11e798 3766 struct intel_iommu *iommu;
ea8ea460 3767 struct page *freelist;
ba395927 3768
73676832 3769 if (iommu_no_mapping(dev))
f76aec76 3770 return;
2c2e2c38 3771
1525a29a 3772 domain = find_domain(dev);
ba395927
KA
3773 BUG_ON(!domain);
3774
8c11e798
WH
3775 iommu = domain_get_iommu(domain);
3776
2aac6304 3777 iova_pfn = IOVA_PFN(dev_addr);
ba395927 3778
769530e4 3779 nrpages = aligned_nrpages(dev_addr, size);
2aac6304 3780 start_pfn = mm_to_dma_pfn(iova_pfn);
769530e4 3781 last_pfn = start_pfn + nrpages - 1;
ba395927 3782
d794dc9b 3783 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
207e3592 3784 dev_name(dev), start_pfn, last_pfn);
ba395927 3785
ea8ea460 3786 freelist = domain_unmap(domain, start_pfn, last_pfn);
d794dc9b 3787
5e0d2a6f 3788 if (intel_iommu_strict) {
a1ddcbe9 3789 iommu_flush_iotlb_psi(iommu, domain, start_pfn,
769530e4 3790 nrpages, !freelist, 0);
5e0d2a6f 3791 /* free iova */
22e2f9fa 3792 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages));
ea8ea460 3793 dma_free_pagelist(freelist);
5e0d2a6f 3794 } else {
2aac6304 3795 add_unmap(domain, iova_pfn, nrpages, freelist);
5e0d2a6f 3796 /*
3797 * queue up the release of the unmap to save the 1/6th of the
3798 * cpu used up by the iotlb flush operation...
3799 */
5e0d2a6f 3800 }
ba395927
KA
3801}
3802
d41a4adb
JL
3803static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3804 size_t size, enum dma_data_direction dir,
00085f1e 3805 unsigned long attrs)
d41a4adb 3806{
769530e4 3807 intel_unmap(dev, dev_addr, size);
d41a4adb
JL
3808}
3809
5040a918 3810static void *intel_alloc_coherent(struct device *dev, size_t size,
baa676fc 3811 dma_addr_t *dma_handle, gfp_t flags,
00085f1e 3812 unsigned long attrs)
ba395927 3813{
36746436 3814 struct page *page = NULL;
ba395927
KA
3815 int order;
3816
5b6985ce 3817 size = PAGE_ALIGN(size);
ba395927 3818 order = get_order(size);
e8bb910d 3819
5040a918 3820 if (!iommu_no_mapping(dev))
e8bb910d 3821 flags &= ~(GFP_DMA | GFP_DMA32);
5040a918
DW
3822 else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
3823 if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
e8bb910d
AW
3824 flags |= GFP_DMA;
3825 else
3826 flags |= GFP_DMA32;
3827 }
ba395927 3828
d0164adc 3829 if (gfpflags_allow_blocking(flags)) {
36746436
AM
3830 unsigned int count = size >> PAGE_SHIFT;
3831
712c604d 3832 page = dma_alloc_from_contiguous(dev, count, order, flags);
36746436
AM
3833 if (page && iommu_no_mapping(dev) &&
3834 page_to_phys(page) + size > dev->coherent_dma_mask) {
3835 dma_release_from_contiguous(dev, page, count);
3836 page = NULL;
3837 }
3838 }
3839
3840 if (!page)
3841 page = alloc_pages(flags, order);
3842 if (!page)
ba395927 3843 return NULL;
36746436 3844 memset(page_address(page), 0, size);
ba395927 3845
36746436 3846 *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
bb9e6d65 3847 DMA_BIDIRECTIONAL,
5040a918 3848 dev->coherent_dma_mask);
ba395927 3849 if (*dma_handle)
36746436
AM
3850 return page_address(page);
3851 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3852 __free_pages(page, order);
3853
ba395927
KA
3854 return NULL;
3855}
3856
5040a918 3857static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
00085f1e 3858 dma_addr_t dma_handle, unsigned long attrs)
ba395927
KA
3859{
3860 int order;
36746436 3861 struct page *page = virt_to_page(vaddr);
ba395927 3862
5b6985ce 3863 size = PAGE_ALIGN(size);
ba395927
KA
3864 order = get_order(size);
3865
769530e4 3866 intel_unmap(dev, dma_handle, size);
36746436
AM
3867 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3868 __free_pages(page, order);
ba395927
KA
3869}
3870
5040a918 3871static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
d7ab5c46 3872 int nelems, enum dma_data_direction dir,
00085f1e 3873 unsigned long attrs)
ba395927 3874{
769530e4
OP
3875 dma_addr_t startaddr = sg_dma_address(sglist) & PAGE_MASK;
3876 unsigned long nrpages = 0;
3877 struct scatterlist *sg;
3878 int i;
3879
3880 for_each_sg(sglist, sg, nelems, i) {
3881 nrpages += aligned_nrpages(sg_dma_address(sg), sg_dma_len(sg));
3882 }
3883
3884 intel_unmap(dev, startaddr, nrpages << VTD_PAGE_SHIFT);
ba395927
KA
3885}
3886
ba395927 3887static int intel_nontranslate_map_sg(struct device *hddev,
c03ab37c 3888 struct scatterlist *sglist, int nelems, int dir)
ba395927
KA
3889{
3890 int i;
c03ab37c 3891 struct scatterlist *sg;
ba395927 3892
c03ab37c 3893 for_each_sg(sglist, sg, nelems, i) {
12d4d40e 3894 BUG_ON(!sg_page(sg));
3e6110fd 3895 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
c03ab37c 3896 sg->dma_length = sg->length;
ba395927
KA
3897 }
3898 return nelems;
3899}
3900
5040a918 3901static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
00085f1e 3902 enum dma_data_direction dir, unsigned long attrs)
ba395927 3903{
ba395927 3904 int i;
ba395927 3905 struct dmar_domain *domain;
f76aec76
KA
3906 size_t size = 0;
3907 int prot = 0;
2aac6304 3908 unsigned long iova_pfn;
f76aec76 3909 int ret;
c03ab37c 3910 struct scatterlist *sg;
b536d24d 3911 unsigned long start_vpfn;
8c11e798 3912 struct intel_iommu *iommu;
ba395927
KA
3913
3914 BUG_ON(dir == DMA_NONE);
5040a918
DW
3915 if (iommu_no_mapping(dev))
3916 return intel_nontranslate_map_sg(dev, sglist, nelems, dir);
ba395927 3917
5040a918 3918 domain = get_valid_domain_for_dev(dev);
f76aec76
KA
3919 if (!domain)
3920 return 0;
3921
8c11e798
WH
3922 iommu = domain_get_iommu(domain);
3923
b536d24d 3924 for_each_sg(sglist, sg, nelems, i)
88cb6a74 3925 size += aligned_nrpages(sg->offset, sg->length);
f76aec76 3926
2aac6304 3927 iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
5040a918 3928 *dev->dma_mask);
2aac6304 3929 if (!iova_pfn) {
c03ab37c 3930 sglist->dma_length = 0;
f76aec76
KA
3931 return 0;
3932 }
3933
3934 /*
3935 * Check if DMAR supports zero-length reads on write only
3936 * mappings..
3937 */
3938 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3939 !cap_zlr(iommu->cap))
f76aec76
KA
3940 prot |= DMA_PTE_READ;
3941 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3942 prot |= DMA_PTE_WRITE;
3943
2aac6304 3944 start_vpfn = mm_to_dma_pfn(iova_pfn);
e1605495 3945
f532959b 3946 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
e1605495 3947 if (unlikely(ret)) {
e1605495
DW
3948 dma_pte_free_pagetable(domain, start_vpfn,
3949 start_vpfn + size - 1);
22e2f9fa 3950 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
e1605495 3951 return 0;
ba395927
KA
3952 }
3953
1f0ef2aa
DW
3954 /* it's a non-present to present mapping. Only flush if caching mode */
3955 if (cap_caching_mode(iommu->cap))
a1ddcbe9 3956 iommu_flush_iotlb_psi(iommu, domain, start_vpfn, size, 0, 1);
1f0ef2aa 3957 else
8c11e798 3958 iommu_flush_write_buffer(iommu);
1f0ef2aa 3959
ba395927
KA
3960 return nelems;
3961}
3962
dfb805e8
FT
3963static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3964{
3965 return !dma_addr;
3966}
3967
160c1d8e 3968struct dma_map_ops intel_dma_ops = {
baa676fc
AP
3969 .alloc = intel_alloc_coherent,
3970 .free = intel_free_coherent,
ba395927
KA
3971 .map_sg = intel_map_sg,
3972 .unmap_sg = intel_unmap_sg,
ffbbef5c
FT
3973 .map_page = intel_map_page,
3974 .unmap_page = intel_unmap_page,
dfb805e8 3975 .mapping_error = intel_mapping_error,
ba395927
KA
3976};
3977
3978static inline int iommu_domain_cache_init(void)
3979{
3980 int ret = 0;
3981
3982 iommu_domain_cache = kmem_cache_create("iommu_domain",
3983 sizeof(struct dmar_domain),
3984 0,
3985 SLAB_HWCACHE_ALIGN,
3986
3987 NULL);
3988 if (!iommu_domain_cache) {
9f10e5bf 3989 pr_err("Couldn't create iommu_domain cache\n");
ba395927
KA
3990 ret = -ENOMEM;
3991 }
3992
3993 return ret;
3994}
3995
3996static inline int iommu_devinfo_cache_init(void)
3997{
3998 int ret = 0;
3999
4000 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
4001 sizeof(struct device_domain_info),
4002 0,
4003 SLAB_HWCACHE_ALIGN,
ba395927
KA
4004 NULL);
4005 if (!iommu_devinfo_cache) {
9f10e5bf 4006 pr_err("Couldn't create devinfo cache\n");
ba395927
KA
4007 ret = -ENOMEM;
4008 }
4009
4010 return ret;
4011}
4012
ba395927
KA
4013static int __init iommu_init_mempool(void)
4014{
4015 int ret;
ae1ff3d6 4016 ret = iova_cache_get();
ba395927
KA
4017 if (ret)
4018 return ret;
4019
4020 ret = iommu_domain_cache_init();
4021 if (ret)
4022 goto domain_error;
4023
4024 ret = iommu_devinfo_cache_init();
4025 if (!ret)
4026 return ret;
4027
4028 kmem_cache_destroy(iommu_domain_cache);
4029domain_error:
ae1ff3d6 4030 iova_cache_put();
ba395927
KA
4031
4032 return -ENOMEM;
4033}
4034
4035static void __init iommu_exit_mempool(void)
4036{
4037 kmem_cache_destroy(iommu_devinfo_cache);
4038 kmem_cache_destroy(iommu_domain_cache);
ae1ff3d6 4039 iova_cache_put();
ba395927
KA
4040}
4041
556ab45f
DW
4042static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
4043{
4044 struct dmar_drhd_unit *drhd;
4045 u32 vtbar;
4046 int rc;
4047
4048 /* We know that this device on this chipset has its own IOMMU.
4049 * If we find it under a different IOMMU, then the BIOS is lying
4050 * to us. Hope that the IOMMU for this device is actually
4051 * disabled, and it needs no translation...
4052 */
4053 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
4054 if (rc) {
4055 /* "can't" happen */
4056 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
4057 return;
4058 }
4059 vtbar &= 0xffff0000;
4060
4061 /* we know that the this iommu should be at offset 0xa000 from vtbar */
4062 drhd = dmar_find_matched_drhd_unit(pdev);
4063 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
4064 TAINT_FIRMWARE_WORKAROUND,
4065 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
4066 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
4067}
4068DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
4069
ba395927
KA
4070static void __init init_no_remapping_devices(void)
4071{
4072 struct dmar_drhd_unit *drhd;
832bd858 4073 struct device *dev;
b683b230 4074 int i;
ba395927
KA
4075
4076 for_each_drhd_unit(drhd) {
4077 if (!drhd->include_all) {
b683b230
JL
4078 for_each_active_dev_scope(drhd->devices,
4079 drhd->devices_cnt, i, dev)
4080 break;
832bd858 4081 /* ignore DMAR unit if no devices exist */
ba395927
KA
4082 if (i == drhd->devices_cnt)
4083 drhd->ignored = 1;
4084 }
4085 }
4086
7c919779 4087 for_each_active_drhd_unit(drhd) {
7c919779 4088 if (drhd->include_all)
ba395927
KA
4089 continue;
4090
b683b230
JL
4091 for_each_active_dev_scope(drhd->devices,
4092 drhd->devices_cnt, i, dev)
832bd858 4093 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
ba395927 4094 break;
ba395927
KA
4095 if (i < drhd->devices_cnt)
4096 continue;
4097
c0771df8
DW
4098 /* This IOMMU has *only* gfx devices. Either bypass it or
4099 set the gfx_mapped flag, as appropriate */
4100 if (dmar_map_gfx) {
4101 intel_iommu_gfx_mapped = 1;
4102 } else {
4103 drhd->ignored = 1;
b683b230
JL
4104 for_each_active_dev_scope(drhd->devices,
4105 drhd->devices_cnt, i, dev)
832bd858 4106 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
ba395927
KA
4107 }
4108 }
4109}
4110
f59c7b69
FY
4111#ifdef CONFIG_SUSPEND
4112static int init_iommu_hw(void)
4113{
4114 struct dmar_drhd_unit *drhd;
4115 struct intel_iommu *iommu = NULL;
4116
4117 for_each_active_iommu(iommu, drhd)
4118 if (iommu->qi)
4119 dmar_reenable_qi(iommu);
4120
b779260b
JC
4121 for_each_iommu(iommu, drhd) {
4122 if (drhd->ignored) {
4123 /*
4124 * we always have to disable PMRs or DMA may fail on
4125 * this device
4126 */
4127 if (force_on)
4128 iommu_disable_protect_mem_regions(iommu);
4129 continue;
4130 }
4131
f59c7b69
FY
4132 iommu_flush_write_buffer(iommu);
4133
4134 iommu_set_root_entry(iommu);
4135
4136 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 4137 DMA_CCMD_GLOBAL_INVL);
2a41ccee
JL
4138 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4139 iommu_enable_translation(iommu);
b94996c9 4140 iommu_disable_protect_mem_regions(iommu);
f59c7b69
FY
4141 }
4142
4143 return 0;
4144}
4145
4146static void iommu_flush_all(void)
4147{
4148 struct dmar_drhd_unit *drhd;
4149 struct intel_iommu *iommu;
4150
4151 for_each_active_iommu(iommu, drhd) {
4152 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 4153 DMA_CCMD_GLOBAL_INVL);
f59c7b69 4154 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 4155 DMA_TLB_GLOBAL_FLUSH);
f59c7b69
FY
4156 }
4157}
4158
134fac3f 4159static int iommu_suspend(void)
f59c7b69
FY
4160{
4161 struct dmar_drhd_unit *drhd;
4162 struct intel_iommu *iommu = NULL;
4163 unsigned long flag;
4164
4165 for_each_active_iommu(iommu, drhd) {
4166 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
4167 GFP_ATOMIC);
4168 if (!iommu->iommu_state)
4169 goto nomem;
4170 }
4171
4172 iommu_flush_all();
4173
4174 for_each_active_iommu(iommu, drhd) {
4175 iommu_disable_translation(iommu);
4176
1f5b3c3f 4177 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
4178
4179 iommu->iommu_state[SR_DMAR_FECTL_REG] =
4180 readl(iommu->reg + DMAR_FECTL_REG);
4181 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
4182 readl(iommu->reg + DMAR_FEDATA_REG);
4183 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
4184 readl(iommu->reg + DMAR_FEADDR_REG);
4185 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
4186 readl(iommu->reg + DMAR_FEUADDR_REG);
4187
1f5b3c3f 4188 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
4189 }
4190 return 0;
4191
4192nomem:
4193 for_each_active_iommu(iommu, drhd)
4194 kfree(iommu->iommu_state);
4195
4196 return -ENOMEM;
4197}
4198
134fac3f 4199static void iommu_resume(void)
f59c7b69
FY
4200{
4201 struct dmar_drhd_unit *drhd;
4202 struct intel_iommu *iommu = NULL;
4203 unsigned long flag;
4204
4205 if (init_iommu_hw()) {
b779260b
JC
4206 if (force_on)
4207 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
4208 else
4209 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
134fac3f 4210 return;
f59c7b69
FY
4211 }
4212
4213 for_each_active_iommu(iommu, drhd) {
4214
1f5b3c3f 4215 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
4216
4217 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
4218 iommu->reg + DMAR_FECTL_REG);
4219 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
4220 iommu->reg + DMAR_FEDATA_REG);
4221 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
4222 iommu->reg + DMAR_FEADDR_REG);
4223 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
4224 iommu->reg + DMAR_FEUADDR_REG);
4225
1f5b3c3f 4226 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
4227 }
4228
4229 for_each_active_iommu(iommu, drhd)
4230 kfree(iommu->iommu_state);
f59c7b69
FY
4231}
4232
134fac3f 4233static struct syscore_ops iommu_syscore_ops = {
f59c7b69
FY
4234 .resume = iommu_resume,
4235 .suspend = iommu_suspend,
4236};
4237
134fac3f 4238static void __init init_iommu_pm_ops(void)
f59c7b69 4239{
134fac3f 4240 register_syscore_ops(&iommu_syscore_ops);
f59c7b69
FY
4241}
4242
4243#else
99592ba4 4244static inline void init_iommu_pm_ops(void) {}
f59c7b69
FY
4245#endif /* CONFIG_PM */
4246
318fe7df 4247
c2a0b538 4248int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
318fe7df
SS
4249{
4250 struct acpi_dmar_reserved_memory *rmrr;
0659b8dc 4251 int prot = DMA_PTE_READ|DMA_PTE_WRITE;
318fe7df 4252 struct dmar_rmrr_unit *rmrru;
0659b8dc 4253 size_t length;
318fe7df
SS
4254
4255 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
4256 if (!rmrru)
0659b8dc 4257 goto out;
318fe7df
SS
4258
4259 rmrru->hdr = header;
4260 rmrr = (struct acpi_dmar_reserved_memory *)header;
4261 rmrru->base_address = rmrr->base_address;
4262 rmrru->end_address = rmrr->end_address;
0659b8dc
EA
4263
4264 length = rmrr->end_address - rmrr->base_address + 1;
4265 rmrru->resv = iommu_alloc_resv_region(rmrr->base_address, length, prot,
4266 IOMMU_RESV_DIRECT);
4267 if (!rmrru->resv)
4268 goto free_rmrru;
4269
2e455289
JL
4270 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
4271 ((void *)rmrr) + rmrr->header.length,
4272 &rmrru->devices_cnt);
0659b8dc
EA
4273 if (rmrru->devices_cnt && rmrru->devices == NULL)
4274 goto free_all;
318fe7df 4275
2e455289 4276 list_add(&rmrru->list, &dmar_rmrr_units);
318fe7df 4277
2e455289 4278 return 0;
0659b8dc
EA
4279free_all:
4280 kfree(rmrru->resv);
4281free_rmrru:
4282 kfree(rmrru);
4283out:
4284 return -ENOMEM;
318fe7df
SS
4285}
4286
6b197249
JL
4287static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
4288{
4289 struct dmar_atsr_unit *atsru;
4290 struct acpi_dmar_atsr *tmp;
4291
4292 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4293 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
4294 if (atsr->segment != tmp->segment)
4295 continue;
4296 if (atsr->header.length != tmp->header.length)
4297 continue;
4298 if (memcmp(atsr, tmp, atsr->header.length) == 0)
4299 return atsru;
4300 }
4301
4302 return NULL;
4303}
4304
4305int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
318fe7df
SS
4306{
4307 struct acpi_dmar_atsr *atsr;
4308 struct dmar_atsr_unit *atsru;
4309
6b197249
JL
4310 if (system_state != SYSTEM_BOOTING && !intel_iommu_enabled)
4311 return 0;
4312
318fe7df 4313 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
6b197249
JL
4314 atsru = dmar_find_atsr(atsr);
4315 if (atsru)
4316 return 0;
4317
4318 atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
318fe7df
SS
4319 if (!atsru)
4320 return -ENOMEM;
4321
6b197249
JL
4322 /*
4323 * If memory is allocated from slab by ACPI _DSM method, we need to
4324 * copy the memory content because the memory buffer will be freed
4325 * on return.
4326 */
4327 atsru->hdr = (void *)(atsru + 1);
4328 memcpy(atsru->hdr, hdr, hdr->length);
318fe7df 4329 atsru->include_all = atsr->flags & 0x1;
2e455289
JL
4330 if (!atsru->include_all) {
4331 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
4332 (void *)atsr + atsr->header.length,
4333 &atsru->devices_cnt);
4334 if (atsru->devices_cnt && atsru->devices == NULL) {
4335 kfree(atsru);
4336 return -ENOMEM;
4337 }
4338 }
318fe7df 4339
0e242612 4340 list_add_rcu(&atsru->list, &dmar_atsr_units);
318fe7df
SS
4341
4342 return 0;
4343}
4344
9bdc531e
JL
4345static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
4346{
4347 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
4348 kfree(atsru);
4349}
4350
6b197249
JL
4351int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4352{
4353 struct acpi_dmar_atsr *atsr;
4354 struct dmar_atsr_unit *atsru;
4355
4356 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4357 atsru = dmar_find_atsr(atsr);
4358 if (atsru) {
4359 list_del_rcu(&atsru->list);
4360 synchronize_rcu();
4361 intel_iommu_free_atsr(atsru);
4362 }
4363
4364 return 0;
4365}
4366
4367int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4368{
4369 int i;
4370 struct device *dev;
4371 struct acpi_dmar_atsr *atsr;
4372 struct dmar_atsr_unit *atsru;
4373
4374 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4375 atsru = dmar_find_atsr(atsr);
4376 if (!atsru)
4377 return 0;
4378
194dc870 4379 if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
6b197249
JL
4380 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
4381 i, dev)
4382 return -EBUSY;
194dc870 4383 }
6b197249
JL
4384
4385 return 0;
4386}
4387
ffebeb46
JL
4388static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
4389{
4390 int sp, ret = 0;
4391 struct intel_iommu *iommu = dmaru->iommu;
4392
4393 if (g_iommus[iommu->seq_id])
4394 return 0;
4395
4396 if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
9f10e5bf 4397 pr_warn("%s: Doesn't support hardware pass through.\n",
ffebeb46
JL
4398 iommu->name);
4399 return -ENXIO;
4400 }
4401 if (!ecap_sc_support(iommu->ecap) &&
4402 domain_update_iommu_snooping(iommu)) {
9f10e5bf 4403 pr_warn("%s: Doesn't support snooping.\n",
ffebeb46
JL
4404 iommu->name);
4405 return -ENXIO;
4406 }
4407 sp = domain_update_iommu_superpage(iommu) - 1;
4408 if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
9f10e5bf 4409 pr_warn("%s: Doesn't support large page.\n",
ffebeb46
JL
4410 iommu->name);
4411 return -ENXIO;
4412 }
4413
4414 /*
4415 * Disable translation if already enabled prior to OS handover.
4416 */
4417 if (iommu->gcmd & DMA_GCMD_TE)
4418 iommu_disable_translation(iommu);
4419
4420 g_iommus[iommu->seq_id] = iommu;
4421 ret = iommu_init_domains(iommu);
4422 if (ret == 0)
4423 ret = iommu_alloc_root_entry(iommu);
4424 if (ret)
4425 goto out;
4426
8a94ade4
DW
4427#ifdef CONFIG_INTEL_IOMMU_SVM
4428 if (pasid_enabled(iommu))
4429 intel_svm_alloc_pasid_tables(iommu);
4430#endif
4431
ffebeb46
JL
4432 if (dmaru->ignored) {
4433 /*
4434 * we always have to disable PMRs or DMA may fail on this device
4435 */
4436 if (force_on)
4437 iommu_disable_protect_mem_regions(iommu);
4438 return 0;
4439 }
4440
4441 intel_iommu_init_qi(iommu);
4442 iommu_flush_write_buffer(iommu);
a222a7f0
DW
4443
4444#ifdef CONFIG_INTEL_IOMMU_SVM
4445 if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) {
4446 ret = intel_svm_enable_prq(iommu);
4447 if (ret)
4448 goto disable_iommu;
4449 }
4450#endif
ffebeb46
JL
4451 ret = dmar_set_interrupt(iommu);
4452 if (ret)
4453 goto disable_iommu;
4454
4455 iommu_set_root_entry(iommu);
4456 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
4457 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4458 iommu_enable_translation(iommu);
4459
ffebeb46
JL
4460 iommu_disable_protect_mem_regions(iommu);
4461 return 0;
4462
4463disable_iommu:
4464 disable_dmar_iommu(iommu);
4465out:
4466 free_dmar_iommu(iommu);
4467 return ret;
4468}
4469
6b197249
JL
4470int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
4471{
ffebeb46
JL
4472 int ret = 0;
4473 struct intel_iommu *iommu = dmaru->iommu;
4474
4475 if (!intel_iommu_enabled)
4476 return 0;
4477 if (iommu == NULL)
4478 return -EINVAL;
4479
4480 if (insert) {
4481 ret = intel_iommu_add(dmaru);
4482 } else {
4483 disable_dmar_iommu(iommu);
4484 free_dmar_iommu(iommu);
4485 }
4486
4487 return ret;
6b197249
JL
4488}
4489
9bdc531e
JL
4490static void intel_iommu_free_dmars(void)
4491{
4492 struct dmar_rmrr_unit *rmrru, *rmrr_n;
4493 struct dmar_atsr_unit *atsru, *atsr_n;
4494
4495 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
4496 list_del(&rmrru->list);
4497 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
0659b8dc 4498 kfree(rmrru->resv);
9bdc531e 4499 kfree(rmrru);
318fe7df
SS
4500 }
4501
9bdc531e
JL
4502 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
4503 list_del(&atsru->list);
4504 intel_iommu_free_atsr(atsru);
4505 }
318fe7df
SS
4506}
4507
4508int dmar_find_matched_atsr_unit(struct pci_dev *dev)
4509{
b683b230 4510 int i, ret = 1;
318fe7df 4511 struct pci_bus *bus;
832bd858
DW
4512 struct pci_dev *bridge = NULL;
4513 struct device *tmp;
318fe7df
SS
4514 struct acpi_dmar_atsr *atsr;
4515 struct dmar_atsr_unit *atsru;
4516
4517 dev = pci_physfn(dev);
318fe7df 4518 for (bus = dev->bus; bus; bus = bus->parent) {
b5f82ddf 4519 bridge = bus->self;
d14053b3
DW
4520 /* If it's an integrated device, allow ATS */
4521 if (!bridge)
4522 return 1;
4523 /* Connected via non-PCIe: no ATS */
4524 if (!pci_is_pcie(bridge) ||
62f87c0e 4525 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
318fe7df 4526 return 0;
d14053b3 4527 /* If we found the root port, look it up in the ATSR */
b5f82ddf 4528 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
318fe7df 4529 break;
318fe7df
SS
4530 }
4531
0e242612 4532 rcu_read_lock();
b5f82ddf
JL
4533 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4534 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4535 if (atsr->segment != pci_domain_nr(dev->bus))
4536 continue;
4537
b683b230 4538 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
832bd858 4539 if (tmp == &bridge->dev)
b683b230 4540 goto out;
b5f82ddf
JL
4541
4542 if (atsru->include_all)
b683b230 4543 goto out;
b5f82ddf 4544 }
b683b230
JL
4545 ret = 0;
4546out:
0e242612 4547 rcu_read_unlock();
318fe7df 4548
b683b230 4549 return ret;
318fe7df
SS
4550}
4551
59ce0515
JL
4552int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
4553{
4554 int ret = 0;
4555 struct dmar_rmrr_unit *rmrru;
4556 struct dmar_atsr_unit *atsru;
4557 struct acpi_dmar_atsr *atsr;
4558 struct acpi_dmar_reserved_memory *rmrr;
4559
4560 if (!intel_iommu_enabled && system_state != SYSTEM_BOOTING)
4561 return 0;
4562
4563 list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
4564 rmrr = container_of(rmrru->hdr,
4565 struct acpi_dmar_reserved_memory, header);
4566 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4567 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
4568 ((void *)rmrr) + rmrr->header.length,
4569 rmrr->segment, rmrru->devices,
4570 rmrru->devices_cnt);
27e24950 4571 if(ret < 0)
59ce0515 4572 return ret;
e6a8c9b3 4573 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
27e24950
JL
4574 dmar_remove_dev_scope(info, rmrr->segment,
4575 rmrru->devices, rmrru->devices_cnt);
59ce0515
JL
4576 }
4577 }
4578
4579 list_for_each_entry(atsru, &dmar_atsr_units, list) {
4580 if (atsru->include_all)
4581 continue;
4582
4583 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4584 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4585 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
4586 (void *)atsr + atsr->header.length,
4587 atsr->segment, atsru->devices,
4588 atsru->devices_cnt);
4589 if (ret > 0)
4590 break;
4591 else if(ret < 0)
4592 return ret;
e6a8c9b3 4593 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
59ce0515
JL
4594 if (dmar_remove_dev_scope(info, atsr->segment,
4595 atsru->devices, atsru->devices_cnt))
4596 break;
4597 }
4598 }
4599
4600 return 0;
4601}
4602
99dcaded
FY
4603/*
4604 * Here we only respond to action of unbound device from driver.
4605 *
4606 * Added device is not attached to its DMAR domain here yet. That will happen
4607 * when mapping the device to iova.
4608 */
4609static int device_notifier(struct notifier_block *nb,
4610 unsigned long action, void *data)
4611{
4612 struct device *dev = data;
99dcaded
FY
4613 struct dmar_domain *domain;
4614
3d89194a 4615 if (iommu_dummy(dev))
44cd613c
DW
4616 return 0;
4617
1196c2fb 4618 if (action != BUS_NOTIFY_REMOVED_DEVICE)
7e7dfab7
JL
4619 return 0;
4620
1525a29a 4621 domain = find_domain(dev);
99dcaded
FY
4622 if (!domain)
4623 return 0;
4624
e6de0f8d 4625 dmar_remove_one_dev_info(domain, dev);
ab8dfe25 4626 if (!domain_type_is_vm_or_si(domain) && list_empty(&domain->devices))
7e7dfab7 4627 domain_exit(domain);
a97590e5 4628
99dcaded
FY
4629 return 0;
4630}
4631
4632static struct notifier_block device_nb = {
4633 .notifier_call = device_notifier,
4634};
4635
75f05569
JL
4636static int intel_iommu_memory_notifier(struct notifier_block *nb,
4637 unsigned long val, void *v)
4638{
4639 struct memory_notify *mhp = v;
4640 unsigned long long start, end;
4641 unsigned long start_vpfn, last_vpfn;
4642
4643 switch (val) {
4644 case MEM_GOING_ONLINE:
4645 start = mhp->start_pfn << PAGE_SHIFT;
4646 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
4647 if (iommu_domain_identity_map(si_domain, start, end)) {
9f10e5bf 4648 pr_warn("Failed to build identity map for [%llx-%llx]\n",
75f05569
JL
4649 start, end);
4650 return NOTIFY_BAD;
4651 }
4652 break;
4653
4654 case MEM_OFFLINE:
4655 case MEM_CANCEL_ONLINE:
4656 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4657 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
4658 while (start_vpfn <= last_vpfn) {
4659 struct iova *iova;
4660 struct dmar_drhd_unit *drhd;
4661 struct intel_iommu *iommu;
ea8ea460 4662 struct page *freelist;
75f05569
JL
4663
4664 iova = find_iova(&si_domain->iovad, start_vpfn);
4665 if (iova == NULL) {
9f10e5bf 4666 pr_debug("Failed get IOVA for PFN %lx\n",
75f05569
JL
4667 start_vpfn);
4668 break;
4669 }
4670
4671 iova = split_and_remove_iova(&si_domain->iovad, iova,
4672 start_vpfn, last_vpfn);
4673 if (iova == NULL) {
9f10e5bf 4674 pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
75f05569
JL
4675 start_vpfn, last_vpfn);
4676 return NOTIFY_BAD;
4677 }
4678
ea8ea460
DW
4679 freelist = domain_unmap(si_domain, iova->pfn_lo,
4680 iova->pfn_hi);
4681
75f05569
JL
4682 rcu_read_lock();
4683 for_each_active_iommu(iommu, drhd)
a1ddcbe9 4684 iommu_flush_iotlb_psi(iommu, si_domain,
a156ef99 4685 iova->pfn_lo, iova_size(iova),
ea8ea460 4686 !freelist, 0);
75f05569 4687 rcu_read_unlock();
ea8ea460 4688 dma_free_pagelist(freelist);
75f05569
JL
4689
4690 start_vpfn = iova->pfn_hi + 1;
4691 free_iova_mem(iova);
4692 }
4693 break;
4694 }
4695
4696 return NOTIFY_OK;
4697}
4698
4699static struct notifier_block intel_iommu_memory_nb = {
4700 .notifier_call = intel_iommu_memory_notifier,
4701 .priority = 0
4702};
4703
22e2f9fa
OP
4704static void free_all_cpu_cached_iovas(unsigned int cpu)
4705{
4706 int i;
4707
4708 for (i = 0; i < g_num_of_iommus; i++) {
4709 struct intel_iommu *iommu = g_iommus[i];
4710 struct dmar_domain *domain;
0caa7616 4711 int did;
22e2f9fa
OP
4712
4713 if (!iommu)
4714 continue;
4715
3bd4f911 4716 for (did = 0; did < cap_ndoms(iommu->cap); did++) {
0caa7616 4717 domain = get_iommu_domain(iommu, (u16)did);
22e2f9fa
OP
4718
4719 if (!domain)
4720 continue;
4721 free_cpu_cached_iovas(cpu, &domain->iovad);
4722 }
4723 }
4724}
4725
21647615 4726static int intel_iommu_cpu_dead(unsigned int cpu)
aa473240 4727{
21647615
AMG
4728 free_all_cpu_cached_iovas(cpu);
4729 flush_unmaps_timeout(cpu);
4730 return 0;
aa473240
OP
4731}
4732
a7fdb6e6
JR
4733static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
4734{
4735 return container_of(dev, struct intel_iommu, iommu.dev);
4736}
4737
a5459cfe
AW
4738static ssize_t intel_iommu_show_version(struct device *dev,
4739 struct device_attribute *attr,
4740 char *buf)
4741{
a7fdb6e6 4742 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4743 u32 ver = readl(iommu->reg + DMAR_VER_REG);
4744 return sprintf(buf, "%d:%d\n",
4745 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4746}
4747static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
4748
4749static ssize_t intel_iommu_show_address(struct device *dev,
4750 struct device_attribute *attr,
4751 char *buf)
4752{
a7fdb6e6 4753 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4754 return sprintf(buf, "%llx\n", iommu->reg_phys);
4755}
4756static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
4757
4758static ssize_t intel_iommu_show_cap(struct device *dev,
4759 struct device_attribute *attr,
4760 char *buf)
4761{
a7fdb6e6 4762 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4763 return sprintf(buf, "%llx\n", iommu->cap);
4764}
4765static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
4766
4767static ssize_t intel_iommu_show_ecap(struct device *dev,
4768 struct device_attribute *attr,
4769 char *buf)
4770{
a7fdb6e6 4771 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4772 return sprintf(buf, "%llx\n", iommu->ecap);
4773}
4774static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4775
2238c082
AW
4776static ssize_t intel_iommu_show_ndoms(struct device *dev,
4777 struct device_attribute *attr,
4778 char *buf)
4779{
a7fdb6e6 4780 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2238c082
AW
4781 return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
4782}
4783static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL);
4784
4785static ssize_t intel_iommu_show_ndoms_used(struct device *dev,
4786 struct device_attribute *attr,
4787 char *buf)
4788{
a7fdb6e6 4789 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2238c082
AW
4790 return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
4791 cap_ndoms(iommu->cap)));
4792}
4793static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL);
4794
a5459cfe
AW
4795static struct attribute *intel_iommu_attrs[] = {
4796 &dev_attr_version.attr,
4797 &dev_attr_address.attr,
4798 &dev_attr_cap.attr,
4799 &dev_attr_ecap.attr,
2238c082
AW
4800 &dev_attr_domains_supported.attr,
4801 &dev_attr_domains_used.attr,
a5459cfe
AW
4802 NULL,
4803};
4804
4805static struct attribute_group intel_iommu_group = {
4806 .name = "intel-iommu",
4807 .attrs = intel_iommu_attrs,
4808};
4809
4810const struct attribute_group *intel_iommu_groups[] = {
4811 &intel_iommu_group,
4812 NULL,
4813};
4814
ba395927
KA
4815int __init intel_iommu_init(void)
4816{
9bdc531e 4817 int ret = -ENODEV;
3a93c841 4818 struct dmar_drhd_unit *drhd;
7c919779 4819 struct intel_iommu *iommu;
ba395927 4820
a59b50e9
JC
4821 /* VT-d is required for a TXT/tboot launch, so enforce that */
4822 force_on = tboot_force_iommu();
4823
3a5670e8
JL
4824 if (iommu_init_mempool()) {
4825 if (force_on)
4826 panic("tboot: Failed to initialize iommu memory\n");
4827 return -ENOMEM;
4828 }
4829
4830 down_write(&dmar_global_lock);
a59b50e9
JC
4831 if (dmar_table_init()) {
4832 if (force_on)
4833 panic("tboot: Failed to initialize DMAR table\n");
9bdc531e 4834 goto out_free_dmar;
a59b50e9 4835 }
ba395927 4836
c2c7286a 4837 if (dmar_dev_scope_init() < 0) {
a59b50e9
JC
4838 if (force_on)
4839 panic("tboot: Failed to initialize DMAR device scope\n");
9bdc531e 4840 goto out_free_dmar;
a59b50e9 4841 }
1886e8a9 4842
75f1cdf1 4843 if (no_iommu || dmar_disabled)
9bdc531e 4844 goto out_free_dmar;
2ae21010 4845
318fe7df 4846 if (list_empty(&dmar_rmrr_units))
9f10e5bf 4847 pr_info("No RMRR found\n");
318fe7df
SS
4848
4849 if (list_empty(&dmar_atsr_units))
9f10e5bf 4850 pr_info("No ATSR found\n");
318fe7df 4851
51a63e67
JC
4852 if (dmar_init_reserved_ranges()) {
4853 if (force_on)
4854 panic("tboot: Failed to reserve iommu ranges\n");
3a5670e8 4855 goto out_free_reserved_range;
51a63e67 4856 }
ba395927
KA
4857
4858 init_no_remapping_devices();
4859
b779260b 4860 ret = init_dmars();
ba395927 4861 if (ret) {
a59b50e9
JC
4862 if (force_on)
4863 panic("tboot: Failed to initialize DMARs\n");
9f10e5bf 4864 pr_err("Initialization failed\n");
9bdc531e 4865 goto out_free_reserved_range;
ba395927 4866 }
3a5670e8 4867 up_write(&dmar_global_lock);
9f10e5bf 4868 pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
ba395927 4869
75f1cdf1
FT
4870#ifdef CONFIG_SWIOTLB
4871 swiotlb = 0;
4872#endif
19943b0e 4873 dma_ops = &intel_dma_ops;
4ed0d3e6 4874
134fac3f 4875 init_iommu_pm_ops();
a8bcbb0d 4876
39ab9555
JR
4877 for_each_active_iommu(iommu, drhd) {
4878 iommu_device_sysfs_add(&iommu->iommu, NULL,
4879 intel_iommu_groups,
4880 "%s", iommu->name);
4881 iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
4882 iommu_device_register(&iommu->iommu);
4883 }
a5459cfe 4884
4236d97d 4885 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
99dcaded 4886 bus_register_notifier(&pci_bus_type, &device_nb);
75f05569
JL
4887 if (si_domain && !hw_pass_through)
4888 register_memory_notifier(&intel_iommu_memory_nb);
21647615
AMG
4889 cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL,
4890 intel_iommu_cpu_dead);
8bc1f85c
ED
4891 intel_iommu_enabled = 1;
4892
ba395927 4893 return 0;
9bdc531e
JL
4894
4895out_free_reserved_range:
4896 put_iova_domain(&reserved_iova_list);
9bdc531e
JL
4897out_free_dmar:
4898 intel_iommu_free_dmars();
3a5670e8
JL
4899 up_write(&dmar_global_lock);
4900 iommu_exit_mempool();
9bdc531e 4901 return ret;
ba395927 4902}
e820482c 4903
2452d9db 4904static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
579305f7
AW
4905{
4906 struct intel_iommu *iommu = opaque;
4907
2452d9db 4908 domain_context_clear_one(iommu, PCI_BUS_NUM(alias), alias & 0xff);
579305f7
AW
4909 return 0;
4910}
4911
4912/*
4913 * NB - intel-iommu lacks any sort of reference counting for the users of
4914 * dependent devices. If multiple endpoints have intersecting dependent
4915 * devices, unbinding the driver from any one of them will possibly leave
4916 * the others unable to operate.
4917 */
2452d9db 4918static void domain_context_clear(struct intel_iommu *iommu, struct device *dev)
3199aa6b 4919{
0bcb3e28 4920 if (!iommu || !dev || !dev_is_pci(dev))
3199aa6b
HW
4921 return;
4922
2452d9db 4923 pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_clear_one_cb, iommu);
3199aa6b
HW
4924}
4925
127c7615 4926static void __dmar_remove_one_dev_info(struct device_domain_info *info)
c7151a8d 4927{
c7151a8d
WH
4928 struct intel_iommu *iommu;
4929 unsigned long flags;
c7151a8d 4930
55d94043
JR
4931 assert_spin_locked(&device_domain_lock);
4932
127c7615 4933 if (WARN_ON(!info))
c7151a8d
WH
4934 return;
4935
127c7615 4936 iommu = info->iommu;
c7151a8d 4937
127c7615
JR
4938 if (info->dev) {
4939 iommu_disable_dev_iotlb(info);
4940 domain_context_clear(iommu, info->dev);
4941 }
c7151a8d 4942
b608ac3b 4943 unlink_domain_info(info);
c7151a8d 4944
d160aca5 4945 spin_lock_irqsave(&iommu->lock, flags);
127c7615 4946 domain_detach_iommu(info->domain, iommu);
d160aca5 4947 spin_unlock_irqrestore(&iommu->lock, flags);
c7151a8d 4948
127c7615 4949 free_devinfo_mem(info);
c7151a8d 4950}
c7151a8d 4951
55d94043
JR
4952static void dmar_remove_one_dev_info(struct dmar_domain *domain,
4953 struct device *dev)
4954{
127c7615 4955 struct device_domain_info *info;
55d94043 4956 unsigned long flags;
3e7abe25 4957
55d94043 4958 spin_lock_irqsave(&device_domain_lock, flags);
127c7615
JR
4959 info = dev->archdata.iommu;
4960 __dmar_remove_one_dev_info(info);
55d94043 4961 spin_unlock_irqrestore(&device_domain_lock, flags);
c7151a8d
WH
4962}
4963
2c2e2c38 4964static int md_domain_init(struct dmar_domain *domain, int guest_width)
5e98c4b1
WH
4965{
4966 int adjust_width;
4967
0fb5fe87
RM
4968 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
4969 DMA_32BIT_PFN);
5e98c4b1
WH
4970 domain_reserve_special_ranges(domain);
4971
4972 /* calculate AGAW */
4973 domain->gaw = guest_width;
4974 adjust_width = guestwidth_to_adjustwidth(guest_width);
4975 domain->agaw = width_to_agaw(adjust_width);
4976
5e98c4b1 4977 domain->iommu_coherency = 0;
c5b15255 4978 domain->iommu_snooping = 0;
6dd9a7c7 4979 domain->iommu_superpage = 0;
fe40f1e0 4980 domain->max_addr = 0;
5e98c4b1
WH
4981
4982 /* always allocate the top pgd */
4c923d47 4983 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
5e98c4b1
WH
4984 if (!domain->pgd)
4985 return -ENOMEM;
4986 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4987 return 0;
4988}
4989
00a77deb 4990static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
38717946 4991{
5d450806 4992 struct dmar_domain *dmar_domain;
00a77deb
JR
4993 struct iommu_domain *domain;
4994
4995 if (type != IOMMU_DOMAIN_UNMANAGED)
4996 return NULL;
38717946 4997
ab8dfe25 4998 dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE);
5d450806 4999 if (!dmar_domain) {
9f10e5bf 5000 pr_err("Can't allocate dmar_domain\n");
00a77deb 5001 return NULL;
38717946 5002 }
2c2e2c38 5003 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
9f10e5bf 5004 pr_err("Domain initialization failed\n");
92d03cc8 5005 domain_exit(dmar_domain);
00a77deb 5006 return NULL;
38717946 5007 }
8140a95d 5008 domain_update_iommu_cap(dmar_domain);
faa3d6f5 5009
00a77deb 5010 domain = &dmar_domain->domain;
8a0e715b
JR
5011 domain->geometry.aperture_start = 0;
5012 domain->geometry.aperture_end = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
5013 domain->geometry.force_aperture = true;
5014
00a77deb 5015 return domain;
38717946 5016}
38717946 5017
00a77deb 5018static void intel_iommu_domain_free(struct iommu_domain *domain)
38717946 5019{
00a77deb 5020 domain_exit(to_dmar_domain(domain));
38717946 5021}
38717946 5022
4c5478c9
JR
5023static int intel_iommu_attach_device(struct iommu_domain *domain,
5024 struct device *dev)
38717946 5025{
00a77deb 5026 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
fe40f1e0
WH
5027 struct intel_iommu *iommu;
5028 int addr_width;
156baca8 5029 u8 bus, devfn;
faa3d6f5 5030
c875d2c1
AW
5031 if (device_is_rmrr_locked(dev)) {
5032 dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n");
5033 return -EPERM;
5034 }
5035
7207d8f9
DW
5036 /* normally dev is not mapped */
5037 if (unlikely(domain_context_mapped(dev))) {
faa3d6f5
WH
5038 struct dmar_domain *old_domain;
5039
1525a29a 5040 old_domain = find_domain(dev);
faa3d6f5 5041 if (old_domain) {
d160aca5 5042 rcu_read_lock();
de7e8886 5043 dmar_remove_one_dev_info(old_domain, dev);
d160aca5 5044 rcu_read_unlock();
62c22167
JR
5045
5046 if (!domain_type_is_vm_or_si(old_domain) &&
5047 list_empty(&old_domain->devices))
5048 domain_exit(old_domain);
faa3d6f5
WH
5049 }
5050 }
5051
156baca8 5052 iommu = device_to_iommu(dev, &bus, &devfn);
fe40f1e0
WH
5053 if (!iommu)
5054 return -ENODEV;
5055
5056 /* check if this iommu agaw is sufficient for max mapped address */
5057 addr_width = agaw_to_width(iommu->agaw);
a99c47a2
TL
5058 if (addr_width > cap_mgaw(iommu->cap))
5059 addr_width = cap_mgaw(iommu->cap);
5060
5061 if (dmar_domain->max_addr > (1LL << addr_width)) {
9f10e5bf 5062 pr_err("%s: iommu width (%d) is not "
fe40f1e0 5063 "sufficient for the mapped address (%llx)\n",
a99c47a2 5064 __func__, addr_width, dmar_domain->max_addr);
fe40f1e0
WH
5065 return -EFAULT;
5066 }
a99c47a2
TL
5067 dmar_domain->gaw = addr_width;
5068
5069 /*
5070 * Knock out extra levels of page tables if necessary
5071 */
5072 while (iommu->agaw < dmar_domain->agaw) {
5073 struct dma_pte *pte;
5074
5075 pte = dmar_domain->pgd;
5076 if (dma_pte_present(pte)) {
25cbff16
SY
5077 dmar_domain->pgd = (struct dma_pte *)
5078 phys_to_virt(dma_pte_addr(pte));
7a661013 5079 free_pgtable_page(pte);
a99c47a2
TL
5080 }
5081 dmar_domain->agaw--;
5082 }
fe40f1e0 5083
28ccce0d 5084 return domain_add_dev_info(dmar_domain, dev);
38717946 5085}
38717946 5086
4c5478c9
JR
5087static void intel_iommu_detach_device(struct iommu_domain *domain,
5088 struct device *dev)
38717946 5089{
e6de0f8d 5090 dmar_remove_one_dev_info(to_dmar_domain(domain), dev);
faa3d6f5 5091}
c7151a8d 5092
b146a1c9
JR
5093static int intel_iommu_map(struct iommu_domain *domain,
5094 unsigned long iova, phys_addr_t hpa,
5009065d 5095 size_t size, int iommu_prot)
faa3d6f5 5096{
00a77deb 5097 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
fe40f1e0 5098 u64 max_addr;
dde57a21 5099 int prot = 0;
faa3d6f5 5100 int ret;
fe40f1e0 5101
dde57a21
JR
5102 if (iommu_prot & IOMMU_READ)
5103 prot |= DMA_PTE_READ;
5104 if (iommu_prot & IOMMU_WRITE)
5105 prot |= DMA_PTE_WRITE;
9cf06697
SY
5106 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
5107 prot |= DMA_PTE_SNP;
dde57a21 5108
163cc52c 5109 max_addr = iova + size;
dde57a21 5110 if (dmar_domain->max_addr < max_addr) {
fe40f1e0
WH
5111 u64 end;
5112
5113 /* check if minimum agaw is sufficient for mapped address */
8954da1f 5114 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
fe40f1e0 5115 if (end < max_addr) {
9f10e5bf 5116 pr_err("%s: iommu width (%d) is not "
fe40f1e0 5117 "sufficient for the mapped address (%llx)\n",
8954da1f 5118 __func__, dmar_domain->gaw, max_addr);
fe40f1e0
WH
5119 return -EFAULT;
5120 }
dde57a21 5121 dmar_domain->max_addr = max_addr;
fe40f1e0 5122 }
ad051221
DW
5123 /* Round up size to next multiple of PAGE_SIZE, if it and
5124 the low bits of hpa would take us onto the next page */
88cb6a74 5125 size = aligned_nrpages(hpa, size);
ad051221
DW
5126 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
5127 hpa >> VTD_PAGE_SHIFT, size, prot);
faa3d6f5 5128 return ret;
38717946 5129}
38717946 5130
5009065d 5131static size_t intel_iommu_unmap(struct iommu_domain *domain,
ea8ea460 5132 unsigned long iova, size_t size)
38717946 5133{
00a77deb 5134 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
ea8ea460
DW
5135 struct page *freelist = NULL;
5136 struct intel_iommu *iommu;
5137 unsigned long start_pfn, last_pfn;
5138 unsigned int npages;
42e8c186 5139 int iommu_id, level = 0;
5cf0a76f
DW
5140
5141 /* Cope with horrid API which requires us to unmap more than the
5142 size argument if it happens to be a large-page mapping. */
dc02e46e 5143 BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
5cf0a76f
DW
5144
5145 if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
5146 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4b99d352 5147
ea8ea460
DW
5148 start_pfn = iova >> VTD_PAGE_SHIFT;
5149 last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
5150
5151 freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
5152
5153 npages = last_pfn - start_pfn + 1;
5154
29a27719 5155 for_each_domain_iommu(iommu_id, dmar_domain) {
a1ddcbe9 5156 iommu = g_iommus[iommu_id];
ea8ea460 5157
42e8c186
JR
5158 iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
5159 start_pfn, npages, !freelist, 0);
ea8ea460
DW
5160 }
5161
5162 dma_free_pagelist(freelist);
fe40f1e0 5163
163cc52c
DW
5164 if (dmar_domain->max_addr == iova + size)
5165 dmar_domain->max_addr = iova;
b146a1c9 5166
5cf0a76f 5167 return size;
38717946 5168}
38717946 5169
d14d6577 5170static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
bb5547ac 5171 dma_addr_t iova)
38717946 5172{
00a77deb 5173 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
38717946 5174 struct dma_pte *pte;
5cf0a76f 5175 int level = 0;
faa3d6f5 5176 u64 phys = 0;
38717946 5177
5cf0a76f 5178 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
38717946 5179 if (pte)
faa3d6f5 5180 phys = dma_pte_addr(pte);
38717946 5181
faa3d6f5 5182 return phys;
38717946 5183}
a8bcbb0d 5184
5d587b8d 5185static bool intel_iommu_capable(enum iommu_cap cap)
dbb9fd86 5186{
dbb9fd86 5187 if (cap == IOMMU_CAP_CACHE_COHERENCY)
5d587b8d 5188 return domain_update_iommu_snooping(NULL) == 1;
323f99cb 5189 if (cap == IOMMU_CAP_INTR_REMAP)
5d587b8d 5190 return irq_remapping_enabled == 1;
dbb9fd86 5191
5d587b8d 5192 return false;
dbb9fd86
SY
5193}
5194
abdfdde2
AW
5195static int intel_iommu_add_device(struct device *dev)
5196{
a5459cfe 5197 struct intel_iommu *iommu;
abdfdde2 5198 struct iommu_group *group;
156baca8 5199 u8 bus, devfn;
70ae6f0d 5200
a5459cfe
AW
5201 iommu = device_to_iommu(dev, &bus, &devfn);
5202 if (!iommu)
70ae6f0d
AW
5203 return -ENODEV;
5204
e3d10af1 5205 iommu_device_link(&iommu->iommu, dev);
a4ff1fc2 5206
e17f9ff4 5207 group = iommu_group_get_for_dev(dev);
783f157b 5208
e17f9ff4
AW
5209 if (IS_ERR(group))
5210 return PTR_ERR(group);
bcb71abe 5211
abdfdde2 5212 iommu_group_put(group);
e17f9ff4 5213 return 0;
abdfdde2 5214}
70ae6f0d 5215
abdfdde2
AW
5216static void intel_iommu_remove_device(struct device *dev)
5217{
a5459cfe
AW
5218 struct intel_iommu *iommu;
5219 u8 bus, devfn;
5220
5221 iommu = device_to_iommu(dev, &bus, &devfn);
5222 if (!iommu)
5223 return;
5224
abdfdde2 5225 iommu_group_remove_device(dev);
a5459cfe 5226
e3d10af1 5227 iommu_device_unlink(&iommu->iommu, dev);
70ae6f0d
AW
5228}
5229
0659b8dc
EA
5230static void intel_iommu_get_resv_regions(struct device *device,
5231 struct list_head *head)
5232{
5233 struct iommu_resv_region *reg;
5234 struct dmar_rmrr_unit *rmrr;
5235 struct device *i_dev;
5236 int i;
5237
5238 rcu_read_lock();
5239 for_each_rmrr_units(rmrr) {
5240 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
5241 i, i_dev) {
5242 if (i_dev != device)
5243 continue;
5244
5245 list_add_tail(&rmrr->resv->list, head);
5246 }
5247 }
5248 rcu_read_unlock();
5249
5250 reg = iommu_alloc_resv_region(IOAPIC_RANGE_START,
5251 IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1,
5252 0, IOMMU_RESV_RESERVED);
5253 if (!reg)
5254 return;
5255 list_add_tail(&reg->list, head);
5256}
5257
5258static void intel_iommu_put_resv_regions(struct device *dev,
5259 struct list_head *head)
5260{
5261 struct iommu_resv_region *entry, *next;
5262
5263 list_for_each_entry_safe(entry, next, head, list) {
5264 if (entry->type == IOMMU_RESV_RESERVED)
5265 kfree(entry);
5266 }
70ae6f0d
AW
5267}
5268
2f26e0a9 5269#ifdef CONFIG_INTEL_IOMMU_SVM
65ca7f5f
JP
5270#define MAX_NR_PASID_BITS (20)
5271static inline unsigned long intel_iommu_get_pts(struct intel_iommu *iommu)
5272{
5273 /*
5274 * Convert ecap_pss to extend context entry pts encoding, also
5275 * respect the soft pasid_max value set by the iommu.
5276 * - number of PASID bits = ecap_pss + 1
5277 * - number of PASID table entries = 2^(pts + 5)
5278 * Therefore, pts = ecap_pss - 4
5279 * e.g. KBL ecap_pss = 0x13, PASID has 20 bits, pts = 15
5280 */
5281 if (ecap_pss(iommu->ecap) < 5)
5282 return 0;
5283
5284 /* pasid_max is encoded as actual number of entries not the bits */
5285 return find_first_bit((unsigned long *)&iommu->pasid_max,
5286 MAX_NR_PASID_BITS) - 5;
5287}
5288
2f26e0a9
DW
5289int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev)
5290{
5291 struct device_domain_info *info;
5292 struct context_entry *context;
5293 struct dmar_domain *domain;
5294 unsigned long flags;
5295 u64 ctx_lo;
5296 int ret;
5297
5298 domain = get_valid_domain_for_dev(sdev->dev);
5299 if (!domain)
5300 return -EINVAL;
5301
5302 spin_lock_irqsave(&device_domain_lock, flags);
5303 spin_lock(&iommu->lock);
5304
5305 ret = -EINVAL;
5306 info = sdev->dev->archdata.iommu;
5307 if (!info || !info->pasid_supported)
5308 goto out;
5309
5310 context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
5311 if (WARN_ON(!context))
5312 goto out;
5313
5314 ctx_lo = context[0].lo;
5315
5316 sdev->did = domain->iommu_did[iommu->seq_id];
5317 sdev->sid = PCI_DEVID(info->bus, info->devfn);
5318
5319 if (!(ctx_lo & CONTEXT_PASIDE)) {
5320 context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table);
65ca7f5f
JP
5321 context[1].lo = (u64)virt_to_phys(iommu->pasid_table) |
5322 intel_iommu_get_pts(iommu);
5323
2f26e0a9
DW
5324 wmb();
5325 /* CONTEXT_TT_MULTI_LEVEL and CONTEXT_TT_DEV_IOTLB are both
5326 * extended to permit requests-with-PASID if the PASIDE bit
5327 * is set. which makes sense. For CONTEXT_TT_PASS_THROUGH,
5328 * however, the PASIDE bit is ignored and requests-with-PASID
5329 * are unconditionally blocked. Which makes less sense.
5330 * So convert from CONTEXT_TT_PASS_THROUGH to one of the new
5331 * "guest mode" translation types depending on whether ATS
5332 * is available or not. Annoyingly, we can't use the new
5333 * modes *unless* PASIDE is set. */
5334 if ((ctx_lo & CONTEXT_TT_MASK) == (CONTEXT_TT_PASS_THROUGH << 2)) {
5335 ctx_lo &= ~CONTEXT_TT_MASK;
5336 if (info->ats_supported)
5337 ctx_lo |= CONTEXT_TT_PT_PASID_DEV_IOTLB << 2;
5338 else
5339 ctx_lo |= CONTEXT_TT_PT_PASID << 2;
5340 }
5341 ctx_lo |= CONTEXT_PASIDE;
907fea34
DW
5342 if (iommu->pasid_state_table)
5343 ctx_lo |= CONTEXT_DINVE;
a222a7f0
DW
5344 if (info->pri_supported)
5345 ctx_lo |= CONTEXT_PRS;
2f26e0a9
DW
5346 context[0].lo = ctx_lo;
5347 wmb();
5348 iommu->flush.flush_context(iommu, sdev->did, sdev->sid,
5349 DMA_CCMD_MASK_NOBIT,
5350 DMA_CCMD_DEVICE_INVL);
5351 }
5352
5353 /* Enable PASID support in the device, if it wasn't already */
5354 if (!info->pasid_enabled)
5355 iommu_enable_dev_iotlb(info);
5356
5357 if (info->ats_enabled) {
5358 sdev->dev_iotlb = 1;
5359 sdev->qdep = info->ats_qdep;
5360 if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
5361 sdev->qdep = 0;
5362 }
5363 ret = 0;
5364
5365 out:
5366 spin_unlock(&iommu->lock);
5367 spin_unlock_irqrestore(&device_domain_lock, flags);
5368
5369 return ret;
5370}
5371
5372struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
5373{
5374 struct intel_iommu *iommu;
5375 u8 bus, devfn;
5376
5377 if (iommu_dummy(dev)) {
5378 dev_warn(dev,
5379 "No IOMMU translation for device; cannot enable SVM\n");
5380 return NULL;
5381 }
5382
5383 iommu = device_to_iommu(dev, &bus, &devfn);
5384 if ((!iommu)) {
b9997e38 5385 dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
2f26e0a9
DW
5386 return NULL;
5387 }
5388
5389 if (!iommu->pasid_table) {
b9997e38 5390 dev_err(dev, "PASID not enabled on IOMMU; cannot enable SVM\n");
2f26e0a9
DW
5391 return NULL;
5392 }
5393
5394 return iommu;
5395}
5396#endif /* CONFIG_INTEL_IOMMU_SVM */
5397
b0119e87 5398const struct iommu_ops intel_iommu_ops = {
0659b8dc
EA
5399 .capable = intel_iommu_capable,
5400 .domain_alloc = intel_iommu_domain_alloc,
5401 .domain_free = intel_iommu_domain_free,
5402 .attach_dev = intel_iommu_attach_device,
5403 .detach_dev = intel_iommu_detach_device,
5404 .map = intel_iommu_map,
5405 .unmap = intel_iommu_unmap,
5406 .map_sg = default_iommu_map_sg,
5407 .iova_to_phys = intel_iommu_iova_to_phys,
5408 .add_device = intel_iommu_add_device,
5409 .remove_device = intel_iommu_remove_device,
5410 .get_resv_regions = intel_iommu_get_resv_regions,
5411 .put_resv_regions = intel_iommu_put_resv_regions,
5412 .device_group = pci_device_group,
5413 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
a8bcbb0d 5414};
9af88143 5415
9452618e
DV
5416static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
5417{
5418 /* G4x/GM45 integrated gfx dmar support is totally busted. */
9f10e5bf 5419 pr_info("Disabling IOMMU for graphics on this chipset\n");
9452618e
DV
5420 dmar_map_gfx = 0;
5421}
5422
5423DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
5424DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
5425DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
5426DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
5427DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
5428DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
5429DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
5430
d34d6517 5431static void quirk_iommu_rwbf(struct pci_dev *dev)
9af88143
DW
5432{
5433 /*
5434 * Mobile 4 Series Chipset neglects to set RWBF capability,
210561ff 5435 * but needs it. Same seems to hold for the desktop versions.
9af88143 5436 */
9f10e5bf 5437 pr_info("Forcing write-buffer flush capability\n");
9af88143
DW
5438 rwbf_quirk = 1;
5439}
5440
5441DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
210561ff
DV
5442DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
5443DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
5444DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
5445DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
5446DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
5447DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
e0fc7e0b 5448
eecfd57f
AJ
5449#define GGC 0x52
5450#define GGC_MEMORY_SIZE_MASK (0xf << 8)
5451#define GGC_MEMORY_SIZE_NONE (0x0 << 8)
5452#define GGC_MEMORY_SIZE_1M (0x1 << 8)
5453#define GGC_MEMORY_SIZE_2M (0x3 << 8)
5454#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
5455#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
5456#define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
5457#define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
5458
d34d6517 5459static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
9eecabcb
DW
5460{
5461 unsigned short ggc;
5462
eecfd57f 5463 if (pci_read_config_word(dev, GGC, &ggc))
9eecabcb
DW
5464 return;
5465
eecfd57f 5466 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
9f10e5bf 5467 pr_info("BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
9eecabcb 5468 dmar_map_gfx = 0;
6fbcfb3e
DW
5469 } else if (dmar_map_gfx) {
5470 /* we have to ensure the gfx device is idle before we flush */
9f10e5bf 5471 pr_info("Disabling batched IOTLB flush on Ironlake\n");
6fbcfb3e
DW
5472 intel_iommu_strict = 1;
5473 }
9eecabcb
DW
5474}
5475DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
5476DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
5477DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
5478DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
5479
e0fc7e0b
DW
5480/* On Tylersburg chipsets, some BIOSes have been known to enable the
5481 ISOCH DMAR unit for the Azalia sound device, but not give it any
5482 TLB entries, which causes it to deadlock. Check for that. We do
5483 this in a function called from init_dmars(), instead of in a PCI
5484 quirk, because we don't want to print the obnoxious "BIOS broken"
5485 message if VT-d is actually disabled.
5486*/
5487static void __init check_tylersburg_isoch(void)
5488{
5489 struct pci_dev *pdev;
5490 uint32_t vtisochctrl;
5491
5492 /* If there's no Azalia in the system anyway, forget it. */
5493 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
5494 if (!pdev)
5495 return;
5496 pci_dev_put(pdev);
5497
5498 /* System Management Registers. Might be hidden, in which case
5499 we can't do the sanity check. But that's OK, because the
5500 known-broken BIOSes _don't_ actually hide it, so far. */
5501 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
5502 if (!pdev)
5503 return;
5504
5505 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
5506 pci_dev_put(pdev);
5507 return;
5508 }
5509
5510 pci_dev_put(pdev);
5511
5512 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
5513 if (vtisochctrl & 1)
5514 return;
5515
5516 /* Drop all bits other than the number of TLB entries */
5517 vtisochctrl &= 0x1c;
5518
5519 /* If we have the recommended number of TLB entries (16), fine. */
5520 if (vtisochctrl == 0x10)
5521 return;
5522
5523 /* Zero TLB entries? You get to ride the short bus to school. */
5524 if (!vtisochctrl) {
5525 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
5526 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
5527 dmi_get_system_info(DMI_BIOS_VENDOR),
5528 dmi_get_system_info(DMI_BIOS_VERSION),
5529 dmi_get_system_info(DMI_PRODUCT_VERSION));
5530 iommu_identity_mapping |= IDENTMAP_AZALIA;
5531 return;
5532 }
9f10e5bf
JR
5533
5534 pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
e0fc7e0b
DW
5535 vtisochctrl);
5536}