]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - drivers/iommu/intel-iommu.c
iommu/dma: Don't touch invalid iova_domain members
[mirror_ubuntu-artful-kernel.git] / drivers / iommu / intel-iommu.c
CommitLineData
ba395927 1/*
ea8ea460 2 * Copyright © 2006-2014 Intel Corporation.
ba395927
KA
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
ea8ea460
DW
13 * Authors: David Woodhouse <dwmw2@infradead.org>,
14 * Ashok Raj <ashok.raj@intel.com>,
15 * Shaohua Li <shaohua.li@intel.com>,
16 * Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
17 * Fenghua Yu <fenghua.yu@intel.com>
9f10e5bf 18 * Joerg Roedel <jroedel@suse.de>
ba395927
KA
19 */
20
9f10e5bf
JR
21#define pr_fmt(fmt) "DMAR: " fmt
22
ba395927
KA
23#include <linux/init.h>
24#include <linux/bitmap.h>
5e0d2a6f 25#include <linux/debugfs.h>
54485c30 26#include <linux/export.h>
ba395927
KA
27#include <linux/slab.h>
28#include <linux/irq.h>
29#include <linux/interrupt.h>
ba395927
KA
30#include <linux/spinlock.h>
31#include <linux/pci.h>
32#include <linux/dmar.h>
33#include <linux/dma-mapping.h>
34#include <linux/mempool.h>
75f05569 35#include <linux/memory.h>
aa473240 36#include <linux/cpu.h>
5e0d2a6f 37#include <linux/timer.h>
dfddb969 38#include <linux/io.h>
38717946 39#include <linux/iova.h>
5d450806 40#include <linux/iommu.h>
38717946 41#include <linux/intel-iommu.h>
134fac3f 42#include <linux/syscore_ops.h>
69575d38 43#include <linux/tboot.h>
adb2fe02 44#include <linux/dmi.h>
5cdede24 45#include <linux/pci-ats.h>
0ee332c1 46#include <linux/memblock.h>
36746436 47#include <linux/dma-contiguous.h>
091d42e4 48#include <linux/crash_dump.h>
8a8f422d 49#include <asm/irq_remapping.h>
ba395927 50#include <asm/cacheflush.h>
46a7fa27 51#include <asm/iommu.h>
ba395927 52
078e1ee2
JR
53#include "irq_remapping.h"
54
5b6985ce
FY
55#define ROOT_SIZE VTD_PAGE_SIZE
56#define CONTEXT_SIZE VTD_PAGE_SIZE
57
ba395927 58#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
18436afd 59#define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
ba395927 60#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
e0fc7e0b 61#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
ba395927
KA
62
63#define IOAPIC_RANGE_START (0xfee00000)
64#define IOAPIC_RANGE_END (0xfeefffff)
65#define IOVA_START_ADDR (0x1000)
66
67#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
68
4ed0d3e6 69#define MAX_AGAW_WIDTH 64
5c645b35 70#define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
4ed0d3e6 71
2ebe3151
DW
72#define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
73#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
74
75/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
76 to match. That way, we can use 'unsigned long' for PFNs with impunity. */
77#define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
78 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
79#define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
ba395927 80
1b722500
RM
81/* IO virtual address start page frame number */
82#define IOVA_START_PFN (1)
83
f27be03b 84#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
284901a9 85#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32))
6a35528a 86#define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64))
5e0d2a6f 87
df08cdc7
AM
88/* page table handling */
89#define LEVEL_STRIDE (9)
90#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
91
6d1c56a9
OBC
92/*
93 * This bitmap is used to advertise the page sizes our hardware support
94 * to the IOMMU core, which will then use this information to split
95 * physically contiguous memory regions it is mapping into page sizes
96 * that we support.
97 *
98 * Traditionally the IOMMU core just handed us the mappings directly,
99 * after making sure the size is an order of a 4KiB page and that the
100 * mapping has natural alignment.
101 *
102 * To retain this behavior, we currently advertise that we support
103 * all page sizes that are an order of 4KiB.
104 *
105 * If at some point we'd like to utilize the IOMMU core's new behavior,
106 * we could change this to advertise the real page sizes we support.
107 */
108#define INTEL_IOMMU_PGSIZES (~0xFFFUL)
109
df08cdc7
AM
110static inline int agaw_to_level(int agaw)
111{
112 return agaw + 2;
113}
114
115static inline int agaw_to_width(int agaw)
116{
5c645b35 117 return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
df08cdc7
AM
118}
119
120static inline int width_to_agaw(int width)
121{
5c645b35 122 return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
df08cdc7
AM
123}
124
125static inline unsigned int level_to_offset_bits(int level)
126{
127 return (level - 1) * LEVEL_STRIDE;
128}
129
130static inline int pfn_level_offset(unsigned long pfn, int level)
131{
132 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
133}
134
135static inline unsigned long level_mask(int level)
136{
137 return -1UL << level_to_offset_bits(level);
138}
139
140static inline unsigned long level_size(int level)
141{
142 return 1UL << level_to_offset_bits(level);
143}
144
145static inline unsigned long align_to_level(unsigned long pfn, int level)
146{
147 return (pfn + level_size(level) - 1) & level_mask(level);
148}
fd18de50 149
6dd9a7c7
YS
150static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
151{
5c645b35 152 return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
6dd9a7c7
YS
153}
154
dd4e8319
DW
155/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
156 are never going to work. */
157static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
158{
159 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
160}
161
162static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
163{
164 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
165}
166static inline unsigned long page_to_dma_pfn(struct page *pg)
167{
168 return mm_to_dma_pfn(page_to_pfn(pg));
169}
170static inline unsigned long virt_to_dma_pfn(void *p)
171{
172 return page_to_dma_pfn(virt_to_page(p));
173}
174
d9630fe9
WH
175/* global iommu list, set NULL for ignored DMAR units */
176static struct intel_iommu **g_iommus;
177
e0fc7e0b 178static void __init check_tylersburg_isoch(void);
9af88143
DW
179static int rwbf_quirk;
180
b779260b
JC
181/*
182 * set to 1 to panic kernel if can't successfully enable VT-d
183 * (used when kernel is launched w/ TXT)
184 */
185static int force_on = 0;
bfd20f1c 186int intel_iommu_tboot_noforce;
b779260b 187
46b08e1a
MM
188/*
189 * 0: Present
190 * 1-11: Reserved
191 * 12-63: Context Ptr (12 - (haw-1))
192 * 64-127: Reserved
193 */
194struct root_entry {
03ecc32c
DW
195 u64 lo;
196 u64 hi;
46b08e1a
MM
197};
198#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
46b08e1a 199
091d42e4
JR
200/*
201 * Take a root_entry and return the Lower Context Table Pointer (LCTP)
202 * if marked present.
203 */
204static phys_addr_t root_entry_lctp(struct root_entry *re)
205{
206 if (!(re->lo & 1))
207 return 0;
208
209 return re->lo & VTD_PAGE_MASK;
210}
211
212/*
213 * Take a root_entry and return the Upper Context Table Pointer (UCTP)
214 * if marked present.
215 */
216static phys_addr_t root_entry_uctp(struct root_entry *re)
217{
218 if (!(re->hi & 1))
219 return 0;
46b08e1a 220
091d42e4
JR
221 return re->hi & VTD_PAGE_MASK;
222}
7a8fc25e
MM
223/*
224 * low 64 bits:
225 * 0: present
226 * 1: fault processing disable
227 * 2-3: translation type
228 * 12-63: address space root
229 * high 64 bits:
230 * 0-2: address width
231 * 3-6: aval
232 * 8-23: domain id
233 */
234struct context_entry {
235 u64 lo;
236 u64 hi;
237};
c07e7d21 238
cf484d0e
JR
239static inline void context_clear_pasid_enable(struct context_entry *context)
240{
241 context->lo &= ~(1ULL << 11);
242}
243
244static inline bool context_pasid_enabled(struct context_entry *context)
245{
246 return !!(context->lo & (1ULL << 11));
247}
248
249static inline void context_set_copied(struct context_entry *context)
250{
251 context->hi |= (1ull << 3);
252}
253
254static inline bool context_copied(struct context_entry *context)
255{
256 return !!(context->hi & (1ULL << 3));
257}
258
259static inline bool __context_present(struct context_entry *context)
c07e7d21
MM
260{
261 return (context->lo & 1);
262}
cf484d0e
JR
263
264static inline bool context_present(struct context_entry *context)
265{
266 return context_pasid_enabled(context) ?
267 __context_present(context) :
268 __context_present(context) && !context_copied(context);
269}
270
c07e7d21
MM
271static inline void context_set_present(struct context_entry *context)
272{
273 context->lo |= 1;
274}
275
276static inline void context_set_fault_enable(struct context_entry *context)
277{
278 context->lo &= (((u64)-1) << 2) | 1;
279}
280
c07e7d21
MM
281static inline void context_set_translation_type(struct context_entry *context,
282 unsigned long value)
283{
284 context->lo &= (((u64)-1) << 4) | 3;
285 context->lo |= (value & 3) << 2;
286}
287
288static inline void context_set_address_root(struct context_entry *context,
289 unsigned long value)
290{
1a2262f9 291 context->lo &= ~VTD_PAGE_MASK;
c07e7d21
MM
292 context->lo |= value & VTD_PAGE_MASK;
293}
294
295static inline void context_set_address_width(struct context_entry *context,
296 unsigned long value)
297{
298 context->hi |= value & 7;
299}
300
301static inline void context_set_domain_id(struct context_entry *context,
302 unsigned long value)
303{
304 context->hi |= (value & ((1 << 16) - 1)) << 8;
305}
306
dbcd861f
JR
307static inline int context_domain_id(struct context_entry *c)
308{
309 return((c->hi >> 8) & 0xffff);
310}
311
c07e7d21
MM
312static inline void context_clear_entry(struct context_entry *context)
313{
314 context->lo = 0;
315 context->hi = 0;
316}
7a8fc25e 317
622ba12a
MM
318/*
319 * 0: readable
320 * 1: writable
321 * 2-6: reserved
322 * 7: super page
9cf06697
SY
323 * 8-10: available
324 * 11: snoop behavior
622ba12a
MM
325 * 12-63: Host physcial address
326 */
327struct dma_pte {
328 u64 val;
329};
622ba12a 330
19c239ce
MM
331static inline void dma_clear_pte(struct dma_pte *pte)
332{
333 pte->val = 0;
334}
335
19c239ce
MM
336static inline u64 dma_pte_addr(struct dma_pte *pte)
337{
c85994e4
DW
338#ifdef CONFIG_64BIT
339 return pte->val & VTD_PAGE_MASK;
340#else
341 /* Must have a full atomic 64-bit read */
1a8bd481 342 return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
c85994e4 343#endif
19c239ce
MM
344}
345
19c239ce
MM
346static inline bool dma_pte_present(struct dma_pte *pte)
347{
348 return (pte->val & 3) != 0;
349}
622ba12a 350
4399c8bf
AK
351static inline bool dma_pte_superpage(struct dma_pte *pte)
352{
c3c75eb7 353 return (pte->val & DMA_PTE_LARGE_PAGE);
4399c8bf
AK
354}
355
75e6bf96
DW
356static inline int first_pte_in_page(struct dma_pte *pte)
357{
358 return !((unsigned long)pte & ~VTD_PAGE_MASK);
359}
360
2c2e2c38
FY
361/*
362 * This domain is a statically identity mapping domain.
363 * 1. This domain creats a static 1:1 mapping to all usable memory.
364 * 2. It maps to each iommu if successful.
365 * 3. Each iommu mapps to this domain if successful.
366 */
19943b0e
DW
367static struct dmar_domain *si_domain;
368static int hw_pass_through = 1;
2c2e2c38 369
28ccce0d
JR
370/*
371 * Domain represents a virtual machine, more than one devices
1ce28feb
WH
372 * across iommus may be owned in one domain, e.g. kvm guest.
373 */
ab8dfe25 374#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 0)
1ce28feb 375
2c2e2c38 376/* si_domain contains mulitple devices */
ab8dfe25 377#define DOMAIN_FLAG_STATIC_IDENTITY (1 << 1)
2c2e2c38 378
29a27719
JR
379#define for_each_domain_iommu(idx, domain) \
380 for (idx = 0; idx < g_num_of_iommus; idx++) \
381 if (domain->iommu_refcnt[idx])
382
99126f7c 383struct dmar_domain {
4c923d47 384 int nid; /* node id */
29a27719
JR
385
386 unsigned iommu_refcnt[DMAR_UNITS_SUPPORTED];
387 /* Refcount of devices per iommu */
388
99126f7c 389
c0e8a6c8
JR
390 u16 iommu_did[DMAR_UNITS_SUPPORTED];
391 /* Domain ids per IOMMU. Use u16 since
392 * domain ids are 16 bit wide according
393 * to VT-d spec, section 9.3 */
99126f7c 394
0824c592 395 bool has_iotlb_device;
00a77deb 396 struct list_head devices; /* all devices' list */
99126f7c
MM
397 struct iova_domain iovad; /* iova's that belong to this domain */
398
399 struct dma_pte *pgd; /* virtual address */
99126f7c
MM
400 int gaw; /* max guest address width */
401
402 /* adjusted guest address width, 0 is level 2 30-bit */
403 int agaw;
404
3b5410e7 405 int flags; /* flags to find out type of domain */
8e604097
WH
406
407 int iommu_coherency;/* indicate coherency of iommu access */
58c610bd 408 int iommu_snooping; /* indicate snooping control feature*/
c7151a8d 409 int iommu_count; /* reference count of iommu */
6dd9a7c7
YS
410 int iommu_superpage;/* Level of superpages supported:
411 0 == 4KiB (no superpages), 1 == 2MiB,
412 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
fe40f1e0 413 u64 max_addr; /* maximum mapped address */
00a77deb
JR
414
415 struct iommu_domain domain; /* generic domain data structure for
416 iommu core */
99126f7c
MM
417};
418
a647dacb
MM
419/* PCI domain-device relationship */
420struct device_domain_info {
421 struct list_head link; /* link to domain siblings */
422 struct list_head global; /* link to global list */
276dbf99 423 u8 bus; /* PCI bus number */
a647dacb 424 u8 devfn; /* PCI devfn number */
b16d0cb9
DW
425 u8 pasid_supported:3;
426 u8 pasid_enabled:1;
427 u8 pri_supported:1;
428 u8 pri_enabled:1;
429 u8 ats_supported:1;
430 u8 ats_enabled:1;
431 u8 ats_qdep;
0bcb3e28 432 struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
93a23a72 433 struct intel_iommu *iommu; /* IOMMU used by this device */
a647dacb
MM
434 struct dmar_domain *domain; /* pointer to domain */
435};
436
b94e4117
JL
437struct dmar_rmrr_unit {
438 struct list_head list; /* list of rmrr units */
439 struct acpi_dmar_header *hdr; /* ACPI header */
440 u64 base_address; /* reserved base address*/
441 u64 end_address; /* reserved end address */
832bd858 442 struct dmar_dev_scope *devices; /* target devices */
b94e4117 443 int devices_cnt; /* target device count */
0659b8dc 444 struct iommu_resv_region *resv; /* reserved region handle */
b94e4117
JL
445};
446
447struct dmar_atsr_unit {
448 struct list_head list; /* list of ATSR units */
449 struct acpi_dmar_header *hdr; /* ACPI header */
832bd858 450 struct dmar_dev_scope *devices; /* target devices */
b94e4117
JL
451 int devices_cnt; /* target device count */
452 u8 include_all:1; /* include all ports */
453};
454
455static LIST_HEAD(dmar_atsr_units);
456static LIST_HEAD(dmar_rmrr_units);
457
458#define for_each_rmrr_units(rmrr) \
459 list_for_each_entry(rmrr, &dmar_rmrr_units, list)
460
5e0d2a6f 461static void flush_unmaps_timeout(unsigned long data);
462
314f1dc1 463struct deferred_flush_entry {
2aac6304 464 unsigned long iova_pfn;
769530e4 465 unsigned long nrpages;
314f1dc1
OP
466 struct dmar_domain *domain;
467 struct page *freelist;
468};
5e0d2a6f 469
80b20dd8 470#define HIGH_WATER_MARK 250
314f1dc1 471struct deferred_flush_table {
80b20dd8 472 int next;
314f1dc1 473 struct deferred_flush_entry entries[HIGH_WATER_MARK];
80b20dd8 474};
475
aa473240
OP
476struct deferred_flush_data {
477 spinlock_t lock;
478 int timer_on;
479 struct timer_list timer;
480 long size;
481 struct deferred_flush_table *tables;
80b20dd8 482};
483
aa473240 484DEFINE_PER_CPU(struct deferred_flush_data, deferred_flush);
80b20dd8 485
5e0d2a6f 486/* bitmap for indexing intel_iommus */
5e0d2a6f 487static int g_num_of_iommus;
488
92d03cc8 489static void domain_exit(struct dmar_domain *domain);
ba395927 490static void domain_remove_dev_info(struct dmar_domain *domain);
e6de0f8d
JR
491static void dmar_remove_one_dev_info(struct dmar_domain *domain,
492 struct device *dev);
127c7615 493static void __dmar_remove_one_dev_info(struct device_domain_info *info);
2452d9db
JR
494static void domain_context_clear(struct intel_iommu *iommu,
495 struct device *dev);
2a46ddf7
JL
496static int domain_detach_iommu(struct dmar_domain *domain,
497 struct intel_iommu *iommu);
ba395927 498
d3f13810 499#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
0cd5c3c8
KM
500int dmar_disabled = 0;
501#else
502int dmar_disabled = 1;
d3f13810 503#endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
0cd5c3c8 504
8bc1f85c
ED
505int intel_iommu_enabled = 0;
506EXPORT_SYMBOL_GPL(intel_iommu_enabled);
507
2d9e667e 508static int dmar_map_gfx = 1;
7d3b03ce 509static int dmar_forcedac;
5e0d2a6f 510static int intel_iommu_strict;
6dd9a7c7 511static int intel_iommu_superpage = 1;
c83b2f20 512static int intel_iommu_ecs = 1;
ae853ddb
DW
513static int intel_iommu_pasid28;
514static int iommu_identity_mapping;
c83b2f20 515
ae853ddb
DW
516#define IDENTMAP_ALL 1
517#define IDENTMAP_GFX 2
518#define IDENTMAP_AZALIA 4
c83b2f20 519
d42fde70
DW
520/* Broadwell and Skylake have broken ECS support — normal so-called "second
521 * level" translation of DMA requests-without-PASID doesn't actually happen
522 * unless you also set the NESTE bit in an extended context-entry. Which of
523 * course means that SVM doesn't work because it's trying to do nested
524 * translation of the physical addresses it finds in the process page tables,
525 * through the IOVA->phys mapping found in the "second level" page tables.
526 *
527 * The VT-d specification was retroactively changed to change the definition
528 * of the capability bits and pretend that Broadwell/Skylake never happened...
529 * but unfortunately the wrong bit was changed. It's ECS which is broken, but
530 * for some reason it was the PASID capability bit which was redefined (from
531 * bit 28 on BDW/SKL to bit 40 in future).
532 *
533 * So our test for ECS needs to eschew those implementations which set the old
534 * PASID capabiity bit 28, since those are the ones on which ECS is broken.
535 * Unless we are working around the 'pasid28' limitations, that is, by putting
536 * the device into passthrough mode for normal DMA and thus masking the bug.
537 */
c83b2f20 538#define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap) && \
d42fde70
DW
539 (intel_iommu_pasid28 || !ecap_broken_pasid(iommu->ecap)))
540/* PASID support is thus enabled if ECS is enabled and *either* of the old
541 * or new capability bits are set. */
542#define pasid_enabled(iommu) (ecs_enabled(iommu) && \
543 (ecap_pasid(iommu->ecap) || ecap_broken_pasid(iommu->ecap)))
ba395927 544
c0771df8
DW
545int intel_iommu_gfx_mapped;
546EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
547
ba395927
KA
548#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
549static DEFINE_SPINLOCK(device_domain_lock);
550static LIST_HEAD(device_domain_list);
551
b0119e87 552const struct iommu_ops intel_iommu_ops;
a8bcbb0d 553
4158c2ec
JR
554static bool translation_pre_enabled(struct intel_iommu *iommu)
555{
556 return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
557}
558
091d42e4
JR
559static void clear_translation_pre_enabled(struct intel_iommu *iommu)
560{
561 iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
562}
563
4158c2ec
JR
564static void init_translation_status(struct intel_iommu *iommu)
565{
566 u32 gsts;
567
568 gsts = readl(iommu->reg + DMAR_GSTS_REG);
569 if (gsts & DMA_GSTS_TES)
570 iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
571}
572
00a77deb
JR
573/* Convert generic 'struct iommu_domain to private struct dmar_domain */
574static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
575{
576 return container_of(dom, struct dmar_domain, domain);
577}
578
ba395927
KA
579static int __init intel_iommu_setup(char *str)
580{
581 if (!str)
582 return -EINVAL;
583 while (*str) {
0cd5c3c8
KM
584 if (!strncmp(str, "on", 2)) {
585 dmar_disabled = 0;
9f10e5bf 586 pr_info("IOMMU enabled\n");
0cd5c3c8 587 } else if (!strncmp(str, "off", 3)) {
ba395927 588 dmar_disabled = 1;
9f10e5bf 589 pr_info("IOMMU disabled\n");
ba395927
KA
590 } else if (!strncmp(str, "igfx_off", 8)) {
591 dmar_map_gfx = 0;
9f10e5bf 592 pr_info("Disable GFX device mapping\n");
7d3b03ce 593 } else if (!strncmp(str, "forcedac", 8)) {
9f10e5bf 594 pr_info("Forcing DAC for PCI devices\n");
7d3b03ce 595 dmar_forcedac = 1;
5e0d2a6f 596 } else if (!strncmp(str, "strict", 6)) {
9f10e5bf 597 pr_info("Disable batched IOTLB flush\n");
5e0d2a6f 598 intel_iommu_strict = 1;
6dd9a7c7 599 } else if (!strncmp(str, "sp_off", 6)) {
9f10e5bf 600 pr_info("Disable supported super page\n");
6dd9a7c7 601 intel_iommu_superpage = 0;
c83b2f20
DW
602 } else if (!strncmp(str, "ecs_off", 7)) {
603 printk(KERN_INFO
604 "Intel-IOMMU: disable extended context table support\n");
605 intel_iommu_ecs = 0;
ae853ddb
DW
606 } else if (!strncmp(str, "pasid28", 7)) {
607 printk(KERN_INFO
608 "Intel-IOMMU: enable pre-production PASID support\n");
609 intel_iommu_pasid28 = 1;
610 iommu_identity_mapping |= IDENTMAP_GFX;
bfd20f1c
SL
611 } else if (!strncmp(str, "tboot_noforce", 13)) {
612 printk(KERN_INFO
613 "Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
614 intel_iommu_tboot_noforce = 1;
ba395927
KA
615 }
616
617 str += strcspn(str, ",");
618 while (*str == ',')
619 str++;
620 }
621 return 0;
622}
623__setup("intel_iommu=", intel_iommu_setup);
624
625static struct kmem_cache *iommu_domain_cache;
626static struct kmem_cache *iommu_devinfo_cache;
ba395927 627
9452d5bf
JR
628static struct dmar_domain* get_iommu_domain(struct intel_iommu *iommu, u16 did)
629{
8bf47816
JR
630 struct dmar_domain **domains;
631 int idx = did >> 8;
632
633 domains = iommu->domains[idx];
634 if (!domains)
635 return NULL;
636
637 return domains[did & 0xff];
9452d5bf
JR
638}
639
640static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
641 struct dmar_domain *domain)
642{
8bf47816
JR
643 struct dmar_domain **domains;
644 int idx = did >> 8;
645
646 if (!iommu->domains[idx]) {
647 size_t size = 256 * sizeof(struct dmar_domain *);
648 iommu->domains[idx] = kzalloc(size, GFP_ATOMIC);
649 }
650
651 domains = iommu->domains[idx];
652 if (WARN_ON(!domains))
653 return;
654 else
655 domains[did & 0xff] = domain;
9452d5bf
JR
656}
657
4c923d47 658static inline void *alloc_pgtable_page(int node)
eb3fa7cb 659{
4c923d47
SS
660 struct page *page;
661 void *vaddr = NULL;
eb3fa7cb 662
4c923d47
SS
663 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
664 if (page)
665 vaddr = page_address(page);
eb3fa7cb 666 return vaddr;
ba395927
KA
667}
668
669static inline void free_pgtable_page(void *vaddr)
670{
671 free_page((unsigned long)vaddr);
672}
673
674static inline void *alloc_domain_mem(void)
675{
354bb65e 676 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
ba395927
KA
677}
678
38717946 679static void free_domain_mem(void *vaddr)
ba395927
KA
680{
681 kmem_cache_free(iommu_domain_cache, vaddr);
682}
683
684static inline void * alloc_devinfo_mem(void)
685{
354bb65e 686 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
ba395927
KA
687}
688
689static inline void free_devinfo_mem(void *vaddr)
690{
691 kmem_cache_free(iommu_devinfo_cache, vaddr);
692}
693
ab8dfe25
JL
694static inline int domain_type_is_vm(struct dmar_domain *domain)
695{
696 return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE;
697}
698
28ccce0d
JR
699static inline int domain_type_is_si(struct dmar_domain *domain)
700{
701 return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
702}
703
ab8dfe25
JL
704static inline int domain_type_is_vm_or_si(struct dmar_domain *domain)
705{
706 return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE |
707 DOMAIN_FLAG_STATIC_IDENTITY);
708}
1b573683 709
162d1b10
JL
710static inline int domain_pfn_supported(struct dmar_domain *domain,
711 unsigned long pfn)
712{
713 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
714
715 return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
716}
717
4ed0d3e6 718static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
1b573683
WH
719{
720 unsigned long sagaw;
721 int agaw = -1;
722
723 sagaw = cap_sagaw(iommu->cap);
4ed0d3e6 724 for (agaw = width_to_agaw(max_gaw);
1b573683
WH
725 agaw >= 0; agaw--) {
726 if (test_bit(agaw, &sagaw))
727 break;
728 }
729
730 return agaw;
731}
732
4ed0d3e6
FY
733/*
734 * Calculate max SAGAW for each iommu.
735 */
736int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
737{
738 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
739}
740
741/*
742 * calculate agaw for each iommu.
743 * "SAGAW" may be different across iommus, use a default agaw, and
744 * get a supported less agaw for iommus that don't support the default agaw.
745 */
746int iommu_calculate_agaw(struct intel_iommu *iommu)
747{
748 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
749}
750
2c2e2c38 751/* This functionin only returns single iommu in a domain */
8c11e798
WH
752static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
753{
754 int iommu_id;
755
2c2e2c38 756 /* si_domain and vm domain should not get here. */
ab8dfe25 757 BUG_ON(domain_type_is_vm_or_si(domain));
29a27719
JR
758 for_each_domain_iommu(iommu_id, domain)
759 break;
760
8c11e798
WH
761 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
762 return NULL;
763
764 return g_iommus[iommu_id];
765}
766
8e604097
WH
767static void domain_update_iommu_coherency(struct dmar_domain *domain)
768{
d0501960
DW
769 struct dmar_drhd_unit *drhd;
770 struct intel_iommu *iommu;
2f119c78
QL
771 bool found = false;
772 int i;
2e12bc29 773
d0501960 774 domain->iommu_coherency = 1;
8e604097 775
29a27719 776 for_each_domain_iommu(i, domain) {
2f119c78 777 found = true;
8e604097
WH
778 if (!ecap_coherent(g_iommus[i]->ecap)) {
779 domain->iommu_coherency = 0;
780 break;
781 }
8e604097 782 }
d0501960
DW
783 if (found)
784 return;
785
786 /* No hardware attached; use lowest common denominator */
787 rcu_read_lock();
788 for_each_active_iommu(iommu, drhd) {
789 if (!ecap_coherent(iommu->ecap)) {
790 domain->iommu_coherency = 0;
791 break;
792 }
793 }
794 rcu_read_unlock();
8e604097
WH
795}
796
161f6934 797static int domain_update_iommu_snooping(struct intel_iommu *skip)
58c610bd 798{
161f6934
JL
799 struct dmar_drhd_unit *drhd;
800 struct intel_iommu *iommu;
801 int ret = 1;
58c610bd 802
161f6934
JL
803 rcu_read_lock();
804 for_each_active_iommu(iommu, drhd) {
805 if (iommu != skip) {
806 if (!ecap_sc_support(iommu->ecap)) {
807 ret = 0;
808 break;
809 }
58c610bd 810 }
58c610bd 811 }
161f6934
JL
812 rcu_read_unlock();
813
814 return ret;
58c610bd
SY
815}
816
161f6934 817static int domain_update_iommu_superpage(struct intel_iommu *skip)
6dd9a7c7 818{
8140a95d 819 struct dmar_drhd_unit *drhd;
161f6934 820 struct intel_iommu *iommu;
8140a95d 821 int mask = 0xf;
6dd9a7c7
YS
822
823 if (!intel_iommu_superpage) {
161f6934 824 return 0;
6dd9a7c7
YS
825 }
826
8140a95d 827 /* set iommu_superpage to the smallest common denominator */
0e242612 828 rcu_read_lock();
8140a95d 829 for_each_active_iommu(iommu, drhd) {
161f6934
JL
830 if (iommu != skip) {
831 mask &= cap_super_page_val(iommu->cap);
832 if (!mask)
833 break;
6dd9a7c7
YS
834 }
835 }
0e242612
JL
836 rcu_read_unlock();
837
161f6934 838 return fls(mask);
6dd9a7c7
YS
839}
840
58c610bd
SY
841/* Some capabilities may be different across iommus */
842static void domain_update_iommu_cap(struct dmar_domain *domain)
843{
844 domain_update_iommu_coherency(domain);
161f6934
JL
845 domain->iommu_snooping = domain_update_iommu_snooping(NULL);
846 domain->iommu_superpage = domain_update_iommu_superpage(NULL);
58c610bd
SY
847}
848
03ecc32c
DW
849static inline struct context_entry *iommu_context_addr(struct intel_iommu *iommu,
850 u8 bus, u8 devfn, int alloc)
851{
852 struct root_entry *root = &iommu->root_entry[bus];
853 struct context_entry *context;
854 u64 *entry;
855
4df4eab1 856 entry = &root->lo;
c83b2f20 857 if (ecs_enabled(iommu)) {
03ecc32c
DW
858 if (devfn >= 0x80) {
859 devfn -= 0x80;
860 entry = &root->hi;
861 }
862 devfn *= 2;
863 }
03ecc32c
DW
864 if (*entry & 1)
865 context = phys_to_virt(*entry & VTD_PAGE_MASK);
866 else {
867 unsigned long phy_addr;
868 if (!alloc)
869 return NULL;
870
871 context = alloc_pgtable_page(iommu->node);
872 if (!context)
873 return NULL;
874
875 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
876 phy_addr = virt_to_phys((void *)context);
877 *entry = phy_addr | 1;
878 __iommu_flush_cache(iommu, entry, sizeof(*entry));
879 }
880 return &context[devfn];
881}
882
4ed6a540
DW
883static int iommu_dummy(struct device *dev)
884{
885 return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
886}
887
156baca8 888static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
c7151a8d
WH
889{
890 struct dmar_drhd_unit *drhd = NULL;
b683b230 891 struct intel_iommu *iommu;
156baca8
DW
892 struct device *tmp;
893 struct pci_dev *ptmp, *pdev = NULL;
aa4d066a 894 u16 segment = 0;
c7151a8d
WH
895 int i;
896
4ed6a540
DW
897 if (iommu_dummy(dev))
898 return NULL;
899
156baca8 900 if (dev_is_pci(dev)) {
1c387188
AR
901 struct pci_dev *pf_pdev;
902
156baca8 903 pdev = to_pci_dev(dev);
1c387188
AR
904 /* VFs aren't listed in scope tables; we need to look up
905 * the PF instead to find the IOMMU. */
906 pf_pdev = pci_physfn(pdev);
907 dev = &pf_pdev->dev;
156baca8 908 segment = pci_domain_nr(pdev->bus);
ca5b74d2 909 } else if (has_acpi_companion(dev))
156baca8
DW
910 dev = &ACPI_COMPANION(dev)->dev;
911
0e242612 912 rcu_read_lock();
b683b230 913 for_each_active_iommu(iommu, drhd) {
156baca8 914 if (pdev && segment != drhd->segment)
276dbf99 915 continue;
c7151a8d 916
b683b230 917 for_each_active_dev_scope(drhd->devices,
156baca8
DW
918 drhd->devices_cnt, i, tmp) {
919 if (tmp == dev) {
1c387188
AR
920 /* For a VF use its original BDF# not that of the PF
921 * which we used for the IOMMU lookup. Strictly speaking
922 * we could do this for all PCI devices; we only need to
923 * get the BDF# from the scope table for ACPI matches. */
5003ae1e 924 if (pdev && pdev->is_virtfn)
1c387188
AR
925 goto got_pdev;
926
156baca8
DW
927 *bus = drhd->devices[i].bus;
928 *devfn = drhd->devices[i].devfn;
b683b230 929 goto out;
156baca8
DW
930 }
931
932 if (!pdev || !dev_is_pci(tmp))
933 continue;
934
935 ptmp = to_pci_dev(tmp);
936 if (ptmp->subordinate &&
937 ptmp->subordinate->number <= pdev->bus->number &&
938 ptmp->subordinate->busn_res.end >= pdev->bus->number)
939 goto got_pdev;
924b6231 940 }
c7151a8d 941
156baca8
DW
942 if (pdev && drhd->include_all) {
943 got_pdev:
944 *bus = pdev->bus->number;
945 *devfn = pdev->devfn;
b683b230 946 goto out;
156baca8 947 }
c7151a8d 948 }
b683b230 949 iommu = NULL;
156baca8 950 out:
0e242612 951 rcu_read_unlock();
c7151a8d 952
b683b230 953 return iommu;
c7151a8d
WH
954}
955
5331fe6f
WH
956static void domain_flush_cache(struct dmar_domain *domain,
957 void *addr, int size)
958{
959 if (!domain->iommu_coherency)
960 clflush_cache_range(addr, size);
961}
962
ba395927
KA
963static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
964{
ba395927 965 struct context_entry *context;
03ecc32c 966 int ret = 0;
ba395927
KA
967 unsigned long flags;
968
969 spin_lock_irqsave(&iommu->lock, flags);
03ecc32c
DW
970 context = iommu_context_addr(iommu, bus, devfn, 0);
971 if (context)
972 ret = context_present(context);
ba395927
KA
973 spin_unlock_irqrestore(&iommu->lock, flags);
974 return ret;
975}
976
977static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
978{
ba395927
KA
979 struct context_entry *context;
980 unsigned long flags;
981
982 spin_lock_irqsave(&iommu->lock, flags);
03ecc32c 983 context = iommu_context_addr(iommu, bus, devfn, 0);
ba395927 984 if (context) {
03ecc32c
DW
985 context_clear_entry(context);
986 __iommu_flush_cache(iommu, context, sizeof(*context));
ba395927
KA
987 }
988 spin_unlock_irqrestore(&iommu->lock, flags);
989}
990
991static void free_context_table(struct intel_iommu *iommu)
992{
ba395927
KA
993 int i;
994 unsigned long flags;
995 struct context_entry *context;
996
997 spin_lock_irqsave(&iommu->lock, flags);
998 if (!iommu->root_entry) {
999 goto out;
1000 }
1001 for (i = 0; i < ROOT_ENTRY_NR; i++) {
03ecc32c 1002 context = iommu_context_addr(iommu, i, 0, 0);
ba395927
KA
1003 if (context)
1004 free_pgtable_page(context);
03ecc32c 1005
c83b2f20 1006 if (!ecs_enabled(iommu))
03ecc32c
DW
1007 continue;
1008
1009 context = iommu_context_addr(iommu, i, 0x80, 0);
1010 if (context)
1011 free_pgtable_page(context);
1012
ba395927
KA
1013 }
1014 free_pgtable_page(iommu->root_entry);
1015 iommu->root_entry = NULL;
1016out:
1017 spin_unlock_irqrestore(&iommu->lock, flags);
1018}
1019
b026fd28 1020static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
5cf0a76f 1021 unsigned long pfn, int *target_level)
ba395927 1022{
ba395927
KA
1023 struct dma_pte *parent, *pte = NULL;
1024 int level = agaw_to_level(domain->agaw);
4399c8bf 1025 int offset;
ba395927
KA
1026
1027 BUG_ON(!domain->pgd);
f9423606 1028
162d1b10 1029 if (!domain_pfn_supported(domain, pfn))
f9423606
JS
1030 /* Address beyond IOMMU's addressing capabilities. */
1031 return NULL;
1032
ba395927
KA
1033 parent = domain->pgd;
1034
5cf0a76f 1035 while (1) {
ba395927
KA
1036 void *tmp_page;
1037
b026fd28 1038 offset = pfn_level_offset(pfn, level);
ba395927 1039 pte = &parent[offset];
5cf0a76f 1040 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
6dd9a7c7 1041 break;
5cf0a76f 1042 if (level == *target_level)
ba395927
KA
1043 break;
1044
19c239ce 1045 if (!dma_pte_present(pte)) {
c85994e4
DW
1046 uint64_t pteval;
1047
4c923d47 1048 tmp_page = alloc_pgtable_page(domain->nid);
ba395927 1049
206a73c1 1050 if (!tmp_page)
ba395927 1051 return NULL;
206a73c1 1052
c85994e4 1053 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
64de5af0 1054 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
effad4b5 1055 if (cmpxchg64(&pte->val, 0ULL, pteval))
c85994e4
DW
1056 /* Someone else set it while we were thinking; use theirs. */
1057 free_pgtable_page(tmp_page);
effad4b5 1058 else
c85994e4 1059 domain_flush_cache(domain, pte, sizeof(*pte));
ba395927 1060 }
5cf0a76f
DW
1061 if (level == 1)
1062 break;
1063
19c239ce 1064 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
1065 level--;
1066 }
1067
5cf0a76f
DW
1068 if (!*target_level)
1069 *target_level = level;
1070
ba395927
KA
1071 return pte;
1072}
1073
6dd9a7c7 1074
ba395927 1075/* return address's pte at specific level */
90dcfb5e
DW
1076static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
1077 unsigned long pfn,
6dd9a7c7 1078 int level, int *large_page)
ba395927
KA
1079{
1080 struct dma_pte *parent, *pte = NULL;
1081 int total = agaw_to_level(domain->agaw);
1082 int offset;
1083
1084 parent = domain->pgd;
1085 while (level <= total) {
90dcfb5e 1086 offset = pfn_level_offset(pfn, total);
ba395927
KA
1087 pte = &parent[offset];
1088 if (level == total)
1089 return pte;
1090
6dd9a7c7
YS
1091 if (!dma_pte_present(pte)) {
1092 *large_page = total;
ba395927 1093 break;
6dd9a7c7
YS
1094 }
1095
e16922af 1096 if (dma_pte_superpage(pte)) {
6dd9a7c7
YS
1097 *large_page = total;
1098 return pte;
1099 }
1100
19c239ce 1101 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
1102 total--;
1103 }
1104 return NULL;
1105}
1106
ba395927 1107/* clear last level pte, a tlb flush should be followed */
5cf0a76f 1108static void dma_pte_clear_range(struct dmar_domain *domain,
595badf5
DW
1109 unsigned long start_pfn,
1110 unsigned long last_pfn)
ba395927 1111{
6dd9a7c7 1112 unsigned int large_page = 1;
310a5ab9 1113 struct dma_pte *first_pte, *pte;
66eae846 1114
162d1b10
JL
1115 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1116 BUG_ON(!domain_pfn_supported(domain, last_pfn));
59c36286 1117 BUG_ON(start_pfn > last_pfn);
ba395927 1118
04b18e65 1119 /* we don't need lock here; nobody else touches the iova range */
59c36286 1120 do {
6dd9a7c7
YS
1121 large_page = 1;
1122 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
310a5ab9 1123 if (!pte) {
6dd9a7c7 1124 start_pfn = align_to_level(start_pfn + 1, large_page + 1);
310a5ab9
DW
1125 continue;
1126 }
6dd9a7c7 1127 do {
310a5ab9 1128 dma_clear_pte(pte);
6dd9a7c7 1129 start_pfn += lvl_to_nr_pages(large_page);
310a5ab9 1130 pte++;
75e6bf96
DW
1131 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
1132
310a5ab9
DW
1133 domain_flush_cache(domain, first_pte,
1134 (void *)pte - (void *)first_pte);
59c36286
DW
1135
1136 } while (start_pfn && start_pfn <= last_pfn);
ba395927
KA
1137}
1138
3269ee0b
AW
1139static void dma_pte_free_level(struct dmar_domain *domain, int level,
1140 struct dma_pte *pte, unsigned long pfn,
1141 unsigned long start_pfn, unsigned long last_pfn)
1142{
1143 pfn = max(start_pfn, pfn);
1144 pte = &pte[pfn_level_offset(pfn, level)];
1145
1146 do {
1147 unsigned long level_pfn;
1148 struct dma_pte *level_pte;
1149
1150 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
1151 goto next;
1152
f7116e11 1153 level_pfn = pfn & level_mask(level);
3269ee0b
AW
1154 level_pte = phys_to_virt(dma_pte_addr(pte));
1155
1156 if (level > 2)
1157 dma_pte_free_level(domain, level - 1, level_pte,
1158 level_pfn, start_pfn, last_pfn);
1159
1160 /* If range covers entire pagetable, free it */
1161 if (!(start_pfn > level_pfn ||
08336fd2 1162 last_pfn < level_pfn + level_size(level) - 1)) {
3269ee0b
AW
1163 dma_clear_pte(pte);
1164 domain_flush_cache(domain, pte, sizeof(*pte));
1165 free_pgtable_page(level_pte);
1166 }
1167next:
1168 pfn += level_size(level);
1169 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1170}
1171
3d1a2442 1172/* clear last level (leaf) ptes and free page table pages. */
ba395927 1173static void dma_pte_free_pagetable(struct dmar_domain *domain,
d794dc9b
DW
1174 unsigned long start_pfn,
1175 unsigned long last_pfn)
ba395927 1176{
162d1b10
JL
1177 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1178 BUG_ON(!domain_pfn_supported(domain, last_pfn));
59c36286 1179 BUG_ON(start_pfn > last_pfn);
ba395927 1180
d41a4adb
JL
1181 dma_pte_clear_range(domain, start_pfn, last_pfn);
1182
f3a0a52f 1183 /* We don't need lock here; nobody else touches the iova range */
3269ee0b
AW
1184 dma_pte_free_level(domain, agaw_to_level(domain->agaw),
1185 domain->pgd, 0, start_pfn, last_pfn);
6660c63a 1186
ba395927 1187 /* free pgd */
d794dc9b 1188 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
ba395927
KA
1189 free_pgtable_page(domain->pgd);
1190 domain->pgd = NULL;
1191 }
1192}
1193
ea8ea460
DW
1194/* When a page at a given level is being unlinked from its parent, we don't
1195 need to *modify* it at all. All we need to do is make a list of all the
1196 pages which can be freed just as soon as we've flushed the IOTLB and we
1197 know the hardware page-walk will no longer touch them.
1198 The 'pte' argument is the *parent* PTE, pointing to the page that is to
1199 be freed. */
1200static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1201 int level, struct dma_pte *pte,
1202 struct page *freelist)
1203{
1204 struct page *pg;
1205
1206 pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1207 pg->freelist = freelist;
1208 freelist = pg;
1209
1210 if (level == 1)
1211 return freelist;
1212
adeb2590
JL
1213 pte = page_address(pg);
1214 do {
ea8ea460
DW
1215 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1216 freelist = dma_pte_list_pagetables(domain, level - 1,
1217 pte, freelist);
adeb2590
JL
1218 pte++;
1219 } while (!first_pte_in_page(pte));
ea8ea460
DW
1220
1221 return freelist;
1222}
1223
1224static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1225 struct dma_pte *pte, unsigned long pfn,
1226 unsigned long start_pfn,
1227 unsigned long last_pfn,
1228 struct page *freelist)
1229{
1230 struct dma_pte *first_pte = NULL, *last_pte = NULL;
1231
1232 pfn = max(start_pfn, pfn);
1233 pte = &pte[pfn_level_offset(pfn, level)];
1234
1235 do {
1236 unsigned long level_pfn;
1237
1238 if (!dma_pte_present(pte))
1239 goto next;
1240
1241 level_pfn = pfn & level_mask(level);
1242
1243 /* If range covers entire pagetable, free it */
1244 if (start_pfn <= level_pfn &&
1245 last_pfn >= level_pfn + level_size(level) - 1) {
1246 /* These suborbinate page tables are going away entirely. Don't
1247 bother to clear them; we're just going to *free* them. */
1248 if (level > 1 && !dma_pte_superpage(pte))
1249 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1250
1251 dma_clear_pte(pte);
1252 if (!first_pte)
1253 first_pte = pte;
1254 last_pte = pte;
1255 } else if (level > 1) {
1256 /* Recurse down into a level that isn't *entirely* obsolete */
1257 freelist = dma_pte_clear_level(domain, level - 1,
1258 phys_to_virt(dma_pte_addr(pte)),
1259 level_pfn, start_pfn, last_pfn,
1260 freelist);
1261 }
1262next:
1263 pfn += level_size(level);
1264 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1265
1266 if (first_pte)
1267 domain_flush_cache(domain, first_pte,
1268 (void *)++last_pte - (void *)first_pte);
1269
1270 return freelist;
1271}
1272
1273/* We can't just free the pages because the IOMMU may still be walking
1274 the page tables, and may have cached the intermediate levels. The
1275 pages can only be freed after the IOTLB flush has been done. */
b690420a
JR
1276static struct page *domain_unmap(struct dmar_domain *domain,
1277 unsigned long start_pfn,
1278 unsigned long last_pfn)
ea8ea460 1279{
ea8ea460
DW
1280 struct page *freelist = NULL;
1281
162d1b10
JL
1282 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1283 BUG_ON(!domain_pfn_supported(domain, last_pfn));
ea8ea460
DW
1284 BUG_ON(start_pfn > last_pfn);
1285
1286 /* we don't need lock here; nobody else touches the iova range */
1287 freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1288 domain->pgd, 0, start_pfn, last_pfn, NULL);
1289
1290 /* free pgd */
1291 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1292 struct page *pgd_page = virt_to_page(domain->pgd);
1293 pgd_page->freelist = freelist;
1294 freelist = pgd_page;
1295
1296 domain->pgd = NULL;
1297 }
1298
1299 return freelist;
1300}
1301
b690420a 1302static void dma_free_pagelist(struct page *freelist)
ea8ea460
DW
1303{
1304 struct page *pg;
1305
1306 while ((pg = freelist)) {
1307 freelist = pg->freelist;
1308 free_pgtable_page(page_address(pg));
1309 }
1310}
1311
ba395927
KA
1312/* iommu handling */
1313static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1314{
1315 struct root_entry *root;
1316 unsigned long flags;
1317
4c923d47 1318 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
ffebeb46 1319 if (!root) {
9f10e5bf 1320 pr_err("Allocating root entry for %s failed\n",
ffebeb46 1321 iommu->name);
ba395927 1322 return -ENOMEM;
ffebeb46 1323 }
ba395927 1324
5b6985ce 1325 __iommu_flush_cache(iommu, root, ROOT_SIZE);
ba395927
KA
1326
1327 spin_lock_irqsave(&iommu->lock, flags);
1328 iommu->root_entry = root;
1329 spin_unlock_irqrestore(&iommu->lock, flags);
1330
1331 return 0;
1332}
1333
ba395927
KA
1334static void iommu_set_root_entry(struct intel_iommu *iommu)
1335{
03ecc32c 1336 u64 addr;
c416daa9 1337 u32 sts;
ba395927
KA
1338 unsigned long flag;
1339
03ecc32c 1340 addr = virt_to_phys(iommu->root_entry);
c83b2f20 1341 if (ecs_enabled(iommu))
03ecc32c 1342 addr |= DMA_RTADDR_RTT;
ba395927 1343
1f5b3c3f 1344 raw_spin_lock_irqsave(&iommu->register_lock, flag);
03ecc32c 1345 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
ba395927 1346
c416daa9 1347 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1348
1349 /* Make sure hardware complete it */
1350 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1351 readl, (sts & DMA_GSTS_RTPS), sts);
ba395927 1352
1f5b3c3f 1353 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1354}
1355
1356static void iommu_flush_write_buffer(struct intel_iommu *iommu)
1357{
1358 u32 val;
1359 unsigned long flag;
1360
9af88143 1361 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
ba395927 1362 return;
ba395927 1363
1f5b3c3f 1364 raw_spin_lock_irqsave(&iommu->register_lock, flag);
462b60f6 1365 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1366
1367 /* Make sure hardware complete it */
1368 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1369 readl, (!(val & DMA_GSTS_WBFS)), val);
ba395927 1370
1f5b3c3f 1371 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1372}
1373
1374/* return value determine if we need a write buffer flush */
4c25a2c1
DW
1375static void __iommu_flush_context(struct intel_iommu *iommu,
1376 u16 did, u16 source_id, u8 function_mask,
1377 u64 type)
ba395927
KA
1378{
1379 u64 val = 0;
1380 unsigned long flag;
1381
ba395927
KA
1382 switch (type) {
1383 case DMA_CCMD_GLOBAL_INVL:
1384 val = DMA_CCMD_GLOBAL_INVL;
1385 break;
1386 case DMA_CCMD_DOMAIN_INVL:
1387 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1388 break;
1389 case DMA_CCMD_DEVICE_INVL:
1390 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1391 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1392 break;
1393 default:
1394 BUG();
1395 }
1396 val |= DMA_CCMD_ICC;
1397
1f5b3c3f 1398 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1399 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1400
1401 /* Make sure hardware complete it */
1402 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1403 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1404
1f5b3c3f 1405 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1406}
1407
ba395927 1408/* return value determine if we need a write buffer flush */
1f0ef2aa
DW
1409static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1410 u64 addr, unsigned int size_order, u64 type)
ba395927
KA
1411{
1412 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1413 u64 val = 0, val_iva = 0;
1414 unsigned long flag;
1415
ba395927
KA
1416 switch (type) {
1417 case DMA_TLB_GLOBAL_FLUSH:
1418 /* global flush doesn't need set IVA_REG */
1419 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1420 break;
1421 case DMA_TLB_DSI_FLUSH:
1422 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1423 break;
1424 case DMA_TLB_PSI_FLUSH:
1425 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
ea8ea460 1426 /* IH bit is passed in as part of address */
ba395927
KA
1427 val_iva = size_order | addr;
1428 break;
1429 default:
1430 BUG();
1431 }
1432 /* Note: set drain read/write */
1433#if 0
1434 /*
1435 * This is probably to be super secure.. Looks like we can
1436 * ignore it without any impact.
1437 */
1438 if (cap_read_drain(iommu->cap))
1439 val |= DMA_TLB_READ_DRAIN;
1440#endif
1441 if (cap_write_drain(iommu->cap))
1442 val |= DMA_TLB_WRITE_DRAIN;
1443
1f5b3c3f 1444 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1445 /* Note: Only uses first TLB reg currently */
1446 if (val_iva)
1447 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1448 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1449
1450 /* Make sure hardware complete it */
1451 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1452 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1453
1f5b3c3f 1454 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1455
1456 /* check IOTLB invalidation granularity */
1457 if (DMA_TLB_IAIG(val) == 0)
9f10e5bf 1458 pr_err("Flush IOTLB failed\n");
ba395927 1459 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
9f10e5bf 1460 pr_debug("TLB flush request %Lx, actual %Lx\n",
5b6985ce
FY
1461 (unsigned long long)DMA_TLB_IIRG(type),
1462 (unsigned long long)DMA_TLB_IAIG(val));
ba395927
KA
1463}
1464
64ae892b
DW
1465static struct device_domain_info *
1466iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
1467 u8 bus, u8 devfn)
93a23a72 1468{
93a23a72 1469 struct device_domain_info *info;
93a23a72 1470
55d94043
JR
1471 assert_spin_locked(&device_domain_lock);
1472
93a23a72
YZ
1473 if (!iommu->qi)
1474 return NULL;
1475
93a23a72 1476 list_for_each_entry(info, &domain->devices, link)
c3b497c6
JL
1477 if (info->iommu == iommu && info->bus == bus &&
1478 info->devfn == devfn) {
b16d0cb9
DW
1479 if (info->ats_supported && info->dev)
1480 return info;
93a23a72
YZ
1481 break;
1482 }
93a23a72 1483
b16d0cb9 1484 return NULL;
93a23a72
YZ
1485}
1486
0824c592
OP
1487static void domain_update_iotlb(struct dmar_domain *domain)
1488{
1489 struct device_domain_info *info;
1490 bool has_iotlb_device = false;
1491
1492 assert_spin_locked(&device_domain_lock);
1493
1494 list_for_each_entry(info, &domain->devices, link) {
1495 struct pci_dev *pdev;
1496
1497 if (!info->dev || !dev_is_pci(info->dev))
1498 continue;
1499
1500 pdev = to_pci_dev(info->dev);
1501 if (pdev->ats_enabled) {
1502 has_iotlb_device = true;
1503 break;
1504 }
1505 }
1506
1507 domain->has_iotlb_device = has_iotlb_device;
1508}
1509
93a23a72 1510static void iommu_enable_dev_iotlb(struct device_domain_info *info)
ba395927 1511{
fb0cc3aa
BH
1512 struct pci_dev *pdev;
1513
0824c592
OP
1514 assert_spin_locked(&device_domain_lock);
1515
0bcb3e28 1516 if (!info || !dev_is_pci(info->dev))
93a23a72
YZ
1517 return;
1518
fb0cc3aa 1519 pdev = to_pci_dev(info->dev);
fb0cc3aa 1520
b16d0cb9
DW
1521#ifdef CONFIG_INTEL_IOMMU_SVM
1522 /* The PCIe spec, in its wisdom, declares that the behaviour of
1523 the device if you enable PASID support after ATS support is
1524 undefined. So always enable PASID support on devices which
1525 have it, even if we can't yet know if we're ever going to
1526 use it. */
1527 if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
1528 info->pasid_enabled = 1;
1529
1530 if (info->pri_supported && !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
1531 info->pri_enabled = 1;
1532#endif
1533 if (info->ats_supported && !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
1534 info->ats_enabled = 1;
0824c592 1535 domain_update_iotlb(info->domain);
b16d0cb9
DW
1536 info->ats_qdep = pci_ats_queue_depth(pdev);
1537 }
93a23a72
YZ
1538}
1539
1540static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1541{
b16d0cb9
DW
1542 struct pci_dev *pdev;
1543
0824c592
OP
1544 assert_spin_locked(&device_domain_lock);
1545
da972fb1 1546 if (!dev_is_pci(info->dev))
93a23a72
YZ
1547 return;
1548
b16d0cb9
DW
1549 pdev = to_pci_dev(info->dev);
1550
1551 if (info->ats_enabled) {
1552 pci_disable_ats(pdev);
1553 info->ats_enabled = 0;
0824c592 1554 domain_update_iotlb(info->domain);
b16d0cb9
DW
1555 }
1556#ifdef CONFIG_INTEL_IOMMU_SVM
1557 if (info->pri_enabled) {
1558 pci_disable_pri(pdev);
1559 info->pri_enabled = 0;
1560 }
1561 if (info->pasid_enabled) {
1562 pci_disable_pasid(pdev);
1563 info->pasid_enabled = 0;
1564 }
1565#endif
93a23a72
YZ
1566}
1567
1568static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1569 u64 addr, unsigned mask)
1570{
1571 u16 sid, qdep;
1572 unsigned long flags;
1573 struct device_domain_info *info;
1574
0824c592
OP
1575 if (!domain->has_iotlb_device)
1576 return;
1577
93a23a72
YZ
1578 spin_lock_irqsave(&device_domain_lock, flags);
1579 list_for_each_entry(info, &domain->devices, link) {
b16d0cb9 1580 if (!info->ats_enabled)
93a23a72
YZ
1581 continue;
1582
1583 sid = info->bus << 8 | info->devfn;
b16d0cb9 1584 qdep = info->ats_qdep;
93a23a72
YZ
1585 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1586 }
1587 spin_unlock_irqrestore(&device_domain_lock, flags);
1588}
1589
a1ddcbe9
JR
1590static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
1591 struct dmar_domain *domain,
1592 unsigned long pfn, unsigned int pages,
1593 int ih, int map)
ba395927 1594{
9dd2fe89 1595 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
03d6a246 1596 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
a1ddcbe9 1597 u16 did = domain->iommu_did[iommu->seq_id];
ba395927 1598
ba395927
KA
1599 BUG_ON(pages == 0);
1600
ea8ea460
DW
1601 if (ih)
1602 ih = 1 << 6;
ba395927 1603 /*
9dd2fe89
YZ
1604 * Fallback to domain selective flush if no PSI support or the size is
1605 * too big.
ba395927
KA
1606 * PSI requires page size to be 2 ^ x, and the base address is naturally
1607 * aligned to the size
1608 */
9dd2fe89
YZ
1609 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1610 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1f0ef2aa 1611 DMA_TLB_DSI_FLUSH);
9dd2fe89 1612 else
ea8ea460 1613 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
9dd2fe89 1614 DMA_TLB_PSI_FLUSH);
bf92df30
YZ
1615
1616 /*
82653633
NA
1617 * In caching mode, changes of pages from non-present to present require
1618 * flush. However, device IOTLB doesn't need to be flushed in this case.
bf92df30 1619 */
82653633 1620 if (!cap_caching_mode(iommu->cap) || !map)
9452d5bf
JR
1621 iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
1622 addr, mask);
ba395927
KA
1623}
1624
f8bab735 1625static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1626{
1627 u32 pmen;
1628 unsigned long flags;
1629
1f5b3c3f 1630 raw_spin_lock_irqsave(&iommu->register_lock, flags);
f8bab735 1631 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1632 pmen &= ~DMA_PMEN_EPM;
1633 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1634
1635 /* wait for the protected region status bit to clear */
1636 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1637 readl, !(pmen & DMA_PMEN_PRS), pmen);
1638
1f5b3c3f 1639 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
f8bab735 1640}
1641
2a41ccee 1642static void iommu_enable_translation(struct intel_iommu *iommu)
ba395927
KA
1643{
1644 u32 sts;
1645 unsigned long flags;
1646
1f5b3c3f 1647 raw_spin_lock_irqsave(&iommu->register_lock, flags);
c416daa9
DW
1648 iommu->gcmd |= DMA_GCMD_TE;
1649 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1650
1651 /* Make sure hardware complete it */
1652 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1653 readl, (sts & DMA_GSTS_TES), sts);
ba395927 1654
1f5b3c3f 1655 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
ba395927
KA
1656}
1657
2a41ccee 1658static void iommu_disable_translation(struct intel_iommu *iommu)
ba395927
KA
1659{
1660 u32 sts;
1661 unsigned long flag;
1662
1f5b3c3f 1663 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1664 iommu->gcmd &= ~DMA_GCMD_TE;
1665 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1666
1667 /* Make sure hardware complete it */
1668 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1669 readl, (!(sts & DMA_GSTS_TES)), sts);
ba395927 1670
1f5b3c3f 1671 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1672}
1673
3460a6d9 1674
ba395927
KA
1675static int iommu_init_domains(struct intel_iommu *iommu)
1676{
8bf47816
JR
1677 u32 ndomains, nlongs;
1678 size_t size;
ba395927
KA
1679
1680 ndomains = cap_ndoms(iommu->cap);
8bf47816 1681 pr_debug("%s: Number of Domains supported <%d>\n",
9f10e5bf 1682 iommu->name, ndomains);
ba395927
KA
1683 nlongs = BITS_TO_LONGS(ndomains);
1684
94a91b50
DD
1685 spin_lock_init(&iommu->lock);
1686
ba395927
KA
1687 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1688 if (!iommu->domain_ids) {
9f10e5bf
JR
1689 pr_err("%s: Allocating domain id array failed\n",
1690 iommu->name);
ba395927
KA
1691 return -ENOMEM;
1692 }
8bf47816 1693
86f004c7 1694 size = (ALIGN(ndomains, 256) >> 8) * sizeof(struct dmar_domain **);
8bf47816
JR
1695 iommu->domains = kzalloc(size, GFP_KERNEL);
1696
1697 if (iommu->domains) {
1698 size = 256 * sizeof(struct dmar_domain *);
1699 iommu->domains[0] = kzalloc(size, GFP_KERNEL);
1700 }
1701
1702 if (!iommu->domains || !iommu->domains[0]) {
9f10e5bf
JR
1703 pr_err("%s: Allocating domain array failed\n",
1704 iommu->name);
852bdb04 1705 kfree(iommu->domain_ids);
8bf47816 1706 kfree(iommu->domains);
852bdb04 1707 iommu->domain_ids = NULL;
8bf47816 1708 iommu->domains = NULL;
ba395927
KA
1709 return -ENOMEM;
1710 }
1711
8bf47816
JR
1712
1713
ba395927 1714 /*
c0e8a6c8
JR
1715 * If Caching mode is set, then invalid translations are tagged
1716 * with domain-id 0, hence we need to pre-allocate it. We also
1717 * use domain-id 0 as a marker for non-allocated domain-id, so
1718 * make sure it is not used for a real domain.
ba395927 1719 */
c0e8a6c8
JR
1720 set_bit(0, iommu->domain_ids);
1721
ba395927
KA
1722 return 0;
1723}
ba395927 1724
ffebeb46 1725static void disable_dmar_iommu(struct intel_iommu *iommu)
ba395927 1726{
29a27719 1727 struct device_domain_info *info, *tmp;
55d94043 1728 unsigned long flags;
ba395927 1729
29a27719
JR
1730 if (!iommu->domains || !iommu->domain_ids)
1731 return;
a4eaa86c 1732
bea64033 1733again:
55d94043 1734 spin_lock_irqsave(&device_domain_lock, flags);
29a27719
JR
1735 list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
1736 struct dmar_domain *domain;
1737
1738 if (info->iommu != iommu)
1739 continue;
1740
1741 if (!info->dev || !info->domain)
1742 continue;
1743
1744 domain = info->domain;
1745
bea64033 1746 __dmar_remove_one_dev_info(info);
29a27719 1747
bea64033
JR
1748 if (!domain_type_is_vm_or_si(domain)) {
1749 /*
1750 * The domain_exit() function can't be called under
1751 * device_domain_lock, as it takes this lock itself.
1752 * So release the lock here and re-run the loop
1753 * afterwards.
1754 */
1755 spin_unlock_irqrestore(&device_domain_lock, flags);
29a27719 1756 domain_exit(domain);
bea64033
JR
1757 goto again;
1758 }
ba395927 1759 }
55d94043 1760 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927
KA
1761
1762 if (iommu->gcmd & DMA_GCMD_TE)
1763 iommu_disable_translation(iommu);
ffebeb46 1764}
ba395927 1765
ffebeb46
JL
1766static void free_dmar_iommu(struct intel_iommu *iommu)
1767{
1768 if ((iommu->domains) && (iommu->domain_ids)) {
86f004c7 1769 int elems = ALIGN(cap_ndoms(iommu->cap), 256) >> 8;
8bf47816
JR
1770 int i;
1771
1772 for (i = 0; i < elems; i++)
1773 kfree(iommu->domains[i]);
ffebeb46
JL
1774 kfree(iommu->domains);
1775 kfree(iommu->domain_ids);
1776 iommu->domains = NULL;
1777 iommu->domain_ids = NULL;
1778 }
ba395927 1779
d9630fe9
WH
1780 g_iommus[iommu->seq_id] = NULL;
1781
ba395927
KA
1782 /* free context mapping */
1783 free_context_table(iommu);
8a94ade4
DW
1784
1785#ifdef CONFIG_INTEL_IOMMU_SVM
a222a7f0
DW
1786 if (pasid_enabled(iommu)) {
1787 if (ecap_prs(iommu->ecap))
1788 intel_svm_finish_prq(iommu);
8a94ade4 1789 intel_svm_free_pasid_tables(iommu);
a222a7f0 1790 }
8a94ade4 1791#endif
ba395927
KA
1792}
1793
ab8dfe25 1794static struct dmar_domain *alloc_domain(int flags)
ba395927 1795{
ba395927 1796 struct dmar_domain *domain;
ba395927
KA
1797
1798 domain = alloc_domain_mem();
1799 if (!domain)
1800 return NULL;
1801
ab8dfe25 1802 memset(domain, 0, sizeof(*domain));
4c923d47 1803 domain->nid = -1;
ab8dfe25 1804 domain->flags = flags;
0824c592 1805 domain->has_iotlb_device = false;
92d03cc8 1806 INIT_LIST_HEAD(&domain->devices);
2c2e2c38
FY
1807
1808 return domain;
1809}
1810
d160aca5
JR
1811/* Must be called with iommu->lock */
1812static int domain_attach_iommu(struct dmar_domain *domain,
fb170fb4
JL
1813 struct intel_iommu *iommu)
1814{
44bde614 1815 unsigned long ndomains;
55d94043 1816 int num;
44bde614 1817
55d94043 1818 assert_spin_locked(&device_domain_lock);
d160aca5 1819 assert_spin_locked(&iommu->lock);
ba395927 1820
29a27719
JR
1821 domain->iommu_refcnt[iommu->seq_id] += 1;
1822 domain->iommu_count += 1;
1823 if (domain->iommu_refcnt[iommu->seq_id] == 1) {
fb170fb4 1824 ndomains = cap_ndoms(iommu->cap);
d160aca5
JR
1825 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1826
1827 if (num >= ndomains) {
1828 pr_err("%s: No free domain ids\n", iommu->name);
1829 domain->iommu_refcnt[iommu->seq_id] -= 1;
1830 domain->iommu_count -= 1;
55d94043 1831 return -ENOSPC;
2c2e2c38 1832 }
ba395927 1833
d160aca5
JR
1834 set_bit(num, iommu->domain_ids);
1835 set_iommu_domain(iommu, num, domain);
1836
1837 domain->iommu_did[iommu->seq_id] = num;
1838 domain->nid = iommu->node;
fb170fb4 1839
fb170fb4
JL
1840 domain_update_iommu_cap(domain);
1841 }
d160aca5 1842
55d94043 1843 return 0;
fb170fb4
JL
1844}
1845
1846static int domain_detach_iommu(struct dmar_domain *domain,
1847 struct intel_iommu *iommu)
1848{
d160aca5 1849 int num, count = INT_MAX;
d160aca5 1850
55d94043 1851 assert_spin_locked(&device_domain_lock);
d160aca5 1852 assert_spin_locked(&iommu->lock);
fb170fb4 1853
29a27719
JR
1854 domain->iommu_refcnt[iommu->seq_id] -= 1;
1855 count = --domain->iommu_count;
1856 if (domain->iommu_refcnt[iommu->seq_id] == 0) {
d160aca5
JR
1857 num = domain->iommu_did[iommu->seq_id];
1858 clear_bit(num, iommu->domain_ids);
1859 set_iommu_domain(iommu, num, NULL);
fb170fb4 1860
fb170fb4 1861 domain_update_iommu_cap(domain);
c0e8a6c8 1862 domain->iommu_did[iommu->seq_id] = 0;
fb170fb4 1863 }
fb170fb4
JL
1864
1865 return count;
1866}
1867
ba395927 1868static struct iova_domain reserved_iova_list;
8a443df4 1869static struct lock_class_key reserved_rbtree_key;
ba395927 1870
51a63e67 1871static int dmar_init_reserved_ranges(void)
ba395927
KA
1872{
1873 struct pci_dev *pdev = NULL;
1874 struct iova *iova;
1875 int i;
ba395927 1876
0fb5fe87
RM
1877 init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN,
1878 DMA_32BIT_PFN);
ba395927 1879
8a443df4
MG
1880 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1881 &reserved_rbtree_key);
1882
ba395927
KA
1883 /* IOAPIC ranges shouldn't be accessed by DMA */
1884 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1885 IOVA_PFN(IOAPIC_RANGE_END));
51a63e67 1886 if (!iova) {
9f10e5bf 1887 pr_err("Reserve IOAPIC range failed\n");
51a63e67
JC
1888 return -ENODEV;
1889 }
ba395927
KA
1890
1891 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1892 for_each_pci_dev(pdev) {
1893 struct resource *r;
1894
1895 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1896 r = &pdev->resource[i];
1897 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1898 continue;
1a4a4551
DW
1899 iova = reserve_iova(&reserved_iova_list,
1900 IOVA_PFN(r->start),
1901 IOVA_PFN(r->end));
51a63e67 1902 if (!iova) {
9f10e5bf 1903 pr_err("Reserve iova failed\n");
51a63e67
JC
1904 return -ENODEV;
1905 }
ba395927
KA
1906 }
1907 }
51a63e67 1908 return 0;
ba395927
KA
1909}
1910
1911static void domain_reserve_special_ranges(struct dmar_domain *domain)
1912{
1913 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1914}
1915
1916static inline int guestwidth_to_adjustwidth(int gaw)
1917{
1918 int agaw;
1919 int r = (gaw - 12) % 9;
1920
1921 if (r == 0)
1922 agaw = gaw;
1923 else
1924 agaw = gaw + 9 - r;
1925 if (agaw > 64)
1926 agaw = 64;
1927 return agaw;
1928}
1929
dc534b25
JR
1930static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
1931 int guest_width)
ba395927 1932{
ba395927
KA
1933 int adjust_width, agaw;
1934 unsigned long sagaw;
1935
0fb5fe87
RM
1936 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
1937 DMA_32BIT_PFN);
ba395927
KA
1938 domain_reserve_special_ranges(domain);
1939
1940 /* calculate AGAW */
ba395927
KA
1941 if (guest_width > cap_mgaw(iommu->cap))
1942 guest_width = cap_mgaw(iommu->cap);
1943 domain->gaw = guest_width;
1944 adjust_width = guestwidth_to_adjustwidth(guest_width);
1945 agaw = width_to_agaw(adjust_width);
1946 sagaw = cap_sagaw(iommu->cap);
1947 if (!test_bit(agaw, &sagaw)) {
1948 /* hardware doesn't support it, choose a bigger one */
9f10e5bf 1949 pr_debug("Hardware doesn't support agaw %d\n", agaw);
ba395927
KA
1950 agaw = find_next_bit(&sagaw, 5, agaw);
1951 if (agaw >= 5)
1952 return -ENODEV;
1953 }
1954 domain->agaw = agaw;
ba395927 1955
8e604097
WH
1956 if (ecap_coherent(iommu->ecap))
1957 domain->iommu_coherency = 1;
1958 else
1959 domain->iommu_coherency = 0;
1960
58c610bd
SY
1961 if (ecap_sc_support(iommu->ecap))
1962 domain->iommu_snooping = 1;
1963 else
1964 domain->iommu_snooping = 0;
1965
214e39aa
DW
1966 if (intel_iommu_superpage)
1967 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1968 else
1969 domain->iommu_superpage = 0;
1970
4c923d47 1971 domain->nid = iommu->node;
c7151a8d 1972
ba395927 1973 /* always allocate the top pgd */
4c923d47 1974 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
ba395927
KA
1975 if (!domain->pgd)
1976 return -ENOMEM;
5b6985ce 1977 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
ba395927
KA
1978 return 0;
1979}
1980
1981static void domain_exit(struct dmar_domain *domain)
1982{
ea8ea460 1983 struct page *freelist = NULL;
ba395927
KA
1984
1985 /* Domain 0 is reserved, so dont process it */
1986 if (!domain)
1987 return;
1988
7b668357 1989 /* Flush any lazy unmaps that may reference this domain */
aa473240
OP
1990 if (!intel_iommu_strict) {
1991 int cpu;
1992
1993 for_each_possible_cpu(cpu)
1994 flush_unmaps_timeout(cpu);
1995 }
7b668357 1996
d160aca5
JR
1997 /* Remove associated devices and clear attached or cached domains */
1998 rcu_read_lock();
ba395927 1999 domain_remove_dev_info(domain);
d160aca5 2000 rcu_read_unlock();
92d03cc8 2001
ba395927
KA
2002 /* destroy iovas */
2003 put_iova_domain(&domain->iovad);
ba395927 2004
ea8ea460 2005 freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
ba395927 2006
ea8ea460
DW
2007 dma_free_pagelist(freelist);
2008
ba395927
KA
2009 free_domain_mem(domain);
2010}
2011
64ae892b
DW
2012static int domain_context_mapping_one(struct dmar_domain *domain,
2013 struct intel_iommu *iommu,
28ccce0d 2014 u8 bus, u8 devfn)
ba395927 2015{
c6c2cebd 2016 u16 did = domain->iommu_did[iommu->seq_id];
28ccce0d
JR
2017 int translation = CONTEXT_TT_MULTI_LEVEL;
2018 struct device_domain_info *info = NULL;
ba395927 2019 struct context_entry *context;
ba395927 2020 unsigned long flags;
ea6606b0 2021 struct dma_pte *pgd;
55d94043 2022 int ret, agaw;
28ccce0d 2023
c6c2cebd
JR
2024 WARN_ON(did == 0);
2025
28ccce0d
JR
2026 if (hw_pass_through && domain_type_is_si(domain))
2027 translation = CONTEXT_TT_PASS_THROUGH;
ba395927
KA
2028
2029 pr_debug("Set context mapping for %02x:%02x.%d\n",
2030 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
4ed0d3e6 2031
ba395927 2032 BUG_ON(!domain->pgd);
5331fe6f 2033
55d94043
JR
2034 spin_lock_irqsave(&device_domain_lock, flags);
2035 spin_lock(&iommu->lock);
2036
2037 ret = -ENOMEM;
03ecc32c 2038 context = iommu_context_addr(iommu, bus, devfn, 1);
ba395927 2039 if (!context)
55d94043 2040 goto out_unlock;
ba395927 2041
55d94043
JR
2042 ret = 0;
2043 if (context_present(context))
2044 goto out_unlock;
cf484d0e 2045
aec0e861
XP
2046 /*
2047 * For kdump cases, old valid entries may be cached due to the
2048 * in-flight DMA and copied pgtable, but there is no unmapping
2049 * behaviour for them, thus we need an explicit cache flush for
2050 * the newly-mapped device. For kdump, at this point, the device
2051 * is supposed to finish reset at its driver probe stage, so no
2052 * in-flight DMA will exist, and we don't need to worry anymore
2053 * hereafter.
2054 */
2055 if (context_copied(context)) {
2056 u16 did_old = context_domain_id(context);
2057
2058 if (did_old >= 0 && did_old < cap_ndoms(iommu->cap))
2059 iommu->flush.flush_context(iommu, did_old,
2060 (((u16)bus) << 8) | devfn,
2061 DMA_CCMD_MASK_NOBIT,
2062 DMA_CCMD_DEVICE_INVL);
2063 }
2064
ea6606b0
WH
2065 pgd = domain->pgd;
2066
de24e553 2067 context_clear_entry(context);
c6c2cebd 2068 context_set_domain_id(context, did);
ea6606b0 2069
de24e553
JR
2070 /*
2071 * Skip top levels of page tables for iommu which has less agaw
2072 * than default. Unnecessary for PT mode.
2073 */
93a23a72 2074 if (translation != CONTEXT_TT_PASS_THROUGH) {
de24e553 2075 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
55d94043 2076 ret = -ENOMEM;
de24e553 2077 pgd = phys_to_virt(dma_pte_addr(pgd));
55d94043
JR
2078 if (!dma_pte_present(pgd))
2079 goto out_unlock;
ea6606b0 2080 }
4ed0d3e6 2081
64ae892b 2082 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
b16d0cb9
DW
2083 if (info && info->ats_supported)
2084 translation = CONTEXT_TT_DEV_IOTLB;
2085 else
2086 translation = CONTEXT_TT_MULTI_LEVEL;
de24e553 2087
93a23a72
YZ
2088 context_set_address_root(context, virt_to_phys(pgd));
2089 context_set_address_width(context, iommu->agaw);
de24e553
JR
2090 } else {
2091 /*
2092 * In pass through mode, AW must be programmed to
2093 * indicate the largest AGAW value supported by
2094 * hardware. And ASR is ignored by hardware.
2095 */
2096 context_set_address_width(context, iommu->msagaw);
93a23a72 2097 }
4ed0d3e6
FY
2098
2099 context_set_translation_type(context, translation);
c07e7d21
MM
2100 context_set_fault_enable(context);
2101 context_set_present(context);
5331fe6f 2102 domain_flush_cache(domain, context, sizeof(*context));
ba395927 2103
4c25a2c1
DW
2104 /*
2105 * It's a non-present to present mapping. If hardware doesn't cache
2106 * non-present entry we only need to flush the write-buffer. If the
2107 * _does_ cache non-present entries, then it does so in the special
2108 * domain #0, which we have to flush:
2109 */
2110 if (cap_caching_mode(iommu->cap)) {
2111 iommu->flush.flush_context(iommu, 0,
2112 (((u16)bus) << 8) | devfn,
2113 DMA_CCMD_MASK_NOBIT,
2114 DMA_CCMD_DEVICE_INVL);
c6c2cebd 2115 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
4c25a2c1 2116 } else {
ba395927 2117 iommu_flush_write_buffer(iommu);
4c25a2c1 2118 }
93a23a72 2119 iommu_enable_dev_iotlb(info);
c7151a8d 2120
55d94043
JR
2121 ret = 0;
2122
2123out_unlock:
2124 spin_unlock(&iommu->lock);
2125 spin_unlock_irqrestore(&device_domain_lock, flags);
fb170fb4 2126
5c365d18 2127 return ret;
ba395927
KA
2128}
2129
579305f7
AW
2130struct domain_context_mapping_data {
2131 struct dmar_domain *domain;
2132 struct intel_iommu *iommu;
579305f7
AW
2133};
2134
2135static int domain_context_mapping_cb(struct pci_dev *pdev,
2136 u16 alias, void *opaque)
2137{
2138 struct domain_context_mapping_data *data = opaque;
2139
2140 return domain_context_mapping_one(data->domain, data->iommu,
28ccce0d 2141 PCI_BUS_NUM(alias), alias & 0xff);
579305f7
AW
2142}
2143
ba395927 2144static int
28ccce0d 2145domain_context_mapping(struct dmar_domain *domain, struct device *dev)
ba395927 2146{
64ae892b 2147 struct intel_iommu *iommu;
156baca8 2148 u8 bus, devfn;
579305f7 2149 struct domain_context_mapping_data data;
64ae892b 2150
e1f167f3 2151 iommu = device_to_iommu(dev, &bus, &devfn);
64ae892b
DW
2152 if (!iommu)
2153 return -ENODEV;
ba395927 2154
579305f7 2155 if (!dev_is_pci(dev))
28ccce0d 2156 return domain_context_mapping_one(domain, iommu, bus, devfn);
579305f7
AW
2157
2158 data.domain = domain;
2159 data.iommu = iommu;
579305f7
AW
2160
2161 return pci_for_each_dma_alias(to_pci_dev(dev),
2162 &domain_context_mapping_cb, &data);
2163}
2164
2165static int domain_context_mapped_cb(struct pci_dev *pdev,
2166 u16 alias, void *opaque)
2167{
2168 struct intel_iommu *iommu = opaque;
2169
2170 return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
ba395927
KA
2171}
2172
e1f167f3 2173static int domain_context_mapped(struct device *dev)
ba395927 2174{
5331fe6f 2175 struct intel_iommu *iommu;
156baca8 2176 u8 bus, devfn;
5331fe6f 2177
e1f167f3 2178 iommu = device_to_iommu(dev, &bus, &devfn);
5331fe6f
WH
2179 if (!iommu)
2180 return -ENODEV;
ba395927 2181
579305f7
AW
2182 if (!dev_is_pci(dev))
2183 return device_context_mapped(iommu, bus, devfn);
e1f167f3 2184
579305f7
AW
2185 return !pci_for_each_dma_alias(to_pci_dev(dev),
2186 domain_context_mapped_cb, iommu);
ba395927
KA
2187}
2188
f532959b
FY
2189/* Returns a number of VTD pages, but aligned to MM page size */
2190static inline unsigned long aligned_nrpages(unsigned long host_addr,
2191 size_t size)
2192{
2193 host_addr &= ~PAGE_MASK;
2194 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
2195}
2196
6dd9a7c7
YS
2197/* Return largest possible superpage level for a given mapping */
2198static inline int hardware_largepage_caps(struct dmar_domain *domain,
2199 unsigned long iov_pfn,
2200 unsigned long phy_pfn,
2201 unsigned long pages)
2202{
2203 int support, level = 1;
2204 unsigned long pfnmerge;
2205
2206 support = domain->iommu_superpage;
2207
2208 /* To use a large page, the virtual *and* physical addresses
2209 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2210 of them will mean we have to use smaller pages. So just
2211 merge them and check both at once. */
2212 pfnmerge = iov_pfn | phy_pfn;
2213
2214 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
2215 pages >>= VTD_STRIDE_SHIFT;
2216 if (!pages)
2217 break;
2218 pfnmerge >>= VTD_STRIDE_SHIFT;
2219 level++;
2220 support--;
2221 }
2222 return level;
2223}
2224
9051aa02
DW
2225static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2226 struct scatterlist *sg, unsigned long phys_pfn,
2227 unsigned long nr_pages, int prot)
e1605495
DW
2228{
2229 struct dma_pte *first_pte = NULL, *pte = NULL;
9051aa02 2230 phys_addr_t uninitialized_var(pteval);
cc4f14aa 2231 unsigned long sg_res = 0;
6dd9a7c7
YS
2232 unsigned int largepage_lvl = 0;
2233 unsigned long lvl_pages = 0;
e1605495 2234
162d1b10 2235 BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
e1605495
DW
2236
2237 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2238 return -EINVAL;
2239
2240 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
2241
cc4f14aa
JL
2242 if (!sg) {
2243 sg_res = nr_pages;
9051aa02
DW
2244 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
2245 }
2246
6dd9a7c7 2247 while (nr_pages > 0) {
c85994e4
DW
2248 uint64_t tmp;
2249
e1605495 2250 if (!sg_res) {
f532959b 2251 sg_res = aligned_nrpages(sg->offset, sg->length);
e1605495
DW
2252 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
2253 sg->dma_length = sg->length;
3e6110fd 2254 pteval = page_to_phys(sg_page(sg)) | prot;
6dd9a7c7 2255 phys_pfn = pteval >> VTD_PAGE_SHIFT;
e1605495 2256 }
6dd9a7c7 2257
e1605495 2258 if (!pte) {
6dd9a7c7
YS
2259 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2260
5cf0a76f 2261 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
e1605495
DW
2262 if (!pte)
2263 return -ENOMEM;
6dd9a7c7 2264 /* It is large page*/
6491d4d0 2265 if (largepage_lvl > 1) {
ba2374fd
CZ
2266 unsigned long nr_superpages, end_pfn;
2267
6dd9a7c7 2268 pteval |= DMA_PTE_LARGE_PAGE;
d41a4adb 2269 lvl_pages = lvl_to_nr_pages(largepage_lvl);
ba2374fd
CZ
2270
2271 nr_superpages = sg_res / lvl_pages;
2272 end_pfn = iov_pfn + nr_superpages * lvl_pages - 1;
2273
d41a4adb
JL
2274 /*
2275 * Ensure that old small page tables are
ba2374fd 2276 * removed to make room for superpage(s).
d41a4adb 2277 */
ba2374fd 2278 dma_pte_free_pagetable(domain, iov_pfn, end_pfn);
6491d4d0 2279 } else {
6dd9a7c7 2280 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
6491d4d0 2281 }
6dd9a7c7 2282
e1605495
DW
2283 }
2284 /* We don't need lock here, nobody else
2285 * touches the iova range
2286 */
7766a3fb 2287 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
c85994e4 2288 if (tmp) {
1bf20f0d 2289 static int dumps = 5;
9f10e5bf
JR
2290 pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2291 iov_pfn, tmp, (unsigned long long)pteval);
1bf20f0d
DW
2292 if (dumps) {
2293 dumps--;
2294 debug_dma_dump_mappings(NULL);
2295 }
2296 WARN_ON(1);
2297 }
6dd9a7c7
YS
2298
2299 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2300
2301 BUG_ON(nr_pages < lvl_pages);
2302 BUG_ON(sg_res < lvl_pages);
2303
2304 nr_pages -= lvl_pages;
2305 iov_pfn += lvl_pages;
2306 phys_pfn += lvl_pages;
2307 pteval += lvl_pages * VTD_PAGE_SIZE;
2308 sg_res -= lvl_pages;
2309
2310 /* If the next PTE would be the first in a new page, then we
2311 need to flush the cache on the entries we've just written.
2312 And then we'll need to recalculate 'pte', so clear it and
2313 let it get set again in the if (!pte) block above.
2314
2315 If we're done (!nr_pages) we need to flush the cache too.
2316
2317 Also if we've been setting superpages, we may need to
2318 recalculate 'pte' and switch back to smaller pages for the
2319 end of the mapping, if the trailing size is not enough to
2320 use another superpage (i.e. sg_res < lvl_pages). */
e1605495 2321 pte++;
6dd9a7c7
YS
2322 if (!nr_pages || first_pte_in_page(pte) ||
2323 (largepage_lvl > 1 && sg_res < lvl_pages)) {
e1605495
DW
2324 domain_flush_cache(domain, first_pte,
2325 (void *)pte - (void *)first_pte);
2326 pte = NULL;
2327 }
6dd9a7c7
YS
2328
2329 if (!sg_res && nr_pages)
e1605495
DW
2330 sg = sg_next(sg);
2331 }
2332 return 0;
2333}
2334
9051aa02
DW
2335static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2336 struct scatterlist *sg, unsigned long nr_pages,
2337 int prot)
ba395927 2338{
9051aa02
DW
2339 return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
2340}
6f6a00e4 2341
9051aa02
DW
2342static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2343 unsigned long phys_pfn, unsigned long nr_pages,
2344 int prot)
2345{
2346 return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
ba395927
KA
2347}
2348
2452d9db 2349static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn)
ba395927 2350{
c7151a8d
WH
2351 if (!iommu)
2352 return;
8c11e798
WH
2353
2354 clear_context_table(iommu, bus, devfn);
2355 iommu->flush.flush_context(iommu, 0, 0, 0,
4c25a2c1 2356 DMA_CCMD_GLOBAL_INVL);
1f0ef2aa 2357 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
ba395927
KA
2358}
2359
109b9b04
DW
2360static inline void unlink_domain_info(struct device_domain_info *info)
2361{
2362 assert_spin_locked(&device_domain_lock);
2363 list_del(&info->link);
2364 list_del(&info->global);
2365 if (info->dev)
0bcb3e28 2366 info->dev->archdata.iommu = NULL;
109b9b04
DW
2367}
2368
ba395927
KA
2369static void domain_remove_dev_info(struct dmar_domain *domain)
2370{
3a74ca01 2371 struct device_domain_info *info, *tmp;
fb170fb4 2372 unsigned long flags;
ba395927
KA
2373
2374 spin_lock_irqsave(&device_domain_lock, flags);
76f45fe3 2375 list_for_each_entry_safe(info, tmp, &domain->devices, link)
127c7615 2376 __dmar_remove_one_dev_info(info);
ba395927
KA
2377 spin_unlock_irqrestore(&device_domain_lock, flags);
2378}
2379
2380/*
2381 * find_domain
1525a29a 2382 * Note: we use struct device->archdata.iommu stores the info
ba395927 2383 */
1525a29a 2384static struct dmar_domain *find_domain(struct device *dev)
ba395927
KA
2385{
2386 struct device_domain_info *info;
2387
2388 /* No lock here, assumes no domain exit in normal case */
1525a29a 2389 info = dev->archdata.iommu;
ba395927
KA
2390 if (info)
2391 return info->domain;
2392 return NULL;
2393}
2394
5a8f40e8 2395static inline struct device_domain_info *
745f2586
JL
2396dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2397{
2398 struct device_domain_info *info;
2399
2400 list_for_each_entry(info, &device_domain_list, global)
41e80dca 2401 if (info->iommu->segment == segment && info->bus == bus &&
745f2586 2402 info->devfn == devfn)
5a8f40e8 2403 return info;
745f2586
JL
2404
2405 return NULL;
2406}
2407
5db31569
JR
2408static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
2409 int bus, int devfn,
2410 struct device *dev,
2411 struct dmar_domain *domain)
745f2586 2412{
5a8f40e8 2413 struct dmar_domain *found = NULL;
745f2586
JL
2414 struct device_domain_info *info;
2415 unsigned long flags;
d160aca5 2416 int ret;
745f2586
JL
2417
2418 info = alloc_devinfo_mem();
2419 if (!info)
b718cd3d 2420 return NULL;
745f2586 2421
745f2586
JL
2422 info->bus = bus;
2423 info->devfn = devfn;
b16d0cb9
DW
2424 info->ats_supported = info->pasid_supported = info->pri_supported = 0;
2425 info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
2426 info->ats_qdep = 0;
745f2586
JL
2427 info->dev = dev;
2428 info->domain = domain;
5a8f40e8 2429 info->iommu = iommu;
745f2586 2430
b16d0cb9
DW
2431 if (dev && dev_is_pci(dev)) {
2432 struct pci_dev *pdev = to_pci_dev(info->dev);
2433
2434 if (ecap_dev_iotlb_support(iommu->ecap) &&
2435 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) &&
2436 dmar_find_matched_atsr_unit(pdev))
2437 info->ats_supported = 1;
2438
2439 if (ecs_enabled(iommu)) {
2440 if (pasid_enabled(iommu)) {
2441 int features = pci_pasid_features(pdev);
2442 if (features >= 0)
2443 info->pasid_supported = features | 1;
2444 }
2445
2446 if (info->ats_supported && ecap_prs(iommu->ecap) &&
2447 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
2448 info->pri_supported = 1;
2449 }
2450 }
2451
745f2586
JL
2452 spin_lock_irqsave(&device_domain_lock, flags);
2453 if (dev)
0bcb3e28 2454 found = find_domain(dev);
f303e507
JR
2455
2456 if (!found) {
5a8f40e8 2457 struct device_domain_info *info2;
41e80dca 2458 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
f303e507
JR
2459 if (info2) {
2460 found = info2->domain;
2461 info2->dev = dev;
2462 }
5a8f40e8 2463 }
f303e507 2464
745f2586
JL
2465 if (found) {
2466 spin_unlock_irqrestore(&device_domain_lock, flags);
2467 free_devinfo_mem(info);
b718cd3d
DW
2468 /* Caller must free the original domain */
2469 return found;
745f2586
JL
2470 }
2471
d160aca5
JR
2472 spin_lock(&iommu->lock);
2473 ret = domain_attach_iommu(domain, iommu);
2474 spin_unlock(&iommu->lock);
2475
2476 if (ret) {
c6c2cebd 2477 spin_unlock_irqrestore(&device_domain_lock, flags);
499f3aa4 2478 free_devinfo_mem(info);
c6c2cebd
JR
2479 return NULL;
2480 }
c6c2cebd 2481
b718cd3d
DW
2482 list_add(&info->link, &domain->devices);
2483 list_add(&info->global, &device_domain_list);
2484 if (dev)
2485 dev->archdata.iommu = info;
2486 spin_unlock_irqrestore(&device_domain_lock, flags);
2487
cc4e2575
JR
2488 if (dev && domain_context_mapping(domain, dev)) {
2489 pr_err("Domain context map for %s failed\n", dev_name(dev));
e6de0f8d 2490 dmar_remove_one_dev_info(domain, dev);
cc4e2575
JR
2491 return NULL;
2492 }
2493
b718cd3d 2494 return domain;
745f2586
JL
2495}
2496
579305f7
AW
2497static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2498{
2499 *(u16 *)opaque = alias;
2500 return 0;
2501}
2502
76208356 2503static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw)
ba395927 2504{
cc4e2575 2505 struct device_domain_info *info = NULL;
76208356 2506 struct dmar_domain *domain = NULL;
579305f7 2507 struct intel_iommu *iommu;
08a7f456 2508 u16 req_id, dma_alias;
ba395927 2509 unsigned long flags;
aa4d066a 2510 u8 bus, devfn;
ba395927 2511
579305f7
AW
2512 iommu = device_to_iommu(dev, &bus, &devfn);
2513 if (!iommu)
2514 return NULL;
2515
08a7f456
JR
2516 req_id = ((u16)bus << 8) | devfn;
2517
146922ec
DW
2518 if (dev_is_pci(dev)) {
2519 struct pci_dev *pdev = to_pci_dev(dev);
276dbf99 2520
579305f7
AW
2521 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2522
2523 spin_lock_irqsave(&device_domain_lock, flags);
2524 info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2525 PCI_BUS_NUM(dma_alias),
2526 dma_alias & 0xff);
2527 if (info) {
2528 iommu = info->iommu;
2529 domain = info->domain;
5a8f40e8 2530 }
579305f7 2531 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927 2532
76208356 2533 /* DMA alias already has a domain, use it */
579305f7 2534 if (info)
76208356 2535 goto out;
579305f7 2536 }
ba395927 2537
146922ec 2538 /* Allocate and initialize new domain for the device */
ab8dfe25 2539 domain = alloc_domain(0);
745f2586 2540 if (!domain)
579305f7 2541 return NULL;
dc534b25 2542 if (domain_init(domain, iommu, gaw)) {
579305f7
AW
2543 domain_exit(domain);
2544 return NULL;
2c2e2c38 2545 }
ba395927 2546
76208356 2547out:
579305f7 2548
76208356
JR
2549 return domain;
2550}
579305f7 2551
76208356
JR
2552static struct dmar_domain *set_domain_for_dev(struct device *dev,
2553 struct dmar_domain *domain)
2554{
2555 struct intel_iommu *iommu;
2556 struct dmar_domain *tmp;
2557 u16 req_id, dma_alias;
2558 u8 bus, devfn;
2559
2560 iommu = device_to_iommu(dev, &bus, &devfn);
2561 if (!iommu)
2562 return NULL;
2563
2564 req_id = ((u16)bus << 8) | devfn;
2565
2566 if (dev_is_pci(dev)) {
2567 struct pci_dev *pdev = to_pci_dev(dev);
2568
2569 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2570
2571 /* register PCI DMA alias device */
2572 if (req_id != dma_alias) {
2573 tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2574 dma_alias & 0xff, NULL, domain);
2575
2576 if (!tmp || tmp != domain)
2577 return tmp;
2578 }
ba395927
KA
2579 }
2580
5db31569 2581 tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
76208356
JR
2582 if (!tmp || tmp != domain)
2583 return tmp;
2584
2585 return domain;
2586}
579305f7 2587
76208356
JR
2588static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
2589{
2590 struct dmar_domain *domain, *tmp;
2591
2592 domain = find_domain(dev);
2593 if (domain)
2594 goto out;
2595
2596 domain = find_or_alloc_domain(dev, gaw);
2597 if (!domain)
2598 goto out;
2599
2600 tmp = set_domain_for_dev(dev, domain);
2601 if (!tmp || domain != tmp) {
579305f7
AW
2602 domain_exit(domain);
2603 domain = tmp;
2604 }
b718cd3d 2605
76208356
JR
2606out:
2607
b718cd3d 2608 return domain;
ba395927
KA
2609}
2610
b213203e
DW
2611static int iommu_domain_identity_map(struct dmar_domain *domain,
2612 unsigned long long start,
2613 unsigned long long end)
ba395927 2614{
c5395d5c
DW
2615 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2616 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2617
2618 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2619 dma_to_mm_pfn(last_vpfn))) {
9f10e5bf 2620 pr_err("Reserving iova failed\n");
b213203e 2621 return -ENOMEM;
ba395927
KA
2622 }
2623
af1089ce 2624 pr_debug("Mapping reserved region %llx-%llx\n", start, end);
ba395927
KA
2625 /*
2626 * RMRR range might have overlap with physical memory range,
2627 * clear it first
2628 */
c5395d5c 2629 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
ba395927 2630
c5395d5c
DW
2631 return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2632 last_vpfn - first_vpfn + 1,
61df7443 2633 DMA_PTE_READ|DMA_PTE_WRITE);
b213203e
DW
2634}
2635
d66ce54b
JR
2636static int domain_prepare_identity_map(struct device *dev,
2637 struct dmar_domain *domain,
2638 unsigned long long start,
2639 unsigned long long end)
b213203e 2640{
19943b0e
DW
2641 /* For _hardware_ passthrough, don't bother. But for software
2642 passthrough, we do it anyway -- it may indicate a memory
2643 range which is reserved in E820, so which didn't get set
2644 up to start with in si_domain */
2645 if (domain == si_domain && hw_pass_through) {
9f10e5bf
JR
2646 pr_warn("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2647 dev_name(dev), start, end);
19943b0e
DW
2648 return 0;
2649 }
2650
9f10e5bf
JR
2651 pr_info("Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2652 dev_name(dev), start, end);
2653
5595b528
DW
2654 if (end < start) {
2655 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2656 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2657 dmi_get_system_info(DMI_BIOS_VENDOR),
2658 dmi_get_system_info(DMI_BIOS_VERSION),
2659 dmi_get_system_info(DMI_PRODUCT_VERSION));
d66ce54b 2660 return -EIO;
5595b528
DW
2661 }
2662
2ff729f5
DW
2663 if (end >> agaw_to_width(domain->agaw)) {
2664 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2665 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2666 agaw_to_width(domain->agaw),
2667 dmi_get_system_info(DMI_BIOS_VENDOR),
2668 dmi_get_system_info(DMI_BIOS_VERSION),
2669 dmi_get_system_info(DMI_PRODUCT_VERSION));
d66ce54b 2670 return -EIO;
2ff729f5 2671 }
19943b0e 2672
d66ce54b
JR
2673 return iommu_domain_identity_map(domain, start, end);
2674}
ba395927 2675
d66ce54b
JR
2676static int iommu_prepare_identity_map(struct device *dev,
2677 unsigned long long start,
2678 unsigned long long end)
2679{
2680 struct dmar_domain *domain;
2681 int ret;
2682
2683 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2684 if (!domain)
2685 return -ENOMEM;
2686
2687 ret = domain_prepare_identity_map(dev, domain, start, end);
2688 if (ret)
2689 domain_exit(domain);
b213203e 2690
ba395927 2691 return ret;
ba395927
KA
2692}
2693
2694static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
0b9d9753 2695 struct device *dev)
ba395927 2696{
0b9d9753 2697 if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927 2698 return 0;
0b9d9753
DW
2699 return iommu_prepare_identity_map(dev, rmrr->base_address,
2700 rmrr->end_address);
ba395927
KA
2701}
2702
d3f13810 2703#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
49a0429e
KA
2704static inline void iommu_prepare_isa(void)
2705{
2706 struct pci_dev *pdev;
2707 int ret;
2708
2709 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2710 if (!pdev)
2711 return;
2712
9f10e5bf 2713 pr_info("Prepare 0-16MiB unity mapping for LPC\n");
0b9d9753 2714 ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);
49a0429e
KA
2715
2716 if (ret)
9f10e5bf 2717 pr_err("Failed to create 0-16MiB identity map - floppy might not work\n");
49a0429e 2718
9b27e82d 2719 pci_dev_put(pdev);
49a0429e
KA
2720}
2721#else
2722static inline void iommu_prepare_isa(void)
2723{
2724 return;
2725}
d3f13810 2726#endif /* !CONFIG_INTEL_IOMMU_FLPY_WA */
49a0429e 2727
2c2e2c38 2728static int md_domain_init(struct dmar_domain *domain, int guest_width);
c7ab48d2 2729
071e1374 2730static int __init si_domain_init(int hw)
2c2e2c38 2731{
c7ab48d2 2732 int nid, ret = 0;
2c2e2c38 2733
ab8dfe25 2734 si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2c2e2c38
FY
2735 if (!si_domain)
2736 return -EFAULT;
2737
2c2e2c38
FY
2738 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2739 domain_exit(si_domain);
2740 return -EFAULT;
2741 }
2742
0dc79715 2743 pr_debug("Identity mapping domain allocated\n");
2c2e2c38 2744
19943b0e
DW
2745 if (hw)
2746 return 0;
2747
c7ab48d2 2748 for_each_online_node(nid) {
5dfe8660
TH
2749 unsigned long start_pfn, end_pfn;
2750 int i;
2751
2752 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2753 ret = iommu_domain_identity_map(si_domain,
2754 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2755 if (ret)
2756 return ret;
2757 }
c7ab48d2
DW
2758 }
2759
2c2e2c38
FY
2760 return 0;
2761}
2762
9b226624 2763static int identity_mapping(struct device *dev)
2c2e2c38
FY
2764{
2765 struct device_domain_info *info;
2766
2767 if (likely(!iommu_identity_mapping))
2768 return 0;
2769
9b226624 2770 info = dev->archdata.iommu;
cb452a40
MT
2771 if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2772 return (info->domain == si_domain);
2c2e2c38 2773
2c2e2c38
FY
2774 return 0;
2775}
2776
28ccce0d 2777static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
2c2e2c38 2778{
0ac72664 2779 struct dmar_domain *ndomain;
5a8f40e8 2780 struct intel_iommu *iommu;
156baca8 2781 u8 bus, devfn;
2c2e2c38 2782
5913c9bf 2783 iommu = device_to_iommu(dev, &bus, &devfn);
5a8f40e8
DW
2784 if (!iommu)
2785 return -ENODEV;
2786
5db31569 2787 ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
0ac72664
DW
2788 if (ndomain != domain)
2789 return -EBUSY;
2c2e2c38
FY
2790
2791 return 0;
2792}
2793
0b9d9753 2794static bool device_has_rmrr(struct device *dev)
ea2447f7
TM
2795{
2796 struct dmar_rmrr_unit *rmrr;
832bd858 2797 struct device *tmp;
ea2447f7
TM
2798 int i;
2799
0e242612 2800 rcu_read_lock();
ea2447f7 2801 for_each_rmrr_units(rmrr) {
b683b230
JL
2802 /*
2803 * Return TRUE if this RMRR contains the device that
2804 * is passed in.
2805 */
2806 for_each_active_dev_scope(rmrr->devices,
2807 rmrr->devices_cnt, i, tmp)
0b9d9753 2808 if (tmp == dev) {
0e242612 2809 rcu_read_unlock();
ea2447f7 2810 return true;
b683b230 2811 }
ea2447f7 2812 }
0e242612 2813 rcu_read_unlock();
ea2447f7
TM
2814 return false;
2815}
2816
c875d2c1
AW
2817/*
2818 * There are a couple cases where we need to restrict the functionality of
2819 * devices associated with RMRRs. The first is when evaluating a device for
2820 * identity mapping because problems exist when devices are moved in and out
2821 * of domains and their respective RMRR information is lost. This means that
2822 * a device with associated RMRRs will never be in a "passthrough" domain.
2823 * The second is use of the device through the IOMMU API. This interface
2824 * expects to have full control of the IOVA space for the device. We cannot
2825 * satisfy both the requirement that RMRR access is maintained and have an
2826 * unencumbered IOVA space. We also have no ability to quiesce the device's
2827 * use of the RMRR space or even inform the IOMMU API user of the restriction.
2828 * We therefore prevent devices associated with an RMRR from participating in
2829 * the IOMMU API, which eliminates them from device assignment.
2830 *
2831 * In both cases we assume that PCI USB devices with RMRRs have them largely
2832 * for historical reasons and that the RMRR space is not actively used post
2833 * boot. This exclusion may change if vendors begin to abuse it.
18436afd
DW
2834 *
2835 * The same exception is made for graphics devices, with the requirement that
2836 * any use of the RMRR regions will be torn down before assigning the device
2837 * to a guest.
c875d2c1
AW
2838 */
2839static bool device_is_rmrr_locked(struct device *dev)
2840{
2841 if (!device_has_rmrr(dev))
2842 return false;
2843
2844 if (dev_is_pci(dev)) {
2845 struct pci_dev *pdev = to_pci_dev(dev);
2846
18436afd 2847 if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
c875d2c1
AW
2848 return false;
2849 }
2850
2851 return true;
2852}
2853
3bdb2591 2854static int iommu_should_identity_map(struct device *dev, int startup)
6941af28 2855{
ea2447f7 2856
3bdb2591
DW
2857 if (dev_is_pci(dev)) {
2858 struct pci_dev *pdev = to_pci_dev(dev);
ea2447f7 2859
c875d2c1 2860 if (device_is_rmrr_locked(dev))
3bdb2591 2861 return 0;
e0fc7e0b 2862
3bdb2591
DW
2863 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2864 return 1;
e0fc7e0b 2865
3bdb2591
DW
2866 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2867 return 1;
6941af28 2868
3bdb2591 2869 if (!(iommu_identity_mapping & IDENTMAP_ALL))
3dfc813d 2870 return 0;
3bdb2591
DW
2871
2872 /*
2873 * We want to start off with all devices in the 1:1 domain, and
2874 * take them out later if we find they can't access all of memory.
2875 *
2876 * However, we can't do this for PCI devices behind bridges,
2877 * because all PCI devices behind the same bridge will end up
2878 * with the same source-id on their transactions.
2879 *
2880 * Practically speaking, we can't change things around for these
2881 * devices at run-time, because we can't be sure there'll be no
2882 * DMA transactions in flight for any of their siblings.
2883 *
2884 * So PCI devices (unless they're on the root bus) as well as
2885 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2886 * the 1:1 domain, just in _case_ one of their siblings turns out
2887 * not to be able to map all of memory.
2888 */
2889 if (!pci_is_pcie(pdev)) {
2890 if (!pci_is_root_bus(pdev->bus))
2891 return 0;
2892 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2893 return 0;
2894 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
3dfc813d 2895 return 0;
3bdb2591
DW
2896 } else {
2897 if (device_has_rmrr(dev))
2898 return 0;
2899 }
3dfc813d 2900
3bdb2591 2901 /*
3dfc813d 2902 * At boot time, we don't yet know if devices will be 64-bit capable.
3bdb2591 2903 * Assume that they will — if they turn out not to be, then we can
3dfc813d
DW
2904 * take them out of the 1:1 domain later.
2905 */
8fcc5372
CW
2906 if (!startup) {
2907 /*
2908 * If the device's dma_mask is less than the system's memory
2909 * size then this is not a candidate for identity mapping.
2910 */
3bdb2591 2911 u64 dma_mask = *dev->dma_mask;
8fcc5372 2912
3bdb2591
DW
2913 if (dev->coherent_dma_mask &&
2914 dev->coherent_dma_mask < dma_mask)
2915 dma_mask = dev->coherent_dma_mask;
8fcc5372 2916
3bdb2591 2917 return dma_mask >= dma_get_required_mask(dev);
8fcc5372 2918 }
6941af28
DW
2919
2920 return 1;
2921}
2922
cf04eee8
DW
2923static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
2924{
2925 int ret;
2926
2927 if (!iommu_should_identity_map(dev, 1))
2928 return 0;
2929
28ccce0d 2930 ret = domain_add_dev_info(si_domain, dev);
cf04eee8 2931 if (!ret)
9f10e5bf
JR
2932 pr_info("%s identity mapping for device %s\n",
2933 hw ? "Hardware" : "Software", dev_name(dev));
cf04eee8
DW
2934 else if (ret == -ENODEV)
2935 /* device not associated with an iommu */
2936 ret = 0;
2937
2938 return ret;
2939}
2940
2941
071e1374 2942static int __init iommu_prepare_static_identity_mapping(int hw)
2c2e2c38 2943{
2c2e2c38 2944 struct pci_dev *pdev = NULL;
cf04eee8
DW
2945 struct dmar_drhd_unit *drhd;
2946 struct intel_iommu *iommu;
2947 struct device *dev;
2948 int i;
2949 int ret = 0;
2c2e2c38 2950
2c2e2c38 2951 for_each_pci_dev(pdev) {
cf04eee8
DW
2952 ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
2953 if (ret)
2954 return ret;
2955 }
2956
2957 for_each_active_iommu(iommu, drhd)
2958 for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
2959 struct acpi_device_physical_node *pn;
2960 struct acpi_device *adev;
2961
2962 if (dev->bus != &acpi_bus_type)
2963 continue;
86080ccc 2964
cf04eee8
DW
2965 adev= to_acpi_device(dev);
2966 mutex_lock(&adev->physical_node_lock);
2967 list_for_each_entry(pn, &adev->physical_node_list, node) {
2968 ret = dev_prepare_static_identity_mapping(pn->dev, hw);
2969 if (ret)
2970 break;
eae460b6 2971 }
cf04eee8
DW
2972 mutex_unlock(&adev->physical_node_lock);
2973 if (ret)
2974 return ret;
62edf5dc 2975 }
2c2e2c38
FY
2976
2977 return 0;
2978}
2979
ffebeb46
JL
2980static void intel_iommu_init_qi(struct intel_iommu *iommu)
2981{
2982 /*
2983 * Start from the sane iommu hardware state.
2984 * If the queued invalidation is already initialized by us
2985 * (for example, while enabling interrupt-remapping) then
2986 * we got the things already rolling from a sane state.
2987 */
2988 if (!iommu->qi) {
2989 /*
2990 * Clear any previous faults.
2991 */
2992 dmar_fault(-1, iommu);
2993 /*
2994 * Disable queued invalidation if supported and already enabled
2995 * before OS handover.
2996 */
2997 dmar_disable_qi(iommu);
2998 }
2999
3000 if (dmar_enable_qi(iommu)) {
3001 /*
3002 * Queued Invalidate not enabled, use Register Based Invalidate
3003 */
3004 iommu->flush.flush_context = __iommu_flush_context;
3005 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
9f10e5bf 3006 pr_info("%s: Using Register based invalidation\n",
ffebeb46
JL
3007 iommu->name);
3008 } else {
3009 iommu->flush.flush_context = qi_flush_context;
3010 iommu->flush.flush_iotlb = qi_flush_iotlb;
9f10e5bf 3011 pr_info("%s: Using Queued invalidation\n", iommu->name);
ffebeb46
JL
3012 }
3013}
3014
091d42e4 3015static int copy_context_table(struct intel_iommu *iommu,
dfddb969 3016 struct root_entry *old_re,
091d42e4
JR
3017 struct context_entry **tbl,
3018 int bus, bool ext)
3019{
dbcd861f 3020 int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
543c8dcf 3021 struct context_entry *new_ce = NULL, ce;
dfddb969 3022 struct context_entry *old_ce = NULL;
543c8dcf 3023 struct root_entry re;
091d42e4
JR
3024 phys_addr_t old_ce_phys;
3025
3026 tbl_idx = ext ? bus * 2 : bus;
dfddb969 3027 memcpy(&re, old_re, sizeof(re));
091d42e4
JR
3028
3029 for (devfn = 0; devfn < 256; devfn++) {
3030 /* First calculate the correct index */
3031 idx = (ext ? devfn * 2 : devfn) % 256;
3032
3033 if (idx == 0) {
3034 /* First save what we may have and clean up */
3035 if (new_ce) {
3036 tbl[tbl_idx] = new_ce;
3037 __iommu_flush_cache(iommu, new_ce,
3038 VTD_PAGE_SIZE);
3039 pos = 1;
3040 }
3041
3042 if (old_ce)
3043 iounmap(old_ce);
3044
3045 ret = 0;
3046 if (devfn < 0x80)
543c8dcf 3047 old_ce_phys = root_entry_lctp(&re);
091d42e4 3048 else
543c8dcf 3049 old_ce_phys = root_entry_uctp(&re);
091d42e4
JR
3050
3051 if (!old_ce_phys) {
3052 if (ext && devfn == 0) {
3053 /* No LCTP, try UCTP */
3054 devfn = 0x7f;
3055 continue;
3056 } else {
3057 goto out;
3058 }
3059 }
3060
3061 ret = -ENOMEM;
dfddb969
DW
3062 old_ce = memremap(old_ce_phys, PAGE_SIZE,
3063 MEMREMAP_WB);
091d42e4
JR
3064 if (!old_ce)
3065 goto out;
3066
3067 new_ce = alloc_pgtable_page(iommu->node);
3068 if (!new_ce)
3069 goto out_unmap;
3070
3071 ret = 0;
3072 }
3073
3074 /* Now copy the context entry */
dfddb969 3075 memcpy(&ce, old_ce + idx, sizeof(ce));
091d42e4 3076
cf484d0e 3077 if (!__context_present(&ce))
091d42e4
JR
3078 continue;
3079
dbcd861f
JR
3080 did = context_domain_id(&ce);
3081 if (did >= 0 && did < cap_ndoms(iommu->cap))
3082 set_bit(did, iommu->domain_ids);
3083
cf484d0e
JR
3084 /*
3085 * We need a marker for copied context entries. This
3086 * marker needs to work for the old format as well as
3087 * for extended context entries.
3088 *
3089 * Bit 67 of the context entry is used. In the old
3090 * format this bit is available to software, in the
3091 * extended format it is the PGE bit, but PGE is ignored
3092 * by HW if PASIDs are disabled (and thus still
3093 * available).
3094 *
3095 * So disable PASIDs first and then mark the entry
3096 * copied. This means that we don't copy PASID
3097 * translations from the old kernel, but this is fine as
3098 * faults there are not fatal.
3099 */
3100 context_clear_pasid_enable(&ce);
3101 context_set_copied(&ce);
3102
091d42e4
JR
3103 new_ce[idx] = ce;
3104 }
3105
3106 tbl[tbl_idx + pos] = new_ce;
3107
3108 __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
3109
3110out_unmap:
dfddb969 3111 memunmap(old_ce);
091d42e4
JR
3112
3113out:
3114 return ret;
3115}
3116
3117static int copy_translation_tables(struct intel_iommu *iommu)
3118{
3119 struct context_entry **ctxt_tbls;
dfddb969 3120 struct root_entry *old_rt;
091d42e4
JR
3121 phys_addr_t old_rt_phys;
3122 int ctxt_table_entries;
3123 unsigned long flags;
3124 u64 rtaddr_reg;
3125 int bus, ret;
c3361f2f 3126 bool new_ext, ext;
091d42e4
JR
3127
3128 rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
3129 ext = !!(rtaddr_reg & DMA_RTADDR_RTT);
c3361f2f
JR
3130 new_ext = !!ecap_ecs(iommu->ecap);
3131
3132 /*
3133 * The RTT bit can only be changed when translation is disabled,
3134 * but disabling translation means to open a window for data
3135 * corruption. So bail out and don't copy anything if we would
3136 * have to change the bit.
3137 */
3138 if (new_ext != ext)
3139 return -EINVAL;
091d42e4
JR
3140
3141 old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
3142 if (!old_rt_phys)
3143 return -EINVAL;
3144
dfddb969 3145 old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
091d42e4
JR
3146 if (!old_rt)
3147 return -ENOMEM;
3148
3149 /* This is too big for the stack - allocate it from slab */
3150 ctxt_table_entries = ext ? 512 : 256;
3151 ret = -ENOMEM;
3152 ctxt_tbls = kzalloc(ctxt_table_entries * sizeof(void *), GFP_KERNEL);
3153 if (!ctxt_tbls)
3154 goto out_unmap;
3155
3156 for (bus = 0; bus < 256; bus++) {
3157 ret = copy_context_table(iommu, &old_rt[bus],
3158 ctxt_tbls, bus, ext);
3159 if (ret) {
3160 pr_err("%s: Failed to copy context table for bus %d\n",
3161 iommu->name, bus);
3162 continue;
3163 }
3164 }
3165
3166 spin_lock_irqsave(&iommu->lock, flags);
3167
3168 /* Context tables are copied, now write them to the root_entry table */
3169 for (bus = 0; bus < 256; bus++) {
3170 int idx = ext ? bus * 2 : bus;
3171 u64 val;
3172
3173 if (ctxt_tbls[idx]) {
3174 val = virt_to_phys(ctxt_tbls[idx]) | 1;
3175 iommu->root_entry[bus].lo = val;
3176 }
3177
3178 if (!ext || !ctxt_tbls[idx + 1])
3179 continue;
3180
3181 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
3182 iommu->root_entry[bus].hi = val;
3183 }
3184
3185 spin_unlock_irqrestore(&iommu->lock, flags);
3186
3187 kfree(ctxt_tbls);
3188
3189 __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
3190
3191 ret = 0;
3192
3193out_unmap:
dfddb969 3194 memunmap(old_rt);
091d42e4
JR
3195
3196 return ret;
3197}
3198
b779260b 3199static int __init init_dmars(void)
ba395927
KA
3200{
3201 struct dmar_drhd_unit *drhd;
3202 struct dmar_rmrr_unit *rmrr;
a87f4918 3203 bool copied_tables = false;
832bd858 3204 struct device *dev;
ba395927 3205 struct intel_iommu *iommu;
aa473240 3206 int i, ret, cpu;
2c2e2c38 3207
ba395927
KA
3208 /*
3209 * for each drhd
3210 * allocate root
3211 * initialize and program root entry to not present
3212 * endfor
3213 */
3214 for_each_drhd_unit(drhd) {
5e0d2a6f 3215 /*
3216 * lock not needed as this is only incremented in the single
3217 * threaded kernel __init code path all other access are read
3218 * only
3219 */
78d8e704 3220 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
1b198bb0
MT
3221 g_num_of_iommus++;
3222 continue;
3223 }
9f10e5bf 3224 pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
5e0d2a6f 3225 }
3226
ffebeb46
JL
3227 /* Preallocate enough resources for IOMMU hot-addition */
3228 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
3229 g_num_of_iommus = DMAR_UNITS_SUPPORTED;
3230
d9630fe9
WH
3231 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
3232 GFP_KERNEL);
3233 if (!g_iommus) {
9f10e5bf 3234 pr_err("Allocating global iommu array failed\n");
d9630fe9
WH
3235 ret = -ENOMEM;
3236 goto error;
3237 }
3238
aa473240
OP
3239 for_each_possible_cpu(cpu) {
3240 struct deferred_flush_data *dfd = per_cpu_ptr(&deferred_flush,
3241 cpu);
3242
3243 dfd->tables = kzalloc(g_num_of_iommus *
3244 sizeof(struct deferred_flush_table),
3245 GFP_KERNEL);
3246 if (!dfd->tables) {
3247 ret = -ENOMEM;
3248 goto free_g_iommus;
3249 }
3250
3251 spin_lock_init(&dfd->lock);
3252 setup_timer(&dfd->timer, flush_unmaps_timeout, cpu);
5e0d2a6f 3253 }
3254
7c919779 3255 for_each_active_iommu(iommu, drhd) {
d9630fe9 3256 g_iommus[iommu->seq_id] = iommu;
ba395927 3257
b63d80d1
JR
3258 intel_iommu_init_qi(iommu);
3259
e61d98d8
SS
3260 ret = iommu_init_domains(iommu);
3261 if (ret)
989d51fc 3262 goto free_iommu;
e61d98d8 3263
4158c2ec
JR
3264 init_translation_status(iommu);
3265
091d42e4
JR
3266 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
3267 iommu_disable_translation(iommu);
3268 clear_translation_pre_enabled(iommu);
3269 pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3270 iommu->name);
3271 }
4158c2ec 3272
ba395927
KA
3273 /*
3274 * TBD:
3275 * we could share the same root & context tables
25985edc 3276 * among all IOMMU's. Need to Split it later.
ba395927
KA
3277 */
3278 ret = iommu_alloc_root_entry(iommu);
ffebeb46 3279 if (ret)
989d51fc 3280 goto free_iommu;
5f0a7f76 3281
091d42e4
JR
3282 if (translation_pre_enabled(iommu)) {
3283 pr_info("Translation already enabled - trying to copy translation structures\n");
3284
3285 ret = copy_translation_tables(iommu);
3286 if (ret) {
3287 /*
3288 * We found the IOMMU with translation
3289 * enabled - but failed to copy over the
3290 * old root-entry table. Try to proceed
3291 * by disabling translation now and
3292 * allocating a clean root-entry table.
3293 * This might cause DMAR faults, but
3294 * probably the dump will still succeed.
3295 */
3296 pr_err("Failed to copy translation tables from previous kernel for %s\n",
3297 iommu->name);
3298 iommu_disable_translation(iommu);
3299 clear_translation_pre_enabled(iommu);
3300 } else {
3301 pr_info("Copied translation tables from previous kernel for %s\n",
3302 iommu->name);
a87f4918 3303 copied_tables = true;
091d42e4
JR
3304 }
3305 }
3306
4ed0d3e6 3307 if (!ecap_pass_through(iommu->ecap))
19943b0e 3308 hw_pass_through = 0;
8a94ade4
DW
3309#ifdef CONFIG_INTEL_IOMMU_SVM
3310 if (pasid_enabled(iommu))
3311 intel_svm_alloc_pasid_tables(iommu);
3312#endif
ba395927
KA
3313 }
3314
a4c34ff1
JR
3315 /*
3316 * Now that qi is enabled on all iommus, set the root entry and flush
3317 * caches. This is required on some Intel X58 chipsets, otherwise the
3318 * flush_context function will loop forever and the boot hangs.
3319 */
3320 for_each_active_iommu(iommu, drhd) {
3321 iommu_flush_write_buffer(iommu);
3322 iommu_set_root_entry(iommu);
3323 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
3324 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3325 }
3326
19943b0e 3327 if (iommu_pass_through)
e0fc7e0b
DW
3328 iommu_identity_mapping |= IDENTMAP_ALL;
3329
d3f13810 3330#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
e0fc7e0b 3331 iommu_identity_mapping |= IDENTMAP_GFX;
19943b0e 3332#endif
e0fc7e0b 3333
21e722c4
AR
3334 check_tylersburg_isoch();
3335
86080ccc
JR
3336 if (iommu_identity_mapping) {
3337 ret = si_domain_init(hw_pass_through);
3338 if (ret)
3339 goto free_iommu;
3340 }
3341
e0fc7e0b 3342
a87f4918
JR
3343 /*
3344 * If we copied translations from a previous kernel in the kdump
3345 * case, we can not assign the devices to domains now, as that
3346 * would eliminate the old mappings. So skip this part and defer
3347 * the assignment to device driver initialization time.
3348 */
3349 if (copied_tables)
3350 goto domains_done;
3351
ba395927 3352 /*
19943b0e
DW
3353 * If pass through is not set or not enabled, setup context entries for
3354 * identity mappings for rmrr, gfx, and isa and may fall back to static
3355 * identity mapping if iommu_identity_mapping is set.
ba395927 3356 */
19943b0e
DW
3357 if (iommu_identity_mapping) {
3358 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
4ed0d3e6 3359 if (ret) {
9f10e5bf 3360 pr_crit("Failed to setup IOMMU pass-through\n");
989d51fc 3361 goto free_iommu;
ba395927
KA
3362 }
3363 }
ba395927 3364 /*
19943b0e
DW
3365 * For each rmrr
3366 * for each dev attached to rmrr
3367 * do
3368 * locate drhd for dev, alloc domain for dev
3369 * allocate free domain
3370 * allocate page table entries for rmrr
3371 * if context not allocated for bus
3372 * allocate and init context
3373 * set present in root table for this bus
3374 * init context with domain, translation etc
3375 * endfor
3376 * endfor
ba395927 3377 */
9f10e5bf 3378 pr_info("Setting RMRR:\n");
19943b0e 3379 for_each_rmrr_units(rmrr) {
b683b230
JL
3380 /* some BIOS lists non-exist devices in DMAR table. */
3381 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
832bd858 3382 i, dev) {
0b9d9753 3383 ret = iommu_prepare_rmrr_dev(rmrr, dev);
19943b0e 3384 if (ret)
9f10e5bf 3385 pr_err("Mapping reserved region failed\n");
ba395927 3386 }
4ed0d3e6 3387 }
49a0429e 3388
19943b0e
DW
3389 iommu_prepare_isa();
3390
a87f4918
JR
3391domains_done:
3392
ba395927
KA
3393 /*
3394 * for each drhd
3395 * enable fault log
3396 * global invalidate context cache
3397 * global invalidate iotlb
3398 * enable translation
3399 */
7c919779 3400 for_each_iommu(iommu, drhd) {
51a63e67
JC
3401 if (drhd->ignored) {
3402 /*
3403 * we always have to disable PMRs or DMA may fail on
3404 * this device
3405 */
3406 if (force_on)
7c919779 3407 iommu_disable_protect_mem_regions(iommu);
ba395927 3408 continue;
51a63e67 3409 }
ba395927
KA
3410
3411 iommu_flush_write_buffer(iommu);
3412
a222a7f0
DW
3413#ifdef CONFIG_INTEL_IOMMU_SVM
3414 if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) {
3415 ret = intel_svm_enable_prq(iommu);
3416 if (ret)
3417 goto free_iommu;
3418 }
3419#endif
3460a6d9
KA
3420 ret = dmar_set_interrupt(iommu);
3421 if (ret)
989d51fc 3422 goto free_iommu;
3460a6d9 3423
8939ddf6
JR
3424 if (!translation_pre_enabled(iommu))
3425 iommu_enable_translation(iommu);
3426
b94996c9 3427 iommu_disable_protect_mem_regions(iommu);
ba395927
KA
3428 }
3429
3430 return 0;
989d51fc
JL
3431
3432free_iommu:
ffebeb46
JL
3433 for_each_active_iommu(iommu, drhd) {
3434 disable_dmar_iommu(iommu);
a868e6b7 3435 free_dmar_iommu(iommu);
ffebeb46 3436 }
989d51fc 3437free_g_iommus:
aa473240
OP
3438 for_each_possible_cpu(cpu)
3439 kfree(per_cpu_ptr(&deferred_flush, cpu)->tables);
d9630fe9 3440 kfree(g_iommus);
989d51fc 3441error:
ba395927
KA
3442 return ret;
3443}
3444
5a5e02a6 3445/* This takes a number of _MM_ pages, not VTD pages */
2aac6304 3446static unsigned long intel_alloc_iova(struct device *dev,
875764de
DW
3447 struct dmar_domain *domain,
3448 unsigned long nrpages, uint64_t dma_mask)
ba395927 3449{
22e2f9fa 3450 unsigned long iova_pfn = 0;
ba395927 3451
875764de
DW
3452 /* Restrict dma_mask to the width that the iommu can handle */
3453 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
8f6429c7
RM
3454 /* Ensure we reserve the whole size-aligned region */
3455 nrpages = __roundup_pow_of_two(nrpages);
875764de
DW
3456
3457 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
ba395927
KA
3458 /*
3459 * First try to allocate an io virtual address in
284901a9 3460 * DMA_BIT_MASK(32) and if that fails then try allocating
3609801e 3461 * from higher range
ba395927 3462 */
22e2f9fa
OP
3463 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
3464 IOVA_PFN(DMA_BIT_MASK(32)));
3465 if (iova_pfn)
3466 return iova_pfn;
875764de 3467 }
22e2f9fa
OP
3468 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages, IOVA_PFN(dma_mask));
3469 if (unlikely(!iova_pfn)) {
9f10e5bf 3470 pr_err("Allocating %ld-page iova for %s failed",
207e3592 3471 nrpages, dev_name(dev));
2aac6304 3472 return 0;
f76aec76
KA
3473 }
3474
22e2f9fa 3475 return iova_pfn;
f76aec76
KA
3476}
3477
d4b709f4 3478static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
f76aec76 3479{
1c5ebba9 3480 struct dmar_domain *domain, *tmp;
b1ce5b79 3481 struct dmar_rmrr_unit *rmrr;
b1ce5b79
JR
3482 struct device *i_dev;
3483 int i, ret;
f76aec76 3484
1c5ebba9
JR
3485 domain = find_domain(dev);
3486 if (domain)
3487 goto out;
3488
3489 domain = find_or_alloc_domain(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
3490 if (!domain)
3491 goto out;
ba395927 3492
b1ce5b79
JR
3493 /* We have a new domain - setup possible RMRRs for the device */
3494 rcu_read_lock();
3495 for_each_rmrr_units(rmrr) {
3496 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
3497 i, i_dev) {
3498 if (i_dev != dev)
3499 continue;
3500
3501 ret = domain_prepare_identity_map(dev, domain,
3502 rmrr->base_address,
3503 rmrr->end_address);
3504 if (ret)
3505 dev_err(dev, "Mapping reserved region failed\n");
3506 }
3507 }
3508 rcu_read_unlock();
3509
1c5ebba9
JR
3510 tmp = set_domain_for_dev(dev, domain);
3511 if (!tmp || domain != tmp) {
3512 domain_exit(domain);
3513 domain = tmp;
3514 }
3515
3516out:
3517
3518 if (!domain)
3519 pr_err("Allocating domain for %s failed\n", dev_name(dev));
3520
3521
f76aec76
KA
3522 return domain;
3523}
3524
d4b709f4 3525static inline struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
147202aa
DW
3526{
3527 struct device_domain_info *info;
3528
3529 /* No lock here, assumes no domain exit in normal case */
d4b709f4 3530 info = dev->archdata.iommu;
147202aa
DW
3531 if (likely(info))
3532 return info->domain;
3533
3534 return __get_valid_domain_for_dev(dev);
3535}
3536
ecb509ec 3537/* Check if the dev needs to go through non-identity map and unmap process.*/
73676832 3538static int iommu_no_mapping(struct device *dev)
2c2e2c38
FY
3539{
3540 int found;
3541
3d89194a 3542 if (iommu_dummy(dev))
1e4c64c4
DW
3543 return 1;
3544
2c2e2c38 3545 if (!iommu_identity_mapping)
1e4c64c4 3546 return 0;
2c2e2c38 3547
9b226624 3548 found = identity_mapping(dev);
2c2e2c38 3549 if (found) {
ecb509ec 3550 if (iommu_should_identity_map(dev, 0))
2c2e2c38
FY
3551 return 1;
3552 else {
3553 /*
3554 * 32 bit DMA is removed from si_domain and fall back
3555 * to non-identity mapping.
3556 */
e6de0f8d 3557 dmar_remove_one_dev_info(si_domain, dev);
9f10e5bf
JR
3558 pr_info("32bit %s uses non-identity mapping\n",
3559 dev_name(dev));
2c2e2c38
FY
3560 return 0;
3561 }
3562 } else {
3563 /*
3564 * In case of a detached 64 bit DMA device from vm, the device
3565 * is put into si_domain for identity mapping.
3566 */
ecb509ec 3567 if (iommu_should_identity_map(dev, 0)) {
2c2e2c38 3568 int ret;
28ccce0d 3569 ret = domain_add_dev_info(si_domain, dev);
2c2e2c38 3570 if (!ret) {
9f10e5bf
JR
3571 pr_info("64bit %s uses identity mapping\n",
3572 dev_name(dev));
2c2e2c38
FY
3573 return 1;
3574 }
3575 }
3576 }
3577
1e4c64c4 3578 return 0;
2c2e2c38
FY
3579}
3580
5040a918 3581static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
bb9e6d65 3582 size_t size, int dir, u64 dma_mask)
f76aec76 3583{
f76aec76 3584 struct dmar_domain *domain;
5b6985ce 3585 phys_addr_t start_paddr;
2aac6304 3586 unsigned long iova_pfn;
f76aec76 3587 int prot = 0;
6865f0d1 3588 int ret;
8c11e798 3589 struct intel_iommu *iommu;
33041ec0 3590 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
f76aec76
KA
3591
3592 BUG_ON(dir == DMA_NONE);
2c2e2c38 3593
5040a918 3594 if (iommu_no_mapping(dev))
6865f0d1 3595 return paddr;
f76aec76 3596
5040a918 3597 domain = get_valid_domain_for_dev(dev);
f76aec76
KA
3598 if (!domain)
3599 return 0;
3600
8c11e798 3601 iommu = domain_get_iommu(domain);
88cb6a74 3602 size = aligned_nrpages(paddr, size);
f76aec76 3603
2aac6304
OP
3604 iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
3605 if (!iova_pfn)
f76aec76
KA
3606 goto error;
3607
ba395927
KA
3608 /*
3609 * Check if DMAR supports zero-length reads on write only
3610 * mappings..
3611 */
3612 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3613 !cap_zlr(iommu->cap))
ba395927
KA
3614 prot |= DMA_PTE_READ;
3615 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3616 prot |= DMA_PTE_WRITE;
3617 /*
6865f0d1 3618 * paddr - (paddr + size) might be partial page, we should map the whole
ba395927 3619 * page. Note: if two part of one page are separately mapped, we
6865f0d1 3620 * might have two guest_addr mapping to the same host paddr, but this
ba395927
KA
3621 * is not a big problem
3622 */
2aac6304 3623 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova_pfn),
33041ec0 3624 mm_to_dma_pfn(paddr_pfn), size, prot);
ba395927
KA
3625 if (ret)
3626 goto error;
3627
1f0ef2aa
DW
3628 /* it's a non-present to present mapping. Only flush if caching mode */
3629 if (cap_caching_mode(iommu->cap))
a1ddcbe9 3630 iommu_flush_iotlb_psi(iommu, domain,
2aac6304 3631 mm_to_dma_pfn(iova_pfn),
a1ddcbe9 3632 size, 0, 1);
1f0ef2aa 3633 else
8c11e798 3634 iommu_flush_write_buffer(iommu);
f76aec76 3635
2aac6304 3636 start_paddr = (phys_addr_t)iova_pfn << PAGE_SHIFT;
03d6a246
DW
3637 start_paddr += paddr & ~PAGE_MASK;
3638 return start_paddr;
ba395927 3639
ba395927 3640error:
2aac6304 3641 if (iova_pfn)
22e2f9fa 3642 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
9f10e5bf 3643 pr_err("Device %s request: %zx@%llx dir %d --- failed\n",
5040a918 3644 dev_name(dev), size, (unsigned long long)paddr, dir);
ba395927
KA
3645 return 0;
3646}
3647
ffbbef5c
FT
3648static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3649 unsigned long offset, size_t size,
3650 enum dma_data_direction dir,
00085f1e 3651 unsigned long attrs)
bb9e6d65 3652{
ffbbef5c 3653 return __intel_map_single(dev, page_to_phys(page) + offset, size,
46333e37 3654 dir, *dev->dma_mask);
bb9e6d65
FT
3655}
3656
aa473240 3657static void flush_unmaps(struct deferred_flush_data *flush_data)
5e0d2a6f 3658{
80b20dd8 3659 int i, j;
5e0d2a6f 3660
aa473240 3661 flush_data->timer_on = 0;
5e0d2a6f 3662
3663 /* just flush them all */
3664 for (i = 0; i < g_num_of_iommus; i++) {
a2bb8459 3665 struct intel_iommu *iommu = g_iommus[i];
aa473240
OP
3666 struct deferred_flush_table *flush_table =
3667 &flush_data->tables[i];
a2bb8459
WH
3668 if (!iommu)
3669 continue;
c42d9f32 3670
aa473240 3671 if (!flush_table->next)
9dd2fe89
YZ
3672 continue;
3673
78d5f0f5
NA
3674 /* In caching mode, global flushes turn emulation expensive */
3675 if (!cap_caching_mode(iommu->cap))
3676 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
93a23a72 3677 DMA_TLB_GLOBAL_FLUSH);
aa473240 3678 for (j = 0; j < flush_table->next; j++) {
93a23a72 3679 unsigned long mask;
314f1dc1 3680 struct deferred_flush_entry *entry =
aa473240 3681 &flush_table->entries[j];
2aac6304 3682 unsigned long iova_pfn = entry->iova_pfn;
769530e4 3683 unsigned long nrpages = entry->nrpages;
314f1dc1
OP
3684 struct dmar_domain *domain = entry->domain;
3685 struct page *freelist = entry->freelist;
78d5f0f5
NA
3686
3687 /* On real hardware multiple invalidations are expensive */
3688 if (cap_caching_mode(iommu->cap))
a1ddcbe9 3689 iommu_flush_iotlb_psi(iommu, domain,
2aac6304 3690 mm_to_dma_pfn(iova_pfn),
769530e4 3691 nrpages, !freelist, 0);
78d5f0f5 3692 else {
769530e4 3693 mask = ilog2(nrpages);
314f1dc1 3694 iommu_flush_dev_iotlb(domain,
2aac6304 3695 (uint64_t)iova_pfn << PAGE_SHIFT, mask);
78d5f0f5 3696 }
22e2f9fa 3697 free_iova_fast(&domain->iovad, iova_pfn, nrpages);
314f1dc1
OP
3698 if (freelist)
3699 dma_free_pagelist(freelist);
80b20dd8 3700 }
aa473240 3701 flush_table->next = 0;
5e0d2a6f 3702 }
3703
aa473240 3704 flush_data->size = 0;
5e0d2a6f 3705}
3706
aa473240 3707static void flush_unmaps_timeout(unsigned long cpuid)
5e0d2a6f 3708{
aa473240 3709 struct deferred_flush_data *flush_data = per_cpu_ptr(&deferred_flush, cpuid);
80b20dd8 3710 unsigned long flags;
3711
aa473240
OP
3712 spin_lock_irqsave(&flush_data->lock, flags);
3713 flush_unmaps(flush_data);
3714 spin_unlock_irqrestore(&flush_data->lock, flags);
5e0d2a6f 3715}
3716
2aac6304 3717static void add_unmap(struct dmar_domain *dom, unsigned long iova_pfn,
769530e4 3718 unsigned long nrpages, struct page *freelist)
5e0d2a6f 3719{
3720 unsigned long flags;
314f1dc1 3721 int entry_id, iommu_id;
8c11e798 3722 struct intel_iommu *iommu;
314f1dc1 3723 struct deferred_flush_entry *entry;
aa473240
OP
3724 struct deferred_flush_data *flush_data;
3725 unsigned int cpuid;
5e0d2a6f 3726
aa473240
OP
3727 cpuid = get_cpu();
3728 flush_data = per_cpu_ptr(&deferred_flush, cpuid);
3729
3730 /* Flush all CPUs' entries to avoid deferring too much. If
3731 * this becomes a bottleneck, can just flush us, and rely on
3732 * flush timer for the rest.
3733 */
3734 if (flush_data->size == HIGH_WATER_MARK) {
3735 int cpu;
3736
3737 for_each_online_cpu(cpu)
3738 flush_unmaps_timeout(cpu);
3739 }
3740
3741 spin_lock_irqsave(&flush_data->lock, flags);
80b20dd8 3742
8c11e798
WH
3743 iommu = domain_get_iommu(dom);
3744 iommu_id = iommu->seq_id;
c42d9f32 3745
aa473240
OP
3746 entry_id = flush_data->tables[iommu_id].next;
3747 ++(flush_data->tables[iommu_id].next);
5e0d2a6f 3748
aa473240 3749 entry = &flush_data->tables[iommu_id].entries[entry_id];
314f1dc1 3750 entry->domain = dom;
2aac6304 3751 entry->iova_pfn = iova_pfn;
769530e4 3752 entry->nrpages = nrpages;
314f1dc1 3753 entry->freelist = freelist;
5e0d2a6f 3754
aa473240
OP
3755 if (!flush_data->timer_on) {
3756 mod_timer(&flush_data->timer, jiffies + msecs_to_jiffies(10));
3757 flush_data->timer_on = 1;
5e0d2a6f 3758 }
aa473240
OP
3759 flush_data->size++;
3760 spin_unlock_irqrestore(&flush_data->lock, flags);
3761
3762 put_cpu();
5e0d2a6f 3763}
3764
769530e4 3765static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
ba395927 3766{
f76aec76 3767 struct dmar_domain *domain;
d794dc9b 3768 unsigned long start_pfn, last_pfn;
769530e4 3769 unsigned long nrpages;
2aac6304 3770 unsigned long iova_pfn;
8c11e798 3771 struct intel_iommu *iommu;
ea8ea460 3772 struct page *freelist;
ba395927 3773
73676832 3774 if (iommu_no_mapping(dev))
f76aec76 3775 return;
2c2e2c38 3776
1525a29a 3777 domain = find_domain(dev);
ba395927
KA
3778 BUG_ON(!domain);
3779
8c11e798
WH
3780 iommu = domain_get_iommu(domain);
3781
2aac6304 3782 iova_pfn = IOVA_PFN(dev_addr);
ba395927 3783
769530e4 3784 nrpages = aligned_nrpages(dev_addr, size);
2aac6304 3785 start_pfn = mm_to_dma_pfn(iova_pfn);
769530e4 3786 last_pfn = start_pfn + nrpages - 1;
ba395927 3787
d794dc9b 3788 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
207e3592 3789 dev_name(dev), start_pfn, last_pfn);
ba395927 3790
ea8ea460 3791 freelist = domain_unmap(domain, start_pfn, last_pfn);
d794dc9b 3792
5e0d2a6f 3793 if (intel_iommu_strict) {
a1ddcbe9 3794 iommu_flush_iotlb_psi(iommu, domain, start_pfn,
769530e4 3795 nrpages, !freelist, 0);
5e0d2a6f 3796 /* free iova */
22e2f9fa 3797 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages));
ea8ea460 3798 dma_free_pagelist(freelist);
5e0d2a6f 3799 } else {
2aac6304 3800 add_unmap(domain, iova_pfn, nrpages, freelist);
5e0d2a6f 3801 /*
3802 * queue up the release of the unmap to save the 1/6th of the
3803 * cpu used up by the iotlb flush operation...
3804 */
5e0d2a6f 3805 }
ba395927
KA
3806}
3807
d41a4adb
JL
3808static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3809 size_t size, enum dma_data_direction dir,
00085f1e 3810 unsigned long attrs)
d41a4adb 3811{
769530e4 3812 intel_unmap(dev, dev_addr, size);
d41a4adb
JL
3813}
3814
5040a918 3815static void *intel_alloc_coherent(struct device *dev, size_t size,
baa676fc 3816 dma_addr_t *dma_handle, gfp_t flags,
00085f1e 3817 unsigned long attrs)
ba395927 3818{
36746436 3819 struct page *page = NULL;
ba395927
KA
3820 int order;
3821
5b6985ce 3822 size = PAGE_ALIGN(size);
ba395927 3823 order = get_order(size);
e8bb910d 3824
5040a918 3825 if (!iommu_no_mapping(dev))
e8bb910d 3826 flags &= ~(GFP_DMA | GFP_DMA32);
5040a918
DW
3827 else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
3828 if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
e8bb910d
AW
3829 flags |= GFP_DMA;
3830 else
3831 flags |= GFP_DMA32;
3832 }
ba395927 3833
d0164adc 3834 if (gfpflags_allow_blocking(flags)) {
36746436
AM
3835 unsigned int count = size >> PAGE_SHIFT;
3836
712c604d 3837 page = dma_alloc_from_contiguous(dev, count, order, flags);
36746436
AM
3838 if (page && iommu_no_mapping(dev) &&
3839 page_to_phys(page) + size > dev->coherent_dma_mask) {
3840 dma_release_from_contiguous(dev, page, count);
3841 page = NULL;
3842 }
3843 }
3844
3845 if (!page)
3846 page = alloc_pages(flags, order);
3847 if (!page)
ba395927 3848 return NULL;
36746436 3849 memset(page_address(page), 0, size);
ba395927 3850
36746436 3851 *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
bb9e6d65 3852 DMA_BIDIRECTIONAL,
5040a918 3853 dev->coherent_dma_mask);
ba395927 3854 if (*dma_handle)
36746436
AM
3855 return page_address(page);
3856 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3857 __free_pages(page, order);
3858
ba395927
KA
3859 return NULL;
3860}
3861
5040a918 3862static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
00085f1e 3863 dma_addr_t dma_handle, unsigned long attrs)
ba395927
KA
3864{
3865 int order;
36746436 3866 struct page *page = virt_to_page(vaddr);
ba395927 3867
5b6985ce 3868 size = PAGE_ALIGN(size);
ba395927
KA
3869 order = get_order(size);
3870
769530e4 3871 intel_unmap(dev, dma_handle, size);
36746436
AM
3872 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3873 __free_pages(page, order);
ba395927
KA
3874}
3875
5040a918 3876static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
d7ab5c46 3877 int nelems, enum dma_data_direction dir,
00085f1e 3878 unsigned long attrs)
ba395927 3879{
769530e4
OP
3880 dma_addr_t startaddr = sg_dma_address(sglist) & PAGE_MASK;
3881 unsigned long nrpages = 0;
3882 struct scatterlist *sg;
3883 int i;
3884
3885 for_each_sg(sglist, sg, nelems, i) {
3886 nrpages += aligned_nrpages(sg_dma_address(sg), sg_dma_len(sg));
3887 }
3888
3889 intel_unmap(dev, startaddr, nrpages << VTD_PAGE_SHIFT);
ba395927
KA
3890}
3891
ba395927 3892static int intel_nontranslate_map_sg(struct device *hddev,
c03ab37c 3893 struct scatterlist *sglist, int nelems, int dir)
ba395927
KA
3894{
3895 int i;
c03ab37c 3896 struct scatterlist *sg;
ba395927 3897
c03ab37c 3898 for_each_sg(sglist, sg, nelems, i) {
12d4d40e 3899 BUG_ON(!sg_page(sg));
3e6110fd 3900 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
c03ab37c 3901 sg->dma_length = sg->length;
ba395927
KA
3902 }
3903 return nelems;
3904}
3905
5040a918 3906static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
00085f1e 3907 enum dma_data_direction dir, unsigned long attrs)
ba395927 3908{
ba395927 3909 int i;
ba395927 3910 struct dmar_domain *domain;
f76aec76
KA
3911 size_t size = 0;
3912 int prot = 0;
2aac6304 3913 unsigned long iova_pfn;
f76aec76 3914 int ret;
c03ab37c 3915 struct scatterlist *sg;
b536d24d 3916 unsigned long start_vpfn;
8c11e798 3917 struct intel_iommu *iommu;
ba395927
KA
3918
3919 BUG_ON(dir == DMA_NONE);
5040a918
DW
3920 if (iommu_no_mapping(dev))
3921 return intel_nontranslate_map_sg(dev, sglist, nelems, dir);
ba395927 3922
5040a918 3923 domain = get_valid_domain_for_dev(dev);
f76aec76
KA
3924 if (!domain)
3925 return 0;
3926
8c11e798
WH
3927 iommu = domain_get_iommu(domain);
3928
b536d24d 3929 for_each_sg(sglist, sg, nelems, i)
88cb6a74 3930 size += aligned_nrpages(sg->offset, sg->length);
f76aec76 3931
2aac6304 3932 iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
5040a918 3933 *dev->dma_mask);
2aac6304 3934 if (!iova_pfn) {
c03ab37c 3935 sglist->dma_length = 0;
f76aec76
KA
3936 return 0;
3937 }
3938
3939 /*
3940 * Check if DMAR supports zero-length reads on write only
3941 * mappings..
3942 */
3943 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3944 !cap_zlr(iommu->cap))
f76aec76
KA
3945 prot |= DMA_PTE_READ;
3946 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3947 prot |= DMA_PTE_WRITE;
3948
2aac6304 3949 start_vpfn = mm_to_dma_pfn(iova_pfn);
e1605495 3950
f532959b 3951 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
e1605495 3952 if (unlikely(ret)) {
e1605495
DW
3953 dma_pte_free_pagetable(domain, start_vpfn,
3954 start_vpfn + size - 1);
22e2f9fa 3955 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
e1605495 3956 return 0;
ba395927
KA
3957 }
3958
1f0ef2aa
DW
3959 /* it's a non-present to present mapping. Only flush if caching mode */
3960 if (cap_caching_mode(iommu->cap))
a1ddcbe9 3961 iommu_flush_iotlb_psi(iommu, domain, start_vpfn, size, 0, 1);
1f0ef2aa 3962 else
8c11e798 3963 iommu_flush_write_buffer(iommu);
1f0ef2aa 3964
ba395927
KA
3965 return nelems;
3966}
3967
dfb805e8
FT
3968static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3969{
3970 return !dma_addr;
3971}
3972
160c1d8e 3973struct dma_map_ops intel_dma_ops = {
baa676fc
AP
3974 .alloc = intel_alloc_coherent,
3975 .free = intel_free_coherent,
ba395927
KA
3976 .map_sg = intel_map_sg,
3977 .unmap_sg = intel_unmap_sg,
ffbbef5c
FT
3978 .map_page = intel_map_page,
3979 .unmap_page = intel_unmap_page,
dfb805e8 3980 .mapping_error = intel_mapping_error,
ba395927
KA
3981};
3982
3983static inline int iommu_domain_cache_init(void)
3984{
3985 int ret = 0;
3986
3987 iommu_domain_cache = kmem_cache_create("iommu_domain",
3988 sizeof(struct dmar_domain),
3989 0,
3990 SLAB_HWCACHE_ALIGN,
3991
3992 NULL);
3993 if (!iommu_domain_cache) {
9f10e5bf 3994 pr_err("Couldn't create iommu_domain cache\n");
ba395927
KA
3995 ret = -ENOMEM;
3996 }
3997
3998 return ret;
3999}
4000
4001static inline int iommu_devinfo_cache_init(void)
4002{
4003 int ret = 0;
4004
4005 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
4006 sizeof(struct device_domain_info),
4007 0,
4008 SLAB_HWCACHE_ALIGN,
ba395927
KA
4009 NULL);
4010 if (!iommu_devinfo_cache) {
9f10e5bf 4011 pr_err("Couldn't create devinfo cache\n");
ba395927
KA
4012 ret = -ENOMEM;
4013 }
4014
4015 return ret;
4016}
4017
ba395927
KA
4018static int __init iommu_init_mempool(void)
4019{
4020 int ret;
ae1ff3d6 4021 ret = iova_cache_get();
ba395927
KA
4022 if (ret)
4023 return ret;
4024
4025 ret = iommu_domain_cache_init();
4026 if (ret)
4027 goto domain_error;
4028
4029 ret = iommu_devinfo_cache_init();
4030 if (!ret)
4031 return ret;
4032
4033 kmem_cache_destroy(iommu_domain_cache);
4034domain_error:
ae1ff3d6 4035 iova_cache_put();
ba395927
KA
4036
4037 return -ENOMEM;
4038}
4039
4040static void __init iommu_exit_mempool(void)
4041{
4042 kmem_cache_destroy(iommu_devinfo_cache);
4043 kmem_cache_destroy(iommu_domain_cache);
ae1ff3d6 4044 iova_cache_put();
ba395927
KA
4045}
4046
556ab45f
DW
4047static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
4048{
4049 struct dmar_drhd_unit *drhd;
4050 u32 vtbar;
4051 int rc;
4052
4053 /* We know that this device on this chipset has its own IOMMU.
4054 * If we find it under a different IOMMU, then the BIOS is lying
4055 * to us. Hope that the IOMMU for this device is actually
4056 * disabled, and it needs no translation...
4057 */
4058 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
4059 if (rc) {
4060 /* "can't" happen */
4061 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
4062 return;
4063 }
4064 vtbar &= 0xffff0000;
4065
4066 /* we know that the this iommu should be at offset 0xa000 from vtbar */
4067 drhd = dmar_find_matched_drhd_unit(pdev);
4068 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
4069 TAINT_FIRMWARE_WORKAROUND,
4070 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
4071 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
4072}
4073DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
4074
ba395927
KA
4075static void __init init_no_remapping_devices(void)
4076{
4077 struct dmar_drhd_unit *drhd;
832bd858 4078 struct device *dev;
b683b230 4079 int i;
ba395927
KA
4080
4081 for_each_drhd_unit(drhd) {
4082 if (!drhd->include_all) {
b683b230
JL
4083 for_each_active_dev_scope(drhd->devices,
4084 drhd->devices_cnt, i, dev)
4085 break;
832bd858 4086 /* ignore DMAR unit if no devices exist */
ba395927
KA
4087 if (i == drhd->devices_cnt)
4088 drhd->ignored = 1;
4089 }
4090 }
4091
7c919779 4092 for_each_active_drhd_unit(drhd) {
7c919779 4093 if (drhd->include_all)
ba395927
KA
4094 continue;
4095
b683b230
JL
4096 for_each_active_dev_scope(drhd->devices,
4097 drhd->devices_cnt, i, dev)
832bd858 4098 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
ba395927 4099 break;
ba395927
KA
4100 if (i < drhd->devices_cnt)
4101 continue;
4102
c0771df8
DW
4103 /* This IOMMU has *only* gfx devices. Either bypass it or
4104 set the gfx_mapped flag, as appropriate */
4105 if (dmar_map_gfx) {
4106 intel_iommu_gfx_mapped = 1;
4107 } else {
4108 drhd->ignored = 1;
b683b230
JL
4109 for_each_active_dev_scope(drhd->devices,
4110 drhd->devices_cnt, i, dev)
832bd858 4111 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
ba395927
KA
4112 }
4113 }
4114}
4115
f59c7b69
FY
4116#ifdef CONFIG_SUSPEND
4117static int init_iommu_hw(void)
4118{
4119 struct dmar_drhd_unit *drhd;
4120 struct intel_iommu *iommu = NULL;
4121
4122 for_each_active_iommu(iommu, drhd)
4123 if (iommu->qi)
4124 dmar_reenable_qi(iommu);
4125
b779260b
JC
4126 for_each_iommu(iommu, drhd) {
4127 if (drhd->ignored) {
4128 /*
4129 * we always have to disable PMRs or DMA may fail on
4130 * this device
4131 */
4132 if (force_on)
4133 iommu_disable_protect_mem_regions(iommu);
4134 continue;
4135 }
4136
f59c7b69
FY
4137 iommu_flush_write_buffer(iommu);
4138
4139 iommu_set_root_entry(iommu);
4140
4141 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 4142 DMA_CCMD_GLOBAL_INVL);
2a41ccee
JL
4143 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4144 iommu_enable_translation(iommu);
b94996c9 4145 iommu_disable_protect_mem_regions(iommu);
f59c7b69
FY
4146 }
4147
4148 return 0;
4149}
4150
4151static void iommu_flush_all(void)
4152{
4153 struct dmar_drhd_unit *drhd;
4154 struct intel_iommu *iommu;
4155
4156 for_each_active_iommu(iommu, drhd) {
4157 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 4158 DMA_CCMD_GLOBAL_INVL);
f59c7b69 4159 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 4160 DMA_TLB_GLOBAL_FLUSH);
f59c7b69
FY
4161 }
4162}
4163
134fac3f 4164static int iommu_suspend(void)
f59c7b69
FY
4165{
4166 struct dmar_drhd_unit *drhd;
4167 struct intel_iommu *iommu = NULL;
4168 unsigned long flag;
4169
4170 for_each_active_iommu(iommu, drhd) {
4171 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
4172 GFP_ATOMIC);
4173 if (!iommu->iommu_state)
4174 goto nomem;
4175 }
4176
4177 iommu_flush_all();
4178
4179 for_each_active_iommu(iommu, drhd) {
4180 iommu_disable_translation(iommu);
4181
1f5b3c3f 4182 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
4183
4184 iommu->iommu_state[SR_DMAR_FECTL_REG] =
4185 readl(iommu->reg + DMAR_FECTL_REG);
4186 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
4187 readl(iommu->reg + DMAR_FEDATA_REG);
4188 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
4189 readl(iommu->reg + DMAR_FEADDR_REG);
4190 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
4191 readl(iommu->reg + DMAR_FEUADDR_REG);
4192
1f5b3c3f 4193 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
4194 }
4195 return 0;
4196
4197nomem:
4198 for_each_active_iommu(iommu, drhd)
4199 kfree(iommu->iommu_state);
4200
4201 return -ENOMEM;
4202}
4203
134fac3f 4204static void iommu_resume(void)
f59c7b69
FY
4205{
4206 struct dmar_drhd_unit *drhd;
4207 struct intel_iommu *iommu = NULL;
4208 unsigned long flag;
4209
4210 if (init_iommu_hw()) {
b779260b
JC
4211 if (force_on)
4212 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
4213 else
4214 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
134fac3f 4215 return;
f59c7b69
FY
4216 }
4217
4218 for_each_active_iommu(iommu, drhd) {
4219
1f5b3c3f 4220 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
4221
4222 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
4223 iommu->reg + DMAR_FECTL_REG);
4224 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
4225 iommu->reg + DMAR_FEDATA_REG);
4226 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
4227 iommu->reg + DMAR_FEADDR_REG);
4228 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
4229 iommu->reg + DMAR_FEUADDR_REG);
4230
1f5b3c3f 4231 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
4232 }
4233
4234 for_each_active_iommu(iommu, drhd)
4235 kfree(iommu->iommu_state);
f59c7b69
FY
4236}
4237
134fac3f 4238static struct syscore_ops iommu_syscore_ops = {
f59c7b69
FY
4239 .resume = iommu_resume,
4240 .suspend = iommu_suspend,
4241};
4242
134fac3f 4243static void __init init_iommu_pm_ops(void)
f59c7b69 4244{
134fac3f 4245 register_syscore_ops(&iommu_syscore_ops);
f59c7b69
FY
4246}
4247
4248#else
99592ba4 4249static inline void init_iommu_pm_ops(void) {}
f59c7b69
FY
4250#endif /* CONFIG_PM */
4251
318fe7df 4252
c2a0b538 4253int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
318fe7df
SS
4254{
4255 struct acpi_dmar_reserved_memory *rmrr;
0659b8dc 4256 int prot = DMA_PTE_READ|DMA_PTE_WRITE;
318fe7df 4257 struct dmar_rmrr_unit *rmrru;
0659b8dc 4258 size_t length;
318fe7df
SS
4259
4260 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
4261 if (!rmrru)
0659b8dc 4262 goto out;
318fe7df
SS
4263
4264 rmrru->hdr = header;
4265 rmrr = (struct acpi_dmar_reserved_memory *)header;
4266 rmrru->base_address = rmrr->base_address;
4267 rmrru->end_address = rmrr->end_address;
0659b8dc
EA
4268
4269 length = rmrr->end_address - rmrr->base_address + 1;
4270 rmrru->resv = iommu_alloc_resv_region(rmrr->base_address, length, prot,
4271 IOMMU_RESV_DIRECT);
4272 if (!rmrru->resv)
4273 goto free_rmrru;
4274
2e455289
JL
4275 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
4276 ((void *)rmrr) + rmrr->header.length,
4277 &rmrru->devices_cnt);
0659b8dc
EA
4278 if (rmrru->devices_cnt && rmrru->devices == NULL)
4279 goto free_all;
318fe7df 4280
2e455289 4281 list_add(&rmrru->list, &dmar_rmrr_units);
318fe7df 4282
2e455289 4283 return 0;
0659b8dc
EA
4284free_all:
4285 kfree(rmrru->resv);
4286free_rmrru:
4287 kfree(rmrru);
4288out:
4289 return -ENOMEM;
318fe7df
SS
4290}
4291
6b197249
JL
4292static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
4293{
4294 struct dmar_atsr_unit *atsru;
4295 struct acpi_dmar_atsr *tmp;
4296
4297 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4298 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
4299 if (atsr->segment != tmp->segment)
4300 continue;
4301 if (atsr->header.length != tmp->header.length)
4302 continue;
4303 if (memcmp(atsr, tmp, atsr->header.length) == 0)
4304 return atsru;
4305 }
4306
4307 return NULL;
4308}
4309
4310int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
318fe7df
SS
4311{
4312 struct acpi_dmar_atsr *atsr;
4313 struct dmar_atsr_unit *atsru;
4314
6b197249
JL
4315 if (system_state != SYSTEM_BOOTING && !intel_iommu_enabled)
4316 return 0;
4317
318fe7df 4318 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
6b197249
JL
4319 atsru = dmar_find_atsr(atsr);
4320 if (atsru)
4321 return 0;
4322
4323 atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
318fe7df
SS
4324 if (!atsru)
4325 return -ENOMEM;
4326
6b197249
JL
4327 /*
4328 * If memory is allocated from slab by ACPI _DSM method, we need to
4329 * copy the memory content because the memory buffer will be freed
4330 * on return.
4331 */
4332 atsru->hdr = (void *)(atsru + 1);
4333 memcpy(atsru->hdr, hdr, hdr->length);
318fe7df 4334 atsru->include_all = atsr->flags & 0x1;
2e455289
JL
4335 if (!atsru->include_all) {
4336 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
4337 (void *)atsr + atsr->header.length,
4338 &atsru->devices_cnt);
4339 if (atsru->devices_cnt && atsru->devices == NULL) {
4340 kfree(atsru);
4341 return -ENOMEM;
4342 }
4343 }
318fe7df 4344
0e242612 4345 list_add_rcu(&atsru->list, &dmar_atsr_units);
318fe7df
SS
4346
4347 return 0;
4348}
4349
9bdc531e
JL
4350static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
4351{
4352 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
4353 kfree(atsru);
4354}
4355
6b197249
JL
4356int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4357{
4358 struct acpi_dmar_atsr *atsr;
4359 struct dmar_atsr_unit *atsru;
4360
4361 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4362 atsru = dmar_find_atsr(atsr);
4363 if (atsru) {
4364 list_del_rcu(&atsru->list);
4365 synchronize_rcu();
4366 intel_iommu_free_atsr(atsru);
4367 }
4368
4369 return 0;
4370}
4371
4372int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4373{
4374 int i;
4375 struct device *dev;
4376 struct acpi_dmar_atsr *atsr;
4377 struct dmar_atsr_unit *atsru;
4378
4379 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4380 atsru = dmar_find_atsr(atsr);
4381 if (!atsru)
4382 return 0;
4383
194dc870 4384 if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
6b197249
JL
4385 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
4386 i, dev)
4387 return -EBUSY;
194dc870 4388 }
6b197249
JL
4389
4390 return 0;
4391}
4392
ffebeb46
JL
4393static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
4394{
4395 int sp, ret = 0;
4396 struct intel_iommu *iommu = dmaru->iommu;
4397
4398 if (g_iommus[iommu->seq_id])
4399 return 0;
4400
4401 if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
9f10e5bf 4402 pr_warn("%s: Doesn't support hardware pass through.\n",
ffebeb46
JL
4403 iommu->name);
4404 return -ENXIO;
4405 }
4406 if (!ecap_sc_support(iommu->ecap) &&
4407 domain_update_iommu_snooping(iommu)) {
9f10e5bf 4408 pr_warn("%s: Doesn't support snooping.\n",
ffebeb46
JL
4409 iommu->name);
4410 return -ENXIO;
4411 }
4412 sp = domain_update_iommu_superpage(iommu) - 1;
4413 if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
9f10e5bf 4414 pr_warn("%s: Doesn't support large page.\n",
ffebeb46
JL
4415 iommu->name);
4416 return -ENXIO;
4417 }
4418
4419 /*
4420 * Disable translation if already enabled prior to OS handover.
4421 */
4422 if (iommu->gcmd & DMA_GCMD_TE)
4423 iommu_disable_translation(iommu);
4424
4425 g_iommus[iommu->seq_id] = iommu;
4426 ret = iommu_init_domains(iommu);
4427 if (ret == 0)
4428 ret = iommu_alloc_root_entry(iommu);
4429 if (ret)
4430 goto out;
4431
8a94ade4
DW
4432#ifdef CONFIG_INTEL_IOMMU_SVM
4433 if (pasid_enabled(iommu))
4434 intel_svm_alloc_pasid_tables(iommu);
4435#endif
4436
ffebeb46
JL
4437 if (dmaru->ignored) {
4438 /*
4439 * we always have to disable PMRs or DMA may fail on this device
4440 */
4441 if (force_on)
4442 iommu_disable_protect_mem_regions(iommu);
4443 return 0;
4444 }
4445
4446 intel_iommu_init_qi(iommu);
4447 iommu_flush_write_buffer(iommu);
a222a7f0
DW
4448
4449#ifdef CONFIG_INTEL_IOMMU_SVM
4450 if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) {
4451 ret = intel_svm_enable_prq(iommu);
4452 if (ret)
4453 goto disable_iommu;
4454 }
4455#endif
ffebeb46
JL
4456 ret = dmar_set_interrupt(iommu);
4457 if (ret)
4458 goto disable_iommu;
4459
4460 iommu_set_root_entry(iommu);
4461 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
4462 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4463 iommu_enable_translation(iommu);
4464
ffebeb46
JL
4465 iommu_disable_protect_mem_regions(iommu);
4466 return 0;
4467
4468disable_iommu:
4469 disable_dmar_iommu(iommu);
4470out:
4471 free_dmar_iommu(iommu);
4472 return ret;
4473}
4474
6b197249
JL
4475int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
4476{
ffebeb46
JL
4477 int ret = 0;
4478 struct intel_iommu *iommu = dmaru->iommu;
4479
4480 if (!intel_iommu_enabled)
4481 return 0;
4482 if (iommu == NULL)
4483 return -EINVAL;
4484
4485 if (insert) {
4486 ret = intel_iommu_add(dmaru);
4487 } else {
4488 disable_dmar_iommu(iommu);
4489 free_dmar_iommu(iommu);
4490 }
4491
4492 return ret;
6b197249
JL
4493}
4494
9bdc531e
JL
4495static void intel_iommu_free_dmars(void)
4496{
4497 struct dmar_rmrr_unit *rmrru, *rmrr_n;
4498 struct dmar_atsr_unit *atsru, *atsr_n;
4499
4500 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
4501 list_del(&rmrru->list);
4502 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
0659b8dc 4503 kfree(rmrru->resv);
9bdc531e 4504 kfree(rmrru);
318fe7df
SS
4505 }
4506
9bdc531e
JL
4507 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
4508 list_del(&atsru->list);
4509 intel_iommu_free_atsr(atsru);
4510 }
318fe7df
SS
4511}
4512
4513int dmar_find_matched_atsr_unit(struct pci_dev *dev)
4514{
b683b230 4515 int i, ret = 1;
318fe7df 4516 struct pci_bus *bus;
832bd858
DW
4517 struct pci_dev *bridge = NULL;
4518 struct device *tmp;
318fe7df
SS
4519 struct acpi_dmar_atsr *atsr;
4520 struct dmar_atsr_unit *atsru;
4521
4522 dev = pci_physfn(dev);
318fe7df 4523 for (bus = dev->bus; bus; bus = bus->parent) {
b5f82ddf 4524 bridge = bus->self;
d14053b3
DW
4525 /* If it's an integrated device, allow ATS */
4526 if (!bridge)
4527 return 1;
4528 /* Connected via non-PCIe: no ATS */
4529 if (!pci_is_pcie(bridge) ||
62f87c0e 4530 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
318fe7df 4531 return 0;
d14053b3 4532 /* If we found the root port, look it up in the ATSR */
b5f82ddf 4533 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
318fe7df 4534 break;
318fe7df
SS
4535 }
4536
0e242612 4537 rcu_read_lock();
b5f82ddf
JL
4538 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4539 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4540 if (atsr->segment != pci_domain_nr(dev->bus))
4541 continue;
4542
b683b230 4543 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
832bd858 4544 if (tmp == &bridge->dev)
b683b230 4545 goto out;
b5f82ddf
JL
4546
4547 if (atsru->include_all)
b683b230 4548 goto out;
b5f82ddf 4549 }
b683b230
JL
4550 ret = 0;
4551out:
0e242612 4552 rcu_read_unlock();
318fe7df 4553
b683b230 4554 return ret;
318fe7df
SS
4555}
4556
59ce0515
JL
4557int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
4558{
4559 int ret = 0;
4560 struct dmar_rmrr_unit *rmrru;
4561 struct dmar_atsr_unit *atsru;
4562 struct acpi_dmar_atsr *atsr;
4563 struct acpi_dmar_reserved_memory *rmrr;
4564
4565 if (!intel_iommu_enabled && system_state != SYSTEM_BOOTING)
4566 return 0;
4567
4568 list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
4569 rmrr = container_of(rmrru->hdr,
4570 struct acpi_dmar_reserved_memory, header);
4571 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4572 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
4573 ((void *)rmrr) + rmrr->header.length,
4574 rmrr->segment, rmrru->devices,
4575 rmrru->devices_cnt);
27e24950 4576 if(ret < 0)
59ce0515 4577 return ret;
e6a8c9b3 4578 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
27e24950
JL
4579 dmar_remove_dev_scope(info, rmrr->segment,
4580 rmrru->devices, rmrru->devices_cnt);
59ce0515
JL
4581 }
4582 }
4583
4584 list_for_each_entry(atsru, &dmar_atsr_units, list) {
4585 if (atsru->include_all)
4586 continue;
4587
4588 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4589 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4590 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
4591 (void *)atsr + atsr->header.length,
4592 atsr->segment, atsru->devices,
4593 atsru->devices_cnt);
4594 if (ret > 0)
4595 break;
4596 else if(ret < 0)
4597 return ret;
e6a8c9b3 4598 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
59ce0515
JL
4599 if (dmar_remove_dev_scope(info, atsr->segment,
4600 atsru->devices, atsru->devices_cnt))
4601 break;
4602 }
4603 }
4604
4605 return 0;
4606}
4607
99dcaded
FY
4608/*
4609 * Here we only respond to action of unbound device from driver.
4610 *
4611 * Added device is not attached to its DMAR domain here yet. That will happen
4612 * when mapping the device to iova.
4613 */
4614static int device_notifier(struct notifier_block *nb,
4615 unsigned long action, void *data)
4616{
4617 struct device *dev = data;
99dcaded
FY
4618 struct dmar_domain *domain;
4619
3d89194a 4620 if (iommu_dummy(dev))
44cd613c
DW
4621 return 0;
4622
1196c2fb 4623 if (action != BUS_NOTIFY_REMOVED_DEVICE)
7e7dfab7
JL
4624 return 0;
4625
1525a29a 4626 domain = find_domain(dev);
99dcaded
FY
4627 if (!domain)
4628 return 0;
4629
e6de0f8d 4630 dmar_remove_one_dev_info(domain, dev);
ab8dfe25 4631 if (!domain_type_is_vm_or_si(domain) && list_empty(&domain->devices))
7e7dfab7 4632 domain_exit(domain);
a97590e5 4633
99dcaded
FY
4634 return 0;
4635}
4636
4637static struct notifier_block device_nb = {
4638 .notifier_call = device_notifier,
4639};
4640
75f05569
JL
4641static int intel_iommu_memory_notifier(struct notifier_block *nb,
4642 unsigned long val, void *v)
4643{
4644 struct memory_notify *mhp = v;
4645 unsigned long long start, end;
4646 unsigned long start_vpfn, last_vpfn;
4647
4648 switch (val) {
4649 case MEM_GOING_ONLINE:
4650 start = mhp->start_pfn << PAGE_SHIFT;
4651 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
4652 if (iommu_domain_identity_map(si_domain, start, end)) {
9f10e5bf 4653 pr_warn("Failed to build identity map for [%llx-%llx]\n",
75f05569
JL
4654 start, end);
4655 return NOTIFY_BAD;
4656 }
4657 break;
4658
4659 case MEM_OFFLINE:
4660 case MEM_CANCEL_ONLINE:
4661 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4662 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
4663 while (start_vpfn <= last_vpfn) {
4664 struct iova *iova;
4665 struct dmar_drhd_unit *drhd;
4666 struct intel_iommu *iommu;
ea8ea460 4667 struct page *freelist;
75f05569
JL
4668
4669 iova = find_iova(&si_domain->iovad, start_vpfn);
4670 if (iova == NULL) {
9f10e5bf 4671 pr_debug("Failed get IOVA for PFN %lx\n",
75f05569
JL
4672 start_vpfn);
4673 break;
4674 }
4675
4676 iova = split_and_remove_iova(&si_domain->iovad, iova,
4677 start_vpfn, last_vpfn);
4678 if (iova == NULL) {
9f10e5bf 4679 pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
75f05569
JL
4680 start_vpfn, last_vpfn);
4681 return NOTIFY_BAD;
4682 }
4683
ea8ea460
DW
4684 freelist = domain_unmap(si_domain, iova->pfn_lo,
4685 iova->pfn_hi);
4686
75f05569
JL
4687 rcu_read_lock();
4688 for_each_active_iommu(iommu, drhd)
a1ddcbe9 4689 iommu_flush_iotlb_psi(iommu, si_domain,
a156ef99 4690 iova->pfn_lo, iova_size(iova),
ea8ea460 4691 !freelist, 0);
75f05569 4692 rcu_read_unlock();
ea8ea460 4693 dma_free_pagelist(freelist);
75f05569
JL
4694
4695 start_vpfn = iova->pfn_hi + 1;
4696 free_iova_mem(iova);
4697 }
4698 break;
4699 }
4700
4701 return NOTIFY_OK;
4702}
4703
4704static struct notifier_block intel_iommu_memory_nb = {
4705 .notifier_call = intel_iommu_memory_notifier,
4706 .priority = 0
4707};
4708
22e2f9fa
OP
4709static void free_all_cpu_cached_iovas(unsigned int cpu)
4710{
4711 int i;
4712
4713 for (i = 0; i < g_num_of_iommus; i++) {
4714 struct intel_iommu *iommu = g_iommus[i];
4715 struct dmar_domain *domain;
0caa7616 4716 int did;
22e2f9fa
OP
4717
4718 if (!iommu)
4719 continue;
4720
3bd4f911 4721 for (did = 0; did < cap_ndoms(iommu->cap); did++) {
0caa7616 4722 domain = get_iommu_domain(iommu, (u16)did);
22e2f9fa
OP
4723
4724 if (!domain)
4725 continue;
4726 free_cpu_cached_iovas(cpu, &domain->iovad);
4727 }
4728 }
4729}
4730
21647615 4731static int intel_iommu_cpu_dead(unsigned int cpu)
aa473240 4732{
21647615
AMG
4733 free_all_cpu_cached_iovas(cpu);
4734 flush_unmaps_timeout(cpu);
4735 return 0;
aa473240
OP
4736}
4737
161b28aa
JR
4738static void intel_disable_iommus(void)
4739{
4740 struct intel_iommu *iommu = NULL;
4741 struct dmar_drhd_unit *drhd;
4742
4743 for_each_iommu(iommu, drhd)
4744 iommu_disable_translation(iommu);
4745}
4746
a7fdb6e6
JR
4747static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
4748{
4749 return container_of(dev, struct intel_iommu, iommu.dev);
4750}
4751
a5459cfe
AW
4752static ssize_t intel_iommu_show_version(struct device *dev,
4753 struct device_attribute *attr,
4754 char *buf)
4755{
a7fdb6e6 4756 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4757 u32 ver = readl(iommu->reg + DMAR_VER_REG);
4758 return sprintf(buf, "%d:%d\n",
4759 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4760}
4761static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
4762
4763static ssize_t intel_iommu_show_address(struct device *dev,
4764 struct device_attribute *attr,
4765 char *buf)
4766{
a7fdb6e6 4767 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4768 return sprintf(buf, "%llx\n", iommu->reg_phys);
4769}
4770static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
4771
4772static ssize_t intel_iommu_show_cap(struct device *dev,
4773 struct device_attribute *attr,
4774 char *buf)
4775{
a7fdb6e6 4776 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4777 return sprintf(buf, "%llx\n", iommu->cap);
4778}
4779static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
4780
4781static ssize_t intel_iommu_show_ecap(struct device *dev,
4782 struct device_attribute *attr,
4783 char *buf)
4784{
a7fdb6e6 4785 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4786 return sprintf(buf, "%llx\n", iommu->ecap);
4787}
4788static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4789
2238c082
AW
4790static ssize_t intel_iommu_show_ndoms(struct device *dev,
4791 struct device_attribute *attr,
4792 char *buf)
4793{
a7fdb6e6 4794 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2238c082
AW
4795 return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
4796}
4797static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL);
4798
4799static ssize_t intel_iommu_show_ndoms_used(struct device *dev,
4800 struct device_attribute *attr,
4801 char *buf)
4802{
a7fdb6e6 4803 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2238c082
AW
4804 return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
4805 cap_ndoms(iommu->cap)));
4806}
4807static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL);
4808
a5459cfe
AW
4809static struct attribute *intel_iommu_attrs[] = {
4810 &dev_attr_version.attr,
4811 &dev_attr_address.attr,
4812 &dev_attr_cap.attr,
4813 &dev_attr_ecap.attr,
2238c082
AW
4814 &dev_attr_domains_supported.attr,
4815 &dev_attr_domains_used.attr,
a5459cfe
AW
4816 NULL,
4817};
4818
4819static struct attribute_group intel_iommu_group = {
4820 .name = "intel-iommu",
4821 .attrs = intel_iommu_attrs,
4822};
4823
4824const struct attribute_group *intel_iommu_groups[] = {
4825 &intel_iommu_group,
4826 NULL,
4827};
4828
ba395927
KA
4829int __init intel_iommu_init(void)
4830{
9bdc531e 4831 int ret = -ENODEV;
3a93c841 4832 struct dmar_drhd_unit *drhd;
7c919779 4833 struct intel_iommu *iommu;
ba395927 4834
a59b50e9
JC
4835 /* VT-d is required for a TXT/tboot launch, so enforce that */
4836 force_on = tboot_force_iommu();
4837
3a5670e8
JL
4838 if (iommu_init_mempool()) {
4839 if (force_on)
4840 panic("tboot: Failed to initialize iommu memory\n");
4841 return -ENOMEM;
4842 }
4843
4844 down_write(&dmar_global_lock);
a59b50e9
JC
4845 if (dmar_table_init()) {
4846 if (force_on)
4847 panic("tboot: Failed to initialize DMAR table\n");
9bdc531e 4848 goto out_free_dmar;
a59b50e9 4849 }
ba395927 4850
c2c7286a 4851 if (dmar_dev_scope_init() < 0) {
a59b50e9
JC
4852 if (force_on)
4853 panic("tboot: Failed to initialize DMAR device scope\n");
9bdc531e 4854 goto out_free_dmar;
a59b50e9 4855 }
1886e8a9 4856
161b28aa 4857 if (no_iommu || dmar_disabled) {
bfd20f1c
SL
4858 /*
4859 * We exit the function here to ensure IOMMU's remapping and
4860 * mempool aren't setup, which means that the IOMMU's PMRs
4861 * won't be disabled via the call to init_dmars(). So disable
4862 * it explicitly here. The PMRs were setup by tboot prior to
4863 * calling SENTER, but the kernel is expected to reset/tear
4864 * down the PMRs.
4865 */
4866 if (intel_iommu_tboot_noforce) {
4867 for_each_iommu(iommu, drhd)
4868 iommu_disable_protect_mem_regions(iommu);
4869 }
4870
161b28aa
JR
4871 /*
4872 * Make sure the IOMMUs are switched off, even when we
4873 * boot into a kexec kernel and the previous kernel left
4874 * them enabled
4875 */
4876 intel_disable_iommus();
9bdc531e 4877 goto out_free_dmar;
161b28aa 4878 }
2ae21010 4879
318fe7df 4880 if (list_empty(&dmar_rmrr_units))
9f10e5bf 4881 pr_info("No RMRR found\n");
318fe7df
SS
4882
4883 if (list_empty(&dmar_atsr_units))
9f10e5bf 4884 pr_info("No ATSR found\n");
318fe7df 4885
51a63e67
JC
4886 if (dmar_init_reserved_ranges()) {
4887 if (force_on)
4888 panic("tboot: Failed to reserve iommu ranges\n");
3a5670e8 4889 goto out_free_reserved_range;
51a63e67 4890 }
ba395927
KA
4891
4892 init_no_remapping_devices();
4893
b779260b 4894 ret = init_dmars();
ba395927 4895 if (ret) {
a59b50e9
JC
4896 if (force_on)
4897 panic("tboot: Failed to initialize DMARs\n");
9f10e5bf 4898 pr_err("Initialization failed\n");
9bdc531e 4899 goto out_free_reserved_range;
ba395927 4900 }
3a5670e8 4901 up_write(&dmar_global_lock);
9f10e5bf 4902 pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
ba395927 4903
75f1cdf1
FT
4904#ifdef CONFIG_SWIOTLB
4905 swiotlb = 0;
4906#endif
19943b0e 4907 dma_ops = &intel_dma_ops;
4ed0d3e6 4908
134fac3f 4909 init_iommu_pm_ops();
a8bcbb0d 4910
39ab9555
JR
4911 for_each_active_iommu(iommu, drhd) {
4912 iommu_device_sysfs_add(&iommu->iommu, NULL,
4913 intel_iommu_groups,
4914 "%s", iommu->name);
4915 iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
4916 iommu_device_register(&iommu->iommu);
4917 }
a5459cfe 4918
4236d97d 4919 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
99dcaded 4920 bus_register_notifier(&pci_bus_type, &device_nb);
75f05569
JL
4921 if (si_domain && !hw_pass_through)
4922 register_memory_notifier(&intel_iommu_memory_nb);
21647615
AMG
4923 cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL,
4924 intel_iommu_cpu_dead);
8bc1f85c
ED
4925 intel_iommu_enabled = 1;
4926
ba395927 4927 return 0;
9bdc531e
JL
4928
4929out_free_reserved_range:
4930 put_iova_domain(&reserved_iova_list);
9bdc531e
JL
4931out_free_dmar:
4932 intel_iommu_free_dmars();
3a5670e8
JL
4933 up_write(&dmar_global_lock);
4934 iommu_exit_mempool();
9bdc531e 4935 return ret;
ba395927 4936}
e820482c 4937
2452d9db 4938static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
579305f7
AW
4939{
4940 struct intel_iommu *iommu = opaque;
4941
2452d9db 4942 domain_context_clear_one(iommu, PCI_BUS_NUM(alias), alias & 0xff);
579305f7
AW
4943 return 0;
4944}
4945
4946/*
4947 * NB - intel-iommu lacks any sort of reference counting for the users of
4948 * dependent devices. If multiple endpoints have intersecting dependent
4949 * devices, unbinding the driver from any one of them will possibly leave
4950 * the others unable to operate.
4951 */
2452d9db 4952static void domain_context_clear(struct intel_iommu *iommu, struct device *dev)
3199aa6b 4953{
0bcb3e28 4954 if (!iommu || !dev || !dev_is_pci(dev))
3199aa6b
HW
4955 return;
4956
2452d9db 4957 pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_clear_one_cb, iommu);
3199aa6b
HW
4958}
4959
127c7615 4960static void __dmar_remove_one_dev_info(struct device_domain_info *info)
c7151a8d 4961{
c7151a8d
WH
4962 struct intel_iommu *iommu;
4963 unsigned long flags;
c7151a8d 4964
55d94043
JR
4965 assert_spin_locked(&device_domain_lock);
4966
127c7615 4967 if (WARN_ON(!info))
c7151a8d
WH
4968 return;
4969
127c7615 4970 iommu = info->iommu;
c7151a8d 4971
127c7615
JR
4972 if (info->dev) {
4973 iommu_disable_dev_iotlb(info);
4974 domain_context_clear(iommu, info->dev);
4975 }
c7151a8d 4976
b608ac3b 4977 unlink_domain_info(info);
c7151a8d 4978
d160aca5 4979 spin_lock_irqsave(&iommu->lock, flags);
127c7615 4980 domain_detach_iommu(info->domain, iommu);
d160aca5 4981 spin_unlock_irqrestore(&iommu->lock, flags);
c7151a8d 4982
127c7615 4983 free_devinfo_mem(info);
c7151a8d 4984}
c7151a8d 4985
55d94043
JR
4986static void dmar_remove_one_dev_info(struct dmar_domain *domain,
4987 struct device *dev)
4988{
127c7615 4989 struct device_domain_info *info;
55d94043 4990 unsigned long flags;
3e7abe25 4991
55d94043 4992 spin_lock_irqsave(&device_domain_lock, flags);
127c7615
JR
4993 info = dev->archdata.iommu;
4994 __dmar_remove_one_dev_info(info);
55d94043 4995 spin_unlock_irqrestore(&device_domain_lock, flags);
c7151a8d
WH
4996}
4997
2c2e2c38 4998static int md_domain_init(struct dmar_domain *domain, int guest_width)
5e98c4b1
WH
4999{
5000 int adjust_width;
5001
0fb5fe87
RM
5002 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
5003 DMA_32BIT_PFN);
5e98c4b1
WH
5004 domain_reserve_special_ranges(domain);
5005
5006 /* calculate AGAW */
5007 domain->gaw = guest_width;
5008 adjust_width = guestwidth_to_adjustwidth(guest_width);
5009 domain->agaw = width_to_agaw(adjust_width);
5010
5e98c4b1 5011 domain->iommu_coherency = 0;
c5b15255 5012 domain->iommu_snooping = 0;
6dd9a7c7 5013 domain->iommu_superpage = 0;
fe40f1e0 5014 domain->max_addr = 0;
5e98c4b1
WH
5015
5016 /* always allocate the top pgd */
4c923d47 5017 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
5e98c4b1
WH
5018 if (!domain->pgd)
5019 return -ENOMEM;
5020 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
5021 return 0;
5022}
5023
00a77deb 5024static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
38717946 5025{
5d450806 5026 struct dmar_domain *dmar_domain;
00a77deb
JR
5027 struct iommu_domain *domain;
5028
5029 if (type != IOMMU_DOMAIN_UNMANAGED)
5030 return NULL;
38717946 5031
ab8dfe25 5032 dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE);
5d450806 5033 if (!dmar_domain) {
9f10e5bf 5034 pr_err("Can't allocate dmar_domain\n");
00a77deb 5035 return NULL;
38717946 5036 }
2c2e2c38 5037 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
9f10e5bf 5038 pr_err("Domain initialization failed\n");
92d03cc8 5039 domain_exit(dmar_domain);
00a77deb 5040 return NULL;
38717946 5041 }
8140a95d 5042 domain_update_iommu_cap(dmar_domain);
faa3d6f5 5043
00a77deb 5044 domain = &dmar_domain->domain;
8a0e715b
JR
5045 domain->geometry.aperture_start = 0;
5046 domain->geometry.aperture_end = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
5047 domain->geometry.force_aperture = true;
5048
00a77deb 5049 return domain;
38717946 5050}
38717946 5051
00a77deb 5052static void intel_iommu_domain_free(struct iommu_domain *domain)
38717946 5053{
00a77deb 5054 domain_exit(to_dmar_domain(domain));
38717946 5055}
38717946 5056
4c5478c9
JR
5057static int intel_iommu_attach_device(struct iommu_domain *domain,
5058 struct device *dev)
38717946 5059{
00a77deb 5060 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
fe40f1e0
WH
5061 struct intel_iommu *iommu;
5062 int addr_width;
156baca8 5063 u8 bus, devfn;
faa3d6f5 5064
c875d2c1
AW
5065 if (device_is_rmrr_locked(dev)) {
5066 dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n");
5067 return -EPERM;
5068 }
5069
7207d8f9
DW
5070 /* normally dev is not mapped */
5071 if (unlikely(domain_context_mapped(dev))) {
faa3d6f5
WH
5072 struct dmar_domain *old_domain;
5073
1525a29a 5074 old_domain = find_domain(dev);
faa3d6f5 5075 if (old_domain) {
d160aca5 5076 rcu_read_lock();
de7e8886 5077 dmar_remove_one_dev_info(old_domain, dev);
d160aca5 5078 rcu_read_unlock();
62c22167
JR
5079
5080 if (!domain_type_is_vm_or_si(old_domain) &&
5081 list_empty(&old_domain->devices))
5082 domain_exit(old_domain);
faa3d6f5
WH
5083 }
5084 }
5085
156baca8 5086 iommu = device_to_iommu(dev, &bus, &devfn);
fe40f1e0
WH
5087 if (!iommu)
5088 return -ENODEV;
5089
5090 /* check if this iommu agaw is sufficient for max mapped address */
5091 addr_width = agaw_to_width(iommu->agaw);
a99c47a2
TL
5092 if (addr_width > cap_mgaw(iommu->cap))
5093 addr_width = cap_mgaw(iommu->cap);
5094
5095 if (dmar_domain->max_addr > (1LL << addr_width)) {
9f10e5bf 5096 pr_err("%s: iommu width (%d) is not "
fe40f1e0 5097 "sufficient for the mapped address (%llx)\n",
a99c47a2 5098 __func__, addr_width, dmar_domain->max_addr);
fe40f1e0
WH
5099 return -EFAULT;
5100 }
a99c47a2
TL
5101 dmar_domain->gaw = addr_width;
5102
5103 /*
5104 * Knock out extra levels of page tables if necessary
5105 */
5106 while (iommu->agaw < dmar_domain->agaw) {
5107 struct dma_pte *pte;
5108
5109 pte = dmar_domain->pgd;
5110 if (dma_pte_present(pte)) {
25cbff16
SY
5111 dmar_domain->pgd = (struct dma_pte *)
5112 phys_to_virt(dma_pte_addr(pte));
7a661013 5113 free_pgtable_page(pte);
a99c47a2
TL
5114 }
5115 dmar_domain->agaw--;
5116 }
fe40f1e0 5117
28ccce0d 5118 return domain_add_dev_info(dmar_domain, dev);
38717946 5119}
38717946 5120
4c5478c9
JR
5121static void intel_iommu_detach_device(struct iommu_domain *domain,
5122 struct device *dev)
38717946 5123{
e6de0f8d 5124 dmar_remove_one_dev_info(to_dmar_domain(domain), dev);
faa3d6f5 5125}
c7151a8d 5126
b146a1c9
JR
5127static int intel_iommu_map(struct iommu_domain *domain,
5128 unsigned long iova, phys_addr_t hpa,
5009065d 5129 size_t size, int iommu_prot)
faa3d6f5 5130{
00a77deb 5131 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
fe40f1e0 5132 u64 max_addr;
dde57a21 5133 int prot = 0;
faa3d6f5 5134 int ret;
fe40f1e0 5135
dde57a21
JR
5136 if (iommu_prot & IOMMU_READ)
5137 prot |= DMA_PTE_READ;
5138 if (iommu_prot & IOMMU_WRITE)
5139 prot |= DMA_PTE_WRITE;
9cf06697
SY
5140 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
5141 prot |= DMA_PTE_SNP;
dde57a21 5142
163cc52c 5143 max_addr = iova + size;
dde57a21 5144 if (dmar_domain->max_addr < max_addr) {
fe40f1e0
WH
5145 u64 end;
5146
5147 /* check if minimum agaw is sufficient for mapped address */
8954da1f 5148 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
fe40f1e0 5149 if (end < max_addr) {
9f10e5bf 5150 pr_err("%s: iommu width (%d) is not "
fe40f1e0 5151 "sufficient for the mapped address (%llx)\n",
8954da1f 5152 __func__, dmar_domain->gaw, max_addr);
fe40f1e0
WH
5153 return -EFAULT;
5154 }
dde57a21 5155 dmar_domain->max_addr = max_addr;
fe40f1e0 5156 }
ad051221
DW
5157 /* Round up size to next multiple of PAGE_SIZE, if it and
5158 the low bits of hpa would take us onto the next page */
88cb6a74 5159 size = aligned_nrpages(hpa, size);
ad051221
DW
5160 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
5161 hpa >> VTD_PAGE_SHIFT, size, prot);
faa3d6f5 5162 return ret;
38717946 5163}
38717946 5164
5009065d 5165static size_t intel_iommu_unmap(struct iommu_domain *domain,
ea8ea460 5166 unsigned long iova, size_t size)
38717946 5167{
00a77deb 5168 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
ea8ea460
DW
5169 struct page *freelist = NULL;
5170 struct intel_iommu *iommu;
5171 unsigned long start_pfn, last_pfn;
5172 unsigned int npages;
42e8c186 5173 int iommu_id, level = 0;
5cf0a76f
DW
5174
5175 /* Cope with horrid API which requires us to unmap more than the
5176 size argument if it happens to be a large-page mapping. */
dc02e46e 5177 BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
5cf0a76f
DW
5178
5179 if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
5180 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4b99d352 5181
ea8ea460
DW
5182 start_pfn = iova >> VTD_PAGE_SHIFT;
5183 last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
5184
5185 freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
5186
5187 npages = last_pfn - start_pfn + 1;
5188
29a27719 5189 for_each_domain_iommu(iommu_id, dmar_domain) {
a1ddcbe9 5190 iommu = g_iommus[iommu_id];
ea8ea460 5191
42e8c186
JR
5192 iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
5193 start_pfn, npages, !freelist, 0);
ea8ea460
DW
5194 }
5195
5196 dma_free_pagelist(freelist);
fe40f1e0 5197
163cc52c
DW
5198 if (dmar_domain->max_addr == iova + size)
5199 dmar_domain->max_addr = iova;
b146a1c9 5200
5cf0a76f 5201 return size;
38717946 5202}
38717946 5203
d14d6577 5204static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
bb5547ac 5205 dma_addr_t iova)
38717946 5206{
00a77deb 5207 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
38717946 5208 struct dma_pte *pte;
5cf0a76f 5209 int level = 0;
faa3d6f5 5210 u64 phys = 0;
38717946 5211
5cf0a76f 5212 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
38717946 5213 if (pte)
faa3d6f5 5214 phys = dma_pte_addr(pte);
38717946 5215
faa3d6f5 5216 return phys;
38717946 5217}
a8bcbb0d 5218
5d587b8d 5219static bool intel_iommu_capable(enum iommu_cap cap)
dbb9fd86 5220{
dbb9fd86 5221 if (cap == IOMMU_CAP_CACHE_COHERENCY)
5d587b8d 5222 return domain_update_iommu_snooping(NULL) == 1;
323f99cb 5223 if (cap == IOMMU_CAP_INTR_REMAP)
5d587b8d 5224 return irq_remapping_enabled == 1;
dbb9fd86 5225
5d587b8d 5226 return false;
dbb9fd86
SY
5227}
5228
abdfdde2
AW
5229static int intel_iommu_add_device(struct device *dev)
5230{
a5459cfe 5231 struct intel_iommu *iommu;
abdfdde2 5232 struct iommu_group *group;
156baca8 5233 u8 bus, devfn;
70ae6f0d 5234
a5459cfe
AW
5235 iommu = device_to_iommu(dev, &bus, &devfn);
5236 if (!iommu)
70ae6f0d
AW
5237 return -ENODEV;
5238
e3d10af1 5239 iommu_device_link(&iommu->iommu, dev);
a4ff1fc2 5240
e17f9ff4 5241 group = iommu_group_get_for_dev(dev);
783f157b 5242
e17f9ff4
AW
5243 if (IS_ERR(group))
5244 return PTR_ERR(group);
bcb71abe 5245
abdfdde2 5246 iommu_group_put(group);
e17f9ff4 5247 return 0;
abdfdde2 5248}
70ae6f0d 5249
abdfdde2
AW
5250static void intel_iommu_remove_device(struct device *dev)
5251{
a5459cfe
AW
5252 struct intel_iommu *iommu;
5253 u8 bus, devfn;
5254
5255 iommu = device_to_iommu(dev, &bus, &devfn);
5256 if (!iommu)
5257 return;
5258
abdfdde2 5259 iommu_group_remove_device(dev);
a5459cfe 5260
e3d10af1 5261 iommu_device_unlink(&iommu->iommu, dev);
70ae6f0d
AW
5262}
5263
0659b8dc
EA
5264static void intel_iommu_get_resv_regions(struct device *device,
5265 struct list_head *head)
5266{
5267 struct iommu_resv_region *reg;
5268 struct dmar_rmrr_unit *rmrr;
5269 struct device *i_dev;
5270 int i;
5271
5272 rcu_read_lock();
5273 for_each_rmrr_units(rmrr) {
5274 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
5275 i, i_dev) {
5276 if (i_dev != device)
5277 continue;
5278
5279 list_add_tail(&rmrr->resv->list, head);
5280 }
5281 }
5282 rcu_read_unlock();
5283
5284 reg = iommu_alloc_resv_region(IOAPIC_RANGE_START,
5285 IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1,
9d3a4de4 5286 0, IOMMU_RESV_MSI);
0659b8dc
EA
5287 if (!reg)
5288 return;
5289 list_add_tail(&reg->list, head);
5290}
5291
5292static void intel_iommu_put_resv_regions(struct device *dev,
5293 struct list_head *head)
5294{
5295 struct iommu_resv_region *entry, *next;
5296
5297 list_for_each_entry_safe(entry, next, head, list) {
5298 if (entry->type == IOMMU_RESV_RESERVED)
5299 kfree(entry);
5300 }
70ae6f0d
AW
5301}
5302
2f26e0a9 5303#ifdef CONFIG_INTEL_IOMMU_SVM
65ca7f5f
JP
5304#define MAX_NR_PASID_BITS (20)
5305static inline unsigned long intel_iommu_get_pts(struct intel_iommu *iommu)
5306{
5307 /*
5308 * Convert ecap_pss to extend context entry pts encoding, also
5309 * respect the soft pasid_max value set by the iommu.
5310 * - number of PASID bits = ecap_pss + 1
5311 * - number of PASID table entries = 2^(pts + 5)
5312 * Therefore, pts = ecap_pss - 4
5313 * e.g. KBL ecap_pss = 0x13, PASID has 20 bits, pts = 15
5314 */
5315 if (ecap_pss(iommu->ecap) < 5)
5316 return 0;
5317
5318 /* pasid_max is encoded as actual number of entries not the bits */
5319 return find_first_bit((unsigned long *)&iommu->pasid_max,
5320 MAX_NR_PASID_BITS) - 5;
5321}
5322
2f26e0a9
DW
5323int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev)
5324{
5325 struct device_domain_info *info;
5326 struct context_entry *context;
5327 struct dmar_domain *domain;
5328 unsigned long flags;
5329 u64 ctx_lo;
5330 int ret;
5331
5332 domain = get_valid_domain_for_dev(sdev->dev);
5333 if (!domain)
5334 return -EINVAL;
5335
5336 spin_lock_irqsave(&device_domain_lock, flags);
5337 spin_lock(&iommu->lock);
5338
5339 ret = -EINVAL;
5340 info = sdev->dev->archdata.iommu;
5341 if (!info || !info->pasid_supported)
5342 goto out;
5343
5344 context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
5345 if (WARN_ON(!context))
5346 goto out;
5347
5348 ctx_lo = context[0].lo;
5349
5350 sdev->did = domain->iommu_did[iommu->seq_id];
5351 sdev->sid = PCI_DEVID(info->bus, info->devfn);
5352
5353 if (!(ctx_lo & CONTEXT_PASIDE)) {
5354 context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table);
65ca7f5f
JP
5355 context[1].lo = (u64)virt_to_phys(iommu->pasid_table) |
5356 intel_iommu_get_pts(iommu);
5357
2f26e0a9
DW
5358 wmb();
5359 /* CONTEXT_TT_MULTI_LEVEL and CONTEXT_TT_DEV_IOTLB are both
5360 * extended to permit requests-with-PASID if the PASIDE bit
5361 * is set. which makes sense. For CONTEXT_TT_PASS_THROUGH,
5362 * however, the PASIDE bit is ignored and requests-with-PASID
5363 * are unconditionally blocked. Which makes less sense.
5364 * So convert from CONTEXT_TT_PASS_THROUGH to one of the new
5365 * "guest mode" translation types depending on whether ATS
5366 * is available or not. Annoyingly, we can't use the new
5367 * modes *unless* PASIDE is set. */
5368 if ((ctx_lo & CONTEXT_TT_MASK) == (CONTEXT_TT_PASS_THROUGH << 2)) {
5369 ctx_lo &= ~CONTEXT_TT_MASK;
5370 if (info->ats_supported)
5371 ctx_lo |= CONTEXT_TT_PT_PASID_DEV_IOTLB << 2;
5372 else
5373 ctx_lo |= CONTEXT_TT_PT_PASID << 2;
5374 }
5375 ctx_lo |= CONTEXT_PASIDE;
907fea34
DW
5376 if (iommu->pasid_state_table)
5377 ctx_lo |= CONTEXT_DINVE;
a222a7f0
DW
5378 if (info->pri_supported)
5379 ctx_lo |= CONTEXT_PRS;
2f26e0a9
DW
5380 context[0].lo = ctx_lo;
5381 wmb();
5382 iommu->flush.flush_context(iommu, sdev->did, sdev->sid,
5383 DMA_CCMD_MASK_NOBIT,
5384 DMA_CCMD_DEVICE_INVL);
5385 }
5386
5387 /* Enable PASID support in the device, if it wasn't already */
5388 if (!info->pasid_enabled)
5389 iommu_enable_dev_iotlb(info);
5390
5391 if (info->ats_enabled) {
5392 sdev->dev_iotlb = 1;
5393 sdev->qdep = info->ats_qdep;
5394 if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
5395 sdev->qdep = 0;
5396 }
5397 ret = 0;
5398
5399 out:
5400 spin_unlock(&iommu->lock);
5401 spin_unlock_irqrestore(&device_domain_lock, flags);
5402
5403 return ret;
5404}
5405
5406struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
5407{
5408 struct intel_iommu *iommu;
5409 u8 bus, devfn;
5410
5411 if (iommu_dummy(dev)) {
5412 dev_warn(dev,
5413 "No IOMMU translation for device; cannot enable SVM\n");
5414 return NULL;
5415 }
5416
5417 iommu = device_to_iommu(dev, &bus, &devfn);
5418 if ((!iommu)) {
b9997e38 5419 dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
2f26e0a9
DW
5420 return NULL;
5421 }
5422
5423 if (!iommu->pasid_table) {
b9997e38 5424 dev_err(dev, "PASID not enabled on IOMMU; cannot enable SVM\n");
2f26e0a9
DW
5425 return NULL;
5426 }
5427
5428 return iommu;
5429}
5430#endif /* CONFIG_INTEL_IOMMU_SVM */
5431
b0119e87 5432const struct iommu_ops intel_iommu_ops = {
0659b8dc
EA
5433 .capable = intel_iommu_capable,
5434 .domain_alloc = intel_iommu_domain_alloc,
5435 .domain_free = intel_iommu_domain_free,
5436 .attach_dev = intel_iommu_attach_device,
5437 .detach_dev = intel_iommu_detach_device,
5438 .map = intel_iommu_map,
5439 .unmap = intel_iommu_unmap,
5440 .map_sg = default_iommu_map_sg,
5441 .iova_to_phys = intel_iommu_iova_to_phys,
5442 .add_device = intel_iommu_add_device,
5443 .remove_device = intel_iommu_remove_device,
5444 .get_resv_regions = intel_iommu_get_resv_regions,
5445 .put_resv_regions = intel_iommu_put_resv_regions,
5446 .device_group = pci_device_group,
5447 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
a8bcbb0d 5448};
9af88143 5449
9452618e
DV
5450static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
5451{
5452 /* G4x/GM45 integrated gfx dmar support is totally busted. */
9f10e5bf 5453 pr_info("Disabling IOMMU for graphics on this chipset\n");
9452618e
DV
5454 dmar_map_gfx = 0;
5455}
5456
5457DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
5458DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
5459DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
5460DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
5461DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
5462DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
5463DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
5464
d34d6517 5465static void quirk_iommu_rwbf(struct pci_dev *dev)
9af88143
DW
5466{
5467 /*
5468 * Mobile 4 Series Chipset neglects to set RWBF capability,
210561ff 5469 * but needs it. Same seems to hold for the desktop versions.
9af88143 5470 */
9f10e5bf 5471 pr_info("Forcing write-buffer flush capability\n");
9af88143
DW
5472 rwbf_quirk = 1;
5473}
5474
5475DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
210561ff
DV
5476DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
5477DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
5478DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
5479DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
5480DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
5481DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
e0fc7e0b 5482
eecfd57f
AJ
5483#define GGC 0x52
5484#define GGC_MEMORY_SIZE_MASK (0xf << 8)
5485#define GGC_MEMORY_SIZE_NONE (0x0 << 8)
5486#define GGC_MEMORY_SIZE_1M (0x1 << 8)
5487#define GGC_MEMORY_SIZE_2M (0x3 << 8)
5488#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
5489#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
5490#define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
5491#define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
5492
d34d6517 5493static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
9eecabcb
DW
5494{
5495 unsigned short ggc;
5496
eecfd57f 5497 if (pci_read_config_word(dev, GGC, &ggc))
9eecabcb
DW
5498 return;
5499
eecfd57f 5500 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
9f10e5bf 5501 pr_info("BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
9eecabcb 5502 dmar_map_gfx = 0;
6fbcfb3e
DW
5503 } else if (dmar_map_gfx) {
5504 /* we have to ensure the gfx device is idle before we flush */
9f10e5bf 5505 pr_info("Disabling batched IOTLB flush on Ironlake\n");
6fbcfb3e
DW
5506 intel_iommu_strict = 1;
5507 }
9eecabcb
DW
5508}
5509DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
5510DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
5511DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
5512DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
5513
e0fc7e0b
DW
5514/* On Tylersburg chipsets, some BIOSes have been known to enable the
5515 ISOCH DMAR unit for the Azalia sound device, but not give it any
5516 TLB entries, which causes it to deadlock. Check for that. We do
5517 this in a function called from init_dmars(), instead of in a PCI
5518 quirk, because we don't want to print the obnoxious "BIOS broken"
5519 message if VT-d is actually disabled.
5520*/
5521static void __init check_tylersburg_isoch(void)
5522{
5523 struct pci_dev *pdev;
5524 uint32_t vtisochctrl;
5525
5526 /* If there's no Azalia in the system anyway, forget it. */
5527 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
5528 if (!pdev)
5529 return;
5530 pci_dev_put(pdev);
5531
5532 /* System Management Registers. Might be hidden, in which case
5533 we can't do the sanity check. But that's OK, because the
5534 known-broken BIOSes _don't_ actually hide it, so far. */
5535 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
5536 if (!pdev)
5537 return;
5538
5539 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
5540 pci_dev_put(pdev);
5541 return;
5542 }
5543
5544 pci_dev_put(pdev);
5545
5546 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
5547 if (vtisochctrl & 1)
5548 return;
5549
5550 /* Drop all bits other than the number of TLB entries */
5551 vtisochctrl &= 0x1c;
5552
5553 /* If we have the recommended number of TLB entries (16), fine. */
5554 if (vtisochctrl == 0x10)
5555 return;
5556
5557 /* Zero TLB entries? You get to ride the short bus to school. */
5558 if (!vtisochctrl) {
5559 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
5560 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
5561 dmi_get_system_info(DMI_BIOS_VENDOR),
5562 dmi_get_system_info(DMI_BIOS_VERSION),
5563 dmi_get_system_info(DMI_PRODUCT_VERSION));
5564 iommu_identity_mapping |= IDENTMAP_AZALIA;
5565 return;
5566 }
9f10e5bf
JR
5567
5568 pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
e0fc7e0b
DW
5569 vtisochctrl);
5570}