git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - drivers/iommu/intel-iommu.c
iommu/vt-d: Keep track of per-iommu domain ids
ba395927 1/*
ea8ea460 2 * Copyright © 2006-2014 Intel Corporation.
ba395927
KA
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
ea8ea460
DW
13 * Authors: David Woodhouse <dwmw2@infradead.org>,
14 * Ashok Raj <ashok.raj@intel.com>,
15 * Shaohua Li <shaohua.li@intel.com>,
16 * Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
17 * Fenghua Yu <fenghua.yu@intel.com>
9f10e5bf 18 * Joerg Roedel <jroedel@suse.de>
ba395927
KA
19 */
20
9f10e5bf
JR
21#define pr_fmt(fmt) "DMAR: " fmt
22
ba395927
KA
23#include <linux/init.h>
24#include <linux/bitmap.h>
5e0d2a6f 25#include <linux/debugfs.h>
54485c30 26#include <linux/export.h>
ba395927
KA
27#include <linux/slab.h>
28#include <linux/irq.h>
29#include <linux/interrupt.h>
ba395927
KA
30#include <linux/spinlock.h>
31#include <linux/pci.h>
32#include <linux/dmar.h>
33#include <linux/dma-mapping.h>
34#include <linux/mempool.h>
75f05569 35#include <linux/memory.h>
5e0d2a6f 36#include <linux/timer.h>
38717946 37#include <linux/iova.h>
5d450806 38#include <linux/iommu.h>
38717946 39#include <linux/intel-iommu.h>
134fac3f 40#include <linux/syscore_ops.h>
69575d38 41#include <linux/tboot.h>
adb2fe02 42#include <linux/dmi.h>
5cdede24 43#include <linux/pci-ats.h>
0ee332c1 44#include <linux/memblock.h>
36746436 45#include <linux/dma-contiguous.h>
091d42e4 46#include <linux/crash_dump.h>
8a8f422d 47#include <asm/irq_remapping.h>
ba395927 48#include <asm/cacheflush.h>
46a7fa27 49#include <asm/iommu.h>
ba395927 50
078e1ee2
JR
51#include "irq_remapping.h"
52
5b6985ce
FY
53#define ROOT_SIZE VTD_PAGE_SIZE
54#define CONTEXT_SIZE VTD_PAGE_SIZE
55
ba395927 56#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
18436afd 57#define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
ba395927 58#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
e0fc7e0b 59#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
ba395927
KA
60
61#define IOAPIC_RANGE_START (0xfee00000)
62#define IOAPIC_RANGE_END (0xfeefffff)
63#define IOVA_START_ADDR (0x1000)
64
65#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
66
4ed0d3e6 67#define MAX_AGAW_WIDTH 64
5c645b35 68#define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
4ed0d3e6 69
2ebe3151
DW
70#define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
71#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
72
73/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
74 to match. That way, we can use 'unsigned long' for PFNs with impunity. */
75#define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
76 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
77#define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
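/*
 * Worked example (illustration only, not part of the original source):
 * for the default 48-bit guest address width, __DOMAIN_MAX_PFN(48) is
 * (1ULL << 36) - 1, i.e. 2^36 4KiB pages, and DOMAIN_MAX_ADDR(48) is
 * 256TiB - 4KiB. On 64-bit builds DOMAIN_MAX_PFN() equals
 * __DOMAIN_MAX_PFN(); on 32-bit builds it is clamped to ULONG_MAX so
 * that PFNs still fit in an unsigned long, as the comment above says.
 */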
ba395927 78
1b722500
RM
79/* IO virtual address start page frame number */
80#define IOVA_START_PFN (1)
81
f27be03b 82#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
284901a9 83#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32))
6a35528a 84#define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64))
5e0d2a6f 85
df08cdc7
AM
86/* page table handling */
87#define LEVEL_STRIDE (9)
88#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
89
6d1c56a9
OBC
90/*
 91 * This bitmap is used to advertise the page sizes our hardware supports
92 * to the IOMMU core, which will then use this information to split
93 * physically contiguous memory regions it is mapping into page sizes
94 * that we support.
95 *
96 * Traditionally the IOMMU core just handed us the mappings directly,
97 * after making sure the size is an order of a 4KiB page and that the
98 * mapping has natural alignment.
99 *
100 * To retain this behavior, we currently advertise that we support
101 * all page sizes that are an order of 4KiB.
102 *
103 * If at some point we'd like to utilize the IOMMU core's new behavior,
104 * we could change this to advertise the real page sizes we support.
105 */
106#define INTEL_IOMMU_PGSIZES (~0xFFFUL)
107
df08cdc7
AM
108static inline int agaw_to_level(int agaw)
109{
110 return agaw + 2;
111}
112
113static inline int agaw_to_width(int agaw)
114{
5c645b35 115 return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
df08cdc7
AM
116}
117
118static inline int width_to_agaw(int width)
119{
5c645b35 120 return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
df08cdc7
AM
121}
122
123static inline unsigned int level_to_offset_bits(int level)
124{
125 return (level - 1) * LEVEL_STRIDE;
126}
127
128static inline int pfn_level_offset(unsigned long pfn, int level)
129{
130 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
131}
132
133static inline unsigned long level_mask(int level)
134{
135 return -1UL << level_to_offset_bits(level);
136}
137
138static inline unsigned long level_size(int level)
139{
140 return 1UL << level_to_offset_bits(level);
141}
142
143static inline unsigned long align_to_level(unsigned long pfn, int level)
144{
145 return (pfn + level_size(level) - 1) & level_mask(level);
146}
fd18de50 147
6dd9a7c7
YS
148static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
149{
5c645b35 150 return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
6dd9a7c7
YS
151}
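/*
 * Illustrative sketch, not part of the original driver: how the helpers
 * above decompose a DMA pfn in the common 48-bit, 4-level configuration
 * (agaw == 2, so agaw_to_level() == 4 and agaw_to_width() == 48).
 */
static inline void pfn_decompose_example(unsigned long pfn)
{
	/* 9-bit table index at each level, top (4) down to leaf (1) */
	int idx4 = pfn_level_offset(pfn, 4);	/* pfn bits 35..27 */
	int idx3 = pfn_level_offset(pfn, 3);	/* pfn bits 26..18 */
	int idx2 = pfn_level_offset(pfn, 2);	/* pfn bits 17..9  */
	int idx1 = pfn_level_offset(pfn, 1);	/* pfn bits 8..0   */

	/* a level-2 (2MiB superpage) entry spans 512 4KiB pages */
	unsigned long l2_pages = lvl_to_nr_pages(2);

	(void)idx4; (void)idx3; (void)idx2; (void)idx1; (void)l2_pages;
}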
152
dd4e8319
DW
153/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
154 are never going to work. */
155static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
156{
157 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
158}
159
160static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
161{
162 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
163}
164static inline unsigned long page_to_dma_pfn(struct page *pg)
165{
166 return mm_to_dma_pfn(page_to_pfn(pg));
167}
168static inline unsigned long virt_to_dma_pfn(void *p)
169{
170 return page_to_dma_pfn(virt_to_page(p));
171}
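/*
 * Worked example (illustration only): with a 16KiB kernel PAGE_SIZE one
 * MM pfn covers four 4KiB VT-d pages, so mm_to_dma_pfn() shifts left by
 * two and dma_to_mm_pfn() shifts right by two. On x86, where PAGE_SHIFT
 * and VTD_PAGE_SHIFT are both 12, the conversions are no-ops.
 */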
172
d9630fe9
WH
173/* global iommu list, set NULL for ignored DMAR units */
174static struct intel_iommu **g_iommus;
175
e0fc7e0b 176static void __init check_tylersburg_isoch(void);
9af88143
DW
177static int rwbf_quirk;
178
b779260b
JC
179/*
180 * set to 1 to panic kernel if can't successfully enable VT-d
181 * (used when kernel is launched w/ TXT)
182 */
183static int force_on = 0;
184
46b08e1a
MM
185/*
186 * 0: Present
187 * 1-11: Reserved
188 * 12-63: Context Ptr (12 - (haw-1))
189 * 64-127: Reserved
190 */
191struct root_entry {
03ecc32c
DW
192 u64 lo;
193 u64 hi;
46b08e1a
MM
194};
195#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
46b08e1a 196
091d42e4
JR
197/*
198 * Take a root_entry and return the Lower Context Table Pointer (LCTP)
199 * if marked present.
200 */
201static phys_addr_t root_entry_lctp(struct root_entry *re)
202{
203 if (!(re->lo & 1))
204 return 0;
205
206 return re->lo & VTD_PAGE_MASK;
207}
208
209/*
210 * Take a root_entry and return the Upper Context Table Pointer (UCTP)
211 * if marked present.
212 */
213static phys_addr_t root_entry_uctp(struct root_entry *re)
214{
215 if (!(re->hi & 1))
216 return 0;
46b08e1a 217
091d42e4
JR
218 return re->hi & VTD_PAGE_MASK;
219}
7a8fc25e
MM
220/*
221 * low 64 bits:
222 * 0: present
223 * 1: fault processing disable
224 * 2-3: translation type
225 * 12-63: address space root
226 * high 64 bits:
227 * 0-2: address width
228 * 3-6: aval
229 * 8-23: domain id
230 */
231struct context_entry {
232 u64 lo;
233 u64 hi;
234};
c07e7d21 235
cf484d0e
JR
236static inline void context_clear_pasid_enable(struct context_entry *context)
237{
238 context->lo &= ~(1ULL << 11);
239}
240
241static inline bool context_pasid_enabled(struct context_entry *context)
242{
243 return !!(context->lo & (1ULL << 11));
244}
245
246static inline void context_set_copied(struct context_entry *context)
247{
248 context->hi |= (1ull << 3);
249}
250
251static inline bool context_copied(struct context_entry *context)
252{
253 return !!(context->hi & (1ULL << 3));
254}
255
256static inline bool __context_present(struct context_entry *context)
c07e7d21
MM
257{
258 return (context->lo & 1);
259}
cf484d0e
JR
260
261static inline bool context_present(struct context_entry *context)
262{
263 return context_pasid_enabled(context) ?
264 __context_present(context) :
265 __context_present(context) && !context_copied(context);
266}
267
c07e7d21
MM
268static inline void context_set_present(struct context_entry *context)
269{
270 context->lo |= 1;
271}
272
273static inline void context_set_fault_enable(struct context_entry *context)
274{
275 context->lo &= (((u64)-1) << 2) | 1;
276}
277
c07e7d21
MM
278static inline void context_set_translation_type(struct context_entry *context,
279 unsigned long value)
280{
281 context->lo &= (((u64)-1) << 4) | 3;
282 context->lo |= (value & 3) << 2;
283}
284
285static inline void context_set_address_root(struct context_entry *context,
286 unsigned long value)
287{
1a2262f9 288 context->lo &= ~VTD_PAGE_MASK;
c07e7d21
MM
289 context->lo |= value & VTD_PAGE_MASK;
290}
291
292static inline void context_set_address_width(struct context_entry *context,
293 unsigned long value)
294{
295 context->hi |= value & 7;
296}
297
298static inline void context_set_domain_id(struct context_entry *context,
299 unsigned long value)
300{
301 context->hi |= (value & ((1 << 16) - 1)) << 8;
302}
303
dbcd861f
JR
304static inline int context_domain_id(struct context_entry *c)
305{
306 return((c->hi >> 8) & 0xffff);
307}
308
c07e7d21
MM
309static inline void context_clear_entry(struct context_entry *context)
310{
311 context->lo = 0;
312 context->hi = 0;
313}
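/*
 * Illustrative sketch, not part of the original driver: how the helpers
 * above combine to build a context entry that points a device at a
 * domain's page table (argument values are examples only).
 */
static inline void context_entry_example(struct context_entry *ce,
					 unsigned long pgd_phys,
					 int agaw, u16 did)
{
	context_clear_entry(ce);
	context_set_domain_id(ce, did);		/* hi bits 8-23  */
	context_set_address_width(ce, agaw);	/* hi bits 0-2   */
	context_set_address_root(ce, pgd_phys);	/* lo bits 12-63 */
	context_set_translation_type(ce, 0);	/* 0: translate through tables */
	context_set_fault_enable(ce);		/* clear the fault-disable bit */
	context_set_present(ce);		/* finally mark the entry valid */
}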
7a8fc25e 314
622ba12a
MM
315/*
316 * 0: readable
317 * 1: writable
318 * 2-6: reserved
319 * 7: super page
9cf06697
SY
320 * 8-10: available
321 * 11: snoop behavior
622ba12a
MM
 322 * 12-63: Host physical address
323 */
324struct dma_pte {
325 u64 val;
326};
622ba12a 327
19c239ce
MM
328static inline void dma_clear_pte(struct dma_pte *pte)
329{
330 pte->val = 0;
331}
332
19c239ce
MM
333static inline u64 dma_pte_addr(struct dma_pte *pte)
334{
c85994e4
DW
335#ifdef CONFIG_64BIT
336 return pte->val & VTD_PAGE_MASK;
337#else
338 /* Must have a full atomic 64-bit read */
1a8bd481 339 return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
c85994e4 340#endif
19c239ce
MM
341}
342
19c239ce
MM
343static inline bool dma_pte_present(struct dma_pte *pte)
344{
345 return (pte->val & 3) != 0;
346}
622ba12a 347
4399c8bf
AK
348static inline bool dma_pte_superpage(struct dma_pte *pte)
349{
c3c75eb7 350 return (pte->val & DMA_PTE_LARGE_PAGE);
4399c8bf
AK
351}
352
75e6bf96
DW
353static inline int first_pte_in_page(struct dma_pte *pte)
354{
355 return !((unsigned long)pte & ~VTD_PAGE_MASK);
356}
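/*
 * Illustrative sketch, not part of the original driver: composing and
 * inspecting a leaf PTE with the helpers above. A present leaf carries
 * the host physical address plus the read/write permission bits; the
 * DMA_PTE_LARGE_PAGE bit (bit 7) would turn it into a superpage entry.
 */
static inline void dma_pte_example(struct dma_pte *pte, u64 host_phys)
{
	pte->val = (host_phys & VTD_PAGE_MASK) | DMA_PTE_READ | DMA_PTE_WRITE;

	WARN_ON(!dma_pte_present(pte));
	WARN_ON(dma_pte_superpage(pte));
	WARN_ON(dma_pte_addr(pte) != (host_phys & VTD_PAGE_MASK));

	dma_clear_pte(pte);	/* back to a non-present entry */
}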
357
2c2e2c38
FY
358/*
 359 * This domain is a static identity mapping domain.
 360 * 1. This domain creates a static 1:1 mapping to all usable memory.
 361 * 2. It maps to each iommu if successful.
 362 * 3. Each iommu maps to this domain if successful.
363 */
19943b0e
DW
364static struct dmar_domain *si_domain;
365static int hw_pass_through = 1;
2c2e2c38 366
1ce28feb
WH
 367/* domain represents a virtual machine; more than one device
 368 * across iommus may be owned by one domain, e.g. a kvm guest.
369 */
ab8dfe25 370#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 0)
1ce28feb 371
2c2e2c38 372/* si_domain contains multiple devices */
ab8dfe25 373#define DOMAIN_FLAG_STATIC_IDENTITY (1 << 1)
2c2e2c38 374
99126f7c
MM
375struct dmar_domain {
376 int id; /* domain id */
4c923d47 377 int nid; /* node id */
78d8e704 378 DECLARE_BITMAP(iommu_bmp, DMAR_UNITS_SUPPORTED);
1b198bb0 379 /* bitmap of iommus this domain uses */
99126f7c 380
c0e8a6c8
JR
381 u16 iommu_did[DMAR_UNITS_SUPPORTED];
382 /* Domain ids per IOMMU. Use u16 since
383 * domain ids are 16 bit wide according
384 * to VT-d spec, section 9.3 */
385
00a77deb 386 struct list_head devices; /* all devices' list */
99126f7c
MM
387 struct iova_domain iovad; /* iova's that belong to this domain */
388
389 struct dma_pte *pgd; /* virtual address */
99126f7c
MM
390 int gaw; /* max guest address width */
391
392 /* adjusted guest address width, 0 is level 2 30-bit */
393 int agaw;
394
3b5410e7 395 int flags; /* flags to find out type of domain */
8e604097
WH
396
397 int iommu_coherency;/* indicate coherency of iommu access */
58c610bd 398 int iommu_snooping; /* indicate snooping control feature*/
c7151a8d 399 int iommu_count; /* reference count of iommu */
6dd9a7c7
YS
400 int iommu_superpage;/* Level of superpages supported:
401 0 == 4KiB (no superpages), 1 == 2MiB,
402 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
c7151a8d 403 spinlock_t iommu_lock; /* protect iommu set in domain */
fe40f1e0 404 u64 max_addr; /* maximum mapped address */
00a77deb
JR
405
406 struct iommu_domain domain; /* generic domain data structure for
407 iommu core */
99126f7c
MM
408};
409
a647dacb
MM
410/* PCI domain-device relationship */
411struct device_domain_info {
412 struct list_head link; /* link to domain siblings */
413 struct list_head global; /* link to global list */
276dbf99 414 u8 bus; /* PCI bus number */
a647dacb 415 u8 devfn; /* PCI devfn number */
0bcb3e28 416 struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
93a23a72 417 struct intel_iommu *iommu; /* IOMMU used by this device */
a647dacb
MM
418 struct dmar_domain *domain; /* pointer to domain */
419};
420
b94e4117
JL
421struct dmar_rmrr_unit {
422 struct list_head list; /* list of rmrr units */
423 struct acpi_dmar_header *hdr; /* ACPI header */
424 u64 base_address; /* reserved base address*/
425 u64 end_address; /* reserved end address */
832bd858 426 struct dmar_dev_scope *devices; /* target devices */
b94e4117
JL
427 int devices_cnt; /* target device count */
428};
429
430struct dmar_atsr_unit {
431 struct list_head list; /* list of ATSR units */
432 struct acpi_dmar_header *hdr; /* ACPI header */
832bd858 433 struct dmar_dev_scope *devices; /* target devices */
b94e4117
JL
434 int devices_cnt; /* target device count */
435 u8 include_all:1; /* include all ports */
436};
437
438static LIST_HEAD(dmar_atsr_units);
439static LIST_HEAD(dmar_rmrr_units);
440
441#define for_each_rmrr_units(rmrr) \
442 list_for_each_entry(rmrr, &dmar_rmrr_units, list)
443
5e0d2a6f 444static void flush_unmaps_timeout(unsigned long data);
445
b707cb02 446static DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
5e0d2a6f 447
80b20dd8 448#define HIGH_WATER_MARK 250
449struct deferred_flush_tables {
450 int next;
451 struct iova *iova[HIGH_WATER_MARK];
452 struct dmar_domain *domain[HIGH_WATER_MARK];
ea8ea460 453 struct page *freelist[HIGH_WATER_MARK];
80b20dd8 454};
455
456static struct deferred_flush_tables *deferred_flush;
457
5e0d2a6f 458/* bitmap for indexing intel_iommus */
5e0d2a6f 459static int g_num_of_iommus;
460
461static DEFINE_SPINLOCK(async_umap_flush_lock);
462static LIST_HEAD(unmaps_to_do);
463
464static int timer_on;
465static long list_size;
5e0d2a6f 466
92d03cc8 467static void domain_exit(struct dmar_domain *domain);
ba395927 468static void domain_remove_dev_info(struct dmar_domain *domain);
b94e4117 469static void domain_remove_one_dev_info(struct dmar_domain *domain,
bf9c9eda 470 struct device *dev);
92d03cc8 471static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
0bcb3e28 472 struct device *dev);
2a46ddf7
JL
473static int domain_detach_iommu(struct dmar_domain *domain,
474 struct intel_iommu *iommu);
ba395927 475
d3f13810 476#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
0cd5c3c8
KM
477int dmar_disabled = 0;
478#else
479int dmar_disabled = 1;
d3f13810 480#endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
0cd5c3c8 481
8bc1f85c
ED
482int intel_iommu_enabled = 0;
483EXPORT_SYMBOL_GPL(intel_iommu_enabled);
484
2d9e667e 485static int dmar_map_gfx = 1;
7d3b03ce 486static int dmar_forcedac;
5e0d2a6f 487static int intel_iommu_strict;
6dd9a7c7 488static int intel_iommu_superpage = 1;
c83b2f20
DW
489static int intel_iommu_ecs = 1;
490
491/* We only actually use ECS when PASID support (on the new bit 40)
492 * is also advertised. Some early implementations — the ones with
493 * PASID support on bit 28 — have issues even when we *only* use
494 * extended root/context tables. */
495#define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap) && \
496 ecap_pasid(iommu->ecap))
ba395927 497
c0771df8
DW
498int intel_iommu_gfx_mapped;
499EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
500
ba395927
KA
501#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
502static DEFINE_SPINLOCK(device_domain_lock);
503static LIST_HEAD(device_domain_list);
504
b22f6434 505static const struct iommu_ops intel_iommu_ops;
a8bcbb0d 506
4158c2ec
JR
507static bool translation_pre_enabled(struct intel_iommu *iommu)
508{
509 return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
510}
511
091d42e4
JR
512static void clear_translation_pre_enabled(struct intel_iommu *iommu)
513{
514 iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
515}
516
4158c2ec
JR
517static void init_translation_status(struct intel_iommu *iommu)
518{
519 u32 gsts;
520
521 gsts = readl(iommu->reg + DMAR_GSTS_REG);
522 if (gsts & DMA_GSTS_TES)
523 iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
524}
525
00a77deb
JR
 526/* Convert a generic 'struct iommu_domain' to the private 'struct dmar_domain' */
527static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
528{
529 return container_of(dom, struct dmar_domain, domain);
530}
531
ba395927
KA
532static int __init intel_iommu_setup(char *str)
533{
534 if (!str)
535 return -EINVAL;
536 while (*str) {
0cd5c3c8
KM
537 if (!strncmp(str, "on", 2)) {
538 dmar_disabled = 0;
9f10e5bf 539 pr_info("IOMMU enabled\n");
0cd5c3c8 540 } else if (!strncmp(str, "off", 3)) {
ba395927 541 dmar_disabled = 1;
9f10e5bf 542 pr_info("IOMMU disabled\n");
ba395927
KA
543 } else if (!strncmp(str, "igfx_off", 8)) {
544 dmar_map_gfx = 0;
9f10e5bf 545 pr_info("Disable GFX device mapping\n");
7d3b03ce 546 } else if (!strncmp(str, "forcedac", 8)) {
9f10e5bf 547 pr_info("Forcing DAC for PCI devices\n");
7d3b03ce 548 dmar_forcedac = 1;
5e0d2a6f 549 } else if (!strncmp(str, "strict", 6)) {
9f10e5bf 550 pr_info("Disable batched IOTLB flush\n");
5e0d2a6f 551 intel_iommu_strict = 1;
6dd9a7c7 552 } else if (!strncmp(str, "sp_off", 6)) {
9f10e5bf 553 pr_info("Disable supported super page\n");
6dd9a7c7 554 intel_iommu_superpage = 0;
c83b2f20
DW
555 } else if (!strncmp(str, "ecs_off", 7)) {
556 printk(KERN_INFO
557 "Intel-IOMMU: disable extended context table support\n");
558 intel_iommu_ecs = 0;
ba395927
KA
559 }
560
561 str += strcspn(str, ",");
562 while (*str == ',')
563 str++;
564 }
565 return 0;
566}
567__setup("intel_iommu=", intel_iommu_setup);
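/*
 * Example (illustration only): booting with "intel_iommu=on,sp_off"
 * walks the comma-separated tokens above, first clearing dmar_disabled
 * and then disabling superpage support.
 */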
568
569static struct kmem_cache *iommu_domain_cache;
570static struct kmem_cache *iommu_devinfo_cache;
ba395927 571
4c923d47 572static inline void *alloc_pgtable_page(int node)
eb3fa7cb 573{
4c923d47
SS
574 struct page *page;
575 void *vaddr = NULL;
eb3fa7cb 576
4c923d47
SS
577 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
578 if (page)
579 vaddr = page_address(page);
eb3fa7cb 580 return vaddr;
ba395927
KA
581}
582
583static inline void free_pgtable_page(void *vaddr)
584{
585 free_page((unsigned long)vaddr);
586}
587
588static inline void *alloc_domain_mem(void)
589{
354bb65e 590 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
ba395927
KA
591}
592
38717946 593static void free_domain_mem(void *vaddr)
ba395927
KA
594{
595 kmem_cache_free(iommu_domain_cache, vaddr);
596}
597
598static inline void * alloc_devinfo_mem(void)
599{
354bb65e 600 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
ba395927
KA
601}
602
603static inline void free_devinfo_mem(void *vaddr)
604{
605 kmem_cache_free(iommu_devinfo_cache, vaddr);
606}
607
ab8dfe25
JL
608static inline int domain_type_is_vm(struct dmar_domain *domain)
609{
610 return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE;
611}
612
613static inline int domain_type_is_vm_or_si(struct dmar_domain *domain)
614{
615 return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE |
616 DOMAIN_FLAG_STATIC_IDENTITY);
617}
1b573683 618
162d1b10
JL
619static inline int domain_pfn_supported(struct dmar_domain *domain,
620 unsigned long pfn)
621{
622 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
623
624 return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
625}
626
4ed0d3e6 627static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
1b573683
WH
628{
629 unsigned long sagaw;
630 int agaw = -1;
631
632 sagaw = cap_sagaw(iommu->cap);
4ed0d3e6 633 for (agaw = width_to_agaw(max_gaw);
1b573683
WH
634 agaw >= 0; agaw--) {
635 if (test_bit(agaw, &sagaw))
636 break;
637 }
638
639 return agaw;
640}
641
4ed0d3e6
FY
642/*
643 * Calculate max SAGAW for each iommu.
644 */
645int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
646{
647 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
648}
649
650/*
 651 * Calculate agaw for each iommu.
 652 * "SAGAW" may be different across iommus; use a default agaw, and
 653 * fall back to a smaller supported agaw for iommus that lack the default.
654 */
655int iommu_calculate_agaw(struct intel_iommu *iommu)
656{
657 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
658}
659
2c2e2c38 660/* This function only returns a single iommu in a domain */
8c11e798
WH
661static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
662{
663 int iommu_id;
664
2c2e2c38 665 /* si_domain and vm domain should not get here. */
ab8dfe25 666 BUG_ON(domain_type_is_vm_or_si(domain));
1b198bb0 667 iommu_id = find_first_bit(domain->iommu_bmp, g_num_of_iommus);
8c11e798
WH
668 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
669 return NULL;
670
671 return g_iommus[iommu_id];
672}
673
8e604097
WH
674static void domain_update_iommu_coherency(struct dmar_domain *domain)
675{
d0501960
DW
676 struct dmar_drhd_unit *drhd;
677 struct intel_iommu *iommu;
2f119c78
QL
678 bool found = false;
679 int i;
2e12bc29 680
d0501960 681 domain->iommu_coherency = 1;
8e604097 682
1b198bb0 683 for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
2f119c78 684 found = true;
8e604097
WH
685 if (!ecap_coherent(g_iommus[i]->ecap)) {
686 domain->iommu_coherency = 0;
687 break;
688 }
8e604097 689 }
d0501960
DW
690 if (found)
691 return;
692
693 /* No hardware attached; use lowest common denominator */
694 rcu_read_lock();
695 for_each_active_iommu(iommu, drhd) {
696 if (!ecap_coherent(iommu->ecap)) {
697 domain->iommu_coherency = 0;
698 break;
699 }
700 }
701 rcu_read_unlock();
8e604097
WH
702}
703
161f6934 704static int domain_update_iommu_snooping(struct intel_iommu *skip)
58c610bd 705{
161f6934
JL
706 struct dmar_drhd_unit *drhd;
707 struct intel_iommu *iommu;
708 int ret = 1;
58c610bd 709
161f6934
JL
710 rcu_read_lock();
711 for_each_active_iommu(iommu, drhd) {
712 if (iommu != skip) {
713 if (!ecap_sc_support(iommu->ecap)) {
714 ret = 0;
715 break;
716 }
58c610bd 717 }
58c610bd 718 }
161f6934
JL
719 rcu_read_unlock();
720
721 return ret;
58c610bd
SY
722}
723
161f6934 724static int domain_update_iommu_superpage(struct intel_iommu *skip)
6dd9a7c7 725{
8140a95d 726 struct dmar_drhd_unit *drhd;
161f6934 727 struct intel_iommu *iommu;
8140a95d 728 int mask = 0xf;
6dd9a7c7
YS
729
730 if (!intel_iommu_superpage) {
161f6934 731 return 0;
6dd9a7c7
YS
732 }
733
8140a95d 734 /* set iommu_superpage to the smallest common denominator */
0e242612 735 rcu_read_lock();
8140a95d 736 for_each_active_iommu(iommu, drhd) {
161f6934
JL
737 if (iommu != skip) {
738 mask &= cap_super_page_val(iommu->cap);
739 if (!mask)
740 break;
6dd9a7c7
YS
741 }
742 }
0e242612
JL
743 rcu_read_unlock();
744
161f6934 745 return fls(mask);
6dd9a7c7
YS
746}
747
58c610bd
SY
748/* Some capabilities may be different across iommus */
749static void domain_update_iommu_cap(struct dmar_domain *domain)
750{
751 domain_update_iommu_coherency(domain);
161f6934
JL
752 domain->iommu_snooping = domain_update_iommu_snooping(NULL);
753 domain->iommu_superpage = domain_update_iommu_superpage(NULL);
58c610bd
SY
754}
755
03ecc32c
DW
756static inline struct context_entry *iommu_context_addr(struct intel_iommu *iommu,
757 u8 bus, u8 devfn, int alloc)
758{
759 struct root_entry *root = &iommu->root_entry[bus];
760 struct context_entry *context;
761 u64 *entry;
762
c83b2f20 763 if (ecs_enabled(iommu)) {
03ecc32c
DW
764 if (devfn >= 0x80) {
765 devfn -= 0x80;
766 entry = &root->hi;
767 }
768 devfn *= 2;
769 }
770 entry = &root->lo;
771 if (*entry & 1)
772 context = phys_to_virt(*entry & VTD_PAGE_MASK);
773 else {
774 unsigned long phy_addr;
775 if (!alloc)
776 return NULL;
777
778 context = alloc_pgtable_page(iommu->node);
779 if (!context)
780 return NULL;
781
782 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
783 phy_addr = virt_to_phys((void *)context);
784 *entry = phy_addr | 1;
785 __iommu_flush_cache(iommu, entry, sizeof(*entry));
786 }
787 return &context[devfn];
788}
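/*
 * Illustration (not authoritative): with extended context support each
 * root entry covers one bus and holds two context-table pointers - the
 * table reached through root->lo serves devfn 0x00-0x7f and the one
 * through root->hi serves devfn 0x80-0xff - and each device consumes
 * two 128-bit slots, hence the "devfn *= 2" above. In legacy mode the
 * single table in root->lo has one 128-bit entry per devfn.
 */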
789
4ed6a540
DW
790static int iommu_dummy(struct device *dev)
791{
792 return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
793}
794
156baca8 795static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
c7151a8d
WH
796{
797 struct dmar_drhd_unit *drhd = NULL;
b683b230 798 struct intel_iommu *iommu;
156baca8
DW
799 struct device *tmp;
800 struct pci_dev *ptmp, *pdev = NULL;
aa4d066a 801 u16 segment = 0;
c7151a8d
WH
802 int i;
803
4ed6a540
DW
804 if (iommu_dummy(dev))
805 return NULL;
806
156baca8
DW
807 if (dev_is_pci(dev)) {
808 pdev = to_pci_dev(dev);
809 segment = pci_domain_nr(pdev->bus);
ca5b74d2 810 } else if (has_acpi_companion(dev))
156baca8
DW
811 dev = &ACPI_COMPANION(dev)->dev;
812
0e242612 813 rcu_read_lock();
b683b230 814 for_each_active_iommu(iommu, drhd) {
156baca8 815 if (pdev && segment != drhd->segment)
276dbf99 816 continue;
c7151a8d 817
b683b230 818 for_each_active_dev_scope(drhd->devices,
156baca8
DW
819 drhd->devices_cnt, i, tmp) {
820 if (tmp == dev) {
821 *bus = drhd->devices[i].bus;
822 *devfn = drhd->devices[i].devfn;
b683b230 823 goto out;
156baca8
DW
824 }
825
826 if (!pdev || !dev_is_pci(tmp))
827 continue;
828
829 ptmp = to_pci_dev(tmp);
830 if (ptmp->subordinate &&
831 ptmp->subordinate->number <= pdev->bus->number &&
832 ptmp->subordinate->busn_res.end >= pdev->bus->number)
833 goto got_pdev;
924b6231 834 }
c7151a8d 835
156baca8
DW
836 if (pdev && drhd->include_all) {
837 got_pdev:
838 *bus = pdev->bus->number;
839 *devfn = pdev->devfn;
b683b230 840 goto out;
156baca8 841 }
c7151a8d 842 }
b683b230 843 iommu = NULL;
156baca8 844 out:
0e242612 845 rcu_read_unlock();
c7151a8d 846
b683b230 847 return iommu;
c7151a8d
WH
848}
849
5331fe6f
WH
850static void domain_flush_cache(struct dmar_domain *domain,
851 void *addr, int size)
852{
853 if (!domain->iommu_coherency)
854 clflush_cache_range(addr, size);
855}
856
ba395927
KA
857static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
858{
ba395927 859 struct context_entry *context;
03ecc32c 860 int ret = 0;
ba395927
KA
861 unsigned long flags;
862
863 spin_lock_irqsave(&iommu->lock, flags);
03ecc32c
DW
864 context = iommu_context_addr(iommu, bus, devfn, 0);
865 if (context)
866 ret = context_present(context);
ba395927
KA
867 spin_unlock_irqrestore(&iommu->lock, flags);
868 return ret;
869}
870
871static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
872{
ba395927
KA
873 struct context_entry *context;
874 unsigned long flags;
875
876 spin_lock_irqsave(&iommu->lock, flags);
03ecc32c 877 context = iommu_context_addr(iommu, bus, devfn, 0);
ba395927 878 if (context) {
03ecc32c
DW
879 context_clear_entry(context);
880 __iommu_flush_cache(iommu, context, sizeof(*context));
ba395927
KA
881 }
882 spin_unlock_irqrestore(&iommu->lock, flags);
883}
884
885static void free_context_table(struct intel_iommu *iommu)
886{
ba395927
KA
887 int i;
888 unsigned long flags;
889 struct context_entry *context;
890
891 spin_lock_irqsave(&iommu->lock, flags);
892 if (!iommu->root_entry) {
893 goto out;
894 }
895 for (i = 0; i < ROOT_ENTRY_NR; i++) {
03ecc32c 896 context = iommu_context_addr(iommu, i, 0, 0);
ba395927
KA
897 if (context)
898 free_pgtable_page(context);
03ecc32c 899
c83b2f20 900 if (!ecs_enabled(iommu))
03ecc32c
DW
901 continue;
902
903 context = iommu_context_addr(iommu, i, 0x80, 0);
904 if (context)
905 free_pgtable_page(context);
906
ba395927
KA
907 }
908 free_pgtable_page(iommu->root_entry);
909 iommu->root_entry = NULL;
910out:
911 spin_unlock_irqrestore(&iommu->lock, flags);
912}
913
b026fd28 914static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
5cf0a76f 915 unsigned long pfn, int *target_level)
ba395927 916{
ba395927
KA
917 struct dma_pte *parent, *pte = NULL;
918 int level = agaw_to_level(domain->agaw);
4399c8bf 919 int offset;
ba395927
KA
920
921 BUG_ON(!domain->pgd);
f9423606 922
162d1b10 923 if (!domain_pfn_supported(domain, pfn))
f9423606
JS
924 /* Address beyond IOMMU's addressing capabilities. */
925 return NULL;
926
ba395927
KA
927 parent = domain->pgd;
928
5cf0a76f 929 while (1) {
ba395927
KA
930 void *tmp_page;
931
b026fd28 932 offset = pfn_level_offset(pfn, level);
ba395927 933 pte = &parent[offset];
5cf0a76f 934 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
6dd9a7c7 935 break;
5cf0a76f 936 if (level == *target_level)
ba395927
KA
937 break;
938
19c239ce 939 if (!dma_pte_present(pte)) {
c85994e4
DW
940 uint64_t pteval;
941
4c923d47 942 tmp_page = alloc_pgtable_page(domain->nid);
ba395927 943
206a73c1 944 if (!tmp_page)
ba395927 945 return NULL;
206a73c1 946
c85994e4 947 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
64de5af0 948 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
effad4b5 949 if (cmpxchg64(&pte->val, 0ULL, pteval))
c85994e4
DW
950 /* Someone else set it while we were thinking; use theirs. */
951 free_pgtable_page(tmp_page);
effad4b5 952 else
c85994e4 953 domain_flush_cache(domain, pte, sizeof(*pte));
ba395927 954 }
5cf0a76f
DW
955 if (level == 1)
956 break;
957
19c239ce 958 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
959 level--;
960 }
961
5cf0a76f
DW
962 if (!*target_level)
963 *target_level = level;
964
ba395927
KA
965 return pte;
966}
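/*
 * Usage sketch, illustration only: to install a 4KiB mapping a caller
 * asks for a level-1 PTE and lets the walk allocate any missing
 * intermediate tables on the way down. Passing a *target_level of 0
 * instead returns whatever level is already present (a pure lookup).
 */
static inline struct dma_pte *
pfn_to_dma_pte_example(struct dmar_domain *domain, unsigned long iov_pfn)
{
	int level = 1;	/* 1 == 4KiB leaf; 2 would request a 2MiB superpage slot */

	return pfn_to_dma_pte(domain, iov_pfn, &level);
}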
967
6dd9a7c7 968
ba395927 969/* return address's pte at specific level */
90dcfb5e
DW
970static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
971 unsigned long pfn,
6dd9a7c7 972 int level, int *large_page)
ba395927
KA
973{
974 struct dma_pte *parent, *pte = NULL;
975 int total = agaw_to_level(domain->agaw);
976 int offset;
977
978 parent = domain->pgd;
979 while (level <= total) {
90dcfb5e 980 offset = pfn_level_offset(pfn, total);
ba395927
KA
981 pte = &parent[offset];
982 if (level == total)
983 return pte;
984
6dd9a7c7
YS
985 if (!dma_pte_present(pte)) {
986 *large_page = total;
ba395927 987 break;
6dd9a7c7
YS
988 }
989
e16922af 990 if (dma_pte_superpage(pte)) {
6dd9a7c7
YS
991 *large_page = total;
992 return pte;
993 }
994
19c239ce 995 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
996 total--;
997 }
998 return NULL;
999}
1000
ba395927 1001/* clear last level pte, a tlb flush should be followed */
5cf0a76f 1002static void dma_pte_clear_range(struct dmar_domain *domain,
595badf5
DW
1003 unsigned long start_pfn,
1004 unsigned long last_pfn)
ba395927 1005{
6dd9a7c7 1006 unsigned int large_page = 1;
310a5ab9 1007 struct dma_pte *first_pte, *pte;
66eae846 1008
162d1b10
JL
1009 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1010 BUG_ON(!domain_pfn_supported(domain, last_pfn));
59c36286 1011 BUG_ON(start_pfn > last_pfn);
ba395927 1012
04b18e65 1013 /* we don't need lock here; nobody else touches the iova range */
59c36286 1014 do {
6dd9a7c7
YS
1015 large_page = 1;
1016 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
310a5ab9 1017 if (!pte) {
6dd9a7c7 1018 start_pfn = align_to_level(start_pfn + 1, large_page + 1);
310a5ab9
DW
1019 continue;
1020 }
6dd9a7c7 1021 do {
310a5ab9 1022 dma_clear_pte(pte);
6dd9a7c7 1023 start_pfn += lvl_to_nr_pages(large_page);
310a5ab9 1024 pte++;
75e6bf96
DW
1025 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
1026
310a5ab9
DW
1027 domain_flush_cache(domain, first_pte,
1028 (void *)pte - (void *)first_pte);
59c36286
DW
1029
1030 } while (start_pfn && start_pfn <= last_pfn);
ba395927
KA
1031}
1032
3269ee0b
AW
1033static void dma_pte_free_level(struct dmar_domain *domain, int level,
1034 struct dma_pte *pte, unsigned long pfn,
1035 unsigned long start_pfn, unsigned long last_pfn)
1036{
1037 pfn = max(start_pfn, pfn);
1038 pte = &pte[pfn_level_offset(pfn, level)];
1039
1040 do {
1041 unsigned long level_pfn;
1042 struct dma_pte *level_pte;
1043
1044 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
1045 goto next;
1046
1047 level_pfn = pfn & level_mask(level - 1);
1048 level_pte = phys_to_virt(dma_pte_addr(pte));
1049
1050 if (level > 2)
1051 dma_pte_free_level(domain, level - 1, level_pte,
1052 level_pfn, start_pfn, last_pfn);
1053
1054 /* If range covers entire pagetable, free it */
1055 if (!(start_pfn > level_pfn ||
08336fd2 1056 last_pfn < level_pfn + level_size(level) - 1)) {
3269ee0b
AW
1057 dma_clear_pte(pte);
1058 domain_flush_cache(domain, pte, sizeof(*pte));
1059 free_pgtable_page(level_pte);
1060 }
1061next:
1062 pfn += level_size(level);
1063 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1064}
1065
ba395927
KA
1066/* free page table pages. last level pte should already be cleared */
1067static void dma_pte_free_pagetable(struct dmar_domain *domain,
d794dc9b
DW
1068 unsigned long start_pfn,
1069 unsigned long last_pfn)
ba395927 1070{
162d1b10
JL
1071 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1072 BUG_ON(!domain_pfn_supported(domain, last_pfn));
59c36286 1073 BUG_ON(start_pfn > last_pfn);
ba395927 1074
d41a4adb
JL
1075 dma_pte_clear_range(domain, start_pfn, last_pfn);
1076
f3a0a52f 1077 /* We don't need lock here; nobody else touches the iova range */
3269ee0b
AW
1078 dma_pte_free_level(domain, agaw_to_level(domain->agaw),
1079 domain->pgd, 0, start_pfn, last_pfn);
6660c63a 1080
ba395927 1081 /* free pgd */
d794dc9b 1082 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
ba395927
KA
1083 free_pgtable_page(domain->pgd);
1084 domain->pgd = NULL;
1085 }
1086}
1087
ea8ea460
DW
1088/* When a page at a given level is being unlinked from its parent, we don't
1089 need to *modify* it at all. All we need to do is make a list of all the
1090 pages which can be freed just as soon as we've flushed the IOTLB and we
1091 know the hardware page-walk will no longer touch them.
1092 The 'pte' argument is the *parent* PTE, pointing to the page that is to
1093 be freed. */
1094static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1095 int level, struct dma_pte *pte,
1096 struct page *freelist)
1097{
1098 struct page *pg;
1099
1100 pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1101 pg->freelist = freelist;
1102 freelist = pg;
1103
1104 if (level == 1)
1105 return freelist;
1106
adeb2590
JL
1107 pte = page_address(pg);
1108 do {
ea8ea460
DW
1109 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1110 freelist = dma_pte_list_pagetables(domain, level - 1,
1111 pte, freelist);
adeb2590
JL
1112 pte++;
1113 } while (!first_pte_in_page(pte));
ea8ea460
DW
1114
1115 return freelist;
1116}
1117
1118static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1119 struct dma_pte *pte, unsigned long pfn,
1120 unsigned long start_pfn,
1121 unsigned long last_pfn,
1122 struct page *freelist)
1123{
1124 struct dma_pte *first_pte = NULL, *last_pte = NULL;
1125
1126 pfn = max(start_pfn, pfn);
1127 pte = &pte[pfn_level_offset(pfn, level)];
1128
1129 do {
1130 unsigned long level_pfn;
1131
1132 if (!dma_pte_present(pte))
1133 goto next;
1134
1135 level_pfn = pfn & level_mask(level);
1136
1137 /* If range covers entire pagetable, free it */
1138 if (start_pfn <= level_pfn &&
1139 last_pfn >= level_pfn + level_size(level) - 1) {
 1140 /* These subordinate page tables are going away entirely. Don't
1141 bother to clear them; we're just going to *free* them. */
1142 if (level > 1 && !dma_pte_superpage(pte))
1143 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1144
1145 dma_clear_pte(pte);
1146 if (!first_pte)
1147 first_pte = pte;
1148 last_pte = pte;
1149 } else if (level > 1) {
1150 /* Recurse down into a level that isn't *entirely* obsolete */
1151 freelist = dma_pte_clear_level(domain, level - 1,
1152 phys_to_virt(dma_pte_addr(pte)),
1153 level_pfn, start_pfn, last_pfn,
1154 freelist);
1155 }
1156next:
1157 pfn += level_size(level);
1158 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1159
1160 if (first_pte)
1161 domain_flush_cache(domain, first_pte,
1162 (void *)++last_pte - (void *)first_pte);
1163
1164 return freelist;
1165}
1166
1167/* We can't just free the pages because the IOMMU may still be walking
1168 the page tables, and may have cached the intermediate levels. The
1169 pages can only be freed after the IOTLB flush has been done. */
1170struct page *domain_unmap(struct dmar_domain *domain,
1171 unsigned long start_pfn,
1172 unsigned long last_pfn)
1173{
ea8ea460
DW
1174 struct page *freelist = NULL;
1175
162d1b10
JL
1176 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1177 BUG_ON(!domain_pfn_supported(domain, last_pfn));
ea8ea460
DW
1178 BUG_ON(start_pfn > last_pfn);
1179
1180 /* we don't need lock here; nobody else touches the iova range */
1181 freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1182 domain->pgd, 0, start_pfn, last_pfn, NULL);
1183
1184 /* free pgd */
1185 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1186 struct page *pgd_page = virt_to_page(domain->pgd);
1187 pgd_page->freelist = freelist;
1188 freelist = pgd_page;
1189
1190 domain->pgd = NULL;
1191 }
1192
1193 return freelist;
1194}
1195
1196void dma_free_pagelist(struct page *freelist)
1197{
1198 struct page *pg;
1199
1200 while ((pg = freelist)) {
1201 freelist = pg->freelist;
1202 free_pgtable_page(page_address(pg));
1203 }
1204}
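/*
 * Usage sketch, illustration only: tear-down is split so that page-table
 * pages outlive any walks the hardware may still have cached. Callers
 * are expected to do roughly
 *
 *	freelist = domain_unmap(domain, start_pfn, last_pfn);
 *	iommu_flush_iotlb_psi(iommu, did, start_pfn, nrpages, 0, 0);
 *	dma_free_pagelist(freelist);
 *
 * i.e. flush the IOTLB before the collected pages go back to the
 * allocator.
 */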
1205
ba395927
KA
1206/* iommu handling */
1207static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1208{
1209 struct root_entry *root;
1210 unsigned long flags;
1211
4c923d47 1212 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
ffebeb46 1213 if (!root) {
9f10e5bf 1214 pr_err("Allocating root entry for %s failed\n",
ffebeb46 1215 iommu->name);
ba395927 1216 return -ENOMEM;
ffebeb46 1217 }
ba395927 1218
5b6985ce 1219 __iommu_flush_cache(iommu, root, ROOT_SIZE);
ba395927
KA
1220
1221 spin_lock_irqsave(&iommu->lock, flags);
1222 iommu->root_entry = root;
1223 spin_unlock_irqrestore(&iommu->lock, flags);
1224
1225 return 0;
1226}
1227
ba395927
KA
1228static void iommu_set_root_entry(struct intel_iommu *iommu)
1229{
03ecc32c 1230 u64 addr;
c416daa9 1231 u32 sts;
ba395927
KA
1232 unsigned long flag;
1233
03ecc32c 1234 addr = virt_to_phys(iommu->root_entry);
c83b2f20 1235 if (ecs_enabled(iommu))
03ecc32c 1236 addr |= DMA_RTADDR_RTT;
ba395927 1237
1f5b3c3f 1238 raw_spin_lock_irqsave(&iommu->register_lock, flag);
03ecc32c 1239 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
ba395927 1240
c416daa9 1241 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1242
1243 /* Make sure hardware complete it */
1244 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1245 readl, (sts & DMA_GSTS_RTPS), sts);
ba395927 1246
1f5b3c3f 1247 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1248}
1249
1250static void iommu_flush_write_buffer(struct intel_iommu *iommu)
1251{
1252 u32 val;
1253 unsigned long flag;
1254
9af88143 1255 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
ba395927 1256 return;
ba395927 1257
1f5b3c3f 1258 raw_spin_lock_irqsave(&iommu->register_lock, flag);
462b60f6 1259 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1260
1261 /* Make sure hardware complete it */
1262 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1263 readl, (!(val & DMA_GSTS_WBFS)), val);
ba395927 1264
1f5b3c3f 1265 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1266}
1267
1268/* return value determine if we need a write buffer flush */
4c25a2c1
DW
1269static void __iommu_flush_context(struct intel_iommu *iommu,
1270 u16 did, u16 source_id, u8 function_mask,
1271 u64 type)
ba395927
KA
1272{
1273 u64 val = 0;
1274 unsigned long flag;
1275
ba395927
KA
1276 switch (type) {
1277 case DMA_CCMD_GLOBAL_INVL:
1278 val = DMA_CCMD_GLOBAL_INVL;
1279 break;
1280 case DMA_CCMD_DOMAIN_INVL:
1281 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1282 break;
1283 case DMA_CCMD_DEVICE_INVL:
1284 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1285 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1286 break;
1287 default:
1288 BUG();
1289 }
1290 val |= DMA_CCMD_ICC;
1291
1f5b3c3f 1292 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1293 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1294
1295 /* Make sure hardware complete it */
1296 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1297 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1298
1f5b3c3f 1299 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1300}
1301
ba395927 1302/* return value determine if we need a write buffer flush */
1f0ef2aa
DW
1303static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1304 u64 addr, unsigned int size_order, u64 type)
ba395927
KA
1305{
1306 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1307 u64 val = 0, val_iva = 0;
1308 unsigned long flag;
1309
ba395927
KA
1310 switch (type) {
1311 case DMA_TLB_GLOBAL_FLUSH:
1312 /* global flush doesn't need set IVA_REG */
1313 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1314 break;
1315 case DMA_TLB_DSI_FLUSH:
1316 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1317 break;
1318 case DMA_TLB_PSI_FLUSH:
1319 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
ea8ea460 1320 /* IH bit is passed in as part of address */
ba395927
KA
1321 val_iva = size_order | addr;
1322 break;
1323 default:
1324 BUG();
1325 }
1326 /* Note: set drain read/write */
1327#if 0
1328 /*
1329 * This is probably to be super secure.. Looks like we can
1330 * ignore it without any impact.
1331 */
1332 if (cap_read_drain(iommu->cap))
1333 val |= DMA_TLB_READ_DRAIN;
1334#endif
1335 if (cap_write_drain(iommu->cap))
1336 val |= DMA_TLB_WRITE_DRAIN;
1337
1f5b3c3f 1338 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1339 /* Note: Only uses first TLB reg currently */
1340 if (val_iva)
1341 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1342 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1343
1344 /* Make sure hardware complete it */
1345 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1346 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1347
1f5b3c3f 1348 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1349
1350 /* check IOTLB invalidation granularity */
1351 if (DMA_TLB_IAIG(val) == 0)
9f10e5bf 1352 pr_err("Flush IOTLB failed\n");
ba395927 1353 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
9f10e5bf 1354 pr_debug("TLB flush request %Lx, actual %Lx\n",
5b6985ce
FY
1355 (unsigned long long)DMA_TLB_IIRG(type),
1356 (unsigned long long)DMA_TLB_IAIG(val));
ba395927
KA
1357}
1358
64ae892b
DW
1359static struct device_domain_info *
1360iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
1361 u8 bus, u8 devfn)
93a23a72 1362{
2f119c78 1363 bool found = false;
93a23a72
YZ
1364 unsigned long flags;
1365 struct device_domain_info *info;
0bcb3e28 1366 struct pci_dev *pdev;
93a23a72
YZ
1367
1368 if (!ecap_dev_iotlb_support(iommu->ecap))
1369 return NULL;
1370
1371 if (!iommu->qi)
1372 return NULL;
1373
1374 spin_lock_irqsave(&device_domain_lock, flags);
1375 list_for_each_entry(info, &domain->devices, link)
c3b497c6
JL
1376 if (info->iommu == iommu && info->bus == bus &&
1377 info->devfn == devfn) {
2f119c78 1378 found = true;
93a23a72
YZ
1379 break;
1380 }
1381 spin_unlock_irqrestore(&device_domain_lock, flags);
1382
0bcb3e28 1383 if (!found || !info->dev || !dev_is_pci(info->dev))
93a23a72
YZ
1384 return NULL;
1385
0bcb3e28
DW
1386 pdev = to_pci_dev(info->dev);
1387
1388 if (!pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS))
93a23a72
YZ
1389 return NULL;
1390
0bcb3e28 1391 if (!dmar_find_matched_atsr_unit(pdev))
93a23a72
YZ
1392 return NULL;
1393
93a23a72
YZ
1394 return info;
1395}
1396
1397static void iommu_enable_dev_iotlb(struct device_domain_info *info)
ba395927 1398{
0bcb3e28 1399 if (!info || !dev_is_pci(info->dev))
93a23a72
YZ
1400 return;
1401
0bcb3e28 1402 pci_enable_ats(to_pci_dev(info->dev), VTD_PAGE_SHIFT);
93a23a72
YZ
1403}
1404
1405static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1406{
0bcb3e28
DW
1407 if (!info->dev || !dev_is_pci(info->dev) ||
1408 !pci_ats_enabled(to_pci_dev(info->dev)))
93a23a72
YZ
1409 return;
1410
0bcb3e28 1411 pci_disable_ats(to_pci_dev(info->dev));
93a23a72
YZ
1412}
1413
1414static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1415 u64 addr, unsigned mask)
1416{
1417 u16 sid, qdep;
1418 unsigned long flags;
1419 struct device_domain_info *info;
1420
1421 spin_lock_irqsave(&device_domain_lock, flags);
1422 list_for_each_entry(info, &domain->devices, link) {
0bcb3e28
DW
1423 struct pci_dev *pdev;
1424 if (!info->dev || !dev_is_pci(info->dev))
1425 continue;
1426
1427 pdev = to_pci_dev(info->dev);
1428 if (!pci_ats_enabled(pdev))
93a23a72
YZ
1429 continue;
1430
1431 sid = info->bus << 8 | info->devfn;
0bcb3e28 1432 qdep = pci_ats_queue_depth(pdev);
93a23a72
YZ
1433 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1434 }
1435 spin_unlock_irqrestore(&device_domain_lock, flags);
1436}
1437
1f0ef2aa 1438static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
ea8ea460 1439 unsigned long pfn, unsigned int pages, int ih, int map)
ba395927 1440{
9dd2fe89 1441 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
03d6a246 1442 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
ba395927 1443
ba395927
KA
1444 BUG_ON(pages == 0);
1445
ea8ea460
DW
1446 if (ih)
1447 ih = 1 << 6;
ba395927 1448 /*
9dd2fe89
YZ
1449 * Fallback to domain selective flush if no PSI support or the size is
1450 * too big.
ba395927
KA
1451 * PSI requires page size to be 2 ^ x, and the base address is naturally
1452 * aligned to the size
1453 */
9dd2fe89
YZ
1454 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1455 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1f0ef2aa 1456 DMA_TLB_DSI_FLUSH);
9dd2fe89 1457 else
ea8ea460 1458 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
9dd2fe89 1459 DMA_TLB_PSI_FLUSH);
bf92df30
YZ
1460
1461 /*
82653633
NA
1462 * In caching mode, changes of pages from non-present to present require
1463 * flush. However, device IOTLB doesn't need to be flushed in this case.
bf92df30 1464 */
82653633 1465 if (!cap_caching_mode(iommu->cap) || !map)
93a23a72 1466 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
ba395927
KA
1467}
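/*
 * Worked example (illustration only): flushing 300 pages rounds up to
 * mask = ilog2(512) = 9, so the page-selective invalidation covers a
 * naturally aligned 512-page (2MiB) region around addr. If the IOMMU
 * cannot handle that mask (cap_max_amask_val) or lacks PSI entirely,
 * the code above falls back to a domain-selective flush.
 */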
1468
f8bab735 1469static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1470{
1471 u32 pmen;
1472 unsigned long flags;
1473
1f5b3c3f 1474 raw_spin_lock_irqsave(&iommu->register_lock, flags);
f8bab735 1475 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1476 pmen &= ~DMA_PMEN_EPM;
1477 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1478
1479 /* wait for the protected region status bit to clear */
1480 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1481 readl, !(pmen & DMA_PMEN_PRS), pmen);
1482
1f5b3c3f 1483 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
f8bab735 1484}
1485
2a41ccee 1486static void iommu_enable_translation(struct intel_iommu *iommu)
ba395927
KA
1487{
1488 u32 sts;
1489 unsigned long flags;
1490
1f5b3c3f 1491 raw_spin_lock_irqsave(&iommu->register_lock, flags);
c416daa9
DW
1492 iommu->gcmd |= DMA_GCMD_TE;
1493 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1494
1495 /* Make sure hardware complete it */
1496 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1497 readl, (sts & DMA_GSTS_TES), sts);
ba395927 1498
1f5b3c3f 1499 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
ba395927
KA
1500}
1501
2a41ccee 1502static void iommu_disable_translation(struct intel_iommu *iommu)
ba395927
KA
1503{
1504 u32 sts;
1505 unsigned long flag;
1506
1f5b3c3f 1507 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1508 iommu->gcmd &= ~DMA_GCMD_TE;
1509 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1510
1511 /* Make sure hardware complete it */
1512 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1513 readl, (!(sts & DMA_GSTS_TES)), sts);
ba395927 1514
1f5b3c3f 1515 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1516}
1517
3460a6d9 1518
ba395927
KA
1519static int iommu_init_domains(struct intel_iommu *iommu)
1520{
1521 unsigned long ndomains;
1522 unsigned long nlongs;
1523
1524 ndomains = cap_ndoms(iommu->cap);
9f10e5bf
JR
1525 pr_debug("%s: Number of Domains supported <%ld>\n",
1526 iommu->name, ndomains);
ba395927
KA
1527 nlongs = BITS_TO_LONGS(ndomains);
1528
94a91b50
DD
1529 spin_lock_init(&iommu->lock);
1530
ba395927
KA
1531 /* TBD: there might be 64K domains,
1532 * consider other allocation for future chip
1533 */
1534 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1535 if (!iommu->domain_ids) {
9f10e5bf
JR
1536 pr_err("%s: Allocating domain id array failed\n",
1537 iommu->name);
ba395927
KA
1538 return -ENOMEM;
1539 }
1540 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1541 GFP_KERNEL);
1542 if (!iommu->domains) {
9f10e5bf
JR
1543 pr_err("%s: Allocating domain array failed\n",
1544 iommu->name);
852bdb04
JL
1545 kfree(iommu->domain_ids);
1546 iommu->domain_ids = NULL;
ba395927
KA
1547 return -ENOMEM;
1548 }
1549
1550 /*
c0e8a6c8
JR
1551 * If Caching mode is set, then invalid translations are tagged
1552 * with domain-id 0, hence we need to pre-allocate it. We also
1553 * use domain-id 0 as a marker for non-allocated domain-id, so
1554 * make sure it is not used for a real domain.
ba395927 1555 */
c0e8a6c8
JR
1556 set_bit(0, iommu->domain_ids);
1557
ba395927
KA
1558 return 0;
1559}
ba395927 1560
ffebeb46 1561static void disable_dmar_iommu(struct intel_iommu *iommu)
ba395927
KA
1562{
1563 struct dmar_domain *domain;
2a46ddf7 1564 int i;
ba395927 1565
94a91b50 1566 if ((iommu->domains) && (iommu->domain_ids)) {
a45946ab 1567 for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
a4eaa86c
JL
1568 /*
1569 * Domain id 0 is reserved for invalid translation
c0e8a6c8
JR
1570 * if hardware supports caching mode and used as
1571 * a non-allocated marker.
a4eaa86c 1572 */
c0e8a6c8 1573 if (i == 0)
a4eaa86c
JL
1574 continue;
1575
94a91b50
DD
1576 domain = iommu->domains[i];
1577 clear_bit(i, iommu->domain_ids);
129ad281
JL
1578 if (domain_detach_iommu(domain, iommu) == 0 &&
1579 !domain_type_is_vm(domain))
92d03cc8 1580 domain_exit(domain);
5e98c4b1 1581 }
ba395927
KA
1582 }
1583
1584 if (iommu->gcmd & DMA_GCMD_TE)
1585 iommu_disable_translation(iommu);
ffebeb46 1586}
ba395927 1587
ffebeb46
JL
1588static void free_dmar_iommu(struct intel_iommu *iommu)
1589{
1590 if ((iommu->domains) && (iommu->domain_ids)) {
1591 kfree(iommu->domains);
1592 kfree(iommu->domain_ids);
1593 iommu->domains = NULL;
1594 iommu->domain_ids = NULL;
1595 }
ba395927 1596
d9630fe9
WH
1597 g_iommus[iommu->seq_id] = NULL;
1598
ba395927
KA
1599 /* free context mapping */
1600 free_context_table(iommu);
ba395927
KA
1601}
1602
ab8dfe25 1603static struct dmar_domain *alloc_domain(int flags)
ba395927 1604{
92d03cc8
JL
1605 /* domain id for virtual machine, it won't be set in context */
1606 static atomic_t vm_domid = ATOMIC_INIT(0);
ba395927 1607 struct dmar_domain *domain;
ba395927
KA
1608
1609 domain = alloc_domain_mem();
1610 if (!domain)
1611 return NULL;
1612
ab8dfe25 1613 memset(domain, 0, sizeof(*domain));
4c923d47 1614 domain->nid = -1;
ab8dfe25 1615 domain->flags = flags;
92d03cc8
JL
1616 spin_lock_init(&domain->iommu_lock);
1617 INIT_LIST_HEAD(&domain->devices);
ab8dfe25 1618 if (flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
92d03cc8 1619 domain->id = atomic_inc_return(&vm_domid);
2c2e2c38
FY
1620
1621 return domain;
1622}
1623
fb170fb4
JL
1624static int __iommu_attach_domain(struct dmar_domain *domain,
1625 struct intel_iommu *iommu)
2c2e2c38
FY
1626{
1627 int num;
1628 unsigned long ndomains;
2c2e2c38 1629
ba395927 1630 ndomains = cap_ndoms(iommu->cap);
ba395927 1631 num = find_first_zero_bit(iommu->domain_ids, ndomains);
fb170fb4
JL
1632 if (num < ndomains) {
1633 set_bit(num, iommu->domain_ids);
1634 iommu->domains[num] = domain;
c0e8a6c8 1635 domain->iommu_did[iommu->seq_id] = num;
fb170fb4
JL
1636 } else {
1637 num = -ENOSPC;
ba395927
KA
1638 }
1639
fb170fb4
JL
1640 return num;
1641}
1642
1643static int iommu_attach_domain(struct dmar_domain *domain,
1644 struct intel_iommu *iommu)
1645{
1646 int num;
1647 unsigned long flags;
1648
1649 spin_lock_irqsave(&iommu->lock, flags);
1650 num = __iommu_attach_domain(domain, iommu);
44bde614 1651 spin_unlock_irqrestore(&iommu->lock, flags);
fb170fb4 1652 if (num < 0)
9f10e5bf 1653 pr_err("%s: No free domain ids\n", iommu->name);
ba395927 1654
fb170fb4 1655 return num;
ba395927
KA
1656}
1657
44bde614
JL
1658static int iommu_attach_vm_domain(struct dmar_domain *domain,
1659 struct intel_iommu *iommu)
1660{
1661 int num;
44bde614 1662
c0e8a6c8
JR
1663 num = domain->iommu_did[iommu->seq_id];
1664 if (num)
1665 return num;
44bde614
JL
1666
1667 return __iommu_attach_domain(domain, iommu);
1668}
1669
2c2e2c38
FY
1670static void iommu_detach_domain(struct dmar_domain *domain,
1671 struct intel_iommu *iommu)
ba395927
KA
1672{
1673 unsigned long flags;
c0e8a6c8 1674 int num;
ba395927 1675
8c11e798 1676 spin_lock_irqsave(&iommu->lock, flags);
c0e8a6c8
JR
1677
1678 num = domain->iommu_did[iommu->seq_id];
1679
1680 if (num == 0)
1681 return;
1682
1683 clear_bit(num, iommu->domain_ids);
1684 iommu->domains[num] = NULL;
1685
8c11e798 1686 spin_unlock_irqrestore(&iommu->lock, flags);
ba395927
KA
1687}
1688
fb170fb4
JL
1689static void domain_attach_iommu(struct dmar_domain *domain,
1690 struct intel_iommu *iommu)
1691{
1692 unsigned long flags;
1693
1694 spin_lock_irqsave(&domain->iommu_lock, flags);
1695 if (!test_and_set_bit(iommu->seq_id, domain->iommu_bmp)) {
1696 domain->iommu_count++;
1697 if (domain->iommu_count == 1)
1698 domain->nid = iommu->node;
1699 domain_update_iommu_cap(domain);
1700 }
1701 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1702}
1703
1704static int domain_detach_iommu(struct dmar_domain *domain,
1705 struct intel_iommu *iommu)
1706{
1707 unsigned long flags;
1708 int count = INT_MAX;
1709
1710 spin_lock_irqsave(&domain->iommu_lock, flags);
1711 if (test_and_clear_bit(iommu->seq_id, domain->iommu_bmp)) {
1712 count = --domain->iommu_count;
1713 domain_update_iommu_cap(domain);
c0e8a6c8 1714 domain->iommu_did[iommu->seq_id] = 0;
fb170fb4
JL
1715 }
1716 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1717
1718 return count;
1719}
1720
ba395927 1721static struct iova_domain reserved_iova_list;
8a443df4 1722static struct lock_class_key reserved_rbtree_key;
ba395927 1723
51a63e67 1724static int dmar_init_reserved_ranges(void)
ba395927
KA
1725{
1726 struct pci_dev *pdev = NULL;
1727 struct iova *iova;
1728 int i;
ba395927 1729
0fb5fe87
RM
1730 init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN,
1731 DMA_32BIT_PFN);
ba395927 1732
8a443df4
MG
1733 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1734 &reserved_rbtree_key);
1735
ba395927
KA
1736 /* IOAPIC ranges shouldn't be accessed by DMA */
1737 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1738 IOVA_PFN(IOAPIC_RANGE_END));
51a63e67 1739 if (!iova) {
9f10e5bf 1740 pr_err("Reserve IOAPIC range failed\n");
51a63e67
JC
1741 return -ENODEV;
1742 }
ba395927
KA
1743
1744 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1745 for_each_pci_dev(pdev) {
1746 struct resource *r;
1747
1748 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1749 r = &pdev->resource[i];
1750 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1751 continue;
1a4a4551
DW
1752 iova = reserve_iova(&reserved_iova_list,
1753 IOVA_PFN(r->start),
1754 IOVA_PFN(r->end));
51a63e67 1755 if (!iova) {
9f10e5bf 1756 pr_err("Reserve iova failed\n");
51a63e67
JC
1757 return -ENODEV;
1758 }
ba395927
KA
1759 }
1760 }
51a63e67 1761 return 0;
ba395927
KA
1762}
1763
1764static void domain_reserve_special_ranges(struct dmar_domain *domain)
1765{
1766 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1767}
1768
1769static inline int guestwidth_to_adjustwidth(int gaw)
1770{
1771 int agaw;
1772 int r = (gaw - 12) % 9;
1773
1774 if (r == 0)
1775 agaw = gaw;
1776 else
1777 agaw = gaw + 9 - r;
1778 if (agaw > 64)
1779 agaw = 64;
1780 return agaw;
1781}
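
For reference, a short worked example of the rounding done by guestwidth_to_adjustwidth() above (editorial illustration, not part of the source): the guest address width is rounded up to a whole number of 9-bit page-table levels above the 12-bit page offset, capped at 64.

/* Illustrative sketch (hypothetical inputs):
 *   gaw = 48: r = (48 - 12) % 9 = 0 -> agaw = 48          (4-level table)
 *   gaw = 39: r = (39 - 12) % 9 = 0 -> agaw = 39          (3-level table)
 *   gaw = 36: r = (36 - 12) % 9 = 6 -> agaw = 36 + 9 - 6 = 39
 *   gaw = 66: r = 0 -> agaw = 66, then clamped to 64
 */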
1782
1783static int domain_init(struct dmar_domain *domain, int guest_width)
1784{
1785 struct intel_iommu *iommu;
1786 int adjust_width, agaw;
1787 unsigned long sagaw;
1788
0fb5fe87
RM
1789 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
1790 DMA_32BIT_PFN);
ba395927
KA
1791 domain_reserve_special_ranges(domain);
1792
1793 /* calculate AGAW */
8c11e798 1794 iommu = domain_get_iommu(domain);
ba395927
KA
1795 if (guest_width > cap_mgaw(iommu->cap))
1796 guest_width = cap_mgaw(iommu->cap);
1797 domain->gaw = guest_width;
1798 adjust_width = guestwidth_to_adjustwidth(guest_width);
1799 agaw = width_to_agaw(adjust_width);
1800 sagaw = cap_sagaw(iommu->cap);
1801 if (!test_bit(agaw, &sagaw)) {
1802 /* hardware doesn't support it, choose a bigger one */
9f10e5bf 1803 pr_debug("Hardware doesn't support agaw %d\n", agaw);
ba395927
KA
1804 agaw = find_next_bit(&sagaw, 5, agaw);
1805 if (agaw >= 5)
1806 return -ENODEV;
1807 }
1808 domain->agaw = agaw;
ba395927 1809
8e604097
WH
1810 if (ecap_coherent(iommu->ecap))
1811 domain->iommu_coherency = 1;
1812 else
1813 domain->iommu_coherency = 0;
1814
58c610bd
SY
1815 if (ecap_sc_support(iommu->ecap))
1816 domain->iommu_snooping = 1;
1817 else
1818 domain->iommu_snooping = 0;
1819
214e39aa
DW
1820 if (intel_iommu_superpage)
1821 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1822 else
1823 domain->iommu_superpage = 0;
1824
4c923d47 1825 domain->nid = iommu->node;
c7151a8d 1826
ba395927 1827 /* always allocate the top pgd */
4c923d47 1828 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
ba395927
KA
1829 if (!domain->pgd)
1830 return -ENOMEM;
5b6985ce 1831 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
ba395927
KA
1832 return 0;
1833}
1834
1835static void domain_exit(struct dmar_domain *domain)
1836{
46ebb7af
AW
1837 struct dmar_drhd_unit *drhd;
1838 struct intel_iommu *iommu;
ea8ea460 1839 struct page *freelist = NULL;
ba395927
KA
1840
 1841 /* Domain 0 is reserved, so don't process it */
1842 if (!domain)
1843 return;
1844
7b668357
AW
1845 /* Flush any lazy unmaps that may reference this domain */
1846 if (!intel_iommu_strict)
1847 flush_unmaps_timeout(0);
1848
92d03cc8 1849 /* remove associated devices */
ba395927 1850 domain_remove_dev_info(domain);
92d03cc8 1851
ba395927
KA
1852 /* destroy iovas */
1853 put_iova_domain(&domain->iovad);
ba395927 1854
ea8ea460 1855 freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
ba395927 1856
92d03cc8 1857 /* clear attached or cached domains */
0e242612 1858 rcu_read_lock();
46ebb7af
AW
1859 for_each_active_iommu(iommu, drhd)
1860 if (domain_type_is_vm(domain) ||
1861 test_bit(iommu->seq_id, domain->iommu_bmp))
1862 iommu_detach_domain(domain, iommu);
0e242612 1863 rcu_read_unlock();
2c2e2c38 1864
ea8ea460
DW
1865 dma_free_pagelist(freelist);
1866
ba395927
KA
1867 free_domain_mem(domain);
1868}
1869
64ae892b
DW
1870static int domain_context_mapping_one(struct dmar_domain *domain,
1871 struct intel_iommu *iommu,
1872 u8 bus, u8 devfn, int translation)
ba395927
KA
1873{
1874 struct context_entry *context;
ba395927 1875 unsigned long flags;
ea6606b0 1876 struct dma_pte *pgd;
ea6606b0
WH
1877 int id;
1878 int agaw;
93a23a72 1879 struct device_domain_info *info = NULL;
ba395927
KA
1880
1881 pr_debug("Set context mapping for %02x:%02x.%d\n",
1882 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
4ed0d3e6 1883
ba395927 1884 BUG_ON(!domain->pgd);
4ed0d3e6
FY
1885 BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1886 translation != CONTEXT_TT_MULTI_LEVEL);
5331fe6f 1887
03ecc32c
DW
1888 spin_lock_irqsave(&iommu->lock, flags);
1889 context = iommu_context_addr(iommu, bus, devfn, 1);
1890 spin_unlock_irqrestore(&iommu->lock, flags);
ba395927
KA
1891 if (!context)
1892 return -ENOMEM;
1893 spin_lock_irqsave(&iommu->lock, flags);
c07e7d21 1894 if (context_present(context)) {
ba395927
KA
1895 spin_unlock_irqrestore(&iommu->lock, flags);
1896 return 0;
1897 }
1898
cf484d0e
JR
1899 context_clear_entry(context);
1900
ea6606b0
WH
1901 id = domain->id;
1902 pgd = domain->pgd;
1903
ab8dfe25 1904 if (domain_type_is_vm_or_si(domain)) {
44bde614
JL
1905 if (domain_type_is_vm(domain)) {
1906 id = iommu_attach_vm_domain(domain, iommu);
fb170fb4 1907 if (id < 0) {
ea6606b0 1908 spin_unlock_irqrestore(&iommu->lock, flags);
9f10e5bf 1909 pr_err("%s: No free domain ids\n", iommu->name);
ea6606b0
WH
1910 return -EFAULT;
1911 }
ea6606b0
WH
1912 }
1913
 1914 /* Skip top levels of page tables for
 1915 * an iommu that has a smaller agaw than the default.
1672af11 1916 * Unnecessary for PT mode.
ea6606b0 1917 */
1672af11
CW
1918 if (translation != CONTEXT_TT_PASS_THROUGH) {
1919 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1920 pgd = phys_to_virt(dma_pte_addr(pgd));
1921 if (!dma_pte_present(pgd)) {
1922 spin_unlock_irqrestore(&iommu->lock, flags);
1923 return -ENOMEM;
1924 }
ea6606b0
WH
1925 }
1926 }
1927 }
1928
1929 context_set_domain_id(context, id);
4ed0d3e6 1930
93a23a72 1931 if (translation != CONTEXT_TT_PASS_THROUGH) {
64ae892b 1932 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
93a23a72
YZ
1933 translation = info ? CONTEXT_TT_DEV_IOTLB :
1934 CONTEXT_TT_MULTI_LEVEL;
1935 }
4ed0d3e6
FY
1936 /*
1937 * In pass through mode, AW must be programmed to indicate the largest
1938 * AGAW value supported by hardware. And ASR is ignored by hardware.
1939 */
93a23a72 1940 if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
4ed0d3e6 1941 context_set_address_width(context, iommu->msagaw);
93a23a72
YZ
1942 else {
1943 context_set_address_root(context, virt_to_phys(pgd));
1944 context_set_address_width(context, iommu->agaw);
1945 }
4ed0d3e6
FY
1946
1947 context_set_translation_type(context, translation);
c07e7d21
MM
1948 context_set_fault_enable(context);
1949 context_set_present(context);
5331fe6f 1950 domain_flush_cache(domain, context, sizeof(*context));
ba395927 1951
4c25a2c1
DW
1952 /*
1953 * It's a non-present to present mapping. If hardware doesn't cache
 1954 * non-present entries we only need to flush the write-buffer. If it
1955 * _does_ cache non-present entries, then it does so in the special
1956 * domain #0, which we have to flush:
1957 */
1958 if (cap_caching_mode(iommu->cap)) {
1959 iommu->flush.flush_context(iommu, 0,
1960 (((u16)bus) << 8) | devfn,
1961 DMA_CCMD_MASK_NOBIT,
1962 DMA_CCMD_DEVICE_INVL);
18fd779a 1963 iommu->flush.flush_iotlb(iommu, id, 0, 0, DMA_TLB_DSI_FLUSH);
4c25a2c1 1964 } else {
ba395927 1965 iommu_flush_write_buffer(iommu);
4c25a2c1 1966 }
93a23a72 1967 iommu_enable_dev_iotlb(info);
ba395927 1968 spin_unlock_irqrestore(&iommu->lock, flags);
c7151a8d 1969
fb170fb4
JL
1970 domain_attach_iommu(domain, iommu);
1971
ba395927
KA
1972 return 0;
1973}
1974
579305f7
AW
1975struct domain_context_mapping_data {
1976 struct dmar_domain *domain;
1977 struct intel_iommu *iommu;
1978 int translation;
1979};
1980
1981static int domain_context_mapping_cb(struct pci_dev *pdev,
1982 u16 alias, void *opaque)
1983{
1984 struct domain_context_mapping_data *data = opaque;
1985
1986 return domain_context_mapping_one(data->domain, data->iommu,
1987 PCI_BUS_NUM(alias), alias & 0xff,
1988 data->translation);
1989}
1990
ba395927 1991static int
e1f167f3
DW
1992domain_context_mapping(struct dmar_domain *domain, struct device *dev,
1993 int translation)
ba395927 1994{
64ae892b 1995 struct intel_iommu *iommu;
156baca8 1996 u8 bus, devfn;
579305f7 1997 struct domain_context_mapping_data data;
64ae892b 1998
e1f167f3 1999 iommu = device_to_iommu(dev, &bus, &devfn);
64ae892b
DW
2000 if (!iommu)
2001 return -ENODEV;
ba395927 2002
579305f7
AW
2003 if (!dev_is_pci(dev))
2004 return domain_context_mapping_one(domain, iommu, bus, devfn,
4ed0d3e6 2005 translation);
579305f7
AW
2006
2007 data.domain = domain;
2008 data.iommu = iommu;
2009 data.translation = translation;
2010
2011 return pci_for_each_dma_alias(to_pci_dev(dev),
2012 &domain_context_mapping_cb, &data);
2013}
2014
2015static int domain_context_mapped_cb(struct pci_dev *pdev,
2016 u16 alias, void *opaque)
2017{
2018 struct intel_iommu *iommu = opaque;
2019
2020 return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
ba395927
KA
2021}
2022
e1f167f3 2023static int domain_context_mapped(struct device *dev)
ba395927 2024{
5331fe6f 2025 struct intel_iommu *iommu;
156baca8 2026 u8 bus, devfn;
5331fe6f 2027
e1f167f3 2028 iommu = device_to_iommu(dev, &bus, &devfn);
5331fe6f
WH
2029 if (!iommu)
2030 return -ENODEV;
ba395927 2031
579305f7
AW
2032 if (!dev_is_pci(dev))
2033 return device_context_mapped(iommu, bus, devfn);
e1f167f3 2034
579305f7
AW
2035 return !pci_for_each_dma_alias(to_pci_dev(dev),
2036 domain_context_mapped_cb, iommu);
ba395927
KA
2037}
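
A note on the double negation above (editorial; it relies on the usual pci_for_each_dma_alias() behaviour of stopping and returning the first non-zero callback value): domain_context_mapped_cb() returns !device_context_mapped(), so the walk returns non-zero as soon as any DMA alias is not mapped, and the outer ! in domain_context_mapped() turns that into "true only if every alias is mapped".

/* Sketch of the logic, not driver code:
 *   all aliases mapped  -> every callback returns 0 -> walk returns 0 -> !0 == 1
 *   some alias unmapped -> that callback returns 1  -> walk returns 1 -> !1 == 0
 */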
2038
f532959b
FY
2039/* Returns a number of VTD pages, but aligned to MM page size */
2040static inline unsigned long aligned_nrpages(unsigned long host_addr,
2041 size_t size)
2042{
2043 host_addr &= ~PAGE_MASK;
2044 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
2045}
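
A quick worked example of aligned_nrpages() (illustration only, hypothetical values, assuming 4KiB MM pages): the low bits of host_addr give the offset within the MM page, and the offset-plus-size span is rounded up to whole MM pages before being expressed in VT-d (4KiB) pages.

/* host_addr = 0x12340ff0, size = 0x20:
 *   offset within page      = 0xff0
 *   PAGE_ALIGN(0xff0 + 0x20) = 0x2000
 *   0x2000 >> VTD_PAGE_SHIFT = 2 VT-d pages
 * host_addr page-aligned, size = 4096 -> exactly 1 page
 */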
2046
6dd9a7c7
YS
2047/* Return largest possible superpage level for a given mapping */
2048static inline int hardware_largepage_caps(struct dmar_domain *domain,
2049 unsigned long iov_pfn,
2050 unsigned long phy_pfn,
2051 unsigned long pages)
2052{
2053 int support, level = 1;
2054 unsigned long pfnmerge;
2055
2056 support = domain->iommu_superpage;
2057
2058 /* To use a large page, the virtual *and* physical addresses
2059 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2060 of them will mean we have to use smaller pages. So just
2061 merge them and check both at once. */
2062 pfnmerge = iov_pfn | phy_pfn;
2063
2064 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
2065 pages >>= VTD_STRIDE_SHIFT;
2066 if (!pages)
2067 break;
2068 pfnmerge >>= VTD_STRIDE_SHIFT;
2069 level++;
2070 support--;
2071 }
2072 return level;
2073}
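
A worked example of the level calculation above (editorial sketch, assuming VTD_STRIDE_SHIFT == 9 so each level covers 512 times more):

/* iov_pfn = 0x200, phy_pfn = 0x1400, pages = 512, support = 1 (2MiB):
 *   pfnmerge = 0x1600, low 9 bits clear -> pages >>= 9 == 1, level = 2
 *   -> one 2MiB superpage can be used for the whole range
 * phy_pfn = 0x1401 instead -> low bits of the merge are set
 *   -> return level 1 (4KiB pages only)
 */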
2074
9051aa02
DW
2075static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2076 struct scatterlist *sg, unsigned long phys_pfn,
2077 unsigned long nr_pages, int prot)
e1605495
DW
2078{
2079 struct dma_pte *first_pte = NULL, *pte = NULL;
9051aa02 2080 phys_addr_t uninitialized_var(pteval);
cc4f14aa 2081 unsigned long sg_res = 0;
6dd9a7c7
YS
2082 unsigned int largepage_lvl = 0;
2083 unsigned long lvl_pages = 0;
e1605495 2084
162d1b10 2085 BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
e1605495
DW
2086
2087 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2088 return -EINVAL;
2089
2090 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
2091
cc4f14aa
JL
2092 if (!sg) {
2093 sg_res = nr_pages;
9051aa02
DW
2094 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
2095 }
2096
6dd9a7c7 2097 while (nr_pages > 0) {
c85994e4
DW
2098 uint64_t tmp;
2099
e1605495 2100 if (!sg_res) {
f532959b 2101 sg_res = aligned_nrpages(sg->offset, sg->length);
e1605495
DW
2102 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
2103 sg->dma_length = sg->length;
2104 pteval = page_to_phys(sg_page(sg)) | prot;
6dd9a7c7 2105 phys_pfn = pteval >> VTD_PAGE_SHIFT;
e1605495 2106 }
6dd9a7c7 2107
e1605495 2108 if (!pte) {
6dd9a7c7
YS
2109 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2110
5cf0a76f 2111 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
e1605495
DW
2112 if (!pte)
2113 return -ENOMEM;
6dd9a7c7 2114 /* It is a large page */
6491d4d0 2115 if (largepage_lvl > 1) {
6dd9a7c7 2116 pteval |= DMA_PTE_LARGE_PAGE;
d41a4adb
JL
2117 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2118 /*
2119 * Ensure that old small page tables are
2120 * removed to make room for superpage,
2121 * if they exist.
2122 */
6491d4d0 2123 dma_pte_free_pagetable(domain, iov_pfn,
d41a4adb 2124 iov_pfn + lvl_pages - 1);
6491d4d0 2125 } else {
6dd9a7c7 2126 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
6491d4d0 2127 }
6dd9a7c7 2128
e1605495
DW
2129 }
 2130 /* We don't need a lock here; nobody else
2131 * touches the iova range
2132 */
7766a3fb 2133 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
c85994e4 2134 if (tmp) {
1bf20f0d 2135 static int dumps = 5;
9f10e5bf
JR
2136 pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2137 iov_pfn, tmp, (unsigned long long)pteval);
1bf20f0d
DW
2138 if (dumps) {
2139 dumps--;
2140 debug_dma_dump_mappings(NULL);
2141 }
2142 WARN_ON(1);
2143 }
6dd9a7c7
YS
2144
2145 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2146
2147 BUG_ON(nr_pages < lvl_pages);
2148 BUG_ON(sg_res < lvl_pages);
2149
2150 nr_pages -= lvl_pages;
2151 iov_pfn += lvl_pages;
2152 phys_pfn += lvl_pages;
2153 pteval += lvl_pages * VTD_PAGE_SIZE;
2154 sg_res -= lvl_pages;
2155
2156 /* If the next PTE would be the first in a new page, then we
2157 need to flush the cache on the entries we've just written.
2158 And then we'll need to recalculate 'pte', so clear it and
2159 let it get set again in the if (!pte) block above.
2160
2161 If we're done (!nr_pages) we need to flush the cache too.
2162
2163 Also if we've been setting superpages, we may need to
2164 recalculate 'pte' and switch back to smaller pages for the
2165 end of the mapping, if the trailing size is not enough to
2166 use another superpage (i.e. sg_res < lvl_pages). */
e1605495 2167 pte++;
6dd9a7c7
YS
2168 if (!nr_pages || first_pte_in_page(pte) ||
2169 (largepage_lvl > 1 && sg_res < lvl_pages)) {
e1605495
DW
2170 domain_flush_cache(domain, first_pte,
2171 (void *)pte - (void *)first_pte);
2172 pte = NULL;
2173 }
6dd9a7c7
YS
2174
2175 if (!sg_res && nr_pages)
e1605495
DW
2176 sg = sg_next(sg);
2177 }
2178 return 0;
2179}
2180
9051aa02
DW
2181static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2182 struct scatterlist *sg, unsigned long nr_pages,
2183 int prot)
ba395927 2184{
9051aa02
DW
2185 return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
2186}
6f6a00e4 2187
9051aa02
DW
2188static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2189 unsigned long phys_pfn, unsigned long nr_pages,
2190 int prot)
2191{
2192 return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
ba395927
KA
2193}
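
For orientation, a minimal sketch of how these two wrappers are used elsewhere in this file (values are hypothetical; names mirror the DMA-mapping path below): a contiguous physical range goes through domain_pfn_mapping(), while scatter-gather DMA goes through domain_sg_mapping() with the physical pfn taken from each sg entry.

/* Sketch only:
 *   domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
 *                      mm_to_dma_pfn(paddr >> PAGE_SHIFT),
 *                      aligned_nrpages(paddr, size),
 *                      DMA_PTE_READ | DMA_PTE_WRITE);
 */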
2194
c7151a8d 2195static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
ba395927 2196{
c7151a8d
WH
2197 if (!iommu)
2198 return;
8c11e798
WH
2199
2200 clear_context_table(iommu, bus, devfn);
2201 iommu->flush.flush_context(iommu, 0, 0, 0,
4c25a2c1 2202 DMA_CCMD_GLOBAL_INVL);
1f0ef2aa 2203 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
ba395927
KA
2204}
2205
109b9b04
DW
2206static inline void unlink_domain_info(struct device_domain_info *info)
2207{
2208 assert_spin_locked(&device_domain_lock);
2209 list_del(&info->link);
2210 list_del(&info->global);
2211 if (info->dev)
0bcb3e28 2212 info->dev->archdata.iommu = NULL;
109b9b04
DW
2213}
2214
ba395927
KA
2215static void domain_remove_dev_info(struct dmar_domain *domain)
2216{
3a74ca01 2217 struct device_domain_info *info, *tmp;
fb170fb4 2218 unsigned long flags;
ba395927
KA
2219
2220 spin_lock_irqsave(&device_domain_lock, flags);
3a74ca01 2221 list_for_each_entry_safe(info, tmp, &domain->devices, link) {
109b9b04 2222 unlink_domain_info(info);
ba395927
KA
2223 spin_unlock_irqrestore(&device_domain_lock, flags);
2224
93a23a72 2225 iommu_disable_dev_iotlb(info);
7c7faa11 2226 iommu_detach_dev(info->iommu, info->bus, info->devfn);
ba395927 2227
ab8dfe25 2228 if (domain_type_is_vm(domain)) {
7c7faa11 2229 iommu_detach_dependent_devices(info->iommu, info->dev);
fb170fb4 2230 domain_detach_iommu(domain, info->iommu);
92d03cc8
JL
2231 }
2232
2233 free_devinfo_mem(info);
ba395927
KA
2234 spin_lock_irqsave(&device_domain_lock, flags);
2235 }
2236 spin_unlock_irqrestore(&device_domain_lock, flags);
2237}
2238
2239/*
2240 * find_domain
1525a29a 2241 * Note: we use struct device->archdata.iommu to store the info
ba395927 2242 */
1525a29a 2243static struct dmar_domain *find_domain(struct device *dev)
ba395927
KA
2244{
2245 struct device_domain_info *info;
2246
2247 /* No lock here, assumes no domain exit in normal case */
1525a29a 2248 info = dev->archdata.iommu;
ba395927
KA
2249 if (info)
2250 return info->domain;
2251 return NULL;
2252}
2253
5a8f40e8 2254static inline struct device_domain_info *
745f2586
JL
2255dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2256{
2257 struct device_domain_info *info;
2258
2259 list_for_each_entry(info, &device_domain_list, global)
41e80dca 2260 if (info->iommu->segment == segment && info->bus == bus &&
745f2586 2261 info->devfn == devfn)
5a8f40e8 2262 return info;
745f2586
JL
2263
2264 return NULL;
2265}
2266
5a8f40e8 2267static struct dmar_domain *dmar_insert_dev_info(struct intel_iommu *iommu,
41e80dca 2268 int bus, int devfn,
b718cd3d
DW
2269 struct device *dev,
2270 struct dmar_domain *domain)
745f2586 2271{
5a8f40e8 2272 struct dmar_domain *found = NULL;
745f2586
JL
2273 struct device_domain_info *info;
2274 unsigned long flags;
2275
2276 info = alloc_devinfo_mem();
2277 if (!info)
b718cd3d 2278 return NULL;
745f2586 2279
745f2586
JL
2280 info->bus = bus;
2281 info->devfn = devfn;
2282 info->dev = dev;
2283 info->domain = domain;
5a8f40e8 2284 info->iommu = iommu;
745f2586
JL
2285
2286 spin_lock_irqsave(&device_domain_lock, flags);
2287 if (dev)
0bcb3e28 2288 found = find_domain(dev);
5a8f40e8
DW
2289 else {
2290 struct device_domain_info *info2;
41e80dca 2291 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
5a8f40e8
DW
2292 if (info2)
2293 found = info2->domain;
2294 }
745f2586
JL
2295 if (found) {
2296 spin_unlock_irqrestore(&device_domain_lock, flags);
2297 free_devinfo_mem(info);
b718cd3d
DW
2298 /* Caller must free the original domain */
2299 return found;
745f2586
JL
2300 }
2301
b718cd3d
DW
2302 list_add(&info->link, &domain->devices);
2303 list_add(&info->global, &device_domain_list);
2304 if (dev)
2305 dev->archdata.iommu = info;
2306 spin_unlock_irqrestore(&device_domain_lock, flags);
2307
2308 return domain;
745f2586
JL
2309}
2310
579305f7
AW
2311static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2312{
2313 *(u16 *)opaque = alias;
2314 return 0;
2315}
2316
ba395927 2317/* domain is initialized */
146922ec 2318static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
ba395927 2319{
579305f7
AW
2320 struct dmar_domain *domain, *tmp;
2321 struct intel_iommu *iommu;
5a8f40e8 2322 struct device_domain_info *info;
579305f7 2323 u16 dma_alias;
ba395927 2324 unsigned long flags;
aa4d066a 2325 u8 bus, devfn;
ba395927 2326
146922ec 2327 domain = find_domain(dev);
ba395927
KA
2328 if (domain)
2329 return domain;
2330
579305f7
AW
2331 iommu = device_to_iommu(dev, &bus, &devfn);
2332 if (!iommu)
2333 return NULL;
2334
146922ec
DW
2335 if (dev_is_pci(dev)) {
2336 struct pci_dev *pdev = to_pci_dev(dev);
276dbf99 2337
579305f7
AW
2338 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2339
2340 spin_lock_irqsave(&device_domain_lock, flags);
2341 info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2342 PCI_BUS_NUM(dma_alias),
2343 dma_alias & 0xff);
2344 if (info) {
2345 iommu = info->iommu;
2346 domain = info->domain;
5a8f40e8 2347 }
579305f7 2348 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927 2349
579305f7
AW
 2350 /* DMA alias already has a domain, use it */
2351 if (info)
2352 goto found_domain;
2353 }
ba395927 2354
146922ec 2355 /* Allocate and initialize new domain for the device */
ab8dfe25 2356 domain = alloc_domain(0);
745f2586 2357 if (!domain)
579305f7 2358 return NULL;
44bde614
JL
2359 domain->id = iommu_attach_domain(domain, iommu);
2360 if (domain->id < 0) {
2fe9723d 2361 free_domain_mem(domain);
579305f7 2362 return NULL;
2c2e2c38 2363 }
fb170fb4 2364 domain_attach_iommu(domain, iommu);
579305f7
AW
2365 if (domain_init(domain, gaw)) {
2366 domain_exit(domain);
2367 return NULL;
2c2e2c38 2368 }
ba395927 2369
579305f7
AW
2370 /* register PCI DMA alias device */
2371 if (dev_is_pci(dev)) {
2372 tmp = dmar_insert_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2373 dma_alias & 0xff, NULL, domain);
2374
2375 if (!tmp || tmp != domain) {
2376 domain_exit(domain);
2377 domain = tmp;
2378 }
2379
b718cd3d 2380 if (!domain)
579305f7 2381 return NULL;
ba395927
KA
2382 }
2383
2384found_domain:
579305f7
AW
2385 tmp = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);
2386
2387 if (!tmp || tmp != domain) {
2388 domain_exit(domain);
2389 domain = tmp;
2390 }
b718cd3d
DW
2391
2392 return domain;
ba395927
KA
2393}
2394
2c2e2c38 2395static int iommu_identity_mapping;
e0fc7e0b
DW
2396#define IDENTMAP_ALL 1
2397#define IDENTMAP_GFX 2
2398#define IDENTMAP_AZALIA 4
2c2e2c38 2399
b213203e
DW
2400static int iommu_domain_identity_map(struct dmar_domain *domain,
2401 unsigned long long start,
2402 unsigned long long end)
ba395927 2403{
c5395d5c
DW
2404 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2405 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2406
2407 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2408 dma_to_mm_pfn(last_vpfn))) {
9f10e5bf 2409 pr_err("Reserving iova failed\n");
b213203e 2410 return -ENOMEM;
ba395927
KA
2411 }
2412
c5395d5c
DW
2413 pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
2414 start, end, domain->id);
ba395927
KA
2415 /*
2416 * RMRR range might have overlap with physical memory range,
2417 * clear it first
2418 */
c5395d5c 2419 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
ba395927 2420
c5395d5c
DW
2421 return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2422 last_vpfn - first_vpfn + 1,
61df7443 2423 DMA_PTE_READ|DMA_PTE_WRITE);
b213203e
DW
2424}
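
A concrete example of the pfn arithmetic above (editorial, using the 0-16MiB ISA mapping set up later in this file): start = 0 and end = 16*1024*1024 - 1 give first_vpfn = 0 and last_vpfn = 0xfff, so 4096 IOVA pages are reserved and then identity-mapped read/write.

/* iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1)
 *   -> first_vpfn = 0x0, last_vpfn = 0xfff, 4096 pages mapped 1:1
 */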
2425
0b9d9753 2426static int iommu_prepare_identity_map(struct device *dev,
b213203e
DW
2427 unsigned long long start,
2428 unsigned long long end)
2429{
2430 struct dmar_domain *domain;
2431 int ret;
2432
0b9d9753 2433 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
b213203e
DW
2434 if (!domain)
2435 return -ENOMEM;
2436
19943b0e
DW
2437 /* For _hardware_ passthrough, don't bother. But for software
2438 passthrough, we do it anyway -- it may indicate a memory
 2439 range which is reserved in E820, and so didn't get set
2440 up to start with in si_domain */
2441 if (domain == si_domain && hw_pass_through) {
9f10e5bf
JR
2442 pr_warn("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2443 dev_name(dev), start, end);
19943b0e
DW
2444 return 0;
2445 }
2446
9f10e5bf
JR
2447 pr_info("Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2448 dev_name(dev), start, end);
2449
5595b528
DW
2450 if (end < start) {
2451 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2452 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2453 dmi_get_system_info(DMI_BIOS_VENDOR),
2454 dmi_get_system_info(DMI_BIOS_VERSION),
2455 dmi_get_system_info(DMI_PRODUCT_VERSION));
2456 ret = -EIO;
2457 goto error;
2458 }
2459
2ff729f5
DW
2460 if (end >> agaw_to_width(domain->agaw)) {
2461 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2462 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2463 agaw_to_width(domain->agaw),
2464 dmi_get_system_info(DMI_BIOS_VENDOR),
2465 dmi_get_system_info(DMI_BIOS_VERSION),
2466 dmi_get_system_info(DMI_PRODUCT_VERSION));
2467 ret = -EIO;
2468 goto error;
2469 }
19943b0e 2470
b213203e 2471 ret = iommu_domain_identity_map(domain, start, end);
ba395927
KA
2472 if (ret)
2473 goto error;
2474
2475 /* context entry init */
0b9d9753 2476 ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);
b213203e
DW
2477 if (ret)
2478 goto error;
2479
2480 return 0;
2481
2482 error:
ba395927
KA
2483 domain_exit(domain);
2484 return ret;
ba395927
KA
2485}
2486
2487static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
0b9d9753 2488 struct device *dev)
ba395927 2489{
0b9d9753 2490 if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927 2491 return 0;
0b9d9753
DW
2492 return iommu_prepare_identity_map(dev, rmrr->base_address,
2493 rmrr->end_address);
ba395927
KA
2494}
2495
d3f13810 2496#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
49a0429e
KA
2497static inline void iommu_prepare_isa(void)
2498{
2499 struct pci_dev *pdev;
2500 int ret;
2501
2502 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2503 if (!pdev)
2504 return;
2505
9f10e5bf 2506 pr_info("Prepare 0-16MiB unity mapping for LPC\n");
0b9d9753 2507 ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);
49a0429e
KA
2508
2509 if (ret)
9f10e5bf 2510 pr_err("Failed to create 0-16MiB identity map - floppy might not work\n");
49a0429e 2511
9b27e82d 2512 pci_dev_put(pdev);
49a0429e
KA
2513}
2514#else
2515static inline void iommu_prepare_isa(void)
2516{
2517 return;
2518}
d3f13810 2519#endif /* !CONFIG_INTEL_IOMMU_FLOPPY_WA */
49a0429e 2520
2c2e2c38 2521static int md_domain_init(struct dmar_domain *domain, int guest_width);
c7ab48d2 2522
071e1374 2523static int __init si_domain_init(int hw)
2c2e2c38
FY
2524{
2525 struct dmar_drhd_unit *drhd;
2526 struct intel_iommu *iommu;
c7ab48d2 2527 int nid, ret = 0;
44bde614 2528 bool first = true;
2c2e2c38 2529
ab8dfe25 2530 si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2c2e2c38
FY
2531 if (!si_domain)
2532 return -EFAULT;
2533
2c2e2c38
FY
2534 for_each_active_iommu(iommu, drhd) {
2535 ret = iommu_attach_domain(si_domain, iommu);
fb170fb4 2536 if (ret < 0) {
2c2e2c38
FY
2537 domain_exit(si_domain);
2538 return -EFAULT;
44bde614
JL
2539 } else if (first) {
2540 si_domain->id = ret;
2541 first = false;
2542 } else if (si_domain->id != ret) {
2543 domain_exit(si_domain);
2544 return -EFAULT;
2c2e2c38 2545 }
fb170fb4 2546 domain_attach_iommu(si_domain, iommu);
2c2e2c38
FY
2547 }
2548
2549 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2550 domain_exit(si_domain);
2551 return -EFAULT;
2552 }
2553
9f10e5bf 2554 pr_debug("Identity mapping domain is domain %d\n",
9544c003 2555 si_domain->id);
2c2e2c38 2556
19943b0e
DW
2557 if (hw)
2558 return 0;
2559
c7ab48d2 2560 for_each_online_node(nid) {
5dfe8660
TH
2561 unsigned long start_pfn, end_pfn;
2562 int i;
2563
2564 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2565 ret = iommu_domain_identity_map(si_domain,
2566 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2567 if (ret)
2568 return ret;
2569 }
c7ab48d2
DW
2570 }
2571
2c2e2c38
FY
2572 return 0;
2573}
2574
9b226624 2575static int identity_mapping(struct device *dev)
2c2e2c38
FY
2576{
2577 struct device_domain_info *info;
2578
2579 if (likely(!iommu_identity_mapping))
2580 return 0;
2581
9b226624 2582 info = dev->archdata.iommu;
cb452a40
MT
2583 if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2584 return (info->domain == si_domain);
2c2e2c38 2585
2c2e2c38
FY
2586 return 0;
2587}
2588
2589static int domain_add_dev_info(struct dmar_domain *domain,
5913c9bf 2590 struct device *dev, int translation)
2c2e2c38 2591{
0ac72664 2592 struct dmar_domain *ndomain;
5a8f40e8 2593 struct intel_iommu *iommu;
156baca8 2594 u8 bus, devfn;
5fe60f4e 2595 int ret;
2c2e2c38 2596
5913c9bf 2597 iommu = device_to_iommu(dev, &bus, &devfn);
5a8f40e8
DW
2598 if (!iommu)
2599 return -ENODEV;
2600
5913c9bf 2601 ndomain = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);
0ac72664
DW
2602 if (ndomain != domain)
2603 return -EBUSY;
2c2e2c38 2604
5913c9bf 2605 ret = domain_context_mapping(domain, dev, translation);
e2ad23d0 2606 if (ret) {
5913c9bf 2607 domain_remove_one_dev_info(domain, dev);
e2ad23d0
DW
2608 return ret;
2609 }
2610
2c2e2c38
FY
2611 return 0;
2612}
2613
0b9d9753 2614static bool device_has_rmrr(struct device *dev)
ea2447f7
TM
2615{
2616 struct dmar_rmrr_unit *rmrr;
832bd858 2617 struct device *tmp;
ea2447f7
TM
2618 int i;
2619
0e242612 2620 rcu_read_lock();
ea2447f7 2621 for_each_rmrr_units(rmrr) {
b683b230
JL
2622 /*
2623 * Return TRUE if this RMRR contains the device that
2624 * is passed in.
2625 */
2626 for_each_active_dev_scope(rmrr->devices,
2627 rmrr->devices_cnt, i, tmp)
0b9d9753 2628 if (tmp == dev) {
0e242612 2629 rcu_read_unlock();
ea2447f7 2630 return true;
b683b230 2631 }
ea2447f7 2632 }
0e242612 2633 rcu_read_unlock();
ea2447f7
TM
2634 return false;
2635}
2636
c875d2c1
AW
2637/*
2638 * There are a couple cases where we need to restrict the functionality of
2639 * devices associated with RMRRs. The first is when evaluating a device for
2640 * identity mapping because problems exist when devices are moved in and out
2641 * of domains and their respective RMRR information is lost. This means that
2642 * a device with associated RMRRs will never be in a "passthrough" domain.
2643 * The second is use of the device through the IOMMU API. This interface
2644 * expects to have full control of the IOVA space for the device. We cannot
2645 * satisfy both the requirement that RMRR access is maintained and have an
2646 * unencumbered IOVA space. We also have no ability to quiesce the device's
2647 * use of the RMRR space or even inform the IOMMU API user of the restriction.
2648 * We therefore prevent devices associated with an RMRR from participating in
2649 * the IOMMU API, which eliminates them from device assignment.
2650 *
2651 * In both cases we assume that PCI USB devices with RMRRs have them largely
2652 * for historical reasons and that the RMRR space is not actively used post
2653 * boot. This exclusion may change if vendors begin to abuse it.
18436afd
DW
2654 *
2655 * The same exception is made for graphics devices, with the requirement that
2656 * any use of the RMRR regions will be torn down before assigning the device
2657 * to a guest.
c875d2c1
AW
2658 */
2659static bool device_is_rmrr_locked(struct device *dev)
2660{
2661 if (!device_has_rmrr(dev))
2662 return false;
2663
2664 if (dev_is_pci(dev)) {
2665 struct pci_dev *pdev = to_pci_dev(dev);
2666
18436afd 2667 if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
c875d2c1
AW
2668 return false;
2669 }
2670
2671 return true;
2672}
2673
3bdb2591 2674static int iommu_should_identity_map(struct device *dev, int startup)
6941af28 2675{
ea2447f7 2676
3bdb2591
DW
2677 if (dev_is_pci(dev)) {
2678 struct pci_dev *pdev = to_pci_dev(dev);
ea2447f7 2679
c875d2c1 2680 if (device_is_rmrr_locked(dev))
3bdb2591 2681 return 0;
e0fc7e0b 2682
3bdb2591
DW
2683 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2684 return 1;
e0fc7e0b 2685
3bdb2591
DW
2686 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2687 return 1;
6941af28 2688
3bdb2591 2689 if (!(iommu_identity_mapping & IDENTMAP_ALL))
3dfc813d 2690 return 0;
3bdb2591
DW
2691
2692 /*
2693 * We want to start off with all devices in the 1:1 domain, and
2694 * take them out later if we find they can't access all of memory.
2695 *
2696 * However, we can't do this for PCI devices behind bridges,
2697 * because all PCI devices behind the same bridge will end up
2698 * with the same source-id on their transactions.
2699 *
2700 * Practically speaking, we can't change things around for these
2701 * devices at run-time, because we can't be sure there'll be no
2702 * DMA transactions in flight for any of their siblings.
2703 *
2704 * So PCI devices (unless they're on the root bus) as well as
2705 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2706 * the 1:1 domain, just in _case_ one of their siblings turns out
2707 * not to be able to map all of memory.
2708 */
2709 if (!pci_is_pcie(pdev)) {
2710 if (!pci_is_root_bus(pdev->bus))
2711 return 0;
2712 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2713 return 0;
2714 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
3dfc813d 2715 return 0;
3bdb2591
DW
2716 } else {
2717 if (device_has_rmrr(dev))
2718 return 0;
2719 }
3dfc813d 2720
3bdb2591 2721 /*
3dfc813d 2722 * At boot time, we don't yet know if devices will be 64-bit capable.
3bdb2591 2723 * Assume that they will — if they turn out not to be, then we can
3dfc813d
DW
2724 * take them out of the 1:1 domain later.
2725 */
8fcc5372
CW
2726 if (!startup) {
2727 /*
2728 * If the device's dma_mask is less than the system's memory
2729 * size then this is not a candidate for identity mapping.
2730 */
3bdb2591 2731 u64 dma_mask = *dev->dma_mask;
8fcc5372 2732
3bdb2591
DW
2733 if (dev->coherent_dma_mask &&
2734 dev->coherent_dma_mask < dma_mask)
2735 dma_mask = dev->coherent_dma_mask;
8fcc5372 2736
3bdb2591 2737 return dma_mask >= dma_get_required_mask(dev);
8fcc5372 2738 }
6941af28
DW
2739
2740 return 1;
2741}
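
To make the run-time check above concrete (editorial example, hypothetical device): on a machine with, say, 8GiB of RAM, dma_get_required_mask() needs more than 32 bits, so a device whose effective mask is DMA_BIT_MASK(32) fails the comparison and is kept out of (or later removed from) the identity domain, while a 64-bit-capable device stays identity-mapped.

/* Hypothetical, !startup path, 8GiB of RAM:
 *   effective dma_mask = DMA_BIT_MASK(32) -> returns 0 (no identity map)
 *   effective dma_mask = DMA_BIT_MASK(64) -> returns 1 (identity map OK)
 */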
2742
cf04eee8
DW
2743static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
2744{
2745 int ret;
2746
2747 if (!iommu_should_identity_map(dev, 1))
2748 return 0;
2749
2750 ret = domain_add_dev_info(si_domain, dev,
2751 hw ? CONTEXT_TT_PASS_THROUGH :
2752 CONTEXT_TT_MULTI_LEVEL);
2753 if (!ret)
9f10e5bf
JR
2754 pr_info("%s identity mapping for device %s\n",
2755 hw ? "Hardware" : "Software", dev_name(dev));
cf04eee8
DW
2756 else if (ret == -ENODEV)
2757 /* device not associated with an iommu */
2758 ret = 0;
2759
2760 return ret;
2761}
2762
2763
071e1374 2764static int __init iommu_prepare_static_identity_mapping(int hw)
2c2e2c38 2765{
2c2e2c38 2766 struct pci_dev *pdev = NULL;
cf04eee8
DW
2767 struct dmar_drhd_unit *drhd;
2768 struct intel_iommu *iommu;
2769 struct device *dev;
2770 int i;
2771 int ret = 0;
2c2e2c38 2772
2c2e2c38 2773 for_each_pci_dev(pdev) {
cf04eee8
DW
2774 ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
2775 if (ret)
2776 return ret;
2777 }
2778
2779 for_each_active_iommu(iommu, drhd)
2780 for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
2781 struct acpi_device_physical_node *pn;
2782 struct acpi_device *adev;
2783
2784 if (dev->bus != &acpi_bus_type)
2785 continue;
86080ccc 2786
cf04eee8
DW
2787 adev= to_acpi_device(dev);
2788 mutex_lock(&adev->physical_node_lock);
2789 list_for_each_entry(pn, &adev->physical_node_list, node) {
2790 ret = dev_prepare_static_identity_mapping(pn->dev, hw);
2791 if (ret)
2792 break;
eae460b6 2793 }
cf04eee8
DW
2794 mutex_unlock(&adev->physical_node_lock);
2795 if (ret)
2796 return ret;
62edf5dc 2797 }
2c2e2c38
FY
2798
2799 return 0;
2800}
2801
ffebeb46
JL
2802static void intel_iommu_init_qi(struct intel_iommu *iommu)
2803{
2804 /*
2805 * Start from the sane iommu hardware state.
2806 * If the queued invalidation is already initialized by us
2807 * (for example, while enabling interrupt-remapping) then
2808 * we got the things already rolling from a sane state.
2809 */
2810 if (!iommu->qi) {
2811 /*
2812 * Clear any previous faults.
2813 */
2814 dmar_fault(-1, iommu);
2815 /*
2816 * Disable queued invalidation if supported and already enabled
2817 * before OS handover.
2818 */
2819 dmar_disable_qi(iommu);
2820 }
2821
2822 if (dmar_enable_qi(iommu)) {
2823 /*
2824 * Queued Invalidate not enabled, use Register Based Invalidate
2825 */
2826 iommu->flush.flush_context = __iommu_flush_context;
2827 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
9f10e5bf 2828 pr_info("%s: Using Register based invalidation\n",
ffebeb46
JL
2829 iommu->name);
2830 } else {
2831 iommu->flush.flush_context = qi_flush_context;
2832 iommu->flush.flush_iotlb = qi_flush_iotlb;
9f10e5bf 2833 pr_info("%s: Using Queued invalidation\n", iommu->name);
ffebeb46
JL
2834 }
2835}
2836
091d42e4
JR
2837static int copy_context_table(struct intel_iommu *iommu,
2838 struct root_entry *old_re,
2839 struct context_entry **tbl,
2840 int bus, bool ext)
2841{
2842 struct context_entry *old_ce = NULL, *new_ce = NULL, ce;
dbcd861f 2843 int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
091d42e4
JR
2844 phys_addr_t old_ce_phys;
2845
2846 tbl_idx = ext ? bus * 2 : bus;
2847
2848 for (devfn = 0; devfn < 256; devfn++) {
2849 /* First calculate the correct index */
2850 idx = (ext ? devfn * 2 : devfn) % 256;
2851
2852 if (idx == 0) {
2853 /* First save what we may have and clean up */
2854 if (new_ce) {
2855 tbl[tbl_idx] = new_ce;
2856 __iommu_flush_cache(iommu, new_ce,
2857 VTD_PAGE_SIZE);
2858 pos = 1;
2859 }
2860
2861 if (old_ce)
2862 iounmap(old_ce);
2863
2864 ret = 0;
2865 if (devfn < 0x80)
2866 old_ce_phys = root_entry_lctp(old_re);
2867 else
2868 old_ce_phys = root_entry_uctp(old_re);
2869
2870 if (!old_ce_phys) {
2871 if (ext && devfn == 0) {
2872 /* No LCTP, try UCTP */
2873 devfn = 0x7f;
2874 continue;
2875 } else {
2876 goto out;
2877 }
2878 }
2879
2880 ret = -ENOMEM;
2881 old_ce = ioremap_cache(old_ce_phys, PAGE_SIZE);
2882 if (!old_ce)
2883 goto out;
2884
2885 new_ce = alloc_pgtable_page(iommu->node);
2886 if (!new_ce)
2887 goto out_unmap;
2888
2889 ret = 0;
2890 }
2891
2892 /* Now copy the context entry */
2893 ce = old_ce[idx];
2894
cf484d0e 2895 if (!__context_present(&ce))
091d42e4
JR
2896 continue;
2897
dbcd861f
JR
2898 did = context_domain_id(&ce);
2899 if (did >= 0 && did < cap_ndoms(iommu->cap))
2900 set_bit(did, iommu->domain_ids);
2901
cf484d0e
JR
2902 /*
2903 * We need a marker for copied context entries. This
2904 * marker needs to work for the old format as well as
2905 * for extended context entries.
2906 *
2907 * Bit 67 of the context entry is used. In the old
2908 * format this bit is available to software, in the
2909 * extended format it is the PGE bit, but PGE is ignored
2910 * by HW if PASIDs are disabled (and thus still
2911 * available).
2912 *
2913 * So disable PASIDs first and then mark the entry
2914 * copied. This means that we don't copy PASID
2915 * translations from the old kernel, but this is fine as
2916 * faults there are not fatal.
2917 */
2918 context_clear_pasid_enable(&ce);
2919 context_set_copied(&ce);
2920
091d42e4
JR
2921 new_ce[idx] = ce;
2922 }
2923
2924 tbl[tbl_idx + pos] = new_ce;
2925
2926 __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
2927
2928out_unmap:
2929 iounmap(old_ce);
2930
2931out:
2932 return ret;
2933}
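
A worked example of the index arithmetic above for the extended-context case (editorial sketch): with ext == true each bus owns two context tables, devfns 0x00-0x7f coming from root_entry_lctp() and 0x80-0xff from root_entry_uctp(); pos becomes 1 once devfn reaches 0x80, which matches the lo/hi root-entry writes in copy_translation_tables().

/* ext == true, bus = 3:
 *   tbl_idx = 6
 *   devfn 0x10 -> idx = (0x10 * 2) % 256 = 0x20, stored in ctxt_tbls[6]
 *   devfn 0x90 -> idx = (0x90 * 2) % 256 = 0x20, stored in ctxt_tbls[7]
 */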
2934
2935static int copy_translation_tables(struct intel_iommu *iommu)
2936{
2937 struct context_entry **ctxt_tbls;
2938 struct root_entry *old_rt;
2939 phys_addr_t old_rt_phys;
2940 int ctxt_table_entries;
2941 unsigned long flags;
2942 u64 rtaddr_reg;
2943 int bus, ret;
c3361f2f 2944 bool new_ext, ext;
091d42e4
JR
2945
2946 rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
2947 ext = !!(rtaddr_reg & DMA_RTADDR_RTT);
c3361f2f
JR
2948 new_ext = !!ecap_ecs(iommu->ecap);
2949
2950 /*
2951 * The RTT bit can only be changed when translation is disabled,
2952 * but disabling translation means to open a window for data
2953 * corruption. So bail out and don't copy anything if we would
2954 * have to change the bit.
2955 */
2956 if (new_ext != ext)
2957 return -EINVAL;
091d42e4
JR
2958
2959 old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
2960 if (!old_rt_phys)
2961 return -EINVAL;
2962
2963 old_rt = ioremap_cache(old_rt_phys, PAGE_SIZE);
2964 if (!old_rt)
2965 return -ENOMEM;
2966
2967 /* This is too big for the stack - allocate it from slab */
2968 ctxt_table_entries = ext ? 512 : 256;
2969 ret = -ENOMEM;
2970 ctxt_tbls = kzalloc(ctxt_table_entries * sizeof(void *), GFP_KERNEL);
2971 if (!ctxt_tbls)
2972 goto out_unmap;
2973
2974 for (bus = 0; bus < 256; bus++) {
2975 ret = copy_context_table(iommu, &old_rt[bus],
2976 ctxt_tbls, bus, ext);
2977 if (ret) {
2978 pr_err("%s: Failed to copy context table for bus %d\n",
2979 iommu->name, bus);
2980 continue;
2981 }
2982 }
2983
2984 spin_lock_irqsave(&iommu->lock, flags);
2985
2986 /* Context tables are copied, now write them to the root_entry table */
2987 for (bus = 0; bus < 256; bus++) {
2988 int idx = ext ? bus * 2 : bus;
2989 u64 val;
2990
2991 if (ctxt_tbls[idx]) {
2992 val = virt_to_phys(ctxt_tbls[idx]) | 1;
2993 iommu->root_entry[bus].lo = val;
2994 }
2995
2996 if (!ext || !ctxt_tbls[idx + 1])
2997 continue;
2998
2999 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
3000 iommu->root_entry[bus].hi = val;
3001 }
3002
3003 spin_unlock_irqrestore(&iommu->lock, flags);
3004
3005 kfree(ctxt_tbls);
3006
3007 __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
3008
3009 ret = 0;
3010
3011out_unmap:
3012 iounmap(old_rt);
3013
3014 return ret;
3015}
3016
b779260b 3017static int __init init_dmars(void)
ba395927
KA
3018{
3019 struct dmar_drhd_unit *drhd;
3020 struct dmar_rmrr_unit *rmrr;
a87f4918 3021 bool copied_tables = false;
832bd858 3022 struct device *dev;
ba395927 3023 struct intel_iommu *iommu;
9d783ba0 3024 int i, ret;
2c2e2c38 3025
ba395927
KA
3026 /*
3027 * for each drhd
3028 * allocate root
3029 * initialize and program root entry to not present
3030 * endfor
3031 */
3032 for_each_drhd_unit(drhd) {
5e0d2a6f 3033 /*
 3034 * lock not needed as this is only incremented in the single-
 3035 * threaded kernel __init code path; all other accesses are read
 3036 * only
3037 */
78d8e704 3038 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
1b198bb0
MT
3039 g_num_of_iommus++;
3040 continue;
3041 }
9f10e5bf 3042 pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
5e0d2a6f 3043 }
3044
ffebeb46
JL
3045 /* Preallocate enough resources for IOMMU hot-addition */
3046 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
3047 g_num_of_iommus = DMAR_UNITS_SUPPORTED;
3048
d9630fe9
WH
3049 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
3050 GFP_KERNEL);
3051 if (!g_iommus) {
9f10e5bf 3052 pr_err("Allocating global iommu array failed\n");
d9630fe9
WH
3053 ret = -ENOMEM;
3054 goto error;
3055 }
3056
80b20dd8 3057 deferred_flush = kzalloc(g_num_of_iommus *
3058 sizeof(struct deferred_flush_tables), GFP_KERNEL);
3059 if (!deferred_flush) {
5e0d2a6f 3060 ret = -ENOMEM;
989d51fc 3061 goto free_g_iommus;
5e0d2a6f 3062 }
3063
7c919779 3064 for_each_active_iommu(iommu, drhd) {
d9630fe9 3065 g_iommus[iommu->seq_id] = iommu;
ba395927 3066
b63d80d1
JR
3067 intel_iommu_init_qi(iommu);
3068
e61d98d8
SS
3069 ret = iommu_init_domains(iommu);
3070 if (ret)
989d51fc 3071 goto free_iommu;
e61d98d8 3072
4158c2ec
JR
3073 init_translation_status(iommu);
3074
091d42e4
JR
3075 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
3076 iommu_disable_translation(iommu);
3077 clear_translation_pre_enabled(iommu);
3078 pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3079 iommu->name);
3080 }
4158c2ec 3081
ba395927
KA
3082 /*
3083 * TBD:
3084 * we could share the same root & context tables
25985edc 3085 * among all IOMMUs. Need to split it later.
ba395927
KA
3086 */
3087 ret = iommu_alloc_root_entry(iommu);
ffebeb46 3088 if (ret)
989d51fc 3089 goto free_iommu;
5f0a7f76 3090
091d42e4
JR
3091 if (translation_pre_enabled(iommu)) {
3092 pr_info("Translation already enabled - trying to copy translation structures\n");
3093
3094 ret = copy_translation_tables(iommu);
3095 if (ret) {
3096 /*
3097 * We found the IOMMU with translation
3098 * enabled - but failed to copy over the
3099 * old root-entry table. Try to proceed
3100 * by disabling translation now and
3101 * allocating a clean root-entry table.
3102 * This might cause DMAR faults, but
3103 * probably the dump will still succeed.
3104 */
3105 pr_err("Failed to copy translation tables from previous kernel for %s\n",
3106 iommu->name);
3107 iommu_disable_translation(iommu);
3108 clear_translation_pre_enabled(iommu);
3109 } else {
3110 pr_info("Copied translation tables from previous kernel for %s\n",
3111 iommu->name);
a87f4918 3112 copied_tables = true;
091d42e4
JR
3113 }
3114 }
3115
5f0a7f76
JR
3116 iommu_flush_write_buffer(iommu);
3117 iommu_set_root_entry(iommu);
3118 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
3119 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3120
4ed0d3e6 3121 if (!ecap_pass_through(iommu->ecap))
19943b0e 3122 hw_pass_through = 0;
ba395927
KA
3123 }
3124
19943b0e 3125 if (iommu_pass_through)
e0fc7e0b
DW
3126 iommu_identity_mapping |= IDENTMAP_ALL;
3127
d3f13810 3128#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
e0fc7e0b 3129 iommu_identity_mapping |= IDENTMAP_GFX;
19943b0e 3130#endif
e0fc7e0b 3131
86080ccc
JR
3132 if (iommu_identity_mapping) {
3133 ret = si_domain_init(hw_pass_through);
3134 if (ret)
3135 goto free_iommu;
3136 }
3137
e0fc7e0b
DW
3138 check_tylersburg_isoch();
3139
a87f4918
JR
3140 /*
3141 * If we copied translations from a previous kernel in the kdump
 3142 * case, we cannot assign the devices to domains now, as that
3143 * would eliminate the old mappings. So skip this part and defer
3144 * the assignment to device driver initialization time.
3145 */
3146 if (copied_tables)
3147 goto domains_done;
3148
ba395927 3149 /*
19943b0e
DW
 3150 * If pass through is not set or not enabled, set up context entries for
3151 * identity mappings for rmrr, gfx, and isa and may fall back to static
3152 * identity mapping if iommu_identity_mapping is set.
ba395927 3153 */
19943b0e
DW
3154 if (iommu_identity_mapping) {
3155 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
4ed0d3e6 3156 if (ret) {
9f10e5bf 3157 pr_crit("Failed to setup IOMMU pass-through\n");
989d51fc 3158 goto free_iommu;
ba395927
KA
3159 }
3160 }
ba395927 3161 /*
19943b0e
DW
3162 * For each rmrr
3163 * for each dev attached to rmrr
3164 * do
3165 * locate drhd for dev, alloc domain for dev
3166 * allocate free domain
3167 * allocate page table entries for rmrr
3168 * if context not allocated for bus
3169 * allocate and init context
3170 * set present in root table for this bus
3171 * init context with domain, translation etc
3172 * endfor
3173 * endfor
ba395927 3174 */
9f10e5bf 3175 pr_info("Setting RMRR:\n");
19943b0e 3176 for_each_rmrr_units(rmrr) {
b683b230
JL
 3177 /* some BIOSes list non-existent devices in the DMAR table. */
3178 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
832bd858 3179 i, dev) {
0b9d9753 3180 ret = iommu_prepare_rmrr_dev(rmrr, dev);
19943b0e 3181 if (ret)
9f10e5bf 3182 pr_err("Mapping reserved region failed\n");
ba395927 3183 }
4ed0d3e6 3184 }
49a0429e 3185
19943b0e
DW
3186 iommu_prepare_isa();
3187
a87f4918
JR
3188domains_done:
3189
ba395927
KA
3190 /*
3191 * for each drhd
3192 * enable fault log
3193 * global invalidate context cache
3194 * global invalidate iotlb
3195 * enable translation
3196 */
7c919779 3197 for_each_iommu(iommu, drhd) {
51a63e67
JC
3198 if (drhd->ignored) {
3199 /*
3200 * we always have to disable PMRs or DMA may fail on
3201 * this device
3202 */
3203 if (force_on)
7c919779 3204 iommu_disable_protect_mem_regions(iommu);
ba395927 3205 continue;
51a63e67 3206 }
ba395927
KA
3207
3208 iommu_flush_write_buffer(iommu);
3209
3460a6d9
KA
3210 ret = dmar_set_interrupt(iommu);
3211 if (ret)
989d51fc 3212 goto free_iommu;
3460a6d9 3213
8939ddf6
JR
3214 if (!translation_pre_enabled(iommu))
3215 iommu_enable_translation(iommu);
3216
b94996c9 3217 iommu_disable_protect_mem_regions(iommu);
ba395927
KA
3218 }
3219
3220 return 0;
989d51fc
JL
3221
3222free_iommu:
ffebeb46
JL
3223 for_each_active_iommu(iommu, drhd) {
3224 disable_dmar_iommu(iommu);
a868e6b7 3225 free_dmar_iommu(iommu);
ffebeb46 3226 }
9bdc531e 3227 kfree(deferred_flush);
989d51fc 3228free_g_iommus:
d9630fe9 3229 kfree(g_iommus);
989d51fc 3230error:
ba395927
KA
3231 return ret;
3232}
3233
5a5e02a6 3234/* This takes a number of _MM_ pages, not VTD pages */
875764de
DW
3235static struct iova *intel_alloc_iova(struct device *dev,
3236 struct dmar_domain *domain,
3237 unsigned long nrpages, uint64_t dma_mask)
ba395927 3238{
ba395927 3239 struct iova *iova = NULL;
ba395927 3240
875764de
DW
3241 /* Restrict dma_mask to the width that the iommu can handle */
3242 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
3243
3244 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
ba395927
KA
3245 /*
3246 * First try to allocate an io virtual address in
284901a9 3247 * DMA_BIT_MASK(32), and if that fails then try allocating
3609801e 3248 * from the higher range
ba395927 3249 */
875764de
DW
3250 iova = alloc_iova(&domain->iovad, nrpages,
3251 IOVA_PFN(DMA_BIT_MASK(32)), 1);
3252 if (iova)
3253 return iova;
3254 }
3255 iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
3256 if (unlikely(!iova)) {
9f10e5bf 3257 pr_err("Allocating %ld-page iova for %s failed",
207e3592 3258 nrpages, dev_name(dev));
f76aec76
KA
3259 return NULL;
3260 }
3261
3262 return iova;
3263}
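
A short usage note (hedged; it mirrors the mapping path below): the caller passes the page count and the device's DMA mask, so the allocation is first attempted below 4GiB for devices that may not be 64-bit capable and only then retried against the full, iommu-clamped mask.

/* Sketch (as used by __intel_map_single() below):
 *   iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), *dev->dma_mask);
 *   if (!iova)
 *           goto error;
 */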
3264
d4b709f4 3265static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
f76aec76
KA
3266{
3267 struct dmar_domain *domain;
3268 int ret;
3269
d4b709f4 3270 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
f76aec76 3271 if (!domain) {
9f10e5bf 3272 pr_err("Allocating domain for %s failed\n",
d4b709f4 3273 dev_name(dev));
4fe05bbc 3274 return NULL;
ba395927
KA
3275 }
3276
3277 /* make sure context mapping is ok */
d4b709f4
DW
3278 if (unlikely(!domain_context_mapped(dev))) {
3279 ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);
f76aec76 3280 if (ret) {
9f10e5bf 3281 pr_err("Domain context map for %s failed\n",
d4b709f4 3282 dev_name(dev));
4fe05bbc 3283 return NULL;
f76aec76 3284 }
ba395927
KA
3285 }
3286
f76aec76
KA
3287 return domain;
3288}
3289
d4b709f4 3290static inline struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
147202aa
DW
3291{
3292 struct device_domain_info *info;
3293
3294 /* No lock here, assumes no domain exit in normal case */
d4b709f4 3295 info = dev->archdata.iommu;
147202aa
DW
3296 if (likely(info))
3297 return info->domain;
3298
3299 return __get_valid_domain_for_dev(dev);
3300}
3301
ecb509ec 3302/* Check if the dev needs to go through non-identity map and unmap process.*/
73676832 3303static int iommu_no_mapping(struct device *dev)
2c2e2c38
FY
3304{
3305 int found;
3306
3d89194a 3307 if (iommu_dummy(dev))
1e4c64c4
DW
3308 return 1;
3309
2c2e2c38 3310 if (!iommu_identity_mapping)
1e4c64c4 3311 return 0;
2c2e2c38 3312
9b226624 3313 found = identity_mapping(dev);
2c2e2c38 3314 if (found) {
ecb509ec 3315 if (iommu_should_identity_map(dev, 0))
2c2e2c38
FY
3316 return 1;
3317 else {
3318 /*
3319 * 32 bit DMA is removed from si_domain and fall back
3320 * to non-identity mapping.
3321 */
bf9c9eda 3322 domain_remove_one_dev_info(si_domain, dev);
9f10e5bf
JR
3323 pr_info("32bit %s uses non-identity mapping\n",
3324 dev_name(dev));
2c2e2c38
FY
3325 return 0;
3326 }
3327 } else {
3328 /*
3329 * In case of a detached 64 bit DMA device from vm, the device
3330 * is put into si_domain for identity mapping.
3331 */
ecb509ec 3332 if (iommu_should_identity_map(dev, 0)) {
2c2e2c38 3333 int ret;
5913c9bf 3334 ret = domain_add_dev_info(si_domain, dev,
5fe60f4e
DW
3335 hw_pass_through ?
3336 CONTEXT_TT_PASS_THROUGH :
3337 CONTEXT_TT_MULTI_LEVEL);
2c2e2c38 3338 if (!ret) {
9f10e5bf
JR
3339 pr_info("64bit %s uses identity mapping\n",
3340 dev_name(dev));
2c2e2c38
FY
3341 return 1;
3342 }
3343 }
3344 }
3345
1e4c64c4 3346 return 0;
2c2e2c38
FY
3347}
3348
5040a918 3349static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
bb9e6d65 3350 size_t size, int dir, u64 dma_mask)
f76aec76 3351{
f76aec76 3352 struct dmar_domain *domain;
5b6985ce 3353 phys_addr_t start_paddr;
f76aec76
KA
3354 struct iova *iova;
3355 int prot = 0;
6865f0d1 3356 int ret;
8c11e798 3357 struct intel_iommu *iommu;
33041ec0 3358 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
f76aec76
KA
3359
3360 BUG_ON(dir == DMA_NONE);
2c2e2c38 3361
5040a918 3362 if (iommu_no_mapping(dev))
6865f0d1 3363 return paddr;
f76aec76 3364
5040a918 3365 domain = get_valid_domain_for_dev(dev);
f76aec76
KA
3366 if (!domain)
3367 return 0;
3368
8c11e798 3369 iommu = domain_get_iommu(domain);
88cb6a74 3370 size = aligned_nrpages(paddr, size);
f76aec76 3371
5040a918 3372 iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
f76aec76
KA
3373 if (!iova)
3374 goto error;
3375
ba395927
KA
3376 /*
3377 * Check if DMAR supports zero-length reads on write only
3378 * mappings..
3379 */
3380 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3381 !cap_zlr(iommu->cap))
ba395927
KA
3382 prot |= DMA_PTE_READ;
3383 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3384 prot |= DMA_PTE_WRITE;
3385 /*
6865f0d1 3386 * paddr - (paddr + size) might be a partial page; we should map the whole
ba395927 3387 * page. Note: if two parts of one page are separately mapped, we
6865f0d1 3388 * might have two guest_addr mappings to the same host paddr, but this
ba395927
KA
3389 * is not a big problem
3390 */
0ab36de2 3391 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
33041ec0 3392 mm_to_dma_pfn(paddr_pfn), size, prot);
ba395927
KA
3393 if (ret)
3394 goto error;
3395
1f0ef2aa
DW
3396 /* it's a non-present to present mapping. Only flush if caching mode */
3397 if (cap_caching_mode(iommu->cap))
ea8ea460 3398 iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 0, 1);
1f0ef2aa 3399 else
8c11e798 3400 iommu_flush_write_buffer(iommu);
f76aec76 3401
03d6a246
DW
3402 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
3403 start_paddr += paddr & ~PAGE_MASK;
3404 return start_paddr;
ba395927 3405
ba395927 3406error:
f76aec76
KA
3407 if (iova)
3408 __free_iova(&domain->iovad, iova);
9f10e5bf 3409 pr_err("Device %s request: %zx@%llx dir %d --- failed\n",
5040a918 3410 dev_name(dev), size, (unsigned long long)paddr, dir);
ba395927
KA
3411 return 0;
3412}
3413
ffbbef5c
FT
3414static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3415 unsigned long offset, size_t size,
3416 enum dma_data_direction dir,
3417 struct dma_attrs *attrs)
bb9e6d65 3418{
ffbbef5c 3419 return __intel_map_single(dev, page_to_phys(page) + offset, size,
46333e37 3420 dir, *dev->dma_mask);
bb9e6d65
FT
3421}
3422
5e0d2a6f 3423static void flush_unmaps(void)
3424{
80b20dd8 3425 int i, j;
5e0d2a6f 3426
5e0d2a6f 3427 timer_on = 0;
3428
3429 /* just flush them all */
3430 for (i = 0; i < g_num_of_iommus; i++) {
a2bb8459
WH
3431 struct intel_iommu *iommu = g_iommus[i];
3432 if (!iommu)
3433 continue;
c42d9f32 3434
9dd2fe89
YZ
3435 if (!deferred_flush[i].next)
3436 continue;
3437
78d5f0f5
NA
 3438 /* In caching mode, global flushes make emulation expensive */
3439 if (!cap_caching_mode(iommu->cap))
3440 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
93a23a72 3441 DMA_TLB_GLOBAL_FLUSH);
9dd2fe89 3442 for (j = 0; j < deferred_flush[i].next; j++) {
93a23a72
YZ
3443 unsigned long mask;
3444 struct iova *iova = deferred_flush[i].iova[j];
78d5f0f5
NA
3445 struct dmar_domain *domain = deferred_flush[i].domain[j];
3446
3447 /* On real hardware multiple invalidations are expensive */
3448 if (cap_caching_mode(iommu->cap))
3449 iommu_flush_iotlb_psi(iommu, domain->id,
a156ef99 3450 iova->pfn_lo, iova_size(iova),
ea8ea460 3451 !deferred_flush[i].freelist[j], 0);
78d5f0f5 3452 else {
a156ef99 3453 mask = ilog2(mm_to_dma_pfn(iova_size(iova)));
78d5f0f5
NA
3454 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
3455 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
3456 }
93a23a72 3457 __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
ea8ea460
DW
3458 if (deferred_flush[i].freelist[j])
3459 dma_free_pagelist(deferred_flush[i].freelist[j]);
80b20dd8 3460 }
9dd2fe89 3461 deferred_flush[i].next = 0;
5e0d2a6f 3462 }
3463
5e0d2a6f 3464 list_size = 0;
5e0d2a6f 3465}
3466
3467static void flush_unmaps_timeout(unsigned long data)
3468{
80b20dd8 3469 unsigned long flags;
3470
3471 spin_lock_irqsave(&async_umap_flush_lock, flags);
5e0d2a6f 3472 flush_unmaps();
80b20dd8 3473 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
5e0d2a6f 3474}
3475
ea8ea460 3476static void add_unmap(struct dmar_domain *dom, struct iova *iova, struct page *freelist)
5e0d2a6f 3477{
3478 unsigned long flags;
80b20dd8 3479 int next, iommu_id;
8c11e798 3480 struct intel_iommu *iommu;
5e0d2a6f 3481
3482 spin_lock_irqsave(&async_umap_flush_lock, flags);
80b20dd8 3483 if (list_size == HIGH_WATER_MARK)
3484 flush_unmaps();
3485
8c11e798
WH
3486 iommu = domain_get_iommu(dom);
3487 iommu_id = iommu->seq_id;
c42d9f32 3488
80b20dd8 3489 next = deferred_flush[iommu_id].next;
3490 deferred_flush[iommu_id].domain[next] = dom;
3491 deferred_flush[iommu_id].iova[next] = iova;
ea8ea460 3492 deferred_flush[iommu_id].freelist[next] = freelist;
80b20dd8 3493 deferred_flush[iommu_id].next++;
5e0d2a6f 3494
3495 if (!timer_on) {
3496 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
3497 timer_on = 1;
3498 }
3499 list_size++;
3500 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3501}
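
To summarise the deferred-unmap machinery above (editorial note): freed IOVAs are queued per IOMMU in deferred_flush[] and released in batches, either when the queue hits HIGH_WATER_MARK entries or when the 10ms unmap_timer fires, trading a short window of stale IOTLB entries for far fewer invalidations.

/* Rough life cycle (sketch, not driver code):
 *   intel_unmap() -> add_unmap(domain, iova, freelist)
 *       -> deferred_flush[iommu_id] grows, timer armed if idle
 *   flush_unmaps_timeout() or high-water mark -> flush_unmaps()
 *       -> one global IOTLB flush per IOMMU (or per-IOVA PSI flushes in
 *          caching mode), then __free_iova() and dma_free_pagelist()
 *          for each queued entry
 */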
3502
d41a4adb 3503static void intel_unmap(struct device *dev, dma_addr_t dev_addr)
ba395927 3504{
f76aec76 3505 struct dmar_domain *domain;
d794dc9b 3506 unsigned long start_pfn, last_pfn;
ba395927 3507 struct iova *iova;
8c11e798 3508 struct intel_iommu *iommu;
ea8ea460 3509 struct page *freelist;
ba395927 3510
73676832 3511 if (iommu_no_mapping(dev))
f76aec76 3512 return;
2c2e2c38 3513
1525a29a 3514 domain = find_domain(dev);
ba395927
KA
3515 BUG_ON(!domain);
3516
8c11e798
WH
3517 iommu = domain_get_iommu(domain);
3518
ba395927 3519 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
85b98276
DW
3520 if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
3521 (unsigned long long)dev_addr))
ba395927 3522 return;
ba395927 3523
d794dc9b
DW
3524 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3525 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
ba395927 3526
d794dc9b 3527 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
207e3592 3528 dev_name(dev), start_pfn, last_pfn);
ba395927 3529
ea8ea460 3530 freelist = domain_unmap(domain, start_pfn, last_pfn);
d794dc9b 3531
5e0d2a6f 3532 if (intel_iommu_strict) {
03d6a246 3533 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
ea8ea460 3534 last_pfn - start_pfn + 1, !freelist, 0);
5e0d2a6f 3535 /* free iova */
3536 __free_iova(&domain->iovad, iova);
ea8ea460 3537 dma_free_pagelist(freelist);
5e0d2a6f 3538 } else {
ea8ea460 3539 add_unmap(domain, iova, freelist);
5e0d2a6f 3540 /*
 3541 * queue up the release of the unmap to save roughly 1/6th of the
 3542 * CPU time otherwise consumed by a synchronous iotlb flush operation
3543 */
5e0d2a6f 3544 }
ba395927
KA
3545}
3546
d41a4adb
JL
3547static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3548 size_t size, enum dma_data_direction dir,
3549 struct dma_attrs *attrs)
3550{
3551 intel_unmap(dev, dev_addr);
3552}
3553
5040a918 3554static void *intel_alloc_coherent(struct device *dev, size_t size,
baa676fc
AP
3555 dma_addr_t *dma_handle, gfp_t flags,
3556 struct dma_attrs *attrs)
ba395927 3557{
36746436 3558 struct page *page = NULL;
ba395927
KA
3559 int order;
3560
5b6985ce 3561 size = PAGE_ALIGN(size);
ba395927 3562 order = get_order(size);
e8bb910d 3563
5040a918 3564 if (!iommu_no_mapping(dev))
e8bb910d 3565 flags &= ~(GFP_DMA | GFP_DMA32);
5040a918
DW
3566 else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
3567 if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
e8bb910d
AW
3568 flags |= GFP_DMA;
3569 else
3570 flags |= GFP_DMA32;
3571 }
ba395927 3572
36746436
AM
3573 if (flags & __GFP_WAIT) {
3574 unsigned int count = size >> PAGE_SHIFT;
3575
3576 page = dma_alloc_from_contiguous(dev, count, order);
3577 if (page && iommu_no_mapping(dev) &&
3578 page_to_phys(page) + size > dev->coherent_dma_mask) {
3579 dma_release_from_contiguous(dev, page, count);
3580 page = NULL;
3581 }
3582 }
3583
3584 if (!page)
3585 page = alloc_pages(flags, order);
3586 if (!page)
ba395927 3587 return NULL;
36746436 3588 memset(page_address(page), 0, size);
ba395927 3589
36746436 3590 *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
bb9e6d65 3591 DMA_BIDIRECTIONAL,
5040a918 3592 dev->coherent_dma_mask);
ba395927 3593 if (*dma_handle)
36746436
AM
3594 return page_address(page);
3595 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3596 __free_pages(page, order);
3597
ba395927
KA
3598 return NULL;
3599}
3600
5040a918 3601static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
baa676fc 3602 dma_addr_t dma_handle, struct dma_attrs *attrs)
ba395927
KA
3603{
3604 int order;
36746436 3605 struct page *page = virt_to_page(vaddr);
ba395927 3606
5b6985ce 3607 size = PAGE_ALIGN(size);
ba395927
KA
3608 order = get_order(size);
3609
d41a4adb 3610 intel_unmap(dev, dma_handle);
36746436
AM
3611 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3612 __free_pages(page, order);
ba395927
KA
3613}
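/*
 * Illustrative sketch (not part of this driver): how a PCI driver typically
 * reaches intel_alloc_coherent()/intel_free_coherent() above, namely through
 * the generic dma_alloc_coherent()/dma_free_coherent() API from
 * <linux/dma-mapping.h>.  The device pointer and buffer size below are
 * assumptions made up for the example.
 */
#if 0	/* example only, not compiled */
static int example_alloc_ring(struct pci_dev *pdev)
{
	dma_addr_t ring_dma;
	void *ring;

	/* Ends up in intel_dma_ops.alloc == intel_alloc_coherent() */
	ring = dma_alloc_coherent(&pdev->dev, PAGE_SIZE, &ring_dma, GFP_KERNEL);
	if (!ring)
		return -ENOMEM;

	/* ... program ring_dma into the device, use 'ring' from the CPU ... */

	/* Ends up in intel_dma_ops.free == intel_free_coherent() */
	dma_free_coherent(&pdev->dev, PAGE_SIZE, ring, ring_dma);
	return 0;
}
#endif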
3614
5040a918 3615static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
d7ab5c46
FT
3616 int nelems, enum dma_data_direction dir,
3617 struct dma_attrs *attrs)
ba395927 3618{
d41a4adb 3619 intel_unmap(dev, sglist[0].dma_address);
ba395927
KA
3620}
3621
ba395927 3622static int intel_nontranslate_map_sg(struct device *hddev,
c03ab37c 3623 struct scatterlist *sglist, int nelems, int dir)
ba395927
KA
3624{
3625 int i;
c03ab37c 3626 struct scatterlist *sg;
ba395927 3627
c03ab37c 3628 for_each_sg(sglist, sg, nelems, i) {
12d4d40e 3629 BUG_ON(!sg_page(sg));
4cf2e75d 3630 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
c03ab37c 3631 sg->dma_length = sg->length;
ba395927
KA
3632 }
3633 return nelems;
3634}
3635
5040a918 3636static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
d7ab5c46 3637 enum dma_data_direction dir, struct dma_attrs *attrs)
ba395927 3638{
ba395927 3639 int i;
ba395927 3640 struct dmar_domain *domain;
f76aec76
KA
3641 size_t size = 0;
3642 int prot = 0;
f76aec76
KA
3643 struct iova *iova = NULL;
3644 int ret;
c03ab37c 3645 struct scatterlist *sg;
b536d24d 3646 unsigned long start_vpfn;
8c11e798 3647 struct intel_iommu *iommu;
ba395927
KA
3648
3649 BUG_ON(dir == DMA_NONE);
5040a918
DW
3650 if (iommu_no_mapping(dev))
3651 return intel_nontranslate_map_sg(dev, sglist, nelems, dir);
ba395927 3652
5040a918 3653 domain = get_valid_domain_for_dev(dev);
f76aec76
KA
3654 if (!domain)
3655 return 0;
3656
8c11e798
WH
3657 iommu = domain_get_iommu(domain);
3658
b536d24d 3659 for_each_sg(sglist, sg, nelems, i)
88cb6a74 3660 size += aligned_nrpages(sg->offset, sg->length);
f76aec76 3661
5040a918
DW
3662 iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
3663 *dev->dma_mask);
f76aec76 3664 if (!iova) {
c03ab37c 3665 sglist->dma_length = 0;
f76aec76
KA
3666 return 0;
3667 }
3668
3669 /*
 3670 * Check if DMAR supports zero-length reads on write-only
 3671 * mappings.
3672 */
 3673 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
8c11e798 3674 !cap_zlr(iommu->cap))
f76aec76
KA
3675 prot |= DMA_PTE_READ;
3676 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3677 prot |= DMA_PTE_WRITE;
3678
b536d24d 3679 start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
e1605495 3680
f532959b 3681 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
e1605495 3682 if (unlikely(ret)) {
e1605495
DW
3683 dma_pte_free_pagetable(domain, start_vpfn,
3684 start_vpfn + size - 1);
e1605495
DW
3685 __free_iova(&domain->iovad, iova);
3686 return 0;
ba395927
KA
3687 }
3688
1f0ef2aa
DW
3689 /* it's a non-present to present mapping. Only flush if caching mode */
3690 if (cap_caching_mode(iommu->cap))
ea8ea460 3691 iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 0, 1);
1f0ef2aa 3692 else
8c11e798 3693 iommu_flush_write_buffer(iommu);
1f0ef2aa 3694
ba395927
KA
3695 return nelems;
3696}
3697
dfb805e8
FT
3698static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3699{
3700 return !dma_addr;
3701}
3702
160c1d8e 3703struct dma_map_ops intel_dma_ops = {
baa676fc
AP
3704 .alloc = intel_alloc_coherent,
3705 .free = intel_free_coherent,
ba395927
KA
3706 .map_sg = intel_map_sg,
3707 .unmap_sg = intel_unmap_sg,
ffbbef5c
FT
3708 .map_page = intel_map_page,
3709 .unmap_page = intel_unmap_page,
dfb805e8 3710 .mapping_error = intel_mapping_error,
ba395927
KA
3711};
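/*
 * Illustrative sketch (not part of this driver): the ops table above is what
 * drivers hit indirectly via the streaming DMA API.  dma_map_page() resolves
 * to .map_page (intel_map_page) and dma_unmap_page() to .unmap_page
 * (intel_unmap_page); the scatterlist variants reach .map_sg/.unmap_sg.
 * The device, page and DMA direction below are assumptions for the example.
 */
#if 0	/* example only, not compiled */
static int example_stream_one_page(struct device *dev, struct page *page)
{
	dma_addr_t handle;

	handle = dma_map_page(dev, page, 0, PAGE_SIZE, DMA_TO_DEVICE);
	if (dma_mapping_error(dev, handle))	/* calls .mapping_error */
		return -ENOMEM;

	/* ... hand 'handle' to the device and wait for the DMA to finish ... */

	dma_unmap_page(dev, handle, PAGE_SIZE, DMA_TO_DEVICE);
	return 0;
}
#endif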
3712
3713static inline int iommu_domain_cache_init(void)
3714{
3715 int ret = 0;
3716
3717 iommu_domain_cache = kmem_cache_create("iommu_domain",
3718 sizeof(struct dmar_domain),
3719 0,
3720 SLAB_HWCACHE_ALIGN,
 3722 NULL);
3723 if (!iommu_domain_cache) {
9f10e5bf 3724 pr_err("Couldn't create iommu_domain cache\n");
ba395927
KA
3725 ret = -ENOMEM;
3726 }
3727
3728 return ret;
3729}
3730
3731static inline int iommu_devinfo_cache_init(void)
3732{
3733 int ret = 0;
3734
3735 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3736 sizeof(struct device_domain_info),
3737 0,
3738 SLAB_HWCACHE_ALIGN,
ba395927
KA
3739 NULL);
3740 if (!iommu_devinfo_cache) {
9f10e5bf 3741 pr_err("Couldn't create devinfo cache\n");
ba395927
KA
3742 ret = -ENOMEM;
3743 }
3744
3745 return ret;
3746}
3747
ba395927
KA
3748static int __init iommu_init_mempool(void)
3749{
3750 int ret;
3751 ret = iommu_iova_cache_init();
3752 if (ret)
3753 return ret;
3754
3755 ret = iommu_domain_cache_init();
3756 if (ret)
3757 goto domain_error;
3758
3759 ret = iommu_devinfo_cache_init();
3760 if (!ret)
3761 return ret;
3762
3763 kmem_cache_destroy(iommu_domain_cache);
3764domain_error:
85b45456 3765 iommu_iova_cache_destroy();
ba395927
KA
3766
3767 return -ENOMEM;
3768}
3769
3770static void __init iommu_exit_mempool(void)
3771{
3772 kmem_cache_destroy(iommu_devinfo_cache);
3773 kmem_cache_destroy(iommu_domain_cache);
85b45456 3774 iommu_iova_cache_destroy();
ba395927
KA
3775}
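/*
 * Illustrative sketch (not part of this driver): the slab caches created
 * above back the small allocation helpers used elsewhere in this file for
 * domain and device_domain_info objects.  A minimal allocate/free pair
 * against iommu_devinfo_cache could look like the following; the GFP flag
 * is an assumption for the example.
 */
#if 0	/* example only, not compiled */
static struct device_domain_info *example_alloc_devinfo(void)
{
	return kmem_cache_zalloc(iommu_devinfo_cache, GFP_ATOMIC);
}

static void example_free_devinfo(struct device_domain_info *info)
{
	kmem_cache_free(iommu_devinfo_cache, info);
}
#endif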
3776
556ab45f
DW
3777static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3778{
3779 struct dmar_drhd_unit *drhd;
3780 u32 vtbar;
3781 int rc;
3782
3783 /* We know that this device on this chipset has its own IOMMU.
3784 * If we find it under a different IOMMU, then the BIOS is lying
3785 * to us. Hope that the IOMMU for this device is actually
3786 * disabled, and it needs no translation...
3787 */
3788 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3789 if (rc) {
3790 /* "can't" happen */
3791 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3792 return;
3793 }
3794 vtbar &= 0xffff0000;
3795
 3796 /* we know that this iommu should be at offset 0xa000 from vtbar */
3797 drhd = dmar_find_matched_drhd_unit(pdev);
3798 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3799 TAINT_FIRMWARE_WORKAROUND,
3800 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3801 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3802}
3803DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3804
ba395927
KA
3805static void __init init_no_remapping_devices(void)
3806{
3807 struct dmar_drhd_unit *drhd;
832bd858 3808 struct device *dev;
b683b230 3809 int i;
ba395927
KA
3810
3811 for_each_drhd_unit(drhd) {
3812 if (!drhd->include_all) {
b683b230
JL
3813 for_each_active_dev_scope(drhd->devices,
3814 drhd->devices_cnt, i, dev)
3815 break;
832bd858 3816 /* ignore DMAR unit if no devices exist */
ba395927
KA
3817 if (i == drhd->devices_cnt)
3818 drhd->ignored = 1;
3819 }
3820 }
3821
7c919779 3822 for_each_active_drhd_unit(drhd) {
7c919779 3823 if (drhd->include_all)
ba395927
KA
3824 continue;
3825
b683b230
JL
3826 for_each_active_dev_scope(drhd->devices,
3827 drhd->devices_cnt, i, dev)
832bd858 3828 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
ba395927 3829 break;
ba395927
KA
3830 if (i < drhd->devices_cnt)
3831 continue;
3832
c0771df8
DW
3833 /* This IOMMU has *only* gfx devices. Either bypass it or
3834 set the gfx_mapped flag, as appropriate */
3835 if (dmar_map_gfx) {
3836 intel_iommu_gfx_mapped = 1;
3837 } else {
3838 drhd->ignored = 1;
b683b230
JL
3839 for_each_active_dev_scope(drhd->devices,
3840 drhd->devices_cnt, i, dev)
832bd858 3841 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
ba395927
KA
3842 }
3843 }
3844}
3845
f59c7b69
FY
3846#ifdef CONFIG_SUSPEND
3847static int init_iommu_hw(void)
3848{
3849 struct dmar_drhd_unit *drhd;
3850 struct intel_iommu *iommu = NULL;
3851
3852 for_each_active_iommu(iommu, drhd)
3853 if (iommu->qi)
3854 dmar_reenable_qi(iommu);
3855
b779260b
JC
3856 for_each_iommu(iommu, drhd) {
3857 if (drhd->ignored) {
3858 /*
3859 * we always have to disable PMRs or DMA may fail on
3860 * this device
3861 */
3862 if (force_on)
3863 iommu_disable_protect_mem_regions(iommu);
3864 continue;
3865 }
3866
f59c7b69
FY
3867 iommu_flush_write_buffer(iommu);
3868
3869 iommu_set_root_entry(iommu);
3870
3871 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3872 DMA_CCMD_GLOBAL_INVL);
2a41ccee
JL
3873 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3874 iommu_enable_translation(iommu);
b94996c9 3875 iommu_disable_protect_mem_regions(iommu);
f59c7b69
FY
3876 }
3877
3878 return 0;
3879}
3880
3881static void iommu_flush_all(void)
3882{
3883 struct dmar_drhd_unit *drhd;
3884 struct intel_iommu *iommu;
3885
3886 for_each_active_iommu(iommu, drhd) {
3887 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3888 DMA_CCMD_GLOBAL_INVL);
f59c7b69 3889 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 3890 DMA_TLB_GLOBAL_FLUSH);
f59c7b69
FY
3891 }
3892}
3893
134fac3f 3894static int iommu_suspend(void)
f59c7b69
FY
3895{
3896 struct dmar_drhd_unit *drhd;
3897 struct intel_iommu *iommu = NULL;
3898 unsigned long flag;
3899
3900 for_each_active_iommu(iommu, drhd) {
3901 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3902 GFP_ATOMIC);
3903 if (!iommu->iommu_state)
3904 goto nomem;
3905 }
3906
3907 iommu_flush_all();
3908
3909 for_each_active_iommu(iommu, drhd) {
3910 iommu_disable_translation(iommu);
3911
1f5b3c3f 3912 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
3913
3914 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3915 readl(iommu->reg + DMAR_FECTL_REG);
3916 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3917 readl(iommu->reg + DMAR_FEDATA_REG);
3918 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3919 readl(iommu->reg + DMAR_FEADDR_REG);
3920 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3921 readl(iommu->reg + DMAR_FEUADDR_REG);
3922
1f5b3c3f 3923 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
3924 }
3925 return 0;
3926
3927nomem:
3928 for_each_active_iommu(iommu, drhd)
3929 kfree(iommu->iommu_state);
3930
3931 return -ENOMEM;
3932}
3933
134fac3f 3934static void iommu_resume(void)
f59c7b69
FY
3935{
3936 struct dmar_drhd_unit *drhd;
3937 struct intel_iommu *iommu = NULL;
3938 unsigned long flag;
3939
3940 if (init_iommu_hw()) {
b779260b
JC
3941 if (force_on)
3942 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3943 else
3944 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
134fac3f 3945 return;
f59c7b69
FY
3946 }
3947
3948 for_each_active_iommu(iommu, drhd) {
3949
1f5b3c3f 3950 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
3951
3952 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3953 iommu->reg + DMAR_FECTL_REG);
3954 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3955 iommu->reg + DMAR_FEDATA_REG);
3956 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3957 iommu->reg + DMAR_FEADDR_REG);
3958 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3959 iommu->reg + DMAR_FEUADDR_REG);
3960
1f5b3c3f 3961 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
3962 }
3963
3964 for_each_active_iommu(iommu, drhd)
3965 kfree(iommu->iommu_state);
f59c7b69
FY
3966}
3967
134fac3f 3968static struct syscore_ops iommu_syscore_ops = {
f59c7b69
FY
3969 .resume = iommu_resume,
3970 .suspend = iommu_suspend,
3971};
3972
134fac3f 3973static void __init init_iommu_pm_ops(void)
f59c7b69 3974{
134fac3f 3975 register_syscore_ops(&iommu_syscore_ops);
f59c7b69
FY
3976}
3977
3978#else
99592ba4 3979static inline void init_iommu_pm_ops(void) {}
f59c7b69
FY
3980#endif /* CONFIG_PM */
3981
318fe7df 3982
c2a0b538 3983int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
318fe7df
SS
3984{
3985 struct acpi_dmar_reserved_memory *rmrr;
3986 struct dmar_rmrr_unit *rmrru;
3987
3988 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
3989 if (!rmrru)
3990 return -ENOMEM;
3991
3992 rmrru->hdr = header;
3993 rmrr = (struct acpi_dmar_reserved_memory *)header;
3994 rmrru->base_address = rmrr->base_address;
3995 rmrru->end_address = rmrr->end_address;
2e455289
JL
3996 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
3997 ((void *)rmrr) + rmrr->header.length,
3998 &rmrru->devices_cnt);
3999 if (rmrru->devices_cnt && rmrru->devices == NULL) {
4000 kfree(rmrru);
4001 return -ENOMEM;
4002 }
318fe7df 4003
2e455289 4004 list_add(&rmrru->list, &dmar_rmrr_units);
318fe7df 4005
2e455289 4006 return 0;
318fe7df
SS
4007}
4008
6b197249
JL
4009static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
4010{
4011 struct dmar_atsr_unit *atsru;
4012 struct acpi_dmar_atsr *tmp;
4013
4014 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4015 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
4016 if (atsr->segment != tmp->segment)
4017 continue;
4018 if (atsr->header.length != tmp->header.length)
4019 continue;
4020 if (memcmp(atsr, tmp, atsr->header.length) == 0)
4021 return atsru;
4022 }
4023
4024 return NULL;
4025}
4026
4027int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
318fe7df
SS
4028{
4029 struct acpi_dmar_atsr *atsr;
4030 struct dmar_atsr_unit *atsru;
4031
6b197249
JL
4032 if (system_state != SYSTEM_BOOTING && !intel_iommu_enabled)
4033 return 0;
4034
318fe7df 4035 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
6b197249
JL
4036 atsru = dmar_find_atsr(atsr);
4037 if (atsru)
4038 return 0;
4039
4040 atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
318fe7df
SS
4041 if (!atsru)
4042 return -ENOMEM;
4043
6b197249
JL
4044 /*
4045 * If memory is allocated from slab by ACPI _DSM method, we need to
4046 * copy the memory content because the memory buffer will be freed
4047 * on return.
4048 */
4049 atsru->hdr = (void *)(atsru + 1);
4050 memcpy(atsru->hdr, hdr, hdr->length);
318fe7df 4051 atsru->include_all = atsr->flags & 0x1;
2e455289
JL
4052 if (!atsru->include_all) {
4053 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
4054 (void *)atsr + atsr->header.length,
4055 &atsru->devices_cnt);
4056 if (atsru->devices_cnt && atsru->devices == NULL) {
4057 kfree(atsru);
4058 return -ENOMEM;
4059 }
4060 }
318fe7df 4061
0e242612 4062 list_add_rcu(&atsru->list, &dmar_atsr_units);
318fe7df
SS
4063
4064 return 0;
4065}
4066
9bdc531e
JL
4067static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
4068{
4069 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
4070 kfree(atsru);
4071}
4072
6b197249
JL
4073int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4074{
4075 struct acpi_dmar_atsr *atsr;
4076 struct dmar_atsr_unit *atsru;
4077
4078 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4079 atsru = dmar_find_atsr(atsr);
4080 if (atsru) {
4081 list_del_rcu(&atsru->list);
4082 synchronize_rcu();
4083 intel_iommu_free_atsr(atsru);
4084 }
4085
4086 return 0;
4087}
4088
4089int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4090{
4091 int i;
4092 struct device *dev;
4093 struct acpi_dmar_atsr *atsr;
4094 struct dmar_atsr_unit *atsru;
4095
4096 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4097 atsru = dmar_find_atsr(atsr);
4098 if (!atsru)
4099 return 0;
4100
4101 if (!atsru->include_all && atsru->devices && atsru->devices_cnt)
4102 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
4103 i, dev)
4104 return -EBUSY;
4105
4106 return 0;
4107}
4108
ffebeb46
JL
4109static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
4110{
4111 int sp, ret = 0;
4112 struct intel_iommu *iommu = dmaru->iommu;
4113
4114 if (g_iommus[iommu->seq_id])
4115 return 0;
4116
4117 if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
9f10e5bf 4118 pr_warn("%s: Doesn't support hardware pass through.\n",
ffebeb46
JL
4119 iommu->name);
4120 return -ENXIO;
4121 }
4122 if (!ecap_sc_support(iommu->ecap) &&
4123 domain_update_iommu_snooping(iommu)) {
9f10e5bf 4124 pr_warn("%s: Doesn't support snooping.\n",
ffebeb46
JL
4125 iommu->name);
4126 return -ENXIO;
4127 }
4128 sp = domain_update_iommu_superpage(iommu) - 1;
4129 if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
9f10e5bf 4130 pr_warn("%s: Doesn't support large page.\n",
ffebeb46
JL
4131 iommu->name);
4132 return -ENXIO;
4133 }
4134
4135 /*
4136 * Disable translation if already enabled prior to OS handover.
4137 */
4138 if (iommu->gcmd & DMA_GCMD_TE)
4139 iommu_disable_translation(iommu);
4140
4141 g_iommus[iommu->seq_id] = iommu;
4142 ret = iommu_init_domains(iommu);
4143 if (ret == 0)
4144 ret = iommu_alloc_root_entry(iommu);
4145 if (ret)
4146 goto out;
4147
4148 if (dmaru->ignored) {
4149 /*
4150 * we always have to disable PMRs or DMA may fail on this device
4151 */
4152 if (force_on)
4153 iommu_disable_protect_mem_regions(iommu);
4154 return 0;
4155 }
4156
4157 intel_iommu_init_qi(iommu);
4158 iommu_flush_write_buffer(iommu);
4159 ret = dmar_set_interrupt(iommu);
4160 if (ret)
4161 goto disable_iommu;
4162
4163 iommu_set_root_entry(iommu);
4164 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
4165 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4166 iommu_enable_translation(iommu);
4167
4168 if (si_domain) {
4169 ret = iommu_attach_domain(si_domain, iommu);
4170 if (ret < 0 || si_domain->id != ret)
4171 goto disable_iommu;
4172 domain_attach_iommu(si_domain, iommu);
4173 }
4174
4175 iommu_disable_protect_mem_regions(iommu);
4176 return 0;
4177
4178disable_iommu:
4179 disable_dmar_iommu(iommu);
4180out:
4181 free_dmar_iommu(iommu);
4182 return ret;
4183}
4184
6b197249
JL
4185int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
4186{
ffebeb46
JL
4187 int ret = 0;
4188 struct intel_iommu *iommu = dmaru->iommu;
4189
4190 if (!intel_iommu_enabled)
4191 return 0;
4192 if (iommu == NULL)
4193 return -EINVAL;
4194
4195 if (insert) {
4196 ret = intel_iommu_add(dmaru);
4197 } else {
4198 disable_dmar_iommu(iommu);
4199 free_dmar_iommu(iommu);
4200 }
4201
4202 return ret;
6b197249
JL
4203}
4204
9bdc531e
JL
4205static void intel_iommu_free_dmars(void)
4206{
4207 struct dmar_rmrr_unit *rmrru, *rmrr_n;
4208 struct dmar_atsr_unit *atsru, *atsr_n;
4209
4210 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
4211 list_del(&rmrru->list);
4212 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
4213 kfree(rmrru);
318fe7df
SS
4214 }
4215
9bdc531e
JL
4216 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
4217 list_del(&atsru->list);
4218 intel_iommu_free_atsr(atsru);
4219 }
318fe7df
SS
4220}
4221
4222int dmar_find_matched_atsr_unit(struct pci_dev *dev)
4223{
b683b230 4224 int i, ret = 1;
318fe7df 4225 struct pci_bus *bus;
832bd858
DW
4226 struct pci_dev *bridge = NULL;
4227 struct device *tmp;
318fe7df
SS
4228 struct acpi_dmar_atsr *atsr;
4229 struct dmar_atsr_unit *atsru;
4230
4231 dev = pci_physfn(dev);
318fe7df 4232 for (bus = dev->bus; bus; bus = bus->parent) {
b5f82ddf 4233 bridge = bus->self;
318fe7df 4234 if (!bridge || !pci_is_pcie(bridge) ||
62f87c0e 4235 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
318fe7df 4236 return 0;
b5f82ddf 4237 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
318fe7df 4238 break;
318fe7df 4239 }
b5f82ddf
JL
4240 if (!bridge)
4241 return 0;
318fe7df 4242
0e242612 4243 rcu_read_lock();
b5f82ddf
JL
4244 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4245 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4246 if (atsr->segment != pci_domain_nr(dev->bus))
4247 continue;
4248
b683b230 4249 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
832bd858 4250 if (tmp == &bridge->dev)
b683b230 4251 goto out;
b5f82ddf
JL
4252
4253 if (atsru->include_all)
b683b230 4254 goto out;
b5f82ddf 4255 }
b683b230
JL
4256 ret = 0;
4257out:
0e242612 4258 rcu_read_unlock();
318fe7df 4259
b683b230 4260 return ret;
318fe7df
SS
4261}
4262
59ce0515
JL
4263int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
4264{
4265 int ret = 0;
4266 struct dmar_rmrr_unit *rmrru;
4267 struct dmar_atsr_unit *atsru;
4268 struct acpi_dmar_atsr *atsr;
4269 struct acpi_dmar_reserved_memory *rmrr;
4270
4271 if (!intel_iommu_enabled && system_state != SYSTEM_BOOTING)
4272 return 0;
4273
4274 list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
4275 rmrr = container_of(rmrru->hdr,
4276 struct acpi_dmar_reserved_memory, header);
4277 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4278 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
4279 ((void *)rmrr) + rmrr->header.length,
4280 rmrr->segment, rmrru->devices,
4281 rmrru->devices_cnt);
27e24950 4282 if (ret < 0)
59ce0515
JL
4283 return ret;
4284 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
27e24950
JL
4285 dmar_remove_dev_scope(info, rmrr->segment,
4286 rmrru->devices, rmrru->devices_cnt);
59ce0515
JL
4287 }
4288 }
4289
4290 list_for_each_entry(atsru, &dmar_atsr_units, list) {
4291 if (atsru->include_all)
4292 continue;
4293
4294 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4295 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4296 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
4297 (void *)atsr + atsr->header.length,
4298 atsr->segment, atsru->devices,
4299 atsru->devices_cnt);
4300 if (ret > 0)
4301 break;
 4302 else if (ret < 0)
4303 return ret;
4304 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
4305 if (dmar_remove_dev_scope(info, atsr->segment,
4306 atsru->devices, atsru->devices_cnt))
4307 break;
4308 }
4309 }
4310
4311 return 0;
4312}
4313
99dcaded
FY
4314/*
 4315 * Here we only respond to a device being unbound from its driver.
 4316 *
 4317 * A newly added device is not attached to its DMAR domain here yet. That
 4318 * happens when the device is first mapped to an iova.
4319 */
4320static int device_notifier(struct notifier_block *nb,
4321 unsigned long action, void *data)
4322{
4323 struct device *dev = data;
99dcaded
FY
4324 struct dmar_domain *domain;
4325
3d89194a 4326 if (iommu_dummy(dev))
44cd613c
DW
4327 return 0;
4328
1196c2fb 4329 if (action != BUS_NOTIFY_REMOVED_DEVICE)
7e7dfab7
JL
4330 return 0;
4331
1525a29a 4332 domain = find_domain(dev);
99dcaded
FY
4333 if (!domain)
4334 return 0;
4335
3a5670e8 4336 down_read(&dmar_global_lock);
bf9c9eda 4337 domain_remove_one_dev_info(domain, dev);
ab8dfe25 4338 if (!domain_type_is_vm_or_si(domain) && list_empty(&domain->devices))
7e7dfab7 4339 domain_exit(domain);
3a5670e8 4340 up_read(&dmar_global_lock);
a97590e5 4341
99dcaded
FY
4342 return 0;
4343}
4344
4345static struct notifier_block device_nb = {
4346 .notifier_call = device_notifier,
4347};
4348
75f05569
JL
4349static int intel_iommu_memory_notifier(struct notifier_block *nb,
4350 unsigned long val, void *v)
4351{
4352 struct memory_notify *mhp = v;
4353 unsigned long long start, end;
4354 unsigned long start_vpfn, last_vpfn;
4355
4356 switch (val) {
4357 case MEM_GOING_ONLINE:
4358 start = mhp->start_pfn << PAGE_SHIFT;
4359 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
4360 if (iommu_domain_identity_map(si_domain, start, end)) {
9f10e5bf 4361 pr_warn("Failed to build identity map for [%llx-%llx]\n",
75f05569
JL
4362 start, end);
4363 return NOTIFY_BAD;
4364 }
4365 break;
4366
4367 case MEM_OFFLINE:
4368 case MEM_CANCEL_ONLINE:
4369 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4370 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
4371 while (start_vpfn <= last_vpfn) {
4372 struct iova *iova;
4373 struct dmar_drhd_unit *drhd;
4374 struct intel_iommu *iommu;
ea8ea460 4375 struct page *freelist;
75f05569
JL
4376
4377 iova = find_iova(&si_domain->iovad, start_vpfn);
4378 if (iova == NULL) {
9f10e5bf 4379 pr_debug("Failed get IOVA for PFN %lx\n",
75f05569
JL
4380 start_vpfn);
4381 break;
4382 }
4383
4384 iova = split_and_remove_iova(&si_domain->iovad, iova,
4385 start_vpfn, last_vpfn);
4386 if (iova == NULL) {
9f10e5bf 4387 pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
75f05569
JL
4388 start_vpfn, last_vpfn);
4389 return NOTIFY_BAD;
4390 }
4391
ea8ea460
DW
4392 freelist = domain_unmap(si_domain, iova->pfn_lo,
4393 iova->pfn_hi);
4394
75f05569
JL
4395 rcu_read_lock();
4396 for_each_active_iommu(iommu, drhd)
4397 iommu_flush_iotlb_psi(iommu, si_domain->id,
a156ef99 4398 iova->pfn_lo, iova_size(iova),
ea8ea460 4399 !freelist, 0);
75f05569 4400 rcu_read_unlock();
ea8ea460 4401 dma_free_pagelist(freelist);
75f05569
JL
4402
4403 start_vpfn = iova->pfn_hi + 1;
4404 free_iova_mem(iova);
4405 }
4406 break;
4407 }
4408
4409 return NOTIFY_OK;
4410}
4411
4412static struct notifier_block intel_iommu_memory_nb = {
4413 .notifier_call = intel_iommu_memory_notifier,
4414 .priority = 0
4415};
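/*
 * Descriptive note: this notifier keeps the static identity map (si_domain)
 * in sync with memory hotplug -- newly onlined ranges are identity mapped,
 * and offlined ranges are unmapped with their IOTLB entries invalidated on
 * every active IOMMU.  It is only registered (see intel_iommu_init()) when
 * si_domain is in use without hardware pass-through.
 */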
4416
a5459cfe
AW
4417
4418static ssize_t intel_iommu_show_version(struct device *dev,
4419 struct device_attribute *attr,
4420 char *buf)
4421{
4422 struct intel_iommu *iommu = dev_get_drvdata(dev);
4423 u32 ver = readl(iommu->reg + DMAR_VER_REG);
4424 return sprintf(buf, "%d:%d\n",
4425 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4426}
4427static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
4428
4429static ssize_t intel_iommu_show_address(struct device *dev,
4430 struct device_attribute *attr,
4431 char *buf)
4432{
4433 struct intel_iommu *iommu = dev_get_drvdata(dev);
4434 return sprintf(buf, "%llx\n", iommu->reg_phys);
4435}
4436static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
4437
4438static ssize_t intel_iommu_show_cap(struct device *dev,
4439 struct device_attribute *attr,
4440 char *buf)
4441{
4442 struct intel_iommu *iommu = dev_get_drvdata(dev);
4443 return sprintf(buf, "%llx\n", iommu->cap);
4444}
4445static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
4446
4447static ssize_t intel_iommu_show_ecap(struct device *dev,
4448 struct device_attribute *attr,
4449 char *buf)
4450{
4451 struct intel_iommu *iommu = dev_get_drvdata(dev);
4452 return sprintf(buf, "%llx\n", iommu->ecap);
4453}
4454static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4455
2238c082
AW
4456static ssize_t intel_iommu_show_ndoms(struct device *dev,
4457 struct device_attribute *attr,
4458 char *buf)
4459{
4460 struct intel_iommu *iommu = dev_get_drvdata(dev);
4461 return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
4462}
4463static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL);
4464
4465static ssize_t intel_iommu_show_ndoms_used(struct device *dev,
4466 struct device_attribute *attr,
4467 char *buf)
4468{
4469 struct intel_iommu *iommu = dev_get_drvdata(dev);
4470 return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
4471 cap_ndoms(iommu->cap)));
4472}
4473static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL);
4474
a5459cfe
AW
4475static struct attribute *intel_iommu_attrs[] = {
4476 &dev_attr_version.attr,
4477 &dev_attr_address.attr,
4478 &dev_attr_cap.attr,
4479 &dev_attr_ecap.attr,
2238c082
AW
4480 &dev_attr_domains_supported.attr,
4481 &dev_attr_domains_used.attr,
a5459cfe
AW
4482 NULL,
4483};
4484
4485static struct attribute_group intel_iommu_group = {
4486 .name = "intel-iommu",
4487 .attrs = intel_iommu_attrs,
4488};
4489
4490const struct attribute_group *intel_iommu_groups[] = {
4491 &intel_iommu_group,
4492 NULL,
4493};
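/*
 * Descriptive note: intel_iommu_init() below passes intel_iommu_groups to
 * iommu_device_create(), so each DMAR unit is expected to export these
 * read-only attributes under sysfs, typically as
 * /sys/class/iommu/dmar<N>/intel-iommu/{version,address,cap,ecap,
 * domains_supported,domains_used}.
 */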
4494
ba395927
KA
4495int __init intel_iommu_init(void)
4496{
9bdc531e 4497 int ret = -ENODEV;
3a93c841 4498 struct dmar_drhd_unit *drhd;
7c919779 4499 struct intel_iommu *iommu;
ba395927 4500
a59b50e9
JC
4501 /* VT-d is required for a TXT/tboot launch, so enforce that */
4502 force_on = tboot_force_iommu();
4503
3a5670e8
JL
4504 if (iommu_init_mempool()) {
4505 if (force_on)
4506 panic("tboot: Failed to initialize iommu memory\n");
4507 return -ENOMEM;
4508 }
4509
4510 down_write(&dmar_global_lock);
a59b50e9
JC
4511 if (dmar_table_init()) {
4512 if (force_on)
4513 panic("tboot: Failed to initialize DMAR table\n");
9bdc531e 4514 goto out_free_dmar;
a59b50e9 4515 }
ba395927 4516
c2c7286a 4517 if (dmar_dev_scope_init() < 0) {
a59b50e9
JC
4518 if (force_on)
4519 panic("tboot: Failed to initialize DMAR device scope\n");
9bdc531e 4520 goto out_free_dmar;
a59b50e9 4521 }
1886e8a9 4522
75f1cdf1 4523 if (no_iommu || dmar_disabled)
9bdc531e 4524 goto out_free_dmar;
2ae21010 4525
318fe7df 4526 if (list_empty(&dmar_rmrr_units))
9f10e5bf 4527 pr_info("No RMRR found\n");
318fe7df
SS
4528
4529 if (list_empty(&dmar_atsr_units))
9f10e5bf 4530 pr_info("No ATSR found\n");
318fe7df 4531
51a63e67
JC
4532 if (dmar_init_reserved_ranges()) {
4533 if (force_on)
4534 panic("tboot: Failed to reserve iommu ranges\n");
3a5670e8 4535 goto out_free_reserved_range;
51a63e67 4536 }
ba395927
KA
4537
4538 init_no_remapping_devices();
4539
b779260b 4540 ret = init_dmars();
ba395927 4541 if (ret) {
a59b50e9
JC
4542 if (force_on)
4543 panic("tboot: Failed to initialize DMARs\n");
9f10e5bf 4544 pr_err("Initialization failed\n");
9bdc531e 4545 goto out_free_reserved_range;
ba395927 4546 }
3a5670e8 4547 up_write(&dmar_global_lock);
9f10e5bf 4548 pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
ba395927 4549
5e0d2a6f 4550 init_timer(&unmap_timer);
75f1cdf1
FT
4551#ifdef CONFIG_SWIOTLB
4552 swiotlb = 0;
4553#endif
19943b0e 4554 dma_ops = &intel_dma_ops;
4ed0d3e6 4555
134fac3f 4556 init_iommu_pm_ops();
a8bcbb0d 4557
a5459cfe
AW
4558 for_each_active_iommu(iommu, drhd)
4559 iommu->iommu_dev = iommu_device_create(NULL, iommu,
4560 intel_iommu_groups,
2439d4aa 4561 "%s", iommu->name);
a5459cfe 4562
4236d97d 4563 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
99dcaded 4564 bus_register_notifier(&pci_bus_type, &device_nb);
75f05569
JL
4565 if (si_domain && !hw_pass_through)
4566 register_memory_notifier(&intel_iommu_memory_nb);
99dcaded 4567
8bc1f85c
ED
4568 intel_iommu_enabled = 1;
4569
ba395927 4570 return 0;
9bdc531e
JL
4571
4572out_free_reserved_range:
4573 put_iova_domain(&reserved_iova_list);
9bdc531e
JL
4574out_free_dmar:
4575 intel_iommu_free_dmars();
3a5670e8
JL
4576 up_write(&dmar_global_lock);
4577 iommu_exit_mempool();
9bdc531e 4578 return ret;
ba395927 4579}
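/*
 * Descriptive note on the function above: intel_iommu_init() runs in this
 * order -- honour tboot's force_on, set up the slab/iova mempools, parse the
 * DMAR table and device scopes, bail out if no_iommu/dmar_disabled, reserve
 * the special IOVA ranges, skip gfx-only or empty DRHD units, run
 * init_dmars(), and only then install intel_dma_ops, the PM/syscore hooks,
 * the per-IOMMU sysfs devices, the PCI bus iommu_ops and the bus/memory
 * notifiers before setting intel_iommu_enabled.
 */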
e820482c 4580
579305f7
AW
4581static int iommu_detach_dev_cb(struct pci_dev *pdev, u16 alias, void *opaque)
4582{
4583 struct intel_iommu *iommu = opaque;
4584
4585 iommu_detach_dev(iommu, PCI_BUS_NUM(alias), alias & 0xff);
4586 return 0;
4587}
4588
4589/*
4590 * NB - intel-iommu lacks any sort of reference counting for the users of
4591 * dependent devices. If multiple endpoints have intersecting dependent
4592 * devices, unbinding the driver from any one of them will possibly leave
4593 * the others unable to operate.
4594 */
3199aa6b 4595static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
0bcb3e28 4596 struct device *dev)
3199aa6b 4597{
0bcb3e28 4598 if (!iommu || !dev || !dev_is_pci(dev))
3199aa6b
HW
4599 return;
4600
579305f7 4601 pci_for_each_dma_alias(to_pci_dev(dev), &iommu_detach_dev_cb, iommu);
3199aa6b
HW
4602}
4603
2c2e2c38 4604static void domain_remove_one_dev_info(struct dmar_domain *domain,
bf9c9eda 4605 struct device *dev)
c7151a8d 4606{
bca2b916 4607 struct device_domain_info *info, *tmp;
c7151a8d
WH
4608 struct intel_iommu *iommu;
4609 unsigned long flags;
2f119c78 4610 bool found = false;
156baca8 4611 u8 bus, devfn;
c7151a8d 4612
bf9c9eda 4613 iommu = device_to_iommu(dev, &bus, &devfn);
c7151a8d
WH
4614 if (!iommu)
4615 return;
4616
4617 spin_lock_irqsave(&device_domain_lock, flags);
bca2b916 4618 list_for_each_entry_safe(info, tmp, &domain->devices, link) {
bf9c9eda
DW
4619 if (info->iommu == iommu && info->bus == bus &&
4620 info->devfn == devfn) {
109b9b04 4621 unlink_domain_info(info);
c7151a8d
WH
4622 spin_unlock_irqrestore(&device_domain_lock, flags);
4623
93a23a72 4624 iommu_disable_dev_iotlb(info);
c7151a8d 4625 iommu_detach_dev(iommu, info->bus, info->devfn);
bf9c9eda 4626 iommu_detach_dependent_devices(iommu, dev);
c7151a8d
WH
4627 free_devinfo_mem(info);
4628
4629 spin_lock_irqsave(&device_domain_lock, flags);
4630
4631 if (found)
4632 break;
4633 else
4634 continue;
4635 }
4636
 4637 /* if there are no other devices under the same iommu
 4638 * owned by this domain, clear this iommu in iommu_bmp,
 4639 * and update the iommu count and coherency
4640 */
8bbc4410 4641 if (info->iommu == iommu)
2f119c78 4642 found = true;
c7151a8d
WH
4643 }
4644
3e7abe25
RD
4645 spin_unlock_irqrestore(&device_domain_lock, flags);
4646
c7151a8d 4647 if (!found) {
fb170fb4
JL
4648 domain_detach_iommu(domain, iommu);
4649 if (!domain_type_is_vm_or_si(domain))
4650 iommu_detach_domain(domain, iommu);
c7151a8d 4651 }
c7151a8d
WH
4652}
4653
2c2e2c38 4654static int md_domain_init(struct dmar_domain *domain, int guest_width)
5e98c4b1
WH
4655{
4656 int adjust_width;
4657
0fb5fe87
RM
4658 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
4659 DMA_32BIT_PFN);
5e98c4b1
WH
4660 domain_reserve_special_ranges(domain);
4661
4662 /* calculate AGAW */
4663 domain->gaw = guest_width;
4664 adjust_width = guestwidth_to_adjustwidth(guest_width);
4665 domain->agaw = width_to_agaw(adjust_width);
4666
5e98c4b1 4667 domain->iommu_coherency = 0;
c5b15255 4668 domain->iommu_snooping = 0;
6dd9a7c7 4669 domain->iommu_superpage = 0;
fe40f1e0 4670 domain->max_addr = 0;
5e98c4b1
WH
4671
4672 /* always allocate the top pgd */
4c923d47 4673 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
5e98c4b1
WH
4674 if (!domain->pgd)
4675 return -ENOMEM;
4676 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4677 return 0;
4678}
4679
00a77deb 4680static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
38717946 4681{
5d450806 4682 struct dmar_domain *dmar_domain;
00a77deb
JR
4683 struct iommu_domain *domain;
4684
4685 if (type != IOMMU_DOMAIN_UNMANAGED)
4686 return NULL;
38717946 4687
ab8dfe25 4688 dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE);
5d450806 4689 if (!dmar_domain) {
9f10e5bf 4690 pr_err("Can't allocate dmar_domain\n");
00a77deb 4691 return NULL;
38717946 4692 }
2c2e2c38 4693 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
9f10e5bf 4694 pr_err("Domain initialization failed\n");
92d03cc8 4695 domain_exit(dmar_domain);
00a77deb 4696 return NULL;
38717946 4697 }
8140a95d 4698 domain_update_iommu_cap(dmar_domain);
faa3d6f5 4699
00a77deb 4700 domain = &dmar_domain->domain;
8a0e715b
JR
4701 domain->geometry.aperture_start = 0;
4702 domain->geometry.aperture_end = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
4703 domain->geometry.force_aperture = true;
4704
00a77deb 4705 return domain;
38717946 4706}
38717946 4707
00a77deb 4708static void intel_iommu_domain_free(struct iommu_domain *domain)
38717946 4709{
00a77deb 4710 domain_exit(to_dmar_domain(domain));
38717946 4711}
38717946 4712
4c5478c9
JR
4713static int intel_iommu_attach_device(struct iommu_domain *domain,
4714 struct device *dev)
38717946 4715{
00a77deb 4716 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
fe40f1e0
WH
4717 struct intel_iommu *iommu;
4718 int addr_width;
156baca8 4719 u8 bus, devfn;
faa3d6f5 4720
c875d2c1
AW
4721 if (device_is_rmrr_locked(dev)) {
4722 dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n");
4723 return -EPERM;
4724 }
4725
7207d8f9
DW
4726 /* normally dev is not mapped */
4727 if (unlikely(domain_context_mapped(dev))) {
faa3d6f5
WH
4728 struct dmar_domain *old_domain;
4729
1525a29a 4730 old_domain = find_domain(dev);
faa3d6f5 4731 if (old_domain) {
ab8dfe25 4732 if (domain_type_is_vm_or_si(dmar_domain))
bf9c9eda 4733 domain_remove_one_dev_info(old_domain, dev);
faa3d6f5
WH
4734 else
4735 domain_remove_dev_info(old_domain);
62c22167
JR
4736
4737 if (!domain_type_is_vm_or_si(old_domain) &&
4738 list_empty(&old_domain->devices))
4739 domain_exit(old_domain);
faa3d6f5
WH
4740 }
4741 }
4742
156baca8 4743 iommu = device_to_iommu(dev, &bus, &devfn);
fe40f1e0
WH
4744 if (!iommu)
4745 return -ENODEV;
4746
4747 /* check if this iommu agaw is sufficient for max mapped address */
4748 addr_width = agaw_to_width(iommu->agaw);
a99c47a2
TL
4749 if (addr_width > cap_mgaw(iommu->cap))
4750 addr_width = cap_mgaw(iommu->cap);
4751
4752 if (dmar_domain->max_addr > (1LL << addr_width)) {
9f10e5bf 4753 pr_err("%s: iommu width (%d) is not "
fe40f1e0 4754 "sufficient for the mapped address (%llx)\n",
a99c47a2 4755 __func__, addr_width, dmar_domain->max_addr);
fe40f1e0
WH
4756 return -EFAULT;
4757 }
a99c47a2
TL
4758 dmar_domain->gaw = addr_width;
4759
4760 /*
4761 * Knock out extra levels of page tables if necessary
4762 */
4763 while (iommu->agaw < dmar_domain->agaw) {
4764 struct dma_pte *pte;
4765
4766 pte = dmar_domain->pgd;
4767 if (dma_pte_present(pte)) {
25cbff16
SY
4768 dmar_domain->pgd = (struct dma_pte *)
4769 phys_to_virt(dma_pte_addr(pte));
7a661013 4770 free_pgtable_page(pte);
a99c47a2
TL
4771 }
4772 dmar_domain->agaw--;
4773 }
fe40f1e0 4774
5913c9bf 4775 return domain_add_dev_info(dmar_domain, dev, CONTEXT_TT_MULTI_LEVEL);
38717946 4776}
38717946 4777
4c5478c9
JR
4778static void intel_iommu_detach_device(struct iommu_domain *domain,
4779 struct device *dev)
38717946 4780{
00a77deb 4781 domain_remove_one_dev_info(to_dmar_domain(domain), dev);
faa3d6f5 4782}
c7151a8d 4783
b146a1c9
JR
4784static int intel_iommu_map(struct iommu_domain *domain,
4785 unsigned long iova, phys_addr_t hpa,
5009065d 4786 size_t size, int iommu_prot)
faa3d6f5 4787{
00a77deb 4788 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
fe40f1e0 4789 u64 max_addr;
dde57a21 4790 int prot = 0;
faa3d6f5 4791 int ret;
fe40f1e0 4792
dde57a21
JR
4793 if (iommu_prot & IOMMU_READ)
4794 prot |= DMA_PTE_READ;
4795 if (iommu_prot & IOMMU_WRITE)
4796 prot |= DMA_PTE_WRITE;
9cf06697
SY
4797 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
4798 prot |= DMA_PTE_SNP;
dde57a21 4799
163cc52c 4800 max_addr = iova + size;
dde57a21 4801 if (dmar_domain->max_addr < max_addr) {
fe40f1e0
WH
4802 u64 end;
4803
4804 /* check if minimum agaw is sufficient for mapped address */
8954da1f 4805 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
fe40f1e0 4806 if (end < max_addr) {
9f10e5bf 4807 pr_err("%s: iommu width (%d) is not "
fe40f1e0 4808 "sufficient for the mapped address (%llx)\n",
8954da1f 4809 __func__, dmar_domain->gaw, max_addr);
fe40f1e0
WH
4810 return -EFAULT;
4811 }
dde57a21 4812 dmar_domain->max_addr = max_addr;
fe40f1e0 4813 }
ad051221
DW
4814 /* Round up size to next multiple of PAGE_SIZE, if it and
4815 the low bits of hpa would take us onto the next page */
88cb6a74 4816 size = aligned_nrpages(hpa, size);
ad051221
DW
4817 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
4818 hpa >> VTD_PAGE_SHIFT, size, prot);
faa3d6f5 4819 return ret;
38717946 4820}
38717946 4821
5009065d 4822static size_t intel_iommu_unmap(struct iommu_domain *domain,
ea8ea460 4823 unsigned long iova, size_t size)
38717946 4824{
00a77deb 4825 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
ea8ea460
DW
4826 struct page *freelist = NULL;
4827 struct intel_iommu *iommu;
4828 unsigned long start_pfn, last_pfn;
4829 unsigned int npages;
4830 int iommu_id, num, ndomains, level = 0;
5cf0a76f
DW
4831
4832 /* Cope with horrid API which requires us to unmap more than the
4833 size argument if it happens to be a large-page mapping. */
4834 if (!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level))
4835 BUG();
4836
4837 if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
4838 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4b99d352 4839
ea8ea460
DW
4840 start_pfn = iova >> VTD_PAGE_SHIFT;
4841 last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
4842
4843 freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
4844
4845 npages = last_pfn - start_pfn + 1;
4846
4847 for_each_set_bit(iommu_id, dmar_domain->iommu_bmp, g_num_of_iommus) {
4848 iommu = g_iommus[iommu_id];
4849
4850 /*
4851 * find bit position of dmar_domain
4852 */
4853 ndomains = cap_ndoms(iommu->cap);
4854 for_each_set_bit(num, iommu->domain_ids, ndomains) {
4855 if (iommu->domains[num] == dmar_domain)
4856 iommu_flush_iotlb_psi(iommu, num, start_pfn,
4857 npages, !freelist, 0);
4858 }
4859
4860 }
4861
4862 dma_free_pagelist(freelist);
fe40f1e0 4863
163cc52c
DW
4864 if (dmar_domain->max_addr == iova + size)
4865 dmar_domain->max_addr = iova;
b146a1c9 4866
5cf0a76f 4867 return size;
38717946 4868}
38717946 4869
d14d6577 4870static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
bb5547ac 4871 dma_addr_t iova)
38717946 4872{
00a77deb 4873 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
38717946 4874 struct dma_pte *pte;
5cf0a76f 4875 int level = 0;
faa3d6f5 4876 u64 phys = 0;
38717946 4877
5cf0a76f 4878 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
38717946 4879 if (pte)
faa3d6f5 4880 phys = dma_pte_addr(pte);
38717946 4881
faa3d6f5 4882 return phys;
38717946 4883}
a8bcbb0d 4884
5d587b8d 4885static bool intel_iommu_capable(enum iommu_cap cap)
dbb9fd86 4886{
dbb9fd86 4887 if (cap == IOMMU_CAP_CACHE_COHERENCY)
5d587b8d 4888 return domain_update_iommu_snooping(NULL) == 1;
323f99cb 4889 if (cap == IOMMU_CAP_INTR_REMAP)
5d587b8d 4890 return irq_remapping_enabled == 1;
dbb9fd86 4891
5d587b8d 4892 return false;
dbb9fd86
SY
4893}
4894
abdfdde2
AW
4895static int intel_iommu_add_device(struct device *dev)
4896{
a5459cfe 4897 struct intel_iommu *iommu;
abdfdde2 4898 struct iommu_group *group;
156baca8 4899 u8 bus, devfn;
70ae6f0d 4900
a5459cfe
AW
4901 iommu = device_to_iommu(dev, &bus, &devfn);
4902 if (!iommu)
70ae6f0d
AW
4903 return -ENODEV;
4904
a5459cfe 4905 iommu_device_link(iommu->iommu_dev, dev);
a4ff1fc2 4906
e17f9ff4 4907 group = iommu_group_get_for_dev(dev);
783f157b 4908
e17f9ff4
AW
4909 if (IS_ERR(group))
4910 return PTR_ERR(group);
bcb71abe 4911
abdfdde2 4912 iommu_group_put(group);
e17f9ff4 4913 return 0;
abdfdde2 4914}
70ae6f0d 4915
abdfdde2
AW
4916static void intel_iommu_remove_device(struct device *dev)
4917{
a5459cfe
AW
4918 struct intel_iommu *iommu;
4919 u8 bus, devfn;
4920
4921 iommu = device_to_iommu(dev, &bus, &devfn);
4922 if (!iommu)
4923 return;
4924
abdfdde2 4925 iommu_group_remove_device(dev);
a5459cfe
AW
4926
4927 iommu_device_unlink(iommu->iommu_dev, dev);
70ae6f0d
AW
4928}
4929
b22f6434 4930static const struct iommu_ops intel_iommu_ops = {
5d587b8d 4931 .capable = intel_iommu_capable,
00a77deb
JR
4932 .domain_alloc = intel_iommu_domain_alloc,
4933 .domain_free = intel_iommu_domain_free,
a8bcbb0d
JR
4934 .attach_dev = intel_iommu_attach_device,
4935 .detach_dev = intel_iommu_detach_device,
b146a1c9
JR
4936 .map = intel_iommu_map,
4937 .unmap = intel_iommu_unmap,
315786eb 4938 .map_sg = default_iommu_map_sg,
a8bcbb0d 4939 .iova_to_phys = intel_iommu_iova_to_phys,
abdfdde2
AW
4940 .add_device = intel_iommu_add_device,
4941 .remove_device = intel_iommu_remove_device,
6d1c56a9 4942 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
a8bcbb0d 4943};
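/*
 * Illustrative sketch (not part of this driver): the ops table above is what
 * a KVM/VFIO-style consumer reaches through the generic IOMMU API in
 * <linux/iommu.h>; domain_alloc/attach_dev/map/unmap/domain_free each land in
 * the intel_iommu_* callbacks defined earlier.  The device pointer and the
 * IOVA/physical addresses below are assumptions for the example.
 */
#if 0	/* example only, not compiled */
static int example_iommu_api_user(struct device *dev)
{
	struct iommu_domain *domain;
	int ret;

	domain = iommu_domain_alloc(&pci_bus_type);	/* -> domain_alloc */
	if (!domain)
		return -ENOMEM;

	ret = iommu_attach_device(domain, dev);		/* -> attach_dev */
	if (ret)
		goto out_free;

	/* -> intel_iommu_map(): identity-map one page at IOVA 0x100000 */
	ret = iommu_map(domain, 0x100000, 0x100000, PAGE_SIZE,
			IOMMU_READ | IOMMU_WRITE);
	if (!ret)
		iommu_unmap(domain, 0x100000, PAGE_SIZE);	/* -> unmap */

	iommu_detach_device(domain, dev);		/* -> detach_dev */
out_free:
	iommu_domain_free(domain);			/* -> domain_free */
	return ret;
}
#endif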
9af88143 4944
9452618e
DV
4945static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
4946{
4947 /* G4x/GM45 integrated gfx dmar support is totally busted. */
9f10e5bf 4948 pr_info("Disabling IOMMU for graphics on this chipset\n");
9452618e
DV
4949 dmar_map_gfx = 0;
4950}
4951
4952DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
4953DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
4954DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
4955DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
4956DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
4957DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
4958DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
4959
d34d6517 4960static void quirk_iommu_rwbf(struct pci_dev *dev)
9af88143
DW
4961{
4962 /*
4963 * Mobile 4 Series Chipset neglects to set RWBF capability,
210561ff 4964 * but needs it. Same seems to hold for the desktop versions.
9af88143 4965 */
9f10e5bf 4966 pr_info("Forcing write-buffer flush capability\n");
9af88143
DW
4967 rwbf_quirk = 1;
4968}
4969
4970DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
210561ff
DV
4971DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
4972DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
4973DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
4974DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
4975DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
4976DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
e0fc7e0b 4977
eecfd57f
AJ
4978#define GGC 0x52
4979#define GGC_MEMORY_SIZE_MASK (0xf << 8)
4980#define GGC_MEMORY_SIZE_NONE (0x0 << 8)
4981#define GGC_MEMORY_SIZE_1M (0x1 << 8)
4982#define GGC_MEMORY_SIZE_2M (0x3 << 8)
4983#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
4984#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
4985#define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
4986#define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
4987
d34d6517 4988static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
9eecabcb
DW
4989{
4990 unsigned short ggc;
4991
eecfd57f 4992 if (pci_read_config_word(dev, GGC, &ggc))
9eecabcb
DW
4993 return;
4994
eecfd57f 4995 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
9f10e5bf 4996 pr_info("BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
9eecabcb 4997 dmar_map_gfx = 0;
6fbcfb3e
DW
4998 } else if (dmar_map_gfx) {
4999 /* we have to ensure the gfx device is idle before we flush */
9f10e5bf 5000 pr_info("Disabling batched IOTLB flush on Ironlake\n");
6fbcfb3e
DW
5001 intel_iommu_strict = 1;
5002 }
9eecabcb
DW
5003}
5004DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
5005DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
5006DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
5007DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
5008
e0fc7e0b
DW
5009/* On Tylersburg chipsets, some BIOSes have been known to enable the
5010 ISOCH DMAR unit for the Azalia sound device, but not give it any
5011 TLB entries, which causes it to deadlock. Check for that. We do
5012 this in a function called from init_dmars(), instead of in a PCI
5013 quirk, because we don't want to print the obnoxious "BIOS broken"
5014 message if VT-d is actually disabled.
5015*/
5016static void __init check_tylersburg_isoch(void)
5017{
5018 struct pci_dev *pdev;
5019 uint32_t vtisochctrl;
5020
5021 /* If there's no Azalia in the system anyway, forget it. */
5022 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
5023 if (!pdev)
5024 return;
5025 pci_dev_put(pdev);
5026
5027 /* System Management Registers. Might be hidden, in which case
5028 we can't do the sanity check. But that's OK, because the
5029 known-broken BIOSes _don't_ actually hide it, so far. */
5030 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
5031 if (!pdev)
5032 return;
5033
5034 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
5035 pci_dev_put(pdev);
5036 return;
5037 }
5038
5039 pci_dev_put(pdev);
5040
5041 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
5042 if (vtisochctrl & 1)
5043 return;
5044
5045 /* Drop all bits other than the number of TLB entries */
5046 vtisochctrl &= 0x1c;
5047
5048 /* If we have the recommended number of TLB entries (16), fine. */
5049 if (vtisochctrl == 0x10)
5050 return;
5051
5052 /* Zero TLB entries? You get to ride the short bus to school. */
5053 if (!vtisochctrl) {
5054 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
5055 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
5056 dmi_get_system_info(DMI_BIOS_VENDOR),
5057 dmi_get_system_info(DMI_BIOS_VERSION),
5058 dmi_get_system_info(DMI_PRODUCT_VERSION));
5059 iommu_identity_mapping |= IDENTMAP_AZALIA;
5060 return;
5061 }
9f10e5bf
JR
5062
5063 pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
e0fc7e0b
DW
5064 vtisochctrl);
5065}