/* drivers/iommu/intel-iommu.c (blame at commit "iommu/vt-d: Keep track of per-iommu domain ids") */
1/*
2 * Copyright © 2006-2014 Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * Authors: David Woodhouse <dwmw2@infradead.org>,
14 * Ashok Raj <ashok.raj@intel.com>,
15 * Shaohua Li <shaohua.li@intel.com>,
16 * Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
17 * Fenghua Yu <fenghua.yu@intel.com>
18 * Joerg Roedel <jroedel@suse.de>
19 */
20
21#define pr_fmt(fmt) "DMAR: " fmt
22
23#include <linux/init.h>
24#include <linux/bitmap.h>
25#include <linux/debugfs.h>
26#include <linux/export.h>
27#include <linux/slab.h>
28#include <linux/irq.h>
29#include <linux/interrupt.h>
30#include <linux/spinlock.h>
31#include <linux/pci.h>
32#include <linux/dmar.h>
33#include <linux/dma-mapping.h>
34#include <linux/mempool.h>
35#include <linux/memory.h>
36#include <linux/timer.h>
37#include <linux/iova.h>
38#include <linux/iommu.h>
39#include <linux/intel-iommu.h>
40#include <linux/syscore_ops.h>
41#include <linux/tboot.h>
42#include <linux/dmi.h>
43#include <linux/pci-ats.h>
44#include <linux/memblock.h>
45#include <linux/dma-contiguous.h>
46#include <linux/crash_dump.h>
47#include <asm/irq_remapping.h>
48#include <asm/cacheflush.h>
49#include <asm/iommu.h>
50
51#include "irq_remapping.h"
52
53#define ROOT_SIZE VTD_PAGE_SIZE
54#define CONTEXT_SIZE VTD_PAGE_SIZE
55
56#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
57#define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
58#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
59#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
60
61#define IOAPIC_RANGE_START (0xfee00000)
62#define IOAPIC_RANGE_END (0xfeefffff)
63#define IOVA_START_ADDR (0x1000)
64
65#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
66
67#define MAX_AGAW_WIDTH 64
68#define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
69
70#define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
71#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
72
73/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
74 to match. That way, we can use 'unsigned long' for PFNs with impunity. */
75#define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
76 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
77#define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
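/*
 * For example, with the default 48-bit guest address width:
 *   __DOMAIN_MAX_PFN(48) = 2^36 - 1 = 0xFFFFFFFFF
 *   DOMAIN_MAX_ADDR(48)  = 0xFFFFFFFFF000
 * On 64-bit kernels DOMAIN_MAX_PFN() passes that value through unchanged;
 * on 32-bit kernels it clamps to ULONG_MAX so PFNs still fit in an
 * unsigned long, as noted above.
 */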
78
79/* IO virtual address start page frame number */
80#define IOVA_START_PFN (1)
81
82#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
83#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32))
84#define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64))
85
86/* page table handling */
87#define LEVEL_STRIDE (9)
88#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
89
90/*
 91 * This bitmap is used to advertise the page sizes our hardware supports
92 * to the IOMMU core, which will then use this information to split
93 * physically contiguous memory regions it is mapping into page sizes
94 * that we support.
95 *
96 * Traditionally the IOMMU core just handed us the mappings directly,
 97 * after making sure the size is a power-of-two multiple of 4KiB and that the
98 * mapping has natural alignment.
99 *
100 * To retain this behavior, we currently advertise that we support
 101 * all page sizes that are a power-of-two multiple of 4KiB.
102 *
103 * If at some point we'd like to utilize the IOMMU core's new behavior,
104 * we could change this to advertise the real page sizes we support.
105 */
106#define INTEL_IOMMU_PGSIZES (~0xFFFUL)
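/*
 * ~0xFFFUL sets every bit from 12 upwards, i.e. it advertises every
 * power-of-two size of 4KiB or larger (4KiB, 8KiB, 16KiB, ...), so the
 * IOMMU core keeps handing us naturally aligned, power-of-two sized
 * chunks exactly as described above.
 */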
107
108static inline int agaw_to_level(int agaw)
109{
110 return agaw + 2;
111}
112
113static inline int agaw_to_width(int agaw)
114{
115 return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
116}
117
118static inline int width_to_agaw(int width)
119{
120 return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
121}
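/*
 * agaw/width round trip, for example:
 *   width 39 -> width_to_agaw() = 1 -> 3-level table, agaw_to_width() = 39
 *   width 48 -> width_to_agaw() = 2 -> 4-level table, agaw_to_width() = 48
 * Each extra page-table level resolves LEVEL_STRIDE (9) more bits on top
 * of the 30 bits covered by a 2-level table.
 */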
122
123static inline unsigned int level_to_offset_bits(int level)
124{
125 return (level - 1) * LEVEL_STRIDE;
126}
127
128static inline int pfn_level_offset(unsigned long pfn, int level)
129{
130 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
131}
132
133static inline unsigned long level_mask(int level)
134{
135 return -1UL << level_to_offset_bits(level);
136}
137
138static inline unsigned long level_size(int level)
139{
140 return 1UL << level_to_offset_bits(level);
141}
142
143static inline unsigned long align_to_level(unsigned long pfn, int level)
144{
145 return (pfn + level_size(level) - 1) & level_mask(level);
146}
147
148static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
149{
150 return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
151}
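/*
 * For example, a level 1 entry maps a single 4KiB page, a level 2 entry
 * (2MiB superpage) covers level_size(2) = 512 VT-d pages, and a level 3
 * entry (1GiB superpage) covers 512 * 512 = 262144 VT-d pages.
 * pfn_level_offset() picks the 9-bit index into the table at each level.
 */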
152
 153/* VT-d pages must never be _larger_ than MM pages. Otherwise things
 154 are never going to work. */
155static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
156{
157 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
158}
159
160static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
161{
162 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
163}
164static inline unsigned long page_to_dma_pfn(struct page *pg)
165{
166 return mm_to_dma_pfn(page_to_pfn(pg));
167}
168static inline unsigned long virt_to_dma_pfn(void *p)
169{
170 return page_to_dma_pfn(virt_to_page(p));
171}
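/*
 * On x86 both PAGE_SHIFT and VTD_PAGE_SHIFT are 12, so these conversions
 * are identity operations; they only matter on configurations where the
 * CPU page size is larger than the 4KiB VT-d page size.
 */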
172
173/* global iommu list, set NULL for ignored DMAR units */
174static struct intel_iommu **g_iommus;
175
176static void __init check_tylersburg_isoch(void);
177static int rwbf_quirk;
178
179/*
180 * set to 1 to panic kernel if can't successfully enable VT-d
181 * (used when kernel is launched w/ TXT)
182 */
183static int force_on = 0;
184
185/*
186 * 0: Present
187 * 1-11: Reserved
188 * 12-63: Context Ptr (12 - (haw-1))
189 * 64-127: Reserved
190 */
191struct root_entry {
192 u64 lo;
193 u64 hi;
194};
195#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
196
197/*
198 * Take a root_entry and return the Lower Context Table Pointer (LCTP)
199 * if marked present.
200 */
201static phys_addr_t root_entry_lctp(struct root_entry *re)
202{
203 if (!(re->lo & 1))
204 return 0;
205
206 return re->lo & VTD_PAGE_MASK;
207}
208
209/*
210 * Take a root_entry and return the Upper Context Table Pointer (UCTP)
211 * if marked present.
212 */
213static phys_addr_t root_entry_uctp(struct root_entry *re)
214{
215 if (!(re->hi & 1))
216 return 0;
217
218 return re->hi & VTD_PAGE_MASK;
219}
220/*
221 * low 64 bits:
222 * 0: present
223 * 1: fault processing disable
224 * 2-3: translation type
225 * 12-63: address space root
226 * high 64 bits:
227 * 0-2: address width
228 * 3-6: aval
229 * 8-23: domain id
230 */
231struct context_entry {
232 u64 lo;
233 u64 hi;
234};
235
236static inline void context_clear_pasid_enable(struct context_entry *context)
237{
238 context->lo &= ~(1ULL << 11);
239}
240
241static inline bool context_pasid_enabled(struct context_entry *context)
242{
243 return !!(context->lo & (1ULL << 11));
244}
245
246static inline void context_set_copied(struct context_entry *context)
247{
248 context->hi |= (1ull << 3);
249}
250
251static inline bool context_copied(struct context_entry *context)
252{
253 return !!(context->hi & (1ULL << 3));
254}
255
256static inline bool __context_present(struct context_entry *context)
257{
258 return (context->lo & 1);
259}
260
261static inline bool context_present(struct context_entry *context)
262{
263 return context_pasid_enabled(context) ?
264 __context_present(context) :
265 __context_present(context) && !context_copied(context);
266}
267
268static inline void context_set_present(struct context_entry *context)
269{
270 context->lo |= 1;
271}
272
273static inline void context_set_fault_enable(struct context_entry *context)
274{
275 context->lo &= (((u64)-1) << 2) | 1;
276}
277
278static inline void context_set_translation_type(struct context_entry *context,
279 unsigned long value)
280{
281 context->lo &= (((u64)-1) << 4) | 3;
282 context->lo |= (value & 3) << 2;
283}
284
285static inline void context_set_address_root(struct context_entry *context,
286 unsigned long value)
287{
288 context->lo &= ~VTD_PAGE_MASK;
289 context->lo |= value & VTD_PAGE_MASK;
290}
291
292static inline void context_set_address_width(struct context_entry *context,
293 unsigned long value)
294{
295 context->hi |= value & 7;
296}
297
298static inline void context_set_domain_id(struct context_entry *context,
299 unsigned long value)
300{
301 context->hi |= (value & ((1 << 16) - 1)) << 8;
302}
303
304static inline int context_domain_id(struct context_entry *c)
305{
306 return((c->hi >> 8) & 0xffff);
307}
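/*
 * For example, context_set_domain_id(context, 42) ORs (42 << 8) into the
 * high qword, placing the id in bits 8-23 as described in the layout
 * comment above; context_domain_id() reads the same field back.
 */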
308
309static inline void context_clear_entry(struct context_entry *context)
310{
311 context->lo = 0;
312 context->hi = 0;
313}
314
315/*
316 * 0: readable
317 * 1: writable
318 * 2-6: reserved
319 * 7: super page
320 * 8-10: available
321 * 11: snoop behavior
 322 * 12-63: Host physical address
323 */
324struct dma_pte {
325 u64 val;
326};
327
328static inline void dma_clear_pte(struct dma_pte *pte)
329{
330 pte->val = 0;
331}
332
333static inline u64 dma_pte_addr(struct dma_pte *pte)
334{
335#ifdef CONFIG_64BIT
336 return pte->val & VTD_PAGE_MASK;
337#else
338 /* Must have a full atomic 64-bit read */
339 return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
340#endif
341}
342
343static inline bool dma_pte_present(struct dma_pte *pte)
344{
345 return (pte->val & 3) != 0;
346}
347
348static inline bool dma_pte_superpage(struct dma_pte *pte)
349{
350 return (pte->val & DMA_PTE_LARGE_PAGE);
351}
352
353static inline int first_pte_in_page(struct dma_pte *pte)
354{
355 return !((unsigned long)pte & ~VTD_PAGE_MASK);
356}
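/*
 * Each dma_pte is 8 bytes, so a 4KiB page-table page holds 512 entries;
 * first_pte_in_page() is true exactly when the pointer is 4KiB aligned,
 * i.e. when it points at entry 0 of a table.  The walk loops below use it
 * to detect when they have run off the end of the current table.
 */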
357
358/*
359 * This domain is a statically identity mapping domain.
 360 * 1. This domain creates a static 1:1 mapping to all usable memory.
 361 * 2. It maps to each iommu if successful.
 362 * 3. Each iommu maps to this domain if successful.
363 */
364static struct dmar_domain *si_domain;
365static int hw_pass_through = 1;
366
 367/* domain represents a virtual machine; more than one device
 368 * across iommus may be owned by one domain, e.g. a kvm guest.
369 */
370#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 0)
371
 372/* si_domain contains multiple devices */
373#define DOMAIN_FLAG_STATIC_IDENTITY (1 << 1)
374
375struct dmar_domain {
376 int id; /* domain id */
377 int nid; /* node id */
378 DECLARE_BITMAP(iommu_bmp, DMAR_UNITS_SUPPORTED);
379 /* bitmap of iommus this domain uses*/
380
381 u16 iommu_did[DMAR_UNITS_SUPPORTED];
382 /* Domain ids per IOMMU. Use u16 since
383 * domain ids are 16 bit wide according
384 * to VT-d spec, section 9.3 */
385
386 struct list_head devices; /* all devices' list */
387 struct iova_domain iovad; /* iova's that belong to this domain */
388
389 struct dma_pte *pgd; /* virtual address */
390 int gaw; /* max guest address width */
391
392 /* adjusted guest address width, 0 is level 2 30-bit */
393 int agaw;
394
395 int flags; /* flags to find out type of domain */
396
397 int iommu_coherency;/* indicate coherency of iommu access */
398 int iommu_snooping; /* indicate snooping control feature*/
399 int iommu_count; /* reference count of iommu */
400 int iommu_superpage;/* Level of superpages supported:
401 0 == 4KiB (no superpages), 1 == 2MiB,
402 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
403 spinlock_t iommu_lock; /* protect iommu set in domain */
404 u64 max_addr; /* maximum mapped address */
405
406 struct iommu_domain domain; /* generic domain data structure for
407 iommu core */
408};
409
410/* PCI domain-device relationship */
411struct device_domain_info {
412 struct list_head link; /* link to domain siblings */
413 struct list_head global; /* link to global list */
414 u8 bus; /* PCI bus number */
415 u8 devfn; /* PCI devfn number */
416 struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
417 struct intel_iommu *iommu; /* IOMMU used by this device */
418 struct dmar_domain *domain; /* pointer to domain */
419};
420
421struct dmar_rmrr_unit {
422 struct list_head list; /* list of rmrr units */
423 struct acpi_dmar_header *hdr; /* ACPI header */
424 u64 base_address; /* reserved base address*/
425 u64 end_address; /* reserved end address */
426 struct dmar_dev_scope *devices; /* target devices */
427 int devices_cnt; /* target device count */
428};
429
430struct dmar_atsr_unit {
431 struct list_head list; /* list of ATSR units */
432 struct acpi_dmar_header *hdr; /* ACPI header */
433 struct dmar_dev_scope *devices; /* target devices */
434 int devices_cnt; /* target device count */
435 u8 include_all:1; /* include all ports */
436};
437
438static LIST_HEAD(dmar_atsr_units);
439static LIST_HEAD(dmar_rmrr_units);
440
441#define for_each_rmrr_units(rmrr) \
442 list_for_each_entry(rmrr, &dmar_rmrr_units, list)
443
444static void flush_unmaps_timeout(unsigned long data);
445
446static DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
447
448#define HIGH_WATER_MARK 250
449struct deferred_flush_tables {
450 int next;
451 struct iova *iova[HIGH_WATER_MARK];
452 struct dmar_domain *domain[HIGH_WATER_MARK];
453 struct page *freelist[HIGH_WATER_MARK];
454};
455
456static struct deferred_flush_tables *deferred_flush;
457
 458/* number of IOMMUs, used to size g_iommus and the per-domain iommu bitmaps */
459static int g_num_of_iommus;
460
461static DEFINE_SPINLOCK(async_umap_flush_lock);
462static LIST_HEAD(unmaps_to_do);
463
464static int timer_on;
465static long list_size;
466
467static void domain_exit(struct dmar_domain *domain);
468static void domain_remove_dev_info(struct dmar_domain *domain);
469static void domain_remove_one_dev_info(struct dmar_domain *domain,
470 struct device *dev);
471static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
472 struct device *dev);
473static int domain_detach_iommu(struct dmar_domain *domain,
474 struct intel_iommu *iommu);
475
476#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
477int dmar_disabled = 0;
478#else
479int dmar_disabled = 1;
480#endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
481
482int intel_iommu_enabled = 0;
483EXPORT_SYMBOL_GPL(intel_iommu_enabled);
484
485static int dmar_map_gfx = 1;
486static int dmar_forcedac;
487static int intel_iommu_strict;
488static int intel_iommu_superpage = 1;
489static int intel_iommu_ecs = 1;
490
491/* We only actually use ECS when PASID support (on the new bit 40)
492 * is also advertised. Some early implementations — the ones with
493 * PASID support on bit 28 — have issues even when we *only* use
494 * extended root/context tables. */
495#define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap) && \
496 ecap_pasid(iommu->ecap))
497
498int intel_iommu_gfx_mapped;
499EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
500
501#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
502static DEFINE_SPINLOCK(device_domain_lock);
503static LIST_HEAD(device_domain_list);
504
505static const struct iommu_ops intel_iommu_ops;
506
507static bool translation_pre_enabled(struct intel_iommu *iommu)
508{
509 return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
510}
511
512static void clear_translation_pre_enabled(struct intel_iommu *iommu)
513{
514 iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
515}
516
517static void init_translation_status(struct intel_iommu *iommu)
518{
519 u32 gsts;
520
521 gsts = readl(iommu->reg + DMAR_GSTS_REG);
522 if (gsts & DMA_GSTS_TES)
523 iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
524}
525
 526/* Convert a generic 'struct iommu_domain' to the private 'struct dmar_domain' */
527static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
528{
529 return container_of(dom, struct dmar_domain, domain);
530}
531
532static int __init intel_iommu_setup(char *str)
533{
534 if (!str)
535 return -EINVAL;
536 while (*str) {
537 if (!strncmp(str, "on", 2)) {
538 dmar_disabled = 0;
539 pr_info("IOMMU enabled\n");
540 } else if (!strncmp(str, "off", 3)) {
541 dmar_disabled = 1;
542 pr_info("IOMMU disabled\n");
543 } else if (!strncmp(str, "igfx_off", 8)) {
544 dmar_map_gfx = 0;
545 pr_info("Disable GFX device mapping\n");
546 } else if (!strncmp(str, "forcedac", 8)) {
547 pr_info("Forcing DAC for PCI devices\n");
548 dmar_forcedac = 1;
549 } else if (!strncmp(str, "strict", 6)) {
550 pr_info("Disable batched IOTLB flush\n");
551 intel_iommu_strict = 1;
552 } else if (!strncmp(str, "sp_off", 6)) {
553 pr_info("Disable supported super page\n");
554 intel_iommu_superpage = 0;
555 } else if (!strncmp(str, "ecs_off", 7)) {
556 printk(KERN_INFO
557 "Intel-IOMMU: disable extended context table support\n");
558 intel_iommu_ecs = 0;
559 }
560
561 str += strcspn(str, ",");
562 while (*str == ',')
563 str++;
564 }
565 return 0;
566}
567__setup("intel_iommu=", intel_iommu_setup);
568
569static struct kmem_cache *iommu_domain_cache;
570static struct kmem_cache *iommu_devinfo_cache;
571
572static inline void *alloc_pgtable_page(int node)
573{
574 struct page *page;
575 void *vaddr = NULL;
576
577 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
578 if (page)
579 vaddr = page_address(page);
580 return vaddr;
581}
582
583static inline void free_pgtable_page(void *vaddr)
584{
585 free_page((unsigned long)vaddr);
586}
587
588static inline void *alloc_domain_mem(void)
589{
590 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
591}
592
593static void free_domain_mem(void *vaddr)
594{
595 kmem_cache_free(iommu_domain_cache, vaddr);
596}
597
598static inline void * alloc_devinfo_mem(void)
599{
600 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
601}
602
603static inline void free_devinfo_mem(void *vaddr)
604{
605 kmem_cache_free(iommu_devinfo_cache, vaddr);
606}
607
608static inline int domain_type_is_vm(struct dmar_domain *domain)
609{
610 return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE;
611}
612
613static inline int domain_type_is_vm_or_si(struct dmar_domain *domain)
614{
615 return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE |
616 DOMAIN_FLAG_STATIC_IDENTITY);
617}
618
619static inline int domain_pfn_supported(struct dmar_domain *domain,
620 unsigned long pfn)
621{
622 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
623
624 return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
625}
626
627static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
628{
629 unsigned long sagaw;
630 int agaw = -1;
631
632 sagaw = cap_sagaw(iommu->cap);
633 for (agaw = width_to_agaw(max_gaw);
634 agaw >= 0; agaw--) {
635 if (test_bit(agaw, &sagaw))
636 break;
637 }
638
639 return agaw;
640}
641
642/*
643 * Calculate max SAGAW for each iommu.
644 */
645int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
646{
647 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
648}
649
650/*
651 * calculate agaw for each iommu.
652 * "SAGAW" may be different across iommus, use a default agaw, and
653 * get a supported less agaw for iommus that don't support the default agaw.
654 */
655int iommu_calculate_agaw(struct intel_iommu *iommu)
656{
657 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
658}
659
 660/* This function only returns a single iommu in a domain */
661static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
662{
663 int iommu_id;
664
665 /* si_domain and vm domain should not get here. */
666 BUG_ON(domain_type_is_vm_or_si(domain));
667 iommu_id = find_first_bit(domain->iommu_bmp, g_num_of_iommus);
668 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
669 return NULL;
670
671 return g_iommus[iommu_id];
672}
673
674static void domain_update_iommu_coherency(struct dmar_domain *domain)
675{
676 struct dmar_drhd_unit *drhd;
677 struct intel_iommu *iommu;
678 bool found = false;
679 int i;
680
681 domain->iommu_coherency = 1;
682
683 for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
684 found = true;
685 if (!ecap_coherent(g_iommus[i]->ecap)) {
686 domain->iommu_coherency = 0;
687 break;
688 }
689 }
690 if (found)
691 return;
692
 693 /* No iommu attached to the domain yet; use the lowest common denominator */
694 rcu_read_lock();
695 for_each_active_iommu(iommu, drhd) {
696 if (!ecap_coherent(iommu->ecap)) {
697 domain->iommu_coherency = 0;
698 break;
699 }
700 }
701 rcu_read_unlock();
702}
703
704static int domain_update_iommu_snooping(struct intel_iommu *skip)
705{
706 struct dmar_drhd_unit *drhd;
707 struct intel_iommu *iommu;
708 int ret = 1;
709
710 rcu_read_lock();
711 for_each_active_iommu(iommu, drhd) {
712 if (iommu != skip) {
713 if (!ecap_sc_support(iommu->ecap)) {
714 ret = 0;
715 break;
716 }
717 }
718 }
719 rcu_read_unlock();
720
721 return ret;
722}
723
724static int domain_update_iommu_superpage(struct intel_iommu *skip)
725{
726 struct dmar_drhd_unit *drhd;
727 struct intel_iommu *iommu;
728 int mask = 0xf;
729
730 if (!intel_iommu_superpage) {
731 return 0;
732 }
733
734 /* set iommu_superpage to the smallest common denominator */
735 rcu_read_lock();
736 for_each_active_iommu(iommu, drhd) {
737 if (iommu != skip) {
738 mask &= cap_super_page_val(iommu->cap);
739 if (!mask)
740 break;
741 }
742 }
743 rcu_read_unlock();
744
745 return fls(mask);
746}
747
748/* Some capabilities may be different across iommus */
749static void domain_update_iommu_cap(struct dmar_domain *domain)
750{
751 domain_update_iommu_coherency(domain);
752 domain->iommu_snooping = domain_update_iommu_snooping(NULL);
753 domain->iommu_superpage = domain_update_iommu_superpage(NULL);
754}
755
756static inline struct context_entry *iommu_context_addr(struct intel_iommu *iommu,
757 u8 bus, u8 devfn, int alloc)
758{
759 struct root_entry *root = &iommu->root_entry[bus];
760 struct context_entry *context;
761 u64 *entry;
762
 763 entry = &root->lo;
 764 if (ecs_enabled(iommu)) {
 765 if (devfn >= 0x80) {
 766 devfn -= 0x80;
 767 entry = &root->hi;
 768 }
 769 devfn *= 2;
 770 }
771 if (*entry & 1)
772 context = phys_to_virt(*entry & VTD_PAGE_MASK);
773 else {
774 unsigned long phy_addr;
775 if (!alloc)
776 return NULL;
777
778 context = alloc_pgtable_page(iommu->node);
779 if (!context)
780 return NULL;
781
782 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
783 phy_addr = virt_to_phys((void *)context);
784 *entry = phy_addr | 1;
785 __iommu_flush_cache(iommu, entry, sizeof(*entry));
786 }
787 return &context[devfn];
788}
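/*
 * With extended context support (ECS) enabled, each root entry is split:
 * root->lo points at the context table for devfn 0x00-0x7f and root->hi
 * at the table for devfn 0x80-0xff.  Extended context entries are 256
 * bits wide, i.e. two struct context_entry slots, which is why devfn is
 * doubled before indexing.  Without ECS, root->lo covers all 256 devfns.
 */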
789
790static int iommu_dummy(struct device *dev)
791{
792 return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
793}
794
795static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
796{
797 struct dmar_drhd_unit *drhd = NULL;
798 struct intel_iommu *iommu;
799 struct device *tmp;
800 struct pci_dev *ptmp, *pdev = NULL;
801 u16 segment = 0;
802 int i;
803
804 if (iommu_dummy(dev))
805 return NULL;
806
807 if (dev_is_pci(dev)) {
808 pdev = to_pci_dev(dev);
809 segment = pci_domain_nr(pdev->bus);
810 } else if (has_acpi_companion(dev))
811 dev = &ACPI_COMPANION(dev)->dev;
812
813 rcu_read_lock();
814 for_each_active_iommu(iommu, drhd) {
815 if (pdev && segment != drhd->segment)
816 continue;
817
818 for_each_active_dev_scope(drhd->devices,
819 drhd->devices_cnt, i, tmp) {
820 if (tmp == dev) {
821 *bus = drhd->devices[i].bus;
822 *devfn = drhd->devices[i].devfn;
823 goto out;
824 }
825
826 if (!pdev || !dev_is_pci(tmp))
827 continue;
828
829 ptmp = to_pci_dev(tmp);
830 if (ptmp->subordinate &&
831 ptmp->subordinate->number <= pdev->bus->number &&
832 ptmp->subordinate->busn_res.end >= pdev->bus->number)
833 goto got_pdev;
834 }
835
836 if (pdev && drhd->include_all) {
837 got_pdev:
838 *bus = pdev->bus->number;
839 *devfn = pdev->devfn;
840 goto out;
841 }
842 }
843 iommu = NULL;
844 out:
845 rcu_read_unlock();
846
847 return iommu;
848}
849
850static void domain_flush_cache(struct dmar_domain *domain,
851 void *addr, int size)
852{
853 if (!domain->iommu_coherency)
854 clflush_cache_range(addr, size);
855}
856
857static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
858{
859 struct context_entry *context;
860 int ret = 0;
861 unsigned long flags;
862
863 spin_lock_irqsave(&iommu->lock, flags);
864 context = iommu_context_addr(iommu, bus, devfn, 0);
865 if (context)
866 ret = context_present(context);
867 spin_unlock_irqrestore(&iommu->lock, flags);
868 return ret;
869}
870
871static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
872{
873 struct context_entry *context;
874 unsigned long flags;
875
876 spin_lock_irqsave(&iommu->lock, flags);
877 context = iommu_context_addr(iommu, bus, devfn, 0);
878 if (context) {
879 context_clear_entry(context);
880 __iommu_flush_cache(iommu, context, sizeof(*context));
881 }
882 spin_unlock_irqrestore(&iommu->lock, flags);
883}
884
885static void free_context_table(struct intel_iommu *iommu)
886{
887 int i;
888 unsigned long flags;
889 struct context_entry *context;
890
891 spin_lock_irqsave(&iommu->lock, flags);
892 if (!iommu->root_entry) {
893 goto out;
894 }
895 for (i = 0; i < ROOT_ENTRY_NR; i++) {
896 context = iommu_context_addr(iommu, i, 0, 0);
897 if (context)
898 free_pgtable_page(context);
899
900 if (!ecs_enabled(iommu))
901 continue;
902
903 context = iommu_context_addr(iommu, i, 0x80, 0);
904 if (context)
905 free_pgtable_page(context);
906
907 }
908 free_pgtable_page(iommu->root_entry);
909 iommu->root_entry = NULL;
910out:
911 spin_unlock_irqrestore(&iommu->lock, flags);
912}
913
914static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
915 unsigned long pfn, int *target_level)
916{
917 struct dma_pte *parent, *pte = NULL;
918 int level = agaw_to_level(domain->agaw);
919 int offset;
920
921 BUG_ON(!domain->pgd);
922
923 if (!domain_pfn_supported(domain, pfn))
924 /* Address beyond IOMMU's addressing capabilities. */
925 return NULL;
926
927 parent = domain->pgd;
928
929 while (1) {
930 void *tmp_page;
931
932 offset = pfn_level_offset(pfn, level);
933 pte = &parent[offset];
934 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
935 break;
936 if (level == *target_level)
937 break;
938
939 if (!dma_pte_present(pte)) {
940 uint64_t pteval;
941
942 tmp_page = alloc_pgtable_page(domain->nid);
943
944 if (!tmp_page)
945 return NULL;
946
947 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
948 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
949 if (cmpxchg64(&pte->val, 0ULL, pteval))
950 /* Someone else set it while we were thinking; use theirs. */
951 free_pgtable_page(tmp_page);
952 else
953 domain_flush_cache(domain, pte, sizeof(*pte));
954 }
955 if (level == 1)
956 break;
957
958 parent = phys_to_virt(dma_pte_addr(pte));
959 level--;
960 }
961
962 if (!*target_level)
963 *target_level = level;
964
965 return pte;
966}
967
968
969/* return address's pte at specific level */
970static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
971 unsigned long pfn,
972 int level, int *large_page)
973{
974 struct dma_pte *parent, *pte = NULL;
975 int total = agaw_to_level(domain->agaw);
976 int offset;
977
978 parent = domain->pgd;
979 while (level <= total) {
980 offset = pfn_level_offset(pfn, total);
981 pte = &parent[offset];
982 if (level == total)
983 return pte;
984
985 if (!dma_pte_present(pte)) {
986 *large_page = total;
987 break;
988 }
989
990 if (dma_pte_superpage(pte)) {
991 *large_page = total;
992 return pte;
993 }
994
995 parent = phys_to_virt(dma_pte_addr(pte));
996 total--;
997 }
998 return NULL;
999}
1000
 1001/* Clear last-level PTEs; a TLB flush must follow */
1002static void dma_pte_clear_range(struct dmar_domain *domain,
1003 unsigned long start_pfn,
1004 unsigned long last_pfn)
1005{
1006 unsigned int large_page = 1;
1007 struct dma_pte *first_pte, *pte;
1008
1009 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1010 BUG_ON(!domain_pfn_supported(domain, last_pfn));
1011 BUG_ON(start_pfn > last_pfn);
1012
1013 /* we don't need lock here; nobody else touches the iova range */
1014 do {
1015 large_page = 1;
1016 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
1017 if (!pte) {
1018 start_pfn = align_to_level(start_pfn + 1, large_page + 1);
1019 continue;
1020 }
1021 do {
1022 dma_clear_pte(pte);
1023 start_pfn += lvl_to_nr_pages(large_page);
1024 pte++;
1025 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
1026
1027 domain_flush_cache(domain, first_pte,
1028 (void *)pte - (void *)first_pte);
1029
1030 } while (start_pfn && start_pfn <= last_pfn);
1031}
1032
1033static void dma_pte_free_level(struct dmar_domain *domain, int level,
1034 struct dma_pte *pte, unsigned long pfn,
1035 unsigned long start_pfn, unsigned long last_pfn)
1036{
1037 pfn = max(start_pfn, pfn);
1038 pte = &pte[pfn_level_offset(pfn, level)];
1039
1040 do {
1041 unsigned long level_pfn;
1042 struct dma_pte *level_pte;
1043
1044 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
1045 goto next;
1046
1047 level_pfn = pfn & level_mask(level - 1);
1048 level_pte = phys_to_virt(dma_pte_addr(pte));
1049
1050 if (level > 2)
1051 dma_pte_free_level(domain, level - 1, level_pte,
1052 level_pfn, start_pfn, last_pfn);
1053
1054 /* If range covers entire pagetable, free it */
1055 if (!(start_pfn > level_pfn ||
1056 last_pfn < level_pfn + level_size(level) - 1)) {
1057 dma_clear_pte(pte);
1058 domain_flush_cache(domain, pte, sizeof(*pte));
1059 free_pgtable_page(level_pte);
1060 }
1061next:
1062 pfn += level_size(level);
1063 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1064}
1065
1066/* free page table pages. last level pte should already be cleared */
1067static void dma_pte_free_pagetable(struct dmar_domain *domain,
1068 unsigned long start_pfn,
1069 unsigned long last_pfn)
1070{
1071 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1072 BUG_ON(!domain_pfn_supported(domain, last_pfn));
1073 BUG_ON(start_pfn > last_pfn);
1074
1075 dma_pte_clear_range(domain, start_pfn, last_pfn);
1076
1077 /* We don't need lock here; nobody else touches the iova range */
1078 dma_pte_free_level(domain, agaw_to_level(domain->agaw),
1079 domain->pgd, 0, start_pfn, last_pfn);
1080
1081 /* free pgd */
1082 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1083 free_pgtable_page(domain->pgd);
1084 domain->pgd = NULL;
1085 }
1086}
1087
1088/* When a page at a given level is being unlinked from its parent, we don't
1089 need to *modify* it at all. All we need to do is make a list of all the
1090 pages which can be freed just as soon as we've flushed the IOTLB and we
1091 know the hardware page-walk will no longer touch them.
1092 The 'pte' argument is the *parent* PTE, pointing to the page that is to
1093 be freed. */
1094static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1095 int level, struct dma_pte *pte,
1096 struct page *freelist)
1097{
1098 struct page *pg;
1099
1100 pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1101 pg->freelist = freelist;
1102 freelist = pg;
1103
1104 if (level == 1)
1105 return freelist;
1106
1107 pte = page_address(pg);
1108 do {
1109 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1110 freelist = dma_pte_list_pagetables(domain, level - 1,
1111 pte, freelist);
1112 pte++;
1113 } while (!first_pte_in_page(pte));
1114
1115 return freelist;
1116}
1117
1118static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1119 struct dma_pte *pte, unsigned long pfn,
1120 unsigned long start_pfn,
1121 unsigned long last_pfn,
1122 struct page *freelist)
1123{
1124 struct dma_pte *first_pte = NULL, *last_pte = NULL;
1125
1126 pfn = max(start_pfn, pfn);
1127 pte = &pte[pfn_level_offset(pfn, level)];
1128
1129 do {
1130 unsigned long level_pfn;
1131
1132 if (!dma_pte_present(pte))
1133 goto next;
1134
1135 level_pfn = pfn & level_mask(level);
1136
1137 /* If range covers entire pagetable, free it */
1138 if (start_pfn <= level_pfn &&
1139 last_pfn >= level_pfn + level_size(level) - 1) {
 1140 /* These subordinate page tables are going away entirely. Don't
1141 bother to clear them; we're just going to *free* them. */
1142 if (level > 1 && !dma_pte_superpage(pte))
1143 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1144
1145 dma_clear_pte(pte);
1146 if (!first_pte)
1147 first_pte = pte;
1148 last_pte = pte;
1149 } else if (level > 1) {
1150 /* Recurse down into a level that isn't *entirely* obsolete */
1151 freelist = dma_pte_clear_level(domain, level - 1,
1152 phys_to_virt(dma_pte_addr(pte)),
1153 level_pfn, start_pfn, last_pfn,
1154 freelist);
1155 }
1156next:
1157 pfn += level_size(level);
1158 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1159
1160 if (first_pte)
1161 domain_flush_cache(domain, first_pte,
1162 (void *)++last_pte - (void *)first_pte);
1163
1164 return freelist;
1165}
1166
1167/* We can't just free the pages because the IOMMU may still be walking
1168 the page tables, and may have cached the intermediate levels. The
1169 pages can only be freed after the IOTLB flush has been done. */
1170struct page *domain_unmap(struct dmar_domain *domain,
1171 unsigned long start_pfn,
1172 unsigned long last_pfn)
1173{
1174 struct page *freelist = NULL;
1175
1176 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1177 BUG_ON(!domain_pfn_supported(domain, last_pfn));
1178 BUG_ON(start_pfn > last_pfn);
1179
1180 /* we don't need lock here; nobody else touches the iova range */
1181 freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1182 domain->pgd, 0, start_pfn, last_pfn, NULL);
1183
1184 /* free pgd */
1185 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1186 struct page *pgd_page = virt_to_page(domain->pgd);
1187 pgd_page->freelist = freelist;
1188 freelist = pgd_page;
1189
1190 domain->pgd = NULL;
1191 }
1192
1193 return freelist;
1194}
1195
1196void dma_free_pagelist(struct page *freelist)
1197{
1198 struct page *pg;
1199
1200 while ((pg = freelist)) {
1201 freelist = pg->freelist;
1202 free_pgtable_page(page_address(pg));
1203 }
1204}
1205
1206/* iommu handling */
1207static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1208{
1209 struct root_entry *root;
1210 unsigned long flags;
1211
1212 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
1213 if (!root) {
1214 pr_err("Allocating root entry for %s failed\n",
1215 iommu->name);
1216 return -ENOMEM;
1217 }
1218
1219 __iommu_flush_cache(iommu, root, ROOT_SIZE);
1220
1221 spin_lock_irqsave(&iommu->lock, flags);
1222 iommu->root_entry = root;
1223 spin_unlock_irqrestore(&iommu->lock, flags);
1224
1225 return 0;
1226}
1227
1228static void iommu_set_root_entry(struct intel_iommu *iommu)
1229{
1230 u64 addr;
1231 u32 sts;
1232 unsigned long flag;
1233
1234 addr = virt_to_phys(iommu->root_entry);
1235 if (ecs_enabled(iommu))
1236 addr |= DMA_RTADDR_RTT;
1237
1238 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1239 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
1240
1241 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
1242
1243 /* Make sure hardware complete it */
1244 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1245 readl, (sts & DMA_GSTS_RTPS), sts);
1246
1247 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1248}
1249
1250static void iommu_flush_write_buffer(struct intel_iommu *iommu)
1251{
1252 u32 val;
1253 unsigned long flag;
1254
1255 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
1256 return;
1257
1258 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1259 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
1260
1261 /* Make sure hardware complete it */
1262 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1263 readl, (!(val & DMA_GSTS_WBFS)), val);
1264
1265 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1266}
1267
 1268/* Invalidate context-cache entries and wait for hardware to complete it */
1269static void __iommu_flush_context(struct intel_iommu *iommu,
1270 u16 did, u16 source_id, u8 function_mask,
1271 u64 type)
1272{
1273 u64 val = 0;
1274 unsigned long flag;
1275
1276 switch (type) {
1277 case DMA_CCMD_GLOBAL_INVL:
1278 val = DMA_CCMD_GLOBAL_INVL;
1279 break;
1280 case DMA_CCMD_DOMAIN_INVL:
1281 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1282 break;
1283 case DMA_CCMD_DEVICE_INVL:
1284 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1285 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1286 break;
1287 default:
1288 BUG();
1289 }
1290 val |= DMA_CCMD_ICC;
1291
1292 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1293 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1294
1295 /* Make sure hardware complete it */
1296 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1297 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1298
1299 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1300}
1301
 1302/* Invalidate IOTLB entries and wait for hardware to complete it */
1303static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1304 u64 addr, unsigned int size_order, u64 type)
1305{
1306 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1307 u64 val = 0, val_iva = 0;
1308 unsigned long flag;
1309
1310 switch (type) {
1311 case DMA_TLB_GLOBAL_FLUSH:
1312 /* global flush doesn't need set IVA_REG */
1313 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1314 break;
1315 case DMA_TLB_DSI_FLUSH:
1316 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1317 break;
1318 case DMA_TLB_PSI_FLUSH:
1319 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1320 /* IH bit is passed in as part of address */
1321 val_iva = size_order | addr;
1322 break;
1323 default:
1324 BUG();
1325 }
1326 /* Note: set drain read/write */
1327#if 0
1328 /*
 1329 * This is probably only here to be extra safe. It looks like we can
 1330 * omit it without any impact.
1331 */
1332 if (cap_read_drain(iommu->cap))
1333 val |= DMA_TLB_READ_DRAIN;
1334#endif
1335 if (cap_write_drain(iommu->cap))
1336 val |= DMA_TLB_WRITE_DRAIN;
1337
1338 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1339 /* Note: Only uses first TLB reg currently */
1340 if (val_iva)
1341 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1342 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1343
1344 /* Make sure hardware complete it */
1345 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1346 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1347
1348 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1349
1350 /* check IOTLB invalidation granularity */
1351 if (DMA_TLB_IAIG(val) == 0)
1352 pr_err("Flush IOTLB failed\n");
1353 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1354 pr_debug("TLB flush request %Lx, actual %Lx\n",
1355 (unsigned long long)DMA_TLB_IIRG(type),
1356 (unsigned long long)DMA_TLB_IAIG(val));
1357}
1358
1359static struct device_domain_info *
1360iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
1361 u8 bus, u8 devfn)
1362{
1363 bool found = false;
1364 unsigned long flags;
1365 struct device_domain_info *info;
1366 struct pci_dev *pdev;
1367
1368 if (!ecap_dev_iotlb_support(iommu->ecap))
1369 return NULL;
1370
1371 if (!iommu->qi)
1372 return NULL;
1373
1374 spin_lock_irqsave(&device_domain_lock, flags);
1375 list_for_each_entry(info, &domain->devices, link)
1376 if (info->iommu == iommu && info->bus == bus &&
1377 info->devfn == devfn) {
1378 found = true;
1379 break;
1380 }
1381 spin_unlock_irqrestore(&device_domain_lock, flags);
1382
1383 if (!found || !info->dev || !dev_is_pci(info->dev))
1384 return NULL;
1385
1386 pdev = to_pci_dev(info->dev);
1387
1388 if (!pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS))
1389 return NULL;
1390
1391 if (!dmar_find_matched_atsr_unit(pdev))
1392 return NULL;
1393
1394 return info;
1395}
1396
1397static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1398{
1399 if (!info || !dev_is_pci(info->dev))
1400 return;
1401
1402 pci_enable_ats(to_pci_dev(info->dev), VTD_PAGE_SHIFT);
1403}
1404
1405static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1406{
1407 if (!info->dev || !dev_is_pci(info->dev) ||
1408 !pci_ats_enabled(to_pci_dev(info->dev)))
1409 return;
1410
1411 pci_disable_ats(to_pci_dev(info->dev));
1412}
1413
1414static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1415 u64 addr, unsigned mask)
1416{
1417 u16 sid, qdep;
1418 unsigned long flags;
1419 struct device_domain_info *info;
1420
1421 spin_lock_irqsave(&device_domain_lock, flags);
1422 list_for_each_entry(info, &domain->devices, link) {
1423 struct pci_dev *pdev;
1424 if (!info->dev || !dev_is_pci(info->dev))
1425 continue;
1426
1427 pdev = to_pci_dev(info->dev);
1428 if (!pci_ats_enabled(pdev))
1429 continue;
1430
1431 sid = info->bus << 8 | info->devfn;
1432 qdep = pci_ats_queue_depth(pdev);
1433 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1434 }
1435 spin_unlock_irqrestore(&device_domain_lock, flags);
1436}
1437
1438static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
1439 unsigned long pfn, unsigned int pages, int ih, int map)
1440{
1441 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1442 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1443
1444 BUG_ON(pages == 0);
1445
1446 if (ih)
1447 ih = 1 << 6;
1448 /*
1449 * Fallback to domain selective flush if no PSI support or the size is
1450 * too big.
1451 * PSI requires page size to be 2 ^ x, and the base address is naturally
1452 * aligned to the size
1453 */
1454 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1455 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1456 DMA_TLB_DSI_FLUSH);
1457 else
1458 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
1459 DMA_TLB_PSI_FLUSH);
1460
1461 /*
1462 * In caching mode, changes of pages from non-present to present require
1463 * flush. However, device IOTLB doesn't need to be flushed in this case.
1464 */
1465 if (!cap_caching_mode(iommu->cap) || !map)
1466 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
1467}
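/*
 * The mask passed to the PSI flush is log2 of the page count rounded up
 * to a power of two: e.g. flushing 9 pages gives mask = ilog2(16) = 4,
 * so the hardware invalidates a naturally aligned 16-page (64KiB) region.
 */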
1468
1469static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1470{
1471 u32 pmen;
1472 unsigned long flags;
1473
1474 raw_spin_lock_irqsave(&iommu->register_lock, flags);
1475 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1476 pmen &= ~DMA_PMEN_EPM;
1477 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1478
1479 /* wait for the protected region status bit to clear */
1480 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1481 readl, !(pmen & DMA_PMEN_PRS), pmen);
1482
1483 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1484}
1485
1486static void iommu_enable_translation(struct intel_iommu *iommu)
1487{
1488 u32 sts;
1489 unsigned long flags;
1490
1491 raw_spin_lock_irqsave(&iommu->register_lock, flags);
1492 iommu->gcmd |= DMA_GCMD_TE;
1493 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1494
1495 /* Make sure hardware complete it */
1496 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1497 readl, (sts & DMA_GSTS_TES), sts);
1498
1499 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1500}
1501
1502static void iommu_disable_translation(struct intel_iommu *iommu)
1503{
1504 u32 sts;
1505 unsigned long flag;
1506
1507 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1508 iommu->gcmd &= ~DMA_GCMD_TE;
1509 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1510
1511 /* Make sure hardware complete it */
1512 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1513 readl, (!(sts & DMA_GSTS_TES)), sts);
1514
1515 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1516}
1517
1518
1519static int iommu_init_domains(struct intel_iommu *iommu)
1520{
1521 unsigned long ndomains;
1522 unsigned long nlongs;
1523
1524 ndomains = cap_ndoms(iommu->cap);
1525 pr_debug("%s: Number of Domains supported <%ld>\n",
1526 iommu->name, ndomains);
1527 nlongs = BITS_TO_LONGS(ndomains);
1528
1529 spin_lock_init(&iommu->lock);
1530
1531 /* TBD: there might be 64K domains,
1532 * consider other allocation for future chip
1533 */
1534 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1535 if (!iommu->domain_ids) {
1536 pr_err("%s: Allocating domain id array failed\n",
1537 iommu->name);
1538 return -ENOMEM;
1539 }
1540 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1541 GFP_KERNEL);
1542 if (!iommu->domains) {
1543 pr_err("%s: Allocating domain array failed\n",
1544 iommu->name);
1545 kfree(iommu->domain_ids);
1546 iommu->domain_ids = NULL;
1547 return -ENOMEM;
1548 }
1549
1550 /*
1551 * If Caching mode is set, then invalid translations are tagged
1552 * with domain-id 0, hence we need to pre-allocate it. We also
1553 * use domain-id 0 as a marker for non-allocated domain-id, so
1554 * make sure it is not used for a real domain.
1555 */
1556 set_bit(0, iommu->domain_ids);
1557
1558 return 0;
1559}
1560
1561static void disable_dmar_iommu(struct intel_iommu *iommu)
1562{
1563 struct dmar_domain *domain;
1564 int i;
1565
1566 if ((iommu->domains) && (iommu->domain_ids)) {
1567 for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
1568 /*
1569 * Domain id 0 is reserved for invalid translation
1570 * if hardware supports caching mode and used as
1571 * a non-allocated marker.
1572 */
1573 if (i == 0)
1574 continue;
1575
1576 domain = iommu->domains[i];
1577 clear_bit(i, iommu->domain_ids);
1578 if (domain_detach_iommu(domain, iommu) == 0 &&
1579 !domain_type_is_vm(domain))
1580 domain_exit(domain);
1581 }
1582 }
1583
1584 if (iommu->gcmd & DMA_GCMD_TE)
1585 iommu_disable_translation(iommu);
1586}
1587
1588static void free_dmar_iommu(struct intel_iommu *iommu)
1589{
1590 if ((iommu->domains) && (iommu->domain_ids)) {
1591 kfree(iommu->domains);
1592 kfree(iommu->domain_ids);
1593 iommu->domains = NULL;
1594 iommu->domain_ids = NULL;
1595 }
1596
1597 g_iommus[iommu->seq_id] = NULL;
1598
1599 /* free context mapping */
1600 free_context_table(iommu);
1601}
1602
1603static struct dmar_domain *alloc_domain(int flags)
1604{
1605 /* domain id for virtual machine, it won't be set in context */
1606 static atomic_t vm_domid = ATOMIC_INIT(0);
1607 struct dmar_domain *domain;
1608
1609 domain = alloc_domain_mem();
1610 if (!domain)
1611 return NULL;
1612
1613 memset(domain, 0, sizeof(*domain));
1614 domain->nid = -1;
1615 domain->flags = flags;
1616 spin_lock_init(&domain->iommu_lock);
1617 INIT_LIST_HEAD(&domain->devices);
1618 if (flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
1619 domain->id = atomic_inc_return(&vm_domid);
1620
1621 return domain;
1622}
1623
1624static int __iommu_attach_domain(struct dmar_domain *domain,
1625 struct intel_iommu *iommu)
1626{
1627 int num;
1628 unsigned long ndomains;
1629
1630 ndomains = cap_ndoms(iommu->cap);
1631 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1632 if (num < ndomains) {
1633 set_bit(num, iommu->domain_ids);
1634 iommu->domains[num] = domain;
1635 domain->iommu_did[iommu->seq_id] = num;
1636 } else {
1637 num = -ENOSPC;
1638 }
1639
1640 return num;
1641}
1642
1643static int iommu_attach_domain(struct dmar_domain *domain,
1644 struct intel_iommu *iommu)
1645{
1646 int num;
1647 unsigned long flags;
1648
1649 spin_lock_irqsave(&iommu->lock, flags);
1650 num = __iommu_attach_domain(domain, iommu);
1651 spin_unlock_irqrestore(&iommu->lock, flags);
1652 if (num < 0)
1653 pr_err("%s: No free domain ids\n", iommu->name);
1654
1655 return num;
1656}
1657
1658static int iommu_attach_vm_domain(struct dmar_domain *domain,
1659 struct intel_iommu *iommu)
1660{
1661 int num;
1662
1663 num = domain->iommu_did[iommu->seq_id];
1664 if (num)
1665 return num;
1666
1667 return __iommu_attach_domain(domain, iommu);
1668}
1669
1670static void iommu_detach_domain(struct dmar_domain *domain,
1671 struct intel_iommu *iommu)
1672{
1673 unsigned long flags;
1674 int num;
1675
1676 spin_lock_irqsave(&iommu->lock, flags);
1677
1678 num = domain->iommu_did[iommu->seq_id];
1679
 1680 /* num == 0 means this domain holds no id on this iommu */
 1681 if (num != 0) {
 1682 clear_bit(num, iommu->domain_ids);
 1683 iommu->domains[num] = NULL;
 1684 }
1685
1686 spin_unlock_irqrestore(&iommu->lock, flags);
1687}
1688
1689static void domain_attach_iommu(struct dmar_domain *domain,
1690 struct intel_iommu *iommu)
1691{
1692 unsigned long flags;
1693
1694 spin_lock_irqsave(&domain->iommu_lock, flags);
1695 if (!test_and_set_bit(iommu->seq_id, domain->iommu_bmp)) {
1696 domain->iommu_count++;
1697 if (domain->iommu_count == 1)
1698 domain->nid = iommu->node;
1699 domain_update_iommu_cap(domain);
1700 }
1701 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1702}
1703
1704static int domain_detach_iommu(struct dmar_domain *domain,
1705 struct intel_iommu *iommu)
1706{
1707 unsigned long flags;
1708 int count = INT_MAX;
1709
1710 spin_lock_irqsave(&domain->iommu_lock, flags);
1711 if (test_and_clear_bit(iommu->seq_id, domain->iommu_bmp)) {
1712 count = --domain->iommu_count;
1713 domain_update_iommu_cap(domain);
1714 domain->iommu_did[iommu->seq_id] = 0;
1715 }
1716 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1717
1718 return count;
1719}
1720
1721static struct iova_domain reserved_iova_list;
1722static struct lock_class_key reserved_rbtree_key;
1723
1724static int dmar_init_reserved_ranges(void)
1725{
1726 struct pci_dev *pdev = NULL;
1727 struct iova *iova;
1728 int i;
1729
1730 init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN,
1731 DMA_32BIT_PFN);
1732
1733 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1734 &reserved_rbtree_key);
1735
1736 /* IOAPIC ranges shouldn't be accessed by DMA */
1737 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1738 IOVA_PFN(IOAPIC_RANGE_END));
1739 if (!iova) {
1740 pr_err("Reserve IOAPIC range failed\n");
1741 return -ENODEV;
1742 }
1743
1744 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1745 for_each_pci_dev(pdev) {
1746 struct resource *r;
1747
1748 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1749 r = &pdev->resource[i];
1750 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1751 continue;
1752 iova = reserve_iova(&reserved_iova_list,
1753 IOVA_PFN(r->start),
1754 IOVA_PFN(r->end));
1755 if (!iova) {
1756 pr_err("Reserve iova failed\n");
1757 return -ENODEV;
1758 }
1759 }
1760 }
1761 return 0;
1762}
1763
1764static void domain_reserve_special_ranges(struct dmar_domain *domain)
1765{
1766 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1767}
1768
1769static inline int guestwidth_to_adjustwidth(int gaw)
1770{
1771 int agaw;
1772 int r = (gaw - 12) % 9;
1773
1774 if (r == 0)
1775 agaw = gaw;
1776 else
1777 agaw = gaw + 9 - r;
1778 if (agaw > 64)
1779 agaw = 64;
1780 return agaw;
1781}
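/*
 * This rounds the guest address width up to the next value a VT-d page
 * table can represent exactly (12 bits of page offset plus a whole number
 * of 9-bit levels): e.g. gaw 39 or 48 are returned unchanged, while
 * gaw 40 is rounded up to 48.
 */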
1782
1783static int domain_init(struct dmar_domain *domain, int guest_width)
1784{
1785 struct intel_iommu *iommu;
1786 int adjust_width, agaw;
1787 unsigned long sagaw;
1788
1789 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
1790 DMA_32BIT_PFN);
1791 domain_reserve_special_ranges(domain);
1792
1793 /* calculate AGAW */
1794 iommu = domain_get_iommu(domain);
1795 if (guest_width > cap_mgaw(iommu->cap))
1796 guest_width = cap_mgaw(iommu->cap);
1797 domain->gaw = guest_width;
1798 adjust_width = guestwidth_to_adjustwidth(guest_width);
1799 agaw = width_to_agaw(adjust_width);
1800 sagaw = cap_sagaw(iommu->cap);
1801 if (!test_bit(agaw, &sagaw)) {
1802 /* hardware doesn't support it, choose a bigger one */
1803 pr_debug("Hardware doesn't support agaw %d\n", agaw);
1804 agaw = find_next_bit(&sagaw, 5, agaw);
1805 if (agaw >= 5)
1806 return -ENODEV;
1807 }
1808 domain->agaw = agaw;
1809
1810 if (ecap_coherent(iommu->ecap))
1811 domain->iommu_coherency = 1;
1812 else
1813 domain->iommu_coherency = 0;
1814
1815 if (ecap_sc_support(iommu->ecap))
1816 domain->iommu_snooping = 1;
1817 else
1818 domain->iommu_snooping = 0;
1819
1820 if (intel_iommu_superpage)
1821 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1822 else
1823 domain->iommu_superpage = 0;
1824
1825 domain->nid = iommu->node;
1826
1827 /* always allocate the top pgd */
1828 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1829 if (!domain->pgd)
1830 return -ENOMEM;
1831 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1832 return 0;
1833}
1834
1835static void domain_exit(struct dmar_domain *domain)
1836{
1837 struct dmar_drhd_unit *drhd;
1838 struct intel_iommu *iommu;
1839 struct page *freelist = NULL;
1840
 1841 /* Domain 0 is reserved, so don't process it */
1842 if (!domain)
1843 return;
1844
1845 /* Flush any lazy unmaps that may reference this domain */
1846 if (!intel_iommu_strict)
1847 flush_unmaps_timeout(0);
1848
1849 /* remove associated devices */
1850 domain_remove_dev_info(domain);
1851
1852 /* destroy iovas */
1853 put_iova_domain(&domain->iovad);
1854
1855 freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1856
1857 /* clear attached or cached domains */
1858 rcu_read_lock();
1859 for_each_active_iommu(iommu, drhd)
1860 if (domain_type_is_vm(domain) ||
1861 test_bit(iommu->seq_id, domain->iommu_bmp))
1862 iommu_detach_domain(domain, iommu);
1863 rcu_read_unlock();
1864
1865 dma_free_pagelist(freelist);
1866
1867 free_domain_mem(domain);
1868}
1869
1870static int domain_context_mapping_one(struct dmar_domain *domain,
1871 struct intel_iommu *iommu,
1872 u8 bus, u8 devfn, int translation)
1873{
1874 struct context_entry *context;
1875 unsigned long flags;
1876 struct dma_pte *pgd;
1877 int id;
1878 int agaw;
1879 struct device_domain_info *info = NULL;
1880
1881 pr_debug("Set context mapping for %02x:%02x.%d\n",
1882 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1883
1884 BUG_ON(!domain->pgd);
1885 BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1886 translation != CONTEXT_TT_MULTI_LEVEL);
1887
1888 spin_lock_irqsave(&iommu->lock, flags);
1889 context = iommu_context_addr(iommu, bus, devfn, 1);
1890 spin_unlock_irqrestore(&iommu->lock, flags);
1891 if (!context)
1892 return -ENOMEM;
1893 spin_lock_irqsave(&iommu->lock, flags);
1894 if (context_present(context)) {
1895 spin_unlock_irqrestore(&iommu->lock, flags);
1896 return 0;
1897 }
1898
1899 context_clear_entry(context);
1900
1901 id = domain->id;
1902 pgd = domain->pgd;
1903
1904 if (domain_type_is_vm_or_si(domain)) {
1905 if (domain_type_is_vm(domain)) {
1906 id = iommu_attach_vm_domain(domain, iommu);
1907 if (id < 0) {
1908 spin_unlock_irqrestore(&iommu->lock, flags);
1909 pr_err("%s: No free domain ids\n", iommu->name);
1910 return -EFAULT;
1911 }
1912 }
1913
1914 /* Skip top levels of page tables for
1915 * iommu which has less agaw than default.
1916 * Unnecessary for PT mode.
1917 */
1918 if (translation != CONTEXT_TT_PASS_THROUGH) {
1919 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1920 pgd = phys_to_virt(dma_pte_addr(pgd));
1921 if (!dma_pte_present(pgd)) {
1922 spin_unlock_irqrestore(&iommu->lock, flags);
1923 return -ENOMEM;
1924 }
1925 }
1926 }
1927 }
1928
1929 context_set_domain_id(context, id);
1930
1931 if (translation != CONTEXT_TT_PASS_THROUGH) {
1932 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
1933 translation = info ? CONTEXT_TT_DEV_IOTLB :
1934 CONTEXT_TT_MULTI_LEVEL;
1935 }
1936 /*
1937 * In pass through mode, AW must be programmed to indicate the largest
1938 * AGAW value supported by hardware. And ASR is ignored by hardware.
1939 */
1940 if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
1941 context_set_address_width(context, iommu->msagaw);
1942 else {
1943 context_set_address_root(context, virt_to_phys(pgd));
1944 context_set_address_width(context, iommu->agaw);
1945 }
1946
1947 context_set_translation_type(context, translation);
1948 context_set_fault_enable(context);
1949 context_set_present(context);
1950 domain_flush_cache(domain, context, sizeof(*context));
1951
1952 /*
 1953 * It's a non-present to present mapping. If hardware doesn't cache
 1954 * non-present entries we only need to flush the write-buffer. If it
 1955 * _does_ cache non-present entries, then it does so in the special
1956 * domain #0, which we have to flush:
1957 */
1958 if (cap_caching_mode(iommu->cap)) {
1959 iommu->flush.flush_context(iommu, 0,
1960 (((u16)bus) << 8) | devfn,
1961 DMA_CCMD_MASK_NOBIT,
1962 DMA_CCMD_DEVICE_INVL);
1963 iommu->flush.flush_iotlb(iommu, id, 0, 0, DMA_TLB_DSI_FLUSH);
1964 } else {
1965 iommu_flush_write_buffer(iommu);
1966 }
1967 iommu_enable_dev_iotlb(info);
1968 spin_unlock_irqrestore(&iommu->lock, flags);
1969
1970 domain_attach_iommu(domain, iommu);
1971
1972 return 0;
1973}
1974
1975struct domain_context_mapping_data {
1976 struct dmar_domain *domain;
1977 struct intel_iommu *iommu;
1978 int translation;
1979};
1980
1981static int domain_context_mapping_cb(struct pci_dev *pdev,
1982 u16 alias, void *opaque)
1983{
1984 struct domain_context_mapping_data *data = opaque;
1985
1986 return domain_context_mapping_one(data->domain, data->iommu,
1987 PCI_BUS_NUM(alias), alias & 0xff,
1988 data->translation);
1989}
1990
1991static int
1992domain_context_mapping(struct dmar_domain *domain, struct device *dev,
1993 int translation)
1994{
1995 struct intel_iommu *iommu;
1996 u8 bus, devfn;
1997 struct domain_context_mapping_data data;
1998
1999 iommu = device_to_iommu(dev, &bus, &devfn);
2000 if (!iommu)
2001 return -ENODEV;
2002
2003 if (!dev_is_pci(dev))
2004 return domain_context_mapping_one(domain, iommu, bus, devfn,
2005 translation);
2006
2007 data.domain = domain;
2008 data.iommu = iommu;
2009 data.translation = translation;
2010
2011 return pci_for_each_dma_alias(to_pci_dev(dev),
2012 &domain_context_mapping_cb, &data);
2013}
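/*
 * Added commentary: for PCI devices, domain_context_mapping() programs
 * a context entry not only for the device itself but for every DMA
 * alias reported by pci_for_each_dma_alias(), e.g. the requester ID a
 * PCIe-to-PCI bridge may use when forwarding the device's DMA, so that
 * translation works whichever source-id reaches the IOMMU.
 */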
2014
2015static int domain_context_mapped_cb(struct pci_dev *pdev,
2016 u16 alias, void *opaque)
2017{
2018 struct intel_iommu *iommu = opaque;
2019
2020 return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
2021}
2022
2023static int domain_context_mapped(struct device *dev)
2024{
2025 struct intel_iommu *iommu;
2026 u8 bus, devfn;
2027
2028 iommu = device_to_iommu(dev, &bus, &devfn);
2029 if (!iommu)
2030 return -ENODEV;
2031
2032 if (!dev_is_pci(dev))
2033 return device_context_mapped(iommu, bus, devfn);
2034
2035 return !pci_for_each_dma_alias(to_pci_dev(dev),
2036 domain_context_mapped_cb, iommu);
2037}
2038
2039/* Returns a number of VTD pages, but aligned to MM page size */
2040static inline unsigned long aligned_nrpages(unsigned long host_addr,
2041 size_t size)
2042{
2043 host_addr &= ~PAGE_MASK;
2044 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
2045}
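/*
 * Worked example (added commentary, assuming 4KiB MM pages and
 * VTD_PAGE_SHIFT == 12): host_addr = 0x1003, size = 0x2000. The page
 * offset is 0x003, PAGE_ALIGN(0x003 + 0x2000) = 0x3000, so the buffer
 * spans 0x3000 >> 12 = 3 VT-d pages even though it is only two pages
 * long.
 */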
2046
2047/* Return largest possible superpage level for a given mapping */
2048static inline int hardware_largepage_caps(struct dmar_domain *domain,
2049 unsigned long iov_pfn,
2050 unsigned long phy_pfn,
2051 unsigned long pages)
2052{
2053 int support, level = 1;
2054 unsigned long pfnmerge;
2055
2056 support = domain->iommu_superpage;
2057
2058 /* To use a large page, the virtual *and* physical addresses
2059 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2060 of them will mean we have to use smaller pages. So just
2061 merge them and check both at once. */
2062 pfnmerge = iov_pfn | phy_pfn;
2063
2064 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
2065 pages >>= VTD_STRIDE_SHIFT;
2066 if (!pages)
2067 break;
2068 pfnmerge >>= VTD_STRIDE_SHIFT;
2069 level++;
2070 support--;
2071 }
2072 return level;
2073}
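/*
 * Worked example (added commentary, assuming 9-bit strides, i.e.
 * 4KiB/2MiB/1GiB levels): with iov_pfn and phy_pfn both 2MiB aligned
 * (low 9 bits clear), pages = 1024 and iommu_superpage = 1, the first
 * pass succeeds and the function returns level 2, so 2MiB superpages
 * can be used. Reaching level 3 would additionally require 1GiB
 * alignment, at least 512*512 pages and iommu_superpage >= 2.
 */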
2074
2075static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2076 struct scatterlist *sg, unsigned long phys_pfn,
2077 unsigned long nr_pages, int prot)
2078{
2079 struct dma_pte *first_pte = NULL, *pte = NULL;
2080 phys_addr_t uninitialized_var(pteval);
2081 unsigned long sg_res = 0;
2082 unsigned int largepage_lvl = 0;
2083 unsigned long lvl_pages = 0;
2084
2085 BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
2086
2087 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2088 return -EINVAL;
2089
2090 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
2091
2092 if (!sg) {
2093 sg_res = nr_pages;
2094 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
2095 }
2096
2097 while (nr_pages > 0) {
2098 uint64_t tmp;
2099
2100 if (!sg_res) {
2101 sg_res = aligned_nrpages(sg->offset, sg->length);
2102 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
2103 sg->dma_length = sg->length;
2104 pteval = page_to_phys(sg_page(sg)) | prot;
2105 phys_pfn = pteval >> VTD_PAGE_SHIFT;
2106 }
2107
2108 if (!pte) {
2109 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2110
2111 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
2112 if (!pte)
2113 return -ENOMEM;
2114 /* It is a large page */
2115 if (largepage_lvl > 1) {
2116 pteval |= DMA_PTE_LARGE_PAGE;
2117 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2118 /*
2119 * Ensure that old small page tables are
2120 * removed to make room for superpage,
2121 * if they exist.
2122 */
2123 dma_pte_free_pagetable(domain, iov_pfn,
2124 iov_pfn + lvl_pages - 1);
2125 } else {
2126 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
2127 }
2128
2129 }
2130 /* We don't need a lock here; nobody else
2131 * touches this iova range
2132 */
2133 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
2134 if (tmp) {
2135 static int dumps = 5;
2136 pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2137 iov_pfn, tmp, (unsigned long long)pteval);
2138 if (dumps) {
2139 dumps--;
2140 debug_dma_dump_mappings(NULL);
2141 }
2142 WARN_ON(1);
2143 }
2144
2145 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2146
2147 BUG_ON(nr_pages < lvl_pages);
2148 BUG_ON(sg_res < lvl_pages);
2149
2150 nr_pages -= lvl_pages;
2151 iov_pfn += lvl_pages;
2152 phys_pfn += lvl_pages;
2153 pteval += lvl_pages * VTD_PAGE_SIZE;
2154 sg_res -= lvl_pages;
2155
2156 /* If the next PTE would be the first in a new page, then we
2157 need to flush the cache on the entries we've just written.
2158 And then we'll need to recalculate 'pte', so clear it and
2159 let it get set again in the if (!pte) block above.
2160
2161 If we're done (!nr_pages) we need to flush the cache too.
2162
2163 Also if we've been setting superpages, we may need to
2164 recalculate 'pte' and switch back to smaller pages for the
2165 end of the mapping, if the trailing size is not enough to
2166 use another superpage (i.e. sg_res < lvl_pages). */
2167 pte++;
2168 if (!nr_pages || first_pte_in_page(pte) ||
2169 (largepage_lvl > 1 && sg_res < lvl_pages)) {
2170 domain_flush_cache(domain, first_pte,
2171 (void *)pte - (void *)first_pte);
2172 pte = NULL;
2173 }
2174
2175 if (!sg_res && nr_pages)
2176 sg = sg_next(sg);
2177 }
2178 return 0;
2179}
2180
2181static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2182 struct scatterlist *sg, unsigned long nr_pages,
2183 int prot)
2184{
2185 return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
2186}
2187
2188static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2189 unsigned long phys_pfn, unsigned long nr_pages,
2190 int prot)
2191{
2192 return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
2193}
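/*
 * Usage sketch (added commentary): callers below use these wrappers
 * rather than __domain_mapping() directly, e.g.
 *   domain_pfn_mapping(domain, first_vpfn, first_vpfn, npages,
 *                      DMA_PTE_READ | DMA_PTE_WRITE);
 * maps npages 1:1, while domain_sg_mapping() walks a scatterlist and
 * fills in each sg entry's dma_address/dma_length as it goes.
 */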
2194
2195static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
2196{
2197 if (!iommu)
2198 return;
2199
2200 clear_context_table(iommu, bus, devfn);
2201 iommu->flush.flush_context(iommu, 0, 0, 0,
2202 DMA_CCMD_GLOBAL_INVL);
2203 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2204}
2205
2206static inline void unlink_domain_info(struct device_domain_info *info)
2207{
2208 assert_spin_locked(&device_domain_lock);
2209 list_del(&info->link);
2210 list_del(&info->global);
2211 if (info->dev)
2212 info->dev->archdata.iommu = NULL;
2213}
2214
2215static void domain_remove_dev_info(struct dmar_domain *domain)
2216{
2217 struct device_domain_info *info, *tmp;
2218 unsigned long flags;
2219
2220 spin_lock_irqsave(&device_domain_lock, flags);
2221 list_for_each_entry_safe(info, tmp, &domain->devices, link) {
2222 unlink_domain_info(info);
2223 spin_unlock_irqrestore(&device_domain_lock, flags);
2224
2225 iommu_disable_dev_iotlb(info);
2226 iommu_detach_dev(info->iommu, info->bus, info->devfn);
2227
2228 if (domain_type_is_vm(domain)) {
2229 iommu_detach_dependent_devices(info->iommu, info->dev);
2230 domain_detach_iommu(domain, info->iommu);
2231 }
2232
2233 free_devinfo_mem(info);
2234 spin_lock_irqsave(&device_domain_lock, flags);
2235 }
2236 spin_unlock_irqrestore(&device_domain_lock, flags);
2237}
2238
2239/*
2240 * find_domain
2241 * Note: we use struct device->archdata.iommu to store the info
2242 */
2243static struct dmar_domain *find_domain(struct device *dev)
2244{
2245 struct device_domain_info *info;
2246
2247 /* No lock here, assumes no domain exit in normal case */
2248 info = dev->archdata.iommu;
2249 if (info)
2250 return info->domain;
2251 return NULL;
2252}
2253
2254static inline struct device_domain_info *
2255dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2256{
2257 struct device_domain_info *info;
2258
2259 list_for_each_entry(info, &device_domain_list, global)
2260 if (info->iommu->segment == segment && info->bus == bus &&
2261 info->devfn == devfn)
2262 return info;
2263
2264 return NULL;
2265}
2266
2267static struct dmar_domain *dmar_insert_dev_info(struct intel_iommu *iommu,
2268 int bus, int devfn,
2269 struct device *dev,
2270 struct dmar_domain *domain)
2271{
2272 struct dmar_domain *found = NULL;
2273 struct device_domain_info *info;
2274 unsigned long flags;
2275
2276 info = alloc_devinfo_mem();
2277 if (!info)
2278 return NULL;
2279
2280 info->bus = bus;
2281 info->devfn = devfn;
2282 info->dev = dev;
2283 info->domain = domain;
2284 info->iommu = iommu;
2285
2286 spin_lock_irqsave(&device_domain_lock, flags);
2287 if (dev)
2288 found = find_domain(dev);
2289 else {
2290 struct device_domain_info *info2;
2291 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
2292 if (info2)
2293 found = info2->domain;
2294 }
2295 if (found) {
2296 spin_unlock_irqrestore(&device_domain_lock, flags);
2297 free_devinfo_mem(info);
2298 /* Caller must free the original domain */
2299 return found;
2300 }
2301
2302 list_add(&info->link, &domain->devices);
2303 list_add(&info->global, &device_domain_list);
2304 if (dev)
2305 dev->archdata.iommu = info;
2306 spin_unlock_irqrestore(&device_domain_lock, flags);
2307
2308 return domain;
2309}
2310
2311static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2312{
2313 *(u16 *)opaque = alias;
2314 return 0;
2315}
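/*
 * Added commentary: get_last_alias() simply records every alias that
 * pci_for_each_dma_alias() reports, so after the walk in
 * get_domain_for_dev() below, dma_alias holds the last alias reported
 * (typically the one nearest the root), which is then used to share
 * one domain among all devices that alias to it.
 */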
2316
2317/* domain is initialized */
2318static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
2319{
2320 struct dmar_domain *domain, *tmp;
2321 struct intel_iommu *iommu;
2322 struct device_domain_info *info;
2323 u16 dma_alias;
2324 unsigned long flags;
2325 u8 bus, devfn;
2326
2327 domain = find_domain(dev);
2328 if (domain)
2329 return domain;
2330
2331 iommu = device_to_iommu(dev, &bus, &devfn);
2332 if (!iommu)
2333 return NULL;
2334
2335 if (dev_is_pci(dev)) {
2336 struct pci_dev *pdev = to_pci_dev(dev);
2337
2338 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2339
2340 spin_lock_irqsave(&device_domain_lock, flags);
2341 info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2342 PCI_BUS_NUM(dma_alias),
2343 dma_alias & 0xff);
2344 if (info) {
2345 iommu = info->iommu;
2346 domain = info->domain;
2347 }
2348 spin_unlock_irqrestore(&device_domain_lock, flags);
2349
2350 /* DMA alias already has a domain, use it */
2351 if (info)
2352 goto found_domain;
2353 }
2354
2355 /* Allocate and initialize new domain for the device */
2356 domain = alloc_domain(0);
2357 if (!domain)
2358 return NULL;
2359 domain->id = iommu_attach_domain(domain, iommu);
2360 if (domain->id < 0) {
2361 free_domain_mem(domain);
2362 return NULL;
2363 }
2364 domain_attach_iommu(domain, iommu);
2365 if (domain_init(domain, gaw)) {
2366 domain_exit(domain);
2367 return NULL;
2368 }
2369
2370 /* register PCI DMA alias device */
2371 if (dev_is_pci(dev)) {
2372 tmp = dmar_insert_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2373 dma_alias & 0xff, NULL, domain);
2374
2375 if (!tmp || tmp != domain) {
2376 domain_exit(domain);
2377 domain = tmp;
2378 }
2379
2380 if (!domain)
2381 return NULL;
2382 }
2383
2384found_domain:
2385 tmp = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);
2386
2387 if (!tmp || tmp != domain) {
2388 domain_exit(domain);
2389 domain = tmp;
2390 }
2391
2392 return domain;
2393}
2394
2395static int iommu_identity_mapping;
2396#define IDENTMAP_ALL 1
2397#define IDENTMAP_GFX 2
2398#define IDENTMAP_AZALIA 4
2399
2400static int iommu_domain_identity_map(struct dmar_domain *domain,
2401 unsigned long long start,
2402 unsigned long long end)
2403{
2404 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2405 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2406
2407 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2408 dma_to_mm_pfn(last_vpfn))) {
2409 pr_err("Reserving iova failed\n");
2410 return -ENOMEM;
2411 }
2412
2413 pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
2414 start, end, domain->id);
2415 /*
2416 * RMRR range might have overlap with physical memory range,
2417 * clear it first
2418 */
2419 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2420
2421 return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2422 last_vpfn - first_vpfn + 1,
2423 DMA_PTE_READ|DMA_PTE_WRITE);
2424}
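/*
 * Worked example (added commentary, assuming 4KiB VT-d pages): for the
 * ISA/LPC case below, start = 0 and end = 16MiB - 1, so first_vpfn = 0
 * and last_vpfn = 0xfff; the call reserves those iovas and installs
 * 4096 read/write PTEs mapping each pfn to itself.
 */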
2425
2426static int iommu_prepare_identity_map(struct device *dev,
2427 unsigned long long start,
2428 unsigned long long end)
2429{
2430 struct dmar_domain *domain;
2431 int ret;
2432
2433 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2434 if (!domain)
2435 return -ENOMEM;
2436
2437 /* For _hardware_ passthrough, don't bother. But for software
2438 passthrough, we do it anyway -- it may indicate a memory
2439 range which is reserved in E820, and so didn't get set
2440 up to start with in the si_domain */
2441 if (domain == si_domain && hw_pass_through) {
2442 pr_warn("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2443 dev_name(dev), start, end);
2444 return 0;
2445 }
2446
2447 pr_info("Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2448 dev_name(dev), start, end);
2449
2450 if (end < start) {
2451 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2452 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2453 dmi_get_system_info(DMI_BIOS_VENDOR),
2454 dmi_get_system_info(DMI_BIOS_VERSION),
2455 dmi_get_system_info(DMI_PRODUCT_VERSION));
2456 ret = -EIO;
2457 goto error;
2458 }
2459
2460 if (end >> agaw_to_width(domain->agaw)) {
2461 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2462 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2463 agaw_to_width(domain->agaw),
2464 dmi_get_system_info(DMI_BIOS_VENDOR),
2465 dmi_get_system_info(DMI_BIOS_VERSION),
2466 dmi_get_system_info(DMI_PRODUCT_VERSION));
2467 ret = -EIO;
2468 goto error;
2469 }
2470
2471 ret = iommu_domain_identity_map(domain, start, end);
2472 if (ret)
2473 goto error;
2474
2475 /* context entry init */
2476 ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);
2477 if (ret)
2478 goto error;
2479
2480 return 0;
2481
2482 error:
2483 domain_exit(domain);
2484 return ret;
2485}
2486
2487static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2488 struct device *dev)
2489{
2490 if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2491 return 0;
2492 return iommu_prepare_identity_map(dev, rmrr->base_address,
2493 rmrr->end_address);
2494}
2495
2496#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
2497static inline void iommu_prepare_isa(void)
2498{
2499 struct pci_dev *pdev;
2500 int ret;
2501
2502 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2503 if (!pdev)
2504 return;
2505
2506 pr_info("Prepare 0-16MiB unity mapping for LPC\n");
2507 ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);
2508
2509 if (ret)
2510 pr_err("Failed to create 0-16MiB identity map - floppy might not work\n");
2511
2512 pci_dev_put(pdev);
2513}
2514#else
2515static inline void iommu_prepare_isa(void)
2516{
2517 return;
2518}
2519#endif /* !CONFIG_INTEL_IOMMU_FLOPPY_WA */
2520
2521static int md_domain_init(struct dmar_domain *domain, int guest_width);
2522
2523static int __init si_domain_init(int hw)
2524{
2525 struct dmar_drhd_unit *drhd;
2526 struct intel_iommu *iommu;
2527 int nid, ret = 0;
2528 bool first = true;
2529
2530 si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2531 if (!si_domain)
2532 return -EFAULT;
2533
2534 for_each_active_iommu(iommu, drhd) {
2535 ret = iommu_attach_domain(si_domain, iommu);
2536 if (ret < 0) {
2537 domain_exit(si_domain);
2538 return -EFAULT;
2539 } else if (first) {
2540 si_domain->id = ret;
2541 first = false;
2542 } else if (si_domain->id != ret) {
2543 domain_exit(si_domain);
2544 return -EFAULT;
2545 }
2546 domain_attach_iommu(si_domain, iommu);
2547 }
2548
2549 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2550 domain_exit(si_domain);
2551 return -EFAULT;
2552 }
2553
2554 pr_debug("Identity mapping domain is domain %d\n",
2555 si_domain->id);
2556
2557 if (hw)
2558 return 0;
2559
2560 for_each_online_node(nid) {
2561 unsigned long start_pfn, end_pfn;
2562 int i;
2563
2564 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2565 ret = iommu_domain_identity_map(si_domain,
2566 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2567 if (ret)
2568 return ret;
2569 }
2570 }
2571
2572 return 0;
2573}
2574
2575static int identity_mapping(struct device *dev)
2576{
2577 struct device_domain_info *info;
2578
2579 if (likely(!iommu_identity_mapping))
2580 return 0;
2581
2582 info = dev->archdata.iommu;
2583 if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2584 return (info->domain == si_domain);
2585
2586 return 0;
2587}
2588
2589static int domain_add_dev_info(struct dmar_domain *domain,
2590 struct device *dev, int translation)
2591{
2592 struct dmar_domain *ndomain;
2593 struct intel_iommu *iommu;
2594 u8 bus, devfn;
2595 int ret;
2596
2597 iommu = device_to_iommu(dev, &bus, &devfn);
2598 if (!iommu)
2599 return -ENODEV;
2600
2601 ndomain = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);
2602 if (ndomain != domain)
2603 return -EBUSY;
2604
2605 ret = domain_context_mapping(domain, dev, translation);
2606 if (ret) {
2607 domain_remove_one_dev_info(domain, dev);
2608 return ret;
2609 }
2610
2611 return 0;
2612}
2613
2614static bool device_has_rmrr(struct device *dev)
2615{
2616 struct dmar_rmrr_unit *rmrr;
2617 struct device *tmp;
2618 int i;
2619
2620 rcu_read_lock();
2621 for_each_rmrr_units(rmrr) {
2622 /*
2623 * Return TRUE if this RMRR contains the device that
2624 * is passed in.
2625 */
2626 for_each_active_dev_scope(rmrr->devices,
2627 rmrr->devices_cnt, i, tmp)
2628 if (tmp == dev) {
2629 rcu_read_unlock();
2630 return true;
2631 }
2632 }
2633 rcu_read_unlock();
2634 return false;
2635}
2636
2637/*
2638 * There are a couple cases where we need to restrict the functionality of
2639 * devices associated with RMRRs. The first is when evaluating a device for
2640 * identity mapping because problems exist when devices are moved in and out
2641 * of domains and their respective RMRR information is lost. This means that
2642 * a device with associated RMRRs will never be in a "passthrough" domain.
2643 * The second is use of the device through the IOMMU API. This interface
2644 * expects to have full control of the IOVA space for the device. We cannot
2645 * satisfy both the requirement that RMRR access is maintained and have an
2646 * unencumbered IOVA space. We also have no ability to quiesce the device's
2647 * use of the RMRR space or even inform the IOMMU API user of the restriction.
2648 * We therefore prevent devices associated with an RMRR from participating in
2649 * the IOMMU API, which eliminates them from device assignment.
2650 *
2651 * In both cases we assume that PCI USB devices with RMRRs have them largely
2652 * for historical reasons and that the RMRR space is not actively used post
2653 * boot. This exclusion may change if vendors begin to abuse it.
2654 *
2655 * The same exception is made for graphics devices, with the requirement that
2656 * any use of the RMRR regions will be torn down before assigning the device
2657 * to a guest.
2658 */
2659static bool device_is_rmrr_locked(struct device *dev)
2660{
2661 if (!device_has_rmrr(dev))
2662 return false;
2663
2664 if (dev_is_pci(dev)) {
2665 struct pci_dev *pdev = to_pci_dev(dev);
2666
2667 if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
2668 return false;
2669 }
2670
2671 return true;
2672}
2673
2674static int iommu_should_identity_map(struct device *dev, int startup)
2675{
2676
2677 if (dev_is_pci(dev)) {
2678 struct pci_dev *pdev = to_pci_dev(dev);
2679
2680 if (device_is_rmrr_locked(dev))
2681 return 0;
2682
2683 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2684 return 1;
2685
2686 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2687 return 1;
2688
2689 if (!(iommu_identity_mapping & IDENTMAP_ALL))
2690 return 0;
2691
2692 /*
2693 * We want to start off with all devices in the 1:1 domain, and
2694 * take them out later if we find they can't access all of memory.
2695 *
2696 * However, we can't do this for PCI devices behind bridges,
2697 * because all PCI devices behind the same bridge will end up
2698 * with the same source-id on their transactions.
2699 *
2700 * Practically speaking, we can't change things around for these
2701 * devices at run-time, because we can't be sure there'll be no
2702 * DMA transactions in flight for any of their siblings.
2703 *
2704 * So PCI devices (unless they're on the root bus) as well as
2705 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2706 * the 1:1 domain, just in _case_ one of their siblings turns out
2707 * not to be able to map all of memory.
2708 */
2709 if (!pci_is_pcie(pdev)) {
2710 if (!pci_is_root_bus(pdev->bus))
2711 return 0;
2712 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2713 return 0;
2714 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
2715 return 0;
2716 } else {
2717 if (device_has_rmrr(dev))
2718 return 0;
2719 }
2720
2721 /*
2722 * At boot time, we don't yet know if devices will be 64-bit capable.
2723 * Assume that they will -- if they turn out not to be, then we can
2724 * take them out of the 1:1 domain later.
2725 */
2726 if (!startup) {
2727 /*
2728 * If the device's dma_mask is less than the system's memory
2729 * size then this is not a candidate for identity mapping.
2730 */
2731 u64 dma_mask = *dev->dma_mask;
2732
2733 if (dev->coherent_dma_mask &&
2734 dev->coherent_dma_mask < dma_mask)
2735 dma_mask = dev->coherent_dma_mask;
2736
2737 return dma_mask >= dma_get_required_mask(dev);
2738 }
2739
2740 return 1;
2741}
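/*
 * Illustrative example (added commentary): at run time (startup == 0)
 * a device advertising only a 32-bit dma_mask on a machine with more
 * than 4GiB of RAM will typically fail the dma_get_required_mask()
 * comparison above and is therefore kept out of (or later taken out
 * of) the 1:1 domain, while a 64-bit capable device keeps its identity
 * mapping.
 */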
2742
2743static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
2744{
2745 int ret;
2746
2747 if (!iommu_should_identity_map(dev, 1))
2748 return 0;
2749
2750 ret = domain_add_dev_info(si_domain, dev,
2751 hw ? CONTEXT_TT_PASS_THROUGH :
2752 CONTEXT_TT_MULTI_LEVEL);
2753 if (!ret)
2754 pr_info("%s identity mapping for device %s\n",
2755 hw ? "Hardware" : "Software", dev_name(dev));
2756 else if (ret == -ENODEV)
2757 /* device not associated with an iommu */
2758 ret = 0;
2759
2760 return ret;
2761}
2762
2763
2764static int __init iommu_prepare_static_identity_mapping(int hw)
2765{
2766 struct pci_dev *pdev = NULL;
2767 struct dmar_drhd_unit *drhd;
2768 struct intel_iommu *iommu;
2769 struct device *dev;
2770 int i;
2771 int ret = 0;
2772
2773 for_each_pci_dev(pdev) {
2774 ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
2775 if (ret)
2776 return ret;
2777 }
2778
2779 for_each_active_iommu(iommu, drhd)
2780 for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
2781 struct acpi_device_physical_node *pn;
2782 struct acpi_device *adev;
2783
2784 if (dev->bus != &acpi_bus_type)
2785 continue;
2786
2787 adev = to_acpi_device(dev);
2788 mutex_lock(&adev->physical_node_lock);
2789 list_for_each_entry(pn, &adev->physical_node_list, node) {
2790 ret = dev_prepare_static_identity_mapping(pn->dev, hw);
2791 if (ret)
2792 break;
2793 }
2794 mutex_unlock(&adev->physical_node_lock);
2795 if (ret)
2796 return ret;
2797 }
2798
2799 return 0;
2800}
2801
2802static void intel_iommu_init_qi(struct intel_iommu *iommu)
2803{
2804 /*
2805 * Start from a sane iommu hardware state.
2806 * If the queued invalidation is already initialized by us
2807 * (for example, while enabling interrupt-remapping) then
2808 * we already have things rolling from a sane state.
2809 */
2810 if (!iommu->qi) {
2811 /*
2812 * Clear any previous faults.
2813 */
2814 dmar_fault(-1, iommu);
2815 /*
2816 * Disable queued invalidation if supported and already enabled
2817 * before OS handover.
2818 */
2819 dmar_disable_qi(iommu);
2820 }
2821
2822 if (dmar_enable_qi(iommu)) {
2823 /*
2824 * Queued Invalidate not enabled, use Register Based Invalidate
2825 */
2826 iommu->flush.flush_context = __iommu_flush_context;
2827 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2828 pr_info("%s: Using Register based invalidation\n",
2829 iommu->name);
2830 } else {
2831 iommu->flush.flush_context = qi_flush_context;
2832 iommu->flush.flush_iotlb = qi_flush_iotlb;
2833 pr_info("%s: Using Queued invalidation\n", iommu->name);
2834 }
2835}
2836
2837static int copy_context_table(struct intel_iommu *iommu,
2838 struct root_entry *old_re,
2839 struct context_entry **tbl,
2840 int bus, bool ext)
2841{
2842 struct context_entry *old_ce = NULL, *new_ce = NULL, ce;
2843 int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
2844 phys_addr_t old_ce_phys;
2845
2846 tbl_idx = ext ? bus * 2 : bus;
2847
2848 for (devfn = 0; devfn < 256; devfn++) {
2849 /* First calculate the correct index */
2850 idx = (ext ? devfn * 2 : devfn) % 256;
2851
2852 if (idx == 0) {
2853 /* First save what we may have and clean up */
2854 if (new_ce) {
2855 tbl[tbl_idx] = new_ce;
2856 __iommu_flush_cache(iommu, new_ce,
2857 VTD_PAGE_SIZE);
2858 pos = 1;
2859 }
2860
2861 if (old_ce)
2862 iounmap(old_ce);
2863
2864 ret = 0;
2865 if (devfn < 0x80)
2866 old_ce_phys = root_entry_lctp(old_re);
2867 else
2868 old_ce_phys = root_entry_uctp(old_re);
2869
2870 if (!old_ce_phys) {
2871 if (ext && devfn == 0) {
2872 /* No LCTP, try UCTP */
2873 devfn = 0x7f;
2874 continue;
2875 } else {
2876 goto out;
2877 }
2878 }
2879
2880 ret = -ENOMEM;
2881 old_ce = ioremap_cache(old_ce_phys, PAGE_SIZE);
2882 if (!old_ce)
2883 goto out;
2884
2885 new_ce = alloc_pgtable_page(iommu->node);
2886 if (!new_ce)
2887 goto out_unmap;
2888
2889 ret = 0;
2890 }
2891
2892 /* Now copy the context entry */
2893 ce = old_ce[idx];
2894
2895 if (!__context_present(&ce))
2896 continue;
2897
2898 did = context_domain_id(&ce);
2899 if (did >= 0 && did < cap_ndoms(iommu->cap))
2900 set_bit(did, iommu->domain_ids);
2901
2902 /*
2903 * We need a marker for copied context entries. This
2904 * marker needs to work for the old format as well as
2905 * for extended context entries.
2906 *
2907 * Bit 67 of the context entry is used. In the old
2908 * format this bit is available to software, in the
2909 * extended format it is the PGE bit, but PGE is ignored
2910 * by HW if PASIDs are disabled (and thus still
2911 * available).
2912 *
2913 * So disable PASIDs first and then mark the entry
2914 * copied. This means that we don't copy PASID
2915 * translations from the old kernel, but this is fine as
2916 * faults there are not fatal.
2917 */
2918 context_clear_pasid_enable(&ce);
2919 context_set_copied(&ce);
2920
2921 new_ce[idx] = ce;
2922 }
2923
2924 tbl[tbl_idx + pos] = new_ce;
2925
2926 __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
2927
2928out_unmap:
2929 iounmap(old_ce);
2930
2931out:
2932 return ret;
2933}
2934
2935static int copy_translation_tables(struct intel_iommu *iommu)
2936{
2937 struct context_entry **ctxt_tbls;
2938 struct root_entry *old_rt;
2939 phys_addr_t old_rt_phys;
2940 int ctxt_table_entries;
2941 unsigned long flags;
2942 u64 rtaddr_reg;
2943 int bus, ret;
2944 bool new_ext, ext;
2945
2946 rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
2947 ext = !!(rtaddr_reg & DMA_RTADDR_RTT);
2948 new_ext = !!ecap_ecs(iommu->ecap);
2949
2950 /*
2951 * The RTT bit can only be changed when translation is disabled,
2952 * but disabling translation means opening a window for data
2953 * corruption. So bail out and don't copy anything if we would
2954 * have to change the bit.
2955 */
2956 if (new_ext != ext)
2957 return -EINVAL;
2958
2959 old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
2960 if (!old_rt_phys)
2961 return -EINVAL;
2962
2963 old_rt = ioremap_cache(old_rt_phys, PAGE_SIZE);
2964 if (!old_rt)
2965 return -ENOMEM;
2966
2967 /* This is too big for the stack - allocate it from slab */
2968 ctxt_table_entries = ext ? 512 : 256;
2969 ret = -ENOMEM;
2970 ctxt_tbls = kzalloc(ctxt_table_entries * sizeof(void *), GFP_KERNEL);
2971 if (!ctxt_tbls)
2972 goto out_unmap;
2973
2974 for (bus = 0; bus < 256; bus++) {
2975 ret = copy_context_table(iommu, &old_rt[bus],
2976 ctxt_tbls, bus, ext);
2977 if (ret) {
2978 pr_err("%s: Failed to copy context table for bus %d\n",
2979 iommu->name, bus);
2980 continue;
2981 }
2982 }
2983
2984 spin_lock_irqsave(&iommu->lock, flags);
2985
2986 /* Context tables are copied, now write them to the root_entry table */
2987 for (bus = 0; bus < 256; bus++) {
2988 int idx = ext ? bus * 2 : bus;
2989 u64 val;
2990
2991 if (ctxt_tbls[idx]) {
2992 val = virt_to_phys(ctxt_tbls[idx]) | 1;
2993 iommu->root_entry[bus].lo = val;
2994 }
2995
2996 if (!ext || !ctxt_tbls[idx + 1])
2997 continue;
2998
2999 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
3000 iommu->root_entry[bus].hi = val;
3001 }
3002
3003 spin_unlock_irqrestore(&iommu->lock, flags);
3004
3005 kfree(ctxt_tbls);
3006
3007 __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
3008
3009 ret = 0;
3010
3011out_unmap:
3012 iounmap(old_rt);
3013
3014 return ret;
3015}
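/*
 * Added commentary on the table layout assumed above: with extended
 * context entries (ext == true) each bus needs two context pages, so
 * ctxt_tbls[] is indexed as bus*2 for devfns 0x00-0x7f (copied via the
 * old root entry's lower context-table pointer) and bus*2+1 for devfns
 * 0x80-0xff (upper pointer); e.g. bus 3 occupies slots 6 and 7, which
 * end up in root_entry[3].lo and .hi respectively.
 */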
3016
3017static int __init init_dmars(void)
3018{
3019 struct dmar_drhd_unit *drhd;
3020 struct dmar_rmrr_unit *rmrr;
3021 bool copied_tables = false;
3022 struct device *dev;
3023 struct intel_iommu *iommu;
3024 int i, ret;
3025
3026 /*
3027 * for each drhd
3028 * allocate root
3029 * initialize and program root entry to not present
3030 * endfor
3031 */
3032 for_each_drhd_unit(drhd) {
3033 /*
3034 * lock not needed as this is only incremented in the
3035 * single-threaded kernel __init code path; all other
3036 * accesses are read only
3037 */
3038 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
3039 g_num_of_iommus++;
3040 continue;
3041 }
3042 pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
3043 }
3044
3045 /* Preallocate enough resources for IOMMU hot-addition */
3046 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
3047 g_num_of_iommus = DMAR_UNITS_SUPPORTED;
3048
3049 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
3050 GFP_KERNEL);
3051 if (!g_iommus) {
3052 pr_err("Allocating global iommu array failed\n");
3053 ret = -ENOMEM;
3054 goto error;
3055 }
3056
3057 deferred_flush = kzalloc(g_num_of_iommus *
3058 sizeof(struct deferred_flush_tables), GFP_KERNEL);
3059 if (!deferred_flush) {
3060 ret = -ENOMEM;
3061 goto free_g_iommus;
3062 }
3063
3064 for_each_active_iommu(iommu, drhd) {
3065 g_iommus[iommu->seq_id] = iommu;
3066
3067 intel_iommu_init_qi(iommu);
3068
3069 ret = iommu_init_domains(iommu);
3070 if (ret)
3071 goto free_iommu;
3072
3073 init_translation_status(iommu);
3074
3075 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
3076 iommu_disable_translation(iommu);
3077 clear_translation_pre_enabled(iommu);
3078 pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3079 iommu->name);
3080 }
3081
3082 /*
3083 * TBD:
3084 * we could share the same root & context tables
3085 * among all IOMMUs. Need to split this out later.
3086 */
3087 ret = iommu_alloc_root_entry(iommu);
3088 if (ret)
3089 goto free_iommu;
3090
3091 if (translation_pre_enabled(iommu)) {
3092 pr_info("Translation already enabled - trying to copy translation structures\n");
3093
3094 ret = copy_translation_tables(iommu);
3095 if (ret) {
3096 /*
3097 * We found the IOMMU with translation
3098 * enabled - but failed to copy over the
3099 * old root-entry table. Try to proceed
3100 * by disabling translation now and
3101 * allocating a clean root-entry table.
3102 * This might cause DMAR faults, but
3103 * probably the dump will still succeed.
3104 */
3105 pr_err("Failed to copy translation tables from previous kernel for %s\n",
3106 iommu->name);
3107 iommu_disable_translation(iommu);
3108 clear_translation_pre_enabled(iommu);
3109 } else {
3110 pr_info("Copied translation tables from previous kernel for %s\n",
3111 iommu->name);
3112 copied_tables = true;
3113 }
3114 }
3115
3116 iommu_flush_write_buffer(iommu);
3117 iommu_set_root_entry(iommu);
3118 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
3119 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3120
3121 if (!ecap_pass_through(iommu->ecap))
3122 hw_pass_through = 0;
3123 }
3124
3125 if (iommu_pass_through)
3126 iommu_identity_mapping |= IDENTMAP_ALL;
3127
3128#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
3129 iommu_identity_mapping |= IDENTMAP_GFX;
3130#endif
3131
3132 if (iommu_identity_mapping) {
3133 ret = si_domain_init(hw_pass_through);
3134 if (ret)
3135 goto free_iommu;
3136 }
3137
3138 check_tylersburg_isoch();
3139
3140 /*
3141 * If we copied translations from a previous kernel in the kdump
3142 * case, we cannot assign the devices to domains now, as that
3143 * would eliminate the old mappings. So skip this part and defer
3144 * the assignment to device driver initialization time.
3145 */
3146 if (copied_tables)
3147 goto domains_done;
3148
3149 /*
3150 * If pass through is not set or not enabled, set up context entries
3151 * for identity mappings for RMRR, gfx and ISA, possibly falling back
3152 * to static identity mapping if iommu_identity_mapping is set.
3153 */
3154 if (iommu_identity_mapping) {
3155 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
3156 if (ret) {
3157 pr_crit("Failed to setup IOMMU pass-through\n");
3158 goto free_iommu;
3159 }
3160 }
3161 /*
3162 * For each rmrr
3163 * for each dev attached to rmrr
3164 * do
3165 * locate drhd for dev, alloc domain for dev
3166 * allocate free domain
3167 * allocate page table entries for rmrr
3168 * if context not allocated for bus
3169 * allocate and init context
3170 * set present in root table for this bus
3171 * init context with domain, translation etc
3172 * endfor
3173 * endfor
3174 */
3175 pr_info("Setting RMRR:\n");
3176 for_each_rmrr_units(rmrr) {
3177 /* some BIOSes list non-existent devices in the DMAR table. */
3178 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
3179 i, dev) {
3180 ret = iommu_prepare_rmrr_dev(rmrr, dev);
3181 if (ret)
3182 pr_err("Mapping reserved region failed\n");
3183 }
3184 }
3185
3186 iommu_prepare_isa();
3187
3188domains_done:
3189
3190 /*
3191 * for each drhd
3192 * enable fault log
3193 * global invalidate context cache
3194 * global invalidate iotlb
3195 * enable translation
3196 */
3197 for_each_iommu(iommu, drhd) {
3198 if (drhd->ignored) {
3199 /*
3200 * we always have to disable PMRs or DMA may fail on
3201 * this device
3202 */
3203 if (force_on)
3204 iommu_disable_protect_mem_regions(iommu);
3205 continue;
3206 }
3207
3208 iommu_flush_write_buffer(iommu);
3209
3210 ret = dmar_set_interrupt(iommu);
3211 if (ret)
3212 goto free_iommu;
3213
3214 if (!translation_pre_enabled(iommu))
3215 iommu_enable_translation(iommu);
3216
3217 iommu_disable_protect_mem_regions(iommu);
3218 }
3219
3220 return 0;
3221
3222free_iommu:
3223 for_each_active_iommu(iommu, drhd) {
3224 disable_dmar_iommu(iommu);
3225 free_dmar_iommu(iommu);
3226 }
3227 kfree(deferred_flush);
3228free_g_iommus:
3229 kfree(g_iommus);
3230error:
3231 return ret;
3232}
3233
3234/* This takes a number of _MM_ pages, not VTD pages */
3235static struct iova *intel_alloc_iova(struct device *dev,
3236 struct dmar_domain *domain,
3237 unsigned long nrpages, uint64_t dma_mask)
3238{
3239 struct iova *iova = NULL;
3240
3241 /* Restrict dma_mask to the width that the iommu can handle */
3242 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
3243
3244 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
3245 /*
3246 * First try to allocate an io virtual address in
3247 * DMA_BIT_MASK(32) and if that fails then try allocating
3248 * from higher range
3249 */
3250 iova = alloc_iova(&domain->iovad, nrpages,
3251 IOVA_PFN(DMA_BIT_MASK(32)), 1);
3252 if (iova)
3253 return iova;
3254 }
3255 iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
3256 if (unlikely(!iova)) {
3257 pr_err("Allocating %ld-page iova for %s failed",
3258 nrpages, dev_name(dev));
3259 return NULL;
3260 }
3261
3262 return iova;
3263}
3264
3265static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
3266{
3267 struct dmar_domain *domain;
3268 int ret;
3269
3270 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
3271 if (!domain) {
3272 pr_err("Allocating domain for %s failed\n",
3273 dev_name(dev));
3274 return NULL;
3275 }
3276
3277 /* make sure context mapping is ok */
3278 if (unlikely(!domain_context_mapped(dev))) {
3279 ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);
3280 if (ret) {
3281 pr_err("Domain context map for %s failed\n",
3282 dev_name(dev));
3283 return NULL;
3284 }
3285 }
3286
3287 return domain;
3288}
3289
3290static inline struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
3291{
3292 struct device_domain_info *info;
3293
3294 /* No lock here, assumes no domain exit in normal case */
3295 info = dev->archdata.iommu;
3296 if (likely(info))
3297 return info->domain;
3298
3299 return __get_valid_domain_for_dev(dev);
3300}
3301
3302/* Check if the dev needs to go through non-identity map and unmap process. */
3303static int iommu_no_mapping(struct device *dev)
3304{
3305 int found;
3306
3307 if (iommu_dummy(dev))
3308 return 1;
3309
3310 if (!iommu_identity_mapping)
3311 return 0;
3312
3313 found = identity_mapping(dev);
3314 if (found) {
3315 if (iommu_should_identity_map(dev, 0))
3316 return 1;
3317 else {
3318 /*
3319 * The 32 bit DMA device is removed from si_domain and falls
3320 * back to non-identity mapping.
3321 */
3322 domain_remove_one_dev_info(si_domain, dev);
3323 pr_info("32bit %s uses non-identity mapping\n",
3324 dev_name(dev));
3325 return 0;
3326 }
3327 } else {
3328 /*
3329 * In case a 64 bit DMA device is detached from a VM, the
3330 * device is put into si_domain for identity mapping.
3331 */
3332 if (iommu_should_identity_map(dev, 0)) {
3333 int ret;
3334 ret = domain_add_dev_info(si_domain, dev,
3335 hw_pass_through ?
3336 CONTEXT_TT_PASS_THROUGH :
3337 CONTEXT_TT_MULTI_LEVEL);
3338 if (!ret) {
3339 pr_info("64bit %s uses identity mapping\n",
3340 dev_name(dev));
3341 return 1;
3342 }
3343 }
3344 }
3345
3346 return 0;
3347}
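/*
 * Summary of the cases above (added commentary): a dummy device or an
 * identity-mapped device that should stay identity mapped returns 1
 * (DMA uses physical addresses directly); a device that should no
 * longer be identity mapped is dropped from si_domain and returns 0; a
 * device that is not yet identity mapped but qualifies is added to
 * si_domain and returns 1.
 */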
3348
3349static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
3350 size_t size, int dir, u64 dma_mask)
3351{
3352 struct dmar_domain *domain;
3353 phys_addr_t start_paddr;
3354 struct iova *iova;
3355 int prot = 0;
3356 int ret;
3357 struct intel_iommu *iommu;
3358 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
3359
3360 BUG_ON(dir == DMA_NONE);
3361
3362 if (iommu_no_mapping(dev))
3363 return paddr;
3364
3365 domain = get_valid_domain_for_dev(dev);
3366 if (!domain)
3367 return 0;
3368
3369 iommu = domain_get_iommu(domain);
3370 size = aligned_nrpages(paddr, size);
3371
3372 iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
3373 if (!iova)
3374 goto error;
3375
3376 /*
3377 * Check if DMAR supports zero-length reads on write only
3378 * mappings.
3379 */
3380 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3381 !cap_zlr(iommu->cap))
3382 prot |= DMA_PTE_READ;
3383 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3384 prot |= DMA_PTE_WRITE;
3385 /*
3386 * paddr - (paddr + size) might be a partial page, we should map the
3387 * whole page. Note: if two parts of one page are separately mapped,
3388 * we might have two guest_addr mappings to the same host paddr, but
3389 * this is not a big problem
3390 */
3391 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
3392 mm_to_dma_pfn(paddr_pfn), size, prot);
3393 if (ret)
3394 goto error;
3395
3396 /* it's a non-present to present mapping. Only flush if caching mode */
3397 if (cap_caching_mode(iommu->cap))
3398 iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 0, 1);
3399 else
3400 iommu_flush_write_buffer(iommu);
3401
3402 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
3403 start_paddr += paddr & ~PAGE_MASK;
3404 return start_paddr;
3405
3406error:
3407 if (iova)
3408 __free_iova(&domain->iovad, iova);
3409 pr_err("Device %s request: %zx@%llx dir %d --- failed\n",
3410 dev_name(dev), size, (unsigned long long)paddr, dir);
3411 return 0;
3412}
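/*
 * Worked example (added commentary, assuming 4KiB pages): mapping
 * paddr = 0x12345678 with size = 0x100 covers one VT-d page
 * (aligned_nrpages() of offset 0x678 plus 0x100 is 1); if the iova
 * allocator were to hand back pfn_lo = 0xffffe, the returned bus
 * address would be (0xffffe << 12) + 0x678 = 0xffffe678.
 */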
3413
3414static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3415 unsigned long offset, size_t size,
3416 enum dma_data_direction dir,
3417 struct dma_attrs *attrs)
3418{
3419 return __intel_map_single(dev, page_to_phys(page) + offset, size,
3420 dir, *dev->dma_mask);
3421}
3422
3423static void flush_unmaps(void)
3424{
3425 int i, j;
3426
3427 timer_on = 0;
3428
3429 /* just flush them all */
3430 for (i = 0; i < g_num_of_iommus; i++) {
3431 struct intel_iommu *iommu = g_iommus[i];
3432 if (!iommu)
3433 continue;
3434
3435 if (!deferred_flush[i].next)
3436 continue;
3437
3438 /* In caching mode, global flushes make emulation expensive */
3439 if (!cap_caching_mode(iommu->cap))
3440 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3441 DMA_TLB_GLOBAL_FLUSH);
3442 for (j = 0; j < deferred_flush[i].next; j++) {
3443 unsigned long mask;
3444 struct iova *iova = deferred_flush[i].iova[j];
3445 struct dmar_domain *domain = deferred_flush[i].domain[j];
3446
3447 /* On real hardware multiple invalidations are expensive */
3448 if (cap_caching_mode(iommu->cap))
3449 iommu_flush_iotlb_psi(iommu, domain->id,
3450 iova->pfn_lo, iova_size(iova),
3451 !deferred_flush[i].freelist[j], 0);
3452 else {
3453 mask = ilog2(mm_to_dma_pfn(iova_size(iova)));
3454 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
3455 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
3456 }
3457 __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
3458 if (deferred_flush[i].freelist[j])
3459 dma_free_pagelist(deferred_flush[i].freelist[j]);
3460 }
3461 deferred_flush[i].next = 0;
3462 }
3463
3464 list_size = 0;
3465}
3466
3467static void flush_unmaps_timeout(unsigned long data)
3468{
3469 unsigned long flags;
3470
3471 spin_lock_irqsave(&async_umap_flush_lock, flags);
3472 flush_unmaps();
3473 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3474}
3475
3476static void add_unmap(struct dmar_domain *dom, struct iova *iova, struct page *freelist)
3477{
3478 unsigned long flags;
3479 int next, iommu_id;
3480 struct intel_iommu *iommu;
3481
3482 spin_lock_irqsave(&async_umap_flush_lock, flags);
3483 if (list_size == HIGH_WATER_MARK)
3484 flush_unmaps();
3485
3486 iommu = domain_get_iommu(dom);
3487 iommu_id = iommu->seq_id;
3488
3489 next = deferred_flush[iommu_id].next;
3490 deferred_flush[iommu_id].domain[next] = dom;
3491 deferred_flush[iommu_id].iova[next] = iova;
3492 deferred_flush[iommu_id].freelist[next] = freelist;
3493 deferred_flush[iommu_id].next++;
3494
3495 if (!timer_on) {
3496 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
3497 timer_on = 1;
3498 }
3499 list_size++;
3500 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3501}
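/*
 * Added commentary: unmaps are batched per IOMMU here; an entry sits
 * in deferred_flush[] until either HIGH_WATER_MARK entries have queued
 * up or the 10ms unmap_timer fires, at which point flush_unmaps()
 * above performs one IOTLB invalidation per IOMMU (or per-range
 * invalidations in caching mode) and only then frees the iovas and
 * page lists.
 */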
3502
3503static void intel_unmap(struct device *dev, dma_addr_t dev_addr)
3504{
3505 struct dmar_domain *domain;
3506 unsigned long start_pfn, last_pfn;
3507 struct iova *iova;
3508 struct intel_iommu *iommu;
3509 struct page *freelist;
3510
3511 if (iommu_no_mapping(dev))
3512 return;
3513
3514 domain = find_domain(dev);
3515 BUG_ON(!domain);
3516
3517 iommu = domain_get_iommu(domain);
3518
3519 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
3520 if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
3521 (unsigned long long)dev_addr))
3522 return;
3523
3524 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3525 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
3526
3527 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
3528 dev_name(dev), start_pfn, last_pfn);
3529
3530 freelist = domain_unmap(domain, start_pfn, last_pfn);
3531
3532 if (intel_iommu_strict) {
3533 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
3534 last_pfn - start_pfn + 1, !freelist, 0);
3535 /* free iova */
3536 __free_iova(&domain->iovad, iova);
3537 dma_free_pagelist(freelist);
3538 } else {
3539 add_unmap(domain, iova, freelist);
3540 /*
3541 * queue up the release of the unmap to save roughly 1/6th of
3542 * the cpu time used up by the iotlb flush operation...
3543 */
3544 }
3545}
3546
3547static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3548 size_t size, enum dma_data_direction dir,
3549 struct dma_attrs *attrs)
3550{
3551 intel_unmap(dev, dev_addr);
3552}
3553
3554static void *intel_alloc_coherent(struct device *dev, size_t size,
3555 dma_addr_t *dma_handle, gfp_t flags,
3556 struct dma_attrs *attrs)
3557{
3558 struct page *page = NULL;
3559 int order;
3560
3561 size = PAGE_ALIGN(size);
3562 order = get_order(size);
3563
3564 if (!iommu_no_mapping(dev))
3565 flags &= ~(GFP_DMA | GFP_DMA32);
3566 else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
3567 if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
3568 flags |= GFP_DMA;
3569 else
3570 flags |= GFP_DMA32;
3571 }
3572
3573 if (flags & __GFP_WAIT) {
3574 unsigned int count = size >> PAGE_SHIFT;
3575
3576 page = dma_alloc_from_contiguous(dev, count, order);
3577 if (page && iommu_no_mapping(dev) &&
3578 page_to_phys(page) + size > dev->coherent_dma_mask) {
3579 dma_release_from_contiguous(dev, page, count);
3580 page = NULL;
3581 }
3582 }
3583
3584 if (!page)
3585 page = alloc_pages(flags, order);
3586 if (!page)
3587 return NULL;
3588 memset(page_address(page), 0, size);
3589
3590 *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
3591 DMA_BIDIRECTIONAL,
3592 dev->coherent_dma_mask);
3593 if (*dma_handle)
3594 return page_address(page);
3595 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3596 __free_pages(page, order);
3597
3598 return NULL;
3599}
3600
3601static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
3602 dma_addr_t dma_handle, struct dma_attrs *attrs)
3603{
3604 int order;
3605 struct page *page = virt_to_page(vaddr);
3606
3607 size = PAGE_ALIGN(size);
3608 order = get_order(size);
3609
3610 intel_unmap(dev, dma_handle);
3611 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3612 __free_pages(page, order);
3613}
3614
3615static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
3616 int nelems, enum dma_data_direction dir,
3617 struct dma_attrs *attrs)
3618{
3619 intel_unmap(dev, sglist[0].dma_address);
3620}
3621
3622static int intel_nontranslate_map_sg(struct device *hddev,
3623 struct scatterlist *sglist, int nelems, int dir)
3624{
3625 int i;
3626 struct scatterlist *sg;
3627
3628 for_each_sg(sglist, sg, nelems, i) {
3629 BUG_ON(!sg_page(sg));
3630 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
3631 sg->dma_length = sg->length;
3632 }
3633 return nelems;
3634}
3635
3636static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
3637 enum dma_data_direction dir, struct dma_attrs *attrs)
3638{
3639 int i;
3640 struct dmar_domain *domain;
3641 size_t size = 0;
3642 int prot = 0;
3643 struct iova *iova = NULL;
3644 int ret;
3645 struct scatterlist *sg;
3646 unsigned long start_vpfn;
3647 struct intel_iommu *iommu;
3648
3649 BUG_ON(dir == DMA_NONE);
3650 if (iommu_no_mapping(dev))
3651 return intel_nontranslate_map_sg(dev, sglist, nelems, dir);
3652
3653 domain = get_valid_domain_for_dev(dev);
3654 if (!domain)
3655 return 0;
3656
3657 iommu = domain_get_iommu(domain);
3658
3659 for_each_sg(sglist, sg, nelems, i)
3660 size += aligned_nrpages(sg->offset, sg->length);
3661
3662 iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
3663 *dev->dma_mask);
3664 if (!iova) {
3665 sglist->dma_length = 0;
3666 return 0;
3667 }
3668
3669 /*
3670 * Check if DMAR supports zero-length reads on write only
3671 * mappings.
3672 */
3673 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3674 !cap_zlr(iommu->cap))
3675 prot |= DMA_PTE_READ;
3676 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3677 prot |= DMA_PTE_WRITE;
3678
3679 start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
3680
3681 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
3682 if (unlikely(ret)) {
3683 dma_pte_free_pagetable(domain, start_vpfn,
3684 start_vpfn + size - 1);
3685 __free_iova(&domain->iovad, iova);
3686 return 0;
3687 }
3688
3689 /* it's a non-present to present mapping. Only flush if caching mode */
3690 if (cap_caching_mode(iommu->cap))
3691 iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 0, 1);
3692 else
3693 iommu_flush_write_buffer(iommu);
3694
3695 return nelems;
3696}
3697
3698static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3699{
3700 return !dma_addr;
3701}
3702
3703struct dma_map_ops intel_dma_ops = {
3704 .alloc = intel_alloc_coherent,
3705 .free = intel_free_coherent,
3706 .map_sg = intel_map_sg,
3707 .unmap_sg = intel_unmap_sg,
3708 .map_page = intel_map_page,
3709 .unmap_page = intel_unmap_page,
3710 .mapping_error = intel_mapping_error,
3711};
3712
3713static inline int iommu_domain_cache_init(void)
3714{
3715 int ret = 0;
3716
3717 iommu_domain_cache = kmem_cache_create("iommu_domain",
3718 sizeof(struct dmar_domain),
3719 0,
3720 SLAB_HWCACHE_ALIGN,
3721
3722 NULL);
3723 if (!iommu_domain_cache) {
3724 pr_err("Couldn't create iommu_domain cache\n");
3725 ret = -ENOMEM;
3726 }
3727
3728 return ret;
3729}
3730
3731static inline int iommu_devinfo_cache_init(void)
3732{
3733 int ret = 0;
3734
3735 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3736 sizeof(struct device_domain_info),
3737 0,
3738 SLAB_HWCACHE_ALIGN,
3739 NULL);
3740 if (!iommu_devinfo_cache) {
3741 pr_err("Couldn't create devinfo cache\n");
3742 ret = -ENOMEM;
3743 }
3744
3745 return ret;
3746}
3747
3748static int __init iommu_init_mempool(void)
3749{
3750 int ret;
3751 ret = iommu_iova_cache_init();
3752 if (ret)
3753 return ret;
3754
3755 ret = iommu_domain_cache_init();
3756 if (ret)
3757 goto domain_error;
3758
3759 ret = iommu_devinfo_cache_init();
3760 if (!ret)
3761 return ret;
3762
3763 kmem_cache_destroy(iommu_domain_cache);
3764domain_error:
3765 iommu_iova_cache_destroy();
3766
3767 return -ENOMEM;
3768}
3769
3770static void __init iommu_exit_mempool(void)
3771{
3772 kmem_cache_destroy(iommu_devinfo_cache);
3773 kmem_cache_destroy(iommu_domain_cache);
3774 iommu_iova_cache_destroy();
3775}
3776
3777static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3778{
3779 struct dmar_drhd_unit *drhd;
3780 u32 vtbar;
3781 int rc;
3782
3783 /* We know that this device on this chipset has its own IOMMU.
3784 * If we find it under a different IOMMU, then the BIOS is lying
3785 * to us. Hope that the IOMMU for this device is actually
3786 * disabled, and it needs no translation...
3787 */
3788 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3789 if (rc) {
3790 /* "can't" happen */
3791 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3792 return;
3793 }
3794 vtbar &= 0xffff0000;
3795
3796 /* we know that this iommu should be at offset 0xa000 from vtbar */
3797 drhd = dmar_find_matched_drhd_unit(pdev);
3798 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3799 TAINT_FIRMWARE_WORKAROUND,
3800 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3801 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3802}
3803DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3804
3805static void __init init_no_remapping_devices(void)
3806{
3807 struct dmar_drhd_unit *drhd;
3808 struct device *dev;
3809 int i;
3810
3811 for_each_drhd_unit(drhd) {
3812 if (!drhd->include_all) {
3813 for_each_active_dev_scope(drhd->devices,
3814 drhd->devices_cnt, i, dev)
3815 break;
3816 /* ignore DMAR unit if no devices exist */
3817 if (i == drhd->devices_cnt)
3818 drhd->ignored = 1;
3819 }
3820 }
3821
3822 for_each_active_drhd_unit(drhd) {
3823 if (drhd->include_all)
3824 continue;
3825
3826 for_each_active_dev_scope(drhd->devices,
3827 drhd->devices_cnt, i, dev)
3828 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
3829 break;
3830 if (i < drhd->devices_cnt)
3831 continue;
3832
3833 /* This IOMMU has *only* gfx devices. Either bypass it or
3834 set the gfx_mapped flag, as appropriate */
3835 if (dmar_map_gfx) {
3836 intel_iommu_gfx_mapped = 1;
3837 } else {
3838 drhd->ignored = 1;
3839 for_each_active_dev_scope(drhd->devices,
3840 drhd->devices_cnt, i, dev)
3841 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3842 }
3843 }
3844}
3845
3846#ifdef CONFIG_SUSPEND
3847static int init_iommu_hw(void)
3848{
3849 struct dmar_drhd_unit *drhd;
3850 struct intel_iommu *iommu = NULL;
3851
3852 for_each_active_iommu(iommu, drhd)
3853 if (iommu->qi)
3854 dmar_reenable_qi(iommu);
3855
3856 for_each_iommu(iommu, drhd) {
3857 if (drhd->ignored) {
3858 /*
3859 * we always have to disable PMRs or DMA may fail on
3860 * this device
3861 */
3862 if (force_on)
3863 iommu_disable_protect_mem_regions(iommu);
3864 continue;
3865 }
3866
3867 iommu_flush_write_buffer(iommu);
3868
3869 iommu_set_root_entry(iommu);
3870
3871 iommu->flush.flush_context(iommu, 0, 0, 0,
3872 DMA_CCMD_GLOBAL_INVL);
3873 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3874 iommu_enable_translation(iommu);
3875 iommu_disable_protect_mem_regions(iommu);
3876 }
3877
3878 return 0;
3879}
3880
3881static void iommu_flush_all(void)
3882{
3883 struct dmar_drhd_unit *drhd;
3884 struct intel_iommu *iommu;
3885
3886 for_each_active_iommu(iommu, drhd) {
3887 iommu->flush.flush_context(iommu, 0, 0, 0,
3888 DMA_CCMD_GLOBAL_INVL);
3889 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3890 DMA_TLB_GLOBAL_FLUSH);
3891 }
3892}
3893
3894static int iommu_suspend(void)
3895{
3896 struct dmar_drhd_unit *drhd;
3897 struct intel_iommu *iommu = NULL;
3898 unsigned long flag;
3899
3900 for_each_active_iommu(iommu, drhd) {
3901 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3902 GFP_ATOMIC);
3903 if (!iommu->iommu_state)
3904 goto nomem;
3905 }
3906
3907 iommu_flush_all();
3908
3909 for_each_active_iommu(iommu, drhd) {
3910 iommu_disable_translation(iommu);
3911
3912 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3913
3914 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3915 readl(iommu->reg + DMAR_FECTL_REG);
3916 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3917 readl(iommu->reg + DMAR_FEDATA_REG);
3918 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3919 readl(iommu->reg + DMAR_FEADDR_REG);
3920 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3921 readl(iommu->reg + DMAR_FEUADDR_REG);
3922
3923 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3924 }
3925 return 0;
3926
3927nomem:
3928 for_each_active_iommu(iommu, drhd)
3929 kfree(iommu->iommu_state);
3930
3931 return -ENOMEM;
3932}
3933
3934static void iommu_resume(void)
3935{
3936 struct dmar_drhd_unit *drhd;
3937 struct intel_iommu *iommu = NULL;
3938 unsigned long flag;
3939
3940 if (init_iommu_hw()) {
3941 if (force_on)
3942 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3943 else
3944 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
3945 return;
3946 }
3947
3948 for_each_active_iommu(iommu, drhd) {
3949
3950 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3951
3952 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3953 iommu->reg + DMAR_FECTL_REG);
3954 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3955 iommu->reg + DMAR_FEDATA_REG);
3956 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3957 iommu->reg + DMAR_FEADDR_REG);
3958 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3959 iommu->reg + DMAR_FEUADDR_REG);
3960
3961 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3962 }
3963
3964 for_each_active_iommu(iommu, drhd)
3965 kfree(iommu->iommu_state);
3966}
3967
3968static struct syscore_ops iommu_syscore_ops = {
3969 .resume = iommu_resume,
3970 .suspend = iommu_suspend,
3971};
3972
3973static void __init init_iommu_pm_ops(void)
3974{
3975 register_syscore_ops(&iommu_syscore_ops);
3976}
3977
3978#else
3979static inline void init_iommu_pm_ops(void) {}
3980#endif /* CONFIG_SUSPEND */
3981
3982
3983int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
3984{
3985 struct acpi_dmar_reserved_memory *rmrr;
3986 struct dmar_rmrr_unit *rmrru;
3987
3988 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
3989 if (!rmrru)
3990 return -ENOMEM;
3991
3992 rmrru->hdr = header;
3993 rmrr = (struct acpi_dmar_reserved_memory *)header;
3994 rmrru->base_address = rmrr->base_address;
3995 rmrru->end_address = rmrr->end_address;
3996 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
3997 ((void *)rmrr) + rmrr->header.length,
3998 &rmrru->devices_cnt);
3999 if (rmrru->devices_cnt && rmrru->devices == NULL) {
4000 kfree(rmrru);
4001 return -ENOMEM;
4002 }
4003
4004 list_add(&rmrru->list, &dmar_rmrr_units);
4005
4006 return 0;
4007}
4008
4009static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
4010{
4011 struct dmar_atsr_unit *atsru;
4012 struct acpi_dmar_atsr *tmp;
4013
4014 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4015 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
4016 if (atsr->segment != tmp->segment)
4017 continue;
4018 if (atsr->header.length != tmp->header.length)
4019 continue;
4020 if (memcmp(atsr, tmp, atsr->header.length) == 0)
4021 return atsru;
4022 }
4023
4024 return NULL;
4025}
4026
4027int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4028{
4029 struct acpi_dmar_atsr *atsr;
4030 struct dmar_atsr_unit *atsru;
4031
4032 if (system_state != SYSTEM_BOOTING && !intel_iommu_enabled)
4033 return 0;
4034
4035 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4036 atsru = dmar_find_atsr(atsr);
4037 if (atsru)
4038 return 0;
4039
4040 atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
4041 if (!atsru)
4042 return -ENOMEM;
4043
4044 /*
4045 * If the memory was allocated from the slab by the ACPI _DSM method,
4046 * we need to copy its contents because the buffer will be freed
4047 * on return.
4048 */
4049 atsru->hdr = (void *)(atsru + 1);
4050 memcpy(atsru->hdr, hdr, hdr->length);
4051 atsru->include_all = atsr->flags & 0x1;
4052 if (!atsru->include_all) {
4053 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
4054 (void *)atsr + atsr->header.length,
4055 &atsru->devices_cnt);
4056 if (atsru->devices_cnt && atsru->devices == NULL) {
4057 kfree(atsru);
4058 return -ENOMEM;
4059 }
4060 }
4061
4062 list_add_rcu(&atsru->list, &dmar_atsr_units);
4063
4064 return 0;
4065}
4066
4067static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
4068{
4069 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
4070 kfree(atsru);
4071}
4072
4073int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4074{
4075 struct acpi_dmar_atsr *atsr;
4076 struct dmar_atsr_unit *atsru;
4077
4078 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4079 atsru = dmar_find_atsr(atsr);
4080 if (atsru) {
4081 list_del_rcu(&atsru->list);
4082 synchronize_rcu();
4083 intel_iommu_free_atsr(atsru);
4084 }
4085
4086 return 0;
4087}
4088
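/*
 * Check whether an ATSR structure may be released; return -EBUSY if any
 * device in its scope is still present.
 */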
4089int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4090{
4091 int i;
4092 struct device *dev;
4093 struct acpi_dmar_atsr *atsr;
4094 struct dmar_atsr_unit *atsru;
4095
4096 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4097 atsru = dmar_find_atsr(atsr);
4098 if (!atsru)
4099 return 0;
4100
4101 if (!atsru->include_all && atsru->devices && atsru->devices_cnt)
4102 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
4103 i, dev)
4104 return -EBUSY;
4105
4106 return 0;
4107}
4108
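/*
 * Bring a newly added DMAR unit online: verify it supports the features the
 * existing setup relies on (pass-through, snooping, super pages), allocate
 * its domain ids and root entry, enable translation and, if a static
 * identity domain exists, attach it.
 */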
4109static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
4110{
4111 int sp, ret = 0;
4112 struct intel_iommu *iommu = dmaru->iommu;
4113
4114 if (g_iommus[iommu->seq_id])
4115 return 0;
4116
4117 if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
4118 pr_warn("%s: Doesn't support hardware pass through.\n",
4119 iommu->name);
4120 return -ENXIO;
4121 }
4122 if (!ecap_sc_support(iommu->ecap) &&
4123 domain_update_iommu_snooping(iommu)) {
4124 pr_warn("%s: Doesn't support snooping.\n",
4125 iommu->name);
4126 return -ENXIO;
4127 }
4128 sp = domain_update_iommu_superpage(iommu) - 1;
4129 if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
4130 pr_warn("%s: Doesn't support large page.\n",
4131 iommu->name);
4132 return -ENXIO;
4133 }
4134
4135 /*
4136 * Disable translation if already enabled prior to OS handover.
4137 */
4138 if (iommu->gcmd & DMA_GCMD_TE)
4139 iommu_disable_translation(iommu);
4140
4141 g_iommus[iommu->seq_id] = iommu;
4142 ret = iommu_init_domains(iommu);
4143 if (ret == 0)
4144 ret = iommu_alloc_root_entry(iommu);
4145 if (ret)
4146 goto out;
4147
4148 if (dmaru->ignored) {
4149 /*
4150 * we always have to disable PMRs or DMA may fail on this device
4151 */
4152 if (force_on)
4153 iommu_disable_protect_mem_regions(iommu);
4154 return 0;
4155 }
4156
4157 intel_iommu_init_qi(iommu);
4158 iommu_flush_write_buffer(iommu);
4159 ret = dmar_set_interrupt(iommu);
4160 if (ret)
4161 goto disable_iommu;
4162
4163 iommu_set_root_entry(iommu);
4164 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
4165 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4166 iommu_enable_translation(iommu);
4167
4168 if (si_domain) {
4169 ret = iommu_attach_domain(si_domain, iommu);
4170 if (ret < 0 || si_domain->id != ret)
4171 goto disable_iommu;
4172 domain_attach_iommu(si_domain, iommu);
4173 }
4174
4175 iommu_disable_protect_mem_regions(iommu);
4176 return 0;
4177
4178disable_iommu:
4179 disable_dmar_iommu(iommu);
4180out:
4181 free_dmar_iommu(iommu);
4182 return ret;
4183}
4184
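/*
 * IOMMU hot-plug handler: add the unit on insertion, otherwise tear it down
 * via disable_dmar_iommu()/free_dmar_iommu().
 */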
4185int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
4186{
4187 int ret = 0;
4188 struct intel_iommu *iommu = dmaru->iommu;
4189
4190 if (!intel_iommu_enabled)
4191 return 0;
4192 if (iommu == NULL)
4193 return -EINVAL;
4194
4195 if (insert) {
4196 ret = intel_iommu_add(dmaru);
4197 } else {
4198 disable_dmar_iommu(iommu);
4199 free_dmar_iommu(iommu);
4200 }
4201
4202 return ret;
4203}
4204
4205static void intel_iommu_free_dmars(void)
4206{
4207 struct dmar_rmrr_unit *rmrru, *rmrr_n;
4208 struct dmar_atsr_unit *atsru, *atsr_n;
4209
4210 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
4211 list_del(&rmrru->list);
4212 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
4213 kfree(rmrru);
4214 }
4215
4216 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
4217 list_del(&atsru->list);
4218 intel_iommu_free_atsr(atsru);
4219 }
4220}
4221
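/*
 * Walk up from the device to its PCIe root port and return 1 if that port
 * is covered by an ATSR device scope (or by an include-all ATSR) on the
 * same PCI segment, i.e. if ATS may be used for this device.
 */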
4222int dmar_find_matched_atsr_unit(struct pci_dev *dev)
4223{
4224 int i, ret = 1;
4225 struct pci_bus *bus;
4226 struct pci_dev *bridge = NULL;
4227 struct device *tmp;
4228 struct acpi_dmar_atsr *atsr;
4229 struct dmar_atsr_unit *atsru;
4230
4231 dev = pci_physfn(dev);
4232 for (bus = dev->bus; bus; bus = bus->parent) {
4233 bridge = bus->self;
4234 if (!bridge || !pci_is_pcie(bridge) ||
4235 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
4236 return 0;
4237 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
4238 break;
4239 }
4240 if (!bridge)
4241 return 0;
4242
4243 rcu_read_lock();
4244 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4245 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4246 if (atsr->segment != pci_domain_nr(dev->bus))
4247 continue;
4248
4249 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
4250 if (tmp == &bridge->dev)
4251 goto out;
4252
4253 if (atsru->include_all)
4254 goto out;
4255 }
4256 ret = 0;
4257out:
4258 rcu_read_unlock();
4259
4260 return ret;
4261}
4262
4263int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
4264{
4265 int ret = 0;
4266 struct dmar_rmrr_unit *rmrru;
4267 struct dmar_atsr_unit *atsru;
4268 struct acpi_dmar_atsr *atsr;
4269 struct acpi_dmar_reserved_memory *rmrr;
4270
4271 if (!intel_iommu_enabled && system_state != SYSTEM_BOOTING)
4272 return 0;
4273
4274 list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
4275 rmrr = container_of(rmrru->hdr,
4276 struct acpi_dmar_reserved_memory, header);
4277 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4278 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
4279 ((void *)rmrr) + rmrr->header.length,
4280 rmrr->segment, rmrru->devices,
4281 rmrru->devices_cnt);
4282 if (ret < 0)
4283 return ret;
4284 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
4285 dmar_remove_dev_scope(info, rmrr->segment,
4286 rmrru->devices, rmrru->devices_cnt);
4287 }
4288 }
4289
4290 list_for_each_entry(atsru, &dmar_atsr_units, list) {
4291 if (atsru->include_all)
4292 continue;
4293
4294 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4295 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4296 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
4297 (void *)atsr + atsr->header.length,
4298 atsr->segment, atsru->devices,
4299 atsru->devices_cnt);
4300 if (ret > 0)
4301 break;
4302 else if (ret < 0)
4303 return ret;
4304 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
4305 if (dmar_remove_dev_scope(info, atsr->segment,
4306 atsru->devices, atsru->devices_cnt))
4307 break;
4308 }
4309 }
4310
4311 return 0;
4312}
4313
4314/*
4315 * Here we only respond to a device being unbound from its driver.
4316 *
4317 * A newly added device is not attached to its DMAR domain here yet; that
4318 * happens when the device is mapped to an iova.
4319 */
4320static int device_notifier(struct notifier_block *nb,
4321 unsigned long action, void *data)
4322{
4323 struct device *dev = data;
4324 struct dmar_domain *domain;
4325
4326 if (iommu_dummy(dev))
4327 return 0;
4328
4329 if (action != BUS_NOTIFY_REMOVED_DEVICE)
4330 return 0;
4331
4332 domain = find_domain(dev);
4333 if (!domain)
4334 return 0;
4335
4336 down_read(&dmar_global_lock);
4337 domain_remove_one_dev_info(domain, dev);
4338 if (!domain_type_is_vm_or_si(domain) && list_empty(&domain->devices))
4339 domain_exit(domain);
4340 up_read(&dmar_global_lock);
4341
4342 return 0;
4343}
4344
4345static struct notifier_block device_nb = {
4346 .notifier_call = device_notifier,
4347};
4348
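/*
 * Keep the static identity (si) domain in sync with memory hotplug: extend
 * the identity map when memory goes online, and unmap and flush the range
 * again when it goes offline.
 */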
4349static int intel_iommu_memory_notifier(struct notifier_block *nb,
4350 unsigned long val, void *v)
4351{
4352 struct memory_notify *mhp = v;
4353 unsigned long long start, end;
4354 unsigned long start_vpfn, last_vpfn;
4355
4356 switch (val) {
4357 case MEM_GOING_ONLINE:
4358 start = mhp->start_pfn << PAGE_SHIFT;
4359 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
4360 if (iommu_domain_identity_map(si_domain, start, end)) {
4361 pr_warn("Failed to build identity map for [%llx-%llx]\n",
4362 start, end);
4363 return NOTIFY_BAD;
4364 }
4365 break;
4366
4367 case MEM_OFFLINE:
4368 case MEM_CANCEL_ONLINE:
4369 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4370 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
4371 while (start_vpfn <= last_vpfn) {
4372 struct iova *iova;
4373 struct dmar_drhd_unit *drhd;
4374 struct intel_iommu *iommu;
4375 struct page *freelist;
4376
4377 iova = find_iova(&si_domain->iovad, start_vpfn);
4378 if (iova == NULL) {
4379 pr_debug("Failed to get IOVA for PFN %lx\n",
4380 start_vpfn);
4381 break;
4382 }
4383
4384 iova = split_and_remove_iova(&si_domain->iovad, iova,
4385 start_vpfn, last_vpfn);
4386 if (iova == NULL) {
4387 pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
4388 start_vpfn, last_vpfn);
4389 return NOTIFY_BAD;
4390 }
4391
4392 freelist = domain_unmap(si_domain, iova->pfn_lo,
4393 iova->pfn_hi);
4394
4395 rcu_read_lock();
4396 for_each_active_iommu(iommu, drhd)
4397 iommu_flush_iotlb_psi(iommu, si_domain->id,
4398 iova->pfn_lo, iova_size(iova),
4399 !freelist, 0);
4400 rcu_read_unlock();
4401 dma_free_pagelist(freelist);
4402
4403 start_vpfn = iova->pfn_hi + 1;
4404 free_iova_mem(iova);
4405 }
4406 break;
4407 }
4408
4409 return NOTIFY_OK;
4410}
4411
4412static struct notifier_block intel_iommu_memory_nb = {
4413 .notifier_call = intel_iommu_memory_notifier,
4414 .priority = 0
4415};
4416
4417
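/*
 * Per-IOMMU sysfs attributes. Once iommu_device_create() is called from
 * intel_iommu_init() below, they are expected to appear under the iommu
 * class, e.g. (illustrative paths, assuming the first unit is named dmar0):
 *
 *   # cat /sys/class/iommu/dmar0/intel-iommu/cap
 *   # cat /sys/class/iommu/dmar0/intel-iommu/domains_used
 *
 * Each show function below simply prints the corresponding register value
 * or counter.
 */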
4418static ssize_t intel_iommu_show_version(struct device *dev,
4419 struct device_attribute *attr,
4420 char *buf)
4421{
4422 struct intel_iommu *iommu = dev_get_drvdata(dev);
4423 u32 ver = readl(iommu->reg + DMAR_VER_REG);
4424 return sprintf(buf, "%d:%d\n",
4425 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4426}
4427static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
4428
4429static ssize_t intel_iommu_show_address(struct device *dev,
4430 struct device_attribute *attr,
4431 char *buf)
4432{
4433 struct intel_iommu *iommu = dev_get_drvdata(dev);
4434 return sprintf(buf, "%llx\n", iommu->reg_phys);
4435}
4436static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
4437
4438static ssize_t intel_iommu_show_cap(struct device *dev,
4439 struct device_attribute *attr,
4440 char *buf)
4441{
4442 struct intel_iommu *iommu = dev_get_drvdata(dev);
4443 return sprintf(buf, "%llx\n", iommu->cap);
4444}
4445static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
4446
4447static ssize_t intel_iommu_show_ecap(struct device *dev,
4448 struct device_attribute *attr,
4449 char *buf)
4450{
4451 struct intel_iommu *iommu = dev_get_drvdata(dev);
4452 return sprintf(buf, "%llx\n", iommu->ecap);
4453}
4454static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4455
4456static ssize_t intel_iommu_show_ndoms(struct device *dev,
4457 struct device_attribute *attr,
4458 char *buf)
4459{
4460 struct intel_iommu *iommu = dev_get_drvdata(dev);
4461 return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
4462}
4463static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL);
4464
4465static ssize_t intel_iommu_show_ndoms_used(struct device *dev,
4466 struct device_attribute *attr,
4467 char *buf)
4468{
4469 struct intel_iommu *iommu = dev_get_drvdata(dev);
4470 return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
4471 cap_ndoms(iommu->cap)));
4472}
4473static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL);
4474
4475static struct attribute *intel_iommu_attrs[] = {
4476 &dev_attr_version.attr,
4477 &dev_attr_address.attr,
4478 &dev_attr_cap.attr,
4479 &dev_attr_ecap.attr,
4480 &dev_attr_domains_supported.attr,
4481 &dev_attr_domains_used.attr,
4482 NULL,
4483};
4484
4485static struct attribute_group intel_iommu_group = {
4486 .name = "intel-iommu",
4487 .attrs = intel_iommu_attrs,
4488};
4489
4490const struct attribute_group *intel_iommu_groups[] = {
4491 &intel_iommu_group,
4492 NULL,
4493};
4494
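/*
 * Main entry point: parse the DMAR table and device scopes, initialize the
 * remapping hardware, install intel_dma_ops as the DMA API backend and
 * register the IOMMU ops, notifiers and per-IOMMU sysfs devices.
 */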
4495int __init intel_iommu_init(void)
4496{
4497 int ret = -ENODEV;
4498 struct dmar_drhd_unit *drhd;
4499 struct intel_iommu *iommu;
4500
4501 /* VT-d is required for a TXT/tboot launch, so enforce that */
4502 force_on = tboot_force_iommu();
4503
4504 if (iommu_init_mempool()) {
4505 if (force_on)
4506 panic("tboot: Failed to initialize iommu memory\n");
4507 return -ENOMEM;
4508 }
4509
4510 down_write(&dmar_global_lock);
4511 if (dmar_table_init()) {
4512 if (force_on)
4513 panic("tboot: Failed to initialize DMAR table\n");
4514 goto out_free_dmar;
4515 }
4516
4517 if (dmar_dev_scope_init() < 0) {
4518 if (force_on)
4519 panic("tboot: Failed to initialize DMAR device scope\n");
4520 goto out_free_dmar;
4521 }
4522
4523 if (no_iommu || dmar_disabled)
4524 goto out_free_dmar;
4525
4526 if (list_empty(&dmar_rmrr_units))
4527 pr_info("No RMRR found\n");
4528
4529 if (list_empty(&dmar_atsr_units))
4530 pr_info("No ATSR found\n");
4531
4532 if (dmar_init_reserved_ranges()) {
4533 if (force_on)
4534 panic("tboot: Failed to reserve iommu ranges\n");
4535 goto out_free_reserved_range;
4536 }
4537
4538 init_no_remapping_devices();
4539
4540 ret = init_dmars();
4541 if (ret) {
4542 if (force_on)
4543 panic("tboot: Failed to initialize DMARs\n");
4544 pr_err("Initialization failed\n");
4545 goto out_free_reserved_range;
4546 }
4547 up_write(&dmar_global_lock);
4548 pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
4549
4550 init_timer(&unmap_timer);
4551#ifdef CONFIG_SWIOTLB
4552 swiotlb = 0;
4553#endif
4554 dma_ops = &intel_dma_ops;
4555
4556 init_iommu_pm_ops();
4557
4558 for_each_active_iommu(iommu, drhd)
4559 iommu->iommu_dev = iommu_device_create(NULL, iommu,
4560 intel_iommu_groups,
4561 "%s", iommu->name);
4562
4563 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
4564 bus_register_notifier(&pci_bus_type, &device_nb);
4565 if (si_domain && !hw_pass_through)
4566 register_memory_notifier(&intel_iommu_memory_nb);
4567
4568 intel_iommu_enabled = 1;
4569
4570 return 0;
4571
4572out_free_reserved_range:
4573 put_iova_domain(&reserved_iova_list);
4574out_free_dmar:
4575 intel_iommu_free_dmars();
4576 up_write(&dmar_global_lock);
4577 iommu_exit_mempool();
4578 return ret;
4579}
4580
4581static int iommu_detach_dev_cb(struct pci_dev *pdev, u16 alias, void *opaque)
4582{
4583 struct intel_iommu *iommu = opaque;
4584
4585 iommu_detach_dev(iommu, PCI_BUS_NUM(alias), alias & 0xff);
4586 return 0;
4587}
4588
4589/*
4590 * NB - intel-iommu lacks any sort of reference counting for the users of
4591 * dependent devices. If multiple endpoints have intersecting dependent
4592 * devices, unbinding the driver from any one of them will possibly leave
4593 * the others unable to operate.
4594 */
4595static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
4596 struct device *dev)
4597{
4598 if (!iommu || !dev || !dev_is_pci(dev))
4599 return;
4600
4601 pci_for_each_dma_alias(to_pci_dev(dev), &iommu_detach_dev_cb, iommu);
4602}
4603
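/*
 * Detach one device from its domain: unlink its device_domain_info, disable
 * the device IOTLB, clear its context entries and, if it was the last
 * device behind that IOMMU, detach the IOMMU from the domain as well.
 */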
4604static void domain_remove_one_dev_info(struct dmar_domain *domain,
4605 struct device *dev)
4606{
4607 struct device_domain_info *info, *tmp;
4608 struct intel_iommu *iommu;
4609 unsigned long flags;
4610 bool found = false;
4611 u8 bus, devfn;
4612
4613 iommu = device_to_iommu(dev, &bus, &devfn);
4614 if (!iommu)
4615 return;
4616
4617 spin_lock_irqsave(&device_domain_lock, flags);
4618 list_for_each_entry_safe(info, tmp, &domain->devices, link) {
4619 if (info->iommu == iommu && info->bus == bus &&
4620 info->devfn == devfn) {
4621 unlink_domain_info(info);
4622 spin_unlock_irqrestore(&device_domain_lock, flags);
4623
4624 iommu_disable_dev_iotlb(info);
4625 iommu_detach_dev(iommu, info->bus, info->devfn);
4626 iommu_detach_dependent_devices(iommu, dev);
4627 free_devinfo_mem(info);
4628
4629 spin_lock_irqsave(&device_domain_lock, flags);
4630
4631 if (found)
4632 break;
4633 else
4634 continue;
4635 }
4636
4637 /* If there are no other devices under the same iommu
4638 * owned by this domain, clear this iommu in iommu_bmp
4639 * and update the iommu count and coherency.
4640 */
4641 if (info->iommu == iommu)
4642 found = true;
4643 }
4644
4645 spin_unlock_irqrestore(&device_domain_lock, flags);
4646
4647 if (!found) {
4648 domain_detach_iommu(domain, iommu);
4649 if (!domain_type_is_vm_or_si(domain))
4650 iommu_detach_domain(domain, iommu);
4651 }
4652}
4653
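/*
 * Initialize a domain allocated through the IOMMU API: set up its IOVA
 * allocator, compute the adjusted guest address width and allocate the
 * top-level page directory.
 */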
4654static int md_domain_init(struct dmar_domain *domain, int guest_width)
4655{
4656 int adjust_width;
4657
4658 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
4659 DMA_32BIT_PFN);
4660 domain_reserve_special_ranges(domain);
4661
4662 /* calculate AGAW */
4663 domain->gaw = guest_width;
4664 adjust_width = guestwidth_to_adjustwidth(guest_width);
4665 domain->agaw = width_to_agaw(adjust_width);
4666
4667 domain->iommu_coherency = 0;
4668 domain->iommu_snooping = 0;
4669 domain->iommu_superpage = 0;
4670 domain->max_addr = 0;
4671
4672 /* always allocate the top pgd */
4673 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
4674 if (!domain->pgd)
4675 return -ENOMEM;
4676 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4677 return 0;
4678}
4679
4680static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
4681{
4682 struct dmar_domain *dmar_domain;
4683 struct iommu_domain *domain;
4684
4685 if (type != IOMMU_DOMAIN_UNMANAGED)
4686 return NULL;
4687
4688 dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE);
4689 if (!dmar_domain) {
4690 pr_err("Can't allocate dmar_domain\n");
4691 return NULL;
4692 }
4693 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
4694 pr_err("Domain initialization failed\n");
4695 domain_exit(dmar_domain);
4696 return NULL;
4697 }
4698 domain_update_iommu_cap(dmar_domain);
4699
4700 domain = &dmar_domain->domain;
4701 domain->geometry.aperture_start = 0;
4702 domain->geometry.aperture_end = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
4703 domain->geometry.force_aperture = true;
4704
4705 return domain;
4706}
4707
4708static void intel_iommu_domain_free(struct iommu_domain *domain)
4709{
4710 domain_exit(to_dmar_domain(domain));
4711}
4712
4713static int intel_iommu_attach_device(struct iommu_domain *domain,
4714 struct device *dev)
4715{
4716 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4717 struct intel_iommu *iommu;
4718 int addr_width;
4719 u8 bus, devfn;
4720
4721 if (device_is_rmrr_locked(dev)) {
4722 dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n");
4723 return -EPERM;
4724 }
4725
4726 /* normally dev is not mapped */
4727 if (unlikely(domain_context_mapped(dev))) {
4728 struct dmar_domain *old_domain;
4729
4730 old_domain = find_domain(dev);
4731 if (old_domain) {
4732 if (domain_type_is_vm_or_si(dmar_domain))
4733 domain_remove_one_dev_info(old_domain, dev);
4734 else
4735 domain_remove_dev_info(old_domain);
4736
4737 if (!domain_type_is_vm_or_si(old_domain) &&
4738 list_empty(&old_domain->devices))
4739 domain_exit(old_domain);
4740 }
4741 }
4742
4743 iommu = device_to_iommu(dev, &bus, &devfn);
4744 if (!iommu)
4745 return -ENODEV;
4746
4747 /* check if this iommu agaw is sufficient for max mapped address */
4748 addr_width = agaw_to_width(iommu->agaw);
4749 if (addr_width > cap_mgaw(iommu->cap))
4750 addr_width = cap_mgaw(iommu->cap);
4751
4752 if (dmar_domain->max_addr > (1LL << addr_width)) {
4753 pr_err("%s: iommu width (%d) is not "
4754 "sufficient for the mapped address (%llx)\n",
4755 __func__, addr_width, dmar_domain->max_addr);
4756 return -EFAULT;
4757 }
4758 dmar_domain->gaw = addr_width;
4759
4760 /*
4761 * Knock out extra levels of page tables if necessary
4762 */
4763 while (iommu->agaw < dmar_domain->agaw) {
4764 struct dma_pte *pte;
4765
4766 pte = dmar_domain->pgd;
4767 if (dma_pte_present(pte)) {
4768 dmar_domain->pgd = (struct dma_pte *)
4769 phys_to_virt(dma_pte_addr(pte));
4770 free_pgtable_page(pte);
4771 }
4772 dmar_domain->agaw--;
4773 }
4774
4775 return domain_add_dev_info(dmar_domain, dev, CONTEXT_TT_MULTI_LEVEL);
4776}
4777
4778static void intel_iommu_detach_device(struct iommu_domain *domain,
4779 struct device *dev)
4780{
4781 domain_remove_one_dev_info(to_dmar_domain(domain), dev);
4782}
4783
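/*
 * IOMMU API map callback: translate the IOMMU_* protection flags into DMA
 * PTE bits, grow the domain's tracked max_addr if needed and install the
 * mapping with domain_pfn_mapping().
 */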
4784static int intel_iommu_map(struct iommu_domain *domain,
4785 unsigned long iova, phys_addr_t hpa,
4786 size_t size, int iommu_prot)
4787{
4788 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4789 u64 max_addr;
4790 int prot = 0;
4791 int ret;
4792
4793 if (iommu_prot & IOMMU_READ)
4794 prot |= DMA_PTE_READ;
4795 if (iommu_prot & IOMMU_WRITE)
4796 prot |= DMA_PTE_WRITE;
4797 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
4798 prot |= DMA_PTE_SNP;
4799
4800 max_addr = iova + size;
4801 if (dmar_domain->max_addr < max_addr) {
4802 u64 end;
4803
4804 /* check if minimum agaw is sufficient for mapped address */
4805 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
4806 if (end < max_addr) {
4807 pr_err("%s: iommu width (%d) is not "
4808 "sufficient for the mapped address (%llx)\n",
4809 __func__, dmar_domain->gaw, max_addr);
4810 return -EFAULT;
4811 }
4812 dmar_domain->max_addr = max_addr;
4813 }
4814 /* Round up size to next multiple of PAGE_SIZE, if it and
4815 the low bits of hpa would take us onto the next page */
4816 size = aligned_nrpages(hpa, size);
4817 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
4818 hpa >> VTD_PAGE_SHIFT, size, prot);
4819 return ret;
4820}
4821
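/*
 * IOMMU API unmap callback: tear down the page-table range, flush the IOTLB
 * on every IOMMU that holds this domain and free the collected page list.
 */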
4822static size_t intel_iommu_unmap(struct iommu_domain *domain,
4823 unsigned long iova, size_t size)
4824{
4825 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4826 struct page *freelist = NULL;
4827 struct intel_iommu *iommu;
4828 unsigned long start_pfn, last_pfn;
4829 unsigned int npages;
4830 int iommu_id, num, ndomains, level = 0;
4831
4832 /* Cope with horrid API which requires us to unmap more than the
4833 size argument if it happens to be a large-page mapping. */
4834 if (!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level))
4835 BUG();
4836
4837 if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
4838 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4839
4840 start_pfn = iova >> VTD_PAGE_SHIFT;
4841 last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
4842
4843 freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
4844
4845 npages = last_pfn - start_pfn + 1;
4846
4847 for_each_set_bit(iommu_id, dmar_domain->iommu_bmp, g_num_of_iommus) {
4848 iommu = g_iommus[iommu_id];
4849
4850 /*
4851 * Find the domain id (bit position) used by dmar_domain on this iommu.
4852 */
4853 ndomains = cap_ndoms(iommu->cap);
4854 for_each_set_bit(num, iommu->domain_ids, ndomains) {
4855 if (iommu->domains[num] == dmar_domain)
4856 iommu_flush_iotlb_psi(iommu, num, start_pfn,
4857 npages, !freelist, 0);
4858 }
4859
4860 }
4861
4862 dma_free_pagelist(freelist);
4863
4864 if (dmar_domain->max_addr == iova + size)
4865 dmar_domain->max_addr = iova;
4866
4867 return size;
4868}
4869
4870static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
4871 dma_addr_t iova)
4872{
4873 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4874 struct dma_pte *pte;
4875 int level = 0;
4876 u64 phys = 0;
4877
4878 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
4879 if (pte)
4880 phys = dma_pte_addr(pte);
4881
4882 return phys;
4883}
4884
4885static bool intel_iommu_capable(enum iommu_cap cap)
4886{
4887 if (cap == IOMMU_CAP_CACHE_COHERENCY)
4888 return domain_update_iommu_snooping(NULL) == 1;
4889 if (cap == IOMMU_CAP_INTR_REMAP)
4890 return irq_remapping_enabled == 1;
4891
4892 return false;
4893}
4894
4895static int intel_iommu_add_device(struct device *dev)
4896{
4897 struct intel_iommu *iommu;
4898 struct iommu_group *group;
4899 u8 bus, devfn;
4900
4901 iommu = device_to_iommu(dev, &bus, &devfn);
4902 if (!iommu)
4903 return -ENODEV;
4904
4905 iommu_device_link(iommu->iommu_dev, dev);
4906
4907 group = iommu_group_get_for_dev(dev);
4908
4909 if (IS_ERR(group))
4910 return PTR_ERR(group);
4911
4912 iommu_group_put(group);
4913 return 0;
4914}
4915
4916static void intel_iommu_remove_device(struct device *dev)
4917{
4918 struct intel_iommu *iommu;
4919 u8 bus, devfn;
4920
4921 iommu = device_to_iommu(dev, &bus, &devfn);
4922 if (!iommu)
4923 return;
4924
4925 iommu_group_remove_device(dev);
4926
4927 iommu_device_unlink(iommu->iommu_dev, dev);
4928}
4929
4930static const struct iommu_ops intel_iommu_ops = {
4931 .capable = intel_iommu_capable,
4932 .domain_alloc = intel_iommu_domain_alloc,
4933 .domain_free = intel_iommu_domain_free,
4934 .attach_dev = intel_iommu_attach_device,
4935 .detach_dev = intel_iommu_detach_device,
4936 .map = intel_iommu_map,
4937 .unmap = intel_iommu_unmap,
4938 .map_sg = default_iommu_map_sg,
4939 .iova_to_phys = intel_iommu_iova_to_phys,
4940 .add_device = intel_iommu_add_device,
4941 .remove_device = intel_iommu_remove_device,
4942 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
4943};
4944
4945static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
4946{
4947 /* G4x/GM45 integrated gfx dmar support is totally busted. */
4948 pr_info("Disabling IOMMU for graphics on this chipset\n");
4949 dmar_map_gfx = 0;
4950}
4951
4952DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
4953DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
4954DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
4955DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
4956DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
4957DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
4958DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
4959
4960static void quirk_iommu_rwbf(struct pci_dev *dev)
4961{
4962 /*
4963 * Mobile 4 Series Chipset neglects to set RWBF capability,
4964 * but needs it. Same seems to hold for the desktop versions.
4965 */
4966 pr_info("Forcing write-buffer flush capability\n");
4967 rwbf_quirk = 1;
4968}
4969
4970DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
4971DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
4972DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
4973DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
4974DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
4975DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
4976DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
4977
4978#define GGC 0x52
4979#define GGC_MEMORY_SIZE_MASK (0xf << 8)
4980#define GGC_MEMORY_SIZE_NONE (0x0 << 8)
4981#define GGC_MEMORY_SIZE_1M (0x1 << 8)
4982#define GGC_MEMORY_SIZE_2M (0x3 << 8)
4983#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
4984#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
4985#define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
4986#define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
4987
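/*
 * The quirk below reads the GGC word from PCI config space and, using the
 * masks above, checks whether the BIOS set aside any VT-enabled GTT space;
 * if not, the IOMMU is disabled for graphics on these chipsets.
 */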
4988static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
4989{
4990 unsigned short ggc;
4991
4992 if (pci_read_config_word(dev, GGC, &ggc))
4993 return;
4994
4995 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
4996 pr_info("BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
4997 dmar_map_gfx = 0;
4998 } else if (dmar_map_gfx) {
4999 /* we have to ensure the gfx device is idle before we flush */
5000 pr_info("Disabling batched IOTLB flush on Ironlake\n");
5001 intel_iommu_strict = 1;
5002 }
5003}
5004DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
5005DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
5006DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
5007DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
5008
5009/* On Tylersburg chipsets, some BIOSes have been known to enable the
5010 ISOCH DMAR unit for the Azalia sound device, but not give it any
5011 TLB entries, which causes it to deadlock. Check for that. We do
5012 this in a function called from init_dmars(), instead of in a PCI
5013 quirk, because we don't want to print the obnoxious "BIOS broken"
5014 message if VT-d is actually disabled.
5015*/
5016static void __init check_tylersburg_isoch(void)
5017{
5018 struct pci_dev *pdev;
5019 uint32_t vtisochctrl;
5020
5021 /* If there's no Azalia in the system anyway, forget it. */
5022 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
5023 if (!pdev)
5024 return;
5025 pci_dev_put(pdev);
5026
5027 /* System Management Registers. Might be hidden, in which case
5028 we can't do the sanity check. But that's OK, because the
5029 known-broken BIOSes _don't_ actually hide it, so far. */
5030 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
5031 if (!pdev)
5032 return;
5033
5034 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
5035 pci_dev_put(pdev);
5036 return;
5037 }
5038
5039 pci_dev_put(pdev);
5040
5041 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
5042 if (vtisochctrl & 1)
5043 return;
5044
5045 /* Drop all bits other than the number of TLB entries */
5046 vtisochctrl &= 0x1c;
5047
5048 /* If we have the recommended number of TLB entries (16), fine. */
5049 if (vtisochctrl == 0x10)
5050 return;
5051
5052 /* Zero TLB entries? You get to ride the short bus to school. */
5053 if (!vtisochctrl) {
5054 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
5055 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
5056 dmi_get_system_info(DMI_BIOS_VENDOR),
5057 dmi_get_system_info(DMI_BIOS_VERSION),
5058 dmi_get_system_info(DMI_PRODUCT_VERSION));
5059 iommu_identity_mapping |= IDENTMAP_AZALIA;
5060 return;
5061 }
5062
5063 pr_warn("The recommended number of TLB entries for the ISOCH unit is 16; your BIOS set %d\n",
5064 vtisochctrl);
5065}