]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blob - drivers/iommu/intel-iommu.c
iommu/vt-d: Unify domain->iommu attach/detachment
[mirror_ubuntu-zesty-kernel.git] / drivers / iommu / intel-iommu.c
1 /*
2 * Copyright © 2006-2014 Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * Authors: David Woodhouse <dwmw2@infradead.org>,
14 * Ashok Raj <ashok.raj@intel.com>,
15 * Shaohua Li <shaohua.li@intel.com>,
16 * Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
17 * Fenghua Yu <fenghua.yu@intel.com>
18 * Joerg Roedel <jroedel@suse.de>
19 */
20
21 #define pr_fmt(fmt) "DMAR: " fmt
22
23 #include <linux/init.h>
24 #include <linux/bitmap.h>
25 #include <linux/debugfs.h>
26 #include <linux/export.h>
27 #include <linux/slab.h>
28 #include <linux/irq.h>
29 #include <linux/interrupt.h>
30 #include <linux/spinlock.h>
31 #include <linux/pci.h>
32 #include <linux/dmar.h>
33 #include <linux/dma-mapping.h>
34 #include <linux/mempool.h>
35 #include <linux/memory.h>
36 #include <linux/timer.h>
37 #include <linux/iova.h>
38 #include <linux/iommu.h>
39 #include <linux/intel-iommu.h>
40 #include <linux/syscore_ops.h>
41 #include <linux/tboot.h>
42 #include <linux/dmi.h>
43 #include <linux/pci-ats.h>
44 #include <linux/memblock.h>
45 #include <linux/dma-contiguous.h>
46 #include <linux/crash_dump.h>
47 #include <asm/irq_remapping.h>
48 #include <asm/cacheflush.h>
49 #include <asm/iommu.h>
50
51 #include "irq_remapping.h"
52
53 #define ROOT_SIZE VTD_PAGE_SIZE
54 #define CONTEXT_SIZE VTD_PAGE_SIZE
55
56 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
57 #define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
58 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
59 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
60
61 #define IOAPIC_RANGE_START (0xfee00000)
62 #define IOAPIC_RANGE_END (0xfeefffff)
63 #define IOVA_START_ADDR (0x1000)
64
65 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
66
67 #define MAX_AGAW_WIDTH 64
68 #define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
69
70 #define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
71 #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
72
73 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
74 to match. That way, we can use 'unsigned long' for PFNs with impunity. */
75 #define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
76 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
77 #define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
78
79 /* IO virtual address start page frame number */
80 #define IOVA_START_PFN (1)
81
82 #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
83 #define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32))
84 #define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64))
85
86 /* page table handling */
87 #define LEVEL_STRIDE (9)
88 #define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
89
90 /*
91 * This bitmap is used to advertise the page sizes our hardware support
92 * to the IOMMU core, which will then use this information to split
93 * physically contiguous memory regions it is mapping into page sizes
94 * that we support.
95 *
96 * Traditionally the IOMMU core just handed us the mappings directly,
97 * after making sure the size is an order of a 4KiB page and that the
98 * mapping has natural alignment.
99 *
100 * To retain this behavior, we currently advertise that we support
101 * all page sizes that are an order of 4KiB.
102 *
103 * If at some point we'd like to utilize the IOMMU core's new behavior,
104 * we could change this to advertise the real page sizes we support.
105 */
106 #define INTEL_IOMMU_PGSIZES (~0xFFFUL)
107
/* Convert an AGAW encoding to a page-table level count: encoding plus two. */
static inline int agaw_to_level(int agaw)
{
	int level = agaw;

	level += 2;
	return level;
}
112
113 static inline int agaw_to_width(int agaw)
114 {
115 return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
116 }
117
118 static inline int width_to_agaw(int width)
119 {
120 return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
121 }
122
123 static inline unsigned int level_to_offset_bits(int level)
124 {
125 return (level - 1) * LEVEL_STRIDE;
126 }
127
128 static inline int pfn_level_offset(unsigned long pfn, int level)
129 {
130 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
131 }
132
/* Mask that clears the pfn bits translated below @level. */
static inline unsigned long level_mask(int level)
{
	unsigned int shift = level_to_offset_bits(level);

	return -1UL << shift;
}
137
/* Number of pfns spanned by a single entry at @level. */
static inline unsigned long level_size(int level)
{
	unsigned int shift = level_to_offset_bits(level);

	return 1UL << shift;
}
142
/* Round @pfn up to the next multiple of level_size(@level). */
static inline unsigned long align_to_level(unsigned long pfn, int level)
{
	unsigned long bumped = pfn + level_size(level) - 1;

	return bumped & level_mask(level);
}
147
148 static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
149 {
150 return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
151 }
152
153 /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
154 are never going to work. */
155 static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
156 {
157 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
158 }
159
160 static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
161 {
162 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
163 }
/* VT-d page frame number of the first VT-d page inside @pg. */
static inline unsigned long page_to_dma_pfn(struct page *pg)
{
	unsigned long mm_pfn = page_to_pfn(pg);

	return mm_to_dma_pfn(mm_pfn);
}
/* VT-d page frame number for the page backing kernel virtual address @p. */
static inline unsigned long virt_to_dma_pfn(void *p)
{
	struct page *pg = virt_to_page(p);

	return page_to_dma_pfn(pg);
}
172
173 /* global iommu list, set NULL for ignored DMAR units */
174 static struct intel_iommu **g_iommus;
175
176 static void __init check_tylersburg_isoch(void);
177 static int rwbf_quirk;
178
179 /*
180 * set to 1 to panic kernel if can't successfully enable VT-d
181 * (used when kernel is launched w/ TXT)
182 */
183 static int force_on = 0;
184
/*
 * Root table entry, one per PCI bus (indexed by bus number in
 * iommu_context_addr()).
 *
 * 0: Present
 * 1-11: Reserved
 * 12-63: Context Ptr (12 - (haw-1))
 * 64-127: Reserved
 *
 * When extended context support is in use, the high word is also treated
 * as a context table pointer (see root_entry_uctp()).
 */
struct root_entry {
	u64	lo;	/* bits 0-63: present bit and lower context table pointer */
	u64	hi;	/* bits 64-127: upper context table pointer in ECS mode */
};
195 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
196
197 /*
198 * Take a root_entry and return the Lower Context Table Pointer (LCTP)
199 * if marked present.
200 */
201 static phys_addr_t root_entry_lctp(struct root_entry *re)
202 {
203 if (!(re->lo & 1))
204 return 0;
205
206 return re->lo & VTD_PAGE_MASK;
207 }
208
209 /*
210 * Take a root_entry and return the Upper Context Table Pointer (UCTP)
211 * if marked present.
212 */
213 static phys_addr_t root_entry_uctp(struct root_entry *re)
214 {
215 if (!(re->hi & 1))
216 return 0;
217
218 return re->hi & VTD_PAGE_MASK;
219 }
/*
 * Context table entry.
 *
 * low 64 bits:
 * 0: present
 * 1: fault processing disable
 * 2-3: translation type
 * 11: PASID enable (see context_pasid_enabled())
 * 12-63: address space root
 * high 64 bits:
 * 0-2: address width
 * 3-6: aval (bit 3 is used by context_set_copied()/context_copied())
 * 8-23: domain id
 */
struct context_entry {
	u64 lo;	/* low 64 bits, layout above */
	u64 hi;	/* high 64 bits, layout above */
};
235
236 static inline void context_clear_pasid_enable(struct context_entry *context)
237 {
238 context->lo &= ~(1ULL << 11);
239 }
240
241 static inline bool context_pasid_enabled(struct context_entry *context)
242 {
243 return !!(context->lo & (1ULL << 11));
244 }
245
246 static inline void context_set_copied(struct context_entry *context)
247 {
248 context->hi |= (1ull << 3);
249 }
250
251 static inline bool context_copied(struct context_entry *context)
252 {
253 return !!(context->hi & (1ULL << 3));
254 }
255
256 static inline bool __context_present(struct context_entry *context)
257 {
258 return (context->lo & 1);
259 }
260
261 static inline bool context_present(struct context_entry *context)
262 {
263 return context_pasid_enabled(context) ?
264 __context_present(context) :
265 __context_present(context) && !context_copied(context);
266 }
267
268 static inline void context_set_present(struct context_entry *context)
269 {
270 context->lo |= 1;
271 }
272
273 static inline void context_set_fault_enable(struct context_entry *context)
274 {
275 context->lo &= (((u64)-1) << 2) | 1;
276 }
277
278 static inline void context_set_translation_type(struct context_entry *context,
279 unsigned long value)
280 {
281 context->lo &= (((u64)-1) << 4) | 3;
282 context->lo |= (value & 3) << 2;
283 }
284
285 static inline void context_set_address_root(struct context_entry *context,
286 unsigned long value)
287 {
288 context->lo &= ~VTD_PAGE_MASK;
289 context->lo |= value & VTD_PAGE_MASK;
290 }
291
292 static inline void context_set_address_width(struct context_entry *context,
293 unsigned long value)
294 {
295 context->hi |= value & 7;
296 }
297
298 static inline void context_set_domain_id(struct context_entry *context,
299 unsigned long value)
300 {
301 context->hi |= (value & ((1 << 16) - 1)) << 8;
302 }
303
304 static inline int context_domain_id(struct context_entry *c)
305 {
306 return((c->hi >> 8) & 0xffff);
307 }
308
309 static inline void context_clear_entry(struct context_entry *context)
310 {
311 context->lo = 0;
312 context->hi = 0;
313 }
314
/*
 * Page table entry used for DMA remapping.
 *
 * 0: readable
 * 1: writable
 * 2-6: reserved
 * 7: super page
 * 8-10: available
 * 11: snoop behavior
 * 12-63: Host physical address
 */
struct dma_pte {
	u64 val;	/* raw entry value, layout above */
};
327
328 static inline void dma_clear_pte(struct dma_pte *pte)
329 {
330 pte->val = 0;
331 }
332
333 static inline u64 dma_pte_addr(struct dma_pte *pte)
334 {
335 #ifdef CONFIG_64BIT
336 return pte->val & VTD_PAGE_MASK;
337 #else
338 /* Must have a full atomic 64-bit read */
339 return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
340 #endif
341 }
342
343 static inline bool dma_pte_present(struct dma_pte *pte)
344 {
345 return (pte->val & 3) != 0;
346 }
347
348 static inline bool dma_pte_superpage(struct dma_pte *pte)
349 {
350 return (pte->val & DMA_PTE_LARGE_PAGE);
351 }
352
353 static inline int first_pte_in_page(struct dma_pte *pte)
354 {
355 return !((unsigned long)pte & ~VTD_PAGE_MASK);
356 }
357
358 /*
359 * This domain is a static identity-mapping domain.
360 * 1. This domain creates a static 1:1 mapping to all usable memory.
361 * 2. It maps to each iommu if successful.
362 * 3. Each iommu maps to this domain if successful.
363 */
364 static struct dmar_domain *si_domain;
365 static int hw_pass_through = 1;
366
367 /*
368 * Domain represents a virtual machine, more than one devices
369 * across iommus may be owned in one domain, e.g. kvm guest.
370 */
371 #define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 0)
372
373 /* si_domain contains multiple devices */
374 #define DOMAIN_FLAG_STATIC_IDENTITY (1 << 1)
375
376 #define for_each_domain_iommu(idx, domain) \
377 for (idx = 0; idx < g_num_of_iommus; idx++) \
378 if (domain->iommu_refcnt[idx])
379
/* Driver-private state of one DMA remapping domain. */
struct dmar_domain {
	int	nid;			/* node id */

	unsigned	iommu_refcnt[DMAR_UNITS_SUPPORTED];
					/* Refcount of devices per iommu; a
					 * non-zero slot is what
					 * for_each_domain_iommu() iterates */


	u16		iommu_did[DMAR_UNITS_SUPPORTED];
					/* Domain ids per IOMMU. Use u16 since
					 * domain ids are 16 bit wide according
					 * to VT-d spec, section 9.3 */

	struct list_head devices;	/* all devices' list */
	struct iova_domain iovad;	/* iova's that belong to this domain */

	struct dma_pte	*pgd;		/* top-level page table (virtual address) */
	int		gaw;		/* max guest address width */

	/* adjusted guest address width, 0 is level 2 30-bit */
	int		agaw;

	int		flags;		/* DOMAIN_FLAG_* bits: type of domain */

	int		iommu_coherency;/* indicate coherency of iommu access */
	int		iommu_snooping; /* indicate snooping control feature */
	int		iommu_count;	/* reference count of iommu */
	int		iommu_superpage;/* Level of superpages supported:
					   0 == 4KiB (no superpages), 1 == 2MiB,
					   2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
	spinlock_t	iommu_lock;	/* protect iommu set in domain */
	u64		max_addr;	/* maximum mapped address */

	struct iommu_domain domain;	/* generic domain data structure for
					   iommu core; to_dmar_domain() maps
					   back from it to this struct */
};
415
/* PCI domain-device relationship: ties one device to its IOMMU and domain */
struct device_domain_info {
	struct list_head link;	/* link to domain siblings */
	struct list_head global; /* link to global list */
	u8 bus;			/* PCI bus number */
	u8 devfn;		/* PCI devfn number */
	struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
	struct intel_iommu *iommu; /* IOMMU used by this device */
	struct dmar_domain *domain; /* pointer to domain */
};
426
/* One reserved memory region (RMRR) entry, kept on dmar_rmrr_units */
struct dmar_rmrr_unit {
	struct list_head list;		/* list of rmrr units */
	struct acpi_dmar_header *hdr;	/* ACPI header */
	u64	base_address;		/* reserved base address */
	u64	end_address;		/* reserved end address */
	struct dmar_dev_scope *devices;	/* target devices */
	int	devices_cnt;		/* target device count */
};
435
/* One ATSR entry, kept on dmar_atsr_units */
struct dmar_atsr_unit {
	struct list_head list;		/* list of ATSR units */
	struct acpi_dmar_header *hdr;	/* ACPI header */
	struct dmar_dev_scope *devices;	/* target devices */
	int devices_cnt;		/* target device count */
	u8 include_all:1;		/* include all ports */
};
443
444 static LIST_HEAD(dmar_atsr_units);
445 static LIST_HEAD(dmar_rmrr_units);
446
447 #define for_each_rmrr_units(rmrr) \
448 list_for_each_entry(rmrr, &dmar_rmrr_units, list)
449
450 static void flush_unmaps_timeout(unsigned long data);
451
452 static DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
453
454 #define HIGH_WATER_MARK 250
/*
 * Batch of deferred unmap work; the three arrays are parallel and each
 * holds up to HIGH_WATER_MARK entries.
 * NOTE(review): 'next' looks like the index of the next free slot --
 * confirm against the code that fills this table.
 */
struct deferred_flush_tables {
	int next;
	struct iova *iova[HIGH_WATER_MARK];
	struct dmar_domain *domain[HIGH_WATER_MARK];
	struct page *freelist[HIGH_WATER_MARK];
};
461
462 static struct deferred_flush_tables *deferred_flush;
463
464 /* number of IOMMUs; upper bound when indexing per-IOMMU arrays */
465 static int g_num_of_iommus;
466
467 static DEFINE_SPINLOCK(async_umap_flush_lock);
468 static LIST_HEAD(unmaps_to_do);
469
470 static int timer_on;
471 static long list_size;
472
473 static void domain_exit(struct dmar_domain *domain);
474 static void domain_remove_dev_info(struct dmar_domain *domain);
475 static void dmar_remove_one_dev_info(struct dmar_domain *domain,
476 struct device *dev);
477 static void domain_context_clear(struct intel_iommu *iommu,
478 struct device *dev);
479 static int domain_detach_iommu(struct dmar_domain *domain,
480 struct intel_iommu *iommu);
481
482 #ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
483 int dmar_disabled = 0;
484 #else
485 int dmar_disabled = 1;
486 #endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
487
488 int intel_iommu_enabled = 0;
489 EXPORT_SYMBOL_GPL(intel_iommu_enabled);
490
491 static int dmar_map_gfx = 1;
492 static int dmar_forcedac;
493 static int intel_iommu_strict;
494 static int intel_iommu_superpage = 1;
495 static int intel_iommu_ecs = 1;
496
497 /* We only actually use ECS when PASID support (on the new bit 40)
498 * is also advertised. Some early implementations — the ones with
499 * PASID support on bit 28 — have issues even when we *only* use
500 * extended root/context tables. */
501 #define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap) && \
502 ecap_pasid(iommu->ecap))
503
504 int intel_iommu_gfx_mapped;
505 EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
506
507 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
508 static DEFINE_SPINLOCK(device_domain_lock);
509 static LIST_HEAD(device_domain_list);
510
511 static const struct iommu_ops intel_iommu_ops;
512
513 static bool translation_pre_enabled(struct intel_iommu *iommu)
514 {
515 return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
516 }
517
518 static void clear_translation_pre_enabled(struct intel_iommu *iommu)
519 {
520 iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
521 }
522
523 static void init_translation_status(struct intel_iommu *iommu)
524 {
525 u32 gsts;
526
527 gsts = readl(iommu->reg + DMAR_GSTS_REG);
528 if (gsts & DMA_GSTS_TES)
529 iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
530 }
531
532 /* Convert generic 'struct iommu_domain to private struct dmar_domain */
533 static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
534 {
535 return container_of(dom, struct dmar_domain, domain);
536 }
537
538 static int __init intel_iommu_setup(char *str)
539 {
540 if (!str)
541 return -EINVAL;
542 while (*str) {
543 if (!strncmp(str, "on", 2)) {
544 dmar_disabled = 0;
545 pr_info("IOMMU enabled\n");
546 } else if (!strncmp(str, "off", 3)) {
547 dmar_disabled = 1;
548 pr_info("IOMMU disabled\n");
549 } else if (!strncmp(str, "igfx_off", 8)) {
550 dmar_map_gfx = 0;
551 pr_info("Disable GFX device mapping\n");
552 } else if (!strncmp(str, "forcedac", 8)) {
553 pr_info("Forcing DAC for PCI devices\n");
554 dmar_forcedac = 1;
555 } else if (!strncmp(str, "strict", 6)) {
556 pr_info("Disable batched IOTLB flush\n");
557 intel_iommu_strict = 1;
558 } else if (!strncmp(str, "sp_off", 6)) {
559 pr_info("Disable supported super page\n");
560 intel_iommu_superpage = 0;
561 } else if (!strncmp(str, "ecs_off", 7)) {
562 printk(KERN_INFO
563 "Intel-IOMMU: disable extended context table support\n");
564 intel_iommu_ecs = 0;
565 }
566
567 str += strcspn(str, ",");
568 while (*str == ',')
569 str++;
570 }
571 return 0;
572 }
573 __setup("intel_iommu=", intel_iommu_setup);
574
575 static struct kmem_cache *iommu_domain_cache;
576 static struct kmem_cache *iommu_devinfo_cache;
577
578 static struct dmar_domain* get_iommu_domain(struct intel_iommu *iommu, u16 did)
579 {
580 struct dmar_domain **domains;
581 int idx = did >> 8;
582
583 domains = iommu->domains[idx];
584 if (!domains)
585 return NULL;
586
587 return domains[did & 0xff];
588 }
589
590 static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
591 struct dmar_domain *domain)
592 {
593 struct dmar_domain **domains;
594 int idx = did >> 8;
595
596 if (!iommu->domains[idx]) {
597 size_t size = 256 * sizeof(struct dmar_domain *);
598 iommu->domains[idx] = kzalloc(size, GFP_ATOMIC);
599 }
600
601 domains = iommu->domains[idx];
602 if (WARN_ON(!domains))
603 return;
604 else
605 domains[did & 0xff] = domain;
606 }
607
608 static inline void *alloc_pgtable_page(int node)
609 {
610 struct page *page;
611 void *vaddr = NULL;
612
613 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
614 if (page)
615 vaddr = page_address(page);
616 return vaddr;
617 }
618
/* Release a page obtained from alloc_pgtable_page(). */
static inline void free_pgtable_page(void *vaddr)
{
	unsigned long addr = (unsigned long)vaddr;

	free_page(addr);
}
623
624 static inline void *alloc_domain_mem(void)
625 {
626 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
627 }
628
629 static void free_domain_mem(void *vaddr)
630 {
631 kmem_cache_free(iommu_domain_cache, vaddr);
632 }
633
634 static inline void * alloc_devinfo_mem(void)
635 {
636 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
637 }
638
639 static inline void free_devinfo_mem(void *vaddr)
640 {
641 kmem_cache_free(iommu_devinfo_cache, vaddr);
642 }
643
644 static inline int domain_type_is_vm(struct dmar_domain *domain)
645 {
646 return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE;
647 }
648
649 static inline int domain_type_is_si(struct dmar_domain *domain)
650 {
651 return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
652 }
653
654 static inline int domain_type_is_vm_or_si(struct dmar_domain *domain)
655 {
656 return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE |
657 DOMAIN_FLAG_STATIC_IDENTITY);
658 }
659
660 static inline int domain_pfn_supported(struct dmar_domain *domain,
661 unsigned long pfn)
662 {
663 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
664
665 return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
666 }
667
668 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
669 {
670 unsigned long sagaw;
671 int agaw = -1;
672
673 sagaw = cap_sagaw(iommu->cap);
674 for (agaw = width_to_agaw(max_gaw);
675 agaw >= 0; agaw--) {
676 if (test_bit(agaw, &sagaw))
677 break;
678 }
679
680 return agaw;
681 }
682
683 /*
684 * Calculate max SAGAW for each iommu.
685 */
686 int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
687 {
688 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
689 }
690
691 /*
692 * calculate agaw for each iommu.
693 * "SAGAW" may be different across iommus, use a default agaw, and
694 * get a supported less agaw for iommus that don't support the default agaw.
695 */
696 int iommu_calculate_agaw(struct intel_iommu *iommu)
697 {
698 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
699 }
700
701 /* This functionin only returns single iommu in a domain */
702 static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
703 {
704 int iommu_id;
705
706 /* si_domain and vm domain should not get here. */
707 BUG_ON(domain_type_is_vm_or_si(domain));
708 for_each_domain_iommu(iommu_id, domain)
709 break;
710
711 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
712 return NULL;
713
714 return g_iommus[iommu_id];
715 }
716
717 static void domain_update_iommu_coherency(struct dmar_domain *domain)
718 {
719 struct dmar_drhd_unit *drhd;
720 struct intel_iommu *iommu;
721 bool found = false;
722 int i;
723
724 domain->iommu_coherency = 1;
725
726 for_each_domain_iommu(i, domain) {
727 found = true;
728 if (!ecap_coherent(g_iommus[i]->ecap)) {
729 domain->iommu_coherency = 0;
730 break;
731 }
732 }
733 if (found)
734 return;
735
736 /* No hardware attached; use lowest common denominator */
737 rcu_read_lock();
738 for_each_active_iommu(iommu, drhd) {
739 if (!ecap_coherent(iommu->ecap)) {
740 domain->iommu_coherency = 0;
741 break;
742 }
743 }
744 rcu_read_unlock();
745 }
746
747 static int domain_update_iommu_snooping(struct intel_iommu *skip)
748 {
749 struct dmar_drhd_unit *drhd;
750 struct intel_iommu *iommu;
751 int ret = 1;
752
753 rcu_read_lock();
754 for_each_active_iommu(iommu, drhd) {
755 if (iommu != skip) {
756 if (!ecap_sc_support(iommu->ecap)) {
757 ret = 0;
758 break;
759 }
760 }
761 }
762 rcu_read_unlock();
763
764 return ret;
765 }
766
767 static int domain_update_iommu_superpage(struct intel_iommu *skip)
768 {
769 struct dmar_drhd_unit *drhd;
770 struct intel_iommu *iommu;
771 int mask = 0xf;
772
773 if (!intel_iommu_superpage) {
774 return 0;
775 }
776
777 /* set iommu_superpage to the smallest common denominator */
778 rcu_read_lock();
779 for_each_active_iommu(iommu, drhd) {
780 if (iommu != skip) {
781 mask &= cap_super_page_val(iommu->cap);
782 if (!mask)
783 break;
784 }
785 }
786 rcu_read_unlock();
787
788 return fls(mask);
789 }
790
791 /* Some capabilities may be different across iommus */
792 static void domain_update_iommu_cap(struct dmar_domain *domain)
793 {
794 domain_update_iommu_coherency(domain);
795 domain->iommu_snooping = domain_update_iommu_snooping(NULL);
796 domain->iommu_superpage = domain_update_iommu_superpage(NULL);
797 }
798
799 static inline struct context_entry *iommu_context_addr(struct intel_iommu *iommu,
800 u8 bus, u8 devfn, int alloc)
801 {
802 struct root_entry *root = &iommu->root_entry[bus];
803 struct context_entry *context;
804 u64 *entry;
805
806 if (ecs_enabled(iommu)) {
807 if (devfn >= 0x80) {
808 devfn -= 0x80;
809 entry = &root->hi;
810 }
811 devfn *= 2;
812 }
813 entry = &root->lo;
814 if (*entry & 1)
815 context = phys_to_virt(*entry & VTD_PAGE_MASK);
816 else {
817 unsigned long phy_addr;
818 if (!alloc)
819 return NULL;
820
821 context = alloc_pgtable_page(iommu->node);
822 if (!context)
823 return NULL;
824
825 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
826 phy_addr = virt_to_phys((void *)context);
827 *entry = phy_addr | 1;
828 __iommu_flush_cache(iommu, entry, sizeof(*entry));
829 }
830 return &context[devfn];
831 }
832
833 static int iommu_dummy(struct device *dev)
834 {
835 return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
836 }
837
/*
 * Find the active IOMMU whose DRHD device scope covers @dev.
 *
 * On success the bus and devfn to use for the device are stored through
 * @bus and @devfn and the IOMMU is returned. Returns NULL, leaving @bus
 * and @devfn untouched, for dummy devices or when no unit matches.
 */
static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
{
	struct dmar_drhd_unit *drhd = NULL;
	struct intel_iommu *iommu;
	struct device *tmp;
	struct pci_dev *ptmp, *pdev = NULL;
	u16 segment = 0;
	int i;

	if (iommu_dummy(dev))
		return NULL;

	if (dev_is_pci(dev)) {
		pdev = to_pci_dev(dev);
		segment = pci_domain_nr(pdev->bus);
	} else if (has_acpi_companion(dev))
		/* From here on, match against the ACPI companion device */
		dev = &ACPI_COMPANION(dev)->dev;

	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		/* A PCI device can only match a unit in its own segment */
		if (pdev && segment != drhd->segment)
			continue;

		for_each_active_dev_scope(drhd->devices,
					  drhd->devices_cnt, i, tmp) {
			if (tmp == dev) {
				/* Exact scope match: use the bus/devfn recorded in the scope */
				*bus = drhd->devices[i].bus;
				*devfn = drhd->devices[i].devfn;
				goto out;
			}

			if (!pdev || !dev_is_pci(tmp))
				continue;

			/* Scope entry is a bridge whose bus range contains the device's bus */
			ptmp = to_pci_dev(tmp);
			if (ptmp->subordinate &&
			    ptmp->subordinate->number <= pdev->bus->number &&
			    ptmp->subordinate->busn_res.end >= pdev->bus->number)
				goto got_pdev;
		}

		if (pdev && drhd->include_all) {
		got_pdev:
			/* Matched via bridge or include_all: use the device's own bus/devfn */
			*bus = pdev->bus->number;
			*devfn = pdev->devfn;
			goto out;
		}
	}
	/* Loop ran to completion without a match */
	iommu = NULL;
out:
	rcu_read_unlock();

	return iommu;
}
892
893 static void domain_flush_cache(struct dmar_domain *domain,
894 void *addr, int size)
895 {
896 if (!domain->iommu_coherency)
897 clflush_cache_range(addr, size);
898 }
899
900 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
901 {
902 struct context_entry *context;
903 int ret = 0;
904 unsigned long flags;
905
906 spin_lock_irqsave(&iommu->lock, flags);
907 context = iommu_context_addr(iommu, bus, devfn, 0);
908 if (context)
909 ret = context_present(context);
910 spin_unlock_irqrestore(&iommu->lock, flags);
911 return ret;
912 }
913
914 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
915 {
916 struct context_entry *context;
917 unsigned long flags;
918
919 spin_lock_irqsave(&iommu->lock, flags);
920 context = iommu_context_addr(iommu, bus, devfn, 0);
921 if (context) {
922 context_clear_entry(context);
923 __iommu_flush_cache(iommu, context, sizeof(*context));
924 }
925 spin_unlock_irqrestore(&iommu->lock, flags);
926 }
927
928 static void free_context_table(struct intel_iommu *iommu)
929 {
930 int i;
931 unsigned long flags;
932 struct context_entry *context;
933
934 spin_lock_irqsave(&iommu->lock, flags);
935 if (!iommu->root_entry) {
936 goto out;
937 }
938 for (i = 0; i < ROOT_ENTRY_NR; i++) {
939 context = iommu_context_addr(iommu, i, 0, 0);
940 if (context)
941 free_pgtable_page(context);
942
943 if (!ecs_enabled(iommu))
944 continue;
945
946 context = iommu_context_addr(iommu, i, 0x80, 0);
947 if (context)
948 free_pgtable_page(context);
949
950 }
951 free_pgtable_page(iommu->root_entry);
952 iommu->root_entry = NULL;
953 out:
954 spin_unlock_irqrestore(&iommu->lock, flags);
955 }
956
/*
 * Walk the domain's page table for @pfn and return the PTE found.
 *
 * @target_level: in/out. A non-zero value asks for the PTE at that level;
 *	missing intermediate tables are allocated on the way down. Zero asks
 *	for whatever already exists: the walk stops at the first superpage
 *	or non-present entry (or at level 1) and the level reached is
 *	written back.
 *
 * Returns NULL when @pfn is outside the domain's address width or when a
 * page-table page cannot be allocated.
 */
static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
				      unsigned long pfn, int *target_level)
{
	struct dma_pte *parent, *pte = NULL;
	int level = agaw_to_level(domain->agaw);
	int offset;

	BUG_ON(!domain->pgd);

	if (!domain_pfn_supported(domain, pfn))
		/* Address beyond IOMMU's addressing capabilities. */
		return NULL;

	parent = domain->pgd;

	while (1) {
		void *tmp_page;

		offset = pfn_level_offset(pfn, level);
		pte = &parent[offset];
		/* Lookup-only mode: stop at the first leaf or hole */
		if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
			break;
		if (level == *target_level)
			break;

		if (!dma_pte_present(pte)) {
			uint64_t pteval;

			tmp_page = alloc_pgtable_page(domain->nid);

			if (!tmp_page)
				return NULL;

			/* Flush the new table before it becomes reachable */
			domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
			pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
			/* Install only if the slot is still empty */
			if (cmpxchg64(&pte->val, 0ULL, pteval))
				/* Someone else set it while we were thinking; use theirs. */
				free_pgtable_page(tmp_page);
			else
				domain_flush_cache(domain, pte, sizeof(*pte));
		}
		if (level == 1)
			break;

		/* Descend into the table this entry points at */
		parent = phys_to_virt(dma_pte_addr(pte));
		level--;
	}

	/* Report where a lookup-only walk ended */
	if (!*target_level)
		*target_level = level;

	return pte;
}
1010
1011
/*
 * Return the PTE for @pfn at @level, without allocating anything.
 *
 * The walk starts at the top level of the domain. If a superpage entry is
 * met above @level, that entry is returned and *@large_page is set to its
 * level. If a non-present entry is met above @level, NULL is returned and
 * *@large_page is set to the level where the walk stopped. *@large_page
 * is left untouched when the PTE at @level itself is returned.
 */
static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
					 unsigned long pfn,
					 int level, int *large_page)
{
	struct dma_pte *parent, *pte = NULL;
	int total = agaw_to_level(domain->agaw);
	int offset;

	parent = domain->pgd;
	/* 'total' is the level currently being examined; it counts down */
	while (level <= total) {
		offset = pfn_level_offset(pfn, total);
		pte = &parent[offset];
		if (level == total)
			return pte;

		if (!dma_pte_present(pte)) {
			*large_page = total;
			break;
		}

		if (dma_pte_superpage(pte)) {
			*large_page = total;
			return pte;
		}

		parent = phys_to_virt(dma_pte_addr(pte));
		total--;
	}
	return NULL;
}
1043
/*
 * Clear the leaf PTEs covering [@start_pfn, @last_pfn]; a tlb flush should
 * follow. Both ends must be within the domain's address width and
 * @start_pfn must not exceed @last_pfn (enforced with BUG_ON).
 */
static void dma_pte_clear_range(struct dmar_domain *domain,
				unsigned long start_pfn,
				unsigned long last_pfn)
{
	unsigned int large_page = 1;
	struct dma_pte *first_pte, *pte;

	BUG_ON(!domain_pfn_supported(domain, start_pfn));
	BUG_ON(!domain_pfn_supported(domain, last_pfn));
	BUG_ON(start_pfn > last_pfn);

	/* we don't need lock here; nobody else touches the iova range */
	do {
		large_page = 1;
		first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
		if (!pte) {
			/* Nothing mapped here: skip to the next aligned boundary above the hole */
			start_pfn = align_to_level(start_pfn + 1, large_page + 1);
			continue;
		}
		/* Clear consecutive entries until the range or this table page ends */
		do {
			dma_clear_pte(pte);
			start_pfn += lvl_to_nr_pages(large_page);
			pte++;
		} while (start_pfn <= last_pfn && !first_pte_in_page(pte));

		/* Flush exactly the entries cleared in this pass */
		domain_flush_cache(domain, first_pte,
				   (void *)pte - (void *)first_pte);

	/* start_pfn == 0 also ends the loop, presumably to catch wrap-around */
	} while (start_pfn && start_pfn <= last_pfn);
}
1075
/*
 * Recursive helper for dma_pte_free_pagetable().
 *
 * @level: level of the table that @pte points into
 * @pte:   first entry of that table
 * @pfn:   pfn corresponding to the start of that table
 * @start_pfn, @last_pfn: range whose page-table pages may be freed
 *
 * For each present, non-superpage entry in range, lower levels are
 * processed first (when @level > 2); the page the entry points at is then
 * freed and the entry cleared if the range covers it entirely.
 */
static void dma_pte_free_level(struct dmar_domain *domain, int level,
			       struct dma_pte *pte, unsigned long pfn,
			       unsigned long start_pfn, unsigned long last_pfn)
{
	/* Start at the first entry that can overlap the range */
	pfn = max(start_pfn, pfn);
	pte = &pte[pfn_level_offset(pfn, level)];

	do {
		unsigned long level_pfn;
		struct dma_pte *level_pte;

		/* No lower-level table hangs off this entry */
		if (!dma_pte_present(pte) || dma_pte_superpage(pte))
			goto next;

		level_pfn = pfn & level_mask(level - 1);
		level_pte = phys_to_virt(dma_pte_addr(pte));

		if (level > 2)
			dma_pte_free_level(domain, level - 1, level_pte,
					   level_pfn, start_pfn, last_pfn);

		/* If range covers entire pagetable, free it */
		if (!(start_pfn > level_pfn ||
		      last_pfn < level_pfn + level_size(level) - 1)) {
			dma_clear_pte(pte);
			domain_flush_cache(domain, pte, sizeof(*pte));
			free_pgtable_page(level_pte);
		}
next:
		pfn += level_size(level);
	/* Stop at the end of this table page or once past the range */
	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);
}
1108
1109 /* free page table pages. last level pte should already be cleared */
1110 static void dma_pte_free_pagetable(struct dmar_domain *domain,
1111 unsigned long start_pfn,
1112 unsigned long last_pfn)
1113 {
1114 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1115 BUG_ON(!domain_pfn_supported(domain, last_pfn));
1116 BUG_ON(start_pfn > last_pfn);
1117
1118 dma_pte_clear_range(domain, start_pfn, last_pfn);
1119
1120 /* We don't need lock here; nobody else touches the iova range */
1121 dma_pte_free_level(domain, agaw_to_level(domain->agaw),
1122 domain->pgd, 0, start_pfn, last_pfn);
1123
1124 /* free pgd */
1125 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1126 free_pgtable_page(domain->pgd);
1127 domain->pgd = NULL;
1128 }
1129 }
1130
1131 /* When a page at a given level is being unlinked from its parent, we don't
1132 need to *modify* it at all. All we need to do is make a list of all the
1133 pages which can be freed just as soon as we've flushed the IOTLB and we
1134 know the hardware page-walk will no longer touch them.
1135 The 'pte' argument is the *parent* PTE, pointing to the page that is to
1136 be freed. */
1137 static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1138 int level, struct dma_pte *pte,
1139 struct page *freelist)
1140 {
1141 struct page *pg;
1142
1143 pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1144 pg->freelist = freelist;
1145 freelist = pg;
1146
1147 if (level == 1)
1148 return freelist;
1149
1150 pte = page_address(pg);
1151 do {
1152 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1153 freelist = dma_pte_list_pagetables(domain, level - 1,
1154 pte, freelist);
1155 pte++;
1156 } while (!first_pte_in_page(pte));
1157
1158 return freelist;
1159 }
1160
1161 static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1162 struct dma_pte *pte, unsigned long pfn,
1163 unsigned long start_pfn,
1164 unsigned long last_pfn,
1165 struct page *freelist)
1166 {
1167 struct dma_pte *first_pte = NULL, *last_pte = NULL;
1168
1169 pfn = max(start_pfn, pfn);
1170 pte = &pte[pfn_level_offset(pfn, level)];
1171
1172 do {
1173 unsigned long level_pfn;
1174
1175 if (!dma_pte_present(pte))
1176 goto next;
1177
1178 level_pfn = pfn & level_mask(level);
1179
1180 /* If range covers entire pagetable, free it */
1181 if (start_pfn <= level_pfn &&
1182 last_pfn >= level_pfn + level_size(level) - 1) {
1183 /* These suborbinate page tables are going away entirely. Don't
1184 bother to clear them; we're just going to *free* them. */
1185 if (level > 1 && !dma_pte_superpage(pte))
1186 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1187
1188 dma_clear_pte(pte);
1189 if (!first_pte)
1190 first_pte = pte;
1191 last_pte = pte;
1192 } else if (level > 1) {
1193 /* Recurse down into a level that isn't *entirely* obsolete */
1194 freelist = dma_pte_clear_level(domain, level - 1,
1195 phys_to_virt(dma_pte_addr(pte)),
1196 level_pfn, start_pfn, last_pfn,
1197 freelist);
1198 }
1199 next:
1200 pfn += level_size(level);
1201 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1202
1203 if (first_pte)
1204 domain_flush_cache(domain, first_pte,
1205 (void *)++last_pte - (void *)first_pte);
1206
1207 return freelist;
1208 }
1209
1210 /* We can't just free the pages because the IOMMU may still be walking
1211 the page tables, and may have cached the intermediate levels. The
1212 pages can only be freed after the IOTLB flush has been done. */
1213 struct page *domain_unmap(struct dmar_domain *domain,
1214 unsigned long start_pfn,
1215 unsigned long last_pfn)
1216 {
1217 struct page *freelist = NULL;
1218
1219 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1220 BUG_ON(!domain_pfn_supported(domain, last_pfn));
1221 BUG_ON(start_pfn > last_pfn);
1222
1223 /* we don't need lock here; nobody else touches the iova range */
1224 freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1225 domain->pgd, 0, start_pfn, last_pfn, NULL);
1226
1227 /* free pgd */
1228 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1229 struct page *pgd_page = virt_to_page(domain->pgd);
1230 pgd_page->freelist = freelist;
1231 freelist = pgd_page;
1232
1233 domain->pgd = NULL;
1234 }
1235
1236 return freelist;
1237 }
1238
1239 void dma_free_pagelist(struct page *freelist)
1240 {
1241 struct page *pg;
1242
1243 while ((pg = freelist)) {
1244 freelist = pg->freelist;
1245 free_pgtable_page(page_address(pg));
1246 }
1247 }
1248
1249 /* iommu handling */
1250 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1251 {
1252 struct root_entry *root;
1253 unsigned long flags;
1254
1255 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
1256 if (!root) {
1257 pr_err("Allocating root entry for %s failed\n",
1258 iommu->name);
1259 return -ENOMEM;
1260 }
1261
1262 __iommu_flush_cache(iommu, root, ROOT_SIZE);
1263
1264 spin_lock_irqsave(&iommu->lock, flags);
1265 iommu->root_entry = root;
1266 spin_unlock_irqrestore(&iommu->lock, flags);
1267
1268 return 0;
1269 }
1270
1271 static void iommu_set_root_entry(struct intel_iommu *iommu)
1272 {
1273 u64 addr;
1274 u32 sts;
1275 unsigned long flag;
1276
1277 addr = virt_to_phys(iommu->root_entry);
1278 if (ecs_enabled(iommu))
1279 addr |= DMA_RTADDR_RTT;
1280
1281 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1282 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
1283
1284 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
1285
1286 /* Make sure hardware complete it */
1287 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1288 readl, (sts & DMA_GSTS_RTPS), sts);
1289
1290 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1291 }
1292
1293 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
1294 {
1295 u32 val;
1296 unsigned long flag;
1297
1298 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
1299 return;
1300
1301 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1302 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
1303
1304 /* Make sure hardware complete it */
1305 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1306 readl, (!(val & DMA_GSTS_WBFS)), val);
1307
1308 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1309 }
1310
1311 /* return value determine if we need a write buffer flush */
1312 static void __iommu_flush_context(struct intel_iommu *iommu,
1313 u16 did, u16 source_id, u8 function_mask,
1314 u64 type)
1315 {
1316 u64 val = 0;
1317 unsigned long flag;
1318
1319 switch (type) {
1320 case DMA_CCMD_GLOBAL_INVL:
1321 val = DMA_CCMD_GLOBAL_INVL;
1322 break;
1323 case DMA_CCMD_DOMAIN_INVL:
1324 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1325 break;
1326 case DMA_CCMD_DEVICE_INVL:
1327 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1328 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1329 break;
1330 default:
1331 BUG();
1332 }
1333 val |= DMA_CCMD_ICC;
1334
1335 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1336 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1337
1338 /* Make sure hardware complete it */
1339 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1340 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1341
1342 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1343 }
1344
1345 /* return value determine if we need a write buffer flush */
1346 static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1347 u64 addr, unsigned int size_order, u64 type)
1348 {
1349 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1350 u64 val = 0, val_iva = 0;
1351 unsigned long flag;
1352
1353 switch (type) {
1354 case DMA_TLB_GLOBAL_FLUSH:
1355 /* global flush doesn't need set IVA_REG */
1356 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1357 break;
1358 case DMA_TLB_DSI_FLUSH:
1359 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1360 break;
1361 case DMA_TLB_PSI_FLUSH:
1362 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1363 /* IH bit is passed in as part of address */
1364 val_iva = size_order | addr;
1365 break;
1366 default:
1367 BUG();
1368 }
1369 /* Note: set drain read/write */
1370 #if 0
1371 /*
1372 * This is probably to be super secure.. Looks like we can
1373 * ignore it without any impact.
1374 */
1375 if (cap_read_drain(iommu->cap))
1376 val |= DMA_TLB_READ_DRAIN;
1377 #endif
1378 if (cap_write_drain(iommu->cap))
1379 val |= DMA_TLB_WRITE_DRAIN;
1380
1381 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1382 /* Note: Only uses first TLB reg currently */
1383 if (val_iva)
1384 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1385 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1386
1387 /* Make sure hardware complete it */
1388 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1389 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1390
1391 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1392
1393 /* check IOTLB invalidation granularity */
1394 if (DMA_TLB_IAIG(val) == 0)
1395 pr_err("Flush IOTLB failed\n");
1396 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1397 pr_debug("TLB flush request %Lx, actual %Lx\n",
1398 (unsigned long long)DMA_TLB_IIRG(type),
1399 (unsigned long long)DMA_TLB_IAIG(val));
1400 }
1401
1402 static struct device_domain_info *
1403 iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
1404 u8 bus, u8 devfn)
1405 {
1406 bool found = false;
1407 unsigned long flags;
1408 struct device_domain_info *info;
1409 struct pci_dev *pdev;
1410
1411 if (!ecap_dev_iotlb_support(iommu->ecap))
1412 return NULL;
1413
1414 if (!iommu->qi)
1415 return NULL;
1416
1417 spin_lock_irqsave(&device_domain_lock, flags);
1418 list_for_each_entry(info, &domain->devices, link)
1419 if (info->iommu == iommu && info->bus == bus &&
1420 info->devfn == devfn) {
1421 found = true;
1422 break;
1423 }
1424 spin_unlock_irqrestore(&device_domain_lock, flags);
1425
1426 if (!found || !info->dev || !dev_is_pci(info->dev))
1427 return NULL;
1428
1429 pdev = to_pci_dev(info->dev);
1430
1431 if (!pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS))
1432 return NULL;
1433
1434 if (!dmar_find_matched_atsr_unit(pdev))
1435 return NULL;
1436
1437 return info;
1438 }
1439
1440 static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1441 {
1442 if (!info || !dev_is_pci(info->dev))
1443 return;
1444
1445 pci_enable_ats(to_pci_dev(info->dev), VTD_PAGE_SHIFT);
1446 }
1447
1448 static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1449 {
1450 if (!info->dev || !dev_is_pci(info->dev) ||
1451 !pci_ats_enabled(to_pci_dev(info->dev)))
1452 return;
1453
1454 pci_disable_ats(to_pci_dev(info->dev));
1455 }
1456
1457 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1458 u64 addr, unsigned mask)
1459 {
1460 u16 sid, qdep;
1461 unsigned long flags;
1462 struct device_domain_info *info;
1463
1464 spin_lock_irqsave(&device_domain_lock, flags);
1465 list_for_each_entry(info, &domain->devices, link) {
1466 struct pci_dev *pdev;
1467 if (!info->dev || !dev_is_pci(info->dev))
1468 continue;
1469
1470 pdev = to_pci_dev(info->dev);
1471 if (!pci_ats_enabled(pdev))
1472 continue;
1473
1474 sid = info->bus << 8 | info->devfn;
1475 qdep = pci_ats_queue_depth(pdev);
1476 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1477 }
1478 spin_unlock_irqrestore(&device_domain_lock, flags);
1479 }
1480
1481 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
1482 struct dmar_domain *domain,
1483 unsigned long pfn, unsigned int pages,
1484 int ih, int map)
1485 {
1486 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1487 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1488 u16 did = domain->iommu_did[iommu->seq_id];
1489
1490 BUG_ON(pages == 0);
1491
1492 if (ih)
1493 ih = 1 << 6;
1494 /*
1495 * Fallback to domain selective flush if no PSI support or the size is
1496 * too big.
1497 * PSI requires page size to be 2 ^ x, and the base address is naturally
1498 * aligned to the size
1499 */
1500 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1501 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1502 DMA_TLB_DSI_FLUSH);
1503 else
1504 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
1505 DMA_TLB_PSI_FLUSH);
1506
1507 /*
1508 * In caching mode, changes of pages from non-present to present require
1509 * flush. However, device IOTLB doesn't need to be flushed in this case.
1510 */
1511 if (!cap_caching_mode(iommu->cap) || !map)
1512 iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
1513 addr, mask);
1514 }
1515
1516 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1517 {
1518 u32 pmen;
1519 unsigned long flags;
1520
1521 raw_spin_lock_irqsave(&iommu->register_lock, flags);
1522 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1523 pmen &= ~DMA_PMEN_EPM;
1524 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1525
1526 /* wait for the protected region status bit to clear */
1527 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1528 readl, !(pmen & DMA_PMEN_PRS), pmen);
1529
1530 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1531 }
1532
1533 static void iommu_enable_translation(struct intel_iommu *iommu)
1534 {
1535 u32 sts;
1536 unsigned long flags;
1537
1538 raw_spin_lock_irqsave(&iommu->register_lock, flags);
1539 iommu->gcmd |= DMA_GCMD_TE;
1540 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1541
1542 /* Make sure hardware complete it */
1543 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1544 readl, (sts & DMA_GSTS_TES), sts);
1545
1546 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1547 }
1548
1549 static void iommu_disable_translation(struct intel_iommu *iommu)
1550 {
1551 u32 sts;
1552 unsigned long flag;
1553
1554 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1555 iommu->gcmd &= ~DMA_GCMD_TE;
1556 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1557
1558 /* Make sure hardware complete it */
1559 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1560 readl, (!(sts & DMA_GSTS_TES)), sts);
1561
1562 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1563 }
1564
1565
1566 static int iommu_init_domains(struct intel_iommu *iommu)
1567 {
1568 u32 ndomains, nlongs;
1569 size_t size;
1570
1571 ndomains = cap_ndoms(iommu->cap);
1572 pr_debug("%s: Number of Domains supported <%d>\n",
1573 iommu->name, ndomains);
1574 nlongs = BITS_TO_LONGS(ndomains);
1575
1576 spin_lock_init(&iommu->lock);
1577
1578 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1579 if (!iommu->domain_ids) {
1580 pr_err("%s: Allocating domain id array failed\n",
1581 iommu->name);
1582 return -ENOMEM;
1583 }
1584
1585 size = ((ndomains >> 8) + 1) * sizeof(struct dmar_domain **);
1586 iommu->domains = kzalloc(size, GFP_KERNEL);
1587
1588 if (iommu->domains) {
1589 size = 256 * sizeof(struct dmar_domain *);
1590 iommu->domains[0] = kzalloc(size, GFP_KERNEL);
1591 }
1592
1593 if (!iommu->domains || !iommu->domains[0]) {
1594 pr_err("%s: Allocating domain array failed\n",
1595 iommu->name);
1596 kfree(iommu->domain_ids);
1597 kfree(iommu->domains);
1598 iommu->domain_ids = NULL;
1599 iommu->domains = NULL;
1600 return -ENOMEM;
1601 }
1602
1603
1604
1605 /*
1606 * If Caching mode is set, then invalid translations are tagged
1607 * with domain-id 0, hence we need to pre-allocate it. We also
1608 * use domain-id 0 as a marker for non-allocated domain-id, so
1609 * make sure it is not used for a real domain.
1610 */
1611 set_bit(0, iommu->domain_ids);
1612
1613 return 0;
1614 }
1615
1616 static void disable_dmar_iommu(struct intel_iommu *iommu)
1617 {
1618 struct device_domain_info *info, *tmp;
1619
1620 if (!iommu->domains || !iommu->domain_ids)
1621 return;
1622
1623 list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
1624 struct dmar_domain *domain;
1625
1626 if (info->iommu != iommu)
1627 continue;
1628
1629 if (!info->dev || !info->domain)
1630 continue;
1631
1632 domain = info->domain;
1633
1634 dmar_remove_one_dev_info(domain, info->dev);
1635
1636 if (!domain_type_is_vm_or_si(domain))
1637 domain_exit(domain);
1638 }
1639
1640 if (iommu->gcmd & DMA_GCMD_TE)
1641 iommu_disable_translation(iommu);
1642 }
1643
1644 static void free_dmar_iommu(struct intel_iommu *iommu)
1645 {
1646 if ((iommu->domains) && (iommu->domain_ids)) {
1647 int elems = (cap_ndoms(iommu->cap) >> 8) + 1;
1648 int i;
1649
1650 for (i = 0; i < elems; i++)
1651 kfree(iommu->domains[i]);
1652 kfree(iommu->domains);
1653 kfree(iommu->domain_ids);
1654 iommu->domains = NULL;
1655 iommu->domain_ids = NULL;
1656 }
1657
1658 g_iommus[iommu->seq_id] = NULL;
1659
1660 /* free context mapping */
1661 free_context_table(iommu);
1662 }
1663
1664 static struct dmar_domain *alloc_domain(int flags)
1665 {
1666 struct dmar_domain *domain;
1667
1668 domain = alloc_domain_mem();
1669 if (!domain)
1670 return NULL;
1671
1672 memset(domain, 0, sizeof(*domain));
1673 domain->nid = -1;
1674 domain->flags = flags;
1675 spin_lock_init(&domain->iommu_lock);
1676 INIT_LIST_HEAD(&domain->devices);
1677
1678 return domain;
1679 }
1680
1681 /* Must be called with iommu->lock */
1682 static int domain_attach_iommu(struct dmar_domain *domain,
1683 struct intel_iommu *iommu)
1684 {
1685 unsigned long ndomains;
1686 unsigned long flags;
1687 int ret, num;
1688
1689 assert_spin_locked(&iommu->lock);
1690
1691 spin_lock_irqsave(&domain->iommu_lock, flags);
1692
1693 domain->iommu_refcnt[iommu->seq_id] += 1;
1694 domain->iommu_count += 1;
1695 if (domain->iommu_refcnt[iommu->seq_id] == 1) {
1696 ndomains = cap_ndoms(iommu->cap);
1697 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1698
1699 if (num >= ndomains) {
1700 pr_err("%s: No free domain ids\n", iommu->name);
1701 domain->iommu_refcnt[iommu->seq_id] -= 1;
1702 domain->iommu_count -= 1;
1703 ret = -ENOSPC;
1704 goto out_unlock;
1705 }
1706
1707 set_bit(num, iommu->domain_ids);
1708 set_iommu_domain(iommu, num, domain);
1709
1710 domain->iommu_did[iommu->seq_id] = num;
1711 domain->nid = iommu->node;
1712
1713 domain_update_iommu_cap(domain);
1714 }
1715
1716 ret = 0;
1717 out_unlock:
1718 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1719
1720 return ret;
1721 }
1722
1723 static int domain_detach_iommu(struct dmar_domain *domain,
1724 struct intel_iommu *iommu)
1725 {
1726 int num, count = INT_MAX;
1727 unsigned long flags;
1728
1729 assert_spin_locked(&iommu->lock);
1730
1731 spin_lock_irqsave(&domain->iommu_lock, flags);
1732 domain->iommu_refcnt[iommu->seq_id] -= 1;
1733 count = --domain->iommu_count;
1734 if (domain->iommu_refcnt[iommu->seq_id] == 0) {
1735 num = domain->iommu_did[iommu->seq_id];
1736 clear_bit(num, iommu->domain_ids);
1737 set_iommu_domain(iommu, num, NULL);
1738
1739 domain_update_iommu_cap(domain);
1740 domain->iommu_did[iommu->seq_id] = 0;
1741 }
1742 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1743
1744 return count;
1745 }
1746
1747 static struct iova_domain reserved_iova_list;
1748 static struct lock_class_key reserved_rbtree_key;
1749
1750 static int dmar_init_reserved_ranges(void)
1751 {
1752 struct pci_dev *pdev = NULL;
1753 struct iova *iova;
1754 int i;
1755
1756 init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN,
1757 DMA_32BIT_PFN);
1758
1759 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1760 &reserved_rbtree_key);
1761
1762 /* IOAPIC ranges shouldn't be accessed by DMA */
1763 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1764 IOVA_PFN(IOAPIC_RANGE_END));
1765 if (!iova) {
1766 pr_err("Reserve IOAPIC range failed\n");
1767 return -ENODEV;
1768 }
1769
1770 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1771 for_each_pci_dev(pdev) {
1772 struct resource *r;
1773
1774 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1775 r = &pdev->resource[i];
1776 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1777 continue;
1778 iova = reserve_iova(&reserved_iova_list,
1779 IOVA_PFN(r->start),
1780 IOVA_PFN(r->end));
1781 if (!iova) {
1782 pr_err("Reserve iova failed\n");
1783 return -ENODEV;
1784 }
1785 }
1786 }
1787 return 0;
1788 }
1789
1790 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1791 {
1792 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1793 }
1794
/*
 * Round a guest address width up to the next value of the form 12 + 9 * n,
 * capped at 64.
 */
static inline int guestwidth_to_adjustwidth(int gaw)
{
	int rem = (gaw - 12) % 9;
	int agaw = rem ? gaw + 9 - rem : gaw;

	return agaw > 64 ? 64 : agaw;
}
1808
1809 static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
1810 int guest_width)
1811 {
1812 int adjust_width, agaw;
1813 unsigned long sagaw;
1814
1815 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
1816 DMA_32BIT_PFN);
1817 domain_reserve_special_ranges(domain);
1818
1819 /* calculate AGAW */
1820 if (guest_width > cap_mgaw(iommu->cap))
1821 guest_width = cap_mgaw(iommu->cap);
1822 domain->gaw = guest_width;
1823 adjust_width = guestwidth_to_adjustwidth(guest_width);
1824 agaw = width_to_agaw(adjust_width);
1825 sagaw = cap_sagaw(iommu->cap);
1826 if (!test_bit(agaw, &sagaw)) {
1827 /* hardware doesn't support it, choose a bigger one */
1828 pr_debug("Hardware doesn't support agaw %d\n", agaw);
1829 agaw = find_next_bit(&sagaw, 5, agaw);
1830 if (agaw >= 5)
1831 return -ENODEV;
1832 }
1833 domain->agaw = agaw;
1834
1835 if (ecap_coherent(iommu->ecap))
1836 domain->iommu_coherency = 1;
1837 else
1838 domain->iommu_coherency = 0;
1839
1840 if (ecap_sc_support(iommu->ecap))
1841 domain->iommu_snooping = 1;
1842 else
1843 domain->iommu_snooping = 0;
1844
1845 if (intel_iommu_superpage)
1846 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1847 else
1848 domain->iommu_superpage = 0;
1849
1850 domain->nid = iommu->node;
1851
1852 /* always allocate the top pgd */
1853 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1854 if (!domain->pgd)
1855 return -ENOMEM;
1856 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1857 return 0;
1858 }
1859
1860 static void domain_exit(struct dmar_domain *domain)
1861 {
1862 struct page *freelist = NULL;
1863
1864 /* Domain 0 is reserved, so dont process it */
1865 if (!domain)
1866 return;
1867
1868 /* Flush any lazy unmaps that may reference this domain */
1869 if (!intel_iommu_strict)
1870 flush_unmaps_timeout(0);
1871
1872 /* Remove associated devices and clear attached or cached domains */
1873 rcu_read_lock();
1874 domain_remove_dev_info(domain);
1875 rcu_read_unlock();
1876
1877 /* destroy iovas */
1878 put_iova_domain(&domain->iovad);
1879
1880 freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1881
1882 dma_free_pagelist(freelist);
1883
1884 free_domain_mem(domain);
1885 }
1886
1887 static int domain_context_mapping_one(struct dmar_domain *domain,
1888 struct intel_iommu *iommu,
1889 u8 bus, u8 devfn)
1890 {
1891 u16 did = domain->iommu_did[iommu->seq_id];
1892 int translation = CONTEXT_TT_MULTI_LEVEL;
1893 struct device_domain_info *info = NULL;
1894 struct context_entry *context;
1895 unsigned long flags;
1896 struct dma_pte *pgd;
1897 int agaw;
1898
1899 WARN_ON(did == 0);
1900
1901 if (hw_pass_through && domain_type_is_si(domain))
1902 translation = CONTEXT_TT_PASS_THROUGH;
1903
1904 pr_debug("Set context mapping for %02x:%02x.%d\n",
1905 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1906
1907 BUG_ON(!domain->pgd);
1908
1909 spin_lock_irqsave(&iommu->lock, flags);
1910 context = iommu_context_addr(iommu, bus, devfn, 1);
1911 spin_unlock_irqrestore(&iommu->lock, flags);
1912 if (!context)
1913 return -ENOMEM;
1914 spin_lock_irqsave(&iommu->lock, flags);
1915 if (context_present(context)) {
1916 spin_unlock_irqrestore(&iommu->lock, flags);
1917 return 0;
1918 }
1919
1920 pgd = domain->pgd;
1921
1922 context_clear_entry(context);
1923 context_set_domain_id(context, did);
1924
1925 /*
1926 * Skip top levels of page tables for iommu which has less agaw
1927 * than default. Unnecessary for PT mode.
1928 */
1929 if (translation != CONTEXT_TT_PASS_THROUGH) {
1930 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1931 pgd = phys_to_virt(dma_pte_addr(pgd));
1932 if (!dma_pte_present(pgd)) {
1933 spin_unlock_irqrestore(&iommu->lock, flags);
1934 return -ENOMEM;
1935 }
1936 }
1937
1938 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
1939 translation = info ? CONTEXT_TT_DEV_IOTLB :
1940 CONTEXT_TT_MULTI_LEVEL;
1941
1942 context_set_address_root(context, virt_to_phys(pgd));
1943 context_set_address_width(context, iommu->agaw);
1944 } else {
1945 /*
1946 * In pass through mode, AW must be programmed to
1947 * indicate the largest AGAW value supported by
1948 * hardware. And ASR is ignored by hardware.
1949 */
1950 context_set_address_width(context, iommu->msagaw);
1951 }
1952
1953 context_set_translation_type(context, translation);
1954 context_set_fault_enable(context);
1955 context_set_present(context);
1956 domain_flush_cache(domain, context, sizeof(*context));
1957
1958 /*
1959 * It's a non-present to present mapping. If hardware doesn't cache
1960 * non-present entry we only need to flush the write-buffer. If the
1961 * _does_ cache non-present entries, then it does so in the special
1962 * domain #0, which we have to flush:
1963 */
1964 if (cap_caching_mode(iommu->cap)) {
1965 iommu->flush.flush_context(iommu, 0,
1966 (((u16)bus) << 8) | devfn,
1967 DMA_CCMD_MASK_NOBIT,
1968 DMA_CCMD_DEVICE_INVL);
1969 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
1970 } else {
1971 iommu_flush_write_buffer(iommu);
1972 }
1973 iommu_enable_dev_iotlb(info);
1974 spin_unlock_irqrestore(&iommu->lock, flags);
1975
1976 return 0;
1977 }
1978
/* Arguments handed to domain_context_mapping_cb() through its opaque pointer */
struct domain_context_mapping_data {
	struct dmar_domain *domain;	/* domain to map the aliases into */
	struct intel_iommu *iommu;	/* IOMMU the device sits behind */
};
1983
1984 static int domain_context_mapping_cb(struct pci_dev *pdev,
1985 u16 alias, void *opaque)
1986 {
1987 struct domain_context_mapping_data *data = opaque;
1988
1989 return domain_context_mapping_one(data->domain, data->iommu,
1990 PCI_BUS_NUM(alias), alias & 0xff);
1991 }
1992
1993 static int
1994 domain_context_mapping(struct dmar_domain *domain, struct device *dev)
1995 {
1996 struct intel_iommu *iommu;
1997 u8 bus, devfn;
1998 struct domain_context_mapping_data data;
1999
2000 iommu = device_to_iommu(dev, &bus, &devfn);
2001 if (!iommu)
2002 return -ENODEV;
2003
2004 if (!dev_is_pci(dev))
2005 return domain_context_mapping_one(domain, iommu, bus, devfn);
2006
2007 data.domain = domain;
2008 data.iommu = iommu;
2009
2010 return pci_for_each_dma_alias(to_pci_dev(dev),
2011 &domain_context_mapping_cb, &data);
2012 }
2013
2014 static int domain_context_mapped_cb(struct pci_dev *pdev,
2015 u16 alias, void *opaque)
2016 {
2017 struct intel_iommu *iommu = opaque;
2018
2019 return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
2020 }
2021
2022 static int domain_context_mapped(struct device *dev)
2023 {
2024 struct intel_iommu *iommu;
2025 u8 bus, devfn;
2026
2027 iommu = device_to_iommu(dev, &bus, &devfn);
2028 if (!iommu)
2029 return -ENODEV;
2030
2031 if (!dev_is_pci(dev))
2032 return device_context_mapped(iommu, bus, devfn);
2033
2034 return !pci_for_each_dma_alias(to_pci_dev(dev),
2035 domain_context_mapped_cb, iommu);
2036 }
2037
2038 /* Returns a number of VTD pages, but aligned to MM page size */
2039 static inline unsigned long aligned_nrpages(unsigned long host_addr,
2040 size_t size)
2041 {
2042 host_addr &= ~PAGE_MASK;
2043 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
2044 }
2045
2046 /* Return largest possible superpage level for a given mapping */
2047 static inline int hardware_largepage_caps(struct dmar_domain *domain,
2048 unsigned long iov_pfn,
2049 unsigned long phy_pfn,
2050 unsigned long pages)
2051 {
2052 int support, level = 1;
2053 unsigned long pfnmerge;
2054
2055 support = domain->iommu_superpage;
2056
2057 /* To use a large page, the virtual *and* physical addresses
2058 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2059 of them will mean we have to use smaller pages. So just
2060 merge them and check both at once. */
2061 pfnmerge = iov_pfn | phy_pfn;
2062
2063 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
2064 pages >>= VTD_STRIDE_SHIFT;
2065 if (!pages)
2066 break;
2067 pfnmerge >>= VTD_STRIDE_SHIFT;
2068 level++;
2069 support--;
2070 }
2071 return level;
2072 }
2073
2074 static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2075 struct scatterlist *sg, unsigned long phys_pfn,
2076 unsigned long nr_pages, int prot)
2077 {
2078 struct dma_pte *first_pte = NULL, *pte = NULL;
2079 phys_addr_t uninitialized_var(pteval);
2080 unsigned long sg_res = 0;
2081 unsigned int largepage_lvl = 0;
2082 unsigned long lvl_pages = 0;
2083
2084 BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
2085
2086 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2087 return -EINVAL;
2088
2089 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
2090
2091 if (!sg) {
2092 sg_res = nr_pages;
2093 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
2094 }
2095
2096 while (nr_pages > 0) {
2097 uint64_t tmp;
2098
2099 if (!sg_res) {
2100 sg_res = aligned_nrpages(sg->offset, sg->length);
2101 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
2102 sg->dma_length = sg->length;
2103 pteval = page_to_phys(sg_page(sg)) | prot;
2104 phys_pfn = pteval >> VTD_PAGE_SHIFT;
2105 }
2106
2107 if (!pte) {
2108 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2109
2110 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
2111 if (!pte)
2112 return -ENOMEM;
2113 /* It is large page*/
2114 if (largepage_lvl > 1) {
2115 pteval |= DMA_PTE_LARGE_PAGE;
2116 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2117 /*
2118 * Ensure that old small page tables are
2119 * removed to make room for superpage,
2120 * if they exist.
2121 */
2122 dma_pte_free_pagetable(domain, iov_pfn,
2123 iov_pfn + lvl_pages - 1);
2124 } else {
2125 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
2126 }
2127
2128 }
2129 /* We don't need lock here, nobody else
2130 * touches the iova range
2131 */
2132 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
2133 if (tmp) {
2134 static int dumps = 5;
2135 pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2136 iov_pfn, tmp, (unsigned long long)pteval);
2137 if (dumps) {
2138 dumps--;
2139 debug_dma_dump_mappings(NULL);
2140 }
2141 WARN_ON(1);
2142 }
2143
2144 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2145
2146 BUG_ON(nr_pages < lvl_pages);
2147 BUG_ON(sg_res < lvl_pages);
2148
2149 nr_pages -= lvl_pages;
2150 iov_pfn += lvl_pages;
2151 phys_pfn += lvl_pages;
2152 pteval += lvl_pages * VTD_PAGE_SIZE;
2153 sg_res -= lvl_pages;
2154
2155 /* If the next PTE would be the first in a new page, then we
2156 need to flush the cache on the entries we've just written.
2157 And then we'll need to recalculate 'pte', so clear it and
2158 let it get set again in the if (!pte) block above.
2159
2160 If we're done (!nr_pages) we need to flush the cache too.
2161
2162 Also if we've been setting superpages, we may need to
2163 recalculate 'pte' and switch back to smaller pages for the
2164 end of the mapping, if the trailing size is not enough to
2165 use another superpage (i.e. sg_res < lvl_pages). */
2166 pte++;
2167 if (!nr_pages || first_pte_in_page(pte) ||
2168 (largepage_lvl > 1 && sg_res < lvl_pages)) {
2169 domain_flush_cache(domain, first_pte,
2170 (void *)pte - (void *)first_pte);
2171 pte = NULL;
2172 }
2173
2174 if (!sg_res && nr_pages)
2175 sg = sg_next(sg);
2176 }
2177 return 0;
2178 }
2179
/* Map the scatterlist @sg at @iov_pfn; thin wrapper around __domain_mapping(). */
static inline int domain_sg_mapping(struct dmar_domain *domain,
				    unsigned long iov_pfn,
				    struct scatterlist *sg,
				    unsigned long nr_pages,
				    int prot)
{
	/* The physical pfn argument is unused when a scatterlist is given */
	return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
}
2186
2187 static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2188 unsigned long phys_pfn, unsigned long nr_pages,
2189 int prot)
2190 {
2191 return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
2192 }
2193
2194 static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn)
2195 {
2196 if (!iommu)
2197 return;
2198
2199 clear_context_table(iommu, bus, devfn);
2200 iommu->flush.flush_context(iommu, 0, 0, 0,
2201 DMA_CCMD_GLOBAL_INVL);
2202 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2203 }
2204
2205 static inline void unlink_domain_info(struct device_domain_info *info)
2206 {
2207 assert_spin_locked(&device_domain_lock);
2208 list_del(&info->link);
2209 list_del(&info->global);
2210 if (info->dev)
2211 info->dev->archdata.iommu = NULL;
2212 }
2213
2214 static void domain_remove_dev_info(struct dmar_domain *domain)
2215 {
2216 struct device_domain_info *info, *tmp;
2217
2218 list_for_each_entry_safe(info, tmp, &domain->devices, link)
2219 dmar_remove_one_dev_info(domain, info->dev);
2220 }
2221
2222 /*
2223 * find_domain
2224 * Note: we use struct device->archdata.iommu stores the info
2225 */
2226 static struct dmar_domain *find_domain(struct device *dev)
2227 {
2228 struct device_domain_info *info;
2229
2230 /* No lock here, assumes no domain exit in normal case */
2231 info = dev->archdata.iommu;
2232 if (info)
2233 return info->domain;
2234 return NULL;
2235 }
2236
2237 static inline struct device_domain_info *
2238 dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2239 {
2240 struct device_domain_info *info;
2241
2242 list_for_each_entry(info, &device_domain_list, global)
2243 if (info->iommu->segment == segment && info->bus == bus &&
2244 info->devfn == devfn)
2245 return info;
2246
2247 return NULL;
2248 }
2249
2250 static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
2251 int bus, int devfn,
2252 struct device *dev,
2253 struct dmar_domain *domain)
2254 {
2255 struct dmar_domain *found = NULL;
2256 struct device_domain_info *info;
2257 unsigned long flags;
2258 int ret;
2259
2260 info = alloc_devinfo_mem();
2261 if (!info)
2262 return NULL;
2263
2264 info->bus = bus;
2265 info->devfn = devfn;
2266 info->dev = dev;
2267 info->domain = domain;
2268 info->iommu = iommu;
2269
2270 spin_lock_irqsave(&device_domain_lock, flags);
2271 if (dev)
2272 found = find_domain(dev);
2273 else {
2274 struct device_domain_info *info2;
2275 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
2276 if (info2)
2277 found = info2->domain;
2278 }
2279 if (found) {
2280 spin_unlock_irqrestore(&device_domain_lock, flags);
2281 free_devinfo_mem(info);
2282 /* Caller must free the original domain */
2283 return found;
2284 }
2285
2286 spin_lock(&iommu->lock);
2287 ret = domain_attach_iommu(domain, iommu);
2288 spin_unlock(&iommu->lock);
2289
2290 if (ret) {
2291 spin_unlock_irqrestore(&device_domain_lock, flags);
2292 return NULL;
2293 }
2294
2295 list_add(&info->link, &domain->devices);
2296 list_add(&info->global, &device_domain_list);
2297 if (dev)
2298 dev->archdata.iommu = info;
2299 spin_unlock_irqrestore(&device_domain_lock, flags);
2300
2301 if (dev && domain_context_mapping(domain, dev)) {
2302 pr_err("Domain context map for %s failed\n", dev_name(dev));
2303 dmar_remove_one_dev_info(domain, dev);
2304 return NULL;
2305 }
2306
2307 return domain;
2308 }
2309
2310 static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2311 {
2312 *(u16 *)opaque = alias;
2313 return 0;
2314 }
2315
2316 /* domain is initialized */
2317 static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
2318 {
2319 struct device_domain_info *info = NULL;
2320 struct dmar_domain *domain, *tmp;
2321 struct intel_iommu *iommu;
2322 unsigned long flags;
2323 u16 dma_alias;
2324 u8 bus, devfn;
2325
2326 domain = find_domain(dev);
2327 if (domain)
2328 return domain;
2329
2330 iommu = device_to_iommu(dev, &bus, &devfn);
2331 if (!iommu)
2332 return NULL;
2333
2334 if (dev_is_pci(dev)) {
2335 struct pci_dev *pdev = to_pci_dev(dev);
2336
2337 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2338
2339 spin_lock_irqsave(&device_domain_lock, flags);
2340 info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2341 PCI_BUS_NUM(dma_alias),
2342 dma_alias & 0xff);
2343 if (info) {
2344 iommu = info->iommu;
2345 domain = info->domain;
2346 }
2347 spin_unlock_irqrestore(&device_domain_lock, flags);
2348
2349 /* DMA alias already has a domain, uses it */
2350 if (info)
2351 goto found_domain;
2352 }
2353
2354 /* Allocate and initialize new domain for the device */
2355 domain = alloc_domain(0);
2356 if (!domain)
2357 return NULL;
2358 if (domain_init(domain, iommu, gaw)) {
2359 domain_exit(domain);
2360 return NULL;
2361 }
2362
2363 /* register PCI DMA alias device */
2364 if (dev_is_pci(dev)) {
2365 tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2366 dma_alias & 0xff, NULL, domain);
2367
2368 if (!tmp || tmp != domain) {
2369 domain_exit(domain);
2370 domain = tmp;
2371 }
2372
2373 if (!domain)
2374 return NULL;
2375 }
2376
2377 found_domain:
2378 tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
2379
2380 if (!tmp || tmp != domain) {
2381 domain_exit(domain);
2382 domain = tmp;
2383 }
2384
2385 return domain;
2386 }
2387
/*
 * Bitmask of IDENTMAP_* flags selecting which devices are placed in the
 * static identity (1:1) domain; zero means identity mapping is not used.
 */
static int iommu_identity_mapping;
#define IDENTMAP_ALL		(1 << 0)	/* every eligible device */
#define IDENTMAP_GFX		(1 << 1)	/* graphics devices */
#define IDENTMAP_AZALIA		(1 << 2)	/* Azalia devices */
2392
2393 static int iommu_domain_identity_map(struct dmar_domain *domain,
2394 unsigned long long start,
2395 unsigned long long end)
2396 {
2397 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2398 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2399
2400 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2401 dma_to_mm_pfn(last_vpfn))) {
2402 pr_err("Reserving iova failed\n");
2403 return -ENOMEM;
2404 }
2405
2406 pr_debug("Mapping reserved region %llx-%llx\n", start, end);
2407 /*
2408 * RMRR range might have overlap with physical memory range,
2409 * clear it first
2410 */
2411 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2412
2413 return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2414 last_vpfn - first_vpfn + 1,
2415 DMA_PTE_READ|DMA_PTE_WRITE);
2416 }
2417
2418 static int iommu_prepare_identity_map(struct device *dev,
2419 unsigned long long start,
2420 unsigned long long end)
2421 {
2422 struct dmar_domain *domain;
2423 int ret;
2424
2425 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2426 if (!domain)
2427 return -ENOMEM;
2428
2429 /* For _hardware_ passthrough, don't bother. But for software
2430 passthrough, we do it anyway -- it may indicate a memory
2431 range which is reserved in E820, so which didn't get set
2432 up to start with in si_domain */
2433 if (domain == si_domain && hw_pass_through) {
2434 pr_warn("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2435 dev_name(dev), start, end);
2436 return 0;
2437 }
2438
2439 pr_info("Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2440 dev_name(dev), start, end);
2441
2442 if (end < start) {
2443 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2444 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2445 dmi_get_system_info(DMI_BIOS_VENDOR),
2446 dmi_get_system_info(DMI_BIOS_VERSION),
2447 dmi_get_system_info(DMI_PRODUCT_VERSION));
2448 ret = -EIO;
2449 goto error;
2450 }
2451
2452 if (end >> agaw_to_width(domain->agaw)) {
2453 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2454 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2455 agaw_to_width(domain->agaw),
2456 dmi_get_system_info(DMI_BIOS_VENDOR),
2457 dmi_get_system_info(DMI_BIOS_VERSION),
2458 dmi_get_system_info(DMI_PRODUCT_VERSION));
2459 ret = -EIO;
2460 goto error;
2461 }
2462
2463 ret = iommu_domain_identity_map(domain, start, end);
2464 if (ret)
2465 goto error;
2466
2467 return 0;
2468
2469 error:
2470 domain_exit(domain);
2471 return ret;
2472 }
2473
2474 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2475 struct device *dev)
2476 {
2477 if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2478 return 0;
2479 return iommu_prepare_identity_map(dev, rmrr->base_address,
2480 rmrr->end_address);
2481 }
2482
#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
/*
 * Floppy workaround: give the first ISA bridge found an identity mapping
 * of the low 16MiB. Failure is only logged.
 */
static inline void iommu_prepare_isa(void)
{
	struct pci_dev *isa_bridge;

	isa_bridge = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
	if (isa_bridge) {
		pr_info("Prepare 0-16MiB unity mapping for LPC\n");

		if (iommu_prepare_identity_map(&isa_bridge->dev, 0,
					       16*1024*1024 - 1))
			pr_err("Failed to create 0-16MiB identity map - floppy might not work\n");

		/* Drop the reference taken by pci_get_class() */
		pci_dev_put(isa_bridge);
	}
}
#else
/* Workaround not configured: nothing to prepare. */
static inline void iommu_prepare_isa(void)
{
}
#endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */
2507
2508 static int md_domain_init(struct dmar_domain *domain, int guest_width);
2509
2510 static int __init si_domain_init(int hw)
2511 {
2512 int nid, ret = 0;
2513
2514 si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2515 if (!si_domain)
2516 return -EFAULT;
2517
2518 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2519 domain_exit(si_domain);
2520 return -EFAULT;
2521 }
2522
2523 pr_debug("Identity mapping domain allocated\n");
2524
2525 if (hw)
2526 return 0;
2527
2528 for_each_online_node(nid) {
2529 unsigned long start_pfn, end_pfn;
2530 int i;
2531
2532 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2533 ret = iommu_domain_identity_map(si_domain,
2534 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2535 if (ret)
2536 return ret;
2537 }
2538 }
2539
2540 return 0;
2541 }
2542
2543 static int identity_mapping(struct device *dev)
2544 {
2545 struct device_domain_info *info;
2546
2547 if (likely(!iommu_identity_mapping))
2548 return 0;
2549
2550 info = dev->archdata.iommu;
2551 if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2552 return (info->domain == si_domain);
2553
2554 return 0;
2555 }
2556
2557 static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
2558 {
2559 struct dmar_domain *ndomain;
2560 struct intel_iommu *iommu;
2561 u8 bus, devfn;
2562
2563 iommu = device_to_iommu(dev, &bus, &devfn);
2564 if (!iommu)
2565 return -ENODEV;
2566
2567 ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
2568 if (ndomain != domain)
2569 return -EBUSY;
2570
2571 return 0;
2572 }
2573
2574 static bool device_has_rmrr(struct device *dev)
2575 {
2576 struct dmar_rmrr_unit *rmrr;
2577 struct device *tmp;
2578 int i;
2579
2580 rcu_read_lock();
2581 for_each_rmrr_units(rmrr) {
2582 /*
2583 * Return TRUE if this RMRR contains the device that
2584 * is passed in.
2585 */
2586 for_each_active_dev_scope(rmrr->devices,
2587 rmrr->devices_cnt, i, tmp)
2588 if (tmp == dev) {
2589 rcu_read_unlock();
2590 return true;
2591 }
2592 }
2593 rcu_read_unlock();
2594 return false;
2595 }
2596
2597 /*
2598 * There are a couple cases where we need to restrict the functionality of
2599 * devices associated with RMRRs. The first is when evaluating a device for
2600 * identity mapping because problems exist when devices are moved in and out
2601 * of domains and their respective RMRR information is lost. This means that
2602 * a device with associated RMRRs will never be in a "passthrough" domain.
2603 * The second is use of the device through the IOMMU API. This interface
2604 * expects to have full control of the IOVA space for the device. We cannot
2605 * satisfy both the requirement that RMRR access is maintained and have an
2606 * unencumbered IOVA space. We also have no ability to quiesce the device's
2607 * use of the RMRR space or even inform the IOMMU API user of the restriction.
2608 * We therefore prevent devices associated with an RMRR from participating in
2609 * the IOMMU API, which eliminates them from device assignment.
2610 *
2611 * In both cases we assume that PCI USB devices with RMRRs have them largely
2612 * for historical reasons and that the RMRR space is not actively used post
2613 * boot. This exclusion may change if vendors begin to abuse it.
2614 *
2615 * The same exception is made for graphics devices, with the requirement that
2616 * any use of the RMRR regions will be torn down before assigning the device
2617 * to a guest.
2618 */
2619 static bool device_is_rmrr_locked(struct device *dev)
2620 {
2621 if (!device_has_rmrr(dev))
2622 return false;
2623
2624 if (dev_is_pci(dev)) {
2625 struct pci_dev *pdev = to_pci_dev(dev);
2626
2627 if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
2628 return false;
2629 }
2630
2631 return true;
2632 }
2633
2634 static int iommu_should_identity_map(struct device *dev, int startup)
2635 {
2636
2637 if (dev_is_pci(dev)) {
2638 struct pci_dev *pdev = to_pci_dev(dev);
2639
2640 if (device_is_rmrr_locked(dev))
2641 return 0;
2642
2643 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2644 return 1;
2645
2646 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2647 return 1;
2648
2649 if (!(iommu_identity_mapping & IDENTMAP_ALL))
2650 return 0;
2651
2652 /*
2653 * We want to start off with all devices in the 1:1 domain, and
2654 * take them out later if we find they can't access all of memory.
2655 *
2656 * However, we can't do this for PCI devices behind bridges,
2657 * because all PCI devices behind the same bridge will end up
2658 * with the same source-id on their transactions.
2659 *
2660 * Practically speaking, we can't change things around for these
2661 * devices at run-time, because we can't be sure there'll be no
2662 * DMA transactions in flight for any of their siblings.
2663 *
2664 * So PCI devices (unless they're on the root bus) as well as
2665 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2666 * the 1:1 domain, just in _case_ one of their siblings turns out
2667 * not to be able to map all of memory.
2668 */
2669 if (!pci_is_pcie(pdev)) {
2670 if (!pci_is_root_bus(pdev->bus))
2671 return 0;
2672 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2673 return 0;
2674 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
2675 return 0;
2676 } else {
2677 if (device_has_rmrr(dev))
2678 return 0;
2679 }
2680
2681 /*
2682 * At boot time, we don't yet know if devices will be 64-bit capable.
2683 * Assume that they will — if they turn out not to be, then we can
2684 * take them out of the 1:1 domain later.
2685 */
2686 if (!startup) {
2687 /*
2688 * If the device's dma_mask is less than the system's memory
2689 * size then this is not a candidate for identity mapping.
2690 */
2691 u64 dma_mask = *dev->dma_mask;
2692
2693 if (dev->coherent_dma_mask &&
2694 dev->coherent_dma_mask < dma_mask)
2695 dma_mask = dev->coherent_dma_mask;
2696
2697 return dma_mask >= dma_get_required_mask(dev);
2698 }
2699
2700 return 1;
2701 }
2702
2703 static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
2704 {
2705 int ret;
2706
2707 if (!iommu_should_identity_map(dev, 1))
2708 return 0;
2709
2710 ret = domain_add_dev_info(si_domain, dev);
2711 if (!ret)
2712 pr_info("%s identity mapping for device %s\n",
2713 hw ? "Hardware" : "Software", dev_name(dev));
2714 else if (ret == -ENODEV)
2715 /* device not associated with an iommu */
2716 ret = 0;
2717
2718 return ret;
2719 }
2720
2721
2722 static int __init iommu_prepare_static_identity_mapping(int hw)
2723 {
2724 struct pci_dev *pdev = NULL;
2725 struct dmar_drhd_unit *drhd;
2726 struct intel_iommu *iommu;
2727 struct device *dev;
2728 int i;
2729 int ret = 0;
2730
2731 for_each_pci_dev(pdev) {
2732 ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
2733 if (ret)
2734 return ret;
2735 }
2736
2737 for_each_active_iommu(iommu, drhd)
2738 for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
2739 struct acpi_device_physical_node *pn;
2740 struct acpi_device *adev;
2741
2742 if (dev->bus != &acpi_bus_type)
2743 continue;
2744
2745 adev= to_acpi_device(dev);
2746 mutex_lock(&adev->physical_node_lock);
2747 list_for_each_entry(pn, &adev->physical_node_list, node) {
2748 ret = dev_prepare_static_identity_mapping(pn->dev, hw);
2749 if (ret)
2750 break;
2751 }
2752 mutex_unlock(&adev->physical_node_lock);
2753 if (ret)
2754 return ret;
2755 }
2756
2757 return 0;
2758 }
2759
2760 static void intel_iommu_init_qi(struct intel_iommu *iommu)
2761 {
2762 /*
2763 * Start from the sane iommu hardware state.
2764 * If the queued invalidation is already initialized by us
2765 * (for example, while enabling interrupt-remapping) then
2766 * we got the things already rolling from a sane state.
2767 */
2768 if (!iommu->qi) {
2769 /*
2770 * Clear any previous faults.
2771 */
2772 dmar_fault(-1, iommu);
2773 /*
2774 * Disable queued invalidation if supported and already enabled
2775 * before OS handover.
2776 */
2777 dmar_disable_qi(iommu);
2778 }
2779
2780 if (dmar_enable_qi(iommu)) {
2781 /*
2782 * Queued Invalidate not enabled, use Register Based Invalidate
2783 */
2784 iommu->flush.flush_context = __iommu_flush_context;
2785 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2786 pr_info("%s: Using Register based invalidation\n",
2787 iommu->name);
2788 } else {
2789 iommu->flush.flush_context = qi_flush_context;
2790 iommu->flush.flush_iotlb = qi_flush_iotlb;
2791 pr_info("%s: Using Queued invalidation\n", iommu->name);
2792 }
2793 }
2794
2795 static int copy_context_table(struct intel_iommu *iommu,
2796 struct root_entry *old_re,
2797 struct context_entry **tbl,
2798 int bus, bool ext)
2799 {
2800 struct context_entry *old_ce = NULL, *new_ce = NULL, ce;
2801 int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
2802 phys_addr_t old_ce_phys;
2803
2804 tbl_idx = ext ? bus * 2 : bus;
2805
2806 for (devfn = 0; devfn < 256; devfn++) {
2807 /* First calculate the correct index */
2808 idx = (ext ? devfn * 2 : devfn) % 256;
2809
2810 if (idx == 0) {
2811 /* First save what we may have and clean up */
2812 if (new_ce) {
2813 tbl[tbl_idx] = new_ce;
2814 __iommu_flush_cache(iommu, new_ce,
2815 VTD_PAGE_SIZE);
2816 pos = 1;
2817 }
2818
2819 if (old_ce)
2820 iounmap(old_ce);
2821
2822 ret = 0;
2823 if (devfn < 0x80)
2824 old_ce_phys = root_entry_lctp(old_re);
2825 else
2826 old_ce_phys = root_entry_uctp(old_re);
2827
2828 if (!old_ce_phys) {
2829 if (ext && devfn == 0) {
2830 /* No LCTP, try UCTP */
2831 devfn = 0x7f;
2832 continue;
2833 } else {
2834 goto out;
2835 }
2836 }
2837
2838 ret = -ENOMEM;
2839 old_ce = ioremap_cache(old_ce_phys, PAGE_SIZE);
2840 if (!old_ce)
2841 goto out;
2842
2843 new_ce = alloc_pgtable_page(iommu->node);
2844 if (!new_ce)
2845 goto out_unmap;
2846
2847 ret = 0;
2848 }
2849
2850 /* Now copy the context entry */
2851 ce = old_ce[idx];
2852
2853 if (!__context_present(&ce))
2854 continue;
2855
2856 did = context_domain_id(&ce);
2857 if (did >= 0 && did < cap_ndoms(iommu->cap))
2858 set_bit(did, iommu->domain_ids);
2859
2860 /*
2861 * We need a marker for copied context entries. This
2862 * marker needs to work for the old format as well as
2863 * for extended context entries.
2864 *
2865 * Bit 67 of the context entry is used. In the old
2866 * format this bit is available to software, in the
2867 * extended format it is the PGE bit, but PGE is ignored
2868 * by HW if PASIDs are disabled (and thus still
2869 * available).
2870 *
2871 * So disable PASIDs first and then mark the entry
2872 * copied. This means that we don't copy PASID
2873 * translations from the old kernel, but this is fine as
2874 * faults there are not fatal.
2875 */
2876 context_clear_pasid_enable(&ce);
2877 context_set_copied(&ce);
2878
2879 new_ce[idx] = ce;
2880 }
2881
2882 tbl[tbl_idx + pos] = new_ce;
2883
2884 __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
2885
2886 out_unmap:
2887 iounmap(old_ce);
2888
2889 out:
2890 return ret;
2891 }
2892
2893 static int copy_translation_tables(struct intel_iommu *iommu)
2894 {
2895 struct context_entry **ctxt_tbls;
2896 struct root_entry *old_rt;
2897 phys_addr_t old_rt_phys;
2898 int ctxt_table_entries;
2899 unsigned long flags;
2900 u64 rtaddr_reg;
2901 int bus, ret;
2902 bool new_ext, ext;
2903
2904 rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
2905 ext = !!(rtaddr_reg & DMA_RTADDR_RTT);
2906 new_ext = !!ecap_ecs(iommu->ecap);
2907
2908 /*
2909 * The RTT bit can only be changed when translation is disabled,
2910 * but disabling translation means to open a window for data
2911 * corruption. So bail out and don't copy anything if we would
2912 * have to change the bit.
2913 */
2914 if (new_ext != ext)
2915 return -EINVAL;
2916
2917 old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
2918 if (!old_rt_phys)
2919 return -EINVAL;
2920
2921 old_rt = ioremap_cache(old_rt_phys, PAGE_SIZE);
2922 if (!old_rt)
2923 return -ENOMEM;
2924
2925 /* This is too big for the stack - allocate it from slab */
2926 ctxt_table_entries = ext ? 512 : 256;
2927 ret = -ENOMEM;
2928 ctxt_tbls = kzalloc(ctxt_table_entries * sizeof(void *), GFP_KERNEL);
2929 if (!ctxt_tbls)
2930 goto out_unmap;
2931
2932 for (bus = 0; bus < 256; bus++) {
2933 ret = copy_context_table(iommu, &old_rt[bus],
2934 ctxt_tbls, bus, ext);
2935 if (ret) {
2936 pr_err("%s: Failed to copy context table for bus %d\n",
2937 iommu->name, bus);
2938 continue;
2939 }
2940 }
2941
2942 spin_lock_irqsave(&iommu->lock, flags);
2943
2944 /* Context tables are copied, now write them to the root_entry table */
2945 for (bus = 0; bus < 256; bus++) {
2946 int idx = ext ? bus * 2 : bus;
2947 u64 val;
2948
2949 if (ctxt_tbls[idx]) {
2950 val = virt_to_phys(ctxt_tbls[idx]) | 1;
2951 iommu->root_entry[bus].lo = val;
2952 }
2953
2954 if (!ext || !ctxt_tbls[idx + 1])
2955 continue;
2956
2957 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
2958 iommu->root_entry[bus].hi = val;
2959 }
2960
2961 spin_unlock_irqrestore(&iommu->lock, flags);
2962
2963 kfree(ctxt_tbls);
2964
2965 __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
2966
2967 ret = 0;
2968
2969 out_unmap:
2970 iounmap(old_rt);
2971
2972 return ret;
2973 }
2974
2975 static int __init init_dmars(void)
2976 {
2977 struct dmar_drhd_unit *drhd;
2978 struct dmar_rmrr_unit *rmrr;
2979 bool copied_tables = false;
2980 struct device *dev;
2981 struct intel_iommu *iommu;
2982 int i, ret;
2983
2984 /*
2985 * for each drhd
2986 * allocate root
2987 * initialize and program root entry to not present
2988 * endfor
2989 */
2990 for_each_drhd_unit(drhd) {
2991 /*
2992 * lock not needed as this is only incremented in the single
2993 * threaded kernel __init code path all other access are read
2994 * only
2995 */
2996 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
2997 g_num_of_iommus++;
2998 continue;
2999 }
3000 pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
3001 }
3002
3003 /* Preallocate enough resources for IOMMU hot-addition */
3004 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
3005 g_num_of_iommus = DMAR_UNITS_SUPPORTED;
3006
3007 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
3008 GFP_KERNEL);
3009 if (!g_iommus) {
3010 pr_err("Allocating global iommu array failed\n");
3011 ret = -ENOMEM;
3012 goto error;
3013 }
3014
3015 deferred_flush = kzalloc(g_num_of_iommus *
3016 sizeof(struct deferred_flush_tables), GFP_KERNEL);
3017 if (!deferred_flush) {
3018 ret = -ENOMEM;
3019 goto free_g_iommus;
3020 }
3021
3022 for_each_active_iommu(iommu, drhd) {
3023 g_iommus[iommu->seq_id] = iommu;
3024
3025 intel_iommu_init_qi(iommu);
3026
3027 ret = iommu_init_domains(iommu);
3028 if (ret)
3029 goto free_iommu;
3030
3031 init_translation_status(iommu);
3032
3033 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
3034 iommu_disable_translation(iommu);
3035 clear_translation_pre_enabled(iommu);
3036 pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3037 iommu->name);
3038 }
3039
3040 /*
3041 * TBD:
3042 * we could share the same root & context tables
3043 * among all IOMMU's. Need to Split it later.
3044 */
3045 ret = iommu_alloc_root_entry(iommu);
3046 if (ret)
3047 goto free_iommu;
3048
3049 if (translation_pre_enabled(iommu)) {
3050 pr_info("Translation already enabled - trying to copy translation structures\n");
3051
3052 ret = copy_translation_tables(iommu);
3053 if (ret) {
3054 /*
3055 * We found the IOMMU with translation
3056 * enabled - but failed to copy over the
3057 * old root-entry table. Try to proceed
3058 * by disabling translation now and
3059 * allocating a clean root-entry table.
3060 * This might cause DMAR faults, but
3061 * probably the dump will still succeed.
3062 */
3063 pr_err("Failed to copy translation tables from previous kernel for %s\n",
3064 iommu->name);
3065 iommu_disable_translation(iommu);
3066 clear_translation_pre_enabled(iommu);
3067 } else {
3068 pr_info("Copied translation tables from previous kernel for %s\n",
3069 iommu->name);
3070 copied_tables = true;
3071 }
3072 }
3073
3074 iommu_flush_write_buffer(iommu);
3075 iommu_set_root_entry(iommu);
3076 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
3077 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3078
3079 if (!ecap_pass_through(iommu->ecap))
3080 hw_pass_through = 0;
3081 }
3082
3083 if (iommu_pass_through)
3084 iommu_identity_mapping |= IDENTMAP_ALL;
3085
3086 #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
3087 iommu_identity_mapping |= IDENTMAP_GFX;
3088 #endif
3089
3090 if (iommu_identity_mapping) {
3091 ret = si_domain_init(hw_pass_through);
3092 if (ret)
3093 goto free_iommu;
3094 }
3095
3096 check_tylersburg_isoch();
3097
3098 /*
3099 * If we copied translations from a previous kernel in the kdump
3100 * case, we can not assign the devices to domains now, as that
3101 * would eliminate the old mappings. So skip this part and defer
3102 * the assignment to device driver initialization time.
3103 */
3104 if (copied_tables)
3105 goto domains_done;
3106
3107 /*
3108 * If pass through is not set or not enabled, setup context entries for
3109 * identity mappings for rmrr, gfx, and isa and may fall back to static
3110 * identity mapping if iommu_identity_mapping is set.
3111 */
3112 if (iommu_identity_mapping) {
3113 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
3114 if (ret) {
3115 pr_crit("Failed to setup IOMMU pass-through\n");
3116 goto free_iommu;
3117 }
3118 }
3119 /*
3120 * For each rmrr
3121 * for each dev attached to rmrr
3122 * do
3123 * locate drhd for dev, alloc domain for dev
3124 * allocate free domain
3125 * allocate page table entries for rmrr
3126 * if context not allocated for bus
3127 * allocate and init context
3128 * set present in root table for this bus
3129 * init context with domain, translation etc
3130 * endfor
3131 * endfor
3132 */
3133 pr_info("Setting RMRR:\n");
3134 for_each_rmrr_units(rmrr) {
3135 /* some BIOS lists non-exist devices in DMAR table. */
3136 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
3137 i, dev) {
3138 ret = iommu_prepare_rmrr_dev(rmrr, dev);
3139 if (ret)
3140 pr_err("Mapping reserved region failed\n");
3141 }
3142 }
3143
3144 iommu_prepare_isa();
3145
3146 domains_done:
3147
3148 /*
3149 * for each drhd
3150 * enable fault log
3151 * global invalidate context cache
3152 * global invalidate iotlb
3153 * enable translation
3154 */
3155 for_each_iommu(iommu, drhd) {
3156 if (drhd->ignored) {
3157 /*
3158 * we always have to disable PMRs or DMA may fail on
3159 * this device
3160 */
3161 if (force_on)
3162 iommu_disable_protect_mem_regions(iommu);
3163 continue;
3164 }
3165
3166 iommu_flush_write_buffer(iommu);
3167
3168 ret = dmar_set_interrupt(iommu);
3169 if (ret)
3170 goto free_iommu;
3171
3172 if (!translation_pre_enabled(iommu))
3173 iommu_enable_translation(iommu);
3174
3175 iommu_disable_protect_mem_regions(iommu);
3176 }
3177
3178 return 0;
3179
3180 free_iommu:
3181 for_each_active_iommu(iommu, drhd) {
3182 disable_dmar_iommu(iommu);
3183 free_dmar_iommu(iommu);
3184 }
3185 kfree(deferred_flush);
3186 free_g_iommus:
3187 kfree(g_iommus);
3188 error:
3189 return ret;
3190 }
3191
3192 /* This takes a number of _MM_ pages, not VTD pages */
3193 static struct iova *intel_alloc_iova(struct device *dev,
3194 struct dmar_domain *domain,
3195 unsigned long nrpages, uint64_t dma_mask)
3196 {
3197 struct iova *iova = NULL;
3198
3199 /* Restrict dma_mask to the width that the iommu can handle */
3200 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
3201
3202 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
3203 /*
3204 * First try to allocate an io virtual address in
3205 * DMA_BIT_MASK(32) and if that fails then try allocating
3206 * from higher range
3207 */
3208 iova = alloc_iova(&domain->iovad, nrpages,
3209 IOVA_PFN(DMA_BIT_MASK(32)), 1);
3210 if (iova)
3211 return iova;
3212 }
3213 iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
3214 if (unlikely(!iova)) {
3215 pr_err("Allocating %ld-page iova for %s failed",
3216 nrpages, dev_name(dev));
3217 return NULL;
3218 }
3219
3220 return iova;
3221 }
3222
3223 static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
3224 {
3225 struct dmar_domain *domain;
3226
3227 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
3228 if (!domain) {
3229 pr_err("Allocating domain for %s failed\n",
3230 dev_name(dev));
3231 return NULL;
3232 }
3233
3234 return domain;
3235 }
3236
3237 static inline struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
3238 {
3239 struct device_domain_info *info;
3240
3241 /* No lock here, assumes no domain exit in normal case */
3242 info = dev->archdata.iommu;
3243 if (likely(info))
3244 return info->domain;
3245
3246 return __get_valid_domain_for_dev(dev);
3247 }
3248
3249 /* Check if the dev needs to go through non-identity map and unmap process.*/
3250 static int iommu_no_mapping(struct device *dev)
3251 {
3252 int found;
3253
3254 if (iommu_dummy(dev))
3255 return 1;
3256
3257 if (!iommu_identity_mapping)
3258 return 0;
3259
3260 found = identity_mapping(dev);
3261 if (found) {
3262 if (iommu_should_identity_map(dev, 0))
3263 return 1;
3264 else {
3265 /*
3266 * 32 bit DMA is removed from si_domain and fall back
3267 * to non-identity mapping.
3268 */
3269 dmar_remove_one_dev_info(si_domain, dev);
3270 pr_info("32bit %s uses non-identity mapping\n",
3271 dev_name(dev));
3272 return 0;
3273 }
3274 } else {
3275 /*
3276 * In case of a detached 64 bit DMA device from vm, the device
3277 * is put into si_domain for identity mapping.
3278 */
3279 if (iommu_should_identity_map(dev, 0)) {
3280 int ret;
3281 ret = domain_add_dev_info(si_domain, dev);
3282 if (!ret) {
3283 pr_info("64bit %s uses identity mapping\n",
3284 dev_name(dev));
3285 return 1;
3286 }
3287 }
3288 }
3289
3290 return 0;
3291 }
3292
3293 static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
3294 size_t size, int dir, u64 dma_mask)
3295 {
3296 struct dmar_domain *domain;
3297 phys_addr_t start_paddr;
3298 struct iova *iova;
3299 int prot = 0;
3300 int ret;
3301 struct intel_iommu *iommu;
3302 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
3303
3304 BUG_ON(dir == DMA_NONE);
3305
3306 if (iommu_no_mapping(dev))
3307 return paddr;
3308
3309 domain = get_valid_domain_for_dev(dev);
3310 if (!domain)
3311 return 0;
3312
3313 iommu = domain_get_iommu(domain);
3314 size = aligned_nrpages(paddr, size);
3315
3316 iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
3317 if (!iova)
3318 goto error;
3319
3320 /*
3321 * Check if DMAR supports zero-length reads on write only
3322 * mappings..
3323 */
3324 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3325 !cap_zlr(iommu->cap))
3326 prot |= DMA_PTE_READ;
3327 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3328 prot |= DMA_PTE_WRITE;
3329 /*
3330 * paddr - (paddr + size) might be partial page, we should map the whole
3331 * page. Note: if two part of one page are separately mapped, we
3332 * might have two guest_addr mapping to the same host paddr, but this
3333 * is not a big problem
3334 */
3335 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
3336 mm_to_dma_pfn(paddr_pfn), size, prot);
3337 if (ret)
3338 goto error;
3339
3340 /* it's a non-present to present mapping. Only flush if caching mode */
3341 if (cap_caching_mode(iommu->cap))
3342 iommu_flush_iotlb_psi(iommu, domain,
3343 mm_to_dma_pfn(iova->pfn_lo),
3344 size, 0, 1);
3345 else
3346 iommu_flush_write_buffer(iommu);
3347
3348 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
3349 start_paddr += paddr & ~PAGE_MASK;
3350 return start_paddr;
3351
3352 error:
3353 if (iova)
3354 __free_iova(&domain->iovad, iova);
3355 pr_err("Device %s request: %zx@%llx dir %d --- failed\n",
3356 dev_name(dev), size, (unsigned long long)paddr, dir);
3357 return 0;
3358 }
3359
3360 static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3361 unsigned long offset, size_t size,
3362 enum dma_data_direction dir,
3363 struct dma_attrs *attrs)
3364 {
3365 return __intel_map_single(dev, page_to_phys(page) + offset, size,
3366 dir, *dev->dma_mask);
3367 }
3368
3369 static void flush_unmaps(void)
3370 {
3371 int i, j;
3372
3373 timer_on = 0;
3374
3375 /* just flush them all */
3376 for (i = 0; i < g_num_of_iommus; i++) {
3377 struct intel_iommu *iommu = g_iommus[i];
3378 if (!iommu)
3379 continue;
3380
3381 if (!deferred_flush[i].next)
3382 continue;
3383
3384 /* In caching mode, global flushes turn emulation expensive */
3385 if (!cap_caching_mode(iommu->cap))
3386 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3387 DMA_TLB_GLOBAL_FLUSH);
3388 for (j = 0; j < deferred_flush[i].next; j++) {
3389 unsigned long mask;
3390 struct iova *iova = deferred_flush[i].iova[j];
3391 struct dmar_domain *domain = deferred_flush[i].domain[j];
3392
3393 /* On real hardware multiple invalidations are expensive */
3394 if (cap_caching_mode(iommu->cap))
3395 iommu_flush_iotlb_psi(iommu, domain,
3396 iova->pfn_lo, iova_size(iova),
3397 !deferred_flush[i].freelist[j], 0);
3398 else {
3399 mask = ilog2(mm_to_dma_pfn(iova_size(iova)));
3400 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
3401 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
3402 }
3403 __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
3404 if (deferred_flush[i].freelist[j])
3405 dma_free_pagelist(deferred_flush[i].freelist[j]);
3406 }
3407 deferred_flush[i].next = 0;
3408 }
3409
3410 list_size = 0;
3411 }
3412
3413 static void flush_unmaps_timeout(unsigned long data)
3414 {
3415 unsigned long flags;
3416
3417 spin_lock_irqsave(&async_umap_flush_lock, flags);
3418 flush_unmaps();
3419 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3420 }
3421
3422 static void add_unmap(struct dmar_domain *dom, struct iova *iova, struct page *freelist)
3423 {
3424 unsigned long flags;
3425 int next, iommu_id;
3426 struct intel_iommu *iommu;
3427
3428 spin_lock_irqsave(&async_umap_flush_lock, flags);
3429 if (list_size == HIGH_WATER_MARK)
3430 flush_unmaps();
3431
3432 iommu = domain_get_iommu(dom);
3433 iommu_id = iommu->seq_id;
3434
3435 next = deferred_flush[iommu_id].next;
3436 deferred_flush[iommu_id].domain[next] = dom;
3437 deferred_flush[iommu_id].iova[next] = iova;
3438 deferred_flush[iommu_id].freelist[next] = freelist;
3439 deferred_flush[iommu_id].next++;
3440
3441 if (!timer_on) {
3442 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
3443 timer_on = 1;
3444 }
3445 list_size++;
3446 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3447 }
3448
3449 static void intel_unmap(struct device *dev, dma_addr_t dev_addr)
3450 {
3451 struct dmar_domain *domain;
3452 unsigned long start_pfn, last_pfn;
3453 struct iova *iova;
3454 struct intel_iommu *iommu;
3455 struct page *freelist;
3456
3457 if (iommu_no_mapping(dev))
3458 return;
3459
3460 domain = find_domain(dev);
3461 BUG_ON(!domain);
3462
3463 iommu = domain_get_iommu(domain);
3464
3465 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
3466 if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
3467 (unsigned long long)dev_addr))
3468 return;
3469
3470 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3471 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
3472
3473 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
3474 dev_name(dev), start_pfn, last_pfn);
3475
3476 freelist = domain_unmap(domain, start_pfn, last_pfn);
3477
3478 if (intel_iommu_strict) {
3479 iommu_flush_iotlb_psi(iommu, domain, start_pfn,
3480 last_pfn - start_pfn + 1, !freelist, 0);
3481 /* free iova */
3482 __free_iova(&domain->iovad, iova);
3483 dma_free_pagelist(freelist);
3484 } else {
3485 add_unmap(domain, iova, freelist);
3486 /*
3487 * queue up the release of the unmap to save the 1/6th of the
3488 * cpu used up by the iotlb flush operation...
3489 */
3490 }
3491 }
3492
3493 static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3494 size_t size, enum dma_data_direction dir,
3495 struct dma_attrs *attrs)
3496 {
3497 intel_unmap(dev, dev_addr);
3498 }
3499
3500 static void *intel_alloc_coherent(struct device *dev, size_t size,
3501 dma_addr_t *dma_handle, gfp_t flags,
3502 struct dma_attrs *attrs)
3503 {
3504 struct page *page = NULL;
3505 int order;
3506
3507 size = PAGE_ALIGN(size);
3508 order = get_order(size);
3509
3510 if (!iommu_no_mapping(dev))
3511 flags &= ~(GFP_DMA | GFP_DMA32);
3512 else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
3513 if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
3514 flags |= GFP_DMA;
3515 else
3516 flags |= GFP_DMA32;
3517 }
3518
3519 if (flags & __GFP_WAIT) {
3520 unsigned int count = size >> PAGE_SHIFT;
3521
3522 page = dma_alloc_from_contiguous(dev, count, order);
3523 if (page && iommu_no_mapping(dev) &&
3524 page_to_phys(page) + size > dev->coherent_dma_mask) {
3525 dma_release_from_contiguous(dev, page, count);
3526 page = NULL;
3527 }
3528 }
3529
3530 if (!page)
3531 page = alloc_pages(flags, order);
3532 if (!page)
3533 return NULL;
3534 memset(page_address(page), 0, size);
3535
3536 *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
3537 DMA_BIDIRECTIONAL,
3538 dev->coherent_dma_mask);
3539 if (*dma_handle)
3540 return page_address(page);
3541 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3542 __free_pages(page, order);
3543
3544 return NULL;
3545 }
3546
3547 static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
3548 dma_addr_t dma_handle, struct dma_attrs *attrs)
3549 {
3550 int order;
3551 struct page *page = virt_to_page(vaddr);
3552
3553 size = PAGE_ALIGN(size);
3554 order = get_order(size);
3555
3556 intel_unmap(dev, dma_handle);
3557 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3558 __free_pages(page, order);
3559 }
3560
3561 static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
3562 int nelems, enum dma_data_direction dir,
3563 struct dma_attrs *attrs)
3564 {
3565 intel_unmap(dev, sglist[0].dma_address);
3566 }
3567
3568 static int intel_nontranslate_map_sg(struct device *hddev,
3569 struct scatterlist *sglist, int nelems, int dir)
3570 {
3571 int i;
3572 struct scatterlist *sg;
3573
3574 for_each_sg(sglist, sg, nelems, i) {
3575 BUG_ON(!sg_page(sg));
3576 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
3577 sg->dma_length = sg->length;
3578 }
3579 return nelems;
3580 }
3581
3582 static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
3583 enum dma_data_direction dir, struct dma_attrs *attrs)
3584 {
3585 int i;
3586 struct dmar_domain *domain;
3587 size_t size = 0;
3588 int prot = 0;
3589 struct iova *iova = NULL;
3590 int ret;
3591 struct scatterlist *sg;
3592 unsigned long start_vpfn;
3593 struct intel_iommu *iommu;
3594
3595 BUG_ON(dir == DMA_NONE);
3596 if (iommu_no_mapping(dev))
3597 return intel_nontranslate_map_sg(dev, sglist, nelems, dir);
3598
3599 domain = get_valid_domain_for_dev(dev);
3600 if (!domain)
3601 return 0;
3602
3603 iommu = domain_get_iommu(domain);
3604
3605 for_each_sg(sglist, sg, nelems, i)
3606 size += aligned_nrpages(sg->offset, sg->length);
3607
3608 iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
3609 *dev->dma_mask);
3610 if (!iova) {
3611 sglist->dma_length = 0;
3612 return 0;
3613 }
3614
3615 /*
3616 * Check if DMAR supports zero-length reads on write only
3617 * mappings..
3618 */
3619 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3620 !cap_zlr(iommu->cap))
3621 prot |= DMA_PTE_READ;
3622 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3623 prot |= DMA_PTE_WRITE;
3624
3625 start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
3626
3627 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
3628 if (unlikely(ret)) {
3629 dma_pte_free_pagetable(domain, start_vpfn,
3630 start_vpfn + size - 1);
3631 __free_iova(&domain->iovad, iova);
3632 return 0;
3633 }
3634
3635 /* it's a non-present to present mapping. Only flush if caching mode */
3636 if (cap_caching_mode(iommu->cap))
3637 iommu_flush_iotlb_psi(iommu, domain, start_vpfn, size, 0, 1);
3638 else
3639 iommu_flush_write_buffer(iommu);
3640
3641 return nelems;
3642 }
3643
3644 static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3645 {
3646 return !dma_addr;
3647 }
3648
3649 struct dma_map_ops intel_dma_ops = {
3650 .alloc = intel_alloc_coherent,
3651 .free = intel_free_coherent,
3652 .map_sg = intel_map_sg,
3653 .unmap_sg = intel_unmap_sg,
3654 .map_page = intel_map_page,
3655 .unmap_page = intel_unmap_page,
3656 .mapping_error = intel_mapping_error,
3657 };
3658
3659 static inline int iommu_domain_cache_init(void)
3660 {
3661 int ret = 0;
3662
3663 iommu_domain_cache = kmem_cache_create("iommu_domain",
3664 sizeof(struct dmar_domain),
3665 0,
3666 SLAB_HWCACHE_ALIGN,
3667
3668 NULL);
3669 if (!iommu_domain_cache) {
3670 pr_err("Couldn't create iommu_domain cache\n");
3671 ret = -ENOMEM;
3672 }
3673
3674 return ret;
3675 }
3676
3677 static inline int iommu_devinfo_cache_init(void)
3678 {
3679 int ret = 0;
3680
3681 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3682 sizeof(struct device_domain_info),
3683 0,
3684 SLAB_HWCACHE_ALIGN,
3685 NULL);
3686 if (!iommu_devinfo_cache) {
3687 pr_err("Couldn't create devinfo cache\n");
3688 ret = -ENOMEM;
3689 }
3690
3691 return ret;
3692 }
3693
3694 static int __init iommu_init_mempool(void)
3695 {
3696 int ret;
3697 ret = iommu_iova_cache_init();
3698 if (ret)
3699 return ret;
3700
3701 ret = iommu_domain_cache_init();
3702 if (ret)
3703 goto domain_error;
3704
3705 ret = iommu_devinfo_cache_init();
3706 if (!ret)
3707 return ret;
3708
3709 kmem_cache_destroy(iommu_domain_cache);
3710 domain_error:
3711 iommu_iova_cache_destroy();
3712
3713 return -ENOMEM;
3714 }
3715
3716 static void __init iommu_exit_mempool(void)
3717 {
3718 kmem_cache_destroy(iommu_devinfo_cache);
3719 kmem_cache_destroy(iommu_domain_cache);
3720 iommu_iova_cache_destroy();
3721 }
3722
3723 static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3724 {
3725 struct dmar_drhd_unit *drhd;
3726 u32 vtbar;
3727 int rc;
3728
3729 /* We know that this device on this chipset has its own IOMMU.
3730 * If we find it under a different IOMMU, then the BIOS is lying
3731 * to us. Hope that the IOMMU for this device is actually
3732 * disabled, and it needs no translation...
3733 */
3734 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3735 if (rc) {
3736 /* "can't" happen */
3737 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3738 return;
3739 }
3740 vtbar &= 0xffff0000;
3741
3742 /* we know that the this iommu should be at offset 0xa000 from vtbar */
3743 drhd = dmar_find_matched_drhd_unit(pdev);
3744 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3745 TAINT_FIRMWARE_WORKAROUND,
3746 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3747 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3748 }
3749 DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3750
3751 static void __init init_no_remapping_devices(void)
3752 {
3753 struct dmar_drhd_unit *drhd;
3754 struct device *dev;
3755 int i;
3756
3757 for_each_drhd_unit(drhd) {
3758 if (!drhd->include_all) {
3759 for_each_active_dev_scope(drhd->devices,
3760 drhd->devices_cnt, i, dev)
3761 break;
3762 /* ignore DMAR unit if no devices exist */
3763 if (i == drhd->devices_cnt)
3764 drhd->ignored = 1;
3765 }
3766 }
3767
3768 for_each_active_drhd_unit(drhd) {
3769 if (drhd->include_all)
3770 continue;
3771
3772 for_each_active_dev_scope(drhd->devices,
3773 drhd->devices_cnt, i, dev)
3774 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
3775 break;
3776 if (i < drhd->devices_cnt)
3777 continue;
3778
3779 /* This IOMMU has *only* gfx devices. Either bypass it or
3780 set the gfx_mapped flag, as appropriate */
3781 if (dmar_map_gfx) {
3782 intel_iommu_gfx_mapped = 1;
3783 } else {
3784 drhd->ignored = 1;
3785 for_each_active_dev_scope(drhd->devices,
3786 drhd->devices_cnt, i, dev)
3787 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3788 }
3789 }
3790 }
3791
3792 #ifdef CONFIG_SUSPEND
3793 static int init_iommu_hw(void)
3794 {
3795 struct dmar_drhd_unit *drhd;
3796 struct intel_iommu *iommu = NULL;
3797
3798 for_each_active_iommu(iommu, drhd)
3799 if (iommu->qi)
3800 dmar_reenable_qi(iommu);
3801
3802 for_each_iommu(iommu, drhd) {
3803 if (drhd->ignored) {
3804 /*
3805 * we always have to disable PMRs or DMA may fail on
3806 * this device
3807 */
3808 if (force_on)
3809 iommu_disable_protect_mem_regions(iommu);
3810 continue;
3811 }
3812
3813 iommu_flush_write_buffer(iommu);
3814
3815 iommu_set_root_entry(iommu);
3816
3817 iommu->flush.flush_context(iommu, 0, 0, 0,
3818 DMA_CCMD_GLOBAL_INVL);
3819 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3820 iommu_enable_translation(iommu);
3821 iommu_disable_protect_mem_regions(iommu);
3822 }
3823
3824 return 0;
3825 }
3826
3827 static void iommu_flush_all(void)
3828 {
3829 struct dmar_drhd_unit *drhd;
3830 struct intel_iommu *iommu;
3831
3832 for_each_active_iommu(iommu, drhd) {
3833 iommu->flush.flush_context(iommu, 0, 0, 0,
3834 DMA_CCMD_GLOBAL_INVL);
3835 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3836 DMA_TLB_GLOBAL_FLUSH);
3837 }
3838 }
3839
3840 static int iommu_suspend(void)
3841 {
3842 struct dmar_drhd_unit *drhd;
3843 struct intel_iommu *iommu = NULL;
3844 unsigned long flag;
3845
3846 for_each_active_iommu(iommu, drhd) {
3847 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3848 GFP_ATOMIC);
3849 if (!iommu->iommu_state)
3850 goto nomem;
3851 }
3852
3853 iommu_flush_all();
3854
3855 for_each_active_iommu(iommu, drhd) {
3856 iommu_disable_translation(iommu);
3857
3858 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3859
3860 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3861 readl(iommu->reg + DMAR_FECTL_REG);
3862 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3863 readl(iommu->reg + DMAR_FEDATA_REG);
3864 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3865 readl(iommu->reg + DMAR_FEADDR_REG);
3866 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3867 readl(iommu->reg + DMAR_FEUADDR_REG);
3868
3869 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3870 }
3871 return 0;
3872
3873 nomem:
3874 for_each_active_iommu(iommu, drhd)
3875 kfree(iommu->iommu_state);
3876
3877 return -ENOMEM;
3878 }
3879
3880 static void iommu_resume(void)
3881 {
3882 struct dmar_drhd_unit *drhd;
3883 struct intel_iommu *iommu = NULL;
3884 unsigned long flag;
3885
3886 if (init_iommu_hw()) {
3887 if (force_on)
3888 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3889 else
3890 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
3891 return;
3892 }
3893
3894 for_each_active_iommu(iommu, drhd) {
3895
3896 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3897
3898 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3899 iommu->reg + DMAR_FECTL_REG);
3900 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3901 iommu->reg + DMAR_FEDATA_REG);
3902 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3903 iommu->reg + DMAR_FEADDR_REG);
3904 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3905 iommu->reg + DMAR_FEUADDR_REG);
3906
3907 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3908 }
3909
3910 for_each_active_iommu(iommu, drhd)
3911 kfree(iommu->iommu_state);
3912 }
3913
3914 static struct syscore_ops iommu_syscore_ops = {
3915 .resume = iommu_resume,
3916 .suspend = iommu_suspend,
3917 };
3918
3919 static void __init init_iommu_pm_ops(void)
3920 {
3921 register_syscore_ops(&iommu_syscore_ops);
3922 }
3923
3924 #else
/* Without CONFIG_SUSPEND there are no suspend/resume hooks to register. */
static inline void init_iommu_pm_ops(void)
{
}
#endif /* CONFIG_SUSPEND */
3927
3928
3929 int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
3930 {
3931 struct acpi_dmar_reserved_memory *rmrr;
3932 struct dmar_rmrr_unit *rmrru;
3933
3934 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
3935 if (!rmrru)
3936 return -ENOMEM;
3937
3938 rmrru->hdr = header;
3939 rmrr = (struct acpi_dmar_reserved_memory *)header;
3940 rmrru->base_address = rmrr->base_address;
3941 rmrru->end_address = rmrr->end_address;
3942 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
3943 ((void *)rmrr) + rmrr->header.length,
3944 &rmrru->devices_cnt);
3945 if (rmrru->devices_cnt && rmrru->devices == NULL) {
3946 kfree(rmrru);
3947 return -ENOMEM;
3948 }
3949
3950 list_add(&rmrru->list, &dmar_rmrr_units);
3951
3952 return 0;
3953 }
3954
3955 static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
3956 {
3957 struct dmar_atsr_unit *atsru;
3958 struct acpi_dmar_atsr *tmp;
3959
3960 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
3961 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
3962 if (atsr->segment != tmp->segment)
3963 continue;
3964 if (atsr->header.length != tmp->header.length)
3965 continue;
3966 if (memcmp(atsr, tmp, atsr->header.length) == 0)
3967 return atsru;
3968 }
3969
3970 return NULL;
3971 }
3972
3973 int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
3974 {
3975 struct acpi_dmar_atsr *atsr;
3976 struct dmar_atsr_unit *atsru;
3977
3978 if (system_state != SYSTEM_BOOTING && !intel_iommu_enabled)
3979 return 0;
3980
3981 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3982 atsru = dmar_find_atsr(atsr);
3983 if (atsru)
3984 return 0;
3985
3986 atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
3987 if (!atsru)
3988 return -ENOMEM;
3989
3990 /*
3991 * If memory is allocated from slab by ACPI _DSM method, we need to
3992 * copy the memory content because the memory buffer will be freed
3993 * on return.
3994 */
3995 atsru->hdr = (void *)(atsru + 1);
3996 memcpy(atsru->hdr, hdr, hdr->length);
3997 atsru->include_all = atsr->flags & 0x1;
3998 if (!atsru->include_all) {
3999 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
4000 (void *)atsr + atsr->header.length,
4001 &atsru->devices_cnt);
4002 if (atsru->devices_cnt && atsru->devices == NULL) {
4003 kfree(atsru);
4004 return -ENOMEM;
4005 }
4006 }
4007
4008 list_add_rcu(&atsru->list, &dmar_atsr_units);
4009
4010 return 0;
4011 }
4012
4013 static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
4014 {
4015 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
4016 kfree(atsru);
4017 }
4018
4019 int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4020 {
4021 struct acpi_dmar_atsr *atsr;
4022 struct dmar_atsr_unit *atsru;
4023
4024 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4025 atsru = dmar_find_atsr(atsr);
4026 if (atsru) {
4027 list_del_rcu(&atsru->list);
4028 synchronize_rcu();
4029 intel_iommu_free_atsr(atsru);
4030 }
4031
4032 return 0;
4033 }
4034
4035 int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4036 {
4037 int i;
4038 struct device *dev;
4039 struct acpi_dmar_atsr *atsr;
4040 struct dmar_atsr_unit *atsru;
4041
4042 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4043 atsru = dmar_find_atsr(atsr);
4044 if (!atsru)
4045 return 0;
4046
4047 if (!atsru->include_all && atsru->devices && atsru->devices_cnt)
4048 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
4049 i, dev)
4050 return -EBUSY;
4051
4052 return 0;
4053 }
4054
4055 static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
4056 {
4057 int sp, ret = 0;
4058 struct intel_iommu *iommu = dmaru->iommu;
4059
4060 if (g_iommus[iommu->seq_id])
4061 return 0;
4062
4063 if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
4064 pr_warn("%s: Doesn't support hardware pass through.\n",
4065 iommu->name);
4066 return -ENXIO;
4067 }
4068 if (!ecap_sc_support(iommu->ecap) &&
4069 domain_update_iommu_snooping(iommu)) {
4070 pr_warn("%s: Doesn't support snooping.\n",
4071 iommu->name);
4072 return -ENXIO;
4073 }
4074 sp = domain_update_iommu_superpage(iommu) - 1;
4075 if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
4076 pr_warn("%s: Doesn't support large page.\n",
4077 iommu->name);
4078 return -ENXIO;
4079 }
4080
4081 /*
4082 * Disable translation if already enabled prior to OS handover.
4083 */
4084 if (iommu->gcmd & DMA_GCMD_TE)
4085 iommu_disable_translation(iommu);
4086
4087 g_iommus[iommu->seq_id] = iommu;
4088 ret = iommu_init_domains(iommu);
4089 if (ret == 0)
4090 ret = iommu_alloc_root_entry(iommu);
4091 if (ret)
4092 goto out;
4093
4094 if (dmaru->ignored) {
4095 /*
4096 * we always have to disable PMRs or DMA may fail on this device
4097 */
4098 if (force_on)
4099 iommu_disable_protect_mem_regions(iommu);
4100 return 0;
4101 }
4102
4103 intel_iommu_init_qi(iommu);
4104 iommu_flush_write_buffer(iommu);
4105 ret = dmar_set_interrupt(iommu);
4106 if (ret)
4107 goto disable_iommu;
4108
4109 iommu_set_root_entry(iommu);
4110 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
4111 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4112 iommu_enable_translation(iommu);
4113
4114 iommu_disable_protect_mem_regions(iommu);
4115 return 0;
4116
4117 disable_iommu:
4118 disable_dmar_iommu(iommu);
4119 out:
4120 free_dmar_iommu(iommu);
4121 return ret;
4122 }
4123
4124 int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
4125 {
4126 int ret = 0;
4127 struct intel_iommu *iommu = dmaru->iommu;
4128
4129 if (!intel_iommu_enabled)
4130 return 0;
4131 if (iommu == NULL)
4132 return -EINVAL;
4133
4134 if (insert) {
4135 ret = intel_iommu_add(dmaru);
4136 } else {
4137 disable_dmar_iommu(iommu);
4138 free_dmar_iommu(iommu);
4139 }
4140
4141 return ret;
4142 }
4143
4144 static void intel_iommu_free_dmars(void)
4145 {
4146 struct dmar_rmrr_unit *rmrru, *rmrr_n;
4147 struct dmar_atsr_unit *atsru, *atsr_n;
4148
4149 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
4150 list_del(&rmrru->list);
4151 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
4152 kfree(rmrru);
4153 }
4154
4155 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
4156 list_del(&atsru->list);
4157 intel_iommu_free_atsr(atsru);
4158 }
4159 }
4160
4161 int dmar_find_matched_atsr_unit(struct pci_dev *dev)
4162 {
4163 int i, ret = 1;
4164 struct pci_bus *bus;
4165 struct pci_dev *bridge = NULL;
4166 struct device *tmp;
4167 struct acpi_dmar_atsr *atsr;
4168 struct dmar_atsr_unit *atsru;
4169
4170 dev = pci_physfn(dev);
4171 for (bus = dev->bus; bus; bus = bus->parent) {
4172 bridge = bus->self;
4173 if (!bridge || !pci_is_pcie(bridge) ||
4174 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
4175 return 0;
4176 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
4177 break;
4178 }
4179 if (!bridge)
4180 return 0;
4181
4182 rcu_read_lock();
4183 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4184 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4185 if (atsr->segment != pci_domain_nr(dev->bus))
4186 continue;
4187
4188 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
4189 if (tmp == &bridge->dev)
4190 goto out;
4191
4192 if (atsru->include_all)
4193 goto out;
4194 }
4195 ret = 0;
4196 out:
4197 rcu_read_unlock();
4198
4199 return ret;
4200 }
4201
4202 int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
4203 {
4204 int ret = 0;
4205 struct dmar_rmrr_unit *rmrru;
4206 struct dmar_atsr_unit *atsru;
4207 struct acpi_dmar_atsr *atsr;
4208 struct acpi_dmar_reserved_memory *rmrr;
4209
4210 if (!intel_iommu_enabled && system_state != SYSTEM_BOOTING)
4211 return 0;
4212
4213 list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
4214 rmrr = container_of(rmrru->hdr,
4215 struct acpi_dmar_reserved_memory, header);
4216 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4217 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
4218 ((void *)rmrr) + rmrr->header.length,
4219 rmrr->segment, rmrru->devices,
4220 rmrru->devices_cnt);
4221 if(ret < 0)
4222 return ret;
4223 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
4224 dmar_remove_dev_scope(info, rmrr->segment,
4225 rmrru->devices, rmrru->devices_cnt);
4226 }
4227 }
4228
4229 list_for_each_entry(atsru, &dmar_atsr_units, list) {
4230 if (atsru->include_all)
4231 continue;
4232
4233 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4234 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4235 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
4236 (void *)atsr + atsr->header.length,
4237 atsr->segment, atsru->devices,
4238 atsru->devices_cnt);
4239 if (ret > 0)
4240 break;
4241 else if(ret < 0)
4242 return ret;
4243 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
4244 if (dmar_remove_dev_scope(info, atsr->segment,
4245 atsru->devices, atsru->devices_cnt))
4246 break;
4247 }
4248 }
4249
4250 return 0;
4251 }
4252
4253 /*
4254 * Here we only respond to action of unbound device from driver.
4255 *
4256 * Added device is not attached to its DMAR domain here yet. That will happen
4257 * when mapping the device to iova.
4258 */
4259 static int device_notifier(struct notifier_block *nb,
4260 unsigned long action, void *data)
4261 {
4262 struct device *dev = data;
4263 struct dmar_domain *domain;
4264
4265 if (iommu_dummy(dev))
4266 return 0;
4267
4268 if (action != BUS_NOTIFY_REMOVED_DEVICE)
4269 return 0;
4270
4271 domain = find_domain(dev);
4272 if (!domain)
4273 return 0;
4274
4275 down_read(&dmar_global_lock);
4276 dmar_remove_one_dev_info(domain, dev);
4277 if (!domain_type_is_vm_or_si(domain) && list_empty(&domain->devices))
4278 domain_exit(domain);
4279 up_read(&dmar_global_lock);
4280
4281 return 0;
4282 }
4283
4284 static struct notifier_block device_nb = {
4285 .notifier_call = device_notifier,
4286 };
4287
4288 static int intel_iommu_memory_notifier(struct notifier_block *nb,
4289 unsigned long val, void *v)
4290 {
4291 struct memory_notify *mhp = v;
4292 unsigned long long start, end;
4293 unsigned long start_vpfn, last_vpfn;
4294
4295 switch (val) {
4296 case MEM_GOING_ONLINE:
4297 start = mhp->start_pfn << PAGE_SHIFT;
4298 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
4299 if (iommu_domain_identity_map(si_domain, start, end)) {
4300 pr_warn("Failed to build identity map for [%llx-%llx]\n",
4301 start, end);
4302 return NOTIFY_BAD;
4303 }
4304 break;
4305
4306 case MEM_OFFLINE:
4307 case MEM_CANCEL_ONLINE:
4308 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4309 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
4310 while (start_vpfn <= last_vpfn) {
4311 struct iova *iova;
4312 struct dmar_drhd_unit *drhd;
4313 struct intel_iommu *iommu;
4314 struct page *freelist;
4315
4316 iova = find_iova(&si_domain->iovad, start_vpfn);
4317 if (iova == NULL) {
4318 pr_debug("Failed get IOVA for PFN %lx\n",
4319 start_vpfn);
4320 break;
4321 }
4322
4323 iova = split_and_remove_iova(&si_domain->iovad, iova,
4324 start_vpfn, last_vpfn);
4325 if (iova == NULL) {
4326 pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
4327 start_vpfn, last_vpfn);
4328 return NOTIFY_BAD;
4329 }
4330
4331 freelist = domain_unmap(si_domain, iova->pfn_lo,
4332 iova->pfn_hi);
4333
4334 rcu_read_lock();
4335 for_each_active_iommu(iommu, drhd)
4336 iommu_flush_iotlb_psi(iommu, si_domain,
4337 iova->pfn_lo, iova_size(iova),
4338 !freelist, 0);
4339 rcu_read_unlock();
4340 dma_free_pagelist(freelist);
4341
4342 start_vpfn = iova->pfn_hi + 1;
4343 free_iova_mem(iova);
4344 }
4345 break;
4346 }
4347
4348 return NOTIFY_OK;
4349 }
4350
4351 static struct notifier_block intel_iommu_memory_nb = {
4352 .notifier_call = intel_iommu_memory_notifier,
4353 .priority = 0
4354 };
4355
4356
4357 static ssize_t intel_iommu_show_version(struct device *dev,
4358 struct device_attribute *attr,
4359 char *buf)
4360 {
4361 struct intel_iommu *iommu = dev_get_drvdata(dev);
4362 u32 ver = readl(iommu->reg + DMAR_VER_REG);
4363 return sprintf(buf, "%d:%d\n",
4364 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4365 }
4366 static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
4367
4368 static ssize_t intel_iommu_show_address(struct device *dev,
4369 struct device_attribute *attr,
4370 char *buf)
4371 {
4372 struct intel_iommu *iommu = dev_get_drvdata(dev);
4373 return sprintf(buf, "%llx\n", iommu->reg_phys);
4374 }
4375 static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
4376
4377 static ssize_t intel_iommu_show_cap(struct device *dev,
4378 struct device_attribute *attr,
4379 char *buf)
4380 {
4381 struct intel_iommu *iommu = dev_get_drvdata(dev);
4382 return sprintf(buf, "%llx\n", iommu->cap);
4383 }
4384 static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
4385
4386 static ssize_t intel_iommu_show_ecap(struct device *dev,
4387 struct device_attribute *attr,
4388 char *buf)
4389 {
4390 struct intel_iommu *iommu = dev_get_drvdata(dev);
4391 return sprintf(buf, "%llx\n", iommu->ecap);
4392 }
4393 static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4394
4395 static ssize_t intel_iommu_show_ndoms(struct device *dev,
4396 struct device_attribute *attr,
4397 char *buf)
4398 {
4399 struct intel_iommu *iommu = dev_get_drvdata(dev);
4400 return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
4401 }
4402 static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL);
4403
4404 static ssize_t intel_iommu_show_ndoms_used(struct device *dev,
4405 struct device_attribute *attr,
4406 char *buf)
4407 {
4408 struct intel_iommu *iommu = dev_get_drvdata(dev);
4409 return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
4410 cap_ndoms(iommu->cap)));
4411 }
4412 static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL);
4413
4414 static struct attribute *intel_iommu_attrs[] = {
4415 &dev_attr_version.attr,
4416 &dev_attr_address.attr,
4417 &dev_attr_cap.attr,
4418 &dev_attr_ecap.attr,
4419 &dev_attr_domains_supported.attr,
4420 &dev_attr_domains_used.attr,
4421 NULL,
4422 };
4423
4424 static struct attribute_group intel_iommu_group = {
4425 .name = "intel-iommu",
4426 .attrs = intel_iommu_attrs,
4427 };
4428
4429 const struct attribute_group *intel_iommu_groups[] = {
4430 &intel_iommu_group,
4431 NULL,
4432 };
4433
4434 int __init intel_iommu_init(void)
4435 {
4436 int ret = -ENODEV;
4437 struct dmar_drhd_unit *drhd;
4438 struct intel_iommu *iommu;
4439
4440 /* VT-d is required for a TXT/tboot launch, so enforce that */
4441 force_on = tboot_force_iommu();
4442
4443 if (iommu_init_mempool()) {
4444 if (force_on)
4445 panic("tboot: Failed to initialize iommu memory\n");
4446 return -ENOMEM;
4447 }
4448
4449 down_write(&dmar_global_lock);
4450 if (dmar_table_init()) {
4451 if (force_on)
4452 panic("tboot: Failed to initialize DMAR table\n");
4453 goto out_free_dmar;
4454 }
4455
4456 if (dmar_dev_scope_init() < 0) {
4457 if (force_on)
4458 panic("tboot: Failed to initialize DMAR device scope\n");
4459 goto out_free_dmar;
4460 }
4461
4462 if (no_iommu || dmar_disabled)
4463 goto out_free_dmar;
4464
4465 if (list_empty(&dmar_rmrr_units))
4466 pr_info("No RMRR found\n");
4467
4468 if (list_empty(&dmar_atsr_units))
4469 pr_info("No ATSR found\n");
4470
4471 if (dmar_init_reserved_ranges()) {
4472 if (force_on)
4473 panic("tboot: Failed to reserve iommu ranges\n");
4474 goto out_free_reserved_range;
4475 }
4476
4477 init_no_remapping_devices();
4478
4479 ret = init_dmars();
4480 if (ret) {
4481 if (force_on)
4482 panic("tboot: Failed to initialize DMARs\n");
4483 pr_err("Initialization failed\n");
4484 goto out_free_reserved_range;
4485 }
4486 up_write(&dmar_global_lock);
4487 pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
4488
4489 init_timer(&unmap_timer);
4490 #ifdef CONFIG_SWIOTLB
4491 swiotlb = 0;
4492 #endif
4493 dma_ops = &intel_dma_ops;
4494
4495 init_iommu_pm_ops();
4496
4497 for_each_active_iommu(iommu, drhd)
4498 iommu->iommu_dev = iommu_device_create(NULL, iommu,
4499 intel_iommu_groups,
4500 "%s", iommu->name);
4501
4502 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
4503 bus_register_notifier(&pci_bus_type, &device_nb);
4504 if (si_domain && !hw_pass_through)
4505 register_memory_notifier(&intel_iommu_memory_nb);
4506
4507 intel_iommu_enabled = 1;
4508
4509 return 0;
4510
4511 out_free_reserved_range:
4512 put_iova_domain(&reserved_iova_list);
4513 out_free_dmar:
4514 intel_iommu_free_dmars();
4515 up_write(&dmar_global_lock);
4516 iommu_exit_mempool();
4517 return ret;
4518 }
4519
4520 static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
4521 {
4522 struct intel_iommu *iommu = opaque;
4523
4524 domain_context_clear_one(iommu, PCI_BUS_NUM(alias), alias & 0xff);
4525 return 0;
4526 }
4527
4528 /*
4529 * NB - intel-iommu lacks any sort of reference counting for the users of
4530 * dependent devices. If multiple endpoints have intersecting dependent
4531 * devices, unbinding the driver from any one of them will possibly leave
4532 * the others unable to operate.
4533 */
4534 static void domain_context_clear(struct intel_iommu *iommu, struct device *dev)
4535 {
4536 if (!iommu || !dev || !dev_is_pci(dev))
4537 return;
4538
4539 pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_clear_one_cb, iommu);
4540 }
4541
4542 static void dmar_remove_one_dev_info(struct dmar_domain *domain,
4543 struct device *dev)
4544 {
4545 struct device_domain_info *info;
4546 struct intel_iommu *iommu;
4547 unsigned long flags;
4548 u8 bus, devfn;
4549
4550 iommu = device_to_iommu(dev, &bus, &devfn);
4551 if (!iommu)
4552 return;
4553
4554 info = dev->archdata.iommu;
4555
4556 if (WARN_ON(!info))
4557 return;
4558
4559 spin_lock_irqsave(&device_domain_lock, flags);
4560 unlink_domain_info(info);
4561 spin_unlock_irqrestore(&device_domain_lock, flags);
4562
4563 iommu_disable_dev_iotlb(info);
4564 domain_context_clear(iommu, dev);
4565 free_devinfo_mem(info);
4566
4567 spin_lock_irqsave(&iommu->lock, flags);
4568 domain_detach_iommu(domain, iommu);
4569 spin_unlock_irqrestore(&iommu->lock, flags);
4570 }
4571
4572 static int md_domain_init(struct dmar_domain *domain, int guest_width)
4573 {
4574 int adjust_width;
4575
4576 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
4577 DMA_32BIT_PFN);
4578 domain_reserve_special_ranges(domain);
4579
4580 /* calculate AGAW */
4581 domain->gaw = guest_width;
4582 adjust_width = guestwidth_to_adjustwidth(guest_width);
4583 domain->agaw = width_to_agaw(adjust_width);
4584
4585 domain->iommu_coherency = 0;
4586 domain->iommu_snooping = 0;
4587 domain->iommu_superpage = 0;
4588 domain->max_addr = 0;
4589
4590 /* always allocate the top pgd */
4591 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
4592 if (!domain->pgd)
4593 return -ENOMEM;
4594 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4595 return 0;
4596 }
4597
4598 static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
4599 {
4600 struct dmar_domain *dmar_domain;
4601 struct iommu_domain *domain;
4602
4603 if (type != IOMMU_DOMAIN_UNMANAGED)
4604 return NULL;
4605
4606 dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE);
4607 if (!dmar_domain) {
4608 pr_err("Can't allocate dmar_domain\n");
4609 return NULL;
4610 }
4611 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
4612 pr_err("Domain initialization failed\n");
4613 domain_exit(dmar_domain);
4614 return NULL;
4615 }
4616 domain_update_iommu_cap(dmar_domain);
4617
4618 domain = &dmar_domain->domain;
4619 domain->geometry.aperture_start = 0;
4620 domain->geometry.aperture_end = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
4621 domain->geometry.force_aperture = true;
4622
4623 return domain;
4624 }
4625
/* domain_free callback: tear down the dmar_domain that embeds @domain. */
static void intel_iommu_domain_free(struct iommu_domain *domain)
{
	struct dmar_domain *dmar_domain = to_dmar_domain(domain);

	domain_exit(dmar_domain);
}
4630
4631 static int intel_iommu_attach_device(struct iommu_domain *domain,
4632 struct device *dev)
4633 {
4634 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4635 struct intel_iommu *iommu;
4636 int addr_width;
4637 u8 bus, devfn;
4638
4639 if (device_is_rmrr_locked(dev)) {
4640 dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n");
4641 return -EPERM;
4642 }
4643
4644 /* normally dev is not mapped */
4645 if (unlikely(domain_context_mapped(dev))) {
4646 struct dmar_domain *old_domain;
4647
4648 old_domain = find_domain(dev);
4649 if (old_domain) {
4650 rcu_read_lock();
4651 if (domain_type_is_vm_or_si(dmar_domain))
4652 dmar_remove_one_dev_info(old_domain, dev);
4653 else
4654 domain_remove_dev_info(old_domain);
4655 rcu_read_unlock();
4656
4657 if (!domain_type_is_vm_or_si(old_domain) &&
4658 list_empty(&old_domain->devices))
4659 domain_exit(old_domain);
4660 }
4661 }
4662
4663 iommu = device_to_iommu(dev, &bus, &devfn);
4664 if (!iommu)
4665 return -ENODEV;
4666
4667 /* check if this iommu agaw is sufficient for max mapped address */
4668 addr_width = agaw_to_width(iommu->agaw);
4669 if (addr_width > cap_mgaw(iommu->cap))
4670 addr_width = cap_mgaw(iommu->cap);
4671
4672 if (dmar_domain->max_addr > (1LL << addr_width)) {
4673 pr_err("%s: iommu width (%d) is not "
4674 "sufficient for the mapped address (%llx)\n",
4675 __func__, addr_width, dmar_domain->max_addr);
4676 return -EFAULT;
4677 }
4678 dmar_domain->gaw = addr_width;
4679
4680 /*
4681 * Knock out extra levels of page tables if necessary
4682 */
4683 while (iommu->agaw < dmar_domain->agaw) {
4684 struct dma_pte *pte;
4685
4686 pte = dmar_domain->pgd;
4687 if (dma_pte_present(pte)) {
4688 dmar_domain->pgd = (struct dma_pte *)
4689 phys_to_virt(dma_pte_addr(pte));
4690 free_pgtable_page(pte);
4691 }
4692 dmar_domain->agaw--;
4693 }
4694
4695 return domain_add_dev_info(dmar_domain, dev);
4696 }
4697
/* detach_dev callback: remove @dev from the dmar_domain embedding @domain. */
static void intel_iommu_detach_device(struct iommu_domain *domain,
				      struct device *dev)
{
	struct dmar_domain *dmar_domain = to_dmar_domain(domain);

	dmar_remove_one_dev_info(dmar_domain, dev);
}
4703
4704 static int intel_iommu_map(struct iommu_domain *domain,
4705 unsigned long iova, phys_addr_t hpa,
4706 size_t size, int iommu_prot)
4707 {
4708 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4709 u64 max_addr;
4710 int prot = 0;
4711 int ret;
4712
4713 if (iommu_prot & IOMMU_READ)
4714 prot |= DMA_PTE_READ;
4715 if (iommu_prot & IOMMU_WRITE)
4716 prot |= DMA_PTE_WRITE;
4717 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
4718 prot |= DMA_PTE_SNP;
4719
4720 max_addr = iova + size;
4721 if (dmar_domain->max_addr < max_addr) {
4722 u64 end;
4723
4724 /* check if minimum agaw is sufficient for mapped address */
4725 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
4726 if (end < max_addr) {
4727 pr_err("%s: iommu width (%d) is not "
4728 "sufficient for the mapped address (%llx)\n",
4729 __func__, dmar_domain->gaw, max_addr);
4730 return -EFAULT;
4731 }
4732 dmar_domain->max_addr = max_addr;
4733 }
4734 /* Round up size to next multiple of PAGE_SIZE, if it and
4735 the low bits of hpa would take us onto the next page */
4736 size = aligned_nrpages(hpa, size);
4737 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
4738 hpa >> VTD_PAGE_SHIFT, size, prot);
4739 return ret;
4740 }
4741
4742 static size_t intel_iommu_unmap(struct iommu_domain *domain,
4743 unsigned long iova, size_t size)
4744 {
4745 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4746 struct page *freelist = NULL;
4747 struct intel_iommu *iommu;
4748 unsigned long start_pfn, last_pfn;
4749 unsigned int npages;
4750 int iommu_id, level = 0;
4751
4752 /* Cope with horrid API which requires us to unmap more than the
4753 size argument if it happens to be a large-page mapping. */
4754 if (!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level))
4755 BUG();
4756
4757 if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
4758 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4759
4760 start_pfn = iova >> VTD_PAGE_SHIFT;
4761 last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
4762
4763 freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
4764
4765 npages = last_pfn - start_pfn + 1;
4766
4767 for_each_domain_iommu(iommu_id, dmar_domain) {
4768 iommu = g_iommus[iommu_id];
4769
4770 iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
4771 start_pfn, npages, !freelist, 0);
4772 }
4773
4774 dma_free_pagelist(freelist);
4775
4776 if (dmar_domain->max_addr == iova + size)
4777 dmar_domain->max_addr = iova;
4778
4779 return size;
4780 }
4781
4782 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
4783 dma_addr_t iova)
4784 {
4785 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4786 struct dma_pte *pte;
4787 int level = 0;
4788 u64 phys = 0;
4789
4790 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
4791 if (pte)
4792 phys = dma_pte_addr(pte);
4793
4794 return phys;
4795 }
4796
4797 static bool intel_iommu_capable(enum iommu_cap cap)
4798 {
4799 if (cap == IOMMU_CAP_CACHE_COHERENCY)
4800 return domain_update_iommu_snooping(NULL) == 1;
4801 if (cap == IOMMU_CAP_INTR_REMAP)
4802 return irq_remapping_enabled == 1;
4803
4804 return false;
4805 }
4806
4807 static int intel_iommu_add_device(struct device *dev)
4808 {
4809 struct intel_iommu *iommu;
4810 struct iommu_group *group;
4811 u8 bus, devfn;
4812
4813 iommu = device_to_iommu(dev, &bus, &devfn);
4814 if (!iommu)
4815 return -ENODEV;
4816
4817 iommu_device_link(iommu->iommu_dev, dev);
4818
4819 group = iommu_group_get_for_dev(dev);
4820
4821 if (IS_ERR(group))
4822 return PTR_ERR(group);
4823
4824 iommu_group_put(group);
4825 return 0;
4826 }
4827
4828 static void intel_iommu_remove_device(struct device *dev)
4829 {
4830 struct intel_iommu *iommu;
4831 u8 bus, devfn;
4832
4833 iommu = device_to_iommu(dev, &bus, &devfn);
4834 if (!iommu)
4835 return;
4836
4837 iommu_group_remove_device(dev);
4838
4839 iommu_device_unlink(iommu->iommu_dev, dev);
4840 }
4841
4842 static const struct iommu_ops intel_iommu_ops = {
4843 .capable = intel_iommu_capable,
4844 .domain_alloc = intel_iommu_domain_alloc,
4845 .domain_free = intel_iommu_domain_free,
4846 .attach_dev = intel_iommu_attach_device,
4847 .detach_dev = intel_iommu_detach_device,
4848 .map = intel_iommu_map,
4849 .unmap = intel_iommu_unmap,
4850 .map_sg = default_iommu_map_sg,
4851 .iova_to_phys = intel_iommu_iova_to_phys,
4852 .add_device = intel_iommu_add_device,
4853 .remove_device = intel_iommu_remove_device,
4854 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
4855 };
4856
4857 static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
4858 {
4859 /* G4x/GM45 integrated gfx dmar support is totally busted. */
4860 pr_info("Disabling IOMMU for graphics on this chipset\n");
4861 dmar_map_gfx = 0;
4862 }
4863
4864 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
4865 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
4866 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
4867 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
4868 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
4869 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
4870 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
4871
4872 static void quirk_iommu_rwbf(struct pci_dev *dev)
4873 {
4874 /*
4875 * Mobile 4 Series Chipset neglects to set RWBF capability,
4876 * but needs it. Same seems to hold for the desktop versions.
4877 */
4878 pr_info("Forcing write-buffer flush capability\n");
4879 rwbf_quirk = 1;
4880 }
4881
4882 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
4883 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
4884 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
4885 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
4886 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
4887 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
4888 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
4889
4890 #define GGC 0x52
4891 #define GGC_MEMORY_SIZE_MASK (0xf << 8)
4892 #define GGC_MEMORY_SIZE_NONE (0x0 << 8)
4893 #define GGC_MEMORY_SIZE_1M (0x1 << 8)
4894 #define GGC_MEMORY_SIZE_2M (0x3 << 8)
4895 #define GGC_MEMORY_VT_ENABLED (0x8 << 8)
4896 #define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
4897 #define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
4898 #define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
4899
4900 static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
4901 {
4902 unsigned short ggc;
4903
4904 if (pci_read_config_word(dev, GGC, &ggc))
4905 return;
4906
4907 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
4908 pr_info("BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
4909 dmar_map_gfx = 0;
4910 } else if (dmar_map_gfx) {
4911 /* we have to ensure the gfx device is idle before we flush */
4912 pr_info("Disabling batched IOTLB flush on Ironlake\n");
4913 intel_iommu_strict = 1;
4914 }
4915 }
4916 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
4917 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
4918 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
4919 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
4920
4921 /* On Tylersburg chipsets, some BIOSes have been known to enable the
4922 ISOCH DMAR unit for the Azalia sound device, but not give it any
4923 TLB entries, which causes it to deadlock. Check for that. We do
4924 this in a function called from init_dmars(), instead of in a PCI
4925 quirk, because we don't want to print the obnoxious "BIOS broken"
4926 message if VT-d is actually disabled.
4927 */
4928 static void __init check_tylersburg_isoch(void)
4929 {
4930 struct pci_dev *pdev;
4931 uint32_t vtisochctrl;
4932
4933 /* If there's no Azalia in the system anyway, forget it. */
4934 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
4935 if (!pdev)
4936 return;
4937 pci_dev_put(pdev);
4938
4939 /* System Management Registers. Might be hidden, in which case
4940 we can't do the sanity check. But that's OK, because the
4941 known-broken BIOSes _don't_ actually hide it, so far. */
4942 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
4943 if (!pdev)
4944 return;
4945
4946 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
4947 pci_dev_put(pdev);
4948 return;
4949 }
4950
4951 pci_dev_put(pdev);
4952
4953 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
4954 if (vtisochctrl & 1)
4955 return;
4956
4957 /* Drop all bits other than the number of TLB entries */
4958 vtisochctrl &= 0x1c;
4959
4960 /* If we have the recommended number of TLB entries (16), fine. */
4961 if (vtisochctrl == 0x10)
4962 return;
4963
4964 /* Zero TLB entries? You get to ride the short bus to school. */
4965 if (!vtisochctrl) {
4966 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
4967 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4968 dmi_get_system_info(DMI_BIOS_VENDOR),
4969 dmi_get_system_info(DMI_BIOS_VERSION),
4970 dmi_get_system_info(DMI_PRODUCT_VERSION));
4971 iommu_identity_mapping |= IDENTMAP_AZALIA;
4972 return;
4973 }
4974
4975 pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
4976 vtisochctrl);
4977 }