drivers/iommu/intel-iommu.c
1 /*
2 * Copyright © 2006-2014 Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * Authors: David Woodhouse <dwmw2@infradead.org>,
14 * Ashok Raj <ashok.raj@intel.com>,
15 * Shaohua Li <shaohua.li@intel.com>,
16 * Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
17 * Fenghua Yu <fenghua.yu@intel.com>
18 * Joerg Roedel <jroedel@suse.de>
19 */
20
21 #define pr_fmt(fmt) "DMAR: " fmt
22
23 #include <linux/init.h>
24 #include <linux/bitmap.h>
25 #include <linux/debugfs.h>
26 #include <linux/export.h>
27 #include <linux/slab.h>
28 #include <linux/irq.h>
29 #include <linux/interrupt.h>
30 #include <linux/spinlock.h>
31 #include <linux/pci.h>
32 #include <linux/dmar.h>
33 #include <linux/dma-mapping.h>
34 #include <linux/dma-direct.h>
35 #include <linux/mempool.h>
36 #include <linux/memory.h>
37 #include <linux/cpu.h>
38 #include <linux/timer.h>
39 #include <linux/io.h>
40 #include <linux/iova.h>
41 #include <linux/iommu.h>
42 #include <linux/intel-iommu.h>
43 #include <linux/syscore_ops.h>
44 #include <linux/tboot.h>
45 #include <linux/dmi.h>
46 #include <linux/pci-ats.h>
47 #include <linux/memblock.h>
48 #include <linux/dma-contiguous.h>
49 #include <linux/dma-direct.h>
50 #include <linux/crash_dump.h>
51 #include <asm/irq_remapping.h>
52 #include <asm/cacheflush.h>
53 #include <asm/iommu.h>
54
55 #include "irq_remapping.h"
56 #include "intel-pasid.h"
57
58 #define ROOT_SIZE VTD_PAGE_SIZE
59 #define CONTEXT_SIZE VTD_PAGE_SIZE
60
61 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
62 #define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
63 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
64 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
65
66 #define IOAPIC_RANGE_START (0xfee00000)
67 #define IOAPIC_RANGE_END (0xfeefffff)
68 #define IOVA_START_ADDR (0x1000)
69
70 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 57
71
72 #define MAX_AGAW_WIDTH 64
73 #define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
74
75 #define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
76 #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
77
78 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
79 to match. That way, we can use 'unsigned long' for PFNs with impunity. */
80 #define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
81 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
82 #define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
83
84 /* IO virtual address start page frame number */
85 #define IOVA_START_PFN (1)
86
87 #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
88
89 /* page table handling */
90 #define LEVEL_STRIDE (9)
91 #define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
92
93 /*
94 * This bitmap is used to advertise the page sizes our hardware supports
95 * to the IOMMU core, which will then use this information to split
96 * physically contiguous memory regions it is mapping into page sizes
97 * that we support.
98 *
99 * Traditionally the IOMMU core just handed us the mappings directly,
100 * after making sure the size is an order of a 4KiB page and that the
101 * mapping has natural alignment.
102 *
103 * To retain this behavior, we currently advertise that we support
104 * all page sizes that are an order of 4KiB.
105 *
106 * If at some point we'd like to utilize the IOMMU core's new behavior,
107 * we could change this to advertise the real page sizes we support.
108 */
109 #define INTEL_IOMMU_PGSIZES (~0xFFFUL)
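/*
 * Illustrative sketch only, not part of the driver: each set bit k in the
 * pgsize bitmap advertises support for a 2^k byte page, so ~0xFFFUL offers
 * every power-of-two size from 4KiB upwards (4KiB, 8KiB, ..., 2MiB, 1GiB)
 * and nothing smaller.  The helper name below is hypothetical.
 */
#if 0
static bool example_pgsize_advertised(unsigned long size)
{
        /* A size is usable iff it is a power of two with its bit set. */
        return is_power_of_2(size) && (size & INTEL_IOMMU_PGSIZES);
}
#endif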
110
111 static inline int agaw_to_level(int agaw)
112 {
113 return agaw + 2;
114 }
115
116 static inline int agaw_to_width(int agaw)
117 {
118 return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
119 }
120
121 static inline int width_to_agaw(int width)
122 {
123 return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
124 }
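/*
 * Illustrative sketch only, not part of the driver: how the three helpers
 * above relate a domain's address width, its AGAW and the page-table depth,
 * assuming the 9-bit LEVEL_STRIDE defined earlier.  The function name is
 * hypothetical.
 */
#if 0
static void example_agaw_relations(void)
{
        int agaw = width_to_agaw(48);           /* 48-bit width -> agaw 2 */
        int levels = agaw_to_level(agaw);       /* agaw 2 -> 4-level table */
        int width = agaw_to_width(agaw);        /* back to 48 bits */

        WARN_ON(agaw != 2 || levels != 4 || width != 48);
}
#endif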
125
126 static inline unsigned int level_to_offset_bits(int level)
127 {
128 return (level - 1) * LEVEL_STRIDE;
129 }
130
131 static inline int pfn_level_offset(unsigned long pfn, int level)
132 {
133 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
134 }
135
136 static inline unsigned long level_mask(int level)
137 {
138 return -1UL << level_to_offset_bits(level);
139 }
140
141 static inline unsigned long level_size(int level)
142 {
143 return 1UL << level_to_offset_bits(level);
144 }
145
146 static inline unsigned long align_to_level(unsigned long pfn, int level)
147 {
148 return (pfn + level_size(level) - 1) & level_mask(level);
149 }
150
151 static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
152 {
153 return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
154 }
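/*
 * Illustrative sketch only, not part of the driver: a worked example of how
 * a DMA pfn is split into per-level table indices by pfn_level_offset(),
 * and how much a single entry covers at each level.  The function name is
 * hypothetical.
 */
#if 0
static void example_pfn_decomposition(void)
{
        unsigned long pfn = 0x12345;

        WARN_ON(pfn_level_offset(pfn, 1) != 0x145);     /* bits 0-8   */
        WARN_ON(pfn_level_offset(pfn, 2) != 0x091);     /* bits 9-17  */
        WARN_ON(pfn_level_offset(pfn, 3) != 0x000);     /* bits 18-26 */
        WARN_ON(level_size(2) != 512);  /* a level-2 entry spans 512 4KiB pages (2MiB) */
}
#endif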
155
156 /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
157 are never going to work. */
158 static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
159 {
160 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
161 }
162
163 static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
164 {
165 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
166 }
167 static inline unsigned long page_to_dma_pfn(struct page *pg)
168 {
169 return mm_to_dma_pfn(page_to_pfn(pg));
170 }
171 static inline unsigned long virt_to_dma_pfn(void *p)
172 {
173 return page_to_dma_pfn(virt_to_page(p));
174 }
175
176 /* global iommu list, set NULL for ignored DMAR units */
177 static struct intel_iommu **g_iommus;
178
179 static void __init check_tylersburg_isoch(void);
180 static int rwbf_quirk;
181
182 /*
183 * set to 1 to panic the kernel if VT-d cannot be successfully enabled
184 * (used when the kernel is launched with TXT)
185 */
186 static int force_on = 0;
187 int intel_iommu_tboot_noforce;
188
189 /*
190 * 0: Present
191 * 1-11: Reserved
192 * 12-63: Context Ptr (12 - (haw-1))
193 * 64-127: Reserved
194 */
195 struct root_entry {
196 u64 lo;
197 u64 hi;
198 };
199 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
200
201 /*
202 * Take a root_entry and return the Lower Context Table Pointer (LCTP)
203 * if marked present.
204 */
205 static phys_addr_t root_entry_lctp(struct root_entry *re)
206 {
207 if (!(re->lo & 1))
208 return 0;
209
210 return re->lo & VTD_PAGE_MASK;
211 }
212
213 /*
214 * Take a root_entry and return the Upper Context Table Pointer (UCTP)
215 * if marked present.
216 */
217 static phys_addr_t root_entry_uctp(struct root_entry *re)
218 {
219 if (!(re->hi & 1))
220 return 0;
221
222 return re->hi & VTD_PAGE_MASK;
223 }
224 /*
225 * low 64 bits:
226 * 0: present
227 * 1: fault processing disable
228 * 2-3: translation type
229 * 12-63: address space root
230 * high 64 bits:
231 * 0-2: address width
232 * 3-6: avail
233 * 8-23: domain id
234 */
235 struct context_entry {
236 u64 lo;
237 u64 hi;
238 };
239
240 static inline void context_clear_pasid_enable(struct context_entry *context)
241 {
242 context->lo &= ~(1ULL << 11);
243 }
244
245 static inline bool context_pasid_enabled(struct context_entry *context)
246 {
247 return !!(context->lo & (1ULL << 11));
248 }
249
250 static inline void context_set_copied(struct context_entry *context)
251 {
252 context->hi |= (1ull << 3);
253 }
254
255 static inline bool context_copied(struct context_entry *context)
256 {
257 return !!(context->hi & (1ULL << 3));
258 }
259
260 static inline bool __context_present(struct context_entry *context)
261 {
262 return (context->lo & 1);
263 }
264
265 static inline bool context_present(struct context_entry *context)
266 {
267 return context_pasid_enabled(context) ?
268 __context_present(context) :
269 __context_present(context) && !context_copied(context);
270 }
271
272 static inline void context_set_present(struct context_entry *context)
273 {
274 context->lo |= 1;
275 }
276
277 static inline void context_set_fault_enable(struct context_entry *context)
278 {
279 context->lo &= (((u64)-1) << 2) | 1;
280 }
281
282 static inline void context_set_translation_type(struct context_entry *context,
283 unsigned long value)
284 {
285 context->lo &= (((u64)-1) << 4) | 3;
286 context->lo |= (value & 3) << 2;
287 }
288
289 static inline void context_set_address_root(struct context_entry *context,
290 unsigned long value)
291 {
292 context->lo &= ~VTD_PAGE_MASK;
293 context->lo |= value & VTD_PAGE_MASK;
294 }
295
296 static inline void context_set_address_width(struct context_entry *context,
297 unsigned long value)
298 {
299 context->hi |= value & 7;
300 }
301
302 static inline void context_set_domain_id(struct context_entry *context,
303 unsigned long value)
304 {
305 context->hi |= (value & ((1 << 16) - 1)) << 8;
306 }
307
308 static inline int context_domain_id(struct context_entry *c)
309 {
310 return((c->hi >> 8) & 0xffff);
311 }
312
313 static inline void context_clear_entry(struct context_entry *context)
314 {
315 context->lo = 0;
316 context->hi = 0;
317 }
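/*
 * Illustrative sketch only, not part of the driver: the setters above are
 * combined to build a complete context entry, mirroring the sequence used
 * by domain_context_mapping_one() further down in this file.  The function
 * name and parameters are hypothetical.
 */
#if 0
static void example_build_context_entry(struct context_entry *context,
                                        u16 did, unsigned long pgd_phys,
                                        int agaw)
{
        context_clear_entry(context);
        context_set_domain_id(context, did);            /* bits 72-87 */
        context_set_address_root(context, pgd_phys);    /* bits 12-63 */
        context_set_address_width(context, agaw);       /* bits 64-66 */
        context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL);
        context_set_fault_enable(context);
        context_set_present(context);                   /* bit 0 last */
}
#endif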
318
319 /*
320 * 0: readable
321 * 1: writable
322 * 2-6: reserved
323 * 7: super page
324 * 8-10: available
325 * 11: snoop behavior
326 * 12-63: Host physical address
327 */
328 struct dma_pte {
329 u64 val;
330 };
331
332 static inline void dma_clear_pte(struct dma_pte *pte)
333 {
334 pte->val = 0;
335 }
336
337 static inline u64 dma_pte_addr(struct dma_pte *pte)
338 {
339 #ifdef CONFIG_64BIT
340 return pte->val & VTD_PAGE_MASK;
341 #else
342 /* Must have a full atomic 64-bit read */
343 return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
344 #endif
345 }
346
347 static inline bool dma_pte_present(struct dma_pte *pte)
348 {
349 return (pte->val & 3) != 0;
350 }
351
352 static inline bool dma_pte_superpage(struct dma_pte *pte)
353 {
354 return (pte->val & DMA_PTE_LARGE_PAGE);
355 }
356
357 static inline int first_pte_in_page(struct dma_pte *pte)
358 {
359 return !((unsigned long)pte & ~VTD_PAGE_MASK);
360 }
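/*
 * Illustrative sketch only, not part of the driver: a present PTE is simply
 * the target page's host physical address ORed with permission bits, as in
 * the pteval computed by pfn_to_dma_pte() below for a newly allocated
 * page-table page.  The function name is hypothetical.
 */
#if 0
static void example_make_pte(struct dma_pte *pte, u64 host_phys)
{
        pte->val = (host_phys & VTD_PAGE_MASK) | DMA_PTE_READ | DMA_PTE_WRITE;
}
#endif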
361
362 /*
363 * This domain is a static identity-mapping domain.
364 * 1. This domain creates a static 1:1 mapping to all usable memory.
365 * 2. It maps to each iommu if successful.
366 * 3. Each iommu maps to this domain if successful.
367 */
368 static struct dmar_domain *si_domain;
369 static int hw_pass_through = 1;
370
371 /*
372 * Domain represents a virtual machine; more than one device
373 * across iommus may be owned by one domain, e.g. a kvm guest.
374 */
375 #define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 0)
376
377 /* si_domain contains multiple devices */
378 #define DOMAIN_FLAG_STATIC_IDENTITY (1 << 1)
379
380 #define for_each_domain_iommu(idx, domain) \
381 for (idx = 0; idx < g_num_of_iommus; idx++) \
382 if (domain->iommu_refcnt[idx])
383
384 struct dmar_rmrr_unit {
385 struct list_head list; /* list of rmrr units */
386 struct acpi_dmar_header *hdr; /* ACPI header */
387 u64 base_address; /* reserved base address*/
388 u64 end_address; /* reserved end address */
389 struct dmar_dev_scope *devices; /* target devices */
390 int devices_cnt; /* target device count */
391 struct iommu_resv_region *resv; /* reserved region handle */
392 };
393
394 struct dmar_atsr_unit {
395 struct list_head list; /* list of ATSR units */
396 struct acpi_dmar_header *hdr; /* ACPI header */
397 struct dmar_dev_scope *devices; /* target devices */
398 int devices_cnt; /* target device count */
399 u8 include_all:1; /* include all ports */
400 };
401
402 static LIST_HEAD(dmar_atsr_units);
403 static LIST_HEAD(dmar_rmrr_units);
404
405 #define for_each_rmrr_units(rmrr) \
406 list_for_each_entry(rmrr, &dmar_rmrr_units, list)
407
408 /* number of registered IOMMUs; used to size and index g_iommus */
409 static int g_num_of_iommus;
410
411 static void domain_exit(struct dmar_domain *domain);
412 static void domain_remove_dev_info(struct dmar_domain *domain);
413 static void dmar_remove_one_dev_info(struct dmar_domain *domain,
414 struct device *dev);
415 static void __dmar_remove_one_dev_info(struct device_domain_info *info);
416 static void domain_context_clear(struct intel_iommu *iommu,
417 struct device *dev);
418 static int domain_detach_iommu(struct dmar_domain *domain,
419 struct intel_iommu *iommu);
420
421 #ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
422 int dmar_disabled = 0;
423 #else
424 int dmar_disabled = 1;
425 #endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
426
427 int intel_iommu_enabled = 0;
428 EXPORT_SYMBOL_GPL(intel_iommu_enabled);
429
430 static int dmar_map_gfx = 1;
431 static int dmar_forcedac;
432 static int intel_iommu_strict;
433 static int intel_iommu_superpage = 1;
434 static int intel_iommu_ecs = 1;
435 static int iommu_identity_mapping;
436
437 #define IDENTMAP_ALL 1
438 #define IDENTMAP_GFX 2
439 #define IDENTMAP_AZALIA 4
440
441 #define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap))
442 #define pasid_enabled(iommu) (ecs_enabled(iommu) && ecap_pasid(iommu->ecap))
443
444 int intel_iommu_gfx_mapped;
445 EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
446
447 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
448 static DEFINE_SPINLOCK(device_domain_lock);
449 static LIST_HEAD(device_domain_list);
450
451 const struct iommu_ops intel_iommu_ops;
452
453 static bool translation_pre_enabled(struct intel_iommu *iommu)
454 {
455 return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
456 }
457
458 static void clear_translation_pre_enabled(struct intel_iommu *iommu)
459 {
460 iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
461 }
462
463 static void init_translation_status(struct intel_iommu *iommu)
464 {
465 u32 gsts;
466
467 gsts = readl(iommu->reg + DMAR_GSTS_REG);
468 if (gsts & DMA_GSTS_TES)
469 iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
470 }
471
472 /* Convert a generic 'struct iommu_domain' to the private 'struct dmar_domain' */
473 static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
474 {
475 return container_of(dom, struct dmar_domain, domain);
476 }
477
478 static int __init intel_iommu_setup(char *str)
479 {
480 if (!str)
481 return -EINVAL;
482 while (*str) {
483 if (!strncmp(str, "on", 2)) {
484 dmar_disabled = 0;
485 pr_info("IOMMU enabled\n");
486 } else if (!strncmp(str, "off", 3)) {
487 dmar_disabled = 1;
488 pr_info("IOMMU disabled\n");
489 } else if (!strncmp(str, "igfx_off", 8)) {
490 dmar_map_gfx = 0;
491 pr_info("Disable GFX device mapping\n");
492 } else if (!strncmp(str, "forcedac", 8)) {
493 pr_info("Forcing DAC for PCI devices\n");
494 dmar_forcedac = 1;
495 } else if (!strncmp(str, "strict", 6)) {
496 pr_info("Disable batched IOTLB flush\n");
497 intel_iommu_strict = 1;
498 } else if (!strncmp(str, "sp_off", 6)) {
499 pr_info("Disable supported super page\n");
500 intel_iommu_superpage = 0;
501 } else if (!strncmp(str, "ecs_off", 7)) {
502 printk(KERN_INFO
503 "Intel-IOMMU: disable extended context table support\n");
504 intel_iommu_ecs = 0;
505 } else if (!strncmp(str, "tboot_noforce", 13)) {
506 printk(KERN_INFO
507 "Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
508 intel_iommu_tboot_noforce = 1;
509 }
510
511 str += strcspn(str, ",");
512 while (*str == ',')
513 str++;
514 }
515 return 0;
516 }
517 __setup("intel_iommu=", intel_iommu_setup);
518
519 static struct kmem_cache *iommu_domain_cache;
520 static struct kmem_cache *iommu_devinfo_cache;
521
522 static struct dmar_domain* get_iommu_domain(struct intel_iommu *iommu, u16 did)
523 {
524 struct dmar_domain **domains;
525 int idx = did >> 8;
526
527 domains = iommu->domains[idx];
528 if (!domains)
529 return NULL;
530
531 return domains[did & 0xff];
532 }
533
534 static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
535 struct dmar_domain *domain)
536 {
537 struct dmar_domain **domains;
538 int idx = did >> 8;
539
540 if (!iommu->domains[idx]) {
541 size_t size = 256 * sizeof(struct dmar_domain *);
542 iommu->domains[idx] = kzalloc(size, GFP_ATOMIC);
543 }
544
545 domains = iommu->domains[idx];
546 if (WARN_ON(!domains))
547 return;
548 else
549 domains[did & 0xff] = domain;
550 }
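/*
 * Illustrative sketch only, not part of the driver: the two helpers above
 * treat the 16-bit domain id as a two-level index, so did 0x1234 lives in
 * iommu->domains[0x12][0x34]; each 256-entry second-level array is
 * allocated lazily by set_iommu_domain().  The function name is
 * hypothetical.
 */
#if 0
static struct dmar_domain *example_domain_lookup(struct intel_iommu *iommu)
{
        return get_iommu_domain(iommu, 0x1234);
}
#endif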
551
552 void *alloc_pgtable_page(int node)
553 {
554 struct page *page;
555 void *vaddr = NULL;
556
557 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
558 if (page)
559 vaddr = page_address(page);
560 return vaddr;
561 }
562
563 void free_pgtable_page(void *vaddr)
564 {
565 free_page((unsigned long)vaddr);
566 }
567
568 static inline void *alloc_domain_mem(void)
569 {
570 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
571 }
572
573 static void free_domain_mem(void *vaddr)
574 {
575 kmem_cache_free(iommu_domain_cache, vaddr);
576 }
577
578 static inline void * alloc_devinfo_mem(void)
579 {
580 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
581 }
582
583 static inline void free_devinfo_mem(void *vaddr)
584 {
585 kmem_cache_free(iommu_devinfo_cache, vaddr);
586 }
587
588 static inline int domain_type_is_vm(struct dmar_domain *domain)
589 {
590 return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE;
591 }
592
593 static inline int domain_type_is_si(struct dmar_domain *domain)
594 {
595 return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
596 }
597
598 static inline int domain_type_is_vm_or_si(struct dmar_domain *domain)
599 {
600 return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE |
601 DOMAIN_FLAG_STATIC_IDENTITY);
602 }
603
604 static inline int domain_pfn_supported(struct dmar_domain *domain,
605 unsigned long pfn)
606 {
607 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
608
609 return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
610 }
611
612 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
613 {
614 unsigned long sagaw;
615 int agaw = -1;
616
617 sagaw = cap_sagaw(iommu->cap);
618 for (agaw = width_to_agaw(max_gaw);
619 agaw >= 0; agaw--) {
620 if (test_bit(agaw, &sagaw))
621 break;
622 }
623
624 return agaw;
625 }
626
627 /*
628 * Calculate max SAGAW for each iommu.
629 */
630 int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
631 {
632 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
633 }
634
635 /*
636 * Calculate the agaw for each iommu.
637 * "SAGAW" may be different across iommus, so use a default agaw and
638 * fall back to a smaller supported agaw for iommus that don't support the default agaw.
639 */
640 int iommu_calculate_agaw(struct intel_iommu *iommu)
641 {
642 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
643 }
644
645 /* This function only returns a single iommu in a domain */
646 struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
647 {
648 int iommu_id;
649
650 /* si_domain and vm domain should not get here. */
651 BUG_ON(domain_type_is_vm_or_si(domain));
652 for_each_domain_iommu(iommu_id, domain)
653 break;
654
655 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
656 return NULL;
657
658 return g_iommus[iommu_id];
659 }
660
661 static void domain_update_iommu_coherency(struct dmar_domain *domain)
662 {
663 struct dmar_drhd_unit *drhd;
664 struct intel_iommu *iommu;
665 bool found = false;
666 int i;
667
668 domain->iommu_coherency = 1;
669
670 for_each_domain_iommu(i, domain) {
671 found = true;
672 if (!ecap_coherent(g_iommus[i]->ecap)) {
673 domain->iommu_coherency = 0;
674 break;
675 }
676 }
677 if (found)
678 return;
679
680 /* No hardware attached; use lowest common denominator */
681 rcu_read_lock();
682 for_each_active_iommu(iommu, drhd) {
683 if (!ecap_coherent(iommu->ecap)) {
684 domain->iommu_coherency = 0;
685 break;
686 }
687 }
688 rcu_read_unlock();
689 }
690
691 static int domain_update_iommu_snooping(struct intel_iommu *skip)
692 {
693 struct dmar_drhd_unit *drhd;
694 struct intel_iommu *iommu;
695 int ret = 1;
696
697 rcu_read_lock();
698 for_each_active_iommu(iommu, drhd) {
699 if (iommu != skip) {
700 if (!ecap_sc_support(iommu->ecap)) {
701 ret = 0;
702 break;
703 }
704 }
705 }
706 rcu_read_unlock();
707
708 return ret;
709 }
710
711 static int domain_update_iommu_superpage(struct intel_iommu *skip)
712 {
713 struct dmar_drhd_unit *drhd;
714 struct intel_iommu *iommu;
715 int mask = 0xf;
716
717 if (!intel_iommu_superpage) {
718 return 0;
719 }
720
721 /* set iommu_superpage to the smallest common denominator */
722 rcu_read_lock();
723 for_each_active_iommu(iommu, drhd) {
724 if (iommu != skip) {
725 mask &= cap_super_page_val(iommu->cap);
726 if (!mask)
727 break;
728 }
729 }
730 rcu_read_unlock();
731
732 return fls(mask);
733 }
734
735 /* Some capabilities may be different across iommus */
736 static void domain_update_iommu_cap(struct dmar_domain *domain)
737 {
738 domain_update_iommu_coherency(domain);
739 domain->iommu_snooping = domain_update_iommu_snooping(NULL);
740 domain->iommu_superpage = domain_update_iommu_superpage(NULL);
741 }
742
743 static inline struct context_entry *iommu_context_addr(struct intel_iommu *iommu,
744 u8 bus, u8 devfn, int alloc)
745 {
746 struct root_entry *root = &iommu->root_entry[bus];
747 struct context_entry *context;
748 u64 *entry;
749
750 entry = &root->lo;
751 if (ecs_enabled(iommu)) {
752 if (devfn >= 0x80) {
753 devfn -= 0x80;
754 entry = &root->hi;
755 }
756 devfn *= 2;
757 }
758 if (*entry & 1)
759 context = phys_to_virt(*entry & VTD_PAGE_MASK);
760 else {
761 unsigned long phy_addr;
762 if (!alloc)
763 return NULL;
764
765 context = alloc_pgtable_page(iommu->node);
766 if (!context)
767 return NULL;
768
769 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
770 phy_addr = virt_to_phys((void *)context);
771 *entry = phy_addr | 1;
772 __iommu_flush_cache(iommu, entry, sizeof(*entry));
773 }
774 return &context[devfn];
775 }
776
777 static int iommu_dummy(struct device *dev)
778 {
779 return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
780 }
781
782 static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
783 {
784 struct dmar_drhd_unit *drhd = NULL;
785 struct intel_iommu *iommu;
786 struct device *tmp;
787 struct pci_dev *ptmp, *pdev = NULL;
788 u16 segment = 0;
789 int i;
790
791 if (iommu_dummy(dev))
792 return NULL;
793
794 if (dev_is_pci(dev)) {
795 struct pci_dev *pf_pdev;
796
797 pdev = to_pci_dev(dev);
798
799 #ifdef CONFIG_X86
800 /* VMD child devices currently cannot be handled individually */
801 if (is_vmd(pdev->bus))
802 return NULL;
803 #endif
804
805 /* VFs aren't listed in scope tables; we need to look up
806 * the PF instead to find the IOMMU. */
807 pf_pdev = pci_physfn(pdev);
808 dev = &pf_pdev->dev;
809 segment = pci_domain_nr(pdev->bus);
810 } else if (has_acpi_companion(dev))
811 dev = &ACPI_COMPANION(dev)->dev;
812
813 rcu_read_lock();
814 for_each_active_iommu(iommu, drhd) {
815 if (pdev && segment != drhd->segment)
816 continue;
817
818 for_each_active_dev_scope(drhd->devices,
819 drhd->devices_cnt, i, tmp) {
820 if (tmp == dev) {
821 /* For a VF use its original BDF# not that of the PF
822 * which we used for the IOMMU lookup. Strictly speaking
823 * we could do this for all PCI devices; we only need to
824 * get the BDF# from the scope table for ACPI matches. */
825 if (pdev && pdev->is_virtfn)
826 goto got_pdev;
827
828 *bus = drhd->devices[i].bus;
829 *devfn = drhd->devices[i].devfn;
830 goto out;
831 }
832
833 if (!pdev || !dev_is_pci(tmp))
834 continue;
835
836 ptmp = to_pci_dev(tmp);
837 if (ptmp->subordinate &&
838 ptmp->subordinate->number <= pdev->bus->number &&
839 ptmp->subordinate->busn_res.end >= pdev->bus->number)
840 goto got_pdev;
841 }
842
843 if (pdev && drhd->include_all) {
844 got_pdev:
845 *bus = pdev->bus->number;
846 *devfn = pdev->devfn;
847 goto out;
848 }
849 }
850 iommu = NULL;
851 out:
852 rcu_read_unlock();
853
854 return iommu;
855 }
856
857 static void domain_flush_cache(struct dmar_domain *domain,
858 void *addr, int size)
859 {
860 if (!domain->iommu_coherency)
861 clflush_cache_range(addr, size);
862 }
863
864 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
865 {
866 struct context_entry *context;
867 int ret = 0;
868 unsigned long flags;
869
870 spin_lock_irqsave(&iommu->lock, flags);
871 context = iommu_context_addr(iommu, bus, devfn, 0);
872 if (context)
873 ret = context_present(context);
874 spin_unlock_irqrestore(&iommu->lock, flags);
875 return ret;
876 }
877
878 static void free_context_table(struct intel_iommu *iommu)
879 {
880 int i;
881 unsigned long flags;
882 struct context_entry *context;
883
884 spin_lock_irqsave(&iommu->lock, flags);
885 if (!iommu->root_entry) {
886 goto out;
887 }
888 for (i = 0; i < ROOT_ENTRY_NR; i++) {
889 context = iommu_context_addr(iommu, i, 0, 0);
890 if (context)
891 free_pgtable_page(context);
892
893 if (!ecs_enabled(iommu))
894 continue;
895
896 context = iommu_context_addr(iommu, i, 0x80, 0);
897 if (context)
898 free_pgtable_page(context);
899
900 }
901 free_pgtable_page(iommu->root_entry);
902 iommu->root_entry = NULL;
903 out:
904 spin_unlock_irqrestore(&iommu->lock, flags);
905 }
906
907 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
908 unsigned long pfn, int *target_level)
909 {
910 struct dma_pte *parent, *pte = NULL;
911 int level = agaw_to_level(domain->agaw);
912 int offset;
913
914 BUG_ON(!domain->pgd);
915
916 if (!domain_pfn_supported(domain, pfn))
917 /* Address beyond IOMMU's addressing capabilities. */
918 return NULL;
919
920 parent = domain->pgd;
921
922 while (1) {
923 void *tmp_page;
924
925 offset = pfn_level_offset(pfn, level);
926 pte = &parent[offset];
927 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
928 break;
929 if (level == *target_level)
930 break;
931
932 if (!dma_pte_present(pte)) {
933 uint64_t pteval;
934
935 tmp_page = alloc_pgtable_page(domain->nid);
936
937 if (!tmp_page)
938 return NULL;
939
940 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
941 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
942 if (cmpxchg64(&pte->val, 0ULL, pteval))
943 /* Someone else set it while we were thinking; use theirs. */
944 free_pgtable_page(tmp_page);
945 else
946 domain_flush_cache(domain, pte, sizeof(*pte));
947 }
948 if (level == 1)
949 break;
950
951 parent = phys_to_virt(dma_pte_addr(pte));
952 level--;
953 }
954
955 if (!*target_level)
956 *target_level = level;
957
958 return pte;
959 }
960
961
962 /* return the pte for this address at the specified level */
963 static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
964 unsigned long pfn,
965 int level, int *large_page)
966 {
967 struct dma_pte *parent, *pte = NULL;
968 int total = agaw_to_level(domain->agaw);
969 int offset;
970
971 parent = domain->pgd;
972 while (level <= total) {
973 offset = pfn_level_offset(pfn, total);
974 pte = &parent[offset];
975 if (level == total)
976 return pte;
977
978 if (!dma_pte_present(pte)) {
979 *large_page = total;
980 break;
981 }
982
983 if (dma_pte_superpage(pte)) {
984 *large_page = total;
985 return pte;
986 }
987
988 parent = phys_to_virt(dma_pte_addr(pte));
989 total--;
990 }
991 return NULL;
992 }
993
994 /* clear last level pte; a tlb flush should follow */
995 static void dma_pte_clear_range(struct dmar_domain *domain,
996 unsigned long start_pfn,
997 unsigned long last_pfn)
998 {
999 unsigned int large_page = 1;
1000 struct dma_pte *first_pte, *pte;
1001
1002 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1003 BUG_ON(!domain_pfn_supported(domain, last_pfn));
1004 BUG_ON(start_pfn > last_pfn);
1005
1006 /* we don't need lock here; nobody else touches the iova range */
1007 do {
1008 large_page = 1;
1009 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
1010 if (!pte) {
1011 start_pfn = align_to_level(start_pfn + 1, large_page + 1);
1012 continue;
1013 }
1014 do {
1015 dma_clear_pte(pte);
1016 start_pfn += lvl_to_nr_pages(large_page);
1017 pte++;
1018 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
1019
1020 domain_flush_cache(domain, first_pte,
1021 (void *)pte - (void *)first_pte);
1022
1023 } while (start_pfn && start_pfn <= last_pfn);
1024 }
1025
1026 static void dma_pte_free_level(struct dmar_domain *domain, int level,
1027 int retain_level, struct dma_pte *pte,
1028 unsigned long pfn, unsigned long start_pfn,
1029 unsigned long last_pfn)
1030 {
1031 pfn = max(start_pfn, pfn);
1032 pte = &pte[pfn_level_offset(pfn, level)];
1033
1034 do {
1035 unsigned long level_pfn;
1036 struct dma_pte *level_pte;
1037
1038 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
1039 goto next;
1040
1041 level_pfn = pfn & level_mask(level);
1042 level_pte = phys_to_virt(dma_pte_addr(pte));
1043
1044 if (level > 2) {
1045 dma_pte_free_level(domain, level - 1, retain_level,
1046 level_pte, level_pfn, start_pfn,
1047 last_pfn);
1048 }
1049
1050 /*
1051 * Free the page table if we're below the level we want to
1052 * retain and the range covers the entire table.
1053 */
1054 if (level < retain_level && !(start_pfn > level_pfn ||
1055 last_pfn < level_pfn + level_size(level) - 1)) {
1056 dma_clear_pte(pte);
1057 domain_flush_cache(domain, pte, sizeof(*pte));
1058 free_pgtable_page(level_pte);
1059 }
1060 next:
1061 pfn += level_size(level);
1062 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1063 }
1064
1065 /*
1066 * clear last level (leaf) ptes and free page table pages below the
1067 * level we wish to keep intact.
1068 */
1069 static void dma_pte_free_pagetable(struct dmar_domain *domain,
1070 unsigned long start_pfn,
1071 unsigned long last_pfn,
1072 int retain_level)
1073 {
1074 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1075 BUG_ON(!domain_pfn_supported(domain, last_pfn));
1076 BUG_ON(start_pfn > last_pfn);
1077
1078 dma_pte_clear_range(domain, start_pfn, last_pfn);
1079
1080 /* We don't need lock here; nobody else touches the iova range */
1081 dma_pte_free_level(domain, agaw_to_level(domain->agaw), retain_level,
1082 domain->pgd, 0, start_pfn, last_pfn);
1083
1084 /* free pgd */
1085 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1086 free_pgtable_page(domain->pgd);
1087 domain->pgd = NULL;
1088 }
1089 }
1090
1091 /* When a page at a given level is being unlinked from its parent, we don't
1092 need to *modify* it at all. All we need to do is make a list of all the
1093 pages which can be freed just as soon as we've flushed the IOTLB and we
1094 know the hardware page-walk will no longer touch them.
1095 The 'pte' argument is the *parent* PTE, pointing to the page that is to
1096 be freed. */
1097 static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1098 int level, struct dma_pte *pte,
1099 struct page *freelist)
1100 {
1101 struct page *pg;
1102
1103 pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1104 pg->freelist = freelist;
1105 freelist = pg;
1106
1107 if (level == 1)
1108 return freelist;
1109
1110 pte = page_address(pg);
1111 do {
1112 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1113 freelist = dma_pte_list_pagetables(domain, level - 1,
1114 pte, freelist);
1115 pte++;
1116 } while (!first_pte_in_page(pte));
1117
1118 return freelist;
1119 }
1120
1121 static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1122 struct dma_pte *pte, unsigned long pfn,
1123 unsigned long start_pfn,
1124 unsigned long last_pfn,
1125 struct page *freelist)
1126 {
1127 struct dma_pte *first_pte = NULL, *last_pte = NULL;
1128
1129 pfn = max(start_pfn, pfn);
1130 pte = &pte[pfn_level_offset(pfn, level)];
1131
1132 do {
1133 unsigned long level_pfn;
1134
1135 if (!dma_pte_present(pte))
1136 goto next;
1137
1138 level_pfn = pfn & level_mask(level);
1139
1140 /* If range covers entire pagetable, free it */
1141 if (start_pfn <= level_pfn &&
1142 last_pfn >= level_pfn + level_size(level) - 1) {
1143 /* These subordinate page tables are going away entirely. Don't
1144 bother to clear them; we're just going to *free* them. */
1145 if (level > 1 && !dma_pte_superpage(pte))
1146 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1147
1148 dma_clear_pte(pte);
1149 if (!first_pte)
1150 first_pte = pte;
1151 last_pte = pte;
1152 } else if (level > 1) {
1153 /* Recurse down into a level that isn't *entirely* obsolete */
1154 freelist = dma_pte_clear_level(domain, level - 1,
1155 phys_to_virt(dma_pte_addr(pte)),
1156 level_pfn, start_pfn, last_pfn,
1157 freelist);
1158 }
1159 next:
1160 pfn += level_size(level);
1161 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1162
1163 if (first_pte)
1164 domain_flush_cache(domain, first_pte,
1165 (void *)++last_pte - (void *)first_pte);
1166
1167 return freelist;
1168 }
1169
1170 /* We can't just free the pages because the IOMMU may still be walking
1171 the page tables, and may have cached the intermediate levels. The
1172 pages can only be freed after the IOTLB flush has been done. */
1173 static struct page *domain_unmap(struct dmar_domain *domain,
1174 unsigned long start_pfn,
1175 unsigned long last_pfn)
1176 {
1177 struct page *freelist = NULL;
1178
1179 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1180 BUG_ON(!domain_pfn_supported(domain, last_pfn));
1181 BUG_ON(start_pfn > last_pfn);
1182
1183 /* we don't need lock here; nobody else touches the iova range */
1184 freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1185 domain->pgd, 0, start_pfn, last_pfn, NULL);
1186
1187 /* free pgd */
1188 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1189 struct page *pgd_page = virt_to_page(domain->pgd);
1190 pgd_page->freelist = freelist;
1191 freelist = pgd_page;
1192
1193 domain->pgd = NULL;
1194 }
1195
1196 return freelist;
1197 }
1198
1199 static void dma_free_pagelist(struct page *freelist)
1200 {
1201 struct page *pg;
1202
1203 while ((pg = freelist)) {
1204 freelist = pg->freelist;
1205 free_pgtable_page(page_address(pg));
1206 }
1207 }
1208
1209 static void iova_entry_free(unsigned long data)
1210 {
1211 struct page *freelist = (struct page *)data;
1212
1213 dma_free_pagelist(freelist);
1214 }
1215
1216 /* iommu handling */
1217 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1218 {
1219 struct root_entry *root;
1220 unsigned long flags;
1221
1222 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
1223 if (!root) {
1224 pr_err("Allocating root entry for %s failed\n",
1225 iommu->name);
1226 return -ENOMEM;
1227 }
1228
1229 __iommu_flush_cache(iommu, root, ROOT_SIZE);
1230
1231 spin_lock_irqsave(&iommu->lock, flags);
1232 iommu->root_entry = root;
1233 spin_unlock_irqrestore(&iommu->lock, flags);
1234
1235 return 0;
1236 }
1237
1238 static void iommu_set_root_entry(struct intel_iommu *iommu)
1239 {
1240 u64 addr;
1241 u32 sts;
1242 unsigned long flag;
1243
1244 addr = virt_to_phys(iommu->root_entry);
1245 if (ecs_enabled(iommu))
1246 addr |= DMA_RTADDR_RTT;
1247
1248 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1249 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
1250
1251 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
1252
1253 /* Make sure the hardware completes it */
1254 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1255 readl, (sts & DMA_GSTS_RTPS), sts);
1256
1257 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1258 }
1259
1260 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
1261 {
1262 u32 val;
1263 unsigned long flag;
1264
1265 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
1266 return;
1267
1268 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1269 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
1270
1271 /* Make sure the hardware completes it */
1272 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1273 readl, (!(val & DMA_GSTS_WBFS)), val);
1274
1275 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1276 }
1277
1278 /* the return value determines if we need a write buffer flush */
1279 static void __iommu_flush_context(struct intel_iommu *iommu,
1280 u16 did, u16 source_id, u8 function_mask,
1281 u64 type)
1282 {
1283 u64 val = 0;
1284 unsigned long flag;
1285
1286 switch (type) {
1287 case DMA_CCMD_GLOBAL_INVL:
1288 val = DMA_CCMD_GLOBAL_INVL;
1289 break;
1290 case DMA_CCMD_DOMAIN_INVL:
1291 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1292 break;
1293 case DMA_CCMD_DEVICE_INVL:
1294 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1295 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1296 break;
1297 default:
1298 BUG();
1299 }
1300 val |= DMA_CCMD_ICC;
1301
1302 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1303 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1304
1305 /* Make sure the hardware completes it */
1306 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1307 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1308
1309 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1310 }
1311
1312 /* the return value determines if we need a write buffer flush */
1313 static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1314 u64 addr, unsigned int size_order, u64 type)
1315 {
1316 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1317 u64 val = 0, val_iva = 0;
1318 unsigned long flag;
1319
1320 switch (type) {
1321 case DMA_TLB_GLOBAL_FLUSH:
1322 /* a global flush doesn't need to set IVA_REG */
1323 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1324 break;
1325 case DMA_TLB_DSI_FLUSH:
1326 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1327 break;
1328 case DMA_TLB_PSI_FLUSH:
1329 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1330 /* IH bit is passed in as part of address */
1331 val_iva = size_order | addr;
1332 break;
1333 default:
1334 BUG();
1335 }
1336 /* Note: set drain read/write */
1337 #if 0
1338 /*
1339 * This is probably only here to be extra safe; it looks like we can
1340 * ignore it without any impact.
1341 */
1342 if (cap_read_drain(iommu->cap))
1343 val |= DMA_TLB_READ_DRAIN;
1344 #endif
1345 if (cap_write_drain(iommu->cap))
1346 val |= DMA_TLB_WRITE_DRAIN;
1347
1348 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1349 /* Note: Only uses first TLB reg currently */
1350 if (val_iva)
1351 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1352 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1353
1354 /* Make sure the hardware completes it */
1355 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1356 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1357
1358 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1359
1360 /* check IOTLB invalidation granularity */
1361 if (DMA_TLB_IAIG(val) == 0)
1362 pr_err("Flush IOTLB failed\n");
1363 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1364 pr_debug("TLB flush request %Lx, actual %Lx\n",
1365 (unsigned long long)DMA_TLB_IIRG(type),
1366 (unsigned long long)DMA_TLB_IAIG(val));
1367 }
1368
1369 static struct device_domain_info *
1370 iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
1371 u8 bus, u8 devfn)
1372 {
1373 struct device_domain_info *info;
1374
1375 assert_spin_locked(&device_domain_lock);
1376
1377 if (!iommu->qi)
1378 return NULL;
1379
1380 list_for_each_entry(info, &domain->devices, link)
1381 if (info->iommu == iommu && info->bus == bus &&
1382 info->devfn == devfn) {
1383 if (info->ats_supported && info->dev)
1384 return info;
1385 break;
1386 }
1387
1388 return NULL;
1389 }
1390
1391 static void domain_update_iotlb(struct dmar_domain *domain)
1392 {
1393 struct device_domain_info *info;
1394 bool has_iotlb_device = false;
1395
1396 assert_spin_locked(&device_domain_lock);
1397
1398 list_for_each_entry(info, &domain->devices, link) {
1399 struct pci_dev *pdev;
1400
1401 if (!info->dev || !dev_is_pci(info->dev))
1402 continue;
1403
1404 pdev = to_pci_dev(info->dev);
1405 if (pdev->ats_enabled) {
1406 has_iotlb_device = true;
1407 break;
1408 }
1409 }
1410
1411 domain->has_iotlb_device = has_iotlb_device;
1412 }
1413
1414 static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1415 {
1416 struct pci_dev *pdev;
1417
1418 assert_spin_locked(&device_domain_lock);
1419
1420 if (!info || !dev_is_pci(info->dev))
1421 return;
1422
1423 pdev = to_pci_dev(info->dev);
1424 /* For IOMMU that supports device IOTLB throttling (DIT), we assign
1425 * PFSID to the invalidation desc of a VF such that IOMMU HW can gauge
1426 * queue depth at PF level. If DIT is not set, PFSID will be treated as
1427 * reserved, which should be set to 0.
1428 */
1429 if (!ecap_dit(info->iommu->ecap))
1430 info->pfsid = 0;
1431 else {
1432 struct pci_dev *pf_pdev;
1433
1434 /* pdev will be returned if device is not a vf */
1435 pf_pdev = pci_physfn(pdev);
1436 info->pfsid = PCI_DEVID(pf_pdev->bus->number, pf_pdev->devfn);
1437 }
1438
1439 #ifdef CONFIG_INTEL_IOMMU_SVM
1440 /* The PCIe spec, in its wisdom, declares that the behaviour of
1441 the device if you enable PASID support after ATS support is
1442 undefined. So always enable PASID support on devices which
1443 have it, even if we can't yet know if we're ever going to
1444 use it. */
1445 if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
1446 info->pasid_enabled = 1;
1447
1448 if (info->pri_supported && !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
1449 info->pri_enabled = 1;
1450 #endif
1451 if (info->ats_supported && !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
1452 info->ats_enabled = 1;
1453 domain_update_iotlb(info->domain);
1454 info->ats_qdep = pci_ats_queue_depth(pdev);
1455 }
1456 }
1457
1458 static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1459 {
1460 struct pci_dev *pdev;
1461
1462 assert_spin_locked(&device_domain_lock);
1463
1464 if (!dev_is_pci(info->dev))
1465 return;
1466
1467 pdev = to_pci_dev(info->dev);
1468
1469 if (info->ats_enabled) {
1470 pci_disable_ats(pdev);
1471 info->ats_enabled = 0;
1472 domain_update_iotlb(info->domain);
1473 }
1474 #ifdef CONFIG_INTEL_IOMMU_SVM
1475 if (info->pri_enabled) {
1476 pci_disable_pri(pdev);
1477 info->pri_enabled = 0;
1478 }
1479 if (info->pasid_enabled) {
1480 pci_disable_pasid(pdev);
1481 info->pasid_enabled = 0;
1482 }
1483 #endif
1484 }
1485
1486 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1487 u64 addr, unsigned mask)
1488 {
1489 u16 sid, qdep;
1490 unsigned long flags;
1491 struct device_domain_info *info;
1492
1493 if (!domain->has_iotlb_device)
1494 return;
1495
1496 spin_lock_irqsave(&device_domain_lock, flags);
1497 list_for_each_entry(info, &domain->devices, link) {
1498 if (!info->ats_enabled)
1499 continue;
1500
1501 sid = info->bus << 8 | info->devfn;
1502 qdep = info->ats_qdep;
1503 qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
1504 qdep, addr, mask);
1505 }
1506 spin_unlock_irqrestore(&device_domain_lock, flags);
1507 }
1508
1509 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
1510 struct dmar_domain *domain,
1511 unsigned long pfn, unsigned int pages,
1512 int ih, int map)
1513 {
1514 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1515 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1516 u16 did = domain->iommu_did[iommu->seq_id];
1517
1518 BUG_ON(pages == 0);
1519
1520 if (ih)
1521 ih = 1 << 6;
1522 /*
1523 * Fall back to a domain-selective flush if there is no PSI support or
1524 * the size is too big.
1525 * PSI requires the page size to be 2 ^ x, and the base address to be
1526 * naturally aligned to that size.
1527 */
1528 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1529 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1530 DMA_TLB_DSI_FLUSH);
1531 else
1532 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
1533 DMA_TLB_PSI_FLUSH);
1534
1535 /*
1536 * In caching mode, changes of pages from non-present to present require
1537 * flush. However, device IOTLB doesn't need to be flushed in this case.
1538 */
1539 if (!cap_caching_mode(iommu->cap) || !map)
1540 iommu_flush_dev_iotlb(domain, addr, mask);
1541 }
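/*
 * Illustrative sketch only, not part of the driver: how the PSI mask above
 * is derived.  A request to flush 9 pages is rounded up to the next power
 * of two, so mask = ilog2(16) = 4 and the hardware invalidates a 16-page
 * (64KiB), naturally aligned region.  The function name is hypothetical.
 */
#if 0
static void example_psi_mask(void)
{
        unsigned int pages = 9;
        unsigned int mask = ilog2(__roundup_pow_of_two(pages));

        WARN_ON(mask != 4);
}
#endif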
1542
1543 /* Notification for newly created mappings */
1544 static inline void __mapping_notify_one(struct intel_iommu *iommu,
1545 struct dmar_domain *domain,
1546 unsigned long pfn, unsigned int pages)
1547 {
1548 /* It's a non-present to present mapping. Only flush if caching mode */
1549 if (cap_caching_mode(iommu->cap))
1550 iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1);
1551 else
1552 iommu_flush_write_buffer(iommu);
1553 }
1554
1555 static void iommu_flush_iova(struct iova_domain *iovad)
1556 {
1557 struct dmar_domain *domain;
1558 int idx;
1559
1560 domain = container_of(iovad, struct dmar_domain, iovad);
1561
1562 for_each_domain_iommu(idx, domain) {
1563 struct intel_iommu *iommu = g_iommus[idx];
1564 u16 did = domain->iommu_did[iommu->seq_id];
1565
1566 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
1567
1568 if (!cap_caching_mode(iommu->cap))
1569 iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
1570 0, MAX_AGAW_PFN_WIDTH);
1571 }
1572 }
1573
1574 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1575 {
1576 u32 pmen;
1577 unsigned long flags;
1578
1579 raw_spin_lock_irqsave(&iommu->register_lock, flags);
1580 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1581 pmen &= ~DMA_PMEN_EPM;
1582 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1583
1584 /* wait for the protected region status bit to clear */
1585 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1586 readl, !(pmen & DMA_PMEN_PRS), pmen);
1587
1588 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1589 }
1590
1591 static void iommu_enable_translation(struct intel_iommu *iommu)
1592 {
1593 u32 sts;
1594 unsigned long flags;
1595
1596 raw_spin_lock_irqsave(&iommu->register_lock, flags);
1597 iommu->gcmd |= DMA_GCMD_TE;
1598 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1599
1600 /* Make sure the hardware completes it */
1601 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1602 readl, (sts & DMA_GSTS_TES), sts);
1603
1604 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1605 }
1606
1607 static void iommu_disable_translation(struct intel_iommu *iommu)
1608 {
1609 u32 sts;
1610 unsigned long flag;
1611
1612 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1613 iommu->gcmd &= ~DMA_GCMD_TE;
1614 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1615
1616 /* Make sure the hardware completes it */
1617 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1618 readl, (!(sts & DMA_GSTS_TES)), sts);
1619
1620 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1621 }
1622
1623
1624 static int iommu_init_domains(struct intel_iommu *iommu)
1625 {
1626 u32 ndomains, nlongs;
1627 size_t size;
1628
1629 ndomains = cap_ndoms(iommu->cap);
1630 pr_debug("%s: Number of Domains supported <%d>\n",
1631 iommu->name, ndomains);
1632 nlongs = BITS_TO_LONGS(ndomains);
1633
1634 spin_lock_init(&iommu->lock);
1635
1636 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1637 if (!iommu->domain_ids) {
1638 pr_err("%s: Allocating domain id array failed\n",
1639 iommu->name);
1640 return -ENOMEM;
1641 }
1642
1643 size = (ALIGN(ndomains, 256) >> 8) * sizeof(struct dmar_domain **);
1644 iommu->domains = kzalloc(size, GFP_KERNEL);
1645
1646 if (iommu->domains) {
1647 size = 256 * sizeof(struct dmar_domain *);
1648 iommu->domains[0] = kzalloc(size, GFP_KERNEL);
1649 }
1650
1651 if (!iommu->domains || !iommu->domains[0]) {
1652 pr_err("%s: Allocating domain array failed\n",
1653 iommu->name);
1654 kfree(iommu->domain_ids);
1655 kfree(iommu->domains);
1656 iommu->domain_ids = NULL;
1657 iommu->domains = NULL;
1658 return -ENOMEM;
1659 }
1660
1661
1662
1663 /*
1664 * If Caching mode is set, then invalid translations are tagged
1665 * with domain-id 0, hence we need to pre-allocate it. We also
1666 * use domain-id 0 as a marker for non-allocated domain-id, so
1667 * make sure it is not used for a real domain.
1668 */
1669 set_bit(0, iommu->domain_ids);
1670
1671 return 0;
1672 }
1673
1674 static void disable_dmar_iommu(struct intel_iommu *iommu)
1675 {
1676 struct device_domain_info *info, *tmp;
1677 unsigned long flags;
1678
1679 if (!iommu->domains || !iommu->domain_ids)
1680 return;
1681
1682 again:
1683 spin_lock_irqsave(&device_domain_lock, flags);
1684 list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
1685 struct dmar_domain *domain;
1686
1687 if (info->iommu != iommu)
1688 continue;
1689
1690 if (!info->dev || !info->domain)
1691 continue;
1692
1693 domain = info->domain;
1694
1695 __dmar_remove_one_dev_info(info);
1696
1697 if (!domain_type_is_vm_or_si(domain)) {
1698 /*
1699 * The domain_exit() function can't be called under
1700 * device_domain_lock, as it takes this lock itself.
1701 * So release the lock here and re-run the loop
1702 * afterwards.
1703 */
1704 spin_unlock_irqrestore(&device_domain_lock, flags);
1705 domain_exit(domain);
1706 goto again;
1707 }
1708 }
1709 spin_unlock_irqrestore(&device_domain_lock, flags);
1710
1711 if (iommu->gcmd & DMA_GCMD_TE)
1712 iommu_disable_translation(iommu);
1713 }
1714
1715 static void free_dmar_iommu(struct intel_iommu *iommu)
1716 {
1717 if ((iommu->domains) && (iommu->domain_ids)) {
1718 int elems = ALIGN(cap_ndoms(iommu->cap), 256) >> 8;
1719 int i;
1720
1721 for (i = 0; i < elems; i++)
1722 kfree(iommu->domains[i]);
1723 kfree(iommu->domains);
1724 kfree(iommu->domain_ids);
1725 iommu->domains = NULL;
1726 iommu->domain_ids = NULL;
1727 }
1728
1729 g_iommus[iommu->seq_id] = NULL;
1730
1731 /* free context mapping */
1732 free_context_table(iommu);
1733
1734 #ifdef CONFIG_INTEL_IOMMU_SVM
1735 if (pasid_enabled(iommu)) {
1736 if (ecap_prs(iommu->ecap))
1737 intel_svm_finish_prq(iommu);
1738 intel_svm_free_pasid_tables(iommu);
1739 }
1740 #endif
1741 }
1742
1743 static struct dmar_domain *alloc_domain(int flags)
1744 {
1745 struct dmar_domain *domain;
1746
1747 domain = alloc_domain_mem();
1748 if (!domain)
1749 return NULL;
1750
1751 memset(domain, 0, sizeof(*domain));
1752 domain->nid = -1;
1753 domain->flags = flags;
1754 domain->has_iotlb_device = false;
1755 INIT_LIST_HEAD(&domain->devices);
1756
1757 return domain;
1758 }
1759
1760 /* Must be called with iommu->lock */
1761 static int domain_attach_iommu(struct dmar_domain *domain,
1762 struct intel_iommu *iommu)
1763 {
1764 unsigned long ndomains;
1765 int num;
1766
1767 assert_spin_locked(&device_domain_lock);
1768 assert_spin_locked(&iommu->lock);
1769
1770 domain->iommu_refcnt[iommu->seq_id] += 1;
1771 domain->iommu_count += 1;
1772 if (domain->iommu_refcnt[iommu->seq_id] == 1) {
1773 ndomains = cap_ndoms(iommu->cap);
1774 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1775
1776 if (num >= ndomains) {
1777 pr_err("%s: No free domain ids\n", iommu->name);
1778 domain->iommu_refcnt[iommu->seq_id] -= 1;
1779 domain->iommu_count -= 1;
1780 return -ENOSPC;
1781 }
1782
1783 set_bit(num, iommu->domain_ids);
1784 set_iommu_domain(iommu, num, domain);
1785
1786 domain->iommu_did[iommu->seq_id] = num;
1787 domain->nid = iommu->node;
1788
1789 domain_update_iommu_cap(domain);
1790 }
1791
1792 return 0;
1793 }
1794
1795 static int domain_detach_iommu(struct dmar_domain *domain,
1796 struct intel_iommu *iommu)
1797 {
1798 int num, count = INT_MAX;
1799
1800 assert_spin_locked(&device_domain_lock);
1801 assert_spin_locked(&iommu->lock);
1802
1803 domain->iommu_refcnt[iommu->seq_id] -= 1;
1804 count = --domain->iommu_count;
1805 if (domain->iommu_refcnt[iommu->seq_id] == 0) {
1806 num = domain->iommu_did[iommu->seq_id];
1807 clear_bit(num, iommu->domain_ids);
1808 set_iommu_domain(iommu, num, NULL);
1809
1810 domain_update_iommu_cap(domain);
1811 domain->iommu_did[iommu->seq_id] = 0;
1812 }
1813
1814 return count;
1815 }
1816
1817 static struct iova_domain reserved_iova_list;
1818 static struct lock_class_key reserved_rbtree_key;
1819
1820 static int dmar_init_reserved_ranges(void)
1821 {
1822 struct pci_dev *pdev = NULL;
1823 struct iova *iova;
1824 int i;
1825
1826 init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN);
1827
1828 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1829 &reserved_rbtree_key);
1830
1831 /* IOAPIC ranges shouldn't be accessed by DMA */
1832 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1833 IOVA_PFN(IOAPIC_RANGE_END));
1834 if (!iova) {
1835 pr_err("Reserve IOAPIC range failed\n");
1836 return -ENODEV;
1837 }
1838
1839 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1840 for_each_pci_dev(pdev) {
1841 struct resource *r;
1842
1843 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1844 r = &pdev->resource[i];
1845 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1846 continue;
1847 iova = reserve_iova(&reserved_iova_list,
1848 IOVA_PFN(r->start),
1849 IOVA_PFN(r->end));
1850 if (!iova) {
1851 pr_err("Reserve iova failed\n");
1852 return -ENODEV;
1853 }
1854 }
1855 }
1856 return 0;
1857 }
1858
1859 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1860 {
1861 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1862 }
1863
1864 static inline int guestwidth_to_adjustwidth(int gaw)
1865 {
1866 int agaw;
1867 int r = (gaw - 12) % 9;
1868
1869 if (r == 0)
1870 agaw = gaw;
1871 else
1872 agaw = gaw + 9 - r;
1873 if (agaw > 64)
1874 agaw = 64;
1875 return agaw;
1876 }
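/*
 * Illustrative sketch only, not part of the driver: guestwidth_to_adjustwidth()
 * rounds a guest address width up to the next width that a whole number of
 * 9-bit page-table levels above the 12-bit page offset can cover, capped at
 * 64 bits.  The function name is hypothetical.
 */
#if 0
static void example_adjust_width(void)
{
        WARN_ON(guestwidth_to_adjustwidth(39) != 39);   /* exactly 3 levels */
        WARN_ON(guestwidth_to_adjustwidth(40) != 48);   /* rounded up to 4 levels */
        WARN_ON(guestwidth_to_adjustwidth(48) != 48);
        WARN_ON(guestwidth_to_adjustwidth(62) != 64);   /* capped at 64 bits */
}
#endif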
1877
1878 static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
1879 int guest_width)
1880 {
1881 int adjust_width, agaw;
1882 unsigned long sagaw;
1883 int err;
1884
1885 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
1886
1887 err = init_iova_flush_queue(&domain->iovad,
1888 iommu_flush_iova, iova_entry_free);
1889 if (err)
1890 return err;
1891
1892 domain_reserve_special_ranges(domain);
1893
1894 /* calculate AGAW */
1895 if (guest_width > cap_mgaw(iommu->cap))
1896 guest_width = cap_mgaw(iommu->cap);
1897 domain->gaw = guest_width;
1898 adjust_width = guestwidth_to_adjustwidth(guest_width);
1899 agaw = width_to_agaw(adjust_width);
1900 sagaw = cap_sagaw(iommu->cap);
1901 if (!test_bit(agaw, &sagaw)) {
1902 /* hardware doesn't support it, choose a bigger one */
1903 pr_debug("Hardware doesn't support agaw %d\n", agaw);
1904 agaw = find_next_bit(&sagaw, 5, agaw);
1905 if (agaw >= 5)
1906 return -ENODEV;
1907 }
1908 domain->agaw = agaw;
1909
1910 if (ecap_coherent(iommu->ecap))
1911 domain->iommu_coherency = 1;
1912 else
1913 domain->iommu_coherency = 0;
1914
1915 if (ecap_sc_support(iommu->ecap))
1916 domain->iommu_snooping = 1;
1917 else
1918 domain->iommu_snooping = 0;
1919
1920 if (intel_iommu_superpage)
1921 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1922 else
1923 domain->iommu_superpage = 0;
1924
1925 domain->nid = iommu->node;
1926
1927 /* always allocate the top pgd */
1928 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1929 if (!domain->pgd)
1930 return -ENOMEM;
1931 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1932 return 0;
1933 }
1934
1935 static void domain_exit(struct dmar_domain *domain)
1936 {
1937 struct page *freelist = NULL;
1938
1939 /* Domain 0 is reserved, so don't process it */
1940 if (!domain)
1941 return;
1942
1943 /* Remove associated devices and clear attached or cached domains */
1944 rcu_read_lock();
1945 domain_remove_dev_info(domain);
1946 rcu_read_unlock();
1947
1948 /* destroy iovas */
1949 put_iova_domain(&domain->iovad);
1950
1951 freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1952
1953 dma_free_pagelist(freelist);
1954
1955 free_domain_mem(domain);
1956 }
1957
1958 static int domain_context_mapping_one(struct dmar_domain *domain,
1959 struct intel_iommu *iommu,
1960 u8 bus, u8 devfn)
1961 {
1962 u16 did = domain->iommu_did[iommu->seq_id];
1963 int translation = CONTEXT_TT_MULTI_LEVEL;
1964 struct device_domain_info *info = NULL;
1965 struct context_entry *context;
1966 unsigned long flags;
1967 struct dma_pte *pgd;
1968 int ret, agaw;
1969
1970 WARN_ON(did == 0);
1971
1972 if (hw_pass_through && domain_type_is_si(domain))
1973 translation = CONTEXT_TT_PASS_THROUGH;
1974
1975 pr_debug("Set context mapping for %02x:%02x.%d\n",
1976 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1977
1978 BUG_ON(!domain->pgd);
1979
1980 spin_lock_irqsave(&device_domain_lock, flags);
1981 spin_lock(&iommu->lock);
1982
1983 ret = -ENOMEM;
1984 context = iommu_context_addr(iommu, bus, devfn, 1);
1985 if (!context)
1986 goto out_unlock;
1987
1988 ret = 0;
1989 if (context_present(context))
1990 goto out_unlock;
1991
1992 /*
1993 * For kdump cases, old valid entries may be cached due to the
1994 * in-flight DMA and copied pgtable, but there is no unmapping
1995 * behaviour for them, thus we need an explicit cache flush for
1996 * the newly-mapped device. For kdump, at this point, the device
1997 * is supposed to finish reset at its driver probe stage, so no
1998 * in-flight DMA will exist, and we don't need to worry about it
1999 * hereafter.
2000 */
2001 if (context_copied(context)) {
2002 u16 did_old = context_domain_id(context);
2003
2004 if (did_old < cap_ndoms(iommu->cap)) {
2005 iommu->flush.flush_context(iommu, did_old,
2006 (((u16)bus) << 8) | devfn,
2007 DMA_CCMD_MASK_NOBIT,
2008 DMA_CCMD_DEVICE_INVL);
2009 iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
2010 DMA_TLB_DSI_FLUSH);
2011 }
2012 }
2013
2014 pgd = domain->pgd;
2015
2016 context_clear_entry(context);
2017 context_set_domain_id(context, did);
2018
2019 /*
2020 * Skip top levels of page tables for an iommu which has a smaller
2021 * agaw than the domain's default. Unnecessary for PT mode.
2022 */
2023 if (translation != CONTEXT_TT_PASS_THROUGH) {
2024 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
2025 ret = -ENOMEM;
2026 pgd = phys_to_virt(dma_pte_addr(pgd));
2027 if (!dma_pte_present(pgd))
2028 goto out_unlock;
2029 }
2030
2031 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2032 if (info && info->ats_supported)
2033 translation = CONTEXT_TT_DEV_IOTLB;
2034 else
2035 translation = CONTEXT_TT_MULTI_LEVEL;
2036
2037 context_set_address_root(context, virt_to_phys(pgd));
2038 context_set_address_width(context, iommu->agaw);
2039 } else {
2040 /*
2041 * In pass through mode, AW must be programmed to
2042 * indicate the largest AGAW value supported by
2043 * hardware. And ASR is ignored by hardware.
2044 */
2045 context_set_address_width(context, iommu->msagaw);
2046 }
2047
2048 context_set_translation_type(context, translation);
2049 context_set_fault_enable(context);
2050 context_set_present(context);
2051 domain_flush_cache(domain, context, sizeof(*context));
2052
2053 /*
2054 * It's a non-present to present mapping. If hardware doesn't cache
2055 * non-present entries we only need to flush the write-buffer. If it
2056 * _does_ cache non-present entries, then it does so in the special
2057 * domain #0, which we have to flush:
2058 */
2059 if (cap_caching_mode(iommu->cap)) {
2060 iommu->flush.flush_context(iommu, 0,
2061 (((u16)bus) << 8) | devfn,
2062 DMA_CCMD_MASK_NOBIT,
2063 DMA_CCMD_DEVICE_INVL);
2064 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
2065 } else {
2066 iommu_flush_write_buffer(iommu);
2067 }
2068 iommu_enable_dev_iotlb(info);
2069
2070 ret = 0;
2071
2072 out_unlock:
2073 spin_unlock(&iommu->lock);
2074 spin_unlock_irqrestore(&device_domain_lock, flags);
2075
2076 return ret;
2077 }
2078
2079 struct domain_context_mapping_data {
2080 struct dmar_domain *domain;
2081 struct intel_iommu *iommu;
2082 };
2083
2084 static int domain_context_mapping_cb(struct pci_dev *pdev,
2085 u16 alias, void *opaque)
2086 {
2087 struct domain_context_mapping_data *data = opaque;
2088
2089 return domain_context_mapping_one(data->domain, data->iommu,
2090 PCI_BUS_NUM(alias), alias & 0xff);
2091 }
2092
2093 static int
2094 domain_context_mapping(struct dmar_domain *domain, struct device *dev)
2095 {
2096 struct intel_iommu *iommu;
2097 u8 bus, devfn;
2098 struct domain_context_mapping_data data;
2099
2100 iommu = device_to_iommu(dev, &bus, &devfn);
2101 if (!iommu)
2102 return -ENODEV;
2103
2104 if (!dev_is_pci(dev))
2105 return domain_context_mapping_one(domain, iommu, bus, devfn);
2106
2107 data.domain = domain;
2108 data.iommu = iommu;
2109
2110 return pci_for_each_dma_alias(to_pci_dev(dev),
2111 &domain_context_mapping_cb, &data);
2112 }
2113
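/*
 * pci_for_each_dma_alias() stops iterating as soon as a callback returns
 * a non-zero value. The callback below therefore returns non-zero for a
 * *missing* context entry, so domain_context_mapped() can report true
 * only when every DMA alias of the device is already mapped.
 */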
2114 static int domain_context_mapped_cb(struct pci_dev *pdev,
2115 u16 alias, void *opaque)
2116 {
2117 struct intel_iommu *iommu = opaque;
2118
2119 return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
2120 }
2121
2122 static int domain_context_mapped(struct device *dev)
2123 {
2124 struct intel_iommu *iommu;
2125 u8 bus, devfn;
2126
2127 iommu = device_to_iommu(dev, &bus, &devfn);
2128 if (!iommu)
2129 return -ENODEV;
2130
2131 if (!dev_is_pci(dev))
2132 return device_context_mapped(iommu, bus, devfn);
2133
2134 return !pci_for_each_dma_alias(to_pci_dev(dev),
2135 domain_context_mapped_cb, iommu);
2136 }
2137
2138 /* Returns a number of VTD pages, but aligned to MM page size */
2139 static inline unsigned long aligned_nrpages(unsigned long host_addr,
2140 size_t size)
2141 {
2142 host_addr &= ~PAGE_MASK;
2143 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
2144 }
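/*
 * Illustrative sketch, not part of the driver: the arithmetic behind
 * aligned_nrpages() for the common case of 4KiB MM pages and 4KiB VT-d
 * pages (PAGE_SHIFT == VTD_PAGE_SHIFT == 12). The buffer below is
 * invented for the example.
 */
static inline unsigned long example_aligned_nrpages(void)
{
	unsigned long host_addr = 0x1000100;	/* starts 0x100 into a page */
	size_t size = 0x2100;			/* a bit over two pages     */
	unsigned long offset = host_addr & (4096 - 1);

	/*
	 * 0x100 + 0x2100 = 0x2200 bytes measured from the page boundary,
	 * rounded up to 0x3000, i.e. three 4KiB VT-d pages.
	 */
	return ((offset + size + 4095) & ~4095UL) >> 12;
}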
2145
2146 /* Return largest possible superpage level for a given mapping */
2147 static inline int hardware_largepage_caps(struct dmar_domain *domain,
2148 unsigned long iov_pfn,
2149 unsigned long phy_pfn,
2150 unsigned long pages)
2151 {
2152 int support, level = 1;
2153 unsigned long pfnmerge;
2154
2155 support = domain->iommu_superpage;
2156
2157 /* To use a large page, the virtual *and* physical addresses
2158 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2159 of them will mean we have to use smaller pages. So just
2160 merge them and check both at once. */
2161 pfnmerge = iov_pfn | phy_pfn;
2162
2163 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
2164 pages >>= VTD_STRIDE_SHIFT;
2165 if (!pages)
2166 break;
2167 pfnmerge >>= VTD_STRIDE_SHIFT;
2168 level++;
2169 support--;
2170 }
2171 return level;
2172 }
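/*
 * Illustrative sketch, not part of the driver: why both the IOVA PFN and
 * the physical PFN must share alignment before a superpage level can be
 * used. Each level covers VTD_STRIDE_SHIFT (9) more address bits, i.e.
 * 4KiB -> 2MiB -> 1GiB. The numbers below are invented for the example.
 */
static inline int example_superpage_level(void)
{
	unsigned long iov_pfn  = 0x200;	/* 2MiB-aligned IOVA            */
	unsigned long phys_pfn = 0x600;	/* 2MiB-aligned physical memory */
	unsigned long pages    = 0x400;	/* 4MiB worth of 4KiB pages     */
	unsigned long pfnmerge = iov_pfn | phys_pfn;
	int support = 1;		/* hardware: 2MiB pages only    */
	int level = 1;

	while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
		pages >>= VTD_STRIDE_SHIFT;
		if (!pages)
			break;
		pfnmerge >>= VTD_STRIDE_SHIFT;
		level++;
		support--;
	}
	return level;			/* 2: 2MiB superpages can be used */
}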
2173
2174 static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2175 struct scatterlist *sg, unsigned long phys_pfn,
2176 unsigned long nr_pages, int prot)
2177 {
2178 struct dma_pte *first_pte = NULL, *pte = NULL;
2179 phys_addr_t uninitialized_var(pteval);
2180 unsigned long sg_res = 0;
2181 unsigned int largepage_lvl = 0;
2182 unsigned long lvl_pages = 0;
2183
2184 BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
2185
2186 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2187 return -EINVAL;
2188
2189 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
2190
2191 if (!sg) {
2192 sg_res = nr_pages;
2193 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
2194 }
2195
2196 while (nr_pages > 0) {
2197 uint64_t tmp;
2198
2199 if (!sg_res) {
2200 unsigned int pgoff = sg->offset & ~PAGE_MASK;
2201
2202 sg_res = aligned_nrpages(sg->offset, sg->length);
2203 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + pgoff;
2204 sg->dma_length = sg->length;
2205 pteval = (sg_phys(sg) - pgoff) | prot;
2206 phys_pfn = pteval >> VTD_PAGE_SHIFT;
2207 }
2208
2209 if (!pte) {
2210 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2211
2212 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
2213 if (!pte)
2214 return -ENOMEM;
2215 /* It is a large page */
2216 if (largepage_lvl > 1) {
2217 unsigned long nr_superpages, end_pfn;
2218
2219 pteval |= DMA_PTE_LARGE_PAGE;
2220 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2221
2222 nr_superpages = sg_res / lvl_pages;
2223 end_pfn = iov_pfn + nr_superpages * lvl_pages - 1;
2224
2225 /*
2226 * Ensure that old small page tables are
2227 * removed to make room for superpage(s).
2228 * We're adding new large pages, so make sure
2229 * we don't remove their parent tables.
2230 */
2231 dma_pte_free_pagetable(domain, iov_pfn, end_pfn,
2232 largepage_lvl + 1);
2233 } else {
2234 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
2235 }
2236
2237 }
2238 /* We don't need a lock here; nobody else
2239 * touches this iova range
2240 */
2241 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
2242 if (tmp) {
2243 static int dumps = 5;
2244 pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2245 iov_pfn, tmp, (unsigned long long)pteval);
2246 if (dumps) {
2247 dumps--;
2248 debug_dma_dump_mappings(NULL);
2249 }
2250 WARN_ON(1);
2251 }
2252
2253 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2254
2255 BUG_ON(nr_pages < lvl_pages);
2256 BUG_ON(sg_res < lvl_pages);
2257
2258 nr_pages -= lvl_pages;
2259 iov_pfn += lvl_pages;
2260 phys_pfn += lvl_pages;
2261 pteval += lvl_pages * VTD_PAGE_SIZE;
2262 sg_res -= lvl_pages;
2263
2264 /* If the next PTE would be the first in a new page, then we
2265 need to flush the cache on the entries we've just written.
2266 And then we'll need to recalculate 'pte', so clear it and
2267 let it get set again in the if (!pte) block above.
2268
2269 If we're done (!nr_pages) we need to flush the cache too.
2270
2271 Also if we've been setting superpages, we may need to
2272 recalculate 'pte' and switch back to smaller pages for the
2273 end of the mapping, if the trailing size is not enough to
2274 use another superpage (i.e. sg_res < lvl_pages). */
2275 pte++;
2276 if (!nr_pages || first_pte_in_page(pte) ||
2277 (largepage_lvl > 1 && sg_res < lvl_pages)) {
2278 domain_flush_cache(domain, first_pte,
2279 (void *)pte - (void *)first_pte);
2280 pte = NULL;
2281 }
2282
2283 if (!sg_res && nr_pages)
2284 sg = sg_next(sg);
2285 }
2286 return 0;
2287 }
2288
2289 static int domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2290 struct scatterlist *sg, unsigned long phys_pfn,
2291 unsigned long nr_pages, int prot)
2292 {
2293 int ret;
2294 struct intel_iommu *iommu;
2295
2296 /* Do the real mapping first */
2297 ret = __domain_mapping(domain, iov_pfn, sg, phys_pfn, nr_pages, prot);
2298 if (ret)
2299 return ret;
2300
2301 /* Notify about the new mapping */
2302 if (domain_type_is_vm(domain)) {
2303 /* VM-typed domains can have more than one IOMMU */
2304 int iommu_id;
2305 for_each_domain_iommu(iommu_id, domain) {
2306 iommu = g_iommus[iommu_id];
2307 __mapping_notify_one(iommu, domain, iov_pfn, nr_pages);
2308 }
2309 } else {
2310 /* General domains only have one IOMMU */
2311 iommu = domain_get_iommu(domain);
2312 __mapping_notify_one(iommu, domain, iov_pfn, nr_pages);
2313 }
2314
2315 return 0;
2316 }
2317
2318 static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2319 struct scatterlist *sg, unsigned long nr_pages,
2320 int prot)
2321 {
2322 return domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
2323 }
2324
2325 static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2326 unsigned long phys_pfn, unsigned long nr_pages,
2327 int prot)
2328 {
2329 return domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
2330 }
2331
2332 static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn)
2333 {
2334 unsigned long flags;
2335 struct context_entry *context;
2336 u16 did_old;
2337
2338 if (!iommu)
2339 return;
2340
2341 spin_lock_irqsave(&iommu->lock, flags);
2342 context = iommu_context_addr(iommu, bus, devfn, 0);
2343 if (!context) {
2344 spin_unlock_irqrestore(&iommu->lock, flags);
2345 return;
2346 }
2347 did_old = context_domain_id(context);
2348 context_clear_entry(context);
2349 __iommu_flush_cache(iommu, context, sizeof(*context));
2350 spin_unlock_irqrestore(&iommu->lock, flags);
2351 iommu->flush.flush_context(iommu,
2352 did_old,
2353 (((u16)bus) << 8) | devfn,
2354 DMA_CCMD_MASK_NOBIT,
2355 DMA_CCMD_DEVICE_INVL);
2356 iommu->flush.flush_iotlb(iommu,
2357 did_old,
2358 0,
2359 0,
2360 DMA_TLB_DSI_FLUSH);
2361 }
2362
2363 static inline void unlink_domain_info(struct device_domain_info *info)
2364 {
2365 assert_spin_locked(&device_domain_lock);
2366 list_del(&info->link);
2367 list_del(&info->global);
2368 if (info->dev)
2369 info->dev->archdata.iommu = NULL;
2370 }
2371
2372 static void domain_remove_dev_info(struct dmar_domain *domain)
2373 {
2374 struct device_domain_info *info, *tmp;
2375 unsigned long flags;
2376
2377 spin_lock_irqsave(&device_domain_lock, flags);
2378 list_for_each_entry_safe(info, tmp, &domain->devices, link)
2379 __dmar_remove_one_dev_info(info);
2380 spin_unlock_irqrestore(&device_domain_lock, flags);
2381 }
2382
2383 /*
2384 * find_domain
2385 * Note: we use struct device->archdata.iommu to store the info
2386 */
2387 static struct dmar_domain *find_domain(struct device *dev)
2388 {
2389 struct device_domain_info *info;
2390
2391 /* No lock here, assumes no domain exit in normal case */
2392 info = dev->archdata.iommu;
2393 if (likely(info))
2394 return info->domain;
2395 return NULL;
2396 }
2397
2398 static inline struct device_domain_info *
2399 dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2400 {
2401 struct device_domain_info *info;
2402
2403 list_for_each_entry(info, &device_domain_list, global)
2404 if (info->iommu->segment == segment && info->bus == bus &&
2405 info->devfn == devfn)
2406 return info;
2407
2408 return NULL;
2409 }
2410
2411 static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
2412 int bus, int devfn,
2413 struct device *dev,
2414 struct dmar_domain *domain)
2415 {
2416 struct dmar_domain *found = NULL;
2417 struct device_domain_info *info;
2418 unsigned long flags;
2419 int ret;
2420
2421 info = alloc_devinfo_mem();
2422 if (!info)
2423 return NULL;
2424
2425 info->bus = bus;
2426 info->devfn = devfn;
2427 info->ats_supported = info->pasid_supported = info->pri_supported = 0;
2428 info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
2429 info->ats_qdep = 0;
2430 info->dev = dev;
2431 info->domain = domain;
2432 info->iommu = iommu;
2433
2434 if (dev && dev_is_pci(dev)) {
2435 struct pci_dev *pdev = to_pci_dev(info->dev);
2436
2437 if (!pci_ats_disabled() &&
2438 ecap_dev_iotlb_support(iommu->ecap) &&
2439 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) &&
2440 dmar_find_matched_atsr_unit(pdev))
2441 info->ats_supported = 1;
2442
2443 if (ecs_enabled(iommu)) {
2444 if (pasid_enabled(iommu)) {
2445 int features = pci_pasid_features(pdev);
2446 if (features >= 0)
2447 info->pasid_supported = features | 1;
2448 }
2449
2450 if (info->ats_supported && ecap_prs(iommu->ecap) &&
2451 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
2452 info->pri_supported = 1;
2453 }
2454 }
2455
2456 spin_lock_irqsave(&device_domain_lock, flags);
2457 if (dev)
2458 found = find_domain(dev);
2459
2460 if (!found) {
2461 struct device_domain_info *info2;
2462 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
2463 if (info2) {
2464 found = info2->domain;
2465 info2->dev = dev;
2466 }
2467 }
2468
2469 if (found) {
2470 spin_unlock_irqrestore(&device_domain_lock, flags);
2471 free_devinfo_mem(info);
2472 /* Caller must free the original domain */
2473 return found;
2474 }
2475
2476 spin_lock(&iommu->lock);
2477 ret = domain_attach_iommu(domain, iommu);
2478 spin_unlock(&iommu->lock);
2479
2480 if (ret) {
2481 spin_unlock_irqrestore(&device_domain_lock, flags);
2482 free_devinfo_mem(info);
2483 return NULL;
2484 }
2485
2486 list_add(&info->link, &domain->devices);
2487 list_add(&info->global, &device_domain_list);
2488 if (dev)
2489 dev->archdata.iommu = info;
2490 spin_unlock_irqrestore(&device_domain_lock, flags);
2491
2492 if (dev && domain_context_mapping(domain, dev)) {
2493 pr_err("Domain context map for %s failed\n", dev_name(dev));
2494 dmar_remove_one_dev_info(domain, dev);
2495 return NULL;
2496 }
2497
2498 return domain;
2499 }
2500
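/*
 * pci_for_each_dma_alias() visits the device itself and then every bridge
 * alias on the way up towards the root bus, so once the walk finishes the
 * value stored through 'opaque' is the last alias reported - the requester
 * ID that is used below to look up or register the shared domain.
 */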
2501 static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2502 {
2503 *(u16 *)opaque = alias;
2504 return 0;
2505 }
2506
2507 static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw)
2508 {
2509 struct device_domain_info *info = NULL;
2510 struct dmar_domain *domain = NULL;
2511 struct intel_iommu *iommu;
2512 u16 dma_alias;
2513 unsigned long flags;
2514 u8 bus, devfn;
2515
2516 iommu = device_to_iommu(dev, &bus, &devfn);
2517 if (!iommu)
2518 return NULL;
2519
2520 if (dev_is_pci(dev)) {
2521 struct pci_dev *pdev = to_pci_dev(dev);
2522
2523 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2524
2525 spin_lock_irqsave(&device_domain_lock, flags);
2526 info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2527 PCI_BUS_NUM(dma_alias),
2528 dma_alias & 0xff);
2529 if (info) {
2530 iommu = info->iommu;
2531 domain = info->domain;
2532 }
2533 spin_unlock_irqrestore(&device_domain_lock, flags);
2534
2535 /* DMA alias already has a domain, use it */
2536 if (info)
2537 goto out;
2538 }
2539
2540 /* Allocate and initialize new domain for the device */
2541 domain = alloc_domain(0);
2542 if (!domain)
2543 return NULL;
2544 if (domain_init(domain, iommu, gaw)) {
2545 domain_exit(domain);
2546 return NULL;
2547 }
2548
2549 out:
2550
2551 return domain;
2552 }
2553
2554 static struct dmar_domain *set_domain_for_dev(struct device *dev,
2555 struct dmar_domain *domain)
2556 {
2557 struct intel_iommu *iommu;
2558 struct dmar_domain *tmp;
2559 u16 req_id, dma_alias;
2560 u8 bus, devfn;
2561
2562 iommu = device_to_iommu(dev, &bus, &devfn);
2563 if (!iommu)
2564 return NULL;
2565
2566 req_id = ((u16)bus << 8) | devfn;
2567
2568 if (dev_is_pci(dev)) {
2569 struct pci_dev *pdev = to_pci_dev(dev);
2570
2571 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2572
2573 /* register PCI DMA alias device */
2574 if (req_id != dma_alias) {
2575 tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2576 dma_alias & 0xff, NULL, domain);
2577
2578 if (!tmp || tmp != domain)
2579 return tmp;
2580 }
2581 }
2582
2583 tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
2584 if (!tmp || tmp != domain)
2585 return tmp;
2586
2587 return domain;
2588 }
2589
2590 static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
2591 {
2592 struct dmar_domain *domain, *tmp;
2593
2594 domain = find_domain(dev);
2595 if (domain)
2596 goto out;
2597
2598 domain = find_or_alloc_domain(dev, gaw);
2599 if (!domain)
2600 goto out;
2601
2602 tmp = set_domain_for_dev(dev, domain);
2603 if (!tmp || domain != tmp) {
2604 domain_exit(domain);
2605 domain = tmp;
2606 }
2607
2608 out:
2609
2610 return domain;
2611 }
2612
2613 static int iommu_domain_identity_map(struct dmar_domain *domain,
2614 unsigned long long start,
2615 unsigned long long end)
2616 {
2617 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2618 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2619
2620 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2621 dma_to_mm_pfn(last_vpfn))) {
2622 pr_err("Reserving iova failed\n");
2623 return -ENOMEM;
2624 }
2625
2626 pr_debug("Mapping reserved region %llx-%llx\n", start, end);
2627 /*
2628 * The RMRR range might overlap with a physical memory range,
2629 * so clear it first
2630 */
2631 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2632
2633 return __domain_mapping(domain, first_vpfn, NULL,
2634 first_vpfn, last_vpfn - first_vpfn + 1,
2635 DMA_PTE_READ|DMA_PTE_WRITE);
2636 }
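/*
 * Illustrative sketch, not part of the driver: how a byte-addressed RMRR
 * becomes the 1:1 PFN mapping set up above. The addresses are invented
 * for the example.
 */
static inline void example_rmrr_pfn_range(void)
{
	unsigned long long start = 0xe8000;		/* RMRR base address */
	unsigned long long end   = 0xeffff;		/* RMRR end address  */
	unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;	/* 0xe8 */
	unsigned long last_vpfn  = end >> VTD_PAGE_SHIFT;	/* 0xef */

	/*
	 * Eight pages are reserved in the iova tree and then mapped with
	 * iov_pfn == phys_pfn, i.e. the identity mapping.
	 */
	(void)first_vpfn;
	(void)last_vpfn;
}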
2637
2638 static int domain_prepare_identity_map(struct device *dev,
2639 struct dmar_domain *domain,
2640 unsigned long long start,
2641 unsigned long long end)
2642 {
2643 /* For _hardware_ passthrough, don't bother. But for software
2644 passthrough, we do it anyway -- it may indicate a memory
2645 range which is reserved in E820 and so didn't get set
2646 up to start with in si_domain */
2647 if (domain == si_domain && hw_pass_through) {
2648 pr_warn("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2649 dev_name(dev), start, end);
2650 return 0;
2651 }
2652
2653 pr_info("Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2654 dev_name(dev), start, end);
2655
2656 if (end < start) {
2657 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2658 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2659 dmi_get_system_info(DMI_BIOS_VENDOR),
2660 dmi_get_system_info(DMI_BIOS_VERSION),
2661 dmi_get_system_info(DMI_PRODUCT_VERSION));
2662 return -EIO;
2663 }
2664
2665 if (end >> agaw_to_width(domain->agaw)) {
2666 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2667 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2668 agaw_to_width(domain->agaw),
2669 dmi_get_system_info(DMI_BIOS_VENDOR),
2670 dmi_get_system_info(DMI_BIOS_VERSION),
2671 dmi_get_system_info(DMI_PRODUCT_VERSION));
2672 return -EIO;
2673 }
2674
2675 return iommu_domain_identity_map(domain, start, end);
2676 }
2677
2678 static int iommu_prepare_identity_map(struct device *dev,
2679 unsigned long long start,
2680 unsigned long long end)
2681 {
2682 struct dmar_domain *domain;
2683 int ret;
2684
2685 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2686 if (!domain)
2687 return -ENOMEM;
2688
2689 ret = domain_prepare_identity_map(dev, domain, start, end);
2690 if (ret)
2691 domain_exit(domain);
2692
2693 return ret;
2694 }
2695
2696 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2697 struct device *dev)
2698 {
2699 if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2700 return 0;
2701 return iommu_prepare_identity_map(dev, rmrr->base_address,
2702 rmrr->end_address);
2703 }
2704
2705 #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
2706 static inline void iommu_prepare_isa(void)
2707 {
2708 struct pci_dev *pdev;
2709 int ret;
2710
2711 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2712 if (!pdev)
2713 return;
2714
2715 pr_info("Prepare 0-16MiB unity mapping for LPC\n");
2716 ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);
2717
2718 if (ret)
2719 pr_err("Failed to create 0-16MiB identity map - floppy might not work\n");
2720
2721 pci_dev_put(pdev);
2722 }
2723 #else
2724 static inline void iommu_prepare_isa(void)
2725 {
2726 return;
2727 }
2728 #endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */
2729
2730 static int md_domain_init(struct dmar_domain *domain, int guest_width);
2731
2732 static int __init si_domain_init(int hw)
2733 {
2734 int nid, ret = 0;
2735
2736 si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2737 if (!si_domain)
2738 return -EFAULT;
2739
2740 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2741 domain_exit(si_domain);
2742 return -EFAULT;
2743 }
2744
2745 pr_debug("Identity mapping domain allocated\n");
2746
2747 if (hw)
2748 return 0;
2749
2750 for_each_online_node(nid) {
2751 unsigned long start_pfn, end_pfn;
2752 int i;
2753
2754 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2755 ret = iommu_domain_identity_map(si_domain,
2756 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2757 if (ret)
2758 return ret;
2759 }
2760 }
2761
2762 return 0;
2763 }
2764
2765 static int identity_mapping(struct device *dev)
2766 {
2767 struct device_domain_info *info;
2768
2769 if (likely(!iommu_identity_mapping))
2770 return 0;
2771
2772 info = dev->archdata.iommu;
2773 if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2774 return (info->domain == si_domain);
2775
2776 return 0;
2777 }
2778
2779 static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
2780 {
2781 struct dmar_domain *ndomain;
2782 struct intel_iommu *iommu;
2783 u8 bus, devfn;
2784
2785 iommu = device_to_iommu(dev, &bus, &devfn);
2786 if (!iommu)
2787 return -ENODEV;
2788
2789 ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
2790 if (ndomain != domain)
2791 return -EBUSY;
2792
2793 return 0;
2794 }
2795
2796 static bool device_has_rmrr(struct device *dev)
2797 {
2798 struct dmar_rmrr_unit *rmrr;
2799 struct device *tmp;
2800 int i;
2801
2802 rcu_read_lock();
2803 for_each_rmrr_units(rmrr) {
2804 /*
2805 * Return TRUE if this RMRR contains the device that
2806 * is passed in.
2807 */
2808 for_each_active_dev_scope(rmrr->devices,
2809 rmrr->devices_cnt, i, tmp)
2810 if (tmp == dev) {
2811 rcu_read_unlock();
2812 return true;
2813 }
2814 }
2815 rcu_read_unlock();
2816 return false;
2817 }
2818
2819 /*
2820 * There are a couple cases where we need to restrict the functionality of
2821 * devices associated with RMRRs. The first is when evaluating a device for
2822 * identity mapping because problems exist when devices are moved in and out
2823 * of domains and their respective RMRR information is lost. This means that
2824 * a device with associated RMRRs will never be in a "passthrough" domain.
2825 * The second is use of the device through the IOMMU API. This interface
2826 * expects to have full control of the IOVA space for the device. We cannot
2827 * satisfy both the requirement that RMRR access is maintained and have an
2828 * unencumbered IOVA space. We also have no ability to quiesce the device's
2829 * use of the RMRR space or even inform the IOMMU API user of the restriction.
2830 * We therefore prevent devices associated with an RMRR from participating in
2831 * the IOMMU API, which eliminates them from device assignment.
2832 *
2833 * In both cases we assume that PCI USB devices with RMRRs have them largely
2834 * for historical reasons and that the RMRR space is not actively used post
2835 * boot. This exclusion may change if vendors begin to abuse it.
2836 *
2837 * The same exception is made for graphics devices, with the requirement that
2838 * any use of the RMRR regions will be torn down before assigning the device
2839 * to a guest.
2840 */
2841 static bool device_is_rmrr_locked(struct device *dev)
2842 {
2843 if (!device_has_rmrr(dev))
2844 return false;
2845
2846 if (dev_is_pci(dev)) {
2847 struct pci_dev *pdev = to_pci_dev(dev);
2848
2849 if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
2850 return false;
2851 }
2852
2853 return true;
2854 }
2855
2856 static int iommu_should_identity_map(struct device *dev, int startup)
2857 {
2858
2859 if (dev_is_pci(dev)) {
2860 struct pci_dev *pdev = to_pci_dev(dev);
2861
2862 if (device_is_rmrr_locked(dev))
2863 return 0;
2864
2865 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2866 return 1;
2867
2868 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2869 return 1;
2870
2871 if (!(iommu_identity_mapping & IDENTMAP_ALL))
2872 return 0;
2873
2874 /*
2875 * We want to start off with all devices in the 1:1 domain, and
2876 * take them out later if we find they can't access all of memory.
2877 *
2878 * However, we can't do this for PCI devices behind bridges,
2879 * because all PCI devices behind the same bridge will end up
2880 * with the same source-id on their transactions.
2881 *
2882 * Practically speaking, we can't change things around for these
2883 * devices at run-time, because we can't be sure there'll be no
2884 * DMA transactions in flight for any of their siblings.
2885 *
2886 * So PCI devices (unless they're on the root bus) as well as
2887 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2888 * the 1:1 domain, just in _case_ one of their siblings turns out
2889 * not to be able to map all of memory.
2890 */
2891 if (!pci_is_pcie(pdev)) {
2892 if (!pci_is_root_bus(pdev->bus))
2893 return 0;
2894 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2895 return 0;
2896 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
2897 return 0;
2898 } else {
2899 if (device_has_rmrr(dev))
2900 return 0;
2901 }
2902
2903 /*
2904 * At boot time, we don't yet know if devices will be 64-bit capable.
2905 * Assume that they will — if they turn out not to be, then we can
2906 * take them out of the 1:1 domain later.
2907 */
2908 if (!startup) {
2909 /*
2910 * If the device's dma_mask is less than the system's memory
2911 * size then this is not a candidate for identity mapping.
2912 */
2913 u64 dma_mask = *dev->dma_mask;
2914
2915 if (dev->coherent_dma_mask &&
2916 dev->coherent_dma_mask < dma_mask)
2917 dma_mask = dev->coherent_dma_mask;
2918
2919 return dma_mask >= dma_get_required_mask(dev);
2920 }
2921
2922 return 1;
2923 }
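/*
 * Illustrative sketch, not part of the driver: the run-time check above
 * in miniature. A device whose usable DMA mask cannot cover all of the
 * memory the platform may hand it is not a candidate for the 1:1 domain.
 * The masks below are invented for the example.
 */
static inline bool example_can_identity_map(void)
{
	u64 dma_mask          = DMA_BIT_MASK(32);  /* device: 32-bit DMA   */
	u64 coherent_dma_mask = DMA_BIT_MASK(32);
	u64 required_mask     = DMA_BIT_MASK(36);  /* 64GiB of RAM present */

	if (coherent_dma_mask && coherent_dma_mask < dma_mask)
		dma_mask = coherent_dma_mask;

	/* false: fall back to a private domain with remapping */
	return dma_mask >= required_mask;
}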
2924
2925 static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
2926 {
2927 int ret;
2928
2929 if (!iommu_should_identity_map(dev, 1))
2930 return 0;
2931
2932 ret = domain_add_dev_info(si_domain, dev);
2933 if (!ret)
2934 pr_info("%s identity mapping for device %s\n",
2935 hw ? "Hardware" : "Software", dev_name(dev));
2936 else if (ret == -ENODEV)
2937 /* device not associated with an iommu */
2938 ret = 0;
2939
2940 return ret;
2941 }
2942
2943
2944 static int __init iommu_prepare_static_identity_mapping(int hw)
2945 {
2946 struct pci_dev *pdev = NULL;
2947 struct dmar_drhd_unit *drhd;
2948 struct intel_iommu *iommu;
2949 struct device *dev;
2950 int i;
2951 int ret = 0;
2952
2953 for_each_pci_dev(pdev) {
2954 ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
2955 if (ret)
2956 return ret;
2957 }
2958
2959 for_each_active_iommu(iommu, drhd)
2960 for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
2961 struct acpi_device_physical_node *pn;
2962 struct acpi_device *adev;
2963
2964 if (dev->bus != &acpi_bus_type)
2965 continue;
2966
2967 adev = to_acpi_device(dev);
2968 mutex_lock(&adev->physical_node_lock);
2969 list_for_each_entry(pn, &adev->physical_node_list, node) {
2970 ret = dev_prepare_static_identity_mapping(pn->dev, hw);
2971 if (ret)
2972 break;
2973 }
2974 mutex_unlock(&adev->physical_node_lock);
2975 if (ret)
2976 return ret;
2977 }
2978
2979 return 0;
2980 }
2981
2982 static void intel_iommu_init_qi(struct intel_iommu *iommu)
2983 {
2984 /*
2985 * Start from a sane iommu hardware state.
2986 * If queued invalidation was already initialized by us
2987 * (for example, while enabling interrupt remapping), then
2988 * we already have things rolling from a sane state.
2989 */
2990 if (!iommu->qi) {
2991 /*
2992 * Clear any previous faults.
2993 */
2994 dmar_fault(-1, iommu);
2995 /*
2996 * Disable queued invalidation if supported and already enabled
2997 * before OS handover.
2998 */
2999 dmar_disable_qi(iommu);
3000 }
3001
3002 if (dmar_enable_qi(iommu)) {
3003 /*
3004 * Queued Invalidate not enabled, use Register Based Invalidate
3005 */
3006 iommu->flush.flush_context = __iommu_flush_context;
3007 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
3008 pr_info("%s: Using Register based invalidation\n",
3009 iommu->name);
3010 } else {
3011 iommu->flush.flush_context = qi_flush_context;
3012 iommu->flush.flush_iotlb = qi_flush_iotlb;
3013 pr_info("%s: Using Queued invalidation\n", iommu->name);
3014 }
3015 }
3016
3017 static int copy_context_table(struct intel_iommu *iommu,
3018 struct root_entry *old_re,
3019 struct context_entry **tbl,
3020 int bus, bool ext)
3021 {
3022 int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
3023 struct context_entry *new_ce = NULL, ce;
3024 struct context_entry *old_ce = NULL;
3025 struct root_entry re;
3026 phys_addr_t old_ce_phys;
3027
3028 tbl_idx = ext ? bus * 2 : bus;
3029 memcpy(&re, old_re, sizeof(re));
3030
3031 for (devfn = 0; devfn < 256; devfn++) {
3032 /* First calculate the correct index */
3033 idx = (ext ? devfn * 2 : devfn) % 256;
3034
3035 if (idx == 0) {
3036 /* First save what we may have and clean up */
3037 if (new_ce) {
3038 tbl[tbl_idx] = new_ce;
3039 __iommu_flush_cache(iommu, new_ce,
3040 VTD_PAGE_SIZE);
3041 pos = 1;
3042 }
3043
3044 if (old_ce)
3045 iounmap(old_ce);
3046
3047 ret = 0;
3048 if (devfn < 0x80)
3049 old_ce_phys = root_entry_lctp(&re);
3050 else
3051 old_ce_phys = root_entry_uctp(&re);
3052
3053 if (!old_ce_phys) {
3054 if (ext && devfn == 0) {
3055 /* No LCTP, try UCTP */
3056 devfn = 0x7f;
3057 continue;
3058 } else {
3059 goto out;
3060 }
3061 }
3062
3063 ret = -ENOMEM;
3064 old_ce = memremap(old_ce_phys, PAGE_SIZE,
3065 MEMREMAP_WB);
3066 if (!old_ce)
3067 goto out;
3068
3069 new_ce = alloc_pgtable_page(iommu->node);
3070 if (!new_ce)
3071 goto out_unmap;
3072
3073 ret = 0;
3074 }
3075
3076 /* Now copy the context entry */
3077 memcpy(&ce, old_ce + idx, sizeof(ce));
3078
3079 if (!__context_present(&ce))
3080 continue;
3081
3082 did = context_domain_id(&ce);
3083 if (did >= 0 && did < cap_ndoms(iommu->cap))
3084 set_bit(did, iommu->domain_ids);
3085
3086 /*
3087 * We need a marker for copied context entries. This
3088 * marker needs to work for the old format as well as
3089 * for extended context entries.
3090 *
3091 * Bit 67 of the context entry is used. In the old
3092 * format this bit is available to software, in the
3093 * extended format it is the PGE bit, but PGE is ignored
3094 * by HW if PASIDs are disabled (and thus still
3095 * available).
3096 *
3097 * So disable PASIDs first and then mark the entry
3098 * copied. This means that we don't copy PASID
3099 * translations from the old kernel, but this is fine as
3100 * faults there are not fatal.
3101 */
3102 context_clear_pasid_enable(&ce);
3103 context_set_copied(&ce);
3104
3105 new_ce[idx] = ce;
3106 }
3107
3108 tbl[tbl_idx + pos] = new_ce;
3109
3110 __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
3111
3112 out_unmap:
3113 memunmap(old_ce);
3114
3115 out:
3116 return ret;
3117 }
3118
3119 static int copy_translation_tables(struct intel_iommu *iommu)
3120 {
3121 struct context_entry **ctxt_tbls;
3122 struct root_entry *old_rt;
3123 phys_addr_t old_rt_phys;
3124 int ctxt_table_entries;
3125 unsigned long flags;
3126 u64 rtaddr_reg;
3127 int bus, ret;
3128 bool new_ext, ext;
3129
3130 rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
3131 ext = !!(rtaddr_reg & DMA_RTADDR_RTT);
3132 new_ext = !!ecap_ecs(iommu->ecap);
3133
3134 /*
3135 * The RTT bit can only be changed when translation is disabled,
3136 * but disabling translation means to open a window for data
3137 * corruption. So bail out and don't copy anything if we would
3138 * have to change the bit.
3139 */
3140 if (new_ext != ext)
3141 return -EINVAL;
3142
3143 old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
3144 if (!old_rt_phys)
3145 return -EINVAL;
3146
3147 old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
3148 if (!old_rt)
3149 return -ENOMEM;
3150
3151 /* This is too big for the stack - allocate it from slab */
3152 ctxt_table_entries = ext ? 512 : 256;
3153 ret = -ENOMEM;
3154 ctxt_tbls = kcalloc(ctxt_table_entries, sizeof(void *), GFP_KERNEL);
3155 if (!ctxt_tbls)
3156 goto out_unmap;
3157
3158 for (bus = 0; bus < 256; bus++) {
3159 ret = copy_context_table(iommu, &old_rt[bus],
3160 ctxt_tbls, bus, ext);
3161 if (ret) {
3162 pr_err("%s: Failed to copy context table for bus %d\n",
3163 iommu->name, bus);
3164 continue;
3165 }
3166 }
3167
3168 spin_lock_irqsave(&iommu->lock, flags);
3169
3170 /* Context tables are copied, now write them to the root_entry table */
3171 for (bus = 0; bus < 256; bus++) {
3172 int idx = ext ? bus * 2 : bus;
3173 u64 val;
3174
3175 if (ctxt_tbls[idx]) {
3176 val = virt_to_phys(ctxt_tbls[idx]) | 1;
3177 iommu->root_entry[bus].lo = val;
3178 }
3179
3180 if (!ext || !ctxt_tbls[idx + 1])
3181 continue;
3182
3183 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
3184 iommu->root_entry[bus].hi = val;
3185 }
3186
3187 spin_unlock_irqrestore(&iommu->lock, flags);
3188
3189 kfree(ctxt_tbls);
3190
3191 __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
3192
3193 ret = 0;
3194
3195 out_unmap:
3196 memunmap(old_rt);
3197
3198 return ret;
3199 }
3200
3201 static int __init init_dmars(void)
3202 {
3203 struct dmar_drhd_unit *drhd;
3204 struct dmar_rmrr_unit *rmrr;
3205 bool copied_tables = false;
3206 struct device *dev;
3207 struct intel_iommu *iommu;
3208 int i, ret;
3209
3210 /*
3211 * for each drhd
3212 * allocate root
3213 * initialize and program root entry to not present
3214 * endfor
3215 */
3216 for_each_drhd_unit(drhd) {
3217 /*
3218 * No lock is needed: this is only incremented in the single-
3219 * threaded kernel __init code path; all other accesses are
3220 * read-only
3221 */
3222 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
3223 g_num_of_iommus++;
3224 continue;
3225 }
3226 pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
3227 }
3228
3229 /* Preallocate enough resources for IOMMU hot-addition */
3230 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
3231 g_num_of_iommus = DMAR_UNITS_SUPPORTED;
3232
3233 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
3234 GFP_KERNEL);
3235 if (!g_iommus) {
3236 pr_err("Allocating global iommu array failed\n");
3237 ret = -ENOMEM;
3238 goto error;
3239 }
3240
3241 for_each_active_iommu(iommu, drhd) {
3242 /*
3243 * Find the max pasid size of all IOMMUs in the system.
3244 * We need to ensure the system pasid table is no bigger
3245 * than the smallest supported.
3246 */
3247 if (pasid_enabled(iommu)) {
3248 u32 temp = 2 << ecap_pss(iommu->ecap);
3249
3250 intel_pasid_max_id = min_t(u32, temp,
3251 intel_pasid_max_id);
3252 }
3253
3254 g_iommus[iommu->seq_id] = iommu;
3255
3256 intel_iommu_init_qi(iommu);
3257
3258 ret = iommu_init_domains(iommu);
3259 if (ret)
3260 goto free_iommu;
3261
3262 init_translation_status(iommu);
3263
3264 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
3265 iommu_disable_translation(iommu);
3266 clear_translation_pre_enabled(iommu);
3267 pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3268 iommu->name);
3269 }
3270
3271 /*
3272 * TBD:
3273 * we could share the same root & context tables
3274 * among all IOMMUs. Needs to be split later.
3275 */
3276 ret = iommu_alloc_root_entry(iommu);
3277 if (ret)
3278 goto free_iommu;
3279
3280 if (translation_pre_enabled(iommu)) {
3281 pr_info("Translation already enabled - trying to copy translation structures\n");
3282
3283 ret = copy_translation_tables(iommu);
3284 if (ret) {
3285 /*
3286 * We found the IOMMU with translation
3287 * enabled - but failed to copy over the
3288 * old root-entry table. Try to proceed
3289 * by disabling translation now and
3290 * allocating a clean root-entry table.
3291 * This might cause DMAR faults, but
3292 * probably the dump will still succeed.
3293 */
3294 pr_err("Failed to copy translation tables from previous kernel for %s\n",
3295 iommu->name);
3296 iommu_disable_translation(iommu);
3297 clear_translation_pre_enabled(iommu);
3298 } else {
3299 pr_info("Copied translation tables from previous kernel for %s\n",
3300 iommu->name);
3301 copied_tables = true;
3302 }
3303 }
3304
3305 if (!ecap_pass_through(iommu->ecap))
3306 hw_pass_through = 0;
3307 #ifdef CONFIG_INTEL_IOMMU_SVM
3308 if (pasid_enabled(iommu))
3309 intel_svm_alloc_pasid_tables(iommu);
3310 #endif
3311 }
3312
3313 /*
3314 * Now that qi is enabled on all iommus, set the root entry and flush
3315 * caches. This is required on some Intel X58 chipsets, otherwise the
3316 * flush_context function will loop forever and the boot hangs.
3317 */
3318 for_each_active_iommu(iommu, drhd) {
3319 iommu_flush_write_buffer(iommu);
3320 iommu_set_root_entry(iommu);
3321 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
3322 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3323 }
3324
3325 if (iommu_pass_through)
3326 iommu_identity_mapping |= IDENTMAP_ALL;
3327
3328 #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
3329 iommu_identity_mapping |= IDENTMAP_GFX;
3330 #endif
3331
3332 check_tylersburg_isoch();
3333
3334 if (iommu_identity_mapping) {
3335 ret = si_domain_init(hw_pass_through);
3336 if (ret)
3337 goto free_iommu;
3338 }
3339
3340
3341 /*
3342 * If we copied translations from a previous kernel in the kdump
3343 * case, we cannot assign the devices to domains now, as that
3344 * would eliminate the old mappings. So skip this part and defer
3345 * the assignment to device driver initialization time.
3346 */
3347 if (copied_tables)
3348 goto domains_done;
3349
3350 /*
3351 * If pass through is not set or not enabled, set up context entries for
3352 * identity mappings for rmrr, gfx, and isa, and fall back to static
3353 * identity mapping if iommu_identity_mapping is set.
3354 */
3355 if (iommu_identity_mapping) {
3356 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
3357 if (ret) {
3358 pr_crit("Failed to setup IOMMU pass-through\n");
3359 goto free_iommu;
3360 }
3361 }
3362 /*
3363 * For each rmrr
3364 * for each dev attached to rmrr
3365 * do
3366 * locate drhd for dev, alloc domain for dev
3367 * allocate free domain
3368 * allocate page table entries for rmrr
3369 * if context not allocated for bus
3370 * allocate and init context
3371 * set present in root table for this bus
3372 * init context with domain, translation etc
3373 * endfor
3374 * endfor
3375 */
3376 pr_info("Setting RMRR:\n");
3377 for_each_rmrr_units(rmrr) {
3378 /* some BIOSes list non-existent devices in the DMAR table. */
3379 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
3380 i, dev) {
3381 ret = iommu_prepare_rmrr_dev(rmrr, dev);
3382 if (ret)
3383 pr_err("Mapping reserved region failed\n");
3384 }
3385 }
3386
3387 iommu_prepare_isa();
3388
3389 domains_done:
3390
3391 /*
3392 * for each drhd
3393 * enable fault log
3394 * global invalidate context cache
3395 * global invalidate iotlb
3396 * enable translation
3397 */
3398 for_each_iommu(iommu, drhd) {
3399 if (drhd->ignored) {
3400 /*
3401 * we always have to disable PMRs or DMA may fail on
3402 * this device
3403 */
3404 if (force_on)
3405 iommu_disable_protect_mem_regions(iommu);
3406 continue;
3407 }
3408
3409 iommu_flush_write_buffer(iommu);
3410
3411 #ifdef CONFIG_INTEL_IOMMU_SVM
3412 if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) {
3413 ret = intel_svm_enable_prq(iommu);
3414 if (ret)
3415 goto free_iommu;
3416 }
3417 #endif
3418 ret = dmar_set_interrupt(iommu);
3419 if (ret)
3420 goto free_iommu;
3421
3422 if (!translation_pre_enabled(iommu))
3423 iommu_enable_translation(iommu);
3424
3425 iommu_disable_protect_mem_regions(iommu);
3426 }
3427
3428 return 0;
3429
3430 free_iommu:
3431 for_each_active_iommu(iommu, drhd) {
3432 disable_dmar_iommu(iommu);
3433 free_dmar_iommu(iommu);
3434 }
3435
3436 kfree(g_iommus);
3437
3438 error:
3439 return ret;
3440 }
3441
3442 /* This takes a number of _MM_ pages, not VTD pages */
3443 static unsigned long intel_alloc_iova(struct device *dev,
3444 struct dmar_domain *domain,
3445 unsigned long nrpages, uint64_t dma_mask)
3446 {
3447 unsigned long iova_pfn = 0;
3448
3449 /* Restrict dma_mask to the width that the iommu can handle */
3450 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
3451 /* Ensure we reserve the whole size-aligned region */
3452 nrpages = __roundup_pow_of_two(nrpages);
3453
3454 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
3455 /*
3456 * First try to allocate an io virtual address in
3457 * DMA_BIT_MASK(32) and if that fails then try allocating
3458 * from higher range
3459 */
3460 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
3461 IOVA_PFN(DMA_BIT_MASK(32)), false);
3462 if (iova_pfn)
3463 return iova_pfn;
3464 }
3465 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
3466 IOVA_PFN(dma_mask), true);
3467 if (unlikely(!iova_pfn)) {
3468 pr_err("Allocating %ld-page iova for %s failed\n",
3469 nrpages, dev_name(dev));
3470 return 0;
3471 }
3472
3473 return iova_pfn;
3474 }
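/*
 * Illustrative sketch, not part of the driver: how intel_alloc_iova()
 * sizes a request. The IOVA allocator hands out size-aligned,
 * power-of-two regions, and the device mask is clamped so that the
 * domain can actually translate every address returned. The request
 * below is invented for the example.
 */
static inline void example_iova_request(struct dmar_domain *domain)
{
	uint64_t dma_mask = DMA_BIT_MASK(64);	/* device claims 64-bit   */
	unsigned long nrpages = 5;		/* five MM pages wanted   */

	/* never hand out addresses the domain cannot translate */
	dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);

	/* the allocator works on size-aligned power-of-two chunks */
	nrpages = __roundup_pow_of_two(nrpages);	/* -> 8 pages */

	(void)dma_mask;
	(void)nrpages;
}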
3475
3476 struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
3477 {
3478 struct dmar_domain *domain, *tmp;
3479 struct dmar_rmrr_unit *rmrr;
3480 struct device *i_dev;
3481 int i, ret;
3482
3483 domain = find_domain(dev);
3484 if (domain)
3485 goto out;
3486
3487 domain = find_or_alloc_domain(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
3488 if (!domain)
3489 goto out;
3490
3491 /* We have a new domain - setup possible RMRRs for the device */
3492 rcu_read_lock();
3493 for_each_rmrr_units(rmrr) {
3494 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
3495 i, i_dev) {
3496 if (i_dev != dev)
3497 continue;
3498
3499 ret = domain_prepare_identity_map(dev, domain,
3500 rmrr->base_address,
3501 rmrr->end_address);
3502 if (ret)
3503 dev_err(dev, "Mapping reserved region failed\n");
3504 }
3505 }
3506 rcu_read_unlock();
3507
3508 tmp = set_domain_for_dev(dev, domain);
3509 if (!tmp || domain != tmp) {
3510 domain_exit(domain);
3511 domain = tmp;
3512 }
3513
3514 out:
3515
3516 if (!domain)
3517 pr_err("Allocating domain for %s failed\n", dev_name(dev));
3518
3519
3520 return domain;
3521 }
3522
3523 /* Check if the dev needs to go through the non-identity map and unmap process. */
3524 static int iommu_no_mapping(struct device *dev)
3525 {
3526 int found;
3527
3528 if (iommu_dummy(dev))
3529 return 1;
3530
3531 if (!iommu_identity_mapping)
3532 return 0;
3533
3534 found = identity_mapping(dev);
3535 if (found) {
3536 if (iommu_should_identity_map(dev, 0))
3537 return 1;
3538 else {
3539 /*
3540 * The 32-bit DMA device is removed from si_domain; fall back
3541 * to non-identity mapping.
3542 */
3543 dmar_remove_one_dev_info(si_domain, dev);
3544 pr_info("32bit %s uses non-identity mapping\n",
3545 dev_name(dev));
3546 return 0;
3547 }
3548 } else {
3549 /*
3550 * If a 64-bit DMA device was detached from a VM, the device
3551 * is put back into si_domain for identity mapping.
3552 */
3553 if (iommu_should_identity_map(dev, 0)) {
3554 int ret;
3555 ret = domain_add_dev_info(si_domain, dev);
3556 if (!ret) {
3557 pr_info("64bit %s uses identity mapping\n",
3558 dev_name(dev));
3559 return 1;
3560 }
3561 }
3562 }
3563
3564 return 0;
3565 }
3566
3567 static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
3568 size_t size, int dir, u64 dma_mask)
3569 {
3570 struct dmar_domain *domain;
3571 phys_addr_t start_paddr;
3572 unsigned long iova_pfn;
3573 int prot = 0;
3574 int ret;
3575 struct intel_iommu *iommu;
3576 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
3577
3578 BUG_ON(dir == DMA_NONE);
3579
3580 if (iommu_no_mapping(dev))
3581 return paddr;
3582
3583 domain = get_valid_domain_for_dev(dev);
3584 if (!domain)
3585 return 0;
3586
3587 iommu = domain_get_iommu(domain);
3588 size = aligned_nrpages(paddr, size);
3589
3590 iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
3591 if (!iova_pfn)
3592 goto error;
3593
3594 /*
3595 * Check if DMAR supports zero-length reads on write-only
3596 * mappings.
3597 */
3598 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
3599 !cap_zlr(iommu->cap))
3600 prot |= DMA_PTE_READ;
3601 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3602 prot |= DMA_PTE_WRITE;
3603 /*
3604 * paddr .. (paddr + size) might span a partial page, so we should map the
3605 * whole page. Note: if two parts of one page are mapped separately, we
3606 * might have two guest addresses mapping to the same host paddr, but this
3607 * is not a big problem
3608 */
3609 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova_pfn),
3610 mm_to_dma_pfn(paddr_pfn), size, prot);
3611 if (ret)
3612 goto error;
3613
3614 start_paddr = (phys_addr_t)iova_pfn << PAGE_SHIFT;
3615 start_paddr += paddr & ~PAGE_MASK;
3616 return start_paddr;
3617
3618 error:
3619 if (iova_pfn)
3620 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
3621 pr_err("Device %s request: %zx@%llx dir %d --- failed\n",
3622 dev_name(dev), size, (unsigned long long)paddr, dir);
3623 return 0;
3624 }
3625
3626 static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3627 unsigned long offset, size_t size,
3628 enum dma_data_direction dir,
3629 unsigned long attrs)
3630 {
3631 return __intel_map_single(dev, page_to_phys(page) + offset, size,
3632 dir, *dev->dma_mask);
3633 }
3634
3635 static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
3636 {
3637 struct dmar_domain *domain;
3638 unsigned long start_pfn, last_pfn;
3639 unsigned long nrpages;
3640 unsigned long iova_pfn;
3641 struct intel_iommu *iommu;
3642 struct page *freelist;
3643
3644 if (iommu_no_mapping(dev))
3645 return;
3646
3647 domain = find_domain(dev);
3648 BUG_ON(!domain);
3649
3650 iommu = domain_get_iommu(domain);
3651
3652 iova_pfn = IOVA_PFN(dev_addr);
3653
3654 nrpages = aligned_nrpages(dev_addr, size);
3655 start_pfn = mm_to_dma_pfn(iova_pfn);
3656 last_pfn = start_pfn + nrpages - 1;
3657
3658 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
3659 dev_name(dev), start_pfn, last_pfn);
3660
3661 freelist = domain_unmap(domain, start_pfn, last_pfn);
3662
3663 if (intel_iommu_strict) {
3664 iommu_flush_iotlb_psi(iommu, domain, start_pfn,
3665 nrpages, !freelist, 0);
3666 /* free iova */
3667 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages));
3668 dma_free_pagelist(freelist);
3669 } else {
3670 queue_iova(&domain->iovad, iova_pfn, nrpages,
3671 (unsigned long)freelist);
3672 /*
3673 * queue up the release of the unmap to save the roughly 1/6 of
3674 * the cpu time used up by the iotlb flush operation...
3675 */
3676 }
3677 }
3678
3679 static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3680 size_t size, enum dma_data_direction dir,
3681 unsigned long attrs)
3682 {
3683 intel_unmap(dev, dev_addr, size);
3684 }
3685
3686 static void *intel_alloc_coherent(struct device *dev, size_t size,
3687 dma_addr_t *dma_handle, gfp_t flags,
3688 unsigned long attrs)
3689 {
3690 void *vaddr;
3691
3692 vaddr = dma_direct_alloc(dev, size, dma_handle, flags, attrs);
3693 if (iommu_no_mapping(dev) || !vaddr)
3694 return vaddr;
3695
3696 *dma_handle = __intel_map_single(dev, virt_to_phys(vaddr),
3697 PAGE_ALIGN(size), DMA_BIDIRECTIONAL,
3698 dev->coherent_dma_mask);
3699 if (!*dma_handle)
3700 goto out_free_pages;
3701 return vaddr;
3702
3703 out_free_pages:
3704 dma_direct_free(dev, size, vaddr, *dma_handle, attrs);
3705 return NULL;
3706 }
3707
3708 static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
3709 dma_addr_t dma_handle, unsigned long attrs)
3710 {
3711 if (!iommu_no_mapping(dev))
3712 intel_unmap(dev, dma_handle, PAGE_ALIGN(size));
3713 dma_direct_free(dev, size, vaddr, dma_handle, attrs);
3714 }
3715
3716 static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
3717 int nelems, enum dma_data_direction dir,
3718 unsigned long attrs)
3719 {
3720 dma_addr_t startaddr = sg_dma_address(sglist) & PAGE_MASK;
3721 unsigned long nrpages = 0;
3722 struct scatterlist *sg;
3723 int i;
3724
3725 for_each_sg(sglist, sg, nelems, i) {
3726 nrpages += aligned_nrpages(sg_dma_address(sg), sg_dma_len(sg));
3727 }
3728
3729 intel_unmap(dev, startaddr, nrpages << VTD_PAGE_SHIFT);
3730 }
3731
3732 static int intel_nontranslate_map_sg(struct device *hddev,
3733 struct scatterlist *sglist, int nelems, int dir)
3734 {
3735 int i;
3736 struct scatterlist *sg;
3737
3738 for_each_sg(sglist, sg, nelems, i) {
3739 BUG_ON(!sg_page(sg));
3740 sg->dma_address = sg_phys(sg);
3741 sg->dma_length = sg->length;
3742 }
3743 return nelems;
3744 }
3745
3746 static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
3747 enum dma_data_direction dir, unsigned long attrs)
3748 {
3749 int i;
3750 struct dmar_domain *domain;
3751 size_t size = 0;
3752 int prot = 0;
3753 unsigned long iova_pfn;
3754 int ret;
3755 struct scatterlist *sg;
3756 unsigned long start_vpfn;
3757 struct intel_iommu *iommu;
3758
3759 BUG_ON(dir == DMA_NONE);
3760 if (iommu_no_mapping(dev))
3761 return intel_nontranslate_map_sg(dev, sglist, nelems, dir);
3762
3763 domain = get_valid_domain_for_dev(dev);
3764 if (!domain)
3765 return 0;
3766
3767 iommu = domain_get_iommu(domain);
3768
3769 for_each_sg(sglist, sg, nelems, i)
3770 size += aligned_nrpages(sg->offset, sg->length);
3771
3772 iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
3773 *dev->dma_mask);
3774 if (!iova_pfn) {
3775 sglist->dma_length = 0;
3776 return 0;
3777 }
3778
3779 /*
3780 * Check if DMAR supports zero-length reads on write-only
3781 * mappings.
3782 */
3783 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
3784 !cap_zlr(iommu->cap))
3785 prot |= DMA_PTE_READ;
3786 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3787 prot |= DMA_PTE_WRITE;
3788
3789 start_vpfn = mm_to_dma_pfn(iova_pfn);
3790
3791 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
3792 if (unlikely(ret)) {
3793 dma_pte_free_pagetable(domain, start_vpfn,
3794 start_vpfn + size - 1,
3795 agaw_to_level(domain->agaw) + 1);
3796 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
3797 return 0;
3798 }
3799
3800 return nelems;
3801 }
3802
3803 static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3804 {
3805 return !dma_addr;
3806 }
3807
3808 const struct dma_map_ops intel_dma_ops = {
3809 .alloc = intel_alloc_coherent,
3810 .free = intel_free_coherent,
3811 .map_sg = intel_map_sg,
3812 .unmap_sg = intel_unmap_sg,
3813 .map_page = intel_map_page,
3814 .unmap_page = intel_unmap_page,
3815 .mapping_error = intel_mapping_error,
3816 #ifdef CONFIG_X86
3817 .dma_supported = dma_direct_supported,
3818 #endif
3819 };
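/*
 * Illustrative sketch, not part of the driver: how a device driver ends
 * up in the ops table above. dma_map_page() dispatches through the
 * device's dma_map_ops, which on a VT-d system points at intel_dma_ops,
 * so the call chain is dma_map_page() -> intel_map_page() ->
 * __intel_map_single(). The device and page arguments are placeholders
 * for the example.
 */
static inline int example_driver_dma(struct device *dev, struct page *page)
{
	dma_addr_t handle = dma_map_page(dev, page, 0, PAGE_SIZE,
					 DMA_TO_DEVICE);

	if (dma_mapping_error(dev, handle))	/* -> intel_mapping_error() */
		return -ENOMEM;

	/* ... hand 'handle' to the device and wait for it to finish ... */

	dma_unmap_page(dev, handle, PAGE_SIZE, DMA_TO_DEVICE);
	return 0;
}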
3820
3821 static inline int iommu_domain_cache_init(void)
3822 {
3823 int ret = 0;
3824
3825 iommu_domain_cache = kmem_cache_create("iommu_domain",
3826 sizeof(struct dmar_domain),
3827 0,
3828 SLAB_HWCACHE_ALIGN,
3829
3830 NULL);
3831 if (!iommu_domain_cache) {
3832 pr_err("Couldn't create iommu_domain cache\n");
3833 ret = -ENOMEM;
3834 }
3835
3836 return ret;
3837 }
3838
3839 static inline int iommu_devinfo_cache_init(void)
3840 {
3841 int ret = 0;
3842
3843 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3844 sizeof(struct device_domain_info),
3845 0,
3846 SLAB_HWCACHE_ALIGN,
3847 NULL);
3848 if (!iommu_devinfo_cache) {
3849 pr_err("Couldn't create devinfo cache\n");
3850 ret = -ENOMEM;
3851 }
3852
3853 return ret;
3854 }
3855
3856 static int __init iommu_init_mempool(void)
3857 {
3858 int ret;
3859 ret = iova_cache_get();
3860 if (ret)
3861 return ret;
3862
3863 ret = iommu_domain_cache_init();
3864 if (ret)
3865 goto domain_error;
3866
3867 ret = iommu_devinfo_cache_init();
3868 if (!ret)
3869 return ret;
3870
3871 kmem_cache_destroy(iommu_domain_cache);
3872 domain_error:
3873 iova_cache_put();
3874
3875 return -ENOMEM;
3876 }
3877
3878 static void __init iommu_exit_mempool(void)
3879 {
3880 kmem_cache_destroy(iommu_devinfo_cache);
3881 kmem_cache_destroy(iommu_domain_cache);
3882 iova_cache_put();
3883 }
3884
3885 static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3886 {
3887 struct dmar_drhd_unit *drhd;
3888 u32 vtbar;
3889 int rc;
3890
3891 /* We know that this device on this chipset has its own IOMMU.
3892 * If we find it under a different IOMMU, then the BIOS is lying
3893 * to us. Hope that the IOMMU for this device is actually
3894 * disabled, and it needs no translation...
3895 */
3896 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3897 if (rc) {
3898 /* "can't" happen */
3899 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3900 return;
3901 }
3902 vtbar &= 0xffff0000;
3903
3904 /* we know that this iommu should be at offset 0xa000 from vtbar */
3905 drhd = dmar_find_matched_drhd_unit(pdev);
3906 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3907 TAINT_FIRMWARE_WORKAROUND,
3908 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3909 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3910 }
3911 DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3912
3913 static void __init init_no_remapping_devices(void)
3914 {
3915 struct dmar_drhd_unit *drhd;
3916 struct device *dev;
3917 int i;
3918
3919 for_each_drhd_unit(drhd) {
3920 if (!drhd->include_all) {
3921 for_each_active_dev_scope(drhd->devices,
3922 drhd->devices_cnt, i, dev)
3923 break;
3924 /* ignore DMAR unit if no devices exist */
3925 if (i == drhd->devices_cnt)
3926 drhd->ignored = 1;
3927 }
3928 }
3929
3930 for_each_active_drhd_unit(drhd) {
3931 if (drhd->include_all)
3932 continue;
3933
3934 for_each_active_dev_scope(drhd->devices,
3935 drhd->devices_cnt, i, dev)
3936 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
3937 break;
3938 if (i < drhd->devices_cnt)
3939 continue;
3940
3941 /* This IOMMU has *only* gfx devices. Either bypass it or
3942 set the gfx_mapped flag, as appropriate */
3943 if (dmar_map_gfx) {
3944 intel_iommu_gfx_mapped = 1;
3945 } else {
3946 drhd->ignored = 1;
3947 for_each_active_dev_scope(drhd->devices,
3948 drhd->devices_cnt, i, dev)
3949 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3950 }
3951 }
3952 }
3953
3954 #ifdef CONFIG_SUSPEND
3955 static int init_iommu_hw(void)
3956 {
3957 struct dmar_drhd_unit *drhd;
3958 struct intel_iommu *iommu = NULL;
3959
3960 for_each_active_iommu(iommu, drhd)
3961 if (iommu->qi)
3962 dmar_reenable_qi(iommu);
3963
3964 for_each_iommu(iommu, drhd) {
3965 if (drhd->ignored) {
3966 /*
3967 * we always have to disable PMRs or DMA may fail on
3968 * this device
3969 */
3970 if (force_on)
3971 iommu_disable_protect_mem_regions(iommu);
3972 continue;
3973 }
3974
3975 iommu_flush_write_buffer(iommu);
3976
3977 iommu_set_root_entry(iommu);
3978
3979 iommu->flush.flush_context(iommu, 0, 0, 0,
3980 DMA_CCMD_GLOBAL_INVL);
3981 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3982 iommu_enable_translation(iommu);
3983 iommu_disable_protect_mem_regions(iommu);
3984 }
3985
3986 return 0;
3987 }
3988
3989 static void iommu_flush_all(void)
3990 {
3991 struct dmar_drhd_unit *drhd;
3992 struct intel_iommu *iommu;
3993
3994 for_each_active_iommu(iommu, drhd) {
3995 iommu->flush.flush_context(iommu, 0, 0, 0,
3996 DMA_CCMD_GLOBAL_INVL);
3997 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3998 DMA_TLB_GLOBAL_FLUSH);
3999 }
4000 }
4001
4002 static int iommu_suspend(void)
4003 {
4004 struct dmar_drhd_unit *drhd;
4005 struct intel_iommu *iommu = NULL;
4006 unsigned long flag;
4007
4008 for_each_active_iommu(iommu, drhd) {
4009 iommu->iommu_state = kcalloc(MAX_SR_DMAR_REGS, sizeof(u32),
4010 GFP_ATOMIC);
4011 if (!iommu->iommu_state)
4012 goto nomem;
4013 }
4014
4015 iommu_flush_all();
4016
4017 for_each_active_iommu(iommu, drhd) {
4018 iommu_disable_translation(iommu);
4019
4020 raw_spin_lock_irqsave(&iommu->register_lock, flag);
4021
4022 iommu->iommu_state[SR_DMAR_FECTL_REG] =
4023 readl(iommu->reg + DMAR_FECTL_REG);
4024 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
4025 readl(iommu->reg + DMAR_FEDATA_REG);
4026 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
4027 readl(iommu->reg + DMAR_FEADDR_REG);
4028 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
4029 readl(iommu->reg + DMAR_FEUADDR_REG);
4030
4031 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
4032 }
4033 return 0;
4034
4035 nomem:
4036 for_each_active_iommu(iommu, drhd)
4037 kfree(iommu->iommu_state);
4038
4039 return -ENOMEM;
4040 }
4041
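/*
 * Resume path: re-initialize the hardware, then restore the saved
 * fault-event registers and free the save area.
 */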
4042 static void iommu_resume(void)
4043 {
4044 struct dmar_drhd_unit *drhd;
4045 struct intel_iommu *iommu = NULL;
4046 unsigned long flag;
4047
4048 if (init_iommu_hw()) {
4049 if (force_on)
4050 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
4051 else
4052 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
4053 return;
4054 }
4055
4056 for_each_active_iommu(iommu, drhd) {
4057
4058 raw_spin_lock_irqsave(&iommu->register_lock, flag);
4059
4060 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
4061 iommu->reg + DMAR_FECTL_REG);
4062 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
4063 iommu->reg + DMAR_FEDATA_REG);
4064 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
4065 iommu->reg + DMAR_FEADDR_REG);
4066 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
4067 iommu->reg + DMAR_FEUADDR_REG);
4068
4069 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
4070 }
4071
4072 for_each_active_iommu(iommu, drhd)
4073 kfree(iommu->iommu_state);
4074 }
4075
4076 static struct syscore_ops iommu_syscore_ops = {
4077 .resume = iommu_resume,
4078 .suspend = iommu_suspend,
4079 };
4080
4081 static void __init init_iommu_pm_ops(void)
4082 {
4083 register_syscore_ops(&iommu_syscore_ops);
4084 }
4085
4086 #else
4087 static inline void init_iommu_pm_ops(void) {}
4088 #endif /* CONFIG_SUSPEND */
4089
4090
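/*
 * Parse one Reserved Memory Region Reporting (RMRR) structure from the
 * DMAR table: record the address range, pre-allocate the corresponding
 * IOMMU_RESV_DIRECT reserved region and the device scope, and add the
 * unit to dmar_rmrr_units.
 */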
4091 int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
4092 {
4093 struct acpi_dmar_reserved_memory *rmrr;
4094 int prot = DMA_PTE_READ|DMA_PTE_WRITE;
4095 struct dmar_rmrr_unit *rmrru;
4096 size_t length;
4097
4098 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
4099 if (!rmrru)
4100 goto out;
4101
4102 rmrru->hdr = header;
4103 rmrr = (struct acpi_dmar_reserved_memory *)header;
4104 rmrru->base_address = rmrr->base_address;
4105 rmrru->end_address = rmrr->end_address;
4106
4107 length = rmrr->end_address - rmrr->base_address + 1;
4108 rmrru->resv = iommu_alloc_resv_region(rmrr->base_address, length, prot,
4109 IOMMU_RESV_DIRECT);
4110 if (!rmrru->resv)
4111 goto free_rmrru;
4112
4113 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
4114 ((void *)rmrr) + rmrr->header.length,
4115 &rmrru->devices_cnt);
4116 if (rmrru->devices_cnt && rmrru->devices == NULL)
4117 goto free_all;
4118
4119 list_add(&rmrru->list, &dmar_rmrr_units);
4120
4121 return 0;
4122 free_all:
4123 kfree(rmrru->resv);
4124 free_rmrru:
4125 kfree(rmrru);
4126 out:
4127 return -ENOMEM;
4128 }
4129
4130 static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
4131 {
4132 struct dmar_atsr_unit *atsru;
4133 struct acpi_dmar_atsr *tmp;
4134
4135 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4136 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
4137 if (atsr->segment != tmp->segment)
4138 continue;
4139 if (atsr->header.length != tmp->header.length)
4140 continue;
4141 if (memcmp(atsr, tmp, atsr->header.length) == 0)
4142 return atsru;
4143 }
4144
4145 return NULL;
4146 }
4147
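/*
 * Parse one ATS Reporting (ATSR) structure.  Duplicates are ignored;
 * otherwise the header is copied (the ACPI buffer may be freed on
 * return) and, unless the unit applies to the whole segment, its
 * explicit device scope is recorded as well.
 */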
4148 int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4149 {
4150 struct acpi_dmar_atsr *atsr;
4151 struct dmar_atsr_unit *atsru;
4152
4153 if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
4154 return 0;
4155
4156 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4157 atsru = dmar_find_atsr(atsr);
4158 if (atsru)
4159 return 0;
4160
4161 atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
4162 if (!atsru)
4163 return -ENOMEM;
4164
4165 /*
4166 * If memory is allocated from slab by ACPI _DSM method, we need to
4167 * copy the memory content because the memory buffer will be freed
4168 * on return.
4169 */
4170 atsru->hdr = (void *)(atsru + 1);
4171 memcpy(atsru->hdr, hdr, hdr->length);
4172 atsru->include_all = atsr->flags & 0x1;
4173 if (!atsru->include_all) {
4174 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
4175 (void *)atsr + atsr->header.length,
4176 &atsru->devices_cnt);
4177 if (atsru->devices_cnt && atsru->devices == NULL) {
4178 kfree(atsru);
4179 return -ENOMEM;
4180 }
4181 }
4182
4183 list_add_rcu(&atsru->list, &dmar_atsr_units);
4184
4185 return 0;
4186 }
4187
4188 static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
4189 {
4190 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
4191 kfree(atsru);
4192 }
4193
4194 int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4195 {
4196 struct acpi_dmar_atsr *atsr;
4197 struct dmar_atsr_unit *atsru;
4198
4199 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4200 atsru = dmar_find_atsr(atsr);
4201 if (atsru) {
4202 list_del_rcu(&atsru->list);
4203 synchronize_rcu();
4204 intel_iommu_free_atsr(atsru);
4205 }
4206
4207 return 0;
4208 }
4209
4210 int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4211 {
4212 int i;
4213 struct device *dev;
4214 struct acpi_dmar_atsr *atsr;
4215 struct dmar_atsr_unit *atsru;
4216
4217 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4218 atsru = dmar_find_atsr(atsr);
4219 if (!atsru)
4220 return 0;
4221
4222 if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
4223 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
4224 i, dev)
4225 return -EBUSY;
4226 }
4227
4228 return 0;
4229 }
4230
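/*
 * Bring up an IOMMU added at runtime (or skipped during boot).  The unit
 * must support the capabilities already negotiated globally (hardware
 * pass-through, snoop control, superpage sizes) before its domains,
 * root entry, queued invalidation and interrupts are set up and
 * translation is enabled.
 */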
4231 static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
4232 {
4233 int sp, ret = 0;
4234 struct intel_iommu *iommu = dmaru->iommu;
4235
4236 if (g_iommus[iommu->seq_id])
4237 return 0;
4238
4239 if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
4240 pr_warn("%s: Doesn't support hardware pass through.\n",
4241 iommu->name);
4242 return -ENXIO;
4243 }
4244 if (!ecap_sc_support(iommu->ecap) &&
4245 domain_update_iommu_snooping(iommu)) {
4246 pr_warn("%s: Doesn't support snooping.\n",
4247 iommu->name);
4248 return -ENXIO;
4249 }
4250 sp = domain_update_iommu_superpage(iommu) - 1;
4251 if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
4252 pr_warn("%s: Doesn't support large page.\n",
4253 iommu->name);
4254 return -ENXIO;
4255 }
4256
4257 /*
4258 * Disable translation if already enabled prior to OS handover.
4259 */
4260 if (iommu->gcmd & DMA_GCMD_TE)
4261 iommu_disable_translation(iommu);
4262
4263 g_iommus[iommu->seq_id] = iommu;
4264 ret = iommu_init_domains(iommu);
4265 if (ret == 0)
4266 ret = iommu_alloc_root_entry(iommu);
4267 if (ret)
4268 goto out;
4269
4270 #ifdef CONFIG_INTEL_IOMMU_SVM
4271 if (pasid_enabled(iommu))
4272 intel_svm_alloc_pasid_tables(iommu);
4273 #endif
4274
4275 if (dmaru->ignored) {
4276 /*
4277 * we always have to disable PMRs or DMA may fail on this device
4278 */
4279 if (force_on)
4280 iommu_disable_protect_mem_regions(iommu);
4281 return 0;
4282 }
4283
4284 intel_iommu_init_qi(iommu);
4285 iommu_flush_write_buffer(iommu);
4286
4287 #ifdef CONFIG_INTEL_IOMMU_SVM
4288 if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) {
4289 ret = intel_svm_enable_prq(iommu);
4290 if (ret)
4291 goto disable_iommu;
4292 }
4293 #endif
4294 ret = dmar_set_interrupt(iommu);
4295 if (ret)
4296 goto disable_iommu;
4297
4298 iommu_set_root_entry(iommu);
4299 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
4300 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4301 iommu_enable_translation(iommu);
4302
4303 iommu_disable_protect_mem_regions(iommu);
4304 return 0;
4305
4306 disable_iommu:
4307 disable_dmar_iommu(iommu);
4308 out:
4309 free_dmar_iommu(iommu);
4310 return ret;
4311 }
4312
4313 int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
4314 {
4315 int ret = 0;
4316 struct intel_iommu *iommu = dmaru->iommu;
4317
4318 if (!intel_iommu_enabled)
4319 return 0;
4320 if (iommu == NULL)
4321 return -EINVAL;
4322
4323 if (insert) {
4324 ret = intel_iommu_add(dmaru);
4325 } else {
4326 disable_dmar_iommu(iommu);
4327 free_dmar_iommu(iommu);
4328 }
4329
4330 return ret;
4331 }
4332
4333 static void intel_iommu_free_dmars(void)
4334 {
4335 struct dmar_rmrr_unit *rmrru, *rmrr_n;
4336 struct dmar_atsr_unit *atsru, *atsr_n;
4337
4338 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
4339 list_del(&rmrru->list);
4340 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
4341 kfree(rmrru->resv);
4342 kfree(rmrru);
4343 }
4344
4345 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
4346 list_del(&atsru->list);
4347 intel_iommu_free_atsr(atsru);
4348 }
4349 }
4350
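/*
 * Decide whether ATS may be used for @dev: walk up to the PCIe root
 * port and check whether that port appears in the device scope of (or
 * is covered by an include-all) ATSR unit for the device's segment.
 * Returns 1 if ATS is allowed, 0 otherwise.
 */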
4351 int dmar_find_matched_atsr_unit(struct pci_dev *dev)
4352 {
4353 int i, ret = 1;
4354 struct pci_bus *bus;
4355 struct pci_dev *bridge = NULL;
4356 struct device *tmp;
4357 struct acpi_dmar_atsr *atsr;
4358 struct dmar_atsr_unit *atsru;
4359
4360 dev = pci_physfn(dev);
4361 for (bus = dev->bus; bus; bus = bus->parent) {
4362 bridge = bus->self;
4363 /* If it's an integrated device, allow ATS */
4364 if (!bridge)
4365 return 1;
4366 /* Connected via non-PCIe: no ATS */
4367 if (!pci_is_pcie(bridge) ||
4368 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
4369 return 0;
4370 /* If we found the root port, look it up in the ATSR */
4371 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
4372 break;
4373 }
4374
4375 rcu_read_lock();
4376 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4377 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4378 if (atsr->segment != pci_domain_nr(dev->bus))
4379 continue;
4380
4381 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
4382 if (tmp == &bridge->dev)
4383 goto out;
4384
4385 if (atsru->include_all)
4386 goto out;
4387 }
4388 ret = 0;
4389 out:
4390 rcu_read_unlock();
4391
4392 return ret;
4393 }
4394
4395 int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
4396 {
4397 int ret = 0;
4398 struct dmar_rmrr_unit *rmrru;
4399 struct dmar_atsr_unit *atsru;
4400 struct acpi_dmar_atsr *atsr;
4401 struct acpi_dmar_reserved_memory *rmrr;
4402
4403 if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING)
4404 return 0;
4405
4406 list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
4407 rmrr = container_of(rmrru->hdr,
4408 struct acpi_dmar_reserved_memory, header);
4409 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4410 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
4411 ((void *)rmrr) + rmrr->header.length,
4412 rmrr->segment, rmrru->devices,
4413 rmrru->devices_cnt);
4414 if (ret < 0)
4415 return ret;
4416 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
4417 dmar_remove_dev_scope(info, rmrr->segment,
4418 rmrru->devices, rmrru->devices_cnt);
4419 }
4420 }
4421
4422 list_for_each_entry(atsru, &dmar_atsr_units, list) {
4423 if (atsru->include_all)
4424 continue;
4425
4426 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4427 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4428 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
4429 (void *)atsr + atsr->header.length,
4430 atsr->segment, atsru->devices,
4431 atsru->devices_cnt);
4432 if (ret > 0)
4433 break;
4434 else if (ret < 0)
4435 return ret;
4436 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
4437 if (dmar_remove_dev_scope(info, atsr->segment,
4438 atsru->devices, atsru->devices_cnt))
4439 break;
4440 }
4441 }
4442
4443 return 0;
4444 }
4445
4446 /*
4447 * Here we only respond to a device being unbound from its driver.
4448 *
4449 * Added device is not attached to its DMAR domain here yet. That will happen
4450 * when mapping the device to iova.
4451 */
4452 static int device_notifier(struct notifier_block *nb,
4453 unsigned long action, void *data)
4454 {
4455 struct device *dev = data;
4456 struct dmar_domain *domain;
4457
4458 if (iommu_dummy(dev))
4459 return 0;
4460
4461 if (action != BUS_NOTIFY_REMOVED_DEVICE)
4462 return 0;
4463
4464 domain = find_domain(dev);
4465 if (!domain)
4466 return 0;
4467
4468 dmar_remove_one_dev_info(domain, dev);
4469 if (!domain_type_is_vm_or_si(domain) && list_empty(&domain->devices))
4470 domain_exit(domain);
4471
4472 return 0;
4473 }
4474
4475 static struct notifier_block device_nb = {
4476 .notifier_call = device_notifier,
4477 };
4478
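/*
 * Memory hotplug notifier for the static identity (si) domain: ranges
 * going online are added to the identity map; ranges going offline have
 * their IOVAs and page tables torn down and flushed from every IOMMU.
 */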
4479 static int intel_iommu_memory_notifier(struct notifier_block *nb,
4480 unsigned long val, void *v)
4481 {
4482 struct memory_notify *mhp = v;
4483 unsigned long long start, end;
4484 unsigned long start_vpfn, last_vpfn;
4485
4486 switch (val) {
4487 case MEM_GOING_ONLINE:
4488 start = mhp->start_pfn << PAGE_SHIFT;
4489 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
4490 if (iommu_domain_identity_map(si_domain, start, end)) {
4491 pr_warn("Failed to build identity map for [%llx-%llx]\n",
4492 start, end);
4493 return NOTIFY_BAD;
4494 }
4495 break;
4496
4497 case MEM_OFFLINE:
4498 case MEM_CANCEL_ONLINE:
4499 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4500 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
4501 while (start_vpfn <= last_vpfn) {
4502 struct iova *iova;
4503 struct dmar_drhd_unit *drhd;
4504 struct intel_iommu *iommu;
4505 struct page *freelist;
4506
4507 iova = find_iova(&si_domain->iovad, start_vpfn);
4508 if (iova == NULL) {
4509 pr_debug("Failed to get IOVA for PFN %lx\n",
4510 start_vpfn);
4511 break;
4512 }
4513
4514 iova = split_and_remove_iova(&si_domain->iovad, iova,
4515 start_vpfn, last_vpfn);
4516 if (iova == NULL) {
4517 pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
4518 start_vpfn, last_vpfn);
4519 return NOTIFY_BAD;
4520 }
4521
4522 freelist = domain_unmap(si_domain, iova->pfn_lo,
4523 iova->pfn_hi);
4524
4525 rcu_read_lock();
4526 for_each_active_iommu(iommu, drhd)
4527 iommu_flush_iotlb_psi(iommu, si_domain,
4528 iova->pfn_lo, iova_size(iova),
4529 !freelist, 0);
4530 rcu_read_unlock();
4531 dma_free_pagelist(freelist);
4532
4533 start_vpfn = iova->pfn_hi + 1;
4534 free_iova_mem(iova);
4535 }
4536 break;
4537 }
4538
4539 return NOTIFY_OK;
4540 }
4541
4542 static struct notifier_block intel_iommu_memory_nb = {
4543 .notifier_call = intel_iommu_memory_notifier,
4544 .priority = 0
4545 };
4546
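/*
 * Called via the CPUHP "dead" callback below: return the dead CPU's
 * cached IOVA ranges for every known domain back to the IOVA allocator.
 */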
4547 static void free_all_cpu_cached_iovas(unsigned int cpu)
4548 {
4549 int i;
4550
4551 for (i = 0; i < g_num_of_iommus; i++) {
4552 struct intel_iommu *iommu = g_iommus[i];
4553 struct dmar_domain *domain;
4554 int did;
4555
4556 if (!iommu)
4557 continue;
4558
4559 for (did = 0; did < cap_ndoms(iommu->cap); did++) {
4560 domain = get_iommu_domain(iommu, (u16)did);
4561
4562 if (!domain)
4563 continue;
4564 free_cpu_cached_iovas(cpu, &domain->iovad);
4565 }
4566 }
4567 }
4568
4569 static int intel_iommu_cpu_dead(unsigned int cpu)
4570 {
4571 free_all_cpu_cached_iovas(cpu);
4572 return 0;
4573 }
4574
4575 static void intel_disable_iommus(void)
4576 {
4577 struct intel_iommu *iommu = NULL;
4578 struct dmar_drhd_unit *drhd;
4579
4580 for_each_iommu(iommu, drhd)
4581 iommu_disable_translation(iommu);
4582 }
4583
4584 static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
4585 {
4586 struct iommu_device *iommu_dev = dev_to_iommu_device(dev);
4587
4588 return container_of(iommu_dev, struct intel_iommu, iommu);
4589 }
4590
4591 static ssize_t intel_iommu_show_version(struct device *dev,
4592 struct device_attribute *attr,
4593 char *buf)
4594 {
4595 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4596 u32 ver = readl(iommu->reg + DMAR_VER_REG);
4597 return sprintf(buf, "%d:%d\n",
4598 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4599 }
4600 static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
4601
4602 static ssize_t intel_iommu_show_address(struct device *dev,
4603 struct device_attribute *attr,
4604 char *buf)
4605 {
4606 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4607 return sprintf(buf, "%llx\n", iommu->reg_phys);
4608 }
4609 static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
4610
4611 static ssize_t intel_iommu_show_cap(struct device *dev,
4612 struct device_attribute *attr,
4613 char *buf)
4614 {
4615 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4616 return sprintf(buf, "%llx\n", iommu->cap);
4617 }
4618 static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
4619
4620 static ssize_t intel_iommu_show_ecap(struct device *dev,
4621 struct device_attribute *attr,
4622 char *buf)
4623 {
4624 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4625 return sprintf(buf, "%llx\n", iommu->ecap);
4626 }
4627 static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4628
4629 static ssize_t intel_iommu_show_ndoms(struct device *dev,
4630 struct device_attribute *attr,
4631 char *buf)
4632 {
4633 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4634 return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
4635 }
4636 static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL);
4637
4638 static ssize_t intel_iommu_show_ndoms_used(struct device *dev,
4639 struct device_attribute *attr,
4640 char *buf)
4641 {
4642 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4643 return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
4644 cap_ndoms(iommu->cap)));
4645 }
4646 static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL);
4647
4648 static struct attribute *intel_iommu_attrs[] = {
4649 &dev_attr_version.attr,
4650 &dev_attr_address.attr,
4651 &dev_attr_cap.attr,
4652 &dev_attr_ecap.attr,
4653 &dev_attr_domains_supported.attr,
4654 &dev_attr_domains_used.attr,
4655 NULL,
4656 };
4657
4658 static struct attribute_group intel_iommu_group = {
4659 .name = "intel-iommu",
4660 .attrs = intel_iommu_attrs,
4661 };
4662
4663 const struct attribute_group *intel_iommu_groups[] = {
4664 &intel_iommu_group,
4665 NULL,
4666 };
4667
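/*
 * Main initialization entry point: parse the DMAR table and device
 * scopes, set up every enabled IOMMU via init_dmars(), install the
 * Intel DMA ops and register the sysfs, bus/memory notifier and cpuhp
 * hooks.  Everything is torn down again on failure.
 */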
4668 int __init intel_iommu_init(void)
4669 {
4670 int ret = -ENODEV;
4671 struct dmar_drhd_unit *drhd;
4672 struct intel_iommu *iommu;
4673
4674 /* VT-d is required for a TXT/tboot launch, so enforce that */
4675 force_on = tboot_force_iommu();
4676
4677 if (iommu_init_mempool()) {
4678 if (force_on)
4679 panic("tboot: Failed to initialize iommu memory\n");
4680 return -ENOMEM;
4681 }
4682
4683 down_write(&dmar_global_lock);
4684 if (dmar_table_init()) {
4685 if (force_on)
4686 panic("tboot: Failed to initialize DMAR table\n");
4687 goto out_free_dmar;
4688 }
4689
4690 if (dmar_dev_scope_init() < 0) {
4691 if (force_on)
4692 panic("tboot: Failed to initialize DMAR device scope\n");
4693 goto out_free_dmar;
4694 }
4695
4696 up_write(&dmar_global_lock);
4697
4698 /*
4699 * The bus notifier takes the dmar_global_lock, so lockdep will
4700 * complain later when we register it under the lock.
4701 */
4702 dmar_register_bus_notifier();
4703
4704 down_write(&dmar_global_lock);
4705
4706 if (no_iommu || dmar_disabled) {
4707 /*
4708 * We exit the function here to ensure IOMMU's remapping and
4709 * mempool aren't setup, which means that the IOMMU's PMRs
4710 * won't be disabled via the call to init_dmars(). So disable
4711 * it explicitly here. The PMRs were setup by tboot prior to
4712 * calling SENTER, but the kernel is expected to reset/tear
4713 * down the PMRs.
4714 */
4715 if (intel_iommu_tboot_noforce) {
4716 for_each_iommu(iommu, drhd)
4717 iommu_disable_protect_mem_regions(iommu);
4718 }
4719
4720 /*
4721 * Make sure the IOMMUs are switched off, even when we
4722 * boot into a kexec kernel and the previous kernel left
4723 * them enabled
4724 */
4725 intel_disable_iommus();
4726 goto out_free_dmar;
4727 }
4728
4729 if (list_empty(&dmar_rmrr_units))
4730 pr_info("No RMRR found\n");
4731
4732 if (list_empty(&dmar_atsr_units))
4733 pr_info("No ATSR found\n");
4734
4735 if (dmar_init_reserved_ranges()) {
4736 if (force_on)
4737 panic("tboot: Failed to reserve iommu ranges\n");
4738 goto out_free_reserved_range;
4739 }
4740
4741 init_no_remapping_devices();
4742
4743 ret = init_dmars();
4744 if (ret) {
4745 if (force_on)
4746 panic("tboot: Failed to initialize DMARs\n");
4747 pr_err("Initialization failed\n");
4748 goto out_free_reserved_range;
4749 }
4750 up_write(&dmar_global_lock);
4751 pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
4752
4753 #if defined(CONFIG_X86) && defined(CONFIG_SWIOTLB)
4754 swiotlb = 0;
4755 #endif
4756 dma_ops = &intel_dma_ops;
4757
4758 init_iommu_pm_ops();
4759
4760 for_each_active_iommu(iommu, drhd) {
4761 iommu_device_sysfs_add(&iommu->iommu, NULL,
4762 intel_iommu_groups,
4763 "%s", iommu->name);
4764 iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
4765 iommu_device_register(&iommu->iommu);
4766 }
4767
4768 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
4769 bus_register_notifier(&pci_bus_type, &device_nb);
4770 if (si_domain && !hw_pass_through)
4771 register_memory_notifier(&intel_iommu_memory_nb);
4772 cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL,
4773 intel_iommu_cpu_dead);
4774 intel_iommu_enabled = 1;
4775
4776 return 0;
4777
4778 out_free_reserved_range:
4779 put_iova_domain(&reserved_iova_list);
4780 out_free_dmar:
4781 intel_iommu_free_dmars();
4782 up_write(&dmar_global_lock);
4783 iommu_exit_mempool();
4784 return ret;
4785 }
4786
4787 static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
4788 {
4789 struct intel_iommu *iommu = opaque;
4790
4791 domain_context_clear_one(iommu, PCI_BUS_NUM(alias), alias & 0xff);
4792 return 0;
4793 }
4794
4795 /*
4796 * NB - intel-iommu lacks any sort of reference counting for the users of
4797 * dependent devices. If multiple endpoints have intersecting dependent
4798 * devices, unbinding the driver from any one of them will possibly leave
4799 * the others unable to operate.
4800 */
4801 static void domain_context_clear(struct intel_iommu *iommu, struct device *dev)
4802 {
4803 if (!iommu || !dev || !dev_is_pci(dev))
4804 return;
4805
4806 pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_clear_one_cb, iommu);
4807 }
4808
4809 static void __dmar_remove_one_dev_info(struct device_domain_info *info)
4810 {
4811 struct intel_iommu *iommu;
4812 unsigned long flags;
4813
4814 assert_spin_locked(&device_domain_lock);
4815
4816 if (WARN_ON(!info))
4817 return;
4818
4819 iommu = info->iommu;
4820
4821 if (info->dev) {
4822 iommu_disable_dev_iotlb(info);
4823 domain_context_clear(iommu, info->dev);
4824 }
4825
4826 unlink_domain_info(info);
4827
4828 spin_lock_irqsave(&iommu->lock, flags);
4829 domain_detach_iommu(info->domain, iommu);
4830 spin_unlock_irqrestore(&iommu->lock, flags);
4831
4832 free_devinfo_mem(info);
4833 }
4834
4835 static void dmar_remove_one_dev_info(struct dmar_domain *domain,
4836 struct device *dev)
4837 {
4838 struct device_domain_info *info;
4839 unsigned long flags;
4840
4841 spin_lock_irqsave(&device_domain_lock, flags);
4842 info = dev->archdata.iommu;
4843 __dmar_remove_one_dev_info(info);
4844 spin_unlock_irqrestore(&device_domain_lock, flags);
4845 }
4846
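/*
 * Minimal setup for an externally managed (iommu API) domain:
 * initialize the IOVA allocator, derive the adjusted guest address
 * width and allocate the top-level page directory.
 */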
4847 static int md_domain_init(struct dmar_domain *domain, int guest_width)
4848 {
4849 int adjust_width;
4850
4851 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
4852 domain_reserve_special_ranges(domain);
4853
4854 /* calculate AGAW */
4855 domain->gaw = guest_width;
4856 adjust_width = guestwidth_to_adjustwidth(guest_width);
4857 domain->agaw = width_to_agaw(adjust_width);
4858
4859 domain->iommu_coherency = 0;
4860 domain->iommu_snooping = 0;
4861 domain->iommu_superpage = 0;
4862 domain->max_addr = 0;
4863
4864 /* always allocate the top pgd */
4865 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
4866 if (!domain->pgd)
4867 return -ENOMEM;
4868 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4869 return 0;
4870 }
4871
4872 static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
4873 {
4874 struct dmar_domain *dmar_domain;
4875 struct iommu_domain *domain;
4876
4877 if (type != IOMMU_DOMAIN_UNMANAGED)
4878 return NULL;
4879
4880 dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE);
4881 if (!dmar_domain) {
4882 pr_err("Can't allocate dmar_domain\n");
4883 return NULL;
4884 }
4885 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
4886 pr_err("Domain initialization failed\n");
4887 domain_exit(dmar_domain);
4888 return NULL;
4889 }
4890 domain_update_iommu_cap(dmar_domain);
4891
4892 domain = &dmar_domain->domain;
4893 domain->geometry.aperture_start = 0;
4894 domain->geometry.aperture_end = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
4895 domain->geometry.force_aperture = true;
4896
4897 return domain;
4898 }
4899
4900 static void intel_iommu_domain_free(struct iommu_domain *domain)
4901 {
4902 domain_exit(to_dmar_domain(domain));
4903 }
4904
4905 static int intel_iommu_attach_device(struct iommu_domain *domain,
4906 struct device *dev)
4907 {
4908 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4909 struct intel_iommu *iommu;
4910 int addr_width;
4911 u8 bus, devfn;
4912
4913 if (device_is_rmrr_locked(dev)) {
4914 dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n");
4915 return -EPERM;
4916 }
4917
4918 /* normally dev is not mapped */
4919 if (unlikely(domain_context_mapped(dev))) {
4920 struct dmar_domain *old_domain;
4921
4922 old_domain = find_domain(dev);
4923 if (old_domain) {
4924 rcu_read_lock();
4925 dmar_remove_one_dev_info(old_domain, dev);
4926 rcu_read_unlock();
4927
4928 if (!domain_type_is_vm_or_si(old_domain) &&
4929 list_empty(&old_domain->devices))
4930 domain_exit(old_domain);
4931 }
4932 }
4933
4934 iommu = device_to_iommu(dev, &bus, &devfn);
4935 if (!iommu)
4936 return -ENODEV;
4937
4938 /* check if this iommu agaw is sufficient for max mapped address */
4939 addr_width = agaw_to_width(iommu->agaw);
4940 if (addr_width > cap_mgaw(iommu->cap))
4941 addr_width = cap_mgaw(iommu->cap);
4942
4943 if (dmar_domain->max_addr > (1LL << addr_width)) {
4944 pr_err("%s: iommu width (%d) is not "
4945 "sufficient for the mapped address (%llx)\n",
4946 __func__, addr_width, dmar_domain->max_addr);
4947 return -EFAULT;
4948 }
4949 dmar_domain->gaw = addr_width;
4950
4951 /*
4952 * Knock out extra levels of page tables if necessary
4953 */
4954 while (iommu->agaw < dmar_domain->agaw) {
4955 struct dma_pte *pte;
4956
4957 pte = dmar_domain->pgd;
4958 if (dma_pte_present(pte)) {
4959 dmar_domain->pgd = (struct dma_pte *)
4960 phys_to_virt(dma_pte_addr(pte));
4961 free_pgtable_page(pte);
4962 }
4963 dmar_domain->agaw--;
4964 }
4965
4966 return domain_add_dev_info(dmar_domain, dev);
4967 }
4968
4969 static void intel_iommu_detach_device(struct iommu_domain *domain,
4970 struct device *dev)
4971 {
4972 dmar_remove_one_dev_info(to_dmar_domain(domain), dev);
4973 }
4974
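/*
 * iommu_ops->map: translate IOMMU_READ/WRITE/CACHE into DMA PTE bits,
 * grow max_addr (verifying it still fits the domain's address width)
 * and install the mapping with domain_pfn_mapping().
 */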
4975 static int intel_iommu_map(struct iommu_domain *domain,
4976 unsigned long iova, phys_addr_t hpa,
4977 size_t size, int iommu_prot)
4978 {
4979 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4980 u64 max_addr;
4981 int prot = 0;
4982 int ret;
4983
4984 if (iommu_prot & IOMMU_READ)
4985 prot |= DMA_PTE_READ;
4986 if (iommu_prot & IOMMU_WRITE)
4987 prot |= DMA_PTE_WRITE;
4988 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
4989 prot |= DMA_PTE_SNP;
4990
4991 max_addr = iova + size;
4992 if (dmar_domain->max_addr < max_addr) {
4993 u64 end;
4994
4995 /* check if minimum agaw is sufficient for mapped address */
4996 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
4997 if (end < max_addr) {
4998 pr_err("%s: iommu width (%d) is not "
4999 "sufficient for the mapped address (%llx)\n",
5000 __func__, dmar_domain->gaw, max_addr);
5001 return -EFAULT;
5002 }
5003 dmar_domain->max_addr = max_addr;
5004 }
5005 /* Round up size to next multiple of PAGE_SIZE, if it and
5006 the low bits of hpa would take us onto the next page */
5007 size = aligned_nrpages(hpa, size);
5008 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
5009 hpa >> VTD_PAGE_SHIFT, size, prot);
5010 return ret;
5011 }
5012
5013 static size_t intel_iommu_unmap(struct iommu_domain *domain,
5014 unsigned long iova, size_t size)
5015 {
5016 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5017 struct page *freelist = NULL;
5018 unsigned long start_pfn, last_pfn;
5019 unsigned int npages;
5020 int iommu_id, level = 0;
5021
5022 /* Cope with horrid API which requires us to unmap more than the
5023 size argument if it happens to be a large-page mapping. */
5024 BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
5025
5026 if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
5027 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
5028
5029 start_pfn = iova >> VTD_PAGE_SHIFT;
5030 last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
5031
5032 freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
5033
5034 npages = last_pfn - start_pfn + 1;
5035
5036 for_each_domain_iommu(iommu_id, dmar_domain)
5037 iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
5038 start_pfn, npages, !freelist, 0);
5039
5040 dma_free_pagelist(freelist);
5041
5042 if (dmar_domain->max_addr == iova + size)
5043 dmar_domain->max_addr = iova;
5044
5045 return size;
5046 }
5047
5048 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
5049 dma_addr_t iova)
5050 {
5051 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5052 struct dma_pte *pte;
5053 int level = 0;
5054 u64 phys = 0;
5055
5056 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
5057 if (pte)
5058 phys = dma_pte_addr(pte);
5059
5060 return phys;
5061 }
5062
5063 static bool intel_iommu_capable(enum iommu_cap cap)
5064 {
5065 if (cap == IOMMU_CAP_CACHE_COHERENCY)
5066 return domain_update_iommu_snooping(NULL) == 1;
5067 if (cap == IOMMU_CAP_INTR_REMAP)
5068 return irq_remapping_enabled == 1;
5069
5070 return false;
5071 }
5072
5073 static int intel_iommu_add_device(struct device *dev)
5074 {
5075 struct intel_iommu *iommu;
5076 struct iommu_group *group;
5077 u8 bus, devfn;
5078
5079 iommu = device_to_iommu(dev, &bus, &devfn);
5080 if (!iommu)
5081 return -ENODEV;
5082
5083 iommu_device_link(&iommu->iommu, dev);
5084
5085 group = iommu_group_get_for_dev(dev);
5086
5087 if (IS_ERR(group))
5088 return PTR_ERR(group);
5089
5090 iommu_group_put(group);
5091 return 0;
5092 }
5093
5094 static void intel_iommu_remove_device(struct device *dev)
5095 {
5096 struct intel_iommu *iommu;
5097 u8 bus, devfn;
5098
5099 iommu = device_to_iommu(dev, &bus, &devfn);
5100 if (!iommu)
5101 return;
5102
5103 iommu_group_remove_device(dev);
5104
5105 iommu_device_unlink(&iommu->iommu, dev);
5106 }
5107
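/*
 * Report reserved regions for a device: every RMRR whose device scope
 * names it (direct-mapped) plus the IOAPIC/MSI doorbell range.
 */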
5108 static void intel_iommu_get_resv_regions(struct device *device,
5109 struct list_head *head)
5110 {
5111 struct iommu_resv_region *reg;
5112 struct dmar_rmrr_unit *rmrr;
5113 struct device *i_dev;
5114 int i;
5115
5116 rcu_read_lock();
5117 for_each_rmrr_units(rmrr) {
5118 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
5119 i, i_dev) {
5120 if (i_dev != device)
5121 continue;
5122
5123 list_add_tail(&rmrr->resv->list, head);
5124 }
5125 }
5126 rcu_read_unlock();
5127
5128 reg = iommu_alloc_resv_region(IOAPIC_RANGE_START,
5129 IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1,
5130 0, IOMMU_RESV_MSI);
5131 if (!reg)
5132 return;
5133 list_add_tail(&reg->list, head);
5134 }
5135
5136 static void intel_iommu_put_resv_regions(struct device *dev,
5137 struct list_head *head)
5138 {
5139 struct iommu_resv_region *entry, *next;
5140
5141 list_for_each_entry_safe(entry, next, head, list) {
5142 if (entry->type == IOMMU_RESV_RESERVED)
5143 kfree(entry);
5144 }
5145 }
5146
5147 #ifdef CONFIG_INTEL_IOMMU_SVM
5148 #define MAX_NR_PASID_BITS (20)
5149 static inline unsigned long intel_iommu_get_pts(struct intel_iommu *iommu)
5150 {
5151 /*
5152 * Convert ecap_pss to extend context entry pts encoding, also
5153 * respect the soft pasid_max value set by the iommu.
5154 * - number of PASID bits = ecap_pss + 1
5155 * - number of PASID table entries = 2^(pts + 5)
5156 * Therefore, pts = ecap_pss - 4
5157 * e.g. KBL ecap_pss = 0x13, PASID has 20 bits, pts = 15
5158 */
5159 if (ecap_pss(iommu->ecap) < 5)
5160 return 0;
5161
5162 /* pasid_max is encoded as the actual number of entries, not the bits */
5163 return find_first_bit((unsigned long *)&iommu->pasid_max,
5164 MAX_NR_PASID_BITS) - 5;
5165 }
5166
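/*
 * Enable PASID support in the extended context entry of @sdev's device
 * (and page request services where supported), so the device can issue
 * requests-with-PASID for SVM.  ATS state is mirrored into @sdev for
 * later device-IOTLB invalidations.
 */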
5167 int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev)
5168 {
5169 struct device_domain_info *info;
5170 struct context_entry *context;
5171 struct dmar_domain *domain;
5172 unsigned long flags;
5173 u64 ctx_lo;
5174 int ret;
5175
5176 domain = get_valid_domain_for_dev(sdev->dev);
5177 if (!domain)
5178 return -EINVAL;
5179
5180 spin_lock_irqsave(&device_domain_lock, flags);
5181 spin_lock(&iommu->lock);
5182
5183 ret = -EINVAL;
5184 info = sdev->dev->archdata.iommu;
5185 if (!info || !info->pasid_supported)
5186 goto out;
5187
5188 context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
5189 if (WARN_ON(!context))
5190 goto out;
5191
5192 ctx_lo = context[0].lo;
5193
5194 sdev->did = domain->iommu_did[iommu->seq_id];
5195 sdev->sid = PCI_DEVID(info->bus, info->devfn);
5196
5197 if (!(ctx_lo & CONTEXT_PASIDE)) {
5198 if (iommu->pasid_state_table)
5199 context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table);
5200 context[1].lo = (u64)virt_to_phys(iommu->pasid_table) |
5201 intel_iommu_get_pts(iommu);
5202
5203 wmb();
5204 /* CONTEXT_TT_MULTI_LEVEL and CONTEXT_TT_DEV_IOTLB are both
5205 * extended to permit requests-with-PASID if the PASIDE bit
5206 * is set, which makes sense. For CONTEXT_TT_PASS_THROUGH,
5207 * however, the PASIDE bit is ignored and requests-with-PASID
5208 * are unconditionally blocked. Which makes less sense.
5209 * So convert from CONTEXT_TT_PASS_THROUGH to one of the new
5210 * "guest mode" translation types depending on whether ATS
5211 * is available or not. Annoyingly, we can't use the new
5212 * modes *unless* PASIDE is set. */
5213 if ((ctx_lo & CONTEXT_TT_MASK) == (CONTEXT_TT_PASS_THROUGH << 2)) {
5214 ctx_lo &= ~CONTEXT_TT_MASK;
5215 if (info->ats_supported)
5216 ctx_lo |= CONTEXT_TT_PT_PASID_DEV_IOTLB << 2;
5217 else
5218 ctx_lo |= CONTEXT_TT_PT_PASID << 2;
5219 }
5220 ctx_lo |= CONTEXT_PASIDE;
5221 if (iommu->pasid_state_table)
5222 ctx_lo |= CONTEXT_DINVE;
5223 if (info->pri_supported)
5224 ctx_lo |= CONTEXT_PRS;
5225 context[0].lo = ctx_lo;
5226 wmb();
5227 iommu->flush.flush_context(iommu, sdev->did, sdev->sid,
5228 DMA_CCMD_MASK_NOBIT,
5229 DMA_CCMD_DEVICE_INVL);
5230 }
5231
5232 /* Enable PASID support in the device, if it wasn't already */
5233 if (!info->pasid_enabled)
5234 iommu_enable_dev_iotlb(info);
5235
5236 if (info->ats_enabled) {
5237 sdev->dev_iotlb = 1;
5238 sdev->qdep = info->ats_qdep;
5239 if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
5240 sdev->qdep = 0;
5241 }
5242 ret = 0;
5243
5244 out:
5245 spin_unlock(&iommu->lock);
5246 spin_unlock_irqrestore(&device_domain_lock, flags);
5247
5248 return ret;
5249 }
5250
5251 struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
5252 {
5253 struct intel_iommu *iommu;
5254 u8 bus, devfn;
5255
5256 if (iommu_dummy(dev)) {
5257 dev_warn(dev,
5258 "No IOMMU translation for device; cannot enable SVM\n");
5259 return NULL;
5260 }
5261
5262 iommu = device_to_iommu(dev, &bus, &devfn);
5263 if (!iommu) {
5264 dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
5265 return NULL;
5266 }
5267
5268 if (!iommu->pasid_table) {
5269 dev_err(dev, "PASID not enabled on IOMMU; cannot enable SVM\n");
5270 return NULL;
5271 }
5272
5273 return iommu;
5274 }
5275 #endif /* CONFIG_INTEL_IOMMU_SVM */
5276
5277 const struct iommu_ops intel_iommu_ops = {
5278 .capable = intel_iommu_capable,
5279 .domain_alloc = intel_iommu_domain_alloc,
5280 .domain_free = intel_iommu_domain_free,
5281 .attach_dev = intel_iommu_attach_device,
5282 .detach_dev = intel_iommu_detach_device,
5283 .map = intel_iommu_map,
5284 .unmap = intel_iommu_unmap,
5285 .map_sg = default_iommu_map_sg,
5286 .iova_to_phys = intel_iommu_iova_to_phys,
5287 .add_device = intel_iommu_add_device,
5288 .remove_device = intel_iommu_remove_device,
5289 .get_resv_regions = intel_iommu_get_resv_regions,
5290 .put_resv_regions = intel_iommu_put_resv_regions,
5291 .device_group = pci_device_group,
5292 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
5293 };
5294
5295 static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
5296 {
5297 /* G4x/GM45 integrated gfx dmar support is totally busted. */
5298 pr_info("Disabling IOMMU for graphics on this chipset\n");
5299 dmar_map_gfx = 0;
5300 }
5301
5302 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
5303 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
5304 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
5305 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
5306 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
5307 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
5308 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
5309
5310 static void quirk_iommu_rwbf(struct pci_dev *dev)
5311 {
5312 /*
5313 * Mobile 4 Series Chipset neglects to set RWBF capability,
5314 * but needs it. Same seems to hold for the desktop versions.
5315 */
5316 pr_info("Forcing write-buffer flush capability\n");
5317 rwbf_quirk = 1;
5318 }
5319
5320 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
5321 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
5322 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
5323 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
5324 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
5325 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
5326 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
5327
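/*
 * Bits of the host bridge's GGC config register (graphics control, as
 * read by the Calpella/Ironlake quirk below): how much memory the BIOS
 * reserved for the graphics translation table and whether a VT-d
 * shadow GTT was set up at all.
 */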
5328 #define GGC 0x52
5329 #define GGC_MEMORY_SIZE_MASK (0xf << 8)
5330 #define GGC_MEMORY_SIZE_NONE (0x0 << 8)
5331 #define GGC_MEMORY_SIZE_1M (0x1 << 8)
5332 #define GGC_MEMORY_SIZE_2M (0x3 << 8)
5333 #define GGC_MEMORY_VT_ENABLED (0x8 << 8)
5334 #define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
5335 #define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
5336 #define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
5337
5338 static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
5339 {
5340 unsigned short ggc;
5341
5342 if (pci_read_config_word(dev, GGC, &ggc))
5343 return;
5344
5345 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
5346 pr_info("BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
5347 dmar_map_gfx = 0;
5348 } else if (dmar_map_gfx) {
5349 /* we have to ensure the gfx device is idle before we flush */
5350 pr_info("Disabling batched IOTLB flush on Ironlake\n");
5351 intel_iommu_strict = 1;
5352 }
5353 }
5354 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
5355 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
5356 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
5357 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
5358
5359 /* On Tylersburg chipsets, some BIOSes have been known to enable the
5360 ISOCH DMAR unit for the Azalia sound device, but not give it any
5361 TLB entries, which causes it to deadlock. Check for that. We do
5362 this in a function called from init_dmars(), instead of in a PCI
5363 quirk, because we don't want to print the obnoxious "BIOS broken"
5364 message if VT-d is actually disabled.
5365 */
5366 static void __init check_tylersburg_isoch(void)
5367 {
5368 struct pci_dev *pdev;
5369 uint32_t vtisochctrl;
5370
5371 /* If there's no Azalia in the system anyway, forget it. */
5372 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
5373 if (!pdev)
5374 return;
5375 pci_dev_put(pdev);
5376
5377 /* System Management Registers. Might be hidden, in which case
5378 we can't do the sanity check. But that's OK, because the
5379 known-broken BIOSes _don't_ actually hide it, so far. */
5380 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
5381 if (!pdev)
5382 return;
5383
5384 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
5385 pci_dev_put(pdev);
5386 return;
5387 }
5388
5389 pci_dev_put(pdev);
5390
5391 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
5392 if (vtisochctrl & 1)
5393 return;
5394
5395 /* Drop all bits other than the number of TLB entries */
5396 vtisochctrl &= 0x1c;
5397
5398 /* If we have the recommended number of TLB entries (16), fine. */
5399 if (vtisochctrl == 0x10)
5400 return;
5401
5402 /* Zero TLB entries? You get to ride the short bus to school. */
5403 if (!vtisochctrl) {
5404 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
5405 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
5406 dmi_get_system_info(DMI_BIOS_VENDOR),
5407 dmi_get_system_info(DMI_BIOS_VERSION),
5408 dmi_get_system_info(DMI_PRODUCT_VERSION));
5409 iommu_identity_mapping |= IDENTMAP_AZALIA;
5410 return;
5411 }
5412
5413 pr_warn("Recommended number of TLB entries for the ISOCH unit is 16; your BIOS set %d\n",
5414 vtisochctrl);
5415 }