drivers/iommu/intel-iommu.c
1 /*
2 * Copyright © 2006-2014 Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * Authors: David Woodhouse <dwmw2@infradead.org>,
14 * Ashok Raj <ashok.raj@intel.com>,
15 * Shaohua Li <shaohua.li@intel.com>,
16 * Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
17 * Fenghua Yu <fenghua.yu@intel.com>
18 * Joerg Roedel <jroedel@suse.de>
19 */
20
21 #define pr_fmt(fmt) "DMAR: " fmt
22
23 #include <linux/init.h>
24 #include <linux/bitmap.h>
25 #include <linux/debugfs.h>
26 #include <linux/export.h>
27 #include <linux/slab.h>
28 #include <linux/irq.h>
29 #include <linux/interrupt.h>
30 #include <linux/spinlock.h>
31 #include <linux/pci.h>
32 #include <linux/dmar.h>
33 #include <linux/dma-mapping.h>
34 #include <linux/mempool.h>
35 #include <linux/memory.h>
36 #include <linux/cpu.h>
37 #include <linux/timer.h>
38 #include <linux/io.h>
39 #include <linux/iova.h>
40 #include <linux/iommu.h>
41 #include <linux/intel-iommu.h>
42 #include <linux/syscore_ops.h>
43 #include <linux/tboot.h>
44 #include <linux/dmi.h>
45 #include <linux/pci-ats.h>
46 #include <linux/memblock.h>
47 #include <linux/dma-contiguous.h>
48 #include <linux/dma-direct.h>
49 #include <linux/crash_dump.h>
50 #include <asm/irq_remapping.h>
51 #include <asm/cacheflush.h>
52 #include <asm/iommu.h>
53
54 #include "irq_remapping.h"
55 #include "intel-pasid.h"
56
57 #define ROOT_SIZE VTD_PAGE_SIZE
58 #define CONTEXT_SIZE VTD_PAGE_SIZE
59
60 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
61 #define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
62 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
63 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
64
65 #define IOAPIC_RANGE_START (0xfee00000)
66 #define IOAPIC_RANGE_END (0xfeefffff)
67 #define IOVA_START_ADDR (0x1000)
68
69 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 57
70
71 #define MAX_AGAW_WIDTH 64
72 #define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
73
74 #define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
75 #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
76
77 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
78 to match. That way, we can use 'unsigned long' for PFNs with impunity. */
79 #define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
80 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
81 #define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
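/*
 * Illustrative example (not part of the original source): for gaw = 48,
 * __DOMAIN_MAX_PFN(48) = 2^36 - 1. On a 64-bit kernel the min_t() above
 * leaves that value untouched; on a 32-bit kernel it clamps to ULONG_MAX
 * so that PFNs always fit in an unsigned long, as the comment requires.
 */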
82
83 /* IO virtual address start page frame number */
84 #define IOVA_START_PFN (1)
85
86 #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
87
88 /* page table handling */
89 #define LEVEL_STRIDE (9)
90 #define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
91
92 /*
93  * This bitmap is used to advertise the page sizes our hardware supports
94 * to the IOMMU core, which will then use this information to split
95 * physically contiguous memory regions it is mapping into page sizes
96 * that we support.
97 *
98 * Traditionally the IOMMU core just handed us the mappings directly,
99 * after making sure the size is an order of a 4KiB page and that the
100 * mapping has natural alignment.
101 *
102 * To retain this behavior, we currently advertise that we support
103 * all page sizes that are an order of 4KiB.
104 *
105 * If at some point we'd like to utilize the IOMMU core's new behavior,
106 * we could change this to advertise the real page sizes we support.
107 */
108 #define INTEL_IOMMU_PGSIZES (~0xFFFUL)
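/*
 * Illustrative example (not part of the original source): ~0xFFFUL leaves
 * every bit from bit 12 upwards set, so each power-of-two size from 4KiB
 * up (4KiB, 8KiB, 16KiB, ...) is advertised to the IOMMU core as a
 * supported page size.
 */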
109
110 static inline int agaw_to_level(int agaw)
111 {
112 return agaw + 2;
113 }
114
115 static inline int agaw_to_width(int agaw)
116 {
117 return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
118 }
119
120 static inline int width_to_agaw(int width)
121 {
122 return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
123 }
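/*
 * Worked example for the conversion helpers above (added for clarity):
 * with the default 57-bit address width, width_to_agaw(57) =
 * DIV_ROUND_UP(57 - 30, 9) = 3, agaw_to_level(3) = 5 (a five-level page
 * table) and agaw_to_width(3) = 30 + 3 * 9 = 57. A 48-bit width likewise
 * maps to agaw 2 and a four-level table.
 */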
124
125 static inline unsigned int level_to_offset_bits(int level)
126 {
127 return (level - 1) * LEVEL_STRIDE;
128 }
129
130 static inline int pfn_level_offset(unsigned long pfn, int level)
131 {
132 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
133 }
134
135 static inline unsigned long level_mask(int level)
136 {
137 return -1UL << level_to_offset_bits(level);
138 }
139
140 static inline unsigned long level_size(int level)
141 {
142 return 1UL << level_to_offset_bits(level);
143 }
144
145 static inline unsigned long align_to_level(unsigned long pfn, int level)
146 {
147 return (pfn + level_size(level) - 1) & level_mask(level);
148 }
149
150 static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
151 {
152 return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
153 }
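/*
 * Illustrative example (not part of the original source):
 * lvl_to_nr_pages(2) = 1 << 9 = 512 4KiB pages (a 2MiB superpage) and
 * lvl_to_nr_pages(3) = 1 << 18 pages (a 1GiB superpage).
 */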
154
155 /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
156 are never going to work. */
157 static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
158 {
159 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
160 }
161
162 static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
163 {
164 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
165 }
166 static inline unsigned long page_to_dma_pfn(struct page *pg)
167 {
168 return mm_to_dma_pfn(page_to_pfn(pg));
169 }
170 static inline unsigned long virt_to_dma_pfn(void *p)
171 {
172 return page_to_dma_pfn(virt_to_page(p));
173 }
174
175 /* global iommu list, set NULL for ignored DMAR units */
176 static struct intel_iommu **g_iommus;
177
178 static void __init check_tylersburg_isoch(void);
179 static int rwbf_quirk;
180
181 /*
182  * Set to 1 to panic the kernel if VT-d can't be enabled successfully
183  * (used when the kernel is launched with TXT).
184 */
185 static int force_on = 0;
186 int intel_iommu_tboot_noforce;
187
188 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
189
190 /*
191 * Take a root_entry and return the Lower Context Table Pointer (LCTP)
192 * if marked present.
193 */
194 static phys_addr_t root_entry_lctp(struct root_entry *re)
195 {
196 if (!(re->lo & 1))
197 return 0;
198
199 return re->lo & VTD_PAGE_MASK;
200 }
201
202 /*
203 * Take a root_entry and return the Upper Context Table Pointer (UCTP)
204 * if marked present.
205 */
206 static phys_addr_t root_entry_uctp(struct root_entry *re)
207 {
208 if (!(re->hi & 1))
209 return 0;
210
211 return re->hi & VTD_PAGE_MASK;
212 }
213
214 static inline void context_clear_pasid_enable(struct context_entry *context)
215 {
216 context->lo &= ~(1ULL << 11);
217 }
218
219 static inline bool context_pasid_enabled(struct context_entry *context)
220 {
221 return !!(context->lo & (1ULL << 11));
222 }
223
224 static inline void context_set_copied(struct context_entry *context)
225 {
226 context->hi |= (1ull << 3);
227 }
228
229 static inline bool context_copied(struct context_entry *context)
230 {
231 return !!(context->hi & (1ULL << 3));
232 }
233
234 static inline bool __context_present(struct context_entry *context)
235 {
236 return (context->lo & 1);
237 }
238
239 bool context_present(struct context_entry *context)
240 {
241 return context_pasid_enabled(context) ?
242 __context_present(context) :
243 __context_present(context) && !context_copied(context);
244 }
245
246 static inline void context_set_present(struct context_entry *context)
247 {
248 context->lo |= 1;
249 }
250
251 static inline void context_set_fault_enable(struct context_entry *context)
252 {
253 context->lo &= (((u64)-1) << 2) | 1;
254 }
255
256 static inline void context_set_translation_type(struct context_entry *context,
257 unsigned long value)
258 {
259 context->lo &= (((u64)-1) << 4) | 3;
260 context->lo |= (value & 3) << 2;
261 }
262
263 static inline void context_set_address_root(struct context_entry *context,
264 unsigned long value)
265 {
266 context->lo &= ~VTD_PAGE_MASK;
267 context->lo |= value & VTD_PAGE_MASK;
268 }
269
270 static inline void context_set_address_width(struct context_entry *context,
271 unsigned long value)
272 {
273 context->hi |= value & 7;
274 }
275
276 static inline void context_set_domain_id(struct context_entry *context,
277 unsigned long value)
278 {
279 context->hi |= (value & ((1 << 16) - 1)) << 8;
280 }
281
282 static inline int context_domain_id(struct context_entry *c)
283 {
284 return((c->hi >> 8) & 0xffff);
285 }
286
287 static inline void context_clear_entry(struct context_entry *context)
288 {
289 context->lo = 0;
290 context->hi = 0;
291 }
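/*
 * Summary of the legacy context-entry layout, as encoded and decoded by
 * the helpers above (added for clarity): lo bit 0 = present, bit 1 =
 * fault processing disable, bits 3:2 = translation type, bit 11 = PASID
 * enable, bits 63:12 = address root; hi bits 2:0 = address width,
 * bits 23:8 = domain id. Bit 3 of hi is used by this driver as a
 * software-only "copied" marker for entries inherited from a kdump kernel.
 */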
292
293 /*
294  * This domain is a static identity mapping domain.
295  *      1. This domain creates a static 1:1 mapping to all usable memory.
296  *      2. It maps to each iommu if successful.
297  *      3. Each iommu maps to this domain if successful.
298 */
299 static struct dmar_domain *si_domain;
300 static int hw_pass_through = 1;
301
302 /*
303  * Domain represents a virtual machine; more than one device
304  * across iommus may be owned by one domain, e.g. a kvm guest.
305 */
306 #define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 0)
307
308 /* si_domain contains multiple devices */
309 #define DOMAIN_FLAG_STATIC_IDENTITY (1 << 1)
310
311 #define for_each_domain_iommu(idx, domain) \
312 for (idx = 0; idx < g_num_of_iommus; idx++) \
313 if (domain->iommu_refcnt[idx])
314
315 struct dmar_rmrr_unit {
316 struct list_head list; /* list of rmrr units */
317 struct acpi_dmar_header *hdr; /* ACPI header */
318 u64 base_address; /* reserved base address*/
319 u64 end_address; /* reserved end address */
320 struct dmar_dev_scope *devices; /* target devices */
321 int devices_cnt; /* target device count */
322 struct iommu_resv_region *resv; /* reserved region handle */
323 };
324
325 struct dmar_atsr_unit {
326 struct list_head list; /* list of ATSR units */
327 struct acpi_dmar_header *hdr; /* ACPI header */
328 struct dmar_dev_scope *devices; /* target devices */
329 int devices_cnt; /* target device count */
330 u8 include_all:1; /* include all ports */
331 };
332
333 static LIST_HEAD(dmar_atsr_units);
334 static LIST_HEAD(dmar_rmrr_units);
335
336 #define for_each_rmrr_units(rmrr) \
337 list_for_each_entry(rmrr, &dmar_rmrr_units, list)
338
339 /* number of IOMMUs in the system; used to size g_iommus and per-domain arrays */
340 static int g_num_of_iommus;
341
342 static void domain_exit(struct dmar_domain *domain);
343 static void domain_remove_dev_info(struct dmar_domain *domain);
344 static void dmar_remove_one_dev_info(struct dmar_domain *domain,
345 struct device *dev);
346 static void __dmar_remove_one_dev_info(struct device_domain_info *info);
347 static void domain_context_clear(struct intel_iommu *iommu,
348 struct device *dev);
349 static int domain_detach_iommu(struct dmar_domain *domain,
350 struct intel_iommu *iommu);
351
352 #ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
353 int dmar_disabled = 0;
354 #else
355 int dmar_disabled = 1;
356 #endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
357
358 int intel_iommu_enabled = 0;
359 EXPORT_SYMBOL_GPL(intel_iommu_enabled);
360
361 static int dmar_map_gfx = 1;
362 static int dmar_forcedac;
363 static int intel_iommu_strict;
364 static int intel_iommu_superpage = 1;
365 static int intel_iommu_sm = 1;
366 static int iommu_identity_mapping;
367
368 #define IDENTMAP_ALL 1
369 #define IDENTMAP_GFX 2
370 #define IDENTMAP_AZALIA 4
371
372 #define sm_supported(iommu) (intel_iommu_sm && ecap_smts((iommu)->ecap))
373 #define pasid_supported(iommu) (sm_supported(iommu) && \
374 ecap_pasid((iommu)->ecap))
375
376 int intel_iommu_gfx_mapped;
377 EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
378
379 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
380 static DEFINE_SPINLOCK(device_domain_lock);
381 static LIST_HEAD(device_domain_list);
382
383 /*
384 * Iterate over elements in device_domain_list and call the specified
385 * callback @fn against each element.
386 */
387 int for_each_device_domain(int (*fn)(struct device_domain_info *info,
388 void *data), void *data)
389 {
390 int ret = 0;
391 unsigned long flags;
392 struct device_domain_info *info;
393
394 spin_lock_irqsave(&device_domain_lock, flags);
395 list_for_each_entry(info, &device_domain_list, global) {
396 ret = fn(info, data);
397 if (ret) {
398 spin_unlock_irqrestore(&device_domain_lock, flags);
399 return ret;
400 }
401 }
402 spin_unlock_irqrestore(&device_domain_lock, flags);
403
404 return 0;
405 }
406
407 const struct iommu_ops intel_iommu_ops;
408
409 static bool translation_pre_enabled(struct intel_iommu *iommu)
410 {
411 return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
412 }
413
414 static void clear_translation_pre_enabled(struct intel_iommu *iommu)
415 {
416 iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
417 }
418
419 static void init_translation_status(struct intel_iommu *iommu)
420 {
421 u32 gsts;
422
423 gsts = readl(iommu->reg + DMAR_GSTS_REG);
424 if (gsts & DMA_GSTS_TES)
425 iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
426 }
427
428 /* Convert a generic 'struct iommu_domain' to the private 'struct dmar_domain' */
429 static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
430 {
431 return container_of(dom, struct dmar_domain, domain);
432 }
433
434 static int __init intel_iommu_setup(char *str)
435 {
436 if (!str)
437 return -EINVAL;
438 while (*str) {
439 if (!strncmp(str, "on", 2)) {
440 dmar_disabled = 0;
441 pr_info("IOMMU enabled\n");
442 } else if (!strncmp(str, "off", 3)) {
443 dmar_disabled = 1;
444 pr_info("IOMMU disabled\n");
445 } else if (!strncmp(str, "igfx_off", 8)) {
446 dmar_map_gfx = 0;
447 pr_info("Disable GFX device mapping\n");
448 } else if (!strncmp(str, "forcedac", 8)) {
449 pr_info("Forcing DAC for PCI devices\n");
450 dmar_forcedac = 1;
451 } else if (!strncmp(str, "strict", 6)) {
452 pr_info("Disable batched IOTLB flush\n");
453 intel_iommu_strict = 1;
454 } else if (!strncmp(str, "sp_off", 6)) {
455 pr_info("Disable supported super page\n");
456 intel_iommu_superpage = 0;
457 } else if (!strncmp(str, "sm_off", 6)) {
458 pr_info("Intel-IOMMU: disable scalable mode support\n");
459 intel_iommu_sm = 0;
460 } else if (!strncmp(str, "tboot_noforce", 13)) {
461 printk(KERN_INFO
462 "Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
463 intel_iommu_tboot_noforce = 1;
464 }
465
466 str += strcspn(str, ",");
467 while (*str == ',')
468 str++;
469 }
470 return 0;
471 }
472 __setup("intel_iommu=", intel_iommu_setup);
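/*
 * Usage example (added for clarity): booting with
 * "intel_iommu=on,strict,sp_off" is parsed one comma-separated token at a
 * time by the loop above, setting dmar_disabled = 0,
 * intel_iommu_strict = 1 and intel_iommu_superpage = 0.
 */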
473
474 static struct kmem_cache *iommu_domain_cache;
475 static struct kmem_cache *iommu_devinfo_cache;
476
477 static struct dmar_domain* get_iommu_domain(struct intel_iommu *iommu, u16 did)
478 {
479 struct dmar_domain **domains;
480 int idx = did >> 8;
481
482 domains = iommu->domains[idx];
483 if (!domains)
484 return NULL;
485
486 return domains[did & 0xff];
487 }
488
489 static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
490 struct dmar_domain *domain)
491 {
492 struct dmar_domain **domains;
493 int idx = did >> 8;
494
495 if (!iommu->domains[idx]) {
496 size_t size = 256 * sizeof(struct dmar_domain *);
497 iommu->domains[idx] = kzalloc(size, GFP_ATOMIC);
498 }
499
500 domains = iommu->domains[idx];
501 if (WARN_ON(!domains))
502 return;
503 else
504 domains[did & 0xff] = domain;
505 }
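/*
 * Illustrative example (not part of the original source): domain id
 * 0x1234 is looked up as iommu->domains[0x12][0x34]; the two-level array
 * lets the driver allocate the 256-entry second-level chunks lazily
 * instead of reserving cap_ndoms() pointers up front.
 */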
506
507 void *alloc_pgtable_page(int node)
508 {
509 struct page *page;
510 void *vaddr = NULL;
511
512 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
513 if (page)
514 vaddr = page_address(page);
515 return vaddr;
516 }
517
518 void free_pgtable_page(void *vaddr)
519 {
520 free_page((unsigned long)vaddr);
521 }
522
523 static inline void *alloc_domain_mem(void)
524 {
525 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
526 }
527
528 static void free_domain_mem(void *vaddr)
529 {
530 kmem_cache_free(iommu_domain_cache, vaddr);
531 }
532
533 static inline void * alloc_devinfo_mem(void)
534 {
535 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
536 }
537
538 static inline void free_devinfo_mem(void *vaddr)
539 {
540 kmem_cache_free(iommu_devinfo_cache, vaddr);
541 }
542
543 static inline int domain_type_is_vm(struct dmar_domain *domain)
544 {
545 return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE;
546 }
547
548 static inline int domain_type_is_si(struct dmar_domain *domain)
549 {
550 return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
551 }
552
553 static inline int domain_type_is_vm_or_si(struct dmar_domain *domain)
554 {
555 return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE |
556 DOMAIN_FLAG_STATIC_IDENTITY);
557 }
558
559 static inline int domain_pfn_supported(struct dmar_domain *domain,
560 unsigned long pfn)
561 {
562 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
563
564 return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
565 }
566
567 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
568 {
569 unsigned long sagaw;
570 int agaw = -1;
571
572 sagaw = cap_sagaw(iommu->cap);
573 for (agaw = width_to_agaw(max_gaw);
574 agaw >= 0; agaw--) {
575 if (test_bit(agaw, &sagaw))
576 break;
577 }
578
579 return agaw;
580 }
581
582 /*
583 * Calculate max SAGAW for each iommu.
584 */
585 int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
586 {
587 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
588 }
589
590 /*
591  * Calculate the agaw for each iommu.
592  * "SAGAW" may be different across iommus; use a default agaw, and
593  * fall back to a smaller supported agaw for iommus that don't support the default.
594 */
595 int iommu_calculate_agaw(struct intel_iommu *iommu)
596 {
597 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
598 }
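/*
 * Illustrative example (added for clarity, assuming the VT-d SAGAW
 * encoding where bit 2 means 4-level tables): with the 57-bit default
 * width the search starts at agaw 3; if the hardware only advertises
 * 4-level support, the loop walks down and returns agaw 2, i.e. a
 * 48-bit address width.
 */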
599
600 /* This function only returns a single iommu in a domain */
601 struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
602 {
603 int iommu_id;
604
605 /* si_domain and vm domain should not get here. */
606 BUG_ON(domain_type_is_vm_or_si(domain));
607 for_each_domain_iommu(iommu_id, domain)
608 break;
609
610 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
611 return NULL;
612
613 return g_iommus[iommu_id];
614 }
615
616 static void domain_update_iommu_coherency(struct dmar_domain *domain)
617 {
618 struct dmar_drhd_unit *drhd;
619 struct intel_iommu *iommu;
620 bool found = false;
621 int i;
622
623 domain->iommu_coherency = 1;
624
625 for_each_domain_iommu(i, domain) {
626 found = true;
627 if (!ecap_coherent(g_iommus[i]->ecap)) {
628 domain->iommu_coherency = 0;
629 break;
630 }
631 }
632 if (found)
633 return;
634
635 /* No hardware attached; use lowest common denominator */
636 rcu_read_lock();
637 for_each_active_iommu(iommu, drhd) {
638 if (!ecap_coherent(iommu->ecap)) {
639 domain->iommu_coherency = 0;
640 break;
641 }
642 }
643 rcu_read_unlock();
644 }
645
646 static int domain_update_iommu_snooping(struct intel_iommu *skip)
647 {
648 struct dmar_drhd_unit *drhd;
649 struct intel_iommu *iommu;
650 int ret = 1;
651
652 rcu_read_lock();
653 for_each_active_iommu(iommu, drhd) {
654 if (iommu != skip) {
655 if (!ecap_sc_support(iommu->ecap)) {
656 ret = 0;
657 break;
658 }
659 }
660 }
661 rcu_read_unlock();
662
663 return ret;
664 }
665
666 static int domain_update_iommu_superpage(struct intel_iommu *skip)
667 {
668 struct dmar_drhd_unit *drhd;
669 struct intel_iommu *iommu;
670 int mask = 0xf;
671
672 if (!intel_iommu_superpage) {
673 return 0;
674 }
675
676 /* set iommu_superpage to the smallest common denominator */
677 rcu_read_lock();
678 for_each_active_iommu(iommu, drhd) {
679 if (iommu != skip) {
680 mask &= cap_super_page_val(iommu->cap);
681 if (!mask)
682 break;
683 }
684 }
685 rcu_read_unlock();
686
687 return fls(mask);
688 }
689
690 /* Some capabilities may be different across iommus */
691 static void domain_update_iommu_cap(struct dmar_domain *domain)
692 {
693 domain_update_iommu_coherency(domain);
694 domain->iommu_snooping = domain_update_iommu_snooping(NULL);
695 domain->iommu_superpage = domain_update_iommu_superpage(NULL);
696 }
697
698 struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
699 u8 devfn, int alloc)
700 {
701 struct root_entry *root = &iommu->root_entry[bus];
702 struct context_entry *context;
703 u64 *entry;
704
705 entry = &root->lo;
706 if (sm_supported(iommu)) {
707 if (devfn >= 0x80) {
708 devfn -= 0x80;
709 entry = &root->hi;
710 }
711 devfn *= 2;
712 }
713 if (*entry & 1)
714 context = phys_to_virt(*entry & VTD_PAGE_MASK);
715 else {
716 unsigned long phy_addr;
717 if (!alloc)
718 return NULL;
719
720 context = alloc_pgtable_page(iommu->node);
721 if (!context)
722 return NULL;
723
724 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
725 phy_addr = virt_to_phys((void *)context);
726 *entry = phy_addr | 1;
727 __iommu_flush_cache(iommu, entry, sizeof(*entry));
728 }
729 return &context[devfn];
730 }
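/*
 * Example of the devfn adjustment above (added for clarity): in scalable
 * mode each context entry is twice the legacy size, so a root entry's lo
 * pointer covers devfn 0x00-0x7f and its hi pointer covers devfn
 * 0x80-0xff. A request for devfn 0x83 therefore uses root->hi and
 * indexes slot (0x83 - 0x80) * 2 = 6 in that table.
 */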
731
732 static int iommu_dummy(struct device *dev)
733 {
734 return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
735 }
736
737 static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
738 {
739 struct dmar_drhd_unit *drhd = NULL;
740 struct intel_iommu *iommu;
741 struct device *tmp;
742 struct pci_dev *ptmp, *pdev = NULL;
743 u16 segment = 0;
744 int i;
745
746 if (iommu_dummy(dev))
747 return NULL;
748
749 if (dev_is_pci(dev)) {
750 struct pci_dev *pf_pdev;
751
752 pdev = to_pci_dev(dev);
753
754 #ifdef CONFIG_X86
755 /* VMD child devices currently cannot be handled individually */
756 if (is_vmd(pdev->bus))
757 return NULL;
758 #endif
759
760 /* VFs aren't listed in scope tables; we need to look up
761 * the PF instead to find the IOMMU. */
762 pf_pdev = pci_physfn(pdev);
763 dev = &pf_pdev->dev;
764 segment = pci_domain_nr(pdev->bus);
765 } else if (has_acpi_companion(dev))
766 dev = &ACPI_COMPANION(dev)->dev;
767
768 rcu_read_lock();
769 for_each_active_iommu(iommu, drhd) {
770 if (pdev && segment != drhd->segment)
771 continue;
772
773 for_each_active_dev_scope(drhd->devices,
774 drhd->devices_cnt, i, tmp) {
775 if (tmp == dev) {
776 /* For a VF use its original BDF# not that of the PF
777 * which we used for the IOMMU lookup. Strictly speaking
778 * we could do this for all PCI devices; we only need to
779 * get the BDF# from the scope table for ACPI matches. */
780 if (pdev && pdev->is_virtfn)
781 goto got_pdev;
782
783 *bus = drhd->devices[i].bus;
784 *devfn = drhd->devices[i].devfn;
785 goto out;
786 }
787
788 if (!pdev || !dev_is_pci(tmp))
789 continue;
790
791 ptmp = to_pci_dev(tmp);
792 if (ptmp->subordinate &&
793 ptmp->subordinate->number <= pdev->bus->number &&
794 ptmp->subordinate->busn_res.end >= pdev->bus->number)
795 goto got_pdev;
796 }
797
798 if (pdev && drhd->include_all) {
799 got_pdev:
800 *bus = pdev->bus->number;
801 *devfn = pdev->devfn;
802 goto out;
803 }
804 }
805 iommu = NULL;
806 out:
807 rcu_read_unlock();
808
809 return iommu;
810 }
811
812 static void domain_flush_cache(struct dmar_domain *domain,
813 void *addr, int size)
814 {
815 if (!domain->iommu_coherency)
816 clflush_cache_range(addr, size);
817 }
818
819 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
820 {
821 struct context_entry *context;
822 int ret = 0;
823 unsigned long flags;
824
825 spin_lock_irqsave(&iommu->lock, flags);
826 context = iommu_context_addr(iommu, bus, devfn, 0);
827 if (context)
828 ret = context_present(context);
829 spin_unlock_irqrestore(&iommu->lock, flags);
830 return ret;
831 }
832
833 static void free_context_table(struct intel_iommu *iommu)
834 {
835 int i;
836 unsigned long flags;
837 struct context_entry *context;
838
839 spin_lock_irqsave(&iommu->lock, flags);
840 if (!iommu->root_entry) {
841 goto out;
842 }
843 for (i = 0; i < ROOT_ENTRY_NR; i++) {
844 context = iommu_context_addr(iommu, i, 0, 0);
845 if (context)
846 free_pgtable_page(context);
847
848 if (!sm_supported(iommu))
849 continue;
850
851 context = iommu_context_addr(iommu, i, 0x80, 0);
852 if (context)
853 free_pgtable_page(context);
854
855 }
856 free_pgtable_page(iommu->root_entry);
857 iommu->root_entry = NULL;
858 out:
859 spin_unlock_irqrestore(&iommu->lock, flags);
860 }
861
862 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
863 unsigned long pfn, int *target_level)
864 {
865 struct dma_pte *parent, *pte = NULL;
866 int level = agaw_to_level(domain->agaw);
867 int offset;
868
869 BUG_ON(!domain->pgd);
870
871 if (!domain_pfn_supported(domain, pfn))
872 /* Address beyond IOMMU's addressing capabilities. */
873 return NULL;
874
875 parent = domain->pgd;
876
877 while (1) {
878 void *tmp_page;
879
880 offset = pfn_level_offset(pfn, level);
881 pte = &parent[offset];
882 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
883 break;
884 if (level == *target_level)
885 break;
886
887 if (!dma_pte_present(pte)) {
888 uint64_t pteval;
889
890 tmp_page = alloc_pgtable_page(domain->nid);
891
892 if (!tmp_page)
893 return NULL;
894
895 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
896 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
897 if (cmpxchg64(&pte->val, 0ULL, pteval))
898 /* Someone else set it while we were thinking; use theirs. */
899 free_pgtable_page(tmp_page);
900 else
901 domain_flush_cache(domain, pte, sizeof(*pte));
902 }
903 if (level == 1)
904 break;
905
906 parent = phys_to_virt(dma_pte_addr(pte));
907 level--;
908 }
909
910 if (!*target_level)
911 *target_level = level;
912
913 return pte;
914 }
915
916
917 /* return address's pte at specific level */
918 static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
919 unsigned long pfn,
920 int level, int *large_page)
921 {
922 struct dma_pte *parent, *pte = NULL;
923 int total = agaw_to_level(domain->agaw);
924 int offset;
925
926 parent = domain->pgd;
927 while (level <= total) {
928 offset = pfn_level_offset(pfn, total);
929 pte = &parent[offset];
930 if (level == total)
931 return pte;
932
933 if (!dma_pte_present(pte)) {
934 *large_page = total;
935 break;
936 }
937
938 if (dma_pte_superpage(pte)) {
939 *large_page = total;
940 return pte;
941 }
942
943 parent = phys_to_virt(dma_pte_addr(pte));
944 total--;
945 }
946 return NULL;
947 }
948
949 /* clear last level pte; a tlb flush should follow */
950 static void dma_pte_clear_range(struct dmar_domain *domain,
951 unsigned long start_pfn,
952 unsigned long last_pfn)
953 {
954 unsigned int large_page = 1;
955 struct dma_pte *first_pte, *pte;
956
957 BUG_ON(!domain_pfn_supported(domain, start_pfn));
958 BUG_ON(!domain_pfn_supported(domain, last_pfn));
959 BUG_ON(start_pfn > last_pfn);
960
961 /* we don't need lock here; nobody else touches the iova range */
962 do {
963 large_page = 1;
964 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
965 if (!pte) {
966 start_pfn = align_to_level(start_pfn + 1, large_page + 1);
967 continue;
968 }
969 do {
970 dma_clear_pte(pte);
971 start_pfn += lvl_to_nr_pages(large_page);
972 pte++;
973 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
974
975 domain_flush_cache(domain, first_pte,
976 (void *)pte - (void *)first_pte);
977
978 } while (start_pfn && start_pfn <= last_pfn);
979 }
980
981 static void dma_pte_free_level(struct dmar_domain *domain, int level,
982 int retain_level, struct dma_pte *pte,
983 unsigned long pfn, unsigned long start_pfn,
984 unsigned long last_pfn)
985 {
986 pfn = max(start_pfn, pfn);
987 pte = &pte[pfn_level_offset(pfn, level)];
988
989 do {
990 unsigned long level_pfn;
991 struct dma_pte *level_pte;
992
993 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
994 goto next;
995
996 level_pfn = pfn & level_mask(level);
997 level_pte = phys_to_virt(dma_pte_addr(pte));
998
999 if (level > 2) {
1000 dma_pte_free_level(domain, level - 1, retain_level,
1001 level_pte, level_pfn, start_pfn,
1002 last_pfn);
1003 }
1004
1005 /*
1006 * Free the page table if we're below the level we want to
1007 * retain and the range covers the entire table.
1008 */
1009 if (level < retain_level && !(start_pfn > level_pfn ||
1010 last_pfn < level_pfn + level_size(level) - 1)) {
1011 dma_clear_pte(pte);
1012 domain_flush_cache(domain, pte, sizeof(*pte));
1013 free_pgtable_page(level_pte);
1014 }
1015 next:
1016 pfn += level_size(level);
1017 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1018 }
1019
1020 /*
1021 * clear last level (leaf) ptes and free page table pages below the
1022 * level we wish to keep intact.
1023 */
1024 static void dma_pte_free_pagetable(struct dmar_domain *domain,
1025 unsigned long start_pfn,
1026 unsigned long last_pfn,
1027 int retain_level)
1028 {
1029 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1030 BUG_ON(!domain_pfn_supported(domain, last_pfn));
1031 BUG_ON(start_pfn > last_pfn);
1032
1033 dma_pte_clear_range(domain, start_pfn, last_pfn);
1034
1035 /* We don't need lock here; nobody else touches the iova range */
1036 dma_pte_free_level(domain, agaw_to_level(domain->agaw), retain_level,
1037 domain->pgd, 0, start_pfn, last_pfn);
1038
1039 /* free pgd */
1040 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1041 free_pgtable_page(domain->pgd);
1042 domain->pgd = NULL;
1043 }
1044 }
1045
1046 /* When a page at a given level is being unlinked from its parent, we don't
1047 need to *modify* it at all. All we need to do is make a list of all the
1048 pages which can be freed just as soon as we've flushed the IOTLB and we
1049 know the hardware page-walk will no longer touch them.
1050 The 'pte' argument is the *parent* PTE, pointing to the page that is to
1051 be freed. */
1052 static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1053 int level, struct dma_pte *pte,
1054 struct page *freelist)
1055 {
1056 struct page *pg;
1057
1058 pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1059 pg->freelist = freelist;
1060 freelist = pg;
1061
1062 if (level == 1)
1063 return freelist;
1064
1065 pte = page_address(pg);
1066 do {
1067 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1068 freelist = dma_pte_list_pagetables(domain, level - 1,
1069 pte, freelist);
1070 pte++;
1071 } while (!first_pte_in_page(pte));
1072
1073 return freelist;
1074 }
1075
1076 static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1077 struct dma_pte *pte, unsigned long pfn,
1078 unsigned long start_pfn,
1079 unsigned long last_pfn,
1080 struct page *freelist)
1081 {
1082 struct dma_pte *first_pte = NULL, *last_pte = NULL;
1083
1084 pfn = max(start_pfn, pfn);
1085 pte = &pte[pfn_level_offset(pfn, level)];
1086
1087 do {
1088 unsigned long level_pfn;
1089
1090 if (!dma_pte_present(pte))
1091 goto next;
1092
1093 level_pfn = pfn & level_mask(level);
1094
1095 /* If range covers entire pagetable, free it */
1096 if (start_pfn <= level_pfn &&
1097 last_pfn >= level_pfn + level_size(level) - 1) {
1098                         /* These subordinate page tables are going away entirely. Don't
1099 bother to clear them; we're just going to *free* them. */
1100 if (level > 1 && !dma_pte_superpage(pte))
1101 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1102
1103 dma_clear_pte(pte);
1104 if (!first_pte)
1105 first_pte = pte;
1106 last_pte = pte;
1107 } else if (level > 1) {
1108 /* Recurse down into a level that isn't *entirely* obsolete */
1109 freelist = dma_pte_clear_level(domain, level - 1,
1110 phys_to_virt(dma_pte_addr(pte)),
1111 level_pfn, start_pfn, last_pfn,
1112 freelist);
1113 }
1114 next:
1115 pfn += level_size(level);
1116 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1117
1118 if (first_pte)
1119 domain_flush_cache(domain, first_pte,
1120 (void *)++last_pte - (void *)first_pte);
1121
1122 return freelist;
1123 }
1124
1125 /* We can't just free the pages because the IOMMU may still be walking
1126 the page tables, and may have cached the intermediate levels. The
1127 pages can only be freed after the IOTLB flush has been done. */
1128 static struct page *domain_unmap(struct dmar_domain *domain,
1129 unsigned long start_pfn,
1130 unsigned long last_pfn)
1131 {
1132 struct page *freelist = NULL;
1133
1134 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1135 BUG_ON(!domain_pfn_supported(domain, last_pfn));
1136 BUG_ON(start_pfn > last_pfn);
1137
1138 /* we don't need lock here; nobody else touches the iova range */
1139 freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1140 domain->pgd, 0, start_pfn, last_pfn, NULL);
1141
1142 /* free pgd */
1143 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1144 struct page *pgd_page = virt_to_page(domain->pgd);
1145 pgd_page->freelist = freelist;
1146 freelist = pgd_page;
1147
1148 domain->pgd = NULL;
1149 }
1150
1151 return freelist;
1152 }
1153
1154 static void dma_free_pagelist(struct page *freelist)
1155 {
1156 struct page *pg;
1157
1158 while ((pg = freelist)) {
1159 freelist = pg->freelist;
1160 free_pgtable_page(page_address(pg));
1161 }
1162 }
1163
1164 static void iova_entry_free(unsigned long data)
1165 {
1166 struct page *freelist = (struct page *)data;
1167
1168 dma_free_pagelist(freelist);
1169 }
1170
1171 /* iommu handling */
1172 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1173 {
1174 struct root_entry *root;
1175 unsigned long flags;
1176
1177 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
1178 if (!root) {
1179 pr_err("Allocating root entry for %s failed\n",
1180 iommu->name);
1181 return -ENOMEM;
1182 }
1183
1184 __iommu_flush_cache(iommu, root, ROOT_SIZE);
1185
1186 spin_lock_irqsave(&iommu->lock, flags);
1187 iommu->root_entry = root;
1188 spin_unlock_irqrestore(&iommu->lock, flags);
1189
1190 return 0;
1191 }
1192
1193 static void iommu_set_root_entry(struct intel_iommu *iommu)
1194 {
1195 u64 addr;
1196 u32 sts;
1197 unsigned long flag;
1198
1199 addr = virt_to_phys(iommu->root_entry);
1200 if (sm_supported(iommu))
1201 addr |= DMA_RTADDR_SMT;
1202
1203 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1204 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
1205
1206 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
1207
1208         /* Make sure hardware completes it */
1209 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1210 readl, (sts & DMA_GSTS_RTPS), sts);
1211
1212 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1213 }
1214
1215 void iommu_flush_write_buffer(struct intel_iommu *iommu)
1216 {
1217 u32 val;
1218 unsigned long flag;
1219
1220 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
1221 return;
1222
1223 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1224 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
1225
1226         /* Make sure hardware completes it */
1227 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1228 readl, (!(val & DMA_GSTS_WBFS)), val);
1229
1230 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1231 }
1232
1233 /* return value determines if we need a write buffer flush */
1234 static void __iommu_flush_context(struct intel_iommu *iommu,
1235 u16 did, u16 source_id, u8 function_mask,
1236 u64 type)
1237 {
1238 u64 val = 0;
1239 unsigned long flag;
1240
1241 switch (type) {
1242 case DMA_CCMD_GLOBAL_INVL:
1243 val = DMA_CCMD_GLOBAL_INVL;
1244 break;
1245 case DMA_CCMD_DOMAIN_INVL:
1246 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1247 break;
1248 case DMA_CCMD_DEVICE_INVL:
1249 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1250 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1251 break;
1252 default:
1253 BUG();
1254 }
1255 val |= DMA_CCMD_ICC;
1256
1257 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1258 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1259
1260         /* Make sure hardware completes it */
1261 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1262 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1263
1264 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1265 }
1266
1267 /* return value determines if we need a write buffer flush */
1268 static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1269 u64 addr, unsigned int size_order, u64 type)
1270 {
1271 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1272 u64 val = 0, val_iva = 0;
1273 unsigned long flag;
1274
1275 switch (type) {
1276 case DMA_TLB_GLOBAL_FLUSH:
1277                 /* global flush doesn't need to set IVA_REG */
1278 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1279 break;
1280 case DMA_TLB_DSI_FLUSH:
1281 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1282 break;
1283 case DMA_TLB_PSI_FLUSH:
1284 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1285 /* IH bit is passed in as part of address */
1286 val_iva = size_order | addr;
1287 break;
1288 default:
1289 BUG();
1290 }
1291 /* Note: set drain read/write */
1292 #if 0
1293 /*
1294          * This is probably just to be extra safe. It looks like we can
1295          * ignore it without any impact.
1296 */
1297 if (cap_read_drain(iommu->cap))
1298 val |= DMA_TLB_READ_DRAIN;
1299 #endif
1300 if (cap_write_drain(iommu->cap))
1301 val |= DMA_TLB_WRITE_DRAIN;
1302
1303 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1304 /* Note: Only uses first TLB reg currently */
1305 if (val_iva)
1306 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1307 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1308
1309         /* Make sure hardware completes it */
1310 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1311 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1312
1313 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1314
1315 /* check IOTLB invalidation granularity */
1316 if (DMA_TLB_IAIG(val) == 0)
1317 pr_err("Flush IOTLB failed\n");
1318 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1319 pr_debug("TLB flush request %Lx, actual %Lx\n",
1320 (unsigned long long)DMA_TLB_IIRG(type),
1321 (unsigned long long)DMA_TLB_IAIG(val));
1322 }
1323
1324 static struct device_domain_info *
1325 iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
1326 u8 bus, u8 devfn)
1327 {
1328 struct device_domain_info *info;
1329
1330 assert_spin_locked(&device_domain_lock);
1331
1332 if (!iommu->qi)
1333 return NULL;
1334
1335 list_for_each_entry(info, &domain->devices, link)
1336 if (info->iommu == iommu && info->bus == bus &&
1337 info->devfn == devfn) {
1338 if (info->ats_supported && info->dev)
1339 return info;
1340 break;
1341 }
1342
1343 return NULL;
1344 }
1345
1346 static void domain_update_iotlb(struct dmar_domain *domain)
1347 {
1348 struct device_domain_info *info;
1349 bool has_iotlb_device = false;
1350
1351 assert_spin_locked(&device_domain_lock);
1352
1353 list_for_each_entry(info, &domain->devices, link) {
1354 struct pci_dev *pdev;
1355
1356 if (!info->dev || !dev_is_pci(info->dev))
1357 continue;
1358
1359 pdev = to_pci_dev(info->dev);
1360 if (pdev->ats_enabled) {
1361 has_iotlb_device = true;
1362 break;
1363 }
1364 }
1365
1366 domain->has_iotlb_device = has_iotlb_device;
1367 }
1368
1369 static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1370 {
1371 struct pci_dev *pdev;
1372
1373 assert_spin_locked(&device_domain_lock);
1374
1375 if (!info || !dev_is_pci(info->dev))
1376 return;
1377
1378 pdev = to_pci_dev(info->dev);
1379         /* For IOMMUs that support device IOTLB throttling (DIT), we assign
1380 * PFSID to the invalidation desc of a VF such that IOMMU HW can gauge
1381 * queue depth at PF level. If DIT is not set, PFSID will be treated as
1382 * reserved, which should be set to 0.
1383 */
1384 if (!ecap_dit(info->iommu->ecap))
1385 info->pfsid = 0;
1386 else {
1387 struct pci_dev *pf_pdev;
1388
1389 /* pdev will be returned if device is not a vf */
1390 pf_pdev = pci_physfn(pdev);
1391 info->pfsid = PCI_DEVID(pf_pdev->bus->number, pf_pdev->devfn);
1392 }
1393
1394 #ifdef CONFIG_INTEL_IOMMU_SVM
1395 /* The PCIe spec, in its wisdom, declares that the behaviour of
1396 the device if you enable PASID support after ATS support is
1397 undefined. So always enable PASID support on devices which
1398 have it, even if we can't yet know if we're ever going to
1399 use it. */
1400 if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
1401 info->pasid_enabled = 1;
1402
1403 if (info->pri_supported && !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
1404 info->pri_enabled = 1;
1405 #endif
1406 if (info->ats_supported && !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
1407 info->ats_enabled = 1;
1408 domain_update_iotlb(info->domain);
1409 info->ats_qdep = pci_ats_queue_depth(pdev);
1410 }
1411 }
1412
1413 static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1414 {
1415 struct pci_dev *pdev;
1416
1417 assert_spin_locked(&device_domain_lock);
1418
1419 if (!dev_is_pci(info->dev))
1420 return;
1421
1422 pdev = to_pci_dev(info->dev);
1423
1424 if (info->ats_enabled) {
1425 pci_disable_ats(pdev);
1426 info->ats_enabled = 0;
1427 domain_update_iotlb(info->domain);
1428 }
1429 #ifdef CONFIG_INTEL_IOMMU_SVM
1430 if (info->pri_enabled) {
1431 pci_disable_pri(pdev);
1432 info->pri_enabled = 0;
1433 }
1434 if (info->pasid_enabled) {
1435 pci_disable_pasid(pdev);
1436 info->pasid_enabled = 0;
1437 }
1438 #endif
1439 }
1440
1441 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1442 u64 addr, unsigned mask)
1443 {
1444 u16 sid, qdep;
1445 unsigned long flags;
1446 struct device_domain_info *info;
1447
1448 if (!domain->has_iotlb_device)
1449 return;
1450
1451 spin_lock_irqsave(&device_domain_lock, flags);
1452 list_for_each_entry(info, &domain->devices, link) {
1453 if (!info->ats_enabled)
1454 continue;
1455
1456 sid = info->bus << 8 | info->devfn;
1457 qdep = info->ats_qdep;
1458 qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
1459 qdep, addr, mask);
1460 }
1461 spin_unlock_irqrestore(&device_domain_lock, flags);
1462 }
1463
1464 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
1465 struct dmar_domain *domain,
1466 unsigned long pfn, unsigned int pages,
1467 int ih, int map)
1468 {
1469 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1470 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1471 u16 did = domain->iommu_did[iommu->seq_id];
1472
1473 BUG_ON(pages == 0);
1474
1475 if (ih)
1476 ih = 1 << 6;
1477 /*
1478          * Fall back to domain-selective flush if there is no PSI support or the
1479          * size is too big.
1480          * PSI requires the page size to be 2^x and the base address to be
1481          * naturally aligned to that size.
1482 */
1483 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1484 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1485 DMA_TLB_DSI_FLUSH);
1486 else
1487 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
1488 DMA_TLB_PSI_FLUSH);
1489
1490 /*
1491 * In caching mode, changes of pages from non-present to present require
1492 * flush. However, device IOTLB doesn't need to be flushed in this case.
1493 */
1494 if (!cap_caching_mode(iommu->cap) || !map)
1495 iommu_flush_dev_iotlb(domain, addr, mask);
1496 }
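/*
 * Example of the mask computation above (added for clarity): flushing 9
 * pages rounds up to 16, so mask = ilog2(16) = 4 and the PSI invalidates
 * a naturally aligned 16-page (64KiB) region containing addr, falling
 * back to a domain-selective flush when mask exceeds cap_max_amask_val().
 */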
1497
1498 /* Notification for newly created mappings */
1499 static inline void __mapping_notify_one(struct intel_iommu *iommu,
1500 struct dmar_domain *domain,
1501 unsigned long pfn, unsigned int pages)
1502 {
1503 /* It's a non-present to present mapping. Only flush if caching mode */
1504 if (cap_caching_mode(iommu->cap))
1505 iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1);
1506 else
1507 iommu_flush_write_buffer(iommu);
1508 }
1509
1510 static void iommu_flush_iova(struct iova_domain *iovad)
1511 {
1512 struct dmar_domain *domain;
1513 int idx;
1514
1515 domain = container_of(iovad, struct dmar_domain, iovad);
1516
1517 for_each_domain_iommu(idx, domain) {
1518 struct intel_iommu *iommu = g_iommus[idx];
1519 u16 did = domain->iommu_did[iommu->seq_id];
1520
1521 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
1522
1523 if (!cap_caching_mode(iommu->cap))
1524 iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
1525 0, MAX_AGAW_PFN_WIDTH);
1526 }
1527 }
1528
1529 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1530 {
1531 u32 pmen;
1532 unsigned long flags;
1533
1534 raw_spin_lock_irqsave(&iommu->register_lock, flags);
1535 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1536 pmen &= ~DMA_PMEN_EPM;
1537 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1538
1539 /* wait for the protected region status bit to clear */
1540 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1541 readl, !(pmen & DMA_PMEN_PRS), pmen);
1542
1543 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1544 }
1545
1546 static void iommu_enable_translation(struct intel_iommu *iommu)
1547 {
1548 u32 sts;
1549 unsigned long flags;
1550
1551 raw_spin_lock_irqsave(&iommu->register_lock, flags);
1552 iommu->gcmd |= DMA_GCMD_TE;
1553 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1554
1555         /* Make sure hardware completes it */
1556 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1557 readl, (sts & DMA_GSTS_TES), sts);
1558
1559 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1560 }
1561
1562 static void iommu_disable_translation(struct intel_iommu *iommu)
1563 {
1564 u32 sts;
1565 unsigned long flag;
1566
1567 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1568 iommu->gcmd &= ~DMA_GCMD_TE;
1569 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1570
1571         /* Make sure hardware completes it */
1572 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1573 readl, (!(sts & DMA_GSTS_TES)), sts);
1574
1575 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1576 }
1577
1578
1579 static int iommu_init_domains(struct intel_iommu *iommu)
1580 {
1581 u32 ndomains, nlongs;
1582 size_t size;
1583
1584 ndomains = cap_ndoms(iommu->cap);
1585 pr_debug("%s: Number of Domains supported <%d>\n",
1586 iommu->name, ndomains);
1587 nlongs = BITS_TO_LONGS(ndomains);
1588
1589 spin_lock_init(&iommu->lock);
1590
1591 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1592 if (!iommu->domain_ids) {
1593 pr_err("%s: Allocating domain id array failed\n",
1594 iommu->name);
1595 return -ENOMEM;
1596 }
1597
1598 size = (ALIGN(ndomains, 256) >> 8) * sizeof(struct dmar_domain **);
1599 iommu->domains = kzalloc(size, GFP_KERNEL);
1600
1601 if (iommu->domains) {
1602 size = 256 * sizeof(struct dmar_domain *);
1603 iommu->domains[0] = kzalloc(size, GFP_KERNEL);
1604 }
1605
1606 if (!iommu->domains || !iommu->domains[0]) {
1607 pr_err("%s: Allocating domain array failed\n",
1608 iommu->name);
1609 kfree(iommu->domain_ids);
1610 kfree(iommu->domains);
1611 iommu->domain_ids = NULL;
1612 iommu->domains = NULL;
1613 return -ENOMEM;
1614 }
1615
1616
1617
1618 /*
1619 * If Caching mode is set, then invalid translations are tagged
1620 * with domain-id 0, hence we need to pre-allocate it. We also
1621 * use domain-id 0 as a marker for non-allocated domain-id, so
1622 * make sure it is not used for a real domain.
1623 */
1624 set_bit(0, iommu->domain_ids);
1625
1626 /*
1627 * Vt-d spec rev3.0 (section 6.2.3.1) requires that each pasid
1628 * entry for first-level or pass-through translation modes should
1629 * be programmed with a domain id different from those used for
1630 * second-level or nested translation. We reserve a domain id for
1631 * this purpose.
1632 */
1633 if (sm_supported(iommu))
1634 set_bit(FLPT_DEFAULT_DID, iommu->domain_ids);
1635
1636 return 0;
1637 }
1638
1639 static void disable_dmar_iommu(struct intel_iommu *iommu)
1640 {
1641 struct device_domain_info *info, *tmp;
1642 unsigned long flags;
1643
1644 if (!iommu->domains || !iommu->domain_ids)
1645 return;
1646
1647 again:
1648 spin_lock_irqsave(&device_domain_lock, flags);
1649 list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
1650 struct dmar_domain *domain;
1651
1652 if (info->iommu != iommu)
1653 continue;
1654
1655 if (!info->dev || !info->domain)
1656 continue;
1657
1658 domain = info->domain;
1659
1660 __dmar_remove_one_dev_info(info);
1661
1662 if (!domain_type_is_vm_or_si(domain)) {
1663 /*
1664 * The domain_exit() function can't be called under
1665 * device_domain_lock, as it takes this lock itself.
1666 * So release the lock here and re-run the loop
1667 * afterwards.
1668 */
1669 spin_unlock_irqrestore(&device_domain_lock, flags);
1670 domain_exit(domain);
1671 goto again;
1672 }
1673 }
1674 spin_unlock_irqrestore(&device_domain_lock, flags);
1675
1676 if (iommu->gcmd & DMA_GCMD_TE)
1677 iommu_disable_translation(iommu);
1678 }
1679
1680 static void free_dmar_iommu(struct intel_iommu *iommu)
1681 {
1682 if ((iommu->domains) && (iommu->domain_ids)) {
1683 int elems = ALIGN(cap_ndoms(iommu->cap), 256) >> 8;
1684 int i;
1685
1686 for (i = 0; i < elems; i++)
1687 kfree(iommu->domains[i]);
1688 kfree(iommu->domains);
1689 kfree(iommu->domain_ids);
1690 iommu->domains = NULL;
1691 iommu->domain_ids = NULL;
1692 }
1693
1694 g_iommus[iommu->seq_id] = NULL;
1695
1696 /* free context mapping */
1697 free_context_table(iommu);
1698
1699 #ifdef CONFIG_INTEL_IOMMU_SVM
1700 if (pasid_supported(iommu)) {
1701 if (ecap_prs(iommu->ecap))
1702 intel_svm_finish_prq(iommu);
1703 }
1704 #endif
1705 }
1706
1707 static struct dmar_domain *alloc_domain(int flags)
1708 {
1709 struct dmar_domain *domain;
1710
1711 domain = alloc_domain_mem();
1712 if (!domain)
1713 return NULL;
1714
1715 memset(domain, 0, sizeof(*domain));
1716 domain->nid = -1;
1717 domain->flags = flags;
1718 domain->has_iotlb_device = false;
1719 INIT_LIST_HEAD(&domain->devices);
1720
1721 return domain;
1722 }
1723
1724 /* Must be called with iommu->lock */
1725 static int domain_attach_iommu(struct dmar_domain *domain,
1726 struct intel_iommu *iommu)
1727 {
1728 unsigned long ndomains;
1729 int num;
1730
1731 assert_spin_locked(&device_domain_lock);
1732 assert_spin_locked(&iommu->lock);
1733
1734 domain->iommu_refcnt[iommu->seq_id] += 1;
1735 domain->iommu_count += 1;
1736 if (domain->iommu_refcnt[iommu->seq_id] == 1) {
1737 ndomains = cap_ndoms(iommu->cap);
1738 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1739
1740 if (num >= ndomains) {
1741 pr_err("%s: No free domain ids\n", iommu->name);
1742 domain->iommu_refcnt[iommu->seq_id] -= 1;
1743 domain->iommu_count -= 1;
1744 return -ENOSPC;
1745 }
1746
1747 set_bit(num, iommu->domain_ids);
1748 set_iommu_domain(iommu, num, domain);
1749
1750 domain->iommu_did[iommu->seq_id] = num;
1751 domain->nid = iommu->node;
1752
1753 domain_update_iommu_cap(domain);
1754 }
1755
1756 return 0;
1757 }
1758
1759 static int domain_detach_iommu(struct dmar_domain *domain,
1760 struct intel_iommu *iommu)
1761 {
1762 int num, count = INT_MAX;
1763
1764 assert_spin_locked(&device_domain_lock);
1765 assert_spin_locked(&iommu->lock);
1766
1767 domain->iommu_refcnt[iommu->seq_id] -= 1;
1768 count = --domain->iommu_count;
1769 if (domain->iommu_refcnt[iommu->seq_id] == 0) {
1770 num = domain->iommu_did[iommu->seq_id];
1771 clear_bit(num, iommu->domain_ids);
1772 set_iommu_domain(iommu, num, NULL);
1773
1774 domain_update_iommu_cap(domain);
1775 domain->iommu_did[iommu->seq_id] = 0;
1776 }
1777
1778 return count;
1779 }
1780
1781 static struct iova_domain reserved_iova_list;
1782 static struct lock_class_key reserved_rbtree_key;
1783
1784 static int dmar_init_reserved_ranges(void)
1785 {
1786 struct pci_dev *pdev = NULL;
1787 struct iova *iova;
1788 int i;
1789
1790 init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN);
1791
1792 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1793 &reserved_rbtree_key);
1794
1795 /* IOAPIC ranges shouldn't be accessed by DMA */
1796 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1797 IOVA_PFN(IOAPIC_RANGE_END));
1798 if (!iova) {
1799 pr_err("Reserve IOAPIC range failed\n");
1800 return -ENODEV;
1801 }
1802
1803 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1804 for_each_pci_dev(pdev) {
1805 struct resource *r;
1806
1807 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1808 r = &pdev->resource[i];
1809 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1810 continue;
1811 iova = reserve_iova(&reserved_iova_list,
1812 IOVA_PFN(r->start),
1813 IOVA_PFN(r->end));
1814 if (!iova) {
1815 pr_err("Reserve iova failed\n");
1816 return -ENODEV;
1817 }
1818 }
1819 }
1820 return 0;
1821 }
1822
1823 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1824 {
1825 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1826 }
1827
1828 static inline int guestwidth_to_adjustwidth(int gaw)
1829 {
1830 int agaw;
1831 int r = (gaw - 12) % 9;
1832
1833 if (r == 0)
1834 agaw = gaw;
1835 else
1836 agaw = gaw + 9 - r;
1837 if (agaw > 64)
1838 agaw = 64;
1839 return agaw;
1840 }
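/*
 * Illustrative example (not part of the original source):
 * guestwidth_to_adjustwidth(48) gives r = (48 - 12) % 9 = 0 and returns
 * 48, while a 40-bit guest width gives r = 1 and is rounded up to 48,
 * i.e. the next width that is 12 offset bits plus a whole number of
 * 9-bit page-table levels.
 */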
1841
1842 static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
1843 int guest_width)
1844 {
1845 int adjust_width, agaw;
1846 unsigned long sagaw;
1847 int err;
1848
1849 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
1850
1851 err = init_iova_flush_queue(&domain->iovad,
1852 iommu_flush_iova, iova_entry_free);
1853 if (err)
1854 return err;
1855
1856 domain_reserve_special_ranges(domain);
1857
1858 /* calculate AGAW */
1859 if (guest_width > cap_mgaw(iommu->cap))
1860 guest_width = cap_mgaw(iommu->cap);
1861 domain->gaw = guest_width;
1862 adjust_width = guestwidth_to_adjustwidth(guest_width);
1863 agaw = width_to_agaw(adjust_width);
1864 sagaw = cap_sagaw(iommu->cap);
1865 if (!test_bit(agaw, &sagaw)) {
1866 /* hardware doesn't support it, choose a bigger one */
1867 pr_debug("Hardware doesn't support agaw %d\n", agaw);
1868 agaw = find_next_bit(&sagaw, 5, agaw);
1869 if (agaw >= 5)
1870 return -ENODEV;
1871 }
1872 domain->agaw = agaw;
1873
1874 if (ecap_coherent(iommu->ecap))
1875 domain->iommu_coherency = 1;
1876 else
1877 domain->iommu_coherency = 0;
1878
1879 if (ecap_sc_support(iommu->ecap))
1880 domain->iommu_snooping = 1;
1881 else
1882 domain->iommu_snooping = 0;
1883
1884 if (intel_iommu_superpage)
1885 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1886 else
1887 domain->iommu_superpage = 0;
1888
1889 domain->nid = iommu->node;
1890
1891 /* always allocate the top pgd */
1892 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1893 if (!domain->pgd)
1894 return -ENOMEM;
1895 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1896 return 0;
1897 }
1898
1899 static void domain_exit(struct dmar_domain *domain)
1900 {
1901 struct page *freelist = NULL;
1902
1903         /* Domain 0 is reserved, so don't process it */
1904 if (!domain)
1905 return;
1906
1907 /* Remove associated devices and clear attached or cached domains */
1908 rcu_read_lock();
1909 domain_remove_dev_info(domain);
1910 rcu_read_unlock();
1911
1912 /* destroy iovas */
1913 put_iova_domain(&domain->iovad);
1914
1915 freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1916
1917 dma_free_pagelist(freelist);
1918
1919 free_domain_mem(domain);
1920 }
1921
1922 /*
1923 * Get the PASID directory size for scalable mode context entry.
1924 * Value of X in the PDTS field of a scalable mode context entry
1925 * indicates PASID directory with 2^(X + 7) entries.
1926 */
1927 static inline unsigned long context_get_sm_pds(struct pasid_table *table)
1928 {
1929 int pds, max_pde;
1930
1931 max_pde = table->max_pasid >> PASID_PDE_SHIFT;
1932 pds = find_first_bit((unsigned long *)&max_pde, MAX_NR_PASID_BITS);
1933 if (pds < 7)
1934 return 0;
1935
1936 return pds - 7;
1937 }
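/*
 * Illustrative example (added for clarity, assuming each PASID directory
 * entry covers 2^PASID_PDE_SHIFT = 64 PASIDs as in intel-pasid.h): a
 * table with max_pasid = 2^20 gives max_pde = 2^14, find_first_bit()
 * returns 14 and pds = 14 - 7 = 7, i.e. a directory with
 * 2^(7 + 7) = 2^14 entries as described above.
 */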
1938
1939 /*
1940 * Set the RID_PASID field of a scalable mode context entry. The
1941 * IOMMU hardware will use the PASID value set in this field for
1942 * DMA translations of DMA requests without PASID.
1943 */
1944 static inline void
1945 context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid)
1946 {
1947 context->hi |= pasid & ((1 << 20) - 1);
1948 context->hi |= (1 << 20);
1949 }
1950
1951 /*
1952 * Set the DTE(Device-TLB Enable) field of a scalable mode context
1953 * entry.
1954 */
1955 static inline void context_set_sm_dte(struct context_entry *context)
1956 {
1957 context->lo |= (1 << 2);
1958 }
1959
1960 /*
1961 * Set the PRE(Page Request Enable) field of a scalable mode context
1962 * entry.
1963 */
1964 static inline void context_set_sm_pre(struct context_entry *context)
1965 {
1966 context->lo |= (1 << 4);
1967 }
1968
1969 /* Convert value to context PASID directory size field coding. */
1970 #define context_pdts(pds) (((pds) & 0x7) << 9)
1971
1972 static int domain_context_mapping_one(struct dmar_domain *domain,
1973 struct intel_iommu *iommu,
1974 struct pasid_table *table,
1975 u8 bus, u8 devfn)
1976 {
1977 u16 did = domain->iommu_did[iommu->seq_id];
1978 int translation = CONTEXT_TT_MULTI_LEVEL;
1979 struct device_domain_info *info = NULL;
1980 struct context_entry *context;
1981 unsigned long flags;
1982 int ret;
1983
1984 WARN_ON(did == 0);
1985
1986 if (hw_pass_through && domain_type_is_si(domain))
1987 translation = CONTEXT_TT_PASS_THROUGH;
1988
1989 pr_debug("Set context mapping for %02x:%02x.%d\n",
1990 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1991
1992 BUG_ON(!domain->pgd);
1993
1994 spin_lock_irqsave(&device_domain_lock, flags);
1995 spin_lock(&iommu->lock);
1996
1997 ret = -ENOMEM;
1998 context = iommu_context_addr(iommu, bus, devfn, 1);
1999 if (!context)
2000 goto out_unlock;
2001
2002 ret = 0;
2003 if (context_present(context))
2004 goto out_unlock;
2005
2006 /*
2007 * For kdump cases, old valid entries may be cached due to the
2008 * in-flight DMA and copied pgtable, but there is no unmapping
2009 * behaviour for them, thus we need an explicit cache flush for
2010 * the newly-mapped device. For kdump, at this point, the device
2011 * is supposed to finish reset at its driver probe stage, so no
2012 * in-flight DMA will exist, and we don't need to worry anymore
2013 * hereafter.
2014 */
2015 if (context_copied(context)) {
2016 u16 did_old = context_domain_id(context);
2017
2018 if (did_old < cap_ndoms(iommu->cap)) {
2019 iommu->flush.flush_context(iommu, did_old,
2020 (((u16)bus) << 8) | devfn,
2021 DMA_CCMD_MASK_NOBIT,
2022 DMA_CCMD_DEVICE_INVL);
2023 iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
2024 DMA_TLB_DSI_FLUSH);
2025 }
2026 }
2027
2028 context_clear_entry(context);
2029
2030 if (sm_supported(iommu)) {
2031 unsigned long pds;
2032
2033 WARN_ON(!table);
2034
2035 /* Setup the PASID DIR pointer: */
2036 pds = context_get_sm_pds(table);
2037 context->lo = (u64)virt_to_phys(table->table) |
2038 context_pdts(pds);
2039
2040 /* Setup the RID_PASID field: */
2041 context_set_sm_rid2pasid(context, PASID_RID2PASID);
2042
2043 /*
2044 * Setup the Device-TLB enable bit and Page request
2045 * Enable bit:
2046 */
2047 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2048 if (info && info->ats_supported)
2049 context_set_sm_dte(context);
2050 if (info && info->pri_supported)
2051 context_set_sm_pre(context);
2052 } else {
2053 struct dma_pte *pgd = domain->pgd;
2054 int agaw;
2055
2056 context_set_domain_id(context, did);
2057 context_set_translation_type(context, translation);
2058
2059 if (translation != CONTEXT_TT_PASS_THROUGH) {
2060 /*
2061 * Skip top levels of page tables for iommu which has
2062 * less agaw than default. Unnecessary for PT mode.
2063 */
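 /*
 * E.g. a 4-level (48-bit, agaw 2) domain page table used on an IOMMU
 * that only supports a 3-level (39-bit, agaw 1) walk is entered one
 * level down, at its second-level table, before being programmed into
 * the context entry below.
 */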
2064 for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
2065 ret = -ENOMEM;
2066 pgd = phys_to_virt(dma_pte_addr(pgd));
2067 if (!dma_pte_present(pgd))
2068 goto out_unlock;
2069 }
2070
2071 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2072 if (info && info->ats_supported)
2073 translation = CONTEXT_TT_DEV_IOTLB;
2074 else
2075 translation = CONTEXT_TT_MULTI_LEVEL;
2076
2077 context_set_address_root(context, virt_to_phys(pgd));
2078 context_set_address_width(context, agaw);
2079 } else {
2080 /*
2081 * In pass through mode, AW must be programmed to
2082 * indicate the largest AGAW value supported by
2083 * hardware. And ASR is ignored by hardware.
2084 */
2085 context_set_address_width(context, iommu->msagaw);
2086 }
2087 }
2088
2089 context_set_fault_enable(context);
2090 context_set_present(context);
2091 domain_flush_cache(domain, context, sizeof(*context));
2092
2093 /*
2094 * It's a non-present to present mapping. If hardware doesn't cache
2095 * non-present entries we only need to flush the write-buffer. If it
2096 * _does_ cache non-present entries, then it does so in the special
2097 * domain #0, which we have to flush:
2098 */
2099 if (cap_caching_mode(iommu->cap)) {
2100 iommu->flush.flush_context(iommu, 0,
2101 (((u16)bus) << 8) | devfn,
2102 DMA_CCMD_MASK_NOBIT,
2103 DMA_CCMD_DEVICE_INVL);
2104 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
2105 } else {
2106 iommu_flush_write_buffer(iommu);
2107 }
2108 iommu_enable_dev_iotlb(info);
2109
2110 ret = 0;
2111
2112 out_unlock:
2113 spin_unlock(&iommu->lock);
2114 spin_unlock_irqrestore(&device_domain_lock, flags);
2115
2116 return ret;
2117 }
2118
2119 struct domain_context_mapping_data {
2120 struct dmar_domain *domain;
2121 struct intel_iommu *iommu;
2122 struct pasid_table *table;
2123 };
2124
2125 static int domain_context_mapping_cb(struct pci_dev *pdev,
2126 u16 alias, void *opaque)
2127 {
2128 struct domain_context_mapping_data *data = opaque;
2129
2130 return domain_context_mapping_one(data->domain, data->iommu,
2131 data->table, PCI_BUS_NUM(alias),
2132 alias & 0xff);
2133 }
2134
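/*
 * Install the context entry for @dev and for every PCI requester ID that
 * may be used on its behalf (DMA aliases), e.g. the bridge RID seen for
 * conventional PCI devices behind a PCIe-to-PCI bridge.
 */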
2135 static int
2136 domain_context_mapping(struct dmar_domain *domain, struct device *dev)
2137 {
2138 struct domain_context_mapping_data data;
2139 struct pasid_table *table;
2140 struct intel_iommu *iommu;
2141 u8 bus, devfn;
2142
2143 iommu = device_to_iommu(dev, &bus, &devfn);
2144 if (!iommu)
2145 return -ENODEV;
2146
2147 table = intel_pasid_get_table(dev);
2148
2149 if (!dev_is_pci(dev))
2150 return domain_context_mapping_one(domain, iommu, table,
2151 bus, devfn);
2152
2153 data.domain = domain;
2154 data.iommu = iommu;
2155 data.table = table;
2156
2157 return pci_for_each_dma_alias(to_pci_dev(dev),
2158 &domain_context_mapping_cb, &data);
2159 }
2160
2161 static int domain_context_mapped_cb(struct pci_dev *pdev,
2162 u16 alias, void *opaque)
2163 {
2164 struct intel_iommu *iommu = opaque;
2165
2166 return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
2167 }
2168
2169 static int domain_context_mapped(struct device *dev)
2170 {
2171 struct intel_iommu *iommu;
2172 u8 bus, devfn;
2173
2174 iommu = device_to_iommu(dev, &bus, &devfn);
2175 if (!iommu)
2176 return -ENODEV;
2177
2178 if (!dev_is_pci(dev))
2179 return device_context_mapped(iommu, bus, devfn);
2180
2181 return !pci_for_each_dma_alias(to_pci_dev(dev),
2182 domain_context_mapped_cb, iommu);
2183 }
2184
2185 /* Returns a number of VTD pages, but aligned to MM page size */
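/*
 * For example, with 4KiB pages on both sides: host_addr 0x1234 and size
 * 0x2000 span offsets 0x234..0x2233 of the mapping, so
 * PAGE_ALIGN(0x2234) >> VTD_PAGE_SHIFT yields three 4KiB VTD pages.
 */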
2186 static inline unsigned long aligned_nrpages(unsigned long host_addr,
2187 size_t size)
2188 {
2189 host_addr &= ~PAGE_MASK;
2190 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
2191 }
2192
2193 /* Return largest possible superpage level for a given mapping */
2194 static inline int hardware_largepage_caps(struct dmar_domain *domain,
2195 unsigned long iov_pfn,
2196 unsigned long phy_pfn,
2197 unsigned long pages)
2198 {
2199 int support, level = 1;
2200 unsigned long pfnmerge;
2201
2202 support = domain->iommu_superpage;
2203
2204 /* To use a large page, the virtual *and* physical addresses
2205 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2206 of them will mean we have to use smaller pages. So just
2207 merge them and check both at once. */
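 /* E.g. with the usual 9-bit stride: both pfns 2MiB-aligned (low nine
    bits clear) and pages >= 512 allows level 2, i.e. one or more 2MiB
    superpages, provided the domain supports them. */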
2208 pfnmerge = iov_pfn | phy_pfn;
2209
2210 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
2211 pages >>= VTD_STRIDE_SHIFT;
2212 if (!pages)
2213 break;
2214 pfnmerge >>= VTD_STRIDE_SHIFT;
2215 level++;
2216 support--;
2217 }
2218 return level;
2219 }
2220
2221 static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2222 struct scatterlist *sg, unsigned long phys_pfn,
2223 unsigned long nr_pages, int prot)
2224 {
2225 struct dma_pte *first_pte = NULL, *pte = NULL;
2226 phys_addr_t uninitialized_var(pteval);
2227 unsigned long sg_res = 0;
2228 unsigned int largepage_lvl = 0;
2229 unsigned long lvl_pages = 0;
2230
2231 BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
2232
2233 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2234 return -EINVAL;
2235
2236 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
2237
2238 if (!sg) {
2239 sg_res = nr_pages;
2240 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
2241 }
2242
2243 while (nr_pages > 0) {
2244 uint64_t tmp;
2245
2246 if (!sg_res) {
2247 unsigned int pgoff = sg->offset & ~PAGE_MASK;
2248
2249 sg_res = aligned_nrpages(sg->offset, sg->length);
2250 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + pgoff;
2251 sg->dma_length = sg->length;
2252 pteval = (sg_phys(sg) - pgoff) | prot;
2253 phys_pfn = pteval >> VTD_PAGE_SHIFT;
2254 }
2255
2256 if (!pte) {
2257 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2258
2259 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
2260 if (!pte)
2261 return -ENOMEM;
2262 /* It is a large page */
2263 if (largepage_lvl > 1) {
2264 unsigned long nr_superpages, end_pfn;
2265
2266 pteval |= DMA_PTE_LARGE_PAGE;
2267 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2268
2269 nr_superpages = sg_res / lvl_pages;
2270 end_pfn = iov_pfn + nr_superpages * lvl_pages - 1;
2271
2272 /*
2273 * Ensure that old small page tables are
2274 * removed to make room for superpage(s).
2275 * We're adding new large pages, so make sure
2276 * we don't remove their parent tables.
2277 */
2278 dma_pte_free_pagetable(domain, iov_pfn, end_pfn,
2279 largepage_lvl + 1);
2280 } else {
2281 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
2282 }
2283
2284 }
2285 /* We don't need a lock here; nobody else
2286 * touches this iova range.
2287 */
2288 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
2289 if (tmp) {
2290 static int dumps = 5;
2291 pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2292 iov_pfn, tmp, (unsigned long long)pteval);
2293 if (dumps) {
2294 dumps--;
2295 debug_dma_dump_mappings(NULL);
2296 }
2297 WARN_ON(1);
2298 }
2299
2300 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2301
2302 BUG_ON(nr_pages < lvl_pages);
2303 BUG_ON(sg_res < lvl_pages);
2304
2305 nr_pages -= lvl_pages;
2306 iov_pfn += lvl_pages;
2307 phys_pfn += lvl_pages;
2308 pteval += lvl_pages * VTD_PAGE_SIZE;
2309 sg_res -= lvl_pages;
2310
2311 /* If the next PTE would be the first in a new page, then we
2312 need to flush the cache on the entries we've just written.
2313 And then we'll need to recalculate 'pte', so clear it and
2314 let it get set again in the if (!pte) block above.
2315
2316 If we're done (!nr_pages) we need to flush the cache too.
2317
2318 Also if we've been setting superpages, we may need to
2319 recalculate 'pte' and switch back to smaller pages for the
2320 end of the mapping, if the trailing size is not enough to
2321 use another superpage (i.e. sg_res < lvl_pages). */
2322 pte++;
2323 if (!nr_pages || first_pte_in_page(pte) ||
2324 (largepage_lvl > 1 && sg_res < lvl_pages)) {
2325 domain_flush_cache(domain, first_pte,
2326 (void *)pte - (void *)first_pte);
2327 pte = NULL;
2328 }
2329
2330 if (!sg_res && nr_pages)
2331 sg = sg_next(sg);
2332 }
2333 return 0;
2334 }
2335
2336 static int domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2337 struct scatterlist *sg, unsigned long phys_pfn,
2338 unsigned long nr_pages, int prot)
2339 {
2340 int ret;
2341 struct intel_iommu *iommu;
2342
2343 /* Do the real mapping first */
2344 ret = __domain_mapping(domain, iov_pfn, sg, phys_pfn, nr_pages, prot);
2345 if (ret)
2346 return ret;
2347
2348 /* Notify about the new mapping */
2349 if (domain_type_is_vm(domain)) {
2350 /* VM-typed domains can have more than one IOMMU */
2351 int iommu_id;
2352 for_each_domain_iommu(iommu_id, domain) {
2353 iommu = g_iommus[iommu_id];
2354 __mapping_notify_one(iommu, domain, iov_pfn, nr_pages);
2355 }
2356 } else {
2357 /* General domains only have one IOMMU */
2358 iommu = domain_get_iommu(domain);
2359 __mapping_notify_one(iommu, domain, iov_pfn, nr_pages);
2360 }
2361
2362 return 0;
2363 }
2364
2365 static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2366 struct scatterlist *sg, unsigned long nr_pages,
2367 int prot)
2368 {
2369 return domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
2370 }
2371
2372 static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2373 unsigned long phys_pfn, unsigned long nr_pages,
2374 int prot)
2375 {
2376 return domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
2377 }
2378
2379 static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn)
2380 {
2381 unsigned long flags;
2382 struct context_entry *context;
2383 u16 did_old;
2384
2385 if (!iommu)
2386 return;
2387
2388 spin_lock_irqsave(&iommu->lock, flags);
2389 context = iommu_context_addr(iommu, bus, devfn, 0);
2390 if (!context) {
2391 spin_unlock_irqrestore(&iommu->lock, flags);
2392 return;
2393 }
2394 did_old = context_domain_id(context);
2395 context_clear_entry(context);
2396 __iommu_flush_cache(iommu, context, sizeof(*context));
2397 spin_unlock_irqrestore(&iommu->lock, flags);
2398 iommu->flush.flush_context(iommu,
2399 did_old,
2400 (((u16)bus) << 8) | devfn,
2401 DMA_CCMD_MASK_NOBIT,
2402 DMA_CCMD_DEVICE_INVL);
2403 iommu->flush.flush_iotlb(iommu,
2404 did_old,
2405 0,
2406 0,
2407 DMA_TLB_DSI_FLUSH);
2408 }
2409
2410 static inline void unlink_domain_info(struct device_domain_info *info)
2411 {
2412 assert_spin_locked(&device_domain_lock);
2413 list_del(&info->link);
2414 list_del(&info->global);
2415 if (info->dev)
2416 info->dev->archdata.iommu = NULL;
2417 }
2418
2419 static void domain_remove_dev_info(struct dmar_domain *domain)
2420 {
2421 struct device_domain_info *info, *tmp;
2422 unsigned long flags;
2423
2424 spin_lock_irqsave(&device_domain_lock, flags);
2425 list_for_each_entry_safe(info, tmp, &domain->devices, link)
2426 __dmar_remove_one_dev_info(info);
2427 spin_unlock_irqrestore(&device_domain_lock, flags);
2428 }
2429
2430 /*
2431 * find_domain
2432 * Note: we use struct device->archdata.iommu to store the info
2433 */
2434 static struct dmar_domain *find_domain(struct device *dev)
2435 {
2436 struct device_domain_info *info;
2437
2438 /* No lock here, assumes no domain exit in normal case */
2439 info = dev->archdata.iommu;
2440 if (likely(info))
2441 return info->domain;
2442 return NULL;
2443 }
2444
2445 static inline struct device_domain_info *
2446 dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2447 {
2448 struct device_domain_info *info;
2449
2450 list_for_each_entry(info, &device_domain_list, global)
2451 if (info->iommu->segment == segment && info->bus == bus &&
2452 info->devfn == devfn)
2453 return info;
2454
2455 return NULL;
2456 }
2457
2458 static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
2459 int bus, int devfn,
2460 struct device *dev,
2461 struct dmar_domain *domain)
2462 {
2463 struct dmar_domain *found = NULL;
2464 struct device_domain_info *info;
2465 unsigned long flags;
2466 int ret;
2467
2468 info = alloc_devinfo_mem();
2469 if (!info)
2470 return NULL;
2471
2472 info->bus = bus;
2473 info->devfn = devfn;
2474 info->ats_supported = info->pasid_supported = info->pri_supported = 0;
2475 info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
2476 info->ats_qdep = 0;
2477 info->dev = dev;
2478 info->domain = domain;
2479 info->iommu = iommu;
2480 info->pasid_table = NULL;
2481
2482 if (dev && dev_is_pci(dev)) {
2483 struct pci_dev *pdev = to_pci_dev(info->dev);
2484
2485 if (!pci_ats_disabled() &&
2486 ecap_dev_iotlb_support(iommu->ecap) &&
2487 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) &&
2488 dmar_find_matched_atsr_unit(pdev))
2489 info->ats_supported = 1;
2490
2491 if (sm_supported(iommu)) {
2492 if (pasid_supported(iommu)) {
2493 int features = pci_pasid_features(pdev);
2494 if (features >= 0)
2495 info->pasid_supported = features | 1;
2496 }
2497
2498 if (info->ats_supported && ecap_prs(iommu->ecap) &&
2499 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
2500 info->pri_supported = 1;
2501 }
2502 }
2503
2504 spin_lock_irqsave(&device_domain_lock, flags);
2505 if (dev)
2506 found = find_domain(dev);
2507
2508 if (!found) {
2509 struct device_domain_info *info2;
2510 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
2511 if (info2) {
2512 found = info2->domain;
2513 info2->dev = dev;
2514 }
2515 }
2516
2517 if (found) {
2518 spin_unlock_irqrestore(&device_domain_lock, flags);
2519 free_devinfo_mem(info);
2520 /* Caller must free the original domain */
2521 return found;
2522 }
2523
2524 spin_lock(&iommu->lock);
2525 ret = domain_attach_iommu(domain, iommu);
2526 spin_unlock(&iommu->lock);
2527
2528 if (ret) {
2529 spin_unlock_irqrestore(&device_domain_lock, flags);
2530 free_devinfo_mem(info);
2531 return NULL;
2532 }
2533
2534 list_add(&info->link, &domain->devices);
2535 list_add(&info->global, &device_domain_list);
2536 if (dev)
2537 dev->archdata.iommu = info;
2538 spin_unlock_irqrestore(&device_domain_lock, flags);
2539
2540 /* PASID table is mandatory for a PCI device in scalable mode. */
2541 if (dev && dev_is_pci(dev) && sm_supported(iommu)) {
2542 ret = intel_pasid_alloc_table(dev);
2543 if (ret) {
2544 pr_err("PASID table allocation for %s failed\n",
2545 dev_name(dev));
2546 dmar_remove_one_dev_info(domain, dev);
2547 return NULL;
2548 }
2549
2550 /* Setup the PASID entry for requests without PASID: */
2551 spin_lock(&iommu->lock);
2552 if (hw_pass_through && domain_type_is_si(domain))
2553 ret = intel_pasid_setup_pass_through(iommu, domain,
2554 dev, PASID_RID2PASID);
2555 else
2556 ret = intel_pasid_setup_second_level(iommu, domain,
2557 dev, PASID_RID2PASID);
2558 spin_unlock(&iommu->lock);
2559 if (ret) {
2560 pr_err("Setup RID2PASID for %s failed\n",
2561 dev_name(dev));
2562 dmar_remove_one_dev_info(domain, dev);
2563 return NULL;
2564 }
2565 }
2566
2567 if (dev && domain_context_mapping(domain, dev)) {
2568 pr_err("Domain context map for %s failed\n", dev_name(dev));
2569 dmar_remove_one_dev_info(domain, dev);
2570 return NULL;
2571 }
2572
2573 return domain;
2574 }
2575
2576 static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2577 {
2578 *(u16 *)opaque = alias;
2579 return 0;
2580 }
2581
2582 static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw)
2583 {
2584 struct device_domain_info *info = NULL;
2585 struct dmar_domain *domain = NULL;
2586 struct intel_iommu *iommu;
2587 u16 dma_alias;
2588 unsigned long flags;
2589 u8 bus, devfn;
2590
2591 iommu = device_to_iommu(dev, &bus, &devfn);
2592 if (!iommu)
2593 return NULL;
2594
2595 if (dev_is_pci(dev)) {
2596 struct pci_dev *pdev = to_pci_dev(dev);
2597
2598 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2599
2600 spin_lock_irqsave(&device_domain_lock, flags);
2601 info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2602 PCI_BUS_NUM(dma_alias),
2603 dma_alias & 0xff);
2604 if (info) {
2605 iommu = info->iommu;
2606 domain = info->domain;
2607 }
2608 spin_unlock_irqrestore(&device_domain_lock, flags);
2609
2610 /* DMA alias already has a domain, use it */
2611 if (info)
2612 goto out;
2613 }
2614
2615 /* Allocate and initialize new domain for the device */
2616 domain = alloc_domain(0);
2617 if (!domain)
2618 return NULL;
2619 if (domain_init(domain, iommu, gaw)) {
2620 domain_exit(domain);
2621 return NULL;
2622 }
2623
2624 out:
2625
2626 return domain;
2627 }
2628
2629 static struct dmar_domain *set_domain_for_dev(struct device *dev,
2630 struct dmar_domain *domain)
2631 {
2632 struct intel_iommu *iommu;
2633 struct dmar_domain *tmp;
2634 u16 req_id, dma_alias;
2635 u8 bus, devfn;
2636
2637 iommu = device_to_iommu(dev, &bus, &devfn);
2638 if (!iommu)
2639 return NULL;
2640
2641 req_id = ((u16)bus << 8) | devfn;
2642
2643 if (dev_is_pci(dev)) {
2644 struct pci_dev *pdev = to_pci_dev(dev);
2645
2646 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2647
2648 /* register PCI DMA alias device */
2649 if (req_id != dma_alias) {
2650 tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2651 dma_alias & 0xff, NULL, domain);
2652
2653 if (!tmp || tmp != domain)
2654 return tmp;
2655 }
2656 }
2657
2658 tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
2659 if (!tmp || tmp != domain)
2660 return tmp;
2661
2662 return domain;
2663 }
2664
2665 static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
2666 {
2667 struct dmar_domain *domain, *tmp;
2668
2669 domain = find_domain(dev);
2670 if (domain)
2671 goto out;
2672
2673 domain = find_or_alloc_domain(dev, gaw);
2674 if (!domain)
2675 goto out;
2676
2677 tmp = set_domain_for_dev(dev, domain);
2678 if (!tmp || domain != tmp) {
2679 domain_exit(domain);
2680 domain = tmp;
2681 }
2682
2683 out:
2684
2685 return domain;
2686 }
2687
2688 static int iommu_domain_identity_map(struct dmar_domain *domain,
2689 unsigned long long start,
2690 unsigned long long end)
2691 {
2692 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2693 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2694
2695 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2696 dma_to_mm_pfn(last_vpfn))) {
2697 pr_err("Reserving iova failed\n");
2698 return -ENOMEM;
2699 }
2700
2701 pr_debug("Mapping reserved region %llx-%llx\n", start, end);
2702 /*
2703 * The RMRR range might overlap with the physical memory range;
2704 * clear it first.
2705 */
2706 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2707
2708 return __domain_mapping(domain, first_vpfn, NULL,
2709 first_vpfn, last_vpfn - first_vpfn + 1,
2710 DMA_PTE_READ|DMA_PTE_WRITE);
2711 }
2712
2713 static int domain_prepare_identity_map(struct device *dev,
2714 struct dmar_domain *domain,
2715 unsigned long long start,
2716 unsigned long long end)
2717 {
2718 /* For _hardware_ passthrough, don't bother. But for software
2719 passthrough, we do it anyway -- it may indicate a memory
2720 range which is reserved in E820 and so didn't get set
2721 up to start with in si_domain */
2722 if (domain == si_domain && hw_pass_through) {
2723 pr_warn("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2724 dev_name(dev), start, end);
2725 return 0;
2726 }
2727
2728 pr_info("Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2729 dev_name(dev), start, end);
2730
2731 if (end < start) {
2732 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2733 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2734 dmi_get_system_info(DMI_BIOS_VENDOR),
2735 dmi_get_system_info(DMI_BIOS_VERSION),
2736 dmi_get_system_info(DMI_PRODUCT_VERSION));
2737 return -EIO;
2738 }
2739
2740 if (end >> agaw_to_width(domain->agaw)) {
2741 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2742 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2743 agaw_to_width(domain->agaw),
2744 dmi_get_system_info(DMI_BIOS_VENDOR),
2745 dmi_get_system_info(DMI_BIOS_VERSION),
2746 dmi_get_system_info(DMI_PRODUCT_VERSION));
2747 return -EIO;
2748 }
2749
2750 return iommu_domain_identity_map(domain, start, end);
2751 }
2752
2753 static int iommu_prepare_identity_map(struct device *dev,
2754 unsigned long long start,
2755 unsigned long long end)
2756 {
2757 struct dmar_domain *domain;
2758 int ret;
2759
2760 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2761 if (!domain)
2762 return -ENOMEM;
2763
2764 ret = domain_prepare_identity_map(dev, domain, start, end);
2765 if (ret)
2766 domain_exit(domain);
2767
2768 return ret;
2769 }
2770
2771 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2772 struct device *dev)
2773 {
2774 if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2775 return 0;
2776 return iommu_prepare_identity_map(dev, rmrr->base_address,
2777 rmrr->end_address);
2778 }
2779
2780 #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
2781 static inline void iommu_prepare_isa(void)
2782 {
2783 struct pci_dev *pdev;
2784 int ret;
2785
2786 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2787 if (!pdev)
2788 return;
2789
2790 pr_info("Prepare 0-16MiB unity mapping for LPC\n");
2791 ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);
2792
2793 if (ret)
2794 pr_err("Failed to create 0-16MiB identity map - floppy might not work\n");
2795
2796 pci_dev_put(pdev);
2797 }
2798 #else
2799 static inline void iommu_prepare_isa(void)
2800 {
2801 return;
2802 }
2803 #endif /* !CONFIG_INTEL_IOMMU_FLOPPY_WA */
2804
2805 static int md_domain_init(struct dmar_domain *domain, int guest_width);
2806
2807 static int __init si_domain_init(int hw)
2808 {
2809 int nid, ret = 0;
2810
2811 si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2812 if (!si_domain)
2813 return -EFAULT;
2814
2815 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2816 domain_exit(si_domain);
2817 return -EFAULT;
2818 }
2819
2820 pr_debug("Identity mapping domain allocated\n");
2821
2822 if (hw)
2823 return 0;
2824
2825 for_each_online_node(nid) {
2826 unsigned long start_pfn, end_pfn;
2827 int i;
2828
2829 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2830 ret = iommu_domain_identity_map(si_domain,
2831 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2832 if (ret)
2833 return ret;
2834 }
2835 }
2836
2837 return 0;
2838 }
2839
2840 static int identity_mapping(struct device *dev)
2841 {
2842 struct device_domain_info *info;
2843
2844 if (likely(!iommu_identity_mapping))
2845 return 0;
2846
2847 info = dev->archdata.iommu;
2848 if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2849 return (info->domain == si_domain);
2850
2851 return 0;
2852 }
2853
2854 static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
2855 {
2856 struct dmar_domain *ndomain;
2857 struct intel_iommu *iommu;
2858 u8 bus, devfn;
2859
2860 iommu = device_to_iommu(dev, &bus, &devfn);
2861 if (!iommu)
2862 return -ENODEV;
2863
2864 ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
2865 if (ndomain != domain)
2866 return -EBUSY;
2867
2868 return 0;
2869 }
2870
2871 static bool device_has_rmrr(struct device *dev)
2872 {
2873 struct dmar_rmrr_unit *rmrr;
2874 struct device *tmp;
2875 int i;
2876
2877 rcu_read_lock();
2878 for_each_rmrr_units(rmrr) {
2879 /*
2880 * Return TRUE if this RMRR contains the device that
2881 * is passed in.
2882 */
2883 for_each_active_dev_scope(rmrr->devices,
2884 rmrr->devices_cnt, i, tmp)
2885 if (tmp == dev) {
2886 rcu_read_unlock();
2887 return true;
2888 }
2889 }
2890 rcu_read_unlock();
2891 return false;
2892 }
2893
2894 /*
2895 * There are a couple cases where we need to restrict the functionality of
2896 * devices associated with RMRRs. The first is when evaluating a device for
2897 * identity mapping because problems exist when devices are moved in and out
2898 * of domains and their respective RMRR information is lost. This means that
2899 * a device with associated RMRRs will never be in a "passthrough" domain.
2900 * The second is use of the device through the IOMMU API. This interface
2901 * expects to have full control of the IOVA space for the device. We cannot
2902 * satisfy both the requirement that RMRR access is maintained and have an
2903 * unencumbered IOVA space. We also have no ability to quiesce the device's
2904 * use of the RMRR space or even inform the IOMMU API user of the restriction.
2905 * We therefore prevent devices associated with an RMRR from participating in
2906 * the IOMMU API, which eliminates them from device assignment.
2907 *
2908 * In both cases we assume that PCI USB devices with RMRRs have them largely
2909 * for historical reasons and that the RMRR space is not actively used post
2910 * boot. This exclusion may change if vendors begin to abuse it.
2911 *
2912 * The same exception is made for graphics devices, with the requirement that
2913 * any use of the RMRR regions will be torn down before assigning the device
2914 * to a guest.
2915 */
2916 static bool device_is_rmrr_locked(struct device *dev)
2917 {
2918 if (!device_has_rmrr(dev))
2919 return false;
2920
2921 if (dev_is_pci(dev)) {
2922 struct pci_dev *pdev = to_pci_dev(dev);
2923
2924 if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
2925 return false;
2926 }
2927
2928 return true;
2929 }
2930
2931 static int iommu_should_identity_map(struct device *dev, int startup)
2932 {
2933
2934 if (dev_is_pci(dev)) {
2935 struct pci_dev *pdev = to_pci_dev(dev);
2936
2937 if (device_is_rmrr_locked(dev))
2938 return 0;
2939
2940 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2941 return 1;
2942
2943 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2944 return 1;
2945
2946 if (!(iommu_identity_mapping & IDENTMAP_ALL))
2947 return 0;
2948
2949 /*
2950 * We want to start off with all devices in the 1:1 domain, and
2951 * take them out later if we find they can't access all of memory.
2952 *
2953 * However, we can't do this for PCI devices behind bridges,
2954 * because all PCI devices behind the same bridge will end up
2955 * with the same source-id on their transactions.
2956 *
2957 * Practically speaking, we can't change things around for these
2958 * devices at run-time, because we can't be sure there'll be no
2959 * DMA transactions in flight for any of their siblings.
2960 *
2961 * So PCI devices (unless they're on the root bus) as well as
2962 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2963 * the 1:1 domain, just in _case_ one of their siblings turns out
2964 * not to be able to map all of memory.
2965 */
2966 if (!pci_is_pcie(pdev)) {
2967 if (!pci_is_root_bus(pdev->bus))
2968 return 0;
2969 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2970 return 0;
2971 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
2972 return 0;
2973 } else {
2974 if (device_has_rmrr(dev))
2975 return 0;
2976 }
2977
2978 /*
2979 * At boot time, we don't yet know if devices will be 64-bit capable.
2980 * Assume that they will — if they turn out not to be, then we can
2981 * take them out of the 1:1 domain later.
2982 */
2983 if (!startup) {
2984 /*
2985 * If the device's dma_mask is less than the system's memory
2986 * size then this is not a candidate for identity mapping.
2987 */
2988 u64 dma_mask = *dev->dma_mask;
2989
2990 if (dev->coherent_dma_mask &&
2991 dev->coherent_dma_mask < dma_mask)
2992 dma_mask = dev->coherent_dma_mask;
2993
2994 return dma_mask >= dma_get_required_mask(dev);
2995 }
2996
2997 return 1;
2998 }
2999
3000 static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
3001 {
3002 int ret;
3003
3004 if (!iommu_should_identity_map(dev, 1))
3005 return 0;
3006
3007 ret = domain_add_dev_info(si_domain, dev);
3008 if (!ret)
3009 pr_info("%s identity mapping for device %s\n",
3010 hw ? "Hardware" : "Software", dev_name(dev));
3011 else if (ret == -ENODEV)
3012 /* device not associated with an iommu */
3013 ret = 0;
3014
3015 return ret;
3016 }
3017
3018
3019 static int __init iommu_prepare_static_identity_mapping(int hw)
3020 {
3021 struct pci_dev *pdev = NULL;
3022 struct dmar_drhd_unit *drhd;
3023 struct intel_iommu *iommu;
3024 struct device *dev;
3025 int i;
3026 int ret = 0;
3027
3028 for_each_pci_dev(pdev) {
3029 ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
3030 if (ret)
3031 return ret;
3032 }
3033
3034 for_each_active_iommu(iommu, drhd)
3035 for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
3036 struct acpi_device_physical_node *pn;
3037 struct acpi_device *adev;
3038
3039 if (dev->bus != &acpi_bus_type)
3040 continue;
3041
3042 adev = to_acpi_device(dev);
3043 mutex_lock(&adev->physical_node_lock);
3044 list_for_each_entry(pn, &adev->physical_node_list, node) {
3045 ret = dev_prepare_static_identity_mapping(pn->dev, hw);
3046 if (ret)
3047 break;
3048 }
3049 mutex_unlock(&adev->physical_node_lock);
3050 if (ret)
3051 return ret;
3052 }
3053
3054 return 0;
3055 }
3056
3057 static void intel_iommu_init_qi(struct intel_iommu *iommu)
3058 {
3059 /*
3060 * Start from a sane iommu hardware state.
3061 * If queued invalidation was already initialized by us
3062 * (for example, while enabling interrupt-remapping) then
3063 * things are already rolling from a sane state.
3064 */
3065 if (!iommu->qi) {
3066 /*
3067 * Clear any previous faults.
3068 */
3069 dmar_fault(-1, iommu);
3070 /*
3071 * Disable queued invalidation if supported and already enabled
3072 * before OS handover.
3073 */
3074 dmar_disable_qi(iommu);
3075 }
3076
3077 if (dmar_enable_qi(iommu)) {
3078 /*
3079 * Queued Invalidate not enabled, use Register Based Invalidate
3080 */
3081 iommu->flush.flush_context = __iommu_flush_context;
3082 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
3083 pr_info("%s: Using Register based invalidation\n",
3084 iommu->name);
3085 } else {
3086 iommu->flush.flush_context = qi_flush_context;
3087 iommu->flush.flush_iotlb = qi_flush_iotlb;
3088 pr_info("%s: Using Queued invalidation\n", iommu->name);
3089 }
3090 }
3091
3092 static int copy_context_table(struct intel_iommu *iommu,
3093 struct root_entry *old_re,
3094 struct context_entry **tbl,
3095 int bus, bool ext)
3096 {
3097 int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
3098 struct context_entry *new_ce = NULL, ce;
3099 struct context_entry *old_ce = NULL;
3100 struct root_entry re;
3101 phys_addr_t old_ce_phys;
3102
3103 tbl_idx = ext ? bus * 2 : bus;
3104 memcpy(&re, old_re, sizeof(re));
3105
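	/*
	 * In the extended (ECS) format a context entry is 256 bits, so a
	 * 4KiB table covers only 128 device functions and each bus needs
	 * two tables: the lower context-table pointer for devfn 0x00-0x7f
	 * and the upper one for devfn 0x80-0xff. That is why both tbl_idx
	 * and idx are doubled when 'ext' is set.
	 */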
3106 for (devfn = 0; devfn < 256; devfn++) {
3107 /* First calculate the correct index */
3108 idx = (ext ? devfn * 2 : devfn) % 256;
3109
3110 if (idx == 0) {
3111 /* First save what we may have and clean up */
3112 if (new_ce) {
3113 tbl[tbl_idx] = new_ce;
3114 __iommu_flush_cache(iommu, new_ce,
3115 VTD_PAGE_SIZE);
3116 pos = 1;
3117 }
3118
3119 if (old_ce)
3120 memunmap(old_ce);
3121
3122 ret = 0;
3123 if (devfn < 0x80)
3124 old_ce_phys = root_entry_lctp(&re);
3125 else
3126 old_ce_phys = root_entry_uctp(&re);
3127
3128 if (!old_ce_phys) {
3129 if (ext && devfn == 0) {
3130 /* No LCTP, try UCTP */
3131 devfn = 0x7f;
3132 continue;
3133 } else {
3134 goto out;
3135 }
3136 }
3137
3138 ret = -ENOMEM;
3139 old_ce = memremap(old_ce_phys, PAGE_SIZE,
3140 MEMREMAP_WB);
3141 if (!old_ce)
3142 goto out;
3143
3144 new_ce = alloc_pgtable_page(iommu->node);
3145 if (!new_ce)
3146 goto out_unmap;
3147
3148 ret = 0;
3149 }
3150
3151 /* Now copy the context entry */
3152 memcpy(&ce, old_ce + idx, sizeof(ce));
3153
3154 if (!__context_present(&ce))
3155 continue;
3156
3157 did = context_domain_id(&ce);
3158 if (did >= 0 && did < cap_ndoms(iommu->cap))
3159 set_bit(did, iommu->domain_ids);
3160
3161 /*
3162 * We need a marker for copied context entries. This
3163 * marker needs to work for the old format as well as
3164 * for extended context entries.
3165 *
3166 * Bit 67 of the context entry is used. In the old
3167 * format this bit is available to software, in the
3168 * extended format it is the PGE bit, but PGE is ignored
3169 * by HW if PASIDs are disabled (and thus still
3170 * available).
3171 *
3172 * So disable PASIDs first and then mark the entry
3173 * copied. This means that we don't copy PASID
3174 * translations from the old kernel, but this is fine as
3175 * faults there are not fatal.
3176 */
3177 context_clear_pasid_enable(&ce);
3178 context_set_copied(&ce);
3179
3180 new_ce[idx] = ce;
3181 }
3182
3183 tbl[tbl_idx + pos] = new_ce;
3184
3185 __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
3186
3187 out_unmap:
3188 memunmap(old_ce);
3189
3190 out:
3191 return ret;
3192 }
3193
3194 static int copy_translation_tables(struct intel_iommu *iommu)
3195 {
3196 struct context_entry **ctxt_tbls;
3197 struct root_entry *old_rt;
3198 phys_addr_t old_rt_phys;
3199 int ctxt_table_entries;
3200 unsigned long flags;
3201 u64 rtaddr_reg;
3202 int bus, ret;
3203 bool new_ext, ext;
3204
3205 rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
3206 ext = !!(rtaddr_reg & DMA_RTADDR_RTT);
3207 new_ext = !!ecap_ecs(iommu->ecap);
3208
3209 /*
3210 * The RTT bit can only be changed when translation is disabled,
3211 * but disabling translation would open a window for data
3212 * corruption. So bail out and don't copy anything if we would
3213 * have to change the bit.
3214 */
3215 if (new_ext != ext)
3216 return -EINVAL;
3217
3218 old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
3219 if (!old_rt_phys)
3220 return -EINVAL;
3221
3222 old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
3223 if (!old_rt)
3224 return -ENOMEM;
3225
3226 /* This is too big for the stack - allocate it from slab */
3227 ctxt_table_entries = ext ? 512 : 256;
3228 ret = -ENOMEM;
3229 ctxt_tbls = kcalloc(ctxt_table_entries, sizeof(void *), GFP_KERNEL);
3230 if (!ctxt_tbls)
3231 goto out_unmap;
3232
3233 for (bus = 0; bus < 256; bus++) {
3234 ret = copy_context_table(iommu, &old_rt[bus],
3235 ctxt_tbls, bus, ext);
3236 if (ret) {
3237 pr_err("%s: Failed to copy context table for bus %d\n",
3238 iommu->name, bus);
3239 continue;
3240 }
3241 }
3242
3243 spin_lock_irqsave(&iommu->lock, flags);
3244
3245 /* Context tables are copied, now write them to the root_entry table */
3246 for (bus = 0; bus < 256; bus++) {
3247 int idx = ext ? bus * 2 : bus;
3248 u64 val;
3249
3250 if (ctxt_tbls[idx]) {
3251 val = virt_to_phys(ctxt_tbls[idx]) | 1;
3252 iommu->root_entry[bus].lo = val;
3253 }
3254
3255 if (!ext || !ctxt_tbls[idx + 1])
3256 continue;
3257
3258 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
3259 iommu->root_entry[bus].hi = val;
3260 }
3261
3262 spin_unlock_irqrestore(&iommu->lock, flags);
3263
3264 kfree(ctxt_tbls);
3265
3266 __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
3267
3268 ret = 0;
3269
3270 out_unmap:
3271 memunmap(old_rt);
3272
3273 return ret;
3274 }
3275
3276 static int __init init_dmars(void)
3277 {
3278 struct dmar_drhd_unit *drhd;
3279 struct dmar_rmrr_unit *rmrr;
3280 bool copied_tables = false;
3281 struct device *dev;
3282 struct intel_iommu *iommu;
3283 int i, ret;
3284
3285 /*
3286 * for each drhd
3287 * allocate root
3288 * initialize and program root entry to not present
3289 * endfor
3290 */
3291 for_each_drhd_unit(drhd) {
3292 /*
3293 * No lock is needed: this is only incremented in the
3294 * single-threaded kernel __init code path, and all other
3295 * accesses are read-only.
3296 */
3297 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
3298 g_num_of_iommus++;
3299 continue;
3300 }
3301 pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
3302 }
3303
3304 /* Preallocate enough resources for IOMMU hot-addition */
3305 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
3306 g_num_of_iommus = DMAR_UNITS_SUPPORTED;
3307
3308 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
3309 GFP_KERNEL);
3310 if (!g_iommus) {
3311 pr_err("Allocating global iommu array failed\n");
3312 ret = -ENOMEM;
3313 goto error;
3314 }
3315
3316 for_each_active_iommu(iommu, drhd) {
3317 /*
3318 * Find the max PASID size supported by each IOMMU in the system.
3319 * The system-wide PASID table must be no bigger than the
3320 * smallest size supported by any of them.
3321 */
3322 if (pasid_supported(iommu)) {
3323 u32 temp = 2 << ecap_pss(iommu->ecap);
3324
3325 intel_pasid_max_id = min_t(u32, temp,
3326 intel_pasid_max_id);
3327 }
3328
3329 g_iommus[iommu->seq_id] = iommu;
3330
3331 intel_iommu_init_qi(iommu);
3332
3333 ret = iommu_init_domains(iommu);
3334 if (ret)
3335 goto free_iommu;
3336
3337 init_translation_status(iommu);
3338
3339 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
3340 iommu_disable_translation(iommu);
3341 clear_translation_pre_enabled(iommu);
3342 pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3343 iommu->name);
3344 }
3345
3346 /*
3347 * TBD:
3348 * we could share the same root & context tables
3349 * among all IOMMUs. Need to split this later.
3350 */
3351 ret = iommu_alloc_root_entry(iommu);
3352 if (ret)
3353 goto free_iommu;
3354
3355 if (translation_pre_enabled(iommu)) {
3356 pr_info("Translation already enabled - trying to copy translation structures\n");
3357
3358 ret = copy_translation_tables(iommu);
3359 if (ret) {
3360 /*
3361 * We found the IOMMU with translation
3362 * enabled - but failed to copy over the
3363 * old root-entry table. Try to proceed
3364 * by disabling translation now and
3365 * allocating a clean root-entry table.
3366 * This might cause DMAR faults, but
3367 * probably the dump will still succeed.
3368 */
3369 pr_err("Failed to copy translation tables from previous kernel for %s\n",
3370 iommu->name);
3371 iommu_disable_translation(iommu);
3372 clear_translation_pre_enabled(iommu);
3373 } else {
3374 pr_info("Copied translation tables from previous kernel for %s\n",
3375 iommu->name);
3376 copied_tables = true;
3377 }
3378 }
3379
3380 if (!ecap_pass_through(iommu->ecap))
3381 hw_pass_through = 0;
3382 #ifdef CONFIG_INTEL_IOMMU_SVM
3383 if (pasid_supported(iommu))
3384 intel_svm_init(iommu);
3385 #endif
3386 }
3387
3388 /*
3389 * Now that qi is enabled on all iommus, set the root entry and flush
3390 * caches. This is required on some Intel X58 chipsets, otherwise the
3391 * flush_context function will loop forever and the boot hangs.
3392 */
3393 for_each_active_iommu(iommu, drhd) {
3394 iommu_flush_write_buffer(iommu);
3395 iommu_set_root_entry(iommu);
3396 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
3397 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3398 }
3399
3400 if (iommu_pass_through)
3401 iommu_identity_mapping |= IDENTMAP_ALL;
3402
3403 #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
3404 iommu_identity_mapping |= IDENTMAP_GFX;
3405 #endif
3406
3407 check_tylersburg_isoch();
3408
3409 if (iommu_identity_mapping) {
3410 ret = si_domain_init(hw_pass_through);
3411 if (ret)
3412 goto free_iommu;
3413 }
3414
3415
3416 /*
3417 * If we copied translations from a previous kernel in the kdump
3418 * case, we cannot assign the devices to domains now, as that
3419 * would eliminate the old mappings. So skip this part and defer
3420 * the assignment to device driver initialization time.
3421 */
3422 if (copied_tables)
3423 goto domains_done;
3424
3425 /*
3426 * If pass-through is not set or not enabled, set up context entries
3427 * for identity mappings for rmrr, gfx and isa, and possibly fall back
3428 * to static identity mapping if iommu_identity_mapping is set.
3429 */
3430 if (iommu_identity_mapping) {
3431 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
3432 if (ret) {
3433 pr_crit("Failed to setup IOMMU pass-through\n");
3434 goto free_iommu;
3435 }
3436 }
3437 /*
3438 * For each rmrr
3439 * for each dev attached to rmrr
3440 * do
3441 * locate drhd for dev, alloc domain for dev
3442 * allocate free domain
3443 * allocate page table entries for rmrr
3444 * if context not allocated for bus
3445 * allocate and init context
3446 * set present in root table for this bus
3447 * init context with domain, translation etc
3448 * endfor
3449 * endfor
3450 */
3451 pr_info("Setting RMRR:\n");
3452 for_each_rmrr_units(rmrr) {
3453 /* some BIOSes list non-existent devices in the DMAR table. */
3454 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
3455 i, dev) {
3456 ret = iommu_prepare_rmrr_dev(rmrr, dev);
3457 if (ret)
3458 pr_err("Mapping reserved region failed\n");
3459 }
3460 }
3461
3462 iommu_prepare_isa();
3463
3464 domains_done:
3465
3466 /*
3467 * for each drhd
3468 * enable fault log
3469 * global invalidate context cache
3470 * global invalidate iotlb
3471 * enable translation
3472 */
3473 for_each_iommu(iommu, drhd) {
3474 if (drhd->ignored) {
3475 /*
3476 * we always have to disable PMRs or DMA may fail on
3477 * this device
3478 */
3479 if (force_on)
3480 iommu_disable_protect_mem_regions(iommu);
3481 continue;
3482 }
3483
3484 iommu_flush_write_buffer(iommu);
3485
3486 #ifdef CONFIG_INTEL_IOMMU_SVM
3487 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
3488 ret = intel_svm_enable_prq(iommu);
3489 if (ret)
3490 goto free_iommu;
3491 }
3492 #endif
3493 ret = dmar_set_interrupt(iommu);
3494 if (ret)
3495 goto free_iommu;
3496
3497 if (!translation_pre_enabled(iommu))
3498 iommu_enable_translation(iommu);
3499
3500 iommu_disable_protect_mem_regions(iommu);
3501 }
3502
3503 return 0;
3504
3505 free_iommu:
3506 for_each_active_iommu(iommu, drhd) {
3507 disable_dmar_iommu(iommu);
3508 free_dmar_iommu(iommu);
3509 }
3510
3511 kfree(g_iommus);
3512
3513 error:
3514 return ret;
3515 }
3516
3517 /* This takes a number of _MM_ pages, not VTD pages */
3518 static unsigned long intel_alloc_iova(struct device *dev,
3519 struct dmar_domain *domain,
3520 unsigned long nrpages, uint64_t dma_mask)
3521 {
3522 unsigned long iova_pfn = 0;
3523
3524 /* Restrict dma_mask to the width that the iommu can handle */
3525 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
3526 /* Ensure we reserve the whole size-aligned region */
3527 nrpages = __roundup_pow_of_two(nrpages);
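	/* e.g. a 3-page request is reserved as 4 pages so the whole
	   size-aligned region is covered */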
3528
3529 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
3530 /*
3531 * First try to allocate an io virtual address in
3532 * DMA_BIT_MASK(32) and if that fails then try allocating
3533 * from higher range
3534 */
3535 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
3536 IOVA_PFN(DMA_BIT_MASK(32)), false);
3537 if (iova_pfn)
3538 return iova_pfn;
3539 }
3540 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
3541 IOVA_PFN(dma_mask), true);
3542 if (unlikely(!iova_pfn)) {
3543 pr_err("Allocating %ld-page iova for %s failed",
3544 nrpages, dev_name(dev));
3545 return 0;
3546 }
3547
3548 return iova_pfn;
3549 }
3550
3551 struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
3552 {
3553 struct dmar_domain *domain, *tmp;
3554 struct dmar_rmrr_unit *rmrr;
3555 struct device *i_dev;
3556 int i, ret;
3557
3558 domain = find_domain(dev);
3559 if (domain)
3560 goto out;
3561
3562 domain = find_or_alloc_domain(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
3563 if (!domain)
3564 goto out;
3565
3566 /* We have a new domain - setup possible RMRRs for the device */
3567 rcu_read_lock();
3568 for_each_rmrr_units(rmrr) {
3569 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
3570 i, i_dev) {
3571 if (i_dev != dev)
3572 continue;
3573
3574 ret = domain_prepare_identity_map(dev, domain,
3575 rmrr->base_address,
3576 rmrr->end_address);
3577 if (ret)
3578 dev_err(dev, "Mapping reserved region failed\n");
3579 }
3580 }
3581 rcu_read_unlock();
3582
3583 tmp = set_domain_for_dev(dev, domain);
3584 if (!tmp || domain != tmp) {
3585 domain_exit(domain);
3586 domain = tmp;
3587 }
3588
3589 out:
3590
3591 if (!domain)
3592 pr_err("Allocating domain for %s failed\n", dev_name(dev));
3593
3594
3595 return domain;
3596 }
3597
3598 /* Check if the dev needs to go through the non-identity map and unmap process. */
3599 static int iommu_no_mapping(struct device *dev)
3600 {
3601 int found;
3602
3603 if (iommu_dummy(dev))
3604 return 1;
3605
3606 if (!iommu_identity_mapping)
3607 return 0;
3608
3609 found = identity_mapping(dev);
3610 if (found) {
3611 if (iommu_should_identity_map(dev, 0))
3612 return 1;
3613 else {
3614 /*
3615 * The device is only 32-bit-DMA capable: remove it from
3616 * si_domain and fall back to non-identity mapping.
3617 */
3618 dmar_remove_one_dev_info(si_domain, dev);
3619 pr_info("32bit %s uses non-identity mapping\n",
3620 dev_name(dev));
3621 return 0;
3622 }
3623 } else {
3624 /*
3625 * If a 64-bit-DMA capable device was detached from a VM, put it
3626 * back into si_domain for identity mapping.
3627 */
3628 if (iommu_should_identity_map(dev, 0)) {
3629 int ret;
3630 ret = domain_add_dev_info(si_domain, dev);
3631 if (!ret) {
3632 pr_info("64bit %s uses identity mapping\n",
3633 dev_name(dev));
3634 return 1;
3635 }
3636 }
3637 }
3638
3639 return 0;
3640 }
3641
3642 static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
3643 size_t size, int dir, u64 dma_mask)
3644 {
3645 struct dmar_domain *domain;
3646 phys_addr_t start_paddr;
3647 unsigned long iova_pfn;
3648 int prot = 0;
3649 int ret;
3650 struct intel_iommu *iommu;
3651 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
3652
3653 BUG_ON(dir == DMA_NONE);
3654
3655 if (iommu_no_mapping(dev))
3656 return paddr;
3657
3658 domain = get_valid_domain_for_dev(dev);
3659 if (!domain)
3660 return 0;
3661
3662 iommu = domain_get_iommu(domain);
3663 size = aligned_nrpages(paddr, size);
3664
3665 iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
3666 if (!iova_pfn)
3667 goto error;
3668
3669 /*
3670 * Check if DMAR supports zero-length reads on write-only
3671 * mappings.
3672 */
3673 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3674 !cap_zlr(iommu->cap))
3675 prot |= DMA_PTE_READ;
3676 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3677 prot |= DMA_PTE_WRITE;
3678 /*
3679 * paddr .. (paddr + size) might cover partial pages, so we map the whole
3680 * page. Note: if two parts of one page are mapped separately, we
3681 * might have two guest_addr values mapping to the same host paddr, but this
3682 * is not a big problem
3683 */
3684 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova_pfn),
3685 mm_to_dma_pfn(paddr_pfn), size, prot);
3686 if (ret)
3687 goto error;
3688
3689 start_paddr = (phys_addr_t)iova_pfn << PAGE_SHIFT;
3690 start_paddr += paddr & ~PAGE_MASK;
3691 return start_paddr;
3692
3693 error:
3694 if (iova_pfn)
3695 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
3696 pr_err("Device %s request: %zx@%llx dir %d --- failed\n",
3697 dev_name(dev), size, (unsigned long long)paddr, dir);
3698 return 0;
3699 }
3700
3701 static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3702 unsigned long offset, size_t size,
3703 enum dma_data_direction dir,
3704 unsigned long attrs)
3705 {
3706 return __intel_map_single(dev, page_to_phys(page) + offset, size,
3707 dir, *dev->dma_mask);
3708 }
3709
3710 static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
3711 {
3712 struct dmar_domain *domain;
3713 unsigned long start_pfn, last_pfn;
3714 unsigned long nrpages;
3715 unsigned long iova_pfn;
3716 struct intel_iommu *iommu;
3717 struct page *freelist;
3718
3719 if (iommu_no_mapping(dev))
3720 return;
3721
3722 domain = find_domain(dev);
3723 BUG_ON(!domain);
3724
3725 iommu = domain_get_iommu(domain);
3726
3727 iova_pfn = IOVA_PFN(dev_addr);
3728
3729 nrpages = aligned_nrpages(dev_addr, size);
3730 start_pfn = mm_to_dma_pfn(iova_pfn);
3731 last_pfn = start_pfn + nrpages - 1;
3732
3733 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
3734 dev_name(dev), start_pfn, last_pfn);
3735
3736 freelist = domain_unmap(domain, start_pfn, last_pfn);
3737
3738 if (intel_iommu_strict) {
3739 iommu_flush_iotlb_psi(iommu, domain, start_pfn,
3740 nrpages, !freelist, 0);
3741 /* free iova */
3742 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages));
3743 dma_free_pagelist(freelist);
3744 } else {
3745 queue_iova(&domain->iovad, iova_pfn, nrpages,
3746 (unsigned long)freelist);
3747 /*
3748 * queue up the release of the unmapped range; deferring the iotlb
3749 * flush saves roughly 1/6th of the cpu time the flush would use...
3750 */
3751 }
3752 }
3753
3754 static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3755 size_t size, enum dma_data_direction dir,
3756 unsigned long attrs)
3757 {
3758 intel_unmap(dev, dev_addr, size);
3759 }
3760
3761 static void *intel_alloc_coherent(struct device *dev, size_t size,
3762 dma_addr_t *dma_handle, gfp_t flags,
3763 unsigned long attrs)
3764 {
3765 struct page *page = NULL;
3766 int order;
3767
3768 size = PAGE_ALIGN(size);
3769 order = get_order(size);
3770
3771 if (!iommu_no_mapping(dev))
3772 flags &= ~(GFP_DMA | GFP_DMA32);
3773 else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
3774 if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
3775 flags |= GFP_DMA;
3776 else
3777 flags |= GFP_DMA32;
3778 }
3779
3780 if (gfpflags_allow_blocking(flags)) {
3781 unsigned int count = size >> PAGE_SHIFT;
3782
3783 page = dma_alloc_from_contiguous(dev, count, order,
3784 flags & __GFP_NOWARN);
3785 if (page && iommu_no_mapping(dev) &&
3786 page_to_phys(page) + size > dev->coherent_dma_mask) {
3787 dma_release_from_contiguous(dev, page, count);
3788 page = NULL;
3789 }
3790 }
3791
3792 if (!page)
3793 page = alloc_pages(flags, order);
3794 if (!page)
3795 return NULL;
3796 memset(page_address(page), 0, size);
3797
3798 *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
3799 DMA_BIDIRECTIONAL,
3800 dev->coherent_dma_mask);
3801 if (*dma_handle)
3802 return page_address(page);
3803 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3804 __free_pages(page, order);
3805
3806 return NULL;
3807 }
3808
3809 static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
3810 dma_addr_t dma_handle, unsigned long attrs)
3811 {
3812 int order;
3813 struct page *page = virt_to_page(vaddr);
3814
3815 size = PAGE_ALIGN(size);
3816 order = get_order(size);
3817
3818 intel_unmap(dev, dma_handle, size);
3819 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3820 __free_pages(page, order);
3821 }
3822
3823 static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
3824 int nelems, enum dma_data_direction dir,
3825 unsigned long attrs)
3826 {
3827 dma_addr_t startaddr = sg_dma_address(sglist) & PAGE_MASK;
3828 unsigned long nrpages = 0;
3829 struct scatterlist *sg;
3830 int i;
3831
3832 for_each_sg(sglist, sg, nelems, i) {
3833 nrpages += aligned_nrpages(sg_dma_address(sg), sg_dma_len(sg));
3834 }
3835
3836 intel_unmap(dev, startaddr, nrpages << VTD_PAGE_SHIFT);
3837 }
3838
3839 static int intel_nontranslate_map_sg(struct device *hddev,
3840 struct scatterlist *sglist, int nelems, int dir)
3841 {
3842 int i;
3843 struct scatterlist *sg;
3844
3845 for_each_sg(sglist, sg, nelems, i) {
3846 BUG_ON(!sg_page(sg));
3847 sg->dma_address = sg_phys(sg);
3848 sg->dma_length = sg->length;
3849 }
3850 return nelems;
3851 }
3852
3853 static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
3854 enum dma_data_direction dir, unsigned long attrs)
3855 {
3856 int i;
3857 struct dmar_domain *domain;
3858 size_t size = 0;
3859 int prot = 0;
3860 unsigned long iova_pfn;
3861 int ret;
3862 struct scatterlist *sg;
3863 unsigned long start_vpfn;
3864 struct intel_iommu *iommu;
3865
3866 BUG_ON(dir == DMA_NONE);
3867 if (iommu_no_mapping(dev))
3868 return intel_nontranslate_map_sg(dev, sglist, nelems, dir);
3869
3870 domain = get_valid_domain_for_dev(dev);
3871 if (!domain)
3872 return 0;
3873
3874 iommu = domain_get_iommu(domain);
3875
3876 for_each_sg(sglist, sg, nelems, i)
3877 size += aligned_nrpages(sg->offset, sg->length);
3878
3879 iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
3880 *dev->dma_mask);
3881 if (!iova_pfn) {
3882 sglist->dma_length = 0;
3883 return 0;
3884 }
3885
3886 /*
3887 * Check if DMAR supports zero-length reads on write-only
3888 * mappings.
3889 */
3890 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3891 !cap_zlr(iommu->cap))
3892 prot |= DMA_PTE_READ;
3893 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3894 prot |= DMA_PTE_WRITE;
3895
3896 start_vpfn = mm_to_dma_pfn(iova_pfn);
3897
3898 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
3899 if (unlikely(ret)) {
3900 dma_pte_free_pagetable(domain, start_vpfn,
3901 start_vpfn + size - 1,
3902 agaw_to_level(domain->agaw) + 1);
3903 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
3904 return 0;
3905 }
3906
3907 return nelems;
3908 }
3909
3910 static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3911 {
3912 return !dma_addr;
3913 }
3914
3915 static const struct dma_map_ops intel_dma_ops = {
3916 .alloc = intel_alloc_coherent,
3917 .free = intel_free_coherent,
3918 .map_sg = intel_map_sg,
3919 .unmap_sg = intel_unmap_sg,
3920 .map_page = intel_map_page,
3921 .unmap_page = intel_unmap_page,
3922 .mapping_error = intel_mapping_error,
3923 .dma_supported = dma_direct_supported,
3924 };
3925
3926 static inline int iommu_domain_cache_init(void)
3927 {
3928 int ret = 0;
3929
3930 iommu_domain_cache = kmem_cache_create("iommu_domain",
3931 sizeof(struct dmar_domain),
3932 0,
3933 SLAB_HWCACHE_ALIGN,
3935 NULL);
3936 if (!iommu_domain_cache) {
3937 pr_err("Couldn't create iommu_domain cache\n");
3938 ret = -ENOMEM;
3939 }
3940
3941 return ret;
3942 }
3943
3944 static inline int iommu_devinfo_cache_init(void)
3945 {
3946 int ret = 0;
3947
3948 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3949 sizeof(struct device_domain_info),
3950 0,
3951 SLAB_HWCACHE_ALIGN,
3952 NULL);
3953 if (!iommu_devinfo_cache) {
3954 pr_err("Couldn't create devinfo cache\n");
3955 ret = -ENOMEM;
3956 }
3957
3958 return ret;
3959 }
3960
3961 static int __init iommu_init_mempool(void)
3962 {
3963 int ret;
3964 ret = iova_cache_get();
3965 if (ret)
3966 return ret;
3967
3968 ret = iommu_domain_cache_init();
3969 if (ret)
3970 goto domain_error;
3971
3972 ret = iommu_devinfo_cache_init();
3973 if (!ret)
3974 return ret;
3975
3976 kmem_cache_destroy(iommu_domain_cache);
3977 domain_error:
3978 iova_cache_put();
3979
3980 return -ENOMEM;
3981 }
3982
3983 static void __init iommu_exit_mempool(void)
3984 {
3985 kmem_cache_destroy(iommu_devinfo_cache);
3986 kmem_cache_destroy(iommu_domain_cache);
3987 iova_cache_put();
3988 }
3989
3990 static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3991 {
3992 struct dmar_drhd_unit *drhd;
3993 u32 vtbar;
3994 int rc;
3995
3996 /* We know that this device on this chipset has its own IOMMU.
3997 * If we find it under a different IOMMU, then the BIOS is lying
3998 * to us. Hope that the IOMMU for this device is actually
3999 * disabled, and it needs no translation...
4000 */
4001 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
4002 if (rc) {
4003 /* "can't" happen */
4004 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
4005 return;
4006 }
4007 vtbar &= 0xffff0000;
4008
4009 /* we know that this IOMMU should be at offset 0xa000 from vtbar */
4010 drhd = dmar_find_matched_drhd_unit(pdev);
4011 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
4012 TAINT_FIRMWARE_WORKAROUND,
4013 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
4014 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
4015 }
4016 DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
4017
4018 static void __init init_no_remapping_devices(void)
4019 {
4020 struct dmar_drhd_unit *drhd;
4021 struct device *dev;
4022 int i;
4023
4024 for_each_drhd_unit(drhd) {
4025 if (!drhd->include_all) {
4026 for_each_active_dev_scope(drhd->devices,
4027 drhd->devices_cnt, i, dev)
4028 break;
4029 /* ignore DMAR unit if no devices exist */
4030 if (i == drhd->devices_cnt)
4031 drhd->ignored = 1;
4032 }
4033 }
4034
4035 for_each_active_drhd_unit(drhd) {
4036 if (drhd->include_all)
4037 continue;
4038
4039 for_each_active_dev_scope(drhd->devices,
4040 drhd->devices_cnt, i, dev)
4041 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
4042 break;
4043 if (i < drhd->devices_cnt)
4044 continue;
4045
4046 /* This IOMMU has *only* gfx devices. Either bypass it or
4047 set the gfx_mapped flag, as appropriate */
4048 if (dmar_map_gfx) {
4049 intel_iommu_gfx_mapped = 1;
4050 } else {
4051 drhd->ignored = 1;
4052 for_each_active_dev_scope(drhd->devices,
4053 drhd->devices_cnt, i, dev)
4054 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
4055 }
4056 }
4057 }
4058
4059 #ifdef CONFIG_SUSPEND
4060 static int init_iommu_hw(void)
4061 {
4062 struct dmar_drhd_unit *drhd;
4063 struct intel_iommu *iommu = NULL;
4064
4065 for_each_active_iommu(iommu, drhd)
4066 if (iommu->qi)
4067 dmar_reenable_qi(iommu);
4068
4069 for_each_iommu(iommu, drhd) {
4070 if (drhd->ignored) {
4071 /*
4072 * we always have to disable PMRs or DMA may fail on
4073 * this device
4074 */
4075 if (force_on)
4076 iommu_disable_protect_mem_regions(iommu);
4077 continue;
4078 }
4079
4080 iommu_flush_write_buffer(iommu);
4081
4082 iommu_set_root_entry(iommu);
4083
4084 iommu->flush.flush_context(iommu, 0, 0, 0,
4085 DMA_CCMD_GLOBAL_INVL);
4086 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4087 iommu_enable_translation(iommu);
4088 iommu_disable_protect_mem_regions(iommu);
4089 }
4090
4091 return 0;
4092 }
4093
4094 static void iommu_flush_all(void)
4095 {
4096 struct dmar_drhd_unit *drhd;
4097 struct intel_iommu *iommu;
4098
4099 for_each_active_iommu(iommu, drhd) {
4100 iommu->flush.flush_context(iommu, 0, 0, 0,
4101 DMA_CCMD_GLOBAL_INVL);
4102 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
4103 DMA_TLB_GLOBAL_FLUSH);
4104 }
4105 }
4106
4107 static int iommu_suspend(void)
4108 {
4109 struct dmar_drhd_unit *drhd;
4110 struct intel_iommu *iommu = NULL;
4111 unsigned long flag;
4112
4113 for_each_active_iommu(iommu, drhd) {
4114 iommu->iommu_state = kcalloc(MAX_SR_DMAR_REGS, sizeof(u32),
4115 GFP_ATOMIC);
4116 if (!iommu->iommu_state)
4117 goto nomem;
4118 }
4119
4120 iommu_flush_all();
4121
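/*
 * Only the fault-event registers need to be preserved across suspend;
 * the root table, context entries and translation state are rebuilt
 * from scratch by init_iommu_hw() on resume.
 */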
4122 for_each_active_iommu(iommu, drhd) {
4123 iommu_disable_translation(iommu);
4124
4125 raw_spin_lock_irqsave(&iommu->register_lock, flag);
4126
4127 iommu->iommu_state[SR_DMAR_FECTL_REG] =
4128 readl(iommu->reg + DMAR_FECTL_REG);
4129 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
4130 readl(iommu->reg + DMAR_FEDATA_REG);
4131 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
4132 readl(iommu->reg + DMAR_FEADDR_REG);
4133 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
4134 readl(iommu->reg + DMAR_FEUADDR_REG);
4135
4136 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
4137 }
4138 return 0;
4139
4140 nomem:
4141 for_each_active_iommu(iommu, drhd)
4142 kfree(iommu->iommu_state);
4143
4144 return -ENOMEM;
4145 }
4146
4147 static void iommu_resume(void)
4148 {
4149 struct dmar_drhd_unit *drhd;
4150 struct intel_iommu *iommu = NULL;
4151 unsigned long flag;
4152
4153 if (init_iommu_hw()) {
4154 if (force_on)
4155 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
4156 else
4157 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
4158 return;
4159 }
4160
4161 for_each_active_iommu(iommu, drhd) {
4162
4163 raw_spin_lock_irqsave(&iommu->register_lock, flag);
4164
4165 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
4166 iommu->reg + DMAR_FECTL_REG);
4167 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
4168 iommu->reg + DMAR_FEDATA_REG);
4169 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
4170 iommu->reg + DMAR_FEADDR_REG);
4171 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
4172 iommu->reg + DMAR_FEUADDR_REG);
4173
4174 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
4175 }
4176
4177 for_each_active_iommu(iommu, drhd)
4178 kfree(iommu->iommu_state);
4179 }
4180
4181 static struct syscore_ops iommu_syscore_ops = {
4182 .resume = iommu_resume,
4183 .suspend = iommu_suspend,
4184 };
4185
4186 static void __init init_iommu_pm_ops(void)
4187 {
4188 register_syscore_ops(&iommu_syscore_ops);
4189 }
4190
4191 #else
4192 static inline void init_iommu_pm_ops(void) {}
4193 #endif /* CONFIG_SUSPEND */
4194
4195
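/*
 * RMRRs are BIOS-reported memory ranges that devices may keep DMAing
 * into after OS handover (USB legacy keyboard buffers and IGD stolen
 * memory are the usual examples), so each one is recorded here as a
 * direct-mapped reserved region for the devices in its scope.
 */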
4196 int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
4197 {
4198 struct acpi_dmar_reserved_memory *rmrr;
4199 int prot = DMA_PTE_READ|DMA_PTE_WRITE;
4200 struct dmar_rmrr_unit *rmrru;
4201 size_t length;
4202
4203 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
4204 if (!rmrru)
4205 goto out;
4206
4207 rmrru->hdr = header;
4208 rmrr = (struct acpi_dmar_reserved_memory *)header;
4209 rmrru->base_address = rmrr->base_address;
4210 rmrru->end_address = rmrr->end_address;
4211
4212 length = rmrr->end_address - rmrr->base_address + 1;
4213 rmrru->resv = iommu_alloc_resv_region(rmrr->base_address, length, prot,
4214 IOMMU_RESV_DIRECT);
4215 if (!rmrru->resv)
4216 goto free_rmrru;
4217
4218 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
4219 ((void *)rmrr) + rmrr->header.length,
4220 &rmrru->devices_cnt);
4221 if (rmrru->devices_cnt && rmrru->devices == NULL)
4222 goto free_all;
4223
4224 list_add(&rmrru->list, &dmar_rmrr_units);
4225
4226 return 0;
4227 free_all:
4228 kfree(rmrru->resv);
4229 free_rmrru:
4230 kfree(rmrru);
4231 out:
4232 return -ENOMEM;
4233 }
4234
4235 static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
4236 {
4237 struct dmar_atsr_unit *atsru;
4238 struct acpi_dmar_atsr *tmp;
4239
4240 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4241 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
4242 if (atsr->segment != tmp->segment)
4243 continue;
4244 if (atsr->header.length != tmp->header.length)
4245 continue;
4246 if (memcmp(atsr, tmp, atsr->header.length) == 0)
4247 return atsru;
4248 }
4249
4250 return NULL;
4251 }
4252
4253 int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4254 {
4255 struct acpi_dmar_atsr *atsr;
4256 struct dmar_atsr_unit *atsru;
4257
4258 if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
4259 return 0;
4260
4261 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4262 atsru = dmar_find_atsr(atsr);
4263 if (atsru)
4264 return 0;
4265
4266 atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
4267 if (!atsru)
4268 return -ENOMEM;
4269
4270 /*
4271 * If the memory was allocated from the slab by an ACPI _DSM method, we
4272 * need to copy its contents now because the buffer will be freed on
4273 * return.
4274 */
4275 atsru->hdr = (void *)(atsru + 1);
4276 memcpy(atsru->hdr, hdr, hdr->length);
4277 atsru->include_all = atsr->flags & 0x1;
4278 if (!atsru->include_all) {
4279 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
4280 (void *)atsr + atsr->header.length,
4281 &atsru->devices_cnt);
4282 if (atsru->devices_cnt && atsru->devices == NULL) {
4283 kfree(atsru);
4284 return -ENOMEM;
4285 }
4286 }
4287
4288 list_add_rcu(&atsru->list, &dmar_atsr_units);
4289
4290 return 0;
4291 }
4292
4293 static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
4294 {
4295 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
4296 kfree(atsru);
4297 }
4298
4299 int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4300 {
4301 struct acpi_dmar_atsr *atsr;
4302 struct dmar_atsr_unit *atsru;
4303
4304 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4305 atsru = dmar_find_atsr(atsr);
4306 if (atsru) {
4307 list_del_rcu(&atsru->list);
4308 synchronize_rcu();
4309 intel_iommu_free_atsr(atsru);
4310 }
4311
4312 return 0;
4313 }
4314
4315 int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4316 {
4317 int i;
4318 struct device *dev;
4319 struct acpi_dmar_atsr *atsr;
4320 struct dmar_atsr_unit *atsru;
4321
4322 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4323 atsru = dmar_find_atsr(atsr);
4324 if (!atsru)
4325 return 0;
4326
4327 if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
4328 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
4329 i, dev)
4330 return -EBUSY;
4331 }
4332
4333 return 0;
4334 }
4335
4336 static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
4337 {
4338 int sp, ret = 0;
4339 struct intel_iommu *iommu = dmaru->iommu;
4340
4341 if (g_iommus[iommu->seq_id])
4342 return 0;
4343
4344 if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
4345 pr_warn("%s: Doesn't support hardware pass through.\n",
4346 iommu->name);
4347 return -ENXIO;
4348 }
4349 if (!ecap_sc_support(iommu->ecap) &&
4350 domain_update_iommu_snooping(iommu)) {
4351 pr_warn("%s: Doesn't support snooping.\n",
4352 iommu->name);
4353 return -ENXIO;
4354 }
4355 sp = domain_update_iommu_superpage(iommu) - 1;
4356 if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
4357 pr_warn("%s: Doesn't support large page.\n",
4358 iommu->name);
4359 return -ENXIO;
4360 }
4361
4362 /*
4363 * Disable translation if already enabled prior to OS handover.
4364 */
4365 if (iommu->gcmd & DMA_GCMD_TE)
4366 iommu_disable_translation(iommu);
4367
4368 g_iommus[iommu->seq_id] = iommu;
4369 ret = iommu_init_domains(iommu);
4370 if (ret == 0)
4371 ret = iommu_alloc_root_entry(iommu);
4372 if (ret)
4373 goto out;
4374
4375 #ifdef CONFIG_INTEL_IOMMU_SVM
4376 if (pasid_supported(iommu))
4377 intel_svm_init(iommu);
4378 #endif
4379
4380 if (dmaru->ignored) {
4381 /*
4382 * we always have to disable PMRs or DMA may fail on this device
4383 */
4384 if (force_on)
4385 iommu_disable_protect_mem_regions(iommu);
4386 return 0;
4387 }
4388
4389 intel_iommu_init_qi(iommu);
4390 iommu_flush_write_buffer(iommu);
4391
4392 #ifdef CONFIG_INTEL_IOMMU_SVM
4393 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
4394 ret = intel_svm_enable_prq(iommu);
4395 if (ret)
4396 goto disable_iommu;
4397 }
4398 #endif
4399 ret = dmar_set_interrupt(iommu);
4400 if (ret)
4401 goto disable_iommu;
4402
4403 iommu_set_root_entry(iommu);
4404 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
4405 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4406 iommu_enable_translation(iommu);
4407
4408 iommu_disable_protect_mem_regions(iommu);
4409 return 0;
4410
4411 disable_iommu:
4412 disable_dmar_iommu(iommu);
4413 out:
4414 free_dmar_iommu(iommu);
4415 return ret;
4416 }
4417
4418 int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
4419 {
4420 int ret = 0;
4421 struct intel_iommu *iommu = dmaru->iommu;
4422
4423 if (!intel_iommu_enabled)
4424 return 0;
4425 if (iommu == NULL)
4426 return -EINVAL;
4427
4428 if (insert) {
4429 ret = intel_iommu_add(dmaru);
4430 } else {
4431 disable_dmar_iommu(iommu);
4432 free_dmar_iommu(iommu);
4433 }
4434
4435 return ret;
4436 }
4437
4438 static void intel_iommu_free_dmars(void)
4439 {
4440 struct dmar_rmrr_unit *rmrru, *rmrr_n;
4441 struct dmar_atsr_unit *atsru, *atsr_n;
4442
4443 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
4444 list_del(&rmrru->list);
4445 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
4446 kfree(rmrru->resv);
4447 kfree(rmrru);
4448 }
4449
4450 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
4451 list_del(&atsru->list);
4452 intel_iommu_free_atsr(atsru);
4453 }
4454 }
4455
4456 int dmar_find_matched_atsr_unit(struct pci_dev *dev)
4457 {
4458 int i, ret = 1;
4459 struct pci_bus *bus;
4460 struct pci_dev *bridge = NULL;
4461 struct device *tmp;
4462 struct acpi_dmar_atsr *atsr;
4463 struct dmar_atsr_unit *atsru;
4464
4465 dev = pci_physfn(dev);
4466 for (bus = dev->bus; bus; bus = bus->parent) {
4467 bridge = bus->self;
4468 /* If it's an integrated device, allow ATS */
4469 if (!bridge)
4470 return 1;
4471 /* Connected via non-PCIe: no ATS */
4472 if (!pci_is_pcie(bridge) ||
4473 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
4474 return 0;
4475 /* If we found the root port, look it up in the ATSR */
4476 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
4477 break;
4478 }
4479
4480 rcu_read_lock();
4481 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4482 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4483 if (atsr->segment != pci_domain_nr(dev->bus))
4484 continue;
4485
4486 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
4487 if (tmp == &bridge->dev)
4488 goto out;
4489
4490 if (atsru->include_all)
4491 goto out;
4492 }
4493 ret = 0;
4494 out:
4495 rcu_read_unlock();
4496
4497 return ret;
4498 }
4499
4500 int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
4501 {
4502 int ret = 0;
4503 struct dmar_rmrr_unit *rmrru;
4504 struct dmar_atsr_unit *atsru;
4505 struct acpi_dmar_atsr *atsr;
4506 struct acpi_dmar_reserved_memory *rmrr;
4507
4508 if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING)
4509 return 0;
4510
4511 list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
4512 rmrr = container_of(rmrru->hdr,
4513 struct acpi_dmar_reserved_memory, header);
4514 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4515 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
4516 ((void *)rmrr) + rmrr->header.length,
4517 rmrr->segment, rmrru->devices,
4518 rmrru->devices_cnt);
4519 if (ret < 0)
4520 return ret;
4521 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
4522 dmar_remove_dev_scope(info, rmrr->segment,
4523 rmrru->devices, rmrru->devices_cnt);
4524 }
4525 }
4526
4527 list_for_each_entry(atsru, &dmar_atsr_units, list) {
4528 if (atsru->include_all)
4529 continue;
4530
4531 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4532 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4533 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
4534 (void *)atsr + atsr->header.length,
4535 atsr->segment, atsru->devices,
4536 atsru->devices_cnt);
4537 if (ret > 0)
4538 break;
4539 else if (ret < 0)
4540 return ret;
4541 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
4542 if (dmar_remove_dev_scope(info, atsr->segment,
4543 atsru->devices, atsru->devices_cnt))
4544 break;
4545 }
4546 }
4547
4548 return 0;
4549 }
4550
4551 /*
4552 * Here we only respond to a device being unbound from its driver.
4553 *
4554 * A newly added device is not attached to its DMAR domain here yet; that
4555 * happens when the device is first mapped to an IOVA.
4556 */
4557 static int device_notifier(struct notifier_block *nb,
4558 unsigned long action, void *data)
4559 {
4560 struct device *dev = data;
4561 struct dmar_domain *domain;
4562
4563 if (iommu_dummy(dev))
4564 return 0;
4565
4566 if (action != BUS_NOTIFY_REMOVED_DEVICE)
4567 return 0;
4568
4569 domain = find_domain(dev);
4570 if (!domain)
4571 return 0;
4572
4573 dmar_remove_one_dev_info(domain, dev);
4574 if (!domain_type_is_vm_or_si(domain) && list_empty(&domain->devices))
4575 domain_exit(domain);
4576
4577 return 0;
4578 }
4579
4580 static struct notifier_block device_nb = {
4581 .notifier_call = device_notifier,
4582 };
4583
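/*
 * Keep the static identity (si_domain) map in sync with memory hotplug:
 * newly onlined ranges are added to the 1:1 mapping, and offlined ranges
 * have their IOVAs, page tables and IOTLB entries torn down again.
 */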
4584 static int intel_iommu_memory_notifier(struct notifier_block *nb,
4585 unsigned long val, void *v)
4586 {
4587 struct memory_notify *mhp = v;
4588 unsigned long long start, end;
4589 unsigned long start_vpfn, last_vpfn;
4590
4591 switch (val) {
4592 case MEM_GOING_ONLINE:
4593 start = mhp->start_pfn << PAGE_SHIFT;
4594 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
4595 if (iommu_domain_identity_map(si_domain, start, end)) {
4596 pr_warn("Failed to build identity map for [%llx-%llx]\n",
4597 start, end);
4598 return NOTIFY_BAD;
4599 }
4600 break;
4601
4602 case MEM_OFFLINE:
4603 case MEM_CANCEL_ONLINE:
4604 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4605 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
4606 while (start_vpfn <= last_vpfn) {
4607 struct iova *iova;
4608 struct dmar_drhd_unit *drhd;
4609 struct intel_iommu *iommu;
4610 struct page *freelist;
4611
4612 iova = find_iova(&si_domain->iovad, start_vpfn);
4613 if (iova == NULL) {
4614 pr_debug("Failed get IOVA for PFN %lx\n",
4615 start_vpfn);
4616 break;
4617 }
4618
4619 iova = split_and_remove_iova(&si_domain->iovad, iova,
4620 start_vpfn, last_vpfn);
4621 if (iova == NULL) {
4622 pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
4623 start_vpfn, last_vpfn);
4624 return NOTIFY_BAD;
4625 }
4626
4627 freelist = domain_unmap(si_domain, iova->pfn_lo,
4628 iova->pfn_hi);
4629
4630 rcu_read_lock();
4631 for_each_active_iommu(iommu, drhd)
4632 iommu_flush_iotlb_psi(iommu, si_domain,
4633 iova->pfn_lo, iova_size(iova),
4634 !freelist, 0);
4635 rcu_read_unlock();
4636 dma_free_pagelist(freelist);
4637
4638 start_vpfn = iova->pfn_hi + 1;
4639 free_iova_mem(iova);
4640 }
4641 break;
4642 }
4643
4644 return NOTIFY_OK;
4645 }
4646
4647 static struct notifier_block intel_iommu_memory_nb = {
4648 .notifier_call = intel_iommu_memory_notifier,
4649 .priority = 0
4650 };
4651
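/*
 * When a CPU goes offline, drop the IOVA ranges cached on that CPU for
 * every active domain so they can be reused rather than sit stranded in
 * a dead CPU's cache.
 */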
4652 static void free_all_cpu_cached_iovas(unsigned int cpu)
4653 {
4654 int i;
4655
4656 for (i = 0; i < g_num_of_iommus; i++) {
4657 struct intel_iommu *iommu = g_iommus[i];
4658 struct dmar_domain *domain;
4659 int did;
4660
4661 if (!iommu)
4662 continue;
4663
4664 for (did = 0; did < cap_ndoms(iommu->cap); did++) {
4665 domain = get_iommu_domain(iommu, (u16)did);
4666
4667 if (!domain)
4668 continue;
4669 free_cpu_cached_iovas(cpu, &domain->iovad);
4670 }
4671 }
4672 }
4673
4674 static int intel_iommu_cpu_dead(unsigned int cpu)
4675 {
4676 free_all_cpu_cached_iovas(cpu);
4677 return 0;
4678 }
4679
4680 static void intel_disable_iommus(void)
4681 {
4682 struct intel_iommu *iommu = NULL;
4683 struct dmar_drhd_unit *drhd;
4684
4685 for_each_iommu(iommu, drhd)
4686 iommu_disable_translation(iommu);
4687 }
4688
4689 static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
4690 {
4691 struct iommu_device *iommu_dev = dev_to_iommu_device(dev);
4692
4693 return container_of(iommu_dev, struct intel_iommu, iommu);
4694 }
4695
4696 static ssize_t intel_iommu_show_version(struct device *dev,
4697 struct device_attribute *attr,
4698 char *buf)
4699 {
4700 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4701 u32 ver = readl(iommu->reg + DMAR_VER_REG);
4702 return sprintf(buf, "%d:%d\n",
4703 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4704 }
4705 static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
4706
4707 static ssize_t intel_iommu_show_address(struct device *dev,
4708 struct device_attribute *attr,
4709 char *buf)
4710 {
4711 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4712 return sprintf(buf, "%llx\n", iommu->reg_phys);
4713 }
4714 static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
4715
4716 static ssize_t intel_iommu_show_cap(struct device *dev,
4717 struct device_attribute *attr,
4718 char *buf)
4719 {
4720 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4721 return sprintf(buf, "%llx\n", iommu->cap);
4722 }
4723 static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
4724
4725 static ssize_t intel_iommu_show_ecap(struct device *dev,
4726 struct device_attribute *attr,
4727 char *buf)
4728 {
4729 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4730 return sprintf(buf, "%llx\n", iommu->ecap);
4731 }
4732 static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4733
4734 static ssize_t intel_iommu_show_ndoms(struct device *dev,
4735 struct device_attribute *attr,
4736 char *buf)
4737 {
4738 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4739 return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
4740 }
4741 static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL);
4742
4743 static ssize_t intel_iommu_show_ndoms_used(struct device *dev,
4744 struct device_attribute *attr,
4745 char *buf)
4746 {
4747 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4748 return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
4749 cap_ndoms(iommu->cap)));
4750 }
4751 static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL);
4752
4753 static struct attribute *intel_iommu_attrs[] = {
4754 &dev_attr_version.attr,
4755 &dev_attr_address.attr,
4756 &dev_attr_cap.attr,
4757 &dev_attr_ecap.attr,
4758 &dev_attr_domains_supported.attr,
4759 &dev_attr_domains_used.attr,
4760 NULL,
4761 };
4762
4763 static struct attribute_group intel_iommu_group = {
4764 .name = "intel-iommu",
4765 .attrs = intel_iommu_attrs,
4766 };
4767
4768 const struct attribute_group *intel_iommu_groups[] = {
4769 &intel_iommu_group,
4770 NULL,
4771 };
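/*
 * With the attributes above, each remapping unit shows up under sysfs
 * roughly as follows (illustrative layout; unit names depend on the
 * platform):
 *
 *   /sys/class/iommu/dmar0/intel-iommu/version
 *   /sys/class/iommu/dmar0/intel-iommu/address
 *   /sys/class/iommu/dmar0/intel-iommu/cap
 *   /sys/class/iommu/dmar0/intel-iommu/ecap
 *   /sys/class/iommu/dmar0/intel-iommu/domains_supported
 *   /sys/class/iommu/dmar0/intel-iommu/domains_used
 */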
4772
4773 int __init intel_iommu_init(void)
4774 {
4775 int ret = -ENODEV;
4776 struct dmar_drhd_unit *drhd;
4777 struct intel_iommu *iommu;
4778
4779 /* VT-d is required for a TXT/tboot launch, so enforce that */
4780 force_on = tboot_force_iommu();
4781
4782 if (iommu_init_mempool()) {
4783 if (force_on)
4784 panic("tboot: Failed to initialize iommu memory\n");
4785 return -ENOMEM;
4786 }
4787
4788 down_write(&dmar_global_lock);
4789 if (dmar_table_init()) {
4790 if (force_on)
4791 panic("tboot: Failed to initialize DMAR table\n");
4792 goto out_free_dmar;
4793 }
4794
4795 if (dmar_dev_scope_init() < 0) {
4796 if (force_on)
4797 panic("tboot: Failed to initialize DMAR device scope\n");
4798 goto out_free_dmar;
4799 }
4800
4801 up_write(&dmar_global_lock);
4802
4803 /*
4804 * The bus notifier takes the dmar_global_lock, so lockdep will
4805 * complain later when we register it under the lock.
4806 */
4807 dmar_register_bus_notifier();
4808
4809 down_write(&dmar_global_lock);
4810
4811 if (no_iommu || dmar_disabled) {
4812 /*
4813 * We exit the function here to ensure the IOMMU's remapping and
4814 * mempool aren't set up, which means the IOMMU's PMRs won't be
4815 * disabled via the call to init_dmars(). So disable them
4816 * explicitly here. The PMRs were set up by tboot prior to
4817 * calling SENTER, but the kernel is expected to reset/tear
4818 * down the PMRs.
4819 */
4820 if (intel_iommu_tboot_noforce) {
4821 for_each_iommu(iommu, drhd)
4822 iommu_disable_protect_mem_regions(iommu);
4823 }
4824
4825 /*
4826 * Make sure the IOMMUs are switched off, even when we
4827 * boot into a kexec kernel and the previous kernel left
4828 * them enabled
4829 */
4830 intel_disable_iommus();
4831 goto out_free_dmar;
4832 }
4833
4834 if (list_empty(&dmar_rmrr_units))
4835 pr_info("No RMRR found\n");
4836
4837 if (list_empty(&dmar_atsr_units))
4838 pr_info("No ATSR found\n");
4839
4840 if (dmar_init_reserved_ranges()) {
4841 if (force_on)
4842 panic("tboot: Failed to reserve iommu ranges\n");
4843 goto out_free_reserved_range;
4844 }
4845
4846 init_no_remapping_devices();
4847
4848 ret = init_dmars();
4849 if (ret) {
4850 if (force_on)
4851 panic("tboot: Failed to initialize DMARs\n");
4852 pr_err("Initialization failed\n");
4853 goto out_free_reserved_range;
4854 }
4855 up_write(&dmar_global_lock);
4856 pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
4857
4858 #if defined(CONFIG_X86) && defined(CONFIG_SWIOTLB)
4859 swiotlb = 0;
4860 #endif
4861 dma_ops = &intel_dma_ops;
4862
4863 init_iommu_pm_ops();
4864
4865 for_each_active_iommu(iommu, drhd) {
4866 iommu_device_sysfs_add(&iommu->iommu, NULL,
4867 intel_iommu_groups,
4868 "%s", iommu->name);
4869 iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
4870 iommu_device_register(&iommu->iommu);
4871 }
4872
4873 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
4874 bus_register_notifier(&pci_bus_type, &device_nb);
4875 if (si_domain && !hw_pass_through)
4876 register_memory_notifier(&intel_iommu_memory_nb);
4877 cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL,
4878 intel_iommu_cpu_dead);
4879 intel_iommu_enabled = 1;
4880 intel_iommu_debugfs_init();
4881
4882 return 0;
4883
4884 out_free_reserved_range:
4885 put_iova_domain(&reserved_iova_list);
4886 out_free_dmar:
4887 intel_iommu_free_dmars();
4888 up_write(&dmar_global_lock);
4889 iommu_exit_mempool();
4890 return ret;
4891 }
4892
4893 static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
4894 {
4895 struct intel_iommu *iommu = opaque;
4896
4897 domain_context_clear_one(iommu, PCI_BUS_NUM(alias), alias & 0xff);
4898 return 0;
4899 }
4900
4901 /*
4902 * NB - intel-iommu lacks any sort of reference counting for the users of
4903 * dependent devices. If multiple endpoints have intersecting dependent
4904 * devices, unbinding the driver from any one of them will possibly leave
4905 * the others unable to operate.
4906 */
4907 static void domain_context_clear(struct intel_iommu *iommu, struct device *dev)
4908 {
4909 if (!iommu || !dev || !dev_is_pci(dev))
4910 return;
4911
4912 pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_clear_one_cb, iommu);
4913 }
4914
4915 static void __dmar_remove_one_dev_info(struct device_domain_info *info)
4916 {
4917 struct intel_iommu *iommu;
4918 unsigned long flags;
4919
4920 assert_spin_locked(&device_domain_lock);
4921
4922 if (WARN_ON(!info))
4923 return;
4924
4925 iommu = info->iommu;
4926
4927 if (info->dev) {
4928 if (dev_is_pci(info->dev) && sm_supported(iommu))
4929 intel_pasid_tear_down_entry(iommu, info->dev,
4930 PASID_RID2PASID);
4931
4932 iommu_disable_dev_iotlb(info);
4933 domain_context_clear(iommu, info->dev);
4934 intel_pasid_free_table(info->dev);
4935 }
4936
4937 unlink_domain_info(info);
4938
4939 spin_lock_irqsave(&iommu->lock, flags);
4940 domain_detach_iommu(info->domain, iommu);
4941 spin_unlock_irqrestore(&iommu->lock, flags);
4942
4943 free_devinfo_mem(info);
4944 }
4945
4946 static void dmar_remove_one_dev_info(struct dmar_domain *domain,
4947 struct device *dev)
4948 {
4949 struct device_domain_info *info;
4950 unsigned long flags;
4951
4952 spin_lock_irqsave(&device_domain_lock, flags);
4953 info = dev->archdata.iommu;
4954 __dmar_remove_one_dev_info(info);
4955 spin_unlock_irqrestore(&device_domain_lock, flags);
4956 }
4957
4958 static int md_domain_init(struct dmar_domain *domain, int guest_width)
4959 {
4960 int adjust_width;
4961
4962 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
4963 domain_reserve_special_ranges(domain);
4964
4965 /* calculate AGAW */
4966 domain->gaw = guest_width;
4967 adjust_width = guestwidth_to_adjustwidth(guest_width);
4968 domain->agaw = width_to_agaw(adjust_width);
4969
4970 domain->iommu_coherency = 0;
4971 domain->iommu_snooping = 0;
4972 domain->iommu_superpage = 0;
4973 domain->max_addr = 0;
4974
4975 /* always allocate the top pgd */
4976 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
4977 if (!domain->pgd)
4978 return -ENOMEM;
4979 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4980 return 0;
4981 }
4982
4983 static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
4984 {
4985 struct dmar_domain *dmar_domain;
4986 struct iommu_domain *domain;
4987
4988 if (type != IOMMU_DOMAIN_UNMANAGED)
4989 return NULL;
4990
4991 dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE);
4992 if (!dmar_domain) {
4993 pr_err("Can't allocate dmar_domain\n");
4994 return NULL;
4995 }
4996 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
4997 pr_err("Domain initialization failed\n");
4998 domain_exit(dmar_domain);
4999 return NULL;
5000 }
5001 domain_update_iommu_cap(dmar_domain);
5002
5003 domain = &dmar_domain->domain;
5004 domain->geometry.aperture_start = 0;
5005 domain->geometry.aperture_end = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
5006 domain->geometry.force_aperture = true;
5007
5008 return domain;
5009 }
5010
5011 static void intel_iommu_domain_free(struct iommu_domain *domain)
5012 {
5013 domain_exit(to_dmar_domain(domain));
5014 }
5015
5016 static int intel_iommu_attach_device(struct iommu_domain *domain,
5017 struct device *dev)
5018 {
5019 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5020 struct intel_iommu *iommu;
5021 int addr_width;
5022 u8 bus, devfn;
5023
5024 if (device_is_rmrr_locked(dev)) {
5025 dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n");
5026 return -EPERM;
5027 }
5028
5029 /* normally dev is not mapped */
5030 if (unlikely(domain_context_mapped(dev))) {
5031 struct dmar_domain *old_domain;
5032
5033 old_domain = find_domain(dev);
5034 if (old_domain) {
5035 rcu_read_lock();
5036 dmar_remove_one_dev_info(old_domain, dev);
5037 rcu_read_unlock();
5038
5039 if (!domain_type_is_vm_or_si(old_domain) &&
5040 list_empty(&old_domain->devices))
5041 domain_exit(old_domain);
5042 }
5043 }
5044
5045 iommu = device_to_iommu(dev, &bus, &devfn);
5046 if (!iommu)
5047 return -ENODEV;
5048
5049 /* check if this iommu agaw is sufficient for max mapped address */
5050 addr_width = agaw_to_width(iommu->agaw);
5051 if (addr_width > cap_mgaw(iommu->cap))
5052 addr_width = cap_mgaw(iommu->cap);
5053
5054 if (dmar_domain->max_addr > (1LL << addr_width)) {
5055 pr_err("%s: iommu width (%d) is not "
5056 "sufficient for the mapped address (%llx)\n",
5057 __func__, addr_width, dmar_domain->max_addr);
5058 return -EFAULT;
5059 }
5060 dmar_domain->gaw = addr_width;
5061
5062 /*
5063 * Knock out extra levels of page tables if necessary
5064 */
5065 while (iommu->agaw < dmar_domain->agaw) {
5066 struct dma_pte *pte;
5067
5068 pte = dmar_domain->pgd;
5069 if (dma_pte_present(pte)) {
5070 dmar_domain->pgd = (struct dma_pte *)
5071 phys_to_virt(dma_pte_addr(pte));
5072 free_pgtable_page(pte);
5073 }
5074 dmar_domain->agaw--;
5075 }
5076
5077 return domain_add_dev_info(dmar_domain, dev);
5078 }
5079
5080 static void intel_iommu_detach_device(struct iommu_domain *domain,
5081 struct device *dev)
5082 {
5083 dmar_remove_one_dev_info(to_dmar_domain(domain), dev);
5084 }
5085
5086 static int intel_iommu_map(struct iommu_domain *domain,
5087 unsigned long iova, phys_addr_t hpa,
5088 size_t size, int iommu_prot)
5089 {
5090 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5091 u64 max_addr;
5092 int prot = 0;
5093 int ret;
5094
5095 if (iommu_prot & IOMMU_READ)
5096 prot |= DMA_PTE_READ;
5097 if (iommu_prot & IOMMU_WRITE)
5098 prot |= DMA_PTE_WRITE;
5099 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
5100 prot |= DMA_PTE_SNP;
5101
5102 max_addr = iova + size;
5103 if (dmar_domain->max_addr < max_addr) {
5104 u64 end;
5105
5106 /* check if minimum agaw is sufficient for mapped address */
5107 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
5108 if (end < max_addr) {
5109 pr_err("%s: iommu width (%d) is not "
5110 "sufficient for the mapped address (%llx)\n",
5111 __func__, dmar_domain->gaw, max_addr);
5112 return -EFAULT;
5113 }
5114 dmar_domain->max_addr = max_addr;
5115 }
5116 /* Round up size to the next multiple of VTD_PAGE_SIZE, if it and
5117 the low bits of hpa would take us onto the next page */
5118 size = aligned_nrpages(hpa, size);
5119 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
5120 hpa >> VTD_PAGE_SHIFT, size, prot);
5121 return ret;
5122 }
5123
5124 static size_t intel_iommu_unmap(struct iommu_domain *domain,
5125 unsigned long iova, size_t size)
5126 {
5127 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5128 struct page *freelist = NULL;
5129 unsigned long start_pfn, last_pfn;
5130 unsigned int npages;
5131 int iommu_id, level = 0;
5132
5133 /* Cope with horrid API which requires us to unmap more than the
5134 size argument if it happens to be a large-page mapping. */
5135 BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
5136
5137 if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
5138 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
5139
5140 start_pfn = iova >> VTD_PAGE_SHIFT;
5141 last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
5142
5143 freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
5144
5145 npages = last_pfn - start_pfn + 1;
5146
5147 for_each_domain_iommu(iommu_id, dmar_domain)
5148 iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
5149 start_pfn, npages, !freelist, 0);
5150
5151 dma_free_pagelist(freelist);
5152
5153 if (dmar_domain->max_addr == iova + size)
5154 dmar_domain->max_addr = iova;
5155
5156 return size;
5157 }
5158
5159 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
5160 dma_addr_t iova)
5161 {
5162 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5163 struct dma_pte *pte;
5164 int level = 0;
5165 u64 phys = 0;
5166
5167 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
5168 if (pte)
5169 phys = dma_pte_addr(pte);
5170
5171 return phys;
5172 }
5173
5174 static bool intel_iommu_capable(enum iommu_cap cap)
5175 {
5176 if (cap == IOMMU_CAP_CACHE_COHERENCY)
5177 return domain_update_iommu_snooping(NULL) == 1;
5178 if (cap == IOMMU_CAP_INTR_REMAP)
5179 return irq_remapping_enabled == 1;
5180
5181 return false;
5182 }
5183
5184 static int intel_iommu_add_device(struct device *dev)
5185 {
5186 struct intel_iommu *iommu;
5187 struct iommu_group *group;
5188 u8 bus, devfn;
5189
5190 iommu = device_to_iommu(dev, &bus, &devfn);
5191 if (!iommu)
5192 return -ENODEV;
5193
5194 iommu_device_link(&iommu->iommu, dev);
5195
5196 group = iommu_group_get_for_dev(dev);
5197
5198 if (IS_ERR(group))
5199 return PTR_ERR(group);
5200
5201 iommu_group_put(group);
5202 return 0;
5203 }
5204
5205 static void intel_iommu_remove_device(struct device *dev)
5206 {
5207 struct intel_iommu *iommu;
5208 u8 bus, devfn;
5209
5210 iommu = device_to_iommu(dev, &bus, &devfn);
5211 if (!iommu)
5212 return;
5213
5214 iommu_group_remove_device(dev);
5215
5216 iommu_device_unlink(&iommu->iommu, dev);
5217 }
5218
5219 static void intel_iommu_get_resv_regions(struct device *device,
5220 struct list_head *head)
5221 {
5222 struct iommu_resv_region *reg;
5223 struct dmar_rmrr_unit *rmrr;
5224 struct device *i_dev;
5225 int i;
5226
5227 rcu_read_lock();
5228 for_each_rmrr_units(rmrr) {
5229 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
5230 i, i_dev) {
5231 if (i_dev != device)
5232 continue;
5233
5234 list_add_tail(&rmrr->resv->list, head);
5235 }
5236 }
5237 rcu_read_unlock();
5238
5239 reg = iommu_alloc_resv_region(IOAPIC_RANGE_START,
5240 IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1,
5241 0, IOMMU_RESV_MSI);
5242 if (!reg)
5243 return;
5244 list_add_tail(&reg->list, head);
5245 }
5246
5247 static void intel_iommu_put_resv_regions(struct device *dev,
5248 struct list_head *head)
5249 {
5250 struct iommu_resv_region *entry, *next;
5251
5252 list_for_each_entry_safe(entry, next, head, list) {
5253 if (entry->type == IOMMU_RESV_RESERVED)
5254 kfree(entry);
5255 }
5256 }
5257
5258 #ifdef CONFIG_INTEL_IOMMU_SVM
5259 int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev)
5260 {
5261 struct device_domain_info *info;
5262 struct context_entry *context;
5263 struct dmar_domain *domain;
5264 unsigned long flags;
5265 u64 ctx_lo;
5266 int ret;
5267
5268 domain = get_valid_domain_for_dev(sdev->dev);
5269 if (!domain)
5270 return -EINVAL;
5271
5272 spin_lock_irqsave(&device_domain_lock, flags);
5273 spin_lock(&iommu->lock);
5274
5275 ret = -EINVAL;
5276 info = sdev->dev->archdata.iommu;
5277 if (!info || !info->pasid_supported)
5278 goto out;
5279
5280 context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
5281 if (WARN_ON(!context))
5282 goto out;
5283
5284 ctx_lo = context[0].lo;
5285
5286 sdev->did = domain->iommu_did[iommu->seq_id];
5287 sdev->sid = PCI_DEVID(info->bus, info->devfn);
5288
5289 if (!(ctx_lo & CONTEXT_PASIDE)) {
5290 ctx_lo |= CONTEXT_PASIDE;
5291 context[0].lo = ctx_lo;
5292 wmb();
5293 iommu->flush.flush_context(iommu, sdev->did, sdev->sid,
5294 DMA_CCMD_MASK_NOBIT,
5295 DMA_CCMD_DEVICE_INVL);
5296 }
5297
5298 /* Enable PASID support in the device, if it wasn't already */
5299 if (!info->pasid_enabled)
5300 iommu_enable_dev_iotlb(info);
5301
5302 if (info->ats_enabled) {
5303 sdev->dev_iotlb = 1;
5304 sdev->qdep = info->ats_qdep;
5305 if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
5306 sdev->qdep = 0;
5307 }
5308 ret = 0;
5309
5310 out:
5311 spin_unlock(&iommu->lock);
5312 spin_unlock_irqrestore(&device_domain_lock, flags);
5313
5314 return ret;
5315 }
5316
5317 struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
5318 {
5319 struct intel_iommu *iommu;
5320 u8 bus, devfn;
5321
5322 if (iommu_dummy(dev)) {
5323 dev_warn(dev,
5324 "No IOMMU translation for device; cannot enable SVM\n");
5325 return NULL;
5326 }
5327
5328 iommu = device_to_iommu(dev, &bus, &devfn);
5329 if (!iommu) {
5330 dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
5331 return NULL;
5332 }
5333
5334 return iommu;
5335 }
5336 #endif /* CONFIG_INTEL_IOMMU_SVM */
5337
5338 const struct iommu_ops intel_iommu_ops = {
5339 .capable = intel_iommu_capable,
5340 .domain_alloc = intel_iommu_domain_alloc,
5341 .domain_free = intel_iommu_domain_free,
5342 .attach_dev = intel_iommu_attach_device,
5343 .detach_dev = intel_iommu_detach_device,
5344 .map = intel_iommu_map,
5345 .unmap = intel_iommu_unmap,
5346 .iova_to_phys = intel_iommu_iova_to_phys,
5347 .add_device = intel_iommu_add_device,
5348 .remove_device = intel_iommu_remove_device,
5349 .get_resv_regions = intel_iommu_get_resv_regions,
5350 .put_resv_regions = intel_iommu_put_resv_regions,
5351 .device_group = pci_device_group,
5352 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
5353 };
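/*
 * A minimal sketch (not part of this driver) of how a caller such as VFIO
 * exercises these ops through the generic IOMMU API; error handling is
 * omitted and "dev" is assumed to be a PCI device behind a VT-d unit:
 *
 *	struct iommu_domain *dom = iommu_domain_alloc(&pci_bus_type);
 *
 *	iommu_attach_device(dom, dev);         // -> intel_iommu_attach_device
 *	iommu_map(dom, iova, paddr, SZ_4K,
 *		  IOMMU_READ | IOMMU_WRITE);   // -> intel_iommu_map
 *	iommu_unmap(dom, iova, SZ_4K);         // -> intel_iommu_unmap
 *	iommu_detach_device(dom, dev);         // -> intel_iommu_detach_device
 *	iommu_domain_free(dom);                // -> intel_iommu_domain_free
 */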
5354
5355 static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
5356 {
5357 /* G4x/GM45 integrated gfx dmar support is totally busted. */
5358 pr_info("Disabling IOMMU for graphics on this chipset\n");
5359 dmar_map_gfx = 0;
5360 }
5361
5362 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
5363 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
5364 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
5365 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
5366 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
5367 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
5368 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
5369
5370 static void quirk_iommu_rwbf(struct pci_dev *dev)
5371 {
5372 /*
5373 * Mobile 4 Series Chipset neglects to set RWBF capability,
5374 * but needs it. Same seems to hold for the desktop versions.
5375 */
5376 pr_info("Forcing write-buffer flush capability\n");
5377 rwbf_quirk = 1;
5378 }
5379
5380 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
5381 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
5382 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
5383 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
5384 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
5385 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
5386 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
5387
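/*
 * GGC is the graphics control register in the IGD's config space; the
 * quirk below keys off the *_VT_* fields, which report whether the BIOS
 * set aside stolen memory for VT-d (shadow GTT) use.
 */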
5388 #define GGC 0x52
5389 #define GGC_MEMORY_SIZE_MASK (0xf << 8)
5390 #define GGC_MEMORY_SIZE_NONE (0x0 << 8)
5391 #define GGC_MEMORY_SIZE_1M (0x1 << 8)
5392 #define GGC_MEMORY_SIZE_2M (0x3 << 8)
5393 #define GGC_MEMORY_VT_ENABLED (0x8 << 8)
5394 #define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
5395 #define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
5396 #define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
5397
5398 static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
5399 {
5400 unsigned short ggc;
5401
5402 if (pci_read_config_word(dev, GGC, &ggc))
5403 return;
5404
5405 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
5406 pr_info("BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
5407 dmar_map_gfx = 0;
5408 } else if (dmar_map_gfx) {
5409 /* we have to ensure the gfx device is idle before we flush */
5410 pr_info("Disabling batched IOTLB flush on Ironlake\n");
5411 intel_iommu_strict = 1;
5412 }
5413 }
5414 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
5415 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
5416 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
5417 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
5418
5419 /* On Tylersburg chipsets, some BIOSes have been known to enable the
5420 ISOCH DMAR unit for the Azalia sound device, but not give it any
5421 TLB entries, which causes it to deadlock. Check for that. We do
5422 this in a function called from init_dmars(), instead of in a PCI
5423 quirk, because we don't want to print the obnoxious "BIOS broken"
5424 message if VT-d is actually disabled.
5425 */
5426 static void __init check_tylersburg_isoch(void)
5427 {
5428 struct pci_dev *pdev;
5429 uint32_t vtisochctrl;
5430
5431 /* If there's no Azalia in the system anyway, forget it. */
5432 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
5433 if (!pdev)
5434 return;
5435 pci_dev_put(pdev);
5436
5437 /* System Management Registers. Might be hidden, in which case
5438 we can't do the sanity check. But that's OK, because the
5439 known-broken BIOSes _don't_ actually hide it, so far. */
5440 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
5441 if (!pdev)
5442 return;
5443
5444 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
5445 pci_dev_put(pdev);
5446 return;
5447 }
5448
5449 pci_dev_put(pdev);
5450
5451 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
5452 if (vtisochctrl & 1)
5453 return;
5454
5455 /* Drop all bits other than the number of TLB entries */
5456 vtisochctrl &= 0x1c;
5457
5458 /* If we have the recommended number of TLB entries (16), fine. */
5459 if (vtisochctrl == 0x10)
5460 return;
5461
5462 /* Zero TLB entries? You get to ride the short bus to school. */
5463 if (!vtisochctrl) {
5464 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
5465 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
5466 dmi_get_system_info(DMI_BIOS_VENDOR),
5467 dmi_get_system_info(DMI_BIOS_VERSION),
5468 dmi_get_system_info(DMI_PRODUCT_VERSION));
5469 iommu_identity_mapping |= IDENTMAP_AZALIA;
5470 return;
5471 }
5472
5473 pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
5474 vtisochctrl);
5475 }