1 /*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
17 * Copyright (C) 2006-2008 Intel Corporation
18 * Author: Ashok Raj <ashok.raj@intel.com>
19 * Author: Shaohua Li <shaohua.li@intel.com>
20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21 * Author: Fenghua Yu <fenghua.yu@intel.com>
22 */
23
24 #include <linux/init.h>
25 #include <linux/bitmap.h>
26 #include <linux/debugfs.h>
27 #include <linux/slab.h>
28 #include <linux/irq.h>
29 #include <linux/interrupt.h>
30 #include <linux/spinlock.h>
31 #include <linux/pci.h>
32 #include <linux/dmar.h>
33 #include <linux/dma-mapping.h>
34 #include <linux/mempool.h>
35 #include <linux/timer.h>
36 #include <linux/iova.h>
37 #include <linux/iommu.h>
38 #include <linux/intel-iommu.h>
39 #include <asm/cacheflush.h>
40 #include <asm/iommu.h>
41 #include "pci.h"
42
43 #define ROOT_SIZE VTD_PAGE_SIZE
44 #define CONTEXT_SIZE VTD_PAGE_SIZE
45
46 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
47 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
48
49 #define IOAPIC_RANGE_START (0xfee00000)
50 #define IOAPIC_RANGE_END (0xfeefffff)
51 #define IOVA_START_ADDR (0x1000)
52
53 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
54
55 #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
56
57 #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
58 #define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK)
59 #define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK)
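/*
 * With 4KiB pages, IOVA_PFN(DMA_32BIT_MASK) is 0xfffff, so DMA_32BIT_PFN
 * marks the last page frame addressable below 4GiB.
 */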
60
61 /* global iommu list, set NULL for ignored DMAR units */
62 static struct intel_iommu **g_iommus;
63
64 /*
65 * 0: Present
66 * 1-11: Reserved
67 * 12-63: Context Ptr (12 - (haw-1))
68 * 64-127: Reserved
69 */
70 struct root_entry {
71 u64 val;
72 u64 rsvd1;
73 };
74 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
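/*
 * With 4KiB pages and 16-byte root entries, ROOT_ENTRY_NR is 256:
 * one root entry per PCI bus number.
 */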
75 static inline bool root_present(struct root_entry *root)
76 {
77 return (root->val & 1);
78 }
79 static inline void set_root_present(struct root_entry *root)
80 {
81 root->val |= 1;
82 }
83 static inline void set_root_value(struct root_entry *root, unsigned long value)
84 {
85 root->val |= value & VTD_PAGE_MASK;
86 }
87
88 static inline struct context_entry *
89 get_context_addr_from_root(struct root_entry *root)
90 {
91 return (struct context_entry *)
92 (root_present(root)?phys_to_virt(
93 root->val & VTD_PAGE_MASK) :
94 NULL);
95 }
96
97 /*
98 * low 64 bits:
99 * 0: present
100 * 1: fault processing disable
101 * 2-3: translation type
102 * 12-63: address space root
103 * high 64 bits:
104 * 0-2: address width
105 * 3-6: avail
106 * 8-23: domain id
107 */
108 struct context_entry {
109 u64 lo;
110 u64 hi;
111 };
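/*
 * A context entry is 128 bits, so a 4KiB context table holds 256 entries,
 * indexed by devfn (see device_to_context_entry() below).
 */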
112
113 static inline bool context_present(struct context_entry *context)
114 {
115 return (context->lo & 1);
116 }
117 static inline void context_set_present(struct context_entry *context)
118 {
119 context->lo |= 1;
120 }
121
122 static inline void context_set_fault_enable(struct context_entry *context)
123 {
124 context->lo &= (((u64)-1) << 2) | 1;
125 }
126
127 #define CONTEXT_TT_MULTI_LEVEL 0
128
129 static inline void context_set_translation_type(struct context_entry *context,
130 unsigned long value)
131 {
132 context->lo &= (((u64)-1) << 4) | 3;
133 context->lo |= (value & 3) << 2;
134 }
135
136 static inline void context_set_address_root(struct context_entry *context,
137 unsigned long value)
138 {
139 context->lo |= value & VTD_PAGE_MASK;
140 }
141
142 static inline void context_set_address_width(struct context_entry *context,
143 unsigned long value)
144 {
145 context->hi |= value & 7;
146 }
147
148 static inline void context_set_domain_id(struct context_entry *context,
149 unsigned long value)
150 {
151 context->hi |= (value & ((1 << 16) - 1)) << 8;
152 }
153
154 static inline void context_clear_entry(struct context_entry *context)
155 {
156 context->lo = 0;
157 context->hi = 0;
158 }
159
160 /*
161 * 0: readable
162 * 1: writable
163 * 2-6: reserved
164 * 7: super page
165 * 8-11: available
166 * 12-63: Host physical address
167 */
168 struct dma_pte {
169 u64 val;
170 };
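/*
 * The read/write bits double as the presence check: dma_pte_present()
 * below treats a pte with neither DMA_PTE_READ nor DMA_PTE_WRITE set
 * as not present.
 */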
171
172 static inline void dma_clear_pte(struct dma_pte *pte)
173 {
174 pte->val = 0;
175 }
176
177 static inline void dma_set_pte_readable(struct dma_pte *pte)
178 {
179 pte->val |= DMA_PTE_READ;
180 }
181
182 static inline void dma_set_pte_writable(struct dma_pte *pte)
183 {
184 pte->val |= DMA_PTE_WRITE;
185 }
186
187 static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
188 {
189 pte->val = (pte->val & ~3) | (prot & 3);
190 }
191
192 static inline u64 dma_pte_addr(struct dma_pte *pte)
193 {
194 return (pte->val & VTD_PAGE_MASK);
195 }
196
197 static inline void dma_set_pte_addr(struct dma_pte *pte, u64 addr)
198 {
199 pte->val |= (addr & VTD_PAGE_MASK);
200 }
201
202 static inline bool dma_pte_present(struct dma_pte *pte)
203 {
204 return (pte->val & 3) != 0;
205 }
206
207 /* devices under the same p2p bridge are owned in one domain */
208 #define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
209
210 /* domain represents a virtual machine; more than one device
211 * across iommus may be owned by one domain, e.g. a kvm guest.
212 */
213 #define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1)
214
215 struct dmar_domain {
216 int id; /* domain id */
217 unsigned long iommu_bmp; /* bitmap of iommus this domain uses*/
218
219 struct list_head devices; /* all devices' list */
220 struct iova_domain iovad; /* iova's that belong to this domain */
221
222 struct dma_pte *pgd; /* virtual address */
223 spinlock_t mapping_lock; /* page table lock */
224 int gaw; /* max guest address width */
225
226 /* adjusted guest address width, 0 is level 2 30-bit */
227 int agaw;
228
229 int flags; /* flags to find out type of domain */
230
231 int iommu_coherency;/* indicate coherency of iommu access */
232 int iommu_count; /* reference count of iommu */
233 spinlock_t iommu_lock; /* protect iommu set in domain */
234 u64 max_addr; /* maximum mapped address */
235 };
236
237 /* PCI domain-device relationship */
238 struct device_domain_info {
239 struct list_head link; /* link to domain siblings */
240 struct list_head global; /* link to global list */
241 u8 bus; /* PCI bus number */
242 u8 devfn; /* PCI devfn number */
243 struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
244 struct dmar_domain *domain; /* pointer to domain */
245 };
246
247 static void flush_unmaps_timeout(unsigned long data);
248
249 DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
250
251 #define HIGH_WATER_MARK 250
252 struct deferred_flush_tables {
253 int next;
254 struct iova *iova[HIGH_WATER_MARK];
255 struct dmar_domain *domain[HIGH_WATER_MARK];
256 };
257
258 static struct deferred_flush_tables *deferred_flush;
259
260 /* number of intel_iommus; used to size g_iommus and iommu bitmaps */
261 static int g_num_of_iommus;
262
263 static DEFINE_SPINLOCK(async_umap_flush_lock);
264 static LIST_HEAD(unmaps_to_do);
265
266 static int timer_on;
267 static long list_size;
268
269 static void domain_remove_dev_info(struct dmar_domain *domain);
270
271 int dmar_disabled;
272 static int __initdata dmar_map_gfx = 1;
273 static int dmar_forcedac;
274 static int intel_iommu_strict;
275
276 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
277 static DEFINE_SPINLOCK(device_domain_lock);
278 static LIST_HEAD(device_domain_list);
279
280 static int __init intel_iommu_setup(char *str)
281 {
282 if (!str)
283 return -EINVAL;
284 while (*str) {
285 if (!strncmp(str, "off", 3)) {
286 dmar_disabled = 1;
287 printk(KERN_INFO"Intel-IOMMU: disabled\n");
288 } else if (!strncmp(str, "igfx_off", 8)) {
289 dmar_map_gfx = 0;
290 printk(KERN_INFO
291 "Intel-IOMMU: disable GFX device mapping\n");
292 } else if (!strncmp(str, "forcedac", 8)) {
293 printk(KERN_INFO
294 "Intel-IOMMU: Forcing DAC for PCI devices\n");
295 dmar_forcedac = 1;
296 } else if (!strncmp(str, "strict", 6)) {
297 printk(KERN_INFO
298 "Intel-IOMMU: disable batched IOTLB flush\n");
299 intel_iommu_strict = 1;
300 }
301
302 str += strcspn(str, ",");
303 while (*str == ',')
304 str++;
305 }
306 return 0;
307 }
308 __setup("intel_iommu=", intel_iommu_setup);
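/*
 * The options above may be combined, e.g. booting with
 * "intel_iommu=igfx_off,strict" skips the graphics identity mapping and
 * disables batched IOTLB flushing.
 */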
309
310 static struct kmem_cache *iommu_domain_cache;
311 static struct kmem_cache *iommu_devinfo_cache;
312 static struct kmem_cache *iommu_iova_cache;
313
314 static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
315 {
316 unsigned int flags;
317 void *vaddr;
318
319 /* trying to avoid low memory issues */
320 flags = current->flags & PF_MEMALLOC;
321 current->flags |= PF_MEMALLOC;
322 vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
323 current->flags &= (~PF_MEMALLOC | flags);
324 return vaddr;
325 }
326
327
328 static inline void *alloc_pgtable_page(void)
329 {
330 unsigned int flags;
331 void *vaddr;
332
333 /* trying to avoid low memory issues */
334 flags = current->flags & PF_MEMALLOC;
335 current->flags |= PF_MEMALLOC;
336 vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
337 current->flags &= (~PF_MEMALLOC | flags);
338 return vaddr;
339 }
340
341 static inline void free_pgtable_page(void *vaddr)
342 {
343 free_page((unsigned long)vaddr);
344 }
345
346 static inline void *alloc_domain_mem(void)
347 {
348 return iommu_kmem_cache_alloc(iommu_domain_cache);
349 }
350
351 static void free_domain_mem(void *vaddr)
352 {
353 kmem_cache_free(iommu_domain_cache, vaddr);
354 }
355
356 static inline void * alloc_devinfo_mem(void)
357 {
358 return iommu_kmem_cache_alloc(iommu_devinfo_cache);
359 }
360
361 static inline void free_devinfo_mem(void *vaddr)
362 {
363 kmem_cache_free(iommu_devinfo_cache, vaddr);
364 }
365
366 struct iova *alloc_iova_mem(void)
367 {
368 return iommu_kmem_cache_alloc(iommu_iova_cache);
369 }
370
371 void free_iova_mem(struct iova *iova)
372 {
373 kmem_cache_free(iommu_iova_cache, iova);
374 }
375
376
377 static inline int width_to_agaw(int width);
378
379 /* calculate agaw for each iommu.
380 * "SAGAW" may differ across iommus; start from the default agaw and
381 * fall back to a smaller supported agaw for iommus that don't support it.
382 */
383 int iommu_calculate_agaw(struct intel_iommu *iommu)
384 {
385 unsigned long sagaw;
386 int agaw = -1;
387
388 sagaw = cap_sagaw(iommu->cap);
389 for (agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
390 agaw >= 0; agaw--) {
391 if (test_bit(agaw, &sagaw))
392 break;
393 }
394
395 return agaw;
396 }
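/*
 * With DEFAULT_DOMAIN_ADDRESS_WIDTH of 48, the search starts at agaw 2
 * (a 4-level page table) and walks down until an agaw reported in SAGAW
 * is found.
 */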
397
398 /* in native case, each domain is related to only one iommu */
399 static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
400 {
401 int iommu_id;
402
403 BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
404
405 iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
406 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
407 return NULL;
408
409 return g_iommus[iommu_id];
410 }
411
412 /* "Coherency" capability may be different across iommus */
413 static void domain_update_iommu_coherency(struct dmar_domain *domain)
414 {
415 int i;
416
417 domain->iommu_coherency = 1;
418
419 i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
420 for (; i < g_num_of_iommus; ) {
421 if (!ecap_coherent(g_iommus[i]->ecap)) {
422 domain->iommu_coherency = 0;
423 break;
424 }
425 i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
426 }
427 }
428
429 static struct intel_iommu *device_to_iommu(u8 bus, u8 devfn)
430 {
431 struct dmar_drhd_unit *drhd = NULL;
432 int i;
433
434 for_each_drhd_unit(drhd) {
435 if (drhd->ignored)
436 continue;
437
438 for (i = 0; i < drhd->devices_cnt; i++)
439 if (drhd->devices[i]->bus->number == bus &&
440 drhd->devices[i]->devfn == devfn)
441 return drhd->iommu;
442
443 if (drhd->include_all)
444 return drhd->iommu;
445 }
446
447 return NULL;
448 }
449
450 static void domain_flush_cache(struct dmar_domain *domain,
451 void *addr, int size)
452 {
453 if (!domain->iommu_coherency)
454 clflush_cache_range(addr, size);
455 }
456
457 /* Gets context entry for a given bus and devfn */
458 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
459 u8 bus, u8 devfn)
460 {
461 struct root_entry *root;
462 struct context_entry *context;
463 unsigned long phy_addr;
464 unsigned long flags;
465
466 spin_lock_irqsave(&iommu->lock, flags);
467 root = &iommu->root_entry[bus];
468 context = get_context_addr_from_root(root);
469 if (!context) {
470 context = (struct context_entry *)alloc_pgtable_page();
471 if (!context) {
472 spin_unlock_irqrestore(&iommu->lock, flags);
473 return NULL;
474 }
475 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
476 phy_addr = virt_to_phys((void *)context);
477 set_root_value(root, phy_addr);
478 set_root_present(root);
479 __iommu_flush_cache(iommu, root, sizeof(*root));
480 }
481 spin_unlock_irqrestore(&iommu->lock, flags);
482 return &context[devfn];
483 }
484
485 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
486 {
487 struct root_entry *root;
488 struct context_entry *context;
489 int ret;
490 unsigned long flags;
491
492 spin_lock_irqsave(&iommu->lock, flags);
493 root = &iommu->root_entry[bus];
494 context = get_context_addr_from_root(root);
495 if (!context) {
496 ret = 0;
497 goto out;
498 }
499 ret = context_present(&context[devfn]);
500 out:
501 spin_unlock_irqrestore(&iommu->lock, flags);
502 return ret;
503 }
504
505 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
506 {
507 struct root_entry *root;
508 struct context_entry *context;
509 unsigned long flags;
510
511 spin_lock_irqsave(&iommu->lock, flags);
512 root = &iommu->root_entry[bus];
513 context = get_context_addr_from_root(root);
514 if (context) {
515 context_clear_entry(&context[devfn]);
516 __iommu_flush_cache(iommu, &context[devfn],
517 sizeof(*context));
518 }
519 spin_unlock_irqrestore(&iommu->lock, flags);
520 }
521
522 static void free_context_table(struct intel_iommu *iommu)
523 {
524 struct root_entry *root;
525 int i;
526 unsigned long flags;
527 struct context_entry *context;
528
529 spin_lock_irqsave(&iommu->lock, flags);
530 if (!iommu->root_entry) {
531 goto out;
532 }
533 for (i = 0; i < ROOT_ENTRY_NR; i++) {
534 root = &iommu->root_entry[i];
535 context = get_context_addr_from_root(root);
536 if (context)
537 free_pgtable_page(context);
538 }
539 free_pgtable_page(iommu->root_entry);
540 iommu->root_entry = NULL;
541 out:
542 spin_unlock_irqrestore(&iommu->lock, flags);
543 }
544
545 /* page table handling */
546 #define LEVEL_STRIDE (9)
547 #define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
548
549 static inline int agaw_to_level(int agaw)
550 {
551 return agaw + 2;
552 }
553
554 static inline int agaw_to_width(int agaw)
555 {
556 return 30 + agaw * LEVEL_STRIDE;
557
558 }
559
560 static inline int width_to_agaw(int width)
561 {
562 return (width - 30) / LEVEL_STRIDE;
563 }
564
565 static inline unsigned int level_to_offset_bits(int level)
566 {
567 return (12 + (level - 1) * LEVEL_STRIDE);
568 }
569
570 static inline int address_level_offset(u64 addr, int level)
571 {
572 return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
573 }
574
575 static inline u64 level_mask(int level)
576 {
577 return ((u64)-1 << level_to_offset_bits(level));
578 }
579
580 static inline u64 level_size(int level)
581 {
582 return ((u64)1 << level_to_offset_bits(level));
583 }
584
585 static inline u64 align_to_level(u64 addr, int level)
586 {
587 return ((addr + level_size(level) - 1) & level_mask(level));
588 }
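/*
 * Worked example for a 4-level table (agaw 2, 48-bit width): level 4
 * indexes address bits 39-47, level 3 bits 30-38, level 2 bits 21-29 and
 * level 1 bits 12-20. level_size(2) is 2MiB, so align_to_level(addr, 2)
 * rounds addr up to the next 2MiB boundary.
 */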
589
590 static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
591 {
592 int addr_width = agaw_to_width(domain->agaw);
593 struct dma_pte *parent, *pte = NULL;
594 int level = agaw_to_level(domain->agaw);
595 int offset;
596 unsigned long flags;
597
598 BUG_ON(!domain->pgd);
599
600 addr &= (((u64)1) << addr_width) - 1;
601 parent = domain->pgd;
602
603 spin_lock_irqsave(&domain->mapping_lock, flags);
604 while (level > 0) {
605 void *tmp_page;
606
607 offset = address_level_offset(addr, level);
608 pte = &parent[offset];
609 if (level == 1)
610 break;
611
612 if (!dma_pte_present(pte)) {
613 tmp_page = alloc_pgtable_page();
614
615 if (!tmp_page) {
616 spin_unlock_irqrestore(&domain->mapping_lock,
617 flags);
618 return NULL;
619 }
620 domain_flush_cache(domain, tmp_page, PAGE_SIZE);
621 dma_set_pte_addr(pte, virt_to_phys(tmp_page));
622 /*
623 * high level table always sets r/w, last level page
624 * table control read/write
625 */
626 dma_set_pte_readable(pte);
627 dma_set_pte_writable(pte);
628 domain_flush_cache(domain, pte, sizeof(*pte));
629 }
630 parent = phys_to_virt(dma_pte_addr(pte));
631 level--;
632 }
633
634 spin_unlock_irqrestore(&domain->mapping_lock, flags);
635 return pte;
636 }
637
638 /* return address's pte at specific level */
639 static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
640 int level)
641 {
642 struct dma_pte *parent, *pte = NULL;
643 int total = agaw_to_level(domain->agaw);
644 int offset;
645
646 parent = domain->pgd;
647 while (level <= total) {
648 offset = address_level_offset(addr, total);
649 pte = &parent[offset];
650 if (level == total)
651 return pte;
652
653 if (!dma_pte_present(pte))
654 break;
655 parent = phys_to_virt(dma_pte_addr(pte));
656 total--;
657 }
658 return NULL;
659 }
660
661 /* clear one page's page table */
662 static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
663 {
664 struct dma_pte *pte = NULL;
665
666 /* get last level pte */
667 pte = dma_addr_level_pte(domain, addr, 1);
668
669 if (pte) {
670 dma_clear_pte(pte);
671 domain_flush_cache(domain, pte, sizeof(*pte));
672 }
673 }
674
675 /* clear last level pte, a tlb flush should be followed */
676 static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
677 {
678 int addr_width = agaw_to_width(domain->agaw);
679
680 start &= (((u64)1) << addr_width) - 1;
681 end &= (((u64)1) << addr_width) - 1;
682 /* in case it's partial page */
683 start = PAGE_ALIGN(start);
684 end &= PAGE_MASK;
685
686 /* we don't need lock here, nobody else touches the iova range */
687 while (start < end) {
688 dma_pte_clear_one(domain, start);
689 start += VTD_PAGE_SIZE;
690 }
691 }
692
693 /* free page table pages. last level pte should already be cleared */
694 static void dma_pte_free_pagetable(struct dmar_domain *domain,
695 u64 start, u64 end)
696 {
697 int addr_width = agaw_to_width(domain->agaw);
698 struct dma_pte *pte;
699 int total = agaw_to_level(domain->agaw);
700 int level;
701 u64 tmp;
702
703 start &= (((u64)1) << addr_width) - 1;
704 end &= (((u64)1) << addr_width) - 1;
705
706 /* we don't need lock here, nobody else touches the iova range */
707 level = 2;
708 while (level <= total) {
709 tmp = align_to_level(start, level);
710 if (tmp >= end || (tmp + level_size(level) > end))
711 return;
712
713 while (tmp < end) {
714 pte = dma_addr_level_pte(domain, tmp, level);
715 if (pte) {
716 free_pgtable_page(
717 phys_to_virt(dma_pte_addr(pte)));
718 dma_clear_pte(pte);
719 domain_flush_cache(domain, pte, sizeof(*pte));
720 }
721 tmp += level_size(level);
722 }
723 level++;
724 }
725 /* free pgd */
726 if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
727 free_pgtable_page(domain->pgd);
728 domain->pgd = NULL;
729 }
730 }
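/*
 * Note the bottom-up order above: the loop walks from level 2 upward,
 * at each level freeing the page-table pages that level's ptes point to
 * (so the last-level tables go first), and frees the pgd itself only
 * when the cleared range spans the whole address space.
 */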
731
732 /* iommu handling */
733 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
734 {
735 struct root_entry *root;
736 unsigned long flags;
737
738 root = (struct root_entry *)alloc_pgtable_page();
739 if (!root)
740 return -ENOMEM;
741
742 __iommu_flush_cache(iommu, root, ROOT_SIZE);
743
744 spin_lock_irqsave(&iommu->lock, flags);
745 iommu->root_entry = root;
746 spin_unlock_irqrestore(&iommu->lock, flags);
747
748 return 0;
749 }
750
751 static void iommu_set_root_entry(struct intel_iommu *iommu)
752 {
753 void *addr;
754 u32 cmd, sts;
755 unsigned long flag;
756
757 addr = iommu->root_entry;
758
759 spin_lock_irqsave(&iommu->register_lock, flag);
760 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
761
762 cmd = iommu->gcmd | DMA_GCMD_SRTP;
763 writel(cmd, iommu->reg + DMAR_GCMD_REG);
764
765 /* Make sure hardware completes it */
766 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
767 readl, (sts & DMA_GSTS_RTPS), sts);
768
769 spin_unlock_irqrestore(&iommu->register_lock, flag);
770 }
771
772 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
773 {
774 u32 val;
775 unsigned long flag;
776
777 if (!cap_rwbf(iommu->cap))
778 return;
779 val = iommu->gcmd | DMA_GCMD_WBF;
780
781 spin_lock_irqsave(&iommu->register_lock, flag);
782 writel(val, iommu->reg + DMAR_GCMD_REG);
783
784 /* Make sure hardware completes it */
785 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
786 readl, (!(val & DMA_GSTS_WBFS)), val);
787
788 spin_unlock_irqrestore(&iommu->register_lock, flag);
789 }
790
791 /* return value determines whether we need a write buffer flush */
792 static int __iommu_flush_context(struct intel_iommu *iommu,
793 u16 did, u16 source_id, u8 function_mask, u64 type,
794 int non_present_entry_flush)
795 {
796 u64 val = 0;
797 unsigned long flag;
798
799 /*
800 * In the non-present entry flush case: if hardware doesn't cache
801 * non-present entries we do nothing; if it does cache non-present
802 * entries, we flush the entries of domain 0 (the domain id used to
803 * cache any non-present entries)
804 */
805 if (non_present_entry_flush) {
806 if (!cap_caching_mode(iommu->cap))
807 return 1;
808 else
809 did = 0;
810 }
811
812 switch (type) {
813 case DMA_CCMD_GLOBAL_INVL:
814 val = DMA_CCMD_GLOBAL_INVL;
815 break;
816 case DMA_CCMD_DOMAIN_INVL:
817 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
818 break;
819 case DMA_CCMD_DEVICE_INVL:
820 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
821 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
822 break;
823 default:
824 BUG();
825 }
826 val |= DMA_CCMD_ICC;
827
828 spin_lock_irqsave(&iommu->register_lock, flag);
829 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
830
831 /* Make sure hardware completes it */
832 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
833 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
834
835 spin_unlock_irqrestore(&iommu->register_lock, flag);
836
837 /* flush context entry will implicitly flush write buffer */
838 return 0;
839 }
840
841 /* return value determines whether we need a write buffer flush */
842 static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
843 u64 addr, unsigned int size_order, u64 type,
844 int non_present_entry_flush)
845 {
846 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
847 u64 val = 0, val_iva = 0;
848 unsigned long flag;
849
850 /*
851 * In the non-present entry flush case: if hardware doesn't cache
852 * non-present entries we do nothing; if it does cache non-present
853 * entries, we flush the entries of domain 0 (the domain id used to
854 * cache any non-present entries)
855 */
856 if (non_present_entry_flush) {
857 if (!cap_caching_mode(iommu->cap))
858 return 1;
859 else
860 did = 0;
861 }
862
863 switch (type) {
864 case DMA_TLB_GLOBAL_FLUSH:
865 /* global flush doesn't need set IVA_REG */
866 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
867 break;
868 case DMA_TLB_DSI_FLUSH:
869 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
870 break;
871 case DMA_TLB_PSI_FLUSH:
872 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
873 /* Note: always flush non-leaf currently */
874 val_iva = size_order | addr;
875 break;
876 default:
877 BUG();
878 }
879 /* Note: set drain read/write */
880 #if 0
881 /*
882 * This is probably only needed to be extra safe; it looks like we
883 * can ignore it without any impact.
884 */
885 if (cap_read_drain(iommu->cap))
886 val |= DMA_TLB_READ_DRAIN;
887 #endif
888 if (cap_write_drain(iommu->cap))
889 val |= DMA_TLB_WRITE_DRAIN;
890
891 spin_lock_irqsave(&iommu->register_lock, flag);
892 /* Note: Only uses first TLB reg currently */
893 if (val_iva)
894 dmar_writeq(iommu->reg + tlb_offset, val_iva);
895 dmar_writeq(iommu->reg + tlb_offset + 8, val);
896
897 /* Make sure hardware completes it */
898 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
899 dmar_readq, (!(val & DMA_TLB_IVT)), val);
900
901 spin_unlock_irqrestore(&iommu->register_lock, flag);
902
903 /* check IOTLB invalidation granularity */
904 if (DMA_TLB_IAIG(val) == 0)
905 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
906 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
907 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
908 (unsigned long long)DMA_TLB_IIRG(type),
909 (unsigned long long)DMA_TLB_IAIG(val));
910 /* flush iotlb entry will implicitly flush write buffer */
911 return 0;
912 }
913
914 static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
915 u64 addr, unsigned int pages, int non_present_entry_flush)
916 {
917 unsigned int mask;
918
919 BUG_ON(addr & (~VTD_PAGE_MASK));
920 BUG_ON(pages == 0);
921
922 /* Fallback to domain selective flush if no PSI support */
923 if (!cap_pgsel_inv(iommu->cap))
924 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
925 DMA_TLB_DSI_FLUSH,
926 non_present_entry_flush);
927
928 /*
929 * PSI requires the number of pages to be a power of two (2 ^ x), with
930 * the base address naturally aligned to that size
931 */
932 mask = ilog2(__roundup_pow_of_two(pages));
933 /* Fallback to domain selective flush if size is too big */
934 if (mask > cap_max_amask_val(iommu->cap))
935 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
936 DMA_TLB_DSI_FLUSH, non_present_entry_flush);
937
938 return iommu->flush.flush_iotlb(iommu, did, addr, mask,
939 DMA_TLB_PSI_FLUSH,
940 non_present_entry_flush);
941 }
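/*
 * Example: pages = 5 gives mask = ilog2(roundup_pow_of_two(5)) = 3, so
 * the hardware is asked to invalidate a naturally aligned 8-page (32KiB)
 * region covering addr.
 */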
942
943 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
944 {
945 u32 pmen;
946 unsigned long flags;
947
948 spin_lock_irqsave(&iommu->register_lock, flags);
949 pmen = readl(iommu->reg + DMAR_PMEN_REG);
950 pmen &= ~DMA_PMEN_EPM;
951 writel(pmen, iommu->reg + DMAR_PMEN_REG);
952
953 /* wait for the protected region status bit to clear */
954 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
955 readl, !(pmen & DMA_PMEN_PRS), pmen);
956
957 spin_unlock_irqrestore(&iommu->register_lock, flags);
958 }
959
960 static int iommu_enable_translation(struct intel_iommu *iommu)
961 {
962 u32 sts;
963 unsigned long flags;
964
965 spin_lock_irqsave(&iommu->register_lock, flags);
966 writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);
967
968 /* Make sure hardware completes it */
969 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
970 readl, (sts & DMA_GSTS_TES), sts);
971
972 iommu->gcmd |= DMA_GCMD_TE;
973 spin_unlock_irqrestore(&iommu->register_lock, flags);
974 return 0;
975 }
976
977 static int iommu_disable_translation(struct intel_iommu *iommu)
978 {
979 u32 sts;
980 unsigned long flag;
981
982 spin_lock_irqsave(&iommu->register_lock, flag);
983 iommu->gcmd &= ~DMA_GCMD_TE;
984 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
985
986 /* Make sure hardware completes it */
987 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
988 readl, (!(sts & DMA_GSTS_TES)), sts);
989
990 spin_unlock_irqrestore(&iommu->register_lock, flag);
991 return 0;
992 }
993
994 /* iommu interrupt handling. Most of it is MSI-like. */
995
996 static const char *fault_reason_strings[] =
997 {
998 "Software",
999 "Present bit in root entry is clear",
1000 "Present bit in context entry is clear",
1001 "Invalid context entry",
1002 "Access beyond MGAW",
1003 "PTE Write access is not set",
1004 "PTE Read access is not set",
1005 "Next page table ptr is invalid",
1006 "Root table address invalid",
1007 "Context table ptr is invalid",
1008 "non-zero reserved fields in RTP",
1009 "non-zero reserved fields in CTP",
1010 "non-zero reserved fields in PTE",
1011 };
1012 #define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1)
1013
1014 const char *dmar_get_fault_reason(u8 fault_reason)
1015 {
1016 if (fault_reason > MAX_FAULT_REASON_IDX)
1017 return "Unknown";
1018 else
1019 return fault_reason_strings[fault_reason];
1020 }
1021
1022 void dmar_msi_unmask(unsigned int irq)
1023 {
1024 struct intel_iommu *iommu = get_irq_data(irq);
1025 unsigned long flag;
1026
1027 /* unmask it */
1028 spin_lock_irqsave(&iommu->register_lock, flag);
1029 writel(0, iommu->reg + DMAR_FECTL_REG);
1030 /* Read a reg to force flush the post write */
1031 readl(iommu->reg + DMAR_FECTL_REG);
1032 spin_unlock_irqrestore(&iommu->register_lock, flag);
1033 }
1034
1035 void dmar_msi_mask(unsigned int irq)
1036 {
1037 unsigned long flag;
1038 struct intel_iommu *iommu = get_irq_data(irq);
1039
1040 /* mask it */
1041 spin_lock_irqsave(&iommu->register_lock, flag);
1042 writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
1043 /* Read a reg to force flush of the posted write */
1044 readl(iommu->reg + DMAR_FECTL_REG);
1045 spin_unlock_irqrestore(&iommu->register_lock, flag);
1046 }
1047
1048 void dmar_msi_write(int irq, struct msi_msg *msg)
1049 {
1050 struct intel_iommu *iommu = get_irq_data(irq);
1051 unsigned long flag;
1052
1053 spin_lock_irqsave(&iommu->register_lock, flag);
1054 writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
1055 writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
1056 writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
1057 spin_unlock_irqrestore(&iommu->register_lock, flag);
1058 }
1059
1060 void dmar_msi_read(int irq, struct msi_msg *msg)
1061 {
1062 struct intel_iommu *iommu = get_irq_data(irq);
1063 unsigned long flag;
1064
1065 spin_lock_irqsave(&iommu->register_lock, flag);
1066 msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
1067 msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
1068 msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
1069 spin_unlock_irqrestore(&iommu->register_lock, flag);
1070 }
1071
1072 static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
1073 u8 fault_reason, u16 source_id, unsigned long long addr)
1074 {
1075 const char *reason;
1076
1077 reason = dmar_get_fault_reason(fault_reason);
1078
1079 printk(KERN_ERR
1080 "DMAR:[%s] Request device [%02x:%02x.%d] "
1081 "fault addr %llx \n"
1082 "DMAR:[fault reason %02d] %s\n",
1083 (type ? "DMA Read" : "DMA Write"),
1084 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1085 PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
1086 return 0;
1087 }
1088
1089 #define PRIMARY_FAULT_REG_LEN (16)
1090 static irqreturn_t iommu_page_fault(int irq, void *dev_id)
1091 {
1092 struct intel_iommu *iommu = dev_id;
1093 int reg, fault_index;
1094 u32 fault_status;
1095 unsigned long flag;
1096
1097 spin_lock_irqsave(&iommu->register_lock, flag);
1098 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1099
1100 /* TBD: ignore advanced fault log currently */
1101 if (!(fault_status & DMA_FSTS_PPF))
1102 goto clear_overflow;
1103
1104 fault_index = dma_fsts_fault_record_index(fault_status);
1105 reg = cap_fault_reg_offset(iommu->cap);
1106 while (1) {
1107 u8 fault_reason;
1108 u16 source_id;
1109 u64 guest_addr;
1110 int type;
1111 u32 data;
1112
1113 /* highest 32 bits */
1114 data = readl(iommu->reg + reg +
1115 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1116 if (!(data & DMA_FRCD_F))
1117 break;
1118
1119 fault_reason = dma_frcd_fault_reason(data);
1120 type = dma_frcd_type(data);
1121
1122 data = readl(iommu->reg + reg +
1123 fault_index * PRIMARY_FAULT_REG_LEN + 8);
1124 source_id = dma_frcd_source_id(data);
1125
1126 guest_addr = dmar_readq(iommu->reg + reg +
1127 fault_index * PRIMARY_FAULT_REG_LEN);
1128 guest_addr = dma_frcd_page_addr(guest_addr);
1129 /* clear the fault */
1130 writel(DMA_FRCD_F, iommu->reg + reg +
1131 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1132
1133 spin_unlock_irqrestore(&iommu->register_lock, flag);
1134
1135 iommu_page_fault_do_one(iommu, type, fault_reason,
1136 source_id, guest_addr);
1137
1138 fault_index++;
1139 if (fault_index >= cap_num_fault_regs(iommu->cap))
1140 fault_index = 0;
1141 spin_lock_irqsave(&iommu->register_lock, flag);
1142 }
1143 clear_overflow:
1144 /* clear primary fault overflow */
1145 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1146 if (fault_status & DMA_FSTS_PFO)
1147 writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);
1148
1149 spin_unlock_irqrestore(&iommu->register_lock, flag);
1150 return IRQ_HANDLED;
1151 }
1152
1153 int dmar_set_interrupt(struct intel_iommu *iommu)
1154 {
1155 int irq, ret;
1156
1157 irq = create_irq();
1158 if (!irq) {
1159 printk(KERN_ERR "IOMMU: no free vectors\n");
1160 return -EINVAL;
1161 }
1162
1163 set_irq_data(irq, iommu);
1164 iommu->irq = irq;
1165
1166 ret = arch_setup_dmar_msi(irq);
1167 if (ret) {
1168 set_irq_data(irq, NULL);
1169 iommu->irq = 0;
1170 destroy_irq(irq);
1171 return 0;
1172 }
1173
1174 /* Force the fault registers to be cleared */
1175 iommu_page_fault(irq, iommu);
1176
1177 ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
1178 if (ret)
1179 printk(KERN_ERR "IOMMU: can't request irq\n");
1180 return ret;
1181 }
1182
1183 static int iommu_init_domains(struct intel_iommu *iommu)
1184 {
1185 unsigned long ndomains;
1186 unsigned long nlongs;
1187
1188 ndomains = cap_ndoms(iommu->cap);
1189 pr_debug("Number of Domains supported <%ld>\n", ndomains);
1190 nlongs = BITS_TO_LONGS(ndomains);
1191
1192 /* TBD: there might be 64K domains,
1193 * consider other allocation for future chip
1194 */
1195 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1196 if (!iommu->domain_ids) {
1197 printk(KERN_ERR "Allocating domain id array failed\n");
1198 return -ENOMEM;
1199 }
1200 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1201 GFP_KERNEL);
1202 if (!iommu->domains) {
1203 printk(KERN_ERR "Allocating domain array failed\n");
1204 kfree(iommu->domain_ids);
1205 return -ENOMEM;
1206 }
1207
1208 spin_lock_init(&iommu->lock);
1209
1210 /*
1211 * if Caching mode is set, then invalid translations are tagged
1212 * with domain id 0. Hence we need to pre-allocate it.
1213 */
1214 if (cap_caching_mode(iommu->cap))
1215 set_bit(0, iommu->domain_ids);
1216 return 0;
1217 }
1218
1219
1220 static void domain_exit(struct dmar_domain *domain);
1221 static void vm_domain_exit(struct dmar_domain *domain);
1222
1223 void free_dmar_iommu(struct intel_iommu *iommu)
1224 {
1225 struct dmar_domain *domain;
1226 int i;
1227 unsigned long flags;
1228
1229 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
1230 for (; i < cap_ndoms(iommu->cap); ) {
1231 domain = iommu->domains[i];
1232 clear_bit(i, iommu->domain_ids);
1233
1234 spin_lock_irqsave(&domain->iommu_lock, flags);
1235 if (--domain->iommu_count == 0) {
1236 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
1237 vm_domain_exit(domain);
1238 else
1239 domain_exit(domain);
1240 }
1241 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1242
1243 i = find_next_bit(iommu->domain_ids,
1244 cap_ndoms(iommu->cap), i+1);
1245 }
1246
1247 if (iommu->gcmd & DMA_GCMD_TE)
1248 iommu_disable_translation(iommu);
1249
1250 if (iommu->irq) {
1251 set_irq_data(iommu->irq, NULL);
1252 /* This will mask the irq */
1253 free_irq(iommu->irq, iommu);
1254 destroy_irq(iommu->irq);
1255 }
1256
1257 kfree(iommu->domains);
1258 kfree(iommu->domain_ids);
1259
1260 g_iommus[iommu->seq_id] = NULL;
1261
1262 /* if all iommus are freed, free g_iommus */
1263 for (i = 0; i < g_num_of_iommus; i++) {
1264 if (g_iommus[i])
1265 break;
1266 }
1267
1268 if (i == g_num_of_iommus)
1269 kfree(g_iommus);
1270
1271 /* free context mapping */
1272 free_context_table(iommu);
1273 }
1274
1275 static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
1276 {
1277 unsigned long num;
1278 unsigned long ndomains;
1279 struct dmar_domain *domain;
1280 unsigned long flags;
1281
1282 domain = alloc_domain_mem();
1283 if (!domain)
1284 return NULL;
1285
1286 ndomains = cap_ndoms(iommu->cap);
1287
1288 spin_lock_irqsave(&iommu->lock, flags);
1289 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1290 if (num >= ndomains) {
1291 spin_unlock_irqrestore(&iommu->lock, flags);
1292 free_domain_mem(domain);
1293 printk(KERN_ERR "IOMMU: no free domain ids\n");
1294 return NULL;
1295 }
1296
1297 set_bit(num, iommu->domain_ids);
1298 domain->id = num;
1299 memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
1300 set_bit(iommu->seq_id, &domain->iommu_bmp);
1301 domain->flags = 0;
1302 iommu->domains[num] = domain;
1303 spin_unlock_irqrestore(&iommu->lock, flags);
1304
1305 return domain;
1306 }
1307
1308 static void iommu_free_domain(struct dmar_domain *domain)
1309 {
1310 unsigned long flags;
1311 struct intel_iommu *iommu;
1312
1313 iommu = domain_get_iommu(domain);
1314
1315 spin_lock_irqsave(&iommu->lock, flags);
1316 clear_bit(domain->id, iommu->domain_ids);
1317 spin_unlock_irqrestore(&iommu->lock, flags);
1318 }
1319
1320 static struct iova_domain reserved_iova_list;
1321 static struct lock_class_key reserved_alloc_key;
1322 static struct lock_class_key reserved_rbtree_key;
1323
1324 static void dmar_init_reserved_ranges(void)
1325 {
1326 struct pci_dev *pdev = NULL;
1327 struct iova *iova;
1328 int i;
1329 u64 addr, size;
1330
1331 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1332
1333 lockdep_set_class(&reserved_iova_list.iova_alloc_lock,
1334 &reserved_alloc_key);
1335 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1336 &reserved_rbtree_key);
1337
1338 /* IOAPIC ranges shouldn't be accessed by DMA */
1339 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1340 IOVA_PFN(IOAPIC_RANGE_END));
1341 if (!iova)
1342 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1343
1344 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1345 for_each_pci_dev(pdev) {
1346 struct resource *r;
1347
1348 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1349 r = &pdev->resource[i];
1350 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1351 continue;
1352 addr = r->start;
1353 addr &= PAGE_MASK;
1354 size = r->end - addr;
1355 size = PAGE_ALIGN(size);
1356 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
1357 IOVA_PFN(size + addr) - 1);
1358 if (!iova)
1359 printk(KERN_ERR "Reserve iova failed\n");
1360 }
1361 }
1362
1363 }
1364
1365 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1366 {
1367 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1368 }
1369
1370 static inline int guestwidth_to_adjustwidth(int gaw)
1371 {
1372 int agaw;
1373 int r = (gaw - 12) % 9;
1374
1375 if (r == 0)
1376 agaw = gaw;
1377 else
1378 agaw = gaw + 9 - r;
1379 if (agaw > 64)
1380 agaw = 64;
1381 return agaw;
1382 }
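/*
 * Example: a 36-bit guest width gives r = (36 - 12) % 9 = 6, so the
 * width is rounded up to 36 + 9 - 6 = 39 bits, the next width that a
 * whole number of 9-bit page-table levels above the 12-bit page offset
 * can represent.
 */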
1383
1384 static int domain_init(struct dmar_domain *domain, int guest_width)
1385 {
1386 struct intel_iommu *iommu;
1387 int adjust_width, agaw;
1388 unsigned long sagaw;
1389
1390 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1391 spin_lock_init(&domain->mapping_lock);
1392 spin_lock_init(&domain->iommu_lock);
1393
1394 domain_reserve_special_ranges(domain);
1395
1396 /* calculate AGAW */
1397 iommu = domain_get_iommu(domain);
1398 if (guest_width > cap_mgaw(iommu->cap))
1399 guest_width = cap_mgaw(iommu->cap);
1400 domain->gaw = guest_width;
1401 adjust_width = guestwidth_to_adjustwidth(guest_width);
1402 agaw = width_to_agaw(adjust_width);
1403 sagaw = cap_sagaw(iommu->cap);
1404 if (!test_bit(agaw, &sagaw)) {
1405 /* hardware doesn't support it, choose a bigger one */
1406 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1407 agaw = find_next_bit(&sagaw, 5, agaw);
1408 if (agaw >= 5)
1409 return -ENODEV;
1410 }
1411 domain->agaw = agaw;
1412 INIT_LIST_HEAD(&domain->devices);
1413
1414 if (ecap_coherent(iommu->ecap))
1415 domain->iommu_coherency = 1;
1416 else
1417 domain->iommu_coherency = 0;
1418
1419 domain->iommu_count = 1;
1420
1421 /* always allocate the top pgd */
1422 domain->pgd = (struct dma_pte *)alloc_pgtable_page();
1423 if (!domain->pgd)
1424 return -ENOMEM;
1425 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1426 return 0;
1427 }
1428
1429 static void domain_exit(struct dmar_domain *domain)
1430 {
1431 u64 end;
1432
1433 /* Domain 0 is reserved, so don't process it */
1434 if (!domain)
1435 return;
1436
1437 domain_remove_dev_info(domain);
1438 /* destroy iovas */
1439 put_iova_domain(&domain->iovad);
1440 end = DOMAIN_MAX_ADDR(domain->gaw);
1441 end = end & PAGE_MASK;
1442
1443 /* clear ptes */
1444 dma_pte_clear_range(domain, 0, end);
1445
1446 /* free page tables */
1447 dma_pte_free_pagetable(domain, 0, end);
1448
1449 iommu_free_domain(domain);
1450 free_domain_mem(domain);
1451 }
1452
1453 static int domain_context_mapping_one(struct dmar_domain *domain,
1454 u8 bus, u8 devfn)
1455 {
1456 struct context_entry *context;
1457 unsigned long flags;
1458 struct intel_iommu *iommu;
1459 struct dma_pte *pgd;
1460 unsigned long num;
1461 unsigned long ndomains;
1462 int id;
1463 int agaw;
1464
1465 pr_debug("Set context mapping for %02x:%02x.%d\n",
1466 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1467 BUG_ON(!domain->pgd);
1468
1469 iommu = device_to_iommu(bus, devfn);
1470 if (!iommu)
1471 return -ENODEV;
1472
1473 context = device_to_context_entry(iommu, bus, devfn);
1474 if (!context)
1475 return -ENOMEM;
1476 spin_lock_irqsave(&iommu->lock, flags);
1477 if (context_present(context)) {
1478 spin_unlock_irqrestore(&iommu->lock, flags);
1479 return 0;
1480 }
1481
1482 id = domain->id;
1483 pgd = domain->pgd;
1484
1485 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) {
1486 int found = 0;
1487
1488 /* find an available domain id for this device in iommu */
1489 ndomains = cap_ndoms(iommu->cap);
1490 num = find_first_bit(iommu->domain_ids, ndomains);
1491 for (; num < ndomains; ) {
1492 if (iommu->domains[num] == domain) {
1493 id = num;
1494 found = 1;
1495 break;
1496 }
1497 num = find_next_bit(iommu->domain_ids,
1498 cap_ndoms(iommu->cap), num+1);
1499 }
1500
1501 if (found == 0) {
1502 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1503 if (num >= ndomains) {
1504 spin_unlock_irqrestore(&iommu->lock, flags);
1505 printk(KERN_ERR "IOMMU: no free domain ids\n");
1506 return -EFAULT;
1507 }
1508
1509 set_bit(num, iommu->domain_ids);
1510 iommu->domains[num] = domain;
1511 id = num;
1512 }
1513
1514 /* Skip top levels of the page tables for
1515 * iommus whose agaw is smaller than the domain's.
1516 */
1517 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1518 pgd = phys_to_virt(dma_pte_addr(pgd));
1519 if (!dma_pte_present(pgd)) {
1520 spin_unlock_irqrestore(&iommu->lock, flags);
1521 return -ENOMEM;
1522 }
1523 }
1524 }
1525
1526 context_set_domain_id(context, id);
1527 context_set_address_width(context, iommu->agaw);
1528 context_set_address_root(context, virt_to_phys(pgd));
1529 context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL);
1530 context_set_fault_enable(context);
1531 context_set_present(context);
1532 domain_flush_cache(domain, context, sizeof(*context));
1533
1534 /* it's a non-present to present mapping */
1535 if (iommu->flush.flush_context(iommu, domain->id,
1536 (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT,
1537 DMA_CCMD_DEVICE_INVL, 1))
1538 iommu_flush_write_buffer(iommu);
1539 else
1540 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0);
1541
1542 spin_unlock_irqrestore(&iommu->lock, flags);
1543
1544 spin_lock_irqsave(&domain->iommu_lock, flags);
1545 if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) {
1546 domain->iommu_count++;
1547 domain_update_iommu_coherency(domain);
1548 }
1549 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1550 return 0;
1551 }
1552
1553 static int
1554 domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
1555 {
1556 int ret;
1557 struct pci_dev *tmp, *parent;
1558
1559 ret = domain_context_mapping_one(domain, pdev->bus->number,
1560 pdev->devfn);
1561 if (ret)
1562 return ret;
1563
1564 /* dependent device mapping */
1565 tmp = pci_find_upstream_pcie_bridge(pdev);
1566 if (!tmp)
1567 return 0;
1568 /* Secondary interface's bus number and devfn 0 */
1569 parent = pdev->bus->self;
1570 while (parent != tmp) {
1571 ret = domain_context_mapping_one(domain, parent->bus->number,
1572 parent->devfn);
1573 if (ret)
1574 return ret;
1575 parent = parent->bus->self;
1576 }
1577 if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
1578 return domain_context_mapping_one(domain,
1579 tmp->subordinate->number, 0);
1580 else /* this is a legacy PCI bridge */
1581 return domain_context_mapping_one(domain,
1582 tmp->bus->number, tmp->devfn);
1583 }
1584
1585 static int domain_context_mapped(struct pci_dev *pdev)
1586 {
1587 int ret;
1588 struct pci_dev *tmp, *parent;
1589 struct intel_iommu *iommu;
1590
1591 iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
1592 if (!iommu)
1593 return -ENODEV;
1594
1595 ret = device_context_mapped(iommu,
1596 pdev->bus->number, pdev->devfn);
1597 if (!ret)
1598 return ret;
1599 /* dependent device mapping */
1600 tmp = pci_find_upstream_pcie_bridge(pdev);
1601 if (!tmp)
1602 return ret;
1603 /* Secondary interface's bus number and devfn 0 */
1604 parent = pdev->bus->self;
1605 while (parent != tmp) {
1606 ret = device_context_mapped(iommu, parent->bus->number,
1607 parent->devfn);
1608 if (!ret)
1609 return ret;
1610 parent = parent->bus->self;
1611 }
1612 if (tmp->is_pcie)
1613 return device_context_mapped(iommu,
1614 tmp->subordinate->number, 0);
1615 else
1616 return device_context_mapped(iommu,
1617 tmp->bus->number, tmp->devfn);
1618 }
1619
1620 static int
1621 domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
1622 u64 hpa, size_t size, int prot)
1623 {
1624 u64 start_pfn, end_pfn;
1625 struct dma_pte *pte;
1626 int index;
1627 int addr_width = agaw_to_width(domain->agaw);
1628
1629 hpa &= (((u64)1) << addr_width) - 1;
1630
1631 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1632 return -EINVAL;
1633 iova &= PAGE_MASK;
1634 start_pfn = ((u64)hpa) >> VTD_PAGE_SHIFT;
1635 end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT;
1636 index = 0;
1637 while (start_pfn < end_pfn) {
1638 pte = addr_to_dma_pte(domain, iova + VTD_PAGE_SIZE * index);
1639 if (!pte)
1640 return -ENOMEM;
1641 /* We don't need lock here, nobody else
1642 * touches the iova range
1643 */
1644 BUG_ON(dma_pte_addr(pte));
1645 dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT);
1646 dma_set_pte_prot(pte, prot);
1647 domain_flush_cache(domain, pte, sizeof(*pte));
1648 start_pfn++;
1649 index++;
1650 }
1651 return 0;
1652 }
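/*
 * Example: hpa 0x1000 with size 0x1800 gives start_pfn 1 and end_pfn 3,
 * so two last-level ptes are written, covering iova and iova + 4KiB.
 */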
1653
1654 static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
1655 {
1656 if (!iommu)
1657 return;
1658
1659 clear_context_table(iommu, bus, devfn);
1660 iommu->flush.flush_context(iommu, 0, 0, 0,
1661 DMA_CCMD_GLOBAL_INVL, 0);
1662 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1663 DMA_TLB_GLOBAL_FLUSH, 0);
1664 }
1665
1666 static void domain_remove_dev_info(struct dmar_domain *domain)
1667 {
1668 struct device_domain_info *info;
1669 unsigned long flags;
1670 struct intel_iommu *iommu;
1671
1672 spin_lock_irqsave(&device_domain_lock, flags);
1673 while (!list_empty(&domain->devices)) {
1674 info = list_entry(domain->devices.next,
1675 struct device_domain_info, link);
1676 list_del(&info->link);
1677 list_del(&info->global);
1678 if (info->dev)
1679 info->dev->dev.archdata.iommu = NULL;
1680 spin_unlock_irqrestore(&device_domain_lock, flags);
1681
1682 iommu = device_to_iommu(info->bus, info->devfn);
1683 iommu_detach_dev(iommu, info->bus, info->devfn);
1684 free_devinfo_mem(info);
1685
1686 spin_lock_irqsave(&device_domain_lock, flags);
1687 }
1688 spin_unlock_irqrestore(&device_domain_lock, flags);
1689 }
1690
1691 /*
1692 * find_domain
1693 * Note: we use struct pci_dev->dev.archdata.iommu to store the info
1694 */
1695 static struct dmar_domain *
1696 find_domain(struct pci_dev *pdev)
1697 {
1698 struct device_domain_info *info;
1699
1700 /* No lock here, assumes no domain exit in normal case */
1701 info = pdev->dev.archdata.iommu;
1702 if (info)
1703 return info->domain;
1704 return NULL;
1705 }
1706
1707 /* domain is initialized */
1708 static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1709 {
1710 struct dmar_domain *domain, *found = NULL;
1711 struct intel_iommu *iommu;
1712 struct dmar_drhd_unit *drhd;
1713 struct device_domain_info *info, *tmp;
1714 struct pci_dev *dev_tmp;
1715 unsigned long flags;
1716 int bus = 0, devfn = 0;
1717
1718 domain = find_domain(pdev);
1719 if (domain)
1720 return domain;
1721
1722 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1723 if (dev_tmp) {
1724 if (dev_tmp->is_pcie) {
1725 bus = dev_tmp->subordinate->number;
1726 devfn = 0;
1727 } else {
1728 bus = dev_tmp->bus->number;
1729 devfn = dev_tmp->devfn;
1730 }
1731 spin_lock_irqsave(&device_domain_lock, flags);
1732 list_for_each_entry(info, &device_domain_list, global) {
1733 if (info->bus == bus && info->devfn == devfn) {
1734 found = info->domain;
1735 break;
1736 }
1737 }
1738 spin_unlock_irqrestore(&device_domain_lock, flags);
1739 /* pcie-pci bridge already has a domain, use it */
1740 if (found) {
1741 domain = found;
1742 goto found_domain;
1743 }
1744 }
1745
1746 /* Allocate new domain for the device */
1747 drhd = dmar_find_matched_drhd_unit(pdev);
1748 if (!drhd) {
1749 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1750 pci_name(pdev));
1751 return NULL;
1752 }
1753 iommu = drhd->iommu;
1754
1755 domain = iommu_alloc_domain(iommu);
1756 if (!domain)
1757 goto error;
1758
1759 if (domain_init(domain, gaw)) {
1760 domain_exit(domain);
1761 goto error;
1762 }
1763
1764 /* register pcie-to-pci device */
1765 if (dev_tmp) {
1766 info = alloc_devinfo_mem();
1767 if (!info) {
1768 domain_exit(domain);
1769 goto error;
1770 }
1771 info->bus = bus;
1772 info->devfn = devfn;
1773 info->dev = NULL;
1774 info->domain = domain;
1775 /* This domain is shared by devices under p2p bridge */
1776 domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
1777
1778 /* pcie-to-pci bridge already has a domain, use it */
1779 found = NULL;
1780 spin_lock_irqsave(&device_domain_lock, flags);
1781 list_for_each_entry(tmp, &device_domain_list, global) {
1782 if (tmp->bus == bus && tmp->devfn == devfn) {
1783 found = tmp->domain;
1784 break;
1785 }
1786 }
1787 if (found) {
1788 free_devinfo_mem(info);
1789 domain_exit(domain);
1790 domain = found;
1791 } else {
1792 list_add(&info->link, &domain->devices);
1793 list_add(&info->global, &device_domain_list);
1794 }
1795 spin_unlock_irqrestore(&device_domain_lock, flags);
1796 }
1797
1798 found_domain:
1799 info = alloc_devinfo_mem();
1800 if (!info)
1801 goto error;
1802 info->bus = pdev->bus->number;
1803 info->devfn = pdev->devfn;
1804 info->dev = pdev;
1805 info->domain = domain;
1806 spin_lock_irqsave(&device_domain_lock, flags);
1807 /* somebody else was faster and set it already */
1808 found = find_domain(pdev);
1809 if (found != NULL) {
1810 spin_unlock_irqrestore(&device_domain_lock, flags);
1811 if (found != domain) {
1812 domain_exit(domain);
1813 domain = found;
1814 }
1815 free_devinfo_mem(info);
1816 return domain;
1817 }
1818 list_add(&info->link, &domain->devices);
1819 list_add(&info->global, &device_domain_list);
1820 pdev->dev.archdata.iommu = info;
1821 spin_unlock_irqrestore(&device_domain_lock, flags);
1822 return domain;
1823 error:
1824 /* recheck it here, maybe others set it */
1825 return find_domain(pdev);
1826 }
1827
1828 static int iommu_prepare_identity_map(struct pci_dev *pdev,
1829 unsigned long long start,
1830 unsigned long long end)
1831 {
1832 struct dmar_domain *domain;
1833 unsigned long size;
1834 unsigned long long base;
1835 int ret;
1836
1837 printk(KERN_INFO
1838 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1839 pci_name(pdev), start, end);
1840 /* page table init */
1841 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1842 if (!domain)
1843 return -ENOMEM;
1844
1845 /* The address might not be aligned */
1846 base = start & PAGE_MASK;
1847 size = end - base;
1848 size = PAGE_ALIGN(size);
1849 if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1850 IOVA_PFN(base + size) - 1)) {
1851 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1852 ret = -ENOMEM;
1853 goto error;
1854 }
1855
1856 pr_debug("Mapping reserved region %lx@%llx for %s\n",
1857 size, base, pci_name(pdev));
1858 /*
1859 * The RMRR range might overlap with a physical memory range,
1860 * so clear it first
1861 */
1862 dma_pte_clear_range(domain, base, base + size);
1863
1864 ret = domain_page_mapping(domain, base, base, size,
1865 DMA_PTE_READ|DMA_PTE_WRITE);
1866 if (ret)
1867 goto error;
1868
1869 /* context entry init */
1870 ret = domain_context_mapping(domain, pdev);
1871 if (!ret)
1872 return 0;
1873 error:
1874 domain_exit(domain);
1875 return ret;
1876
1877 }
1878
1879 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1880 struct pci_dev *pdev)
1881 {
1882 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1883 return 0;
1884 return iommu_prepare_identity_map(pdev, rmrr->base_address,
1885 rmrr->end_address + 1);
1886 }
1887
1888 #ifdef CONFIG_DMAR_GFX_WA
1889 struct iommu_prepare_data {
1890 struct pci_dev *pdev;
1891 int ret;
1892 };
1893
1894 static int __init iommu_prepare_work_fn(unsigned long start_pfn,
1895 unsigned long end_pfn, void *datax)
1896 {
1897 struct iommu_prepare_data *data;
1898
1899 data = (struct iommu_prepare_data *)datax;
1900
1901 data->ret = iommu_prepare_identity_map(data->pdev,
1902 start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
1903 return data->ret;
1904
1905 }
1906
1907 static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev)
1908 {
1909 int nid;
1910 struct iommu_prepare_data data;
1911
1912 data.pdev = pdev;
1913 data.ret = 0;
1914
1915 for_each_online_node(nid) {
1916 work_with_active_regions(nid, iommu_prepare_work_fn, &data);
1917 if (data.ret)
1918 return data.ret;
1919 }
1920 return data.ret;
1921 }
1922
1923 static void __init iommu_prepare_gfx_mapping(void)
1924 {
1925 struct pci_dev *pdev = NULL;
1926 int ret;
1927
1928 for_each_pci_dev(pdev) {
1929 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
1930 !IS_GFX_DEVICE(pdev))
1931 continue;
1932 printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
1933 pci_name(pdev));
1934 ret = iommu_prepare_with_active_regions(pdev);
1935 if (ret)
1936 printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
1937 }
1938 }
1939 #else /* !CONFIG_DMAR_GFX_WA */
1940 static inline void iommu_prepare_gfx_mapping(void)
1941 {
1942 return;
1943 }
1944 #endif
1945
1946 #ifdef CONFIG_DMAR_FLOPPY_WA
1947 static inline void iommu_prepare_isa(void)
1948 {
1949 struct pci_dev *pdev;
1950 int ret;
1951
1952 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
1953 if (!pdev)
1954 return;
1955
1956 printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
1957 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
1958
1959 if (ret)
1960 printk(KERN_ERR "IOMMU: Failed to create 0-16M identity map, "
1961 "floppy might not work\n");
1962
1963 }
1964 #else
1965 static inline void iommu_prepare_isa(void)
1966 {
1967 return;
1968 }
1969 #endif /* !CONFIG_DMAR_FLOPPY_WA */
1970
1971 static int __init init_dmars(void)
1972 {
1973 struct dmar_drhd_unit *drhd;
1974 struct dmar_rmrr_unit *rmrr;
1975 struct pci_dev *pdev;
1976 struct intel_iommu *iommu;
1977 int i, ret, unit = 0;
1978
1979 /*
1980 * for each drhd
1981 * allocate root
1982 * initialize and program root entry to not present
1983 * endfor
1984 */
1985 for_each_drhd_unit(drhd) {
1986 g_num_of_iommus++;
1987 /*
1988 * lock not needed as this is only incremented in the single-
1989 * threaded kernel __init code path; all other accesses are
1990 * read-only
1991 */
1992 }
1993
1994 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
1995 GFP_KERNEL);
1996 if (!g_iommus) {
1997 printk(KERN_ERR "Allocating global iommu array failed\n");
1998 ret = -ENOMEM;
1999 goto error;
2000 }
2001
2002 deferred_flush = kzalloc(g_num_of_iommus *
2003 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2004 if (!deferred_flush) {
2005 kfree(g_iommus);
2006 ret = -ENOMEM;
2007 goto error;
2008 }
2009
2010 for_each_drhd_unit(drhd) {
2011 if (drhd->ignored)
2012 continue;
2013
2014 iommu = drhd->iommu;
2015 g_iommus[iommu->seq_id] = iommu;
2016
2017 ret = iommu_init_domains(iommu);
2018 if (ret)
2019 goto error;
2020
2021 /*
2022 * TBD:
2023 * we could share the same root & context tables
2024 * among all IOMMUs. Need to split it later.
2025 */
2026 ret = iommu_alloc_root_entry(iommu);
2027 if (ret) {
2028 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
2029 goto error;
2030 }
2031 }
2032
2033 for_each_drhd_unit(drhd) {
2034 if (drhd->ignored)
2035 continue;
2036
2037 iommu = drhd->iommu;
2038 if (dmar_enable_qi(iommu)) {
2039 /*
2040 			 * Queued Invalidation could not be enabled; fall back
2041 			 * to register-based invalidation
2042 */
2043 iommu->flush.flush_context = __iommu_flush_context;
2044 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2045 printk(KERN_INFO "IOMMU 0x%Lx: using Register based "
2046 "invalidation\n",
2047 (unsigned long long)drhd->reg_base_addr);
2048 } else {
2049 iommu->flush.flush_context = qi_flush_context;
2050 iommu->flush.flush_iotlb = qi_flush_iotlb;
2051 printk(KERN_INFO "IOMMU 0x%Lx: using Queued "
2052 "invalidation\n",
2053 (unsigned long long)drhd->reg_base_addr);
2054 }
2055 }
2056
2057 /*
2058 * For each rmrr
2059 * for each dev attached to rmrr
2060 * do
2061 * locate drhd for dev, alloc domain for dev
2062 * allocate free domain
2063 * allocate page table entries for rmrr
2064 * if context not allocated for bus
2065 * allocate and init context
2066 * set present in root table for this bus
2067 * init context with domain, translation etc
2068 * endfor
2069 * endfor
2070 */
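	/*
	 * Background note: RMRRs are ranges the BIOS reports as being used
	 * for DMA before the OS takes control; typical (but not guaranteed)
	 * examples are buffers for USB legacy keyboard/mouse emulation or an
	 * integrated graphics frame buffer. Identity-mapping them here keeps
	 * that DMA working once translation is switched on.
	 */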
2071 for_each_rmrr_units(rmrr) {
2072 for (i = 0; i < rmrr->devices_cnt; i++) {
2073 pdev = rmrr->devices[i];
2074 			/* some BIOSes list non-existent devices in the DMAR table */
2075 if (!pdev)
2076 continue;
2077 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
2078 if (ret)
2079 printk(KERN_ERR
2080 "IOMMU: mapping reserved region failed\n");
2081 }
2082 }
2083
2084 iommu_prepare_gfx_mapping();
2085
2086 iommu_prepare_isa();
2087
2088 /*
2089 * for each drhd
2090 * enable fault log
2091 * global invalidate context cache
2092 * global invalidate iotlb
2093 * enable translation
2094 */
2095 for_each_drhd_unit(drhd) {
2096 if (drhd->ignored)
2097 continue;
2098 iommu = drhd->iommu;
2099 		sprintf(iommu->name, "dmar%d", unit++);
2100
2101 iommu_flush_write_buffer(iommu);
2102
2103 ret = dmar_set_interrupt(iommu);
2104 if (ret)
2105 goto error;
2106
2107 iommu_set_root_entry(iommu);
2108
2109 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
2110 0);
2111 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
2112 0);
2113 iommu_disable_protect_mem_regions(iommu);
2114
2115 ret = iommu_enable_translation(iommu);
2116 if (ret)
2117 goto error;
2118 }
2119
2120 return 0;
2121 error:
2122 for_each_drhd_unit(drhd) {
2123 if (drhd->ignored)
2124 continue;
2125 iommu = drhd->iommu;
2126 free_iommu(iommu);
2127 }
2128 kfree(g_iommus);
2129 return ret;
2130 }
2131
2132 static inline u64 aligned_size(u64 host_addr, size_t size)
2133 {
2134 u64 addr;
2135 addr = (host_addr & (~PAGE_MASK)) + size;
2136 return PAGE_ALIGN(addr);
2137 }
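/*
 * Worked example (illustrative, 4KiB pages): aligned_size(0x1234, 0x100)
 * adds the sub-page offset 0x234 to the length, giving 0x334, and
 * PAGE_ALIGN() rounds that up to 0x1000, so every page the buffer touches
 * is covered by the mapping.
 */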
2138
2139 struct iova *
2140 iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
2141 {
2142 struct iova *piova;
2143
2144 /* Make sure it's in range */
2145 end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
2146 if (!size || (IOVA_START_ADDR + size > end))
2147 return NULL;
2148
2149 piova = alloc_iova(&domain->iovad,
2150 size >> PAGE_SHIFT, IOVA_PFN(end), 1);
2151 return piova;
2152 }
2153
2154 static struct iova *
2155 __intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
2156 size_t size, u64 dma_mask)
2157 {
2158 struct pci_dev *pdev = to_pci_dev(dev);
2159 struct iova *iova = NULL;
2160
2161 if (dma_mask <= DMA_32BIT_MASK || dmar_forcedac)
2162 iova = iommu_alloc_iova(domain, size, dma_mask);
2163 else {
2164 /*
2165 		 * First try to allocate an I/O virtual address below
2166 		 * DMA_32BIT_MASK; if that fails, try allocating from the
2167 		 * higher range
2168 */
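		/*
		 * Illustrative example: a device advertising a 64-bit
		 * dma_mask is first given an IOVA below 4GiB so plain
		 * single-address-cycle PCI addressing can be used; only if
		 * that space is exhausted does the allocation fall back to
		 * the full mask. Booting with intel_iommu=forcedac
		 * (dmar_forcedac above) skips the low attempt.
		 */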
2169 iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK);
2170 if (!iova)
2171 iova = iommu_alloc_iova(domain, size, dma_mask);
2172 }
2173
2174 if (!iova) {
2175 printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev));
2176 return NULL;
2177 }
2178
2179 return iova;
2180 }
2181
2182 static struct dmar_domain *
2183 get_valid_domain_for_dev(struct pci_dev *pdev)
2184 {
2185 struct dmar_domain *domain;
2186 int ret;
2187
2188 domain = get_domain_for_dev(pdev,
2189 DEFAULT_DOMAIN_ADDRESS_WIDTH);
2190 if (!domain) {
2191 printk(KERN_ERR
2192 "Allocating domain for %s failed", pci_name(pdev));
2193 return NULL;
2194 }
2195
2196 /* make sure context mapping is ok */
2197 if (unlikely(!domain_context_mapped(pdev))) {
2198 ret = domain_context_mapping(domain, pdev);
2199 if (ret) {
2200 printk(KERN_ERR
2201 "Domain context map for %s failed",
2202 pci_name(pdev));
2203 return NULL;
2204 }
2205 }
2206
2207 return domain;
2208 }
2209
2210 static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2211 size_t size, int dir, u64 dma_mask)
2212 {
2213 struct pci_dev *pdev = to_pci_dev(hwdev);
2214 struct dmar_domain *domain;
2215 phys_addr_t start_paddr;
2216 struct iova *iova;
2217 int prot = 0;
2218 int ret;
2219 struct intel_iommu *iommu;
2220
2221 BUG_ON(dir == DMA_NONE);
2222 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2223 return paddr;
2224
2225 domain = get_valid_domain_for_dev(pdev);
2226 if (!domain)
2227 return 0;
2228
2229 iommu = domain_get_iommu(domain);
2230 size = aligned_size((u64)paddr, size);
2231
2232 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
2233 if (!iova)
2234 goto error;
2235
2236 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2237
2238 /*
2239 	 * Check if DMAR supports zero-length reads on write-only
2240 	 * mappings.
2241 	 */
2242 	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
2243 !cap_zlr(iommu->cap))
2244 prot |= DMA_PTE_READ;
2245 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2246 prot |= DMA_PTE_WRITE;
2247 /*
2248 	 * [paddr, paddr + size) might cover only part of a page, but we map
2249 	 * the whole page. Note: if two parts of one page are mapped
2250 	 * separately, we might have two guest addresses mapping to the same
2251 	 * host paddr, but this is not a big problem
2252 */
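	/*
	 * Worked example (illustrative): for paddr = 0x12345678 and
	 * size = 0x200, aligned_size() above already rounded the request up
	 * to one full 4KiB page, so a single page is mapped at start_paddr
	 * and the address handed back to the driver is start_paddr + 0x678,
	 * preserving the sub-page offset.
	 */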
2253 ret = domain_page_mapping(domain, start_paddr,
2254 ((u64)paddr) & PAGE_MASK, size, prot);
2255 if (ret)
2256 goto error;
2257
2258 /* it's a non-present to present mapping */
2259 ret = iommu_flush_iotlb_psi(iommu, domain->id,
2260 start_paddr, size >> VTD_PAGE_SHIFT, 1);
2261 if (ret)
2262 iommu_flush_write_buffer(iommu);
2263
2264 return start_paddr + ((u64)paddr & (~PAGE_MASK));
2265
2266 error:
2267 if (iova)
2268 __free_iova(&domain->iovad, iova);
2269 printk(KERN_ERR"Device %s request: %lx@%llx dir %d --- failed\n",
2270 pci_name(pdev), size, (unsigned long long)paddr, dir);
2271 return 0;
2272 }
2273
2274 dma_addr_t intel_map_single(struct device *hwdev, phys_addr_t paddr,
2275 size_t size, int dir)
2276 {
2277 return __intel_map_single(hwdev, paddr, size, dir,
2278 to_pci_dev(hwdev)->dma_mask);
2279 }
2280
2281 static void flush_unmaps(void)
2282 {
2283 int i, j;
2284
2285 timer_on = 0;
2286
2287 /* just flush them all */
2288 for (i = 0; i < g_num_of_iommus; i++) {
2289 struct intel_iommu *iommu = g_iommus[i];
2290 if (!iommu)
2291 continue;
2292
2293 if (deferred_flush[i].next) {
2294 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2295 DMA_TLB_GLOBAL_FLUSH, 0);
2296 for (j = 0; j < deferred_flush[i].next; j++) {
2297 __free_iova(&deferred_flush[i].domain[j]->iovad,
2298 deferred_flush[i].iova[j]);
2299 }
2300 deferred_flush[i].next = 0;
2301 }
2302 }
2303
2304 list_size = 0;
2305 }
2306
2307 static void flush_unmaps_timeout(unsigned long data)
2308 {
2309 unsigned long flags;
2310
2311 spin_lock_irqsave(&async_umap_flush_lock, flags);
2312 flush_unmaps();
2313 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2314 }
2315
2316 static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2317 {
2318 unsigned long flags;
2319 int next, iommu_id;
2320 struct intel_iommu *iommu;
2321
2322 spin_lock_irqsave(&async_umap_flush_lock, flags);
2323 if (list_size == HIGH_WATER_MARK)
2324 flush_unmaps();
2325
2326 iommu = domain_get_iommu(dom);
2327 iommu_id = iommu->seq_id;
2328
2329 next = deferred_flush[iommu_id].next;
2330 deferred_flush[iommu_id].domain[next] = dom;
2331 deferred_flush[iommu_id].iova[next] = iova;
2332 deferred_flush[iommu_id].next++;
2333
2334 if (!timer_on) {
2335 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2336 timer_on = 1;
2337 }
2338 list_size++;
2339 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2340 }
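/*
 * Summary of the deferred-unmap scheme above (descriptive only): IOTLB
 * flushes are expensive, so freed IOVAs are queued per IOMMU and released
 * in one global flush, triggered either when HIGH_WATER_MARK entries have
 * accumulated or when the 10ms unmap_timer fires, whichever comes first.
 * intel_unmap_single() below bypasses this batching when
 * intel_iommu_strict is set.
 */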
2341
2342 void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
2343 int dir)
2344 {
2345 struct pci_dev *pdev = to_pci_dev(dev);
2346 struct dmar_domain *domain;
2347 unsigned long start_addr;
2348 struct iova *iova;
2349 struct intel_iommu *iommu;
2350
2351 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2352 return;
2353 domain = find_domain(pdev);
2354 BUG_ON(!domain);
2355
2356 iommu = domain_get_iommu(domain);
2357
2358 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
2359 if (!iova)
2360 return;
2361
2362 start_addr = iova->pfn_lo << PAGE_SHIFT;
2363 size = aligned_size((u64)dev_addr, size);
2364
2365 pr_debug("Device %s unmapping: %lx@%llx\n",
2366 pci_name(pdev), size, (unsigned long long)start_addr);
2367
2368 /* clear the whole page */
2369 dma_pte_clear_range(domain, start_addr, start_addr + size);
2370 /* free page tables */
2371 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2372 if (intel_iommu_strict) {
2373 if (iommu_flush_iotlb_psi(iommu,
2374 domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0))
2375 iommu_flush_write_buffer(iommu);
2376 /* free iova */
2377 __free_iova(&domain->iovad, iova);
2378 	} else {
2379 		/*
2380 		 * Queue up the release of the mapping to save the ~1/6th of
2381 		 * the CPU time otherwise spent in the iotlb flush operation.
2382 		 */
2383 		add_unmap(domain, iova);
2384 	}
2385 }
2386
2387 void *intel_alloc_coherent(struct device *hwdev, size_t size,
2388 dma_addr_t *dma_handle, gfp_t flags)
2389 {
2390 void *vaddr;
2391 int order;
2392
2393 size = PAGE_ALIGN(size);
2394 order = get_order(size);
2395 flags &= ~(GFP_DMA | GFP_DMA32);
2396
2397 vaddr = (void *)__get_free_pages(flags, order);
2398 if (!vaddr)
2399 return NULL;
2400 memset(vaddr, 0, size);
2401
2402 *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2403 DMA_BIDIRECTIONAL,
2404 hwdev->coherent_dma_mask);
2405 if (*dma_handle)
2406 return vaddr;
2407 free_pages((unsigned long)vaddr, order);
2408 return NULL;
2409 }
2410
2411 void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
2412 dma_addr_t dma_handle)
2413 {
2414 int order;
2415
2416 size = PAGE_ALIGN(size);
2417 order = get_order(size);
2418
2419 intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
2420 free_pages((unsigned long)vaddr, order);
2421 }
2422
2423 #define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
2424
2425 void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2426 int nelems, int dir)
2427 {
2428 int i;
2429 struct pci_dev *pdev = to_pci_dev(hwdev);
2430 struct dmar_domain *domain;
2431 unsigned long start_addr;
2432 struct iova *iova;
2433 size_t size = 0;
2434 void *addr;
2435 struct scatterlist *sg;
2436 struct intel_iommu *iommu;
2437
2438 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2439 return;
2440
2441 domain = find_domain(pdev);
2442 BUG_ON(!domain);
2443
2444 iommu = domain_get_iommu(domain);
2445
2446 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
2447 if (!iova)
2448 return;
2449 for_each_sg(sglist, sg, nelems, i) {
2450 addr = SG_ENT_VIRT_ADDRESS(sg);
2451 size += aligned_size((u64)addr, sg->length);
2452 }
2453
2454 start_addr = iova->pfn_lo << PAGE_SHIFT;
2455
2456 /* clear the whole page */
2457 dma_pte_clear_range(domain, start_addr, start_addr + size);
2458 /* free page tables */
2459 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2460
2461 if (iommu_flush_iotlb_psi(iommu, domain->id, start_addr,
2462 size >> VTD_PAGE_SHIFT, 0))
2463 iommu_flush_write_buffer(iommu);
2464
2465 /* free iova */
2466 __free_iova(&domain->iovad, iova);
2467 }
2468
2469 static int intel_nontranslate_map_sg(struct device *hwdev,
2470 struct scatterlist *sglist, int nelems, int dir)
2471 {
2472 int i;
2473 struct scatterlist *sg;
2474
2475 for_each_sg(sglist, sg, nelems, i) {
2476 BUG_ON(!sg_page(sg));
2477 sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
2478 sg->dma_length = sg->length;
2479 }
2480 return nelems;
2481 }
2482
2483 int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
2484 int dir)
2485 {
2486 void *addr;
2487 int i;
2488 struct pci_dev *pdev = to_pci_dev(hwdev);
2489 struct dmar_domain *domain;
2490 size_t size = 0;
2491 int prot = 0;
2492 size_t offset = 0;
2493 struct iova *iova = NULL;
2494 int ret;
2495 struct scatterlist *sg;
2496 unsigned long start_addr;
2497 struct intel_iommu *iommu;
2498
2499 BUG_ON(dir == DMA_NONE);
2500 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2501 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
2502
2503 domain = get_valid_domain_for_dev(pdev);
2504 if (!domain)
2505 return 0;
2506
2507 iommu = domain_get_iommu(domain);
2508
2509 for_each_sg(sglist, sg, nelems, i) {
2510 addr = SG_ENT_VIRT_ADDRESS(sg);
2511 addr = (void *)virt_to_phys(addr);
2512 size += aligned_size((u64)addr, sg->length);
2513 }
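	/*
	 * Illustrative note: the loop above only sums the page-aligned
	 * length of every segment so that one contiguous IOVA range can be
	 * reserved; the second loop below then maps each segment into that
	 * range. For example, three page-aligned segments of 0x200, 0x1800
	 * and 0x100 bytes reserve four pages in total.
	 */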
2514
2515 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
2516 if (!iova) {
2517 sglist->dma_length = 0;
2518 return 0;
2519 }
2520
2521 /*
2522 	 * Check if DMAR supports zero-length reads on write-only
2523 	 * mappings.
2524 	 */
2525 	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
2526 !cap_zlr(iommu->cap))
2527 prot |= DMA_PTE_READ;
2528 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2529 prot |= DMA_PTE_WRITE;
2530
2531 start_addr = iova->pfn_lo << PAGE_SHIFT;
2532 offset = 0;
2533 for_each_sg(sglist, sg, nelems, i) {
2534 addr = SG_ENT_VIRT_ADDRESS(sg);
2535 addr = (void *)virt_to_phys(addr);
2536 size = aligned_size((u64)addr, sg->length);
2537 ret = domain_page_mapping(domain, start_addr + offset,
2538 ((u64)addr) & PAGE_MASK,
2539 size, prot);
2540 if (ret) {
2541 /* clear the page */
2542 dma_pte_clear_range(domain, start_addr,
2543 start_addr + offset);
2544 /* free page tables */
2545 dma_pte_free_pagetable(domain, start_addr,
2546 start_addr + offset);
2547 /* free iova */
2548 __free_iova(&domain->iovad, iova);
2549 return 0;
2550 }
2551 sg->dma_address = start_addr + offset +
2552 ((u64)addr & (~PAGE_MASK));
2553 sg->dma_length = sg->length;
2554 offset += size;
2555 }
2556
2557 /* it's a non-present to present mapping */
2558 if (iommu_flush_iotlb_psi(iommu, domain->id,
2559 start_addr, offset >> VTD_PAGE_SHIFT, 1))
2560 iommu_flush_write_buffer(iommu);
2561 return nelems;
2562 }
2563
2564 static struct dma_mapping_ops intel_dma_ops = {
2565 .alloc_coherent = intel_alloc_coherent,
2566 .free_coherent = intel_free_coherent,
2567 .map_single = intel_map_single,
2568 .unmap_single = intel_unmap_single,
2569 .map_sg = intel_map_sg,
2570 .unmap_sg = intel_unmap_sg,
2571 };
2572
2573 static inline int iommu_domain_cache_init(void)
2574 {
2575 int ret = 0;
2576
2577 iommu_domain_cache = kmem_cache_create("iommu_domain",
2578 sizeof(struct dmar_domain),
2579 0,
2580 SLAB_HWCACHE_ALIGN,
2582 NULL);
2583 if (!iommu_domain_cache) {
2584 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
2585 ret = -ENOMEM;
2586 }
2587
2588 return ret;
2589 }
2590
2591 static inline int iommu_devinfo_cache_init(void)
2592 {
2593 int ret = 0;
2594
2595 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
2596 sizeof(struct device_domain_info),
2597 0,
2598 SLAB_HWCACHE_ALIGN,
2599 NULL);
2600 if (!iommu_devinfo_cache) {
2601 printk(KERN_ERR "Couldn't create devinfo cache\n");
2602 ret = -ENOMEM;
2603 }
2604
2605 return ret;
2606 }
2607
2608 static inline int iommu_iova_cache_init(void)
2609 {
2610 int ret = 0;
2611
2612 iommu_iova_cache = kmem_cache_create("iommu_iova",
2613 sizeof(struct iova),
2614 0,
2615 SLAB_HWCACHE_ALIGN,
2616 NULL);
2617 if (!iommu_iova_cache) {
2618 printk(KERN_ERR "Couldn't create iova cache\n");
2619 ret = -ENOMEM;
2620 }
2621
2622 return ret;
2623 }
2624
2625 static int __init iommu_init_mempool(void)
2626 {
2627 int ret;
2628 ret = iommu_iova_cache_init();
2629 if (ret)
2630 return ret;
2631
2632 ret = iommu_domain_cache_init();
2633 if (ret)
2634 goto domain_error;
2635
2636 ret = iommu_devinfo_cache_init();
2637 if (!ret)
2638 return ret;
2639
2640 kmem_cache_destroy(iommu_domain_cache);
2641 domain_error:
2642 kmem_cache_destroy(iommu_iova_cache);
2643
2644 return -ENOMEM;
2645 }
2646
2647 static void __init iommu_exit_mempool(void)
2648 {
2649 kmem_cache_destroy(iommu_devinfo_cache);
2650 kmem_cache_destroy(iommu_domain_cache);
2651 kmem_cache_destroy(iommu_iova_cache);
2653 }
2654
2655 static void __init init_no_remapping_devices(void)
2656 {
2657 struct dmar_drhd_unit *drhd;
2658
2659 for_each_drhd_unit(drhd) {
2660 if (!drhd->include_all) {
2661 int i;
2662 for (i = 0; i < drhd->devices_cnt; i++)
2663 if (drhd->devices[i] != NULL)
2664 break;
2665 /* ignore DMAR unit if no pci devices exist */
2666 if (i == drhd->devices_cnt)
2667 drhd->ignored = 1;
2668 }
2669 }
2670
2671 if (dmar_map_gfx)
2672 return;
2673
2674 for_each_drhd_unit(drhd) {
2675 int i;
2676 if (drhd->ignored || drhd->include_all)
2677 continue;
2678
2679 for (i = 0; i < drhd->devices_cnt; i++)
2680 if (drhd->devices[i] &&
2681 !IS_GFX_DEVICE(drhd->devices[i]))
2682 break;
2683
2684 if (i < drhd->devices_cnt)
2685 continue;
2686
2687 /* bypass IOMMU if it is just for gfx devices */
2688 drhd->ignored = 1;
2689 for (i = 0; i < drhd->devices_cnt; i++) {
2690 if (!drhd->devices[i])
2691 continue;
2692 drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
2693 }
2694 }
2695 }
2696
2697 int __init intel_iommu_init(void)
2698 {
2699 int ret = 0;
2700
2701 if (dmar_table_init())
2702 return -ENODEV;
2703
2704 if (dmar_dev_scope_init())
2705 return -ENODEV;
2706
2707 /*
2708 	 * Check the need for DMA-remapping initialization only now;
2709 	 * the initialization above is also used by interrupt remapping.
2710 */
2711 if (no_iommu || swiotlb || dmar_disabled)
2712 return -ENODEV;
2713
2714 iommu_init_mempool();
2715 dmar_init_reserved_ranges();
2716
2717 init_no_remapping_devices();
2718
2719 ret = init_dmars();
2720 if (ret) {
2721 printk(KERN_ERR "IOMMU: dmar init failed\n");
2722 put_iova_domain(&reserved_iova_list);
2723 iommu_exit_mempool();
2724 return ret;
2725 }
2726 printk(KERN_INFO
2727 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
2728
2729 init_timer(&unmap_timer);
2730 force_iommu = 1;
2731 dma_ops = &intel_dma_ops;
2732 return 0;
2733 }
2734
2735 static int vm_domain_add_dev_info(struct dmar_domain *domain,
2736 struct pci_dev *pdev)
2737 {
2738 struct device_domain_info *info;
2739 unsigned long flags;
2740
2741 info = alloc_devinfo_mem();
2742 if (!info)
2743 return -ENOMEM;
2744
2745 info->bus = pdev->bus->number;
2746 info->devfn = pdev->devfn;
2747 info->dev = pdev;
2748 info->domain = domain;
2749
2750 spin_lock_irqsave(&device_domain_lock, flags);
2751 list_add(&info->link, &domain->devices);
2752 list_add(&info->global, &device_domain_list);
2753 pdev->dev.archdata.iommu = info;
2754 spin_unlock_irqrestore(&device_domain_lock, flags);
2755
2756 return 0;
2757 }
2758
2759 static void vm_domain_remove_one_dev_info(struct dmar_domain *domain,
2760 struct pci_dev *pdev)
2761 {
2762 struct device_domain_info *info;
2763 struct intel_iommu *iommu;
2764 unsigned long flags;
2765 int found = 0;
2766 struct list_head *entry, *tmp;
2767
2768 iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
2769 if (!iommu)
2770 return;
2771
2772 spin_lock_irqsave(&device_domain_lock, flags);
2773 list_for_each_safe(entry, tmp, &domain->devices) {
2774 info = list_entry(entry, struct device_domain_info, link);
2775 if (info->bus == pdev->bus->number &&
2776 info->devfn == pdev->devfn) {
2777 list_del(&info->link);
2778 list_del(&info->global);
2779 if (info->dev)
2780 info->dev->dev.archdata.iommu = NULL;
2781 spin_unlock_irqrestore(&device_domain_lock, flags);
2782
2783 iommu_detach_dev(iommu, info->bus, info->devfn);
2784 free_devinfo_mem(info);
2785
2786 spin_lock_irqsave(&device_domain_lock, flags);
2787
2788 if (found)
2789 break;
2790 else
2791 continue;
2792 }
2793
2794 		/* if there are no other devices under the same iommu
2795 		 * owned by this domain, clear this iommu from iommu_bmp
2796 		 * and update the iommu count and coherency
2797 */
2798 if (device_to_iommu(info->bus, info->devfn) == iommu)
2799 found = 1;
2800 }
2801
2802 if (found == 0) {
2803 unsigned long tmp_flags;
2804 spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
2805 clear_bit(iommu->seq_id, &domain->iommu_bmp);
2806 domain->iommu_count--;
2807 domain_update_iommu_coherency(domain);
2808 spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
2809 }
2810
2811 spin_unlock_irqrestore(&device_domain_lock, flags);
2812 }
2813
2814 static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
2815 {
2816 struct device_domain_info *info;
2817 struct intel_iommu *iommu;
2818 unsigned long flags1, flags2;
2819
2820 spin_lock_irqsave(&device_domain_lock, flags1);
2821 while (!list_empty(&domain->devices)) {
2822 info = list_entry(domain->devices.next,
2823 struct device_domain_info, link);
2824 list_del(&info->link);
2825 list_del(&info->global);
2826 if (info->dev)
2827 info->dev->dev.archdata.iommu = NULL;
2828
2829 spin_unlock_irqrestore(&device_domain_lock, flags1);
2830
2831 iommu = device_to_iommu(info->bus, info->devfn);
2832 iommu_detach_dev(iommu, info->bus, info->devfn);
2833
2834 /* clear this iommu in iommu_bmp, update iommu count
2835 * and coherency
2836 */
2837 spin_lock_irqsave(&domain->iommu_lock, flags2);
2838 if (test_and_clear_bit(iommu->seq_id,
2839 &domain->iommu_bmp)) {
2840 domain->iommu_count--;
2841 domain_update_iommu_coherency(domain);
2842 }
2843 spin_unlock_irqrestore(&domain->iommu_lock, flags2);
2844
2845 free_devinfo_mem(info);
2846 spin_lock_irqsave(&device_domain_lock, flags1);
2847 }
2848 spin_unlock_irqrestore(&device_domain_lock, flags1);
2849 }
2850
2851 /* domain id for virtual machine domains; it is not written into the context entry */
2852 static unsigned long vm_domid;
2853
2854 static int vm_domain_min_agaw(struct dmar_domain *domain)
2855 {
2856 int i;
2857 int min_agaw = domain->agaw;
2858
2859 i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
2860 for (; i < g_num_of_iommus; ) {
2861 if (min_agaw > g_iommus[i]->agaw)
2862 min_agaw = g_iommus[i]->agaw;
2863
2864 i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
2865 }
2866
2867 return min_agaw;
2868 }
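/*
 * Example (illustrative): if a VM domain spans one IOMMU supporting
 * 4-level (48-bit) page tables and another limited to 3 levels (39 bits),
 * vm_domain_min_agaw() returns the 3-level AGAW, and callers must refuse
 * mappings beyond what a 39-bit address width can reach.
 */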
2869
2870 static struct dmar_domain *iommu_alloc_vm_domain(void)
2871 {
2872 struct dmar_domain *domain;
2873
2874 domain = alloc_domain_mem();
2875 if (!domain)
2876 return NULL;
2877
2878 domain->id = vm_domid++;
2879 memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
2880 domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
2881
2882 return domain;
2883 }
2884
2885 static int vm_domain_init(struct dmar_domain *domain, int guest_width)
2886 {
2887 int adjust_width;
2888
2889 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
2890 spin_lock_init(&domain->mapping_lock);
2891 spin_lock_init(&domain->iommu_lock);
2892
2893 domain_reserve_special_ranges(domain);
2894
2895 /* calculate AGAW */
2896 domain->gaw = guest_width;
2897 adjust_width = guestwidth_to_adjustwidth(guest_width);
2898 domain->agaw = width_to_agaw(adjust_width);
2899
2900 INIT_LIST_HEAD(&domain->devices);
2901
2902 domain->iommu_count = 0;
2903 domain->iommu_coherency = 0;
2904 domain->max_addr = 0;
2905
2906 /* always allocate the top pgd */
2907 domain->pgd = (struct dma_pte *)alloc_pgtable_page();
2908 if (!domain->pgd)
2909 return -ENOMEM;
2910 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
2911 return 0;
2912 }
2913
2914 static void iommu_free_vm_domain(struct dmar_domain *domain)
2915 {
2916 unsigned long flags;
2917 struct dmar_drhd_unit *drhd;
2918 struct intel_iommu *iommu;
2919 unsigned long i;
2920 unsigned long ndomains;
2921
2922 for_each_drhd_unit(drhd) {
2923 if (drhd->ignored)
2924 continue;
2925 iommu = drhd->iommu;
2926
2927 ndomains = cap_ndoms(iommu->cap);
2928 i = find_first_bit(iommu->domain_ids, ndomains);
2929 for (; i < ndomains; ) {
2930 if (iommu->domains[i] == domain) {
2931 spin_lock_irqsave(&iommu->lock, flags);
2932 clear_bit(i, iommu->domain_ids);
2933 iommu->domains[i] = NULL;
2934 spin_unlock_irqrestore(&iommu->lock, flags);
2935 break;
2936 }
2937 i = find_next_bit(iommu->domain_ids, ndomains, i+1);
2938 }
2939 }
2940 }
2941
2942 static void vm_domain_exit(struct dmar_domain *domain)
2943 {
2944 u64 end;
2945
2946 	/* Domain 0 is reserved, so don't process it */
2947 if (!domain)
2948 return;
2949
2950 vm_domain_remove_all_dev_info(domain);
2951 /* destroy iovas */
2952 put_iova_domain(&domain->iovad);
2953 end = DOMAIN_MAX_ADDR(domain->gaw);
2954 end = end & (~VTD_PAGE_MASK);
2955
2956 /* clear ptes */
2957 dma_pte_clear_range(domain, 0, end);
2958
2959 /* free page tables */
2960 dma_pte_free_pagetable(domain, 0, end);
2961
2962 iommu_free_vm_domain(domain);
2963 free_domain_mem(domain);
2964 }
2965
2966 static int intel_iommu_domain_init(struct iommu_domain *domain)
2967 {
2968 struct dmar_domain *dmar_domain;
2969
2970 dmar_domain = iommu_alloc_vm_domain();
2971 if (!dmar_domain) {
2972 printk(KERN_ERR
2973 "intel_iommu_domain_init: dmar_domain == NULL\n");
2974 return -ENOMEM;
2975 }
2976 if (vm_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2977 printk(KERN_ERR
2978 "intel_iommu_domain_init() failed\n");
2979 vm_domain_exit(dmar_domain);
2980 return -ENOMEM;
2981 }
2982 domain->priv = dmar_domain;
2983
2984 return 0;
2985 }
2986
2987 static void intel_iommu_domain_destroy(struct iommu_domain *domain)
2988 {
2989 struct dmar_domain *dmar_domain = domain->priv;
2990
2991 domain->priv = NULL;
2992 vm_domain_exit(dmar_domain);
2993 }
2994
2995 static int intel_iommu_attach_device(struct iommu_domain *domain,
2996 struct device *dev)
2997 {
2998 struct dmar_domain *dmar_domain = domain->priv;
2999 struct pci_dev *pdev = to_pci_dev(dev);
3000 struct intel_iommu *iommu;
3001 int addr_width;
3002 u64 end;
3003 int ret;
3004
3005 /* normally pdev is not mapped */
3006 if (unlikely(domain_context_mapped(pdev))) {
3007 struct dmar_domain *old_domain;
3008
3009 old_domain = find_domain(pdev);
3010 if (old_domain) {
3011 if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
3012 vm_domain_remove_one_dev_info(old_domain, pdev);
3013 else
3014 domain_remove_dev_info(old_domain);
3015 }
3016 }
3017
3018 iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
3019 if (!iommu)
3020 return -ENODEV;
3021
3022 /* check if this iommu agaw is sufficient for max mapped address */
3023 addr_width = agaw_to_width(iommu->agaw);
3024 end = DOMAIN_MAX_ADDR(addr_width);
3025 end = end & VTD_PAGE_MASK;
3026 if (end < dmar_domain->max_addr) {
3027 printk(KERN_ERR "%s: iommu agaw (%d) is not "
3028 "sufficient for the mapped address (%llx)\n",
3029 __func__, iommu->agaw, dmar_domain->max_addr);
3030 return -EFAULT;
3031 }
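	/*
	 * Illustrative consequence of the check above: an IOMMU limited to a
	 * 39-bit address width (3-level tables) cannot back a domain whose
	 * max_addr already lies beyond 512GiB, so the attach is refused
	 * rather than silently truncating translations.
	 */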
3032
3033 ret = domain_context_mapping(dmar_domain, pdev);
3034 if (ret)
3035 return ret;
3036
3037 ret = vm_domain_add_dev_info(dmar_domain, pdev);
3038 return ret;
3039 }
3040
3041 static void intel_iommu_detach_device(struct iommu_domain *domain,
3042 struct device *dev)
3043 {
3044 struct dmar_domain *dmar_domain = domain->priv;
3045 struct pci_dev *pdev = to_pci_dev(dev);
3046
3047 vm_domain_remove_one_dev_info(dmar_domain, pdev);
3048 }
3049
3050 static int intel_iommu_map_range(struct iommu_domain *domain,
3051 unsigned long iova, phys_addr_t hpa,
3052 size_t size, int iommu_prot)
3053 {
3054 struct dmar_domain *dmar_domain = domain->priv;
3055 u64 max_addr;
3056 int addr_width;
3057 int prot = 0;
3058 int ret;
3059
3060 if (iommu_prot & IOMMU_READ)
3061 prot |= DMA_PTE_READ;
3062 if (iommu_prot & IOMMU_WRITE)
3063 prot |= DMA_PTE_WRITE;
3064
3065 max_addr = (iova & VTD_PAGE_MASK) + VTD_PAGE_ALIGN(size);
3066 if (dmar_domain->max_addr < max_addr) {
3067 int min_agaw;
3068 u64 end;
3069
3070 /* check if minimum agaw is sufficient for mapped address */
3071 min_agaw = vm_domain_min_agaw(dmar_domain);
3072 addr_width = agaw_to_width(min_agaw);
3073 end = DOMAIN_MAX_ADDR(addr_width);
3074 end = end & VTD_PAGE_MASK;
3075 if (end < max_addr) {
3076 printk(KERN_ERR "%s: iommu agaw (%d) is not "
3077 "sufficient for the mapped address (%llx)\n",
3078 __func__, min_agaw, max_addr);
3079 return -EFAULT;
3080 }
3081 dmar_domain->max_addr = max_addr;
3082 }
3083
3084 ret = domain_page_mapping(dmar_domain, iova, hpa, size, prot);
3085 return ret;
3086 }
3087
3088 static void intel_iommu_unmap_range(struct iommu_domain *domain,
3089 unsigned long iova, size_t size)
3090 {
3091 struct dmar_domain *dmar_domain = domain->priv;
3092 dma_addr_t base;
3093
3094 /* The address might not be aligned */
3095 base = iova & VTD_PAGE_MASK;
3096 size = VTD_PAGE_ALIGN(size);
3097 dma_pte_clear_range(dmar_domain, base, base + size);
3098
3099 if (dmar_domain->max_addr == base + size)
3100 dmar_domain->max_addr = base;
3101 }
3102
3103 int intel_iommu_found(void)
3104 {
3105 return g_num_of_iommus;
3106 }
3107 EXPORT_SYMBOL_GPL(intel_iommu_found);
3108
3109 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
3110 unsigned long iova)
3111 {
3112 struct dmar_domain *dmar_domain = domain->priv;
3113 struct dma_pte *pte;
3114 u64 phys = 0;
3115
3116 pte = addr_to_dma_pte(dmar_domain, iova);
3117 if (pte)
3118 phys = dma_pte_addr(pte);
3119
3120 return phys;
3121 }
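/*
 * Usage sketch (illustrative only, assuming the generic wrappers exported
 * by this kernel's <linux/iommu.h>, e.g. iommu_domain_alloc(),
 * iommu_attach_device(), iommu_map_range() and iommu_iova_to_phys()):
 *
 *	struct iommu_domain *dom = iommu_domain_alloc();
 *
 *	iommu_attach_device(dom, &pdev->dev);
 *	iommu_map_range(dom, iova, hpa, size, IOMMU_READ | IOMMU_WRITE);
 *	phys = iommu_iova_to_phys(dom, iova);
 *
 * Once this driver registers its iommu_ops, each of those calls ends up
 * in the intel_iommu_* callbacks defined above.
 */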