]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blame - drivers/pci/intel-iommu.c
proc: fix ->open'less usage due to ->proc_fops flip
[mirror_ubuntu-bionic-kernel.git] / drivers / pci / intel-iommu.c
CommitLineData
ba395927
KA
1/*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
17 * Copyright (C) Ashok Raj <ashok.raj@intel.com>
18 * Copyright (C) Shaohua Li <shaohua.li@intel.com>
19 * Copyright (C) Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
20 */
21
22#include <linux/init.h>
23#include <linux/bitmap.h>
24#include <linux/slab.h>
25#include <linux/irq.h>
26#include <linux/interrupt.h>
27#include <linux/sysdev.h>
28#include <linux/spinlock.h>
29#include <linux/pci.h>
30#include <linux/dmar.h>
31#include <linux/dma-mapping.h>
32#include <linux/mempool.h>
33#include "iova.h"
34#include "intel-iommu.h"
35#include <asm/proto.h> /* force_iommu in this header in x86-64*/
36#include <asm/cacheflush.h>
395624fc 37#include <asm/gart.h>
ba395927
KA
38#include "pci.h"
39
/* Device-class tests used when deciding which devices get identity maps. */
#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)

/* The IOAPIC MMIO window must never be handed out as a DMA address. */
#define IOAPIC_RANGE_START	(0xfee00000)
#define IOAPIC_RANGE_END	(0xfeefffff)
#define IOVA_START_ADDR		(0x1000)

/* Guest address width used for newly created DMA domains. */
#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48

/* How long to poll hardware before declaring it dead (1 minute). */
#define DMAR_OPERATION_TIMEOUT (HZ*60)

/*
 * Highest address representable with a gaw-bit guest address width.
 * The argument is parenthesized so expression arguments (e.g. a ?:)
 * cannot change the meaning of the shift.
 */
#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << (gaw)) - 1)
52
53static void domain_remove_dev_info(struct dmar_domain *domain);
54
55static int dmar_disabled;
56static int __initdata dmar_map_gfx = 1;
7d3b03ce 57static int dmar_forcedac;
ba395927
KA
58
59#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
60static DEFINE_SPINLOCK(device_domain_lock);
61static LIST_HEAD(device_domain_list);
62
63static int __init intel_iommu_setup(char *str)
64{
65 if (!str)
66 return -EINVAL;
67 while (*str) {
68 if (!strncmp(str, "off", 3)) {
69 dmar_disabled = 1;
70 printk(KERN_INFO"Intel-IOMMU: disabled\n");
71 } else if (!strncmp(str, "igfx_off", 8)) {
72 dmar_map_gfx = 0;
73 printk(KERN_INFO
74 "Intel-IOMMU: disable GFX device mapping\n");
7d3b03ce
KA
75 } else if (!strncmp(str, "forcedac", 8)) {
76 printk (KERN_INFO
77 "Intel-IOMMU: Forcing DAC for PCI devices\n");
78 dmar_forcedac = 1;
ba395927
KA
79 }
80
81 str += strcspn(str, ",");
82 while (*str == ',')
83 str++;
84 }
85 return 0;
86}
87__setup("intel_iommu=", intel_iommu_setup);
88
89static struct kmem_cache *iommu_domain_cache;
90static struct kmem_cache *iommu_devinfo_cache;
91static struct kmem_cache *iommu_iova_cache;
92
eb3fa7cb
KA
93static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
94{
95 unsigned int flags;
96 void *vaddr;
97
98 /* trying to avoid low memory issues */
99 flags = current->flags & PF_MEMALLOC;
100 current->flags |= PF_MEMALLOC;
101 vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
102 current->flags &= (~PF_MEMALLOC | flags);
103 return vaddr;
104}
105
106
ba395927
KA
107static inline void *alloc_pgtable_page(void)
108{
eb3fa7cb
KA
109 unsigned int flags;
110 void *vaddr;
111
112 /* trying to avoid low memory issues */
113 flags = current->flags & PF_MEMALLOC;
114 current->flags |= PF_MEMALLOC;
115 vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
116 current->flags &= (~PF_MEMALLOC | flags);
117 return vaddr;
ba395927
KA
118}
119
120static inline void free_pgtable_page(void *vaddr)
121{
122 free_page((unsigned long)vaddr);
123}
124
125static inline void *alloc_domain_mem(void)
126{
eb3fa7cb 127 return iommu_kmem_cache_alloc(iommu_domain_cache);
ba395927
KA
128}
129
130static inline void free_domain_mem(void *vaddr)
131{
132 kmem_cache_free(iommu_domain_cache, vaddr);
133}
134
135static inline void * alloc_devinfo_mem(void)
136{
eb3fa7cb 137 return iommu_kmem_cache_alloc(iommu_devinfo_cache);
ba395927
KA
138}
139
140static inline void free_devinfo_mem(void *vaddr)
141{
142 kmem_cache_free(iommu_devinfo_cache, vaddr);
143}
144
145struct iova *alloc_iova_mem(void)
146{
eb3fa7cb 147 return iommu_kmem_cache_alloc(iommu_iova_cache);
ba395927
KA
148}
149
150void free_iova_mem(struct iova *iova)
151{
152 kmem_cache_free(iommu_iova_cache, iova);
153}
154
155static inline void __iommu_flush_cache(
156 struct intel_iommu *iommu, void *addr, int size)
157{
158 if (!ecap_coherent(iommu->ecap))
159 clflush_cache_range(addr, size);
160}
161
162/* Gets context entry for a given bus and devfn */
163static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
164 u8 bus, u8 devfn)
165{
166 struct root_entry *root;
167 struct context_entry *context;
168 unsigned long phy_addr;
169 unsigned long flags;
170
171 spin_lock_irqsave(&iommu->lock, flags);
172 root = &iommu->root_entry[bus];
173 context = get_context_addr_from_root(root);
174 if (!context) {
175 context = (struct context_entry *)alloc_pgtable_page();
176 if (!context) {
177 spin_unlock_irqrestore(&iommu->lock, flags);
178 return NULL;
179 }
180 __iommu_flush_cache(iommu, (void *)context, PAGE_SIZE_4K);
181 phy_addr = virt_to_phys((void *)context);
182 set_root_value(root, phy_addr);
183 set_root_present(root);
184 __iommu_flush_cache(iommu, root, sizeof(*root));
185 }
186 spin_unlock_irqrestore(&iommu->lock, flags);
187 return &context[devfn];
188}
189
190static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
191{
192 struct root_entry *root;
193 struct context_entry *context;
194 int ret;
195 unsigned long flags;
196
197 spin_lock_irqsave(&iommu->lock, flags);
198 root = &iommu->root_entry[bus];
199 context = get_context_addr_from_root(root);
200 if (!context) {
201 ret = 0;
202 goto out;
203 }
204 ret = context_present(context[devfn]);
205out:
206 spin_unlock_irqrestore(&iommu->lock, flags);
207 return ret;
208}
209
210static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
211{
212 struct root_entry *root;
213 struct context_entry *context;
214 unsigned long flags;
215
216 spin_lock_irqsave(&iommu->lock, flags);
217 root = &iommu->root_entry[bus];
218 context = get_context_addr_from_root(root);
219 if (context) {
220 context_clear_entry(context[devfn]);
221 __iommu_flush_cache(iommu, &context[devfn], \
222 sizeof(*context));
223 }
224 spin_unlock_irqrestore(&iommu->lock, flags);
225}
226
227static void free_context_table(struct intel_iommu *iommu)
228{
229 struct root_entry *root;
230 int i;
231 unsigned long flags;
232 struct context_entry *context;
233
234 spin_lock_irqsave(&iommu->lock, flags);
235 if (!iommu->root_entry) {
236 goto out;
237 }
238 for (i = 0; i < ROOT_ENTRY_NR; i++) {
239 root = &iommu->root_entry[i];
240 context = get_context_addr_from_root(root);
241 if (context)
242 free_pgtable_page(context);
243 }
244 free_pgtable_page(iommu->root_entry);
245 iommu->root_entry = NULL;
246out:
247 spin_unlock_irqrestore(&iommu->lock, flags);
248}
249
/* page table handling */
#define LEVEL_STRIDE		(9)
#define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)

/*
 * The page-table depth ("level") and the adjusted guest address width
 * (AGAW) are related by: width = 30 + agaw * 9 and level = agaw + 2.
 * Each level decodes LEVEL_STRIDE (9) address bits; the leaf level
 * starts at bit 12 (4K pages).
 */
static inline int agaw_to_level(int agaw)
{
	return agaw + 2;
}

static inline int agaw_to_width(int agaw)
{
	return 30 + agaw * LEVEL_STRIDE;
}

static inline int width_to_agaw(int width)
{
	return (width - 30) / LEVEL_STRIDE;
}

/* Lowest address bit decoded by the table at the given level. */
static inline unsigned int level_to_offset_bits(int level)
{
	return (12 + (level - 1) * LEVEL_STRIDE);
}

/* Index of @addr within the page table at @level. */
static inline int address_level_offset(u64 addr, int level)
{
	return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
}

/* Mask keeping the address bits at and above the given level. */
static inline u64 level_mask(int level)
{
	return ((u64)-1 << level_to_offset_bits(level));
}

/* Number of bytes mapped by a single entry at the given level. */
static inline u64 level_size(int level)
{
	return ((u64)1 << level_to_offset_bits(level));
}

/* Round @addr up to the next entry boundary at @level. */
static inline u64 align_to_level(u64 addr, int level)
{
	return ((addr + level_size(level) - 1) & level_mask(level));
}
294
295static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
296{
297 int addr_width = agaw_to_width(domain->agaw);
298 struct dma_pte *parent, *pte = NULL;
299 int level = agaw_to_level(domain->agaw);
300 int offset;
301 unsigned long flags;
302
303 BUG_ON(!domain->pgd);
304
305 addr &= (((u64)1) << addr_width) - 1;
306 parent = domain->pgd;
307
308 spin_lock_irqsave(&domain->mapping_lock, flags);
309 while (level > 0) {
310 void *tmp_page;
311
312 offset = address_level_offset(addr, level);
313 pte = &parent[offset];
314 if (level == 1)
315 break;
316
317 if (!dma_pte_present(*pte)) {
318 tmp_page = alloc_pgtable_page();
319
320 if (!tmp_page) {
321 spin_unlock_irqrestore(&domain->mapping_lock,
322 flags);
323 return NULL;
324 }
325 __iommu_flush_cache(domain->iommu, tmp_page,
326 PAGE_SIZE_4K);
327 dma_set_pte_addr(*pte, virt_to_phys(tmp_page));
328 /*
329 * high level table always sets r/w, last level page
330 * table control read/write
331 */
332 dma_set_pte_readable(*pte);
333 dma_set_pte_writable(*pte);
334 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
335 }
336 parent = phys_to_virt(dma_pte_addr(*pte));
337 level--;
338 }
339
340 spin_unlock_irqrestore(&domain->mapping_lock, flags);
341 return pte;
342}
343
344/* return address's pte at specific level */
345static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
346 int level)
347{
348 struct dma_pte *parent, *pte = NULL;
349 int total = agaw_to_level(domain->agaw);
350 int offset;
351
352 parent = domain->pgd;
353 while (level <= total) {
354 offset = address_level_offset(addr, total);
355 pte = &parent[offset];
356 if (level == total)
357 return pte;
358
359 if (!dma_pte_present(*pte))
360 break;
361 parent = phys_to_virt(dma_pte_addr(*pte));
362 total--;
363 }
364 return NULL;
365}
366
367/* clear one page's page table */
368static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
369{
370 struct dma_pte *pte = NULL;
371
372 /* get last level pte */
373 pte = dma_addr_level_pte(domain, addr, 1);
374
375 if (pte) {
376 dma_clear_pte(*pte);
377 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
378 }
379}
380
381/* clear last level pte, a tlb flush should be followed */
382static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
383{
384 int addr_width = agaw_to_width(domain->agaw);
385
386 start &= (((u64)1) << addr_width) - 1;
387 end &= (((u64)1) << addr_width) - 1;
388 /* in case it's partial page */
389 start = PAGE_ALIGN_4K(start);
390 end &= PAGE_MASK_4K;
391
392 /* we don't need lock here, nobody else touches the iova range */
393 while (start < end) {
394 dma_pte_clear_one(domain, start);
395 start += PAGE_SIZE_4K;
396 }
397}
398
399/* free page table pages. last level pte should already be cleared */
400static void dma_pte_free_pagetable(struct dmar_domain *domain,
401 u64 start, u64 end)
402{
403 int addr_width = agaw_to_width(domain->agaw);
404 struct dma_pte *pte;
405 int total = agaw_to_level(domain->agaw);
406 int level;
407 u64 tmp;
408
409 start &= (((u64)1) << addr_width) - 1;
410 end &= (((u64)1) << addr_width) - 1;
411
412 /* we don't need lock here, nobody else touches the iova range */
413 level = 2;
414 while (level <= total) {
415 tmp = align_to_level(start, level);
416 if (tmp >= end || (tmp + level_size(level) > end))
417 return;
418
419 while (tmp < end) {
420 pte = dma_addr_level_pte(domain, tmp, level);
421 if (pte) {
422 free_pgtable_page(
423 phys_to_virt(dma_pte_addr(*pte)));
424 dma_clear_pte(*pte);
425 __iommu_flush_cache(domain->iommu,
426 pte, sizeof(*pte));
427 }
428 tmp += level_size(level);
429 }
430 level++;
431 }
432 /* free pgd */
433 if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
434 free_pgtable_page(domain->pgd);
435 domain->pgd = NULL;
436 }
437}
438
439/* iommu handling */
440static int iommu_alloc_root_entry(struct intel_iommu *iommu)
441{
442 struct root_entry *root;
443 unsigned long flags;
444
445 root = (struct root_entry *)alloc_pgtable_page();
446 if (!root)
447 return -ENOMEM;
448
449 __iommu_flush_cache(iommu, root, PAGE_SIZE_4K);
450
451 spin_lock_irqsave(&iommu->lock, flags);
452 iommu->root_entry = root;
453 spin_unlock_irqrestore(&iommu->lock, flags);
454
455 return 0;
456}
457
/*
 * Poll an IOMMU register with @op until @cond is true, leaving the
 * last value read in @sts.  Panics if the hardware does not respond
 * within DMAR_OPERATION_TIMEOUT.  Wrapped in do { } while (0) so the
 * macro expands safely as a single statement inside an unbraced
 * if/else (the bare-brace form did not).
 */
#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
do {									\
	unsigned long start_time = jiffies;				\
	while (1) {							\
		sts = op(iommu->reg + offset);				\
		if (cond)						\
			break;						\
		if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT))\
			panic("DMAR hardware is malfunctioning\n");	\
		cpu_relax();						\
	}								\
} while (0)
470
471static void iommu_set_root_entry(struct intel_iommu *iommu)
472{
473 void *addr;
474 u32 cmd, sts;
475 unsigned long flag;
476
477 addr = iommu->root_entry;
478
479 spin_lock_irqsave(&iommu->register_lock, flag);
480 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
481
482 cmd = iommu->gcmd | DMA_GCMD_SRTP;
483 writel(cmd, iommu->reg + DMAR_GCMD_REG);
484
485 /* Make sure hardware complete it */
486 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
487 readl, (sts & DMA_GSTS_RTPS), sts);
488
489 spin_unlock_irqrestore(&iommu->register_lock, flag);
490}
491
492static void iommu_flush_write_buffer(struct intel_iommu *iommu)
493{
494 u32 val;
495 unsigned long flag;
496
497 if (!cap_rwbf(iommu->cap))
498 return;
499 val = iommu->gcmd | DMA_GCMD_WBF;
500
501 spin_lock_irqsave(&iommu->register_lock, flag);
502 writel(val, iommu->reg + DMAR_GCMD_REG);
503
504 /* Make sure hardware complete it */
505 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
506 readl, (!(val & DMA_GSTS_WBFS)), val);
507
508 spin_unlock_irqrestore(&iommu->register_lock, flag);
509}
510
511/* return value determine if we need a write buffer flush */
512static int __iommu_flush_context(struct intel_iommu *iommu,
513 u16 did, u16 source_id, u8 function_mask, u64 type,
514 int non_present_entry_flush)
515{
516 u64 val = 0;
517 unsigned long flag;
518
519 /*
520 * In the non-present entry flush case, if hardware doesn't cache
521 * non-present entry we do nothing and if hardware cache non-present
522 * entry, we flush entries of domain 0 (the domain id is used to cache
523 * any non-present entries)
524 */
525 if (non_present_entry_flush) {
526 if (!cap_caching_mode(iommu->cap))
527 return 1;
528 else
529 did = 0;
530 }
531
532 switch (type) {
533 case DMA_CCMD_GLOBAL_INVL:
534 val = DMA_CCMD_GLOBAL_INVL;
535 break;
536 case DMA_CCMD_DOMAIN_INVL:
537 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
538 break;
539 case DMA_CCMD_DEVICE_INVL:
540 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
541 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
542 break;
543 default:
544 BUG();
545 }
546 val |= DMA_CCMD_ICC;
547
548 spin_lock_irqsave(&iommu->register_lock, flag);
549 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
550
551 /* Make sure hardware complete it */
552 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
553 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
554
555 spin_unlock_irqrestore(&iommu->register_lock, flag);
556
557 /* flush context entry will implictly flush write buffer */
558 return 0;
559}
560
561static int inline iommu_flush_context_global(struct intel_iommu *iommu,
562 int non_present_entry_flush)
563{
564 return __iommu_flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
565 non_present_entry_flush);
566}
567
568static int inline iommu_flush_context_domain(struct intel_iommu *iommu, u16 did,
569 int non_present_entry_flush)
570{
571 return __iommu_flush_context(iommu, did, 0, 0, DMA_CCMD_DOMAIN_INVL,
572 non_present_entry_flush);
573}
574
575static int inline iommu_flush_context_device(struct intel_iommu *iommu,
576 u16 did, u16 source_id, u8 function_mask, int non_present_entry_flush)
577{
578 return __iommu_flush_context(iommu, did, source_id, function_mask,
579 DMA_CCMD_DEVICE_INVL, non_present_entry_flush);
580}
581
582/* return value determine if we need a write buffer flush */
583static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
584 u64 addr, unsigned int size_order, u64 type,
585 int non_present_entry_flush)
586{
587 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
588 u64 val = 0, val_iva = 0;
589 unsigned long flag;
590
591 /*
592 * In the non-present entry flush case, if hardware doesn't cache
593 * non-present entry we do nothing and if hardware cache non-present
594 * entry, we flush entries of domain 0 (the domain id is used to cache
595 * any non-present entries)
596 */
597 if (non_present_entry_flush) {
598 if (!cap_caching_mode(iommu->cap))
599 return 1;
600 else
601 did = 0;
602 }
603
604 switch (type) {
605 case DMA_TLB_GLOBAL_FLUSH:
606 /* global flush doesn't need set IVA_REG */
607 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
608 break;
609 case DMA_TLB_DSI_FLUSH:
610 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
611 break;
612 case DMA_TLB_PSI_FLUSH:
613 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
614 /* Note: always flush non-leaf currently */
615 val_iva = size_order | addr;
616 break;
617 default:
618 BUG();
619 }
620 /* Note: set drain read/write */
621#if 0
622 /*
623 * This is probably to be super secure.. Looks like we can
624 * ignore it without any impact.
625 */
626 if (cap_read_drain(iommu->cap))
627 val |= DMA_TLB_READ_DRAIN;
628#endif
629 if (cap_write_drain(iommu->cap))
630 val |= DMA_TLB_WRITE_DRAIN;
631
632 spin_lock_irqsave(&iommu->register_lock, flag);
633 /* Note: Only uses first TLB reg currently */
634 if (val_iva)
635 dmar_writeq(iommu->reg + tlb_offset, val_iva);
636 dmar_writeq(iommu->reg + tlb_offset + 8, val);
637
638 /* Make sure hardware complete it */
639 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
640 dmar_readq, (!(val & DMA_TLB_IVT)), val);
641
642 spin_unlock_irqrestore(&iommu->register_lock, flag);
643
644 /* check IOTLB invalidation granularity */
645 if (DMA_TLB_IAIG(val) == 0)
646 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
647 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
648 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
649 DMA_TLB_IIRG(type), DMA_TLB_IAIG(val));
650 /* flush context entry will implictly flush write buffer */
651 return 0;
652}
653
654static int inline iommu_flush_iotlb_global(struct intel_iommu *iommu,
655 int non_present_entry_flush)
656{
657 return __iommu_flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
658 non_present_entry_flush);
659}
660
661static int inline iommu_flush_iotlb_dsi(struct intel_iommu *iommu, u16 did,
662 int non_present_entry_flush)
663{
664 return __iommu_flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH,
665 non_present_entry_flush);
666}
667
ba395927
KA
668static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
669 u64 addr, unsigned int pages, int non_present_entry_flush)
670{
f76aec76 671 unsigned int mask;
ba395927
KA
672
673 BUG_ON(addr & (~PAGE_MASK_4K));
674 BUG_ON(pages == 0);
675
676 /* Fallback to domain selective flush if no PSI support */
677 if (!cap_pgsel_inv(iommu->cap))
678 return iommu_flush_iotlb_dsi(iommu, did,
679 non_present_entry_flush);
680
681 /*
682 * PSI requires page size to be 2 ^ x, and the base address is naturally
683 * aligned to the size
684 */
f76aec76 685 mask = ilog2(__roundup_pow_of_two(pages));
ba395927 686 /* Fallback to domain selective flush if size is too big */
f76aec76 687 if (mask > cap_max_amask_val(iommu->cap))
ba395927
KA
688 return iommu_flush_iotlb_dsi(iommu, did,
689 non_present_entry_flush);
690
f76aec76 691 return __iommu_flush_iotlb(iommu, did, addr, mask,
ba395927
KA
692 DMA_TLB_PSI_FLUSH, non_present_entry_flush);
693}
694
695static int iommu_enable_translation(struct intel_iommu *iommu)
696{
697 u32 sts;
698 unsigned long flags;
699
700 spin_lock_irqsave(&iommu->register_lock, flags);
701 writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);
702
703 /* Make sure hardware complete it */
704 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
705 readl, (sts & DMA_GSTS_TES), sts);
706
707 iommu->gcmd |= DMA_GCMD_TE;
708 spin_unlock_irqrestore(&iommu->register_lock, flags);
709 return 0;
710}
711
712static int iommu_disable_translation(struct intel_iommu *iommu)
713{
714 u32 sts;
715 unsigned long flag;
716
717 spin_lock_irqsave(&iommu->register_lock, flag);
718 iommu->gcmd &= ~DMA_GCMD_TE;
719 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
720
721 /* Make sure hardware complete it */
722 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
723 readl, (!(sts & DMA_GSTS_TES)), sts);
724
725 spin_unlock_irqrestore(&iommu->register_lock, flag);
726 return 0;
727}
728
3460a6d9
KA
/* iommu interrupt handling. Most stuff are MSI-like. */

static char *fault_reason_strings[] =
{
	"Software",
	"Present bit in root entry is clear",
	"Present bit in context entry is clear",
	"Invalid context entry",
	"Access beyond MGAW",
	"PTE Write access is not set",
	"PTE Read access is not set",
	"Next page table ptr is invalid",
	"Root table address invalid",
	"Context table ptr is invalid",
	"non-zero reserved fields in RTP",
	"non-zero reserved fields in CTP",
	"non-zero reserved fields in PTE",
	"Unknown"
};

/*
 * Index of the "Unknown" catch-all entry.  Parenthesized: the old
 * unparenthesized form made "MAX_FAULT_REASON_IDX - 1" expand to
 * "ARRAY_SIZE(...) - 1 - 1" and silently subtract two.
 */
#define MAX_FAULT_REASON_IDX	(ARRAY_SIZE(fault_reason_strings) - 1)

/*
 * Translate a hardware fault reason code into a human-readable string.
 * Out-of-range codes now map to the "Unknown" entry; previously they
 * returned the last real reason string due to an off-by-one.
 */
char *dmar_get_fault_reason(u8 fault_reason)
{
	if (fault_reason >= MAX_FAULT_REASON_IDX)
		return fault_reason_strings[MAX_FAULT_REASON_IDX];
	return fault_reason_strings[fault_reason];
}
757
758void dmar_msi_unmask(unsigned int irq)
759{
760 struct intel_iommu *iommu = get_irq_data(irq);
761 unsigned long flag;
762
763 /* unmask it */
764 spin_lock_irqsave(&iommu->register_lock, flag);
765 writel(0, iommu->reg + DMAR_FECTL_REG);
766 /* Read a reg to force flush the post write */
767 readl(iommu->reg + DMAR_FECTL_REG);
768 spin_unlock_irqrestore(&iommu->register_lock, flag);
769}
770
771void dmar_msi_mask(unsigned int irq)
772{
773 unsigned long flag;
774 struct intel_iommu *iommu = get_irq_data(irq);
775
776 /* mask it */
777 spin_lock_irqsave(&iommu->register_lock, flag);
778 writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
779 /* Read a reg to force flush the post write */
780 readl(iommu->reg + DMAR_FECTL_REG);
781 spin_unlock_irqrestore(&iommu->register_lock, flag);
782}
783
784void dmar_msi_write(int irq, struct msi_msg *msg)
785{
786 struct intel_iommu *iommu = get_irq_data(irq);
787 unsigned long flag;
788
789 spin_lock_irqsave(&iommu->register_lock, flag);
790 writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
791 writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
792 writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
793 spin_unlock_irqrestore(&iommu->register_lock, flag);
794}
795
796void dmar_msi_read(int irq, struct msi_msg *msg)
797{
798 struct intel_iommu *iommu = get_irq_data(irq);
799 unsigned long flag;
800
801 spin_lock_irqsave(&iommu->register_lock, flag);
802 msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
803 msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
804 msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
805 spin_unlock_irqrestore(&iommu->register_lock, flag);
806}
807
808static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
809 u8 fault_reason, u16 source_id, u64 addr)
810{
811 char *reason;
812
813 reason = dmar_get_fault_reason(fault_reason);
814
815 printk(KERN_ERR
816 "DMAR:[%s] Request device [%02x:%02x.%d] "
817 "fault addr %llx \n"
818 "DMAR:[fault reason %02d] %s\n",
819 (type ? "DMA Read" : "DMA Write"),
820 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
821 PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
822 return 0;
823}
824
825#define PRIMARY_FAULT_REG_LEN (16)
826static irqreturn_t iommu_page_fault(int irq, void *dev_id)
827{
828 struct intel_iommu *iommu = dev_id;
829 int reg, fault_index;
830 u32 fault_status;
831 unsigned long flag;
832
833 spin_lock_irqsave(&iommu->register_lock, flag);
834 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
835
836 /* TBD: ignore advanced fault log currently */
837 if (!(fault_status & DMA_FSTS_PPF))
838 goto clear_overflow;
839
840 fault_index = dma_fsts_fault_record_index(fault_status);
841 reg = cap_fault_reg_offset(iommu->cap);
842 while (1) {
843 u8 fault_reason;
844 u16 source_id;
845 u64 guest_addr;
846 int type;
847 u32 data;
848
849 /* highest 32 bits */
850 data = readl(iommu->reg + reg +
851 fault_index * PRIMARY_FAULT_REG_LEN + 12);
852 if (!(data & DMA_FRCD_F))
853 break;
854
855 fault_reason = dma_frcd_fault_reason(data);
856 type = dma_frcd_type(data);
857
858 data = readl(iommu->reg + reg +
859 fault_index * PRIMARY_FAULT_REG_LEN + 8);
860 source_id = dma_frcd_source_id(data);
861
862 guest_addr = dmar_readq(iommu->reg + reg +
863 fault_index * PRIMARY_FAULT_REG_LEN);
864 guest_addr = dma_frcd_page_addr(guest_addr);
865 /* clear the fault */
866 writel(DMA_FRCD_F, iommu->reg + reg +
867 fault_index * PRIMARY_FAULT_REG_LEN + 12);
868
869 spin_unlock_irqrestore(&iommu->register_lock, flag);
870
871 iommu_page_fault_do_one(iommu, type, fault_reason,
872 source_id, guest_addr);
873
874 fault_index++;
875 if (fault_index > cap_num_fault_regs(iommu->cap))
876 fault_index = 0;
877 spin_lock_irqsave(&iommu->register_lock, flag);
878 }
879clear_overflow:
880 /* clear primary fault overflow */
881 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
882 if (fault_status & DMA_FSTS_PFO)
883 writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);
884
885 spin_unlock_irqrestore(&iommu->register_lock, flag);
886 return IRQ_HANDLED;
887}
888
889int dmar_set_interrupt(struct intel_iommu *iommu)
890{
891 int irq, ret;
892
893 irq = create_irq();
894 if (!irq) {
895 printk(KERN_ERR "IOMMU: no free vectors\n");
896 return -EINVAL;
897 }
898
899 set_irq_data(irq, iommu);
900 iommu->irq = irq;
901
902 ret = arch_setup_dmar_msi(irq);
903 if (ret) {
904 set_irq_data(irq, NULL);
905 iommu->irq = 0;
906 destroy_irq(irq);
907 return 0;
908 }
909
910 /* Force fault register is cleared */
911 iommu_page_fault(irq, iommu);
912
913 ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
914 if (ret)
915 printk(KERN_ERR "IOMMU: can't request irq\n");
916 return ret;
917}
918
ba395927
KA
919static int iommu_init_domains(struct intel_iommu *iommu)
920{
921 unsigned long ndomains;
922 unsigned long nlongs;
923
924 ndomains = cap_ndoms(iommu->cap);
925 pr_debug("Number of Domains supportd <%ld>\n", ndomains);
926 nlongs = BITS_TO_LONGS(ndomains);
927
928 /* TBD: there might be 64K domains,
929 * consider other allocation for future chip
930 */
931 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
932 if (!iommu->domain_ids) {
933 printk(KERN_ERR "Allocating domain id array failed\n");
934 return -ENOMEM;
935 }
936 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
937 GFP_KERNEL);
938 if (!iommu->domains) {
939 printk(KERN_ERR "Allocating domain array failed\n");
940 kfree(iommu->domain_ids);
941 return -ENOMEM;
942 }
943
944 /*
945 * if Caching mode is set, then invalid translations are tagged
946 * with domainid 0. Hence we need to pre-allocate it.
947 */
948 if (cap_caching_mode(iommu->cap))
949 set_bit(0, iommu->domain_ids);
950 return 0;
951}
952
953static struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd)
954{
955 struct intel_iommu *iommu;
956 int ret;
957 int map_size;
958 u32 ver;
959
960 iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
961 if (!iommu)
962 return NULL;
963 iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
964 if (!iommu->reg) {
965 printk(KERN_ERR "IOMMU: can't map the region\n");
966 goto error;
967 }
968 iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
969 iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
970
971 /* the registers might be more than one page */
972 map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
973 cap_max_fault_reg_offset(iommu->cap));
974 map_size = PAGE_ALIGN_4K(map_size);
975 if (map_size > PAGE_SIZE_4K) {
976 iounmap(iommu->reg);
977 iommu->reg = ioremap(drhd->reg_base_addr, map_size);
978 if (!iommu->reg) {
979 printk(KERN_ERR "IOMMU: can't map the region\n");
980 goto error;
981 }
982 }
983
984 ver = readl(iommu->reg + DMAR_VER_REG);
985 pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
986 drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
987 iommu->cap, iommu->ecap);
988 ret = iommu_init_domains(iommu);
989 if (ret)
990 goto error_unmap;
991 spin_lock_init(&iommu->lock);
992 spin_lock_init(&iommu->register_lock);
993
994 drhd->iommu = iommu;
995 return iommu;
996error_unmap:
997 iounmap(iommu->reg);
ba395927
KA
998error:
999 kfree(iommu);
1000 return NULL;
1001}
1002
1003static void domain_exit(struct dmar_domain *domain);
1004static void free_iommu(struct intel_iommu *iommu)
1005{
1006 struct dmar_domain *domain;
1007 int i;
1008
1009 if (!iommu)
1010 return;
1011
1012 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
1013 for (; i < cap_ndoms(iommu->cap); ) {
1014 domain = iommu->domains[i];
1015 clear_bit(i, iommu->domain_ids);
1016 domain_exit(domain);
1017 i = find_next_bit(iommu->domain_ids,
1018 cap_ndoms(iommu->cap), i+1);
1019 }
1020
1021 if (iommu->gcmd & DMA_GCMD_TE)
1022 iommu_disable_translation(iommu);
1023
1024 if (iommu->irq) {
1025 set_irq_data(iommu->irq, NULL);
1026 /* This will mask the irq */
1027 free_irq(iommu->irq, iommu);
1028 destroy_irq(iommu->irq);
1029 }
1030
1031 kfree(iommu->domains);
1032 kfree(iommu->domain_ids);
1033
1034 /* free context mapping */
1035 free_context_table(iommu);
1036
1037 if (iommu->reg)
1038 iounmap(iommu->reg);
1039 kfree(iommu);
1040}
1041
1042static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
1043{
1044 unsigned long num;
1045 unsigned long ndomains;
1046 struct dmar_domain *domain;
1047 unsigned long flags;
1048
1049 domain = alloc_domain_mem();
1050 if (!domain)
1051 return NULL;
1052
1053 ndomains = cap_ndoms(iommu->cap);
1054
1055 spin_lock_irqsave(&iommu->lock, flags);
1056 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1057 if (num >= ndomains) {
1058 spin_unlock_irqrestore(&iommu->lock, flags);
1059 free_domain_mem(domain);
1060 printk(KERN_ERR "IOMMU: no free domain ids\n");
1061 return NULL;
1062 }
1063
1064 set_bit(num, iommu->domain_ids);
1065 domain->id = num;
1066 domain->iommu = iommu;
1067 iommu->domains[num] = domain;
1068 spin_unlock_irqrestore(&iommu->lock, flags);
1069
1070 return domain;
1071}
1072
1073static void iommu_free_domain(struct dmar_domain *domain)
1074{
1075 unsigned long flags;
1076
1077 spin_lock_irqsave(&domain->iommu->lock, flags);
1078 clear_bit(domain->id, domain->iommu->domain_ids);
1079 spin_unlock_irqrestore(&domain->iommu->lock, flags);
1080}
1081
1082static struct iova_domain reserved_iova_list;
1083
1084static void dmar_init_reserved_ranges(void)
1085{
1086 struct pci_dev *pdev = NULL;
1087 struct iova *iova;
1088 int i;
1089 u64 addr, size;
1090
f661197e 1091 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
ba395927
KA
1092
1093 /* IOAPIC ranges shouldn't be accessed by DMA */
1094 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1095 IOVA_PFN(IOAPIC_RANGE_END));
1096 if (!iova)
1097 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1098
1099 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1100 for_each_pci_dev(pdev) {
1101 struct resource *r;
1102
1103 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1104 r = &pdev->resource[i];
1105 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1106 continue;
1107 addr = r->start;
1108 addr &= PAGE_MASK_4K;
1109 size = r->end - addr;
1110 size = PAGE_ALIGN_4K(size);
1111 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
1112 IOVA_PFN(size + addr) - 1);
1113 if (!iova)
1114 printk(KERN_ERR "Reserve iova failed\n");
1115 }
1116 }
1117
1118}
1119
1120static void domain_reserve_special_ranges(struct dmar_domain *domain)
1121{
1122 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1123}
1124
/*
 * Round a guest address width up to the next "adjusted" width, i.e. the
 * next width of the form 12 + 9*n that the page-table walk can express,
 * capped at 64 bits.
 */
static inline int guestwidth_to_adjustwidth(int gaw)
{
	int rem = (gaw - 12) % 9;
	int agaw = rem ? gaw + 9 - rem : gaw;

	return agaw > 64 ? 64 : agaw;
}
1138
1139static int domain_init(struct dmar_domain *domain, int guest_width)
1140{
1141 struct intel_iommu *iommu;
1142 int adjust_width, agaw;
1143 unsigned long sagaw;
1144
f661197e 1145 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
ba395927
KA
1146 spin_lock_init(&domain->mapping_lock);
1147
1148 domain_reserve_special_ranges(domain);
1149
1150 /* calculate AGAW */
1151 iommu = domain->iommu;
1152 if (guest_width > cap_mgaw(iommu->cap))
1153 guest_width = cap_mgaw(iommu->cap);
1154 domain->gaw = guest_width;
1155 adjust_width = guestwidth_to_adjustwidth(guest_width);
1156 agaw = width_to_agaw(adjust_width);
1157 sagaw = cap_sagaw(iommu->cap);
1158 if (!test_bit(agaw, &sagaw)) {
1159 /* hardware doesn't support it, choose a bigger one */
1160 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1161 agaw = find_next_bit(&sagaw, 5, agaw);
1162 if (agaw >= 5)
1163 return -ENODEV;
1164 }
1165 domain->agaw = agaw;
1166 INIT_LIST_HEAD(&domain->devices);
1167
1168 /* always allocate the top pgd */
1169 domain->pgd = (struct dma_pte *)alloc_pgtable_page();
1170 if (!domain->pgd)
1171 return -ENOMEM;
1172 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE_4K);
1173 return 0;
1174}
1175
1176static void domain_exit(struct dmar_domain *domain)
1177{
1178 u64 end;
1179
1180 /* Domain 0 is reserved, so dont process it */
1181 if (!domain)
1182 return;
1183
1184 domain_remove_dev_info(domain);
1185 /* destroy iovas */
1186 put_iova_domain(&domain->iovad);
1187 end = DOMAIN_MAX_ADDR(domain->gaw);
1188 end = end & (~PAGE_MASK_4K);
1189
1190 /* clear ptes */
1191 dma_pte_clear_range(domain, 0, end);
1192
1193 /* free page tables */
1194 dma_pte_free_pagetable(domain, 0, end);
1195
1196 iommu_free_domain(domain);
1197 free_domain_mem(domain);
1198}
1199
1200static int domain_context_mapping_one(struct dmar_domain *domain,
1201 u8 bus, u8 devfn)
1202{
1203 struct context_entry *context;
1204 struct intel_iommu *iommu = domain->iommu;
1205 unsigned long flags;
1206
1207 pr_debug("Set context mapping for %02x:%02x.%d\n",
1208 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1209 BUG_ON(!domain->pgd);
1210 context = device_to_context_entry(iommu, bus, devfn);
1211 if (!context)
1212 return -ENOMEM;
1213 spin_lock_irqsave(&iommu->lock, flags);
1214 if (context_present(*context)) {
1215 spin_unlock_irqrestore(&iommu->lock, flags);
1216 return 0;
1217 }
1218
1219 context_set_domain_id(*context, domain->id);
1220 context_set_address_width(*context, domain->agaw);
1221 context_set_address_root(*context, virt_to_phys(domain->pgd));
1222 context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
1223 context_set_fault_enable(*context);
1224 context_set_present(*context);
1225 __iommu_flush_cache(iommu, context, sizeof(*context));
1226
1227 /* it's a non-present to present mapping */
1228 if (iommu_flush_context_device(iommu, domain->id,
1229 (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, 1))
1230 iommu_flush_write_buffer(iommu);
1231 else
1232 iommu_flush_iotlb_dsi(iommu, 0, 0);
1233 spin_unlock_irqrestore(&iommu->lock, flags);
1234 return 0;
1235}
1236
1237static int
1238domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
1239{
1240 int ret;
1241 struct pci_dev *tmp, *parent;
1242
1243 ret = domain_context_mapping_one(domain, pdev->bus->number,
1244 pdev->devfn);
1245 if (ret)
1246 return ret;
1247
1248 /* dependent device mapping */
1249 tmp = pci_find_upstream_pcie_bridge(pdev);
1250 if (!tmp)
1251 return 0;
1252 /* Secondary interface's bus number and devfn 0 */
1253 parent = pdev->bus->self;
1254 while (parent != tmp) {
1255 ret = domain_context_mapping_one(domain, parent->bus->number,
1256 parent->devfn);
1257 if (ret)
1258 return ret;
1259 parent = parent->bus->self;
1260 }
1261 if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
1262 return domain_context_mapping_one(domain,
1263 tmp->subordinate->number, 0);
1264 else /* this is a legacy PCI bridge */
1265 return domain_context_mapping_one(domain,
1266 tmp->bus->number, tmp->devfn);
1267}
1268
1269static int domain_context_mapped(struct dmar_domain *domain,
1270 struct pci_dev *pdev)
1271{
1272 int ret;
1273 struct pci_dev *tmp, *parent;
1274
1275 ret = device_context_mapped(domain->iommu,
1276 pdev->bus->number, pdev->devfn);
1277 if (!ret)
1278 return ret;
1279 /* dependent device mapping */
1280 tmp = pci_find_upstream_pcie_bridge(pdev);
1281 if (!tmp)
1282 return ret;
1283 /* Secondary interface's bus number and devfn 0 */
1284 parent = pdev->bus->self;
1285 while (parent != tmp) {
1286 ret = device_context_mapped(domain->iommu, parent->bus->number,
1287 parent->devfn);
1288 if (!ret)
1289 return ret;
1290 parent = parent->bus->self;
1291 }
1292 if (tmp->is_pcie)
1293 return device_context_mapped(domain->iommu,
1294 tmp->subordinate->number, 0);
1295 else
1296 return device_context_mapped(domain->iommu,
1297 tmp->bus->number, tmp->devfn);
1298}
1299
1300static int
1301domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
1302 u64 hpa, size_t size, int prot)
1303{
1304 u64 start_pfn, end_pfn;
1305 struct dma_pte *pte;
1306 int index;
1307
1308 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1309 return -EINVAL;
1310 iova &= PAGE_MASK_4K;
1311 start_pfn = ((u64)hpa) >> PAGE_SHIFT_4K;
1312 end_pfn = (PAGE_ALIGN_4K(((u64)hpa) + size)) >> PAGE_SHIFT_4K;
1313 index = 0;
1314 while (start_pfn < end_pfn) {
1315 pte = addr_to_dma_pte(domain, iova + PAGE_SIZE_4K * index);
1316 if (!pte)
1317 return -ENOMEM;
1318 /* We don't need lock here, nobody else
1319 * touches the iova range
1320 */
1321 BUG_ON(dma_pte_addr(*pte));
1322 dma_set_pte_addr(*pte, start_pfn << PAGE_SHIFT_4K);
1323 dma_set_pte_prot(*pte, prot);
1324 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
1325 start_pfn++;
1326 index++;
1327 }
1328 return 0;
1329}
1330
1331static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
1332{
1333 clear_context_table(domain->iommu, bus, devfn);
1334 iommu_flush_context_global(domain->iommu, 0);
1335 iommu_flush_iotlb_global(domain->iommu, 0);
1336}
1337
1338static void domain_remove_dev_info(struct dmar_domain *domain)
1339{
1340 struct device_domain_info *info;
1341 unsigned long flags;
1342
1343 spin_lock_irqsave(&device_domain_lock, flags);
1344 while (!list_empty(&domain->devices)) {
1345 info = list_entry(domain->devices.next,
1346 struct device_domain_info, link);
1347 list_del(&info->link);
1348 list_del(&info->global);
1349 if (info->dev)
358dd8ac 1350 info->dev->dev.archdata.iommu = NULL;
ba395927
KA
1351 spin_unlock_irqrestore(&device_domain_lock, flags);
1352
1353 detach_domain_for_dev(info->domain, info->bus, info->devfn);
1354 free_devinfo_mem(info);
1355
1356 spin_lock_irqsave(&device_domain_lock, flags);
1357 }
1358 spin_unlock_irqrestore(&device_domain_lock, flags);
1359}
1360
1361/*
1362 * find_domain
358dd8ac 1363 * Note: we use struct pci_dev->dev.archdata.iommu stores the info
ba395927
KA
1364 */
1365struct dmar_domain *
1366find_domain(struct pci_dev *pdev)
1367{
1368 struct device_domain_info *info;
1369
1370 /* No lock here, assumes no domain exit in normal case */
358dd8ac 1371 info = pdev->dev.archdata.iommu;
ba395927
KA
1372 if (info)
1373 return info->domain;
1374 return NULL;
1375}
1376
1377static int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
1378 struct pci_dev *dev)
1379{
1380 int index;
1381
1382 while (dev) {
1383 for (index = 0; index < cnt; index ++)
1384 if (dev == devices[index])
1385 return 1;
1386
1387 /* Check our parent */
1388 dev = dev->bus->self;
1389 }
1390
1391 return 0;
1392}
1393
1394static struct dmar_drhd_unit *
1395dmar_find_matched_drhd_unit(struct pci_dev *dev)
1396{
1397 struct dmar_drhd_unit *drhd = NULL;
1398
1399 list_for_each_entry(drhd, &dmar_drhd_units, list) {
1400 if (drhd->include_all || dmar_pci_device_match(drhd->devices,
1401 drhd->devices_cnt, dev))
1402 return drhd;
1403 }
1404
1405 return NULL;
1406}
1407
1408/* domain is initialized */
1409static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1410{
1411 struct dmar_domain *domain, *found = NULL;
1412 struct intel_iommu *iommu;
1413 struct dmar_drhd_unit *drhd;
1414 struct device_domain_info *info, *tmp;
1415 struct pci_dev *dev_tmp;
1416 unsigned long flags;
1417 int bus = 0, devfn = 0;
1418
1419 domain = find_domain(pdev);
1420 if (domain)
1421 return domain;
1422
1423 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1424 if (dev_tmp) {
1425 if (dev_tmp->is_pcie) {
1426 bus = dev_tmp->subordinate->number;
1427 devfn = 0;
1428 } else {
1429 bus = dev_tmp->bus->number;
1430 devfn = dev_tmp->devfn;
1431 }
1432 spin_lock_irqsave(&device_domain_lock, flags);
1433 list_for_each_entry(info, &device_domain_list, global) {
1434 if (info->bus == bus && info->devfn == devfn) {
1435 found = info->domain;
1436 break;
1437 }
1438 }
1439 spin_unlock_irqrestore(&device_domain_lock, flags);
1440 /* pcie-pci bridge already has a domain, uses it */
1441 if (found) {
1442 domain = found;
1443 goto found_domain;
1444 }
1445 }
1446
1447 /* Allocate new domain for the device */
1448 drhd = dmar_find_matched_drhd_unit(pdev);
1449 if (!drhd) {
1450 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1451 pci_name(pdev));
1452 return NULL;
1453 }
1454 iommu = drhd->iommu;
1455
1456 domain = iommu_alloc_domain(iommu);
1457 if (!domain)
1458 goto error;
1459
1460 if (domain_init(domain, gaw)) {
1461 domain_exit(domain);
1462 goto error;
1463 }
1464
1465 /* register pcie-to-pci device */
1466 if (dev_tmp) {
1467 info = alloc_devinfo_mem();
1468 if (!info) {
1469 domain_exit(domain);
1470 goto error;
1471 }
1472 info->bus = bus;
1473 info->devfn = devfn;
1474 info->dev = NULL;
1475 info->domain = domain;
1476 /* This domain is shared by devices under p2p bridge */
1477 domain->flags |= DOMAIN_FLAG_MULTIPLE_DEVICES;
1478
1479 /* pcie-to-pci bridge already has a domain, uses it */
1480 found = NULL;
1481 spin_lock_irqsave(&device_domain_lock, flags);
1482 list_for_each_entry(tmp, &device_domain_list, global) {
1483 if (tmp->bus == bus && tmp->devfn == devfn) {
1484 found = tmp->domain;
1485 break;
1486 }
1487 }
1488 if (found) {
1489 free_devinfo_mem(info);
1490 domain_exit(domain);
1491 domain = found;
1492 } else {
1493 list_add(&info->link, &domain->devices);
1494 list_add(&info->global, &device_domain_list);
1495 }
1496 spin_unlock_irqrestore(&device_domain_lock, flags);
1497 }
1498
1499found_domain:
1500 info = alloc_devinfo_mem();
1501 if (!info)
1502 goto error;
1503 info->bus = pdev->bus->number;
1504 info->devfn = pdev->devfn;
1505 info->dev = pdev;
1506 info->domain = domain;
1507 spin_lock_irqsave(&device_domain_lock, flags);
1508 /* somebody is fast */
1509 found = find_domain(pdev);
1510 if (found != NULL) {
1511 spin_unlock_irqrestore(&device_domain_lock, flags);
1512 if (found != domain) {
1513 domain_exit(domain);
1514 domain = found;
1515 }
1516 free_devinfo_mem(info);
1517 return domain;
1518 }
1519 list_add(&info->link, &domain->devices);
1520 list_add(&info->global, &device_domain_list);
358dd8ac 1521 pdev->dev.archdata.iommu = info;
ba395927
KA
1522 spin_unlock_irqrestore(&device_domain_lock, flags);
1523 return domain;
1524error:
1525 /* recheck it here, maybe others set it */
1526 return find_domain(pdev);
1527}
1528
1529static int iommu_prepare_identity_map(struct pci_dev *pdev, u64 start, u64 end)
1530{
1531 struct dmar_domain *domain;
1532 unsigned long size;
1533 u64 base;
1534 int ret;
1535
1536 printk(KERN_INFO
1537 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1538 pci_name(pdev), start, end);
1539 /* page table init */
1540 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1541 if (!domain)
1542 return -ENOMEM;
1543
1544 /* The address might not be aligned */
1545 base = start & PAGE_MASK_4K;
1546 size = end - base;
1547 size = PAGE_ALIGN_4K(size);
1548 if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1549 IOVA_PFN(base + size) - 1)) {
1550 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1551 ret = -ENOMEM;
1552 goto error;
1553 }
1554
1555 pr_debug("Mapping reserved region %lx@%llx for %s\n",
1556 size, base, pci_name(pdev));
1557 /*
1558 * RMRR range might have overlap with physical memory range,
1559 * clear it first
1560 */
1561 dma_pte_clear_range(domain, base, base + size);
1562
1563 ret = domain_page_mapping(domain, base, base, size,
1564 DMA_PTE_READ|DMA_PTE_WRITE);
1565 if (ret)
1566 goto error;
1567
1568 /* context entry init */
1569 ret = domain_context_mapping(domain, pdev);
1570 if (!ret)
1571 return 0;
1572error:
1573 domain_exit(domain);
1574 return ret;
1575
1576}
1577
1578static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1579 struct pci_dev *pdev)
1580{
358dd8ac 1581 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927
KA
1582 return 0;
1583 return iommu_prepare_identity_map(pdev, rmrr->base_address,
1584 rmrr->end_address + 1);
1585}
1586
#ifdef CONFIG_DMAR_GFX_WA
extern int arch_get_ram_range(int slot, u64 *addr, u64 *size);
/*
 * Graphics workaround: identity-map all RAM for every graphics device,
 * chunk by chunk as enumerated by arch_get_ram_range().
 */
static void __init iommu_prepare_gfx_mapping(void)
{
	struct pci_dev *pdev = NULL;
	u64 base, size;
	int slot;
	int ret;

	for_each_pci_dev(pdev) {
		if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
				!IS_GFX_DEVICE(pdev))
			continue;
		printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
			pci_name(pdev));
		for (slot = arch_get_ram_range(0, &base, &size);
		     slot >= 0;
		     slot = arch_get_ram_range(slot, &base, &size)) {
			ret = iommu_prepare_identity_map(pdev,
				base, base + size);
			if (ret)
				goto error;
		}
		continue;
error:
		printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
	}
}
#endif
1616
#ifdef CONFIG_DMAR_FLOPPY_WA
/*
 * Floppy workaround: the floppy driver does ISA DMA below 16M, so give
 * the LPC (ISA) bridge a 0-16M identity mapping.
 *
 * Fix: the failure message claimed "0-64M" while 0-16M is mapped, and
 * had no log level; both corrected.
 */
static inline void iommu_prepare_isa(void)
{
	struct pci_dev *pdev;
	int ret;

	pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
	if (!pdev)
		return;

	printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
	ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);

	if (ret)
		printk(KERN_ERR "IOMMU: Failed to create 0-16M identity map, "
			"floppy might not work\n");

}
#else
static inline void iommu_prepare_isa(void)
{
	return;
}
#endif /* !CONFIG_DMAR_FLOPPY_WA */
1641
ba395927
KA
1642int __init init_dmars(void)
1643{
1644 struct dmar_drhd_unit *drhd;
1645 struct dmar_rmrr_unit *rmrr;
1646 struct pci_dev *pdev;
1647 struct intel_iommu *iommu;
1648 int ret, unit = 0;
1649
1650 /*
1651 * for each drhd
1652 * allocate root
1653 * initialize and program root entry to not present
1654 * endfor
1655 */
1656 for_each_drhd_unit(drhd) {
1657 if (drhd->ignored)
1658 continue;
1659 iommu = alloc_iommu(drhd);
1660 if (!iommu) {
1661 ret = -ENOMEM;
1662 goto error;
1663 }
1664
1665 /*
1666 * TBD:
1667 * we could share the same root & context tables
1668 * amoung all IOMMU's. Need to Split it later.
1669 */
1670 ret = iommu_alloc_root_entry(iommu);
1671 if (ret) {
1672 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
1673 goto error;
1674 }
1675 }
1676
1677 /*
1678 * For each rmrr
1679 * for each dev attached to rmrr
1680 * do
1681 * locate drhd for dev, alloc domain for dev
1682 * allocate free domain
1683 * allocate page table entries for rmrr
1684 * if context not allocated for bus
1685 * allocate and init context
1686 * set present in root table for this bus
1687 * init context with domain, translation etc
1688 * endfor
1689 * endfor
1690 */
1691 for_each_rmrr_units(rmrr) {
1692 int i;
1693 for (i = 0; i < rmrr->devices_cnt; i++) {
1694 pdev = rmrr->devices[i];
1695 /* some BIOS lists non-exist devices in DMAR table */
1696 if (!pdev)
1697 continue;
1698 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
1699 if (ret)
1700 printk(KERN_ERR
1701 "IOMMU: mapping reserved region failed\n");
1702 }
1703 }
1704
e820482c
KA
1705 iommu_prepare_gfx_mapping();
1706
49a0429e
KA
1707 iommu_prepare_isa();
1708
ba395927
KA
1709 /*
1710 * for each drhd
1711 * enable fault log
1712 * global invalidate context cache
1713 * global invalidate iotlb
1714 * enable translation
1715 */
1716 for_each_drhd_unit(drhd) {
1717 if (drhd->ignored)
1718 continue;
1719 iommu = drhd->iommu;
1720 sprintf (iommu->name, "dmar%d", unit++);
1721
1722 iommu_flush_write_buffer(iommu);
1723
3460a6d9
KA
1724 ret = dmar_set_interrupt(iommu);
1725 if (ret)
1726 goto error;
1727
ba395927
KA
1728 iommu_set_root_entry(iommu);
1729
1730 iommu_flush_context_global(iommu, 0);
1731 iommu_flush_iotlb_global(iommu, 0);
1732
1733 ret = iommu_enable_translation(iommu);
1734 if (ret)
1735 goto error;
1736 }
1737
1738 return 0;
1739error:
1740 for_each_drhd_unit(drhd) {
1741 if (drhd->ignored)
1742 continue;
1743 iommu = drhd->iommu;
1744 free_iommu(iommu);
1745 }
1746 return ret;
1747}
1748
1749static inline u64 aligned_size(u64 host_addr, size_t size)
1750{
1751 u64 addr;
1752 addr = (host_addr & (~PAGE_MASK_4K)) + size;
1753 return PAGE_ALIGN_4K(addr);
1754}
1755
1756struct iova *
f76aec76 1757iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
ba395927 1758{
ba395927
KA
1759 struct iova *piova;
1760
1761 /* Make sure it's in range */
ba395927 1762 end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
f76aec76 1763 if (!size || (IOVA_START_ADDR + size > end))
ba395927
KA
1764 return NULL;
1765
1766 piova = alloc_iova(&domain->iovad,
f76aec76 1767 size >> PAGE_SHIFT_4K, IOVA_PFN(end), 1);
ba395927
KA
1768 return piova;
1769}
1770
f76aec76
KA
1771static struct iova *
1772__intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
1773 size_t size)
ba395927 1774{
ba395927 1775 struct pci_dev *pdev = to_pci_dev(dev);
ba395927 1776 struct iova *iova = NULL;
ba395927 1777
7d3b03ce 1778 if ((pdev->dma_mask <= DMA_32BIT_MASK) || (dmar_forcedac)) {
f76aec76 1779 iova = iommu_alloc_iova(domain, size, pdev->dma_mask);
ba395927
KA
1780 } else {
1781 /*
1782 * First try to allocate an io virtual address in
1783 * DMA_32BIT_MASK and if that fails then try allocating
3609801e 1784 * from higher range
ba395927 1785 */
f76aec76 1786 iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK);
ba395927 1787 if (!iova)
f76aec76 1788 iova = iommu_alloc_iova(domain, size, pdev->dma_mask);
ba395927
KA
1789 }
1790
1791 if (!iova) {
1792 printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev));
f76aec76
KA
1793 return NULL;
1794 }
1795
1796 return iova;
1797}
1798
1799static struct dmar_domain *
1800get_valid_domain_for_dev(struct pci_dev *pdev)
1801{
1802 struct dmar_domain *domain;
1803 int ret;
1804
1805 domain = get_domain_for_dev(pdev,
1806 DEFAULT_DOMAIN_ADDRESS_WIDTH);
1807 if (!domain) {
1808 printk(KERN_ERR
1809 "Allocating domain for %s failed", pci_name(pdev));
4fe05bbc 1810 return NULL;
ba395927
KA
1811 }
1812
1813 /* make sure context mapping is ok */
1814 if (unlikely(!domain_context_mapped(domain, pdev))) {
1815 ret = domain_context_mapping(domain, pdev);
f76aec76
KA
1816 if (ret) {
1817 printk(KERN_ERR
1818 "Domain context map for %s failed",
1819 pci_name(pdev));
4fe05bbc 1820 return NULL;
f76aec76 1821 }
ba395927
KA
1822 }
1823
f76aec76
KA
1824 return domain;
1825}
1826
1827static dma_addr_t intel_map_single(struct device *hwdev, void *addr,
1828 size_t size, int dir)
1829{
1830 struct pci_dev *pdev = to_pci_dev(hwdev);
1831 int ret;
1832 struct dmar_domain *domain;
1833 unsigned long start_addr;
1834 struct iova *iova;
1835 int prot = 0;
1836
1837 BUG_ON(dir == DMA_NONE);
358dd8ac 1838 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
f76aec76
KA
1839 return virt_to_bus(addr);
1840
1841 domain = get_valid_domain_for_dev(pdev);
1842 if (!domain)
1843 return 0;
1844
1845 addr = (void *)virt_to_phys(addr);
1846 size = aligned_size((u64)addr, size);
1847
1848 iova = __intel_alloc_iova(hwdev, domain, size);
1849 if (!iova)
1850 goto error;
1851
1852 start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
1853
ba395927
KA
1854 /*
1855 * Check if DMAR supports zero-length reads on write only
1856 * mappings..
1857 */
1858 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
1859 !cap_zlr(domain->iommu->cap))
1860 prot |= DMA_PTE_READ;
1861 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
1862 prot |= DMA_PTE_WRITE;
1863 /*
1864 * addr - (addr + size) might be partial page, we should map the whole
1865 * page. Note: if two part of one page are separately mapped, we
1866 * might have two guest_addr mapping to the same host addr, but this
1867 * is not a big problem
1868 */
f76aec76
KA
1869 ret = domain_page_mapping(domain, start_addr,
1870 ((u64)addr) & PAGE_MASK_4K, size, prot);
ba395927
KA
1871 if (ret)
1872 goto error;
1873
1874 pr_debug("Device %s request: %lx@%llx mapping: %lx@%llx, dir %d\n",
1875 pci_name(pdev), size, (u64)addr,
f76aec76
KA
1876 size, (u64)start_addr, dir);
1877
1878 /* it's a non-present to present mapping */
1879 ret = iommu_flush_iotlb_psi(domain->iommu, domain->id,
1880 start_addr, size >> PAGE_SHIFT_4K, 1);
1881 if (ret)
1882 iommu_flush_write_buffer(domain->iommu);
1883
1884 return (start_addr + ((u64)addr & (~PAGE_MASK_4K)));
ba395927 1885
ba395927 1886error:
f76aec76
KA
1887 if (iova)
1888 __free_iova(&domain->iovad, iova);
ba395927
KA
1889 printk(KERN_ERR"Device %s request: %lx@%llx dir %d --- failed\n",
1890 pci_name(pdev), size, (u64)addr, dir);
1891 return 0;
1892}
1893
f76aec76 1894static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr,
ba395927
KA
1895 size_t size, int dir)
1896{
ba395927 1897 struct pci_dev *pdev = to_pci_dev(dev);
f76aec76
KA
1898 struct dmar_domain *domain;
1899 unsigned long start_addr;
ba395927
KA
1900 struct iova *iova;
1901
358dd8ac 1902 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
f76aec76 1903 return;
ba395927
KA
1904 domain = find_domain(pdev);
1905 BUG_ON(!domain);
1906
1907 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
f76aec76 1908 if (!iova)
ba395927 1909 return;
ba395927 1910
f76aec76
KA
1911 start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
1912 size = aligned_size((u64)dev_addr, size);
ba395927 1913
f76aec76
KA
1914 pr_debug("Device %s unmapping: %lx@%llx\n",
1915 pci_name(pdev), size, (u64)start_addr);
ba395927 1916
f76aec76
KA
1917 /* clear the whole page */
1918 dma_pte_clear_range(domain, start_addr, start_addr + size);
1919 /* free page tables */
1920 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
1921
1922 if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
1923 size >> PAGE_SHIFT_4K, 0))
ba395927 1924 iommu_flush_write_buffer(domain->iommu);
f76aec76
KA
1925
1926 /* free iova */
1927 __free_iova(&domain->iovad, iova);
ba395927
KA
1928}
1929
1930static void * intel_alloc_coherent(struct device *hwdev, size_t size,
1931 dma_addr_t *dma_handle, gfp_t flags)
1932{
1933 void *vaddr;
1934 int order;
1935
1936 size = PAGE_ALIGN_4K(size);
1937 order = get_order(size);
1938 flags &= ~(GFP_DMA | GFP_DMA32);
1939
1940 vaddr = (void *)__get_free_pages(flags, order);
1941 if (!vaddr)
1942 return NULL;
1943 memset(vaddr, 0, size);
1944
1945 *dma_handle = intel_map_single(hwdev, vaddr, size, DMA_BIDIRECTIONAL);
1946 if (*dma_handle)
1947 return vaddr;
1948 free_pages((unsigned long)vaddr, order);
1949 return NULL;
1950}
1951
1952static void intel_free_coherent(struct device *hwdev, size_t size,
1953 void *vaddr, dma_addr_t dma_handle)
1954{
1955 int order;
1956
1957 size = PAGE_ALIGN_4K(size);
1958 order = get_order(size);
1959
1960 intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
1961 free_pages((unsigned long)vaddr, order);
1962}
1963
12d4d40e 1964#define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
c03ab37c 1965static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
ba395927
KA
1966 int nelems, int dir)
1967{
1968 int i;
1969 struct pci_dev *pdev = to_pci_dev(hwdev);
1970 struct dmar_domain *domain;
f76aec76
KA
1971 unsigned long start_addr;
1972 struct iova *iova;
1973 size_t size = 0;
1974 void *addr;
c03ab37c 1975 struct scatterlist *sg;
ba395927 1976
358dd8ac 1977 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927
KA
1978 return;
1979
1980 domain = find_domain(pdev);
ba395927 1981
c03ab37c 1982 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
f76aec76
KA
1983 if (!iova)
1984 return;
c03ab37c 1985 for_each_sg(sglist, sg, nelems, i) {
f76aec76
KA
1986 addr = SG_ENT_VIRT_ADDRESS(sg);
1987 size += aligned_size((u64)addr, sg->length);
1988 }
1989
1990 start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
1991
1992 /* clear the whole page */
1993 dma_pte_clear_range(domain, start_addr, start_addr + size);
1994 /* free page tables */
1995 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
1996
1997 if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
1998 size >> PAGE_SHIFT_4K, 0))
ba395927 1999 iommu_flush_write_buffer(domain->iommu);
f76aec76
KA
2000
2001 /* free iova */
2002 __free_iova(&domain->iovad, iova);
ba395927
KA
2003}
2004
ba395927 2005static int intel_nontranslate_map_sg(struct device *hddev,
c03ab37c 2006 struct scatterlist *sglist, int nelems, int dir)
ba395927
KA
2007{
2008 int i;
c03ab37c 2009 struct scatterlist *sg;
ba395927 2010
c03ab37c 2011 for_each_sg(sglist, sg, nelems, i) {
12d4d40e 2012 BUG_ON(!sg_page(sg));
c03ab37c
FT
2013 sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
2014 sg->dma_length = sg->length;
ba395927
KA
2015 }
2016 return nelems;
2017}
2018
c03ab37c
FT
2019static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist,
2020 int nelems, int dir)
ba395927
KA
2021{
2022 void *addr;
2023 int i;
ba395927
KA
2024 struct pci_dev *pdev = to_pci_dev(hwdev);
2025 struct dmar_domain *domain;
f76aec76
KA
2026 size_t size = 0;
2027 int prot = 0;
2028 size_t offset = 0;
2029 struct iova *iova = NULL;
2030 int ret;
c03ab37c 2031 struct scatterlist *sg;
f76aec76 2032 unsigned long start_addr;
ba395927
KA
2033
2034 BUG_ON(dir == DMA_NONE);
358dd8ac 2035 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
c03ab37c 2036 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
ba395927 2037
f76aec76
KA
2038 domain = get_valid_domain_for_dev(pdev);
2039 if (!domain)
2040 return 0;
2041
c03ab37c 2042 for_each_sg(sglist, sg, nelems, i) {
ba395927 2043 addr = SG_ENT_VIRT_ADDRESS(sg);
f76aec76
KA
2044 addr = (void *)virt_to_phys(addr);
2045 size += aligned_size((u64)addr, sg->length);
2046 }
2047
2048 iova = __intel_alloc_iova(hwdev, domain, size);
2049 if (!iova) {
c03ab37c 2050 sglist->dma_length = 0;
f76aec76
KA
2051 return 0;
2052 }
2053
2054 /*
2055 * Check if DMAR supports zero-length reads on write only
2056 * mappings..
2057 */
2058 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
2059 !cap_zlr(domain->iommu->cap))
2060 prot |= DMA_PTE_READ;
2061 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2062 prot |= DMA_PTE_WRITE;
2063
2064 start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
2065 offset = 0;
c03ab37c 2066 for_each_sg(sglist, sg, nelems, i) {
f76aec76
KA
2067 addr = SG_ENT_VIRT_ADDRESS(sg);
2068 addr = (void *)virt_to_phys(addr);
2069 size = aligned_size((u64)addr, sg->length);
2070 ret = domain_page_mapping(domain, start_addr + offset,
2071 ((u64)addr) & PAGE_MASK_4K,
2072 size, prot);
2073 if (ret) {
2074 /* clear the page */
2075 dma_pte_clear_range(domain, start_addr,
2076 start_addr + offset);
2077 /* free page tables */
2078 dma_pte_free_pagetable(domain, start_addr,
2079 start_addr + offset);
2080 /* free iova */
2081 __free_iova(&domain->iovad, iova);
ba395927
KA
2082 return 0;
2083 }
f76aec76
KA
2084 sg->dma_address = start_addr + offset +
2085 ((u64)addr & (~PAGE_MASK_4K));
ba395927 2086 sg->dma_length = sg->length;
f76aec76 2087 offset += size;
ba395927
KA
2088 }
2089
ba395927 2090 /* it's a non-present to present mapping */
f76aec76
KA
2091 if (iommu_flush_iotlb_psi(domain->iommu, domain->id,
2092 start_addr, offset >> PAGE_SHIFT_4K, 1))
ba395927
KA
2093 iommu_flush_write_buffer(domain->iommu);
2094 return nelems;
2095}
2096
2097static struct dma_mapping_ops intel_dma_ops = {
2098 .alloc_coherent = intel_alloc_coherent,
2099 .free_coherent = intel_free_coherent,
2100 .map_single = intel_map_single,
2101 .unmap_single = intel_unmap_single,
2102 .map_sg = intel_map_sg,
2103 .unmap_sg = intel_unmap_sg,
2104};
2105
2106static inline int iommu_domain_cache_init(void)
2107{
2108 int ret = 0;
2109
2110 iommu_domain_cache = kmem_cache_create("iommu_domain",
2111 sizeof(struct dmar_domain),
2112 0,
2113 SLAB_HWCACHE_ALIGN,
2114
2115 NULL);
2116 if (!iommu_domain_cache) {
2117 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
2118 ret = -ENOMEM;
2119 }
2120
2121 return ret;
2122}
2123
2124static inline int iommu_devinfo_cache_init(void)
2125{
2126 int ret = 0;
2127
2128 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
2129 sizeof(struct device_domain_info),
2130 0,
2131 SLAB_HWCACHE_ALIGN,
2132
2133 NULL);
2134 if (!iommu_devinfo_cache) {
2135 printk(KERN_ERR "Couldn't create devinfo cache\n");
2136 ret = -ENOMEM;
2137 }
2138
2139 return ret;
2140}
2141
2142static inline int iommu_iova_cache_init(void)
2143{
2144 int ret = 0;
2145
2146 iommu_iova_cache = kmem_cache_create("iommu_iova",
2147 sizeof(struct iova),
2148 0,
2149 SLAB_HWCACHE_ALIGN,
2150
2151 NULL);
2152 if (!iommu_iova_cache) {
2153 printk(KERN_ERR "Couldn't create iova cache\n");
2154 ret = -ENOMEM;
2155 }
2156
2157 return ret;
2158}
2159
2160static int __init iommu_init_mempool(void)
2161{
2162 int ret;
2163 ret = iommu_iova_cache_init();
2164 if (ret)
2165 return ret;
2166
2167 ret = iommu_domain_cache_init();
2168 if (ret)
2169 goto domain_error;
2170
2171 ret = iommu_devinfo_cache_init();
2172 if (!ret)
2173 return ret;
2174
2175 kmem_cache_destroy(iommu_domain_cache);
2176domain_error:
2177 kmem_cache_destroy(iommu_iova_cache);
2178
2179 return -ENOMEM;
2180}
2181
2182static void __init iommu_exit_mempool(void)
2183{
2184 kmem_cache_destroy(iommu_devinfo_cache);
2185 kmem_cache_destroy(iommu_domain_cache);
2186 kmem_cache_destroy(iommu_iova_cache);
2187
2188}
2189
2190void __init detect_intel_iommu(void)
2191{
2192 if (swiotlb || no_iommu || iommu_detected || dmar_disabled)
2193 return;
2194 if (early_dmar_detect()) {
2195 iommu_detected = 1;
2196 }
2197}
2198
2199static void __init init_no_remapping_devices(void)
2200{
2201 struct dmar_drhd_unit *drhd;
2202
2203 for_each_drhd_unit(drhd) {
2204 if (!drhd->include_all) {
2205 int i;
2206 for (i = 0; i < drhd->devices_cnt; i++)
2207 if (drhd->devices[i] != NULL)
2208 break;
2209 /* ignore DMAR unit if no pci devices exist */
2210 if (i == drhd->devices_cnt)
2211 drhd->ignored = 1;
2212 }
2213 }
2214
2215 if (dmar_map_gfx)
2216 return;
2217
2218 for_each_drhd_unit(drhd) {
2219 int i;
2220 if (drhd->ignored || drhd->include_all)
2221 continue;
2222
2223 for (i = 0; i < drhd->devices_cnt; i++)
2224 if (drhd->devices[i] &&
2225 !IS_GFX_DEVICE(drhd->devices[i]))
2226 break;
2227
2228 if (i < drhd->devices_cnt)
2229 continue;
2230
2231 /* bypass IOMMU if it is just for gfx devices */
2232 drhd->ignored = 1;
2233 for (i = 0; i < drhd->devices_cnt; i++) {
2234 if (!drhd->devices[i])
2235 continue;
358dd8ac 2236 drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
ba395927
KA
2237 }
2238 }
2239}
2240
2241int __init intel_iommu_init(void)
2242{
2243 int ret = 0;
2244
2245 if (no_iommu || swiotlb || dmar_disabled)
2246 return -ENODEV;
2247
2248 if (dmar_table_init())
2249 return -ENODEV;
2250
2251 iommu_init_mempool();
2252 dmar_init_reserved_ranges();
2253
2254 init_no_remapping_devices();
2255
2256 ret = init_dmars();
2257 if (ret) {
2258 printk(KERN_ERR "IOMMU: dmar init failed\n");
2259 put_iova_domain(&reserved_iova_list);
2260 iommu_exit_mempool();
2261 return ret;
2262 }
2263 printk(KERN_INFO
2264 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
2265
2266 force_iommu = 1;
2267 dma_ops = &intel_dma_ops;
2268 return 0;
2269}
e820482c 2270