/*
 * VFIO: IOMMU DMA mapping support for Type1 IOMMU
 *
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Derived from original vfio:
 * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
 * Author: Tom Lyon, pugs@cisco.com
 *
 * We arbitrarily define a Type1 IOMMU as one matching the below code.
 * It could be called the x86 IOMMU as it's designed for AMD-Vi & Intel
 * VT-d, but that makes it harder to re-use as theoretically anyone
 * implementing a similar IOMMU could make use of this.  We expect the
 * IOMMU to support the IOMMU API and have few to no restrictions around
 * the IOVA range that can be mapped.  The Type1 IOMMU is currently
 * optimized for relatively static mappings of a userspace process with
 * userspace pages pinned into memory.  We also assume devices and IOMMU
 * domains are PCI based as the IOMMU API is still centered around a
 * device/bus interface rather than a group interface.
 */
#include <linux/compat.h>
#include <linux/device.h>
#include <linux/iommu.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/rbtree.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/vfio.h>
#include <linux/workqueue.h>
#define DRIVER_VERSION  "0.2"
#define DRIVER_AUTHOR   "Alex Williamson <alex.williamson@redhat.com>"
#define DRIVER_DESC     "Type1 IOMMU driver for VFIO"
static bool allow_unsafe_interrupts;
module_param_named(allow_unsafe_interrupts,
		   allow_unsafe_interrupts, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(allow_unsafe_interrupts,
		 "Enable VFIO IOMMU support on platforms without interrupt remapping support.");
static bool disable_hugepages;
module_param_named(disable_hugepages,
		   disable_hugepages, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(disable_hugepages,
		 "Disable VFIO IOMMU support for IOMMU hugepages.");
struct vfio_iommu {
	struct list_head	domain_list;
	struct mutex		lock;
	struct rb_root		dma_list;
	bool			v2;
	bool			nesting;
};

struct vfio_domain {
	struct iommu_domain	*domain;
	struct list_head	next;
	struct list_head	group_list;
	int			prot;		/* IOMMU_CACHE */
};

struct vfio_dma {
	struct rb_node		node;
	dma_addr_t		iova;		/* Device address */
	unsigned long		vaddr;		/* Process virtual addr */
	size_t			size;		/* Map size (bytes) */
	int			prot;		/* IOMMU_READ/WRITE */
};

struct vfio_group {
	struct iommu_group	*iommu_group;
	struct list_head	next;
};
/*
 * This code handles mapping and unmapping of user data buffers
 * into DMA'ble space using the IOMMU
 */
static struct vfio_dma *vfio_find_dma(struct vfio_iommu *iommu,
				      dma_addr_t start, size_t size)
{
	struct rb_node *node = iommu->dma_list.rb_node;

	while (node) {
		struct vfio_dma *dma = rb_entry(node, struct vfio_dma, node);

		if (start + size <= dma->iova)
			node = node->rb_left;
		else if (start >= dma->iova + dma->size)
			node = node->rb_right;
		else
			return dma;
	}

	return NULL;
}
static void vfio_link_dma(struct vfio_iommu *iommu, struct vfio_dma *new)
{
	struct rb_node **link = &iommu->dma_list.rb_node, *parent = NULL;
	struct vfio_dma *dma;

	while (*link) {
		parent = *link;
		dma = rb_entry(parent, struct vfio_dma, node);

		if (new->iova + new->size <= dma->iova)
			link = &(*link)->rb_left;
		else
			link = &(*link)->rb_right;
	}

	rb_link_node(&new->node, parent, link);
	rb_insert_color(&new->node, &iommu->dma_list);
}
static void vfio_unlink_dma(struct vfio_iommu *iommu, struct vfio_dma *old)
{
	rb_erase(&old->node, &iommu->dma_list);
}
struct vwork {
	struct mm_struct	*mm;
	long			npage;
	struct work_struct	work;
};
/* delayed decrement/increment for locked_vm */
static void vfio_lock_acct_bg(struct work_struct *work)
{
	struct vwork *vwork = container_of(work, struct vwork, work);
	struct mm_struct *mm;

	mm = vwork->mm;
	down_write(&mm->mmap_sem);
	mm->locked_vm += vwork->npage;
	up_write(&mm->mmap_sem);
	mmput(mm);
	kfree(vwork);
}
static void vfio_lock_acct(long npage)
{
	struct vwork *vwork;
	struct mm_struct *mm;

	if (!current->mm || !npage)
		return; /* process exited or nothing to do */

	if (down_write_trylock(&current->mm->mmap_sem)) {
		current->mm->locked_vm += npage;
		up_write(&current->mm->mmap_sem);
		return;
	}

	/*
	 * Couldn't get mmap_sem lock, so must setup to update
	 * mm->locked_vm later.  If locked_vm were atomic, we
	 * wouldn't need this silliness.
	 */
	vwork = kmalloc(sizeof(struct vwork), GFP_KERNEL);
	if (!vwork)
		return;
	mm = get_task_mm(current);
	if (!mm) {
		kfree(vwork);
		return;
	}
	INIT_WORK(&vwork->work, vfio_lock_acct_bg);
	vwork->mm = mm;
	vwork->npage = npage;
	schedule_work(&vwork->work);
}
/*
 * Some mappings aren't backed by a struct page, for example an mmap'd
 * MMIO range for our own or another device.  These use a different
 * pfn conversion and shouldn't be tracked as locked pages.
 */
static bool is_invalid_reserved_pfn(unsigned long pfn)
{
	if (pfn_valid(pfn)) {
		bool reserved;
		struct page *tail = pfn_to_page(pfn);
		struct page *head = compound_head(tail);
		reserved = !!(PageReserved(head));
		if (head != tail) {
			/*
			 * "head" is not a dangling pointer
			 * (compound_head takes care of that),
			 * but the hugepage may have been split
			 * from under us (and we may not hold a
			 * reference count on the head page so it can
			 * be reused before we run PageReferenced), so
			 * we have to check PageTail before returning
			 * what we just read.
			 */
			smp_rmb();
			if (PageTail(tail))
				return reserved;
		}
		return PageReserved(tail);
	}

	return true;
}
static int put_pfn(unsigned long pfn, int prot)
{
	if (!is_invalid_reserved_pfn(pfn)) {
		struct page *page = pfn_to_page(pfn);
		if (prot & IOMMU_WRITE)
			SetPageDirty(page);
		put_page(page);
		return 1;
	}
	return 0;
}
static int vaddr_get_pfn(unsigned long vaddr, int prot, unsigned long *pfn)
{
	struct page *page[1];
	struct vm_area_struct *vma;
	int ret = -EFAULT;

	if (get_user_pages_fast(vaddr, 1, !!(prot & IOMMU_WRITE), page) == 1) {
		*pfn = page_to_pfn(page[0]);
		return 0;
	}

	down_read(&current->mm->mmap_sem);

	vma = find_vma_intersection(current->mm, vaddr, vaddr + 1);

	if (vma && vma->vm_flags & VM_PFNMAP) {
		*pfn = ((vaddr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
		if (is_invalid_reserved_pfn(*pfn))
			ret = 0;
	}

	up_read(&current->mm->mmap_sem);

	return ret;
}
/*
 * Attempt to pin pages.  We really don't want to track all the pfns and
 * the iommu can only map chunks of consecutive pfns anyway, so get the
 * first page and all consecutive pages with the same locking.
 */
static long vfio_pin_pages(unsigned long vaddr, long npage,
			   int prot, unsigned long *pfn_base)
{
	unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	bool lock_cap = capable(CAP_IPC_LOCK);
	long ret, i;

	if (!current->mm)
		return -ENODEV;

	ret = vaddr_get_pfn(vaddr, prot, pfn_base);
	if (ret)
		return ret;

	if (is_invalid_reserved_pfn(*pfn_base))
		return 1;

	if (!lock_cap && current->mm->locked_vm + 1 > limit) {
		put_pfn(*pfn_base, prot);
		pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__,
			limit << PAGE_SHIFT);
		return -ENOMEM;
	}

	if (unlikely(disable_hugepages)) {
		vfio_lock_acct(1);
		return 1;
	}

	/* Lock all the consecutive pages from pfn_base */
	for (i = 1, vaddr += PAGE_SIZE; i < npage; i++, vaddr += PAGE_SIZE) {
		unsigned long pfn = 0;

		ret = vaddr_get_pfn(vaddr, prot, &pfn);
		if (ret)
			break;

		if (pfn != *pfn_base + i || is_invalid_reserved_pfn(pfn)) {
			put_pfn(pfn, prot);
			break;
		}

		if (!lock_cap && current->mm->locked_vm + i + 1 > limit) {
			put_pfn(pfn, prot);
			pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n",
				__func__, limit << PAGE_SHIFT);
			break;
		}
	}

	vfio_lock_acct(i);

	return i;
}
static long vfio_unpin_pages(unsigned long pfn, long npage,
			     int prot, bool do_accounting)
{
	unsigned long unlocked = 0;
	long i;

	for (i = 0; i < npage; i++)
		unlocked += put_pfn(pfn++, prot);

	if (do_accounting)
		vfio_lock_acct(-unlocked);

	return unlocked;
}
static void vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma)
{
	dma_addr_t iova = dma->iova, end = dma->iova + dma->size;
	struct vfio_domain *domain, *d;
	long unlocked = 0;

	if (!dma->size)
		return;
	/*
	 * We use the IOMMU to track the physical addresses, otherwise we'd
	 * need a much more complicated tracking system.  Unfortunately that
	 * means we need to use one of the iommu domains to figure out the
	 * pfns to unpin.  The rest need to be unmapped in advance so we have
	 * no iommu translations remaining when the pages are unpinned.
	 */
	domain = d = list_first_entry(&iommu->domain_list,
				      struct vfio_domain, next);

	list_for_each_entry_continue(d, &iommu->domain_list, next)
		iommu_unmap(d->domain, dma->iova, dma->size);

	while (iova < end) {
		size_t unmapped;
		phys_addr_t phys;

		phys = iommu_iova_to_phys(domain->domain, iova);
		if (WARN_ON(!phys)) {
			iova += PAGE_SIZE;
			continue;
		}

		unmapped = iommu_unmap(domain->domain, iova, PAGE_SIZE);
		if (WARN_ON(!unmapped))
			break;

		unlocked += vfio_unpin_pages(phys >> PAGE_SHIFT,
					     unmapped >> PAGE_SHIFT,
					     dma->prot, false);
		iova += unmapped;
	}

	vfio_lock_acct(-unlocked);
}
static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *dma)
{
	vfio_unmap_unpin(iommu, dma);
	vfio_unlink_dma(iommu, dma);
	kfree(dma);
}
static unsigned long vfio_pgsize_bitmap(struct vfio_iommu *iommu)
{
	struct vfio_domain *domain;
	unsigned long bitmap = PAGE_MASK;

	mutex_lock(&iommu->lock);
	list_for_each_entry(domain, &iommu->domain_list, next)
		bitmap &= domain->domain->ops->pgsize_bitmap;
	mutex_unlock(&iommu->lock);

	return bitmap;
}
static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
			     struct vfio_iommu_type1_dma_unmap *unmap)
{
	uint64_t mask;
	struct vfio_dma *dma;
	size_t unmapped = 0;
	int ret = 0;

	mask = ((uint64_t)1 << __ffs(vfio_pgsize_bitmap(iommu))) - 1;

	if (unmap->iova & mask)
		return -EINVAL;
	if (!unmap->size || unmap->size & mask)
		return -EINVAL;

	WARN_ON(mask & PAGE_MASK);

	mutex_lock(&iommu->lock);

	/*
	 * vfio-iommu-type1 (v1) - User mappings were coalesced together to
	 * avoid tracking individual mappings.  This means that the granularity
	 * of the original mapping was lost and the user was allowed to attempt
	 * to unmap any range.  Depending on the contiguousness of physical
	 * memory and page sizes supported by the IOMMU, arbitrary unmaps may
	 * or may not have worked.  We only guaranteed unmap granularity
	 * matching the original mapping; even though it was untracked here,
	 * the original mappings are reflected in IOMMU mappings.  This
	 * resulted in a couple of unusual behaviors.  First, if a range is not
	 * able to be unmapped, ex. a set of 4k pages that was mapped as a
	 * 2M hugepage into the IOMMU, the unmap ioctl returns success but with
	 * a zero sized unmap.  Also, if an unmap request overlaps the first
	 * address of a hugepage, the IOMMU will unmap the entire hugepage.
	 * This also returns success and the returned unmap size reflects the
	 * actual size unmapped.
	 *
	 * We attempt to maintain compatibility with this "v1" interface, but
	 * we take control out of the hands of the IOMMU.  Therefore, an unmap
	 * request offset from the beginning of the original mapping will
	 * return success with zero sized unmap.  And an unmap request covering
	 * the first iova of a mapping will unmap the entire range.
	 *
	 * The v2 version of this interface intends to be more deterministic.
	 * Unmap requests must fully cover previous mappings.  Multiple
	 * mappings may still be unmapped by specifying large ranges, but there
	 * must not be any previous mappings bisected by the range.  An error
	 * will be returned if these conditions are not met.  The v2 interface
	 * will only return success and a size of zero if there were no
	 * mappings within the range.  (A userspace sketch of the v2 semantics
	 * follows this function.)
	 */
	if (iommu->v2) {
		dma = vfio_find_dma(iommu, unmap->iova, 0);
		if (dma && dma->iova != unmap->iova) {
			ret = -EINVAL;
			goto unlock;
		}
		dma = vfio_find_dma(iommu, unmap->iova + unmap->size - 1, 0);
		if (dma && dma->iova + dma->size != unmap->iova + unmap->size) {
			ret = -EINVAL;
			goto unlock;
		}
	}

	while ((dma = vfio_find_dma(iommu, unmap->iova, unmap->size))) {
		if (!iommu->v2 && unmap->iova > dma->iova)
			break;
		unmapped += dma->size;
		vfio_remove_dma(iommu, dma);
	}

unlock:
	mutex_unlock(&iommu->lock);

	/* Report how much was unmapped */
	unmap->size = unmapped;

	return ret;
}
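/*
 * Illustrative only (not part of the driver): a minimal userspace sketch of
 * the v2 unmap semantics described above, assuming "container_fd" is an open
 * VFIO container already set to VFIO_TYPE1v2_IOMMU and that a mapping was
 * previously created at the given iova/size.  An unmap must cover whole
 * prior mappings; the kernel reports back how many bytes it removed.
 */
#if 0	/* userspace example, not kernel code */
#include <linux/vfio.h>
#include <sys/ioctl.h>
#include <string.h>

static int unmap_range(int container_fd, __u64 iova, __u64 size)
{
	struct vfio_iommu_type1_dma_unmap unmap;

	memset(&unmap, 0, sizeof(unmap));
	unmap.argsz = sizeof(unmap);
	unmap.iova = iova;	/* must match the start of a prior mapping */
	unmap.size = size;	/* must not bisect a prior mapping */

	if (ioctl(container_fd, VFIO_IOMMU_UNMAP_DMA, &unmap))
		return -1;

	/* unmap.size now reflects how many bytes were actually unmapped */
	return unmap.size == size ? 0 : -1;
}
#endif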
/*
 * Turns out AMD IOMMU has a page table bug where it won't map large pages
 * to a region that previously mapped smaller pages.  This should be fixed
 * soon, so this is just a temporary workaround to break mappings down into
 * PAGE_SIZE.  Better to map smaller pages than nothing.
 */
static int map_try_harder(struct vfio_domain *domain, dma_addr_t iova,
			  unsigned long pfn, long npage, int prot)
{
	long i;
	int ret;

	for (i = 0; i < npage; i++, pfn++, iova += PAGE_SIZE) {
		ret = iommu_map(domain->domain, iova,
				(phys_addr_t)pfn << PAGE_SHIFT,
				PAGE_SIZE, prot | domain->prot);
		if (ret)
			break;
	}

	for (; i < npage && i > 0; i--, iova -= PAGE_SIZE)
		iommu_unmap(domain->domain, iova, PAGE_SIZE);

	return ret;
}
static int vfio_iommu_map(struct vfio_iommu *iommu, dma_addr_t iova,
			  unsigned long pfn, long npage, int prot)
{
	struct vfio_domain *d;
	int ret;

	list_for_each_entry(d, &iommu->domain_list, next) {
		ret = iommu_map(d->domain, iova, (phys_addr_t)pfn << PAGE_SHIFT,
				npage << PAGE_SHIFT, prot | d->prot);
		if (ret) {
			if (ret != -EBUSY ||
			    map_try_harder(d, iova, pfn, npage, prot))
				goto unwind;
		}
	}

	return 0;

unwind:
	list_for_each_entry_continue_reverse(d, &iommu->domain_list, next)
		iommu_unmap(d->domain, iova, npage << PAGE_SHIFT);

	return ret;
}
static int vfio_dma_do_map(struct vfio_iommu *iommu,
			   struct vfio_iommu_type1_dma_map *map)
{
	dma_addr_t iova = map->iova;
	unsigned long vaddr = map->vaddr;
	size_t size = map->size;
	long npage;
	int ret = 0, prot = 0;
	uint64_t mask;
	struct vfio_dma *dma;
	unsigned long pfn;

	/* Verify that none of our __u64 fields overflow */
	if (map->size != size || map->vaddr != vaddr || map->iova != iova)
		return -EINVAL;

	mask = ((uint64_t)1 << __ffs(vfio_pgsize_bitmap(iommu))) - 1;

	WARN_ON(mask & PAGE_MASK);

	/* READ/WRITE from device perspective */
	if (map->flags & VFIO_DMA_MAP_FLAG_WRITE)
		prot |= IOMMU_WRITE;
	if (map->flags & VFIO_DMA_MAP_FLAG_READ)
		prot |= IOMMU_READ;

	if (!prot || !size || (size | iova | vaddr) & mask)
		return -EINVAL;

	/* Don't allow IOVA or virtual address wrap */
	if (iova + size - 1 < iova || vaddr + size - 1 < vaddr)
		return -EINVAL;

	mutex_lock(&iommu->lock);

	if (vfio_find_dma(iommu, iova, size)) {
		mutex_unlock(&iommu->lock);
		return -EEXIST;
	}

	dma = kzalloc(sizeof(*dma), GFP_KERNEL);
	if (!dma) {
		mutex_unlock(&iommu->lock);
		return -ENOMEM;
	}

	dma->iova = iova;
	dma->vaddr = vaddr;
	dma->prot = prot;

	/* Insert zero-sized and grow as we map chunks of it */
	vfio_link_dma(iommu, dma);

	while (size) {
		/* Pin a contiguous chunk of memory */
		npage = vfio_pin_pages(vaddr + dma->size,
				       size >> PAGE_SHIFT, prot, &pfn);
		if (npage <= 0) {
			WARN_ON(!npage);
			ret = (int)npage;
			break;
		}

		/* Map it! */
		ret = vfio_iommu_map(iommu, iova + dma->size, pfn, npage, prot);
		if (ret) {
			vfio_unpin_pages(pfn, npage, prot, true);
			break;
		}

		size -= npage << PAGE_SHIFT;
		dma->size += npage << PAGE_SHIFT;
	}

	if (ret)
		vfio_remove_dma(iommu, dma);

	mutex_unlock(&iommu->lock);
	return ret;
}
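/*
 * Illustrative only (not part of the driver): a minimal userspace sketch of
 * creating a DMA mapping through the interface above, assuming "container_fd"
 * is a VFIO container already set to a Type1 IOMMU.  Buffer, IOVA and size
 * must all be aligned to the IOMMU's minimum page size.
 */
#if 0	/* userspace example, not kernel code */
#include <linux/vfio.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <string.h>

static int map_buffer(int container_fd, __u64 iova, size_t len)
{
	struct vfio_iommu_type1_dma_map map;
	void *buf;

	/* Anonymous pages; the driver pins them for the life of the mapping */
	buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (buf == MAP_FAILED)
		return -1;

	memset(&map, 0, sizeof(map));
	map.argsz = sizeof(map);
	map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
	map.vaddr = (__u64)(unsigned long)buf;	/* process virtual address */
	map.iova  = iova;			/* device address */
	map.size  = len;

	return ioctl(container_fd, VFIO_IOMMU_MAP_DMA, &map);
}
#endif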
static int vfio_bus_type(struct device *dev, void *data)
{
	struct bus_type **bus = data;

	if (*bus && *bus != dev->bus)
		return -EINVAL;

	*bus = dev->bus;

	return 0;
}
static int vfio_iommu_replay(struct vfio_iommu *iommu,
			     struct vfio_domain *domain)
{
	struct vfio_domain *d;
	struct rb_node *n;
	int ret;

	/* Arbitrarily pick the first domain in the list for lookups */
	d = list_first_entry(&iommu->domain_list, struct vfio_domain, next);
	n = rb_first(&iommu->dma_list);

	/* If there's not a domain, there better not be any mappings */
	if (WARN_ON(n && !d))
		return -EINVAL;

	for (; n; n = rb_next(n)) {
		struct vfio_dma *dma;
		dma_addr_t iova;

		dma = rb_entry(n, struct vfio_dma, node);
		iova = dma->iova;

		while (iova < dma->iova + dma->size) {
			phys_addr_t phys = iommu_iova_to_phys(d->domain, iova);
			size_t size;

			if (WARN_ON(!phys)) {
				iova += PAGE_SIZE;
				continue;
			}

			size = PAGE_SIZE;

			/* Extend as far as the physical pages stay contiguous */
			while (iova + size < dma->iova + dma->size &&
			       phys + size == iommu_iova_to_phys(d->domain,
								 iova + size))
				size += PAGE_SIZE;

			ret = iommu_map(domain->domain, iova, phys,
					size, dma->prot | domain->prot);
			if (ret)
				return ret;

			iova += size;
		}
	}

	return 0;
}
static int vfio_iommu_type1_attach_group(void *iommu_data,
					 struct iommu_group *iommu_group)
{
	struct vfio_iommu *iommu = iommu_data;
	struct vfio_group *group, *g;
	struct vfio_domain *domain, *d;
	struct bus_type *bus = NULL;
	int ret;

	mutex_lock(&iommu->lock);

	list_for_each_entry(d, &iommu->domain_list, next) {
		list_for_each_entry(g, &d->group_list, next) {
			if (g->iommu_group != iommu_group)
				continue;

			mutex_unlock(&iommu->lock);
			return -EINVAL;
		}
	}

	group = kzalloc(sizeof(*group), GFP_KERNEL);
	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
	if (!group || !domain) {
		ret = -ENOMEM;
		goto out_free;
	}

	group->iommu_group = iommu_group;

	/* Determine bus_type in order to allocate a domain */
	ret = iommu_group_for_each_dev(iommu_group, &bus, vfio_bus_type);
	if (ret)
		goto out_free;

	domain->domain = iommu_domain_alloc(bus);
	if (!domain->domain) {
		ret = -EIO;
		goto out_free;
	}

	if (iommu->nesting) {
		int attr = 1;

		ret = iommu_domain_set_attr(domain->domain, DOMAIN_ATTR_NESTING,
					    &attr);
		if (ret)
			goto out_domain;
	}

	ret = iommu_attach_group(domain->domain, iommu_group);
	if (ret)
		goto out_domain;

	INIT_LIST_HEAD(&domain->group_list);
	list_add(&group->next, &domain->group_list);

	if (!allow_unsafe_interrupts &&
	    !iommu_domain_has_cap(domain->domain, IOMMU_CAP_INTR_REMAP)) {
		pr_warn("%s: No interrupt remapping support.  Use the module param \"allow_unsafe_interrupts\" to enable VFIO IOMMU support on this platform\n",
			__func__);
		ret = -EPERM;
		goto out_detach;
	}

	if (iommu_domain_has_cap(domain->domain, IOMMU_CAP_CACHE_COHERENCY))
		domain->prot |= IOMMU_CACHE;

	/*
	 * Try to match an existing compatible domain.  We don't want to
	 * preclude an IOMMU driver supporting multiple bus_types and being
	 * able to include different bus_types in the same IOMMU domain, so
	 * we test whether the domains use the same iommu_ops rather than
	 * testing if they're on the same bus_type.
	 */
	list_for_each_entry(d, &iommu->domain_list, next) {
		if (d->domain->ops == domain->domain->ops &&
		    d->prot == domain->prot) {
			iommu_detach_group(domain->domain, iommu_group);
			if (!iommu_attach_group(d->domain, iommu_group)) {
				list_add(&group->next, &d->group_list);
				iommu_domain_free(domain->domain);
				kfree(domain);
				mutex_unlock(&iommu->lock);
				return 0;
			}

			ret = iommu_attach_group(domain->domain, iommu_group);
			if (ret)
				goto out_domain;
		}
	}

	/* replay mappings on new domains */
	ret = vfio_iommu_replay(iommu, domain);
	if (ret)
		goto out_detach;

	list_add(&domain->next, &iommu->domain_list);

	mutex_unlock(&iommu->lock);

	return 0;

out_detach:
	iommu_detach_group(domain->domain, iommu_group);
out_domain:
	iommu_domain_free(domain->domain);
out_free:
	kfree(domain);
	kfree(group);
	mutex_unlock(&iommu->lock);
	return ret;
}
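/*
 * Illustrative only (not part of the driver): a minimal userspace sketch of
 * the sequence that reaches the attach_group callback above.  The group path
 * ("/dev/vfio/26") is a placeholder; a real program would derive it from the
 * device's iommu_group link in sysfs.
 */
#if 0	/* userspace example, not kernel code */
#include <linux/vfio.h>
#include <sys/ioctl.h>
#include <fcntl.h>

static int attach_group_to_type1(void)
{
	int container, group;

	container = open("/dev/vfio/vfio", O_RDWR);
	group = open("/dev/vfio/26", O_RDWR);	/* placeholder group number */
	if (container < 0 || group < 0)
		return -1;

	/* Bind the group to the container... */
	if (ioctl(group, VFIO_GROUP_SET_CONTAINER, &container))
		return -1;

	/*
	 * ...then pick the IOMMU backend.  This is what triggers
	 * vfio_iommu_type1_open() and vfio_iommu_type1_attach_group().
	 */
	if (ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1v2_IOMMU))
		return -1;

	return container;
}
#endif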
static void vfio_iommu_unmap_unpin_all(struct vfio_iommu *iommu)
{
	struct rb_node *node;

	while ((node = rb_first(&iommu->dma_list)))
		vfio_remove_dma(iommu, rb_entry(node, struct vfio_dma, node));
}
static void vfio_iommu_type1_detach_group(void *iommu_data,
					  struct iommu_group *iommu_group)
{
	struct vfio_iommu *iommu = iommu_data;
	struct vfio_domain *domain;
	struct vfio_group *group;

	mutex_lock(&iommu->lock);

	list_for_each_entry(domain, &iommu->domain_list, next) {
		list_for_each_entry(group, &domain->group_list, next) {
			if (group->iommu_group != iommu_group)
				continue;

			iommu_detach_group(domain->domain, iommu_group);
			list_del(&group->next);
			kfree(group);
			/*
			 * Group ownership provides privilege, if the group
			 * list is empty, the domain goes away.  If it's the
			 * last domain, then all the mappings go away too.
			 */
			if (list_empty(&domain->group_list)) {
				if (list_is_singular(&iommu->domain_list))
					vfio_iommu_unmap_unpin_all(iommu);
				iommu_domain_free(domain->domain);
				list_del(&domain->next);
				kfree(domain);
			}
			goto done;
		}
	}

done:
	mutex_unlock(&iommu->lock);
}
static void *vfio_iommu_type1_open(unsigned long arg)
{
	struct vfio_iommu *iommu;

	iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
	if (!iommu)
		return ERR_PTR(-ENOMEM);

	switch (arg) {
	case VFIO_TYPE1_IOMMU:
		break;
	case VFIO_TYPE1_NESTING_IOMMU:
		iommu->nesting = true;
		/* fall through: nesting implies the v2 interface */
	case VFIO_TYPE1v2_IOMMU:
		iommu->v2 = true;
		break;
	default:
		kfree(iommu);
		return ERR_PTR(-EINVAL);
	}

	INIT_LIST_HEAD(&iommu->domain_list);
	iommu->dma_list = RB_ROOT;
	mutex_init(&iommu->lock);

	return iommu;
}
static void vfio_iommu_type1_release(void *iommu_data)
{
	struct vfio_iommu *iommu = iommu_data;
	struct vfio_domain *domain, *domain_tmp;
	struct vfio_group *group, *group_tmp;

	vfio_iommu_unmap_unpin_all(iommu);

	list_for_each_entry_safe(domain, domain_tmp,
				 &iommu->domain_list, next) {
		list_for_each_entry_safe(group, group_tmp,
					 &domain->group_list, next) {
			iommu_detach_group(domain->domain, group->iommu_group);
			list_del(&group->next);
			kfree(group);
		}
		iommu_domain_free(domain->domain);
		list_del(&domain->next);
		kfree(domain);
	}

	kfree(iommu);
}
static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu)
{
	struct vfio_domain *domain;
	int ret = 1;

	mutex_lock(&iommu->lock);
	list_for_each_entry(domain, &iommu->domain_list, next) {
		if (!(domain->prot & IOMMU_CACHE)) {
			ret = 0;
			break;
		}
	}
	mutex_unlock(&iommu->lock);

	return ret;
}
static long vfio_iommu_type1_ioctl(void *iommu_data,
				   unsigned int cmd, unsigned long arg)
{
	struct vfio_iommu *iommu = iommu_data;
	unsigned long minsz;

	if (cmd == VFIO_CHECK_EXTENSION) {
		switch (arg) {
		case VFIO_TYPE1_IOMMU:
		case VFIO_TYPE1v2_IOMMU:
		case VFIO_TYPE1_NESTING_IOMMU:
			return 1;
		case VFIO_DMA_CC_IOMMU:
			if (!iommu)
				return 0;
			return vfio_domains_have_iommu_cache(iommu);
		default:
			return 0;
		}
	} else if (cmd == VFIO_IOMMU_GET_INFO) {
		struct vfio_iommu_type1_info info;

		minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz)
			return -EINVAL;

		info.flags = 0;

		info.iova_pgsizes = vfio_pgsize_bitmap(iommu);

		return copy_to_user((void __user *)arg, &info, minsz) ?
			-EFAULT : 0;

	} else if (cmd == VFIO_IOMMU_MAP_DMA) {
		struct vfio_iommu_type1_dma_map map;
		uint32_t mask = VFIO_DMA_MAP_FLAG_READ |
				VFIO_DMA_MAP_FLAG_WRITE;

		minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);

		if (copy_from_user(&map, (void __user *)arg, minsz))
			return -EFAULT;

		if (map.argsz < minsz || map.flags & ~mask)
			return -EINVAL;

		return vfio_dma_do_map(iommu, &map);

	} else if (cmd == VFIO_IOMMU_UNMAP_DMA) {
		struct vfio_iommu_type1_dma_unmap unmap;
		long ret;

		minsz = offsetofend(struct vfio_iommu_type1_dma_unmap, size);

		if (copy_from_user(&unmap, (void __user *)arg, minsz))
			return -EFAULT;

		if (unmap.argsz < minsz || unmap.flags)
			return -EINVAL;

		ret = vfio_dma_do_unmap(iommu, &unmap);
		if (ret)
			return ret;

		return copy_to_user((void __user *)arg, &unmap, minsz) ?
			-EFAULT : 0;
	}

	return -ENOTTY;
}
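/*
 * Illustrative only (not part of the driver): a minimal userspace sketch of
 * querying the ioctl interface above, assuming "container_fd" is an open VFIO
 * container.  argsz tells the kernel how much of the struct userspace
 * provided; the kernel copies back at most its own minsz.
 */
#if 0	/* userspace example, not kernel code */
#include <linux/vfio.h>
#include <sys/ioctl.h>
#include <string.h>

static int query_type1(int container_fd)
{
	struct vfio_iommu_type1_info info;

	/* Returns 1 if the Type1 v2 backend is available */
	if (ioctl(container_fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1v2_IOMMU) != 1)
		return -1;

	memset(&info, 0, sizeof(info));
	info.argsz = sizeof(info);

	if (ioctl(container_fd, VFIO_IOMMU_GET_INFO, &info))
		return -1;

	/* info.iova_pgsizes is the bitmap of supported IOMMU page sizes */
	return (info.iova_pgsizes & (1UL << 12)) ? 0 : -1;	/* 4k pages? */
}
#endif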
static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = {
	.name		= "vfio-iommu-type1",
	.owner		= THIS_MODULE,
	.open		= vfio_iommu_type1_open,
	.release	= vfio_iommu_type1_release,
	.ioctl		= vfio_iommu_type1_ioctl,
	.attach_group	= vfio_iommu_type1_attach_group,
	.detach_group	= vfio_iommu_type1_detach_group,
};
static int __init vfio_iommu_type1_init(void)
{
	return vfio_register_iommu_driver(&vfio_iommu_driver_ops_type1);
}

static void __exit vfio_iommu_type1_cleanup(void)
{
	vfio_unregister_iommu_driver(&vfio_iommu_driver_ops_type1);
}

module_init(vfio_iommu_type1_init);
module_exit(vfio_iommu_type1_cleanup);
MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);